[LSR] Canonicalize reg1 + ... + regN into reg1 + ... + 1*regN. This commit introduces a canonical representation for the formulae. Basically, as soon as a formula has more than one base register, the scaled register field is used for one of them. The register put into the scaled register field is preferably a loop variant. The commit refactors how the formulae are built in order to produce such a representation. This yields a more accurate, but still perfectible, cost model. <rdar://problem/16731508> git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209230 91177308-0d34-0410-b5e6-96231b3b80d8 Quentin Colombet 6 years ago
3 changed file(s) with 397 addition(s) and 202 deletion(s).
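To make the canonical form concrete, the following is a minimal, self-contained sketch of the invariant the commit message describes. It is illustrative only: ToyFormula and its string registers are hypothetical stand-ins, not the real Formula/SCEV types from LoopStrengthReduce.cpp.

    #include <string>
    #include <vector>

    // Hypothetical, simplified stand-in for LSR's Formula: a sum of base
    // registers plus an optional Scale * ScaledReg term.  An empty ScaledReg
    // string plays the role of a null scaled register.
    struct ToyFormula {
      std::vector<std::string> BaseRegs;
      std::string ScaledReg;
      long Scale = 0;

      // The two rules of the canonical representation:
      //  1. BaseRegs.size() > 1 implies a scaled register is present, and
      //  2. a scaled register with Scale == 1 implies BaseRegs is not empty
      //     (a lone 1*reg canonicalizes to plain reg).
      bool isCanonical() const {
        if (!ScaledReg.empty())
          return Scale != 1 || !BaseRegs.empty();
        return BaseRegs.size() <= 1;
      }
    };

    // Example: reg1 + reg2 stored as two base registers is not canonical;
    // the same sum stored as reg1 + 1*reg2 is.
    //   ToyFormula{{"reg1", "reg2"}, "", 0}.isCanonical()   // false
    //   ToyFormula{{"reg1"}, "reg2", 1}.isCanonical()       // true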
237237 int64_t Scale;
238238
239239 /// BaseRegs - The list of "base" registers for this use. When this is
240 /// non-empty,
240 /// non-empty. The canonical representation of a formula is
241 /// 1. BaseRegs.size > 1 implies ScaledReg != NULL and
242 /// 2. ScaledReg != NULL implies Scale != 1 || !BaseRegs.empty().
243 /// #1 enforces that the scaled register is always used when at least two
244 /// registers are needed by the formula: e.g., reg1 + reg2 is reg1 + 1 * reg2.
245 /// #2 enforces that 1 * reg is reg.
246 /// This invariant can be temporarily broken while building a formula.
247 /// However, every formula inserted into the LSRInstance must be in canonical
248 /// form.
241249 SmallVector<const SCEV *, 4> BaseRegs;
242250
243251 /// ScaledReg - The 'scaled' register for this use. This should be non-null
254262 ScaledReg(nullptr), UnfoldedOffset(0) {}
255263
256264 void InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE);
265
266 bool isCanonical() const;
267
268 void Canonicalize();
269
270 bool Unscale();
257271
258272 size_t getNumRegs() const;
259273 Type *getType() const;
345359 BaseRegs.push_back(Sum);
346360 HasBaseReg = true;
347361 }
362 Canonicalize();
363 }
364
365 /// \brief Check whether or not this formula satisfies the canonical
366 /// representation.
367 /// \see Formula::BaseRegs.
368 bool Formula::isCanonical() const {
369 if (ScaledReg)
370 return Scale != 1 || !BaseRegs.empty();
371 return BaseRegs.size() <= 1;
372 }
373
374 /// \brief Helper method to morph a formula into its canonical representation.
375 /// \see Formula::BaseRegs.
376 /// Every formula having more than one base register must use the ScaledReg
377 /// field. Otherwise, we would have to do special cases everywhere in LSR
378 /// to treat reg1 + reg2 + ... the same way as reg1 + 1*reg2 + ...
379 /// On the other hand, 1*reg should be canonicalized into reg.
380 void Formula::Canonicalize() {
381 if (isCanonical())
382 return;
383 // So far we did not need this case. This is easy to implement but it is
384 // useless to maintain dead code. Besides, it could hurt compile time.
385 assert(!BaseRegs.empty() && "1*reg => reg, should not be needed.");
386 // Keep the invariant sum in BaseRegs and one of the variant sums in ScaledReg.
387 ScaledReg = BaseRegs.back();
388 BaseRegs.pop_back();
389 Scale = 1;
390 size_t BaseRegsSize = BaseRegs.size();
391 size_t Try = 0;
392 // If ScaledReg is an invariant, try to find a variant expression.
393 while (Try < BaseRegsSize && !isa<SCEVAddRecExpr>(ScaledReg))
394 std::swap(ScaledReg, BaseRegs[Try++]);
395 }
396
397 /// \brief Get rid of the scale in the formula.
398 /// In other words, this method morphs reg1 + 1*reg2 into reg1 + reg2.
399 /// \return true if it was possible to get rid of the scale, false otherwise.
400 /// \note After this operation the formula may not be in the canonical form.
401 bool Formula::Unscale() {
402 if (Scale != 1)
403 return false;
404 Scale = 0;
405 BaseRegs.push_back(ScaledReg);
406 ScaledReg = nullptr;
407 return true;
348408 }
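As a rough illustration of the register shuffling Canonicalize performs, here is a hypothetical mini-model (string registers instead of SCEVs, with a naming convention standing in for the real isa<SCEVAddRecExpr> loop-variance test); it sketches the logic only and is not the actual LSR code.

    #include <string>
    #include <utility>
    #include <vector>

    struct MiniFormula {
      std::vector<std::string> BaseRegs;
      std::string ScaledReg;   // empty means "no scaled register"
      long Scale = 0;
    };

    // Assumption for this sketch: registers named "iv*" model loop-variant
    // expressions.
    static bool isLoopVariant(const std::string &Reg) {
      return Reg.rfind("iv", 0) == 0;
    }

    // Mirror of the canonicalization shuffle: move one base register into the
    // scaled slot with Scale = 1, then prefer a loop-variant register there.
    static void canonicalize(MiniFormula &F) {
      if (F.BaseRegs.size() <= 1 || !F.ScaledReg.empty())
        return;   // nothing to do for the cases this sketch models
      F.ScaledReg = F.BaseRegs.back();
      F.BaseRegs.pop_back();
      F.Scale = 1;
      // If the scaled register is invariant, try to swap in a variant one.
      for (size_t Try = 0, E = F.BaseRegs.size();
           Try != E && !isLoopVariant(F.ScaledReg); ++Try)
        std::swap(F.ScaledReg, F.BaseRegs[Try]);
    }

    // E.g. a formula holding {"a", "iv1", "b"} as base registers becomes
    // BaseRegs = {"b", "a"}, ScaledReg = "iv1", Scale = 1: the loop-variant
    // register ends up in the scaled slot.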
349409
350410 /// getNumRegs - Return the total number of register operands used by this
775835 namespace {
776836 class LSRUse;
777837 }
778 // Check if it is legal to fold 2 base registers.
779 static bool isLegal2RegAMUse(const TargetTransformInfo &TTI, const LSRUse &LU,
780 const Formula &F);
838
839 /// \brief Check if the addressing mode defined by \p F is completely
840 /// folded in \p LU at isel time.
841 /// This includes address-mode folding and special icmp tricks.
842 /// This function returns true if \p LU can accommodate what \p F
843 /// defines and up to 1 base + 1 scaled + offset.
844 /// In other words, if \p F has several base registers, this function may
845 /// still return true. Therefore, users still need to account for
846 /// additional base registers and/or unfolded offsets to derive an
847 /// accurate cost model.
848 static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
849 const LSRUse &LU, const Formula &F);
781850 // Get the cost of the scaling factor used in F for LU.
782851 static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
783852 const LSRUse &LU, const Formula &F);
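For intuition about the "up to 1 base + 1 scaled + offset" shape mentioned in the doc comment above, the snippet below (illustrative only; the function and constants are made up) shows the kind of address computation many targets, x86-64 for instance, can fold into a single memory operand.

    #include <cstdint>

    // Address = Base + 8*Idx + 16: one base register, one scaled register
    // (scale 8 for a double element), and a constant displacement.
    double loadElem(const double *Base, int64_t Idx) {
      const char *Addr = reinterpret_cast<const char *>(Base) + 8 * Idx + 16;
      return *reinterpret_cast<const double *>(Addr);
    }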
921990 ScalarEvolution &SE, DominatorTree &DT,
922991 const LSRUse &LU,
923992 SmallPtrSet *LoserRegs) {
993 assert(F.isCanonical() && "Cost is accurate only for canonical formula");
924994 // Tally up the registers.
925995 if (const SCEV *ScaledReg = F.ScaledReg) {
926996 if (VisitedRegs.count(ScaledReg)) {
9441014 }
9451015
9461016 // Determine how many (unfolded) adds we'll need inside the loop.
947 size_t NumBaseParts = F.BaseRegs.size() + (F.UnfoldedOffset != 0);
1017 size_t NumBaseParts = F.getNumRegs();
9481018 if (NumBaseParts > 1)
9491019 // Do not count the base and a possible second register if the target
9501020 // allows to fold 2 registers.
951 NumBaseAdds += NumBaseParts - (1 + isLegal2RegAMUse(TTI, LU, F));
1021 NumBaseAdds +=
1022 NumBaseParts - (1 + (F.Scale && isAMCompletelyFolded(TTI, LU, F)));
1023 NumBaseAdds += (F.UnfoldedOffset != 0);
9521024
9531025 // Accumulate non-free scaling amounts.
9541026 ScaleCost += getScalingFactorCost(TTI, LU, F);
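A rough model of the revised add counting (a sketch, not the exact cost computation; countBaseAdds and its parameters are illustrative stand-ins): each register beyond the first costs one add, one of which is forgiven when the formula is scaled and the target folds the addressing mode, and a non-zero unfolded offset now costs one additional add of its own.

    #include <cstddef>
    #include <cstdint>

    static unsigned countBaseAdds(size_t NumRegs, bool ScaledAndFolded,
                                  int64_t UnfoldedOffset) {
      unsigned NumBaseAdds = 0;
      if (NumRegs > 1)
        NumBaseAdds += NumRegs - (1 + (ScaledAndFolded ? 1 : 0));
      NumBaseAdds += (UnfoldedOffset != 0);
      return NumBaseAdds;
    }

    // Example: reg1 + reg2 + 1*reg3 with an unfolded offset of 16 on a target
    // that folds base + scaled register: 3 - (1 + 1) = 1 add for the registers
    // plus 1 for the offset, so countBaseAdds(3, true, 16) == 2.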
12091281
12101282 /// InsertFormula - If the given formula has not yet been inserted, add it to
12111283 /// the list, and return true. Return false otherwise.
1284 /// The formula must be in canonical form.
12121285 bool LSRUse::InsertFormula(const Formula &F) {
1286 assert(F.isCanonical() && "Invalid canonical representation");
1287
12131288 if (!Formulae.empty() && RigidFormula)
12141289 return false;
12151290
12351310
12361311 // Record registers now being used by this use.
12371312 Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
1313 if (F.ScaledReg)
1314 Regs.insert(F.ScaledReg);
12381315
12391316 return true;
12401317 }
13011378 }
13021379 #endif
13031380
1304 /// isLegalUse - Test whether the use described by AM is "legal", meaning it can
1305 /// be completely folded into the user instruction at isel time. This includes
1306 /// address-mode folding and special icmp tricks.
1307 static bool isLegalUse(const TargetTransformInfo &TTI, LSRUse::KindType Kind,
1308 Type *AccessTy, GlobalValue *BaseGV, int64_t BaseOffset,
1309 bool HasBaseReg, int64_t Scale) {
1381 static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
1382 LSRUse::KindType Kind, Type *AccessTy,
1383 GlobalValue *BaseGV, int64_t BaseOffset,
1384 bool HasBaseReg, int64_t Scale) {
13101385 switch (Kind) {
13111386 case LSRUse::Address:
13121387 return TTI.isLegalAddressingMode(AccessTy, BaseGV, BaseOffset, HasBaseReg, Scale);
13571432 llvm_unreachable("Invalid LSRUse Kind!");
13581433 }
13591434
1360 static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
1361 int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy,
1362 GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg,
1363 int64_t Scale) {
1435 static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
1436 int64_t MinOffset, int64_t MaxOffset,
1437 LSRUse::KindType Kind, Type *AccessTy,
1438 GlobalValue *BaseGV, int64_t BaseOffset,
1439 bool HasBaseReg, int64_t Scale) {
13641440 // Check for overflow.
13651441 if (((int64_t)((uint64_t)BaseOffset + MinOffset) > BaseOffset) !=
13661442 (MinOffset > 0))
13711447 return false;
13721448 MaxOffset = (uint64_t)BaseOffset + MaxOffset;
13731449
1374 return isLegalUse(TTI, Kind, AccessTy, BaseGV, MinOffset, HasBaseReg,
1375 Scale) &&
1376 isLegalUse(TTI, Kind, AccessTy, BaseGV, MaxOffset, HasBaseReg, Scale);
1450 return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, MinOffset,
1451 HasBaseReg, Scale) &&
1452 isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, MaxOffset,
1453 HasBaseReg, Scale);
1454 }
1455
1456 static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
1457 int64_t MinOffset, int64_t MaxOffset,
1458 LSRUse::KindType Kind, Type *AccessTy,
1459 const Formula &F) {
1460 // For the purpose of isAMCompletelyFolded either having a canonical formula
1461 // or a scale not equal to zero is correct.
1462 // Problems may arise from non-canonical formulae having a scale == 0.
1463 // Strictly speaking, it would be best to just rely on canonical formulae.
1464 // However, when we generate the scaled formulae, we first check that the
1465 // scaling factor is profitable before computing the actual ScaledReg, for
1466 // compile time's sake.
1467 assert((F.isCanonical() || F.Scale != 0));
1468 return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy,
1469 F.BaseGV, F.BaseOffset, F.HasBaseReg, F.Scale);
1470 }
1471
1472 /// isLegalUse - Test whether we know how to expand the current formula.
1473 static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
1474 int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy,
1475 GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg,
1476 int64_t Scale) {
1477 // We know how to expand completely foldable formulae.
1478 return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
1479 BaseOffset, HasBaseReg, Scale) ||
1480 // Or formulae that use a base register produced by a sum of base
1481 // registers.
1482 (Scale == 1 &&
1483 isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy,
1484 BaseGV, BaseOffset, true, 0));
13771485 }
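The relaxed legality test above can be summarized as follows (a hedged sketch; the query type and names are hypothetical, not the real TargetTransformInfo hook): a formula is considered expandable either when the target folds it outright, or when Scale == 1, because reg1 + 1*reg2 can always be emitted as an explicit add feeding a single base register.

    #include <cstdint>

    // Hypothetical stand-in for the target query "is this addressing mode
    // legal?" used by the sketch below.
    using LegalAMQuery = bool (*)(int64_t BaseOffset, bool HasBaseReg,
                                  int64_t Scale);

    static bool isLegalUseSketch(LegalAMQuery TargetFolds, int64_t BaseOffset,
                                 bool HasBaseReg, int64_t Scale) {
      return TargetFolds(BaseOffset, HasBaseReg, Scale) ||
             // A scale of 1 degenerates to one more base register.
             (Scale == 1 &&
              TargetFolds(BaseOffset, /*HasBaseReg=*/true, /*Scale=*/0));
    }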
13781486
13791487 static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
13831491 F.BaseOffset, F.HasBaseReg, F.Scale);
13841492 }
13851493
1386 static bool isLegal2RegAMUse(const TargetTransformInfo &TTI, const LSRUse &LU,
1387 const Formula &F) {
1388 // If F is used as an Addressing Mode, it may fold one Base plus one
1389 // scaled register. If the scaled register is nil, do as if another
1390 // element of the base regs is a 1-scaled register.
1391 // This is possible if BaseRegs has at least 2 registers.
1392
1393 // If this is not an address calculation, this is not an addressing mode
1394 // use.
1395 if (LU.Kind != LSRUse::Address)
1396 return false;
1397
1398 // F is already scaled.
1399 if (F.Scale != 0)
1400 return false;
1401
1402 // We need to keep one register for the base and one to scale.
1403 if (F.BaseRegs.size() < 2)
1404 return false;
1405
1406 return isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
1407 F.BaseGV, F.BaseOffset, F.HasBaseReg, 1);
1408 }
1494 static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
1495 const LSRUse &LU, const Formula &F) {
1496 return isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,
1497 LU.AccessTy, F.BaseGV, F.BaseOffset, F.HasBaseReg,
1498 F.Scale);
1499 }
14091500
14101501 static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
14111502 const LSRUse &LU, const Formula &F) {
14121503 if (!F.Scale)
14131504 return 0;
1414 assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,
1415 LU.AccessTy, F) && "Illegal formula in use.");
1505
1506 // If the use is not completely folded in that instruction, we will have to
1507 // pay an extra cost only for scale != 1.
1508 if (!isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,
1509 LU.AccessTy, F))
1510 return F.Scale != 1;
14161511
14171512 switch (LU.Kind) {
14181513 case LSRUse::Address: {
14311526 return std::max(ScaleCostMinOffset, ScaleCostMaxOffset);
14321527 }
14331528 case LSRUse::ICmpZero:
1434 // ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg.
1435 // Therefore, return 0 in case F.Scale == -1.
1436 return F.Scale != -1;
1437
14381529 case LSRUse::Basic:
14391530 case LSRUse::Special:
1531 // The use is completely folded, i.e., everything is folded into the
1532 // instruction.
14401533 return 0;
14411534 }
14421535
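The effect of the new scaling-factor cost on non-address uses can be sketched as below (illustrative only; the real code additionally queries the target's scaling-factor cost for Address uses and takes the worst case over the offset range): once the formula cannot be completely folded, only a scale other than 1 is charged, since 1*reg is just another register add.

    // Sketch for non-address uses: Scale == 0 means no scaled register at all,
    // Scale == 1 costs nothing extra (it is a plain add), any other scale that
    // the target cannot fold costs one unit.
    static unsigned scalingFactorCostSketch(bool CompletelyFolded, long Scale) {
      if (Scale == 0)
        return 0;
      if (!CompletelyFolded)
        return Scale != 1 ? 1 : 0;
      return 0;   // completely folded into the instruction: free
    }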
14611554 HasBaseReg = true;
14621555 }
14631556
1464 return isLegalUse(TTI, Kind, AccessTy, BaseGV, BaseOffset, HasBaseReg, Scale);
1557 return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, BaseOffset,
1558 HasBaseReg, Scale);
14651559 }
14661560
14671561 static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
14861580 // base and a scale.
14871581 int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
14881582
1489 return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
1490 BaseOffset, HasBaseReg, Scale);
1583 return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
1584 BaseOffset, HasBaseReg, Scale);
14911585 }
14921586
14931587 namespace {
16431737
16441738 void GenerateReassociations(LSRUse &LU, unsigned LUIdx, Formula Base,
16451739 unsigned Depth = 0);
1740
1741 void GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
1742 const Formula &Base, unsigned Depth,
1743 size_t Idx, bool IsScaledReg = false);
16461744 void GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base);
1745 void GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
1746 const Formula &Base, size_t Idx,
1747 bool IsScaledReg = false);
16471748 void GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
1749 void GenerateConstantOffsetsImpl(LSRUse &LU, unsigned LUIdx,
1750 const Formula &Base,
1751 const SmallVectorImpl &Worklist,
1752 size_t Idx, bool IsScaledReg = false);
16481753 void GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
16491754 void GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Formula Base);
16501755 void GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base);
21472252 // the uses will have all its uses outside the loop, for example.
21482253 if (LU.Kind != Kind)
21492254 return false;
2150 // Conservatively assume HasBaseReg is true for now.
2151 if (NewOffset < LU.MinOffset) {
2152 if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ nullptr,
2153 LU.MaxOffset - NewOffset, HasBaseReg))
2154 return false;
2155 NewMinOffset = NewOffset;
2156 } else if (NewOffset > LU.MaxOffset) {
2157 if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ nullptr,
2158 NewOffset - LU.MinOffset, HasBaseReg))
2159 return false;
2160 NewMaxOffset = NewOffset;
2161 }
2255
21622256 // Check for a mismatched access type, and fall back conservatively as needed.
21632257 // TODO: Be less conservative when the type is similar and can use the same
21642258 // addressing modes.
21652259 if (Kind == LSRUse::Address && AccessTy != LU.AccessTy)
21662260 NewAccessTy = Type::getVoidTy(AccessTy->getContext());
2261
2262 // Conservatively assume HasBaseReg is true for now.
2263 if (NewOffset < LU.MinOffset) {
2264 if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, /*BaseGV=*/nullptr,
2265 LU.MaxOffset - NewOffset, HasBaseReg))
2266 return false;
2267 NewMinOffset = NewOffset;
2268 } else if (NewOffset > LU.MaxOffset) {
2269 if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, /*BaseGV=*/nullptr,
2270 NewOffset - LU.MinOffset, HasBaseReg))
2271 return false;
2272 NewMaxOffset = NewOffset;
2273 }
21672274
21682275 // Update the use.
21692276 LU.MinOffset = NewMinOffset;
29933100 /// InsertFormula - If the given formula has not yet been inserted, add it to
29943101 /// the list, and return true. Return false otherwise.
29953102 bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
3103 // Do not insert a formula that we will not be able to expand.
3104 assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F) &&
3105 "Formula is illegal");
29963106 if (!LU.InsertFormula(F))
29973107 return false;
29983108
31483258 return S;
31493259 }
31503260
3261 /// \brief Helper function for LSRInstance::GenerateReassociations.
3262 void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
3263 const Formula &Base,
3264 unsigned Depth, size_t Idx,
3265 bool IsScaledReg) {
3266 const SCEV *BaseReg = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
3267 SmallVector AddOps;
3268 const SCEV *Remainder = CollectSubexprs(BaseReg, nullptr, AddOps, L, SE);
3269 if (Remainder)
3270 AddOps.push_back(Remainder);
3271
3272 if (AddOps.size() == 1)
3273 return;
3274
3275 for (SmallVectorImpl::const_iterator J = AddOps.begin(),
3276 JE = AddOps.end();
3277 J != JE; ++J) {
3278
3279 // Loop-variant "unknown" values are uninteresting; we won't be able to
3280 // do anything meaningful with them.
3281 if (isa<SCEVUnknown>(*J) && !SE.isLoopInvariant(*J, L))
3282 continue;
3283
3284 // Don't pull a constant into a register if the constant could be folded
3285 // into an immediate field.
3286 if (isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
3287 LU.AccessTy, *J, Base.getNumRegs() > 1))
3288 continue;
3289
3290 // Collect all operands except *J.
3291 SmallVector InnerAddOps(
3292 ((const SmallVector &)AddOps).begin(), J);
3293 InnerAddOps.append(std::next(J),
3294 ((const SmallVector &)AddOps).end());
3295
3296 // Don't leave just a constant behind in a register if the constant could
3297 // be folded into an immediate field.
3298 if (InnerAddOps.size() == 1 &&
3299 isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
3300 LU.AccessTy, InnerAddOps[0], Base.getNumRegs() > 1))
3301 continue;
3302
3303 const SCEV *InnerSum = SE.getAddExpr(InnerAddOps);
3304 if (InnerSum->isZero())
3305 continue;
3306 Formula F = Base;
3307
3308 // Add the remaining pieces of the add back into the new formula.
3309 const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);
3310 if (InnerSumSC && SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 &&
3311 TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
3312 InnerSumSC->getValue()->getZExtValue())) {
3313 F.UnfoldedOffset =
3314 (uint64_t)F.UnfoldedOffset + InnerSumSC->getValue()->getZExtValue();
3315 if (IsScaledReg)
3316 F.ScaledReg = nullptr;
3317 else
3318 F.BaseRegs.erase(F.BaseRegs.begin() + Idx);
3319 } else if (IsScaledReg)
3320 F.ScaledReg = InnerSum;
3321 else
3322 F.BaseRegs[Idx] = InnerSum;
3323
3324 // Add J as its own register, or an unfolded immediate.
3325 const SCEVConstant *SC = dyn_cast<SCEVConstant>(*J);
3326 if (SC && SE.getTypeSizeInBits(SC->getType()) <= 64 &&
3327 TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
3328 SC->getValue()->getZExtValue()))
3329 F.UnfoldedOffset =
3330 (uint64_t)F.UnfoldedOffset + SC->getValue()->getZExtValue();
3331 else
3332 F.BaseRegs.push_back(*J);
3333 // We may have changed the number of registers in the base regs; adjust the
3334 // formula accordingly.
3335 F.Canonicalize();
3336
3337 if (InsertFormula(LU, LUIdx, F))
3338 // If that formula hadn't been seen before, recurse to find more like
3339 // it.
3340 GenerateReassociations(LU, LUIdx, LU.Formulae.back(), Depth + 1);
3341 }
3342 }
3343
31513344 /// GenerateReassociations - Split out subexpressions from adds and the bases of
31523345 /// addrecs.
31533346 void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
3154 Formula Base,
3155 unsigned Depth) {
3347 Formula Base, unsigned Depth) {
3348 assert(Base.isCanonical() && "Input must be in the canonical form");
31563349 // Arbitrarily cap recursion to protect compile time.
3157 if (Depth >= 3) return;
3158
3159 for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
3160 const SCEV *BaseReg = Base.BaseRegs[i];
3161
3162 SmallVector AddOps;
3163 const SCEV *Remainder = CollectSubexprs(BaseReg, nullptr, AddOps, L, SE);
3164 if (Remainder)
3165 AddOps.push_back(Remainder);
3166
3167 if (AddOps.size() == 1) continue;
3168
3169 for (SmallVectorImpl::const_iterator J = AddOps.begin(),
3170 JE = AddOps.end(); J != JE; ++J) {
3171
3172 // Loop-variant "unknown" values are uninteresting; we won't be able to
3173 // do anything meaningful with them.
3174 if (isa(*J) && !SE.isLoopInvariant(*J, L))
3175 continue;
3176
3177 // Don't pull a constant into a register if the constant could be folded
3178 // into an immediate field.
3179 if (isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
3180 LU.AccessTy, *J, Base.getNumRegs() > 1))
3181 continue;
3182
3183 // Collect all operands except *J.
3184 SmallVector InnerAddOps(
3185 ((const SmallVector &)AddOps).begin(), J);
3186 InnerAddOps.append(std::next(J),
3187 ((const SmallVector &)AddOps).end());
3188
3189 // Don't leave just a constant behind in a register if the constant could
3190 // be folded into an immediate field.
3191 if (InnerAddOps.size() == 1 &&
3192 isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
3193 LU.AccessTy, InnerAddOps[0], Base.getNumRegs() > 1))
3194 continue;
3195
3196 const SCEV *InnerSum = SE.getAddExpr(InnerAddOps);
3197 if (InnerSum->isZero())
3198 continue;
3199 Formula F = Base;
3200
3201 // Add the remaining pieces of the add back into the new formula.
3202 const SCEVConstant *InnerSumSC = dyn_cast(InnerSum);
3203 if (InnerSumSC &&
3204 SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 &&
3205 TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
3206 InnerSumSC->getValue()->getZExtValue())) {
3207 F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset +
3208 InnerSumSC->getValue()->getZExtValue();
3209 F.BaseRegs.erase(F.BaseRegs.begin() + i);
3210 } else
3211 F.BaseRegs[i] = InnerSum;
3212
3213 // Add J as its own register, or an unfolded immediate.
3214 const SCEVConstant *SC = dyn_cast(*J);
3215 if (SC && SE.getTypeSizeInBits(SC->getType()) <= 64 &&
3216 TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
3217 SC->getValue()->getZExtValue()))
3218 F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset +
3219 SC->getValue()->getZExtValue();
3220 else
3221 F.BaseRegs.push_back(*J);
3222
3223 if (InsertFormula(LU, LUIdx, F))
3224 // If that formula hadn't been seen before, recurse to find more like
3225 // it.
3226 GenerateReassociations(LU, LUIdx, LU.Formulae.back(), Depth+1);
3227 }
3228 }
3350 if (Depth >= 3)
3351 return;
3352
3353 for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
3354 GenerateReassociationsImpl(LU, LUIdx, Base, Depth, i);
3355
3356 if (Base.Scale == 1)
3357 GenerateReassociationsImpl(LU, LUIdx, Base, Depth,
3358 /* Idx */ -1, /* IsScaledReg */ true);
32293359 }
32303360
32313361 /// GenerateCombinations - Generate a formula consisting of all of the
32333363 void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
32343364 Formula Base) {
32353365 // This method is only interesting on a plurality of registers.
3236 if (Base.BaseRegs.size() <= 1) return;
3237
3366 if (Base.BaseRegs.size() + (Base.Scale == 1) <= 1)
3367 return;
3368
3369 // Flatten the representation, i.e., reg1 + 1*reg2 => reg1 + reg2, before
3370 // processing the formula.
3371 Base.Unscale();
32383372 Formula F = Base;
32393373 F.BaseRegs.clear();
32403374 SmallVector Ops;
32543388 // rather than proceed with zero in a register.
32553389 if (!Sum->isZero()) {
32563390 F.BaseRegs.push_back(Sum);
3391 F.Canonicalize();
32573392 (void)InsertFormula(LU, LUIdx, F);
32583393 }
32593394 }
3395 }
3396
3397 /// \brief Helper function for LSRInstance::GenerateSymbolicOffsets.
3398 void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
3399 const Formula &Base, size_t Idx,
3400 bool IsScaledReg) {
3401 const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
3402 GlobalValue *GV = ExtractSymbol(G, SE);
3403 if (G->isZero() || !GV)
3404 return;
3405 Formula F = Base;
3406 F.BaseGV = GV;
3407 if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
3408 return;
3409 if (IsScaledReg)
3410 F.ScaledReg = G;
3411 else
3412 F.BaseRegs[Idx] = G;
3413 (void)InsertFormula(LU, LUIdx, F);
32603414 }
32613415
32623416 /// GenerateSymbolicOffsets - Generate reuse formulae using symbolic offsets.
32653419 // We can't add a symbolic offset if the address already contains one.
32663420 if (Base.BaseGV) return;
32673421
3268 for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
3269 const SCEV *G = Base.BaseRegs[i];
3270 GlobalValue *GV = ExtractSymbol(G, SE);
3271 if (G->isZero() || !GV)
3272 continue;
3422 for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
3423 GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, i);
3424 if (Base.Scale == 1)
3425 GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, /* Idx */ -1,
3426 /* IsScaledReg */ true);
3427 }
3428
3429 /// \brief Helper function for LSRInstance::GenerateConstantOffsets.
3430 void LSRInstance::GenerateConstantOffsetsImpl(
3431 LSRUse &LU, unsigned LUIdx, const Formula &Base,
3432 const SmallVectorImpl &Worklist, size_t Idx, bool IsScaledReg) {
3433 const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
3434 for (SmallVectorImpl::const_iterator I = Worklist.begin(),
3435 E = Worklist.end();
3436 I != E; ++I) {
32733437 Formula F = Base;
3274 F.BaseGV = GV;
3275 if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
3276 continue;
3277 F.BaseRegs[i] = G;
3278 (void)InsertFormula(LU, LUIdx, F);
3279 }
3438 F.BaseOffset = (uint64_t)Base.BaseOffset - *I;
3439 if (isLegalUse(TTI, LU.MinOffset - *I, LU.MaxOffset - *I, LU.Kind,
3440 LU.AccessTy, F)) {
3441 // Add the offset to the base register.
3442 const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), *I), G);
3443 // If it cancelled out, drop the base register, otherwise update it.
3444 if (NewG->isZero()) {
3445 if (IsScaledReg) {
3446 F.Scale = 0;
3447 F.ScaledReg = nullptr;
3448 } else
3449 F.DeleteBaseReg(F.BaseRegs[Idx]);
3450 F.Canonicalize();
3451 } else if (IsScaledReg)
3452 F.ScaledReg = NewG;
3453 else
3454 F.BaseRegs[Idx] = NewG;
3455
3456 (void)InsertFormula(LU, LUIdx, F);
3457 }
3458 }
3459
3460 int64_t Imm = ExtractImmediate(G, SE);
3461 if (G->isZero() || Imm == 0)
3462 return;
3463 Formula F = Base;
3464 F.BaseOffset = (uint64_t)F.BaseOffset + Imm;
3465 if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
3466 return;
3467 if (IsScaledReg)
3468 F.ScaledReg = G;
3469 else
3470 F.BaseRegs[Idx] = G;
3471 (void)InsertFormula(LU, LUIdx, F);
32803472 }
32813473
32823474 /// GenerateConstantOffsets - Generate reuse formulae using constant offsets.
32893481 if (LU.MaxOffset != LU.MinOffset)
32903482 Worklist.push_back(LU.MaxOffset);
32913483
3292 for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
3293 const SCEV *G = Base.BaseRegs[i];
3294
3295 for (SmallVectorImpl::const_iterator I = Worklist.begin(),
3296 E = Worklist.end(); I != E; ++I) {
3297 Formula F = Base;
3298 F.BaseOffset = (uint64_t)Base.BaseOffset - *I;
3299 if (isLegalUse(TTI, LU.MinOffset - *I, LU.MaxOffset - *I, LU.Kind,
3300 LU.AccessTy, F)) {
3301 // Add the offset to the base register.
3302 const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), *I), G);
3303 // If it cancelled out, drop the base register, otherwise update it.
3304 if (NewG->isZero()) {
3305 std::swap(F.BaseRegs[i], F.BaseRegs.back());
3306 F.BaseRegs.pop_back();
3307 } else
3308 F.BaseRegs[i] = NewG;
3309
3310 (void)InsertFormula(LU, LUIdx, F);
3311 }
3312 }
3313
3314 int64_t Imm = ExtractImmediate(G, SE);
3315 if (G->isZero() || Imm == 0)
3316 continue;
3317 Formula F = Base;
3318 F.BaseOffset = (uint64_t)F.BaseOffset + Imm;
3319 if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
3320 continue;
3321 F.BaseRegs[i] = G;
3322 (void)InsertFormula(LU, LUIdx, F);
3323 }
3484 for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
3485 GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, i);
3486 if (Base.Scale == 1)
3487 GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, /* Idx */ -1,
3488 /* IsScaledReg */ true);
33243489 }
33253490
33263491 /// GenerateICmpZeroScales - For ICmpZero, check to see if we can scale up
34203585 if (!IntTy) return;
34213586
34223587 // If this Formula already has a scaled register, we can't add another one.
3423 if (Base.Scale != 0) return;
3588 // Try to unscale the formula to generate a better scale.
3589 if (Base.Scale != 0 && !Base.Unscale())
3590 return;
3591
3592 assert(Base.Scale == 0 && "Unscale did not do its job!");
34243593
34253594 // Check each interesting stride.
34263595 for (SmallSetVector::const_iterator
34613630 Formula F = Base;
34623631 F.ScaledReg = Quotient;
34633632 F.DeleteBaseReg(F.BaseRegs[i]);
3633 // The canonical representation of 1*reg is reg, which is already in
3634 // Base. In that case, do not try to insert the formula, it will be
3635 // rejected anyway.
3636 if (F.Scale == 1 && F.BaseRegs.empty())
3637 continue;
34643638 (void)InsertFormula(LU, LUIdx, F);
34653639 }
34663640 }
36253799
36263800 // TODO: Use a more targeted data structure.
36273801 for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
3628 const Formula &F = LU.Formulae[L];
3802 Formula F = LU.Formulae[L];
3803 // FIXME: The code for the scaled and unscaled registers looks
3804 // very similar but slightly different. Investigate if they
3805 // could be merged. That way, we would not have to unscale the
3806 // Formula.
3807 F.Unscale();
36293808 // Use the immediate in the scaled register.
36303809 if (F.ScaledReg == OrigReg) {
36313810 int64_t Offset = (uint64_t)F.BaseOffset + Imm * (uint64_t)F.Scale;
36513830 continue;
36523831
36533832 // OK, looks good.
3833 NewF.Canonicalize();
36543834 (void)InsertFormula(LU, LUIdx, NewF);
36553835 } else {
36563836 // Use the immediate in a base register.
36843864 goto skip_formula;
36853865
36863866 // Ok, looks good.
3867 NewF.Canonicalize();
36873868 (void)InsertFormula(LU, LUIdx, NewF);
36883869 break;
36893870 skip_formula:;
39374118 for (SmallVectorImpl::const_iterator I = LU.Formulae.begin(),
39384119 E = LU.Formulae.end(); I != E; ++I) {
39394120 const Formula &F = *I;
3940 if (F.BaseOffset == 0 || F.Scale != 0)
4121 if (F.BaseOffset == 0 || (F.Scale != 0 && F.Scale != 1))
39414122 continue;
39424123
39434124 LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU);
43984579 Loops, SE, DT);
43994580
44004581 if (LU.Kind == LSRUse::ICmpZero) {
4401 // An interesting way of "folding" with an icmp is to use a negated
4402 // scale, which we'll implement by inserting it into the other operand
4403 // of the icmp.
4404 assert(F.Scale == -1 &&
4405 "The only scale supported by ICmpZero uses is -1!");
4406 ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr, IP);
4582 // Expand ScaledReg as if it were part of the base regs.
4583 if (F.Scale == 1)
4584 Ops.push_back(
4585 SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, IP)));
4586 else {
4587 // An interesting way of "folding" with an icmp is to use a negated
4588 // scale, which we'll implement by inserting it into the other operand
4589 // of the icmp.
4590 assert(F.Scale == -1 &&
4591 "The only scale supported by ICmpZero uses is -1!");
4592 ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr, IP);
4593 }
44074594 } else {
44084595 // Otherwise just expand the scaled register and an explicit scale,
44094596 // which is expected to be matched as part of the address.
44104597
44114598 // Flush the operand list to suppress SCEVExpander hoisting address modes.
4412 if (!Ops.empty() && LU.Kind == LSRUse::Address) {
4599 // Unless the addressing mode will not be folded.
4600 if (!Ops.empty() && LU.Kind == LSRUse::Address &&
4601 isAMCompletelyFolded(TTI, LU, F)) {
44134602 Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
44144603 Ops.clear();
44154604 Ops.push_back(SE.getUnknown(FullV));
44164605 }
44174606 ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, IP));
4418 ScaledS = SE.getMulExpr(ScaledS,
4419 SE.getConstant(ScaledS->getType(), F.Scale));
4607 if (F.Scale != 1)
4608 ScaledS =
4609 SE.getMulExpr(ScaledS, SE.getConstant(ScaledS->getType(), F.Scale));
44204610 Ops.push_back(ScaledS);
44214611 }
44224612 }
44944684 }
44954685 CI->setOperand(1, ICmpScaledV);
44964686 } else {
4497 assert(F.Scale == 0 &&
4687 // A scale of 1 means that the scale has been expanded as part of the
4688 // base regs.
4689 assert((F.Scale == 0 || F.Scale == 1) &&
44984690 "ICmp does not support folding a global value and "
44994691 "a scale at the same time!");
45004692 Constant *C = ConstantInt::getSigned(SE.getEffectiveSCEVType(OpTy),
0 ; RUN: opt -S -loop-reduce < %s | FileCheck %s
11 ; Complex addressing modes are costly.
22 ; Make loop-reduce prefer unscaled accesses.
3 ; On X86, reg1 + 1*reg2 has the same cost as reg1 + 8*reg2.
4 ; Therefore, LSR currently prefers to fold as much computation as possible
5 ; in the addressing mode.
36 ;
47 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
58 target triple = "x86_64-apple-macosx"
1720 %tmp = add nsw i64 %indvars.iv, -1
1821 %arrayidx = getelementptr inbounds double* %b, i64 %tmp
1922 %tmp1 = load double* %arrayidx, align 8
20 ; The induction variable should carry the scaling factor: 1 * 8 = 8.
21 ; CHECK: [[IVNEXT]] = add nuw nsw i64 [[IV]], 8
23 ; The induction variable should carry the scaling factor: 1.
24 ; CHECK: [[IVNEXT]] = add nuw nsw i64 [[IV]], 1
2225 %indvars.iv.next = add i64 %indvars.iv, 1
2326 %arrayidx2 = getelementptr inbounds double* %c, i64 %indvars.iv.next
2427 %tmp2 = load double* %arrayidx2, align 8
2629 %arrayidx4 = getelementptr inbounds double* %a, i64 %indvars.iv
2730 store double %mul, double* %arrayidx4, align 8
2831 %lftr.wideiv = trunc i64 %indvars.iv.next to i32
29 ; Comparison should be 19 * 8 = 152.
30 ; CHECK: icmp eq i32 {{%[^,]+}}, 152
32 ; Comparison should be 19 * 1 = 19.
33 ; CHECK: icmp eq i32 {{%[^,]+}}, 19
3134 %exitcond = icmp eq i32 %lftr.wideiv, 20
3235 br i1 %exitcond, label %for.end, label %for.body
3336
44
55 ; CHECK-LABEL: count_up
66 ; CHECK-NOT: {{and|movz|sar|shl}}
7 ; CHECK: addq $8,
7 ; CHECK: incq
88 ; CHECK-NOT: {{and|movz|sar|shl}}
99 ; CHECK: jne
1010 define void @count_up(double* %d, i64 %n) nounwind {
7070
7171 ; CHECK-LABEL: count_up_signed
7272 ; CHECK-NOT: {{and|movz|sar|shl}}
73 ; CHECK: addq $8,
73 ; CHECK: incq
7474 ; CHECK-NOT: {{and|movz|sar|shl}}
7575 ; CHECK: jne
7676 define void @count_up_signed(double* %d, i64 %n) nounwind {
241241
242242 ; CHECK-LABEL: another_count_down_signed
243243 ; CHECK-NOT: {{and|movz|sar|shl}}
244 ; CHECK: addq $-8,
244 ; CHECK: decq
245245 ; CHECK-NOT: {{and|movz|sar|shl}}
246246 ; CHECK: jne
247247 define void @another_count_down_signed(double* %d, i64 %n) nounwind {