84#include "llvm/Config/llvm-config.h"
133#define DEBUG_TYPE "loop-reduce"
150 cl::desc(
"Enable LSR phi elimination"));
155 cl::desc(
"Add instruction count to a LSR cost model"));
160 cl::desc(
"Narrow LSR complex solution using"
161 " expectation of registers number"));
167 cl::desc(
"Narrow LSR search space by filtering non-optimal formulae"
168 " with the same ScaledReg and Scale"));
172 cl::desc(
"A flag that overrides the target's preferred addressing mode."),
175 "Don't prefer any addressing mode"),
178 "Prefer pre-indexed addressing mode"),
181 "Prefer post-indexed addressing mode")));
185 cl::init(std::numeric_limits<uint16_t>::max()),
186 cl::desc(
"LSR search space complexity limit"));
190 cl::desc(
"The limit on recursion depth for LSRs setup cost"));
194 cl::desc(
"Attempt to replace primary IV with other IV."));
198 cl::desc(
"Attempt to drop solution if it is less profitable"));
201 "Number of terminating condition fold recognized and performed");
207 cl::desc(
"Stress test LSR IV chains"));
216 static const unsigned UnknownAddressSpace =
217 std::numeric_limits<unsigned>::max();
219 Type *MemTy =
nullptr;
220 unsigned AddrSpace = UnknownAddressSpace;
222 MemAccessTy() =
default;
223 MemAccessTy(
Type *Ty,
unsigned AS) : MemTy(Ty), AddrSpace(AS) {}
226 return MemTy ==
Other.MemTy && AddrSpace ==
Other.AddrSpace;
232 unsigned AS = UnknownAddressSpace) {
252#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
254 OS <<
"[NumUses=" << UsedByIndices.count() <<
']';
268 RegUsesTy RegUsesMap;
272 void countRegister(
const SCEV *Reg,
size_t LUIdx);
273 void dropRegister(
const SCEV *Reg,
size_t LUIdx);
274 void swapAndDropUse(
size_t LUIdx,
size_t LastLUIdx);
276 bool isRegUsedByUsesOtherThan(
const SCEV *Reg,
size_t LUIdx)
const;
294RegUseTracker::countRegister(
const SCEV *Reg,
size_t LUIdx) {
295 std::pair<RegUsesTy::iterator, bool> Pair =
296 RegUsesMap.insert(std::make_pair(Reg, RegSortData()));
297 RegSortData &RSD = Pair.first->second;
300 RSD.UsedByIndices.resize(std::max(RSD.UsedByIndices.size(), LUIdx + 1));
301 RSD.UsedByIndices.set(LUIdx);
305RegUseTracker::dropRegister(
const SCEV *Reg,
size_t LUIdx) {
306 RegUsesTy::iterator It = RegUsesMap.find(Reg);
307 assert(It != RegUsesMap.end());
308 RegSortData &RSD = It->second;
309 assert(RSD.UsedByIndices.size() > LUIdx);
310 RSD.UsedByIndices.reset(LUIdx);
314RegUseTracker::swapAndDropUse(
size_t LUIdx,
size_t LastLUIdx) {
315 assert(LUIdx <= LastLUIdx);
319 for (
auto &Pair : RegUsesMap) {
321 if (LUIdx < UsedByIndices.
size())
322 UsedByIndices[LUIdx] =
323 LastLUIdx < UsedByIndices.
size() ? UsedByIndices[LastLUIdx] :
false;
324 UsedByIndices.
resize(std::min(UsedByIndices.
size(), LastLUIdx));
329RegUseTracker::isRegUsedByUsesOtherThan(
const SCEV *Reg,
size_t LUIdx)
const {
330 RegUsesTy::const_iterator
I = RegUsesMap.find(Reg);
331 if (
I == RegUsesMap.end())
335 if (i == -1)
return false;
336 if ((
size_t)i != LUIdx)
return true;
341 RegUsesTy::const_iterator
I = RegUsesMap.find(Reg);
342 assert(
I != RegUsesMap.end() &&
"Unknown register!");
343 return I->second.UsedByIndices;
346void RegUseTracker::clear() {
360 int64_t BaseOffset = 0;
363 bool HasBaseReg =
false;
386 const SCEV *ScaledReg =
nullptr;
391 int64_t UnfoldedOffset = 0;
399 void canonicalize(
const Loop &L);
403 bool hasZeroEnd()
const;
405 size_t getNumRegs()
const;
408 void deleteBaseReg(
const SCEV *&S);
410 bool referencesReg(
const SCEV *S)
const;
411 bool hasRegsUsedByUsesOtherThan(
size_t LUIdx,
412 const RegUseTracker &RegUses)
const;
433 for (
const SCEV *S :
Add->operands())
440 if (!AR->getStart()->isZero() && AR->isAffine()) {
443 AR->getStepRecurrence(SE),
459 const SCEV *NegOne = SE.
getSCEV(ConstantInt::getAllOnesValue(
461 for (
const SCEV *S : MyGood)
463 for (
const SCEV *S : MyBad)
482 BaseRegs.push_back(Sum);
488 BaseRegs.push_back(Sum);
496 return isa<SCEVAddRecExpr>(S) && (cast<SCEVAddRecExpr>(S)->getLoop() == &L);
503bool Formula::isCanonical(
const Loop &L)
const {
505 return BaseRegs.size() <= 1;
510 if (Scale == 1 && BaseRegs.empty())
530void Formula::canonicalize(
const Loop &L) {
534 if (BaseRegs.empty()) {
536 assert(ScaledReg &&
"Expected 1*reg => reg");
537 assert(Scale == 1 &&
"Expected 1*reg => reg");
538 BaseRegs.push_back(ScaledReg);
546 ScaledReg = BaseRegs.pop_back_val();
557 if (
I != BaseRegs.end())
567bool Formula::unscale() {
571 BaseRegs.push_back(ScaledReg);
576bool Formula::hasZeroEnd()
const {
577 if (UnfoldedOffset || BaseOffset)
579 if (BaseRegs.size() != 1 || ScaledReg)
586size_t Formula::getNumRegs()
const {
587 return !!ScaledReg + BaseRegs.size();
592Type *Formula::getType()
const {
593 return !BaseRegs.empty() ? BaseRegs.front()->getType() :
594 ScaledReg ? ScaledReg->getType() :
595 BaseGV ? BaseGV->getType() :
600void Formula::deleteBaseReg(
const SCEV *&S) {
601 if (&S != &BaseRegs.back())
607bool Formula::referencesReg(
const SCEV *S)
const {
613bool Formula::hasRegsUsedByUsesOtherThan(
size_t LUIdx,
614 const RegUseTracker &RegUses)
const {
616 if (RegUses.isRegUsedByUsesOtherThan(ScaledReg, LUIdx))
618 for (
const SCEV *BaseReg : BaseRegs)
619 if (RegUses.isRegUsedByUsesOtherThan(BaseReg, LUIdx))
624#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
629 BaseGV->printAsOperand(
OS,
false);
631 if (BaseOffset != 0) {
635 for (
const SCEV *BaseReg : BaseRegs) {
637 OS <<
"reg(" << *BaseReg <<
')';
639 if (HasBaseReg && BaseRegs.empty()) {
641 OS <<
"**error: HasBaseReg**";
642 }
else if (!HasBaseReg && !BaseRegs.empty()) {
644 OS <<
"**error: !HasBaseReg**";
648 OS << Scale <<
"*reg(";
655 if (UnfoldedOffset != 0) {
657 OS <<
"imm(" << UnfoldedOffset <<
')';
698 bool IgnoreSignificantBits =
false) {
709 if (
RA.isAllOnes()) {
723 const APInt &LA =
C->getAPInt();
732 if ((IgnoreSignificantBits ||
isAddRecSExtable(AR, SE)) && AR->isAffine()) {
734 IgnoreSignificantBits);
735 if (!Step)
return nullptr;
737 IgnoreSignificantBits);
738 if (!Start)
return nullptr;
751 for (
const SCEV *S :
Add->operands()) {
753 if (!
Op)
return nullptr;
769 dyn_cast<SCEVConstant>(MulRHS->getOperand(0));
784 IgnoreSignificantBits)) {
803 if (
C->getAPInt().getSignificantBits() <= 64) {
805 return C->getValue()->getSExtValue();
813 }
else if (
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
828 if (
const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
829 if (
GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue())) {
839 }
else if (
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
855 bool isAddress = isa<LoadInst>(Inst);
856 if (
StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
857 if (SI->getPointerOperand() == OperandVal)
859 }
else if (
IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
862 switch (II->getIntrinsicID()) {
863 case Intrinsic::memset:
864 case Intrinsic::prefetch:
865 case Intrinsic::masked_load:
866 if (II->getArgOperand(0) == OperandVal)
869 case Intrinsic::masked_store:
870 if (II->getArgOperand(1) == OperandVal)
873 case Intrinsic::memmove:
874 case Intrinsic::memcpy:
875 if (II->getArgOperand(0) == OperandVal ||
876 II->getArgOperand(1) == OperandVal)
882 if (IntrInfo.
PtrVal == OperandVal)
887 }
else if (
AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
888 if (RMW->getPointerOperand() == OperandVal)
891 if (CmpX->getPointerOperand() == OperandVal)
900 MemAccessTy AccessTy = MemAccessTy::getUnknown(Inst->
getContext());
907 if (
const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
908 AccessTy.AddrSpace = SI->getPointerAddressSpace();
909 }
else if (
const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
910 AccessTy.AddrSpace = LI->getPointerAddressSpace();
911 }
else if (
const AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
912 AccessTy.AddrSpace = RMW->getPointerAddressSpace();
914 AccessTy.AddrSpace = CmpX->getPointerAddressSpace();
915 }
else if (
IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
916 switch (II->getIntrinsicID()) {
917 case Intrinsic::prefetch:
918 case Intrinsic::memset:
919 AccessTy.AddrSpace = II->getArgOperand(0)->getType()->getPointerAddressSpace();
920 AccessTy.MemTy = OperandVal->
getType();
922 case Intrinsic::memmove:
923 case Intrinsic::memcpy:
925 AccessTy.MemTy = OperandVal->
getType();
927 case Intrinsic::masked_load:
931 case Intrinsic::masked_store:
933 II->getArgOperand(1)->getType()->getPointerAddressSpace();
993 if (!Processed.
insert(S).second)
997 for (
const SCEV *S :
Add->operands()) {
1013 Value *UVal = U->getValue();
1017 if (UI && UI->
getOpcode() == Instruction::Mul &&
1051 const LSRUse &LU,
const Formula &
F);
1055 const LSRUse &LU,
const Formula &
F,
1062 const Loop *
L =
nullptr;
1072 L(
L), SE(&SE),
TTI(&
TTI), AMK(AMK) {
1090 return ((
C.Insns |
C.NumRegs |
C.AddRecCost |
C.NumIVMuls |
C.NumBaseAdds
1091 |
C.ImmCost |
C.SetupCost |
C.ScaleCost) != ~0u)
1092 || ((
C.Insns &
C.NumRegs &
C.AddRecCost &
C.NumIVMuls &
C.NumBaseAdds
1093 &
C.ImmCost &
C.SetupCost &
C.ScaleCost) == ~0u);
1099 return C.NumRegs == ~0
u;
1102 void RateFormula(
const Formula &
F,
1112 void RateRegister(
const Formula &
F,
const SCEV *
Reg,
1114 void RatePrimaryRegister(
const Formula &
F,
const SCEV *
Reg,
1127 Value *OperandValToReplace =
nullptr;
1139 LSRFixup() =
default;
1141 bool isUseFullyOutsideLoop(
const Loop *L)
const;
1149struct UniquifierDenseMapInfo {
1152 V.push_back(
reinterpret_cast<const SCEV *
>(-1));
1158 V.push_back(
reinterpret_cast<const SCEV *
>(-2));
1194 MemAccessTy AccessTy;
1200 int64_t MinOffset = std::numeric_limits<int64_t>::max();
1201 int64_t MaxOffset = std::numeric_limits<int64_t>::min();
1205 bool AllFixupsOutsideLoop =
true;
1212 bool RigidFormula =
false;
1218 Type *WidestFixupType =
nullptr;
1228 LSRUse(KindType K, MemAccessTy AT) :
Kind(
K), AccessTy(AT) {}
1230 LSRFixup &getNewFixup() {
1231 Fixups.push_back(LSRFixup());
1235 void pushFixup(LSRFixup &f) {
1237 if (
f.Offset > MaxOffset)
1238 MaxOffset =
f.Offset;
1239 if (
f.Offset < MinOffset)
1240 MinOffset =
f.Offset;
1243 bool HasFormulaWithSameRegs(
const Formula &
F)
const;
1244 float getNotSelectedProbability(
const SCEV *Reg)
const;
1245 bool InsertFormula(
const Formula &
F,
const Loop &L);
1246 void DeleteFormula(Formula &
F);
1247 void RecomputeRegs(
size_t LUIdx, RegUseTracker &Reguses);
1256 LSRUse::KindType Kind, MemAccessTy AccessTy,
1258 bool HasBaseReg, int64_t Scale,
1262 if (isa<SCEVUnknown>(Reg) || isa<SCEVConstant>(Reg))
1266 if (
const auto *S = dyn_cast<SCEVAddRecExpr>(Reg))
1268 if (
auto S = dyn_cast<SCEVIntegralCastExpr>(Reg))
1270 if (
auto S = dyn_cast<SCEVNAryExpr>(Reg))
1272 [&](
unsigned i,
const SCEV *Reg) {
1273 return i + getSetupCost(Reg, Depth - 1);
1275 if (
auto S = dyn_cast<SCEVUDivExpr>(Reg))
1282void Cost::RateRegister(
const Formula &
F,
const SCEV *Reg,
1288 if (AR->getLoop() != L) {
1295 if (!AR->getLoop()->contains(L)) {
1305 unsigned LoopCost = 1;
1312 if (
auto *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE)))
1313 if (Step->getAPInt() ==
F.BaseOffset)
1316 const SCEV *LoopStep = AR->getStepRecurrence(*SE);
1317 if (isa<SCEVConstant>(LoopStep)) {
1318 const SCEV *LoopStart = AR->getStart();
1319 if (!isa<SCEVConstant>(LoopStart) &&
1325 C.AddRecCost += LoopCost;
1329 if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1))) {
1330 if (!Regs.
count(AR->getOperand(1))) {
1331 RateRegister(
F, AR->getOperand(1), Regs);
1343 C.SetupCost = std::min<unsigned>(
C.SetupCost, 1 << 16);
1345 C.NumIVMuls += isa<SCEVMulExpr>(Reg) &&
1352void Cost::RatePrimaryRegister(
const Formula &
F,
const SCEV *Reg,
1355 if (LoserRegs && LoserRegs->
count(Reg)) {
1359 if (Regs.
insert(Reg).second) {
1360 RateRegister(
F, Reg, Regs);
1361 if (LoserRegs && isLoser())
1366void Cost::RateFormula(
const Formula &
F,
1373 assert(
F.isCanonical(*L) &&
"Cost is accurate only for canonical formula");
1375 unsigned PrevAddRecCost =
C.AddRecCost;
1376 unsigned PrevNumRegs =
C.NumRegs;
1377 unsigned PrevNumBaseAdds =
C.NumBaseAdds;
1378 if (
const SCEV *ScaledReg =
F.ScaledReg) {
1379 if (VisitedRegs.
count(ScaledReg)) {
1383 RatePrimaryRegister(
F, ScaledReg, Regs, LoserRegs);
1387 for (
const SCEV *BaseReg :
F.BaseRegs) {
1388 if (VisitedRegs.
count(BaseReg)) {
1392 RatePrimaryRegister(
F, BaseReg, Regs, LoserRegs);
1398 size_t NumBaseParts =
F.getNumRegs();
1399 if (NumBaseParts > 1)
1404 C.NumBaseAdds += (
F.UnfoldedOffset != 0);
1410 for (
const LSRFixup &
Fixup : LU.Fixups) {
1411 int64_t
O =
Fixup.Offset;
1421 if (LU.Kind == LSRUse::Address &&
Offset != 0 &&
1438 if (
C.NumRegs > TTIRegNum) {
1441 if (PrevNumRegs > TTIRegNum)
1442 C.Insns += (
C.NumRegs - PrevNumRegs);
1444 C.Insns += (
C.NumRegs - TTIRegNum);
1457 if (LU.Kind == LSRUse::ICmpZero && !
F.hasZeroEnd() &&
1461 C.Insns += (
C.AddRecCost - PrevAddRecCost);
1464 if (LU.Kind != LSRUse::ICmpZero)
1465 C.Insns +=
C.NumBaseAdds - PrevNumBaseAdds;
1471 C.Insns = std::numeric_limits<unsigned>::max();
1472 C.NumRegs = std::numeric_limits<unsigned>::max();
1473 C.AddRecCost = std::numeric_limits<unsigned>::max();
1474 C.NumIVMuls = std::numeric_limits<unsigned>::max();
1475 C.NumBaseAdds = std::numeric_limits<unsigned>::max();
1476 C.ImmCost = std::numeric_limits<unsigned>::max();
1477 C.SetupCost = std::numeric_limits<unsigned>::max();
1478 C.ScaleCost = std::numeric_limits<unsigned>::max();
1482bool Cost::isLess(
const Cost &
Other)
const {
1484 C.Insns !=
Other.C.Insns)
1485 return C.Insns <
Other.C.Insns;
1489#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1492 OS <<
C.Insns <<
" instruction" << (
C.Insns == 1 ?
" " :
"s ");
1493 OS <<
C.NumRegs <<
" reg" << (
C.NumRegs == 1 ?
"" :
"s");
1494 if (
C.AddRecCost != 0)
1495 OS <<
", with addrec cost " <<
C.AddRecCost;
1496 if (
C.NumIVMuls != 0)
1497 OS <<
", plus " <<
C.NumIVMuls <<
" IV mul"
1498 << (
C.NumIVMuls == 1 ?
"" :
"s");
1499 if (
C.NumBaseAdds != 0)
1500 OS <<
", plus " <<
C.NumBaseAdds <<
" base add"
1501 << (
C.NumBaseAdds == 1 ?
"" :
"s");
1502 if (
C.ScaleCost != 0)
1503 OS <<
", plus " <<
C.ScaleCost <<
" scale cost";
1505 OS <<
", plus " <<
C.ImmCost <<
" imm cost";
1506 if (
C.SetupCost != 0)
1507 OS <<
", plus " <<
C.SetupCost <<
" setup cost";
1516bool LSRFixup::isUseFullyOutsideLoop(
const Loop *L)
const {
1518 if (
const PHINode *PN = dyn_cast<PHINode>(UserInst)) {
1519 for (
unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
1520 if (PN->getIncomingValue(i) == OperandValToReplace &&
1521 L->contains(PN->getIncomingBlock(i)))
1526 return !
L->contains(UserInst);
1529#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1533 if (
StoreInst *Store = dyn_cast<StoreInst>(UserInst)) {
1535 Store->getOperand(0)->printAsOperand(
OS,
false);
1536 }
else if (UserInst->getType()->isVoidTy())
1537 OS << UserInst->getOpcodeName();
1539 UserInst->printAsOperand(
OS,
false);
1541 OS <<
", OperandValToReplace=";
1542 OperandValToReplace->printAsOperand(
OS,
false);
1544 for (
const Loop *PIL : PostIncLoops) {
1545 OS <<
", PostIncLoop=";
1546 PIL->getHeader()->printAsOperand(
OS,
false);
1560bool LSRUse::HasFormulaWithSameRegs(
const Formula &
F)
const {
1562 if (
F.ScaledReg)
Key.push_back(
F.ScaledReg);
1565 return Uniquifier.
count(Key);
1569float LSRUse::getNotSelectedProbability(
const SCEV *Reg)
const {
1571 for (
const Formula &
F : Formulae)
1572 if (
F.referencesReg(Reg))
1574 return ((
float)(Formulae.size() - FNum)) / Formulae.size();
1579bool LSRUse::InsertFormula(
const Formula &
F,
const Loop &L) {
1580 assert(
F.isCanonical(L) &&
"Invalid canonical representation");
1582 if (!Formulae.empty() && RigidFormula)
1586 if (
F.ScaledReg)
Key.push_back(
F.ScaledReg);
1590 if (!Uniquifier.
insert(Key).second)
1594 assert((!
F.ScaledReg || !
F.ScaledReg->isZero()) &&
1595 "Zero allocated in a scaled register!");
1597 for (
const SCEV *BaseReg :
F.BaseRegs)
1598 assert(!BaseReg->
isZero() &&
"Zero allocated in a base register!");
1602 Formulae.push_back(
F);
1605 Regs.
insert(
F.BaseRegs.begin(),
F.BaseRegs.end());
1613void LSRUse::DeleteFormula(Formula &
F) {
1614 if (&
F != &Formulae.back())
1616 Formulae.pop_back();
1620void LSRUse::RecomputeRegs(
size_t LUIdx, RegUseTracker &RegUses) {
1624 for (
const Formula &
F : Formulae) {
1625 if (
F.ScaledReg) Regs.
insert(
F.ScaledReg);
1626 Regs.
insert(
F.BaseRegs.begin(),
F.BaseRegs.end());
1630 for (
const SCEV *S : OldRegs)
1632 RegUses.dropRegister(S, LUIdx);
1635#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1637 OS <<
"LSR Use: Kind=";
1639 case Basic:
OS <<
"Basic";
break;
1640 case Special:
OS <<
"Special";
break;
1641 case ICmpZero:
OS <<
"ICmpZero";
break;
1643 OS <<
"Address of ";
1644 if (AccessTy.MemTy->isPointerTy())
1647 OS << *AccessTy.MemTy;
1650 OS <<
" in addrspace(" << AccessTy.AddrSpace <<
')';
1653 OS <<
", Offsets={";
1654 bool NeedComma =
false;
1655 for (
const LSRFixup &
Fixup : Fixups) {
1656 if (NeedComma)
OS <<
',';
1662 if (AllFixupsOutsideLoop)
1663 OS <<
", all-fixups-outside-loop";
1665 if (WidestFixupType)
1666 OS <<
", widest fixup type: " << *WidestFixupType;
1675 LSRUse::KindType Kind, MemAccessTy AccessTy,
1677 bool HasBaseReg, int64_t Scale,
1680 case LSRUse::Address:
1682 HasBaseReg, Scale, AccessTy.AddrSpace,
Fixup);
1684 case LSRUse::ICmpZero:
1691 if (Scale != 0 && HasBaseReg && BaseOffset != 0)
1696 if (Scale != 0 && Scale != -1)
1701 if (BaseOffset != 0) {
1709 BaseOffset = -(
uint64_t)BaseOffset;
1718 return !BaseGV && Scale == 0 && BaseOffset == 0;
1720 case LSRUse::Special:
1722 return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset == 0;
1729 int64_t MinOffset, int64_t MaxOffset,
1730 LSRUse::KindType Kind, MemAccessTy AccessTy,
1732 bool HasBaseReg, int64_t Scale) {
1734 if (((int64_t)((
uint64_t)BaseOffset + MinOffset) > BaseOffset) !=
1737 MinOffset = (
uint64_t)BaseOffset + MinOffset;
1738 if (((int64_t)((
uint64_t)BaseOffset + MaxOffset) > BaseOffset) !=
1741 MaxOffset = (
uint64_t)BaseOffset + MaxOffset;
1744 HasBaseReg, Scale) &&
1750 int64_t MinOffset, int64_t MaxOffset,
1751 LSRUse::KindType Kind, MemAccessTy AccessTy,
1752 const Formula &
F,
const Loop &L) {
1760 assert((
F.isCanonical(L) ||
F.Scale != 0));
1762 F.BaseGV,
F.BaseOffset,
F.HasBaseReg,
F.Scale);
1767 int64_t MaxOffset, LSRUse::KindType Kind,
1769 int64_t BaseOffset,
bool HasBaseReg, int64_t Scale) {
1772 BaseOffset, HasBaseReg, Scale) ||
1777 BaseGV, BaseOffset,
true, 0));
1781 int64_t MaxOffset, LSRUse::KindType Kind,
1782 MemAccessTy AccessTy,
const Formula &
F) {
1783 return isLegalUse(
TTI, MinOffset, MaxOffset, Kind, AccessTy,
F.BaseGV,
1784 F.BaseOffset,
F.HasBaseReg,
F.Scale);
1788 const LSRUse &LU,
const Formula &
F) {
1791 for (
const LSRFixup &
Fixup : LU.Fixups)
1793 (
F.BaseOffset +
Fixup.Offset),
F.HasBaseReg,
1794 F.Scale,
Fixup.UserInst))
1800 LU.AccessTy,
F.BaseGV,
F.BaseOffset,
F.HasBaseReg,
1805 const LSRUse &LU,
const Formula &
F,
1814 return F.Scale != 1;
1817 case LSRUse::Address: {
1820 LU.AccessTy.MemTy,
F.BaseGV,
1822 F.Scale, LU.AccessTy.AddrSpace);
1824 LU.AccessTy.MemTy,
F.BaseGV,
1826 F.Scale, LU.AccessTy.AddrSpace);
1829 "Legal addressing mode has an illegal cost!");
1830 return std::max(ScaleCostMinOffset, ScaleCostMaxOffset);
1832 case LSRUse::ICmpZero:
1834 case LSRUse::Special:
1844 LSRUse::KindType Kind, MemAccessTy AccessTy,
1848 if (BaseOffset == 0 && !BaseGV)
return true;
1852 int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
1856 if (!HasBaseReg && Scale == 1) {
1867 int64_t MaxOffset, LSRUse::KindType Kind,
1868 MemAccessTy AccessTy,
const SCEV *S,
1871 if (S->
isZero())
return true;
1879 if (!S->
isZero())
return false;
1882 if (BaseOffset == 0 && !BaseGV)
return true;
1886 int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
1889 BaseOffset, HasBaseReg, Scale);
1906 const SCEV *IncExpr;
1909 : UserInst(
U), IVOperand(
O), IncExpr(E) {}
1916 const SCEV *ExprBase =
nullptr;
1918 IVChain() =
default;
1919 IVChain(
const IVInc &Head,
const SCEV *
Base)
1920 : Incs(1, Head), ExprBase(
Base) {}
1927 return std::next(Incs.
begin());
1934 bool hasIncs()
const {
return Incs.
size() >= 2; }
1943 bool isProfitableIncrement(
const SCEV *OperExpr,
1944 const SCEV *IncExpr,
1969 bool Changed =
false;
1996 RegUseTracker RegUses;
2001 static const unsigned MaxChains = 8;
2012 void OptimizeShadowIV();
2015 void OptimizeLoopTermCond();
2019 void FinalizeChain(IVChain &Chain);
2020 void CollectChains();
2021 void GenerateIVChain(
const IVChain &Chain,
2024 void CollectInterestingTypesAndFactors();
2025 void CollectFixupsAndInitialFormulae();
2031 bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset,
bool HasBaseReg,
2032 LSRUse::KindType Kind, MemAccessTy AccessTy);
2034 std::pair<size_t, int64_t> getUse(
const SCEV *&Expr, LSRUse::KindType Kind,
2035 MemAccessTy AccessTy);
2037 void DeleteUse(LSRUse &LU,
size_t LUIdx);
2039 LSRUse *FindUseWithSimilarFormula(
const Formula &
F,
const LSRUse &OrigLU);
2041 void InsertInitialFormula(
const SCEV *S, LSRUse &LU,
size_t LUIdx);
2042 void InsertSupplementalFormula(
const SCEV *S, LSRUse &LU,
size_t LUIdx);
2043 void CountRegisters(
const Formula &
F,
size_t LUIdx);
2044 bool InsertFormula(LSRUse &LU,
unsigned LUIdx,
const Formula &
F);
2046 void CollectLoopInvariantFixupsAndFormulae();
2048 void GenerateReassociations(LSRUse &LU,
unsigned LUIdx, Formula
Base,
2049 unsigned Depth = 0);
2051 void GenerateReassociationsImpl(LSRUse &LU,
unsigned LUIdx,
2053 size_t Idx,
bool IsScaledReg =
false);
2054 void GenerateCombinations(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2055 void GenerateSymbolicOffsetsImpl(LSRUse &LU,
unsigned LUIdx,
2056 const Formula &
Base,
size_t Idx,
2057 bool IsScaledReg =
false);
2058 void GenerateSymbolicOffsets(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2059 void GenerateConstantOffsetsImpl(LSRUse &LU,
unsigned LUIdx,
2060 const Formula &
Base,
2062 size_t Idx,
bool IsScaledReg =
false);
2063 void GenerateConstantOffsets(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2064 void GenerateICmpZeroScales(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2065 void GenerateScales(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2066 void GenerateTruncates(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2067 void GenerateCrossUseConstantOffsets();
2068 void GenerateAllReuseFormulae();
2070 void FilterOutUndesirableDedicatedRegisters();
2072 size_t EstimateSearchSpaceComplexity()
const;
2073 void NarrowSearchSpaceByDetectingSupersets();
2074 void NarrowSearchSpaceByCollapsingUnrolledCode();
2075 void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
2076 void NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
2077 void NarrowSearchSpaceByFilterPostInc();
2078 void NarrowSearchSpaceByDeletingCostlyFormulas();
2079 void NarrowSearchSpaceByPickingWinnerRegs();
2080 void NarrowSearchSpaceUsingHeuristics();
2085 const Cost &CurCost,
2095 const LSRUse &LU)
const;
2097 Value *Expand(
const LSRUse &LU,
const LSRFixup &LF,
const Formula &
F,
2100 void RewriteForPHI(
PHINode *PN,
const LSRUse &LU,
const LSRFixup &LF,
2103 void Rewrite(
const LSRUse &LU,
const LSRFixup &LF,
const Formula &
F,
2112 bool getChanged()
const {
return Changed; }
2114 return ScalarEvolutionIVs;
2128void LSRInstance::OptimizeShadowIV() {
2130 if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
2138 Type *DestTy =
nullptr;
2139 bool IsSigned =
false;
2153 if (
UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser())) {
2155 DestTy = UCast->getDestTy();
2157 else if (
SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser())) {
2159 DestTy = SCast->getDestTy();
2161 if (!DestTy)
continue;
2181 if (Mantissa == -1)
continue;
2185 unsigned Entry, Latch;
2195 if (!
Init)
continue;
2196 Constant *NewInit = ConstantFP::get(DestTy, IsSigned ?
2197 (
double)
Init->getSExtValue() :
2198 (
double)
Init->getZExtValue());
2202 if (!Incr)
continue;
2203 if (Incr->
getOpcode() != Instruction::Add
2204 && Incr->
getOpcode() != Instruction::Sub)
2220 if (!
C->getValue().isStrictlyPositive())
2227 Constant *CFP = ConstantFP::get(DestTy,
C->getZExtValue());
2229 Incr->
getOpcode() == Instruction::Add ? Instruction::FAdd
2230 : Instruction::FSub,
2248 if (
U.getUser() ==
Cond) {
2316 if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
2321 const SCEV *IterationCount = SE.
getAddExpr(One, BackedgeTakenCount);
2322 if (IterationCount != SE.
getSCEV(Sel))
return Cond;
2329 if (
const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(BackedgeTakenCount)) {
2330 Pred = ICmpInst::ICMP_SLE;
2332 }
else if (
const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(IterationCount)) {
2333 Pred = ICmpInst::ICMP_SLT;
2335 }
else if (
const SCEVUMaxExpr *U = dyn_cast<SCEVUMaxExpr>(IterationCount)) {
2336 Pred = ICmpInst::ICMP_ULT;
2345 if (
Max->getNumOperands() != 2)
2348 const SCEV *MaxLHS =
Max->getOperand(0);
2349 const SCEV *MaxRHS =
Max->getOperand(1);
2354 (ICmpInst::isTrueWhenEqual(Pred) ? !MaxLHS->
isZero() : (MaxLHS != One)))
2367 "Loop condition operand is an addrec in a different loop!");
2371 Value *NewRHS =
nullptr;
2372 if (ICmpInst::isTrueWhenEqual(Pred)) {
2375 if (
ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
2376 if (BO1->isOne() && SE.
getSCEV(BO->getOperand(0)) == MaxRHS)
2377 NewRHS = BO->getOperand(0);
2379 if (
ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
2380 if (BO1->isOne() && SE.
getSCEV(BO->getOperand(0)) == MaxRHS)
2381 NewRHS = BO->getOperand(0);
2388 else if (
const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(MaxRHS))
2389 NewRHS = SU->getValue();
2402 Cond->getOperand(0), NewRHS,
"scmp");
2406 Cond->replaceAllUsesWith(NewCond);
2409 Cond->eraseFromParent();
2411 if (
Cmp->use_empty())
2412 Cmp->eraseFromParent();
2418LSRInstance::OptimizeLoopTermCond() {
2435 L->getExitingBlocks(ExitingBlocks);
2443 for (
BasicBlock *ExitingBlock : ExitingBlocks) {
2449 BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
2459 if (!FindIVUserForCond(
Cond, CondUse))
2473 if (!DT.dominates(ExitingBlock, LatchBlock))
2478 if (LatchBlock != ExitingBlock)
2482 if (&*UI != CondUse &&
2483 !DT.properlyDominates(UI->getUser()->getParent(), ExitingBlock)) {
2486 const SCEV *
A = IU.getStride(*CondUse, L);
2487 const SCEV *
B = IU.getStride(*UI, L);
2488 if (!
A || !
B)
continue;
2501 if (
C->isOne() ||
C->isMinusOne())
2502 goto decline_post_inc;
2504 if (
C->getValue().getSignificantBits() >= 64 ||
2505 C->getValue().isMinSignedValue())
2506 goto decline_post_inc;
2510 TTI, UI->getUser(), UI->getOperandValToReplace());
2511 int64_t Scale =
C->getSExtValue();
2515 AccessTy.AddrSpace))
2516 goto decline_post_inc;
2521 AccessTy.AddrSpace))
2522 goto decline_post_inc;
2527 LLVM_DEBUG(
dbgs() <<
" Change loop exiting icmp to use postinc iv: "
2533 if (
Cond->getNextNonDebugInstruction() != TermBr) {
2534 if (
Cond->hasOneUse()) {
2535 Cond->moveBefore(TermBr);
2539 Cond = cast<ICmpInst>(
Cond->clone());
2540 Cond->setName(
L->getHeader()->getName() +
".termcond");
2562 IVIncInsertPos =
L->getLoopLatch()->getTerminator();
2564 IVIncInsertPos = DT.findNearestCommonDominator(IVIncInsertPos, Inst);
2569bool LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset,
2570 bool HasBaseReg, LSRUse::KindType Kind,
2571 MemAccessTy AccessTy) {
2572 int64_t NewMinOffset = LU.MinOffset;
2573 int64_t NewMaxOffset = LU.MaxOffset;
2574 MemAccessTy NewAccessTy = AccessTy;
2579 if (LU.Kind != Kind)
2585 if (Kind == LSRUse::Address) {
2586 if (AccessTy.MemTy != LU.AccessTy.MemTy) {
2587 NewAccessTy = MemAccessTy::getUnknown(AccessTy.MemTy->getContext(),
2588 AccessTy.AddrSpace);
2593 if (NewOffset < LU.MinOffset) {
2595 LU.MaxOffset - NewOffset, HasBaseReg))
2597 NewMinOffset = NewOffset;
2598 }
else if (NewOffset > LU.MaxOffset) {
2600 NewOffset - LU.MinOffset, HasBaseReg))
2602 NewMaxOffset = NewOffset;
2606 LU.MinOffset = NewMinOffset;
2607 LU.MaxOffset = NewMaxOffset;
2608 LU.AccessTy = NewAccessTy;
2615std::pair<size_t, int64_t> LSRInstance::getUse(
const SCEV *&Expr,
2616 LSRUse::KindType Kind,
2617 MemAccessTy AccessTy) {
2628 std::pair<UseMapTy::iterator, bool>
P =
2632 size_t LUIdx =
P.first->second;
2633 LSRUse &LU =
Uses[LUIdx];
2634 if (reconcileNewOffset(LU,
Offset,
true, Kind, AccessTy))
2636 return std::make_pair(LUIdx,
Offset);
2640 size_t LUIdx =
Uses.size();
2641 P.first->second = LUIdx;
2642 Uses.push_back(LSRUse(Kind, AccessTy));
2643 LSRUse &LU =
Uses[LUIdx];
2647 return std::make_pair(LUIdx,
Offset);
2651void LSRInstance::DeleteUse(LSRUse &LU,
size_t LUIdx) {
2652 if (&LU != &
Uses.back())
2657 RegUses.swapAndDropUse(LUIdx,
Uses.size());
2663LSRInstance::FindUseWithSimilarFormula(
const Formula &OrigF,
2664 const LSRUse &OrigLU) {
2666 for (LSRUse &LU :
Uses) {
2672 if (&LU != &OrigLU &&
2673 LU.Kind != LSRUse::ICmpZero &&
2674 LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy &&
2675 LU.WidestFixupType == OrigLU.WidestFixupType &&
2676 LU.HasFormulaWithSameRegs(OrigF)) {
2678 for (
const Formula &
F : LU.Formulae) {
2681 if (
F.BaseRegs == OrigF.BaseRegs &&
2682 F.ScaledReg == OrigF.ScaledReg &&
2683 F.BaseGV == OrigF.BaseGV &&
2684 F.Scale == OrigF.Scale &&
2685 F.UnfoldedOffset == OrigF.UnfoldedOffset) {
2686 if (
F.BaseOffset == 0)
2701void LSRInstance::CollectInterestingTypesAndFactors() {
2707 const SCEV *Expr = IU.getExpr(U);
2725 }
while (!Worklist.
empty());
2730 I = Strides.
begin(), E = Strides.
end();
I != E; ++
I)
2732 std::next(
I); NewStrideIter != E; ++NewStrideIter) {
2733 const SCEV *OldStride = *
I;
2734 const SCEV *NewStride = *NewStrideIter;
2745 dyn_cast_or_null<SCEVConstant>(
getExactSDiv(NewStride, OldStride,
2747 if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero())
2748 Factors.insert(Factor->getAPInt().getSExtValue());
2753 if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero())
2754 Factors.insert(Factor->getAPInt().getSExtValue());
2760 if (
Types.size() == 1)
2772 for(; OI != OE; ++OI) {
2773 if (
Instruction *Oper = dyn_cast<Instruction>(*OI)) {
2778 dyn_cast<SCEVAddRecExpr>(SE.
getSCEV(Oper))) {
2790 if (
TruncInst *Trunc = dyn_cast<TruncInst>(Oper))
2791 return Trunc->getOperand(0);
2813 return getExprBase(cast<SCEVTruncateExpr>(S)->getOperand());
2815 return getExprBase(cast<SCEVZeroExtendExpr>(S)->getOperand());
2817 return getExprBase(cast<SCEVSignExtendExpr>(S)->getOperand());
2824 if (SubExpr->getSCEVType() ==
scAddExpr)
2827 if (SubExpr->getSCEVType() !=
scMulExpr)
2833 return getExprBase(cast<SCEVAddRecExpr>(S)->getStart());
2843bool IVChain::isProfitableIncrement(
const SCEV *OperExpr,
2844 const SCEV *IncExpr,
2852 if (!isa<SCEVConstant>(IncExpr)) {
2854 if (isa<SCEVConstant>(SE.
getMinusSCEV(OperExpr, HeadExpr)))
2879 if (!Chain.hasIncs())
2882 if (!
Users.empty()) {
2883 LLVM_DEBUG(
dbgs() <<
"Chain: " << *Chain.Incs[0].UserInst <<
" users:\n";
2885 :
Users) {
dbgs() <<
" " << *Inst <<
"\n"; });
2888 assert(!Chain.Incs.empty() &&
"empty IV chains are not allowed");
2896 if (isa<PHINode>(Chain.tailUserInst())
2897 && SE.
getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) {
2900 const SCEV *LastIncExpr =
nullptr;
2901 unsigned NumConstIncrements = 0;
2902 unsigned NumVarIncrements = 0;
2903 unsigned NumReusedIncrements = 0;
2908 for (
const IVInc &Inc : Chain) {
2911 if (Inc.IncExpr->isZero())
2916 if (isa<SCEVConstant>(Inc.IncExpr)) {
2917 ++NumConstIncrements;
2921 if (Inc.IncExpr == LastIncExpr)
2922 ++NumReusedIncrements;
2926 LastIncExpr = Inc.IncExpr;
2931 if (NumConstIncrements > 1)
2938 cost += NumVarIncrements;
2942 cost -= NumReusedIncrements;
2944 LLVM_DEBUG(
dbgs() <<
"Chain: " << *Chain.Incs[0].UserInst <<
" Cost: " << cost
2961 unsigned ChainIdx = 0, NChains = IVChainVec.size();
2962 const SCEV *LastIncExpr =
nullptr;
2963 for (; ChainIdx < NChains; ++ChainIdx) {
2964 IVChain &Chain = IVChainVec[ChainIdx];
2978 if (isa<PHINode>(UserInst) && isa<PHINode>(Chain.tailUserInst()))
2984 if (isa<SCEVCouldNotCompute>(IncExpr) || !SE.
isLoopInvariant(IncExpr, L))
2987 if (Chain.isProfitableIncrement(OperExpr, IncExpr, SE)) {
2988 LastIncExpr = IncExpr;
2994 if (ChainIdx == NChains) {
2995 if (isa<PHINode>(UserInst))
3001 LastIncExpr = OperExpr;
3005 if (!isa<SCEVAddRecExpr>(LastIncExpr))
3008 IVChainVec.push_back(IVChain(IVInc(UserInst, IVOper, LastIncExpr),
3010 ChainUsersVec.
resize(NChains);
3011 LLVM_DEBUG(
dbgs() <<
"IV Chain#" << ChainIdx <<
" Head: (" << *UserInst
3012 <<
") IV=" << *LastIncExpr <<
"\n");
3014 LLVM_DEBUG(
dbgs() <<
"IV Chain#" << ChainIdx <<
" Inc: (" << *UserInst
3015 <<
") IV+" << *LastIncExpr <<
"\n");
3017 IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr));
3019 IVChain &Chain = IVChainVec[ChainIdx];
3023 if (!LastIncExpr->
isZero()) {
3024 ChainUsersVec[ChainIdx].FarUsers.
insert(NearUsers.
begin(),
3040 IVChain::const_iterator IncIter = Chain.Incs.begin();
3041 IVChain::const_iterator IncEnd = Chain.Incs.end();
3042 for( ; IncIter != IncEnd; ++IncIter) {
3043 if (IncIter->UserInst == OtherUse)
3046 if (IncIter != IncEnd)
3050 && !isa<SCEVUnknown>(SE.
getSCEV(OtherUse))
3051 && IU.isIVUserOrOperand(OtherUse)) {
3054 NearUsers.
insert(OtherUse);
3059 ChainUsersVec[ChainIdx].FarUsers.
erase(UserInst);
3084void LSRInstance::CollectChains() {
3090 for (
DomTreeNode *Rung = DT.getNode(
L->getLoopLatch());
3091 Rung->
getBlock() != LoopHeader; Rung = Rung->getIDom()) {
3100 if (isa<PHINode>(
I) || !IU.isIVUserOrOperand(&
I))
3110 for (
unsigned ChainIdx = 0, NChains = IVChainVec.size();
3111 ChainIdx < NChains; ++ChainIdx) {
3112 ChainUsersVec[ChainIdx].NearUsers.
erase(&
I);
3118 while (IVOpIter != IVOpEnd) {
3119 Instruction *IVOpInst = cast<Instruction>(*IVOpIter);
3120 if (UniqueOperands.
insert(IVOpInst).second)
3121 ChainInstruction(&
I, IVOpInst, ChainUsersVec);
3122 IVOpIter =
findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
3127 for (
PHINode &PN :
L->getHeader()->phis()) {
3132 dyn_cast<Instruction>(PN.getIncomingValueForBlock(
L->getLoopLatch()));
3134 ChainInstruction(&PN, IncV, ChainUsersVec);
3137 unsigned ChainIdx = 0;
3138 for (
unsigned UsersIdx = 0, NChains = IVChainVec.size();
3139 UsersIdx < NChains; ++UsersIdx) {
3141 ChainUsersVec[UsersIdx].FarUsers, SE,
TTI))
3144 if (ChainIdx != UsersIdx)
3145 IVChainVec[ChainIdx] = IVChainVec[UsersIdx];
3146 FinalizeChain(IVChainVec[ChainIdx]);
3149 IVChainVec.resize(ChainIdx);
3152void LSRInstance::FinalizeChain(IVChain &Chain) {
3153 assert(!Chain.Incs.empty() &&
"empty IV chains are not allowed");
3154 LLVM_DEBUG(
dbgs() <<
"Final Chain: " << *Chain.Incs[0].UserInst <<
"\n");
3156 for (
const IVInc &Inc : Chain) {
3158 auto UseI =
find(Inc.UserInst->operands(), Inc.IVOperand);
3159 assert(UseI != Inc.UserInst->op_end() &&
"cannot find IV operand");
3160 IVIncSet.insert(UseI);
3167 const SCEVConstant *IncConst = dyn_cast<SCEVConstant>(IncExpr);
3185void LSRInstance::GenerateIVChain(
const IVChain &Chain,
3189 const IVInc &Head = Chain.Incs[0];
3194 Value *IVSrc =
nullptr;
3195 while (IVOpIter != IVOpEnd) {
3206 if (SE.
getSCEV(*IVOpIter) == Head.IncExpr
3207 || SE.
getSCEV(IVSrc) == Head.IncExpr) {
3210 IVOpIter =
findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
3212 if (IVOpIter == IVOpEnd) {
3214 LLVM_DEBUG(
dbgs() <<
"Concealed chain head: " << *Head.UserInst <<
"\n");
3217 assert(IVSrc &&
"Failed to find IV chain source");
3222 const SCEV *LeftOverExpr =
nullptr;
3223 for (
const IVInc &Inc : Chain) {
3225 if (isa<PHINode>(InsertPt))
3226 InsertPt =
L->getLoopLatch()->getTerminator();
3230 Value *IVOper = IVSrc;
3231 if (!Inc.IncExpr->isZero()) {
3235 LeftOverExpr = LeftOverExpr ?
3236 SE.
getAddExpr(LeftOverExpr, IncExpr) : IncExpr;
3238 if (LeftOverExpr && !LeftOverExpr->
isZero()) {
3241 Value *IncV =
Rewriter.expandCodeFor(LeftOverExpr, IntTy, InsertPt);
3244 IVOper =
Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);
3248 assert(IVTy == IVOper->
getType() &&
"inconsistent IV increment type");
3250 LeftOverExpr =
nullptr;
3253 Type *OperTy = Inc.IVOperand->getType();
3254 if (IVTy != OperTy) {
3256 "cannot extend a chained IV");
3258 IVOper = Builder.CreateTruncOrBitCast(IVOper, OperTy,
"lsr.chain");
3260 Inc.UserInst->replaceUsesOfWith(Inc.IVOperand, IVOper);
3261 if (
auto *OperandIsInstr = dyn_cast<Instruction>(Inc.IVOperand))
3266 if (isa<PHINode>(Chain.tailUserInst())) {
3267 for (
PHINode &Phi :
L->getHeader()->phis()) {
3271 Phi.getIncomingValueForBlock(
L->getLoopLatch()));
3274 Value *IVOper = IVSrc;
3276 if (IVTy != PostIncTy) {
3278 IRBuilder<> Builder(
L->getLoopLatch()->getTerminator());
3279 Builder.SetCurrentDebugLocation(PostIncV->
getDebugLoc());
3280 IVOper = Builder.CreatePointerCast(IVSrc, PostIncTy,
"lsr.chain");
3282 Phi.replaceUsesOfWith(PostIncV, IVOper);
3288void LSRInstance::CollectFixupsAndInitialFormulae() {
3290 bool SaveCmp =
TTI.
canSaveCmp(L, &ExitBranch, &SE, &LI, &DT, &AC, &TLI);
3302 assert(UseI != UserInst->
op_end() &&
"cannot find IV operand");
3303 if (IVIncSet.count(UseI)) {
3304 LLVM_DEBUG(
dbgs() <<
"Use is in profitable chain: " << **UseI <<
'\n');
3308 LSRUse::KindType
Kind = LSRUse::Basic;
3309 MemAccessTy AccessTy;
3311 Kind = LSRUse::Address;
3315 const SCEV *S = IU.getExpr(U);
3326 if (
ICmpInst *CI = dyn_cast<ICmpInst>(UserInst)) {
3329 if (SaveCmp && CI == dyn_cast<ICmpInst>(ExitBranch->
getCondition()))
3331 if (CI->isEquality()) {
3334 Value *
NV = CI->getOperand(1);
3335 if (NV ==
U.getOperandValToReplace()) {
3336 CI->setOperand(1, CI->getOperand(0));
3337 CI->setOperand(0, NV);
3338 NV = CI->getOperand(1);
3345 (!
NV->getType()->isPointerTy() ||
3352 Kind = LSRUse::ICmpZero;
3354 }
else if (
L->isLoopInvariant(NV) &&
3355 (!isa<Instruction>(NV) ||
3356 DT.dominates(cast<Instruction>(NV),
L->getHeader())) &&
3357 !
NV->getType()->isPointerTy()) {
3368 Kind = LSRUse::ICmpZero;
3370 assert(!isa<SCEVCouldNotCompute>(S));
3375 for (
size_t i = 0, e = Factors.size(); i != e; ++i)
3376 if (Factors[i] != -1)
3377 Factors.insert(-(
uint64_t)Factors[i]);
3383 std::pair<size_t, int64_t>
P = getUse(S, Kind, AccessTy);
3384 size_t LUIdx =
P.first;
3386 LSRUse &LU =
Uses[LUIdx];
3389 LSRFixup &LF = LU.getNewFixup();
3390 LF.UserInst = UserInst;
3391 LF.OperandValToReplace =
U.getOperandValToReplace();
3392 LF.PostIncLoops = TmpPostIncLoops;
3394 LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
3397 if (!VisitedLSRUse.
count(LUIdx) && !LF.isUseFullyOutsideLoop(L)) {
3399 F.initialMatch(S, L, SE);
3400 BaselineCost.RateFormula(
F, Regs, VisitedRegs, LU);
3401 VisitedLSRUse.
insert(LUIdx);
3404 if (!LU.WidestFixupType ||
3407 LU.WidestFixupType = LF.OperandValToReplace->getType();
3410 if (LU.Formulae.empty()) {
3411 InsertInitialFormula(S, LU, LUIdx);
3412 CountRegisters(LU.Formulae.back(), LUIdx);
3421void LSRInstance::InsertInitialFormula(
const SCEV *S, LSRUse &LU,
3425 LU.RigidFormula =
true;
3428 F.initialMatch(S, L, SE);
3429 bool Inserted = InsertFormula(LU, LUIdx,
F);
3430 assert(Inserted &&
"Initial formula already exists!"); (void)Inserted;
3436LSRInstance::InsertSupplementalFormula(
const SCEV *S,
3437 LSRUse &LU,
size_t LUIdx) {
3439 F.BaseRegs.push_back(S);
3440 F.HasBaseReg =
true;
3441 bool Inserted = InsertFormula(LU, LUIdx,
F);
3442 assert(Inserted &&
"Supplemental formula already exists!"); (void)Inserted;
3446void LSRInstance::CountRegisters(
const Formula &
F,
size_t LUIdx) {
3448 RegUses.countRegister(
F.ScaledReg, LUIdx);
3449 for (
const SCEV *BaseReg :
F.BaseRegs)
3450 RegUses.countRegister(BaseReg, LUIdx);
3455bool LSRInstance::InsertFormula(LSRUse &LU,
unsigned LUIdx,
const Formula &
F) {
3458 "Formula is illegal");
3460 if (!LU.InsertFormula(
F, *L))
3463 CountRegisters(
F, LUIdx);
3473LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
3482 while (!Worklist.
empty()) {
3486 if (!Visited.
insert(S).second)
3493 else if (
const SCEVUDivExpr *
D = dyn_cast<SCEVUDivExpr>(S)) {
3496 }
else if (
const SCEVUnknown *US = dyn_cast<SCEVUnknown>(S)) {
3497 const Value *
V = US->getValue();
3498 if (
const Instruction *Inst = dyn_cast<Instruction>(V)) {
3500 if (
L->contains(Inst))
continue;
3501 }
else if (isa<Constant>(V))
3504 for (
const Use &U :
V->uses()) {
3505 const Instruction *UserInst = dyn_cast<Instruction>(
U.getUser());
3517 const BasicBlock *UseBB = !isa<PHINode>(UserInst) ?
3519 cast<PHINode>(UserInst)->getIncomingBlock(
3521 if (!DT.dominates(
L->getHeader(), UseBB))
3534 if (isa<PHINode>(UserInst)) {
3535 const auto *PhiNode = cast<PHINode>(UserInst);
3536 bool HasIncompatibleEHPTerminatedBlock =
false;
3538 for (
unsigned int I = 0;
I < PhiNode->getNumIncomingValues();
I++) {
3539 if (PhiNode->getIncomingValue(
I) == ExpectedValue) {
3540 if (PhiNode->getIncomingBlock(
I)->getTerminator()->isEHPad()) {
3541 HasIncompatibleEHPTerminatedBlock =
true;
3546 if (HasIncompatibleEHPTerminatedBlock) {
3559 if (!isa<SCEVUnknown>(UserS))
3568 if (
const ICmpInst *ICI = dyn_cast<ICmpInst>(UserInst)) {
3569 unsigned OtherIdx = !
U.getOperandNo();
3570 Value *OtherOp =
const_cast<Value *
>(ICI->getOperand(OtherIdx));
3575 std::pair<size_t, int64_t>
P = getUse(
3576 S, LSRUse::Basic, MemAccessTy());
3577 size_t LUIdx =
P.first;
3579 LSRUse &LU =
Uses[LUIdx];
3580 LSRFixup &LF = LU.getNewFixup();
3581 LF.UserInst =
const_cast<Instruction *
>(UserInst);
3582 LF.OperandValToReplace =
U;
3584 LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
3585 if (!LU.WidestFixupType ||
3588 LU.WidestFixupType = LF.OperandValToReplace->getType();
3589 InsertSupplementalFormula(US, LU, LUIdx);
3590 CountRegisters(LU.Formulae.back(),
Uses.size() - 1);
3606 unsigned Depth = 0) {
3613 for (
const SCEV *S :
Add->operands()) {
3619 }
else if (
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
3628 if (Remainder && (AR->
getLoop() == L || !isa<SCEVAddRecExpr>(Remainder))) {
3630 Remainder =
nullptr;
3648 const SCEV *Remainder =
3661 LSRUse &LU,
const SCEV *S,
const Loop *L,
3663 if (LU.Kind != LSRUse::Address ||
3664 !LU.AccessTy.getType()->isIntOrIntVectorTy())
3670 if (!isa<SCEVConstant>(LoopStep))
3676 if (!isa<SCEVConstant>(LoopStart) && SE.
isLoopInvariant(LoopStart, L))
3683void LSRInstance::GenerateReassociationsImpl(LSRUse &LU,
unsigned LUIdx,
3684 const Formula &
Base,
3687 const SCEV *BaseReg = IsScaledReg ?
Base.ScaledReg :
Base.BaseRegs[
Idx];
3699 if (AddOps.
size() == 1)
3713 LU.AccessTy, *J,
Base.getNumRegs() > 1))
3719 InnerAddOps.append(std::next(J),
3724 if (InnerAddOps.size() == 1 &&
3726 LU.AccessTy, InnerAddOps[0],
Base.getNumRegs() > 1))
3735 const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);
3742 F.ScaledReg =
nullptr;
3744 F.BaseRegs.erase(
F.BaseRegs.begin() +
Idx);
3745 }
else if (IsScaledReg)
3746 F.ScaledReg = InnerSum;
3748 F.BaseRegs[
Idx] = InnerSum;
3754 SC->getValue()->getZExtValue()))
3756 (
uint64_t)
F.UnfoldedOffset +
SC->getValue()->getZExtValue();
3758 F.BaseRegs.push_back(*J);
3763 if (InsertFormula(LU, LUIdx,
F))
3770 GenerateReassociations(LU, LUIdx, LU.Formulae.back(),
3776void LSRInstance::GenerateReassociations(LSRUse &LU,
unsigned LUIdx,
3778 assert(
Base.isCanonical(*L) &&
"Input must be in the canonical form");
3783 for (
size_t i = 0, e =
Base.BaseRegs.size(); i != e; ++i)
3784 GenerateReassociationsImpl(LU, LUIdx,
Base,
Depth, i);
3786 if (
Base.Scale == 1)
3787 GenerateReassociationsImpl(LU, LUIdx,
Base,
Depth,
3793void LSRInstance::GenerateCombinations(LSRUse &LU,
unsigned LUIdx,
3796 if (
Base.BaseRegs.size() + (
Base.Scale == 1) +
3797 (
Base.UnfoldedOffset != 0) <= 1)
3804 Formula NewBase =
Base;
3805 NewBase.BaseRegs.clear();
3806 Type *CombinedIntegerType =
nullptr;
3807 for (
const SCEV *BaseReg :
Base.BaseRegs) {
3810 if (!CombinedIntegerType)
3815 NewBase.BaseRegs.push_back(BaseReg);
3819 if (Ops.
size() == 0)
3824 auto GenerateFormula = [&](
const SCEV *Sum) {
3825 Formula
F = NewBase;
3833 F.BaseRegs.push_back(Sum);
3835 (void)InsertFormula(LU, LUIdx,
F);
3839 if (Ops.
size() > 1) {
3846 if (NewBase.UnfoldedOffset) {
3847 assert(CombinedIntegerType &&
"Missing a type for the unfolded offset");
3850 NewBase.UnfoldedOffset = 0;
3856void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU,
unsigned LUIdx,
3857 const Formula &
Base,
size_t Idx,
3861 if (
G->isZero() || !GV)
3865 if (!
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
F))
3870 F.BaseRegs[
Idx] =
G;
3871 (void)InsertFormula(LU, LUIdx,
F);
3875void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU,
unsigned LUIdx,
3878 if (
Base.BaseGV)
return;
3880 for (
size_t i = 0, e =
Base.BaseRegs.size(); i != e; ++i)
3881 GenerateSymbolicOffsetsImpl(LU, LUIdx,
Base, i);
3882 if (
Base.Scale == 1)
3883 GenerateSymbolicOffsetsImpl(LU, LUIdx,
Base, -1,
3888void LSRInstance::GenerateConstantOffsetsImpl(
3889 LSRUse &LU,
unsigned LUIdx,
const Formula &
Base,
3892 auto GenerateOffset = [&](
const SCEV *
G, int64_t
Offset) {
3896 if (
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
F)) {
3903 F.ScaledReg =
nullptr;
3905 F.deleteBaseReg(
F.BaseRegs[
Idx]);
3907 }
else if (IsScaledReg)
3910 F.BaseRegs[
Idx] = NewG;
3912 (void)InsertFormula(LU, LUIdx,
F);
3927 if (
auto *GAR = dyn_cast<SCEVAddRecExpr>(
G)) {
3929 dyn_cast<SCEVConstant>(GAR->getStepRecurrence(SE))) {
3930 const APInt &StepInt = StepRec->getAPInt();
3934 for (int64_t
Offset : Worklist) {
3941 for (int64_t
Offset : Worklist)
3945 if (
G->isZero() || Imm == 0)
3954 F.BaseRegs[
Idx] =
G;
3959 (void)InsertFormula(LU, LUIdx,
F);
3963void LSRInstance::GenerateConstantOffsets(LSRUse &LU,
unsigned LUIdx,
3969 if (LU.MaxOffset != LU.MinOffset)
3972 for (
size_t i = 0, e =
Base.BaseRegs.size(); i != e; ++i)
3973 GenerateConstantOffsetsImpl(LU, LUIdx,
Base, Worklist, i);
3974 if (
Base.Scale == 1)
3975 GenerateConstantOffsetsImpl(LU, LUIdx,
Base, Worklist, -1,
3981void LSRInstance::GenerateICmpZeroScales(LSRUse &LU,
unsigned LUIdx,
3983 if (LU.Kind != LSRUse::ICmpZero)
return;
3991 if (LU.MinOffset != LU.MaxOffset)
return;
3994 if (
Base.ScaledReg &&
Base.ScaledReg->getType()->isPointerTy())
3996 for (
const SCEV *BaseReg :
Base.BaseRegs)
3999 assert(!
Base.BaseGV &&
"ICmpZero use is not legal!");
4002 for (int64_t Factor : Factors) {
4007 if (
Base.BaseOffset == std::numeric_limits<int64_t>::min() && Factor == -1)
4009 int64_t NewBaseOffset = (
uint64_t)
Base.BaseOffset * Factor;
4010 assert(Factor != 0 &&
"Zero factor not expected!");
4011 if (NewBaseOffset / Factor !=
Base.BaseOffset)
4019 int64_t
Offset = LU.MinOffset;
4020 if (
Offset == std::numeric_limits<int64_t>::min() && Factor == -1)
4023 if (
Offset / Factor != LU.MinOffset)
4031 F.BaseOffset = NewBaseOffset;
4043 for (
size_t i = 0, e =
F.BaseRegs.size(); i !=
e; ++i) {
4057 if (
F.UnfoldedOffset != 0) {
4058 if (
F.UnfoldedOffset == std::numeric_limits<int64_t>::min() &&
4061 F.UnfoldedOffset = (
uint64_t)
F.UnfoldedOffset * Factor;
4062 if (
F.UnfoldedOffset / Factor !=
Base.UnfoldedOffset)
4071 (void)InsertFormula(LU, LUIdx,
F);
4078void LSRInstance::GenerateScales(LSRUse &LU,
unsigned LUIdx, Formula
Base) {
4085 if (
Base.Scale != 0 && !
Base.unscale())
4088 assert(
Base.Scale == 0 &&
"unscale did not did its job!");
4091 for (int64_t Factor : Factors) {
4092 Base.Scale = Factor;
4093 Base.HasBaseReg =
Base.BaseRegs.size() > 1;
4095 if (!
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
4099 if (LU.Kind == LSRUse::Basic &&
4100 isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LSRUse::Special,
4101 LU.AccessTy,
Base) &&
4102 LU.AllFixupsOutsideLoop)
4103 LU.Kind = LSRUse::Special;
4109 if (LU.Kind == LSRUse::ICmpZero &&
4110 !
Base.HasBaseReg &&
Base.BaseOffset == 0 && !
Base.BaseGV)
4113 for (
size_t i = 0, e =
Base.BaseRegs.size(); i != e; ++i) {
4115 if (AR && (AR->
getLoop() == L || LU.AllFixupsOutsideLoop)) {
4122 if (!Quotient->isZero()) {
4125 F.ScaledReg = Quotient;
4126 F.deleteBaseReg(
F.BaseRegs[i]);
4130 if (
F.Scale == 1 && (
F.BaseRegs.empty() ||
4131 (AR->
getLoop() != L && LU.AllFixupsOutsideLoop)))
4135 if (
F.Scale == 1 && LU.AllFixupsOutsideLoop)
4137 (void)InsertFormula(LU, LUIdx,
F);
4153 const SCEV *Result =
nullptr;
4154 for (
auto &L :
Loops) {
4158 if (!New || (Result && New != Result))
4163 assert(Result &&
"failed to create expression");
4168void LSRInstance::GenerateTruncates(LSRUse &LU,
unsigned LUIdx, Formula
Base) {
4170 if (
Base.BaseGV)
return;
4180 if (
Base.ScaledReg &&
Base.ScaledReg->getType()->isPointerTy())
4183 [](
const SCEV *S) { return S->getType()->isPointerTy(); }))
4187 for (
auto &LF : LU.Fixups)
4188 Loops.push_back(LF.PostIncLoops);
4190 for (
Type *SrcTy : Types) {
4199 const SCEV *NewScaledReg =
4201 if (!NewScaledReg || NewScaledReg->
isZero())
4203 F.ScaledReg = NewScaledReg;
4205 bool HasZeroBaseReg =
false;
4206 for (
const SCEV *&BaseReg :
F.BaseRegs) {
4207 const SCEV *NewBaseReg =
4209 if (!NewBaseReg || NewBaseReg->
isZero()) {
4210 HasZeroBaseReg =
true;
4213 BaseReg = NewBaseReg;
4220 if (!
F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses))
4224 (void)InsertFormula(LU, LUIdx,
F);
4237 const SCEV *OrigReg;
4240 : LUIdx(LI),
Imm(
I), OrigReg(
R) {}
4248#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4250 OS <<
"in formulae referencing " << *OrigReg <<
" in use " << LUIdx
4251 <<
" , add offset " <<
Imm;
4261void LSRInstance::GenerateCrossUseConstantOffsets() {
4263 using ImmMapTy = std::map<int64_t, const SCEV *>;
4268 for (
const SCEV *
Use : RegUses) {
4271 auto Pair =
Map.insert(std::make_pair(Reg, ImmMapTy()));
4274 Pair.first->second.insert(std::make_pair(Imm,
Use));
4275 UsedByIndicesMap[
Reg] |= RegUses.getUsedByIndices(
Use);
4283 for (
const SCEV *Reg : Sequence) {
4284 const ImmMapTy &Imms =
Map.find(Reg)->second;
4287 if (Imms.size() == 1)
4290 LLVM_DEBUG(
dbgs() <<
"Generating cross-use offsets for " << *Reg <<
':';
4291 for (
const auto &Entry
4293 <<
' ' << Entry.first;
4297 for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
4299 const SCEV *OrigReg = J->second;
4301 int64_t JImm = J->first;
4302 const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(OrigReg);
4304 if (!isa<SCEVConstant>(OrigReg) &&
4305 UsedByIndicesMap[Reg].
count() == 1) {
4313 int64_t
First = Imms.begin()->first;
4314 int64_t
Last = std::prev(Imms.end())->first;
4321 ImmMapTy::const_iterator OtherImms[] = {
4322 Imms.begin(), std::prev(Imms.end()),
4323 Imms.lower_bound(Avg)};
4324 for (
const auto &M : OtherImms) {
4325 if (M == J || M == JE)
continue;
4331 if (UniqueItems.
insert(std::make_pair(LUIdx, Imm)).second)
4339 UsedByIndicesMap.
clear();
4340 UniqueItems.
clear();
4343 for (
const WorkItem &WI : WorkItems) {
4344 size_t LUIdx = WI.LUIdx;
4345 LSRUse &LU =
Uses[LUIdx];
4346 int64_t
Imm = WI.Imm;
4347 const SCEV *OrigReg = WI.OrigReg;
4354 for (
size_t L = 0, LE = LU.Formulae.size();
L !=
LE; ++
L) {
4355 Formula
F = LU.Formulae[
L];
4362 if (
F.ScaledReg == OrigReg) {
4369 NewF.BaseOffset =
Offset;
4370 if (!
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
4373 NewF.ScaledReg = SE.
getAddExpr(NegImmS, NewF.ScaledReg);
4378 if (
const SCEVConstant *
C = dyn_cast<SCEVConstant>(NewF.ScaledReg))
4379 if (
C->getValue()->isNegative() != (NewF.BaseOffset < 0) &&
4381 .ule(std::abs(NewF.BaseOffset)))
4385 NewF.canonicalize(*this->L);
4386 (void)InsertFormula(LU, LUIdx, NewF);
4389 for (
size_t N = 0, NE =
F.BaseRegs.size();
N !=
NE; ++
N) {
4390 const SCEV *BaseReg =
F.BaseRegs[
N];
4391 if (BaseReg != OrigReg)
4394 NewF.BaseOffset = (
uint64_t)NewF.BaseOffset + Imm;
4396 LU.Kind, LU.AccessTy, NewF)) {
4403 NewF.UnfoldedOffset = (
uint64_t)NewF.UnfoldedOffset + Imm;
4405 NewF.BaseRegs[
N] = SE.
getAddExpr(NegImmS, BaseReg);
4410 for (
const SCEV *NewReg : NewF.BaseRegs)
4412 if ((
C->getAPInt() + NewF.BaseOffset)
4414 .slt(std::abs(NewF.BaseOffset)) &&
4416 (
unsigned)llvm::countr_zero<uint64_t>(NewF.BaseOffset))
4420 NewF.canonicalize(*this->L);
4421 (void)InsertFormula(LU, LUIdx, NewF);
4432LSRInstance::GenerateAllReuseFormulae() {
4435 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4436 LSRUse &LU =
Uses[LUIdx];
4437 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4438 GenerateReassociations(LU, LUIdx, LU.Formulae[i]);
4439 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4440 GenerateCombinations(LU, LUIdx, LU.Formulae[i]);
4442 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4443 LSRUse &LU =
Uses[LUIdx];
4444 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4445 GenerateSymbolicOffsets(LU, LUIdx, LU.Formulae[i]);
4446 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4447 GenerateConstantOffsets(LU, LUIdx, LU.Formulae[i]);
4448 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4449 GenerateICmpZeroScales(LU, LUIdx, LU.Formulae[i]);
4450 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4451 GenerateScales(LU, LUIdx, LU.Formulae[i]);
4453 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4454 LSRUse &LU =
Uses[LUIdx];
4455 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4456 GenerateTruncates(LU, LUIdx, LU.Formulae[i]);
4459 GenerateCrossUseConstantOffsets();
4462 "After generating reuse formulae:\n";
4463 print_uses(
dbgs()));
4468void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
4473 bool ChangedFormulae =
false;
4478 using BestFormulaeTy =
4481 BestFormulaeTy BestFormulae;
4483 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4484 LSRUse &LU =
Uses[LUIdx];
4489 for (
size_t FIdx = 0, NumForms = LU.Formulae.size();
4490 FIdx != NumForms; ++FIdx) {
4491 Formula &
F = LU.Formulae[FIdx];
4502 CostF.RateFormula(
F, Regs, VisitedRegs, LU, &LoserRegs);
4503 if (CostF.isLoser()) {
4515 for (
const SCEV *Reg :
F.BaseRegs) {
4516 if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
4520 RegUses.isRegUsedByUsesOtherThan(
F.ScaledReg, LUIdx))
4521 Key.push_back(
F.ScaledReg);
4526 std::pair<BestFormulaeTy::const_iterator, bool>
P =
4527 BestFormulae.insert(std::make_pair(Key, FIdx));
4531 Formula &Best = LU.Formulae[
P.first->second];
4533 Cost CostBest(L, SE,
TTI, AMK);
4535 CostBest.RateFormula(Best, Regs, VisitedRegs, LU);
4536 if (CostF.isLess(CostBest))
4540 " in favor of formula ";
4541 Best.print(
dbgs());
dbgs() <<
'\n');
4544 ChangedFormulae =
true;
4546 LU.DeleteFormula(
F);
4554 LU.RecomputeRegs(LUIdx, RegUses);
4557 BestFormulae.clear();
4562 "After filtering out undesirable candidates:\n";
4570size_t LSRInstance::EstimateSearchSpaceComplexity()
const {
4572 for (
const LSRUse &LU :
Uses) {
4573 size_t FSize = LU.Formulae.size();
4588void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
4592 LLVM_DEBUG(
dbgs() <<
"Narrowing the search space by eliminating formulae "
4593 "which use a superset of registers used by other "
4596 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4597 LSRUse &LU =
Uses[LUIdx];
4599 for (
size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
4600 Formula &
F = LU.Formulae[i];
4605 I =
F.BaseRegs.begin(), E =
F.BaseRegs.end();
I != E; ++
I) {
4610 NewF.BaseOffset += (
uint64_t)
C->getValue()->getSExtValue();
4611 NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
4612 (
I -
F.BaseRegs.begin()));
4613 if (LU.HasFormulaWithSameRegs(NewF)) {
4616 LU.DeleteFormula(
F);
4622 }
else if (
const SCEVUnknown *U = dyn_cast<SCEVUnknown>(*
I)) {
4623 if (
GlobalValue *GV = dyn_cast<GlobalValue>(
U->getValue()))
4627 NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
4628 (
I -
F.BaseRegs.begin()));
4629 if (LU.HasFormulaWithSameRegs(NewF)) {
4632 LU.DeleteFormula(
F);
4643 LU.RecomputeRegs(LUIdx, RegUses);
4652void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
4657 dbgs() <<
"The search space is too complex.\n"
4658 "Narrowing the search space by assuming that uses separated "
4659 "by a constant offset will use the same registers.\n");
4663 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4664 LSRUse &LU =
Uses[LUIdx];
4665 for (
const Formula &
F : LU.Formulae) {
4666 if (
F.BaseOffset == 0 || (
F.Scale != 0 &&
F.Scale != 1))
4669 LSRUse *LUThatHas = FindUseWithSimilarFormula(
F, LU);
4673 if (!reconcileNewOffset(*LUThatHas,
F.BaseOffset,
false,
4674 LU.Kind, LU.AccessTy))
4679 LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
4682 for (LSRFixup &
Fixup : LU.Fixups) {
4683 Fixup.Offset +=
F.BaseOffset;
4684 LUThatHas->pushFixup(
Fixup);
4690 for (
size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
4691 Formula &
F = LUThatHas->Formulae[i];
4692 if (!
isLegalUse(
TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset,
4693 LUThatHas->Kind, LUThatHas->AccessTy,
F)) {
4695 LUThatHas->DeleteFormula(
F);
4703 LUThatHas->RecomputeRegs(LUThatHas - &
Uses.front(), RegUses);
4706 DeleteUse(LU, LUIdx);
4719void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){
4723 LLVM_DEBUG(
dbgs() <<
"Narrowing the search space by re-filtering out "
4724 "undesirable dedicated registers.\n");
4726 FilterOutUndesirableDedicatedRegisters();
4741void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() {
4746 dbgs() <<
"The search space is too complex.\n"
4747 "Narrowing the search space by choosing the best Formula "
4748 "from the Formulae with the same Scale and ScaledReg.\n");
4753 BestFormulaeTy BestFormulae;
4755 bool ChangedFormulae =
false;
4760 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4761 LSRUse &LU =
Uses[LUIdx];
4766 auto IsBetterThan = [&](Formula &FA, Formula &FB) {
4771 size_t FARegNum = 0;
4772 for (
const SCEV *Reg : FA.BaseRegs) {
4773 const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);
4774 FARegNum += (NumUses - UsedByIndices.
count() + 1);
4776 size_t FBRegNum = 0;
4777 for (
const SCEV *Reg : FB.BaseRegs) {
4778 const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);
4779 FBRegNum += (NumUses - UsedByIndices.
count() + 1);
4781 if (FARegNum != FBRegNum)
4782 return FARegNum < FBRegNum;
4789 CostFA.RateFormula(FA, Regs, VisitedRegs, LU);
4791 CostFB.RateFormula(FB, Regs, VisitedRegs, LU);
4792 return CostFA.isLess(CostFB);
4796 for (
size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;
4798 Formula &
F = LU.Formulae[FIdx];
4801 auto P = BestFormulae.insert({{
F.ScaledReg,
F.Scale}, FIdx});
4805 Formula &Best = LU.Formulae[
P.first->second];
4806 if (IsBetterThan(
F, Best))
4810 " in favor of formula ";
4811 Best.print(
dbgs());
dbgs() <<
'\n');
4813 ChangedFormulae =
true;
4815 LU.DeleteFormula(
F);
4821 LU.RecomputeRegs(LUIdx, RegUses);
4824 BestFormulae.clear();
4829 "After filtering out undesirable candidates:\n";
4836void LSRInstance::NarrowSearchSpaceByFilterPostInc() {
4843 "Narrowing the search space by choosing the lowest "
4844 "register Formula for PostInc Uses.\n");
4846 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4847 LSRUse &LU =
Uses[LUIdx];
4849 if (LU.Kind != LSRUse::Address)
4855 size_t MinRegs = std::numeric_limits<size_t>::max();
4856 for (
const Formula &
F : LU.Formulae)
4857 MinRegs = std::min(
F.getNumRegs(), MinRegs);
4860 for (
size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;
4862 Formula &
F = LU.Formulae[FIdx];
4863 if (
F.getNumRegs() > MinRegs) {
4866 LU.DeleteFormula(
F);
4873 LU.RecomputeRegs(LUIdx, RegUses);
4924void LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas() {
4937 DenseMap <const SCEV *, float> RegNumMap;
4938 for (
const SCEV *Reg : RegUses) {
4939 if (UniqRegs.
count(Reg))
4942 for (
const LSRUse &LU :
Uses) {
4943 if (!LU.Regs.count(Reg))
4945 float P = LU.getNotSelectedProbability(Reg);
4951 RegNumMap.
insert(std::make_pair(Reg, PNotSel));
4955 dbgs() <<
"Narrowing the search space by deleting costly formulas\n");
4958 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4959 LSRUse &LU =
Uses[LUIdx];
4961 if (LU.Formulae.size() < 2)
4966 float FMinRegNum = LU.Formulae[0].getNumRegs();
4967 float FMinARegNum = LU.Formulae[0].getNumRegs();
4969 for (
size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
4970 Formula &
F = LU.Formulae[i];
4973 for (
const SCEV *BaseReg :
F.BaseRegs) {
4974 if (UniqRegs.
count(BaseReg))
4976 FRegNum += RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);
4977 if (isa<SCEVAddRecExpr>(BaseReg))
4979 RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);
4981 if (
const SCEV *ScaledReg =
F.ScaledReg) {
4982 if (!UniqRegs.
count(ScaledReg)) {
4984 RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
4985 if (isa<SCEVAddRecExpr>(ScaledReg))
4987 RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
4990 if (FMinRegNum > FRegNum ||
4991 (FMinRegNum == FRegNum && FMinARegNum > FARegNum)) {
4992 FMinRegNum = FRegNum;
4993 FMinARegNum = FARegNum;
4998 dbgs() <<
" with min reg num " << FMinRegNum <<
'\n');
5000 std::swap(LU.Formulae[MinIdx], LU.Formulae[0]);
5001 while (LU.Formulae.size() != 1) {
5004 LU.Formulae.pop_back();
5006 LU.RecomputeRegs(LUIdx, RegUses);
5007 assert(LU.Formulae.size() == 1 &&
"Should be exactly 1 min regs formula");
5008 Formula &
F = LU.Formulae[0];
5011 UniqRegs.
insert(
F.BaseRegs.begin(),
F.BaseRegs.end());
5024 MemAccessTy AccessType) {
5025 if (Best->
getType() != Reg->getType() ||
5026 (isa<SCEVAddRecExpr>(Best) && isa<SCEVAddRecExpr>(Reg) &&
5027 cast<SCEVAddRecExpr>(Best)->getLoop() !=
5028 cast<SCEVAddRecExpr>(Reg)->getLoop()))
5030 const auto *Diff = dyn_cast<SCEVConstant>(SE.
getMinusSCEV(Best, Reg));
5035 AccessType.MemTy,
nullptr,
5036 Diff->getAPInt().getSExtValue(),
5037 true, 0, AccessType.AddrSpace) &&
5039 AccessType.MemTy,
nullptr,
5040 -Diff->getAPInt().getSExtValue(),
5041 true, 0, AccessType.AddrSpace);
5047void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
5058 const SCEV *Best =
nullptr;
5059 unsigned BestNum = 0;
5060 for (
const SCEV *Reg : RegUses) {
5061 if (Taken.
count(Reg))
5065 BestNum = RegUses.getUsedByIndices(Reg).count();
5067 unsigned Count = RegUses.getUsedByIndices(Reg).count();
5068 if (Count > BestNum) {
5076 if (Count == BestNum) {
5077 int LUIdx = RegUses.getUsedByIndices(Reg).find_first();
5078 if (LUIdx >= 0 &&
Uses[LUIdx].Kind == LSRUse::Address &&
5080 Uses[LUIdx].AccessTy)) {
5087 assert(Best &&
"Failed to find best LSRUse candidate");
5089 LLVM_DEBUG(
dbgs() <<
"Narrowing the search space by assuming " << *Best
5090 <<
" will yield profitable reuse.\n");
5095 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
5096 LSRUse &LU =
Uses[LUIdx];
5097 if (!LU.Regs.count(Best))
continue;
5100 for (
size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
5101 Formula &
F = LU.Formulae[i];
5102 if (!
F.referencesReg(Best)) {
5104 LU.DeleteFormula(
F);
5108 assert(e != 0 &&
"Use has no formulae left! Is Regs inconsistent?");
5114 LU.RecomputeRegs(LUIdx, RegUses);
5125void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
5126 NarrowSearchSpaceByDetectingSupersets();
5127 NarrowSearchSpaceByCollapsingUnrolledCode();
5128 NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
5130 NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
5131 NarrowSearchSpaceByFilterPostInc();
5133 NarrowSearchSpaceByDeletingCostlyFormulas();
5135 NarrowSearchSpaceByPickingWinnerRegs();
5142 const Cost &CurCost,
5155 const LSRUse &LU =
Uses[Workspace.
size()];
5162 for (
const SCEV *S : CurRegs)
5163 if (LU.Regs.count(S))
5167 Cost NewCost(L, SE,
TTI, AMK);
5168 for (
const Formula &
F : LU.Formulae) {
5176 int NumReqRegsToFind = std::min(
F.getNumRegs(), ReqRegs.
size());
5177 for (
const SCEV *Reg : ReqRegs) {
5178 if ((
F.ScaledReg &&
F.ScaledReg == Reg) ||
5181 if (NumReqRegsToFind == 0)
5185 if (NumReqRegsToFind != 0) {
5196 NewCost.RateFormula(
F, NewRegs, VisitedRegs, LU);
5197 if (NewCost.isLess(SolutionCost)) {
5199 if (Workspace.
size() !=
Uses.size()) {
5200 SolveRecurse(Solution, SolutionCost, Workspace, NewCost,
5201 NewRegs, VisitedRegs);
5202 if (
F.getNumRegs() == 1 && Workspace.
size() == 1)
5203 VisitedRegs.
insert(
F.ScaledReg ?
F.ScaledReg :
F.BaseRegs[0]);
5206 dbgs() <<
".\nRegs:\n";
5207 for (
const SCEV *S : NewRegs)
dbgs()
5208 <<
"- " << *S <<
"\n";
5211 SolutionCost = NewCost;
5212 Solution = Workspace;
5223 Cost SolutionCost(L, SE,
TTI, AMK);
5224 SolutionCost.Lose();
5225 Cost CurCost(L, SE,
TTI, AMK);
5231 SolveRecurse(Solution, SolutionCost, Workspace, CurCost,
5232 CurRegs, VisitedRegs);
5233 if (Solution.
empty()) {
5240 "The chosen solution requires ";
5241 SolutionCost.print(
dbgs());
dbgs() <<
":\n";
5242 for (
size_t i = 0, e =
Uses.size(); i != e; ++i) {
5247 Solution[i]->print(
dbgs());
5253 if (BaselineCost.isLess(SolutionCost)) {
5256 dbgs() <<
"Baseline is more profitable than chosen solution, "
5257 "add option 'lsr-drop-solution' to drop LSR solution.\n");
5260 "solution, dropping LSR solution.\n";);
5275 bool AllDominate =
true;
5279 if (isa<CatchSwitchInst>(Tentative))
5283 if (Inst == Tentative || !DT.dominates(Inst, Tentative)) {
5284 AllDominate =
false;
5290 (!BetterPos || !DT.dominates(Inst, BetterPos)))
5300 const Loop *IPLoop = LI.getLoopFor(IP->getParent());
5301 unsigned IPLoopDepth = IPLoop ? IPLoop->
getLoopDepth() : 0;
5304 for (
DomTreeNode *Rung = DT.getNode(IP->getParent()); ; ) {
5305 if (!Rung)
return IP;
5306 Rung = Rung->getIDom();
5307 if (!Rung)
return IP;
5308 IDom = Rung->getBlock();
5311 const Loop *IDomLoop = LI.getLoopFor(IDom);
5312 unsigned IDomDepth = IDomLoop ? IDomLoop->
getLoopDepth() : 0;
5313 if (IDomDepth <= IPLoopDepth &&
5314 (IDomDepth != IPLoopDepth || IDomLoop == IPLoop))
5332 if (
Instruction *
I = dyn_cast<Instruction>(LF.OperandValToReplace))
5334 if (LU.Kind == LSRUse::ICmpZero)
5336 dyn_cast<Instruction>(cast<ICmpInst>(LF.UserInst)->getOperand(1)))
5338 if (LF.PostIncLoops.count(L)) {
5339 if (LF.isUseFullyOutsideLoop(L))
5340 Inputs.
push_back(
L->getLoopLatch()->getTerminator());
5346 for (
const Loop *PIL : LF.PostIncLoops) {
5347 if (PIL == L)
continue;
5352 if (!ExitingBlocks.
empty()) {
5354 for (
unsigned i = 1, e = ExitingBlocks.
size(); i != e; ++i)
5355 BB = DT.findNearestCommonDominator(BB, ExitingBlocks[i]);
5360 assert(!isa<PHINode>(LowestIP) && !LowestIP->isEHPad()
5361 && !isa<DbgInfoIntrinsic>(LowestIP) &&
5362 "Insertion point must be a normal instruction");
5369 while (isa<PHINode>(IP)) ++IP;
5372 while (IP->isEHPad()) ++IP;
5375 while (isa<DbgInfoIntrinsic>(IP)) ++IP;
5380 while (
Rewriter.isInsertedInstruction(&*IP) && IP != LowestIP)
5388Value *LSRInstance::Expand(
const LSRUse &LU,
const LSRFixup &LF,
5391 if (LU.RigidFormula)
5392 return LF.OperandValToReplace;
5396 IP = AdjustInsertPositionForExpand(IP, LF, LU);
5401 Rewriter.setPostInc(LF.PostIncLoops);
5404 Type *OpTy = LF.OperandValToReplace->getType();
5406 Type *Ty =
F.getType();
5420 for (
const SCEV *Reg :
F.BaseRegs) {
5421 assert(!
Reg->isZero() &&
"Zero allocated in a base register!");
5429 Value *ICmpScaledV =
nullptr;
5431 const SCEV *ScaledS =
F.ScaledReg;
5437 if (LU.Kind == LSRUse::ICmpZero) {
5447 "The only scale supported by ICmpZero uses is -1!");
5448 ICmpScaledV =
Rewriter.expandCodeFor(ScaledS,
nullptr);
5456 if (!Ops.
empty() && LU.Kind == LSRUse::Address &&
5492 if (LU.Kind == LSRUse::ICmpZero) {
5499 ICmpScaledV = ConstantInt::get(IntTy,
Offset);
5509 int64_t UnfoldedOffset =
F.UnfoldedOffset;
5510 if (UnfoldedOffset != 0) {
5528 if (LU.Kind == LSRUse::ICmpZero) {
5529 ICmpInst *CI = cast<ICmpInst>(LF.UserInst);
5530 if (
auto *OperandIsInstr = dyn_cast<Instruction>(CI->
getOperand(1)))
5532 assert(!
F.BaseGV &&
"ICmp does not support folding a global value and "
5533 "a scale at the same time!");
5534 if (
F.Scale == -1) {
5535 if (ICmpScaledV->
getType() != OpTy) {
5545 assert((
F.Scale == 0 ||
F.Scale == 1) &&
5546 "ICmp does not support folding a global value and "
5547 "a scale at the same time!");
5550 if (
C->getType() != OpTy) {
5554 assert(
C &&
"Cast of ConstantInt should have folded");
5567void LSRInstance::RewriteForPHI(
5568 PHINode *PN,
const LSRUse &LU,
const LSRFixup &LF,
const Formula &
F,
5580 bool needUpdateFixups =
false;
5591 Loop *PNLoop = LI.getLoopFor(Parent);
5592 if (!PNLoop || Parent != PNLoop->
getHeader()) {
5599 .setMergeIdenticalEdges()
5600 .setKeepOneInputPHIs());
5614 if (
L->contains(BB) && !
L->contains(PN))
5622 needUpdateFixups =
true;
5627 std::pair<DenseMap<BasicBlock *, Value *>::iterator,
bool> Pair =
5628 Inserted.insert(std::make_pair(BB,
static_cast<Value *
>(
nullptr)));
5636 Type *OpTy = LF.OperandValToReplace->getType();
5640 LF.OperandValToReplace->getType(),
"tmp",
5646 if (
auto *
I = dyn_cast<Instruction>(FullV))
5647 if (
L->contains(
I) && !
L->contains(BB))
5651 Pair.first->second = FullV;
5658 if (needUpdateFixups) {
5659 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx)
5660 for (LSRFixup &
Fixup :
Uses[LUIdx].Fixups)
5664 if (
Fixup.UserInst == PN) {
5667 bool foundInOriginalPHI =
false;
5669 if (val ==
Fixup.OperandValToReplace) {
5670 foundInOriginalPHI =
true;
5675 if (foundInOriginalPHI)
5686 if (val ==
Fixup.OperandValToReplace)
5687 Fixup.UserInst = NewPN;
5699void LSRInstance::Rewrite(
const LSRUse &LU,
const LSRFixup &LF,
5704 if (
PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) {
5705 RewriteForPHI(PN, LU, LF,
F, DeadInsts);
5707 Value *FullV = Expand(LU, LF,
F, LF.UserInst->getIterator(), DeadInsts);
5710 Type *OpTy = LF.OperandValToReplace->getType();
5711 if (FullV->
getType() != OpTy) {
5714 FullV, OpTy,
"tmp", LF.UserInst->getIterator());
5723 if (LU.Kind == LSRUse::ICmpZero)
5726 LF.UserInst->replaceUsesOfWith(LF.OperandValToReplace, FullV);
5729 if (
auto *OperandIsInstr = dyn_cast<Instruction>(LF.OperandValToReplace))
5739 if (LU.Kind != LSRUse::Address)
5746 if (IVIncInsertPos->
getParent() == LHeader)
5749 if (!
Fixup.OperandValToReplace ||
5751 Instruction *UI = cast<Instruction>(U);
5752 return UI->getParent() != LHeader;
5757 Type *Ty =
I->getType();
5765void LSRInstance::ImplementSolution(
5772 for (
const IVChain &Chain : IVChainVec) {
5773 if (
PHINode *PN = dyn_cast<PHINode>(Chain.tailUserInst()))
5778 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx)
5779 for (
const LSRFixup &
Fixup :
Uses[LUIdx].Fixups) {
5782 ?
L->getHeader()->getTerminator()
5784 Rewriter.setIVIncInsertPos(L, InsertPos);
5785 Rewrite(
Uses[LUIdx],
Fixup, *Solution[LUIdx], DeadInsts);
5789 for (
const IVChain &Chain : IVChainVec) {
5790 GenerateIVChain(Chain, DeadInsts);
5796 ScalarEvolutionIVs.push_back(
IV);
5812 for (
PHINode &PN :
L->getHeader()->phis()) {
5814 Value *Start =
nullptr, *Step =
nullptr;
5819 case Instruction::Sub:
5824 case Instruction::Add:
5830 if (!isa<Constant>(Step))
5841 [&](
Use &U) {return DT.dominates(IVIncInsertPos, U);}))
5854 : IU(IU), SE(SE), DT(DT), LI(LI), AC(AC), TLI(TLI),
TTI(
TTI),
L(
L),
5857 :
TTI.getPreferredAddressingMode(
L, &SE)),
5858 Rewriter(SE,
L->getHeader()->getModule()->getDataLayout(),
"lsr",
false),
5859 BaselineCost(
L, SE,
TTI, AMK) {
5861 if (!
L->isLoopSimplifyForm())
5865 if (IU.
empty())
return;
5869 unsigned NumUsers = 0;
5873 LLVM_DEBUG(
dbgs() <<
"LSR skipping loop, too many IV Users in " << U
5880 if (
auto *PN = dyn_cast<PHINode>(
U.getUser())) {
5882 if (isa<FuncletPadInst>(FirstNonPHI) ||
5883 isa<CatchSwitchInst>(FirstNonPHI))
5885 if (isa<CatchSwitchInst>(PredBB->getFirstNonPHI()))
5891 L->getHeader()->printAsOperand(
dbgs(),
false);
5904 OptimizeLoopTermCond();
5907 if (IU.empty())
return;
5910 if (!
L->isInnermost()) {
5923 CollectInterestingTypesAndFactors();
5924 CollectFixupsAndInitialFormulae();
5925 CollectLoopInvariantFixupsAndFormulae();
5931 print_uses(
dbgs()));
5933 BaselineCost.print(
dbgs());
dbgs() <<
"\n");
5937 GenerateAllReuseFormulae();
5939 FilterOutUndesirableDedicatedRegisters();
5940 NarrowSearchSpaceUsingHeuristics();
5950 if (Solution.
empty())
5955 for (
const LSRUse &LU :
Uses) {
5956 for (
const Formula &
F : LU.Formulae)
5958 F) &&
"Illegal formula generated!");
5963 ImplementSolution(Solution);
5966#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
5967void LSRInstance::print_factors_and_types(
raw_ostream &
OS)
const {
5968 if (Factors.empty() &&
Types.empty())
return;
5970 OS <<
"LSR has identified the following interesting factors and types: ";
5973 for (int64_t Factor : Factors) {
5976 OS <<
'*' << Factor;
5979 for (
Type *Ty : Types) {
5982 OS <<
'(' << *Ty <<
')';
5988 OS <<
"LSR is examining the following fixup sites:\n";
5989 for (
const LSRUse &LU :
Uses)
5990 for (
const LSRFixup &LF : LU.Fixups) {
5998 OS <<
"LSR is examining the following uses:\n";
5999 for (
const LSRUse &LU :
Uses) {
6003 for (
const Formula &
F : LU.Formulae) {
6012 print_factors_and_types(
OS);
6024class LoopStrengthReduce :
public LoopPass {
6028 LoopStrengthReduce();
6037LoopStrengthReduce::LoopStrengthReduce() :
LoopPass(
ID) {
6041void LoopStrengthReduce::getAnalysisUsage(
AnalysisUsage &AU)
const {
6073 return {Begin,
End};
6076struct SCEVDbgValueBuilder {
6077 SCEVDbgValueBuilder() =
default;
6078 SCEVDbgValueBuilder(
const SCEVDbgValueBuilder &
Base) { clone(
Base); }
6080 void clone(
const SCEVDbgValueBuilder &
Base) {
6081 LocationOps =
Base.LocationOps;
6086 LocationOps.clear();
6103 unsigned ArgIndex = 0;
6104 if (It != LocationOps.
end()) {
6105 ArgIndex = std::distance(LocationOps.
begin(), It);
6107 ArgIndex = LocationOps.
size();
6119 if (
C->getAPInt().getSignificantBits() > 64)
6121 Expr.
push_back(llvm::dwarf::DW_OP_consts);
6122 Expr.
push_back(
C->getAPInt().getSExtValue());
6129 return ToDwarfOpIter(Expr);
6136 assert((isa<llvm::SCEVAddExpr>(CommExpr) || isa<SCEVMulExpr>(CommExpr)) &&
6137 "Expected arithmetic SCEV type");
6139 unsigned EmitOperator = 0;
6140 for (
const auto &
Op : CommExpr->
operands()) {
6143 if (EmitOperator >= 1)
6144 pushOperator(DwarfOp);
6155 bool Success = pushSCEV(Inner);
6157 IsSigned ? llvm::dwarf::DW_ATE_signed
6158 : llvm::dwarf::DW_ATE_unsigned};
6159 for (
const auto &
Op : CastOps)
6167 if (
const SCEVConstant *StartInt = dyn_cast<SCEVConstant>(S)) {
6168 Success &= pushConst(StartInt);
6170 }
else if (
const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
6173 pushLocation(
U->getValue());
6175 }
else if (
const SCEVMulExpr *MulRec = dyn_cast<SCEVMulExpr>(S)) {
6176 Success &= pushArithmeticExpr(MulRec, llvm::dwarf::DW_OP_mul);
6178 }
else if (
const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
6179 Success &= pushSCEV(UDiv->getLHS());
6180 Success &= pushSCEV(UDiv->getRHS());
6181 pushOperator(llvm::dwarf::DW_OP_div);
6183 }
else if (
const SCEVCastExpr *Cast = dyn_cast<SCEVCastExpr>(S)) {
6185 assert((isa<SCEVZeroExtendExpr>(Cast) || isa<SCEVTruncateExpr>(Cast) ||
6186 isa<SCEVPtrToIntExpr>(Cast) || isa<SCEVSignExtendExpr>(Cast)) &&
6187 "Unexpected cast type in SCEV.");
6188 Success &= pushCast(Cast, (isa<SCEVSignExtendExpr>(Cast)));
6190 }
else if (
const SCEVAddExpr *AddExpr = dyn_cast<SCEVAddExpr>(S)) {
6191 Success &= pushArithmeticExpr(AddExpr, llvm::dwarf::DW_OP_plus);
6193 }
else if (isa<SCEVAddRecExpr>(S)) {
6208 if (
C->getAPInt().getSignificantBits() > 64)
6210 int64_t
I =
C->getAPInt().getSExtValue();
6212 case llvm::dwarf::DW_OP_plus:
6213 case llvm::dwarf::DW_OP_minus:
6215 case llvm::dwarf::DW_OP_mul:
6216 case llvm::dwarf::DW_OP_div:
6232 if (isa<SCEVAddRecExpr>(SAR.
getStart()))
6239 if (!isIdentityFunction(llvm::dwarf::DW_OP_mul, Stride)) {
6240 if (!pushSCEV(Stride))
6242 pushOperator(llvm::dwarf::DW_OP_mul);
6244 if (!isIdentityFunction(llvm::dwarf::DW_OP_plus, Start)) {
6245 if (!pushSCEV(Start))
6247 pushOperator(llvm::dwarf::DW_OP_plus);
6253 void createOffsetExpr(int64_t
Offset,
Value *OffsetValue) {
6254 pushLocation(OffsetValue);
6257 dbgs() <<
"scev-salvage: Generated IV offset expression. Offset: "
6258 << std::to_string(
Offset) <<
"\n");
6264 bool createIterCountExpr(
const SCEV *S,
6265 const SCEVDbgValueBuilder &IterationCount,
6272 if (!isa<SCEVAddRecExpr>(S))
6275 LLVM_DEBUG(
dbgs() <<
"scev-salvage: Location to salvage SCEV: " << *S
6278 const auto *Rec = cast<SCEVAddRecExpr>(S);
6279 if (!Rec->isAffine())
6287 clone(IterationCount);
6288 if (!SCEVToValueExpr(*Rec, SE))
6302 if (isa<SCEVAddRecExpr>(SAR.
getStart())) {
6303 LLVM_DEBUG(
dbgs() <<
"scev-salvage: IV SCEV. Unsupported nested AddRec: "
6311 if (!isIdentityFunction(llvm::dwarf::DW_OP_minus, Start)) {
6312 if (!pushSCEV(Start))
6314 pushOperator(llvm::dwarf::DW_OP_minus);
6316 if (!isIdentityFunction(llvm::dwarf::DW_OP_div, Stride)) {
6317 if (!pushSCEV(Stride))
6319 pushOperator(llvm::dwarf::DW_OP_div);
6330 "Expected the locations vector to contain the IV");
6335 "Expected the location ops to contain the IV.");
6339 for (
const auto &
Op : LocationOps) {
6340 auto It =
find(DestLocations,
Op);
6341 if (It != DestLocations.
end()) {
6343 DestIndexMap.
push_back(std::distance(DestLocations.
begin(), It));
6351 for (
const auto &
Op : expr_ops()) {
6353 Op.appendToVector(DestExpr);
6360 uint64_t NewIndex = DestIndexMap[
Op.getArg(0)];
6368struct DVIRecoveryRec {
6371 HadLocationArgList(
false) {}
6373 : DbgRef(DVR), Expr(DVR->getExpression()), HadLocationArgList(
false) {}
6377 bool HadLocationArgList;
6383 for (
auto &RE : RecoveryExprs)
6385 RecoveryExprs.clear();
6388 ~DVIRecoveryRec() {
clear(); }
6396 auto expr_ops = ToDwarfOpIter(Expr);
6398 for (
auto Op : expr_ops)
6407template <
typename T>
6411 "contain any DW_OP_llvm_arg operands.");
6413 DbgVal.setExpression(DIExpression::get(DbgVal.getContext(), Ops));
6414 DbgVal.setExpression(DIExpression::get(DbgVal.getContext(), Ops));
6418template <
typename T>
6423 "Expected expression that references DIArglist locations using "
6424 "DW_OP_llvm_arg operands.");
6426 for (
Value *V : Locations)
6430 DbgVal.setExpression(DIExpression::get(DbgVal.getContext(), Ops));
6441 auto UpdateDbgValueInstImpl = [&](
auto *DbgVal) {
6443 if (NumLLVMArgs == 0) {
6450 "Lone LLVM_arg in a DIExpression should refer to location-op 0.");
6462 if (!DVIRec.Expr->isComplex() && SalvageExpr->
isComplex()) {
6465 DbgVal->setExpression(SalvageExpr);
6468 if (isa<DbgValueInst *>(DVIRec.DbgRef))
6469 UpdateDbgValueInstImpl(cast<DbgValueInst *>(DVIRec.DbgRef));
6471 UpdateDbgValueInstImpl(cast<DbgVariableRecord *>(DVIRec.DbgRef));
6485 auto RestorePreTransformStateImpl = [&](
auto *DbgVal) {
6486 LLVM_DEBUG(
dbgs() <<
"scev-salvage: restore dbg.value to pre-LSR state\n"
6487 <<
"scev-salvage: post-LSR: " << *DbgVal <<
'\n');
6488 assert(DVIRec.Expr &&
"Expected an expression");
6489 DbgVal->setExpression(DVIRec.Expr);
6493 if (!DVIRec.HadLocationArgList) {
6494 assert(DVIRec.LocationOps.size() == 1 &&
6495 "Unexpected number of location ops.");
6499 Value *CachedValue =
6504 for (
WeakVH VH : DVIRec.LocationOps) {
6509 DbgVal->setRawLocation(
6512 LLVM_DEBUG(
dbgs() <<
"scev-salvage: pre-LSR: " << *DbgVal <<
'\n');
6514 if (isa<DbgValueInst *>(DVIRec.DbgRef))
6515 RestorePreTransformStateImpl(cast<DbgValueInst *>(DVIRec.DbgRef));
6517 RestorePreTransformStateImpl(cast<DbgVariableRecord *>(DVIRec.DbgRef));
6522 const SCEV *SCEVInductionVar,
6523 SCEVDbgValueBuilder IterCountExpr) {
6525 if (isa<DbgValueInst *>(DVIRec.DbgRef)
6526 ? !cast<DbgValueInst *>(DVIRec.DbgRef)->isKillLocation()
6527 : !cast<DbgVariableRecord *>(DVIRec.DbgRef)->isKillLocation())
6539 LocationOpIndexMap.
assign(DVIRec.LocationOps.size(), -1);
6541 NewLocationOps.
push_back(LSRInductionVar);
6543 for (
unsigned i = 0; i < DVIRec.LocationOps.size(); i++) {
6544 WeakVH VH = DVIRec.LocationOps[i];
6548 if (VH && !isa<UndefValue>(VH)) {
6550 LocationOpIndexMap[i] = NewLocationOps.
size() - 1;
6552 <<
" now at index " << LocationOpIndexMap[i] <<
"\n");
6560 LLVM_DEBUG(
dbgs() <<
"scev-salvage: SCEV for location at index: " << i
6561 <<
" refers to a location that is now undef or erased. "
6562 "Salvage abandoned.\n");
6566 LLVM_DEBUG(
dbgs() <<
"scev-salvage: salvaging location at index " << i
6567 <<
" with SCEV: " << *DVIRec.SCEVs[i] <<
"\n");
6569 DVIRec.RecoveryExprs[i] = std::make_unique<SCEVDbgValueBuilder>();
6570 SCEVDbgValueBuilder *SalvageExpr = DVIRec.RecoveryExprs[i].get();
6574 if (std::optional<APInt>
Offset =
6576 if (
Offset->getSignificantBits() <= 64)
6577 SalvageExpr->createOffsetExpr(
Offset->getSExtValue(), LSRInductionVar);
6578 }
else if (!SalvageExpr->createIterCountExpr(DVIRec.SCEVs[i], IterCountExpr,
6586 if (DVIRec.Expr->getNumElements() == 0) {
6587 assert(DVIRec.RecoveryExprs.size() == 1 &&
6588 "Expected only a single recovery expression for an empty "
6590 assert(DVIRec.RecoveryExprs[0] &&
6591 "Expected a SCEVDbgSalvageBuilder for location 0");
6592 SCEVDbgValueBuilder *
B = DVIRec.RecoveryExprs[0].get();
6593 B->appendToVectors(
NewExpr, NewLocationOps);
6595 for (
const auto &
Op : DVIRec.Expr->expr_ops()) {
6603 SCEVDbgValueBuilder *DbgBuilder =
6604 DVIRec.RecoveryExprs[LocationArgIndex].get();
6610 assert(LocationOpIndexMap[
Op.getArg(0)] != -1 &&
6611 "Expected a positive index for the location-op position.");
6612 NewExpr.push_back(LocationOpIndexMap[
Op.getArg(0)]);
6616 DbgBuilder->appendToVectors(
NewExpr, NewLocationOps);
6620 if (isa<DbgValueInst *>(DVIRec.DbgRef))
6622 << *cast<DbgValueInst *>(DVIRec.DbgRef) <<
"\n");
6625 << *cast<DbgVariableRecord *>(DVIRec.DbgRef) <<
"\n");
6633 SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> &DVIToUpdate) {
6634 if (DVIToUpdate.empty())
6638 assert(SCEVInductionVar &&
6639 "Anticipated a SCEV for the post-LSR induction variable");
6642 dyn_cast<SCEVAddRecExpr>(SCEVInductionVar)) {
6643 if (!IVAddRec->isAffine())
6651 SCEVDbgValueBuilder IterCountExpr;
6652 IterCountExpr.pushLocation(LSRInductionVar);
6653 if (!IterCountExpr.SCEVToIterCountExpr(*IVAddRec, SE))
6656 LLVM_DEBUG(
dbgs() <<
"scev-salvage: IV SCEV: " << *SCEVInductionVar
6659 for (
auto &DVIRec : DVIToUpdate) {
6660 SalvageDVI(L, SE, LSRInductionVar, *DVIRec, SCEVInductionVar,
6671 SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> &SalvageableDVISCEVs,
6673 for (
const auto &
B : L->getBlocks()) {
6674 for (
auto &
I : *
B) {
6675 auto ProcessDbgValue = [&](
auto *DbgVal) ->
bool {
6678 if (DbgVal->isKillLocation())
6683 const auto &HasTranslatableLocationOps =
6684 [&](
const auto *DbgValToTranslate) ->
bool {
6685 for (
const auto LocOp : DbgValToTranslate->location_ops()) {
6699 if (!HasTranslatableLocationOps(DbgVal))
6702 std::unique_ptr<DVIRecoveryRec> NewRec =
6703 std::make_unique<DVIRecoveryRec>(DbgVal);
6707 NewRec->RecoveryExprs.resize(DbgVal->getNumVariableLocationOps());
6708 for (
const auto LocOp : DbgVal->location_ops()) {
6709 NewRec->SCEVs.push_back(SE.
getSCEV(LocOp));
6710 NewRec->LocationOps.push_back(LocOp);
6711 NewRec->HadLocationArgList = DbgVal->hasArgList();
6713 SalvageableDVISCEVs.push_back(std::move(NewRec));
6717 if (DVR.isDbgValue() || DVR.isDbgAssign())
6718 ProcessDbgValue(&DVR);
6720 auto DVI = dyn_cast<DbgValueInst>(&
I);
6723 if (ProcessDbgValue(DVI))
6724 DVIHandles.insert(DVI);
6733 const LSRInstance &LSR) {
6735 auto IsSuitableIV = [&](
PHINode *
P) {
6746 for (
const WeakVH &
IV : LSR.getScalarEvolutionIVs()) {
6753 if (IsSuitableIV(
P))
6757 for (
PHINode &
P : L.getHeader()->phis()) {
6758 if (IsSuitableIV(&
P))
6764static std::optional<std::tuple<PHINode *, PHINode *, const SCEV *, bool>>
6767 if (!L->isInnermost()) {
6769 return std::nullopt;
6772 if (!L->isLoopSimplifyForm()) {
6774 return std::nullopt;
6778 LLVM_DEBUG(
dbgs() <<
"Cannot fold on backedge that is loop variant\n");
6779 return std::nullopt;
6785 return std::nullopt;
6786 auto *TermCond = dyn_cast<ICmpInst>(BI->
getCondition());
6789 dbgs() <<
"Cannot fold on branching condition that is not an ICmpInst");
6790 return std::nullopt;
6792 if (!TermCond->hasOneUse()) {
6795 <<
"Cannot replace terminating condition with more than one use\n");
6796 return std::nullopt;
6800 Value *
RHS = TermCond->getOperand(1);
6801 if (!
LHS || !L->isLoopInvariant(
RHS))
6804 return std::nullopt;
6808 Value *ToFoldStart, *ToFoldStep;
6810 return std::nullopt;
6813 if (ToFold->
getParent() != L->getHeader())
6814 return std::nullopt;
6819 return std::nullopt;
6823 const unsigned ExpansionBudget = [&]() {
6826 return std::min(Budget, SmallTC);
6828 return std::min(Budget, *SmallTC);
6834 const DataLayout &
DL = L->getHeader()->getModule()->getDataLayout();
6837 PHINode *ToHelpFold =
nullptr;
6838 const SCEV *TermValueS =
nullptr;
6839 bool MustDropPoison =
false;
6840 auto InsertPt = L->getLoopPreheader()->getTerminator();
6841 for (
PHINode &PN : L->getHeader()->phis()) {
6847 <<
"' is not SCEV-able, not qualified for the "
6848 "terminating condition folding.\n");
6853 if (!AddRec || !AddRec->
isAffine()) {
6855 <<
"' is not an affine add recursion, not qualified "
6856 "for the terminating condition folding.\n");
6874 const SCEV *TermValueSLocal =
PostInc->evaluateAtIteration(BECount, SE);
6877 dbgs() <<
"Is not safe to expand terminating value for phi node" << PN
6885 dbgs() <<
"Is too expensive to expand terminating value for phi node"
6902 bool MustDropPoisonLocal =
false;
6924 TermValueS = TermValueSLocal;
6925 MustDropPoison = MustDropPoisonLocal;
6929 <<
"Cannot find other AddRec IV to help folding\n";);
6932 <<
"\nFound loop that can fold terminating condition\n"
6934 <<
" TermCond: " << *TermCond <<
"\n"
6935 <<
" BrandInst: " << *BI <<
"\n"
6936 <<
" ToFold: " << *ToFold <<
"\n"
6937 <<
" ToHelpFold: " << *ToHelpFold <<
"\n");
6939 if (!ToFold || !ToHelpFold)
6940 return std::nullopt;
6941 return std::make_tuple(ToFold, ToHelpFold, TermValueS, MustDropPoison);
6956 bool Changed =
false;
6957 std::unique_ptr<MemorySSAUpdater> MSSAU;
6959 MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
6962 const LSRInstance &Reducer =
6963 LSRInstance(L, IU, SE, DT, LI,
TTI, AC, TLI, MSSAU.get());
6964 Changed |= Reducer.getChanged();
6970 const DataLayout &
DL = L->getHeader()->getModule()->getDataLayout();
6975 unsigned numFolded =
Rewriter.replaceCongruentIVs(L, &DT, DeadInsts, &
TTI);
6989 if (L->isRecursivelyLCSSAForm(DT, LI) && L->getExitBlock()) {
6991 const DataLayout &
DL = L->getHeader()->getModule()->getDataLayout();
7004 const bool EnableFormTerm = [&] {
7016 if (EnableFormTerm) {
7018 auto [ToFold, ToHelpFold, TermValueS, MustDrop] = *Opt;
7023 BasicBlock *LoopPreheader = L->getLoopPreheader();
7029 <<
"New term-cond phi-node:\n"
7030 << *ToHelpFold <<
"\n");
7032 Value *StartValue = ToHelpFold->getIncomingValueForBlock(LoopPreheader);
7034 Value *LoopValue = ToHelpFold->getIncomingValueForBlock(LoopLatch);
7038 cast<Instruction>(LoopValue)->dropPoisonGeneratingFlags();
7041 const DataLayout &
DL = L->getHeader()->getModule()->getDataLayout();
7045 "Terminating value was checked safe in canFoldTerminatingCondition");
7052 << *StartValue <<
"\n"
7053 <<
"Terminating value of new term-cond phi-node:\n"
7054 << *TermValue <<
"\n");
7060 Value *NewTermCond =
7062 "lsr_fold_term_cond.replaced_term_cond");
7068 << *OldTermCond <<
"\n"
7069 <<
"New term-cond:\n" << *NewTermCond <<
"\n");
7079 if (SalvageableDVIRecords.
empty())
7085 for (
const auto &L : LI) {
7089 LLVM_DEBUG(
dbgs() <<
"scev-salvage: SCEV salvaging not possible. An IV "
7090 "could not be identified.\n");
7094 for (
auto &Rec : SalvageableDVIRecords)
7096 SalvageableDVIRecords.
clear();
7105 auto &IU = getAnalysis<IVUsersWrapperPass>().getIU();
7106 auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
7107 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
7108 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
7109 const auto &
TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
7110 *
L->getHeader()->getParent());
7111 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
7112 *
L->getHeader()->getParent());
7113 auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
7114 *
L->getHeader()->getParent());
7115 auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
7118 MSSA = &MSSAAnalysis->getMSSA();
7135char LoopStrengthReduce::ID = 0;
7138 "Loop Strength Reduction",
false,
false)
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file implements a class to represent arbitrary precision integral constant values and operations...
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static bool isEqual(const Function &Caller, const Function &Callee)
static const Function * getParent(const Value *V)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static void clear(coro::Shape &Shape)
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
This file contains constants used for implementing Dwarf debug support.
std::optional< std::vector< StOtherPiece > > Other
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Rewrite Partial Register Uses
iv Induction Variable Users
This header provides classes for managing per-loop analyses.
static bool SalvageDVI(llvm::Loop *L, ScalarEvolution &SE, llvm::PHINode *LSRInductionVar, DVIRecoveryRec &DVIRec, const SCEV *SCEVInductionVar, SCEVDbgValueBuilder IterCountExpr)
static std::optional< std::tuple< PHINode *, PHINode *, const SCEV *, bool > > canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT, const LoopInfo &LI, const TargetTransformInfo &TTI)
static Value * getWideOperand(Value *Oper)
IVChain logic must consistently peek base TruncInst operands, so wrap it in a convenient helper.
static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE)
Return true if the given add can be sign-extended without changing its value.
static bool mayUsePostIncMode(const TargetTransformInfo &TTI, LSRUse &LU, const SCEV *S, const Loop *L, ScalarEvolution &SE)
Return true if the SCEV represents a value that may end up as a post-increment operation.
static void restorePreTransformState(DVIRecoveryRec &DVIRec)
Restore the DVI's pre-LSR arguments. Substitute undef for any erased values.
static bool containsAddRecDependentOnLoop(const SCEV *S, const Loop &L)
static User::op_iterator findIVOperand(User::op_iterator OI, User::op_iterator OE, Loop *L, ScalarEvolution &SE)
Helper for CollectChains that finds an IV operand (computed by an AddRec in this loop) within [OI,...
static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE)
Return true if the given mul can be sign-extended without changing its value.
static const unsigned MaxSCEVSalvageExpressionSize
Limit the size of expression that SCEV-based salvaging will attempt to translate into a DIExpression.
static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE)
Return true if this AddRec is already a phi in its loop.
static InstructionCost getScalingFactorCost(const TargetTransformInfo &TTI, const LSRUse &LU, const Formula &F, const Loop &L)
static cl::opt< bool > InsnsCost("lsr-insns-cost", cl::Hidden, cl::init(true), cl::desc("Add instruction count to a LSR cost model"))
static cl::opt< bool > StressIVChain("stress-ivchain", cl::Hidden, cl::init(false), cl::desc("Stress test LSR IV chains"))
static bool isAddressUse(const TargetTransformInfo &TTI, Instruction *Inst, Value *OperandVal)
Returns true if the specified instruction is using the specified value as an address.
static GlobalValue * ExtractSymbol(const SCEV *&S, ScalarEvolution &SE)
If S involves the addition of a GlobalValue address, return that symbol, and mutate S to point to a n...
static void updateDVIWithLocation(T &DbgVal, Value *Location, SmallVectorImpl< uint64_t > &Ops)
Overwrites DVI with the location and Ops as the DIExpression.
static cl::opt< TTI::AddressingModeKind > PreferredAddresingMode("lsr-preferred-addressing-mode", cl::Hidden, cl::init(TTI::AMK_None), cl::desc("A flag that overrides the target's preferred addressing mode."), cl::values(clEnumValN(TTI::AMK_None, "none", "Don't prefer any addressing mode"), clEnumValN(TTI::AMK_PreIndexed, "preindexed", "Prefer pre-indexed addressing mode"), clEnumValN(TTI::AMK_PostIndexed, "postindexed", "Prefer post-indexed addressing mode")))
static const SCEV * getExprBase(const SCEV *S)
Return an approximation of this SCEV expression's "base", or NULL for any constant.
static llvm::PHINode * GetInductionVariable(const Loop &L, ScalarEvolution &SE, const LSRInstance &LSR)
Ideally pick the PHI IV inserted by ScalarEvolutionExpander.
static cl::opt< cl::boolOrDefault > AllowTerminatingConditionFoldingAfterLSR("lsr-term-fold", cl::Hidden, cl::desc("Attempt to replace primary IV with other IV."))
static bool IsSimplerBaseSCEVForTarget(const TargetTransformInfo &TTI, ScalarEvolution &SE, const SCEV *Best, const SCEV *Reg, MemAccessTy AccessType)
static const unsigned MaxIVUsers
MaxIVUsers is an arbitrary threshold that provides an early opportunity for bail out.
static cl::opt< bool > AllowDropSolutionIfLessProfitable("lsr-drop-solution", cl::Hidden, cl::init(false), cl::desc("Attempt to drop solution if it is less profitable"))
static bool isHighCostExpansion(const SCEV *S, SmallPtrSetImpl< const SCEV * > &Processed, ScalarEvolution &SE)
Check if expanding this expression is likely to incur significant cost.
static Value * getValueOrPoison(WeakVH &VH, LLVMContext &C)
Cached location ops may be erased during LSR, in which case a poison is required when restoring from ...
static MemAccessTy getAccessType(const TargetTransformInfo &TTI, Instruction *Inst, Value *OperandVal)
Return the type of the memory being accessed.
static unsigned numLLVMArgOps(SmallVectorImpl< uint64_t > &Expr)
Returns the total number of DW_OP_llvm_arg operands in the expression.
static bool isAlwaysFoldable(const TargetTransformInfo &TTI, LSRUse::KindType Kind, MemAccessTy AccessTy, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg)
static void DbgRewriteSalvageableDVIs(llvm::Loop *L, ScalarEvolution &SE, llvm::PHINode *LSRInductionVar, SmallVector< std::unique_ptr< DVIRecoveryRec >, 2 > &DVIToUpdate)
Obtain an expression for the iteration count, then attempt to salvage the dbg.value intrinsics.
static cl::opt< bool > EnablePhiElim("enable-lsr-phielim", cl::Hidden, cl::init(true), cl::desc("Enable LSR phi elimination"))
static void DbgGatherSalvagableDVI(Loop *L, ScalarEvolution &SE, SmallVector< std::unique_ptr< DVIRecoveryRec >, 2 > &SalvageableDVISCEVs, SmallSet< AssertingVH< DbgValueInst >, 2 > &DVIHandles)
Identify and cache salvageable DVI locations and expressions along with the corresponding SCEV(s).
static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE)
Return true if the given addrec can be sign-extended without changing its value.
static bool canHoistIVInc(const TargetTransformInfo &TTI, const LSRFixup &Fixup, const LSRUse &LU, Instruction *IVIncInsertPos, Loop *L)
static void DoInitialMatch(const SCEV *S, Loop *L, SmallVectorImpl< const SCEV * > &Good, SmallVectorImpl< const SCEV * > &Bad, ScalarEvolution &SE)
Recursion helper for initialMatch.
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, const LSRUse &LU, const Formula &F)
Check if the addressing mode defined by F is completely folded in LU at isel time.
static cl::opt< bool > LSRExpNarrow("lsr-exp-narrow", cl::Hidden, cl::init(false), cl::desc("Narrow LSR complex solution using" " expectation of registers number"))
static cl::opt< bool > FilterSameScaledReg("lsr-filter-same-scaled-reg", cl::Hidden, cl::init(true), cl::desc("Narrow LSR search space by filtering non-optimal formulae" " with the same ScaledReg and Scale"))
static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset, int64_t MaxOffset, LSRUse::KindType Kind, MemAccessTy AccessTy, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale)
Test whether we know how to expand the current formula.
static void updateDVIWithLocations(T &DbgVal, SmallVectorImpl< Value * > &Locations, SmallVectorImpl< uint64_t > &Ops)
Overwrite DVI with locations placed into a DIArglist.
static cl::opt< unsigned > ComplexityLimit("lsr-complexity-limit", cl::Hidden, cl::init(std::numeric_limits< uint16_t >::max()), cl::desc("LSR search space complexity limit"))
static void UpdateDbgValueInst(DVIRecoveryRec &DVIRec, SmallVectorImpl< Value * > &NewLocationOps, SmallVectorImpl< uint64_t > &NewExpr)
Write the new expression and new location ops for the dbg.value.
static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE)
If S involves the addition of a constant integer value, return that integer value,...
static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT, LoopInfo &LI, const TargetTransformInfo &TTI, AssumptionCache &AC, TargetLibraryInfo &TLI, MemorySSA *MSSA)
static bool isProfitableChain(IVChain &Chain, SmallPtrSetImpl< Instruction * > &Users, ScalarEvolution &SE, const TargetTransformInfo &TTI)
Return true if the number of registers needed for the chain is estimated to be less than the number r...
static const SCEV * CollectSubexprs(const SCEV *S, const SCEVConstant *C, SmallVectorImpl< const SCEV * > &Ops, const Loop *L, ScalarEvolution &SE, unsigned Depth=0)
Split S into subexpressions which can be pulled out into separate registers.
static const SCEV * getExactSDiv(const SCEV *LHS, const SCEV *RHS, ScalarEvolution &SE, bool IgnoreSignificantBits=false)
Return an expression for LHS /s RHS, if it can be determined and if the remainder is known to be zero...
static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst, Value *Operand, const TargetTransformInfo &TTI)
Return true if the IVInc can be folded into an addressing mode.
static const SCEV * getAnyExtendConsideringPostIncUses(ArrayRef< PostIncLoopSet > Loops, const SCEV *Expr, Type *ToTy, ScalarEvolution &SE)
Extend/Truncate Expr to ToTy considering post-inc uses in Loops.
static unsigned getSetupCost(const SCEV *Reg, unsigned Depth)
static cl::opt< unsigned > SetupCostDepthLimit("lsr-setupcost-depth-limit", cl::Hidden, cl::init(7), cl::desc("The limit on recursion depth for LSRs setup cost"))
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
Module.h This file contains the declarations for the Module class.
PowerPC TLS Dynamic Call Fixup
This header defines various interfaces for pass management in LLVM.
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file defines the PointerIntPair class.
const SmallVectorImpl< MachineOperand > & Cond
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SI optimize exec mask operations pre RA
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static SymbolRef::Type getType(const Symbol *Sym)
This defines the Use class.
Virtual Register Rewriter
static const uint32_t IV[8]
Class recording the (high level) value of a variable.
Class for arbitrary precision integers.
uint64_t getZExtValue() const
Get zero extended value.
bool isNegative() const
Determine sign of this APInt.
APInt sdiv(const APInt &RHS) const
Signed division function for APInt.
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
APInt srem(const APInt &RHS) const
Function for signed remainder operation.
int64_t getSExtValue() const
Get sign extended value.
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequiredID(const void *ID)
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Value handle that asserts if the Value is deleted.
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
An instruction that atomically checks whether a specified value is in a memory location,...
an instruction that atomically reads a memory location, combines it with another value,...
LLVM Basic Block Representation.
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
const Function * getParent() const
Return the enclosing method, or null if none.
InstListType::iterator iterator
Instruction iterators...
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
bool isLandingPad() const
Return true if this basic block is a landing pad.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name, BasicBlock::iterator InsertBefore)
Construct a binary instruction, given the opcode and the two operands.
BinaryOps getOpcode() const
Conditional or Unconditional Branch instruction.
void setCondition(Value *V)
void swapSuccessors()
Swap the successors of this branch instruction.
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
Value * getCondition() const
static Instruction::CastOps getCastOpcode(const Value *Val, bool SrcIsSigned, Type *Ty, bool DstIsSigned)
Returns the opcode necessary to cast Val into Ty using usual casting rules.
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name, BasicBlock::iterator InsertBefore)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
This is the shared class of boolean and integer constants.
static bool isValueValidForType(Type *Ty, uint64_t V)
This static method returns true if the type Ty is big enough to represent the value V.
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
This is an important base class in LLVM.
static DIArgList * get(LLVMContext &Context, ArrayRef< ValueAsMetadata * > Args)
An iterator for expression operands.
static DIExpression * append(const DIExpression *Expr, ArrayRef< uint64_t > Ops)
Append the opcodes Ops to DIExpr.
static void appendOffset(SmallVectorImpl< uint64_t > &Ops, int64_t Offset)
Append Ops with operations to apply the Offset.
bool isComplex() const
Return whether the location is computed on the expression stack, meaning it cannot be a simple regist...
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
This represents the llvm.dbg.value instruction.
Record of a variable value-assignment, aka a non instruction representation of the dbg....
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Implements a dense probed hash-table based set.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
This instruction compares its operands according to the predicate given to the constructor.
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
IVStrideUse - Keep track of one use of a strided induction variable.
void transformToPostInc(const Loop *L)
transformToPostInc - Transform the expression to post-inc form for the given loop.
Value * getOperandValToReplace() const
getOperandValToReplace - Return the Value of the operand in the user instruction that this IVStrideUs...
void setUser(Instruction *NewUser)
setUser - Assign a new user instruction for this use.
Analysis pass that exposes the IVUsers for a loop.
ilist< IVStrideUse >::const_iterator const_iterator
void print(raw_ostream &OS) const
std::optional< CostType > getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
const BasicBlock * getParent() const
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Type * getAccessType() const LLVM_READONLY
Return the type this instruction accesses in memory, if any.
bool hasPoisonGeneratingFlags() const LLVM_READONLY
Return true if this operator has flags which may cause this instruction to evaluate to poison despite...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
An instruction for reading from memory.
void getExitingBlocks(SmallVectorImpl< BlockT * > &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
BlockT * getHeader() const
unsigned getLoopDepth() const
Return the nesting level of this loop.
The legacy pass manager's analysis pass to compute loop information.
virtual bool runOnLoop(Loop *L, LPPassManager &LPM)=0
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Represents a single loop in the control flow graph.
An analysis that produces MemorySSA for a function.
Legacy analysis pass which computes MemorySSA.
Encapsulates MemorySSA, including all data associated with memory accesses.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
iterator_range< const_block_iterator > blocks() const
op_range incoming_values()
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr, BasicBlock::iterator InsertBefore)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
void setIncomingValue(unsigned i, Value *V)
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
static unsigned getIncomingValueNumForOperand(unsigned i)
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
PointerIntPair - This class implements a pair of a pointer and small integer.
A discriminated union of two or more pointer types, with the discriminator in the low bit of the poin...
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
This node represents an addition of some number of SCEVs.
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStart() const
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
const SCEVAddRecExpr * getPostIncExpr(ScalarEvolution &SE) const
Return an expression representing the value of this expression one iteration of the loop ahead.
const Loop * getLoop() const
This is the base class for unary cast operator classes.
This node is the base class for n'ary commutative operators.
This class represents a constant integer value.
ConstantInt * getValue() const
const APInt & getAPInt() const
This class uses information about analyze scalars to rewrite expressions in canonical form.
bool isSafeToExpand(const SCEV *S) const
Return true if the given expression is safe to expand in the sense that all materialized values are s...
bool isHighCostExpansion(ArrayRef< const SCEV * > Exprs, Loop *L, unsigned Budget, const TargetTransformInfo *TTI, const Instruction *At)
Return true for expressions that can't be evaluated at runtime within given Budget.
void clear()
Erase the contents of the InsertedExpressions map so that users trying to expand the same expression ...
Value * expandCodeFor(const SCEV *SH, Type *Ty, BasicBlock::iterator I)
Insert code to directly compute the specified SCEV expression into the program.
This is the base class for unary integral cast operator classes.
This node represents multiplication of some number of SCEVs.
This node is a base class providing common functionality for n'ary operators.
bool hasNoUnsignedWrap() const
bool hasNoSelfWrap() const
bool hasNoSignedWrap() const
ArrayRef< const SCEV * > operands() const
This class represents a signed maximum selection.
This class represents a binary unsigned division operation.
This class represents an unsigned maximum selection.
This means that we are dealing with an entirely unknown SCEV value, and only represent it as its LLVM...
This class represents an analyzed expression in the program.
ArrayRef< const SCEV * > operands() const
Return operands of this SCEV expression.
unsigned short getExpressionSize() const
bool isZero() const
Return true if the expression is a constant zero.
SCEVTypes getSCEVType() const
Type * getType() const
Return the LLVM type of this SCEV expression.
This class represents a cast from signed integer to floating point.
The main scalar evolution driver.
bool isKnownNonZero(const SCEV *S)
Test if the given expression is known to be non-zero.
const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
uint64_t getTypeSizeInBits(Type *Ty) const
Return the size in bits of the specified type, for which isSCEVable must return true.
const SCEV * getConstant(ConstantInt *V)
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
const SCEV * getNoopOrSignExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
unsigned getSmallConstantMaxTripCount(const Loop *L)
Returns the upper bound of the loop trip count as a normal unsigned value.
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
const SCEV * getAddRecExpr(const SCEV *Start, const SCEV *Step, const Loop *L, SCEV::NoWrapFlags Flags)
Get an add recurrence expression for the specified loop.
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
Type * getEffectiveSCEVType(Type *Ty) const
Return a type with the same bitwidth as the given type and which represents how SCEV will treat the g...
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
bool hasLoopInvariantBackedgeTakenCount(const Loop *L)
Return true if the specified loop has an analyzable loop-invariant backedge-taken count.
const SCEV * getAnyExtendExpr(const SCEV *Op, Type *Ty)
getAnyExtendExpr - Return a SCEV for the given operand extended with unspecified bits out to the give...
bool containsUndefs(const SCEV *S) const
Return true if the SCEV expression contains an undef value.
const SCEV * getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
bool hasComputableLoopEvolution(const SCEV *S, const Loop *L)
Return true if the given SCEV changes value in a known way in the specified loop.
const SCEV * getPointerBase(const SCEV *V)
Transitively follow the chain of pointer-type operands until reaching a SCEV that does not have a sin...
const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
const SCEV * getUnknown(Value *V)
std::optional< APInt > computeConstantDifference(const SCEV *LHS, const SCEV *RHS)
Compute LHS - RHS and returns the result as an APInt if it is a constant, and std::nullopt if it isn'...
bool properlyDominates(const SCEV *S, const BasicBlock *BB)
Return true if elements that makes up the given SCEV properly dominate the specified basic block.
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
bool containsErasedValue(const SCEV *S) const
Return true if the SCEV expression contains a Value that has been optimised out and is now a nullptr.
LLVMContext & getContext() const
This class represents the LLVM 'select' instruction.
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
iterator end()
Get an iterator to the end of the SetVector.
iterator begin()
Get an iterator to the beginning of the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
int find_first() const
Returns the index of the first set bit, -1 if none of the bits are set.
iterator_range< const_set_bits_iterator > set_bits() const
int find_next(unsigned Prev) const
Returns the index of the next set bit following the "Prev" bit.
size_type size() const
Returns the number of bits in this bitvector.
void resize(unsigned N, bool t=false)
Grow or shrink the bitvector.
size_type count() const
Returns the number of bits which are set.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
A SetVector that performs no allocations if smaller than a certain size.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
typename SuperClass::const_iterator const_iterator
iterator insert(iterator I, T &&Elt)
typename SuperClass::iterator iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
int64_t getFixed() const
Returns the fixed component of the stack.
An instruction for storing to memory.
Provides information about what library functions are available for the current target.
This class represents a truncation of integer types.
The instances of the Type class are immutable: once they are created, they are never changed.
unsigned getIntegerBitWidth() const
bool isPointerTy() const
True if this is an instance of PointerType.
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static Type * getVoidTy(LLVMContext &C)
int getFPMantissaWidth() const
Return the width of the mantissa of this type.
static IntegerType * getInt8Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
This class represents a cast unsigned integer to floating point.
A Use represents the edge between a Value definition and its users.
bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
unsigned getNumOperands() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
LLVMContext & getContext() const
All values hold a context through their type.
iterator_range< use_iterator > uses()
A nullable Value handle that is nullable.
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
self_iterator getIterator()
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
@ SC
CHAIN = SC CHAIN, Imm128 - System call.
Reg
All possible values of the reg field in the ModR/M byte.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
@ DW_OP_LLVM_arg
Only used in LLVM metadata.
@ DW_OP_LLVM_convert
Only used in LLVM metadata.
Sequence
A sequence of states that a pointer may go through in which an objc_retain and objc_release are actua...
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< PhiNode * > Phi
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
const_iterator end(StringRef path)
Get end iterator over path.
This is an optimization pass for GlobalISel generic memory operations.
bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root, Instruction *OnPathTo, DominatorTree *DT)
Return true if undefined behavior would provable be executed on the path to OnPathTo if Root produced...
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
std::optional< unsigned > getLoopEstimatedTripCount(Loop *L, unsigned *EstimatedLoopInvocationWeight=nullptr)
Returns a loop's estimated trip count based on branch weight metadata.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
APFloat abs(APFloat X)
Returns the absolute value of the argument.
bool operator!=(uint64_t V1, const APInt &V2)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
bool matchSimpleRecurrence(const PHINode *P, BinaryOperator *&BO, Value *&Start, Value *&Step)
Attempt to match a simple first order recurrence cycle of the form: iv = phi Ty [Start,...
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
auto reverse(ContainerTy &&C)
const SCEV * denormalizeForPostIncUse(const SCEV *S, const PostIncLoopSet &Loops, ScalarEvolution &SE)
Denormalize S to be post-increment for all loops present in Loops.
void sort(IteratorTy Start, IteratorTy End)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
cl::opt< unsigned > SCEVCheapExpansionBudget
Constant * ConstantFoldCastOperand(unsigned Opcode, Constant *C, Type *DestTy, const DataLayout &DL)
Attempt to constant fold a cast with the specified operand.
void SplitLandingPadPredecessors(BasicBlock *OrigBB, ArrayRef< BasicBlock * > Preds, const char *Suffix, const char *Suffix2, SmallVectorImpl< BasicBlock * > &NewBBs, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method transforms the landing pad, OrigBB, by introducing two new basic blocks into the function...
const SCEV * normalizeForPostIncUse(const SCEV *S, const PostIncLoopSet &Loops, ScalarEvolution &SE, bool CheckInvertible=true)
Normalize S to be post-increment for all loops present in Loops.
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Pass * createLoopStrengthReducePass()
BasicBlock * SplitCriticalEdge(Instruction *TI, unsigned SuccNum, const CriticalEdgeSplittingOptions &Options=CriticalEdgeSplittingOptions(), const Twine &BBName="")
If this edge is a critical edge, insert a new node to split the critical edge.
bool RecursivelyDeleteTriviallyDeadInstructionsPermissive(SmallVectorImpl< WeakTrackingVH > &DeadInsts, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
Same functionality as RecursivelyDeleteTriviallyDeadInstructions, but allow instructions that are not...
constexpr unsigned BitWidth
bool formLCSSAForInstructions(SmallVectorImpl< Instruction * > &Worklist, const DominatorTree &DT, const LoopInfo &LI, ScalarEvolution *SE, SmallVectorImpl< PHINode * > *PHIsToRemove=nullptr, SmallVectorImpl< PHINode * > *InsertedPHIs=nullptr)
Ensures LCSSA form for every instruction from the Worklist in the scope of innermost containing loop.
void initializeLoopStrengthReducePass(PassRegistry &)
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool isAlmostDeadIV(PHINode *IV, BasicBlock *LatchBlock, Value *Cond)
Return true if the induction variable IV in a Loop whose latch is LatchBlock would become dead if the...
int rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI, ScalarEvolution *SE, const TargetTransformInfo *TTI, SCEVExpander &Rewriter, DominatorTree *DT, ReplaceExitVal ReplaceExitValue, SmallVector< WeakTrackingVH, 16 > &DeadInsts)
If the final value of any expressions that are recurrent in the loop can be computed,...
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
bool SCEVExprContains(const SCEV *Root, PredTy Pred)
Return true if any node in Root satisfies the predicate Pred.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Option class for critical edge splitting.
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
TargetTransformInfo & TTI
Information about a load/store intrinsic defined by the target.
Value * PtrVal
This is the pointer that the intrinsic is loading from or storing to.