68#define DEBUG_TYPE "simple-loop-unswitch"
73STATISTIC(NumBranches,
"Number of branches unswitched");
74STATISTIC(NumSwitches,
"Number of switches unswitched");
75STATISTIC(NumSelects,
"Number of selects turned into branches for unswitching");
76STATISTIC(NumGuards,
"Number of guards turned into branches for unswitching");
77STATISTIC(NumTrivial,
"Number of unswitches that are trivial");
79 NumCostMultiplierSkipped,
80 "Number of unswitch candidates that had their cost multiplier skipped");
82 "Number of invariant conditions injected and unswitched");
86 cl::desc(
"Forcibly enables non-trivial loop unswitching rather than "
87 "following the configuration passed into the pass."));
91 cl::desc(
"The cost threshold for unswitching a loop."));
95 cl::desc(
"Enable unswitch cost multiplier that prohibits exponential "
96 "explosion in nontrivial unswitch."));
99 cl::desc(
"Toplevel siblings divisor for cost multiplier."));
102 cl::desc(
"Number of unswitch candidates that are ignored when calculating "
103 "cost multiplier."));
106 cl::desc(
"If enabled, simple loop unswitching will also consider "
107 "llvm.experimental.guard intrinsics as unswitch candidates."));
109 "simple-loop-unswitch-drop-non-trivial-implicit-null-checks",
111 cl::desc(
"If enabled, drop make.implicit metadata in unswitched implicit "
112 "null checks to save time analyzing if we can keep it."));
115 cl::desc(
"Max number of memory uses to explore during "
116 "partial unswitching analysis"),
120 cl::desc(
"If enabled, the freeze instruction will be added to condition "
121 "of loop unswitch to prevent miscompilation."));
124 "simple-loop-unswitch-inject-invariant-conditions",
cl::Hidden,
125 cl::desc(
"Whether we should inject new invariants and unswitch them to "
126 "eliminate some existing (non-invariant) conditions."),
130 "simple-loop-unswitch-inject-invariant-condition-hotness-threshold",
132 "unswitch on them to eliminate branches that are "
133 "not-taken 1/<this option> times or less."),
144 : Term(Term), Invariant(Invariant), InLoopSucc(InLoopSucc) {}
147struct InjectedInvariant {
155 : Pred(Pred),
LHS(
LHS),
RHS(
RHS), InLoopSucc(InLoopSucc) {}
158struct NonTrivialUnswitchCandidate {
161 std::optional<InstructionCost>
Cost;
162 std::optional<InjectedInvariant> PendingInjection;
163 NonTrivialUnswitchCandidate(
165 std::optional<InstructionCost>
Cost = std::nullopt,
166 std::optional<InjectedInvariant> PendingInjection = std::nullopt)
167 : TI(TI), Invariants(Invariants),
Cost(
Cost),
168 PendingInjection(PendingInjection) {};
170 bool hasPendingInjection()
const {
return PendingInjection.has_value(); }
194 assert(!L.isLoopInvariant(&Root) &&
195 "Only need to walk the graph if root itself is not invariant.");
208 for (
Value *OpV :
I.operand_values()) {
210 if (isa<Constant>(OpV))
214 if (L.isLoopInvariant(OpV)) {
225 if (Visited.
insert(OpI).second)
229 }
while (!Worklist.
empty());
236 assert(!isa<Constant>(Invariant) &&
"Why are we unswitching on a constant?");
241 Instruction *UserI = dyn_cast<Instruction>(U.getUser());
244 if (UserI && L.contains(UserI))
255 auto *PN = dyn_cast<PHINode>(&
I);
262 if (!L.isLoopInvariant(PN->getIncomingValueForBlock(&ExitingBB)))
278 for (
Value *Inv : Invariants) {
287 Direction ? &NormalSucc : &UnswitchedSucc);
296 for (
auto *Val :
reverse(ToDuplicate)) {
310 auto *DefiningAccess = MemUse->getDefiningAccess();
312 while (L.contains(DefiningAccess->getBlock())) {
315 if (
auto *MemPhi = dyn_cast<MemoryPhi>(DefiningAccess))
317 MemPhi->getIncomingValueForBlock(L.getLoopPreheader());
319 DefiningAccess = cast<MemoryDef>(DefiningAccess)->getDefiningAccess();
330 Direction ? &NormalSucc : &UnswitchedSucc);
347 for (
auto i : seq<int>(0, PN.getNumOperands())) {
348 assert(PN.getIncomingBlock(i) == &OldExitingBB &&
349 "Found incoming block different from unique predecessor!");
350 PN.setIncomingBlock(i, &OldPH);
367 assert(&ExitBB != &UnswitchedBB &&
368 "Must have different loop exit and unswitched blocks!");
372 PN.getName() +
".split");
373 NewPN->insertBefore(InsertPt);
384 for (
int i = PN.getNumIncomingValues() - 1; i >= 0; --i) {
385 if (PN.getIncomingBlock(i) != &OldExitingBB)
391 PN.removeIncomingValue(i);
393 NewPN->addIncoming(
Incoming, &OldPH);
398 PN.replaceAllUsesWith(NewPN);
399 NewPN->addIncoming(&PN, &ExitBB);
412 Loop *OldParentL = L.getParentLoop();
417 L.getExitBlocks(Exits);
418 Loop *NewParentL =
nullptr;
419 for (
auto *ExitBB : Exits)
421 if (!NewParentL || NewParentL->
contains(ExitL))
424 if (NewParentL == OldParentL)
430 "Can only hoist this loop up the nest!");
435 "Parent loop of this loop should contain this loop's preheader!");
450 for (
Loop *OldContainingL = OldParentL; OldContainingL != NewParentL;
454 return BB == &Preheader || L.contains(BB);
457 OldContainingL->getBlocksSet().erase(&Preheader);
459 OldContainingL->getBlocksSet().erase(BB);
482 Loop *Current = TopMost;
512 LLVM_DEBUG(
dbgs() <<
" Trying to unswitch branch: " << BI <<
"\n");
519 bool FullUnswitch =
false;
522 if (L.isLoopInvariant(
Cond)) {
526 if (
auto *CondInst = dyn_cast<Instruction>(
Cond))
528 if (Invariants.
empty()) {
535 bool ExitDirection =
true;
536 int LoopExitSuccIdx = 0;
538 if (L.contains(LoopExitBB)) {
539 ExitDirection =
false;
542 if (L.contains(LoopExitBB)) {
547 auto *ContinueBB = BI.
getSuccessor(1 - LoopExitSuccIdx);
550 LLVM_DEBUG(
dbgs() <<
" Loop exit PHI's aren't loop-invariant!\n");
563 "non-full unswitch!\n");
569 dbgs() <<
" unswitching trivial invariant conditions for: " << BI
571 for (
Value *Invariant : Invariants) {
572 dbgs() <<
" " << *Invariant <<
" == true";
573 if (Invariant != Invariants.back())
605 if (FullUnswitch && LoopExitBB->getUniquePredecessor()) {
607 "A branch's parent isn't a predecessor!");
608 UnswitchedBB = LoopExitBB;
611 SplitBlock(LoopExitBB, LoopExitBB->begin(), &DT, &LI, MSSAU,
"",
false);
643 "Must have an `or` of `i1`s or `select i1 X, true, Y`s for the "
647 "Must have an `and` of `i1`s or `select i1 X, Y, false`s for the"
650 *OldPH, Invariants, ExitDirection, *UnswitchedBB, *NewPH,
661 Updates.
push_back({cfg::UpdateKind::Insert, OldPH, UnswitchedBB});
669 ParentBB->getTerminator()->eraseFromParent();
681 if (UnswitchedBB == LoopExitBB)
685 *ParentBB, *OldPH, FullUnswitch);
696 for (
Value *Invariant : Invariants)
742 LLVM_DEBUG(
dbgs() <<
" Trying to unswitch switch: " << SI <<
"\n");
743 Value *LoopCond = SI.getCondition();
746 if (!L.isLoopInvariant(LoopCond))
749 auto *ParentBB = SI.getParent();
756 auto IsTriviallyUnswitchableExitBlock = [&](
BasicBlock &BBToCheck) {
758 if (L.contains(&BBToCheck))
767 auto *TI = BBToCheck.getTerminator();
768 bool isUnreachable = isa<UnreachableInst>(TI);
769 return !isUnreachable ||
770 (isUnreachable && (BBToCheck.getFirstNonPHIOrDbg() != TI));
774 for (
auto Case : SI.cases())
775 if (IsTriviallyUnswitchableExitBlock(*Case.getCaseSuccessor()))
776 ExitCaseIndices.
push_back(Case.getCaseIndex());
780 if (IsTriviallyUnswitchableExitBlock(*SI.getDefaultDest())) {
781 DefaultExitBB = SI.getDefaultDest();
782 }
else if (ExitCaseIndices.
empty())
797 if (!ExitL || ExitL->
contains(OuterL))
800 for (
unsigned Index : ExitCaseIndices) {
801 auto CaseI = SI.case_begin() +
Index;
804 if (!ExitL || ExitL->
contains(OuterL))
818 SI.setDefaultDest(
nullptr);
826 ExitCases.reserve(ExitCaseIndices.
size());
831 auto CaseI = SI.case_begin() +
Index;
834 ExitCases.emplace_back(CaseI->getCaseValue(), CaseI->getCaseSuccessor(), W);
842 if (SI.getNumCases() > 0 &&
844 return Case.getCaseSuccessor() == SI.case_begin()->getCaseSuccessor();
846 CommonSuccBB = SI.case_begin()->getCaseSuccessor();
847 if (!DefaultExitBB) {
851 if (SI.getNumCases() == 0)
852 CommonSuccBB = SI.getDefaultDest();
853 else if (SI.getDefaultDest() != CommonSuccBB)
854 CommonSuccBB =
nullptr;
880 UnswitchedExitBBs.
insert(DefaultExitBB);
888 DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB;
893 for (
auto &ExitCase :
reverse(ExitCases)) {
901 if (UnswitchedExitBBs.
insert(ExitBB).second)
908 BasicBlock *&SplitExitBB = SplitExitBBMap[ExitBB];
917 std::get<1>(ExitCase) = SplitExitBB;
922 for (
auto &ExitCase :
reverse(ExitCases)) {
924 BasicBlock *UnswitchedBB = std::get<1>(ExitCase);
926 NewSIW.
addCase(CaseVal, UnswitchedBB, std::get<2>(ExitCase));
937 for (
const auto &Case : SI.cases())
940 }
else if (DefaultCaseWeight) {
943 for (
const auto &Case : SI.cases()) {
946 "case weight must be defined as default case weight is defined");
961 bool SkippedFirst = DefaultExitBB ==
nullptr;
962 for (
auto Case : SI.cases()) {
964 "Non-common successor!");
976 }
else if (DefaultExitBB) {
977 assert(SI.getNumCases() > 0 &&
978 "If we had no cases we'd have a common successor!");
983 auto LastCaseI = std::prev(SI.case_end());
985 SI.setDefaultDest(LastCaseI->getCaseSuccessor());
996 for (
auto *UnswitchedExitBB : UnswitchedExitBBs) {
1000 for (
auto SplitUnswitchedPair : SplitExitBBMap) {
1001 DTUpdates.
push_back({DT.
Delete, ParentBB, SplitUnswitchedPair.first});
1013 assert(DT.
verify(DominatorTree::VerificationLevel::Fast));
1043 bool Changed =
false;
1058 Visited.
insert(CurrentBB);
1065 if (!isa<MemoryPhi>(*Defs->begin()) || (++Defs->begin() != Defs->end()))
1073 if (
auto *SI = dyn_cast<SwitchInst>(CurrentTerm)) {
1077 if (isa<Constant>(SI->getCondition()))
1091 auto *BI = dyn_cast<BranchInst>(CurrentBB->
getTerminator());
1092 if (!BI || BI->isConditional())
1095 CurrentBB = BI->getSuccessor(0);
1099 auto *BI = dyn_cast<BranchInst>(CurrentTerm);
1107 if (!BI->isConditional() ||
1122 if (BI->isConditional())
1126 CurrentBB = BI->getSuccessor(0);
1131 }
while (L.contains(CurrentBB) && Visited.
insert(CurrentBB).second);
1169 NewBlocks.
reserve(L.getNumBlocks() + ExitBlocks.
size());
1180 VMap[OldBB] = NewBB;
1188 auto It = DominatingSucc.
find(BB);
1189 return It != DominatingSucc.
end() && It->second != UnswitchedSuccBB;
1193 auto *ClonedPH = CloneBlock(LoopPH);
1196 for (
auto *LoopBB : L.blocks())
1197 if (!SkipBlock(LoopBB))
1203 for (
auto *ExitBB : ExitBlocks) {
1204 if (SkipBlock(ExitBB))
1212 auto *MergeBB =
SplitBlock(ExitBB, ExitBB->begin(), &DT, &LI, MSSAU);
1217 MergeBB->takeName(ExitBB);
1218 ExitBB->setName(
Twine(MergeBB->getName()) +
".split");
1221 auto *ClonedExitBB = CloneBlock(ExitBB);
1222 assert(ClonedExitBB->getTerminator()->getNumSuccessors() == 1 &&
1223 "Exit block should have been split to have one successor!");
1224 assert(ClonedExitBB->getTerminator()->getSuccessor(0) == MergeBB &&
1225 "Cloned exit block has the wrong successor!");
1231 std::prev(ClonedExitBB->end())))) {
1238 (isa<PHINode>(
I) || isa<LandingPadInst>(
I) || isa<CatchPadInst>(
I)) &&
1239 "Bad instruction in exit block!");
1241 assert(VMap.
lookup(&
I) == &ClonedI &&
"Mismatch in the value map!");
1244 if (SE && isa<PHINode>(
I))
1249 MergePN->insertBefore(MergeBB->getFirstInsertionPt());
1250 I.replaceAllUsesWith(MergePN);
1251 MergePN->addIncoming(&
I, ExitBB);
1252 MergePN->addIncoming(&ClonedI, ClonedExitBB);
1261 Module *M = ClonedPH->getParent()->getParent();
1262 for (
auto *ClonedBB : NewBlocks)
1269 if (
auto *II = dyn_cast<AssumeInst>(&
I))
1275 for (
auto *LoopBB : L.blocks())
1276 if (SkipBlock(LoopBB))
1278 if (
auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.
lookup(SuccBB)))
1279 for (
PHINode &PN : ClonedSuccBB->phis())
1280 PN.removeIncomingValue(LoopBB,
false);
1284 auto *ClonedParentBB = cast<BasicBlock>(VMap.
lookup(ParentBB));
1286 if (SuccBB == UnswitchedSuccBB)
1289 auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.
lookup(SuccBB));
1293 ClonedSuccBB->removePredecessor(ClonedParentBB,
1299 auto *ClonedSuccBB = cast<BasicBlock>(VMap.
lookup(UnswitchedSuccBB));
1300 Instruction *ClonedTerminator = ClonedParentBB->getTerminator();
1303 Value *ClonedConditionToErase =
nullptr;
1304 if (
auto *BI = dyn_cast<BranchInst>(ClonedTerminator))
1305 ClonedConditionToErase = BI->getCondition();
1306 else if (
auto *SI = dyn_cast<SwitchInst>(ClonedTerminator))
1307 ClonedConditionToErase = SI->getCondition();
1312 if (ClonedConditionToErase)
1319 for (
PHINode &PN : ClonedSuccBB->phis()) {
1323 for (
int i = PN.getNumOperands() - 1; i >= 0; --i) {
1324 if (PN.getIncomingBlock(i) != ClonedParentBB)
1330 PN.removeIncomingValue(i,
false);
1336 for (
auto *ClonedBB : NewBlocks) {
1338 if (SuccSet.
insert(SuccBB).second)
1339 DTUpdates.
push_back({DominatorTree::Insert, ClonedBB, SuccBB});
1354 auto AddClonedBlocksToLoop = [&](
Loop &OrigL,
Loop &ClonedL) {
1355 assert(ClonedL.getBlocks().empty() &&
"Must start with an empty loop!");
1357 for (
auto *BB : OrigL.
blocks()) {
1358 auto *ClonedBB = cast<BasicBlock>(VMap.
lookup(BB));
1359 ClonedL.addBlockEntry(ClonedBB);
1372 AddClonedBlocksToLoop(OrigRootL, *ClonedRootL);
1384 LoopsToClone.
push_back({ClonedRootL, ChildL});
1386 Loop *ClonedParentL, *L;
1387 std::tie(ClonedParentL, L) = LoopsToClone.
pop_back_val();
1390 AddClonedBlocksToLoop(*L, *ClonedL);
1392 LoopsToClone.
push_back({ClonedL, ChildL});
1393 }
while (!LoopsToClone.
empty());
1414 Loop *ClonedL =
nullptr;
1419 auto *ClonedPH = cast<BasicBlock>(VMap.
lookup(OrigPH));
1420 auto *ClonedHeader = cast<BasicBlock>(VMap.
lookup(OrigHeader));
1426 Loop *ParentL =
nullptr;
1430 for (
auto *ExitBB : ExitBlocks)
1431 if (
auto *ClonedExitBB = cast_or_null<BasicBlock>(VMap.
lookup(ExitBB)))
1433 ExitLoopMap[ClonedExitBB] = ExitL;
1434 ClonedExitsInLoops.
push_back(ClonedExitBB);
1435 if (!ParentL || (ParentL != ExitL && ParentL->
contains(ExitL)))
1440 "The computed parent loop should always contain (or be) the parent of "
1441 "the original loop.");
1448 for (
auto *BB : OrigL.
blocks())
1449 if (
auto *ClonedBB = cast_or_null<BasicBlock>(VMap.
lookup(BB)))
1450 ClonedLoopBlocks.
insert(ClonedBB);
1461 if (Pred == ClonedPH)
1466 assert(ClonedLoopBlocks.
count(Pred) &&
"Found a predecessor of the loop "
1467 "header other than the preheader "
1468 "that is not part of the loop!");
1473 if (BlocksInClonedLoop.
insert(Pred).second && Pred != ClonedHeader)
1480 if (!BlocksInClonedLoop.
empty()) {
1481 BlocksInClonedLoop.
insert(ClonedHeader);
1483 while (!Worklist.
empty()) {
1486 "Didn't put block into the loop set!");
1494 if (ClonedLoopBlocks.
count(Pred) &&
1495 BlocksInClonedLoop.
insert(Pred).second)
1514 for (
auto *BB : OrigL.
blocks()) {
1515 auto *ClonedBB = cast_or_null<BasicBlock>(VMap.
lookup(BB));
1516 if (!ClonedBB || !BlocksInClonedLoop.
count(ClonedBB))
1528 for (
Loop *PL = ClonedL; PL; PL = PL->getParentLoop())
1529 PL->addBlockEntry(ClonedBB);
1536 for (
Loop *ChildL : OrigL) {
1537 auto *ClonedChildHeader =
1538 cast_or_null<BasicBlock>(VMap.
lookup(ChildL->getHeader()));
1539 if (!ClonedChildHeader || !BlocksInClonedLoop.
count(ClonedChildHeader))
1545 for (
auto *ChildLoopBB : ChildL->blocks())
1547 cast<BasicBlock>(VMap.
lookup(ChildLoopBB))) &&
1548 "Child cloned loop has a header within the cloned outer "
1549 "loop but not all of its blocks!");
1564 if (BlocksInClonedLoop.
empty())
1565 UnloopedBlockSet.
insert(ClonedPH);
1566 for (
auto *ClonedBB : ClonedLoopBlocks)
1567 if (!BlocksInClonedLoop.
count(ClonedBB))
1568 UnloopedBlockSet.
insert(ClonedBB);
1574 auto OrderedClonedExitsInLoops = ClonedExitsInLoops;
1576 return ExitLoopMap.
lookup(
LHS)->getLoopDepth() <
1577 ExitLoopMap.
lookup(
RHS)->getLoopDepth();
1582 while (!UnloopedBlockSet.
empty() && !OrderedClonedExitsInLoops.empty()) {
1583 assert(Worklist.
empty() &&
"Didn't clear worklist!");
1585 BasicBlock *ExitBB = OrderedClonedExitsInLoops.pop_back_val();
1600 if (!UnloopedBlockSet.
erase(PredBB)) {
1602 (BlocksInClonedLoop.
count(PredBB) || ExitLoopMap.
count(PredBB)) &&
1603 "Predecessor not mapped to a loop!");
1610 bool Inserted = ExitLoopMap.
insert({PredBB, ExitL}).second;
1612 assert(Inserted &&
"Should only visit an unlooped block once!");
1617 }
while (!Worklist.
empty());
1626 for (
auto *BB : llvm::concat<BasicBlock *const>(
1627 ArrayRef(ClonedPH), ClonedLoopBlocks, ClonedExitsInLoops))
1629 OuterL->addBasicBlockToLoop(BB, LI);
1632 for (
auto &BBAndL : ExitLoopMap) {
1633 auto *BB = BBAndL.first;
1634 auto *OuterL = BBAndL.second;
1636 "Failed to put all blocks into outer loops!");
1643 for (
Loop *ChildL : OrigL) {
1644 auto *ClonedChildHeader =
1645 cast_or_null<BasicBlock>(VMap.
lookup(ChildL->getHeader()));
1646 if (!ClonedChildHeader || BlocksInClonedLoop.
count(ClonedChildHeader))
1650 for (
auto *ChildLoopBB : ChildL->blocks())
1652 "Cloned a child loop header but not all of that loops blocks!");
1656 *ChildL, ExitLoopMap.
lookup(ClonedChildHeader), VMap, LI));
1662 ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,
1666 for (
BasicBlock *BB : llvm::concat<BasicBlock *const>(L.blocks(), ExitBlocks))
1667 for (
const auto &VMap : VMaps)
1668 if (
BasicBlock *ClonedBB = cast_or_null<BasicBlock>(VMap->lookup(BB)))
1671 SuccBB->removePredecessor(ClonedBB);
1684 BB->dropAllReferences();
1687 BB->eraseFromParent();
1704 DeathCandidates.
append(L.blocks().begin(), L.blocks().end());
1705 while (!DeathCandidates.
empty()) {
1709 SuccBB->removePredecessor(BB);
1726 for (
Loop *ParentL = &L; ParentL; ParentL = ParentL->getParentLoop()) {
1727 for (
auto *BB : DeadBlockSet)
1728 ParentL->getBlocksSet().erase(BB);
1730 [&](
BasicBlock *BB) { return DeadBlockSet.count(BB); });
1736 if (!DeadBlockSet.count(ChildL->getHeader()))
1739 assert(llvm::all_of(ChildL->blocks(),
1740 [&](BasicBlock *ChildBB) {
1741 return DeadBlockSet.count(ChildBB);
1743 "If the child loop header is dead all blocks in the child loop must "
1744 "be dead as well!");
1755 for (
auto *BB : DeadBlockSet) {
1757 assert(!DT.getNode(BB) &&
"Should already have cleared domtree!");
1758 LI.changeLoopFor(BB,
nullptr);
1764 BB->dropAllReferences();
1769 for (
auto *BB : DeadBlockSet)
1770 BB->eraseFromParent();
1788 auto *PH = L.getLoopPreheader();
1789 auto *Header = L.getHeader();
1803 assert(L.contains(Pred) &&
"Found a predecessor of the loop header other "
1804 "than the preheader that is not part of the "
1810 if (LoopBlockSet.
insert(Pred).second && Pred != Header)
1815 if (LoopBlockSet.
empty())
1816 return LoopBlockSet;
1819 while (!Worklist.
empty()) {
1821 assert(LoopBlockSet.
count(BB) &&
"Didn't put block into the loop set!");
1833 assert(L.contains(InnerL) &&
1834 "Should not reach a loop *outside* this loop!");
1837 auto *InnerPH = InnerL->getLoopPreheader();
1838 assert(L.contains(InnerPH) &&
"Cannot contain an inner loop block "
1839 "but not contain the inner loop "
1841 if (!LoopBlockSet.
insert(InnerPH).second)
1851 for (
auto *InnerBB : InnerL->blocks()) {
1852 if (InnerBB == BB) {
1854 "Block should already be in the set!");
1858 LoopBlockSet.
insert(InnerBB);
1870 if (L.contains(Pred) && LoopBlockSet.
insert(Pred).second)
1874 assert(LoopBlockSet.
count(Header) &&
"Cannot fail to add the header!");
1878 return LoopBlockSet;
1899 auto *PH = L.getLoopPreheader();
1903 Loop *ParentL =
nullptr;
1907 for (
auto *ExitBB : ExitBlocks)
1911 if (!ParentL || (ParentL != ExitL && ParentL->
contains(ExitL)))
1923 if (!LoopBlockSet.empty() && L.getParentLoop() != ParentL) {
1925 for (
Loop *IL = L.getParentLoop(); IL != ParentL;
1927 IL->getBlocksSet().erase(PH);
1928 for (
auto *BB : L.blocks())
1929 IL->getBlocksSet().erase(BB);
1931 return BB == PH || L.contains(BB);
1936 L.getParentLoop()->removeChildLoop(&L);
1944 auto &
Blocks = L.getBlocksVector();
1946 LoopBlockSet.empty()
1948 : std::stable_partition(
1950 [&](
BasicBlock *BB) { return LoopBlockSet.count(BB); });
1954 if (LoopBlockSet.empty())
1955 UnloopedBlocks.
insert(PH);
1959 L.getBlocksSet().erase(BB);
1970 Loop *PrevExitL = L.getParentLoop();
1972 auto RemoveUnloopedBlocksFromLoop =
1974 for (
auto *BB : UnloopedBlocks)
1975 L.getBlocksSet().erase(BB);
1977 return UnloopedBlocks.count(BB);
1982 while (!UnloopedBlocks.
empty() && !ExitsInLoops.
empty()) {
1983 assert(Worklist.
empty() &&
"Didn't clear worklist!");
1984 assert(NewExitLoopBlocks.empty() &&
"Didn't clear loop set!");
1989 assert(ExitL.
contains(&L) &&
"Exit loop must contain the inner loop!");
1995 for (; PrevExitL != &ExitL; PrevExitL = PrevExitL->
getParentLoop())
1996 RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
2010 if (!UnloopedBlocks.
erase(PredBB)) {
2011 assert((NewExitLoopBlocks.count(PredBB) ||
2013 "Predecessor not in a nested loop (or already visited)!");
2020 bool Inserted = NewExitLoopBlocks.insert(PredBB).second;
2022 assert(Inserted &&
"Should only visit an unlooped block once!");
2027 }
while (!Worklist.
empty());
2032 for (
auto *BB : NewExitLoopBlocks)
2034 if (BBL == &L || !L.contains(BBL))
2039 NewExitLoopBlocks.clear();
2045 RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
2046 for (
auto *BB : UnloopedBlocks)
2048 if (BBL == &L || !L.contains(BBL))
2054 auto &SubLoops = L.getSubLoopsVector();
2055 auto SubLoopsSplitI =
2056 LoopBlockSet.empty()
2058 : std::stable_partition(
2059 SubLoops.begin(), SubLoops.end(), [&](
Loop *SubL) {
2060 return LoopBlockSet.count(SubL->getHeader());
2062 for (
auto *HoistedL :
make_range(SubLoopsSplitI, SubLoops.end())) {
2064 HoistedL->setParentLoop(
nullptr);
2074 if (
auto *NewParentL = LI.
getLoopFor(HoistedL->getLoopPreheader()))
2075 NewParentL->addChildLoop(HoistedL);
2079 SubLoops.erase(SubLoopsSplitI, SubLoops.end());
2083 assert(SubLoops.empty() &&
2084 "Failed to remove all subloops from the original loop!");
2085 if (
Loop *ParentL = L.getParentLoop())
2103template <
typename CallableT>
2115 if (!Callable(
N->getBlock()))
2121 "Cannot visit a node twice when walking a tree!");
2124 }
while (!DomWorklist.
empty());
2128 bool CurrentLoopValid,
bool PartiallyInvariant,
2131 if (!NewLoops.
empty())
2132 U.addSiblingLoops(NewLoops);
2136 if (CurrentLoopValid) {
2137 if (PartiallyInvariant) {
2140 auto &
Context = L.getHeader()->getContext();
2145 Context, L.getLoopID(), {
"llvm.loop.unswitch.partial"},
2146 {DisableUnswitchMD});
2147 L.setLoopID(NewLoopID);
2148 }
else if (InjectedCondition) {
2150 auto &
Context = L.getHeader()->getContext();
2155 Context, L.getLoopID(), {
"llvm.loop.unswitch.injection"},
2156 {DisableUnswitchMD});
2157 L.setLoopID(NewLoopID);
2159 U.revisitCurrentLoop();
2161 U.markLoopAsDeleted(L, LoopName);
2168 LPMUpdater &LoopUpdater,
bool InsertFreeze,
bool InjectedCondition) {
2171 SwitchInst *SI = BI ? nullptr : cast<SwitchInst>(&TI);
2175 std::string LoopName(L.getName());
2181 "Can only unswitch switches and conditional branch!");
2185 !PartiallyInvariant);
2188 "Cannot have other invariants with full unswitching!");
2191 "Partial unswitching requires an instruction as the condition!");
2204 if (!FullUnswitch) {
2208 PartiallyInvariant) &&
2209 "Only `or`, `and`, an `select`, partially invariant instructions "
2210 "can combine invariants being unswitched.");
2221 BI ? BI->
getSuccessor(1 - ClonedSucc) : SI->getDefaultDest();
2226 for (
auto Case : SI->cases())
2227 if (Case.getCaseSuccessor() != RetainedSuccBB)
2228 UnswitchedSuccBBs.
insert(Case.getCaseSuccessor());
2230 assert(!UnswitchedSuccBBs.
count(RetainedSuccBB) &&
2231 "Should not unswitch the same successor we are retaining!");
2240 Loop *ParentL = L.getParentLoop();
2249 Loop *OuterExitL = &L;
2251 L.getUniqueExitBlocks(ExitBlocks);
2252 for (
auto *ExitBB : ExitBlocks) {
2256 if (!NewOuterExitL) {
2258 OuterExitL =
nullptr;
2261 if (NewOuterExitL != OuterExitL && NewOuterExitL->
contains(OuterExitL))
2262 OuterExitL = NewOuterExitL;
2282 for (
auto *SuccBB : llvm::concat<BasicBlock *const>(
ArrayRef(RetainedSuccBB),
2284 if (SuccBB->getUniquePredecessor() ||
2286 return PredBB == ParentBB || DT.
dominates(SuccBB, PredBB);
2289 DominatingSucc[BB] = SuccBB;
2308 for (
auto *SuccBB : UnswitchedSuccBBs) {
2311 L, LoopPH, SplitBB, ExitBlocks, ParentBB, SuccBB, RetainedSuccBB,
2312 DominatingSucc, *VMaps.
back(), DTUpdates, AC, DT, LI, MSSAU, SE);
2317 if (TI.
getMetadata(LLVMContext::MD_make_implicit)) {
2321 TI.
setMetadata(LLVMContext::MD_make_implicit,
nullptr);
2328 TI.
setMetadata(LLVMContext::MD_make_implicit,
nullptr);
2335 SplitBB->getTerminator()->eraseFromParent();
2343 NewTI->
insertInto(ParentBB, ParentBB->end());
2354 DTUpdates.
push_back({DominatorTree::Insert, SplitBB, ClonedPH});
2356 assert(SI &&
"Must either be a branch or switch!");
2359 assert(SI->getDefaultDest() == RetainedSuccBB &&
2360 "Not retaining default successor!");
2361 SI->setDefaultDest(LoopPH);
2362 for (
const auto &Case : SI->cases())
2363 if (Case.getCaseSuccessor() == RetainedSuccBB)
2364 Case.setSuccessor(LoopPH);
2366 Case.setSuccessor(ClonedPHs.
find(Case.getCaseSuccessor())->second);
2369 SI->setCondition(
new FreezeInst(SI->getCondition(),
2370 SI->getCondition()->getName() +
".fr",
2371 SI->getIterator()));
2378 {DominatorTree::Insert, SplitBB, ClonedPHs.
find(SuccBB)->second});
2392 for (
auto &VMap : VMaps)
2408 "Only one possible unswitched block for a branch!");
2412 DTUpdates.
push_back({DominatorTree::Delete, ParentBB, UnswitchedSuccBB});
2422 "Not retaining default successor!");
2423 for (
const auto &Case : NewSI->
cases())
2424 Case.getCaseSuccessor()->removePredecessor(
2432 DTUpdates.
push_back({DominatorTree::Delete, ParentBB, SuccBB});
2436 ParentBB->getTerminator()->eraseFromParent();
2442 assert(BI &&
"Only branches have partial unswitching.");
2444 "Only one possible unswitched block for a branch!");
2448 if (PartiallyInvariant)
2450 *SplitBB, Invariants,
Direction, *ClonedPH, *LoopPH, L, MSSAU);
2453 *SplitBB, Invariants,
Direction, *ClonedPH, *LoopPH,
2456 DTUpdates.
push_back({DominatorTree::Insert, SplitBB, ClonedPH});
2463 for (
auto &VMap : VMaps)
2483 for (std::unique_ptr<ValueToValueMapTy> &VMap : VMaps)
2506 assert(DT.
verify(DominatorTree::VerificationLevel::Fast));
2508 if (BI && !PartiallyInvariant) {
2514 "Only one possible unswitched block for a branch!");
2526 bool ReplaceUnswitched =
2527 FullUnswitch || (Invariants.
size() == 1) || PartiallyInvariant;
2535 for (
Value *Invariant : Invariants) {
2536 assert(!isa<Constant>(Invariant) &&
2537 "Should not be replacing constant values!");
2540 Instruction *UserI = dyn_cast<Instruction>(U.getUser());
2547 U.set(ContinueReplacement);
2548 else if (ReplaceUnswitched &&
2550 U.set(UnswitchedReplacement);
2567 auto UpdateLoop = [&](
Loop &UpdateL) {
2569 UpdateL.verifyLoop();
2570 for (
Loop *ChildL : UpdateL) {
2571 ChildL->verifyLoop();
2572 assert(ChildL->isRecursivelyLCSSAForm(DT, LI) &&
2573 "Perturbed a child loop's LCSSA form!");
2593 for (
Loop *UpdatedL :
2594 llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops)) {
2595 UpdateLoop(*UpdatedL);
2596 if (UpdatedL->isOutermost())
2597 OuterExitL =
nullptr;
2601 if (L.isOutermost())
2602 OuterExitL =
nullptr;
2607 if (OuterExitL != &L)
2608 for (
Loop *OuterL = ParentL; OuterL != OuterExitL;
2610 UpdateLoop(*OuterL);
2622 for (
Loop *UpdatedL : llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops))
2623 if (UpdatedL->getParentLoop() == ParentL)
2625 postUnswitch(L, LoopUpdater, LoopName, IsStillLoop, PartiallyInvariant,
2626 InjectedCondition, SibLoops);
2649 auto BBCostIt = BBCostMap.
find(
N.getBlock());
2650 if (BBCostIt == BBCostMap.
end())
2654 auto DTCostIt = DTCostMap.
find(&
N);
2655 if (DTCostIt != DTCostMap.
end())
2656 return DTCostIt->second;
2661 N.begin(),
N.end(), BBCostIt->second,
2663 return Sum + computeDomSubtreeCost(*ChildN, BBCostMap, DTCostMap);
2665 bool Inserted = DTCostMap.
insert({&
N,
Cost}).second;
2667 assert(Inserted &&
"Should not insert a node while visiting children!");
2702 SI->getMetadata(LLVMContext::MD_prof), &DTU, &LI);
2704 BasicBlock *ThenBB = CondBr->getSuccessor(0),
2705 *TailBB = CondBr->getSuccessor(1);
2710 PHINode::Create(SI->getType(), 2,
"unswitched.select", SI->getIterator());
2711 Phi->addIncoming(SI->getTrueValue(), ThenBB);
2712 Phi->addIncoming(SI->getFalseValue(), HeadBB);
2713 SI->replaceAllUsesWith(Phi);
2714 SI->eraseFromParent();
2757 GI->
getMetadata(LLVMContext::MD_prof), &DTU, &LI);
2764 GuardedBlock->
setName(
"guarded");
2815 return L.contains(SuccBB);
2817 NumCostMultiplierSkipped++;
2821 auto *ParentL = L.getParentLoop();
2822 int SiblingsCount = (ParentL ? ParentL->getSubLoopsVector().size()
2823 : std::distance(LI.
begin(), LI.
end()));
2827 int UnswitchedClones = 0;
2828 for (
const auto &Candidate : UnswitchCandidates) {
2831 bool SkipExitingSuccessors = DT.
dominates(CondBlock, Latch);
2832 if (isa<SelectInst>(CI)) {
2837 if (!SkipExitingSuccessors)
2841 int NonExitingSuccessors =
2843 [SkipExitingSuccessors, &L](
const BasicBlock *SuccBB) {
2844 return !SkipExitingSuccessors || L.contains(SuccBB);
2846 UnswitchedClones +=
Log2_32(NonExitingSuccessors);
2854 unsigned ClonesPower =
2858 int SiblingsMultiplier =
2859 std::max((ParentL ? SiblingsCount
2869 CostMultiplier = std::min(SiblingsMultiplier * (1 << ClonesPower),
2873 <<
" (siblings " << SiblingsMultiplier <<
" * clones "
2874 << (1 << ClonesPower) <<
")"
2875 <<
" for unswitch candidate: " << TI <<
"\n");
2876 return CostMultiplier;
2884 assert(UnswitchCandidates.
empty() &&
"Should be!");
2888 if (isa<Constant>(
Cond))
2890 if (L.isLoopInvariant(
Cond)) {
2898 if (!Invariants.
empty())
2899 UnswitchCandidates.
push_back({
I, std::move(Invariants)});
2904 bool CollectGuards =
false;
2906 auto *GuardDecl = L.getHeader()->getParent()->getParent()->getFunction(
2908 if (GuardDecl && !GuardDecl->use_empty())
2909 CollectGuards =
true;
2912 for (
auto *BB : L.blocks()) {
2916 for (
auto &
I : *BB) {
2917 if (
auto *SI = dyn_cast<SelectInst>(&
I)) {
2918 auto *
Cond = SI->getCondition();
2920 if (
Cond->getType()->isIntegerTy(1) && !SI->getType()->isIntegerTy(1))
2921 AddUnswitchCandidatesForInst(SI,
Cond);
2922 }
else if (CollectGuards &&
isGuard(&
I)) {
2926 if (!isa<Constant>(
Cond) && L.isLoopInvariant(
Cond))
2931 if (
auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
2934 if (!isa<Constant>(SI->getCondition()) &&
2935 L.isLoopInvariant(SI->getCondition()) && !BB->getUniqueSuccessor())
2936 UnswitchCandidates.
push_back({SI, {SI->getCondition()}});
2940 auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
2941 if (!BI || !BI->isConditional() ||
2942 BI->getSuccessor(0) == BI->getSuccessor(1))
2945 AddUnswitchCandidatesForInst(BI, BI->getCondition());
2949 !
any_of(UnswitchCandidates, [&L](
auto &TerminatorAndInvariants) {
2950 return TerminatorAndInvariants.TI == L.getHeader()->getTerminator();
2955 dbgs() <<
"simple-loop-unswitch: Found partially invariant condition "
2956 << *
Info->InstToDuplicate[0] <<
"\n");
2957 PartialIVInfo = *
Info;
2958 PartialIVCondBranch = L.getHeader()->getTerminator();
2962 {L.getHeader()->getTerminator(), std::move(ValsToDuplicate)});
2965 return !UnswitchCandidates.
empty();
2978 if (!L.contains(IfTrue)) {
2979 Pred = ICmpInst::getInversePredicate(Pred);
2984 if (L.isLoopInvariant(
LHS)) {
2985 Pred = ICmpInst::getSwappedPredicate(Pred);
2991 Pred = ICmpInst::ICMP_ULT;
2992 RHS = ConstantInt::get(
3004 if (L.isLoopInvariant(
LHS) || !L.isLoopInvariant(
RHS))
3007 if (Pred != ICmpInst::ICMP_ULT)
3010 if (!L.contains(IfTrue) || L.contains(IfFalse))
3014 if (L.getHeader() == IfTrue)
3031 assert(Weights.
size() == 2 &&
"Unexpected profile data!");
3033 auto Num = Weights[
Idx];
3034 auto Denom = Weights[0] + Weights[1];
3036 if (Denom == 0 || Num > Denom)
3039 if (LikelyTaken > ActualTaken)
3062static NonTrivialUnswitchCandidate
3066 assert(Candidate.hasPendingInjection() &&
"Nothing to inject!");
3067 BasicBlock *Preheader = L.getLoopPreheader();
3068 assert(Preheader &&
"Loop is not in simplified form?");
3070 "Unswitching branch of inner loop!");
3072 auto Pred = Candidate.PendingInjection->Pred;
3073 auto *
LHS = Candidate.PendingInjection->LHS;
3074 auto *
RHS = Candidate.PendingInjection->RHS;
3075 auto *InLoopSucc = Candidate.PendingInjection->InLoopSucc;
3076 auto *TI = cast<BranchInst>(Candidate.TI);
3077 auto *BB = Candidate.TI->getParent();
3078 auto *OutOfLoopSucc = InLoopSucc == TI->getSuccessor(0) ? TI->getSuccessor(1)
3079 : TI->getSuccessor(0);
3081 assert(L.contains(InLoopSucc) &&
"Not supported yet!");
3082 assert(!L.contains(OutOfLoopSucc) &&
"Not supported yet!");
3083 auto &Ctx = BB->getContext();
3086 assert(ICmpInst::isUnsigned(Pred) &&
"Not supported yet!");
3096 auto *InjectedCond =
3097 ICmpInst::Create(Instruction::ICmp, Pred,
LHS,
RHS,
"injected.cond",
3101 BB->getParent(), InLoopSucc);
3104 Builder.
CreateCondBr(InjectedCond, InLoopSucc, CheckBlock);
3107 Builder.
CreateCondBr(TI->getCondition(), TI->getSuccessor(0),
3108 TI->getSuccessor(1));
3112 for (
auto &
I : *InLoopSucc) {
3113 auto *PN = dyn_cast<PHINode>(&
I);
3116 auto *Inc = PN->getIncomingValueForBlock(BB);
3117 PN->addIncoming(Inc, CheckBlock);
3119 OutOfLoopSucc->replacePhiUsesWith(BB, CheckBlock);
3122 { DominatorTree::Insert, BB, CheckBlock },
3123 { DominatorTree::Insert, CheckBlock, InLoopSucc },
3124 { DominatorTree::Insert, CheckBlock, OutOfLoopSucc },
3125 { DominatorTree::Delete, BB, OutOfLoopSucc }
3131 L.addBasicBlockToLoop(CheckBlock, LI);
3143 LLVM_DEBUG(
dbgs() <<
"Injected a new loop-invariant branch " << *InvariantBr
3144 <<
" and considering it for unswitching.");
3145 ++NumInvariantConditionsInjected;
3146 return NonTrivialUnswitchCandidate(InvariantBr, { InjectedCond },
3167 assert(ICmpInst::isStrictPredicate(Pred));
3168 if (Compares.
size() < 2)
3171 for (
auto Prev = Compares.
begin(), Next = Compares.
begin() + 1;
3172 Next != Compares.
end(); ++Prev, ++Next) {
3176 InjectedInvariant ToInject(NonStrictPred,
LHS,
RHS, InLoopSucc);
3177 NonTrivialUnswitchCandidate Candidate(Prev->Term, { LHS, RHS },
3178 std::nullopt, std::move(ToInject));
3179 UnswitchCandidates.
push_back(std::move(Candidate));
3209 auto *Latch = L.getLoopLatch();
3213 assert(L.getLoopPreheader() &&
"Must have a preheader!");
3218 for (
auto *DTN = DT.
getNode(Latch); L.contains(DTN->getBlock());
3219 DTN = DTN->getIDom()) {
3222 BasicBlock *IfTrue =
nullptr, *IfFalse =
nullptr;
3223 auto *BB = DTN->getBlock();
3227 auto *Term = BB->getTerminator();
3241 CompareDesc
Desc(cast<BranchInst>(Term),
RHS, IfTrue);
3242 while (
auto *Zext = dyn_cast<ZExtInst>(
LHS))
3243 LHS = Zext->getOperand(0);
3244 CandidatesULT[
LHS].push_back(
Desc);
3248 for (
auto &It : CandidatesULT)
3250 UnswitchCandidates, L, ICmpInst::ICMP_ULT, It.second, DT);
3255 if (!L.isSafeToClone())
3257 for (
auto *BB : L.blocks())
3258 for (
auto &
I : *BB) {
3259 if (
I.getType()->isTokenTy() &&
I.isUsedOutsideOfBlock(BB))
3261 if (
auto *CB = dyn_cast<CallBase>(&
I)) {
3262 assert(!CB->cannotDuplicate() &&
"Checked by L.isSafeToClone().");
3263 if (CB->isConvergent())
3276 if (containsIrreducibleCFG<const BasicBlock *>(RPOT, LI))
3280 L.getUniqueExitBlocks(ExitBlocks);
3285 for (
auto *ExitBB : ExitBlocks) {
3286 auto *
I = ExitBB->getFirstNonPHI();
3287 if (isa<CleanupPadInst>(
I) || isa<CatchSwitchInst>(
I)) {
3288 LLVM_DEBUG(
dbgs() <<
"Cannot unswitch because of cleanuppad/catchswitch "
3316 L.getHeader()->getParent()->hasMinSize()
3320 for (
auto *BB : L.blocks()) {
3322 for (
auto &
I : *BB) {
3327 assert(
Cost >= 0 &&
"Must not have negative costs!");
3329 assert(LoopCost >= 0 &&
"Must not have negative loop costs!");
3330 BBCostMap[BB] =
Cost;
3354 if (isa<SelectInst>(TI))
3363 if (!Visited.
insert(SuccBB).second)
3371 if (!FullUnswitch) {
3372 auto &BI = cast<BranchInst>(TI);
3375 if (SuccBB == BI.getSuccessor(1))
3378 if (SuccBB == BI.getSuccessor(0))
3381 SuccBB == BI.getSuccessor(0)) ||
3383 SuccBB == BI.getSuccessor(1)))
3391 if (SuccBB->getUniquePredecessor() ||
3393 return PredBB == &BB || DT.
dominates(SuccBB, PredBB);
3397 "Non-duplicated cost should never exceed total loop cost!");
3406 int SuccessorsCount =
isGuard(&TI) ? 2 : Visited.
size();
3407 assert(SuccessorsCount > 1 &&
3408 "Cannot unswitch a condition without multiple distinct successors!");
3409 return (LoopCost -
Cost) * (SuccessorsCount - 1);
3412 std::optional<NonTrivialUnswitchCandidate> Best;
3413 for (
auto &Candidate : UnswitchCandidates) {
3418 !BI || Candidate.hasPendingInjection() ||
3419 (Invariants.
size() == 1 &&
3421 InstructionCost CandidateCost = ComputeUnswitchedCost(TI, FullUnswitch);
3425 int CostMultiplier =
3429 "cost multiplier needs to be in the range of 1..UnswitchThreshold");
3430 CandidateCost *= CostMultiplier;
3432 <<
" (multiplier: " << CostMultiplier <<
")"
3433 <<
" for unswitch candidate: " << TI <<
"\n");
3436 <<
" for unswitch candidate: " << TI <<
"\n");
3439 if (!Best || CandidateCost < Best->
Cost) {
3441 Best->Cost = CandidateCost;
3444 assert(Best &&
"Must be!");
3456 assert(isa<BranchInst>(TI) || isa<SwitchInst>(TI));
3466 if (
BranchInst *BI = dyn_cast<BranchInst>(&TI))
3471 Cond, &AC, L.getLoopPreheader()->getTerminator(), &DT);
3485 PartialIVCondBranch, L, LI, AA, MSSAU);
3488 PartialIVCondBranch, L, DT, LI, AA,
3491 if (UnswitchCandidates.
empty())
3495 dbgs() <<
"Considering " << UnswitchCandidates.
size()
3496 <<
" non-trivial loop invariant conditions for unswitching.\n");
3499 UnswitchCandidates, L, DT, LI, AC,
TTI, PartialIVInfo);
3501 assert(Best.TI &&
"Failed to find loop unswitch candidate");
3502 assert(Best.Cost &&
"Failed to compute cost");
3505 LLVM_DEBUG(
dbgs() <<
"Cannot unswitch, lowest cost found: " << *Best.Cost
3510 bool InjectedCondition =
false;
3511 if (Best.hasPendingInjection()) {
3513 InjectedCondition =
true;
3515 assert(!Best.hasPendingInjection() &&
3516 "All injections should have been done by now!");
3518 if (Best.TI != PartialIVCondBranch)
3522 if (
auto *SI = dyn_cast<SelectInst>(Best.TI)) {
3528 SI->getCondition(), &AC, L.getLoopPreheader()->getTerminator(), &DT);
3538 LLVM_DEBUG(
dbgs() <<
" Unswitching non-trivial (cost = " << Best.Cost
3539 <<
") terminator: " << *Best.TI <<
"\n");
3541 LI, AC, SE, MSSAU, LoopUpdater, InsertFreeze,
3573 assert(L.isRecursivelyLCSSAForm(DT, LI) &&
3574 "Loops must be in LCSSA form before unswitching.");
3577 if (!L.isLoopSimplifyForm())
3590 const Function *
F = L.getHeader()->getParent();
3603 bool ContinueWithNonTrivial =
3605 if (!ContinueWithNonTrivial)
3609 if (
F->hasOptSize())
3614 auto IsLoopNestCold = [&](
const Loop *L) {
3620 Parent = Parent->getParentLoop();
3624 Worklist.
insert(Worklist.
end(), L->getSubLoops().begin(),
3625 L->getSubLoops().end());
3626 while (!Worklist.
empty()) {
3630 Worklist.
insert(Worklist.
end(), CurLoop->getSubLoops().begin(),
3631 CurLoop->getSubLoops().end());
3665 Function &
F = *L.getHeader()->getParent();
3668 if (
auto OuterProxy =
3672 LLVM_DEBUG(
dbgs() <<
"Unswitching loop in " <<
F.getName() <<
": " << L
3675 std::optional<MemorySSAUpdater> MSSAU;
3682 &AR.
SE, MSSAU ? &*MSSAU :
nullptr, PSI, AR.
BFI, U))
3701 OS, MapClassName2PassName);
3704 OS << (NonTrivial ?
"" :
"no-") <<
"nontrivial;";
3705 OS << (Trivial ?
"" :
"no-") <<
"trivial";
Analysis containing CSE Info
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
DenseMap< Block *, BlockRelaxAux > Blocks
This file defines a set of templates that efficiently compute a dominator tree over a generic graph.
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
This header provides classes for managing per-loop analyses.
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
Contains a collection of routines for determining if a given instruction is guaranteed to execute if ...
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Provides some synthesis utilities to produce sequences of values.
This file implements a set that has insertion order iteration characteristics.
static void rewritePHINodesForUnswitchedExitBlock(BasicBlock &UnswitchedBB, BasicBlock &OldExitingBB, BasicBlock &OldPH)
Rewrite the PHI nodes in an unswitched loop exit basic block.
static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, AAResults &AA, TargetTransformInfo &TTI, bool Trivial, bool NonTrivial, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI, LPMUpdater &LoopUpdater)
Unswitch control flow predicated on loop invariant conditions.
static void canonicalizeForInvariantConditionInjection(ICmpInst::Predicate &Pred, Value *&LHS, Value *&RHS, BasicBlock *&IfTrue, BasicBlock *&IfFalse, const Loop &L)
Tries to canonicalize condition described by:
static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
This routine scans the loop to find a branch or switch which occurs before any side effects occur.
static cl::opt< bool > EnableNonTrivialUnswitch("enable-nontrivial-unswitch", cl::init(false), cl::Hidden, cl::desc("Forcibly enables non-trivial loop unswitching rather than " "following the configuration passed into the pass."))
static cl::opt< bool > UnswitchGuards("simple-loop-unswitch-guards", cl::init(true), cl::Hidden, cl::desc("If enabled, simple loop unswitching will also consider " "llvm.experimental.guard intrinsics as unswitch candidates."))
static SmallPtrSet< const BasicBlock *, 16 > recomputeLoopBlockSet(Loop &L, LoopInfo &LI)
Recompute the set of blocks in a loop after unswitching.
static int CalculateUnswitchCostMultiplier(const Instruction &TI, const Loop &L, const LoopInfo &LI, const DominatorTree &DT, ArrayRef< NonTrivialUnswitchCandidate > UnswitchCandidates)
Cost multiplier is a way to limit potentially exponential behavior of loop-unswitch.
static void buildPartialInvariantUnswitchConditionalBranch(BasicBlock &BB, ArrayRef< Value * > ToDuplicate, bool Direction, BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L, MemorySSAUpdater *MSSAU)
Copy a set of loop invariant values, and conditionally branch on them.
static TinyPtrVector< Value * > collectHomogenousInstGraphLoopInvariants(const Loop &L, Instruction &Root, const LoopInfo &LI)
Collect all of the loop invariant input values transitively used by the homogeneous instruction graph...
static void deleteDeadClonedBlocks(Loop &L, ArrayRef< BasicBlock * > ExitBlocks, ArrayRef< std::unique_ptr< ValueToValueMapTy > > VMaps, DominatorTree &DT, MemorySSAUpdater *MSSAU)
void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable)
Helper to visit a dominator subtree, invoking a callable on each node.
static BranchInst * turnSelectIntoBranch(SelectInst *SI, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, AssumptionCache *AC)
Turns a select instruction into implicit control flow branch, making the following replacement:
static bool isSafeForNoNTrivialUnswitching(Loop &L, LoopInfo &LI)
void postUnswitch(Loop &L, LPMUpdater &U, StringRef LoopName, bool CurrentLoopValid, bool PartiallyInvariant, bool InjectedCondition, ArrayRef< Loop * > NewLoops)
static void buildPartialUnswitchConditionalBranch(BasicBlock &BB, ArrayRef< Value * > Invariants, bool Direction, BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze, const Instruction *I, AssumptionCache *AC, const DominatorTree &DT)
Copy a set of loop invariant values ToDuplicate and insert them at the end of BB and conditionally br...
static cl::opt< int > UnswitchNumInitialUnscaledCandidates("unswitch-num-initial-unscaled-candidates", cl::init(8), cl::Hidden, cl::desc("Number of unswitch candidates that are ignored when calculating " "cost multiplier."))
static bool shouldTryInjectInvariantCondition(const ICmpInst::Predicate Pred, const Value *LHS, const Value *RHS, const BasicBlock *IfTrue, const BasicBlock *IfFalse, const Loop &L)
Returns true, if predicate described by ( Pred, LHS, RHS ) succeeding into blocks ( IfTrue,...
static NonTrivialUnswitchCandidate findBestNonTrivialUnswitchCandidate(ArrayRef< NonTrivialUnswitchCandidate > UnswitchCandidates, const Loop &L, const DominatorTree &DT, const LoopInfo &LI, AssumptionCache &AC, const TargetTransformInfo &TTI, const IVConditionInfo &PartialIVInfo)
static cl::opt< bool > EnableUnswitchCostMultiplier("enable-unswitch-cost-multiplier", cl::init(true), cl::Hidden, cl::desc("Enable unswitch cost multiplier that prohibits exponential " "explosion in nontrivial unswitch."))
static Value * skipTrivialSelect(Value *Cond)
static Loop * getTopMostExitingLoop(const BasicBlock *ExitBB, const LoopInfo &LI)
static bool collectUnswitchCandidatesWithInjections(SmallVectorImpl< NonTrivialUnswitchCandidate > &UnswitchCandidates, IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, Loop &L, const DominatorTree &DT, const LoopInfo &LI, AAResults &AA, const MemorySSAUpdater *MSSAU)
Collect unswitch candidates by invariant conditions that are not immediately present in the loop.
static cl::opt< int > UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden, cl::desc("The cost threshold for unswitching a loop."))
static void replaceLoopInvariantUses(const Loop &L, Value *Invariant, Constant &Replacement)
static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
Unswitch a trivial branch if the condition is loop invariant.
static bool collectUnswitchCandidates(SmallVectorImpl< NonTrivialUnswitchCandidate > &UnswitchCandidates, IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, const Loop &L, const LoopInfo &LI, AAResults &AA, const MemorySSAUpdater *MSSAU)
static cl::opt< bool > InjectInvariantConditions("simple-loop-unswitch-inject-invariant-conditions", cl::Hidden, cl::desc("Whether we should inject new invariants and unswitch them to " "eliminate some existing (non-invariant) conditions."), cl::init(true))
static cl::opt< bool > FreezeLoopUnswitchCond("freeze-loop-unswitch-cond", cl::init(true), cl::Hidden, cl::desc("If enabled, the freeze instruction will be added to condition " "of loop unswitch to prevent miscompilation."))
static InstructionCost computeDomSubtreeCost(DomTreeNode &N, const SmallDenseMap< BasicBlock *, InstructionCost, 4 > &BBCostMap, SmallDenseMap< DomTreeNode *, InstructionCost, 4 > &DTCostMap)
Recursively compute the cost of a dominator subtree based on the per-block cost map provided.
static bool shouldInsertFreeze(Loop &L, Instruction &TI, DominatorTree &DT, AssumptionCache &AC)
static cl::opt< int > UnswitchSiblingsToplevelDiv("unswitch-siblings-toplevel-div", cl::init(2), cl::Hidden, cl::desc("Toplevel siblings divisor for cost multiplier."))
static cl::opt< unsigned > MSSAThreshold("simple-loop-unswitch-memoryssa-threshold", cl::desc("Max number of memory uses to explore during " "partial unswitching analysis"), cl::init(100), cl::Hidden)
static bool areLoopExitPHIsLoopInvariant(const Loop &L, const BasicBlock &ExitingBB, const BasicBlock &ExitBB)
Check that all the LCSSA PHI nodes in the loop exit block have trivial incoming values along this edg...
static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB, BasicBlock &UnswitchedBB, BasicBlock &OldExitingBB, BasicBlock &OldPH, bool FullUnswitch)
Rewrite the PHI nodes in the loop exit basic block and the split off unswitched block.
static bool insertCandidatesWithPendingInjections(SmallVectorImpl< NonTrivialUnswitchCandidate > &UnswitchCandidates, Loop &L, ICmpInst::Predicate Pred, ArrayRef< CompareDesc > Compares, const DominatorTree &DT)
Given chain of loop branch conditions looking like: br (Variant < Invariant1) br (Variant < Invariant...
static NonTrivialUnswitchCandidate injectPendingInvariantConditions(NonTrivialUnswitchCandidate Candidate, Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, MemorySSAUpdater *MSSAU)
Materialize pending invariant condition of the given candidate into IR.
static cl::opt< bool > DropNonTrivialImplicitNullChecks("simple-loop-unswitch-drop-non-trivial-implicit-null-checks", cl::init(false), cl::Hidden, cl::desc("If enabled, drop make.implicit metadata in unswitched implicit " "null checks to save time analyzing if we can keep it."))
static cl::opt< unsigned > InjectInvariantConditionHotnesThreshold("simple-loop-unswitch-inject-invariant-condition-hotness-threshold", cl::Hidden, cl::desc("Only try to inject loop invariant conditions and " "unswitch on them to eliminate branches that are " "not-taken 1/<this option> times or less."), cl::init(16))
static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
Unswitch a trivial switch if the condition is loop invariant.
static void unswitchNontrivialInvariants(Loop &L, Instruction &TI, ArrayRef< Value * > Invariants, IVConditionInfo &PartialIVInfo, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, LPMUpdater &LoopUpdater, bool InsertFreeze, bool InjectedCondition)
static bool rebuildLoopAfterUnswitch(Loop &L, ArrayRef< BasicBlock * > ExitBlocks, LoopInfo &LI, SmallVectorImpl< Loop * > &HoistedLoops, ScalarEvolution *SE)
Rebuild a loop after unswitching removes some subset of blocks and edges.
static bool unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, AAResults &AA, TargetTransformInfo &TTI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, LPMUpdater &LoopUpdater)
static BasicBlock * buildClonedLoopBlocks(Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB, ArrayRef< BasicBlock * > ExitBlocks, BasicBlock *ParentBB, BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB, const SmallDenseMap< BasicBlock *, BasicBlock *, 16 > &DominatingSucc, ValueToValueMapTy &VMap, SmallVectorImpl< DominatorTree::UpdateType > &DTUpdates, AssumptionCache &AC, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE)
Build the cloned blocks for an unswitched copy of the given loop.
static void deleteDeadBlocksFromLoop(Loop &L, SmallVectorImpl< BasicBlock * > &ExitBlocks, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE, LPMUpdater &LoopUpdater)
bool shouldTryInjectBasingOnMetadata(const BranchInst *BI, const BasicBlock *TakenSucc)
Returns true, if metadata on BI allows us to optimize branching into TakenSucc via injection of invar...
static BranchInst * turnGuardIntoBranch(IntrinsicInst *GI, Loop &L, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU)
Turns a llvm.experimental.guard intrinsic into implicit control flow branch, making the following rep...
static Loop * cloneLoopNest(Loop &OrigRootL, Loop *RootParentL, const ValueToValueMapTy &VMap, LoopInfo &LI)
Recursively clone the specified loop and all of its children.
static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE)
Hoist the current loop up to the innermost loop containing a remaining exit.
static void buildClonedLoops(Loop &OrigL, ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VMap, LoopInfo &LI, SmallVectorImpl< Loop * > &NonChildClonedLoops)
Build the cloned loops of an original loop from unswitching.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
This defines the Use class.
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
A cache of @llvm.assume calls within a function.
void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
InstListType::iterator iterator
Instruction iterators...
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
void setCondition(Value *V)
void swapSuccessors()
Swap the successors of this branch instruction.
static BranchInst * Create(BasicBlock *IfTrue, BasicBlock::iterator InsertBefore)
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
Value * getCondition() const
Value * getArgOperand(unsigned i) const
void setArgOperand(unsigned i, Value *v)
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
This is the shared class of boolean and integer constants.
static ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getFalse(LLVMContext &Context)
This is an important base class in LLVM.
bool isOneValue() const
Returns true if the value is one.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
void applyUpdates(ArrayRef< UpdateType > Updates)
Inform the dominator tree about a sequence of CFG edge insertions and deletions and perform a batch u...
void insertEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge insertion and update the tree.
static constexpr UpdateKind Delete
static constexpr UpdateKind Insert
void deleteEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge deletion and update the tree.
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
This class represents a freeze function that returns random concrete value if an operand is either a ...
This implementation of LoopSafetyInfo use ImplicitControlFlowTracking to give precise answers on "may...
bool isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop) const override
Returns true if the instruction in a loop is guaranteed to execute at least once (under the assumptio...
void computeLoopSafetyInfo(const Loop *CurLoop) override
Computes safety information for a loop checks loop body & header for the possibility of may throw exc...
bool isRelational() const
Return true if the predicate is relational (not EQ or NE).
Value * CreateFreeze(Value *V, const Twine &Name="")
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
const BasicBlock * getParent() const
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
bool isTerminator() const
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
InstListType::iterator insertInto(BasicBlock *ParentBB, InstListType::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
A wrapper class for inspecting calls to intrinsic functions.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
void markLoopAsDeleted(Loop &L, llvm::StringRef Name)
Loop passes should use this method to indicate they have deleted a loop from the nest.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
BlockT * getHeader() const
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
void reserveBlocks(unsigned size)
interface to do reserve() for Blocks
iterator_range< block_iterator > blocks() const
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
bool isLoopExiting(const BlockT *BB) const
True if terminator in the block can branch to another block that is outside of the current loop.
LoopT * removeChildLoop(iterator I)
This removes the specified child from being a subloop of this loop.
Wrapper class to LoopBlocksDFS that provides a standard begin()/end() interface for the DFS reverse p...
void perform(const LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
void verify(const DominatorTreeBase< BlockT, false > &DomTree) const
void addTopLevelLoop(LoopT *New)
This adds the specified loop to the collection of top-level loops.
LoopT * AllocateLoop(ArgsTy &&...Args)
LoopT * removeLoop(iterator I)
This removes the specified top-level loop from this loop info object.
void changeLoopFor(BlockT *BB, LoopT *L)
Change the top-level loop that contains BB to the specified loop.
unsigned getLoopDepth(const BlockT *BB) const
Return the loop nesting level of the specified block.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
void destroy(LoopT *L)
Destroy a loop that has been removed from the LoopInfo nest.
Represents a single loop in the control flow graph.
StringRef getName() const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
static MDString * get(LLVMContext &Context, StringRef Str)
Represents a read-write access to memory, whether it is a must-alias, or a may-alias.
An analysis that produces MemorySSA for a function.
MemorySSA * getMemorySSA() const
Get handle on MemorySSA.
void removeEdge(BasicBlock *From, BasicBlock *To)
Update the MemoryPhi in To following an edge deletion between From and To.
void updateForClonedLoop(const LoopBlocksRPO &LoopBlocks, ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VM, bool IgnoreIncomingWithNoClones=false)
Update MemorySSA after a loop was cloned, given the blocks in RPO order, the exit blocks and a 1:1 ma...
MemoryAccess * createMemoryAccessInBB(Instruction *I, MemoryAccess *Definition, const BasicBlock *BB, MemorySSA::InsertionPlace Point)
Create a MemoryAccess in MemorySSA at a specified point in a block.
void removeDuplicatePhiEdgesBetween(const BasicBlock *From, const BasicBlock *To)
Update the MemoryPhi in To to have a single incoming edge from From, following a CFG change that repl...
void removeBlocks(const SmallSetVector< BasicBlock *, 8 > &DeadBlocks)
Remove all MemoryAcceses in a set of BasicBlocks about to be deleted.
void moveAllAfterSpliceBlocks(BasicBlock *From, BasicBlock *To, Instruction *Start)
From block was spliced into From and To.
void applyInsertUpdates(ArrayRef< CFGUpdate > Updates, DominatorTree &DT)
Apply CFG insert updates, analogous with the DT edge updates.
void applyUpdates(ArrayRef< CFGUpdate > Updates, DominatorTree &DT, bool UpdateDTFirst=false)
Apply CFG updates, analogous with the DT edge updates.
void moveToPlace(MemoryUseOrDef *What, BasicBlock *BB, MemorySSA::InsertionPlace Where)
void updateExitBlocksForClonedLoop(ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VMap, DominatorTree &DT)
Update phi nodes in exit block successors following cloning.
Encapsulates MemorySSA, including all data associated with memory accesses.
void verifyMemorySSA(VerificationLevel=VerificationLevel::Fast) const
Verify that MemorySSA is self consistent (IE definitions dominate all uses, uses appear in the right ...
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
const DefsList * getBlockDefs(const BasicBlock *BB) const
Return the list of MemoryDef's and MemoryPhi's for a given basic block.
A Module instance is used to store all the information related to an LLVM module.
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr, BasicBlock::iterator InsertBefore)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool hasProfileSummary() const
Returns true if profile summary is available.
bool isColdBlock(const BBType *BB, BFIT *BFI) const
Returns true if BasicBlock BB is considered cold.
The main scalar evolution driver.
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
void forgetTopmostLoop(const Loop *L)
void forgetValue(Value *V)
This method should be called by the client when it has changed a value in a way that may effect its v...
void forgetBlockAndLoopDispositions(Value *V=nullptr)
Called when the client has changed the disposition of values in a loop or block.
This class represents the LLVM 'select' instruction.
size_type size() const
Determine the number of elements in the SetVector.
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
iterator begin()
Get an iterator to the beginning of the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
A wrapper class to simplify modification of SwitchInst cases along with their prof branch_weights met...
void setSuccessorWeight(unsigned idx, CaseWeightOpt W)
Instruction::InstListType::iterator eraseFromParent()
Delegate the call to the underlying SwitchInst::eraseFromParent() and mark this object to not touch t...
void addCase(ConstantInt *OnVal, BasicBlock *Dest, CaseWeightOpt W)
Delegate the call to the underlying SwitchInst::addCase() and set the specified branch weight for the...
CaseWeightOpt getSuccessorWeight(unsigned idx)
std::optional< uint32_t > CaseWeightOpt
SwitchInst::CaseIt removeCase(SwitchInst::CaseIt I)
Delegate the call to the underlying SwitchInst::removeCase() and remove correspondent branch weight.
unsigned getSuccessorIndex() const
Returns successor index for current case successor.
BasicBlockT * getCaseSuccessor() const
Resolves successor for current case.
ConstantIntT * getCaseValue() const
Resolves case value for current case.
static SwitchInst * Create(Value *Value, BasicBlock *Default, unsigned NumCases, BasicBlock::iterator InsertBefore)
BasicBlock * getDefaultDest() const
void setDefaultDest(BasicBlock *DefaultCase)
iterator_range< CaseIt > cases()
Iteration adapter for range-for loops.
TinyPtrVector - This class is specialized for cases where there are normally 0 or 1 element in a vect...
void push_back(EltTy NewVal)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
unsigned getIntegerBitWidth() const
bool isIntegerTy() const
True if this is an instance of IntegerType.
A Use represents the edge between a Value definition and its users.
ValueT lookup(const KeyT &Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
size_type count(const KeyT &Val) const
Return 1 if the specified key is in the map, 0 otherwise.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
void setName(const Twine &Name)
Change the name of the value.
LLVMContext & getContext() const
All values hold a context through their type.
iterator_range< use_iterator > uses()
StringRef getName() const
Return a constant reference to the value's name.
An efficient, type-erasing, non-owning reference to a callable.
self_iterator getIterator()
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
bool match(Val *V, const Pattern &P)
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
class_match< BasicBlock > m_BasicBlock()
Match an arbitrary basic block value and ignore it.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void stable_sort(R &&Range)
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
auto successors(const MachineBasicBlock *BB)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
MDNode * findOptionMDForLoop(const Loop *TheLoop, StringRef Name)
Find string metadata for a loop.
void RemapDbgVariableRecordRange(Module *M, iterator_range< DbgRecordIterator > Range, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Remap the Values used in the DbgVariableRecord V using the value map VM.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
bool isGuard(const User *U)
Returns true iff U has semantics of a guard expressed in a form of call of llvm.experimental....
auto reverse(ContainerTy &&C)
BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, DebugInfoFinder *DIFinder=nullptr)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
detail::zippy< detail::zip_first, T, U, Args... > zip_first(T &&t, U &&u, Args &&...args)
zip iterator that, for the sake of efficiency, assumes the first iteratee to be the shortest.
void sort(IteratorTy Start, IteratorTy End)
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool VerifyLoopInfo
Enable verification of loop info.
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
bool VerifyMemorySSA
Enables verification of MemorySSA.
bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Ensure that all exit blocks of the loop are dedicated exits.
bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
llvm::MDNode * makePostTransformationMetadata(llvm::LLVMContext &Context, MDNode *OrigLoopID, llvm::ArrayRef< llvm::StringRef > RemovePrefixes, llvm::ArrayRef< llvm::MDNode * > AddAttrs)
Create a new LoopID after the loop has been transformed.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
auto predecessors(const MachineBasicBlock *BB)
bool pred_empty(const BasicBlock *BB)
Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
std::optional< IVConditionInfo > hasPartialIVCondition(const Loop &L, unsigned MSSAThreshold, const MemorySSA &MSSA, AAResults &AA)
Check if the loop header has a conditional branch that is not loop-invariant, because it involves loa...
bool formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put loop into LCSSA form.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
A special type used by analysis passes to provide an address that identifies that particular analysis...
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
Description of the encoding of one expression Op.
Struct to hold information about a partially invariant condition.
SmallVector< Instruction * > InstToDuplicate
Instructions that need to be duplicated and checked for the unswitching condition.
Constant * KnownValue
Constant to indicate for which value the condition is invariant.
Incoming for lane maks phi as machine instruction, incoming register Reg and incoming block Block are...
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
TargetTransformInfo & TTI
Direction
An enum for the direction of the loop.
A CRTP mix-in to automatically provide informational APIs needed for passes.