llvm.org GIT mirror llvm / b92fd73
[PM/LoopUnswitch] Add partial non-trivial unswitching for invariant conditions feeding a chain of `and`s or `or`s for a branch. Much like with full non-trivial unswitching, we rely on the pass manager to handle iterating until all of the profitable unswitches have been done. This is to allow other more profitable unswitches to fire on any of the cloned, simpler versions of the loop if viable. Threading the partial unswitching through the non-trivial unswitching logic motivated some minor refactorings. If those are too disruptive to make it reasonable to review this patch, I can separate them out, but it'll be somewhat time-consuming so I wanted to send it for initial review as-is. Feel free to tell me whether it warrants pulling apart. I've tried to re-use (and factor out) logic from the partial trivial unswitching, but not as much could be shared as I had hoped. Still, this wasn't as bad as I naively expected. Some basic testing is added, but I probably need more. Suggestions for things you'd like to see tested are more than welcome. One thing I'd like to do is add some testing that when we schedule this with loop-instsimplify it effectively cleans up the cruft created. Last but not least, this uncovered a bug that has been in loop cloning the entire time for non-trivial unswitching. Specifically, we didn't correctly add the outer-most cloned loop to the list of cloned loops. This meant that LCSSA wouldn't be updated for it hypothetically, and more significantly that we would never visit it in the loop pass manager. I noticed this while checking loop-instsimplify by hand. I'll try to separate this bugfix out into its own patch with a more focused test. But it is just one line, so shouldn't significantly confuse the review here. After this patch, the only missing "feature" in this unswitch I'm aware of is non-trivial unswitching of switches. 
I'll try implementing *full* non-trivial unswitching of switches (which is at least a sound thing to implement), but *partial* non-trivial unswitching of switches is something I don't see any sound and principled way to implement. I also have no interesting test cases for the latter, so I'm not really worried. The rest of the things that need to be ported are bug-fixes and more narrow / targeted support for specific issues. Differential Revision: https://reviews.llvm.org/D47522 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@335203 91177308-0d34-0410-b5e6-96231b3b80d8 Chandler Carruth 1 year, 2 months ago
3 changed file(s) with 409 addition(s) and 97 deletion(s). Raw diff Collapse all Expand all
107107 return *this;
108108 }
109109
110 TinyPtrVector(std::initializer_list IL)
111 : Val(IL.size() == 0
112 ? PtrUnion()
113 : IL.size() == 1 ? PtrUnion(*IL.begin())
114 : PtrUnion(new VecTy(IL.begin(), IL.end()))) {}
115
110116 /// Constructor from an ArrayRef.
111117 ///
112118 /// This also is a constructor for individual array elements due to the single
7676 /// which have the exact same opcode and finds all inputs which are loop
7777 /// invariant. For some operations these can be re-associated and unswitched out
7878 /// of the loop entirely.
79 static SmallVector>
79 static TinyPtrVector>
8080 collectHomogenousInstGraphLoopInvariants(Loop &L, Instruction &Root,
8181 LoopInfo &LI) {
82 SmallVector Invariants;
8382 assert(!L.isLoopInvariant(&Root) &&
8483 "Only need to walk the graph if root itself is not invariant.");
84 TinyPtrVector Invariants;
8585
8686 // Build a worklist and recurse through operators collecting invariants.
8787 SmallVector Worklist;
147147 return false;
148148 }
149149 llvm_unreachable("Basic blocks should never be empty!");
150 }
151
152 /// Insert code to test a set of loop invariant values, and conditionally branch
153 /// on them.
154 static void buildPartialUnswitchConditionalBranch(BasicBlock &BB,
155 ArrayRef Invariants,
156 bool Direction,
157 BasicBlock &UnswitchedSucc,
158 BasicBlock &NormalSucc) {
159 IRBuilder<> IRB(&BB);
160 Value *Cond = Invariants.front();
161 for (Value *Invariant :
162 make_range(std::next(Invariants.begin()), Invariants.end()))
163 if (Direction)
164 Cond = IRB.CreateOr(Cond, Invariant);
165 else
166 Cond = IRB.CreateAnd(Cond, Invariant);
167
168 IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
169 Direction ? &NormalSucc : &UnswitchedSucc);
150170 }
151171
152172 /// Rewrite the PHI nodes in an unswitched loop exit basic block.
238258 LLVM_DEBUG(dbgs() << " Trying to unswitch branch: " << BI << "\n");
239259
240260 // The loop invariant values that we want to unswitch.
241 SmallVector> Invariants;
261 TinyPtrVector> Invariants;
242262
243263 // When true, we're fully unswitching the branch rather than just unswitching
244264 // some input conditions to the branch.
335355 } else {
336356 // Only unswitching a subset of inputs to the condition, so we will need to
337357 // build a new branch that merges the invariant inputs.
338 IRBuilder<> IRB(OldPH);
339 Value *Cond = Invariants.front();
340358 if (ExitDirection)
341359 assert(cast(BI.getCondition())->getOpcode() ==
342360 Instruction::Or &&
345363 assert(cast(BI.getCondition())->getOpcode() ==
346364 Instruction::And &&
347365 "Must have an `and` of `i1`s for the condition!");
348 for (Value *Invariant :
349 make_range(std::next(Invariants.begin()), Invariants.end()))
350 if (ExitDirection)
351 Cond = IRB.CreateOr(Cond, Invariant);
352 else
353 Cond = IRB.CreateAnd(Cond, Invariant);
354
355 BasicBlock *Succs[2];
356 Succs[LoopExitSuccIdx] = UnswitchedBB;
357 Succs[1 - LoopExitSuccIdx] = NewPH;
358 IRB.CreateCondBr(Cond, Succs[0], Succs[1]);
366 buildPartialUnswitchConditionalBranch(*OldPH, Invariants, ExitDirection,
367 *UnswitchedBB, *NewPH);
359368 }
360369
361370 // Rewrite the relevant PHI nodes.
15831592 /// Once unswitching has been performed it runs the provided callback to report
15841593 /// the new loops and no-longer valid loops to the caller.
15851594 static bool unswitchInvariantBranch(
1586 Loop &L, BranchInst &BI, DominatorTree &DT, LoopInfo &LI,
1587 AssumptionCache &AC,
1595 Loop &L, BranchInst &BI, ArrayRef Invariants, DominatorTree &DT,
1596 LoopInfo &LI, AssumptionCache &AC,
15881597 function_ref)> UnswitchCB) {
1598 auto *ParentBB = BI.getParent();
1599
1600 // We can only unswitch conditional branches with an invariant condition or
1601 // combining invariant conditions with an instruction.
15891602 assert(BI.isConditional() && "Can only unswitch a conditional branch!");
1590 assert(L.isLoopInvariant(BI.getCondition()) &&
1591 "Can only unswitch an invariant branch condition!");
1592
1593 // Constant and BBs tracking the cloned and continuing successor.
1594 const int ClonedSucc = 0;
1595 auto *ParentBB = BI.getParent();
1603 bool FullUnswitch = BI.getCondition() == Invariants[0];
1604 if (FullUnswitch)
1605 assert(Invariants.size() == 1 &&
1606 "Cannot have other invariants with full unswitching!");
1607 else
1608 assert(isa(BI.getCondition()) &&
1609 "Partial unswitching requires an instruction as the condition!");
1610
1611 // Constant and BBs tracking the cloned and continuing successor. When we are
1612 // unswitching the entire condition, this can just be trivially chosen to
1613 // unswitch towards `true`. However, when we are unswitching a set of
1614 // invariants combined with `and` or `or`, the combining operation determines
1615 // the best direction to unswitch: we want to unswitch the direction that will
1616 // collapse the branch.
1617 bool Direction = true;
1618 int ClonedSucc = 0;
1619 if (!FullUnswitch) {
1620 if (cast(BI.getCondition())->getOpcode() != Instruction::Or) {
1621 assert(cast(BI.getCondition())->getOpcode() == Instruction::And &&
1622 "Only `or` and `and` instructions can combine invariants being unswitched.");
1623 Direction = false;
1624 ClonedSucc = 1;
1625 }
1626 }
15961627 auto *UnswitchedSuccBB = BI.getSuccessor(ClonedSucc);
15971628 auto *ContinueSuccBB = BI.getSuccessor(1 - ClonedSucc);
15981629
16501681 return true;
16511682 });
16521683 }
1653 // Similarly, if the edge we *are* cloning in the unswitch (the unswitched
1654 // edge) dominates its target, we will end up with dead nodes in the original
1655 // loop and its exits that will need to be deleted. Here, we just retain that
1656 // the property holds and will compute the deleted set later.
1684 // If we are doing full unswitching, then similarly to the above, the edge we
1685 // *are* cloning in the unswitch (the unswitched edge) dominates its target,
1686 // we will end up with dead nodes in the original loop and its exits that will
1687 // need to be deleted. Here, we just retain that the property holds and will
1688 // compute the deleted set later.
16571689 bool DeleteUnswitchedSucc =
1658 UnswitchedSuccBB->getUniquePredecessor() ||
1659 llvm::all_of(predecessors(UnswitchedSuccBB), [&](BasicBlock *PredBB) {
1660 return PredBB == ParentBB || DT.dominates(UnswitchedSuccBB, PredBB);
1661 });
1690 FullUnswitch &&
1691 (UnswitchedSuccBB->getUniquePredecessor() ||
1692 llvm::all_of(predecessors(UnswitchedSuccBB), [&](BasicBlock *PredBB) {
1693 return PredBB == ParentBB || DT.dominates(UnswitchedSuccBB, PredBB);
1694 }));
16621695
16631696 // Split the preheader, so that we know that there is a safe place to insert
16641697 // the conditional branch. We will change the preheader to have a conditional
16791712 L, LoopPH, SplitBB, ExitBlocks, ParentBB, UnswitchedSuccBB,
16801713 ContinueSuccBB, SkippedLoopAndExitBlocks, VMap, DTUpdates, AC, DT, LI);
16811714
1682 // Remove the parent as a predecessor of the unswitched successor.
1683 UnswitchedSuccBB->removePredecessor(ParentBB, /*DontDeleteUselessPHIs*/ true);
1684
1685 // Now splice the branch from the original loop and use it to select between
1686 // the two loops.
1715 // The stitching of the branched code back together depends on whether we're
1716 // doing full unswitching or not with the exception that we always want to
1717 // nuke the initial terminator placed in the split block.
16871718 SplitBB->getTerminator()->eraseFromParent();
1688 SplitBB->getInstList().splice(SplitBB->end(), ParentBB->getInstList(), BI);
1689 BI.setSuccessor(ClonedSucc, ClonedPH);
1690 BI.setSuccessor(1 - ClonedSucc, LoopPH);
1691
1692 // Create a new unconditional branch to the continuing block (as opposed to
1693 // the one cloned).
1694 BranchInst::Create(ContinueSuccBB, ParentBB);
1719 if (FullUnswitch) {
1720 // Remove the parent as a predecessor of the
1721 // unswitched successor.
1722 UnswitchedSuccBB->removePredecessor(ParentBB,
1723 /*DontDeleteUselessPHIs*/ true);
1724 DTUpdates.push_back({DominatorTree::Delete, ParentBB, UnswitchedSuccBB});
1725
1726 // Now splice the branch from the original loop and use it to select between
1727 // the two loops.
1728 SplitBB->getInstList().splice(SplitBB->end(), ParentBB->getInstList(), BI);
1729 BI.setSuccessor(ClonedSucc, ClonedPH);
1730 BI.setSuccessor(1 - ClonedSucc, LoopPH);
1731
1732 // Create a new unconditional branch to the continuing block (as opposed to
1733 // the one cloned).
1734 BranchInst::Create(ContinueSuccBB, ParentBB);
1735 } else {
1736 // When doing a partial unswitch, we have to do a bit more work to build up
1737 // the branch in the split block.
1738 buildPartialUnswitchConditionalBranch(*SplitBB, Invariants, Direction,
1739 *ClonedPH, *LoopPH);
1740 }
16951741
16961742 // Before we update the dominator tree, collect the dead blocks if we're going
16971743 // to end up deleting the unswitched successor.
17161762 }
17171763 }
17181764
1719 // Add the remaining edges to our updates and apply them to get an up-to-date
1765 // Add the remaining edge to our updates and apply them to get an up-to-date
17201766 // dominator tree. Note that this will cause the dead blocks above to be
17211767 // unreachable and no longer in the dominator tree.
1722 DTUpdates.push_back({DominatorTree::Delete, ParentBB, UnswitchedSuccBB});
17231768 DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
17241769 DT.applyUpdates(DTUpdates);
17251770
17431788 // FIXME: Remove this when the bugs stop showing up and rely on existing
17441789 // verification steps.
17451790 assert(DT.verify(DominatorTree::VerificationLevel::Fast));
1791
1792 // Now we want to replace all the uses of the invariants within both the
1793 // original and cloned blocks. We do this here so that we can use the now
1794 // updated dominator tree to identify which side the users are on.
1795 ConstantInt *UnswitchedReplacement =
1796 Direction ? ConstantInt::getTrue(BI.getContext())
1797 : ConstantInt::getFalse(BI.getContext());
1798 ConstantInt *ContinueReplacement =
1799 Direction ? ConstantInt::getFalse(BI.getContext())
1800 : ConstantInt::getTrue(BI.getContext());
1801 for (Value *Invariant : Invariants)
1802 for (auto UI = Invariant->use_begin(), UE = Invariant->use_end();
1803 UI != UE;) {
1804 // Grab the use and walk past it so we can clobber it in the use list.
1805 Use *U = &*UI++;
1806 Instruction *UserI = dyn_cast(U->getUser());
1807 if (!UserI)
1808 continue;
1809
1810 // Replace it with the 'continue' side if in the main loop body, and the
1811 // unswitched if in the cloned blocks.
1812 if (DT.dominates(LoopPH, UserI->getParent()))
1813 U->set(ContinueReplacement);
1814 else if (DT.dominates(ClonedPH, UserI->getParent()))
1815 U->set(UnswitchedReplacement);
1816 }
17461817
17471818 // We can change which blocks are exit blocks of all the cloned sibling
17481819 // loops, the current loop, and any parent loops which shared exit blocks
18531924 return Cost;
18541925 }
18551926
1856 /// Unswitch control flow predicated on loop invariant conditions.
1857 ///
1858 /// This first hoists all branches or switches which are trivial (IE, do not
1859 /// require duplicating any part of the loop) out of the loop body. It then
1860 /// looks at other loop invariant control flows and tries to unswitch those as
1861 /// well by cloning the loop if the result is small enough.
1862 static bool
1863 unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
1864 TargetTransformInfo &TTI, bool NonTrivial,
1865 function_ref)> UnswitchCB) {
1866 assert(L.isRecursivelyLCSSAForm(DT, LI) &&
1867 "Loops must be in LCSSA form before unswitching.");
1868
1869 // Must be in loop simplified form: we need a preheader and dedicated exits.
1870 if (!L.isLoopSimplifyForm())
1871 return false;
1872
1873 // Try trivial unswitch first before loop over other basic blocks in the loop.
1874 if (unswitchAllTrivialConditions(L, DT, LI)) {
1875 // If we unswitched successfully we will want to clean up the loop before
1876 // processing it further so just mark it as unswitched and return.
1877 UnswitchCB(/*CurrentLoopValid*/ true, {});
1878 return true;
1879 }
1880
1881 // If we're not doing non-trivial unswitching, we're done. We both accept
1882 // a parameter but also check a local flag that can be used for testing
1883 // and debugging.
1884 if (!NonTrivial && !EnableNonTrivialUnswitch)
1885 return false;
1886
1887 // Collect all remaining invariant branch conditions within this loop (as
1888 // opposed to an inner loop which would be handled when visiting that inner
1889 // loop).
1890 SmallVector UnswitchCandidates;
1891 for (auto *BB : L.blocks())
1892 if (LI.getLoopFor(BB) == &L)
1893 if (auto *BI = dyn_cast(BB->getTerminator()))
1894 if (BI->isConditional() && L.isLoopInvariant(BI->getCondition()) &&
1895 BI->getSuccessor(0) != BI->getSuccessor(1))
1896 UnswitchCandidates.push_back(BI);
1927 static bool unswitchBestCondition(
1928 Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
1929 TargetTransformInfo &TTI,
1930 function_ref)> UnswitchCB) {
1931 // Collect all invariant conditions within this loop (as opposed to an inner
1932 // loop which would be handled when visiting that inner loop).
1933 SmallVector>, 4>
1934 UnswitchCandidates;
1935 for (auto *BB : L.blocks()) {
1936 if (LI.getLoopFor(BB) != &L)
1937 continue;
1938
1939 auto *BI = dyn_cast(BB->getTerminator());
1940 // FIXME: Handle switches here!
1941 if (!BI || !BI->isConditional() || isa(BI->getCondition()) ||
1942 BI->getSuccessor(0) == BI->getSuccessor(1))
1943 continue;
1944
1945 if (L.isLoopInvariant(BI->getCondition())) {
1946 UnswitchCandidates.push_back({BI, {BI->getCondition()}});
1947 continue;
1948 }
1949
1950 Instruction &CondI = *cast(BI->getCondition());
1951 if (CondI.getOpcode() != Instruction::And &&
1952 CondI.getOpcode() != Instruction::Or)
1953 continue;
1954
1955 TinyPtrVector Invariants =
1956 collectHomogenousInstGraphLoopInvariants(L, CondI, LI);
1957 if (Invariants.empty())
1958 continue;
1959
1960 UnswitchCandidates.push_back({BI, std::move(Invariants)});
1961 }
18971962
18981963 // If we didn't find any candidates, we're done.
18991964 if (UnswitchCandidates.empty())
19672032 SmallDenseMap DTCostMap;
19682033 // Given a terminator which might be unswitched, computes the non-duplicated
19692034 // cost for that terminator.
1970 auto ComputeUnswitchedCost = [&](TerminatorInst *TI) {
1971 BasicBlock &BB = *TI->getParent();
2035 auto ComputeUnswitchedCost = [&](TerminatorInst &TI, bool FullUnswitch) {
2036 BasicBlock &BB = *TI.getParent();
19722037 SmallPtrSet Visited;
19732038
19742039 int Cost = LoopCost;
19762041 // Don't count successors more than once.
19772042 if (!Visited.insert(SuccBB).second)
19782043 continue;
2044
2045 // If this is a partial unswitch candidate, then it must be a conditional
2046 // branch with a condition of either `or` or `and`. In that case, one of
2047 // the successors is necessarily duplicated, so don't even try to remove
2048 // its cost.
2049 if (!FullUnswitch) {
2050 auto &BI = cast(TI);
2051 if (cast(BI.getCondition())->getOpcode() ==
2052 Instruction::And) {
2053 if (SuccBB == BI.getSuccessor(1))
2054 continue;
2055 } else {
2056 assert(cast(BI.getCondition())->getOpcode() ==
2057 Instruction::Or &&
2058 "Only `and` and `or` conditions can result in a partial "
2059 "unswitch!");
2060 if (SuccBB == BI.getSuccessor(0))
2061 continue;
2062 }
2063 }
19792064
19802065 // This successor's domtree will not need to be duplicated after
19812066 // unswitching if the edge to the successor dominates it (and thus the
20002085 };
20012086 TerminatorInst *BestUnswitchTI = nullptr;
20022087 int BestUnswitchCost;
2003 for (TerminatorInst *CandidateTI : UnswitchCandidates) {
2004 int CandidateCost = ComputeUnswitchedCost(CandidateTI);
2088 ArrayRef BestUnswitchInvariants;
2089 for (auto &TerminatorAndInvariants : UnswitchCandidates) {
2090 TerminatorInst &TI = *TerminatorAndInvariants.first;
2091 ArrayRef Invariants = TerminatorAndInvariants.second;
2092 BranchInst *BI = dyn_cast(&TI);
2093 int CandidateCost =
2094 ComputeUnswitchedCost(TI, /*FullUnswitch*/ Invariants.size() == 1 && BI &&
2095 Invariants[0] == BI->getCondition());
20052096 LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCost
2006 << " for unswitch candidate: " << *CandidateTI << "\n");
2097 << " for unswitch candidate: " << TI << "\n");
20072098 if (!BestUnswitchTI || CandidateCost < BestUnswitchCost) {
2008 BestUnswitchTI = CandidateTI;
2099 BestUnswitchTI = &TI;
20092100 BestUnswitchCost = CandidateCost;
2101 BestUnswitchInvariants = Invariants;
20102102 }
20112103 }
20122104
20162108 return false;
20172109 }
20182110
2111 auto *UnswitchBI = dyn_cast(BestUnswitchTI);
2112 if (!UnswitchBI) {
2113 // FIXME: Add support for unswitching a switch here!
2114 LLVM_DEBUG(dbgs() << "Cannot unswitch anything but a branch!\n");
2115 return false;
2116 }
2117
20192118 LLVM_DEBUG(dbgs() << " Trying to unswitch non-trivial (cost = "
2020 << BestUnswitchCost << ") branch: " << *BestUnswitchTI
2021 << "\n");
2022 return unswitchInvariantBranch(L, cast(*BestUnswitchTI), DT, LI,
2119 << BestUnswitchCost << ") branch: " << *UnswitchBI << "\n");
2120 return unswitchInvariantBranch(L, *UnswitchBI, BestUnswitchInvariants, DT, LI,
20232121 AC, UnswitchCB);
2122 }
2123
2124 /// Unswitch control flow predicated on loop invariant conditions.
2125 ///
2126 /// This first hoists all branches or switches which are trivial (IE, do not
2127 /// require duplicating any part of the loop) out of the loop body. It then
2128 /// looks at other loop invariant control flows and tries to unswitch those as
2129 /// well by cloning the loop if the result is small enough.
2130 static bool
2131 unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
2132 TargetTransformInfo &TTI, bool NonTrivial,
2133 function_ref)> UnswitchCB) {
2134 assert(L.isRecursivelyLCSSAForm(DT, LI) &&
2135 "Loops must be in LCSSA form before unswitching.");
2136 bool Changed = false;
2137
2138 // Must be in loop simplified form: we need a preheader and dedicated exits.
2139 if (!L.isLoopSimplifyForm())
2140 return false;
2141
2142 // Try trivial unswitch first before loop over other basic blocks in the loop.
2143 if (unswitchAllTrivialConditions(L, DT, LI)) {
2144 // If we unswitched successfully we will want to clean up the loop before
2145 // processing it further so just mark it as unswitched and return.
2146 UnswitchCB(/*CurrentLoopValid*/ true, {});
2147 return true;
2148 }
2149
2150 // If we're not doing non-trivial unswitching, we're done. We both accept
2151 // a parameter but also check a local flag that can be used for testing
2152 // and debugging.
2153 if (!NonTrivial && !EnableNonTrivialUnswitch)
2154 return false;
2155
2156 // For non-trivial unswitching, because it often creates new loops, we rely on
2157 // the pass manager to iterate on the loops rather than trying to immediately
2158 // reach a fixed point. There is no substantial advantage to iterating
2159 // internally, and if any of the new loops are simplified enough to contain
2160 // trivial unswitching we want to prefer those.
2161
2162 // Try to unswitch the best invariant condition. We prefer this full unswitch to
2163 // a partial unswitch when possible below the threshold.
2164 if (unswitchBestCondition(L, DT, LI, AC, TTI, UnswitchCB))
2165 return true;
2166
2167 // No other opportunities to unswitch.
2168 return Changed;
20242169 }
20252170
20262171 PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
26612661 ret i32 0
26622662 ; CHECK: loop_exit:
26632663 ; CHECK-NEXT: ret
2664 }
2664 }
2665
2666 ; Non-trivial partial loop unswitching of an invariant input to an 'or'.
2667 define i32 @test25(i1* %ptr, i1 %cond) {
2668 ; CHECK-LABEL: @test25(
2669 entry:
2670 br label %loop_begin
2671 ; CHECK-NEXT: entry:
2672 ; CHECK-NEXT: br i1 %cond, label %entry.split.us, label %entry.split
2673
2674 loop_begin:
2675 %v1 = load i1, i1* %ptr
2676 %cond_or = or i1 %v1, %cond
2677 br i1 %cond_or, label %loop_a, label %loop_b
2678
2679 loop_a:
2680 call void @a()
2681 br label %latch
2682 ; The 'loop_a' unswitched loop.
2683 ;
2684 ; CHECK: entry.split.us:
2685 ; CHECK-NEXT: br label %loop_begin.us
2686 ;
2687 ; CHECK: loop_begin.us:
2688 ; CHECK-NEXT: %[[V1_US:.*]] = load i1, i1* %ptr
2689 ; CHECK-NEXT: %[[OR_US:.*]] = or i1 %[[V1_US]], true
2690 ; CHECK-NEXT: br label %loop_a.us
2691 ;
2692 ; CHECK: loop_a.us:
2693 ; CHECK-NEXT: call void @a()
2694 ; CHECK-NEXT: br label %latch.us
2695 ;
2696 ; CHECK: latch.us:
2697 ; CHECK-NEXT: %[[V2_US:.*]] = load i1, i1* %ptr
2698 ; CHECK-NEXT: br i1 %[[V2_US]], label %loop_begin.us, label %loop_exit.split.us
2699 ;
2700 ; CHECK: loop_exit.split.us:
2701 ; CHECK-NEXT: br label %loop_exit
2702
2703 loop_b:
2704 call void @b()
2705 br label %latch
2706 ; The original loop.
2707 ;
2708 ; CHECK: entry.split:
2709 ; CHECK-NEXT: br label %loop_begin
2710 ;
2711 ; CHECK: loop_begin:
2712 ; CHECK-NEXT: %[[V1:.*]] = load i1, i1* %ptr
2713 ; CHECK-NEXT: %[[OR:.*]] = or i1 %[[V1]], false
2714 ; CHECK-NEXT: br i1 %[[OR]], label %loop_a, label %loop_b
2715 ;
2716 ; CHECK: loop_a:
2717 ; CHECK-NEXT: call void @a()
2718 ; CHECK-NEXT: br label %latch
2719 ;
2720 ; CHECK: loop_b:
2721 ; CHECK-NEXT: call void @b()
2722 ; CHECK-NEXT: br label %latch
2723
2724 latch:
2725 %v2 = load i1, i1* %ptr
2726 br i1 %v2, label %loop_begin, label %loop_exit
2727 ; CHECK: latch:
2728 ; CHECK-NEXT: %[[V2:.*]] = load i1, i1* %ptr
2729 ; CHECK-NEXT: br i1 %[[V2]], label %loop_begin, label %loop_exit.split
2730
2731 loop_exit:
2732 ret i32 0
2733 ; CHECK: loop_exit.split:
2734 ; CHECK-NEXT: br label %loop_exit
2735 ;
2736 ; CHECK: loop_exit:
2737 ; CHECK-NEXT: ret
2738 }
2739
2740 ; Non-trivial partial loop unswitching of multiple invariant inputs to an `and`
2741 ; chain.
2742 define i32 @test26(i1* %ptr1, i1* %ptr2, i1* %ptr3, i1 %cond1, i1 %cond2, i1 %cond3) {
2743 ; CHECK-LABEL: @test26(
2744 entry:
2745 br label %loop_begin
2746 ; CHECK-NEXT: entry:
2747 ; CHECK-NEXT: %[[INV_AND:.*]] = and i1 %cond3, %cond1
2748 ; CHECK-NEXT: br i1 %[[INV_AND]], label %entry.split, label %entry.split.us
2749
2750 loop_begin:
2751 %v1 = load i1, i1* %ptr1
2752 %v2 = load i1, i1* %ptr2
2753 %cond_and1 = and i1 %v1, %cond1
2754 %cond_or1 = or i1 %v2, %cond2
2755 %cond_and2 = and i1 %cond_and1, %cond_or1
2756 %cond_and3 = and i1 %cond_and2, %cond3
2757 br i1 %cond_and3, label %loop_a, label %loop_b
2758 ; The 'loop_b' unswitched loop.
2759 ;
2760 ; CHECK: entry.split.us:
2761 ; CHECK-NEXT: br label %loop_begin.us
2762 ;
2763 ; CHECK: loop_begin.us:
2764 ; CHECK-NEXT: %[[V1_US:.*]] = load i1, i1* %ptr1
2765 ; CHECK-NEXT: %[[V2_US:.*]] = load i1, i1* %ptr2
2766 ; CHECK-NEXT: %[[AND1_US:.*]] = and i1 %[[V1_US]], false
2767 ; CHECK-NEXT: %[[OR1_US:.*]] = or i1 %[[V2_US]], %cond2
2768 ; CHECK-NEXT: %[[AND2_US:.*]] = and i1 %[[AND1_US]], %[[OR1_US]]
2769 ; CHECK-NEXT: %[[AND3_US:.*]] = and i1 %[[AND2_US]], false
2770 ; CHECK-NEXT: br label %loop_b.us
2771 ;
2772 ; CHECK: loop_b.us:
2773 ; CHECK-NEXT: call void @b()
2774 ; CHECK-NEXT: br label %latch.us
2775 ;
2776 ; CHECK: latch.us:
2777 ; CHECK-NEXT: %[[V3_US:.*]] = load i1, i1* %ptr3
2778 ; CHECK-NEXT: br i1 %[[V3_US]], label %loop_begin.us, label %loop_exit.split.us
2779 ;
2780 ; CHECK: loop_exit.split.us:
2781 ; CHECK-NEXT: br label %loop_exit
2782
2783 ; The original loop.
2784 ;
2785 ; CHECK: entry.split:
2786 ; CHECK-NEXT: br label %loop_begin
2787 ;
2788 ; CHECK: loop_begin:
2789 ; CHECK-NEXT: %[[V1:.*]] = load i1, i1* %ptr1
2790 ; CHECK-NEXT: %[[V2:.*]] = load i1, i1* %ptr2
2791 ; CHECK-NEXT: %[[AND1:.*]] = and i1 %[[V1]], true
2792 ; CHECK-NEXT: %[[OR1:.*]] = or i1 %[[V2]], %cond2
2793 ; CHECK-NEXT: %[[AND2:.*]] = and i1 %[[AND1]], %[[OR1]]
2794 ; CHECK-NEXT: %[[AND3:.*]] = and i1 %[[AND2]], true
2795 ; CHECK-NEXT: br i1 %[[AND3]], label %loop_a, label %loop_b
2796
2797 loop_a:
2798 call void @a()
2799 br label %latch
2800 ; CHECK: loop_a:
2801 ; CHECK-NEXT: call void @a()
2802 ; CHECK-NEXT: br label %latch
2803
2804 loop_b:
2805 call void @b()
2806 br label %latch
2807 ; CHECK: loop_b:
2808 ; CHECK-NEXT: call void @b()
2809 ; CHECK-NEXT: br label %latch
2810
2811 latch:
2812 %v3 = load i1, i1* %ptr3
2813 br i1 %v3, label %loop_begin, label %loop_exit
2814 ; CHECK: latch:
2815 ; CHECK-NEXT: %[[V3:.*]] = load i1, i1* %ptr3
2816 ; CHECK-NEXT: br i1 %[[V3]], label %loop_begin, label %loop_exit.split
2817
2818 loop_exit:
2819 ret i32 0
2820 ; CHECK: loop_exit.split:
2821 ; CHECK-NEXT: br label %loop_exit
2822 ;
2823 ; CHECK: loop_exit:
2824 ; CHECK-NEXT: ret
2825 }