llvm.org GIT mirror llvm / c96b7cb
[PM/LoopUnswitch] When using the new SimpleLoopUnswitch pass, schedule loop-cleanup passes at the beginning of the loop pass pipeline, and re-enqueue loops after even trivial unswitching. This will allow us to much more consistently avoid simplifying code while doing trivial unswitching. I've also added a test case that specifically shows effective iteration using this technique. I've unconditionally updated the new PM as that is always using the SimpleLoopUnswitch pass, and I've made the pipeline changes for the old PM conditional on using this new unswitch pass. I added a bunch of comments to the loop pass pipeline in the old PM to make it more clear what is going on when reviewing. Hopefully this will unblock doing *partial* unswitching instead of just full unswitching. Differential Revision: https://reviews.llvm.org/D47408 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@333493 91177308-0d34-0410-b5e6-96231b3b80d8 Chandler Carruth 1 year, 3 months ago
6 changed file(s) with 108 addition(s) and 36 deletion(s). Raw diff Collapse all Expand all
389389
390390 // Add the primary loop simplification pipeline.
391391 // FIXME: Currently this is split into two loop pass pipelines because we run
392 // some function passes in between them. These can and should be replaced by
393 // loop pass equivalents but those aren't ready yet. Specifically,
394 // `SimplifyCFGPass` and `InstCombinePass` are used. We have
395 // `LoopSimplifyCFGPass` which isn't yet powerful enough, and the closest to
396 // the other we have is `LoopInstSimplify`.
392 // some function passes in between them. These can and should be removed
393 // and/or replaced by scheduling the loop pass equivalents in the correct
394 // positions. But those equivalent passes aren't powerful enough yet.
395 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
396 // used. We have `LoopSimplifyCFGPass` which isn't powerful enough yet to
397 // fully replace `SimplifyCFGPass`, and the closest to the other we have is
398 // `LoopInstSimplify`.
397399 LoopPassManager LPM1(DebugLogging), LPM2(DebugLogging);
400
401 // Simplify the loop body. We do this initially to clean up after other loop
402 // passes run, either when iterating on a loop or on inner loops with
403 // implications on the outer loop.
404 LPM1.addPass(LoopInstSimplifyPass());
405 LPM1.addPass(LoopSimplifyCFGPass());
398406
399407 // Rotate Loop - disable header duplication at -Oz
400408 LPM1.addPass(LoopRotatePass(Level != Oz));
137137 cl::Hidden,
138138 cl::desc("Disable shrink-wrap library calls"));
139139
140 static cl::opt
141 EnableSimpleLoopUnswitch("enable-simple-loop-unswitch", cl::init(false),
142 cl::Hidden,
143 cl::desc("Enable the simple loop unswitch pass."));
140 static cl::opt EnableSimpleLoopUnswitch(
141 "enable-simple-loop-unswitch", cl::init(false), cl::Hidden,
142 cl::desc("Enable the simple loop unswitch pass. Also enables independent "
143 "cleanup passes integrated into the loop pass manager pipeline."));
144144
145145 static cl::opt EnableGVNSink(
146146 "enable-gvn-sink", cl::init(false), cl::Hidden,
334334 MPM.add(createTailCallEliminationPass()); // Eliminate tail calls
335335 MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
336336 MPM.add(createReassociatePass()); // Reassociate expressions
337
338 // Begin the loop pass pipeline.
339 if (EnableSimpleLoopUnswitch) {
340 // The simple loop unswitch pass relies on separate cleanup passes. Schedule
341 // them first so when we re-process a loop they run before other loop
342 // passes.
343 MPM.add(createLoopInstSimplifyPass());
344 MPM.add(createLoopSimplifyCFGPass());
345 }
337346 // Rotate Loop - disable header duplication at -Oz
338347 MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1));
339348 MPM.add(createLICMPass()); // Hoist loop invariants
341350 MPM.add(createSimpleLoopUnswitchLegacyPass());
342351 else
343352 MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget));
353 // FIXME: We break the loop pass pipeline here in order to do full
354 // simplify-cfg. Eventually loop-simplifycfg should be enhanced to replace the
355 // need for this.
344356 MPM.add(createCFGSimplificationPass());
345357 addInstructionCombiningPass(MPM);
358 // We resume loop passes creating a second loop pipeline here.
346359 MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars
347360 MPM.add(createLoopIdiomPass()); // Recognize idioms like memset.
348361 addExtensionsToPM(EP_LateLoopOptimizations, MPM);
349362 MPM.add(createLoopDeletionPass()); // Delete dead loops
350363
351364 if (EnableLoopInterchange) {
365 // FIXME: These are function passes and break the loop pass pipeline.
352366 MPM.add(createLoopInterchangePass()); // Interchange loops
353367 MPM.add(createCFGSimplificationPass());
354368 }
355369 if (!DisableUnrollLoops)
356370 MPM.add(createSimpleLoopUnrollPass(OptLevel)); // Unroll small loops
357371 addExtensionsToPM(EP_LoopOptimizerEnd, MPM);
372 // This ends the loop pass pipelines.
358373
359374 if (OptLevel > 1) {
360375 MPM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds
14651465 static bool unswitchInvariantBranch(
14661466 Loop &L, BranchInst &BI, DominatorTree &DT, LoopInfo &LI,
14671467 AssumptionCache &AC,
1468 function_ref)> NonTrivialUnswitchCB) {
1468 function_ref)> UnswitchCB) {
14691469 assert(BI.isConditional() && "Can only unswitch a conditional branch!");
14701470 assert(L.isLoopInvariant(BI.getCondition()) &&
14711471 "Can only unswitch an invariant branch condition!");
17051705 for (Loop *UpdatedL : llvm::concat(NonChildClonedLoops, HoistedLoops))
17061706 if (UpdatedL->getParentLoop() == ParentL)
17071707 SibLoops.push_back(UpdatedL);
1708 NonTrivialUnswitchCB(IsStillLoop, SibLoops);
1708 UnswitchCB(IsStillLoop, SibLoops);
17091709
17101710 ++NumBranches;
17111711 return true;
17531753 static bool
17541754 unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
17551755 TargetTransformInfo &TTI, bool NonTrivial,
1756 function_ref)> NonTrivialUnswitchCB) {
1756 function_ref)> UnswitchCB) {
17571757 assert(L.isRecursivelyLCSSAForm(DT, LI) &&
17581758 "Loops must be in LCSSA form before unswitching.");
1759 bool Changed = false;
17601759
17611760 // Must be in loop simplified form: we need a preheader and dedicated exits.
17621761 if (!L.isLoopSimplifyForm())
17631762 return false;
17641763
17651764 // Try trivial unswitch first before loop over other basic blocks in the loop.
1766 Changed |= unswitchAllTrivialConditions(L, DT, LI);
1765 if (unswitchAllTrivialConditions(L, DT, LI)) {
1766 // If we unswitched successfully we will want to clean up the loop before
1767 // processing it further so just mark it as unswitched and return.
1768 UnswitchCB(/*CurrentLoopValid*/ true, {});
1769 return true;
1770 }
17671771
17681772 // If we're not doing non-trivial unswitching, we're done. We both accept
17691773 // a parameter but also check a local flag that can be used for testing
17701774 // and debugging.
17711775 if (!NonTrivial && !EnableNonTrivialUnswitch)
1772 return Changed;
1776 return false;
17731777
17741778 // Collect all remaining invariant branch conditions within this loop (as
17751779 // opposed to an inner loop which would be handled when visiting that inner
17841788
17851789 // If we didn't find any candidates, we're done.
17861790 if (UnswitchCandidates.empty())
1787 return Changed;
1791 return false;
17881792
17891793 // Check if there are irreducible CFG cycles in this loop. If so, we cannot
17901794 // easily unswitch non-trivial edges out of the loop. Doing so might turn the
17951799 LoopBlocksRPO RPOT(&L);
17961800 RPOT.perform(&LI);
17971801 if (containsIrreducibleCFG(RPOT, LI))
1798 return Changed;
1802 return false;
17991803
18001804 LLVM_DEBUG(
18011805 dbgs() << "Considering " << UnswitchCandidates.size()
18231827 continue;
18241828
18251829 if (I.getType()->isTokenTy() && I.isUsedOutsideOfBlock(BB))
1826 return Changed;
1830 return false;
18271831 if (auto CS = CallSite(&I))
18281832 if (CS.isConvergent() || CS.cannotDuplicate())
1829 return Changed;
1833 return false;
18301834
18311835 Cost += TTI.getUserCost(&I);
18321836 }
18971901 }
18981902 }
18991903
1900 if (BestUnswitchCost < UnswitchThreshold) {
1901 LLVM_DEBUG(dbgs() << " Trying to unswitch non-trivial (cost = "
1902 << BestUnswitchCost << ") branch: " << *BestUnswitchTI
1903 << "\n");
1904 Changed |= unswitchInvariantBranch(L, cast(*BestUnswitchTI), DT,
1905 LI, AC, NonTrivialUnswitchCB);
1906 } else {
1904 if (BestUnswitchCost >= UnswitchThreshold) {
19071905 LLVM_DEBUG(dbgs() << "Cannot unswitch, lowest cost found: "
19081906 << BestUnswitchCost << "\n");
1909 }
1910
1911 return Changed;
1907 return false;
1908 }
1909
1910 LLVM_DEBUG(dbgs() << " Trying to unswitch non-trivial (cost = "
1911 << BestUnswitchCost << ") branch: " << *BestUnswitchTI
1912 << "\n");
1913 return unswitchInvariantBranch(L, cast(*BestUnswitchTI), DT, LI,
1914 AC, UnswitchCB);
19121915 }
19131916
19141917 PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
19241927 // after it has been deleted.
19251928 std::string LoopName = L.getName();
19261929
1927 auto NonTrivialUnswitchCB = [&L, &U, &LoopName](bool CurrentLoopValid,
1928 ArrayRef NewLoops) {
1930 auto UnswitchCB = [&L, &U, &LoopName](bool CurrentLoopValid,
1931 ArrayRef NewLoops) {
19291932 // If we did a non-trivial unswitch, we have added new (cloned) loops.
1930 U.addSiblingLoops(NewLoops);
1933 if (!NewLoops.empty())
1934 U.addSiblingLoops(NewLoops);
19311935
19321936 // If the current loop remains valid, we should revisit it to catch any
19331937 // other unswitch opportunities. Otherwise, we need to mark it as deleted.
19381942 };
19391943
19401944 if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.TTI, NonTrivial,
1941 NonTrivialUnswitchCB))
1945 UnswitchCB))
19421946 return PreservedAnalyses::all();
19431947
19441948 // Historically this pass has had issues with the dominator tree so verify it
19861990 auto &AC = getAnalysis().getAssumptionCache(F);
19871991 auto &TTI = getAnalysis().getTTI(F);
19881992
1989 auto NonTrivialUnswitchCB = [&L, &LPM](bool CurrentLoopValid,
1990 ArrayRef NewLoops) {
1993 auto UnswitchCB = [&L, &LPM](bool CurrentLoopValid,
1994 ArrayRef NewLoops) {
19911995 // If we did a non-trivial unswitch, we have added new (cloned) loops.
19921996 for (auto *NewL : NewLoops)
19931997 LPM.addLoop(*NewL);
20022006 };
20032007
20042008 bool Changed =
2005 unswitchLoop(*L, DT, LI, AC, TTI, NonTrivial, NonTrivialUnswitchCB);
2009 unswitchLoop(*L, DT, LI, AC, TTI, NonTrivial, UnswitchCB);
20062010
20072011 // If anything was unswitched, also clear any cached information about this
20082012 // loop.
144144 ; CHECK-O-NEXT: Running analysis: ScalarEvolutionAnalysis
145145 ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
146146 ; CHECK-O-NEXT: Starting Loop pass manager run.
147 ; CHECK-O-NEXT: Running pass: LoopInstSimplifyPass
148 ; CHECK-O-NEXT: Running pass: LoopSimplifyCFGPass
147149 ; CHECK-O-NEXT: Running pass: LoopRotatePass
148150 ; CHECK-O-NEXT: Running pass: LICM
149151 ; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
128128 ; CHECK-O-NEXT: Running analysis: ScalarEvolutionAnalysis
129129 ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
130130 ; CHECK-O-NEXT: Starting Loop pass manager run.
131 ; CHECK-O-NEXT: Running pass: LoopInstSimplifyPass
132 ; CHECK-O-NEXT: Running pass: LoopSimplifyCFGPass
131133 ; CHECK-O-NEXT: Running pass: LoopRotatePass
132134 ; CHECK-O-NEXT: Running pass: LICM
133135 ; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
0 ; RUN: opt -passes='loop(loop-instsimplify,simplify-cfg,unswitch),verify' -S < %s | FileCheck %s
1
2 declare void @some_func() noreturn
3
4 define i32 @test1(i32* %var, i1 %cond1, i1 %cond2) {
5 ; CHECK-LABEL: @test1(
6 entry:
7 br label %loop_begin
8 ; CHECK-NEXT: entry:
9 ; CHECK-NEXT: br i1 %{{.*}}, label %entry.split, label %loop_exit.split
10 ;
11 ; CHECK: entry.split:
12 ; CHECK-NEXT: br i1 %{{.*}}, label %entry.split.split, label %loop_exit
13 ;
14 ; CHECK: entry.split.split:
15 ; CHECK-NEXT: br label %do_something
16
17 loop_begin:
18 br i1 %cond1, label %continue, label %loop_exit ; first trivial condition
19
20 continue:
21 %var_val = load i32, i32* %var
22 %var_cond = trunc i32 %var_val to i1
23 %maybe_cond = select i1 %cond1, i1 %cond2, i1 %var_cond
24 br i1 %maybe_cond, label %do_something, label %loop_exit ; second trivial condition
25
26 do_something:
27 call void @some_func() noreturn nounwind
28 br label %loop_begin
29 ; CHECK: do_something:
30 ; CHECK-NEXT: call
31 ; CHECK-NEXT: br label %do_something
32
33 loop_exit:
34 ret i32 0
35 ; CHECK: loop_exit:
36 ; CHECK-NEXT: br label %loop_exit.split
37 ;
38 ; CHECK: loop_exit.split:
39 ; CHECK-NEXT: ret
40 }