llvm.org GIT mirror llvm / ba89ffc
[PM/LoopUnswitch] Fix PR37651 by correctly invalidating SCEV when unswitching loops. Original patch trying to address this was sent in D47624, but that didn't quite handle things correctly. There are two key principles used to select whether and how to invalidate SCEV-cached information about loops: 1) We must invalidate any info SCEV has cached before unswitching as we may change (or destroy) the loop structure by the act of unswitching, and make it hard to recover everything we want to invalidate within SCEV. 2) We need to invalidate all of the loops whose CFGs are mutated by the unswitching. Notably, this isn't the *entire* loop nest, this is every loop contained by the outermost loop reached by an exit block relevant to the unswitch. And we need to do this even when doing trivial unswitching. I've added more focused tests that directly check that SCEV starts off with imprecise information and after unswitching (and simplifying instructions) re-querying SCEV will produce precise information. These tests also specifically work to check that an *outer* loop's information becomes precise. However, the testing here is still a bit imperfect. Crafting test cases that reliably fail to be analyzed by SCEV before unswitching and succeed afterward proved ... very, very hard. It took me several hours and careful work to build these, and I'm not optimistic about necessarily coming up with more to cover more elaborate possibilities. Fortunately, the code pattern we are testing here in the pass is really straightforward and reliable. Thanks to Max Kazantsev for the initial work on this as well as the review, and to Hal Finkel for helping me talk through approaches to test this stuff even if it didn't come to much. Differential Revision: https://reviews.llvm.org/D47624 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@336183 91177308-0d34-0410-b5e6-96231b3b80d8 Chandler Carruth 1 year, 2 months ago
2 changed file(s) with 272 addition(s) and 21 deletion(s). Raw diff Collapse all Expand all
252252 /// (splitting the exit block as necessary). It simplifies the branch within
253253 /// the loop to an unconditional branch but doesn't remove it entirely. Further
254254 /// cleanup can be done with some simplify-cfg like pass.
255 ///
256 /// If `SE` is not null, it will be updated based on the potential loop SCEVs
257 /// invalidated by this.
255258 static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
256 LoopInfo &LI) {
259 LoopInfo &LI, ScalarEvolution *SE) {
257260 assert(BI.isConditional() && "Can only unswitch a conditional branch!");
258261 LLVM_DEBUG(dbgs() << " Trying to unswitch branch: " << BI << "\n");
259262
316319 dbgs() << "\n";
317320 }
318321 });
322
323 // If we have scalar evolutions, we need to invalidate them including this
324 // loop and the loop containing the exit block.
325 if (SE) {
326 if (Loop *ExitL = LI.getLoopFor(LoopExitBB))
327 SE->forgetLoop(ExitL);
328 else
329 // Forget the entire nest as this exits the entire nest.
330 SE->forgetTopmostLoop(&L);
331 }
319332
320333 // Split the preheader, so that we know that there is a safe place to insert
321334 // the conditional branch. We will change the preheader to have a conditional
419432 /// switch will not be revisited. If after unswitching there is only a single
420433 /// in-loop successor, the switch is further simplified to an unconditional
421434 /// branch. Still more cleanup can be done with some simplify-cfg like pass.
435 ///
436 /// If `SE` is not null, it will be updated based on the potential loop SCEVs
437 /// invalidated by this.
422438 static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
423 LoopInfo &LI) {
439 LoopInfo &LI, ScalarEvolution *SE) {
424440 LLVM_DEBUG(dbgs() << " Trying to unswitch switch: " << SI << "\n");
425441 Value *LoopCond = SI.getCondition();
426442
447463
448464 LLVM_DEBUG(dbgs() << " unswitching trivial cases...\n");
449465
466 // We may need to invalidate SCEVs for the outermost loop reached by any of
467 // the exits.
468 Loop *OuterL = &L;
469
450470 SmallVector<std::pair<ConstantInt *, BasicBlock *>, 4> ExitCases;
451471 ExitCases.reserve(ExitCaseIndices.size());
452472 // We walk the case indices backwards so that we remove the last case first
453473 // and don't disrupt the earlier indices.
454474 for (unsigned Index : reverse(ExitCaseIndices)) {
455475 auto CaseI = SI.case_begin() + Index;
476 // Compute the outer loop from this exit.
477 Loop *ExitL = LI.getLoopFor(CaseI->getCaseSuccessor());
478 if (!ExitL || ExitL->contains(OuterL))
479 OuterL = ExitL;
456480 // Save the value of this case.
457481 ExitCases.push_back({CaseI->getCaseValue(), CaseI->getCaseSuccessor()});
458482 // Delete the unswitched cases.
459483 SI.removeCase(CaseI);
484 }
485
486 if (SE) {
487 if (OuterL)
488 SE->forgetLoop(OuterL);
489 else
490 SE->forgetTopmostLoop(&L);
460491 }
461492
462493 // Check if after this all of the remaining cases point at the same
616647 ///
617648 /// The return value indicates whether anything was unswitched (and therefore
618649 /// changed).
650 ///
651 /// If `SE` is not null, it will be updated based on the potential loop SCEVs
652 /// invalidated by this.
619653 static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT,
620 LoopInfo &LI) {
654 LoopInfo &LI, ScalarEvolution *SE) {
621655 bool Changed = false;
622656
623657 // If loop header has only one reachable successor we should keep looking for
651685 if (isa<Constant>(SI->getCondition()))
652686 return Changed;
653687
654 if (!unswitchTrivialSwitch(L, *SI, DT, LI))
688 if (!unswitchTrivialSwitch(L, *SI, DT, LI, SE))
655689 // Couldn't unswitch this one so we're done.
656690 return Changed;
657691
683717
684718 // Found a trivial condition candidate: non-foldable conditional branch. If
685719 // we fail to unswitch this, we can't do anything else that is trivial.
686 if (!unswitchTrivialBranch(L, *BI, DT, LI))
720 if (!unswitchTrivialBranch(L, *BI, DT, LI, SE))
687721 return Changed;
688722
689723 // Mark that we managed to unswitch something.
16211655 static bool unswitchNontrivialInvariants(
16221656 Loop &L, TerminatorInst &TI, ArrayRef<Value *> Invariants,
16231657 DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
1624 function_ref<void(bool, ArrayRef<Loop *>)> UnswitchCB) {
1658 function_ref<void(bool, ArrayRef<Loop *>)> UnswitchCB,
1659 ScalarEvolution *SE) {
16251660 auto *ParentBB = TI.getParent();
16261661 BranchInst *BI = dyn_cast<BranchInst>(&TI);
16271662 SwitchInst *SI = BI ? nullptr : cast<SwitchInst>(&TI);
17021737 }
17031738 if (NewOuterExitL != OuterExitL && NewOuterExitL->contains(OuterExitL))
17041739 OuterExitL = NewOuterExitL;
1740 }
1741
1742 // At this point, we're definitely going to unswitch something so invalidate
1743 // any cached information in ScalarEvolution for the outermost loop
1744 // containing an exit block and all nested loops.
1745 if (SE) {
1746 if (OuterExitL)
1747 SE->forgetLoop(OuterExitL);
1748 else
1749 SE->forgetTopmostLoop(&L);
17051750 }
17061751
17071752 // If the edge from this terminator to a successor dominates that successor,
19672012 return Cost;
19682013 }
19692014
1970 static bool unswitchBestCondition(
1971 Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
1972 TargetTransformInfo &TTI,
1973 function_ref<void(bool, ArrayRef<Loop *>)> UnswitchCB) {
2015 static bool
2016 unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI,
2017 AssumptionCache &AC, TargetTransformInfo &TTI,
2018 function_ref<void(bool, ArrayRef<Loop *>)> UnswitchCB,
2019 ScalarEvolution *SE) {
19742020 // Collect all invariant conditions within this loop (as opposed to an inner
19752021 // loop which would be handled when visiting that inner loop).
19762022 SmallVector<std::pair<TerminatorInst *, TinyPtrVector<Value *>>, 4>
21632209 << BestUnswitchCost << ") terminator: " << *BestUnswitchTI
21642210 << "\n");
21652211 return unswitchNontrivialInvariants(
2166 L, *BestUnswitchTI, BestUnswitchInvariants, DT, LI, AC, UnswitchCB);
2212 L, *BestUnswitchTI, BestUnswitchInvariants, DT, LI, AC, UnswitchCB, SE);
21672213 }
21682214
21692215 /// Unswitch control flow predicated on loop invariant conditions.
21722218 /// require duplicating any part of the loop) out of the loop body. It then
21732219 /// looks at other loop invariant control flows and tries to unswitch those as
21742220 /// well by cloning the loop if the result is small enough.
2175 static bool
2176 unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
2177 TargetTransformInfo &TTI, bool NonTrivial,
2178 function_ref<void(bool, ArrayRef<Loop *>)> UnswitchCB) {
2221 ///
2222 /// The `DT`, `LI`, `AC`, `TTI` parameters are required analyses that are also
2223 /// updated based on the unswitch.
2224 ///
2225 /// If either `NonTrivial` is true or the flag `EnableNonTrivialUnswitch` is
2226 /// true, we will attempt to do non-trivial unswitching as well as trivial
2227 /// unswitching.
2228 ///
2229 /// The `UnswitchCB` callback provided will be run after unswitching is
2230 /// complete, with the first parameter set to `true` if the provided loop
2231 /// remains a loop, and a list of new sibling loops created.
2232 ///
2233 /// If `SE` is non-null, we will update that analysis based on the unswitching
2234 /// done.
2235 static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI,
2236 AssumptionCache &AC, TargetTransformInfo &TTI,
2237 bool NonTrivial,
2238 function_ref<void(bool, ArrayRef<Loop *>)> UnswitchCB,
2239 ScalarEvolution *SE) {
21792240 assert(L.isRecursivelyLCSSAForm(DT, LI) &&
21802241 "Loops must be in LCSSA form before unswitching.");
21812242 bool Changed = false;
21852246 return false;
21862247
21872248 // Try trivial unswitch first before loop over other basic blocks in the loop.
2188 if (unswitchAllTrivialConditions(L, DT, LI)) {
2249 if (unswitchAllTrivialConditions(L, DT, LI, SE)) {
21892250 // If we unswitched successfully we will want to clean up the loop before
21902251 // processing it further so just mark it as unswitched and return.
21912252 UnswitchCB(/*CurrentLoopValid*/ true, {});
22062267
22072268 // Try to unswitch the best invariant condition. We prefer this full unswitch to
22082269 // a partial unswitch when possible below the threshold.
2209 if (unswitchBestCondition(L, DT, LI, AC, TTI, UnswitchCB))
2270 if (unswitchBestCondition(L, DT, LI, AC, TTI, UnswitchCB, SE))
22102271 return true;
22112272
22122273 // No other opportunities to unswitch.
22402301 U.markLoopAsDeleted(L, LoopName);
22412302 };
22422303
2243 if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.TTI, NonTrivial,
2244 UnswitchCB))
2304 if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.TTI, NonTrivial, UnswitchCB,
2305 &AR.SE))
22452306 return PreservedAnalyses::all();
22462307
22472308 // Historically this pass has had issues with the dominator tree so verify it
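22882349 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
22892350 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
22902351 auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
2352
2353 auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();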
2354 auto *SE = SEWP ? &SEWP->getSE() : nullptr;
22912355
22922356 auto UnswitchCB = [&L, &LPM](bool CurrentLoopValid,
22932357 ArrayRef<Loop *> NewLoops) {
23042368 LPM.markLoopAsDeleted(*L);
23052369 };
23062370
2307 bool Changed =
2308 unswitchLoop(*L, DT, LI, AC, TTI, NonTrivial, UnswitchCB);
2371 bool Changed = unswitchLoop(*L, DT, LI, AC, TTI, NonTrivial, UnswitchCB, SE);
23092372
23102373 // If anything was unswitched, also clear any cached information about this
23112374 // loop.
0 ; RUN: opt -passes='print<scalar-evolution>,loop(unswitch,loop-instsimplify),print<scalar-evolution>' -enable-nontrivial-unswitch -S < %s 2>%t.scev | FileCheck %s
1 ; RUN: FileCheck %s --check-prefix=SCEV < %t.scev
2
3 target triple = "x86_64-unknown-linux-gnu"
4
5 declare void @f()
6
7 ; Check that trivially unswitching an inner loop resets both the inner and outer
8 ; loop trip count.
9 define void @test1(i32 %n, i32 %m, i1 %cond) {
10 ; Check that SCEV has no trip count before unswitching.
11 ; SCEV-LABEL: Determining loop execution counts for: @test1
12 ; SCEV: Loop %inner_loop_begin: Unpredictable backedge-taken count.
13 ; SCEV: Loop %outer_loop_begin: Unpredictable backedge-taken count.
14 ;
15 ; Now check that after unswitching and simplifying instructions we get clean
16 ; backedge-taken counts.
17 ; SCEV-LABEL: Determining loop execution counts for: @test1
18 ; SCEV: Loop %inner_loop_begin: backedge-taken count is (-1 + (1 smax %m))
19 ; SCEV: Loop %outer_loop_begin: backedge-taken count is (-1 + (1 smax %n))
20 ;
21 ; And verify the code matches what we expect.
22 ; CHECK-LABEL: define void @test1(
23 entry:
24 br label %outer_loop_begin
25 ; Ensure the outer loop didn't get unswitched.
26 ; CHECK: entry:
27 ; CHECK-NEXT: br label %outer_loop_begin
28
29 outer_loop_begin:
30 %i = phi i32 [ %i.next, %outer_loop_latch ], [ 0, %entry ]
31 ; Block unswitching of the outer loop with a noduplicate call.
32 call void @f() noduplicate
33 br label %inner_loop_begin
34 ; Ensure the inner loop got unswitched into the outer loop.
35 ; CHECK: outer_loop_begin:
36 ; CHECK-NEXT: %{{.*}} = phi i32
37 ; CHECK-NEXT: call void @f()
38 ; CHECK-NEXT: br i1 %cond,
39
40 inner_loop_begin:
41 %j = phi i32 [ %j.next, %inner_loop_latch ], [ 0, %outer_loop_begin ]
42 br i1 %cond, label %inner_loop_latch, label %inner_loop_early_exit
43
44 inner_loop_latch:
45 %j.next = add nsw i32 %j, 1
46 %j.cmp = icmp slt i32 %j.next, %m
47 br i1 %j.cmp, label %inner_loop_begin, label %inner_loop_late_exit
48
49 inner_loop_early_exit:
50 %j.lcssa = phi i32 [ %i, %inner_loop_begin ]
51 br label %outer_loop_latch
52
53 inner_loop_late_exit:
54 br label %outer_loop_latch
55
56 outer_loop_latch:
57 %i.phi = phi i32 [ %j.lcssa, %inner_loop_early_exit ], [ %i, %inner_loop_late_exit ]
58 %i.next = add nsw i32 %i.phi, 1
59 %i.cmp = icmp slt i32 %i.next, %n
60 br i1 %i.cmp, label %outer_loop_begin, label %exit
61
62 exit:
63 ret void
64 }
65
66 ; Check that trivially unswitching an inner loop resets both the inner and outer
67 ; loop trip count.
68 define void @test2(i32 %n, i32 %m, i32 %cond) {
69 ; Check that SCEV has no trip count before unswitching.
70 ; SCEV-LABEL: Determining loop execution counts for: @test2
71 ; SCEV: Loop %inner_loop_begin: Unpredictable backedge-taken count.
72 ; SCEV: Loop %outer_loop_begin: Unpredictable backedge-taken count.
73 ;
74 ; Now check that after unswitching and simplifying instructions we get clean
75 ; backedge-taken counts.
76 ; SCEV-LABEL: Determining loop execution counts for: @test2
77 ; SCEV: Loop %inner_loop_begin: backedge-taken count is (-1 + (1 smax %m))
78 ; FIXME: The following backedge taken count should be known but isn't apparently
79 ; just because of a switch in the outer loop.
80 ; SCEV: Loop %outer_loop_begin: Unpredictable backedge-taken count.
81 ;
82 ; CHECK-LABEL: define void @test2(
83 entry:
84 br label %outer_loop_begin
85 ; Ensure the outer loop didn't get unswitched.
86 ; CHECK: entry:
87 ; CHECK-NEXT: br label %outer_loop_begin
88
89 outer_loop_begin:
90 %i = phi i32 [ %i.next, %outer_loop_latch ], [ 0, %entry ]
91 ; Block unswitching of the outer loop with a noduplicate call.
92 call void @f() noduplicate
93 br label %inner_loop_begin
94 ; Ensure the inner loop got unswitched into the outer loop.
95 ; CHECK: outer_loop_begin:
96 ; CHECK-NEXT: %{{.*}} = phi i32
97 ; CHECK-NEXT: call void @f()
98 ; CHECK-NEXT: switch i32 %cond,
99
100 inner_loop_begin:
101 %j = phi i32 [ %j.next, %inner_loop_latch ], [ 0, %outer_loop_begin ]
102 switch i32 %cond, label %inner_loop_early_exit [
103 i32 1, label %inner_loop_latch
104 i32 2, label %inner_loop_latch
105 ]
106
107 inner_loop_latch:
108 %j.next = add nsw i32 %j, 1
109 %j.cmp = icmp slt i32 %j.next, %m
110 br i1 %j.cmp, label %inner_loop_begin, label %inner_loop_late_exit
111
112 inner_loop_early_exit:
113 %j.lcssa = phi i32 [ %i, %inner_loop_begin ]
114 br label %outer_loop_latch
115
116 inner_loop_late_exit:
117 br label %outer_loop_latch
118
119 outer_loop_latch:
120 %i.phi = phi i32 [ %j.lcssa, %inner_loop_early_exit ], [ %i, %inner_loop_late_exit ]
121 %i.next = add nsw i32 %i.phi, 1
122 %i.cmp = icmp slt i32 %i.next, %n
123 br i1 %i.cmp, label %outer_loop_begin, label %exit
124
125 exit:
126 ret void
127 }
128
129 ; Check that non-trivial unswitching of a branch in an inner loop into the outer
130 ; loop invalidates both inner and outer.
131 define void @test3(i32 %n, i32 %m, i1 %cond) {
132 ; Check that SCEV has no trip count before unswitching.
133 ; SCEV-LABEL: Determining loop execution counts for: @test3
134 ; SCEV: Loop %inner_loop_begin: Unpredictable backedge-taken count.
135 ; SCEV: Loop %outer_loop_begin: Unpredictable backedge-taken count.
136 ;
137 ; Now check that after unswitching and simplifying instructions we get clean
138 ; backedge-taken counts.
139 ; SCEV-LABEL: Determining loop execution counts for: @test3
140 ; SCEV: Loop %inner_loop_begin{{.*}}: backedge-taken count is (-1 + (1 smax %m))
141 ; SCEV: Loop %outer_loop_begin: backedge-taken count is (-1 + (1 smax %n))
142 ;
143 ; And verify the code matches what we expect.
144 ; CHECK-LABEL: define void @test3(
145 entry:
146 br label %outer_loop_begin
147 ; Ensure the outer loop didn't get unswitched.
148 ; CHECK: entry:
149 ; CHECK-NEXT: br label %outer_loop_begin
150
151 outer_loop_begin:
152 %i = phi i32 [ %i.next, %outer_loop_latch ], [ 0, %entry ]
153 ; Block unswitching of the outer loop with a noduplicate call.
154 call void @f() noduplicate
155 br label %inner_loop_begin
156 ; Ensure the inner loop got unswitched into the outer loop.
157 ; CHECK: outer_loop_begin:
158 ; CHECK-NEXT: %{{.*}} = phi i32
159 ; CHECK-NEXT: call void @f()
160 ; CHECK-NEXT: br i1 %cond,
161
162 inner_loop_begin:
163 %j = phi i32 [ %j.next, %inner_loop_latch ], [ 0, %outer_loop_begin ]
164 %j.tmp = add nsw i32 %j, 1
165 br i1 %cond, label %inner_loop_latch, label %inner_loop_early_exit
166
167 inner_loop_latch:
168 %j.next = add nsw i32 %j, 1
169 %j.cmp = icmp slt i32 %j.next, %m
170 br i1 %j.cmp, label %inner_loop_begin, label %inner_loop_late_exit
171
172 inner_loop_early_exit:
173 %j.lcssa = phi i32 [ %j.tmp, %inner_loop_begin ]
174 br label %outer_loop_latch
175
176 inner_loop_late_exit:
177 br label %outer_loop_latch
178
179 outer_loop_latch:
180 %inc.phi = phi i32 [ %j.lcssa, %inner_loop_early_exit ], [ 1, %inner_loop_late_exit ]
181 %i.next = add nsw i32 %i, %inc.phi
182 %i.cmp = icmp slt i32 %i.next, %n
183 br i1 %i.cmp, label %outer_loop_begin, label %exit
184
185 exit:
186 ret void
187 }