llvm.org GIT mirror llvm / 79fad5d
[SimpleLoopUnswitch] Unswitch by experimental.guard intrinsics This patch adds support for `llvm.experimental.guard` intrinsics to non-trivial simple loop unswitching. These intrinsics represent implicit control flow which has pretty much the same semantics as usual conditional branches. The algorithm for dealing with them is as follows: - Consider guards as unswitching candidates; - If a guard is considered the best candidate, turn it into a branch; - Apply the normal unswitching algorithm to this branch. The patch has no compile-time effect on code that does not contain any guards. Differential Revision: https://reviews.llvm.org/D53744 Reviewed By: chandlerc git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@345387 91177308-0d34-0410-b5e6-96231b3b80d8 Max Kazantsev 10 months ago
2 changed file(s) with 345 addition(s) and 2 deletion(s). Raw diff Collapse all Expand all
1818 #include "llvm/Analysis/AssumptionCache.h"
1919 #include "llvm/Analysis/CFG.h"
2020 #include "llvm/Analysis/CodeMetrics.h"
21 #include "llvm/Analysis/GuardUtils.h"
2122 #include "llvm/Analysis/InstructionSimplify.h"
2223 #include "llvm/Analysis/LoopAnalysisManager.h"
2324 #include "llvm/Analysis/LoopInfo.h"
5859
5960 STATISTIC(NumBranches, "Number of branches unswitched");
6061 STATISTIC(NumSwitches, "Number of switches unswitched");
62 STATISTIC(NumGuards, "Number of guards turned into branches for unswitching");
6163 STATISTIC(NumTrivial, "Number of unswitches that are trivial");
6264
6365 static cl::opt EnableNonTrivialUnswitch(
6870 static cl::opt
6971 UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden,
7072 cl::desc("The cost threshold for unswitching a loop."));
73
74 static cl::opt UnswitchGuards(
75 "simple-loop-unswitch-guards", cl::init(true), cl::Hidden,
76 cl::desc("If enabled, simple loop unswitching will also consider "
77 "llvm.experimental.guard intrinsics as unswitch candidates."));
7178
7279 /// Collect all of the loop invariant input values transitively used by the
7380 /// homogeneous instruction graph from a given root.
21682175 return Cost;
21692176 }
21702177
2178 /// Turns an llvm.experimental.guard intrinsic into an explicit control flow
2179 /// branch, making the following replacement:
2180 ///
2181 ///   <code before guard>
2182 ///   call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
2183 ///   <code after guard>
2184 ///
2185 /// into
2186 ///
2187 ///   <code before guard>
2188 ///   br i1 %cond, label %guarded, label %deopt
2189 ///
2190 /// guarded:
2191 ///   <code after guard>
2192 ///
2193 /// deopt:
2194 ///   call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
2195 ///   unreachable
2196 ///
2197 /// It also makes all relevant DT and LI updates, so that all structures are in a
2198 /// valid state after this transform. The new deopt block is appended to ExitBlocks, and the new branch at the end of GI's original block is returned.
2199 static BranchInst *
2200 turnGuardIntoBranch(IntrinsicInst *GI, Loop &L,
2201 SmallVectorImpl &ExitBlocks,
2202 DominatorTree &DT, LoopInfo &LI) {
2203 SmallVector DTUpdates;
2204 LLVM_DEBUG(dbgs() << "Turning " << *GI << " into a branch.\n");
2205 BasicBlock *CheckBB = GI->getParent();
2206
2207 // Queue deletion of every CheckBB -> successor edge from the DomTree. A block
2208 // can appear among the successors more than once, but its edge is queued only once.
2209 SmallPtrSet Successors;
2210 for (auto *Succ : successors(CheckBB))
2211 if (Successors.insert(Succ).second)
2212 DTUpdates.push_back({DominatorTree::Delete, CheckBB, Succ});
2213
2214 Instruction *DeoptBlockTerm =
2215 SplitBlockAndInsertIfThen(GI->getArgOperand(0), GI, true);
2216 BranchInst *CheckBI = cast(CheckBB->getTerminator());
2217 // SplitBlockAndInsertIfThen inserts control flow that branches to
2218 // DeoptBlockTerm if the condition is true. We want the opposite, so swap the successors.
2219 CheckBI->swapSuccessors();
2220
2221 BasicBlock *GuardedBlock = CheckBI->getSuccessor(0);
2222 GuardedBlock->setName("guarded");
2223 CheckBI->getSuccessor(1)->setName("deopt");
2224
2225 // The deopt block is not added to the loop, so we now have a new exit block.
2226 ExitBlocks.push_back(CheckBI->getSuccessor(1));
2227
2228 GI->moveBefore(DeoptBlockTerm);
2229 GI->setArgOperand(0, ConstantInt::getFalse(GI->getContext()));
2230
2231 // Queue insertion of CheckBB's new outgoing edges (to the guarded and deopt blocks) into DomTree.
2232 for (auto *Succ : successors(CheckBB))
2233 DTUpdates.push_back({DominatorTree::Insert, CheckBB, Succ});
2234
2235 // Now the blocks that used to be CheckBB's successors are GuardedBlock's
2236 // successors.
2237 for (auto *Succ : Successors)
2238 DTUpdates.push_back({DominatorTree::Insert, GuardedBlock, Succ});
2239
2240 // Apply all queued edge deletions and insertions to DT in one batch.
2241 DT.applyUpdates(DTUpdates);
2242 // Inform LI of a new loop block.
2243 L.addBasicBlockToLoop(GuardedBlock, LI);
2244
2245 ++NumGuards;
2246 return CheckBI;
2247 }
2248
21712249 static bool
21722250 unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI,
21732251 AssumptionCache &AC, TargetTransformInfo &TTI,
21772255 // loop which would be handled when visiting that inner loop).
21782256 SmallVector>, 4>
21792257 UnswitchCandidates;
2258
2259 // Whether or not we should also collect guards in the loop.
2260 bool CollectGuards = false;
2261 if (UnswitchGuards) {
2262 auto *GuardDecl = L.getHeader()->getParent()->getParent()->getFunction(
2263 Intrinsic::getName(Intrinsic::experimental_guard));
2264 if (GuardDecl && !GuardDecl->use_empty())
2265 CollectGuards = true;
2266 }
2267
21802268 for (auto *BB : L.blocks()) {
21812269 if (LI.getLoopFor(BB) != &L)
21822270 continue;
2271
2272 if (CollectGuards)
2273 for (auto &I : *BB)
2274 if (isGuard(&I)) {
2275 auto *Cond = cast(&I)->getArgOperand(0);
2276 // TODO: Support AND, OR conditions and partial unswitching.
2277 if (!isa(Cond) && L.isLoopInvariant(Cond))
2278 UnswitchCandidates.push_back({&I, {Cond}});
2279 }
21832280
21842281 if (auto *SI = dyn_cast(BB->getTerminator())) {
21852282 // We can only consider fully loop-invariant switch conditions as we need
23452442 // Now scale the cost by the number of unique successors minus one. We
23462443 // subtract one because there is already at least one copy of the entire
23472444 // loop. This is computing the new cost of unswitching a condition.
2348 assert(Visited.size() > 1 &&
2445 // Note that a guard always has 2 unique successors; they are implicit and
2446 // will be materialized if we decide to unswitch it.
2447 int SuccessorsCount = isGuard(&TI) ? 2 : Visited.size();
2448 assert(SuccessorsCount > 1 &&
23492449 "Cannot unswitch a condition without multiple distinct successors!");
2350 return Cost * (Visited.size() - 1);
2450 return Cost * (SuccessorsCount - 1);
23512451 };
23522452 Instruction *BestUnswitchTI = nullptr;
23532453 int BestUnswitchCost;
23742474 return false;
23752475 }
23762476
2477 // If the best candidate is a guard, turn it into a branch.
2478 if (isGuard(BestUnswitchTI))
2479 BestUnswitchTI = turnGuardIntoBranch(cast(BestUnswitchTI), L,
2480 ExitBlocks, DT, LI);
2481
23772482 LLVM_DEBUG(dbgs() << " Unswitching non-trivial (cost = "
23782483 << BestUnswitchCost << ") terminator: " << *BestUnswitchTI
23792484 << "\n");
0 ; RUN: opt -passes='loop(unswitch),verify' -enable-nontrivial-unswitch -simple-loop-unswitch-guards -S < %s | FileCheck %s
1 ; RUN: opt -simple-loop-unswitch -enable-nontrivial-unswitch -simple-loop-unswitch-guards -S < %s | FileCheck %s
2
3 declare void @llvm.experimental.guard(i1, ...)
4
5 define void @test_simple_case(i1 %cond, i32 %N) {
6 ; CHECK-LABEL: @test_simple_case(
7 ; CHECK-NEXT: entry:
8 ; CHECK-NEXT: br i1 [[COND:%.*]], label [[ENTRY_SPLIT_US:%.*]], label [[ENTRY_SPLIT:%.*]]
9 ; CHECK: entry.split.us:
10 ; CHECK-NEXT: br label [[LOOP_US:%.*]]
11 ; CHECK: loop.us:
12 ; CHECK-NEXT: [[IV_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US]] ], [ [[IV_NEXT_US:%.*]], [[GUARDED_US:%.*]] ]
13 ; CHECK-NEXT: br label [[GUARDED_US]]
14 ; CHECK: guarded.us:
15 ; CHECK-NEXT: [[IV_NEXT_US]] = add i32 [[IV_US]], 1
16 ; CHECK-NEXT: [[LOOP_COND_US:%.*]] = icmp slt i32 [[IV_NEXT_US]], [[N:%.*]]
17 ; CHECK-NEXT: br i1 [[LOOP_COND_US]], label [[LOOP_US]], label [[EXIT_SPLIT_US:%.*]]
18 ; CHECK: deopt:
19 ; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
20 ; CHECK-NEXT: unreachable
21 ; Basic case: a single-block loop with a guard on the loop-invariant %cond. The entry block is expected to branch on %cond, the .us loop copy to contain no guard call, and the deopt block to call guard(i1 false).
22
23 entry:
24 br label %loop
25
26 loop:
27 %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
28 call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
29 %iv.next = add i32 %iv, 1
30 %loop.cond = icmp slt i32 %iv.next, %N
31 br i1 %loop.cond, label %loop, label %exit
32
33 exit:
34 ret void
35 }
36
37 define void @test_two_guards(i1 %cond1, i1 %cond2, i32 %N) {
38 ; CHECK-LABEL: @test_two_guards(
39 ; CHECK-NEXT: entry:
40 ; CHECK-NEXT: br i1 [[COND1:%.*]], label [[ENTRY_SPLIT_US:%.*]], label [[ENTRY_SPLIT:%.*]]
41 ; CHECK: entry.split.us:
42 ; CHECK-NEXT: br i1 [[COND2:%.*]], label [[ENTRY_SPLIT_US_SPLIT_US:%.*]], label [[ENTRY_SPLIT_US_SPLIT:%.*]]
43 ; CHECK: entry.split.us.split.us:
44 ; CHECK-NEXT: br label [[LOOP_US_US:%.*]]
45 ; CHECK: loop.us.us:
46 ; CHECK-NEXT: [[IV_US_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US_SPLIT_US]] ], [ [[IV_NEXT_US_US:%.*]], [[GUARDED_US2:%.*]] ]
47 ; CHECK-NEXT: br label [[GUARDED_US_US:%.*]]
48 ; CHECK: guarded.us.us:
49 ; CHECK-NEXT: br label [[GUARDED_US2]]
50 ; CHECK: guarded.us2:
51 ; CHECK-NEXT: [[IV_NEXT_US_US]] = add i32 [[IV_US_US]], 1
52 ; CHECK-NEXT: [[LOOP_COND_US_US:%.*]] = icmp slt i32 [[IV_NEXT_US_US]], [[N:%.*]]
53 ; CHECK-NEXT: br i1 [[LOOP_COND_US_US]], label [[LOOP_US_US]], label [[EXIT_SPLIT_US_SPLIT_US:%.*]]
54 ; CHECK: deopt1:
55 ; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
56 ; CHECK-NEXT: unreachable
57 ; CHECK: deopt:
58 ; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
59 ; CHECK-NEXT: unreachable
60 ; CHECK: exit:
61 ; CHECK-NEXT: ret void
62 ; Two loop-invariant guards in the same block: branches on %cond1 and then on %cond2 are expected ahead of the fully unswitched loop copy, and each guard gets its own deopt block.
63
64 entry:
65 br label %loop
66
67 loop:
68 %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
69 call void (i1, ...) @llvm.experimental.guard(i1 %cond1) [ "deopt"() ]
70 call void (i1, ...) @llvm.experimental.guard(i1 %cond2) [ "deopt"() ]
71 %iv.next = add i32 %iv, 1
72 %loop.cond = icmp slt i32 %iv.next, %N
73 br i1 %loop.cond, label %loop, label %exit
74
75 exit:
76 ret void
77 }
78
79 define void @test_conditional_guards(i1 %cond, i32 %N) {
80 ; CHECK-LABEL: @test_conditional_guards(
81 ; CHECK-NEXT: entry:
82 ; CHECK-NEXT: br i1 [[COND:%.*]], label [[ENTRY_SPLIT_US:%.*]], label [[ENTRY_SPLIT:%.*]]
83 ; CHECK: entry.split.us:
84 ; CHECK-NEXT: br label [[LOOP_US:%.*]]
85 ; CHECK: loop.us:
86 ; CHECK-NEXT: [[IV_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US]] ], [ [[IV_NEXT_US:%.*]], [[BACKEDGE_US:%.*]] ]
87 ; CHECK-NEXT: [[CONDITION_US:%.*]] = icmp eq i32 [[IV_US]], 123
88 ; CHECK-NEXT: br i1 [[CONDITION_US]], label [[GUARD_US:%.*]], label [[BACKEDGE_US]]
89 ; CHECK: guard.us:
90 ; CHECK-NEXT: br label [[GUARDED_US:%.*]]
91 ; CHECK: backedge.us:
92 ; CHECK-NEXT: [[IV_NEXT_US]] = add i32 [[IV_US]], 1
93 ; CHECK-NEXT: [[LOOP_COND_US:%.*]] = icmp slt i32 [[IV_NEXT_US]], [[N:%.*]]
94 ; CHECK-NEXT: br i1 [[LOOP_COND_US]], label [[LOOP_US]], label [[EXIT_SPLIT_US:%.*]]
95 ; CHECK: loop:
96 ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
97 ; CHECK-NEXT: [[CONDITION:%.*]] = icmp eq i32 [[IV]], 123
98 ; CHECK-NEXT: br i1 [[CONDITION]], label [[GUARD:%.*]], label [[BACKEDGE]]
99 ; CHECK: guard:
100 ; CHECK-NEXT: br label [[DEOPT:%.*]]
101 ; CHECK: deopt:
102 ; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
103 ; CHECK-NEXT: unreachable
104 ; CHECK: backedge:
105 ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
106 ; CHECK-NEXT: [[LOOP_COND]] = icmp slt i32 [[IV_NEXT]], [[N]]
107 ; CHECK-NEXT: br i1 [[LOOP_COND]], label %loop, label [[EXIT_SPLIT:%.*]]
108 ; The guard sits in a block reached only when %iv == 123, yet the entry block is still expected to branch on %cond. In the non-.us copy the guard block branches unconditionally to deopt.
109
110 entry:
111 br label %loop
112
113 loop:
114 %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]
115 %condition = icmp eq i32 %iv, 123
116 br i1 %condition, label %guard, label %backedge
117
118 guard:
119 call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
120 br label %backedge
121
122 backedge:
123 %iv.next = add i32 %iv, 1
124 %loop.cond = icmp slt i32 %iv.next, %N
125 br i1 %loop.cond, label %loop, label %exit
126
127 exit:
128 ret void
129 }
130
131 define void @test_nested_loop(i1 %cond, i32 %N) {
132 ; CHECK-LABEL: @test_nested_loop(
133 ; CHECK-NEXT: entry:
134 ; CHECK-NEXT: br i1 [[COND:%.*]], label [[ENTRY_SPLIT:%.*]], label [[OUTER_LOOP_SPLIT:%.*]]
135 ; CHECK: entry.split:
136 ; CHECK-NEXT: br label [[OUTER_LOOP:%.*]]
137 ; CHECK: outer_loop:
138 ; CHECK-NEXT: br label [[OUTER_LOOP_SPLIT_US:%.*]]
139 ; CHECK: outer_loop.split.us:
140 ; CHECK-NEXT: br label [[LOOP_US:%.*]]
141 ; CHECK: loop.us:
142 ; CHECK-NEXT: [[IV_US:%.*]] = phi i32 [ 0, [[OUTER_LOOP_SPLIT_US]] ], [ [[IV_NEXT_US:%.*]], [[GUARDED_US:%.*]] ]
143 ; CHECK-NEXT: br label [[GUARDED_US]]
144 ; CHECK: guarded.us:
145 ; CHECK-NEXT: [[IV_NEXT_US]] = add i32 [[IV_US]], 1
146 ; CHECK-NEXT: [[LOOP_COND_US:%.*]] = icmp slt i32 [[IV_NEXT_US]], [[N:%.*]]
147 ; CHECK-NEXT: br i1 [[LOOP_COND_US]], label [[LOOP_US]], label [[OUTER_BACKEDGE_SPLIT_US:%.*]]
148 ; CHECK: outer_backedge.split.us:
149 ; CHECK-NEXT: br label [[OUTER_BACKEDGE:%.*]]
150 ; CHECK: deopt:
151 ; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
152 ; CHECK-NEXT: unreachable
153 ; CHECK: outer_backedge:
154 ; CHECK-NEXT: br i1 false, label [[OUTER_LOOP]], label [[EXIT:%.*]]
155 ; The guard is in the inner loop of a two-level loop nest. The branch on %cond is expected in the entry block, i.e. before the outer loop is entered.
156
157 entry:
158 br label %outer_loop
159
160 outer_loop:
161 br label %loop
162
163 loop:
164 %iv = phi i32 [ 0, %outer_loop ], [ %iv.next, %loop ]
165 call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
166 %iv.next = add i32 %iv, 1
167 %loop.cond = icmp slt i32 %iv.next, %N
168 br i1 %loop.cond, label %loop, label %outer_backedge
169
170 outer_backedge:
171 br i1 undef, label %outer_loop, label %exit
172
173 exit:
174 ret void
175 }
176
177 define void @test_sibling_loops(i1 %cond1, i1 %cond2, i32 %N) {
178 ; CHECK-LABEL: @test_sibling_loops(
179 ; CHECK-NEXT: entry:
180 ; CHECK-NEXT: br i1 [[COND1:%.*]], label [[ENTRY_SPLIT_US:%.*]], label [[ENTRY_SPLIT:%.*]]
181 ; CHECK: [[IV1_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US]] ], [ [[IV1_NEXT_US:%.*]], [[GUARDED_US:%.*]] ]
182 ; CHECK-NEXT: br label [[GUARDED_US]]
183 ; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
184 ; CHECK-NEXT: unreachable
185 ; CHECK: [[IV2_US:%.*]] = phi i32 [ 0, [[BETWEEN:%.*]] ], [ [[IV1_NEXT_US2:%.*]], [[GUARDED_US2:%.*]] ]
186 ; CHECK-NEXT: br label [[GUARDED_US2]]
187 ; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
188 ; CHECK-NEXT: unreachable
189 ; Two sibling loops, each guarded by its own loop-invariant condition. For each loop, a guard-free loop header and a guard(i1 false) call followed by unreachable are expected.
190
191 entry:
192 br label %loop1
193
194 loop1:
195 %iv1 = phi i32 [ 0, %entry ], [ %iv1.next, %loop1 ]
196 call void (i1, ...) @llvm.experimental.guard(i1 %cond1) [ "deopt"() ]
197 %iv1.next = add i32 %iv1, 1
198 %loop1.cond = icmp slt i32 %iv1.next, %N
199 br i1 %loop1.cond, label %loop1, label %between
200
201 between:
202 br label %loop2
203
204 loop2:
205 %iv2 = phi i32 [ 0, %between ], [ %iv2.next, %loop2 ]
206 call void (i1, ...) @llvm.experimental.guard(i1 %cond2) [ "deopt"() ]
207 %iv2.next = add i32 %iv2, 1
208 %loop2.cond = icmp slt i32 %iv2.next, %N
209 br i1 %loop2.cond, label %loop2, label %exit
210
211 exit:
212 ret void
213 }
214
215 ; Check that we don't do anything because of cleanuppad: the guard call must remain with its original %cond operand, and no further guard call may follow it.
216 ; CHECK-LABEL: @test_cleanuppad(
217 ; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
218 ; CHECK-NOT: call void (i1, ...) @llvm.experimental.guard(
219 define void @test_cleanuppad(i1 %cond, i32 %N) personality i32 (...)* @__CxxFrameHandler3 {
220
221 entry:
222 br label %loop
223
224 loop:
225 %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
226 call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
227 %iv.next = add i32 %iv, 1
228 invoke void @may_throw(i32 %iv) to label %loop unwind label %exit
229
230 exit:
231 %cp = cleanuppad within none []
232 cleanupret from %cp unwind to caller
233
234 }
235
236 declare void @may_throw(i32 %i)
237 declare i32 @__CxxFrameHandler3(...)