llvm.org GIT mirror llvm / cbb0a9e
Rework loop predication pass We've found a serious issue with the current implementation of loop predication. The current implementation relies on SCEV and this turned out to be problematic. To fix the problem we had to rework the pass substantially. We have had the reworked implementation in our downstream tree for a while. This is the initial patch of the series of changes to upstream the new implementation. For now the transformation is limited to the following case: * The loop has a single latch with either ult or slt icmp condition. * The step of the IV used in the latch condition is 1. * The IV of the latch condition is the same as the post increment IV of the guard condition. * The guard condition is ult. See the review or the LoopPredication.cpp header for the details about the problem and the new implementation. Reviewed By: sanjoy, mkazantsev Differential Revision: https://reviews.llvm.org/D37569 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@313981 91177308-0d34-0410-b5e6-96231b3b80d8 Artur Pilipenko 1 year, 11 months ago
4 changed file(s) with 518 addition(s) and 196 deletion(s). Raw diff Collapse all Expand all
3232 // }
3333 // else
3434 // deoptimize
35 //
36 // It's tempting to rely on SCEV here, but it has proven to be problematic.
37 // Generally the facts SCEV provides about the increment step of add
38 // recurrences are true if the backedge of the loop is taken, which implicitly
39 // assumes that the guard doesn't fail. Using these facts to optimize the
40 // guard results in a circular logic where the guard is optimized under the
41 // assumption that it never fails.
42 //
43 // For example, in the loop below the induction variable will be marked as nuw
44 // based on the guard. Based on nuw the guard predicate will be considered
45 // monotonic. Given a monotonic condition it's tempting to replace the induction
46 // variable in the condition with its value on the last iteration. But this
47 // transformation is not correct, e.g. e = 4, b = 5 breaks the loop.
48 //
49 // for (int i = b; i != e; i++)
50 // guard(i u< len)
51 //
52 // One of the ways to reason about this problem is to use an inductive proof
53 // approach. Given the loop:
54 //
55 // if (B(Start)) {
56 // do {
57 // I = PHI(Start, I.INC)
58 // I.INC = I + Step
59 // guard(G(I));
60 // } while (B(I.INC));
61 // }
62 //
63 // where B(x) and G(x) are predicates that map integers to booleans, we want a
64 // loop invariant expression M such that the following program has the same semantics
65 // as the above:
66 //
67 // if (B(Start)) {
68 // do {
69 // I = PHI(Start, I.INC)
70 // I.INC = I + Step
71 // guard(G(Start) && M);
72 // } while (B(I.INC));
73 // }
74 //
75 // One solution for M is M = forall X . (G(X) && B(X + Step)) => G(X + Step)
76 //
77 // Informal proof that the transformation above is correct:
78 //
79 // By the definition of guards we can rewrite the guard condition to:
80 // G(I) && G(Start) && M
81 //
82 // Let's prove that for each iteration of the loop:
83 // G(Start) && M => G(I)
84 // And the condition above can be simplified to G(Start) && M.
85 //
86 // Induction base.
87 // G(Start) && M => G(Start)
88 //
89 // Induction step. Assuming G(Start) && M => G(I) on the subsequent
90 // iteration:
91 //
92 // B(I + Step) is true because it's the backedge condition.
93 // G(I) is true because the backedge is guarded by this condition.
94 //
95 // So M = forall X . (G(X) && B(X + Step)) => G(X + Step) implies
96 // G(I + Step).
97 //
98 // Note that we can use anything stronger than M, i.e. any condition which
99 // implies M.
100 //
101 // For now the transformation is limited to the following case:
102 // * The loop has a single latch with either ult or slt icmp condition.
103 // * The step of the IV used in the latch condition is 1.
104 // * The IV of the latch condition is the same as the post increment IV of the
105 // guard condition.
106 // * The guard condition is ult.
107 //
108 // In this case the latch is of the form:
109 // ++i u< latchLimit or ++i s< latchLimit
110 // and the guard is of the form:
111 // i u< guardLimit
112 //
113 // For the unsigned latch comparison case M is:
114 // forall X . X u< guardLimit && (X + 1) u< latchLimit =>
115 // (X + 1) u< guardLimit
116 //
117 // This is true if latchLimit u<= guardLimit since then
118 // (X + 1) u< latchLimit u<= guardLimit => (X + 1) u< guardLimit.
119 //
120 // So the widened condition is:
121 // i.start u< guardLimit && latchLimit u<= guardLimit
122 //
123 // For the signed latch comparison case M is:
124 // forall X . X u< guardLimit && (X + 1) s< latchLimit =>
125 // (X + 1) u< guardLimit
126 //
127 // The only way the antecedent can be true and the consequent can be false is
128 // if
129 // X == guardLimit - 1
130 // (and guardLimit is non-zero, but we won't use this latter fact).
131 // If X == guardLimit - 1 then the second half of the antecedent is
132 // guardLimit s< latchLimit
133 // and its negation is
134 // latchLimit s<= guardLimit.
135 //
136 // In other words, if latchLimit s<= guardLimit then:
137 // (the ranges below are written in ConstantRange notation, where [A, B) is the
138 // set for (I = A; I != B; I++ /*maywrap*/) yield(I);)
139 //
140 // forall X . X u< guardLimit && (X + 1) s< latchLimit => (X + 1) u< guardLimit
141 // == forall X . X u< guardLimit && (X + 1) s< guardLimit => (X + 1) u< guardLimit
142 // == forall X . X in [0, guardLimit) && (X + 1) in [INT_MIN, guardLimit) => (X + 1) in [0, guardLimit)
143 // == forall X . X in [0, guardLimit) && X in [INT_MAX, guardLimit-1) => X in [-1, guardLimit-1)
144 // == forall X . X in [0, guardLimit-1) => X in [-1, guardLimit-1)
145 // == true
146 //
147 // So the widened condition is:
148 // i.start u< guardLimit && latchLimit s<= guardLimit
35149 //
36150 //===----------------------------------------------------------------------===//
37151
74188 Loop *L;
75189 const DataLayout *DL;
76190 BasicBlock *Preheader;
77
78 Optional parseLoopICmp(ICmpInst *ICI);
191 LoopICmp LatchCheck;
192
193 Optional parseLoopICmp(ICmpInst *ICI) {
194 return parseLoopICmp(ICI->getPredicate(), ICI->getOperand(0),
195 ICI->getOperand(1));
196 }
197 Optional parseLoopICmp(ICmpInst::Predicate Pred, Value *LHS,
198 Value *RHS);
199
200 Optional parseLoopLatchICmp();
79201
80202 Value *expandCheck(SCEVExpander &Expander, IRBuilder<> &Builder,
81203 ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS,
134256 }
135257
136258 Optional
137 LoopPredication::parseLoopICmp(ICmpInst *ICI) {
138 ICmpInst::Predicate Pred = ICI->getPredicate();
139
140 Value *LHS = ICI->getOperand(0);
141 Value *RHS = ICI->getOperand(1);
259 LoopPredication::parseLoopICmp(ICmpInst::Predicate Pred, Value *LHS,
260 Value *RHS) {
142261 const SCEV *LHSS = SE->getSCEV(LHS);
143262 if (isa(LHSS))
144263 return None;
164283 IRBuilder<> &Builder,
165284 ICmpInst::Predicate Pred, const SCEV *LHS,
166285 const SCEV *RHS, Instruction *InsertAt) {
286 // TODO: we can check isLoopEntryGuardedByCond before emitting the check
287
167288 Type *Ty = LHS->getType();
168289 assert(Ty == RHS->getType() && "expandCheck operands have different types?");
169290 Value *LHSV = Expander.expandCodeFor(LHS, Ty, InsertAt);
180301 DEBUG(dbgs() << "Analyzing ICmpInst condition:\n");
181302 DEBUG(ICI->dump());
182303
304 // parseLoopLatchICmp guarantees that the latch condition is:
305 // ++i u< latchLimit or ++i s< latchLimit
306 // We are looking for the range checks of the form:
307 // i u< guardLimit
183308 auto RangeCheck = parseLoopICmp(ICI);
184309 if (!RangeCheck) {
185310 DEBUG(dbgs() << "Failed to parse the loop latch condition!\n");
186311 return None;
187312 }
188
189 ICmpInst::Predicate Pred = RangeCheck->Pred;
190 const SCEVAddRecExpr *IndexAR = RangeCheck->IV;
191 const SCEV *RHSS = RangeCheck->Limit;
313 if (RangeCheck->Pred != ICmpInst::ICMP_ULT) {
314 DEBUG(dbgs() << "Unsupported range check predicate(" << RangeCheck->Pred
315 << ")!\n");
316 return None;
317 }
318 auto *RangeCheckIV = RangeCheck->IV;
319 auto *PostIncRangeCheckIV = RangeCheckIV->getPostIncExpr(*SE);
320 if (LatchCheck.IV != PostIncRangeCheckIV) {
321 DEBUG(dbgs() << "Post increment range check IV (" << *PostIncRangeCheckIV
322 << ") is not the same as latch IV (" << *LatchCheck.IV
323 << ")!\n");
324 return None;
325 }
326 assert(RangeCheckIV->getStepRecurrence(*SE)->isOne() && "must be one");
327 const SCEV *Start = RangeCheckIV->getStart();
328
329 // Generate the widened condition. See the file header comment for reasoning.
330 // If the latch condition is unsigned:
331 // i.start u< guardLimit && latchLimit u<= guardLimit
332 // If the latch condition is signed:
333 // i.start u< guardLimit && latchLimit s<= guardLimit
334
335 auto LimitCheckPred = ICmpInst::isSigned(LatchCheck.Pred)
336 ? ICmpInst::ICMP_SLE
337 : ICmpInst::ICMP_ULE;
192338
193339 auto CanExpand = [this](const SCEV *S) {
194340 return SE->isLoopInvariant(S, L) && isSafeToExpand(S, *SE);
195341 };
196 if (!CanExpand(RHSS))
197 return None;
198
199 DEBUG(dbgs() << "IndexAR: ");
200 DEBUG(IndexAR->dump());
201
202 bool IsIncreasing = false;
203 if (!SE->isMonotonicPredicate(IndexAR, Pred, IsIncreasing))
204 return None;
205
206 // If the predicate is increasing the condition can change from false to true
207 // as the loop progresses, in this case take the value on the first iteration
208 // for the widened check. Otherwise the condition can change from true to
209 // false as the loop progresses, so take the value on the last iteration.
210 const SCEV *NewLHSS = IsIncreasing
211 ? IndexAR->getStart()
212 : SE->getSCEVAtScope(IndexAR, L->getParentLoop());
213 if (NewLHSS == IndexAR) {
214 DEBUG(dbgs() << "Can't compute NewLHSS!\n");
215 return None;
216 }
217
218 DEBUG(dbgs() << "NewLHSS: ");
219 DEBUG(NewLHSS->dump());
220
221 if (!CanExpand(NewLHSS))
222 return None;
223
224 DEBUG(dbgs() << "NewLHSS is loop invariant and safe to expand. Expand!\n");
342 if (!CanExpand(Start) || !CanExpand(LatchCheck.Limit) ||
343 !CanExpand(RangeCheck->Limit))
344 return None;
225345
226346 Instruction *InsertAt = Preheader->getTerminator();
227 return expandCheck(Expander, Builder, Pred, NewLHSS, RHSS, InsertAt);
347 auto *FirstIterationCheck = expandCheck(Expander, Builder, RangeCheck->Pred,
348 Start, RangeCheck->Limit, InsertAt);
349 auto *LimitCheck = expandCheck(Expander, Builder, LimitCheckPred,
350 LatchCheck.Limit, RangeCheck->Limit, InsertAt);
351 return Builder.CreateAnd(FirstIterationCheck, LimitCheck);
228352 }
229353
230354 bool LoopPredication::widenGuardConditions(IntrinsicInst *Guard,
287411 return true;
288412 }
289413
414 Optional LoopPredication::parseLoopLatchICmp() {
415 using namespace PatternMatch;
416
417 BasicBlock *LoopLatch = L->getLoopLatch();
418 if (!LoopLatch) {
419 DEBUG(dbgs() << "The loop doesn't have a single latch!\n");
420 return None;
421 }
422
423 ICmpInst::Predicate Pred;
424 Value *LHS, *RHS;
425 BasicBlock *TrueDest, *FalseDest;
426
427 if (!match(LoopLatch->getTerminator(),
428 m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)), TrueDest,
429 FalseDest))) {
430 DEBUG(dbgs() << "Failed to match the latch terminator!\n");
431 return None;
432 }
433 assert((TrueDest == L->getHeader() || FalseDest == L->getHeader()) &&
434 "One of the latch's destinations must be the header");
435 if (TrueDest != L->getHeader())
436 Pred = ICmpInst::getInversePredicate(Pred);
437
438 auto Result = parseLoopICmp(Pred, LHS, RHS);
439 if (!Result) {
440 DEBUG(dbgs() << "Failed to parse the loop latch condition!\n");
441 return None;
442 }
443
444 if (Result->Pred != ICmpInst::ICMP_ULT &&
445 Result->Pred != ICmpInst::ICMP_SLT) {
446 DEBUG(dbgs() << "Unsupported loop latch predicate(" << Result->Pred
447 << ")!\n");
448 return None;
449 }
450
451 // Check affine first, so if it's not we don't try to compute the step
452 // recurrence.
453 if (!Result->IV->isAffine()) {
454 DEBUG(dbgs() << "The induction variable is not affine!\n");
455 return None;
456 }
457
458 auto *Step = Result->IV->getStepRecurrence(*SE);
459 if (!Step->isOne()) {
460 DEBUG(dbgs() << "Unsupported loop stride(" << *Step << ")!\n");
461 return None;
462 }
463
464 return Result;
465 }
466
290467 bool LoopPredication::runOnLoop(Loop *Loop) {
291468 L = Loop;
292469
306483 Preheader = L->getLoopPreheader();
307484 if (!Preheader)
308485 return false;
486
487 auto LatchCheckOpt = parseLoopLatchICmp();
488 if (!LatchCheckOpt)
489 return false;
490 LatchCheck = *LatchCheckOpt;
309491
310492 // Collect all the guards into a vector and process later, so as not
311493 // to invalidate the instruction iterator.
1010
1111 loop.preheader:
1212 ; CHECK: loop.preheader:
13 ; CHECK: [[max_index:[^ ]+]] = add i32 %n, -1
14 ; CHECK-NEXT: [[wide_cond:[^ ]+]] = icmp ult i32 [[max_index]], %length
13 ; CHECK: [[first_iteration_check:[^ ]+]] = icmp ult i32 0, %length
14 ; CHECK-NEXT: [[limit_check:[^ ]+]] = icmp ule i32 %n, %length
15 ; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[first_iteration_check]], [[limit_check]]
1516 ; CHECK-NEXT: br label %loop
1617 br label %loop
1718
4546
4647 loop.preheader:
4748 ; CHECK: loop.preheader:
48 ; CHECK: [[max_index:[^ ]+]] = add i32 %n, -1
49 ; CHECK-NEXT: [[wide_cond:[^ ]+]] = icmp ult i32 [[max_index]], %length
49 ; CHECK: [[first_iteration_check:[^ ]+]] = icmp ult i32 0, %length
50 ; CHECK-NEXT: [[limit_check:[^ ]+]] = icmp ule i32 %n, %length
51 ; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[first_iteration_check]], [[limit_check]]
5052 ; CHECK-NEXT: br label %loop
5153 br label %loop
5254
7274 ret i32 %result
7375 }
7476
77 define i32 @signed_loop_0_to_n_ult_check(i32* %array, i32 %length, i32 %n) {
78 ; CHECK-LABEL: @signed_loop_0_to_n_ult_check
79 entry:
80 %tmp5 = icmp sle i32 %n, 0
81 br i1 %tmp5, label %exit, label %loop.preheader
82
83 loop.preheader:
84 ; CHECK: loop.preheader:
85 ; CHECK: [[first_iteration_check:[^ ]+]] = icmp ult i32 0, %length
86 ; CHECK-NEXT: [[limit_check:[^ ]+]] = icmp sle i32 %n, %length
87 ; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[first_iteration_check]], [[limit_check]]
88 ; CHECK-NEXT: br label %loop
89 br label %loop
90
91 loop:
92 ; CHECK: loop:
93 ; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ]
94 %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ]
95 %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ]
96 %within.bounds = icmp ult i32 %i, %length
97 call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
98
99 %i.i64 = zext i32 %i to i64
100 %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64
101 %array.i = load i32, i32* %array.i.ptr, align 4
102 %loop.acc.next = add i32 %loop.acc, %array.i
103
104 %i.next = add nuw i32 %i, 1
105 %continue = icmp slt i32 %i.next, %n
106 br i1 %continue, label %loop, label %exit
107
108 exit:
109 %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ]
110 ret i32 %result
111 }
112
113 define i32 @unsupported_latch_pred_loop_0_to_n(i32* %array, i32 %length, i32 %n) {
114 ; CHECK-LABEL: @unsupported_latch_pred_loop_0_to_n
115 entry:
116 %tmp5 = icmp sle i32 %n, 0
117 br i1 %tmp5, label %exit, label %loop.preheader
118
119 loop.preheader:
120 ; CHECK: loop.preheader:
121 ; CHECK-NEXT: br label %loop
122 br label %loop
123
124 loop:
125 ; CHECK: loop:
126 ; CHECK: %within.bounds = icmp ult i32 %i, %length
127 ; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
128 %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ]
129 %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ]
130 %within.bounds = icmp ult i32 %i, %length
131 call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
132
133 %i.i64 = zext i32 %i to i64
134 %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64
135 %array.i = load i32, i32* %array.i.ptr, align 4
136 %loop.acc.next = add i32 %loop.acc, %array.i
137
138 %i.next = add nsw i32 %i, 1
139 %continue = icmp ne i32 %i.next, %n
140 br i1 %continue, label %loop, label %exit
141
142 exit:
143 %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ]
144 ret i32 %result
145 }
146
147 define i32 @signed_loop_0_to_n_unsupported_iv_step(i32* %array, i32 %length, i32 %n) {
148 ; CHECK-LABEL: @signed_loop_0_to_n_unsupported_iv_step
149 entry:
150 %tmp5 = icmp sle i32 %n, 0
151 br i1 %tmp5, label %exit, label %loop.preheader
152
153 loop.preheader:
154 ; CHECK: loop.preheader:
155 ; CHECK-NEXT: br label %loop
156 br label %loop
157
158 loop:
159 ; CHECK: loop:
160 ; CHECK: %within.bounds = icmp ult i32 %i, %length
161 ; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
162 %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ]
163 %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ]
164 %within.bounds = icmp ult i32 %i, %length
165 call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
166
167 %i.i64 = zext i32 %i to i64
168 %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64
169 %array.i = load i32, i32* %array.i.ptr, align 4
170 %loop.acc.next = add i32 %loop.acc, %array.i
171
172 %i.next = add nsw i32 %i, 2
173 %continue = icmp slt i32 %i.next, %n
174 br i1 %continue, label %loop, label %exit
175
176 exit:
177 %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ]
178 ret i32 %result
179 }
180
181 define i32 @signed_loop_0_to_n_equal_iv_range_check(i32* %array, i32 %length, i32 %n) {
182 ; CHECK-LABEL: @signed_loop_0_to_n_equal_iv_range_check
183 entry:
184 %tmp5 = icmp sle i32 %n, 0
185 br i1 %tmp5, label %exit, label %loop.preheader
186
187 loop.preheader:
188 ; CHECK: loop.preheader:
189 ; CHECK: [[first_iteration_check:[^ ]+]] = icmp ult i32 0, %length
190 ; CHECK-NEXT: [[limit_check:[^ ]+]] = icmp sle i32 %n, %length
191 ; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[first_iteration_check]], [[limit_check]]
192 ; CHECK-NEXT: br label %loop
193 br label %loop
194
195 loop:
196 ; CHECK: loop:
197 ; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ]
198 %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ]
199 %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ]
200 %j = phi i32 [ %j.next, %loop ], [ 0, %loop.preheader ]
201
202 %within.bounds = icmp ult i32 %j, %length
203 call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
204
205 %i.i64 = zext i32 %i to i64
206 %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64
207 %array.i = load i32, i32* %array.i.ptr, align 4
208 %loop.acc.next = add i32 %loop.acc, %array.i
209
210 %j.next = add nsw i32 %j, 1
211 %i.next = add nsw i32 %i, 1
212 %continue = icmp slt i32 %i.next, %n
213 br i1 %continue, label %loop, label %exit
214
215 exit:
216 %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ]
217 ret i32 %result
218 }
219
220 define i32 @signed_loop_0_to_n_unrelated_iv_range_check(i32* %array, i32 %start, i32 %length, i32 %n) {
221 ; CHECK-LABEL: @signed_loop_0_to_n_unrelated_iv_range_check
222 entry:
223 %tmp5 = icmp sle i32 %n, 0
224 br i1 %tmp5, label %exit, label %loop.preheader
225
226 loop.preheader:
227 ; CHECK: loop.preheader:
228 ; CHECK-NEXT: br label %loop
229 br label %loop
230
231 loop:
232 ; CHECK: loop:
233 ; CHECK: %within.bounds = icmp ult i32 %j, %length
234 ; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
235 %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ]
236 %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ]
237 %j = phi i32 [ %j.next, %loop ], [ %start, %loop.preheader ]
238
239 %within.bounds = icmp ult i32 %j, %length
240 call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
241
242 %i.i64 = zext i32 %i to i64
243 %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64
244 %array.i = load i32, i32* %array.i.ptr, align 4
245 %loop.acc.next = add i32 %loop.acc, %array.i
246
247 %j.next = add nsw i32 %j, 1
248 %i.next = add nsw i32 %i, 1
249 %continue = icmp slt i32 %i.next, %n
250 br i1 %continue, label %loop, label %exit
251
252 exit:
253 %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ]
254 ret i32 %result
255 }
75256
76257 define i32 @two_range_checks(i32* %array.1, i32 %length.1,
77258 i32* %array.2, i32 %length.2, i32 %n) {
82263
83264 loop.preheader:
84265 ; CHECK: loop.preheader:
85 ; CHECK: [[max_index:[^ ]+]] = add i32 %n, -1
86 ; CHECK-NEXT: [[wide_cond_1:[^ ]+]] = icmp ult i32 [[max_index]], %length.{{1|2}}
87 ; CHECK-NEXT: [[wide_cond_2:[^ ]+]] = icmp ult i32 [[max_index]], %length.{{1|2}}
266 ; CHECK: [[first_iteration_check_1:[^ ]+]] = icmp ult i32 0, %length.{{1|2}}
267 ; CHECK-NEXT: [[limit_check_1:[^ ]+]] = icmp ule i32 %n, %length.{{1|2}}
268 ; CHECK-NEXT: [[wide_cond_1:[^ ]+]] = and i1 [[first_iteration_check_1]], [[limit_check_1]]
269 ; CHECK-NEXT: [[first_iteration_check_2:[^ ]+]] = icmp ult i32 0, %length.{{1|2}}
270 ; CHECK-NEXT: [[limit_check_2:[^ ]+]] = icmp ule i32 %n, %length.{{1|2}}
271 ; CHECK-NEXT: [[wide_cond_2:[^ ]+]] = and i1 [[first_iteration_check_2]], [[limit_check_2]]
88272 ; CHECK-NEXT: br label %loop
89273 br label %loop
90274
127311
128312 loop.preheader:
129313 ; CHECK: loop.preheader:
130 ; CHECK: [[max_index:[^ ]+]] = add i32 %n, -1
131 ; CHECK-NEXT: [[wide_cond_1:[^ ]+]] = icmp ult i32 [[max_index]], %length.{{1|2|3}}
132 ; CHECK-NEXT: [[wide_cond_2:[^ ]+]] = icmp ult i32 [[max_index]], %length.{{1|2|3}}
133 ; CHECK-NEXT: [[wide_cond_3:[^ ]+]] = icmp ult i32 [[max_index]], %length.{{1|2|3}}
314 ; CHECK: [[first_iteration_check_1:[^ ]+]] = icmp ult i32 0, %length.{{1|2|3}}
315 ; CHECK-NEXT: [[limit_check_1:[^ ]+]] = icmp ule i32 %n, %length.{{1|2|3}}
316 ; CHECK-NEXT: [[wide_cond_1:[^ ]+]] = and i1 [[first_iteration_check_1]], [[limit_check_1]]
317 ; CHECK-NEXT: [[first_iteration_check_2:[^ ]+]] = icmp ult i32 0, %length.{{1|2|3}}
318 ; CHECK-NEXT: [[limit_check_2:[^ ]+]] = icmp ule i32 %n, %length.{{1|2|3}}
319 ; CHECK-NEXT: [[wide_cond_2:[^ ]+]] = and i1 [[first_iteration_check_2]], [[limit_check_2]]
320 ; CHECK-NEXT: [[first_iteration_check_3:[^ ]+]] = icmp ult i32 0, %length.{{1|2|3}}
321 ; CHECK-NEXT: [[limit_check_3:[^ ]+]] = icmp ule i32 %n, %length.{{1|2|3}}
322 ; CHECK-NEXT: [[wide_cond_3:[^ ]+]] = and i1 [[first_iteration_check_3]], [[limit_check_3]]
134323 ; CHECK-NEXT: br label %loop
135324 br label %loop
136325
180369
181370 loop.preheader:
182371 ; CHECK: loop.preheader:
183 ; CHECK: [[max_index:[^ ]+]] = add i32 %n, -1
184 ; CHECK-NEXT: [[wide_cond_1:[^ ]+]] = icmp ult i32 [[max_index]], %length.1
185 ; CHECK-NEXT: [[wide_cond_2:[^ ]+]] = icmp ult i32 [[max_index]], %length.2
186 ; CHECK-NEXT: [[wide_cond_3:[^ ]+]] = icmp ult i32 [[max_index]], %length.3
372 ; CHECK: [[first_iteration_check_1:[^ ]+]] = icmp ult i32 0, %length.{{1|2|3}}
373 ; CHECK-NEXT: [[limit_check_1:[^ ]+]] = icmp ule i32 %n, %length.{{1|2|3}}
374 ; CHECK-NEXT: [[wide_cond_1:[^ ]+]] = and i1 [[first_iteration_check_1]], [[limit_check_1]]
375 ; CHECK-NEXT: [[first_iteration_check_2:[^ ]+]] = icmp ult i32 0, %length.{{1|2|3}}
376 ; CHECK-NEXT: [[limit_check_2:[^ ]+]] = icmp ule i32 %n, %length.{{1|2|3}}
377 ; CHECK-NEXT: [[wide_cond_2:[^ ]+]] = and i1 [[first_iteration_check_2]], [[limit_check_2]]
378 ; CHECK-NEXT: [[first_iteration_check_3:[^ ]+]] = icmp ult i32 0, %length.{{1|2|3}}
379 ; CHECK-NEXT: [[limit_check_3:[^ ]+]] = icmp ule i32 %n, %length.{{1|2|3}}
380 ; CHECK-NEXT: [[wide_cond_3:[^ ]+]] = and i1 [[first_iteration_check_3]], [[limit_check_3]]
187381 ; CHECK-NEXT: br label %loop
188382 br label %loop
189383
227421 ret i32 %result
228422 }
229423
230 define i32 @signed_loop_start_to_n_sge_0_check(i32* %array, i32 %length, i32 %start, i32 %n) {
231 ; CHECK-LABEL: @signed_loop_start_to_n_sge_0_check
232 entry:
233 %tmp5 = icmp eq i32 %n, 0
234 br i1 %tmp5, label %exit, label %loop.preheader
235
236 loop.preheader:
237 ; CHECK: loop.preheader:
238 ; CHECK-NEXT: [[wide_cond:[^ ]+]] = icmp sge i32 %start, 0
239 ; CHECK-NEXT: br label %loop
240 br label %loop
241
242 loop:
243 ; CHECK: loop:
244 ; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ]
245 %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ]
246 %i = phi i32 [ %i.next, %loop ], [ %start, %loop.preheader ]
247 %within.bounds = icmp sge i32 %i, 0
248 call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
249
250 %i.i64 = zext i32 %i to i64
251 %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64
252 %array.i = load i32, i32* %array.i.ptr, align 4
253 %loop.acc.next = add i32 %loop.acc, %array.i
254
255 %i.next = add nsw i32 %i, 1
256 %continue = icmp slt i32 %i.next, %n
257 br i1 %continue, label %loop, label %exit
258
259 exit:
260 %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ]
261 ret i32 %result
262 }
263
264 define i32 @signed_loop_start_to_n_upper_slt_length_check(i32* %array, i32 %length, i32 %start, i32 %n) {
265 ; CHECK-LABEL: @signed_loop_start_to_n_upper_slt_length_check
266 entry:
267 %tmp5 = icmp sle i32 %n, 0
268 br i1 %tmp5, label %exit, label %loop.preheader
269
270 loop.preheader:
271 ; CHECK: loop.preheader:
272 ; CHECK: [[start_1:[^ ]+]] = add i32 %start, 1
273 ; CHECK-NEXT: [[n_sgt_start_1:[^ ]+]] = icmp sgt i32 %n, [[start_1]]
274 ; CHECK-NEXT: [[smax:[^ ]+]] = select i1 [[n_sgt_start_1]], i32 %n, i32 [[start_1]]
275 ; CHECK-NEXT: [[max_index:[^ ]+]] = add i32 [[smax]], -1
276 ; CHECK-NEXT: [[wide_cond:[^ ]+]] = icmp slt i32 [[max_index]], %length
277 ; CHECK-NEXT: br label %loop
278 br label %loop
279
280 loop:
281 ; CHECK: loop:
282 ; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ]
283 %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ]
284 %i = phi i32 [ %i.next, %loop ], [ %start, %loop.preheader ]
285 %within.bounds = icmp slt i32 %i, %length
286 call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
287
288 %i.i64 = zext i32 %i to i64
289 %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64
290 %array.i = load i32, i32* %array.i.ptr, align 4
291 %loop.acc.next = add i32 %loop.acc, %array.i
292
293 %i.next = add nsw i32 %i, 1
294 %continue = icmp slt i32 %i.next, %n
295 br i1 %continue, label %loop, label %exit
296
297 exit:
298 %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ]
299 ret i32 %result
300 }
301
302 define i32 @signed_loop_start_to_n_both_checks(i32* %array, i32 %length, i32 %start, i32 %n) {
303 ; CHECK-LABEL: @signed_loop_start_to_n_both_checks
304 entry:
305 %tmp5 = icmp sle i32 %n, 0
306 br i1 %tmp5, label %exit, label %loop.preheader
307
308 loop.preheader:
309 ; CHECK: loop.preheader:
310 ; CHECK: [[lower_check:[^ ]+]] = icmp sge i32 %start, 0
311 ; CHECK-NEXT: [[start_1:[^ ]+]] = add i32 %start, 1
312 ; CHECK-NEXT: [[n_sgt_start_1:[^ ]+]] = icmp sgt i32 %n, [[start_1]]
313 ; CHECK-NEXT: [[smax:[^ ]+]] = select i1 [[n_sgt_start_1]], i32 %n, i32 [[start_1]]
314 ; CHECK-NEXT: [[max_index:[^ ]+]] = add i32 [[smax]], -1
315 ; CHECK-NEXT: [[upper_check:[^ ]+]] = icmp slt i32 [[max_index]], %length
316 ; CHECK-NEXT: br label %loop
317 br label %loop
318
319 loop:
320 ; CHECK: loop:
321 ; CHECK: [[wide_cond:[^ ]+]] = and i1 [[lower_check]], [[upper_check]]
322 ; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ]
323 %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ]
324 %i = phi i32 [ %i.next, %loop ], [ %start, %loop.preheader ]
325 %within.bounds.1 = icmp slt i32 %i, %length
326 %within.bounds.2 = icmp sge i32 %i, 0
327 %within.bounds = and i1 %within.bounds.1, %within.bounds.2
328 call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
329
330 %i.i64 = zext i32 %i to i64
331 %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64
332 %array.i = load i32, i32* %array.i.ptr, align 4
333 %loop.acc.next = add i32 %loop.acc, %array.i
334
335 %i.next = add nsw i32 %i, 1
336 %continue = icmp slt i32 %i.next, %n
337 br i1 %continue, label %loop, label %exit
338
339 exit:
340 %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ]
341 ret i32 %result
342 }
343
344424 define i32 @unsigned_loop_0_to_n_unrelated_condition(i32* %array, i32 %length, i32 %n, i32 %x) {
345425 ; CHECK-LABEL: @unsigned_loop_0_to_n_unrelated_condition
346426 entry:
349429
350430 loop.preheader:
351431 ; CHECK: loop.preheader:
352 ; CHECK: [[max_index:[^ ]+]] = add i32 %n, -1
353 ; CHECK-NEXT: [[wide_cond:[^ ]+]] = icmp ult i32 [[max_index]], %length
432 ; CHECK: [[first_iteration_check:[^ ]+]] = icmp ult i32 0, %length
433 ; CHECK-NEXT: [[limit_check:[^ ]+]] = icmp ule i32 %n, %length
434 ; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[first_iteration_check]], [[limit_check]]
354435 ; CHECK-NEXT: br label %loop
355436 br label %loop
356437
438519 loop:
439520 ; CHECK: loop:
440521 ; CHECK: %bound = add i32 %i, %x
441 ; CHECK-NEXT: %within.bounds = icmp slt i32 %i, %bound
522 ; CHECK-NEXT: %within.bounds = icmp ult i32 %i, %bound
442523 ; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
443524 %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ]
444525 %i = phi i32 [ %i.next, %loop ], [ %start, %loop.preheader ]
445526 %bound = add i32 %i, %x
446 %within.bounds = icmp slt i32 %i, %bound
527 %within.bounds = icmp ult i32 %i, %bound
447528 call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
448529
449530 %i.i64 = zext i32 %i to i64
502583
503584 loop.preheader:
504585 ; CHECK: loop.preheader:
505 ; CHECK: [[max_index:[^ ]+]] = add i32 %n, -1
506 ; CHECK-NEXT: [[length:[^ ]+]] = zext i16 %length.i16 to i32
507 ; CHECK-NEXT: [[wide_cond:[^ ]+]] = icmp ult i32 [[max_index]], [[length]]
586 ; CHECK: [[length:[^ ]+]] = zext i16 %length.i16 to i32
587 ; CHECK-NEXT: [[first_iteration_check:[^ ]+]] = icmp ult i32 0, [[length]]
588 ; CHECK-NEXT: [[limit_check:[^ ]+]] = icmp ule i32 %n, [[length]]
589 ; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[first_iteration_check]], [[limit_check]]
508590 ; CHECK-NEXT: br label %loop
509591 br label %loop
510592
99 br i1 %tmp5, label %exit, label %outer.loop.preheader
1010
1111 outer.loop.preheader:
12 ; CHECK: outer.loop.preheader:
13 ; CHECK: [[iteration_count:[^ ]+]] = add i32 %l, -1
1412 br label %outer.loop
1513
1614 outer.loop:
2119
2220 inner.loop.preheader:
2321 ; CHECK: inner.loop.preheader:
24 ; CHECK: [[wide_cond:[^ ]+]] = icmp slt i32 [[iteration_count]], %length
22 ; CHECK: [[first_iteration_check:[^ ]+]] = icmp ult i32 0, %length
23 ; CHECK-NEXT: [[limit_check:[^ ]+]] = icmp sle i32 %l, %length
24 ; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[first_iteration_check]], [[limit_check]]
25 ; CHECK-NEXT: br label %inner.loop
2526 br label %inner.loop
2627
2728 inner.loop:
3031 %inner.loop.acc = phi i32 [ %inner.loop.acc.next, %inner.loop ], [ %outer.loop.acc, %inner.loop.preheader ]
3132 %j = phi i32 [ %j.next, %inner.loop ], [ 0, %inner.loop.preheader ]
3233
33 %within.bounds = icmp slt i32 %j, %length
34 %within.bounds = icmp ult i32 %j, %length
3435 call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
3536
3637 %j.i64 = zext i32 %j to i64
6162
6263 outer.loop.preheader:
6364 ; CHECK: outer.loop.preheader:
64 ; CHECK: [[iteration_count:[^ ]+]] = add i32 %n, -1
65 ; CHECK: [[wide_cond:[^ ]+]] = icmp slt i32 [[iteration_count]], %length
65 ; CHECK: [[first_iteration_check:[^ ]+]] = icmp ult i32 0, %length
66 ; CHECK-NEXT: [[limit_check:[^ ]+]] = icmp sle i32 %n, %length
67 ; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[first_iteration_check]], [[limit_check]]
68 ; CHECK-NEXT: br label %outer.loop
6669 br label %outer.loop
6770
6871 outer.loop:
8184 %inner.loop.acc = phi i32 [ %inner.loop.acc.next, %inner.loop ], [ %outer.loop.acc, %inner.loop.preheader ]
8285 %j = phi i32 [ %j.next, %inner.loop ], [ 0, %inner.loop.preheader ]
8386
84 %within.bounds = icmp slt i32 %i, %length
87 %within.bounds = icmp ult i32 %i, %length
8588 call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
8689
8790 %i.i64 = zext i32 %i to i64
111114 br i1 %tmp5, label %exit, label %outer.loop.preheader
112115
113116 outer.loop.preheader:
117 ; CHECK: outer.loop.preheader:
118 ; CHECK-NEXT: [[first_iteration_check_outer:[^ ]+]] = icmp ult i32 0, %length
119 ; CHECK-NEXT: [[limit_check_outer:[^ ]+]] = icmp sle i32 %n, %length
120 ; CHECK-NEXT: [[wide_cond_outer:[^ ]+]] = and i1 [[first_iteration_check_outer]], [[limit_check_outer]]
121 ; CHECK-NEXT: br label %outer.loop
114122 br label %outer.loop
115123
116124 outer.loop:
117125 ; CHECK: outer.loop:
118 ; CHECK: [[i_1:[^ ]+]] = add i32 %i, 1
119 ; CHECK-NEXT: [[l_sgt_i_1:[^ ]+]] = icmp sgt i32 %l, [[i_1]]
120 ; CHECK-NEXT: [[smax:[^ ]+]] = select i1 [[l_sgt_i_1]], i32 %l, i32 [[i_1]]
121 ; CHECK-NEXT: [[max_j:[^ ]+]] = add i32 [[smax]], -1
122126 %outer.loop.acc = phi i32 [ %outer.loop.acc.next, %outer.loop.inc ], [ 0, %outer.loop.preheader ]
123127 %i = phi i32 [ %i.next, %outer.loop.inc ], [ 0, %outer.loop.preheader ]
124128 %tmp6 = icmp sle i32 %l, 0
126130
127131 inner.loop.preheader:
128132 ; CHECK: inner.loop.preheader:
129 ; CHECK: [[wide_cond:[^ ]+]] = icmp slt i32 [[max_j]], %length
130 br label %inner.loop
131
132 inner.loop:
133 ; CHECK: inner.loop:
133 ; CHECK: [[limit_check_inner:[^ ]+]] = icmp sle i32 %l, %length
134 ; CHECK: br label %inner.loop
135 br label %inner.loop
136
137 inner.loop:
138 ; CHECK: inner.loop:
139 ; CHECK: [[wide_cond:[^ ]+]] = and i1 [[limit_check_inner]], [[wide_cond_outer]]
134140 ; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ]
135141 %inner.loop.acc = phi i32 [ %inner.loop.acc.next, %inner.loop ], [ %outer.loop.acc, %inner.loop.preheader ]
136142 %j = phi i32 [ %j.next, %inner.loop ], [ %i, %inner.loop.preheader ]
137143
138 %within.bounds = icmp slt i32 %j, %length
144 %within.bounds = icmp ult i32 %j, %length
139145 call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
140146
141147 %j.i64 = zext i32 %j to i64
156162 exit:
157163 %result = phi i32 [ 0, %entry ], [ %outer.loop.acc.next, %outer.loop.inc ]
158164 ret i32 %result
165 }
166
; Negative test: the CHECK lines in inner.loop require the guard to keep using
; the original %within.bounds condition, i.e. the guard is expected to be left
; as it was written. The inner induction variable %j starts at %div, which is
; computed in the outer loop as a udiv by the function argument %maybezero.
; NOTE(review): presumably the pass refuses to expand that start value in the
; inner preheader because the division may have a zero divisor - confirm
; against the LoopPredication implementation.
167 define i32 @cant_expand_guard_check_start(i32* %array, i32 %length, i32 %n, i32 %l, i32 %maybezero) {
168 ; CHECK-LABEL: @cant_expand_guard_check_start
169 entry:
170 %tmp5 = icmp sle i32 %n, 0
171 br i1 %tmp5, label %exit, label %outer.loop.preheader
172
173 outer.loop.preheader:
174 br label %outer.loop
175
176 outer.loop:
177 %outer.loop.acc = phi i32 [ %outer.loop.acc.next, %outer.loop.inc ], [ 0, %outer.loop.preheader ]
178 %i = phi i32 [ %i.next, %outer.loop.inc ], [ 0, %outer.loop.preheader ]
179 %tmp6 = icmp sle i32 %l, 0
; %div feeds the inner IV %j as its start value; the divisor is an
; unconstrained argument of the function.
180 %div = udiv i32 %i, %maybezero
181 br i1 %tmp6, label %outer.loop.inc, label %inner.loop.preheader
182
183 inner.loop.preheader:
184 ; CHECK: inner.loop.preheader:
185 ; CHECK: br label %inner.loop
186 br label %inner.loop
187
188 inner.loop:
189 ; CHECK: inner.loop:
190 ; CHECK: %within.bounds = icmp ult i32 %j, %length
191 ; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
192 %inner.loop.acc = phi i32 [ %inner.loop.acc.next, %inner.loop ], [ %outer.loop.acc, %inner.loop.preheader ]
193 %j = phi i32 [ %j.next, %inner.loop ], [ %div, %inner.loop.preheader ]
194
195 %within.bounds = icmp ult i32 %j, %length
196 call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
197
198 %j.i64 = zext i32 %j to i64
199 %array.j.ptr = getelementptr inbounds i32, i32* %array, i64 %j.i64
200 %array.j = load i32, i32* %array.j.ptr, align 4
201 %inner.loop.acc.next = add i32 %inner.loop.acc, %array.j
202
203 %j.next = add nsw i32 %j, 1
204 %inner.continue = icmp slt i32 %j.next, %l
205 br i1 %inner.continue, label %inner.loop, label %outer.loop.inc
206
207 outer.loop.inc:
208 %outer.loop.acc.next = phi i32 [ %inner.loop.acc.next, %inner.loop ], [ %outer.loop.acc, %outer.loop ]
209 %i.next = add nsw i32 %i, 1
210 %outer.continue = icmp slt i32 %i.next, %n
211 br i1 %outer.continue, label %outer.loop, label %exit
212
213 exit:
214 %result = phi i32 [ 0, %entry ], [ %outer.loop.acc.next, %outer.loop.inc ]
215 ret i32 %result
159216 }
1010
1111 loop.preheader:
1212 ; CHECK: loop.preheader:
13 ; CHECK: [[iteration_count:[^ ]+]] = add i32 %n, -1
14 ; CHECK-NEXT: [[wide_cond:[^ ]+]] = icmp ult i32 [[iteration_count]], %length
13 ; CHECK: [[first_iteration_check:[^ ]+]] = icmp ult i32 0, %length
14 ; CHECK-NEXT: [[limit_check:[^ ]+]] = icmp ule i32 %n, %length
15 ; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[first_iteration_check]], [[limit_check]]
1516 ; CHECK-NEXT: br label %loop
1617 br label %loop
1718