llvm.org GIT mirror llvm / 7c3eddc
[Loop Predication] Teach LP about reverse loops Summary: Currently, we only support predication for forward loops with a step of 1. This patch enables loop predication for reverse (countdown) loops that satisfy the following conditions: 1. The step of the IV is -1. 2. The loop has a single latch of the form B(X) = X <pred> latchLimit, with <pred> being s> or u>. 3. The IV of the guard is the decremented IV of the latch condition (the guard is: G(X) = X-1 u< guardLimit). This patch was downstream for a while and is the last in the series of patches from our downstream LP implementation. Reviewers: apilipenko, mkazantsev, sanjoy Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D40353 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@319659 91177308-0d34-0410-b5e6-96231b3b80d8 Anna Thomas 1 year, 9 months ago
2 changed file(s) with 277 addition(s) and 60 deletion(s). Raw diff Collapse all Expand all
9797 // Note that we can use anything stronger than M, i.e. any condition which
9898 // implies M.
9999 //
100 // For now the transformation is limited to the following case:
100 // When S = 1 (i.e. forward iterating loop), the transformation is supported
101 // when:
101102 // * The loop has a single latch with the condition of the form:
102103 // B(X) = latchStart + X <pred> latchLimit,
103104 // where <pred> is u<, u<=, s<, or s<=.
104 // * The step of the IV used in the latch condition is 1.
105105 // * The guard condition is of the form
106106 // G(X) = guardStart + X u< guardLimit
107107 //
108 // For the ult latch comparison case M is:
109 // forall X . guardStart + X u< guardLimit && latchStart + X u< latchLimit =>
110 // guardStart + X + 1 u< guardLimit
111 //
112 // The only way the antecedent can be true and the consequent can be false is
113 // if
114 // X == guardLimit - 1 - guardStart
115 // (and guardLimit is non-zero, but we won't use this latter fact).
116 // If X == guardLimit - 1 - guardStart then the second half of the antecedent is
117 // latchStart + guardLimit - 1 - guardStart u< latchLimit
118 // and its negation is
119 // latchStart + guardLimit - 1 - guardStart u>= latchLimit
120 //
121 // In other words, if
122 // latchLimit u<= latchStart + guardLimit - 1 - guardStart
123 // then:
124 // (the ranges below are written in ConstantRange notation, where [A, B) is the
125 // set for (I = A; I != B; I++ /*maywrap*/) yield(I);)
126 //
127 // forall X . guardStart + X u< guardLimit &&
128 // latchStart + X u< latchLimit =>
129 // guardStart + X + 1 u< guardLimit
130 // == forall X . guardStart + X u< guardLimit &&
131 // latchStart + X u< latchStart + guardLimit - 1 - guardStart =>
132 // guardStart + X + 1 u< guardLimit
133 // == forall X . (guardStart + X) in [0, guardLimit) &&
134 // (latchStart + X) in [0, latchStart + guardLimit - 1 - guardStart) =>
135 // (guardStart + X + 1) in [0, guardLimit)
136 // == forall X . X in [-guardStart, guardLimit - guardStart) &&
137 // X in [-latchStart, guardLimit - 1 - guardStart) =>
138 // X in [-guardStart - 1, guardLimit - guardStart - 1)
139 // == true
140 //
141 // So the widened condition is:
142 // guardStart u< guardLimit &&
143 // latchStart + guardLimit - 1 - guardStart u>= latchLimit
144 // Similarly for ule condition the widened condition is:
145 // guardStart u< guardLimit &&
146 // latchStart + guardLimit - 1 - guardStart u> latchLimit
147 // For slt condition the widened condition is:
148 // guardStart u< guardLimit &&
149 // latchStart + guardLimit - 1 - guardStart s>= latchLimit
150 // For sle condition the widened condition is:
151 // guardStart u< guardLimit &&
152 // latchStart + guardLimit - 1 - guardStart s> latchLimit
153 //
108 // For the ult latch comparison case M is:
109 // forall X . guardStart + X u< guardLimit && latchStart + X u< latchLimit =>
110 // guardStart + X + 1 u< guardLimit
111 //
112 // The only way the antecedent can be true and the consequent can be false is
113 // if
114 // X == guardLimit - 1 - guardStart
115 // (and guardLimit is non-zero, but we won't use this latter fact).
116 // If X == guardLimit - 1 - guardStart then the second half of the antecedent is
117 // latchStart + guardLimit - 1 - guardStart u< latchLimit
118 // and its negation is
119 // latchStart + guardLimit - 1 - guardStart u>= latchLimit
120 //
121 // In other words, if
122 // latchLimit u<= latchStart + guardLimit - 1 - guardStart
123 // then:
124 // (the ranges below are written in ConstantRange notation, where [A, B) is the
125 // set for (I = A; I != B; I++ /*maywrap*/) yield(I);)
126 //
127 // forall X . guardStart + X u< guardLimit &&
128 // latchStart + X u< latchLimit =>
129 // guardStart + X + 1 u< guardLimit
130 // == forall X . guardStart + X u< guardLimit &&
131 // latchStart + X u< latchStart + guardLimit - 1 - guardStart =>
132 // guardStart + X + 1 u< guardLimit
133 // == forall X . (guardStart + X) in [0, guardLimit) &&
134 // (latchStart + X) in [0, latchStart + guardLimit - 1 - guardStart) =>
135 // (guardStart + X + 1) in [0, guardLimit)
136 // == forall X . X in [-guardStart, guardLimit - guardStart) &&
137 // X in [-latchStart, guardLimit - 1 - guardStart) =>
138 // X in [-guardStart - 1, guardLimit - guardStart - 1)
139 // == true
140 //
141 // So the widened condition is:
142 // guardStart u< guardLimit &&
143 // latchStart + guardLimit - 1 - guardStart u>= latchLimit
144 // Similarly for ule condition the widened condition is:
145 // guardStart u< guardLimit &&
146 // latchStart + guardLimit - 1 - guardStart u> latchLimit
147 // For slt condition the widened condition is:
148 // guardStart u< guardLimit &&
149 // latchStart + guardLimit - 1 - guardStart s>= latchLimit
150 // For sle condition the widened condition is:
151 // guardStart u< guardLimit &&
152 // latchStart + guardLimit - 1 - guardStart s> latchLimit
153 //
154 // When S = -1 (i.e. reverse iterating loop), the transformation is supported
155 // when:
156 // * The loop has a single latch with the condition of the form:
157 // B(X) = X <pred> latchLimit, where <pred> is u> or s>.
158 // * The guard condition is of the form
159 // G(X) = X - 1 u< guardLimit
160 //
161 // For the ugt latch comparison case M is:
162 // forall X. X-1 u< guardLimit and X u> latchLimit => X-2 u< guardLimit
163 //
164 // The only way the antecedent can be true and the consequent can be false is if
165 // X == 1.
166 // If X == 1 then the second half of the antecedent is
167 // 1 u> latchLimit, and its negation is latchLimit u>= 1.
168 //
169 // So the widened condition is:
170 // guardStart u< guardLimit && latchLimit u>= 1.
171 // Similarly for sgt condition the widened condition is:
172 // guardStart u< guardLimit && latchLimit s>= 1.
154173 //===----------------------------------------------------------------------===//
155174
156175 #include "llvm/Transforms/Scalar/LoopPredication.h"
176195 static cl::opt<bool> EnableIVTruncation("loop-predication-enable-iv-truncation",
177196 cl::Hidden, cl::init(true));
178197
198 static cl::opt<bool> EnableCountDownLoop("loop-predication-enable-count-down-loop",
199 cl::Hidden, cl::init(true));
179200 namespace {
180201 class LoopPredication {
181202 /// Represents an induction variable check:
222243 LoopICmp RangeCheck,
223244 SCEVExpander &Expander,
224245 IRBuilder<> &Builder);
225
246 Optional<Value *> widenICmpRangeCheckDecrementingLoop(LoopICmp LatchCheck,
247 LoopICmp RangeCheck,
248 SCEVExpander &Expander,
249 IRBuilder<> &Builder);
226250 bool widenGuardConditions(IntrinsicInst *II, SCEVExpander &Expander);
227251
228252 // When the IV type is wider than the range operand type, we can still do loop
359383 }
360384
361385 bool LoopPredication::isSupportedStep(const SCEV* Step) {
362 return Step->isOne();
386 return Step->isOne() || (Step->isAllOnesValue() && EnableCountDownLoop);
363387 }
364388
365389 bool LoopPredication::CanExpand(const SCEV* S) {
419443 GuardStart, GuardLimit, InsertAt);
420444 return Builder.CreateAnd(FirstIterationCheck, LimitCheck);
421445 }
446
447 Optional<Value *> LoopPredication::widenICmpRangeCheckDecrementingLoop(
448 LoopPredication::LoopICmp LatchCheck, LoopPredication::LoopICmp RangeCheck,
449 SCEVExpander &Expander, IRBuilder<> &Builder) {
450 auto *Ty = RangeCheck.IV->getType();
451 const SCEV *GuardStart = RangeCheck.IV->getStart();
452 const SCEV *GuardLimit = RangeCheck.Limit;
453 const SCEV *LatchLimit = LatchCheck.Limit;
454 if (!CanExpand(GuardStart) || !CanExpand(GuardLimit) ||
455 !CanExpand(LatchLimit)) {
456 DEBUG(dbgs() << "Can't expand limit check!\n");
457 return None;
458 }
459 // The decrement of the latch check IV should be the same as the
460 // rangeCheckIV.
461 auto *PostDecLatchCheckIV = LatchCheck.IV->getPostIncExpr(*SE);
462 if (RangeCheck.IV != PostDecLatchCheckIV) {
463 DEBUG(dbgs() << "Not the same. PostDecLatchCheckIV: "
464 << *PostDecLatchCheckIV
465 << " and RangeCheckIV: " << *RangeCheck.IV << "\n");
466 return None;
467 }
468
469 // Generate the widened condition for CountDownLoop:
470 // guardStart u< guardLimit &&
471 // latchLimit <pred> 1, where <pred> is s>= for a signed latch predicate and u>= otherwise.
472 // See the header comment for reasoning of the checks.
473 Instruction *InsertAt = Preheader->getTerminator();
474 auto LimitCheckPred = ICmpInst::isSigned(LatchCheck.Pred)
475 ? ICmpInst::ICMP_SGE
476 : ICmpInst::ICMP_UGE;
477 auto *FirstIterationCheck = expandCheck(Expander, Builder, ICmpInst::ICMP_ULT,
478 GuardStart, GuardLimit, InsertAt);
479 auto *LimitCheck = expandCheck(Expander, Builder, LimitCheckPred, LatchLimit,
480 SE->getOne(Ty), InsertAt);
481 return Builder.CreateAnd(FirstIterationCheck, LimitCheck);
482 }
483
422484 /// If ICI can be widened to a loop invariant condition emits the loop
423485 /// invariant condition in the loop preheader and return it, otherwise
424486 /// returns None.
466528 }
467529
468530 LoopICmp CurrLatchCheck = *CurrLatchCheckOpt;
469 // At this point the range check step and latch step should have the same
470 // value and type.
471 assert(Step == CurrLatchCheck.IV->getStepRecurrence(*SE) &&
472 "Range and latch should have same step recurrence!");
473
474 return widenICmpRangeCheckIncrementingLoop(CurrLatchCheck, *RangeCheck,
475 Expander, Builder);
531 // At this point, the range and latch step should have the same type, but need
532 // not have the same value (we support both 1 and -1 steps).
533 assert(Step->getType() ==
534 CurrLatchCheck.IV->getStepRecurrence(*SE)->getType() &&
535 "Range and latch steps should be of same type!");
536 if (Step != CurrLatchCheck.IV->getStepRecurrence(*SE)) {
537 DEBUG(dbgs() << "Range and latch have different step values!\n");
538 return None;
539 }
540
541 if (Step->isOne())
542 return widenICmpRangeCheckIncrementingLoop(CurrLatchCheck, *RangeCheck,
543 Expander, Builder);
544 else {
545 assert(Step->isAllOnesValue() && "Step should be -1!");
546 return widenICmpRangeCheckDecrementingLoop(CurrLatchCheck, *RangeCheck,
547 Expander, Builder);
548 }
476549 }
477550
478551 bool LoopPredication::widenGuardConditions(IntrinsicInst *Guard,
579652 }
580653
581654 auto IsUnsupportedPredicate = [](const SCEV *Step, ICmpInst::Predicate Pred) {
582 assert(Step->isOne() && "expected Step to be one!");
583 return Pred != ICmpInst::ICMP_ULT && Pred != ICmpInst::ICMP_SLT &&
584 Pred != ICmpInst::ICMP_ULE && Pred != ICmpInst::ICMP_SLE;
655 if (Step->isOne()) {
656 return Pred != ICmpInst::ICMP_ULT && Pred != ICmpInst::ICMP_SLT &&
657 Pred != ICmpInst::ICMP_ULE && Pred != ICmpInst::ICMP_SLE;
658 } else {
659 assert(Step->isAllOnesValue() && "Step should be -1!");
660 return Pred != ICmpInst::ICMP_UGT && Pred != ICmpInst::ICMP_SGT;
661 }
585662 };
586663
587664 if (IsUnsupportedPredicate(Step, Result->Pred)) {
0 ; RUN: opt -S -loop-predication -loop-predication-enable-count-down-loop=true < %s 2>&1 | FileCheck %s
1 ; RUN: opt -S -passes='require<scalar-evolution>,loop(loop-predication)' -loop-predication-enable-count-down-loop=true < %s 2>&1 | FileCheck %s
2
3 declare void @llvm.experimental.guard(i1, ...)
4
5 define i32 @signed_reverse_loop_n_to_lower_limit(i32* %array, i32 %length, i32 %n, i32 %lowerlimit) {
6 ; CHECK-LABEL: @signed_reverse_loop_n_to_lower_limit(
7 entry:
8 %tmp5 = icmp eq i32 %n, 0
9 br i1 %tmp5, label %exit, label %loop.preheader
10
11 ; CHECK: loop.preheader:
12 ; CHECK-NEXT: [[range_start:%.*]] = add i32 %n, -1
13 ; CHECK-NEXT: [[first_iteration_check:%.*]] = icmp ult i32 [[range_start]], %length
14 ; CHECK-NEXT: [[no_wrap_check:%.*]] = icmp sge i32 %lowerlimit, 1
15 ; CHECK-NEXT: [[wide_cond:%.*]] = and i1 [[first_iteration_check]], [[no_wrap_check]]
16 loop.preheader:
17 br label %loop
18
19 ; CHECK: loop:
20 ; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ]
21 loop:
22 %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ]
23 %i = phi i32 [ %i.next, %loop ], [ %n, %loop.preheader ]
24 %i.next = add nsw i32 %i, -1
25 %within.bounds = icmp ult i32 %i.next, %length
26 call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
27 %i.i64 = zext i32 %i.next to i64
28 %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64
29 %array.i = load i32, i32* %array.i.ptr, align 4
30 %loop.acc.next = add i32 %loop.acc, %array.i
31 %continue = icmp sgt i32 %i, %lowerlimit
32 br i1 %continue, label %loop, label %exit
33
34 exit:
35 %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ]
36 ret i32 %result
37 }
38
39 define i32 @unsigned_reverse_loop_n_to_lower_limit(i32* %array, i32 %length, i32 %n, i32 %lowerlimit) {
40 ; CHECK-LABEL: @unsigned_reverse_loop_n_to_lower_limit(
41 entry:
42 %tmp5 = icmp eq i32 %n, 0
43 br i1 %tmp5, label %exit, label %loop.preheader
44
45 ; CHECK: loop.preheader:
46 ; CHECK-NEXT: [[range_start:%.*]] = add i32 %n, -1
47 ; CHECK-NEXT: [[first_iteration_check:%.*]] = icmp ult i32 [[range_start]], %length
48 ; CHECK-NEXT: [[no_wrap_check:%.*]] = icmp uge i32 %lowerlimit, 1
49 ; CHECK-NEXT: [[wide_cond:%.*]] = and i1 [[first_iteration_check]], [[no_wrap_check]]
50 loop.preheader:
51 br label %loop
52
53 ; CHECK: loop:
54 ; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ]
55 loop:
56 %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ]
57 %i = phi i32 [ %i.next, %loop ], [ %n, %loop.preheader ]
58 %i.next = add nsw i32 %i, -1
59 %within.bounds = icmp ult i32 %i.next, %length
60 call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
61 %i.i64 = zext i32 %i.next to i64
62 %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64
63 %array.i = load i32, i32* %array.i.ptr, align 4
64 %loop.acc.next = add i32 %loop.acc, %array.i
65 %continue = icmp ugt i32 %i, %lowerlimit
66 br i1 %continue, label %loop, label %exit
67
68 exit:
69 %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ]
70 ret i32 %result
71 }
72
73
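74 ; The latch limit here is the constant 0, so the widened limit check
75 ; (latchLimit u>= 1) is false: once the loop is predicated, the guard will
75 ; definitely fail and we will deoptimize early on.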
76 define i32 @unsigned_reverse_loop_n_to_0(i32* %array, i32 %length, i32 %n, i32 %lowerlimit) {
77 ; CHECK-LABEL: @unsigned_reverse_loop_n_to_0(
78 entry:
79 %tmp5 = icmp eq i32 %n, 0
80 br i1 %tmp5, label %exit, label %loop.preheader
81
82 ; CHECK: loop.preheader:
83 ; CHECK-NEXT: [[range_start:%.*]] = add i32 %n, -1
84 ; CHECK-NEXT: [[first_iteration_check:%.*]] = icmp ult i32 [[range_start]], %length
85 ; CHECK-NEXT: [[wide_cond:%.*]] = and i1 [[first_iteration_check]], false
86 loop.preheader:
87 br label %loop
88
89 ; CHECK: loop:
90 ; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ]
91 loop:
92 %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ]
93 %i = phi i32 [ %i.next, %loop ], [ %n, %loop.preheader ]
94 %i.next = add nsw i32 %i, -1
95 %within.bounds = icmp ult i32 %i.next, %length
96 call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
97 %i.i64 = zext i32 %i.next to i64
98 %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64
99 %array.i = load i32, i32* %array.i.ptr, align 4
100 %loop.acc.next = add i32 %loop.acc, %array.i
101 %continue = icmp ugt i32 %i, 0
102 br i1 %continue, label %loop, label %exit
103
104 exit:
105 %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ]
106 ret i32 %result
107 }
108
109 ; Do not predicate the loop when the range check IV has step 1 and the latch IV has step -1.
110 define i32 @reverse_loop_range_step_increment(i32 %n, i32* %array, i32 %length) {
111 ; CHECK-LABEL: @reverse_loop_range_step_increment(
112 entry:
113 %tmp5 = icmp eq i32 %n, 0
114 br i1 %tmp5, label %exit, label %loop.preheader
115
116 loop.preheader:
117 br label %loop
118
119 ; CHECK: loop:
120 ; CHECK: llvm.experimental.guard(i1 %within.bounds, i32 9)
121 loop:
122 %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ]
123 %i = phi i32 [ %i.next, %loop ], [ %n, %loop.preheader ]
124 %irc = phi i32 [ %i.inc, %loop ], [ 1, %loop.preheader ]
125 %i.inc = add nuw nsw i32 %irc, 1
126 %within.bounds = icmp ult i32 %irc, %length
127 call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
128 %i.i64 = zext i32 %irc to i64
129 %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64
130 %array.i = load i32, i32* %array.i.ptr, align 4
131 %i.next = add nsw i32 %i, -1
132 %loop.acc.next = add i32 %loop.acc, %array.i
133 %continue = icmp ugt i32 %i, 65534
134 br i1 %continue, label %loop, label %exit
135
136 exit:
137 %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ]
138 ret i32 %result
139 }