llvm.org GIT mirror llvm / 946dd32
[LoopPredication] Support ule, sle latch predicates This is a follow up for the loop predication change 313981 to support ule, sle latch predicates. Reviewed By: mkazantsev Differential Revision: https://reviews.llvm.org/D38177 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@315616 91177308-0d34-0410-b5e6-96231b3b80d8 Artur Pilipenko 1 year, 11 months ago
2 changed file(s) with 144 addition(s) and 21 deletion(s). Raw diff Collapse all Expand all
9999 // implies M.
100100 //
101101 // For now the transformation is limited to the following case:
102 // * The loop has a single latch with either ult or slt icmp condition.
102 // * The loop has a single latch with the condition of the form:
103 // ++i <pred> latchLimit, where <pred> is u<, u<=, s<, or s<=.
103104 // * The step of the IV used in the latch condition is 1.
104105 // * The IV of the latch condition is the same as the post increment IV of the
105106 // guard condition.
106 // * The guard condition is ult.
107 //
108 // In this case the latch is of the from:
109 // ++i u< latchLimit or ++i s< latchLimit
110 // and the guard is of the form:
111 // i u< guardLimit
112 //
113 // For the unsigned latch comparison case M is:
107 // * The guard condition is
108 // i u< guardLimit.
109 //
110 // For the ult latch comparison case M is:
114111 // forall X . X u< guardLimit && (X + 1) u< latchLimit =>
115112 // (X + 1) u< guardLimit
116113 //
117114 // This is true if latchLimit u<= guardLimit since then
118115 // (X + 1) u< latchLimit u<= guardLimit == (X + 1) u< guardLimit.
119116 //
120 // So the widened condition is:
117 // So for ult condition the widened condition is:
121118 // i.start u< guardLimit && latchLimit u<= guardLimit
119 // Similarly for ule condition the widened condition is:
120 // i.start u< guardLimit && latchLimit u< guardLimit
122121 //
123122 // For the signed latch comparison case M is:
124123 // forall X . X u< guardLimit && (X + 1) s< latchLimit =>
146145 //
147146 // So the widened condition is:
148147 // i.start u< guardLimit && latchLimit s<= guardLimit
148 // Similarly for sle condition the widened condition is:
149 // i.start u< guardLimit && latchLimit s< guardLimit
149150 //
150151 //===----------------------------------------------------------------------===//
151152
302303 DEBUG(ICI->dump());
303304
304305 // parseLoopStructure guarantees that the latch condition is:
305 // ++i u< latchLimit or ++i s< latchLimit
306 // ++i <pred> latchLimit, where <pred> is u<, u<=, s<, or s<=.
306307 // We are looking for the range checks of the form:
307308 // i u< guardLimit
308309 auto RangeCheck = parseLoopICmp(ICI);
326327 assert(RangeCheckIV->getStepRecurrence(*SE)->isOne() && "must be one");
327328 const SCEV *Start = RangeCheckIV->getStart();
328329
329 // Generate the widened condition. See the file header comment for reasoning.
330 // If the latch condition is unsigned:
331 // i.start u< guardLimit && latchLimit u<= guardLimit
332 // If the latch condition is signed:
333 // i.start u< guardLimit && latchLimit s<= guardLimit
334
335 auto LimitCheckPred = ICmpInst::isSigned(LatchCheck.Pred)
336 ? ICmpInst::ICMP_SLE
337 : ICmpInst::ICMP_ULE;
330 // Generate the widened condition:
331 // i.start u< guardLimit && latchLimit <pred> guardLimit
332 // where <pred> depends on the latch condition predicate. See the file
333 // header comment for the reasoning.
334 ICmpInst::Predicate LimitCheckPred;
335 switch (LatchCheck.Pred) {
336 case ICmpInst::ICMP_ULT:
337 LimitCheckPred = ICmpInst::ICMP_ULE;
338 break;
339 case ICmpInst::ICMP_ULE:
340 LimitCheckPred = ICmpInst::ICMP_ULT;
341 break;
342 case ICmpInst::ICMP_SLT:
343 LimitCheckPred = ICmpInst::ICMP_SLE;
344 break;
345 case ICmpInst::ICMP_SLE:
346 LimitCheckPred = ICmpInst::ICMP_SLT;
347 break;
348 default:
349 llvm_unreachable("Unsupported loop latch!");
350 }
338351
339352 auto CanExpand = [this](const SCEV *S) {
340353 return SE->isLoopInvariant(S, L) && isSafeToExpand(S, *SE);
442455 }
443456
444457 if (Result->Pred != ICmpInst::ICMP_ULT &&
445 Result->Pred != ICmpInst::ICMP_SLT) {
458 Result->Pred != ICmpInst::ICMP_SLT &&
459 Result->Pred != ICmpInst::ICMP_ULE &&
460 Result->Pred != ICmpInst::ICMP_SLE) {
446461 DEBUG(dbgs() << "Unsupported loop latch predicate(" << Result->Pred
447462 << ")!\n");
448463 return None;
3838 ret i32 %result
3939 }
4040
41 define i32 @unsigned_loop_0_to_n_ule_latch_ult_check(i32* %array, i32 %length, i32 %n) {
42 ; CHECK-LABEL: @unsigned_loop_0_to_n_ule_latch_ult_check
43 entry:
44 %tmp5 = icmp eq i32 %n, 0
45 br i1 %tmp5, label %exit, label %loop.preheader
46
47 loop.preheader:
48 ; CHECK: loop.preheader:
49 ; CHECK: [[first_iteration_check:[^ ]+]] = icmp ult i32 0, %length
50 ; CHECK-NEXT: [[limit_check:[^ ]+]] = icmp ult i32 %n, %length
51 ; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[first_iteration_check]], [[limit_check]]
52 ; CHECK-NEXT: br label %loop
53 br label %loop
54
55 loop:
56 ; CHECK: loop:
57 ; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ]
58 %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ]
59 %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ]
60 %within.bounds = icmp ult i32 %i, %length
61 call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
62
63 %i.i64 = zext i32 %i to i64
64 %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64
65 %array.i = load i32, i32* %array.i.ptr, align 4
66 %loop.acc.next = add i32 %loop.acc, %array.i
67
68 %i.next = add nuw i32 %i, 1
69 %continue = icmp ule i32 %i.next, %n
70 br i1 %continue, label %loop, label %exit
71
72 exit:
73 %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ]
74 ret i32 %result
75 }
76
4177 define i32 @unsigned_loop_0_to_n_ugt_check(i32* %array, i32 %length, i32 %n) {
4278 ; CHECK-LABEL: @unsigned_loop_0_to_n_ugt_check
4379 entry:
103139
104140 %i.next = add nuw i32 %i, 1
105141 %continue = icmp slt i32 %i.next, %n
142 br i1 %continue, label %loop, label %exit
143
144 exit:
145 %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ]
146 ret i32 %result
147 }
148
149 define i32 @signed_loop_0_to_n_inverse_latch_predicate(i32* %array, i32 %length, i32 %n) {
150 ; CHECK-LABEL: @signed_loop_0_to_n_inverse_latch_predicate
151 entry:
152 %tmp5 = icmp sle i32 %n, 0
153 br i1 %tmp5, label %exit, label %loop.preheader
154
155 loop.preheader:
156 ; CHECK: loop.preheader:
157 ; CHECK: [[first_iteration_check:[^ ]+]] = icmp ult i32 0, %length
158 ; CHECK-NEXT: [[limit_check:[^ ]+]] = icmp slt i32 %n, %length
159 ; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[first_iteration_check]], [[limit_check]]
160 ; CHECK-NEXT: br label %loop
161 br label %loop
162
163 loop:
164 ; CHECK: loop:
165 ; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ]
166 %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ]
167 %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ]
168 %within.bounds = icmp ult i32 %i, %length
169 call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
170
171 %i.i64 = zext i32 %i to i64
172 %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64
173 %array.i = load i32, i32* %array.i.ptr, align 4
174 %loop.acc.next = add i32 %loop.acc, %array.i
175
176 %i.next = add nuw i32 %i, 1
177 %continue = icmp sgt i32 %i.next, %n
178 br i1 %continue, label %exit, label %loop
179
180 exit:
181 %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ]
182 ret i32 %result
183 }
184
185 define i32 @signed_loop_0_to_n_sle_latch_ult_check(i32* %array, i32 %length, i32 %n) {
186 ; CHECK-LABEL: @signed_loop_0_to_n_sle_latch_ult_check
187 entry:
188 %tmp5 = icmp sle i32 %n, 0
189 br i1 %tmp5, label %exit, label %loop.preheader
190
191 loop.preheader:
192 ; CHECK: loop.preheader:
193 ; CHECK: [[first_iteration_check:[^ ]+]] = icmp ult i32 0, %length
194 ; CHECK-NEXT: [[limit_check:[^ ]+]] = icmp slt i32 %n, %length
195 ; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[first_iteration_check]], [[limit_check]]
196 ; CHECK-NEXT: br label %loop
197 br label %loop
198
199 loop:
200 ; CHECK: loop:
201 ; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ]
202 %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ]
203 %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ]
204 %within.bounds = icmp ult i32 %i, %length
205 call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
206
207 %i.i64 = zext i32 %i to i64
208 %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64
209 %array.i = load i32, i32* %array.i.ptr, align 4
210 %loop.acc.next = add i32 %loop.acc, %array.i
211
212 %i.next = add nuw i32 %i, 1
213 %continue = icmp sle i32 %i.next, %n
106214 br i1 %continue, label %loop, label %exit
107215
108216 exit: