llvm.org GIT mirror llvm / 91a90a2
[Loop Predication] Teach LP about reverse loops with uge and sge latch conditions. Add support of uge and sge latch conditions to Loop Predication for reverse loops. Reviewers: apilipenko, mkazantsev, sanjoy, anna Reviewed By: anna Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D42837 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@324589 91177308-0d34-0410-b5e6-96231b3b80d8 Serguei Katkov 1 year, 7 months ago
2 changed file(s) with 113 addition(s) and 5 deletion(s). Raw diff Collapse all Expand all
154154 // When S = -1 (i.e. reverse iterating loop), the transformation is supported
155155 // when:
156156 // * The loop has a single latch with the condition of the form:
157 // B(X) = X <pred> latchLimit, where <pred> is u> or s>.
157 // B(X) = X <pred> latchLimit, where <pred> is u>, u>=, s>, or s>=.
158158 // * The guard condition is of the form
159159 // G(X) = X - 1 u< guardLimit
160160 //
170170 // guardStart u< guardLimit && latchLimit u>= 1.
171171 // Similarly for sgt condition the widened condition is:
172172 // guardStart u< guardLimit && latchLimit s>= 1.
173 // For uge condition the widened condition is:
174 // guardStart u< guardLimit && latchLimit u> 1.
175 // For sge condition the widened condition is:
176 // guardStart u< guardLimit && latchLimit s> 1.
173177 //===----------------------------------------------------------------------===//
174178
175179 #include "llvm/Transforms/Scalar/LoopPredication.h"
484488 // latchLimit <pred> 1.
485489 // See the header comment for reasoning of the checks.
486490 Instruction *InsertAt = Preheader->getTerminator();
487 auto LimitCheckPred = ICmpInst::isSigned(LatchCheck.Pred)
488 ? ICmpInst::ICMP_SGE
489 : ICmpInst::ICMP_UGE;
491 auto LimitCheckPred = getLatchPredicateForGuard(LatchCheck.Pred);
490492 auto *FirstIterationCheck = expandCheck(Expander, Builder, ICmpInst::ICMP_ULT,
491493 GuardStart, GuardLimit, InsertAt);
492494 auto *LimitCheck = expandCheck(Expander, Builder, LimitCheckPred, LatchLimit,
670672 Pred != ICmpInst::ICMP_ULE && Pred != ICmpInst::ICMP_SLE;
671673 } else {
672674 assert(Step->isAllOnesValue() && "Step should be -1!");
673 return Pred != ICmpInst::ICMP_UGT && Pred != ICmpInst::ICMP_SGT;
675 return Pred != ICmpInst::ICMP_UGT && Pred != ICmpInst::ICMP_SGT &&
676 Pred != ICmpInst::ICMP_UGE && Pred != ICmpInst::ICMP_SGE;
674677 }
675678 };
676679
137137 %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ]
138138 ret i32 %result
139139 }
140
141 define i32 @signed_reverse_loop_n_to_lower_limit_equal(i32* %array, i32 %length, i32 %n, i32 %lowerlimit) {
142 ; CHECK-LABEL: @signed_reverse_loop_n_to_lower_limit_equal(
143 entry:
144 %tmp5 = icmp eq i32 %n, 0
145 br i1 %tmp5, label %exit, label %loop.preheader
146
147 ; CHECK: loop.preheader:
148 ; CHECK-NEXT: [[range_start:%.*]] = add i32 %n, -1
149 ; CHECK-NEXT: [[first_iteration_check:%.*]] = icmp ult i32 [[range_start]], %length
150 ; CHECK-NEXT: [[no_wrap_check:%.*]] = icmp sgt i32 %lowerlimit, 1
151 ; CHECK-NEXT: [[wide_cond:%.*]] = and i1 [[first_iteration_check]], [[no_wrap_check]]
152 loop.preheader:
153 br label %loop
154
155 ; CHECK: loop:
156 ; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ]
157 loop:
158 %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ]
159 %i = phi i32 [ %i.next, %loop ], [ %n, %loop.preheader ]
160 %i.next = add nsw i32 %i, -1
161 %within.bounds = icmp ult i32 %i.next, %length
162 call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
163 %i.i64 = zext i32 %i.next to i64
164 %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64
165 %array.i = load i32, i32* %array.i.ptr, align 4
166 %loop.acc.next = add i32 %loop.acc, %array.i
167 %continue = icmp sge i32 %i, %lowerlimit
168 br i1 %continue, label %loop, label %exit
169
170 exit:
171 %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ]
172 ret i32 %result
173 }
174
175 define i32 @unsigned_reverse_loop_n_to_lower_limit_equal(i32* %array, i32 %length, i32 %n, i32 %lowerlimit) {
176 ; CHECK-LABEL: @unsigned_reverse_loop_n_to_lower_limit_equal(
177 entry:
178 %tmp5 = icmp eq i32 %n, 0
179 br i1 %tmp5, label %exit, label %loop.preheader
180
181 ; CHECK: loop.preheader:
182 ; CHECK-NEXT: [[range_start:%.*]] = add i32 %n, -1
183 ; CHECK-NEXT: [[first_iteration_check:%.*]] = icmp ult i32 [[range_start]], %length
184 ; CHECK-NEXT: [[no_wrap_check:%.*]] = icmp ugt i32 %lowerlimit, 1
185 ; CHECK-NEXT: [[wide_cond:%.*]] = and i1 [[first_iteration_check]], [[no_wrap_check]]
186 loop.preheader:
187 br label %loop
188
189 ; CHECK: loop:
190 ; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ]
191 loop:
192 %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ]
193 %i = phi i32 [ %i.next, %loop ], [ %n, %loop.preheader ]
194 %i.next = add nsw i32 %i, -1
195 %within.bounds = icmp ult i32 %i.next, %length
196 call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
197 %i.i64 = zext i32 %i.next to i64
198 %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64
199 %array.i = load i32, i32* %array.i.ptr, align 4
200 %loop.acc.next = add i32 %loop.acc, %array.i
201 %continue = icmp uge i32 %i, %lowerlimit
202 br i1 %continue, label %loop, label %exit
203
204 exit:
205 %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ]
206 ret i32 %result
207 }
208
209
210 ; if we predicated the loop, the guard will definitely fail and we will
211 ; deoptimize early on.
212 define i32 @unsigned_reverse_loop_n_to_1(i32* %array, i32 %length, i32 %n, i32 %lowerlimit) {
213 ; CHECK-LABEL: @unsigned_reverse_loop_n_to_1(
214 entry:
215 %tmp5 = icmp eq i32 %n, 0
216 br i1 %tmp5, label %exit, label %loop.preheader
217
218 ; CHECK: loop.preheader:
219 ; CHECK-NEXT: [[range_start:%.*]] = add i32 %n, -1
220 ; CHECK-NEXT: [[first_iteration_check:%.*]] = icmp ult i32 [[range_start]], %length
221 ; CHECK-NEXT: [[wide_cond:%.*]] = and i1 [[first_iteration_check]], false
222 loop.preheader:
223 br label %loop
224
225 ; CHECK: loop:
226 ; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ]
227 loop:
228 %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ]
229 %i = phi i32 [ %i.next, %loop ], [ %n, %loop.preheader ]
230 %i.next = add nsw i32 %i, -1
231 %within.bounds = icmp ult i32 %i.next, %length
232 call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
233 %i.i64 = zext i32 %i.next to i64
234 %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64
235 %array.i = load i32, i32* %array.i.ptr, align 4
236 %loop.acc.next = add i32 %loop.acc, %array.i
237 %continue = icmp uge i32 %i, 1
238 br i1 %continue, label %loop, label %exit
239
240 exit:
241 %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ]
242 ret i32 %result
243 }
244