llvm.org GIT mirror llvm / b69a2a9
[LoopPredication] Enable predication when latchCheckIV is wider than rangeCheck Summary: This patch allows us to predicate range checks that have a type narrower than the latch check type. We leverage SCEV analysis to identify a truncate for the latchLimit and latchStart. There are also safety checks in place, which require the start and limit to be known at compile time. We require this to make sure that the SCEV truncate expr for the IV corresponding to the latch does not cause us to lose information about the IV range. The added tests show loop predication over range checks that are of various types and are narrower than the latch type. This enhancement has been in our downstream tree for a while. Reviewers: apilipenko, sanjoy, mkazantsev Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D39500 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@317269 91177308-0d34-0410-b5e6-96231b3b80d8 Anna Thomas 1 year, 10 months ago
2 changed file(s) with 235 addition(s) and 11 deletion(s). Raw diff Collapse all Expand all
173173
174174 using namespace llvm;
175175
176 static cl::opt<bool> EnableIVTruncation("loop-predication-enable-iv-truncation",
177 cl::Hidden, cl::init(true));
178
176179 namespace {
177180 class LoopPredication {
178181 /// Represents an induction variable check:
211214 IRBuilder<> &Builder);
212215 bool widenGuardConditions(IntrinsicInst *II, SCEVExpander &Expander);
213216
217 // When the IV type is wider than the range operand type, we can still do loop
218 // predication, by generating SCEVs for the range and latch that are of the
219 // same type. We achieve this by generating a SCEV truncate expression for the
220 // latch IV. This is done iff truncation of the IV is a safe operation,
221 // without loss of information.
222 // Another way to achieve this is by generating a wider type SCEV for the
223 // range check operand, however, this needs a more involved check that
224 // operands do not overflow. This can lead to loss of information when the
225 // range operand is of the form: add i32 %offset, %iv. We need to prove that
226 // sext(x + y) is same as sext(x) + sext(y).
227 // This function returns true if we can safely represent the IV type in
228 // the RangeCheckType without loss of information.
229 bool isSafeToTruncateWideIVType(Type *RangeCheckType);
230 // Return the loopLatchCheck corresponding to the RangeCheckType if safe to do
231 // so.
232 Optional<LoopICmp> generateLoopLatchCheck(Type *RangeCheckType);
214233 public:
215234 LoopPredication(ScalarEvolution *SE) : SE(SE){};
216235 bool runOnLoop(Loop *L);
300319 return Builder.CreateICmp(Pred, LHSV, RHSV);
301320 }
302321
322 Optional<LoopICmp>
323 LoopPredication::generateLoopLatchCheck(Type *RangeCheckType) {
324
325 auto *LatchType = LatchCheck.IV->getType();
326 if (RangeCheckType == LatchType)
327 return LatchCheck;
328 // For now, bail out if latch type is narrower than range type.
329 if (DL->getTypeSizeInBits(LatchType) < DL->getTypeSizeInBits(RangeCheckType))
330 return None;
331 if (!isSafeToTruncateWideIVType(RangeCheckType))
332 return None;
333 // We can now safely identify the truncated version of the IV and limit for
334 // RangeCheckType.
335 LoopICmp NewLatchCheck;
336 NewLatchCheck.Pred = LatchCheck.Pred;
337 NewLatchCheck.IV = dyn_cast<SCEVAddRecExpr>(
338 SE->getTruncateExpr(LatchCheck.IV, RangeCheckType));
339 if (!NewLatchCheck.IV)
340 return None;
341 NewLatchCheck.Limit = SE->getTruncateExpr(LatchCheck.Limit, RangeCheckType);
342 DEBUG(dbgs() << "IV of type: " << *LatchType
343 << "can be represented as range check type:" << *RangeCheckType
344 << "\n");
345 DEBUG(dbgs() << "LatchCheck.IV: " << *NewLatchCheck.IV << "\n");
346 DEBUG(dbgs() << "LatchCheck.Limit: " << *NewLatchCheck.Limit << "\n");
347 return NewLatchCheck;
348 }
349
303350 /// If ICI can be widened to a loop invariant condition emits the loop
304351 /// invariant condition in the loop preheader and return it, otherwise
305352 /// returns None.
324371 return None;
325372 }
326373 auto *RangeCheckIV = RangeCheck->IV;
327 auto *Ty = RangeCheckIV->getType();
328 if (Ty != LatchCheck.IV->getType()) {
329 DEBUG(dbgs() << "Type mismatch between range check and latch IVs!\n");
330 return None;
331 }
332374 if (!RangeCheckIV->isAffine()) {
333375 DEBUG(dbgs() << "Range check IV is not affine!\n");
334376 return None;
335377 }
336378 auto *Step = RangeCheckIV->getStepRecurrence(*SE);
337 if (Step != LatchCheck.IV->getStepRecurrence(*SE)) {
379 // We cannot just compare with latch IV step because the latch and range IVs
380 // may have different types.
381 if (!Step->isOne()) {
338382 DEBUG(dbgs() << "Range check and latch have IVs different steps!\n");
339383 return None;
340384 }
341 assert(Step->isOne() && "must be one");
342
385 auto *Ty = RangeCheckIV->getType();
386 auto CurrLatchCheckOpt = generateLoopLatchCheck(Ty);
387 if (!CurrLatchCheckOpt) {
388 DEBUG(dbgs() << "Failed to generate a loop latch check "
389 "corresponding to range type: "
390 << *Ty << "\n");
391 return None;
392 }
393
394 LoopICmp CurrLatchCheck = *CurrLatchCheckOpt;
395 // At this point the range check step and latch step should have the same
396 // value and type.
397 assert(Step == CurrLatchCheck.IV->getStepRecurrence(*SE) &&
398 "Range and latch should have same step recurrence!");
343399 // Generate the widened condition:
344400 // guardStart u< guardLimit &&
345401 // latchLimit <pred> guardLimit - 1 - guardStart + latchStart
346402 // where <pred> depends on the latch condition predicate. See the file
347403 // header comment for the reasoning.
348404 const SCEV *GuardStart = RangeCheckIV->getStart();
349405 const SCEV *GuardLimit = RangeCheck->Limit;
350 const SCEV *LatchStart = LatchCheck.IV->getStart();
351 const SCEV *LatchLimit = LatchCheck.Limit;
406 const SCEV *LatchStart = CurrLatchCheck.IV->getStart();
407 const SCEV *LatchLimit = CurrLatchCheck.Limit;
352408
353409 // guardLimit - guardStart + latchStart - 1
354410 const SCEV *RHS =
356412 SE->getMinusSCEV(LatchStart, SE->getOne(Ty)));
357413
358414 ICmpInst::Predicate LimitCheckPred;
359 switch (LatchCheck.Pred) {
415 switch (CurrLatchCheck.Pred) {
360416 case ICmpInst::ICMP_ULT:
361417 LimitCheckPred = ICmpInst::ICMP_ULE;
362418 break;
509565 return Result;
510566 }
511567
568 // Returns true if it's safe to truncate the IV to RangeCheckType.
569 bool LoopPredication::isSafeToTruncateWideIVType(Type *RangeCheckType) {
570 if (!EnableIVTruncation)
571 return false;
572 assert(DL->getTypeSizeInBits(LatchCheck.IV->getType()) >
573 DL->getTypeSizeInBits(RangeCheckType) &&
574 "Expected latch check IV type to be larger than range check operand "
575 "type!");
576 // The start and end values of the IV should be known. This is to guarantee
577 // that truncating the wide type will not lose information.
578 auto *Limit = dyn_cast<SCEVConstant>(LatchCheck.Limit);
579 auto *Start = dyn_cast<SCEVConstant>(LatchCheck.IV->getStart());
580 if (!Limit || !Start)
581 return false;
582 // This check makes sure that the IV does not change sign during loop
583 // iterations. Consider latchType = i64, LatchStart = 5, Pred = ICMP_SGE,
584 // LatchEnd = 2, rangeCheckType = i32. If it's not a monotonic predicate, the
585 // IV wraps around, and the truncation of the IV would lose the range of
586 // iterations between 2^32 and 2^64.
587 bool Increasing;
588 if (!SE->isMonotonicPredicate(LatchCheck.IV, LatchCheck.Pred, Increasing))
589 return false;
590 // The active bits should be less than the bits in the RangeCheckType. This
591 // guarantees that truncating the latch check to RangeCheckType is a safe
592 // operation.
593 auto RangeCheckTypeBitSize = DL->getTypeSizeInBits(RangeCheckType);
594 return Start->getAPInt().getActiveBits() < RangeCheckTypeBitSize &&
595 Limit->getAPInt().getActiveBits() < RangeCheckTypeBitSize;
596 }
597
512598 bool LoopPredication::runOnLoop(Loop *Loop) {
513599 L = Loop;
514600
0 ; RUN: opt -S -loop-predication -loop-predication-enable-iv-truncation=true < %s 2>&1 | FileCheck %s
1 declare void @llvm.experimental.guard(i1, ...)
2
3 declare i32 @length(i8*)
4
5 declare i16 @short_length(i8*)
6 ; Consider range checks of types i16 and i32, while the IV is of type i64.
7 ; We can loop predicate this because the IV range is within i16 and within i32.
8 define i64 @iv_wider_type_rc_two_narrow_types(i32 %offA, i16 %offB, i8* %arrA, i8* %arrB) {
9 ; CHECK-LABEL: iv_wider_type_rc_two_narrow_types
10 entry:
11 ; CHECK-LABEL: entry:
12 ; CHECK: [[idxB:[^ ]+]] = sub i16 %lengthB, %offB
13 ; CHECK-NEXT: [[limit_checkB:[^ ]+]] = icmp ule i16 16, [[idxB]]
14 ; CHECK-NEXT: [[first_iteration_checkB:[^ ]+]] = icmp ult i16 %offB, %lengthB
15 ; CHECK-NEXT: [[WideChkB:[^ ]+]] = and i1 [[first_iteration_checkB]], [[limit_checkB]]
16 ; CHECK-NEXT: [[idxA:[^ ]+]] = sub i32 %lengthA, %offA
17 ; CHECK-NEXT: [[limit_checkA:[^ ]+]] = icmp ule i32 16, [[idxA]]
18 ; CHECK-NEXT: [[first_iteration_checkA:[^ ]+]] = icmp ult i32 %offA, %lengthA
19 ; CHECK-NEXT: [[WideChkA:[^ ]+]] = and i1 [[first_iteration_checkA]], [[limit_checkA]]
20 %lengthA = call i32 @length(i8* %arrA)
21 %lengthB = call i16 @short_length(i8* %arrB)
22 br label %loop
23
24 loop:
25 ; CHECK-LABEL: loop:
26 ; CHECK: [[invariant_check:[^ ]+]] = and i1 [[WideChkB]], [[WideChkA]]
27 ; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[invariant_check]], i32 9)
28 %iv = phi i64 [0, %entry ], [ %iv.next, %loop ]
29 %iv.trunc.32 = trunc i64 %iv to i32
30 %iv.trunc.16 = trunc i64 %iv to i16
31 %indexA = add i32 %iv.trunc.32, %offA
32 %indexB = add i16 %iv.trunc.16, %offB
33 %rcA = icmp ult i32 %indexA, %lengthA
34 %rcB = icmp ult i16 %indexB, %lengthB
35 %wide.chk = and i1 %rcA, %rcB
36 call void (i1, ...) @llvm.experimental.guard(i1 %wide.chk, i32 9) [ "deopt"() ]
37 %indexA.ext = zext i32 %indexA to i64
38 %addrA = getelementptr inbounds i8, i8* %arrA, i64 %indexA.ext
39 %eltA = load i8, i8* %addrA
40 %indexB.ext = zext i16 %indexB to i64
41 %addrB = getelementptr inbounds i8, i8* %arrB, i64 %indexB.ext
42 store i8 %eltA, i8* %addrB
43 %iv.next = add nuw nsw i64 %iv, 1
44 %latch.check = icmp ult i64 %iv.next, 16
45 br i1 %latch.check, label %loop, label %exit
46
47 exit:
48 ret i64 %iv
49 }
50
51
52 ; Consider an IV of type long and an array access into int array.
53 ; IV is of type i64 while the range check operands are of type i32 and i64.
54 define i64 @iv_rc_different_types(i32 %offA, i32 %offB, i8* %arrA, i8* %arrB, i64 %max)
55 {
56 ; CHECK-LABEL: iv_rc_different_types
57 entry:
58 ; CHECK-LABEL: entry:
59 ; CHECK: [[lenB:[^ ]+]] = add i32 %lengthB, -1
60 ; CHECK-NEXT: [[idxB:[^ ]+]] = sub i32 [[lenB]], %offB
61 ; CHECK-NEXT: [[limit_checkB:[^ ]+]] = icmp ule i32 15, [[idxB]]
62 ; CHECK-NEXT: [[first_iteration_checkB:[^ ]+]] = icmp ult i32 %offB, %lengthB
63 ; CHECK-NEXT: [[WideChkB:[^ ]+]] = and i1 [[first_iteration_checkB]], [[limit_checkB]]
64 ; CHECK-NEXT: [[maxMinusOne:[^ ]+]] = add i64 %max, -1
65 ; CHECK-NEXT: [[limit_checkMax:[^ ]+]] = icmp ule i64 15, [[maxMinusOne]]
66 ; CHECK-NEXT: [[first_iteration_checkMax:[^ ]+]] = icmp ult i64 0, %max
67 ; CHECK-NEXT: [[WideChkMax:[^ ]+]] = and i1 [[first_iteration_checkMax]], [[limit_checkMax]]
68 ; CHECK-NEXT: [[lenA:[^ ]+]] = add i32 %lengthA, -1
69 ; CHECK-NEXT: [[idxA:[^ ]+]] = sub i32 [[lenA]], %offA
70 ; CHECK-NEXT: [[limit_checkA:[^ ]+]] = icmp ule i32 15, [[idxA]]
71 ; CHECK-NEXT: [[first_iteration_checkA:[^ ]+]] = icmp ult i32 %offA, %lengthA
72 ; CHECK-NEXT: [[WideChkA:[^ ]+]] = and i1 [[first_iteration_checkA]], [[limit_checkA]]
73 %lengthA = call i32 @length(i8* %arrA)
74 %lengthB = call i32 @length(i8* %arrB)
75 br label %loop
76
77 loop:
78 ; CHECK-LABEL: loop:
79 ; CHECK: [[BandMax:[^ ]+]] = and i1 [[WideChkB]], [[WideChkMax]]
80 ; CHECK: [[ABandMax:[^ ]+]] = and i1 [[BandMax]], [[WideChkA]]
81 ; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[ABandMax]], i32 9)
82 %iv = phi i64 [0, %entry ], [ %iv.next, %loop ]
83 %iv.trunc = trunc i64 %iv to i32
84 %indexA = add i32 %iv.trunc, %offA
85 %indexB = add i32 %iv.trunc, %offB
86 %rcA = icmp ult i32 %indexA, %lengthA
87 %rcIV = icmp ult i64 %iv, %max
88 %wide.chk = and i1 %rcA, %rcIV
89 %rcB = icmp ult i32 %indexB, %lengthB
90 %wide.chk.final = and i1 %wide.chk, %rcB
91 call void (i1, ...) @llvm.experimental.guard(i1 %wide.chk.final, i32 9) [ "deopt"() ]
92 %indexA.ext = zext i32 %indexA to i64
93 %addrA = getelementptr inbounds i8, i8* %arrA, i64 %indexA.ext
94 %eltA = load i8, i8* %addrA
95 %indexB.ext = zext i32 %indexB to i64
96 %addrB = getelementptr inbounds i8, i8* %arrB, i64 %indexB.ext
97 %eltB = load i8, i8* %addrB
98 %result = xor i8 %eltA, %eltB
99 store i8 %result, i8* %addrA
100 %iv.next = add nuw nsw i64 %iv, 1
101 %latch.check = icmp ult i64 %iv, 15
102 br i1 %latch.check, label %loop, label %exit
103
104 exit:
105 ret i64 %iv
106 }
107
108 ; We cannot narrow the IV to the range type, because we would lose information.
109 ; for (i64 i = 5; i >= 2; i++)
110 ; This loop wraps around after reaching 2^64.
111 define i64 @iv_rc_different_type(i32 %offA, i8* %arrA) {
112 ; CHECK-LABEL: iv_rc_different_type
113 entry:
114 %lengthA = call i32 @length(i8* %arrA)
115 br label %loop
116
117 loop:
118 ; CHECK-LABEL: loop:
119 ; CHECK: %rcA = icmp ult i32 %indexA, %lengthA
120 ; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %rcA, i32 9)
121 %iv = phi i64 [ 5, %entry ], [ %iv.next, %loop ]
122 %iv.trunc.32 = trunc i64 %iv to i32
123 %indexA = add i32 %iv.trunc.32, %offA
124 %rcA = icmp ult i32 %indexA, %lengthA
125 call void (i1, ...) @llvm.experimental.guard(i1 %rcA, i32 9) [ "deopt"() ]
126 %indexA.ext = zext i32 %indexA to i64
127 %addrA = getelementptr inbounds i8, i8* %arrA, i64 %indexA.ext
128 %eltA = load i8, i8* %addrA
129 %res = add i8 %eltA, 2
130 store i8 %eltA, i8* %addrA
131 %iv.next = add i64 %iv, 1
132 %latch.check = icmp sge i64 %iv.next, 2
133 br i1 %latch.check, label %loop, label %exit
134
135 exit:
136 ret i64 %iv
137 }