llvm.org GIT mirror llvm / f9207fe
[LoopPred] Handle a subset of NE comparison based latches

At the moment, LoopPredication completely bails out if it sees a latch of the form:
  %cmp = icmp ne %iv, %N
  br i1 %cmp, label %loop, label %exit
OR
  %cmp = icmp ne %iv.next, %NPlus1
  br i1 %cmp, label %loop, label %exit

This is unfortunate since this is exactly the form that LFTR likes to produce. So, go ahead and recognize simple cases where we can.

For pre-increment loops, we leverage the fact that LFTR likes canonical counters (i.e. those starting at zero) and a (presumed) range fact on RHS to discharge the check trivially.

For post-increment forms, the key insight is in remembering that LFTR had to insert a (N+1) for the RHS. CVP can hopefully prove that add nsw/nuw (if there's appropriate range on N to start with). This leaves us with both the post-inc IV and the RHS involving an nsw/nuw add, and SCEV can discharge that with no problem.

This does still need to be extended to handle non-one steps, or other harder patterns of variable (but range restricted) starting values. That'll come later.

Differential Revision: https://reviews.llvm.org/D62748

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@362282 91177308-0d34-0410-b5e6-96231b3b80d8

Philip Reames 3 months ago
2 changed file(s) with 50 addition(s) and 30 deletion(s). Raw diff Collapse all Expand all
229229 cl::init(true));
230230
231231 namespace {
232 /// Represents an induction variable check:
233 /// icmp Pred, <induction variable>, <loop invariant limit>
234 struct LoopICmp {
235 ICmpInst::Predicate Pred;
236 const SCEVAddRecExpr *IV;
237 const SCEV *Limit;
238 LoopICmp(ICmpInst::Predicate Pred, const SCEVAddRecExpr *IV,
239 const SCEV *Limit)
240 : Pred(Pred), IV(IV), Limit(Limit) {}
241 LoopICmp() {}
242 void dump() {
243 dbgs() << "LoopICmp Pred = " << Pred << ", IV = " << *IV
244 << ", Limit = " << *Limit << "\n";
245 }
246 };
247
232248 class LoopPredication {
233 /// Represents an induction variable check:
234 /// icmp Pred, <induction variable>, <loop invariant limit>
235 struct LoopICmp {
236 ICmpInst::Predicate Pred;
237 const SCEVAddRecExpr *IV;
238 const SCEV *Limit;
239 LoopICmp(ICmpInst::Predicate Pred, const SCEVAddRecExpr *IV,
240 const SCEV *Limit)
241 : Pred(Pred), IV(IV), Limit(Limit) {}
242 LoopICmp() {}
243 void dump() {
244 dbgs() << "LoopICmp Pred = " << Pred << ", IV = " << *IV
245 << ", Limit = " << *Limit << "\n";
246 }
247 };
248
249249 AliasAnalysis *AA;
250250 ScalarEvolution *SE;
251251 BranchProbabilityInfo *BPI;
381381 return getLoopPassPreservedAnalyses();
382382 }
383383
384 Optional<LoopPredication::LoopICmp>
384 Optional<LoopICmp>
385385 LoopPredication::parseLoopICmp(ICmpInst::Predicate Pred, Value *LHS,
386386 Value *RHS) {
387387 const SCEV *LHSS = SE->getSCEV(LHS);
427427 return Builder.CreateICmp(Pred, LHSV, RHSV);
428428 }
429429
430 Optional<LoopPredication::LoopICmp>
430 Optional<LoopICmp>
431431 LoopPredication::generateLoopLatchCheck(Type *RangeCheckType) {
432432
433433 auto *LatchType = LatchCheck.IV->getType();
517517 }
518518
519519 Optional<Value *> LoopPredication::widenICmpRangeCheckIncrementingLoop(
520 LoopPredication::LoopICmp LatchCheck, LoopPredication::LoopICmp RangeCheck,
520 LoopICmp LatchCheck, LoopICmp RangeCheck,
521521 SCEVExpander &Expander, Instruction *Guard) {
522522 auto *Ty = RangeCheck.IV->getType();
523523 // Generate the widened condition for the forward loop:
566566 }
567567
568568 Optional<Value *> LoopPredication::widenICmpRangeCheckDecrementingLoop(
569 LoopPredication::LoopICmp LatchCheck, LoopPredication::LoopICmp RangeCheck,
569 LoopICmp LatchCheck, LoopICmp RangeCheck,
570570 SCEVExpander &Expander, Instruction *Guard) {
571571 auto *Ty = RangeCheck.IV->getType();
572572 const SCEV *GuardStart = RangeCheck.IV->getStart();
612612 IRBuilder<> Builder(findInsertPt(Guard, {FirstIterationCheck, LimitCheck}));
613613 return Builder.CreateAnd(FirstIterationCheck, LimitCheck);
614614 }
615
616 static void normalizePredicate(ScalarEvolution *SE, Loop *L,
617 LoopICmp& RC) {
618 // LFTR canonicalizes checks to the ICMP_NE form instead of an ULT/SLT form.
619 // Normalize back to the ULT/SLT form for ease of handling.
620 if (RC.Pred == ICmpInst::ICMP_NE &&
621 RC.IV->getStepRecurrence(*SE)->isOne() &&
622 SE->isKnownPredicate(ICmpInst::ICMP_ULE, RC.IV->getStart(), RC.Limit))
623 RC.Pred = ICmpInst::ICMP_ULT;
624 }
625
615626
616627 /// If ICI can be widened to a loop invariant condition emits the loop
617628 /// invariant condition in the loop preheader and return it, otherwise
797808 return true;
798809 }
799810
800 Optional<LoopPredication::LoopICmp> LoopPredication::parseLoopLatchICmp() {
811 Optional<LoopICmp> LoopPredication::parseLoopLatchICmp() {
801812 using namespace PatternMatch;
802813
803814 BasicBlock *LoopLatch = L->getLoopLatch();
851862 }
852863 };
853864
865 normalizePredicate(SE, L, *Result);
854866 if (IsUnsupportedPredicate(Step, Result->Pred)) {
855867 LLVM_DEBUG(dbgs() << "Unsupported loop latch predicate(" << Result->Pred
856868 << ")!\n");
16021602 ; CHECK-NEXT: loop.preheader:
16031603 ; CHECK-NEXT: [[N:%.*]] = zext i16 [[N16:%.*]] to i32
16041604 ; CHECK-NEXT: [[NPLUS1:%.*]] = add nuw nsw i32 [[N]], 1
1605 ; CHECK-NEXT: [[TMP0:%.*]] = icmp ule i32 [[NPLUS1]], [[LENGTH:%.*]]
1606 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 0, [[LENGTH]]
1607 ; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[TMP1]], [[TMP0]]
16051608 ; CHECK-NEXT: br label [[LOOP:%.*]]
16061609 ; CHECK: loop:
16071610 ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER:%.*]] ]
1608 ; CHECK-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH:%.*]]
1609 ; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[WITHIN_BOUNDS]], i32 9) [ "deopt"() ]
1611 ; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[TMP2]], i32 9) [ "deopt"() ]
16101612 ; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
16111613 ; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ne i32 [[I_NEXT]], [[NPLUS1]]
16121614 ; CHECK-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]]
16361638 ; CHECK-LABEL: @ne_latch_zext_preinc(
16371639 ; CHECK-NEXT: loop.preheader:
16381640 ; CHECK-NEXT: [[N:%.*]] = zext i16 [[N16:%.*]] to i32
1641 ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[LENGTH:%.*]], -1
1642 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i32 [[N]], [[TMP0]]
1643 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 0, [[LENGTH]]
1644 ; CHECK-NEXT: [[TMP3:%.*]] = and i1 [[TMP2]], [[TMP1]]
16391645 ; CHECK-NEXT: br label [[LOOP:%.*]]
16401646 ; CHECK: loop:
16411647 ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER:%.*]] ]
1642 ; CHECK-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH:%.*]]
1643 ; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[WITHIN_BOUNDS]], i32 9) [ "deopt"() ]
1648 ; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[TMP3]], i32 9) [ "deopt"() ]
16441649 ; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1
16451650 ; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ne i32 [[I]], [[N]]
16461651 ; CHECK-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]]
17141719 ; CHECK-NEXT: [[TMP5:%.*]] = icmp sle i32 [[N:%.*]], 0
17151720 ; CHECK-NEXT: br i1 [[TMP5]], label [[EXIT:%.*]], label [[LOOP_PREHEADER:%.*]]
17161721 ; CHECK: loop.preheader:
1717 ; CHECK-NEXT: br label [[LOOP:%.*]]
1718 ; CHECK: loop:
1719 ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER]] ]
1720 ; CHECK-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH:%.*]]
1721 ; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[WITHIN_BOUNDS]], i32 9) [ "deopt"() ]
1722 ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[LENGTH:%.*]], -1
1723 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i32 [[N]], [[TMP0]]
1724 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 0, [[LENGTH]]
1725 ; CHECK-NEXT: [[TMP3:%.*]] = and i1 [[TMP2]], [[TMP1]]
1726 ; CHECK-NEXT: br label [[LOOP:%.*]]
1727 ; CHECK: loop:
1728 ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER]] ]
1729 ; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[TMP3]], i32 9) [ "deopt"() ]
17221730 ; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1
17231731 ; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ne i32 [[I]], [[N]]
17241732 ; CHECK-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]]