llvm.org GIT mirror llvm / 5136ca2
Handle loops with a negative induction variable increment. This patch extends the LoopReroll pass to handle loops similar to the following: while (len > 1) { sum4 += buf[len]; sum4 += buf[len-1]; len -= 2; } git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@243171 91177308-0d34-0410-b5e6-96231b3b80d8 Lawrence Hu 4 years ago
2 changed file(s) with 83 addition(s) and 37 deletion(s). Raw diff Collapse all Expand all
165165 typedef SmallVector SmallInstructionVector;
166166 typedef SmallSet SmallInstructionSet;
167167
168 // A chain of isomorphic instructions, indentified by a single-use PHI,
168 // Map between induction variable and its increment
169 DenseMap IVToIncMap;
170
171 // A chain of isomorphic instructions, identified by a single-use PHI
169172 // representing a reduction. Only the last value may be used outside the
170173 // loop.
171174 struct SimpleLoopReduction {
334337 // x[i*3+1] = y2
335338 // x[i*3+2] = y3
336339 //
337 // Base instruction -> i*3
340 // Base instruction -> i*3
338341 // +---+----+
339342 // / | \
340343 // ST[y1] +1 +2 <-- Roots
365368 struct DAGRootTracker {
366369 DAGRootTracker(LoopReroll *Parent, Loop *L, Instruction *IV,
367370 ScalarEvolution *SE, AliasAnalysis *AA,
368 TargetLibraryInfo *TLI)
369 : Parent(Parent), L(L), SE(SE), AA(AA), TLI(TLI), IV(IV) {}
371 TargetLibraryInfo *TLI,
372 DenseMap &IncrMap)
373 : Parent(Parent), L(L), SE(SE), AA(AA), TLI(TLI), IV(IV),
374 IVToIncMap(IncrMap) {}
370375
371376 /// Stage 1: Find all the DAG roots for the induction variable.
372377 bool findRoots();
416421 // The loop induction variable.
417422 Instruction *IV;
418423 // Loop step amount.
419 uint64_t Inc;
424 int64_t Inc;
420425 // Loop reroll count; if Inc == 1, this records the scaling applied
421426 // to the indvar: a[i*2+0] = ...; a[i*2+1] = ... ;
422427 // If Inc is not 1, Scale = Inc.
429434 // they are used in (or specially, IL_All for instructions
430435 // used in the loop increment mechanism).
431436 UsesTy Uses;
437 // Map between induction variable and its increment
438 DenseMap &IVToIncMap;
432439 };
433440
434441 void collectPossibleIVs(Loop *L, SmallInstructionVector &PossibleIVs);
483490 continue;
484491 if (const SCEVConstant *IncSCEV =
485492 dyn_cast(PHISCEV->getStepRecurrence(*SE))) {
486 if (!IncSCEV->getValue()->getValue().isStrictlyPositive())
493 const APInt &AInt = IncSCEV->getValue()->getValue().abs();
494 if (IncSCEV->getValue()->isZero() || AInt.uge(MaxInc))
487495 continue;
488 if (IncSCEV->getValue()->uge(MaxInc))
489 continue;
490
491 DEBUG(dbgs() << "LRR: Possible IV: " << *I << " = " <<
492 *PHISCEV << "\n");
496 IVToIncMap[I] = IncSCEV->getValue()->getSExtValue();
497 DEBUG(dbgs() << "LRR: Possible IV: " << *I << " = " << *PHISCEV
498 << "\n");
493499 PossibleIVs.push_back(I);
494500 }
495501 }
698704 }
699705 }
700706
701 int64_t V = CI->getValue().getSExtValue();
707 int64_t V = std::abs(CI->getValue().getSExtValue());
702708 if (Roots.find(V) != Roots.end())
703709 // No duplicates, please.
704710 return false;
705
706 // FIXME: Add support for negative values.
707 if (V < 0) {
708 DEBUG(dbgs() << "LRR: Aborting due to negative value: " << V << "\n");
709 return false;
710 }
711711
712712 Roots[V] = cast(I);
713713 }
730730 unsigned NumBaseUses = BaseUsers.size();
731731 if (NumBaseUses == 0)
732732 NumBaseUses = Roots.begin()->second->getNumUses();
733
733
734734 // Check that every node has the same number of users.
735735 for (auto &KV : Roots) {
736736 if (KV.first == 0)
743743 }
744744 }
745745
746 return true;
746 return true;
747747 }
748748
749749 bool LoopReroll::DAGRootTracker::
786786 if (!collectPossibleRoots(IVU, V))
787787 return false;
788788
789 // If we didn't get a root for index zero, then IVU must be
789 // If we didn't get a root for index zero, then IVU must be
790790 // subsumed.
791791 if (V.find(0) == V.end())
792792 SubsumedInsts.insert(IVU);
817817 }
818818
819819 bool LoopReroll::DAGRootTracker::findRoots() {
820
821 const SCEVAddRecExpr *RealIVSCEV = cast(SE->getSCEV(IV));
822 Inc = cast(RealIVSCEV->getOperand(1))->
823 getValue()->getZExtValue();
820 Inc = IVToIncMap[IV];
824821
825822 assert(RootSets.empty() && "Unclean state!");
826 if (Inc == 1) {
823 if (std::abs(Inc) == 1) {
827824 for (auto *IVU : IV->users()) {
828825 if (isLoopIncrement(IVU, IV))
829826 LoopIncs.push_back(cast(IVU));
11021099 " vs. " << *RootInst << "\n");
11031100 return false;
11041101 }
1105
1102
11061103 RootIt = TryIt;
11071104 RootInst = TryIt->first;
11081105 }
11091106
11101107 // All instructions between the last root and this root
1111 // may belong to some other iteration. If they belong to a
1108 // may belong to some other iteration. If they belong to a
11121109 // future iteration, then they're dangerous to alias with.
1113 //
1110 //
11141111 // Note that because we allow a limited amount of flexibility in the order
11151112 // that we visit nodes, LastRootIt might be *before* RootIt, in which
11161113 // case we've already checked this set of instructions so we shouldn't
12661263
12671264 ++J;
12681265 }
1266 bool Negative = IVToIncMap[IV] < 0;
12691267 const DataLayout &DL = Header->getModule()->getDataLayout();
12701268
12711269 // We need to create a new induction variable for each different BaseInst.
12741272 const SCEVAddRecExpr *RealIVSCEV =
12751273 cast(SE->getSCEV(DRS.BaseInst));
12761274 const SCEV *Start = RealIVSCEV->getStart();
1277 const SCEVAddRecExpr *H = cast
1278 (SE->getAddRecExpr(Start,
1279 SE->getConstant(RealIVSCEV->getType(), 1),
1280 L, SCEV::FlagAnyWrap));
1275 const SCEVAddRecExpr *H = cast(SE->getAddRecExpr(
1276 Start, SE->getConstant(RealIVSCEV->getType(), Negative ? -1 : 1), L,
1277 SCEV::FlagAnyWrap));
12811278 { // Limit the lifetime of SCEVExpander.
12821279 SCEVExpander Expander(*SE, DL, "reroll");
12831280 Value *NewIV = Expander.expandCodeFor(H, IV->getType(), Header->begin());
12931290 const SCEV *ICSCEV = RealIVSCEV->evaluateAtIteration(IterCount, *SE);
12941291
12951292 // Iteration count SCEV minus 1
1296 const SCEV *ICMinus1SCEV =
1297 SE->getMinusSCEV(ICSCEV, SE->getConstant(ICSCEV->getType(), 1));
1293 const SCEV *ICMinus1SCEV = SE->getMinusSCEV(
1294 ICSCEV, SE->getConstant(ICSCEV->getType(), Negative ? -1 : 1));
12981295
12991296 Value *ICMinus1; // Iteration count minus 1
13001297 if (isa(ICMinus1SCEV)) {
14431440 bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header,
14441441 const SCEV *IterCount,
14451442 ReductionTracker &Reductions) {
1446 DAGRootTracker DAGRoots(this, L, IV, SE, AA, TLI);
1443 DAGRootTracker DAGRoots(this, L, IV, SE, AA, TLI, IVToIncMap);
14471444
14481445 if (!DAGRoots.findRoots())
14491446 return false;
14501447 DEBUG(dbgs() << "LRR: Found all root induction increments for: " <<
14511448 *IV << "\n");
1452
1449
14531450 if (!DAGRoots.validate(Reductions))
14541451 return false;
14551452 if (!Reductions.validateSelected())
14961493 // First, we need to find the induction variable with respect to which we can
14971494 // reroll (there may be several possible options).
14981495 SmallInstructionVector PossibleIVs;
1496 IVToIncMap.clear();
14991497 collectPossibleIVs(L, PossibleIVs);
15001498
15011499 if (PossibleIVs.empty()) {
0 ; RUN: opt -S -loop-reroll %s | FileCheck %s
1 target triple = "aarch64--linux-gnu"
2 @buf = global [16 x i8] c"\0A\0A\0A\0A\0A\0A\0A\0A\0A\0A\0A\0A\0A\0A\0A\0A", align 1
3
4 define i32 @test1(i32 %len, i8* nocapture readonly %buf) #0 {
5 entry:
6 %cmp.13 = icmp sgt i32 %len, 1
7 br i1 %cmp.13, label %while.body.lr.ph, label %while.end
8
9 while.body.lr.ph: ; preds = %entry
10 br label %while.body
11
12 while.body:
13 ;CHECK-LABEL: while.body:
14 ;CHECK-NEXT: %indvar = phi i32 [ %indvar.next, %while.body ], [ 0, %while.body.lr.ph ]
15 ;CHECK-NEXT: %sum4.015 = phi i64 [ 0, %while.body.lr.ph ], [ %add, %while.body ]
16 ;CHECK-NOT: %sub5 = add nsw i32 %len.addr.014, -1
17 ;CHECK-NOT: %sub5 = add nsw i32 %len.addr.014, -2
18 ;CHECK: br i1 %exitcond, label %while.cond.while.end_crit_edge, label %while.body
19
20 %sum4.015 = phi i64 [ 0, %while.body.lr.ph ], [ %add4, %while.body ]
21 %len.addr.014 = phi i32 [ %len, %while.body.lr.ph ], [ %sub5, %while.body ]
22 %idxprom = sext i32 %len.addr.014 to i64
23 %arrayidx = getelementptr inbounds i8, i8* %buf, i64 %idxprom
24 %0 = load i8, i8* %arrayidx, align 1
25 %conv = zext i8 %0 to i64
26 %add = add i64 %conv, %sum4.015
27 %sub = add nsw i32 %len.addr.014, -1
28 %idxprom1 = sext i32 %sub to i64
29 %arrayidx2 = getelementptr inbounds i8, i8* %buf, i64 %idxprom1
30 %1 = load i8, i8* %arrayidx2, align 1
31 %conv3 = zext i8 %1 to i64
32 %add4 = add i64 %add, %conv3
33 %sub5 = add nsw i32 %len.addr.014, -2
34 %cmp = icmp sgt i32 %sub5, 1
35 br i1 %cmp, label %while.body, label %while.cond.while.end_crit_edge
36
37 while.cond.while.end_crit_edge: ; preds = %while.body
38 %add4.lcssa = phi i64 [ %add4, %while.body ]
39 %phitmp = trunc i64 %add4.lcssa to i32
40 br label %while.end
41
42 while.end: ; preds = %while.cond.while.end_crit_edge, %entry
43 %sum4.0.lcssa = phi i32 [ %phitmp, %while.cond.while.end_crit_edge ], [ 0, %entry ]
44 ret i32 %sum4.0.lcssa
45 unreachable
46 }
47