llvm.org GIT mirror llvm / 1774024
Reroll loops with multiple IVs and negative step, part 3: support multiple induction variables. This patch enables loop rerolling for the following case: for(int i=0; i<N; i += 2) { S += *a++; S += *a++; }; Differential Revision: http://reviews.llvm.org/D16550 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@268147 91177308-0d34-0410-b5e6-96231b3b80d8 Lawrence Hu 3 years ago
2 changed file(s) with 289 addition(s) and 9 deletion(s). Raw diff Collapse all Expand all
162162
163163 // Map between induction variable and its increment
164164 DenseMap IVToIncMap;
165 // For loops with multiple induction variables, remember the one used only
166 // to control the loop.
167 Instruction *LoopControlIV;
165168
166169 // A chain of isomorphic instructions, identified by a single-use PHI
167170 // representing a reduction. Only the last value may be used outside the
349352 ScalarEvolution *SE, AliasAnalysis *AA,
350353 TargetLibraryInfo *TLI, DominatorTree *DT, LoopInfo *LI,
351354 bool PreserveLCSSA,
352 DenseMap &IncrMap)
355 DenseMap &IncrMap,
356 Instruction *LoopCtrlIV)
353357 : Parent(Parent), L(L), SE(SE), AA(AA), TLI(TLI), DT(DT), LI(LI),
354 PreserveLCSSA(PreserveLCSSA), IV(IV), IVToIncMap(IncrMap) {}
358 PreserveLCSSA(PreserveLCSSA), IV(IV), IVToIncMap(IncrMap),
359 LoopControlIV(LoopCtrlIV) {}
355360
356361 /// Stage 1: Find all the DAG roots for the induction variable.
357362 bool findRoots();
390395 UsesTy::iterator Start,
391396 UsesTy::iterator End);
392397 void replaceIV(Instruction *Inst, Instruction *IV, const SCEV *IterCount);
398 void updateNonLoopCtrlIncr();
393399
394400 LoopReroll *Parent;
395401
420426 UsesTy Uses;
421427 // Map between induction variable and its increment
422428 DenseMap &IVToIncMap;
429 Instruction *LoopControlIV;
423430 };
424431
432 // Check if it is a compare-like instruction whose user is a branch
433 bool isCompareUsedByBranch(Instruction *I) {
434 auto *TI = I->getParent()->getTerminator();
435 if (!isa(TI) || !isa(I))
436 return false;
437 return I->hasOneUse() && TI->getOperand(0) == I;
438 };
439
440 bool isLoopControlIV(Loop *L, Instruction *IV);
425441 void collectPossibleIVs(Loop *L, SmallInstructionVector &PossibleIVs);
426442 void collectPossibleReductions(Loop *L,
427443 ReductionTracker &Reductions);
491507 }
492508 }
493509 return CIncSCEV;
510 }
511
512 // Check if an IV is only used to control the loop. There are two cases:
513 // 1. It only has one use which is loop increment, and the increment is only
514 // used by comparison and the PHI, and the comparison is only used by branch.
515 // 2. It is used by loop increment and the comparison, the loop increment is
516 // only used by the PHI, and the comparison is used only by the branch.
517 bool LoopReroll::isLoopControlIV(Loop *L, Instruction *IV) {
518
519 unsigned IVUses = IV->getNumUses();
520 if (IVUses != 2 && IVUses != 1)
521 return false;
522
523 for (auto *User : IV->users()) {
524 int32_t IncOrCmpUses = User->getNumUses();
525 bool IsCompInst = isCompareUsedByBranch(cast(User));
526
527 // User can only have one or two uses.
528 if (IncOrCmpUses != 2 && IncOrCmpUses != 1)
529 return false;
530
531 // Case 1
532 if (IVUses == 1) {
533 // The only user must be the loop increment.
534 // The loop increment must have two uses.
535 if (IsCompInst || IncOrCmpUses != 2)
536 return false;
537 }
538
539 // Case 2
540 if (IVUses == 2 && IncOrCmpUses != 1)
541 return false;
542
543 // The users of the IV must be a binary operation or a comparison
544 if (auto *BO = dyn_cast(User)) {
545 if (BO->getOpcode() == Instruction::Add) {
546 // Loop Increment
547 // User of Loop Increment should be either PHI or CMP
548 for (auto *UU : User->users()) {
549 if (PHINode *PN = dyn_cast(UU)) {
550 if (PN != IV)
551 return false;
552 }
553 // Must be a CMP
554 else if (!isCompareUsedByBranch(dyn_cast(UU)))
555 return false;
556 }
557 } else
558 return false;
559 // Compare : can only have one use, and must be branch
560 } else if (!IsCompInst)
561 return false;
562 }
563 return true;
494564 }
495565
496566 // Collect the list of loop induction variables with respect to which it might
524594 IVToIncMap[&*I] = IncSCEV->getValue()->getSExtValue();
525595 DEBUG(dbgs() << "LRR: Possible IV: " << *I << " = " << *PHISCEV
526596 << "\n");
527 PossibleIVs.push_back(&*I);
597
598 if (isLoopControlIV(L, &*I)) {
599 assert(!LoopControlIV && "Found two loop control only IV");
600 LoopControlIV = &(*I);
601 DEBUG(dbgs() << "LRR: Possible loop control only IV: " << *I << " = "
602 << *PHISCEV << "\n");
603 } else
604 PossibleIVs.push_back(&*I);
528605 }
529606 }
530607 }
10691146 // Make sure we mark the reduction PHIs as used in all iterations.
10701147 for (auto *I : PossibleRedPHISet) {
10711148 Uses[I].set(IL_All);
1149 }
1150
1151 // Make sure we mark loop-control-only PHIs as used in all iterations. See
1152 // comment above LoopReroll::isLoopControlIV for more information.
1153 BasicBlock *Header = L->getHeader();
1154 if (LoopControlIV && LoopControlIV != IV) {
1155 for (auto *U : LoopControlIV->users()) {
1156 Instruction *IVUser = dyn_cast(U);
1157 // IVUser could be loop increment or compare
1158 Uses[IVUser].set(IL_All);
1159 for (auto *UU : IVUser->users()) {
1160 Instruction *UUser = dyn_cast(UU);
1161 // UUser could be compare, PHI or branch
1162 Uses[UUser].set(IL_All);
1163 // Is UUser a compare instruction?
1164 if (UU->hasOneUse()) {
1165 Instruction *BI = dyn_cast(*UUser->user_begin());
1166 if (BI == cast(Header->getTerminator()))
1167 Uses[BI].set(IL_All);
1168 }
1169 }
1170 }
10721171 }
10731172
10741173 // Make sure all instructions in the loop are in one and only one
13131412 ++J;
13141413 }
13151414
1316 // We need to create a new induction variable for each different BaseInst.
1317 for (auto &DRS : RootSets)
1318 // Insert the new induction variable.
1319 replaceIV(DRS.BaseInst, IV, IterCount);
1415 bool HasTwoIVs = LoopControlIV && LoopControlIV != IV;
1416
1417 if (HasTwoIVs) {
1418 updateNonLoopCtrlIncr();
1419 replaceIV(LoopControlIV, LoopControlIV, IterCount);
1420 } else
1421 // We need to create a new induction variable for each different BaseInst.
1422 for (auto &DRS : RootSets)
1423 // Insert the new induction variable.
1424 replaceIV(DRS.BaseInst, IV, IterCount);
13201425
13211426 SimplifyInstructionsInBlock(Header, TLI);
13221427 DeleteDeadPHIs(Header, TLI);
1428 }
1429
1430 // For non-loop-control IVs, we only need to update the last increment
1431 // with right amount, then we are done.
1432 void LoopReroll::DAGRootTracker::updateNonLoopCtrlIncr() {
1433 const SCEV *NewInc = nullptr;
1434 for (auto *LoopInc : LoopIncs) {
1435 GetElementPtrInst *GEP = dyn_cast(LoopInc);
1436 const SCEVConstant *COp = nullptr;
1437 if (GEP && LoopInc->getOperand(0)->getType()->isPointerTy()) {
1438 COp = dyn_cast(SE->getSCEV(LoopInc->getOperand(1)));
1439 } else {
1440 COp = dyn_cast(SE->getSCEV(LoopInc->getOperand(0)));
1441 if (!COp)
1442 COp = dyn_cast(SE->getSCEV(LoopInc->getOperand(1)));
1443 }
1444
1445 assert(COp && "Didn't find constant operand of LoopInc!\n");
1446
1447 const APInt &AInt = COp->getValue()->getValue();
1448 const SCEV *ScaleSCEV = SE->getConstant(COp->getType(), Scale);
1449 if (AInt.isNegative()) {
1450 NewInc = SE->getNegativeSCEV(COp);
1451 NewInc = SE->getUDivExpr(NewInc, ScaleSCEV);
1452 NewInc = SE->getNegativeSCEV(NewInc);
1453 } else
1454 NewInc = SE->getUDivExpr(COp, ScaleSCEV);
1455
1456 LoopInc->setOperand(1, dyn_cast(NewInc)->getValue());
1457 }
13231458 }
13241459
13251460 void LoopReroll::DAGRootTracker::replaceIV(Instruction *Inst,
13271462 const SCEV *IterCount) {
13281463 BasicBlock *Header = L->getHeader();
13291464 int64_t Inc = IVToIncMap[InstIV];
1330 bool Negative = Inc < 0;
1465 bool NeedNewIV = InstIV == LoopControlIV;
1466 bool Negative = !NeedNewIV && Inc < 0;
13311467
13321468 const SCEVAddRecExpr *RealIVSCEV = cast(SE->getSCEV(Inst));
13331469 const SCEV *Start = RealIVSCEV->getStart();
1470
1471 if (NeedNewIV)
1472 Start = SE->getConstant(Start->getType(), 0);
13341473
13351474 const SCEV *SizeOfExpr = nullptr;
13361475 const SCEV *IncrExpr =
13581497 // FIXME: Why do we need this check?
13591498 if (Uses[BI].find_first() == IL_All) {
13601499 const SCEV *ICSCEV = RealIVSCEV->evaluateAtIteration(IterCount, *SE);
1500
1501 if (NeedNewIV)
1502 ICSCEV = SE->getMulExpr(IterCount,
1503 SE->getConstant(IterCount->getType(), Scale));
1504 else
1505 ICSCEV = RealIVSCEV->evaluateAtIteration(IterCount, *SE);
13611506
13621507 // Iteration count SCEV minus or plus 1
13631508 const SCEV *MinusPlus1SCEV =
15131658 const SCEV *IterCount,
15141659 ReductionTracker &Reductions) {
15151660 DAGRootTracker DAGRoots(this, L, IV, SE, AA, TLI, DT, LI, PreserveLCSSA,
1516 IVToIncMap);
1661 IVToIncMap, LoopControlIV);
15171662
15181663 if (!DAGRoots.findRoots())
15191664 return false;
15651710 // reroll (there may be several possible options).
15661711 SmallInstructionVector PossibleIVs;
15671712 IVToIncMap.clear();
1713 LoopControlIV = nullptr;
15681714 collectPossibleIVs(L, PossibleIVs);
15691715
15701716 if (PossibleIVs.empty()) {
0 ; RUN: opt -S -loop-reroll %s | FileCheck %s
1 declare i32 @goo(i32, i32)
2
3 @buf = external global i8*
4 @aaa = global [16 x i8] c"\01\02\03\04\05\06\07\08\09\0A\0B\0C\0D\0E\0F\10", align 1
5
; test1: the loop counter %dec22 starts at 4 and is decremented by 1 until it
; reaches 0, and it is used only by its own decrement and the exit compare.
; The body is unrolled four times: it loads the bytes at offsets 0..3 from
; %buf.021, accumulates them into an i64 sum, and advances the pointer by 4.
; The expectations below require a single load per trip, a pointer step of 1,
; and a fresh %indvar counting up from 0.
; NOTE(review): the original loop runs four times and reads four bytes per
; trip (16 loads), while the expected exit compare against 1 leaves the
; rerolled loop after two trips -- confirm this bound is intended.
6 define i32 @test1(i32 %len) {
7 entry:
8 br label %while.body
9
10 while.body:
11 ;CHECK-LABEL: while.body:
12 ;CHECK-NEXT: %indvar = phi i32 [ %indvar.next, %while.body ], [ 0, %entry ]
13 ;CHECK-NEXT: %buf.021 = phi i8* [ getelementptr inbounds ([16 x i8], [16 x i8]* @aaa, i64 0, i64 0), %entry ], [ %add.ptr, %while.body ]
14 ;CHECK-NEXT: %sum44.020 = phi i64 [ 0, %entry ], [ %add, %while.body ]
15 ;CHECK-NEXT: [[T2:%[0-9]+]] = load i8, i8* %buf.021, align 1
16 ;CHECK-NEXT: %conv = zext i8 [[T2]] to i64
17 ;CHECK-NEXT: %add = add i64 %conv, %sum44.020
18 ;CHECK-NEXT: %add.ptr = getelementptr inbounds i8, i8* %buf.021, i64 1
19 ;CHECK-NEXT: %indvar.next = add i32 %indvar, 1
20 ;CHECK-NEXT: %exitcond = icmp eq i32 %indvar, 1
21 ;CHECK-NEXT: br i1 %exitcond, label %while.end, label %while.body
22
23 %dec22 = phi i32 [ 4, %entry ], [ %dec, %while.body ]
24 %buf.021 = phi i8* [ getelementptr inbounds ([16 x i8], [16 x i8]* @aaa, i64 0, i64 0), %entry ], [ %add.ptr, %while.body ]
25 %sum44.020 = phi i64 [ 0, %entry ], [ %add9, %while.body ]
26 %0 = load i8, i8* %buf.021, align 1
27 %conv = zext i8 %0 to i64
28 %add = add i64 %conv, %sum44.020
29 %arrayidx1 = getelementptr inbounds i8, i8* %buf.021, i64 1
30 %1 = load i8, i8* %arrayidx1, align 1
31 %conv2 = zext i8 %1 to i64
32 %add3 = add i64 %add, %conv2
33 %arrayidx4 = getelementptr inbounds i8, i8* %buf.021, i64 2
34 %2 = load i8, i8* %arrayidx4, align 1
35 %conv5 = zext i8 %2 to i64
36 %add6 = add i64 %add3, %conv5
37 %arrayidx7 = getelementptr inbounds i8, i8* %buf.021, i64 3
38 %3 = load i8, i8* %arrayidx7, align 1
39 %conv8 = zext i8 %3 to i64
40 %add9 = add i64 %add6, %conv8
41 %add.ptr = getelementptr inbounds i8, i8* %buf.021, i64 4
42 %dec = add nsw i32 %dec22, -1
43 %tobool = icmp eq i32 %dec, 0
44 br i1 %tobool, label %while.end, label %while.body
45
46 while.end: ; preds = %while.body
47 %conv11 = trunc i64 %add9 to i32
48 %call = tail call i32 @goo(i32 0, i32 %conv11)
49 unreachable
50 }
51
; test2: the counter %i.012 starts at 0, is advanced by 2 per trip, and is
; compared (slt) against %N; it is used only by its own increment, and the
; increment only by the PHI and the compare. The pointer IV %a.addr.010 moves
; forward by two i32 elements per trip and the body adds two consecutive
; loads into the running sum. The expectations below require one load per
; trip, a pointer step of 1, and a new %indvar counting up from 0. The exit
; compare uses %3, which is not defined in the lines shown here (presumably
; emitted ahead of the loop by the pass -- confirm).
52 define i32 @test2(i32 %N, i32* nocapture readonly %a, i32 %S) {
53 entry:
54 %cmp.9 = icmp sgt i32 %N, 0
55 br i1 %cmp.9, label %for.body.lr.ph, label %for.cond.cleanup
56
57 for.body.lr.ph:
58 br label %for.body
59
60 for.cond.for.cond.cleanup_crit_edge:
61 br label %for.cond.cleanup
62
63 for.cond.cleanup:
64 %S.addr.0.lcssa = phi i32 [ %add2, %for.cond.for.cond.cleanup_crit_edge ], [ %S, %entry ]
65 ret i32 %S.addr.0.lcssa
66
67 for.body:
68 ;CHECK-LABEL: for.body:
69 ;CHECK-NEXT: %indvar = phi i32 [ %indvar.next, %for.body ], [ 0, %for.body.lr.ph ]
70 ;CHECK-NEXT: %S.addr.011 = phi i32 [ %S, %for.body.lr.ph ], [ %add, %for.body ]
71 ;CHECK-NEXT: %a.addr.010 = phi i32* [ %a, %for.body.lr.ph ], [ %incdec.ptr1, %for.body ]
72 ;CHECK-NEXT: %4 = load i32, i32* %a.addr.010, align 4
73 ;CHECK-NEXT: %add = add nsw i32 %4, %S.addr.011
74 ;CHECK-NEXT: %incdec.ptr1 = getelementptr inbounds i32, i32* %a.addr.010, i64 1
75 ;CHECK-NEXT: %indvar.next = add i32 %indvar, 1
76 ;CHECK-NEXT: %exitcond = icmp eq i32 %indvar, %3
77 ;CHECK-NEXT: br i1 %exitcond, label %for.cond.for.cond.cleanup_crit_edge, label %for.body
78
79 %i.012 = phi i32 [ 0, %for.body.lr.ph ], [ %add3, %for.body ]
80 %S.addr.011 = phi i32 [ %S, %for.body.lr.ph ], [ %add2, %for.body ]
81 %a.addr.010 = phi i32* [ %a, %for.body.lr.ph ], [ %incdec.ptr1, %for.body ]
82 %incdec.ptr = getelementptr inbounds i32, i32* %a.addr.010, i64 1
83 %0 = load i32, i32* %a.addr.010, align 4
84 %add = add nsw i32 %0, %S.addr.011
85 %incdec.ptr1 = getelementptr inbounds i32, i32* %a.addr.010, i64 2
86 %1 = load i32, i32* %incdec.ptr, align 4
87 %add2 = add nsw i32 %add, %1
88 %add3 = add nsw i32 %i.012, 2
89 %cmp = icmp slt i32 %add3, %N
90 br i1 %cmp, label %for.body, label %for.cond.for.cond.cleanup_crit_edge
91 }
92
; test3: negative-step variant. The counter %i.013 starts at %len and is
; decreased by 2 per trip while the decremented value stays greater than 1;
; the pointer IV %buf.addr.011 moves back by two i32 elements per trip and
; the body adds the loads at offsets 0 and -1 into the running sum. The
; expectations below require one load per trip, a pointer step of -1, and a
; new %indvar counting up from 0. The exit compare uses %3, which is not
; defined in the lines shown here (presumably emitted ahead of the loop by
; the pass -- confirm).
93 define i32 @test3(i32* nocapture readonly %buf, i32 %len) #0 {
94 entry:
95 %cmp10 = icmp sgt i32 %len, 1
96 br i1 %cmp10, label %while.body.preheader, label %while.end
97
98 while.body.preheader: ; preds = %entry
99 br label %while.body
100
101 while.body: ; preds = %while.body.preheader, %while.body
102 ;CHECK-LABEL: while.body:
103 ;CHECK-NEXT: %indvar = phi i32 [ %indvar.next, %while.body ], [ 0, %while.body.preheader ]
104 ;CHECK-NEXT: %S.012 = phi i32 [ %add, %while.body ], [ undef, %while.body.preheader ]
105 ;CHECK-NEXT: %buf.addr.011 = phi i32* [ %add.ptr, %while.body ], [ %buf, %while.body.preheader ]
106 ;CHECK-NEXT: %4 = load i32, i32* %buf.addr.011, align 4
107 ;CHECK-NEXT: %add = add nsw i32 %4, %S.012
108 ;CHECK-NEXT: %add.ptr = getelementptr inbounds i32, i32* %buf.addr.011, i64 -1
109 ;CHECK-NEXT: %indvar.next = add i32 %indvar, 1
110 ;CHECK-NEXT: %exitcond = icmp eq i32 %indvar, %3
111 ;CHECK-NEXT: br i1 %exitcond, label %while.end.loopexit, label %while.body
112
113 %i.013 = phi i32 [ %sub, %while.body ], [ %len, %while.body.preheader ]
114 %S.012 = phi i32 [ %add2, %while.body ], [ undef, %while.body.preheader ]
115 %buf.addr.011 = phi i32* [ %add.ptr, %while.body ], [ %buf, %while.body.preheader ]
116 %0 = load i32, i32* %buf.addr.011, align 4
117 %add = add nsw i32 %0, %S.012
118 %arrayidx1 = getelementptr inbounds i32, i32* %buf.addr.011, i64 -1
119 %1 = load i32, i32* %arrayidx1, align 4
120 %add2 = add nsw i32 %add, %1
121 %add.ptr = getelementptr inbounds i32, i32* %buf.addr.011, i64 -2
122 %sub = add nsw i32 %i.013, -2
123 %cmp = icmp sgt i32 %sub, 1
124 br i1 %cmp, label %while.body, label %while.end.loopexit
125
126 while.end.loopexit: ; preds = %while.body
127 br label %while.end
128
129 while.end: ; preds = %while.end.loopexit, %entry
130 %S.0.lcssa = phi i32 [ undef, %entry ], [ %add2, %while.end.loopexit ]
131 ret i32 %S.0.lcssa
132 }
133