llvm.org GIT mirror llvm / 08074bb
Enable loopreroll for sext of loop control only IV This patch extends loopreroll to allow the instruction chain of a loop-control-only IV to contain a sext. Differential Revision: http://reviews.llvm.org/D19820 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@269121 91177308-0d34-0410-b5e6-96231b3b80d8 Lawrence Hu 3 years ago
2 changed file(s) with 219 addition(s) and 12 deletion(s). Raw diff Collapse all Expand all
511511
512512 // Check if an IV is only used to control the loop. There are two cases:
513513 // 1. It only has one use which is loop increment, and the increment is only
514 // used by comparison and the PHI, and the comparison is only used by branch.
514 // used by comparison and the PHI (could have a sext with nsw in between), and the
515 // comparison is only used by branch.
515516 // 2. It is used by loop increment and the comparison, the loop increment is
516517 // only used by the PHI, and the comparison is used only by the branch.
517518 bool LoopReroll::isLoopControlIV(Loop *L, Instruction *IV) {
518
519519 unsigned IVUses = IV->getNumUses();
520520 if (IVUses != 2 && IVUses != 1)
521521 return false;
550550 if (PN != IV)
551551 return false;
552552 }
553 // Must be a CMP
554 else if (!isCompareUsedByBranch(dyn_cast(UU)))
555 return false;
553 // Must be a CMP or an ext (of a value with nsw) then CMP
554 else {
555 Instruction *UUser = dyn_cast(UU);
556 // Skip SExt if we are extending an nsw value
557 // TODO: Allow ZExt too
558 if (BO->hasNoSignedWrap() && UUser && UUser->getNumUses() == 1 &&
559 isa(UUser))
560 UUser = dyn_cast(*(UUser->user_begin()));
561 if (!isCompareUsedByBranch(UUser))
562 return false;
563 }
556564 }
557565 } else
558566 return false;
11601168 Instruction *UUser = dyn_cast(UU);
11611169 // UUser could be compare, PHI or branch
11621170 Uses[UUser].set(IL_All);
1171 // Skip SExt
1172 if (isa(UUser)) {
1173 UUser = dyn_cast(*(UUser->user_begin()));
1174 Uses[UUser].set(IL_All);
1175 }
11631176 // Is UUser a compare instruction?
11641177 if (UU->hasOneUse()) {
11651178 Instruction *BI = dyn_cast(*UUser->user_begin());
15011514 if (NeedNewIV)
15021515 ICSCEV = SE->getMulExpr(IterCount,
15031516 SE->getConstant(IterCount->getType(), Scale));
1504 else
1505 ICSCEV = RealIVSCEV->evaluateAtIteration(IterCount, *SE);
15061517
15071518 // Iteration count SCEV minus or plus 1
15081519 const SCEV *MinusPlus1SCEV =
15141525
15151526 const SCEV *ICMinusPlus1SCEV = SE->getMinusSCEV(ICSCEV, MinusPlus1SCEV);
15161527 // Iteration count minus 1
1517 Value *ICMinusPlus1 = nullptr;
1528 Instruction *InsertPtr = nullptr;
15181529 if (isa(ICMinusPlus1SCEV)) {
1519 ICMinusPlus1 =
1520 Expander.expandCodeFor(ICMinusPlus1SCEV, NewIV->getType(), BI);
1530 InsertPtr = BI;
15211531 } else {
15221532 BasicBlock *Preheader = L->getLoopPreheader();
15231533 if (!Preheader)
15241534 Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA);
1525 ICMinusPlus1 = Expander.expandCodeFor(
1526 ICMinusPlus1SCEV, NewIV->getType(), Preheader->getTerminator());
1535 InsertPtr = Preheader->getTerminator();
15271536 }
1537
1538 if (!isa(NewIV->getType()) && NeedNewIV &&
1539 (SE->getTypeSizeInBits(NewIV->getType()) <
1540 SE->getTypeSizeInBits(ICMinusPlus1SCEV->getType()))) {
1541 IRBuilder<> Builder(BI);
1542 Builder.SetCurrentDebugLocation(BI->getDebugLoc());
1543 NewIV = Builder.CreateSExt(NewIV, ICMinusPlus1SCEV->getType());
1544 }
1545 Value *ICMinusPlus1 = Expander.expandCodeFor(
1546 ICMinusPlus1SCEV, NewIV->getType(), InsertPtr);
15281547
15291548 Value *Cond =
15301549 new ICmpInst(BI, CmpInst::ICMP_EQ, NewIV, ICMinusPlus1, "exitcond");
17041723
17051724 const SCEV *LIBETC = SE->getBackedgeTakenCount(L);
17061725 const SCEV *IterCount = SE->getAddExpr(LIBETC, SE->getOne(LIBETC->getType()));
1726 DEBUG(dbgs() << "\n Before Reroll:\n" << *(L->getHeader()) << "\n");
17071727 DEBUG(dbgs() << "LRR: iteration count = " << *IterCount << "\n");
17081728
17091729 // First, we need to find the induction variable with respect to which we can
17301750 Changed = true;
17311751 break;
17321752 }
1753 DEBUG(dbgs() << "\n After Reroll:\n" << *(L->getHeader()) << "\n");
17331754
17341755 // Trip count of L has changed so SE must be re-evaluated.
17351756 if (Changed)
0 ; RUN: opt -S -loop-reroll %s | FileCheck %s
1 target triple = "aarch64--linux-gnu"
2
3 define void @test(i32 %n, float* %arrayidx200, float* %arrayidx164, float* %arrayidx172) {
4 entry:
5 %rem.i = srem i32 %n, 4
6 %t22 = load float, float* %arrayidx172, align 4
7 %cmp.9 = icmp eq i32 %n, 0
8 %t7 = sext i32 %n to i64
9 br i1 %cmp.9, label %while.end, label %while.body.preheader
10
11 while.body.preheader:
12 br label %while.body
13
14 while.body:
15 ;CHECK-LABEL: while.body:
16 ;CHECK-NEXT: %indvars.iv.i423 = phi i64 [ %indvars.iv.next.i424, %while.body ], [ 0, %while.body.preheader ]
17 ;CHECK-NEXT: [[T1:%[0-9]+]] = trunc i64 %indvars.iv.i423 to i32
18 ;CHECK-NEXT: %arrayidx62.i = getelementptr inbounds float, float* %arrayidx200, i64 %indvars.iv.i423
19 ;CHECK-NEXT: %t1 = load float, float* %arrayidx62.i, align 4
20 ;CHECK-NEXT: %arrayidx64.i = getelementptr inbounds float, float* %arrayidx164, i64 %indvars.iv.i423
21 ;CHECK-NEXT: %t2 = load float, float* %arrayidx64.i, align 4
22 ;CHECK-NEXT: %mul65.i = fmul fast float %t2, %t22
23 ;CHECK-NEXT: %add66.i = fadd fast float %mul65.i, %t1
24 ;CHECK-NEXT: store float %add66.i, float* %arrayidx62.i, align 4
25 ;CHECK-NEXT: %indvars.iv.next.i424 = add i64 %indvars.iv.i423, 1
26 ;CHECK-NEXT: [[T2:%[0-9]+]] = sext i32 [[T1]] to i64
27 ;CHECK-NEXT: %exitcond = icmp eq i64 [[T2]], %{{[0-9]+}}
28 ;CHECK-NEXT: br i1 %exitcond, label %while.end.loopexit, label %while.body
29
30 %indvars.iv.i423 = phi i64 [ %indvars.iv.next.i424, %while.body ], [ 0, %while.body.preheader ]
31 %i.22.i = phi i32 [ %add103.i, %while.body ], [ %rem.i, %while.body.preheader ]
32 %arrayidx62.i = getelementptr inbounds float, float* %arrayidx200, i64 %indvars.iv.i423
33 %t1 = load float, float* %arrayidx62.i, align 4
34 %arrayidx64.i = getelementptr inbounds float, float* %arrayidx164, i64 %indvars.iv.i423
35 %t2 = load float, float* %arrayidx64.i, align 4
36 %mul65.i = fmul fast float %t2, %t22
37 %add66.i = fadd fast float %mul65.i, %t1
38 store float %add66.i, float* %arrayidx62.i, align 4
39 %t3 = add nsw i64 %indvars.iv.i423, 1
40 %arrayidx71.i = getelementptr inbounds float, float* %arrayidx200, i64 %t3
41 %t4 = load float, float* %arrayidx71.i, align 4
42 %arrayidx74.i = getelementptr inbounds float, float* %arrayidx164, i64 %t3
43 %t5 = load float, float* %arrayidx74.i, align 4
44 %mul75.i = fmul fast float %t5, %t22
45 %add76.i = fadd fast float %mul75.i, %t4
46 store float %add76.i, float* %arrayidx71.i, align 4
47 %add103.i = add nsw i32 %i.22.i, 2
48 %t6 = sext i32 %add103.i to i64
49 %cmp58.i = icmp slt i64 %t6, %t7
50 %indvars.iv.next.i424 = add i64 %indvars.iv.i423, 2
51 br i1 %cmp58.i, label %while.body, label %while.end.loopexit
52
53 while.end.loopexit:
54 br label %while.end
55
56 while.end:
57 ret void
58 }
59
60 ; Function Attrs: noinline norecurse nounwind
61 define i32 @test2(i64 %n, i32* nocapture %x, i32* nocapture readonly %y) {
62 entry:
63 %cmp18 = icmp sgt i64 %n, 0
64 br i1 %cmp18, label %for.body.preheader, label %for.end
65
66 for.body.preheader: ; preds = %entry
67 br label %for.body
68
69 for.body: ; preds = %for.body.preheader, %for.body
70
71 ;CHECK: for.body:
72 ;CHECK-NEXT: %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %for.body.preheader ]
73 ;CHECK-NEXT: %arrayidx = getelementptr inbounds i32, i32* %y, i64 %indvar
74 ;CHECK-NEXT: [[T1:%[0-9]+]] = load i32, i32* %arrayidx, align 4
75 ;CHECK-NEXT: %arrayidx3 = getelementptr inbounds i32, i32* %x, i64 %indvar
76 ;CHECK-NEXT: store i32 [[T1]], i32* %arrayidx3, align 4
77 ;CHECK-NEXT: %indvar.next = add i64 %indvar, 1
78 ;CHECK-NEXT: %exitcond = icmp eq i64 %indvar, %{{[0-9]+}}
79 ;CHECK-NEXT: br i1 %exitcond, label %for.end.loopexit, label %for.body
80
81 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
82 %arrayidx = getelementptr inbounds i32, i32* %y, i64 %indvars.iv
83 %0 = load i32, i32* %arrayidx, align 4
84 %arrayidx3 = getelementptr inbounds i32, i32* %x, i64 %indvars.iv
85 store i32 %0, i32* %arrayidx3, align 4
86 %1 = or i64 %indvars.iv, 1
87 %arrayidx5 = getelementptr inbounds i32, i32* %y, i64 %1
88 %2 = load i32, i32* %arrayidx5, align 4
89 %arrayidx8 = getelementptr inbounds i32, i32* %x, i64 %1
90 store i32 %2, i32* %arrayidx8, align 4
91 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
92 %cmp = icmp slt i64 %indvars.iv.next, %n
93 br i1 %cmp, label %for.body, label %for.end.loopexit
94
95 for.end.loopexit: ; preds = %for.body
96 br label %for.end
97
98 for.end: ; preds = %for.end.loopexit, %entry
99 ret i32 0
100 }
101
102 ; Function Attrs: noinline norecurse nounwind
103 define i32 @test3(i32 %n, i32* nocapture %x, i32* nocapture readonly %y) {
104 entry:
105 %cmp21 = icmp sgt i32 %n, 0
106 br i1 %cmp21, label %for.body.preheader, label %for.end
107
108 for.body.preheader: ; preds = %entry
109 br label %for.body
110
111 for.body: ; preds = %for.body.preheader, %for.body
112
113 ;CHECK: for.body:
114 ;CHECK: %add12 = add i8 %i.022, 2
115 ;CHECK-NEXT: %conv = sext i8 %add12 to i32
116 ;CHECK-NEXT: %cmp = icmp slt i32 %conv, %n
117 ;CHECK-NEXT: br i1 %cmp, label %for.body, label %for.end.loopexit
118
119 %conv23 = phi i32 [ %conv, %for.body ], [ 0, %for.body.preheader ]
120 %i.022 = phi i8 [ %add12, %for.body ], [ 0, %for.body.preheader ]
121 %idxprom = sext i8 %i.022 to i64
122 %arrayidx = getelementptr inbounds i32, i32* %y, i64 %idxprom
123 %0 = load i32, i32* %arrayidx, align 4
124 %arrayidx3 = getelementptr inbounds i32, i32* %x, i64 %idxprom
125 store i32 %0, i32* %arrayidx3, align 4
126 %add = or i32 %conv23, 1
127 %idxprom5 = sext i32 %add to i64
128 %arrayidx6 = getelementptr inbounds i32, i32* %y, i64 %idxprom5
129 %1 = load i32, i32* %arrayidx6, align 4
130 %arrayidx10 = getelementptr inbounds i32, i32* %x, i64 %idxprom5
131 store i32 %1, i32* %arrayidx10, align 4
132 %add12 = add i8 %i.022, 2
133 %conv = sext i8 %add12 to i32
134 %cmp = icmp slt i32 %conv, %n
135 br i1 %cmp, label %for.body, label %for.end.loopexit
136
137 for.end.loopexit: ; preds = %for.body
138 br label %for.end
139
140 for.end: ; preds = %for.end.loopexit, %entry
141 ret i32 0
142 }
143
144 ; Function Attrs: noinline norecurse nounwind
145 define i32 @test4(i64 %n, i32* nocapture %x, i32* nocapture readonly %y) {
146 entry:
147 %cmp18 = icmp eq i64 %n, 0
148 br i1 %cmp18, label %for.end, label %for.body.preheader
149
150 for.body.preheader: ; preds = %entry
151 br label %for.body
152
153 for.body: ; preds = %for.body.preheader, %for.body
154
155 ;CHECK: for.body:
156 ;CHECK-NEXT: %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %for.body.preheader ]
157 ;CHECK-NEXT: %arrayidx = getelementptr inbounds i32, i32* %y, i64 %indvar
158 ;CHECK-NEXT: [[T1:%[0-9]+]] = load i32, i32* %arrayidx, align 4
159 ;CHECK-NEXT: %arrayidx3 = getelementptr inbounds i32, i32* %x, i64 %indvar
160 ;CHECK-NEXT: store i32 [[T1]], i32* %arrayidx3, align 4
161 ;CHECK-NEXT: %indvar.next = add i64 %indvar, 1
162 ;CHECK-NEXT: %exitcond = icmp eq i64 %indvar, %{{[0-9]+}}
163 ;CHECK-NEXT: br i1 %exitcond, label %for.end.loopexit, label %for.body
164
165 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
166 %arrayidx = getelementptr inbounds i32, i32* %y, i64 %indvars.iv
167 %0 = load i32, i32* %arrayidx, align 4
168 %arrayidx3 = getelementptr inbounds i32, i32* %x, i64 %indvars.iv
169 store i32 %0, i32* %arrayidx3, align 4
170 %1 = or i64 %indvars.iv, 1
171 %arrayidx5 = getelementptr inbounds i32, i32* %y, i64 %1
172 %2 = load i32, i32* %arrayidx5, align 4
173 %arrayidx8 = getelementptr inbounds i32, i32* %x, i64 %1
174 store i32 %2, i32* %arrayidx8, align 4
175 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
176 %cmp = icmp ult i64 %indvars.iv.next, %n
177 br i1 %cmp, label %for.body, label %for.end.loopexit
178
179 for.end.loopexit: ; preds = %for.body
180 br label %for.end
181
182 for.end: ; preds = %for.end.loopexit, %entry
183 ret i32 0
184 }
185