llvm.org GIT mirror llvm / dace98d
Fix loop rerolling pass failure with non-constant loop lower bound The loop rerolling pass was failing with an assertion failure from a failed cast on loops like this: void foo(int *A, int *B, int m, int n) { for (int i = m; i < n; i+=4) { A[i+0] = B[i+0] * 4; A[i+1] = B[i+1] * 4; A[i+2] = B[i+2] * 4; A[i+3] = B[i+3] * 4; } } The code was casting the SCEV-expanded code for the new induction variable to a phi-node. When the loop had a non-constant lower bound, the SCEV expander would end the code expansion with an add instead of a phi node and the cast would fail. It looks like the cast to a phi node was only needed to get the induction variable value coming from the backedge to compute the end of loop condition. This patch changes the loop reroller to compare the induction variable to the number of times the backedge is taken instead of the iteration count of the loop. In other words, we stop the loop when the current value of the induction variable == IterationCount-1. Previously, the comparison was comparing the induction variable value from the next iteration == IterationCount. This problem only seems to occur on 32-bit targets. For some reason, the loop is not rerolled on 64-bit targets. PR18290 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@198425 91177308-0d34-0410-b5e6-96231b3b80d8 David Peixotto 5 years ago
4 changed file(s) with 171 addition(s) and 17 deletion(s). Raw diff Collapse all Expand all
10881088 L, SCEV::FlagAnyWrap));
10891089 { // Limit the lifetime of SCEVExpander.
10901090 SCEVExpander Expander(*SE, "reroll");
1091 PHINode *NewIV =
1092 cast<PHINode>(Expander.expandCodeFor(H, IV->getType(),
1093 Header->begin()));
1091 Value *NewIV = Expander.expandCodeFor(H, IV->getType(), Header->begin());
1092
10941093 for (DenseSet::iterator J = BaseUseSet.begin(),
10951094 JE = BaseUseSet.end(); J != JE; ++J)
10961095 (*J)->replaceUsesOfWith(IV, NewIV);
11011100 if (Inc == 1)
11021101 ICSCEV =
11031102 SE->getMulExpr(ICSCEV, SE->getConstant(ICSCEV->getType(), Scale));
1104 Value *IC;
1105 if (isa(ICSCEV)) {
1106 IC = Expander.expandCodeFor(ICSCEV, NewIV->getType(), BI);
1103 // Iteration count SCEV minus 1
1104 const SCEV *ICMinus1SCEV =
1105 SE->getMinusSCEV(ICSCEV, SE->getConstant(ICSCEV->getType(), 1));
1106
1107 Value *ICMinus1; // Iteration count minus 1
1108 if (isa(ICMinus1SCEV)) {
1109 ICMinus1 = Expander.expandCodeFor(ICMinus1SCEV, NewIV->getType(), BI);
11071110 } else {
11081111 BasicBlock *Preheader = L->getLoopPreheader();
11091112 if (!Preheader)
11101113 Preheader = InsertPreheaderForLoop(L, this);
11111114
1112 IC = Expander.expandCodeFor(ICSCEV, NewIV->getType(),
1113 Preheader->getTerminator());
1115 ICMinus1 = Expander.expandCodeFor(ICMinus1SCEV, NewIV->getType(),
1116 Preheader->getTerminator());
11141117 }
11151118
1116 Value *NewIVNext = NewIV->getIncomingValueForBlock(Header);
1117 Value *Cond = new ICmpInst(BI, CmpInst::ICMP_EQ, NewIVNext, IC,
1119 Value *Cond = new ICmpInst(BI, CmpInst::ICMP_EQ, NewIV, ICMinus1,
11181120 "exitcond");
11191121 BI->setCondition(Cond);
11201122
3232 ; CHECK: %indvar = phi i32 [ %indvar.next, %for.body ], [ 0, %entry ]
3333 ; CHECK: %call = tail call i32 @foo(i32 %indvar) #1
3434 ; CHECK: %indvar.next = add i32 %indvar, 1
35 ; CHECK: %exitcond1 = icmp eq i32 %indvar.next, 498
35 ; CHECK: %exitcond1 = icmp eq i32 %indvar, 497
3636 ; CHECK: br i1 %exitcond1, label %for.end, label %for.body
3737
3838 ; CHECK: ret
8282 ; CHECK: %arrayidx = getelementptr inbounds i32* %x, i64 %indvar
8383 ; CHECK: store i32 %call, i32* %arrayidx, align 4
8484 ; CHECK: %indvar.next = add i64 %indvar, 1
85 ; CHECK: %exitcond = icmp eq i64 %indvar.next, 1500
85 ; CHECK: %exitcond = icmp eq i64 %indvar, 1499
8686 ; CHECK: br i1 %exitcond, label %for.end, label %for.body
8787
8888 ; CHECK: ret
130130 ; CHECK: %arrayidx = getelementptr inbounds i32* %x, i64 %indvars.iv
131131 ; CHECK: store i32 %call, i32* %arrayidx, align 4
132132 ; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
133 ; CHECK: %exitcond1 = icmp eq i64 %indvars.iv.next, 1500
133 ; CHECK: %exitcond1 = icmp eq i64 %indvars.iv, 1499
134134 ; CHECK: br i1 %exitcond1, label %for.end, label %for.body
135135
136136 ; CHECK: ret
212212 ; CHECK: %add = fadd float %1, %mul
213213 ; CHECK: store float %add, float* %arrayidx2, align 4
214214 ; CHECK: %indvar.next = add i64 %indvar, 1
215 ; CHECK: %exitcond = icmp eq i64 %indvar.next, 3200
215 ; CHECK: %exitcond = icmp eq i64 %indvar, 3199
216216 ; CHECK: br i1 %exitcond, label %for.end, label %for.body
217217
218218 ; CHECK: ret
312312 ; CHECK: %add = fadd float %2, %mul
313313 ; CHECK: store float %add, float* %arrayidx4, align 4
314314 ; CHECK: %indvar.next = add i64 %indvar, 1
315 ; CHECK: %exitcond = icmp eq i64 %indvar.next, 3200
315 ; CHECK: %exitcond = icmp eq i64 %indvar, 3199
316316 ; CHECK: br i1 %exitcond, label %for.end, label %for.body
317317
318318 ; CHECK: ret
0 ; RUN: opt < %s -loop-reroll -S | FileCheck %s
1 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
2 target triple = "thumbv7-none-linux"
3
4 ;void foo(int *A, int *B, int m, int n) {
5 ; for (int i = m; i < n; i+=4) {
6 ; A[i+0] = B[i+0] * 4;
7 ; A[i+1] = B[i+1] * 4;
8 ; A[i+2] = B[i+2] * 4;
9 ; A[i+3] = B[i+3] * 4;
10 ; }
11 ;}
12 define void @foo(i32* nocapture %A, i32* nocapture readonly %B, i32 %m, i32 %n) {
13 entry:
14 %cmp34 = icmp slt i32 %m, %n
15 br i1 %cmp34, label %for.body, label %for.end
16
17 for.body: ; preds = %entry, %for.body
18 %i.035 = phi i32 [ %add18, %for.body ], [ %m, %entry ]
19 %arrayidx = getelementptr inbounds i32* %B, i32 %i.035
20 %0 = load i32* %arrayidx, align 4
21 %mul = shl nsw i32 %0, 2
22 %arrayidx2 = getelementptr inbounds i32* %A, i32 %i.035
23 store i32 %mul, i32* %arrayidx2, align 4
24 %add3 = add nsw i32 %i.035, 1
25 %arrayidx4 = getelementptr inbounds i32* %B, i32 %add3
26 %1 = load i32* %arrayidx4, align 4
27 %mul5 = shl nsw i32 %1, 2
28 %arrayidx7 = getelementptr inbounds i32* %A, i32 %add3
29 store i32 %mul5, i32* %arrayidx7, align 4
30 %add8 = add nsw i32 %i.035, 2
31 %arrayidx9 = getelementptr inbounds i32* %B, i32 %add8
32 %2 = load i32* %arrayidx9, align 4
33 %mul10 = shl nsw i32 %2, 2
34 %arrayidx12 = getelementptr inbounds i32* %A, i32 %add8
35 store i32 %mul10, i32* %arrayidx12, align 4
36 %add13 = add nsw i32 %i.035, 3
37 %arrayidx14 = getelementptr inbounds i32* %B, i32 %add13
38 %3 = load i32* %arrayidx14, align 4
39 %mul15 = shl nsw i32 %3, 2
40 %arrayidx17 = getelementptr inbounds i32* %A, i32 %add13
41 store i32 %mul15, i32* %arrayidx17, align 4
42 %add18 = add nsw i32 %i.035, 4
43 %cmp = icmp slt i32 %add18, %n
44 br i1 %cmp, label %for.body, label %for.end
45
46 for.end: ; preds = %for.body, %entry
47 ret void
48 }
49 ; CHECK-LABEL: @foo
50 ; CHECK: for.body.preheader: ; preds = %entry
51 ; CHECK: %0 = add i32 %n, -1
52 ; CHECK: %1 = sub i32 %0, %m
53 ; CHECK: %2 = lshr i32 %1, 2
54 ; CHECK: %3 = mul i32 %2, 4
55 ; CHECK: %4 = add i32 %m, %3
56 ; CHECK: %5 = add i32 %4, 3
57 ; CHECK: br label %for.body
58
59 ; CHECK: for.body: ; preds = %for.body, %for.body.preheader
60 ; CHECK: %indvar = phi i32 [ %indvar.next, %for.body ], [ 0, %for.body.preheader ]
61 ; CHECK: %6 = add i32 %m, %indvar
62 ; CHECK: %arrayidx = getelementptr inbounds i32* %B, i32 %6
63 ; CHECK: %7 = load i32* %arrayidx, align 4
64 ; CHECK: %mul = shl nsw i32 %7, 2
65 ; CHECK: %arrayidx2 = getelementptr inbounds i32* %A, i32 %6
66 ; CHECK: store i32 %mul, i32* %arrayidx2, align 4
67 ; CHECK: %indvar.next = add i32 %indvar, 1
68 ; CHECK: %exitcond = icmp eq i32 %6, %5
69 ; CHECK: br i1 %exitcond, label %for.end, label %for.body
70
71 ;void daxpy_ur(int n,float da,float *dx,float *dy)
72 ; {
73 ; int m = n % 4;
74 ; for (int i = m; i < n; i = i + 4)
75 ; {
76 ; dy[i] = dy[i] + da*dx[i];
77 ; dy[i+1] = dy[i+1] + da*dx[i+1];
78 ; dy[i+2] = dy[i+2] + da*dx[i+2];
79 ; dy[i+3] = dy[i+3] + da*dx[i+3];
80 ; }
81 ; }
82 define void @daxpy_ur(i32 %n, float %da, float* nocapture readonly %dx, float* nocapture %dy) {
83 entry:
84 %rem = srem i32 %n, 4
85 %cmp55 = icmp slt i32 %rem, %n
86 br i1 %cmp55, label %for.body, label %for.end
87
88 for.body: ; preds = %entry, %for.body
89 %i.056 = phi i32 [ %add27, %for.body ], [ %rem, %entry ]
90 %arrayidx = getelementptr inbounds float* %dy, i32 %i.056
91 %0 = load float* %arrayidx, align 4
92 %arrayidx1 = getelementptr inbounds float* %dx, i32 %i.056
93 %1 = load float* %arrayidx1, align 4
94 %mul = fmul float %1, %da
95 %add = fadd float %0, %mul
96 store float %add, float* %arrayidx, align 4
97 %add3 = add nsw i32 %i.056, 1
98 %arrayidx4 = getelementptr inbounds float* %dy, i32 %add3
99 %2 = load float* %arrayidx4, align 4
100 %arrayidx6 = getelementptr inbounds float* %dx, i32 %add3
101 %3 = load float* %arrayidx6, align 4
102 %mul7 = fmul float %3, %da
103 %add8 = fadd float %2, %mul7
104 store float %add8, float* %arrayidx4, align 4
105 %add11 = add nsw i32 %i.056, 2
106 %arrayidx12 = getelementptr inbounds float* %dy, i32 %add11
107 %4 = load float* %arrayidx12, align 4
108 %arrayidx14 = getelementptr inbounds float* %dx, i32 %add11
109 %5 = load float* %arrayidx14, align 4
110 %mul15 = fmul float %5, %da
111 %add16 = fadd float %4, %mul15
112 store float %add16, float* %arrayidx12, align 4
113 %add19 = add nsw i32 %i.056, 3
114 %arrayidx20 = getelementptr inbounds float* %dy, i32 %add19
115 %6 = load float* %arrayidx20, align 4
116 %arrayidx22 = getelementptr inbounds float* %dx, i32 %add19
117 %7 = load float* %arrayidx22, align 4
118 %mul23 = fmul float %7, %da
119 %add24 = fadd float %6, %mul23
120 store float %add24, float* %arrayidx20, align 4
121 %add27 = add nsw i32 %i.056, 4
122 %cmp = icmp slt i32 %add27, %n
123 br i1 %cmp, label %for.body, label %for.end
124
125 for.end: ; preds = %for.body, %entry
126 ret void
127 }
128
129 ; CHECK-LABEL: @daxpy_ur
130 ; CHECK: for.body.preheader:
131 ; CHECK: %0 = add i32 %n, -1
132 ; CHECK: %1 = sub i32 %0, %rem
133 ; CHECK: %2 = lshr i32 %1, 2
134 ; CHECK: %3 = mul i32 %2, 4
135 ; CHECK: %4 = add i32 %rem, %3
136 ; CHECK: %5 = add i32 %4, 3
137 ; CHECK: br label %for.body
138
139 ; CHECK: for.body:
140 ; CHECK: %indvar = phi i32 [ %indvar.next, %for.body ], [ 0, %for.body.preheader ]
141 ; CHECK: %6 = add i32 %rem, %indvar
142 ; CHECK: %arrayidx = getelementptr inbounds float* %dy, i32 %6
143 ; CHECK: %7 = load float* %arrayidx, align 4
144 ; CHECK: %arrayidx1 = getelementptr inbounds float* %dx, i32 %6
145 ; CHECK: %8 = load float* %arrayidx1, align 4
146 ; CHECK: %mul = fmul float %8, %da
147 ; CHECK: %add = fadd float %7, %mul
148 ; CHECK: store float %add, float* %arrayidx, align 4
149 ; CHECK: %indvar.next = add i32 %indvar, 1
150 ; CHECK: %exitcond = icmp eq i32 %6, %5
151 ; CHECK: br i1 %exitcond, label %for.end, label %for.body
3737 ; CHECK: %0 = load i32* %arrayidx, align 4
3838 ; CHECK: %add = add nsw i32 %0, %r.029
3939 ; CHECK: %indvar.next = add i64 %indvar, 1
40 ; CHECK: %exitcond = icmp eq i64 %indvar.next, 400
40 ; CHECK: %exitcond = icmp eq i64 %indvar, 399
4141 ; CHECK: br i1 %exitcond, label %for.end, label %for.body
4242
4343 ; CHECK: ret
8282 ; CHECK: %0 = load float* %arrayidx, align 4
8383 ; CHECK: %add = fadd float %0, %r.029
8484 ; CHECK: %indvar.next = add i64 %indvar, 1
85 ; CHECK: %exitcond = icmp eq i64 %indvar.next, 400
85 ; CHECK: %exitcond = icmp eq i64 %indvar, 399
8686 ; CHECK: br i1 %exitcond, label %for.end, label %for.body
8787
8888 ; CHECK: ret