llvm.org GIT mirror: llvm, commit 8659296
Allow setting MaxRerollIterations above 16

By Ayal Zaks. Differential Revision: http://reviews.llvm.org/D17258

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@261517 91177308-0d34-0410-b5e6-96231b3b80d8
Committed by Elena Demikhovsky
2 changed files with 332 additions and 5 deletions: lib/Transforms/Scalar/LoopRerollPass.cpp and a new LoopReroll regression test.
13  13    #include "llvm/Transforms/Scalar.h"
14  14    #include "llvm/ADT/MapVector.h"
15  15    #include "llvm/ADT/STLExtras.h"
16      - #include "llvm/ADT/SmallBitVector.h"
    16  + #include "llvm/ADT/BitVector.h"
17  17    #include "llvm/ADT/SmallSet.h"
18  18    #include "llvm/ADT/Statistic.h"
19  19    #include "llvm/Analysis/AliasAnalysis.h"
127 127
128 128   namespace {
129 129     enum IterationLimits {
130       -   /// The maximum number of iterations that we'll try and reroll. This
131       -   /// has to be less than 25 in order to fit into a SmallBitVector.
132       -   IL_MaxRerollIterations = 16,
    130   +   /// The maximum number of iterations that we'll try and reroll.
    131   +   IL_MaxRerollIterations = 32,
133 132       /// The bitvector index used by loop induction variables and other
134 133       /// instructions that belong to all iterations.
135 134       IL_All,
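
The old limit of 16, and the deleted "less than 25" comment, came from tracking each instruction's iteration membership in a SmallBitVector, whose inline storage is a single machine word. The next hunk switches that storage to a heap-backed BitVector, which is why the cap can rise to 32. Below is a minimal sketch of the tracking scheme, assuming the enum shape above; the IL_End terminator and the markIteration helper are illustrative, not taken from the patch.

    #include "llvm/ADT/BitVector.h"
    #include "llvm/ADT/MapVector.h"
    #include "llvm/IR/Instruction.h"

    using namespace llvm;

    enum IterationLimits {
      IL_MaxRerollIterations = 32, // raised from 16 by this patch
      IL_All,                      // bit index for instructions shared by all iterations
      IL_End                       // assumed terminator, used only for sizing here
    };

    // Hypothetical helper: record that instruction I belongs to iteration Iter.
    // BitVector heap-allocates its bits, so nothing caps IL_End at one machine
    // word the way SmallBitVector's inline mode did.
    static void markIteration(MapVector<Instruction *, BitVector> &Uses,
                              Instruction *I, unsigned Iter) {
      BitVector &Bits = Uses[I]; // default-constructs an empty BitVector
      if (Bits.size() < IL_End)
        Bits.resize(IL_End);
      Bits.set(Iter);
    }
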
364 363     void replace(const SCEV *IterCount);
365 364
366 365   protected:
367       -   typedef MapVector<Instruction *, SmallBitVector> UsesTy;
    366   +   typedef MapVector<Instruction *, BitVector> UsesTy;
368 367
369 368     bool findRootsRecursive(Instruction *IVU,
370 369                             SmallInstructionSet SubsumedInsts);
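
For context on the removed "less than 25" comment: SmallBitVector packs its size and bits into one uintptr_t while they fit, then transparently spills to the heap, and on a 32-bit host only about 25 data bits fit inline. A small standalone check of that behavior, using only public ADT calls (the specific capacities are implementation details, not guarantees):

    #include "llvm/ADT/SmallBitVector.h"
    #include <cstdio>

    int main() {
      llvm::SmallBitVector SBV;
      SBV.resize(24); // small enough to stay in inline (one-word) mode everywhere
      SBV.set(23);
      // Growing past the inline capacity switches to heap storage; observable
      // behavior is unchanged, only the allocation strategy differs.
      SBV.resize(64);
      SBV.set(63);
      std::printf("bits set: %u of %u\n",
                  unsigned(SBV.count()), unsigned(SBV.size()));
      return 0;
    }
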
; RUN: opt < %s -loop-reroll -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; void goo32(float alpha, float *a, float *b) {
;   for (int i = 0; i < 3200; i += 32) {
;     a[i] += alpha * b[i];
;     a[i + 1] += alpha * b[i + 1];
;     a[i + 2] += alpha * b[i + 2];
;     a[i + 3] += alpha * b[i + 3];
;     a[i + 4] += alpha * b[i + 4];
;     a[i + 5] += alpha * b[i + 5];
;     a[i + 6] += alpha * b[i + 6];
;     a[i + 7] += alpha * b[i + 7];
;     a[i + 8] += alpha * b[i + 8];
;     a[i + 9] += alpha * b[i + 9];
;     a[i + 10] += alpha * b[i + 10];
;     a[i + 11] += alpha * b[i + 11];
;     a[i + 12] += alpha * b[i + 12];
;     a[i + 13] += alpha * b[i + 13];
;     a[i + 14] += alpha * b[i + 14];
;     a[i + 15] += alpha * b[i + 15];
;     a[i + 16] += alpha * b[i + 16];
;     a[i + 17] += alpha * b[i + 17];
;     a[i + 18] += alpha * b[i + 18];
;     a[i + 19] += alpha * b[i + 19];
;     a[i + 20] += alpha * b[i + 20];
;     a[i + 21] += alpha * b[i + 21];
;     a[i + 22] += alpha * b[i + 22];
;     a[i + 23] += alpha * b[i + 23];
;     a[i + 24] += alpha * b[i + 24];
;     a[i + 25] += alpha * b[i + 25];
;     a[i + 26] += alpha * b[i + 26];
;     a[i + 27] += alpha * b[i + 27];
;     a[i + 28] += alpha * b[i + 28];
;     a[i + 29] += alpha * b[i + 29];
;     a[i + 30] += alpha * b[i + 30];
;     a[i + 31] += alpha * b[i + 31];
;   }
; }
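;
; For clarity (not part of the original test): after rerolling, the 32
; strided statements above collapse back to the equivalent of
;   for (int i = 0; i < 3200; ++i)
;     a[i] += alpha * b[i];
; which is what the CHECK lines below verify.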

; Function Attrs: norecurse nounwind uwtable
define void @goo32(float %alpha, float* %a, float* readonly %b) #0 {
entry:
  br label %for.body

for.body: ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv
  %0 = load float, float* %arrayidx, align 4
  %mul = fmul float %0, %alpha
  %arrayidx2 = getelementptr inbounds float, float* %a, i64 %indvars.iv
  %1 = load float, float* %arrayidx2, align 4
  %add = fadd float %1, %mul
  store float %add, float* %arrayidx2, align 4
  %2 = or i64 %indvars.iv, 1
  %arrayidx5 = getelementptr inbounds float, float* %b, i64 %2
  %3 = load float, float* %arrayidx5, align 4
  %mul6 = fmul float %3, %alpha
  %arrayidx9 = getelementptr inbounds float, float* %a, i64 %2
  %4 = load float, float* %arrayidx9, align 4
  %add10 = fadd float %4, %mul6
  store float %add10, float* %arrayidx9, align 4
  %5 = or i64 %indvars.iv, 2
  %arrayidx13 = getelementptr inbounds float, float* %b, i64 %5
  %6 = load float, float* %arrayidx13, align 4
  %mul14 = fmul float %6, %alpha
  %arrayidx17 = getelementptr inbounds float, float* %a, i64 %5
  %7 = load float, float* %arrayidx17, align 4
  %add18 = fadd float %7, %mul14
  store float %add18, float* %arrayidx17, align 4
  %8 = or i64 %indvars.iv, 3
  %arrayidx21 = getelementptr inbounds float, float* %b, i64 %8
  %9 = load float, float* %arrayidx21, align 4
  %mul22 = fmul float %9, %alpha
  %arrayidx25 = getelementptr inbounds float, float* %a, i64 %8
  %10 = load float, float* %arrayidx25, align 4
  %add26 = fadd float %10, %mul22
  store float %add26, float* %arrayidx25, align 4
  %11 = or i64 %indvars.iv, 4
  %arrayidx29 = getelementptr inbounds float, float* %b, i64 %11
  %12 = load float, float* %arrayidx29, align 4
  %mul30 = fmul float %12, %alpha
  %arrayidx33 = getelementptr inbounds float, float* %a, i64 %11
  %13 = load float, float* %arrayidx33, align 4
  %add34 = fadd float %13, %mul30
  store float %add34, float* %arrayidx33, align 4
  %14 = or i64 %indvars.iv, 5
  %arrayidx37 = getelementptr inbounds float, float* %b, i64 %14
  %15 = load float, float* %arrayidx37, align 4
  %mul38 = fmul float %15, %alpha
  %arrayidx41 = getelementptr inbounds float, float* %a, i64 %14
  %16 = load float, float* %arrayidx41, align 4
  %add42 = fadd float %16, %mul38
  store float %add42, float* %arrayidx41, align 4
  %17 = or i64 %indvars.iv, 6
  %arrayidx45 = getelementptr inbounds float, float* %b, i64 %17
  %18 = load float, float* %arrayidx45, align 4
  %mul46 = fmul float %18, %alpha
  %arrayidx49 = getelementptr inbounds float, float* %a, i64 %17
  %19 = load float, float* %arrayidx49, align 4
  %add50 = fadd float %19, %mul46
  store float %add50, float* %arrayidx49, align 4
  %20 = or i64 %indvars.iv, 7
  %arrayidx53 = getelementptr inbounds float, float* %b, i64 %20
  %21 = load float, float* %arrayidx53, align 4
  %mul54 = fmul float %21, %alpha
  %arrayidx57 = getelementptr inbounds float, float* %a, i64 %20
  %22 = load float, float* %arrayidx57, align 4
  %add58 = fadd float %22, %mul54
  store float %add58, float* %arrayidx57, align 4
  %23 = or i64 %indvars.iv, 8
  %arrayidx61 = getelementptr inbounds float, float* %b, i64 %23
  %24 = load float, float* %arrayidx61, align 4
  %mul62 = fmul float %24, %alpha
  %arrayidx65 = getelementptr inbounds float, float* %a, i64 %23
  %25 = load float, float* %arrayidx65, align 4
  %add66 = fadd float %25, %mul62
  store float %add66, float* %arrayidx65, align 4
  %26 = or i64 %indvars.iv, 9
  %arrayidx69 = getelementptr inbounds float, float* %b, i64 %26
  %27 = load float, float* %arrayidx69, align 4
  %mul70 = fmul float %27, %alpha
  %arrayidx73 = getelementptr inbounds float, float* %a, i64 %26
  %28 = load float, float* %arrayidx73, align 4
  %add74 = fadd float %28, %mul70
  store float %add74, float* %arrayidx73, align 4
  %29 = or i64 %indvars.iv, 10
  %arrayidx77 = getelementptr inbounds float, float* %b, i64 %29
  %30 = load float, float* %arrayidx77, align 4
  %mul78 = fmul float %30, %alpha
  %arrayidx81 = getelementptr inbounds float, float* %a, i64 %29
  %31 = load float, float* %arrayidx81, align 4
  %add82 = fadd float %31, %mul78
  store float %add82, float* %arrayidx81, align 4
  %32 = or i64 %indvars.iv, 11
  %arrayidx85 = getelementptr inbounds float, float* %b, i64 %32
  %33 = load float, float* %arrayidx85, align 4
  %mul86 = fmul float %33, %alpha
  %arrayidx89 = getelementptr inbounds float, float* %a, i64 %32
  %34 = load float, float* %arrayidx89, align 4
  %add90 = fadd float %34, %mul86
  store float %add90, float* %arrayidx89, align 4
  %35 = or i64 %indvars.iv, 12
  %arrayidx93 = getelementptr inbounds float, float* %b, i64 %35
  %36 = load float, float* %arrayidx93, align 4
  %mul94 = fmul float %36, %alpha
  %arrayidx97 = getelementptr inbounds float, float* %a, i64 %35
  %37 = load float, float* %arrayidx97, align 4
  %add98 = fadd float %37, %mul94
  store float %add98, float* %arrayidx97, align 4
  %38 = or i64 %indvars.iv, 13
  %arrayidx101 = getelementptr inbounds float, float* %b, i64 %38
  %39 = load float, float* %arrayidx101, align 4
  %mul102 = fmul float %39, %alpha
  %arrayidx105 = getelementptr inbounds float, float* %a, i64 %38
  %40 = load float, float* %arrayidx105, align 4
  %add106 = fadd float %40, %mul102
  store float %add106, float* %arrayidx105, align 4
  %41 = or i64 %indvars.iv, 14
  %arrayidx109 = getelementptr inbounds float, float* %b, i64 %41
  %42 = load float, float* %arrayidx109, align 4
  %mul110 = fmul float %42, %alpha
  %arrayidx113 = getelementptr inbounds float, float* %a, i64 %41
  %43 = load float, float* %arrayidx113, align 4
  %add114 = fadd float %43, %mul110
  store float %add114, float* %arrayidx113, align 4
  %44 = or i64 %indvars.iv, 15
  %arrayidx117 = getelementptr inbounds float, float* %b, i64 %44
  %45 = load float, float* %arrayidx117, align 4
  %mul118 = fmul float %45, %alpha
  %arrayidx121 = getelementptr inbounds float, float* %a, i64 %44
  %46 = load float, float* %arrayidx121, align 4
  %add122 = fadd float %46, %mul118
  store float %add122, float* %arrayidx121, align 4
  %47 = or i64 %indvars.iv, 16
  %arrayidx125 = getelementptr inbounds float, float* %b, i64 %47
  %48 = load float, float* %arrayidx125, align 4
  %mul126 = fmul float %48, %alpha
  %arrayidx129 = getelementptr inbounds float, float* %a, i64 %47
  %49 = load float, float* %arrayidx129, align 4
  %add130 = fadd float %49, %mul126
  store float %add130, float* %arrayidx129, align 4
  %50 = or i64 %indvars.iv, 17
  %arrayidx133 = getelementptr inbounds float, float* %b, i64 %50
  %51 = load float, float* %arrayidx133, align 4
  %mul134 = fmul float %51, %alpha
  %arrayidx137 = getelementptr inbounds float, float* %a, i64 %50
  %52 = load float, float* %arrayidx137, align 4
  %add138 = fadd float %52, %mul134
  store float %add138, float* %arrayidx137, align 4
  %53 = or i64 %indvars.iv, 18
  %arrayidx141 = getelementptr inbounds float, float* %b, i64 %53
  %54 = load float, float* %arrayidx141, align 4
  %mul142 = fmul float %54, %alpha
  %arrayidx145 = getelementptr inbounds float, float* %a, i64 %53
  %55 = load float, float* %arrayidx145, align 4
  %add146 = fadd float %55, %mul142
  store float %add146, float* %arrayidx145, align 4
  %56 = or i64 %indvars.iv, 19
  %arrayidx149 = getelementptr inbounds float, float* %b, i64 %56
  %57 = load float, float* %arrayidx149, align 4
  %mul150 = fmul float %57, %alpha
  %arrayidx153 = getelementptr inbounds float, float* %a, i64 %56
  %58 = load float, float* %arrayidx153, align 4
  %add154 = fadd float %58, %mul150
  store float %add154, float* %arrayidx153, align 4
  %59 = or i64 %indvars.iv, 20
  %arrayidx157 = getelementptr inbounds float, float* %b, i64 %59
  %60 = load float, float* %arrayidx157, align 4
  %mul158 = fmul float %60, %alpha
  %arrayidx161 = getelementptr inbounds float, float* %a, i64 %59
  %61 = load float, float* %arrayidx161, align 4
  %add162 = fadd float %61, %mul158
  store float %add162, float* %arrayidx161, align 4
  %62 = or i64 %indvars.iv, 21
  %arrayidx165 = getelementptr inbounds float, float* %b, i64 %62
  %63 = load float, float* %arrayidx165, align 4
  %mul166 = fmul float %63, %alpha
  %arrayidx169 = getelementptr inbounds float, float* %a, i64 %62
  %64 = load float, float* %arrayidx169, align 4
  %add170 = fadd float %64, %mul166
  store float %add170, float* %arrayidx169, align 4
  %65 = or i64 %indvars.iv, 22
  %arrayidx173 = getelementptr inbounds float, float* %b, i64 %65
  %66 = load float, float* %arrayidx173, align 4
  %mul174 = fmul float %66, %alpha
  %arrayidx177 = getelementptr inbounds float, float* %a, i64 %65
  %67 = load float, float* %arrayidx177, align 4
  %add178 = fadd float %67, %mul174
  store float %add178, float* %arrayidx177, align 4
  %68 = or i64 %indvars.iv, 23
  %arrayidx181 = getelementptr inbounds float, float* %b, i64 %68
  %69 = load float, float* %arrayidx181, align 4
  %mul182 = fmul float %69, %alpha
  %arrayidx185 = getelementptr inbounds float, float* %a, i64 %68
  %70 = load float, float* %arrayidx185, align 4
  %add186 = fadd float %70, %mul182
  store float %add186, float* %arrayidx185, align 4
  %71 = or i64 %indvars.iv, 24
  %arrayidx189 = getelementptr inbounds float, float* %b, i64 %71
  %72 = load float, float* %arrayidx189, align 4
  %mul190 = fmul float %72, %alpha
  %arrayidx193 = getelementptr inbounds float, float* %a, i64 %71
  %73 = load float, float* %arrayidx193, align 4
  %add194 = fadd float %73, %mul190
  store float %add194, float* %arrayidx193, align 4
  %74 = or i64 %indvars.iv, 25
  %arrayidx197 = getelementptr inbounds float, float* %b, i64 %74
  %75 = load float, float* %arrayidx197, align 4
  %mul198 = fmul float %75, %alpha
  %arrayidx201 = getelementptr inbounds float, float* %a, i64 %74
  %76 = load float, float* %arrayidx201, align 4
  %add202 = fadd float %76, %mul198
  store float %add202, float* %arrayidx201, align 4
  %77 = or i64 %indvars.iv, 26
  %arrayidx205 = getelementptr inbounds float, float* %b, i64 %77
  %78 = load float, float* %arrayidx205, align 4
  %mul206 = fmul float %78, %alpha
  %arrayidx209 = getelementptr inbounds float, float* %a, i64 %77
  %79 = load float, float* %arrayidx209, align 4
  %add210 = fadd float %79, %mul206
  store float %add210, float* %arrayidx209, align 4
  %80 = or i64 %indvars.iv, 27
  %arrayidx213 = getelementptr inbounds float, float* %b, i64 %80
  %81 = load float, float* %arrayidx213, align 4
  %mul214 = fmul float %81, %alpha
  %arrayidx217 = getelementptr inbounds float, float* %a, i64 %80
  %82 = load float, float* %arrayidx217, align 4
  %add218 = fadd float %82, %mul214
  store float %add218, float* %arrayidx217, align 4
  %83 = or i64 %indvars.iv, 28
  %arrayidx221 = getelementptr inbounds float, float* %b, i64 %83
  %84 = load float, float* %arrayidx221, align 4
  %mul222 = fmul float %84, %alpha
  %arrayidx225 = getelementptr inbounds float, float* %a, i64 %83
  %85 = load float, float* %arrayidx225, align 4
  %add226 = fadd float %85, %mul222
  store float %add226, float* %arrayidx225, align 4
  %86 = or i64 %indvars.iv, 29
  %arrayidx229 = getelementptr inbounds float, float* %b, i64 %86
  %87 = load float, float* %arrayidx229, align 4
  %mul230 = fmul float %87, %alpha
  %arrayidx233 = getelementptr inbounds float, float* %a, i64 %86
  %88 = load float, float* %arrayidx233, align 4
  %add234 = fadd float %88, %mul230
  store float %add234, float* %arrayidx233, align 4
  %89 = or i64 %indvars.iv, 30
  %arrayidx237 = getelementptr inbounds float, float* %b, i64 %89
  %90 = load float, float* %arrayidx237, align 4
  %mul238 = fmul float %90, %alpha
  %arrayidx241 = getelementptr inbounds float, float* %a, i64 %89
  %91 = load float, float* %arrayidx241, align 4
  %add242 = fadd float %91, %mul238
  store float %add242, float* %arrayidx241, align 4
  %92 = or i64 %indvars.iv, 31
  %arrayidx245 = getelementptr inbounds float, float* %b, i64 %92
  %93 = load float, float* %arrayidx245, align 4
  %mul246 = fmul float %93, %alpha
  %arrayidx249 = getelementptr inbounds float, float* %a, i64 %92
  %94 = load float, float* %arrayidx249, align 4
  %add250 = fadd float %94, %mul246
  store float %add250, float* %arrayidx249, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 32
  %cmp = icmp slt i64 %indvars.iv.next, 3200
  br i1 %cmp, label %for.body, label %for.end

; CHECK-LABEL: @goo32

; CHECK: for.body:
; CHECK: %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %entry ]
; CHECK: %arrayidx = getelementptr inbounds float, float* %b, i64 %indvar
; CHECK: %0 = load float, float* %arrayidx, align 4
; CHECK: %mul = fmul float %0, %alpha
; CHECK: %arrayidx2 = getelementptr inbounds float, float* %a, i64 %indvar
; CHECK: %1 = load float, float* %arrayidx2, align 4
; CHECK: %add = fadd float %1, %mul
; CHECK: store float %add, float* %arrayidx2, align 4
; CHECK: %indvar.next = add i64 %indvar, 1
; CHECK: %exitcond = icmp eq i64 %indvar, 3199
; CHECK: br i1 %exitcond, label %for.end, label %for.body
; CHECK: ret

for.end: ; preds = %for.body
  ret void
}

attributes #0 = { nounwind uwtable }