llvm.org GIT mirror llvm / 6f9eeb2
[DA] Enable -da-delinearize by default This enables da-delinearize in Dependence Analysis for delinearizing array accesses into multiple dimensions. This can help to increase the power of Dependence analysis on multi-dimensional arrays and prevent having to fall back to the slower and less accurate MIV tests. It adds static checks on the bounds of the arrays to ensure that one dimension doesn't overflow into another, and brings our code in line with our tests. Differential Revision: https://reviews.llvm.org/D45872 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@335217 91177308-0d34-0410-b5e6-96231b3b80d8 David Green 2 years ago
17 changed file(s) with 653 addition(s) and 55 deletion(s). Raw diff Collapse all Expand all
556556 const SCEV *X,
557557 const SCEV *Y) const;
558558
559 /// isKnownLessThan - Compare to see if S is less than Size.
560 /// Another wrapper for isKnownNegative(S - max(Size, 1)) with some extra
561 /// checking if S is an AddRec and we can prove less-than using the loop
562 /// bounds.
563 bool isKnownLessThan(const SCEV *S, const SCEV *Size) const;
564
559565 /// collectUpperBound - All subscripts are the same type (on my machine,
560566 /// an i64). The loop bound may be a smaller type. collectUpperBound
561567 /// finds the bound, if available, and zero-extends it to the Type T.
107107 STATISTIC(BanerjeeSuccesses, "Banerjee successes");
108108
109109 static cl::opt<bool>
110 Delinearize("da-delinearize", cl::init(false), cl::Hidden, cl::ZeroOrMore,
111 cl::desc("Try to delinearize array references."));
110 Delinearize("da-delinearize", cl::init(true), cl::Hidden, cl::ZeroOrMore,
111 cl::desc("Try to delinearize array references."));
112112
113113 //===----------------------------------------------------------------------===//
114114 // basics
993993 }
994994 }
995995
996 /// Returns true if S can be proven less than Size. Both operands must have integer type (otherwise false is returned) and are brought to the wider of the two bit widths before comparing.
997 /// If S - Size is an affine AddRec whose loop has a computable backedge-taken count, the difference is evaluated at that count; a known-negative result proves the bound.
998 /// Otherwise this falls back to isKnownNegative(S - smax(Size, 1)), so a false result means "could not prove", not "S >= Size".
999 bool DependenceInfo::isKnownLessThan(const SCEV *S, const SCEV *Size) const {
1000 // First unify to the same type: pick the wider integer type, so getTruncateOrZeroExtend below only ever zero-extends.
1001 auto *SType = dyn_cast<IntegerType>(S->getType());
1002 auto *SizeType = dyn_cast<IntegerType>(Size->getType());
1003 if (!SType || !SizeType)
1004 return false;
1005 Type *MaxType =
1006 (SType->getBitWidth() >= SizeType->getBitWidth()) ? SType : SizeType;
1007 S = SE->getTruncateOrZeroExtend(S, MaxType);
1008 Size = SE->getTruncateOrZeroExtend(Size, MaxType);
1009
1010 // Special check for addrecs using the backedge-taken count: evaluate S - Size at that count and accept if the result is known negative.
1011 const SCEV *Bound = SE->getMinusSCEV(S, Size);
1012 if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Bound)) {
1013 if (AddRec->isAffine()) {
1014 const SCEV *BECount = SE->getBackedgeTakenCount(AddRec->getLoop());
1015 if (!isa<SCEVCouldNotCompute>(BECount)) {
1016 const SCEV *Limit = AddRec->evaluateAtIteration(BECount, *SE);
1017 if (SE->isKnownNegative(Limit))
1018 return true;
1019 }
1020 }
1021 }
1022
1023 // Check using normal isKnownNegative, with Size clamped to at least 1 via smax(Size, 1).
1024 const SCEV *LimitedBound =
1025 SE->getMinusSCEV(S, SE->getSMaxExpr(Size, SE->getOne(Size->getType())));
1026 return SE->isKnownNegative(LimitedBound);
1027 }
9961028
9971029 // All subscripts are the same type.
9981030 // Loop bound may be smaller (e.g., a char).
32523284
32533285 int size = SrcSubscripts.size();
32543286
3287 // Statically check that the array bounds are in-range. We have no size for
3288 // the first subscript, and it cannot overflow into another subscript, so it
3289 // is always safe. The others need to satisfy 0 <= subscript[i] < bound, for
3290 // both src and dst.
3291 // FIXME: It may be better to record these sizes and add them as constraints
3292 // to the dependency checks.
3293 for (int i = 1; i < size; ++i) {
3294 if (!SE->isKnownNonNegative(SrcSubscripts[i]))
3295 return false;
3296
3297 if (!isKnownLessThan(SrcSubscripts[i], Sizes[i - 1]))
3298 return false;
3299
3300 if (!SE->isKnownNonNegative(DstSubscripts[i]))
3301 return false;
3302
3303 if (!isKnownLessThan(DstSubscripts[i], Sizes[i - 1]))
3304 return false;
3305 }
3306
32553307 LLVM_DEBUG({
32563308 dbgs() << "\nSrcSubscripts: ";
32573309 for (int i = 0; i < size; i++)
32703322 Pair[i].Src = SrcSubscripts[i];
32713323 Pair[i].Dst = DstSubscripts[i];
32723324 unifySubscriptType(&Pair[i]);
3273
3274 // FIXME: we should record the bounds SrcSizes[i] and DstSizes[i] that the
3275 // delinearization has found, and add these constraints to the dependence
3276 // check to avoid memory accesses overflow from one dimension into another.
3277 // This is related to the problem of determining the existence of data
3278 // dependences in array accesses using a different number of subscripts: in
3279 // C one can access an array A[100][100]; as A[0][9999], *A[9999], etc.
32803325 }
32813326
32823327 return true;
None ; RUN: opt -basicaa -da -analyze -da-delinearize < %s
0 ; RUN: opt -basicaa -da -analyze < %s
11 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
22 target triple = "x86_64-unknown-linux-gnu"
33
0 ; RUN: opt < %s -analyze -basicaa -da -da-delinearize=false | FileCheck %s
1 ; RUN: opt < %s -analyze -basicaa -da -da-delinearize | FileCheck %s -check-prefix=DELIN
2
3 ; ModuleID = 'Banerjee.bc'
1 ; RUN: opt < %s -analyze -basicaa -da | FileCheck %s -check-prefix=DELIN
2
43 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
54 target triple = "x86_64-apple-macosx10.6.0"
65
None ; RUN: opt < %s -analyze -basicaa -da -da-delinearize | FileCheck %s
1
2 ; ModuleID = 'Coupled.bc'
0 ; RUN: opt < %s -analyze -basicaa -da | FileCheck %s
1
32 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
43 target triple = "x86_64-apple-macosx10.6.0"
54
0 ; RUN: opt < %s -analyze -basicaa -da | FileCheck %s
1
2 target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
3 target triple = "thumbv8m.main-arm-none-eabi"
4
5 ; CHECK-LABEL: t1
6 ;; for (int i = 0; i < n; i++)
7 ;; for (int j = 0; j < m; j++)
8 ;; for (int k = 0; k < o; k++)
9 ;; = A[i*m*o + j*o + k]
10 ;; A[i*m*o + j*o + k] =
11 define void @t1(i32 %n, i32 %m, i32 %o, i32* nocapture %A) {
12 ; CHECK: da analyze - none!
13 ; CHECK: da analyze - consistent anti [0 0 0|<]!
14 ; CHECK: da analyze - none!
15 entry:
16 %cmp49 = icmp sgt i32 %n, 0
17 br i1 %cmp49, label %for.cond1.preheader.lr.ph, label %for.cond.cleanup
18
19 for.cond1.preheader.lr.ph: ; preds = %entry
20 %cmp247 = icmp sgt i32 %m, 0
21 %cmp645 = icmp sgt i32 %o, 0
22 br label %for.cond1.preheader
23
24 for.cond1.preheader: ; preds = %for.cond.cleanup3, %for.cond1.preheader.lr.ph
25 %i.050 = phi i32 [ 0, %for.cond1.preheader.lr.ph ], [ %inc23, %for.cond.cleanup3 ]
26 br i1 %cmp247, label %for.cond5.preheader.lr.ph, label %for.cond.cleanup3
27
28 for.cond5.preheader.lr.ph: ; preds = %for.cond1.preheader
29 %mul = mul nsw i32 %i.050, %m
30 br label %for.cond5.preheader
31
32 for.cond5.preheader: ; preds = %for.cond.cleanup7, %for.cond5.preheader.lr.ph
33 %j.048 = phi i32 [ 0, %for.cond5.preheader.lr.ph ], [ %inc20, %for.cond.cleanup7 ]
34 br i1 %cmp645, label %for.body8.lr.ph, label %for.cond.cleanup7
35
36 for.body8.lr.ph: ; preds = %for.cond5.preheader
37 %mul944 = add i32 %j.048, %mul
38 %add = mul i32 %mul944, %o
39 br label %for.body8
40
41 for.body8: ; preds = %for.body8, %for.body8.lr.ph
42 %k.046 = phi i32 [ 0, %for.body8.lr.ph ], [ %inc, %for.body8 ]
43 %add11 = add nsw i32 %k.046, %add
44 %arrayidx = getelementptr inbounds i32, i32* %A, i32 %add11
45 %0 = load i32, i32* %arrayidx, align 4
46 %add12 = add nsw i32 %0, 1
47 store i32 %add12, i32* %arrayidx, align 4
48 %inc = add nuw nsw i32 %k.046, 1
49 %exitcond = icmp eq i32 %inc, %o
50 br i1 %exitcond, label %for.cond.cleanup7, label %for.body8
51
52 for.cond.cleanup7: ; preds = %for.body8, %for.cond5.preheader
53 %inc20 = add nuw nsw i32 %j.048, 1
54 %exitcond51 = icmp eq i32 %inc20, %m
55 br i1 %exitcond51, label %for.cond.cleanup3, label %for.cond5.preheader
56
57 for.cond.cleanup3: ; preds = %for.cond.cleanup7, %for.cond1.preheader
58 %inc23 = add nuw nsw i32 %i.050, 1
59 %exitcond52 = icmp eq i32 %inc23, %n
60 br i1 %exitcond52, label %for.cond.cleanup, label %for.cond1.preheader
61
62 for.cond.cleanup: ; preds = %for.cond.cleanup3, %entry
63 ret void
64 }
65
66 ; CHECK-LABEL: t2
67 ;; for (int i = 0; i < n; i++)
68 ;; for (int j = 0; j < m; j++)
69 ;; for (int k = 0; k < o; k++)
70 ;; = A[i*m*o + j*o + k]
71 ;; A[i*m*o + j*o + k + 1] =
72 define void @t2(i32 %n, i32 %m, i32 %o, i32* nocapture %A) {
73 ; CHECK: da analyze - none!
74 ; CHECK: da analyze - anti [* * *|<]!
75 ; CHECK: da analyze - output [* * *]!
76 entry:
77 %cmp49 = icmp sgt i32 %n, 0
78 br i1 %cmp49, label %for.cond1.preheader.lr.ph, label %for.cond.cleanup
79
80 for.cond1.preheader.lr.ph: ; preds = %entry
81 %cmp247 = icmp sgt i32 %m, 0
82 %cmp645 = icmp sgt i32 %o, 0
83 br label %for.cond1.preheader
84
85 for.cond1.preheader: ; preds = %for.cond.cleanup3, %for.cond1.preheader.lr.ph
86 %i.050 = phi i32 [ 0, %for.cond1.preheader.lr.ph ], [ %inc23, %for.cond.cleanup3 ]
87 br i1 %cmp247, label %for.cond5.preheader.lr.ph, label %for.cond.cleanup3
88
89 for.cond5.preheader.lr.ph: ; preds = %for.cond1.preheader
90 %mul = mul nsw i32 %i.050, %m
91 br label %for.cond5.preheader
92
93 for.cond5.preheader: ; preds = %for.cond.cleanup7, %for.cond5.preheader.lr.ph
94 %j.048 = phi i32 [ 0, %for.cond5.preheader.lr.ph ], [ %inc20, %for.cond.cleanup7 ]
95 br i1 %cmp645, label %for.body8.lr.ph, label %for.cond.cleanup7
96
97 for.body8.lr.ph: ; preds = %for.cond5.preheader
98 %mul944 = add i32 %j.048, %mul
99 %add = mul i32 %mul944, %o
100 br label %for.body8
101
102 for.body8: ; preds = %for.body8, %for.body8.lr.ph
103 %k.046 = phi i32 [ 0, %for.body8.lr.ph ], [ %inc, %for.body8 ]
104 %add11 = add nsw i32 %k.046, %add
105 %arrayidx = getelementptr inbounds i32, i32* %A, i32 %add11
106 %0 = load i32, i32* %arrayidx, align 4
107 %add12 = add nsw i32 %0, 1
108 %add111 = add nsw i32 %add11, 1
109 %arrayidx2 = getelementptr inbounds i32, i32* %A, i32 %add111
110 store i32 %add12, i32* %arrayidx2, align 4
111 %inc = add nuw nsw i32 %k.046, 1
112 %exitcond = icmp eq i32 %inc, %o
113 br i1 %exitcond, label %for.cond.cleanup7, label %for.body8
114
115 for.cond.cleanup7: ; preds = %for.body8, %for.cond5.preheader
116 %inc20 = add nuw nsw i32 %j.048, 1
117 %exitcond51 = icmp eq i32 %inc20, %m
118 br i1 %exitcond51, label %for.cond.cleanup3, label %for.cond5.preheader
119
120 for.cond.cleanup3: ; preds = %for.cond.cleanup7, %for.cond1.preheader
121 %inc23 = add nuw nsw i32 %i.050, 1
122 %exitcond52 = icmp eq i32 %inc23, %n
123 br i1 %exitcond52, label %for.cond.cleanup, label %for.cond1.preheader
124
125 for.cond.cleanup: ; preds = %for.cond.cleanup3, %entry
126 ret void
127 }
128
129 ; CHECK-LABEL: t3
130 ;; for (int i = 0; i < n; i++)
131 ;; for (int j = 0; j < m; j++)
132 ;; for (int k = 0; k < o; k++)
133 ;; = A[i*m*o + j*o + k]
134 ;; A[i*m*o + j*o + k - 1] =
135 define void @t3(i32 %n, i32 %m, i32 %o, i32* nocapture %A) {
136 ; CHECK: da analyze - none!
137 ; CHECK: da analyze - anti [* * *|<]!
138 ; CHECK: da analyze - output [* * *]!
139 entry:
140 %cmp49 = icmp sgt i32 %n, 0
141 br i1 %cmp49, label %for.cond1.preheader.lr.ph, label %for.cond.cleanup
142
143 for.cond1.preheader.lr.ph: ; preds = %entry
144 %cmp247 = icmp sgt i32 %m, 0
145 %cmp645 = icmp sgt i32 %o, 0
146 br label %for.cond1.preheader
147
148 for.cond1.preheader: ; preds = %for.cond.cleanup3, %for.cond1.preheader.lr.ph
149 %i.050 = phi i32 [ 0, %for.cond1.preheader.lr.ph ], [ %inc23, %for.cond.cleanup3 ]
150 br i1 %cmp247, label %for.cond5.preheader.lr.ph, label %for.cond.cleanup3
151
152 for.cond5.preheader.lr.ph: ; preds = %for.cond1.preheader
153 %mul = mul nsw i32 %i.050, %m
154 br label %for.cond5.preheader
155
156 for.cond5.preheader: ; preds = %for.cond.cleanup7, %for.cond5.preheader.lr.ph
157 %j.048 = phi i32 [ 0, %for.cond5.preheader.lr.ph ], [ %inc20, %for.cond.cleanup7 ]
158 br i1 %cmp645, label %for.body8.lr.ph, label %for.cond.cleanup7
159
160 for.body8.lr.ph: ; preds = %for.cond5.preheader
161 %mul944 = add i32 %j.048, %mul
162 %add = mul i32 %mul944, %o
163 br label %for.body8
164
165 for.body8: ; preds = %for.body8, %for.body8.lr.ph
166 %k.046 = phi i32 [ 0, %for.body8.lr.ph ], [ %inc, %for.body8 ]
167 %add11 = add nsw i32 %k.046, %add
168 %arrayidx = getelementptr inbounds i32, i32* %A, i32 %add11
169 %0 = load i32, i32* %arrayidx, align 4
170 %add12 = add nsw i32 %0, 1
171 %add111 = sub nsw i32 %add11, 1
172 %arrayidx2 = getelementptr inbounds i32, i32* %A, i32 %add111
173 store i32 %add12, i32* %arrayidx2, align 4
174 %inc = add nuw nsw i32 %k.046, 1
175 %exitcond = icmp eq i32 %inc, %o
176 br i1 %exitcond, label %for.cond.cleanup7, label %for.body8
177
178 for.cond.cleanup7: ; preds = %for.body8, %for.cond5.preheader
179 %inc20 = add nuw nsw i32 %j.048, 1
180 %exitcond51 = icmp eq i32 %inc20, %m
181 br i1 %exitcond51, label %for.cond.cleanup3, label %for.cond5.preheader
182
183 for.cond.cleanup3: ; preds = %for.cond.cleanup7, %for.cond1.preheader
184 %inc23 = add nuw nsw i32 %i.050, 1
185 %exitcond52 = icmp eq i32 %inc23, %n
186 br i1 %exitcond52, label %for.cond.cleanup, label %for.cond1.preheader
187
188 for.cond.cleanup: ; preds = %for.cond.cleanup3, %entry
189 ret void
190 }
191
192 ; CHECK-LABEL: t4
193 ;; for (int i = 0; i < n; i++)
194 ;; for (int j = 0; j < m; j++)
195 ;; for (int k = 0; k < o; k++)
196 ;; = A[i*m*o + j*o + k]
197 ;; A[i*m*o + j*o + k + o] =
198 define void @t4(i32 %n, i32 %m, i32 %o, i32* nocapture %A) {
199 ; CHECK: da analyze - none!
200 ; CHECK: da analyze - anti [* * *|<]!
201 ; CHECK: da analyze - output [* * *]!
202 entry:
203 %cmp49 = icmp sgt i32 %n, 0
204 br i1 %cmp49, label %for.cond1.preheader.lr.ph, label %for.cond.cleanup
205
206 for.cond1.preheader.lr.ph: ; preds = %entry
207 %cmp247 = icmp sgt i32 %m, 0
208 %cmp645 = icmp sgt i32 %o, 0
209 br label %for.cond1.preheader
210
211 for.cond1.preheader: ; preds = %for.cond.cleanup3, %for.cond1.preheader.lr.ph
212 %i.050 = phi i32 [ 0, %for.cond1.preheader.lr.ph ], [ %inc23, %for.cond.cleanup3 ]
213 br i1 %cmp247, label %for.cond5.preheader.lr.ph, label %for.cond.cleanup3
214
215 for.cond5.preheader.lr.ph: ; preds = %for.cond1.preheader
216 %mul = mul nsw i32 %i.050, %m
217 br label %for.cond5.preheader
218
219 for.cond5.preheader: ; preds = %for.cond.cleanup7, %for.cond5.preheader.lr.ph
220 %j.048 = phi i32 [ 0, %for.cond5.preheader.lr.ph ], [ %inc20, %for.cond.cleanup7 ]
221 br i1 %cmp645, label %for.body8.lr.ph, label %for.cond.cleanup7
222
223 for.body8.lr.ph: ; preds = %for.cond5.preheader
224 %mul944 = add i32 %j.048, %mul
225 %add = mul i32 %mul944, %o
226 br label %for.body8
227
228 for.body8: ; preds = %for.body8, %for.body8.lr.ph
229 %k.046 = phi i32 [ 0, %for.body8.lr.ph ], [ %inc, %for.body8 ]
230 %add11 = add nsw i32 %k.046, %add
231 %arrayidx = getelementptr inbounds i32, i32* %A, i32 %add11
232 %0 = load i32, i32* %arrayidx, align 4
233 %add12 = add nsw i32 %0, 1
234 %add111 = add nsw i32 %add11, %o
235 %arrayidx2 = getelementptr inbounds i32, i32* %A, i32 %add111
236 store i32 %add12, i32* %arrayidx2, align 4
237 %inc = add nuw nsw i32 %k.046, 1
238 %exitcond = icmp eq i32 %inc, %o
239 br i1 %exitcond, label %for.cond.cleanup7, label %for.body8
240
241 for.cond.cleanup7: ; preds = %for.body8, %for.cond5.preheader
242 %inc20 = add nuw nsw i32 %j.048, 1
243 %exitcond51 = icmp eq i32 %inc20, %m
244 br i1 %exitcond51, label %for.cond.cleanup3, label %for.cond5.preheader
245
246 for.cond.cleanup3: ; preds = %for.cond.cleanup7, %for.cond1.preheader
247 %inc23 = add nuw nsw i32 %i.050, 1
248 %exitcond52 = icmp eq i32 %inc23, %n
249 br i1 %exitcond52, label %for.cond.cleanup, label %for.cond1.preheader
250
251 for.cond.cleanup: ; preds = %for.cond.cleanup3, %entry
252 ret void
253 }
254
255 ; CHECK-LABEL: t5
256 ;; for (int i = 0; i < n; i++)
257 ;; for (int j = 0; j < m; j++)
258 ;; for (int k = 0; k < o; k++)
259 ;; = A[i*m*o + j*o + k]
260 ;; A[i*m*o + j*o + k - o] =
261 define void @t5(i32 %n, i32 %m, i32 %o, i32* nocapture %A) {
262 ; CHECK: da analyze - none!
263 ; CHECK: da analyze - anti [* * *|<]!
264 ; CHECK: da analyze - output [* * *]!
265 entry:
266 %cmp49 = icmp sgt i32 %n, 0
267 br i1 %cmp49, label %for.cond1.preheader.lr.ph, label %for.cond.cleanup
268
269 for.cond1.preheader.lr.ph: ; preds = %entry
270 %cmp247 = icmp sgt i32 %m, 0
271 %cmp645 = icmp sgt i32 %o, 0
272 br label %for.cond1.preheader
273
274 for.cond1.preheader: ; preds = %for.cond.cleanup3, %for.cond1.preheader.lr.ph
275 %i.050 = phi i32 [ 0, %for.cond1.preheader.lr.ph ], [ %inc23, %for.cond.cleanup3 ]
276 br i1 %cmp247, label %for.cond5.preheader.lr.ph, label %for.cond.cleanup3
277
278 for.cond5.preheader.lr.ph: ; preds = %for.cond1.preheader
279 %mul = mul nsw i32 %i.050, %m
280 br label %for.cond5.preheader
281
282 for.cond5.preheader: ; preds = %for.cond.cleanup7, %for.cond5.preheader.lr.ph
283 %j.048 = phi i32 [ 0, %for.cond5.preheader.lr.ph ], [ %inc20, %for.cond.cleanup7 ]
284 br i1 %cmp645, label %for.body8.lr.ph, label %for.cond.cleanup7
285
286 for.body8.lr.ph: ; preds = %for.cond5.preheader
287 %mul944 = add i32 %j.048, %mul
288 %add = mul i32 %mul944, %o
289 br label %for.body8
290
291 for.body8: ; preds = %for.body8, %for.body8.lr.ph
292 %k.046 = phi i32 [ 0, %for.body8.lr.ph ], [ %inc, %for.body8 ]
293 %add11 = add nsw i32 %k.046, %add
294 %arrayidx = getelementptr inbounds i32, i32* %A, i32 %add11
295 %0 = load i32, i32* %arrayidx, align 4
296 %add12 = add nsw i32 %0, 1
297 %add111 = sub nsw i32 %add11, %o
298 %arrayidx2 = getelementptr inbounds i32, i32* %A, i32 %add111
299 store i32 %add12, i32* %arrayidx2, align 4
300 %inc = add nuw nsw i32 %k.046, 1
301 %exitcond = icmp eq i32 %inc, %o
302 br i1 %exitcond, label %for.cond.cleanup7, label %for.body8
303
304 for.cond.cleanup7: ; preds = %for.body8, %for.cond5.preheader
305 %inc20 = add nuw nsw i32 %j.048, 1
306 %exitcond51 = icmp eq i32 %inc20, %m
307 br i1 %exitcond51, label %for.cond.cleanup3, label %for.cond5.preheader
308
309 for.cond.cleanup3: ; preds = %for.cond.cleanup7, %for.cond1.preheader
310 %inc23 = add nuw nsw i32 %i.050, 1
311 %exitcond52 = icmp eq i32 %inc23, %n
312 br i1 %exitcond52, label %for.cond.cleanup, label %for.cond1.preheader
313
314 for.cond.cleanup: ; preds = %for.cond.cleanup3, %entry
315 ret void
316 }
317
318 ; CHECK-LABEL: t6
319 ;; for (int i = 0; i < n; i++)
320 ;; for (int j = 0; j < m; j++)
321 ;; for (int k = 0; k < o; k++)
322 ;; = A[i*m*o + j*o + k]
323 ;; A[i*m*o + j*o + k + m*o] =
324 define void @t6(i32 %n, i32 %m, i32 %o, i32* nocapture %A) {
325 ; CHECK: da analyze - none!
326 ; CHECK: da analyze - consistent anti [-1 0 0]!
327 ; CHECK: da analyze - none!
328 entry:
329 %cmp49 = icmp sgt i32 %n, 0
330 br i1 %cmp49, label %for.cond1.preheader.lr.ph, label %for.cond.cleanup
331
332 for.cond1.preheader.lr.ph: ; preds = %entry
333 %cmp247 = icmp sgt i32 %m, 0
334 %cmp645 = icmp sgt i32 %o, 0
335 br label %for.cond1.preheader
336
337 for.cond1.preheader: ; preds = %for.cond.cleanup3, %for.cond1.preheader.lr.ph
338 %i.050 = phi i32 [ 0, %for.cond1.preheader.lr.ph ], [ %inc23, %for.cond.cleanup3 ]
339 br i1 %cmp247, label %for.cond5.preheader.lr.ph, label %for.cond.cleanup3
340
341 for.cond5.preheader.lr.ph: ; preds = %for.cond1.preheader
342 %mul = mul nsw i32 %i.050, %m
343 br label %for.cond5.preheader
344
345 for.cond5.preheader: ; preds = %for.cond.cleanup7, %for.cond5.preheader.lr.ph
346 %j.048 = phi i32 [ 0, %for.cond5.preheader.lr.ph ], [ %inc20, %for.cond.cleanup7 ]
347 br i1 %cmp645, label %for.body8.lr.ph, label %for.cond.cleanup7
348
349 for.body8.lr.ph: ; preds = %for.cond5.preheader
350 %mul944 = add i32 %j.048, %mul
351 %add = mul i32 %mul944, %o
352 br label %for.body8
353
354 for.body8: ; preds = %for.body8, %for.body8.lr.ph
355 %k.046 = phi i32 [ 0, %for.body8.lr.ph ], [ %inc, %for.body8 ]
356 %add11 = add nsw i32 %k.046, %add
357 %arrayidx = getelementptr inbounds i32, i32* %A, i32 %add11
358 %0 = load i32, i32* %arrayidx, align 4
359 %add12 = add nsw i32 %0, 1
360 %mo = mul i32 %m, %o
361 %add111 = add nsw i32 %add11, %mo
362 %arrayidx2 = getelementptr inbounds i32, i32* %A, i32 %add111
363 store i32 %add12, i32* %arrayidx2, align 4
364 %inc = add nuw nsw i32 %k.046, 1
365 %exitcond = icmp eq i32 %inc, %o
366 br i1 %exitcond, label %for.cond.cleanup7, label %for.body8
367
368 for.cond.cleanup7: ; preds = %for.body8, %for.cond5.preheader
369 %inc20 = add nuw nsw i32 %j.048, 1
370 %exitcond51 = icmp eq i32 %inc20, %m
371 br i1 %exitcond51, label %for.cond.cleanup3, label %for.cond5.preheader
372
373 for.cond.cleanup3: ; preds = %for.cond.cleanup7, %for.cond1.preheader
374 %inc23 = add nuw nsw i32 %i.050, 1
375 %exitcond52 = icmp eq i32 %inc23, %n
376 br i1 %exitcond52, label %for.cond.cleanup, label %for.cond1.preheader
377
378 for.cond.cleanup: ; preds = %for.cond.cleanup3, %entry
379 ret void
380 }
381
382 ; CHECK-LABEL: t7
383 ;; for (int i = 0; i < n; i++)
384 ;; for (int j = 0; j < m; j++)
385 ;; for (int k = 0; k < o; k++)
386 ;; = A[i*m*o + j*o + k]
387 ;; A[i*m*o + j*o + k - m*o] =
388 define void @t7(i32 %n, i32 %m, i32 %o, i32* nocapture %A) {
389 ; CHECK: da analyze - none!
390 ; CHECK: da analyze - consistent anti [1 0 0]!
391 ; CHECK: da analyze - none!
392 entry:
393 %cmp49 = icmp sgt i32 %n, 0
394 br i1 %cmp49, label %for.cond1.preheader.lr.ph, label %for.cond.cleanup
395
396 for.cond1.preheader.lr.ph: ; preds = %entry
397 %cmp247 = icmp sgt i32 %m, 0
398 %cmp645 = icmp sgt i32 %o, 0
399 br label %for.cond1.preheader
400
401 for.cond1.preheader: ; preds = %for.cond.cleanup3, %for.cond1.preheader.lr.ph
402 %i.050 = phi i32 [ 0, %for.cond1.preheader.lr.ph ], [ %inc23, %for.cond.cleanup3 ]
403 br i1 %cmp247, label %for.cond5.preheader.lr.ph, label %for.cond.cleanup3
404
405 for.cond5.preheader.lr.ph: ; preds = %for.cond1.preheader
406 %mul = mul nsw i32 %i.050, %m
407 br label %for.cond5.preheader
408
409 for.cond5.preheader: ; preds = %for.cond.cleanup7, %for.cond5.preheader.lr.ph
410 %j.048 = phi i32 [ 0, %for.cond5.preheader.lr.ph ], [ %inc20, %for.cond.cleanup7 ]
411 br i1 %cmp645, label %for.body8.lr.ph, label %for.cond.cleanup7
412
413 for.body8.lr.ph: ; preds = %for.cond5.preheader
414 %mul944 = add i32 %j.048, %mul
415 %add = mul i32 %mul944, %o
416 br label %for.body8
417
418 for.body8: ; preds = %for.body8, %for.body8.lr.ph
419 %k.046 = phi i32 [ 0, %for.body8.lr.ph ], [ %inc, %for.body8 ]
420 %add11 = add nsw i32 %k.046, %add
421 %arrayidx = getelementptr inbounds i32, i32* %A, i32 %add11
422 %0 = load i32, i32* %arrayidx, align 4
423 %add12 = add nsw i32 %0, 1
424 %mo = mul i32 %m, %o
425 %add111 = sub nsw i32 %add11, %mo
426 %arrayidx2 = getelementptr inbounds i32, i32* %A, i32 %add111
427 store i32 %add12, i32* %arrayidx2, align 4
428 %inc = add nuw nsw i32 %k.046, 1
429 %exitcond = icmp eq i32 %inc, %o
430 br i1 %exitcond, label %for.cond.cleanup7, label %for.body8
431
432 for.cond.cleanup7: ; preds = %for.body8, %for.cond5.preheader
433 %inc20 = add nuw nsw i32 %j.048, 1
434 %exitcond51 = icmp eq i32 %inc20, %m
435 br i1 %exitcond51, label %for.cond.cleanup3, label %for.cond5.preheader
436
437 for.cond.cleanup3: ; preds = %for.cond.cleanup7, %for.cond1.preheader
438 %inc23 = add nuw nsw i32 %i.050, 1
439 %exitcond52 = icmp eq i32 %inc23, %n
440 br i1 %exitcond52, label %for.cond.cleanup, label %for.cond1.preheader
441
442 for.cond.cleanup: ; preds = %for.cond.cleanup3, %entry
443 ret void
444 }
445
446 ; CHECK-LABEL: t8
447 ;; for (int i = 0; i < n; i++)
448 ;; for (int j = 0; j < m; j++)
449 ;; for (int k = 1; k < o; k++)
450 ;; = A[i*m*o + j*o + k]
451 ;; A[i*m*o + j*o + k - 1] =
452 define void @t8(i32 %n, i32 %m, i32 %o, i32* nocapture %A) {
453 ; CHECK: da analyze - none!
454 ; CHECK: da analyze - consistent anti [0 0 1]!
455 ; CHECK: da analyze - none!
456 entry:
457 %cmp49 = icmp sgt i32 %n, 0
458 br i1 %cmp49, label %for.cond1.preheader.lr.ph, label %for.cond.cleanup
459
460 for.cond1.preheader.lr.ph: ; preds = %entry
461 %cmp247 = icmp sgt i32 %m, 0
462 %cmp645 = icmp sgt i32 %o, 0
463 br label %for.cond1.preheader
464
465 for.cond1.preheader: ; preds = %for.cond.cleanup3, %for.cond1.preheader.lr.ph
466 %i.050 = phi i32 [ 0, %for.cond1.preheader.lr.ph ], [ %inc23, %for.cond.cleanup3 ]
467 br i1 %cmp247, label %for.cond5.preheader.lr.ph, label %for.cond.cleanup3
468
469 for.cond5.preheader.lr.ph: ; preds = %for.cond1.preheader
470 %mul = mul nsw i32 %i.050, %m
471 br label %for.cond5.preheader
472
473 for.cond5.preheader: ; preds = %for.cond.cleanup7, %for.cond5.preheader.lr.ph
474 %j.048 = phi i32 [ 0, %for.cond5.preheader.lr.ph ], [ %inc20, %for.cond.cleanup7 ]
475 br i1 %cmp645, label %for.body8.lr.ph, label %for.cond.cleanup7
476
477 for.body8.lr.ph: ; preds = %for.cond5.preheader
478 %mul944 = add i32 %j.048, %mul
479 %add = mul i32 %mul944, %o
480 br label %for.body8
481
482 for.body8: ; preds = %for.body8, %for.body8.lr.ph
483 %k.046 = phi i32 [ 1, %for.body8.lr.ph ], [ %inc, %for.body8 ]
484 %add11 = add nsw i32 %k.046, %add
485 %arrayidx = getelementptr inbounds i32, i32* %A, i32 %add11
486 %0 = load i32, i32* %arrayidx, align 4
487 %add12 = add nsw i32 %0, 1
488 %add111 = sub nsw i32 %add11, 1
489 %arrayidx2 = getelementptr inbounds i32, i32* %A, i32 %add111
490 store i32 %add12, i32* %arrayidx2, align 4
491 %inc = add nuw nsw i32 %k.046, 1
492 %exitcond = icmp eq i32 %inc, %o
493 br i1 %exitcond, label %for.cond.cleanup7, label %for.body8
494
495 for.cond.cleanup7: ; preds = %for.body8, %for.cond5.preheader
496 %inc20 = add nuw nsw i32 %j.048, 1
497 %exitcond51 = icmp eq i32 %inc20, %m
498 br i1 %exitcond51, label %for.cond.cleanup3, label %for.cond5.preheader
499
500 for.cond.cleanup3: ; preds = %for.cond.cleanup7, %for.cond1.preheader
501 %inc23 = add nuw nsw i32 %i.050, 1
502 %exitcond52 = icmp eq i32 %inc23, %n
503 br i1 %exitcond52, label %for.cond.cleanup, label %for.cond1.preheader
504
505 for.cond.cleanup: ; preds = %for.cond.cleanup3, %entry
506 ret void
507 }
508
509
510 ; CHECK-LABEL: test_sizes
511 define double @test_sizes(i16 %h, i16 %N, i16* nocapture %array) {
512 ; CHECK: da analyze - consistent input [0 S]!
513 ; CHECK: da analyze - anti [* *|<]!
514 ; CHECK: da analyze - output [* *]!
515 entry:
516 %cmp28 = icmp sgt i16 %N, 1
517 br i1 %cmp28, label %for.body.lr.ph, label %for.end12
518
519 for.body.lr.ph: ; preds = %entry
520 %cmp425 = icmp slt i16 %h, 0
521 %0 = add i16 %h, 1
522 %wide.trip.count = zext i16 %N to i32
523 br label %for.body
524
525 for.body: ; preds = %for.inc10, %for.body.lr.ph
526 %indvars.iv32 = phi i32 [ 1, %for.body.lr.ph ], [ %indvars.iv.next33, %for.inc10 ]
527 %indvars.iv = phi i16 [ 2, %for.body.lr.ph ], [ %indvars.iv.next, %for.inc10 ]
528 br i1 %cmp425, label %for.inc10, label %for.body5.lr.ph
529
530 for.body5.lr.ph: ; preds = %for.body
531 %1 = sext i16 %indvars.iv to i32
532 %arrayidx = getelementptr inbounds i16, i16* %array, i32 %indvars.iv32
533 br label %for.body5
534
535 for.body5: ; preds = %for.body5, %for.body5.lr.ph
536 %indvars.iv30 = phi i32 [ %indvars.iv.next31, %for.body5 ], [ %1, %for.body5.lr.ph ]
537 %j.027 = phi i16 [ %inc, %for.body5 ], [ 0, %for.body5.lr.ph ]
538 %2 = load i16, i16* %arrayidx, align 4
539 %add6 = add nsw i16 %2, %j.027
540 %arrayidx8 = getelementptr inbounds i16, i16* %array, i32 %indvars.iv30
541 store i16 %add6, i16* %arrayidx8, align 4
542 %inc = add nuw nsw i16 %j.027, 1
543 %indvars.iv.next31 = add nsw i32 %indvars.iv30, 1
544 %exitcond = icmp eq i16 %inc, %0
545 br i1 %exitcond, label %for.inc10, label %for.body5
546
547 for.inc10: ; preds = %for.body5, %for.body
548 %indvars.iv.next33 = add nuw nsw i32 %indvars.iv32, 1
549 %indvars.iv.next = add i16 %indvars.iv, %0
550 %exitcond34 = icmp eq i32 %indvars.iv.next33, %wide.trip.count
551 br i1 %exitcond34, label %for.end12, label %for.body
552
553 for.end12: ; preds = %for.inc10, %entry
554 ret double undef
555 }
None ; RUN: opt < %s -analyze -basicaa -da -da-delinearize | FileCheck %s
1
2 ; ModuleID = 'ExactSIV.bc'
0 ; RUN: opt < %s -analyze -basicaa -da | FileCheck %s
1
32 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
43 target triple = "x86_64-apple-macosx10.6.0"
54
None ; RUN: opt < %s -analyze -basicaa -da -da-delinearize | FileCheck %s -check-prefix=DELIN
1
2 ; ModuleID = 'GCD.bc'
0 ; RUN: opt < %s -analyze -basicaa -da | FileCheck %s -check-prefix=DELIN
1
32 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
43 target triple = "x86_64-apple-macosx10.6.0"
54
227226 br label %for.cond1.preheader
228227
229228 ; DELIN-LABEL: gcd4
230 ; DELIN: da analyze - none!
231 ; DELIN: da analyze - none!
232 ; DELIN: da analyze - confused!
233 ; DELIN: da analyze - none!
229 ; DELIN: da analyze - output [* *]!
230 ; DELIN: da analyze - none!
231 ; DELIN: da analyze - confused!
232 ; DELIN: da analyze - input [* *]!
234233 ; DELIN: da analyze - confused!
235234 ; DELIN: da analyze - none!
236235
289288 br label %for.cond1.preheader
290289
291290 ; DELIN-LABEL: gcd5
292 ; DELIN: da analyze - none!
293 ; DELIN: da analyze - flow [> *]!
294 ; DELIN: da analyze - confused!
295 ; DELIN: da analyze - none!
291 ; DELIN: da analyze - output [* *]!
292 ; DELIN: da analyze - flow [<> *]!
293 ; DELIN: da analyze - confused!
294 ; DELIN: da analyze - input [* *]!
296295 ; DELIN: da analyze - confused!
297296 ; DELIN: da analyze - none!
298297
352351 br i1 %cmp4, label %for.cond1.preheader.preheader, label %for.end12
353352
354353 ; DELIN-LABEL: gcd6
355 ; DELIN: da analyze - none!
356 ; DELIN: da analyze - none!
357 ; DELIN: da analyze - confused!
358 ; DELIN: da analyze - none!
354 ; DELIN: da analyze - output [* *]!
355 ; DELIN: da analyze - none!
356 ; DELIN: da analyze - confused!
357 ; DELIN: da analyze - input [* *]!
359358 ; DELIN: da analyze - confused!
360359 ; DELIN: da analyze - output [* *]!
361360
None ; RUN: opt < %s -analyze -basicaa -da -da-delinearize | FileCheck %s
0 ; RUN: opt < %s -analyze -basicaa -da | FileCheck %s
11
22 ; Test for a bug, which caused an assert when an invalid
33 ; SCEVAddRecExpr is created in addToCoefficient.
None ; RUN: opt < %s -analyze -basicaa -da-delinearize -da
0 ; RUN: opt < %s -analyze -basicaa -da
11 ;
22 ; CHECK: da analyze - consistent input [S S]!
33 ; CHECK: da analyze - confused!
None ; RUN: opt < %s -analyze -basicaa -da -da-delinearize | FileCheck %s -check-prefix=DELIN
0 ; RUN: opt < %s -analyze -basicaa -da | FileCheck %s -check-prefix=DELIN
11
22 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
33 target triple = "x86_64-apple-macosx10.6.0"
None ; RUN: opt < %s -analyze -basicaa -da -da-delinearize | FileCheck %s
1
2 ; ModuleID = 'Preliminary.bc'
0 ; RUN: opt < %s -analyze -basicaa -da | FileCheck %s
1
32 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
43 target triple = "x86_64-apple-macosx10.6.0"
54
None ; RUN: opt < %s -analyze -basicaa -da -da-delinearize | FileCheck %s
1
2 ; ModuleID = 'Propagating.bc'
0 ; RUN: opt < %s -analyze -basicaa -da | FileCheck %s
1
32 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
43 target triple = "x86_64-apple-macosx10.6.0"
54
None ; RUN: opt < %s -analyze -basicaa -da -da-delinearize | FileCheck %s
1
2 ; ModuleID = 'Separability.bc'
0 ; RUN: opt < %s -analyze -basicaa -da | FileCheck %s
1
32 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
43 target triple = "x86_64-apple-macosx10.6.0"
54
None ; RUN: opt < %s -analyze -basicaa -da -da-delinearize | FileCheck %s
1
2 ; ModuleID = 'StrongSIV.bc'
0 ; RUN: opt < %s -analyze -basicaa -da | FileCheck %s
1
32 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
43 target triple = "x86_64-apple-macosx10.6.0"
54
None ; RUN: opt < %s -analyze -basicaa -da -da-delinearize | FileCheck %s
1
2 ; ModuleID = 'SymbolicSIV.bc'
0 ; RUN: opt < %s -analyze -basicaa -da | FileCheck %s
1
32 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
43 target triple = "x86_64-apple-macosx10.6.0"
54
335334
336335 ; CHECK-LABEL: symbolicsiv6
337336 ; CHECK: da analyze - none!
338 ; CHECK: da analyze - flow [0|<]!
337 ; CHECK: da analyze - none!
339338 ; CHECK: da analyze - confused!
340339 ; CHECK: da analyze - none!
341340 ; CHECK: da analyze - confused!
384383 br i1 %cmp1, label %for.end, label %for.body.preheader
385384 ; CHECK-LABEL: symbolicsiv7
386385 ; CHECK: da analyze - none!
387 ; CHECK: da analyze - flow [0|<]!
386 ; CHECK: da analyze - flow [<>]!
388387 ; CHECK: da analyze - confused!
389388 ; CHECK: da analyze - none!
390389 ; CHECK: da analyze - confused!
None ; RUN: opt < %s -basicaa -da-delinearize -loop-interchange -verify-dom-info -verify-loop-info \
0 ; RUN: opt < %s -basicaa -loop-interchange -verify-dom-info -verify-loop-info \
11 ; RUN: -S -pass-remarks=loop-interchange 2>&1 | FileCheck %s
22
33 @A10 = local_unnamed_addr global [3 x [3 x i32]] zeroinitializer, align 16