llvm.org GIT mirror llvm / 0b9e022
[SLPVectorizer] Precommit of supernode.ll test for D63661 This is a pre-commit of the tests introduced by the SuperNode SLP patch D63661. Committed on behalf of @vporpo (Vasileios Porpodas) Differential Revision: https://reviews.llvm.org/D63664 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@364320 91177308-0d34-0410-b5e6-96231b3b80d8 Simon Pilgrim 2 months ago
1 changed file(s) with 327 addition(s) and 0 deletion(s). Raw diff Collapse all Expand all
0 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
1 ; RUN: opt -slp-vectorizer -S < %s -mtriple=x86_64-unknown-linux -mcpu=corei7-avx -verify | FileCheck %s -check-prefix=ENABLED
2 ;
3 ; Without supernode operand reordering, this does not get fully vectorized.
4 ; S[0] = (A[0] + B[0]) + C[0]
5 ; S[1] = (B[1] + C[1]) + A[1]
6 define void @test_supernode_add(double* %Aarray, double* %Barray, double *%Carray, double *%Sarray) {
7 ; ENABLED-LABEL: @test_supernode_add(
8 ; ENABLED-NEXT: entry:
9 ; ENABLED-NEXT: [[IDXA0:%.*]] = getelementptr inbounds double, double* [[AARRAY:%.*]], i64 0
10 ; ENABLED-NEXT: [[IDXA1:%.*]] = getelementptr inbounds double, double* [[AARRAY]], i64 1
11 ; ENABLED-NEXT: [[IDXB0:%.*]] = getelementptr inbounds double, double* [[BARRAY:%.*]], i64 0
12 ; ENABLED-NEXT: [[IDXB1:%.*]] = getelementptr inbounds double, double* [[BARRAY]], i64 1
13 ; ENABLED-NEXT: [[IDXC0:%.*]] = getelementptr inbounds double, double* [[CARRAY:%.*]], i64 0
14 ; ENABLED-NEXT: [[IDXC1:%.*]] = getelementptr inbounds double, double* [[CARRAY]], i64 1
15 ; ENABLED-NEXT: [[IDXS0:%.*]] = getelementptr inbounds double, double* [[SARRAY:%.*]], i64 0
16 ; ENABLED-NEXT: [[IDXS1:%.*]] = getelementptr inbounds double, double* [[SARRAY]], i64 1
17 ; ENABLED-NEXT: [[A0:%.*]] = load double, double* [[IDXA0]], align 8
18 ; ENABLED-NEXT: [[A1:%.*]] = load double, double* [[IDXA1]], align 8
19 ; ENABLED-NEXT: [[TMP0:%.*]] = bitcast double* [[IDXB0]] to <2 x double>*
20 ; ENABLED-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
21 ; ENABLED-NEXT: [[C0:%.*]] = load double, double* [[IDXC0]], align 8
22 ; ENABLED-NEXT: [[C1:%.*]] = load double, double* [[IDXC1]], align 8
23 ; ENABLED-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double [[A0]], i32 0
24 ; ENABLED-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[C1]], i32 1
25 ; ENABLED-NEXT: [[TMP4:%.*]] = fadd fast <2 x double> [[TMP3]], [[TMP1]]
26 ; ENABLED-NEXT: [[TMP5:%.*]] = insertelement <2 x double> undef, double [[C0]], i32 0
27 ; ENABLED-NEXT: [[TMP6:%.*]] = insertelement <2 x double> [[TMP5]], double [[A1]], i32 1
28 ; ENABLED-NEXT: [[TMP7:%.*]] = fadd fast <2 x double> [[TMP4]], [[TMP6]]
29 ; ENABLED-NEXT: [[TMP8:%.*]] = bitcast double* [[IDXS0]] to <2 x double>*
30 ; ENABLED-NEXT: store <2 x double> [[TMP7]], <2 x double>* [[TMP8]], align 8
31 ; ENABLED-NEXT: ret void
32 ;
33 entry:
34 %idxA0 = getelementptr inbounds double, double* %Aarray, i64 0
35 %idxA1 = getelementptr inbounds double, double* %Aarray, i64 1
36 %idxB0 = getelementptr inbounds double, double* %Barray, i64 0
37 %idxB1 = getelementptr inbounds double, double* %Barray, i64 1
38 %idxC0 = getelementptr inbounds double, double* %Carray, i64 0
39 %idxC1 = getelementptr inbounds double, double* %Carray, i64 1
40 %idxS0 = getelementptr inbounds double, double* %Sarray, i64 0
41 %idxS1 = getelementptr inbounds double, double* %Sarray, i64 1
42
43 %A0 = load double, double *%idxA0, align 8
44 %A1 = load double, double *%idxA1, align 8
45
46 %B0 = load double, double *%idxB0, align 8
47 %B1 = load double, double *%idxB1, align 8
48
49 %C0 = load double, double *%idxC0, align 8
50 %C1 = load double, double *%idxC1, align 8
51
; Both lanes add the same three terms, but in a different association:
;   lane 0 = (A[0] + B[0]) + C[0]
;   lane 1 = (B[1] + C[1]) + A[1]
; Only B appears in the same position in both inner fadds, so the SLP
; tree needs cross-lane operand reordering (the "supernode") to bundle
; the two inner fadds; without it vectorization is partial.
52 %addA0B0 = fadd fast double %A0, %B0
53 %addB1C1 = fadd fast double %B1, %C1
54 %add0 = fadd fast double %addA0B0, %C0
55 %add1 = fadd fast double %addB1C1, %A1
56 store double %add0, double *%idxS0, align 8
57 store double %add1, double *%idxS1, align 8
58 ret void
59 }
60
61
62 ; Without supernode operand reordering, this does not get fully vectorized.
63 ; S[0] = (A[0] - B[0]) + C[0]
64 ; S[1] = (C[1] - B[1]) + A[1]
65 define void @test_supernode_addsub(double* %Aarray, double* %Barray, double *%Carray, double *%Sarray) {
66 ; ENABLED-LABEL: @test_supernode_addsub(
67 ; ENABLED-NEXT: entry:
68 ; ENABLED-NEXT: [[IDXA0:%.*]] = getelementptr inbounds double, double* [[AARRAY:%.*]], i64 0
69 ; ENABLED-NEXT: [[IDXA1:%.*]] = getelementptr inbounds double, double* [[AARRAY]], i64 1
70 ; ENABLED-NEXT: [[IDXB0:%.*]] = getelementptr inbounds double, double* [[BARRAY:%.*]], i64 0
71 ; ENABLED-NEXT: [[IDXB1:%.*]] = getelementptr inbounds double, double* [[BARRAY]], i64 1
72 ; ENABLED-NEXT: [[IDXC0:%.*]] = getelementptr inbounds double, double* [[CARRAY:%.*]], i64 0
73 ; ENABLED-NEXT: [[IDXC1:%.*]] = getelementptr inbounds double, double* [[CARRAY]], i64 1
74 ; ENABLED-NEXT: [[IDXS0:%.*]] = getelementptr inbounds double, double* [[SARRAY:%.*]], i64 0
75 ; ENABLED-NEXT: [[IDXS1:%.*]] = getelementptr inbounds double, double* [[SARRAY]], i64 1
76 ; ENABLED-NEXT: [[A0:%.*]] = load double, double* [[IDXA0]], align 8
77 ; ENABLED-NEXT: [[A1:%.*]] = load double, double* [[IDXA1]], align 8
78 ; ENABLED-NEXT: [[TMP0:%.*]] = bitcast double* [[IDXB0]] to <2 x double>*
79 ; ENABLED-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
80 ; ENABLED-NEXT: [[C0:%.*]] = load double, double* [[IDXC0]], align 8
81 ; ENABLED-NEXT: [[C1:%.*]] = load double, double* [[IDXC1]], align 8
82 ; ENABLED-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double [[A0]], i32 0
83 ; ENABLED-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[C1]], i32 1
84 ; ENABLED-NEXT: [[TMP4:%.*]] = fsub fast <2 x double> [[TMP3]], [[TMP1]]
85 ; ENABLED-NEXT: [[TMP5:%.*]] = insertelement <2 x double> undef, double [[C0]], i32 0
86 ; ENABLED-NEXT: [[TMP6:%.*]] = insertelement <2 x double> [[TMP5]], double [[A1]], i32 1
87 ; ENABLED-NEXT: [[TMP7:%.*]] = fadd fast <2 x double> [[TMP4]], [[TMP6]]
88 ; ENABLED-NEXT: [[TMP8:%.*]] = bitcast double* [[IDXS0]] to <2 x double>*
89 ; ENABLED-NEXT: store <2 x double> [[TMP7]], <2 x double>* [[TMP8]], align 8
90 ; ENABLED-NEXT: ret void
91 ;
92 entry:
93 %idxA0 = getelementptr inbounds double, double* %Aarray, i64 0
94 %idxA1 = getelementptr inbounds double, double* %Aarray, i64 1
95 %idxB0 = getelementptr inbounds double, double* %Barray, i64 0
96 %idxB1 = getelementptr inbounds double, double* %Barray, i64 1
97 %idxC0 = getelementptr inbounds double, double* %Carray, i64 0
98 %idxC1 = getelementptr inbounds double, double* %Carray, i64 1
99 %idxS0 = getelementptr inbounds double, double* %Sarray, i64 0
100 %idxS1 = getelementptr inbounds double, double* %Sarray, i64 1
101
102 %A0 = load double, double *%idxA0, align 8
103 %A1 = load double, double *%idxA1, align 8
104
105 %B0 = load double, double *%idxB0, align 8
106 %B1 = load double, double *%idxB1, align 8
107
108 %C0 = load double, double *%idxC0, align 8
109 %C1 = load double, double *%idxC1, align 8
110
; Here the inner operation is fsub, and only the subtrahend B is in the
; same operand position across lanes:
;   lane 0 = (A[0] - B[0]) + C[0]
;   lane 1 = (C[1] - B[1]) + A[1]
; A and C swap roles between lanes, so the supernode must reorder the
; remaining operands around the common B vector to vectorize the fsubs.
111 %subA0B0 = fsub fast double %A0, %B0
112 %subC1B1 = fsub fast double %C1, %B1
113 %add0 = fadd fast double %subA0B0, %C0
114 %add1 = fadd fast double %subC1B1, %A1
115 store double %add0, double *%idxS0, align 8
116 store double %add1, double *%idxS1, align 8
117 ret void
118 }
119
120 ; Without supernode operand reordering, this does not get fully vectorized.
121 ; This checks that the super-node works with alternate sequences.
122 ;
123 ; S[0] = (A[0] - B[0]) - C[0]
124 ; S[1] = (B[1] + C[1]) + A[1]
125 define void @test_supernode_addsub_alt(double* %Aarray, double* %Barray, double *%Carray, double *%Sarray) {
126 ; ENABLED-LABEL: @test_supernode_addsub_alt(
127 ; ENABLED-NEXT: entry:
128 ; ENABLED-NEXT: [[IDXA0:%.*]] = getelementptr inbounds double, double* [[AARRAY:%.*]], i64 0
129 ; ENABLED-NEXT: [[IDXA1:%.*]] = getelementptr inbounds double, double* [[AARRAY]], i64 1
130 ; ENABLED-NEXT: [[IDXB0:%.*]] = getelementptr inbounds double, double* [[BARRAY:%.*]], i64 0
131 ; ENABLED-NEXT: [[IDXB1:%.*]] = getelementptr inbounds double, double* [[BARRAY]], i64 1
132 ; ENABLED-NEXT: [[IDXC0:%.*]] = getelementptr inbounds double, double* [[CARRAY:%.*]], i64 0
133 ; ENABLED-NEXT: [[IDXC1:%.*]] = getelementptr inbounds double, double* [[CARRAY]], i64 1
134 ; ENABLED-NEXT: [[IDXS0:%.*]] = getelementptr inbounds double, double* [[SARRAY:%.*]], i64 0
135 ; ENABLED-NEXT: [[IDXS1:%.*]] = getelementptr inbounds double, double* [[SARRAY]], i64 1
136 ; ENABLED-NEXT: [[A0:%.*]] = load double, double* [[IDXA0]], align 8
137 ; ENABLED-NEXT: [[A1:%.*]] = load double, double* [[IDXA1]], align 8
138 ; ENABLED-NEXT: [[B0:%.*]] = load double, double* [[IDXB0]], align 8
139 ; ENABLED-NEXT: [[B1:%.*]] = load double, double* [[IDXB1]], align 8
140 ; ENABLED-NEXT: [[C0:%.*]] = load double, double* [[IDXC0]], align 8
141 ; ENABLED-NEXT: [[C1:%.*]] = load double, double* [[IDXC1]], align 8
142 ; ENABLED-NEXT: [[SUBA0B0:%.*]] = fsub fast double [[A0]], [[B0]]
143 ; ENABLED-NEXT: [[ADDB1C1:%.*]] = fadd fast double [[B1]], [[C1]]
144 ; ENABLED-NEXT: [[SUB0:%.*]] = fsub fast double [[SUBA0B0]], [[C0]]
145 ; ENABLED-NEXT: [[ADD1:%.*]] = fadd fast double [[ADDB1C1]], [[A1]]
146 ; ENABLED-NEXT: store double [[SUB0]], double* [[IDXS0]], align 8
147 ; ENABLED-NEXT: store double [[ADD1]], double* [[IDXS1]], align 8
148 ; ENABLED-NEXT: ret void
149 ;
150 entry:
151 %idxA0 = getelementptr inbounds double, double* %Aarray, i64 0
152 %idxA1 = getelementptr inbounds double, double* %Aarray, i64 1
153 %idxB0 = getelementptr inbounds double, double* %Barray, i64 0
154 %idxB1 = getelementptr inbounds double, double* %Barray, i64 1
155 %idxC0 = getelementptr inbounds double, double* %Carray, i64 0
156 %idxC1 = getelementptr inbounds double, double* %Carray, i64 1
157 %idxS0 = getelementptr inbounds double, double* %Sarray, i64 0
158 %idxS1 = getelementptr inbounds double, double* %Sarray, i64 1
159
160 %A0 = load double, double *%idxA0, align 8
161 %A1 = load double, double *%idxA1, align 8
162
163 %B0 = load double, double *%idxB0, align 8
164 %B1 = load double, double *%idxB1, align 8
165
166 %C0 = load double, double *%idxC0, align 8
167 %C1 = load double, double *%idxC1, align 8
168
; Alternate-opcode variant: lane 0 is all fsub, lane 1 is all fadd:
;   lane 0 = (A[0] - B[0]) - C[0]
;   lane 1 = (B[1] + C[1]) + A[1]
; Note that the CHECK lines above currently expect fully scalar output,
; i.e. the supernode + alternate-sequence combination is not yet handled.
169 %subA0B0 = fsub fast double %A0, %B0
170 %addB1C1 = fadd fast double %B1, %C1
171 %sub0 = fsub fast double %subA0B0, %C0
172 %add1 = fadd fast double %addB1C1, %A1
173 store double %sub0, double *%idxS0, align 8
174 store double %add1, double *%idxS1, align 8
175 ret void
176 }
177
178 ; This checks that vectorizeTree() works correctly with the supernode
179 ; and does not generate uses before defs.
180 ; If all of the operands of the supernode are vectorizable, then the scheduler
181 ; will fix their position in the program. If not, then the scheduler may not
182 ; touch them, leading to uses before defs.
183 ;
184 ; A0 = ...
185 ; C = ...
186 ; t1 = A0 + C
187 ; B0 = ...
188 ; t2 = t1 + B0
189 ; A1 = ...
190 ; B1 = ...
191 ; t3 = A1 + B1
192 ; D = ...
193 ; t4 = t3 + D
194 ;
195 ;
196 ; A0 C A1 B1 A0 C A1 D A0:1 C,D
197 ; \ / \ / Reorder \ / \ / Bundles \ /
198 ; t1 + B0 t3 + D -------> t1 + B0 t3 + B1 ------> t1:3 + B0:1
199 ; |/ |/ |/ |/ |/
200 ; t2 + t4 + t2 + t4 + t2:4 +
201 ;
202 ; After reordering, 'D' conceptually becomes an operand of t3:
203 ; t3 = A1 + D
204 ; But D is defined *after* its use.
205 ;
206 define void @supernode_scheduling(double* %Aarray, double* %Barray, double *%Carray, double *%Darray, double *%Sarray) {
207 ; ENABLED-LABEL: @supernode_scheduling(
208 ; ENABLED-NEXT: entry:
209 ; ENABLED-NEXT: [[IDXA0:%.*]] = getelementptr inbounds double, double* [[AARRAY:%.*]], i64 0
210 ; ENABLED-NEXT: [[IDXA1:%.*]] = getelementptr inbounds double, double* [[AARRAY]], i64 1
211 ; ENABLED-NEXT: [[IDXB0:%.*]] = getelementptr inbounds double, double* [[BARRAY:%.*]], i64 0
212 ; ENABLED-NEXT: [[IDXB1:%.*]] = getelementptr inbounds double, double* [[BARRAY]], i64 1
213 ; ENABLED-NEXT: [[IDXC:%.*]] = getelementptr inbounds double, double* [[CARRAY:%.*]], i64 0
214 ; ENABLED-NEXT: [[IDXD:%.*]] = getelementptr inbounds double, double* [[DARRAY:%.*]], i64 0
215 ; ENABLED-NEXT: [[IDXS0:%.*]] = getelementptr inbounds double, double* [[SARRAY:%.*]], i64 0
216 ; ENABLED-NEXT: [[IDXS1:%.*]] = getelementptr inbounds double, double* [[SARRAY]], i64 1
217 ; ENABLED-NEXT: [[C:%.*]] = load double, double* [[IDXC]], align 8
218 ; ENABLED-NEXT: [[B0:%.*]] = load double, double* [[IDXB0]], align 8
219 ; ENABLED-NEXT: [[TMP0:%.*]] = bitcast double* [[IDXA0]] to <2 x double>*
220 ; ENABLED-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
221 ; ENABLED-NEXT: [[B1:%.*]] = load double, double* [[IDXB1]], align 8
222 ; ENABLED-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double [[C]], i32 0
223 ; ENABLED-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[B1]], i32 1
224 ; ENABLED-NEXT: [[TMP4:%.*]] = fadd fast <2 x double> [[TMP1]], [[TMP3]]
225 ; ENABLED-NEXT: [[D:%.*]] = load double, double* [[IDXD]], align 8
226 ; ENABLED-NEXT: [[TMP5:%.*]] = insertelement <2 x double> undef, double [[B0]], i32 0
227 ; ENABLED-NEXT: [[TMP6:%.*]] = insertelement <2 x double> [[TMP5]], double [[D]], i32 1
228 ; ENABLED-NEXT: [[TMP7:%.*]] = fadd fast <2 x double> [[TMP4]], [[TMP6]]
229 ; ENABLED-NEXT: [[TMP8:%.*]] = bitcast double* [[IDXS0]] to <2 x double>*
230 ; ENABLED-NEXT: store <2 x double> [[TMP7]], <2 x double>* [[TMP8]], align 8
231 ; ENABLED-NEXT: ret void
232 ;
233 entry:
234 %idxA0 = getelementptr inbounds double, double* %Aarray, i64 0
235 %idxA1 = getelementptr inbounds double, double* %Aarray, i64 1
236 %idxB0 = getelementptr inbounds double, double* %Barray, i64 0
237 %idxB1 = getelementptr inbounds double, double* %Barray, i64 1
238 %idxC = getelementptr inbounds double, double* %Carray, i64 0
239 %idxD = getelementptr inbounds double, double* %Darray, i64 0
240 %idxS0 = getelementptr inbounds double, double* %Sarray, i64 0
241 %idxS1 = getelementptr inbounds double, double* %Sarray, i64 1
242
243
; The scalar instruction order below is deliberate (see the diagram in
; the comment above the function): each operand is defined as late as
; possible, so vectorizeTree() must let the scheduler re-place the
; scalars after operand reordering or it would emit a use of %D before
; its definition.
244 %A0 = load double, double *%idxA0, align 8
245 %C = load double, double *%idxC, align 8
246 %t1 = fadd fast double %A0, %C
247 %B0 = load double, double *%idxB0, align 8
248 %t2 = fadd fast double %t1, %B0
249 %A1 = load double, double *%idxA1, align 8
250 %B1 = load double, double *%idxB1, align 8
251 %t3 = fadd fast double %A1, %B1
; %D is loaded here, *after* the point where it would be needed once the
; supernode reorders it to be an operand of the t3 bundle.
252 %D = load double, double *%idxD, align 8
253 %t4 = fadd fast double %t3, %D
254
255 store double %t2, double *%idxS0, align 8
256 store double %t4, double *%idxS1, align 8
257 ret void
258 }
259
260
261 ; The SLP scheduler has trouble moving instructions across blocks.
262 ; Even though we can build a SuperNode for this example, we should not because the scheduler
263 ; cannot handle the cross-block instruction motion that is required once the operands of the
264 ; SuperNode are reordered.
265 ;
266 ; bb1:
267 ; A0 = ...
268 ; B1 = ...
269 ; Tmp0 = A0 + 2.0
270 ; Tmp1 = B1 + 2.0
271 ;
272 ; bb2:
273 ; A1 = ...
274 ; B0 = ...
275 ; S[0] = Tmp0 + B0
276 ; S[1] = Tmp1 + A1
277 define void @supernode_scheduling_cross_block(double* %Aarray, double* %Barray, double *%Sarray) {
278 ; ENABLED-LABEL: @supernode_scheduling_cross_block(
279 ; ENABLED-NEXT: entry:
280 ; ENABLED-NEXT: [[IDXA0:%.*]] = getelementptr inbounds double, double* [[AARRAY:%.*]], i64 0
281 ; ENABLED-NEXT: [[IDXA1:%.*]] = getelementptr inbounds double, double* [[AARRAY]], i64 1
282 ; ENABLED-NEXT: [[IDXB0:%.*]] = getelementptr inbounds double, double* [[BARRAY:%.*]], i64 0
283 ; ENABLED-NEXT: [[IDXB1:%.*]] = getelementptr inbounds double, double* [[BARRAY]], i64 1
284 ; ENABLED-NEXT: [[IDXS0:%.*]] = getelementptr inbounds double, double* [[SARRAY:%.*]], i64 0
285 ; ENABLED-NEXT: [[IDXS1:%.*]] = getelementptr inbounds double, double* [[SARRAY]], i64 1
286 ; ENABLED-NEXT: [[A0:%.*]] = load double, double* [[IDXA0]], align 8
287 ; ENABLED-NEXT: [[B1:%.*]] = load double, double* [[IDXB1]], align 8
288 ; ENABLED-NEXT: [[TMP0:%.*]] = insertelement <2 x double> undef, double [[A0]], i32 0
289 ; ENABLED-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[B1]], i32 1
290 ; ENABLED-NEXT: [[TMP2:%.*]] = fadd fast <2 x double> [[TMP1]],
291 ; ENABLED-NEXT: br label [[BB:%.*]]
292 ; ENABLED: bb:
293 ; ENABLED-NEXT: [[A1:%.*]] = load double, double* [[IDXA1]], align 8
294 ; ENABLED-NEXT: [[B0:%.*]] = load double, double* [[IDXB0]], align 8
295 ; ENABLED-NEXT: [[TMP3:%.*]] = insertelement <2 x double> undef, double [[B0]], i32 0
296 ; ENABLED-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[A1]], i32 1
297 ; ENABLED-NEXT: [[TMP5:%.*]] = fadd fast <2 x double> [[TMP2]], [[TMP4]]
298 ; ENABLED-NEXT: [[TMP6:%.*]] = bitcast double* [[IDXS0]] to <2 x double>*
299 ; ENABLED-NEXT: store <2 x double> [[TMP5]], <2 x double>* [[TMP6]], align 8
300 ; ENABLED-NEXT: ret void
301 ;
302 entry:
303 %idxA0 = getelementptr inbounds double, double* %Aarray, i64 0
304 %idxA1 = getelementptr inbounds double, double* %Aarray, i64 1
305 %idxB0 = getelementptr inbounds double, double* %Barray, i64 0
306 %idxB1 = getelementptr inbounds double, double* %Barray, i64 1
307 %idxS0 = getelementptr inbounds double, double* %Sarray, i64 0
308 %idxS1 = getelementptr inbounds double, double* %Sarray, i64 1
309
; Tmp0/Tmp1 are computed in the entry block ...
310 %A0 = load double, double *%idxA0, align 8
311 %B1 = load double, double *%idxB1, align 8
312 %Tmp0 = fadd fast double %A0, 2.0
313 %Tmp1 = fadd fast double %B1, 2.0
314 br label %bb
315
316 bb:
; ... while their uses live in a different block.  Reordering the
; supernode operands here would require moving instructions across the
; block boundary, which the SLP scheduler cannot do, so a supernode
; should not be built for this pattern.
317 %A1 = load double, double *%idxA1, align 8
318 %B0 = load double, double *%idxB0, align 8
319
320 %Sum0 = fadd fast double %Tmp0, %B0
321 %Sum1 = fadd fast double %Tmp1, %A1
322
323 store double %Sum0, double *%idxS0, align 8
324 store double %Sum1, double *%idxS1, align 8
325 ret void
326 }