llvm.org GIT mirror llvm / 6f9e8d8
[X86][AVX] Renamed vperm2f128 tests to make them quicker to review. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@266621 91177308-0d34-0410-b5e6-96231b3b80d8 (Simon Pilgrim, 4 years ago)
1 changed file(s) with 79 addition(s) and 79 deletion(s). Raw diff Collapse all Expand all
11 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX1
22 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX2
33
4 define <8 x float> @A(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
5 ; ALL-LABEL: A:
4 define <8 x float> @shuffle_v8f32_45670123(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
5 ; ALL-LABEL: shuffle_v8f32_45670123:
66 ; ALL: ## BB#0: ## %entry
77 ; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
88 ; ALL-NEXT: retq
1111 ret <8 x float> %shuffle
1212 }
1313
14 define <8 x float> @B(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
15 ; ALL-LABEL: B:
14 define <8 x float> @shuffle_v8f32_0123cdef(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
15 ; ALL-LABEL: shuffle_v8f32_0123cdef:
1616 ; ALL: ## BB#0: ## %entry
1717 ; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
1818 ; ALL-NEXT: retq
2121 ret <8 x float> %shuffle
2222 }
2323
24 define <8 x float> @C(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
25 ; ALL-LABEL: C:
24 define <8 x float> @shuffle_v8f32_01230123(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
25 ; ALL-LABEL: shuffle_v8f32_01230123:
2626 ; ALL: ## BB#0: ## %entry
2727 ; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
2828 ; ALL-NEXT: retq
3131 ret <8 x float> %shuffle
3232 }
3333
34 define <8 x float> @D(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
35 ; ALL-LABEL: D:
34 define <8 x float> @shuffle_v8f32_45674567(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
35 ; ALL-LABEL: shuffle_v8f32_45674567:
3636 ; ALL: ## BB#0: ## %entry
3737 ; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
3838 ; ALL-NEXT: retq
5151 ret <32 x i8> %shuffle
5252 }
5353
54 define <4 x i64> @E2(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
55 ; ALL-LABEL: E2:
54 define <4 x i64> @shuffle_v4i64_6701(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
55 ; ALL-LABEL: shuffle_v4i64_6701:
5656 ; ALL: ## BB#0: ## %entry
5757 ; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
5858 ; ALL-NEXT: retq
6161 ret <4 x i64> %shuffle
6262 }
6363
64 define <32 x i8> @Ei(<32 x i8> %a, <32 x i8> %b) nounwind uwtable readnone ssp {
65 ; AVX1-LABEL: Ei:
64 define <4 x i64> @shuffle_v4i64_6701_domain(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
65 ; AVX1-LABEL: shuffle_v4i64_6701_domain:
66 ; AVX1: ## BB#0: ## %entry
67 ; AVX1-NEXT: vpaddq {{.*}}(%rip), %xmm0, %xmm0
68 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
69 ; AVX1-NEXT: retq
70 ;
71 ; AVX2-LABEL: shuffle_v4i64_6701_domain:
72 ; AVX2: ## BB#0: ## %entry
73 ; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
74 ; AVX2-NEXT: vpaddq %ymm2, %ymm0, %ymm0
75 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
76 ; AVX2-NEXT: retq
77 entry:
78 ; add forces execution domain
79 %a2 = add <4 x i64> %a,
80 %shuffle = shufflevector <4 x i64> %a2, <4 x i64> %b, <4 x i32>
81 ret <4 x i64> %shuffle
82 }
83
84 define <32 x i8> @shuffle_v32i8_2323(<32 x i8> %a, <32 x i8> %b) nounwind uwtable readnone ssp {
85 ; AVX1-LABEL: shuffle_v32i8_2323:
6686 ; AVX1: ## BB#0: ## %entry
6787 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
6888 ; AVX1-NEXT: vpaddb {{.*}}(%rip), %xmm0, %xmm0
7090 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
7191 ; AVX1-NEXT: retq
7292 ;
73 ; AVX2-LABEL: Ei:
93 ; AVX2-LABEL: shuffle_v32i8_2323:
7494 ; AVX2: ## BB#0: ## %entry
7595 ; AVX2-NEXT: vpaddb {{.*}}(%rip), %ymm0, %ymm0
7696 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
82102 ret <32 x i8> %shuffle
83103 }
84104
85 define <4 x i64> @E2i(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
86 ; AVX1-LABEL: E2i:
87 ; AVX1: ## BB#0: ## %entry
88 ; AVX1-NEXT: vpaddq {{.*}}(%rip), %xmm0, %xmm0
89 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
90 ; AVX1-NEXT: retq
91 ;
92 ; AVX2-LABEL: E2i:
93 ; AVX2: ## BB#0: ## %entry
94 ; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
95 ; AVX2-NEXT: vpaddq %ymm2, %ymm0, %ymm0
96 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
97 ; AVX2-NEXT: retq
98 entry:
99 ; add forces execution domain
100 %a2 = add <4 x i64> %a,
101 %shuffle = shufflevector <4 x i64> %a2, <4 x i64> %b, <4 x i32>
102 ret <4 x i64> %shuffle
103 }
104
105 define <8 x i32> @E3i(<8 x i32> %a, <8 x i32> %b) nounwind uwtable readnone ssp {
106 ; AVX1-LABEL: E3i:
105 define <8 x i32> @shuffle_v8i32_u5u7cdef(<8 x i32> %a, <8 x i32> %b) nounwind uwtable readnone ssp {
106 ; AVX1-LABEL: shuffle_v8i32_u5u7cdef:
107107 ; AVX1: ## BB#0: ## %entry
108108 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
109109 ; AVX1-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0
111111 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
112112 ; AVX1-NEXT: retq
113113 ;
114 ; AVX2-LABEL: E3i:
114 ; AVX2-LABEL: shuffle_v8i32_u5u7cdef:
115115 ; AVX2: ## BB#0: ## %entry
116116 ; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm2
117117 ; AVX2-NEXT: vpaddd %ymm2, %ymm0, %ymm0
124124 ret <8 x i32> %shuffle
125125 }
126126
127 define <16 x i16> @E4i(<16 x i16> %a, <16 x i16> %b) nounwind uwtable readnone ssp {
128 ; AVX1-LABEL: E4i:
127 define <16 x i16> @shuffle_v16i16_4501(<16 x i16> %a, <16 x i16> %b) nounwind uwtable readnone ssp {
128 ; AVX1-LABEL: shuffle_v16i16_4501:
129129 ; AVX1: ## BB#0: ## %entry
130130 ; AVX1-NEXT: vpaddw {{.*}}(%rip), %xmm0, %xmm0
131131 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
132132 ; AVX1-NEXT: retq
133133 ;
134 ; AVX2-LABEL: E4i:
134 ; AVX2-LABEL: shuffle_v16i16_4501:
135135 ; AVX2: ## BB#0: ## %entry
136136 ; AVX2-NEXT: vpaddw {{.*}}(%rip), %ymm0, %ymm0
137137 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
143143 ret <16 x i16> %shuffle
144144 }
145145
146 define <16 x i16> @E5i(<16 x i16>* %a, <16 x i16>* %b) nounwind uwtable readnone ssp {
147 ; AVX1-LABEL: E5i:
146 define <16 x i16> @shuffle_v16i16_4501_mem(<16 x i16>* %a, <16 x i16>* %b) nounwind uwtable readnone ssp {
147 ; AVX1-LABEL: shuffle_v16i16_4501_mem:
148148 ; AVX1: ## BB#0: ## %entry
149149 ; AVX1-NEXT: vmovdqa (%rdi), %ymm0
150150 ; AVX1-NEXT: vmovaps (%rsi), %ymm1
152152 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
153153 ; AVX1-NEXT: retq
154154 ;
155 ; AVX2-LABEL: E5i:
155 ; AVX2-LABEL: shuffle_v16i16_4501_mem:
156156 ; AVX2: ## BB#0: ## %entry
157157 ; AVX2-NEXT: vmovdqa (%rdi), %ymm0
158158 ; AVX2-NEXT: vmovdqa (%rsi), %ymm1
169169
170170 ;;;; Cases with undef indices mixed in the mask
171171
172 define <8 x float> @F(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
173 ; ALL-LABEL: F:
172 define <8 x float> @shuffle_v8f32_uu67u9ub(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
173 ; ALL-LABEL: shuffle_v8f32_uu67u9ub:
174174 ; ALL: ## BB#0: ## %entry
175175 ; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
176176 ; ALL-NEXT: retq
179179 ret <8 x float> %shuffle
180180 }
181181
182 define <8 x float> @F2(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
183 ; ALL-LABEL: F2:
182 define <8 x float> @shuffle_v8f32_uu67uu67(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
183 ; ALL-LABEL: shuffle_v8f32_uu67uu67:
184184 ; ALL: ## BB#0: ## %entry
185185 ; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
186186 ; ALL-NEXT: retq
189189 ret <8 x float> %shuffle
190190 }
191191
192 define <8 x float> @F3(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
193 ; ALL-LABEL: F3:
192 define <8 x float> @shuffle_v8f32_uu67uuab(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
193 ; ALL-LABEL: shuffle_v8f32_uu67uuab:
194194 ; ALL: ## BB#0: ## %entry
195195 ; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
196196 ; ALL-NEXT: retq
199199 ret <8 x float> %shuffle
200200 }
201201
202 define <8 x float> @F4(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
203 ; ALL-LABEL: F4:
202 define <8 x float> @shuffle_v8f32_uu67uuef(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
203 ; ALL-LABEL: shuffle_v8f32_uu67uuef:
204204 ; ALL: ## BB#0: ## %entry
205205 ; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
206206 ; ALL-NEXT: retq
209209 ret <8 x float> %shuffle
210210 }
211211
212 define <8 x float> @F5(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
213 ; ALL-LABEL: F5:
212 define <8 x float> @shuffle_v8f32_uu674567(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
213 ; ALL-LABEL: shuffle_v8f32_uu674567:
214214 ; ALL: ## BB#0: ## %entry
215215 ; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
216216 ; ALL-NEXT: retq
219219 ret <8 x float> %shuffle
220220 }
221221
222 define <8 x float> @F6(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
223 ; ALL-LABEL: F6:
222 define <8 x float> @shuffle_v8f32_uu6789ab(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
223 ; ALL-LABEL: shuffle_v8f32_uu6789ab:
224224 ; ALL: ## BB#0: ## %entry
225225 ; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
226226 ; ALL-NEXT: retq
229229 ret <8 x float> %shuffle
230230 }
231231
232 define <8 x float> @F7(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
233 ; ALL-LABEL: F7:
232 define <8 x float> @shuffle_v8f32_4567uu67(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
233 ; ALL-LABEL: shuffle_v8f32_4567uu67:
234234 ; ALL: ## BB#0: ## %entry
235235 ; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
236236 ; ALL-NEXT: retq
239239 ret <8 x float> %shuffle
240240 }
241241
242 define <8 x float> @F8(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
243 ; ALL-LABEL: F8:
242 define <8 x float> @shuffle_v8f32_4567uuef(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
243 ; ALL-LABEL: shuffle_v8f32_4567uuef:
244244 ; ALL: ## BB#0: ## %entry
245245 ; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
246246 ; ALL-NEXT: retq
251251
252252 ;;;; Cases where we must not select vperm2f128
253253
254 define <8 x float> @G(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
255 ; ALL-LABEL: G:
254 define <8 x float> @shuffle_v8f32_uu67ucuf(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
255 ; ALL-LABEL: shuffle_v8f32_uu67ucuf:
256256 ; ALL: ## BB#0: ## %entry
257257 ; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
258258 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7]
266266 ;; PR22984: https://llvm.org/bugs/show_bug.cgi?id=22984
267267 ;; Prefer xor+vblendpd over vperm2f128 because that has better performance.
268268
269 define <4 x double> @vperm2z_0x08(<4 x double> %a) {
270 ; ALL-LABEL: vperm2z_0x08:
269 define <4 x double> @shuffle_v4f64_zz01(<4 x double> %a) {
270 ; ALL-LABEL: shuffle_v4f64_zz01:
271271 ; ALL: ## BB#0:
272272 ; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1]
273273 ; ALL-NEXT: retq
275275 ret <4 x double> %s
276276 }
277277
278 define <4 x double> @vperm2z_0x18(<4 x double> %a) {
279 ; ALL-LABEL: vperm2z_0x18:
278 define <4 x double> @shuffle_v4f64_zz23(<4 x double> %a) {
279 ; ALL-LABEL: shuffle_v4f64_zz23:
280280 ; ALL: ## BB#0:
281281 ; ALL-NEXT: vxorpd %ymm1, %ymm1, %ymm1
282282 ; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
285285 ret <4 x double> %s
286286 }
287287
288 define <4 x double> @vperm2z_0x28(<4 x double> %a) {
289 ; ALL-LABEL: vperm2z_0x28:
288 define <4 x double> @shuffle_v4f64_zz45(<4 x double> %a) {
289 ; ALL-LABEL: shuffle_v4f64_zz45:
290290 ; ALL: ## BB#0:
291291 ; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1]
292292 ; ALL-NEXT: retq
294294 ret <4 x double> %s
295295 }
296296
297 define <4 x double> @vperm2z_0x38(<4 x double> %a) {
298 ; ALL-LABEL: vperm2z_0x38:
297 define <4 x double> @shuffle_v4f64_zz67(<4 x double> %a) {
298 ; ALL-LABEL: shuffle_v4f64_zz67:
299299 ; ALL: ## BB#0:
300300 ; ALL-NEXT: vxorpd %ymm1, %ymm1, %ymm1
301301 ; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
304304 ret <4 x double> %s
305305 }
306306
307 define <4 x double> @vperm2z_0x80(<4 x double> %a) {
308 ; ALL-LABEL: vperm2z_0x80:
307 define <4 x double> @shuffle_v4f64_01zz(<4 x double> %a) {
308 ; ALL-LABEL: shuffle_v4f64_01zz:
309309 ; ALL: ## BB#0:
310310 ; ALL-NEXT: vxorpd %ymm1, %ymm1, %ymm1
311311 ; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
314314 ret <4 x double> %s
315315 }
316316
317 define <4 x double> @vperm2z_0x81(<4 x double> %a) {
318 ; ALL-LABEL: vperm2z_0x81:
317 define <4 x double> @shuffle_v4f64_23zz(<4 x double> %a) {
318 ; ALL-LABEL: shuffle_v4f64_23zz:
319319 ; ALL: ## BB#0:
320320 ; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
321321 ; ALL-NEXT: retq
323323 ret <4 x double> %s
324324 }
325325
326 define <4 x double> @vperm2z_0x82(<4 x double> %a) {
327 ; ALL-LABEL: vperm2z_0x82:
326 define <4 x double> @shuffle_v4f64_45zz(<4 x double> %a) {
327 ; ALL-LABEL: shuffle_v4f64_45zz:
328328 ; ALL: ## BB#0:
329329 ; ALL-NEXT: vxorpd %ymm1, %ymm1, %ymm1
330330 ; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
333333 ret <4 x double> %s
334334 }
335335
336 define <4 x double> @vperm2z_0x83(<4 x double> %a) {
337 ; ALL-LABEL: vperm2z_0x83:
336 define <4 x double> @shuffle_v4f64_67zz(<4 x double> %a) {
337 ; ALL-LABEL: shuffle_v4f64_67zz:
338338 ; ALL: ## BB#0:
339339 ; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
340340 ; ALL-NEXT: retq
344344
345345 ;; With AVX2, select the integer version of the instruction. Use an add to force the execution domain.
346346
347 define <4 x i64> @vperm2z_int_0x83(<4 x i64> %a, <4 x i64> %b) {
348 ; AVX1-LABEL: vperm2z_int_0x83:
347 define <4 x i64> @shuffle_v4i64_67zz(<4 x i64> %a, <4 x i64> %b) {
348 ; AVX1-LABEL: shuffle_v4i64_67zz:
349349 ; AVX1: ## BB#0:
350350 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
351351 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
355355 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
356356 ; AVX1-NEXT: retq
357357 ;
358 ; AVX2-LABEL: vperm2z_int_0x83:
358 ; AVX2-LABEL: shuffle_v4i64_67zz:
359359 ; AVX2: ## BB#0:
360360 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
361361 ; AVX2-NEXT: vpaddq %ymm0, %ymm1, %ymm0