llvm.org GIT mirror: llvm / 3e60eea

[X86][AVX512VL] Add fast-isel MOVDDUP/MOVSLDUP/MOVSHDUP shuffle tests

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@274448 91177308-0d34-0410-b5e6-96231b3b80d8

Simon Pilgrim, 4 years ago
1 changed file with 407 additions and 0 deletions.
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=X32
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=X64

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512vl-builtins.c

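; Each intrinsic below is covered by an unmasked, a merge-masked (mask_) and a
; zero-masked (maskz_) test, mirroring the clang builtin tests. As a rough,
; hand-written C sketch (wrapper names are illustrative only; the intrinsic
; names come from the SSE3/AVX512VL headers, not from this commit), the first
; triple corresponds to:
;
;   #include <immintrin.h>
;   __m128d movddup(__m128d a)       { return _mm_movedup_pd(a); }
;   __m128d mask_movddup(__m128d w, __mmask8 u, __m128d a) {
;     return _mm_mask_movedup_pd(w, u, a);   // merge-masked: keep w where u is 0
;   }
;   __m128d maskz_movddup(__mmask8 u, __m128d a) {
;     return _mm_maskz_movedup_pd(u, a);     // zero-masked: zero where u is 0
;   }
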
define <2 x double> @test_mm_movddup_pd(<2 x double> %a0) {
; X32-LABEL: test_mm_movddup_pd:
; X32:       # BB#0:
; X32-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_movddup_pd:
; X64:       # BB#0:
; X64-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; X64-NEXT:    retq
  %res = shufflevector <2 x double> %a0, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %res
}

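; The merge-masked variant below models the i8 mask argument the way clang
; does: truncate to the low 2 bits, bitcast to <2 x i1>, and select between
; the shuffle result and the passthru %a0. On AVX512VL this folds into a
; single write-masked vmovddup, e.g. with %a1 = 2 (0b10) element 1 comes from
; the shuffle and element 0 is kept from %a0.
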
define <2 x double> @test_mm_mask_movddup_pd(<2 x double> %a0, i8 %a1, <2 x double> %a2) {
; X32-LABEL: test_mm_mask_movddup_pd:
; X32:       # BB#0:
; X32-NEXT:    pushl %eax
; X32-NEXT:  .Ltmp0:
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    andb $3, %al
; X32-NEXT:    movb %al, {{[0-9]+}}(%esp)
; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vmovddup {{.*#+}} xmm0 {%k1} = xmm1[0,0]
; X32-NEXT:    popl %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_mask_movddup_pd:
; X64:       # BB#0:
; X64-NEXT:    andb $3, %dil
; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT:    kmovw %eax, %k1
; X64-NEXT:    vmovddup {{.*#+}} xmm0 {%k1} = xmm1[0,0]
; X64-NEXT:    retq
  %trn1 = trunc i8 %a1 to i2
  %arg1 = bitcast i2 %trn1 to <2 x i1>
  %res0 = shufflevector <2 x double> %a2, <2 x double> undef, <2 x i32> zeroinitializer
  %res1 = select <2 x i1> %arg1, <2 x double> %res0, <2 x double> %a0
  ret <2 x double> %res1
}

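; The zero-masked variant selects against zeroinitializer instead of a
; passthru operand, which maps to the {z} zeroing form of the write mask.
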
define <2 x double> @test_mm_maskz_movddup_pd(i8 %a0, <2 x double> %a1) {
; X32-LABEL: test_mm_maskz_movddup_pd:
; X32:       # BB#0:
; X32-NEXT:    pushl %eax
; X32-NEXT:  .Ltmp1:
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    andb $3, %al
; X32-NEXT:    movb %al, {{[0-9]+}}(%esp)
; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vmovddup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0]
; X32-NEXT:    popl %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_maskz_movddup_pd:
; X64:       # BB#0:
; X64-NEXT:    andb $3, %dil
; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT:    kmovw %eax, %k1
; X64-NEXT:    vmovddup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0]
; X64-NEXT:    retq
  %trn1 = trunc i8 %a0 to i2
  %arg0 = bitcast i2 %trn1 to <2 x i1>
  %res0 = shufflevector <2 x double> %a1, <2 x double> undef, <2 x i32> zeroinitializer
  %res1 = select <2 x i1> %arg0, <2 x double> %res0, <2 x double> zeroinitializer
  ret <2 x double> %res1
}

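; The 256-bit movddup duplicates the even element within each 128-bit lane,
; hence the [0,0,2,2] mask, and the i8 mask argument is truncated to i4 for
; the four double elements. A hedged C equivalent of the unmasked test
; (intrinsic name from the AVX headers, not from this commit):
;
;   __m256d movddup256(__m256d a) { return _mm256_movedup_pd(a); }
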
define <4 x double> @test_mm256_movddup_pd(<4 x double> %a0) {
; X32-LABEL: test_mm256_movddup_pd:
; X32:       # BB#0:
; X32-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_movddup_pd:
; X64:       # BB#0:
; X64-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; X64-NEXT:    retq
  %res = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  ret <4 x double> %res
}

define <4 x double> @test_mm256_mask_movddup_pd(<4 x double> %a0, i8 %a1, <4 x double> %a2) {
; X32-LABEL: test_mm256_mask_movddup_pd:
; X32:       # BB#0:
; X32-NEXT:    pushl %eax
; X32-NEXT:  .Ltmp2:
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    andb $15, %al
; X32-NEXT:    movb %al, (%esp)
; X32-NEXT:    movzbl (%esp), %eax
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vmovddup {{.*#+}} ymm0 {%k1} = ymm1[0,0,2,2]
; X32-NEXT:    popl %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_movddup_pd:
; X64:       # BB#0:
; X64-NEXT:    andb $15, %dil
; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT:    kmovw %eax, %k1
; X64-NEXT:    vmovddup {{.*#+}} ymm0 {%k1} = ymm1[0,0,2,2]
; X64-NEXT:    retq
  %trn1 = trunc i8 %a1 to i4
  %arg1 = bitcast i4 %trn1 to <4 x i1>
  %res0 = shufflevector <4 x double> %a2, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  %res1 = select <4 x i1> %arg1, <4 x double> %res0, <4 x double> %a0
  ret <4 x double> %res1
}

define <4 x double> @test_mm256_maskz_movddup_pd(i8 %a0, <4 x double> %a1) {
; X32-LABEL: test_mm256_maskz_movddup_pd:
; X32:       # BB#0:
; X32-NEXT:    pushl %eax
; X32-NEXT:  .Ltmp3:
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    andb $15, %al
; X32-NEXT:    movb %al, (%esp)
; X32-NEXT:    movzbl (%esp), %eax
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vmovddup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2]
; X32-NEXT:    popl %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_movddup_pd:
; X64:       # BB#0:
; X64-NEXT:    andb $15, %dil
; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT:    kmovw %eax, %k1
; X64-NEXT:    vmovddup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2]
; X64-NEXT:    retq
  %trn1 = trunc i8 %a0 to i4
  %arg0 = bitcast i4 %trn1 to <4 x i1>
  %res0 = shufflevector <4 x double> %a1, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  %res1 = select <4 x i1> %arg0, <4 x double> %res0, <4 x double> zeroinitializer
  ret <4 x double> %res1
}

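; MOVSHDUP duplicates the odd-indexed single-precision elements ([1,1,3,3]).
; A hedged C sketch of the unmasked test (wrapper name illustrative; the
; intrinsic name comes from the SSE3 headers):
;
;   __m128 movehdup(__m128 a) { return _mm_movehdup_ps(a); }
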
define <4 x float> @test_mm_movehdup_ps(<4 x float> %a0) {
; X32-LABEL: test_mm_movehdup_ps:
; X32:       # BB#0:
; X32-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_movehdup_ps:
; X64:       # BB#0:
; X64-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X64-NEXT:    retq
  %res = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
  ret <4 x float> %res
}

define <4 x float> @test_mm_mask_movehdup_ps(<4 x float> %a0, i8 %a1, <4 x float> %a2) {
; X32-LABEL: test_mm_mask_movehdup_ps:
; X32:       # BB#0:
; X32-NEXT:    pushl %eax
; X32-NEXT:  .Ltmp4:
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    andb $15, %al
; X32-NEXT:    movb %al, (%esp)
; X32-NEXT:    movzbl (%esp), %eax
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vmovshdup {{.*#+}} xmm0 {%k1} = xmm1[1,1,3,3]
; X32-NEXT:    popl %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_mask_movehdup_ps:
; X64:       # BB#0:
; X64-NEXT:    andb $15, %dil
; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT:    kmovw %eax, %k1
; X64-NEXT:    vmovshdup {{.*#+}} xmm0 {%k1} = xmm1[1,1,3,3]
; X64-NEXT:    retq
  %trn1 = trunc i8 %a1 to i4
  %arg1 = bitcast i4 %trn1 to <4 x i1>
  %res0 = shufflevector <4 x float> %a2, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
  %res1 = select <4 x i1> %arg1, <4 x float> %res0, <4 x float> %a0
  ret <4 x float> %res1
}

define <4 x float> @test_mm_maskz_movehdup_ps(i8 %a0, <4 x float> %a1) {
; X32-LABEL: test_mm_maskz_movehdup_ps:
; X32:       # BB#0:
; X32-NEXT:    pushl %eax
; X32-NEXT:  .Ltmp5:
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    andb $15, %al
; X32-NEXT:    movb %al, (%esp)
; X32-NEXT:    movzbl (%esp), %eax
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vmovshdup {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,3,3]
; X32-NEXT:    popl %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_maskz_movehdup_ps:
; X64:       # BB#0:
; X64-NEXT:    andb $15, %dil
; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT:    kmovw %eax, %k1
; X64-NEXT:    vmovshdup {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,3,3]
; X64-NEXT:    retq
  %trn0 = trunc i8 %a0 to i4
  %arg0 = bitcast i4 %trn0 to <4 x i1>
  %res0 = shufflevector <4 x float> %a1, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
  %res1 = select <4 x i1> %arg0, <4 x float> %res0, <4 x float> zeroinitializer
  ret <4 x float> %res1
}

define <8 x float> @test_mm256_movehdup_ps(<8 x float> %a0) {
; X32-LABEL: test_mm256_movehdup_ps:
; X32:       # BB#0:
; X32-NEXT:    vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_movehdup_ps:
; X64:       # BB#0:
; X64-NEXT:    vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
; X64-NEXT:    retq
  %res = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
  ret <8 x float> %res
}

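; With eight elements the i8 mask bitcasts directly to <8 x i1>, so the
; trunc/andb staging through the stack seen above is not needed: the mask
; byte feeds kmovw (and the {%k1} write mask) directly.
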
define <8 x float> @test_mm256_mask_movehdup_ps(<8 x float> %a0, i8 %a1, <8 x float> %a2) {
; X32-LABEL: test_mm256_mask_movehdup_ps:
; X32:       # BB#0:
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vmovshdup {{.*#+}} ymm0 {%k1} = ymm1[1,1,3,3,5,5,7,7]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_movehdup_ps:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vmovshdup {{.*#+}} ymm0 {%k1} = ymm1[1,1,3,3,5,5,7,7]
; X64-NEXT:    retq
  %arg1 = bitcast i8 %a1 to <8 x i1>
  %res0 = shufflevector <8 x float> %a2, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
  %res1 = select <8 x i1> %arg1, <8 x float> %res0, <8 x float> %a0
  ret <8 x float> %res1
}

define <8 x float> @test_mm256_maskz_movehdup_ps(i8 %a0, <8 x float> %a1) {
; X32-LABEL: test_mm256_maskz_movehdup_ps:
; X32:       # BB#0:
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vmovshdup {{.*#+}} ymm0 {%k1} {z} = ymm0[1,1,3,3,5,5,7,7]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_movehdup_ps:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vmovshdup {{.*#+}} ymm0 {%k1} {z} = ymm0[1,1,3,3,5,5,7,7]
; X64-NEXT:    retq
  %arg0 = bitcast i8 %a0 to <8 x i1>
  %res0 = shufflevector <8 x float> %a1, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
  %res1 = select <8 x i1> %arg0, <8 x float> %res0, <8 x float> zeroinitializer
  ret <8 x float> %res1
}

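; MOVSLDUP is the even-element counterpart ([0,0,2,2] and [0,0,2,2,4,4,6,6]).
; A hedged C sketch of the unmasked 128-bit test (wrapper name illustrative;
; the intrinsic name comes from the SSE3 headers):
;
;   __m128 moveldup(__m128 a) { return _mm_moveldup_ps(a); }
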
define <4 x float> @test_mm_moveldup_ps(<4 x float> %a0) {
; X32-LABEL: test_mm_moveldup_ps:
; X32:       # BB#0:
; X32-NEXT:    vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_moveldup_ps:
; X64:       # BB#0:
; X64-NEXT:    vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X64-NEXT:    retq
  %res = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  ret <4 x float> %res
}

define <4 x float> @test_mm_mask_moveldup_ps(<4 x float> %a0, i8 %a1, <4 x float> %a2) {
; X32-LABEL: test_mm_mask_moveldup_ps:
; X32:       # BB#0:
; X32-NEXT:    pushl %eax
; X32-NEXT:  .Ltmp6:
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    andb $15, %al
; X32-NEXT:    movb %al, (%esp)
; X32-NEXT:    movzbl (%esp), %eax
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vmovsldup {{.*#+}} xmm0 {%k1} = xmm1[0,0,2,2]
; X32-NEXT:    popl %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_mask_moveldup_ps:
; X64:       # BB#0:
; X64-NEXT:    andb $15, %dil
; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT:    kmovw %eax, %k1
; X64-NEXT:    vmovsldup {{.*#+}} xmm0 {%k1} = xmm1[0,0,2,2]
; X64-NEXT:    retq
  %trn1 = trunc i8 %a1 to i4
  %arg1 = bitcast i4 %trn1 to <4 x i1>
  %res0 = shufflevector <4 x float> %a2, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  %res1 = select <4 x i1> %arg1, <4 x float> %res0, <4 x float> %a0
  ret <4 x float> %res1
}

define <4 x float> @test_mm_maskz_moveldup_ps(i8 %a0, <4 x float> %a1) {
; X32-LABEL: test_mm_maskz_moveldup_ps:
; X32:       # BB#0:
; X32-NEXT:    pushl %eax
; X32-NEXT:  .Ltmp7:
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    andb $15, %al
; X32-NEXT:    movb %al, (%esp)
; X32-NEXT:    movzbl (%esp), %eax
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vmovsldup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0,2,2]
; X32-NEXT:    popl %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_maskz_moveldup_ps:
; X64:       # BB#0:
; X64-NEXT:    andb $15, %dil
; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT:    kmovw %eax, %k1
; X64-NEXT:    vmovsldup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0,2,2]
; X64-NEXT:    retq
  %trn0 = trunc i8 %a0 to i4
  %arg0 = bitcast i4 %trn0 to <4 x i1>
  %res0 = shufflevector <4 x float> %a1, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  %res1 = select <4 x i1> %arg0, <4 x float> %res0, <4 x float> zeroinitializer
  ret <4 x float> %res1
}

define <8 x float> @test_mm256_moveldup_ps(<8 x float> %a0) {
; X32-LABEL: test_mm256_moveldup_ps:
; X32:       # BB#0:
; X32-NEXT:    vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_moveldup_ps:
; X64:       # BB#0:
; X64-NEXT:    vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
; X64-NEXT:    retq
  %res = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
  ret <8 x float> %res
}

define <8 x float> @test_mm256_mask_moveldup_ps(<8 x float> %a0, i8 %a1, <8 x float> %a2) {
; X32-LABEL: test_mm256_mask_moveldup_ps:
; X32:       # BB#0:
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vmovsldup {{.*#+}} ymm0 {%k1} = ymm1[0,0,2,2,4,4,6,6]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_moveldup_ps:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vmovsldup {{.*#+}} ymm0 {%k1} = ymm1[0,0,2,2,4,4,6,6]
; X64-NEXT:    retq
  %arg1 = bitcast i8 %a1 to <8 x i1>
  %res0 = shufflevector <8 x float> %a2, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
  %res1 = select <8 x i1> %arg1, <8 x float> %res0, <8 x float> %a0
  ret <8 x float> %res1
}

define <8 x float> @test_mm256_maskz_moveldup_ps(i8 %a0, <8 x float> %a1) {
; X32-LABEL: test_mm256_maskz_moveldup_ps:
; X32:       # BB#0:
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vmovsldup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2,4,4,6,6]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_moveldup_ps:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vmovsldup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2,4,4,6,6]
; X64-NEXT:    retq
  %arg0 = bitcast i8 %a0 to <8 x i1>
  %res0 = shufflevector <8 x float> %a1, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
  %res1 = select <8 x i1> %arg0, <8 x float> %res0, <8 x float> zeroinitializer
  ret <8 x float> %res1
}

!0 = !{i32 1}