llvm.org GIT mirror: llvm / ad818a7
[X86][AVX] Regenerate AVX intrinsics tests on 32 + 64-bit targets
Author: Simon Pilgrim
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@316325 91177308-0d34-0410-b5e6-96231b3b80d8
3 changed files with 464 additions and 566 deletions.
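The assertions in the diffs below were produced mechanically rather than by hand: utils/update_llc_test_checks.py reads each test file's RUN lines, pipes the IR through llc, and rewrites the CHECK blocks to match the emitted assembly. As a rough sketch, regenerating one of these tests after adding the new RUN lines looks like this (the file path here is hypothetical, not taken from this commit):

$ python utils/update_llc_test_checks.py test/CodeGen/X86/some-avx-test.ll

Where several RUN lines share a --check-prefix (CHECK below), the script emits a single merged block for the instructions common to every run and matches the differing return instruction with the regex ret{{[l|q]}}, which accepts both retl (32-bit) and retq (64-bit).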
@@ -0 +0 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X86
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=X64
 
 ; We don't check any vinsertf128 variant with immediate 0 because that's just a blend.
 
 define <4 x double> @test_x86_avx_vinsertf128_pd_256_1(<4 x double> %a0, <2 x double> %a1) {
-; X86-LABEL: test_x86_avx_vinsertf128_pd_256_1:
-; X86: # BB#0:
-; X86-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X86-NEXT: retl
-;
-; X64-LABEL: test_x86_avx_vinsertf128_pd_256_1:
-; X64: # BB#0:
-; X64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X64-NEXT: retq
+; CHECK-LABEL: test_x86_avx_vinsertf128_pd_256_1:
+; CHECK: # BB#0:
+; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
 %res = call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> %a0, <2 x double> %a1, i8 1)
 ret <4 x double> %res
 }
 declare <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double>, <2 x double>, i8) nounwind readnone
 
 define <8 x float> @test_x86_avx_vinsertf128_ps_256_1(<8 x float> %a0, <4 x float> %a1) {
-; X86-LABEL: test_x86_avx_vinsertf128_ps_256_1:
-; X86: # BB#0:
-; X86-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X86-NEXT: retl
-;
-; X64-LABEL: test_x86_avx_vinsertf128_ps_256_1:
-; X64: # BB#0:
-; X64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X64-NEXT: retq
+; CHECK-LABEL: test_x86_avx_vinsertf128_ps_256_1:
+; CHECK: # BB#0:
+; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
 %res = call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> %a0, <4 x float> %a1, i8 1)
 ret <8 x float> %res
 }
 declare <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float>, <4 x float>, i8) nounwind readnone
 
 define <8 x i32> @test_x86_avx_vinsertf128_si_256_1(<8 x i32> %a0, <4 x i32> %a1) {
-; X86-LABEL: test_x86_avx_vinsertf128_si_256_1:
-; X86: # BB#0:
-; X86-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X86-NEXT: retl
-;
-; X64-LABEL: test_x86_avx_vinsertf128_si_256_1:
-; X64: # BB#0:
-; X64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X64-NEXT: retq
+; CHECK-LABEL: test_x86_avx_vinsertf128_si_256_1:
+; CHECK: # BB#0:
+; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
 %res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> %a0, <4 x i32> %a1, i8 1)
 ret <8 x i32> %res
 }
@@ -51 +36 @@
 ; of a vinsertf128 $0 which should be optimized into a blend, so just check that it's
 ; not a vinsertf128 $1.
 define <8 x i32> @test_x86_avx_vinsertf128_si_256_2(<8 x i32> %a0, <4 x i32> %a1) {
-; X86-LABEL: test_x86_avx_vinsertf128_si_256_2:
-; X86: # BB#0:
-; X86-NEXT: # kill: %XMM1 %XMM1 %YMM1
-; X86-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
-; X86-NEXT: retl
-;
-; X64-LABEL: test_x86_avx_vinsertf128_si_256_2:
-; X64: # BB#0:
-; X64-NEXT: # kill: %XMM1 %XMM1 %YMM1
-; X64-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
-; X64-NEXT: retq
+; CHECK-LABEL: test_x86_avx_vinsertf128_si_256_2:
+; CHECK: # BB#0:
+; CHECK-NEXT: # kill: %XMM1 %XMM1 %YMM1
+; CHECK-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
+; CHECK-NEXT: ret{{[l|q]}}
 %res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> %a0, <4 x i32> %a1, i8 2)
 ret <8 x i32> %res
 }
@@ -70 +49 @@
 ; We don't check any vextractf128 variant with immediate 0 because that's just a move.
 
 define <2 x double> @test_x86_avx_vextractf128_pd_256_1(<4 x double> %a0) {
-; X86-LABEL: test_x86_avx_vextractf128_pd_256_1:
-; X86: # BB#0:
-; X86-NEXT: vextractf128 $1, %ymm0, %xmm0
-; X86-NEXT: vzeroupper
-; X86-NEXT: retl
-;
-; X64-LABEL: test_x86_avx_vextractf128_pd_256_1:
-; X64: # BB#0:
-; X64-NEXT: vextractf128 $1, %ymm0, %xmm0
-; X64-NEXT: vzeroupper
-; X64-NEXT: retq
+; CHECK-LABEL: test_x86_avx_vextractf128_pd_256_1:
+; CHECK: # BB#0:
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: ret{{[l|q]}}
 %res = call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %a0, i8 1)
 ret <2 x double> %res
 }
 declare <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double>, i8) nounwind readnone
 
 define <4 x float> @test_x86_avx_vextractf128_ps_256_1(<8 x float> %a0) {
-; X86-LABEL: test_x86_avx_vextractf128_ps_256_1:
-; X86: # BB#0:
-; X86-NEXT: vextractf128 $1, %ymm0, %xmm0
-; X86-NEXT: vzeroupper
-; X86-NEXT: retl
-;
-; X64-LABEL: test_x86_avx_vextractf128_ps_256_1:
-; X64: # BB#0:
-; X64-NEXT: vextractf128 $1, %ymm0, %xmm0
-; X64-NEXT: vzeroupper
-; X64-NEXT: retq
+; CHECK-LABEL: test_x86_avx_vextractf128_ps_256_1:
+; CHECK: # BB#0:
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: ret{{[l|q]}}
 %res = call <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float> %a0, i8 1)
 ret <4 x float> %res
 }
 declare <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float>, i8) nounwind readnone
 
 define <4 x i32> @test_x86_avx_vextractf128_si_256_1(<8 x i32> %a0) {
-; X86-LABEL: test_x86_avx_vextractf128_si_256_1:
-; X86: # BB#0:
-; X86-NEXT: vextractf128 $1, %ymm0, %xmm0
-; X86-NEXT: vzeroupper
-; X86-NEXT: retl
-;
-; X64-LABEL: test_x86_avx_vextractf128_si_256_1:
-; X64: # BB#0:
-; X64-NEXT: vextractf128 $1, %ymm0, %xmm0
-; X64-NEXT: vzeroupper
-; X64-NEXT: retq
+; CHECK-LABEL: test_x86_avx_vextractf128_si_256_1:
+; CHECK: # BB#0:
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: ret{{[l|q]}}
 %res = call <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32> %a0, i8 1)
 ret <4 x i32> %res
 }
@@ -124 +85 @@
 ; of a vextractf128 $0 which should be optimized away, so just check that it's
 ; not a vextractf128 of any kind.
 define <2 x double> @test_x86_avx_extractf128_pd_256_2(<4 x double> %a0) {
-; X86-LABEL: test_x86_avx_extractf128_pd_256_2:
-; X86: # BB#0:
-; X86-NEXT: # kill: %XMM0 %XMM0 %YMM0
-; X86-NEXT: vzeroupper
-; X86-NEXT: retl
-;
-; X64-LABEL: test_x86_avx_extractf128_pd_256_2:
-; X64: # BB#0:
-; X64-NEXT: # kill: %XMM0 %XMM0 %YMM0
-; X64-NEXT: vzeroupper
-; X64-NEXT: retq
+; CHECK-LABEL: test_x86_avx_extractf128_pd_256_2:
+; CHECK: # BB#0:
+; CHECK-NEXT: # kill: %XMM0 %XMM0 %YMM0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: ret{{[l|q]}}
 %res = call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %a0, i8 2)
 ret <2 x double> %res
 }
@@ -145 +100 @@
 ; X86: # BB#0:
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
-; X86-NEXT: retl
+; X86-NEXT: ret{{[l|q]}}
 ;
 ; X64-LABEL: test_x86_avx_vbroadcastf128_pd_256:
 ; X64: # BB#0:
 ; X64-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
-; X64-NEXT: retq
+; X64-NEXT: ret{{[l|q]}}
 %res = call <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8* %a0) ; <<4 x double>> [#uses=1]
 ret <4 x double> %res
 }
@@ -162 +117 @@
 ; X86: # BB#0:
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
-; X86-NEXT: retl
+; X86-NEXT: ret{{[l|q]}}
 ;
 ; X64-LABEL: test_x86_avx_vbroadcastf128_ps_256:
 ; X64: # BB#0:
 ; X64-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
-; X64-NEXT: retq
+; X64-NEXT: ret{{[l|q]}}
 %res = call <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8* %a0) ; <<8 x float>> [#uses=1]
 ret <8 x float> %res
 }
@@ -175 +130 @@
 
 
 define <4 x double> @test_x86_avx_blend_pd_256(<4 x double> %a0, <4 x double> %a1) {
-; X86-LABEL: test_x86_avx_blend_pd_256:
-; X86: # BB#0:
-; X86-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3]
-; X86-NEXT: retl
-;
-; X64-LABEL: test_x86_avx_blend_pd_256:
-; X64: # BB#0:
-; X64-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3]
-; X64-NEXT: retq
+; CHECK-LABEL: test_x86_avx_blend_pd_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3]
+; CHECK-NEXT: ret{{[l|q]}}
 %res = call <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double> %a0, <4 x double> %a1, i32 7) ; <<4 x double>> [#uses=1]
 ret <4 x double> %res
 }
@@ -191 +141 @@
 
 
 define <8 x float> @test_x86_avx_blend_ps_256(<8 x float> %a0, <8 x float> %a1) {
-; X86-LABEL: test_x86_avx_blend_ps_256:
-; X86: # BB#0:
-; X86-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7]
-; X86-NEXT: retl
-;
-; X64-LABEL: test_x86_avx_blend_ps_256:
-; X64: # BB#0:
-; X64-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7]
-; X64-NEXT: retq
+; CHECK-LABEL: test_x86_avx_blend_ps_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7]
+; CHECK-NEXT: ret{{[l|q]}}
 %res = call <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7) ; <<8 x float>> [#uses=1]
 ret <8 x float> %res
 }
@@ -207 +152 @@
 
 
 define <8 x float> @test_x86_avx_dp_ps_256(<8 x float> %a0, <8 x float> %a1) {
-; X86-LABEL: test_x86_avx_dp_ps_256:
-; X86: # BB#0:
-; X86-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0
-; X86-NEXT: retl
-;
-; X64-LABEL: test_x86_avx_dp_ps_256:
-; X64: # BB#0:
-; X64-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0
-; X64-NEXT: retq
+; CHECK-LABEL: test_x86_avx_dp_ps_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
 %res = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7) ; <<8 x float>> [#uses=1]
 ret <8 x float> %res
 }
@@ -223 +163 @@
 
 
 define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
-; X86-LABEL: test_x86_sse2_psll_dq:
-; X86: # BB#0:
-; X86-NEXT: vpslldq {{.*#+}} xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
-; X86-NEXT: retl
-;
-; X64-LABEL: test_x86_sse2_psll_dq:
-; X64: # BB#0:
-; X64-NEXT: vpslldq {{.*#+}} xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
-; X64-NEXT: retq
+; CHECK-LABEL: test_x86_sse2_psll_dq:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpslldq {{.*#+}} xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
+; CHECK-NEXT: ret{{[l|q]}}
 %res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 8) ; <<2 x i64>> [#uses=1]
 ret <2 x i64> %res
 }
@@ -239 +174 @@
 
 
 define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
-; X86-LABEL: test_x86_sse2_psrl_dq:
-; X86: # BB#0:
-; X86-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
-; X86-NEXT: retl
-;
-; X64-LABEL: test_x86_sse2_psrl_dq:
-; X64: # BB#0:
-; X64-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
-; X64-NEXT: retq
+; CHECK-LABEL: test_x86_sse2_psrl_dq:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
+; CHECK-NEXT: ret{{[l|q]}}
 %res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 8) ; <<2 x i64>> [#uses=1]
 ret <2 x i64> %res
 }
@@ -255 +185 @@
 
 
 define <2 x double> @test_x86_sse41_blendpd(<2 x double> %a0, <2 x double> %a1) {
-; X86-LABEL: test_x86_sse41_blendpd:
-; X86: # BB#0:
-; X86-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
-; X86-NEXT: retl
-;
-; X64-LABEL: test_x86_sse41_blendpd:
-; X64: # BB#0:
-; X64-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
-; X64-NEXT: retq
+; CHECK-LABEL: test_x86_sse41_blendpd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; CHECK-NEXT: ret{{[l|q]}}
 %res = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i8 2) ; <<2 x double>> [#uses=1]
 ret <2 x double> %res
 }
@@ -271 +196 @@
 
 
 define <4 x float> @test_x86_sse41_blendps(<4 x float> %a0, <4 x float> %a1) {
-; X86-LABEL: test_x86_sse41_blendps:
-; X86: # BB#0:
-; X86-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
-; X86-NEXT: retl
-;
-; X64-LABEL: test_x86_sse41_blendps:
-; X64: # BB#0:
-; X64-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
-; X64-NEXT: retq
+; CHECK-LABEL: test_x86_sse41_blendps:
+; CHECK: # BB#0:
+; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
+; CHECK-NEXT: ret{{[l|q]}}
 %res = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
 ret <4 x float> %res
 }
@@ -287 +207 @@
 
 
 define <8 x i16> @test_x86_sse41_pblendw(<8 x i16> %a0, <8 x i16> %a1) {
-; X86-LABEL: test_x86_sse41_pblendw:
-; X86: # BB#0:
-; X86-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3,4,5,6,7]
-; X86-NEXT: retl
-;
-; X64-LABEL: test_x86_sse41_pblendw:
-; X64: # BB#0:
-; X64-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3,4,5,6,7]
-; X64-NEXT: retq
+; CHECK-LABEL: test_x86_sse41_pblendw:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3,4,5,6,7]
+; CHECK-NEXT: ret{{[l|q]}}
 %res = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i8 7) ; <<8 x i16>> [#uses=1]
 ret <8 x i16> %res
 }
@@ -303 +218 @@
 
 
 define <4 x i32> @test_x86_sse41_pmovsxbd(<16 x i8> %a0) {
-; X86-LABEL: test_x86_sse41_pmovsxbd:
-; X86: # BB#0:
-; X86-NEXT: vpmovsxbd %xmm0, %xmm0
-; X86-NEXT: retl
-;
-; X64-LABEL: test_x86_sse41_pmovsxbd:
-; X64: # BB#0:
-; X64-NEXT: vpmovsxbd %xmm0, %xmm0
-; X64-NEXT: retq
+; CHECK-LABEL: test_x86_sse41_pmovsxbd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpmovsxbd %xmm0, %xmm0
+; CHECK-NEXT: ret{{[l|q]}}
 %res = call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1]
 ret <4 x i32> %res
 }
@@ -319 +229 @@
 
 
 define <2 x i64> @test_x86_sse41_pmovsxbq(<16 x i8> %a0) {
-; X86-LABEL: test_x86_sse41_pmovsxbq:
-; X86: # BB#0:
-; X86-NEXT: vpmovsxbq %xmm0, %xmm0
-; X86-NEXT: retl
-;
-; X64-LABEL: test_x86_sse41_pmovsxbq:
-; X64: # BB#0:
-; X64-NEXT: vpmovsxbq %xmm0, %xmm0
-; X64-NEXT: retq
+; CHECK-LABEL: test_x86_sse41_pmovsxbq:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpmovsxbq %xmm0, %xmm0
+; CHECK-NEXT: ret{{[l|q]}}
 %res = call <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1]
 ret <2 x i64> %res
 }
@@ -335 +240 @@
 
 
 define <8 x i16> @test_x86_sse41_pmovsxbw(<16 x i8> %a0) {
-; X86-LABEL: test_x86_sse41_pmovsxbw:
-; X86: # BB#0:
-; X86-NEXT: vpmovsxbw %xmm0, %xmm0
-; X86-NEXT: retl
-;
-; X64-LABEL: test_x86_sse41_pmovsxbw:
-; X64: # BB#0:
-; X64-NEXT: vpmovsxbw %xmm0, %xmm0
-; X64-NEXT: retq
+; CHECK-LABEL: test_x86_sse41_pmovsxbw:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpmovsxbw %xmm0, %xmm0
+; CHECK-NEXT: ret{{[l|q]}}
 %res = call <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
 ret <8 x i16> %res
 }
@@ -351 +251 @@
 
 
 define <2 x i64> @test_x86_sse41_pmovsxdq(<4 x i32> %a0) {
-; X86-LABEL: test_x86_sse41_pmovsxdq:
-; X86: # BB#0:
-; X86-NEXT: vpmovsxdq %xmm0, %xmm0
-; X86-NEXT: retl
-;
-; X64-LABEL: test_x86_sse41_pmovsxdq:
-; X64: # BB#0:
-; X64-NEXT: vpmovsxdq %xmm0, %xmm0
-; X64-NEXT: retq
+; CHECK-LABEL: test_x86_sse41_pmovsxdq:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpmovsxdq %xmm0, %xmm0
+; CHECK-NEXT: ret{{[l|q]}}
 %res = call <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1]
 ret <2 x i64> %res
 }
@@ -367 +262 @@
 
 
 define <4 x i32> @test_x86_sse41_pmovsxwd(<8 x i16> %a0) {
-; X86-LABEL: test_x86_sse41_pmovsxwd:
-; X86: # BB#0:
-; X86-NEXT: vpmovsxwd %xmm0, %xmm0
-; X86-NEXT: retl
-;
-; X64-LABEL: test_x86_sse41_pmovsxwd:
-; X64: # BB#0:
-; X64-NEXT: vpmovsxwd %xmm0, %xmm0
-; X64-NEXT: retq
+; CHECK-LABEL: test_x86_sse41_pmovsxwd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpmovsxwd %xmm0, %xmm0
+; CHECK-NEXT: ret{{[l|q]}}
 %res = call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1]
 ret <4 x i32> %res
 }
@@ -383 +273 @@
 
 
 define <2 x i64> @test_x86_sse41_pmovsxwq(<8 x i16> %a0) {
-; X86-LABEL: test_x86_sse41_pmovsxwq:
-; X86: # BB#0:
-; X86-NEXT: vpmovsxwq %xmm0, %xmm0
-; X86-NEXT: retl
-;
-; X64-LABEL: test_x86_sse41_pmovsxwq:
-; X64: # BB#0:
-; X64-NEXT: vpmovsxwq %xmm0, %xmm0
-; X64-NEXT: retq
+; CHECK-LABEL: test_x86_sse41_pmovsxwq:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpmovsxwq %xmm0, %xmm0
+; CHECK-NEXT: ret{{[l|q]}}
 %res = call <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1]
 ret <2 x i64> %res
 }
@@ -399 +284 @@
 
 
 define <4 x i32> @test_x86_sse41_pmovzxbd(<16 x i8> %a0) {
-; X86-LABEL: test_x86_sse41_pmovzxbd:
-; X86: # BB#0:
-; X86-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; X86-NEXT: retl
-;
-; X64-LABEL: test_x86_sse41_pmovzxbd:
-; X64: # BB#0:
-; X64-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; X64-NEXT: retq
+; CHECK-LABEL: test_x86_sse41_pmovzxbd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; CHECK-NEXT: ret{{[l|q]}}
 %res = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1]
 ret <4 x i32> %res
 }
@@ -415 +295 @@
 
 
 define <2 x i64> @test_x86_sse41_pmovzxbq(<16 x i8> %a0) {
-; X86-LABEL: test_x86_sse41_pmovzxbq:
-; X86: # BB#0:
-; X86-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
-; X86-NEXT: retl
-;
-; X64-LABEL: test_x86_sse41_pmovzxbq:
-; X64: # BB#0:
-; X64-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
-; X64-NEXT: retq
+; CHECK-LABEL: test_x86_sse41_pmovzxbq:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
+; CHECK-NEXT: ret{{[l|q]}}
 %res = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1]
 ret <2 x i64> %res
 }
@@ -431 +306 @@
 
 
 define <8 x i16> @test_x86_sse41_pmovzxbw(<16 x i8> %a0) {
-; X86-LABEL: test_x86_sse41_pmovzxbw:
-; X86: # BB#0:
-; X86-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; X86-NEXT: retl
-;
-; X64-LABEL: test_x86_sse41_pmovzxbw:
-; X64: # BB#0:
-; X64-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; X64-NEXT: retq
+; CHECK-LABEL: test_x86_sse41_pmovzxbw:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; CHECK-NEXT: ret{{[l|q]}}
 %res = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
 ret <8 x i16> %res
 }
@@ -447 +317 @@
 
 
 define <2 x i64> @test_x86_sse41_pmovzxdq(<4 x i32> %a0) {
-; X86-LABEL: test_x86_sse41_pmovzxdq:
-; X86: # BB#0:
-; X86-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; X86-NEXT: retl
-;
-; X64-LABEL: test_x86_sse41_pmovzxdq:
-; X64: # BB#0:
-; X64-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; X64-NEXT: retq
+; CHECK-LABEL: test_x86_sse41_pmovzxdq:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; CHECK-NEXT: ret{{[l|q]}}
 %res = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1]
 ret <2 x i64> %res
 }
@@ -463 +328 @@
 
 
 define <4 x i32> @test_x86_sse41_pmovzxwd(<8 x i16> %a0) {
-; X86-LABEL: test_x86_sse41_pmovzxwd:
-; X86: # BB#0:
-; X86-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; X86-NEXT: retl
-;
-; X64-LABEL: test_x86_sse41_pmovzxwd:
-; X64: # BB#0:
-; X64-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; X64-NEXT: retq
+; CHECK-LABEL: test_x86_sse41_pmovzxwd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; CHECK-NEXT: ret{{[l|q]}}
 %res = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1]
 ret <4 x i32> %res
 }
@@ -479 +339 @@
 
 
 define <2 x i64> @test_x86_sse41_pmovzxwq(<8 x i16> %a0) {
-; X86-LABEL: test_x86_sse41_pmovzxwq:
-; X86: # BB#0:
-; X86-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
-; X86-NEXT: retl
-;
-; X64-LABEL: test_x86_sse41_pmovzxwq:
-; X64: # BB#0:
-; X64-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
-; X64-NEXT: retq
+; CHECK-LABEL: test_x86_sse41_pmovzxwq:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
+; CHECK-NEXT: ret{{[l|q]}}
 %res = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1]
 ret <2 x i64> %res
 }
@@ -495 +350 @@
 
 
 define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) {
-; X86-LABEL: test_x86_sse2_cvtdq2pd:
-; X86: # BB#0:
-; X86-NEXT: vcvtdq2pd %xmm0, %xmm0
-; X86-NEXT: retl
-;
-; X64-LABEL: test_x86_sse2_cvtdq2pd:
-; X64: # BB#0:
-; X64-NEXT: vcvtdq2pd %xmm0, %xmm0
-; X64-NEXT: retq
+; CHECK-LABEL: test_x86_sse2_cvtdq2pd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vcvtdq2pd %xmm0, %xmm0
+; CHECK-NEXT: ret{{[l|q]}}
 %res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0) ; <<2 x double>> [#uses=1]
 ret <2 x double> %res
 }
@@ -511 +361 @@
 
 
 define <4 x double> @test_x86_avx_cvtdq2_pd_256(<4 x i32> %a0) {
-; X86-LABEL: test_x86_avx_cvtdq2_pd_256:
-; X86: # BB#0:
-; X86-NEXT: vcvtdq2pd %xmm0, %ymm0
-; X86-NEXT: retl
-;
-; X64-LABEL: test_x86_avx_cvtdq2_pd_256:
-; X64: # BB#0:
-; X64-NEXT: vcvtdq2pd %xmm0, %ymm0
-; X64-NEXT: retq
+; CHECK-LABEL: test_x86_avx_cvtdq2_pd_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: vcvtdq2pd %xmm0, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
 %res = call <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32> %a0) ; <<4 x double>> [#uses=1]
 ret <4 x double> %res
 }
@@ -527 +372 @@
 
 
 define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) {
-; X86-LABEL: test_x86_sse2_cvtps2pd:
-; X86: # BB#0:
-; X86-NEXT: vcvtps2pd %xmm0, %xmm0
-; X86-NEXT: retl
-;
-; X64-LABEL: test_x86_sse2_cvtps2pd:
-; X64: # BB#0:
-; X64-NEXT: vcvtps2pd %xmm0, %xmm0
-; X64-NEXT: retq
+; CHECK-LABEL: test_x86_sse2_cvtps2pd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vcvtps2pd %xmm0, %xmm0
+; CHECK-NEXT: ret{{[l|q]}}
 %res = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0) ; <<2 x double>> [#uses=1]
 ret <2 x double> %res
 }
@@ -543 +383 @@
 
 
 define <4 x double> @test_x86_avx_cvt_ps2_pd_256(<4 x float> %a0) {
-; X86-LABEL: test_x86_avx_cvt_ps2_pd_256:
-; X86: # BB#0:
-; X86-NEXT: vcvtps2pd %xmm0, %ymm0
-; X86-NEXT: retl
-;
-; X64-LABEL: test_x86_avx_cvt_ps2_pd_256:
-; X64: # BB#0:
-; X64-NEXT: vcvtps2pd %xmm0, %ymm0
-; X64-NEXT: retq
+; CHECK-LABEL: test_x86_avx_cvt_ps2_pd_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: vcvtps2pd %xmm0, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
 %res = call <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float> %a0) ; <<4 x double>> [#uses=1]
 ret <4 x double> %res
 }
@@ -566 +401 @@
 ; X86-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
 ; X86-NEXT: vpsubb %xmm1, %xmm0, %xmm0
 ; X86-NEXT: vmovdqu %xmm0, (%eax)
-; X86-NEXT: retl
+; X86-NEXT: ret{{[l|q]}}
 ;
 ; X64-LABEL: test_x86_sse2_storeu_dq:
 ; X64: # BB#0:
 ; X64-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
 ; X64-NEXT: vpsubb %xmm1, %xmm0, %xmm0
 ; X64-NEXT: vmovdqu %xmm0, (%rdi)
-; X64-NEXT: retq
+; X64-NEXT: ret{{[l|q]}}
 %a2 = add <16 x i8> %a1,
 call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a2)
 ret void
@@ -590 +425 @@
 ; X86-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
 ; X86-NEXT: vaddpd %xmm1, %xmm0, %xmm0
 ; X86-NEXT: vmovupd %xmm0, (%eax)
-; X86-NEXT: retl
+; X86-NEXT: ret{{[l|q]}}
 ;
 ; X64-LABEL: test_x86_sse2_storeu_pd:
 ; X64: # BB#0:
@@ -598 +433 @@
 ; X64-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
 ; X64-NEXT: vaddpd %xmm1, %xmm0, %xmm0
 ; X64-NEXT: vmovupd %xmm0, (%rdi)
-; X64-NEXT: retq
+; X64-NEXT: ret{{[l|q]}}
 %a2 = fadd <2 x double> %a1,
 call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a2)
 ret void
@@ -611 +446 @@
 ; X86: # BB#0:
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: vmovups %xmm0, (%eax)
-; X86-NEXT: retl
+; X86-NEXT: ret{{[l|q]}}
 ;
 ; X64-LABEL: test_x86_sse_storeu_ps:
 ; X64: # BB#0:
 ; X64-NEXT: vmovups %xmm0, (%rdi)
-; X64-NEXT: retq
+; X64-NEXT: ret{{[l|q]}}
 call void @llvm.x86.sse.storeu.ps(i8* %a0, <4 x float> %a1)
 ret void
 }
@@ -636 +471 @@
 ; X86-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; X86-NEXT: vmovups %ymm0, (%eax)
 ; X86-NEXT: vzeroupper
-; X86-NEXT: retl
+; X86-NEXT: ret{{[l|q]}}
 ;
 ; X64-LABEL: test_x86_avx_storeu_dq_256:
 ; X64: # BB#0:
@@ -647 +482 @@
 ; X64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; X64-NEXT: vmovups %ymm0, (%rdi)
 ; X64-NEXT: vzeroupper
-; X64-NEXT: retq
+; X64-NEXT: ret{{[l|q]}}
 %a2 = add <32 x i8> %a1,
 call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a2)
 ret void
@@ -664 +499 @@
 ; X86-NEXT: vaddpd %ymm1, %ymm0, %ymm0
 ; X86-NEXT: vmovupd %ymm0, (%eax)
 ; X86-NEXT: vzeroupper
-; X86-NEXT: retl
+; X86-NEXT: ret{{[l|q]}}
 ;
 ; X64-LABEL: test_x86_avx_storeu_pd_256:
 ; X64: # BB#0:
@@ -672 +507 @@
 ; X64-NEXT: vaddpd %ymm1, %ymm0, %ymm0
 ; X64-NEXT: vmovupd %ymm0, (%rdi)
 ; X64-NEXT: vzeroupper
-; X64-NEXT: retq
+; X64-NEXT: ret{{[l|q]}}
 %a2 = fadd <4 x double> %a1,
 call void @llvm.x86.avx.storeu.pd.256(i8* %a0, <4 x double> %a2)
 ret void
@@ -686 +521 @@
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: vmovups %ymm0, (%eax)
 ; X86-NEXT: vzeroupper
-; X86-NEXT: retl
+; X86-NEXT: ret{{[l|q]}}
 ;
 ; X64-LABEL: test_x86_avx_storeu_ps_256:
 ; X64: # BB#0:
 ; X64-NEXT: vmovups %ymm0, (%rdi)
 ; X64-NEXT: vzeroupper
-; X64-NEXT: retq
+; X64-NEXT: ret{{[l|q]}}
 call void @llvm.x86.avx.storeu.ps.256(i8* %a0, <8 x float> %a1)
 ret void
 }
@@ -700 +535 @@
 
 
 define <2 x double> @test_x86_avx_vpermil_pd(<2 x double> %a0) {
-; X86-LABEL: test_x86_avx_vpermil_pd:
-; X86: # BB#0:
-; X86-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; X86-NEXT: retl
-;
-; X64-LABEL: test_x86_avx_vpermil_pd:
-; X64: # BB#0:
-; X64-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; X64-NEXT: retq
+; CHECK-LABEL: test_x86_avx_vpermil_pd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
+; CHECK-NEXT: ret{{[l|q]}}
 %res = call <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double> %a0, i8 1) ; <<2 x double>> [#uses=1]
 ret <2 x double> %res
 }
@@ -716 +546 @@
 
 
 define <4 x double> @test_x86_avx_vpermil_pd_256(<4 x double> %a0) {
-; X86-LABEL: test_x86_avx_vpermil_pd_256:
-; X86: # BB#0:
-; X86-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,2]
-; X86-NEXT: retl
-;
-; X64-LABEL: test_x86_avx_vpermil_pd_256:
-; X64: # BB#0:
-; X64-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,2]
-; X64-NEXT: retq
+; CHECK-LABEL: test_x86_avx_vpermil_pd_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,2]
+; CHECK-NEXT: ret{{[l|q]}}
 %res = call <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double> %a0, i8 7) ; <<4 x double>> [#uses=1]
 ret <4 x double> %res
 }
@@ -732 +557 @@
 
 
 define <4 x float> @test_x86_avx_vpermil_ps(<4 x float> %a0) {
-; X86-LABEL: test_x86_avx_vpermil_ps:
-; X86: # BB#0:
-; X86-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,0,0]
-; X86-NEXT: retl
-;
-; X64-LABEL: test_x86_avx_vpermil_ps:
-; X64: # BB#0:
-; X64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,0,0]
-; X64-NEXT: retq
+; CHECK-LABEL: test_x86_avx_vpermil_ps:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,0,0]
+; CHECK-NEXT: ret{{[l|q]}}
 %res = call <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float> %a0, i8 7) ; <<4 x float>> [#uses=1]
 ret <4 x float> %res
 }
@@ -748 +568 @@
 
 
 define <8 x float> @test_x86_avx_vpermil_ps_256(<8 x float> %a0) {
-; X86-LABEL: test_x86_avx_vpermil_ps_256:
-; X86: # BB#0:
-; X86-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,1,0,0,7,5,4,4]
-; X86-NEXT: retl
-;
-; X64-LABEL: test_x86_avx_vpermil_ps_256:
-; X64: # BB#0:
-; X64-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,1,0,0,7,5,4,4]
-; X64-NEXT: retq
+; CHECK-LABEL: test_x86_avx_vpermil_ps_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,1,0,0,7,5,4,4]
+; CHECK-NEXT: ret{{[l|q]}}
 %res = call <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float> %a0, i8 7) ; <<8 x float>> [#uses=1]
 ret <8 x float> %res
 }
@@ -764 +579 @@
 
 
 define <4 x double> @test_x86_avx_vperm2f128_pd_256(<4 x double> %a0, <4 x double> %a1) {
-; X86-LABEL: test_x86_avx_vperm2f128_pd_256:
-; X86: # BB#0:
-; X86-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
-; X86-NEXT: retl
-;
-; X64-LABEL: test_x86_avx_vperm2f128_pd_256:
-; X64: # BB#0:
-; X64-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
-; X64-NEXT: retq
+; CHECK-LABEL: test_x86_avx_vperm2f128_pd_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
+; CHECK-NEXT: ret{{[l|q]}}
 %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 3) ; <<4 x double>> [#uses=1]
 ret <4 x double> %res
 }
@@ -780 +590 @@
 
 
 define <8 x float> @test_x86_avx_vperm2f128_ps_256(<8 x float> %a0, <8 x float> %a1) {
-; X86-LABEL: test_x86_avx_vperm2f128_ps_256:
-; X86: # BB#0:
-; X86-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
-; X86-NEXT: retl
-;
-; X64-LABEL: test_x86_avx_vperm2f128_ps_256:
-; X64: # BB#0:
-; X64-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
-; X64-NEXT: retq
+; CHECK-LABEL: test_x86_avx_vperm2f128_ps_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
+; CHECK-NEXT: ret{{[l|q]}}
 %res = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 3) ; <<8 x float>> [#uses=1]
 ret <8 x float> %res
 }
@@ -796 +601 @@
 
 
 define <8 x i32> @test_x86_avx_vperm2f128_si_256(<8 x i32> %a0, <8 x i32> %a1) {
-; X86-LABEL: test_x86_avx_vperm2f128_si_256:
-; X86: # BB#0:
-; X86-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
-; X86-NEXT: retl
-;
-; X64-LABEL: test_x86_avx_vperm2f128_si_256:
-; X64: # BB#0:
-; X64-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
-; X64-NEXT: retq
+; CHECK-LABEL: test_x86_avx_vperm2f128_si_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
+; CHECK-NEXT: ret{{[l|q]}}
 %res = call <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32> %a0, <8 x i32> %a1, i8 3) ; <<8 x i32>> [#uses=1]
 ret <8 x i32> %res
 }
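The second changed file, whose diff follows, gets the same treatment but is checked with -show-mc-encoding under four RUN lines (AVX via -mattr, AVX512VL via -mcpu=skx, each on i686 and x86_64). Register-only operations encode identically on both targets and stay under the shared CHECK/AVX/AVX512VL prefixes, while loads and stores address memory differently ((%eax) on 32-bit versus (%rdi) on 64-bit), so those functions split into X86/X64 blocks. A minimal, hypothetical sketch of how the stacked --check-prefix options cooperate (not taken from this commit):

; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=X64
define <4 x float> @fadd4(<4 x float> %a, <4 x float> %b) {
; The vaddps line is identical for both targets, so it is checked once
; under the shared CHECK prefix; only the return instruction differs,
; and each X86/X64 line is only enforced for the matching RUN line.
; CHECK-LABEL: fadd4:
; CHECK: vaddps %xmm1, %xmm0, %xmm0
; X86: retl
; X64: retq
  %r = fadd <4 x float> %a, %b
  ret <4 x float> %r
}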
0 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx,pclmul -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mcpu=skx -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512VL
1 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx,pclmul -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=X86 --check-prefix=X86-AVX
2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mcpu=skx -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512VL --check-prefix=X86 --check-prefix=X86-AVX512VL
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx,pclmul -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=X64 --check-prefix=X64-AVX
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512VL --check-prefix=X64 --check-prefix=X64-AVX512VL
35
46 define <4 x double> @test_x86_avx_addsub_pd_256(<4 x double> %a0, <4 x double> %a1) {
57 ; CHECK-LABEL: test_x86_avx_addsub_pd_256:
68 ; CHECK: # BB#0:
79 ; CHECK-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xd0,0xc1]
8 ; CHECK-NEXT: retl # encoding: [0xc3]
10 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
911 %res = call <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
1012 ret <4 x double> %res
1113 }
1618 ; CHECK-LABEL: test_x86_avx_addsub_ps_256:
1719 ; CHECK: # BB#0:
1820 ; CHECK-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xff,0xd0,0xc1]
19 ; CHECK-NEXT: retl # encoding: [0xc3]
21 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2022 %res = call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
2123 ret <8 x float> %res
2224 }
2729 ; CHECK-LABEL: test_x86_avx_blendv_pd_256:
2830 ; CHECK: # BB#0:
2931 ; CHECK-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x4b,0xc1,0x20]
30 ; CHECK-NEXT: retl # encoding: [0xc3]
32 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3133 %res = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) ; <<4 x double>> [#uses=1]
3234 ret <4 x double> %res
3335 }
3840 ; CHECK-LABEL: test_x86_avx_blendv_ps_256:
3941 ; CHECK: # BB#0:
4042 ; CHECK-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x4a,0xc1,0x20]
41 ; CHECK-NEXT: retl # encoding: [0xc3]
43 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
4244 %res = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) ; <<8 x float>> [#uses=1]
4345 ret <8 x float> %res
4446 }
4951 ; CHECK-LABEL: test_x86_avx_cmp_pd_256:
5052 ; CHECK: # BB#0:
5153 ; CHECK-NEXT: vcmpordpd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xc2,0xc1,0x07]
52 ; CHECK-NEXT: retl # encoding: [0xc3]
54 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5355 %res = call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7) ; <<4 x double>> [#uses=1]
5456 ret <4 x double> %res
5557 }
6062 ; CHECK-LABEL: test_x86_avx_cmp_ps_256:
6163 ; CHECK: # BB#0:
6264 ; CHECK-NEXT: vcmpordps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0xc2,0xc1,0x07]
63 ; CHECK-NEXT: retl # encoding: [0xc3]
65 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6466 %res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
6567 ret <8 x float> %res
6668 }
100102 ; CHECK-NEXT: vcmpge_oqps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x1d]
101103 ; CHECK-NEXT: vcmpgt_oqps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x1e]
102104 ; CHECK-NEXT: vcmptrue_usps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0xc2,0xc1,0x1f]
103 ; CHECK-NEXT: retl # encoding: [0xc3]
105 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
104106 %a2 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 0) ; <<8 x float>> [#uses=1]
105107 %a3 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a2, i8 1) ; <<8 x float>> [#uses=1]
106108 %a4 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a3, i8 2) ; <<8 x float>> [#uses=1]
143145 ; AVX: # BB#0:
144146 ; AVX-NEXT: vcvtpd2ps %ymm0, %xmm0 # encoding: [0xc5,0xfd,0x5a,0xc0]
145147 ; AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
146 ; AVX-NEXT: retl # encoding: [0xc3]
148 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
147149 ;
148150 ; AVX512VL-LABEL: test_x86_avx_cvt_pd2_ps_256:
149151 ; AVX512VL: # BB#0:
150152 ; AVX512VL-NEXT: vcvtpd2ps %ymm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x5a,0xc0]
151153 ; AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
152 ; AVX512VL-NEXT: retl # encoding: [0xc3]
154 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
153155 %res = call <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double> %a0) ; <<4 x float>> [#uses=1]
154156 ret <4 x float> %res
155157 }
161163 ; AVX: # BB#0:
162164 ; AVX-NEXT: vcvtpd2dq %ymm0, %xmm0 # encoding: [0xc5,0xff,0xe6,0xc0]
163165 ; AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
164 ; AVX-NEXT: retl # encoding: [0xc3]
166 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
165167 ;
166168 ; AVX512VL-LABEL: test_x86_avx_cvt_pd2dq_256:
167169 ; AVX512VL: # BB#0:
168170 ; AVX512VL-NEXT: vcvtpd2dq %ymm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xff,0xe6,0xc0]
169171 ; AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
170 ; AVX512VL-NEXT: retl # encoding: [0xc3]
172 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
171173 %res = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1]
172174 ret <4 x i32> %res
173175 }
178180 ; CHECK-LABEL: test_x86_avx_cvt_ps2dq_256:
179181 ; CHECK: # BB#0:
180182 ; CHECK-NEXT: vcvtps2dq %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x5b,0xc0]
181 ; CHECK-NEXT: retl # encoding: [0xc3]
183 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
182184 %res = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1]
183185 ret <8 x i32> %res
184186 }
189191 ; AVX-LABEL: test_x86_avx_cvtdq2_ps_256:
190192 ; AVX: # BB#0:
191193 ; AVX-NEXT: vcvtdq2ps %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x5b,0xc0]
192 ; AVX-NEXT: retl # encoding: [0xc3]
194 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
193195 ;
194196 ; AVX512VL-LABEL: test_x86_avx_cvtdq2_ps_256:
195197 ; AVX512VL: # BB#0:
196198 ; AVX512VL-NEXT: vcvtdq2ps %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5b,0xc0]
197 ; AVX512VL-NEXT: retl # encoding: [0xc3]
199 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
198200 %res = call <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32> %a0) ; <<8 x float>> [#uses=1]
199201 ret <8 x float> %res
200202 }
206208 ; AVX: # BB#0:
207209 ; AVX-NEXT: vcvttpd2dq %ymm0, %xmm0 # encoding: [0xc5,0xfd,0xe6,0xc0]
208210 ; AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
209 ; AVX-NEXT: retl # encoding: [0xc3]
211 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
210212 ;
211213 ; AVX512VL-LABEL: test_x86_avx_cvtt_pd2dq_256:
212214 ; AVX512VL: # BB#0:
213215 ; AVX512VL-NEXT: vcvttpd2dq %ymm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe6,0xc0]
214216 ; AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
215 ; AVX512VL-NEXT: retl # encoding: [0xc3]
217 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
216218 %res = call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1]
217219 ret <4 x i32> %res
218220 }
223225 ; AVX-LABEL: test_x86_avx_cvtt_ps2dq_256:
224226 ; AVX: # BB#0:
225227 ; AVX-NEXT: vcvttps2dq %ymm0, %ymm0 # encoding: [0xc5,0xfe,0x5b,0xc0]
226 ; AVX-NEXT: retl # encoding: [0xc3]
228 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
227229 ;
228230 ; AVX512VL-LABEL: test_x86_avx_cvtt_ps2dq_256:
229231 ; AVX512VL: # BB#0:
230232 ; AVX512VL-NEXT: vcvttps2dq %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x5b,0xc0]
231 ; AVX512VL-NEXT: retl # encoding: [0xc3]
233 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
232234 %res = call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1]
233235 ret <8 x i32> %res
234236 }
239241 ; CHECK-LABEL: test_x86_avx_dp_ps_256:
240242 ; CHECK: # BB#0:
241243 ; CHECK-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x40,0xc1,0x07]
242 ; CHECK-NEXT: retl # encoding: [0xc3]
244 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
243245 %res = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
244246 ret <8 x float> %res
245247 }
250252 ; CHECK-LABEL: test_x86_avx_hadd_pd_256:
251253 ; CHECK: # BB#0:
252254 ; CHECK-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x7c,0xc1]
253 ; CHECK-NEXT: retl # encoding: [0xc3]
255 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
254256 %res = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
255257 ret <4 x double> %res
256258 }
261263 ; CHECK-LABEL: test_x86_avx_hadd_ps_256:
262264 ; CHECK: # BB#0:
263265 ; CHECK-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xff,0x7c,0xc1]
264 ; CHECK-NEXT: retl # encoding: [0xc3]
266 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
265267 %res = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
266268 ret <8 x float> %res
267269 }
272274 ; CHECK-LABEL: test_x86_avx_hsub_pd_256:
273275 ; CHECK: # BB#0:
274276 ; CHECK-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x7d,0xc1]
275 ; CHECK-NEXT: retl # encoding: [0xc3]
277 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
276278 %res = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
277279 ret <4 x double> %res
278280 }
283285 ; CHECK-LABEL: test_x86_avx_hsub_ps_256:
284286 ; CHECK: # BB#0:
285287 ; CHECK-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xff,0x7d,0xc1]
286 ; CHECK-NEXT: retl # encoding: [0xc3]
288 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
287289 %res = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
288290 ret <8 x float> %res
289291 }
291293
292294
293295 define <32 x i8> @test_x86_avx_ldu_dq_256(i8* %a0) {
294 ; CHECK-LABEL: test_x86_avx_ldu_dq_256:
295 ; CHECK: # BB#0:
296 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
297 ; CHECK-NEXT: vlddqu (%eax), %ymm0 # encoding: [0xc5,0xff,0xf0,0x00]
298 ; CHECK-NEXT: retl # encoding: [0xc3]
296 ; X86-LABEL: test_x86_avx_ldu_dq_256:
297 ; X86: # BB#0:
298 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
299 ; X86-NEXT: vlddqu (%eax), %ymm0 # encoding: [0xc5,0xff,0xf0,0x00]
300 ; X86-NEXT: ret{{[l|q]}} # encoding: [0xc3]
301 ;
302 ; X64-LABEL: test_x86_avx_ldu_dq_256:
303 ; X64: # BB#0:
304 ; X64-NEXT: vlddqu (%rdi), %ymm0 # encoding: [0xc5,0xff,0xf0,0x07]
305 ; X64-NEXT: ret{{[l|q]}} # encoding: [0xc3]
299306 %res = call <32 x i8> @llvm.x86.avx.ldu.dq.256(i8* %a0) ; <<32 x i8>> [#uses=1]
300307 ret <32 x i8> %res
301308 }
303310
304311
305312 define <2 x double> @test_x86_avx_maskload_pd(i8* %a0, <2 x i64> %mask) {
306 ; CHECK-LABEL: test_x86_avx_maskload_pd:
307 ; CHECK: # BB#0:
308 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
309 ; CHECK-NEXT: vmaskmovpd (%eax), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x2d,0x00]
310 ; CHECK-NEXT: retl # encoding: [0xc3]
313 ; X86-LABEL: test_x86_avx_maskload_pd:
314 ; X86: # BB#0:
315 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
316 ; X86-NEXT: vmaskmovpd (%eax), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x2d,0x00]
317 ; X86-NEXT: ret{{[l|q]}} # encoding: [0xc3]
318 ;
319 ; X64-LABEL: test_x86_avx_maskload_pd:
320 ; X64: # BB#0:
321 ; X64-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x2d,0x07]
322 ; X64-NEXT: ret{{[l|q]}} # encoding: [0xc3]
311323 %res = call <2 x double> @llvm.x86.avx.maskload.pd(i8* %a0, <2 x i64> %mask) ; <<2 x double>> [#uses=1]
312324 ret <2 x double> %res
313325 }
315327
316328
317329 define <4 x double> @test_x86_avx_maskload_pd_256(i8* %a0, <4 x i64> %mask) {
318 ; CHECK-LABEL: test_x86_avx_maskload_pd_256:
319 ; CHECK: # BB#0:
320 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
321 ; CHECK-NEXT: vmaskmovpd (%eax), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x2d,0x00]
322 ; CHECK-NEXT: retl # encoding: [0xc3]
330 ; X86-LABEL: test_x86_avx_maskload_pd_256:
331 ; X86: # BB#0:
332 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
333 ; X86-NEXT: vmaskmovpd (%eax), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x2d,0x00]
334 ; X86-NEXT: ret{{[l|q]}} # encoding: [0xc3]
335 ;
336 ; X64-LABEL: test_x86_avx_maskload_pd_256:
337 ; X64: # BB#0:
338 ; X64-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x2d,0x07]
339 ; X64-NEXT: ret{{[l|q]}} # encoding: [0xc3]
323340 %res = call <4 x double> @llvm.x86.avx.maskload.pd.256(i8* %a0, <4 x i64> %mask) ; <<4 x double>> [#uses=1]
324341 ret <4 x double> %res
325342 }
327344
328345
329346 define <4 x float> @test_x86_avx_maskload_ps(i8* %a0, <4 x i32> %mask) {
330 ; CHECK-LABEL: test_x86_avx_maskload_ps:
331 ; CHECK: # BB#0:
332 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
333 ; CHECK-NEXT: vmaskmovps (%eax), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x2c,0x00]
334 ; CHECK-NEXT: retl # encoding: [0xc3]
347 ; X86-LABEL: test_x86_avx_maskload_ps:
348 ; X86: # BB#0:
349 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
350 ; X86-NEXT: vmaskmovps (%eax), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x2c,0x00]
351 ; X86-NEXT: ret{{[l|q]}} # encoding: [0xc3]
352 ;
353 ; X64-LABEL: test_x86_avx_maskload_ps:
354 ; X64: # BB#0:
355 ; X64-NEXT: vmaskmovps (%rdi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x2c,0x07]
356 ; X64-NEXT: ret{{[l|q]}} # encoding: [0xc3]
335357 %res = call <4 x float> @llvm.x86.avx.maskload.ps(i8* %a0, <4 x i32> %mask) ; <<4 x float>> [#uses=1]
336358 ret <4 x float> %res
337359 }
339361
340362
341363 define <8 x float> @test_x86_avx_maskload_ps_256(i8* %a0, <8 x i32> %mask) {
342 ; CHECK-LABEL: test_x86_avx_maskload_ps_256:
343 ; CHECK: # BB#0:
344 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
345 ; CHECK-NEXT: vmaskmovps (%eax), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x2c,0x00]
346 ; CHECK-NEXT: retl # encoding: [0xc3]
364 ; X86-LABEL: test_x86_avx_maskload_ps_256:
365 ; X86: # BB#0:
366 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
367 ; X86-NEXT: vmaskmovps (%eax), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x2c,0x00]
368 ; X86-NEXT: ret{{[l|q]}} # encoding: [0xc3]
369 ;
370 ; X64-LABEL: test_x86_avx_maskload_ps_256:
371 ; X64: # BB#0:
372 ; X64-NEXT: vmaskmovps (%rdi), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x2c,0x07]
373 ; X64-NEXT: ret{{[l|q]}} # encoding: [0xc3]
347374 %res = call <8 x float> @llvm.x86.avx.maskload.ps.256(i8* %a0, <8 x i32> %mask) ; <<8 x float>> [#uses=1]
348375 ret <8 x float> %res
349376 }
351378
352379
353380 define void @test_x86_avx_maskstore_pd(i8* %a0, <2 x i64> %mask, <2 x double> %a2) {
354 ; CHECK-LABEL: test_x86_avx_maskstore_pd:
355 ; CHECK: # BB#0:
356 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
357 ; CHECK-NEXT: vmaskmovpd %xmm1, %xmm0, (%eax) # encoding: [0xc4,0xe2,0x79,0x2f,0x08]
358 ; CHECK-NEXT: retl # encoding: [0xc3]
381 ; X86-LABEL: test_x86_avx_maskstore_pd:
382 ; X86: # BB#0:
383 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
384 ; X86-NEXT: vmaskmovpd %xmm1, %xmm0, (%eax) # encoding: [0xc4,0xe2,0x79,0x2f,0x08]
385 ; X86-NEXT: ret{{[l|q]}} # encoding: [0xc3]
386 ;
387 ; X64-LABEL: test_x86_avx_maskstore_pd:
388 ; X64: # BB#0:
389 ; X64-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # encoding: [0xc4,0xe2,0x79,0x2f,0x0f]
390 ; X64-NEXT: ret{{[l|q]}} # encoding: [0xc3]
359391 call void @llvm.x86.avx.maskstore.pd(i8* %a0, <2 x i64> %mask, <2 x double> %a2)
360392 ret void
361393 }
363395
364396
365397 define void @test_x86_avx_maskstore_pd_256(i8* %a0, <4 x i64> %mask, <4 x double> %a2) {
366 ; CHECK-LABEL: test_x86_avx_maskstore_pd_256:
367 ; CHECK: # BB#0:
368 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
369 ; CHECK-NEXT: vmaskmovpd %ymm1, %ymm0, (%eax) # encoding: [0xc4,0xe2,0x7d,0x2f,0x08]
370 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
371 ; CHECK-NEXT: retl # encoding: [0xc3]
398 ; X86-LABEL: test_x86_avx_maskstore_pd_256:
399 ; X86: # BB#0:
400 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
401 ; X86-NEXT: vmaskmovpd %ymm1, %ymm0, (%eax) # encoding: [0xc4,0xe2,0x7d,0x2f,0x08]
402 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
403 ; X86-NEXT: ret{{[l|q]}} # encoding: [0xc3]
404 ;
405 ; X64-LABEL: test_x86_avx_maskstore_pd_256:
406 ; X64: # BB#0:
407 ; X64-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # encoding: [0xc4,0xe2,0x7d,0x2f,0x0f]
408 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
409 ; X64-NEXT: ret{{[l|q]}} # encoding: [0xc3]
372410 call void @llvm.x86.avx.maskstore.pd.256(i8* %a0, <4 x i64> %mask, <4 x double> %a2)
373411 ret void
374412 }
376414
377415
378416 define void @test_x86_avx_maskstore_ps(i8* %a0, <4 x i32> %mask, <4 x float> %a2) {
379 ; CHECK-LABEL: test_x86_avx_maskstore_ps:
380 ; CHECK: # BB#0:
381 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
382 ; CHECK-NEXT: vmaskmovps %xmm1, %xmm0, (%eax) # encoding: [0xc4,0xe2,0x79,0x2e,0x08]
383 ; CHECK-NEXT: retl # encoding: [0xc3]
417 ; X86-LABEL: test_x86_avx_maskstore_ps:
418 ; X86: # BB#0:
419 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
420 ; X86-NEXT: vmaskmovps %xmm1, %xmm0, (%eax) # encoding: [0xc4,0xe2,0x79,0x2e,0x08]
421 ; X86-NEXT: ret{{[l|q]}} # encoding: [0xc3]
422 ;
423 ; X64-LABEL: test_x86_avx_maskstore_ps:
424 ; X64: # BB#0:
425 ; X64-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # encoding: [0xc4,0xe2,0x79,0x2e,0x0f]
426 ; X64-NEXT: ret{{[l|q]}} # encoding: [0xc3]
384427 call void @llvm.x86.avx.maskstore.ps(i8* %a0, <4 x i32> %mask, <4 x float> %a2)
385428 ret void
386429 }
388431
389432
390433 define void @test_x86_avx_maskstore_ps_256(i8* %a0, <8 x i32> %mask, <8 x float> %a2) {
391 ; CHECK-LABEL: test_x86_avx_maskstore_ps_256:
392 ; CHECK: # BB#0:
393 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
394 ; CHECK-NEXT: vmaskmovps %ymm1, %ymm0, (%eax) # encoding: [0xc4,0xe2,0x7d,0x2e,0x08]
395 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
396 ; CHECK-NEXT: retl # encoding: [0xc3]
434 ; X86-LABEL: test_x86_avx_maskstore_ps_256:
435 ; X86: # BB#0:
436 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
437 ; X86-NEXT: vmaskmovps %ymm1, %ymm0, (%eax) # encoding: [0xc4,0xe2,0x7d,0x2e,0x08]
438 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
439 ; X86-NEXT: ret{{[l|q]}} # encoding: [0xc3]
440 ;
441 ; X64-LABEL: test_x86_avx_maskstore_ps_256:
442 ; X64: # BB#0:
443 ; X64-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # encoding: [0xc4,0xe2,0x7d,0x2e,0x0f]
444 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
445 ; X64-NEXT: ret{{[l|q]}} # encoding: [0xc3]
397446 call void @llvm.x86.avx.maskstore.ps.256(i8* %a0, <8 x i32> %mask, <8 x float> %a2)
398447 ret void
399448 }
404453 ; AVX-LABEL: test_x86_avx_max_pd_256:
405454 ; AVX: # BB#0:
406455 ; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x5f,0xc1]
407 ; AVX-NEXT: retl # encoding: [0xc3]
456 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
408457 ;
409458 ; AVX512VL-LABEL: test_x86_avx_max_pd_256:
410459 ; AVX512VL: # BB#0:
411460 ; AVX512VL-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x5f,0xc1]
412 ; AVX512VL-NEXT: retl # encoding: [0xc3]
461 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
413462 %res = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
414463 ret <4 x double> %res
415464 }
420469 ; AVX-LABEL: test_x86_avx_max_ps_256:
421470 ; AVX: # BB#0:
422471 ; AVX-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x5f,0xc1]
423 ; AVX-NEXT: retl # encoding: [0xc3]
472 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
424473 ;
425474 ; AVX512VL-LABEL: test_x86_avx_max_ps_256:
426475 ; AVX512VL: # BB#0:
427476 ; AVX512VL-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5f,0xc1]
428 ; AVX512VL-NEXT: retl # encoding: [0xc3]
477 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
429478 %res = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
430479 ret <8 x float> %res
431480 }
436485 ; AVX-LABEL: test_x86_avx_min_pd_256:
437486 ; AVX: # BB#0:
438487 ; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x5d,0xc1]
439 ; AVX-NEXT: retl # encoding: [0xc3]
488 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
440489 ;
441490 ; AVX512VL-LABEL: test_x86_avx_min_pd_256:
442491 ; AVX512VL: # BB#0:
443492 ; AVX512VL-NEXT: vminpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x5d,0xc1]
444 ; AVX512VL-NEXT: retl # encoding: [0xc3]
493 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
445494 %res = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
446495 ret <4 x double> %res
447496 }
452501 ; AVX-LABEL: test_x86_avx_min_ps_256:
453502 ; AVX: # BB#0:
454503 ; AVX-NEXT: vminps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x5d,0xc1]
455 ; AVX-NEXT: retl # encoding: [0xc3]
504 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
456505 ;
457506 ; AVX512VL-LABEL: test_x86_avx_min_ps_256:
458507 ; AVX512VL: # BB#0:
459508 ; AVX512VL-NEXT: vminps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5d,0xc1]
460 ; AVX512VL-NEXT: retl # encoding: [0xc3]
509 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
461510 %res = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
462511 ret <8 x float> %res
463512 }
469518 ; CHECK: # BB#0:
470519 ; CHECK-NEXT: vmovmskpd %ymm0, %eax # encoding: [0xc5,0xfd,0x50,0xc0]
471520 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
472 ; CHECK-NEXT: retl # encoding: [0xc3]
521 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
473522 %res = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0) ; [#uses=1]
474523 ret i32 %res
475524 }
481530 ; CHECK: # BB#0:
482531 ; CHECK-NEXT: vmovmskps %ymm0, %eax # encoding: [0xc5,0xfc,0x50,0xc0]
483532 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
484 ; CHECK-NEXT: retl # encoding: [0xc3]
533 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
485534 %res = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0) ; [#uses=1]
486535 ret i32 %res
487536 }
488537 declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone
489
490
491
492
493
494538
495539
496540 define i32 @test_x86_avx_ptestc_256(<4 x i64> %a0, <4 x i64> %a1) {
500544 ; CHECK-NEXT: vptest %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x17,0xc1]
501545 ; CHECK-NEXT: setb %al # encoding: [0x0f,0x92,0xc0]
502546 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
503 ; CHECK-NEXT: retl # encoding: [0xc3]
547 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
504548 %res = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a0, <4 x i64> %a1) ; [#uses=1]
505549 ret i32 %res
506550 }
514558 ; CHECK-NEXT: vptest %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x17,0xc1]
515559 ; CHECK-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
516560 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
517 ; CHECK-NEXT: retl # encoding: [0xc3]
561 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
518562 %res = call i32 @llvm.x86.avx.ptestnzc.256(<4 x i64> %a0, <4 x i64> %a1) ; [#uses=1]
519563 ret i32 %res
520564 }
528572 ; CHECK-NEXT: vptest %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x17,0xc1]
529573 ; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
530574 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
531 ; CHECK-NEXT: retl # encoding: [0xc3]
575 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
532576 %res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
533577 ret i32 %res
534578 }
539583 ; AVX-LABEL: test_x86_avx_rcp_ps_256:
540584 ; AVX: # BB#0:
541585 ; AVX-NEXT: vrcpps %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x53,0xc0]
542 ; AVX-NEXT: retl # encoding: [0xc3]
586 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
543587 ;
544588 ; AVX512VL-LABEL: test_x86_avx_rcp_ps_256:
545589 ; AVX512VL: # BB#0:
546590 ; AVX512VL-NEXT: vrcp14ps %ymm0, %ymm0 # encoding: [0x62,0xf2,0x7d,0x28,0x4c,0xc0]
547 ; AVX512VL-NEXT: retl # encoding: [0xc3]
591 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
548592 %res = call <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
549593 ret <8 x float> %res
550594 }
555599 ; CHECK-LABEL: test_x86_avx_round_pd_256:
556600 ; CHECK: # BB#0:
557601 ; CHECK-NEXT: vroundpd $7, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x09,0xc0,0x07]
558 ; CHECK-NEXT: retl # encoding: [0xc3]
602 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
559603 %res = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %a0, i32 7) ; <<4 x double>> [#uses=1]
560604 ret <4 x double> %res
561605 }
566610 ; CHECK-LABEL: test_x86_avx_round_ps_256:
567611 ; CHECK: # BB#0:
568612 ; CHECK-NEXT: vroundps $7, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x08,0xc0,0x07]
569 ; CHECK-NEXT: retl # encoding: [0xc3]
613 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
570614 %res = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %a0, i32 7) ; <<8 x float>> [#uses=1]
571615 ret <8 x float> %res
572616 }
577621 ; AVX-LABEL: test_x86_avx_rsqrt_ps_256:
578622 ; AVX: # BB#0:
579623 ; AVX-NEXT: vrsqrtps %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x52,0xc0]
580 ; AVX-NEXT: retl # encoding: [0xc3]
624 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
581625 ;
582626 ; AVX512VL-LABEL: test_x86_avx_rsqrt_ps_256:
583627 ; AVX512VL: # BB#0:
584628 ; AVX512VL-NEXT: vrsqrt14ps %ymm0, %ymm0 # encoding: [0x62,0xf2,0x7d,0x28,0x4e,0xc0]
585 ; AVX512VL-NEXT: retl # encoding: [0xc3]
629 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
586630 %res = call <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
587631 ret <8 x float> %res
588632 }
593637 ; CHECK-LABEL: test_x86_avx_sqrt_pd_256:
594638 ; CHECK: # BB#0:
595639 ; CHECK-NEXT: vsqrtpd %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x51,0xc0]
596 ; CHECK-NEXT: retl # encoding: [0xc3]
640 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
597641 %res = call <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double> %a0) ; <<4 x double>> [#uses=1]
598642 ret <4 x double> %res
599643 }
604648 ; CHECK-LABEL: test_x86_avx_sqrt_ps_256:
605649 ; CHECK: # BB#0:
606650 ; CHECK-NEXT: vsqrtps %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x51,0xc0]
607 ; CHECK-NEXT: retl # encoding: [0xc3]
651 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
608652 %res = call <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
609653 ret <8 x float> %res
610654 }
615659 ; AVX-LABEL: test_x86_avx_vpermilvar_pd:
616660 ; AVX: # BB#0:
617661 ; AVX-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x0d,0xc1]
618 ; AVX-NEXT: retl # encoding: [0xc3]
662 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
619663 ;
620664 ; AVX512VL-LABEL: test_x86_avx_vpermilvar_pd:
621665 ; AVX512VL: # BB#0:
622666 ; AVX512VL-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0d,0xc1]
623 ; AVX512VL-NEXT: retl # encoding: [0xc3]
667 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
624668 %res = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %a1) ; <<2 x double>> [#uses=1]
625669 ret <2 x double> %res
626670 }
631675 ; AVX-LABEL: test_x86_avx_vpermilvar_pd_256:
632676 ; AVX: # BB#0:
633677 ; AVX-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x0d,0xc1]
634 ; AVX-NEXT: retl # encoding: [0xc3]
678 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
635679 ;
636680 ; AVX512VL-LABEL: test_x86_avx_vpermilvar_pd_256:
637681 ; AVX512VL: # BB#0:
638682 ; AVX512VL-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x0d,0xc1]
639 ; AVX512VL-NEXT: retl # encoding: [0xc3]
683 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
640684 %res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %a1) ; <<4 x double>> [#uses=1]
641685 ret <4 x double> %res
642686 }
647691 ; AVX: # BB#0:
648692 ; AVX-NEXT: vpermilpd $9, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x05,0xc0,0x09]
649693 ; AVX-NEXT: # ymm0 = ymm0[1,0,2,3]
650 ; AVX-NEXT: retl # encoding: [0xc3]
694 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
651695 ;
652696 ; AVX512VL-LABEL: test_x86_avx_vpermilvar_pd_256_2:
653697 ; AVX512VL: # BB#0:
654698 ; AVX512VL-NEXT: vpermilpd $9, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x05,0xc0,0x09]
655699 ; AVX512VL-NEXT: # ymm0 = ymm0[1,0,2,3]
656 ; AVX512VL-NEXT: retl # encoding: [0xc3]
700 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
657701 %res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> <i64 2, i64 0, i64 0, i64 2>) ; <<4 x double>> [#uses=1]
658702 ret <4 x double> %res
659703 }
662706 ; AVX-LABEL: test_x86_avx_vpermilvar_ps:
663707 ; AVX: # BB#0:
664708 ; AVX-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x0c,0xc1]
665 ; AVX-NEXT: retl # encoding: [0xc3]
709 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
666710 ;
667711 ; AVX512VL-LABEL: test_x86_avx_vpermilvar_ps:
668712 ; AVX512VL: # BB#0:
669713 ; AVX512VL-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0c,0xc1]
670 ; AVX512VL-NEXT: retl # encoding: [0xc3]
714 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
671715 %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a1) ; <<4 x float>> [#uses=1]
672716 ret <4 x float> %res
673717 }
674718 define <4 x float> @test_x86_avx_vpermilvar_ps_load(<4 x float> %a0, <4 x i32>* %a1) {
675 ; AVX-LABEL: test_x86_avx_vpermilvar_ps_load:
676 ; AVX: # BB#0:
677 ; AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
678 ; AVX-NEXT: vpermilps (%eax), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x0c,0x00]
679 ; AVX-NEXT: retl # encoding: [0xc3]
680 ;
681 ; AVX512VL-LABEL: test_x86_avx_vpermilvar_ps_load:
682 ; AVX512VL: # BB#0:
683 ; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
684 ; AVX512VL-NEXT: vpermilps (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0c,0x00]
685 ; AVX512VL-NEXT: retl # encoding: [0xc3]
719 ; X86-AVX-LABEL: test_x86_avx_vpermilvar_ps_load:
720 ; X86-AVX: # BB#0:
721 ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
722 ; X86-AVX-NEXT: vpermilps (%eax), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x0c,0x00]
723 ; X86-AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
724 ;
725 ; X86-AVX512VL-LABEL: test_x86_avx_vpermilvar_ps_load:
726 ; X86-AVX512VL: # BB#0:
727 ; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
728 ; X86-AVX512VL-NEXT: vpermilps (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0c,0x00]
729 ; X86-AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
730 ;
731 ; X64-AVX-LABEL: test_x86_avx_vpermilvar_ps_load:
732 ; X64-AVX: # BB#0:
733 ; X64-AVX-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x0c,0x07]
734 ; X64-AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
735 ;
736 ; X64-AVX512VL-LABEL: test_x86_avx_vpermilvar_ps_load:
737 ; X64-AVX512VL: # BB#0:
738 ; X64-AVX512VL-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0c,0x07]
739 ; X64-AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
686740 %a2 = load <4 x i32>, <4 x i32>* %a1
687741 %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a2) ; <<4 x float>> [#uses=1]
688742 ret <4 x float> %res
694748 ; AVX-LABEL: test_x86_avx_vpermilvar_ps_256:
695749 ; AVX: # BB#0:
696750 ; AVX-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x0c,0xc1]
697 ; AVX-NEXT: retl # encoding: [0xc3]
751 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
698752 ;
699753 ; AVX512VL-LABEL: test_x86_avx_vpermilvar_ps_256:
700754 ; AVX512VL: # BB#0:
701755 ; AVX512VL-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x0c,0xc1]
702 ; AVX512VL-NEXT: retl # encoding: [0xc3]
756 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
703757 %res = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %a1) ; <<8 x float>> [#uses=1]
704758 ret <8 x float> %res
705759 }
712766 ; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
713767 ; CHECK-NEXT: vtestpd %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x79,0x0f,0xc1]
714768 ; CHECK-NEXT: setb %al # encoding: [0x0f,0x92,0xc0]
715 ; CHECK-NEXT: retl # encoding: [0xc3]
769 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
716770 %res = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
717771 ret i32 %res
718772 }
726780 ; CHECK-NEXT: vtestpd %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x0f,0xc1]
727781 ; CHECK-NEXT: setb %al # encoding: [0x0f,0x92,0xc0]
728782 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
729 ; CHECK-NEXT: retl # encoding: [0xc3]
783 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
730784 %res = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
731785 ret i32 %res
732786 }
739793 ; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
740794 ; CHECK-NEXT: vtestps %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x79,0x0e,0xc1]
741795 ; CHECK-NEXT: setb %al # encoding: [0x0f,0x92,0xc0]
742 ; CHECK-NEXT: retl # encoding: [0xc3]
796 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
743797 %res = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
744798 ret i32 %res
745799 }
753807 ; CHECK-NEXT: vtestps %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x0e,0xc1]
754808 ; CHECK-NEXT: setb %al # encoding: [0x0f,0x92,0xc0]
755809 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
756 ; CHECK-NEXT: retl # encoding: [0xc3]
810 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
757811 %res = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
758812 ret i32 %res
759813 }
766820 ; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
767821 ; CHECK-NEXT: vtestpd %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x79,0x0f,0xc1]
768822 ; CHECK-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
769 ; CHECK-NEXT: retl # encoding: [0xc3]
823 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
770824 %res = call i32 @llvm.x86.avx.vtestnzc.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
771825 ret i32 %res
772826 }
780834 ; CHECK-NEXT: vtestpd %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x0f,0xc1]
781835 ; CHECK-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
782836 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
783 ; CHECK-NEXT: retl # encoding: [0xc3]
837 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
784838 %res = call i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
785839 ret i32 %res
786840 }
793847 ; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
794848 ; CHECK-NEXT: vtestps %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x79,0x0e,0xc1]
795849 ; CHECK-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
796 ; CHECK-NEXT: retl # encoding: [0xc3]
850 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
797851 %res = call i32 @llvm.x86.avx.vtestnzc.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
798852 ret i32 %res
799853 }
807861 ; CHECK-NEXT: vtestps %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x0e,0xc1]
808862 ; CHECK-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
809863 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
810 ; CHECK-NEXT: retl # encoding: [0xc3]
864 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
811865 %res = call i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
812866 ret i32 %res
813867 }
820874 ; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
821875 ; CHECK-NEXT: vtestpd %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x79,0x0f,0xc1]
822876 ; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
823 ; CHECK-NEXT: retl # encoding: [0xc3]
877 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
824878 %res = call i32 @llvm.x86.avx.vtestz.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
825879 ret i32 %res
826880 }
834888 ; CHECK-NEXT: vtestpd %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x0f,0xc1]
835889 ; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
836890 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
837 ; CHECK-NEXT: retl # encoding: [0xc3]
891 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
838892 %res = call i32 @llvm.x86.avx.vtestz.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
839893 ret i32 %res
840894 }
847901 ; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
848902 ; CHECK-NEXT: vtestps %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x79,0x0e,0xc1]
849903 ; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
850 ; CHECK-NEXT: retl # encoding: [0xc3]
904 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
851905 %res = call i32 @llvm.x86.avx.vtestz.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
852906 ret i32 %res
853907 }
861915 ; CHECK-NEXT: vtestps %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x0e,0xc1]
862916 ; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
863917 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
864 ; CHECK-NEXT: retl # encoding: [0xc3]
918 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
865919 %res = call i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
866920 ret i32 %res
867921 }
872926 ; CHECK-LABEL: test_x86_avx_vzeroall:
873927 ; CHECK: # BB#0:
874928 ; CHECK-NEXT: vzeroall # encoding: [0xc5,0xfc,0x77]
875 ; CHECK-NEXT: retl # encoding: [0xc3]
929 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
876930 call void @llvm.x86.avx.vzeroall()
877931 ret void
878932 }
883937 ; CHECK-LABEL: test_x86_avx_vzeroupper:
884938 ; CHECK: # BB#0:
885939 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
886 ; CHECK-NEXT: retl # encoding: [0xc3]
940 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
887941 call void @llvm.x86.avx.vzeroupper()
888942 ret void
889943 }
890944 declare void @llvm.x86.avx.vzeroupper() nounwind
891945
892946 define void @movnt_dq(i8* %p, <2 x i64> %a1) nounwind {
893 ; AVX-LABEL: movnt_dq:
894 ; AVX: # BB#0:
895 ; AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
896 ; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
897 ; AVX-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfb,0xc1]
898 ; AVX-NEXT: vmovntdq %ymm0, (%eax) # encoding: [0xc5,0xfd,0xe7,0x00]
899 ; AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
900 ; AVX-NEXT: retl # encoding: [0xc3]
901 ;
902 ; AVX512VL-LABEL: movnt_dq:
903 ; AVX512VL: # BB#0:
904 ; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
905 ; AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
906 ; AVX512VL-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfb,0xc1]
907 ; AVX512VL-NEXT: vmovntdq %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe7,0x00]
908 ; AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
909 ; AVX512VL-NEXT: retl # encoding: [0xc3]
947 ; X86-AVX-LABEL: movnt_dq:
948 ; X86-AVX: # BB#0:
949 ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
950 ; X86-AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
951 ; X86-AVX-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfb,0xc1]
952 ; X86-AVX-NEXT: vmovntdq %ymm0, (%eax) # encoding: [0xc5,0xfd,0xe7,0x00]
953 ; X86-AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
954 ; X86-AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
955 ;
956 ; X86-AVX512VL-LABEL: movnt_dq:
957 ; X86-AVX512VL: # BB#0:
958 ; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
959 ; X86-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
960 ; X86-AVX512VL-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfb,0xc1]
961 ; X86-AVX512VL-NEXT: vmovntdq %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe7,0x00]
962 ; X86-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
963 ; X86-AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
964 ;
965 ; X64-AVX-LABEL: movnt_dq:
966 ; X64-AVX: # BB#0:
967 ; X64-AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
968 ; X64-AVX-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfb,0xc1]
969 ; X64-AVX-NEXT: vmovntdq %ymm0, (%rdi) # encoding: [0xc5,0xfd,0xe7,0x07]
970 ; X64-AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
971 ; X64-AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
972 ;
973 ; X64-AVX512VL-LABEL: movnt_dq:
974 ; X64-AVX512VL: # BB#0:
975 ; X64-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
976 ; X64-AVX512VL-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfb,0xc1]
977 ; X64-AVX512VL-NEXT: vmovntdq %ymm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe7,0x07]
978 ; X64-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
979 ; X64-AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
910980 %a2 = add <2 x i64> %a1, <i64 1, i64 1>
911981 %a3 = shufflevector <2 x i64> %a2, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
912982 tail call void @llvm.x86.avx.movnt.dq.256(i8* %p, <4 x i64> %a3) nounwind
915985 declare void @llvm.x86.avx.movnt.dq.256(i8*, <4 x i64>) nounwind
916986
917987 define void @movnt_ps(i8* %p, <8 x float> %a) nounwind {
918 ; AVX-LABEL: movnt_ps:
919 ; AVX: # BB#0:
920 ; AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
921 ; AVX-NEXT: vmovntps %ymm0, (%eax) # encoding: [0xc5,0xfc,0x2b,0x00]
922 ; AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
923 ; AVX-NEXT: retl # encoding: [0xc3]
924 ;
925 ; AVX512VL-LABEL: movnt_ps:
926 ; AVX512VL: # BB#0:
927 ; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
928 ; AVX512VL-NEXT: vmovntps %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x2b,0x00]
929 ; AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
930 ; AVX512VL-NEXT: retl # encoding: [0xc3]
988 ; X86-AVX-LABEL: movnt_ps:
989 ; X86-AVX: # BB#0:
990 ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
991 ; X86-AVX-NEXT: vmovntps %ymm0, (%eax) # encoding: [0xc5,0xfc,0x2b,0x00]
992 ; X86-AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
993 ; X86-AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
994 ;
995 ; X86-AVX512VL-LABEL: movnt_ps:
996 ; X86-AVX512VL: # BB#0:
997 ; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
998 ; X86-AVX512VL-NEXT: vmovntps %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x2b,0x00]
999 ; X86-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1000 ; X86-AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1001 ;
1002 ; X64-AVX-LABEL: movnt_ps:
1003 ; X64-AVX: # BB#0:
1004 ; X64-AVX-NEXT: vmovntps %ymm0, (%rdi) # encoding: [0xc5,0xfc,0x2b,0x07]
1005 ; X64-AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1006 ; X64-AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1007 ;
1008 ; X64-AVX512VL-LABEL: movnt_ps:
1009 ; X64-AVX512VL: # BB#0:
1010 ; X64-AVX512VL-NEXT: vmovntps %ymm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x2b,0x07]
1011 ; X64-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1012 ; X64-AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
9311013 tail call void @llvm.x86.avx.movnt.ps.256(i8* %p, <8 x float> %a) nounwind
9321014 ret void
9331015 }
9351017
9361018 define void @movnt_pd(i8* %p, <4 x double> %a1) nounwind {
9371019 ; add operation forces the execution domain.
938 ; AVX-LABEL: movnt_pd:
939 ; AVX: # BB#0:
940 ; AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
941 ; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x57,0xc9]
942 ; AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x58,0xc1]
943 ; AVX-NEXT: vmovntpd %ymm0, (%eax) # encoding: [0xc5,0xfd,0x2b,0x00]
944 ; AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
945 ; AVX-NEXT: retl # encoding: [0xc3]
946 ;
947 ; AVX512VL-LABEL: movnt_pd:
948 ; AVX512VL: # BB#0:
949 ; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
950 ; AVX512VL-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x57,0xc9]
951 ; AVX512VL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc1]
952 ; AVX512VL-NEXT: vmovntpd %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x2b,0x00]
953 ; AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
954 ; AVX512VL-NEXT: retl # encoding: [0xc3]
1020 ; X86-AVX-LABEL: movnt_pd:
1021 ; X86-AVX: # BB#0:
1022 ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1023 ; X86-AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x57,0xc9]
1024 ; X86-AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x58,0xc1]
1025 ; X86-AVX-NEXT: vmovntpd %ymm0, (%eax) # encoding: [0xc5,0xfd,0x2b,0x00]
1026 ; X86-AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1027 ; X86-AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1028 ;
1029 ; X86-AVX512VL-LABEL: movnt_pd:
1030 ; X86-AVX512VL: # BB#0:
1031 ; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1032 ; X86-AVX512VL-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x57,0xc9]
1033 ; X86-AVX512VL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc1]
1034 ; X86-AVX512VL-NEXT: vmovntpd %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x2b,0x00]
1035 ; X86-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1036 ; X86-AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1037 ;
1038 ; X64-AVX-LABEL: movnt_pd:
1039 ; X64-AVX: # BB#0:
1040 ; X64-AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x57,0xc9]
1041 ; X64-AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x58,0xc1]
1042 ; X64-AVX-NEXT: vmovntpd %ymm0, (%rdi) # encoding: [0xc5,0xfd,0x2b,0x07]
1043 ; X64-AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1044 ; X64-AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1045 ;
1046 ; X64-AVX512VL-LABEL: movnt_pd:
1047 ; X64-AVX512VL: # BB#0:
1048 ; X64-AVX512VL-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x57,0xc9]
1049 ; X64-AVX512VL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc1]
1050 ; X64-AVX512VL-NEXT: vmovntpd %ymm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x2b,0x07]
1051 ; X64-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1052 ; X64-AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
9551053 %a2 = fadd <4 x double> %a1, <double 0x0, double 0x0, double 0x0, double 0x0>
9561054 tail call void @llvm.x86.avx.movnt.pd.256(i8* %p, <4 x double> %a2) nounwind
9571055 ret void
9641062 ; CHECK-LABEL: test_x86_pclmulqdq:
9651063 ; CHECK: # BB#0:
9661064 ; CHECK-NEXT: vpclmulqdq $0, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x44,0xc1,0x00]
967 ; CHECK-NEXT: retl # encoding: [0xc3]
1065 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
9681066 %res = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %a0, <2 x i64> %a1, i8 0) ; <<2 x i64>> [#uses=1]
9691067 ret <2 x i64> %res
9701068 }
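The shared CHECK lines above come from utils/update_llc_test_checks.py, which is what this commit reran: when two RUN lines produce identical assembly for a function, the script emits one block under the common prefix, scrubbing the return instruction to the FileCheck regex ret{{[l|q]}} so it matches both the 32-bit retl and the 64-bit retq. A minimal sketch of such a regeneration run, assuming an in-tree llc on PATH; the test path is illustrative, not taken from this commit:

    # Rebuild the autogenerated CHECK assertions for one llc test file.
    python utils/update_llc_test_checks.py test/CodeGen/X86/avx-intrinsics-x86.ll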
0 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=corei7 -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=corei7 -mattr=+avx512vl | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512VL
1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512VL
33
44 define <4 x double> @test_x86_avx_vzeroall(<4 x double> %a, <4 x double> %b) {
55 ; AVX-LABEL: test_x86_avx_vzeroall:
88 ; AVX-NEXT: vmovupd %ymm0, -{{[0-9]+}}(%rsp) # 32-byte Spill
99 ; AVX-NEXT: vzeroall
1010 ; AVX-NEXT: vmovups -{{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
11 ; AVX-NEXT: retq
11 ; AVX-NEXT: ret{{[l|q]}}
1212 ;
1313 ; AVX512VL-LABEL: test_x86_avx_vzeroall:
1414 ; AVX512VL: # BB#0:
1515 ; AVX512VL-NEXT: vaddpd %ymm1, %ymm0, %ymm16
1616 ; AVX512VL-NEXT: vzeroall
1717 ; AVX512VL-NEXT: vmovapd %ymm16, %ymm0
18 ; AVX512VL-NEXT: retq
18 ; AVX512VL-NEXT: ret{{[l|q]}}
1919 %c = fadd <4 x double> %a, %b
2020 call void @llvm.x86.avx.vzeroall()
2121 ret <4 x double> %c
2929 ; AVX-NEXT: vmovupd %ymm0, -{{[0-9]+}}(%rsp) # 32-byte Spill
3030 ; AVX-NEXT: vzeroupper
3131 ; AVX-NEXT: vmovups -{{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
32 ; AVX-NEXT: retq
32 ; AVX-NEXT: ret{{[l|q]}}
3333 ;
3434 ; AVX512VL-LABEL: test_x86_avx_vzeroupper:
3535 ; AVX512VL: # BB#0:
3636 ; AVX512VL-NEXT: vaddpd %ymm1, %ymm0, %ymm16
3737 ; AVX512VL-NEXT: vzeroupper
3838 ; AVX512VL-NEXT: vmovapd %ymm16, %ymm0
39 ; AVX512VL-NEXT: retq
39 ; AVX512VL-NEXT: ret{{[l|q]}}
4040 %c = fadd <4 x double> %a, %b
4141 call void @llvm.x86.avx.vzeroupper()
4242 ret <4 x double> %c