llvm.org GIT mirror llvm / ba555e4
[X86][AVX2] Regenerate and add 32-bit tests to core tests
Simon Pilgrim
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@283666 91177308-0d34-0410-b5e6-96231b3b80d8
10 changed file(s) with 1689 addition(s) and 663 deletion(s).
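The X32/X64 assertions below are generated by utils/update_llc_test_checks.py (see the NOTE line at the top of each regenerated file). As a sketch of the workflow, a typical invocation against a freshly built llc looks like the following; the build path is illustrative and the --llc-binary flag is assumed from the script's usage, not taken from this commit:

  llvm/utils/update_llc_test_checks.py --llc-binary=build/bin/llc llvm/test/CodeGen/X86/avx2-arith.ll

The script executes each RUN line, captures llc's assembly output, and rewrites the CHECK blocks in place, one prefix per RUN line; that is why adding the i686 RUN line produces a parallel X32 body for every function.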
None ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
1
2 ; CHECK: vpaddq %ymm
0 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1 ; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X32
2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X64
3
34 define <4 x i64> @test_vpaddq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
5 ; X32-LABEL: test_vpaddq:
6 ; X32: ## BB#0:
7 ; X32-NEXT: vpaddq %ymm1, %ymm0, %ymm0
8 ; X32-NEXT: retl
9 ;
10 ; X64-LABEL: test_vpaddq:
11 ; X64: ## BB#0:
12 ; X64-NEXT: vpaddq %ymm1, %ymm0, %ymm0
13 ; X64-NEXT: retq
414 %x = add <4 x i64> %i, %j
515 ret <4 x i64> %x
616 }
717
8 ; CHECK: vpaddd %ymm
918 define <8 x i32> @test_vpaddd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
19 ; X32-LABEL: test_vpaddd:
20 ; X32: ## BB#0:
21 ; X32-NEXT: vpaddd %ymm1, %ymm0, %ymm0
22 ; X32-NEXT: retl
23 ;
24 ; X64-LABEL: test_vpaddd:
25 ; X64: ## BB#0:
26 ; X64-NEXT: vpaddd %ymm1, %ymm0, %ymm0
27 ; X64-NEXT: retq
1028 %x = add <8 x i32> %i, %j
1129 ret <8 x i32> %x
1230 }
1331
14 ; CHECK: vpaddw %ymm
1532 define <16 x i16> @test_vpaddw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
33 ; X32-LABEL: test_vpaddw:
34 ; X32: ## BB#0:
35 ; X32-NEXT: vpaddw %ymm1, %ymm0, %ymm0
36 ; X32-NEXT: retl
37 ;
38 ; X64-LABEL: test_vpaddw:
39 ; X64: ## BB#0:
40 ; X64-NEXT: vpaddw %ymm1, %ymm0, %ymm0
41 ; X64-NEXT: retq
1642 %x = add <16 x i16> %i, %j
1743 ret <16 x i16> %x
1844 }
1945
20 ; CHECK: vpaddb %ymm
2146 define <32 x i8> @test_vpaddb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
47 ; X32-LABEL: test_vpaddb:
48 ; X32: ## BB#0:
49 ; X32-NEXT: vpaddb %ymm1, %ymm0, %ymm0
50 ; X32-NEXT: retl
51 ;
52 ; X64-LABEL: test_vpaddb:
53 ; X64: ## BB#0:
54 ; X64-NEXT: vpaddb %ymm1, %ymm0, %ymm0
55 ; X64-NEXT: retq
2256 %x = add <32 x i8> %i, %j
2357 ret <32 x i8> %x
2458 }
2559
26 ; CHECK: vpsubq %ymm
2760 define <4 x i64> @test_vpsubq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
61 ; X32-LABEL: test_vpsubq:
62 ; X32: ## BB#0:
63 ; X32-NEXT: vpsubq %ymm1, %ymm0, %ymm0
64 ; X32-NEXT: retl
65 ;
66 ; X64-LABEL: test_vpsubq:
67 ; X64: ## BB#0:
68 ; X64-NEXT: vpsubq %ymm1, %ymm0, %ymm0
69 ; X64-NEXT: retq
2870 %x = sub <4 x i64> %i, %j
2971 ret <4 x i64> %x
3072 }
3173
32 ; CHECK: vpsubd %ymm
3374 define <8 x i32> @test_vpsubd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
75 ; X32-LABEL: test_vpsubd:
76 ; X32: ## BB#0:
77 ; X32-NEXT: vpsubd %ymm1, %ymm0, %ymm0
78 ; X32-NEXT: retl
79 ;
80 ; X64-LABEL: test_vpsubd:
81 ; X64: ## BB#0:
82 ; X64-NEXT: vpsubd %ymm1, %ymm0, %ymm0
83 ; X64-NEXT: retq
3484 %x = sub <8 x i32> %i, %j
3585 ret <8 x i32> %x
3686 }
3787
38 ; CHECK: vpsubw %ymm
3988 define <16 x i16> @test_vpsubw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
89 ; X32-LABEL: test_vpsubw:
90 ; X32: ## BB#0:
91 ; X32-NEXT: vpsubw %ymm1, %ymm0, %ymm0
92 ; X32-NEXT: retl
93 ;
94 ; X64-LABEL: test_vpsubw:
95 ; X64: ## BB#0:
96 ; X64-NEXT: vpsubw %ymm1, %ymm0, %ymm0
97 ; X64-NEXT: retq
4098 %x = sub <16 x i16> %i, %j
4199 ret <16 x i16> %x
42100 }
43101
44 ; CHECK: vpsubb %ymm
45102 define <32 x i8> @test_vpsubb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
103 ; X32-LABEL: test_vpsubb:
104 ; X32: ## BB#0:
105 ; X32-NEXT: vpsubb %ymm1, %ymm0, %ymm0
106 ; X32-NEXT: retl
107 ;
108 ; X64-LABEL: test_vpsubb:
109 ; X64: ## BB#0:
110 ; X64-NEXT: vpsubb %ymm1, %ymm0, %ymm0
111 ; X64-NEXT: retq
46112 %x = sub <32 x i8> %i, %j
47113 ret <32 x i8> %x
48114 }
49115
50 ; CHECK: vpmulld %ymm
51116 define <8 x i32> @test_vpmulld(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
117 ; X32-LABEL: test_vpmulld:
118 ; X32: ## BB#0:
119 ; X32-NEXT: vpmulld %ymm1, %ymm0, %ymm0
120 ; X32-NEXT: retl
121 ;
122 ; X64-LABEL: test_vpmulld:
123 ; X64: ## BB#0:
124 ; X64-NEXT: vpmulld %ymm1, %ymm0, %ymm0
125 ; X64-NEXT: retq
52126 %x = mul <8 x i32> %i, %j
53127 ret <8 x i32> %x
54128 }
55129
56 ; CHECK: vpmullw %ymm
57130 define <16 x i16> @test_vpmullw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
131 ; X32-LABEL: test_vpmullw:
132 ; X32: ## BB#0:
133 ; X32-NEXT: vpmullw %ymm1, %ymm0, %ymm0
134 ; X32-NEXT: retl
135 ;
136 ; X64-LABEL: test_vpmullw:
137 ; X64: ## BB#0:
138 ; X64-NEXT: vpmullw %ymm1, %ymm0, %ymm0
139 ; X64-NEXT: retq
58140 %x = mul <16 x i16> %i, %j
59141 ret <16 x i16> %x
60142 }
61143
62 ; CHECK: mul-v16i8
63 ; CHECK: # BB#0:
64 ; CHECK-NEXT: vpmovsxbw %xmm1, %ymm1
65 ; CHECK-NEXT: vpmovsxbw %xmm0, %ymm0
66 ; CHECK-NEXT: vpmullw %ymm1, %ymm0, %ymm0
67 ; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm1
68 ; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
69 ; CHECK-NEXT: vpshufb %xmm2, %xmm1, %xmm1
70 ; CHECK-NEXT: vpshufb %xmm2, %xmm0, %xmm0
71 ; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
72 ; CHECK-NEXT: vzeroupper
73 ; CHECK-NEXT: retq
74144 define <16 x i8> @mul-v16i8(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
75145 %x = mul <16 x i8> %i, %j
76146 ret <16 x i8> %x
77147 }
78148
79 ; CHECK: mul-v32i8
80 ; CHECK: # BB#0:
81 ; CHECK-NEXT: vextracti128 $1, %ymm1, %xmm2
82 ; CHECK-NEXT: vpmovsxbw %xmm2, %ymm2
83 ; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm3
84 ; CHECK-NEXT: vpmovsxbw %xmm3, %ymm3
85 ; CHECK-NEXT: vpmullw %ymm2, %ymm3, %ymm2
86 ; CHECK-NEXT: vextracti128 $1, %ymm2, %xmm3
87 ; CHECK-NEXT: vmovdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
88 ; CHECK-NEXT: vpshufb %xmm4, %xmm3, %xmm3
89 ; CHECK-NEXT: vpshufb %xmm4, %xmm2, %xmm2
90 ; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
91 ; CHECK-NEXT: vpmovsxbw %xmm1, %ymm1
92 ; CHECK-NEXT: vpmovsxbw %xmm0, %ymm0
93 ; CHECK-NEXT: vpmullw %ymm1, %ymm0, %ymm0
94 ; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm1
95 ; CHECK-NEXT: vpshufb %xmm4, %xmm1, %xmm1
96 ; CHECK-NEXT: vpshufb %xmm4, %xmm0, %xmm0
97 ; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
98 ; CHECK-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
99 ; CHECK-NEXT: retq
100149 define <32 x i8> @mul-v32i8(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
101150 %x = mul <32 x i8> %i, %j
102151 ret <32 x i8> %x
103152 }
104153
105 ; CHECK: mul-v4i64
106 ; CHECK: vpmuludq %ymm
107 ; CHECK-NEXT: vpsrlq $32, %ymm
108 ; CHECK-NEXT: vpmuludq %ymm
109 ; CHECK-NEXT: vpsllq $32, %ymm
110 ; CHECK-NEXT: vpaddq %ymm
111 ; CHECK-NEXT: vpsrlq $32, %ymm
112 ; CHECK-NEXT: vpmuludq %ymm
113 ; CHECK-NEXT: vpsllq $32, %ymm
114 ; CHECK-NEXT: vpaddq %ymm
115154 define <4 x i64> @mul-v4i64(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
116155 %x = mul <4 x i64> %i, %j
117156 ret <4 x i64> %x
118157 }
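; AVX2 has no packed 64-bit multiply, so mul-v4i64 above is expanded from
; 32-bit halves: writing x = xh*2^32 + xl and y = yh*2^32 + yl,
;   x*y = xl*yl + ((xl*yh + xh*yl) << 32)   (mod 2^64)
; which is exactly the vpmuludq / vpsrlq $32 / vpsllq $32 / vpaddq sequence
; the checks match.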
119158
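; The mul_const tests that follow cover strength reduction of constant vector
; multiplies: a multiply by 2 becomes an add, a power of two becomes a shift,
; -1 becomes a subtract from zero, 0 folds to an xor of the register with
; itself, and non-trivial constants fall back to a real vpmulld.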
120 ; CHECK: mul_const1
121 ; CHECK: vpaddd
122 ; CHECK: ret
123159 define <8 x i32> @mul_const1(<8 x i32> %x) {
160 ; X32-LABEL: mul_const1:
161 ; X32: ## BB#0:
162 ; X32-NEXT: vpaddd %ymm0, %ymm0, %ymm0
163 ; X32-NEXT: retl
164 ;
165 ; X64-LABEL: mul_const1:
166 ; X64: ## BB#0:
167 ; X64-NEXT: vpaddd %ymm0, %ymm0, %ymm0
168 ; X64-NEXT: retq
124169 %y = mul <8 x i32> %x, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
125170 ret <8 x i32> %y
126171 }
127172
128 ; CHECK: mul_const2
129 ; CHECK: vpsllq $2
130 ; CHECK: ret
131173 define <4 x i64> @mul_const2(<4 x i64> %x) {
174 ; X32-LABEL: mul_const2:
175 ; X32: ## BB#0:
176 ; X32-NEXT: vpsllq $2, %ymm0, %ymm0
177 ; X32-NEXT: retl
178 ;
179 ; X64-LABEL: mul_const2:
180 ; X64: ## BB#0:
181 ; X64-NEXT: vpsllq $2, %ymm0, %ymm0
182 ; X64-NEXT: retq
132183 %y = mul <4 x i64> %x, <i64 4, i64 4, i64 4, i64 4>
133184 ret <4 x i64> %y
134185 }
135186
136 ; CHECK: mul_const3
137 ; CHECK: vpsllw $3
138 ; CHECK: ret
139187 define <16 x i16> @mul_const3(<16 x i16> %x) {
188 ; X32-LABEL: mul_const3:
189 ; X32: ## BB#0:
190 ; X32-NEXT: vpsllw $3, %ymm0, %ymm0
191 ; X32-NEXT: retl
192 ;
193 ; X64-LABEL: mul_const3:
194 ; X64: ## BB#0:
195 ; X64-NEXT: vpsllw $3, %ymm0, %ymm0
196 ; X64-NEXT: retq
140197 %y = mul <16 x i16> %x, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
141198 ret <16 x i16> %y
142199 }
143200
144 ; CHECK: mul_const4
145 ; CHECK: vpxor
146 ; CHECK: vpsubq
147 ; CHECK: ret
148201 define <4 x i64> @mul_const4(<4 x i64> %x) {
202 ; X32-LABEL: mul_const4:
203 ; X32: ## BB#0:
204 ; X32-NEXT: vpxor %ymm1, %ymm1, %ymm1
205 ; X32-NEXT: vpsubq %ymm0, %ymm1, %ymm0
206 ; X32-NEXT: retl
207 ;
208 ; X64-LABEL: mul_const4:
209 ; X64: ## BB#0:
210 ; X64-NEXT: vpxor %ymm1, %ymm1, %ymm1
211 ; X64-NEXT: vpsubq %ymm0, %ymm1, %ymm0
212 ; X64-NEXT: retq
149213 %y = mul <4 x i64> %x, <i64 -1, i64 -1, i64 -1, i64 -1>
150214 ret <4 x i64> %y
151215 }
152216
153 ; CHECK: mul_const5
154 ; CHECK: vxorps
155 ; CHECK-NEXT: ret
156217 define <8 x i32> @mul_const5(<8 x i32> %x) {
218 ; X32-LABEL: mul_const5:
219 ; X32: ## BB#0:
220 ; X32-NEXT: vxorps %ymm0, %ymm0, %ymm0
221 ; X32-NEXT: retl
222 ;
223 ; X64-LABEL: mul_const5:
224 ; X64: ## BB#0:
225 ; X64-NEXT: vxorps %ymm0, %ymm0, %ymm0
226 ; X64-NEXT: retq
157227 %y = mul <8 x i32> %x, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
158228 ret <8 x i32> %y
159229 }
160230
161 ; CHECK: mul_const6
162 ; CHECK: vpmulld
163 ; CHECK: ret
164231 define <8 x i32> @mul_const6(<8 x i32> %x) {
232 ; X32-LABEL: mul_const6:
233 ; X32: ## BB#0:
234 ; X32-NEXT: vpmulld LCPI18_0, %ymm0, %ymm0
235 ; X32-NEXT: retl
236 ;
237 ; X64-LABEL: mul_const6:
238 ; X64: ## BB#0:
239 ; X64-NEXT: vpmulld {{.*}}(%rip), %ymm0, %ymm0
240 ; X64-NEXT: retq
165241 %y = mul <8 x i32> %x,
166242 ret <8 x i32> %y
167243 }
168244
169 ; CHECK: mul_const7
170 ; CHECK: vpaddq
171 ; CHECK: vpaddq
172 ; CHECK: ret
173245 define <8 x i64> @mul_const7(<8 x i64> %x) {
246 ; X32-LABEL: mul_const7:
247 ; X32: ## BB#0:
248 ; X32-NEXT: vpaddq %ymm0, %ymm0, %ymm0
249 ; X32-NEXT: vpaddq %ymm1, %ymm1, %ymm1
250 ; X32-NEXT: retl
251 ;
252 ; X64-LABEL: mul_const7:
253 ; X64: ## BB#0:
254 ; X64-NEXT: vpaddq %ymm0, %ymm0, %ymm0
255 ; X64-NEXT: vpaddq %ymm1, %ymm1, %ymm1
256 ; X64-NEXT: retq
174257 %y = mul <8 x i64> %x, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
175258 ret <8 x i64> %y
176259 }
177260
178 ; CHECK: mul_const8
179 ; CHECK: vpsllw $3
180 ; CHECK: ret
181261 define <8 x i16> @mul_const8(<8 x i16> %x) {
262 ; X32-LABEL: mul_const8:
263 ; X32: ## BB#0:
264 ; X32-NEXT: vpsllw $3, %xmm0, %xmm0
265 ; X32-NEXT: retl
266 ;
267 ; X64-LABEL: mul_const8:
268 ; X64: ## BB#0:
269 ; X64-NEXT: vpsllw $3, %xmm0, %xmm0
270 ; X64-NEXT: retq
182271 %y = mul <8 x i16> %x, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
183272 ret <8 x i16> %y
184273 }
185274
186 ; CHECK: mul_const9
187 ; CHECK: vpmulld
188 ; CHECK: ret
189275 define <8 x i32> @mul_const9(<8 x i32> %x) {
276 ; X32-LABEL: mul_const9:
277 ; X32: ## BB#0:
278 ; X32-NEXT: movl $2, %eax
279 ; X32-NEXT: vmovd %eax, %xmm1
280 ; X32-NEXT: vpmulld %ymm1, %ymm0, %ymm0
281 ; X32-NEXT: retl
282 ;
283 ; X64-LABEL: mul_const9:
284 ; X64: ## BB#0:
285 ; X64-NEXT: movl $2, %eax
286 ; X64-NEXT: vmovd %eax, %xmm1
287 ; X64-NEXT: vpmulld %ymm1, %ymm0, %ymm0
288 ; X64-NEXT: retq
190289 %y = mul <8 x i32> %x, <i32 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
191290 ret <8 x i32> %y
192291 }
193292
194 ; CHECK: mul_const10
195 ; CHECK: vpmulld
196 ; CHECK: ret
197293 define <4 x i32> @mul_const10(<4 x i32> %x) {
198294 ; %x * 0x01010101
295 ; X32-LABEL: mul_const10:
296 ; X32: ## BB#0:
297 ; X32-NEXT: vpbroadcastd LCPI22_0, %xmm1
298 ; X32-NEXT: vpmulld %xmm1, %xmm0, %xmm0
299 ; X32-NEXT: retl
300 ;
301 ; X64-LABEL: mul_const10:
302 ; X64: ## BB#0:
303 ; X64-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
304 ; X64-NEXT: vpmulld %xmm1, %xmm0, %xmm0
305 ; X64-NEXT: retq
199306 %m = mul <4 x i32> %x, <i32 16843009, i32 16843009, i32 16843009, i32 16843009>
200307 ret <4 x i32> %m
201308 }
202309
203 ; CHECK: mul_const11
204 ; CHECK: vpmulld
205 ; CHECK: ret
206310 define <4 x i32> @mul_const11(<4 x i32> %x) {
207311 ; %x * 0x80808080
312 ; X32-LABEL: mul_const11:
313 ; X32: ## BB#0:
314 ; X32-NEXT: vpbroadcastd LCPI23_0, %xmm1
315 ; X32-NEXT: vpmulld %xmm1, %xmm0, %xmm0
316 ; X32-NEXT: retl
317 ;
318 ; X64-LABEL: mul_const11:
319 ; X64: ## BB#0:
320 ; X64-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
321 ; X64-NEXT: vpmulld %xmm1, %xmm0, %xmm0
322 ; X64-NEXT: retq
208323 %m = mul <4 x i32> %x, <i32 2155905152, i32 2155905152, i32 2155905152, i32 2155905152>
209324 ret <4 x i32> %m
210325 }
None ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
0 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1 ; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X32
2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X64
13
2 ; CHECK: vpcmpgtd %ymm
3 define <8 x i32> @int256-cmp(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
4 define <8 x i32> @v8i32_cmpgt(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
5 ; X32-LABEL: v8i32_cmpgt:
6 ; X32: ## BB#0:
7 ; X32-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0
8 ; X32-NEXT: retl
9 ;
10 ; X64-LABEL: v8i32_cmpgt:
11 ; X64: ## BB#0:
12 ; X64-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0
13 ; X64-NEXT: retq
414 %bincmp = icmp slt <8 x i32> %i, %j
515 %x = sext <8 x i1> %bincmp to <8 x i32>
616 ret <8 x i32> %x
717 }
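; Note: every 'icmp slt %i, %j' in this file is matched as vpcmpgt with the
; operand order swapped, because AVX2 only provides greater-than and equality
; integer compares; the sext of the <N x i1> mask is free since the compare
; already produces all-ones/all-zeros lanes.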
818
9 ; CHECK: vpcmpgtq %ymm
10 define <4 x i64> @v4i64-cmp(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
19 define <4 x i64> @v4i64_cmpgt(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
20 ; X32-LABEL: v4i64_cmpgt:
21 ; X32: ## BB#0:
22 ; X32-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
23 ; X32-NEXT: retl
24 ;
25 ; X64-LABEL: v4i64_cmpgt:
26 ; X64: ## BB#0:
27 ; X64-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
28 ; X64-NEXT: retq
1129 %bincmp = icmp slt <4 x i64> %i, %j
1230 %x = sext <4 x i1> %bincmp to <4 x i64>
1331 ret <4 x i64> %x
1432 }
1533
16 ; CHECK: vpcmpgtw %ymm
17 define <16 x i16> @v16i16-cmp(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
34 define <16 x i16> @v16i16_cmpgt(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
35 ; X32-LABEL: v16i16_cmpgt:
36 ; X32: ## BB#0:
37 ; X32-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
38 ; X32-NEXT: retl
39 ;
40 ; X64-LABEL: v16i16_cmpgt:
41 ; X64: ## BB#0:
42 ; X64-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
43 ; X64-NEXT: retq
1844 %bincmp = icmp slt <16 x i16> %i, %j
1945 %x = sext <16 x i1> %bincmp to <16 x i16>
2046 ret <16 x i16> %x
2147 }
2248
23 ; CHECK: vpcmpgtb %ymm
24 define <32 x i8> @v32i8-cmp(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
49 define <32 x i8> @v32i8_cmpgt(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
50 ; X32-LABEL: v32i8_cmpgt:
51 ; X32: ## BB#0:
52 ; X32-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
53 ; X32-NEXT: retl
54 ;
55 ; X64-LABEL: v32i8_cmpgt:
56 ; X64: ## BB#0:
57 ; X64-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
58 ; X64-NEXT: retq
2559 %bincmp = icmp slt <32 x i8> %i, %j
2660 %x = sext <32 x i1> %bincmp to <32 x i8>
2761 ret <32 x i8> %x
2862 }
2963
30 ; CHECK: vpcmpeqd %ymm
31 define <8 x i32> @int256-cmpeq(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
64 define <8 x i32> @int256_cmpeq(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
65 ; X32-LABEL: int256_cmpeq:
66 ; X32: ## BB#0:
67 ; X32-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0
68 ; X32-NEXT: retl
69 ;
70 ; X64-LABEL: int256_cmpeq:
71 ; X64: ## BB#0:
72 ; X64-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0
73 ; X64-NEXT: retq
3274 %bincmp = icmp eq <8 x i32> %i, %j
3375 %x = sext <8 x i1> %bincmp to <8 x i32>
3476 ret <8 x i32> %x
3577 }
3678
37 ; CHECK: vpcmpeqq %ymm
38 define <4 x i64> @v4i64-cmpeq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
79 define <4 x i64> @v4i64_cmpeq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
80 ; X32-LABEL: v4i64_cmpeq:
81 ; X32: ## BB#0:
82 ; X32-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
83 ; X32-NEXT: retl
84 ;
85 ; X64-LABEL: v4i64_cmpeq:
86 ; X64: ## BB#0:
87 ; X64-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
88 ; X64-NEXT: retq
3989 %bincmp = icmp eq <4 x i64> %i, %j
4090 %x = sext <4 x i1> %bincmp to <4 x i64>
4191 ret <4 x i64> %x
4292 }
4393
44 ; CHECK: vpcmpeqw %ymm
45 define <16 x i16> @v16i16-cmpeq(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
94 define <16 x i16> @v16i16_cmpeq(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
95 ; X32-LABEL: v16i16_cmpeq:
96 ; X32: ## BB#0:
97 ; X32-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
98 ; X32-NEXT: retl
99 ;
100 ; X64-LABEL: v16i16_cmpeq:
101 ; X64: ## BB#0:
102 ; X64-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
103 ; X64-NEXT: retq
46104 %bincmp = icmp eq <16 x i16> %i, %j
47105 %x = sext <16 x i1> %bincmp to <16 x i16>
48106 ret <16 x i16> %x
49107 }
50108
51 ; CHECK: vpcmpeqb %ymm
52 define <32 x i8> @v32i8-cmpeq(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
109 define <32 x i8> @v32i8_cmpeq(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
110 ; X32-LABEL: v32i8_cmpeq:
111 ; X32: ## BB#0:
112 ; X32-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
113 ; X32-NEXT: retl
114 ;
115 ; X64-LABEL: v32i8_cmpeq:
116 ; X64: ## BB#0:
117 ; X64-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
118 ; X64-NEXT: retq
53119 %bincmp = icmp eq <32 x i8> %i, %j
54120 %x = sext <32 x i1> %bincmp to <32 x i8>
55121 ret <32 x i8> %x
56122 }
57
0 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
1 ; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X32
2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X64
23
34 define <4 x i32> @trunc4(<4 x i64> %A) nounwind {
4 ; CHECK-LABEL: trunc4:
5 ; CHECK: ## BB#0:
6 ; CHECK-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
7 ; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
8 ; CHECK-NEXT: ## kill: %XMM0 %XMM0 %YMM0
9 ; CHECK-NEXT: vzeroupper
10 ; CHECK-NEXT: retq
5 ; X32-LABEL: trunc4:
6 ; X32: ## BB#0:
7 ; X32-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
8 ; X32-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
9 ; X32-NEXT: ## kill: %XMM0 %XMM0 %YMM0
10 ; X32-NEXT: vzeroupper
11 ; X32-NEXT: retl
12 ;
13 ; X64-LABEL: trunc4:
14 ; X64: ## BB#0:
15 ; X64-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
16 ; X64-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
17 ; X64-NEXT: ## kill: %XMM0 %XMM0 %YMM0
18 ; X64-NEXT: vzeroupper
19 ; X64-NEXT: retq
1120 %B = trunc <4 x i64> %A to <4 x i32>
1221 ret <4 x i32>%B
1322 }
1423
1524 define <8 x i16> @trunc8(<8 x i32> %A) nounwind {
16 ; CHECK-LABEL: trunc8:
17 ; CHECK: ## BB#0:
18 ; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
19 ; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
20 ; CHECK-NEXT: ## kill: %XMM0 %XMM0 %YMM0
21 ; CHECK-NEXT: vzeroupper
22 ; CHECK-NEXT: retq
25 ; X32-LABEL: trunc8:
26 ; X32: ## BB#0:
27 ; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
28 ; X32-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
29 ; X32-NEXT: ## kill: %XMM0 %XMM0 %YMM0
30 ; X32-NEXT: vzeroupper
31 ; X32-NEXT: retl
32 ;
33 ; X64-LABEL: trunc8:
34 ; X64: ## BB#0:
35 ; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
36 ; X64-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
37 ; X64-NEXT: ## kill: %XMM0 %XMM0 %YMM0
38 ; X64-NEXT: vzeroupper
39 ; X64-NEXT: retq
2340 %B = trunc <8 x i32> %A to <8 x i16>
2441 ret <8 x i16>%B
2542 }
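; AVX2 has no 256-bit-to-128-bit truncate instruction, so trunc4 and trunc8
; above are lowered as an in-lane shuffle (vpshufd/vpshufb) followed by a
; cross-lane vpermq that packs the kept elements into the low 128 bits; the
; '## kill' comment just marks the implicit ymm-to-xmm subregister extract.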
2643
2744 define <4 x i64> @sext4(<4 x i32> %A) nounwind {
28 ; CHECK-LABEL: sext4:
29 ; CHECK: ## BB#0:
30 ; CHECK-NEXT: vpmovsxdq %xmm0, %ymm0
31 ; CHECK-NEXT: retq
45 ; X32-LABEL: sext4:
46 ; X32: ## BB#0:
47 ; X32-NEXT: vpmovsxdq %xmm0, %ymm0
48 ; X32-NEXT: retl
49 ;
50 ; X64-LABEL: sext4:
51 ; X64: ## BB#0:
52 ; X64-NEXT: vpmovsxdq %xmm0, %ymm0
53 ; X64-NEXT: retq
3254 %B = sext <4 x i32> %A to <4 x i64>
3355 ret <4 x i64>%B
3456 }
3557
3658 define <8 x i32> @sext8(<8 x i16> %A) nounwind {
37 ; CHECK-LABEL: sext8:
38 ; CHECK: ## BB#0:
39 ; CHECK-NEXT: vpmovsxwd %xmm0, %ymm0
40 ; CHECK-NEXT: retq
59 ; X32-LABEL: sext8:
60 ; X32: ## BB#0:
61 ; X32-NEXT: vpmovsxwd %xmm0, %ymm0
62 ; X32-NEXT: retl
63 ;
64 ; X64-LABEL: sext8:
65 ; X64: ## BB#0:
66 ; X64-NEXT: vpmovsxwd %xmm0, %ymm0
67 ; X64-NEXT: retq
4168 %B = sext <8 x i16> %A to <8 x i32>
4269 ret <8 x i32>%B
4370 }
4471
4572 define <4 x i64> @zext4(<4 x i32> %A) nounwind {
46 ; CHECK-LABEL: zext4:
47 ; CHECK: ## BB#0:
48 ; CHECK-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
49 ; CHECK-NEXT: retq
73 ; X32-LABEL: zext4:
74 ; X32: ## BB#0:
75 ; X32-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
76 ; X32-NEXT: retl
77 ;
78 ; X64-LABEL: zext4:
79 ; X64: ## BB#0:
80 ; X64-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
81 ; X64-NEXT: retq
5082 %B = zext <4 x i32> %A to <4 x i64>
5183 ret <4 x i64>%B
5284 }
5385
5486 define <8 x i32> @zext8(<8 x i16> %A) nounwind {
55 ; CHECK-LABEL: zext8:
56 ; CHECK: ## BB#0:
57 ; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
58 ; CHECK-NEXT: retq
87 ; X32-LABEL: zext8:
88 ; X32: ## BB#0:
89 ; X32-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
90 ; X32-NEXT: retl
91 ;
92 ; X64-LABEL: zext8:
93 ; X64: ## BB#0:
94 ; X64-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
95 ; X64-NEXT: retq
5996 %B = zext <8 x i16> %A to <8 x i32>
6097 ret <8 x i32>%B
6198 }
6299
63100 define <8 x i32> @zext_8i8_8i32(<8 x i8> %A) nounwind {
64 ; CHECK-LABEL: zext_8i8_8i32:
65 ; CHECK: ## BB#0:
66 ; CHECK-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
67 ; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
68 ; CHECK-NEXT: retq
101 ; X32-LABEL: zext_8i8_8i32:
102 ; X32: ## BB#0:
103 ; X32-NEXT: vpand LCPI6_0, %xmm0, %xmm0
104 ; X32-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
105 ; X32-NEXT: retl
106 ;
107 ; X64-LABEL: zext_8i8_8i32:
108 ; X64: ## BB#0:
109 ; X64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
110 ; X64-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
111 ; X64-NEXT: retq
69112 %B = zext <8 x i8> %A to <8 x i32>
70113 ret <8 x i32>%B
71114 }
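; <8 x i8> is not a legal register type here; the elements live in the low
; bytes of 16-bit lanes, so zext_8i8_8i32 above must first mask off the
; undefined high bytes with vpand before zero-extending with vpmovzxwd.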
72115
73116 define <16 x i16> @zext_16i8_16i16(<16 x i8> %z) {
74 ; CHECK-LABEL: zext_16i8_16i16:
75 ; CHECK: ## BB#0:
76 ; CHECK-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
77 ; CHECK-NEXT: retq
117 ; X32-LABEL: zext_16i8_16i16:
118 ; X32: ## BB#0:
119 ; X32-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
120 ; X32-NEXT: retl
121 ;
122 ; X64-LABEL: zext_16i8_16i16:
123 ; X64: ## BB#0:
124 ; X64-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
125 ; X64-NEXT: retq
78126 %t = zext <16 x i8> %z to <16 x i16>
79127 ret <16 x i16> %t
80128 }
81129
82130 define <16 x i16> @sext_16i8_16i16(<16 x i8> %z) {
83 ; CHECK-LABEL: sext_16i8_16i16:
84 ; CHECK: ## BB#0:
85 ; CHECK-NEXT: vpmovsxbw %xmm0, %ymm0
86 ; CHECK-NEXT: retq
131 ; X32-LABEL: sext_16i8_16i16:
132 ; X32: ## BB#0:
133 ; X32-NEXT: vpmovsxbw %xmm0, %ymm0
134 ; X32-NEXT: retl
135 ;
136 ; X64-LABEL: sext_16i8_16i16:
137 ; X64: ## BB#0:
138 ; X64-NEXT: vpmovsxbw %xmm0, %ymm0
139 ; X64-NEXT: retq
87140 %t = sext <16 x i8> %z to <16 x i16>
88141 ret <16 x i16> %t
89142 }
90143
91144 define <16 x i8> @trunc_16i16_16i8(<16 x i16> %z) {
92 ; CHECK-LABEL: trunc_16i16_16i8:
93 ; CHECK: ## BB#0:
94 ; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm1
95 ; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
96 ; CHECK-NEXT: vpshufb %xmm2, %xmm1, %xmm1
97 ; CHECK-NEXT: vpshufb %xmm2, %xmm0, %xmm0
98 ; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
99 ; CHECK-NEXT: vzeroupper
100 ; CHECK-NEXT: retq
145 ; X32-LABEL: trunc_16i16_16i8:
146 ; X32: ## BB#0:
147 ; X32-NEXT: vextracti128 $1, %ymm0, %xmm1
148 ; X32-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
149 ; X32-NEXT: vpshufb %xmm2, %xmm1, %xmm1
150 ; X32-NEXT: vpshufb %xmm2, %xmm0, %xmm0
151 ; X32-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
152 ; X32-NEXT: vzeroupper
153 ; X32-NEXT: retl
154 ;
155 ; X64-LABEL: trunc_16i16_16i8:
156 ; X64: ## BB#0:
157 ; X64-NEXT: vextracti128 $1, %ymm0, %xmm1
158 ; X64-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
159 ; X64-NEXT: vpshufb %xmm2, %xmm1, %xmm1
160 ; X64-NEXT: vpshufb %xmm2, %xmm0, %xmm0
161 ; X64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
162 ; X64-NEXT: vzeroupper
163 ; X64-NEXT: retq
101164 %t = trunc <16 x i16> %z to <16 x i8>
102165 ret <16 x i8> %t
103166 }
104167
105168 define <4 x i64> @load_sext_test1(<4 x i32> *%ptr) {
106 ; CHECK-LABEL: load_sext_test1:
107 ; CHECK: ## BB#0:
108 ; CHECK-NEXT: vpmovsxdq (%rdi), %ymm0
109 ; CHECK-NEXT: retq
169 ; X32-LABEL: load_sext_test1:
170 ; X32: ## BB#0:
171 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
172 ; X32-NEXT: vpmovsxdq (%eax), %ymm0
173 ; X32-NEXT: retl
174 ;
175 ; X64-LABEL: load_sext_test1:
176 ; X64: ## BB#0:
177 ; X64-NEXT: vpmovsxdq (%rdi), %ymm0
178 ; X64-NEXT: retq
110179 %X = load <4 x i32>, <4 x i32>* %ptr
111180 %Y = sext <4 x i32> %X to <4 x i64>
112181 ret <4 x i64>%Y
113182 }
114183
115184 define <4 x i64> @load_sext_test2(<4 x i8> *%ptr) {
116 ; CHECK-LABEL: load_sext_test2:
117 ; CHECK: ## BB#0:
118 ; CHECK-NEXT: vpmovsxbq (%rdi), %ymm0
119 ; CHECK-NEXT: retq
185 ; X32-LABEL: load_sext_test2:
186 ; X32: ## BB#0:
187 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
188 ; X32-NEXT: vpmovsxbq (%eax), %ymm0
189 ; X32-NEXT: retl
190 ;
191 ; X64-LABEL: load_sext_test2:
192 ; X64: ## BB#0:
193 ; X64-NEXT: vpmovsxbq (%rdi), %ymm0
194 ; X64-NEXT: retq
120195 %X = load <4 x i8>, <4 x i8>* %ptr
121196 %Y = sext <4 x i8> %X to <4 x i64>
122197 ret <4 x i64>%Y
123198 }
124199
125200 define <4 x i64> @load_sext_test3(<4 x i16> *%ptr) {
126 ; CHECK-LABEL: load_sext_test3:
127 ; CHECK: ## BB#0:
128 ; CHECK-NEXT: vpmovsxwq (%rdi), %ymm0
129 ; CHECK-NEXT: retq
201 ; X32-LABEL: load_sext_test3:
202 ; X32: ## BB#0:
203 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
204 ; X32-NEXT: vpmovsxwq (%eax), %ymm0
205 ; X32-NEXT: retl
206 ;
207 ; X64-LABEL: load_sext_test3:
208 ; X64: ## BB#0:
209 ; X64-NEXT: vpmovsxwq (%rdi), %ymm0
210 ; X64-NEXT: retq
130211 %X = load <4 x i16>, <4 x i16>* %ptr
131212 %Y = sext <4 x i16> %X to <4 x i64>
132213 ret <4 x i64>%Y
133214 }
134215
135216 define <8 x i32> @load_sext_test4(<8 x i16> *%ptr) {
136 ; CHECK-LABEL: load_sext_test4:
137 ; CHECK: ## BB#0:
138 ; CHECK-NEXT: vpmovsxwd (%rdi), %ymm0
139 ; CHECK-NEXT: retq
217 ; X32-LABEL: load_sext_test4:
218 ; X32: ## BB#0:
219 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
220 ; X32-NEXT: vpmovsxwd (%eax), %ymm0
221 ; X32-NEXT: retl
222 ;
223 ; X64-LABEL: load_sext_test4:
224 ; X64: ## BB#0:
225 ; X64-NEXT: vpmovsxwd (%rdi), %ymm0
226 ; X64-NEXT: retq
140227 %X = load <8 x i16>, <8 x i16>* %ptr
141228 %Y = sext <8 x i16> %X to <8 x i32>
142229 ret <8 x i32>%Y
143230 }
144231
145232 define <8 x i32> @load_sext_test5(<8 x i8> *%ptr) {
146 ; CHECK-LABEL: load_sext_test5:
147 ; CHECK: ## BB#0:
148 ; CHECK-NEXT: vpmovsxbd (%rdi), %ymm0
149 ; CHECK-NEXT: retq
233 ; X32-LABEL: load_sext_test5:
234 ; X32: ## BB#0:
235 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
236 ; X32-NEXT: vpmovsxbd (%eax), %ymm0
237 ; X32-NEXT: retl
238 ;
239 ; X64-LABEL: load_sext_test5:
240 ; X64: ## BB#0:
241 ; X64-NEXT: vpmovsxbd (%rdi), %ymm0
242 ; X64-NEXT: retq
150243 %X = load <8 x i8>, <8 x i8>* %ptr
151244 %Y = sext <8 x i8> %X to <8 x i32>
152245 ret <8 x i32>%Y
}
0 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 -mattr=+fma | FileCheck %s
1 ; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X32
2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X64
23
34 ; This test checks combinations of FNEG and FMA intrinsics
45
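; Each test materializes an FNEG as an fsub from -0.0 and applies it either to
; an FMA operand or to the FMA result; the checks verify the expected folds,
; e.g. fmadd(a, b, -c) -> fmsub and -fmadd(a, b, c) -> fnmsub.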
56 define <8 x float> @test1(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
6 ; CHECK-LABEL: test1:
7 ; CHECK: # BB#0: # %entry
8 ; CHECK-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0
9 ; CHECK-NEXT: retq
7 ; X32-LABEL: test1:
8 ; X32: ## BB#0: ## %entry
9 ; X32-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0
10 ; X32-NEXT: retl
11 ;
12 ; X64-LABEL: test1:
13 ; X64: ## BB#0: ## %entry
14 ; X64-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0
15 ; X64-NEXT: retq
1016 entry:
1117 %sub.i = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
1218 %0 = tail call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %sub.i) #2
ret <8 x float> %0
}
1622 declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>)
1723
1824 define <4 x float> @test2(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
19 ; CHECK-LABEL: test2:
20 ; CHECK: # BB#0: # %entry
21 ; CHECK-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0
22 ; CHECK-NEXT: retq
25 ; X32-LABEL: test2:
26 ; X32: ## BB#0: ## %entry
27 ; X32-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0
28 ; X32-NEXT: retl
29 ;
30 ; X64-LABEL: test2:
31 ; X64: ## BB#0: ## %entry
32 ; X64-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0
33 ; X64-NEXT: retq
2334 entry:
2435 %0 = tail call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a, <4 x float> %b, <4 x float> %c) #2
2536 %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %0
ret <4 x float> %sub.i
}
2940 declare <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a, <4 x float> %b, <4 x float> %c)
3041
3142 define <4 x float> @test3(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
32 ; CHECK-LABEL: test3:
33 ; CHECK: # BB#0: # %entry
34 ; CHECK-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0
35 ; CHECK-NEXT: vbroadcastss {{.*}}(%rip), %xmm1
36 ; CHECK-NEXT: vxorps %xmm1, %xmm0, %xmm0
37 ; CHECK-NEXT: retq
43 ; X32-LABEL: test3:
44 ; X32: ## BB#0: ## %entry
45 ; X32-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0
46 ; X32-NEXT: vbroadcastss LCPI2_0, %xmm1
47 ; X32-NEXT: vxorps %xmm1, %xmm0, %xmm0
48 ; X32-NEXT: retl
49 ;
50 ; X64-LABEL: test3:
51 ; X64: ## BB#0: ## %entry
52 ; X64-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0
53 ; X64-NEXT: vbroadcastss {{.*}}(%rip), %xmm1
54 ; X64-NEXT: vxorps %xmm1, %xmm0, %xmm0
55 ; X64-NEXT: retq
3856 entry:
3957 %0 = tail call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %c) #2
4058 %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %0
ret <4 x float> %sub.i
}
4462 declare <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %c)
4563
4664 define <8 x float> @test4(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
47 ; CHECK-LABEL: test4:
48 ; CHECK: # BB#0: # %entry
49 ; CHECK-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
50 ; CHECK-NEXT: retq
65 ; X32-LABEL: test4:
66 ; X32: ## BB#0: ## %entry
67 ; X32-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
68 ; X32-NEXT: retl
69 ;
70 ; X64-LABEL: test4:
71 ; X64: ## BB#0: ## %entry
72 ; X64-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
73 ; X64-NEXT: retq
5174 entry:
5275 %0 = tail call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c) #2
5376 %sub.i = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %0
ret <8 x float> %sub.i
5578 }
5679
5780 define <8 x float> @test5(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
58 ; CHECK-LABEL: test5:
59 ; CHECK: # BB#0: # %entry
60 ; CHECK-NEXT: vbroadcastss {{.*}}(%rip), %ymm3
61 ; CHECK-NEXT: vxorps %ymm3, %ymm2, %ymm2
62 ; CHECK-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0
63 ; CHECK-NEXT: retq
81 ; X32-LABEL: test5:
82 ; X32: ## BB#0: ## %entry
83 ; X32-NEXT: vbroadcastss LCPI4_0, %ymm3
84 ; X32-NEXT: vxorps %ymm3, %ymm2, %ymm2
85 ; X32-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0
86 ; X32-NEXT: retl
87 ;
88 ; X64-LABEL: test5:
89 ; X64: ## BB#0: ## %entry
90 ; X64-NEXT: vbroadcastss {{.*}}(%rip), %ymm3
91 ; X64-NEXT: vxorps %ymm3, %ymm2, %ymm2
92 ; X64-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0
93 ; X64-NEXT: retq
6494 entry:
6595 %sub.c = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
6696 %0 = tail call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %sub.c) #2
ret <8 x float> %0
}
71101
72102
73103 define <2 x double> @test6(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
74 ; CHECK-LABEL: test6:
75 ; CHECK: # BB#0: # %entry
76 ; CHECK-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0
77 ; CHECK-NEXT: retq
104 ; X32-LABEL: test6:
105 ; X32: ## BB#0: ## %entry
106 ; X32-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0
107 ; X32-NEXT: retl
108 ;
109 ; X64-LABEL: test6:
110 ; X64: ## BB#0: ## %entry
111 ; X64-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0
112 ; X64-NEXT: retq
78113 entry:
79114 %0 = tail call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a, <2 x double> %b, <2 x double> %c) #2
80115 %sub.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %0
ret <2 x double> %sub.i
}
None ; RUN: not llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx
1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 | FileCheck %s
0 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1 ; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X32
2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X64
23
34 declare <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float>, i8*,
45 <4 x i32>, <4 x float>, i8) nounwind readonly
56
6 define <4 x float> @test_x86_avx2_gather_d_ps(i8* %a1,
7 <4 x i32> %idx, <4 x float> %mask) {
7 define <4 x float> @test_x86_avx2_gather_d_ps(i8* %a1, <4 x i32> %idx, <4 x float> %mask) {
8 ; X32-LABEL: test_x86_avx2_gather_d_ps:
9 ; X32: ## BB#0:
10 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
11 ; X32-NEXT: vgatherdps %xmm1, (%eax,%xmm0,2), %xmm2
12 ; X32-NEXT: vmovaps %xmm2, %xmm0
13 ; X32-NEXT: retl
14 ;
15 ; X64-LABEL: test_x86_avx2_gather_d_ps:
16 ; X64: ## BB#0:
17 ; X64-NEXT: vgatherdps %xmm1, (%rdi,%xmm0,2), %xmm2
18 ; X64-NEXT: vmovaps %xmm2, %xmm0
19 ; X64-NEXT: retq
820 %res = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> undef,
921 i8* %a1, <4 x i32> %idx, <4 x float> %mask, i8 2) ;
1022 ret <4 x float> %res
1123 }
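; In the gather checks, the mask register (%xmm1 here) selects which lanes are
; loaded, the pass-through/destination register (%xmm2) receives the result,
; and the trailing vmovaps copies it into the ABI return register; the final
; i8 argument of the intrinsic is the address scale (2, 4 or 8 below).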
1224
13 ; CHECK: test_x86_avx2_gather_d_ps
14 ; CHECK: vgatherdps
15 ; CHECK-NOT: [[DST]]
16 ; CHECK: [[DST:%xmm[0-9]+]]{{$}}
17 ; CHECK: vmovaps
18 ; CHECK: ret
19
2025 declare <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double>, i8*,
2126 <4 x i32>, <2 x double>, i8) nounwind readonly
2227
23 define <2 x double> @test_x86_avx2_gather_d_pd(i8* %a1,
24 <4 x i32> %idx, <2 x double> %mask) {
28 define <2 x double> @test_x86_avx2_gather_d_pd(i8* %a1, <4 x i32> %idx, <2 x double> %mask) {
29 ; X32-LABEL: test_x86_avx2_gather_d_pd:
30 ; X32: ## BB#0:
31 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
32 ; X32-NEXT: vgatherdpd %xmm1, (%eax,%xmm0,2), %xmm2
33 ; X32-NEXT: vmovapd %xmm2, %xmm0
34 ; X32-NEXT: retl
35 ;
36 ; X64-LABEL: test_x86_avx2_gather_d_pd:
37 ; X64: ## BB#0:
38 ; X64-NEXT: vgatherdpd %xmm1, (%rdi,%xmm0,2), %xmm2
39 ; X64-NEXT: vmovapd %xmm2, %xmm0
40 ; X64-NEXT: retq
2541 %res = call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> undef,
2642 i8* %a1, <4 x i32> %idx, <2 x double> %mask, i8 2) ;
2743 ret <2 x double> %res
2844 }
2945
30 ; CHECK: test_x86_avx2_gather_d_pd
31 ; CHECK: vgatherdpd
32 ; CHECK: vmovapd
33 ; CHECK: ret
34
3546 declare <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float>, i8*,
3647 <8 x i32>, <8 x float>, i8) nounwind readonly
3748
38 define <8 x float> @test_x86_avx2_gather_d_ps_256(i8* %a1,
39 <8 x i32> %idx, <8 x float> %mask) {
49 define <8 x float> @test_x86_avx2_gather_d_ps_256(i8* %a1, <8 x i32> %idx, <8 x float> %mask) {
50 ; X32-LABEL: test_x86_avx2_gather_d_ps_256:
51 ; X32: ## BB#0:
52 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
53 ; X32-NEXT: vgatherdps %ymm1, (%eax,%ymm0,4), %ymm2
54 ; X32-NEXT: vmovaps %ymm2, %ymm0
55 ; X32-NEXT: retl
56 ;
57 ; X64-LABEL: test_x86_avx2_gather_d_ps_256:
58 ; X64: ## BB#0:
59 ; X64-NEXT: vgatherdps %ymm1, (%rdi,%ymm0,4), %ymm2
60 ; X64-NEXT: vmovaps %ymm2, %ymm0
61 ; X64-NEXT: retq
4062 %res = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> undef,
4163 i8* %a1, <8 x i32> %idx, <8 x float> %mask, i8 4) ;
4264 ret <8 x float> %res
4365 }
44 ; CHECK-LABEL: @test_x86_avx2_gather_d_ps_256
45 ; CHECK: vgatherdps %ymm
46 ; CHECK: ret
4766
4867 declare <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double>, i8*,
4968 <4 x i32>, <4 x double>, i8) nounwind readonly
5069
51 define <4 x double> @test_x86_avx2_gather_d_pd_256(i8* %a1,
52 <4 x i32> %idx, <4 x double> %mask) {
70 define <4 x double> @test_x86_avx2_gather_d_pd_256(i8* %a1, <4 x i32> %idx, <4 x double> %mask) {
71 ; X32-LABEL: test_x86_avx2_gather_d_pd_256:
72 ; X32: ## BB#0:
73 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
74 ; X32-NEXT: vgatherdpd %ymm1, (%eax,%xmm0,8), %ymm2
75 ; X32-NEXT: vmovapd %ymm2, %ymm0
76 ; X32-NEXT: retl
77 ;
78 ; X64-LABEL: test_x86_avx2_gather_d_pd_256:
79 ; X64: ## BB#0:
80 ; X64-NEXT: vgatherdpd %ymm1, (%rdi,%xmm0,8), %ymm2
81 ; X64-NEXT: vmovapd %ymm2, %ymm0
82 ; X64-NEXT: retq
5383 %res = call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> undef,
5484 i8* %a1, <4 x i32> %idx, <4 x double> %mask, i8 8) ;
5585 ret <4 x double> %res
5686 }
57
58 ; CHECK-LABEL: test_x86_avx2_gather_d_pd_256
59 ; CHECK: vgatherdpd %ymm
60 ; CHECK: ret
None ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
0 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1 ; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X32
2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X64
13
2 ; CHECK: vpandn
3 ; CHECK: vpandn %ymm
4 ; CHECK: ret
54 define <4 x i64> @vpandn(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
5 ; X32-LABEL: vpandn:
6 ; X32: ## BB#0: ## %entry
7 ; X32-NEXT: vpaddq LCPI0_0, %ymm0, %ymm1
8 ; X32-NEXT: vpandn %ymm0, %ymm1, %ymm0
9 ; X32-NEXT: retl
10 ;
11 ; X64-LABEL: vpandn:
12 ; X64: ## BB#0: ## %entry
13 ; X64-NEXT: vpbroadcastq {{.*}}(%rip), %ymm1
14 ; X64-NEXT: vpaddq %ymm1, %ymm0, %ymm1
15 ; X64-NEXT: vpandn %ymm0, %ymm1, %ymm0
16 ; X64-NEXT: retq
617 entry:
718 ; Force the execution domain with an add.
819 %a2 = add <4 x i64> %a,
%y = xor <4 x i64> %a2, <i64 -1, i64 -1, i64 -1, i64 -1>
%x = and <4 x i64> %a, %y
1122 ret <4 x i64> %x
1223 }
1324
14 ; CHECK: vpand
15 ; CHECK: vpand %ymm
16 ; CHECK: ret
1725 define <4 x i64> @vpand(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
26 ; X32-LABEL: vpand:
27 ; X32: ## BB#0: ## %entry
28 ; X32-NEXT: vpaddq LCPI1_0, %ymm0, %ymm0
29 ; X32-NEXT: vpand %ymm1, %ymm0, %ymm0
30 ; X32-NEXT: retl
31 ;
32 ; X64-LABEL: vpand:
33 ; X64: ## BB#0: ## %entry
34 ; X64-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
35 ; X64-NEXT: vpaddq %ymm2, %ymm0, %ymm0
36 ; X64-NEXT: vpand %ymm1, %ymm0, %ymm0
37 ; X64-NEXT: retq
1838 entry:
1939 ; Force the execution domain with an add.
2040 %a2 = add <4 x i64> %a,
%x = and <4 x i64> %a2, %b
2242 ret <4 x i64> %x
2343 }
2444
25 ; CHECK: vpor
26 ; CHECK: vpor %ymm
27 ; CHECK: ret
2845 define <4 x i64> @vpor(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
46 ; X32-LABEL: vpor:
47 ; X32: ## BB#0: ## %entry
48 ; X32-NEXT: vpaddq LCPI2_0, %ymm0, %ymm0
49 ; X32-NEXT: vpor %ymm1, %ymm0, %ymm0
50 ; X32-NEXT: retl
51 ;
52 ; X64-LABEL: vpor:
53 ; X64: ## BB#0: ## %entry
54 ; X64-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
55 ; X64-NEXT: vpaddq %ymm2, %ymm0, %ymm0
56 ; X64-NEXT: vpor %ymm1, %ymm0, %ymm0
57 ; X64-NEXT: retq
2958 entry:
3059 ; Force the execution domain with an add.
3160 %a2 = add <4 x i64> %a,
%x = or <4 x i64> %a2, %b
3362 ret <4 x i64> %x
3463 }
3564
36 ; CHECK: vpxor
37 ; CHECK: vpxor %ymm
38 ; CHECK: ret
3965 define <4 x i64> @vpxor(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
66 ; X32-LABEL: vpxor:
67 ; X32: ## BB#0: ## %entry
68 ; X32-NEXT: vpaddq LCPI3_0, %ymm0, %ymm0
69 ; X32-NEXT: vpxor %ymm1, %ymm0, %ymm0
70 ; X32-NEXT: retl
71 ;
72 ; X64-LABEL: vpxor:
73 ; X64: ## BB#0: ## %entry
74 ; X64-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
75 ; X64-NEXT: vpaddq %ymm2, %ymm0, %ymm0
76 ; X64-NEXT: vpxor %ymm1, %ymm0, %ymm0
77 ; X64-NEXT: retq
4078 entry:
4179 ; Force the execution domain with an add.
4280 %a2 = add <4 x i64> %a,
%x = xor <4 x i64> %a2, %b
4482 ret <4 x i64> %x
4583 }
4684
47 ; CHECK: vpblendvb
48 ; CHECK: vpblendvb %ymm
49 ; CHECK: ret
5085 define <32 x i8> @vpblendvb(<32 x i1> %cond, <32 x i8> %x, <32 x i8> %y) {
86 ; X32-LABEL: vpblendvb:
87 ; X32: ## BB#0:
88 ; X32-NEXT: vpsllw $7, %ymm0, %ymm0
89 ; X32-NEXT: vpand LCPI4_0, %ymm0, %ymm0
90 ; X32-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
91 ; X32-NEXT: retl
92 ;
93 ; X64-LABEL: vpblendvb:
94 ; X64: ## BB#0:
95 ; X64-NEXT: vpsllw $7, %ymm0, %ymm0
96 ; X64-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
97 ; X64-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
98 ; X64-NEXT: retq
5199 %min = select <32 x i1> %cond, <32 x i8> %x, <32 x i8> %y
52100 ret <32 x i8> %min
53101 }
54102
55103 define <8 x i32> @allOnes() nounwind {
56 ; CHECK: vpcmpeqd
57 ; CHECK-NOT: vinsert
104 ; X32-LABEL: allOnes:
105 ; X32: ## BB#0:
106 ; X32-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
107 ; X32-NEXT: retl
108 ;
109 ; X64-LABEL: allOnes:
110 ; X64: ## BB#0:
111 ; X64-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
112 ; X64-NEXT: retq
58113 ret <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
59114 }
60115
61116 define <16 x i16> @allOnes2() nounwind {
62 ; CHECK: vpcmpeqd
63 ; CHECK-NOT: vinsert
117 ; X32-LABEL: allOnes2:
118 ; X32: ## BB#0:
119 ; X32-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
120 ; X32-NEXT: retl
121 ;
122 ; X64-LABEL: allOnes2:
123 ; X64: ## BB#0:
124 ; X64-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
125 ; X64-NEXT: retq
64126 ret <16 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
65127 }
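; allOnes and allOnes2 check that a 256-bit all-ones constant is materialized
; with a single vpcmpeqd %ymm0, %ymm0, %ymm0 rather than by building and
; inserting 128-bit halves (hence the old CHECK-NOT: vinsert).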
0 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s
1 ; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X32
2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X64
23
34 define <16 x i16> @phaddw1(<16 x i16> %x, <16 x i16> %y) {
4 ; CHECK-LABEL: phaddw1:
5 ; CHECK: # BB#0:
6 ; CHECK-NEXT: vphaddw %ymm1, %ymm0, %ymm0
7 ; CHECK-NEXT: retq
5 ; X32-LABEL: phaddw1:
6 ; X32: ## BB#0:
7 ; X32-NEXT: vphaddw %ymm1, %ymm0, %ymm0
8 ; X32-NEXT: retl
9 ;
10 ; X64-LABEL: phaddw1:
11 ; X64: ## BB#0:
12 ; X64-NEXT: vphaddw %ymm1, %ymm0, %ymm0
13 ; X64-NEXT: retq
814 %a = shufflevector <16 x i16> %x, <16 x i16> %y, <16 x i32>
915 %b = shufflevector <16 x i16> %x, <16 x i16> %y, <16 x i32>
1016 %r = add <16 x i16> %a, %b
ret <16 x i16> %r
1218 }
1319
1420 define <16 x i16> @phaddw2(<16 x i16> %x, <16 x i16> %y) {
15 ; CHECK-LABEL: phaddw2:
16 ; CHECK: # BB#0:
17 ; CHECK-NEXT: vphaddw %ymm1, %ymm0, %ymm0
18 ; CHECK-NEXT: retq
21 ; X32-LABEL: phaddw2:
22 ; X32: ## BB#0:
23 ; X32-NEXT: vphaddw %ymm1, %ymm0, %ymm0
24 ; X32-NEXT: retl
25 ;
26 ; X64-LABEL: phaddw2:
27 ; X64: ## BB#0:
28 ; X64-NEXT: vphaddw %ymm1, %ymm0, %ymm0
29 ; X64-NEXT: retq
1930 %a = shufflevector <16 x i16> %x, <16 x i16> %y, <16 x i32>
2031 %b = shufflevector <16 x i16> %y, <16 x i16> %x, <16 x i32>
2132 %r = add <16 x i16> %a, %b
ret <16 x i16> %r
2334 }
2435
2536 define <8 x i32> @phaddd1(<8 x i32> %x, <8 x i32> %y) {
26 ; CHECK-LABEL: phaddd1:
27 ; CHECK: # BB#0:
28 ; CHECK-NEXT: vphaddd %ymm1, %ymm0, %ymm0
29 ; CHECK-NEXT: retq
37 ; X32-LABEL: phaddd1:
38 ; X32: ## BB#0:
39 ; X32-NEXT: vphaddd %ymm1, %ymm0, %ymm0
40 ; X32-NEXT: retl
41 ;
42 ; X64-LABEL: phaddd1:
43 ; X64: ## BB#0:
44 ; X64-NEXT: vphaddd %ymm1, %ymm0, %ymm0
45 ; X64-NEXT: retq
3046 %a = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32>
3147 %b = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32>
3248 %r = add <8 x i32> %a, %b
ret <8 x i32> %r
3450 }
3551
3652 define <8 x i32> @phaddd2(<8 x i32> %x, <8 x i32> %y) {
37 ; CHECK-LABEL: phaddd2:
38 ; CHECK: # BB#0:
39 ; CHECK-NEXT: vphaddd %ymm1, %ymm0, %ymm0
40 ; CHECK-NEXT: retq
53 ; X32-LABEL: phaddd2:
54 ; X32: ## BB#0:
55 ; X32-NEXT: vphaddd %ymm1, %ymm0, %ymm0
56 ; X32-NEXT: retl
57 ;
58 ; X64-LABEL: phaddd2:
59 ; X64: ## BB#0:
60 ; X64-NEXT: vphaddd %ymm1, %ymm0, %ymm0
61 ; X64-NEXT: retq
4162 %a = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32>
4263 %b = shufflevector <8 x i32> %y, <8 x i32> %x, <8 x i32>
4364 %r = add <8 x i32> %a, %b
ret <8 x i32> %r
4566 }
4667
4768 define <8 x i32> @phaddd3(<8 x i32> %x) {
48 ; CHECK-LABEL: phaddd3:
49 ; CHECK: # BB#0:
50 ; CHECK-NEXT: vphaddd %ymm0, %ymm0, %ymm0
51 ; CHECK-NEXT: retq
69 ; X32-LABEL: phaddd3:
70 ; X32: ## BB#0:
71 ; X32-NEXT: vphaddd %ymm0, %ymm0, %ymm0
72 ; X32-NEXT: retl
73 ;
74 ; X64-LABEL: phaddd3:
75 ; X64: ## BB#0:
76 ; X64-NEXT: vphaddd %ymm0, %ymm0, %ymm0
77 ; X64-NEXT: retq
5278 %a = shufflevector <8 x i32> %x, <8 x i32> undef, <8 x i32>
5379 %b = shufflevector <8 x i32> %x, <8 x i32> undef, <8 x i32>
5480 %r = add <8 x i32> %a, %b
ret <8 x i32> %r
5682 }
5783
5884 define <16 x i16> @phsubw1(<16 x i16> %x, <16 x i16> %y) {
59 ; CHECK-LABEL: phsubw1:
60 ; CHECK: # BB#0:
61 ; CHECK-NEXT: vphsubw %ymm1, %ymm0, %ymm0
62 ; CHECK-NEXT: retq
85 ; X32-LABEL: phsubw1:
86 ; X32: ## BB#0:
87 ; X32-NEXT: vphsubw %ymm1, %ymm0, %ymm0
88 ; X32-NEXT: retl
89 ;
90 ; X64-LABEL: phsubw1:
91 ; X64: ## BB#0:
92 ; X64-NEXT: vphsubw %ymm1, %ymm0, %ymm0
93 ; X64-NEXT: retq
6394 %a = shufflevector <16 x i16> %x, <16 x i16> %y, <16 x i32>
6495 %b = shufflevector <16 x i16> %x, <16 x i16> %y, <16 x i32>
6596 %r = sub <16 x i16> %a, %b
ret <16 x i16> %r
6798 }
6899
69100 define <8 x i32> @phsubd1(<8 x i32> %x, <8 x i32> %y) {
70 ; CHECK-LABEL: phsubd1:
71 ; CHECK: # BB#0:
72 ; CHECK-NEXT: vphsubd %ymm1, %ymm0, %ymm0
73 ; CHECK-NEXT: retq
101 ; X32-LABEL: phsubd1:
102 ; X32: ## BB#0:
103 ; X32-NEXT: vphsubd %ymm1, %ymm0, %ymm0
104 ; X32-NEXT: retl
105 ;
106 ; X64-LABEL: phsubd1:
107 ; X64: ## BB#0:
108 ; X64-NEXT: vphsubd %ymm1, %ymm0, %ymm0
109 ; X64-NEXT: retq
74110 %a = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32>
75111 %b = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32>
76112 %r = sub <8 x i32> %a, %b
ret <8 x i32> %r
78114 }
79115
80116 define <8 x i32> @phsubd2(<8 x i32> %x, <8 x i32> %y) {
81 ; CHECK-LABEL: phsubd2:
82 ; CHECK: # BB#0:
83 ; CHECK-NEXT: vphsubd %ymm1, %ymm0, %ymm0
84 ; CHECK-NEXT: retq
117 ; X32-LABEL: phsubd2:
118 ; X32: ## BB#0:
119 ; X32-NEXT: vphsubd %ymm1, %ymm0, %ymm0
120 ; X32-NEXT: retl
121 ;
122 ; X64-LABEL: phsubd2:
123 ; X64: ## BB#0:
124 ; X64-NEXT: vphsubd %ymm1, %ymm0, %ymm0
125 ; X64-NEXT: retq
85126 %a = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32>
86127 %b = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32>
87128 %r = sub <8 x i32> %a, %b
ret <8 x i32> %r
}
None ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
1
2 ; CHECK: variable_shl0
3 ; CHECK: psllvd
4 ; CHECK: ret
0 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1 ; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X32
2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X64
3
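; AVX2 adds per-element variable shift instructions; the tests below cover the
; dword and qword forms (vpsllvd/q, vpsrlvd/q, and vpsravd for arithmetic
; right shifts, which has no qword form in AVX2) at both 128-bit and 256-bit
; widths, plus the register and load variants.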
54 define <4 x i32> @variable_shl0(<4 x i32> %x, <4 x i32> %y) {
5 ; X32-LABEL: variable_shl0:
6 ; X32: ## BB#0:
7 ; X32-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
8 ; X32-NEXT: retl
9 ;
10 ; X64-LABEL: variable_shl0:
11 ; X64: ## BB#0:
12 ; X64-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
13 ; X64-NEXT: retq
614 %k = shl <4 x i32> %x, %y
715 ret <4 x i32> %k
816 }
9 ; CHECK: variable_shl1
10 ; CHECK: psllvd
11 ; CHECK: ret
17
1218 define <8 x i32> @variable_shl1(<8 x i32> %x, <8 x i32> %y) {
19 ; X32-LABEL: variable_shl1:
20 ; X32: ## BB#0:
21 ; X32-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
22 ; X32-NEXT: retl
23 ;
24 ; X64-LABEL: variable_shl1:
25 ; X64: ## BB#0:
26 ; X64-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
27 ; X64-NEXT: retq
1328 %k = shl <8 x i32> %x, %y
1429 ret <8 x i32> %k
1530 }
16 ; CHECK: variable_shl2
17 ; CHECK: psllvq
18 ; CHECK: ret
31
1932 define <2 x i64> @variable_shl2(<2 x i64> %x, <2 x i64> %y) {
33 ; X32-LABEL: variable_shl2:
34 ; X32: ## BB#0:
35 ; X32-NEXT: vpsllvq %xmm1, %xmm0, %xmm0
36 ; X32-NEXT: retl
37 ;
38 ; X64-LABEL: variable_shl2:
39 ; X64: ## BB#0:
40 ; X64-NEXT: vpsllvq %xmm1, %xmm0, %xmm0
41 ; X64-NEXT: retq
2042 %k = shl <2 x i64> %x, %y
2143 ret <2 x i64> %k
2244 }
23 ; CHECK: variable_shl3
24 ; CHECK: psllvq
25 ; CHECK: ret
45
2646 define <4 x i64> @variable_shl3(<4 x i64> %x, <4 x i64> %y) {
47 ; X32-LABEL: variable_shl3:
48 ; X32: ## BB#0:
49 ; X32-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
50 ; X32-NEXT: retl
51 ;
52 ; X64-LABEL: variable_shl3:
53 ; X64: ## BB#0:
54 ; X64-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
55 ; X64-NEXT: retq
2756 %k = shl <4 x i64> %x, %y
2857 ret <4 x i64> %k
2958 }
30 ; CHECK: variable_srl0
31 ; CHECK: psrlvd
32 ; CHECK: ret
59
3360 define <4 x i32> @variable_srl0(<4 x i32> %x, <4 x i32> %y) {
61 ; X32-LABEL: variable_srl0:
62 ; X32: ## BB#0:
63 ; X32-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
64 ; X32-NEXT: retl
65 ;
66 ; X64-LABEL: variable_srl0:
67 ; X64: ## BB#0:
68 ; X64-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
69 ; X64-NEXT: retq
3470 %k = lshr <4 x i32> %x, %y
3571 ret <4 x i32> %k
3672 }
37 ; CHECK: variable_srl1
38 ; CHECK: psrlvd
39 ; CHECK: ret
73
4074 define <8 x i32> @variable_srl1(<8 x i32> %x, <8 x i32> %y) {
75 ; X32-LABEL: variable_srl1:
76 ; X32: ## BB#0:
77 ; X32-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
78 ; X32-NEXT: retl
79 ;
80 ; X64-LABEL: variable_srl1:
81 ; X64: ## BB#0:
82 ; X64-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
83 ; X64-NEXT: retq
4184 %k = lshr <8 x i32> %x, %y
4285 ret <8 x i32> %k
4386 }
44 ; CHECK: variable_srl2
45 ; CHECK: psrlvq
46 ; CHECK: ret
87
4788 define <2 x i64> @variable_srl2(<2 x i64> %x, <2 x i64> %y) {
89 ; X32-LABEL: variable_srl2:
90 ; X32: ## BB#0:
91 ; X32-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
92 ; X32-NEXT: retl
93 ;
94 ; X64-LABEL: variable_srl2:
95 ; X64: ## BB#0:
96 ; X64-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
97 ; X64-NEXT: retq
4898 %k = lshr <2 x i64> %x, %y
4999 ret <2 x i64> %k
50100 }
51 ; CHECK: variable_srl3
52 ; CHECK: psrlvq
53 ; CHECK: ret
101
54102 define <4 x i64> @variable_srl3(<4 x i64> %x, <4 x i64> %y) {
103 ; X32-LABEL: variable_srl3:
104 ; X32: ## BB#0:
105 ; X32-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
106 ; X32-NEXT: retl
107 ;
108 ; X64-LABEL: variable_srl3:
109 ; X64: ## BB#0:
110 ; X64-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
111 ; X64-NEXT: retq
55112 %k = lshr <4 x i64> %x, %y
56113 ret <4 x i64> %k
57114 }
58115
59 ; CHECK: variable_sra0
60 ; CHECK: vpsravd
61 ; CHECK: ret
62116 define <4 x i32> @variable_sra0(<4 x i32> %x, <4 x i32> %y) {
117 ; X32-LABEL: variable_sra0:
118 ; X32: ## BB#0:
119 ; X32-NEXT: vpsravd %xmm1, %xmm0, %xmm0
120 ; X32-NEXT: retl
121 ;
122 ; X64-LABEL: variable_sra0:
123 ; X64: ## BB#0:
124 ; X64-NEXT: vpsravd %xmm1, %xmm0, %xmm0
125 ; X64-NEXT: retq
63126 %k = ashr <4 x i32> %x, %y
64127 ret <4 x i32> %k
65128 }
66 ; CHECK: variable_sra1
67 ; CHECK: vpsravd
68 ; CHECK: ret
129
69130 define <8 x i32> @variable_sra1(<8 x i32> %x, <8 x i32> %y) {
131 ; X32-LABEL: variable_sra1:
132 ; X32: ## BB#0:
133 ; X32-NEXT: vpsravd %ymm1, %ymm0, %ymm0
134 ; X32-NEXT: retl
135 ;
136 ; X64-LABEL: variable_sra1:
137 ; X64: ## BB#0:
138 ; X64-NEXT: vpsravd %ymm1, %ymm0, %ymm0
139 ; X64-NEXT: retq
70140 %k = ashr <8 x i32> %x, %y
71141 ret <8 x i32> %k
72142 }
73143
74144 ;;; Shift left
75 ; CHECK: vpslld
145
76146 define <8 x i32> @vshift00(<8 x i32> %a) nounwind readnone {
77 %s = shl <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
147 ; X32-LABEL: vshift00:
148 ; X32: ## BB#0:
149 ; X32-NEXT: vpslld $2, %ymm0, %ymm0
150 ; X32-NEXT: retl
151 ;
152 ; X64-LABEL: vshift00:
153 ; X64: ## BB#0:
154 ; X64-NEXT: vpslld $2, %ymm0, %ymm0
155 ; X64-NEXT: retq
156 %s = shl <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
79157 ret <8 x i32> %s
80158 }
81159
82 ; CHECK: vpsllw
83160 define <16 x i16> @vshift01(<16 x i16> %a) nounwind readnone {
161 ; X32-LABEL: vshift01:
162 ; X32: ## BB#0:
163 ; X32-NEXT: vpsllw $2, %ymm0, %ymm0
164 ; X32-NEXT: retl
165 ;
166 ; X64-LABEL: vshift01:
167 ; X64: ## BB#0:
168 ; X64-NEXT: vpsllw $2, %ymm0, %ymm0
169 ; X64-NEXT: retq
84170 %s = shl <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
85171 ret <16 x i16> %s
86172 }
87173
88 ; CHECK: vpsllq
89174 define <4 x i64> @vshift02(<4 x i64> %a) nounwind readnone {
175 ; X32-LABEL: vshift02:
176 ; X32: ## BB#0:
177 ; X32-NEXT: vpsllq $2, %ymm0, %ymm0
178 ; X32-NEXT: retl
179 ;
180 ; X64-LABEL: vshift02:
181 ; X64: ## BB#0:
182 ; X64-NEXT: vpsllq $2, %ymm0, %ymm0
183 ; X64-NEXT: retq
90184 %s = shl <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
91185 ret <4 x i64> %s
92186 }
93187
94188 ;;; Logical Shift right
95 ; CHECK: vpsrld
189
96190 define <8 x i32> @vshift03(<8 x i32> %a) nounwind readnone {
97 %s = lshr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
191 ; X32-LABEL: vshift03:
192 ; X32: ## BB#0:
193 ; X32-NEXT: vpsrld $2, %ymm0, %ymm0
194 ; X32-NEXT: retl
195 ;
196 ; X64-LABEL: vshift03:
197 ; X64: ## BB#0:
198 ; X64-NEXT: vpsrld $2, %ymm0, %ymm0
199 ; X64-NEXT: retq
200 %s = lshr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
99201 ret <8 x i32> %s
100202 }
101203
102 ; CHECK: vpsrlw
103204 define <16 x i16> @vshift04(<16 x i16> %a) nounwind readnone {
205 ; X32-LABEL: vshift04:
206 ; X32: ## BB#0:
207 ; X32-NEXT: vpsrlw $2, %ymm0, %ymm0
208 ; X32-NEXT: retl
209 ;
210 ; X64-LABEL: vshift04:
211 ; X64: ## BB#0:
212 ; X64-NEXT: vpsrlw $2, %ymm0, %ymm0
213 ; X64-NEXT: retq
104214 %s = lshr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
105215 ret <16 x i16> %s
106216 }
107217
108 ; CHECK: vpsrlq
109218 define <4 x i64> @vshift05(<4 x i64> %a) nounwind readnone {
219 ; X32-LABEL: vshift05:
220 ; X32: ## BB#0:
221 ; X32-NEXT: vpsrlq $2, %ymm0, %ymm0
222 ; X32-NEXT: retl
223 ;
224 ; X64-LABEL: vshift05:
225 ; X64: ## BB#0:
226 ; X64-NEXT: vpsrlq $2, %ymm0, %ymm0
227 ; X64-NEXT: retq
110228 %s = lshr <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
111229 ret <4 x i64> %s
112230 }
113231
114232 ;;; Arithmetic Shift right
115 ; CHECK: vpsrad
233
116234 define <8 x i32> @vshift06(<8 x i32> %a) nounwind readnone {
117 %s = ashr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
235 ; X32-LABEL: vshift06:
236 ; X32: ## BB#0:
237 ; X32-NEXT: vpsrad $2, %ymm0, %ymm0
238 ; X32-NEXT: retl
239 ;
240 ; X64-LABEL: vshift06:
241 ; X64: ## BB#0:
242 ; X64-NEXT: vpsrad $2, %ymm0, %ymm0
243 ; X64-NEXT: retq
244 %s = ashr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
119245 ret <8 x i32> %s
120246 }
121247
122 ; CHECK: vpsraw
123248 define <16 x i16> @vshift07(<16 x i16> %a) nounwind readnone {
249 ; X32-LABEL: vshift07:
250 ; X32: ## BB#0:
251 ; X32-NEXT: vpsraw $2, %ymm0, %ymm0
252 ; X32-NEXT: retl
253 ;
254 ; X64-LABEL: vshift07:
255 ; X64: ## BB#0:
256 ; X64-NEXT: vpsraw $2, %ymm0, %ymm0
257 ; X64-NEXT: retq
124258 %s = ashr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
125259 ret <16 x i16> %s
126260 }
127261
128 ; CHECK: variable_sra0_load
129 ; CHECK: vpsravd (%
130 ; CHECK: ret
131262 define <4 x i32> @variable_sra0_load(<4 x i32> %x, <4 x i32>* %y) {
263 ; X32-LABEL: variable_sra0_load:
264 ; X32: ## BB#0:
265 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
266 ; X32-NEXT: vpsravd (%eax), %xmm0, %xmm0
267 ; X32-NEXT: retl
268 ;
269 ; X64-LABEL: variable_sra0_load:
270 ; X64: ## BB#0:
271 ; X64-NEXT: vpsravd (%rdi), %xmm0, %xmm0
272 ; X64-NEXT: retq
132273 %y1 = load <4 x i32>, <4 x i32>* %y
133274 %k = ashr <4 x i32> %x, %y1
134275 ret <4 x i32> %k
135276 }
136277
137 ; CHECK: variable_sra1_load
138 ; CHECK: vpsravd (%
139 ; CHECK: ret
140278 define <8 x i32> @variable_sra1_load(<8 x i32> %x, <8 x i32>* %y) {
279 ; X32-LABEL: variable_sra1_load:
280 ; X32: ## BB#0:
281 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
282 ; X32-NEXT: vpsravd (%eax), %ymm0, %ymm0
283 ; X32-NEXT: retl
284 ;
285 ; X64-LABEL: variable_sra1_load:
286 ; X64: ## BB#0:
287 ; X64-NEXT: vpsravd (%rdi), %ymm0, %ymm0
288 ; X64-NEXT: retq
141289 %y1 = load <8 x i32>, <8 x i32>* %y
142290 %k = ashr <8 x i32> %x, %y1
143291 ret <8 x i32> %k
144292 }
145293
146 ; CHECK: variable_shl0_load
147 ; CHECK: vpsllvd (%
148 ; CHECK: ret
149294 define <4 x i32> @variable_shl0_load(<4 x i32> %x, <4 x i32>* %y) {
295 ; X32-LABEL: variable_shl0_load:
296 ; X32: ## BB#0:
297 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
298 ; X32-NEXT: vpsllvd (%eax), %xmm0, %xmm0
299 ; X32-NEXT: retl
300 ;
301 ; X64-LABEL: variable_shl0_load:
302 ; X64: ## BB#0:
303 ; X64-NEXT: vpsllvd (%rdi), %xmm0, %xmm0
304 ; X64-NEXT: retq
150305 %y1 = load <4 x i32>, <4 x i32>* %y
151306 %k = shl <4 x i32> %x, %y1
152307 ret <4 x i32> %k
153308 }
154 ; CHECK: variable_shl1_load
155 ; CHECK: vpsllvd (%
156 ; CHECK: ret
309
157310 define <8 x i32> @variable_shl1_load(<8 x i32> %x, <8 x i32>* %y) {
311 ; X32-LABEL: variable_shl1_load:
312 ; X32: ## BB#0:
313 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
314 ; X32-NEXT: vpsllvd (%eax), %ymm0, %ymm0
315 ; X32-NEXT: retl
316 ;
317 ; X64-LABEL: variable_shl1_load:
318 ; X64: ## BB#0:
319 ; X64-NEXT: vpsllvd (%rdi), %ymm0, %ymm0
320 ; X64-NEXT: retq
158321 %y1 = load <8 x i32>, <8 x i32>* %y
159322 %k = shl <8 x i32> %x, %y1
160323 ret <8 x i32> %k
161324 }
162 ; CHECK: variable_shl2_load
163 ; CHECK: vpsllvq (%
164 ; CHECK: ret
325
165326 define <2 x i64> @variable_shl2_load(<2 x i64> %x, <2 x i64>* %y) {
327 ; X32-LABEL: variable_shl2_load:
328 ; X32: ## BB#0:
329 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
330 ; X32-NEXT: vpsllvq (%eax), %xmm0, %xmm0
331 ; X32-NEXT: retl
332 ;
333 ; X64-LABEL: variable_shl2_load:
334 ; X64: ## BB#0:
335 ; X64-NEXT: vpsllvq (%rdi), %xmm0, %xmm0
336 ; X64-NEXT: retq
166337 %y1 = load <2 x i64>, <2 x i64>* %y
167338 %k = shl <2 x i64> %x, %y1
168339 ret <2 x i64> %k
169340 }
170 ; CHECK: variable_shl3_load
171 ; CHECK: vpsllvq (%
172 ; CHECK: ret
341
173342 define <4 x i64> @variable_shl3_load(<4 x i64> %x, <4 x i64>* %y) {
343 ; X32-LABEL: variable_shl3_load:
344 ; X32: ## BB#0:
345 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
346 ; X32-NEXT: vpsllvq (%eax), %ymm0, %ymm0
347 ; X32-NEXT: retl
348 ;
349 ; X64-LABEL: variable_shl3_load:
350 ; X64: ## BB#0:
351 ; X64-NEXT: vpsllvq (%rdi), %ymm0, %ymm0
352 ; X64-NEXT: retq
174353 %y1 = load <4 x i64>, <4 x i64>* %y
175354 %k = shl <4 x i64> %x, %y1
176355 ret <4 x i64> %k
177356 }
178 ; CHECK: variable_srl0_load
179 ; CHECK: vpsrlvd (%
180 ; CHECK: ret
357
181358 define <4 x i32> @variable_srl0_load(<4 x i32> %x, <4 x i32>* %y) {
359 ; X32-LABEL: variable_srl0_load:
360 ; X32: ## BB#0:
361 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
362 ; X32-NEXT: vpsrlvd (%eax), %xmm0, %xmm0
363 ; X32-NEXT: retl
364 ;
365 ; X64-LABEL: variable_srl0_load:
366 ; X64: ## BB#0:
367 ; X64-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0
368 ; X64-NEXT: retq
182369 %y1 = load <4 x i32>, <4 x i32>* %y
183370 %k = lshr <4 x i32> %x, %y1
184371 ret <4 x i32> %k
185372 }
186 ; CHECK: variable_srl1_load
187 ; CHECK: vpsrlvd (%
188 ; CHECK: ret
373
189374 define <8 x i32> @variable_srl1_load(<8 x i32> %x, <8 x i32>* %y) {
375 ; X32-LABEL: variable_srl1_load:
376 ; X32: ## BB#0:
377 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
378 ; X32-NEXT: vpsrlvd (%eax), %ymm0, %ymm0
379 ; X32-NEXT: retl
380 ;
381 ; X64-LABEL: variable_srl1_load:
382 ; X64: ## BB#0:
383 ; X64-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0
384 ; X64-NEXT: retq
190385 %y1 = load <8 x i32>, <8 x i32>* %y
191386 %k = lshr <8 x i32> %x, %y1
192387 ret <8 x i32> %k
193388 }
194 ; CHECK: variable_srl2_load
195 ; CHECK: vpsrlvq (%
196 ; CHECK: ret
389
197390 define <2 x i64> @variable_srl2_load(<2 x i64> %x, <2 x i64>* %y) {
391 ; X32-LABEL: variable_srl2_load:
392 ; X32: ## BB#0:
393 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
394 ; X32-NEXT: vpsrlvq (%eax), %xmm0, %xmm0
395 ; X32-NEXT: retl
396 ;
397 ; X64-LABEL: variable_srl2_load:
398 ; X64: ## BB#0:
399 ; X64-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0
400 ; X64-NEXT: retq
198401 %y1 = load <2 x i64>, <2 x i64>* %y
199402 %k = lshr <2 x i64> %x, %y1
200403 ret <2 x i64> %k
201404 }
202 ; CHECK: variable_srl3_load
203 ; CHECK: vpsrlvq (%
204 ; CHECK: ret
405
205406 define <4 x i64> @variable_srl3_load(<4 x i64> %x, <4 x i64>* %y) {
407 ; X32-LABEL: variable_srl3_load:
408 ; X32: ## BB#0:
409 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
410 ; X32-NEXT: vpsrlvq (%eax), %ymm0, %ymm0
411 ; X32-NEXT: retl
412 ;
413 ; X64-LABEL: variable_srl3_load:
414 ; X64: ## BB#0:
415 ; X64-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0
416 ; X64-NEXT: retq
206417 %y1 = load <4 x i64>, <4 x i64>* %y
207418 %k = lshr <4 x i64> %x, %y1
208419 ret <4 x i64> %k
209420 }
210421
211422 define <32 x i8> @shl9(<32 x i8> %A) nounwind {
423 ; X32-LABEL: shl9:
424 ; X32: ## BB#0:
425 ; X32-NEXT: vpsllw $3, %ymm0, %ymm0
426 ; X32-NEXT: vpand LCPI28_0, %ymm0, %ymm0
427 ; X32-NEXT: retl
428 ;
429 ; X64-LABEL: shl9:
430 ; X64: ## BB#0:
431 ; X64-NEXT: vpsllw $3, %ymm0, %ymm0
432 ; X64-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
433 ; X64-NEXT: retq
212434 %B = shl <32 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
213435 ret <32 x i8> %B
214 ; CHECK-LABEL: shl9:
215 ; CHECK: vpsllw $3
216 ; CHECK: vpand
217 ; CHECK: ret
218436 }
219437
220438 define <32 x i8> @shr9(<32 x i8> %A) nounwind {
439 ; X32-LABEL: shr9:
440 ; X32: ## BB#0:
441 ; X32-NEXT: vpsrlw $3, %ymm0, %ymm0
442 ; X32-NEXT: vpand LCPI29_0, %ymm0, %ymm0
443 ; X32-NEXT: retl
444 ;
445 ; X64-LABEL: shr9:
446 ; X64: ## BB#0:
447 ; X64-NEXT: vpsrlw $3, %ymm0, %ymm0
448 ; X64-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
449 ; X64-NEXT: retq
221450 %B = lshr <32 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
222451 ret <32 x i8> %B
223 ; CHECK-LABEL: shr9:
224 ; CHECK: vpsrlw $3
225 ; CHECK: vpand
226 ; CHECK: ret
227452 }
228453
229454 define <32 x i8> @sra_v32i8_7(<32 x i8> %A) nounwind {
455 ; X32-LABEL: sra_v32i8_7:
456 ; X32: ## BB#0:
457 ; X32-NEXT: vpxor %ymm1, %ymm1, %ymm1
458 ; X32-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
459 ; X32-NEXT: retl
460 ;
461 ; X64-LABEL: sra_v32i8_7:
462 ; X64: ## BB#0:
463 ; X64-NEXT: vpxor %ymm1, %ymm1, %ymm1
464 ; X64-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
465 ; X64-NEXT: retq
230466 %B = ashr <32 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
231467 ret <32 x i8> %B
232 ; CHECK-LABEL: sra_v32i8_7:
233 ; CHECK: vpxor
234 ; CHECK: vpcmpgtb
235 ; CHECK: ret
236468 }
237469
238470 define <32 x i8> @sra_v32i8(<32 x i8> %A) nounwind {
471 ; X32-LABEL: sra_v32i8:
472 ; X32: ## BB#0:
473 ; X32-NEXT: vpsrlw $3, %ymm0, %ymm0
474 ; X32-NEXT: vpand LCPI31_0, %ymm0, %ymm0
475 ; X32-NEXT: vmovdqa {{.*#+}} ymm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
476 ; X32-NEXT: vpxor %ymm1, %ymm0, %ymm0
477 ; X32-NEXT: vpsubb %ymm1, %ymm0, %ymm0
478 ; X32-NEXT: retl
479 ;
480 ; X64-LABEL: sra_v32i8:
481 ; X64: ## BB#0:
482 ; X64-NEXT: vpsrlw $3, %ymm0, %ymm0
483 ; X64-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
484 ; X64-NEXT: vmovdqa {{.*#+}} ymm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
485 ; X64-NEXT: vpxor %ymm1, %ymm0, %ymm0
486 ; X64-NEXT: vpsubb %ymm1, %ymm0, %ymm0
487 ; X64-NEXT: retq
239488 %B = ashr <32 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
240489 ret <32 x i8> %B
241 ; CHECK-LABEL: sra_v32i8:
242 ; CHECK: vpsrlw $3
243 ; CHECK: vpand
244 ; CHECK: vpxor
245 ; CHECK: vpsubb
246 ; CHECK: ret
247 }
248
249 ; CHECK: _sext_v16i16
250 ; CHECK: vpsllw
251 ; CHECK: vpsraw
252 ; CHECK-NOT: vinsertf128
490 }
491
253492 define <16 x i16> @sext_v16i16(<16 x i16> %a) nounwind {
493 ; X32-LABEL: sext_v16i16:
494 ; X32: ## BB#0:
495 ; X32-NEXT: vpsllw $8, %ymm0, %ymm0
496 ; X32-NEXT: vpsraw $8, %ymm0, %ymm0
497 ; X32-NEXT: retl
498 ;
499 ; X64-LABEL: sext_v16i16:
500 ; X64: ## BB#0:
501 ; X64-NEXT: vpsllw $8, %ymm0, %ymm0
502 ; X64-NEXT: vpsraw $8, %ymm0, %ymm0
503 ; X64-NEXT: retq
254504 %b = trunc <16 x i16> %a to <16 x i8>
255505 %c = sext <16 x i8> %b to <16 x i16>
256506 ret <16 x i16> %c
257507 }
258508
259 ; CHECK: _sext_v8i32
260 ; CHECK: vpslld
261 ; CHECK: vpsrad
262 ; CHECK-NOT: vinsertf128
263509 define <8 x i32> @sext_v8i32(<8 x i32> %a) nounwind {
510 ; X32-LABEL: sext_v8i32:
511 ; X32: ## BB#0:
512 ; X32-NEXT: vpslld $16, %ymm0, %ymm0
513 ; X32-NEXT: vpsrad $16, %ymm0, %ymm0
514 ; X32-NEXT: retl
515 ;
516 ; X64-LABEL: sext_v8i32:
517 ; X64: ## BB#0:
518 ; X64-NEXT: vpslld $16, %ymm0, %ymm0
519 ; X64-NEXT: vpsrad $16, %ymm0, %ymm0
520 ; X64-NEXT: retq
264521 %b = trunc <8 x i32> %a to <8 x i16>
265522 %c = sext <8 x i16> %b to <8 x i32>
266523 ret <8 x i32> %c
267524 }
268525
269526 define <8 x i16> @variable_shl16(<8 x i16> %lhs, <8 x i16> %rhs) {
270 ; CHECK-LABEL: variable_shl16:
271 ; CHECK-DAG: vpmovzxwd %xmm1, [[AMT:%ymm[0-9]+]]
272 ; CHECK-DAG: vpmovzxwd %xmm0, [[LHS:%ymm[0-9]+]]
273 ; CHECK: vpsllvd [[AMT]], [[LHS]], {{%ymm[0-9]+}}
274 ; CHECK: vpshufb
275 ; CHECK: vpermq
527 ; X32-LABEL: variable_shl16:
528 ; X32: ## BB#0:
529 ; X32-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
530 ; X32-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
531 ; X32-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
532 ; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
533 ; X32-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
534 ; X32-NEXT: ## kill: %XMM0 %XMM0 %YMM0
535 ; X32-NEXT: vzeroupper
536 ; X32-NEXT: retl
537 ;
538 ; X64-LABEL: variable_shl16:
539 ; X64: ## BB#0:
540 ; X64-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
541 ; X64-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
542 ; X64-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
543 ; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
544 ; X64-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
545 ; X64-NEXT: ## kill: %XMM0 %XMM0 %YMM0
546 ; X64-NEXT: vzeroupper
547 ; X64-NEXT: retq
276548 %res = shl <8 x i16> %lhs, %rhs
277549 ret <8 x i16> %res
278550 }
279551
280552 define <8 x i16> @variable_ashr16(<8 x i16> %lhs, <8 x i16> %rhs) {
281 ; CHECK-LABEL: variable_ashr16:
282 ; CHECK-DAG: vpmovzxwd %xmm1, [[AMT:%ymm[0-9]+]]
283 ; CHECK-DAG: vpmovsxwd %xmm0, [[LHS:%ymm[0-9]+]]
284 ; CHECK: vpsravd [[AMT]], [[LHS]], {{%ymm[0-9]+}}
285 ; CHECK: vpshufb
286 ; CHECK: vpermq
553 ; X32-LABEL: variable_ashr16:
554 ; X32: ## BB#0:
555 ; X32-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
556 ; X32-NEXT: vpmovsxwd %xmm0, %ymm0
557 ; X32-NEXT: vpsravd %ymm1, %ymm0, %ymm0
558 ; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
559 ; X32-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
560 ; X32-NEXT: ## kill: %XMM0 %XMM0 %YMM0
561 ; X32-NEXT: vzeroupper
562 ; X32-NEXT: retl
563 ;
564 ; X64-LABEL: variable_ashr16:
565 ; X64: ## BB#0:
566 ; X64-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
567 ; X64-NEXT: vpmovsxwd %xmm0, %ymm0
568 ; X64-NEXT: vpsravd %ymm1, %ymm0, %ymm0
569 ; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
570 ; X64-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
571 ; X64-NEXT: ## kill: %XMM0 %XMM0 %YMM0
572 ; X64-NEXT: vzeroupper
573 ; X64-NEXT: retq
287574 %res = ashr <8 x i16> %lhs, %rhs
288575 ret <8 x i16> %res
289576 }
290577
291578 define <8 x i16> @variable_lshr16(<8 x i16> %lhs, <8 x i16> %rhs) {
292 ; CHECK-LABEL: variable_lshr16:
293 ; CHECK-DAG: vpmovzxwd %xmm1, [[AMT:%ymm[0-9]+]]
294 ; CHECK-DAG: vpmovzxwd %xmm0, [[LHS:%ymm[0-9]+]]
295 ; CHECK: vpsrlvd [[AMT]], [[LHS]], {{%ymm[0-9]+}}
296 ; CHECK: vpshufb
297 ; CHECK: vpermq
579 ; X32-LABEL: variable_lshr16:
580 ; X32: ## BB#0:
581 ; X32-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
582 ; X32-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
583 ; X32-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
584 ; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
585 ; X32-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
586 ; X32-NEXT: ## kill: %XMM0 %XMM0 %YMM0
587 ; X32-NEXT: vzeroupper
588 ; X32-NEXT: retl
589 ;
590 ; X64-LABEL: variable_lshr16:
591 ; X64: ## BB#0:
592 ; X64-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
593 ; X64-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
594 ; X64-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
595 ; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
596 ; X64-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
597 ; X64-NEXT: ## kill: %XMM0 %XMM0 %YMM0
598 ; X64-NEXT: vzeroupper
599 ; X64-NEXT: retq
298600 %res = lshr <8 x i16> %lhs, %rhs
299601 ret <8 x i16> %res
300 }
602 }
0 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 | FileCheck %s
1 ; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X32
2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X64
23
34 ; AVX2 Logical Shift Left
45
56 define <16 x i16> @test_sllw_1(<16 x i16> %InVec) {
6 ; CHECK-LABEL: test_sllw_1:
7 ; CHECK: # BB#0: # %entry
8 ; CHECK-NEXT: retq
7 ; X32-LABEL: test_sllw_1:
8 ; X32: ## BB#0: ## %entry
9 ; X32-NEXT: retl
10 ;
11 ; X64-LABEL: test_sllw_1:
12 ; X64: ## BB#0: ## %entry
13 ; X64-NEXT: retq
914 entry:
1015 %shl = shl <16 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
1116 ret <16 x i16> %shl
1217 }
1318
1419 define <16 x i16> @test_sllw_2(<16 x i16> %InVec) {
15 ; CHECK-LABEL: test_sllw_2:
16 ; CHECK: # BB#0: # %entry
17 ; CHECK-NEXT: vpaddw %ymm0, %ymm0, %ymm0
18 ; CHECK-NEXT: retq
20 ; X32-LABEL: test_sllw_2:
21 ; X32: ## BB#0: ## %entry
22 ; X32-NEXT: vpaddw %ymm0, %ymm0, %ymm0
23 ; X32-NEXT: retl
24 ;
25 ; X64-LABEL: test_sllw_2:
26 ; X64: ## BB#0: ## %entry
27 ; X64-NEXT: vpaddw %ymm0, %ymm0, %ymm0
28 ; X64-NEXT: retq
1929 entry:
2030 %shl = shl <16 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
2131 ret <16 x i16> %shl
2232 }
2333
2434 define <16 x i16> @test_sllw_3(<16 x i16> %InVec) {
25 ; CHECK-LABEL: test_sllw_3:
26 ; CHECK: # BB#0: # %entry
27 ; CHECK-NEXT: vpsllw $15, %ymm0, %ymm0
28 ; CHECK-NEXT: retq
35 ; X32-LABEL: test_sllw_3:
36 ; X32: ## BB#0: ## %entry
37 ; X32-NEXT: vpsllw $15, %ymm0, %ymm0
38 ; X32-NEXT: retl
39 ;
40 ; X64-LABEL: test_sllw_3:
41 ; X64: ## BB#0: ## %entry
42 ; X64-NEXT: vpsllw $15, %ymm0, %ymm0
43 ; X64-NEXT: retq
2944 entry:
3045 %shl = shl <16 x i16> %InVec, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
3146 ret <16 x i16> %shl
3247 }
3348
3449 define <8 x i32> @test_slld_1(<8 x i32> %InVec) {
35 ; CHECK-LABEL: test_slld_1:
36 ; CHECK: # BB#0: # %entry
37 ; CHECK-NEXT: retq
50 ; X32-LABEL: test_slld_1:
51 ; X32: ## BB#0: ## %entry
52 ; X32-NEXT: retl
53 ;
54 ; X64-LABEL: test_slld_1:
55 ; X64: ## BB#0: ## %entry
56 ; X64-NEXT: retq
3857 entry:
3958 %shl = shl <8 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
4059 ret <8 x i32> %shl
4160 }
4261
4362 define <8 x i32> @test_slld_2(<8 x i32> %InVec) {
44 ; CHECK-LABEL: test_slld_2:
45 ; CHECK: # BB#0: # %entry
46 ; CHECK-NEXT: vpaddd %ymm0, %ymm0, %ymm0
47 ; CHECK-NEXT: retq
63 ; X32-LABEL: test_slld_2:
64 ; X32: ## BB#0: ## %entry
65 ; X32-NEXT: vpaddd %ymm0, %ymm0, %ymm0
66 ; X32-NEXT: retl
67 ;
68 ; X64-LABEL: test_slld_2:
69 ; X64: ## BB#0: ## %entry
70 ; X64-NEXT: vpaddd %ymm0, %ymm0, %ymm0
71 ; X64-NEXT: retq
4872 entry:
4973 %shl = shl <8 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
5074 ret <8 x i32> %shl
5175 }
5276
5377 define <8 x i32> @test_vpslld_var(i32 %shift) {
54 ; CHECK-LABEL: test_vpslld_var:
55 ; CHECK: # BB#0:
56 ; CHECK-NEXT: vmovd %edi, %xmm0
57 ; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = [192,193,194,195,196,197,198,199]
58 ; CHECK-NEXT: vpslld %xmm0, %ymm1, %ymm0
59 ; CHECK-NEXT: retq
78 ; X32-LABEL: test_vpslld_var:
79 ; X32: ## BB#0:
80 ; X32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
81 ; X32-NEXT: vmovdqa {{.*#+}} ymm1 = [192,193,194,195,196,197,198,199]
82 ; X32-NEXT: vpslld %xmm0, %ymm1, %ymm0
83 ; X32-NEXT: retl
84 ;
85 ; X64-LABEL: test_vpslld_var:
86 ; X64: ## BB#0:
87 ; X64-NEXT: vmovd %edi, %xmm0
88 ; X64-NEXT: vmovdqa {{.*#+}} ymm1 = [192,193,194,195,196,197,198,199]
89 ; X64-NEXT: vpslld %xmm0, %ymm1, %ymm0
90 ; X64-NEXT: retq
6091 %amt = insertelement <8 x i32> undef, i32 %shift, i32 0
6192 %tmp = shl <8 x i32> <i32 192, i32 193, i32 194, i32 195, i32 196, i32 197, i32 198, i32 199>, %amt
6293 ret <8 x i32> %tmp
6394 }
6495
6596 define <8 x i32> @test_slld_3(<8 x i32> %InVec) {
66 ; CHECK-LABEL: test_slld_3:
67 ; CHECK: # BB#0: # %entry
68 ; CHECK-NEXT: vpslld $31, %ymm0, %ymm0
69 ; CHECK-NEXT: retq
97 ; X32-LABEL: test_slld_3:
98 ; X32: ## BB#0: ## %entry
99 ; X32-NEXT: vpslld $31, %ymm0, %ymm0
100 ; X32-NEXT: retl
101 ;
102 ; X64-LABEL: test_slld_3:
103 ; X64: ## BB#0: ## %entry
104 ; X64-NEXT: vpslld $31, %ymm0, %ymm0
105 ; X64-NEXT: retq
70106 entry:
71107 %shl = shl <8 x i32> %InVec, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
72108 ret <8 x i32> %shl
73109 }
74110
75111 define <4 x i64> @test_sllq_1(<4 x i64> %InVec) {
76 ; CHECK-LABEL: test_sllq_1:
77 ; CHECK: # BB#0: # %entry
78 ; CHECK-NEXT: retq
112 ; X32-LABEL: test_sllq_1:
113 ; X32: ## BB#0: ## %entry
114 ; X32-NEXT: retl
115 ;
116 ; X64-LABEL: test_sllq_1:
117 ; X64: ## BB#0: ## %entry
118 ; X64-NEXT: retq
79119 entry:
80120 %shl = shl <4 x i64> %InVec, <i64 0, i64 0, i64 0, i64 0>
81121 ret <4 x i64> %shl
82122 }
83123
84124 define <4 x i64> @test_sllq_2(<4 x i64> %InVec) {
85 ; CHECK-LABEL: test_sllq_2:
86 ; CHECK: # BB#0: # %entry
87 ; CHECK-NEXT: vpaddq %ymm0, %ymm0, %ymm0
88 ; CHECK-NEXT: retq
125 ; X32-LABEL: test_sllq_2:
126 ; X32: ## BB#0: ## %entry
127 ; X32-NEXT: vpaddq %ymm0, %ymm0, %ymm0
128 ; X32-NEXT: retl
129 ;
130 ; X64-LABEL: test_sllq_2:
131 ; X64: ## BB#0: ## %entry
132 ; X64-NEXT: vpaddq %ymm0, %ymm0, %ymm0
133 ; X64-NEXT: retq
89134 entry:
90135 %shl = shl <4 x i64> %InVec, <i64 1, i64 1, i64 1, i64 1>
91136 ret <4 x i64> %shl
92137 }
93138
94139 define <4 x i64> @test_sllq_3(<4 x i64> %InVec) {
95 ; CHECK-LABEL: test_sllq_3:
96 ; CHECK: # BB#0: # %entry
97 ; CHECK-NEXT: vpsllq $63, %ymm0, %ymm0
98 ; CHECK-NEXT: retq
140 ; X32-LABEL: test_sllq_3:
141 ; X32: ## BB#0: ## %entry
142 ; X32-NEXT: vpsllq $63, %ymm0, %ymm0
143 ; X32-NEXT: retl
144 ;
145 ; X64-LABEL: test_sllq_3:
146 ; X64: ## BB#0: ## %entry
147 ; X64-NEXT: vpsllq $63, %ymm0, %ymm0
148 ; X64-NEXT: retq
99149 entry:
100150 %shl = shl <4 x i64> %InVec, <i64 63, i64 63, i64 63, i64 63>
101151 ret <4 x i64> %shl
104154 ; AVX2 Arithmetic Shift
105155
106156 define <16 x i16> @test_sraw_1(<16 x i16> %InVec) {
107 ; CHECK-LABEL: test_sraw_1:
108 ; CHECK: # BB#0: # %entry
109 ; CHECK-NEXT: retq
157 ; X32-LABEL: test_sraw_1:
158 ; X32: ## BB#0: ## %entry
159 ; X32-NEXT: retl
160 ;
161 ; X64-LABEL: test_sraw_1:
162 ; X64: ## BB#0: ## %entry
163 ; X64-NEXT: retq
110164 entry:
111165 %shl = ashr <16 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
112166 ret <16 x i16> %shl
113167 }
114168
115169 define <16 x i16> @test_sraw_2(<16 x i16> %InVec) {
116 ; CHECK-LABEL: test_sraw_2:
117 ; CHECK: # BB#0: # %entry
118 ; CHECK-NEXT: vpsraw $1, %ymm0, %ymm0
119 ; CHECK-NEXT: retq
170 ; X32-LABEL: test_sraw_2:
171 ; X32: ## BB#0: ## %entry
172 ; X32-NEXT: vpsraw $1, %ymm0, %ymm0
173 ; X32-NEXT: retl
174 ;
175 ; X64-LABEL: test_sraw_2:
176 ; X64: ## BB#0: ## %entry
177 ; X64-NEXT: vpsraw $1, %ymm0, %ymm0
178 ; X64-NEXT: retq
120179 entry:
121180 %shl = ashr <16 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
122181 ret <16 x i16> %shl
123182 }
124183
125184 define <16 x i16> @test_sraw_3(<16 x i16> %InVec) {
126 ; CHECK-LABEL: test_sraw_3:
127 ; CHECK: # BB#0: # %entry
128 ; CHECK-NEXT: vpsraw $15, %ymm0, %ymm0
129 ; CHECK-NEXT: retq
185 ; X32-LABEL: test_sraw_3:
186 ; X32: ## BB#0: ## %entry
187 ; X32-NEXT: vpsraw $15, %ymm0, %ymm0
188 ; X32-NEXT: retl
189 ;
190 ; X64-LABEL: test_sraw_3:
191 ; X64: ## BB#0: ## %entry
192 ; X64-NEXT: vpsraw $15, %ymm0, %ymm0
193 ; X64-NEXT: retq
130194 entry:
131195 %shl = ashr <16 x i16> %InVec, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
132196 ret <16 x i16> %shl
133197 }
134198
135199 define <8 x i32> @test_srad_1(<8 x i32> %InVec) {
136 ; CHECK-LABEL: test_srad_1:
137 ; CHECK: # BB#0: # %entry
138 ; CHECK-NEXT: retq
200 ; X32-LABEL: test_srad_1:
201 ; X32: ## BB#0: ## %entry
202 ; X32-NEXT: retl
203 ;
204 ; X64-LABEL: test_srad_1:
205 ; X64: ## BB#0: ## %entry
206 ; X64-NEXT: retq
139207 entry:
140208 %shl = ashr <8 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
141209 ret <8 x i32> %shl
142210 }
143211
144212 define <8 x i32> @test_srad_2(<8 x i32> %InVec) {
145 ; CHECK-LABEL: test_srad_2:
146 ; CHECK: # BB#0: # %entry
147 ; CHECK-NEXT: vpsrad $1, %ymm0, %ymm0
148 ; CHECK-NEXT: retq
213 ; X32-LABEL: test_srad_2:
214 ; X32: ## BB#0: ## %entry
215 ; X32-NEXT: vpsrad $1, %ymm0, %ymm0
216 ; X32-NEXT: retl
217 ;
218 ; X64-LABEL: test_srad_2:
219 ; X64: ## BB#0: ## %entry
220 ; X64-NEXT: vpsrad $1, %ymm0, %ymm0
221 ; X64-NEXT: retq
149222 entry:
150223 %shl = ashr <8 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
151224 ret <8 x i32> %shl
152225 }
153226
154227 define <8 x i32> @test_srad_3(<8 x i32> %InVec) {
155 ; CHECK-LABEL: test_srad_3:
156 ; CHECK: # BB#0: # %entry
157 ; CHECK-NEXT: vpsrad $31, %ymm0, %ymm0
158 ; CHECK-NEXT: retq
228 ; X32-LABEL: test_srad_3:
229 ; X32: ## BB#0: ## %entry
230 ; X32-NEXT: vpsrad $31, %ymm0, %ymm0
231 ; X32-NEXT: retl
232 ;
233 ; X64-LABEL: test_srad_3:
234 ; X64: ## BB#0: ## %entry
235 ; X64-NEXT: vpsrad $31, %ymm0, %ymm0
236 ; X64-NEXT: retq
159237 entry:
160238 %shl = ashr <8 x i32> %InVec, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
161239 ret <8 x i32> %shl
164242 ; SSE Logical Shift Right
165243
166244 define <16 x i16> @test_srlw_1(<16 x i16> %InVec) {
167 ; CHECK-LABEL: test_srlw_1:
168 ; CHECK: # BB#0: # %entry
169 ; CHECK-NEXT: retq
245 ; X32-LABEL: test_srlw_1:
246 ; X32: ## BB#0: ## %entry
247 ; X32-NEXT: retl
248 ;
249 ; X64-LABEL: test_srlw_1:
250 ; X64: ## BB#0: ## %entry
251 ; X64-NEXT: retq
170252 entry:
171253 %shl = lshr <16 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
172254 ret <16 x i16> %shl
173255 }
174256
175257 define <16 x i16> @test_srlw_2(<16 x i16> %InVec) {
176 ; CHECK-LABEL: test_srlw_2:
177 ; CHECK: # BB#0: # %entry
178 ; CHECK-NEXT: vpsrlw $1, %ymm0, %ymm0
179 ; CHECK-NEXT: retq
258 ; X32-LABEL: test_srlw_2:
259 ; X32: ## BB#0: ## %entry
260 ; X32-NEXT: vpsrlw $1, %ymm0, %ymm0
261 ; X32-NEXT: retl
262 ;
263 ; X64-LABEL: test_srlw_2:
264 ; X64: ## BB#0: ## %entry
265 ; X64-NEXT: vpsrlw $1, %ymm0, %ymm0
266 ; X64-NEXT: retq
180267 entry:
181268 %shl = lshr <16 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
182269 ret <16 x i16> %shl
183270 }
184271
185272 define <16 x i16> @test_srlw_3(<16 x i16> %InVec) {
186 ; CHECK-LABEL: test_srlw_3:
187 ; CHECK: # BB#0: # %entry
188 ; CHECK-NEXT: vpsrlw $15, %ymm0, %ymm0
189 ; CHECK-NEXT: retq
273 ; X32-LABEL: test_srlw_3:
274 ; X32: ## BB#0: ## %entry
275 ; X32-NEXT: vpsrlw $15, %ymm0, %ymm0
276 ; X32-NEXT: retl
277 ;
278 ; X64-LABEL: test_srlw_3:
279 ; X64: ## BB#0: ## %entry
280 ; X64-NEXT: vpsrlw $15, %ymm0, %ymm0
281 ; X64-NEXT: retq
190282 entry:
191283 %shl = lshr <16 x i16> %InVec, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
192284 ret <16 x i16> %shl
193285 }
194286
195287 define <8 x i32> @test_srld_1(<8 x i32> %InVec) {
196 ; CHECK-LABEL: test_srld_1:
197 ; CHECK: # BB#0: # %entry
198 ; CHECK-NEXT: retq
288 ; X32-LABEL: test_srld_1:
289 ; X32: ## BB#0: ## %entry
290 ; X32-NEXT: retl
291 ;
292 ; X64-LABEL: test_srld_1:
293 ; X64: ## BB#0: ## %entry
294 ; X64-NEXT: retq
199295 entry:
200296 %shl = lshr <8 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
201297 ret <8 x i32> %shl
202298 }
203299
204300 define <8 x i32> @test_srld_2(<8 x i32> %InVec) {
205 ; CHECK-LABEL: test_srld_2:
206 ; CHECK: # BB#0: # %entry
207 ; CHECK-NEXT: vpsrld $1, %ymm0, %ymm0
208 ; CHECK-NEXT: retq
301 ; X32-LABEL: test_srld_2:
302 ; X32: ## BB#0: ## %entry
303 ; X32-NEXT: vpsrld $1, %ymm0, %ymm0
304 ; X32-NEXT: retl
305 ;
306 ; X64-LABEL: test_srld_2:
307 ; X64: ## BB#0: ## %entry
308 ; X64-NEXT: vpsrld $1, %ymm0, %ymm0
309 ; X64-NEXT: retq
209310 entry:
210311 %shl = lshr <8 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
211312 ret <8 x i32> %shl
212313 }
213314
214315 define <8 x i32> @test_srld_3(<8 x i32> %InVec) {
215 ; CHECK-LABEL: test_srld_3:
216 ; CHECK: # BB#0: # %entry
217 ; CHECK-NEXT: vpsrld $31, %ymm0, %ymm0
218 ; CHECK-NEXT: retq
316 ; X32-LABEL: test_srld_3:
317 ; X32: ## BB#0: ## %entry
318 ; X32-NEXT: vpsrld $31, %ymm0, %ymm0
319 ; X32-NEXT: retl
320 ;
321 ; X64-LABEL: test_srld_3:
322 ; X64: ## BB#0: ## %entry
323 ; X64-NEXT: vpsrld $31, %ymm0, %ymm0
324 ; X64-NEXT: retq
219325 entry:
220326 %shl = lshr <8 x i32> %InVec, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
221327 ret <8 x i32> %shl
222328 }
223329
224330 define <4 x i64> @test_srlq_1(<4 x i64> %InVec) {
225 ; CHECK-LABEL: test_srlq_1:
226 ; CHECK: # BB#0: # %entry
227 ; CHECK-NEXT: retq
331 ; X32-LABEL: test_srlq_1:
332 ; X32: ## BB#0: ## %entry
333 ; X32-NEXT: retl
334 ;
335 ; X64-LABEL: test_srlq_1:
336 ; X64: ## BB#0: ## %entry
337 ; X64-NEXT: retq
228338 entry:
229339 %shl = lshr <4 x i64> %InVec, <i64 0, i64 0, i64 0, i64 0>
230340 ret <4 x i64> %shl
231341 }
232342
233343 define <4 x i64> @test_srlq_2(<4 x i64> %InVec) {
234 ; CHECK-LABEL: test_srlq_2:
235 ; CHECK: # BB#0: # %entry
236 ; CHECK-NEXT: vpsrlq $1, %ymm0, %ymm0
237 ; CHECK-NEXT: retq
344 ; X32-LABEL: test_srlq_2:
345 ; X32: ## BB#0: ## %entry
346 ; X32-NEXT: vpsrlq $1, %ymm0, %ymm0
347 ; X32-NEXT: retl
348 ;
349 ; X64-LABEL: test_srlq_2:
350 ; X64: ## BB#0: ## %entry
351 ; X64-NEXT: vpsrlq $1, %ymm0, %ymm0
352 ; X64-NEXT: retq
238353 entry:
239354 %shl = lshr <4 x i64> %InVec, <i64 1, i64 1, i64 1, i64 1>
240355 ret <4 x i64> %shl
241356 }
242357
243358 define <4 x i64> @test_srlq_3(<4 x i64> %InVec) {
244 ; CHECK-LABEL: test_srlq_3:
245 ; CHECK: # BB#0: # %entry
246 ; CHECK-NEXT: vpsrlq $63, %ymm0, %ymm0
247 ; CHECK-NEXT: retq
359 ; X32-LABEL: test_srlq_3:
360 ; X32: ## BB#0: ## %entry
361 ; X32-NEXT: vpsrlq $63, %ymm0, %ymm0
362 ; X32-NEXT: retl
363 ;
364 ; X64-LABEL: test_srlq_3:
365 ; X64: ## BB#0: ## %entry
366 ; X64-NEXT: vpsrlq $63, %ymm0, %ymm0
367 ; X64-NEXT: retq
248368 entry:
249369 %shl = lshr <4 x i64> %InVec, <i64 63, i64 63, i64 63, i64 63>
250370 ret <4 x i64> %shl
251371 }
252372
253373 define <4 x i32> @srl_trunc_and_v4i64(<4 x i32> %x, <4 x i64> %y) nounwind {
254 ; CHECK-LABEL: srl_trunc_and_v4i64:
255 ; CHECK: # BB#0:
256 ; CHECK-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
257 ; CHECK-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
258 ; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %xmm2
259 ; CHECK-NEXT: vpand %xmm2, %xmm1, %xmm1
260 ; CHECK-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
261 ; CHECK-NEXT: vzeroupper
262 ; CHECK-NEXT: retq
374 ; X32-LABEL: srl_trunc_and_v4i64:
375 ; X32: ## BB#0:
376 ; X32-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
377 ; X32-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
378 ; X32-NEXT: vpbroadcastd LCPI25_0, %xmm2
379 ; X32-NEXT: vpand %xmm2, %xmm1, %xmm1
380 ; X32-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
381 ; X32-NEXT: vzeroupper
382 ; X32-NEXT: retl
383 ;
384 ; X64-LABEL: srl_trunc_and_v4i64:
385 ; X64: ## BB#0:
386 ; X64-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
387 ; X64-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
388 ; X64-NEXT: vpbroadcastd {{.*}}(%rip), %xmm2
389 ; X64-NEXT: vpand %xmm2, %xmm1, %xmm1
390 ; X64-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
391 ; X64-NEXT: vzeroupper
392 ; X64-NEXT: retq
263393 %and = and <4 x i64> %y, <i64 31, i64 31, i64 31, i64 31>
264394 %trunc = trunc <4 x i64> %and to <4 x i32>
265395 %sra = lshr <4 x i32> %x, %trunc
271401 ;
272402
273403 define <8 x i16> @shl_8i16(<8 x i16> %r, <8 x i16> %a) nounwind {
274 ; CHECK-LABEL: shl_8i16:
275 ; CHECK: # BB#0:
276 ; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
277 ; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
278 ; CHECK-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
279 ; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
280 ; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
281 ; CHECK-NEXT: # kill: %XMM0 %XMM0 %YMM0
282 ; CHECK-NEXT: vzeroupper
283 ; CHECK-NEXT: retq
404 ; X32-LABEL: shl_8i16:
405 ; X32: ## BB#0:
406 ; X32-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
407 ; X32-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
408 ; X32-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
409 ; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
410 ; X32-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
411 ; X32-NEXT: ## kill: %XMM0 %XMM0 %YMM0
412 ; X32-NEXT: vzeroupper
413 ; X32-NEXT: retl
414 ;
415 ; X64-LABEL: shl_8i16:
416 ; X64: ## BB#0:
417 ; X64-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
418 ; X64-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
419 ; X64-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
420 ; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
421 ; X64-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
422 ; X64-NEXT: ## kill: %XMM0 %XMM0 %YMM0
423 ; X64-NEXT: vzeroupper
424 ; X64-NEXT: retq
284425 %shl = shl <8 x i16> %r, %a
285426 ret <8 x i16> %shl
286427 }
287428
288429 define <16 x i16> @shl_16i16(<16 x i16> %r, <16 x i16> %a) nounwind {
289 ; CHECK-LABEL: shl_16i16:
290 ; CHECK: # BB#0:
291 ; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2
292 ; CHECK-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
293 ; CHECK-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
294 ; CHECK-NEXT: vpsllvd %ymm3, %ymm4, %ymm3
295 ; CHECK-NEXT: vpsrld $16, %ymm3, %ymm3
296 ; CHECK-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
297 ; CHECK-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
298 ; CHECK-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
299 ; CHECK-NEXT: vpsrld $16, %ymm0, %ymm0
300 ; CHECK-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
301 ; CHECK-NEXT: retq
430 ; X32-LABEL: shl_16i16:
431 ; X32: ## BB#0:
432 ; X32-NEXT: vpxor %ymm2, %ymm2, %ymm2
433 ; X32-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
434 ; X32-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
435 ; X32-NEXT: vpsllvd %ymm3, %ymm4, %ymm3
436 ; X32-NEXT: vpsrld $16, %ymm3, %ymm3
437 ; X32-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
438 ; X32-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
439 ; X32-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
440 ; X32-NEXT: vpsrld $16, %ymm0, %ymm0
441 ; X32-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
442 ; X32-NEXT: retl
443 ;
444 ; X64-LABEL: shl_16i16:
445 ; X64: ## BB#0:
446 ; X64-NEXT: vpxor %ymm2, %ymm2, %ymm2
447 ; X64-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
448 ; X64-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
449 ; X64-NEXT: vpsllvd %ymm3, %ymm4, %ymm3
450 ; X64-NEXT: vpsrld $16, %ymm3, %ymm3
451 ; X64-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
452 ; X64-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
453 ; X64-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
454 ; X64-NEXT: vpsrld $16, %ymm0, %ymm0
455 ; X64-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
456 ; X64-NEXT: retq
302457 %shl = shl <16 x i16> %r, %a
303458 ret <16 x i16> %shl
304459 }
305460
306461 define <32 x i8> @shl_32i8(<32 x i8> %r, <32 x i8> %a) nounwind {
307 ; CHECK-LABEL: shl_32i8:
308 ; CHECK: # BB#0:
309 ; CHECK-NEXT: vpsllw $5, %ymm1, %ymm1
310 ; CHECK-NEXT: vpsllw $4, %ymm0, %ymm2
311 ; CHECK-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
312 ; CHECK-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
313 ; CHECK-NEXT: vpsllw $2, %ymm0, %ymm2
314 ; CHECK-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
315 ; CHECK-NEXT: vpaddb %ymm1, %ymm1, %ymm1
316 ; CHECK-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
317 ; CHECK-NEXT: vpaddb %ymm0, %ymm0, %ymm2
318 ; CHECK-NEXT: vpaddb %ymm1, %ymm1, %ymm1
319 ; CHECK-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
320 ; CHECK-NEXT: retq
462 ; X32-LABEL: shl_32i8:
463 ; X32: ## BB#0:
464 ; X32-NEXT: vpsllw $5, %ymm1, %ymm1
465 ; X32-NEXT: vpsllw $4, %ymm0, %ymm2
466 ; X32-NEXT: vpand LCPI28_0, %ymm2, %ymm2
467 ; X32-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
468 ; X32-NEXT: vpsllw $2, %ymm0, %ymm2
469 ; X32-NEXT: vpand LCPI28_1, %ymm2, %ymm2
470 ; X32-NEXT: vpaddb %ymm1, %ymm1, %ymm1
471 ; X32-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
472 ; X32-NEXT: vpaddb %ymm0, %ymm0, %ymm2
473 ; X32-NEXT: vpaddb %ymm1, %ymm1, %ymm1
474 ; X32-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
475 ; X32-NEXT: retl
476 ;
477 ; X64-LABEL: shl_32i8:
478 ; X64: ## BB#0:
479 ; X64-NEXT: vpsllw $5, %ymm1, %ymm1
480 ; X64-NEXT: vpsllw $4, %ymm0, %ymm2
481 ; X64-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
482 ; X64-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
483 ; X64-NEXT: vpsllw $2, %ymm0, %ymm2
484 ; X64-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
485 ; X64-NEXT: vpaddb %ymm1, %ymm1, %ymm1
486 ; X64-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
487 ; X64-NEXT: vpaddb %ymm0, %ymm0, %ymm2
488 ; X64-NEXT: vpaddb %ymm1, %ymm1, %ymm1
489 ; X64-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
490 ; X64-NEXT: retq
321491 %shl = shl <32 x i8> %r, %a
322492 ret <32 x i8> %shl
323493 }
324494
325495 define <8 x i16> @ashr_8i16(<8 x i16> %r, <8 x i16> %a) nounwind {
326 ; CHECK-LABEL: ashr_8i16:
327 ; CHECK: # BB#0:
328 ; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
329 ; CHECK-NEXT: vpmovsxwd %xmm0, %ymm0
330 ; CHECK-NEXT: vpsravd %ymm1, %ymm0, %ymm0
331 ; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
332 ; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
333 ; CHECK-NEXT: # kill: %XMM0 %XMM0 %YMM0
334 ; CHECK-NEXT: vzeroupper
335 ; CHECK-NEXT: retq
496 ; X32-LABEL: ashr_8i16:
497 ; X32: ## BB#0:
498 ; X32-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
499 ; X32-NEXT: vpmovsxwd %xmm0, %ymm0
500 ; X32-NEXT: vpsravd %ymm1, %ymm0, %ymm0
501 ; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
502 ; X32-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
503 ; X32-NEXT: ## kill: %XMM0 %XMM0 %YMM0
504 ; X32-NEXT: vzeroupper
505 ; X32-NEXT: retl
506 ;
507 ; X64-LABEL: ashr_8i16:
508 ; X64: ## BB#0:
509 ; X64-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
510 ; X64-NEXT: vpmovsxwd %xmm0, %ymm0
511 ; X64-NEXT: vpsravd %ymm1, %ymm0, %ymm0
512 ; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
513 ; X64-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
514 ; X64-NEXT: ## kill: %XMM0 %XMM0 %YMM0
515 ; X64-NEXT: vzeroupper
516 ; X64-NEXT: retq
336517 %ashr = ashr <8 x i16> %r, %a
337518 ret <8 x i16> %ashr
338519 }
339520
340521 define <16 x i16> @ashr_16i16(<16 x i16> %r, <16 x i16> %a) nounwind {
341 ; CHECK-LABEL: ashr_16i16:
342 ; CHECK: # BB#0:
343 ; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2
344 ; CHECK-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
345 ; CHECK-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
346 ; CHECK-NEXT: vpsravd %ymm3, %ymm4, %ymm3
347 ; CHECK-NEXT: vpsrld $16, %ymm3, %ymm3
348 ; CHECK-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
349 ; CHECK-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
350 ; CHECK-NEXT: vpsravd %ymm1, %ymm0, %ymm0
351 ; CHECK-NEXT: vpsrld $16, %ymm0, %ymm0
352 ; CHECK-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
353 ; CHECK-NEXT: retq
522 ; X32-LABEL: ashr_16i16:
523 ; X32: ## BB#0:
524 ; X32-NEXT: vpxor %ymm2, %ymm2, %ymm2
525 ; X32-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
526 ; X32-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
527 ; X32-NEXT: vpsravd %ymm3, %ymm4, %ymm3
528 ; X32-NEXT: vpsrld $16, %ymm3, %ymm3
529 ; X32-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
530 ; X32-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
531 ; X32-NEXT: vpsravd %ymm1, %ymm0, %ymm0
532 ; X32-NEXT: vpsrld $16, %ymm0, %ymm0
533 ; X32-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
534 ; X32-NEXT: retl
535 ;
536 ; X64-LABEL: ashr_16i16:
537 ; X64: ## BB#0:
538 ; X64-NEXT: vpxor %ymm2, %ymm2, %ymm2
539 ; X64-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
540 ; X64-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
541 ; X64-NEXT: vpsravd %ymm3, %ymm4, %ymm3
542 ; X64-NEXT: vpsrld $16, %ymm3, %ymm3
543 ; X64-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
544 ; X64-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
545 ; X64-NEXT: vpsravd %ymm1, %ymm0, %ymm0
546 ; X64-NEXT: vpsrld $16, %ymm0, %ymm0
547 ; X64-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
548 ; X64-NEXT: retq
354549 %ashr = ashr <16 x i16> %r, %a
355550 ret <16 x i16> %ashr
356551 }
357552
358553 define <32 x i8> @ashr_32i8(<32 x i8> %r, <32 x i8> %a) nounwind {
359 ; CHECK-LABEL: ashr_32i8:
360 ; CHECK: # BB#0:
361 ; CHECK-NEXT: vpsllw $5, %ymm1, %ymm1
362 ; CHECK-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
363 ; CHECK-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
364 ; CHECK-NEXT: vpsraw $4, %ymm3, %ymm4
365 ; CHECK-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
366 ; CHECK-NEXT: vpsraw $2, %ymm3, %ymm4
367 ; CHECK-NEXT: vpaddw %ymm2, %ymm2, %ymm2
368 ; CHECK-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
369 ; CHECK-NEXT: vpsraw $1, %ymm3, %ymm4
370 ; CHECK-NEXT: vpaddw %ymm2, %ymm2, %ymm2
371 ; CHECK-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm2
372 ; CHECK-NEXT: vpsrlw $8, %ymm2, %ymm2
373 ; CHECK-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
374 ; CHECK-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
375 ; CHECK-NEXT: vpsraw $4, %ymm0, %ymm3
376 ; CHECK-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
377 ; CHECK-NEXT: vpsraw $2, %ymm0, %ymm3
378 ; CHECK-NEXT: vpaddw %ymm1, %ymm1, %ymm1
379 ; CHECK-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
380 ; CHECK-NEXT: vpsraw $1, %ymm0, %ymm3
381 ; CHECK-NEXT: vpaddw %ymm1, %ymm1, %ymm1
382 ; CHECK-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
383 ; CHECK-NEXT: vpsrlw $8, %ymm0, %ymm0
384 ; CHECK-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
385 ; CHECK-NEXT: retq
554 ; X32-LABEL: ashr_32i8:
555 ; X32: ## BB#0:
556 ; X32-NEXT: vpsllw $5, %ymm1, %ymm1
557 ; X32-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
558 ; X32-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
559 ; X32-NEXT: vpsraw $4, %ymm3, %ymm4
560 ; X32-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
561 ; X32-NEXT: vpsraw $2, %ymm3, %ymm4
562 ; X32-NEXT: vpaddw %ymm2, %ymm2, %ymm2
563 ; X32-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
564 ; X32-NEXT: vpsraw $1, %ymm3, %ymm4
565 ; X32-NEXT: vpaddw %ymm2, %ymm2, %ymm2
566 ; X32-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm2
567 ; X32-NEXT: vpsrlw $8, %ymm2, %ymm2
568 ; X32-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
569 ; X32-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
570 ; X32-NEXT: vpsraw $4, %ymm0, %ymm3
571 ; X32-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
572 ; X32-NEXT: vpsraw $2, %ymm0, %ymm3
573 ; X32-NEXT: vpaddw %ymm1, %ymm1, %ymm1
574 ; X32-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
575 ; X32-NEXT: vpsraw $1, %ymm0, %ymm3
576 ; X32-NEXT: vpaddw %ymm1, %ymm1, %ymm1
577 ; X32-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
578 ; X32-NEXT: vpsrlw $8, %ymm0, %ymm0
579 ; X32-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
580 ; X32-NEXT: retl
581 ;
582 ; X64-LABEL: ashr_32i8:
583 ; X64: ## BB#0:
584 ; X64-NEXT: vpsllw $5, %ymm1, %ymm1
585 ; X64-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
586 ; X64-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
587 ; X64-NEXT: vpsraw $4, %ymm3, %ymm4
588 ; X64-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
589 ; X64-NEXT: vpsraw $2, %ymm3, %ymm4
590 ; X64-NEXT: vpaddw %ymm2, %ymm2, %ymm2
591 ; X64-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
592 ; X64-NEXT: vpsraw $1, %ymm3, %ymm4
593 ; X64-NEXT: vpaddw %ymm2, %ymm2, %ymm2
594 ; X64-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm2
595 ; X64-NEXT: vpsrlw $8, %ymm2, %ymm2
596 ; X64-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
597 ; X64-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
598 ; X64-NEXT: vpsraw $4, %ymm0, %ymm3
599 ; X64-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
600 ; X64-NEXT: vpsraw $2, %ymm0, %ymm3
601 ; X64-NEXT: vpaddw %ymm1, %ymm1, %ymm1
602 ; X64-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
603 ; X64-NEXT: vpsraw $1, %ymm0, %ymm3
604 ; X64-NEXT: vpaddw %ymm1, %ymm1, %ymm1
605 ; X64-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
606 ; X64-NEXT: vpsrlw $8, %ymm0, %ymm0
607 ; X64-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
608 ; X64-NEXT: retq
386609 %ashr = ashr <32 x i8> %r, %a
387610 ret <32 x i8> %ashr
388611 }
389612
390613 define <8 x i16> @lshr_8i16(<8 x i16> %r, <8 x i16> %a) nounwind {
391 ; CHECK-LABEL: lshr_8i16:
392 ; CHECK: # BB#0:
393 ; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
394 ; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
395 ; CHECK-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
396 ; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
397 ; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
398 ; CHECK-NEXT: # kill: %XMM0 %XMM0 %YMM0
399 ; CHECK-NEXT: vzeroupper
400 ; CHECK-NEXT: retq
614 ; X32-LABEL: lshr_8i16:
615 ; X32: ## BB#0:
616 ; X32-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
617 ; X32-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
618 ; X32-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
619 ; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
620 ; X32-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
621 ; X32-NEXT: ## kill: %XMM0 %XMM0 %YMM0
622 ; X32-NEXT: vzeroupper
623 ; X32-NEXT: retl
624 ;
625 ; X64-LABEL: lshr_8i16:
626 ; X64: ## BB#0:
627 ; X64-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
628 ; X64-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
629 ; X64-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
630 ; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
631 ; X64-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
632 ; X64-NEXT: ## kill: %XMM0 %XMM0 %YMM0
633 ; X64-NEXT: vzeroupper
634 ; X64-NEXT: retq
401635 %lshr = lshr <8 x i16> %r, %a
402636 ret <8 x i16> %lshr
403637 }
404638
405639 define <16 x i16> @lshr_16i16(<16 x i16> %r, <16 x i16> %a) nounwind {
406 ; CHECK-LABEL: lshr_16i16:
407 ; CHECK: # BB#0:
408 ; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2
409 ; CHECK-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
410 ; CHECK-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
411 ; CHECK-NEXT: vpsrlvd %ymm3, %ymm4, %ymm3
412 ; CHECK-NEXT: vpsrld $16, %ymm3, %ymm3
413 ; CHECK-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
414 ; CHECK-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
415 ; CHECK-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
416 ; CHECK-NEXT: vpsrld $16, %ymm0, %ymm0
417 ; CHECK-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
418 ; CHECK-NEXT: retq
640 ; X32-LABEL: lshr_16i16:
641 ; X32: ## BB#0:
642 ; X32-NEXT: vpxor %ymm2, %ymm2, %ymm2
643 ; X32-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
644 ; X32-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
645 ; X32-NEXT: vpsrlvd %ymm3, %ymm4, %ymm3
646 ; X32-NEXT: vpsrld $16, %ymm3, %ymm3
647 ; X32-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
648 ; X32-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
649 ; X32-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
650 ; X32-NEXT: vpsrld $16, %ymm0, %ymm0
651 ; X32-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
652 ; X32-NEXT: retl
653 ;
654 ; X64-LABEL: lshr_16i16:
655 ; X64: ## BB#0:
656 ; X64-NEXT: vpxor %ymm2, %ymm2, %ymm2
657 ; X64-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
658 ; X64-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
659 ; X64-NEXT: vpsrlvd %ymm3, %ymm4, %ymm3
660 ; X64-NEXT: vpsrld $16, %ymm3, %ymm3
661 ; X64-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
662 ; X64-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
663 ; X64-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
664 ; X64-NEXT: vpsrld $16, %ymm0, %ymm0
665 ; X64-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
666 ; X64-NEXT: retq
419667 %lshr = lshr <16 x i16> %r, %a
420668 ret <16 x i16> %lshr
421669 }
422670
423671 define <32 x i8> @lshr_32i8(<32 x i8> %r, <32 x i8> %a) nounwind {
424 ; CHECK-LABEL: lshr_32i8:
425 ; CHECK: # BB#0:
426 ; CHECK-NEXT: vpsllw $5, %ymm1, %ymm1
427 ; CHECK-NEXT: vpsrlw $4, %ymm0, %ymm2
428 ; CHECK-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
429 ; CHECK-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
430 ; CHECK-NEXT: vpsrlw $2, %ymm0, %ymm2
431 ; CHECK-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
432 ; CHECK-NEXT: vpaddb %ymm1, %ymm1, %ymm1
433 ; CHECK-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
434 ; CHECK-NEXT: vpsrlw $1, %ymm0, %ymm2
435 ; CHECK-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
436 ; CHECK-NEXT: vpaddb %ymm1, %ymm1, %ymm1
437 ; CHECK-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
438 ; CHECK-NEXT: retq
672 ; X32-LABEL: lshr_32i8:
673 ; X32: ## BB#0:
674 ; X32-NEXT: vpsllw $5, %ymm1, %ymm1
675 ; X32-NEXT: vpsrlw $4, %ymm0, %ymm2
676 ; X32-NEXT: vpand LCPI34_0, %ymm2, %ymm2
677 ; X32-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
678 ; X32-NEXT: vpsrlw $2, %ymm0, %ymm2
679 ; X32-NEXT: vpand LCPI34_1, %ymm2, %ymm2
680 ; X32-NEXT: vpaddb %ymm1, %ymm1, %ymm1
681 ; X32-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
682 ; X32-NEXT: vpsrlw $1, %ymm0, %ymm2
683 ; X32-NEXT: vpand LCPI34_2, %ymm2, %ymm2
684 ; X32-NEXT: vpaddb %ymm1, %ymm1, %ymm1
685 ; X32-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
686 ; X32-NEXT: retl
687 ;
688 ; X64-LABEL: lshr_32i8:
689 ; X64: ## BB#0:
690 ; X64-NEXT: vpsllw $5, %ymm1, %ymm1
691 ; X64-NEXT: vpsrlw $4, %ymm0, %ymm2
692 ; X64-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
693 ; X64-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
694 ; X64-NEXT: vpsrlw $2, %ymm0, %ymm2
695 ; X64-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
696 ; X64-NEXT: vpaddb %ymm1, %ymm1, %ymm1
697 ; X64-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
698 ; X64-NEXT: vpsrlw $1, %ymm0, %ymm2
699 ; X64-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
700 ; X64-NEXT: vpaddb %ymm1, %ymm1, %ymm1
701 ; X64-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
702 ; X64-NEXT: retq
439703 %lshr = lshr <32 x i8> %r, %a
440704 ret <32 x i8> %lshr
441705 }
0 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
1 ; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X32
2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X64
23
34 define <8 x i32> @perm_cl_int_8x32(<8 x i32> %A) nounwind readnone {
4 ; CHECK-LABEL: perm_cl_int_8x32:
5 ; CHECK: ## BB#0: ## %entry
6 ; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = [0,7,2,1,2,7,6,0]
7 ; CHECK-NEXT: vpermd %ymm0, %ymm1, %ymm0
8 ; CHECK-NEXT: retq
5 ; X32-LABEL: perm_cl_int_8x32:
6 ; X32: ## BB#0: ## %entry
7 ; X32-NEXT: vmovdqa {{.*#+}} ymm1 = [0,7,2,1,2,7,6,0]
8 ; X32-NEXT: vpermd %ymm0, %ymm1, %ymm0
9 ; X32-NEXT: retl
10 ;
11 ; X64-LABEL: perm_cl_int_8x32:
12 ; X64: ## BB#0: ## %entry
13 ; X64-NEXT: vmovdqa {{.*#+}} ymm1 = [0,7,2,1,2,7,6,0]
14 ; X64-NEXT: vpermd %ymm0, %ymm1, %ymm0
15 ; X64-NEXT: retq
916 entry:
1017 %B = shufflevector <8 x i32> %A, <8 x i32> undef, <8 x i32> <i32 0, i32 7, i32 2, i32 1, i32 2, i32 7, i32 6, i32 0>
1118 ret <8 x i32> %B
1320
1421
1522 define <8 x float> @perm_cl_fp_8x32(<8 x float> %A) nounwind readnone {
16 ; CHECK-LABEL: perm_cl_fp_8x32:
17 ; CHECK: ## BB#0: ## %entry
18 ; CHECK-NEXT: vmovaps {{.*#+}} ymm1 =
19 ; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm0
20 ; CHECK-NEXT: retq
23 ; X32-LABEL: perm_cl_fp_8x32:
24 ; X32: ## BB#0: ## %entry
25 ; X32-NEXT: vmovaps {{.*#+}} ymm1 =
26 ; X32-NEXT: vpermps %ymm0, %ymm1, %ymm0
27 ; X32-NEXT: retl
28 ;
29 ; X64-LABEL: perm_cl_fp_8x32:
30 ; X64: ## BB#0: ## %entry
31 ; X64-NEXT: vmovaps {{.*#+}} ymm1 =
32 ; X64-NEXT: vpermps %ymm0, %ymm1, %ymm0
33 ; X64-NEXT: retq
2134 entry:
2235 %B = shufflevector <8 x float> %A, <8 x float> undef, <8 x i32>
2336 ret <8 x float> %B
2437 }
2538
2639 define <4 x i64> @perm_cl_int_4x64(<4 x i64> %A) nounwind readnone {
27 ; CHECK-LABEL: perm_cl_int_4x64:
28 ; CHECK: ## BB#0: ## %entry
29 ; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,2,1]
30 ; CHECK-NEXT: retq
40 ; X32-LABEL: perm_cl_int_4x64:
41 ; X32: ## BB#0: ## %entry
42 ; X32-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,2,1]
43 ; X32-NEXT: retl
44 ;
45 ; X64-LABEL: perm_cl_int_4x64:
46 ; X64: ## BB#0: ## %entry
47 ; X64-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,2,1]
48 ; X64-NEXT: retq
3149 entry:
3250 %B = shufflevector <4 x i64> %A, <4 x i64> undef, <4 x i32> <i32 0, i32 3, i32 2, i32 1>
3351 ret <4 x i64> %B
3452 }
3553
3654 define <4 x double> @perm_cl_fp_4x64(<4 x double> %A) nounwind readnone {
37 ; CHECK-LABEL: perm_cl_fp_4x64:
38 ; CHECK: ## BB#0: ## %entry
39 ; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,3,2,1]
40 ; CHECK-NEXT: retq
55 ; X32-LABEL: perm_cl_fp_4x64:
56 ; X32: ## BB#0: ## %entry
57 ; X32-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,3,2,1]
58 ; X32-NEXT: retl
59 ;
60 ; X64-LABEL: perm_cl_fp_4x64:
61 ; X64: ## BB#0: ## %entry
62 ; X64-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,3,2,1]
63 ; X64-NEXT: retq
4164 entry:
4265 %B = shufflevector <4 x double> %A, <4 x double> undef, <4 x i32> <i32 0, i32 3, i32 2, i32 1>
4366 ret <4 x double> %B