llvm.org GIT mirror llvm / 84a3fee
[X86][SSE] pslldq/psrldq shuffle mask decodes
Patch to provide shuffle decodes and asm comments for the SSE2/AVX2 pslldq/psrldq byte shift instructions.
Differential Revision: http://reviews.llvm.org/D5598
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@219738 91177308-0d34-0410-b5e6-96231b3b80d8
Simon Pilgrim, 5 years ago
7 changed file(s) with 217 addition(s) and 146 deletion(s). Raw diff Collapse all Expand all
196196 case X86::VMOVSHDUPrm:
197197 DestName = getRegName(MI->getOperand(0).getReg());
198198 DecodeMOVSHDUPMask(MVT::v4f32, ShuffleMask);
199 break;
200
201 case X86::PSLLDQri:
202 case X86::VPSLLDQri:
203 Src1Name = getRegName(MI->getOperand(1).getReg());
204 DestName = getRegName(MI->getOperand(0).getReg());
205 if(MI->getOperand(MI->getNumOperands()-1).isImm())
206 DecodePSLLDQMask(MVT::v16i8,
207 MI->getOperand(MI->getNumOperands()-1).getImm(),
208 ShuffleMask);
209 break;
210
211 case X86::VPSLLDQYri:
212 Src1Name = getRegName(MI->getOperand(1).getReg());
213 DestName = getRegName(MI->getOperand(0).getReg());
214 if(MI->getOperand(MI->getNumOperands()-1).isImm())
215 DecodePSLLDQMask(MVT::v32i8,
216 MI->getOperand(MI->getNumOperands()-1).getImm(),
217 ShuffleMask);
218 break;
219
220 case X86::PSRLDQri:
221 case X86::VPSRLDQri:
222 Src1Name = getRegName(MI->getOperand(1).getReg());
223 DestName = getRegName(MI->getOperand(0).getReg());
224 if(MI->getOperand(MI->getNumOperands()-1).isImm())
225 DecodePSRLDQMask(MVT::v16i8,
226 MI->getOperand(MI->getNumOperands()-1).getImm(),
227 ShuffleMask);
228 break;
229
230 case X86::VPSRLDQYri:
231 Src1Name = getRegName(MI->getOperand(1).getReg());
232 DestName = getRegName(MI->getOperand(0).getReg());
233 if(MI->getOperand(MI->getNumOperands()-1).isImm())
234 DecodePSRLDQMask(MVT::v32i8,
235 MI->getOperand(MI->getNumOperands()-1).getImm(),
236 ShuffleMask);
199237 break;
200238
201239 case X86::PALIGNR128rr:
7676 ShuffleMask.push_back(2 * i + 1);
7777 ShuffleMask.push_back(2 * i + 1);
7878 }
79 }
80
81 void DecodePSLLDQMask(MVT VT, unsigned Imm, SmallVectorImpl &ShuffleMask) {
82 unsigned VectorSizeInBits = VT.getSizeInBits();
83 unsigned NumElts = VectorSizeInBits / 8;
84 unsigned NumLanes = VectorSizeInBits / 128;
85 unsigned NumLaneElts = NumElts / NumLanes;
86
87 for (unsigned l = 0; l < NumElts; l += NumLaneElts)
88 for (unsigned i = 0; i < NumLaneElts; ++i) {
89 int M = SM_SentinelZero;
90 if (i >= Imm) M = i - Imm + l;
91 ShuffleMask.push_back(M);
92 }
93 }
94
95 void DecodePSRLDQMask(MVT VT, unsigned Imm, SmallVectorImpl &ShuffleMask) {
96 unsigned VectorSizeInBits = VT.getSizeInBits();
97 unsigned NumElts = VectorSizeInBits / 8;
98 unsigned NumLanes = VectorSizeInBits / 128;
99 unsigned NumLaneElts = NumElts / NumLanes;
100
101 for (unsigned l = 0; l < NumElts; l += NumLaneElts)
102 for (unsigned i = 0; i < NumLaneElts; ++i) {
103 unsigned Base = i + Imm;
104 int M = Base + l;
105 if (Base >= NumLaneElts) M = SM_SentinelZero;
106 ShuffleMask.push_back(M);
107 }
79108 }
80109
81110 void DecodePALIGNRMask(MVT VT, unsigned Imm,
3838 void DecodeMOVSLDUPMask(MVT VT, SmallVectorImpl &ShuffleMask);
3939
4040 void DecodeMOVSHDUPMask(MVT VT, SmallVectorImpl &ShuffleMask);
41
42 void DecodePSLLDQMask(MVT VT, unsigned Imm, SmallVectorImpl &ShuffleMask);
43
44 void DecodePSRLDQMask(MVT VT, unsigned Imm, SmallVectorImpl &ShuffleMask);
4145
4246 void DecodePALIGNRMask(MVT VT, unsigned Imm, SmallVectorImpl &ShuffleMask);
4347
454454 ret <4 x i32> %res
455455 }
456456 declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone
457
458
459 define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
460 ; CHECK: vpslldq
461 %res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
462 ret <2 x i64> %res
463 }
464 declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone
465
466
467 define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {
468 ; CHECK: vpslldq
469 %res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
470 ret <2 x i64> %res
471 }
457
458
459 define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
460 ; CHECK: vpslldq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
461 %res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
462 ret <2 x i64> %res
463 }
464 declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone
465
466
467 define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {
468 ; CHECK: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8]
469 %res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
470 ret <2 x i64> %res
471 }
472472 declare <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64>, i32) nounwind readnone
473473
474474
550550 ret <4 x i32> %res
551551 }
552552 declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone
553
554
555 define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
556 ; CHECK: vpsrldq
557 %res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
558 ret <2 x i64> %res
559 }
560 declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone
561
562
563 define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {
564 ; CHECK: vpsrldq
565 %res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
566 ret <2 x i64> %res
567 }
553
554
555 define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
556 ; CHECK: vpsrldq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
557 %res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
558 ret <2 x i64> %res
559 }
560 declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone
561
562
563 define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {
564 ; CHECK: vpsrldq {{.*#+}} xmm0 = xmm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero
565 %res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
566 ret <2 x i64> %res
567 }
568568 declare <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64>, i32) nounwind readnone
569569
570570
157157 ret <8 x i32> %res
158158 }
159159 declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) nounwind readnone
160
161
162 define <4 x i64> @test_x86_avx2_psll_dq(<4 x i64> %a0) {
163 ; CHECK: vpslldq
164 %res = call <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
165 ret <4 x i64> %res
166 }
167 declare <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64>, i32) nounwind readnone
168
169
170 define <4 x i64> @test_x86_avx2_psll_dq_bs(<4 x i64> %a0) {
171 ; CHECK: vpslldq
172 %res = call <4 x i64> @llvm.x86.avx2.psll.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
173 ret <4 x i64> %res
174 }
160
161
162 define <4 x i64> @test_x86_avx2_psll_dq(<4 x i64> %a0) {
163 ; CHECK: vpslldq {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
164 %res = call <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
165 ret <4 x i64> %res
166 }
167 declare <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64>, i32) nounwind readnone
168
169
170 define <4 x i64> @test_x86_avx2_psll_dq_bs(<4 x i64> %a0) {
171 ; CHECK: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8],zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24]
172 %res = call <4 x i64> @llvm.x86.avx2.psll.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
173 ret <4 x i64> %res
174 }
175175 declare <4 x i64> @llvm.x86.avx2.psll.dq.bs(<4 x i64>, i32) nounwind readnone
176176
177177
253253 ret <8 x i32> %res
254254 }
255255 declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) nounwind readnone
256
257
258 define <4 x i64> @test_x86_avx2_psrl_dq(<4 x i64> %a0) {
259 ; CHECK: vpsrldq
260 %res = call <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
261 ret <4 x i64> %res
262 }
263 declare <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64>, i32) nounwind readnone
264
265
266 define <4 x i64> @test_x86_avx2_psrl_dq_bs(<4 x i64> %a0) {
267 ; CHECK: vpsrldq
268 %res = call <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
269 ret <4 x i64> %res
270 }
256
257
258 define <4 x i64> @test_x86_avx2_psrl_dq(<4 x i64> %a0) {
259 ; CHECK: vpsrldq {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
260 %res = call <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
261 ret <4 x i64> %res
262 }
263 declare <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64>, i32) nounwind readnone
264
265
266 define <4 x i64> @test_x86_avx2_psrl_dq_bs(<4 x i64> %a0) {
267 ; CHECK: vpsrldq {{.*#+}} ymm0 = ymm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,ymm0[23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero
268 %res = call <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
269 ret <4 x i64> %res
270 }
271271 declare <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64>, i32) nounwind readnone
272272
273273
407407 ret <4 x i32> %res
408408 }
409409 declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone
410
411
412 define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
413 ; CHECK: pslldq
414 %res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
415 ret <2 x i64> %res
416 }
417 declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone
418
419
420 define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {
421 ; CHECK: pslldq
422 %res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
423 ret <2 x i64> %res
424 }
410
411
412 define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
413 ; CHECK: pslldq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
414 %res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
415 ret <2 x i64> %res
416 }
417 declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone
418
419
420 define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {
421 ; CHECK: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8]
422 %res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
423 ret <2 x i64> %res
424 }
425425 declare <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64>, i32) nounwind readnone
426426
427427
503503 ret <4 x i32> %res
504504 }
505505 declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone
506
507
508 define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
509 ; CHECK: psrldq
510 %res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
511 ret <2 x i64> %res
512 }
513 declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone
514
515
516 define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {
517 ; CHECK: psrldq
518 %res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
519 ret <2 x i64> %res
520 }
506
507
508 define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
509 ; CHECK: psrldq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
510 %res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
511 ret <2 x i64> %res
512 }
513 declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone
514
515
516 define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {
517 ; CHECK: psrldq {{.*#+}} xmm0 = xmm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero
518 %res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
519 ret <2 x i64> %res
520 }
521521 declare <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64>, i32) nounwind readnone
522522
523523
13961396
13971397 define <8 x i16> @shuffle_v8i16_z8zzzzzz(i16 %i) {
13981398 ; SSE-LABEL: shuffle_v8i16_z8zzzzzz:
1399 ; SSE: # BB#0:
1400 ; SSE-NEXT: movzwl %di, %eax
1401 ; SSE-NEXT: movd %eax, %xmm0
1402 ; SSE-NEXT: pslldq $2, %xmm0
1403 ; SSE-NEXT: retq
1404 ;
1405 ; AVX-LABEL: shuffle_v8i16_z8zzzzzz:
1406 ; AVX: # BB#0:
1407 ; AVX-NEXT: movzwl %di, %eax
1408 ; AVX-NEXT: vmovd %eax, %xmm0
1409 ; AVX-NEXT: vpslldq $2, %xmm0, %xmm0
1410 ; AVX-NEXT: retq
1411 %a = insertelement <8 x i16> undef, i16 %i, i32 0
1412 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32>
1399 ; SSE: # BB#0:
1400 ; SSE-NEXT: movzwl %di, %eax
1401 ; SSE-NEXT: movd %eax, %xmm0
1402 ; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
1403 ; SSE-NEXT: retq
1404 ;
1405 ; AVX-LABEL: shuffle_v8i16_z8zzzzzz:
1406 ; AVX: # BB#0:
1407 ; AVX-NEXT: movzwl %di, %eax
1408 ; AVX-NEXT: vmovd %eax, %xmm0
1409 ; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
1410 ; AVX-NEXT: retq
1411 %a = insertelement <8 x i16> undef, i16 %i, i32 0
1412 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32>
14131413 ret <8 x i16> %shuffle
14141414 }
14151415
14161416 define <8 x i16> @shuffle_v8i16_zzzzz8zz(i16 %i) {
14171417 ; SSE-LABEL: shuffle_v8i16_zzzzz8zz:
1418 ; SSE: # BB#0:
1419 ; SSE-NEXT: movzwl %di, %eax
1420 ; SSE-NEXT: movd %eax, %xmm0
1421 ; SSE-NEXT: pslldq $10, %xmm0
1422 ; SSE-NEXT: retq
1423 ;
1424 ; AVX-LABEL: shuffle_v8i16_zzzzz8zz:
1425 ; AVX: # BB#0:
1426 ; AVX-NEXT: movzwl %di, %eax
1427 ; AVX-NEXT: vmovd %eax, %xmm0
1428 ; AVX-NEXT: vpslldq $10, %xmm0, %xmm0
1429 ; AVX-NEXT: retq
1430 %a = insertelement <8 x i16> undef, i16 %i, i32 0
1431 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32>
1418 ; SSE: # BB#0:
1419 ; SSE-NEXT: movzwl %di, %eax
1420 ; SSE-NEXT: movd %eax, %xmm0
1421 ; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
1422 ; SSE-NEXT: retq
1423 ;
1424 ; AVX-LABEL: shuffle_v8i16_zzzzz8zz:
1425 ; AVX: # BB#0:
1426 ; AVX-NEXT: movzwl %di, %eax
1427 ; AVX-NEXT: vmovd %eax, %xmm0
1428 ; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
1429 ; AVX-NEXT: retq
1430 %a = insertelement <8 x i16> undef, i16 %i, i32 0
1431 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32>
14321432 ret <8 x i16> %shuffle
14331433 }
14341434
14351435 define <8 x i16> @shuffle_v8i16_zuuzuuz8(i16 %i) {
14361436 ; SSE-LABEL: shuffle_v8i16_zuuzuuz8:
1437 ; SSE: # BB#0:
1438 ; SSE-NEXT: movzwl %di, %eax
1439 ; SSE-NEXT: movd %eax, %xmm0
1440 ; SSE-NEXT: pslldq $14, %xmm0
1441 ; SSE-NEXT: retq
1442 ;
1443 ; AVX-LABEL: shuffle_v8i16_zuuzuuz8:
1444 ; AVX: # BB#0:
1445 ; AVX-NEXT: movzwl %di, %eax
1446 ; AVX-NEXT: vmovd %eax, %xmm0
1447 ; AVX-NEXT: vpslldq $14, %xmm0, %xmm0
1448 ; AVX-NEXT: retq
1449 %a = insertelement <8 x i16> undef, i16 %i, i32 0
1450 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32>
1437 ; SSE: # BB#0:
1438 ; SSE-NEXT: movzwl %di, %eax
1439 ; SSE-NEXT: movd %eax, %xmm0
1440 ; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1]
1441 ; SSE-NEXT: retq
1442 ;
1443 ; AVX-LABEL: shuffle_v8i16_zuuzuuz8:
1444 ; AVX: # BB#0:
1445 ; AVX-NEXT: movzwl %di, %eax
1446 ; AVX-NEXT: vmovd %eax, %xmm0
1447 ; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1]
1448 ; AVX-NEXT: retq
1449 %a = insertelement <8 x i16> undef, i16 %i, i32 0
1450 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32>
14511451 ret <8 x i16> %shuffle
14521452 }
14531453
14541454 define <8 x i16> @shuffle_v8i16_zzBzzzzz(i16 %i) {
14551455 ; SSE-LABEL: shuffle_v8i16_zzBzzzzz:
1456 ; SSE: # BB#0:
1457 ; SSE-NEXT: movzwl %di, %eax
1458 ; SSE-NEXT: movd %eax, %xmm0
1459 ; SSE-NEXT: pslldq $4, %xmm0
1460 ; SSE-NEXT: retq
1461 ;
1462 ; AVX-LABEL: shuffle_v8i16_zzBzzzzz:
1463 ; AVX: # BB#0:
1464 ; AVX-NEXT: movzwl %di, %eax
1465 ; AVX-NEXT: vmovd %eax, %xmm0
1466 ; AVX-NEXT: vpslldq $4, %xmm0, %xmm0
1467 ; AVX-NEXT: retq
1468 %a = insertelement <8 x i16> undef, i16 %i, i32 3
1469 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32>
1456 ; SSE: # BB#0:
1457 ; SSE-NEXT: movzwl %di, %eax
1458 ; SSE-NEXT: movd %eax, %xmm0
1459 ; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11]
1460 ; SSE-NEXT: retq
1461 ;
1462 ; AVX-LABEL: shuffle_v8i16_zzBzzzzz:
1463 ; AVX: # BB#0:
1464 ; AVX-NEXT: movzwl %di, %eax
1465 ; AVX-NEXT: vmovd %eax, %xmm0
1466 ; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11]
1467 ; AVX-NEXT: retq
1468 %a = insertelement <8 x i16> undef, i16 %i, i32 3
1469 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32>
14701470 ret <8 x i16> %shuffle
14711471 }
14721472