llvm (llvm.org GIT mirror), commit 5dff218
[X86][AVX512] Added AVX512 SHUFP*/VPERMILP* shuffle decode comments. Simon Pilgrim. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@253396 91177308-0d34-0410-b5e6-96231b3b80d8
5 changed files with 127 additions and 194 deletions.
154154 CASE_AVX_INS_COMMON(Inst, Y, r##src) \
155155 CASE_SSE_INS_COMMON(Inst, r##src) \
156156
157 #define CASE_SHUF(Inst, src) \
158 CASE_MASK_INS_COMMON(Inst, Z, r##src##i) \
159 CASE_MASK_INS_COMMON(Inst, Z256, r##src##i) \
160 CASE_MASK_INS_COMMON(Inst, Z128, r##src##i) \
161 CASE_AVX_INS_COMMON(Inst, , r##src##i) \
162 CASE_AVX_INS_COMMON(Inst, Y, r##src##i) \
163 CASE_SSE_INS_COMMON(Inst, r##src##i) \
164
165 #define CASE_VPERM(Inst, src) \
166 CASE_MASK_INS_COMMON(Inst, Z, src##i) \
167 CASE_MASK_INS_COMMON(Inst, Z256, src##i) \
168 CASE_MASK_INS_COMMON(Inst, Z128, src##i) \
169 CASE_AVX_INS_COMMON(Inst, , src##i) \
170 CASE_AVX_INS_COMMON(Inst, Y, src##i) \
171
157172 #define CASE_VSHUF(Inst, src) \
158173 CASE_MASK_INS_COMMON(SHUFF##Inst, Z, r##src##i) \
159174 CASE_MASK_INS_COMMON(SHUFI##Inst, Z, r##src##i) \
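The CASE_MASK_INS_COMMON / CASE_AVX_INS_COMMON / CASE_SSE_INS_COMMON helpers that the new macros build on are not part of this hunk. As a rough sketch of what one CASE_SHUF line buys (assuming those helpers paste case labels in the usual X86:: V-prefix pattern used elsewhere in X86InstComments.cpp; the helper definitions and the opcode enum below are illustrative stand-ins, not code from this patch), CASE_SHUF(SHUFPD, r) fans out into the SSE, both VEX and all three EVEX register forms:

// Standalone sketch of the CASE_SHUF fan-out; compiles with any C++11 compiler.
#include <cstdio>

namespace X86 {
enum Opcode {
  SHUFPDrri, VSHUFPDrri, VSHUFPDYrri,         // SSE2 / VEX.128 / VEX.256
  VSHUFPDZ128rri, VSHUFPDZ256rri, VSHUFPDZrri // EVEX 128/256/512-bit
};
}

// Assumed helper shapes (modeled on X86InstComments.cpp, not shown in the diff).
#define CASE_MASK_INS_COMMON(Inst, Suffix, src) case X86::V##Inst##Suffix##src:
#define CASE_AVX_INS_COMMON(Inst, Suffix, src)  case X86::V##Inst##Suffix##src:
#define CASE_SSE_INS_COMMON(Inst, src)          case X86::Inst##src:

// The CASE_SHUF macro from the hunk above.
#define CASE_SHUF(Inst, src)                  \
  CASE_MASK_INS_COMMON(Inst, Z, r##src##i)    \
  CASE_MASK_INS_COMMON(Inst, Z256, r##src##i) \
  CASE_MASK_INS_COMMON(Inst, Z128, r##src##i) \
  CASE_AVX_INS_COMMON(Inst, , r##src##i)      \
  CASE_AVX_INS_COMMON(Inst, Y, r##src##i)     \
  CASE_SSE_INS_COMMON(Inst, r##src##i)

static const char *classify(X86::Opcode Op) {
  switch (Op) {
  CASE_SHUF(SHUFPD, r) // expands to the six rri case labels above
    return "SHUFPD register form";
  default:
    return "something else";
  }
}

int main() { std::puts(classify(X86::VSHUFPDZ256rri)); }

CASE_VPERM follows the same pattern but drops the SSE line and builds "ri"/"mi" suffixes instead of "rri"/"rmi", matching the single-source VPERMILP forms handled further down.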
514529 DecodeUNPCKLMask(getRegOperandVectorVT(MI, MVT::i64, 0), ShuffleMask);
515530 break;
516531
517 case X86::SHUFPDrri:
518 case X86::VSHUFPDrri:
519 case X86::VSHUFPDYrri:
520 Src2Name = getRegName(MI->getOperand(2).getReg());
521 // FALL THROUGH.
522 case X86::SHUFPDrmi:
523 case X86::VSHUFPDrmi:
524 case X86::VSHUFPDYrmi:
532 CASE_SHUF(SHUFPD, r)
533 Src2Name = getRegName(MI->getOperand(2).getReg());
534 // FALL THROUGH.
535 CASE_SHUF(SHUFPD, m)
525536 if (MI->getOperand(MI->getNumOperands() - 1).isImm())
526537 DecodeSHUFPMask(getRegOperandVectorVT(MI, MVT::f64, 0),
527538 MI->getOperand(MI->getNumOperands() - 1).getImm(),
530541 DestName = getRegName(MI->getOperand(0).getReg());
531542 break;
532543
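DecodeSHUFPMask itself lives in X86ShuffleDecode and is not shown in this diff. As a model of the rule its f64 output follows (inferred from the new CHECK comments in the tests below, so the function name and index convention here are illustrative, not the real API): each result element consumes one immediate bit, even elements come from the first source and odd elements from the second, and the bit picks the low or high double of the current 128-bit lane. Second-source elements are represented as indices offset by NumElts, matching how the printed comment switches register names.

#include <cstdio>
#include <vector>

// Sketch of SHUFPD (f64) shuffle-mask decoding; not DecodeSHUFPMask's real signature.
static std::vector<int> decodeSHUFPDMask(unsigned NumElts, unsigned Imm) {
  std::vector<int> Mask;
  for (unsigned Lane = 0; Lane != NumElts; Lane += 2) {
    Mask.push_back(Lane + (Imm & 1));           // even element: first source
    Imm >>= 1;
    Mask.push_back(NumElts + Lane + (Imm & 1)); // odd element: second source
    Imm >>= 1;
  }
  return Mask;
}

int main() {
  // vshufpd $22, %zmm1, %zmm0, %zmm0
  //   -> zmm0 = zmm0[0],zmm1[1],zmm0[3],zmm1[2],zmm0[5],zmm1[4],zmm0[6],zmm1[6]
  for (int I : decodeSHUFPDMask(/*NumElts=*/8, /*Imm=*/22))
    std::printf("%d ", I); // prints: 0 9 3 10 5 12 6 14 (indices >= 8 are the second source)
  std::printf("\n");
}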
533 case X86::SHUFPSrri:
534 case X86::VSHUFPSrri:
535 case X86::VSHUFPSYrri:
536 Src2Name = getRegName(MI->getOperand(2).getReg());
537 // FALL THROUGH.
538 case X86::SHUFPSrmi:
539 case X86::VSHUFPSrmi:
540 case X86::VSHUFPSYrmi:
544 CASE_SHUF(SHUFPS, r)
545 Src2Name = getRegName(MI->getOperand(2).getReg());
546 // FALL THROUGH.
547 CASE_SHUF(SHUFPS, m)
541548 if (MI->getOperand(MI->getNumOperands() - 1).isImm())
542549 DecodeSHUFPMask(getRegOperandVectorVT(MI, MVT::f32, 0),
543550 MI->getOperand(MI->getNumOperands() - 1).getImm(),
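The f32 path is the same idea with 2-bit selectors: within each 128-bit lane the two low result elements come from the first source and the two high ones from the second, and the same 8-bit immediate is reused for every lane. Again a sketch of the rule the new CHECK comments encode, not the DecodeSHUFPMask implementation:

#include <cstdio>
#include <vector>

// Sketch of SHUFPS (f32) shuffle-mask decoding.
static std::vector<int> decodeSHUFPSMask(unsigned NumElts, unsigned Imm) {
  std::vector<int> Mask;
  for (unsigned Lane = 0; Lane != NumElts; Lane += 4)
    for (unsigned I = 0; I != 4; ++I) {
      unsigned Sel = (Imm >> (2 * I)) & 0x3; // 2-bit element selector, reused per lane
      unsigned Src = (I < 2) ? 0 : NumElts;  // low half from src1, high half from src2
      Mask.push_back(Src + Lane + Sel);
    }
  return Mask;
}

int main() {
  // vshufps $22, %zmm1, %zmm0, %zmm0
  //   -> zmm0 = zmm0[2,1],zmm1[1,0],zmm0[6,5],zmm1[5,4],...
  for (int I : decodeSHUFPSMask(/*NumElts=*/16, /*Imm=*/22))
    std::printf("%d ", I); // 2 1 17 16 6 5 21 20 10 9 25 24 14 13 29 28
  std::printf("\n");
}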
604611 DestName = getRegName(MI->getOperand(0).getReg());
605612 break;
606613
607 case X86::VPERMILPSri:
608 case X86::VPERMILPSYri:
609 Src1Name = getRegName(MI->getOperand(1).getReg());
610 // FALL THROUGH.
611 case X86::VPERMILPSmi:
612 case X86::VPERMILPSYmi:
614 CASE_VPERM(PERMILPS, r)
615 Src1Name = getRegName(MI->getOperand(1).getReg());
616 // FALL THROUGH.
617 CASE_VPERM(PERMILPS, m)
613618 if (MI->getOperand(MI->getNumOperands() - 1).isImm())
614619 DecodePSHUFMask(getRegOperandVectorVT(MI, MVT::f32, 0),
615620 MI->getOperand(MI->getNumOperands() - 1).getImm(),
617622 DestName = getRegName(MI->getOperand(0).getReg());
618623 break;
619624
620 case X86::VPERMILPDri:
621 case X86::VPERMILPDYri:
622 Src1Name = getRegName(MI->getOperand(1).getReg());
623 // FALL THROUGH.
624 case X86::VPERMILPDmi:
625 case X86::VPERMILPDYmi:
625 CASE_VPERM(PERMILPD, r)
626 Src1Name = getRegName(MI->getOperand(1).getReg());
627 // FALL THROUGH.
628 CASE_VPERM(PERMILPD, m)
626629 if (MI->getOperand(MI->getNumOperands() - 1).isImm())
627630 DecodePSHUFMask(getRegOperandVectorVT(MI, MVT::f64, 0),
628631 MI->getOperand(MI->getNumOperands() - 1).getImm(),
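Both VPERMILP cases funnel into DecodePSHUFMask with only the element type changed, so the same single-source, in-lane decode serves 2-bit selectors for f32 and 1-bit selectors for f64. The sketch below reproduces the behaviour visible in the new CHECK comments (for f32 the 8-bit immediate repeats per 128-bit lane; for f64 the bits are consumed straight through); the real DecodePSHUFMask prototype may differ:

#include <cstdio>
#include <vector>

// Sketch of the VPERMILPS/VPERMILPD decode: LaneElts is 4 for f32 and 2 for f64.
static std::vector<int> decodePermilMask(unsigned NumElts, unsigned LaneElts,
                                         unsigned Imm) {
  std::vector<int> Mask;
  unsigned Bits = Imm;
  for (unsigned Lane = 0; Lane != NumElts; Lane += LaneElts) {
    for (unsigned I = 0; I != LaneElts; ++I) {
      Mask.push_back(Lane + (Bits % LaneElts)); // stay within the source's own lane
      Bits /= LaneElts;
    }
    if (LaneElts == 4)
      Bits = Imm; // f32: the 8-bit immediate only covers one lane, so reuse it
  }
  return Mask;
}

int main() {
  // vpermilpd $22, %zmm0, %zmm0 -> zmm0 = zmm0[0,1,3,2,5,4,6,6]
  for (int I : decodePermilMask(8, 2, 22))
    std::printf("%d ", I);
  std::printf("\n");
  // vpermilps $22, %zmm0, %zmm0 -> zmm0 = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
  for (int I : decodePermilMask(16, 4, 22))
    std::printf("%d ", I);
  std::printf("\n");
}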
43984398 ; CHECK-NEXT: movzbl %dil, %eax
43994399 ; CHECK-NEXT: kmovw %eax, %k1
44004400 ; CHECK-NEXT: vshufpd $22, %zmm1, %zmm0, %zmm2 {%k1}
4401 ; CHECK-NEXT: ## zmm2 = zmm2[0],k1[1],zmm2[3],k1[2],zmm2[5],k1[4],zmm2[6],k1[6]
44014402 ; CHECK-NEXT: vshufpd $22, %zmm1, %zmm0, %zmm3 {%k1} {z}
4403 ; CHECK-NEXT: ## zmm3 = k1[0],zmm0[1],k1[3],zmm0[2],k1[5],zmm0[4],k1[6],zmm0[6]
44024404 ; CHECK-NEXT: vshufpd $22, %zmm1, %zmm0, %zmm0
4405 ; CHECK-NEXT: ## zmm0 = zmm0[0],zmm1[1],zmm0[3],zmm1[2],zmm0[5],zmm1[4],zmm0[6],zmm1[6]
44034406 ; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
44044407 ; CHECK-NEXT: vaddpd %zmm3, %zmm0, %zmm0
44054408 ; CHECK-NEXT: retq
44194422 ; CHECK: ## BB#0:
44204423 ; CHECK-NEXT: kmovw %edi, %k1
44214424 ; CHECK-NEXT: vshufps $22, %zmm1, %zmm0, %zmm2 {%k1}
4425 ; CHECK-NEXT: ## zmm2 = zmm2[2,1],k1[1,0],zmm2[6,5],k1[5,4],zmm2[10,9],k1[9,8],zmm2[14,13],k1[13,12]
44224426 ; CHECK-NEXT: vshufps $22, %zmm1, %zmm0, %zmm0
4427 ; CHECK-NEXT: ## zmm0 = zmm0[2,1],zmm1[1,0],zmm0[6,5],zmm1[5,4],zmm0[10,9],zmm1[9,8],zmm0[14,13],zmm1[13,12]
44234428 ; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
44244429 ; CHECK-NEXT: retq
44254430 %res = call <16 x float> @llvm.x86.avx512.mask.shuf.ps.512(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 %x4)
4426 %res1 = call <16 x float> @llvm.x86.avx512.mask.shuf.ps.512(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 -1)
4431 %res1 = call <16 x float> @llvm.x86.avx512.mask.shuf.ps.512(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 -1)
44274432 %res2 = fadd <16 x float> %res, %res1
44284433 ret <16 x float> %res2
44294434 }
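Once a mask is decoded, the instruction-comment printer renders it into the strings the CHECK lines above match. The snippet below is an illustrative reconstruction of that rendering step, not the real X86InstComments code: indices below the element count name the first source, indices at or above it name the second, and consecutive indices from the same source are grouped into one bracket (which is why the vshufps comments read zmm0[2,1],zmm1[1,0],... while the vshufpd ones alternate element by element).

#include <cstdio>
#include <string>
#include <vector>

static std::string renderComment(const std::vector<int> &Mask,
                                 const std::string &Src1,
                                 const std::string &Src2,
                                 const std::string &Dest) {
  const unsigned NumElts = Mask.size();
  std::string S = Dest + " = ";
  for (unsigned I = 0; I != NumElts;) {
    const bool FromSrc2 = (unsigned)Mask[I] >= NumElts;
    S += (FromSrc2 ? Src2 : Src1) + "[";
    // Group a run of consecutive elements drawn from the same source register.
    for (bool First = true;
         I != NumElts && ((unsigned)Mask[I] >= NumElts) == FromSrc2; ++I) {
      if (!First)
        S += ",";
      First = false;
      S += std::to_string(FromSrc2 ? Mask[I] - NumElts : Mask[I]);
    }
    S += "]";
    if (I != NumElts)
      S += ",";
  }
  return S;
}

int main() {
  // Decoded mask for vshufpd $22 with 8 x f64 (see the SHUFPD sketch earlier).
  std::printf("%s\n",
              renderComment({0, 9, 3, 10, 5, 12, 6, 14}, "zmm0", "zmm1", "zmm0")
                  .c_str());
  // Prints: zmm0 = zmm0[0],zmm1[1],zmm0[3],zmm1[2],zmm0[5],zmm1[4],zmm0[6],zmm1[6]
}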
44344439 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_pd_512:
44354440 ; CHECK: ## BB#0:
44364441 ; CHECK-NEXT: movzbl %dil, %eax
4437 ; CHECK-NEXT: kmovw %eax, %k1
4438 ; CHECK-NEXT: vpermilpd $22, %zmm0, %zmm1 {%k1}
4439 ; CHECK-NEXT: vpermilpd $22, %zmm0, %zmm2 {%k1} {z}
4440 ; CHECK-NEXT: vpermilpd $22, %zmm0, %zmm0
4441 ; CHECK-NEXT: vaddpd %zmm2, %zmm1, %zmm1
4442 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
4443 ; CHECK-NEXT: retq
4442 ; CHECK-NEXT: kmovw %eax, %k1
4443 ; CHECK-NEXT: vpermilpd $22, %zmm0, %zmm1 {%k1}
4444 ; CHECK-NEXT: ## zmm1 = zmm1[0,1,3,2,5,4,6,6]
4445 ; CHECK-NEXT: vpermilpd $22, %zmm0, %zmm2 {%k1} {z}
4446 ; CHECK-NEXT: ## zmm2 = k1[0,1,3,2,5,4,6,6]
4447 ; CHECK-NEXT: vpermilpd $22, %zmm0, %zmm0
4448 ; CHECK-NEXT: ## zmm0 = zmm0[0,1,3,2,5,4,6,6]
4449 ; CHECK-NEXT: vaddpd %zmm2, %zmm1, %zmm1
4450 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
4451 ; CHECK-NEXT: retq
44444452 %res = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> %x2, i8 %x3)
44454453 %res1 = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> zeroinitializer, i8 %x3)
44464454 %res2 = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> %x2, i8 -1)
44544462 define <16 x float>@test_int_x86_avx512_mask_vpermil_ps_512(<16 x float> %x0, <16 x float> %x2, i16 %x3) {
44554463 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_ps_512:
44564464 ; CHECK: ## BB#0:
4457 ; CHECK-NEXT: kmovw %edi, %k1
4458 ; CHECK-NEXT: vpermilps $22, %zmm0, %zmm1 {%k1}
4459 ; CHECK-NEXT: vpermilps $22, %zmm0, %zmm2 {%k1} {z}
4460 ; CHECK-NEXT: vpermilps $22, %zmm0, %zmm0
4461 ; CHECK-NEXT: vaddps %zmm2, %zmm1, %zmm1
4462 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
4463 ; CHECK-NEXT: retq
4465 ; CHECK-NEXT: kmovw %edi, %k1
4466 ; CHECK-NEXT: vpermilps $22, %zmm0, %zmm1 {%k1}
4467 ; CHECK-NEXT: ## zmm1 = zmm1[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
4468 ; CHECK-NEXT: vpermilps $22, %zmm0, %zmm2 {%k1} {z}
4469 ; CHECK-NEXT: ## zmm2 = k1[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
4470 ; CHECK-NEXT: vpermilps $22, %zmm0, %zmm0
4471 ; CHECK-NEXT: ## zmm0 = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
4472 ; CHECK-NEXT: vaddps %zmm2, %zmm1, %zmm1
4473 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
4474 ; CHECK-NEXT: retq
44644475 %res = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> %x2, i16 %x3)
44654476 %res1 = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> zeroinitializer, i16 %x3)
44664477 %res2 = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> %x2, i16 -1)
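A compile-time spot check (C++20 for the constexpr std::array comparison; the decode rules are the illustrative ones from the sketches above, not LLVM's own code) that immediate 22 reproduces exactly the VPERMILPS/VPERMILPD index lists written into the comments above:

#include <array>

constexpr std::array<int, 16> permilps512(unsigned Imm) {
  std::array<int, 16> M{};
  for (unsigned Lane = 0; Lane != 16; Lane += 4) // 2-bit selector per element,
    for (unsigned I = 0; I != 4; ++I)            // immediate reused per 128-bit lane
      M[Lane + I] = Lane + ((Imm >> (2 * I)) & 0x3);
  return M;
}

constexpr std::array<int, 8> permilpd512(unsigned Imm) {
  std::array<int, 8> M{};
  for (unsigned I = 0; I != 8; ++I)              // 1-bit selector per element
    M[I] = (I & ~1u) + ((Imm >> I) & 0x1);
  return M;
}

static_assert(permilps512(22) ==
                  std::array<int, 16>{2, 1, 1, 0, 6, 5, 5, 4,
                                      10, 9, 9, 8, 14, 13, 13, 12},
              "matches the vpermilps $22 comments");
static_assert(permilpd512(22) == std::array<int, 8>{0, 1, 3, 2, 5, 4, 6, 6},
              "matches the vpermilpd $22 comments");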
47254725 ; CHECK-NEXT: movzbl %dil, %eax
47264726 ; CHECK-NEXT: kmovw %eax, %k1
47274727 ; CHECK-NEXT: vshufpd $22, %xmm1, %xmm0, %xmm2 {%k1}
4728 ; CHECK-NEXT: ## xmm2 = xmm2[0],k1[1]
47284729 ; CHECK-NEXT: vshufpd $22, %xmm1, %xmm0, %xmm3 {%k1} {z}
4730 ; CHECK-NEXT: ## xmm3 = k1[0],xmm0[1]
47294731 ; CHECK-NEXT: vshufpd $22, %xmm1, %xmm0, %xmm0
4730 ; CHECK: vaddpd %xmm0, %xmm2, %xmm0
4732 ; CHECK-NEXT: ## xmm0 = xmm0[0],xmm1[1]
4733 ; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0
47314734 ; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0
47324735 ; CHECK-NEXT: retq
47334736 %res = call <2 x double> @llvm.x86.avx512.mask.shuf.pd.128(<2 x double> %x0, <2 x double> %x1, i32 22, <2 x double> %x3, i8 %x4)
47464749 ; CHECK-NEXT: movzbl %dil, %eax
47474750 ; CHECK-NEXT: kmovw %eax, %k1
47484751 ; CHECK-NEXT: vshufpd $22, %ymm1, %ymm0, %ymm2 {%k1}
4752 ; CHECK-NEXT: ## ymm2 = ymm2[0],k1[1],ymm2[3],k1[2]
47494753 ; CHECK-NEXT: vshufpd $22, %ymm1, %ymm0, %ymm0
4750 ; CHECK: vaddpd %ymm0, %ymm2, %ymm0
4754 ; CHECK-NEXT: ## ymm0 = ymm0[0],ymm1[1],ymm0[3],ymm1[2]
4755 ; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0
47514756 ; CHECK-NEXT: retq
47524757 %res = call <4 x double> @llvm.x86.avx512.mask.shuf.pd.256(<4 x double> %x0, <4 x double> %x1, i32 22, <4 x double> %x3, i8 %x4)
47534758 %res1 = call <4 x double> @llvm.x86.avx512.mask.shuf.pd.256(<4 x double> %x0, <4 x double> %x1, i32 22, <4 x double> %x3, i8 -1)
47634768 ; CHECK-NEXT: movzbl %dil, %eax
47644769 ; CHECK-NEXT: kmovw %eax, %k1
47654770 ; CHECK-NEXT: vshufps $22, %xmm1, %xmm0, %xmm2 {%k1}
4771 ; CHECK-NEXT: ## xmm2 = xmm2[2,1],k1[1,0]
47664772 ; CHECK-NEXT: vshufps $22, %xmm1, %xmm0, %xmm0
4767 ; CHECK: vaddps %xmm0, %xmm2, %xmm0
4773 ; CHECK-NEXT: ## xmm0 = xmm0[2,1],xmm1[1,0]
4774 ; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0
47684775 ; CHECK-NEXT: retq
47694776 %res = call <4 x float> @llvm.x86.avx512.mask.shuf.ps.128(<4 x float> %x0, <4 x float> %x1, i32 22, <4 x float> %x3, i8 %x4)
47704777 %res1 = call <4 x float> @llvm.x86.avx512.mask.shuf.ps.128(<4 x float> %x0, <4 x float> %x1, i32 22, <4 x float> %x3, i8 -1)
47804787 ; CHECK-NEXT: movzbl %dil, %eax
47814788 ; CHECK-NEXT: kmovw %eax, %k1
47824789 ; CHECK-NEXT: vshufps $22, %ymm1, %ymm0, %ymm2 {%k1}
4790 ; CHECK-NEXT: ## ymm2 = ymm2[2,1],k1[1,0],ymm2[6,5],k1[5,4]
47834791 ; CHECK-NEXT: vshufps $22, %ymm1, %ymm0, %ymm0
4784 ; CHECK: vaddps %ymm0, %ymm2, %ymm0
4792 ; CHECK-NEXT: ## ymm0 = ymm0[2,1],ymm1[1,0],ymm0[6,5],ymm1[5,4]
4793 ; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0
47854794 ; CHECK-NEXT: retq
47864795 %res = call <8 x float> @llvm.x86.avx512.mask.shuf.ps.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> %x3, i8 %x4)
47874796 %res1 = call <8 x float> @llvm.x86.avx512.mask.shuf.ps.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> %x3, i8 -1)
48694878 ; CHECK-NEXT: movzbl %dil, %eax
48704879 ; CHECK-NEXT: kmovw %eax, %k1
48714880 ; CHECK-NEXT: vpermilpd $22, %ymm0, %ymm1 {%k1}
4881 ; CHECK-NEXT: ## ymm1 = ymm1[0,1,3,2]
48724882 ; CHECK-NEXT: vpermilpd $22, %ymm0, %ymm2 {%k1} {z}
4883 ; CHECK-NEXT: ## ymm2 = k1[0,1,3,2]
48734884 ; CHECK-NEXT: vpermilpd $22, %ymm0, %ymm0
4885 ; CHECK-NEXT: ## ymm0 = ymm0[0,1,3,2]
48744886 ; CHECK-NEXT: vaddpd %ymm2, %ymm1, %ymm1
48754887 ; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
48764888 ; CHECK-NEXT: retq
48904902 ; CHECK-NEXT: movzbl %dil, %eax
48914903 ; CHECK-NEXT: kmovw %eax, %k1
48924904 ; CHECK-NEXT: vpermilpd $1, %xmm0, %xmm1 {%k1}
4905 ; CHECK-NEXT: ## xmm1 = xmm1[1,0]
48934906 ; CHECK-NEXT: vpermilpd $1, %xmm0, %xmm2 {%k1} {z}
4907 ; CHECK-NEXT: ## xmm2 = k1[1,0]
48944908 ; CHECK-NEXT: vpermilpd $1, %xmm0, %xmm0
4909 ; CHECK-NEXT: ## xmm0 = xmm0[1,0]
48954910 ; CHECK-NEXT: vaddpd %xmm2, %xmm1, %xmm1
48964911 ; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
48974912 ; CHECK-NEXT: retq
49114926 ; CHECK-NEXT: movzbl %dil, %eax
49124927 ; CHECK-NEXT: kmovw %eax, %k1
49134928 ; CHECK-NEXT: vpermilps $22, %ymm0, %ymm1 {%k1}
4929 ; CHECK-NEXT: ## ymm1 = ymm1[2,1,1,0,6,5,5,4]
49144930 ; CHECK-NEXT: vpermilps $22, %ymm0, %ymm2 {%k1} {z}
4931 ; CHECK-NEXT: ## ymm2 = k1[2,1,1,0,6,5,5,4]
49154932 ; CHECK-NEXT: vpermilps $22, %ymm0, %ymm0
4933 ; CHECK-NEXT: ## ymm0 = ymm0[2,1,1,0,6,5,5,4]
49164934 ; CHECK-NEXT: vaddps %ymm2, %ymm1, %ymm1
49174935 ; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
49184936 ; CHECK-NEXT: retq
49324950 ; CHECK-NEXT: movzbl %dil, %eax
49334951 ; CHECK-NEXT: kmovw %eax, %k1
49344952 ; CHECK-NEXT: vpermilps $22, %xmm0, %xmm1 {%k1}
4953 ; CHECK-NEXT: ## xmm1 = xmm1[2,1,1,0]
49354954 ; CHECK-NEXT: vpermilps $22, %xmm0, %xmm2 {%k1} {z}
4955 ; CHECK-NEXT: ## xmm2 = k1[2,1,1,0]
49364956 ; CHECK-NEXT: vpermilps $22, %xmm0, %xmm0
4957 ; CHECK-NEXT: ## xmm0 = xmm0[2,1,1,0]
49374958 ; CHECK-NEXT: vaddps %xmm2, %xmm1, %xmm1
49384959 ; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0
49394960 ; CHECK-NEXT: retq
141141 ; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
142142 ; SSE-NEXT: retq
143143 ;
144 ; AVX1-LABEL: shuffle_v2f64_10:
145 ; AVX1: # BB#0:
146 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
147 ; AVX1-NEXT: retq
148 ;
149 ; AVX2-LABEL: shuffle_v2f64_10:
150 ; AVX2: # BB#0:
151 ; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
152 ; AVX2-NEXT: retq
153 ;
154 ; AVX512VL-LABEL: shuffle_v2f64_10:
155 ; AVX512VL: # BB#0:
156 ; AVX512VL-NEXT: vpermilpd $1, %xmm0, %xmm0
157 ; AVX512VL-NEXT: retq
144 ; AVX-LABEL: shuffle_v2f64_10:
145 ; AVX: # BB#0:
146 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
147 ; AVX-NEXT: retq
158148
160150 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 0>
160150 ret <2 x double> %shuffle
218208 ; SSE-NEXT: movapd %xmm1, %xmm0
219209 ; SSE-NEXT: retq
220210 ;
221 ; AVX1-LABEL: shuffle_v2f64_32:
222 ; AVX1: # BB#0:
223 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm1[1,0]
224 ; AVX1-NEXT: retq
225 ;
226 ; AVX2-LABEL: shuffle_v2f64_32:
227 ; AVX2: # BB#0:
228 ; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm1[1,0]
229 ; AVX2-NEXT: retq
230 ;
231 ; AVX512VL-LABEL: shuffle_v2f64_32:
232 ; AVX512VL: # BB#0:
233 ; AVX512VL-NEXT: vpermilpd $1, %xmm1, %xmm0
234 ; AVX512VL-NEXT: retq
211 ; AVX-LABEL: shuffle_v2f64_32:
212 ; AVX: # BB#0:
213 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm1[1,0]
214 ; AVX-NEXT: retq
235215
236216 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 2>
237217 ret <2 x double> %shuffle
954934 ; AVX512VL-LABEL: shuffle_v2f64_bitcast_1z:
955935 ; AVX512VL: # BB#0:
956936 ; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
957 ; AVX512VL-NEXT: vshufpd $1, %xmm1, %xmm0, %xmm0
937 ; AVX512VL-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
958938 ; AVX512VL-NEXT: retq
959939 %shuffle64 = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32>
960940 %bitcast32 = bitcast <2 x double> %shuffle64 to <4 x float>
14361416 ; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
14371417 ; SSE-NEXT: retq
14381418 ;
1439 ; AVX1-LABEL: shuffle_mem_v2f64_10:
1440 ; AVX1: # BB#0:
1441 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = mem[1,0]
1442 ; AVX1-NEXT: retq
1443 ;
1444 ; AVX2-LABEL: shuffle_mem_v2f64_10:
1445 ; AVX2: # BB#0:
1446 ; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = mem[1,0]
1447 ; AVX2-NEXT: retq
1448 ;
1449 ; AVX512VL-LABEL: shuffle_mem_v2f64_10:
1450 ; AVX512VL: # BB#0:
1451 ; AVX512VL-NEXT: vpermilpd $1, (%rdi), %xmm0
1452 ; AVX512VL-NEXT: retq
1419 ; AVX-LABEL: shuffle_mem_v2f64_10:
1420 ; AVX: # BB#0:
1421 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = mem[1,0]
1422 ; AVX-NEXT: retq
14531423
14541424 %a = load <2 x double>, <2 x double>* %ptr
14551425 %shuffle = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 1, i32 0>
169169 }
170170
171171 define <4 x double> @shuffle_v4f64_0023(<4 x double> %a, <4 x double> %b) {
172 ; AVX1-LABEL: shuffle_v4f64_0023:
173 ; AVX1: # BB#0:
174 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,3]
175 ; AVX1-NEXT: retq
176 ;
177 ; AVX2-LABEL: shuffle_v4f64_0023:
178 ; AVX2: # BB#0:
179 ; AVX2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,3]
180 ; AVX2-NEXT: retq
181 ;
182 ; AVX512VL-LABEL: shuffle_v4f64_0023:
183 ; AVX512VL: # BB#0:
184 ; AVX512VL-NEXT: vpermilpd $8, %ymm0, %ymm0
185 ; AVX512VL-NEXT: retq
172 ; ALL-LABEL: shuffle_v4f64_0023:
173 ; ALL: # BB#0:
174 ; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,3]
175 ; ALL-NEXT: retq
186176
187177 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 3>
188178 ret <4 x double> %shuffle
198188 }
199189
200190 define <4 x double> @shuffle_v4f64_1032(<4 x double> %a, <4 x double> %b) {
201 ; AVX1-LABEL: shuffle_v4f64_1032:
202 ; AVX1: # BB#0:
203 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
204 ; AVX1-NEXT: retq
205 ;
206 ; AVX2-LABEL: shuffle_v4f64_1032:
207 ; AVX2: # BB#0:
208 ; AVX2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
209 ; AVX2-NEXT: retq
210 ;
211 ; AVX512VL-LABEL: shuffle_v4f64_1032:
212 ; AVX512VL: # BB#0:
213 ; AVX512VL-NEXT: vpermilpd $5, %ymm0, %ymm0
214 ; AVX512VL-NEXT: retq
191 ; ALL-LABEL: shuffle_v4f64_1032:
192 ; ALL: # BB#0:
193 ; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
194 ; ALL-NEXT: retq
215195 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
216196 ret <4 x double> %shuffle
217197 }
218198
219199 define <4 x double> @shuffle_v4f64_1133(<4 x double> %a, <4 x double> %b) {
220 ; AVX1-LABEL: shuffle_v4f64_1133:
221 ; AVX1: # BB#0:
222 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,3]
223 ; AVX1-NEXT: retq
224 ;
225 ; AVX2-LABEL: shuffle_v4f64_1133:
226 ; AVX2: # BB#0:
227 ; AVX2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,3]
228 ; AVX2-NEXT: retq
229 ;
230 ; AVX512VL-LABEL: shuffle_v4f64_1133:
231 ; AVX512VL: # BB#0:
232 ; AVX512VL-NEXT: vpermilpd $15, %ymm0, %ymm0
233 ; AVX512VL-NEXT: retq
200 ; ALL-LABEL: shuffle_v4f64_1133:
201 ; ALL: # BB#0:
202 ; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,3]
203 ; ALL-NEXT: retq
234204 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
235205 ret <4 x double> %shuffle
236206 }
237207
238208 define <4 x double> @shuffle_v4f64_1023(<4 x double> %a, <4 x double> %b) {
239 ; AVX1-LABEL: shuffle_v4f64_1023:
240 ; AVX1: # BB#0:
241 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3]
242 ; AVX1-NEXT: retq
243 ;
244 ; AVX2-LABEL: shuffle_v4f64_1023:
245 ; AVX2: # BB#0:
246 ; AVX2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3]
247 ; AVX2-NEXT: retq
248 ;
249 ; AVX512VL-LABEL: shuffle_v4f64_1023:
250 ; AVX512VL: # BB#0:
251 ; AVX512VL-NEXT: vpermilpd $9, %ymm0, %ymm0
252 ; AVX512VL-NEXT: retq
209 ; ALL-LABEL: shuffle_v4f64_1023:
210 ; ALL: # BB#0:
211 ; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3]
212 ; ALL-NEXT: retq
253213 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 2, i32 3>
254214 ret <4 x double> %shuffle
255215 }
256216
257217 define <4 x double> @shuffle_v4f64_1022(<4 x double> %a, <4 x double> %b) {
258 ; AVX1-LABEL: shuffle_v4f64_1022:
259 ; AVX1: # BB#0:
260 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,2]
261 ; AVX1-NEXT: retq
262 ;
263 ; AVX2-LABEL: shuffle_v4f64_1022:
264 ; AVX2: # BB#0:
265 ; AVX2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,2]
266 ; AVX2-NEXT: retq
267 ;
268 ; AVX512VL-LABEL: shuffle_v4f64_1022:
269 ; AVX512VL: # BB#0:
270 ; AVX512VL-NEXT: vpermilpd $1, %ymm0, %ymm0
271 ; AVX512VL-NEXT: retq
218 ; ALL-LABEL: shuffle_v4f64_1022:
219 ; ALL: # BB#0:
220 ; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,2]
221 ; ALL-NEXT: retq
272222 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 2, i32 2>
273223 ret <4 x double> %shuffle
274224 }
343293 }
344294
345295 define <4 x double> @shuffle_v4f64_5163(<4 x double> %a, <4 x double> %b) {
346 ; AVX1-LABEL: shuffle_v4f64_5163:
347 ; AVX1: # BB#0:
348 ; AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[2],ymm0[3]
349 ; AVX1-NEXT: retq
350 ;
351 ; AVX2-LABEL: shuffle_v4f64_5163:
352 ; AVX2: # BB#0:
353 ; AVX2-NEXT: vshufpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[2],ymm0[3]
354 ; AVX2-NEXT: retq
355 ;
356 ; AVX512VL-LABEL: shuffle_v4f64_5163:
357 ; AVX512VL: # BB#0:
358 ; AVX512VL-NEXT: vshufpd $11, %ymm0, %ymm1, %ymm0
359 ; AVX512VL-NEXT: retq
296 ; ALL-LABEL: shuffle_v4f64_5163:
297 ; ALL: # BB#0:
298 ; ALL-NEXT: vshufpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[2],ymm0[3]
299 ; ALL-NEXT: retq
360300 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 5, i32 1, i32 6, i32 3>
361301 ret <4 x double> %shuffle
362302 }
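One worked case from this file: the AVX512VL run previously printed the raw form "vshufpd $11, %ymm0, %ymm1, %ymm0", where AT&T operand order makes %ymm1 the first source and %ymm0 the second. Decoding 11 = 0b1011 with the SHUFPD rule sketched earlier reproduces the merged ALL check line, ymm1[1],ymm0[1],ymm1[2],ymm0[3] (a hypothetical spot check, not LLVM code):

#include <cstdio>

int main() {
  const char *Names[2] = {"ymm1", "ymm0"}; // {first source, second source} in AT&T order
  unsigned Imm = 11;                       // 0b1011, one bit per f64 element
  for (unsigned Elt = 0; Elt != 4; ++Elt, Imm >>= 1) {
    unsigned LaneBase = Elt & ~1u; // 128-bit lane base (pairs of doubles)
    unsigned Src = Elt & 1;        // even elements from src1, odd from src2
    std::printf("%s[%u]%s", Names[Src], LaneBase + (Imm & 1),
                Elt == 3 ? "\n" : ",");
  }
  // Prints: ymm1[1],ymm0[1],ymm1[2],ymm0[3]
}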
442382 ; AVX512VL-LABEL: shuffle_v4f64_1054:
443383 ; AVX512VL: # BB#0:
444384 ; AVX512VL-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0
445 ; AVX512VL-NEXT: vpermilpd $5, %ymm0, %ymm0
385 ; AVX512VL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
446386 ; AVX512VL-NEXT: retq
447387 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 5, i32 4>
448388 ret <4 x double> %shuffle
464404 ; AVX512VL-LABEL: shuffle_v4f64_3254:
465405 ; AVX512VL: # BB#0:
466406 ; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
467 ; AVX512VL-NEXT: vpermilpd $5, %ymm0, %ymm0
407 ; AVX512VL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
468408 ; AVX512VL-NEXT: retq
469409 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 2, i32 5, i32 4>
470410 ret <4 x double> %shuffle
486426 ; AVX512VL-LABEL: shuffle_v4f64_3276:
487427 ; AVX512VL: # BB#0:
488428 ; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
489 ; AVX512VL-NEXT: vpermilpd $5, %ymm0, %ymm0
429 ; AVX512VL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
490430 ; AVX512VL-NEXT: retq
491431 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 2, i32 7, i32 6>
492432 ret <4 x double> %shuffle
493433 }
494434
495435 define <4 x double> @shuffle_v4f64_1076(<4 x double> %a, <4 x double> %b) {
496 ; AVX1-LABEL: shuffle_v4f64_1076:
497 ; AVX1: # BB#0:
498 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
499 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
500 ; AVX1-NEXT: retq
501 ;
502 ; AVX2-LABEL: shuffle_v4f64_1076:
503 ; AVX2: # BB#0:
504 ; AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
505 ; AVX2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
506 ; AVX2-NEXT: retq
507 ;
508 ; AVX512VL-LABEL: shuffle_v4f64_1076:
509 ; AVX512VL: # BB#0:
510 ; AVX512VL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
511 ; AVX512VL-NEXT: vpermilpd $5, %ymm0, %ymm0
512 ; AVX512VL-NEXT: retq
436 ; ALL-LABEL: shuffle_v4f64_1076:
437 ; ALL: # BB#0:
438 ; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
439 ; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
440 ; ALL-NEXT: retq
513441 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 7, i32 6>
514442 ret <4 x double> %shuffle
515443 }