llvm.org GIT mirror llvm / 2f0ee90
[X86][AVX512] Add support for VPERM/VSHUF masked shuffle comments
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@274462 91177308-0d34-0410-b5e6-96231b3b80d8
Simon Pilgrim 4 years ago
7 changed file(s) with 122 addition(s) and 66 deletion(s). Raw diff Collapse all Expand all
8989 CASE_AVX_INS_COMMON(Inst, Y, suf) \
9090 CASE_SSE_INS_COMMON(Inst, suf)
9191
92 #define CASE_MASK_SHUF(Inst, src) \
93 CASE_MASK_INS_COMMON(Inst, Z, r##src##i) \
94 CASE_MASK_INS_COMMON(Inst, Z256, r##src##i) \
95 CASE_MASK_INS_COMMON(Inst, Z128, r##src##i)
96
97 #define CASE_MASKZ_SHUF(Inst, src) \
98 CASE_MASKZ_INS_COMMON(Inst, Z, r##src##i) \
99 CASE_MASKZ_INS_COMMON(Inst, Z256, r##src##i) \
100 CASE_MASKZ_INS_COMMON(Inst, Z128, r##src##i)
101
92102 #define CASE_VPERM(Inst, src) \
93103 CASE_AVX512_INS_COMMON(Inst, Z, src##i) \
94104 CASE_AVX512_INS_COMMON(Inst, Z256, src##i) \
95105 CASE_AVX512_INS_COMMON(Inst, Z128, src##i) \
96106 CASE_AVX_INS_COMMON(Inst, , src##i) \
97107 CASE_AVX_INS_COMMON(Inst, Y, src##i)
98108
109 #define CASE_MASK_VPERM(Inst, src) \
110 CASE_MASK_INS_COMMON(Inst, Z, src##i) \
111 CASE_MASK_INS_COMMON(Inst, Z256, src##i) \
112 CASE_MASK_INS_COMMON(Inst, Z128, src##i)
113
114 #define CASE_MASKZ_VPERM(Inst, src) \
115 CASE_MASKZ_INS_COMMON(Inst, Z, src##i) \
116 CASE_MASKZ_INS_COMMON(Inst, Z256, src##i) \
117 CASE_MASKZ_INS_COMMON(Inst, Z128, src##i)
118
99119 #define CASE_VSHUF(Inst, src) \
100120 CASE_AVX512_INS_COMMON(SHUFF##Inst, Z, r##src##i) \
101121 CASE_AVX512_INS_COMMON(SHUFI##Inst, Z, r##src##i) \
102122 CASE_AVX512_INS_COMMON(SHUFF##Inst, Z256, r##src##i) \
103123 CASE_AVX512_INS_COMMON(SHUFI##Inst, Z256, r##src##i)
124
125 #define CASE_MASK_VSHUF(Inst, src) \
126 CASE_MASK_INS_COMMON(SHUFF##Inst, Z, r##src##i) \
127 CASE_MASK_INS_COMMON(SHUFI##Inst, Z, r##src##i) \
128 CASE_MASK_INS_COMMON(SHUFF##Inst, Z256, r##src##i) \
129 CASE_MASK_INS_COMMON(SHUFI##Inst, Z256, r##src##i)
130
131 #define CASE_MASKZ_VSHUF(Inst, src) \
132 CASE_MASKZ_INS_COMMON(SHUFF##Inst, Z, r##src##i) \
133 CASE_MASKZ_INS_COMMON(SHUFI##Inst, Z, r##src##i) \
134 CASE_MASKZ_INS_COMMON(SHUFF##Inst, Z256, r##src##i) \
135 CASE_MASKZ_INS_COMMON(SHUFI##Inst, Z256, r##src##i)
104136
105137 static unsigned getVectorRegSize(unsigned RegNo) {
106138 if (X86::ZMM0 <= RegNo && RegNo <= X86::ZMM31)
177209 CASE_MASKZ_PMOVZX(PMOVZXWD, r)
178210 CASE_MASKZ_PMOVZX(PMOVZXWQ, m)
179211 CASE_MASKZ_PMOVZX(PMOVZXWQ, r)
212 CASE_MASKZ_SHUF(SHUFPD, m)
213 CASE_MASKZ_SHUF(SHUFPD, r)
214 CASE_MASKZ_SHUF(SHUFPS, m)
215 CASE_MASKZ_SHUF(SHUFPS, r)
216 CASE_MASKZ_VPERM(PERMILPD, m)
217 CASE_MASKZ_VPERM(PERMILPD, r)
218 CASE_MASKZ_VPERM(PERMILPS, m)
219 CASE_MASKZ_VPERM(PERMILPS, r)
220 CASE_MASKZ_VSHUF(64X2, m)
221 CASE_MASKZ_VSHUF(64X2, r)
222 CASE_MASKZ_VSHUF(32X4, m)
223 CASE_MASKZ_VSHUF(32X4, r)
180224 MaskWithZero = true;
181225 MaskRegName = getRegName(MI->getOperand(1).getReg());
182226 break;
198242 CASE_MASK_PMOVZX(PMOVZXWD, r)
199243 CASE_MASK_PMOVZX(PMOVZXWQ, m)
200244 CASE_MASK_PMOVZX(PMOVZXWQ, r)
245 CASE_MASK_SHUF(SHUFPD, m)
246 CASE_MASK_SHUF(SHUFPD, r)
247 CASE_MASK_SHUF(SHUFPS, m)
248 CASE_MASK_SHUF(SHUFPS, r)
249 CASE_MASK_VPERM(PERMILPD, m)
250 CASE_MASK_VPERM(PERMILPD, r)
251 CASE_MASK_VPERM(PERMILPS, m)
252 CASE_MASK_VPERM(PERMILPS, r)
253 CASE_MASK_VSHUF(64X2, m)
254 CASE_MASK_VSHUF(64X2, r)
255 CASE_MASK_VSHUF(32X4, m)
256 CASE_MASK_VSHUF(32X4, r)
201257 MaskRegName = getRegName(MI->getOperand(2).getReg());
202258 break;
203259 }
52545254 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_f32x4:
52555255 ; CHECK: ## BB#0:
52565256 ; CHECK-NEXT: kmovw %edi, %k1
5257 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm2 = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
5257 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
52585258 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
52595259 ; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
52605260 ; CHECK-NEXT: retq
52705270 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_f64x2:
52715271 ; CHECK: ## BB#0:
52725272 ; CHECK-NEXT: kmovw %edi, %k1
5273 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm2 = zmm0[4,5,2,3],zmm1[2,3,0,1]
5274 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm3 = zmm0[4,5,2,3],zmm1[2,3,0,1]
5273 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[4,5,2,3],zmm1[2,3,0,1]
5274 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm3 {%k1} {z} = zmm0[4,5,2,3],zmm1[2,3,0,1]
52755275 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[4,5,2,3],zmm1[2,3,0,1]
52765276 ; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
52775277 ; CHECK-NEXT: vaddpd %zmm3, %zmm0, %zmm0
52915291 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_i32x4:
52925292 ; CHECK: ## BB#0:
52935293 ; CHECK-NEXT: kmovw %edi, %k1
5294 ; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm2 = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
5294 ; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
52955295 ; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm0 = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
52965296 ; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
52975297 ; CHECK-NEXT: retq
53075307 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_i64x2:
53085308 ; CHECK: ## BB#0:
53095309 ; CHECK-NEXT: kmovw %edi, %k1
5310 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm0[4,5,2,3],zmm1[2,3,0,1]
5310 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[4,5,2,3],zmm1[2,3,0,1]
53115311 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[4,5,2,3],zmm1[2,3,0,1]
53125312 ; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
53135313 ; CHECK-NEXT: retq
54065406 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_pd_512:
54075407 ; CHECK: ## BB#0:
54085408 ; CHECK-NEXT: kmovw %edi, %k1
5409 ; CHECK-NEXT: vshufpd {{.*#+}} zmm2 = zmm0[0],zmm1[1],zmm0[3],zmm1[2],zmm0[5],zmm1[4],zmm0[6],zmm1[6]
5410 ; CHECK-NEXT: vshufpd {{.*#+}} zmm3 = zmm0[0],zmm1[1],zmm0[3],zmm1[2],zmm0[5],zmm1[4],zmm0[6],zmm1[6]
5409 ; CHECK-NEXT: vshufpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[1],zmm0[3],zmm1[2],zmm0[5],zmm1[4],zmm0[6],zmm1[6]
5410 ; CHECK-NEXT: vshufpd {{.*#+}} zmm3 {%k1} {z} = zmm0[0],zmm1[1],zmm0[3],zmm1[2],zmm0[5],zmm1[4],zmm0[6],zmm1[6]
54115411 ; CHECK-NEXT: vshufpd {{.*#+}} zmm0 = zmm0[0],zmm1[1],zmm0[3],zmm1[2],zmm0[5],zmm1[4],zmm0[6],zmm1[6]
54125412 ; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
54135413 ; CHECK-NEXT: vaddpd %zmm3, %zmm0, %zmm0
54275427 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_ps_512:
54285428 ; CHECK: ## BB#0:
54295429 ; CHECK-NEXT: kmovw %edi, %k1
5430 ; CHECK-NEXT: vshufps {{.*#+}} zmm2 = zmm0[2,1],zmm1[1,0],zmm0[6,5],zmm1[5,4],zmm0[10,9],zmm1[9,8],zmm0[14,13],zmm1[13,12]
5430 ; CHECK-NEXT: vshufps {{.*#+}} zmm2 {%k1} = zmm0[2,1],zmm1[1,0],zmm0[6,5],zmm1[5,4],zmm0[10,9],zmm1[9,8],zmm0[14,13],zmm1[13,12]
54315431 ; CHECK-NEXT: vshufps {{.*#+}} zmm0 = zmm0[2,1],zmm1[1,0],zmm0[6,5],zmm1[5,4],zmm0[10,9],zmm1[9,8],zmm0[14,13],zmm1[13,12]
54325432 ; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
54335433 ; CHECK-NEXT: retq
54435443 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_pd_512:
54445444 ; CHECK: ## BB#0:
54455445 ; CHECK-NEXT: kmovw %edi, %k1
5446 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm1 = zmm0[0,1,3,2,5,4,6,6]
5447 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm2 = zmm0[0,1,3,2,5,4,6,6]
5446 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm1 {%k1} = zmm0[0,1,3,2,5,4,6,6]
5447 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,3,2,5,4,6,6]
54485448 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[0,1,3,2,5,4,6,6]
54495449 ; CHECK-NEXT: vaddpd %zmm2, %zmm1, %zmm1
54505450 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
54635463 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_ps_512:
54645464 ; CHECK: ## BB#0:
54655465 ; CHECK-NEXT: kmovw %edi, %k1
5466 ; CHECK-NEXT: vpermilps {{.*#+}} zmm1 = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
5467 ; CHECK-NEXT: vpermilps {{.*#+}} zmm2 = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
5466 ; CHECK-NEXT: vpermilps {{.*#+}} zmm1 {%k1} = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
5467 ; CHECK-NEXT: vpermilps {{.*#+}} zmm2 {%k1} {z} = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
54685468 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
54695469 ; CHECK-NEXT: vaddps %zmm2, %zmm1, %zmm1
54705470 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
58655865 ; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf32x4_512:
58665866 ; CHECK: ## BB#0:
58675867 ; CHECK-NEXT: kmovw %edi, %k1
5868 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm2 = zmm0[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
5869 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm1 = zmm0[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
5868 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
5869 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
58705870 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 = zmm0[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
58715871 ; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0
58725872 ; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
58865886 ; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf64x4_512:
58875887 ; CHECK: ## BB#0:
58885888 ; CHECK-NEXT: kmovw %edi, %k1
5889 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm2 = zmm0[0,1,2,3,0,1,2,3]
5890 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm1 = zmm0[0,1,2,3,0,1,2,3]
5889 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,2,3,0,1,2,3]
5890 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,0,1,2,3]
58915891 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3,0,1,2,3]
58925892 ; CHECK-NEXT: vaddpd %zmm1, %zmm0, %zmm0
58935893 ; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
59075907 ; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti32x4_512:
59085908 ; CHECK: ## BB#0:
59095909 ; CHECK-NEXT: kmovw %edi, %k1
5910 ; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm2 = zmm0[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
5911 ; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm1 = zmm0[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
5910 ; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
5911 ; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
59125912 ; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm0 = zmm0[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
59135913 ; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0
59145914 ; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
59285928 ; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti64x4_512:
59295929 ; CHECK: ## BB#0:
59305930 ; CHECK-NEXT: kmovw %edi, %k1
5931 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm0[0,1,2,3,0,1,2,3]
5932 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[0,1,2,3,0,1,2,3]
5931 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,2,3,0,1,2,3]
5932 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,0,1,2,3]
59335933 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3,0,1,2,3]
59345934 ; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0
59355935 ; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
636636 ; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf32x8_512:
637637 ; CHECK: ## BB#0:
638638 ; CHECK-NEXT: kmovw %edi, %k1
639 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm2 = zmm0[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
640 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm1 = zmm0[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
639 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
640 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
641641 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 = zmm0[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
642642 ; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0
643643 ; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
657657 ; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf64x2_512:
658658 ; CHECK: ## BB#0:
659659 ; CHECK-NEXT: kmovb %edi, %k1
660 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm2 = zmm0[0,1,0,1,0,1,0,1]
661 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm1 = zmm0[0,1,0,1,0,1,0,1]
660 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,0,1,0,1,0,1]
661 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[0,1,0,1,0,1,0,1]
662662 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,0,1,0,1]
663663 ; CHECK-NEXT: vaddpd %zmm1, %zmm0, %zmm0
664664 ; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
678678 ; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti32x8_512:
679679 ; CHECK: ## BB#0:
680680 ; CHECK-NEXT: kmovw %edi, %k1
681 ; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm2 = zmm0[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
682 ; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm1 = zmm0[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
681 ; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
682 ; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
683683 ; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm0 = zmm0[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
684684 ; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0
685685 ; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
699699 ; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti64x2_512:
700700 ; CHECK: ## BB#0:
701701 ; CHECK-NEXT: kmovb %edi, %k1
702 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm0[0,1,0,1,0,1,0,1]
703 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[0,1,0,1,0,1,0,1]
702 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,0,1,0,1,0,1]
703 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[0,1,0,1,0,1,0,1]
704704 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,0,1,0,1]
705705 ; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0
706706 ; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
None ; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
0 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
11 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512dq -mattr=+avx512vl --show-mc-encoding| FileCheck %s
22
33 define <8 x i64> @test_mask_mullo_epi64_rr_512(<8 x i64> %a, <8 x i64> %b) {
23682368 ; CHECK: ## BB#0:
23692369 ; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
23702370 ; CHECK-NEXT: vshuff64x2 $0, %ymm0, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x23,0xd0,0x00]
2371 ; CHECK-NEXT: ## ymm2 = ymm0[0,1,0,1]
2371 ; CHECK-NEXT: ## ymm2 {%k1} {z} = ymm0[0,1,0,1]
23722372 ; CHECK-NEXT: vshuff64x2 $0, %ymm0, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x23,0xc8,0x00]
2373 ; CHECK-NEXT: ## ymm1 = ymm0[0,1,0,1]
2373 ; CHECK-NEXT: ## ymm1 {%k1} = ymm0[0,1,0,1]
23742374 ; CHECK-NEXT: vshuff64x2 $0, %ymm0, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x23,0xc0,0x00]
23752375 ; CHECK-NEXT: ## ymm0 = ymm0[0,1,0,1]
23762376 ; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x58,0xc1]
23922392 ; CHECK: ## BB#0:
23932393 ; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
23942394 ; CHECK-NEXT: vshufi64x2 $0, %ymm0, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x43,0xd0,0x00]
2395 ; CHECK-NEXT: ## ymm2 = ymm0[0,1,0,1]
2395 ; CHECK-NEXT: ## ymm2 {%k1} {z} = ymm0[0,1,0,1]
23962396 ; CHECK-NEXT: vshufi64x2 $0, %ymm0, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x43,0xc8,0x00]
2397 ; CHECK-NEXT: ## ymm1 = ymm0[0,1,0,1]
2397 ; CHECK-NEXT: ## ymm1 {%k1} = ymm0[0,1,0,1]
23982398 ; CHECK-NEXT: vshufi64x2 $0, %ymm0, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x43,0xc0,0x00]
23992399 ; CHECK-NEXT: ## ymm0 = ymm0[0,1,0,1]
24002400 ; CHECK-NEXT: vpaddq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xd4,0xc1]
54135413 ; CHECK: ## BB#0:
54145414 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
54155415 ; CHECK-NEXT: vshuff32x4 $22, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x23,0xd1,0x16]
5416 ; CHECK-NEXT: ## ymm2 = ymm0[0,1,2,3],ymm1[4,5,6,7]
5416 ; CHECK-NEXT: ## ymm2 {%k1} = ymm0[0,1,2,3],ymm1[4,5,6,7]
54175417 ; CHECK-NEXT: vshuff32x4 $22, %ymm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xa9,0x23,0xd9,0x16]
5418 ; CHECK-NEXT: ## ymm3 = ymm0[0,1,2,3],ymm1[4,5,6,7]
5418 ; CHECK-NEXT: ## ymm3 {%k1} {z} = ymm0[0,1,2,3],ymm1[4,5,6,7]
54195419 ; CHECK-NEXT: vshuff32x4 $22, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x23,0xc1,0x16]
54205420 ; CHECK-NEXT: ## ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
54215421 ; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6c,0x28,0x58,0xc0]
54365436 ; CHECK: ## BB#0:
54375437 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
54385438 ; CHECK-NEXT: vshuff64x2 $22, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x23,0xd1,0x16]
5439 ; CHECK-NEXT: ## ymm2 = ymm0[0,1],ymm1[2,3]
5439 ; CHECK-NEXT: ## ymm2 {%k1} = ymm0[0,1],ymm1[2,3]
54405440 ; CHECK-NEXT: vshuff64x2 $22, %ymm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x23,0xd9,0x16]
5441 ; CHECK-NEXT: ## ymm3 = ymm0[0,1],ymm1[2,3]
5441 ; CHECK-NEXT: ## ymm3 {%k1} {z} = ymm0[0,1],ymm1[2,3]
54425442 ; CHECK-NEXT: vshuff64x2 $22, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x23,0xc1,0x16]
54435443 ; CHECK-NEXT: ## ymm0 = ymm0[0,1],ymm1[2,3]
54445444 ; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0x58,0xc0]
54595459 ; CHECK: ## BB#0:
54605460 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
54615461 ; CHECK-NEXT: vshufi32x4 $22, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x43,0xd1,0x16]
5462 ; CHECK-NEXT: ## ymm2 = ymm0[0,1,2,3],ymm1[4,5,6,7]
5462 ; CHECK-NEXT: ## ymm2 {%k1} = ymm0[0,1,2,3],ymm1[4,5,6,7]
54635463 ; CHECK-NEXT: vshufi32x4 $22, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x43,0xc1,0x16]
54645464 ; CHECK-NEXT: ## ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
54655465 ; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xc0]
54775477 ; CHECK: ## BB#0:
54785478 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
54795479 ; CHECK-NEXT: vshufi64x2 $22, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x43,0xd1,0x16]
5480 ; CHECK-NEXT: ## ymm2 = ymm0[0,1],ymm1[2,3]
5480 ; CHECK-NEXT: ## ymm2 {%k1} = ymm0[0,1],ymm1[2,3]
54815481 ; CHECK-NEXT: vshufi64x2 $22, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x43,0xc1,0x16]
54825482 ; CHECK-NEXT: ## ymm0 = ymm0[0,1],ymm1[2,3]
54835483 ; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xc0]
55835583 ; CHECK: ## BB#0:
55845584 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
55855585 ; CHECK-NEXT: vshufpd $22, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0xc6,0xd1,0x16]
5586 ; CHECK-NEXT: ## xmm2 = xmm0[0],xmm1[1]
5586 ; CHECK-NEXT: ## xmm2 {%k1} = xmm0[0],xmm1[1]
55875587 ; CHECK-NEXT: vshufpd $22, %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0xc6,0xd9,0x16]
5588 ; CHECK-NEXT: ## xmm3 = xmm0[0],xmm1[1]
5588 ; CHECK-NEXT: ## xmm3 {%k1} {z} = xmm0[0],xmm1[1]
55895589 ; CHECK-NEXT: vshufpd $22, %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xc6,0xc1,0x16]
55905590 ; CHECK-NEXT: ## xmm0 = xmm0[0],xmm1[1]
55915591 ; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0x58,0xc0]
56065606 ; CHECK: ## BB#0:
56075607 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
56085608 ; CHECK-NEXT: vshufpd $22, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0xc6,0xd1,0x16]
5609 ; CHECK-NEXT: ## ymm2 = ymm0[0],ymm1[1],ymm0[3],ymm1[2]
5609 ; CHECK-NEXT: ## ymm2 {%k1} = ymm0[0],ymm1[1],ymm0[3],ymm1[2]
56105610 ; CHECK-NEXT: vshufpd $22, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xc6,0xc1,0x16]
56115611 ; CHECK-NEXT: ## ymm0 = ymm0[0],ymm1[1],ymm0[3],ymm1[2]
56125612 ; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0x58,0xc0]
56245624 ; CHECK: ## BB#0:
56255625 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
56265626 ; CHECK-NEXT: vshufps $22, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0xc6,0xd1,0x16]
5627 ; CHECK-NEXT: ## xmm2 = xmm0[2,1],xmm1[1,0]
5627 ; CHECK-NEXT: ## xmm2 {%k1} = xmm0[2,1],xmm1[1,0]
56285628 ; CHECK-NEXT: vshufps $22, %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0xc6,0xc1,0x16]
56295629 ; CHECK-NEXT: ## xmm0 = xmm0[2,1],xmm1[1,0]
56305630 ; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6c,0x08,0x58,0xc0]
56425642 ; CHECK: ## BB#0:
56435643 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
56445644 ; CHECK-NEXT: vshufps $22, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0xc6,0xd1,0x16]
5645 ; CHECK-NEXT: ## ymm2 = ymm0[2,1],ymm1[1,0],ymm0[6,5],ymm1[5,4]
5645 ; CHECK-NEXT: ## ymm2 {%k1} = ymm0[2,1],ymm1[1,0],ymm0[6,5],ymm1[5,4]
56465646 ; CHECK-NEXT: vshufps $22, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0xc6,0xc1,0x16]
56475647 ; CHECK-NEXT: ## ymm0 = ymm0[2,1],ymm1[1,0],ymm0[6,5],ymm1[5,4]
56485648 ; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6c,0x28,0x58,0xc0]
57285728 ; CHECK: ## BB#0:
57295729 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
57305730 ; CHECK-NEXT: vpermilpd $22, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x05,0xc8,0x16]
5731 ; CHECK-NEXT: ## ymm1 = ymm0[0,1,3,2]
5731 ; CHECK-NEXT: ## ymm1 {%k1} = ymm0[0,1,3,2]
57325732 ; CHECK-NEXT: vpermilpd $22, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x05,0xd0,0x16]
5733 ; CHECK-NEXT: ## ymm2 = ymm0[0,1,3,2]
5733 ; CHECK-NEXT: ## ymm2 {%k1} {z} = ymm0[0,1,3,2]
57345734 ; CHECK-NEXT: vpermilpd $22, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x05,0xc0,0x16]
57355735 ; CHECK-NEXT: ## ymm0 = ymm0[0,1,3,2]
57365736 ; CHECK-NEXT: vaddpd %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xca]
57515751 ; CHECK: ## BB#0:
57525752 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
57535753 ; CHECK-NEXT: vpermilpd $1, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x05,0xc8,0x01]
5754 ; CHECK-NEXT: ## xmm1 = xmm0[1,0]
5754 ; CHECK-NEXT: ## xmm1 {%k1} = xmm0[1,0]
57555755 ; CHECK-NEXT: vpermilpd $1, %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0x89,0x05,0xd0,0x01]
5756 ; CHECK-NEXT: ## xmm2 = xmm0[1,0]
5756 ; CHECK-NEXT: ## xmm2 {%k1} {z} = xmm0[1,0]
57575757 ; CHECK-NEXT: vpermilpd $1, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0xfd,0x08,0x05,0xc0,0x01]
57585758 ; CHECK-NEXT: ## xmm0 = xmm0[1,0]
57595759 ; CHECK-NEXT: vaddpd %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xca]
57745774 ; CHECK: ## BB#0:
57755775 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
57765776 ; CHECK-NEXT: vpermilps $22, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x04,0xc8,0x16]
5777 ; CHECK-NEXT: ## ymm1 = ymm0[2,1,1,0,6,5,5,4]
5777 ; CHECK-NEXT: ## ymm1 {%k1} = ymm0[2,1,1,0,6,5,5,4]
57785778 ; CHECK-NEXT: vpermilps $22, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xa9,0x04,0xd0,0x16]
5779 ; CHECK-NEXT: ## ymm2 = ymm0[2,1,1,0,6,5,5,4]
5779 ; CHECK-NEXT: ## ymm2 {%k1} {z} = ymm0[2,1,1,0,6,5,5,4]
57805780 ; CHECK-NEXT: vpermilps $22, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x04,0xc0,0x16]
57815781 ; CHECK-NEXT: ## ymm0 = ymm0[2,1,1,0,6,5,5,4]
57825782 ; CHECK-NEXT: vaddps %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xca]
57975797 ; CHECK: ## BB#0:
57985798 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
57995799 ; CHECK-NEXT: vpermilps $22, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x04,0xc8,0x16]
5800 ; CHECK-NEXT: ## xmm1 = xmm0[2,1,1,0]
5800 ; CHECK-NEXT: ## xmm1 {%k1} = xmm0[2,1,1,0]
58015801 ; CHECK-NEXT: vpermilps $22, %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0x89,0x04,0xd0,0x16]
5802 ; CHECK-NEXT: ## xmm2 = xmm0[2,1,1,0]
5802 ; CHECK-NEXT: ## xmm2 {%k1} {z} = xmm0[2,1,1,0]
58035803 ; CHECK-NEXT: vpermilps $22, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x04,0xc0,0x16]
58045804 ; CHECK-NEXT: ## xmm0 = xmm0[2,1,1,0]
58055805 ; CHECK-NEXT: vaddps %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xca]
65746574 ; CHECK: ## BB#0:
65756575 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
65766576 ; CHECK-NEXT: vshuff32x4 $0, %ymm0, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xa9,0x23,0xd0,0x00]
6577 ; CHECK-NEXT: ## ymm2 = ymm0[0,1,2,3,0,1,2,3]
6577 ; CHECK-NEXT: ## ymm2 {%k1} {z} = ymm0[0,1,2,3,0,1,2,3]
65786578 ; CHECK-NEXT: vshuff32x4 $0, %ymm0, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x23,0xc8,0x00]
6579 ; CHECK-NEXT: ## ymm1 = ymm0[0,1,2,3,0,1,2,3]
6579 ; CHECK-NEXT: ## ymm1 {%k1} = ymm0[0,1,2,3,0,1,2,3]
65806580 ; CHECK-NEXT: vshuff32x4 $0, %ymm0, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x23,0xc0,0x00]
65816581 ; CHECK-NEXT: ## ymm0 = ymm0[0,1,2,3,0,1,2,3]
65826582 ; CHECK-NEXT: vaddps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x58,0xc1]
65976597 ; CHECK: ## BB#0:
65986598 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
65996599 ; CHECK-NEXT: vshufi32x4 $0, %ymm0, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xa9,0x43,0xd0,0x00]
6600 ; CHECK-NEXT: ## ymm2 = ymm0[0,1,2,3,0,1,2,3]
6600 ; CHECK-NEXT: ## ymm2 {%k1} {z} = ymm0[0,1,2,3,0,1,2,3]
66016601 ; CHECK-NEXT: vshufi32x4 $0, %ymm0, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x43,0xc8,0x00]
6602 ; CHECK-NEXT: ## ymm1 = ymm0[0,1,2,3,0,1,2,3]
6602 ; CHECK-NEXT: ## ymm1 {%k1} = ymm0[0,1,2,3,0,1,2,3]
66036603 ; CHECK-NEXT: vshufi32x4 $0, %ymm0, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x43,0xc0,0x00]
66046604 ; CHECK-NEXT: ## ymm0 = ymm0[0,1,2,3,0,1,2,3]
66056605 ; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfe,0xc1]
21252125 ; AVX512F-NEXT: vpmovsxwq %xmm2, %zmm2
21262126 ; AVX512F-NEXT: vpsllq $63, %zmm2, %zmm2
21272127 ; AVX512F-NEXT: vptestmq %zmm2, %zmm2, %k1
2128 ; AVX512F-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],zmm1[2,3,0,1]
2128 ; AVX512F-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],zmm1[2,3,0,1]
21292129 ; AVX512F-NEXT: retq
21302130 ;
21312131 ; AVX512F-32-LABEL: test_vshuff64x2_512_maskz:
21332133 ; AVX512F-32-NEXT: vpmovsxwq %xmm2, %zmm2
21342134 ; AVX512F-32-NEXT: vpsllvq {{\.LCPI.*}}, %zmm2, %zmm2
21352135 ; AVX512F-32-NEXT: vptestmq %zmm2, %zmm2, %k1
2136 ; AVX512F-32-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],zmm1[2,3,0,1]
2136 ; AVX512F-32-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],zmm1[2,3,0,1]
21372137 ; AVX512F-32-NEXT: retl
21382138 %y = shufflevector <8 x double> %x, <8 x double> %x1, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 10, i32 11, i32 8, i32 9>
21392139 %res = select <8 x i1> %mask, <8 x double> %y, <8 x double> zeroinitializer
21462146 ; AVX512F-NEXT: vpmovsxwq %xmm2, %zmm2
21472147 ; AVX512F-NEXT: vpsllq $63, %zmm2, %zmm2
21482148 ; AVX512F-NEXT: vptestmq %zmm2, %zmm2, %k1
2149 ; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],zmm1[2,3,0,1]
2149 ; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} = zmm0[0,1,4,5],zmm1[2,3,0,1]
21502150 ; AVX512F-NEXT: retq
21512151 ;
21522152 ; AVX512F-32-LABEL: test_vshufi64x2_512_mask:
21542154 ; AVX512F-32-NEXT: vpmovsxwq %xmm2, %zmm2
21552155 ; AVX512F-32-NEXT: vpsllvq {{\.LCPI.*}}, %zmm2, %zmm2
21562156 ; AVX512F-32-NEXT: vptestmq %zmm2, %zmm2, %k1
2157 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],zmm1[2,3,0,1]
2157 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} = zmm0[0,1,4,5],zmm1[2,3,0,1]
21582158 ; AVX512F-32-NEXT: retl
21592159 %y = shufflevector <8 x i64> %x, <8 x i64> %x1, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 10, i32 11, i32 8, i32 9>
21602160 %res = select <8 x i1> %mask, <8 x i64> %y, <8 x i64> %x
21832183 ; AVX512F-NEXT: vpmovsxwq %xmm1, %zmm1
21842184 ; AVX512F-NEXT: vpsllq $63, %zmm1, %zmm1
21852185 ; AVX512F-NEXT: vptestmq %zmm1, %zmm1, %k1
2186 ; AVX512F-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],mem[2,3,0,1]
2186 ; AVX512F-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} = zmm0[0,1,4,5],mem[2,3,0,1]
21872187 ; AVX512F-NEXT: retq
21882188 ;
21892189 ; AVX512F-32-LABEL: test_vshuff64x2_512_mem_mask:
21922192 ; AVX512F-32-NEXT: vpsllvq {{\.LCPI.*}}, %zmm1, %zmm1
21932193 ; AVX512F-32-NEXT: vptestmq %zmm1, %zmm1, %k1
21942194 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
2195 ; AVX512F-32-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],mem[2,3,0,1]
2195 ; AVX512F-32-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} = zmm0[0,1,4,5],mem[2,3,0,1]
21962196 ; AVX512F-32-NEXT: retl
21972197 %x1 = load <8 x double>,<8 x double> *%ptr,align 1
21982198 %y = shufflevector <8 x double> %x, <8 x double> %x1, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 10, i32 11, i32 8, i32 9>
22062206 ; AVX512F-NEXT: vpmovsxwq %xmm1, %zmm1
22072207 ; AVX512F-NEXT: vpsllq $63, %zmm1, %zmm1
22082208 ; AVX512F-NEXT: vptestmq %zmm1, %zmm1, %k1
2209 ; AVX512F-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],mem[2,3,0,1]
2209 ; AVX512F-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],mem[2,3,0,1]
22102210 ; AVX512F-NEXT: retq
22112211 ;
22122212 ; AVX512F-32-LABEL: test_vshuff64x2_512_mem_maskz:
22152215 ; AVX512F-32-NEXT: vpsllvq {{\.LCPI.*}}, %zmm1, %zmm1
22162216 ; AVX512F-32-NEXT: vptestmq %zmm1, %zmm1, %k1
22172217 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
2218 ; AVX512F-32-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],mem[2,3,0,1]
2218 ; AVX512F-32-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],mem[2,3,0,1]
22192219 ; AVX512F-32-NEXT: retl
22202220 %x1 = load <8 x double>,<8 x double> *%ptr,align 1
22212221 %y = shufflevector <8 x double> %x, <8 x double> %x1, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 10, i32 11, i32 8, i32 9>
232232 ; CHECK-LABEL: combine_vpermt2var_16f32_vpermilps_mask:
233233 ; CHECK: # BB#0:
234234 ; CHECK-NEXT: kmovw %edi, %k1
235 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
235 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
236236 ; CHECK-NEXT: retq
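237237 %res0 = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>, <16 x float> %x0, <16 x float> %x1, i16 %m)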
238238 ret <16 x float> %res0
241241 ; CHECK-LABEL: combine_vpermt2var_16f32_vpermilps_mask_load:
242242 ; CHECK: # BB#0:
243243 ; CHECK-NEXT: kmovw %esi, %k1
244 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = mem[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
244 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = mem[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
245245 ; CHECK-NEXT: retq
246246 %x0 = load <16 x float>, <16 x float> *%p0
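247247 %res0 = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>, <16 x float> %x0, <16 x float> %x1, i16 %m)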