llvm.org GIT mirror: llvm / 99b6232

AVX512: Implemented encoding and intrinsics for VMOVSHDUP/VMOVSLDUP instructions.

Differential Revision: http://reviews.llvm.org/D14322

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@253185 91177308-0d34-0410-b5e6-96231b3b80d8

Author: Igor Breger

10 changed files with 536 additions and 115 deletions.
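The patch teaches the AVX-512 backend the EVEX forms of VMOVSHDUP/VMOVSLDUP, which duplicate the odd (SHDUP) or even (SLDUP) single-precision lanes of the source, and adds masked intrinsics for the 128/256/512-bit variants. Below is a minimal sketch of what the 512-bit forms compute, written against the C-level wrappers that Clang lowers to the builtins declared in the diff; the wrapper names (_mm512_movehdup_ps, _mm512_moveldup_ps) and the -mavx512f build requirement are assumptions here, not part of this LLVM-side patch.

// Sketch only: exercises the unmasked 512-bit forms via assumed C wrappers.
#include <immintrin.h>
#include <cstdio>

int main() {
  alignas(64) float in[16], hi[16], lo[16];
  for (int i = 0; i < 16; ++i)
    in[i] = static_cast<float>(i);

  __m512 v = _mm512_load_ps(in);
  _mm512_store_ps(hi, _mm512_movehdup_ps(v)); // vmovshdup: lanes 1,1,3,3,...,15,15
  _mm512_store_ps(lo, _mm512_moveldup_ps(v)); // vmovsldup: lanes 0,0,2,2,...,14,14

  for (int i = 0; i < 16; ++i)
    std::printf("%2d: in=%4.1f shdup=%4.1f sldup=%4.1f\n", i, in[i], hi[i], lo[i]);
  return 0;
}

The lane patterns match the shuffle comments the instruction-comment printer emits in the tests further down (e.g. zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]).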
15771577 GCCBuiltin<"__builtin_ia32_shufps512_mask">,
15781578 Intrinsic<[llvm_v16f32_ty],
15791579 [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i32_ty, llvm_v16f32_ty, llvm_i16_ty],
1580 [IntrNoMem]>;
1581
1582 def int_x86_avx512_mask_movshdup_128 :
1583 GCCBuiltin<"__builtin_ia32_movshdup128_mask">,
1584 Intrinsic<[llvm_v4f32_ty],
1585 [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
1586 [IntrNoMem]>;
1587
1588 def int_x86_avx512_mask_movshdup_256 :
1589 GCCBuiltin<"__builtin_ia32_movshdup256_mask">,
1590 Intrinsic<[llvm_v8f32_ty],
1591 [llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty],
1592 [IntrNoMem]>;
1593
1594 def int_x86_avx512_mask_movshdup_512 :
1595 GCCBuiltin<"__builtin_ia32_movshdup512_mask">,
1596 Intrinsic<[llvm_v16f32_ty],
1597 [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty],
1598 [IntrNoMem]>;
1599
1600 def int_x86_avx512_mask_movsldup_128 :
1601 GCCBuiltin<"__builtin_ia32_movsldup128_mask">,
1602 Intrinsic<[llvm_v4f32_ty],
1603 [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
1604 [IntrNoMem]>;
1605
1606 def int_x86_avx512_mask_movsldup_256 :
1607 GCCBuiltin<"__builtin_ia32_movsldup256_mask">,
1608 Intrinsic<[llvm_v8f32_ty],
1609 [llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty],
1610 [IntrNoMem]>;
1611
1612 def int_x86_avx512_mask_movsldup_512 :
1613 GCCBuiltin<"__builtin_ia32_movsldup512_mask">,
1614 Intrinsic<[llvm_v16f32_ty],
1615 [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty],
15801616 [IntrNoMem]>;
15811617 }
15821618
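Each of the new masked intrinsics above takes the source vector, a pass-through vector, and a lane mask (i8 for the 128/256-bit forms, i16 for the 512-bit form); lanes whose mask bit is clear keep the pass-through value. A scalar C++ sketch of the 512-bit movshdup semantics, matching that signature (an illustration, not code from the patch):

// Scalar model of llvm.x86.avx512.mask.movshdup.512: duplicate the odd f32
// lane of each 64-bit pair, then merge with the pass-through under the mask.
#include <cstdint>
#include <cstddef>

void mask_movshdup_512(const float src[16], const float passthru[16],
                       std::uint16_t mask, float dst[16]) {
  for (std::size_t i = 0; i < 16; ++i) {
    float shuffled = src[(i & ~std::size_t{1}) + 1]; // lanes 1,1,3,3,...,15,15
    dst[i] = ((mask >> i) & 1) ? shuffled : passthru[i];
  }
}

The maskz variants exercised in the tests pass zeroinitializer as the pass-through, which is why the {z} forms zero the unselected lanes.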
1919 #include "llvm/Support/raw_ostream.h"
2020
2121 using namespace llvm;
22
23 static unsigned getVectorRegSize(unsigned RegNo) {
24
25 if (X86::ZMM0 <= RegNo && RegNo <= X86::ZMM31)
26 return 512;
27 if (X86::YMM0 <= RegNo && RegNo <= X86::YMM31)
28 return 256;
29 if (X86::XMM0 <= RegNo && RegNo <= X86::XMM31)
30 return 128;
31
32 llvm_unreachable("Unknown vector reg!");
33 return 0;
34 }
35
36 static MVT getRegOperandVectorVT(const MCInst *MI, const MVT &ScalarVT,
37 unsigned OperandIndex) {
38 unsigned OpReg = MI->getOperand(OperandIndex).getReg();
39 return MVT::getVectorVT(ScalarVT,
40 getVectorRegSize(OpReg)/ScalarVT.getSizeInBits());
41 }
2242
2343 /// \brief Extracts the src/dst types for a given zero extension instruction.
2444 /// \note While the number of elements in DstVT type correct, the
106126 }
107127 }
108128
109 #define CASE_VSHUF_COMMON(Inst, Suffix, src2) \
110 case X86::VSHUFF##Inst##Suffix##r##src2##i: \
111 case X86::VSHUFF##Inst##Suffix##r##src2##ik: \
112 case X86::VSHUFF##Inst##Suffix##r##src2##ikz: \
113 case X86::VSHUFI##Inst##Suffix##r##src2##i: \
114 case X86::VSHUFI##Inst##Suffix##r##src2##ik: \
115 case X86::VSHUFI##Inst##Suffix##r##src2##ikz:
116
117 #define CASE_VSHUF(Inst) \
118 CASE_VSHUF_COMMON(Inst, Z, r) \
119 CASE_VSHUF_COMMON(Inst, Z, m) \
120 CASE_VSHUF_COMMON(Inst, Z256, r) \
121 CASE_VSHUF_COMMON(Inst, Z256, m) \
129 #define CASE_MASK_INS_COMMON(Inst, Suffix, src) \
130 case X86::V##Inst##Suffix##src: \
131 case X86::V##Inst##Suffix##src##k: \
132 case X86::V##Inst##Suffix##src##kz:
133
134 #define CASE_SSE_INS_COMMON(Inst, src) \
135 case X86::Inst##src:
136
137 #define CASE_AVX_INS_COMMON(Inst, Suffix, src) \
138 case X86::V##Inst##Suffix##src:
139
140 #define CASE_MOVDUP(Inst, src) \
141 CASE_MASK_INS_COMMON(Inst, Z, r##src) \
142 CASE_MASK_INS_COMMON(Inst, Z256, r##src) \
143 CASE_MASK_INS_COMMON(Inst, Z128, r##src) \
144 CASE_AVX_INS_COMMON(Inst, , r##src) \
145 CASE_AVX_INS_COMMON(Inst, Y, r##src) \
146 CASE_SSE_INS_COMMON(Inst, r##src) \
147
148 #define CASE_VSHUF(Inst, src) \
149 CASE_MASK_INS_COMMON(SHUFF##Inst, Z, r##src##i) \
150 CASE_MASK_INS_COMMON(SHUFI##Inst, Z, r##src##i) \
151 CASE_MASK_INS_COMMON(SHUFF##Inst, Z256, r##src##i) \
152 CASE_MASK_INS_COMMON(SHUFI##Inst, Z256, r##src##i) \
122153
123154 /// \brief Extracts the types and if it has memory operand for a given
124155 /// (SHUFF32x4/SHUFF64x2/SHUFI32x4/SHUFI64x2) instruction.
128159 default:
129160 llvm_unreachable("Unknown VSHUF64x2 family instructions.");
130161 break;
131 CASE_VSHUF_COMMON(64X2, Z, m)
162 CASE_VSHUF(64X2, m)
132163 HasMemOp = true; // FALL THROUGH.
133 CASE_VSHUF_COMMON(64X2, Z, r)
134 VT = MVT::v8i64;
135 break;
136 CASE_VSHUF_COMMON(64X2, Z256, m)
164 CASE_VSHUF(64X2, r)
165 VT = getRegOperandVectorVT(MI, MVT::i64, 0);
166 break;
167 CASE_VSHUF(32X4, m)
137168 HasMemOp = true; // FALL THROUGH.
138 CASE_VSHUF_COMMON(64X2, Z256, r)
139 VT = MVT::v4i64;
140 break;
141 CASE_VSHUF_COMMON(32X4, Z, m)
142 HasMemOp = true; // FALL THROUGH.
143 CASE_VSHUF_COMMON(32X4, Z, r)
144 VT = MVT::v16i32;
145 break;
146 CASE_VSHUF_COMMON(32X4, Z256, m)
147 HasMemOp = true; // FALL THROUGH.
148 CASE_VSHUF_COMMON(32X4, Z256, r)
149 VT = MVT::v8i32;
169 CASE_VSHUF(32X4, r)
170 VT = getRegOperandVectorVT(MI, MVT::i32, 0);
150171 break;
151172 }
152173 }
296317 DestName = getRegName(MI->getOperand(0).getReg());
297318 DecodeMOVHLPSMask(2, ShuffleMask);
298319 break;
299
300 case X86::MOVSLDUPrr:
301 case X86::VMOVSLDUPrr:
302 Src1Name = getRegName(MI->getOperand(1).getReg());
303 // FALL THROUGH.
304 case X86::MOVSLDUPrm:
305 case X86::VMOVSLDUPrm:
306 DestName = getRegName(MI->getOperand(0).getReg());
307 DecodeMOVSLDUPMask(MVT::v4f32, ShuffleMask);
308 break;
309
310 case X86::VMOVSHDUPYrr:
311 Src1Name = getRegName(MI->getOperand(1).getReg());
312 // FALL THROUGH.
313 case X86::VMOVSHDUPYrm:
314 DestName = getRegName(MI->getOperand(0).getReg());
315 DecodeMOVSHDUPMask(MVT::v8f32, ShuffleMask);
316 break;
317
318 case X86::VMOVSLDUPYrr:
319 Src1Name = getRegName(MI->getOperand(1).getReg());
320 // FALL THROUGH.
321 case X86::VMOVSLDUPYrm:
322 DestName = getRegName(MI->getOperand(0).getReg());
323 DecodeMOVSLDUPMask(MVT::v8f32, ShuffleMask);
324 break;
325
326 case X86::MOVSHDUPrr:
327 case X86::VMOVSHDUPrr:
328 Src1Name = getRegName(MI->getOperand(1).getReg());
329 // FALL THROUGH.
330 case X86::MOVSHDUPrm:
331 case X86::VMOVSHDUPrm:
332 DestName = getRegName(MI->getOperand(0).getReg());
333 DecodeMOVSHDUPMask(MVT::v4f32, ShuffleMask);
334 break;
335
320 CASE_MOVDUP(MOVSLDUP, r)
321 Src1Name = getRegName(MI->getOperand(MI->getNumOperands() - 1).getReg());
322 // FALL THROUGH.
323 CASE_MOVDUP(MOVSLDUP, m) {
324 MVT VT = getRegOperandVectorVT(MI, MVT::f32, 0);
325 DestName = getRegName(MI->getOperand(0).getReg());
326 DecodeMOVSLDUPMask(VT, ShuffleMask);
327 break;
328 }
329 CASE_MOVDUP(MOVSHDUP, r)
330 Src1Name = getRegName(MI->getOperand(MI->getNumOperands() - 1).getReg());
331 // FALL THROUGH.
332 CASE_MOVDUP(MOVSHDUP, m) {
333 MVT VT = getRegOperandVectorVT(MI, MVT::f32, 0);
334 DestName = getRegName(MI->getOperand(0).getReg());
335 DecodeMOVSHDUPMask(VT, ShuffleMask);
336 break;
337 }
336338 case X86::VMOVDDUPYrr:
337339 Src1Name = getRegName(MI->getOperand(1).getReg());
338340 // FALL THROUGH.
770772 Src1Name = getRegName(MI->getOperand(1).getReg());
771773 DestName = getRegName(MI->getOperand(0).getReg());
772774 break;
773 CASE_VSHUF(64X2)
774 CASE_VSHUF(32X4) {
775 CASE_VSHUF(64X2, r)
776 CASE_VSHUF(64X2, m)
777 CASE_VSHUF(32X4, r)
778 CASE_VSHUF(32X4, m) {
775779 MVT VT;
776780 bool HasMemOp;
777781 unsigned NumOp = MI->getNumOperands();
42534253 def : Pat<(X86Movddup (v8f64 (scalar_to_vector (loadf64 addr:$src)))),
42544254 (VMOVDDUPZrm addr:$src)>;
42554255
4256 //===---------------------------------------------------------------------===//
4257 // Replicate Single FP - MOVSHDUP and MOVSLDUP
4258 //===---------------------------------------------------------------------===//
4259 multiclass avx512_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr,
4260 ValueType vt, RegisterClass RC, PatFrag mem_frag,
4261 X86MemOperand x86memop> {
4262 def rr : AVX512XSI<op, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
4263 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4264 [(set RC:$dst, (vt (OpNode RC:$src)))]>, EVEX;
4265 let mayLoad = 1 in
4266 def rm : AVX512XSI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
4267 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4268 [(set RC:$dst, (OpNode (mem_frag addr:$src)))]>, EVEX;
4269 }
4270
4271 defm VMOVSHDUPZ : avx512_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
4272 v16f32, VR512, loadv16f32, f512mem>, EVEX_V512,
4273 EVEX_CD8<32, CD8VF>;
4274 defm VMOVSLDUPZ : avx512_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
4275 v16f32, VR512, loadv16f32, f512mem>, EVEX_V512,
4276 EVEX_CD8<32, CD8VF>;
4277
4278 def : Pat<(v16i32 (X86Movshdup VR512:$src)), (VMOVSHDUPZrr VR512:$src)>;
4279 def : Pat<(v16i32 (X86Movshdup (loadv16i32 addr:$src))),
4280 (VMOVSHDUPZrm addr:$src)>;
4281 def : Pat<(v16i32 (X86Movsldup VR512:$src)), (VMOVSLDUPZrr VR512:$src)>;
4282 def : Pat<(v16i32 (X86Movsldup (loadv16i32 addr:$src))),
4283 (VMOVSLDUPZrm addr:$src)>;
4284
42854256 //===----------------------------------------------------------------------===//
42864257 // Move Low to High and High to Low packed FP Instructions
42874258 //===----------------------------------------------------------------------===//
70557026 multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
70567027 X86VectorVTInfo _> {
70577028 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7058 (ins _.RC:$src1), OpcodeStr##_.Suffix,
7029 (ins _.RC:$src1), OpcodeStr,
70597030 "$src1", "$src1",
70607031 (_.VT (OpNode _.RC:$src1))>, EVEX, AVX5128IBase;
70617032
70627033 let mayLoad = 1 in
70637034 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
7064 (ins _.MemOp:$src1), OpcodeStr##_.Suffix,
7035 (ins _.MemOp:$src1), OpcodeStr,
70657036 "$src1", "$src1",
70667037 (_.VT (OpNode (bitconvert (_.LdFrag addr:$src1))))>,
70677038 EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>;
70727043 avx512_unary_rm<opc, OpcodeStr, OpNode, _> {
70737044 let mayLoad = 1 in
70747045 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
7075 (ins _.ScalarMemOp:$src1), OpcodeStr##_.Suffix,
7046 (ins _.ScalarMemOp:$src1), OpcodeStr,
70767047 "${src1}"##_.BroadcastStr,
70777048 "${src1}"##_.BroadcastStr,
70787049 (_.VT (OpNode (X86VBroadcast
71097080
71107081 multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
71117082 SDNode OpNode, Predicate prd> {
7112 defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr, OpNode, avx512vl_i64_info,
7083 defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, avx512vl_i64_info,
71137084 prd>, VEX_W;
7114 defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr, OpNode, avx512vl_i32_info, prd>;
7085 defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, avx512vl_i32_info,
7086 prd>;
71157087 }
71167088
71177089 multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
71187090 SDNode OpNode, Predicate prd> {
7119 defm W : avx512_unary_rm_vl<opc_w, OpcodeStr, OpNode, avx512vl_i16_info, prd>;
7120 defm B : avx512_unary_rm_vl<opc_b, OpcodeStr, OpNode, avx512vl_i8_info, prd>;
7091 defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, avx512vl_i16_info, prd>;
7092 defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, avx512vl_i8_info, prd>;
71217093 }
71227094
71237095 multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
71517123 defm VPLZCNT : avx512_ctlz<0x44, "vplzcnt", HasCDI>;
71527124 defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict, HasCDI>;
71537125
7126 //===---------------------------------------------------------------------===//
7127 // Replicate Single FP - MOVSHDUP and MOVSLDUP
7128 //===---------------------------------------------------------------------===//
7129 multiclass avx512_replicate opc, string OpcodeStr, SDNode OpNode>{
7130 defm NAME: avx512_unary_rm_vl
7131 HasAVX512>, XS;
7132 let isCodeGenOnly = 1 in
7133 defm NAME#_I: avx512_unary_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
7134 HasAVX512>, XS;
7135 }
7136
7137 defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup>;
7138 defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup>;
71547139 //===----------------------------------------------------------------------===//
71557140 // AVX-512 - Unpack Instructions
71567141 //===----------------------------------------------------------------------===//
51345134 IIC_SSE_MOV_LH>, Sched<[WriteLoad]>;
51355135 }
51365136
5137 let Predicates = [HasAVX] in {
5137 let Predicates = [HasAVX, NoVLX] in {
51385138 defm VMOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
51395139 v4f32, VR128, loadv4f32, f128mem>, VEX;
51405140 defm VMOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
51495149 defm MOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "movsldup", v4f32, VR128,
51505150 memopv4f32, f128mem>;
51515151
5152 let Predicates = [HasAVX] in {
5152 let Predicates = [HasAVX, NoVLX] in {
51535153 def : Pat<(v4i32 (X86Movshdup VR128:$src)),
51545154 (VMOVSHDUPrr VR128:$src)>;
51555155 def : Pat<(v4i32 (X86Movshdup (bc_v4i32 (loadv2i64 addr:$src)))),
775775 X86_INTRINSIC_DATA(avx512_mask_max_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FMAX, 0),
776776 X86_INTRINSIC_DATA(avx512_mask_max_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FMAX,
777777 X86ISD::FMAX_RND),
778 X86_INTRINSIC_DATA(avx512_mask_max_sd_round, INTR_TYPE_SCALAR_MASK_RM, X86ISD::FMAX,
779 X86ISD::FMAX_RND),
780 X86_INTRINSIC_DATA(avx512_mask_max_ss_round, INTR_TYPE_SCALAR_MASK_RM, X86ISD::FMAX,
781 X86ISD::FMAX_RND),
778 X86_INTRINSIC_DATA(avx512_mask_max_sd_round, INTR_TYPE_SCALAR_MASK_RM,
779 X86ISD::FMAX, X86ISD::FMAX_RND),
780 X86_INTRINSIC_DATA(avx512_mask_max_ss_round, INTR_TYPE_SCALAR_MASK_RM,
781 X86ISD::FMAX, X86ISD::FMAX_RND),
782782 X86_INTRINSIC_DATA(avx512_mask_min_pd_128, INTR_TYPE_2OP_MASK, X86ISD::FMIN, 0),
783783 X86_INTRINSIC_DATA(avx512_mask_min_pd_256, INTR_TYPE_2OP_MASK, X86ISD::FMIN, 0),
784784 X86_INTRINSIC_DATA(avx512_mask_min_pd_512, INTR_TYPE_2OP_MASK, X86ISD::FMIN,
787787 X86_INTRINSIC_DATA(avx512_mask_min_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FMIN, 0),
788788 X86_INTRINSIC_DATA(avx512_mask_min_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FMIN,
789789 X86ISD::FMIN_RND),
790 X86_INTRINSIC_DATA(avx512_mask_min_sd_round, INTR_TYPE_SCALAR_MASK_RM, X86ISD::FMIN,
791 X86ISD::FMIN_RND),
792 X86_INTRINSIC_DATA(avx512_mask_min_ss_round, INTR_TYPE_SCALAR_MASK_RM, X86ISD::FMIN,
793 X86ISD::FMIN_RND),
790 X86_INTRINSIC_DATA(avx512_mask_min_sd_round, INTR_TYPE_SCALAR_MASK_RM,
791 X86ISD::FMIN, X86ISD::FMIN_RND),
792 X86_INTRINSIC_DATA(avx512_mask_min_ss_round, INTR_TYPE_SCALAR_MASK_RM,
793 X86ISD::FMIN, X86ISD::FMIN_RND),
794 X86_INTRINSIC_DATA(avx512_mask_movshdup_128, INTR_TYPE_1OP_MASK,
795 X86ISD::MOVSHDUP, 0),
796 X86_INTRINSIC_DATA(avx512_mask_movshdup_256, INTR_TYPE_1OP_MASK,
797 X86ISD::MOVSHDUP, 0),
798 X86_INTRINSIC_DATA(avx512_mask_movshdup_512, INTR_TYPE_1OP_MASK,
799 X86ISD::MOVSHDUP, 0),
800 X86_INTRINSIC_DATA(avx512_mask_movsldup_128, INTR_TYPE_1OP_MASK,
801 X86ISD::MOVSLDUP, 0),
802 X86_INTRINSIC_DATA(avx512_mask_movsldup_256, INTR_TYPE_1OP_MASK,
803 X86ISD::MOVSLDUP, 0),
804 X86_INTRINSIC_DATA(avx512_mask_movsldup_512, INTR_TYPE_1OP_MASK,
805 X86ISD::MOVSLDUP, 0),
794806 X86_INTRINSIC_DATA(avx512_mask_mul_pd_128, INTR_TYPE_2OP_MASK, ISD::FMUL, 0),
795807 X86_INTRINSIC_DATA(avx512_mask_mul_pd_256, INTR_TYPE_2OP_MASK, ISD::FMUL, 0),
796808 X86_INTRINSIC_DATA(avx512_mask_mul_pd_512, INTR_TYPE_2OP_MASK, ISD::FMUL,
343343 ret <16 x i16> %shuffle
344344 }
345345
346 define <8 x float> @shuffle_v8f32_11335577(<8 x float> %a, <8 x float> %b) {
347 ; vmovshdup 256 test
348 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
349 ret <8 x float> %shuffle
350 }
351
352 define <4 x float> @shuffle_v4f32_1133(<4 x float> %a, <4 x float> %b) {
353 ; vmovshdup 128 test
354 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
355 ret <4 x float> %shuffle
356 }
357
358 define <8 x float> @shuffle_v8f32_00224466(<8 x float> %a, <8 x float> %b) {
359 ; vmovsldup 256 test
360 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
361 ret <8 x float> %shuffle
362 }
363
364 define <4 x float> @shuffle_v4f32_0022(<4 x float> %a, <4 x float> %b) {
365 ; vmovsldup 128 test
366 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
367 ret <4 x float> %shuffle
368 }
369
346370 define <2 x double> @insert_mem_lo_v2f64(double* %ptr, <2 x double> %b) {
347371 %a = load double, double* %ptr
348372 %v = insertelement <2 x double> undef, double %a, i32 0
46754675 ret <8 x i64> %res2
46764676 }
46774677
4678 declare <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float>, <16 x float>, i16)
4679
4680 define <16 x float>@test_int_x86_avx512_mask_movsldup_512(<16 x float> %x0, <16 x float> %x1, i16 %x2) {
4681 ; CHECK-LABEL: test_int_x86_avx512_mask_movsldup_512:
4682 ; CHECK: ## BB#0:
4683 ; CHECK-NEXT: kmovw %edi, %k1
4684 ; CHECK-NEXT: vmovsldup %zmm0, %zmm1 {%k1}
4685 ; CHECK-NEXT: ## zmm1 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
4686 ; CHECK-NEXT: vmovsldup %zmm0, %zmm2 {%k1} {z}
4687 ; CHECK-NEXT: ## zmm2 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
4688 ; CHECK-NEXT: vmovsldup %zmm0, %zmm0
4689 ; CHECK-NEXT: ## zmm0 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
4690 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
4691 ; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
4692 ; CHECK-NEXT: retq
4693 %res = call <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float> %x0, <16 x float> %x1, i16 %x2)
4694 %res1 = call <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float> %x0, <16 x float> %x1, i16 -1)
4695 %res2 = call <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float> %x0, <16 x float> zeroinitializer, i16 %x2)
4696 %res3 = fadd <16 x float> %res, %res1
4697 %res4 = fadd <16 x float> %res2, %res3
4698 ret <16 x float> %res4
4699 }
4700
4701 declare <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float>, <16 x float>, i16)
4702
4703 define <16 x float>@test_int_x86_avx512_mask_movshdup_512(<16 x float> %x0, <16 x float> %x1, i16 %x2) {
4704 ; CHECK-LABEL: test_int_x86_avx512_mask_movshdup_512:
4705 ; CHECK: ## BB#0:
4706 ; CHECK-NEXT: kmovw %edi, %k1
4707 ; CHECK-NEXT: vmovshdup %zmm0, %zmm1 {%k1}
4708 ; CHECK-NEXT: ## zmm1 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
4709 ; CHECK-NEXT: vmovshdup %zmm0, %zmm2 {%k1} {z}
4710 ; CHECK-NEXT: ## zmm2 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
4711 ; CHECK-NEXT: vmovshdup %zmm0, %zmm0
4712 ; CHECK-NEXT: ## zmm0 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
4713 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
4714 ; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
4715 ; CHECK-NEXT: retq
4716 %res = call <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float> %x0, <16 x float> %x1, i16 %x2)
4717 %res1 = call <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float> %x0, <16 x float> %x1, i16 -1)
4718 %res2 = call <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float> %x0, <16 x float> zeroinitializer, i16 %x2)
4719 %res3 = fadd <16 x float> %res, %res1
4720 %res4 = fadd <16 x float> %res2, %res3
4721 ret <16 x float> %res4
4722 }
4723
53335333 }
53345334
53355335 declare <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.256(<8 x float>, i32, <8 x i16>, i8) nounwind readonly
5336
5337 declare <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float>, <4 x float>, i8)
5338
5339 define <4 x float>@test_int_x86_avx512_mask_movsldup_128(<4 x float> %x0, <4 x float> %x1, i8 %x2) {
5340 ; CHECK-LABEL: test_int_x86_avx512_mask_movsldup_128:
5341 ; CHECK: ## BB#0:
5342 ; CHECK-NEXT: movzbl %dil, %eax
5343 ; CHECK-NEXT: kmovw %eax, %k1
5344 ; CHECK-NEXT: vmovsldup %xmm0, %xmm1 {%k1}
5345 ; CHECK-NEXT: ## xmm1 = xmm0[0,0,2,2]
5346 ; CHECK-NEXT: vmovsldup %xmm0, %xmm2 {%k1} {z}
5347 ; CHECK-NEXT: ## xmm2 = xmm0[0,0,2,2]
5348 ; CHECK-NEXT: vmovsldup %xmm0, %xmm0
5349 ; CHECK-NEXT: ## xmm0 = xmm0[0,0,2,2]
5350 ; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
5351 ; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0
5352 ; CHECK-NEXT: retq
5353 %res = call <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float> %x0, <4 x float> %x1, i8 %x2)
5354 %res1 = call <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float> %x0, <4 x float> %x1, i8 -1)
5355 %res2 = call <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float> %x0, <4 x float> zeroinitializer, i8 %x2)
5356 %res3 = fadd <4 x float> %res, %res1
5357 %res4 = fadd <4 x float> %res2, %res3
5358 ret <4 x float> %res4
5359 }
5360
5361 declare <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float>, <8 x float>, i8)
5362
5363 define <8 x float>@test_int_x86_avx512_mask_movsldup_256(<8 x float> %x0, <8 x float> %x1, i8 %x2) {
5364 ; CHECK-LABEL: test_int_x86_avx512_mask_movsldup_256:
5365 ; CHECK: ## BB#0:
5366 ; CHECK-NEXT: movzbl %dil, %eax
5367 ; CHECK-NEXT: kmovw %eax, %k1
5368 ; CHECK-NEXT: vmovsldup %ymm0, %ymm1 {%k1}
5369 ; CHECK-NEXT: ## ymm1 = ymm0[0,0,2,2,4,4,6,6]
5370 ; CHECK-NEXT: vmovsldup %ymm0, %ymm2 {%k1} {z}
5371 ; CHECK-NEXT: ## ymm2 = ymm0[0,0,2,2,4,4,6,6]
5372 ; CHECK-NEXT: vmovsldup %ymm0, %ymm0
5373 ; CHECK-NEXT: ## ymm0 = ymm0[0,0,2,2,4,4,6,6]
5374 ; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
5375 ; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0
5376 ; CHECK-NEXT: retq
5377 %res = call <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float> %x0, <8 x float> %x1, i8 %x2)
5378 %res1 = call <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float> %x0, <8 x float> %x1, i8 -1)
5379 %res2 = call <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float> %x0, <8 x float> zeroinitializer, i8 %x2)
5380 %res3 = fadd <8 x float> %res, %res1
5381 %res4 = fadd <8 x float> %res2, %res3
5382 ret <8 x float> %res4
5383 }
5384
5385 declare <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float>, <4 x float>, i8)
5386
5387 define <4 x float>@test_int_x86_avx512_mask_movshdup_128(<4 x float> %x0, <4 x float> %x1, i8 %x2) {
5388 ; CHECK-LABEL: test_int_x86_avx512_mask_movshdup_128:
5389 ; CHECK: ## BB#0:
5390 ; CHECK-NEXT: movzbl %dil, %eax
5391 ; CHECK-NEXT: kmovw %eax, %k1
5392 ; CHECK-NEXT: vmovshdup %xmm0, %xmm1 {%k1}
5393 ; CHECK-NEXT: ## xmm1 = xmm0[1,1,3,3]
5394 ; CHECK-NEXT: vmovshdup %xmm0, %xmm2 {%k1} {z}
5395 ; CHECK-NEXT: ## xmm2 = xmm0[1,1,3,3]
5396 ; CHECK-NEXT: vmovshdup %xmm0, %xmm0
5397 ; CHECK-NEXT: ## xmm0 = xmm0[1,1,3,3]
5398 ; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
5399 ; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0
5400 ; CHECK-NEXT: retq
5401 %res = call <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float> %x0, <4 x float> %x1, i8 %x2)
5402 %res1 = call <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float> %x0, <4 x float> %x1, i8 -1)
5403 %res2 = call <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float> %x0, <4 x float> zeroinitializer, i8 %x2)
5404 %res3 = fadd <4 x float> %res, %res1
5405 %res4 = fadd <4 x float> %res2, %res3
5406 ret <4 x float> %res4
5407 }
5408
5409 declare <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float>, <8 x float>, i8)
5410
5411 define <8 x float>@test_int_x86_avx512_mask_movshdup_256(<8 x float> %x0, <8 x float> %x1, i8 %x2) {
5412 ; CHECK-LABEL: test_int_x86_avx512_mask_movshdup_256:
5413 ; CHECK: ## BB#0:
5414 ; CHECK-NEXT: movzbl %dil, %eax
5415 ; CHECK-NEXT: kmovw %eax, %k1
5416 ; CHECK-NEXT: vmovshdup %ymm0, %ymm1 {%k1}
5417 ; CHECK-NEXT: ## ymm1 = ymm0[1,1,3,3,5,5,7,7]
5418 ; CHECK-NEXT: vmovshdup %ymm0, %ymm2 {%k1} {z}
5419 ; CHECK-NEXT: ## ymm2 = ymm0[1,1,3,3,5,5,7,7]
5420 ; CHECK-NEXT: vmovshdup %ymm0, %ymm0
5421 ; CHECK-NEXT: ## ymm0 = ymm0[1,1,3,3,5,5,7,7]
5422 ; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
5423 ; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0
5424 ; CHECK-NEXT: retq
5425 %res = call <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float> %x0, <8 x float> %x1, i8 %x2)
5426 %res1 = call <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float> %x0, <8 x float> %x1, i8 -1)
5427 %res2 = call <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float> %x0, <8 x float> zeroinitializer, i8 %x2)
5428 %res3 = fadd <8 x float> %res, %res1
5429 %res4 = fadd <8 x float> %res2, %res3
5430 ret <8 x float> %res4
5431 }
5432
1829618296 // CHECK: encoding: [0xc5,0xf9,0x7e,0xaa,0xfc,0xfd,0xff,0xff]
1829718297 vmovd %xmm5, -516(%rdx)
1829818298
18299 // CHECK: vmovshdup %zmm27, %zmm16
18300 // CHECK: encoding: [0x62,0x81,0x7e,0x48,0x16,0xc3]
18301 vmovshdup %zmm27, %zmm16
18302
18303 // CHECK: vmovshdup %zmm27, %zmm16 {%k4}
18304 // CHECK: encoding: [0x62,0x81,0x7e,0x4c,0x16,0xc3]
18305 vmovshdup %zmm27, %zmm16 {%k4}
18306
18307 // CHECK: vmovshdup %zmm27, %zmm16 {%k4} {z}
18308 // CHECK: encoding: [0x62,0x81,0x7e,0xcc,0x16,0xc3]
18309 vmovshdup %zmm27, %zmm16 {%k4} {z}
18310
18311 // CHECK: vmovshdup (%rcx), %zmm16
18312 // CHECK: encoding: [0x62,0xe1,0x7e,0x48,0x16,0x01]
18313 vmovshdup (%rcx), %zmm16
18314
18315 // CHECK: vmovshdup 291(%rax,%r14,8), %zmm16
18316 // CHECK: encoding: [0x62,0xa1,0x7e,0x48,0x16,0x84,0xf0,0x23,0x01,0x00,0x00]
18317 vmovshdup 291(%rax,%r14,8), %zmm16
18318
18319 // CHECK: vmovshdup 8128(%rdx), %zmm16
18320 // CHECK: encoding: [0x62,0xe1,0x7e,0x48,0x16,0x42,0x7f]
18321 vmovshdup 8128(%rdx), %zmm16
18322
18323 // CHECK: vmovshdup 8192(%rdx), %zmm16
18324 // CHECK: encoding: [0x62,0xe1,0x7e,0x48,0x16,0x82,0x00,0x20,0x00,0x00]
18325 vmovshdup 8192(%rdx), %zmm16
18326
18327 // CHECK: vmovshdup -8192(%rdx), %zmm16
18328 // CHECK: encoding: [0x62,0xe1,0x7e,0x48,0x16,0x42,0x80]
18329 vmovshdup -8192(%rdx), %zmm16
18330
18331 // CHECK: vmovshdup -8256(%rdx), %zmm16
18332 // CHECK: encoding: [0x62,0xe1,0x7e,0x48,0x16,0x82,0xc0,0xdf,0xff,0xff]
18333 vmovshdup -8256(%rdx), %zmm16
18334
18335 // CHECK: vmovsldup %zmm14, %zmm13
18336 // CHECK: encoding: [0x62,0x51,0x7e,0x48,0x12,0xee]
18337 vmovsldup %zmm14, %zmm13
18338
18339 // CHECK: vmovsldup %zmm14, %zmm13 {%k6}
18340 // CHECK: encoding: [0x62,0x51,0x7e,0x4e,0x12,0xee]
18341 vmovsldup %zmm14, %zmm13 {%k6}
18342
18343 // CHECK: vmovsldup %zmm14, %zmm13 {%k6} {z}
18344 // CHECK: encoding: [0x62,0x51,0x7e,0xce,0x12,0xee]
18345 vmovsldup %zmm14, %zmm13 {%k6} {z}
18346
18347 // CHECK: vmovsldup (%rcx), %zmm13
18348 // CHECK: encoding: [0x62,0x71,0x7e,0x48,0x12,0x29]
18349 vmovsldup (%rcx), %zmm13
18350
18351 // CHECK: vmovsldup 291(%rax,%r14,8), %zmm13
18352 // CHECK: encoding: [0x62,0x31,0x7e,0x48,0x12,0xac,0xf0,0x23,0x01,0x00,0x00]
18353 vmovsldup 291(%rax,%r14,8), %zmm13
18354
18355 // CHECK: vmovsldup 8128(%rdx), %zmm13
18356 // CHECK: encoding: [0x62,0x71,0x7e,0x48,0x12,0x6a,0x7f]
18357 vmovsldup 8128(%rdx), %zmm13
18358
18359 // CHECK: vmovsldup 8192(%rdx), %zmm13
18360 // CHECK: encoding: [0x62,0x71,0x7e,0x48,0x12,0xaa,0x00,0x20,0x00,0x00]
18361 vmovsldup 8192(%rdx), %zmm13
18362
18363 // CHECK: vmovsldup -8192(%rdx), %zmm13
18364 // CHECK: encoding: [0x62,0x71,0x7e,0x48,0x12,0x6a,0x80]
18365 vmovsldup -8192(%rdx), %zmm13
18366
18367 // CHECK: vmovsldup -8256(%rdx), %zmm13
18368 // CHECK: encoding: [0x62,0x71,0x7e,0x48,0x12,0xaa,0xc0,0xdf,0xff,0xff]
18369 vmovsldup -8256(%rdx), %zmm13
18370
1829918371 // CHECK: vmovlps (%rcx), %xmm20, %xmm7
1830018372 // CHECK: encoding: [0x62,0xf1,0x5c,0x00,0x12,0x39]
1830118373 vmovlps (%rcx), %xmm20, %xmm7
2197721977 // CHECK: vcvtps2ph $123, %ymm30, -2064(%rdx)
2197821978 // CHECK: encoding: [0x62,0x63,0x7d,0x28,0x1d,0xb2,0xf0,0xf7,0xff,0xff,0x7b]
2197921979 vcvtps2ph $0x7b, %ymm30, -2064(%rdx)
21980
21981 // CHECK: vmovshdup %xmm18, %xmm23
21982 // CHECK: encoding: [0x62,0xa1,0x7e,0x08,0x16,0xfa]
21983 vmovshdup %xmm18, %xmm23
21984
21985 // CHECK: vmovshdup %xmm18, %xmm23 {%k2}
21986 // CHECK: encoding: [0x62,0xa1,0x7e,0x0a,0x16,0xfa]
21987 vmovshdup %xmm18, %xmm23 {%k2}
21988
21989 // CHECK: vmovshdup %xmm18, %xmm23 {%k2} {z}
21990 // CHECK: encoding: [0x62,0xa1,0x7e,0x8a,0x16,0xfa]
21991 vmovshdup %xmm18, %xmm23 {%k2} {z}
21992
21993 // CHECK: vmovshdup (%rcx), %xmm23
21994 // CHECK: encoding: [0x62,0xe1,0x7e,0x08,0x16,0x39]
21995 vmovshdup (%rcx), %xmm23
21996
21997 // CHECK: vmovshdup 291(%rax,%r14,8), %xmm23
21998 // CHECK: encoding: [0x62,0xa1,0x7e,0x08,0x16,0xbc,0xf0,0x23,0x01,0x00,0x00]
21999 vmovshdup 291(%rax,%r14,8), %xmm23
22000
22001 // CHECK: vmovshdup 2032(%rdx), %xmm23
22002 // CHECK: encoding: [0x62,0xe1,0x7e,0x08,0x16,0x7a,0x7f]
22003 vmovshdup 2032(%rdx), %xmm23
22004
22005 // CHECK: vmovshdup 2048(%rdx), %xmm23
22006 // CHECK: encoding: [0x62,0xe1,0x7e,0x08,0x16,0xba,0x00,0x08,0x00,0x00]
22007 vmovshdup 2048(%rdx), %xmm23
22008
22009 // CHECK: vmovshdup -2048(%rdx), %xmm23
22010 // CHECK: encoding: [0x62,0xe1,0x7e,0x08,0x16,0x7a,0x80]
22011 vmovshdup -2048(%rdx), %xmm23
22012
22013 // CHECK: vmovshdup -2064(%rdx), %xmm23
22014 // CHECK: encoding: [0x62,0xe1,0x7e,0x08,0x16,0xba,0xf0,0xf7,0xff,0xff]
22015 vmovshdup -2064(%rdx), %xmm23
22016
22017 // CHECK: vmovshdup %ymm24, %ymm18
22018 // CHECK: encoding: [0x62,0x81,0x7e,0x28,0x16,0xd0]
22019 vmovshdup %ymm24, %ymm18
22020
22021 // CHECK: vmovshdup %ymm24, %ymm18 {%k3}
22022 // CHECK: encoding: [0x62,0x81,0x7e,0x2b,0x16,0xd0]
22023 vmovshdup %ymm24, %ymm18 {%k3}
22024
22025 // CHECK: vmovshdup %ymm24, %ymm18 {%k3} {z}
22026 // CHECK: encoding: [0x62,0x81,0x7e,0xab,0x16,0xd0]
22027 vmovshdup %ymm24, %ymm18 {%k3} {z}
22028
22029 // CHECK: vmovshdup (%rcx), %ymm18
22030 // CHECK: encoding: [0x62,0xe1,0x7e,0x28,0x16,0x11]
22031 vmovshdup (%rcx), %ymm18
22032
22033 // CHECK: vmovshdup 291(%rax,%r14,8), %ymm18
22034 // CHECK: encoding: [0x62,0xa1,0x7e,0x28,0x16,0x94,0xf0,0x23,0x01,0x00,0x00]
22035 vmovshdup 291(%rax,%r14,8), %ymm18
22036
22037 // CHECK: vmovshdup 4064(%rdx), %ymm18
22038 // CHECK: encoding: [0x62,0xe1,0x7e,0x28,0x16,0x52,0x7f]
22039 vmovshdup 4064(%rdx), %ymm18
22040
22041 // CHECK: vmovshdup 4096(%rdx), %ymm18
22042 // CHECK: encoding: [0x62,0xe1,0x7e,0x28,0x16,0x92,0x00,0x10,0x00,0x00]
22043 vmovshdup 4096(%rdx), %ymm18
22044
22045 // CHECK: vmovshdup -4096(%rdx), %ymm18
22046 // CHECK: encoding: [0x62,0xe1,0x7e,0x28,0x16,0x52,0x80]
22047 vmovshdup -4096(%rdx), %ymm18
22048
22049 // CHECK: vmovshdup -4128(%rdx), %ymm18
22050 // CHECK: encoding: [0x62,0xe1,0x7e,0x28,0x16,0x92,0xe0,0xef,0xff,0xff]
22051 vmovshdup -4128(%rdx), %ymm18
22052
22053 // CHECK: vmovsldup %xmm21, %xmm25
22054 // CHECK: encoding: [0x62,0x21,0x7e,0x08,0x12,0xcd]
22055 vmovsldup %xmm21, %xmm25
22056
22057 // CHECK: vmovsldup %xmm21, %xmm25 {%k5}
22058 // CHECK: encoding: [0x62,0x21,0x7e,0x0d,0x12,0xcd]
22059 vmovsldup %xmm21, %xmm25 {%k5}
22060
22061 // CHECK: vmovsldup %xmm21, %xmm25 {%k5} {z}
22062 // CHECK: encoding: [0x62,0x21,0x7e,0x8d,0x12,0xcd]
22063 vmovsldup %xmm21, %xmm25 {%k5} {z}
22064
22065 // CHECK: vmovsldup (%rcx), %xmm25
22066 // CHECK: encoding: [0x62,0x61,0x7e,0x08,0x12,0x09]
22067 vmovsldup (%rcx), %xmm25
22068
22069 // CHECK: vmovsldup 291(%rax,%r14,8), %xmm25
22070 // CHECK: encoding: [0x62,0x21,0x7e,0x08,0x12,0x8c,0xf0,0x23,0x01,0x00,0x00]
22071 vmovsldup 291(%rax,%r14,8), %xmm25
22072
22073 // CHECK: vmovsldup 2032(%rdx), %xmm25
22074 // CHECK: encoding: [0x62,0x61,0x7e,0x08,0x12,0x4a,0x7f]
22075 vmovsldup 2032(%rdx), %xmm25
22076
22077 // CHECK: vmovsldup 2048(%rdx), %xmm25
22078 // CHECK: encoding: [0x62,0x61,0x7e,0x08,0x12,0x8a,0x00,0x08,0x00,0x00]
22079 vmovsldup 2048(%rdx), %xmm25
22080
22081 // CHECK: vmovsldup -2048(%rdx), %xmm25
22082 // CHECK: encoding: [0x62,0x61,0x7e,0x08,0x12,0x4a,0x80]
22083 vmovsldup -2048(%rdx), %xmm25
22084
22085 // CHECK: vmovsldup -2064(%rdx), %xmm25
22086 // CHECK: encoding: [0x62,0x61,0x7e,0x08,0x12,0x8a,0xf0,0xf7,0xff,0xff]
22087 vmovsldup -2064(%rdx), %xmm25
22088
22089 // CHECK: vmovsldup %ymm29, %ymm24
22090 // CHECK: encoding: [0x62,0x01,0x7e,0x28,0x12,0xc5]
22091 vmovsldup %ymm29, %ymm24
22092
22093 // CHECK: vmovsldup %ymm29, %ymm24 {%k5}
22094 // CHECK: encoding: [0x62,0x01,0x7e,0x2d,0x12,0xc5]
22095 vmovsldup %ymm29, %ymm24 {%k5}
22096
22097 // CHECK: vmovsldup %ymm29, %ymm24 {%k5} {z}
22098 // CHECK: encoding: [0x62,0x01,0x7e,0xad,0x12,0xc5]
22099 vmovsldup %ymm29, %ymm24 {%k5} {z}
22100
22101 // CHECK: vmovsldup (%rcx), %ymm24
22102 // CHECK: encoding: [0x62,0x61,0x7e,0x28,0x12,0x01]
22103 vmovsldup (%rcx), %ymm24
22104
22105 // CHECK: vmovsldup 291(%rax,%r14,8), %ymm24
22106 // CHECK: encoding: [0x62,0x21,0x7e,0x28,0x12,0x84,0xf0,0x23,0x01,0x00,0x00]
22107 vmovsldup 291(%rax,%r14,8), %ymm24
22108
22109 // CHECK: vmovsldup 4064(%rdx), %ymm24
22110 // CHECK: encoding: [0x62,0x61,0x7e,0x28,0x12,0x42,0x7f]
22111 vmovsldup 4064(%rdx), %ymm24
22112
22113 // CHECK: vmovsldup 4096(%rdx), %ymm24
22114 // CHECK: encoding: [0x62,0x61,0x7e,0x28,0x12,0x82,0x00,0x10,0x00,0x00]
22115 vmovsldup 4096(%rdx), %ymm24
22116
22117 // CHECK: vmovsldup -4096(%rdx), %ymm24
22118 // CHECK: encoding: [0x62,0x61,0x7e,0x28,0x12,0x42,0x80]
22119 vmovsldup -4096(%rdx), %ymm24
22120
22121 // CHECK: vmovsldup -4128(%rdx), %ymm24
22122 // CHECK: encoding: [0x62,0x61,0x7e,0x28,0x12,0x82,0xe0,0xef,0xff,0xff]
22123 vmovsldup -4128(%rdx), %ymm24
22124