llvm.org GIT mirror: llvm, commit e20dfeb

AVX512: Implemented encoding, intrinsics and DAG lowering for VMOVDDUP instructions.

Differential Revision: http://reviews.llvm.org/D14702

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@253548 91177308-0d34-0410-b5e6-96231b3b80d8

Author: Igor Breger

12 changed files with 346 additions and 79 deletions.
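VMOVDDUP duplicates the even-indexed double-precision elements of its source. As an illustrative sketch (not part of the commit; the function name is made up), this is the kind of shuffle the new DAG lowering selects to vmovddup, matching the zmm0[0,0,2,2,4,4,6,6] comments in the tests below:

define <8 x double> @movddup_even_splat(<8 x double> %x) {
  %r = shufflevector <8 x double> %x, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
  ret <8 x double> %r
}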
   GCCBuiltin<"__builtin_ia32_movsldup512_mask">,
   Intrinsic<[llvm_v16f32_ty],
             [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty],
+            [IntrNoMem]>;
+
+def int_x86_avx512_mask_movddup_128 :
+        GCCBuiltin<"__builtin_ia32_movddup128_mask">,
+        Intrinsic<[llvm_v2f64_ty],
+                  [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
+                  [IntrNoMem]>;
+
+def int_x86_avx512_mask_movddup_256 :
+        GCCBuiltin<"__builtin_ia32_movddup256_mask">,
+        Intrinsic<[llvm_v4f64_ty],
+                  [llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty],
+                  [IntrNoMem]>;
+
+def int_x86_avx512_mask_movddup_512 :
+        GCCBuiltin<"__builtin_ia32_movddup512_mask">,
+        Intrinsic<[llvm_v8f64_ty],
+                  [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty],
                   [IntrNoMem]>;
 }
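Each new masked intrinsic takes the source vector, a pass-through vector for masked-off lanes, and an integer lane mask. A minimal usage sketch (the declaration matches the definitions above; the function name is illustrative):

declare <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double>, <2 x double>, i8)

define <2 x double> @movddup_masked(<2 x double> %src, <2 x double> %passthru, i8 %mask) {
  %r = call <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double> %src, <2 x double> %passthru, i8 %mask)
  ret <2 x double> %r
}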
   CASE_MOVDUP(MOVSLDUP, r)
     Src1Name = getRegName(MI->getOperand(MI->getNumOperands() - 1).getReg());
     // FALL THROUGH.
-  CASE_MOVDUP(MOVSLDUP, m) {
-    MVT VT = getRegOperandVectorVT(MI, MVT::f32, 0);
-    DestName = getRegName(MI->getOperand(0).getReg());
-    DecodeMOVSLDUPMask(VT, ShuffleMask);
-    break;
-  }
+  CASE_MOVDUP(MOVSLDUP, m)
+    DestName = getRegName(MI->getOperand(0).getReg());
+    DecodeMOVSLDUPMask(getRegOperandVectorVT(MI, MVT::f32, 0), ShuffleMask);
+    break;

   CASE_MOVDUP(MOVSHDUP, r)
     Src1Name = getRegName(MI->getOperand(MI->getNumOperands() - 1).getReg());
     // FALL THROUGH.
-  CASE_MOVDUP(MOVSHDUP, m) {
-    MVT VT = getRegOperandVectorVT(MI, MVT::f32, 0);
-    DestName = getRegName(MI->getOperand(0).getReg());
-    DecodeMOVSHDUPMask(VT, ShuffleMask);
-    break;
-  }
-
-  case X86::VMOVDDUPYrr:
-    Src1Name = getRegName(MI->getOperand(1).getReg());
-    // FALL THROUGH.
-  case X86::VMOVDDUPYrm:
-    DestName = getRegName(MI->getOperand(0).getReg());
-    DecodeMOVDDUPMask(MVT::v4f64, ShuffleMask);
-    break;
-
-  case X86::MOVDDUPrr:
-  case X86::VMOVDDUPrr:
-    Src1Name = getRegName(MI->getOperand(1).getReg());
-    // FALL THROUGH.
-  case X86::MOVDDUPrm:
-  case X86::VMOVDDUPrm:
-    DestName = getRegName(MI->getOperand(0).getReg());
-    DecodeMOVDDUPMask(MVT::v2f64, ShuffleMask);
+  CASE_MOVDUP(MOVSHDUP, m)
+    DestName = getRegName(MI->getOperand(0).getReg());
+    DecodeMOVSHDUPMask(getRegOperandVectorVT(MI, MVT::f32, 0), ShuffleMask);
+    break;
+
+  CASE_MOVDUP(MOVDDUP, r)
+    Src1Name = getRegName(MI->getOperand(MI->getNumOperands() - 1).getReg());
+    // FALL THROUGH.
+  CASE_MOVDUP(MOVDDUP, m)
+    DestName = getRegName(MI->getOperand(0).getReg());
+    DecodeMOVDDUPMask(getRegOperandVectorVT(MI, MVT::f64, 0), ShuffleMask);
     break;

   case X86::PSLLDQri:
 defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb>;

 //===----------------------------------------------------------------------===//
-// AVX-512 - MOVDDUP
-//===----------------------------------------------------------------------===//
-
-multiclass avx512_movddup<string OpcodeStr, RegisterClass RC, ValueType VT,
-                          X86MemOperand x86memop, PatFrag memop_frag> {
-def rr : AVX512PDI<0x12, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
-                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-                   [(set RC:$dst, (VT (X86Movddup RC:$src)))]>, EVEX;
-def rm : AVX512PDI<0x12, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
-                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-                   [(set RC:$dst,
-                     (VT (X86Movddup (memop_frag addr:$src))))]>, EVEX;
-}
-
-defm VMOVDDUPZ : avx512_movddup<"vmovddup", VR512, v8f64, f512mem, loadv8f64>,
-                 VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
-def : Pat<(X86Movddup (v8f64 (scalar_to_vector (loadf64 addr:$src)))),
-          (VMOVDDUPZrm addr:$src)>;
-
-//===----------------------------------------------------------------------===//
 // Move Low to High and High to Low packed FP Instructions
 //===----------------------------------------------------------------------===//
 def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
 defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup>;
 defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup>;
+
+//===----------------------------------------------------------------------===//
+// AVX-512 - MOVDDUP
+//===----------------------------------------------------------------------===//
+
+multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                              X86VectorVTInfo _> {
+  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+                   (ins _.RC:$src), OpcodeStr, "$src", "$src",
+                   (_.VT (OpNode (_.VT _.RC:$src)))>, EVEX;
+  let mayLoad = 1 in
+  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+                 (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
+                 (_.VT (OpNode (_.VT (scalar_to_vector
+                                       (_.ScalarLdFrag addr:$src)))))>,
+                 EVEX, EVEX_CD8<_.EltSize, CD8VH>;
+}
+
+multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                                 AVX512VLVectorVTInfo VTInfo> {
+
+  defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, VTInfo.info512>, EVEX_V512;
+
+  let Predicates = [HasAVX512, HasVLX] in {
+    defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, VTInfo.info256>,
+                EVEX_V256;
+    defm Z128 : avx512_movddup_128<opc, OpcodeStr, OpNode, VTInfo.info128>,
+                EVEX_V128;
+  }
+}
+
+multiclass avx512_movddup<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+  defm NAME : avx512_movddup_common<opc, OpcodeStr, OpNode,
+                                    avx512vl_f64_info>, XD, VEX_W;
+  let isCodeGenOnly = 1 in
+  defm NAME#_I : avx512_movddup_common<opc, OpcodeStr, OpNode,
+                                       avx512vl_i64_info>;
+}
+
+defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup>;
+
+def : Pat<(X86Movddup (loadv2f64 addr:$src)),
+          (VMOVDDUPZ128rm addr:$src)>, Requires<[HasAVX512, HasVLX]>;
+def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))),
+          (VMOVDDUPZ128rm addr:$src)>, Requires<[HasAVX512, HasVLX]>;
+
 //===----------------------------------------------------------------------===//
 // AVX-512 - Unpack Instructions
 //===----------------------------------------------------------------------===//
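With VLX available, the two trailing patterns above fold a 64-bit broadcast load straight into VMOVDDUPZ128rm. A sketch of IR that exercises the X86VBroadcast pattern, mirroring the test39 case added later in this commit (function name illustrative):

define <2 x double> @splat_load_f64(double* %p) {
  %s = load double, double* %p
  %v = insertelement <2 x double> undef, double %s, i32 0
  %r = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
  ret <2 x double> %r
}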
 def rm : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
               [(set VR256:$dst,
-                (v4f64 (X86Movddup
-                        (scalar_to_vector (loadf64 addr:$src)))))]>,
+                (v4f64 (X86Movddup (loadv4f64 addr:$src))))]>,
               Sched<[WriteLoad]>;
 }

-let Predicates = [HasAVX] in {
+let Predicates = [HasAVX, NoVLX] in {
   defm VMOVDDUP  : sse3_replicate_dfp<"vmovddup">, VEX;
   defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup">, VEX, VEX_L;
 }

 defm MOVDDUP : sse3_replicate_dfp<"movddup">;

-let Predicates = [HasAVX] in {
+
+let Predicates = [HasAVX, NoVLX] in {
   def : Pat<(X86Movddup (loadv2f64 addr:$src)),
             (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+
+  // 256-bit version
+  def : Pat<(X86Movddup (loadv4i64 addr:$src)),
+            (VMOVDDUPYrm addr:$src)>;
+  def : Pat<(X86Movddup (v4i64 VR256:$src)),
+            (VMOVDDUPYrr VR256:$src)>;
+}
+
+let Predicates = [HasAVX] in {
   def : Pat<(X86Movddup (bc_v2f64 (loadv4f32 addr:$src))),
             (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
   def : Pat<(X86Movddup (bc_v2f64 (loadv2i64 addr:$src))),
             (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
   def : Pat<(X86Movddup (bc_v2f64
                          (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
             (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
-
-  // 256-bit version
-  def : Pat<(X86Movddup (loadv4f64 addr:$src)),
-            (VMOVDDUPYrm addr:$src)>;
-  def : Pat<(X86Movddup (loadv4i64 addr:$src)),
-            (VMOVDDUPYrm addr:$src)>;
-  def : Pat<(X86Movddup (v4i64 (scalar_to_vector (loadi64 addr:$src)))),
-            (VMOVDDUPYrm addr:$src)>;
-  def : Pat<(X86Movddup (v4i64 VR256:$src)),
-            (VMOVDDUPYrr VR256:$src)>;
 }

 let Predicates = [UseAVX, OptForSize] in {
                      X86ISD::FMIN, X86ISD::FMIN_RND),
   X86_INTRINSIC_DATA(avx512_mask_min_ss_round, INTR_TYPE_SCALAR_MASK_RM,
                      X86ISD::FMIN, X86ISD::FMIN_RND),
+  X86_INTRINSIC_DATA(avx512_mask_movddup_128, INTR_TYPE_1OP_MASK,
+                     X86ISD::MOVDDUP, 0),
+  X86_INTRINSIC_DATA(avx512_mask_movddup_256, INTR_TYPE_1OP_MASK,
+                     X86ISD::MOVDDUP, 0),
+  X86_INTRINSIC_DATA(avx512_mask_movddup_512, INTR_TYPE_1OP_MASK,
+                     X86ISD::MOVDDUP, 0),
   X86_INTRINSIC_DATA(avx512_mask_movshdup_128, INTR_TYPE_1OP_MASK,
                      X86ISD::MOVSHDUP, 0),
   X86_INTRINSIC_DATA(avx512_mask_movshdup_256, INTR_TYPE_1OP_MASK,
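The INTR_TYPE_1OP_MASK mapping lowers each intrinsic to the named ISD node (here X86ISD::MOVDDUP) and merges the result with the pass-through operand under the lane mask. Equivalent unmasked IR for the 512-bit case, as a hedged sketch of those semantics (function name illustrative):

define <8 x double> @movddup_512_semantics(<8 x double> %x0, <8 x double> %x1, i8 %m) {
  %dup = shufflevector <8 x double> %x0, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
  %mask = bitcast i8 %m to <8 x i1>
  %r = select <8 x i1> %mask, <8 x double> %dup, <8 x double> %x1
  ret <8 x double> %r
}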
   ret void
 }

+define <2 x double> @test39(double* %ptr) nounwind {
+  %a = load double, double* %ptr
+  %v = insertelement <2 x double> undef, double %a, i32 0
+  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
+  ret <2 x double> %shuffle
+}
+
+define <2 x double> @test40(<2 x double>* %ptr) nounwind {
+  %v = load <2 x double>, <2 x double>* %ptr
+  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
+  ret <2 x double> %shuffle
+}
+
+define <2 x double> @shuffle_v2f64_00(<2 x double> %a, <2 x double> %b) {
+  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 0>
+  ret <2 x double> %shuffle
+}
+
+define <4 x double> @shuffle_v4f64_0022(<4 x double> %a, <4 x double> %b) {
+  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
+  ret <4 x double> %shuffle
+}
+
   ret <16 x float> %res4
 }

+declare <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double>, <8 x double>, i8)
+
+define <8 x double>@test_int_x86_avx512_mask_movddup_512(<8 x double> %x0, <8 x double> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_movddup_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vmovddup %zmm0, %zmm1 {%k1}
+; CHECK-NEXT:    ## zmm1 = zmm0[0,0,2,2,4,4,6,6]
+; CHECK-NEXT:    vmovddup %zmm0, %zmm2 {%k1} {z}
+; CHECK-NEXT:    ## zmm2 = zmm0[0,0,2,2,4,4,6,6]
+; CHECK-NEXT:    vmovddup %zmm0, %zmm0
+; CHECK-NEXT:    ## zmm0 = zmm0[0,0,2,2,4,4,6,6]
+; CHECK-NEXT:    vaddpd %zmm0, %zmm1, %zmm0
+; CHECK-NEXT:    vaddpd %zmm0, %zmm2, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double> %x0, <8 x double> %x1, i8 %x2)
+  %res1 = call <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double> %x0, <8 x double> %x1, i8 -1)
+  %res2 = call <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double> %x0, <8 x double> zeroinitializer, i8 %x2)
+  %res3 = fadd <8 x double> %res, %res1
+  %res4 = fadd <8 x double> %res2, %res3
+  ret <8 x double> %res4
+}
+
   %res4 = fadd <8 x float> %res2, %res3
   ret <8 x float> %res4
 }
-
+declare <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double>, <2 x double>, i8)
+
+define <2 x double>@test_int_x86_avx512_mask_movddup_128(<2 x double> %x0, <2 x double> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_movddup_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vmovddup %xmm0, %xmm1 {%k1}
+; CHECK-NEXT:    ## xmm1 = xmm0[0,0]
+; CHECK-NEXT:    vmovddup %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT:    ## xmm2 = xmm0[0,0]
+; CHECK-NEXT:    vmovddup %xmm0, %xmm0
+; CHECK-NEXT:    ## xmm0 = xmm0[0,0]
+; CHECK-NEXT:    vaddpd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT:    vaddpd %xmm0, %xmm2, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double> %x0, <2 x double> %x1, i8 %x2)
+  %res1 = call <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double> %x0, <2 x double> %x1, i8 -1)
+  %res2 = call <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double> %x0, <2 x double> zeroinitializer, i8 %x2)
+  %res3 = fadd <2 x double> %res, %res1
+  %res4 = fadd <2 x double> %res2, %res3
+  ret <2 x double> %res4
+}
+
+declare <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double>, <4 x double>, i8)
+
+define <4 x double>@test_int_x86_avx512_mask_movddup_256(<4 x double> %x0, <4 x double> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_movddup_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vmovddup %ymm0, %ymm1 {%k1}
+; CHECK-NEXT:    ## ymm1 = ymm0[0,0,2,2]
+; CHECK-NEXT:    vmovddup %ymm0, %ymm2 {%k1} {z}
+; CHECK-NEXT:    ## ymm2 = ymm0[0,0,2,2]
+; CHECK-NEXT:    vmovddup %ymm0, %ymm0
+; CHECK-NEXT:    ## ymm0 = ymm0[0,0,2,2]
+; CHECK-NEXT:    vaddpd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT:    vaddpd %ymm0, %ymm2, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double> %x0, <4 x double> %x1, i8 %x2)
+  %res1 = call <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double> %x0, <4 x double> %x1, i8 -1)
+  %res2 = call <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double> %x0, <4 x double> zeroinitializer, i8 %x2)
+  %res3 = fadd <4 x double> %res, %res1
+  %res4 = fadd <4 x double> %res2, %res3
+  ret <4 x double> %res4
+}
 ; SSE41-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
 ; SSE41-NEXT:    retq
 ;
-; AVX1-LABEL: insert_dup_mem_v2f64:
-; AVX1:       # BB#0:
-; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
-; AVX1-NEXT:    retq
-;
-; AVX2-LABEL: insert_dup_mem_v2f64:
-; AVX2:       # BB#0:
-; AVX2-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
-; AVX2-NEXT:    retq
-;
-; AVX512VL-LABEL: insert_dup_mem_v2f64:
-; AVX512VL:       # BB#0:
-; AVX512VL-NEXT:    vmovsd (%rdi), %xmm0
-; AVX512VL-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
-; AVX512VL-NEXT:    retq
+; AVX-LABEL: insert_dup_mem_v2f64:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
+; AVX-NEXT:    retq
   %a = load double, double* %ptr
   %v = insertelement <2 x double> undef, double %a, i32 0
   %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
   ret <2 x double> %shuffle
 }
+
+define <2 x double> @insert_dup_mem128_v2f64(<2 x double>* %ptr) nounwind {
+; SSE2-LABEL: insert_dup_mem128_v2f64:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movaps (%rdi), %xmm0
+; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
+; SSE2-NEXT:    retq
+;
+; SSE3-LABEL: insert_dup_mem128_v2f64:
+; SSE3:       # BB#0:
+; SSE3-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
+; SSE3-NEXT:    retq
+;
+; SSSE3-LABEL: insert_dup_mem128_v2f64:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: insert_dup_mem128_v2f64:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: insert_dup_mem128_v2f64:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
+; AVX-NEXT:    retq
+  %v = load <2 x double>, <2 x double>* %ptr
+  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
+  ret <2 x double> %shuffle
+}
+

 define <2 x i64> @insert_dup_mem_v2i64(i64* %ptr) {
 ; SSE-LABEL: insert_dup_mem_v2i64:
 ; ALL:       # BB#0:
 ; ALL-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
 ; ALL-NEXT:    retq
+  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
+  ret <4 x double> %shuffle
+}
+
+define <4 x double> @shuffle_v4f64mem_0022(<4 x double>* %ptr, <4 x double> %b) {
+; ALL-LABEL: shuffle_v4f64mem_0022:
+; ALL:       # BB#0:
+; ALL-NEXT:    vmovddup {{.*#+}} ymm0 = mem[0,0,2,2]
+; ALL-NEXT:    retq
+  %a = load <4 x double>, <4 x double>* %ptr
   %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
   ret <4 x double> %shuffle
 }
 // CHECK: encoding: [0x62,0x61,0xfd,0x08,0x17,0x8a,0xf8,0xfb,0xff,0xff]
           vmovhpd %xmm25, -1032(%rdx)

+// CHECK: vmovddup %zmm29, %zmm5
+// CHECK: encoding: [0x62,0x91,0xff,0x48,0x12,0xed]
+          vmovddup %zmm29, %zmm5
+
+// CHECK: vmovddup %zmm29, %zmm5 {%k4}
+// CHECK: encoding: [0x62,0x91,0xff,0x4c,0x12,0xed]
+          vmovddup %zmm29, %zmm5 {%k4}
+
+// CHECK: vmovddup %zmm29, %zmm5 {%k4} {z}
+// CHECK: encoding: [0x62,0x91,0xff,0xcc,0x12,0xed]
+          vmovddup %zmm29, %zmm5 {%k4} {z}
+
+// CHECK: vmovddup (%rcx), %zmm5
+// CHECK: encoding: [0x62,0xf1,0xff,0x48,0x12,0x29]
+          vmovddup (%rcx), %zmm5
+
+// CHECK: vmovddup 291(%rax,%r14,8), %zmm5
+// CHECK: encoding: [0x62,0xb1,0xff,0x48,0x12,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vmovddup 291(%rax,%r14,8), %zmm5
+
+// CHECK: vmovddup 8128(%rdx), %zmm5
+// CHECK: encoding: [0x62,0xf1,0xff,0x48,0x12,0x6a,0x7f]
+          vmovddup 8128(%rdx), %zmm5
+
+// CHECK: vmovddup 8192(%rdx), %zmm5
+// CHECK: encoding: [0x62,0xf1,0xff,0x48,0x12,0xaa,0x00,0x20,0x00,0x00]
+          vmovddup 8192(%rdx), %zmm5
+
+// CHECK: vmovddup -8192(%rdx), %zmm5
+// CHECK: encoding: [0x62,0xf1,0xff,0x48,0x12,0x6a,0x80]
+          vmovddup -8192(%rdx), %zmm5
+
+// CHECK: vmovddup -8256(%rdx), %zmm5
+// CHECK: encoding: [0x62,0xf1,0xff,0x48,0x12,0xaa,0xc0,0xdf,0xff,0xff]
+          vmovddup -8256(%rdx), %zmm5
+
 // CHECK: vmovsd.s %xmm15, %xmm22, %xmm21
 // CHECK: encoding: [0x62,0x31,0xcf,0x00,0x11,0xfd]
           vmovsd.s %xmm15, %xmm22, %xmm21
 // CHECK: encoding: [0x62,0x61,0x7e,0x28,0x12,0x82,0xe0,0xef,0xff,0xff]
           vmovsldup -4128(%rdx), %ymm24

+// CHECK: vmovddup %xmm23, %xmm17
+// CHECK: encoding: [0x62,0xa1,0xff,0x08,0x12,0xcf]
+          vmovddup %xmm23, %xmm17
+
+// CHECK: vmovddup %xmm23, %xmm17 {%k6}
+// CHECK: encoding: [0x62,0xa1,0xff,0x0e,0x12,0xcf]
+          vmovddup %xmm23, %xmm17 {%k6}
+
+// CHECK: vmovddup %xmm23, %xmm17 {%k6} {z}
+// CHECK: encoding: [0x62,0xa1,0xff,0x8e,0x12,0xcf]
+          vmovddup %xmm23, %xmm17 {%k6} {z}
+
+// CHECK: vmovddup (%rcx), %xmm17
+// CHECK: encoding: [0x62,0xe1,0xff,0x08,0x12,0x09]
+          vmovddup (%rcx), %xmm17
+
+// CHECK: vmovddup 291(%rax,%r14,8), %xmm17
+// CHECK: encoding: [0x62,0xa1,0xff,0x08,0x12,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vmovddup 291(%rax,%r14,8), %xmm17
+
+// CHECK: vmovddup 1016(%rdx), %xmm17
+// CHECK: encoding: [0x62,0xe1,0xff,0x08,0x12,0x4a,0x7f]
+          vmovddup 1016(%rdx), %xmm17
+
+// CHECK: vmovddup 1024(%rdx), %xmm17
+// CHECK: encoding: [0x62,0xe1,0xff,0x08,0x12,0x8a,0x00,0x04,0x00,0x00]
+          vmovddup 1024(%rdx), %xmm17
+
+// CHECK: vmovddup -1024(%rdx), %xmm17
+// CHECK: encoding: [0x62,0xe1,0xff,0x08,0x12,0x4a,0x80]
+          vmovddup -1024(%rdx), %xmm17
+
+// CHECK: vmovddup -1032(%rdx), %xmm17
+// CHECK: encoding: [0x62,0xe1,0xff,0x08,0x12,0x8a,0xf8,0xfb,0xff,0xff]
+          vmovddup -1032(%rdx), %xmm17
+
+// CHECK: vmovddup %ymm25, %ymm18
+// CHECK: encoding: [0x62,0x81,0xff,0x28,0x12,0xd1]
+          vmovddup %ymm25, %ymm18
+
+// CHECK: vmovddup %ymm25, %ymm18 {%k4}
+// CHECK: encoding: [0x62,0x81,0xff,0x2c,0x12,0xd1]
+          vmovddup %ymm25, %ymm18 {%k4}
+
+// CHECK: vmovddup %ymm25, %ymm18 {%k4} {z}
+// CHECK: encoding: [0x62,0x81,0xff,0xac,0x12,0xd1]
+          vmovddup %ymm25, %ymm18 {%k4} {z}
+
+// CHECK: vmovddup (%rcx), %ymm18
+// CHECK: encoding: [0x62,0xe1,0xff,0x28,0x12,0x11]
+          vmovddup (%rcx), %ymm18
+
+// CHECK: vmovddup 291(%rax,%r14,8), %ymm18
+// CHECK: encoding: [0x62,0xa1,0xff,0x28,0x12,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vmovddup 291(%rax,%r14,8), %ymm18
+
+// CHECK: vmovddup 4064(%rdx), %ymm18
+// CHECK: encoding: [0x62,0xe1,0xff,0x28,0x12,0x52,0x7f]
+          vmovddup 4064(%rdx), %ymm18
+
+// CHECK: vmovddup 4096(%rdx), %ymm18
+// CHECK: encoding: [0x62,0xe1,0xff,0x28,0x12,0x92,0x00,0x10,0x00,0x00]
+          vmovddup 4096(%rdx), %ymm18
+
+// CHECK: vmovddup -4096(%rdx), %ymm18
+// CHECK: encoding: [0x62,0xe1,0xff,0x28,0x12,0x52,0x80]
+          vmovddup -4096(%rdx), %ymm18
+
+// CHECK: vmovddup -4128(%rdx), %ymm18
+// CHECK: encoding: [0x62,0xe1,0xff,0x28,0x12,0x92,0xe0,0xef,0xff,0xff]
+          vmovddup -4128(%rdx), %ymm18
+
 // CHECK: vmovapd.s %xmm27, %xmm26
 // CHECK: encoding: [0x62,0x01,0xfd,0x08,0x29,0xda]
           vmovapd.s %xmm27, %xmm26