llvm.org GIT mirror llvm / 40307c7
X86: add more GATHER intrinsics in LLVM. Corrected type for index of llvm.x86.avx2.gather.d.pd.256 from 256-bit to 128-bit. Corrected types for src|dst|mask of llvm.x86.avx2.gather.q.ps.256 from 256-bit to 128-bit. Support the following intrinsics: llvm.x86.avx2.gather.d.q, llvm.x86.avx2.gather.q.q, llvm.x86.avx2.gather.d.q.256, llvm.x86.avx2.gather.q.q.256, llvm.x86.avx2.gather.d.d, llvm.x86.avx2.gather.q.d, llvm.x86.avx2.gather.d.d.256, llvm.x86.avx2.gather.q.d.256. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@159402 91177308-0d34-0410-b5e6-96231b3b80d8 Manman Ren 7 years ago
8 changed file(s) with 235 addition(s) and 43 deletion(s). Raw diff Collapse all Expand all
17511751 [IntrReadMem]>;
17521752 def int_x86_avx2_gather_d_pd_256 : GCCBuiltin<"__builtin_ia32_gatherd_pd256">,
17531753 Intrinsic<[llvm_v4f64_ty],
1754 [llvm_v4f64_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_v4f64_ty, llvm_i8_ty],
1754 [llvm_v4f64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4f64_ty, llvm_i8_ty],
17551755 [IntrReadMem]>;
17561756 def int_x86_avx2_gather_q_pd : GCCBuiltin<"__builtin_ia32_gatherq_pd">,
17571757 Intrinsic<[llvm_v2f64_ty],
17741774 [llvm_v4f32_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v4f32_ty, llvm_i8_ty],
17751775 [IntrReadMem]>;
17761776 def int_x86_avx2_gather_q_ps_256 : GCCBuiltin<"__builtin_ia32_gatherq_ps256">,
1777 Intrinsic<[llvm_v8f32_ty],
1778 [llvm_v8f32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v8f32_ty, llvm_i8_ty],
1777 Intrinsic<[llvm_v4f32_ty],
1778 [llvm_v4f32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4f32_ty, llvm_i8_ty],
1779 [IntrReadMem]>;
1780
1781 def int_x86_avx2_gather_d_q : GCCBuiltin<"__builtin_ia32_gatherd_q">,
1782 Intrinsic<[llvm_v2i64_ty],
1783 [llvm_v2i64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v2i64_ty, llvm_i8_ty],
1784 [IntrReadMem]>;
1785 def int_x86_avx2_gather_d_q_256 : GCCBuiltin<"__builtin_ia32_gatherd_q256">,
1786 Intrinsic<[llvm_v4i64_ty],
1787 [llvm_v4i64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i64_ty, llvm_i8_ty],
1788 [IntrReadMem]>;
1789 def int_x86_avx2_gather_q_q : GCCBuiltin<"__builtin_ia32_gatherq_q">,
1790 Intrinsic<[llvm_v2i64_ty],
1791 [llvm_v2i64_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty],
1792 [IntrReadMem]>;
1793 def int_x86_avx2_gather_q_q_256 : GCCBuiltin<"__builtin_ia32_gatherq_q256">,
1794 Intrinsic<[llvm_v4i64_ty],
1795 [llvm_v4i64_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty],
1796 [IntrReadMem]>;
1797 def int_x86_avx2_gather_d_d : GCCBuiltin<"__builtin_ia32_gatherd_d">,
1798 Intrinsic<[llvm_v4i32_ty],
1799 [llvm_v4i32_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty],
1800 [IntrReadMem]>;
1801 def int_x86_avx2_gather_d_d_256 : GCCBuiltin<"__builtin_ia32_gatherd_d256">,
1802 Intrinsic<[llvm_v8i32_ty],
1803 [llvm_v8i32_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty],
1804 [IntrReadMem]>;
1805 def int_x86_avx2_gather_q_d : GCCBuiltin<"__builtin_ia32_gatherq_d">,
1806 Intrinsic<[llvm_v4i32_ty],
1807 [llvm_v4i32_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v4i32_ty, llvm_i8_ty],
1808 [IntrReadMem]>;
1809 def int_x86_avx2_gather_q_d_256 : GCCBuiltin<"__builtin_ia32_gatherq_d256">,
1810 Intrinsic<[llvm_v4i32_ty],
1811 [llvm_v4i32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i32_ty, llvm_i8_ty],
17791812 [IntrReadMem]>;
17801813 }
17811814
505505 // We can tell whether it is VSIB or SIB after instruction ID is decoded,
506506 // but instruction ID may not be decoded yet when calling readSIB.
507507 uint32_t Opcode = mcInst.getOpcode();
508 bool IsGather = (Opcode == X86::VGATHERDPDrm ||
509 Opcode == X86::VGATHERQPDrm ||
510 Opcode == X86::VGATHERDPSrm ||
511 Opcode == X86::VGATHERQPSrm);
512 bool IsGatherY = (Opcode == X86::VGATHERDPDYrm ||
513 Opcode == X86::VGATHERQPDYrm ||
514 Opcode == X86::VGATHERDPSYrm ||
515 Opcode == X86::VGATHERQPSYrm);
516 if (IsGather || IsGatherY) {
508 bool IndexIs128 = (Opcode == X86::VGATHERDPDrm ||
509 Opcode == X86::VGATHERDPDYrm ||
510 Opcode == X86::VGATHERQPDrm ||
511 Opcode == X86::VGATHERDPSrm ||
512 Opcode == X86::VGATHERQPSrm ||
513 Opcode == X86::VPGATHERDQrm ||
514 Opcode == X86::VPGATHERDQYrm ||
515 Opcode == X86::VPGATHERQQrm ||
516 Opcode == X86::VPGATHERDDrm ||
517 Opcode == X86::VPGATHERQDrm);
518 bool IndexIs256 = (Opcode == X86::VGATHERQPDYrm ||
519 Opcode == X86::VGATHERDPSYrm ||
520 Opcode == X86::VGATHERQPSYrm ||
521 Opcode == X86::VPGATHERQQYrm ||
522 Opcode == X86::VPGATHERDDYrm ||
523 Opcode == X86::VPGATHERQDYrm);
524 if (IndexIs128 || IndexIs256) {
517525 unsigned IndexOffset = insn.sibIndex -
518526 (insn.addressSize == 8 ? SIB_INDEX_RAX:SIB_INDEX_EAX);
519 SIBIndex IndexBase = IsGatherY ? SIB_INDEX_YMM0 : SIB_INDEX_XMM0;
527 SIBIndex IndexBase = IndexIs256 ? SIB_INDEX_YMM0 : SIB_INDEX_XMM0;
520528 insn.sibIndex = (SIBIndex)(IndexBase +
521529 (insn.sibIndex == SIB_INDEX_NONE ? 4 : IndexOffset));
522530 }
20102010 return SelectGather(Node, X86::VGATHERQPSrm);
20112011 case Intrinsic::x86_avx2_gather_q_ps_256:
20122012 return SelectGather(Node, X86::VGATHERQPSYrm);
2013 case Intrinsic::x86_avx2_gather_d_q:
2014 return SelectGather(Node, X86::VPGATHERDQrm);
2015 case Intrinsic::x86_avx2_gather_d_q_256:
2016 return SelectGather(Node, X86::VPGATHERDQYrm);
2017 case Intrinsic::x86_avx2_gather_q_q:
2018 return SelectGather(Node, X86::VPGATHERQQrm);
2019 case Intrinsic::x86_avx2_gather_q_q_256:
2020 return SelectGather(Node, X86::VPGATHERQQYrm);
2021 case Intrinsic::x86_avx2_gather_d_d:
2022 return SelectGather(Node, X86::VPGATHERDDrm);
2023 case Intrinsic::x86_avx2_gather_d_d_256:
2024 return SelectGather(Node, X86::VPGATHERDDYrm);
2025 case Intrinsic::x86_avx2_gather_q_d:
2026 return SelectGather(Node, X86::VPGATHERQDrm);
2027 case Intrinsic::x86_avx2_gather_q_d_256:
2028 return SelectGather(Node, X86::VPGATHERQDYrm);
20132029 }
20142030 break;
20152031 }
324324 let ParserMatchClass = X86Mem128AsmOperand; }
325325 def f256mem : X86MemOperand<"printf256mem">{
326326 let ParserMatchClass = X86Mem256AsmOperand; }
327 def v128mem : Operand {
328 let PrintMethod = "printf128mem";
327 def v128mem : X86MemOperand<"printf128mem"> {
329328 let MIOperandInfo = (ops ptr_rc, i8imm, VR128, i32imm, i8imm);
330329 let ParserMatchClass = X86Mem128AsmOperand; }
331 def v256mem : Operand {
332 let PrintMethod = "printf256mem";
330 def v256mem : X86MemOperand<"printf256mem"> {
333331 let MIOperandInfo = (ops ptr_rc, i8imm, VR256, i32imm, i8imm);
334332 let ParserMatchClass = X86Mem256AsmOperand; }
335333 }
79967996
79977997 //===----------------------------------------------------------------------===//
79987998 // VGATHER - GATHER Operations
7999 //
8000 // [(set VR128:$dst, (IntGather128 VR128:$src1, addr:$src2, VR128:$idx,
8001 // VR128:$mask, (i8 imm:$sc)))]>, VEX_4VOp3;
8002 // [(set VR256:$dst, (IntGather256 VR256:$src1, addr:$src2, VR256:$idx,
8003 // VR256:$mask, (i8 imm:$sc)))]>, VEX_4VOp3;
80047999 multiclass avx2_gather opc, string OpcodeStr,
8000 RegisterClass RC256, X86MemOperand memop256,
80058001 Intrinsic IntGather128, Intrinsic IntGather256> {
80068002 def rm : AVX28I
80078003 (ins VR128:$src1, v128mem:$src2, VR128:$mask),
80088004 !strconcat(OpcodeStr,
80098005 "\t{$src1, $src2, $mask|$mask, $src2, $src1}"),
80108006 []>, VEX_4VOp3;
8011 def Yrm : AVX28I
8012 (ins VR256:$src1, v256mem:$src2, VR256:$mask),
8007 def Yrm : AVX28I),
8008 (ins RC256:$src1, memop256:$src2, RC256:$mask),
80138009 !strconcat(OpcodeStr,
80148010 "\t{$src1, $src2, $mask|$mask, $src2, $src1}"),
8015 []>, VEX_4VOp3;
8016 }
8017
8018 //let Constraints = "$src1 = $dst, $mask = $mask_wb" in {
8011 []>, VEX_4VOp3, VEX_L;
8012 }
8013
80198014 let Constraints = "$src1 = $dst" in {
80208015 defm VGATHERDPD : avx2_gather<0x92, "vgatherdpd",
8016 VR256, v128mem,
80218017 int_x86_avx2_gather_d_pd,
80228018 int_x86_avx2_gather_d_pd_256>, VEX_W;
80238019 defm VGATHERQPD : avx2_gather<0x93, "vgatherqpd",
8020 VR256, v256mem,
80248021 int_x86_avx2_gather_q_pd,
80258022 int_x86_avx2_gather_q_pd_256>, VEX_W;
80268023 defm VGATHERDPS : avx2_gather<0x92, "vgatherdps",
8024 VR256, v256mem,
80278025 int_x86_avx2_gather_d_ps,
80288026 int_x86_avx2_gather_d_ps_256>;
80298027 defm VGATHERQPS : avx2_gather<0x93, "vgatherqps",
8028 VR128, v256mem,
80308029 int_x86_avx2_gather_q_ps,
80318030 int_x86_avx2_gather_q_ps_256>;
8032 }
8031 defm VPGATHERDQ : avx2_gather<0x90, "vpgatherdq",
8032 VR256, v128mem,
8033 int_x86_avx2_gather_d_q,
8034 int_x86_avx2_gather_d_q_256>, VEX_W;
8035 defm VPGATHERQQ : avx2_gather<0x91, "vpgatherqq",
8036 VR256, v256mem,
8037 int_x86_avx2_gather_q_q,
8038 int_x86_avx2_gather_q_q_256>, VEX_W;
8039 defm VPGATHERDD : avx2_gather<0x90, "vpgatherdd",
8040 VR256, v256mem,
8041 int_x86_avx2_gather_d_d,
8042 int_x86_avx2_gather_d_d_256>;
8043 defm VPGATHERQD : avx2_gather<0x91, "vpgatherqd",
8044 VR128, v256mem,
8045 int_x86_avx2_gather_q_d,
8046 int_x86_avx2_gather_q_d_256>;
8047 }
987987 <4 x i32>, <2 x double>, i8) nounwind readonly
988988
989989 define <4 x double> @test_x86_avx2_gather_d_pd_256(<4 x double> %a0, i8* %a1,
990 <8 x i32> %idx, <4 x double> %mask) {
990 <4 x i32> %idx, <4 x double> %mask) {
991991 ; CHECK: vgatherdpd
992992 %res = call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> %a0,
993 i8* %a1, <8 x i32> %idx, <4 x double> %mask, i8 2) ;
993 i8* %a1, <4 x i32> %idx, <4 x double> %mask, i8 2) ;
994994 ret <4 x double> %res
995995 }
996996 declare <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double>, i8*,
997 <8 x i32>, <4 x double>, i8) nounwind readonly
997 <4 x i32>, <4 x double>, i8) nounwind readonly
998998
999999 define <2 x double> @test_x86_avx2_gather_q_pd(<2 x double> %a0, i8* %a1,
10001000 <2 x i64> %idx, <2 x double> %mask) {
10461046 declare <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float>, i8*,
10471047 <2 x i64>, <4 x float>, i8) nounwind readonly
10481048
1049 define <8 x float> @test_x86_avx2_gather_q_ps_256(<8 x float> %a0, i8* %a1,
1050 <4 x i64> %idx, <8 x float> %mask) {
1049 define <4 x float> @test_x86_avx2_gather_q_ps_256(<4 x float> %a0, i8* %a1,
1050 <4 x i64> %idx, <4 x float> %mask) {
10511051 ; CHECK: vgatherqps
1052 %res = call <8 x float> @llvm.x86.avx2.gather.q.ps.256(<8 x float> %a0,
1053 i8* %a1, <4 x i64> %idx, <8 x float> %mask, i8 2) ;
1054 ret <8 x float> %res
1055 }
1056 declare <8 x float> @llvm.x86.avx2.gather.q.ps.256(<8 x float>, i8*,
1057 <4 x i64>, <8 x float>, i8) nounwind readonly
1052 %res = call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> %a0,
1053 i8* %a1, <4 x i64> %idx, <4 x float> %mask, i8 2) ;
1054 ret <4 x float> %res
1055 }
1056 declare <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float>, i8*,
1057 <4 x i64>, <4 x float>, i8) nounwind readonly
1058
1059 define <2 x i64> @test_x86_avx2_gather_d_q(<2 x i64> %a0, i8* %a1,
1060 <4 x i32> %idx, <2 x i64> %mask) {
1061 ; CHECK: vpgatherdq
1062 %res = call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> %a0,
1063 i8* %a1, <4 x i32> %idx, <2 x i64> %mask, i8 2) ;
1064 ret <2 x i64> %res
1065 }
1066 declare <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64>, i8*,
1067 <4 x i32>, <2 x i64>, i8) nounwind readonly
1068
1069 define <4 x i64> @test_x86_avx2_gather_d_q_256(<4 x i64> %a0, i8* %a1,
1070 <4 x i32> %idx, <4 x i64> %mask) {
1071 ; CHECK: vpgatherdq
1072 %res = call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> %a0,
1073 i8* %a1, <4 x i32> %idx, <4 x i64> %mask, i8 2) ;
1074 ret <4 x i64> %res
1075 }
1076 declare <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64>, i8*,
1077 <4 x i32>, <4 x i64>, i8) nounwind readonly
1078
1079 define <2 x i64> @test_x86_avx2_gather_q_q(<2 x i64> %a0, i8* %a1,
1080 <2 x i64> %idx, <2 x i64> %mask) {
1081 ; CHECK: vpgatherqq
1082 %res = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> %a0,
1083 i8* %a1, <2 x i64> %idx, <2 x i64> %mask, i8 2) ;
1084 ret <2 x i64> %res
1085 }
1086 declare <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64>, i8*,
1087 <2 x i64>, <2 x i64>, i8) nounwind readonly
1088
1089 define <4 x i64> @test_x86_avx2_gather_q_q_256(<4 x i64> %a0, i8* %a1,
1090 <4 x i64> %idx, <4 x i64> %mask) {
1091 ; CHECK: vpgatherqq
1092 %res = call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> %a0,
1093 i8* %a1, <4 x i64> %idx, <4 x i64> %mask, i8 2) ;
1094 ret <4 x i64> %res
1095 }
1096 declare <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64>, i8*,
1097 <4 x i64>, <4 x i64>, i8) nounwind readonly
1098
1099 define <4 x i32> @test_x86_avx2_gather_d_d(<4 x i32> %a0, i8* %a1,
1100 <4 x i32> %idx, <4 x i32> %mask) {
1101 ; CHECK: vpgatherdd
1102 %res = call <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> %a0,
1103 i8* %a1, <4 x i32> %idx, <4 x i32> %mask, i8 2) ;
1104 ret <4 x i32> %res
1105 }
1106 declare <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32>, i8*,
1107 <4 x i32>, <4 x i32>, i8) nounwind readonly
1108
1109 define <8 x i32> @test_x86_avx2_gather_d_d_256(<8 x i32> %a0, i8* %a1,
1110 <8 x i32> %idx, <8 x i32> %mask) {
1111 ; CHECK: vpgatherdd
1112 %res = call <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32> %a0,
1113 i8* %a1, <8 x i32> %idx, <8 x i32> %mask, i8 2) ;
1114 ret <8 x i32> %res
1115 }
1116 declare <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32>, i8*,
1117 <8 x i32>, <8 x i32>, i8) nounwind readonly
1118
1119 define <4 x i32> @test_x86_avx2_gather_q_d(<4 x i32> %a0, i8* %a1,
1120 <2 x i64> %idx, <4 x i32> %mask) {
1121 ; CHECK: vpgatherqd
1122 %res = call <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32> %a0,
1123 i8* %a1, <2 x i64> %idx, <4 x i32> %mask, i8 2) ;
1124 ret <4 x i32> %res
1125 }
1126 declare <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32>, i8*,
1127 <2 x i64>, <4 x i32>, i8) nounwind readonly
1128
1129 define <4 x i32> @test_x86_avx2_gather_q_d_256(<4 x i32> %a0, i8* %a1,
1130 <4 x i64> %idx, <4 x i32> %mask) {
1131 ; CHECK: vpgatherqd
1132 %res = call <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32> %a0,
1133 i8* %a1, <4 x i64> %idx, <4 x i32> %mask, i8 2) ;
1134 ret <4 x i32> %res
1135 }
1136 declare <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32>, i8*,
1137 <4 x i64>, <4 x i32>, i8) nounwind readonly
727727 # CHECK: vgatherdpd %xmm0, (%rdi,%xmm1,2), %xmm2
728728 0xc4 0xe2 0xe9 0x92 0x04 0x4f
729729
730 # CHECK: vgatherqps %ymm8, (%r15,%ymm9,2), %ymm10
730 # CHECK: vgatherdpd %ymm0, (%rdi,%xmm1,2), %ymm2
731 0xc4 0xe2 0xed 0x92 0x04 0x4f
732
733 # CHECK: vgatherqps %xmm8, (%r15,%xmm9,2), %xmm10
734 0xc4 0x02 0x29 0x93 0x04 0x4f
735
736 # CHECK: vgatherqps %xmm8, (%r15,%ymm9,2), %xmm10
731737 0xc4 0x02 0x2d 0x93 0x04 0x4f
738
739 # CHECK: vpgatherdq %xmm0, (%rdi,%xmm1,2), %xmm2
740 0xc4 0xe2 0xe9 0x90 0x04 0x4f
741
742 # CHECK: vpgatherdq %ymm0, (%rdi,%xmm1,2), %ymm2
743 0xc4 0xe2 0xed 0x90 0x04 0x4f
744
745 # CHECK: vpgatherqd %xmm8, (%r15,%xmm9,2), %xmm10
746 0xc4 0x02 0x29 0x91 0x04 0x4f
747
748 # CHECK: vpgatherqd %xmm8, (%r15,%ymm9,2), %xmm10
749 0xc4 0x02 0x2d 0x91 0x04 0x4f
732750
733751 # rdar://8812056 lldb doesn't print the x86 lock prefix when disassembling
734752 # CHECK: lock
41254125 // CHECK: encoding: [0xc4,0xe2,0xe9,0x92,0x04,0x4f]
41264126 vgatherdpd %xmm0, (%rdi,%xmm1,2), %xmm2
41274127
4128 // CHECK: vgatherqps %ymm8, (%r15,%ymm9,2), %ymm10
4128 // CHECK: vgatherdpd %ymm0, (%rdi,%xmm1,2), %ymm2
4129 // CHECK: encoding: [0xc4,0xe2,0xed,0x92,0x04,0x4f]
4130 vgatherdpd %ymm0, (%rdi,%xmm1,2), %ymm2
4131
4132 // CHECK: vgatherqps %xmm8, (%r15,%xmm9,2), %xmm10
4133 // CHECK: encoding: [0xc4,0x02,0x29,0x93,0x04,0x4f]
4134 vgatherqps %xmm8, (%r15,%xmm9,2), %xmm10
4135
4136 // CHECK: vgatherqps %xmm8, (%r15,%ymm9,2), %xmm10
41294137 // CHECK: encoding: [0xc4,0x02,0x2d,0x93,0x04,0x4f]
4130 vgatherqps %ymm8, (%r15,%ymm9,2), %ymm10
4138 vgatherqps %xmm8, (%r15,%ymm9,2), %xmm10
4139
4140 // CHECK: vpgatherdq %xmm0, (%rdi,%xmm1,2), %xmm2
4141 // CHECK: encoding: [0xc4,0xe2,0xe9,0x90,0x04,0x4f]
4142 vpgatherdq %xmm0, (%rdi,%xmm1,2), %xmm2
4143
4144 // CHECK: vpgatherdq %ymm0, (%rdi,%xmm1,2), %ymm2
4145 // CHECK: encoding: [0xc4,0xe2,0xed,0x90,0x04,0x4f]
4146 vpgatherdq %ymm0, (%rdi,%xmm1,2), %ymm2
4147
4148 // CHECK: vpgatherqd %xmm8, (%r15,%xmm9,2), %xmm10
4149 // CHECK: encoding: [0xc4,0x02,0x29,0x91,0x04,0x4f]
4150 vpgatherqd %xmm8, (%r15,%xmm9,2), %xmm10
4151
4152 // CHECK: vpgatherqd %xmm8, (%r15,%ymm9,2), %xmm10
4153 // CHECK: encoding: [0xc4,0x02,0x2d,0x91,0x04,0x4f]
4154 vpgatherqd %xmm8, (%r15,%ymm9,2), %xmm10