llvm.org GIT mirror llvm / 51f7c58
[X86][AVX512CD] add mask broadcast intrinsics
Differential Revision: http://reviews.llvm.org/D14573
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@253450 91177308-0d34-0410-b5e6-96231b3b80d8
Asaf Badouh, 3 years ago
10 changed file(s) with 112 addition(s) and 23 deletion(s). Raw diff Collapse all Expand all
49544954
49554955 def int_x86_avx512_pbroadcastq_i64_512 :
49564956 Intrinsic<[llvm_v8i64_ty], [llvm_i64_ty], [IntrNoMem]>;
4957 def int_x86_avx512_broadcastmw_512 :
4958 GCCBuiltin<"__builtin_ia32_broadcastmw512">,
4959 Intrinsic<[llvm_v16i32_ty], [llvm_i16_ty], [IntrNoMem]>;
4960 def int_x86_avx512_broadcastmw_256 :
4961 GCCBuiltin<"__builtin_ia32_broadcastmw256">,
4962 Intrinsic<[llvm_v8i32_ty], [llvm_i16_ty], [IntrNoMem]>;
4963 def int_x86_avx512_broadcastmw_128 :
4964 GCCBuiltin<"__builtin_ia32_broadcastmw128">,
4965 Intrinsic<[llvm_v4i32_ty], [llvm_i16_ty], [IntrNoMem]>;
4966 def int_x86_avx512_broadcastmb_512 :
4967 GCCBuiltin<"__builtin_ia32_broadcastmb512">,
4968 Intrinsic<[llvm_v8i64_ty], [llvm_i8_ty], [IntrNoMem]>;
4969 def int_x86_avx512_broadcastmb_256 :
4970 GCCBuiltin<"__builtin_ia32_broadcastmb256">,
4971 Intrinsic<[llvm_v4i64_ty], [llvm_i8_ty], [IntrNoMem]>;
4972 def int_x86_avx512_broadcastmb_128 :
4973 GCCBuiltin<"__builtin_ia32_broadcastmb128">,
4974 Intrinsic<[llvm_v2i64_ty], [llvm_i8_ty], [IntrNoMem]>;
49574975 }
49584976
49594977 // Vector sign and zero extend
832832 case TYPE_XMM256:
833833 case TYPE_XMM512:
834834 case TYPE_VK1:
835 case TYPE_VK2:
836 case TYPE_VK4:
835837 case TYPE_VK8:
836838 case TYPE_VK16:
839 case TYPE_VK32:
840 case TYPE_VK64:
837841 case TYPE_DEBUGREG:
838842 case TYPE_CONTROLREG:
839843 case TYPE_BNDR:
14871487 case TYPE_XMM: \
14881488 return prefix##_XMM0 + index; \
14891489 case TYPE_VK1: \
1490 case TYPE_VK2: \
1491 case TYPE_VK4: \
14901492 case TYPE_VK8: \
14911493 case TYPE_VK16: \
1494 case TYPE_VK32: \
1495 case TYPE_VK64: \
14921496 if (index > 7) \
14931497 *valid = 0; \
14941498 return prefix##_K0 + index; \
1646116461 return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
1646216462 DataToCompress),
1646316463 Mask, PassThru, Subtarget, DAG);
16464 }
16465 case BROADCASTM: {
16466 SDValue Mask = Op.getOperand(1);
16467 MVT MaskVT = MVT::getVectorVT(MVT::i1, Mask.getSimpleValueType().getSizeInBits());
16468 Mask = DAG.getBitcast(MaskVT, Mask);
16469 return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Mask);
1646416470 }
1646516471 case BLEND: {
1646616472 SDValue Mask = Op.getOperand(3);
2010720113 case X86ISD::UNPCKL: return "X86ISD::UNPCKL";
2010820114 case X86ISD::UNPCKH: return "X86ISD::UNPCKH";
2010920115 case X86ISD::VBROADCAST: return "X86ISD::VBROADCAST";
20116 case X86ISD::VBROADCASTM: return "X86ISD::VBROADCASTM";
2011020117 case X86ISD::SUBV_BROADCAST: return "X86ISD::SUBV_BROADCAST";
2011120118 case X86ISD::VEXTRACT: return "X86ISD::VEXTRACT";
2011220119 case X86ISD::VPERMILPV: return "X86ISD::VPERMILPV";
402402 VFPCLASS,
403403 // Broadcast scalar to vector
404404 VBROADCAST,
405 // Broadcast mask to vector
406 VBROADCASTM,
405407 // Broadcast subvector to vector
406408 SUBV_BROADCAST,
407409 // Insert/Extract vector element
11121112 //===----------------------------------------------------------------------===//
11131113 // AVX-512 BROADCAST MASK TO VECTOR REGISTER
11141114 //---
1115
1116 multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
1117 RegisterClass KRC> {
1118 let Predicates = [HasCDI] in
1119 def Zrr : AVX512XS8I,
1115 multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
1116 X86VectorVTInfo _, RegisterClass KRC> {
1117 def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src),
11201118 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1121 []>, EVEX, EVEX_V512;
1122
1123 let Predicates = [HasCDI, HasVLX] in {
1124 def Z128rr : AVX512XS8I
1125 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1126 []>, EVEX, EVEX_V128;
1127 def Z256rr : AVX512XS8I
1128 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1129 []>, EVEX, EVEX_V256;
1130 }
1131 }
1132
1133 let Predicates = [HasCDI] in {
1119 [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>, EVEX;
1120 }
1121
1122 multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
1123 AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> {
1124 let Predicates = [HasCDI] in
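1125 defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512;
1126 let Predicates = [HasCDI, HasVLX] in {
1127 defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256;
1128 defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128;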
1129 }
1130 }
1131
11341132 defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
1135 VK16>;
1133 avx512vl_i32_info, VK16>;
11361134 defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
1137 VK8>, VEX_W;
1138 }
1135 avx512vl_i64_info, VK8>, VEX_W;
11391136
11401137 //===----------------------------------------------------------------------===//
11411138 // -- VPERM2I - 3 source operands form --
294294 SDTCisInt<2>, SDTCisInt<3>]>;
295295
296296 def SDTVBroadcast : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
297 def SDTVBroadcastm : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>]>;
297 def SDTVBroadcastm : SDTypeProfile<1, 1, [SDTCisVec<0>,
298 SDTCisInt<0>, SDTCisInt<1>]>;
298299
299300 def SDTBlend : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
300301 SDTCisSameAs<1,2>, SDTCisVT<3, i8>]>;
386387 SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>, []>;
387388
388389 def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>;
390 def X86VBroadcastm : SDNode<"X86ISD::VBROADCASTM", SDTVBroadcastm>;
389391 def X86Vinsert : SDNode<"X86ISD::VINSERT", SDTypeProfile<1, 3,
390392 [SDTCisSameAs<0, 1>, SDTCisPtrTy<3>]>, []>;
391393 def X86Vextract : SDNode<"X86ISD::VEXTRACT", SDTypeProfile<1, 2,
2929 COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM,
3030 TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32,
3131 EXPAND_FROM_MEM, BLEND, INSERT_SUBVEC,
32 TERLOG_OP_MASK, TERLOG_OP_MASKZ
32 TERLOG_OP_MASK, TERLOG_OP_MASKZ, BROADCASTM
3333 };
3434
3535 struct IntrinsicData {
314314 X86_INTRINSIC_DATA(avx2_psubus_b, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
315315 X86_INTRINSIC_DATA(avx2_psubus_w, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
316316 X86_INTRINSIC_DATA(avx2_vperm2i128, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0),
317 X86_INTRINSIC_DATA(avx512_broadcastmb_128, BROADCASTM, X86ISD::VBROADCASTM, 0),
318 X86_INTRINSIC_DATA(avx512_broadcastmb_256, BROADCASTM, X86ISD::VBROADCASTM, 0),
319 X86_INTRINSIC_DATA(avx512_broadcastmb_512, BROADCASTM, X86ISD::VBROADCASTM, 0),
320 X86_INTRINSIC_DATA(avx512_broadcastmw_128, BROADCASTM, X86ISD::VBROADCASTM, 0),
321 X86_INTRINSIC_DATA(avx512_broadcastmw_256, BROADCASTM, X86ISD::VBROADCASTM, 0),
322 X86_INTRINSIC_DATA(avx512_broadcastmw_512, BROADCASTM, X86ISD::VBROADCASTM, 0),
317323 X86_INTRINSIC_DATA(avx512_cvtsi2sd32, INTR_TYPE_3OP, X86ISD::SINT_TO_FP_RND, 0),
318324 X86_INTRINSIC_DATA(avx512_cvtsi2sd64, INTR_TYPE_3OP, X86ISD::SINT_TO_FP_RND, 0),
319325 X86_INTRINSIC_DATA(avx512_cvtsi2ss32, INTR_TYPE_3OP, X86ISD::SINT_TO_FP_RND, 0),
0 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512cd | FileCheck %s
1
2 define <16 x i32> @test_x86_vbroadcastmw_512(i16 %a0) {
3 ; CHECK: test_x86_vbroadcastmw_512
4 ; CHECK: vpbroadcastmw2d %k0, %zmm0
5 %res = call <16 x i32> @llvm.x86.avx512.broadcastmw.512(i16 %a0) ;
6 ret <16 x i32> %res
7 }
8 declare <16 x i32> @llvm.x86.avx512.broadcastmw.512(i16)
9
10 define <8 x i64> @test_x86_broadcastmb_512(i8 %a0) {
11 ; CHECK: test_x86_broadcastmb_512
12 ; CHECK: vpbroadcastmb2q %k0, %zmm0
13 %res = call <8 x i64> @llvm.x86.avx512.broadcastmb.512(i8 %a0) ;
14 ret <8 x i64> %res
15 }
16 declare <8 x i64> @llvm.x86.avx512.broadcastmb.512(i8)
17
145145 ret <4 x i64> %res2
146146 }
147147
148 define <8 x i32> @test_x86_vbroadcastmw_256(i16 %a0) {
149 ; CHECK: test_x86_vbroadcastmw_256
150 ; CHECK: vpbroadcastmw2d %k0, %ymm0
151 %res = call <8 x i32> @llvm.x86.avx512.broadcastmw.256(i16 %a0) ;
152 ret <8 x i32> %res
153 }
154 declare <8 x i32> @llvm.x86.avx512.broadcastmw.256(i16)
155
156 define <4 x i32> @test_x86_vbroadcastmw_128(i16 %a0) {
157 ; CHECK: test_x86_vbroadcastmw_128
158 ; CHECK: vpbroadcastmw2d %k0, %xmm0
159 %res = call <4 x i32> @llvm.x86.avx512.broadcastmw.128(i16 %a0) ;
160 ret <4 x i32> %res
161 }
162 declare <4 x i32> @llvm.x86.avx512.broadcastmw.128(i16)
163
164 define <4 x i64> @test_x86_broadcastmb_256(i8 %a0) {
165 ; CHECK: test_x86_broadcastmb_256
166 ; CHECK: vpbroadcastmb2q %k0, %ymm0
167 %res = call <4 x i64> @llvm.x86.avx512.broadcastmb.256(i8 %a0) ;
168 ret <4 x i64> %res
169 }
170 declare <4 x i64> @llvm.x86.avx512.broadcastmb.256(i8)
171
172 define <2 x i64> @test_x86_broadcastmb_128(i8 %a0) {
173 ; CHECK: test_x86_broadcastmb_128
174 ; CHECK: vpbroadcastmb2q %k0, %xmm0
175 %res = call <2 x i64> @llvm.x86.avx512.broadcastmb.128(i8 %a0) ;
176 ret <2 x i64> %res
177 }
178 declare <2 x i64> @llvm.x86.avx512.broadcastmb.128(i8)