llvm.org GIT mirror llvm / cfa5724
[AVX512] Added intrinsics for VPCMPEQB and VPCMPEQW. Added new operand type for intrinsics (IIT_V64). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@218668 91177308-0d34-0410-b5e6-96231b3b80d8 (Robert Khasanov, 5 years ago)
6 changed file(s) with 96 addition(s) and 44 deletion(s). Raw diff Collapse all Expand all
32333233 [IntrNoMem]>;
32343234 }
32353235
3236 // Compares
3237 let TargetPrefix = "x86" in {
3238 // 512-bit
3239 def int_x86_avx512_mask_pcmpeq_b_512 : GCCBuiltin<"__builtin_ia32_pcmpeqb512_mask">,
3240 Intrinsic<[llvm_i64_ty], [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty],
3241 [IntrNoMem]>;
3242 def int_x86_avx512_mask_pcmpeq_w_512 : GCCBuiltin<"__builtin_ia32_pcmpeqw512_mask">,
3243 Intrinsic<[llvm_i32_ty], [llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty],
3244 [IntrNoMem]>;
3245 def int_x86_avx512_mask_pcmpeq_d_512 : GCCBuiltin<"__builtin_ia32_pcmpeqd512_mask">,
3246 Intrinsic<[llvm_i16_ty], [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty],
3247 [IntrNoMem]>;
3248 def int_x86_avx512_mask_pcmpeq_q_512 : GCCBuiltin<"__builtin_ia32_pcmpeqq512_mask">,
3249 Intrinsic<[llvm_i8_ty], [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty],
3250 [IntrNoMem]>;
3251 }
3252
32363253 // Misc.
32373254 let TargetPrefix = "x86" in {
32383255 def int_x86_avx512_mask_cmp_ps_512 : GCCBuiltin<"__builtin_ia32_cmpps512_mask">,
32413258 def int_x86_avx512_mask_cmp_pd_512 : GCCBuiltin<"__builtin_ia32_cmppd512_mask">,
32423259 Intrinsic<[llvm_i8_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i32_ty,
32433260 llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
3244
3245 def int_x86_avx512_mask_pcmpeq_d_512 : GCCBuiltin<"__builtin_ia32_pcmpeqd512_mask">,
3246 Intrinsic<[llvm_i16_ty], [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty],
3247 [IntrNoMem]>;
3248 def int_x86_avx512_mask_pcmpeq_q_512 : GCCBuiltin<"__builtin_ia32_pcmpeqq512_mask">,
3249 Intrinsic<[llvm_i8_ty], [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty],
3250 [IntrNoMem]>;
32513261 def int_x86_avx512_mask_pand_d_512 : GCCBuiltin<"__builtin_ia32_pandd512_mask">,
32523262 Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
32533263 llvm_v16i32_ty, llvm_i16_ty],
473473 ///
474474 /// NOTE: This must be kept in synch with the copy in TblGen/IntrinsicEmitter!
475475 enum IIT_Info {
476 // Common values should be encoded with 0-15.
476 // Common values should be encoded with 0-16.
477477 IIT_Done = 0,
478478 IIT_I1 = 1,
479479 IIT_I8 = 2,
488488 IIT_V8 = 11,
489489 IIT_V16 = 12,
490490 IIT_V32 = 13,
491 IIT_PTR = 14,
492 IIT_ARG = 15,
493
494 // Values from 16+ are only encodable with the inefficient encoding.
495 IIT_MMX = 16,
496 IIT_METADATA = 17,
497 IIT_EMPTYSTRUCT = 18,
498 IIT_STRUCT2 = 19,
499 IIT_STRUCT3 = 20,
500 IIT_STRUCT4 = 21,
501 IIT_STRUCT5 = 22,
502 IIT_EXTEND_ARG = 23,
503 IIT_TRUNC_ARG = 24,
504 IIT_ANYPTR = 25,
505 IIT_V1 = 26,
506 IIT_VARARG = 27,
507 IIT_HALF_VEC_ARG = 28
491 IIT_V64 = 14,
492 IIT_PTR = 15,
493 IIT_ARG = 16,
494
495 // Values from 17+ are only encodable with the inefficient encoding.
496 IIT_MMX = 17,
497 IIT_METADATA = 18,
498 IIT_EMPTYSTRUCT = 19,
499 IIT_STRUCT2 = 20,
500 IIT_STRUCT3 = 21,
501 IIT_STRUCT4 = 22,
502 IIT_STRUCT5 = 23,
503 IIT_EXTEND_ARG = 24,
504 IIT_TRUNC_ARG = 25,
505 IIT_ANYPTR = 26,
506 IIT_V1 = 27,
507 IIT_VARARG = 28,
508 IIT_HALF_VEC_ARG = 29
508509 };
509510
510511
573574 return;
574575 case IIT_V32:
575576 OutputTable.push_back(IITDescriptor::get(IITDescriptor::Vector, 32));
577 DecodeIITType(NextElt, Infos, OutputTable);
578 return;
579 case IIT_V64:
580 OutputTable.push_back(IITDescriptor::get(IITDescriptor::Vector, 64));
576581 DecodeIITType(NextElt, Infos, OutputTable);
577582 return;
578583 case IIT_PTR:
155155 X86_INTRINSIC_DATA(avx2_psubus_b, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
156156 X86_INTRINSIC_DATA(avx2_psubus_w, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
157157 X86_INTRINSIC_DATA(avx2_vperm2i128, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0),
158 X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_512, CMP_MASK, X86ISD::PCMPEQM, 0),
158159 X86_INTRINSIC_DATA(avx512_mask_pcmpeq_d_512, CMP_MASK, X86ISD::PCMPEQM, 0),
159160 X86_INTRINSIC_DATA(avx512_mask_pcmpeq_q_512, CMP_MASK, X86ISD::PCMPEQM, 0),
161 X86_INTRINSIC_DATA(avx512_mask_pcmpeq_w_512, CMP_MASK, X86ISD::PCMPEQM, 0),
160162 X86_INTRINSIC_DATA(avx_hadd_pd_256, INTR_TYPE_2OP, X86ISD::FHADD, 0),
161163 X86_INTRINSIC_DATA(avx_hadd_ps_256, INTR_TYPE_2OP, X86ISD::FHADD, 0),
162164 X86_INTRINSIC_DATA(avx_hsub_pd_256, INTR_TYPE_2OP, X86ISD::FHSUB, 0),
0 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw --show-mc-encoding| FileCheck %s
1
2 define i64 @test_pcmpeq_b(<64 x i8> %a, <64 x i8> %b) {
3 ; CHECK-LABEL: test_pcmpeq_b
4 ; CHECK: vpcmpeqb %zmm1, %zmm0, %k0 ##
5 %res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1)
6 ret i64 %res
7 }
8
9 define i64 @test_mask_pcmpeq_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
10 ; CHECK-LABEL: test_mask_pcmpeq_b
11 ; CHECK: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} ##
12 %res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask)
13 ret i64 %res
14 }
15
16 declare i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8>, <64 x i8>, i64)
17
18 define i32 @test_pcmpeq_w(<32 x i16> %a, <32 x i16> %b) {
19 ; CHECK-LABEL: test_pcmpeq_w
20 ; CHECK: vpcmpeqw %zmm1, %zmm0, %k0 ##
21 %res = call i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16> %a, <32 x i16> %b, i32 -1)
22 ret i32 %res
23 }
24
25 define i32 @test_mask_pcmpeq_w(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
26 ; CHECK-LABEL: test_mask_pcmpeq_w
27 ; CHECK: vpcmpeqw %zmm1, %zmm0, %k0 {%k1} ##
28 %res = call i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16> %a, <32 x i16> %b, i32 %mask)
29 ret i32 %res
30 }
31
32 declare i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16>, <32 x i16>, i32)
2525 def isVoid : ValueType<0, 56>; // Produces no value
2626 def llvm_vararg_ty : LLVMType; // this means vararg here
2727
28 // CHECK: /* 0 */ 0, 27, 0,
28 // CHECK: /* 0 */ 0, 28, 0,
2929 def int_foo : Intrinsic<"llvm.foo", [llvm_vararg_ty]>;
224224
225225 // NOTE: This must be kept in synch with the copy in lib/VMCore/Function.cpp!
226226 enum IIT_Info {
227 // Common values should be encoded with 0-15.
227 // Common values should be encoded with 0-16.
228228 IIT_Done = 0,
229229 IIT_I1 = 1,
230230 IIT_I8 = 2,
239239 IIT_V8 = 11,
240240 IIT_V16 = 12,
241241 IIT_V32 = 13,
242 IIT_PTR = 14,
243 IIT_ARG = 15,
244
245 // Values from 16+ are only encodable with the inefficient encoding.
246 IIT_MMX = 16,
247 IIT_METADATA = 17,
248 IIT_EMPTYSTRUCT = 18,
249 IIT_STRUCT2 = 19,
250 IIT_STRUCT3 = 20,
251 IIT_STRUCT4 = 21,
252 IIT_STRUCT5 = 22,
253 IIT_EXTEND_ARG = 23,
254 IIT_TRUNC_ARG = 24,
255 IIT_ANYPTR = 25,
256 IIT_V1 = 26,
257 IIT_VARARG = 27,
258 IIT_HALF_VEC_ARG = 28
242 IIT_V64 = 14,
243 IIT_PTR = 15,
244 IIT_ARG = 16,
245
246 // Values from 17+ are only encodable with the inefficient encoding.
247 IIT_MMX = 17,
248 IIT_METADATA = 18,
249 IIT_EMPTYSTRUCT = 19,
250 IIT_STRUCT2 = 20,
251 IIT_STRUCT3 = 21,
252 IIT_STRUCT4 = 22,
253 IIT_STRUCT5 = 23,
254 IIT_EXTEND_ARG = 24,
255 IIT_TRUNC_ARG = 25,
256 IIT_ANYPTR = 26,
257 IIT_V1 = 27,
258 IIT_VARARG = 28,
259 IIT_HALF_VEC_ARG = 29
259260 };
260261
261262
355356 case 8: Sig.push_back(IIT_V8); break;
356357 case 16: Sig.push_back(IIT_V16); break;
357358 case 32: Sig.push_back(IIT_V32); break;
359 case 64: Sig.push_back(IIT_V64); break;
358360 }
359361
360362 return EncodeFixedValueType(VVT.getVectorElementType().SimpleTy, Sig);