llvm.org GIT mirror: llvm / commit 9c5eb83

[X86] Remove GCCBuiltins from 512-bit cvt(u)qq2ps, cvt(u)qq2pd, and cvt(u)dq2ps intrinsics. Add new variadic uitofp/sitofp with rounding mode intrinsics.

Summary: See clang patch D56998 for a full description.

Reviewers: RKSimon, spatel
Reviewed By: RKSimon
Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D56999

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@352266 91177308-0d34-0410-b5e6-96231b3b80d8

Craig Topper, 1 year, 4 months ago
9 changed file(s) with 293 addition(s) and 126 deletion(s).
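In IR terms, the change swaps the fixed-signature 512-bit masked conversion intrinsics for two type-overloaded intrinsics that take only the source vector and a rounding-mode immediate; masking is now expressed with a generic select. A minimal sketch of the new shape, assembled from the updated tests below (the function name is illustrative):

declare <16 x float> @llvm.x86.avx512.sitofp.round.v16f32.v16i32(<16 x i32>, i32)

define <16 x float> @cvt_dq2ps_rn_masked(<16 x i32> %src, <16 x float> %passthru, i16 %mask) {
  ; rounding-mode immediate 0 selects {rn-sae}; the default mode 4
  ; (CUR_DIRECTION) is represented as a plain sitofp instruction instead
  %cvt = call <16 x float> @llvm.x86.avx512.sitofp.round.v16f32.v16i32(<16 x i32> %src, i32 0)
  %m = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %m, <16 x float> %cvt, <16 x float> %passthru
  ret <16 x float> %res
}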
include/llvm/IR/IntrinsicsX86.td:

26872687
26882688 // Vector convert
26892689 let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
2690 def int_x86_avx512_mask_cvtdq2ps_512 :
2691 GCCBuiltin<"__builtin_ia32_cvtdq2ps512_mask">,
2692 Intrinsic<[llvm_v16f32_ty],
2693 [llvm_v16i32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty],
2690 def int_x86_avx512_sitofp_round :
2691 Intrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty, llvm_i32_ty],
2692 [IntrNoMem]>;
2693
2694 def int_x86_avx512_uitofp_round :
2695 Intrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty, llvm_i32_ty],
26942696 [IntrNoMem]>;
26952697
26962698 def int_x86_avx512_mask_cvtpd2dq_128 :
28612863 [llvm_v8f32_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty],
28622864 [IntrNoMem]>;
28632865
2864 def int_x86_avx512_mask_cvtqq2pd_512 :
2865 GCCBuiltin<"__builtin_ia32_cvtqq2pd512_mask">,
2866 Intrinsic<[llvm_v8f64_ty],
2867 [llvm_v8i64_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty],
2868 [IntrNoMem]>;
2869
28702866 def int_x86_avx512_mask_cvtqq2ps_128 :
28712867 GCCBuiltin<"__builtin_ia32_cvtqq2ps128_mask">,
28722868 Intrinsic<[llvm_v4f32_ty],
28732869 [llvm_v2i64_ty, llvm_v4f32_ty, llvm_i8_ty],
28742870 [IntrNoMem]>;
28752871
2876 def int_x86_avx512_mask_cvtqq2ps_256 :
2877 GCCBuiltin<"__builtin_ia32_cvtqq2ps256_mask">,
2878 Intrinsic<[llvm_v4f32_ty],
2879 [llvm_v4i64_ty, llvm_v4f32_ty, llvm_i8_ty],
2880 [IntrNoMem]>;
2881
2882 def int_x86_avx512_mask_cvtqq2ps_512 :
2883 GCCBuiltin<"__builtin_ia32_cvtqq2ps512_mask">,
2884 Intrinsic<[llvm_v8f32_ty],
2885 [llvm_v8i64_ty, llvm_v8f32_ty, llvm_i8_ty, llvm_i32_ty],
2886 [IntrNoMem]>;
2887
28882872 def int_x86_avx512_mask_cvttpd2dq_128 :
28892873 GCCBuiltin<"__builtin_ia32_cvttpd2dq128_mask">,
28902874 Intrinsic<[llvm_v4i32_ty],
30112995 [llvm_v8f32_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty],
30122996 [IntrNoMem]>;
30132997
3014 def int_x86_avx512_mask_cvtudq2ps_512 :
3015 GCCBuiltin<"__builtin_ia32_cvtudq2ps512_mask">,
3016 Intrinsic<[llvm_v16f32_ty],
3017 [llvm_v16i32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty],
3018 [IntrNoMem]>;
3019
3020 def int_x86_avx512_mask_cvtuqq2pd_512 :
3021 GCCBuiltin<"__builtin_ia32_cvtuqq2pd512_mask">,
3022 Intrinsic<[llvm_v8f64_ty],
3023 [llvm_v8i64_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty],
3024 [IntrNoMem]>;
3025
30262998 def int_x86_avx512_mask_cvtuqq2ps_128 :
30272999 GCCBuiltin<"__builtin_ia32_cvtuqq2ps128_mask">,
30283000 Intrinsic<[llvm_v4f32_ty],
30293001 [llvm_v2i64_ty, llvm_v4f32_ty, llvm_i8_ty],
3030 [IntrNoMem]>;
3031
3032 def int_x86_avx512_mask_cvtuqq2ps_256 :
3033 GCCBuiltin<"__builtin_ia32_cvtuqq2ps256_mask">,
3034 Intrinsic<[llvm_v4f32_ty],
3035 [llvm_v4i64_ty, llvm_v4f32_ty, llvm_i8_ty],
3036 [IntrNoMem]>;
3037
3038 def int_x86_avx512_mask_cvtuqq2ps_512 :
3039 GCCBuiltin<"__builtin_ia32_cvtuqq2ps512_mask">,
3040 Intrinsic<[llvm_v8f32_ty],
3041 [llvm_v8i64_ty, llvm_v8f32_ty, llvm_i8_ty, llvm_i32_ty],
30423002 [IntrNoMem]>;
30433003
30443004 def int_x86_avx512_mask_rndscale_pd_128 : GCCBuiltin<"__builtin_ia32_rndscalepd_128_mask">,
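Because the new definitions use llvm_anyfloat_ty and llvm_anyint_ty, a single TableGen entry covers every legal result/source pairing, with the concrete types encoded in the overloaded intrinsic's mangled name. For example, all of the following declarations from the updated tests resolve to the one int_x86_avx512_uitofp_round definition above:

declare <16 x float> @llvm.x86.avx512.uitofp.round.v16f32.v16i32(<16 x i32>, i32)
declare <8 x double> @llvm.x86.avx512.uitofp.round.v8f64.v8i64(<8 x i64>, i32)
declare <8 x float> @llvm.x86.avx512.uitofp.round.v8f32.v8i64(<8 x i64>, i32)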
lib/IR/AutoUpgrade.cpp:

197197 Name.startswith("avx512.mask.pmull.") || // Added in 4.0
198198 Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
199199 Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
200 Name == "avx512.mask.cvtudq2ps.128" || // Added in 7.0
201 Name == "avx512.mask.cvtudq2ps.256" || // Added in 7.0
202 Name == "avx512.mask.cvtqq2pd.128" || // Added in 7.0
203 Name == "avx512.mask.cvtqq2pd.256" || // Added in 7.0
204 Name == "avx512.mask.cvtuqq2pd.128" || // Added in 7.0
205 Name == "avx512.mask.cvtuqq2pd.256" || // Added in 7.0
206 Name == "avx512.mask.cvtdq2ps.128" || // Added in 7.0
207 Name == "avx512.mask.cvtdq2ps.256" || // Added in 7.0
200 Name.startswith("avx512.mask.cvtudq2ps.") || // Added in 7.0 updated 9.0
201 Name.startswith("avx512.mask.cvtqq2pd.") || // Added in 7.0 updated 9.0
202 Name.startswith("avx512.mask.cvtuqq2pd.") || // Added in 7.0 updated 9.0
203 Name.startswith("avx512.mask.cvtdq2ps.") || // Added in 7.0 updated 9.0
204 Name == "avx512.mask.cvtqq2ps.256" || // Added in 9.0
205 Name == "avx512.mask.cvtqq2ps.512" || // Added in 9.0
206 Name == "avx512.mask.cvtuqq2ps.256" || // Added in 9.0
207 Name == "avx512.mask.cvtuqq2ps.512" || // Added in 9.0
208208 Name == "avx512.mask.cvtpd2dq.256" || // Added in 7.0
209209 Name == "avx512.mask.cvtpd2ps.256" || // Added in 7.0
210210 Name == "avx512.mask.cvttpd2dq.256" || // Added in 7.0
19571957 Name == "avx.cvtdq2.ps.256" ||
19581958 Name.startswith("avx512.mask.cvtdq2pd.") ||
19591959 Name.startswith("avx512.mask.cvtudq2pd.") ||
1960 Name == "avx512.mask.cvtdq2ps.128" ||
1961 Name == "avx512.mask.cvtdq2ps.256" ||
1962 Name == "avx512.mask.cvtudq2ps.128" ||
1963 Name == "avx512.mask.cvtudq2ps.256" ||
1964 Name == "avx512.mask.cvtqq2pd.128" ||
1965 Name == "avx512.mask.cvtqq2pd.256" ||
1966 Name == "avx512.mask.cvtuqq2pd.128" ||
1967 Name == "avx512.mask.cvtuqq2pd.256" ||
1960 Name.startswith("avx512.mask.cvtdq2ps.") ||
1961 Name.startswith("avx512.mask.cvtudq2ps.") ||
1962 Name.startswith("avx512.mask.cvtqq2pd.") ||
1963 Name.startswith("avx512.mask.cvtuqq2pd.") ||
1964 Name == "avx512.mask.cvtqq2ps.256" ||
1965 Name == "avx512.mask.cvtqq2ps.512" ||
1966 Name == "avx512.mask.cvtuqq2ps.256" ||
1967 Name == "avx512.mask.cvtuqq2ps.512" ||
19681968 Name == "sse2.cvtps2pd" ||
19691969 Name == "avx.cvt.ps2.pd.256" ||
19701970 Name == "avx512.mask.cvtps2pd.128" ||
19711971 Name == "avx512.mask.cvtps2pd.256")) {
19721972 Type *DstTy = CI->getType();
19731973 Rep = CI->getArgOperand(0);
1974 Type *SrcTy = Rep->getType();
19741975
19751976 unsigned NumDstElts = DstTy->getVectorNumElements();
1976 if (NumDstElts < Rep->getType()->getVectorNumElements()) {
1977 if (NumDstElts < SrcTy->getVectorNumElements()) {
19771978 assert(NumDstElts == 2 && "Unexpected vector size");
19781979 uint32_t ShuffleMask[2] = { 0, 1 };
19791980 Rep = Builder.CreateShuffleVector(Rep, Rep, ShuffleMask);
19801981 }
19811982
1982 bool IsPS2PD = (StringRef::npos != Name.find("ps2"));
1983 bool IsPS2PD = SrcTy->getVectorElementType()->isFloatTy();
19831984 bool IsUnsigned = (StringRef::npos != Name.find("cvtu"));
19841985 if (IsPS2PD)
19851986 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
1986 else if (IsUnsigned)
1987 Rep = Builder.CreateUIToFP(Rep, DstTy, "cvt");
1988 else
1989 Rep = Builder.CreateSIToFP(Rep, DstTy, "cvt");
1990
1991 if (CI->getNumArgOperands() == 3)
1987 else if (CI->getNumArgOperands() == 4 &&
1988 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
1989 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
1990 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
1991 : Intrinsic::x86_avx512_sitofp_round;
1992 Function *F = Intrinsic::getDeclaration(CI->getModule(), IID,
1993 { DstTy, SrcTy });
1994 Rep = Builder.CreateCall(F, { Rep, CI->getArgOperand(3) });
1995 } else {
1996 Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
1997 : Builder.CreateSIToFP(Rep, DstTy, "cvt");
1998 }
1999
2000 if (CI->getNumArgOperands() >= 3)
19922001 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
19932002 CI->getArgOperand(1));
19942003 } else if (IsX86 && (Name.startswith("avx512.mask.loadu."))) {
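Sketched on one of the removed 512-bit intrinsics, the upgrade path above works as follows (value names are illustrative): a constant rounding operand of 4 (CUR_DIRECTION) lowers to a native sitofp/uitofp, any other rounding operand is routed to the new overloaded intrinsic, and the mask is applied with a select in either case:

; old form, as found in existing bitcode:
;   %r = call <8 x double> @llvm.x86.avx512.mask.cvtqq2pd.512(<8 x i64> %x, <8 x double> %p, i8 %m, i32 0)
; upgraded form:
%cvt = call <8 x double> @llvm.x86.avx512.sitofp.round.v8f64.v8i64(<8 x i64> %x, i32 0)
%mv = bitcast i8 %m to <8 x i1>
%r = select <8 x i1> %mv, <8 x double> %cvt, <8 x double> %p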
lib/Target/X86/X86IntrinsicsInfo.h:

512512 X86ISD::CONFLICT, 0),
513513 X86_INTRINSIC_DATA(avx512_mask_conflict_q_512, INTR_TYPE_1OP_MASK,
514514 X86ISD::CONFLICT, 0),
515 X86_INTRINSIC_DATA(avx512_mask_cvtdq2ps_512, INTR_TYPE_1OP_MASK,
516 ISD::SINT_TO_FP, X86ISD::SINT_TO_FP_RND), //er
517515 X86_INTRINSIC_DATA(avx512_mask_cvtpd2dq_128, CVTPD2DQ_MASK,
518516 X86ISD::CVTP2SI, X86ISD::MCVTP2SI),
519517 X86_INTRINSIC_DATA(avx512_mask_cvtpd2dq_512, INTR_TYPE_1OP_MASK,
566564 X86ISD::CVTP2UI, 0),
567565 X86_INTRINSIC_DATA(avx512_mask_cvtps2uqq_512, INTR_TYPE_1OP_MASK,
568566 X86ISD::CVTP2UI, X86ISD::CVTP2UI_RND),
569 X86_INTRINSIC_DATA(avx512_mask_cvtqq2pd_512, INTR_TYPE_1OP_MASK,
570 ISD::SINT_TO_FP, X86ISD::SINT_TO_FP_RND),
571567 X86_INTRINSIC_DATA(avx512_mask_cvtqq2ps_128, CVTQQ2PS_MASK,
572568 X86ISD::CVTSI2P, X86ISD::MCVTSI2P),
573 X86_INTRINSIC_DATA(avx512_mask_cvtqq2ps_256, INTR_TYPE_1OP_MASK,
574 ISD::SINT_TO_FP, 0),
575 X86_INTRINSIC_DATA(avx512_mask_cvtqq2ps_512, INTR_TYPE_1OP_MASK,
576 ISD::SINT_TO_FP, X86ISD::SINT_TO_FP_RND),
577569 X86_INTRINSIC_DATA(avx512_mask_cvtsd2ss_round, INTR_TYPE_SCALAR_MASK_RM,
578570 X86ISD::VFPROUNDS_RND, 0),
579571 X86_INTRINSIC_DATA(avx512_mask_cvtss2sd_round, INTR_TYPE_SCALAR_MASK_RM,
620612 X86ISD::CVTTP2UI, 0),
621613 X86_INTRINSIC_DATA(avx512_mask_cvttps2uqq_512, INTR_TYPE_1OP_MASK,
622614 X86ISD::CVTTP2UI, X86ISD::CVTTP2UI_RND),
623 X86_INTRINSIC_DATA(avx512_mask_cvtudq2ps_512, INTR_TYPE_1OP_MASK,
624 ISD::UINT_TO_FP, X86ISD::UINT_TO_FP_RND),
625 X86_INTRINSIC_DATA(avx512_mask_cvtuqq2pd_512, INTR_TYPE_1OP_MASK,
626 ISD::UINT_TO_FP, X86ISD::UINT_TO_FP_RND),
627615 X86_INTRINSIC_DATA(avx512_mask_cvtuqq2ps_128, CVTQQ2PS_MASK,
628616 X86ISD::CVTUI2P, X86ISD::MCVTUI2P),
629 X86_INTRINSIC_DATA(avx512_mask_cvtuqq2ps_256, INTR_TYPE_1OP_MASK,
630 ISD::UINT_TO_FP, 0),
631 X86_INTRINSIC_DATA(avx512_mask_cvtuqq2ps_512, INTR_TYPE_1OP_MASK,
632 ISD::UINT_TO_FP, X86ISD::UINT_TO_FP_RND),
633617 X86_INTRINSIC_DATA(avx512_mask_div_sd_round, INTR_TYPE_SCALAR_MASK_RM,
634618 X86ISD::FDIVS_RND, 0),
635619 X86_INTRINSIC_DATA(avx512_mask_div_ss_round, INTR_TYPE_SCALAR_MASK_RM,
1005989 X86_INTRINSIC_DATA(avx512_rsqrt28_ps, INTR_TYPE_1OP_MASK_RM,X86ISD::RSQRT28, 0),
1006990 X86_INTRINSIC_DATA(avx512_rsqrt28_sd, INTR_TYPE_SCALAR_MASK_RM,X86ISD::RSQRT28S, 0),
1007991 X86_INTRINSIC_DATA(avx512_rsqrt28_ss, INTR_TYPE_SCALAR_MASK_RM,X86ISD::RSQRT28S, 0),
992 X86_INTRINSIC_DATA(avx512_sitofp_round, INTR_TYPE_1OP, ISD::SINT_TO_FP, X86ISD::SINT_TO_FP_RND),
1008993 X86_INTRINSIC_DATA(avx512_sqrt_pd_512, INTR_TYPE_1OP, ISD::FSQRT, X86ISD::FSQRT_RND),
1009994 X86_INTRINSIC_DATA(avx512_sqrt_ps_512, INTR_TYPE_1OP, ISD::FSQRT, X86ISD::FSQRT_RND),
1010995 X86_INTRINSIC_DATA(avx512_sub_pd_512, INTR_TYPE_2OP, ISD::FSUB, X86ISD::FSUB_RND),
1011996 X86_INTRINSIC_DATA(avx512_sub_ps_512, INTR_TYPE_2OP, ISD::FSUB, X86ISD::FSUB_RND),
997 X86_INTRINSIC_DATA(avx512_uitofp_round, INTR_TYPE_1OP, ISD::UINT_TO_FP, X86ISD::UINT_TO_FP_RND),
1012998 X86_INTRINSIC_DATA(avx512_vcomi_sd, COMI_RM, X86ISD::COMI, X86ISD::UCOMI),
1013999 X86_INTRINSIC_DATA(avx512_vcomi_ss, COMI_RM, X86ISD::COMI, X86ISD::UCOMI),
10141000 X86_INTRINSIC_DATA(avx512_vcvtsd2si32, INTR_TYPE_1OP, X86ISD::CVTS2SI, X86ISD::CVTS2SI_RND),
test/CodeGen/X86/avx512-intrinsics-upgrade.ll:

97339733 %res4 = add <8 x i32> %res3, %res2
97349734 ret <8 x i32> %res4
97359735 }
9736
9737 declare <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32>, <16 x float>, i16, i32)
9738
9739 define <16 x float> @test_int_x86_avx512_mask_cvt_dq2ps_512(<16 x i32> %x0, <16 x float> %x1, i16 %x2) {
9740 ; X86-LABEL: test_int_x86_avx512_mask_cvt_dq2ps_512:
9741 ; X86: ## %bb.0:
9742 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
9743 ; X86-NEXT: vcvtdq2ps %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x5b,0xc8]
9744 ; X86-NEXT: vcvtdq2ps {rn-sae}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x5b,0xc0]
9745 ; X86-NEXT: vaddps %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xc0]
9746 ; X86-NEXT: retl ## encoding: [0xc3]
9747 ;
9748 ; X64-LABEL: test_int_x86_avx512_mask_cvt_dq2ps_512:
9749 ; X64: ## %bb.0:
9750 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
9751 ; X64-NEXT: vcvtdq2ps %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x5b,0xc8]
9752 ; X64-NEXT: vcvtdq2ps {rn-sae}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x5b,0xc0]
9753 ; X64-NEXT: vaddps %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xc0]
9754 ; X64-NEXT: retq ## encoding: [0xc3]
9755 %res = call <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 %x2, i32 4)
9756 %res1 = call <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 -1, i32 0)
9757 %res2 = fadd <16 x float> %res, %res1
9758 ret <16 x float> %res2
9759 }
9760
9761 declare <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32>, <16 x float>, i16, i32)
9762
9763 define <16 x float> @test_int_x86_avx512_mask_cvt_udq2ps_512(<16 x i32> %x0, <16 x float> %x1, i16 %x2) {
9764 ; X86-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_512:
9765 ; X86: ## %bb.0:
9766 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
9767 ; X86-NEXT: vcvtudq2ps %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7f,0x49,0x7a,0xc8]
9768 ; X86-NEXT: vcvtudq2ps {rn-sae}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7f,0x18,0x7a,0xc0]
9769 ; X86-NEXT: vaddps %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xc0]
9770 ; X86-NEXT: retl ## encoding: [0xc3]
9771 ;
9772 ; X64-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_512:
9773 ; X64: ## %bb.0:
9774 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
9775 ; X64-NEXT: vcvtudq2ps %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7f,0x49,0x7a,0xc8]
9776 ; X64-NEXT: vcvtudq2ps {rn-sae}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7f,0x18,0x7a,0xc0]
9777 ; X64-NEXT: vaddps %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xc0]
9778 ; X64-NEXT: retq ## encoding: [0xc3]
9779 %res = call <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 %x2, i32 4)
9780 %res1 = call <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 -1, i32 0)
9781 %res2 = fadd <16 x float> %res, %res1
9782 ret <16 x float> %res2
9783 }
test/CodeGen/X86/avx512-intrinsics.ll:

31323132 ret void
31333133 }
31343134
3135 declare <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32>, <16 x float>, i16, i32)
3135 declare <16 x float> @llvm.x86.avx512.sitofp.round.v16f32.v16i32(<16 x i32>, i32)
31363136
31373137 define <16 x float>@test_int_x86_avx512_mask_cvt_dq2ps_512(<16 x i32> %x0, <16 x float> %x1, i16 %x2) {
31383138 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ps_512:
31423142 ; CHECK-NEXT: vcvtdq2ps {rn-sae}, %zmm0, %zmm0
31433143 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
31443144 ; CHECK-NEXT: retq
3145 %res = call <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 %x2, i32 4)
3146 %res1 = call <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 -1, i32 0)
3147 %res2 = fadd <16 x float> %res, %res1
3145 %cvt = sitofp <16 x i32> %x0 to <16 x float>
3146 %1 = bitcast i16 %x2 to <16 x i1>
3147 %2 = select <16 x i1> %1, <16 x float> %cvt, <16 x float> %x1
3148 %3 = call <16 x float> @llvm.x86.avx512.sitofp.round.v16f32.v16i32(<16 x i32> %x0, i32 0)
3149 %res2 = fadd <16 x float> %2, %3
31483150 ret <16 x float> %res2
31493151 }
31503152
32603262 ret <8 x i32> %res2
32613263 }
32623264
3263 declare <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32>, <16 x float>, i16, i32)
3265 declare <16 x float> @llvm.x86.avx512.uitofp.round.v16f32.v16i32(<16 x i32>, i32)
32643266
32653267 define <16 x float>@test_int_x86_avx512_mask_cvt_udq2ps_512(<16 x i32> %x0, <16 x float> %x1, i16 %x2) {
32663268 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_512:
32703272 ; CHECK-NEXT: vcvtudq2ps {rn-sae}, %zmm0, %zmm0
32713273 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
32723274 ; CHECK-NEXT: retq
3273 %res = call <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 %x2, i32 4)
3274 %res1 = call <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 -1, i32 0)
3275 %res2 = fadd <16 x float> %res, %res1
3275 %cvt = uitofp <16 x i32> %x0 to <16 x float>
3276 %1 = bitcast i16 %x2 to <16 x i1>
3277 %2 = select <16 x i1> %1, <16 x float> %cvt, <16 x float> %x1
3278 %3 = call <16 x float> @llvm.x86.avx512.uitofp.round.v16f32.v16i32(<16 x i32> %x0, i32 0)
3279 %res2 = fadd <16 x float> %2, %3
32763280 ret <16 x float> %res2
32773281 }
32783282
test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll:

566566 %res1 = call i16 @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %x0, i32 2, i16 %res)
567567 ret i16 %res1
568568 }
569
570 declare <8 x double> @llvm.x86.avx512.mask.cvtqq2pd.512(<8 x i64>, <8 x double>, i8, i32)
571
572 define <8 x double>@test_int_x86_avx512_mask_cvt_qq2pd_512(<8 x i64> %x0, <8 x double> %x1, i8 %x2) {
573 ; X86-LABEL: test_int_x86_avx512_mask_cvt_qq2pd_512:
574 ; X86: # %bb.0:
575 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
576 ; X86-NEXT: vcvtqq2pd %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfe,0x49,0xe6,0xc8]
577 ; X86-NEXT: vcvtqq2pd {rn-sae}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfe,0x18,0xe6,0xc0]
578 ; X86-NEXT: vaddpd %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc0]
579 ; X86-NEXT: retl # encoding: [0xc3]
580 ;
581 ; X64-LABEL: test_int_x86_avx512_mask_cvt_qq2pd_512:
582 ; X64: # %bb.0:
583 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
584 ; X64-NEXT: vcvtqq2pd %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfe,0x49,0xe6,0xc8]
585 ; X64-NEXT: vcvtqq2pd {rn-sae}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfe,0x18,0xe6,0xc0]
586 ; X64-NEXT: vaddpd %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc0]
587 ; X64-NEXT: retq # encoding: [0xc3]
588 %res = call <8 x double> @llvm.x86.avx512.mask.cvtqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 %x2, i32 4)
589 %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 -1, i32 0)
590 %res2 = fadd <8 x double> %res, %res1
591 ret <8 x double> %res2
592 }
593
594 declare <8 x float> @llvm.x86.avx512.mask.cvtqq2ps.512(<8 x i64>, <8 x float>, i8, i32)
595
596 define <8 x float> @test_int_x86_avx512_mask_cvt_qq2ps_512(<8 x i64> %x0, <8 x float> %x1, i8 %x2) {
597 ; X86-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_512:
598 ; X86: # %bb.0:
599 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
600 ; X86-NEXT: vcvtqq2ps %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x49,0x5b,0xc8]
601 ; X86-NEXT: vcvtqq2ps {rn-sae}, %zmm0, %ymm0 # encoding: [0x62,0xf1,0xfc,0x18,0x5b,0xc0]
602 ; X86-NEXT: vaddps %ymm0, %ymm1, %ymm0 # encoding: [0xc5,0xf4,0x58,0xc0]
603 ; X86-NEXT: retl # encoding: [0xc3]
604 ;
605 ; X64-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_512:
606 ; X64: # %bb.0:
607 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
608 ; X64-NEXT: vcvtqq2ps %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x49,0x5b,0xc8]
609 ; X64-NEXT: vcvtqq2ps {rn-sae}, %zmm0, %ymm0 # encoding: [0x62,0xf1,0xfc,0x18,0x5b,0xc0]
610 ; X64-NEXT: vaddps %ymm0, %ymm1, %ymm0 # encoding: [0xc5,0xf4,0x58,0xc0]
611 ; X64-NEXT: retq # encoding: [0xc3]
612 %res = call <8 x float> @llvm.x86.avx512.mask.cvtqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 %x2, i32 4)
613 %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 -1, i32 0)
614 %res2 = fadd <8 x float> %res, %res1
615 ret <8 x float> %res2
616 }
617
618 declare <8 x double> @llvm.x86.avx512.mask.cvtuqq2pd.512(<8 x i64>, <8 x double>, i8, i32)
619
620 define <8 x double> @test_int_x86_avx512_mask_cvt_uqq2pd_512(<8 x i64> %x0, <8 x double> %x1, i8 %x2) {
621 ; X86-LABEL: test_int_x86_avx512_mask_cvt_uqq2pd_512:
622 ; X86: # %bb.0:
623 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
624 ; X86-NEXT: vcvtuqq2pd %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfe,0x49,0x7a,0xc8]
625 ; X86-NEXT: vcvtuqq2pd {rn-sae}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfe,0x18,0x7a,0xc0]
626 ; X86-NEXT: vaddpd %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc0]
627 ; X86-NEXT: retl # encoding: [0xc3]
628 ;
629 ; X64-LABEL: test_int_x86_avx512_mask_cvt_uqq2pd_512:
630 ; X64: # %bb.0:
631 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
632 ; X64-NEXT: vcvtuqq2pd %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfe,0x49,0x7a,0xc8]
633 ; X64-NEXT: vcvtuqq2pd {rn-sae}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfe,0x18,0x7a,0xc0]
634 ; X64-NEXT: vaddpd %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc0]
635 ; X64-NEXT: retq # encoding: [0xc3]
636 %res = call <8 x double> @llvm.x86.avx512.mask.cvtuqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 %x2, i32 4)
637 %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtuqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 -1, i32 0)
638 %res2 = fadd <8 x double> %res, %res1
639 ret <8 x double> %res2
640 }
641
642 declare <8 x float> @llvm.x86.avx512.mask.cvtuqq2ps.512(<8 x i64>, <8 x float>, i8, i32)
643
644 define <8 x float> @test_int_x86_avx512_mask_cvt_uqq2ps_512(<8 x i64> %x0, <8 x float> %x1, i8 %x2) {
645 ; X86-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_512:
646 ; X86: # %bb.0:
647 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
648 ; X86-NEXT: vcvtuqq2ps %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x7a,0xc8]
649 ; X86-NEXT: vcvtuqq2ps {rn-sae}, %zmm0, %ymm0 # encoding: [0x62,0xf1,0xff,0x18,0x7a,0xc0]
650 ; X86-NEXT: vaddps %ymm0, %ymm1, %ymm0 # encoding: [0xc5,0xf4,0x58,0xc0]
651 ; X86-NEXT: retl # encoding: [0xc3]
652 ;
653 ; X64-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_512:
654 ; X64: # %bb.0:
655 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
656 ; X64-NEXT: vcvtuqq2ps %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x7a,0xc8]
657 ; X64-NEXT: vcvtuqq2ps {rn-sae}, %zmm0, %ymm0 # encoding: [0x62,0xf1,0xff,0x18,0x7a,0xc0]
658 ; X64-NEXT: vaddps %ymm0, %ymm1, %ymm0 # encoding: [0xc5,0xf4,0x58,0xc0]
659 ; X64-NEXT: retq # encoding: [0xc3]
660 %res = call <8 x float> @llvm.x86.avx512.mask.cvtuqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 %x2, i32 4)
661 %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtuqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 -1, i32 0)
662 %res2 = fadd <8 x float> %res, %res1
663 ret <8 x float> %res2
664 }
test/CodeGen/X86/avx512dq-intrinsics.ll:

211211 ret <8 x i64> %res2
212212 }
213213
214 declare <8 x double> @llvm.x86.avx512.mask.cvtqq2pd.512(<8 x i64>, <8 x double>, i8, i32)
214 declare <8 x double> @llvm.x86.avx512.sitofp.round.v8f64.v8i64(<8 x i64>, i32)
215215
216216 define <8 x double>@test_int_x86_avx512_mask_cvt_qq2pd_512(<8 x i64> %x0, <8 x double> %x1, i8 %x2) {
217217 ; X86-LABEL: test_int_x86_avx512_mask_cvt_qq2pd_512:
229229 ; X64-NEXT: vcvtqq2pd {rn-sae}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfe,0x18,0xe6,0xc0]
230230 ; X64-NEXT: vaddpd %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc0]
231231 ; X64-NEXT: retq # encoding: [0xc3]
232 %res = call <8 x double> @llvm.x86.avx512.mask.cvtqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 %x2, i32 4)
233 %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 -1, i32 0)
234 %res2 = fadd <8 x double> %res, %res1
232 %cvt = sitofp <8 x i64> %x0 to <8 x double>
233 %1 = bitcast i8 %x2 to <8 x i1>
234 %2 = select <8 x i1> %1, <8 x double> %cvt, <8 x double> %x1
235 %3 = call <8 x double> @llvm.x86.avx512.sitofp.round.v8f64.v8i64(<8 x i64> %x0, i32 0)
236 %res2 = fadd <8 x double> %2, %3
235237 ret <8 x double> %res2
236238 }
237239
238 declare <8 x float> @llvm.x86.avx512.mask.cvtqq2ps.512(<8 x i64>, <8 x float>, i8, i32)
240 declare <8 x float> @llvm.x86.avx512.sitofp.round.v8f32.v8i64(<8 x i64>, i32)
239241
240242 define <8 x float>@test_int_x86_avx512_mask_cvt_qq2ps_512(<8 x i64> %x0, <8 x float> %x1, i8 %x2) {
241243 ; X86-AVX512DQ-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_512:
269271 ; X64-AVX512DQVL-NEXT: vcvtqq2ps {rn-sae}, %zmm0, %ymm0 # encoding: [0x62,0xf1,0xfc,0x18,0x5b,0xc0]
270272 ; X64-AVX512DQVL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
271273 ; X64-AVX512DQVL-NEXT: retq # encoding: [0xc3]
272 %res = call <8 x float> @llvm.x86.avx512.mask.cvtqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 %x2, i32 4)
273 %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 -1, i32 0)
274 %res2 = fadd <8 x float> %res, %res1
274 %cvt = sitofp <8 x i64> %x0 to <8 x float>
275 %1 = bitcast i8 %x2 to <8 x i1>
276 %2 = select <8 x i1> %1, <8 x float> %cvt, <8 x float> %x1
277 %3 = call <8 x float> @llvm.x86.avx512.sitofp.round.v8f32.v8i64(<8 x i64> %x0, i32 0)
278 %res2 = fadd <8 x float> %2, %3
275279 ret <8 x float> %res2
276280 }
277281
371375 ret <8 x i64> %res2
372376 }
373377
374 declare <8 x double> @llvm.x86.avx512.mask.cvtuqq2pd.512(<8 x i64>, <8 x double>, i8, i32)
378 declare <8 x double> @llvm.x86.avx512.uitofp.round.v8f64.v8i64(<8 x i64>, i32)
375379
376380 define <8 x double>@test_int_x86_avx512_mask_cvt_uqq2pd_512(<8 x i64> %x0, <8 x double> %x1, i8 %x2) {
377381 ; X86-LABEL: test_int_x86_avx512_mask_cvt_uqq2pd_512:
389393 ; X64-NEXT: vcvtuqq2pd {rn-sae}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfe,0x18,0x7a,0xc0]
390394 ; X64-NEXT: vaddpd %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc0]
391395 ; X64-NEXT: retq # encoding: [0xc3]
392 %res = call <8 x double> @llvm.x86.avx512.mask.cvtuqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 %x2, i32 4)
393 %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtuqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 -1, i32 0)
394 %res2 = fadd <8 x double> %res, %res1
396 %cvt = uitofp <8 x i64> %x0 to <8 x double>
397 %1 = bitcast i8 %x2 to <8 x i1>
398 %2 = select <8 x i1> %1, <8 x double> %cvt, <8 x double> %x1
399 %3 = call <8 x double> @llvm.x86.avx512.uitofp.round.v8f64.v8i64(<8 x i64> %x0, i32 0)
400 %res2 = fadd <8 x double> %2, %3
395401 ret <8 x double> %res2
396402 }
397403
398 declare <8 x float> @llvm.x86.avx512.mask.cvtuqq2ps.512(<8 x i64>, <8 x float>, i8, i32)
404 declare <8 x float> @llvm.x86.avx512.uitofp.round.v8f32.v8i64(<8 x i64>, i32)
399405
400406 define <8 x float>@test_int_x86_avx512_mask_cvt_uqq2ps_512(<8 x i64> %x0, <8 x float> %x1, i8 %x2) {
401407 ; X86-AVX512DQ-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_512:
429435 ; X64-AVX512DQVL-NEXT: vcvtuqq2ps {rn-sae}, %zmm0, %ymm0 # encoding: [0x62,0xf1,0xff,0x18,0x7a,0xc0]
430436 ; X64-AVX512DQVL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
431437 ; X64-AVX512DQVL-NEXT: retq # encoding: [0xc3]
432 %res = call <8 x float> @llvm.x86.avx512.mask.cvtuqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 %x2, i32 4)
433 %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtuqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 -1, i32 0)
434 %res2 = fadd <8 x float> %res, %res1
438 %cvt = uitofp <8 x i64> %x0 to <8 x float>
439 %1 = bitcast i8 %x2 to <8 x i1>
440 %2 = select <8 x i1> %1, <8 x float> %cvt, <8 x float> %x1
441 %3 = call <8 x float> @llvm.x86.avx512.uitofp.round.v8f32.v8i64(<8 x i64> %x0, i32 0)
442 %res2 = fadd <8 x float> %2, %3
435443 ret <8 x float> %res2
436444 }
437445
test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll:

29892989 %res1 = call i8 @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double> %x0, i32 4, i8 %res)
29902990 ret i8 %res1
29912991 }
2992
2993 declare <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.256(<4 x i64>, <4 x float>, i8)
2994
2995 define <4 x float> @test_int_x86_avx512_mask_cvt_qq2ps_256(<4 x i64> %x0, <4 x float> %x1, i8 %x2) {
2996 ; X86-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_256:
2997 ; X86: # %bb.0:
2998 ; X86-NEXT: vcvtqq2ps %ymm0, %xmm2 # encoding: [0x62,0xf1,0xfc,0x28,0x5b,0xd0]
2999 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
3000 ; X86-NEXT: vcvtqq2ps %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x29,0x5b,0xc8]
3001 ; X86-NEXT: vaddps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc2]
3002 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
3003 ; X86-NEXT: retl # encoding: [0xc3]
3004 ;
3005 ; X64-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_256:
3006 ; X64: # %bb.0:
3007 ; X64-NEXT: vcvtqq2ps %ymm0, %xmm2 # encoding: [0x62,0xf1,0xfc,0x28,0x5b,0xd0]
3008 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3009 ; X64-NEXT: vcvtqq2ps %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x29,0x5b,0xc8]
3010 ; X64-NEXT: vaddps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc2]
3011 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
3012 ; X64-NEXT: retq # encoding: [0xc3]
3013 %res = call <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.256(<4 x i64> %x0, <4 x float> %x1, i8 %x2)
3014 %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.256(<4 x i64> %x0, <4 x float> %x1, i8 -1)
3015 %res2 = fadd <4 x float> %res, %res1
3016 ret <4 x float> %res2
3017 }
3018
3019 declare <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.256(<4 x i64>, <4 x float>, i8)
3020
3021 define <4 x float> @test_int_x86_avx512_mask_cvt_uqq2ps_256(<4 x i64> %x0, <4 x float> %x1, i8 %x2) {
3022 ; X86-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_256:
3023 ; X86: # %bb.0:
3024 ; X86-NEXT: vcvtuqq2ps %ymm0, %xmm2 # encoding: [0x62,0xf1,0xff,0x28,0x7a,0xd0]
3025 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
3026 ; X86-NEXT: vcvtuqq2ps %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x7a,0xc8]
3027 ; X86-NEXT: vaddps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc2]
3028 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
3029 ; X86-NEXT: retl # encoding: [0xc3]
3030 ;
3031 ; X64-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_256:
3032 ; X64: # %bb.0:
3033 ; X64-NEXT: vcvtuqq2ps %ymm0, %xmm2 # encoding: [0x62,0xf1,0xff,0x28,0x7a,0xd0]
3034 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3035 ; X64-NEXT: vcvtuqq2ps %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x7a,0xc8]
3036 ; X64-NEXT: vaddps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc2]
3037 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
3038 ; X64-NEXT: retq # encoding: [0xc3]
3039 %res = call <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.256(<4 x i64> %x0, <4 x float> %x1, i8 %x2)
3040 %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.256(<4 x i64> %x0, <4 x float> %x1, i8 -1)
3041 %res2 = fadd <4 x float> %res, %res1
3042 ret <4 x float> %res2
3043 }
test/CodeGen/X86/avx512dqvl-intrinsics.ll:

241241 ret <4 x float> %res4
242242 }
243243
244 declare <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.256(<4 x i64>, <4 x float>, i8)
245
246244 define <4 x float>@test_int_x86_avx512_mask_cvt_qq2ps_256(<4 x i64> %x0, <4 x float> %x1, i8 %x2) {
247245 ; X86-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_256:
248246 ; X86: # %bb.0:
247 ; X86-NEXT: vcvtqq2ps %ymm0, %xmm2 # encoding: [0x62,0xf1,0xfc,0x28,0x5b,0xd0]
249248 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
250249 ; X86-NEXT: vcvtqq2ps %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x29,0x5b,0xc8]
251 ; X86-NEXT: vcvtqq2ps %ymm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x28,0x5b,0xc0]
252 ; X86-NEXT: vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
250 ; X86-NEXT: vaddps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc2]
253251 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
254252 ; X86-NEXT: retl # encoding: [0xc3]
255253 ;
256254 ; X64-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_256:
257255 ; X64: # %bb.0:
256 ; X64-NEXT: vcvtqq2ps %ymm0, %xmm2 # encoding: [0x62,0xf1,0xfc,0x28,0x5b,0xd0]
258257 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
259258 ; X64-NEXT: vcvtqq2ps %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x29,0x5b,0xc8]
260 ; X64-NEXT: vcvtqq2ps %ymm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x28,0x5b,0xc0]
261 ; X64-NEXT: vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
259 ; X64-NEXT: vaddps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc2]
262260 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
263261 ; X64-NEXT: retq # encoding: [0xc3]
264 %res = call <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.256(<4 x i64> %x0, <4 x float> %x1, i8 %x2)
265 %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.256(<4 x i64> %x0, <4 x float> %x1, i8 -1)
266 %res2 = fadd <4 x float> %res, %res1
262 %cvt1 = sitofp <4 x i64> %x0 to <4 x float>
263 %1 = bitcast i8 %x2 to <8 x i1>
264 %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
265 %2 = select <4 x i1> %extract, <4 x float> %cvt1, <4 x float> %x1
266 %cvt = sitofp <4 x i64> %x0 to <4 x float>
267 %res2 = fadd <4 x float> %2, %cvt
267268 ret <4 x float> %res2
268269 }
269270
464465 define <4 x float>@test_int_x86_avx512_mask_cvt_uqq2ps_256(<4 x i64> %x0, <4 x float> %x1, i8 %x2) {
465466 ; X86-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_256:
466467 ; X86: # %bb.0:
468 ; X86-NEXT: vcvtuqq2ps %ymm0, %xmm2 # encoding: [0x62,0xf1,0xff,0x28,0x7a,0xd0]
467469 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
468470 ; X86-NEXT: vcvtuqq2ps %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x7a,0xc8]
469 ; X86-NEXT: vcvtuqq2ps %ymm0, %xmm0 # encoding: [0x62,0xf1,0xff,0x28,0x7a,0xc0]
470 ; X86-NEXT: vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
471 ; X86-NEXT: vaddps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc2]
471472 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
472473 ; X86-NEXT: retl # encoding: [0xc3]
473474 ;
474475 ; X64-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_256:
475476 ; X64: # %bb.0:
477 ; X64-NEXT: vcvtuqq2ps %ymm0, %xmm2 # encoding: [0x62,0xf1,0xff,0x28,0x7a,0xd0]
476478 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
477479 ; X64-NEXT: vcvtuqq2ps %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x7a,0xc8]
478 ; X64-NEXT: vcvtuqq2ps %ymm0, %xmm0 # encoding: [0x62,0xf1,0xff,0x28,0x7a,0xc0]
479 ; X64-NEXT: vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
480 ; X64-NEXT: vaddps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc2]
480481 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
481482 ; X64-NEXT: retq # encoding: [0xc3]
482 %res = call <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.256(<4 x i64> %x0, <4 x float> %x1, i8 %x2)
483 %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.256(<4 x i64> %x0, <4 x float> %x1, i8 -1)
484 %res2 = fadd <4 x float> %res, %res1
483 %cvt1 = uitofp <4 x i64> %x0 to <4 x float>
484 %1 = bitcast i8 %x2 to <8 x i1>
485 %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
486 %2 = select <4 x i1> %extract, <4 x float> %cvt1, <4 x float> %x1
487 %cvt = uitofp <4 x i64> %x0 to <4 x float>
488 %res2 = fadd <4 x float> %2, %cvt
485489 ret <4 x float> %res2
486490 }
487491