llvm.org GIT mirror llvm / 1f044d4
AVX-512: Embedded Rounding Control - encoding and printing. Changed intrinsics for vrcp14/vrcp28 and vrsqrt14/vrsqrt28 - aligned with GCC. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@199102 91177308-0d34-0410-b5e6-96231b3b80d8 Elena Demikhovsky 6 years ago
10 changed file(s) with 361 addition(s) and 257 deletion(s). Raw diff Collapse all Expand all
12471247 def int_x86_avx_ptestnzc_256 : GCCBuiltin<"__builtin_ia32_ptestnzc256">,
12481248 Intrinsic<[llvm_i32_ty], [llvm_v4i64_ty,
12491249 llvm_v4i64_ty], [IntrNoMem]>;
1250 def int_x86_avx512_mask_ptestm_d_512 : GCCBuiltin<"__builtin_ia32_ptestmd512">,
1251 Intrinsic<[llvm_i16_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
1252 llvm_i16_ty], [IntrNoMem]>;
1253 def int_x86_avx512_mask_ptestm_q_512 : GCCBuiltin<"__builtin_ia32_ptestmq512">,
1254 Intrinsic<[llvm_i8_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
1255 llvm_i8_ty], [IntrNoMem]>;
12501256 }
12511257
12521258 // Vector extract sign mask
16951701 def int_x86_avx2_pbroadcastq_256 :
16961702 GCCBuiltin<"__builtin_ia32_pbroadcastq256">,
16971703 Intrinsic<[llvm_v4i64_ty], [llvm_v2i64_ty], [IntrNoMem]>;
1704 def int_x86_avx512_mask_pbroadcast_d_gpr_512 :
1705 GCCBuiltin<"__builtin_ia32_pbroadcastd512_gpr_mask">,
1706 Intrinsic<[llvm_v16i32_ty], [llvm_i32_ty, llvm_v16i32_ty,
1707 llvm_i16_ty], [IntrNoMem]>;
1708 def int_x86_avx512_mask_pbroadcast_q_gpr_512 :
1709 GCCBuiltin<"__builtin_ia32_pbroadcastq512_gpr_mask">,
1710 Intrinsic<[llvm_v8i64_ty], [llvm_i64_ty, llvm_v8i64_ty,
1711 llvm_i8_ty], [IntrNoMem]>;
1712 def int_x86_avx512_mask_pbroadcast_q_mem_512 :
1713 GCCBuiltin<"__builtin_ia32_pbroadcastq512_mem_mask">,
1714 Intrinsic<[llvm_v8i64_ty], [llvm_i64_ty, llvm_v8i64_ty,
1715 llvm_i8_ty], [IntrNoMem]>;
16981716 }
16991717
17001718 // Vector permutation
28752893 def int_x86_avx512_sqrt_ps_512 : GCCBuiltin<"__builtin_ia32_sqrtps512">,
28762894 Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty], [IntrNoMem]>;
28772895
2878 def int_x86_avx512_rcp14_ps_512 : GCCBuiltin<"__builtin_ia32_rcp14ps512">,
2879 Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty],
2896 def int_x86_avx512_rsqrt14_ss : GCCBuiltin<"__builtin_ia32_rsqrt14ss_mask">,
2897 Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
2898 llvm_i8_ty], [IntrNoMem]>;
2899 def int_x86_avx512_rsqrt14_sd : GCCBuiltin<"__builtin_ia32_rsqrt14sd_mask">,
2900 Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
2901 llvm_i8_ty], [IntrNoMem]>;
2902
2903 def int_x86_avx512_rsqrt14_pd_512 : GCCBuiltin<"__builtin_ia32_rsqrt14pd512_mask">,
2904 Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
2905 llvm_i8_ty], [IntrNoMem]>;
2906 def int_x86_avx512_rsqrt14_ps_512 : GCCBuiltin<"__builtin_ia32_rsqrt14ps512_mask">,
2907 Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
2908 llvm_i16_ty], [IntrNoMem]>;
2909 def int_x86_avx512_rcp14_ss : GCCBuiltin<"__builtin_ia32_rcp14ss_mask">,
2910 Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
2911 llvm_i8_ty], [IntrNoMem]>;
2912 def int_x86_avx512_rcp14_sd : GCCBuiltin<"__builtin_ia32_rcp14sd_mask">,
2913 Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
2914 llvm_i8_ty], [IntrNoMem]>;
2915
2916 def int_x86_avx512_rcp14_pd_512 : GCCBuiltin<"__builtin_ia32_rcp14pd512_mask">,
2917 Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
2918 llvm_i8_ty], [IntrNoMem]>;
2919 def int_x86_avx512_rcp14_ps_512 : GCCBuiltin<"__builtin_ia32_rcp14ps512_mask">,
2920 Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
2921 llvm_i16_ty], [IntrNoMem]>;
2922
2923 def int_x86_avx512_rcp28_ps : GCCBuiltin<"__builtin_ia32_rcp28ps_mask">,
2924 Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
2925 llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
2926 def int_x86_avx512_rcp28_pd : GCCBuiltin<"__builtin_ia32_rcp28pd_mask">,
2927 Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
2928 llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
2929 def int_x86_avx512_rcp28_ss : GCCBuiltin<"__builtin_ia32_rcp28ss_mask">,
2930 Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
2931 llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty],
28802932 [IntrNoMem]>;
2881 def int_x86_avx512_rcp14_pd_512 : GCCBuiltin<"__builtin_ia32_rcp14pd512">,
2882 Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty],
2933 def int_x86_avx512_rcp28_sd : GCCBuiltin<"__builtin_ia32_rcp28sd_mask">,
2934 Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
2935 llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty],
28832936 [IntrNoMem]>;
2884 def int_x86_avx512_rcp14_ss : GCCBuiltin<"__builtin_ia32_rcp14ss">,
2885 Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty],
2937 def int_x86_avx512_rsqrt28_ps : GCCBuiltin<"__builtin_ia32_rsqrt28ps_mask">,
2938 Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
2939 llvm_i16_ty, llvm_i32_ty],
28862940 [IntrNoMem]>;
2887 def int_x86_avx512_rcp14_sd : GCCBuiltin<"__builtin_ia32_rcp14sd">,
2888 Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty],
2941 def int_x86_avx512_rsqrt28_pd : GCCBuiltin<"__builtin_ia32_rsqrt28pd_mask">,
2942 Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
2943 llvm_i8_ty, llvm_i32_ty],
28892944 [IntrNoMem]>;
2890 def int_x86_avx512_rsqrt14_ps_512 : GCCBuiltin<"__builtin_ia32_rsqrt14ps512">,
2891 Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty],
2945 def int_x86_avx512_rsqrt28_ss : GCCBuiltin<"__builtin_ia32_rsqrt28ss_mask">,
2946 Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
2947 llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty],
28922948 [IntrNoMem]>;
2893 def int_x86_avx512_rsqrt14_pd_512 : GCCBuiltin<"__builtin_ia32_rsqrt14pd512">,
2894 Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty],
2895 [IntrNoMem]>;
2896 def int_x86_avx512_rsqrt14_ss : GCCBuiltin<"__builtin_ia32_rsqrt14ss">,
2897 Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty],
2898 [IntrNoMem]>;
2899 def int_x86_avx512_rsqrt14_sd : GCCBuiltin<"__builtin_ia32_rsqrt14sd">,
2900 Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty],
2901 [IntrNoMem]>;
2902
2903 def int_x86_avx512_rcp28_ps_512 : GCCBuiltin<"__builtin_ia32_rcp28ps512">,
2904 Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty],
2905 [IntrNoMem]>;
2906 def int_x86_avx512_rcp28_pd_512 : GCCBuiltin<"__builtin_ia32_rcp28pd512">,
2907 Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty],
2908 [IntrNoMem]>;
2909 def int_x86_avx512_rcp28_ss : GCCBuiltin<"__builtin_ia32_rcp28ss">,
2910 Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty],
2911 [IntrNoMem]>;
2912 def int_x86_avx512_rcp28_sd : GCCBuiltin<"__builtin_ia32_rcp28sd">,
2913 Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty],
2914 [IntrNoMem]>;
2915 def int_x86_avx512_rsqrt28_ps_512 : GCCBuiltin<"__builtin_ia32_rsqrt28ps512">,
2916 Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty],
2917 [IntrNoMem]>;
2918 def int_x86_avx512_rsqrt28_pd_512 : GCCBuiltin<"__builtin_ia32_rsqrt28pd512">,
2919 Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty],
2920 [IntrNoMem]>;
2921 def int_x86_avx512_rsqrt28_ss : GCCBuiltin<"__builtin_ia32_rsqrt28ss">,
2922 Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty],
2923 [IntrNoMem]>;
2924 def int_x86_avx512_rsqrt28_sd : GCCBuiltin<"__builtin_ia32_rsqrt28sd">,
2925 Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty],
2949 def int_x86_avx512_rsqrt28_sd : GCCBuiltin<"__builtin_ia32_rsqrt28sd_mask">,
2950 Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
2951 llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty],
29262952 [IntrNoMem]>;
29272953 }
29282954
124124
125125 void X86ATTInstPrinter::printRoundingControl(const MCInst *MI, unsigned Op,
126126 raw_ostream &O) {
127 int64_t Imm = MI->getOperand(Op).getImm() & 0x1f;
127 int64_t Imm = MI->getOperand(Op).getImm() & 0x3;
128128 switch (Imm) {
129129 case 0: O << "{rn-sae}"; break;
130130 case 1: O << "{rd-sae}"; break;
131131 case 2: O << "{ru-sae}"; break;
132132 case 3: O << "{rz-sae}"; break;
133
134 default: llvm_unreachable("Invalid AVX-512 rounding control argument!");
135133 }
136134 }
137135 /// printPCRelImm - This is used to print an immediate value that ends up
114114
115115 void X86IntelInstPrinter::printRoundingControl(const MCInst *MI, unsigned Op,
116116 raw_ostream &O) {
117 int64_t Imm = MI->getOperand(Op).getImm() & 0x1f;
117 int64_t Imm = MI->getOperand(Op).getImm() & 0x3;
118118 switch (Imm) {
119119 case 0: O << "{rn-sae}"; break;
120120 case 1: O << "{rd-sae}"; break;
121121 case 2: O << "{ru-sae}"; break;
122122 case 3: O << "{rz-sae}"; break;
123
124 default: llvm_unreachable("Invalid AVX-512 rounding control argument!");
125123 }
126124 }
127125
502502 MemOp4 = 1U << 18,
503503
504504 /// XOP - Opcode prefix used by XOP instructions.
505 XOP = 1U << 19
506
505 XOP = 1U << 19,
506
507 /// Explicitly specified rounding control
508 EVEX_RC = 1U << 20
507509 };
508510
509511 // getBaseOpcodeFor - This function returns the "base" X86 opcode for the
603603 bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
604604 bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3;
605605 bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4;
606 bool HasEVEX_RC = false;
606 bool HasEVEX_RC = (TSFlags >> X86II::VEXShift) & X86II::EVEX_RC;
607607
608608 // VEX_R: opcode extension equivalent to REX.R in
609609 // 1's complement (inverted) form
685685 // EVEX_aaa
686686 unsigned char EVEX_aaa = 0;
687687
688 bool EncodeRC = false;
689
688690 // Encode the operand size opcode prefix as needed.
689691 if (TSFlags & X86II::OpSize)
690692 VEX_PP = 0x01;
748750
749751 // Classify VEX_B, VEX_4V, VEX_R, VEX_X
750752 unsigned NumOps = Desc.getNumOperands();
751 unsigned RcOperand = NumOps-1;
752753 unsigned CurOp = 0;
753754 if (NumOps > 1 && Desc.getOperandConstraint(1, MCOI::TIED_TO) == 0)
754755 ++CurOp;
909910 if (HasVEX_4VOp3)
910911 VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
911912 if (EVEX_b) {
912 assert(RcOperand >= CurOp);
913 EVEX_rc = MI.getOperand(RcOperand).getImm() & 0x3;
914 HasEVEX_RC = true;
915 }
913 if (HasEVEX_RC) {
914 unsigned RcOperand = NumOps-1;
915 assert(RcOperand >= CurOp);
916 EVEX_rc = MI.getOperand(RcOperand).getImm() & 0x3;
917 }
918 EncodeRC = true;
919 }
916920 break;
917921 case X86II::MRMDestReg:
918922 // MRMDestReg instructions forms:
939943 VEX_R = 0x0;
940944 if (HasEVEX && X86II::is32ExtendedReg(MI.getOperand(CurOp).getReg()))
941945 EVEX_R2 = 0x0;
946 if (EVEX_b)
947 EncodeRC = true;
942948 break;
943949 case X86II::MRM0r: case X86II::MRM1r:
944950 case X86II::MRM2r: case X86II::MRM3r:
10121018 (VEX_4V << 3) |
10131019 (EVEX_U << 2) |
10141020 VEX_PP, CurByte, OS);
1015 if (HasEVEX_RC)
1021 if (EncodeRC)
10161022 EmitByte((EVEX_z << 7) |
10171023 (EVEX_rc << 5) |
10181024 (EVEX_b << 4) |
12921298 // It uses the EVEX.aaa field?
12931299 bool HasEVEX = (TSFlags >> X86II::VEXShift) & X86II::EVEX;
12941300 bool HasEVEX_K = HasEVEX && ((TSFlags >> X86II::VEXShift) & X86II::EVEX_K);
1295 bool HasEVEX_B = HasEVEX && ((TSFlags >> X86II::VEXShift) & X86II::EVEX_B);
1301 bool HasEVEX_RC = HasEVEX && ((TSFlags >> X86II::VEXShift) & X86II::EVEX_RC);
12961302
12971303 // Determine where the memory operand starts, if present.
12981304 int MemoryOperand = X86II::getMemoryOperandNo(TSFlags, Opcode);
13901396 if (HasVEX_4VOp3)
13911397 ++CurOp;
13921398 // do not count the rounding control operand
1393 if (HasEVEX_B)
1399 if (HasEVEX_RC)
13941400 NumOps--;
13951401 break;
13961402
431431 def : Pat<(v8i64 (int_x86_avx512_pbroadcastq_i64_512 (i64 GR64:$src))),
432432 (VPBROADCASTQrZrr GR64:$src)>;
433433
434 def : Pat<(v16i32 (int_x86_avx512_mask_pbroadcast_d_gpr_512 (i32 GR32:$src),
435 (v16i32 immAllZerosV), (i16 GR16:$mask))),
436 (VPBROADCASTDrZkrr (COPY_TO_REGCLASS GR16:$mask, VK16WM), GR32:$src)>;
437 def : Pat<(v8i64 (int_x86_avx512_mask_pbroadcast_q_gpr_512 (i64 GR64:$src),
438 (bc_v8i64 (v16i32 immAllZerosV)), (i8 GR8:$mask))),
439 (VPBROADCASTQrZkrr (COPY_TO_REGCLASS GR8:$mask, VK8WM), GR64:$src)>;
440
434441 multiclass avx512_int_broadcast_rm opc, string OpcodeStr,
435442 X86MemOperand x86memop, PatFrag ld_frag,
436443 RegisterClass DstRC, ValueType OpVT, ValueType SrcVT,
638645
639646 def : Pat<(v16f32 (int_x86_avx512_mask_blend_ps_512 (v16f32 VR512:$src1),
640647 (v16f32 VR512:$src2), (i16 GR16:$mask))),
641 (VBLENDMPSZrr (COPY_TO_REGCLASS GR16:$mask, VK16),
648 (VBLENDMPSZrr (COPY_TO_REGCLASS GR16:$mask, VK16WM),
642649 VR512:$src1, VR512:$src2)>;
643650
644651 def : Pat<(v8f64 (int_x86_avx512_mask_blend_pd_512 (v8f64 VR512:$src1),
645652 (v8f64 VR512:$src2), (i8 GR8:$mask))),
646 (VBLENDMPDZrr (COPY_TO_REGCLASS GR8:$mask, VK8),
653 (VBLENDMPDZrr (COPY_TO_REGCLASS GR8:$mask, VK8WM),
647654 VR512:$src1, VR512:$src2)>;
648655
649656 defm VPBLENDMDZ : avx512_blendmask<0x64, "vpblendmd",
808815 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
809816 [(set KRC:$dst, (X86cmpm (vt RC:$src1), (vt RC:$src2), imm:$cc))], d>;
810817 def rrib: AVX512PIi8<0xC2, MRMSrcReg,
811 (outs KRC:$dst), (ins RC:$src1, RC:$src2, AVXCC:$cc, i32imm:$sae),
818 (outs KRC:$dst), (ins RC:$src1, RC:$src2, AVXCC:$cc),
812819 !strconcat("vcmp${cc}", suffix,
813820 "\t{{sae}, $src2, $src1, $dst|$dst, $src1, $src2, {sae}}"),
814821 [], d>, EVEX_B;
858865 (v16f32 VR512:$src2), imm:$cc, (i16 -1),
859866 FROUND_NO_EXC)),
860867 (COPY_TO_REGCLASS (VCMPPSZrrib VR512:$src1, VR512:$src2,
861 (I8Imm imm:$cc), (i32 0)), GR16)>;
868 (I8Imm imm:$cc)), GR16)>;
862869
863870 def : Pat<(i8 (int_x86_avx512_mask_cmp_pd_512 (v8f64 VR512:$src1),
864871 (v8f64 VR512:$src2), imm:$cc, (i8 -1),
865872 FROUND_NO_EXC)),
866873 (COPY_TO_REGCLASS (VCMPPDZrrib VR512:$src1, VR512:$src2,
867 (I8Imm imm:$cc), (i32 0)), GR8)>;
874 (I8Imm imm:$cc)), GR8)>;
868875
869876 def : Pat<(i16 (int_x86_avx512_mask_cmp_ps_512 (v16f32 VR512:$src1),
870877 (v16f32 VR512:$src2), imm:$cc, (i16 -1),
948955 def : Pat<(i8 (zext VK1:$src)),
949956 (EXTRACT_SUBREG
950957 (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), sub_8bit)>;
958 def : Pat<(i64 (zext VK1:$src)),
959 (SUBREG_TO_REG (i64 0),
960 (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), sub_32bit)>;
961
951962 }
952963 // With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
953964 let Predicates = [HasAVX512] in {
11691180 let Predicates = [HasAVX512] in {
11701181 def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
11711182 def : Pat<(v8i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK8)>;
1183 def : Pat<(i1 0), (COPY_TO_REGCLASS (KSET0W), VK1)>;
1184 def : Pat<(i1 1), (COPY_TO_REGCLASS (KSET1W), VK1)>;
1185 def : Pat<(i1 -1), (COPY_TO_REGCLASS (KSET1W), VK1)>;
11721186 }
11731187 def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 0))),
11741188 (v8i1 (COPY_TO_REGCLASS VK16:$src, VK8))>;
21112125 memopv8i64, X86testm, v8i64>, EVEX_V512, VEX_W,
21122126 EVEX_CD8<64, CD8VF>;
21132127
2128 def : Pat <(i16 (int_x86_avx512_mask_ptestm_d_512 (v16i32 VR512:$src1),
2129 (v16i32 VR512:$src2), (i16 -1))),
2130 (COPY_TO_REGCLASS (VPTESTMDZrr VR512:$src1, VR512:$src2), GR16)>;
2131
2132 def : Pat <(i8 (int_x86_avx512_mask_ptestm_q_512 (v8i64 VR512:$src1),
2133 (v8i64 VR512:$src2), (i8 -1))),
2134 (COPY_TO_REGCLASS (VPTESTMDZrr VR512:$src1, VR512:$src2), GR8)>;
21142135 //===----------------------------------------------------------------------===//
21152136 // AVX-512 Shift instructions
21162137 //===----------------------------------------------------------------------===//
27902811 (OpVT (OpNode (InVT SrcRC:$src))))], d>, EVEX;
27912812 def rrb : AVX512PI
27922813 !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
2793 [], d>, EVEX, EVEX_B;
2814 [], d>, EVEX, EVEX_B, EVEX_RC;
27942815 let mayLoad = 1 in
27952816 def rm : AVX512PI
27962817 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
29152936 [], d>, EVEX;
29162937 def rrb : AVX512PI
29172938 !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
2918 [], d>, EVEX, EVEX_B;
2939 [], d>, EVEX, EVEX_B, EVEX_RC;
29192940 let mayLoad = 1 in
29202941 def rm : AVX512PI
29212942 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
30243045 }
30253046 }
30263047
3027 /// avx512_unop_p - AVX-512 unops in packed form.
3028 multiclass avx512_fp_unop_p opc, string OpcodeStr, SDNode OpNode> {
3029 def PSZr : AVX5128I
3048 /// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
3049 multiclass avx512_fp14_s opc, string OpcodeStr, RegisterClass RC,
3050 X86MemOperand x86memop> {
3051 let hasSideEffects = 0 in {
3052 def rr : AVX5128I
3053 (ins RC:$src1, RC:$src2),
3054 !strconcat(OpcodeStr,
3055 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, EVEX_4V;
3056 let mayLoad = 1 in {
3057 def rm : AVX5128I
3058 (ins RC:$src1, x86memop:$src2),
3059 !strconcat(OpcodeStr,
3060 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, EVEX_4V;
3061 }
3062 }
3063 }
3064
3065 defm VRCP14SS : avx512_fp14_s<0x4D, "vrcp14ss", FR32X, f32mem>,
3066 EVEX_CD8<32, CD8VT1>;
3067 defm VRCP14SD : avx512_fp14_s<0x4D, "vrcp14sd", FR64X, f64mem>,
3068 VEX_W, EVEX_CD8<64, CD8VT1>;
3069 defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", FR32X, f32mem>,
3070 EVEX_CD8<32, CD8VT1>;
3071 defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", FR64X, f64mem>,
3072 VEX_W, EVEX_CD8<64, CD8VT1>;
3073
3074 def : Pat <(v4f32 (int_x86_avx512_rcp14_ss (v4f32 VR128X:$src1),
3075 (v4f32 VR128X:$src2), (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1))),
3076 (COPY_TO_REGCLASS (VRCP14SSrr (COPY_TO_REGCLASS VR128X:$src1, FR32X),
3077 (COPY_TO_REGCLASS VR128X:$src2, FR32X)), VR128X)>;
3078
3079 def : Pat <(v2f64 (int_x86_avx512_rcp14_sd (v2f64 VR128X:$src1),
3080 (v2f64 VR128X:$src2), (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1))),
3081 (COPY_TO_REGCLASS (VRCP14SDrr (COPY_TO_REGCLASS VR128X:$src1, FR64X),
3082 (COPY_TO_REGCLASS VR128X:$src2, FR64X)), VR128X)>;
3083
3084 def : Pat <(v4f32 (int_x86_avx512_rsqrt14_ss (v4f32 VR128X:$src1),
3085 (v4f32 VR128X:$src2), (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1))),
3086 (COPY_TO_REGCLASS (VRSQRT14SSrr (COPY_TO_REGCLASS VR128X:$src1, FR32X),
3087 (COPY_TO_REGCLASS VR128X:$src2, FR32X)), VR128X)>;
3088
3089 def : Pat <(v2f64 (int_x86_avx512_rsqrt14_sd (v2f64 VR128X:$src1),
3090 (v2f64 VR128X:$src2), (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1))),
3091 (COPY_TO_REGCLASS (VRSQRT14SDrr (COPY_TO_REGCLASS VR128X:$src1, FR64X),
3092 (COPY_TO_REGCLASS VR128X:$src2, FR64X)), VR128X)>;
3093
3094 /// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
3095 multiclass avx512_fp14_p opc, string OpcodeStr, SDNode OpNode,
3096 RegisterClass RC, X86MemOperand x86memop,
3097 PatFrag mem_frag, ValueType OpVt> {
3098 def r : AVX5128I
30303099 !strconcat(OpcodeStr,
3031 "ps\t{$src, $dst|$dst, $src}"),
3032 [(set VR512:$dst, (v16f32 (OpNode VR512:$src)))]>,
3033 EVEX, EVEX_V512;
3034 def PSZm : AVX5128I),
3100 "\t{$src, $dst|$dst, $src}"),
3101 [(set RC:$dst, (OpVt (OpNode RC:$src)))]>,
3102 EVEX;
3103 def m : AVX5128I
3104 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3105 [(set RC:$dst, (OpVt (OpNode (mem_frag addr:$src))))]>,
3106 EVEX;
3107 }
3108 defm VRSQRT14PSZ : avx512_fp14_p<0x4E, "vrsqrt14ps", X86frsqrt, VR512, f512mem,
3109 memopv16f32, v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
3110 defm VRSQRT14PDZ : avx512_fp14_p<0x4E, "vrsqrt14pd", X86frsqrt, VR512, f512mem,
3111 memopv8f64, v8f64>, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
3112 defm VRCP14PSZ : avx512_fp14_p<0x4C, "vrcp14ps", X86frcp, VR512, f512mem,
3113 memopv16f32, v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
3114 defm VRCP14PDZ : avx512_fp14_p<0x4C, "vrcp14pd", X86frcp, VR512, f512mem,
3115 memopv8f64, v8f64>, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
3116
3117 def : Pat <(v16f32 (int_x86_avx512_rsqrt14_ps_512 (v16f32 VR512:$src),
3118 (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1))),
3119 (VRSQRT14PSZr VR512:$src)>;
3120 def : Pat <(v8f64 (int_x86_avx512_rsqrt14_pd_512 (v8f64 VR512:$src),
3121 (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))),
3122 (VRSQRT14PDZr VR512:$src)>;
3123
3124 def : Pat <(v16f32 (int_x86_avx512_rcp14_ps_512 (v16f32 VR512:$src),
3125 (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1))),
3126 (VRCP14PSZr VR512:$src)>;
3127 def : Pat <(v8f64 (int_x86_avx512_rcp14_pd_512 (v8f64 VR512:$src),
3128 (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))),
3129 (VRCP14PDZr VR512:$src)>;
3130
3131 /// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
3132 multiclass avx512_fp28_s opc, string OpcodeStr, RegisterClass RC,
3133 X86MemOperand x86memop> {
3134 let hasSideEffects = 0, Predicates = [HasERI] in {
3135 def rr : AVX5128I
3136 (ins RC:$src1, RC:$src2),
3137 !strconcat(OpcodeStr,
3138 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, EVEX_4V;
3139 def rrb : AVX5128I
3140 (ins RC:$src1, RC:$src2),
3141 !strconcat(OpcodeStr,
3142 "\t{{sae}, $src2, $src1, $dst|$dst, $src1, $src2, {sae}}"),
3143 []>, EVEX_4V, EVEX_B;
3144 let mayLoad = 1 in {
3145 def rm : AVX5128I
3146 (ins RC:$src1, x86memop:$src2),
3147 !strconcat(OpcodeStr,
3148 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, EVEX_4V;
3149 }
3150 }
3151 }
3152
3153 defm VRCP28SS : avx512_fp28_s<0xCB, "vrcp28ss", FR32X, f32mem>,
3154 EVEX_CD8<32, CD8VT1>;
3155 defm VRCP28SD : avx512_fp28_s<0xCB, "vrcp28sd", FR64X, f64mem>,
3156 VEX_W, EVEX_CD8<64, CD8VT1>;
3157 defm VRSQRT28SS : avx512_fp28_s<0xCD, "vrsqrt28ss", FR32X, f32mem>,
3158 EVEX_CD8<32, CD8VT1>;
3159 defm VRSQRT28SD : avx512_fp28_s<0xCD, "vrsqrt28sd", FR64X, f64mem>,
3160 VEX_W, EVEX_CD8<64, CD8VT1>;
3161
3162 def : Pat <(v4f32 (int_x86_avx512_rcp28_ss (v4f32 VR128X:$src1),
3163 (v4f32 VR128X:$src2), (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1),
3164 FROUND_NO_EXC)),
3165 (COPY_TO_REGCLASS (VRCP28SSrrb (COPY_TO_REGCLASS VR128X:$src1, FR32X),
3166 (COPY_TO_REGCLASS VR128X:$src2, FR32X)), VR128X)>;
3167
3168 def : Pat <(v2f64 (int_x86_avx512_rcp28_sd (v2f64 VR128X:$src1),
3169 (v2f64 VR128X:$src2), (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1),
3170 FROUND_NO_EXC)),
3171 (COPY_TO_REGCLASS (VRCP28SDrrb (COPY_TO_REGCLASS VR128X:$src1, FR64X),
3172 (COPY_TO_REGCLASS VR128X:$src2, FR64X)), VR128X)>;
3173
3174 def : Pat <(v4f32 (int_x86_avx512_rsqrt28_ss (v4f32 VR128X:$src1),
3175 (v4f32 VR128X:$src2), (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1),
3176 FROUND_NO_EXC)),
3177 (COPY_TO_REGCLASS (VRSQRT28SSrrb (COPY_TO_REGCLASS VR128X:$src1, FR32X),
3178 (COPY_TO_REGCLASS VR128X:$src2, FR32X)), VR128X)>;
3179
3180 def : Pat <(v2f64 (int_x86_avx512_rsqrt28_sd (v2f64 VR128X:$src1),
3181 (v2f64 VR128X:$src2), (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1),
3182 FROUND_NO_EXC)),
3183 (COPY_TO_REGCLASS (VRSQRT28SDrrb (COPY_TO_REGCLASS VR128X:$src1, FR64X),
3184 (COPY_TO_REGCLASS VR128X:$src2, FR64X)), VR128X)>;
3185
3186 /// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
3187 multiclass avx512_fp28_p opc, string OpcodeStr,
3188 RegisterClass RC, X86MemOperand x86memop> {
3189 let hasSideEffects = 0, Predicates = [HasERI] in {
3190 def r : AVX5128I
30353191 !strconcat(OpcodeStr,
3036 "ps\t{$src, $dst|$dst, $src}"),
3037 [(set VR512:$dst, (OpNode (memopv16f32 addr:$src)))]>,
3038 EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>;
3039 def PDZr : AVX5128I),
3192 "\t{$src, $dst|$dst, $src}"),
3193 []>, EVEX;
3194 def rb : AVX5128I
30403195 !strconcat(OpcodeStr,
3041 "pd\t{$src, $dst|$dst, $src}"),
3042 [(set VR512:$dst, (v8f64 (OpNode VR512:$src)))]>,
3043 EVEX, EVEX_V512, VEX_W;
3044 def PDZm : AVX5128I
3045 !strconcat(OpcodeStr,
3046 "pd\t{$src, $dst|$dst, $src}"),
3047 [(set VR512:$dst, (OpNode (memopv16f32 addr:$src)))]>,
3048 EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
3049 }
3050
3051 /// avx512_fp_unop_p_int - AVX-512 intrinsics unops in packed forms.
3052 multiclass avx512_fp_unop_p_int opc, string OpcodeStr,
3053 Intrinsic V16F32Int, Intrinsic V8F64Int> {
3054 let isCodeGenOnly = 1 in {
3055 def PSZr_Int : AVX5128I
3056 !strconcat(OpcodeStr,
3057 "ps\t{$src, $dst|$dst, $src}"),
3058 [(set VR512:$dst, (V16F32Int VR512:$src))]>,
3059 EVEX, EVEX_V512;
3060 def PSZm_Int : AVX5128I
3061 !strconcat(OpcodeStr,
3062 "ps\t{$src, $dst|$dst, $src}"),
3063 [(set VR512:$dst,
3064 (V16F32Int (memopv16f32 addr:$src)))]>, EVEX,
3065 EVEX_V512, EVEX_CD8<32, CD8VF>;
3066 def PDZr_Int : AVX5128I
3067 !strconcat(OpcodeStr,
3068 "pd\t{$src, $dst|$dst, $src}"),
3069 [(set VR512:$dst, (V8F64Int VR512:$src))]>,
3070 EVEX, EVEX_V512, VEX_W;
3071 def PDZm_Int : AVX5128I
3072 !strconcat(OpcodeStr,
3073 "pd\t{$src, $dst|$dst, $src}"),
3074 [(set VR512:$dst,
3075 (V8F64Int (memopv8f64 addr:$src)))]>,
3076 EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
3077 } // isCodeGenOnly = 1
3078 }
3079
3080 /// avx512_fp_unop_s - AVX-512 unops in scalar form.
3081 multiclass avx512_fp_unop_s opc, string OpcodeStr> {
3082 let hasSideEffects = 0 in {
3083 def SSZr : AVX5128I
3084 (ins FR32X:$src1, FR32X:$src2),
3085 !strconcat(OpcodeStr,
3086 "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3087 []>, EVEX_4V;
3088 let mayLoad = 1 in {
3089 def SSZm : AVX5128I
3090 (ins FR32X:$src1, f32mem:$src2),
3091 !strconcat(OpcodeStr,
3092 "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3093 []>, EVEX_4V, EVEX_CD8<32, CD8VT1>;
3094 let isCodeGenOnly = 1 in
3095 def SSZm_Int : AVX5128I
3096 (ins VR128X:$src1, ssmem:$src2),
3097 !strconcat(OpcodeStr,
3098 "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3099 []>, EVEX_4V, EVEX_CD8<32, CD8VT1>;
3196 "\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
3197 []>, EVEX, EVEX_B;
3198 def m : AVX5128I
3199 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3200 []>, EVEX;
31003201 }
3101 def SDZr : AVX5128I
3102 (ins FR64X:$src1, FR64X:$src2),
3103 !strconcat(OpcodeStr,
3104 "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
3105 EVEX_4V, VEX_W;
3106 let mayLoad = 1 in {
3107 def SDZm : AVX5128I
3108 (ins FR64X:$src1, f64mem:$src2),
3109 !strconcat(OpcodeStr,
3110 "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
3111 EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>;
3112 let isCodeGenOnly = 1 in
3113 def SDZm_Int : AVX5128I
3114 (ins VR128X:$src1, sdmem:$src2),
3115 !strconcat(OpcodeStr,
3116 "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3117 []>, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>;
3118 }
3119 }
3120 }
3121
3122 defm VRCP14 : avx512_fp_unop_s<0x4D, "vrcp14">,
3123 avx512_fp_unop_p<0x4C, "vrcp14", X86frcp>,
3124 avx512_fp_unop_p_int<0x4C, "vrcp14",
3125 int_x86_avx512_rcp14_ps_512, int_x86_avx512_rcp14_pd_512>;
3126
3127 defm VRSQRT14 : avx512_fp_unop_s<0x4F, "vrsqrt14">,
3128 avx512_fp_unop_p<0x4E, "vrsqrt14", X86frsqrt>,
3129 avx512_fp_unop_p_int<0x4E, "vrsqrt14",
3130 int_x86_avx512_rsqrt14_ps_512, int_x86_avx512_rsqrt14_pd_512>;
3131
3132 def : Pat<(int_x86_avx512_rsqrt14_ss VR128X:$src),
3133 (COPY_TO_REGCLASS (VRSQRT14SSZr (f32 (IMPLICIT_DEF)),
3134 (COPY_TO_REGCLASS VR128X:$src, FR32)),
3135 VR128X)>;
3136 def : Pat<(int_x86_avx512_rsqrt14_ss sse_load_f32:$src),
3137 (VRSQRT14SSZm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
3138
3139 def : Pat<(int_x86_avx512_rcp14_ss VR128X:$src),
3140 (COPY_TO_REGCLASS (VRCP14SSZr (f32 (IMPLICIT_DEF)),
3141 (COPY_TO_REGCLASS VR128X:$src, FR32)),
3142 VR128X)>;
3143 def : Pat<(int_x86_avx512_rcp14_ss sse_load_f32:$src),
3144 (VRCP14SSZm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
3145
3146 let AddedComplexity = 20, Predicates = [HasERI] in {
3147 defm VRCP28 : avx512_fp_unop_s<0xCB, "vrcp28">,
3148 avx512_fp_unop_p<0xCA, "vrcp28", X86frcp>,
3149 avx512_fp_unop_p_int<0xCA, "vrcp28",
3150 int_x86_avx512_rcp28_ps_512, int_x86_avx512_rcp28_pd_512>;
3151
3152 defm VRSQRT28 : avx512_fp_unop_s<0xCD, "vrsqrt28">,
3153 avx512_fp_unop_p<0xCC, "vrsqrt28", X86frsqrt>,
3154 avx512_fp_unop_p_int<0xCC, "vrsqrt28",
3155 int_x86_avx512_rsqrt28_ps_512, int_x86_avx512_rsqrt28_pd_512>;
3156 }
3157
3158 let Predicates = [HasERI] in {
3159 def : Pat<(int_x86_avx512_rsqrt28_ss VR128X:$src),
3160 (COPY_TO_REGCLASS (VRSQRT28SSZr (f32 (IMPLICIT_DEF)),
3161 (COPY_TO_REGCLASS VR128X:$src, FR32)),
3162 VR128X)>;
3163 def : Pat<(int_x86_avx512_rsqrt28_ss sse_load_f32:$src),
3164 (VRSQRT28SSZm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
3165
3166 def : Pat<(int_x86_avx512_rcp28_ss VR128X:$src),
3167 (COPY_TO_REGCLASS (VRCP28SSZr (f32 (IMPLICIT_DEF)),
3168 (COPY_TO_REGCLASS VR128X:$src, FR32)),
3169 VR128X)>;
3170 def : Pat<(int_x86_avx512_rcp28_ss sse_load_f32:$src),
3171 (VRCP28SSZm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
3172 }
3202 }
3203 defm VRSQRT28PSZ : avx512_fp28_p<0xCC, "vrsqrt28ps", VR512, f512mem>,
3204 EVEX_V512, EVEX_CD8<32, CD8VF>;
3205 defm VRSQRT28PDZ : avx512_fp28_p<0xCC, "vrsqrt28pd", VR512, f512mem>,
3206 VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
3207 defm VRCP28PSZ : avx512_fp28_p<0xCA, "vrcp28ps", VR512, f512mem>,
3208 EVEX_V512, EVEX_CD8<32, CD8VF>;
3209 defm VRCP28PDZ : avx512_fp28_p<0xCA, "vrcp28pd", VR512, f512mem>,
3210 VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
3211
3212 def : Pat <(v16f32 (int_x86_avx512_rsqrt28_ps (v16f32 VR512:$src),
3213 (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), FROUND_NO_EXC)),
3214 (VRSQRT28PSZrb VR512:$src)>;
3215 def : Pat <(v8f64 (int_x86_avx512_rsqrt28_pd (v8f64 VR512:$src),
3216 (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1), FROUND_NO_EXC)),
3217 (VRSQRT28PDZrb VR512:$src)>;
3218
3219 def : Pat <(v16f32 (int_x86_avx512_rcp28_ps (v16f32 VR512:$src),
3220 (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), FROUND_NO_EXC)),
3221 (VRCP28PSZrb VR512:$src)>;
3222 def : Pat <(v8f64 (int_x86_avx512_rcp28_pd (v8f64 VR512:$src),
3223 (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1), FROUND_NO_EXC)),
3224 (VRCP28PDZrb VR512:$src)>;
3225
31733226 multiclass avx512_sqrt_packed opc, string OpcodeStr, SDNode OpNode,
31743227 Intrinsic V16F32Int, Intrinsic V8F64Int,
31753228 OpndItins itins_s, OpndItins itins_d> {
33023355 Requires<[OptForSize]>;
33033356
33043357 def : Pat<(f32 (X86frsqrt FR32X:$src)),
3305 (VRSQRT14SSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
3358 (VRSQRT14SSrr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
33063359 def : Pat<(f32 (X86frsqrt (load addr:$src))),
3307 (VRSQRT14SSZm (f32 (IMPLICIT_DEF)), addr:$src)>,
3360 (VRSQRT14SSrm (f32 (IMPLICIT_DEF)), addr:$src)>,
33083361 Requires<[OptForSize]>;
33093362
33103363 def : Pat<(f32 (X86frcp FR32X:$src)),
3311 (VRCP14SSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
3364 (VRCP14SSrr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
33123365 def : Pat<(f32 (X86frcp (load addr:$src))),
3313 (VRCP14SSZm (f32 (IMPLICIT_DEF)), addr:$src)>,
3366 (VRCP14SSrm (f32 (IMPLICIT_DEF)), addr:$src)>,
33143367 Requires<[OptForSize]>;
33153368
33163369 def : Pat<(int_x86_sse_sqrt_ss VR128X:$src),
149149 class EVEX_K { bit hasEVEX_K = 1; }
150150 class EVEX_KZ : EVEX_K { bit hasEVEX_Z = 1; }
151151 class EVEX_B { bit hasEVEX_B = 1; }
152 class EVEX_RC { bit hasEVEX_RC = 1; }
152153 class EVEX_V512 { bit hasEVEX_L2 = 1; bit hasVEX_L = 0; }
153154 class EVEX_CD8 {
154155 bits<2> EVEX_CD8E = !if(!eq(esize, 8), 0b00,
216217 bit has3DNow0F0FOpcode =0;// Wacky 3dNow! encoding?
217218 bit hasMemOp4Prefix = 0; // Same bit as VEX_W, but used for swapping operands
218219 bit hasXOP_Prefix = 0; // Does this inst require an XOP prefix?
220 bit hasEVEX_RC = 0; // Explicitly specified rounding control in FP instruction.
219221
220222 // TSFlags layout should be kept in sync with X86InstrInfo.h.
221223 let TSFlags{5-0} = FormBits;
246248 let TSFlags{49} = has3DNow0F0FOpcode;
247249 let TSFlags{50} = hasMemOp4Prefix;
248250 let TSFlags{51} = hasXOP_Prefix;
251 let TSFlags{52} = hasEVEX_RC;
249252 }
250253
251254 class PseudoI pattern>
5555 %b1 = add i16 %a, %b
5656 ret i16 %b1
5757
58 }
59
60 ; CHECK-LABEL: test5
61 ; CHECK: ret
62 define float @test5(float %p) #0 {
63 entry:
64 %cmp = fcmp oeq float %p, 0.000000e+00
65 br i1 %cmp, label %return, label %if.end
66
67 if.end: ; preds = %entry
68 %cmp1 = fcmp ogt float %p, 0.000000e+00
69 %cond = select i1 %cmp1, float 1.000000e+00, float -1.000000e+00
70 br label %return
71
72 return: ; preds = %if.end, %entry
73 %retval.0 = phi float [ %cond, %if.end ], [ %p, %entry ]
74 ret float %retval.0
5875 }
4646 }
4747
4848 define <16 x float> @test_rcp_ps_512(<16 x float> %a0) {
49 ; CHECK: vrcp14ps
50 %res = call <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float> %a0) ; <<16 x float>> [#uses=1]
51 ret <16 x float> %res
52 }
53 declare <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float>) nounwind readnone
49 ; CHECK: vrcp14ps {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x4c,0xc0]
50 %res = call <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1]
51 ret <16 x float> %res
52 }
53 declare <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone
5454
5555 define <8 x double> @test_rcp_pd_512(<8 x double> %a0) {
56 ; CHECK: vrcp14pd
57 %res = call <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double> %a0) ; <<8 x double>> [#uses=1]
58 ret <8 x double> %res
59 }
60 declare <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double>) nounwind readnone
56 ; CHECK: vrcp14pd {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x4c,0xc0]
57 %res = call <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1) ; <<8 x double>> [#uses=1]
58 ret <8 x double> %res
59 }
60 declare <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double>, <8 x double>, i8) nounwind readnone
6161
6262 define <16 x float> @test_rcp28_ps_512(<16 x float> %a0) {
63 ; CHECK: vrcp28ps
64 %res = call <16 x float> @llvm.x86.avx512.rcp28.ps.512(<16 x float> %a0) ; <<16 x float>> [#uses=1]
65 ret <16 x float> %res
66 }
67 declare <16 x float> @llvm.x86.avx512.rcp28.ps.512(<16 x float>) nounwind readnone
63 ; CHECK: vrcp28ps {sae}, {{.*}}encoding: [0x62,0xf2,0x7d,0x18,0xca,0xc0]
64 %res = call <16 x float> @llvm.x86.avx512.rcp28.ps(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8) ; <<16 x float>> [#uses=1]
65 ret <16 x float> %res
66 }
67 declare <16 x float> @llvm.x86.avx512.rcp28.ps(<16 x float>, <16 x float>, i16, i32) nounwind readnone
6868
6969 define <8 x double> @test_rcp28_pd_512(<8 x double> %a0) {
70 ; CHECK: vrcp28pd
71 %res = call <8 x double> @llvm.x86.avx512.rcp28.pd.512(<8 x double> %a0) ; <<8 x double>> [#uses=1]
72 ret <8 x double> %res
73 }
74 declare <8 x double> @llvm.x86.avx512.rcp28.pd.512(<8 x double>) nounwind readnone
70 ; CHECK: vrcp28pd {sae}, {{.*}}encoding: [0x62,0xf2,0xfd,0x18,0xca,0xc0]
71 %res = call <8 x double> @llvm.x86.avx512.rcp28.pd(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 8) ; <<8 x double>> [#uses=1]
72 ret <8 x double> %res
73 }
74 declare <8 x double> @llvm.x86.avx512.rcp28.pd(<8 x double>, <8 x double>, i8, i32) nounwind readnone
7575
7676 declare <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double>, i32, <8 x double>, i8, i32)
7777
9090 }
9191
9292 define <16 x float> @test_rsqrt_ps_512(<16 x float> %a0) {
93 ; CHECK: vrsqrt14ps
94 %res = call <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float> %a0) ; <<16 x float>> [#uses=1]
95 ret <16 x float> %res
96 }
97 declare <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float>) nounwind readnone
93 ; CHECK: vrsqrt14ps {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x4e,0xc0]
94 %res = call <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1]
95 ret <16 x float> %res
96 }
97 declare <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone
9898
9999 define <16 x float> @test_rsqrt28_ps_512(<16 x float> %a0) {
100 ; CHECK: vrsqrt28ps
101 %res = call <16 x float> @llvm.x86.avx512.rsqrt28.ps.512(<16 x float> %a0) ; <<16 x float>> [#uses=1]
102 ret <16 x float> %res
103 }
104 declare <16 x float> @llvm.x86.avx512.rsqrt28.ps.512(<16 x float>) nounwind readnone
100 ; CHECK: vrsqrt28ps {sae}, {{.*}}encoding: [0x62,0xf2,0x7d,0x18,0xcc,0xc0]
101 %res = call <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8) ; <<16 x float>> [#uses=1]
102 ret <16 x float> %res
103 }
104 declare <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float>, <16 x float>, i16, i32) nounwind readnone
105105
106106 define <4 x float> @test_rsqrt14_ss(<4 x float> %a0) {
107 ; CHECK: vrsqrt14ss
108 %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
107 ; CHECK: vrsqrt14ss {{.*}}encoding: [0x62,0xf2,0x7d,0x08,0x4f,0xc0]
108 %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1]
109109 ret <4 x float> %res
110110 }
111 declare <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float>) nounwind readnone
111 declare <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
112112
113113 define <4 x float> @test_rsqrt28_ss(<4 x float> %a0) {
114 ; CHECK: vrsqrt28ss
115 %res = call <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
114 ; CHECK: vrsqrt28ss {sae}, {{.*}}encoding: [0x62,0xf2,0x7d,0x18,0xcd,0xc0]
115 %res = call <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1, i32 8) ; <<4 x float>> [#uses=1]
116116 ret <4 x float> %res
117117 }
118 declare <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float>) nounwind readnone
118 declare <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
119119
120120 define <4 x float> @test_rcp14_ss(<4 x float> %a0) {
121 ; CHECK: vrcp14ss
122 %res = call <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
121 ; CHECK: vrcp14ss {{.*}}encoding: [0x62,0xf2,0x7d,0x08,0x4d,0xc0]
122 %res = call <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1]
123123 ret <4 x float> %res
124124 }
125 declare <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float>) nounwind readnone
125 declare <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
126126
127127 define <4 x float> @test_rcp28_ss(<4 x float> %a0) {
128 ; CHECK: vrcp28ss
129 %res = call <4 x float> @llvm.x86.avx512.rcp28.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
128 ; CHECK: vrcp28ss {sae}, {{.*}}encoding: [0x62,0xf2,0x7d,0x18,0xcb,0xc0]
129 %res = call <4 x float> @llvm.x86.avx512.rcp28.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1, i32 8) ; <<4 x float>> [#uses=1]
130130 ret <4 x float> %res
131131 }
132 declare <4 x float> @llvm.x86.avx512.rcp28.ss(<4 x float>) nounwind readnone
132 declare <4 x float> @llvm.x86.avx512.rcp28.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
133133
134134 define <8 x double> @test_sqrt_pd_512(<8 x double> %a0) {
135135 ; CHECK: vsqrtpd
209209 return inheritsFrom(child, IC_EVEX_W_XD_K) ||
210210 inheritsFrom(child, IC_EVEX_L_W_XD_K);
211211 case IC_EVEX_OPSIZE_K:
212 return inheritsFrom(child, IC_EVEX_W_OPSIZE_K) ||
213 inheritsFrom(child, IC_EVEX_W_OPSIZE_K);
212 case IC_EVEX_OPSIZE_B:
213 return false;
214214 case IC_EVEX_W_K:
215215 case IC_EVEX_W_XS_K:
216216 case IC_EVEX_W_XD_K:
217217 case IC_EVEX_W_OPSIZE_K:
218 case IC_EVEX_W_OPSIZE_B:
218219 return false;
219220 case IC_EVEX_L_K:
220221 case IC_EVEX_L_XS_K: