llvm.org GIT mirror llvm / 0c6a9e1
[X86] Add support for using EVEX instructions for the legacy vcvtph2ps intrinsics. Looks like there are some missed load-folding opportunities for i64 loads. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@317544 91177308-0d34-0410-b5e6-96231b3b80d8 Craig Topper 3 years ago
7 changed file(s) with 60 addition(s) and 40 deletion(s). Raw diff Collapse all Expand all
2517525175 case X86ISD::SCALAR_UINT_TO_FP_RND: return "X86ISD::SCALAR_UINT_TO_FP_RND";
2517625176 case X86ISD::CVTPS2PH: return "X86ISD::CVTPS2PH";
2517725177 case X86ISD::CVTPH2PS: return "X86ISD::CVTPH2PS";
25178 case X86ISD::CVTPH2PS_RND: return "X86ISD::CVTPH2PS_RND";
2517825179 case X86ISD::CVTP2SI: return "X86ISD::CVTP2SI";
2517925180 case X86ISD::CVTP2UI: return "X86ISD::CVTP2UI";
2518025181 case X86ISD::CVTP2SI_RND: return "X86ISD::CVTP2SI_RND";
562562 RSQRT28, RSQRT28S, RCP28, RCP28S, EXP2,
563563
564564 // Conversions between float and half-float.
565 CVTPS2PH, CVTPH2PS,
565 CVTPS2PH, CVTPH2PS, CVTPH2PS_RND,
566566
567567 // LWP insert record.
568568 LWPINS,
71767176 //===----------------------------------------------------------------------===//
71777177 multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
71787178 X86MemOperand x86memop, PatFrag ld_frag> {
7179 defm rr : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst), (ins _src.RC:$src),
7180 "vcvtph2ps", "$src", "$src",
7181 (X86cvtph2ps (_src.VT _src.RC:$src),
7182 (i32 FROUND_CURRENT))>, T8PD;
7183 defm rm : AVX512_maskable<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst), (ins x86memop:$src),
7184 "vcvtph2ps", "$src", "$src",
7185 (X86cvtph2ps (_src.VT (bitconvert (ld_frag addr:$src))),
7186 (i32 FROUND_CURRENT))>, T8PD;
7179 defm rr : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst),
7180 (ins _src.RC:$src), "vcvtph2ps", "$src", "$src",
7181 (X86cvtph2ps (_src.VT _src.RC:$src))>, T8PD;
7182 defm rm : AVX512_maskable<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
7183 (ins x86memop:$src), "vcvtph2ps", "$src", "$src",
7184 (X86cvtph2ps (_src.VT
7185 (bitconvert
7186 (ld_frag addr:$src))))>, T8PD;
71877187 }
71887188
71897189 multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src> {
7190 defm rb : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst), (ins _src.RC:$src),
7191 "vcvtph2ps", "{sae}, $src", "$src, {sae}",
7192 (X86cvtph2ps (_src.VT _src.RC:$src),
7193 (i32 FROUND_NO_EXC))>, T8PD, EVEX_B;
7190 defm rb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
7191 (ins _src.RC:$src), "vcvtph2ps",
7192 "{sae}, $src", "$src, {sae}",
7193 (X86cvtph2psRnd (_src.VT _src.RC:$src),
7194 (i32 FROUND_NO_EXC))>, T8PD, EVEX_B;
71947195
71957196 }
71967197
590590 def X86cvtp2Int : SDNode<"X86ISD::CVTP2SI", SDTFloatToInt>;
591591 def X86cvtp2UInt : SDNode<"X86ISD::CVTP2UI", SDTFloatToInt>;
592592
593
593594 def X86cvtph2ps : SDNode<"X86ISD::CVTPH2PS",
595 SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f32>,
596 SDTCVecEltisVT<1, i16>]> >;
597
598 def X86cvtph2psRnd : SDNode<"X86ISD::CVTPH2PS_RND",
594599 SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f32>,
595600 SDTCVecEltisVT<1, i16>,
596601 SDTCisVT<2, i32>]> >;
76847684 //===----------------------------------------------------------------------===//
76857685 // Half precision conversion instructions
76867686 //===----------------------------------------------------------------------===//
7687 multiclass f16c_ph2ps<RegisterClass RC, X86MemOperand x86memop, Intrinsic Int> {
7687 multiclass f16c_ph2ps<RegisterClass RC, X86MemOperand x86memop> {
76887688 def rr : I<0x13, MRMSrcReg, (outs RC:$dst), (ins VR128:$src),
76897689 "vcvtph2ps\t{$src, $dst|$dst, $src}",
7690 [(set RC:$dst, (Int VR128:$src))]>,
7690 [(set RC:$dst, (X86cvtph2ps VR128:$src))]>,
76917691 T8PD, VEX, Sched<[WriteCvtF2F]>;
76927692 let hasSideEffects = 0, mayLoad = 1 in
76937693 def rm : I<0x13, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
77097709 TAPD, VEX;
77107710 }
77117711
7712 let Predicates = [HasF16C, NoVLX] in {
7713 defm VCVTPH2PS : f16c_ph2ps<VR128, f64mem>;
7714 defm VCVTPH2PSY : f16c_ph2ps<VR256, f128mem>, VEX_L;
7715
7716 // Pattern match vcvtph2ps of a scalar i64 load.
7717 def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzmovl_v2i64 addr:$src)))),
7718 (VCVTPH2PSrm addr:$src)>;
7719 def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzload_v2i64 addr:$src)))),
7720 (VCVTPH2PSrm addr:$src)>;
7721 def : Pat<(v4f32 (X86cvtph2ps (v8i16 (bitconvert
7722 (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
7723 (VCVTPH2PSrm addr:$src)>;
7724 }
7725
77127726 let Predicates = [HasF16C] in {
7713 defm VCVTPH2PS : f16c_ph2ps<VR128, f64mem, int_x86_vcvtph2ps_128>;
7714 defm VCVTPH2PSY : f16c_ph2ps<VR256, f128mem, int_x86_vcvtph2ps_256>, VEX_L;
77157727 defm VCVTPS2PH : f16c_ps2ph;
77167728 defm VCVTPS2PHY : f16c_ps2ph, VEX_L;
7717
7718 // Pattern match vcvtph2ps of a scalar i64 load.
7719 def : Pat<(int_x86_vcvtph2ps_128 (vzmovl_v2i64 addr:$src)),
7720 (VCVTPH2PSrm addr:$src)>;
7721 def : Pat<(int_x86_vcvtph2ps_128 (vzload_v2i64 addr:$src)),
7722 (VCVTPH2PSrm addr:$src)>;
7723 def : Pat<(int_x86_vcvtph2ps_128 (bitconvert
7724 (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
7725 (VCVTPH2PSrm addr:$src)>;
77267729
77277730 def : Pat<(store (f64 (extractelt (bc_v2f64 (v8i16
77287731 (int_x86_vcvtps2ph_128 VR128:$src1, i32:$src2))), (iPTR 0))),
10701070 X86ISD::FSUBS_RND, 0),
10711071 X86_INTRINSIC_DATA(avx512_mask_sub_ss_round, INTR_TYPE_SCALAR_MASK_RM,
10721072 X86ISD::FSUBS_RND, 0),
1073 X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_128, INTR_TYPE_1OP_MASK_RM,
1073 X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_128, INTR_TYPE_1OP_MASK,
10741074 X86ISD::CVTPH2PS, 0),
1075 X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_256, INTR_TYPE_1OP_MASK_RM,
1075 X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_256, INTR_TYPE_1OP_MASK,
10761076 X86ISD::CVTPH2PS, 0),
1077 X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_512, INTR_TYPE_1OP_MASK_RM,
1078 X86ISD::CVTPH2PS, 0),
1077 X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_512, INTR_TYPE_1OP_MASK,
1078 X86ISD::CVTPH2PS, X86ISD::CVTPH2PS_RND),
10791079 X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_128, INTR_TYPE_2OP_MASK,
10801080 X86ISD::CVTPS2PH, 0),
10811081 X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_256, INTR_TYPE_2OP_MASK,
15851585 X86_INTRINSIC_DATA(ssse3_pmadd_ub_sw_128, INTR_TYPE_2OP, X86ISD::VPMADDUBSW, 0),
15861586 X86_INTRINSIC_DATA(ssse3_pmul_hr_sw_128, INTR_TYPE_2OP, X86ISD::MULHRS, 0),
15871587 X86_INTRINSIC_DATA(ssse3_pshuf_b_128, INTR_TYPE_2OP, X86ISD::PSHUFB, 0),
1588 X86_INTRINSIC_DATA(vcvtph2ps_128, INTR_TYPE_1OP, X86ISD::CVTPH2PS, 0),
1589 X86_INTRINSIC_DATA(vcvtph2ps_256, INTR_TYPE_1OP, X86ISD::CVTPH2PS, 0),
15881590 X86_INTRINSIC_DATA(xop_vpcomb, INTR_TYPE_3OP, X86ISD::VPCOM, 0),
15891591 X86_INTRINSIC_DATA(xop_vpcomd, INTR_TYPE_3OP, X86ISD::VPCOM, 0),
15901592 X86_INTRINSIC_DATA(xop_vpcomq, INTR_TYPE_3OP, X86ISD::VPCOM, 0),
1616 ;
1717 ; X32-AVX512VL-LABEL: test_x86_vcvtph2ps_128:
1818 ; X32-AVX512VL: # BB#0:
19 ; X32-AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x13,0xc0]
19 ; X32-AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x13,0xc0]
2020 ; X32-AVX512VL-NEXT: retl # encoding: [0xc3]
2121 ;
2222 ; X64-AVX512VL-LABEL: test_x86_vcvtph2ps_128:
2323 ; X64-AVX512VL: # BB#0:
24 ; X64-AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x13,0xc0]
24 ; X64-AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x13,0xc0]
2525 ; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
2626 %res = call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %a0) ; <<4 x float>> [#uses=1]
2727 ret <4 x float> %res
4242 ;
4343 ; X32-AVX512VL-LABEL: test_x86_vcvtph2ps_256:
4444 ; X32-AVX512VL: # BB#0:
45 ; X32-AVX512VL-NEXT: vcvtph2ps %xmm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x13,0xc0]
45 ; X32-AVX512VL-NEXT: vcvtph2ps %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x13,0xc0]
4646 ; X32-AVX512VL-NEXT: retl # encoding: [0xc3]
4747 ;
4848 ; X64-AVX512VL-LABEL: test_x86_vcvtph2ps_256:
4949 ; X64-AVX512VL: # BB#0:
50 ; X64-AVX512VL-NEXT: vcvtph2ps %xmm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x13,0xc0]
50 ; X64-AVX512VL-NEXT: vcvtph2ps %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x13,0xc0]
5151 ; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
5252 %res = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %a0) ; <<8 x float>> [#uses=1]
5353 ret <8 x float> %res
6969 ; X32-AVX512VL-LABEL: test_x86_vcvtph2ps_256_m:
7070 ; X32-AVX512VL: # BB#0:
7171 ; X32-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
72 ; X32-AVX512VL-NEXT: vcvtph2ps (%eax), %ymm0 # encoding: [0xc4,0xe2,0x7d,0x13,0x00]
72 ; X32-AVX512VL-NEXT: vcvtph2ps (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x13,0x00]
7373 ; X32-AVX512VL-NEXT: retl # encoding: [0xc3]
7474 ;
7575 ; X64-AVX512VL-LABEL: test_x86_vcvtph2ps_256_m:
7676 ; X64-AVX512VL: # BB#0:
77 ; X64-AVX512VL-NEXT: vcvtph2ps (%rdi), %ymm0 # encoding: [0xc4,0xe2,0x7d,0x13,0x07]
77 ; X64-AVX512VL-NEXT: vcvtph2ps (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x13,0x07]
7878 ; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
7979 %load = load <8 x i16>, <8 x i16>* %a, align 16
8080 %res = tail call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %load)
150150 ; X32-AVX512VL-LABEL: test_x86_vcvtps2ph_128_scalar:
151151 ; X32-AVX512VL: # BB#0:
152152 ; X32-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
153 ; X32-AVX512VL-NEXT: vcvtph2ps (%eax), %xmm0 # encoding: [0xc4,0xe2,0x79,0x13,0x00]
153 ; X32-AVX512VL-NEXT: vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00]
154 ; X32-AVX512VL-NEXT: # xmm0 = mem[0],zero
155 ; X32-AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x13,0xc0]
154156 ; X32-AVX512VL-NEXT: retl # encoding: [0xc3]
155157 ;
156158 ; X64-AVX512VL-LABEL: test_x86_vcvtps2ph_128_scalar:
157159 ; X64-AVX512VL: # BB#0:
158 ; X64-AVX512VL-NEXT: vcvtph2ps (%rdi), %xmm0 # encoding: [0xc4,0xe2,0x79,0x13,0x07]
160 ; X64-AVX512VL-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
161 ; X64-AVX512VL-NEXT: # xmm0 = mem[0],zero
162 ; X64-AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x13,0xc0]
159163 ; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
160164 %load = load i64, i64* %ptr
161165 %ins1 = insertelement <2 x i64> undef, i64 %load, i32 0
180184 ; X32-AVX512VL-LABEL: test_x86_vcvtps2ph_128_scalar2:
181185 ; X32-AVX512VL: # BB#0:
182186 ; X32-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
183 ; X32-AVX512VL-NEXT: vcvtph2ps (%eax), %xmm0 # encoding: [0xc4,0xe2,0x79,0x13,0x00]
187 ; X32-AVX512VL-NEXT: vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00]
188 ; X32-AVX512VL-NEXT: # xmm0 = mem[0],zero
189 ; X32-AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x13,0xc0]
184190 ; X32-AVX512VL-NEXT: retl # encoding: [0xc3]
185191 ;
186192 ; X64-AVX512VL-LABEL: test_x86_vcvtps2ph_128_scalar2:
187193 ; X64-AVX512VL: # BB#0:
188 ; X64-AVX512VL-NEXT: vcvtph2ps (%rdi), %xmm0 # encoding: [0xc4,0xe2,0x79,0x13,0x07]
194 ; X64-AVX512VL-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
195 ; X64-AVX512VL-NEXT: # xmm0 = mem[0],zero
196 ; X64-AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x13,0xc0]
189197 ; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
190198 %load = load i64, i64* %ptr
191199 %ins = insertelement <2 x i64> undef, i64 %load, i32 0