llvm.org GIT mirror llvm / 3cf3d08
[X86] Fix PR30926 - Add patterns for (v)cvtsi2s{s,d} and (v)cvtsd2s{s,d} The code emitted by Clang's intrinsics for (v)cvtsi2ss, (v)cvtsi2sd, (v)cvtsd2ss and (v)cvtss2sd is lowered to a code sequence that includes redundant (v)movss/(v)movsd instructions. This patch adds patterns for optimizing these sequences. Differential revision: https://reviews.llvm.org/D28455 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@291660 91177308-0d34-0410-b5e6-96231b3b80d8 Elad Cohen 3 years ago
6 changed file(s) with 220 addition(s) and 10 deletion(s). Raw diff Collapse all Expand all
59565956 (VCVTUSI2SDZrm_Int VR128X:$src1, addr:$src2)>;
59575957 } // Predicates = [HasAVX512]
59585958
5959 // Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang
5960 // which produce unnecessary vmovs{s,d} instructions
5961 let Predicates = [HasAVX512] in {
5962 def : Pat<(v4f32 (X86Movss
5963 (v4f32 VR128X:$dst),
5964 (v4f32 (scalar_to_vector (f32 (sint_to_fp GR64:$src)))))),
5965 (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
5966
5967 def : Pat<(v4f32 (X86Movss
5968 (v4f32 VR128X:$dst),
5969 (v4f32 (scalar_to_vector (f32 (sint_to_fp GR32:$src)))))),
5970 (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
5971
5972 def : Pat<(v2f64 (X86Movsd
5973 (v2f64 VR128X:$dst),
5974 (v2f64 (scalar_to_vector (f64 (sint_to_fp GR64:$src)))))),
5975 (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
5976
5977 def : Pat<(v2f64 (X86Movsd
5978 (v2f64 VR128X:$dst),
5979 (v2f64 (scalar_to_vector (f64 (sint_to_fp GR32:$src)))))),
5980 (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
5981 } // Predicates = [HasAVX512]
5982
59595983 // Convert float/double to signed/unsigned int 32/64 with truncation
59605984 multiclass avx512_cvt_s_all opc, string asm, X86VectorVTInfo _SrcRC,
59615985 X86VectorVTInfo _DstRC, SDNode OpNode,
61356159 (COPY_TO_REGCLASS (VCVTSD2SSZrr (COPY_TO_REGCLASS FR64X:$src, VR128X),
61366160 (COPY_TO_REGCLASS FR64X:$src, VR128X)), VR128X)>,
61376161 Requires<[HasAVX512]>;
6162
6163 def : Pat<(v4f32 (X86Movss
6164 (v4f32 VR128X:$dst),
6165 (v4f32 (scalar_to_vector
6166 (f32 (fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
6167 (VCVTSD2SSZrr VR128X:$dst, VR128X:$src)>,
6168 Requires<[HasAVX512]>;
6169
6170 def : Pat<(v2f64 (X86Movsd
6171 (v2f64 VR128X:$dst),
6172 (v2f64 (scalar_to_vector
6173 (f64 (fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
6174 (VCVTSS2SDZrr VR128X:$dst, VR128X:$src)>,
6175 Requires<[HasAVX512]>;
6176
61386177 //===----------------------------------------------------------------------===//
61396178 // AVX-512 Vector convert from signed/unsigned integer to float/double
61406179 // and from float/double to signed/unsigned integer
3131 InstrItinClass rm = arg_rm;
3232 InstrItinClass ri = arg_ri;
3333 }
34
3534
3635 // scalar
3736 let Sched = WriteFAdd in {
19211920 Sched<[WriteCvtF2FLd, ReadAfterLd]>;
19221921 }
19231922 } // isCodeGenOnly = 1
1923
1924 // Patterns used for matching (v)cvtsi2ss, (v)cvtsi2sd, (v)cvtsd2ss and
1925 // (v)cvtss2sd intrinsic sequences from clang which produce unnecessary
1926 // (v)movs{s,d} instructions
1927 let Predicates = [UseAVX] in {
1928 def : Pat<(v4f32 (X86Movss
1929 (v4f32 VR128:$dst),
1930 (v4f32 (scalar_to_vector
1931 (f32 (fpround (f64 (extractelt VR128:$src, (iPTR 0))))))))),
1932 (Int_VCVTSD2SSrr VR128:$dst, VR128:$src)>;
1933
1934 def : Pat<(v2f64 (X86Movsd
1935 (v2f64 VR128:$dst),
1936 (v2f64 (scalar_to_vector
1937 (f64 (fpextend (f32 (extractelt VR128:$src, (iPTR 0))))))))),
1938 (Int_VCVTSS2SDrr VR128:$dst, VR128:$src)>;
1939
1940 def : Pat<(v4f32 (X86Movss
1941 (v4f32 VR128:$dst),
1942 (v4f32 (scalar_to_vector (f32 (sint_to_fp GR64:$src)))))),
1943 (Int_VCVTSI2SS64rr VR128:$dst, GR64:$src)>;
1944
1945 def : Pat<(v4f32 (X86Movss
1946 (v4f32 VR128:$dst),
1947 (v4f32 (scalar_to_vector (f32 (sint_to_fp GR32:$src)))))),
1948 (Int_VCVTSI2SSrr VR128:$dst, GR32:$src)>;
1949
1950 def : Pat<(v2f64 (X86Movsd
1951 (v2f64 VR128:$dst),
1952 (v2f64 (scalar_to_vector (f64 (sint_to_fp GR64:$src)))))),
1953 (Int_VCVTSI2SD64rr VR128:$dst, GR64:$src)>;
1954
1955 def : Pat<(v2f64 (X86Movsd
1956 (v2f64 VR128:$dst),
1957 (v2f64 (scalar_to_vector (f64 (sint_to_fp GR32:$src)))))),
1958 (Int_VCVTSI2SDrr VR128:$dst, GR32:$src)>;
1959 } // Predicates = [UseAVX]
1960
1961 let Predicates = [UseSSE2] in {
1962 def : Pat<(v4f32 (X86Movss
1963 (v4f32 VR128:$dst),
1964 (v4f32 (scalar_to_vector
1965 (f32 (fpround (f64 (extractelt VR128:$src, (iPTR 0))))))))),
1966 (Int_CVTSD2SSrr VR128:$dst, VR128:$src)>;
1967
1968 def : Pat<(v2f64 (X86Movsd
1969 (v2f64 VR128:$dst),
1970 (v2f64 (scalar_to_vector
1971 (f64 (fpextend (f32 (extractelt VR128:$src, (iPTR 0))))))))),
1972 (Int_CVTSS2SDrr VR128:$dst, VR128:$src)>;
1973
1974 def : Pat<(v2f64 (X86Movsd
1975 (v2f64 VR128:$dst),
1976 (v2f64 (scalar_to_vector (f64 (sint_to_fp GR64:$src)))))),
1977 (Int_CVTSI2SD64rr VR128:$dst, GR64:$src)>;
1978
1979 def : Pat<(v2f64 (X86Movsd
1980 (v2f64 VR128:$dst),
1981 (v2f64 (scalar_to_vector (f64 (sint_to_fp GR32:$src)))))),
1982 (Int_CVTSI2SDrr VR128:$dst, GR32:$src)>;
1983 } // Predicates = [UseSSE2]
1984
1985 let Predicates = [UseSSE1] in {
1986 def : Pat<(v4f32 (X86Movss
1987 (v4f32 VR128:$dst),
1988 (v4f32 (scalar_to_vector (f32 (sint_to_fp GR64:$src)))))),
1989 (Int_CVTSI2SS64rr VR128:$dst, GR64:$src)>;
1990
1991 def : Pat<(v4f32 (X86Movss
1992 (v4f32 VR128:$dst),
1993 (v4f32 (scalar_to_vector (f32 (sint_to_fp GR32:$src)))))),
1994 (Int_CVTSI2SSrr VR128:$dst, GR32:$src)>;
1995 } // Predicates = [UseSSE1]
19241996
19251997 // Convert packed single/double fp to doubleword
19261998 def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
6161 ret <8 x float> %a
6262 }
6363
64 define <4 x float> @fptrunc01(<2 x double> %a0, <4 x float> %a1) nounwind {
65 ; CHECK-LABEL: fptrunc01:
66 ; CHECK: # BB#0:
67 ; CHECK-NEXT: vcvtsd2ss %xmm0, %xmm1, %xmm0
68 ; CHECK-NEXT: retq
69 %ext = extractelement <2 x double> %a0, i32 0
70 %cvt = fptrunc double %ext to float
71 %res = insertelement <4 x float> %a1, float %cvt, i32 0
72 ret <4 x float> %res
73 }
74
6475 define <4 x double> @fpext00(<4 x float> %b) nounwind {
6576 ; CHECK-LABEL: fpext00:
6677 ; CHECK: # BB#0:
6879 ; CHECK-NEXT: retq
6980 %a = fpext <4 x float> %b to <4 x double>
7081 ret <4 x double> %a
82 }
83
84 define <2 x double> @fpext01(<2 x double> %a0, <4 x float> %a1) nounwind {
85 ; CHECK-LABEL: fpext01:
86 ; CHECK: # BB#0:
87 ; CHECK-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm0
88 ; CHECK-NEXT: retq
89 %ext = extractelement <4 x float> %a1, i32 0
90 %cvt = fpext float %ext to double
91 %res = insertelement <2 x double> %a0, double %cvt, i32 0
92 ret <2 x double> %res
7193 }
7294
7395 define double @funcA(i64* nocapture %e) nounwind uwtable readonly ssp {
447447 ret <4 x float> %c
448448 }
449449
450 define <4 x float> @fptrunc03(<2 x double> %a0, <4 x float> %a1) nounwind {
451 ; ALL-LABEL: fptrunc03:
452 ; ALL: ## BB#0:
453 ; ALL-NEXT: vcvtsd2ss %xmm0, %xmm1, %xmm0
454 ; ALL-NEXT: retq
455 %ext = extractelement <2 x double> %a0, i32 0
456 %cvt = fptrunc double %ext to float
457 %res = insertelement <4 x float> %a1, float %cvt, i32 0
458 ret <4 x float> %res
459 }
460
450461 define <8 x double> @fpext00(<8 x float> %b) nounwind {
451462 ; ALL-LABEL: fpext00:
452463 ; ALL: ## BB#0:
473484 %mask = fcmp ogt <4 x double>%a1, %b1
474485 %c = select <4 x i1>%mask, <4 x double>%a, <4 x double>zeroinitializer
475486 ret <4 x double> %c
487 }
488
489 define <2 x double> @fpext02(<2 x double> %a0, <4 x float> %a1) nounwind {
490 ; ALL-LABEL: fpext02:
491 ; ALL: ## BB#0:
492 ; ALL-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm0
493 ; ALL-NEXT: retq
494 %ext = extractelement <4 x float> %a1, i32 0
495 %cvt = fpext float %ext to double
496 %res = insertelement <2 x double> %a0, double %cvt, i32 0
497 ret <2 x double> %res
476498 }
477499
478500 define double @funcA(i64* nocapture %e) {
12561256 define <2 x double> @test_mm_cvtsi32_sd(<2 x double> %a0, i32 %a1) nounwind {
12571257 ; X32-LABEL: test_mm_cvtsi32_sd:
12581258 ; X32: # BB#0:
1259 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1260 ; X32-NEXT: cvtsi2sdl %eax, %xmm1
1261 ; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1259 ; X32-NEXT: cvtsi2sdl {{[0-9]+}}(%esp), %xmm0
12621260 ; X32-NEXT: retl
12631261 ;
12641262 ; X64-LABEL: test_mm_cvtsi32_sd:
12651263 ; X64: # BB#0:
1266 ; X64-NEXT: cvtsi2sdl %edi, %xmm1
1267 ; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1264 ; X64-NEXT: cvtsi2sdl %edi, %xmm0
12681265 ; X64-NEXT: retq
12691266 %cvt = sitofp i32 %a1 to double
12701267 %res = insertelement <2 x double> %a0, double %cvt, i32 0
12921289 define <2 x double> @test_mm_cvtss_sd(<2 x double> %a0, <4 x float> %a1) nounwind {
12931290 ; X32-LABEL: test_mm_cvtss_sd:
12941291 ; X32: # BB#0:
1295 ; X32-NEXT: cvtss2sd %xmm1, %xmm1
1296 ; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1292 ; X32-NEXT: cvtss2sd %xmm1, %xmm0
12971293 ; X32-NEXT: retl
12981294 ;
12991295 ; X64-LABEL: test_mm_cvtss_sd:
13001296 ; X64: # BB#0:
1301 ; X64-NEXT: cvtss2sd %xmm1, %xmm1
1302 ; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1297 ; X64-NEXT: cvtss2sd %xmm1, %xmm0
13031298 ; X64-NEXT: retq
13041299 %ext = extractelement <4 x float> %a1, i32 0
13051300 %cvt = fpext float %ext to double
48174817 store <8 x float> %4, <8 x float>* %3, align 32
48184818 ret void
48194819 }
4820
4821 define <2 x double> @sitofp_i32_to_2f64(<2 x double> %a0, i32 %a1) nounwind {
4822 ; SSE-LABEL: sitofp_i32_to_2f64:
4823 ; SSE: # BB#0:
4824 ; SSE-NEXT: cvtsi2sdl %edi, %xmm0
4825 ; SSE-NEXT: retq
4826 ;
4827 ; AVX-LABEL: sitofp_i32_to_2f64:
4828 ; AVX: # BB#0:
4829 ; AVX-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0
4830 ; AVX-NEXT: retq
4831 %cvt = sitofp i32 %a1 to double
4832 %res = insertelement <2 x double> %a0, double %cvt, i32 0
4833 ret <2 x double> %res
4834 }
4835
4836 define <4 x float> @sitofp_i32_to_4f32(<4 x float> %a0, i32 %a1) nounwind {
4837 ; SSE-LABEL: sitofp_i32_to_4f32:
4838 ; SSE: # BB#0:
4839 ; SSE-NEXT: cvtsi2ssl %edi, %xmm0
4840 ; SSE-NEXT: retq
4841 ;
4842 ; AVX-LABEL: sitofp_i32_to_4f32:
4843 ; AVX: # BB#0:
4844 ; AVX-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0
4845 ; AVX-NEXT: retq
4846 %cvt = sitofp i32 %a1 to float
4847 %res = insertelement <4 x float> %a0, float %cvt, i32 0
4848 ret <4 x float> %res
4849 }
4850
4851 define <2 x double> @sitofp_i64_to_2f64(<2 x double> %a0, i64 %a1) nounwind {
4852 ; SSE-LABEL: sitofp_i64_to_2f64:
4853 ; SSE: # BB#0:
4854 ; SSE-NEXT: cvtsi2sdq %rdi, %xmm0
4855 ; SSE-NEXT: retq
4856 ;
4857 ; AVX-LABEL: sitofp_i64_to_2f64:
4858 ; AVX: # BB#0:
4859 ; AVX-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0
4860 ; AVX-NEXT: retq
4861 %cvt = sitofp i64 %a1 to double
4862 %res = insertelement <2 x double> %a0, double %cvt, i32 0
4863 ret <2 x double> %res
4864 }
4865
4866 define <4 x float> @sitofp_i64_to_4f32(<4 x float> %a0, i64 %a1) nounwind {
4867 ; SSE-LABEL: sitofp_i64_to_4f32:
4868 ; SSE: # BB#0:
4869 ; SSE-NEXT: cvtsi2ssq %rdi, %xmm0
4870 ; SSE-NEXT: retq
4871 ;
4872 ; AVX-LABEL: sitofp_i64_to_4f32:
4873 ; AVX: # BB#0:
4874 ; AVX-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0
4875 ; AVX-NEXT: retq
4876 %cvt = sitofp i64 %a1 to float
4877 %res = insertelement <4 x float> %a0, float %cvt, i32 0
4878 ret <4 x float> %res
4879 }