llvm.org GIT mirror llvm / b8150d8
Enhance PR11334 fix to support extload from v2f32/v4f32 - Fix a remaining issue of PR11674 as well git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@163528 91177308-0d34-0410-b5e6-96231b3b80d8 Michael Liao 8 years ago
4 changed file(s) with 43 addition(s) and 11 deletion(s). Raw diff Collapse all Expand all
931931
932932 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
933933 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
934
935 setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, Legal);
934936 }
935937
936938 if (Subtarget->hasSSE41()) {
10411043 setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
10421044 setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
10431045 setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal);
1046
1047 setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, Legal);
10441048
10451049 setOperationAction(ISD::SRL, MVT::v16i16, Custom);
10461050 setOperationAction(ISD::SRL, MVT::v32i8, Custom);
239239 def loadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (load node:$ptr))>;
240240 def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (load node:$ptr))>;
241241
242 // 128-/256-bit extload pattern fragments
243 def extloadv2f32 : PatFrag<(ops node:$ptr), (v2f64 (extloadvf32 node:$ptr))>;
244 def extloadv4f32 : PatFrag<(ops node:$ptr), (v4f64 (extloadvf32 node:$ptr))>;
245
242246 // Like 'store', but always requires 128-bit vector alignment.
243247 def alignedstore : PatFrag<(ops node:$val, node:$ptr),
244248 (store node:$val, node:$ptr), [{
20062006 "vcvtps2pd\t{$src, $dst|$dst, $src}",
20072007 [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))],
20082008 IIC_SSE_CVT_PD_RR>, TB, VEX;
2009 let neverHasSideEffects = 1, mayLoad = 1 in
20102009 def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
2011 "vcvtps2pd\t{$src, $dst|$dst, $src}", [],
2012 IIC_SSE_CVT_PD_RM>, TB, VEX;
2010 "vcvtps2pd\t{$src, $dst|$dst, $src}",
2011 [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))],
2012 IIC_SSE_CVT_PD_RM>, TB, VEX;
20132013 def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
20142014 "vcvtps2pd\t{$src, $dst|$dst, $src}",
20152015 [(set VR256:$dst,
20272027 "cvtps2pd\t{$src, $dst|$dst, $src}",
20282028 [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))],
20292029 IIC_SSE_CVT_PD_RR>, TB;
2030 let neverHasSideEffects = 1, mayLoad = 1 in
20312030 def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
2032 "cvtps2pd\t{$src, $dst|$dst, $src}", [],
2033 IIC_SSE_CVT_PD_RM>, TB;
2031 "cvtps2pd\t{$src, $dst|$dst, $src}",
2032 [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))],
2033 IIC_SSE_CVT_PD_RM>, TB;
20342034 }
20352035
20362036 // Convert Packed DW Integers to Packed Double FP
21332133 (VCVTPS2PDrr VR128:$src)>;
21342134 def : Pat<(v4f64 (fextend (v4f32 VR128:$src))),
21352135 (VCVTPS2PDYrr VR128:$src)>;
2136 def : Pat<(v4f64 (fextend (loadv4f32 addr:$src))),
2136 def : Pat<(v4f64 (extloadv4f32 addr:$src)),
21372137 (VCVTPS2PDYrm addr:$src)>;
21382138 }
21392139
0 ; RUN: llc < %s -march=x86 -mattr=+sse41,-avx | FileCheck %s
1 ; RUN: llc < %s -march=x86 -mattr=+avx | FileCheck --check-prefix=AVX %s
12
23 ; PR11674
34 define void @fpext_frommem(<2 x float>* %in, <2 x double>* %out) {
45 entry:
5 ; TODO: We should be able to generate cvtps2pd for the load.
6 ; For now, just check that we generate something sane.
7 ; CHECK: cvtss2sd
8 ; CHECK: cvtss2sd
6 ; CHECK: cvtps2pd (%{{.+}}), %xmm{{[0-9]+}}
7 ; AVX: vcvtps2pd (%{{.+}}), %xmm{{[0-9]+}}
98 %0 = load <2 x float>* %in, align 8
109 %1 = fpext <2 x float> %0 to <2 x double>
1110 store <2 x double> %1, <2 x double>* %out, align 1
1211 ret void
1312 }
13
14 define void @fpext_frommem4(<4 x float>* %in, <4 x double>* %out) {
15 entry:
16 ; CHECK: cvtps2pd (%{{.+}}), %xmm{{[0-9]+}}
17 ; CHECK: cvtps2pd 8(%{{.+}}), %xmm{{[0-9]+}}
18 ; AVX: vcvtps2pd (%{{.+}}), %ymm{{[0-9]+}}
19 %0 = load <4 x float>* %in
20 %1 = fpext <4 x float> %0 to <4 x double>
21 store <4 x double> %1, <4 x double>* %out, align 1
22 ret void
23 }
24
25 define void @fpext_frommem8(<8 x float>* %in, <8 x double>* %out) {
26 entry:
27 ; CHECK: cvtps2pd (%{{.+}}), %xmm{{[0-9]+}}
28 ; CHECK: cvtps2pd 8(%{{.+}}), %xmm{{[0-9]+}}
29 ; CHECK: cvtps2pd 16(%{{.+}}), %xmm{{[0-9]+}}
30 ; CHECK: cvtps2pd 24(%{{.+}}), %xmm{{[0-9]+}}
31 ; AVX: vcvtps2pd (%{{.+}}), %ymm{{[0-9]+}}
32 ; AVX: vcvtps2pd 16(%{{.+}}), %ymm{{[0-9]+}}
33 %0 = load <8 x float>* %in
34 %1 = fpext <8 x float> %0 to <8 x double>
35 store <8 x double> %1, <8 x double>* %out, align 1
36 ret void
37 }