llvm.org GIT mirror llvm / 44c2d61
Add support for FP_ROUND from v2f64 to v2f32 - Due to the current matching vector elements constraints in ISD::FP_ROUND, rounding from v2f64 to v4f32 (after legalization from v2f32) is scalarized. Add a customized v2f32 widening to convert it into a target-specific X86ISD::VFPROUND to work around this constraint. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@165631 91177308-0d34-0410-b5e6-96231b3b80d8 Michael Liao 7 years ago
6 changed file(s) with 121 addition(s) and 15 deletion(s). Raw diff Collapse all Expand all
939939 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
940940
941941 setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
942 setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);
942943
943944 setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, Legal);
944945 }
1146711468 }
1146811469 return;
1146911470 }
11471 case ISD::FP_ROUND: {
11472 SDValue V = DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, N->getOperand(0));
11473 Results.push_back(V);
11474 return;
11475 }
1147011476 case ISD::READCYCLECOUNTER: {
1147111477 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
1147211478 SDValue TheChain = N->getOperand(0);
1166111667 case X86ISD::VSEXT_MOVL: return "X86ISD::VSEXT_MOVL";
1166211668 case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD";
1166311669 case X86ISD::VFPEXT: return "X86ISD::VFPEXT";
11670 case X86ISD::VFPROUND: return "X86ISD::VFPROUND";
1166411671 case X86ISD::VSHLDQ: return "X86ISD::VSHLDQ";
1166511672 case X86ISD::VSRLDQ: return "X86ISD::VSRLDQ";
1166611673 case X86ISD::VSHL: return "X86ISD::VSHL";
231231
232232 // VFPEXT - Vector FP extend.
233233 VFPEXT,
234
235 // VFPROUND - Vector FP round.
236 VFPROUND,
234237
235238 // VSHL, VSRL - 128-bit vector logical left / right shift
236239 VSHLDQ, VSRLDQ,
9292 def X86vfpext : SDNode<"X86ISD::VFPEXT",
9393 SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
9494 SDTCisFP<0>, SDTCisFP<1>]>>;
95 def X86vfpround: SDNode<"X86ISD::VFPROUND",
96 SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
97 SDTCisFP<0>, SDTCisFP<1>]>>;
9598
9699 def X86vshldq : SDNode<"X86ISD::VSHLDQ", SDTIntShiftOp>;
97100 def X86vshrdq : SDNode<"X86ISD::VSRLDQ", SDTIntShiftOp>;
21242124 (VCVTDQ2PSYrm addr:$src)>;
21252125
21262126 // Match fround and fextend for 128/256-bit conversions
2127 def : Pat<(v4f32 (X86vfpround (v2f64 VR128:$src))),
2128 (VCVTPD2PSrr VR128:$src)>;
2129 def : Pat<(v4f32 (X86vfpround (memopv2f64 addr:$src))),
2130 (VCVTPD2PSXrm addr:$src)>;
21272131 def : Pat<(v4f32 (fround (v4f64 VR256:$src))),
21282132 (VCVTPD2PSYrr VR256:$src)>;
21292133 def : Pat<(v4f32 (fround (loadv4f64 addr:$src))),
21382142 }
21392143
21402144 let Predicates = [UseSSE2] in {
2141 // Match fextend for 128 conversions
2145 // Match fround and fextend for 128 conversions
2146 def : Pat<(v4f32 (X86vfpround (v2f64 VR128:$src))),
2147 (CVTPD2PSrr VR128:$src)>;
2148 def : Pat<(v4f32 (X86vfpround (memopv2f64 addr:$src))),
2149 (CVTPD2PSrm addr:$src)>;
2150
21422151 def : Pat<(v2f64 (X86vfpext (v4f32 VR128:$src))),
21432152 (CVTPS2PDrr VR128:$src)>;
21442153 }
0 ; RUN: llc < %s -march=x86 -mattr=+sse2,-avx | FileCheck %s
1 ; RUN: llc < %s -march=x86 -mattr=+avx | FileCheck %s --check-prefix=AVX
2
3 define <1 x float> @test1(<1 x double>* %p) nounwind {
4 ; CHECK: test1
5 ; CHECK: cvtsd2ss
6 ; CHECK: ret
7 ; AVX: test1
8 ; AVX: vcvtsd2ss
9 ; AVX: ret
10 %x = load <1 x double>* %p
11 %y = fptrunc <1 x double> %x to <1 x float>
12 ret <1 x float> %y
13 }
14
15 define <2 x float> @test2(<2 x double>* %p) nounwind {
16 ; CHECK: test2
17 ; CHECK: cvtpd2ps {{[0-9]*}}(%{{.*}})
18 ; CHECK: ret
19 ; AVX: test2
20 ; AVX: vcvtpd2psx {{[0-9]*}}(%{{.*}})
21 ; AVX: ret
22 %x = load <2 x double>* %p
23 %y = fptrunc <2 x double> %x to <2 x float>
24 ret <2 x float> %y
25 }
26
27 define <4 x float> @test3(<4 x double>* %p) nounwind {
28 ; CHECK: test3
29 ; CHECK: cvtpd2ps {{[0-9]*}}(%{{.*}})
30 ; CHECK: cvtpd2ps {{[0-9]*}}(%{{.*}})
31 ; CHECK: movlhps
32 ; CHECK: ret
33 ; AVX: test3
34 ; AVX: vcvtpd2psy {{[0-9]*}}(%{{.*}})
35 ; AVX: ret
36 %x = load <4 x double>* %p
37 %y = fptrunc <4 x double> %x to <4 x float>
38 ret <4 x float> %y
39 }
40
41 define <8 x float> @test4(<8 x double>* %p) nounwind {
42 ; CHECK: test4
43 ; CHECK: cvtpd2ps {{[0-9]*}}(%{{.*}})
44 ; CHECK: cvtpd2ps {{[0-9]*}}(%{{.*}})
45 ; CHECK: movlhps
46 ; CHECK: cvtpd2ps {{[0-9]*}}(%{{.*}})
47 ; CHECK: cvtpd2ps {{[0-9]*}}(%{{.*}})
48 ; CHECK: movlhps
49 ; CHECK: ret
50 ; AVX: test4
51 ; AVX: vcvtpd2psy {{[0-9]*}}(%{{.*}})
52 ; AVX: vcvtpd2psy {{[0-9]*}}(%{{.*}})
53 ; AVX: vinsertf128
54 ; AVX: ret
55 %x = load <8 x double>* %p
56 %y = fptrunc <8 x double> %x to <8 x float>
57 ret <8 x float> %y
58 }
59
60
0 ; RUN: llc < %s -march=x86 -mattr=+sse2,-avx | FileCheck %s
1 ; RUN: llc < %s -march=x86 -mattr=+avx | FileCheck %s --check-prefix=AVX
12
23 define <1 x float> @test1(<1 x double> %x) nounwind {
4 ; CHECK: test1
35 ; CHECK: cvtsd2ss
46 ; CHECK: ret
7 ; AVX: test1
8 ; AVX: vcvtsd2ss
9 ; AVX: ret
510 %y = fptrunc <1 x double> %x to <1 x float>
611 ret <1 x float> %y
712 }
813
9
1014 define <2 x float> @test2(<2 x double> %x) nounwind {
11 ; FIXME: It would be nice if this compiled down to a cvtpd2ps
12 ; CHECK: cvtsd2ss
13 ; CHECK: cvtsd2ss
15 ; CHECK: test2
16 ; CHECK: cvtpd2ps
1417 ; CHECK: ret
18 ; AVX: test2
19 ; AVX-NOT: vcvtpd2psy
20 ; AVX: vcvtpd2ps
21 ; AVX: ret
1522 %y = fptrunc <2 x double> %x to <2 x float>
1623 ret <2 x float> %y
1724 }
1825
19 define <8 x float> @test3(<8 x double> %x) nounwind {
20 ; FIXME: It would be nice if this compiled down to a series of cvtpd2ps
21 ; CHECK: cvtsd2ss
22 ; CHECK: cvtsd2ss
23 ; CHECK: cvtsd2ss
24 ; CHECK: cvtsd2ss
25 ; CHECK: cvtsd2ss
26 ; CHECK: cvtsd2ss
27 ; CHECK: cvtsd2ss
28 ; CHECK: cvtsd2ss
26 define <4 x float> @test3(<4 x double> %x) nounwind {
27 ; CHECK: test3
28 ; CHECK: cvtpd2ps
29 ; CHECK: cvtpd2ps
30 ; CHECK: movlhps
2931 ; CHECK: ret
32 ; AVX: test3
33 ; AVX: vcvtpd2psy
34 ; AVX: ret
35 %y = fptrunc <4 x double> %x to <4 x float>
36 ret <4 x float> %y
37 }
38
39 define <8 x float> @test4(<8 x double> %x) nounwind {
40 ; CHECK: test4
41 ; CHECK: cvtpd2ps
42 ; CHECK: cvtpd2ps
43 ; CHECK: movlhps
44 ; CHECK: cvtpd2ps
45 ; CHECK: cvtpd2ps
46 ; CHECK: movlhps
47 ; CHECK: ret
48 ; AVX: test4
49 ; AVX: vcvtpd2psy
50 ; AVX: vcvtpd2psy
51 ; AVX: vinsertf128
52 ; AVX: ret
3053 %y = fptrunc <8 x double> %x to <8 x float>
3154 ret <8 x float> %y
3255 }