llvm.org GIT mirror llvm / 257e85e
R600: Custom lower frem git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@217553 91177308-0d34-0410-b5e6-96231b3b80d8 Matt Arsenault 6 years ago
3 changed file(s) with 123 addition(s) and 0 deletion(s). Raw diff Collapse all Expand all
128128 setOperationAction(ISD::FRINT, MVT::f32, Legal);
129129 setOperationAction(ISD::FROUND, MVT::f32, Legal);
130130 setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
131
132 setOperationAction(ISD::FREM, MVT::f32, Custom);
133 setOperationAction(ISD::FREM, MVT::f64, Custom);
131134
132135 // Lower floating point store/load to integer store/load to reduce the number
133136 // of patterns in tablegen.
346349 setOperationAction(ISD::FDIV, VT, Expand);
347350 setOperationAction(ISD::FEXP2, VT, Expand);
348351 setOperationAction(ISD::FLOG2, VT, Expand);
352 setOperationAction(ISD::FREM, VT, Expand);
349353 setOperationAction(ISD::FPOW, VT, Expand);
350354 setOperationAction(ISD::FFLOOR, VT, Expand);
351355 setOperationAction(ISD::FTRUNC, VT, Expand);
547551 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
548552 case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
549553 case ISD::SDIVREM: return LowerSDIVREM(Op, DAG);
554 case ISD::FREM: return LowerFREM(Op, DAG);
550555 case ISD::FCEIL: return LowerFCEIL(Op, DAG);
551556 case ISD::FTRUNC: return LowerFTRUNC(Op, DAG);
552557 case ISD::FRINT: return LowerFRINT(Op, DAG);
16471652 Rem
16481653 };
16491654 return DAG.getMergeValues(Res, DL);
1655 }
1656
1657 // Custom lowering for ISD::FREM: (frem x, y) -> (fsub x, (fmul (ftrunc (fdiv x, y)), y))
1658 SDValue AMDGPUTargetLowering::LowerFREM(SDValue Op, SelectionDAG &DAG) const {
1659 SDLoc SL(Op);
1660 EVT VT = Op.getValueType(); // every node built below uses the frem's own result type
1661 SDValue X = Op.getOperand(0); // dividend
1662 SDValue Y = Op.getOperand(1); // divisor
1663
1664 SDValue Div = DAG.getNode(ISD::FDIV, SL, VT, X, Y); // x / y
1665 SDValue Floor = DAG.getNode(ISD::FTRUNC, SL, VT, Div); // NOTE: despite the name, this is an FTRUNC node, not FFLOOR
1666 SDValue Mul = DAG.getNode(ISD::FMUL, SL, VT, Floor, Y); // trunc(x / y) * y
1667
1668 return DAG.getNode(ISD::FSUB, SL, VT, X, Mul); // x - trunc(x / y) * y
16501669 }
16511670
16521671 SDValue AMDGPUTargetLowering::LowerFCEIL(SDValue Op, SelectionDAG &DAG) const {
4343 /// \returns The resulting chain.
4444
4545 SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
46 SDValue LowerFREM(SDValue Op, SelectionDAG &DAG) const;
4647 SDValue LowerFCEIL(SDValue Op, SelectionDAG &DAG) const;
4748 SDValue LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const;
4849 SDValue LowerFRINT(SDValue Op, SelectionDAG &DAG) const;
0 ; RUN: llc -march=r600 -mcpu=SI -enable-misched < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
1
2 ; FUNC-LABEL: @frem_f32:
3 ; SI-DAG: BUFFER_LOAD_DWORD [[X:v[0-9]+]], {{.*$}}
4 ; SI-DAG: BUFFER_LOAD_DWORD [[Y:v[0-9]+]], {{.*}} offset:0x10
5 ; SI-DAG: V_CMP
6 ; SI-DAG: V_MUL_F32
7 ; SI: V_RCP_F32_e32
8 ; SI: V_MUL_F32_e32
9 ; SI: V_MUL_F32_e32
10 ; SI: V_TRUNC_F32_e32
11 ; SI: V_MAD_F32
12 ; SI: S_ENDPGM
13 define void @frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
14 float addrspace(1)* %in2) #0 { ; attribute group #0: nounwind, "unsafe-fp-math"="false"
15 %gep2 = getelementptr float addrspace(1)* %in2, i32 4 ; 4 floats past %in2, i.e. the offset:0x10 expected on the second load
16 %r0 = load float addrspace(1)* %in1, align 4 ; dividend
17 %r1 = load float addrspace(1)* %gep2, align 4 ; divisor
18 %r2 = frem float %r0, %r1 ; operation under test
19 store float %r2, float addrspace(1)* %out, align 4
20 ret void
21 }
22
23 ; FUNC-LABEL: @unsafe_frem_f32:
24 ; SI: BUFFER_LOAD_DWORD [[Y:v[0-9]+]], {{.*}} offset:0x10
25 ; SI: BUFFER_LOAD_DWORD [[X:v[0-9]+]], {{.*}}
26 ; SI: V_RCP_F32_e32 [[INVY:v[0-9]+]], [[Y]]
27 ; SI: V_MUL_F32_e32 [[DIV:v[0-9]+]], [[INVY]], [[X]]
28 ; SI: V_TRUNC_F32_e32 [[TRUNC:v[0-9]+]], [[DIV]]
29 ; SI: V_MAD_F32 [[RESULT:v[0-9]+]], -[[TRUNC]], [[Y]], [[X]],
30 ; SI: BUFFER_STORE_DWORD [[RESULT]]
31 ; SI: S_ENDPGM
32 define void @unsafe_frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
33 float addrspace(1)* %in2) #1 { ; attribute group #1: nounwind, "unsafe-fp-math"="true"
34 %gep2 = getelementptr float addrspace(1)* %in2, i32 4 ; 4 floats past %in2, i.e. the offset:0x10 expected on the Y load
35 %r0 = load float addrspace(1)* %in1, align 4 ; dividend (X in the checks above)
36 %r1 = load float addrspace(1)* %gep2, align 4 ; divisor (Y in the checks above)
37 %r2 = frem float %r0, %r1 ; operation under test
38 store float %r2, float addrspace(1)* %out, align 4
39 ret void
40 }
41
42 ; TODO: This should check something when f64 fdiv is implemented
43 ; correctly
44
45 ; FUNC-LABEL: @frem_f64:
46 ; SI: S_ENDPGM
47 define void @frem_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
48 double addrspace(1)* %in2) #0 { ; attribute group #0: nounwind, "unsafe-fp-math"="false"
49 %r0 = load double addrspace(1)* %in1, align 8 ; dividend
50 %r1 = load double addrspace(1)* %in2, align 8 ; divisor
51 %r2 = frem double %r0, %r1 ; operation under test
52 store double %r2, double addrspace(1)* %out, align 8
53 ret void
54 }
55
56 ; FUNC-LABEL: @unsafe_frem_f64:
57 ; SI: V_RCP_F64_e32
58 ; SI: V_MUL_F64
59 ; SI: V_BFE_I32
60 ; SI: V_FMA_F64
61 ; SI: S_ENDPGM
62 define void @unsafe_frem_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
63 double addrspace(1)* %in2) #1 { ; attribute group #1: nounwind, "unsafe-fp-math"="true"
64 %r0 = load double addrspace(1)* %in1, align 8 ; dividend
65 %r1 = load double addrspace(1)* %in2, align 8 ; divisor
66 %r2 = frem double %r0, %r1 ; operation under test
67 store double %r2, double addrspace(1)* %out, align 8
68 ret void
69 }
70
71 define void @frem_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in1,
72 <2 x float> addrspace(1)* %in2) #0 { ; no check lines precede this function; presumably it only has to compile
73 %gep2 = getelementptr <2 x float> addrspace(1)* %in2, i32 4 ; index counts whole <2 x float> elements, not scalars
74 %r0 = load <2 x float> addrspace(1)* %in1, align 8 ; dividend vector
75 %r1 = load <2 x float> addrspace(1)* %gep2, align 8 ; divisor vector
76 %r2 = frem <2 x float> %r0, %r1 ; operation under test
77 store <2 x float> %r2, <2 x float> addrspace(1)* %out, align 8
78 ret void
79 }
80
81 define void @frem_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in1,
82 <4 x float> addrspace(1)* %in2) #0 { ; no check lines precede this function; presumably it only has to compile
83 %gep2 = getelementptr <4 x float> addrspace(1)* %in2, i32 4 ; index counts whole <4 x float> elements, not scalars
84 %r0 = load <4 x float> addrspace(1)* %in1, align 16 ; dividend vector
85 %r1 = load <4 x float> addrspace(1)* %gep2, align 16 ; divisor vector
86 %r2 = frem <4 x float> %r0, %r1 ; operation under test
87 store <4 x float> %r2, <4 x float> addrspace(1)* %out, align 16
88 ret void
89 }
90
91 define void @frem_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in1,
92 <2 x double> addrspace(1)* %in2) #0 { ; no check lines precede this function; presumably it only has to compile
93 %gep2 = getelementptr <2 x double> addrspace(1)* %in2, i32 4 ; index counts whole <2 x double> elements, not scalars
94 %r0 = load <2 x double> addrspace(1)* %in1, align 16 ; dividend vector
95 %r1 = load <2 x double> addrspace(1)* %gep2, align 16 ; divisor vector
96 %r2 = frem <2 x double> %r0, %r1 ; operation under test
97 store <2 x double> %r2, <2 x double> addrspace(1)* %out, align 16
98 ret void
99 }
100
101 attributes #0 = { nounwind "unsafe-fp-math"="false" } ; used by the frem_* functions (scalar and vector)
102 attributes #1 = { nounwind "unsafe-fp-math"="true" } ; used by the unsafe_frem_* functions