llvm.org GIT mirror llvm / 14b4c03
Add more fused mul+add/sub patterns. rdar://10139676 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@154484 91177308-0d34-0410-b5e6-96231b3b80d8 Evan Cheng 8 years ago
3 changed file(s) with 104 addition(s) and 20 deletion(s). Raw diff Collapse all Expand all
41324132 Requires<[HasVFP4,UseFusedMAC]>;
41334133
41344134 // Match @llvm.fma.* intrinsics
4135 def : Pat<(fma (v2f32 DPR:$src1), (v2f32 DPR:$Vn), (v2f32 DPR:$Vm)),
4135 def : Pat<(v2f32 (fma DPR:$src1, DPR:$Vn, DPR:$Vm)),
41364136 (VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
41374137 Requires<[HasVFP4]>;
4138 def : Pat<(fma (v4f32 QPR:$src1), (v4f32 QPR:$Vn), (v4f32 QPR:$Vm)),
4138 def : Pat<(v4f32 (fma QPR:$src1, QPR:$Vn, QPR:$Vm)),
41394139 (VFMAfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
41404140 Requires<[HasVFP4]>;
4141 def : Pat<(v2f32 (fma (fneg DPR:$src1), DPR:$Vn, DPR:$Vm)),
4142 (VFMSfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
4143 Requires<[HasVFP4]>;
4144 def : Pat<(v4f32 (fma (fneg QPR:$src1), QPR:$Vn, QPR:$Vm)),
4145 (VFMSfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
4146 Requires<[HasVFP4]>;
41414147
41424148 // Vector Subtract Operations.
41434149
10801080 Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
10811081
10821082 // Match @llvm.fma.* intrinsics
1083 def : Pat<(fma (f64 DPR:$Ddin), (f64 DPR:$Dn), (f64 DPR:$Dm)),
1083 def : Pat<(f64 (fma DPR:$Ddin, DPR:$Dn, DPR:$Dm)),
10841084 (VFMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
10851085 Requires<[HasVFP4]>;
1086 def : Pat<(fma (f32 SPR:$Sdin), (f32 SPR:$Sn), (f32 SPR:$Sm)),
1086 def : Pat<(f32 (fma SPR:$Sdin, SPR:$Sn, SPR:$Sm)),
10871087 (VFMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
10881088 Requires<[HasVFP4]>;
10891089
11131113 (VFMSS SPR:$dstin, SPR:$a, SPR:$b)>,
11141114 Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
11151115
1116 // Match @llvm.fma.* intrinsics
1117 // (fma (fneg x), y, z) -> (vfms x, y, z)
1118 def : Pat<(f64 (fma (fneg DPR:$Ddin), DPR:$Dn, DPR:$Dm)),
1119 (VFMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
1120 Requires<[HasVFP4]>;
1121 def : Pat<(f32 (fma (fneg SPR:$Sdin), SPR:$Sn, SPR:$Sm)),
1122 (VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
1123 Requires<[HasVFP4]>;
1124 // (fneg (fma x, (fneg y), z)) -> (vfms x, y, z)
1125 def : Pat<(fneg (f64 (fma DPR:$Ddin, (fneg DPR:$Dn), DPR:$Dm))),
1126 (VFMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
1127 Requires<[HasVFP4]>;
1128 def : Pat<(fneg (f32 (fma SPR:$Sdin, (fneg SPR:$Sn), SPR:$Sm))),
1129 (VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
1130 Requires<[HasVFP4]>;
1131
11161132 def VFNMAD : ADbI<0b11101, 0b01, 1, 0,
11171133 (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
11181134 IIC_fpFMAC64, "vfnma", ".f64\t$Dd, $Dn, $Dm",
11401156 Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
11411157
11421158 // Match @llvm.fma.* intrinsics
1159 // (fneg (fma x, y, z)) -> (vfnma x, y, z)
11431160 def : Pat<(fneg (fma (f64 DPR:$Ddin), (f64 DPR:$Dn), (f64 DPR:$Dm))),
11441161 (VFNMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
11451162 Requires<[HasVFP4]>;
11461163 def : Pat<(fneg (fma (f32 SPR:$Sdin), (f32 SPR:$Sn), (f32 SPR:$Sm))),
1164 (VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
1165 Requires<[HasVFP4]>;
1166 // (fma (fneg x), y, (fneg z)) -> (vfnma x, y, z)
1167 def : Pat<(f64 (fma (fneg DPR:$Ddin), DPR:$Dn, (fneg DPR:$Dm))),
1168 (VFNMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
1169 Requires<[HasVFP4]>;
1170 def : Pat<(f32 (fma (fneg SPR:$Sdin), SPR:$Sn, (fneg SPR:$Sm))),
11471171 (VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
11481172 Requires<[HasVFP4]>;
11491173
11711195 def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin),
11721196 (VFNMSS SPR:$dstin, SPR:$a, SPR:$b)>,
11731197 Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
1198
1199 // Match @llvm.fma.* intrinsics
1200 // (fneg (fma (fneg x), y, z)) -> (vfnms x, y, z)
1201 def : Pat<(fneg (f64 (fma (fneg DPR:$Ddin), DPR:$Dn, DPR:$Dm))),
1202 (VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
1203 Requires<[HasVFP4]>;
1204 def : Pat<(fneg (f32 (fma (fneg SPR:$Sdin), SPR:$Sn, SPR:$Sm))),
1205 (VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
1206 Requires<[HasVFP4]>;
1207 // (fma x, (fneg y), z) -> (vfnms x, y, z)
1208 def : Pat<(f64 (fma DPR:$Ddin, (fneg DPR:$Dn), DPR:$Dm)),
1209 (VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
1210 Requires<[HasVFP4]>;
1211 def : Pat<(f32 (fma SPR:$Sdin, (fneg SPR:$Sn), SPR:$Sm)),
1212 (VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
1213 Requires<[HasVFP4]>;
11741214
11751215 //===----------------------------------------------------------------------===//
11761216 // FP Conditional moves.
102102 entry:
103103 ; CHECK: test_fma_f32
104104 ; CHECK: vfma.f32
105 %call = tail call float @llvm.fma.f32(float %a, float %b, float %c) nounwind readnone
106 ret float %call
105 %tmp1 = tail call float @llvm.fma.f32(float %a, float %b, float %c) nounwind readnone
106 ret float %tmp1
107107 }
108108
109109 define double @test_fma_f64(double %a, double %b, double %c) nounwind readnone ssp {
110110 entry:
111111 ; CHECK: test_fma_f64
112112 ; CHECK: vfma.f64
113 %call = tail call double @llvm.fma.f64(double %a, double %b, double %c) nounwind readnone
114 ret double %call
113 %tmp1 = tail call double @llvm.fma.f64(double %a, double %b, double %c) nounwind readnone
114 ret double %tmp1
115115 }
116116
117117 define <2 x float> @test_fma_v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind readnone ssp {
118118 entry:
119119 ; CHECK: test_fma_v2f32
120120 ; CHECK: vfma.f32
121 %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind
122 ret <2 x float> %0
121 %tmp1 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind
122 ret <2 x float> %tmp1
123123 }
124124
125 define float @test_fnma_f32(float %a, float %b, float %c) nounwind readnone ssp {
125 define double @test_fms_f64(double %a, double %b, double %c) nounwind readnone ssp {
126126 entry:
127 ; CHECK: test_fnma_f32
128 ; CHECK: vfnma.f32
129 %call = tail call float @llvm.fma.f32(float %a, float %b, float %c) nounwind readnone
130 %tmp1 = fsub float -0.0, %call
131 %tmp2 = fsub float %tmp1, %c
132 ret float %tmp2
127 ; CHECK: test_fms_f64
128 ; CHECK: vfms.f64
129 %tmp1 = fsub double -0.0, %a
130 %tmp2 = tail call double @llvm.fma.f64(double %tmp1, double %b, double %c) nounwind readnone
131 ret double %tmp2
132 }
133
134 define double @test_fms_f64_2(double %a, double %b, double %c) nounwind readnone ssp {
135 entry:
136 ; CHECK: test_fms_f64_2
137 ; CHECK: vfms.f64
138 %tmp1 = fsub double -0.0, %b
139 %tmp2 = tail call double @llvm.fma.f64(double %a, double %tmp1, double %c) nounwind readnone
140 %tmp3 = fsub double -0.0, %tmp2
141 ret double %tmp3
142 }
143
144 define double @test_fnms_f64(double %a, double %b, double %c) nounwind readnone ssp {
145 entry:
146 ; CHECK: test_fnms_f64
147 ; CHECK: vfnms.f64
148 %tmp1 = fsub double -0.0, %a
149 %tmp2 = tail call double @llvm.fma.f64(double %tmp1, double %b, double %c) nounwind readnone
150 %tmp3 = fsub double -0.0, %tmp2
151 ret double %tmp3
152 }
153
154 define double @test_fnms_f64_2(double %a, double %b, double %c) nounwind readnone ssp {
155 entry:
156 ; CHECK: test_fnms_f64_2
157 ; CHECK: vfnms.f64
158 %tmp1 = fsub double -0.0, %b
159 %tmp2 = tail call double @llvm.fma.f64(double %a, double %tmp1, double %c) nounwind readnone
160 ret double %tmp2
133161 }
134162
135163 define double @test_fnma_f64(double %a, double %b, double %c) nounwind readnone ssp {
136164 entry:
137165 ; CHECK: test_fnma_f64
138166 ; CHECK: vfnma.f64
139 %call = tail call double @llvm.fma.f64(double %a, double %b, double %c) nounwind readnone
140 %tmp = fsub double -0.0, %call
141 ret double %tmp
167 %tmp1 = tail call double @llvm.fma.f64(double %a, double %b, double %c) nounwind readnone
168 %tmp2 = fsub double -0.0, %tmp1
169 ret double %tmp2
170 }
171
172 define double @test_fnma_f64_2(double %a, double %b, double %c) nounwind readnone ssp {
173 entry:
174 ; CHECK: test_fnma_f64_2
175 ; CHECK: vfnma.f64
176 %tmp1 = fsub double -0.0, %a
177 %tmp2 = fsub double -0.0, %c
178 %tmp3 = tail call double @llvm.fma.f64(double %tmp1, double %b, double %tmp2) nounwind readnone
179 ret double %tmp3
142180 }
143181
144182 declare float @llvm.fma.f32(float, float, float) nounwind readnone