llvm.org GIT mirror llvm / e9b5b0c
[RISCV] Support Bit-Preserving FP in F/D Extensions Summary: This allows some integer bitwise operations to instead be performed by hardware fp instructions. This is correct because the RISC-V spec requires the F and D extensions to use the IEEE-754 standard representation, and fp register loads and stores to be bit-preserving. This is tested against the soft-float ABI, but with hardware float extensions enabled, so that the tests ensure the optimisation also fires in this case. Reviewers: asb, luismarques Reviewed By: asb Subscribers: hiraditya, rbar, johnrusso, simoncook, apazos, sabuasal, niosHD, kito-cheng, shiva0217, jrtc27, zzheng, edward-jones, rogfer01, MartinMosbeck, brucehoult, the_o, rkruppe, PkmX, jocewei, psnobl, benna, Jim, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D62900 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@362790 91177308-0d34-0410-b5e6-96231b3b80d8 Sam Elliott 4 months ago
3 changed file(s) with 397 addition(s) and 0 deletion(s). Raw diff Collapse all Expand all
299299
// Sign-extension is reported as cheaper than zero-extension only for
// i32 -> i64 on 64-bit targets (RV64 ops naturally sign-extend 32-bit values).
300300 bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
301301 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
302 }
303
// Allow bitwise logic on the bit pattern of f32/f64 values to be performed
// with FP instructions: the F and D extensions mandate the IEEE-754
// representation and bit-preserving FP register moves, so this is sound.
304 bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
305 return (VT == MVT::f32 && Subtarget.hasStdExtF()) ||
306 (VT == MVT::f64 && Subtarget.hasStdExtD());
302307 }
303308
304309 // Changes the condition code and swaps operands if necessary, so the SetCC
7070 bool isTruncateFree(EVT SrcVT, EVT DstVT) const override;
7171 bool isZExtFree(SDValue Val, EVT VT2) const override;
7272 bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override;
73
74 bool hasBitPreservingFPLogic(EVT VT) const override;
7375
7476 // Provide custom lowering hooks for some operations.
7577 SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
0 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1 ; RUN: llc -mtriple=riscv32 -mattr=+f -verify-machineinstrs < %s \
2 ; RUN: | FileCheck -check-prefix=RV32F %s
3 ; RUN: llc -mtriple=riscv32 -mattr=+f,+d -verify-machineinstrs < %s \
4 ; RUN: | FileCheck -check-prefix=RV32FD %s
5 ; RUN: llc -mtriple=riscv64 -mattr=+f -verify-machineinstrs < %s \
6 ; RUN: | FileCheck -check-prefix=RV64F %s
7 ; RUN: llc -mtriple=riscv64 -mattr=+f,+d -verify-machineinstrs < %s \
8 ; RUN: | FileCheck -check-prefix=RV64FD %s
9
10 ; These functions perform extra work to ensure that `%a3` starts in a
11 ; floating-point register, if the machine has them, and the result of
12 ; the bitwise operation is then needed in a floating-point register.
13 ; This should mean the optimisations will fire even if you're using the
14 ; soft-float ABI on a machine with hardware floating-point support.
15
; Checks that (and (bitcast f32 to i32), 0x7FFFFFFF) is selected as fabs.s on
; all F-capable targets, instead of round-tripping through integer registers.
16 define float @bitcast_and(float %a1, float %a2) nounwind {
17 ; RV32F-LABEL: bitcast_and:
18 ; RV32F: # %bb.0:
19 ; RV32F-NEXT: fmv.w.x ft0, a1
20 ; RV32F-NEXT: fmv.w.x ft1, a0
21 ; RV32F-NEXT: fadd.s ft0, ft1, ft0
22 ; RV32F-NEXT: fabs.s ft0, ft0
23 ; RV32F-NEXT: fadd.s ft0, ft1, ft0
24 ; RV32F-NEXT: fmv.x.w a0, ft0
25 ; RV32F-NEXT: ret
26 ;
27 ; RV32FD-LABEL: bitcast_and:
28 ; RV32FD: # %bb.0:
29 ; RV32FD-NEXT: fmv.w.x ft0, a1
30 ; RV32FD-NEXT: fmv.w.x ft1, a0
31 ; RV32FD-NEXT: fadd.s ft0, ft1, ft0
32 ; RV32FD-NEXT: fabs.s ft0, ft0
33 ; RV32FD-NEXT: fadd.s ft0, ft1, ft0
34 ; RV32FD-NEXT: fmv.x.w a0, ft0
35 ; RV32FD-NEXT: ret
36 ;
37 ; RV64F-LABEL: bitcast_and:
38 ; RV64F: # %bb.0:
39 ; RV64F-NEXT: fmv.w.x ft0, a1
40 ; RV64F-NEXT: fmv.w.x ft1, a0
41 ; RV64F-NEXT: fadd.s ft0, ft1, ft0
42 ; RV64F-NEXT: fabs.s ft0, ft0
43 ; RV64F-NEXT: fadd.s ft0, ft1, ft0
44 ; RV64F-NEXT: fmv.x.w a0, ft0
45 ; RV64F-NEXT: ret
46 ;
47 ; RV64FD-LABEL: bitcast_and:
48 ; RV64FD: # %bb.0:
49 ; RV64FD-NEXT: fmv.w.x ft0, a1
50 ; RV64FD-NEXT: fmv.w.x ft1, a0
51 ; RV64FD-NEXT: fadd.s ft0, ft1, ft0
52 ; RV64FD-NEXT: fabs.s ft0, ft0
53 ; RV64FD-NEXT: fadd.s ft0, ft1, ft0
54 ; RV64FD-NEXT: fmv.x.w a0, ft0
55 ; RV64FD-NEXT: ret
; 2147483647 = 0x7FFFFFFF: clears only the f32 sign bit, i.e. fabs.
56 %a3 = fadd float %a1, %a2
57 %bc1 = bitcast float %a3 to i32
58 %and = and i32 %bc1, 2147483647
59 %bc2 = bitcast i32 %and to float
60 %a4 = fadd float %a1, %bc2
61 ret float %a4
62 }
63
; f64 variant: with the D extension the mask becomes fabs.d. Without D
; (RV32F/RV64F), double adds are __adddf3 libcalls and the sign-bit mask is
; applied with integer ops instead, so the FP-logic optimisation cannot fire.
64 define double @bitcast_double_and(double %a1, double %a2) nounwind {
65 ; RV32F-LABEL: bitcast_double_and:
66 ; RV32F: # %bb.0:
67 ; RV32F-NEXT: addi sp, sp, -16
68 ; RV32F-NEXT: sw ra, 12(sp)
69 ; RV32F-NEXT: sw s0, 8(sp)
70 ; RV32F-NEXT: sw s1, 4(sp)
71 ; RV32F-NEXT: mv s0, a1
72 ; RV32F-NEXT: mv s1, a0
73 ; RV32F-NEXT: call __adddf3
74 ; RV32F-NEXT: mv a2, a0
75 ; RV32F-NEXT: lui a0, 524288
76 ; RV32F-NEXT: addi a0, a0, -1
77 ; RV32F-NEXT: and a3, a1, a0
78 ; RV32F-NEXT: mv a0, s1
79 ; RV32F-NEXT: mv a1, s0
80 ; RV32F-NEXT: call __adddf3
81 ; RV32F-NEXT: lw s1, 4(sp)
82 ; RV32F-NEXT: lw s0, 8(sp)
83 ; RV32F-NEXT: lw ra, 12(sp)
84 ; RV32F-NEXT: addi sp, sp, 16
85 ; RV32F-NEXT: ret
86 ;
87 ; RV32FD-LABEL: bitcast_double_and:
88 ; RV32FD: # %bb.0:
89 ; RV32FD-NEXT: addi sp, sp, -16
90 ; RV32FD-NEXT: sw a2, 8(sp)
91 ; RV32FD-NEXT: sw a3, 12(sp)
92 ; RV32FD-NEXT: fld ft0, 8(sp)
93 ; RV32FD-NEXT: sw a0, 8(sp)
94 ; RV32FD-NEXT: sw a1, 12(sp)
95 ; RV32FD-NEXT: fld ft1, 8(sp)
96 ; RV32FD-NEXT: fadd.d ft0, ft1, ft0
97 ; RV32FD-NEXT: fabs.d ft0, ft0
98 ; RV32FD-NEXT: fadd.d ft0, ft1, ft0
99 ; RV32FD-NEXT: fsd ft0, 8(sp)
100 ; RV32FD-NEXT: lw a0, 8(sp)
101 ; RV32FD-NEXT: lw a1, 12(sp)
102 ; RV32FD-NEXT: addi sp, sp, 16
103 ; RV32FD-NEXT: ret
104 ;
105 ; RV64F-LABEL: bitcast_double_and:
106 ; RV64F: # %bb.0:
107 ; RV64F-NEXT: addi sp, sp, -16
108 ; RV64F-NEXT: sd ra, 8(sp)
109 ; RV64F-NEXT: sd s0, 0(sp)
110 ; RV64F-NEXT: mv s0, a0
111 ; RV64F-NEXT: call __adddf3
112 ; RV64F-NEXT: addi a1, zero, -1
113 ; RV64F-NEXT: slli a1, a1, 63
114 ; RV64F-NEXT: addi a1, a1, -1
115 ; RV64F-NEXT: and a1, a0, a1
116 ; RV64F-NEXT: mv a0, s0
117 ; RV64F-NEXT: call __adddf3
118 ; RV64F-NEXT: ld s0, 0(sp)
119 ; RV64F-NEXT: ld ra, 8(sp)
120 ; RV64F-NEXT: addi sp, sp, 16
121 ; RV64F-NEXT: ret
122 ;
123 ; RV64FD-LABEL: bitcast_double_and:
124 ; RV64FD: # %bb.0:
125 ; RV64FD-NEXT: fmv.d.x ft0, a1
126 ; RV64FD-NEXT: fmv.d.x ft1, a0
127 ; RV64FD-NEXT: fadd.d ft0, ft1, ft0
128 ; RV64FD-NEXT: fabs.d ft0, ft0
129 ; RV64FD-NEXT: fadd.d ft0, ft1, ft0
130 ; RV64FD-NEXT: fmv.x.d a0, ft0
131 ; RV64FD-NEXT: ret
; 9223372036854775807 = 0x7FFFFFFFFFFFFFFF: clears only the f64 sign bit.
132 %a3 = fadd double %a1, %a2
133 %bc1 = bitcast double %a3 to i64
134 %and = and i64 %bc1, 9223372036854775807
135 %bc2 = bitcast i64 %and to double
136 %a4 = fadd double %a1, %bc2
137 ret double %a4
138 }
139
140
; Checks that (xor (bitcast f32 to i32), 0x80000000) is selected as fneg.s on
; all F-capable targets (flipping the sign bit is exactly FP negation).
141 define float @bitcast_xor(float %a1, float %a2) nounwind {
142 ; RV32F-LABEL: bitcast_xor:
143 ; RV32F: # %bb.0:
144 ; RV32F-NEXT: fmv.w.x ft0, a1
145 ; RV32F-NEXT: fmv.w.x ft1, a0
146 ; RV32F-NEXT: fmul.s ft0, ft1, ft0
147 ; RV32F-NEXT: fneg.s ft0, ft0
148 ; RV32F-NEXT: fmul.s ft0, ft1, ft0
149 ; RV32F-NEXT: fmv.x.w a0, ft0
150 ; RV32F-NEXT: ret
151 ;
152 ; RV32FD-LABEL: bitcast_xor:
153 ; RV32FD: # %bb.0:
154 ; RV32FD-NEXT: fmv.w.x ft0, a1
155 ; RV32FD-NEXT: fmv.w.x ft1, a0
156 ; RV32FD-NEXT: fmul.s ft0, ft1, ft0
157 ; RV32FD-NEXT: fneg.s ft0, ft0
158 ; RV32FD-NEXT: fmul.s ft0, ft1, ft0
159 ; RV32FD-NEXT: fmv.x.w a0, ft0
160 ; RV32FD-NEXT: ret
161 ;
162 ; RV64F-LABEL: bitcast_xor:
163 ; RV64F: # %bb.0:
164 ; RV64F-NEXT: fmv.w.x ft0, a1
165 ; RV64F-NEXT: fmv.w.x ft1, a0
166 ; RV64F-NEXT: fmul.s ft0, ft1, ft0
167 ; RV64F-NEXT: fneg.s ft0, ft0
168 ; RV64F-NEXT: fmul.s ft0, ft1, ft0
169 ; RV64F-NEXT: fmv.x.w a0, ft0
170 ; RV64F-NEXT: ret
171 ;
172 ; RV64FD-LABEL: bitcast_xor:
173 ; RV64FD: # %bb.0:
174 ; RV64FD-NEXT: fmv.w.x ft0, a1
175 ; RV64FD-NEXT: fmv.w.x ft1, a0
176 ; RV64FD-NEXT: fmul.s ft0, ft1, ft0
177 ; RV64FD-NEXT: fneg.s ft0, ft0
178 ; RV64FD-NEXT: fmul.s ft0, ft1, ft0
179 ; RV64FD-NEXT: fmv.x.w a0, ft0
180 ; RV64FD-NEXT: ret
; 2147483648 = 0x80000000: flips the f32 sign bit, i.e. fneg.
; (The value is named %and only to mirror the bitcast_and template.)
181 %a3 = fmul float %a1, %a2
182 %bc1 = bitcast float %a3 to i32
183 %and = xor i32 %bc1, 2147483648
184 %bc2 = bitcast i32 %and to float
185 %a4 = fmul float %a1, %bc2
186 ret float %a4
187 }
188
; f64 variant: with the D extension the sign-bit xor becomes fneg.d. Without D
; (RV32F/RV64F), double multiplies are __muldf3 libcalls and the sign bit is
; flipped with integer xor, so the FP-logic optimisation cannot fire.
189 define double @bitcast_double_xor(double %a1, double %a2) nounwind {
190 ; RV32F-LABEL: bitcast_double_xor:
191 ; RV32F: # %bb.0:
192 ; RV32F-NEXT: addi sp, sp, -16
193 ; RV32F-NEXT: sw ra, 12(sp)
194 ; RV32F-NEXT: sw s0, 8(sp)
195 ; RV32F-NEXT: sw s1, 4(sp)
196 ; RV32F-NEXT: mv s0, a1
197 ; RV32F-NEXT: mv s1, a0
198 ; RV32F-NEXT: call __muldf3
199 ; RV32F-NEXT: mv a2, a0
200 ; RV32F-NEXT: lui a0, 524288
201 ; RV32F-NEXT: xor a3, a1, a0
202 ; RV32F-NEXT: mv a0, s1
203 ; RV32F-NEXT: mv a1, s0
204 ; RV32F-NEXT: call __muldf3
205 ; RV32F-NEXT: lw s1, 4(sp)
206 ; RV32F-NEXT: lw s0, 8(sp)
207 ; RV32F-NEXT: lw ra, 12(sp)
208 ; RV32F-NEXT: addi sp, sp, 16
209 ; RV32F-NEXT: ret
210 ;
211 ; RV32FD-LABEL: bitcast_double_xor:
212 ; RV32FD: # %bb.0:
213 ; RV32FD-NEXT: addi sp, sp, -16
214 ; RV32FD-NEXT: sw a2, 8(sp)
215 ; RV32FD-NEXT: sw a3, 12(sp)
216 ; RV32FD-NEXT: fld ft0, 8(sp)
217 ; RV32FD-NEXT: sw a0, 8(sp)
218 ; RV32FD-NEXT: sw a1, 12(sp)
219 ; RV32FD-NEXT: fld ft1, 8(sp)
220 ; RV32FD-NEXT: fmul.d ft0, ft1, ft0
221 ; RV32FD-NEXT: fneg.d ft0, ft0
222 ; RV32FD-NEXT: fmul.d ft0, ft1, ft0
223 ; RV32FD-NEXT: fsd ft0, 8(sp)
224 ; RV32FD-NEXT: lw a0, 8(sp)
225 ; RV32FD-NEXT: lw a1, 12(sp)
226 ; RV32FD-NEXT: addi sp, sp, 16
227 ; RV32FD-NEXT: ret
228 ;
229 ; RV64F-LABEL: bitcast_double_xor:
230 ; RV64F: # %bb.0:
231 ; RV64F-NEXT: addi sp, sp, -16
232 ; RV64F-NEXT: sd ra, 8(sp)
233 ; RV64F-NEXT: sd s0, 0(sp)
234 ; RV64F-NEXT: mv s0, a0
235 ; RV64F-NEXT: call __muldf3
236 ; RV64F-NEXT: addi a1, zero, -1
237 ; RV64F-NEXT: slli a1, a1, 63
238 ; RV64F-NEXT: xor a1, a0, a1
239 ; RV64F-NEXT: mv a0, s0
240 ; RV64F-NEXT: call __muldf3
241 ; RV64F-NEXT: ld s0, 0(sp)
242 ; RV64F-NEXT: ld ra, 8(sp)
243 ; RV64F-NEXT: addi sp, sp, 16
244 ; RV64F-NEXT: ret
245 ;
246 ; RV64FD-LABEL: bitcast_double_xor:
247 ; RV64FD: # %bb.0:
248 ; RV64FD-NEXT: fmv.d.x ft0, a1
249 ; RV64FD-NEXT: fmv.d.x ft1, a0
250 ; RV64FD-NEXT: fmul.d ft0, ft1, ft0
251 ; RV64FD-NEXT: fneg.d ft0, ft0
252 ; RV64FD-NEXT: fmul.d ft0, ft1, ft0
253 ; RV64FD-NEXT: fmv.x.d a0, ft0
254 ; RV64FD-NEXT: ret
; 9223372036854775808 = 0x8000000000000000: flips the f64 sign bit.
255 %a3 = fmul double %a1, %a2
256 %bc1 = bitcast double %a3 to i64
257 %and = xor i64 %bc1, 9223372036854775808
258 %bc2 = bitcast i64 %and to double
259 %a4 = fmul double %a1, %bc2
260 ret double %a4
261 }
262
; Checks that (or (bitcast f32 to i32), 0x80000000) — forcing the sign bit on —
; is selected as fabs.s followed by fneg.s (i.e. -|x|) on F-capable targets.
263 define float @bitcast_or(float %a1, float %a2) nounwind {
264 ; RV32F-LABEL: bitcast_or:
265 ; RV32F: # %bb.0:
266 ; RV32F-NEXT: fmv.w.x ft0, a1
267 ; RV32F-NEXT: fmv.w.x ft1, a0
268 ; RV32F-NEXT: fmul.s ft0, ft1, ft0
269 ; RV32F-NEXT: fabs.s ft0, ft0
270 ; RV32F-NEXT: fneg.s ft0, ft0
271 ; RV32F-NEXT: fmul.s ft0, ft1, ft0
272 ; RV32F-NEXT: fmv.x.w a0, ft0
273 ; RV32F-NEXT: ret
274 ;
275 ; RV32FD-LABEL: bitcast_or:
276 ; RV32FD: # %bb.0:
277 ; RV32FD-NEXT: fmv.w.x ft0, a1
278 ; RV32FD-NEXT: fmv.w.x ft1, a0
279 ; RV32FD-NEXT: fmul.s ft0, ft1, ft0
280 ; RV32FD-NEXT: fabs.s ft0, ft0
281 ; RV32FD-NEXT: fneg.s ft0, ft0
282 ; RV32FD-NEXT: fmul.s ft0, ft1, ft0
283 ; RV32FD-NEXT: fmv.x.w a0, ft0
284 ; RV32FD-NEXT: ret
285 ;
286 ; RV64F-LABEL: bitcast_or:
287 ; RV64F: # %bb.0:
288 ; RV64F-NEXT: fmv.w.x ft0, a1
289 ; RV64F-NEXT: fmv.w.x ft1, a0
290 ; RV64F-NEXT: fmul.s ft0, ft1, ft0
291 ; RV64F-NEXT: fabs.s ft0, ft0
292 ; RV64F-NEXT: fneg.s ft0, ft0
293 ; RV64F-NEXT: fmul.s ft0, ft1, ft0
294 ; RV64F-NEXT: fmv.x.w a0, ft0
295 ; RV64F-NEXT: ret
296 ;
297 ; RV64FD-LABEL: bitcast_or:
298 ; RV64FD: # %bb.0:
299 ; RV64FD-NEXT: fmv.w.x ft0, a1
300 ; RV64FD-NEXT: fmv.w.x ft1, a0
301 ; RV64FD-NEXT: fmul.s ft0, ft1, ft0
302 ; RV64FD-NEXT: fabs.s ft0, ft0
303 ; RV64FD-NEXT: fneg.s ft0, ft0
304 ; RV64FD-NEXT: fmul.s ft0, ft1, ft0
305 ; RV64FD-NEXT: fmv.x.w a0, ft0
306 ; RV64FD-NEXT: ret
; 2147483648 = 0x80000000: setting the sign bit yields -|x|.
; (The value is named %and only to mirror the bitcast_and template.)
307 %a3 = fmul float %a1, %a2
308 %bc1 = bitcast float %a3 to i32
309 %and = or i32 %bc1, 2147483648
310 %bc2 = bitcast i32 %and to float
311 %a4 = fmul float %a1, %bc2
312 ret float %a4
313 }
314
; f64 variant: with the D extension, setting the sign bit becomes
; fabs.d + fneg.d (-|x|). Without D (RV32F/RV64F), double multiplies are
; __muldf3 libcalls and the sign bit is set with integer or instead.
315 define double @bitcast_double_or(double %a1, double %a2) nounwind {
316 ; RV32F-LABEL: bitcast_double_or:
317 ; RV32F: # %bb.0:
318 ; RV32F-NEXT: addi sp, sp, -16
319 ; RV32F-NEXT: sw ra, 12(sp)
320 ; RV32F-NEXT: sw s0, 8(sp)
321 ; RV32F-NEXT: sw s1, 4(sp)
322 ; RV32F-NEXT: mv s0, a1
323 ; RV32F-NEXT: mv s1, a0
324 ; RV32F-NEXT: call __muldf3
325 ; RV32F-NEXT: mv a2, a0
326 ; RV32F-NEXT: lui a0, 524288
327 ; RV32F-NEXT: or a3, a1, a0
328 ; RV32F-NEXT: mv a0, s1
329 ; RV32F-NEXT: mv a1, s0
330 ; RV32F-NEXT: call __muldf3
331 ; RV32F-NEXT: lw s1, 4(sp)
332 ; RV32F-NEXT: lw s0, 8(sp)
333 ; RV32F-NEXT: lw ra, 12(sp)
334 ; RV32F-NEXT: addi sp, sp, 16
335 ; RV32F-NEXT: ret
336 ;
337 ; RV32FD-LABEL: bitcast_double_or:
338 ; RV32FD: # %bb.0:
339 ; RV32FD-NEXT: addi sp, sp, -16
340 ; RV32FD-NEXT: sw a2, 8(sp)
341 ; RV32FD-NEXT: sw a3, 12(sp)
342 ; RV32FD-NEXT: fld ft0, 8(sp)
343 ; RV32FD-NEXT: sw a0, 8(sp)
344 ; RV32FD-NEXT: sw a1, 12(sp)
345 ; RV32FD-NEXT: fld ft1, 8(sp)
346 ; RV32FD-NEXT: fmul.d ft0, ft1, ft0
347 ; RV32FD-NEXT: fabs.d ft0, ft0
348 ; RV32FD-NEXT: fneg.d ft0, ft0
349 ; RV32FD-NEXT: fmul.d ft0, ft1, ft0
350 ; RV32FD-NEXT: fsd ft0, 8(sp)
351 ; RV32FD-NEXT: lw a0, 8(sp)
352 ; RV32FD-NEXT: lw a1, 12(sp)
353 ; RV32FD-NEXT: addi sp, sp, 16
354 ; RV32FD-NEXT: ret
355 ;
356 ; RV64F-LABEL: bitcast_double_or:
357 ; RV64F: # %bb.0:
358 ; RV64F-NEXT: addi sp, sp, -16
359 ; RV64F-NEXT: sd ra, 8(sp)
360 ; RV64F-NEXT: sd s0, 0(sp)
361 ; RV64F-NEXT: mv s0, a0
362 ; RV64F-NEXT: call __muldf3
363 ; RV64F-NEXT: addi a1, zero, -1
364 ; RV64F-NEXT: slli a1, a1, 63
365 ; RV64F-NEXT: or a1, a0, a1
366 ; RV64F-NEXT: mv a0, s0
367 ; RV64F-NEXT: call __muldf3
368 ; RV64F-NEXT: ld s0, 0(sp)
369 ; RV64F-NEXT: ld ra, 8(sp)
370 ; RV64F-NEXT: addi sp, sp, 16
371 ; RV64F-NEXT: ret
372 ;
373 ; RV64FD-LABEL: bitcast_double_or:
374 ; RV64FD: # %bb.0:
375 ; RV64FD-NEXT: fmv.d.x ft0, a1
376 ; RV64FD-NEXT: fmv.d.x ft1, a0
377 ; RV64FD-NEXT: fmul.d ft0, ft1, ft0
378 ; RV64FD-NEXT: fabs.d ft0, ft0
379 ; RV64FD-NEXT: fneg.d ft0, ft0
380 ; RV64FD-NEXT: fmul.d ft0, ft1, ft0
381 ; RV64FD-NEXT: fmv.x.d a0, ft0
382 ; RV64FD-NEXT: ret
; 9223372036854775808 = 0x8000000000000000: setting the f64 sign bit yields -|x|.
383 %a3 = fmul double %a1, %a2
384 %bc1 = bitcast double %a3 to i64
385 %and = or i64 %bc1, 9223372036854775808
386 %bc2 = bitcast i64 %and to double
387 %a4 = fmul double %a1, %bc2
388 ret double %a4
389 }