llvm.org GIT mirror llvm / 05bb7c5
AArch64: Better codegen for loading from __fp16. Loading will generally extend to an f32 or an f64, so make sure to match those patterns directly and load into the FPR16 register class rather than going through the integer GPRs. This also eliminates an extra step in the convert-to-f64 path, which was first converting to f32 and then to f64 from there. rdar://17594379 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212573 91177308-0d34-0410-b5e6-96231b3b80d8 Jim Grosbach 6 years ago
2 changed file(s) with 163 addition(s) and 0 deletion(s). Raw diff Collapse all Expand all
22402240
// Pseudo instruction producing an f32 in $Rd; its selection pattern matches
// f16_to_f32 applied to an i32-typed $Rn.
// NOTE(review): $Rn is declared as FPR32 in the ins list but typed i32 in the
// pattern -- confirm against the rest of the file that this is intentional.
22412241 def FCVTSHpseudo : Pseudo<(outs FPR32:$Rd), (ins FPR32:$Rn),
22422242 [(set (f32 FPR32:$Rd), (f16_to_f32 i32:$Rn))]>;
2243
2244 // When converting from f16 coming directly from a load, make sure we
2245 // load into the FPR16 registers rather than going through the GPRs.
2246 // f16->f32
// Each pattern below matches f16_to_f32 of an i32 zextloadi16 and selects
// FCVTSHr fed by the LDRH* load that corresponds to the matched address form.
// There is one pattern per address form.
// Address form: base $Rn plus 32-bit register $Rm with $extend (ro_Windexed16).
2247 def : Pat<(f32 (f16_to_f32 (i32
2248 (zextloadi16 (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
2249 ro_Wextend16:$extend))))),
2250 (FCVTSHr (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend))>;
// Address form: base $Rn plus 64-bit register $Rm with $extend (ro_Xindexed16).
2251 def : Pat<(f32 (f16_to_f32 (i32
2252 (zextloadi16 (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
2253 ro_Xextend16:$extend))))),
2254 (FCVTSHr (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend))>;
// Address form: base $Rn plus uimm12s2 immediate $offset (am_indexed16).
2255 def : Pat <(f32 (f16_to_f32 (i32
2256 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
2257 (FCVTSHr (LDRHui GPR64sp:$Rn, uimm12s2:$offset))>;
// Address form: base $Rn plus simm9 immediate $offset (am_unscaled16).
2258 def : Pat <(f32 (f16_to_f32 (i32
2259 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))),
2260 (FCVTSHr (LDURHi GPR64sp:$Rn, simm9:$offset))>;
2261
2262 // f16->f64
// Each pattern below matches fextend-to-f64 of an f16_to_f32 whose operand is
// an i32 zextloadi16, and selects a single FCVTDHr fed by the LDRH* load for
// the matched address form. The output contains no separate f32 conversion.
// Address form: base $Rn plus 32-bit register $Rm with $extend (ro_Windexed16).
2263 def : Pat<(f64 (fextend (f32 (f16_to_f32 (i32
2264 (zextloadi16 (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
2265 ro_Wextend16:$extend))))))),
2266 (FCVTDHr (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend))>;
// Address form: base $Rn plus 64-bit register $Rm with $extend (ro_Xindexed16).
2267 def : Pat<(f64 (fextend (f32 (f16_to_f32 (i32
2268 (zextloadi16 (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
2269 ro_Xextend16:$extend))))))),
2270 (FCVTDHr (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend))>;
// Address form: base $Rn plus uimm12s2 immediate $offset (am_indexed16).
2271 def : Pat <(f64 (fextend (f32 (f16_to_f32 (i32
2272 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))))),
2273 (FCVTDHr (LDRHui GPR64sp:$Rn, uimm12s2:$offset))>;
// Address form: base $Rn plus simm9 immediate $offset (am_unscaled16).
2274 def : Pat <(f64 (fextend (f32 (f16_to_f32 (i32
2275 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))))),
2276 (FCVTDHr (LDURHi GPR64sp:$Rn, simm9:$offset))>;
2277
22432278
22442279 //===----------------------------------------------------------------------===//
22452280 // Floating point single operand instructions.
0 ; RUN: llc < %s -mtriple=arm64-apple-ios -asm-verbose=false | FileCheck %s
1
; load0: loads an i16 straight from %a and converts it to float with
; llvm.convert.from.fp16. Expected output is a load into an h register from
; [x0] immediately followed by fcvt into s0.
2 define float @load0(i16* nocapture readonly %a) nounwind {
3 ; CHECK-LABEL: load0:
4 ; CHECK-NEXT: ldr [[HREG:h[0-9]+]], [x0]
5 ; CHECK-NEXT: fcvt s0, [[HREG]]
6 ; CHECK-NEXT: ret
7
8 %tmp = load i16* %a, align 2
9 %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp)
10 ret float %tmp1
11 }
12
; load1: same load as a plain [x0] access, but the converted float is then
; fpext'ed to double. Expected output is a load into an h register followed
; directly by a single fcvt into d0.
13 define double @load1(i16* nocapture readonly %a) nounwind {
14 ; CHECK-LABEL: load1:
15 ; CHECK-NEXT: ldr [[HREG:h[0-9]+]], [x0]
16 ; CHECK-NEXT: fcvt d0, [[HREG]]
17 ; CHECK-NEXT: ret
18
19 %tmp = load i16* %a, align 2
20 %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp)
21 %conv = fpext float %tmp1 to double
22 ret double %conv
23 }
24
; load2: indexes %a by a sign-extended i32 and converts the loaded i16 to
; float. Expected output uses the [x0, w1, sxtw #1] address form for the
; h-register load, followed by fcvt into s0.
25 define float @load2(i16* nocapture readonly %a, i32 %i) nounwind {
26 ; CHECK-LABEL: load2:
27 ; CHECK-NEXT: ldr [[HREG:h[0-9]+]], [x0, w1, sxtw #1]
28 ; CHECK-NEXT: fcvt s0, [[HREG]]
29 ; CHECK-NEXT: ret
30
31 %idxprom = sext i32 %i to i64
32 %arrayidx = getelementptr inbounds i16* %a, i64 %idxprom
33 %tmp = load i16* %arrayidx, align 2
34 %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp)
35 ret float %tmp1
36 }
37
; load3: indexes %a by a sign-extended i32, converts the loaded i16 to float
; and fpext's the result to double. Expected output uses the
; [x0, w1, sxtw #1] address form followed by a single fcvt into d0.
38 define double @load3(i16* nocapture readonly %a, i32 %i) nounwind {
39 ; CHECK-LABEL: load3:
40 ; CHECK-NEXT: ldr [[HREG:h[0-9]+]], [x0, w1, sxtw #1]
41 ; CHECK-NEXT: fcvt d0, [[HREG]]
42 ; CHECK-NEXT: ret
43
44 %idxprom = sext i32 %i to i64
45 %arrayidx = getelementptr inbounds i16* %a, i64 %idxprom
46 %tmp = load i16* %arrayidx, align 2
47 %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp)
48 %conv = fpext float %tmp1 to double
49 ret double %conv
50 }
51
; load4: indexes %a by an i64 and converts the loaded i16 to float. Expected
; output uses the [x0, x1, lsl #1] address form for the h-register load,
; followed by fcvt into s0.
52 define float @load4(i16* nocapture readonly %a, i64 %i) nounwind {
53 ; CHECK-LABEL: load4:
54 ; CHECK-NEXT: ldr [[HREG:h[0-9]+]], [x0, x1, lsl #1]
55 ; CHECK-NEXT: fcvt s0, [[HREG]]
56 ; CHECK-NEXT: ret
57
58 %arrayidx = getelementptr inbounds i16* %a, i64 %i
59 %tmp = load i16* %arrayidx, align 2
60 %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp)
61 ret float %tmp1
62 }
63
; load5: indexes %a by an i64, converts the loaded i16 to float and fpext's
; the result to double. Expected output uses the [x0, x1, lsl #1] address
; form followed by a single fcvt into d0.
64 define double @load5(i16* nocapture readonly %a, i64 %i) nounwind {
65 ; CHECK-LABEL: load5:
66 ; CHECK-NEXT: ldr [[HREG:h[0-9]+]], [x0, x1, lsl #1]
67 ; CHECK-NEXT: fcvt d0, [[HREG]]
68 ; CHECK-NEXT: ret
69
70 %arrayidx = getelementptr inbounds i16* %a, i64 %i
71 %tmp = load i16* %arrayidx, align 2
72 %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp)
73 %conv = fpext float %tmp1 to double
74 ret double %conv
75 }
76
; load6: loads the i16 at constant element index 10 of %a and converts it to
; float. Expected output uses the immediate address form [x0, #20] for the
; h-register load, followed by fcvt into s0.
77 define float @load6(i16* nocapture readonly %a) nounwind {
78 ; CHECK-LABEL: load6:
79 ; CHECK-NEXT: ldr [[HREG:h[0-9]+]], [x0, #20]
80 ; CHECK-NEXT: fcvt s0, [[HREG]]
81 ; CHECK-NEXT: ret
82
83 %arrayidx = getelementptr inbounds i16* %a, i64 10
84 %tmp = load i16* %arrayidx, align 2
85 %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp)
86 ret float %tmp1
87 }
88
; load7: loads the i16 at constant element index 10 of %a, converts it to
; float and fpext's the result to double. Expected output uses the immediate
; address form [x0, #20] followed by a single fcvt into d0.
89 define double @load7(i16* nocapture readonly %a) nounwind {
90 ; CHECK-LABEL: load7:
91 ; CHECK-NEXT: ldr [[HREG:h[0-9]+]], [x0, #20]
92 ; CHECK-NEXT: fcvt d0, [[HREG]]
93 ; CHECK-NEXT: ret
94
95 %arrayidx = getelementptr inbounds i16* %a, i64 10
96 %tmp = load i16* %arrayidx, align 2
97 %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp)
98 %conv = fpext float %tmp1 to double
99 ret double %conv
100 }
101
; load8: loads the i16 at constant element index -10 of %a and converts it to
; float. Expected output uses ldur with the negative immediate [x0, #-20] for
; the h-register load, followed by fcvt into s0.
102 define float @load8(i16* nocapture readonly %a) nounwind {
103 ; CHECK-LABEL: load8:
104 ; CHECK-NEXT: ldur [[HREG:h[0-9]+]], [x0, #-20]
105 ; CHECK-NEXT: fcvt s0, [[HREG]]
106 ; CHECK-NEXT: ret
107
108 %arrayidx = getelementptr inbounds i16* %a, i64 -10
109 %tmp = load i16* %arrayidx, align 2
110 %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp)
111 ret float %tmp1
112 }
113
; load9: loads the i16 at constant element index -10 of %a, converts it to
; float and fpext's the result to double. Expected output uses ldur with the
; negative immediate [x0, #-20] followed by a single fcvt into d0.
114 define double @load9(i16* nocapture readonly %a) nounwind {
115 ; CHECK-LABEL: load9:
116 ; CHECK-NEXT: ldur [[HREG:h[0-9]+]], [x0, #-20]
117 ; CHECK-NEXT: fcvt d0, [[HREG]]
118 ; CHECK-NEXT: ret
119
120 %arrayidx = getelementptr inbounds i16* %a, i64 -10
121 %tmp = load i16* %arrayidx, align 2
122 %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp)
123 %conv = fpext float %tmp1 to double
124 ret double %conv
125 }
126
127 declare float @llvm.convert.from.fp16(i16) nounwind readnone