llvm.org GIT mirror llvm / a3edd6a
AArch64: Better codegen for storing to __fp16. Storing will generally be immediately preceded by rounding from an f32 or f64, so make sure to match those patterns and convert directly into the FPR16 register class rather than going through the integer GPRs. This also eliminates an extra step in the convert-from-f64 path, which was first converting to f32 and then to f16 from there. rdar://17594379 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212638 91177308-0d34-0410-b5e6-96231b3b80d8 Jim Grosbach 6 years ago
2 changed file(s) with 166 addition(s) and 0 deletion(s). Raw diff Collapse all Expand all
22742274 def : Pat <(f64 (fextend (f32 (f16_to_f32 (i32
22752275 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))))),
22762276 (FCVTDHr (LDURHi GPR64sp:$Rn, simm9:$offset))>; // A zero-extending i16 load (am_unscaled16 address) that is converted f16->f32 and then extended to f64 selects LDURHi feeding FCVTDHr.
2277
2278 // When a value converted to f16 feeds a 16-bit truncating store directly, use
2279 // the direct conversion instructions (FCVTHSr / FCVTHDr) and store the result
2280 // from the FPR16 registers rather than going through the GPRs. One pattern is given per store addressing form used below: ro_Windexed16, ro_Xindexed16, am_indexed16 and am_unscaled16.
2281 let AddedComplexity = 10 in { // presumably biases selection toward these patterns over the generic i16 truncating-store patterns — TODO confirm
2282 // f32->f16: (f32_to_f16 FPR32:$Rt) wrapped in assertzext and stored with truncstorei16 selects FCVTHSr feeding STRHroW / STRHroX / STRHui / STURHi.
2283 def : Pat< (truncstorei16 (assertzext (i32 (f32_to_f16 FPR32:$Rt))),
2284 (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
2285 ro_Wextend16:$extend)),
2286 (STRHroW (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, GPR32:$Rm,
2287 ro_Wextend16:$extend)>;
2288 def : Pat< (truncstorei16 (assertzext (i32 (f32_to_f16 FPR32:$Rt))),
2289 (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
2290 ro_Xextend16:$extend)),
2291 (STRHroX (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, GPR64:$Rm,
2292 ro_Xextend16:$extend)>;
2293 def : Pat <(truncstorei16 (assertzext (i32 (f32_to_f16 FPR32:$Rt))),
2294 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
2295 (STRHui (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, uimm12s2:$offset)>;
2296 def : Pat <(truncstorei16 (assertzext (i32 (f32_to_f16 FPR32:$Rt))),
2297 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
2298 (STURHi (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, simm9:$offset)>;
2299 // f64->f16: matches f32_to_f16 applied to (f32 (fround FPR64:$Rt)) and emits a single FCVTHDr on the f64 source, skipping the intermediate f32. NOTE(review): the matched DAG rounds f64->f32 and then f32->f16; confirm that replacing both steps with one f64->f16 conversion cannot change the stored value (double rounding).
2300 def : Pat< (truncstorei16 (assertzext (i32 (f32_to_f16 (f32 (fround FPR64:$Rt))))),
2301 (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
2302 ro_Wextend16:$extend)),
2303 (STRHroW (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, GPR32:$Rm,
2304 ro_Wextend16:$extend)>;
2305 def : Pat< (truncstorei16 (assertzext (i32 (f32_to_f16 (f32 (fround FPR64:$Rt))))),
2306 (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
2307 ro_Xextend16:$extend)),
2308 (STRHroX (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, GPR64:$Rm,
2309 ro_Xextend16:$extend)>;
2310 def : Pat <(truncstorei16 (assertzext (i32 (f32_to_f16 (f32 (fround FPR64:$Rt))))),
2311 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
2312 (STRHui (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, uimm12s2:$offset)>;
2313 def : Pat <(truncstorei16 (assertzext (i32 (f32_to_f16 (f32 (fround FPR64:$Rt))))),
2314 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
2315 (STURHi (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, simm9:$offset)>;
2316 }
22772317
22782318
22792319 //===----------------------------------------------------------------------===//
124124 ret double %conv
125125 }
126126
127 define void @store0(i16* nocapture %a, float %val) nounwind {
128 ; CHECK-LABEL: store0:
129 ; CHECK-NEXT: fcvt h0, s0
130 ; CHECK-NEXT: str h0, [x0]
131 ; CHECK-NEXT: ret
132 ; float -> f16 stored at %a itself: the expected output above is one fcvt into h0 and a plain register-base str of h0.
133 %tmp = tail call i16 @llvm.convert.to.fp16(float %val)
134 store i16 %tmp, i16* %a, align 2
135 ret void
136 }
137
138 define void @store1(i16* nocapture %a, double %val) nounwind {
139 ; CHECK-LABEL: store1:
140 ; CHECK-NEXT: fcvt h0, d0
141 ; CHECK-NEXT: str h0, [x0]
142 ; CHECK-NEXT: ret
143 ; double -> f16 stored at %a itself: the fptrunc + convert.to.fp16 pair below is expected to become a single fcvt from d0 to h0, followed by a plain str of h0.
144 %conv = fptrunc double %val to float
145 %tmp = tail call i16 @llvm.convert.to.fp16(float %conv)
146 store i16 %tmp, i16* %a, align 2
147 ret void
148 }
149
150 define void @store2(i16* nocapture %a, i32 %i, float %val) nounwind {
151 ; CHECK-LABEL: store2:
152 ; CHECK-NEXT: fcvt h0, s0
153 ; CHECK-NEXT: str h0, [x0, w1, sxtw #1]
154 ; CHECK-NEXT: ret
155 ; float -> f16 stored at %a[%i] with a sign-extended i32 index: the expected store uses the register-offset form with w1 and sxtw #1.
156 %tmp = tail call i16 @llvm.convert.to.fp16(float %val)
157 %idxprom = sext i32 %i to i64
158 %arrayidx = getelementptr inbounds i16* %a, i64 %idxprom
159 store i16 %tmp, i16* %arrayidx, align 2
160 ret void
161 }
162
163 define void @store3(i16* nocapture %a, i32 %i, double %val) nounwind {
164 ; CHECK-LABEL: store3:
165 ; CHECK-NEXT: fcvt h0, d0
166 ; CHECK-NEXT: str h0, [x0, w1, sxtw #1]
167 ; CHECK-NEXT: ret
168 ; double -> f16 stored at %a[%i] with a sign-extended i32 index: expects a single fcvt from d0 to h0 and the register-offset store with w1 and sxtw #1.
169 %conv = fptrunc double %val to float
170 %tmp = tail call i16 @llvm.convert.to.fp16(float %conv)
171 %idxprom = sext i32 %i to i64
172 %arrayidx = getelementptr inbounds i16* %a, i64 %idxprom
173 store i16 %tmp, i16* %arrayidx, align 2
174 ret void
175 }
176
177 define void @store4(i16* nocapture %a, i64 %i, float %val) nounwind {
178 ; CHECK-LABEL: store4:
179 ; CHECK-NEXT: fcvt h0, s0
180 ; CHECK-NEXT: str h0, [x0, x1, lsl #1]
181 ; CHECK-NEXT: ret
182 ; float -> f16 stored at %a[%i] with an i64 index: the expected store uses the register-offset form with x1 and lsl #1.
183 %tmp = tail call i16 @llvm.convert.to.fp16(float %val)
184 %arrayidx = getelementptr inbounds i16* %a, i64 %i
185 store i16 %tmp, i16* %arrayidx, align 2
186 ret void
187 }
188
189 define void @store5(i16* nocapture %a, i64 %i, double %val) nounwind {
190 ; CHECK-LABEL: store5:
191 ; CHECK-NEXT: fcvt h0, d0
192 ; CHECK-NEXT: str h0, [x0, x1, lsl #1]
193 ; CHECK-NEXT: ret
194 ; double -> f16 stored at %a[%i] with an i64 index: expects a single fcvt from d0 to h0 and the register-offset store with x1 and lsl #1.
195 %conv = fptrunc double %val to float
196 %tmp = tail call i16 @llvm.convert.to.fp16(float %conv)
197 %arrayidx = getelementptr inbounds i16* %a, i64 %i
198 store i16 %tmp, i16* %arrayidx, align 2
199 ret void
200 }
201
202 define void @store6(i16* nocapture %a, float %val) nounwind {
203 ; CHECK-LABEL: store6:
204 ; CHECK-NEXT: fcvt h0, s0
205 ; CHECK-NEXT: str h0, [x0, #20]
206 ; CHECK-NEXT: ret
207 ; float -> f16 stored at constant element index 10: the expected store uses the immediate-offset form with #20 (10 two-byte elements).
208 %tmp = tail call i16 @llvm.convert.to.fp16(float %val)
209 %arrayidx = getelementptr inbounds i16* %a, i64 10
210 store i16 %tmp, i16* %arrayidx, align 2
211 ret void
212 }
213
214 define void @store7(i16* nocapture %a, double %val) nounwind {
215 ; CHECK-LABEL: store7:
216 ; CHECK-NEXT: fcvt h0, d0
217 ; CHECK-NEXT: str h0, [x0, #20]
218 ; CHECK-NEXT: ret
219 ; double -> f16 stored at constant element index 10: expects a single fcvt from d0 to h0 and the immediate-offset store with #20.
220 %conv = fptrunc double %val to float
221 %tmp = tail call i16 @llvm.convert.to.fp16(float %conv)
222 %arrayidx = getelementptr inbounds i16* %a, i64 10
223 store i16 %tmp, i16* %arrayidx, align 2
224 ret void
225 }
226
227 define void @store8(i16* nocapture %a, float %val) nounwind {
228 ; CHECK-LABEL: store8:
229 ; CHECK-NEXT: fcvt h0, s0
230 ; CHECK-NEXT: stur h0, [x0, #-20]
231 ; CHECK-NEXT: ret
232 ; float -> f16 stored at constant element index -10: the negative offset is expected to use stur with #-20 rather than str.
233 %tmp = tail call i16 @llvm.convert.to.fp16(float %val)
234 %arrayidx = getelementptr inbounds i16* %a, i64 -10
235 store i16 %tmp, i16* %arrayidx, align 2
236 ret void
237 }
238
239 define void @store9(i16* nocapture %a, double %val) nounwind {
240 ; CHECK-LABEL: store9:
241 ; CHECK-NEXT: fcvt h0, d0
242 ; CHECK-NEXT: stur h0, [x0, #-20]
243 ; CHECK-NEXT: ret
244 ; double -> f16 stored at constant element index -10: expects a single fcvt from d0 to h0 and stur with #-20.
245 %conv = fptrunc double %val to float
246 %tmp = tail call i16 @llvm.convert.to.fp16(float %conv)
247 %arrayidx = getelementptr inbounds i16* %a, i64 -10
248 store i16 %tmp, i16* %arrayidx, align 2
249 ret void
250 }
251
252 declare i16 @llvm.convert.to.fp16(float) nounwind readnone
127253 declare float @llvm.convert.from.fp16(i16) nounwind readnone