llvm.org GIT mirror llvm / f8d927f
CodeGen: emit IR-level f16 conversion intrinsics as fptrunc/fpext. This makes the first stage DAG for @llvm.convert.to.fp16 an fptrunc, and correspondingly @llvm.convert.from.fp16 an fpext. The legalisation path is now uniform, regardless of the input IR: (1) fptrunc -> FP_TO_FP16 (if f16 illegal) -> libcall; (2) fpext -> FP16_TO_FP (if f16 illegal) -> libcall. Each target should be able to select the version that best matches its operations and not be required to duplicate patterns for both fptrunc and FP_TO_FP16 (for example). As a result we can remove some redundant AArch64 patterns. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@213507 91177308-0d34-0410-b5e6-96231b3b80d8 — Tim Northover, 6 years ago
5 changed file(s) with 28 addition(s) and 99 deletion(s). Raw diff Collapse all Expand all
372372 SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
373373 EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
374374 SDValue Op = N->getOperand(0);
375
376 // There's only a libcall for f16 -> f32, so proceed in two stages. Also, it's
377 // entirely possible for both f16 and f32 to be legal, so use the fully
378 // hard-float FP_EXTEND rather than FP16_TO_FP.
379 if (Op.getValueType() == MVT::f16 && N->getValueType(0) != MVT::f32)
380 Op = DAG.getNode(ISD::FP_EXTEND, SDLoc(N), MVT::f32, Op);
381
375382 RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0));
376383 assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!");
377384 return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, SDLoc(N)).first;
510517
511518 SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) {
512519 EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
520 if (N->getValueType(0) == MVT::f16)
521 return DAG.getNode(ISD::FP_TO_FP16, SDLoc(N), NVT, N->getOperand(0));
522
513523 SDValue Op = GetSoftenedFloat(N->getOperand(0));
514524 return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
515525 RTLIB::TRUNC_F32,
51545154 return nullptr;
51555155 }
51565156 case Intrinsic::convert_to_fp16:
5157 setValue(&I, DAG.getNode(ISD::FP_TO_FP16, sdl,
5158 MVT::i16, getValue(I.getArgOperand(0))));
5157 setValue(&I, DAG.getNode(ISD::BITCAST, sdl, MVT::i16,
5158 DAG.getNode(ISD::FP_ROUND, sdl, MVT::f16,
5159 getValue(I.getArgOperand(0)),
5160 DAG.getTargetConstant(0, MVT::i32))));
51595161 return nullptr;
51605162 case Intrinsic::convert_from_fp16:
51615163 setValue(&I,
5162 DAG.getNode(ISD::FP16_TO_FP, sdl, TLI->getValueType(I.getType()),
5163 getValue(I.getArgOperand(0))));
5164 DAG.getNode(ISD::FP_EXTEND, sdl, TLI->getValueType(I.getType()),
5165 DAG.getNode(ISD::BITCAST, sdl, MVT::f16,
5166 getValue(I.getArgOperand(0)))));
51645167 return nullptr;
51655168 case Intrinsic::pcmarker: {
51665169 SDValue Tmp = getValue(I.getArgOperand(0));
421421 /// getFPEXT - Return the FPEXT_*_* value for the given types, or
422422 /// UNKNOWN_LIBCALL if there is none.
423423 RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) {
424 if (OpVT == MVT::f32) {
424 if (OpVT == MVT::f16) {
425 if (RetVT == MVT::f32)
426 return FPEXT_F16_F32;
427 } else if (OpVT == MVT::f32) {
425428 if (RetVT == MVT::f64)
426429 return FPEXT_F32_F64;
427430 if (RetVT == MVT::f128)
22422242 //===----------------------------------------------------------------------===//
22432243
22442244 defm FCVT : FPConversion<"fcvt">;
2245
2246 def : Pat<(fp_to_f16 FPR32:$Rn),
2247 (i32 (COPY_TO_REGCLASS
2248 (f32 (SUBREG_TO_REG (i32 0), (FCVTHSr FPR32:$Rn), hsub)),
2249 GPR32))>;
2250
2251 def : Pat<(f32 (f16_to_fp i32:$Rn)),
2252 (FCVTSHr (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS i32:$Rn, FPR32)),
2253 hsub))>;
2254
2255 // When converting from f16 coming directly from a load, make sure we
2256 // load into the FPR16 registers rather than going through the GPRs.
2257 // f16->f32
2258 def : Pat<(f32 (f16_to_fp (i32
2259 (zextloadi16 (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
2260 ro_Wextend16:$extend))))),
2261 (FCVTSHr (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend))>;
2262 def : Pat<(f32 (f16_to_fp (i32
2263 (zextloadi16 (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
2264 ro_Xextend16:$extend))))),
2265 (FCVTSHr (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend))>;
2266 def : Pat <(f32 (f16_to_fp (i32
2267 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
2268 (FCVTSHr (LDRHui GPR64sp:$Rn, uimm12s2:$offset))>;
2269 def : Pat <(f32 (f16_to_fp (i32
2270 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))),
2271 (FCVTSHr (LDURHi GPR64sp:$Rn, simm9:$offset))>;
2272
2273 // f16->f64
2274 def : Pat<(f64 (fextend (f32 (f16_to_fp (i32
2275 (zextloadi16 (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
2276 ro_Wextend16:$extend))))))),
2277 (FCVTDHr (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend))>;
2278 def : Pat<(f64 (fextend (f32 (f16_to_fp (i32
2279 (zextloadi16 (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
2280 ro_Xextend16:$extend))))))),
2281 (FCVTDHr (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend))>;
2282 def : Pat <(f64 (fextend (f32 (f16_to_fp (i32
2283 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))))),
2284 (FCVTDHr (LDRHui GPR64sp:$Rn, uimm12s2:$offset))>;
2285 def : Pat <(f64 (fextend (f32 (f16_to_fp (i32
2286 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))))),
2287 (FCVTDHr (LDURHi GPR64sp:$Rn, simm9:$offset))>;
2288
2289 // When converting to f16 going directly to a store, make sure we use the
2290 // appropriate direct conversion instructions and store via the FPR16
2291 // registers rather than going through the GPRs.
2292 let AddedComplexity = 10 in {
2293 // f32->f16
2294 def : Pat< (truncstorei16 (assertzext (i32 (fp_to_f16 FPR32:$Rt))),
2295 (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
2296 ro_Wextend16:$extend)),
2297 (STRHroW (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, GPR32:$Rm,
2298 ro_Wextend16:$extend)>;
2299 def : Pat< (truncstorei16 (assertzext (i32 (fp_to_f16 FPR32:$Rt))),
2300 (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
2301 ro_Xextend16:$extend)),
2302 (STRHroX (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, GPR64:$Rm,
2303 ro_Xextend16:$extend)>;
2304 def : Pat <(truncstorei16 (assertzext (i32 (fp_to_f16 FPR32:$Rt))),
2305 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
2306 (STRHui (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, uimm12s2:$offset)>;
2307 def : Pat <(truncstorei16 (assertzext (i32 (fp_to_f16 FPR32:$Rt))),
2308 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
2309 (STURHi (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, simm9:$offset)>;
2310 // f64->f16
2311 def : Pat< (truncstorei16 (assertzext (i32 (fp_to_f16 (f32 (fround FPR64:$Rt))))),
2312 (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
2313 ro_Wextend16:$extend)),
2314 (STRHroW (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, GPR32:$Rm,
2315 ro_Wextend16:$extend)>;
2316 def : Pat< (truncstorei16 (assertzext (i32 (fp_to_f16 (f32 (fround FPR64:$Rt))))),
2317 (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
2318 ro_Xextend16:$extend)),
2319 (STRHroX (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, GPR64:$Rm,
2320 ro_Xextend16:$extend)>;
2321 def : Pat <(truncstorei16 (assertzext (i32 (fp_to_f16 (f32 (fround FPR64:$Rt))))),
2322 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
2323 (STRHui (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, uimm12s2:$offset)>;
2324 def : Pat <(truncstorei16 (assertzext (i32 (fp_to_f16 (f32 (fround FPR64:$Rt))))),
2325 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
2326 (STURHi (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, simm9:$offset)>;
2327 }
2328
23292245
23302246 //===----------------------------------------------------------------------===//
23312247 // Floating point single operand instructions.
1717 ; CHECK-NEXT: ret
1818
1919 %tmp = load i16* %a, align 2
20 %tmp1 = tail call float @llvm.convert.from.fp16.f32(i16 %tmp)
21 %conv = fpext float %tmp1 to double
20 %conv = tail call double @llvm.convert.from.fp16.f64(i16 %tmp)
2221 ret double %conv
2322 }
2423
4443 %idxprom = sext i32 %i to i64
4544 %arrayidx = getelementptr inbounds i16* %a, i64 %idxprom
4645 %tmp = load i16* %arrayidx, align 2
47 %tmp1 = tail call float @llvm.convert.from.fp16.f32(i16 %tmp)
48 %conv = fpext float %tmp1 to double
46 %conv = tail call double @llvm.convert.from.fp16.f64(i16 %tmp)
4947 ret double %conv
5048 }
5149
6967
7068 %arrayidx = getelementptr inbounds i16* %a, i64 %i
7169 %tmp = load i16* %arrayidx, align 2
72 %tmp1 = tail call float @llvm.convert.from.fp16.f32(i16 %tmp)
73 %conv = fpext float %tmp1 to double
70 %conv = tail call double @llvm.convert.from.fp16.f64(i16 %tmp)
7471 ret double %conv
7572 }
7673
9491
9592 %arrayidx = getelementptr inbounds i16* %a, i64 10
9693 %tmp = load i16* %arrayidx, align 2
97 %tmp1 = tail call float @llvm.convert.from.fp16.f32(i16 %tmp)
98 %conv = fpext float %tmp1 to double
94 %conv = tail call double @llvm.convert.from.fp16.f64(i16 %tmp)
9995 ret double %conv
10096 }
10197
119115
120116 %arrayidx = getelementptr inbounds i16* %a, i64 -10
121117 %tmp = load i16* %arrayidx, align 2
122 %tmp1 = tail call float @llvm.convert.from.fp16.f32(i16 %tmp)
123 %conv = fpext float %tmp1 to double
118 %conv = tail call double @llvm.convert.from.fp16.f64(i16 %tmp)
124119 ret double %conv
125120 }
126121
251246
252247 declare i16 @llvm.convert.to.fp16.f32(float) nounwind readnone
253248 declare float @llvm.convert.from.fp16.f32(i16) nounwind readnone
249 declare i16 @llvm.convert.to.fp16.f64(double) nounwind readnone
250 declare double @llvm.convert.from.fp16.f64(i16) nounwind readnone