llvm.org GIT mirror llvm / 3e61ccd
CodeGen: extend f16 conversions to permit types > float. This makes the two intrinsics @llvm.convert.from.fp16 and @llvm.convert.to.fp16 accept types other than simple "float". This is only strictly needed for the truncate operation, since otherwise double rounding occurs and there's no way to represent the strict IEEE conversion. However, for symmetry we allow larger types in the extend too. During legalization, we can expand an "fp16_to_double" operation into two extends for convenience, but abort when the truncate isn't legal. A new libcall is probably needed here. Even after this commit, various target tweaks are needed to actually use the extended intrinsics. I've put these into separate commits for clarity, so there are no actual tests of f64 conversion here. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@213248 91177308-0d34-0410-b5e6-96231b3b80d8 Tim Northover 6 years ago
23 changed file(s) with 150 addition(s) and 125 deletion(s). Raw diff Collapse all Expand all
86528652
86538653 ::
86548654
8655 declare i16 @llvm.convert.to.fp16(float %a)
8656
8657 Overview:
8658 """""""""
8659
8660 The '``llvm.convert.to.fp16``' intrinsic function performs a conversion
8661 from single precision floating point format to half precision floating
8662 point format.
8655 declare i16 @llvm.convert.to.fp16.f32(float %a)
8656 declare i16 @llvm.convert.to.fp16.f64(double %a)
8657
8658 Overview:
8659 """""""""
8660
8661 The '``llvm.convert.to.fp16``' intrinsic function performs a conversion from a
8662 conventional floating point type to half precision floating point format.
86638663
86648664 Arguments:
86658665 """"""""""
86708670 Semantics:
86718671 """"""""""
86728672
8673 The '``llvm.convert.to.fp16``' intrinsic function performs a conversion
8674 from single precision floating point format to half precision floating
8675 point format. The return value is an ``i16`` which contains the
8676 converted number.
8673 The '``llvm.convert.to.fp16``' intrinsic function performs a conversion from a
8674 conventional floating point format to half precision floating point format. The
8675 return value is an ``i16`` which contains the converted number.
86778676
86788677 Examples:
86798678 """""""""
86808679
86818680 .. code-block:: llvm
86828681
8683 %res = call i16 @llvm.convert.to.fp16(float %a)
8682 %res = call i16 @llvm.convert.to.fp16.f32(float %a)
86848683 store i16 %res, i16* @x, align 2
86858684
86868685 .. _int_convert_from_fp16:
86938692
86948693 ::
86958694
8696 declare float @llvm.convert.from.fp16(i16 %a)
8695 declare float @llvm.convert.from.fp16.f32(i16 %a)
8696 declare double @llvm.convert.from.fp16.f64(i16 %a)
86978697
86988698 Overview:
86998699 """""""""
471471 /// 5) ISD::CvtCode indicating the type of conversion to do
472472 CONVERT_RNDSAT,
473473
474 /// FP16_TO_FP32, FP32_TO_FP16 - These operators are used to perform
475 /// promotions and truncation for half-precision (16 bit) floating
476 /// numbers. We need special nodes since FP16 is a storage-only type with
477 /// special semantics of operations.
478 FP16_TO_FP32, FP32_TO_FP16,
474 /// FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions
475 /// and truncation for half-precision (16 bit) floating numbers. These nodes
476 /// form a semi-softened interface for dealing with f16 (as an i16), which
477 /// is often a storage-only type but has native conversions.
478 FP16_TO_FP, FP_TO_FP16,
479479
480480 /// FNEG, FABS, FSQRT, FSIN, FCOS, FPOWI, FPOW,
481481 /// FLOG, FLOG2, FLOG10, FEXP, FEXP2,
495495
496496 // Intrinsics to support half precision floating point format
497497 let Properties = [IntrNoMem] in {
498 def int_convert_to_fp16 : Intrinsic<[llvm_i16_ty], [llvm_float_ty]>,
499 GCCBuiltin<"__gnu_f2h_ieee">;
500 def int_convert_from_fp16 : Intrinsic<[llvm_float_ty], [llvm_i16_ty]>,
501 GCCBuiltin<"__gnu_h2f_ieee">;
498 def int_convert_to_fp16 : Intrinsic<[llvm_i16_ty], [llvm_anyfloat_ty]>;
499 def int_convert_from_fp16 : Intrinsic<[llvm_anyfloat_ty], [llvm_i16_ty]>;
502500 }
503501
504502 // These convert intrinsics are to support various conversions between
391391 def uint_to_fp : SDNode<"ISD::UINT_TO_FP" , SDTIntToFPOp>;
392392 def fp_to_sint : SDNode<"ISD::FP_TO_SINT" , SDTFPToIntOp>;
393393 def fp_to_uint : SDNode<"ISD::FP_TO_UINT" , SDTFPToIntOp>;
394 def f16_to_f32 : SDNode<"ISD::FP16_TO_FP32", SDTIntToFPOp>;
395 def f32_to_f16 : SDNode<"ISD::FP32_TO_FP16", SDTFPToIntOp>;
394 def f16_to_fp : SDNode<"ISD::FP16_TO_FP" , SDTIntToFPOp>;
395 def fp_to_f16 : SDNode<"ISD::FP_TO_FP16" , SDTFPToIntOp>;
396396
397397 def setcc : SDNode<"ISD::SETCC" , SDTSetCC>;
398398 def select : SDNode<"ISD::SELECT" , SDTSelect>;
11851185 if (Action != TargetLowering::Promote)
11861186 Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other);
11871187 break;
1188 case ISD::FP_TO_FP16:
11881189 case ISD::SINT_TO_FP:
11891190 case ISD::UINT_TO_FP:
11901191 case ISD::EXTRACT_VECTOR_ELT:
35123513 RTLIB::FMA_F80, RTLIB::FMA_F128,
35133514 RTLIB::FMA_PPCF128));
35143515 break;
3515 case ISD::FP16_TO_FP32:
3516 Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false));
3517 break;
3518 case ISD::FP32_TO_FP16:
3516 case ISD::FP16_TO_FP: {
3517 if (Node->getValueType(0) == MVT::f32) {
3518 Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false));
3519 break;
3520 }
3521
3522 // We can extend to types bigger than f32 in two steps without changing the
3523 // result. Since "f16 -> f32" is much more commonly available, give CodeGen
3524 // the option of emitting that before resorting to a libcall.
3525 SDValue Res =
3526 DAG.getNode(ISD::FP16_TO_FP, dl, MVT::f32, Node->getOperand(0));
3527 Results.push_back(
3528 DAG.getNode(ISD::FP_EXTEND, dl, Node->getValueType(0), Res));
3529 break;
3530 }
3531 case ISD::FP_TO_FP16:
3532 // Can't use two-step truncation here because the rounding may be
3533 // significant.
3534 assert(Node->getOperand(0).getValueType() == MVT::f32 &&
3535 "Don't know libcall for FPROUND_F64_F16");
35193536 Results.push_back(ExpandLibCall(RTLIB::FPROUND_F32_F16, Node, false));
35203537 break;
35213538 case ISD::ConstantFP: {
8484 case ISD::FNEG: R = SoftenFloatRes_FNEG(N); break;
8585 case ISD::FP_EXTEND: R = SoftenFloatRes_FP_EXTEND(N); break;
8686 case ISD::FP_ROUND: R = SoftenFloatRes_FP_ROUND(N); break;
87 case ISD::FP16_TO_FP32:R = SoftenFloatRes_FP16_TO_FP32(N); break;
87 case ISD::FP16_TO_FP: R = SoftenFloatRes_FP16_TO_FP(N); break;
8888 case ISD::FPOW: R = SoftenFloatRes_FPOW(N); break;
8989 case ISD::FPOWI: R = SoftenFloatRes_FPOWI(N); break;
9090 case ISD::FREM: R = SoftenFloatRes_FREM(N); break;
379379
380380 // FIXME: Should we just use 'normal' FP_EXTEND / FP_TRUNC instead of special
381381 // nodes?
382 SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP32(SDNode *N) {
383 EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
382 SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP(SDNode *N) {
383 EVT MidVT = TLI.getTypeToTransformTo(*DAG.getContext(), MVT::f32);
384384 SDValue Op = N->getOperand(0);
385 return TLI.makeLibCall(DAG, RTLIB::FPEXT_F16_F32, NVT, &Op, 1, false,
386 SDLoc(N)).first;
385 SDValue Res32 = TLI.makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MidVT, &Op, 1,
386 false, SDLoc(N)).first;
387 if (N->getValueType(0) == MVT::f32)
388 return Res32;
389
390 EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
391 RTLIB::Libcall LC = RTLIB::getFPEXT(MVT::f32, N->getValueType(0));
392 assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!");
393 return TLI.makeLibCall(DAG, LC, NVT, &Res32, 1, false, SDLoc(N)).first;
387394 }
388395
389396 SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) {
627634 case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break;
628635 case ISD::FP_TO_SINT: Res = SoftenFloatOp_FP_TO_SINT(N); break;
629636 case ISD::FP_TO_UINT: Res = SoftenFloatOp_FP_TO_UINT(N); break;
630 case ISD::FP32_TO_FP16:Res = SoftenFloatOp_FP32_TO_FP16(N); break;
637 case ISD::FP_TO_FP16: Res = SoftenFloatOp_FP_TO_FP16(N); break;
631638 case ISD::SELECT_CC: Res = SoftenFloatOp_SELECT_CC(N); break;
632639 case ISD::SETCC: Res = SoftenFloatOp_SETCC(N); break;
633640 case ISD::STORE: Res = SoftenFloatOp_STORE(N, OpNo); break;
703710 return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first;
704711 }
705712
706 SDValue DAGTypeLegalizer::SoftenFloatOp_FP32_TO_FP16(SDNode *N) {
713 SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_FP16(SDNode *N) {
714 assert(N->getOperand(0).getValueType() == MVT::f32 &&
715 "Cannot soften in one step");
707716 EVT RVT = N->getValueType(0);
708717 RTLIB::Libcall LC = RTLIB::FPROUND_F32_F16;
709718 SDValue Op = GetSoftenedFloat(N->getOperand(0));
9898 case ISD::FP_TO_SINT:
9999 case ISD::FP_TO_UINT: Res = PromoteIntRes_FP_TO_XINT(N); break;
100100
101 case ISD::FP32_TO_FP16:Res = PromoteIntRes_FP32_TO_FP16(N); break;
101 case ISD::FP_TO_FP16: Res = PromoteIntRes_FP_TO_FP16(N); break;
102102
103103 case ISD::AND:
104104 case ISD::OR:
400400 DAG.getValueType(N->getValueType(0).getScalarType()));
401401 }
402402
403 SDValue DAGTypeLegalizer::PromoteIntRes_FP32_TO_FP16(SDNode *N) {
403 SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_FP16(SDNode *N) {
404404 EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
405405 SDLoc dl(N);
406406
825825 case ISD::STORE: Res = PromoteIntOp_STORE(cast<StoreSDNode>(N),
826826 OpNo); break;
827827 case ISD::TRUNCATE: Res = PromoteIntOp_TRUNCATE(N); break;
828 case ISD::FP16_TO_FP32:
828 case ISD::FP16_TO_FP:
829829 case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break;
830830 case ISD::ZERO_EXTEND: Res = PromoteIntOp_ZERO_EXTEND(N); break;
831831
236236 SDValue PromoteIntRes_CTTZ(SDNode *N);
237237 SDValue PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N);
238238 SDValue PromoteIntRes_FP_TO_XINT(SDNode *N);
239 SDValue PromoteIntRes_FP32_TO_FP16(SDNode *N);
239 SDValue PromoteIntRes_FP_TO_FP16(SDNode *N);
240240 SDValue PromoteIntRes_INT_EXTEND(SDNode *N);
241241 SDValue PromoteIntRes_LOAD(LoadSDNode *N);
242242 SDValue PromoteIntRes_Overflow(SDNode *N);
402402 SDValue SoftenFloatRes_FNEARBYINT(SDNode *N);
403403 SDValue SoftenFloatRes_FNEG(SDNode *N);
404404 SDValue SoftenFloatRes_FP_EXTEND(SDNode *N);
405 SDValue SoftenFloatRes_FP16_TO_FP32(SDNode *N);
405 SDValue SoftenFloatRes_FP16_TO_FP(SDNode *N);
406406 SDValue SoftenFloatRes_FP_ROUND(SDNode *N);
407407 SDValue SoftenFloatRes_FPOW(SDNode *N);
408408 SDValue SoftenFloatRes_FPOWI(SDNode *N);
427427 SDValue SoftenFloatOp_FP_ROUND(SDNode *N);
428428 SDValue SoftenFloatOp_FP_TO_SINT(SDNode *N);
429429 SDValue SoftenFloatOp_FP_TO_UINT(SDNode *N);
430 SDValue SoftenFloatOp_FP32_TO_FP16(SDNode *N);
430 SDValue SoftenFloatOp_FP_TO_FP16(SDNode *N);
431431 SDValue SoftenFloatOp_SELECT_CC(SDNode *N);
432432 SDValue SoftenFloatOp_SETCC(SDNode *N);
433433 SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo);
51545154 return nullptr;
51555155 }
51565156 case Intrinsic::convert_to_fp16:
5157 setValue(&I, DAG.getNode(ISD::FP32_TO_FP16, sdl,
5157 setValue(&I, DAG.getNode(ISD::FP_TO_FP16, sdl,
51585158 MVT::i16, getValue(I.getArgOperand(0))));
51595159 return nullptr;
51605160 case Intrinsic::convert_from_fp16:
5161 setValue(&I, DAG.getNode(ISD::FP16_TO_FP32, sdl,
5162 MVT::f32, getValue(I.getArgOperand(0))));
5161 setValue(&I,
5162 DAG.getNode(ISD::FP16_TO_FP, sdl, TLI->getValueType(I.getType()),
5163 getValue(I.getArgOperand(0))));
51635164 return nullptr;
51645165 case Intrinsic::pcmarker: {
51655166 SDValue Tmp = getValue(I.getArgOperand(0));
235235 case ISD::FP_TO_UINT: return "fp_to_uint";
236236 case ISD::BITCAST: return "bitcast";
237237 case ISD::ADDRSPACECAST: return "addrspacecast";
238 case ISD::FP16_TO_FP32: return "fp16_to_fp32";
239 case ISD::FP32_TO_FP16: return "fp32_to_fp16";
238 case ISD::FP16_TO_FP: return "fp16_to_fp";
239 case ISD::FP_TO_FP16: return "fp_to_fp16";
240240
241241 case ISD::CONVERT_RNDSAT: {
242242 switch (cast<CvtRndSatSDNode>(this)->getCvtCode()) {
22432243
22442244 defm FCVT : FPConversion<"fcvt">;
22452245
2246 def : Pat<(f32_to_f16 FPR32:$Rn),
2246 def : Pat<(fp_to_f16 FPR32:$Rn),
22472247 (i32 (COPY_TO_REGCLASS
22482248 (f32 (SUBREG_TO_REG (i32 0), (FCVTHSr FPR32:$Rn), hsub)),
22492249 GPR32))>;
22502250
2251 def : Pat<(f32 (f16_to_f32 i32:$Rn)),
2251 def : Pat<(f32 (f16_to_fp i32:$Rn)),
22522252 (FCVTSHr (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS i32:$Rn, FPR32)),
22532253 hsub))>;
22542254
22552255 // When converting from f16 coming directly from a load, make sure we
22562256 // load into the FPR16 registers rather than going through the GPRs.
22572257 // f16->f32
2258 def : Pat<(f32 (f16_to_f32 (i32
2258 def : Pat<(f32 (f16_to_fp (i32
22592259 (zextloadi16 (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
22602260 ro_Wextend16:$extend))))),
22612261 (FCVTSHr (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend))>;
2262 def : Pat<(f32 (f16_to_f32 (i32
2262 def : Pat<(f32 (f16_to_fp (i32
22632263 (zextloadi16 (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
22642264 ro_Xextend16:$extend))))),
22652265 (FCVTSHr (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend))>;
2266 def : Pat <(f32 (f16_to_f32 (i32
2266 def : Pat <(f32 (f16_to_fp (i32
22672267 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
22682268 (FCVTSHr (LDRHui GPR64sp:$Rn, uimm12s2:$offset))>;
2269 def : Pat <(f32 (f16_to_f32 (i32
2269 def : Pat <(f32 (f16_to_fp (i32
22702270 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))),
22712271 (FCVTSHr (LDURHi GPR64sp:$Rn, simm9:$offset))>;
22722272
22732273 // f16->f64
2274 def : Pat<(f64 (fextend (f32 (f16_to_f32 (i32
2274 def : Pat<(f64 (fextend (f32 (f16_to_fp (i32
22752275 (zextloadi16 (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
22762276 ro_Wextend16:$extend))))))),
22772277 (FCVTDHr (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend))>;
2278 def : Pat<(f64 (fextend (f32 (f16_to_f32 (i32
2278 def : Pat<(f64 (fextend (f32 (f16_to_fp (i32
22792279 (zextloadi16 (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
22802280 ro_Xextend16:$extend))))))),
22812281 (FCVTDHr (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend))>;
2282 def : Pat <(f64 (fextend (f32 (f16_to_f32 (i32
2282 def : Pat <(f64 (fextend (f32 (f16_to_fp (i32
22832283 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))))),
22842284 (FCVTDHr (LDRHui GPR64sp:$Rn, uimm12s2:$offset))>;
2285 def : Pat <(f64 (fextend (f32 (f16_to_f32 (i32
2285 def : Pat <(f64 (fextend (f32 (f16_to_fp (i32
22862286 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))))),
22872287 (FCVTDHr (LDURHi GPR64sp:$Rn, simm9:$offset))>;
22882288
22912291 // registers rather than going through the GPRs.
22922292 let AddedComplexity = 10 in {
22932293 // f32->f16
2294 def : Pat< (truncstorei16 (assertzext (i32 (f32_to_f16 FPR32:$Rt))),
2294 def : Pat< (truncstorei16 (assertzext (i32 (fp_to_f16 FPR32:$Rt))),
22952295 (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
22962296 ro_Wextend16:$extend)),
22972297 (STRHroW (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, GPR32:$Rm,
22982298 ro_Wextend16:$extend)>;
2299 def : Pat< (truncstorei16 (assertzext (i32 (f32_to_f16 FPR32:$Rt))),
2299 def : Pat< (truncstorei16 (assertzext (i32 (fp_to_f16 FPR32:$Rt))),
23002300 (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
23012301 ro_Xextend16:$extend)),
23022302 (STRHroX (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, GPR64:$Rm,
23032303 ro_Xextend16:$extend)>;
2304 def : Pat <(truncstorei16 (assertzext (i32 (f32_to_f16 FPR32:$Rt))),
2304 def : Pat <(truncstorei16 (assertzext (i32 (fp_to_f16 FPR32:$Rt))),
23052305 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
23062306 (STRHui (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, uimm12s2:$offset)>;
2307 def : Pat <(truncstorei16 (assertzext (i32 (f32_to_f16 FPR32:$Rt))),
2307 def : Pat <(truncstorei16 (assertzext (i32 (fp_to_f16 FPR32:$Rt))),
23082308 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
23092309 (STURHi (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, simm9:$offset)>;
23102310 // f64->f16
2311 def : Pat< (truncstorei16 (assertzext (i32 (f32_to_f16 (f32 (fround FPR64:$Rt))))),
2311 def : Pat< (truncstorei16 (assertzext (i32 (fp_to_f16 (f32 (fround FPR64:$Rt))))),
23122312 (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
23132313 ro_Wextend16:$extend)),
23142314 (STRHroW (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, GPR32:$Rm,
23152315 ro_Wextend16:$extend)>;
2316 def : Pat< (truncstorei16 (assertzext (i32 (f32_to_f16 (f32 (fround FPR64:$Rt))))),
2316 def : Pat< (truncstorei16 (assertzext (i32 (fp_to_f16 (f32 (fround FPR64:$Rt))))),
23172317 (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
23182318 ro_Xextend16:$extend)),
23192319 (STRHroX (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, GPR64:$Rm,
23202320 ro_Xextend16:$extend)>;
2321 def : Pat <(truncstorei16 (assertzext (i32 (f32_to_f16 (f32 (fround FPR64:$Rt))))),
2321 def : Pat <(truncstorei16 (assertzext (i32 (fp_to_f16 (f32 (fround FPR64:$Rt))))),
23222322 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
23232323 (STRHui (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, uimm12s2:$offset)>;
2324 def : Pat <(truncstorei16 (assertzext (i32 (f32_to_f16 (f32 (fround FPR64:$Rt))))),
2324 def : Pat <(truncstorei16 (assertzext (i32 (fp_to_f16 (f32 (fround FPR64:$Rt))))),
23252325 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
23262326 (STURHi (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, simm9:$offset)>;
23272327 }
826826 }
827827 // Special handling for half-precision FP.
828828 if (!Subtarget->hasFP16()) {
829 setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand);
830 setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand);
829 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
830 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
831831 }
832832 }
833833
550550 /* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm",
551551 [/* For disassembly only; pattern left blank */]>;
552552
553 def : Pat<(f32_to_f16 SPR:$a),
553 def : Pat<(fp_to_f16 SPR:$a),
554554 (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>;
555555
556 def : Pat<(f16_to_f32 GPR:$a),
556 def : Pat<(f16_to_fp GPR:$a),
557557 (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>;
558558
559559 def VCVTTHS: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
791791 "}}")))),
792792 Float32Regs, Int16Regs, int_nvvm_h2f>;
793793
794 def : Pat<(f32 (f16_to_f32 Int16Regs:$a)),
794 def : Pat<(f32 (f16_to_fp Int16Regs:$a)),
795795 (CVT_f32_f16 Int16Regs:$a, CvtNONE)>;
796 def : Pat<(i16 (f32_to_f16 Float32Regs:$a)),
796 def : Pat<(i16 (fp_to_f16 Float32Regs:$a)),
797797 (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ)>, Requires<[doF32FTZ]>;
798 def : Pat<(i16 (f32_to_f16 Float32Regs:$a)),
798 def : Pat<(i16 (fp_to_f16 Float32Regs:$a)),
799799 (CVT_f16_f32 Float32Regs:$a, CvtRN)>;
800800
801801 //
10781078 >;
10791079 defm V_MOV_FED_B32 : VOP1_32 <0x00000009, "V_MOV_FED_B32", []>;
10801080 defm V_CVT_F16_F32 : VOP1_32 <0x0000000a, "V_CVT_F16_F32",
1081 [(set i32:$dst, (f32_to_f16 f32:$src0))]
1081 [(set i32:$dst, (fp_to_f16 f32:$src0))]
10821082 >;
10831083 defm V_CVT_F32_F16 : VOP1_32 <0x0000000b, "V_CVT_F32_F16",
1084 [(set f32:$dst, (f16_to_f32 i32:$src0))]
1084 [(set f32:$dst, (f16_to_fp i32:$src0))]
10851085 >;
10861086 //defm V_CVT_RPI_I32_F32 : VOP1_32 <0x0000000c, "V_CVT_RPI_I32_F32", []>;
10871087 //defm V_CVT_FLR_I32_F32 : VOP1_32 <0x0000000d, "V_CVT_FLR_I32_F32", []>;
518518 // If we don't have F16C support, then lower half float conversions
519519 // into library calls.
520520 if (TM.Options.UseSoftFloat || !Subtarget->hasF16C()) {
521 setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand);
522 setOperationAction(ISD::FP32_TO_FP16, MVT::i16, Expand);
521 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
522 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
523523 }
524524
525525 if (Subtarget->hasPOPCNT()) {
85398539
85408540 // Patterns for matching conversions from float to half-float and vice versa.
85418541 let Predicates = [HasF16C] in {
8542 def : Pat<(f32_to_f16 FR32:$src),
8542 def : Pat<(fp_to_f16 FR32:$src),
85438543 (i16 (EXTRACT_SUBREG (VMOVPDI2DIrr (VCVTPS2PHrr
85448544 (COPY_TO_REGCLASS FR32:$src, VR128), 0)), sub_16bit))>;
85458545
8546 def : Pat<(f16_to_f32 GR16:$src),
8546 def : Pat<(f16_to_fp GR16:$src),
85478547 (f32 (COPY_TO_REGCLASS (VCVTPH2PSrr
85488548 (COPY_TO_REGCLASS (MOVSX32rr16 GR16:$src), VR128)), FR32)) >;
85498549
8550 def : Pat<(f16_to_f32 (i16 (f32_to_f16 FR32:$src))),
8550 def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32:$src))),
85518551 (f32 (COPY_TO_REGCLASS (VCVTPH2PSrr
85528552 (VCVTPS2PHrr (COPY_TO_REGCLASS FR32:$src, VR128), 0)), FR32)) >;
85538553 }
6565 ; CHECK-LABEL: to_half:
6666 ; CHECK: fcvt h[[HALFVAL:[0-9]+]], s0
6767 ; CHECK: fmov {{w[0-9]+}}, {{s[0-9]+}}
68 %res = call i16 @llvm.convert.to.fp16(float %in)
68 %res = call i16 @llvm.convert.to.fp16.f32(float %in)
6969 ret i16 %res
7070 }
7171
7373 ; CHECK-LABEL: from_half:
7474 ; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
7575 ; CHECK: fcvt s0, {{h[0-9]+}}
76 %res = call float @llvm.convert.from.fp16(i16 %in)
76 %res = call float @llvm.convert.from.fp16.f32(i16 %in)
7777 ret float %res
7878 }
7979
80 declare float @llvm.convert.from.fp16(i16) #1
81 declare i16 @llvm.convert.to.fp16(float) #1
80 declare float @llvm.convert.from.fp16.f32(i16) #1
81 declare i16 @llvm.convert.to.fp16.f32(float) #1
66 ; CHECK-NEXT: ret
77
88 %tmp = load i16* %a, align 2
9 %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp)
9 %tmp1 = tail call float @llvm.convert.from.fp16.f32(i16 %tmp)
1010 ret float %tmp1
1111 }
1212
1717 ; CHECK-NEXT: ret
1818
1919 %tmp = load i16* %a, align 2
20 %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp)
20 %tmp1 = tail call float @llvm.convert.from.fp16.f32(i16 %tmp)
2121 %conv = fpext float %tmp1 to double
2222 ret double %conv
2323 }
3131 %idxprom = sext i32 %i to i64
3232 %arrayidx = getelementptr inbounds i16* %a, i64 %idxprom
3333 %tmp = load i16* %arrayidx, align 2
34 %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp)
34 %tmp1 = tail call float @llvm.convert.from.fp16.f32(i16 %tmp)
3535 ret float %tmp1
3636 }
3737
4444 %idxprom = sext i32 %i to i64
4545 %arrayidx = getelementptr inbounds i16* %a, i64 %idxprom
4646 %tmp = load i16* %arrayidx, align 2
47 %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp)
47 %tmp1 = tail call float @llvm.convert.from.fp16.f32(i16 %tmp)
4848 %conv = fpext float %tmp1 to double
4949 ret double %conv
5050 }
5757
5858 %arrayidx = getelementptr inbounds i16* %a, i64 %i
5959 %tmp = load i16* %arrayidx, align 2
60 %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp)
60 %tmp1 = tail call float @llvm.convert.from.fp16.f32(i16 %tmp)
6161 ret float %tmp1
6262 }
6363
6969
7070 %arrayidx = getelementptr inbounds i16* %a, i64 %i
7171 %tmp = load i16* %arrayidx, align 2
72 %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp)
72 %tmp1 = tail call float @llvm.convert.from.fp16.f32(i16 %tmp)
7373 %conv = fpext float %tmp1 to double
7474 ret double %conv
7575 }
8282
8383 %arrayidx = getelementptr inbounds i16* %a, i64 10
8484 %tmp = load i16* %arrayidx, align 2
85 %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp)
85 %tmp1 = tail call float @llvm.convert.from.fp16.f32(i16 %tmp)
8686 ret float %tmp1
8787 }
8888
9494
9595 %arrayidx = getelementptr inbounds i16* %a, i64 10
9696 %tmp = load i16* %arrayidx, align 2
97 %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp)
97 %tmp1 = tail call float @llvm.convert.from.fp16.f32(i16 %tmp)
9898 %conv = fpext float %tmp1 to double
9999 ret double %conv
100100 }
107107
108108 %arrayidx = getelementptr inbounds i16* %a, i64 -10
109109 %tmp = load i16* %arrayidx, align 2
110 %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp)
110 %tmp1 = tail call float @llvm.convert.from.fp16.f32(i16 %tmp)
111111 ret float %tmp1
112112 }
113113
119119
120120 %arrayidx = getelementptr inbounds i16* %a, i64 -10
121121 %tmp = load i16* %arrayidx, align 2
122 %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp)
122 %tmp1 = tail call float @llvm.convert.from.fp16.f32(i16 %tmp)
123123 %conv = fpext float %tmp1 to double
124124 ret double %conv
125125 }
130130 ; CHECK-NEXT: str h0, [x0]
131131 ; CHECK-NEXT: ret
132132
133 %tmp = tail call i16 @llvm.convert.to.fp16(float %val)
133 %tmp = tail call i16 @llvm.convert.to.fp16.f32(float %val)
134134 store i16 %tmp, i16* %a, align 2
135135 ret void
136136 }
142142 ; CHECK-NEXT: ret
143143
144144 %conv = fptrunc double %val to float
145 %tmp = tail call i16 @llvm.convert.to.fp16(float %conv)
145 %tmp = tail call i16 @llvm.convert.to.fp16.f32(float %conv)
146146 store i16 %tmp, i16* %a, align 2
147147 ret void
148148 }
153153 ; CHECK-NEXT: str h0, [x0, w1, sxtw #1]
154154 ; CHECK-NEXT: ret
155155
156 %tmp = tail call i16 @llvm.convert.to.fp16(float %val)
156 %tmp = tail call i16 @llvm.convert.to.fp16.f32(float %val)
157157 %idxprom = sext i32 %i to i64
158158 %arrayidx = getelementptr inbounds i16* %a, i64 %idxprom
159159 store i16 %tmp, i16* %arrayidx, align 2
167167 ; CHECK-NEXT: ret
168168
169169 %conv = fptrunc double %val to float
170 %tmp = tail call i16 @llvm.convert.to.fp16(float %conv)
170 %tmp = tail call i16 @llvm.convert.to.fp16.f32(float %conv)
171171 %idxprom = sext i32 %i to i64
172172 %arrayidx = getelementptr inbounds i16* %a, i64 %idxprom
173173 store i16 %tmp, i16* %arrayidx, align 2
180180 ; CHECK-NEXT: str h0, [x0, x1, lsl #1]
181181 ; CHECK-NEXT: ret
182182
183 %tmp = tail call i16 @llvm.convert.to.fp16(float %val)
183 %tmp = tail call i16 @llvm.convert.to.fp16.f32(float %val)
184184 %arrayidx = getelementptr inbounds i16* %a, i64 %i
185185 store i16 %tmp, i16* %arrayidx, align 2
186186 ret void
193193 ; CHECK-NEXT: ret
194194
195195 %conv = fptrunc double %val to float
196 %tmp = tail call i16 @llvm.convert.to.fp16(float %conv)
196 %tmp = tail call i16 @llvm.convert.to.fp16.f32(float %conv)
197197 %arrayidx = getelementptr inbounds i16* %a, i64 %i
198198 store i16 %tmp, i16* %arrayidx, align 2
199199 ret void
205205 ; CHECK-NEXT: str h0, [x0, #20]
206206 ; CHECK-NEXT: ret
207207
208 %tmp = tail call i16 @llvm.convert.to.fp16(float %val)
208 %tmp = tail call i16 @llvm.convert.to.fp16.f32(float %val)
209209 %arrayidx = getelementptr inbounds i16* %a, i64 10
210210 store i16 %tmp, i16* %arrayidx, align 2
211211 ret void
218218 ; CHECK-NEXT: ret
219219
220220 %conv = fptrunc double %val to float
221 %tmp = tail call i16 @llvm.convert.to.fp16(float %conv)
221 %tmp = tail call i16 @llvm.convert.to.fp16.f32(float %conv)
222222 %arrayidx = getelementptr inbounds i16* %a, i64 10
223223 store i16 %tmp, i16* %arrayidx, align 2
224224 ret void
230230 ; CHECK-NEXT: stur h0, [x0, #-20]
231231 ; CHECK-NEXT: ret
232232
233 %tmp = tail call i16 @llvm.convert.to.fp16(float %val)
233 %tmp = tail call i16 @llvm.convert.to.fp16.f32(float %val)
234234 %arrayidx = getelementptr inbounds i16* %a, i64 -10
235235 store i16 %tmp, i16* %arrayidx, align 2
236236 ret void
243243 ; CHECK-NEXT: ret
244244
245245 %conv = fptrunc double %val to float
246 %tmp = tail call i16 @llvm.convert.to.fp16(float %conv)
247 %arrayidx = getelementptr inbounds i16* %a, i64 -10
248 store i16 %tmp, i16* %arrayidx, align 2
249 ret void
250 }
251
252 declare i16 @llvm.convert.to.fp16(float) nounwind readnone
253 declare float @llvm.convert.from.fp16(i16) nounwind readnone
246 %tmp = tail call i16 @llvm.convert.to.fp16.f32(float %conv)
247 %arrayidx = getelementptr inbounds i16* %a, i64 -10
248 store i16 %tmp, i16* %arrayidx, align 2
249 ret void
250 }
251
252 declare i16 @llvm.convert.to.fp16.f32(float) nounwind readnone
253 declare float @llvm.convert.from.fp16.f32(i16) nounwind readnone
1212 entry:
1313 %0 = load i16* @x, align 2
1414 %1 = load i16* @y, align 2
15 %2 = tail call float @llvm.convert.from.fp16(i16 %0)
15 %2 = tail call float @llvm.convert.from.fp16.f32(i16 %0)
1616 ; CHECK: __gnu_h2f_ieee
1717 ; CHECK-FP16: vcvtb.f32.f16
18 %3 = tail call float @llvm.convert.from.fp16(i16 %1)
18 %3 = tail call float @llvm.convert.from.fp16.f32(i16 %1)
1919 ; CHECK: __gnu_h2f_ieee
2020 ; CHECK-FP16: vcvtb.f32.f16
2121 %4 = fadd float %2, %3
22 %5 = tail call i16 @llvm.convert.to.fp16(float %4)
22 %5 = tail call i16 @llvm.convert.to.fp16.f32(float %4)
2323 ; CHECK: __gnu_f2h_ieee
2424 ; CHECK-FP16: vcvtb.f16.f32
2525 store i16 %5, i16* @x, align 2
2626 ret void
2727 }
2828
29 declare float @llvm.convert.from.fp16(i16) nounwind readnone
29 declare float @llvm.convert.from.fp16.f32(i16) nounwind readnone
3030
31 declare i16 @llvm.convert.to.fp16(float) nounwind readnone
31 declare i16 @llvm.convert.to.fp16.f32(float) nounwind readnone
0 ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
11
2 declare i16 @llvm.convert.to.fp16(float) nounwind readnone
2 declare i16 @llvm.convert.to.fp16.f32(float) nounwind readnone
33
44 ; SI-LABEL: @test_convert_fp16_to_fp32:
55 ; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]]
77 ; SI: BUFFER_STORE_SHORT [[RESULT]]
88 define void @test_convert_fp16_to_fp32(i16 addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
99 %val = load float addrspace(1)* %in, align 4
10 %cvt = call i16 @llvm.convert.to.fp16(float %val) nounwind readnone
10 %cvt = call i16 @llvm.convert.to.fp16.f32(float %val) nounwind readnone
1111 store i16 %cvt, i16 addrspace(1)* %out, align 2
1212 ret void
1313 }
0 ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
11
2 declare float @llvm.convert.from.fp16(i16) nounwind readnone
2 declare float @llvm.convert.from.fp16.f32(i16) nounwind readnone
33
44 ; SI-LABEL: @test_convert_fp16_to_fp32:
55 ; SI: BUFFER_LOAD_USHORT [[VAL:v[0-9]+]]
77 ; SI: BUFFER_STORE_DWORD [[RESULT]]
88 define void @test_convert_fp16_to_fp32(float addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind {
99 %val = load i16 addrspace(1)* %in, align 2
10 %cvt = call float @llvm.convert.from.fp16(i16 %val) nounwind readnone
10 %cvt = call float @llvm.convert.from.fp16.f32(i16 %val) nounwind readnone
1111 store float %cvt, float addrspace(1)* %out, align 4
1212 ret void
1313 }
2020
2121
2222 define void @test1(float %src, i16* %dest) {
23 %1 = tail call i16 @llvm.convert.to.fp16(float %src)
23 %1 = tail call i16 @llvm.convert.to.fp16.f32(float %src)
2424 store i16 %1, i16* %dest, align 2
2525 ret void
2626 }
3333
3434 define float @test2(i16* nocapture %src) {
3535 %1 = load i16* %src, align 2
36 %2 = tail call float @llvm.convert.from.fp16(i16 %1)
36 %2 = tail call float @llvm.convert.from.fp16.f32(i16 %1)
3737 ret float %2
3838 }
3939 ; CHECK-LABEL: test2:
4444
4545
4646 define float @test3(float %src) nounwind uwtable readnone {
47 %1 = tail call i16 @llvm.convert.to.fp16(float %src)
48 %2 = tail call float @llvm.convert.from.fp16(i16 %1)
47 %1 = tail call i16 @llvm.convert.to.fp16.f32(float %src)
48 %2 = tail call float @llvm.convert.from.fp16.f32(i16 %1)
4949 ret float %2
5050 }
5151
5858 ; F16C-NEXT: vcvtph2ps
5959 ; F16C: ret
6060
61 declare float @llvm.convert.from.fp16(i16) nounwind readnone
62 declare i16 @llvm.convert.to.fp16(float) nounwind readnone
61 declare float @llvm.convert.from.fp16.f32(i16) nounwind readnone
62 declare i16 @llvm.convert.to.fp16.f32(float) nounwind readnone
6363