llvm.org GIT mirror llvm / 999821c
Transform div to mul with reciprocal only when fp imm is legal. This fixes PR12516 and uncovers one weird problem in legalize (worked around) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@154394 91177308-0d34-0410-b5e6-96231b3b80d8 Anton Korobeynikov 8 years ago
2 changed file(s) with 40 addition(s) and 2 deletion(s). Raw diff Collapse all Expand all
57685768 APFloat N1APF = N1CFP->getValueAPF();
57695769 APFloat Recip(N1APF.getSemantics(), 1); // 1.0
57705770 APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
5771 // Only do the transform if the reciprocal is not too horrible (eg not NaN).
5772 if (st == APFloat::opOK || st == APFloat::opInexact)
5771 // Only do the transform if the reciprocal is not too horrible (eg not NaN)
5772 // and the reciprocal is a legal fp imm.
5773 if ((st == APFloat::opOK || st == APFloat::opInexact) &&
5774 (!LegalOperations ||
5775 // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
5776 // backend)... we should handle this gracefully after Legalize.
5777 // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) ||
5778 TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) ||
5779 TLI.isFPImmLegal(Recip, VT)))
57735780 return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0,
57745781 DAG.getConstantFP(Recip, VT));
57755782 }
0 ; RUN: llc < %s -march=arm -mcpu=cortex-a9 -enable-unsafe-fp-math
1 ;target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
2 ;target triple = "armv7-none-linux-gnueabi"
3
; Reproducer function; presumably the test case for PR12516 named in the commit
; message. The RUN line only invokes llc and checks no output, so the test
; passes when code generation completes without crashing.
4 define arm_aapcs_vfpcc void @foo(<4 x float> %arg) nounwind align 2 {
5 bb4:
6 %tmp = extractelement <2 x float> undef, i32 0
7 br i1 undef, label %bb18, label %bb5
8
9 bb5: ; preds = %bb4
10 %tmp6 = fadd float %tmp, -1.500000e+01
11 %tmp7 = fdiv float %tmp6, 2.000000e+01 ; divisor is an FP constant (20.0): presumably the fdiv that the reciprocal-multiply combine targets
12 %tmp8 = fadd float %tmp7, 1.000000e+00
13 %tmp9 = fdiv float 1.000000e+00, %tmp8 ; here the constant is the dividend, not the divisor
14 %tmp10 = fsub float 1.000000e+00, %tmp9
15 %tmp11 = fmul float %tmp10, 1.000000e+01
16 %tmp12 = fadd float %tmp11, 1.500000e+01
17 %tmp13 = fdiv float %tmp12, %tmp ; non-constant divisor
18 %tmp14 = insertelement <2 x float> undef, float %tmp13, i32 0
19 %tmp15 = shufflevector <2 x float> %tmp14, <2 x float> undef, <4 x i32> zeroinitializer ; all-zero mask: every result lane takes lane 0 of %tmp14
20 %tmp16 = fmul <4 x float> zeroinitializer, %tmp15
21 %tmp17 = fadd <4 x float> %tmp16, %arg
22 store <4 x float> %tmp17, <4 x float>* undef, align 8, !tbaa !0
23 br label %bb18
24
25 bb18: ; preds = %bb5, %bb4
26 ret void
27 }
28
29 !0 = metadata !{metadata !"omnipotent char", metadata !1}
30 !1 = metadata !{metadata !"Simple C/C++ TBAA", null}