llvm.org GIT mirror llvm / 7c06364
R600: Implement getRecipEstimate. This requires a new hook to prevent expanding sqrt in terms of rsqrt and reciprocal. v_rcp_f32, v_rsq_f32, and v_sqrt_f32 are all the same rate, so this expansion would just double the number of instructions and cycles. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@225828 91177308-0d34-0410-b5e6-96231b3b80d8 Matt Arsenault 5 years ago
5 changed file(s) with 42 addition(s) and 2 deletion(s). Raw diff Collapse all Expand all
215215 /// Return true if integer divide is usually cheaper than a sequence of
216216 /// several shifts, adds, and multiplies for this target.
217217 bool isIntDivCheap() const { return IntDivIsCheap; }
218
219 /// Return true if sqrt(x) is as cheap as or cheaper than 1 / rsqrt(x).
220 bool isFsqrtCheap() const {
221 return FsqrtIsCheap;
222 }
218223
219224 /// Returns true if target has indicated at least one type should be bypassed.
220225 bool isSlowDivBypassed() const { return !BypassSlowDivWidths.empty(); }
11821187 /// possible, should be replaced by an alternate sequence of instructions not
11831188 /// containing an integer divide.
11841189 void setIntDivIsCheap(bool isCheap = true) { IntDivIsCheap = isCheap; }
1185
1190
1191 /// Tells the code generator that fsqrt is cheap, and should not be replaced
1192 /// with an alternative sequence of instructions.
1193 void setFsqrtIsCheap(bool isCheap = true) { FsqrtIsCheap = isCheap; }
1194
11861195 /// Tells the code generator that this target supports floating point
11871196 /// exceptions and cares about preserving floating point exception behavior.
11881197 void setHasFloatingPointExceptions(bool FPExceptions = true) {
16231632 /// model is in place. If we ever optimize for size, this will be set to true
16241633 /// unconditionally.
16251634 bool IntDivIsCheap;
1635
1636 // Don't expand fsqrt with an approximation based on the inverse sqrt.
1637 bool FsqrtIsCheap;
16261638
16271639 /// Tells the code generator to bypass slow divide or remainder
16281640 /// instructions. For example, BypassSlowDivWidths[32,8] tells the code
75377537 }
75387538
75397539 SDValue DAGCombiner::visitFSQRT(SDNode *N) {
7540 if (DAG.getTarget().Options.UnsafeFPMath) {
7540 if (DAG.getTarget().Options.UnsafeFPMath &&
7541 !TLI.isFsqrtCheap()) {
75417542 // Compute this as X * (1/sqrt(X)) = X * (X ** -0.5)
75427543 if (SDValue RV = BuildRsqrtEstimate(N->getOperand(0))) {
75437544 EVT VT = RV.getValueType();
709709 HasMultipleConditionRegisters = false;
710710 HasExtractBitsInsn = false;
711711 IntDivIsCheap = false;
712 FsqrtIsCheap = false;
712713 Pow2SDivIsCheap = false;
713714 JumpIsExpensive = false;
714715 PredictableSelectIsExpensive = false;
402402 // large sequence of instructions.
403403 setIntDivIsCheap(false);
404404 setPow2SDivIsCheap(false);
405 setFsqrtIsCheap(true);
405406
406407 // FIXME: Need to really handle these.
407408 MaxStoresPerMemcpy = 4096;
25842585 return SDValue();
25852586 }
25862587
2588 SDValue AMDGPUTargetLowering::getRecipEstimate(SDValue Operand,
2589 DAGCombinerInfo &DCI,
2590 unsigned &RefinementSteps) const {
2591 SelectionDAG &DAG = DCI.DAG;
2592 EVT VT = Operand.getValueType();
2593
2594 if (VT == MVT::f32) {
2595 // Reciprocal, < 1 ulp error.
2596 //
2597 // This reciprocal approximation converges to < 0.5 ulp error with one
2598 // Newton-Raphson step performed with two fused multiply-adds (FMAs).
2599
2600 RefinementSteps = 0;
2601 return DAG.getNode(AMDGPUISD::RCP, SDLoc(Operand), VT, Operand);
2602 }
2603
2604 // TODO: There is also an f64 rcp instruction, but the documentation is less
2605 // clear on its precision.
2606
2607 return SDValue();
2608 }
2609
25872610 static void computeKnownBitsForMinMax(const SDValue Op0,
25882611 const SDValue Op1,
25892612 APInt &KnownZero,
170170 DAGCombinerInfo &DCI,
171171 unsigned &RefinementSteps,
172172 bool &UseOneConstNR) const override;
173 SDValue getRecipEstimate(SDValue Operand,
174 DAGCombinerInfo &DCI,
175 unsigned &RefinementSteps) const override;
173176
174177 virtual SDNode *PostISelFolding(MachineSDNode *N,
175178 SelectionDAG &DAG) const {