llvm.org GIT mirror llvm / 14debde
[SelectionDAG] remove constant folding limitations based on FP exceptions

We don't have FP exception limits in the IR constant folder for the binops (apart from strict ops), so it does not make sense to have them here in the DAG either. Nothing else in the backend tries to preserve exceptions (again outside of strict ops), so I don't see how this could have ever worked for real code that cares about FP exceptions.

There are still cases (examples: unary opcodes in SDAG, FMA in IR) where we are trying (at least partially) to preserve exceptions without even asking if the target supports FP exceptions. Those should be corrected in subsequent patches.

Real support for FP exceptions requires several changes to handle the constrained/strict FP ops.

Differential Revision: https://reviews.llvm.org/D61331

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@359791 91177308-0d34-0410-b5e6-96231b3b80d8

Sanjay Patel 1 year, 4 months ago
6 changed file(s) with 24 addition(s) and 69 deletion(s). Raw diff Collapse all Expand all
581581 return false;
582582 }
583583
584 /// Return true if target supports floating point exceptions.
585 bool hasFloatingPointExceptions() const {
586 return HasFloatingPointExceptions;
587 }
588
589584 /// Return true if target always benefits from combining into FMA for a
590585 /// given value type. This must typically return false on targets where FMA
591586 /// takes more cycles to execute than FADD.
19131908 /// predicates into separate sequences that increase the amount of flow
19141909 /// control.
19151910 void setJumpIsExpensive(bool isExpensive = true);
1916
1917 /// Tells the code generator that this target supports floating point
1918 /// exceptions and cares about preserving floating point exception behavior.
1919 void setHasFloatingPointExceptions(bool FPExceptions = true) {
1920 HasFloatingPointExceptions = FPExceptions;
1921 }
19221911
19231912 /// Tells the code generator which bitwidths to bypass.
19241913 void addBypassSlowDiv(unsigned int SlowBitWidth, unsigned int FastBitWidth) {
25792568 /// predication.
25802569 bool JumpIsExpensive;
25812570
2582 /// Whether the target supports or cares about preserving floating point
2583 /// exception behavior.
2584 bool HasFloatingPointExceptions;
2585
25862571 /// This target prefers to use _setjmp to implement llvm.setjmp.
25872572 ///
25882573 /// Defaults to false.
48034803
48044804 SDValue SelectionDAG::foldConstantFPMath(unsigned Opcode, const SDLoc &DL,
48054805 EVT VT, SDValue N1, SDValue N2) {
4806 // TODO: We don't do any constant folding for strict FP opcodes here, but we
4807 // should. That will require dealing with a potentially non-default
4808 // rounding mode, checking the "opStatus" return value from the APFloat
4809 // math calculations, and possibly other variations.
48064810 auto *N1CFP = dyn_cast<ConstantFPSDNode>(N1.getNode());
48074811 auto *N2CFP = dyn_cast<ConstantFPSDNode>(N2.getNode());
4808 bool HasFPExceptions = TLI->hasFloatingPointExceptions();
48094812 if (N1CFP && N2CFP) {
48104813 APFloat C1 = N1CFP->getValueAPF(), C2 = N2CFP->getValueAPF();
4811 APFloat::opStatus Status;
48124814 switch (Opcode) {
48134815 case ISD::FADD:
4814 Status = C1.add(C2, APFloat::rmNearestTiesToEven);
4815 if (!HasFPExceptions || Status != APFloat::opInvalidOp)
4816 return getConstantFP(C1, DL, VT);
4817 break;
4816 C1.add(C2, APFloat::rmNearestTiesToEven);
4817 return getConstantFP(C1, DL, VT);
48184818 case ISD::FSUB:
4819 Status = C1.subtract(C2, APFloat::rmNearestTiesToEven);
4820 if (!HasFPExceptions || Status != APFloat::opInvalidOp)
4821 return getConstantFP(C1, DL, VT);
4822 break;
4819 C1.subtract(C2, APFloat::rmNearestTiesToEven);
4820 return getConstantFP(C1, DL, VT);
48234821 case ISD::FMUL:
4824 Status = C1.multiply(C2, APFloat::rmNearestTiesToEven);
4825 if (!HasFPExceptions || Status != APFloat::opInvalidOp)
4826 return getConstantFP(C1, DL, VT);
4827 break;
4822 C1.multiply(C2, APFloat::rmNearestTiesToEven);
4823 return getConstantFP(C1, DL, VT);
48284824 case ISD::FDIV:
4829 Status = C1.divide(C2, APFloat::rmNearestTiesToEven);
4830 if (!HasFPExceptions || Status != APFloat::opInvalidOp)
4831 return getConstantFP(C1, DL, VT);
4832 break;
4825 C1.divide(C2, APFloat::rmNearestTiesToEven);
4826 return getConstantFP(C1, DL, VT);
48334827 case ISD::FREM:
4834 Status = C1.mod(C2);
4835 if (!HasFPExceptions || Status != APFloat::opInvalidOp)
4836 return getConstantFP(C1, DL, VT);
4837 break;
4828 C1.mod(C2);
4829 return getConstantFP(C1, DL, VT);
48384830 case ISD::FCOPYSIGN:
48394831 C1.copySign(C2);
48404832 return getConstantFP(C1, DL, VT);
53105302 APFloat V1 = N1CFP->getValueAPF();
53115303 const APFloat &V2 = N2CFP->getValueAPF();
53125304 const APFloat &V3 = N3CFP->getValueAPF();
5313 APFloat::opStatus s =
5314 V1.fusedMultiplyAdd(V2, V3, APFloat::rmNearestTiesToEven);
5315 if (!TLI->hasFloatingPointExceptions() || s != APFloat::opInvalidOp)
5316 return getConstantFP(V1, DL, VT);
5305 V1.fusedMultiplyAdd(V2, V3, APFloat::rmNearestTiesToEven);
5306 return getConstantFP(V1, DL, VT);
53175307 }
53185308 break;
53195309 }
544544 JumpIsExpensive = JumpIsExpensiveOverride;
545545 PredictableSelectIsExpensive = false;
546546 EnableExtLdPromotion = false;
547 HasFloatingPointExceptions = true;
548547 StackPointerRegisterToSaveRestore = 0;
549548 BooleanContents = UndefinedBooleanContent;
550549 BooleanFloatContents = UndefinedBooleanContent;
728728 setTargetDAGCombine(ISD::ATOMIC_LOAD_FADD);
729729
730730 setSchedulingPreference(Sched::RegPressure);
731
732 // SI at least has hardware support for floating point exceptions, but no way
733 // of using or handling them is implemented. They are also optional in OpenCL
734 // (Section 7.3)
735 setHasFloatingPointExceptions(Subtarget->hasFPExceptions());
736731 }
737732
738733 const GCNSubtarget *SITargetLowering::getSubtarget() const {
4444 setBooleanContents(ZeroOrOneBooleanContent);
4545 // Except in SIMD vectors
4646 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
47 // WebAssembly does not produce floating-point exceptions on normal floating
48 // point operations.
49 setHasFloatingPointExceptions(false);
5047 // We don't know the microarchitecture here, so just reduce register pressure.
5148 setSchedulingPreference(Sched::RegPressure);
5249 // Tell ISel that we have a stack pointer.
1717 define double @constant_fold_frem_by_zero(double* %p) {
1818 ; CHECK-LABEL: constant_fold_frem_by_zero:
1919 ; CHECK: // %bb.0:
20 ; CHECK-NEXT: mov x8, #1
21 ; CHECK-NEXT: fmov d1, xzr
20 ; CHECK-NEXT: mov x8, #9221120237041090560
2221 ; CHECK-NEXT: fmov d0, x8
23 ; CHECK-NEXT: b fmod
22 ; CHECK-NEXT: ret
2423 %r = frem double 4.940660e-324, 0.0
2524 ret double %r
2625 }
3029 define double @constant_fold_fmul_nan(double* %p) {
3130 ; CHECK-LABEL: constant_fold_fmul_nan:
3231 ; CHECK: // %bb.0:
33 ; CHECK-NEXT: mov x8, #9218868437227405312
34 ; CHECK-NEXT: fmov d0, xzr
35 ; CHECK-NEXT: fmov d1, x8
36 ; CHECK-NEXT: fmul d0, d1, d0
32 ; CHECK-NEXT: mov x8, #9221120237041090560
33 ; CHECK-NEXT: fmov d0, x8
3734 ; CHECK-NEXT: ret
3835 %r = fmul double 0x7ff0000000000000, 0.0
3936 ret double %r
4441 define double @constant_fold_fadd_nan(double* %p) {
4542 ; CHECK-LABEL: constant_fold_fadd_nan:
4643 ; CHECK: // %bb.0:
47 ; CHECK-NEXT: mov x8, #-4503599627370496
48 ; CHECK-NEXT: mov x9, #9218868437227405312
44 ; CHECK-NEXT: mov x8, #9221120237041090560
4945 ; CHECK-NEXT: fmov d0, x8
50 ; CHECK-NEXT: fmov d1, x9
51 ; CHECK-NEXT: fadd d0, d1, d0
5246 ; CHECK-NEXT: ret
5347 %r = fadd double 0x7ff0000000000000, 0xfff0000000000000
5448 ret double %r
5953 define double @constant_fold_fsub_nan(double* %p) {
6054 ; CHECK-LABEL: constant_fold_fsub_nan:
6155 ; CHECK: // %bb.0:
62 ; CHECK-NEXT: mov x8, #9218868437227405312
56 ; CHECK-NEXT: mov x8, #9221120237041090560
6357 ; CHECK-NEXT: fmov d0, x8
64 ; CHECK-NEXT: fsub d0, d0, d0
6558 ; CHECK-NEXT: ret
6659 %r = fsub double 0x7ff0000000000000, 0x7ff0000000000000
6760 ret double %r
7265 define double @constant_fold_fma_nan(double* %p) {
7366 ; CHECK-LABEL: constant_fold_fma_nan:
7467 ; CHECK: // %bb.0:
75 ; CHECK-NEXT: mov x8, #4631107791820423168
76 ; CHECK-NEXT: mov x9, #9218868437227405312
77 ; CHECK-NEXT: fmov d0, xzr
78 ; CHECK-NEXT: fmov d1, x8
79 ; CHECK-NEXT: fmov d2, x9
80 ; CHECK-NEXT: fmadd d0, d2, d0, d1
68 ; CHECK-NEXT: mov x8, #9221120237041090560
69 ; CHECK-NEXT: fmov d0, x8
8170 ; CHECK-NEXT: ret
8271 %r = call double @llvm.fma.f64(double 0x7ff0000000000000, double 0.0, double 42.0)
8372 ret double %r