llvm.org GIT mirror llvm / a96468b
transform obscured FP sign bit ops into a fabs/fneg using TLI hook

This is effectively a revert of:
http://reviews.llvm.org/rL249702 - [InstCombine] transform masking off of an FP sign bit into a fabs() intrinsic call (PR24886)
and:
http://reviews.llvm.org/rL249701 - [ValueTracking] teach computeKnownBits that a fabs() clears sign bits
and a reimplementation as a DAG combine for targets that have IEEE754-compliant fabs/fneg instructions.

This is intended to resolve the objections raised on the dev list:
http://lists.llvm.org/pipermail/llvm-dev/2016-April/098154.html
and:
https://llvm.org/bugs/show_bug.cgi?id=24886#c4

In the interest of patch minimalism, I've only partly enabled AArch64. PowerPC, MIPS, x86 and others can enable later.

Differential Revision: http://reviews.llvm.org/D19391

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@271573 91177308-0d34-0410-b5e6-96231b3b80d8

Sanjay Patel 4 years ago
8 changed file(s) with 64 addition(s) and 101 deletion(s). Raw diff Collapse all Expand all
315315
316316 /// \brief Return true if it is cheap to speculate a call to intrinsic ctlz.
317317 virtual bool isCheapToSpeculateCtlz() const {
318 return false;
319 }
320
321 /// Return true if it is safe to transform an integer-domain bitwise operation
322 /// into the equivalent floating-point operation. This should be set to true
323 /// if the target has IEEE-754-compliant fabs/fneg operations for the input
324 /// type.
325 virtual bool hasBitPreservingFPLogic(EVT VT) const {
318326 return false;
319327 }
320328
990990 }
991991 case Instruction::BitCast: {
992992 Type *SrcTy = I->getOperand(0)->getType();
993 if ((SrcTy->isIntegerTy() || SrcTy->isPointerTy() ||
994 SrcTy->isFloatingPointTy()) &&
993 if ((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
995994 // TODO: For now, not handling conversions like:
996995 // (bitcast i64 %x to <2 x i32>)
997996 !I->getType()->isVectorTy()) {
13151314 // of bits which might be set provided by popcnt KnownOne2.
13161315 break;
13171316 }
1318 case Intrinsic::fabs: {
1319 Type *Ty = II->getType();
1320 APInt SignBit = APInt::getSignBit(Ty->getScalarSizeInBits());
1321 KnownZero |= APInt::getSplat(Ty->getPrimitiveSizeInBits(), SignBit);
1322 break;
1323 }
13241317 case Intrinsic::x86_sse42_crc32_64_64:
13251318 KnownZero |= APInt::getHighBitsSet(64, 32);
13261319 break;
13801373 unsigned BitWidth = KnownZero.getBitWidth();
13811374
13821375 assert((V->getType()->isIntOrIntVectorTy() ||
1383 V->getType()->isFPOrFPVectorTy() ||
13841376 V->getType()->getScalarType()->isPointerTy()) &&
1385 "Not integer, floating point, or pointer type!");
1377 "Not integer or pointer type!");
13861378 assert((Q.DL.getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) &&
13871379 (!V->getType()->isIntOrIntVectorTy() ||
13881380 V->getType()->getScalarSizeInBits() == BitWidth) &&
73507350 return DAG.getDataLayout().isBigEndian() ? 1 : 0;
73517351 }
73527352
7353 static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
7354 const TargetLowering &TLI) {
7355 // If this is not a bitcast to an FP type or if the target doesn't have
7356 // IEEE754-compliant FP logic, we're done.
7357 EVT VT = N->getValueType(0);
7358 if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
7359 return SDValue();
7360
7361 // TODO: Use splat values for the constant-checking below and remove this
7362 // restriction.
7363 SDValue N0 = N->getOperand(0);
7364 EVT SourceVT = N0.getValueType();
7365 if (SourceVT.isVector())
7366 return SDValue();
7367
7368 unsigned FPOpcode;
7369 APInt SignMask;
7370 switch (N0.getOpcode()) {
7371 case ISD::AND:
7372 FPOpcode = ISD::FABS;
7373 SignMask = ~APInt::getSignBit(SourceVT.getSizeInBits());
7374 break;
7375 case ISD::XOR:
7376 FPOpcode = ISD::FNEG;
7377 SignMask = APInt::getSignBit(SourceVT.getSizeInBits());
7378 break;
7379 // TODO: ISD::OR --> ISD::FNABS?
7380 default:
7381 return SDValue();
7382 }
7383
7384 // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
7385 // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
7386 SDValue LogicOp0 = N0.getOperand(0);
7387 ConstantSDNode *LogicOp1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7388 if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
7389 LogicOp0.getOpcode() == ISD::BITCAST &&
7390 LogicOp0->getOperand(0).getValueType() == VT)
7391 return DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0->getOperand(0));
7392
7393 return SDValue();
7394 }
7395
73537396 SDValue DAGCombiner::visitBITCAST(SDNode *N) {
73547397 SDValue N0 = N->getOperand(0);
73557398 EVT VT = N->getValueType(0);
74137456 return Load;
74147457 }
74157458 }
7459
7460 if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
7461 return V;
74167462
74177463 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
74187464 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
394394 bool isCheapToSpeculateCtlz() const override {
395395 return true;
396396 }
397
398 bool hasBitPreservingFPLogic(EVT VT) const override {
399 // FIXME: Is this always true? It should be true for vectors at least.
400 return VT == MVT::f32 || VT == MVT::f64;
401 }
402
397403 bool supportSplitCSR(MachineFunction *MF) const override {
398404 return MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS &&
399405 MF->getFunction()->hasFnAttribute(Attribute::NoUnwind);
15931593 if (Instruction *CastedAnd = foldCastedBitwiseLogic(I))
15941594 return CastedAnd;
15951595
1596 if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
1597 Value *Op0COp = Op0C->getOperand(0);
1598 Type *SrcTy = Op0COp->getType();
1599
1600 // If we are masking off the sign bit of a floating-point value, convert
1601 // this to the canonical fabs intrinsic call and cast back to integer.
1602 // The backend should know how to optimize fabs().
1603 // TODO: This transform should also apply to vectors.
1604 ConstantInt *CI;
1605 if (isa<BitCastInst>(Op0C) && SrcTy->isFloatingPointTy() &&
1606 match(Op1, m_ConstantInt(CI)) && CI->isMaxValue(true)) {
1607 Module *M = I.getModule();
1608 Function *Fabs = Intrinsic::getDeclaration(M, Intrinsic::fabs, SrcTy);
1609 Value *Call = Builder->CreateCall(Fabs, Op0COp, "fabs");
1610 return CastInst::CreateBitOrPointerCast(Call, I.getType());
1611 }
1612 }
1613
16141596 if (Instruction *Select = foldBoolSextMaskToSelect(I))
16151597 return Select;
16161598
152152 define double @bitcast_fabs(double %x) {
153153 ; CHECK-LABEL: bitcast_fabs:
154154 ; CHECK: ; BB#0:
155 ; CHECK-NEXT: fmov x8, d0
156 ; CHECK-NEXT: and x8, x8, #0x7fffffffffffffff
157 ; CHECK-NEXT: fmov d0, x8
155 ; CHECK-NEXT: fabs d0, d0
158156 ; CHECK-NEXT: ret
159157 ;
160158 %bc1 = bitcast double %x to i64
166164 define float @bitcast_fneg(float %x) {
167165 ; CHECK-LABEL: bitcast_fneg:
168166 ; CHECK: ; BB#0:
169 ; CHECK-NEXT: fmov w8, s0
170 ; CHECK-NEXT: eor w8, w8, #0x80000000
171 ; CHECK-NEXT: fmov s0, w8
167 ; CHECK-NEXT: fneg s0, s0
172168 ; CHECK-NEXT: ret
173169 ;
174170 %bc1 = bitcast float %x to i32
102102 ret i64 %add
103103 }
104104
105 define i64 @fabs_double(double %x) {
106 ; CHECK-LABEL: @fabs_double(
107 ; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x)
108 ; CHECK-NEXT: %and = bitcast double %fabs to i64
109 ; CHECK-NEXT: ret i64 %and
110 %bc = bitcast double %x to i64
111 %and = and i64 %bc, 9223372036854775807
112 ret i64 %and
113 }
114
115 define i64 @fabs_double_swap(double %x) {
116 ; CHECK-LABEL: @fabs_double_swap(
117 ; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x)
118 ; CHECK-NEXT: %and = bitcast double %fabs to i64
119 ; CHECK-NEXT: ret i64 %and
120 %bc = bitcast double %x to i64
121 %and = and i64 9223372036854775807, %bc
122 ret i64 %and
123 }
124
125 define i32 @fabs_float(float %x) {
126 ; CHECK-LABEL: @fabs_float(
127 ; CHECK-NEXT: %fabs = call float @llvm.fabs.f32(float %x)
128 ; CHECK-NEXT: %and = bitcast float %fabs to i32
129 ; CHECK-NEXT: ret i32 %and
130 %bc = bitcast float %x to i32
131 %and = and i32 %bc, 2147483647
132 ret i32 %and
133 }
134
135 ; Make sure that only a bitcast is transformed.
136
137 define i64 @fabs_double_not_bitcast(double %x) {
138 ; CHECK-LABEL: @fabs_double_not_bitcast(
139 ; CHECK-NEXT: %bc = fptoui double %x to i64
140 ; CHECK-NEXT: %and = and i64 %bc, 9223372036854775807
141 ; CHECK-NEXT: ret i64 %and
142 %bc = fptoui double %x to i64
143 %and = and i64 %bc, 9223372036854775807
144 ret i64 %and
145 }
146
4040 declare float @llvm.fabs.f32(float)
4141 declare double @llvm.fabs.f64(double)
4242 declare fp128 @llvm.fabs.f128(fp128)
43 declare <4 x float> @llvm.fabs.v4f32(<4 x float>)
4443
4544 define float @square_fabs_intrinsic_f32(float %x) {
4645 %mul = fmul float %x, %x
9897 ; CHECK-NEXT: ret float %sq
9998 }
10099
101 ; A scalar fabs op makes the sign bit zero, so masking off all of the other bits means we can return zero.
102
103 define i32 @fabs_value_tracking_f32(float %x) {
104 %call = call float @llvm.fabs.f32(float %x)
105 %bc = bitcast float %call to i32
106 %and = and i32 %bc, 2147483648
107 ret i32 %and
108
109 ; CHECK-LABEL: fabs_value_tracking_f32(
110 ; CHECK: ret i32 0
111 }
112
113 ; TODO: A vector fabs op makes the sign bits zero, so masking off all of the other bits means we can return zero.
114
115 define <4 x i32> @fabs_value_tracking_v4f32(<4 x float> %x) {
116 %call = call <4 x float> @llvm.fabs.v4f32(<4 x float> %x)
117 %bc = bitcast <4 x float> %call to <4 x i32>
118 %and = and <4 x i32> %bc, <i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648>
119 ret <4 x i32> %and
120
121 ; CHECK-LABEL: fabs_value_tracking_v4f32(
122 ; CHECK: ret <4 x i32> %and
123 }
124