llvm.org GIT mirror llvm / 1f4353f
InstSimplify: Eliminate fabs on known positive git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@291624 91177308-0d34-0410-b5e6-96231b3b80d8 Matt Arsenault 2 years ago
6 changed file(s) with 212 addition(s) and 32 deletion(s). Raw diff Collapse all Expand all
168168
169169 /// Return true if we can prove that the specified FP value is either a NaN or
170170 /// never less than 0.0.
171 bool CannotBeOrderedLessThanZero(const Value *V, const TargetLibraryInfo *TLI,
172 unsigned Depth = 0);
171 /// -0.0 is not considered less than 0.0 here; use SignBitMustBeZero to also rule out -0.0.
172 bool CannotBeOrderedLessThanZero(const Value *V, const TargetLibraryInfo *TLI);
173
174 /// \returns true if we can prove that the specified FP value has a 0 sign
175 /// bit.
176 bool SignBitMustBeZero(const Value *V, const TargetLibraryInfo *TLI);
173177
174178 /// If the specified value can be set by repeating the same byte in memory,
175179 /// return the i8 value that it is represented with. This is true for all i8
43074307 return nullptr;
43084308
43094309 // Unary Ops
4310 if (NumOperands == 1)
4311 if (IntrinsicInst *II = dyn_cast(*ArgBegin))
4310 if (NumOperands == 1) {
4311 if (IntrinsicInst *II = dyn_cast(*ArgBegin)) {
43124312 if (II->getIntrinsicID() == IID)
43134313 return II;
4314 }
4315
4316 switch (IID) {
4317 case Intrinsic::fabs: {
4318 if (SignBitMustBeZero(*ArgBegin, Q.TLI))
4319 return *ArgBegin;
4320 }
4321 default:
4322 break;
4323 }
4324 }
43144325
43154326 return nullptr;
43164327 }
25792579 return false;
25802580 }
25812581
2582 bool llvm::CannotBeOrderedLessThanZero(const Value *V,
2583 const TargetLibraryInfo *TLI,
2584 unsigned Depth) {
2585 if (const ConstantFP *CFP = dyn_cast(V))
2586 return !CFP->getValueAPF().isNegative() || CFP->getValueAPF().isZero();
2582 /// If \p SignBitOnly is true, test for a known 0 sign bit rather than a
2583 /// standard ordered compare, e.g. treat -0.0 as less than 0.0 because its
2584 /// sign bit is set, even though the two values compare equal.
2585 static bool cannotBeOrderedLessThanZeroImpl(const Value *V,
2586 const TargetLibraryInfo *TLI,
2587 bool SignBitOnly,
2588 unsigned Depth) {
2589 if (const ConstantFP *CFP = dyn_cast(V)) {
2590 return !CFP->getValueAPF().isNegative() ||
2591 (!SignBitOnly && CFP->getValueAPF().isZero());
2592 }
25872593
25882594 if (Depth == MaxDepth)
2589 return false; // Limit search depth.
2595 return false; // Limit search depth.
25902596
25912597 const Operator *I = dyn_cast(V);
2592 if (!I) return false;
2598 if (!I)
2599 return false;
25932600
25942601 switch (I->getOpcode()) {
2595 default: break;
2602 default:
2603 break;
25962604 // Unsigned integers are always nonnegative.
25972605 case Instruction::UIToFP:
25982606 return true;
25992607 case Instruction::FMul:
26002608 // x*x is always non-negative or a NaN.
2601 if (I->getOperand(0) == I->getOperand(1))
2609 if (I->getOperand(0) == I->getOperand(1) &&
2610 (!SignBitOnly || cast(I)->hasNoNaNs()))
26022611 return true;
2612
26032613 LLVM_FALLTHROUGH;
26042614 case Instruction::FAdd:
26052615 case Instruction::FDiv:
26062616 case Instruction::FRem:
2607 return CannotBeOrderedLessThanZero(I->getOperand(0), TLI, Depth + 1) &&
2608 CannotBeOrderedLessThanZero(I->getOperand(1), TLI, Depth + 1);
2617 return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
2618 Depth + 1) &&
2619 cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly,
2620 Depth + 1);
26092621 case Instruction::Select:
2610 return CannotBeOrderedLessThanZero(I->getOperand(1), TLI, Depth + 1) &&
2611 CannotBeOrderedLessThanZero(I->getOperand(2), TLI, Depth + 1);
2622 return cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly,
2623 Depth + 1) &&
2624 cannotBeOrderedLessThanZeroImpl(I->getOperand(2), TLI, SignBitOnly,
2625 Depth + 1);
26122626 case Instruction::FPExt:
26132627 case Instruction::FPTrunc:
26142628 // Widening/narrowing never change sign.
2615 return CannotBeOrderedLessThanZero(I->getOperand(0), TLI, Depth + 1);
2629 return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
2630 Depth + 1);
26162631 case Instruction::Call:
26172632 Intrinsic::ID IID = getIntrinsicForCallSite(cast(I), TLI);
26182633 switch (IID) {
26192634 default:
26202635 break;
26212636 case Intrinsic::maxnum:
2622 return CannotBeOrderedLessThanZero(I->getOperand(0), TLI, Depth + 1) ||
2623 CannotBeOrderedLessThanZero(I->getOperand(1), TLI, Depth + 1);
2637 return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
2638 Depth + 1) ||
2639 cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly,
2640 Depth + 1);
26242641 case Intrinsic::minnum:
2625 return CannotBeOrderedLessThanZero(I->getOperand(0), TLI, Depth + 1) &&
2626 CannotBeOrderedLessThanZero(I->getOperand(1), TLI, Depth + 1);
2642 return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
2643 Depth + 1) &&
2644 cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly,
2645 Depth + 1);
26272646 case Intrinsic::exp:
26282647 case Intrinsic::exp2:
26292648 case Intrinsic::fabs:
26352654 if (CI->getBitWidth() <= 64 && CI->getSExtValue() % 2u == 0)
26362655 return true;
26372656 }
2638 return CannotBeOrderedLessThanZero(I->getOperand(0), TLI, Depth + 1);
2657 return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
2658 Depth + 1);
26392659 case Intrinsic::fma:
26402660 case Intrinsic::fmuladd:
26412661 // x*x+y is non-negative if y is non-negative.
26422662 return I->getOperand(0) == I->getOperand(1) &&
2643 CannotBeOrderedLessThanZero(I->getOperand(2), TLI, Depth + 1);
2663 (!SignBitOnly || cast(I)->hasNoNaNs()) &&
2664 cannotBeOrderedLessThanZeroImpl(I->getOperand(2), TLI, SignBitOnly,
2665 Depth + 1);
26442666 }
26452667 break;
26462668 }
26472669 return false;
2670 }
2671
2672 bool llvm::CannotBeOrderedLessThanZero(const Value *V,
2673 const TargetLibraryInfo *TLI) {
2674 return cannotBeOrderedLessThanZeroImpl(V, TLI, false, 0);
2675 }
2676
2677 bool llvm::SignBitMustBeZero(const Value *V, const TargetLibraryInfo *TLI) {
2678 return cannotBeOrderedLessThanZeroImpl(V, TLI, true, 0);
26482679 }
26492680
26502681 /// If the specified value can be set by repeating the same byte in memory,
44 declare float @fabsf(float)
55 declare double @fabs(double)
66 declare fp128 @fabsl(fp128)
7 declare float @llvm.fma.f32(float, float, float)
8 declare float @llvm.fmuladd.f32(float, float, float)
79
810 define float @square_fabs_call_f32(float %x) {
911 %mul = fmul float %x, %x
7981 ; CHECK-NEXT: ret fp128 %fabsl
8082 }
8183
82 ; TODO: This should be able to eliminate the fabs
8384 define float @square_nnan_fabs_intrinsic_f32(float %x) {
8485 %mul = fmul nnan float %x, %x
8586 %fabsf = call float @llvm.fabs.f32(float %mul)
8788
8889 ; CHECK-LABEL: square_nnan_fabs_intrinsic_f32(
8990 ; CHECK-NEXT: %mul = fmul nnan float %x, %x
90 ; CHECK-NEXT: %fabsf = call float @llvm.fabs.f32(float %mul)
91 ; CHECK-NEXT: ret float %fabsf
91 ; CHECK-NEXT: ret float %mul
9292 }
9393
9494 ; Shrinking a library call to a smaller type should not be inhibited by nor inhibit the square optimization.
169169 %fabs = call float @llvm.fabs.f32(float %select)
170170 ret float %fabs
171171 }
172
173 ; The fabs cannot be eliminated because %x may be a NaN
174 define float @square_fma_fabs_intrinsic_f32(float %x) {
175 %fma = call float @llvm.fma.f32(float %x, float %x, float 1.0)
176 %fabsf = call float @llvm.fabs.f32(float %fma)
177 ret float %fabsf
178
179 ; CHECK-LABEL: @square_fma_fabs_intrinsic_f32(
180 ; CHECK-NEXT: %fma = call float @llvm.fma.f32(float %x, float %x, float 1.000000e+00)
181 ; CHECK-NEXT: %fabsf = call float @llvm.fabs.f32(float %fma)
182 ; CHECK-NEXT: ret float %fabsf
183 }
184
185 ; The fabs can be eliminated because nnan on the fma rules out a NaN result
186 define float @square_nnan_fma_fabs_intrinsic_f32(float %x) {
187 %fma = call nnan float @llvm.fma.f32(float %x, float %x, float 1.0)
188 %fabsf = call float @llvm.fabs.f32(float %fma)
189 ret float %fabsf
190
191 ; CHECK-LABEL: @square_nnan_fma_fabs_intrinsic_f32(
192 ; CHECK-NEXT: %fma = call nnan float @llvm.fma.f32(float %x, float %x, float 1.000000e+00)
193 ; CHECK-NEXT: ret float %fma
194 }
195
196 define float @square_fmuladd_fabs_intrinsic_f32(float %x) {
197 %fmuladd = call float @llvm.fmuladd.f32(float %x, float %x, float 1.0)
198 %fabsf = call float @llvm.fabs.f32(float %fmuladd)
199 ret float %fabsf
200
201 ; CHECK-LABEL: @square_fmuladd_fabs_intrinsic_f32(
202 ; CHECK-NEXT: %fmuladd = call float @llvm.fmuladd.f32(float %x, float %x, float 1.000000e+00)
203 ; CHECK-NEXT: %fabsf = call float @llvm.fabs.f32(float %fmuladd)
204 ; CHECK-NEXT: ret float %fabsf
205 }
206
207 define float @square_nnan_fmuladd_fabs_intrinsic_f32(float %x) {
208 %fmuladd = call nnan float @llvm.fmuladd.f32(float %x, float %x, float 1.0)
209 %fabsf = call float @llvm.fabs.f32(float %fmuladd)
210 ret float %fabsf
211
212 ; CHECK-LABEL: @square_nnan_fmuladd_fabs_intrinsic_f32(
213 ; CHECK-NEXT: %fmuladd = call nnan float @llvm.fmuladd.f32(float %x, float %x, float 1.000000e+00)
214 ; CHECK-NEXT: ret float %fmuladd
215 }
240240 ; X/C1 * C2 => X * (C2/C1) is disabled if X/C1 has multiple uses
241241 @fmul2_external = external global float
242242 define float @fmul2_disable(float %f1) {
243 %div = fdiv fast float 1.000000e+00, %f1
243 %div = fdiv fast float 1.000000e+00, %f1
244244 store float %div, float* @fmul2_external
245245 %mul = fmul fast float %div, 2.000000e+00
246246 ret float %mul
671671
672672 ; CHECK-LABEL: sqrt_intrinsic_arg_4th(
673673 ; CHECK-NEXT: %mul = fmul fast double %x, %x
674 ; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %mul)
675 ; CHECK-NEXT: ret double %fabs
674 ; CHECK-NEXT: ret double %mul
676675 }
677676
678677 define double @sqrt_intrinsic_arg_5th(double %x) {
684683
685684 ; CHECK-LABEL: sqrt_intrinsic_arg_5th(
686685 ; CHECK-NEXT: %mul = fmul fast double %x, %x
687 ; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %mul)
688686 ; CHECK-NEXT: %sqrt1 = call fast double @llvm.sqrt.f64(double %x)
689 ; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
687 ; CHECK-NEXT: %1 = fmul fast double %mul, %sqrt1
690688 ; CHECK-NEXT: ret double %1
691689 }
692690
102102 ret float %7
103103 }
104104
105 declare float @llvm.fabs.f32(float)
106
107 ; CHECK-LABEL: @fabs_select_positive_constants(
108 ; CHECK: %select = select i1 %cmp, float 1.000000e+00, float 2.000000e+00
109 ; CHECK-NEXT: ret float %select
110 define float @fabs_select_positive_constants(i32 %c) {
111 %cmp = icmp eq i32 %c, 0
112 %select = select i1 %cmp, float 1.0, float 2.0
113 %fabs = call float @llvm.fabs.f32(float %select)
114 ret float %fabs
115 }
116
117 ; CHECK-LABEL: @fabs_select_constant_variable(
118 ; CHECK: %select = select i1 %cmp, float 1.000000e+00, float %x
119 ; CHECK-NEXT: %fabs = call float @llvm.fabs.f32(float %select)
120 define float @fabs_select_constant_variable(i32 %c, float %x) {
121 %cmp = icmp eq i32 %c, 0
122 %select = select i1 %cmp, float 1.0, float %x
123 %fabs = call float @llvm.fabs.f32(float %select)
124 ret float %fabs
125 }
126
127 ; CHECK-LABEL: @fabs_select_neg0_pos0(
128 ; CHECK: %select = select i1 %cmp, float -0.000000e+00, float 0.000000e+00
129 ; CHECK: %fabs = call float @llvm.fabs.f32(float %select)
130 ; CHECK-NEXT: ret float %fabs
131 define float @fabs_select_neg0_pos0(float addrspace(1)* %out, i32 %c) {
132 %cmp = icmp eq i32 %c, 0
133 %select = select i1 %cmp, float -0.0, float 0.0
134 %fabs = call float @llvm.fabs.f32(float %select)
135 ret float %fabs
136 }
137
138 ; CHECK-LABEL: @fabs_select_neg0_neg1(
139 ; CHECK: %select = select i1 %cmp, float -0.000000e+00, float -1.000000e+00
140 ; CHECK: %fabs = call float @llvm.fabs.f32(float %select)
141 define float @fabs_select_neg0_neg1(float addrspace(1)* %out, i32 %c) {
142 %cmp = icmp eq i32 %c, 0
143 %select = select i1 %cmp, float -0.0, float -1.0
144 %fabs = call float @llvm.fabs.f32(float %select)
145 ret float %fabs
146 }
147
148 ; CHECK-LABEL: @fabs_select_nan_nan(
149 ; CHECK: %select = select i1 %cmp, float 0x7FF8000000000000, float 0x7FF8000100000000
150 ; CHECK-NEXT: ret float %select
151 define float @fabs_select_nan_nan(float addrspace(1)* %out, i32 %c) {
152 %cmp = icmp eq i32 %c, 0
153 %select = select i1 %cmp, float 0x7FF8000000000000, float 0x7FF8000100000000
154 %fabs = call float @llvm.fabs.f32(float %select)
155 ret float %fabs
156 }
157
158 ; CHECK-LABEL: @fabs_select_negnan_nan(
159 ; CHECK: %select = select i1 %cmp, float 0xFFF8000000000000, float 0x7FF8000000000000
160 ; CHECK: %fabs = call float @llvm.fabs.f32(float %select)
161 define float @fabs_select_negnan_nan(float addrspace(1)* %out, i32 %c) {
162 %cmp = icmp eq i32 %c, 0
163 %select = select i1 %cmp, float 0xFFF8000000000000, float 0x7FF8000000000000
164 %fabs = call float @llvm.fabs.f32(float %select)
165 ret float %fabs
166 }
167
168 ; CHECK-LABEL: @fabs_select_negnan_negnan(
169 ; CHECK: %select = select i1 %cmp, float 0xFFF8000000000000, float 0x7FF8000100000000
170 ; CHECK: %fabs = call float @llvm.fabs.f32(float %select)
171 define float @fabs_select_negnan_negnan(float addrspace(1)* %out, i32 %c) {
172 %cmp = icmp eq i32 %c, 0
173 %select = select i1 %cmp, float 0xFFF8000000000000, float 0x7FF8000100000000
174 %fabs = call float @llvm.fabs.f32(float %select)
175 ret float %fabs
176 }
177
178 ; CHECK-LABEL: @fabs_select_negnan_negzero(
179 ; CHECK: %select = select i1 %cmp, float 0xFFF8000000000000, float -0.000000e+00
180 ; CHECK: %fabs = call float @llvm.fabs.f32(float %select)
181 define float @fabs_select_negnan_negzero(float addrspace(1)* %out, i32 %c) {
182 %cmp = icmp eq i32 %c, 0
183 %select = select i1 %cmp, float 0xFFF8000000000000, float -0.0
184 %fabs = call float @llvm.fabs.f32(float %select)
185 ret float %fabs
186 }
187
188 ; CHECK-LABEL: @fabs_select_negnan_zero(
189 ; CHECK: %select = select i1 %cmp, float 0xFFF8000000000000, float 0.000000e+00
190 ; CHECK: %fabs = call float @llvm.fabs.f32(float %select)
191 define float @fabs_select_negnan_zero(float addrspace(1)* %out, i32 %c) {
192 %cmp = icmp eq i32 %c, 0
193 %select = select i1 %cmp, float 0xFFF8000000000000, float 0.0
194 %fabs = call float @llvm.fabs.f32(float %select)
195 ret float %fabs
196 }