llvm.org GIT mirror: llvm / ee2422d
[InstSimplify] Drop leftover "division-by-zero guard" around `@llvm.umul.with.overflow` overflow bit

Summary:
Now that D65143/D65144 produce `@llvm.umul.with.overflow`, and D65147 has flattened the CFG, we can see that the guard that may have been there to prevent division by zero is redundant: if the multiplier is zero, the multiplication cannot overflow, so the overflow bit is already false in that case. We can simply drop the guard:

```
----------------------------------------
Name: no overflow and not zero
  %iszero = icmp ne i4 %y, 0
  %umul = umul_overflow i4 %x, %y
  %umul.ov = extractvalue {i4, i1} %umul, 1
  %retval.0 = and i1 %iszero, %umul.ov
  ret i1 %retval.0
=>
  %iszero = icmp ne i4 %y, 0
  %umul = umul_overflow i4 %x, %y
  %umul.ov = extractvalue {i4, i1} %umul, 1
  %retval.0 = and i1 %iszero, %umul.ov
  ret %umul.ov

Done: 1
Optimization is correct!
```

Reviewers: nikic, spatel, xbolva00

Reviewed By: spatel

Subscribers: hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D65150

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@370350 91177308-0d34-0410-b5e6-96231b3b80d8

Roman Lebedev, a month ago
4 changed file(s) with 52 addition(s) and 24 deletion(s).
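For context, here is a hypothetical C++ sketch (not taken from the patch; the function name and shape are illustrative) of the kind of source-level check this patch series targets. The `nmemb != 0` clause exists only to make the division safe; D65143/D65144 fold the division-based comparison into `@llvm.umul.with.overflow`, D65147 flattens the short-circuit branch, and the leftover zero check becomes the `and` that this patch drops.

```
// Hypothetical calloc-style overflow check; the guard exists only so that
// the division below cannot divide by zero.
#include <cstddef>
#include <cstdint>

bool mul_would_overflow(std::size_t size, std::size_t nmemb) {
  return nmemb != 0 && size > SIZE_MAX / nmemb;
}
```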
   return nullptr;
 }
 
+/// The @llvm.[us]mul.with.overflow intrinsic could have been folded from some
+/// other form of check, e.g. one that was using division; it may have been
+/// guarded against division-by-zero. We can drop that check now.
+/// Look for:
+///   %Op0 = icmp ne i4 %X, 0
+///   %Agg = tail call { i4, i1 } @llvm.[us]mul.with.overflow.i4(i4 %X, i4 %???)
+///   %Op1 = extractvalue { i4, i1 } %Agg, 1
+///   %??? = and i1 %Op0, %Op1
+/// We can just return %Op1
+static Value *omitCheckForZeroBeforeMulWithOverflow(Value *Op0, Value *Op1) {
+  ICmpInst::Predicate Pred;
+  Value *X;
+  if (!match(Op0, m_ICmp(Pred, m_Value(X), m_Zero())) ||
+      Pred != ICmpInst::Predicate::ICMP_NE)
+    return nullptr;
+  auto *Extract = dyn_cast<ExtractValueInst>(Op1);
+  // We should only be extracting the overflow bit.
+  if (!Extract || !Extract->getIndices().equals(1))
+    return nullptr;
+  Value *Agg = Extract->getAggregateOperand();
+  // This should be a multiplication-with-overflow intrinsic.
+  if (!match(Agg, m_CombineOr(m_Intrinsic<Intrinsic::umul_with_overflow>(),
+                              m_Intrinsic<Intrinsic::smul_with_overflow>())))
+    return nullptr;
+  // One of its multipliers should be the value we checked for zero before.
+  if (!match(Agg, m_CombineOr(m_Argument<0>(m_Specific(X)),
+                              m_Argument<1>(m_Specific(X)))))
+    return nullptr;
+  // Can omit 'and', and just return the overflow bit.
+  return Op1;
+}
+
 /// Given operands for an And, see if we can fold the result.
 /// If not, this returns null.
 static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
...
         (~(*Mask)).shl(*ShAmt).isNullValue())
       return Op0;
   }
+
+  // If we have a multiplication overflow check that is being 'and'ed with a
+  // check that one of the multipliers is not zero, we can omit the 'and', and
+  // only keep the overflow check.
+  if (Value *V = omitCheckForZeroBeforeMulWithOverflow(Op0, Op1))
+    return V;
+  if (Value *V = omitCheckForZeroBeforeMulWithOverflow(Op1, Op0))
+    return V;
 
   // A & (-A) = A if A is a power of two or zero.
   if (match(Op0, m_Neg(m_Specific(Op1))) ||
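Before the updated tests, a brute-force sanity sketch (my own, not part of the patch) of the same fact the Alive proof in the commit message establishes for i4: 'and'ing the non-zero guard with the unsigned-overflow bit never changes the result, because a multiply by zero cannot overflow.

```
#include <cassert>

int main() {
  // Exhaustively check all i4 (4-bit) operand pairs.
  for (unsigned x = 0; x < 16; ++x) {
    for (unsigned y = 0; y < 16; ++y) {
      bool nonzero = (y != 0);        // %iszero = icmp ne i4 %y, 0
      bool ov = x * y > 15u;          // overflow bit of the 4-bit unsigned multiply
      assert((nonzero && ov) == ov);  // %retval.0 always equals %umul.ov
    }
  }
  return 0;
}
```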
 
 define i1 @t0_smul(i4 %size, i4 %nmemb) {
 ; CHECK-LABEL: @t0_smul(
-; CHECK-NEXT: [[CMP:%.*]] = icmp ne i4 [[SIZE:%.*]], 0
-; CHECK-NEXT: [[SMUL:%.*]] = tail call { i4, i1 } @llvm.smul.with.overflow.i4(i4 [[SIZE]], i4 [[NMEMB:%.*]])
+; CHECK-NEXT: [[SMUL:%.*]] = tail call { i4, i1 } @llvm.smul.with.overflow.i4(i4 [[SIZE:%.*]], i4 [[NMEMB:%.*]])
 ; CHECK-NEXT: [[SMUL_OV:%.*]] = extractvalue { i4, i1 } [[SMUL]], 1
-; CHECK-NEXT: [[AND:%.*]] = and i1 [[SMUL_OV]], [[CMP]]
-; CHECK-NEXT: ret i1 [[AND]]
+; CHECK-NEXT: ret i1 [[SMUL_OV]]
 ;
   %cmp = icmp ne i4 %size, 0
   %smul = tail call { i4, i1 } @llvm.smul.with.overflow.i4(i4 %size, i4 %nmemb)
...
 
 define i1 @t1_commutative(i4 %size, i4 %nmemb) {
 ; CHECK-LABEL: @t1_commutative(
-; CHECK-NEXT: [[CMP:%.*]] = icmp ne i4 [[SIZE:%.*]], 0
-; CHECK-NEXT: [[SMUL:%.*]] = tail call { i4, i1 } @llvm.smul.with.overflow.i4(i4 [[SIZE]], i4 [[NMEMB:%.*]])
+; CHECK-NEXT: [[SMUL:%.*]] = tail call { i4, i1 } @llvm.smul.with.overflow.i4(i4 [[SIZE:%.*]], i4 [[NMEMB:%.*]])
 ; CHECK-NEXT: [[SMUL_OV:%.*]] = extractvalue { i4, i1 } [[SMUL]], 1
-; CHECK-NEXT: [[AND:%.*]] = and i1 [[CMP]], [[SMUL_OV]]
-; CHECK-NEXT: ret i1 [[AND]]
+; CHECK-NEXT: ret i1 [[SMUL_OV]]
 ;
   %cmp = icmp ne i4 %size, 0
   %smul = tail call { i4, i1 } @llvm.smul.with.overflow.i4(i4 %size, i4 %nmemb)
 
 define i1 @t0_umul(i4 %size, i4 %nmemb) {
 ; CHECK-LABEL: @t0_umul(
-; CHECK-NEXT: [[CMP:%.*]] = icmp ne i4 [[SIZE:%.*]], 0
-; CHECK-NEXT: [[UMUL:%.*]] = tail call { i4, i1 } @llvm.umul.with.overflow.i4(i4 [[SIZE]], i4 [[NMEMB:%.*]])
+; CHECK-NEXT: [[UMUL:%.*]] = tail call { i4, i1 } @llvm.umul.with.overflow.i4(i4 [[SIZE:%.*]], i4 [[NMEMB:%.*]])
 ; CHECK-NEXT: [[UMUL_OV:%.*]] = extractvalue { i4, i1 } [[UMUL]], 1
-; CHECK-NEXT: [[AND:%.*]] = and i1 [[UMUL_OV]], [[CMP]]
-; CHECK-NEXT: ret i1 [[AND]]
+; CHECK-NEXT: ret i1 [[UMUL_OV]]
 ;
   %cmp = icmp ne i4 %size, 0
   %umul = tail call { i4, i1 } @llvm.umul.with.overflow.i4(i4 %size, i4 %nmemb)
...
 
 define i1 @t1_commutative(i4 %size, i4 %nmemb) {
 ; CHECK-LABEL: @t1_commutative(
-; CHECK-NEXT: [[CMP:%.*]] = icmp ne i4 [[SIZE:%.*]], 0
-; CHECK-NEXT: [[UMUL:%.*]] = tail call { i4, i1 } @llvm.umul.with.overflow.i4(i4 [[SIZE]], i4 [[NMEMB:%.*]])
+; CHECK-NEXT: [[UMUL:%.*]] = tail call { i4, i1 } @llvm.umul.with.overflow.i4(i4 [[SIZE:%.*]], i4 [[NMEMB:%.*]])
 ; CHECK-NEXT: [[UMUL_OV:%.*]] = extractvalue { i4, i1 } [[UMUL]], 1
-; CHECK-NEXT: [[AND:%.*]] = and i1 [[CMP]], [[UMUL_OV]]
-; CHECK-NEXT: ret i1 [[AND]]
+; CHECK-NEXT: ret i1 [[UMUL_OV]]
 ;
   %cmp = icmp ne i4 %size, 0
   %umul = tail call { i4, i1 } @llvm.umul.with.overflow.i4(i4 %size, i4 %nmemb)
 ;
 ; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-LABEL: @will_not_overflow(
 ; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: bb:
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[T0:%.*]] = icmp ne i64 [[ARG:%.*]], 0
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[UMUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[UMUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG:%.*]], i64 [[ARG1:%.*]])
 ; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[UMUL_OV:%.*]] = extractvalue { i64, i1 } [[UMUL]], 1
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[T6:%.*]] = and i1 [[UMUL_OV]], [[T0]]
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: ret i1 [[T6]]
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: ret i1 [[UMUL_OV]]
 ;
 ; INSTCOMBINESIMPLIFYCFGCOSTLYONLY-LABEL: @will_not_overflow(
 ; INSTCOMBINESIMPLIFYCFGCOSTLYONLY-NEXT: bb:
...
 ;
 ; INSTCOMBINESIMPLIFYCFGCOSTLYINSTCOMBINE-LABEL: @will_not_overflow(
 ; INSTCOMBINESIMPLIFYCFGCOSTLYINSTCOMBINE-NEXT: bb:
-; INSTCOMBINESIMPLIFYCFGCOSTLYINSTCOMBINE-NEXT: [[T0:%.*]] = icmp ne i64 [[ARG:%.*]], 0
-; INSTCOMBINESIMPLIFYCFGCOSTLYINSTCOMBINE-NEXT: [[UMUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
+; INSTCOMBINESIMPLIFYCFGCOSTLYINSTCOMBINE-NEXT: [[UMUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG:%.*]], i64 [[ARG1:%.*]])
 ; INSTCOMBINESIMPLIFYCFGCOSTLYINSTCOMBINE-NEXT: [[UMUL_OV:%.*]] = extractvalue { i64, i1 } [[UMUL]], 1
-; INSTCOMBINESIMPLIFYCFGCOSTLYINSTCOMBINE-NEXT: [[T6:%.*]] = and i1 [[UMUL_OV]], [[T0]]
-; INSTCOMBINESIMPLIFYCFGCOSTLYINSTCOMBINE-NEXT: ret i1 [[T6]]
+; INSTCOMBINESIMPLIFYCFGCOSTLYINSTCOMBINE-NEXT: ret i1 [[UMUL_OV]]
 ;
 bb:
   %t0 = icmp eq i64 %arg, 0