[InstSimplify] Drop leftover "division-by-zero guard" around `@llvm.umul.with.overflow` inverted overflow bit

Summary:
Now that, with D65143/D65144, we produce `@llvm.umul.with.overflow`,
and with D65147 we have flattened the CFG, we can now see that the guard
that may have been there to prevent division by zero is redundant.
We can simply drop it:
```
----------------------------------------
Name: no overflow or zero
%iszero = icmp eq i4 %y, 0
%umul = smul_overflow i4 %x, %y
%umul.ov = extractvalue {i4, i1} %umul, 1
%umul.ov.not = xor %umul.ov, -1
%retval.0 = or i1 %iszero, %umul.ov.not
ret i1 %retval.0
=>
%iszero = icmp eq i4 %y, 0
%umul = smul_overflow i4 %x, %y
%umul.ov = extractvalue {i4, i1} %umul, 1
%umul.ov.not = xor %umul.ov, -1
%retval.0 = or i1 %iszero, %umul.ov.not
ret i1 %umul.ov.not

Done: 1
Optimization is correct!
```
Note that this is inverted from what we had in the previous patch:
here we are looking for the inverted overflow bit.

And that inversion is somewhat problematic: given this particular
pattern, we neither hoist that `not` closer to `ret` (then the pattern
would have been identical to the one without the inversion, and would
have been handled by the previous patch), nor do the opposite transform.
But regardless, we should handle this too.
I've filed [[ https://bugs.llvm.org/show_bug.cgi?id=42720 | PR42720 ]].

Reviewers: nikic, spatel, xbolva00, RKSimon

Reviewed By: spatel

Subscribers: hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D65151

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@370351 91177308-0d34-0410-b5e6-96231b3b80d8

Roman Lebedev
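For intuition: when `%y` is zero the product is zero, so the multiplication cannot overflow, the inverted overflow bit `%umul.ov.not` is already `true`, and `or`ing it with `%iszero` changes nothing. Beyond the Alive proof above, the identity is small enough to brute-force over all i4 values for both the unsigned and the signed overflow bit; the following is just an editorial sanity-check sketch in plain C++, not part of the patch:

```
// Exhaustively re-check the i4 identity from the proof above:
//   (y == 0) | !mul_overflow(x, y)  ==  !mul_overflow(x, y)
// for both unsigned and signed 4-bit multiplication.
#include <cstdio>

int main() {
  bool ok = true;
  for (int x = 0; x < 16; ++x) {
    for (int y = 0; y < 16; ++y) {
      // Unsigned i4 overflow: the full product does not fit in [0, 15].
      bool uov = x * y > 15;
      // Signed i4 overflow: reinterpret the 4-bit patterns as [-8, 7].
      int sx = x < 8 ? x : x - 16;
      int sy = y < 8 ? y : y - 16;
      bool sov = sx * sy < -8 || sx * sy > 7;
      bool iszero = (y == 0);
      ok &= ((iszero || !uov) == !uov);
      ok &= ((iszero || !sov) == !sov);
    }
  }
  std::puts(ok ? "Optimization is correct!" : "mismatch found");
  return ok ? 0 : 1;
}
```

The same reasoning covers both `@llvm.umul.with.overflow` and `@llvm.smul.with.overflow`, which is why the matcher in the diff below accepts either intrinsic.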
4 changed files with 71 additions and 36 deletions.
   return nullptr;
 }
 
+/// Check that the Op1 is in expected form, i.e.:
+///   %Agg = tail call { i4, i1 } @llvm.[us]mul.with.overflow.i4(i4 %X, i4 %???)
+///   %Op1 = extractvalue { i4, i1 } %Agg, 1
+static bool omitCheckForZeroBeforeMulWithOverflowInternal(Value *Op1,
+                                                          Value *X) {
+  auto *Extract = dyn_cast<ExtractValueInst>(Op1);
+  // We should only be extracting the overflow bit.
+  if (!Extract || !Extract->getIndices().equals(1))
+    return false;
+  Value *Agg = Extract->getAggregateOperand();
+  // This should be a multiplication-with-overflow intrinsic.
+  if (!match(Agg, m_CombineOr(m_Intrinsic<Intrinsic::umul_with_overflow>(),
+                              m_Intrinsic<Intrinsic::smul_with_overflow>())))
+    return false;
+  // One of its multipliers should be the value we checked for zero before.
+  if (!match(Agg, m_CombineOr(m_Argument<0>(m_Specific(X)),
+                              m_Argument<1>(m_Specific(X)))))
+    return false;
+  return true;
+}
+
 /// The @llvm.[us]mul.with.overflow intrinsic could have been folded from some
 /// other form of check, e.g. one that was using division; it may have been
 /// guarded against division-by-zero. We can drop that check now.
...
   if (!match(Op0, m_ICmp(Pred, m_Value(X), m_Zero())) ||
       Pred != ICmpInst::Predicate::ICMP_NE)
     return nullptr;
-  auto *Extract = dyn_cast<ExtractValueInst>(Op1);
-  // We should only be extracting the overflow bit.
-  if (!Extract || !Extract->getIndices().equals(1))
-    return nullptr;
-  Value *Agg = Extract->getAggregateOperand();
-  // This should be a multiplication-with-overflow intrinsic.
-  if (!match(Agg, m_CombineOr(m_Intrinsic<Intrinsic::umul_with_overflow>(),
-                              m_Intrinsic<Intrinsic::smul_with_overflow>())))
-    return nullptr;
-  // One of its multipliers should be the value we checked for zero before.
-  if (!match(Agg, m_CombineOr(m_Argument<0>(m_Specific(X)),
-                              m_Argument<1>(m_Specific(X)))))
+  // Is Op1 in expected form?
+  if (!omitCheckForZeroBeforeMulWithOverflowInternal(Op1, X))
     return nullptr;
   // Can omit 'and', and just return the overflow bit.
   return Op1;
+}
+
+/// The @llvm.[us]mul.with.overflow intrinsic could have been folded from some
+/// other form of check, e.g. one that was using division; it may have been
+/// guarded against division-by-zero. We can drop that check now.
+/// Look for:
+///   %Op0 = icmp eq i4 %X, 0
+///   %Agg = tail call { i4, i1 } @llvm.[us]mul.with.overflow.i4(i4 %X, i4 %???)
+///   %Op1 = extractvalue { i4, i1 } %Agg, 1
+///   %NotOp1 = xor i1 %Op1, true
+///   %or = or i1 %Op0, %NotOp1
+/// We can just return %NotOp1
+static Value *omitCheckForZeroBeforeInvertedMulWithOverflow(Value *Op0,
+                                                            Value *NotOp1) {
+  ICmpInst::Predicate Pred;
+  Value *X;
+  if (!match(Op0, m_ICmp(Pred, m_Value(X), m_Zero())) ||
+      Pred != ICmpInst::Predicate::ICMP_EQ)
+    return nullptr;
+  // We expect the other hand of an 'or' to be a 'not'.
+  Value *Op1;
+  if (!match(NotOp1, m_Not(m_Value(Op1))))
+    return nullptr;
+  // Is Op1 in expected form?
+  if (!omitCheckForZeroBeforeMulWithOverflowInternal(Op1, X))
+    return nullptr;
+  // Can omit the 'or', and just return the inverted overflow bit.
+  return NotOp1;
 }
 
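As an aside (not part of the patch): the PatternMatch combinators used above (`m_CombineOr`, `m_Argument<N>`, `m_Specific`) compactly express a check that could also be written with plain casts. A rough, hypothetical equivalent of `omitCheckForZeroBeforeMulWithOverflowInternal` might look like the following; the helper name `isOverflowBitOfMulByX` is invented for illustration:

```
// Illustrative plain-cast equivalent of the PatternMatch-based helper above.
// Hypothetical sketch, not the committed code.
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
using namespace llvm;

static bool isOverflowBitOfMulByX(Value *Op1, Value *X) {
  // Must be `extractvalue { iN, i1 } %Agg, 1`, i.e. the overflow bit.
  auto *Extract = dyn_cast<ExtractValueInst>(Op1);
  if (!Extract || Extract->getNumIndices() != 1 ||
      Extract->getIndices()[0] != 1)
    return false;
  // The aggregate must be produced by @llvm.[us]mul.with.overflow.
  auto *Mul = dyn_cast<IntrinsicInst>(Extract->getAggregateOperand());
  if (!Mul || (Mul->getIntrinsicID() != Intrinsic::umul_with_overflow &&
               Mul->getIntrinsicID() != Intrinsic::smul_with_overflow))
    return false;
  // One of the multipliers must be the value that was compared against zero.
  return Mul->getArgOperand(0) == X || Mul->getArgOperand(1) == X;
}
```

The committed helper keeps the matcher form so it composes with the rest of InstSimplify's pattern-matching style.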
 /// Given operands for an And, see if we can fold the result.
...
     return Op0;
 
   if (Value *V = simplifyAndOrOfCmps(Q, Op0, Op1, false))
+    return V;
+
+  // If we have a multiplication overflow check that is being 'or'ed with a
+  // check that one of the multipliers is not zero, we can omit the 'or', and
+  // only keep the overflow check.
+  if (Value *V = omitCheckForZeroBeforeInvertedMulWithOverflow(Op0, Op1))
+    return V;
+  if (Value *V = omitCheckForZeroBeforeInvertedMulWithOverflow(Op1, Op0))
     return V;
 
   // Try some generic simplifications for associative operations.
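To see where the new code is reached, here is a rough, hypothetical driver (not part of the patch) that builds the pattern from the summary with `IRBuilder` and hands the `or` operands to `SimplifyOrInst`; it assumes the LLVM C++ API of this period, and the function name `will_overflow` is borrowed from the test below purely for illustration:

```
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("demo", Ctx);
  Type *I64 = Type::getInt64Ty(Ctx);
  FunctionType *FTy =
      FunctionType::get(Type::getInt1Ty(Ctx), {I64, I64}, /*isVarArg=*/false);
  Function *F =
      Function::Create(FTy, Function::ExternalLinkage, "will_overflow", &M);
  BasicBlock *BB = BasicBlock::Create(Ctx, "bb", F);
  IRBuilder<> B(BB);

  auto AI = F->arg_begin();
  Value *X = &*AI++;
  Value *Y = &*AI;
  // %iszero = icmp eq i64 %x, 0
  Value *IsZero = B.CreateICmpEQ(X, ConstantInt::get(I64, 0));
  // %umul = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %x, i64 %y)
  Value *Mul = B.CreateIntrinsic(Intrinsic::umul_with_overflow, {I64}, {X, Y});
  // %umul.ov.not = xor i1 (extractvalue %umul, 1), true
  Value *NotOv = B.CreateNot(B.CreateExtractValue(Mul, 1));
  B.CreateRet(B.CreateOr(IsZero, NotOv));

  // With this patch, InstSimplify sees through the zero guard and folds the
  // whole 'or' down to the inverted overflow bit.
  Value *Simplified =
      SimplifyOrInst(IsZero, NotOv, SimplifyQuery(M.getDataLayout()));
  if (Simplified)
    errs() << "simplified to: " << *Simplified << "\n"; // expect the 'xor'
  return 0;
}
```

The test diffs below show the same simplification being applied by the regular pass pipelines.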
 
 define i1 @t0_umul(i4 %size, i4 %nmemb) {
 ; CHECK-LABEL: @t0_umul(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i4 [[SIZE:%.*]], 0
-; CHECK-NEXT:    [[SMUL:%.*]] = tail call { i4, i1 } @llvm.smul.with.overflow.i4(i4 [[SIZE]], i4 [[NMEMB:%.*]])
+; CHECK-NEXT:    [[SMUL:%.*]] = tail call { i4, i1 } @llvm.smul.with.overflow.i4(i4 [[SIZE:%.*]], i4 [[NMEMB:%.*]])
 ; CHECK-NEXT:    [[SMUL_OV:%.*]] = extractvalue { i4, i1 } [[SMUL]], 1
 ; CHECK-NEXT:    [[PHITMP:%.*]] = xor i1 [[SMUL_OV]], true
-; CHECK-NEXT:    [[OR:%.*]] = or i1 [[CMP]], [[PHITMP]]
-; CHECK-NEXT:    ret i1 [[OR]]
+; CHECK-NEXT:    ret i1 [[PHITMP]]
 ;
   %cmp = icmp eq i4 %size, 0
   %smul = tail call { i4, i1 } @llvm.smul.with.overflow.i4(i4 %size, i4 %nmemb)
...
 define i1 @t1_commutative(i4 %size, i4 %nmemb) {
 ; CHECK-LABEL: @t1_commutative(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i4 [[SIZE:%.*]], 0
-; CHECK-NEXT:    [[SMUL:%.*]] = tail call { i4, i1 } @llvm.smul.with.overflow.i4(i4 [[SIZE]], i4 [[NMEMB:%.*]])
+; CHECK-NEXT:    [[SMUL:%.*]] = tail call { i4, i1 } @llvm.smul.with.overflow.i4(i4 [[SIZE:%.*]], i4 [[NMEMB:%.*]])
 ; CHECK-NEXT:    [[SMUL_OV:%.*]] = extractvalue { i4, i1 } [[SMUL]], 1
 ; CHECK-NEXT:    [[PHITMP:%.*]] = xor i1 [[SMUL_OV]], true
-; CHECK-NEXT:    [[OR:%.*]] = or i1 [[PHITMP]], [[CMP]]
-; CHECK-NEXT:    ret i1 [[OR]]
+; CHECK-NEXT:    ret i1 [[PHITMP]]
 ;
   %cmp = icmp eq i4 %size, 0
   %smul = tail call { i4, i1 } @llvm.smul.with.overflow.i4(i4 %size, i4 %nmemb)
 
 define i1 @t0_umul(i4 %size, i4 %nmemb) {
 ; CHECK-LABEL: @t0_umul(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i4 [[SIZE:%.*]], 0
-; CHECK-NEXT:    [[UMUL:%.*]] = tail call { i4, i1 } @llvm.umul.with.overflow.i4(i4 [[SIZE]], i4 [[NMEMB:%.*]])
+; CHECK-NEXT:    [[UMUL:%.*]] = tail call { i4, i1 } @llvm.umul.with.overflow.i4(i4 [[SIZE:%.*]], i4 [[NMEMB:%.*]])
 ; CHECK-NEXT:    [[UMUL_OV:%.*]] = extractvalue { i4, i1 } [[UMUL]], 1
 ; CHECK-NEXT:    [[PHITMP:%.*]] = xor i1 [[UMUL_OV]], true
-; CHECK-NEXT:    [[OR:%.*]] = or i1 [[CMP]], [[PHITMP]]
-; CHECK-NEXT:    ret i1 [[OR]]
+; CHECK-NEXT:    ret i1 [[PHITMP]]
 ;
   %cmp = icmp eq i4 %size, 0
   %umul = tail call { i4, i1 } @llvm.umul.with.overflow.i4(i4 %size, i4 %nmemb)
...
 define i1 @t1_commutative(i4 %size, i4 %nmemb) {
 ; CHECK-LABEL: @t1_commutative(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i4 [[SIZE:%.*]], 0
-; CHECK-NEXT:    [[UMUL:%.*]] = tail call { i4, i1 } @llvm.umul.with.overflow.i4(i4 [[SIZE]], i4 [[NMEMB:%.*]])
+; CHECK-NEXT:    [[UMUL:%.*]] = tail call { i4, i1 } @llvm.umul.with.overflow.i4(i4 [[SIZE:%.*]], i4 [[NMEMB:%.*]])
 ; CHECK-NEXT:    [[UMUL_OV:%.*]] = extractvalue { i4, i1 } [[UMUL]], 1
 ; CHECK-NEXT:    [[PHITMP:%.*]] = xor i1 [[UMUL_OV]], true
-; CHECK-NEXT:    [[OR:%.*]] = or i1 [[PHITMP]], [[CMP]]
-; CHECK-NEXT:    ret i1 [[OR]]
+; CHECK-NEXT:    ret i1 [[PHITMP]]
 ;
   %cmp = icmp eq i4 %size, 0
   %umul = tail call { i4, i1 } @llvm.umul.with.overflow.i4(i4 %size, i4 %nmemb)
 ;
 ; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-LABEL: @will_overflow(
 ; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:  bb:
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    [[T0:%.*]] = icmp eq i64 [[ARG:%.*]], 0
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    [[UMUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    [[UMUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG:%.*]], i64 [[ARG1:%.*]])
 ; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    [[UMUL_OV:%.*]] = extractvalue { i64, i1 } [[UMUL]], 1
 ; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    [[PHITMP:%.*]] = xor i1 [[UMUL_OV]], true
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    [[T6:%.*]] = or i1 [[T0]], [[PHITMP]]
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    ret i1 [[T6]]
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    ret i1 [[PHITMP]]
 ;
 ; INSTCOMBINESIMPLIFYCFGCOSTLYONLY-LABEL: @will_overflow(
 ; INSTCOMBINESIMPLIFYCFGCOSTLYONLY-NEXT:  bb:
...
 ;
 ; INSTCOMBINESIMPLIFYCFGCOSTLYINSTCOMBINE-LABEL: @will_overflow(
 ; INSTCOMBINESIMPLIFYCFGCOSTLYINSTCOMBINE-NEXT:  bb:
-; INSTCOMBINESIMPLIFYCFGCOSTLYINSTCOMBINE-NEXT:    [[T0:%.*]] = icmp eq i64 [[ARG:%.*]], 0
-; INSTCOMBINESIMPLIFYCFGCOSTLYINSTCOMBINE-NEXT:    [[UMUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
+; INSTCOMBINESIMPLIFYCFGCOSTLYINSTCOMBINE-NEXT:    [[UMUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG:%.*]], i64 [[ARG1:%.*]])
 ; INSTCOMBINESIMPLIFYCFGCOSTLYINSTCOMBINE-NEXT:    [[UMUL_OV:%.*]] = extractvalue { i64, i1 } [[UMUL]], 1
 ; INSTCOMBINESIMPLIFYCFGCOSTLYINSTCOMBINE-NEXT:    [[PHITMP:%.*]] = xor i1 [[UMUL_OV]], true
-; INSTCOMBINESIMPLIFYCFGCOSTLYINSTCOMBINE-NEXT:    [[T6:%.*]] = or i1 [[T0]], [[PHITMP]]
-; INSTCOMBINESIMPLIFYCFGCOSTLYINSTCOMBINE-NEXT:    ret i1 [[T6]]
+; INSTCOMBINESIMPLIFYCFGCOSTLYINSTCOMBINE-NEXT:    ret i1 [[PHITMP]]
 ;
 bb:
   %t0 = icmp eq i64 %arg, 0