llvm.org GIT mirror llvm / 0e1f2dd
[InstCombine] canonicalize check for power-of-2

The form that compares against 0 is better because:
1. It removes a use of the input value.
2. It's the more standard form for this pattern: https://graphics.stanford.edu/~seander/bithacks.html#DetermineIfPowerOf2
3. It results in equal or better codegen (tested with x86, AArch64, ARM, PowerPC, MIPS).

This is a root cause for PR42314, but probably doesn't completely answer the codegen request: https://bugs.llvm.org/show_bug.cgi?id=42314

Alive proof: https://rise4fun.com/Alive/9kG

Name: is power-of-2
  %neg = sub i32 0, %x
  %a = and i32 %neg, %x
  %r = icmp eq i32 %a, %x
=>
  %dec = add i32 %x, -1
  %a2 = and i32 %dec, %x
  %r = icmp eq i32 %a2, 0

Name: is not power-of-2
  %neg = sub i32 0, %x
  %a = and i32 %neg, %x
  %r = icmp ne i32 %a, %x
=>
  %dec = add i32 %x, -1
  %a2 = and i32 %dec, %x
  %r = icmp ne i32 %a2, 0

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@363956 91177308-0d34-0410-b5e6-96231b3b80d8

Sanjay Patel a month ago
2 changed file(s) with 41 addition(s) and 21 deletion(s). Raw diff Collapse all Expand all
38293829 match(Op1, m_BitReverse(m_Value(B)))))
38303830 return new ICmpInst(Pred, A, B);
38313831
3832 // Canonicalize checking for a power-of-2-or-zero value:
3833 // (A & -A) == A --> (A & (A - 1)) == 0
3834 // (-A & A) == A --> (A & (A - 1)) == 0
3835 // A == (A & -A) --> (A & (A - 1)) == 0
3836 // A == (-A & A) --> (A & (A - 1)) == 0
3837 // TODO: This could be reduced by using the ctpop intrinsic.
3838 A = nullptr;
3839 if (match(Op0, m_OneUse(m_c_And(m_OneUse(m_Neg(m_Specific(Op1))),
3840 m_Specific(Op1)))))
3841 A = Op1;
3842 else if (match(Op1, m_OneUse(m_c_And(m_OneUse(m_Neg(m_Specific(Op0))),
3843 m_Specific(Op0)))))
3844 A = Op0;
3845 if (A) {
3846 Type *Ty = A->getType();
3847 Value *Dec = Builder.CreateAdd(A, ConstantInt::getAllOnesValue(Ty));
3848 Value *And = Builder.CreateAnd(A, Dec);
3849 return new ICmpInst(Pred, And, ConstantInt::getNullValue(Ty));
3850 }
3851
38323852 return nullptr;
38333853 }
38343854
22
33 define i1 @is_pow2or0_negate_op(i32 %x) {
44 ; CHECK-LABEL: @is_pow2or0_negate_op(
5 ; CHECK-NEXT: [[NEG:%.*]] = sub i32 0, [[X:%.*]]
6 ; CHECK-NEXT: [[AND:%.*]] = and i32 [[NEG]], [[X]]
7 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], [[X]]
5 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X:%.*]], -1
6 ; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[X]]
7 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP2]], 0
88 ; CHECK-NEXT: ret i1 [[CMP]]
99 ;
1010 %neg = sub i32 0, %x
1515
1616 define <2 x i1> @is_pow2or0_negate_op_vec(<2 x i32> %x) {
1717 ; CHECK-LABEL: @is_pow2or0_negate_op_vec(
18 ; CHECK-NEXT: [[NEG:%.*]] = sub <2 x i32> zeroinitializer, [[X:%.*]]
19 ; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[NEG]], [[X]]
20 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[AND]], [[X]]
18 ; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], <i32 -1, i32 -1>
19 ; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], [[X]]
20 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[TMP2]], zeroinitializer
2121 ; CHECK-NEXT: ret <2 x i1> [[CMP]]
2222 ;
2323 %neg = sub <2 x i32> zeroinitializer, %x
5454
5555 define i1 @isnot_pow2or0_negate_op(i32 %x) {
5656 ; CHECK-LABEL: @isnot_pow2or0_negate_op(
57 ; CHECK-NEXT: [[NEG:%.*]] = sub i32 0, [[X:%.*]]
58 ; CHECK-NEXT: [[AND:%.*]] = and i32 [[NEG]], [[X]]
59 ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], [[X]]
57 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X:%.*]], -1
58 ; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[X]]
59 ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[TMP2]], 0
6060 ; CHECK-NEXT: ret i1 [[CMP]]
6161 ;
6262 %neg = sub i32 0, %x
6767
6868 define <2 x i1> @isnot_pow2or0_negate_op_vec(<2 x i32> %x) {
6969 ; CHECK-LABEL: @isnot_pow2or0_negate_op_vec(
70 ; CHECK-NEXT: [[NEG:%.*]] = sub <2 x i32> zeroinitializer, [[X:%.*]]
71 ; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[NEG]], [[X]]
72 ; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[AND]], [[X]]
70 ; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], <i32 -1, i32 -1>
71 ; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], [[X]]
72 ; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
7373 ; CHECK-NEXT: ret <2 x i1> [[CMP]]
7474 ;
7575 %neg = sub <2 x i32> zeroinitializer, %x
107107 define i1 @is_pow2or0_negate_op_commute1(i32 %p) {
108108 ; CHECK-LABEL: @is_pow2or0_negate_op_commute1(
109109 ; CHECK-NEXT: [[X:%.*]] = srem i32 42, [[P:%.*]]
110 ; CHECK-NEXT: [[NEG:%.*]] = sub nsw i32 0, [[X]]
111 ; CHECK-NEXT: [[AND:%.*]] = and i32 [[X]], [[NEG]]
112 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], [[X]]
110 ; CHECK-NEXT: [[TMP1:%.*]] = add nsw i32 [[X]], -1
111 ; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[X]], [[TMP1]]
112 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP2]], 0
113113 ; CHECK-NEXT: ret i1 [[CMP]]
114114 ;
115115 %x = srem i32 42, %p ; thwart complexity-based canonicalization
124124 define i1 @isnot_pow2or0_negate_op_commute2(i32 %p) {
125125 ; CHECK-LABEL: @isnot_pow2or0_negate_op_commute2(
126126 ; CHECK-NEXT: [[X:%.*]] = urem i32 42, [[P:%.*]]
127 ; CHECK-NEXT: [[NEG:%.*]] = sub nsw i32 0, [[X]]
128 ; CHECK-NEXT: [[AND:%.*]] = and i32 [[X]], [[NEG]]
129 ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[X]], [[AND]]
127 ; CHECK-NEXT: [[TMP1:%.*]] = add nsw i32 [[X]], -1
128 ; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[X]], [[TMP1]]
129 ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[TMP2]], 0
130130 ; CHECK-NEXT: ret i1 [[CMP]]
131131 ;
132132 %x = urem i32 42, %p ; thwart complexity-based canonicalization
139139 define i1 @isnot_pow2or0_negate_op_commute3(i32 %p) {
140140 ; CHECK-LABEL: @isnot_pow2or0_negate_op_commute3(
141141 ; CHECK-NEXT: [[X:%.*]] = urem i32 42, [[P:%.*]]
142 ; CHECK-NEXT: [[NEG:%.*]] = sub nsw i32 0, [[X]]
143 ; CHECK-NEXT: [[AND:%.*]] = and i32 [[X]], [[NEG]]
144 ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[X]], [[AND]]
142 ; CHECK-NEXT: [[TMP1:%.*]] = add nsw i32 [[X]], -1
143 ; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[X]], [[TMP1]]
144 ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[TMP2]], 0
145145 ; CHECK-NEXT: ret i1 [[CMP]]
146146 ;
147147 %x = urem i32 42, %p ; thwart complexity-based canonicalization