[InstCombine] Dropping redundant masking before left-shift [3/5] (PR42563)

Summary:
If we have some pattern that leaves only some low bits set, and then performs
a left-shift of those bits, then if none of the bits that remain after the
final shift are modified by the mask, we can omit the mask.

There are many variants to this pattern:
d. `(x & ((-1 << MaskShAmt) >> MaskShAmt)) << ShiftShAmt`
All these patterns can be simplified to just:
`x << ShiftShAmt`
iff:
d. `(ShiftShAmt-MaskShAmt) s>= 0` (i.e. `ShiftShAmt u>= MaskShAmt`)

alive proofs:
d: https://rise4fun.com/Alive/I5Y

For now let's start with patterns where both shift amounts are variable, with a
trivial constant "offset" between them, since I believe this is both the
simplest to handle and, I think, the most common case. But again, there are
likely other variants where we could use ValueTracking/ConstantRange to handle
more cases.

https://bugs.llvm.org/show_bug.cgi?id=42563

Differential Revision: https://reviews.llvm.org/D64519

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@366538 91177308-0d34-0410-b5e6-96231b3b80d8

Author: Roman Lebedev
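As a quick sanity check on the variant-d claim, here is a minimal standalone C++ program (illustration only, not part of this commit) that brute-forces all i8 values and all shift-amount pairs with `ShiftShAmt u>= MaskShAmt`. It emulates the `lshr` form of the mask; with an `ashr`, the variant-d mask is simply all-ones for in-range shift amounts, so that form is trivially redundant.

```cpp
// Exhaustive i8 check of variant d (illustration only, not part of the patch):
//   (x & ((-1 << MaskShAmt) >> MaskShAmt)) << ShiftShAmt  ==  x << ShiftShAmt
// whenever ShiftShAmt u>= MaskShAmt and both amounts are in [0, bitwidth).
#include <cstdint>
#include <cstdio>

int main() {
  const unsigned BitWidth = 8;
  for (unsigned X = 0; X <= 0xFF; ++X)
    for (unsigned MaskShAmt = 0; MaskShAmt < BitWidth; ++MaskShAmt)
      for (unsigned ShiftShAmt = MaskShAmt; ShiftShAmt < BitWidth; ++ShiftShAmt) {
        // Emulate the i8 operations with wrapping uint8_t arithmetic; '>>' here
        // is a logical right-shift (lshr), matching the interesting mask form.
        uint8_t Mask = (uint8_t)((uint8_t)(0xFFu << MaskShAmt) >> MaskShAmt);
        uint8_t Masked = (uint8_t)((X & Mask) << ShiftShAmt);
        uint8_t Plain = (uint8_t)(X << ShiftShAmt);
        if (Masked != Plain) {
          std::printf("mismatch: x=%#x mask-sh=%u shift-sh=%u\n", X, MaskShAmt,
                      ShiftShAmt);
          return 1;
        }
      }
  std::puts("variant d holds for all i8 inputs with ShiftShAmt u>= MaskShAmt");
  return 0;
}
```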
2 changed files with 16 additions and 12 deletions.
@@ ... @@
 // a) (x & ((1 << MaskShAmt) - 1)) << ShiftShAmt
 // b) (x & (~(-1 << MaskShAmt))) << ShiftShAmt
 // c) (x & (-1 >> MaskShAmt)) << ShiftShAmt
+// d) (x & ((-1 << MaskShAmt) >> MaskShAmt)) << ShiftShAmt
 // All these patterns can be simplified to just:
 //   x << ShiftShAmt
 // iff:
 //   a,b) (MaskShAmt+ShiftShAmt) u>= bitwidth(x)
-//   c) (ShiftShAmt-MaskShAmt) s>= 0 (i.e. ShiftShAmt u>= MaskShAmt)
+//   c,d) (ShiftShAmt-MaskShAmt) s>= 0 (i.e. ShiftShAmt u>= MaskShAmt)
 static Instruction *
 dropRedundantMaskingOfLeftShiftInput(BinaryOperator *OuterShift,
                                      const SimplifyQuery &SQ) {
@@ ... @@
   auto MaskB = m_Xor(m_Shl(m_AllOnes(), m_Value(MaskShAmt)), m_AllOnes());
   // (-1 >> MaskShAmt)
   auto MaskC = m_Shr(m_AllOnes(), m_Value(MaskShAmt));
+  // ((-1 << MaskShAmt) >> MaskShAmt)
+  auto MaskD =
+      m_Shr(m_Shl(m_AllOnes(), m_Value(MaskShAmt)), m_Deferred(MaskShAmt));
 
   Value *X;
   if (match(Masked, m_c_And(m_CombineOr(MaskA, MaskB), m_Value(X)))) {
@@ ... @@
                                                APInt(BitWidth, BitWidth))))
       return nullptr;
     // All good, we can do this fold.
-  } else if (match(Masked, m_c_And(MaskC, m_Value(X)))) {
+  } else if (match(Masked, m_c_And(m_CombineOr(MaskC, MaskD), m_Value(X)))) {
     // Can we simplify (ShiftShAmt-MaskShAmt) ?
     Value *ShAmtsDiff =
         SimplifySubInst(ShiftShAmt, MaskShAmt, /*IsNSW=*/false, /*IsNUW=*/false,
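The new `MaskD` matcher mirrors the IR shape `(-1 << %nbits) >> %nbits`; `m_Deferred(MaskShAmt)` is what forces the second shift amount to be the very same SSA value that `m_Value(MaskShAmt)` already bound, rather than two independently matched values. Because `m_Shr` accepts both `lshr` and `ashr`, both spellings of the mask are handled, and the existing variant-c legality check (`SimplifySubInst(ShiftShAmt, MaskShAmt, ...)` being known non-negative) is reused unchanged. The following self-contained sketch is hypothetical and not part of this commit (it assumes a program linked against LLVM's Core library); it builds the variant-d mask with `IRBuilder` and feeds it to the matcher above:

```cpp
// Hypothetical standalone sketch (not part of this commit): construct the
// variant-d masking expression and confirm the MaskD matcher recognizes it.
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"

using namespace llvm;
using namespace llvm::PatternMatch;

int main() {
  LLVMContext Ctx;
  Module M("m", Ctx);
  IRBuilder<> B(Ctx);

  Type *I32 = B.getInt32Ty();
  Function *F =
      Function::Create(FunctionType::get(I32, {I32, I32}, /*isVarArg=*/false),
                       GlobalValue::ExternalLinkage, "f", &M);
  B.SetInsertPoint(BasicBlock::Create(Ctx, "entry", F));

  auto Args = F->arg_begin();
  Value *X = &*Args++;
  Value *NBits = &*Args;

  // %t0 = shl i32 -1, %nbits
  // %t1 = lshr i32 %t0, %nbits
  // %t2 = and i32 %t1, %x        ; the variant-d masking of %x
  Value *T0 = B.CreateShl(Constant::getAllOnesValue(I32), NBits);
  Value *T1 = B.CreateLShr(T0, NBits);
  Value *Masked = B.CreateAnd(T1, X);
  B.CreateRet(Masked);

  // The matcher added in this patch, reproduced verbatim.
  Value *MaskShAmt = nullptr, *MatchedX = nullptr;
  auto MaskD =
      m_Shr(m_Shl(m_AllOnes(), m_Value(MaskShAmt)), m_Deferred(MaskShAmt));
  bool Ok = match(Masked, m_c_And(MaskD, m_Value(MatchedX))) &&
            MatchedX == X && MaskShAmt == NBits;
  return Ok ? 0 : 1;
}
```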
@@ ... @@
 ; CHECK-NEXT: call void @use32(i32 [[T0]])
 ; CHECK-NEXT: call void @use32(i32 [[T1]])
 ; CHECK-NEXT: call void @use32(i32 [[T2]])
-; CHECK-NEXT: [[T4:%.*]] = shl i32 [[T2]], [[NBITS]]
+; CHECK-NEXT: [[T4:%.*]] = shl i32 [[X]], [[NBITS]]
 ; CHECK-NEXT: ret i32 [[T4]]
 ;
   %t0 = shl i32 -1, %nbits
@@ ... @@
 ; CHECK-NEXT: call void @use32(i32 [[T1]])
 ; CHECK-NEXT: call void @use32(i32 [[T2]])
 ; CHECK-NEXT: call void @use32(i32 [[T3]])
-; CHECK-NEXT: [[T4:%.*]] = shl i32 [[T2]], [[T3]]
+; CHECK-NEXT: [[T4:%.*]] = shl i32 [[X]], [[T3]]
 ; CHECK-NEXT: ret i32 [[T4]]
 ;
   %t0 = shl i32 -1, %nbits
@@ ... @@
 ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T1]])
 ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T2]])
 ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T3]])
-; CHECK-NEXT: [[T4:%.*]] = shl <3 x i32> [[T2]], [[T3]]
+; CHECK-NEXT: [[T4:%.*]] = shl <3 x i32> [[X]], [[T3]]
 ; CHECK-NEXT: ret <3 x i32> [[T4]]
 ;
   %t0 = shl <3 x i32> , %nbits
@@ ... @@
 ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T1]])
 ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T2]])
 ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T3]])
-; CHECK-NEXT: [[T4:%.*]] = shl <3 x i32> [[T2]], [[T3]]
+; CHECK-NEXT: [[T4:%.*]] = shl <3 x i32> [[X]], [[T3]]
 ; CHECK-NEXT: ret <3 x i32> [[T4]]
 ;
   %t0 = shl <3 x i32> , %nbits
@@ ... @@
 ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T1]])
 ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T2]])
 ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[NBITS]])
-; CHECK-NEXT: [[T4:%.*]] = shl <3 x i32> [[T2]], [[NBITS]]
+; CHECK-NEXT: [[T4:%.*]] = shl <3 x i32> [[X]], [[NBITS]]
 ; CHECK-NEXT: ret <3 x i32> [[T4]]
 ;
   %t0 = shl <3 x i32> , %nbits
@@ ... @@
 ; CHECK-NEXT: call void @use32(i32 [[T0]])
 ; CHECK-NEXT: call void @use32(i32 [[T1]])
 ; CHECK-NEXT: call void @use32(i32 [[T2]])
-; CHECK-NEXT: [[T3:%.*]] = shl i32 [[T2]], [[NBITS]]
+; CHECK-NEXT: [[T3:%.*]] = shl i32 [[X]], [[NBITS]]
 ; CHECK-NEXT: ret i32 [[T3]]
 ;
   %x = call i32 @gen32()
@@ ... @@
 ; CHECK-NEXT: call void @use32(i32 [[T2]])
 ; CHECK-NEXT: call void @use32(i32 [[T3]])
 ; CHECK-NEXT: call void @use32(i32 [[T4]])
-; CHECK-NEXT: [[T5:%.*]] = shl i32 [[T4]], [[NBITS0]]
+; CHECK-NEXT: [[T5:%.*]] = shl i32 [[T3]], [[NBITS0]]
 ; CHECK-NEXT: ret i32 [[T5]]
 ;
   %t0 = shl i32 -1, %nbits0
@@ ... @@
 ; CHECK-NEXT: call void @use32(i32 [[T0]])
 ; CHECK-NEXT: call void @use32(i32 [[T1]])
 ; CHECK-NEXT: call void @use32(i32 [[T2]])
-; CHECK-NEXT: [[T3:%.*]] = shl nuw i32 [[T2]], [[NBITS]]
+; CHECK-NEXT: [[T3:%.*]] = shl i32 [[X]], [[NBITS]]
 ; CHECK-NEXT: ret i32 [[T3]]
 ;
   %t0 = shl i32 -1, %nbits
@@ ... @@
 ; CHECK-NEXT: call void @use32(i32 [[T0]])
 ; CHECK-NEXT: call void @use32(i32 [[T1]])
 ; CHECK-NEXT: call void @use32(i32 [[T2]])
-; CHECK-NEXT: [[T3:%.*]] = shl nsw i32 [[T2]], [[NBITS]]
+; CHECK-NEXT: [[T3:%.*]] = shl i32 [[X]], [[NBITS]]
 ; CHECK-NEXT: ret i32 [[T3]]
 ;
   %t0 = shl i32 -1, %nbits
@@ ... @@
 ; CHECK-NEXT: call void @use32(i32 [[T0]])
 ; CHECK-NEXT: call void @use32(i32 [[T1]])
 ; CHECK-NEXT: call void @use32(i32 [[T2]])
-; CHECK-NEXT: [[T3:%.*]] = shl nuw nsw i32 [[T2]], [[NBITS]]
+; CHECK-NEXT: [[T3:%.*]] = shl i32 [[X]], [[NBITS]]
 ; CHECK-NEXT: ret i32 [[T3]]
 ;
   %t0 = shl i32 -1, %nbits
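The last three hunks also show the rewritten shift losing its `nuw`/`nsw`/`nuw nsw` flags: once the mask is removed, the shift may discard bits (or shift into the sign bit) that the masked operand could never have carried, so the original flags are not in general valid on `x << %nbits` and are dropped. Below is a small standalone C++ illustration (not part of the patch) of a concrete i8 counterexample for `nuw`:

```cpp
// Standalone illustration (not part of the patch) of why the nuw flag on the
// masked shift cannot simply be copied to the rewritten shift, using i8:
//   x = 0x1F, nbits = 4
//   mask            = (0xFF << 4) >> 4 = 0x0F     ; variant-d mask, lshr form
//   (x & mask) << 4 = 0x0F << 4 = 0xF0            ; no unsigned overflow (nuw ok)
//   x << 4          = 0x1F << 4 = 0x1F0 -> 0xF0   ; bits shifted out, not nuw
#include <cstdint>
#include <cstdio>

int main() {
  const uint8_t X = 0x1F, NBits = 4;
  uint8_t Mask = (uint8_t)((uint8_t)(0xFFu << NBits) >> NBits); // 0x0F
  unsigned MaskedWide = (unsigned)(X & Mask) << NBits;          // 0xF0, fits in i8
  unsigned PlainWide = (unsigned)X << NBits;                    // 0x1F0, does not fit
  std::printf("masked: %#x (nuw ok: %d), unmasked: %#x (nuw ok: %d)\n",
              MaskedWide, MaskedWide <= 0xFF, PlainWide, PlainWide <= 0xFF);
  // Both results truncate to the same i8 value (0xF0), which is why the fold
  // itself is sound; only the poison-generating flags differ.
  return 0;
}
```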