llvm.org GIT mirror llvm / 0c1db38
[InstCombine] Dropping redundant masking before left-shift [2/5] (PR42563)

Summary:
If we have some pattern that leaves only some low bits set and then left-shifts those bits, and if none of the bits that remain after the final shift are modified by the mask, we can omit the mask.

There are many variants of this pattern:
b. `(x & (~(-1 << maskNbits))) << shiftNbits`
All these patterns can be simplified to just:
`x << ShiftShAmt`
iff:
b. `(MaskShAmt+ShiftShAmt) u>= bitwidth(x)`

Alive proof:
b: https://rise4fun.com/Alive/y8M

For now, let's start with patterns where both shift amounts are variable, with a trivial constant "offset" between them, since I believe this is both the simplest case to handle and the most common one. But again, there are likely other variants where we could use ValueTracking/ConstantRange to handle more cases.

https://bugs.llvm.org/show_bug.cgi?id=42563

Differential Revision: https://reviews.llvm.org/D64514

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@366536 91177308-0d34-0410-b5e6-96231b3b80d8

Roman Lebedev · a month ago
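As a concrete illustration of what the new fold does (a minimal sketch; the value names and the exact shift-amount computation are patterned on the tests updated below, not taken verbatim from them), consider:

  %t0 = shl i32 -1, %nbits      ; -1 << nbits
  %t1 = xor i32 %t0, -1         ; ~(-1 << nbits), the low-bits mask
  %t2 = and i32 %t1, %x         ; x & ~(-1 << nbits)
  %t3 = sub i32 32, %nbits      ; nbits + (32 - nbits) u>= 32 = bitwidth(x)
  %t4 = shl i32 %t2, %t3

Every bit cleared by the mask is shifted out by the final `shl`, so instcombine can now rewrite the last instruction to

  %t4 = shl i32 %x, %t3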
2 changed files with 16 additions and 13 deletions.
7070 //
7171 // There are many variants to this pattern:
7272 // a) (x & ((1 << MaskShAmt) - 1)) << ShiftShAmt
73 // b) (x & (~(-1 << MaskShAmt))) << ShiftShAmt
7374 // All these patterns can be simplified to just:
7475 // x << ShiftShAmt
7576 // iff:
76 // a) (MaskShAmt+ShiftShAmt) u>= bitwidth(x)
77 // a,b) (MaskShAmt+ShiftShAmt) u>= bitwidth(x)
7778 static Instruction *
7879 dropRedundantMaskingOfLeftShiftInput(BinaryOperator *OuterShift,
7980 const SimplifyQuery &SQ) {
8788
8889 // ((1 << MaskShAmt) - 1)
8990 auto MaskA = m_Add(m_Shl(m_One(), m_Value(MaskShAmt)), m_AllOnes());
91 // (~(-1 << maskNbits))
92 auto MaskB = m_Xor(m_Shl(m_AllOnes(), m_Value(MaskShAmt)), m_AllOnes());
9093
9194 Value *X;
92 if (!match(Masked, m_c_And(MaskA, m_Value(X))))
95 if (!match(Masked, m_c_And(m_CombineOr(MaskA, MaskB), m_Value(X))))
9396 return nullptr;
9497
9598 // Can we simplify (MaskShAmt+ShiftShAmt) ?
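In IR terms, the two mask shapes the combined matcher (`m_CombineOr(MaskA, MaskB)`) now accepts look roughly like this (an illustrative sketch; the value names are made up):

  ; MaskA: ((1 << MaskShAmt) - 1), i.e. (1 << MaskShAmt) plus -1
  %a0     = shl i32 1, %maskshamt
  %mask.a = add i32 %a0, -1
  ; MaskB, new in this patch: (~(-1 << MaskShAmt)), i.e. (-1 << MaskShAmt) xor'ed with -1
  %b0     = shl i32 -1, %maskshamt
  %mask.b = xor i32 %b0, -1

Either mask, `and`ed with `x` in either operand order (hence `m_c_And`), is matched; the fold then still has to prove that `MaskShAmt+ShiftShAmt` is `u>=` the bitwidth, which is what the code following this hunk checks.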
2424 ; CHECK-NEXT: call void @use32(i32 [[T1]])
2525 ; CHECK-NEXT: call void @use32(i32 [[T2]])
2626 ; CHECK-NEXT: call void @use32(i32 [[T3]])
27 ; CHECK-NEXT: [[T4:%.*]] = shl i32 [[T2]], [[T3]]
27 ; CHECK-NEXT: [[T4:%.*]] = shl i32 [[X]], [[T3]]
2828 ; CHECK-NEXT: ret i32 [[T4]]
2929 ;
3030 %t0 = shl i32 -1, %nbits
4949 ; CHECK-NEXT: call void @use32(i32 [[T1]])
5050 ; CHECK-NEXT: call void @use32(i32 [[T2]])
5151 ; CHECK-NEXT: call void @use32(i32 [[T3]])
52 ; CHECK-NEXT: [[T4:%.*]] = shl i32 [[T2]], [[T3]]
52 ; CHECK-NEXT: [[T4:%.*]] = shl i32 [[X]], [[T3]]
5353 ; CHECK-NEXT: ret i32 [[T4]]
5454 ;
5555 %t0 = shl i32 -1, %nbits
7676 ; CHECK-NEXT: call void @use32(i32 [[T2]])
7777 ; CHECK-NEXT: call void @use32(i32 [[T3]])
7878 ; CHECK-NEXT: call void @use32(i32 [[T4]])
79 ; CHECK-NEXT: [[T5:%.*]] = shl i32 [[T3]], [[T4]]
79 ; CHECK-NEXT: [[T5:%.*]] = shl i32 [[X]], [[T4]]
8080 ; CHECK-NEXT: ret i32 [[T5]]
8181 ;
8282 %t0 = add i32 %nbits, 1
108108 ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T2]])
109109 ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T3]])
110110 ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T4]])
111 ; CHECK-NEXT: [[T5:%.*]] = shl <3 x i32> [[T3]], [[T4]]
111 ; CHECK-NEXT: [[T5:%.*]] = shl <3 x i32> [[X]], [[T4]]
112112 ; CHECK-NEXT: ret <3 x i32> [[T5]]
113113 ;
114114 %t0 = add <3 x i32> %nbits,
137137 ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T2]])
138138 ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T3]])
139139 ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T4]])
140 ; CHECK-NEXT: [[T5:%.*]] = shl <3 x i32> [[T3]], [[T4]]
140 ; CHECK-NEXT: [[T5:%.*]] = shl <3 x i32> [[X]], [[T4]]
141141 ; CHECK-NEXT: ret <3 x i32> [[T5]]
142142 ;
143143 %t0 = add <3 x i32> %nbits,
165165 ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T2]])
166166 ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T3]])
167167 ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T4]])
168 ; CHECK-NEXT: [[T5:%.*]] = shl <3 x i32> [[T3]], [[T4]]
168 ; CHECK-NEXT: [[T5:%.*]] = shl <3 x i32> [[X]], [[T4]]
169169 ; CHECK-NEXT: ret <3 x i32> [[T5]]
170170 ;
171171 %t0 = add <3 x i32> %nbits,
197197 ; CHECK-NEXT: call void @use32(i32 [[T1]])
198198 ; CHECK-NEXT: call void @use32(i32 [[T2]])
199199 ; CHECK-NEXT: call void @use32(i32 [[T3]])
200 ; CHECK-NEXT: [[T4:%.*]] = shl i32 [[T2]], [[T3]]
200 ; CHECK-NEXT: [[T4:%.*]] = shl i32 [[X]], [[T3]]
201201 ; CHECK-NEXT: ret i32 [[T4]]
202202 ;
203203 %x = call i32 @gen32()
259259 ; CHECK-NEXT: call void @use32(i32 [[T3]])
260260 ; CHECK-NEXT: call void @use32(i32 [[T4]])
261261 ; CHECK-NEXT: call void @use32(i32 [[T5]])
262 ; CHECK-NEXT: [[T6:%.*]] = shl i32 [[T4]], [[T5]]
262 ; CHECK-NEXT: [[T6:%.*]] = shl i32 [[T1]], [[T5]]
263263 ; CHECK-NEXT: ret i32 [[T6]]
264264 ;
265265 %t0 = shl i32 -1, %nbits0
290290 ; CHECK-NEXT: call void @use32(i32 [[T1]])
291291 ; CHECK-NEXT: call void @use32(i32 [[T2]])
292292 ; CHECK-NEXT: call void @use32(i32 [[T3]])
293 ; CHECK-NEXT: [[T4:%.*]] = shl nuw i32 [[T2]], [[T3]]
293 ; CHECK-NEXT: [[T4:%.*]] = shl i32 [[X]], [[T3]]
294294 ; CHECK-NEXT: ret i32 [[T4]]
295295 ;
296296 %t0 = shl i32 -1, %nbits
315315 ; CHECK-NEXT: call void @use32(i32 [[T1]])
316316 ; CHECK-NEXT: call void @use32(i32 [[T2]])
317317 ; CHECK-NEXT: call void @use32(i32 [[T3]])
318 ; CHECK-NEXT: [[T4:%.*]] = shl nsw i32 [[T2]], [[T3]]
318 ; CHECK-NEXT: [[T4:%.*]] = shl i32 [[X]], [[T3]]
319319 ; CHECK-NEXT: ret i32 [[T4]]
320320 ;
321321 %t0 = shl i32 -1, %nbits
340340 ; CHECK-NEXT: call void @use32(i32 [[T1]])
341341 ; CHECK-NEXT: call void @use32(i32 [[T2]])
342342 ; CHECK-NEXT: call void @use32(i32 [[T3]])
343 ; CHECK-NEXT: [[T4:%.*]] = shl nuw nsw i32 [[T2]], [[T3]]
343 ; CHECK-NEXT: [[T4:%.*]] = shl i32 [[X]], [[T3]]
344344 ; CHECK-NEXT: ret i32 [[T4]]
345345 ;
346346 %t0 = shl i32 -1, %nbits
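One detail visible in the last three test updates: the `nuw`/`nsw` flags on the rewritten `shl` are dropped. The flags were justified only because the masked operand had its high bits cleared; once the shift operates on the unmasked `%x`, those no-wrap guarantees no longer hold, so keeping the flags would be unsound. Schematically (a hedged reading of the diff above, not new test content):

  ; before the fold: flags are fine, %t2 has its high bits cleared by the mask
  %t4 = shl nuw nsw i32 %t2, %t3
  ; after the fold: the operand is the original, unmasked %x, so the flags must go
  %t4 = shl i32 %x, %t3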