llvm / 934ad58 (llvm.org GIT mirror)
[InstCombine] Dropping redundant masking before left-shift [0/5] (PR42563)

Summary:
If we have some pattern that leaves only some low bits set and then performs
a left-shift of those bits, and none of the bits that remain after the final
shift are modified by the mask, we can omit the mask.

There are many variants to this pattern:
  a. `(x & ((1 << MaskShAmt) - 1)) << ShiftShAmt`
All these patterns can be simplified to just:
  `x << ShiftShAmt`
iff:
  a. `(MaskShAmt+ShiftShAmt) u>= bitwidth(x)`

Alive proof:
  a: https://rise4fun.com/Alive/wi9

Indeed, not all of these patterns are canonical. But since this fold will only
produce a single instruction, I'm really interested in handling even
non-canonical patterns, since I have this general kind of pattern in hot paths,
and it is not totally outlandish for bit-twiddling code.

For now, let's start with patterns where both shift amounts are variable, with
a trivial constant "offset" between them, since I believe this is both the
simplest to handle and the most common. But again, there are likely other
variants where we could use ValueTracking/ConstantRange to handle more cases.

https://bugs.llvm.org/show_bug.cgi?id=42563

Reviewers: spatel, nikic, huihuiz, xbolva00

Reviewed By: xbolva00

Subscribers: efriedma, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D64512

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@366535 91177308-0d34-0410-b5e6-96231b3b80d8

Author: Roman Lebedev
2 changed files with 61 additions and 11 deletions.
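To make the target pattern concrete, here is a hypothetical C++ source-level version of the bit-twiddling the commit message describes (the function name `packLowBitsHigh` and the parameter `nbits` are illustrative only, not taken from the patch or its tests):

#include <cstdint>

// Hypothetical hot-path helper: take the low 'nbits' bits of 'x' and move them
// to the top of a 32-bit word. Assumes 1 <= nbits <= 31 so both shifts are
// well-defined.
uint32_t packLowBitsHigh(uint32_t x, unsigned nbits) {
  uint32_t mask = (1u << nbits) - 1u;  // (1 << MaskShAmt) - 1
  return (x & mask) << (32 - nbits);   // (x & ((1 << MaskShAmt) - 1)) << ShiftShAmt
}

Since nbits + (32 - nbits) is always 32, i.e. the bit width, every bit cleared by `mask` would be shifted out of the result anyway, so this fold should be able to reduce the body to just `x << (32 - nbits)` (with no `nuw`/`nsw` flags).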
   return NewShift;
 }
 
+// If we have some pattern that leaves only some low bits set, and then performs
+// left-shift of those bits, if none of the bits that are left after the final
+// shift are modified by the mask, we can omit the mask.
+//
+// There are many variants to this pattern:
+//   a)  (x & ((1 << MaskShAmt) - 1)) << ShiftShAmt
+// All these patterns can be simplified to just:
+//   x << ShiftShAmt
+// iff:
+//   a) (MaskShAmt+ShiftShAmt) u>= bitwidth(x)
+static Instruction *
+dropRedundantMaskingOfLeftShiftInput(BinaryOperator *OuterShift,
+                                     const SimplifyQuery &SQ) {
+  assert(OuterShift->getOpcode() == Instruction::BinaryOps::Shl &&
+         "The input must be 'shl'!");
+
+  Value *Masked = OuterShift->getOperand(0);
+  Value *ShiftShAmt = OuterShift->getOperand(1);
+
+  Value *MaskShAmt;
+
+  // ((1 << MaskShAmt) - 1)
+  auto MaskA = m_Add(m_Shl(m_One(), m_Value(MaskShAmt)), m_AllOnes());
+
+  Value *X;
+  if (!match(Masked, m_c_And(MaskA, m_Value(X))))
+    return nullptr;
+
+  // Can we simplify (MaskShAmt+ShiftShAmt) ?
+  Value *SumOfShAmts =
+      SimplifyAddInst(MaskShAmt, ShiftShAmt, /*IsNSW=*/false, /*IsNUW=*/false,
+                      SQ.getWithInstruction(OuterShift));
+  if (!SumOfShAmts)
+    return nullptr; // Did not simplify.
+  // Is the total shift amount *not* smaller than the bit width?
+  // FIXME: could also rely on ConstantRange.
+  unsigned BitWidth = X->getType()->getScalarSizeInBits();
+  if (!match(SumOfShAmts, m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_UGE,
+                                             APInt(BitWidth, BitWidth))))
+    return nullptr;
+  // All good, we can do this fold.
+
+  // No 'NUW'/'NSW'!
+  // We no longer know that we won't shift-out non-0 bits.
+  return BinaryOperator::Create(OuterShift->getOpcode(), X, ShiftShAmt);
+}
+
 Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
   assert(Op0->getType() == Op1->getType());

   if (Instruction *V = commonShiftTransforms(I))
     return V;
 
+  if (Instruction *V = dropRedundantMaskingOfLeftShiftInput(&I, SQ))
+    return V;
+
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
   Type *Ty = I.getType();
   unsigned BitWidth = Ty->getScalarSizeInBits();
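The new function only performs the fold once `SimplifyAddInst` has reduced `MaskShAmt + ShiftShAmt` to something provably `u>=` the bit width; with both shift amounts variable, as in the tests below, this presumably relies on InstSimplify folding a sum such as `%nbits + (bitwidth - %nbits)` down to a constant. The underlying arithmetic fact can be sanity-checked exhaustively at a small bit width. The following is a standalone sketch of such a check over i8 (my own illustration, not code from the patch or its tests):

#include <cassert>
#include <cstdint>

// Standalone sketch (not part of the patch): exhaustively verify, for an 8-bit
// width, that
//   (x & ((1 << MaskShAmt) - 1)) << ShiftShAmt  ==  x << ShiftShAmt
// whenever (MaskShAmt + ShiftShAmt) u>= bitwidth(x).
int main() {
  const unsigned BitWidth = 8;
  for (unsigned MaskShAmt = 0; MaskShAmt < BitWidth; ++MaskShAmt) {
    for (unsigned ShiftShAmt = 0; ShiftShAmt < BitWidth; ++ShiftShAmt) {
      if (MaskShAmt + ShiftShAmt < BitWidth)
        continue; // side condition does not hold; the mask may matter here
      for (unsigned X = 0; X <= 0xFF; ++X) {
        uint8_t Mask = uint8_t((1u << MaskShAmt) - 1u);
        uint8_t WithMask = uint8_t((X & Mask) << ShiftShAmt);
        uint8_t WithoutMask = uint8_t(X << ShiftShAmt);
        assert(WithMask == WithoutMask && "the mask was not redundant");
      }
    }
  }
  return 0;
}

Note also that the transformed IR intentionally carries no `nuw`/`nsw` flags (see the `shl nuw`/`shl nsw` test diffs below): once the mask is dropped, high bits of `x` may be shifted out, so the original no-wrap guarantees no longer hold.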
 ; CHECK-NEXT: call void @use32(i32 [[T1]])
 ; CHECK-NEXT: call void @use32(i32 [[T2]])
 ; CHECK-NEXT: call void @use32(i32 [[T3]])
-; CHECK-NEXT: [[T4:%.*]] = shl i32 [[T2]], [[T3]]
+; CHECK-NEXT: [[T4:%.*]] = shl i32 [[X]], [[T3]]
 ; CHECK-NEXT: ret i32 [[T4]]
 ;
   %t0 = shl i32 1, %nbits

 ; CHECK-NEXT: call void @use32(i32 [[T1]])
 ; CHECK-NEXT: call void @use32(i32 [[T2]])
 ; CHECK-NEXT: call void @use32(i32 [[T3]])
-; CHECK-NEXT: [[T4:%.*]] = shl i32 [[T2]], [[T3]]
+; CHECK-NEXT: [[T4:%.*]] = shl i32 [[X]], [[T3]]
 ; CHECK-NEXT: ret i32 [[T4]]
 ;
   %t0 = shl i32 1, %nbits

 ; CHECK-NEXT: call void @use32(i32 [[T2]])
 ; CHECK-NEXT: call void @use32(i32 [[T3]])
 ; CHECK-NEXT: call void @use32(i32 [[T4]])
-; CHECK-NEXT: [[T5:%.*]] = shl i32 [[T3]], [[T4]]
+; CHECK-NEXT: [[T5:%.*]] = shl i32 [[X]], [[T4]]
 ; CHECK-NEXT: ret i32 [[T5]]
 ;
   %t0 = add i32 %nbits, 1

 ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T2]])
 ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T3]])
 ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T4]])
-; CHECK-NEXT: [[T5:%.*]] = shl <3 x i32> [[T3]], [[T4]]
+; CHECK-NEXT: [[T5:%.*]] = shl <3 x i32> [[X]], [[T4]]
 ; CHECK-NEXT: ret <3 x i32> [[T5]]
 ;
   %t0 = add <3 x i32> %nbits,

 ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T2]])
 ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T3]])
 ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T4]])
-; CHECK-NEXT: [[T5:%.*]] = shl <3 x i32> [[T3]], [[T4]]
+; CHECK-NEXT: [[T5:%.*]] = shl <3 x i32> [[X]], [[T4]]
 ; CHECK-NEXT: ret <3 x i32> [[T5]]
 ;
   %t0 = add <3 x i32> %nbits,

 ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T2]])
 ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T3]])
 ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T4]])
-; CHECK-NEXT: [[T5:%.*]] = shl <3 x i32> [[T3]], [[T4]]
+; CHECK-NEXT: [[T5:%.*]] = shl <3 x i32> [[X]], [[T4]]
 ; CHECK-NEXT: ret <3 x i32> [[T5]]
 ;
   %t0 = add <3 x i32> %nbits,

 ; CHECK-NEXT: call void @use32(i32 [[T1]])
 ; CHECK-NEXT: call void @use32(i32 [[T2]])
 ; CHECK-NEXT: call void @use32(i32 [[T3]])
-; CHECK-NEXT: [[T4:%.*]] = shl i32 [[T2]], [[T3]]
+; CHECK-NEXT: [[T4:%.*]] = shl i32 [[X]], [[T3]]
 ; CHECK-NEXT: ret i32 [[T4]]
 ;
   %x = call i32 @gen32()

 ; CHECK-NEXT: call void @use32(i32 [[T3]])
 ; CHECK-NEXT: call void @use32(i32 [[T4]])
 ; CHECK-NEXT: call void @use32(i32 [[T5]])
-; CHECK-NEXT: [[T6:%.*]] = shl i32 [[T4]], [[T5]]
+; CHECK-NEXT: [[T6:%.*]] = shl i32 [[T1]], [[T5]]
 ; CHECK-NEXT: ret i32 [[T6]]
 ;
   %t0 = shl i32 1, %nbits0

 ; CHECK-NEXT: call void @use32(i32 [[T1]])
 ; CHECK-NEXT: call void @use32(i32 [[T2]])
 ; CHECK-NEXT: call void @use32(i32 [[T3]])
-; CHECK-NEXT: [[T4:%.*]] = shl nuw i32 [[T2]], [[T3]]
+; CHECK-NEXT: [[T4:%.*]] = shl i32 [[X]], [[T3]]
 ; CHECK-NEXT: ret i32 [[T4]]
 ;
   %t0 = shl i32 1, %nbits

 ; CHECK-NEXT: call void @use32(i32 [[T1]])
 ; CHECK-NEXT: call void @use32(i32 [[T2]])
 ; CHECK-NEXT: call void @use32(i32 [[T3]])
-; CHECK-NEXT: [[T4:%.*]] = shl nsw i32 [[T2]], [[T3]]
+; CHECK-NEXT: [[T4:%.*]] = shl i32 [[X]], [[T3]]
 ; CHECK-NEXT: ret i32 [[T4]]
 ;
   %t0 = shl i32 1, %nbits

 ; CHECK-NEXT: call void @use32(i32 [[T1]])
 ; CHECK-NEXT: call void @use32(i32 [[T2]])
 ; CHECK-NEXT: call void @use32(i32 [[T3]])
-; CHECK-NEXT: [[T4:%.*]] = shl nuw nsw i32 [[T2]], [[T3]]
+; CHECK-NEXT: [[T4:%.*]] = shl i32 [[X]], [[T3]]
 ; CHECK-NEXT: ret i32 [[T4]]
 ;
   %t0 = shl i32 1, %nbits