[AggressiveInstCombine] convert a chain of 'or-shift' bits into masked compare

and (or (lshr X, C), ...), 1 --> (X & C') != 0

I initially thought about implementing the minimal pattern in instcombine as
mentioned here:
https://bugs.llvm.org/show_bug.cgi?id=37098#c6
...but we need to do better to catch the more general sequence from the
motivating test (more than 2 bits in the compare).

A test-suite run with statistics showed that this pattern currently occurs
only 2 times. It would potentially happen more often if reassociation worked
better (D45842), but it is probably still not too frequent.

This is small enough that I didn't see a need to create a whole new class/file
within AggressiveInstCombine. There are likely other relatively small matchers,
like what was discussed in D44266, that would slide under foldUnusualPatterns()
(name suggestions welcome). We could potentially also consolidate matchers for
ctpop, bswap, etc. under here.

Differential Revision: https://reviews.llvm.org/D45986

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@331311 91177308-0d34-0410-b5e6-96231b3b80d8

Author: Sanjay Patel
3 changed files with 114 additions and 57 deletions.
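To make the fold concrete before the diff, here is a minimal before/after sketch in LLVM IR. It mirrors the two-bit regression test below; the function name @example is invented for illustration:

define i32 @example(i32 %x) {
  %s = lshr i32 %x, 3
  %o = or i32 %s, %x
  %r = and i32 %o, 1
  ret i32 %r
}

; ...is rewritten to a single mask test (mask 9 = bits 0 and 3):

define i32 @example(i32 %x) {
  %m = and i32 %x, 9
  %c = icmp ne i32 %m, 0
  %r = zext i1 %c to i32
  ret i32 %r
}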
lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp

 #include "llvm/Analysis/BasicAliasAnalysis.h"
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/Utils/Local.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/PatternMatch.h"
 #include "llvm/Pass.h"
 using namespace llvm;
+using namespace PatternMatch;
 
 #define DEBUG_TYPE "aggressive-instcombine"
 
...
 };
 } // namespace
 
+/// This is a recursive helper for 'and X, 1' that walks through a chain of 'or'
+/// instructions looking for shift ops of a common source value (first member of
+/// the pair). The second member of the pair is a mask constant for all of the
+/// bits that are being compared. So this:
+/// or (or (or X, (X >> 3)), (X >> 5)), (X >> 8)
+/// returns {X, 0x129} and those are the operands of an 'and' that is compared
+/// to zero.
+static bool matchMaskedCmpOp(Value *V, std::pair<Value *, APInt> &Result) {
+  // Recurse through a chain of 'or' operands.
+  Value *Op0, *Op1;
+  if (match(V, m_Or(m_Value(Op0), m_Value(Op1))))
+    return matchMaskedCmpOp(Op0, Result) && matchMaskedCmpOp(Op1, Result);
+
+  // We need a shift-right or a bare value representing a compare of bit 0 of
+  // the original source operand.
+  Value *Candidate;
+  uint64_t BitIndex = 0;
+  if (!match(V, m_LShr(m_Value(Candidate), m_ConstantInt(BitIndex))))
+    Candidate = V;
+
+  // Initialize result source operand.
+  if (!Result.first)
+    Result.first = Candidate;
+
+  // Fill in the mask bit derived from the shift constant. Using setBit()
+  // stays safe for bit indexes >= 32, where '1 << BitIndex' would overflow.
+  Result.second.setBit(BitIndex);
+  return Result.first == Candidate;
+}
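As a worked trace (value names hypothetical), the matcher walks the chain from the comment above left-operand-first, accumulating the {source, mask} pair at each leaf:

%s3 = lshr i32 %x, 3
%o1 = or i32 %x, %s3   ; %x is the first leaf: Result = {%x, bit 0}; %s3 adds bit 3
%s5 = lshr i32 %x, 5
%o2 = or i32 %o1, %s5  ; %s5 adds bit 5
%s8 = lshr i32 %x, 8
%o3 = or i32 %o2, %s8  ; %s8 adds bit 8 -> mask = 1+8+32+256 = 297 = 0x129

Matching fails as soon as a leaf's source value differs from the first one recorded, so only chains over a single source are folded.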
+
+/// Match an 'and' of a chain of or-shifted bits from a common source value
+/// into a masked compare:
+/// and (or (lshr X, C), ...), 1 --> (X & C') != 0
+static bool foldToMaskedCmp(Instruction &I) {
+  // TODO: This is only looking for 'any-bits-set' and 'all-bits-clear'.
+  // We should also match 'all-bits-set' and 'any-bits-clear' by looking for
+  // a chain of 'and'.
+  if (!match(&I, m_And(m_OneUse(m_Or(m_Value(), m_Value())), m_One())))
+    return false;
+
+  std::pair<Value *, APInt> MaskOps(
+      nullptr, APInt::getNullValue(I.getType()->getScalarSizeInBits()));
+  if (!matchMaskedCmpOp(cast<BinaryOperator>(&I)->getOperand(0), MaskOps))
+    return false;
+
+  IRBuilder<> Builder(&I);
+  Value *Mask = Builder.CreateAnd(MaskOps.first, MaskOps.second);
+  Value *CmpZero = Builder.CreateIsNotNull(Mask);
+  Value *Zext = Builder.CreateZExt(CmpZero, I.getType());
+  I.replaceAllUsesWith(Zext);
+  return true;
+}
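Concretely, for a chain that tests bits 0, 3, 5, and 8 of an i32 (the four-bit test below), the three builder calls emit the following, with hypothetical temporary names:

%mask = and i32 %x, 297        ; MaskOps.first masked with MaskOps.second (0x129)
%cmp  = icmp ne i32 %mask, 0   ; CreateIsNotNull
%res  = zext i1 %cmp to i32    ; widen the i1 back to the original 'and' type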
+
+/// This is the entry point for folds that could be implemented in regular
+/// InstCombine, but they are separated because they are not expected to
+/// occur frequently and/or have more than a constant-length pattern match.
+static bool foldUnusualPatterns(Function &F, DominatorTree &DT) {
+  bool MadeChange = false;
+  for (BasicBlock &BB : F) {
+    // Ignore unreachable basic blocks.
+    if (!DT.isReachableFromEntry(&BB))
+      continue;
+    // Do not delete instructions in here; that would invalidate the iterator.
+    for (Instruction &I : BB)
+      MadeChange |= foldToMaskedCmp(I);
+  }
+
+  // We're done with transforms, so remove dead instructions.
+  if (MadeChange)
+    for (BasicBlock &BB : F)
+      SimplifyInstructionsInBlock(&BB);
+
+  return MadeChange;
+}
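Note that foldToMaskedCmp() only redirects uses; the matched instructions stay in the block as dead code, which is why the cleanup loop above is needed. Assuming the two-bit example from the top (new values land ahead of the old 'and' because the builder's insertion point is &I):

%s = lshr i32 %x, 3     ; now dead
%o = or i32 %s, %x      ; now dead
%m = and i32 %x, 9      ; newly built mask test
%c = icmp ne i32 %m, 0
%res = zext i1 %c to i32
%r = and i32 %o, 1      ; now dead: all uses replaced by %res

SimplifyInstructionsInBlock() then erases the dead lshr/or/and chain.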
+
+/// This is the entry point for all transforms. Pass manager differences are
+/// handled in the callers of this function.
+static bool runImpl(Function &F, TargetLibraryInfo &TLI, DominatorTree &DT) {
+  bool MadeChange = false;
+  const DataLayout &DL = F.getParent()->getDataLayout();
+  TruncInstCombine TIC(TLI, DL, DT);
+  MadeChange |= TIC.run(F);
+  MadeChange |= foldUnusualPatterns(F, DT);
+  return MadeChange;
+}
+
 void AggressiveInstCombinerLegacyPass::getAnalysisUsage(
     AnalysisUsage &AU) const {
   AU.setPreservesCFG();
...
 }
 
 bool AggressiveInstCombinerLegacyPass::runOnFunction(Function &F) {
+  auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
   auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-  auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
-  auto &DL = F.getParent()->getDataLayout();
-
-  bool MadeIRChange = false;
-
-  // Handle TruncInst patterns
-  TruncInstCombine TIC(TLI, DL, DT);
-  MadeIRChange |= TIC.run(F);
-
-  // TODO: add more patterns to handle...
-
-  return MadeIRChange;
+  return runImpl(F, TLI, DT);
 }
 
 PreservedAnalyses AggressiveInstCombinePass::run(Function &F,
                                                  FunctionAnalysisManager &AM) {
+  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
   auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
-  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
-  auto &DL = F.getParent()->getDataLayout();
-  bool MadeIRChange = false;
-
-  // Handle TruncInst patterns
-  TruncInstCombine TIC(TLI, DL, DT);
-  MadeIRChange |= TIC.run(F);
-  if (!MadeIRChange)
+  if (!runImpl(F, TLI, DT)) {
     // No changes, all analyses are preserved.
     return PreservedAnalyses::all();
-
+  }
   // Mark all the analyses that instcombine updates as preserved.
   PreservedAnalyses PA;
   PA.preserveSet<CFGAnalyses>();

test/Transforms/AggressiveInstCombine/masked-cmp.ll

 
 define i32 @anyset_two_bit_mask(i32 %x) {
 ; CHECK-LABEL: @anyset_two_bit_mask(
-; CHECK-NEXT:    [[S:%.*]] = lshr i32 [[X:%.*]], 3
-; CHECK-NEXT:    [[O:%.*]] = or i32 [[S]], [[X]]
-; CHECK-NEXT:    [[R:%.*]] = and i32 [[O]], 1
-; CHECK-NEXT:    ret i32 [[R]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[X:%.*]], 9
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
   %s = lshr i32 %x, 3
   %o = or i32 %s, %x
...
 
 define i32 @anyset_four_bit_mask(i32 %x) {
 ; CHECK-LABEL: @anyset_four_bit_mask(
-; CHECK-NEXT:    [[T1:%.*]] = lshr i32 [[X:%.*]], 3
-; CHECK-NEXT:    [[T2:%.*]] = lshr i32 [[X]], 5
-; CHECK-NEXT:    [[T3:%.*]] = lshr i32 [[X]], 8
-; CHECK-NEXT:    [[O1:%.*]] = or i32 [[T1]], [[X]]
-; CHECK-NEXT:    [[O2:%.*]] = or i32 [[T2]], [[T3]]
-; CHECK-NEXT:    [[O3:%.*]] = or i32 [[O1]], [[O2]]
-; CHECK-NEXT:    [[R:%.*]] = and i32 [[O3]], 1
-; CHECK-NEXT:    ret i32 [[R]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[X:%.*]], 297
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
   %t1 = lshr i32 %x, 3
   %t2 = lshr i32 %x, 5
...
 
 define i32 @anyset_three_bit_mask_all_shifted_bits(i32 %x) {
 ; CHECK-LABEL: @anyset_three_bit_mask_all_shifted_bits(
-; CHECK-NEXT:    [[T1:%.*]] = lshr i32 [[X:%.*]], 3
-; CHECK-NEXT:    [[T2:%.*]] = lshr i32 [[X]], 5
-; CHECK-NEXT:    [[T3:%.*]] = lshr i32 [[X]], 8
-; CHECK-NEXT:    [[O2:%.*]] = or i32 [[T2]], [[T3]]
-; CHECK-NEXT:    [[O3:%.*]] = or i32 [[T1]], [[O2]]
-; CHECK-NEXT:    [[R:%.*]] = and i32 [[O3]], 1
-; CHECK-NEXT:    ret i32 [[R]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[X:%.*]], 296
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
   %t1 = lshr i32 %x, 3
   %t2 = lshr i32 %x, 5

test/Transforms/PhaseOrdering/bitfield-bittests.ll

 
 define i32 @allclear(i32 %a) {
 ; CHECK-LABEL: @allclear(
-; CHECK-NEXT:    [[BF_LSHR:%.*]] = lshr i32 [[A:%.*]], 1
-; CHECK-NEXT:    [[BF_CLEAR1:%.*]] = or i32 [[BF_LSHR]], [[A]]
-; CHECK-NEXT:    [[BF_LSHR5:%.*]] = lshr i32 [[A]], 2
-; CHECK-NEXT:    [[OR2:%.*]] = or i32 [[BF_CLEAR1]], [[BF_LSHR5]]
-; CHECK-NEXT:    [[BF_LSHR10:%.*]] = lshr i32 [[A]], 3
-; CHECK-NEXT:    [[OR83:%.*]] = or i32 [[OR2]], [[BF_LSHR10]]
-; CHECK-NEXT:    [[OR13:%.*]] = and i32 [[OR83]], 1
-; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[OR13]], 1
-; CHECK-NEXT:    ret i32 [[TMP1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[A:%.*]], 15
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
   %a.sroa.0.0.trunc = trunc i32 %a to i8
   %a.sroa.5.0.shift = lshr i32 %a, 8
...
 
 define i32 @anyset(i32 %a) {
 ; CHECK-LABEL: @anyset(
-; CHECK-NEXT:    [[BF_LSHR:%.*]] = lshr i32 [[A:%.*]], 1
-; CHECK-NEXT:    [[BF_CLEAR1:%.*]] = or i32 [[BF_LSHR]], [[A]]
-; CHECK-NEXT:    [[BF_LSHR5:%.*]] = lshr i32 [[A]], 2
-; CHECK-NEXT:    [[OR2:%.*]] = or i32 [[BF_CLEAR1]], [[BF_LSHR5]]
-; CHECK-NEXT:    [[BF_LSHR10:%.*]] = lshr i32 [[A]], 3
-; CHECK-NEXT:    [[OR83:%.*]] = or i32 [[OR2]], [[BF_LSHR10]]
-; CHECK-NEXT:    [[OR13:%.*]] = and i32 [[OR83]], 1
-; CHECK-NEXT:    ret i32 [[OR13]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[A:%.*]], 15
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
   %a.sroa.0.0.trunc = trunc i32 %a to i8
   %a.sroa.5.0.shift = lshr i32 %a, 8