llvm.org GIT mirror: llvm / commit 722ea18

[Reassociate] swap binop operands to increase factoring potential

If we have a pair of binops feeding another pair of binops, rearrange the operands so the matching pair are together, because that allows easy factorization folds to happen in instcombine:

  ((X << S) & Y) & (Z << S) --> ((X << S) & (Z << S)) & Y (reassociation)
                            --> ((X & Z) << S) & Y (factorize shift from 'and' ops optimization)

This is part of solving PR37098:
https://bugs.llvm.org/show_bug.cgi?id=37098

Note that there's an instcombine version of this patch attached there, but we're trying to make instcombine have less responsibility, to improve compile-time efficiency. For reasons I still don't completely understand, reassociate does this kind of transform sometimes, but misses everything in my motivating cases.

This patch on its own is gluing an independent cleanup chunk to the end of the existing RewriteExprTree() loop. We can build on it and do something stronger to better order the full expression tree, like D40049. That might be an alternative to the proposal to add a separate reassociation pass, like D41574.

Differential Revision: https://reviews.llvm.org/D45842

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@341288 91177308-0d34-0410-b5e6-96231b3b80d8

Committed by Sanjay Patel, 1 year, 19 days ago.
3 changed files with 111 additions and 36 deletions.
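An aside that is not part of the commit: the end-to-end rewrite is plain bit arithmetic, so it can be sanity-checked by brute force. The sketch below (standalone C++, with uint8_t standing in for i8 and an arbitrary fixed mask Y) exhaustively verifies ((X << S) & Y) & (Z << S) == (((X & Z) << S) & Y):

// Exhaustively check the identity behind the reassociate + instcombine folds.
// Truncation to uint8_t models i8 'shl' behavior for in-range shift amounts.
#include <cstdint>
#include <cstdio>

int main() {
  const uint8_t Y = 0xB7; // arbitrary mask; the identity does not depend on Y
  for (unsigned S = 0; S < 8; ++S)
    for (unsigned X = 0; X < 256; ++X)
      for (unsigned Z = 0; Z < 256; ++Z) {
        uint8_t Lhs = (uint8_t)((uint8_t)(X << S) & Y) & (uint8_t)(Z << S);
        uint8_t Rhs = (uint8_t)((uint8_t)((X & Z) << S) & Y);
        if (Lhs != Rhs) {
          std::printf("mismatch at S=%u X=%u Z=%u\n", S, X, Z);
          return 1;
        }
      }
  std::printf("identity holds for all i8 X, Z and shift amounts\n");
  return 0;
}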
   void OptimizeInst(Instruction *I);
   Instruction *canonicalizeNegConstExpr(Instruction *I);
   void BuildPairMap(ReversePostOrderTraversal<Function *> &RPOT);
+  void swapOperandsToMatchBinops(BinaryOperator &B);
 };

 } // end namespace llvm

 using namespace llvm;
 using namespace reassociate;
+using namespace PatternMatch;

 #define DEBUG_TYPE "reassociate"

   ReassociateExpression(BO);
 }

+/// If we have an associative pair of binops with the same opcode and 2 of the
+/// 3 operands to that pair of binops are some other matching binop, rearrange
+/// the operands of the associative binops so the matching ops are paired
+/// together. This transform creates factoring opportunities by pairing opcodes.
+/// TODO: Should those factoring optimizations be handled here or in InstCombine?
+/// Example:
+///   ((X << S) & Y) & (Z << S) --> ((X << S) & (Z << S)) & Y (reassociation)
+///                             --> ((X & Z) << S) & Y (factorize shift from 'and' ops optimization)
+void ReassociatePass::swapOperandsToMatchBinops(BinaryOperator &B) {
+  BinaryOperator *B0, *B1;
+  if (!B.isAssociative() || !B.isCommutative() ||
+      !match(&B, m_BinOp(m_BinOp(B0), m_BinOp(B1))))
+    return;
+
+  // We have (B0 op B1) where both operands are also binops.
+  // Canonicalize a binop with the same opcode as the parent binop (B) to B0
+  // and a binop with a different opcode to B1.
+  Instruction::BinaryOps TopOpc = B.getOpcode();
+  if (B0->getOpcode() != TopOpc)
+    std::swap(B0, B1);
+
+  // If (1) we don't have a pair of binops with the same opcode or (2) B0 and
+  // B1 already have the same opcode, there is nothing to do. If the binop with
+  // the same opcode (B0) has more than one use, reassociation would result in
+  // more instructions, so bail out.
+  Instruction::BinaryOps OtherOpc = B1->getOpcode();
+  if (B0->getOpcode() != TopOpc || !B0->hasOneUse() || OtherOpc == TopOpc)
+    return;
+
+  // Canonicalize a binop that matches B1 to V00 (operand 0 of B0) and a value
+  // that does not match B1 to V01.
+  Value *V00 = B0->getOperand(0), *V01 = B0->getOperand(1);
+  if (!match(V00, m_BinOp()) ||
+      cast<BinaryOperator>(V00)->getOpcode() != OtherOpc)
+    std::swap(V00, V01);
+
+  // We need a binop with the same opcode as B1 in V00, and a value with a
+  // different opcode in V01.
+  BinaryOperator *B00, *B01;
+  if (!match(V00, m_BinOp(B00)) || B00->getOpcode() != OtherOpc ||
+      (match(V01, m_BinOp(B01)) && B01->getOpcode() == OtherOpc))
+    return;
+
+  // B00 and B1 are displaced matching binops, so pull them together:
+  // (B00 & V01) & B1 --> (B00 & B1) & V01
+  IRBuilder<> Builder(&B);
+  Builder.SetInstDebugLocation(&B);
+  Value *NewBO1 = Builder.CreateBinOp(TopOpc, B00, B1);
+  Value *NewBO2 = Builder.CreateBinOp(TopOpc, NewBO1, V01);
+
+  // Fast-math-flags propagate from B; wrapping flags are cleared.
+  if (auto *I1 = dyn_cast<Instruction>(NewBO1))
+    I1->copyIRFlags(&B, false);
+  if (auto *I2 = dyn_cast<Instruction>(NewBO2))
+    I2->copyIRFlags(&B, false);
+
+  B.replaceAllUsesWith(NewBO2);
+  return;
+}
+
 void ReassociatePass::ReassociateExpression(BinaryOperator *I) {
   // First, walk the expression tree, linearizing the tree, collecting the
   // operand information.

   // Now that we ordered and optimized the expressions, splat them back into
   // the expression tree, removing any unneeded nodes.
   RewriteExprTree(I, Ops);
+
+  // Try a final reassociation of the root of the tree.
+  swapOperandsToMatchBinops(*I);
 }

 void
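The test file below drives this through opt and FileCheck. As a rough, hypothetical alternative (not part of the patch), the same behavior can be observed programmatically with the new pass manager; the IR string mirrors the @and_shl test that follows, and the headers/APIs assume a reasonably current LLVM checkout:

// Hypothetical driver: parse a module, run ReassociatePass on one function,
// and print the result. Assumes it is built against and linked with LLVM.
#include "llvm/AsmParser/Parser.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar/Reassociate.h"
#include <memory>

using namespace llvm;

int main() {
  LLVMContext Ctx;
  SMDiagnostic Err;
  // Same shape as @and_shl below: ((x << s) & z) & (y << s).
  std::unique_ptr<Module> M = parseAssemblyString(R"IR(
    define i8 @and_shl(i8 %x, i8 %y, i8 %z, i8 %shamt) {
      %sx = shl i8 %x, %shamt
      %a = and i8 %sx, %z
      %sy = shl i8 %y, %shamt
      %r = and i8 %a, %sy
      ret i8 %r
    }
  )IR", Err, Ctx);
  if (!M)
    return 1;

  // Minimal new-pass-manager setup; reassociate only needs function analyses.
  PassBuilder PB;
  FunctionAnalysisManager FAM;
  PB.registerFunctionAnalyses(FAM);

  FunctionPassManager FPM;
  FPM.addPass(ReassociatePass());
  FPM.run(*M->getFunction("and_shl"), FAM);

  // With this patch, the two shifts should be ANDed together before %z.
  M->print(outs(), nullptr);
  return 0;
}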
 ; CHECK-LABEL: @and_shl(
 ; CHECK-NEXT: [[SX:%.*]] = shl i8 [[X:%.*]], [[SHAMT:%.*]]
 ; CHECK-NEXT: [[SY:%.*]] = shl i8 [[Y:%.*]], [[SHAMT]]
-; CHECK-NEXT: [[A:%.*]] = and i8 [[SX]], [[Z:%.*]]
-; CHECK-NEXT: [[R:%.*]] = and i8 [[A]], [[SY]]
+; CHECK-NEXT: [[A:%.*]] = and i8 [[SX]], [[SY]]
+; CHECK-NEXT: [[R:%.*]] = and i8 [[A]], [[Z:%.*]]
 ; CHECK-NEXT: ret i8 [[R]]
 ;
   %sx = shl i8 %x, %shamt

 ; CHECK-LABEL: @or_shl(
 ; CHECK-NEXT: [[SX:%.*]] = shl i8 [[X:%.*]], [[SHAMT:%.*]]
 ; CHECK-NEXT: [[SY:%.*]] = shl i8 [[Y:%.*]], [[SHAMT]]
-; CHECK-NEXT: [[A:%.*]] = or i8 [[SX]], [[Z:%.*]]
-; CHECK-NEXT: [[R:%.*]] = or i8 [[A]], [[SY]]
+; CHECK-NEXT: [[A:%.*]] = or i8 [[SX]], [[SY]]
+; CHECK-NEXT: [[R:%.*]] = or i8 [[A]], [[Z:%.*]]
 ; CHECK-NEXT: ret i8 [[R]]
 ;
   %sx = shl i8 %x, %shamt

 ; CHECK-LABEL: @xor_shl(
 ; CHECK-NEXT: [[SX:%.*]] = shl i8 [[X:%.*]], [[SHAMT:%.*]]
 ; CHECK-NEXT: [[SY:%.*]] = shl i8 [[Y:%.*]], [[SHAMT]]
-; CHECK-NEXT: [[A:%.*]] = xor i8 [[SX]], [[Z:%.*]]
-; CHECK-NEXT: [[R:%.*]] = xor i8 [[A]], [[SY]]
+; CHECK-NEXT: [[A:%.*]] = xor i8 [[SX]], [[SY]]
+; CHECK-NEXT: [[R:%.*]] = xor i8 [[A]], [[Z:%.*]]
 ; CHECK-NEXT: ret i8 [[R]]
 ;
   %sx = shl i8 %x, %shamt

 ; CHECK-LABEL: @and_lshr(
 ; CHECK-NEXT: [[SX:%.*]] = lshr i8 [[X:%.*]], [[SHAMT:%.*]]
 ; CHECK-NEXT: [[SY:%.*]] = lshr i8 [[Y:%.*]], [[SHAMT]]
-; CHECK-NEXT: [[A:%.*]] = and i8 [[SX]], [[Z:%.*]]
-; CHECK-NEXT: [[R:%.*]] = and i8 [[A]], [[SY]]
+; CHECK-NEXT: [[A:%.*]] = and i8 [[SX]], [[SY]]
+; CHECK-NEXT: [[R:%.*]] = and i8 [[A]], [[Z:%.*]]
 ; CHECK-NEXT: ret i8 [[R]]
 ;
   %sx = lshr i8 %x, %shamt

 ; CHECK-LABEL: @or_lshr(
 ; CHECK-NEXT: [[SX:%.*]] = lshr i8 [[X:%.*]], [[SHAMT:%.*]]
 ; CHECK-NEXT: [[SY:%.*]] = lshr i8 [[Y:%.*]], [[SHAMT]]
-; CHECK-NEXT: [[A:%.*]] = or i8 [[SX]], [[Z:%.*]]
-; CHECK-NEXT: [[R:%.*]] = or i8 [[A]], [[SY]]
+; CHECK-NEXT: [[A:%.*]] = or i8 [[SX]], [[SY]]
+; CHECK-NEXT: [[R:%.*]] = or i8 [[A]], [[Z:%.*]]
 ; CHECK-NEXT: ret i8 [[R]]
 ;
   %sx = lshr i8 %x, %shamt

 ; CHECK-LABEL: @xor_lshr(
 ; CHECK-NEXT: [[SX:%.*]] = lshr i8 [[X:%.*]], [[SHAMT:%.*]]
 ; CHECK-NEXT: [[SY:%.*]] = lshr i8 [[Y:%.*]], [[SHAMT]]
-; CHECK-NEXT: [[A:%.*]] = xor i8 [[SX]], [[Z:%.*]]
-; CHECK-NEXT: [[R:%.*]] = xor i8 [[A]], [[SY]]
+; CHECK-NEXT: [[A:%.*]] = xor i8 [[SX]], [[SY]]
+; CHECK-NEXT: [[R:%.*]] = xor i8 [[A]], [[Z:%.*]]
 ; CHECK-NEXT: ret i8 [[R]]
 ;
   %sx = lshr i8 %x, %shamt

 ; CHECK-LABEL: @and_ashr(
 ; CHECK-NEXT: [[SX:%.*]] = ashr i8 [[X:%.*]], [[SHAMT:%.*]]
 ; CHECK-NEXT: [[SY:%.*]] = ashr i8 [[Y:%.*]], [[SHAMT]]
-; CHECK-NEXT: [[A:%.*]] = and i8 [[SX]], [[Z:%.*]]
-; CHECK-NEXT: [[R:%.*]] = and i8 [[A]], [[SY]]
+; CHECK-NEXT: [[A:%.*]] = and i8 [[SX]], [[SY]]
+; CHECK-NEXT: [[R:%.*]] = and i8 [[A]], [[Z:%.*]]
 ; CHECK-NEXT: ret i8 [[R]]
 ;
   %sx = ashr i8 %x, %shamt

 ; CHECK-LABEL: @or_ashr(
 ; CHECK-NEXT: [[SX:%.*]] = ashr i8 [[X:%.*]], [[SHAMT:%.*]]
 ; CHECK-NEXT: [[SY:%.*]] = ashr i8 [[Y:%.*]], [[SHAMT]]
-; CHECK-NEXT: [[A:%.*]] = or i8 [[SX]], [[Z:%.*]]
-; CHECK-NEXT: [[R:%.*]] = or i8 [[A]], [[SY]]
+; CHECK-NEXT: [[A:%.*]] = or i8 [[SX]], [[SY]]
+; CHECK-NEXT: [[R:%.*]] = or i8 [[A]], [[Z:%.*]]
 ; CHECK-NEXT: ret i8 [[R]]
 ;
   %sx = ashr i8 %x, %shamt

 ; CHECK-LABEL: @xor_ashr(
 ; CHECK-NEXT: [[SX:%.*]] = ashr <2 x i8> [[X:%.*]], [[SHAMT:%.*]]
 ; CHECK-NEXT: [[SY:%.*]] = ashr <2 x i8> [[Y:%.*]], [[SHAMT]]
-; CHECK-NEXT: [[A:%.*]] = xor <2 x i8> [[SX]], [[Z:%.*]]
-; CHECK-NEXT: [[R:%.*]] = xor <2 x i8> [[A]], [[SY]]
+; CHECK-NEXT: [[A:%.*]] = xor <2 x i8> [[SX]], [[SY]]
+; CHECK-NEXT: [[R:%.*]] = xor <2 x i8> [[A]], [[Z:%.*]]
 ; CHECK-NEXT: ret <2 x i8> [[R]]
 ;
   %sx = ashr <2 x i8> %x, %shamt

 }

 ; Math ops work too. Change instruction positions too to verify placement.
+; We only care about extra uses of the first associative value - in this
+; case, it's %a. Everything else can have extra uses.
+
+declare void @use(i8)

 define i8 @add_lshr(i8 %x, i8 %y, i8 %z, i8 %shamt) {
 ; CHECK-LABEL: @add_lshr(
 ; CHECK-NEXT: [[SX:%.*]] = lshr i8 [[X:%.*]], [[SHAMT:%.*]]
-; CHECK-NEXT: [[A:%.*]] = add i8 [[SX]], [[Z:%.*]]
-; CHECK-NEXT: [[SY:%.*]] = lshr i8 [[Y:%.*]], [[SHAMT]]
-; CHECK-NEXT: [[R:%.*]] = add i8 [[A]], [[SY]]
-; CHECK-NEXT: ret i8 [[R]]
+; CHECK-NEXT: [[SY:%.*]] = lshr i8 [[Y:%.*]], [[SHAMT]]
+; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[SX]], [[SY]]
+; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[TMP1]], [[Z:%.*]]
+; CHECK-NEXT: call void @use(i8 [[SX]])
+; CHECK-NEXT: call void @use(i8 [[SY]])
+; CHECK-NEXT: call void @use(i8 [[TMP2]])
+; CHECK-NEXT: ret i8 [[TMP2]]
 ;
   %sx = lshr i8 %x, %shamt
   %a = add i8 %sx, %z
   %sy = lshr i8 %y, %shamt
   %r = add i8 %a, %sy
+  call void @use(i8 %sx)
+  call void @use(i8 %sy)
+  call void @use(i8 %r)
   ret i8 %r
 }

 ; CHECK-LABEL: @mul_sub(
 ; CHECK-NEXT: [[SX:%.*]] = sub i8 [[X:%.*]], [[M:%.*]]
 ; CHECK-NEXT: [[SY:%.*]] = sub i8 [[Y:%.*]], [[M]]
-; CHECK-NEXT: [[A:%.*]] = mul nsw i8 [[SX]], [[Z:%.*]]
-; CHECK-NEXT: [[R:%.*]] = mul nuw i8 [[A]], [[SY]]
+; CHECK-NEXT: [[A:%.*]] = mul i8 [[SX]], [[SY]]
+; CHECK-NEXT: [[R:%.*]] = mul i8 [[A]], [[Z:%.*]]
 ; CHECK-NEXT: ret i8 [[R]]
 ;
   %sx = sub i8 %x, %m

 define i8 @add_mul(i8 %x, i8 %y, i8 %z, i8 %m) {
 ; CHECK-LABEL: @add_mul(
 ; CHECK-NEXT: [[SX:%.*]] = mul nuw i8 [[X:%.*]], 42
-; CHECK-NEXT: [[A:%.*]] = add nuw i8 [[Z:%.*]], [[SX]]
 ; CHECK-NEXT: [[SY:%.*]] = mul nsw i8 [[M:%.*]], [[Y:%.*]]
-; CHECK-NEXT: [[R:%.*]] = add nsw i8 [[A]], [[SY]]
+; CHECK-NEXT: [[A:%.*]] = add i8 [[SX]], [[SY]]
+; CHECK-NEXT: [[R:%.*]] = add i8 [[A]], [[Z:%.*]]
 ; CHECK-NEXT: ret i8 [[R]]
 ;
   %sx = mul nuw i8 %x, 42

 define float @fadd_fmul(float %x, float %y, float %z, float %m) {
 ; CHECK-LABEL: @fadd_fmul(
 ; CHECK-NEXT: [[SX:%.*]] = fmul float [[X:%.*]], [[M:%.*]]
-; CHECK-NEXT: [[A:%.*]] = fadd fast float [[SX]], [[Z:%.*]]
 ; CHECK-NEXT: [[SY:%.*]] = fmul float [[Y:%.*]], [[M]]
-; CHECK-NEXT: [[R:%.*]] = fadd fast float [[A]], [[SY]]
+; CHECK-NEXT: [[A:%.*]] = fadd fast float [[SX]], [[SY]]
+; CHECK-NEXT: [[R:%.*]] = fadd fast float [[A]], [[Z:%.*]]
 ; CHECK-NEXT: ret float [[R]]
 ;
   %sx = fmul float %x, %m
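As the @mul_sub and @fadd_fmul checks above show, the rebuilt binops keep fast-math flags but drop nuw/nsw; that is the effect of the copyIRFlags(&B, false) calls in swapOperandsToMatchBinops, where false excludes the wrap flags. Below is a small hypothetical snippet, not from the patch, that demonstrates the same flag-copy behavior on a throwaway module:

// Build two 'mul' instructions, give the first one nuw, then copy IR flags to
// the second with IncludeWrapFlags=false; the printed IR shows a plain 'mul'.
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("flags-demo", Ctx);
  Type *I8 = Type::getInt8Ty(Ctx);
  Function *F = Function::Create(FunctionType::get(I8, {I8, I8}, false),
                                 Function::ExternalLinkage, "demo", &M);
  BasicBlock *BB = BasicBlock::Create(Ctx, "entry", F);
  IRBuilder<> Builder(BB);

  auto ArgIt = F->arg_begin();
  Value *X = &*ArgIt++;
  Value *Y = &*ArgIt;

  // Source binop carries a wrapping flag; the new one starts with none.
  Value *Src = Builder.CreateNUWMul(X, Y, "src");
  Value *New = Builder.CreateMul(X, Y, "new");

  // Mirror what the pass does for its rebuilt ops: copy flags from the
  // source instruction but exclude the wrap flags.
  cast<Instruction>(New)->copyIRFlags(cast<Instruction>(Src),
                                      /*IncludeWrapFlags=*/false);

  Builder.CreateRet(New);
  M.print(outs(), nullptr); // %new is a plain 'mul' with no nuw/nsw.
  return 0;
}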
 ; CHECK-LABEL: @fmul_fdiv(
 ; CHECK-NEXT: [[SX:%.*]] = fdiv float [[X:%.*]], [[M:%.*]]
 ; CHECK-NEXT: [[SY:%.*]] = fdiv float [[Y:%.*]], 4.200000e+01
-; CHECK-NEXT: [[A:%.*]] = fmul fast float [[SY]], [[Z:%.*]]
-; CHECK-NEXT: [[R:%.*]] = fmul fast float [[A]], [[SX]]
+; CHECK-NEXT: [[A:%.*]] = fmul fast float [[SY]], [[SX]]
+; CHECK-NEXT: [[R:%.*]] = fmul fast float [[A]], [[Z:%.*]]
 ; CHECK-NEXT: ret float [[R]]
 ;
   %sx = fdiv float %x, %m

   ret float %r
 }

-; Verify that debug info for modified instructions gets discarded (references become undef).
+; Verify that debug info for modified instructions is not invalid.

 define i32 @and_shl_dbg(i32 %x, i32 %y, i32 %z, i32 %shamt) {
 ; CHECK-LABEL: @and_shl_dbg(

 ; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[SHL]], metadata !16, metadata !DIExpression()), !dbg !25
 ; CHECK-NEXT: [[SHL1:%.*]] = shl i32 [[Y]], [[SHAMT]], !dbg !26
 ; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[SHL1]], metadata !17, metadata !DIExpression()), !dbg !27
-; CHECK-NEXT: [[AND:%.*]] = and i32 [[SHL]], [[Z]], !dbg !28
-; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[AND]], metadata !18, metadata !DIExpression()), !dbg !29
-; CHECK-NEXT: [[AND2:%.*]] = and i32 [[AND]], [[SHL1]], !dbg !30
-; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[AND2]], metadata !19, metadata !DIExpression()), !dbg !31
-; CHECK-NEXT: ret i32 [[AND2]], !dbg !32
+; CHECK-NEXT: call void @llvm.dbg.value(metadata !2, metadata !18, metadata !DIExpression()), !dbg !28
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[SHL]], [[SHL1]], !dbg !29
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[Z]], !dbg !29
+; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP2]], metadata !19, metadata !DIExpression()), !dbg !30
+; CHECK-NEXT: ret i32 [[TMP2]], !dbg !31
 ;
   call void @llvm.dbg.value(metadata i32 %x, metadata !13, metadata !DIExpression()), !dbg !21
   call void @llvm.dbg.value(metadata i32 %y, metadata !14, metadata !DIExpression()), !dbg !22