llvm.org GIT mirror llvm / 7bc6001
[SimplifyCFG] Rewrite SinkThenElseCodeToEnd The new version has several advantages: 1) IMSHO it's more readable and neater 2) It handles loads and stores properly 3) It can handle any number of incoming blocks rather than just two. I'll be taking advantage of this in a followup patch. With this change we can now finally sink load-modify-store idioms such as: if (a) return *b += 3; else return *b += 4; => %z = load i32, i32* %y %.sink = select i1 %a, i32 3, i32 4 %b = add i32 %z, %.sink store i32 %b, i32* %y ret i32 %b When this works for switches it'll be even more powerful. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@278660 91177308-0d34-0410-b5e6-96231b3b80d8 James Molloy 4 years ago
5 changed file(s) with 395 addition(s) and 159 deletion(s). Raw diff Collapse all Expand all
13181318 return true;
13191319 }
13201320
1321 // Return true if V0 and V1 are equivalent. This handles the obvious cases
1322 // where V0 == V1 and V0 and V1 are both identical instructions, but also
1323 // handles loads and stores with identical operands.
1324 //
1325 // Because determining if two memory instructions are equivalent
1326 // depends on control flow, the \c At0 and \c At1 parameters specify a
1327 // location for the query. This function is essentially answering the
1328 // query "If V0 were moved to At0, and V1 were moved to At1, are V0 and V1
1329 // equivalent?". In practice this means checking that moving V0 to At0
1330 // (and V1 to At1) doesn't cross any other memory instructions.
//
// The answer is conservative: a false result only means that equivalence
// could not be shown cheaply, not that the values are known to differ.
1331 static bool areValuesTriviallySame(Value *V0, BasicBlock::const_iterator At0,
1332 Value *V1, BasicBlock::const_iterator At1) {
1333 if (V0 == V1)
1334 return true;
1335
1336 // Also check for instructions that are identical but not pointer-identical.
1337 // This can include load instructions that haven't been CSE'd.
1338 if (!isa(V0) || !isa(V1))
1339 return false;
1340 const auto *I0 = cast(V0);
1341 const auto *I1 = cast(V1);
1342 if (!I0->isIdenticalToWhenDefined(I1))
1343 return false;
1344
// Identical instructions that never touch memory need no positional check.
1345 if (!I0->mayReadOrWriteMemory())
1346 return true;
1347
1348 // Instructions that may read or write memory have extra restrictions. We
1349 // must ensure we don't treat %a and %b as equivalent in code such as:
1350 //
1351 // %a = load %x
1352 // store %x, 1
1353 // if (%c) {
1354 // %b = load %x
1355 // %d = add %b, 1
1356 // } else {
1357 // %d = add %a, 1
1358 // }
1359
1360 // Be conservative. We don't want to search the entire CFG between def
1361 // and use; if the def isn't in the same block as the use just bail.
1362 if (I0->getParent() != At0->getParent() ||
1363 I1->getParent() != At1->getParent())
1364 return false;
1365
1366 // Again, be super conservative. Ideally we'd be able to query AliasAnalysis
1367 // but we currently don't have that available.
// Despite its name, the predicate below also matches instructions that only
// read memory: it tests mayReadOrWriteMemory().
1368 auto WritesMemory = [](const Instruction &I) {
1369 return I.mayReadOrWriteMemory();
1370 };
// Scan the instructions strictly between each definition and its query point.
// NOTE(review): this assumes I0 precedes At0 (and I1 precedes At1) in block
// order - confirm that callers guarantee it.
1371 if (std::any_of(std::next(I0->getIterator()), At0, WritesMemory))
1372 return false;
1373 if (std::any_of(std::next(I1->getIterator()), At1, WritesMemory))
1374 return false;
1375 return true;
1376 }
1377
1378 // Is it legal to replace the operand \c OpIdx of \c GEP with a PHI node?
// Operand 0 is always accepted. For any other operand, the GEP type iterator
// is advanced to the entry for that index and the replacement is rejected
// when that entry is a struct type.
// NOTE(review): presumably because struct field indices must be constants -
// confirm against the LangRef.
1379 static bool canReplaceGEPOperandWithPHI(const Instruction *GEP,
1380 unsigned OpIdx) {
1381 if (OpIdx == 0)
1382 return true;
1383 gep_type_iterator It = std::next(gep_type_begin(GEP), OpIdx - 1);
1384 return !It->isStructTy();
1385 }
1386
1387 // All blocks in Blocks unconditionally jump to a common successor. Analyze
1388 // the last non-terminator instruction in each block and return true if it would
1389 // be possible to sink them into their successor, creating one common
1390 // instruction instead. Set NumPHIsRequired to the number of PHI nodes that
1391 // would need to be created during sinking.
// NumPHIsRequired is only meaningful when true is returned: the early exits
// leave it unwritten and exits from the operand loop leave a partial count.
1392 static bool canSinkLastInstruction(ArrayRef Blocks,
1393 unsigned &NumPHIsRequired) {
1394 SmallVector Insts;
1395 for (auto *BB : Blocks) {
1396 if (BB->getTerminator() == &BB->front())
1397 // Block was empty.
1398 return false;
1399 Insts.push_back(BB->getTerminator()->getPrevNode());
1400 }
1401
1402 // Prune out obviously bad instructions to move. Any non-store instruction
1403 // must have exactly one use, and we check later that use is by a single,
1404 // common PHI instruction in the successor.
1405 for (auto *I : Insts) {
1406 // These instructions may change or break semantics if moved.
1407 if (isa(I) || I->isEHPad() || isa(I) ||
1408 I->getType()->isTokenTy())
1409 return false;
1410 // Apart from loads and stores, we won't move anything that could
1411 // touch memory or have side effects. Both properties are tested: an
// instruction that only reads memory has no side effects but still must
// not be moved across other memory operations.
1412 if (!isa(I) && !isa(I) &&
1413 (I->mayHaveSideEffects() || I->mayReadOrWriteMemory()))
1414 return false;
1415 // Everything must have only one use too, apart from stores which
1416 // have no uses.
1417 if (!isa(I) && !I->hasOneUse())
1418 return false;
1419 }
1420
// Every candidate must perform the same operation as the first one.
1421 const Instruction *I0 = Insts.front();
1422 for (auto *I : Insts)
1423 if (!I->isSameOperationAs(I0))
1424 return false;
1425
1426 // If this isn't a store, check the only user is a single PHI.
1427 if (!isa(I0)) {
1428 auto *PNUse = dyn_cast(*I0->user_begin());
1429 if (!PNUse ||
1430 !all_of(Insts, [&PNUse](const Instruction *I) {
1431 return *I->user_begin() == PNUse;
1432 }))
1433 return false;
1434 }
1435
// Count the operand positions whose values are not trivially the same in
// every block; each such position would need a PHI in the successor.
1436 NumPHIsRequired = 0;
1437 for (unsigned OI = 0, OE = I0->getNumOperands(); OI != OE; ++OI) {
1438 if (I0->getOperand(OI)->getType()->isTokenTy())
1439 // Don't touch any operand of token type.
1440 return false;
1441 auto SameAsI0 = [&I0, OI](const Instruction *I) {
1442 return areValuesTriviallySame(I->getOperand(OI), I->getIterator(),
1443 I0->getOperand(OI), I0->getIterator());
1444 };
1445 if (!all_of(Insts, SameAsI0)) {
1446 if (isa(I0) && !canReplaceGEPOperandWithPHI(I0, OI))
1447 // We can't create a PHI from this GEP.
1448 return false;
1449 ++NumPHIsRequired;
1450 }
1451 }
1452 return true;
1453 }
1454
1455 // Assuming canSinkLastInstruction(Blocks) has returned true, sink the last
1456 // instruction of every block in Blocks to their common successor, commoning
1457 // into one instruction.
// The first block's instruction is kept as the common instruction; the
// corresponding instructions of the other blocks are erased.
1458 static void sinkLastInstruction(ArrayRef Blocks) {
1459 unsigned Dummy;
1460 (void)Dummy;
1461 assert(canSinkLastInstruction(Blocks, Dummy) &&
1462 "Must analyze before transforming!");
1463 auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(0);
1464
1465 // canSinkLastInstruction returning true guarantees that every block has at
1466 // least one non-terminator instruction.
1467 SmallVector Insts;
1468 for (auto *BB : Blocks)
1469 Insts.push_back(BB->getTerminator()->getPrevNode());
1470
1471 // We don't need to do any checking here; canSinkLastInstruction should have
1472 // done it all for us.
1473 Instruction *I0 = Insts.front();
1474 SmallVector NewOperands;
1475 for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
1476 // This check is different to that in canSinkLastInstruction. There, we
1477 // cared about the global view once simplifycfg (and instcombine) have
1478 // completed - it takes into account PHIs that become trivially
1479 // simplifiable. However here we need a more local view; if an operand
1480 // differs we create a PHI and rely on instcombine to clean up the very
1481 // small mess we may make.
1482 bool NeedPHI = any_of(Insts, [&I0, O](const Instruction *I) {
1483 return I->getOperand(O) != I0->getOperand(O);
1484 });
1485 if (!NeedPHI) {
1486 NewOperands.push_back(I0->getOperand(O));
1487 continue;
1488 }
1489
1490 // Create a new PHI in the successor block and populate it.
1491 auto *Op = I0->getOperand(O);
1492 assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
1493 auto *PN = PHINode::Create(Op->getType(), Insts.size(),
1494 Op->getName() + ".sink", &BBEnd->front());
1495 for (auto *I : Insts)
1496 PN->addIncoming(I->getOperand(O), I->getParent());
1497 NewOperands.push_back(PN);
1498 }
1499
1500 // Arbitrarily use I0 as the new "common" instruction; remap its operands
1501 // and move it to the start of the successor block.
1502 for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
1503 I0->getOperandUse(O).set(NewOperands[O]);
1504 I0->moveBefore(&*BBEnd->getFirstInsertionPt());
1505
1506 if (!isa(I0)) {
1507 // canSinkLastInstruction checked that all instructions were used by
1508 // one and only one PHI node. Find that now, RAUW it to our common
1509 // instruction and nuke it.
1510 assert(I0->hasOneUse());
1511 auto *PN = cast(*I0->user_begin());
1512 PN->replaceAllUsesWith(I0);
1513 PN->eraseFromParent();
1514 }
1515
1516 // Finally nuke all instructions apart from the common instruction. Before
// erasing each duplicate, intersect its optional data into I0 so that I0
// does not keep optional data that one of the merged instructions lacked.
// NOTE(review): canSinkLastInstruction only requires isSameOperationAs;
// presumably that does not compare optional flags - confirm.
// TODO: Use combineMetadata here to preserve what metadata we can.
1517 for (auto *I : Insts)
1518 if (I != I0) {
I0->intersectOptionalDataWith(I);
1519 I->eraseFromParent();
}
1520 }
1521
13211522 /// Given an unconditional branch that goes to BBEnd,
13221523 /// check whether BBEnd has only two predecessors and the other predecessor
13231524 /// ends with an unconditional branch. If it is true, sink any common code
13241525 /// in the two predecessors to BBEnd.
13251526 static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
13261527 assert(BI1->isUnconditional());
1327 BasicBlock *BB1 = BI1->getParent();
13281528 BasicBlock *BBEnd = BI1->getSuccessor(0);
13291529
1330 // Check that BBEnd has two predecessors and the other predecessor ends with
1331 // an unconditional branch.
1332 pred_iterator PI = pred_begin(BBEnd), PE = pred_end(BBEnd);
1333 BasicBlock *Pred0 = *PI++;
1334 if (PI == PE) // Only one predecessor.
1335 return false;
1336 BasicBlock *Pred1 = *PI++;
1337 if (PI != PE) // More than two predecessors.
1338 return false;
1339 BasicBlock *BB2 = (Pred0 == BB1) ? Pred1 : Pred0;
1340 BranchInst *BI2 = dyn_cast(BB2->getTerminator());
1341 if (!BI2 || !BI2->isUnconditional())
1342 return false;
1343
1344 // Gather the PHI nodes in BBEnd.
1345 SmallDenseMap, PHINode *> JointValueMap;
1346 Instruction *FirstNonPhiInBBEnd = nullptr;
1347 for (BasicBlock::iterator I = BBEnd->begin(), E = BBEnd->end(); I != E; ++I) {
1348 if (PHINode *PN = dyn_cast(I)) {
1349 Value *BB1V = PN->getIncomingValueForBlock(BB1);
1350 Value *BB2V = PN->getIncomingValueForBlock(BB2);
1351 JointValueMap[std::make_pair(BB1V, BB2V)] = PN;
1352 } else {
1353 FirstNonPhiInBBEnd = &*I;
1354 break;
1355 }
1356 }
1357 if (!FirstNonPhiInBBEnd)
1358 return false;
1359
1360 // This does very trivial matching, with limited scanning, to find identical
1361 // instructions in the two blocks. We scan backward for obviously identical
1362 // instructions in an identical order.
1363 BasicBlock::InstListType::reverse_iterator RI1 = BB1->getInstList().rbegin(),
1364 RE1 = BB1->getInstList().rend(),
1365 RI2 = BB2->getInstList().rbegin(),
1366 RE2 = BB2->getInstList().rend();
1367 // Skip debug info.
1368 while (RI1 != RE1 && isa(&*RI1))
1369 ++RI1;
1370 if (RI1 == RE1)
1371 return false;
1372 while (RI2 != RE2 && isa(&*RI2))
1373 ++RI2;
1374 if (RI2 == RE2)
1375 return false;
1376 // Skip the unconditional branches.
1377 ++RI1;
1378 ++RI2;
1530 SmallVector Blocks;
1531 for (auto *BB : predecessors(BBEnd))
1532 Blocks.push_back(BB);
1533 if (Blocks.size() != 2 ||
1534 !all_of(Blocks, [](const BasicBlock *BB) {
1535 auto *BI = dyn_cast(BB->getTerminator());
1536 return BI && BI->isUnconditional();
1537 }))
1538 return false;
13791539
13801540 bool Changed = false;
1381 while (RI1 != RE1 && RI2 != RE2) {
1382 // Skip debug info.
1383 while (RI1 != RE1 && isa(&*RI1))
1384 ++RI1;
1385 if (RI1 == RE1)
1386 return Changed;
1387 while (RI2 != RE2 && isa(&*RI2))
1388 ++RI2;
1389 if (RI2 == RE2)
1390 return Changed;
1391
1392 Instruction *I1 = &*RI1, *I2 = &*RI2;
1393 auto InstPair = std::make_pair(I1, I2);
1394 // I1 and I2 should have a single use in the same PHI node, and they
1395 // perform the same operation.
1396 // Cannot move control-flow-involving, volatile loads, vaarg, etc.
1397 if (isa(I1) || isa(I2) || isa(I1) ||
1398 isa(I2) || I1->isEHPad() || I2->isEHPad() ||
1399 isa(I1) || isa(I2) ||
1400 I1->mayHaveSideEffects() || I2->mayHaveSideEffects() ||
1401 I1->mayReadOrWriteMemory() || I2->mayReadOrWriteMemory() ||
1402 !I1->hasOneUse() || !I2->hasOneUse() || !JointValueMap.count(InstPair))
1403 return Changed;
1404
1405 // Check whether we should swap the operands of ICmpInst.
1406 // TODO: Add support of communativity.
1407 ICmpInst *ICmp1 = dyn_cast(I1), *ICmp2 = dyn_cast(I2);
1408 bool SwapOpnds = false;
1409 if (ICmp1 && ICmp2 && ICmp1->getOperand(0) != ICmp2->getOperand(0) &&
1410 ICmp1->getOperand(1) != ICmp2->getOperand(1) &&
1411 (ICmp1->getOperand(0) == ICmp2->getOperand(1) ||
1412 ICmp1->getOperand(1) == ICmp2->getOperand(0))) {
1413 ICmp2->swapOperands();
1414 SwapOpnds = true;
1415 }
1416 if (!I1->isSameOperationAs(I2)) {
1417 if (SwapOpnds)
1418 ICmp2->swapOperands();
1419 return Changed;
1420 }
1421
1422 // The operands should be either the same or they need to be generated
1423 // with a PHI node after sinking. We only handle the case where there is
1424 // a single pair of different operands.
1425 Value *DifferentOp1 = nullptr, *DifferentOp2 = nullptr;
1426 unsigned Op1Idx = ~0U;
1427 for (unsigned I = 0, E = I1->getNumOperands(); I != E; ++I) {
1428 if (I1->getOperand(I) == I2->getOperand(I))
1429 continue;
1430 // Early exit if we have more-than one pair of different operands or if
1431 // we need a PHI node to replace a constant.
1432 if (Op1Idx != ~0U || isa(I1->getOperand(I)) ||
1433 isa(I2->getOperand(I))) {
1434 // If we can't sink the instructions, undo the swapping.
1435 if (SwapOpnds)
1436 ICmp2->swapOperands();
1437 return Changed;
1438 }
1439 DifferentOp1 = I1->getOperand(I);
1440 Op1Idx = I;
1441 DifferentOp2 = I2->getOperand(I);
1442 }
1443
1444 DEBUG(dbgs() << "SINK common instructions " << *I1 << "\n");
1445 DEBUG(dbgs() << " " << *I2 << "\n");
1446
1447 // We insert the pair of different operands to JointValueMap and
1448 // remove (I1, I2) from JointValueMap.
1449 if (Op1Idx != ~0U) {
1450 auto &NewPN = JointValueMap[std::make_pair(DifferentOp1, DifferentOp2)];
1451 if (!NewPN) {
1452 NewPN =
1453 PHINode::Create(DifferentOp1->getType(), 2,
1454 DifferentOp1->getName() + ".sink", &BBEnd->front());
1455 NewPN->addIncoming(DifferentOp1, BB1);
1456 NewPN->addIncoming(DifferentOp2, BB2);
1457 DEBUG(dbgs() << "Create PHI node " << *NewPN << "\n";);
1458 }
1459 // I1 should use NewPN instead of DifferentOp1.
1460 I1->setOperand(Op1Idx, NewPN);
1461 }
1462 PHINode *OldPN = JointValueMap[InstPair];
1463 JointValueMap.erase(InstPair);
1464
1465 // We need to update RE1 and RE2 if we are going to sink the first
1466 // instruction in the basic block down.
1467 bool UpdateRE1 = (I1 == &BB1->front()), UpdateRE2 = (I2 == &BB2->front());
1468 // Sink the instruction.
1469 BBEnd->getInstList().splice(FirstNonPhiInBBEnd->getIterator(),
1470 BB1->getInstList(), I1);
1471 if (!OldPN->use_empty())
1472 OldPN->replaceAllUsesWith(I1);
1473 OldPN->eraseFromParent();
1474
1475 if (!I2->use_empty())
1476 I2->replaceAllUsesWith(I1);
1477 I1->intersectOptionalDataWith(I2);
1478 // TODO: Use combineMetadata here to preserve what metadata we can
1479 // (analogous to the hoisting case above).
1480 I2->eraseFromParent();
1481
1482 if (UpdateRE1)
1483 RE1 = BB1->getInstList().rend();
1484 if (UpdateRE2)
1485 RE2 = BB2->getInstList().rend();
1486 FirstNonPhiInBBEnd = &*I1;
1541 unsigned NumPHIsToInsert;
1542 while (canSinkLastInstruction(Blocks, NumPHIsToInsert) && NumPHIsToInsert <= 1) {
1543 sinkLastInstruction(Blocks);
14871544 NumSinkCommons++;
14881545 Changed = true;
14891546 }
105105
106106 if.else:
107107 store i32 3, i32* %p, align 4
108 %incdec.ptr5 = getelementptr inbounds i32, i32* %p, i32 2
108 %incdec.ptr5 = getelementptr inbounds i32, i32* %p, i32 3
109109 store i32 5, i32* %incdec.ptr1, align 4
110110 store i32 6, i32* %incdec.ptr5, align 4
111111 br label %if.end
88 ; return -1;
99 ; }
1010
11 ; CHECK: mvnlt
1112 ; CHECK: .loc 1 6 7
12 ; CHECK: mvn
13 ; CHECK: strlt
1314
1415 target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
1516 target triple = "armv7--linux-gnueabihf"
2828 %4 = load double, double* %a, align 8
2929 %mul1 = fmul fast double %1, %4
3030 %sub1 = fsub fast double %mul1, %0
31 store double %sub1, double* %y, align 8
31 %gep1 = getelementptr double, double* %y, i32 1
32 store double %sub1, double* %gep1, align 8
3233 br label %if.end
3334
3435 if.end: ; preds = %if.else, %if.then
8080 ; CHECK: call
8181 ; CHECK: add
8282 ; CHECK-NOT: br
83
; Both arms store the result of an add on %x (with different constants) to
; %y. The checks after this function expect a select, one store, and no
; further store.
84 define i32 @test4(i1 zeroext %flag, i32 %x, i32* %y) {
85 entry:
86 br i1 %flag, label %if.then, label %if.else
87
88 if.then:
89 %a = add i32 %x, 5
90 store i32 %a, i32* %y
91 br label %if.end
92
93 if.else:
94 %b = add i32 %x, 7
95 store i32 %b, i32* %y
96 br label %if.end
97
98 if.end:
99 ret i32 1
100 }
101
102 ; CHECK-LABEL: test4
103 ; CHECK: select
104 ; CHECK: store
105 ; CHECK-NOT: store
106
; Same shape as test4, but only the store in %if.then is volatile. The checks
; after this function expect a volatile store and a further store to remain.
107 define i32 @test5(i1 zeroext %flag, i32 %x, i32* %y) {
108 entry:
109 br i1 %flag, label %if.then, label %if.else
110
111 if.then:
112 %a = add i32 %x, 5
113 store volatile i32 %a, i32* %y
114 br label %if.end
115
116 if.else:
117 %b = add i32 %x, 7
118 store i32 %b, i32* %y
119 br label %if.end
120
121 if.end:
122 ret i32 1
123 }
124
125 ; CHECK-LABEL: test5
126 ; CHECK: store volatile
127 ; CHECK: store
128
; Same shape as test4, but the stores in both arms are volatile. The checks
; after this function expect a select, one volatile store, and no further
; store.
129 define i32 @test6(i1 zeroext %flag, i32 %x, i32* %y) {
130 entry:
131 br i1 %flag, label %if.then, label %if.else
132
133 if.then:
134 %a = add i32 %x, 5
135 store volatile i32 %a, i32* %y
136 br label %if.end
137
138 if.else:
139 %b = add i32 %x, 7
140 store volatile i32 %b, i32* %y
141 br label %if.end
142
143 if.end:
144 ret i32 1
145 }
146
147 ; CHECK-LABEL: test6
148 ; CHECK: select
149 ; CHECK: store volatile
150 ; CHECK-NOT: store
151
; Each arm performs a volatile load of %y, an add with a different constant,
; and a volatile store back to %y. The checks after this function expect a
; select and a volatile load (in either order), then a volatile store, with
; no further load or store.
152 define i32 @test7(i1 zeroext %flag, i32 %x, i32* %y) {
153 entry:
154 br i1 %flag, label %if.then, label %if.else
155
156 if.then:
157 %z = load volatile i32, i32* %y
158 %a = add i32 %z, 5
159 store volatile i32 %a, i32* %y
160 br label %if.end
161
162 if.else:
163 %w = load volatile i32, i32* %y
164 %b = add i32 %w, 7
165 store volatile i32 %b, i32* %y
166 br label %if.end
167
168 if.end:
169 ret i32 1
170 }
171
172 ; CHECK-LABEL: test7
173 ; CHECK-DAG: select
174 ; CHECK-DAG: load volatile
175 ; CHECK: store volatile
176 ; CHECK-NOT: load
177 ; CHECK-NOT: store
178
179 ; %z and %w are in different blocks. We shouldn't sink the add because
180 ; there may be intervening memory instructions.
; (%z is loaded in %entry, %w in %if.else.) The checks after this function
; expect two adds to remain.
181 define i32 @test8(i1 zeroext %flag, i32 %x, i32* %y) {
182 entry:
183 %z = load volatile i32, i32* %y
184 br i1 %flag, label %if.then, label %if.else
185
186 if.then:
187 %a = add i32 %z, 5
188 store volatile i32 %a, i32* %y
189 br label %if.end
190
191 if.else:
192 %w = load volatile i32, i32* %y
193 %b = add i32 %w, 7
194 store volatile i32 %b, i32* %y
195 br label %if.end
196
197 if.end:
198 ret i32 1
199 }
200
201 ; CHECK-LABEL: test8
202 ; CHECK: add
203 ; CHECK: add
204
205 ; The extra store in %if.then means %z and %w are not equivalent.
; (The store to %p sits between the load of %z and the add that uses it.)
; The checks after this function expect two adds to remain.
206 define i32 @test9(i1 zeroext %flag, i32 %x, i32* %y, i32* %p) {
207 entry:
208 br i1 %flag, label %if.then, label %if.else
209
210 if.then:
211 store i32 7, i32* %p
212 %z = load volatile i32, i32* %y
213 store i32 6, i32* %p
214 %a = add i32 %z, 5
215 store volatile i32 %a, i32* %y
216 br label %if.end
217
218 if.else:
219 %w = load volatile i32, i32* %y
220 %b = add i32 %w, 7
221 store volatile i32 %b, i32* %y
222 br label %if.end
223
224 if.end:
225 ret i32 1
226 }
227
228 ; CHECK-LABEL: test9
229 ; CHECK: add
230 ; CHECK: add
231
232 %struct.anon = type { i32, i32 }
233
234 ; The GEP indexes a struct type so cannot have a variable last index.
; The two arms address different fields of %struct.anon (index 0 and index 1).
; The checks after this function expect two getelementptrs, followed by a phi
; and a volatile store.
235 define i32 @test10(i1 zeroext %flag, i32 %x, i32* %y, %struct.anon* %s) {
236 entry:
237 br i1 %flag, label %if.then, label %if.else
238
239 if.then:
240 %dummy = add i32 %x, 5
241 %gepa = getelementptr inbounds %struct.anon, %struct.anon* %s, i32 0, i32 0
242 store volatile i32 %x, i32* %gepa
243 br label %if.end
244
245 if.else:
246 %dummy1 = add i32 %x, 6
247 %gepb = getelementptr inbounds %struct.anon, %struct.anon* %s, i32 0, i32 1
248 store volatile i32 %x, i32* %gepb
249 br label %if.end
250
251 if.end:
252 ret i32 1
253 }
254
255 ; CHECK-LABEL: test10
256 ; CHECK: getelementptr
257 ; CHECK: getelementptr
258 ; CHECK: phi
259 ; CHECK: store volatile