llvm.org GIT mirror llvm / 5154970
Revert "[SimplifyCFG] Rewrite SinkThenElseCodeToEnd" This reverts commit r278660. It causes downstream assertion failure in InstCombine on shuffle instructions. Comes up in __mm_swizzle_epi32. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@278672 91177308-0d34-0410-b5e6-96231b3b80d8 Reid Kleckner 4 years ago
5 changed file(s) with 159 addition(s) and 395 deletion(s). Raw diff Collapse all Expand all
13181318 return true;
13191319 }
13201320
1321 // Return true if V0 and V1 are equivalent. This handles the obvious cases
1322 // where V0 == V1 and V0 and V1 are both identical instructions, but also
1323 // handles loads and stores with identical operands.
1324 //
1325 // Because determining if two memory instructions are equivalent
1326 // depends on control flow, the \c At0 and \c At1 parameters specify a
1327 // location for the query. This function is essentially answering the
1328 // query "If V0 were moved to At0, and V1 were moved to At1, are V0 and V1
1329 // equivalent?". In practice this means checking that moving V0 to At0
1330 // doesn't cross any other memory instructions.
1331 static bool areValuesTriviallySame(Value *V0, BasicBlock::const_iterator At0,
1332 Value *V1, BasicBlock::const_iterator At1) {
1333 if (V0 == V1)
1334 return true;
1335
1336 // Also check for instructions that are identical but not pointer-identical.
1337 // This can include load instructions that haven't been CSE'd.
1338 if (!isa(V0) || !isa(V1))
1339 return false;
1340 const auto *I0 = cast(V0);
1341 const auto *I1 = cast(V1);
1342 if (!I0->isIdenticalToWhenDefined(I1))
1343 return false;
1344
1345 if (!I0->mayReadOrWriteMemory())
1346 return true;
1347
1348 // Instructions that may read or write memory have extra restrictions. We
1349 // must ensure we don't treat %a and %b as equivalent in code such as:
1350 //
1351 // %a = load %x
1352 // store %x, 1
1353 // if (%c) {
1354 // %b = load %x
1355 // %d = add %b, 1
1356 // } else {
1357 // %d = add %a, 1
1358 // }
1359
1360 // Be conservative. We don't want to search the entire CFG between def
1361 // and use; if the def isn't in the same block as the use just bail.
1362 if (I0->getParent() != At0->getParent() ||
1363 I1->getParent() != At1->getParent())
1364 return false;
1365
1366 // Again, be super conservative. Ideally we'd be able to query AliasAnalysis
1367 // but we currently don't have that available.
1368 auto WritesMemory = [](const Instruction &I) {
1369 return I.mayReadOrWriteMemory();
1370 };
1371 if (std::any_of(std::next(I0->getIterator()), At0, WritesMemory))
1372 return false;
1373 if (std::any_of(std::next(I1->getIterator()), At1, WritesMemory))
1374 return false;
1375 return true;
1376 }
1377
1378 // Is it legal to replace the operand \c OpIdx of \c GEP with a PHI node?
1379 static bool canReplaceGEPOperandWithPHI(const Instruction *GEP,
1380 unsigned OpIdx) {
1381 if (OpIdx == 0)
1382 return true;
1383 gep_type_iterator It = std::next(gep_type_begin(GEP), OpIdx - 1);
1384 return !It->isStructTy();
1385 }
1386
1387 // All blocks in Blocks unconditionally jump to a common successor. Analyze
1388 // the last non-terminator instruction in each block and return true if it would
1389 // be possible to sink them into their successor, creating one common
1390 // instruction instead. Set NumPHIsRequired to the number of PHI nodes that
1391 // would need to be created during sinking.
1392 static bool canSinkLastInstruction(ArrayRef Blocks,
1393 unsigned &NumPHIsRequired) {
1394 SmallVector Insts;
1395 for (auto *BB : Blocks) {
1396 if (BB->getTerminator() == &BB->front())
1397 // Block was empty.
1398 return false;
1399 Insts.push_back(BB->getTerminator()->getPrevNode());
1400 }
1401
1402 // Prune out obviously bad instructions to move. Any non-store instruction
1403 // must have exactly one use, and we check later that use is by a single,
1404 // common PHI instruction in the successor.
1405 for (auto *I : Insts) {
1406 // These instructions may change or break semantics if moved.
1407 if (isa(I) || I->isEHPad() || isa(I) ||
1408 I->getType()->isTokenTy())
1409 return false;
1410 // Apart from loads and stores, we won't move anything that could
1411 // change memory or have sideeffects.
1412 if (!isa(I) && !isa(I) &&
1413 (I->mayHaveSideEffects() || I->mayHaveSideEffects()))
1414 return false;
1415 // Everything must have only one use too, apart from stores which
1416 // have no uses.
1417 if (!isa(I) && !I->hasOneUse())
1418 return false;
1419 }
1420
1421 const Instruction *I0 = Insts.front();
1422 for (auto *I : Insts)
1423 if (!I->isSameOperationAs(I0))
1424 return false;
1425
1426 // If this isn't a store, check the only user is a single PHI.
1427 if (!isa(I0)) {
1428 auto *PNUse = dyn_cast(*I0->user_begin());
1429 if (!PNUse ||
1430 !all_of(Insts, [&PNUse](const Instruction *I) {
1431 return *I->user_begin() == PNUse;
1432 }))
1433 return false;
1434 }
1435
1436 NumPHIsRequired = 0;
1437 for (unsigned OI = 0, OE = I0->getNumOperands(); OI != OE; ++OI) {
1438 if (I0->getOperand(OI)->getType()->isTokenTy())
1439 // Don't touch any operand of token type.
1440 return false;
1441 auto SameAsI0 = [&I0, OI](const Instruction *I) {
1442 return areValuesTriviallySame(I->getOperand(OI), I->getIterator(),
1443 I0->getOperand(OI), I0->getIterator());
1444 };
1445 if (!all_of(Insts, SameAsI0)) {
1446 if (isa(I0) && !canReplaceGEPOperandWithPHI(I0, OI))
1447 // We can't create a PHI from this GEP.
1448 return false;
1449 ++NumPHIsRequired;
1450 }
1451 }
1452 return true;
1453 }
1454
1455 // Assuming canSinkLastInstruction(Blocks) has returned true, sink the last
1456 // instruction of every block in Blocks to their common successor, commoning
1457 // into one instruction.
1458 static void sinkLastInstruction(ArrayRef Blocks) {
1459 unsigned Dummy;
1460 (void)Dummy;
1461 assert(canSinkLastInstruction(Blocks, Dummy) &&
1462 "Must analyze before transforming!");
1463 auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(0);
1464
1465 // canSinkLastInstruction returning true guarantees that every block has at
1466 // least one non-terminator instruction.
1467 SmallVector Insts;
1468 for (auto *BB : Blocks)
1469 Insts.push_back(BB->getTerminator()->getPrevNode());
1470
1471 // We don't need to do any checking here; canSinkLastInstruction should have
1472 // done it all for us.
1473 Instruction *I0 = Insts.front();
1474 SmallVector NewOperands;
1475 for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
1476 // This check is different to that in canSinkLastInstruction. There, we
1477 // cared about the global view once simplifycfg (and instcombine) have
1478 // completed - it takes into account PHIs that become trivially
1479 // simplifiable. However here we need a more local view; if an operand
1480 // differs we create a PHI and rely on instcombine to clean up the very
1481 // small mess we may make.
1482 bool NeedPHI = any_of(Insts, [&I0, O](const Instruction *I) {
1483 return I->getOperand(O) != I0->getOperand(O);
1484 });
1485 if (!NeedPHI) {
1486 NewOperands.push_back(I0->getOperand(O));
1487 continue;
1488 }
1489
1490 // Create a new PHI in the successor block and populate it.
1491 auto *Op = I0->getOperand(O);
1492 assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
1493 auto *PN = PHINode::Create(Op->getType(), Insts.size(),
1494 Op->getName() + ".sink", &BBEnd->front());
1495 for (auto *I : Insts)
1496 PN->addIncoming(I->getOperand(O), I->getParent());
1497 NewOperands.push_back(PN);
1498 }
1499
1500 // Arbitrarily use I0 as the new "common" instruction; remap its operands
1501 // and move it to the start of the successor block.
1502 for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
1503 I0->getOperandUse(O).set(NewOperands[O]);
1504 I0->moveBefore(&*BBEnd->getFirstInsertionPt());
1505
1506 if (!isa(I0)) {
1507 // canSinkLastInstruction checked that all instructions were used by
1508 // one and only one PHI node. Find that now, RAUW it to our common
1509 // instruction and nuke it.
1510 assert(I0->hasOneUse());
1511 auto *PN = cast(*I0->user_begin());
1512 PN->replaceAllUsesWith(I0);
1513 PN->eraseFromParent();
1514 }
1515
1516 // Finally nuke all instructions apart from the common instruction.
1517 for (auto *I : Insts)
1518 if (I != I0)
1519 I->eraseFromParent();
1520 }
1521
15221321 /// Given an unconditional branch that goes to BBEnd,
15231322 /// check whether BBEnd has only two predecessors and the other predecessor
15241323 /// ends with an unconditional branch. If it is true, sink any common code
15251324 /// in the two predecessors to BBEnd.
15261325 static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
15271326 assert(BI1->isUnconditional());
1327 BasicBlock *BB1 = BI1->getParent();
15281328 BasicBlock *BBEnd = BI1->getSuccessor(0);
15291329
1530 SmallVector Blocks;
1531 for (auto *BB : predecessors(BBEnd))
1532 Blocks.push_back(BB);
1533 if (Blocks.size() != 2 ||
1534 !all_of(Blocks, [](const BasicBlock *BB) {
1535 auto *BI = dyn_cast(BB->getTerminator());
1536 return BI && BI->isUnconditional();
1537 }))
1538 return false;
1330 // Check that BBEnd has two predecessors and the other predecessor ends with
1331 // an unconditional branch.
1332 pred_iterator PI = pred_begin(BBEnd), PE = pred_end(BBEnd);
1333 BasicBlock *Pred0 = *PI++;
1334 if (PI == PE) // Only one predecessor.
1335 return false;
1336 BasicBlock *Pred1 = *PI++;
1337 if (PI != PE) // More than two predecessors.
1338 return false;
1339 BasicBlock *BB2 = (Pred0 == BB1) ? Pred1 : Pred0;
1340 BranchInst *BI2 = dyn_cast(BB2->getTerminator());
1341 if (!BI2 || !BI2->isUnconditional())
1342 return false;
1343
1344 // Gather the PHI nodes in BBEnd.
1345 SmallDenseMap, PHINode *> JointValueMap;
1346 Instruction *FirstNonPhiInBBEnd = nullptr;
1347 for (BasicBlock::iterator I = BBEnd->begin(), E = BBEnd->end(); I != E; ++I) {
1348 if (PHINode *PN = dyn_cast(I)) {
1349 Value *BB1V = PN->getIncomingValueForBlock(BB1);
1350 Value *BB2V = PN->getIncomingValueForBlock(BB2);
1351 JointValueMap[std::make_pair(BB1V, BB2V)] = PN;
1352 } else {
1353 FirstNonPhiInBBEnd = &*I;
1354 break;
1355 }
1356 }
1357 if (!FirstNonPhiInBBEnd)
1358 return false;
1359
1360 // This does very trivial matching, with limited scanning, to find identical
1361 // instructions in the two blocks. We scan backward for obviously identical
1362 // instructions in an identical order.
1363 BasicBlock::InstListType::reverse_iterator RI1 = BB1->getInstList().rbegin(),
1364 RE1 = BB1->getInstList().rend(),
1365 RI2 = BB2->getInstList().rbegin(),
1366 RE2 = BB2->getInstList().rend();
1367 // Skip debug info.
1368 while (RI1 != RE1 && isa(&*RI1))
1369 ++RI1;
1370 if (RI1 == RE1)
1371 return false;
1372 while (RI2 != RE2 && isa(&*RI2))
1373 ++RI2;
1374 if (RI2 == RE2)
1375 return false;
1376 // Skip the unconditional branches.
1377 ++RI1;
1378 ++RI2;
15391379
15401380 bool Changed = false;
1541 unsigned NumPHIsToInsert;
1542 while (canSinkLastInstruction(Blocks, NumPHIsToInsert) && NumPHIsToInsert <= 1) {
1543 sinkLastInstruction(Blocks);
1381 while (RI1 != RE1 && RI2 != RE2) {
1382 // Skip debug info.
1383 while (RI1 != RE1 && isa(&*RI1))
1384 ++RI1;
1385 if (RI1 == RE1)
1386 return Changed;
1387 while (RI2 != RE2 && isa(&*RI2))
1388 ++RI2;
1389 if (RI2 == RE2)
1390 return Changed;
1391
1392 Instruction *I1 = &*RI1, *I2 = &*RI2;
1393 auto InstPair = std::make_pair(I1, I2);
1394 // I1 and I2 should have a single use in the same PHI node, and they
1395 // perform the same operation.
1396 // Cannot move control-flow-involving, volatile loads, vaarg, etc.
1397 if (isa(I1) || isa(I2) || isa(I1) ||
1398 isa(I2) || I1->isEHPad() || I2->isEHPad() ||
1399 isa(I1) || isa(I2) ||
1400 I1->mayHaveSideEffects() || I2->mayHaveSideEffects() ||
1401 I1->mayReadOrWriteMemory() || I2->mayReadOrWriteMemory() ||
1402 !I1->hasOneUse() || !I2->hasOneUse() || !JointValueMap.count(InstPair))
1403 return Changed;
1404
1405 // Check whether we should swap the operands of ICmpInst.
1406 // TODO: Add support of commutativity.
1407 ICmpInst *ICmp1 = dyn_cast(I1), *ICmp2 = dyn_cast(I2);
1408 bool SwapOpnds = false;
1409 if (ICmp1 && ICmp2 && ICmp1->getOperand(0) != ICmp2->getOperand(0) &&
1410 ICmp1->getOperand(1) != ICmp2->getOperand(1) &&
1411 (ICmp1->getOperand(0) == ICmp2->getOperand(1) ||
1412 ICmp1->getOperand(1) == ICmp2->getOperand(0))) {
1413 ICmp2->swapOperands();
1414 SwapOpnds = true;
1415 }
1416 if (!I1->isSameOperationAs(I2)) {
1417 if (SwapOpnds)
1418 ICmp2->swapOperands();
1419 return Changed;
1420 }
1421
1422 // The operands should be either the same or they need to be generated
1423 // with a PHI node after sinking. We only handle the case where there is
1424 // a single pair of different operands.
1425 Value *DifferentOp1 = nullptr, *DifferentOp2 = nullptr;
1426 unsigned Op1Idx = ~0U;
1427 for (unsigned I = 0, E = I1->getNumOperands(); I != E; ++I) {
1428 if (I1->getOperand(I) == I2->getOperand(I))
1429 continue;
1430 // Early exit if we have more-than one pair of different operands or if
1431 // we need a PHI node to replace a constant.
1432 if (Op1Idx != ~0U || isa(I1->getOperand(I)) ||
1433 isa(I2->getOperand(I))) {
1434 // If we can't sink the instructions, undo the swapping.
1435 if (SwapOpnds)
1436 ICmp2->swapOperands();
1437 return Changed;
1438 }
1439 DifferentOp1 = I1->getOperand(I);
1440 Op1Idx = I;
1441 DifferentOp2 = I2->getOperand(I);
1442 }
1443
1444 DEBUG(dbgs() << "SINK common instructions " << *I1 << "\n");
1445 DEBUG(dbgs() << " " << *I2 << "\n");
1446
1447 // We insert the pair of different operands to JointValueMap and
1448 // remove (I1, I2) from JointValueMap.
1449 if (Op1Idx != ~0U) {
1450 auto &NewPN = JointValueMap[std::make_pair(DifferentOp1, DifferentOp2)];
1451 if (!NewPN) {
1452 NewPN =
1453 PHINode::Create(DifferentOp1->getType(), 2,
1454 DifferentOp1->getName() + ".sink", &BBEnd->front());
1455 NewPN->addIncoming(DifferentOp1, BB1);
1456 NewPN->addIncoming(DifferentOp2, BB2);
1457 DEBUG(dbgs() << "Create PHI node " << *NewPN << "\n";);
1458 }
1459 // I1 should use NewPN instead of DifferentOp1.
1460 I1->setOperand(Op1Idx, NewPN);
1461 }
1462 PHINode *OldPN = JointValueMap[InstPair];
1463 JointValueMap.erase(InstPair);
1464
1465 // We need to update RE1 and RE2 if we are going to sink the first
1466 // instruction in the basic block down.
1467 bool UpdateRE1 = (I1 == &BB1->front()), UpdateRE2 = (I2 == &BB2->front());
1468 // Sink the instruction.
1469 BBEnd->getInstList().splice(FirstNonPhiInBBEnd->getIterator(),
1470 BB1->getInstList(), I1);
1471 if (!OldPN->use_empty())
1472 OldPN->replaceAllUsesWith(I1);
1473 OldPN->eraseFromParent();
1474
1475 if (!I2->use_empty())
1476 I2->replaceAllUsesWith(I1);
1477 I1->intersectOptionalDataWith(I2);
1478 // TODO: Use combineMetadata here to preserve what metadata we can
1479 // (analogous to the hoisting case above).
1480 I2->eraseFromParent();
1481
1482 if (UpdateRE1)
1483 RE1 = BB1->getInstList().rend();
1484 if (UpdateRE2)
1485 RE2 = BB2->getInstList().rend();
1486 FirstNonPhiInBBEnd = &*I1;
15441487 NumSinkCommons++;
15451488 Changed = true;
15461489 }
105105
106106 if.else:
107107 store i32 3, i32* %p, align 4
108 %incdec.ptr5 = getelementptr inbounds i32, i32* %p, i32 3
108 %incdec.ptr5 = getelementptr inbounds i32, i32* %p, i32 2
109109 store i32 5, i32* %incdec.ptr1, align 4
110110 store i32 6, i32* %incdec.ptr5, align 4
111111 br label %if.end
88 ; return -1;
99 ; }
1010
11 ; CHECK: mvnlt
1211 ; CHECK: .loc 1 6 7
13 ; CHECK: strlt
12 ; CHECK: mvn
1413
1514 target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
1615 target triple = "armv7--linux-gnueabihf"
2828 %4 = load double, double* %a, align 8
2929 %mul1 = fmul fast double %1, %4
3030 %sub1 = fsub fast double %mul1, %0
31 %gep1 = getelementptr double, double* %y, i32 1
32 store double %sub1, double* %gep1, align 8
31 store double %sub1, double* %y, align 8
3332 br label %if.end
3433
3534 if.end: ; preds = %if.else, %if.then
8080 ; CHECK: call
8181 ; CHECK: add
8282 ; CHECK-NOT: br
83
; test4: both arms end with a non-volatile store to %y; only the stored value
; differs (%x + 5 in if.then, %x + 7 in if.else). The CHECK lines that follow
; expect a select and exactly one store.
84 define i32 @test4(i1 zeroext %flag, i32 %x, i32* %y) {
85 entry:
86 br i1 %flag, label %if.then, label %if.else
87
88 if.then:
89 %a = add i32 %x, 5
90 store i32 %a, i32* %y
91 br label %if.end
92
93 if.else:
94 %b = add i32 %x, 7
95 store i32 %b, i32* %y
96 br label %if.end
97
98 if.end:
99 ret i32 1
100 }
101
102 ; CHECK-LABEL: test4
103 ; CHECK: select
104 ; CHECK: store
105 ; CHECK-NOT: store
106
; test5: same shape as test4, except the store in if.then is volatile and the
; store in if.else is not. The CHECK lines that follow expect both a
; 'store volatile' and a plain 'store' to remain.
107 define i32 @test5(i1 zeroext %flag, i32 %x, i32* %y) {
108 entry:
109 br i1 %flag, label %if.then, label %if.else
110
111 if.then:
112 %a = add i32 %x, 5
113 store volatile i32 %a, i32* %y
114 br label %if.end
115
116 if.else:
117 %b = add i32 %x, 7
118 store i32 %b, i32* %y
119 br label %if.end
120
121 if.end:
122 ret i32 1
123 }
124
125 ; CHECK-LABEL: test5
126 ; CHECK: store volatile
127 ; CHECK: store
128
; test6: same shape as test4, but the stores in both arms are volatile. The
; CHECK lines that follow expect a select, one 'store volatile', and no
; further store.
129 define i32 @test6(i1 zeroext %flag, i32 %x, i32* %y) {
130 entry:
131 br i1 %flag, label %if.then, label %if.else
132
133 if.then:
134 %a = add i32 %x, 5
135 store volatile i32 %a, i32* %y
136 br label %if.end
137
138 if.else:
139 %b = add i32 %x, 7
140 store volatile i32 %b, i32* %y
141 br label %if.end
142
143 if.end:
144 ret i32 1
145 }
146
147 ; CHECK-LABEL: test6
148 ; CHECK: select
149 ; CHECK: store volatile
150 ; CHECK-NOT: store
151
; test7: each arm does a volatile load from %y, an add with a different
; constant (5 vs 7), and a volatile store back to %y. The CHECK lines that
; follow expect a select and a 'load volatile' (in either order), then one
; 'store volatile', with no further load or store.
152 define i32 @test7(i1 zeroext %flag, i32 %x, i32* %y) {
153 entry:
154 br i1 %flag, label %if.then, label %if.else
155
156 if.then:
157 %z = load volatile i32, i32* %y
158 %a = add i32 %z, 5
159 store volatile i32 %a, i32* %y
160 br label %if.end
161
162 if.else:
163 %w = load volatile i32, i32* %y
164 %b = add i32 %w, 7
165 store volatile i32 %b, i32* %y
166 br label %if.end
167
168 if.end:
169 ret i32 1
170 }
171
172 ; CHECK-LABEL: test7
173 ; CHECK-DAG: select
174 ; CHECK-DAG: load volatile
175 ; CHECK: store volatile
176 ; CHECK-NOT: load
177 ; CHECK-NOT: store
178
179 ; %z and %w are in different blocks. We shouldn't sink the add because
180 ; there may be intervening memory instructions.
; Here %z is loaded in the entry block while %w is loaded in if.else. The
; CHECK lines that follow expect two separate adds to remain.
181 define i32 @test8(i1 zeroext %flag, i32 %x, i32* %y) {
182 entry:
183 %z = load volatile i32, i32* %y
184 br i1 %flag, label %if.then, label %if.else
185
186 if.then:
187 %a = add i32 %z, 5
188 store volatile i32 %a, i32* %y
189 br label %if.end
190
191 if.else:
192 %w = load volatile i32, i32* %y
193 %b = add i32 %w, 7
194 store volatile i32 %b, i32* %y
195 br label %if.end
196
197 if.end:
198 ret i32 1
199 }
200
201 ; CHECK-LABEL: test8
202 ; CHECK: add
203 ; CHECK: add
204
205 ; The extra store in %if.then means %z and %w are not equivalent.
; In if.then a store to %p sits between the load of %z and the add that uses
; it; if.else has no such store. The CHECK lines that follow expect two
; separate adds to remain.
206 define i32 @test9(i1 zeroext %flag, i32 %x, i32* %y, i32* %p) {
207 entry:
208 br i1 %flag, label %if.then, label %if.else
209
210 if.then:
211 store i32 7, i32* %p
212 %z = load volatile i32, i32* %y
213 store i32 6, i32* %p
214 %a = add i32 %z, 5
215 store volatile i32 %a, i32* %y
216 br label %if.end
217
218 if.else:
219 %w = load volatile i32, i32* %y
220 %b = add i32 %w, 7
221 store volatile i32 %b, i32* %y
222 br label %if.end
223
224 if.end:
225 ret i32 1
226 }
227
228 ; CHECK-LABEL: test9
229 ; CHECK: add
230 ; CHECK: add
231
232 %struct.anon = type { i32, i32 }
233
234 ; The GEP indexes a struct type so cannot have a variable last index.
; The two arms address field 0 and field 1 of %struct.anon respectively and
; store %x through the resulting pointer. The CHECK lines that follow expect
; both getelementptrs to remain, then a phi, then a 'store volatile'.
235 define i32 @test10(i1 zeroext %flag, i32 %x, i32* %y, %struct.anon* %s) {
236 entry:
237 br i1 %flag, label %if.then, label %if.else
238
239 if.then:
240 %dummy = add i32 %x, 5
241 %gepa = getelementptr inbounds %struct.anon, %struct.anon* %s, i32 0, i32 0
242 store volatile i32 %x, i32* %gepa
243 br label %if.end
244
245 if.else:
246 %dummy1 = add i32 %x, 6
247 %gepb = getelementptr inbounds %struct.anon, %struct.anon* %s, i32 0, i32 1
248 store volatile i32 %x, i32* %gepb
249 br label %if.end
250
251 if.end:
252 ret i32 1
253 }
254
255 ; CHECK-LABEL: test10
256 ; CHECK: getelementptr
257 ; CHECK: getelementptr
258 ; CHECK: phi
259 ; CHECK: store volatile