llvm.org GIT mirror llvm / 6747ae5
AMDGPU/InsertWaitcnts: Remove kill-related logic
Summary: This is not needed, because we don't actually insert relevant branches for KILLs that late in the compilation flow. Besides, this was always checking for the wrong kill opcode anyway...
Reviewers: msearles, rampitec, scott.linder, kanarayan
Subscribers: arsenm, kzhuravl, jvesely, wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D54085
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@346362 91177308-0d34-0410-b5e6-96231b3b80d8
Nicolai Haehnle 10 months ago
1 changed file(s) with 1 addition(s) and 101 deletion(s). Raw diff Collapse all Expand all
381381
382382 DenseMap<MachineLoop *, std::unique_ptr<LoopWaitcntData>> LoopWaitcntDataMap;
383383
384 std::vector<std::unique_ptr<BlockWaitcntBrackets>> KillWaitBrackets;
385
386384 // ForceEmitZeroWaitcnts: force all waitcnts insts to be s_waitcnt 0
387385 // because of amdgpu-waitcnt-forcezero flag
388386 bool ForceEmitZeroWaitcnts;
407405 AU.setPreservesCFG();
408406 AU.addRequired<MachineLoopInfo>();
409407 MachineFunctionPass::getAnalysisUsage(AU);
410 }
411
412 void addKillWaitBracket(BlockWaitcntBrackets *Bracket) {
413 // The waitcnt information is copied because it changes as the block is
414 // traversed.
415 KillWaitBrackets.push_back(
416 llvm::make_unique<BlockWaitcntBrackets>(*Bracket));
417408 }
418409
419410 bool isForceEmitWaitcnt() const {
14241415 MixedExpTypes |= PredScoreBrackets->mixedExpTypes();
14251416 }
14261417
1427 // TODO: Is SC Block->IsMainExit() same as Block.succ_empty()?
1428 // Also handle kills for exit block.
1429 if (Block.succ_empty() && !KillWaitBrackets.empty()) {
1430 for (unsigned int I = 0; I < KillWaitBrackets.size(); I++) {
1431 for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
1432 T = (enum InstCounterType)(T + 1)) {
1433 int Span = KillWaitBrackets[I]->getScoreUB(T) -
1434 KillWaitBrackets[I]->getScoreLB(T);
1435 MaxPending[T] = std::max(MaxPending[T], Span);
1436 Span = KillWaitBrackets[I]->pendingFlat(T) -
1437 KillWaitBrackets[I]->getScoreLB(T);
1438 MaxFlat[T] = std::max(MaxFlat[T], Span);
1439 }
1440
1441 MixedExpTypes |= KillWaitBrackets[I]->mixedExpTypes();
1442 }
1443 }
1444
14451418 // Special handling for GDS_GPR_LOCK and EXP_GPR_LOCK.
14461419 for (MachineBasicBlock *Pred : Block.predecessors()) {
14471420 BlockWaitcntBrackets *PredScoreBrackets =
14571430 int EXPSpan = PredScoreBrackets->getEventUB(EXP_GPR_LOCK) -
14581431 PredScoreBrackets->getScoreLB(EXP_CNT);
14591432 MaxPending[EXP_CNT] = std::max(MaxPending[EXP_CNT], EXPSpan);
1460 }
1461
1462 // TODO: Is SC Block->IsMainExit() same as Block.succ_empty()?
1463 if (Block.succ_empty() && !KillWaitBrackets.empty()) {
1464 for (unsigned int I = 0; I < KillWaitBrackets.size(); I++) {
1465 int GDSSpan = KillWaitBrackets[I]->getEventUB(GDS_GPR_LOCK) -
1466 KillWaitBrackets[I]->getScoreLB(EXP_CNT);
1467 MaxPending[EXP_CNT] = std::max(MaxPending[EXP_CNT], GDSSpan);
1468 int EXPSpan = KillWaitBrackets[I]->getEventUB(EXP_GPR_LOCK) -
1469 KillWaitBrackets[I]->getScoreLB(EXP_CNT);
1470 MaxPending[EXP_CNT] = std::max(MaxPending[EXP_CNT], EXPSpan);
1471 }
14721433 }
14731434
14741435 #if 0
15501511 }
15511512 }
15521513
1553 // TODO: Is SC Block->IsMainExit() same as Block.succ_empty()?
1554 // Set the register scoreboard.
1555 if (Block.succ_empty() && !KillWaitBrackets.empty()) {
1556 for (unsigned int I = 0; I < KillWaitBrackets.size(); I++) {
1557 // Now merge the gpr_reg_score information.
1558 for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
1559 T = (enum InstCounterType)(T + 1)) {
1560 int PredLB = KillWaitBrackets[I]->getScoreLB(T);
1561 int PredUB = KillWaitBrackets[I]->getScoreUB(T);
1562 if (PredLB < PredUB) {
1563 int PredScale = MaxPending[T] - PredUB;
1564 // Merge vgpr scores.
1565 for (int J = 0; J <= KillWaitBrackets[I]->getMaxVGPR(); J++) {
1566 int PredRegScore = KillWaitBrackets[I]->getRegScore(J, T);
1567 if (PredRegScore <= PredLB)
1568 continue;
1569 int NewRegScore = PredScale + PredRegScore;
1570 ScoreBrackets->setRegScore(
1571 J, T, std::max(ScoreBrackets->getRegScore(J, T), NewRegScore));
1572 }
1573 // Also need to merge sgpr scores for lgkm_cnt.
1574 if (T == LGKM_CNT) {
1575 for (int J = 0; J <= KillWaitBrackets[I]->getMaxSGPR(); J++) {
1576 int PredRegScore =
1577 KillWaitBrackets[I]->getRegScore(J + NUM_ALL_VGPRS, LGKM_CNT);
1578 if (PredRegScore <= PredLB)
1579 continue;
1580 int NewRegScore = PredScale + PredRegScore;
1581 ScoreBrackets->setRegScore(
1582 J + NUM_ALL_VGPRS, LGKM_CNT,
1583 std::max(
1584 ScoreBrackets->getRegScore(J + NUM_ALL_VGPRS, LGKM_CNT),
1585 NewRegScore));
1586 }
1587 }
1588 }
1589 }
1590
1591 // Also merge the WaitEvent information.
1592 ForAllWaitEventType(W) {
1593 enum InstCounterType T = KillWaitBrackets[I]->eventCounter(W);
1594 int PredEventUB = KillWaitBrackets[I]->getEventUB(W);
1595 if (PredEventUB > KillWaitBrackets[I]->getScoreLB(T)) {
1596 int NewEventUB =
1597 MaxPending[T] + PredEventUB - KillWaitBrackets[I]->getScoreUB(T);
1598 if (NewEventUB > 0) {
1599 ScoreBrackets->setEventUB(
1600 W, std::max(ScoreBrackets->getEventUB(W), NewEventUB));
1601 }
1602 }
1603 }
1604 }
1605 }
1606
16071514 // Special case handling of GDS_GPR_LOCK and EXP_GPR_LOCK. Merge this for the
16081515 // sequencing predecessors, because changes to EXEC require waitcnts due to
16091516 // the delayed nature of these operations.
16981605 }
16991606 ScoreBrackets->setWaitcnt(&Inst);
17001607 continue;
1701 }
1702
1703 // Kill instructions generate a conditional branch to the endmain block.
1704 // Merge the current waitcnt state into the endmain block information.
1705 // TODO: Are there other flavors of KILL instruction?
1706 if (Inst.getOpcode() == AMDGPU::KILL) {
1707 addKillWaitBracket(ScoreBrackets);
17081608 }
17091609
17101610 bool VCCZBugWorkAround = false;
18701770 LoopWaitcntDataMap.clear();
18711771 BlockWaitcntProcessedSet.clear();
18721772
1873 // Walk over the blocks in reverse post-dominator order, inserting
1773 // Walk over the blocks in reverse post order, inserting
18741774 // s_waitcnt where needed.
18751775 ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);
18761776 bool Modified = false;