llvm.org GIT mirror llvm / 5d206b6
[AMDGPU] Add MachineDCE pass after RenameIndependentSubregs. The detect-dead-lanes pass can create some dead defs. RenameIndependentSubregs will then break a REG_SEQUENCE which may use these dead defs. At this point a dead instruction can be removed, but we no longer run DCE. MachineDCE was only running before live variable analysis. The patch adds a means to preserve LiveIntervals and SlotIndexes in case the pass runs past that point. Differential Revision: https://reviews.llvm.org/D59626 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@357805 91177308-0d34-0410-b5e6-96231b3b80d8 Stanislav Mekhanoshin 10 months ago
8 changed file(s) with 63 addition(s) and 4 deletion(s). Raw diff Collapse all Expand all
99 //
1010 //===----------------------------------------------------------------------===//
1111
12 #include "llvm/ADT/DenseSet.h"
1213 #include "llvm/ADT/Statistic.h"
14 #include "llvm/CodeGen/LiveIntervals.h"
1315 #include "llvm/CodeGen/MachineFunctionPass.h"
1416 #include "llvm/CodeGen/MachineRegisterInfo.h"
1517 #include "llvm/CodeGen/Passes.h"
3133 const TargetRegisterInfo *TRI;
3234 const MachineRegisterInfo *MRI;
3335 const TargetInstrInfo *TII;
36 LiveIntervals *LIS;
3437 BitVector LivePhysRegs;
3538
3639 public:
4043 }
4144
4245 void getAnalysisUsage(AnalysisUsage &AU) const override {
43 AU.setPreservesCFG();
46 AU.setPreservesAll();
4447 MachineFunctionPass::getAnalysisUsage(AU);
4548 }
4649
7780 unsigned Reg = MO.getReg();
7881 if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
7982 // Don't delete live physreg defs, or any reserved register defs.
80 if (LivePhysRegs.test(Reg) || MRI->isReserved(Reg))
83 // Do not remove physreg defs if we have LIS as we may be unable
84 // to accurately recompute its liveness.
85 if (LivePhysRegs.test(Reg) || MRI->isReserved(Reg) || LIS)
8186 return false;
8287 } else {
88 // An instruction can also use its def in case if it is a tied operand.
89 // TODO: Technically we can also remove it if def dominates the use.
90 // This can happen when two instructions define different subregs
91 // of the same register.
8392 for (const MachineInstr &Use : MRI->use_nodbg_instructions(Reg)) {
8493 if (&Use != MI)
8594 // This def has a non-debug use. Don't delete the instruction!
101110 MRI = &MF.getRegInfo();
102111 TRI = MF.getSubtarget().getRegisterInfo();
103112 TII = MF.getSubtarget().getInstrInfo();
113 LIS = getAnalysisIfAvailable<LiveIntervals>();
114 DenseSet<unsigned> RecalcRegs;
104115
105116 // Loop over all instructions in all blocks, from bottom to top, so that it's
106117 // more likely that chains of dependent but ultimately dead instructions will
126137 // If the instruction is dead, delete it!
127138 if (isDead(MI)) {
128139 LLVM_DEBUG(dbgs() << "DeadMachineInstructionElim: DELETING: " << *MI);
140 if (LIS) {
141 for (const MachineOperand &MO : MI->operands()) {
142 if (MO.isReg() && TRI->isVirtualRegister(MO.getReg()))
143 RecalcRegs.insert(MO.getReg());
144 }
145 LIS->RemoveMachineInstrFromMaps(*MI);
146 }
147
129148 // It is possible that some DBG_VALUE instructions refer to this
130149 // instruction. They get marked as undef and will be deleted
131150 // in the live debug variable analysis.
169188 }
170189
171190 LivePhysRegs.clear();
191
192 for (auto Reg : RecalcRegs) {
193 LIS->removeInterval(Reg);
194 if (!MRI->reg_empty(Reg))
195 LIS->createAndComputeVirtRegInterval(Reg);
196 }
197
172198 return AnyChanges;
173199 }
161161 cl::desc("Enable mode register pass"),
162162 cl::init(true),
163163 cl::Hidden);
164
165 // Option is used in lit tests to prevent deadcoding of patterns inspected.
166 static cl::opt<bool>
167 EnableDCEInRA("amdgpu-dce-in-ra",
168 cl::init(true), cl::Hidden,
169 cl::desc("Enable machine DCE inside regalloc"));
164170
165171 extern "C" void LLVMInitializeAMDGPUTarget() {
166172 // Register the target
900906 // This must be run just after RegisterCoalescing.
901907 insertPass(&RegisterCoalescerID, &SIPreAllocateWWMRegsID, false);
902908
909 if (EnableDCEInRA)
910 insertPass(&RenameIndependentSubregsID, &DeadMachineInstructionElimID);
911
903912 TargetPassConfig::addOptimizedRegAlloc();
904913 }
905914
0 # RUN: llc -march=amdgcn -mcpu=tonga %s -start-before detect-dead-lanes -stop-before machine-scheduler -verify-machineinstrs -o - | FileCheck -check-prefix=GCN %s
1
2 # GCN-LABEL: name: dead_lane
3 # GCN: bb.0:
4 # GCN-NEXT: undef %3.sub0:vreg_64 = V_MAC_F32_e32 undef %1:vgpr_32, undef %1:vgpr_32, undef %3.sub0, implicit $exec
5 # GCN-NEXT: FLAT_STORE_DWORD undef %4:vreg_64, %3.sub0,
6 ---
7 name: dead_lane
8 tracksRegLiveness: true
9 body: |
10 bb.0:
11 %1:vgpr_32 = V_MAC_F32_e32 undef %0:vgpr_32, undef %0:vgpr_32, undef %0:vgpr_32, implicit $exec
12 %2:vgpr_32 = V_MAC_F32_e32 undef %0:vgpr_32, undef %0:vgpr_32, undef %0:vgpr_32, implicit $exec
13 %3:vreg_64 = REG_SEQUENCE %1:vgpr_32, %subreg.sub0, %2:vgpr_32, %subreg.sub1
14 FLAT_STORE_DWORD undef %4:vreg_64, %3.sub0, 0, 0, 0, implicit $exec, implicit $flat_scr
15 S_ENDPGM 0
16
17 ...
474474
475475 bb4:
476476 %tmp5 = phi i32 [ %tmp3, %bb2 ], [ %tmp, %bb1 ]
477 store volatile i32 %tmp5, i32 addrspace(1)* undef
477478 br label %bb1
478479 }
479480
522522
523523 bb11: ; preds = %bb10, %bb2
524524 %tmp12 = phi <2 x i32> [ %tmp6, %bb2 ], [ %tmp, %bb1 ]
525 store volatile <2 x i32> %tmp12, <2 x i32> addrspace(1)* undef
525526 br label %bb1
526527 }
2020 %2 = IMPLICIT_DEF
2121 %3 = V_CMP_GT_U32_e64 %0, %1, implicit $exec
2222 %4, %5 = V_SUBBREV_U32_e64 0, %0, %3, 0, implicit $exec
23 GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %4, 0, 0, 0, implicit $exec
2324
2425 ...
2526
4445 %2 = IMPLICIT_DEF
4546 %3 = V_CMP_GT_U32_e64 %0, %1, implicit $exec
4647 %4, %5 = V_SUBB_U32_e64 %0, 0, %3, 0, implicit $exec
48 GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %4, 0, 0, 0, implicit $exec
4749
4850 ...
4951
6870 %2 = IMPLICIT_DEF
6971 %3 = V_CMP_GT_U32_e64 %0, %1, implicit $exec
7072 %4, %5 = V_ADDC_U32_e64 0, %0, %3, 0, implicit $exec
73 GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %4, 0, 0, 0, implicit $exec
7174
7275 ...
7376
9295 %2 = IMPLICIT_DEF
9396 %3 = V_CMP_GT_U32_e64 %0, %1, implicit $exec
9497 %4, %5 = V_ADDC_U32_e64 %0, 0, %3, 0, implicit $exec
98 GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %4, 0, 0, 0, implicit $exec
9599
96100 ...
None # RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -stress-regalloc=1 -start-before=simple-register-coalescing -stop-after=greedy -o - %s | FileCheck %s
0 # RUN: llc -mtriple=amdgcn-amd-amdhsa -amdgpu-dce-in-ra=0 -verify-machineinstrs -stress-regalloc=1 -start-before=simple-register-coalescing -stop-after=greedy -o - %s | FileCheck %s
11 # https://bugs.llvm.org/show_bug.cgi?id=33620
22
33 ---
None ; RUN: llc -march=amdgcn -mcpu=tahiti -o - %s | FileCheck %s
0 ; RUN: llc -march=amdgcn -mcpu=tahiti -amdgpu-dce-in-ra=0 -o - %s | FileCheck %s
11 ; Don't crash when the use of an undefined value is only detected by the
22 ; register coalescer because it is hidden with subregister insert/extract.
33 target triple="amdgcn--"