llvm.org GIT mirror llvm / e5c4e28
CodeGen: Add DetectDeadLanes pass. The DetectDeadLanes pass performs a dataflow analysis of used/defined subregister lanes across COPY instructions and instructions that will get lowered to copies. It detects dead definitions and uses reading undefined values which are obscured by COPY and subregister usage. These dead definitions cause trouble in the register coalescer which cannot deal with definitions suddenly becoming dead after coalescing COPY instructions. For now the pass only adds dead and undef flags to machine operands. It should be possible to extend it in the future to remove the dead instructions and redo the analysis for the affected virtual registers. Differential Revision: http://reviews.llvm.org/D18427 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@267851 91177308-0d34-0410-b5e6-96231b3b80d8 Matthias Braun 4 years ago
7 changed file(s) with 946 addition(s) and 0 deletion(s). Raw diff Collapse all Expand all
469469 /// DeadMachineInstructionElim - This pass removes dead machine instructions.
470470 extern char &DeadMachineInstructionElimID;
471471
472 /// This pass adds dead/undef flags after analyzing subregister lanes.
473 extern char &DetectDeadLanesID;
474
472475 /// FastRegisterAllocation Pass - This pass register allocates as fast as
473476 /// possible. It is best suited for debug code where live ranges are short.
474477 ///
109109 void initializeDeadMachineInstructionElimPass(PassRegistry&);
110110 void initializeDelinearizationPass(PassRegistry &);
111111 void initializeDependenceAnalysisPass(PassRegistry&);
112 void initializeDetectDeadLanesPass(PassRegistry&);
112113 void initializeDivergenceAnalysisPass(PassRegistry&);
113114 void initializeDomOnlyPrinterPass(PassRegistry&);
114115 void initializeDomOnlyViewerPass(PassRegistry&);
1111 CodeGenPrepare.cpp
1212 CriticalAntiDepBreaker.cpp
1313 DeadMachineInstructionElim.cpp
14 DetectDeadLanes.cpp
1415 DFAPacketizer.cpp
1516 DwarfEHPrepare.cpp
1617 EarlyIfConversion.cpp
2323 initializeBranchFolderPassPass(Registry);
2424 initializeCodeGenPreparePass(Registry);
2525 initializeDeadMachineInstructionElimPass(Registry);
26 initializeDetectDeadLanesPass(Registry);
2627 initializeDwarfEHPreparePass(Registry);
2728 initializeEarlyIfConverterPass(Registry);
2829 initializeExpandISelPseudosPass(Registry);
0 //===- DetectDeadLanes.cpp - SubRegister Lane Usage Analysis --*- C++ -*---===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Analysis that tracks defined/used subregister lanes across COPY instructions
11 /// and instructions that get lowered to a COPY (PHI, REG_SEQUENCE,
12 /// INSERT_SUBREG, EXTRACT_SUBREG).
13 /// The information is used to detect dead definitions and the usage of
14 /// (completely) undefined values and mark the operands as such.
15 /// This pass is necessary because the dead/undef status is not obvious anymore
16 /// when subregisters are involved.
17 ///
18 /// Example:
19 /// %vreg0 = some definition
20 /// %vreg1 = IMPLICIT_DEF
21 /// %vreg2 = REG_SEQUENCE %vreg0, sub0, %vreg1, sub1
22 /// %vreg3 = EXTRACT_SUBREG %vreg2, sub1
23 /// = use %vreg3
24 /// The %vreg0 definition is dead and %vreg3 contains an undefined value.
25 //
26 //===----------------------------------------------------------------------===//
27
28 #include
29 #include
30
31 #include "llvm/ADT/BitVector.h"
32 #include "llvm/CodeGen/MachineFunctionPass.h"
33 #include "llvm/CodeGen/MachineRegisterInfo.h"
34 #include "llvm/CodeGen/Passes.h"
35 #include "llvm/InitializePasses.h"
36 #include "llvm/Pass.h"
37 #include "llvm/PassRegistry.h"
38 #include "llvm/Support/Debug.h"
39 #include "llvm/Support/raw_ostream.h"
40 #include "llvm/Target/TargetInstrInfo.h"
41 #include "llvm/Target/TargetRegisterInfo.h"
42 #include "llvm/Target/TargetSubtargetInfo.h"
43
44 using namespace llvm;
45
46 #define DEBUG_TYPE "detect-dead-lanes"
47
48 namespace {
49
50 /// Contains a bitmask of which lanes of a given virtual register are
51 /// defined and which ones are actually used.
52 struct VRegInfo {
53 LaneBitmask UsedLanes;
54 LaneBitmask DefinedLanes;
55 };
56
57 class DetectDeadLanes : public MachineFunctionPass {
58 public:
59 bool runOnMachineFunction(MachineFunction &MF) override;
60
61 static char ID;
62 DetectDeadLanes() : MachineFunctionPass(ID) {}
63
64 const char *getPassName() const override { return "Detect Dead Lanes"; }
65
66 private:
67 /// Add used lane bits on the register used by operand \p MO. This translates
68 /// the bitmask based on the operands subregister, and puts the register into
69 /// the worklist if any new bits were added.
70 void addUsedLanesOnOperand(const MachineOperand &MO, LaneBitmask UsedLanes);
71
72 /// Given a bitmask \p UsedLanes for the used lanes on a def output of a
73 /// COPY-like instruction determine the lanes used on the use operands
74 /// and call addUsedLanesOnOperand() for them.
75 void transferUsedLanesStep(const MachineOperand &Def, LaneBitmask UsedLanes);
76
77 /// Given a use regiser operand \p Use and a mask of defined lanes, check
78 /// if the operand belongs to a lowerToCopies() instruction, transfer the
79 /// mask to the def and put the instruction into the worklist.
80 void transferDefinedLanesStep(const MachineOperand &Use,
81 LaneBitmask DefinedLanes);
82
83 /// Given a mask \p DefinedLanes of lanes defined at operand \p OpNum
84 /// of COPY-like instruction, determine which lanes are defined at the output
85 /// operand \p Def.
86 LaneBitmask transferDefinedLanes(const MachineOperand &Def, unsigned OpNum,
87 LaneBitmask DefinedLanes);
88
89 LaneBitmask determineInitialDefinedLanes(unsigned Reg);
90 LaneBitmask determineInitialUsedLanes(unsigned Reg);
91
92 const MachineRegisterInfo *MRI;
93 const TargetRegisterInfo *TRI;
94
95 void PutInWorklist(unsigned RegIdx) {
96 if (WorklistMembers.test(RegIdx))
97 return;
98 WorklistMembers.set(RegIdx);
99 Worklist.push_back(RegIdx);
100 }
101
102 VRegInfo *VRegInfos;
103 /// Worklist containing virtreg indexes.
104 std::deque Worklist;
105 BitVector WorklistMembers;
106 /// This bitvector is set for each vreg index where the vreg is defined
107 /// by an instruction where lowersToCopies()==true.
108 BitVector DefinedByCopy;
109 };
110
111 } // end anonymous namespace
112
113 char DetectDeadLanes::ID = 0;
114 char &llvm::DetectDeadLanesID = DetectDeadLanes::ID;
115
116 INITIALIZE_PASS(DetectDeadLanes, "detect-dead-lanes", "Detect Dead Lanes",
117 false, false);
118
119 /// Returns true if \p MI will get lowered to a series of COPY instructions.
120 /// We call this a COPY-like instruction.
121 static bool lowersToCopies(const MachineInstr &MI) {
122 // Note: We could support instructions with MCInstrDesc::isRegSequenceLike(),
123 // isExtractSubRegLike(), isInsertSubregLike() in the future even though they
124 // are not lowered to a COPY.
125 switch (MI.getOpcode()) {
126 case TargetOpcode::COPY:
127 case TargetOpcode::PHI:
128 case TargetOpcode::INSERT_SUBREG:
129 case TargetOpcode::REG_SEQUENCE:
130 case TargetOpcode::EXTRACT_SUBREG:
131 return true;
132 }
133 return false;
134 }
135
136 static bool isCrossCopy(const MachineRegisterInfo &MRI,
137 const MachineInstr &MI,
138 const TargetRegisterClass *DstRC,
139 const MachineOperand &MO) {
140 assert(lowersToCopies(MI));
141 unsigned SrcReg = MO.getReg();
142 const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
143 if (DstRC == SrcRC)
144 return false;
145
146 unsigned SrcSubIdx = MO.getSubReg();
147
148 const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
149 unsigned DstSubIdx = 0;
150 switch (MI.getOpcode()) {
151 case TargetOpcode::INSERT_SUBREG:
152 if (MI.getOperandNo(&MO) == 2)
153 DstSubIdx = MI.getOperand(3).getImm();
154 break;
155 case TargetOpcode::REG_SEQUENCE: {
156 unsigned OpNum = MI.getOperandNo(&MO);
157 DstSubIdx = MI.getOperand(OpNum+1).getImm();
158 break;
159 }
160 case TargetOpcode::EXTRACT_SUBREG: {
161 unsigned SubReg = MI.getOperand(2).getImm();
162 SrcSubIdx = TRI.composeSubRegIndices(SubReg, SrcSubIdx);
163 }
164 }
165
166 unsigned PreA, PreB; // Unused.
167 if (SrcSubIdx && DstSubIdx)
168 return !TRI.getCommonSuperRegClass(SrcRC, SrcSubIdx, DstRC, DstSubIdx, PreA,
169 PreB);
170 if (SrcSubIdx)
171 return !TRI.getMatchingSuperRegClass(SrcRC, DstRC, SrcSubIdx);
172 if (DstSubIdx)
173 return !TRI.getMatchingSuperRegClass(DstRC, SrcRC, DstSubIdx);
174 return !TRI.getCommonSubClass(SrcRC, DstRC);
175 }
176
177 void DetectDeadLanes::addUsedLanesOnOperand(const MachineOperand &MO,
178 LaneBitmask UsedLanes) {
179 if (!MO.readsReg())
180 return;
181 unsigned MOReg = MO.getReg();
182 if (!TargetRegisterInfo::isVirtualRegister(MOReg))
183 return;
184
185 unsigned MOSubReg = MO.getSubReg();
186 if (MOSubReg != 0)
187 UsedLanes = TRI->composeSubRegIndexLaneMask(MOSubReg, UsedLanes);
188 UsedLanes &= MRI->getMaxLaneMaskForVReg(MOReg);
189
190 unsigned MORegIdx = TargetRegisterInfo::virtReg2Index(MOReg);
191 VRegInfo &MORegInfo = VRegInfos[MORegIdx];
192 LaneBitmask PrevUsedLanes = MORegInfo.UsedLanes;
193 // Any change at all?
194 if ((UsedLanes & ~PrevUsedLanes) == 0)
195 return;
196
197 // Set UsedLanes and remember instruction for further propagation.
198 MORegInfo.UsedLanes = PrevUsedLanes | UsedLanes;
199 if (DefinedByCopy.test(MORegIdx))
200 PutInWorklist(MORegIdx);
201 }
202
203 void DetectDeadLanes::transferUsedLanesStep(const MachineOperand &Def,
204 LaneBitmask UsedLanes) {
205 const MachineInstr &MI = *Def.getParent();
206 switch (MI.getOpcode()) {
207 case TargetOpcode::COPY:
208 case TargetOpcode::PHI:
209 for (const MachineOperand &MO : MI.uses()) {
210 if (MO.isReg() && MO.isUse())
211 addUsedLanesOnOperand(MO, UsedLanes);
212 }
213 break;
214 case TargetOpcode::REG_SEQUENCE: {
215 // Note: This loop makes the conservative assumption that subregister
216 // indices do not overlap or that we do not know how the overlap is
217 // resolved when lowering to copies.
218 for (unsigned I = 1, N = MI.getNumOperands(); I < N; I += 2) {
219 const MachineOperand &MO = MI.getOperand(I);
220 unsigned SubIdx = MI.getOperand(I + 1).getImm();
221 LaneBitmask MOUsedLanes =
222 TRI->reverseComposeSubRegIndexLaneMask(SubIdx, UsedLanes);
223
224 addUsedLanesOnOperand(MO, MOUsedLanes);
225 }
226 break;
227 }
228 case TargetOpcode::INSERT_SUBREG: {
229 const MachineOperand &MO2 = MI.getOperand(2);
230 unsigned SubIdx = MI.getOperand(3).getImm();
231 LaneBitmask MO2UsedLanes =
232 TRI->reverseComposeSubRegIndexLaneMask(SubIdx, UsedLanes);
233 addUsedLanesOnOperand(MO2, MO2UsedLanes);
234
235 const MachineOperand &MO1 = MI.getOperand(1);
236 unsigned DefReg = Def.getReg();
237 const TargetRegisterClass *RC = MRI->getRegClass(DefReg);
238 LaneBitmask MO1UsedLanes;
239 if (RC->CoveredBySubRegs)
240 MO1UsedLanes = UsedLanes & ~TRI->getSubRegIndexLaneMask(SubIdx);
241 else
242 MO1UsedLanes = RC->LaneMask;
243 addUsedLanesOnOperand(MO1, MO1UsedLanes);
244 break;
245 }
246 case TargetOpcode::EXTRACT_SUBREG: {
247 const MachineOperand &MO = MI.getOperand(1);
248 unsigned SubIdx = MI.getOperand(2).getImm();
249 LaneBitmask MOUsedLanes =
250 TRI->composeSubRegIndexLaneMask(SubIdx, UsedLanes);
251 addUsedLanesOnOperand(MO, MOUsedLanes);
252 break;
253 }
254 default:
255 llvm_unreachable("function must be called with COPY-like instruction");
256 }
257 }
258
259 void DetectDeadLanes::transferDefinedLanesStep(const MachineOperand &Use,
260 LaneBitmask DefinedLanes) {
261 if (!Use.readsReg())
262 return;
263 // Check whether the operand writes a vreg and is part of a COPY-like
264 // instruction.
265 const MachineInstr &MI = *Use.getParent();
266 if (MI.getDesc().getNumDefs() != 1)
267 return;
268 // FIXME: PATCHPOINT instructions announce a Def that does not always exist,
269 // they really need to be modeled differently!
270 if (MI.getOpcode() == TargetOpcode::PATCHPOINT)
271 return;
272 const MachineOperand &Def = *MI.defs().begin();
273 unsigned DefReg = Def.getReg();
274 if (!TargetRegisterInfo::isVirtualRegister(DefReg))
275 return;
276 unsigned DefRegIdx = TargetRegisterInfo::virtReg2Index(DefReg);
277 if (!DefinedByCopy.test(DefRegIdx))
278 return;
279
280 unsigned OpNum = MI.getOperandNo(&Use);
281 DefinedLanes =
282 TRI->reverseComposeSubRegIndexLaneMask(Use.getSubReg(), DefinedLanes);
283 DefinedLanes = transferDefinedLanes(Def, OpNum, DefinedLanes);
284
285 VRegInfo &RegInfo = VRegInfos[DefRegIdx];
286 LaneBitmask PrevDefinedLanes = RegInfo.DefinedLanes;
287 // Any change at all?
288 if ((DefinedLanes & ~PrevDefinedLanes) == 0)
289 return;
290
291 RegInfo.DefinedLanes = PrevDefinedLanes | DefinedLanes;
292 PutInWorklist(DefRegIdx);
293 }
294
295 LaneBitmask DetectDeadLanes::transferDefinedLanes(const MachineOperand &Def,
296 unsigned OpNum,
297 LaneBitmask DefinedLanes) {
298 const MachineInstr &MI = *Def.getParent();
299 // Translate DefinedLanes if necessary.
300 switch (MI.getOpcode()) {
301 case TargetOpcode::REG_SEQUENCE: {
302 unsigned SubIdx = MI.getOperand(OpNum + 1).getImm();
303 DefinedLanes = TRI->composeSubRegIndexLaneMask(SubIdx, DefinedLanes);
304 DefinedLanes &= TRI->getSubRegIndexLaneMask(SubIdx);
305 break;
306 }
307 case TargetOpcode::INSERT_SUBREG: {
308 unsigned SubIdx = MI.getOperand(3).getImm();
309 if (OpNum == 2) {
310 DefinedLanes = TRI->composeSubRegIndexLaneMask(SubIdx, DefinedLanes);
311 DefinedLanes &= TRI->getSubRegIndexLaneMask(SubIdx);
312 } else {
313 assert(OpNum == 1 && "INSERT_SUBREG must have two operands");
314 // Ignore lanes defined by operand 2.
315 DefinedLanes &= ~TRI->getSubRegIndexLaneMask(SubIdx);
316 }
317 break;
318 }
319 case TargetOpcode::EXTRACT_SUBREG: {
320 unsigned SubIdx = MI.getOperand(2).getImm();
321 assert(OpNum == 1 && "EXTRACT_SUBREG must have one register operand only");
322 DefinedLanes = TRI->reverseComposeSubRegIndexLaneMask(SubIdx, DefinedLanes);
323 break;
324 }
325 case TargetOpcode::COPY:
326 case TargetOpcode::PHI:
327 break;
328 default:
329 llvm_unreachable("function must be called with COPY-like instruction");
330 }
331
332 unsigned SubIdx = Def.getSubReg();
333 DefinedLanes = TRI->composeSubRegIndexLaneMask(SubIdx, DefinedLanes);
334 DefinedLanes &= MRI->getMaxLaneMaskForVReg(Def.getReg());
335 return DefinedLanes;
336 }
337
338 LaneBitmask DetectDeadLanes::determineInitialDefinedLanes(unsigned Reg) {
339 // Live-In or unused registers have no definition but are considered fully
340 // defined.
341 if (!MRI->hasOneDef(Reg))
342 return ~0u;
343
344 const MachineOperand &Def = *MRI->def_begin(Reg);
345 const MachineInstr &DefMI = *Def.getParent();
346 if (lowersToCopies(DefMI)) {
347 // Start optimisatically with no used or defined lanes for copy
348 // instructions. The following dataflow analysis will add more bits.
349 unsigned RegIdx = TargetRegisterInfo::virtReg2Index(Reg);
350 DefinedByCopy.set(RegIdx);
351 PutInWorklist(RegIdx);
352
353 if (Def.isDead())
354 return 0;
355
356 // COPY/PHI can copy across unrelated register classes (example: float/int)
357 // with incompatible subregister structure. Do not include these in the
358 // dataflow analysis since we cannot transfer lanemasks in a meaningful way.
359 const TargetRegisterClass *DefRC = MRI->getRegClass(Reg);
360
361 // Determine initially DefinedLanes.
362 LaneBitmask DefinedLanes = 0;
363 for (const MachineOperand &MO : DefMI.uses()) {
364 if (!MO.isReg() || !MO.readsReg())
365 continue;
366 unsigned MOReg = MO.getReg();
367 if (!MOReg)
368 continue;
369
370 LaneBitmask MODefinedLanes;
371 if (TargetRegisterInfo::isPhysicalRegister(MOReg)) {
372 MODefinedLanes = ~0u;
373 } else if (isCrossCopy(*MRI, DefMI, DefRC, MO)) {
374 MODefinedLanes = ~0u;
375 } else {
376 assert(TargetRegisterInfo::isVirtualRegister(MOReg));
377 if (MRI->hasOneDef(MOReg)) {
378 const MachineOperand &MODef = *MRI->def_begin(MOReg);
379 const MachineInstr &MODefMI = *MODef.getParent();
380 // Bits from copy-like operations will be added later.
381 if (lowersToCopies(MODefMI) || MODefMI.isImplicitDef())
382 continue;
383 }
384 unsigned MOSubReg = MO.getSubReg();
385 MODefinedLanes = MRI->getMaxLaneMaskForVReg(MOReg);
386 MODefinedLanes = TRI->reverseComposeSubRegIndexLaneMask(
387 MOSubReg, MODefinedLanes);
388 }
389
390 unsigned OpNum = DefMI.getOperandNo(&MO);
391 DefinedLanes |= transferDefinedLanes(Def, OpNum, MODefinedLanes);
392 }
393 return DefinedLanes;
394 }
395 if (DefMI.isImplicitDef() || Def.isDead())
396 return 0;
397
398 unsigned SubReg = Def.getSubReg();
399 return SubReg != 0 ? TRI->getSubRegIndexLaneMask(SubReg)
400 : MRI->getMaxLaneMaskForVReg(Reg);
401 }
402
403 LaneBitmask DetectDeadLanes::determineInitialUsedLanes(unsigned Reg) {
404 LaneBitmask UsedLanes = 0;
405 for (const MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
406 if (!MO.readsReg())
407 continue;
408
409 const MachineInstr &UseMI = *MO.getParent();
410 if (UseMI.isKill())
411 continue;
412
413 unsigned SubReg = MO.getSubReg();
414 if (lowersToCopies(UseMI)) {
415 assert(UseMI.getDesc().getNumDefs() == 1);
416 const MachineOperand &Def = *UseMI.defs().begin();
417 unsigned DefReg = Def.getReg();
418 // The used lanes of COPY-like instruction operands are determined by the
419 // following dataflow analysis.
420 if (TargetRegisterInfo::isVirtualRegister(DefReg)) {
421 // But ignore copies across incompatible register classes.
422 bool CrossCopy = false;
423 if (lowersToCopies(UseMI)) {
424 const TargetRegisterClass *DstRC = MRI->getRegClass(DefReg);
425 CrossCopy = isCrossCopy(*MRI, UseMI, DstRC, MO);
426 }
427
428 if (!CrossCopy)
429 continue;
430 }
431 }
432
433 // Shortcut: All lanes are used.
434 if (SubReg == 0)
435 return MRI->getMaxLaneMaskForVReg(Reg);
436
437 UsedLanes |= TRI->getSubRegIndexLaneMask(SubReg);
438 }
439 return UsedLanes;
440 }
441
442 bool DetectDeadLanes::runOnMachineFunction(MachineFunction &MF) {
443 // Don't bother if we won't track subregister liveness later. This pass is
444 // required for correctness if subregister liveness is enabled because the
445 // register coalescer cannot deal with hidden dead defs. However without
446 // subregister liveness enabled, the expected benefits of this pass are small
447 // so we safe the compile time.
448 if (!MF.getSubtarget().enableSubRegLiveness()) {
449 DEBUG(dbgs() << "Skipping Detect dead lanes pass\n");
450 return false;
451 }
452
453 MRI = &MF.getRegInfo();
454 TRI = MRI->getTargetRegisterInfo();
455
456 unsigned NumVirtRegs = MRI->getNumVirtRegs();
457 VRegInfos = new VRegInfo[NumVirtRegs];
458 WorklistMembers.resize(NumVirtRegs);
459 DefinedByCopy.resize(NumVirtRegs);
460
461 // First pass: Populate defs/uses of vregs with initial values
462 for (unsigned RegIdx = 0; RegIdx < NumVirtRegs; ++RegIdx) {
463 unsigned Reg = TargetRegisterInfo::index2VirtReg(RegIdx);
464
465 // Determine used/defined lanes and add copy instructions to worklist.
466 VRegInfo &Info = VRegInfos[RegIdx];
467 Info.DefinedLanes = determineInitialDefinedLanes(Reg);
468 Info.UsedLanes = determineInitialUsedLanes(Reg);
469 }
470
471 // Iterate as long as defined lanes/used lanes keep changing.
472 while (!Worklist.empty()) {
473 unsigned RegIdx = Worklist.front();
474 Worklist.pop_front();
475 WorklistMembers.reset(RegIdx);
476 VRegInfo &Info = VRegInfos[RegIdx];
477 unsigned Reg = TargetRegisterInfo::index2VirtReg(RegIdx);
478
479 // Transfer UsedLanes to operands of DefMI (backwards dataflow).
480 MachineOperand &Def = *MRI->def_begin(Reg);
481 transferUsedLanesStep(Def, Info.UsedLanes);
482 // Transfer DefinedLanes to users of Reg (forward dataflow).
483 for (const MachineOperand &MO : MRI->use_nodbg_operands(Reg))
484 transferDefinedLanesStep(MO, Info.DefinedLanes);
485 }
486
487 DEBUG(
488 dbgs() << "Defined/Used lanes:\n";
489 for (unsigned RegIdx = 0; RegIdx < NumVirtRegs; ++RegIdx) {
490 unsigned Reg = TargetRegisterInfo::index2VirtReg(RegIdx);
491 const VRegInfo &Info = VRegInfos[RegIdx];
492 dbgs() << PrintReg(Reg, nullptr)
493 << " Used: " << PrintLaneMask(Info.UsedLanes)
494 << " Def: " << PrintLaneMask(Info.DefinedLanes) << '\n';
495 }
496 dbgs() << "\n";
497 );
498
499 // Mark operands as dead/unused.
500 for (MachineBasicBlock &MBB : MF) {
501 for (MachineInstr &MI : MBB) {
502 for (MachineOperand &MO : MI.operands()) {
503 if (!MO.isReg())
504 continue;
505 unsigned Reg = MO.getReg();
506 if (!TargetRegisterInfo::isVirtualRegister(Reg))
507 continue;
508 unsigned SubReg = MO.getSubReg();
509 LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubReg);
510 unsigned RegIdx = TargetRegisterInfo::virtReg2Index(Reg);
511 const VRegInfo &RegInfo = VRegInfos[RegIdx];
512 if (RegInfo.UsedLanes == 0 && MO.isDef() && !MO.isDead()) {
513 DEBUG(dbgs() << "Marking operand '" << MO << "' as dead in " << MI);
514 MO.setIsDead();
515 }
516 if (((RegInfo.UsedLanes & Mask) == 0 ||
517 (RegInfo.DefinedLanes & Mask) == 0) && MO.readsReg()) {
518 DEBUG(dbgs() << "Marking operand '" << MO << "' as undef in " << MI);
519 MO.setIsUndef();
520 }
521 }
522 }
523 }
524
525 DefinedByCopy.clear();
526 WorklistMembers.clear();
527 delete[] VRegInfos;
528 return true;
529 }
735735 /// optimized register allocation, including coalescing, machine instruction
736736 /// scheduling, and register allocation itself.
737737 void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
738 addPass(&DetectDeadLanesID, false);
739
738740 addPass(&ProcessImplicitDefsID, false);
739741
740742 // LiveVariables currently requires pure SSA form.
0 # RUN: llc -march=amdgcn -run-pass detect-dead-lanes -o /dev/null %s 2>&1 | FileCheck %s
1 --- |
2 define void @test0() { ret void }
3 define void @test1() { ret void }
4 define void @test2() { ret void }
5 define void @test3() { ret void }
6 define void @test4() { ret void }
7 define void @loop0() { ret void }
8 define void @loop1() { ret void }
9 define void @loop2() { ret void }
10 ...
11 ---
12 # Combined use/def transfer check, the basics.
13 # CHECK-LABEL: name: test0
14 # CHECK: S_NOP 0, implicit-def %0
15 # CHECK: S_NOP 0, implicit-def %1
16 # CHECK: S_NOP 0, implicit-def dead %2
17 # CHECK: %3 = REG_SEQUENCE %0, {{[0-9]+}}, %1, {{[0-9]+}}, undef %2, {{[0-9]+}}
18 # CHECK: S_NOP 0, implicit %3:sub0
19 # CHECK: S_NOP 0, implicit %3:sub1
20 # CHECK: S_NOP 0, implicit undef %3:sub2
21 # CHECK: %4 = COPY %3:sub0_sub1
22 # CHECK: %5 = COPY %3:sub2_sub3
23 # CHECK: S_NOP 0, implicit %4:sub0
24 # CHECK: S_NOP 0, implicit %4:sub1
25 # CHECK: S_NOP 0, implicit undef %5:sub0
26 name: test0
27 isSSA: true
28 registers:
29 - { id: 0, class: sreg_32 }
30 - { id: 1, class: sreg_32 }
31 - { id: 2, class: sreg_32 }
32 - { id: 3, class: sreg_128 }
33 - { id: 4, class: sreg_64 }
34 - { id: 5, class: sreg_64 }
35 body: |
36 bb.0:
37 S_NOP 0, implicit-def %0
38 S_NOP 0, implicit-def %1
39 S_NOP 0, implicit-def %2
40 %3 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub3
41 S_NOP 0, implicit %3:sub0
42 S_NOP 0, implicit %3:sub1
43 S_NOP 0, implicit %3:sub2
44 %4 = COPY %3:sub0_sub1
45 %5 = COPY %3:sub2_sub3
46 S_NOP 0, implicit %4:sub0
47 S_NOP 0, implicit %4:sub1
48 S_NOP 0, implicit %5:sub0
49 ...
50 ---
51 # Check defined lanes transfer; Includes checking for some special cases like
52 # undef operands or IMPLICIT_DEF definitions.
53 # CHECK-LABEL: name: test1
54 # CHECK: %0 = REG_SEQUENCE %sgpr0, {{[0-9]+}}, %sgpr0, {{[0-9]+}}
55 # CHECK: %1 = INSERT_SUBREG %0, %sgpr1, {{[0-9]+}}
56 # CHECK: %2 = INSERT_SUBREG %0:sub2_sub3, %sgpr42, {{[0-9]+}}
57 # CHECK: S_NOP 0, implicit %1:sub0
58 # CHECK: S_NOP 0, implicit undef %1:sub1
59 # CHECK: S_NOP 0, implicit %1:sub2
60 # CHECK: S_NOP 0, implicit %1:sub3
61 # CHECK: S_NOP 0, implicit %2:sub0
62 # CHECK: S_NOP 0, implicit undef %2:sub1
63
64 # CHECK: %3 = IMPLICIT_DEF
65 # CHECK: %4 = INSERT_SUBREG %0, undef %3, {{[0-9]+}}
66 # CHECK: S_NOP 0, implicit undef %4:sub0
67 # CHECK: S_NOP 0, implicit undef %4:sub1
68 # CHECK: S_NOP 0, implicit %4:sub2
69 # CHECK: S_NOP 0, implicit undef %4:sub3
70
71 # CHECK: %5 = EXTRACT_SUBREG %0, {{[0-9]+}}
72 # CHECK: %6 = EXTRACT_SUBREG %5, {{[0-9]+}}
73 # CHECK: %7 = EXTRACT_SUBREG %5, {{[0-9]+}}
74 # CHECK: S_NOP 0, implicit %5
75 # CHECK: S_NOP 0, implicit %6
76 # CHECK: S_NOP 0, implicit undef %7
77
78 # CHECK: %8 = IMPLICIT_DEF
79 # CHECK: %9 = EXTRACT_SUBREG undef %8, {{[0-9]+}}
80 # CHECK: S_NOP 0, implicit undef %9
81
82 # CHECK: %10 = EXTRACT_SUBREG undef %0, {{[0-9]+}}
83 # CHECK: S_NOP 0, implicit undef %10
84 name: test1
85 isSSA: true
86 registers:
87 - { id: 0, class: sreg_128 }
88 - { id: 1, class: sreg_128 }
89 - { id: 2, class: sreg_64 }
90 - { id: 3, class: sreg_32 }
91 - { id: 4, class: sreg_128 }
92 - { id: 5, class: sreg_64 }
93 - { id: 6, class: sreg_32 }
94 - { id: 7, class: sreg_32 }
95 - { id: 8, class: sreg_64 }
96 - { id: 9, class: sreg_32 }
97 - { id: 10, class: sreg_128 }
98 body: |
99 bb.0:
100 %0 = REG_SEQUENCE %sgpr0, %subreg.sub0, %sgpr0, %subreg.sub2
101 %1 = INSERT_SUBREG %0, %sgpr1, %subreg.sub3
102 %2 = INSERT_SUBREG %0:sub2_sub3, %sgpr42, %subreg.sub0
103 S_NOP 0, implicit %1:sub0
104 S_NOP 0, implicit %1:sub1
105 S_NOP 0, implicit %1:sub2
106 S_NOP 0, implicit %1:sub3
107 S_NOP 0, implicit %2:sub0
108 S_NOP 0, implicit %2:sub1
109
110 %3 = IMPLICIT_DEF
111 %4 = INSERT_SUBREG %0, %3, %subreg.sub0
112 S_NOP 0, implicit %4:sub0
113 S_NOP 0, implicit %4:sub1
114 S_NOP 0, implicit %4:sub2
115 S_NOP 0, implicit %4:sub3
116
117 %5 = EXTRACT_SUBREG %0, %subreg.sub0_sub1
118 %6 = EXTRACT_SUBREG %5, %subreg.sub0
119 %7 = EXTRACT_SUBREG %5, %subreg.sub1
120 S_NOP 0, implicit %5
121 S_NOP 0, implicit %6
122 S_NOP 0, implicit %7
123
124 %8 = IMPLICIT_DEF
125 %9 = EXTRACT_SUBREG %8, %subreg.sub1
126 S_NOP 0, implicit %9
127
128 %10 = EXTRACT_SUBREG undef %0, %subreg.sub2_sub3
129 S_NOP 0, implicit %10
130 ...
131 ---
132 # Check used lanes transfer; Includes checking for some special cases like
133 # undef operands.
134 # CHECK-LABEL: name: test2
135 # CHECK: S_NOP 0, implicit-def dead %0
136 # CHECK: S_NOP 0, implicit-def %1
137 # CHECK: S_NOP 0, implicit-def %2
138 # CHECK: %3 = REG_SEQUENCE undef %0, {{[0-9]+}}, %1, {{[0-9]+}}, %2, {{[0-9]+}}
139 # CHECK: S_NOP 0, implicit %3:sub1
140 # CHECK: S_NOP 0, implicit %3:sub3
141
142 # CHECK: S_NOP 0, implicit-def %4
143 # CHECK: S_NOP 0, implicit-def dead %5
144 # CHECK: %6 = REG_SEQUENCE %4, {{[0-9]+}}, undef %5, {{[0-9]+}}
145 # CHECK: S_NOP 0, implicit %6
146
147 # CHECK: S_NOP 0, implicit-def dead %7
148 # CHECK: S_NOP 0, implicit-def %8
149 # CHECK: %9 = INSERT_SUBREG undef %7, %8, {{[0-9]+}}
150 # CHECK: S_NOP 0, implicit %9:sub2
151
152 # CHECK: S_NOP 0, implicit-def %10
153 # CHECK: S_NOP 0, implicit-def dead %11
154 # CHECK: %12 = INSERT_SUBREG %10, undef %11, {{[0-9]+}}
155 # CHECK: S_NOP 0, implicit %12:sub3
156
157 # CHECK: S_NOP 0, implicit-def %13
158 # CHECK: S_NOP 0, implicit-def dead %14
159 # CHECK: %15 = REG_SEQUENCE %13, {{[0-9]+}}, undef %14, {{[0-9]+}}
160 # CHECK: %16 = EXTRACT_SUBREG %15, {{[0-9]+}}
161 # CHECK: S_NOP 0, implicit %16:sub1
162
163 name: test2
164 isSSA: true
165 registers:
166 - { id: 0, class: sreg_32 }
167 - { id: 1, class: sreg_32 }
168 - { id: 2, class: sreg_64 }
169 - { id: 3, class: sreg_128 }
170 - { id: 4, class: sreg_32 }
171 - { id: 5, class: sreg_32 }
172 - { id: 6, class: sreg_64 }
173 - { id: 7, class: sreg_128 }
174 - { id: 8, class: sreg_64 }
175 - { id: 9, class: sreg_128 }
176 - { id: 10, class: sreg_128 }
177 - { id: 11, class: sreg_64 }
178 - { id: 12, class: sreg_128 }
179 - { id: 13, class: sreg_64 }
180 - { id: 14, class: sreg_64 }
181 - { id: 15, class: sreg_128 }
182 - { id: 16, class: sreg_64 }
183 body: |
184 bb.0:
185 S_NOP 0, implicit-def %0
186 S_NOP 0, implicit-def %1
187 S_NOP 0, implicit-def %2
188 %3 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2_sub3
189 S_NOP 0, implicit %3:sub1
190 S_NOP 0, implicit %3:sub3
191
192 S_NOP 0, implicit-def %4
193 S_NOP 0, implicit-def %5
194 %6 = REG_SEQUENCE %4, %subreg.sub0, undef %5, %subreg.sub1
195 S_NOP 0, implicit %6
196
197 S_NOP 0, implicit-def %7
198 S_NOP 0, implicit-def %8
199 %9 = INSERT_SUBREG %7, %8, %subreg.sub2_sub3
200 S_NOP 0, implicit %9:sub2
201
202 S_NOP 0, implicit-def %10
203 S_NOP 0, implicit-def %11
204 %12 = INSERT_SUBREG %10, %11, %subreg.sub0_sub1
205 S_NOP 0, implicit %12:sub3
206
207 S_NOP 0, implicit-def %13
208 S_NOP 0, implicit-def %14
209 %15 = REG_SEQUENCE %13, %subreg.sub0_sub1, %14, %subreg.sub2_sub3
210 %16 = EXTRACT_SUBREG %15, %subreg.sub0_sub1
211 S_NOP 0, implicit %16:sub1
212 ...
213 ---
214 # Check that copies to physregs use all lanes, copies from physregs define all
215 # lanes. So we should not get a dead/undef flag here.
216 # CHECK-LABEL: name: test3
217 # CHECK: S_NOP 0, implicit-def %0
218 # CHECK: %vcc = COPY %0
219 # CHECK: %1 = COPY %vcc
220 # CHECK: S_NOP 0, implicit %1
221 name: test3
222 isSSA: true
223 tracksRegLiveness: true
224 registers:
225 - { id: 0, class: sreg_64 }
226 - { id: 1, class: sreg_64 }
227 body: |
228 bb.0:
229 S_NOP 0, implicit-def %0
230 %vcc = COPY %0
231
232 %1 = COPY %vcc
233 S_NOP 0, implicit %1
234 ...
235 ---
236 # Check that implicit-def/kill do not count as def/uses.
237 # CHECK-LABEL: name: test4
238 # CHECK: S_NOP 0, implicit-def dead %0
239 # CHECK: KILL undef %0
240 # CHECK: %1 = IMPLICIT_DEF
241 # CHECK: S_NOP 0, implicit undef %1
242 name: test4
243 isSSA: true
244 tracksRegLiveness: true
245 registers:
246 - { id: 0, class: sreg_64 }
247 - { id: 1, class: sreg_64 }
248 body: |
249 bb.0:
250 S_NOP 0, implicit-def %0
251 KILL %0
252
253 %1 = IMPLICIT_DEF
254 S_NOP 0, implicit %1
255 ...
256 ---
257 # Check "optimistic" dataflow fixpoint in phi-loops.
258 # CHECK-LABEL: name: loop0
259 # CHECK: bb.0:
260 # CHECK: S_NOP 0, implicit-def %0
261 # CHECK: S_NOP 0, implicit-def dead %1
262 # CHECK: S_NOP 0, implicit-def dead %2
263 # CHECK: %3 = REG_SEQUENCE %0, {{[0-9]+}}, undef %1, {{[0-9]+}}, undef %2, {{[0-9]+}}
264
265 # CHECK: bb.1:
266 # CHECK: %4 = PHI %3, %bb.0, %5, %bb.1
267
268 # CHECK: bb.2:
269 # CHECK: S_NOP 0, implicit %4:sub0
270 # CHECK: S_NOP 0, implicit undef %4:sub3
271 name: loop0
272 isSSA: true
273 tracksRegLiveness: true
274 registers:
275 - { id: 0, class: sreg_32 }
276 - { id: 1, class: sreg_32 }
277 - { id: 2, class: sreg_32 }
278 - { id: 3, class: sreg_128 }
279 - { id: 4, class: sreg_128 }
280 - { id: 5, class: sreg_128 }
281 body: |
282 bb.0:
283 successors: %bb.1
284 S_NOP 0, implicit-def %0
285 S_NOP 0, implicit-def %1
286 S_NOP 0, implicit-def %2
287 %3 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2
288 S_BRANCH %bb.1
289
290 bb.1:
291 successors: %bb.1, %bb.2
292 %4 = PHI %3, %bb.0, %5, %bb.1
293
294 ; let's swiffle some lanes around for fun...
295 %5 = REG_SEQUENCE %4:sub0, %subreg.sub0, %4:sub2, %subreg.sub1, %4:sub1, %subreg.sub2, %4:sub3, %subreg.sub3
296
297 S_CBRANCH_VCCNZ %bb.1, implicit undef %vcc
298 S_BRANCH %bb.2
299
300 bb.2:
301 S_NOP 0, implicit %4:sub0
302 S_NOP 0, implicit %4:sub3
303 ...
304 ---
305 # Check a loop that needs to be traversed multiple times to reach the fixpoint
306 # for the used lanes. The example reads the sub3 lane at the end; however, with
307 # each loop iteration we should get 1 more lane marked as we cycle the sublanes
308 # along. Sublanes sub0, sub1 and sub3 are rotated in the loop so only sub2
309 # should be dead.
310 # CHECK-LABEL: name: loop1
311 # CHECK: bb.0:
312 # CHECK: S_NOP 0, implicit-def %0
313 # CHECK: S_NOP 0, implicit-def %1
314 # CHECK: S_NOP 0, implicit-def dead %2
315 # CHECK: S_NOP 0, implicit-def %3
316 # CHECK: %4 = REG_SEQUENCE %0, {{[0-9]+}}, %1, {{[0-9]+}}, undef %2, {{[0-9]+}}, %3, {{[0-9]+}}
317
318 # CHECK: bb.1:
319 # CHECK: %5 = PHI %4, %bb.0, %6, %bb.1
320
321 # CHECK: %6 = REG_SEQUENCE %5:sub1, {{[0-9]+}}, %5:sub3, {{[0-9]+}}, undef %5:sub2, {{[0-9]+}}, %5:sub0, {{[0-9]+}}
322
323 # CHECK: bb.2:
324 # CHECK: S_NOP 0, implicit %6:sub3
325 name: loop1
326 isSSA: true
327 tracksRegLiveness: true
328 registers:
329 - { id: 0, class: sreg_32 }
330 - { id: 1, class: sreg_32 }
331 - { id: 2, class: sreg_32 }
332 - { id: 3, class: sreg_32 }
333 - { id: 4, class: sreg_128 }
334 - { id: 5, class: sreg_128 }
335 - { id: 6, class: sreg_128 }
336 body: |
337 bb.0:
338 successors: %bb.1
339 S_NOP 0, implicit-def %0
340 S_NOP 0, implicit-def %1
341 S_NOP 0, implicit-def dead %2
342 S_NOP 0, implicit-def %3
343 %4 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3
344 S_BRANCH %bb.1
345
346 bb.1:
347 successors: %bb.1, %bb.2
348 %5 = PHI %4, %bb.0, %6, %bb.1
349
350 ; rotate lanes, but skip sub2 lane...
351 %6 = REG_SEQUENCE %5:sub1, %subreg.sub0, %5:sub3, %subreg.sub1, %5:sub2, %subreg.sub2, %5:sub0, %subreg.sub3
352
353 S_CBRANCH_VCCNZ %bb.1, implicit undef %vcc
354 S_BRANCH %bb.2
355
356 bb.2:
357 S_NOP 0, implicit %6:sub3
358 ...
359 ---
360 # Similar to loop1 test, but check for fixpoint of defined lanes.
361 # Lanes are rotated among sub0, sub2 and sub3, so only sub1 should be dead/undef.
362 # CHECK-LABEL: name: loop2
363 # CHECK: bb.0:
364 # CHECK: S_NOP 0, implicit-def %0
365 # CHECK: %1 = REG_SEQUENCE %0, {{[0-9]+}}
366
367 # CHECK: bb.1:
368 # CHECK: %2 = PHI %1, %bb.0, %3, %bb.1
369
370 # CHECK: %3 = REG_SEQUENCE %2:sub3, {{[0-9]+}}, undef %2:sub1, {{[0-9]+}}, %2:sub0, {{[0-9]+}}, %2:sub2, {{[0-9]+}}
371
372 # CHECK: bb.2:
373 # CHECK: S_NOP 0, implicit %2:sub0
374 # CHECK: S_NOP 0, implicit undef %2:sub1
375 # CHECK: S_NOP 0, implicit %2:sub2
376 # CHECK: S_NOP 0, implicit %2:sub3
377 name: loop2
378 isSSA: true
379 tracksRegLiveness: true
380 registers:
381 - { id: 0, class: sreg_32 }
382 - { id: 1, class: sreg_128 }
383 - { id: 2, class: sreg_128 }
384 - { id: 3, class: sreg_128 }
385 body: |
386 bb.0:
387 successors: %bb.1
388 S_NOP 0, implicit-def %0
389 %1 = REG_SEQUENCE %0, %subreg.sub0
390 S_BRANCH %bb.1
391
392 bb.1:
393 successors: %bb.1, %bb.2
394 %2 = PHI %1, %bb.0, %3, %bb.1
395
396 ; rotate subreg lanes, skipping sub1
397 %3 = REG_SEQUENCE %2:sub3, %subreg.sub0, %2:sub1, %subreg.sub1, %2:sub0, %subreg.sub2, %2:sub2, %subreg.sub3
398
399 S_CBRANCH_VCCNZ %bb.1, implicit undef %vcc
400 S_BRANCH %bb.2
401
402 bb.2:
403 S_NOP 0, implicit %2:sub0
404 S_NOP 0, implicit undef %2:sub1
405 S_NOP 0, implicit %2:sub2
406 S_NOP 0, implicit %2:sub3
407 ...