llvm.org GIT mirror llvm / release_80 lib / Target / AMDGPU / SIFixWWMLiveness.cpp
release_80

Tree @release_80 (Download .tar.gz)

SIFixWWMLiveness.cpp @release_80raw · history · blame

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
//===-- SIFixWWMLiveness.cpp - Fix WWM live intervals ---------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Computations in WWM can overwrite values in inactive channels for
/// variables that the register allocator thinks are dead. This pass adds fake
/// uses of those variables to their def(s) to make sure that they aren't
/// overwritten.
///
/// As an example, consider this snippet:
/// %vgpr0 = V_MOV_B32_e32 0.0
/// if (...) {
///   %vgpr1 = ...
///   %vgpr2 = WWM killed %vgpr1
///   ... = killed %vgpr2
///   %vgpr0 = V_MOV_B32_e32 1.0
/// }
/// ... = %vgpr0
///
/// The live intervals of %vgpr0 don't overlap with those of %vgpr1. Normally,
/// we can safely allocate %vgpr0 and %vgpr1 in the same register, since
/// writing %vgpr1 would only write to channels that would be clobbered by the
/// second write to %vgpr0 anyways. But if %vgpr1 is written with WWM enabled,
/// it would clobber even the inactive channels for which the if-condition is
/// false, for which %vgpr0 is supposed to be 0. This pass adds an implicit use
/// of %vgpr0 to its def to make sure they aren't allocated to the
/// same register.
///
/// In general, we need to figure out what registers might have their inactive
/// channels which are eventually used accidentally clobbered by a WWM
/// instruction. We do that by spotting three separate cases of registers:
///
/// 1. A "then phi": the value resulting from phi elimination of a phi node at
///    the end of an if..endif (the case in the example above). If there is WWM
///    code in the "then", then we make the def at the end of the "then" branch
///    a partial def by adding an implicit use of the register.
///
/// 2. A "loop exit register": a value written inside a loop but used outside the
///    loop, where there is WWM code inside the loop. We add an implicit_def of
///    the register in the loop pre-header, and make the original def a partial
///    def by adding an implicit use of the register.
///
/// 3. A "loop exit phi": the value resulting from phi elimination of a phi node
///    in a loop header. If there is WWM code inside the loop, then we make all
///    defs inside the loop partial defs by adding an implicit use of the
///    register on each one.
///
/// Note that we do not need to consider an if..else..endif phi. We only need to
/// consider non-uniform control flow, and control flow structurization would
/// have transformed a non-uniform if..else..endif into two if..endifs.
///
/// The analysis to detect these cases relies on a property of the MIR
/// arising from this pass running straight after PHIElimination and before any
/// coalescing: that any virtual register with more than one definition must be
/// the new register added to lower a phi node by PHIElimination.
///
/// FIXME: We should detect whether a register in one of the above categories is
/// already live at the WWM code before deciding to add the implicit uses to
/// synthesize its liveness.
///
/// FIXME: I believe this whole scheme may be flawed due to the possibility of
/// the register allocator doing live interval splitting.
///
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SparseBitVector.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"

using namespace llvm;

#define DEBUG_TYPE "si-fix-wwm-liveness"

namespace {

class SIFixWWMLiveness : public MachineFunctionPass {
private:
  MachineDominatorTree *DomTree;
  MachineLoopInfo *LoopInfo;
  LiveIntervals *LIS = nullptr;
  const SIInstrInfo *TII;
  const SIRegisterInfo *TRI;
  MachineRegisterInfo *MRI;

  std::vector<MachineInstr *> WWMs;
  std::vector<MachineOperand *> ThenDefs;
  std::vector<std::pair<MachineOperand *, MachineLoop *>> LoopExitDefs;
  std::vector<std::pair<MachineOperand *, MachineLoop *>> LoopPhiDefs;

public:
  static char ID;

  SIFixWWMLiveness() : MachineFunctionPass(ID) {
    initializeSIFixWWMLivenessPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override { return "SI Fix WWM Liveness"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequiredID(MachineDominatorsID);
    AU.addRequiredID(MachineLoopInfoID);
    // Should preserve the same set that TwoAddressInstructions does.
    AU.addPreserved<SlotIndexes>();
    AU.addPreserved<LiveIntervals>();
    AU.addPreservedID(LiveVariablesID);
    AU.addPreservedID(MachineLoopInfoID);
    AU.addPreservedID(MachineDominatorsID);
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

private:
  void processDef(MachineOperand &DefOpnd);
  bool processThenDef(MachineOperand *DefOpnd);
  bool processLoopExitDef(MachineOperand *DefOpnd, MachineLoop *Loop);
  bool processLoopPhiDef(MachineOperand *DefOpnd, MachineLoop *Loop);
};

} // End anonymous namespace.

// Pass registration. The dependencies listed here are the two analyses that
// getAnalysisUsage marks as required: the machine dominator tree and the
// machine loop info.
INITIALIZE_PASS_BEGIN(SIFixWWMLiveness, DEBUG_TYPE,
                "SI fix WWM liveness", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_END(SIFixWWMLiveness, DEBUG_TYPE,
                "SI fix WWM liveness", false, false)

// Storage for the pass ID; the constructor hands it to MachineFunctionPass.
char SIFixWWMLiveness::ID = 0;

// Reference to the pass ID, usable from outside the anonymous namespace that
// hides the pass class.
char &llvm::SIFixWWMLivenessID = SIFixWWMLiveness::ID;

// Factory for the pass: returns a newly heap-allocated SIFixWWMLiveness.
// NOTE(review): the caller (presumably the pass manager) is expected to take
// ownership of the returned object -- confirm against the call site.
FunctionPass *llvm::createSIFixWWMLivenessPass() {
  return new SIFixWWMLiveness();
}

// Pass entry point. Scans MF once, recording every EXIT_WWM instruction and
// classifying every VGPR def via processDef. If at least one EXIT_WWM was
// seen, the recorded then phi, loop exit and loop phi defs are processed in
// that order. Returns true if any instruction was modified.
bool SIFixWWMLiveness::runOnMachineFunction(MachineFunction &MF) {
  LLVM_DEBUG(dbgs() << "SIFixWWMLiveness: function " << MF.getName() << "\n");

  // LiveIntervals is optional here: it is only picked up if some earlier pass
  // already made it available.
  LIS = getAnalysisIfAvailable<LiveIntervals>();

  const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
  TII = Subtarget.getInstrInfo();
  TRI = &TII->getRegisterInfo();
  MRI = &MF.getRegInfo();

  DomTree = &getAnalysis<MachineDominatorTree>();
  LoopInfo = &getAnalysis<MachineLoopInfo>();

  // Single walk over the function: collect the WWM sections and the candidate
  // registers whose liveness may need to be modified.
  for (MachineBasicBlock &Block : MF) {
    for (MachineInstr &Inst : Block) {
      if (Inst.getOpcode() == AMDGPU::EXIT_WWM) {
        WWMs.push_back(&Inst);
        continue;
      }
      for (MachineOperand &Def : Inst.defs()) {
        if (!Def.isReg())
          continue;
        if (TRI->isVGPR(*MRI, Def.getReg()))
          processDef(Def);
      }
    }
  }

  // Without any WWM section there is nothing to protect, so the candidate
  // lists are simply discarded below.
  bool Changed = false;
  if (!WWMs.empty()) {
    for (MachineOperand *ThenDef : ThenDefs)
      Changed |= processThenDef(ThenDef);
    for (const auto &ExitEntry : LoopExitDefs)
      Changed |= processLoopExitDef(ExitEntry.first, ExitEntry.second);
    for (const auto &PhiEntry : LoopPhiDefs)
      Changed |= processLoopPhiDef(PhiEntry.first, PhiEntry.second);
  }

  // Reset the per-function state so the next function starts clean.
  WWMs.clear();
  ThenDefs.clear();
  LoopExitDefs.clear();
  LoopPhiDefs.clear();

  return Changed;
}

// Scan-time hook for one operand that defines a VGPR. Decides whether the
// register is a then phi, a loop exit register or a loop phi, and records the
// operand in ThenDefs, LoopExitDefs or LoopPhiDefs so that it can be revisited
// once all WWM sections are known. Registers fitting none of these categories
// are dropped.
//
// The classification leans on this pass running shortly after PHIElimination:
// any multi-def register is assumed to be a lowered phi, and thus to have each
// def in a separate basic block.
//
// NOTE(review): hasOneUse/use_begin/use_instructions do not appear to skip
// debug uses -- confirm that DBG_VALUEs cannot influence the result here.
void SIFixWWMLiveness::processDef(MachineOperand &DefOpnd) {
  const unsigned Reg = DefOpnd.getReg();
  const MachineInstr *ThisDef = DefOpnd.getParent();
  const MachineBasicBlock *ThisBlock = ThisDef->getParent();

  // Gather every defining instruction, keeping the current one at index 0.
  SmallVector<const MachineInstr *, 4> AllDefs;
  AllDefs.push_back(ThisDef);
  for (const MachineInstr &DefMI : MRI->def_instructions(Reg)) {
    if (&DefMI != ThisDef)
      AllDefs.push_back(&DefMI);
  }

  // Only carry on if this def dominates all the other defs. Otherwise either
  // the dominating def is handled when the scan reaches it, or no def
  // dominates the rest. The latter is an eliminated phi from an
  // if..else..endif or similar, which must be uniform control flow and so can
  // be ignored.
  for (unsigned Idx = 1, End = AllDefs.size(); Idx < End; ++Idx) {
    if (!DomTree->dominates(ThisBlock, AllDefs[Idx]->getParent()))
      return;
  }

  // Then phi (lowered if..endif phi): exactly two defs, the dominating def's
  // block has two successors, and the register's only use sits in one of
  // those successors. WWM code inside the "then" clause is looked for later,
  // in processThenDef.
  if (AllDefs.size() == 2 && ThisBlock->succ_size() == 2 &&
      MRI->hasOneUse(Reg)) {
    const MachineBasicBlock *UseBlock =
        MRI->use_begin(Reg)->getParent()->getParent();
    if (ThisBlock->isSuccessor(UseBlock)) {
      LLVM_DEBUG(dbgs() << printReg(Reg, TRI) << " is a then phi reg\n");
      ThenDefs.push_back(&DefOpnd);
      return;
    }
  }

  // Loop exit register: a single-def (non-lowered-phi) register defined inside
  // a loop and used outside it. Record the outermost loop that contains the
  // def while leaving some use outside. WWM code inside that loop is looked
  // for later, in processLoopExitDef.
  if (AllDefs.size() == 1) {
    MachineLoop *Loop = LoopInfo->getLoopFor(ThisBlock);
    if (!Loop)
      return;
    bool UsedOutsideLoop = false;
    for (auto &UseMI : MRI->use_instructions(Reg)) {
      auto UseBlock = UseMI.getParent();
      if (!Loop->contains(UseBlock)) {
        UsedOutsideLoop = true;
        // Widen to enclosing loops for as long as they still exclude this use.
        for (MachineLoop *Parent = Loop->getParentLoop();
             Parent && !Parent->contains(UseBlock);
             Parent = Loop->getParentLoop())
          Loop = Parent;
      }
    }
    if (!UsedOutsideLoop)
      return;
    LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
        << " is a loop exit reg with loop header at "
        << "bb." << Loop->getHeader()->getNumber() << "\n");
    LoopExitDefs.emplace_back(&DefOpnd, Loop);
    return;
  }

  // Loop phi (lowered single-preheader-loop phi): a multi-def register whose
  // dominating def is outside the loop and whose other defs are all inside it,
  // with a single use in the loop header. Multi-preheader loops are ignored on
  // the basis that the structurizer does not allow them for non-uniform loops.
  // WWM code inside the loop is looked for later, in processLoopPhiDef.
  if (!MRI->hasOneUse(Reg))
    return;
  MachineBasicBlock *UseBlock = MRI->use_begin(Reg)->getParent()->getParent();
  MachineLoop *Loop = LoopInfo->getLoopFor(UseBlock);
  const bool UseIsHeaderOfLoopAfterDef =
      Loop && Loop->getHeader() == UseBlock && !Loop->contains(ThisBlock);
  if (!UseIsHeaderOfLoopAfterDef) {
    LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
        << " is multi-def but single use not in loop header\n");
    return;
  }
  for (unsigned Idx = 1, End = AllDefs.size(); Idx < End; ++Idx) {
    if (!Loop->contains(AllDefs[Idx]->getParent()))
      return;
  }
  LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
      << " is a loop phi reg with loop header at "
      << "bb." << Loop->getHeader()->getNumber() << "\n");
  LoopPhiDefs.emplace_back(&DefOpnd, Loop);
}

// Handle a then phi register: two defs, one dominating the other, and a single
// use in a successor of the dominating def's block. DefOpnd is the dominating
// def. If any recorded WWM instruction lies in the "then" clause, the other
// def gets an implicit use of the register, making it a partial def so that
// the register's liveness runs through the WWM code.
// Returns true if an instruction was modified.
bool SIFixWWMLiveness::processThenDef(MachineOperand *DefOpnd) {
  MachineInstr *DomDef = DefOpnd->getParent();
  LLVM_DEBUG(dbgs() << "Processing then def: " << *DomDef);
  if (DomDef->getOpcode() == TargetOpcode::IMPLICIT_DEF) {
    // An undef dominating def carries no value worth protecting.
    LLVM_DEBUG(dbgs() << "  ignoring as dominating def is undef\n");
    return false;
  }

  const unsigned Reg = DefOpnd->getReg();
  MachineBasicBlock *IfBlock = DomDef->getParent();
  // The block holding the register's use is the endif block.
  MachineBasicBlock *EndifBlock = MRI->use_instr_begin(Reg)->getParent();

  // WWM code counts as being inside the then branch when it is dominated by
  // the if block but not by the endif block.
  bool FoundWWM = false;
  for (MachineInstr *WWM : WWMs) {
    MachineBasicBlock *WWMBlock = WWM->getParent();
    if (!DomTree->dominates(IfBlock, WWMBlock) ||
        DomTree->dominates(EndifBlock, WWMBlock))
      continue;
    LLVM_DEBUG(dbgs() << "  contains WWM: " << *WWM);
    FoundWWM = true;
    break;
  }
  if (!FoundWWM)
    return false;

  // Locate the def that is not the dominating one.
  MachineInstr *ThenDef = nullptr;
  for (MachineInstr &Candidate : MRI->def_instructions(Reg)) {
    if (&Candidate != DomDef)
      ThenDef = &Candidate;
  }

  // Turn it into a partial def by letting it also read the register.
  ThenDef->addOperand(MachineOperand::CreateReg(Reg, false, /*isImp=*/true));
  LLVM_DEBUG(dbgs() << *ThenDef);
  return true;
}

// Process a loop exit def, that is, a register with a single def inside a loop
// that has a use outside the loop. DefOpnd is that def and Loop is the loop
// recorded for it by processDef. If any recorded WWM instruction lies inside
// Loop, then:
//  - an IMPLICIT_DEF of the register is inserted before the first terminator
//    of every predecessor of the loop header that is outside the loop;
//  - kill marks are removed from the register's uses;
//  - the def is made a partial def by adding an implicit use of the register,
//    so that its liveness goes round the loop and through the WWM code.
// Returns true if anything was modified.
bool SIFixWWMLiveness::processLoopExitDef(MachineOperand *DefOpnd,
      MachineLoop *Loop) {
  LLVM_DEBUG(dbgs() << "Processing loop exit def: " << *DefOpnd->getParent());
  // Check whether there is WWM code inside the loop.
  bool ContainsWWM = false;
  for (MachineInstr *WWM : WWMs) {
    if (Loop->contains(WWM->getParent())) {
      LLVM_DEBUG(dbgs() << "  contains WWM: " << *WWM);
      ContainsWWM = true;
      break;
    }
  }
  if (!ContainsWWM)
    return false;
  unsigned Reg = DefOpnd->getReg();
  // Add a new implicit_def in loop preheader(s).
  for (MachineBasicBlock *Pred : Loop->getHeader()->predecessors()) {
    if (!Loop->contains(Pred)) {
      auto ImplicitDef = BuildMI(*Pred, Pred->getFirstTerminator(), DebugLoc(),
          TII->get(TargetOpcode::IMPLICIT_DEF), Reg);
      LLVM_DEBUG(dbgs() << *ImplicitDef);
      (void)ImplicitDef;
    }
  }
  // Remove kill marks from uses, as processLoopPhiDef does. The implicit use
  // added below reads the register again on the next trip round the loop, so
  // a use inside the loop no longer ends the value's live range and a kill
  // mark on it would be stale. Dropping a kill mark is always conservative.
  for (MachineOperand &Use : MRI->use_operands(Reg))
    Use.setIsKill(false);
  // Make the original def partial.
  DefOpnd->getParent()->addOperand(MachineOperand::CreateReg(
          Reg, false, /*isImp=*/true));
  LLVM_DEBUG(dbgs() << *DefOpnd->getParent());
  return true;
}

// Handle a loop phi register: a multi-def register whose dominating def
// (DefOpnd) is in the loop pre-header and whose remaining defs are in
// backedges of Loop. If any recorded WWM instruction lies inside Loop, kill
// marks are removed from the register's uses and every def other than the
// dominating one gets an implicit use of the register, making it a partial
// def so that the liveness runs through the WWM code.
// Returns true if anything was modified.
bool SIFixWWMLiveness::processLoopPhiDef(MachineOperand *DefOpnd,
      MachineLoop *Loop) {
  MachineInstr *DomDef = DefOpnd->getParent();
  LLVM_DEBUG(dbgs() << "Processing loop phi def: " << *DomDef);

  // Nothing to do unless some WWM code sits inside the loop.
  bool FoundWWM = false;
  for (MachineInstr *WWM : WWMs) {
    if (!Loop->contains(WWM->getParent()))
      continue;
    LLVM_DEBUG(dbgs() << "  contains WWM: " << *WWM);
    FoundWWM = true;
    break;
  }
  if (!FoundWWM)
    return false;

  const unsigned Reg = DefOpnd->getReg();

  // Drop the kill mark from every use of the register.
  for (MachineOperand &UseOpnd : MRI->use_operands(Reg))
    UseOpnd.setIsKill(false);

  // Snapshot the non-dominating defs before touching them, presumably because
  // adding a register operand alters the list that def_instructions walks.
  SmallVector<MachineInstr *, 4> BackedgeDefs;
  for (MachineInstr &DefMI : MRI->def_instructions(Reg)) {
    if (&DefMI != DomDef)
      BackedgeDefs.push_back(&DefMI);
  }

  // Make each of them a partial def by letting it also read the register.
  for (MachineInstr *DefMI : BackedgeDefs) {
    DefMI->addOperand(MachineOperand::CreateReg(Reg, false, /*isImp=*/true));
    LLVM_DEBUG(dbgs() << *DefMI);
  }
  return true;
}