llvm.org GIT mirror llvm / 40a24a8
[llvm-mca] Improved support for dependency-breaking instructions. The tool assumes that a zero-latency instruction that doesn't consume hardware resources is an optimizable dependency-breaking instruction. That means, it doesn't have to wait on register input operands, and it doesn't consume any physical register. The PRF knows how to optimize it at register renaming stage. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@332249 91177308-0d34-0410-b5e6-96231b3b80d8 Andrea Di Biagio 1 year, 5 months ago
4 changed file(s) with 54 addition(s) and 35 deletion(s). Raw diff Collapse all Expand all
9090 }
9191 }
9292
93 void RegisterFile::createNewMappings(IndexPlusCostPairTy Entry,
94 MutableArrayRef UsedPhysRegs) {
93 void RegisterFile::allocatePhysRegs(IndexPlusCostPairTy Entry,
94 MutableArrayRef UsedPhysRegs) {
9595 unsigned RegisterFileIndex = Entry.first;
9696 unsigned Cost = Entry.second;
9797 if (RegisterFileIndex) {
105105 UsedPhysRegs[0] += Cost;
106106 }
107107
108 void RegisterFile::removeMappings(IndexPlusCostPairTy Entry,
109 MutableArrayRef FreedPhysRegs) {
108 void RegisterFile::freePhysRegs(IndexPlusCostPairTy Entry,
109 MutableArrayRef FreedPhysRegs) {
110110 unsigned RegisterFileIndex = Entry.first;
111111 unsigned Cost = Entry.second;
112112 if (RegisterFileIndex) {
120120 FreedPhysRegs[0] += Cost;
121121 }
122122
123 void RegisterFile::addRegisterMapping(WriteState &WS,
124 MutableArrayRef UsedPhysRegs) {
123 void RegisterFile::addRegisterWrite(WriteState &WS,
124 MutableArrayRef UsedPhysRegs,
125 bool ShouldAllocatePhysRegs) {
125126 unsigned RegID = WS.getRegisterID();
126127 assert(RegID && "Adding an invalid register definition?");
127128
130131 for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I)
131132 RegisterMappings[*I].first = &WS;
132133
133 createNewMappings(Mapping.second, UsedPhysRegs);
134 // No physical registers are allocated for instructions that are optimized in
135 // hardware. For example, zero-latency data-dependency breaking instructions
136 // don't consume physical registers.
137 if (ShouldAllocatePhysRegs)
138 allocatePhysRegs(Mapping.second, UsedPhysRegs);
134139
135140 // If this is a partial update, then we are done.
136141 if (!WS.fullyUpdatesSuperRegs())
140145 RegisterMappings[*I].first = &WS;
141146 }
142147
143 void RegisterFile::invalidateRegisterMapping(
144 const WriteState &WS, MutableArrayRef FreedPhysRegs) {
148 void RegisterFile::removeRegisterWrite(
149 const WriteState &WS, MutableArrayRef FreedPhysRegs,
150 bool ShouldFreePhysRegs) {
145151 unsigned RegID = WS.getRegisterID();
146152 bool ShouldInvalidateSuperRegs = WS.fullyUpdatesSuperRegs();
147153
153159 if (!Mapping.first)
154160 return;
155161
156 removeMappings(Mapping.second, FreedPhysRegs);
162 if (ShouldFreePhysRegs)
163 freePhysRegs(Mapping.second, FreedPhysRegs);
157164
158165 if (Mapping.first == &WS)
159166 Mapping.first = nullptr;
260267 void DispatchUnit::notifyInstructionRetired(const InstRef &IR) {
261268 LLVM_DEBUG(dbgs() << "[E] Instruction Retired: " << IR << '\n');
262269 SmallVector FreedRegs(RAT->getNumRegisterFiles());
270 const InstrDesc &Desc = IR.getInstruction()->getDesc();
271
263272 for (const std::unique_ptr &WS : IR.getInstruction()->getDefs())
264 RAT->invalidateRegisterMapping(*WS.get(), FreedRegs);
273 RAT->removeRegisterWrite(*WS.get(), FreedRegs, !Desc.isZeroLatency());
265274 Owner->notifyInstructionEvent(HWInstructionRetiredEvent(IR, FreedRegs));
266275 Owner->eraseInstruction(IR);
267276 }
338347 AvailableEntries -= NumMicroOps;
339348 }
340349
341 // Update RAW dependencies if this instruction is not a zero-latency
342 // instruction. The assumption is that a zero-latency instruction doesn't
343 // require to be issued to the scheduler for execution. More importantly, it
344 // doesn't have to wait on the register input operands.
345 if (Desc.MaxLatency || !Desc.Resources.empty())
350 // A dependency-breaking instruction doesn't have to wait on the register
351 // input operands, and it is often optimized at register renaming stage.
352 // Update RAW dependencies if this instruction is not a dependency-breaking
353 // instruction. A dependency-breaking instruction is a zero-latency
354 // instruction that doesn't consume hardware resources.
355 // An example of dependency-breaking instruction on X86 is a zero-idiom XOR.
356 if (!Desc.isZeroLatency())
346357 for (std::unique_ptr &RS : IS.getUses())
347358 updateRAWDependencies(*RS, STI);
348359
349 // Allocate new mappings.
360 // By default, a dependency-breaking zero-latency instruction is expected to
361 // be optimized at register renaming stage. That means, no physical register
362 // is allocated to the instruction.
350363 SmallVector RegisterFiles(RAT->getNumRegisterFiles());
351364 for (std::unique_ptr &WS : IS.getDefs())
352 RAT->addRegisterMapping(*WS, RegisterFiles);
365 RAT->addRegisterWrite(*WS, RegisterFiles, !Desc.isZeroLatency());
353366
354367 // Reserve slots in the RCU, and notify the instruction that it has been
355368 // dispatched to the schedulers for execution.
103103
104104 // Allocates register mappings in register file specified by the
105105 // IndexPlusCostPairTy object. This method is called from addRegisterWrite.
106 void createNewMappings(IndexPlusCostPairTy IPC,
107 llvm::MutableArrayRef UsedPhysRegs);
106 void allocatePhysRegs(IndexPlusCostPairTy IPC,
107 llvm::MutableArrayRef UsedPhysRegs);
108108
109109 // Removes a previously allocated mapping from the register file referenced
110110 // by the IndexPlusCostPairTy object. This method is called from
111111 // removeRegisterWrite.
112 void removeMappings(IndexPlusCostPairTy IPC,
113 llvm::MutableArrayRef FreedPhysRegs);
112 void freePhysRegs(IndexPlusCostPairTy IPC,
113 llvm::MutableArrayRef FreedPhysRegs);
114114
115115 // Create an instance of RegisterMappingTracker for every register file
116116 // specified by the processor model.
125125 initialize(SM, NumRegs);
126126 }
127127
128 // Creates a new register mapping for RegID.
129 // This reserves a microarchitectural register in every register file that
130 // contains RegID.
131 void addRegisterMapping(WriteState &WS,
132 llvm::MutableArrayRef UsedPhysRegs);
133
134 // Invalidates register mappings associated to the input WriteState object.
135 // This releases previously allocated mappings for the physical register
136 // associated to the WriteState.
137 void invalidateRegisterMapping(const WriteState &WS,
138 llvm::MutableArrayRef FreedPhysRegs);
128 // This method updates the data dependency graph by inserting a new register
129 // definition. This method is also responsible for updating the number of used
130 // physical registers in the register file(s). The number of physical
131 // registers is updated only if flag ShouldAllocatePhysRegs is set.
132 void addRegisterWrite(WriteState &WS,
133 llvm::MutableArrayRef UsedPhysRegs,
134 bool ShouldAllocatePhysRegs = true);
135
136 // Updates the data dependency graph by removing a write. It also updates the
137 // internal state of the register file(s) by freeing physical registers.
138 // The number of physical registers is updated only if flag ShouldFreePhysRegs
139 // is set.
140 void removeRegisterWrite(const WriteState &WS,
141 llvm::MutableArrayRef FreedPhysRegs,
142 bool ShouldFreePhysRegs = true);
139143
140144 // Checks if there are enough microarchitectural registers in the register
141145 // files. Returns a "response mask" where each bit is the response from a
268268 bool MayLoad;
269269 bool MayStore;
270270 bool HasSideEffects;
271
272 // A zero latency instruction doesn't consume any scheduler resources.
273 bool isZeroLatency() const { return !MaxLatency && Resources.empty(); }
271274 };
272275
273276 /// An instruction dispatched to the out-of-order backend.
259259 // targets, zero-idiom instructions (for example: a xor that clears the value
260260 // of a register) are treated specially, and are often eliminated at register
261261 // renaming stage.
262 bool IsZeroLatency = !Desc.MaxLatency && Desc.Resources.empty();
263262
264263 // Instructions that use an in-order dispatch/issue processor resource must be
265264 // issued immediately to the pipeline(s). Any other in-order buffered
266265 // resources (i.e. BufferSize=1) is consumed.
267266
268 if (!IsZeroLatency && !Resources->mustIssueImmediately(Desc)) {
267 if (!Desc.isZeroLatency() && !Resources->mustIssueImmediately(Desc)) {
269268 LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding " << IR
270269 << " to the Ready Queue\n");
271270 ReadyQueue[IR.getSourceIndex()] = IR.getInstruction();