llvm.org GIT mirror llvm / daf3fe1
[llvm-mca] Add extra counters for move elimination in view RegisterFileStatistics. This patch teaches view RegisterFileStatistics how to report events for optimizable register moves. For each processor register file, view RegisterFileStatistics reports the following extra information: - Number of optimizable register moves - Number of register moves eliminated - Number of zero moves (i.e. register moves that propagate a zero) - Max Number of moves eliminated per cycle. Differential Revision: https://reviews.llvm.org/D53976 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@345865 91177308-0d34-0410-b5e6-96231b3b80d8 Andrea Di Biagio 10 months ago
12 changed file(s) with 183 addition(s) and 47 deletion(s). Raw diff Collapse all Expand all
3838 # CHECK-NEXT: Number of physical registers: 72
3939 # CHECK-NEXT: Total number of mappings created: 3
4040 # CHECK-NEXT: Max number of mappings used: 3
41 # CHECK-NEXT: Number of optimizable moves: 3
42 # CHECK-NEXT: Number of moves eliminated: 3 (100.0%)
43 # CHECK-NEXT: Number of zero moves: 3 (100.0%)
44 # CHECK-NEXT: Max moves eliminated per cycle: 1
4145
4246 # CHECK: * Register File #2 -- JIntegerPRF:
4347 # CHECK-NEXT: Number of physical registers: 64
4848 # CHECK-NEXT: Number of physical registers: 72
4949 # CHECK-NEXT: Total number of mappings created: 0
5050 # CHECK-NEXT: Max number of mappings used: 0
51 # CHECK-NEXT: Number of optimizable moves: 21
52 # CHECK-NEXT: Number of moves eliminated: 21 (100.0%)
53 # CHECK-NEXT: Number of zero moves: 21 (100.0%)
54 # CHECK-NEXT: Max moves eliminated per cycle: 2
5155
5256 # CHECK: * Register File #2 -- JIntegerPRF:
5357 # CHECK-NEXT: Number of physical registers: 64
4343 # CHECK-NEXT: Number of physical registers: 72
4444 # CHECK-NEXT: Total number of mappings created: 0
4545 # CHECK-NEXT: Max number of mappings used: 0
46 # CHECK-NEXT: Number of optimizable moves: 18
47 # CHECK-NEXT: Number of moves eliminated: 18 (100.0%)
48 # CHECK-NEXT: Number of zero moves: 18 (100.0%)
49 # CHECK-NEXT: Max moves eliminated per cycle: 2
4650
4751 # CHECK: * Register File #2 -- JIntegerPRF:
4852 # CHECK-NEXT: Number of physical registers: 64
4444 # CHECK-NEXT: Number of physical registers: 64
4545 # CHECK-NEXT: Total number of mappings created: 0
4646 # CHECK-NEXT: Max number of mappings used: 0
47 # CHECK-NEXT: Number of optimizable moves: 12
48 # CHECK-NEXT: Number of moves eliminated: 12 (100.0%)
49 # CHECK-NEXT: Number of zero moves: 12 (100.0%)
50 # CHECK-NEXT: Max moves eliminated per cycle: 2
4751
4852 # CHECK: Resources:
4953 # CHECK-NEXT: [0] - JALU0
4444 # CHECK-NEXT: Number of physical registers: 64
4545 # CHECK-NEXT: Total number of mappings created: 0
4646 # CHECK-NEXT: Max number of mappings used: 0
47 # CHECK-NEXT: Number of optimizable moves: 12
48 # CHECK-NEXT: Number of moves eliminated: 12 (100.0%)
49 # CHECK-NEXT: Number of zero moves: 12 (100.0%)
50 # CHECK-NEXT: Max moves eliminated per cycle: 2
4751
4852 # CHECK: Resources:
4953 # CHECK-NEXT: [0] - JALU0
2020 RegisterFileStatistics::RegisterFileStatistics(const MCSubtargetInfo &sti)
2121 : STI(sti) {
2222 const MCSchedModel &SM = STI.getSchedModel();
23 RegisterFileUsage Empty = {0, 0, 0};
23 RegisterFileUsage RFUEmpty = {0, 0, 0};
24 MoveEliminationInfo MEIEmpty = {0, 0, 0, 0, 0};
2425 if (!SM.hasExtraProcessorInfo()) {
2526 // Assume a single register file.
26 RegisterFiles.emplace_back(Empty);
27 PRFUsage.emplace_back(RFUEmpty);
28 MoveElimInfo.emplace_back(MEIEmpty);
2729 return;
2830 }
2931
3436 // be skipped. If there are no user defined register files, then reserve a
3537 // single entry for the default register file at index #0.
3638 unsigned NumRegFiles = std::max(PI.NumRegisterFiles, 1U);
37 RegisterFiles.resize(NumRegFiles);
38 std::fill(RegisterFiles.begin(), RegisterFiles.end(), Empty);
39
40 PRFUsage.resize(NumRegFiles);
41 std::fill(PRFUsage.begin(), PRFUsage.end(), RFUEmpty);
42
43 MoveElimInfo.resize(NumRegFiles);
44 std::fill(MoveElimInfo.begin(), MoveElimInfo.end(), MEIEmpty);
45 }
46
// Folds the per-register-file physical register counts in UsedPhysRegs into
// the usage statistics stored in PRFUsage.
//
// Entry I of UsedPhysRegs is applied to entry I of PRFUsage: it is added to
// both the running count of live mappings and the total number of mappings,
// and the high-water mark is raised if the running count now exceeds it.
// Invoked from onEvent() for HWInstructionEvent::Dispatched events.
//
// NOTE(review): UsedPhysRegs is indexed up to PRFUsage.size() without a
// bounds check here; presumably the caller guarantees one entry per register
// file -- confirm against the dispatch event producer.
47 void RegisterFileStatistics::updateRegisterFileUsage(
48 ArrayRef UsedPhysRegs) {
49 for (unsigned I = 0, E = PRFUsage.size(); I < E; ++I) {
50 RegisterFileUsage &RFU = PRFUsage[I];
51 unsigned NumUsedPhysRegs = UsedPhysRegs[I];
52 RFU.CurrentlyUsedMappings += NumUsedPhysRegs;
53 RFU.TotalMappings += NumUsedPhysRegs;
// Track the peak number of simultaneously used mappings.
54 RFU.MaxUsedMappings =
55 std::max(RFU.MaxUsedMappings, RFU.CurrentlyUsedMappings);
56 }
57 }
58
// Updates the move elimination counters for instruction Inst.
//
// Instructions for which isOptimizableMove() is false are ignored. Otherwise
// the instruction is expected to have exactly one register definition and one
// register use (enforced by the asserts below), and the counters of the
// register file that serves the definition are updated:
//  - the number of move elimination candidates is always incremented;
//  - the per-cycle count of eliminated moves is incremented if the write was
//    eliminated (that count is folded into the totals by onCycleEnd());
//  - the number of zero moves is incremented if the write is a zero write and
//    the read is a zero read. This check is independent of whether the move
//    was eliminated.
// Invoked from onEvent() for HWInstructionEvent::Dispatched events.
59 void RegisterFileStatistics::updateMoveElimInfo(const Instruction &Inst) {
60 if (!Inst.isOptimizableMove())
61 return;
62
63 assert(Inst.getDefs().size() == 1 && "Expected a single definition!");
64 assert(Inst.getUses().size() == 1 && "Expected a single register use!");
65 const WriteState &WS = Inst.getDefs()[0];
66 const ReadState &RS = Inst.getUses()[0];
67
// Statistics are attributed to the register file of the (single) definition.
68 MoveEliminationInfo &Info =
69 MoveElimInfo[Inst.getDefs()[0].getRegisterFileID()];
70 Info.TotalMoveEliminationCandidates++;
71 if (WS.isEliminated())
72 Info.CurrentMovesEliminated++;
73 if (WS.isWriteZero() && RS.isReadZero())
74 Info.TotalMovesThatPropagateZero++;
3975 }
4076
4177 void RegisterFileStatistics::onEvent(const HWInstructionEvent &Event) {
4480 break;
4581 case HWInstructionEvent::Retired: {
4682 const auto &RE = static_cast(Event);
47 for (unsigned I = 0, E = RegisterFiles.size(); I < E; ++I)
48 RegisterFiles[I].CurrentlyUsedMappings -= RE.FreedPhysRegs[I];
83 for (unsigned I = 0, E = PRFUsage.size(); I < E; ++I)
84 PRFUsage[I].CurrentlyUsedMappings -= RE.FreedPhysRegs[I];
4985 break;
5086 }
5187 case HWInstructionEvent::Dispatched: {
5288 const auto &DE = static_cast(Event);
53 for (unsigned I = 0, E = RegisterFiles.size(); I < E; ++I) {
54 RegisterFileUsage &RFU = RegisterFiles[I];
55 unsigned NumUsedPhysRegs = DE.UsedPhysRegs[I];
56 RFU.CurrentlyUsedMappings += NumUsedPhysRegs;
57 RFU.TotalMappings += NumUsedPhysRegs;
58 RFU.MaxUsedMappings =
59 std::max(RFU.MaxUsedMappings, RFU.CurrentlyUsedMappings);
60 }
89 updateRegisterFileUsage(DE.UsedPhysRegs);
90 updateMoveElimInfo(*DE.IR.getInstruction());
6191 }
92 }
93 }
94
// End-of-cycle bookkeeping for the move elimination statistics.
//
// For every register file: raises the per-cycle maximum if more moves were
// eliminated during this cycle than in any previous one, adds this cycle's
// count to the running total, and resets the per-cycle counter to zero.
95 void RegisterFileStatistics::onCycleEnd() {
96 for (MoveEliminationInfo &MEI : MoveElimInfo) {
97 unsigned &CurrentMax = MEI.MaxMovesEliminatedPerCycle;
98 CurrentMax = std::max(CurrentMax, MEI.CurrentMovesEliminated);
99 MEI.TotalMovesEliminated += MEI.CurrentMovesEliminated;
// Start the next cycle with a fresh per-cycle counter.
100 MEI.CurrentMovesEliminated = 0;
62101 }
63102 }
64103
67106 raw_string_ostream TempStream(Buffer);
68107
69108 TempStream << "\n\nRegister File statistics:";
70 const RegisterFileUsage &GlobalUsage = RegisterFiles[0];
109 const RegisterFileUsage &GlobalUsage = PRFUsage[0];
71110 TempStream << "\nTotal number of mappings created: "
72111 << GlobalUsage.TotalMappings;
73112 TempStream << "\nMax number of mappings used: "
74113 << GlobalUsage.MaxUsedMappings << '\n';
75114
76 for (unsigned I = 1, E = RegisterFiles.size(); I < E; ++I) {
77 const RegisterFileUsage &RFU = RegisterFiles[I];
115 for (unsigned I = 1, E = PRFUsage.size(); I < E; ++I) {
116 const RegisterFileUsage &RFU = PRFUsage[I];
78117 // Obtain the register file descriptor from the scheduling model.
79118 assert(STI.getSchedModel().hasExtraProcessorInfo() &&
80119 "Unable to find register file info!");
97136 << RFU.TotalMappings;
98137 TempStream << "\n Max number of mappings used: "
99138 << RFU.MaxUsedMappings << '\n';
139 const MoveEliminationInfo &MEI = MoveElimInfo[I];
140
141 if (MEI.TotalMoveEliminationCandidates) {
142 TempStream << " Number of optimizable moves: "
143 << MEI.TotalMoveEliminationCandidates;
144 double EliminatedMovProportion = (double)MEI.TotalMovesEliminated /
145 MEI.TotalMoveEliminationCandidates *
146 100.0;
147 double ZeroMovProportion = (double)MEI.TotalMovesThatPropagateZero /
148 MEI.TotalMoveEliminationCandidates * 100.0;
149 TempStream << "\n Number of moves eliminated: "
150 << MEI.TotalMovesEliminated << " "
151 << format("(%.1f%%)",
152 floor((EliminatedMovProportion * 10) + 0.5) / 10);
153 TempStream << "\n Number of zero moves: "
154 << MEI.TotalMovesThatPropagateZero << " "
155 << format("(%.1f%%)",
156 floor((ZeroMovProportion * 10) + 0.5) / 10);
157 TempStream << "\n Max moves eliminated per cycle: "
158 << MEI.MaxMovesEliminatedPerCycle << '\n';
159 }
100160 }
101161
102162 TempStream.flush();
2020 /// Number of physical registers: 72
2121 /// Total number of mappings created: 0
2222 /// Max number of mappings used: 0
23 /// Number of optimizable moves: 200
24 /// Number of moves eliminated: 200 (100.0%)
25 /// Number of zero moves: 200 (100.0%)
26 /// Max moves eliminated per cycle: 2
2327 ///
2428 /// * Register File #2 -- IntegerPRF:
2529 /// Number of physical registers: 64
4852 unsigned CurrentlyUsedMappings;
4953 };
5054
// Move elimination counters tracked for a single register file.
//
//  TotalMoveEliminationCandidates - incremented by updateMoveElimInfo() for
//      every optimizable move; printed as "Number of optimizable moves".
//  TotalMovesEliminated - accumulated by onCycleEnd() from
//      CurrentMovesEliminated; printed as "Number of moves eliminated".
//  TotalMovesThatPropagateZero - incremented by updateMoveElimInfo() when the
//      write is a zero write and the read is a zero read; printed as
//      "Number of zero moves".
//  MaxMovesEliminatedPerCycle - largest value CurrentMovesEliminated had at
//      the end of any cycle; printed as "Max moves eliminated per cycle".
//  CurrentMovesEliminated - moves eliminated since the last onCycleEnd(),
//      which resets it to zero.
55 struct MoveEliminationInfo {
56 unsigned TotalMoveEliminationCandidates;
57 unsigned TotalMovesEliminated;
58 unsigned TotalMovesThatPropagateZero;
59 unsigned MaxMovesEliminatedPerCycle;
60 unsigned CurrentMovesEliminated;
61 };
62
5163 // There is one entry for each register file implemented by the processor.
52 llvm::SmallVector RegisterFiles;
64 llvm::SmallVector PRFUsage;
65 llvm::SmallVector MoveElimInfo;
66
67 void updateRegisterFileUsage(ArrayRef UsedPhysRegs);
68 void updateMoveElimInfo(const Instruction &Inst);
5369
5470 public:
5571 RegisterFileStatistics(const llvm::MCSubtargetInfo &sti);
5672
73 void onCycleEnd() override;
5774 void onEvent(const HWInstructionEvent &Event) override;
5875 void printView(llvm::raw_ostream &OS) const override;
5976 };
172172 void freePhysRegs(const RegisterRenamingInfo &Entry,
173173 MutableArrayRef FreedPhysRegs);
174174
175 // Collects writes that are in a RAW dependency with RS.
176 // This method is called from `addRegisterRead()`.
177 void collectWrites(const ReadState &RS,
178 SmallVectorImpl &Writes) const;
179
175180 // Create an instance of RegisterMappingTracker for every register file
176181 // specified by the processor model.
177182 // If no register file is specified, then this method creates a default
188193 // No physical register is allocated if this write is from a zero-idiom.
189194 void addRegisterWrite(WriteRef Write, MutableArrayRef UsedPhysRegs);
190195
196 // Collect writes that are in a data dependency with RS, and update RS
197 // internal state.
198 void addRegisterRead(ReadState &RS, SmallVectorImpl &Writes) const;
199
191200 // Removes write \param WS from the register mappings.
192201 // Physical registers may be released to reflect this update.
193202 // No registers are released if this write is from a zero-idiom.
199208 // If RS is a read from a zero register, and WS is eliminated, then
200209 // `WS.WritesZero` is also set, so that method addRegisterWrite() would not
201210 // reserve a physical register for it.
202 bool tryEliminateMove(WriteState &WS, const ReadState &RS);
211 bool tryEliminateMove(WriteState &WS, ReadState &RS);
203212
204213 // Checks if there are enough physical registers in the register files.
205214 // Returns a "response mask" where each bit represents the response from a
211220 // Current implementation can simulate up to 32 register files (including the
212221 // special register file at index #0).
213222 unsigned isAvailable(ArrayRef Regs) const;
214 void collectWrites(SmallVectorImpl &Writes, unsigned RegID) const;
223
224 // Returns the number of PRFs implemented by this processor.
215225 unsigned getNumRegisterFiles() const { return RegisterFiles.size(); }
216226
217227 // Notify each PRF that a new cycle just started.
100100 // field RegisterID from WD.
101101 unsigned RegisterID;
102102
103 // Physical register file that serves register RegisterID.
104 unsigned PRFID;
105
103106 // True if this write implicitly clears the upper portion of RegisterID's
104107 // super-registers.
105108 bool ClearsSuperRegs;
134137 WriteState(const WriteDescriptor &Desc, unsigned RegID,
135138 bool clearsSuperRegs = false, bool writesZero = false)
136139 : WD(&Desc), CyclesLeft(UNKNOWN_CYCLES), RegisterID(RegID),
137 ClearsSuperRegs(clearsSuperRegs), WritesZero(writesZero),
140 PRFID(0), ClearsSuperRegs(clearsSuperRegs), WritesZero(writesZero),
138141 IsEliminated(false), DependentWrite(nullptr), NumWriteUsers(0U) {}
139142
140143 WriteState(const WriteState &Other) = default;
143146 int getCyclesLeft() const { return CyclesLeft; }
144147 unsigned getWriteResourceID() const { return WD->SClassOrWriteResourceID; }
145148 unsigned getRegisterID() const { return RegisterID; }
149 unsigned getRegisterFileID() const { return PRFID; }
146150 unsigned getLatency() const { return WD->Latency; }
147151
148152 void addUser(ReadState *Use, int ReadAdvance);
167171 IsEliminated = true;
168172 }
169173
174 void setPRF(unsigned PRF) { PRFID = PRF; }
175
170176 // On every cycle, update CyclesLeft and notify dependent users.
171177 void cycleEvent();
172178 void onInstructionIssued();
184190 const ReadDescriptor *RD;
185191 // Physical register identifier associated with this read.
186192 unsigned RegisterID;
193 // Physical register file that serves register RegisterID.
194 unsigned PRFID;
187195 // Number of writes that contribute to the definition of RegisterID.
188196 // In the absence of partial register updates, the number of DependentWrites
189197 // cannot be more than one.
200208 // This field is set to true only if there are no dependent writes, and
201209 // there are no `CyclesLeft' to wait.
202210 bool IsReady;
211 // True if this is a read from a known zero register.
212 bool IsZero;
203213 // True if this register read is from a dependency-breaking instruction.
204214 bool IndependentFromDef;
205215
206216 public:
207217 ReadState(const ReadDescriptor &Desc, unsigned RegID)
208 : RD(&Desc), RegisterID(RegID), DependentWrites(0),
218 : RD(&Desc), RegisterID(RegID), PRFID(0), DependentWrites(0),
209219 CyclesLeft(UNKNOWN_CYCLES), TotalCycles(0), IsReady(true),
210 IndependentFromDef(false) {}
220 IsZero(false), IndependentFromDef(false) {}
211221
212222 const ReadDescriptor &getDescriptor() const { return *RD; }
213223 unsigned getSchedClass() const { return RD->SchedClassID; }
214224 unsigned getRegisterID() const { return RegisterID; }
225 unsigned getRegisterFileID() const { return PRFID; }
215226
216227 bool isReady() const { return IsReady; }
217228 bool isImplicitRead() const { return RD->isImplicitRead(); }
225236 DependentWrites = Writes;
226237 IsReady = !Writes;
227238 }
239
240 bool isReadZero() const { return IsZero; }
241 void setReadZero() { IsZero = true; }
242 void setPRF(unsigned ID) { PRFID = ID; }
228243 };
229244
230245 /// A sequence of cycles.
6767 ArrayRef UsedPhysRegs,
6868 unsigned uOps) const;
6969
70 void collectWrites(SmallVectorImpl &Vec, unsigned RegID) const {
71 return PRF.collectWrites(Vec, RegID);
72 }
73
7470 public:
7571 DispatchStage(const MCSubtargetInfo &Subtarget, const MCRegisterInfo &MRI,
7672 unsigned MaxDispatchWidth, RetireControlUnit &R,
172172 bool IsEliminated = WS.isEliminated();
173173 bool ShouldAllocatePhysRegs = !IsWriteZero && !IsEliminated;
174174 const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second;
175 WS.setPRF(RRI.IndexPlusCost.first);
175176
176177 if (RRI.RenameAs && RRI.RenameAs != RegID) {
177178 RegID = RRI.RenameAs;
216217 RegisterMappings[*I].second.AliasRegID = 0U;
217218 }
218219
219 // No physical registers are allocated for instructions that are optimized in
220 // hardware. For example, zero-latency data-dependency breaking instructions
221 // don't consume physical registers.
220 // No physical registers are allocated for instructions that are optimized
221 // in hardware. For example, zero-latency data-dependency breaking
222 // instructions don't consume physical registers.
222223 if (ShouldAllocatePhysRegs)
223224 allocatePhysRegs(RegisterMappings[RegID].second, UsedPhysRegs);
224225 }
287288 }
288289 }
289290
290 bool RegisterFile::tryEliminateMove(WriteState &WS, const ReadState &RS) {
291 bool RegisterFile::tryEliminateMove(WriteState &WS, ReadState &RS) {
291292 const RegisterMapping &RMFrom = RegisterMappings[RS.getRegisterID()];
292293 const RegisterMapping &RMTo = RegisterMappings[WS.getRegisterID()];
293294
348349 }
349350
350351 RMT.NumMoveEliminated++;
351 if (IsZeroMove)
352 if (IsZeroMove) {
352353 WS.setWriteZero();
354 RS.setReadZero();
355 }
353356 WS.setEliminated();
354357
355358 return true;
356359 }
357360
358 void RegisterFile::collectWrites(SmallVectorImpl &Writes,
359 unsigned RegID) const {
361 void RegisterFile::collectWrites(const ReadState &RS,
362 SmallVectorImpl &Writes) const {
363 unsigned RegID = RS.getRegisterID();
360364 assert(RegID && RegID < RegisterMappings.size());
361365 LLVM_DEBUG(dbgs() << "RegisterFile: collecting writes for register "
362366 << MRI.getName(RegID) << '\n');
378382 }
379383
380384 // Remove duplicate entries and resize the input vector.
381 sort(Writes, [](const WriteRef &Lhs, const WriteRef &Rhs) {
382 return Lhs.getWriteState() < Rhs.getWriteState();
383 });
384 auto It = std::unique(Writes.begin(), Writes.end());
385 Writes.resize(std::distance(Writes.begin(), It));
385 if (Writes.size() > 1) {
386 sort(Writes, [](const WriteRef &Lhs, const WriteRef &Rhs) {
387 return Lhs.getWriteState() < Rhs.getWriteState();
388 });
389 auto It = std::unique(Writes.begin(), Writes.end());
390 Writes.resize(std::distance(Writes.begin(), It));
391 }
386392
387393 LLVM_DEBUG({
388394 for (const WriteRef &WR : Writes) {
392398 << WR.getSourceIndex() << ")\n";
393399 }
394400 });
401 }
402
// Updates the state of register read RS and collects into Defs the writes it
// depends on.
//
// The register file index found in the renaming info of the register read by
// RS is always recorded in RS. If RS is flagged as independent from its
// definition, nothing else is done and Defs is left untouched. Otherwise RS
// is marked as a zero read when its register is set in ZeroRegisters, the
// writes are gathered through collectWrites(), and the number of collected
// writes is stored in RS as its count of dependent writes.
403 void RegisterFile::addRegisterRead(ReadState &RS,
404 SmallVectorImpl &Defs) const {
405 unsigned RegID = RS.getRegisterID();
406 const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second;
// The register file ID is set even for reads that are independent from
// their definition (this happens before the early return below).
407 RS.setPRF(RRI.IndexPlusCost.first);
408 if (RS.isIndependentFromDef())
409 return;
410
411 if (ZeroRegisters[RS.getRegisterID()])
412 RS.setReadZero();
413 collectWrites(RS, Defs);
414 RS.setDependentWrites(Defs.size());
395415 }
396416
397417 unsigned RegisterFile::isAvailable(ArrayRef Regs) const {
6666 const MCSubtargetInfo &STI) {
6767 SmallVector DependentWrites;
6868
69 collectWrites(DependentWrites, RS.getRegisterID());
70 RS.setDependentWrites(DependentWrites.size());
69 // Collect all the dependent writes, and update RS internal state.
70 PRF.addRegisterRead(RS, DependentWrites);
71
7172 // We know that this read depends on all the writes in DependentWrites.
7273 // For each write, check if we have ReadAdvance information, and use it
7374 // to figure out in how many cycles this read becomes available.
115116 // We also don't update data dependencies for instructions that have been
116117 // eliminated at register renaming stage.
117118 if (!IsEliminated) {
118 for (ReadState &RS : IS.getUses()) {
119 if (!RS.isIndependentFromDef())
120 updateRAWDependencies(RS, STI);
121 }
119 for (ReadState &RS : IS.getUses())
120 updateRAWDependencies(RS, STI);
122121 }
123122
124123 // By default, a dependency-breaking zero-idiom is expected to be optimized
126125 // to the instruction.
127126 SmallVector RegisterFiles(PRF.getNumRegisterFiles());
128127 for (WriteState &WS : IS.getDefs())
129 PRF.addRegisterWrite(WriteRef(IR.getSourceIndex(), &WS),
130 RegisterFiles);
128 PRF.addRegisterWrite(WriteRef(IR.getSourceIndex(), &WS), RegisterFiles);
131129
132130 // Reserve slots in the RCU, and notify the instruction that it has been
133131 // dispatched to the schedulers for execution.