llvm.org GIT mirror llvm / 437f3bd
[MCA] Fix wrong definition of ResourceUnitMask in DefaultResourceStrategy. Field ResourceUnitMask was incorrectly defined as a 'const unsigned' mask. It should have been a 64-bit quantity instead. As a result, ResourceUnitMask was always implicitly truncated to a 32-bit quantity. This issue was found by inspection. Surprisingly, that bug was latent, and it never negatively affected any existing upstream targets. This patch fixes the wrong definition of ResourceUnitMask, and adds a bunch of extra debug prints to help debug potential issues related to invalid processor resource masks. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@350820 91177308-0d34-0410-b5e6-96231b3b80d8 Andrea Di Biagio 1 year, 10 months ago
11 changed file(s) with 52 addition(s) and 32 deletion(s). Raw diff Collapse all Expand all
7171 ///
7272 /// There is one bit set for every available resource unit.
7373 /// It defaults to the value of field ResourceSizeMask in ResourceState.
74 const unsigned ResourceUnitMask;
74 const uint64_t ResourceUnitMask;
7575
7676 /// A simple round-robin selector for processor resource units.
7777 /// Each bit of this mask identifies a sub resource within a group.
334334 // Used to quickly identify groups that own a particular resource unit.
335335 std::vector Resource2Groups;
336336
337 // A table to map processor resource IDs to processor resource masks.
338 SmallVector ProcResID2Mask;
339
337340 // Keeps track of which resources are busy, and how many cycles are left
338341 // before those become usable again.
339342 SmallDenseMap BusyResources;
340
341 // A table to map processor resource IDs to processor resource masks.
342 SmallVector ProcResID2Mask;
343343
344344 // Returns the actual resource unit that will be used.
345345 ResourceRef selectPipe(uint64_t ResourceID);
137137 public:
138138 WriteState(const WriteDescriptor &Desc, unsigned RegID,
139139 bool clearsSuperRegs = false, bool writesZero = false)
140 : WD(&Desc), CyclesLeft(UNKNOWN_CYCLES), RegisterID(RegID),
141 PRFID(0), ClearsSuperRegs(clearsSuperRegs), WritesZero(writesZero),
140 : WD(&Desc), CyclesLeft(UNKNOWN_CYCLES), RegisterID(RegID), PRFID(0),
141 ClearsSuperRegs(clearsSuperRegs), WritesZero(writesZero),
142142 IsEliminated(false), DependentWrite(nullptr), PartialWrite(nullptr),
143143 DependentWriteCyclesLeft(0) {}
144144
154154 void addUser(ReadState *Use, int ReadAdvance);
155155 void addUser(WriteState *Use);
156156
157 unsigned getDependentWriteCyclesLeft() const { return DependentWriteCyclesLeft; }
157 unsigned getDependentWriteCyclesLeft() const {
158 return DependentWriteCyclesLeft;
159 }
158160
159161 unsigned getNumUsers() const {
160162 unsigned NumUsers = Users.size();
347349 InstrDesc() = default;
348350 InstrDesc(const InstrDesc &Other) = delete;
349351 InstrDesc &operator=(const InstrDesc &Other) = delete;
350
351 #ifndef NDEBUG
352 // Original instruction name for debugging purposes.
353 StringRef Name;
354 #endif
355352 };
356353
357354 /// Base class for instructions consumed by the simulation pipeline.
550547 } // namespace mca
551548 } // namespace llvm
552549
553 #endif // LLVM_MCA_INSTRUCTION_H
550 #endif // LLVM_MCA_INSTRUCTION_H
6464
6565 void notifyInstructionIssued(
6666 const InstRef &IR,
67 ArrayRef> Used) const;
67 MutableArrayRef> Used) const;
6868 void notifyInstructionExecuted(const InstRef &IR) const;
6969 void notifyInstructionReady(const InstRef &IR) const;
7070 void notifyResourceAvailable(const ResourceRef &RR) const;
3131 SmallVector Masks;
3232
3333 public:
34 InstructionTables(const MCSchedModel &Model) : Stage(), SM(Model) {
34 InstructionTables(const MCSchedModel &Model)
35 : Stage(), SM(Model), Masks(Model.getNumProcResourceKinds()) {
3536 computeProcResourceMasks(Model, Masks);
3637 }
3738
103103 /// Resource masks are used by the ResourceManager to solve set membership
104104 /// problems with simple bit manipulation operations.
105105 void computeProcResourceMasks(const MCSchedModel &SM,
106 SmallVectorImpl &Masks);
106 MutableArrayRef Masks);
107107
108108 /// Compute the reciprocal block throughput from a set of processor resource
109109 /// cycles. The reciprocal block throughput is computed as the MAX between:
117117 ResourceManager::ResourceManager(const MCSchedModel &SM)
118118 : Resources(SM.getNumProcResourceKinds()),
119119 Strategies(SM.getNumProcResourceKinds()),
120 Resource2Groups(SM.getNumProcResourceKinds(), 0) {
120 Resource2Groups(SM.getNumProcResourceKinds(), 0),
121 ProcResID2Mask(SM.getNumProcResourceKinds()) {
121122 computeProcResourceMasks(SM, ProcResID2Mask);
122123
123124 for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
282283 ResourceRef Pipe = selectPipe(R.first);
283284 use(Pipe);
284285 BusyResources[Pipe] += CS.size();
285 // Replace the resource mask with a valid processor resource index.
286 const ResourceState &RS = *Resources[getResourceStateIndex(Pipe.first)];
287 Pipe.first = RS.getProcResourceID();
288286 Pipes.emplace_back(std::pair(
289287 Pipe, ResourceCycles(CS.size())));
290288 } else {
3030 const llvm::MCInstrAnalysis *mcia)
3131 : STI(sti), MCII(mcii), MRI(mri), MCIA(mcia), FirstCallInst(true),
3232 FirstReturnInst(true) {
33 const MCSchedModel &SM = STI.getSchedModel();
34 ProcResourceMasks.resize(SM.getNumProcResourceKinds());
3335 computeProcResourceMasks(STI.getSchedModel(), ProcResourceMasks);
3436 }
3537
177179
178180 LLVM_DEBUG({
179181 for (const std::pair &R : ID.Resources)
180 dbgs() << "\t\tMask=" << format_hex(R.first, 16) << ", " <<
181 "cy=" << R.second.size() << '\n';
182 dbgs() << "\t\tMask=" << format_hex(R.first, 16) << ", "
183 << "cy=" << R.second.size() << '\n';
182184 for (const uint64_t R : ID.Buffers)
183185 dbgs() << "\t\tBuffer Mask=" << format_hex(R, 16) << '\n';
184186 });
524526 MCI);
525527 }
526528
529 LLVM_DEBUG(dbgs() << "\n\t\tOpcode Name= " << MCII.getName(Opcode) << '\n');
530 LLVM_DEBUG(dbgs() << "\t\tSchedClassID=" << SchedClassID << '\n');
531
527532 // Create a new empty descriptor.
528533 std::unique_ptr ID = llvm::make_unique();
529534 ID->NumMicroOps = SCDesc.NumMicroOps;
558563 populateWrites(*ID, MCI, SchedClassID);
559564 populateReads(*ID, MCI, SchedClassID);
560565
561 #ifndef NDEBUG
562 ID->Name = MCII.getName(Opcode);
563 #endif
564566 LLVM_DEBUG(dbgs() << "\t\tMaxLatency=" << ID->MaxLatency << '\n');
565567 LLVM_DEBUG(dbgs() << "\t\tNumMicroOps=" << ID->NumMicroOps << '\n');
566568
8282 }
8383
8484 void Pipeline::notifyCycleBegin() {
85 LLVM_DEBUG(dbgs() << "[E] Cycle begin: " << Cycles << '\n');
85 LLVM_DEBUG(dbgs() << "\n[E] Cycle begin: " << Cycles << '\n');
8686 for (HWEventListener *Listener : Listeners)
8787 Listener->onCycleBegin();
8888 }
8989
9090 void Pipeline::notifyCycleEnd() {
91 LLVM_DEBUG(dbgs() << "[E] Cycle end: " << Cycles << "\n\n");
91 LLVM_DEBUG(dbgs() << "[E] Cycle end: " << Cycles << "\n");
9292 for (HWEventListener *Listener : Listeners)
9393 Listener->onCycleEnd();
9494 }
5656 HWS.issueInstruction(IR, Used, Ready);
5757
5858 notifyReservedOrReleasedBuffers(IR, /* Reserved */ false);
59
5960 notifyInstructionIssued(IR, Used);
6061 if (IR.getInstruction()->isExecuted()) {
6162 notifyInstructionExecuted(IR);
183184
184185 void ExecuteStage::notifyInstructionIssued(
185186 const InstRef &IR,
186 ArrayRef> Used) const {
187 MutableArrayRef> Used) const {
187188 LLVM_DEBUG({
188189 dbgs() << "[E] Instruction Issued: #" << IR << '\n';
189190 for (const std::pair &Resource : Used) {
192193 dbgs() << "cycles: " << Resource.second << '\n';
193194 }
194195 });
196
197 // Replace resource masks with valid resource processor IDs.
198 for (std::pair &Use : Used)
199 Use.first.first = HWS.getResourceID(Use.first.first);
200
195201 notifyEvent(HWInstructionIssuedEvent(IR, Used));
196202 }
197203
1818 namespace llvm {
1919 namespace mca {
2020
21 #define DEBUG_TYPE "llvm-mca"
22
2123 void computeProcResourceMasks(const MCSchedModel &SM,
22 SmallVectorImpl &Masks) {
24 MutableArrayRef Masks) {
2325 unsigned ProcResourceID = 0;
2426
27 assert(Masks.size() == SM.getNumProcResourceKinds() &&
28 "Invalid number of elements");
29 // Resource at index 0 is the 'InvalidUnit'. Set an invalid mask for it.
30 Masks[0] = 0;
31
2532 // Create a unique bitmask for every processor resource unit.
26 // Skip resource at index 0, since it always references 'InvalidUnit'.
27 Masks.resize(SM.getNumProcResourceKinds());
2833 for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
2934 const MCProcResourceDesc &Desc = *SM.getProcResource(I);
3035 if (Desc.SubUnitsIdxBegin)
4550 }
4651 ProcResourceID++;
4752 }
53
54 #ifndef NDEBUG
55 LLVM_DEBUG(dbgs() << "\nProcessor resource masks:"
56 << "\n");
57 for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
58 const MCProcResourceDesc &Desc = *SM.getProcResource(I);
59 LLVM_DEBUG(dbgs() << '[' << I << "] " << Desc.Name << " - " << Masks[I]
60 << '\n');
61 }
62 #endif
4863 }
4964
5065 double computeBlockRThroughput(const MCSchedModel &SM, unsigned DispatchWidth,
2626 unsigned Width)
2727 : SM(Model), Source(S), DispatchWidth(Width), LastInstructionIdx(0),
2828 TotalCycles(0), NumMicroOps(0),
29 ProcResourceUsage(Model.getNumProcResourceKinds(), 0) {
29 ProcResourceUsage(Model.getNumProcResourceKinds(), 0),
30 ProcResourceMasks(Model.getNumProcResourceKinds()) {
3031 computeProcResourceMasks(SM, ProcResourceMasks);
3132 }
3233