llvm.org GIT mirror llvm / a03cc69
[MCA][ResourceManager] Add a table that maps processor resource indices to processor resource identifiers. This patch adds a lookup table to speed up resource queries in the ResourceManager. This patch also moves helper function 'getResourceStateIndex()' from ResourceManager.cpp to Support.h, so that we can reuse that logic in the SummaryView (and potentially other views in llvm-mca). No functional change intended. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@354470 91177308-0d34-0410-b5e6-96231b3b80d8 Andrea Di Biagio 8 months ago
5 changed file(s) with 47 addition(s) and 28 deletion(s). Raw diff Collapse all Expand all
333333 // Used to quickly identify groups that own a particular resource unit.
334334 std::vector<uint64_t> Resource2Groups;
335335
336 // A table to map processor resource IDs to processor resource masks.
336 // A table that maps processor resource IDs to processor resource masks.
337337 SmallVector<uint64_t, 8> ProcResID2Mask;
338
339 // A table that maps resource indices to actual processor resource IDs in the
340 // scheduling model.
341 SmallVector<unsigned, 8> ResIndex2ProcResID;
338342
339343 // Keeps track of which resources are busy, and how many cycles are left
340344 // before those become usable again.
9393 void computeProcResourceMasks(const MCSchedModel &SM,
9494 MutableArrayRef<uint64_t> Masks);
9595
96 // Returns the index of the highest bit set. For resource masks, the position of
97 // the highest bit set can be used to construct a resource mask identifier.
98 inline unsigned getResourceStateIndex(uint64_t Mask) {
99 assert(Mask && "Processor Resource Mask cannot be zero!");
100 return (std::numeric_limits<uint64_t>::digits - countLeadingZeros(Mask)) - 1;
101 }
102
96103 /// Compute the reciprocal block throughput from a set of processor resource
97104 /// cycles. The reciprocal block throughput is computed as the MAX between:
98105 /// - NumMicroOps / DispatchWidth
2222 #define DEBUG_TYPE "llvm-mca"
2323 ResourceStrategy::~ResourceStrategy() = default;
2424
25 // Returns the index of the highest bit set. For resource masks, the position of
26 // the highest bit set can be used to construct a resource mask identifier.
27 static unsigned getResourceStateIndex(uint64_t Mask) {
28 return std::numeric_limits<uint64_t>::digits - countLeadingZeros(Mask);
29 }
30
3125 static uint64_t selectImpl(uint64_t CandidateMask,
3226 uint64_t &NextInSequenceMask) {
3327 // The upper bit set in CandidateMask identifies our next candidate resource.
34 CandidateMask = 1ULL << (getResourceStateIndex(CandidateMask) - 1);
28 CandidateMask = 1ULL << getResourceStateIndex(CandidateMask);
3529 NextInSequenceMask &= (CandidateMask | (CandidateMask - 1));
3630 return CandidateMask;
3731 }
7367 BufferSize(Desc.BufferSize), IsAGroup(countPopulation(ResourceMask) > 1) {
7468 if (IsAGroup) {
7569 ResourceSizeMask =
76 ResourceMask ^ 1ULL << (getResourceStateIndex(ResourceMask) - 1);
70 ResourceMask ^ 1ULL << getResourceStateIndex(ResourceMask);
7771 } else {
7872 ResourceSizeMask = (1ULL << Desc.NumUnits) - 1;
7973 }
114108 }
115109
116110 ResourceManager::ResourceManager(const MCSchedModel &SM)
117 : Resources(SM.getNumProcResourceKinds()),
118 Strategies(SM.getNumProcResourceKinds()),
119 Resource2Groups(SM.getNumProcResourceKinds(), 0),
120 ProcResID2Mask(SM.getNumProcResourceKinds()), ProcResUnitMask(0),
121 ReservedResourceGroups(0) {
111 : Resources(SM.getNumProcResourceKinds() - 1),
112 Strategies(SM.getNumProcResourceKinds() - 1),
113 Resource2Groups(SM.getNumProcResourceKinds() - 1, 0),
114 ProcResID2Mask(SM.getNumProcResourceKinds(), 0),
115 ResIndex2ProcResID(SM.getNumProcResourceKinds() - 1, 0),
116 ProcResUnitMask(0), ReservedResourceGroups(0) {
122117 computeProcResourceMasks(SM, ProcResID2Mask);
123118
124 for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
119 // initialize vector ResIndex2ProcResID.
120 for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
121 unsigned Index = getResourceStateIndex(ProcResID2Mask[I]);
122 ResIndex2ProcResID[Index] = I;
123 }
124
125 for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
125126 uint64_t Mask = ProcResID2Mask[I];
126127 unsigned Index = getResourceStateIndex(Mask);
127128 Resources[Index] =
129130 Strategies[Index] = getStrategyFor(*Resources[Index]);
130131 }
131132
132 for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
133 for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
133134 uint64_t Mask = ProcResID2Mask[I];
134135 unsigned Index = getResourceStateIndex(Mask);
135136 const ResourceState &RS = *Resources[Index];
138139 continue;
139140 }
140141
141 uint64_t GroupMaskIdx = 1ULL << (Index - 1);
142 uint64_t GroupMaskIdx = 1ULL << Index;
142143 Mask -= GroupMaskIdx;
143144 while (Mask) {
144145 // Extract lowest set isolated bit.
161162 }
162163
163164 unsigned ResourceManager::resolveResourceMask(uint64_t Mask) const {
164 return Resources[getResourceStateIndex(Mask)]->getProcResourceID();
165 return ResIndex2ProcResID[getResourceStateIndex(Mask)];
165166 }
166167
167168 unsigned ResourceManager::getNumUnits(uint64_t ResourceID) const {
331332 }
332333
333334 void ResourceManager::reserveResource(uint64_t ResourceID) {
334 ResourceState &Resource = *Resources[getResourceStateIndex(ResourceID)];
335 const unsigned Index = getResourceStateIndex(ResourceID);
336 ResourceState &Resource = *Resources[Index];
335337 assert(Resource.isAResourceGroup() && !Resource.isReserved() &&
336338 "Unexpected resource found!");
337339 Resource.setReserved();
338 ReservedResourceGroups ^= PowerOf2Floor(ResourceID);
340 ReservedResourceGroups ^= 1ULL << Index;
339341 }
340342
341343 void ResourceManager::releaseResource(uint64_t ResourceID) {
342 ResourceState &Resource = *Resources[getResourceStateIndex(ResourceID)];
344 const unsigned Index = getResourceStateIndex(ResourceID);
345 ResourceState &Resource = *Resources[Index];
343346 Resource.clearReserved();
344347 if (Resource.isAResourceGroup())
345 ReservedResourceGroups ^= PowerOf2Floor(ResourceID);
348 ReservedResourceGroups ^= 1ULL << Index;
346349 }
347350
348351 } // namespace mca
2626 : SM(Model), Source(S), DispatchWidth(Width), LastInstructionIdx(0),
2727 TotalCycles(0), NumMicroOps(0),
2828 ProcResourceUsage(Model.getNumProcResourceKinds(), 0),
29 ProcResourceMasks(Model.getNumProcResourceKinds()) {
29 ProcResourceMasks(Model.getNumProcResourceKinds()),
30 ResIdx2ProcResID(Model.getNumProcResourceKinds(), 0) {
3031 computeProcResourceMasks(SM, ProcResourceMasks);
32 for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
33 unsigned Index = getResourceStateIndex(ProcResourceMasks[I]);
34 ResIdx2ProcResID[Index] = I;
35 }
3136 }
3237
3338 void SummaryView::onEvent(const HWInstructionEvent &Event) {
4954 NumMicroOps += Desc.NumMicroOps;
5055 for (const std::pair<uint64_t, const ResourceUsage> &RU : Desc.Resources) {
5156 if (RU.second.size()) {
52 const auto It = find(ProcResourceMasks, RU.first);
53 assert(It != ProcResourceMasks.end() &&
54 "Invalid processor resource mask!");
55 ProcResourceUsage[std::distance(ProcResourceMasks.begin(), It)] +=
56 RU.second.size();
57 unsigned ProcResID = ResIdx2ProcResID[getResourceStateIndex(RU.first)];
58 ProcResourceUsage[ProcResID] += RU.second.size();
5759 }
5860 }
5961 }
5454 // declared by the scheduling model.
5555 llvm::SmallVector<uint64_t, 8> ProcResourceMasks;
5656
57 // Used to map resource indices to actual processor resource IDs.
58 llvm::SmallVector<unsigned, 8> ResIdx2ProcResID;
59
5760 // Compute the reciprocal throughput for the analyzed code block.
5861 // The reciprocal block throughput is computed as the MAX between:
5962 // - NumMicroOps / DispatchWidth