llvm.org GIT mirror llvm / 8396e13
Count processor resources individually in MachineTraceMetrics.

The new instruction scheduling models provide information about the number of cycles consumed on each processor resource. This makes it possible to estimate ILP more accurately than simply counting instructions / issue width.

The functions getResourceDepth() and getResourceLength() now identify the limiting processor resource, and return a cycle count based on that. This gives more precise resource information, particularly in traces that use one resource a lot more than others.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@178553 91177308-0d34-0410-b5e6-96231b3b80d8

Jakob Stoklund Olesen 7 years ago
2 changed file(s) with 169 addition(s) and 9 deletion(s). Raw diff Collapse all Expand all
105105
106106 /// Get the fixed resource information about MBB. Compute it on demand.
107107 const FixedBlockInfo *getResources(const MachineBasicBlock*);
108
109 /// Get the scaled number of cycles used per processor resource in MBB.
110 /// This is an array with SchedModel.getNumProcResourceKinds() entries.
111 /// The getResources() function above must have been called first.
112 ///
113 /// These numbers have already been scaled by SchedModel.getResourceFactor().
114 ArrayRef<unsigned> getProcResourceCycles(unsigned MBBNum) const;
108115
109116 /// A virtual register or regunit required by a basic block or its trace
110117 /// successors.
283290 class Ensemble {
284291 SmallVector BlockInfo;
285292 DenseMap Cycles;
293 SmallVector ProcResourceDepths;
294 SmallVector ProcResourceHeights;
286295 friend class Trace;
287296
288297 void computeTrace(const MachineBasicBlock*);
302311 const MachineLoop *getLoopFor(const MachineBasicBlock*) const;
303312 const TraceBlockInfo *getDepthResources(const MachineBasicBlock*) const;
304313 const TraceBlockInfo *getHeightResources(const MachineBasicBlock*) const;
314 ArrayRef<unsigned> getProcResourceDepths(unsigned MBBNum) const;
315 ArrayRef<unsigned> getProcResourceHeights(unsigned MBBNum) const;
305316
306317 public:
307318 virtual ~Ensemble();
342353 // One entry per basic block, indexed by block number.
343354 SmallVector BlockInfo;
344355
356 // Cycles consumed on each processor resource per block.
357 // The number of processor resource kinds is constant for a given subtarget,
358 // but it is not known at compile time. The number of cycles consumed by
359 // block B on processor resource R is at ProcResourceCycles[B*Kinds + R]
360 // where Kinds = SchedModel.getNumProcResourceKinds().
361 SmallVector ProcResourceCycles;
362
345363 // One ensemble per strategy.
346364 Ensemble* Ensembles[TS_NumStrategies];
365
366 // Convert scaled resource usage to a cycle count that can be compared with
367 // latencies.
368 unsigned getCycles(unsigned Scaled) {
369 unsigned Factor = SchedModel.getLatencyFactor();
370 return (Scaled + Factor - 1) / Factor;
371 }
347372 };
348373
349374 inline raw_ostream &operator<<(raw_ostream &OS,
1717 #include "llvm/CodeGen/Passes.h"
1818 #include "llvm/MC/MCSubtargetInfo.h"
1919 #include "llvm/Support/Debug.h"
20 #include "llvm/Support/Format.h"
2021 #include "llvm/Support/raw_ostream.h"
2122 #include "llvm/Target/TargetInstrInfo.h"
2223 #include "llvm/Target/TargetRegisterInfo.h"
5657 MF->getTarget().getSubtarget();
5758 SchedModel.init(*ST.getSchedModel(), &ST, TII);
5859 BlockInfo.resize(MF->getNumBlockIDs());
60 ProcResourceCycles.resize(MF->getNumBlockIDs() *
61 SchedModel.getNumProcResourceKinds());
5962 return false;
6063 }
6164
8487 return FBI;
8588
8689 // Compute resource usage in the block.
87 // FIXME: Compute per-functional unit counts.
8890 FBI->HasCalls = false;
8991 unsigned InstrCount = 0;
92
93 // Add up per-processor resource cycles as well.
94 unsigned PRKinds = SchedModel.getNumProcResourceKinds();
95 SmallVector PRCycles(PRKinds);
96
9097 for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
9198 I != E; ++I) {
9299 const MachineInstr *MI = I;
95102 ++InstrCount;
96103 if (MI->isCall())
97104 FBI->HasCalls = true;
105
106 // Count processor resources used.
107 const MCSchedClassDesc *SC = SchedModel.resolveSchedClass(MI);
108 if (!SC->isValid())
109 continue;
110
111 for (TargetSchedModel::ProcResIter
112 PI = SchedModel.getWriteProcResBegin(SC),
113 PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) {
114 assert(PI->ProcResourceIdx < PRKinds && "Bad processor resource kind");
115 PRCycles[PI->ProcResourceIdx] += PI->Cycles;
116 }
98117 }
99118 FBI->InstrCount = InstrCount;
119
120 // Scale the resource cycles so they are comparable.
121 unsigned PROffset = MBB->getNumber() * PRKinds;
122 for (unsigned K = 0; K != PRKinds; ++K)
123 ProcResourceCycles[PROffset + K] =
124 PRCycles[K] * SchedModel.getResourceFactor(K);
125
100126 return FBI;
101127 }
128
129 ArrayRef<unsigned>
130 MachineTraceMetrics::getProcResourceCycles(unsigned MBBNum) const {
131 assert(BlockInfo[MBBNum].hasResources() &&
132 "getResources() must be called before getProcResourceCycles()");
133 unsigned PRKinds = SchedModel.getNumProcResourceKinds();
134 return ArrayRef<unsigned>(&ProcResourceCycles[MBBNum * PRKinds], PRKinds);
135 }
136
102137
103138 //===----------------------------------------------------------------------===//
104139 // Ensemble utility functions
107142 MachineTraceMetrics::Ensemble::Ensemble(MachineTraceMetrics *ct)
108143 : MTM(*ct) {
109144 BlockInfo.resize(MTM.BlockInfo.size());
145 unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
146 ProcResourceDepths.resize(MTM.BlockInfo.size() * PRKinds);
147 ProcResourceHeights.resize(MTM.BlockInfo.size() * PRKinds);
110148 }
111149
112150 // Virtual destructor serves as an anchor.
122160 void MachineTraceMetrics::Ensemble::
123161 computeDepthResources(const MachineBasicBlock *MBB) {
124162 TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
163 unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
164 unsigned PROffset = MBB->getNumber() * PRKinds;
125165
126166 // Compute resources from trace above. The top block is simple.
127167 if (!TBI->Pred) {
128168 TBI->InstrDepth = 0;
129169 TBI->Head = MBB->getNumber();
170 std::fill(ProcResourceDepths.begin() + PROffset,
171 ProcResourceDepths.begin() + PROffset + PRKinds, 0);
130172 return;
131173 }
132174
133175 // Compute from the block above. A post-order traversal ensures the
134176 // predecessor is always computed first.
135 TraceBlockInfo *PredTBI = &BlockInfo[TBI->Pred->getNumber()];
177 unsigned PredNum = TBI->Pred->getNumber();
178 TraceBlockInfo *PredTBI = &BlockInfo[PredNum];
136179 assert(PredTBI->hasValidDepth() && "Trace above has not been computed yet");
137180 const FixedBlockInfo *PredFBI = MTM.getResources(TBI->Pred);
138181 TBI->InstrDepth = PredTBI->InstrDepth + PredFBI->InstrCount;
139182 TBI->Head = PredTBI->Head;
183
184 // Compute per-resource depths.
185 ArrayRef<unsigned> PredPRDepths = getProcResourceDepths(PredNum);
186 ArrayRef<unsigned> PredPRCycles = MTM.getProcResourceCycles(PredNum);
187 for (unsigned K = 0; K != PRKinds; ++K)
188 ProcResourceDepths[PROffset + K] = PredPRDepths[K] + PredPRCycles[K];
140189 }
141190
142191 // Update resource-related information in the TraceBlockInfo for MBB.
144193 void MachineTraceMetrics::Ensemble::
145194 computeHeightResources(const MachineBasicBlock *MBB) {
146195 TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
196 unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
197 unsigned PROffset = MBB->getNumber() * PRKinds;
147198
148199 // Compute resources for the current block.
149200 TBI->InstrHeight = MTM.getResources(MBB)->InstrCount;
201 ArrayRef<unsigned> PRCycles = MTM.getProcResourceCycles(MBB->getNumber());
150202
151203 // The trace tail is done.
152204 if (!TBI->Succ) {
153205 TBI->Tail = MBB->getNumber();
206 std::copy(PRCycles.begin(), PRCycles.end(),
207 ProcResourceHeights.begin() + PROffset);
154208 return;
155209 }
156210
157211 // Compute from the block below. A post-order traversal ensures the
158212 // successor is always computed first.
159 TraceBlockInfo *SuccTBI = &BlockInfo[TBI->Succ->getNumber()];
213 unsigned SuccNum = TBI->Succ->getNumber();
214 TraceBlockInfo *SuccTBI = &BlockInfo[SuccNum];
160215 assert(SuccTBI->hasValidHeight() && "Trace below has not been computed yet");
161216 TBI->InstrHeight += SuccTBI->InstrHeight;
162217 TBI->Tail = SuccTBI->Tail;
218
219 // Compute per-resource heights.
220 ArrayRef<unsigned> SuccPRHeights = getProcResourceHeights(SuccNum);
221 for (unsigned K = 0; K != PRKinds; ++K)
222 ProcResourceHeights[PROffset + K] = SuccPRHeights[K] + PRCycles[K];
163223 }
164224
165225 // Check if depth resources for MBB are valid and return the TBI.
178238 getHeightResources(const MachineBasicBlock *MBB) const {
179239 const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
180240 return TBI->hasValidHeight() ? TBI : 0;
241 }
242
243 /// Get an array of processor resource depths for MBB. Indexed by processor
244 /// resource kind, this array contains the scaled processor resources consumed
245 /// by all blocks preceding MBB in its trace. It does not include instructions
246 /// in MBB.
247 ///
248 /// Compare TraceBlockInfo::InstrDepth.
249 ArrayRef<unsigned>
250 MachineTraceMetrics::Ensemble::
251 getProcResourceDepths(unsigned MBBNum) const {
252 unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
253 return ArrayRef<unsigned>(&ProcResourceDepths[MBBNum * PRKinds], PRKinds);
254 }
255
256 /// Get an array of processor resource heights for MBB. Indexed by processor
257 /// resource kind, this array contains the scaled processor resources consumed
258 /// by this block and all blocks following it in its trace.
259 ///
260 /// Compare TraceBlockInfo::InstrHeight.
261 ArrayRef<unsigned>
262 MachineTraceMetrics::Ensemble::
263 getProcResourceHeights(unsigned MBBNum) const {
264 unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
265 return ArrayRef<unsigned>(&ProcResourceHeights[MBBNum * PRKinds], PRKinds);
181266 }
182267
183268 //===----------------------------------------------------------------------===//
712797 SmallVector Deps;
713798 while (!Stack.empty()) {
714799 MBB = Stack.pop_back_val();
715 DEBUG(dbgs() << "Depths for BB#" << MBB->getNumber() << ":\n");
800 DEBUG(dbgs() << "\nDepths for BB#" << MBB->getNumber() << ":\n");
716801 TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
717802 TBI.HasValidInstrDepths = true;
718803 TBI.CriticalPath = 0;
804
805 // Print out resource depths here as well.
806 DEBUG({
807 dbgs() << format("%7u Instructions\n", TBI.InstrDepth);
808 ArrayRef<unsigned> PRDepths = getProcResourceDepths(MBB->getNumber());
809 for (unsigned K = 0; K != PRDepths.size(); ++K)
810 if (PRDepths[K]) {
811 unsigned Factor = MTM.SchedModel.getResourceFactor(K);
812 dbgs() << format("%6uc @ ", MTM.getCycles(PRDepths[K]))
813 << MTM.SchedModel.getProcResource(K)->Name << " ("
814 << PRDepths[K]/Factor << " ops x" << Factor << ")\n";
815 }
816 });
719817
720818 // Also compute the critical path length through MBB when possible.
721819 if (TBI.HasValidInstrHeights)
9271025 TBI.HasValidInstrHeights = true;
9281026 TBI.CriticalPath = 0;
9291027
1028 DEBUG({
1029 dbgs() << format("%7u Instructions\n", TBI.InstrHeight);
1030 ArrayRef<unsigned> PRHeights = getProcResourceHeights(MBB->getNumber());
1031 for (unsigned K = 0; K != PRHeights.size(); ++K)
1032 if (PRHeights[K]) {
1033 unsigned Factor = MTM.SchedModel.getResourceFactor(K);
1034 dbgs() << format("%6uc @ ", MTM.getCycles(PRHeights[K]))
1035 << MTM.SchedModel.getProcResource(K)->Name << " ("
1036 << PRHeights[K]/Factor << " ops x" << Factor << ")\n";
1037 }
1038 });
1039
9301040 // Get dependencies from PHIs in the trace successor.
9311041 const MachineBasicBlock *Succ = TBI.Succ;
9321042 // If MBB is the last block in the trace, and it has a back-edge to the
10571167 }
10581168
10591169 unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const {
1060 // For now, we compute the resource depth from instruction count / issue
1061 // width. Eventually, we should compute resource depth per functional unit
1062 // and return the max.
1170 // Find the limiting processor resource.
1171 // Numbers have been pre-scaled to be comparable.
1172 unsigned PRMax = 0;
1173 ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
1174 if (Bottom) {
1175 ArrayRef<unsigned> PRCycles = TE.MTM.getProcResourceCycles(getBlockNum());
1176 for (unsigned K = 0; K != PRDepths.size(); ++K)
1177 PRMax = std::max(PRMax, PRDepths[K] + PRCycles[K]);
1178 } else {
1179 for (unsigned K = 0; K != PRDepths.size(); ++K)
1180 PRMax = std::max(PRMax, PRDepths[K]);
1181 }
1182 // Convert to cycle count.
1183 PRMax = TE.MTM.getCycles(PRMax);
1184
10631185 unsigned Instrs = TBI.InstrDepth;
10641186 if (Bottom)
10651187 Instrs += TE.MTM.BlockInfo[getBlockNum()].InstrCount;
10661188 if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
10671189 Instrs /= IW;
10681190 // Assume issue width 1 without a schedule model.
1069 return Instrs;
1191 return std::max(Instrs, PRMax);
10701192 }
10711193
10721194 unsigned MachineTraceMetrics::Trace::
10731195 getResourceLength(ArrayRef<const MachineBasicBlock*> Extrablocks) const {
1196 // Add up resources above and below the center block.
1197 ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
1198 ArrayRef<unsigned> PRHeights = TE.getProcResourceHeights(getBlockNum());
1199 unsigned PRMax = 0;
1200 for (unsigned K = 0; K != PRDepths.size(); ++K) {
1201 unsigned PRCycles = PRDepths[K] + PRHeights[K];
1202 for (unsigned I = 0; I != Extrablocks.size(); ++I)
1203 PRCycles += TE.MTM.getProcResourceCycles(Extrablocks[I]->getNumber())[K];
1204 PRMax = std::max(PRMax, PRCycles);
1205 }
1206 // Convert to cycle count.
1207 PRMax = TE.MTM.getCycles(PRMax);
1208
10741209 unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight;
10751210 for (unsigned i = 0, e = Extrablocks.size(); i != e; ++i)
10761211 Instrs += TE.MTM.getResources(Extrablocks[i])->InstrCount;
10771212 if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
10781213 Instrs /= IW;
10791214 // Assume issue width 1 without a schedule model.
1080 return Instrs;
1215 return std::max(Instrs, PRMax);
10811216 }
10821217
10831218 void MachineTraceMetrics::Ensemble::print(raw_ostream &OS) const {