llvm.org GIT mirror llvm / db35087
Implement a block placement pass based on the branch probability and block frequency analyses. This differs substantially from the existing block-placement pass in LLVM: 1) It operates on the Machine-IR in the CodeGen layer. This exposes much more (and more precise) information and opportunities. Also, the results are more stable due to fewer transforms occurring after the pass runs. 2) It uses the generalized probability and frequency analyses. These can model static heuristics, code annotation derived heuristics as well as eventual profile loading. By basing the optimization on the analysis interface it can work from any (or a combination) of these inputs. 3) It uses a more aggressive algorithm, both building chains from the bottom up to maximize benefit, and using an SCC-based walk to lay out chains of blocks in a profitable ordering without O(N^2) iterations which the old pass involves. The pass is currently gated behind a flag, and not enabled by default because it still needs to grow some important features. Most notably, it needs to support loop aligning and careful layout of loop structures much as done by hand currently in CodePlacementOpt. Once it supports these, and has sufficient testing and quality tuning, it should replace both of these passes. Thanks to Nick Lewycky and Richard Smith for help authoring & debugging this, and to Jakob, Andy, Eric, Jim, and probably a few others I'm forgetting for reviewing and answering all my questions. Writing a backend pass is *sooo* much better now than it used to be. =D git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142641 91177308-0d34-0410-b5e6-96231b3b80d8 Chandler Carruth 8 years ago
6 changed file(s) with 643 addition(s) and 2 deletion(s). Raw diff Collapse all Expand all
154154 /// IfConverter Pass - This pass performs machine code if conversion.
155155 FunctionPass *createIfConverterPass();
156156
157 /// MachineBlockPlacement Pass - This pass places basic blocks based on branch
158 /// probabilities.
159 FunctionPass *createMachineBlockPlacementPass();
160
157161 /// Code Placement Pass - This pass optimize code placement and aligns loop
158162 /// headers to target specific alignment boundary.
159163 FunctionPass *createCodePlacementOptPass();
145145 void initializeLowerInvokePass(PassRegistry&);
146146 void initializeLowerSwitchPass(PassRegistry&);
147147 void initializeMachineBlockFrequencyInfoPass(PassRegistry&);
148 void initializeMachineBlockPlacementPass(PassRegistry&);
148149 void initializeMachineBranchProbabilityInfoPass(PassRegistry&);
149150 void initializeMachineCSEPass(PassRegistry&);
150151 void initializeMachineDominatorTreePass(PassRegistry&);
3636 LocalStackSlotAllocation.cpp
3737 MachineBasicBlock.cpp
3838 MachineBlockFrequencyInfo.cpp
39 MachineBlockPlacement.cpp
3940 MachineBranchProbabilityInfo.cpp
4041 MachineCSE.cpp
4142 MachineDominators.cpp
2727 initializeLiveStacksPass(Registry);
2828 initializeLiveVariablesPass(Registry);
2929 initializeMachineBlockFrequencyInfoPass(Registry);
30 initializeMachineBlockPlacementPass(Registry);
3031 initializeMachineCSEPass(Registry);
3132 initializeMachineDominatorTreePass(Registry);
3233 initializeMachineLICMPass(Registry);
5252 cl::desc("Disable tail duplication"));
5353 static cl::opt DisableEarlyTailDup("disable-early-taildup", cl::Hidden,
5454 cl::desc("Disable pre-register allocation tail duplication"));
55 static cl::opt EnableBlockPlacement("enable-block-placement",
56 cl::Hidden, cl::desc("Enable probability-driven block placement"));
5557 static cl::opt DisableCodePlace("disable-code-place", cl::Hidden,
5658 cl::desc("Disable code placement"));
5759 static cl::opt DisableSSC("disable-ssc", cl::Hidden,
485487 PM.add(createGCInfoPrinter(dbgs()));
486488
487489 if (OptLevel != CodeGenOpt::None && !DisableCodePlace) {
488 PM.add(createCodePlacementOptPass());
489 printNoVerify(PM, "After CodePlacementOpt");
490 if (EnableBlockPlacement) {
491 // MachineBlockPlacement is an experimental pass which is disabled by
492 // default currently. Eventually it should subsume CodePlacementOpt, so
493 // when enabled, the other is disabled.
494 PM.add(createMachineBlockPlacementPass());
495 printNoVerify(PM, "After MachineBlockPlacement");
496 } else {
497 PM.add(createCodePlacementOptPass());
498 printNoVerify(PM, "After CodePlacementOpt");
499 }
490500 }
491501
492502 if (addPreEmitPass(PM, OptLevel))
0 //===-- MachineBlockPlacement.cpp - Basic Block Code Layout optimization --===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements basic block placement transformations using branch
10 // probability estimates. It is based around "Algo2" from Profile Guided Code
11 // Positioning [http://portal.acm.org/citation.cfm?id=989433].
12 //
13 // We combine the BlockFrequencyInfo with BranchProbabilityInfo to simulate
14 // measured edge-weights. The BlockFrequencyInfo effectively summarizes the
15 // probability of starting from any particular block, and the
16 // BranchProbabilityInfo the probability of exiting the block via a particular
17 // edge. Combined they form a function-wide ordering of the edges.
18 //
19 //===----------------------------------------------------------------------===//
20
21 #define DEBUG_TYPE "block-placement2"
22 #include "llvm/CodeGen/Passes.h"
23 #include "llvm/CodeGen/MachineModuleInfo.h"
24 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
25 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
26 #include "llvm/CodeGen/MachineFunction.h"
27 #include "llvm/CodeGen/MachineBasicBlock.h"
28 #include "llvm/CodeGen/MachineFunctionPass.h"
29 #include "llvm/Support/Allocator.h"
30 #include "llvm/Support/ErrorHandling.h"
31 #include "llvm/ADT/DenseMap.h"
32 #include "llvm/ADT/SCCIterator.h"
33 #include "llvm/ADT/SmallPtrSet.h"
34 #include "llvm/ADT/SmallVector.h"
35 #include "llvm/ADT/Statistic.h"
36 #include "llvm/Target/TargetInstrInfo.h"
37 #include
38 using namespace llvm;
39
40 namespace {
41 /// \brief A structure for storing a weighted edge.
42 ///
43 /// This stores an edge and its weight, computed as the product of the
44 /// frequency that the starting block is entered with the probability of
45 /// a particular exit block.
46 struct WeightedEdge {
47 BlockFrequency EdgeFrequency;
48 MachineBasicBlock *From, *To;
49
50 bool operator<(const WeightedEdge &RHS) const {
51 return EdgeFrequency < RHS.EdgeFrequency;
52 }
53 };
54 }
55
56 namespace {
57 struct BlockChain;
58 /// \brief Type for our function-wide basic block -> block chain mapping.
59 typedef DenseMap BlockToChainMapType;
60 }
61
62 namespace {
63 /// \brief A chain of blocks which will be laid out contiguously.
64 ///
65 /// This is the datastructure representing a chain of consecutive blocks that
66 /// are profitable to layout together in order to maximize fallthrough
67 /// probabilities. We also can use a block chain to represent a sequence of
68 /// basic blocks which have some external (correctness) requirement for
69 /// sequential layout.
70 ///
71 /// Eventually, the block chains will form a directed graph over the function.
72 /// We provide an SCC-supporting-iterator in order to quickly build and walk the
73 /// SCCs of block chains within a function.
74 ///
75 /// The block chains also have support for calculating and caching probability
76 /// information related to the chain itself versus other chains. This is used
77 /// for ranking during the final layout of block chains.
78 struct BlockChain {
79 class SuccIterator;
80
81 /// \brief The first and last basic block that form this chain.
82 ///
83 /// The chain is stored within the existing function ilist of basic blocks.
84 /// When merging chains or otherwise manipulating them, we splice the blocks
85 /// within this ilist, giving us very cheap storage here and constant time
86 /// merge operations.
87 ///
88 /// It is extremely important to note that LastBB is the iterator pointing
89 /// *at* the last basic block in the chain. That is, the chain consists of
90 /// the *closed* range [FirstBB, LastBB]. We cannot use half-open ranges
91 /// because the next basic block may get relocated to a different part of the
92 /// function at any time during the run of this pass.
93 MachineFunction::iterator FirstBB, LastBB;
94
95 /// \brief A handle to the function-wide basic block to block chain mapping.
96 ///
97 /// This is retained in each block chain to simplify the computation of child
98 /// block chains for SCC-formation and iteration. We store the edges to child
99 /// basic blocks, and map them back to their associated chains using this
100 /// structure.
101 BlockToChainMapType &BlockToChain;
102
103 /// \brief The weight used to rank two block chains in the same SCC.
104 ///
105 /// This is used during SCC layout of block chains to cache and rank the
106 /// chains. It is supposed to represent the expected frequency with which
107 /// control reaches a block within this chain, has the option of branching to
108 /// a block in some other chain participating in the SCC, but instead
109 /// continues within this chain. The higher this is, the more costly we
110 /// expect mis-predicted branches between this chain and other chains within
111 /// the SCC to be. Thus, since we expect branches between chains to be
112 /// predicted when backwards and not predicted when forwards, the higher this
113 /// is the more important that this chain is laid out first among those
114 /// chains in the same SCC as it.
115 BlockFrequency InChainEdgeFrequency;
116
117 /// \brief Construct a new BlockChain.
118 ///
119 /// This builds a new block chain representing a single basic block in the
120 /// function. It also registers itself as the chain that block participates
121 /// in with the BlockToChain mapping.
122 BlockChain(BlockToChainMapType &BlockToChain, MachineBasicBlock *BB)
123 : FirstBB(BB), LastBB(BB), BlockToChain(BlockToChain) {
124 assert(BB && "Cannot create a chain with a null basic block");
125 BlockToChain[BB] = this;
126 }
127
128 /// \brief Merge another block chain into this one.
129 ///
130 /// This routine merges a block chain into this one. It takes care of forming
131 /// a contiguous sequence of basic blocks, updating the edge list, and
132 /// updating the block -> chain mapping. It does not free or tear down the
133 /// old chain, but the old chain's block list is no longer valid.
134 void merge(BlockChain *Chain) {
135 assert(Chain && "Cannot merge a null chain");
136 MachineFunction::iterator EndBB = llvm::next(LastBB);
137 MachineFunction::iterator ChainEndBB = llvm::next(Chain->LastBB);
138
139 // Update the incoming blocks to point to this chain.
140 for (MachineFunction::iterator BI = Chain->FirstBB, BE = ChainEndBB;
141 BI != BE; ++BI) {
142 assert(BlockToChain[BI] == Chain && "Incoming blocks not in chain");
143 BlockToChain[BI] = this;
144 }
145
146 // We splice the blocks together within the function (unless they already
147 // are adjacent) so we can represent the new chain with a pair of pointers
148 // to basic blocks within the function. This is also useful as each chain
149 // of blocks will end up being laid out contiguously within the function.
150 if (EndBB != Chain->FirstBB)
151 FirstBB->getParent()->splice(EndBB, Chain->FirstBB, ChainEndBB);
152 LastBB = Chain->LastBB;
153 }
154 };
155 }
156
157 namespace {
158 /// \brief Successor iterator for BlockChains.
159 ///
160 /// This is an iterator that walks over the successor block chains by looking
161 /// through its blocks successors and mapping those back to block chains. This
162 /// iterator is not a fully-functioning iterator, it is designed specifically
163 /// to support the interface required by SCCIterator when forming and walking
164 /// SCCs of BlockChains.
165 ///
166 /// Note that this iterator cannot be used while the chains are still being
167 /// formed and/or merged. Unlike the chains themselves, it does store end
168 /// iterators which could be moved if the chains are re-ordered. Once we begin
169 /// forming and iterating over an SCC of chains, the order of blocks within the
170 /// function must not change until we finish using the SCC iterators.
171 class BlockChain::SuccIterator
172 : public std::iterator
173 BlockChain *, ptrdiff_t> {
174 BlockChain *Chain;
175 MachineFunction::iterator BI, BE;
176 MachineBasicBlock::succ_iterator SI;
177
178 public:
179 explicit SuccIterator(BlockChain *Chain)
180 : Chain(Chain), BI(Chain->FirstBB), BE(llvm::next(Chain->LastBB)),
181 SI(BI->succ_begin()) {
182 while (BI != BE && BI->succ_begin() == BI->succ_end())
183 ++BI;
184 if (BI != BE)
185 SI = BI->succ_begin();
186 }
187
188 /// \brief Helper function to create an end iterator for a particular chain.
189 ///
190 /// The "end" state is extremely arbitrary. We chose to have BI == BE, and SI
191 /// == Chain->FirstBB->succ_begin(). The value of SI doesn't really make any
192 /// sense, but rather than try to rationalize SI and our increment, when we
193 /// detect an "end" state, we just immediately call this function to build
194 /// the canonical end iterator.
195 static SuccIterator CreateEnd(BlockChain *Chain) {
196 SuccIterator It(Chain);
197 It.BI = It.BE;
198 return It;
199 }
200
201 bool operator==(const SuccIterator &RHS) const {
202 return (Chain == RHS.Chain && BI == RHS.BI && SI == RHS.SI);
203 }
204 bool operator!=(const SuccIterator &RHS) const {
205 return !operator==(RHS);
206 }
207
208 SuccIterator& operator++() {
209 assert(*this != CreateEnd(Chain) && "Cannot increment the end iterator");
210 // There may be null successor pointers, skip over them.
211 // FIXME: I don't understand *why* there are null successor pointers.
212 do {
213 ++SI;
214 if (SI != BI->succ_end() && *SI)
215 return *this;
216
217 // There may be a basic block without successors. Skip over them.
218 do {
219 ++BI;
220 if (BI == BE)
221 return *this = CreateEnd(Chain);
222 } while (BI->succ_begin() == BI->succ_end());
223 SI = BI->succ_begin();
224 } while (!*SI);
225 return *this;
226 }
227 SuccIterator operator++(int) {
228 SuccIterator tmp = *this;
229 ++*this;
230 return tmp;
231 }
232
233 BlockChain *operator*() const {
234 assert(Chain->BlockToChain.lookup(*SI) && "Missing chain");
235 return Chain->BlockToChain.lookup(*SI);
236 }
237 };
238 }
239
240 namespace {
241 /// \brief Sorter used with containers of BlockChain pointers.
242 ///
243 /// Sorts based on the \see BlockChain::InChainEdgeFrequency -- see its
244 /// comments for details on what this ordering represents.
245 struct ChainPtrPrioritySorter {
246 bool operator()(const BlockChain *LHS, const BlockChain *RHS) const {
247 assert(LHS && RHS && "Null chain entry");
248 return LHS->InChainEdgeFrequency < RHS->InChainEdgeFrequency;
249 }
250 };
251 }
252
253 namespace {
254 class MachineBlockPlacement : public MachineFunctionPass {
255 /// \brief A handle to the branch probability pass.
256 const MachineBranchProbabilityInfo *MBPI;
257
258 /// \brief A handle to the function-wide block frequency pass.
259 const MachineBlockFrequencyInfo *MBFI;
260
261 /// \brief A handle to the target's instruction info.
262 const TargetInstrInfo *TII;
263
264 /// \brief A prioritized list of edges in the BB-graph.
265 ///
266 /// For each function, we insert all control flow edges between BBs, along
267 /// with their "global" frequency. The Frequency of an edge being taken is
268 /// defined as the frequency of entering the source BB (from MBFI) times the
269 /// probability of taking a particular branch out of that block (from MBPI).
270 ///
271 /// Once built, this list is sorted in ascending frequency, making the last
272 /// edge the hottest one in the function.
273 SmallVector Edges;
274
275 /// \brief Allocator and owner of BlockChain structures.
276 ///
277 /// We build BlockChains lazily by merging together high probability BB
278 /// sequences according to the "Algo2" in the paper mentioned at the top of
279 /// the file. To reduce malloc traffic, we allocate them using this slab-like
280 /// allocator, and destroy them after the pass completes.
281 SpecificBumpPtrAllocator ChainAllocator;
282
283 /// \brief Function wide BasicBlock to BlockChain mapping.
284 ///
285 /// This mapping allows efficiently moving from any given basic block to the
286 /// BlockChain it participates in, if any. We use it to, among other things,
287 /// allow implicitly defining edges between chains as the existing edges
288 /// between basic blocks.
289 DenseMap BlockToChain;
290
291 /// \brief A prioritized sequence of chains.
292 ///
293 /// We build up the ideal sequence of basic block chains in reverse order
294 /// here, and then walk backwards to arrange the final function ordering.
295 SmallVector PChains;
296
297 #ifndef NDEBUG
298 /// \brief A set of active chains used to sanity-check the pass algorithm.
299 ///
300 /// All operations on this member should be wrapped in an assert or NDEBUG.
301 SmallPtrSet ActiveChains;
302 #endif
303
304 BlockChain *CreateChain(MachineBasicBlock *BB);
305 void PrioritizeEdges(MachineFunction &F);
306 void BuildBlockChains();
307 void PrioritizeChains(MachineFunction &F);
308 void PlaceBlockChains(MachineFunction &F);
309
310 public:
311 static char ID; // Pass identification, replacement for typeid
312 MachineBlockPlacement() : MachineFunctionPass(ID) {
313 initializeMachineBlockPlacementPass(*PassRegistry::getPassRegistry());
314 }
315
316 bool runOnMachineFunction(MachineFunction &F);
317
318 void getAnalysisUsage(AnalysisUsage &AU) const {
319 AU.addRequired();
320 AU.addRequired();
321 MachineFunctionPass::getAnalysisUsage(AU);
322 }
323
324 const char *getPassName() const { return "Block Placement"; }
325 };
326 }
327
328 char MachineBlockPlacement::ID = 0;
329 INITIALIZE_PASS_BEGIN(MachineBlockPlacement, "block-placement2",
330 "Branch Probability Basic Block Placement", false, false)
331 INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
332 INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
333 INITIALIZE_PASS_END(MachineBlockPlacement, "block-placement2",
334 "Branch Probability Basic Block Placement", false, false)
335
336 FunctionPass *llvm::createMachineBlockPlacementPass() {
337 return new MachineBlockPlacement();
338 }
339
340 namespace llvm {
341 /// \brief GraphTraits specialization for our BlockChain graph.
342 template <> struct GraphTraits {
343 typedef BlockChain NodeType;
344 typedef BlockChain::SuccIterator ChildIteratorType;
345
346 static NodeType *getEntryNode(NodeType *N) { return N; }
347 static BlockChain::SuccIterator child_begin(NodeType *N) {
348 return BlockChain::SuccIterator(N);
349 }
350 static BlockChain::SuccIterator child_end(NodeType *N) {
351 return BlockChain::SuccIterator::CreateEnd(N);
352 }
353 };
354 }
355
356 /// \brief Helper to create a new chain for a single BB.
357 ///
358 /// Takes care of growing the Chains, setting up the BlockChain object, and any
359 /// debug checking logic.
360 /// \returns A pointer to the new BlockChain.
361 BlockChain *MachineBlockPlacement::CreateChain(MachineBasicBlock *BB) {
362 BlockChain *Chain =
363 new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB);
364 assert(ActiveChains.insert(Chain));
365 return Chain;
366 }
367
368 /// \brief Build a prioritized list of edges.
369 ///
370 /// The priority is determined by the product of the block frequency (how
371 /// likely it is to arrive at a particular block) times the probability of
372 /// taking this particular edge out of the block. This provides a function-wide
373 /// ordering of the edges.
374 void MachineBlockPlacement::PrioritizeEdges(MachineFunction &F) {
375 assert(Edges.empty() && "Already have an edge list");
376 SmallVector Cond; // For AnalyzeBranch.
377 BlockChain *RequiredChain = 0;
378 for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
379 MachineBasicBlock *From = &*FI;
380 // We only consider MBBs with analyzable branches. Even if the analysis
381 // fails, if there is no fallthrough, we can still work with the MBB.
382 MachineBasicBlock *TBB = 0, *FBB = 0;
383 Cond.clear();
384 if (TII->AnalyzeBranch(*From, TBB, FBB, Cond) && From->canFallThrough()) {
385 // We push all unanalyzed blocks onto a chain eagerly to prevent them
386 // from being split later. Create the chain if needed, otherwise just
387 // keep track that these blocks reside on it.
388 if (!RequiredChain)
389 RequiredChain = CreateChain(From);
390 else
391 BlockToChain[From] = RequiredChain;
392 } else {
393 // As soon as we find an analyzable branch, add that block to and
394 // finalize any required chain that has been started. The required chain
395 // is only modeling potentially inexplicable fallthrough, so the first
396 // block to have analyzable fallthrough is a known-safe stopping point.
397 if (RequiredChain) {
398 BlockToChain[From] = RequiredChain;
399 RequiredChain->LastBB = FI;
400 RequiredChain = 0;
401 }
402 }
403
404 BlockFrequency BaseFrequency = MBFI->getBlockFreq(From);
405 for (MachineBasicBlock::succ_iterator SI = From->succ_begin(),
406 SE = From->succ_end();
407 SI != SE; ++SI) {
408 MachineBasicBlock *To = *SI;
409 WeightedEdge WE = { BaseFrequency * MBPI->getEdgeProbability(From, To),
410 From, To };
411 Edges.push_back(WE);
412 }
413 }
414 assert(!RequiredChain && "Never found a terminator for a required chain");
415 std::stable_sort(Edges.begin(), Edges.end());
416 }
417
418 /// \brief Build chains of basic blocks along hot paths.
419 ///
420 /// Build chains by trying to merge each pair of blocks from the most costly
421 /// edge first. This is essentially "Algo2" from the Profile Guided Code
422 /// Positioning paper. While each node is considered a chain of one block, this
423 /// routine lazily builds the chain objects themselves so that when possible it
424 /// can just merge a block into an existing chain.
425 void MachineBlockPlacement::BuildBlockChains() {
426 for (SmallVectorImpl::reverse_iterator EI = Edges.rbegin(),
427 EE = Edges.rend();
428 EI != EE; ++EI) {
429 MachineBasicBlock *SourceB = EI->From, *DestB = EI->To;
430 if (SourceB == DestB) continue;
431
432 BlockChain *SourceChain = BlockToChain.lookup(SourceB);
433 if (!SourceChain) SourceChain = CreateChain(SourceB);
434 BlockChain *DestChain = BlockToChain.lookup(DestB);
435 if (!DestChain) DestChain = CreateChain(DestB);
436 if (SourceChain == DestChain)
437 continue;
438
439 bool IsSourceTail =
440 SourceChain->LastBB == MachineFunction::iterator(SourceB);
441 bool IsDestHead =
442 DestChain->FirstBB == MachineFunction::iterator(DestB);
443
444 if (!IsSourceTail || !IsDestHead)
445 continue;
446
447 SourceChain->merge(DestChain);
448 assert(ActiveChains.erase(DestChain));
449 }
450 }
451
452 /// \brief Prioritize the chains to minimize back-edges between chains.
453 ///
454 /// This is the trickiest part of the placement algorithm. Each chain is
455 /// a hot-path through a sequence of basic blocks, but there are conditional
456 /// branches away from this hot path, and to some other chain. Hardware branch
457 /// predictors favor back edges over forward edges, and so it is desirable to
458 /// arrange the targets of branches away from a hot path and to some other
459 /// chain to come later in the function, making them forward branches, and
460 /// helping the branch predictor to predict fallthrough.
461 ///
462 /// In some cases, this is easy. Simply topologically walking from the entry
463 /// chain through its successors in order would work if there were no cycles
464 /// between the chains of blocks, but often there are. In such a case, we first
465 /// need to identify the participants in the cycle, and then rank them so that
466 /// the linearizing of the chains has the lowest *probability* of causing
467 /// a mispredicted branch. To compute the correct rank for a chain, we take the
468 /// complement of the branch probability for each branch leading away from the
469 /// chain and multiply it by the frequency of the source block for that branch.
470 /// This gives us the probability of that particular branch *not* being taken
471 /// in this function. The sum of these probabilities for each chain is used as
472 /// a rank, so that we order the chain with the highest such sum first.
473 /// FIXME: This seems like a good approximation, but there is probably a known
474 /// technique for ordering of an SCC given edge weights. It would be good to
475 /// use that, or even use its code if possible.
476 ///
477 /// Also notable is that we prioritize the chains from the bottom up, and so
478 /// all of the "first" and "before" relationships end up inverted in the code.
479 void MachineBlockPlacement::PrioritizeChains(MachineFunction &F) {
480 MachineBasicBlock *EntryB = &F.front();
481 BlockChain *EntryChain = BlockToChain[EntryB];
482 assert(EntryChain && "Missing chain for entry block");
483 assert(EntryChain->FirstBB == F.begin() &&
484 "Entry block is not the head of the entry block chain");
485
486 // Form an SCC and walk it from the bottom up.
487 SmallPtrSet IsInSCC;
488 for (scc_iterator I = scc_begin(EntryChain);
489 !I.isAtEnd(); ++I) {
490 const std::vector &SCC = *I;
491 PChains.insert(PChains.end(), SCC.begin(), SCC.end());
492
493 // If there is only one chain in the SCC, it's trivially sorted so just
494 // bail out early. Sorting the SCC is expensive.
495 if (SCC.size() == 1)
496 continue;
497
498 // We work strictly on the PChains range from here on out to maximize
499 // locality.
500 SmallVectorImpl::iterator SCCEnd = PChains.end(),
501 SCCBegin = SCCEnd - SCC.size();
502 IsInSCC.clear();
503 IsInSCC.insert(SCCBegin, SCCEnd);
504
505 // Compute the edge frequency of staying in a chain, despite the existence
506 // of an edge to some other chain within this SCC.
507 for (SmallVectorImpl::iterator SCCI = SCCBegin;
508 SCCI != SCCEnd; ++SCCI) {
509 BlockChain *Chain = *SCCI;
510
511 // Special case the entry chain. Regardless of the weights of other
512 // chains, the entry chain *must* come first, so move it to the end, and
513 // avoid processing that chain at all.
514 if (Chain == EntryChain) {
515 --SCCEnd;
516 if (SCCI == SCCEnd) break;
517 Chain = *SCCI = *SCCEnd;
518 *SCCEnd = EntryChain;
519 }
520
521 // Walk over every block in this chain looking for out-bound edges to
522 // other chains in this SCC.
523 for (MachineFunction::iterator BI = Chain->FirstBB,
524 BE = llvm::next(Chain->LastBB);
525 BI != BE; ++BI) {
526 MachineBasicBlock *From = &*BI;
527 for (MachineBasicBlock::succ_iterator SI = BI->succ_begin(),
528 SE = BI->succ_end();
529 SI != SE; ++SI) {
530 MachineBasicBlock *To = *SI;
531 if (!To || !IsInSCC.count(BlockToChain[To]))
532 continue;
533 BranchProbability ComplEdgeProb =
534 MBPI->getEdgeProbability(From, To).getCompl();
535 Chain->InChainEdgeFrequency +=
536 MBFI->getBlockFreq(From) * ComplEdgeProb;
537 }
538 }
539 }
540
541 // Sort the chains within the SCC according to their edge frequencies,
542 // which should make the least costly chain of blocks to mis-place be
543 // ordered first in the prioritized sequence.
544 std::stable_sort(SCCBegin, SCCEnd, ChainPtrPrioritySorter());
545 }
546 }
547
548 /// \brief Splice the function blocks together based on the chain priorities.
549 ///
550 /// Each chain is already represented as a contiguous range of blocks in the
551 /// function. Simply walk backwards down the prioritized chains and splice in
552 /// any chains out of order. Note that the first chain we visit is necessarily
553 /// the entry chain. It has no predecessors and so must be the top of the SCC.
554 /// Also, we cannot splice any chain prior to the entry chain as we can't
555 /// splice any blocks prior to the entry block.
556 void MachineBlockPlacement::PlaceBlockChains(MachineFunction &F) {
557 assert(!PChains.empty() && "No chains were prioritized");
558 assert(PChains.back() == BlockToChain[&F.front()] &&
559 "The entry chain must always be the final chain");
560
561 MachineFunction::iterator InsertPos = F.begin();
562 for (SmallVectorImpl::reverse_iterator CI = PChains.rbegin(),
563 CE = PChains.rend();
564 CI != CE; ++CI) {
565 BlockChain *Chain = *CI;
566 // Check that we process this chain only once for debugging.
567 assert(ActiveChains.erase(Chain) && "Processed a chain twice");
568
569 // If this chain is already in the right position, just skip past it.
570 // Otherwise, splice it into position.
571 if (InsertPos == Chain->FirstBB)
572 InsertPos = llvm::next(Chain->LastBB);
573 else
574 F.splice(InsertPos, Chain->FirstBB, llvm::next(Chain->LastBB));
575 }
576
577 // Note that we can't assert this is empty as there may be unreachable blocks
578 // in the function.
579 #ifndef NDEBUG
580 ActiveChains.clear();
581 #endif
582
583 // Now that every block is in its final position, update all of the
584 // terminators.
585 SmallVector Cond; // For AnalyzeBranch.
586 for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
587 // FIXME: It would be awesome if updateTerminator would just return rather
588 // than assert when the branch cannot be analyzed in order to remove this
589 // boiler plate.
590 Cond.clear();
591 MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch.
592 if (!TII->AnalyzeBranch(*FI, TBB, FBB, Cond))
593 FI->updateTerminator();
594 }
595 }
596
597 bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) {
598 // Check for single-block functions and skip them.
599 if (llvm::next(F.begin()) == F.end())
600 return false;
601
602 MBPI = &getAnalysis();
603 MBFI = &getAnalysis();
604 TII = F.getTarget().getInstrInfo();
605 assert(Edges.empty());
606 assert(BlockToChain.empty());
607 assert(PChains.empty());
608 assert(ActiveChains.empty());
609
610 PrioritizeEdges(F);
611 BuildBlockChains();
612 PrioritizeChains(F);
613 PlaceBlockChains(F);
614
615 Edges.clear();
616 BlockToChain.clear();
617 PChains.clear();
618 ChainAllocator.DestroyAll();
619
620 // We always return true as we have no way to track whether the final order
621 // differs from the original order.
622 return true;
623 }