 0 
//===-- MachineBlockPlacement.cpp - Basic Block Code Layout optimization --===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

 8 
//
// This file implements basic block placement transformations using branch
// probability estimates. It is based around "Algo2" from Profile Guided Code
// Positioning [http://portal.acm.org/citation.cfm?id=989433].
//
// We combine the BlockFrequencyInfo with BranchProbabilityInfo to simulate
// measured edge-weights. The BlockFrequencyInfo effectively summarizes the
// probability of starting from any particular block, and the
// BranchProbabilityInfo the probability of exiting the block via a particular
// edge. Combined they form a function-wide ordering of the edges.
//
//===----------------------------------------------------------------------===//

 20 

 21 
// Must be defined before the LLVM headers below are included.
#define DEBUG_TYPE "block-placement2"

 22 
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Target/TargetInstrInfo.h"
#include <algorithm>

 38 
using namespace llvm;

 39 

 40 
namespace {

 41 
/// \brief A structure for storing a weighted edge.

 42 
///

 43 
/// This stores an edge and its weight, computed as the product of the

 44 
/// frequency that the starting block is entered with the probability of

 45 
/// a particular exit block.

 46 
struct WeightedEdge {

 47 
BlockFrequency EdgeFrequency;

 48 
MachineBasicBlock *From, *To;

 49 

 50 
bool operator<(const WeightedEdge &RHS) const {

 51 
return EdgeFrequency < RHS.EdgeFrequency;

 52 
}

 53 
};

 54 
}

 55 

 56 
namespace {

 57 
struct BlockChain;

 58 
/// \brief Type for our functionwide basic block > block chain mapping.

 59 
typedef DenseMap BlockToChainMapType;

 60 
}

 61 

 62 
namespace {

 63 
/// \brief A chain of blocks which will be laid out contiguously.

 64 
///

 65 
/// This is the datastructure representing a chain of consecutive blocks that

 66 
/// are profitable to layout together in order to maximize fallthrough

 67 
/// probabilities. We also can use a block chain to represent a sequence of

 68 
/// basic blocks which have some external (correctness) requirement for

 69 
/// sequential layout.

 70 
///

 71 
/// Eventually, the block chains will form a directed graph over the function.

 72 
/// We provide an SCCsupportingiterator in order to quicky build and walk the

 73 
/// SCCs of block chains within a function.

 74 
///

 75 
/// The block chains also have support for calculating and caching probability

 76 
/// information related to the chain itself versus other chains. This is used

 77 
/// for ranking during the final layout of block chains.

 78 
struct BlockChain {

 79 
class SuccIterator;

 80 

 81 
/// \brief The first and last basic block that from this chain.

 82 
///

 83 
/// The chain is stored within the existing function ilist of basic blocks.

 84 
/// When merging chains or otherwise manipulating them, we splice the blocks

 85 
/// within this ilist, giving us very cheap storage here and constant time

 86 
/// merge operations.

 87 
///

 88 
/// It is extremely important to note that LastBB is the iterator pointing

 89 
/// *at* the last basic block in the chain. That is, the chain consists of

 90 
/// the *closed* range [FirstBB, LastBB]. We cannot use halfopen ranges

 91 
/// because the next basic block may get relocated to a different part of the

 92 
/// function at any time during the run of this pass.

 93 
MachineFunction::iterator FirstBB, LastBB;

 94 

 95 
/// \brief A handle to the functionwide basic block to block chain mapping.

 96 
///

 97 
/// This is retained in each block chain to simplify the computation of child

 98 
/// block chains for SCCformation and iteration. We store the edges to child

 99 
/// basic blocks, and map them back to their associated chains using this

 100 
/// structure.

 101 
BlockToChainMapType &BlockToChain;

 102 

 103 
/// \brief The weight used to rank two block chains in the same SCC.

 104 
///

 105 
/// This is used during SCC layout of block chains to cache and rank the

 106 
/// chains. It is supposed to represent the expected frequency with which

 107 
/// control reaches a block within this chain, has the option of branching to

 108 
/// a block in some other chain participating in the SCC, but instead

 109 
/// continues within this chain. The higher this is, the more costly we

 110 
/// expect mispredicted branches between this chain and other chains within

 111 
/// the SCC to be. Thus, since we expect branches between chains to be

 112 
/// predicted when backwards and not predicted when forwards, the higher this

 113 
/// is the more important that this chain is laid out first among those

 114 
/// chains in the same SCC as it.

 115 
BlockFrequency InChainEdgeFrequency;

 116 

 117 
/// \brief Construct a new BlockChain.

 118 
///

 119 
/// This builds a new block chain representing a single basic block in the

 120 
/// function. It also registers itself as the chain that block participates

 121 
/// in with the BlockToChain mapping.

 122 
BlockChain(BlockToChainMapType &BlockToChain, MachineBasicBlock *BB)

 123 
: FirstBB(BB), LastBB(BB), BlockToChain(BlockToChain) {

 124 
assert(BB && "Cannot create a chain with a null basic block");

 125 
BlockToChain[BB] = this;

 126 
}

 127 

 128 
/// \brief Merge another block chain into this one.

 129 
///

 130 
/// This routine merges a block chain into this one. It takes care of forming

 131 
/// a contiguous sequence of basic blocks, updating the edge list, and

 132 
/// updating the block > chain mapping. It does not free or tear down the

 133 
/// old chain, but the old chain's block list is no longer valid.

 134 
void merge(BlockChain *Chain) {

 135 
assert(Chain && "Cannot merge a null chain");

 136 
MachineFunction::iterator EndBB = llvm::next(LastBB);

 137 
MachineFunction::iterator ChainEndBB = llvm::next(Chain>LastBB);

 138 

 139 
// Update the incoming blocks to point to this chain.

 140 
for (MachineFunction::iterator BI = Chain>FirstBB, BE = ChainEndBB;

 141 
BI != BE; ++BI) {

 142 
assert(BlockToChain[BI] == Chain && "Incoming blocks not in chain");

 143 
BlockToChain[BI] = this;

 144 
}

 145 

 146 
// We splice the blocks together within the function (unless they already

 147 
// are adjacent) so we can represent the new chain with a pair of pointers

 148 
// to basic blocks within the function. This is also useful as each chain

 149 
// of blocks will end up being laid out contiguously within the function.

 150 
if (EndBB != Chain>FirstBB)

 151 
FirstBB>getParent()>splice(EndBB, Chain>FirstBB, ChainEndBB);

 152 
LastBB = Chain>LastBB;

 153 
}

 154 
};

 155 
}

 156 

 157 
namespace {

 158 
/// \brief Successor iterator for BlockChains.

 159 
///

 160 
/// This is an iterator that walks over the successor block chains by looking

 161 
/// through its blocks successors and mapping those back to block chains. This

 162 
/// iterator is not a fullyfunctioning iterator, it is designed specifically

 163 
/// to support the interface required by SCCIterator when forming and walking

 164 
/// SCCs of BlockChains.

 165 
///

 166 
/// Note that this iterator cannot be used while the chains are still being

 167 
/// formed and/or merged. Unlike the chains themselves, it does store end

 168 
/// iterators which could be moved if the chains are reordered. Once we begin

 169 
/// forming and iterating over an SCC of chains, the order of blocks within the

 170 
/// function must not change until we finish using the SCC iterators.

 171 
class BlockChain::SuccIterator

 172 
: public std::iterator

 173 
BlockChain *, ptrdiff_t> {

 174 
BlockChain *Chain;

 175 
MachineFunction::iterator BI, BE;

 176 
MachineBasicBlock::succ_iterator SI;

 177 

 178 
public:

 179 
explicit SuccIterator(BlockChain *Chain)

 180 
: Chain(Chain), BI(Chain>FirstBB), BE(llvm::next(Chain>LastBB)),

 181 
SI(BI>succ_begin()) {

 182 
while (BI != BE && BI>succ_begin() == BI>succ_end())

 183 
++BI;

 184 
if (BI != BE)

 185 
SI = BI>succ_begin();

 186 
}

 187 

 188 
/// \brief Helper function to create an end iterator for a particular chain.

 189 
///

 190 
/// The "end" state is extremely arbitrary. We chose to have BI == BE, and SI

 191 
/// == Chain>FirstBB>succ_begin(). The value of SI doesn't really make any

 192 
/// sense, but rather than try to rationalize SI and our increment, when we

 193 
/// detect an "end" state, we just immediately call this function to build

 194 
/// the canonical end iterator.

 195 
static SuccIterator CreateEnd(BlockChain *Chain) {

 196 
SuccIterator It(Chain);

 197 
It.BI = It.BE;

 198 
return It;

 199 
}

 200 

 201 
bool operator==(const SuccIterator &RHS) const {

 202 
return (Chain == RHS.Chain && BI == RHS.BI && SI == RHS.SI);

 203 
}

 204 
bool operator!=(const SuccIterator &RHS) const {

 205 
return !operator==(RHS);

 206 
}

 207 

 208 
SuccIterator& operator++() {

 209 
assert(*this != CreateEnd(Chain) && "Cannot increment the end iterator");

 210 
// There may be null successor pointers, skip over them.

 211 
// FIXME: I don't understand *why* there are null successor pointers.

 212 
do {

 213 
++SI;

 214 
if (SI != BI>succ_end() && *SI)

 215 
return *this;

 216 

 217 
// There may be a basic block without successors. Skip over them.

 218 
do {

 219 
++BI;

 220 
if (BI == BE)

 221 
return *this = CreateEnd(Chain);

 222 
} while (BI>succ_begin() == BI>succ_end());

 223 
SI = BI>succ_begin();

 224 
} while (!*SI);

 225 
return *this;

 226 
}

 227 
SuccIterator operator++(int) {

 228 
SuccIterator tmp = *this;

 229 
++*this;

 230 
return tmp;

 231 
}

 232 

 233 
BlockChain *operator*() const {

 234 
assert(Chain>BlockToChain.lookup(*SI) && "Missing chain");

 235 
return Chain>BlockToChain.lookup(*SI);

 236 
}

 237 
};

 238 
}

 239 

 240 
namespace {

 241 
/// \brief Sorter used with containers of BlockChain pointers.

 242 
///

 243 
/// Sorts based on the \see BlockChain::InChainEdgeFrequency  see its

 244 
/// comments for details on what this ordering represents.

 245 
struct ChainPtrPrioritySorter {

 246 
bool operator()(const BlockChain *LHS, const BlockChain *RHS) const {

 247 
assert(LHS && RHS && "Null chain entry");

 248 
return LHS>InChainEdgeFrequency < RHS>InChainEdgeFrequency;

 249 
}

 250 
};

 251 
}

 252 

 253 
namespace {

 254 
class MachineBlockPlacement : public MachineFunctionPass {

 255 
/// \brief A handle to the branch probability pass.

 256 
const MachineBranchProbabilityInfo *MBPI;

 257 

 258 
/// \brief A handle to the functionwide block frequency pass.

 259 
const MachineBlockFrequencyInfo *MBFI;

 260 

 261 
/// \brief A handle to the target's instruction info.

 262 
const TargetInstrInfo *TII;

 263 

 264 
/// \brief A prioritized list of edges in the BBgraph.

 265 
///

 266 
/// For each function, we insert all control flow edges between BBs, along

 267 
/// with their "global" frequency. The Frequency of an edge being taken is

 268 
/// defined as the frequency of entering the source BB (from MBFI) times the

 269 
/// probability of taking a particular branch out of that block (from MBPI).

 270 
///

 271 
/// Once built, this list is sorted in ascending frequency, making the last

 272 
/// edge the hottest one in the function.

 273 
SmallVector Edges;

 274 

 275 
/// \brief Allocator and owner of BlockChain structures.

 276 
///

 277 
/// We build BlockChains lazily by merging together high probability BB

 278 
/// sequences acording to the "Algo2" in the paper mentioned at the top of

 279 
/// the file. To reduce malloc traffic, we allocate them using this slablike

 280 
/// allocator, and destroy them after the pass completes.

 281 
SpecificBumpPtrAllocator ChainAllocator;

 282 

 283 
/// \brief Function wide BasicBlock to BlockChain mapping.

 284 
///

 285 
/// This mapping allows efficiently moving from any given basic block to the

 286 
/// BlockChain it participates in, if any. We use it to, among other things,

 287 
/// allow implicitly defining edges between chains as the existing edges

 288 
/// between basic blocks.

 289 
DenseMap BlockToChain;

 290 

 291 
/// \brief A prioritized sequence of chains.

 292 
///

 293 
/// We build up the ideal sequence of basic block chains in reverse order

 294 
/// here, and then walk backwards to arrange the final function ordering.

 295 
SmallVector PChains;

 296 

 297 
#ifndef NDEBUG

 298 
/// \brief A set of active chains used to sanitycheck the pass algorithm.

 299 
///

 300 
/// All operations on this member should be wrapped in an assert or NDEBUG.

 301 
SmallPtrSet ActiveChains;

 302 
#endif

 303 

 304 
BlockChain *CreateChain(MachineBasicBlock *BB);

 305 
void PrioritizeEdges(MachineFunction &F);

 306 
void BuildBlockChains();

 307 
void PrioritizeChains(MachineFunction &F);

 308 
void PlaceBlockChains(MachineFunction &F);

 309 

 310 
public:

 311 
static char ID; // Pass identification, replacement for typeid

 312 
MachineBlockPlacement() : MachineFunctionPass(ID) {

 313 
initializeMachineBlockPlacementPass(*PassRegistry::getPassRegistry());

 314 
}

 315 

 316 
bool runOnMachineFunction(MachineFunction &F);

 317 

 318 
void getAnalysisUsage(AnalysisUsage &AU) const {

 319 
AU.addRequired();

 320 
AU.addRequired();

 321 
MachineFunctionPass::getAnalysisUsage(AU);

 322 
}

 323 

 324 
const char *getPassName() const { return "Block Placement"; }

 325 
};

 326 
}

 327 

 328 
char MachineBlockPlacement::ID = 0;

 329 
INITIALIZE_PASS_BEGIN(MachineBlockPlacement, "blockplacement2",

 330 
"Branch Probability Basic Block Placement", false, false)

 331 
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)

 332 
INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)

 333 
INITIALIZE_PASS_END(MachineBlockPlacement, "blockplacement2",

 334 
"Branch Probability Basic Block Placement", false, false)

 335 

 336 
/// \brief Factory for the block placement pass.
///
/// \returns A newly heap-allocated MachineBlockPlacement instance.
FunctionPass *llvm::createMachineBlockPlacementPass() {
  FunctionPass *Pass = new MachineBlockPlacement();
  return Pass;
}

 339 

 340 
namespace llvm {

 341 
/// \brief GraphTraits specialization for our BlockChain graph.

 342 
template <> struct GraphTraits {

 343 
typedef BlockChain NodeType;

 344 
typedef BlockChain::SuccIterator ChildIteratorType;

 345 

 346 
static NodeType *getEntryNode(NodeType *N) { return N; }

 347 
static BlockChain::SuccIterator child_begin(NodeType *N) {

 348 
return BlockChain::SuccIterator(N);

 349 
}

 350 
static BlockChain::SuccIterator child_end(NodeType *N) {

 351 
return BlockChain::SuccIterator::CreateEnd(N);

 352 
}

 353 
};

 354 
}

 355 

 356 
/// \brief Helper to create a new chain for a single BB.

 357 
///

 358 
/// Takes care of growing the Chains, setting up the BlockChain object, and any

 359 
/// debug checking logic.

 360 
/// \returns A pointer to the new BlockChain.

 361 
BlockChain *MachineBlockPlacement::CreateChain(MachineBasicBlock *BB) {

 362 
BlockChain *Chain =

 363 
new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB);

 364 
assert(ActiveChains.insert(Chain));

 365 
return Chain;

 366 
}

 367 

 368 
/// \brief Build a prioritized list of edges.

 369 
///

 370 
/// The priority is determined by the product of the block frequency (how

 371 
/// likely it is to arrive at a particular block) times the probability of

 372 
/// taking this particular edge out of the block. This provides a functionwide

 373 
/// ordering of the edges.

 374 
void MachineBlockPlacement::PrioritizeEdges(MachineFunction &F) {

 375 
assert(Edges.empty() && "Already have an edge list");

 376 
SmallVector Cond; // For AnalyzeBranch.

 377 
BlockChain *RequiredChain = 0;

 378 
for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {

 379 
MachineBasicBlock *From = &*FI;

 380 
// We only consider MBBs with analyzable branches. Even if the analysis

 381 
// fails, if there is no fallthrough, we can still work with the MBB.

 382 
MachineBasicBlock *TBB = 0, *FBB = 0;

 383 
Cond.clear();

 384 
if (TII>AnalyzeBranch(*From, TBB, FBB, Cond) && From>canFallThrough()) {

 385 
// We push all unanalyzed blocks onto a chain eagerly to prevent them

 386 
// from being split later. Create the chain if needed, otherwise just

 387 
// keep track that these blocks reside on it.

 388 
if (!RequiredChain)

 389 
RequiredChain = CreateChain(From);

 390 
else

 391 
BlockToChain[From] = RequiredChain;

 392 
} else {

 393 
// As soon as we find an analyzable branch, add that block to and

 394 
// finalize any required chain that has been started. The required chain

 395 
// is only modeling potentially inexplicable fallthrough, so the first

 396 
// block to have analyzable fallthrough is a knownsafe stopping point.

 397 
if (RequiredChain) {

 398 
BlockToChain[From] = RequiredChain;

 399 
RequiredChain>LastBB = FI;

 400 
RequiredChain = 0;

 401 
}

 402 
}

 403 

 404 
BlockFrequency BaseFrequency = MBFI>getBlockFreq(From);

 405 
for (MachineBasicBlock::succ_iterator SI = From>succ_begin(),

 406 
SE = From>succ_end();

 407 
SI != SE; ++SI) {

 408 
MachineBasicBlock *To = *SI;

 409 
WeightedEdge WE = { BaseFrequency * MBPI>getEdgeProbability(From, To),

 410 
From, To };

 411 
Edges.push_back(WE);

 412 
}

 413 
}

 414 
assert(!RequiredChain && "Never found a terminator for a required chain");

 415 
std::stable_sort(Edges.begin(), Edges.end());

 416 
}

 417 

 418 
/// \brief Build chains of basic blocks along hot paths.

 419 
///

 420 
/// Build chains by trying to merge each pair of blocks from the mostly costly

 421 
/// edge first. This is essentially "Algo2" from the Profile Guided Code

 422 
/// Placement paper. While each node is considered a chain of one block, this

 423 
/// routine lazily build the chain objects themselves so that when possible it

 424 
/// can just merge a block into an existing chain.

 425 
void MachineBlockPlacement::BuildBlockChains() {

 426 
for (SmallVectorImpl::reverse_iterator EI = Edges.rbegin(),

 427 
EE = Edges.rend();

 428 
EI != EE; ++EI) {

 429 
MachineBasicBlock *SourceB = EI>From, *DestB = EI>To;

 430 
if (SourceB == DestB) continue;

 431 

 432 
BlockChain *SourceChain = BlockToChain.lookup(SourceB);

 433 
if (!SourceChain) SourceChain = CreateChain(SourceB);

 434 
BlockChain *DestChain = BlockToChain.lookup(DestB);

 435 
if (!DestChain) DestChain = CreateChain(DestB);

 436 
if (SourceChain == DestChain)

 437 
continue;

 438 

 439 
bool IsSourceTail =

 440 
SourceChain>LastBB == MachineFunction::iterator(SourceB);

 441 
bool IsDestHead =

 442 
DestChain>FirstBB == MachineFunction::iterator(DestB);

 443 

 444 
if (!IsSourceTail  !IsDestHead)

 445 
continue;

 446 

 447 
SourceChain>merge(DestChain);

 448 
assert(ActiveChains.erase(DestChain));

 449 
}

 450 
}

 451 

 452 
/// \brief Prioritize the chains to minimize backedges between chains.

 453 
///

 454 
/// This is the trickiest part of the placement algorithm. Each chain is

 455 
/// a hotpath through a sequence of basic blocks, but there are conditional

 456 
/// branches away from this hot path, and to some other chain. Hardware branch

 457 
/// predictors favor back edges over forward edges, and so it is desirable to

 458 
/// arrange the targets of branches away from a hot path and to some other

 459 
/// chain to come later in the function, making them forward branches, and

 460 
/// helping the branch predictor to predict fallthrough.

 461 
///

 462 
/// In some cases, this is easy. simply topologically walking from the entry

 463 
/// chain through its successors in order would work if there were no cycles

 464 
/// between the chains of blocks, but often there are. In such a case, we first

 465 
/// need to identify the participants in the cycle, and then rank them so that

 466 
/// the linearizing of the chains has the lowest *probability* of causing

 467 
/// a mispredicted branch. To compute the correct rank for a chain, we take the

 468 
/// complement of the branch probability for each branch leading away from the

 469 
/// chain and multiply it by the frequency of the source block for that branch.

 470 
/// This gives us the probability of that particular branch *not* being taken

 471 
/// in this function. The sum of these probabilities for each chain is used as

 472 
/// a rank, so that we order the chain with the highest such sum first.

 473 
/// FIXME: This seems like a good approximation, but there is probably a known

 474 
/// technique for ordering of an SCC given edge weights. It would be good to

 475 
/// use that, or even use its code if possible.

 476 
///

 477 
/// Also notable is that we prioritize the chains from the bottom up, and so

 478 
/// all of the "first" and "before" relationships end up inverted in the code.

 479 
void MachineBlockPlacement::PrioritizeChains(MachineFunction &F) {

 480 
MachineBasicBlock *EntryB = &F.front();

 481 
BlockChain *EntryChain = BlockToChain[EntryB];

 482 
assert(EntryChain && "Missing chain for entry block");

 483 
assert(EntryChain>FirstBB == F.begin() &&

 484 
"Entry block is not the head of the entry block chain");

 485 

 486 
// Form an SCC and walk it from the bottom up.

 487 
SmallPtrSet IsInSCC;

 488 
for (scc_iterator I = scc_begin(EntryChain);

 489 
!I.isAtEnd(); ++I) {

 490 
const std::vector &SCC = *I;

 491 
PChains.insert(PChains.end(), SCC.begin(), SCC.end());

 492 

 493 
// If there is only one chain in the SCC, it's trivially sorted so just

 494 
// bail out early. Sorting the SCC is expensive.

 495 
if (SCC.size() == 1)

 496 
continue;

 497 

 498 
// We work strictly on the PChains range from here on out to maximize

 499 
// locality.

 500 
SmallVectorImpl::iterator SCCEnd = PChains.end(),

 501 
SCCBegin = SCCEnd  SCC.size();

 502 
IsInSCC.clear();

 503 
IsInSCC.insert(SCCBegin, SCCEnd);

 504 

 505 
// Compute the edge frequency of staying in a chain, despite the existency

 506 
// of an edge to some other chain within this SCC.

 507 
for (SmallVectorImpl::iterator SCCI = SCCBegin;

 508 
SCCI != SCCEnd; ++SCCI) {

 509 
BlockChain *Chain = *SCCI;

 510 

 511 
// Special case the entry chain. Regardless of the weights of other

 512 
// chains, the entry chain *must* come first, so move it to the end, and

 513 
// avoid processing that chain at all.

 514 
if (Chain == EntryChain) {

 515 
SCCEnd;

 516 
if (SCCI == SCCEnd) break;

 517 
Chain = *SCCI = *SCCEnd;

 518 
*SCCEnd = EntryChain;

 519 
}

 520 

 521 
// Walk over every block in this chain looking for outbound edges to

 522 
// other chains in this SCC.

 523 
for (MachineFunction::iterator BI = Chain>FirstBB,

 524 
BE = llvm::next(Chain>LastBB);

 525 
BI != BE; ++BI) {

 526 
MachineBasicBlock *From = &*BI;

 527 
for (MachineBasicBlock::succ_iterator SI = BI>succ_begin(),

 528 
SE = BI>succ_end();

 529 
SI != SE; ++SI) {

 530 
MachineBasicBlock *To = *SI;

 531 
if (!To  !IsInSCC.count(BlockToChain[To]))

 532 
continue;

 533 
BranchProbability ComplEdgeProb =

 534 
MBPI>getEdgeProbability(From, To).getCompl();

 535 
Chain>InChainEdgeFrequency +=

 536 
MBFI>getBlockFreq(From) * ComplEdgeProb;

 537 
}

 538 
}

 539 
}

 540 

 541 
// Sort the chains within the SCC according to their edge frequencies,

 542 
// which should make the least costly chain of blocks to misplace be

 543 
// ordered first in the prioritized sequence.

 544 
std::stable_sort(SCCBegin, SCCEnd, ChainPtrPrioritySorter());

 545 
}

 546 
}

 547 

 548 
/// \brief Splice the function blocks together based on the chain priorities.

 549 
///

 550 
/// Each chain is already represented as a contiguous range of blocks in the

 551 
/// function. Simply walk backwards down the prioritized chains and splice in

 552 
/// any chains out of order. Note that the first chain we visit is necessarily

 553 
/// the entry chain. It has no predecessors and so must be the top of the SCC.

 554 
/// Also, we cannot splice any chain prior to the entry chain as we can't

 555 
/// splice any blocks prior to the entry block.

 556 
void MachineBlockPlacement::PlaceBlockChains(MachineFunction &F) {

 557 
assert(!PChains.empty() && "No chains were prioritized");

 558 
assert(PChains.back() == BlockToChain[&F.front()] &&

 559 
"The entry chain must always be the final chain");

 560 

 561 
MachineFunction::iterator InsertPos = F.begin();

 562 
for (SmallVectorImpl::reverse_iterator CI = PChains.rbegin(),

 563 
CE = PChains.rend();

 564 
CI != CE; ++CI) {

 565 
BlockChain *Chain = *CI;

 566 
// Check that we process this chain only once for debugging.

 567 
assert(ActiveChains.erase(Chain) && "Processed a chain twice");

 568 

 569 
// If this chain is already in the right position, just skip past it.

 570 
// Otherwise, splice it into position.

 571 
if (InsertPos == Chain>FirstBB)

 572 
InsertPos = llvm::next(Chain>LastBB);

 573 
else

 574 
F.splice(InsertPos, Chain>FirstBB, llvm::next(Chain>LastBB));

 575 
}

 576 

 577 
// Note that we can't assert this is empty as there may be unreachable blocks

 578 
// in the function.

 579 
#ifndef NDEBUG

 580 
ActiveChains.clear();

 581 
#endif

 582 

 583 
// Now that every block is in its final position, update all of the

 584 
// terminators.

 585 
SmallVector Cond; // For AnalyzeBranch.

 586 
for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {

 587 
// FIXME: It would be awesome of updateTerminator would just return rather

 588 
// than assert when the branch cannot be analyzed in order to remove this

 589 
// boiler plate.

 590 
Cond.clear();

 591 
MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch.

 592 
if (!TII>AnalyzeBranch(*FI, TBB, FBB, Cond))

 593 
FI>updateTerminator();

 594 
}

 595 
}

 596 

 597 
/// \brief Run block placement over one machine function.
///
/// Fetches the required analyses, runs the four placement phases in order,
/// and then releases all per-function state so the pass object can be reused.
///
/// \returns false for single-block functions (nothing to do), true otherwise.
bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) {
  // Check for single-block functions and skip them.
  if (llvm::next(F.begin()) == F.end())
    return false;

  MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
  MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
  TII = F.getTarget().getInstrInfo();
  // All per-function state must have been cleared by the previous run.
  assert(Edges.empty());
  assert(BlockToChain.empty());
  assert(PChains.empty());
  assert(ActiveChains.empty());

  PrioritizeEdges(F);
  BuildBlockChains();
  PrioritizeChains(F);
  PlaceBlockChains(F);

  Edges.clear();
  BlockToChain.clear();
  PChains.clear();
  ChainAllocator.DestroyAll();

  // We always return true as we have no way to track whether the final order
  // differs from the original order.
  return true;
}
