/// iteration.

/// This sets the branch weights for the latch of the recently peeled off loop

/// iteration correctly.

/// Our goal is to make sure that:

/// a) The total weight of all the copies of the loop body is preserved.

/// b) The total weight of the loop exit is preserved.

/// c) The body weight is reasonably distributed between the peeled iterations.⏎

/// Let F is a weight of the edge from latch to header.⏎

/// Let E is a weight of the edge from latch to exit.

/// F/(F+E) is a probability to go to loop and E/(F+E) is a probability to

/// go to exit.

/// Then, Estimated TripCount = F / E.

/// For Ith (counting from 0) peeled off iteration we set the the weights for

/// the peeled latch as (TC  I, 1). It gives us reasonable distribution,

/// The probability to go to exit 1/(TCI) increases. At the same time

/// the estimated trip count of remaining loop reduces by I.

/// To avoid dealing with division rounding we can just multiple both part

/// of weights to E and use weight as (F  I * E, E).

///

/// \param Header The copy of the header block that belongs to next iteration.

/// \param LatchBR The copy of the latch branch that belongs to this iteration.

/// \param IterNumber The serial number of the iteration that was just

/// peeled off.

/// \param AvgIters The average number of iterations we expect the loop to have.

/// \param[in,out] PeeledHeaderWeight The total number of dynamic loop

/// iterations that are unaccounted for. As an input, it represents the number

/// of times we expect to enter the header of the iteration currently being

/// peeled off. The output is the number of times we expect to enter the

/// header of the next iteration.⏎

/// \param[in,out] FallThroughWeight The weight of the edge from latch to⏎

/// header before peeling (in) and after peeled off one iteration (out).

static void updateBranchWeights(BasicBlock *Header, BranchInst *LatchBR,

unsigned IterNumber, unsigned AvgIters,

uint64_t &PeeledHeaderWeight) {

if (!PeeledHeaderWeight)⏎

uint64_t ExitWeight,⏎

uint64_t &FallThroughWeight) {

 385 
// FallThroughWeight is 0 means that there is no branch weights on original

 386 
// latch block or estimated trip count is zero.

 387 
if (!FallThroughWeight)

return;

// FIXME: Pick a more realistic distribution.

// Currently the proportion of weight we assign to the fallthrough

// side of the branch drops linearly with the iteration number, and we use

// a 0.9 fudge factor to make the dropoff less sharp...

uint64_t FallThruWeight =

PeeledHeaderWeight * ((float)(AvgIters  IterNumber) / AvgIters * 0.9);

uint64_t ExitWeight = PeeledHeaderWeight  FallThruWeight;

PeeledHeaderWeight = ExitWeight;

unsigned HeaderIdx = (LatchBR>getSuccessor(0) == Header ? 0 : 1);

MDBuilder MDB(LatchBR>getContext());

MDNode *WeightNode =

HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThruWeight)

: MDB.createBranchWeights(FallThruWeight, ExitWeight);⏎

HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThroughWeight)⏎

: MDB.createBranchWeights(FallThroughWeight, ExitWeight);

LatchBR>setMetadata(LLVMContext::MD_prof, WeightNode);

FallThroughWeight =

FallThroughWeight > ExitWeight ? FallThroughWeight  ExitWeight : 1;

}

/// Initialize the weights.

///

/// \param Header The header block.

/// \param LatchBR The latch branch.

/// \param AvgIters The average number of iterations we expect the loop to have.

/// \param[out] ExitWeight The # of times the edge from Latch to Exit is taken.

/// \param[out] CurHeaderWeight The # of times the header is executed.⏎

/// \param[out] ExitWeight The weight of the edge from Latch to Exit.⏎

/// \param[out] FallThroughWeight The weight of the edge from Latch to Header.

static void initBranchWeights(BasicBlock *Header, BranchInst *LatchBR,

unsigned AvgIters, uint64_t &ExitWeight,

uint64_t &CurHeaderWeight) {⏎

uint64_t &ExitWeight,⏎

uint64_t &FallThroughWeight) {

uint64_t TrueWeight, FalseWeight;

if (!LatchBR>extractProfMetadata(TrueWeight, FalseWeight))

return;

unsigned HeaderIdx = LatchBR>getSuccessor(0) == Header ? 0 : 1;

ExitWeight = HeaderIdx ? TrueWeight : FalseWeight;

// The # of times the loop body executes is the sum of the exit block

// is taken and the # of times the backedges are taken.

CurHeaderWeight = TrueWeight + FalseWeight;⏎

FallThroughWeight = HeaderIdx ? FalseWeight : TrueWeight;⏎

}

/// Update the weights of original Latch block after peeling off all iterations.
///
/// \param Header The header block.
/// \param LatchBR The latch branch.
/// \param ExitWeight The weight of the edge from Latch to Exit.
/// \param FallThroughWeight The weight of the edge from Latch to Header.
static void fixupBranchWeights(BasicBlock *Header, BranchInst *LatchBR,
                               uint64_t ExitWeight,
                               uint64_t FallThroughWeight) {
  // FallThroughWeight is 0 means that there is no branch weights on original
  // latch block or estimated trip count is zero.
  if (!FallThroughWeight)
    return;

  // Sets the branch weights on the loop exit. The weights are passed in
  // successor order, so the fall-through weight goes at the header's index.
  MDBuilder MDB(LatchBR->getContext());
  unsigned HeaderIdx = LatchBR->getSuccessor(0) == Header ? 0 : 1;
  MDNode *WeightNode =
      HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThroughWeight)
                : MDB.createBranchWeights(FallThroughWeight, ExitWeight);
  LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
}

// newly created branches.

BranchInst *LatchBR =

cast(cast(Latch)>getTerminator());

662  
uint64_t ExitWeight = 0, FallThroughWeight = 0;⏎

initBranchWeights(Header, LatchBR, ExitWeight, FallThroughWeight);

// For each peeledoff iteration, make a copy of the loop.

for (unsigned Iter = 0; Iter < PeelCount; ++Iter) {

SmallVector NewBlocks;

ValueToValueMapTy VMap;

670  
671  
672  
673  
674  
675  
676  
cloneLoopBlocks(L, Iter, InsertTop, InsertBot, ExitEdges, NewBlocks,

}

auto *LatchBRCopy = cast(VMap[LatchBR]);

700  
updateBranchWeights(InsertBot, LatchBRCopy, ExitWeight, FallThroughWeight);⏎

// Remove Loop metadata from the latch branch instruction

// because it is not the Loop's latch branch anymore.

PHI>setIncomingValueForBlock(NewPreHeader, NewVal);

}

fixupBranchWeights(Header, LatchBR, ExitWeight, CurHeaderWeight);⏎

fixupBranchWeights(Header, LatchBR, ExitWeight, FallThroughWeight);⏎

if (Loop *ParentLoop = L>getParentLoop())

L = ParentLoop;
