363  363 
/// iteration.

364  364 
/// This sets the branch weights for the latch of the recently peeled off loop

365  365 
/// iteration correctly.

366  
/// Our goal is to make sure that:

367  
/// a) The total weight of all the copies of the loop body is preserved.

368  
/// b) The total weight of the loop exit is preserved.

369  
/// c) The body weight is reasonably distributed between the peeled iterations.⏎

 366 
/// Let F be the weight of the edge from latch to header.⏎

 367 
/// Let E be the weight of the edge from latch to exit.

 368 
/// F/(F+E) is the probability to go to loop and E/(F+E) is the probability to

 369 
/// go to exit.

 370 
/// Then, Estimated TripCount = F / E.

 371 
/// For the Ith (counting from 0) peeled-off iteration we set the weights for

 372 
/// the peeled latch as (TC - I, 1). It gives us a reasonable distribution:

 373 
/// the probability to go to exit, 1/(TC - I), increases. At the same time

 374 
/// the estimated trip count of remaining loop reduces by I.

 375 
/// To avoid dealing with division rounding we can just multiply both parts

 376 
/// of the weights by E and use the weights (F - I * E, E).

370  377 
///

371  378 
/// \param Header The copy of the header block that belongs to next iteration.

372  379 
/// \param LatchBR The copy of the latch branch that belongs to this iteration.

373  
/// \param IterNumber The serial number of the iteration that was just

374  
/// peeled off.

375  
/// \param AvgIters The average number of iterations we expect the loop to have.

376  
/// \param[in,out] PeeledHeaderWeight The total number of dynamic loop

377  
/// iterations that are unaccounted for. As an input, it represents the number

378  
/// of times we expect to enter the header of the iteration currently being

379  
/// peeled off. The output is the number of times we expect to enter the

380  
/// header of the next iteration.⏎

 380 
/// \param[in,out] FallThroughWeight The weight of the edge from latch to⏎

 381 
/// header before peeling (in) and after peeled off one iteration (out).

381  382 
static void updateBranchWeights(BasicBlock *Header, BranchInst *LatchBR,

382  
unsigned IterNumber, unsigned AvgIters,

383  
uint64_t &PeeledHeaderWeight) {

384  
if (!PeeledHeaderWeight)⏎

 383 
uint64_t ExitWeight,⏎

 384 
uint64_t &FallThroughWeight) {

 385 
// FallThroughWeight being 0 means that there are no branch weights on the original

 386 
// latch block or the estimated trip count is zero.

 387 
if (!FallThroughWeight)

385  388 
return;

386  
// FIXME: Pick a more realistic distribution.

387  
// Currently the proportion of weight we assign to the fallthrough

388  
// side of the branch drops linearly with the iteration number, and we use

389  
// a 0.9 fudge factor to make the drop-off less sharp...

390  
uint64_t FallThruWeight =

391  
PeeledHeaderWeight * ((float)(AvgIters  IterNumber) / AvgIters * 0.9);

392  
uint64_t ExitWeight = PeeledHeaderWeight  FallThruWeight;

393  
PeeledHeaderWeight = ExitWeight;

394  389 

395  390 
unsigned HeaderIdx = (LatchBR>getSuccessor(0) == Header ? 0 : 1);

396  391 
MDBuilder MDB(LatchBR>getContext());

397  392 
MDNode *WeightNode =

398  
HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThruWeight)

399  
: MDB.createBranchWeights(FallThruWeight, ExitWeight);⏎

 393 
HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThroughWeight)⏎

 394 
: MDB.createBranchWeights(FallThroughWeight, ExitWeight);

400  395 
LatchBR>setMetadata(LLVMContext::MD_prof, WeightNode);

 396 
FallThroughWeight =

 397 
FallThroughWeight > ExitWeight ? FallThroughWeight  ExitWeight : 1;

401  398 
}

402  399 

403  400 
/// Initialize the weights.

404  401 
///

405  402 
/// \param Header The header block.

406  403 
/// \param LatchBR The latch branch.

407  
/// \param AvgIters The average number of iterations we expect the loop to have.

408  
/// \param[out] ExitWeight The # of times the edge from Latch to Exit is taken.

409  
/// \param[out] CurHeaderWeight The # of times the header is executed.⏎

 404 
/// \param[out] ExitWeight The weight of the edge from Latch to Exit.⏎

 405 
/// \param[out] FallThroughWeight The weight of the edge from Latch to Header.

410  406 
static void initBranchWeights(BasicBlock *Header, BranchInst *LatchBR,

411  
unsigned AvgIters, uint64_t &ExitWeight,

412  
uint64_t &CurHeaderWeight) {⏎

 407 
uint64_t &ExitWeight,⏎

 408 
uint64_t &FallThroughWeight) {

413  409 
uint64_t TrueWeight, FalseWeight;

414  410 
if (!LatchBR>extractProfMetadata(TrueWeight, FalseWeight))

415  411 
return;

416  412 
unsigned HeaderIdx = LatchBR>getSuccessor(0) == Header ? 0 : 1;

417  413 
ExitWeight = HeaderIdx ? TrueWeight : FalseWeight;

418  
// The # of times the loop body executes is the sum of the exit block

419  
// is taken and the # of times the backedges are taken.

420  
CurHeaderWeight = TrueWeight + FalseWeight;⏎

 414 
FallThroughWeight = HeaderIdx ? FalseWeight : TrueWeight;⏎

421  415 
}

422  416 

423  417 
/// Update the weights of original Latch block after peeling off all iterations.

424  418 
///

425  419 
/// \param Header The header block.

426  420 
/// \param LatchBR The latch branch.

427  
/// \param ExitWeight The weight of the edge from Latch to Exit block.

428  
/// \param CurHeaderWeight The # of time the header is executed.⏎

 421 
/// \param ExitWeight The weight of the edge from Latch to Exit.⏎

 422 
/// \param FallThroughWeight The weight of the edge from Latch to Header.

429  423 
static void fixupBranchWeights(BasicBlock *Header, BranchInst *LatchBR,

430  
uint64_t ExitWeight, uint64_t CurHeaderWeight) {

431  
// Adjust the branch weights on the loop exit.

432  
if (!ExitWeight)⏎

 424 
uint64_t ExitWeight,⏎

 425 
uint64_t FallThroughWeight) {

 426 
// FallThroughWeight being 0 means that there are no branch weights on the original

 427 
// latch block or the estimated trip count is zero.

 428 
if (!FallThroughWeight)

433  429 
return;

434  430 

435  
// The backedge count is the difference of current header weight and

436  
// current loop exit weight. If the current header weight is smaller than

437  
// the current loop exit weight, we mark the loop backedge weight as 1.

438  
uint64_t BackEdgeWeight = 0;

439  
if (ExitWeight < CurHeaderWeight)

440  
BackEdgeWeight = CurHeaderWeight  ExitWeight;

441  
else

442  
BackEdgeWeight = 1;⏎

 431 
// Sets the branch weights on the loop exit.⏎

443  432 
MDBuilder MDB(LatchBR>getContext());

444  433 
unsigned HeaderIdx = LatchBR>getSuccessor(0) == Header ? 0 : 1;

445  434 
MDNode *WeightNode =

446  
HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight)

447  
: MDB.createBranchWeights(BackEdgeWeight, ExitWeight);⏎

 435 
HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThroughWeight)⏎

 436 
: MDB.createBranchWeights(FallThroughWeight, ExitWeight);

448  437 
LatchBR>setMetadata(LLVMContext::MD_prof, WeightNode);

449  438 
}

450  439 


658  647 
// newly created branches.

659  648 
BranchInst *LatchBR =

660  649 
cast(cast(Latch)>getTerminator());

661  
uint64_t ExitWeight = 0, CurHeaderWeight = 0;

662  
initBranchWeights(Header, LatchBR, PeelCount, ExitWeight, CurHeaderWeight);⏎

 650 
uint64_t ExitWeight = 0, FallThroughWeight = 0;⏎

 651 
initBranchWeights(Header, LatchBR, ExitWeight, FallThroughWeight);

663  652 

664  653 
// For each peeled-off iteration, make a copy of the loop.

665  654 
for (unsigned Iter = 0; Iter < PeelCount; ++Iter) {

666  655 
SmallVector NewBlocks;

667  656 
ValueToValueMapTy VMap;

668  

669  
// Subtract the exit weight from the current header weight -- the exit

670  
// weight is exactly the weight of the previous iteration's header.

671  
// FIXME: due to the way the distribution is constructed, we need a

672  
// guard here to make sure we don't end up with non-positive weights.

673  
if (ExitWeight < CurHeaderWeight)

674  
CurHeaderWeight = ExitWeight;

675  
else

676  
CurHeaderWeight = 1;

677  657 

678  658 
cloneLoopBlocks(L, Iter, InsertTop, InsertBot, ExitEdges, NewBlocks,

679  659 
LoopBlocks, VMap, LVMap, DT, LI);


696  676 
}

697  677 

698  678 
auto *LatchBRCopy = cast(VMap[LatchBR]);

699  
updateBranchWeights(InsertBot, LatchBRCopy, Iter,

700  
PeelCount, ExitWeight);⏎

 679 
updateBranchWeights(InsertBot, LatchBRCopy, ExitWeight, FallThroughWeight);⏎

701  680 
// Remove Loop metadata from the latch branch instruction

702  681 
// because it is not the Loop's latch branch anymore.

703  682 
LatchBRCopy>setMetadata(LLVMContext::MD_loop, nullptr);


723  702 
PHI>setIncomingValueForBlock(NewPreHeader, NewVal);

724  703 
}

725  704 

726  
fixupBranchWeights(Header, LatchBR, ExitWeight, CurHeaderWeight);⏎

 705 
fixupBranchWeights(Header, LatchBR, ExitWeight, FallThroughWeight);⏎

727  706 

728  707 
if (Loop *ParentLoop = L>getParentLoop())

729  708 
L = ParentLoop;
