llvm.org GIT mirror llvm / f423025
[MachineSink+PGO] Teach MachineSink to use BlockFrequencyInfo. Machine Sink uses loop depth information to select between successor BBs to sink machine instructions into, where BBs at smaller loop depths are preferable. This patch adds support for choosing between successors by using profile information from BlockFrequencyInfo instead, whenever that information is available. Tested under SPEC2006 train (average of 30 runs for each program); ~1.5% execution speedup on average on x86-64 darwin. <rdar://problem/18021659> git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@218472 91177308-0d34-0410-b5e6-96231b3b80d8 Bruno Cardoso Lopes 5 years ago
2 changed file(s) with 68 addition(s) and 6 deletion(s). Raw diff Collapse all Expand all
2020 #include "llvm/ADT/SmallSet.h"
2121 #include "llvm/ADT/Statistic.h"
2222 #include "llvm/Analysis/AliasAnalysis.h"
23 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
2324 #include "llvm/CodeGen/MachineDominators.h"
2425 #include "llvm/CodeGen/MachineLoopInfo.h"
2526 #include "llvm/CodeGen/MachinePostDominators.h"
4041 cl::desc("Split critical edges during machine sinking"),
4142 cl::init(true), cl::Hidden);
4243
44 static cl::opt
45 UseBlockFreqInfo("machine-sink-bfi",
46 cl::desc("Use block frequency info to find successors to sink"),
47 cl::init(true), cl::Hidden);
48
49
4350 STATISTIC(NumSunk, "Number of machine instructions sunk");
4451 STATISTIC(NumSplit, "Number of critical edges split");
4552 STATISTIC(NumCoalesces, "Number of copies coalesced");
5259 MachineDominatorTree *DT; // Machine dominator tree
5360 MachinePostDominatorTree *PDT; // Machine post dominator tree
5461 MachineLoopInfo *LI;
62 const MachineBlockFrequencyInfo *MBFI;
5563 AliasAnalysis *AA;
5664
5765 // Remember which edges have been considered for breaking.
8088 AU.addPreserved();
8189 AU.addPreserved();
8290 AU.addPreserved();
91 if (UseBlockFreqInfo)
92 AU.addRequired();
8393 }
8494
8595 void releaseMemory() override {
246256 DT = &getAnalysis();
247257 PDT = &getAnalysis();
248258 LI = &getAnalysis();
259 MBFI = UseBlockFreqInfo ? &getAnalysis() : nullptr;
249260 AA = &getAnalysis();
250261
251262 bool EverMadeChange = false;
565576 }
566577
567578 // Otherwise, we should look at all the successors and decide which one
568 // we should sink to.
569 // We give successors with smaller loop depth higher priority.
570 SmallVector Succs(MBB->succ_begin(), MBB->succ_end());
571 // Sort Successors according to their loop depth.
579 // we should sink to. If we have reliable block frequency information
580 // (frequency != 0) available, give successors with smaller frequencies
581 // higher priority, otherwise prioritize smaller loop depths.
582 SmallVector Succs(MBB->succ_begin(),
583 MBB->succ_end());
584 // Sort Successors according to their loop depth or block frequency info.
572585 std::stable_sort(
573586 Succs.begin(), Succs.end(),
574 [this](const MachineBasicBlock *LHS, const MachineBasicBlock *RHS) {
575 return LI->getLoopDepth(LHS) < LI->getLoopDepth(RHS);
587 [this](const MachineBasicBlock *L, const MachineBasicBlock *R) {
588 uint64_t LHSFreq = MBFI ? MBFI->getBlockFreq(L).getFrequency() : 0;
589 uint64_t RHSFreq = MBFI ? MBFI->getBlockFreq(R).getFrequency() : 0;
590 bool HasBlockFreq = LHSFreq != 0 && RHSFreq != 0;
591 return HasBlockFreq ? LHSFreq < RHSFreq
592 : LI->getLoopDepth(L) < LI->getLoopDepth(R);
576593 });
577594 for (SmallVectorImpl::iterator SI = Succs.begin(),
578595 E = Succs.end(); SI != E; ++SI) {
0 ; RUN: llc -disable-machine-licm -machine-sink-bfi=true -mtriple=x86_64-apple-darwin < %s | FileCheck %s -check-prefix=MSINK_BFI
1 ; RUN: llc -disable-machine-licm -machine-sink-bfi=false -mtriple=x86_64-apple-darwin < %s | FileCheck %s -check-prefix=MSINK_NOBFI
2
3 ; Test that by changing BlockFrequencyInfo we change the order in which
4 ; machine-sink looks for successor blocks. By not using BFI, both G and B
5 ; have the same loop depth and no instruction is sunk - B is selected but
6 ; can't be used, to avoid breaking a non-profitable critical edge. By using
7 ; BFI, "mul" is sunk into the less frequent block G.
8 define i32 @sink_freqinfo(i32 %a, i32 %b) nounwind uwtable ssp {
9 ; MSINK_BFI-LABEL: sink_freqinfo
10 ; MSINK_BFI: jl
11 ; MSINK_BFI-NEXT: ## BB#
12 ; MSINK_BFI-NEXT: imull
13
14 ; MSINK_NOBFI-LABEL: sink_freqinfo
15 ; MSINK_NOBFI: imull
16 ; MSINK_NOBFI: jl
17 entry:
18 br label %B
19
20 B:
21 %ee = phi i32 [ 0, %entry ], [ %inc, %F ]
22 %xx = sub i32 %a, %ee
23 %cond0 = icmp slt i32 %xx, 0
24 br i1 %cond0, label %F, label %exit, !prof !0
25
26 F:
27 %inc = add nsw i32 %xx, 2
28 %aa = mul nsw i32 %b, %inc
29 %exitcond = icmp slt i32 %inc, %a
30 br i1 %exitcond, label %B, label %G, !prof !1
31
32 G:
33 %ii = add nsw i32 %aa, %a
34 %ll = add i32 %b, 45
35 %exitcond2 = icmp sge i32 %ii, %b
36 br i1 %exitcond2, label %G, label %exit, !prof !2
37
38 exit:
39 ret i32 0
40 }
41
42 !0 = metadata !{metadata !"branch_weights", i32 4, i32 1}
43 !1 = metadata !{metadata !"branch_weights", i32 128, i32 1}
44 !2 = metadata !{metadata !"branch_weights", i32 1, i32 1}