llvm.org GIT mirror llvm / 116e080
Refactor synthetic profile count computation. NFC. Summary: Instead of using two separate callbacks to return the entry count and the relative block frequency, use a single callback to return the callsite count. This would allow better support for hybrid mode in the future, as the count of a callsite need not always be derived from the entry count (as in sample PGO). Reviewers: davidxl Subscribers: mehdi_amini, steven_wu, dexonsmith, dang, llvm-commits Differential Revision: https://reviews.llvm.org/D56464 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@350755 91177308-0d34-0410-b5e6-96231b3b80d8 Easwaran Raman 8 months ago
4 changed file(s) with 49 addition(s) and 44 deletion(s). Raw diff Collapse all Expand all
3535 using EdgeRef = typename CGT::EdgeRef;
3636 using SccTy = std::vector;
3737
38 using GetRelBBFreqTy = function_ref(EdgeRef)>;
39 using GetCountTy = function_ref;
40 using AddCountTy = function_ref;
38 // Not all EdgeRef have information about the source of the edge. Hence
39 // NodeRef corresponding to the source of the EdgeRef is explicitly passed.
40 using GetProfCountTy = function_ref(NodeRef, EdgeRef)>;
41 using AddCountTy = function_ref;
4142
42 static void propagate(const CallGraphType &CG, GetRelBBFreqTy GetRelBBFreq,
43 GetCountTy GetCount, AddCountTy AddCount);
43 static void propagate(const CallGraphType &CG, GetProfCountTy GetProfCount,
44 AddCountTy AddCount);
4445
4546 private:
46 static void propagateFromSCC(const SccTy &SCC, GetRelBBFreqTy GetRelBBFreq,
47 GetCountTy GetCount, AddCountTy AddCount);
47 static void propagateFromSCC(const SccTy &SCC, GetProfCountTy GetProfCount,
48 AddCountTy AddCount);
4849 };
4950 } // namespace llvm
5051
2525 // Given an SCC, propagate entry counts along the edge of the SCC nodes.
2626 template
2727 void SyntheticCountsUtils::propagateFromSCC(
28 const SccTy &SCC, GetRelBBFreqTy GetRelBBFreq, GetCountTy GetCount,
29 AddCountTy AddCount) {
28 const SccTy &SCC, GetProfCountTy GetProfCount, AddCountTy AddCount) {
3029
3130 DenseSet SCCNodes;
3231 SmallVector, 8> SCCEdges, NonSCCEdges;
5352 // This ensures that the order of
5453 // traversal of nodes within the SCC doesn't affect the final result.
5554
56 DenseMapuint64_t> AdditionalCounts;
55 DenseMapScaled64> AdditionalCounts;
5756 for (auto &E : SCCEdges) {
58 auto OptRelFreq = GetRelBBFreq(E.second);
59 if (!OptRelFreq)
57 auto OptProfCount = GetProfCount(E.first, E.second);
58 if (!OptProfCount)
6059 continue;
61 Scaled64 RelFreq = OptRelFreq.getValue();
62 auto Caller = E.first;
6360 auto Callee = CGT::edge_dest(E.second);
64 RelFreq *= Scaled64(GetCount(Caller), 0);
65 uint64_t AdditionalCount = RelFreq.toInt();
66 AdditionalCounts[Callee] += AdditionalCount;
61 AdditionalCounts[Callee] += OptProfCount.getValue();
6762 }
6863
6964 // Update the counts for the nodes in the SCC.
7267
7368 // Now update the counts for nodes outside the SCC.
7469 for (auto &E : NonSCCEdges) {
75 auto OptRelFreq = GetRelBBFreq(E.second);
76 if (!OptRelFreq)
70 auto OptProfCount = GetProfCount(E.first, E.second);
71 if (!OptProfCount)
7772 continue;
78 Scaled64 RelFreq = OptRelFreq.getValue();
79 auto Caller = E.first;
8073 auto Callee = CGT::edge_dest(E.second);
81 RelFreq *= Scaled64(GetCount(Caller), 0);
82 AddCount(Callee, RelFreq.toInt());
74 AddCount(Callee, OptProfCount.getValue());
8375 }
8476 }
8577
9385
9486 template
9587 void SyntheticCountsUtils::propagate(const CallGraphType &CG,
96 GetRelBBFreqTy GetRelBBFreq,
97 GetCountTy GetCount,
88 GetProfCountTy GetProfCount,
9889 AddCountTy AddCount) {
9990 std::vector SCCs;
10091
10697 // The scc iterator returns the scc in bottom-up order, so reverse the SCCs
10798 // and call propagateFromSCC.
10899 for (auto &SCC : reverse(SCCs))
109 propagateFromSCC(SCC, GetRelBBFreq, GetCount, AddCount);
100 propagateFromSCC(SCC, GetProfCount, AddCount);
110101 }
111102
112103 template class llvm::SyntheticCountsUtils;
5959 return UINT64_C(0);
6060 }
6161 };
62 auto AddToEntryCount = [](ValueInfo V, uint64_t New) {
62 auto AddToEntryCount = [](ValueInfo V, Scaled64 New) {
6363 if (!V.getSummaryList().size())
6464 return;
6565 for (auto &GVS : V.getSummaryList()) {
6666 auto S = GVS.get()->getBaseObject();
6767 auto *F = cast(S);
68 F->setEntryCount(SaturatingAdd(F->entryCount(), New));
68 F->setEntryCount(
69 SaturatingAdd(F->entryCount(), New.template toInt()));
6970 }
7071 };
7172
73 auto GetProfileCount = [&](ValueInfo V, FunctionSummary::EdgeTy &Edge) {
74 auto RelFreq = GetCallSiteRelFreq(Edge);
75 Scaled64 EC(GetEntryCount(V), 0);
76 return RelFreq * EC;
77 };
7278 // After initializing the counts in initializeCounts above, the counts have to
7379 // be propagated across the combined callgraph.
7480 // SyntheticCountsUtils::propagate takes care of this propagation on any
7581 // callgraph that specialized GraphTraits.
76 SyntheticCountsUtils::propagate(
77 &Index, GetCallSiteRelFreq, GetEntryCount, AddToEntryCount);
82 SyntheticCountsUtils::propagate(&Index, GetProfileCount,
83 AddToEntryCount);
7884 Index.setHasSyntheticEntryCounts();
7985 }
2929 #include "llvm/ADT/STLExtras.h"
3030 #include "llvm/Analysis/BlockFrequencyInfo.h"
3131 #include "llvm/Analysis/CallGraph.h"
32 #include "llvm/Analysis/ProfileSummaryInfo.h"
3233 #include "llvm/Analysis/SyntheticCountsUtils.h"
3334 #include "llvm/IR/CallSite.h"
3435 #include "llvm/IR/Function.h"
9798 ModuleAnalysisManager &MAM) {
9899 FunctionAnalysisManager &FAM =
99100 MAM.getResult(M).getManager();
100 DenseMapuint64_t> Counts;
101 DenseMapScaled64> Counts;
101102 // Set initial entry counts.
102 initializeCounts(M, [&](Function *F, uint64_t Count) { Counts[F] = Count; });
103 initializeCounts(
104 M, [&](Function *F, uint64_t Count) { Counts[F] = Scaled64(Count, 0); });
103105
104 // Compute the relative block frequency for a call edge. Use scaled numbers
105 // and not integers since the relative block frequency could be less than 1.
106 auto GetCallSiteRelFreq = [&](const CallGraphNode::CallRecord &Edge) {
106 // Edge includes information about the source. Hence ignore the first
107 // parameter.
108 auto GetCallSiteProfCount = [&](const CallGraphNode *,
109 const CallGraphNode::CallRecord &Edge) {
107110 Optional Res = None;
108111 if (!Edge.first)
109112 return Res;
111114 CallSite CS(cast(Edge.first));
112115 Function *Caller = CS.getCaller();
113116 auto &BFI = FAM.getResult(*Caller);
117
118 // Now compute the callsite count from relative frequency and
119 // entry count:
114120 BasicBlock *CSBB = CS.getInstruction()->getParent();
115121 Scaled64 EntryFreq(BFI.getEntryFreq(), 0);
116 Scaled64 BBFreq(BFI.getBlockFreq(CSBB).getFrequency(), 0);
117 BBFreq /= EntryFreq;
118 return Optional(BBFreq);
122 Scaled64 BBCount(BFI.getBlockFreq(CSBB).getFrequency(), 0);
123 BBCount /= EntryFreq;
124 BBCount *= Counts[Caller];
125 return Optional(BBCount);
119126 };
120127
121128 CallGraph CG(M);
122129 // Propagate the entry counts on the callgraph.
123130 SyntheticCountsUtils::propagate(
124 &CG, GetCallSiteRelFreq,
125 [&](const CallGraphNode *N) { return Counts[N->getFunction()]; },
126 [&](const CallGraphNode *N, uint64_t New) {
131 &CG, GetCallSiteProfCount, [&](const CallGraphNode *N, Scaled64 New) {
127132 auto F = N->getFunction();
128133 if (!F || F->isDeclaration())
129134 return;
135
130136 Counts[F] += New;
131137 });
132138
133139 // Set the counts as metadata.
134 for (auto Entry : Counts)
135 Entry.first->setEntryCount(
136 ProfileCount(Entry.second, Function::PCT_Synthetic));
140 for (auto Entry : Counts) {
141 Entry.first->setEntryCount(ProfileCount(
142 Entry.second.template toInt(), Function::PCT_Synthetic));
143 }
137144
138145 return PreservedAnalyses::all();
139146 }