llvm.org GIT mirror llvm / e2f3426
[ThinLTO] Perform profile-guided indirect call promotion Summary: To enable profile-guided indirect call promotion in ThinLTO mode, we simply add call graph edges for each profitable target from the profile to the summaries, then the summary-guided importing will consider the callee for importing as usual. Also we need to enable the indirect call promotion pass creation in the PassManagerBuilder when PerformThinLTO=true (we are in the ThinLTO backend), so that the newly imported functions are considered for promotion in the backends. The IC promotion profiles refer to callees by GUID, which required adding GUIDs to the per-module VST in bitcode (and assigning them valueIds similar to how they are assigned valueIds in the combined index). Reviewers: mehdi_amini, xur Subscribers: mehdi_amini, davidxl, llvm-commits Differential Revision: http://reviews.llvm.org/D21932 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@275707 91177308-0d34-0410-b5e6-96231b3b80d8 Teresa Johnson 3 years ago
6 changed file(s) with 144 addition(s) and 14 deletion(s). Raw diff Collapse all Expand all
7979 assert(Kind == VI_Value && "Not a Value type");
8080 return TheValue.V;
8181 }
82 bool isGUID() const { return Kind == VI_GUID; }
8283 };
8384
8485 /// \brief Function and variable summary information to aid decisions and
258259 /// count (across all calls from this function) or 0 if no PGO.
259260 void addCallGraphEdge(GlobalValue::GUID CalleeGUID, CalleeInfo Info) {
260261 CallGraphEdgeList.push_back(std::make_pair(CalleeGUID, Info));
262 }
263
264 /// Record a call graph edge from this function to each function GUID recorded
265 /// in \p CallGraphEdges.
266 void
267 addCallGraphEdges(DenseMap<GlobalValue::GUID, CalleeInfo> &CallGraphEdges) {
268 for (auto &EI : CallGraphEdges)
269 addCallGraphEdge(EI.first, EI.second);
261270 }
262271
263272 /// Record a call graph edge from this function to the function identified
1515 #include "llvm/Analysis/BlockFrequencyInfo.h"
1616 #include "llvm/Analysis/BlockFrequencyInfoImpl.h"
1717 #include "llvm/Analysis/BranchProbabilityInfo.h"
18 #include "llvm/Analysis/IndirectCallPromotionAnalysis.h"
1819 #include "llvm/Analysis/LoopInfo.h"
1920 #include "llvm/IR/CallSite.h"
2021 #include "llvm/IR/Dominators.h"
7273 // Map from callee ValueId to profile count. Used to accumulate profile
7374 // counts for all static calls to a given callee.
7475 DenseMap<const Value *, CalleeInfo> CallGraphEdges;
76 DenseMap<GlobalValue::GUID, CalleeInfo> IndirectCallEdges;
7577 DenseSet RefEdges;
78 ICallPromotionAnalysis ICallAnalysis;
7679
7780 SmallPtrSet Visited;
7881 for (const BasicBlock &BB : F)
8285
8386 if (auto CS = ImmutableCallSite(&I)) {
8487 auto *CalledFunction = CS.getCalledFunction();
85 if (CalledFunction && CalledFunction->hasName() &&
86 !CalledFunction->isIntrinsic()) {
87 auto ScaledCount = BFI ? BFI->getBlockProfileCount(&BB) : None;
88 auto *CalleeId =
89 M->getValueSymbolTable().lookup(CalledFunction->getName());
90 CallGraphEdges[CalleeId] +=
91 (ScaledCount ? ScaledCount.getValue() : 0);
88 // Check if this is a direct call to a known function.
89 if (CalledFunction) {
90 if (CalledFunction->hasName() && !CalledFunction->isIntrinsic()) {
91 auto ScaledCount = BFI ? BFI->getBlockProfileCount(&BB) : None;
92 auto *CalleeId =
93 M->getValueSymbolTable().lookup(CalledFunction->getName());
94 CallGraphEdges[CalleeId] +=
95 (ScaledCount ? ScaledCount.getValue() : 0);
96 }
97 } else {
98 // Otherwise, check for an indirect call (call to a non-const value
99 // that isn't an inline assembly call).
100 const CallInst *CI = dyn_cast<CallInst>(&I);
101 if (CS.getCalledValue() && !isa<Constant>(CS.getCalledValue()) &&
102 !(CI && CI->isInlineAsm())) {
103 uint32_t NumVals, NumCandidates;
104 uint64_t TotalCount;
105 auto CandidateProfileData =
106 ICallAnalysis.getPromotionCandidatesForInstruction(
107 &I, NumVals, TotalCount, NumCandidates);
108 for (auto &Candidate : CandidateProfileData)
109 IndirectCallEdges[Candidate.Value] += Candidate.Count;
110 }
92111 }
93112 }
94113 findRefEdges(&I, RefEdges, Visited);
98117 std::unique_ptr<FunctionSummary> FuncSummary =
99118 llvm::make_unique<FunctionSummary>(Flags, NumInsts);
100119 FuncSummary->addCallGraphEdges(CallGraphEdges);
120 FuncSummary->addCallGraphEdges(IndirectCallEdges);
101121 FuncSummary->addRefEdges(RefEdges);
102122 Index->addGlobalValueSummary(F.getName(), std::move(FuncSummary));
103123 }
118118 /// The start bit of the module block, for use in generating a module hash
119119 uint64_t BitcodeStartBit = 0;
120120
121 /// Map that holds the correspondence between GUIDs in the summary index,
122 /// that came from indirect call profiles, and a value id generated by this
123 /// class to use in the VST and summary block records.
124 std::map<GlobalValue::GUID, unsigned> GUIDToValueIdMap;
125
126 /// Tracks the last value id recorded in the GUIDToValueIdMap.
127 unsigned GlobalValueId;
128
121129 public:
122130 /// Constructs a ModuleBitcodeWriter object for the given Module,
123131 /// writing to the provided \p Buffer.
131139 // will start at the bitcode, and we need the offset of the VST
132140 // to line up.
133141 BitcodeStartBit = Stream.GetCurrentBitNo();
142
143 // Assign ValueIds to any callee values in the index that came from
144 // indirect call profiles and were recorded as a GUID not a Value*
145 // (which would have been assigned an ID by the ValueEnumerator).
146 // The starting ValueId is just after the number of values in the
147 // ValueEnumerator, so that they can be emitted in the VST.
148 GlobalValueId = VE.getValues().size();
149 if (Index)
150 for (const auto &GUIDSummaryLists : *Index)
151 // Examine all summaries for this GUID.
152 for (auto &Summary : GUIDSummaryLists.second)
153 if (auto FS = dyn_cast<FunctionSummary>(Summary.get()))
154 // For each call in the function summary, see if the call
155 // is to a GUID (which means it is for an indirect call,
156 // otherwise we would have a Value for it). If so, synthesize
157 // a value id.
158 for (auto &CallEdge : FS->calls())
159 if (CallEdge.first.isGUID())
160 assignValueId(CallEdge.first.getGUID());
134161 }
135162
136163 private:
259286 unsigned FSModRefsAbbrev);
260287 void writePerModuleGlobalValueSummary();
261288 void writeModuleHash(size_t BlockStartPos);
289
290 void assignValueId(GlobalValue::GUID ValGUID) {
291 GUIDToValueIdMap[ValGUID] = ++GlobalValueId;
292 }
293 unsigned getValueId(GlobalValue::GUID ValGUID) {
294 const auto &VMI = GUIDToValueIdMap.find(ValGUID);
295 assert(VMI != GUIDToValueIdMap.end());
296 return VMI->second;
297 }
298 // Helper to get the valueId for the type of value recorded in VI.
299 unsigned getValueId(ValueInfo VI) {
300 if (VI.isGUID())
301 return getValueId(VI.getGUID());
302 return VE.getValueID(VI.getValue());
303 }
304 std::map<GlobalValue::GUID, unsigned> &valueIds() { return GUIDToValueIdMap; }
262305 };
263306
264307 /// Class to manage the bitcode writing for a combined index.
27062749 unsigned FnEntry8BitAbbrev;
27072750 unsigned FnEntry7BitAbbrev;
27082751 unsigned FnEntry6BitAbbrev;
2752 unsigned GUIDEntryAbbrev;
27092753 if (IsModuleLevel && hasVSTOffsetPlaceholder()) {
27102754 // 8-bit fixed-width VST_CODE_FNENTRY function strings.
27112755 BitCodeAbbrev *Abbv = new BitCodeAbbrev();
27332777 Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
27342778 Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
27352779 FnEntry6BitAbbrev = Stream.EmitAbbrev(Abbv);
2780
2781 // FIXME: Change the name of this record as it is now used by
2782 // the per-module index as well.
2783 Abbv = new BitCodeAbbrev();
2784 Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_COMBINED_ENTRY));
2785 Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
2786 Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // refguid
2787 GUIDEntryAbbrev = Stream.EmitAbbrev(Abbv);
27362788 }
27372789
27382790 // FIXME: Set up the abbrev, we know how many values there are!
27392791 // FIXME: We know if the type names can use 7-bit ascii.
2740 SmallVector<unsigned, 64> NameVals;
2792 SmallVector<uint64_t, 64> NameVals;
27412793
27422794 for (const ValueName &Name : VST) {
27432795 // Figure out the encoding to use for the name.
27982850 Stream.EmitRecord(Code, NameVals, AbbrevToUse);
27992851 NameVals.clear();
28002852 }
2853 // Emit any GUID valueIDs created for indirect call edges into the
2854 // module-level VST.
2855 if (IsModuleLevel && hasVSTOffsetPlaceholder())
2856 for (const auto &GI : valueIds()) {
2857 NameVals.push_back(GI.second);
2858 NameVals.push_back(GI.first);
2859 Stream.EmitRecord(bitc::VST_CODE_COMBINED_ENTRY, NameVals,
2860 GUIDEntryAbbrev);
2861 NameVals.clear();
2862 }
28012863 Stream.ExitBlock();
28022864 }
28032865
32193281 std::sort(Calls.begin(), Calls.end(),
32203282 [this](const FunctionSummary::EdgeTy &L,
32213283 const FunctionSummary::EdgeTy &R) {
3222 return VE.getValueID(L.first.getValue()) <
3223 VE.getValueID(R.first.getValue());
3284 return getValueId(L.first) < getValueId(R.first);
32243285 });
32253286 bool HasProfileData = F.getEntryCount().hasValue();
32263287 for (auto &ECI : Calls) {
3227 NameVals.push_back(VE.getValueID(ECI.first.getValue()));
3288 NameVals.push_back(getValueId(ECI.first));
32283289 assert(ECI.second.CallsiteCount > 0 && "Expected at least one callsite");
32293290 NameVals.push_back(ECI.second.CallsiteCount);
32303291 if (HasProfileData)
406406 /// PGO instrumentation is added during the compile phase for ThinLTO, do
407407 /// not run it a second time
408408 addPGOInstrPasses(MPM);
409 // Indirect call promotion that promotes intra-module targets only.
410 MPM.add(createPGOIndirectCallPromotionLegacyPass());
411 }
409 }
410
411 // Indirect call promotion that promotes intra-module targets only.
412 MPM.add(createPGOIndirectCallPromotionLegacyPass());
412413
413414 if (EnableNonLTOGlobalsModRef)
414415 // We add a module alias analysis pass here. In part due to bugs in the
0 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
1 target triple = "x86_64-unknown-linux-gnu"
2
3 define void @a() {
4 entry:
5 ret void
6 }
0 ; Do setup work for all below tests: generate bitcode and combined index
1 ; RUN: opt -module-summary %s -o %t.bc
2 ; RUN: opt -module-summary %p/Inputs/thinlto_indirect_call_promotion.ll -o %t2.bc
3 ; RUN: llvm-lto -thinlto -o %t3 %t.bc %t2.bc
4
5 ; RUN: opt -function-import -summary-file %t3.thinlto.bc %t.bc -o %t4.bc -print-imports 2>&1 | FileCheck %s --check-prefix=IMPORTS
6 ; IMPORTS: Import a
7
8 ; RUN: opt %t4.bc -pgo-icall-prom -S -icp-count-threshold=1 | FileCheck %s --check-prefix=ICALL-PROM
9 ; RUN: opt %t4.bc -pgo-icall-prom -S -pass-remarks=pgo-icall-prom -icp-count-threshold=1 2>&1 | FileCheck %s --check-prefix=PASS-REMARK
10 ; PASS-REMARK: Promote indirect call to a with count 1 out of 1
11
12 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
13 target triple = "x86_64-unknown-linux-gnu"
14
15 @foo = external local_unnamed_addr global void ()*, align 8
16
17 define i32 @main() local_unnamed_addr {
18 entry:
19 %0 = load void ()*, void ()** @foo, align 8
20 ; ICALL-PROM: br i1 %{{[0-9]+}}, label %if.true.direct_targ, label %if.false.orig_indirect, !prof [[BRANCH_WEIGHT:![0-9]+]]
21 tail call void %0(), !prof !1
22 ret i32 0
23 }
24
25 !1 = !{!"VP", i32 0, i64 1, i64 -6289574019528802036, i64 1}
26
27 ; Should not have a VP annotation on new indirect call (check before and after
28 ; branch_weights annotation).
29 ; ICALL-PROM-NOT: !"VP"
30 ; ICALL-PROM: [[BRANCH_WEIGHT]] = !{!"branch_weights", i32 1, i32 0}
31 ; ICALL-PROM-NOT: !"VP"