llvm.org GIT mirror llvm / 92f5878
Restore "[ThinLTO] Ensure we always select the same function copy to import" This reverts commit r337081, therefore restoring r337050 (and fix in r337059), with test fix for bot failure described after the original description below. In order to always import the same copy of a linkonce function, even when encountering it with different thresholds (a higher one then a lower one), keep track of the summary we decided to import. This ensures that the backend only gets a single definition to import for each GUID, so that it doesn't need to choose one. Move the largest threshold the GUID was considered for import into the current module out of the ImportMap (which is part of a larger map maintained across the whole index), and into a new map just maintained for the current module we are computing imports for. This saves some memory since we no longer have the thresholds maintained across the whole index (and throughout the in-process backends when doing a normal non-distributed ThinLTO build), at the cost of some additional information being maintained for each invocation of ComputeImportForModule (the selected summary pointer for each import). There is an additional map lookup for each callee being considered for importing, however, this was able to subsume a map lookup in the Worklist iteration that invokes computeImportForFunction. We also are able to avoid calling selectCallee if we already failed to import at the same or higher threshold. I compared the run time and peak memory for the SPEC2006 471.omnetpp benchmark (running in-process ThinLTO backends), as well as for a large internal benchmark with a distributed ThinLTO build (so just looking at the thin link time/memory). Across a number of runs with and without this change there was no significant change in the time and memory. (I tried a few other variations of the change but they also didn't improve time or peak memory). The new commit removes a test that no longer makes sense (Transforms/FunctionImport/hotness_based_import2.ll), as exposed by the reverse-iteration bot. The test depends on the order of processing the summary call edges, and actually depended on the old problematic behavior of selecting more than one summary for a given GUID when encountered with different thresholds. There was no guarantee even before that we would eventually pick the linkonce copy with the hottest call edges, it just happened to work with the test and the old code, and there was no guarantee that we would end up importing the selected version of the copy that had the hottest call edges (since the backend would effectively import only one of the selected copies). Reviewers: davidxl Subscribers: mehdi_amini, inglorion, llvm-commits Differential Revision: https://reviews.llvm.org/D48670 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@337184 91177308-0d34-0410-b5e6-96231b3b80d8 Teresa Johnson 1 year, 2 months ago
9 changed file(s) with 196 addition(s) and 174 deletion(s). Raw diff Collapse all Expand all
3232 /// based on the provided summary informations.
3333 class FunctionImporter {
3434 public:
35 /// Set of functions to import from a source module. Each entry is a map
36 /// containing all the functions to import for a source module.
37 /// The keys is the GUID identifying a function to import, and the value
38 /// is the threshold applied when deciding to import it.
39 using FunctionsToImportTy = std::map;
35 /// Set of functions to import from a source module. Each entry is a set
36 /// containing all the GUIDs of all functions to import for a source module.
37 using FunctionsToImportTy = std::unordered_set;
38
39 /// Map of callee GUID considered for import into a given module to a pair
40 /// consisting of the largest threshold applied when deciding whether to
41 /// import it and, if we decided to import, a pointer to the summary instance
42 /// imported. If we decided not to import, the summary will be nullptr.
43 using ImportThresholdsTy =
44 DenseMap
45 std::pair>;
4046
4147 /// The map contains an entry for every module to import from, the key being
4248 /// the module identifier to pass to the ModuleLoader. The value is the set of
155155
156156 AddUint64(Entry.second.size());
157157 for (auto &Fn : Entry.second)
158 AddUint64(Fn.first);
158 AddUint64(Fn);
159159 }
160160
161161 // Include the hash for the resolved ODR.
220220 // so we need to collect their used resolutions as well.
221221 for (auto &ImpM : ImportList)
222222 for (auto &ImpF : ImpM.second)
223 AddUsedThings(Index.findSummaryInModule(ImpF.first, ImpM.first()));
223 AddUsedThings(Index.findSummaryInModule(ImpF, ImpM.first()));
224224
225225 auto AddTypeIdSummary = [&](StringRef TId, const TypeIdSummary &S) {
226226 AddString(TId);
261261 !RefSummary->modulePath().empty() &&
262262 !GlobalValue::isInterposableLinkage(RefSummary->linkage()) &&
263263 RefSummary->refs().empty()) {
264 ImportList[RefSummary->modulePath()][VI.getGUID()] = 1;
264 ImportList[RefSummary->modulePath()].insert(VI.getGUID());
265265 if (ExportLists)
266266 (*ExportLists)[RefSummary->modulePath()].insert(VI.getGUID());
267267 break;
277277 const unsigned Threshold, const GVSummaryMapTy &DefinedGVSummaries,
278278 SmallVectorImpl &Worklist,
279279 FunctionImporter::ImportMapTy &ImportList,
280 StringMap *ExportLists = nullptr) {
280 StringMap *ExportLists,
281 FunctionImporter::ImportThresholdsTy &ImportThresholds) {
281282 computeImportForReferencedGlobals(Summary, DefinedGVSummaries, ImportList,
282283 ExportLists);
283284 static int ImportCount = 0;
314315 const auto NewThreshold =
315316 Threshold * GetBonusMultiplier(Edge.second.getHotness());
316317
317 auto *CalleeSummary = selectCallee(Index, VI.getSummaryList(), NewThreshold,
318 Summary.modulePath());
319 if (!CalleeSummary) {
320 LLVM_DEBUG(
321 dbgs() << "ignored! No qualifying callee with summary found.\n");
322 continue;
323 }
324
325 // "Resolve" the summary
326 const auto *ResolvedCalleeSummary = cast(CalleeSummary->getBaseObject());
327
328 assert(ResolvedCalleeSummary->instCount() <= NewThreshold &&
329 "selectCallee() didn't honor the threshold");
318 auto IT = ImportThresholds.insert(
319 std::make_pair(VI.getGUID(), std::make_pair(NewThreshold, nullptr)));
320 bool PreviouslyVisited = !IT.second;
321 auto &ProcessedThreshold = IT.first->second.first;
322 auto &CalleeSummary = IT.first->second.second;
323
324 const FunctionSummary *ResolvedCalleeSummary = nullptr;
325 if (CalleeSummary) {
326 assert(PreviouslyVisited);
327 // Since the traversal of the call graph is DFS, we can revisit a function
328 // a second time with a higher threshold. In this case, it is added back
329 // to the worklist with the new threshold (so that its own callee chains
330 // can be considered with the higher threshold).
331 if (NewThreshold <= ProcessedThreshold) {
332 LLVM_DEBUG(
333 dbgs() << "ignored! Target was already imported with Threshold "
334 << ProcessedThreshold << "\n");
335 continue;
336 }
337 // Update with new larger threshold.
338 ProcessedThreshold = NewThreshold;
339 ResolvedCalleeSummary = cast(CalleeSummary);
340 } else {
341 // If we already rejected importing a callee at the same or higher
342 // threshold, don't waste time calling selectCallee.
343 if (PreviouslyVisited && NewThreshold <= ProcessedThreshold) {
344 LLVM_DEBUG(
345 dbgs() << "ignored! Target was already rejected with Threshold "
346 << ProcessedThreshold << "\n");
347 continue;
348 }
349
350 CalleeSummary = selectCallee(Index, VI.getSummaryList(), NewThreshold,
351 Summary.modulePath());
352 if (!CalleeSummary) {
353 // Update with new larger threshold if this was a retry (otherwise
354 // we would have already inserted with NewThreshold above).
355 if (PreviouslyVisited)
356 ProcessedThreshold = NewThreshold;
357 LLVM_DEBUG(
358 dbgs() << "ignored! No qualifying callee with summary found.\n");
359 continue;
360 }
361
362 // "Resolve" the summary
363 CalleeSummary = CalleeSummary->getBaseObject();
364 ResolvedCalleeSummary = cast(CalleeSummary);
365
366 assert(ResolvedCalleeSummary->instCount() <= NewThreshold &&
367 "selectCallee() didn't honor the threshold");
368
369 auto ExportModulePath = ResolvedCalleeSummary->modulePath();
370 auto ILI = ImportList[ExportModulePath].insert(VI.getGUID());
371 // We previously decided to import this GUID definition if it was already
372 // inserted in the set of imports from the exporting module.
373 bool PreviouslyImported = !ILI.second;
374
375 // Make exports in the source module.
376 if (ExportLists) {
377 auto &ExportList = (*ExportLists)[ExportModulePath];
378 ExportList.insert(VI.getGUID());
379 if (!PreviouslyImported) {
380 // This is the first time this function was exported from its source
381 // module, so mark all functions and globals it references as exported
382 // to the outside if they are defined in the same source module.
383 // For efficiency, we unconditionally add all the referenced GUIDs
384 // to the ExportList for this module, and will prune out any not
385 // defined in the module later in a single pass.
386 for (auto &Edge : ResolvedCalleeSummary->calls()) {
387 auto CalleeGUID = Edge.first.getGUID();
388 ExportList.insert(CalleeGUID);
389 }
390 for (auto &Ref : ResolvedCalleeSummary->refs()) {
391 auto GUID = Ref.getGUID();
392 ExportList.insert(GUID);
393 }
394 }
395 }
396 }
330397
331398 auto GetAdjustedThreshold = [](unsigned Threshold, bool IsHotCallsite) {
332399 // Adjust the threshold for next level of imported functions.
341408 Edge.second.getHotness() == CalleeInfo::HotnessType::Hot;
342409 const auto AdjThreshold = GetAdjustedThreshold(Threshold, IsHotCallsite);
343410
344 auto ExportModulePath = ResolvedCalleeSummary->modulePath();
345 auto &ProcessedThreshold = ImportList[ExportModulePath][VI.getGUID()];
346 /// Since the traversal of the call graph is DFS, we can revisit a function
347 /// a second time with a higher threshold. In this case, it is added back to
348 /// the worklist with the new threshold.
349 if (ProcessedThreshold && ProcessedThreshold >= AdjThreshold) {
350 LLVM_DEBUG(dbgs() << "ignored! Target was already seen with Threshold "
351 << ProcessedThreshold << "\n");
352 continue;
353 }
354 bool PreviouslyImported = ProcessedThreshold != 0;
355 // Mark this function as imported in this module, with the current Threshold
356 ProcessedThreshold = AdjThreshold;
357
358411 ImportCount++;
359
360 // Make exports in the source module.
361 if (ExportLists) {
362 auto &ExportList = (*ExportLists)[ExportModulePath];
363 ExportList.insert(VI.getGUID());
364 if (!PreviouslyImported) {
365 // This is the first time this function was exported from its source
366 // module, so mark all functions and globals it references as exported
367 // to the outside if they are defined in the same source module.
368 // For efficiency, we unconditionally add all the referenced GUIDs
369 // to the ExportList for this module, and will prune out any not
370 // defined in the module later in a single pass.
371 for (auto &Edge : ResolvedCalleeSummary->calls()) {
372 auto CalleeGUID = Edge.first.getGUID();
373 ExportList.insert(CalleeGUID);
374 }
375 for (auto &Ref : ResolvedCalleeSummary->refs()) {
376 auto GUID = Ref.getGUID();
377 ExportList.insert(GUID);
378 }
379 }
380 }
381412
382413 // Insert the newly imported function to the worklist.
383414 Worklist.emplace_back(ResolvedCalleeSummary, AdjThreshold, VI.getGUID());
394425 // Worklist contains the list of function imported in this module, for which
395426 // we will analyse the callees and may import further down the callgraph.
396427 SmallVector Worklist;
428 FunctionImporter::ImportThresholdsTy ImportThresholds;
397429
398430 // Populate the worklist with the import for the functions in the current
399431 // module
415447 LLVM_DEBUG(dbgs() << "Initialize import for " << VI << "\n");
416448 computeImportForFunction(*FuncSummary, Index, ImportInstrLimit,
417449 DefinedGVSummaries, Worklist, ImportList,
418 ExportLists);
450 ExportLists, ImportThresholds);
419451 }
420452
421453 // Process the newly imported functions and add callees to the worklist.
423455 auto FuncInfo = Worklist.pop_back_val();
424456 auto *Summary = std::get<0>(FuncInfo);
425457 auto Threshold = std::get<1>(FuncInfo);
426 auto GUID = std::get<2>(FuncInfo);
427
428 // Check if we later added this summary with a higher threshold.
429 // If so, skip this entry.
430 auto ExportModulePath = Summary->modulePath();
431 auto &LatestProcessedThreshold = ImportList[ExportModulePath][GUID];
432 if (LatestProcessedThreshold > Threshold)
433 continue;
434458
435459 computeImportForFunction(*Summary, Index, Threshold, DefinedGVSummaries,
436 Worklist, ImportList, ExportLists);
460 Worklist, ImportList, ExportLists,
461 ImportThresholds);
437462 }
438463 }
439464
449474 }
450475
451476 static GlobalValue::GUID getGUID(GlobalValue::GUID G) { return G; }
452
453 static GlobalValue::GUID
454 getGUID(const std::pair &P) {
455 return P.first;
456 }
457477
458478 template
459479 static unsigned numGlobalVarSummaries(const ModuleSummaryIndex &Index,
573593 // e.g. record required linkage changes.
574594 if (Summary->modulePath() == ModulePath)
575595 continue;
576 // Doesn't matter what value we plug in to the map, just needs an entry
577 // to provoke importing by thinBackend.
578 ImportList[Summary->modulePath()][GUID] = 1;
596 // Add an entry to provoke importing by thinBackend.
597 ImportList[Summary->modulePath()].insert(GUID);
579598 }
580599 #ifndef NDEBUG
581600 dumpImportListForModule(Index, ModulePath, ImportList);
697716 const auto &DefinedGVSummaries =
698717 ModuleToDefinedGVSummaries.lookup(ILI.first());
699718 for (auto &GI : ILI.second) {
700 const auto &DS = DefinedGVSummaries.find(GI.first);
719 const auto &DS = DefinedGVSummaries.find(GI);
701720 assert(DS != DefinedGVSummaries.end() &&
702721 "Expected a defined summary for imported global value");
703 SummariesForIndex[GI.first] = DS->second;
722 SummariesForIndex[GI] = DS->second;
704723 }
705724 }
706725 }
0 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
1 target triple = "x86_64-apple-macosx10.11.0"
2
3 define void @foo() {
4 call void @linkonceodrfunc()
5 call void @linkonceodrfunc2()
6 ret void
7 }
8
9 define linkonce_odr void @linkonceodrfunc() {
10 call void @f()
11 call void @f()
12 call void @f()
13 call void @f()
14 call void @f()
15 call void @f()
16 call void @f()
17 ret void
18 }
19
20 define linkonce_odr void @linkonceodrfunc2() {
21 call void @f()
22 call void @f()
23 call void @f()
24 call void @f()
25 call void @f()
26 call void @f()
27 call void @f()
28 ret void
29 }
30
31 define internal void @f() {
32 ret void
33 }
0 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
1 target triple = "x86_64-apple-macosx10.11.0"
2
3 define linkonce_odr void @linkonceodrfunc() {
4 ret void
5 }
+0
-42
test/Transforms/FunctionImport/Inputs/hotness_based_import2.ll less more
None ; ModuleID = 'thinlto-function-summary-callgraph-profile-summary2.ll'
1 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
2 target triple = "x86_64-unknown-linux-gnu"
3
4
5 define void @hot() #1 !prof !28 {
6 call void @calledFromHot()
7 ret void
8 }
9
10 ; 9 instructions so it is above decayed cold threshold of 7 and below
11 ; decayed hot threshold of 10.
12 define void @calledFromHot() !prof !28 {
13 %b = alloca i32, align 4
14 store i32 1, i32* %b, align 4
15 store i32 1, i32* %b, align 4
16 store i32 1, i32* %b, align 4
17 store i32 1, i32* %b, align 4
18 store i32 1, i32* %b, align 4
19 store i32 1, i32* %b, align 4
20 store i32 1, i32* %b, align 4
21 ret void
22 }
23
24 !llvm.module.flags = !{!1}
25
26 !1 = !{i32 1, !"ProfileSummary", !2}
27 !2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
28 !3 = !{!"ProfileFormat", !"InstrProf"}
29 !4 = !{!"TotalCount", i64 222}
30 !5 = !{!"MaxCount", i64 110}
31 !6 = !{!"MaxInternalCount", i64 1}
32 !7 = !{!"MaxFunctionCount", i64 110}
33 !8 = !{!"NumCounts", i64 4}
34 !9 = !{!"NumFunctions", i64 3}
35 !10 = !{!"DetailedSummary", !11}
36 !11 = !{!12, !13, !14}
37 !12 = !{i32 10000, i64 110, i32 2}
38 !13 = !{i32 999000, i64 2, i32 4}
39 !14 = !{i32 999999, i64 2, i32 4}
40 !28 = !{!"function_entry_count", i64 110}
41 !29 = !{!"function_entry_count", i64 1}
0 ; Test to ensure that we always select the same copy of a linkonce function
1 ; when it is encountered with different thresholds. When we encounter the
2 ; copy in funcimport_resolved1.ll with a higher threshold via the direct call
3 ; from main(), it will be selected for importing. When we encounter it with a
4 ; lower threshold by reaching it from the deeper call chain via foo(), it
5 ; won't be selected for importing. We don't want to select both the copy from
6 ; funcimport_resolved1.ll and the smaller one from funcimport_resolved2.ll,
7 ; leaving it up to the backend to figure out which one to actually import.
8 ; The linkonce_odr may have different instruction counts in practice due to
9 ; different inlines in the compile step.
10
11 ; Require asserts so we can use -debug-only
12 ; REQUIRES: asserts
13
14 ; REQUIRES: x86-registered-target
15
16 ; RUN: opt -module-summary %s -o %t.bc
17 ; RUN: opt -module-summary %p/Inputs/funcimport_resolved1.ll -o %t2.bc
18 ; RUN: opt -module-summary %p/Inputs/funcimport_resolved2.ll -o %t3.bc
19
20 ; First do a sanity check that all callees are imported with the default
21 ; instruction limit
22 ; RUN: llvm-lto2 run %t.bc %t2.bc %t3.bc -o %t4 -r=%t.bc,_main,pl -r=%t.bc,_linkonceodrfunc,l -r=%t.bc,_foo,l -r=%t2.bc,_foo,pl -r=%t2.bc,_linkonceodrfunc,pl -r=%t2.bc,_linkonceodrfunc2,pl -r=%t3.bc,_linkonceodrfunc,l -thinlto-threads=1 -debug-only=function-import 2>&1 | FileCheck %s --check-prefix=INSTLIMDEFAULT
23 ; INSTLIMDEFAULT: Is importing function {{.*}} foo from {{.*}}funcimport_resolved1.ll
24 ; INSTLIMDEFAULT: Is importing function {{.*}} linkonceodrfunc from {{.*}}funcimport_resolved1.ll
25 ; INSTLIMDEFAULT: Is importing function {{.*}} linkonceodrfunc2 from {{.*}}funcimport_resolved1.ll
26 ; INSTLIMDEFAULT: Is importing function {{.*}} f from {{.*}}funcimport_resolved1.ll
27 ; INSTLIMDEFAULT-NOT: Is importing function {{.*}} linkonceodrfunc from {{.*}}funcimport_resolved2.ll
28
29 ; Now run with the lower threshold that will only allow linkonceodrfunc to be
30 ; imported from funcimport_resolved1.ll when encountered via the direct call
31 ; from main(). Ensure we don't also select the copy in funcimport_resolved2.ll
32 ; when it is encountered via the deeper call chain.
33 ; RUN: llvm-lto2 run %t.bc %t2.bc %t3.bc -o %t4 -r=%t.bc,_main,pl -r=%t.bc,_linkonceodrfunc,l -r=%t.bc,_foo,l -r=%t2.bc,_foo,pl -r=%t2.bc,_linkonceodrfunc,pl -r=%t2.bc,_linkonceodrfunc2,pl -r=%t3.bc,_linkonceodrfunc,l -thinlto-threads=1 -debug-only=function-import -import-instr-limit=8 2>&1 | FileCheck %s --check-prefix=INSTLIM8
34 ; INSTLIM8: Is importing function {{.*}} foo from {{.*}}funcimport_resolved1.ll
35 ; INSTLIM8: Is importing function {{.*}} linkonceodrfunc from {{.*}}funcimport_resolved1.ll
36 ; INSTLIM8: Not importing function {{.*}} linkonceodrfunc2 from {{.*}}funcimport_resolved1.ll
37 ; INSTLIM8: Is importing function {{.*}} f from {{.*}}funcimport_resolved1.ll
38 ; INSTLIM8-NOT: Is importing function {{.*}} linkonceodrfunc from {{.*}}funcimport_resolved2.ll
39
40 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
41 target triple = "x86_64-apple-macosx10.11.0"
42
43 define i32 @main() #0 {
44 entry:
45 call void (...) @foo()
46 call void (...) @linkonceodrfunc()
47 ret i32 0
48 }
49
50 declare void @foo(...) #1
51 declare void @linkonceodrfunc(...) #1
+0
-53
test/Transforms/FunctionImport/hotness_based_import2.ll less more
None ; Test to check that callee reached from cold and then hot path gets
1 ; hot thresholds.
2 ; RUN: opt -module-summary %s -o %t.bc
3 ; RUN: opt -module-summary %p/Inputs/hotness_based_import2.ll -o %t2.bc
4 ; RUN: llvm-lto -thinlto -o %t3 %t.bc %t2.bc
5
6 ; Teset with limit set to 10 and multipliers set to 1. Since cold call to
7 ; hot is first in the other module, we'll first add calledFromHot to worklist
8 ; with threshold decayed by default 0.7 factor. Test ensures that when we
9 ; encounter it again from hot path, we re-enqueue with higher non-decayed
10 ; threshold which will allow it to be imported.
11 ; RUN: opt -function-import -summary-file %t3.thinlto.bc %t.bc -import-instr-limit=10 -import-hot-multiplier=1.0 -import-cold-multiplier=1.0 -S | FileCheck %s --check-prefix=CHECK
12 ; CHECK-DAG: define available_externally void @hot()
13 ; CHECK-DAG: define available_externally void @calledFromHot()
14
15 ; ModuleID = 'thinlto-function-summary-callgraph.ll'
16 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
17 target triple = "x86_64-unknown-linux-gnu"
18
19 ; This function has a high profile count, so entry block is hot.
20 define void @hot_function(i1 %a, i1 %a2) !prof !28 {
21 entry:
22 call void @hot()
23 ret void
24 }
25
26 ; This function has a low profile count, so entry block is hot.
27 define void @cold_function(i1 %a, i1 %a2) !prof !29 {
28 entry:
29 call void @hot()
30 ret void
31 }
32
33 declare void @hot() #1
34
35 !llvm.module.flags = !{!1}
36
37 !1 = !{i32 1, !"ProfileSummary", !2}
38 !2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
39 !3 = !{!"ProfileFormat", !"InstrProf"}
40 !4 = !{!"TotalCount", i64 222}
41 !5 = !{!"MaxCount", i64 110}
42 !6 = !{!"MaxInternalCount", i64 1}
43 !7 = !{!"MaxFunctionCount", i64 110}
44 !8 = !{!"NumCounts", i64 4}
45 !9 = !{!"NumFunctions", i64 3}
46 !10 = !{!"DetailedSummary", !11}
47 !11 = !{!12, !13, !14}
48 !12 = !{i32 10000, i64 110, i32 2}
49 !13 = !{i32 999000, i64 2, i32 4}
50 !14 = !{i32 999999, i64 2, i32 4}
51 !28 = !{!"function_entry_count", i64 110}
52 !29 = !{!"function_entry_count", i64 1}
261261 errs() << "Importing " << FunctionName << " from " << FileName << "\n";
262262
263263 auto &Entry = ImportList[FileName];
264 Entry.insert(std::make_pair(F->getGUID(), /* (Unused) threshold */ 1.0));
264 Entry.insert(F->getGUID());
265265 }
266266 auto CachedModuleLoader = [&](StringRef Identifier) {
267267 return ModuleLoaderCache.takeModule(Identifier);