llvm.org GIT mirror llvm / 32ce91c
[ThinLTO] For SamplePGO, need to handle ICP targets consistently in thin link Summary: SamplePGO indirect call profiles record the target as the original GUID for statics. The importer had special handling to map to the normal GUID in that case. The dead global analysis needs the same treatment or inconsistencies arise, resulting in linker unsats due to some dead symbols being exported and kept, leaving in references to other dead symbols that are removed. This can happen when a SamplePGO profile collected by one binary is used for a different binary, so the indirect call profiles may not accurately reflect live targets. Reviewers: danielcdh Subscribers: mehdi_amini, inglorion, llvm-commits, eraman Differential Revision: https://reviews.llvm.org/D37783 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@313151 91177308-0d34-0410-b5e6-96231b3b80d8 Teresa Johnson 2 years ago
4 changed file(s) with 154 addition(s) and 11 deletion(s). Raw diff Collapse all Expand all
186186 using EdgeInfo = std::tuple
187187 GlobalValue::GUID>;
188188
189 static ValueInfo
190 updateValueInfoForIndirectCalls(const ModuleSummaryIndex &Index, ValueInfo VI) {
191 if (!VI.getSummaryList().empty())
192 return VI; // VI already has summaries, so no remapping is attempted.
193 // For SamplePGO, indirect call targets that are local functions are
194 // annotated in the profile with their original names. Try to map that
195 // original GUID to the corresponding PGOFuncName GUID.
196 // FIXME: Consider updating the edges in the graph after building
197 // it, rather than needing to perform this mapping on each walk.
198 auto GUID = Index.getGUIDFromOriginalID(VI.getGUID());
199 if (GUID == 0)
200 return nullptr; // No GUID found for the original ID; callers check `!VI`.
201 return Index.getValueInfo(GUID);
202 }
203
189204 /// Compute the list of functions to import for a given caller. Mark these
190205 /// imported functions and the symbols they reference in their source module as
191206 /// exported from their source module.
200215 DEBUG(dbgs() << " edge -> " << VI.getGUID() << " Threshold:" << Threshold
201216 << "\n");
202217
203 if (VI.getSummaryList().empty()) {
204 // For SamplePGO, the indirect call targets for local functions will
205 // have its original name annotated in profile. We try to find the
206 // corresponding PGOFuncName as the GUID.
207 auto GUID = Index.getGUIDFromOriginalID(VI.getGUID());
208 if (GUID == 0)
209 continue;
210 VI = Index.getValueInfo(GUID);
211 if (!VI)
212 continue;
213 }
218 VI = updateValueInfoForIndirectCalls(Index, VI);
219 if (!VI)
220 continue;
214221
215222 if (DefinedGVSummaries.count(VI.getGUID())) {
216223 DEBUG(dbgs() << "ignored! Target already in destination module.\n");
460467 for (auto &S : VI.getSummaryList())
461468 if (S->isLive())
462469 return;
470 // FIXME: If we knew which edges were created for indirect call profiles,
471 // we could skip them here. Any that are live should be reached via
472 // other edges, e.g. reference edges. Otherwise, using a profile collected
473 // on a slightly different binary might provoke preserving, importing
474 // and ultimately promoting calls to functions not linked into this
475 // binary, which increases the binary size unnecessarily. Note that
476 // if this code changes, the importer needs to change so that edges
477 // to functions marked dead are skipped.
478 VI = updateValueInfoForIndirectCalls(Index, VI);
479 if (!VI)
480 return;
463481 for (auto &S : VI.getSummaryList())
464482 S->setLive(true);
465483 ++LiveSymbols;
0 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
1 target triple = "x86_64-unknown-linux-gnu"
2
3 ; Function Attrs: norecurse nounwind readnone uwtable
4 define internal void @_ZL3foov() #1 {
5 entry:
6 call void @_ZL3barv()
7 ret void
8 }
9
10 declare void @_ZL3barv()
11
12 !llvm.dbg.cu = !{!0}
13 !llvm.module.flags = !{!3}
14 !llvm.ident = !{!31}
15
16 !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 5.0.0 (trunk 297016)", isOptimized: true, runtimeVersion: 0, emissionKind: NoDebug, enums: !2)
17 !1 = !DIFile(filename: "b.cc", directory: "/ssd/llvm/abc/small")
18 !2 = !{}
19 !3 = !{i32 2, !"Debug Info Version", i32 3}
20 !31 = !{!"clang version 5.0.0 (trunk 297016)"}
0 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
1 target triple = "x86_64-unknown-linux-gnu"
2
3 define void @_ZL3barv() #1 {
4 entry:
5 call void @dummy()
6 call void @dummy()
7 call void @dummy()
8 call void @dummy()
9 call void @dummy()
10 call void @dummy()
11 ret void
12 }
13
14 define internal void @dummy() {
15 entry:
16 ret void
17 }
18
19 !llvm.dbg.cu = !{!0}
20 !llvm.module.flags = !{!3}
21 !llvm.ident = !{!31}
22
23 !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 5.0.0 (trunk 297016)", isOptimized: true, runtimeVersion: 0, emissionKind: NoDebug, enums: !2)
24 !1 = !DIFile(filename: "c.cc", directory: "/ssd/llvm/abc/small")
25 !2 = !{}
26 !3 = !{i32 2, !"Debug Info Version", i32 3}
27 !31 = !{!"clang version 5.0.0 (trunk 297016)"}
0 ; Checks if indirect calls to static target functions that are actually
1 ; dead in the new binary target (due to a profile collected from a slightly
2 ; different binary) are properly traversed during ThinLTO liveness analysis.
3 ; If the liveness analysis is changed to ignore indirect edges and the
4 ; importer is changed to check liveness before importing, this test will
5 ; need adjustment (in that case _ZL3foov should not be imported/promoted,
6 ; and _ZL3barv can be internalized/removed).
7
8 ; Do setup work for all below tests: generate bitcode and combined index
9 ; RUN: opt -module-summary %s -o %t.bc
10 ; RUN: opt -module-summary %p/Inputs/thinlto_samplepgo_icp2a.ll -o %t2a.bc
11 ; RUN: opt -module-summary %p/Inputs/thinlto_samplepgo_icp2b.ll -o %t2b.bc
12
13 ; Use -import-instr-limit=5 so that we don't import _ZL3barv, which would
14 ; hide the problem.
15 ; RUN: llvm-lto2 run -save-temps -import-instr-limit=5 -o %t3 %t.bc %t2a.bc %t2b.bc -r %t.bc,fptr,plx -r %t.bc,main,plx -r %t2a.bc,_ZL3barv,l -r %t2b.bc,_ZL3barv,pl -print-imports 2>&1 | FileCheck %s --check-prefix=IMPORTS2
16 ; IMPORTS2-NOT: Import _ZL3barv
17 ; IMPORTS2: Import _ZL3foov.llvm.0
18 ; IMPORTS2-NOT: Import _ZL3barv
19 ; RUN: llvm-nm %t3.2 | FileCheck %s --check-prefix=NM
20 ; NM: _ZL3barv
21 ; RUN: llvm-dis < %t3.2.2.internalize.bc | FileCheck %s --check-prefix=INTERNALIZE
22 ; INTERNALIZE: define void @_ZL3barv
23
24 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
25 target triple = "x86_64-unknown-linux-gnu"
26
27 @fptr = local_unnamed_addr global void ()* null, align 8
28
29 ; Function Attrs: norecurse uwtable
30 define i32 @main() local_unnamed_addr #0 !prof !34 {
31 entry:
32 %0 = load void ()*, void ()** @fptr, align 8
33 ; ICALL-PROM: br i1 %{{[0-9]+}}, label %if.true.direct_targ, label %if.false.orig_indirect
34 tail call void %0(), !prof !40
35 ret i32 0
36 }
37
38 !llvm.dbg.cu = !{!0}
39 !llvm.module.flags = !{!3,!4}
40 !llvm.ident = !{!31}
41
42 !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 5.0.0 (trunk 297016)", isOptimized: true, runtimeVersion: 0, emissionKind: NoDebug, enums: !2)
43 !1 = !DIFile(filename: "main.cc", directory: ".")
44 !2 = !{}
45 !3 = !{i32 2, !"Debug Info Version", i32 3}
46 !4 = !{i32 1, !"ProfileSummary", !5}
47 !5 = !{!6, !7, !8, !9, !10, !11, !12, !13}
48 !6 = !{!"ProfileFormat", !"SampleProfile"}
49 !7 = !{!"TotalCount", i64 3003}
50 !8 = !{!"MaxCount", i64 3000}
51 !9 = !{!"MaxInternalCount", i64 0}
52 !10 = !{!"MaxFunctionCount", i64 0}
53 !11 = !{!"NumCounts", i64 3}
54 !12 = !{!"NumFunctions", i64 1}
55 !13 = !{!"DetailedSummary", !14}
56 !14 = !{!15, !16, !17, !18, !19, !20, !20, !21, !21, !22, !23, !24, !25, !26, !27, !28, !29, !30}
57 !15 = !{i32 10000, i64 3000, i32 1}
58 !16 = !{i32 100000, i64 3000, i32 1}
59 !17 = !{i32 200000, i64 3000, i32 1}
60 !18 = !{i32 300000, i64 3000, i32 1}
61 !19 = !{i32 400000, i64 3000, i32 1}
62 !20 = !{i32 500000, i64 3000, i32 1}
63 !21 = !{i32 600000, i64 3000, i32 1}
64 !22 = !{i32 700000, i64 3000, i32 1}
65 !23 = !{i32 800000, i64 3000, i32 1}
66 !24 = !{i32 900000, i64 3000, i32 1}
67 !25 = !{i32 950000, i64 3000, i32 1}
68 !26 = !{i32 990000, i64 3000, i32 1}
69 !27 = !{i32 999000, i64 3000, i32 1}
70 !28 = !{i32 999900, i64 2, i32 2}
71 !29 = !{i32 999990, i64 2, i32 2}
72 !30 = !{i32 999999, i64 2, i32 2}
73 !31 = !{!"clang version 5.0.0 (trunk 297016)"}
74 !34 = !{!"function_entry_count", i64 1}
75 !40 = !{!"VP", i32 0, i64 3000, i64 -8789629626369651636, i64 3000}