llvm.org GIT mirror llvm / 7b29966
Restrict call metadata based hotness detection to Sample PGO mode Summary: Don't use the metadata on call instructions for determining hotness unless we are in sample PGO mode, where it is needed because profile counts are not accurate. In instrumentation mode this is not necessary and does more harm than good when calls have VP metadata that hasn't been properly scaled after transformations or dropped after constant prop based devirtualization (both should be fixed, but we don't need to do this in the first place for instrumentation PGO). This required adjusting a number of tests to distinguish between sample and instrumentation PGO handling, and to add in profile summary metadata so that getProfileCount can get the summary. Reviewers: davidxl, danielcdh Subscribers: aemerson, rengolin, mehdi_amini, Prazek, llvm-commits Differential Revision: https://reviews.llvm.org/D32877 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@302844 91177308-0d34-0410-b5e6-96231b3b80d8 Teresa Johnson 3 years ago
7 changed file(s) with 226 addition(s) and 26 deletion(s). Raw diff Collapse all Expand all
7474 return None;
7575 assert((isa<CallInst>(Inst) || isa<InvokeInst>(Inst)) &&
7676 "We can only get profile count for call/invoke instruction.");
77 // Check if there is a profile metadata on the instruction. If it is present,
78 // determine hotness solely based on that.
79 uint64_t TotalCount;
80 if (Inst->extractProfTotalWeight(TotalCount))
81 return TotalCount;
77 if (computeSummary() && Summary->getKind() == ProfileSummary::PSK_Sample) {
78 // In sample PGO mode, check if there is a profile metadata on the
79 // instruction. If it is present, determine hotness solely based on that,
80 // since the sampled entry count may not be accurate.
81 uint64_t TotalCount;
82 if (Inst->extractProfTotalWeight(TotalCount))
83 return TotalCount;
84 }
8285 if (BFI)
8386 return BFI->getBlockProfileCount(Inst->getParent());
8487 return None;
2828 ; CHECK-NEXT:
2929 ; CHECK-NEXT:
3030 ; op4=hot1 op6=cold op8=hot2 op10=hot4 op12=none1 op14=hot3 op16=none2 op18=none3 op20=123
31 ; CHECK-NEXT: 3 op12=6 op13=2 op14=3 op15=3 op16=7 op17=2 op18=8 op19=2 op20=25 op21=3/>
31 ; CHECK-NEXT: 1 op12=6 op13=2 op14=3 op15=3 op16=7 op17=2 op18=8 op19=2 op20=25 op21=3/>
3232 ; CHECK-NEXT:
3333
3434 ; CHECK:
0 ; Test to check the callgraph in summary when there is PGO
1 ; RUN: opt -module-summary %s -o %t.o
2 ; RUN: llvm-bcanalyzer -dump %t.o | FileCheck %s
3 ; RUN: opt -module-summary %p/Inputs/thinlto-function-summary-callgraph-profile-summary.ll -o %t2.o
4 ; RUN: llvm-lto -thinlto -o %t3 %t.o %t2.o
5 ; RUN: llvm-bcanalyzer -dump %t3.thinlto.bc | FileCheck %s --check-prefix=COMBINED
6
7
8 ; CHECK:
9 ; "hot_function"
10 ; CHECK-NEXT:
11 ; "hot1"
12 ; CHECK-NEXT:
13 ; "hot2"
14 ; CHECK-NEXT:
15 ; "hot3"
16 ; CHECK-NEXT:
17 ; "hot4"
18 ; CHECK-NEXT:
19 ; "cold"
20 ; CHECK-NEXT:
21 ; "none1"
22 ; CHECK-NEXT:
23 ; "none2"
24 ; CHECK-NEXT:
25 ; "none3"
26 ; CHECK-NEXT:
27 ; CHECK-LABEL:
28 ; CHECK-NEXT:
29 ; CHECK-NEXT:
30 ; op4=hot1 op6=cold op8=hot2 op10=hot4 op12=none1 op14=hot3 op16=none2 op18=none3 op20=123
31 ; CHECK-NEXT:
32 ; CHECK-NEXT:
33
34 ; CHECK:
35 ; CHECK-NEXT: blob data = 'hot_functionhot1hot2hot3hot4coldnone1none2none3'
36
37 ; COMBINED:
38 ; COMBINED-NEXT:
39 ; COMBINED-NEXT:
40 ; COMBINED-NEXT:
41 ; COMBINED-NEXT:
42 ; COMBINED-NEXT:
43 ; COMBINED-NEXT:
44 ; COMBINED-NEXT:
45 ; COMBINED-NEXT:
46 ; COMBINED-NEXT:
47 ; COMBINED-NEXT:
48 ; COMBINED-NEXT:
49 ; COMBINED-NEXT:
50 ; COMBINED-NEXT:
51 ; COMBINED-NEXT:
52 ; COMBINED-NEXT:
53 ; COMBINED-NEXT:
54 ; COMBINED-NEXT:
55 ; COMBINED-NEXT:
56
57
58 ; ModuleID = 'thinlto-function-summary-callgraph.ll'
59 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
60 target triple = "x86_64-unknown-linux-gnu"
61
62 ; This function has a high profile count, so its entry block is hot.
63 define void @hot_function(i1 %a, i1 %a2) !prof !20 {
64 entry:
65 call void @hot1()
66 br i1 %a, label %Cold, label %Hot, !prof !41
67 Cold: ; 1/1000 goes here
68 call void @cold()
69 call void @hot2()
70 call void @hot4(), !prof !15
71 call void @none1()
72 br label %exit
73 Hot: ; 999/1000 goes here
74 call void @hot2()
75 call void @hot3()
76 br i1 %a2, label %None1, label %None2, !prof !42
77 None1: ; half goes here
78 call void @none1()
79 call void @none2()
80 br label %exit
81 None2: ; half goes here
82 call void @none3()
83 br label %exit
84 exit:
85 ret void
86 }
87
88 declare void @hot1() #1
89 declare void @hot2() #1
90 declare void @hot3() #1
91 declare void @hot4() #1
92 declare void @cold() #1
93 declare void @none1() #1
94 declare void @none2() #1
95 declare void @none3() #1
96
97
98 !41 = !{!"branch_weights", i32 1, i32 1000}
99 !42 = !{!"branch_weights", i32 1, i32 1}
100
101
102
103 !llvm.module.flags = !{!1}
104 !20 = !{!"function_entry_count", i64 110, i64 123}
105
106 !1 = !{i32 1, !"ProfileSummary", !2}
107 !2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
108 !3 = !{!"ProfileFormat", !"SampleProfile"}
109 !4 = !{!"TotalCount", i64 10000}
110 !5 = !{!"MaxCount", i64 10}
111 !6 = !{!"MaxInternalCount", i64 1}
112 !7 = !{!"MaxFunctionCount", i64 1000}
113 !8 = !{!"NumCounts", i64 3}
114 !9 = !{!"NumFunctions", i64 3}
115 !10 = !{!"DetailedSummary", !11}
116 !11 = !{!12, !13, !14}
117 !12 = !{i32 10000, i64 100, i32 1}
118 !13 = !{i32 999000, i64 100, i32 1}
119 !14 = !{i32 999999, i64 1, i32 2}
120 !15 = !{!"branch_weights", i32 100}
0 ; RUN: opt < %s -codegenprepare -S | FileCheck %s
1
2 target triple = "x86_64-pc-linux-gnu"
3
4 ; This tests that hot/cold functions get correct section prefix assigned
5
6 ; CHECK: hot_func{{.*}}!section_prefix ![[HOT_ID:[0-9]+]]
7 ; The entry is hot
8 define void @hot_func() !prof !15 {
9 ret void
10 }
11
12 ; CHECK: hot_call_func{{.*}}!section_prefix ![[HOT_ID]]
13 ; The sum of the 2 callsites is hot
14 define void @hot_call_func() !prof !16 {
15 call void @hot_func(), !prof !17
16 call void @hot_func(), !prof !17
17 ret void
18 }
19
20 ; CHECK-NOT: normal_func{{.*}}!section_prefix
21 ; The sum of all callsites is neither hot nor cold
22 define void @normal_func() !prof !16 {
23 call void @hot_func(), !prof !17
24 call void @hot_func(), !prof !18
25 call void @hot_func(), !prof !18
26 ret void
27 }
28
29 ; CHECK: cold_func{{.*}}!section_prefix ![[COLD_ID:[0-9]+]]
30 ; The entry and the callsite are both cold
31 define void @cold_func() !prof !16 {
32 call void @hot_func(), !prof !18
33 ret void
34 }
35
36 ; CHECK: ![[HOT_ID]] = !{!"function_section_prefix", !".hot"}
37 ; CHECK: ![[COLD_ID]] = !{!"function_section_prefix", !".unlikely"}
38 !llvm.module.flags = !{!1}
39 !1 = !{i32 1, !"ProfileSummary", !2}
40 !2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
41 !3 = !{!"ProfileFormat", !"SampleProfile"}
42 !4 = !{!"TotalCount", i64 10000}
43 !5 = !{!"MaxCount", i64 1000}
44 !6 = !{!"MaxInternalCount", i64 1}
45 !7 = !{!"MaxFunctionCount", i64 1000}
46 !8 = !{!"NumCounts", i64 3}
47 !9 = !{!"NumFunctions", i64 3}
48 !10 = !{!"DetailedSummary", !11}
49 !11 = !{!12, !13, !14}
50 !12 = !{i32 10000, i64 100, i32 1}
51 !13 = !{i32 999000, i64 100, i32 1}
52 !14 = !{i32 999999, i64 1, i32 2}
53 !15 = !{!"function_entry_count", i64 1000}
54 !16 = !{!"function_entry_count", i64 1}
55 !17 = !{!"branch_weights", i32 80}
56 !18 = !{!"branch_weights", i32 1}
99 ret void
1010 }
1111
12 ; CHECK: hot_call_func{{.*}}!section_prefix ![[HOT_ID]]
13 ; The sum of 2 callsites are hot
14 define void @hot_call_func() !prof !16 {
12 ; For instrumentation based PGO, we should only look at entry counts,
13 ; not call site VP metadata (which can exist on value profiled memcpy,
14 ; or possibly left behind after static analysis based devirtualization).
15 ; CHECK: cold_func1{{.*}}!section_prefix ![[COLD_ID:[0-9]+]]
16 define void @cold_func1() !prof !16 {
1517 call void @hot_func(), !prof !17
1618 call void @hot_func(), !prof !17
1719 ret void
1820 }
1921
20 ; CHECK-NOT: normal_func{{.*}}!section_prefix
21 ; The sum of all callsites are neither hot or cold
22 define void @normal_func() !prof !16 {
22 ; CHECK: cold_func2{{.*}}!section_prefix
23 define void @cold_func2() !prof !16 {
2324 call void @hot_func(), !prof !17
2425 call void @hot_func(), !prof !18
2526 call void @hot_func(), !prof !18
2627 ret void
2728 }
2829
29 ; CHECK: cold_func{{.*}}!section_prefix ![[COLD_ID:[0-9]+]]
30 ; The entry and the callsite are both cold
31 define void @cold_func() !prof !16 {
30 ; CHECK: cold_func3{{.*}}!section_prefix ![[COLD_ID]]
31 define void @cold_func3() !prof !16 {
3232 call void @hot_func(), !prof !18
3333 ret void
3434 }
55 @func = global void ()* null
66
77 ; CHECK: define void @callee(i32 %n) !prof ![[ENTRY_COUNT:[0-9]*]]
8 define void @callee(i32 %n) !prof !1 {
8 define void @callee(i32 %n) !prof !15 {
99 %cond = icmp sle i32 %n, 10
1010 br i1 %cond, label %cond_true, label %cond_false
1111 cond_true:
1212 ; ext1 is optimized away, thus not updated.
1313 ; CHECK: call void @ext1(), !prof ![[COUNT_CALLEE1:[0-9]*]]
14 call void @ext1(), !prof !2
14 call void @ext1(), !prof !16
1515 ret void
1616 cond_false:
1717 ; ext is cloned and updated.
1818 ; CHECK: call void @ext(), !prof ![[COUNT_CALLEE:[0-9]*]]
19 call void @ext(), !prof !2
19 call void @ext(), !prof !16
2020 %f = load void ()*, void ()** @func
2121 ; CHECK: call void %f(), !prof ![[COUNT_IND_CALLEE:[0-9]*]]
22 call void %f(), !prof !4
22 call void %f(), !prof !18
2323 ret void
2424 }
2525
2727 define void @caller() {
2828 ; CHECK: call void @ext(), !prof ![[COUNT_CALLER:[0-9]*]]
2929 ; CHECK: call void %f.i(), !prof ![[COUNT_IND_CALLER:[0-9]*]]
30 call void @callee(i32 15), !prof !3
30 call void @callee(i32 15), !prof !17
3131 ret void
3232 }
3333
34 !llvm.module.flags = !{!0}
35 !0 = !{i32 1, !"MaxFunctionCount", i32 2000}
36 !1 = !{!"function_entry_count", i64 1000}
37 !2 = !{!"branch_weights", i64 2000}
38 !3 = !{!"branch_weights", i64 400}
39 !4 = !{!"VP", i32 0, i64 140, i64 111, i64 80, i64 222, i64 40, i64 333, i64 20}
34 !llvm.module.flags = !{!1}
35 !1 = !{i32 1, !"ProfileSummary", !2}
36 !2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
37 !3 = !{!"ProfileFormat", !"SampleProfile"}
38 !4 = !{!"TotalCount", i64 10000}
39 !5 = !{!"MaxCount", i64 10}
40 !6 = !{!"MaxInternalCount", i64 1}
41 !7 = !{!"MaxFunctionCount", i64 2000}
42 !8 = !{!"NumCounts", i64 2}
43 !9 = !{!"NumFunctions", i64 2}
44 !10 = !{!"DetailedSummary", !11}
45 !11 = !{!12, !13, !14}
46 !12 = !{i32 10000, i64 100, i32 1}
47 !13 = !{i32 999000, i64 100, i32 1}
48 !14 = !{i32 999999, i64 1, i32 2}
49 !15 = !{!"function_entry_count", i64 1000}
50 !16 = !{!"branch_weights", i64 2000}
51 !17 = !{!"branch_weights", i64 400}
52 !18 = !{!"VP", i32 0, i64 140, i64 111, i64 80, i64 222, i64 40, i64 333, i64 20}
4053 attributes #0 = { alwaysinline }
4154 ; CHECK: ![[ENTRY_COUNT]] = !{!"function_entry_count", i64 600}
4255 ; CHECK: ![[COUNT_CALLEE1]] = !{!"branch_weights", i64 2000}
161161
162162 EXPECT_TRUE(PSI.isHotCallSite(CS1, &BFI));
163163 EXPECT_FALSE(PSI.isHotCallSite(CS2, &BFI));
164
165 // Test that adding an MD_prof metadata with a hot count on CS2 does not
166 // change its hotness as it has no effect in instrumented profiling.
167 MDBuilder MDB(M->getContext());
168 CI2->setMetadata(llvm::LLVMContext::MD_prof, MDB.createBranchWeights({400}));
169 EXPECT_FALSE(PSI.isHotCallSite(CS2, &BFI));
164170 }
165171
166172 TEST_F(ProfileSummaryInfoTest, SampleProf) {