llvm.org GIT mirror llvm / f8bdcad
Use ProfileSummaryInfo in inline cost analysis. Instead of directly using MaxFunctionCount and function entry count to determine callee hotness, use the isHotFunction/isColdFunction methods provided by ProfileSummaryInfo. Differential revision: http://reviews.llvm.org/D21045 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@272321 91177308-0d34-0410-b5e6-96231b3b80d8 Easwaran Raman 3 years ago
8 changed file(s) with 81 addition(s) and 56 deletion(s). Raw diff Collapse all Expand all
2222 class CallSite;
2323 class DataLayout;
2424 class Function;
25 class ProfileSummaryInfo;
2526 class TargetTransformInfo;
2627
2728 namespace InlineConstants {
110111 /// inlining the callsite. It is an expensive, heavyweight call.
111112 InlineCost getInlineCost(CallSite CS, int DefaultThreshold,
112113 TargetTransformInfo &CalleeTTI,
113 AssumptionCacheTracker *ACT);
114 AssumptionCacheTracker *ACT, ProfileSummaryInfo *PSI);
114115
115116 /// \brief Get an InlineCost with the callee explicitly specified.
116117 /// This allows you to calculate the cost of inlining a function via a
119120 //
120121 InlineCost getInlineCost(CallSite CS, Function *Callee, int DefaultThreshold,
121122 TargetTransformInfo &CalleeTTI,
122 AssumptionCacheTracker *ACT);
123 AssumptionCacheTracker *ACT, ProfileSummaryInfo *PSI);
123124
124125 int computeThresholdFromOptLevels(unsigned OptLevel, unsigned SizeOptLevel);
125126
2323 class CallSite;
2424 class DataLayout;
2525 class InlineCost;
26 class ProfileSummaryInfo;
2627 template <class PtrType, unsigned SmallSize> class SmallPtrSet;
2728
2829 /// Inliner - This class contains all of the helper code which is used to
8485
8586 protected:
8687 AssumptionCacheTracker *ACT;
88 ProfileSummaryInfo *PSI;
8789 };
8890
8991 } // End llvm namespace
2020 #include "llvm/Analysis/CodeMetrics.h"
2121 #include "llvm/Analysis/ConstantFolding.h"
2222 #include "llvm/Analysis/InstructionSimplify.h"
23 #include "llvm/Analysis/ProfileSummaryInfo.h"
2324 #include "llvm/Analysis/TargetTransformInfo.h"
2425 #include "llvm/IR/CallSite.h"
2526 #include "llvm/IR/CallingConv.h"
7576
7677 /// The cache of @llvm.assume intrinsics.
7778 AssumptionCacheTracker *ACT;
79
80 /// Profile summary information.
81 ProfileSummaryInfo *PSI;
7882
7983 // The called function.
8084 Function &F;
199203
200204 public:
201205 CallAnalyzer(const TargetTransformInfo &TTI, AssumptionCacheTracker *ACT,
202 Function &Callee, int Threshold, CallSite CSArg)
203 : TTI(TTI), ACT(ACT), F(Callee), CandidateCS(CSArg), Threshold(Threshold),
204 Cost(0), IsCallerRecursive(false), IsRecursiveCall(false),
205 ExposesReturnsTwice(false), HasDynamicAlloca(false),
206 ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
207 HasFrameEscape(false), AllocatedSize(0), NumInstructions(0),
208 NumVectorInstructions(0), FiftyPercentVectorBonus(0),
209 TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0),
210 NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0),
211 NumConstantPtrDiffs(0), NumInstructionsSimplified(0),
212 SROACostSavings(0), SROACostSavingsLost(0) {}
206 ProfileSummaryInfo *PSI, Function &Callee, int Threshold,
207 CallSite CSArg)
208 : TTI(TTI), ACT(ACT), PSI(PSI), F(Callee), CandidateCS(CSArg),
209 Threshold(Threshold), Cost(0), IsCallerRecursive(false),
210 IsRecursiveCall(false), ExposesReturnsTwice(false),
211 HasDynamicAlloca(false), ContainsNoDuplicateCall(false),
212 HasReturn(false), HasIndirectBr(false), HasFrameEscape(false),
213 AllocatedSize(0), NumInstructions(0), NumVectorInstructions(0),
214 FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0),
215 NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),
216 NumConstantPtrCmps(0), NumConstantPtrDiffs(0),
217 NumInstructionsSimplified(0), SROACostSavings(0),
218 SROACostSavingsLost(0) {}
213219
214220 bool analyzeCall(CallSite CS);
215221
625631 Threshold = OptSizeThreshold;
626632 }
627633
628 // If profile information is available, use that to adjust threshold of hot
629 // and cold functions.
630 // FIXME: The heuristic used below for determining hotness and coldness is
631 // based on preliminary SPEC tuning and may not be optimal. Replace this with
632 // a well-tuned heuristic based on *callsite* hotness and not callee hotness.
633 uint64_t FunctionCount = 0, MaxFunctionCount = 0;
634 bool HasPGOCounts = false;
635 if (Callee.getEntryCount() && Callee.getParent()->getMaximumFunctionCount()) {
636 HasPGOCounts = true;
637 FunctionCount = Callee.getEntryCount().getValue();
638 MaxFunctionCount = Callee.getParent()->getMaximumFunctionCount().getValue();
639 }
640
641634 // Listen to the inlinehint attribute or profile based hotness information
642635 // when it would increase the threshold and the caller does not need to
643636 // minimize its size.
644 bool InlineHint =
645 Callee.hasFnAttribute(Attribute::InlineHint) ||
646 (HasPGOCounts &&
647 FunctionCount >= (uint64_t)(0.3 * (double)MaxFunctionCount));
637 bool InlineHint = Callee.hasFnAttribute(Attribute::InlineHint) ||
638 PSI->isHotFunction(&Callee);
648639 if (InlineHint && HintThreshold > Threshold && !Caller->optForMinSize())
649640 Threshold = HintThreshold;
650641
651 // Listen to the cold attribute or profile based coldness information
652 // when it would decrease the threshold.
653 bool ColdCallee =
654 Callee.hasFnAttribute(Attribute::Cold) ||
655 (HasPGOCounts &&
656 FunctionCount <= (uint64_t)(0.01 * (double)MaxFunctionCount));
642 bool ColdCallee = PSI->isColdFunction(&Callee);
657643 // Command line argument for DefaultInlineThreshold will override the default
658644 // ColdThreshold. If we have -inline-threshold but no -inlinecold-threshold,
659645 // do not use the default cold threshold even if it is smaller.
962948 // during devirtualization and so we want to give it a hefty bonus for
963949 // inlining, but cap that bonus in the event that inlining wouldn't pan
964950 // out. Pretend to inline the function, with a custom threshold.
965 CallAnalyzer CA(TTI, ACT, *F, InlineConstants::IndirectCallThreshold, CS);
951 CallAnalyzer CA(TTI, ACT, PSI, *F, InlineConstants::IndirectCallThreshold,
952 CS);
966953 if (CA.analyzeCall(CS)) {
967954 // We were able to inline the indirect call! Subtract the cost from the
968955 // threshold to get the bonus we want to apply, but don't go below zero.
14501437
14511438 InlineCost llvm::getInlineCost(CallSite CS, int DefaultThreshold,
14521439 TargetTransformInfo &CalleeTTI,
1453 AssumptionCacheTracker *ACT) {
1440 AssumptionCacheTracker *ACT,
1441 ProfileSummaryInfo *PSI) {
14541442 return getInlineCost(CS, CS.getCalledFunction(), DefaultThreshold, CalleeTTI,
1455 ACT);
1443 ACT, PSI);
14561444 }
14571445
14581446 int llvm::computeThresholdFromOptLevels(unsigned OptLevel,
14711459 InlineCost llvm::getInlineCost(CallSite CS, Function *Callee,
14721460 int DefaultThreshold,
14731461 TargetTransformInfo &CalleeTTI,
1474 AssumptionCacheTracker *ACT) {
1462 AssumptionCacheTracker *ACT,
1463 ProfileSummaryInfo *PSI) {
14751464
14761465 // Cannot inline indirect calls.
14771466 if (!Callee)
15051494 DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
15061495 << "...\n");
15071496
1508 CallAnalyzer CA(CalleeTTI, ACT, *Callee, DefaultThreshold, CS);
1497 CallAnalyzer CA(CalleeTTI, ACT, PSI, *Callee, DefaultThreshold, CS);
15091498 bool ShouldInline = CA.analyzeCall(CS);
15101499
15111500 DEBUG(CA.dump());
1616 #include "llvm/Analysis/AssumptionCache.h"
1717 #include "llvm/Analysis/CallGraph.h"
1818 #include "llvm/Analysis/InlineCost.h"
19 #include "llvm/Analysis/ProfileSummaryInfo.h"
1920 #include "llvm/Analysis/TargetLibraryInfo.h"
2021 #include "llvm/IR/CallSite.h"
2122 #include "llvm/IR/CallingConv.h"
6465 "Inliner for always_inline functions", false, false)
6566 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
6667 INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
68 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
6769 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
6870 INITIALIZE_PASS_END(AlwaysInliner, "always-inline",
6971 "Inliner for always_inline functions", false, false)
1313 #include "llvm/Analysis/AssumptionCache.h"
1414 #include "llvm/Analysis/CallGraph.h"
1515 #include "llvm/Analysis/InlineCost.h"
16 #include "llvm/Analysis/ProfileSummaryInfo.h"
1617 #include "llvm/Analysis/TargetLibraryInfo.h"
1718 #include "llvm/Analysis/TargetTransformInfo.h"
1819 #include "llvm/IR/CallSite.h"
5960 InlineCost getInlineCost(CallSite CS) override {
6061 Function *Callee = CS.getCalledFunction();
6162 TargetTransformInfo &TTI = TTIWP->getTTI(*Callee);
62 return llvm::getInlineCost(CS, DefaultThreshold, TTI, ACT);
63 return llvm::getInlineCost(CS, DefaultThreshold, TTI, ACT, PSI);
6364 }
6465
6566 bool runOnSCC(CallGraphSCC &SCC) override;
7677 "Function Integration/Inlining", false, false)
7778 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
7879 INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
80 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
7981 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
8082 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
8183 INITIALIZE_PASS_END(SimpleInliner, "inline",
1919 #include "llvm/Analysis/BasicAliasAnalysis.h"
2020 #include "llvm/Analysis/CallGraph.h"
2121 #include "llvm/Analysis/InlineCost.h"
22 #include "llvm/Analysis/ProfileSummaryInfo.h"
2223 #include "llvm/Analysis/TargetLibraryInfo.h"
2324 #include "llvm/IR/CallSite.h"
2425 #include "llvm/IR/DataLayout.h"
5556 /// always explicitly call the implementation here.
5657 void Inliner::getAnalysisUsage(AnalysisUsage &AU) const {
5758 AU.addRequired<AssumptionCacheTracker>();
59 AU.addRequired<ProfileSummaryInfoWrapperPass>();
5860 AU.addRequired<TargetLibraryInfoWrapperPass>();
5961 getAAResultsAnalysisUsage(AU);
6062 CallGraphSCCPass::getAnalysisUsage(AU);
373375 bool Inliner::inlineCalls(CallGraphSCC &SCC) {
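374376 CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
375377 ACT = &getAnalysis<AssumptionCacheTracker>();
378 PSI = getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(CG.getModule());
376379 auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();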
377380
378381 SmallPtrSet<Function *, 16> SCCFunctions;
44 ; A callee with identical body does get inlined because cost fits within the
55 ; inline-threshold
66
7 define i32 @callee1(i32 %x) !prof !1 {
7 define i32 @callee1(i32 %x) !prof !21 {
88 %x1 = add i32 %x, 1
99 %x2 = add i32 %x1, 1
1010 %x3 = add i32 %x2, 1
1212 ret i32 %x3
1313 }
1414
15 define i32 @callee2(i32 %x) !prof !2 {
15 define i32 @callee2(i32 %x) !prof !22 {
1616 ; CHECK-LABEL: @callee2(
1717 %x1 = add i32 %x, 1
1818 %x2 = add i32 %x1, 1
2121 ret i32 %x3
2222 }
2323
24 define i32 @caller2(i32 %y1) !prof !2 {
24 define i32 @caller2(i32 %y1) !prof !22 {
2525 ; CHECK-LABEL: @caller2(
2626 ; CHECK: call i32 @callee2
2727 ; CHECK-NOT: call i32 @callee1
3131 ret i32 %y3
3232 }
3333
34 !llvm.module.flags = !{!0}
35 !0 = !{i32 1, !"MaxFunctionCount", i32 1000}
36 !1 = !{!"function_entry_count", i64 100}
37 !2 = !{!"function_entry_count", i64 1}
34 !llvm.module.flags = !{!1}
35 !21 = !{!"function_entry_count", i64 100}
36 !22 = !{!"function_entry_count", i64 1}
3837
38 !1 = !{i32 1, !"ProfileSummary", !2}
39 !2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
40 !3 = !{!"ProfileFormat", !"InstrProf"}
41 !4 = !{!"TotalCount", i64 10000}
42 !5 = !{!"MaxCount", i64 1000}
43 !6 = !{!"MaxInternalCount", i64 1}
44 !7 = !{!"MaxFunctionCount", i64 1000}
45 !8 = !{!"NumCounts", i64 3}
46 !9 = !{!"NumFunctions", i64 3}
47 !10 = !{!"DetailedSummary", !11}
48 !11 = !{!12, !13, !14}
49 !12 = !{i32 10000, i64 100, i32 1}
50 !13 = !{i32 999000, i64 100, i32 1}
51 !14 = !{i32 999999, i64 1, i32 2}
44 ; A cold callee with identical body does not get inlined because cost exceeds the
55 ; inline-threshold
66
7 define i32 @callee1(i32 %x) !prof !1 {
7 define i32 @callee1(i32 %x) !prof !21 {
88 %x1 = add i32 %x, 1
99 %x2 = add i32 %x1, 1
1010 %x3 = add i32 %x2, 1
1212 ret i32 %x3
1313 }
1414
15 define i32 @callee2(i32 %x) !prof !2 {
15 define i32 @callee2(i32 %x) !prof !22 {
1616 ; CHECK-LABEL: @callee2(
1717 %x1 = add i32 %x, 1
1818 %x2 = add i32 %x1, 1
2121 ret i32 %x3
2222 }
2323
24 define i32 @caller2(i32 %y1) !prof !2 {
24 define i32 @caller2(i32 %y1) !prof !22 {
2525 ; CHECK-LABEL: @caller2(
2626 ; CHECK: call i32 @callee2
2727 ; CHECK-NOT: call i32 @callee1
3131 ret i32 %y3
3232 }
3333
34 !llvm.module.flags = !{!0}
35 !0 = !{i32 1, !"MaxFunctionCount", i32 10}
36 !1 = !{!"function_entry_count", i64 10}
37 !2 = !{!"function_entry_count", i64 1}
34 !llvm.module.flags = !{!1}
35 !21 = !{!"function_entry_count", i64 300}
36 !22 = !{!"function_entry_count", i64 1}
3837
38 !1 = !{i32 1, !"ProfileSummary", !2}
39 !2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
40 !3 = !{!"ProfileFormat", !"InstrProf"}
41 !4 = !{!"TotalCount", i64 10000}
42 !5 = !{!"MaxCount", i64 1000}
43 !6 = !{!"MaxInternalCount", i64 1}
44 !7 = !{!"MaxFunctionCount", i64 1000}
45 !8 = !{!"NumCounts", i64 3}
46 !9 = !{!"NumFunctions", i64 3}
47 !10 = !{!"DetailedSummary", !11}
48 !11 = !{!12, !13, !14}
49 !12 = !{i32 10000, i64 100, i32 1}
50 !13 = !{i32 999000, i64 100, i32 1}
51 !14 = !{i32 999999, i64 1, i32 2}