llvm.org GIT mirror llvm / f1f1adc
[ThinLTO] Compute synthetic function entry count Summary: This patch computes the synthetic function entry count on the whole program callgraph (based on module summary) and writes the entry counts to the summary. After function importing, this count gets attached to the IR as metadata. Since it adds a new field to the summary, this bumps up the version. Reviewers: tejohnson Subscribers: mehdi_amini, inglorion, llvm-commits Differential Revision: https://reviews.llvm.org/D43521 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@349076 91177308-0d34-0410-b5e6-96231b3b80d8 Easwaran Raman 9 months ago
24 changed file(s) with 293 addition(s) and 34 deletion(s). Raw diff Collapse all Expand all
500500 FunctionSummary::GVFlags(
501501 GlobalValue::LinkageTypes::AvailableExternallyLinkage,
502502 /*NotEligibleToImport=*/true, /*Live=*/true, /*IsLocal=*/false),
503 0, FunctionSummary::FFlags{}, std::vector(),
504 std::move(Edges), std::vector(),
503 /*InsCount=*/0, FunctionSummary::FFlags{}, /*EntryCount=*/0,
504 std::vector(), std::move(Edges),
505 std::vector(),
505506 std::vector(),
506507 std::vector(),
507508 std::vector(),
519520 /// Function summary specific flags.
520521 FFlags FunFlags;
521522
523 /// The synthesized entry count of the function.
524 /// This is only populated during ThinLink phase and remains unused while
525 /// generating per-module summaries.
526 uint64_t EntryCount = 0;
527
522528 /// List of call edge pairs from this function.
523529 std::vector CallGraphEdgeList;
524530
526532
527533 public:
528534 FunctionSummary(GVFlags Flags, unsigned NumInsts, FFlags FunFlags,
529 std::vector Refs, std::vector CGEdges,
535 uint64_t EntryCount, std::vector Refs,
536 std::vector CGEdges,
530537 std::vector TypeTests,
531538 std::vector TypeTestAssumeVCalls,
532539 std::vector TypeCheckedLoadVCalls,
533540 std::vector TypeTestAssumeConstVCalls,
534541 std::vector TypeCheckedLoadConstVCalls)
535542 : GlobalValueSummary(FunctionKind, Flags, std::move(Refs)),
536 InstCount(NumInsts), FunFlags(FunFlags),
543 InstCount(NumInsts), FunFlags(FunFlags), EntryCount(EntryCount),
537544 CallGraphEdgeList(std::move(CGEdges)) {
538545 if (!TypeTests.empty() || !TypeTestAssumeVCalls.empty() ||
539546 !TypeCheckedLoadVCalls.empty() || !TypeTestAssumeConstVCalls.empty() ||
557564
558565 /// Get the instruction count recorded for this function.
559566 unsigned instCount() const { return InstCount; }
567
568 /// Get the synthetic entry count for this function, i.e. the value given to the constructor or last stored by setEntryCount().
569 uint64_t entryCount() const { return EntryCount; }
570
571 /// Set the synthetic entry count for this function, replacing any previously stored value.
572 void setEntryCount(uint64_t EC) { EntryCount = EC; }
560573
561574 /// Return the list of (callee ValueInfo, CalleeInfo) call edge pairs.
562575 ArrayRef calls() const { return CallGraphEdgeList; }
801814 /// considered live.
802815 bool WithGlobalValueDeadStripping = false;
803816
817 /// Indicates that summary-based synthetic entry count propagation has run.
818 bool HasSyntheticEntryCounts = false;
819
804820 /// Indicates that distributed backend should skip compilation of the
805821 /// module. Flag is supposed to be set by distributed ThinLTO indexing
806822 /// when it detected that the module is not needed during the final
912928 void setWithGlobalValueDeadStripping() {
913929 WithGlobalValueDeadStripping = true;
914930 }
931
/// Return true if this index is flagged as having had summary-based
/// synthetic entry count propagation run on it.
932 bool hasSyntheticEntryCounts() const { return HasSyntheticEntryCounts; }
/// Flag this index as carrying synthetic entry counts. The flag can only be
/// raised through this setter; there is no way to clear it here.
933 void setHasSyntheticEntryCounts() { HasSyntheticEntryCounts = true; }
915934
916935 bool skipModuleByDistributedBackend() const {
917936 return SkipModuleByDistributedBackend;
11571176 /// GraphTraits definition to build SCC for the index
11581177 template <> struct GraphTraits {
11591178 typedef ValueInfo NodeRef;
1179 using EdgeRef = FunctionSummary::EdgeTy &;
11601180
11611181 static NodeRef valueInfoFromEdge(FunctionSummary::EdgeTy &P) {
11621182 return P.first;
11641184 using ChildIteratorType =
11651185 mapped_iterator::iterator,
11661186 decltype(&valueInfoFromEdge)>;
1187
1188 using ChildEdgeIteratorType = std::vector::iterator;
11671189
11681190 static NodeRef getEntryNode(ValueInfo V) { return V; }
11691191
11861208 cast(N.getSummaryList().front()->getBaseObject());
11871209 return ChildIteratorType(F->CallGraphEdgeList.end(), &valueInfoFromEdge);
11881210 }
1211
/// Return an iterator to the first outgoing call edge of \p N.
/// A node with an empty summary list is treated as external and uses the
/// edge list of FunctionSummary::ExternalNode; otherwise the edges are taken
/// from the base object of the first summary in N's summary list.
1212 static ChildEdgeIteratorType child_edge_begin(NodeRef N) {
1213 if (!N.getSummaryList().size()) // handle external function
1214 return FunctionSummary::ExternalNode.CallGraphEdgeList.begin();
1215
1216 FunctionSummary *F =
1217 cast(N.getSummaryList().front()->getBaseObject());
1218 return F->CallGraphEdgeList.begin();
1219 }
1220
/// Return the past-the-end iterator over the outgoing call edges of \p N.
/// Selects the edge list the same way as child_edge_begin: ExternalNode's
/// list for a node with no summaries, otherwise the list of the base object
/// of the first summary.
1221 static ChildEdgeIteratorType child_edge_end(NodeRef N) {
1222 if (!N.getSummaryList().size()) // handle external function
1223 return FunctionSummary::ExternalNode.CallGraphEdgeList.end();
1224
1225 FunctionSummary *F =
1226 cast(N.getSummaryList().front()->getBaseObject());
1227 return F->CallGraphEdgeList.end();
1228 }
1229
/// Return the node that call edge \p E points to, i.e. the first element of
/// the edge pair.
1230 static NodeRef edge_dest(EdgeRef E) { return E.first; }
11891231 };
11901232
11911233 template <>
223223 GlobalValueSummary::GVFlags(
224224 static_cast(FSum.Linkage),
225225 FSum.NotEligibleToImport, FSum.Live, FSum.IsLocal),
226 0, FunctionSummary::FFlags{}, Refs,
226 /*NumInsts=*/0, FunctionSummary::FFlags{}, /*EntryCount=*/0, Refs,
227227 ArrayRef{}, std::move(FSum.TypeTests),
228228 std::move(FSum.TypeTestAssumeVCalls),
229229 std::move(FSum.TypeCheckedLoadVCalls),
0 //=- llvm/LTO/SummaryBasedOptimizations.h -Link time optimizations-*- C++ -*-=//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8
9 #ifndef LLVM_LTO_SUMMARYBASEDOPTIMIZATIONS_H
10 #define LLVM_LTO_SUMMARYBASEDOPTIMIZATIONS_H
11 namespace llvm {
12 class ModuleSummaryIndex;
/// Compute synthetic function entry counts for the function summaries in
/// \p Index and store them back into the index.
13 void computeSyntheticCounts(ModuleSummaryIndex &Index);
14
15 } // namespace llvm
16 #endif
112112 bool renameModuleForThinLTO(
113113 Module &M, const ModuleSummaryIndex &Index,
114114 SetVector *GlobalsToImport = nullptr);
115
116 /// Compute synthetic function entry counts.
117 void computeSyntheticCounts(ModuleSummaryIndex &Index);
118
115119 } // End llvm namespace
116120
117121 #endif
395395 // Don't try to import functions with noinline attribute.
396396 F.getAttributes().hasFnAttribute(Attribute::NoInline)};
397397 auto FuncSummary = llvm::make_unique(
398 Flags, NumInsts, FunFlags, std::move(Refs), CallGraphEdges.takeVector(),
399 TypeTests.takeVector(), TypeTestAssumeVCalls.takeVector(),
400 TypeCheckedLoadVCalls.takeVector(),
398 Flags, NumInsts, FunFlags, /*EntryCount=*/0, std::move(Refs),
399 CallGraphEdges.takeVector(), TypeTests.takeVector(),
400 TypeTestAssumeVCalls.takeVector(), TypeCheckedLoadVCalls.takeVector(),
401401 TypeTestAssumeConstVCalls.takeVector(),
402402 TypeCheckedLoadConstVCalls.takeVector());
403403 if (NonRenamableLocal)
508508 if (Function *F = dyn_cast(GV)) {
509509 std::unique_ptr Summary =
510510 llvm::make_unique(
511 GVFlags, 0,
511 GVFlags, /*InstCount=*/0,
512512 FunctionSummary::FFlags{
513513 F->hasFnAttribute(Attribute::ReadNone),
514514 F->hasFnAttribute(Attribute::ReadOnly),
515515 F->hasFnAttribute(Attribute::NoRecurse),
516516 F->returnDoesNotAlias(),
517517 /* NoInline = */ false},
518 ArrayRef{}, ArrayRef>{},
518 /*EntryCount=*/0, ArrayRef>{},
519 ArrayRef{},
519520 ArrayRef{},
520521 ArrayRef{},
521522 ArrayRef{},
1313 #include "llvm/Analysis/SyntheticCountsUtils.h"
1414 #include "llvm/ADT/DenseSet.h"
1515 #include "llvm/ADT/SCCIterator.h"
16 #include "llvm/ADT/SmallPtrSet.h"
1716 #include "llvm/Analysis/CallGraph.h"
1817 #include "llvm/IR/CallSite.h"
1918 #include "llvm/IR/Function.h"
2019 #include "llvm/IR/InstIterator.h"
2120 #include "llvm/IR/Instructions.h"
21 #include "llvm/IR/ModuleSummaryIndex.h"
2222
2323 using namespace llvm;
2424
2828 const SccTy &SCC, GetRelBBFreqTy GetRelBBFreq, GetCountTy GetCount,
2929 AddCountTy AddCount) {
3030
31 SmallPtrSet> SCCNodes;
31 DenseSet> SCCNodes;
3232 SmallVector, 8> SCCEdges, NonSCCEdges;
3333
3434 for (auto &Node : SCC)
110110 }
111111
112112 template class llvm::SyntheticCountsUtils;
113 template class llvm::SyntheticCountsUtils;
77267726 return true;
77277727
77287728 auto FS = llvm::make_unique(
7729 GVFlags, InstCount, FFlags, std::move(Refs), std::move(Calls),
7730 std::move(TypeIdInfo.TypeTests),
7729 GVFlags, InstCount, FFlags, /*EntryCount=*/0, std::move(Refs),
7730 std::move(Calls), std::move(TypeIdInfo.TypeTests),
77317731 std::move(TypeIdInfo.TypeTestAssumeVCalls),
77327732 std::move(TypeIdInfo.TypeCheckedLoadVCalls),
77337733 std::move(TypeIdInfo.TypeTestAssumeConstVCalls),
52465246 }
52475247 const uint64_t Version = Record[0];
52485248 const bool IsOldProfileFormat = Version == 1;
5249 if (Version < 1 || Version > 5)
5249 if (Version < 1 || Version > 6)
52505250 return error("Invalid summary version " + Twine(Version) +
5251 ", 1, 2, 3, 4 or 5 expected");
5251 ". Version should be in the range [1-6].");
52525252 Record.clear();
52535253
52545254 // Keep around the last seen summary to be used when we see an optional
53025302 // 1 bit: SkipModuleByDistributedBackend flag.
53035303 if (Flags & 0x2)
53045304 TheIndex.setSkipModuleByDistributedBackend();
5305 // 1 bit: HasSyntheticEntryCounts flag.
5306 if (Flags & 0x4)
5307 TheIndex.setHasSyntheticEntryCounts();
53055308 break;
53065309 }
53075310 case bitc::FS_VALUE_GUID: { // [valueid, refguid]
53575360 IsOldProfileFormat, HasProfile, HasRelBF);
53585361 setImmutableRefs(Refs, NumImmutableRefs);
53595362 auto FS = llvm::make_unique(
5360 Flags, InstCount, getDecodedFFlags(RawFunFlags), std::move(Refs),
5361 std::move(Calls), std::move(PendingTypeTests),
5363 Flags, InstCount, getDecodedFFlags(RawFunFlags), /*EntryCount=*/0,
5364 std::move(Refs), std::move(Calls), std::move(PendingTypeTests),
53625365 std::move(PendingTypeTestAssumeVCalls),
53635366 std::move(PendingTypeCheckedLoadVCalls),
53645367 std::move(PendingTypeTestAssumeConstVCalls),
54365439 uint64_t RawFlags = Record[2];
54375440 unsigned InstCount = Record[3];
54385441 uint64_t RawFunFlags = 0;
5442 uint64_t EntryCount = 0;
54395443 unsigned NumRefs = Record[4];
54405444 unsigned NumImmutableRefs = 0;
54415445 int RefListStartIndex = 5;
54425446
54435447 if (Version >= 4) {
54445448 RawFunFlags = Record[4];
5445 NumRefs = Record[5];
54465449 RefListStartIndex = 6;
5450 size_t NumRefsIndex = 5;
54475451 if (Version >= 5) {
5448 NumImmutableRefs = Record[6];
54495452 RefListStartIndex = 7;
5453 if (Version >= 6) {
5454 NumRefsIndex = 6;
5455 EntryCount = Record[5];
5456 RefListStartIndex = 8;
5457 }
5458 NumImmutableRefs = Record[RefListStartIndex - 1];
54505459 }
5460 NumRefs = Record[NumRefsIndex];
54515461 }
54525462
54535463 auto Flags = getDecodedGVSummaryFlags(RawFlags, Version);
54635473 ValueInfo VI = getValueInfoFromValueId(ValueID).first;
54645474 setImmutableRefs(Refs, NumImmutableRefs);
54655475 auto FS = llvm::make_unique(
5466 Flags, InstCount, getDecodedFFlags(RawFunFlags), std::move(Refs),
5467 std::move(Edges), std::move(PendingTypeTests),
5476 Flags, InstCount, getDecodedFFlags(RawFunFlags), EntryCount,
5477 std::move(Refs), std::move(Edges), std::move(PendingTypeTests),
54685478 std::move(PendingTypeTestAssumeVCalls),
54695479 std::move(PendingTypeCheckedLoadVCalls),
54705480 std::move(PendingTypeTestAssumeConstVCalls),
36003600 // Current version for the summary.
36013601 // This is bumped whenever we introduce changes in the way some record are
36023602 // interpreted, like flags for instance.
3603 static const uint64_t INDEX_VERSION = 5;
3603 static const uint64_t INDEX_VERSION = 6;
36043604
36053605 /// Emit the per-module summary section alongside the rest of
36063606 /// the module's bitcode.
37313731 Flags |= 0x1;
37323732 if (Index.skipModuleByDistributedBackend())
37333733 Flags |= 0x2;
3734 if (Index.hasSyntheticEntryCounts())
3735 Flags |= 0x4;
37343736 Stream.EmitRecord(bitc::FS_FLAGS, ArrayRef{Flags});
37353737
37363738 for (const auto &GVI : valueIds()) {
37463748 Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags
37473749 Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount
37483750 Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // fflags
3751 Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // entrycount
37493752 Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs
37503753 Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // immutablerefcnt
37513754 // numrefs x valueid, n x (valueid)
38603863 NameVals.push_back(getEncodedGVSummaryFlags(FS->flags()));
38613864 NameVals.push_back(FS->instCount());
38623865 NameVals.push_back(getEncodedFFlags(FS->fflags()));
3866 NameVals.push_back(FS->entryCount());
3867
38633868 // Fill in below
38643869 NameVals.push_back(0); // numrefs
38653870 NameVals.push_back(0); // immutablerefcnt
38743879 ImmutableRefCnt++;
38753880 Count++;
38763881 }
3877 NameVals[5] = Count;
3878 NameVals[6] = ImmutableRefCnt;
3882 NameVals[6] = Count;
3883 NameVals[7] = ImmutableRefCnt;
38793884
38803885 bool HasProfileData = false;
38813886 for (auto &EI : FS->calls()) {
33 LTOBackend.cpp
44 LTOModule.cpp
55 LTOCodeGenerator.cpp
6 SummaryBasedOptimizations.cpp
67 UpdateCompilerUsed.cpp
78 ThinLTOCodeGenerator.cpp
89
2424 #include "llvm/IR/Mangler.h"
2525 #include "llvm/IR/Metadata.h"
2626 #include "llvm/LTO/LTOBackend.h"
27 #include "llvm/LTO/SummaryBasedOptimizations.h"
2728 #include "llvm/Linker/IRMover.h"
2829 #include "llvm/Object/IRObjectFile.h"
2930 #include "llvm/Support/Error.h"
4142 #include "llvm/Target/TargetOptions.h"
4243 #include "llvm/Transforms/IPO.h"
4344 #include "llvm/Transforms/IPO/PassManagerBuilder.h"
45 #include "llvm/Transforms/Utils/FunctionImportUtils.h"
4446 #include "llvm/Transforms/Utils/SplitModule.h"
4547
4648 #include
11691171 if (!ModuleToDefinedGVSummaries.count(Mod.first))
11701172 ModuleToDefinedGVSummaries.try_emplace(Mod.first);
11711173
1174 // Synthesize entry counts for functions in the CombinedIndex.
1175 computeSyntheticCounts(ThinLTO.CombinedIndex);
1176
11721177 StringMap ImportLists(
11731178 ThinLTO.ModuleMap.size());
11741179 StringMap ExportLists(
0 //==-SummaryBasedOptimizations.cpp - Optimizations based on ThinLTO summary-==//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements optimizations that are based on the module summaries.
10 // These optimizations are performed during the thinlink phase of the
11 // compilation.
12 //
13 //===----------------------------------------------------------------------===//
14
15 #include "llvm/LTO/SummaryBasedOptimizations.h"
16 #include "llvm/Analysis/SyntheticCountsUtils.h"
17 #include "llvm/IR/ModuleSummaryIndex.h"
18
19 using namespace llvm;
20
// Hidden command-line switch, off by default. computeSyntheticCounts() returns
// immediately unless it is set.
21 cl::opt ThinLTOSynthesizeEntryCounts(
22 "thinlto-synthesize-entry-counts", cl::init(false), cl::Hidden,
23 cl::desc("Synthesize entry counts based on the summary"));
24
// Declared here only; the option object is defined elsewhere. Its value is the
// count assigned to the call graph roots in initializeCounts().
25 extern cl::opt InitialSyntheticCount;
26
/// Seed the propagation: assign InitialSyntheticCount as the entry count of
/// every function summary that is a direct successor of the synthetic call
/// graph root computed from \p Index.
27 static void initializeCounts(ModuleSummaryIndex &Index) {
28 auto Root = Index.calculateCallGraphRoot();
29 // Root is a fake node. All its successors are the actual roots of the
30 // callgraph.
31 // FIXME: This initializes the entry counts of only the root nodes. This makes
32 // sense when compiling a binary with ThinLTO, but for libraries any of the
33 // non-root nodes could be called from outside.
34 for (auto &C : Root.calls()) {
35 auto &V = C.first;
// A root may carry several summaries; each of them receives the initial
// count.
36 for (auto &GVS : V.getSummaryList()) {
37 auto S = GVS.get()->getBaseObject();
38 auto *F = cast(S);
39 F->setEntryCount(InitialSyntheticCount);
40 }
41 }
42 }
43
/// Compute synthetic entry counts for the functions in \p Index.
///
/// Does nothing unless ThinLTOSynthesizeEntryCounts is set. Otherwise it seeds
/// the call graph roots via initializeCounts(), propagates the counts through
/// SyntheticCountsUtils::propagate using the three callbacks defined below,
/// and finally flags the index as having synthetic entry counts.
44 void llvm::computeSyntheticCounts(ModuleSummaryIndex &Index) {
45 if (!ThinLTOSynthesizeEntryCounts)
46 return;
47
48 using Scaled64 = ScaledNumber;
49 initializeCounts(Index);
// Relative frequency of a call edge: a Scaled64 built from the edge's
// RelBlockFreq with a scale of -CalleeInfo::ScaleShift.
50 auto GetCallSiteRelFreq = [](FunctionSummary::EdgeTy &Edge) {
51 return Scaled64(Edge.second.RelBlockFreq, -CalleeInfo::ScaleShift);
52 };
// Current count of a node: read from the first summary only; a node without
// any summary reports 0.
53 auto GetEntryCount = [](ValueInfo V) {
54 if (V.getSummaryList().size()) {
55 auto S = V.getSummaryList().front().get()->getBaseObject();
56 auto *F = cast(S);
57 return F->entryCount();
58 } else {
59 return UINT64_C(0);
60 }
61 };
// Add New to the count of every summary of the node (not just the first one
// that GetEntryCount reads), combining the values with SaturatingAdd. Nodes
// without summaries are ignored.
62 auto AddToEntryCount = [](ValueInfo V, uint64_t New) {
63 if (!V.getSummaryList().size())
64 return;
65 for (auto &GVS : V.getSummaryList()) {
66 auto S = GVS.get()->getBaseObject();
67 auto *F = cast(S);
68 F->setEntryCount(SaturatingAdd(F->entryCount(), New));
69 }
70 };
71
72 // After initializing the counts in initializeCounts above, the counts have to
73 // be propagated across the combined callgraph.
74 // SyntheticCountsUtils::propagate takes care of this propagation on any
75 // callgraph that specialized GraphTraits.
76 SyntheticCountsUtils::propagate(
77 &Index, GetCallSiteRelFreq, GetEntryCount, AddToEntryCount);
// Record on the index that the counts are now populated.
78 Index.setHasSyntheticEntryCounts();
79 }
3232 #include "llvm/IR/Verifier.h"
3333 #include "llvm/IRReader/IRReader.h"
3434 #include "llvm/LTO/LTO.h"
35 #include "llvm/LTO/SummaryBasedOptimizations.h"
3536 #include "llvm/MC/SubtargetFeature.h"
3637 #include "llvm/Object/IRObjectFile.h"
3738 #include "llvm/Support/CachePruning.h"
881882
882883 // Compute "dead" symbols, we don't want to import/export these!
883884 computeDeadSymbolsInIndex(*Index, GUIDPreservedSymbols);
885
886 // Synthesize entry counts for functions in the combined index.
887 computeSyntheticCounts(*Index);
884888
885889 // Collect the import/export lists for all modules from the call-graph in the
886890 // combined index.
4545 #define DEBUG_TYPE "synthetic-counts-propagation"
4646
4747 /// Initial synthetic count assigned to functions.
48 static cl::opt
48 cl::opt
4949 InitialSyntheticCount("initial-synthetic-count", cl::Hidden, cl::init(10),
5050 cl::ZeroOrMore,
5151 cl::desc("Initial value of synthetic entry count."));
202202
203203 void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) {
204204
205 // Check the summaries to see if the symbol gets resolved to a known local
206 // definition.
207205 ValueInfo VI;
208206 if (GV.hasName()) {
209207 VI = ImportIndex.getValueInfo(GV.getGUID());
208 // Set synthetic function entry counts.
// Only function definitions are annotated, and only when the index says that
// synthetic counts were computed. The count comes from the summary whose
// module path equals this module's identifier; the search stops at the first
// such summary.
209 if (VI && ImportIndex.hasSyntheticEntryCounts()) {
210 if (Function *F = dyn_cast(&GV)) {
211 if (!F->isDeclaration()) {
212 for (auto &S : VI.getSummaryList()) {
213 FunctionSummary *FS = dyn_cast(S->getBaseObject());
// dyn_cast yields null when the base object is not a function summary;
// skip such entries instead of dereferencing a null pointer.
214 if (FS && FS->modulePath() == M.getModuleIdentifier()) {
215 F->setEntryCount(Function::ProfileCount(FS->entryCount(),
216 Function::PCT_Synthetic));
217 break;
218 }
219 }
220 }
221 }
222 }
223 // Check the summaries to see if the symbol gets resolved to a known local
224 // definition.
210225 if (VI && VI.isDSOLocal()) {
211226 GV.setDSOLocal(true);
212227 if (GV.hasDLLImportStorageClass())
11 ; RUN: opt -module-summary %s -o - | llvm-bcanalyzer -dump | FileCheck %s
22
33 ; CHECK:
4 ; CHECK: 5/>
4 ; CHECK: 6/>
55
66
77
3232 ; COMBINED-NEXT:
3333 ; COMBINED-NEXT:
3434 ; COMBINED-NEXT:
35 ; COMBINED-NEXT: 7=[[ALIASID]]/>
35 ; COMBINED-NEXT: 8=[[ALIASID]]/>
3636 ; COMBINED-NEXT:
3737 ; COMBINED-NEXT:
3838 ; COMBINED-NEXT:
2929 ; COMBINED-NEXT:
3030 ; See if the call to func is registered, using the expected hotness type.
3131 ; op6=2 which is hotnessType::None.
32 ; COMBINED-NEXT: 7=[[FUNCID]] op8=2/>
32 ; COMBINED-NEXT: 8=[[FUNCID]] op9=2/>
3333 ; COMBINED-NEXT:
3434
3535 ; ModuleID = 'thinlto-function-summary-callgraph.ll'
7070 ; COMBINED-NEXT:
7171 ; COMBINED-NEXT:
7272 ; COMBINED-NEXT:
73 ; COMBINED-NEXT: 7=[[HOT1:.*]] op8=3 op9=[[COLD:.*]] op10=1 op11=[[HOT2:.*]] op12=3 op13=[[NONE1:.*]] op14=2 op15=[[HOT3:.*]] op16=3 op17=[[NONE2:.*]] op18=2 op19=[[NONE3:.*]] op20=2/>
73 ; COMBINED-NEXT: 8=[[HOT1:.*]] op9=3 op10=[[COLD:.*]] op11=1 op12=[[HOT2:.*]] op13=3 op14=[[NONE1:.*]] op15=2 op16=[[HOT3:.*]] op17=3 op18=[[NONE2:.*]] op19=2 op20=[[NONE3:.*]] op21=2/>
7474 ; COMBINED_NEXT:
7575 ; COMBINED_NEXT:
7676
5757 ; COMBINED-NEXT:
5858 ; COMBINED-NEXT:
5959 ; COMBINED-NEXT:
60 ; COMBINED-NEXT: 7=[[NONE1:.*]] op8=0 op9=[[HOT1:.*]] op10=3 op11=[[COLD1:.*]] op12=1 op13=[[NONE2:.*]] op14=0 op15=[[HOT2:.*]] op16=3 op17=[[COLD2:.*]] op18=1 op19=[[NONE3:.*]] op20=0 op21=[[HOT3:.*]] op22=3 op23=[[COLD3:.*]] op24=1/>
60 ; COMBINED-NEXT: 8=[[NONE1:.*]] op9=0 op10=[[HOT1:.*]] op11=3 op12=[[COLD1:.*]] op13=1 op14=[[NONE2:.*]] op15=0 op16=[[HOT2:.*]] op17=3 op18=[[COLD2:.*]] op19=1 op20=[[NONE3:.*]] op21=0 op22=[[HOT3:.*]] op23=3 op24=[[COLD3:.*]] op25=1/>
6161 ; COMBINED_NEXT:
6262 ; COMBINED_NEXT:
6363
3232 ; COMBINED-NEXT:
3333 ; COMBINED-NEXT:
3434 ; See if the call to func is registered.
35 ; COMBINED-NEXT: 7=[[FUNCID]]/>
35 ; COMBINED-NEXT: 8=[[FUNCID]]/>
3636 ; COMBINED-NEXT:
3737
3838 ; ModuleID = 'thinlto-function-summary-callgraph.ll'
0 ; REQUIRES: x86-registered-target
1 ; RUN: opt -module-summary %s -o %t.o
2
3 ; Ensure synthetic entry count flag is not set on distributed index
4 ; RUN: llvm-lto2 run %t.o -o %t.out -thinlto-distributed-indexes \
5 ; RUN: -r %t.o,glob,plx -compute-dead=false
6 ; RUN: llvm-bcanalyzer -dump %t.o.thinlto.bc | FileCheck %s --check-prefix=NOSYNTHETIC
7 ; NOSYNTHETIC:
8
9 ; Ensure synthetic entry count flag is set on distributed index
10 ; when option used to enable synthetic count propagation
11 ; RUN: llvm-lto2 run %t.o -o %t.out -thinlto-distributed-indexes \
12 ; RUN: -r %t.o,glob,plx -thinlto-synthesize-entry-counts \
13 ; RUN: -compute-dead=false
14 ; RUN: llvm-bcanalyzer -dump %t.o.thinlto.bc | FileCheck %s --check-prefix=HASSYNTHETIC
15 ; HASSYNTHETIC:
16
17 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
18 target triple = "x86_64-unknown-linux-gnu"
19
20 @glob = global i32 0
0 target triple = "x86_64-unknown-linux-gnu"
1 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
2
3 declare void @h();
4
; g: makes a single call to @h (declared above) and returns.
5 define void @g() {
6 call void @h();
7 ret void
8 }
0 ; RUN: opt -thinlto-bc %s -write-relbf-to-summary -thin-link-bitcode-file=%t1.thinlink.bc -o %t1.bc
1 ; RUN: opt -thinlto-bc %p/Inputs/function_entry_count.ll -write-relbf-to-summary -thin-link-bitcode-file=%t2.thinlink.bc -o %t2.bc
2
3 ; First perform the thin link on the normal bitcode file.
4 ; RUN: llvm-lto2 run %t1.bc %t2.bc -o %t.o -save-temps -thinlto-synthesize-entry-counts \
5 ; RUN: -r=%t1.bc,g, \
6 ; RUN: -r=%t1.bc,f,px \
7 ; RUN: -r=%t1.bc,h,px \
8 ; RUN: -r=%t2.bc,h, \
9 ; RUN: -r=%t2.bc,g,px
10 ; RUN: llvm-dis -o - %t.o.1.3.import.bc | FileCheck %s
11
12 ; RUN: llvm-lto -thinlto-action=run -thinlto-synthesize-entry-counts -exported-symbol=f \
13 ; RUN: -exported-symbol=g -exported-symbol=h -thinlto-save-temps=%t3. %t1.bc %t2.bc
14 ; RUN: llvm-dis %t3.0.3.imported.bc -o - | FileCheck %s
15
16 ; CHECK: define void @h() !prof ![[PROF2:[0-9]+]]
17 ; CHECK: define void @f(i32 %n) !prof ![[PROF1:[0-9]+]]
18 ; CHECK: define available_externally void @g() !prof ![[PROF2]]
19 ; CHECK-DAG: ![[PROF1]] = !{!"synthetic_function_entry_count", i64 10}
20 ; CHECK-DAG: ![[PROF2]] = !{!"synthetic_function_entry_count", i64 198}
21
22 target triple = "x86_64-unknown-linux-gnu"
23 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
24
25 declare void @g();
26
; h: empty function that returns immediately.
27 define void @h() {
28 ret void
29 }
30
; f: branches straight to %exit when %n < 1; otherwise enters %loop, which
; calls @g on every iteration.
; NOTE(review): the back-edge test %cmp2 compares %n, not the decremented
; %n2 -- confirm this is intentional for the test.
31 define void @f(i32 %n) {
32 entry:
33 %cmp = icmp slt i32 %n, 1
34 br i1 %cmp, label %exit, label %loop
35 loop:
36 %n1 = phi i32 [%n, %entry], [%n2, %loop]
37 call void @g()
38 %n2 = sub i32 %n1, 1
39 %cmp2 = icmp slt i32 %n, 1
40 br i1 %cmp2, label %exit, label %loop
41 exit:
42 ret void
43 }
43 }