llvm.org GIT mirror llvm / a764eb9
[PGO] Context sensitive PGO (part 1) Current PGO profile counts are not context sensitive. The branch probabilities for the inlined functions are kept the same for all call-sites, and they might be very different from the actual branch probabilities. These suboptimal profiles can greatly affect some downstream optimizations, in particular for the machine basic block placement optimization. In this patch, we propose to have a post-inline PGO instrumentation/use pass, which we called Context Sensitive PGO (CSPGO). For the users who want the best possible performance, they can perform a second round of PGO instrument/use on the top of the regular PGO. They will have two sets of profile counts. The first pass profile will be mainly for inline, indirect-call promotion, and CGSCC simplification pass optimizations. The second pass profile is for post-inline optimizations and code-gen optimizations. A typical usage: // Regular PGO instrumentation and generate pass1 profile. > clang -O2 -fprofile-generate source.c -o gen > ./gen > llvm-profdata merge default.*profraw -o pass1.profdata // CSPGO instrumentation. > clang -O2 -fprofile-use=pass1.profdata -fcs-profile-generate -o gen2 > ./gen2 // Merge two sets of profiles > llvm-profdata merge default.*profraw pass1.profdata -o profile.profdata // Use the combined profile. Pass manager will invoke two PGO use passes. > clang -O2 -fprofile-use=profile.profdata -o use This change touches many components in the compiler. The reviewed patch (D54175) will be committed in phases. Differential Revision: https://reviews.llvm.org/D54175 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@354930 91177308-0d34-0410-b5e6-96231b3b80d8 Rong Xu 1 year, 9 months ago
14 changed file(s) with 309 addition(s) and 75 deletion(s). Raw diff Collapse all Expand all
298298 void initializePGOIndirectCallPromotionLegacyPassPass(PassRegistry&);
299299 void initializePGOInstrumentationGenLegacyPassPass(PassRegistry&);
300300 void initializePGOInstrumentationUseLegacyPassPass(PassRegistry&);
301 void initializePGOInstrumentationGenCreateVarLegacyPassPass(PassRegistry&);
301302 void initializePGOMemOPSizeOptLegacyPassPass(PassRegistry&);
302303 void initializePHIEliminationPass(PassRegistry&);
303304 void initializePartialInlinerLegacyPassPass(PassRegistry&);
5454 /// Disable entirely the optimizer, including importing for ThinLTO
5555 bool CodeGenOnly = false;
5656
57 /// Run PGO context sensitive IR instrumentation.
58 bool RunCSIRInstr = false;
59
5760 /// If this field is set, the set of passes run in the middle-end optimizer
5861 /// will be the one specified by the string. Only works with the new pass
5962 /// manager as the old one doesn't have this ability.
7174 /// Setting this field will replace unspecified target triples in input files
7275 /// with this triple.
7376 std::string DefaultTriple;
77
78 /// Context Sensitive PGO profile path.
79 std::string CSIRProfile;
7480
7581 /// Sample PGO profile path.
7682 std::string SampleProfile;
101101 (void) llvm::createGCOVProfilerPass();
102102 (void) llvm::createPGOInstrumentationGenLegacyPass();
103103 (void) llvm::createPGOInstrumentationUseLegacyPass();
104 (void) llvm::createPGOInstrumentationGenCreateVarLegacyPass();
104105 (void) llvm::createPGOIndirectCallPromotionLegacyPass();
105106 (void) llvm::createPGOMemOPSizeOptLegacyPass();
106107 (void) llvm::createInstrProfilingLegacyPass();
766766 StringRef Name;
767767 uint64_t Hash;
768768
769 // We reserve this bit as the flag for context sensitive profile record.
770 static const int CS_FLAG_IN_FUNC_HASH = 60;
771
769772 NamedInstrProfRecord() = default;
770773 NamedInstrProfRecord(StringRef Name, uint64_t Hash,
771774 std::vector Counts)
772775 : InstrProfRecord(std::move(Counts)), Name(Name), Hash(Hash) {}
776
777 static bool hasCSFlagInHash(uint64_t FuncHash) {
778 return ((FuncHash >> CS_FLAG_IN_FUNC_HASH) & 1);
779 }
780 static void setCSFlagInHash(uint64_t &FuncHash) {
781 FuncHash |= ((uint64_t)1 << CS_FLAG_IN_FUNC_HASH);
782 }
773783 };
774784
775785 uint32_t InstrProfRecord::getNumValueKinds() const {
10031013 // from control data struct is changed from raw pointer to Name's MD5 value.
10041014 // Version 4: ValueDataBegin and ValueDataSizes fields are removed from the
10051015 // raw header.
1016 // Version 5: Bit 60 of FuncHash is reserved for the flag for the context
1017 // sensitive records.
10061018 const uint64_t Version = INSTR_PROF_RAW_VERSION;
10071019
10081020 template inline uint64_t getMagic();
10391051 void getMemOPSizeRangeFromOption(StringRef Str, int64_t &RangeStart,
10401052 int64_t &RangeLast);
10411053
1054 // Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime
1055 // aware this is an ir_level profile so it can set the version flag.
1056 void createIRLevelProfileFlagVar(Module &M, bool IsCS);
1057
10421058 // Create the variable for the profile file name.
10431059 void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput);
10441060
634634 * version for other variants of profile. We set the lowest bit of the upper 8
635635 * bits (i.e. bit 56) to 1 to indicate if this is an IR-level instrumentation
636636 * generated profile, and 0 if this is a Clang FE generated profile.
637 * 1 in bit 57 indicates there are context-sensitive records in the profile.
637638 */
638639 #define VARIANT_MASKS_ALL 0xff00000000000000ULL
639640 #define GET_VERSION(V) ((V) & ~VARIANT_MASKS_ALL)
640641 #define VARIANT_MASK_IR_PROF (0x1ULL << 56)
642 #define VARIANT_MASK_CSIR_PROF (0x1ULL << 57)
641643 #define INSTR_PROF_RAW_VERSION_VAR __llvm_profile_raw_version
642644 #define INSTR_PROF_PROFILE_RUNTIME_VAR __llvm_profile_runtime
643645
3434 class InstrProfiling : public PassInfoMixin {
3535 public:
3636 InstrProfiling() = default;
37 InstrProfiling(const InstrProfOptions &Options) : Options(Options) {}
37 InstrProfiling(const InstrProfOptions &Options, bool IsCS)
38 : Options(Options), IsCS(IsCS) {}
3839
3940 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
4041 bool run(Module &M, const TargetLibraryInfo &TLI);
5859 std::vector ReferencedNames;
5960 GlobalVariable *NamesVar;
6061 size_t NamesSize;
62
63 // Is this lowering for the context-sensitive instrumentation.
64 bool IsCS;
6165
6266 // vector of counter load/store pairs to be register promoted.
6367 std::vector PromotionCandidates;
1616
1717 #include "llvm/ADT/ArrayRef.h"
1818 #include "llvm/IR/PassManager.h"
19 #include "llvm/ProfileData/InstrProf.h"
1920 #include
2021 #include
2122
2627 class Module;
2728
2829 /// The instrumentation (profile-instr-gen) pass for IR based PGO.
30 // We use this pass to create COMDAT profile variables for context
31 // sensitive PGO (CSPGO). The reason to have a pass for this is CSPGO
32 // can be run after LTO/ThinLTO linking. Lld linker needs to see
33 // all the COMDAT variables before linking. So we have this pass
34 // always run before linking for CSPGO.
35 class PGOInstrumentationGenCreateVar
36 : public PassInfoMixin {
37 public:
38 PGOInstrumentationGenCreateVar(std::string CSInstrName = "")
39 : CSInstrName(CSInstrName) {}
40 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) {
41 createProfileFileNameVar(M, CSInstrName);
42 createIRLevelProfileFlagVar(M, /* IsCS */ true);
43 return PreservedAnalyses::all();
44 }
45
46 private:
47 std::string CSInstrName;
48 };
49
50 /// The instrumentation (profile-instr-gen) pass for IR based PGO.
2951 class PGOInstrumentationGen : public PassInfoMixin {
3052 public:
53 PGOInstrumentationGen(bool IsCS = false) : IsCS(IsCS) {}
3154 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
55
56 private:
57 // If this is a context sensitive instrumentation.
58 bool IsCS;
3259 };
3360
3461 /// The profile annotation (profile-instr-use) pass for IR based PGO.
3562 class PGOInstrumentationUse : public PassInfoMixin {
3663 public:
3764 PGOInstrumentationUse(std::string Filename = "",
38 std::string RemappingFilename = "");
65 std::string RemappingFilename = "", bool IsCS = false);
3966
4067 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
4168
4269 private:
4370 std::string ProfileFileName;
4471 std::string ProfileRemappingFileName;
72 // If this is a context sensitive instrumentation.
73 bool IsCS;
4574 };
4675
4776 /// The indirect function call promotion pass.
8686 ModulePass *createGCOVProfilerPass(const GCOVOptions &Options =
8787 GCOVOptions::getDefault());
8888
89 // PGO Instrumention
90 ModulePass *createPGOInstrumentationGenLegacyPass();
89 // PGO Instrumentation. Parameter IsCS indicates if this is the context sensitive
90 // instrumentation.
91 ModulePass *createPGOInstrumentationGenLegacyPass(bool IsCS = false);
9192 ModulePass *
92 createPGOInstrumentationUseLegacyPass(StringRef Filename = StringRef(""));
93 createPGOInstrumentationUseLegacyPass(StringRef Filename = StringRef(""),
94 bool IsCS = false);
95 ModulePass *createPGOInstrumentationGenCreateVarLegacyPass(
96 StringRef CSInstrName = StringRef(""));
9397 ModulePass *createPGOIndirectCallPromotionLegacyPass(bool InLTO = false,
9498 bool SamplePGO = false);
9599 FunctionPass *createPGOMemOPSizeOptLegacyPass();
131135 // Use atomic profile counter increments.
132136 bool Atomic = false;
133137
138 // Use BFI to guide register promotion
139 bool UseBFIInPromotion = false;
140
134141 // Name of the profile file to use as output
135142 std::string InstrProfileOutput;
136143
137144 InstrProfOptions() = default;
138145 };
139146
140 /// Insert frontend instrumentation based profiling.
147 /// Insert frontend instrumentation based profiling. Parameter IsCS indicates if
148 /// this is the context sensitive instrumentation.
141149 ModulePass *createInstrProfilingLegacyPass(
142 const InstrProfOptions &Options = InstrProfOptions());
150 const InstrProfOptions &Options = InstrProfOptions(), bool IsCS = false);
143151
144152 FunctionPass *createHWAddressSanitizerPass(bool CompileKernel = false,
145153 bool Recover = false);
568568 if (!ProfileGenFile.empty())
569569 Options.InstrProfileOutput = ProfileGenFile;
570570 Options.DoCounterPromotion = true;
571 MPM.addPass(InstrProfiling(Options));
571 Options.UseBFIInPromotion = false;
572 MPM.addPass(InstrProfiling(Options, false));
572573 }
573574
574575 if (!ProfileUseFile.empty())
10101010 assert(RangeLast >= RangeStart);
10111011 }
10121012
1013 // Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime
1014 // aware this is an ir_level profile so it can set the version flag.
1015 void createIRLevelProfileFlagVar(Module &M, bool IsCS) {
1016 const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR));
1017 Type *IntTy64 = Type::getInt64Ty(M.getContext());
1018 uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF);
1019 if (IsCS)
1020 ProfileVersion |= VARIANT_MASK_CSIR_PROF;
1021 auto IRLevelVersionVariable = new GlobalVariable(
1022 M, IntTy64, true, GlobalValue::WeakAnyLinkage,
1023 Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName);
1024 IRLevelVersionVariable->setVisibility(GlobalValue::DefaultVisibility);
1025 Triple TT(M.getTargetTriple());
1026 if (TT.supportsCOMDAT()) {
1027 IRLevelVersionVariable->setLinkage(GlobalValue::ExternalLinkage);
1028 IRLevelVersionVariable->setComdat(M.getOrInsertComdat(VarName));
1029 }
1030 }
1031
10131032 // Create the variable for the profile file name.
10141033 void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput) {
10151034 if (InstrProfileOutput.empty())
1717 #include "llvm/ADT/StringRef.h"
1818 #include "llvm/ADT/Triple.h"
1919 #include "llvm/ADT/Twine.h"
20 #include "llvm/Analysis/BlockFrequencyInfo.h"
21 #include "llvm/Analysis/BranchProbabilityInfo.h"
2022 #include "llvm/Analysis/LoopInfo.h"
2123 #include "llvm/Analysis/TargetLibraryInfo.h"
2224 #include "llvm/IR/Attributes.h"
146148 static char ID;
147149
148150 InstrProfilingLegacyPass() : ModulePass(ID) {}
149 InstrProfilingLegacyPass(const InstrProfOptions &Options)
150 : ModulePass(ID), InstrProf(Options) {}
151 InstrProfilingLegacyPass(const InstrProfOptions &Options, bool IsCS)
152 : ModulePass(ID), InstrProf(Options, IsCS) {}
151153
152154 StringRef getPassName() const override {
153155 return "Frontend instrumentation-based coverage lowering";
231233 public:
232234 PGOCounterPromoter(
233235 DenseMap> &LoopToCands,
234 Loop &CurLoop, LoopInfo &LI)
236 Loop &CurLoop, LoopInfo &LI, BlockFrequencyInfo *BFI)
235237 : LoopToCandidates(LoopToCands), ExitBlocks(), InsertPts(), L(CurLoop),
236 LI(LI) {
238 LI(LI), BFI(BFI) {
237239
238240 SmallVector LoopExitBlocks;
239241 SmallPtrSet BlockSet;
261263 SmallVector NewPHIs;
262264 SSAUpdater SSA(&NewPHIs);
263265 Value *InitVal = ConstantInt::get(Cand.first->getType(), 0);
266
267 // If BFI is set, we will use it to guide the promotions.
268 if (BFI) {
269 auto *BB = Cand.first->getParent();
270 auto InstrCount = BFI->getBlockProfileCount(BB);
271 if (!InstrCount)
272 continue;
273 auto PreheaderCount = BFI->getBlockProfileCount(L.getLoopPreheader());
274 // If the average loop trip count is not greater than 1.5, we skip
275 // promotion.
276 if (PreheaderCount &&
277 (PreheaderCount.getValue() * 3) >= (InstrCount.getValue() * 2))
278 continue;
279 }
264280
265281 PGOCounterPromoterHelper Promoter(Cand.first, Cand.second, SSA, InitVal,
266282 L.getLoopPreheader(), ExitBlocks,
311327
312328 SmallVector ExitingBlocks;
313329 LP->getExitingBlocks(ExitingBlocks);
330
331 // If BFI is set, we do more aggressive promotions based on BFI.
332 if (BFI)
333 return (unsigned)-1;
334
314335 // Not considered speculative.
315336 if (ExitingBlocks.size() == 1)
316337 return MaxNumOfPromotionsPerLoop;
342363 SmallVector InsertPts;
343364 Loop &L;
344365 LoopInfo &LI;
366 BlockFrequencyInfo *BFI;
345367 };
346368
347369 } // end anonymous namespace
364386 "Frontend instrumentation-based coverage lowering.", false, false)
365387
366388 ModulePass *
367 llvm::createInstrProfilingLegacyPass(const InstrProfOptions &Options) {
368 return new InstrProfilingLegacyPass(Options);
389 llvm::createInstrProfilingLegacyPass(const InstrProfOptions &Options,
390 bool IsCS) {
391 return new InstrProfilingLegacyPass(Options, IsCS);
369392 }
370393
371394 static InstrProfIncrementInst *castToIncrementInst(Instruction *Instr) {
414437 LoopInfo LI(DT);
415438 DenseMap> LoopPromotionCandidates;
416439
440 std::unique_ptr BFI;
441 if (Options.UseBFIInPromotion) {
442 std::unique_ptr BPI;
443 BPI.reset(new BranchProbabilityInfo(*F, LI, TLI));
444 BFI.reset(new BlockFrequencyInfo(*F, *BPI, LI));
445 }
446
417447 for (const auto &LoadStore : PromotionCandidates) {
418448 auto *CounterLoad = LoadStore.first;
419449 auto *CounterStore = LoadStore.second;
429459 // Do a post-order traversal of the loops so that counter updates can be
430460 // iteratively hoisted outside the loop nest.
431461 for (auto *Loop : llvm::reverse(Loops)) {
432 PGOCounterPromoter Promoter(LoopPromotionCandidates, *Loop, LI);
462 PGOCounterPromoter Promoter(LoopPromotionCandidates, *Loop, LI, BFI.get());
433463 Promoter.run(&TotalCountersPromoted);
434464 }
435465 }
680710 // Don't do this for Darwin. compiler-rt uses linker magic.
681711 if (TT.isOSDarwin())
682712 return false;
683
684713 // Use linker script magic to get data/cnts/name start/end.
685714 if (TT.isOSLinux() || TT.isOSFreeBSD() || TT.isOSNetBSD() ||
686715 TT.isOSFuchsia() || TT.isPS4CPU() || TT.isOSWindows())
9841013 }
9851014
9861015 void InstrProfiling::emitInitialization() {
987 // Create variable for profile name.
988 createProfileFileNameVar(*M, Options.InstrProfileOutput);
1016 // Create ProfileFileName variable. Don't do this for the
1017 // context-sensitive instrumentation lowering: This lowering is after
1018 // LTO/ThinLTO linking. Pass PGOInstrumentationGenCreateVar should
1019 // have already created the variable before LTO/ThinLTO linking.
1020 if (!IsCS)
1021 createProfileFileNameVar(*M, Options.InstrProfileOutput);
9891022 Function *RegisterF = M->getFunction(getInstrProfRegFuncsName());
9901023 if (!RegisterF)
9911024 return;
6464 #include "llvm/Analysis/IndirectCallVisitor.h"
6565 #include "llvm/Analysis/LoopInfo.h"
6666 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
67 #include "llvm/Analysis/ProfileSummaryInfo.h"
6768 #include "llvm/IR/Attributes.h"
6869 #include "llvm/IR/BasicBlock.h"
6970 #include "llvm/IR/CFG.h"
131132 STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile.");
132133 STATISTIC(NumOfPGOMissing, "Number of functions without profile.");
133134 STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations.");
135 STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO.");
136 STATISTIC(NumOfCSPGOSelectInsts,
137 "Number of select instruction instrumented in CSPGO.");
138 STATISTIC(NumOfCSPGOMemIntrinsics,
139 "Number of mem intrinsics instrumented in CSPGO.");
140 STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO.");
141 STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO.");
142 STATISTIC(NumOfCSPGOSplit, "Number of critical edge splits in CSPGO.");
143 STATISTIC(NumOfCSPGOFunc,
144 "Number of functions having valid profile counts in CSPGO.");
145 STATISTIC(NumOfCSPGOMismatch,
146 "Number of functions having mismatch profile in CSPGO.");
147 STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO.");
134148
135149 // Command line option to specify the file to read profile from. This is
136150 // mainly used for testing.
382396 public:
383397 static char ID;
384398
385 PGOInstrumentationGenLegacyPass() : ModulePass(ID) {
399 PGOInstrumentationGenLegacyPass(bool IsCS = false)
400 : ModulePass(ID), IsCS(IsCS) {
386401 initializePGOInstrumentationGenLegacyPassPass(
387402 *PassRegistry::getPassRegistry());
388403 }
390405 StringRef getPassName() const override { return "PGOInstrumentationGenPass"; }
391406
392407 private:
408 // Whether this is context-sensitive instrumentation.
409 bool IsCS;
393410 bool runOnModule(Module &M) override;
394411
395412 void getAnalysisUsage(AnalysisUsage &AU) const override {
402419 static char ID;
403420
404421 // Provide the profile filename as the parameter.
405 PGOInstrumentationUseLegacyPass(std::string Filename = "")
406 : ModulePass(ID), ProfileFileName(std::move(Filename)) {
422 PGOInstrumentationUseLegacyPass(std::string Filename = "", bool IsCS = false)
423 : ModulePass(ID), ProfileFileName(std::move(Filename)), IsCS(IsCS) {
407424 if (!PGOTestProfileFile.empty())
408425 ProfileFileName = PGOTestProfileFile;
409426 initializePGOInstrumentationUseLegacyPassPass(
414431
415432 private:
416433 std::string ProfileFileName;
434 // Whether this is context-sensitive instrumentation use.
435 bool IsCS;
417436
418437 bool runOnModule(Module &M) override;
419438
420439 void getAnalysisUsage(AnalysisUsage &AU) const override {
440 AU.addRequired();
421441 AU.addRequired();
422442 }
443 };
444
445 class PGOInstrumentationGenCreateVarLegacyPass : public ModulePass {
446 public:
447 static char ID;
448 StringRef getPassName() const override {
449 return "PGOInstrumentationGenCreateVarPass";
450 }
451 PGOInstrumentationGenCreateVarLegacyPass(std::string CSInstrName = "")
452 : ModulePass(ID), InstrProfileOutput(CSInstrName) {
453 initializePGOInstrumentationGenCreateVarLegacyPassPass(
454 *PassRegistry::getPassRegistry());
455 }
456
457 private:
458 bool runOnModule(Module &M) override {
459 createProfileFileNameVar(M, InstrProfileOutput);
460 createIRLevelProfileFlagVar(M, true);
461 return false;
462 }
463 std::string InstrProfileOutput;
423464 };
424465
425466 } // end anonymous namespace
433474 INITIALIZE_PASS_END(PGOInstrumentationGenLegacyPass, "pgo-instr-gen",
434475 "PGO instrumentation.", false, false)
435476
436 ModulePass *llvm::createPGOInstrumentationGenLegacyPass() {
437 return new PGOInstrumentationGenLegacyPass();
477 ModulePass *llvm::createPGOInstrumentationGenLegacyPass(bool IsCS) {
478 return new PGOInstrumentationGenLegacyPass(IsCS);
438479 }
439480
440481 char PGOInstrumentationUseLegacyPass::ID = 0;
443484 "Read PGO instrumentation profile.", false, false)
444485 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
445486 INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
487 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
446488 INITIALIZE_PASS_END(PGOInstrumentationUseLegacyPass, "pgo-instr-use",
447489 "Read PGO instrumentation profile.", false, false)
448490
449 ModulePass *llvm::createPGOInstrumentationUseLegacyPass(StringRef Filename) {
450 return new PGOInstrumentationUseLegacyPass(Filename.str());
491 ModulePass *llvm::createPGOInstrumentationUseLegacyPass(StringRef Filename,
492 bool IsCS) {
493 return new PGOInstrumentationUseLegacyPass(Filename.str(), IsCS);
494 }
495
496 char PGOInstrumentationGenCreateVarLegacyPass::ID = 0;
497
498 INITIALIZE_PASS(PGOInstrumentationGenCreateVarLegacyPass,
499 "pgo-instr-gen-create-var",
500 "Create PGO instrumentation version variable for CSPGO.", false,
501 false)
502
503 ModulePass *
504 llvm::createPGOInstrumentationGenCreateVarLegacyPass(StringRef CSInstrName) {
505 return new PGOInstrumentationGenCreateVarLegacyPass(CSInstrName);
451506 }
452507
453508 namespace {
495550 private:
496551 Function &F;
497552
553 // Whether this is context-sensitive instrumentation.
554 bool IsCS;
555
498556 // A map that stores the Comdat group in function F.
499557 std::unordered_multimap &ComdatMembers;
500558
534592 Function &Func,
535593 std::unordered_multimap &ComdatMembers,
536594 bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr,
537 BlockFrequencyInfo *BFI = nullptr)
538 : F(Func), ComdatMembers(ComdatMembers), ValueSites(IPVK_Last + 1),
539 SIVisitor(Func), MIVisitor(Func), MST(F, BPI, BFI) {
595 BlockFrequencyInfo *BFI = nullptr, bool IsCS = false)
596 : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers),
597 ValueSites(IPVK_Last + 1), SIVisitor(Func), MIVisitor(Func),
598 MST(F, BPI, BFI) {
540599 // This should be done before CFG hash computation.
541600 SIVisitor.countSelects(Func);
542601 MIVisitor.countMemIntrinsics(Func);
543 NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
544 NumOfPGOMemIntrinsics += MIVisitor.getNumOfMemIntrinsics();
545 ValueSites[IPVK_IndirectCallTarget] = findIndirectCalls(Func);
602 if (!IsCS) {
603 NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
604 NumOfPGOMemIntrinsics += MIVisitor.getNumOfMemIntrinsics();
605 NumOfPGOBB += MST.BBInfos.size();
606 ValueSites[IPVK_IndirectCallTarget] = findIndirectCalls(Func);
607 } else {
608 NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
609 NumOfCSPGOMemIntrinsics += MIVisitor.getNumOfMemIntrinsics();
610 NumOfCSPGOBB += MST.BBInfos.size();
611 }
546612 ValueSites[IPVK_MemOPSize] = MIVisitor.findMemIntrinsics(Func);
547613
548614 FuncName = getPGOFuncName(F);
551617 renameComdatFunction();
552618 LLVM_DEBUG(dumpInfo("after CFGMST"));
553619
554 NumOfPGOBB += MST.BBInfos.size();
555620 for (auto &E : MST.AllEdges) {
556621 if (E->Removed)
557622 continue;
558 NumOfPGOEdge++;
623 IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++;
559624 if (!E->InMST)
560 NumOfPGOInstrument++;
625 IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++;
561626 }
562627
563628 if (CreateGlobalVar)
596661 }
597662 }
598663 JC.update(Indexes);
664
665 // Hash format for context sensitive profile. Reserve 4 bits for other
666 // information.
599667 FunctionHash = (uint64_t)SIVisitor.getNumOfSelectInsts() << 56 |
600668 (uint64_t)ValueSites[IPVK_IndirectCallTarget].size() << 48 |
669 //(uint64_t)ValueSites[IPVK_MemOPSize].size() << 40 |
601670 (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC();
671 // Reserve bits 60-63 for other information.
672 FunctionHash &= 0x0FFFFFFFFFFFFFFF;
673 if (IsCS)
674 NamedInstrProfRecord::setCSFlagInHash(FunctionHash);
602675 LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n"
603676 << " CRC = " << JC.getCRC()
604677 << ", Selects = " << SIVisitor.getNumOfSelectInsts()
704777
705778 // For a critical edge, we have to split. Instrument the newly
706779 // created BB.
707 NumOfPGOSplit++;
780 IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++;
708781 LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index
709782 << " --> " << getBBInfo(DestBB).Index << "\n");
710783 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
719792 // Critical edges will be split.
720793 static void instrumentOneFunc(
721794 Function &F, Module *M, BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFI,
722 std::unordered_multimap &ComdatMembers) {
795 std::unordered_multimap &ComdatMembers,
796 bool IsCS) {
723797 // Split indirectbr critical edges here before computing the MST rather than
724798 // later in getInstrBB() to avoid invalidating it.
725799 SplitIndirectBrCriticalEdges(F, BPI, BFI);
800
726801 FuncPGOInstrumentation FuncInfo(F, ComdatMembers, true, BPI,
727 BFI);
802 BFI, IsCS);
728803 unsigned NumCounters = FuncInfo.getNumCounters();
729804
730805 uint32_t I = 0;
851926 PGOUseFunc(Function &Func, Module *Modu,
852927 std::unordered_multimap &ComdatMembers,
853928 BranchProbabilityInfo *BPI = nullptr,
854 BlockFrequencyInfo *BFIin = nullptr)
929 BlockFrequencyInfo *BFIin = nullptr, bool IsCS = false)
855930 : F(Func), M(Modu), BFI(BFIin),
856 FuncInfo(Func, ComdatMembers, false, BPI, BFIin),
857 FreqAttr(FFA_Normal) {}
931 FuncInfo(Func, ComdatMembers, false, BPI, BFIin, IsCS),
932 FreqAttr(FFA_Normal), IsCS(IsCS) {}
858933
859934 // Read counts for the instrumented BB from profile.
860935 bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros);
9261001
9271002 // Function hotness info derived from profile.
9281003 FuncFreqAttr FreqAttr;
1004
1005 // Whether to use the context-sensitive profile.
1006 bool IsCS;
9291007
9301008 // Find the Instrumented BB and set the value.
9311009 void setInstrumentedCounts(const std::vector &CountFromProfile);
10201098 handleAllErrors(std::move(E), [&](const InstrProfError &IPE) {
10211099 auto Err = IPE.get();
10221100 bool SkipWarning = false;
1101 LLVM_DEBUG(dbgs() << "Error in reading profile for Func "
1102 << FuncInfo.FuncName << ": ");
10231103 if (Err == instrprof_error::unknown_function) {
1024 NumOfPGOMissing++;
1104 IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++;
10251105 SkipWarning = !PGOWarnMissing;
1106 LLVM_DEBUG(dbgs() << "unknown function");
10261107 } else if (Err == instrprof_error::hash_mismatch ||
10271108 Err == instrprof_error::malformed) {
1028 NumOfPGOMismatch++;
1109 IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++;
10291110 SkipWarning =
10301111 NoPGOWarnMismatch ||
10311112 (NoPGOWarnMismatchComdat &&
10321113 (F.hasComdat() ||
10331114 F.getLinkage() == GlobalValue::AvailableExternallyLinkage));
1115 LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")");
10341116 }
10351117
1118 LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n");
10361119 if (SkipWarning)
10371120 return;
10381121
1039 std::string Msg = IPE.message() + std::string(" ") + F.getName().str();
1122 std::string Msg = IPE.message() + std::string(" ") + F.getName().str() +
1123 std::string(" Hash = ") +
1124 std::to_string(FuncInfo.FunctionHash);
1125
10401126 Ctx.diagnose(
10411127 DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning));
10421128 });
10451131 ProfileRecord = std::move(Result.get());
10461132 std::vector &CountFromProfile = ProfileRecord.Counts;
10471133
1048 NumOfPGOFunc++;
1134 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++;
10491135 LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n");
10501136 uint64_t ValueSum = 0;
10511137 for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) {
10601146 getBBInfo(nullptr).UnknownCountInEdge = 2;
10611147
10621148 setInstrumentedCounts(CountFromProfile);
1149 #if 0
1150 ProgramMaxCount = PGOReader->getMaximumFunctionCount(IsCS);
1151 #else
10631152 ProgramMaxCount = PGOReader->getMaximumFunctionCount();
1153 #endif
10641154 return true;
10651155 }
10661156
11651255 // Assign the scaled count values to the BB with multiple out edges.
11661256 void PGOUseFunc::setBranchWeights() {
11671257 // Generate MD_prof metadata for every branch instruction.
1168 LLVM_DEBUG(dbgs() << "\nSetting branch weights.\n");
1258 LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName()
1259 << " IsCS=" << IsCS << "\n");
11691260 for (auto &BB : F) {
11701261 Instruction *TI = BB.getTerminator();
11711262 if (TI->getNumSuccessors() < 2)
11731264 if (!(isa(TI) || isa(TI) ||
11741265 isa(TI)))
11751266 continue;
1267
11761268 if (getBBInfo(&BB).CountValue == 0)
11771269 continue;
11781270
13501442 }
13511443 }
13521444
1353 // Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime
1354 // aware this is an ir_level profile so it can set the version flag.
1355 static void createIRLevelProfileFlagVariable(Module &M) {
1356 Type *IntTy64 = Type::getInt64Ty(M.getContext());
1357 uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF);
1358 auto IRLevelVersionVariable = new GlobalVariable(
1359 M, IntTy64, true, GlobalVariable::ExternalLinkage,
1360 Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)),
1361 INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR));
1362 IRLevelVersionVariable->setVisibility(GlobalValue::DefaultVisibility);
1363 Triple TT(M.getTargetTriple());
1364 if (!TT.supportsCOMDAT())
1365 IRLevelVersionVariable->setLinkage(GlobalValue::WeakAnyLinkage);
1366 else
1367 IRLevelVersionVariable->setComdat(M.getOrInsertComdat(
1368 StringRef(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR))));
1369 }
1370
13711445 // Collect the set of members for each Comdat in module M and store
13721446 // in ComdatMembers.
13731447 static void collectComdatMembers(
13881462
13891463 static bool InstrumentAllFunctions(
13901464 Module &M, function_ref LookupBPI,
1391 function_ref LookupBFI) {
1392 createIRLevelProfileFlagVariable(M);
1465 function_ref LookupBFI, bool IsCS) {
1466 // For the context-sensitive instrumentation, we should have a separate pass
1467 // (before LTO/ThinLTO linking) to create these variables.
1468 if (!IsCS)
1469 createIRLevelProfileFlagVar(M, /* IsCS */ false);
13931470 std::unordered_multimap ComdatMembers;
13941471 collectComdatMembers(M, ComdatMembers);
13951472
13981475 continue;
13991476 auto *BPI = LookupBPI(F);
14001477 auto *BFI = LookupBFI(F);
1401 instrumentOneFunc(F, &M, BPI, BFI, ComdatMembers);
1478 instrumentOneFunc(F, &M, BPI, BFI, ComdatMembers, IsCS);
14021479 }
14031480 return true;
14041481 }
14131490 auto LookupBFI = [this](Function &F) {
14141491 return &this->getAnalysis(F).getBFI();
14151492 };
1416 return InstrumentAllFunctions(M, LookupBPI, LookupBFI);
1493 return InstrumentAllFunctions(M, LookupBPI, LookupBFI, IsCS);
14171494 }
14181495
14191496 PreservedAnalyses PGOInstrumentationGen::run(Module &M,
14271504 return &FAM.getResult(F);
14281505 };
14291506
1430 if (!InstrumentAllFunctions(M, LookupBPI, LookupBFI))
1507 if (!InstrumentAllFunctions(M, LookupBPI, LookupBFI, IsCS))
14311508 return PreservedAnalyses::all();
14321509
14331510 return PreservedAnalyses::none();
14361513 static bool annotateAllFunctions(
14371514 Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName,
14381515 function_ref LookupBPI,
1439 function_ref LookupBFI) {
1516 function_ref LookupBFI, bool IsCS) {
14401517 LLVM_DEBUG(dbgs() << "Read in profile counters: ");
14411518 auto &Ctx = M.getContext();
14421519 // Read the counter array from file.
14571534 StringRef("Cannot get PGOReader")));
14581535 return false;
14591536 }
1537 #if 0
1538 if (!PGOReader->hasCSIRLevelProfile() && IsCS)
1539 return false;
1540 #endif
1541
14601542 // TODO: might need to change the warning once the clang option is finalized.
14611543 if (!PGOReader->isIRLevelProfile()) {
14621544 Ctx.diagnose(DiagnosticInfoPGOProfile(
14761558 // Split indirectbr critical edges here before computing the MST rather than
14771559 // later in getInstrBB() to avoid invalidating it.
14781560 SplitIndirectBrCriticalEdges(F, BPI, BFI);
1479 PGOUseFunc Func(F, &M, ComdatMembers, BPI, BFI);
1561 PGOUseFunc Func(F, &M, ComdatMembers, BPI, BFI, IsCS);
14801562 bool AllZeros = false;
14811563 if (!Func.readCounters(PGOReader.get(), AllZeros))
14821564 continue;
15241606 }
15251607 }
15261608 }
1609 #if 0
1610 M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()),
1611 IsCS ? ProfileSummary::PSK_CSInstr
1612 : ProfileSummary::PSK_Instr);
1613 #else
15271614 M.setProfileSummary(PGOReader->getSummary().getMD(M.getContext()));
1615 #endif
1616
15281617 // Set function hotness attribute from the profile.
15291618 // We have to apply these attributes at the end because their presence
15301619 // can affect the BranchProbabilityInfo of any callers, resulting in an
15431632 }
15441633
15451634 PGOInstrumentationUse::PGOInstrumentationUse(std::string Filename,
1546 std::string RemappingFilename)
1635 std::string RemappingFilename,
1636 bool IsCS)
15471637 : ProfileFileName(std::move(Filename)),
1548 ProfileRemappingFileName(std::move(RemappingFilename)) {
1638 ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS) {
15491639 if (!PGOTestProfileFile.empty())
15501640 ProfileFileName = PGOTestProfileFile;
15511641 if (!PGOTestProfileRemappingFile.empty())
15651655 };
15661656
15671657 if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName,
1568 LookupBPI, LookupBFI))
1658 LookupBPI, LookupBFI, IsCS))
15691659 return PreservedAnalyses::all();
15701660
15711661 return PreservedAnalyses::none();
15821672 return &this->getAnalysis(F).getBFI();
15831673 };
15841674
1585 return annotateAllFunctions(M, ProfileFileName, "", LookupBPI, LookupBFI);
1675 return annotateAllFunctions(M, ProfileFileName, "", LookupBPI, LookupBFI,
1676 IsCS);
15861677 }
15871678
15881679 static std::string getSimpleNodeName(const BasicBlock *Node) {
208208 static std::string OptRemarksFilename;
209209 static bool OptRemarksWithHotness = false;
210210
211 // Context sensitive PGO options.
212 static std::string cs_profile_path;
213 static bool cs_pgo_gen = false;
214
211215 static void process_plugin_option(const char *opt_)
212216 {
213217 if (opt_ == nullptr)
267271 } else if (opt == "disable-verify") {
268272 DisableVerify = true;
269273 } else if (opt.startswith("sample-profile=")) {
270 sample_profile= opt.substr(strlen("sample-profile="));
274 sample_profile = opt.substr(strlen("sample-profile="));
275 } else if (opt == "cs-profile-generate") {
276 cs_pgo_gen = true;
277 } else if (opt.startswith("cs-profile-path=")) {
278 cs_profile_path = opt.substr(strlen("cs-profile-path="));
271279 } else if (opt == "new-pass-manager") {
272280 new_pass_manager = true;
273281 } else if (opt == "debug-pass-manager") {
890898
891899 if (!options::sample_profile.empty())
892900 Conf.SampleProfile = options::sample_profile;
901
902 if (!options::cs_profile_path.empty())
903 Conf.CSIRProfile = options::cs_profile_path;
904 Conf.RunCSIRInstr = options::cs_pgo_gen;
893905
894906 Conf.DwoDir = options::dwo_dir;
895907
102102 static cl::opt
103103 SamplePGOFile("lto-sample-profile-file",
104104 cl::desc("Specify a SamplePGO profile file"));
105
106 static cl::opt
107 CSPGOFile("lto-cspgo-profile-file",
108 cl::desc("Specify a context sensitive PGO profile file"));
109
110 static cl::opt
111 RunCSIRInstr("lto-cspgo-gen",
112 cl::desc("Run PGO context sensitive IR instrumentation"),
113 cl::init(false), cl::Hidden);
105114
106115 static cl::opt
107116 UseNewPM("use-new-pm",
213222 Conf.RemarksWithHotness = OptRemarksWithHotness;
214223
215224 Conf.SampleProfile = SamplePGOFile;
225 Conf.CSIRProfile = CSPGOFile;
226 Conf.RunCSIRInstr = RunCSIRInstr;
216227
217228 // Run a custom pipeline, if asked for.
218229 Conf.OptPipeline = OptPipeline;