124#include <unordered_map>
132#define DEBUG_TYPE "pgo-instrumentation"
// Pass statistics counters, each declared together with its human-readable
// description string. Elsewhere in this listing they are bumped with ++ / +=
// (for example `IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++` and
// `NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts()`); the "CSPGO"
// variants are the ones selected when the IsCS flag is set.
// NOTE(review): the digits fused to the front of each statement look like
// source-listing line numbers, not part of the program text -- confirm against
// the real file before compiling.
134STATISTIC(NumOfPGOInstrument,
"Number of edges instrumented.");
135STATISTIC(NumOfPGOSelectInsts,
"Number of select instruction instrumented.");
136STATISTIC(NumOfPGOMemIntrinsics,
"Number of mem intrinsics instrumented.");
139STATISTIC(NumOfPGOSplit,
"Number of critical edge splits.");
140STATISTIC(NumOfPGOFunc,
"Number of functions having valid profile counts.");
141STATISTIC(NumOfPGOMismatch,
"Number of functions having mismatch profile.");
142STATISTIC(NumOfPGOMissing,
"Number of functions without profile.");
143STATISTIC(NumOfPGOICall,
"Number of indirect call value instrumentations.");
144STATISTIC(NumOfCSPGOInstrument,
"Number of edges instrumented in CSPGO.");
146 "Number of select instruction instrumented in CSPGO.");
148 "Number of mem intrinsics instrumented in CSPGO.");
// CSPGO counterparts of the basic-block and critical-edge-split statistics.
// NumOfCSPGOSplit is incremented in place of NumOfPGOSplit when IsCS is set
// (`IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++`).
// NOTE(review): no update site for NumOfCSPGOBB is visible in this listing.
150STATISTIC(NumOfCSPGOBB,
"Number of basic-blocks in CSPGO.");
151STATISTIC(NumOfCSPGOSplit,
"Number of critical edge splits in CSPGO.");
153 "Number of functions having valid profile counts in CSPGO.");
155 "Number of functions having mismatch profile in CSPGO.");
// NumOfCSPGOMissing is bumped instead of NumOfPGOMissing when IsCS is set and
// the profile reader reports instrprof_error::unknown_function.
// NOTE(review): the update site for NumCoveredBlocks is not visible in this
// listing; presumably it is maintained by the block-coverage path -- confirm.
156STATISTIC(NumOfCSPGOMissing,
"Number of functions without profile in CSPGO.");
157STATISTIC(NumCoveredBlocks,
"Number of basic blocks that were executed");
164 cl::desc(
"Specify the path of profile data file. This is"
165 "mainly for test purpose."));
169 cl::desc(
"Specify the path of profile remapping file. This is mainly for "
176 cl::desc(
"Disable Value Profiling"));
182 cl::desc(
"Max number of annotations for a single indirect "
189 cl::desc(
"Max number of preicise value annotations for a single memop"
196 cl::desc(
"Append function hash to the name of COMDAT function to avoid "
197 "function hash mismatch due to the preinliner"));
204 cl::desc(
"Use this option to turn on/off "
205 "warnings about missing profile data for "
212 cl::desc(
"Use this option to turn off/on "
213 "warnings about profile cfg mismatch."));
220 cl::desc(
"The option is used to turn on/off "
221 "warnings about hash mismatch for comdat "
222 "or weak functions."));
228 cl::desc(
"Use this option to turn on/off SELECT "
229 "instruction instrumentation. "));
234 cl::desc(
"A boolean option to show CFG dag or text "
235 "with raw profile counts from "
236 "profile data. See also option "
237 "-pgo-view-counts. To limit graph "
238 "display to only one function, use "
239 "filtering option -view-bfi-func-name."),
247 cl::desc(
"Use this option to turn on/off "
248 "memory intrinsic size profiling."));
253 cl::desc(
"When this option is on, the annotated "
254 "branch probability will be emitted as "
255 "optimization remarks: -{Rpass|"
256 "pass-remarks}=pgo-instrumentation"));
260 cl::desc(
"Force to instrument function entry basicblock."));
265 "Use this option to enable function entry coverage instrumentation."));
268 "pgo-block-coverage",
269 cl::desc(
"Use this option to enable basic block coverage instrumentation"));
273 cl::desc(
"Create a dot file of CFGs with block "
274 "coverage inference information"));
277 "pgo-temporal-instrumentation",
278 cl::desc(
"Use this option to enable temporal instrumentation"));
282 cl::desc(
"Fix function entry count in profile use."));
286 cl::desc(
"Print out the non-match BFI count if a hot raw profile count "
287 "becomes non-hot, or a cold raw profile count becomes hot. "
288 "The print is enabled under -Rpass-analysis=pgo, or "
289 "internal option -pass-remakrs-analysis=pgo."));
293 cl::desc(
"Print out mismatched BFI counts after setting profile metadata "
294 "The print is enabled under -Rpass-analysis=pgo, or "
295 "internal option -pass-remakrs-analysis=pgo."));
299 cl::desc(
"Set the threshold for pgo-verify-bfi: only print out "
300 "mismatched BFI if the difference percentage is greater than "
301 "this value (in percentage)."));
305 cl::desc(
"Set the threshold for pgo-verify-bfi: skip the counts whose "
306 "profile count value is below."));
311 cl::desc(
"Trace the hash of the function with this name."));
315 cl::desc(
"Do not instrument functions smaller than this threshold."));
319 cl::desc(
"Do not instrument functions with the number of critical edges "
320 " greater than this threshold."));
356 return std::string();
361 return std::string();
373 else if (CV->
isOne())
385#define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr,
392 const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR));
394 uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF);
396 ProfileVersion |= VARIANT_MASK_CSIR_PROF;
398 ProfileVersion |= VARIANT_MASK_INSTR_ENTRY;
400 ProfileVersion |= VARIANT_MASK_DBG_CORRELATE;
403 VARIANT_MASK_BYTE_COVERAGE | VARIANT_MASK_FUNCTION_ENTRY_ONLY;
405 ProfileVersion |= VARIANT_MASK_BYTE_COVERAGE;
407 ProfileVersion |= VARIANT_MASK_TEMPORAL_PROF;
412 Triple TT(M.getTargetTriple());
413 if (TT.supportsCOMDAT()) {
415 IRLevelVersionVariable->setComdat(M.getOrInsertComdat(VarName));
417 return IRLevelVersionVariable;
// Selects what SelectInstVisitor does when it visits a select instruction.
// VM_counting is the visitor's default Mode; instrumentSelects() switches the
// Mode to VM_instrument.
// NOTE(review): VM_annotate is presumably set by annotateSelects(); that
// assignment is not visible in this listing -- confirm.
427enum VisitMode { VM_counting, VM_instrument, VM_annotate };
431struct SelectInstVisitor :
public InstVisitor<SelectInstVisitor> {
434 VisitMode
Mode = VM_counting;
435 unsigned *CurCtrIdx =
nullptr;
436 unsigned TotalNumCtrs = 0;
439 PGOUseFunc *UseFunc =
nullptr;
440 bool HasSingleByteCoverage;
// Binds the visitor to function Func (stored in F) and records whether
// single-byte coverage is in use. The body is empty: no instructions are
// visited here, and the other members keep their in-class defaults
// (Mode = VM_counting, CurCtrIdx = nullptr, TotalNumCtrs = 0,
// UseFunc = nullptr).
442 SelectInstVisitor(
Function &Func,
bool HasSingleByteCoverage)
443 :
F(
Func), HasSingleByteCoverage(HasSingleByteCoverage) {}
445 void countSelects() {
455 void instrumentSelects(
unsigned *Ind,
unsigned TotalNC,
GlobalVariable *FNV,
457 Mode = VM_instrument;
459 TotalNumCtrs = TotalNC;
466 void annotateSelects(PGOUseFunc *UF,
unsigned *Ind) {
// Returns the NSIs member. Callers in this listing add it to the number of
// instrumented blocks to obtain the total counter count, and feed it into the
// function hash.
// NOTE(review): NSIs is presumably the tally produced by countSelects(); the
// code that updates it is not visible here -- confirm.
481 unsigned getNumOfSelectInsts()
const {
return NSIs; }
493 bool Removed =
false;
494 bool IsCritical =
false;
497 : SrcBB(Src), DestBB(Dest), Weight(
W) {}
500 std::string infoString()
const {
501 return (
Twine(Removed ?
"-" :
" ") + (InMST ?
" " :
"*") +
502 (IsCritical ?
"c" :
" ") +
" W=" +
Twine(Weight))
// Creates the per-block record with index IX. Group is initialised to point at
// this very object.
// NOTE(review): a self-referencing Group presumably makes each block start out
// as its own set for the spanning-tree construction -- confirm against CFGMST.
513 PGOBBInfo(
unsigned IX) : Group(this),
Index(IX) {}
516 std::string infoString()
const {
522template <
class Edge,
class BBInfo>
class FuncPGOInstrumentation {
530 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers;
534 void computeCFGHash();
535 void renameComdatFunction();
539 std::vector<std::vector<VPCandidateInfo>> ValueSites;
540 SelectInstVisitor SIVisitor;
541 std::string FuncName;
542 std::string DeprecatedFuncName;
551 const std::optional<BlockCoverageInference> BCI;
553 static std::optional<BlockCoverageInference>
554 constructBCI(
Function &Func,
bool HasSingleByteCoverage,
555 bool InstrumentFuncEntry) {
556 if (HasSingleByteCoverage)
563 void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs);
576 void dumpInfo(
StringRef Str =
"")
const {
578 " Hash: " +
Twine(FunctionHash) +
"\t" + Str);
581 FuncPGOInstrumentation(
583 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
586 bool InstrumentFuncEntry =
true,
bool HasSingleByteCoverage =
false)
587 :
F(
Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(
Func, TLI),
588 TLI(TLI), ValueSites(IPVK_Last + 1),
589 SIVisitor(
Func, HasSingleByteCoverage),
590 MST(
F, InstrumentFuncEntry, BPI,
BFI),
591 BCI(constructBCI(
Func, HasSingleByteCoverage, InstrumentFuncEntry)) {
593 BCI->viewBlockCoverageGraph();
595 SIVisitor.countSelects();
596 ValueSites[IPVK_MemOPSize] = VPC.
get(IPVK_MemOPSize);
598 NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
599 NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
601 ValueSites[IPVK_IndirectCallTarget] = VPC.
get(IPVK_IndirectCallTarget);
603 ValueSites[IPVK_VTableTarget] = VPC.
get(IPVK_VTableTarget);
605 NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
606 NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
613 if (!ComdatMembers.empty())
614 renameComdatFunction();
617 for (
const auto &E : MST.
allEdges()) {
620 IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++;
622 IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++;
635template <
class Edge,
class BBInfo>
636void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
637 std::vector<uint8_t> Indexes;
641 auto BI = findBBInfo(Succ);
645 for (
int J = 0; J < 4; J++)
646 Indexes.push_back((uint8_t)(
Index >> (J * 8)));
653 auto updateJCH = [&JCH](
uint64_t Num) {
658 updateJCH((
uint64_t)SIVisitor.getNumOfSelectInsts());
659 updateJCH((
uint64_t)ValueSites[IPVK_IndirectCallTarget].
size());
662 updateJCH(BCI->getInstrumentedBlocksHash());
672 FunctionHash &= 0x0FFFFFFFFFFFFFFF;
675 LLVM_DEBUG(
dbgs() <<
"Function Hash Computation for " <<
F.getName() <<
":\n"
676 <<
" CRC = " << JC.
getCRC()
677 <<
", Selects = " << SIVisitor.getNumOfSelectInsts()
678 <<
", Edges = " << MST.
numEdges() <<
", ICSites = "
679 << ValueSites[IPVK_IndirectCallTarget].size()
680 <<
", Memops = " << ValueSites[IPVK_MemOPSize].size()
681 <<
", High32 CRC = " << JCH.
getCRC()
682 <<
", Hash = " << FunctionHash <<
"\n";);
685 dbgs() <<
"Funcname=" <<
F.getName() <<
", Hash=" << FunctionHash
686 <<
" in building " <<
F.getParent()->getSourceFileName() <<
"\n";
692 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
704 for (
auto &&CM :
make_range(ComdatMembers.equal_range(
C))) {
705 assert(!isa<GlobalAlias>(CM.second));
706 Function *FM = dyn_cast<Function>(CM.second);
714template <
class Edge,
class BBInfo>
715void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() {
718 std::string OrigName =
F.getName().str();
719 std::string NewFuncName =
721 F.setName(
Twine(NewFuncName));
723 FuncName =
Twine(FuncName +
"." +
Twine(FunctionHash)).
str();
729 if (!
F.hasComdat()) {
731 NewComdat =
M->getOrInsertComdat(
StringRef(NewFuncName));
733 F.setComdat(NewComdat);
738 Comdat *OrigComdat =
F.getComdat();
739 std::string NewComdatName =
741 NewComdat =
M->getOrInsertComdat(
StringRef(NewComdatName));
744 for (
auto &&CM :
make_range(ComdatMembers.equal_range(OrigComdat))) {
746 cast<Function>(CM.second)->setComdat(NewComdat);
752template <
class Edge,
class BBInfo>
753void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs(
754 std::vector<BasicBlock *> &InstrumentBBs) {
757 if (BCI->shouldInstrumentBlock(BB))
758 InstrumentBBs.push_back(&BB);
763 std::vector<Edge *> EdgeList;
765 for (
const auto &E : MST.
allEdges())
766 EdgeList.push_back(E.get());
768 for (
auto &E : EdgeList) {
771 InstrumentBBs.push_back(InstrBB);
777template <
class Edge,
class BBInfo>
778BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {
779 if (E->InMST || E->Removed)
785 if (SrcBB ==
nullptr)
787 if (DestBB ==
nullptr)
802 return canInstrument(SrcBB);
804 return canInstrument(DestBB);
813 dbgs() <<
"Fail to split critical edge: not instrument this edge.\n");
818 IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++;
820 <<
" --> " << getBBInfo(DestBB).
Index <<
"\n");
822 MST.
addEdge(SrcBB, InstrBB, 0);
824 Edge &NewEdge1 = MST.
addEdge(InstrBB, DestBB, 0);
825 NewEdge1.InMST =
true;
828 return canInstrument(InstrBB);
844 if (!isa<IntrinsicInst>(OrigCall)) {
847 std::optional<OperandBundleUse> ParentFunclet =
855 if (!BlockColors.
empty()) {
856 const ColorVector &CV = BlockColors.
find(OrigCall->getParent())->second;
857 assert(CV.
size() == 1 &&
"non-unique color for block!");
870 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
878 FuncPGOInstrumentation<PGOEdge, PGOBBInfo> FuncInfo(
882 auto Name = FuncInfo.FuncNameVar;
884 FuncInfo.FunctionHash);
886 auto &EntryBB =
F.getEntryBlock();
887 IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt());
896 std::vector<BasicBlock *> InstrumentBBs;
897 FuncInfo.getInstrumentBBs(InstrumentBBs);
898 unsigned NumCounters =
899 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
914 for (
auto &Instr : BB)
915 if (
auto *CS = dyn_cast<CallBase>(&Instr)) {
916 if ((CS->getCalledFunction() &&
917 CS->getCalledFunction()->isIntrinsic()) ||
918 dyn_cast<InlineAsm>(CS->getCalledOperand()))
925 Visit([&TotalNrCallsites](
auto *) { ++TotalNrCallsites; });
929 Visit([&](
auto *CB) {
941 auto &EntryBB =
F.getEntryBlock();
942 IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt());
951 for (
auto *InstrBB : InstrumentBBs) {
954 "Cannot get the Instrumentation point");
959 ? Intrinsic::instrprof_cover
960 : Intrinsic::instrprof_increment),
965 FuncInfo.SIVisitor.instrumentSelects(&
I, NumCounters, FuncInfo.FuncNameVar,
966 FuncInfo.FunctionHash);
972 NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size();
979 if (
F.hasPersonalityFn() &&
984 for (
uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) {
985 unsigned SiteIndex = 0;
991 <<
" site: CallSite Index = " << SiteIndex <<
"\n");
995 "Cannot get the Instrumentation point");
997 Value *ToProfile =
nullptr;
998 if (Cand.V->getType()->isIntegerTy())
1000 else if (Cand.V->getType()->isPointerTy())
1002 assert(ToProfile &&
"value profiling Value is of unexpected type");
1008 {FuncInfo.FuncNameVar, Builder.
getInt64(FuncInfo.FunctionHash),
1018struct PGOUseEdge :
public PGOEdge {
1019 using PGOEdge::PGOEdge;
1021 std::optional<uint64_t> Count;
1027 std::string infoString()
const {
1029 return PGOEdge::infoString();
1030 return (
Twine(PGOEdge::infoString()) +
" Count=" +
Twine(*Count)).str();
1037struct PGOUseBBInfo :
public PGOBBInfo {
1038 std::optional<uint64_t> Count;
1039 int32_t UnknownCountInEdge = 0;
1040 int32_t UnknownCountOutEdge = 0;
1041 DirectEdges InEdges;
1042 DirectEdges OutEdges;
// Forwards the block index IX to the PGOBBInfo base. Nothing else is set here,
// so Count starts as an empty optional and both UnknownCount{In,Out}Edge
// counters start at their in-class default of 0.
1044 PGOUseBBInfo(
unsigned IX) : PGOBBInfo(IX) {}
1050 std::string infoString()
const {
1052 return PGOBBInfo::infoString();
1053 return (
Twine(PGOBBInfo::infoString()) +
" Count=" +
Twine(*Count)).str();
1057 void addOutEdge(PGOUseEdge *E) {
1058 OutEdges.push_back(E);
1059 UnknownCountOutEdge++;
1063 void addInEdge(PGOUseEdge *E) {
1064 InEdges.push_back(E);
1065 UnknownCountInEdge++;
1074 for (
const auto &E : Edges) {
1088 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
1091 bool HasSingleByteCoverage)
1092 :
F(
Func),
M(Modu),
BFI(BFIin), PSI(PSI),
1093 FuncInfo(
Func, TLI, ComdatMembers,
false, BPI, BFIin, IsCS,
1094 InstrumentFuncEntry, HasSingleByteCoverage),
1095 FreqAttr(FFA_Normal), IsCS(IsCS) {}
1097 void handleInstrProfError(
Error Err,
uint64_t MismatchedFuncSum);
1104 void populateCounters();
1113 void annotateValueSites();
1116 void annotateValueSites(
uint32_t Kind);
1119 void annotateIrrLoopHeaderWeights();
// Hot/cold classification attached to a function after its profile is read.
// The constructor starts every function at FFA_Normal; the module-level
// driver later sorts functions into its ColdFunctions / HotFunctions lists
// according to FFA_Cold / FFA_Hot.
1122 enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot };
// Returns the FreqAttr member, i.e. the current hot/cold/normal
// classification of this function.
1125 FuncFreqAttr getFuncFreqAttr()
const {
return FreqAttr; }
1134 PGOUseBBInfo &getBBInfo(
const BasicBlock *BB)
const {
1135 return FuncInfo.getBBInfo(BB);
1139 PGOUseBBInfo *findBBInfo(
const BasicBlock *BB)
const {
1140 return FuncInfo.findBBInfo(BB);
// Debug helper: passes Str (default: empty string) straight through to
// FuncInfo.dumpInfo(). Does nothing else itself.
1145 void dumpInfo(
StringRef Str =
"")
const { FuncInfo.dumpInfo(Str); }
// Returns the ProgramMaxCount member.
// NOTE(review): where ProgramMaxCount is assigned is not visible in this
// listing; the driver compares the result against 0 -- confirm its meaning
// (presumably the profile summary's maximum count).
1147 uint64_t getProgramMaxCount()
const {
return ProgramMaxCount; }
1156 FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> FuncInfo;
1172 FuncFreqAttr FreqAttr;
1178 bool setInstrumentedCounts(
const std::vector<uint64_t> &CountFromProfile);
1191 FreqAttr = FFA_Cold;
1199 const FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> &FuncInfo) {
1203 for (
const auto &E : FuncInfo.MST.allEdges()) {
1208 PGOUseBBInfo &SrcInfo = FuncInfo.getBBInfo(SrcBB);
1209 PGOUseBBInfo &DestInfo = FuncInfo.getBBInfo(DestBB);
1210 SrcInfo.addOutEdge(E.get());
1211 DestInfo.addInEdge(E.get());
1217bool PGOUseFunc::setInstrumentedCounts(
1218 const std::vector<uint64_t> &CountFromProfile) {
1220 std::vector<BasicBlock *> InstrumentBBs;
1221 FuncInfo.getInstrumentBBs(InstrumentBBs);
1225 unsigned NumCounters =
1226 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
1229 if (NumCounters != CountFromProfile.size()) {
1232 auto *FuncEntry = &*
F.begin();
1237 uint64_t CountValue = CountFromProfile[
I++];
1238 PGOUseBBInfo &
Info = getBBInfo(InstrBB);
1242 if (InstrBB == FuncEntry && CountValue == 0)
1244 Info.setBBInfoCount(CountValue);
1246 ProfileCountSize = CountFromProfile.size();
1250 auto setEdgeCount = [
this](PGOUseEdge *E,
uint64_t Value) ->
void {
1251 E->setEdgeCount(
Value);
1252 this->getBBInfo(E->SrcBB).UnknownCountOutEdge--;
1253 this->getBBInfo(E->DestBB).UnknownCountInEdge--;
1259 for (
const auto &E : FuncInfo.MST.allEdges()) {
1260 if (E->Removed || E->InMST)
1263 PGOUseBBInfo &SrcInfo = getBBInfo(SrcBB);
1267 if (SrcInfo.Count && SrcInfo.OutEdges.size() == 1)
1268 setEdgeCount(E.get(), *SrcInfo.Count);
1271 PGOUseBBInfo &DestInfo = getBBInfo(DestBB);
1274 if (DestInfo.Count && DestInfo.InEdges.size() == 1)
1275 setEdgeCount(E.get(), *DestInfo.Count);
1281 setEdgeCount(E.get(), 0);
1288void PGOUseFunc::setEdgeCount(DirectEdges &Edges,
uint64_t Value) {
1289 for (
auto &E : Edges) {
1292 E->setEdgeCount(
Value);
1294 getBBInfo(E->SrcBB).UnknownCountOutEdge--;
1295 getBBInfo(E->DestBB).UnknownCountInEdge--;
1303 const char MetadataName[] =
"instr_prof_hash_mismatch";
1306 auto *Existing =
F.getMetadata(LLVMContext::MD_annotation);
1308 MDTuple *Tuple = cast<MDTuple>(Existing);
1309 for (
const auto &
N : Tuple->
operands()) {
1310 if (
N.equalsStr(MetadataName))
1319 F.setMetadata(LLVMContext::MD_annotation, MD);
1322void PGOUseFunc::handleInstrProfError(
Error Err,
uint64_t MismatchedFuncSum) {
1324 auto &Ctx =
M->getContext();
1325 auto Err = IPE.
get();
1326 bool SkipWarning =
false;
1328 << FuncInfo.FuncName <<
": ");
1329 if (Err == instrprof_error::unknown_function) {
1330 IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++;
1333 }
else if (Err == instrprof_error::hash_mismatch ||
1334 Err == instrprof_error::malformed) {
1335 IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++;
1341 LLVM_DEBUG(
dbgs() <<
"hash mismatch (hash= " << FuncInfo.FunctionHash
1342 <<
" skip=" << SkipWarning <<
")");
1352 IPE.
message() + std::string(
" ") +
F.getName().str() +
1353 std::string(
" Hash = ") + std::to_string(FuncInfo.FunctionHash) +
1354 std::string(
" up to ") + std::to_string(MismatchedFuncSum) +
1355 std::string(
" count discarded");
1367 auto &Ctx =
M->getContext();
1370 FuncInfo.FuncName, FuncInfo.FunctionHash, FuncInfo.DeprecatedFuncName,
1371 &MismatchedFuncSum);
1373 handleInstrProfError(std::move(E), MismatchedFuncSum);
1376 ProfileRecord = std::move(
Result.get());
1381 std::vector<uint64_t> &CountFromProfile = ProfileRecord.
Counts;
1383 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++;
1387 for (
unsigned I = 0, S = CountFromProfile.size();
I < S;
I++) {
1389 ValueSum += CountFromProfile[
I];
1391 AllZeros = (ValueSum == 0);
1395 getBBInfo(
nullptr).UnknownCountOutEdge = 2;
1396 getBBInfo(
nullptr).UnknownCountInEdge = 2;
1398 if (!setInstrumentedCounts(CountFromProfile)) {
1400 dbgs() <<
"Inconsistent number of counts, skipping this function");
1402 M->getName().data(),
1403 Twine(
"Inconsistent number of counts in ") +
F.getName().str() +
1404 Twine(
": the profile may be stale or there is a function name "
1416 FuncInfo.FuncName, FuncInfo.FunctionHash, FuncInfo.DeprecatedFuncName,
1417 &MismatchedFuncSum);
1418 if (
auto Err =
Result.takeError()) {
1419 handleInstrProfError(std::move(Err), MismatchedFuncSum);
1422 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++;
1424 std::vector<uint64_t> &CountsFromProfile =
Result.get().Counts;
1428 if (FuncInfo.BCI->shouldInstrumentBlock(BB))
1434 InverseDependencies;
1435 for (
auto &BB :
F) {
1436 for (
auto *Dep : FuncInfo.BCI->getDependencies(BB)) {
1438 InverseDependencies[Dep].
insert(&BB);
1443 std::stack<const BasicBlock *> CoveredBlocksToProcess;
1444 for (
auto &[BB, IsCovered] : Coverage)
1446 CoveredBlocksToProcess.push(BB);
1448 while (!CoveredBlocksToProcess.empty()) {
1449 auto *CoveredBlock = CoveredBlocksToProcess.top();
1450 assert(Coverage[CoveredBlock]);
1451 CoveredBlocksToProcess.pop();
1452 for (
auto *BB : InverseDependencies[CoveredBlock]) {
1457 CoveredBlocksToProcess.push(BB);
1465 F.setEntryCount(Coverage[&
F.getEntryBlock()] ? 10000 : 0);
1466 for (
auto &BB :
F) {
1475 Weights.
push_back((Coverage[Succ] || !Coverage[&BB]) ? 1 : 0);
1476 if (Weights.
size() >= 2)
1480 unsigned NumCorruptCoverage = 0;
1485 auto IsBlockDead = [&](
const BasicBlock &BB) -> std::optional<bool> {
1486 if (
auto C =
BFI.getBlockProfileCount(&BB))
1490 LLVM_DEBUG(
dbgs() <<
"Block Coverage: (Instrumented=*, Covered=X)\n");
1491 for (
auto &BB :
F) {
1492 LLVM_DEBUG(
dbgs() << (FuncInfo.BCI->shouldInstrumentBlock(BB) ?
"* " :
" ")
1493 << (Coverage[&BB] ?
"X " :
" ") <<
" " << BB.getName()
1499 if (Coverage[&BB] == IsBlockDead(BB).value_or(
false)) {
1501 dbgs() <<
"Found inconsistent block covearge for " << BB.getName()
1502 <<
": BCI=" << (Coverage[&BB] ?
"Covered" :
"Dead") <<
" BFI="
1503 << (IsBlockDead(BB).
value() ?
"Dead" :
"Covered") <<
"\n");
1504 ++NumCorruptCoverage;
1510 auto &Ctx =
M->getContext();
1512 M->getName().data(),
1513 Twine(
"Found inconsistent block coverage for function ") +
F.getName() +
1514 " in " +
Twine(NumCorruptCoverage) +
" blocks.",
1518 FuncInfo.BCI->viewBlockCoverageGraph(&Coverage);
1523void PGOUseFunc::populateCounters() {
1524 bool Changes =
true;
1525 unsigned NumPasses = 0;
1533 PGOUseBBInfo *UseBBInfo = findBBInfo(&BB);
1534 if (UseBBInfo ==
nullptr)
1536 if (!UseBBInfo->Count) {
1537 if (UseBBInfo->UnknownCountOutEdge == 0) {
1540 }
else if (UseBBInfo->UnknownCountInEdge == 0) {
1545 if (UseBBInfo->Count) {
1546 if (UseBBInfo->UnknownCountOutEdge == 1) {
1552 if (*UseBBInfo->Count > OutSum)
1553 Total = *UseBBInfo->Count - OutSum;
1554 setEdgeCount(UseBBInfo->OutEdges,
Total);
1557 if (UseBBInfo->UnknownCountInEdge == 1) {
1560 if (*UseBBInfo->Count > InSum)
1561 Total = *UseBBInfo->Count - InSum;
1562 setEdgeCount(UseBBInfo->InEdges,
Total);
1569 LLVM_DEBUG(
dbgs() <<
"Populate counts in " << NumPasses <<
" passes.\n");
1573 for (
auto &BB :
F) {
1574 auto BI = findBBInfo(&BB);
1577 assert(BI->Count &&
"BB count is not valid");
1582 for (
auto &BB :
F) {
1583 auto BI = findBBInfo(&BB);
1586 FuncMaxCount = std::max(FuncMaxCount, *BI->Count);
1596 FuncInfo.SIVisitor.annotateSelects(
this, &CountPosition);
1597 assert(CountPosition == ProfileCountSize);
1599 LLVM_DEBUG(FuncInfo.dumpInfo(
"after reading profile."));
1603void PGOUseFunc::setBranchWeights() {
1605 LLVM_DEBUG(
dbgs() <<
"\nSetting branch weights for func " <<
F.getName()
1606 <<
" IsCS=" << IsCS <<
"\n");
1607 for (
auto &BB :
F) {
1611 if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) ||
1612 isa<IndirectBrInst>(TI) || isa<InvokeInst>(TI) ||
1613 isa<CallBrInst>(TI)))
1616 const PGOUseBBInfo &BBCountInfo = getBBInfo(&BB);
1617 if (!*BBCountInfo.Count)
1621 unsigned Size = BBCountInfo.OutEdges.size();
1624 for (
unsigned s = 0; s <
Size; s++) {
1625 const PGOUseEdge *E = BBCountInfo.OutEdges[s];
1628 if (DestBB ==
nullptr)
1632 if (EdgeCount > MaxCount)
1633 MaxCount = EdgeCount;
1634 EdgeCounts[SuccNum] = EdgeCount;
1643 auto &Ctx =
M->getContext();
1645 M->getName().data(),
1646 Twine(
"Profile in ") +
F.getName().str() +
1647 Twine(
" partially ignored") +
1648 Twine(
", possibly due to the lack of a return path."),
1656 if (isa<IndirectBrInst>(Pred->getTerminator()))
1662void PGOUseFunc::annotateIrrLoopHeaderWeights() {
1663 LLVM_DEBUG(
dbgs() <<
"\nAnnotating irreducible loop header weights.\n");
1665 for (
auto &BB :
F) {
1671 const PGOUseBBInfo &BBCountInfo = getBBInfo(&BB);
1677void SelectInstVisitor::instrumentOneSelectInst(
SelectInst &SI) {
1681 auto *Step = Builder.CreateZExt(
SI.getCondition(), Int64Ty);
1684 {FuncNameVar, Builder.getInt64(FuncHash), Builder.getInt32(TotalNumCtrs),
1685 Builder.getInt32(*CurCtrIdx), Step});
1689void SelectInstVisitor::annotateOneSelectInst(
SelectInst &SI) {
1690 std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts;
1691 assert(*CurCtrIdx < CountFromProfile.size() &&
1692 "Out of bound access of counters");
1694 SCounts[0] = CountFromProfile[*CurCtrIdx];
1697 auto BI = UseFunc->findBBInfo(
SI.getParent());
1699 TotalCount = *BI->Count;
1701 SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0);
1702 uint64_t MaxCount = std::max(SCounts[0], SCounts[1]);
1707void SelectInstVisitor::visitSelectInst(
SelectInst &SI) {
1711 if (
SI.getCondition()->getType()->isVectorTy())
1719 instrumentOneSelectInst(SI);
1722 annotateOneSelectInst(SI);
1730void PGOUseFunc::annotateValueSites() {
1738 annotateValueSites(Kind);
1742void PGOUseFunc::annotateValueSites(
uint32_t Kind) {
1743 assert(Kind <= IPVK_Last);
1744 unsigned ValueSiteIndex = 0;
1745 auto &ValueSites = FuncInfo.ValueSites[
Kind];
1747 if (NumValueSites != ValueSites.size()) {
1748 auto &Ctx =
M->getContext();
1750 M->getName().data(),
1751 Twine(
"Inconsistent number of value sites for ") +
1754 Twine(
"\", possibly due to the use of a stale profile."),
1760 LLVM_DEBUG(
dbgs() <<
"Read one value site profile (kind = " << Kind
1761 <<
"): Index = " << ValueSiteIndex <<
" out of "
1762 << NumValueSites <<
"\n");
1775 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
1780 ComdatMembers.insert(std::make_pair(
C, &
F));
1782 if (
Comdat *
C = GV.getComdat())
1783 ComdatMembers.insert(std::make_pair(
C, &GV));
1785 if (
Comdat *
C = GA.getComdat())
1786 ComdatMembers.insert(std::make_pair(
C, &GA));
1791 if (
F.isDeclaration())
1796 unsigned NumCriticalEdges = 0;
1797 for (
auto &BB :
F) {
1806 <<
", NumCriticalEdges=" << NumCriticalEdges
1807 <<
" exceed the threshold. Skip PGO.\n");
1817 if (
F.hasFnAttribute(llvm::Attribute::Naked))
1819 if (
F.hasFnAttribute(llvm::Attribute::NoProfile))
1821 if (
F.hasFnAttribute(llvm::Attribute::SkipProfile))
1837 Triple TT(M.getTargetTriple());
1842 Twine(
"VTable value profiling is presently not "
1843 "supported for non-ELF object formats"),
1845 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
1851 auto &TLI = LookupTLI(
F);
1852 auto *BPI = LookupBPI(
F);
1853 auto *BFI = LookupBFI(
F);
1897 auto BFIEntryCount =
F.getEntryCount();
1898 assert(BFIEntryCount && (BFIEntryCount->getCount() > 0) &&
1899 "Invalid BFI Entrycount");
1903 for (
auto &BBI :
F) {
1906 if (!Func.findBBInfo(&BBI))
1909 CountValue = *Func.getBBInfo(&BBI).Count;
1910 BFICountValue = *BFICount;
1914 if (SumCount.isZero())
1918 "Incorrect sum of BFI counts");
1921 double Scale = (SumCount / SumBFICount).convertToDouble();
1922 if (Scale < 1.001 && Scale > 0.999)
1927 if (NewEntryCount == 0)
1933 << NewEntryCount <<
"\n");
1950 unsigned BBNum = 0, BBMisMatchNum = 0, NonZeroBBNum = 0;
1951 for (
auto &BBI :
F) {
1955 CountValue = Func.getBBInfo(&BBI).Count.value_or(CountValue);
1962 BFICountValue = *BFICount;
1965 bool rawIsHot = CountValue >= HotCountThreshold;
1966 bool BFIIsHot = BFICountValue >= HotCountThreshold;
1968 bool ShowCount =
false;
1969 if (rawIsHot && !BFIIsHot) {
1970 Msg =
"raw-Hot to BFI-nonHot";
1972 }
else if (rawIsCold && BFIIsHot) {
1973 Msg =
"raw-Cold to BFI-Hot";
1982 uint64_t Diff = (BFICountValue >= CountValue)
1983 ? BFICountValue - CountValue
1984 : CountValue - BFICountValue;
1992 F.getSubprogram(), &BBI);
1994 <<
" Count=" <<
ore::NV(
"Count", CountValue)
1995 <<
" BFI_Count=" <<
ore::NV(
"Count", BFICountValue);
1997 Remark <<
" (" << Msg <<
")";
2004 F.getSubprogram(), &
F.getEntryBlock())
2005 <<
"In Func " <<
ore::NV(
"Function",
F.getName())
2006 <<
": Num_of_BB=" <<
ore::NV(
"Count", BBNum)
2007 <<
", Num_of_non_zerovalue_BB=" <<
ore::NV(
"Count", NonZeroBBNum)
2008 <<
", Num_of_mis_matching_BB=" <<
ore::NV(
"Count", BBMisMatchNum);
2020 auto &Ctx = M.getContext();
2023 ProfileRemappingFileName);
2024 if (
Error E = ReaderOrErr.takeError()) {
2032 std::unique_ptr<IndexedInstrProfReader> PGOReader =
2033 std::move(ReaderOrErr.get());
2039 if (!PGOReader->hasCSIRLevelProfile() && IsCS)
2043 if (!PGOReader->isIRLevelProfile()) {
2045 ProfileFileName.
data(),
"Not an IR level instrumentation profile"));
2048 if (PGOReader->functionEntryOnly()) {
2050 ProfileFileName.
data(),
2051 "Function entry profiles are not yet supported for optimization"));
2058 M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()),
2063 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
2065 std::vector<Function *> HotFunctions;
2066 std::vector<Function *> ColdFunctions;
2070 bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled();
2075 bool HasSingleByteCoverage = PGOReader->hasSingleByteCoverage();
2079 auto &TLI = LookupTLI(
F);
2080 auto *BPI = LookupBPI(
F);
2081 auto *BFI = LookupBFI(
F);
2082 if (!HasSingleByteCoverage) {
2088 PGOUseFunc Func(
F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS,
2089 InstrumentFuncEntry, HasSingleByteCoverage);
2090 if (HasSingleByteCoverage) {
2091 Func.populateCoverage(PGOReader.get());
2099 bool AllZeros =
false;
2100 if (!Func.readCounters(PGOReader.get(), AllZeros, PseudoKind))
2104 if (Func.getProgramMaxCount() != 0)
2105 ColdFunctions.push_back(&
F);
2110 if (
F.hasFnAttribute(Attribute::Cold))
2111 F.removeFnAttr(Attribute::Cold);
2114 F.addFnAttr(Attribute::Hot);
2117 Func.populateCounters();
2118 Func.setBranchWeights();
2119 Func.annotateValueSites();
2120 Func.annotateIrrLoopHeaderWeights();
2121 PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr();
2122 if (FreqAttr == PGOUseFunc::FFA_Cold)
2123 ColdFunctions.push_back(&
F);
2124 else if (FreqAttr == PGOUseFunc::FFA_Hot)
2125 HotFunctions.push_back(&
F);
2130 std::unique_ptr<BranchProbabilityInfo> NewBPI =
2131 std::make_unique<BranchProbabilityInfo>(
F, LI);
2132 std::unique_ptr<BlockFrequencyInfo> NewBFI =
2133 std::make_unique<BlockFrequencyInfo>(
F, *NewBPI, LI);
2137 dbgs() <<
"pgo-view-counts: " << Func.getFunc().getName() <<
"\n";
2138 NewBFI->print(
dbgs());
2148 ViewGraph(&Func,
Twine(
"PGORawCounts_") + Func.getFunc().getName());
2150 dbgs() <<
"pgo-view-raw-counts: " << Func.getFunc().getName() <<
"\n";
2177 for (
auto &
F : HotFunctions) {
2178 F->addFnAttr(Attribute::InlineHint);
2179 LLVM_DEBUG(
dbgs() <<
"Set inline attribute to function: " <<
F->getName()
2182 for (
auto &
F : ColdFunctions) {
2185 if (
F->hasFnAttribute(Attribute::Hot)) {
2186 auto &Ctx = M.getContext();
2187 std::string Msg = std::string(
"Function ") +
F->getName().str() +
2188 std::string(
" is annotated as a hot function but"
2189 " the profile is cold");
2194 F->addFnAttr(Attribute::Cold);
2195 LLVM_DEBUG(
dbgs() <<
"Set cold attribute to function: " <<
F->getName()
2202 std::string Filename, std::string RemappingFilename,
bool IsCS,
2204 : ProfileFileName(
std::
move(Filename)),
2205 ProfileRemappingFileName(
std::
move(RemappingFilename)), IsCS(IsCS),
2232 LookupTLI, LookupBPI, LookupBFI, PSI, IsCS))
2239 if (!
Node->getName().empty())
2240 return Node->getName().str();
2242 std::string SimpleNodeName;
2250 assert(MaxCount > 0 &&
"Bad max count");
2253 for (
const auto &ECI : EdgeCounts)
2266 if (BrCondStr.empty())
2278 std::string BranchProbStr;
2281 OS <<
" (total count : " << TotalCount <<
")";
2287 << BrCondStr <<
" is true with probability : " << BranchProbStr;
2306 return &
G->getFunc().front();
2329 return std::string(
G->getFunc().getName());
2337 PGOUseBBInfo *BI = Graph->findBBInfo(Node);
2339 if (BI && BI->Count)
2340 OS << *BI->Count <<
"\\l";
2348 if (!isa<SelectInst>(&
I))
2351 OS <<
"SELECT : { T = ";
2355 OS <<
"Unknown, F = Unknown }\\l";
2357 OS << TC <<
", F = " << FC <<
" }\\l";
This file implements a class to represent arbitrary precision integral constant values and operations...
This file contains the simple types necessary to represent the attributes associated with functions a...
This file finds the minimum set of blocks on a CFG that must be instrumented to infer execution cover...
Analysis containing CSE Info
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Given that RA is a live value
static BasicBlock * getInstrBB(CFGMST< Edge, BBInfo > &MST, Edge &E, const DenseSet< const BasicBlock * > &ExecBlocks)
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
static cl::opt< unsigned > ColdCountThreshold("mfs-count-threshold", cl::desc("Minimum number of times a block must be executed to be retained."), cl::init(1), cl::Hidden)
Module.h This file contains the declarations for the Module class.
static cl::opt< bool > PGOInstrumentEntry("pgo-instrument-entry", cl::init(false), cl::Hidden, cl::desc("Force to instrument function entry basicblock."))
static cl::opt< std::string > PGOTestProfileRemappingFile("pgo-test-profile-remapping-file", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path of profile remapping file. This is mainly for " "test purpose."))
static cl::opt< bool > PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden, cl::desc("Fix function entry count in profile use."))
static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI, BranchProbabilityInfo &NBPI)
static cl::opt< bool > PGOVerifyHotBFI("pgo-verify-hot-bfi", cl::init(false), cl::Hidden, cl::desc("Print out the non-match BFI count if a hot raw profile count " "becomes non-hot, or a cold raw profile count becomes hot. " "The print is enabled under -Rpass-analysis=pgo, or " "internal option -pass-remakrs-analysis=pgo."))
static void annotateFunctionWithHashMismatch(Function &F, LLVMContext &ctx)
static cl::opt< bool > PGOTemporalInstrumentation("pgo-temporal-instrumentation", cl::desc("Use this option to enable temporal instrumentation"))
static cl::opt< unsigned > PGOFunctionSizeThreshold("pgo-function-size-threshold", cl::Hidden, cl::desc("Do not instrument functions smaller than this threshold."))
static cl::opt< unsigned > MaxNumAnnotations("icp-max-annotations", cl::init(3), cl::Hidden, cl::desc("Max number of annotations for a single indirect " "call callsite"))
static bool skipPGOGen(const Function &F)
static void collectComdatMembers(Module &M, std::unordered_multimap< Comdat *, GlobalValue * > &ComdatMembers)
static cl::opt< unsigned > PGOVerifyBFICutoff("pgo-verify-bfi-cutoff", cl::init(5), cl::Hidden, cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose " "profile count value is below."))
static cl::opt< std::string > PGOTraceFuncHash("pgo-trace-func-hash", cl::init("-"), cl::Hidden, cl::value_desc("function name"), cl::desc("Trace the hash of the function with this name."))
static void instrumentOneFunc(Function &F, Module *M, TargetLibraryInfo &TLI, BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFI, std::unordered_multimap< Comdat *, GlobalValue * > &ComdatMembers, bool IsCS)
bool isValueProfilingDisabled()
static void populateEHOperandBundle(VPCandidateInfo &Cand, DenseMap< BasicBlock *, ColorVector > &BlockColors, SmallVectorImpl< OperandBundleDef > &OpBundles)
static cl::opt< bool > PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden, cl::desc("Use this option to turn on/off SELECT " "instruction instrumentation. "))
static cl::opt< bool > PGOFunctionEntryCoverage("pgo-function-entry-coverage", cl::Hidden, cl::desc("Use this option to enable function entry coverage instrumentation."))
static bool InstrumentAllFunctions(Module &M, function_ref< TargetLibraryInfo &(Function &)> LookupTLI, function_ref< BranchProbabilityInfo *(Function &)> LookupBPI, function_ref< BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS)
static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI, BranchProbabilityInfo &NBPI, uint64_t HotCountThreshold, uint64_t ColdCountThreshold)
static cl::opt< unsigned > PGOVerifyBFIRatio("pgo-verify-bfi-ratio", cl::init(2), cl::Hidden, cl::desc("Set the threshold for pgo-verify-bfi: only print out " "mismatched BFI if the difference percentage is greater than " "this value (in percentage)."))
static cl::opt< bool > DoComdatRenaming("do-comdat-renaming", cl::init(false), cl::Hidden, cl::desc("Append function hash to the name of COMDAT function to avoid " "function hash mismatch due to the preinliner"))
static cl::opt< unsigned > PGOFunctionCriticalEdgeThreshold("pgo-critical-edge-threshold", cl::init(20000), cl::Hidden, cl::desc("Do not instrument functions with the number of critical edges " " greater than this threshold."))
static void setupBBInfoEdges(const FuncPGOInstrumentation< PGOUseEdge, PGOUseBBInfo > &FuncInfo)
Set up InEdges/OutEdges for all BBs in the MST.
static cl::opt< std::string > PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path of profile data file. This is" "mainly for test purpose."))
static bool skipPGOUse(const Function &F)
static bool canRenameComdat(Function &F, std::unordered_multimap< Comdat *, GlobalValue * > &ComdatMembers)
static cl::opt< bool > PGOVerifyBFI("pgo-verify-bfi", cl::init(false), cl::Hidden, cl::desc("Print out mismatched BFI counts after setting profile metadata " "The print is enabled under -Rpass-analysis=pgo, or " "internal option -pass-remakrs-analysis=pgo."))
static cl::opt< bool > PGOBlockCoverage("pgo-block-coverage", cl::desc("Use this option to enable basic block coverage instrumentation"))
static uint64_t sumEdgeCount(const ArrayRef< PGOUseEdge * > Edges)
static cl::opt< bool > PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden, cl::desc("Use this option to turn on/off " "memory intrinsic size profiling."))
Function::ProfileCount ProfileCount
static cl::opt< bool > EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden, cl::desc("When this option is on, the annotated " "branch probability will be emitted as " "optimization remarks: -{Rpass|" "pass-remarks}=pgo-instrumentation"))
static cl::opt< unsigned > MaxNumMemOPAnnotations("memop-max-annotations", cl::init(4), cl::Hidden, cl::desc("Max number of preicise value annotations for a single memop" "intrinsic"))
static cl::opt< bool > DisableValueProfiling("disable-vp", cl::init(false), cl::Hidden, cl::desc("Disable Value Profiling"))
static std::string getSimpleNodeName(const BasicBlock *Node)
static cl::opt< bool > PGOViewBlockCoverageGraph("pgo-view-block-coverage-graph", cl::desc("Create a dot file of CFGs with block " "coverage inference information"))
static GlobalVariable * createIRLevelProfileFlagVar(Module &M, bool IsCS)
static bool isIndirectBrTarget(BasicBlock *BB)
static std::string getBranchCondString(Instruction *TI)
bool shouldInstrumentEntryBB()
static bool annotateAllFunctions(Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName, vfs::FileSystem &FS, function_ref< TargetLibraryInfo &(Function &)> LookupTLI, function_ref< BranchProbabilityInfo *(Function &)> LookupBPI, function_ref< BlockFrequencyInfo *(Function &)> LookupBFI, ProfileSummaryInfo *PSI, bool IsCS)
static cl::opt< PGOViewCountsType > PGOViewRawCounts("pgo-view-raw-counts", cl::Hidden, cl::desc("A boolean option to show CFG dag or text " "with raw profile counts from " "profile data. See also option " "-pgo-view-counts. To limit graph " "display to only one function, use " "filtering option -view-bfi-func-name."), cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), clEnumValN(PGOVCT_Graph, "graph", "show a graph."), clEnumValN(PGOVCT_Text, "text", "show in text.")))
static const char * ValueProfKindDescr[]
This file provides the interface for IR based instrumentation passes (profile-gen,...
FunctionAnalysisManager FAM
ModuleAnalysisManager MAM
This header defines various interfaces for pass management in LLVM.
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isSimple(Instruction *I)
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Defines the virtual file system interface vfs::FileSystem.
void printAsOperand(OutputBuffer &OB, Prec P=Prec::Default, bool StrictlyWorse=false) const
static APFloat getZero(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Zero.
Class for arbitrary precision integers.
This templated class represents "all analyses that operate over <a particular IR unit>" (e....
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
LLVM Basic Block Representation.
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
const Function * getParent() const
Return the enclosing method, or null if none.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
std::optional< uint64_t > getBlockProfileCount(const BasicBlock *BB, bool AllowSynthetic=false) const
Returns the estimated profile count of BB.
Conditional or Unconditional Branch instruction.
bool isConditional() const
Value * getCondition() const
Analysis pass which computes BranchProbabilityInfo.
Analysis providing branch probability information.
An union-find based Minimum Spanning Tree for CFG.
Edge & addEdge(BasicBlock *Src, BasicBlock *Dest, uint64_t W)
const std::vector< std::unique_ptr< Edge > > & allEdges() const
size_t bbInfoSize() const
BBInfo * findBBInfo(const BasicBlock *BB) const
BBInfo & getBBInfo(const BasicBlock *BB) const
void dumpEdges(raw_ostream &OS, const Twine &Message) const
Base class for all callable instructions (InvokeInst and CallInst). Holds everything related to callin...
Value * getCalledOperand() const
Predicate getPredicate() const
Return the predicate for this instruction.
StringRef getName() const
void setSelectionKind(SelectionKind Val)
SelectionKind getSelectionKind() const
This is the shared class of boolean and integer constants.
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
static Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Diagnostic information for the PGO profiler.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Base class for error info classes.
virtual std::string message() const
Return the error message as a string.
Lightweight error class with error context and mandatory checking.
Tagged union holding either a T or a Error.
Class to represent profile counts.
static GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
@ HiddenVisibility
The GV is hidden.
@ ExternalLinkage
Externally visible function.
@ WeakAnyLinkage
Keep one copy of named function when linking (weak)
@ AvailableExternallyLinkage
Available for inspection, not emission.
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.
This instruction compares its operands according to the predicate given to the constructor.
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
BasicBlock::iterator GetInsertPoint() const
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Reader for the indexed binary instrprof format.
static Expected< std::unique_ptr< IndexedInstrProfReader > > create(const Twine &Path, vfs::FileSystem &FS, const Twine &RemappingPath="")
Factory method to create an indexed reader.
Expected< InstrProfRecord > getInstrProfRecord(StringRef FuncName, uint64_t FuncHash, StringRef DeprecatedFuncName="", uint64_t *MismatchedFuncSum=nullptr)
Return the NamedInstrProfRecord associated with FuncName and FuncHash.
uint64_t getMaximumFunctionCount(bool UseCS)
Return the maximum of all known function counts.
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Base class for instruction visitors.
void visit(Iterator Start, Iterator End)
RetTy visitSelectInst(SelectInst &I)
instrprof_error get() const
std::string message() const override
Return the error message as a string.
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
const BasicBlock * getParent() const
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
A smart pointer to a reference-counted object that inherits from RefCountedBase or ThreadSafeRefCount...
void update(ArrayRef< uint8_t > Data)
This is an important class for using LLVM in a threaded context.
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
MDString * createString(StringRef Str)
Return the given string as metadata.
MDNode * createIrrLoopHeaderWeight(uint64_t Weight)
Return metadata containing an irreducible loop header weight.
ArrayRef< MDOperand > operands() const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
A Module instance is used to store all the information related to an LLVM module.
static bool isContextualIRPGOEnabled()
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
PGOInstrumentationUse(std::string Filename="", std::string RemappingFilename="", bool IsCS=false, IntrusiveRefCntPtr< vfs::FileSystem > FS=nullptr)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void preserveSet()
Mark an analysis set as preserved.
void preserve()
Mark an analysis as preserved.
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
uint64_t getOrCompColdCountThreshold() const
Returns ColdCountThreshold if set.
void refresh()
If no summary is present, attempt to refresh.
bool isColdCount(uint64_t C) const
Returns true if count C is considered cold.
bool isHotCount(uint64_t C) const
Returns true if count C is considered hot.
uint64_t getOrCompHotCountThreshold() const
Returns HotCountThreshold if set.
This class represents the LLVM 'select' instruction.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
constexpr bool empty() const
empty - Check if the string is empty.
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
TinyPtrVector - This class is specialized for cases where there are normally 0 or 1 element in a vect...
Triple - Helper class for working with autoconf configuration names.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
std::string str() const
Return the twine contents as a std::string.
The instances of the Type class are immutable: once they are created, they are never changed.
void print(raw_ostream &O, bool IsForDebug=false, bool NoDetails=false) const
Print the current type.
static IntegerType * getInt64Ty(LLVMContext &C)
Value * getOperand(unsigned i) const
Utility analysis that determines what values are worth profiling.
std::vector< CandidateInfo > get(InstrProfValueKind Kind) const
returns a list of value profiling candidates of the given kind
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
An efficient, type-erasing, non-owning reference to a callable.
A raw_ostream that writes to an std::string.
The virtual file system interface.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
This file contains the declaration of the Comdat class, which represents a single COMDAT in LLVM.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
uint64_t getFuncHash(const FuncRecordTy *Record)
Return the structural hash associated with the function.
void checkExpectAnnotations(Instruction &I, const ArrayRef< uint32_t > ExistingWeights, bool IsFrontend)
checkExpectAnnotations - compares PGO counters to the thresholds used for llvm.expect and warns if th...
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< FuncNode * > Func
void write64le(void *P, uint64_t V)
IntrusiveRefCntPtr< FileSystem > getRealFileSystem()
Gets a vfs::FileSystem for the 'real' file system, as seen by the operating system.
This is an optimization pass for GlobalISel generic memory operations.
void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
std::string getPGOFuncName(const Function &F, bool InLTO=false, uint64_t Version=INSTR_PROF_INDEX_VERSION)
Please use getIRPGOFuncName for LLVM IR instrumentation.
void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName)
Create the PGOFuncName metadata if PGOFuncName is different from the function's raw name.
unsigned GetSuccessorNumber(const BasicBlock *BB, const BasicBlock *Succ)
Search for the specified successor of basic block BB and return its position in the terminator instru...
std::string getIRPGOFuncName(const Function &F, bool InLTO=false)
Function::ProfileCount ProfileCount
auto successors(const MachineBasicBlock *BB)
void handleAllErrors(Error E, HandlerTs &&... Handlers)
Behaves the same as handleErrors, except that by contract all errors must be handled by the given han...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
cl::opt< InstrProfCorrelator::ProfCorrelatorKind > ProfileCorrelate("profile-correlate", cl::desc("Use debug info or binary file to correlate profiles."), cl::init(InstrProfCorrelator::NONE), cl::values(clEnumValN(InstrProfCorrelator::NONE, "", "No profile correlation"), clEnumValN(InstrProfCorrelator::DEBUG_INFO, "debug-info", "Use debug info to correlate"), clEnumValN(InstrProfCorrelator::BINARY, "binary", "Use binary to correlate")))
DenseMap< BasicBlock *, ColorVector > colorEHFunclets(Function &F)
If an EH funclet personality is in use (see isFuncletEHPersonality), this will recompute which blocks...
cl::opt< bool > PGOWarnMissing
raw_ostream & WriteGraph(raw_ostream &O, const GraphType &G, bool ShortNames=false, const Twine &Title="")
bool SplitIndirectBrCriticalEdges(Function &F, bool IgnoreBlocksWithoutPHI, BranchProbabilityInfo *BPI=nullptr, BlockFrequencyInfo *BFI=nullptr)
bool isScopedEHPersonality(EHPersonality Pers)
Returns true if this personality uses scope-style EH IR instructions: catchswitch,...
cl::opt< bool > DebugInfoCorrelate
void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
OperandBundleDefT< Value * > OperandBundleDef
cl::opt< std::string > ViewBlockFreqFuncName("view-bfi-func-name", cl::Hidden, cl::desc("The option to specify " "the name of the function " "whose CFG will be displayed."))
GlobalVariable * createPGOFuncNameVar(Function &F, StringRef PGOFuncName)
Create and return the global variable for function name used in PGO instrumentation.
void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
auto reverse(ContainerTy &&C)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
cl::opt< bool > NoPGOWarnMismatch
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
cl::opt< PGOViewCountsType > PGOViewCounts("pgo-view-counts", cl::Hidden, cl::desc("A boolean option to show CFG dag or text with " "block profile counts and branch probabilities " "right after PGO profile annotation step. The " "profile counts are computed using branch " "probabilities from the runtime profile data and " "block frequency propagation algorithm. To view " "the raw counts from the profile, use option " "-pgo-view-raw-counts instead. To limit graph " "display to only one function, use filtering option " "-view-bfi-func-name."), cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), clEnumValN(PGOVCT_Graph, "graph", "show a graph."), clEnumValN(PGOVCT_Text, "text", "show in text.")))
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
static uint32_t scaleBranchCount(uint64_t Count, uint64_t Scale)
Scale an individual branch count.
void appendToCompilerUsed(Module &M, ArrayRef< GlobalValue * > Values)
Adds global values to the llvm.compiler.used list.
BasicBlock * SplitCriticalEdge(Instruction *TI, unsigned SuccNum, const CriticalEdgeSplittingOptions &Options=CriticalEdgeSplittingOptions(), const Twine &BBName="")
If this edge is a critical edge, insert a new node to split the critical edge.
void ViewGraph(const GraphType &G, const Twine &Name, bool ShortNames=false, const Twine &Title="", GraphProgram::Name Program=GraphProgram::DOT)
ViewGraph - Emit a dot graph, run 'dot', run gv on the postscript file, then cleanup.
bool isCriticalEdge(const Instruction *TI, unsigned SuccNum, bool AllowIdenticalEdges=false)
Return true if the specified edge is a critical edge.
static uint64_t calculateCountScale(uint64_t MaxCount)
Calculate what to divide by to scale counts.
bool canRenameComdatFunc(const Function &F, bool CheckAddressTaken=false)
Check if we can safely rename this Comdat function.
void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput)
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
auto predecessors(const MachineBasicBlock *BB)
void setProfMetadata(Module *M, Instruction *TI, ArrayRef< uint64_t > EdgeCounts, uint64_t MaxCount)
cl::opt< bool > EnableVTableValueProfiling("enable-vtable-value-profiling", cl::init(false), cl::desc("If true, the virtual table address will be instrumented to know " "the types of a C++ pointer. The information is used in indirect " "call promotion to do selective vtable-based comparison."))
SuccIterator< const Instruction, const BasicBlock > const_succ_iterator
cl::opt< bool > NoPGOWarnMismatchComdatWeak
Implement std::hash so that hash_code can be used in STL containers.
static constexpr roundingMode rmNearestTiesToEven
static const fltSemantics & IEEEdouble() LLVM_READNONE
DOTGraphTraits(bool isSimple=false)
static std::string getGraphName(const PGOUseFunc *G)
std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph)
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to 'dot...
DefaultDOTGraphTraits - This class provides the default implementations of all of the DOTGraphTraits ...
static ChildIteratorType child_end(const NodeRef N)
static NodeRef getEntryNode(const PGOUseFunc *G)
static ChildIteratorType child_begin(const NodeRef N)
static nodes_iterator nodes_end(const PGOUseFunc *G)
static nodes_iterator nodes_begin(const PGOUseFunc *G)
Profiling information for a single function.
std::vector< uint64_t > Counts
CountPseudoKind getCountPseudoKind() const
uint32_t getNumValueSites(uint32_t ValueKind) const
Return the number of instrumented sites for ValueKind.
static void setCSFlagInHash(uint64_t &FuncHash)
Instruction * AnnotatedInst