llvm.org GIT mirror llvm / 8f580be
[PGO] Handle cases of failing to split critical edges Fix PR41279 where critical edges to EHPad are not split. The fix is to not instrument those critical edges. We used to be able to know the size of counters right after MST is computed. With this, we have to pre-collect the instrument BBs to know the size, and then instrument them. Differential Revision: https://reviews.llvm.org/D62439 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@361882 91177308-0d34-0410-b5e6-96231b3b80d8 Rong Xu 3 months ago
2 changed file(s) with 123 addition(s) and 44 deletion(s). Raw diff Collapse all Expand all
572572 // The Minimum Spanning Tree of function CFG.
573573 CFGMST MST;
574574
575 // Collect all the BBs that will be instrumented, and store them in
576 // InstrumentBBs.
577 void getInstrumentBBs(std::vector &InstrumentBBs);
578
575579 // Given an edge, find the BB that will be instrumented.
576580 // Return nullptr if there is no BB to be instrumented.
577581 BasicBlock *getInstrBB(Edge *E);
628632 if (CreateGlobalVar)
629633 FuncNameVar = createPGOFuncNameVar(F, FuncName);
630634 }
631
632 // Return the number of profile counters needed for the function.
633 unsigned getNumCounters() {
634 unsigned NumCounters = 0;
635 for (auto &E : this->MST.AllEdges) {
636 if (!E->InMST && !E->Removed)
637 NumCounters++;
638 }
639 return NumCounters + SIVisitor.getNumOfSelectInsts();
640 }
641635 };
642636
643637 } // end anonymous namespace
752746 }
753747 }
754748
749 // Collect all the BBs that will be instrumented and return them in
750 // InstrumentBBs.
751 template
752 void FuncPGOInstrumentation::getInstrumentBBs(
753 std::vector &InstrumentBBs) {
754 // Use a worklist as we will update the vector during the iteration.
755 std::vector EdgeList;
756 EdgeList.reserve(MST.AllEdges.size());
757 for (auto &E : MST.AllEdges)
758 EdgeList.push_back(E.get());
759
760 for (auto &E : EdgeList) {
761 BasicBlock *InstrBB = getInstrBB(E);
762 if (InstrBB)
763 InstrumentBBs.push_back(InstrBB);
764 }
765 }
766
755767 // Given a CFG edge E to be instrumented, find which BB to place the instrumented
756768 // code. The function will split the critical edge if necessary.
757769 template
782794 << " --> " << getBBInfo(DestBB).Index << "\n");
783795 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
784796 BasicBlock *InstrBB = SplitCriticalEdge(TI, SuccNum);
785 assert(InstrBB && "Critical edge is not split");
786
797 if (!InstrBB) {
798 LLVM_DEBUG(
799 dbgs() << "Fail to split critical edge: not instrument this edge.\n");
800 return nullptr;
801 }
802 // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB.
803 MST.addEdge(SrcBB, InstrBB, 0);
804 // Second one: Add new edge of InstrBB->DestBB.
805 Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0);
806 NewEdge1.InMST = true;
787807 E->Removed = true;
808
788809 return InstrBB;
789810 }
790811
800821
801822 FuncPGOInstrumentation FuncInfo(F, ComdatMembers, true, BPI,
802823 BFI, IsCS);
803 unsigned NumCounters = FuncInfo.getNumCounters();
824 std::vector InstrumentBBs;
825 FuncInfo.getInstrumentBBs(InstrumentBBs);
826 unsigned NumCounters =
827 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
804828
805829 uint32_t I = 0;
806830 Type *I8PtrTy = Type::getInt8PtrTy(M->getContext());
807 for (auto &E : FuncInfo.MST.AllEdges) {
808 BasicBlock *InstrBB = FuncInfo.getInstrBB(E.get());
809 if (!InstrBB)
810 continue;
811
831 for (auto *InstrBB : InstrumentBBs) {
812832 IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt());
813833 assert(Builder.GetInsertPoint() != InstrBB->end() &&
814834 "Cannot get the Instrumentation point");
10381058 // edges and the BB. Return false on error.
10391059 bool PGOUseFunc::setInstrumentedCounts(
10401060 const std::vector &CountFromProfile) {
1061
1062 std::vector InstrumentBBs;
1063 FuncInfo.getInstrumentBBs(InstrumentBBs);
1064 unsigned NumCounters =
1065 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
10411066 // The number of counters here should match the number of counters
10421067 // in profile. Return if they mismatch.
1043 if (FuncInfo.getNumCounters() != CountFromProfile.size()) {
1068 if (NumCounters != CountFromProfile.size()) {
10441069 return false;
10451070 }
1046 // Use a worklist as we will update the vector during the iteration.
1047 std::vector WorkList;
1048 for (auto &E : FuncInfo.MST.AllEdges)
1049 WorkList.push_back(E.get());
1050
10511071 uint32_t I = 0;
1052 for (auto &E : WorkList) {
1053 BasicBlock *InstrBB = FuncInfo.getInstrBB(E);
1054 if (!InstrBB)
1055 continue;
1072 for (BasicBlock *InstrBB : InstrumentBBs) {
10561073 uint64_t CountValue = CountFromProfile[I++];
1057 if (!E->Removed) {
1058 getBBInfo(InstrBB).setBBInfoCount(CountValue);
1059 E->setEdgeCount(CountValue);
1060 continue;
1074 UseBBInfo &Info = getBBInfo(InstrBB);
1075 Info.setBBInfoCount(CountValue);
1076 // If only one in-edge, the edge profile count should be the same as BB
1077 // profile count.
1078 if (Info.InEdges.size() == 1) {
1079 Info.InEdges[0]->setEdgeCount(CountValue);
10611080 }
1062
1063 // Need to add two new edges.
1064 BasicBlock *SrcBB = const_cast(E->SrcBB);
1065 BasicBlock *DestBB = const_cast(E->DestBB);
1066 // Add new edge of SrcBB->InstrBB.
1067 PGOUseEdge &NewEdge = FuncInfo.MST.addEdge(SrcBB, InstrBB, 0);
1068 NewEdge.setEdgeCount(CountValue);
1069 // Add new edge of InstrBB->DestBB.
1070 PGOUseEdge &NewEdge1 = FuncInfo.MST.addEdge(InstrBB, DestBB, 0);
1071 NewEdge1.setEdgeCount(CountValue);
1072 NewEdge1.InMST = true;
1073 getBBInfo(InstrBB).setBBInfoCount(CountValue);
1081 // If only one out-edge, the edge profile count should be the same as BB
1082 // profile count.
1083 if (Info.OutEdges.size() == 1) {
1084 Info.OutEdges[0]->setEdgeCount(CountValue);
1085 }
10741086 }
10751087 ProfileCountSize = CountFromProfile.size();
10761088 CountPosition = I;
0 ; Test that instrumentation works fine when splitting a critical edge fails.
1 ; RUN: opt < %s -pgo-instr-gen -S | FileCheck %s --check-prefix=GEN
2 ; RUN: opt < %s -passes=pgo-instr-gen -S | FileCheck %s --check-prefix=GEN
3
4 declare void @f3({ i8*, i64 }*, { i8*, i64 }*, i64)
5 declare { i8*, i64 } @f0({ i8*, i64 }*)
6 declare i64 @f1()
7 declare void @invok2({ i8*, i64 }*, i8* noalias readonly align 1, i64)
8 declare void @invok1({ i8*, i64 }*, { i8*, i64 }*, i64)
9 declare i32 @__CxxFrameHandler3(...)
10
11 define internal void @foo({ i8*, i64 }*, { i8*, i64 }*) personality i32 (...)* @__CxxFrameHandler3 {
12 %3 = alloca i8, align 1
13 store i8 0, i8* %3, align 1
14 %4 = call i64 @f1()
15 %5 = icmp ult i64 %4, 32
16 br i1 %5, label %7, label %13
17
18 6:
19 cleanupret from %17 unwind to caller
20 ; GEN: 6:
21 ; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @__profn__stdin__foo, i32 0, i32 0), i64 60927483247, i32 4, i32 2)
22
23 7:
24 store i8 1, i8* %3, align 1
25 %8 = call { i8*, i64 } @f0({ i8*, i64 }* %0)
26 %9 = extractvalue { i8*, i64 } %8, 0
27 %10 = extractvalue { i8*, i64 } %8, 1
28 invoke void @invok1({ i8*, i64 }* %1, { i8*, i64 }* %0, i64 1)
29 to label %11 unwind label %16
30 ; GEN: 7:
31 ; GEN-NOT: call void @llvm.instrprof.increment
32
33 11:
34 store i8 0, i8* %3, align 1
35 invoke void @invok2({ i8*, i64 }* %1, i8* noalias readonly align 1 %9, i64 %10)
36 to label %12 unwind label %16
37 ; GEN: 11:
38 ; GEN-NOT: call void @llvm.instrprof.increment
39
40 12:
41 store i8 0, i8* %3, align 1
42 br label %14
43 ; GEN: 12:
44 ; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @__profn__stdin__foo, i32 0, i32 0), i64 60927483247, i32 4, i32 1)
45
46 13:
47 call void @f3({ i8*, i64 }* %0, { i8*, i64 }* %1, i64 1)
48 br label %14
49 ; GEN: 13:
50 ; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @__profn__stdin__foo, i32 0, i32 0), i64 60927483247, i32 4, i32 0)
51
52 14:
53 ret void
54
55 15:
56 store i8 0, i8* %3, align 1
57 br label %6
58 ; GEN: 15:
59 ; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @__profn__stdin__foo, i32 0, i32 0), i64 60927483247, i32 4, i32 3)
60
61 16:
62 %17 = cleanuppad within none []
63 %18 = load i8, i8* %3, align 1
64 %19 = trunc i8 %18 to i1
65 br i1 %19, label %15, label %6
66 }