llvm.org GIT mirror llvm / dd33e17
Irreducible loop metadata for more accurate block frequency under PGO. Summary: Currently the block frequency analysis is an approximation for irreducible loops. The new irreducible loop metadata is used to annotate the irreducible loop headers with their header weights based on the PGO profile (currently this is approximated to be evenly weighted) and to help improve the accuracy of the block frequency analysis for irreducible loops. This patch is a basic support for this. Reviewers: davidxl Reviewed By: davidxl Subscribers: mehdi_amini, llvm-commits, eraman Differential Revision: https://reviews.llvm.org/D39028 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@317278 91177308-0d34-0410-b5e6-96231b3b80d8 Hiroshi Yamauchi 1 year, 9 months ago
21 changed file(s) with 601 addition(s) and 11 deletion(s). Raw diff Collapse all Expand all
51935193 !1 = !{!1} ; an identifier for the inner loop
51945194 !2 = !{!2} ; an identifier for the outer loop
51955195
5196 '``irr_loop``' Metadata
5197 ^^^^^^^^^^^^^^^^^^^^^^^
5198
5199 ``irr_loop`` metadata may be attached to the terminator instruction of a basic
5200 block that's an irreducible loop header (note that an irreducible loop has more
5201 than one header basic block). If ``irr_loop`` metadata is attached to the
5202 terminator instruction of a basic block that is not really an irreducible loop
5203 header, the behavior is undefined. The intent of this metadata is to improve the
5204 accuracy of the block frequency propagation. For example, in the code below, the
5205 block ``header0`` may have a loop header weight (relative to the other headers of
5206 the irreducible loop) of 100:
5207
5208 .. code-block:: llvm
5209
5210 header0:
5211 ...
5212 br i1 %cmp, label %t1, label %t2, !irr_loop !0
5213
5214 ...
5215 !0 = !{"loop_header_weight", i64 100}
5216
5217 Irreducible loop header weights are typically based on profile data.
5218
51965219 '``invariant.group``' Metadata
51975220 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
51985221
7474 /// the enclosing function's count (if available) and returns the value.
7575 Optional getProfileCountFromFreq(uint64_t Freq) const;
7676
77 /// \brief Returns true if \p BB is an irreducible loop header
78 /// block. Otherwise false.
79 bool isIrrLoopHeader(const BasicBlock *BB);
80
7781 // Set the frequency of the given basic block.
7882 void setBlockFreq(const BasicBlock *BB, uint64_t Freq);
7983
1919 #include "llvm/ADT/Optional.h"
2020 #include "llvm/ADT/PostOrderIterator.h"
2121 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/ADT/SparseBitVector.h"
2223 #include "llvm/ADT/Twine.h"
2324 #include "llvm/ADT/iterator_range.h"
2425 #include "llvm/IR/BasicBlock.h"
413414 /// \brief Data about each block. This is used downstream.
414415 std::vector Freqs;
415416
417 /// \brief Whether each block is an irreducible loop header.
418 /// This is used downstream.
419 SparseBitVector<> IsIrrLoopHeader;
420
416421 /// \brief Loop data: see initializeLoops().
417422 std::vector Working;
418423
491496 /// the backedges going into each of the loop headers.
492497 void adjustLoopHeaderMass(LoopData &Loop);
493498
499 void distributeIrrLoopHeaderMass(Distribution &Dist);
500
494501 /// \brief Package up a loop.
495502 void packageLoop(LoopData &Loop);
496503
519526 const BlockNode &Node) const;
520527 Optional getProfileCountFromFreq(const Function &F,
521528 uint64_t Freq) const;
529 bool isIrrLoopHeader(const BlockNode &Node);
522530
523531 void setBlockFreq(const BlockNode &Node, uint64_t Freq);
524532
972980 return BlockFrequencyInfoImplBase::getProfileCountFromFreq(F, Freq);
973981 }
974982
// Block-typed convenience overload: maps BB to its node via getNode() and
// forwards the query to BlockFrequencyInfoImplBase::isIrrLoopHeader().
983 bool isIrrLoopHeader(const BlockT *BB) {
984 return BlockFrequencyInfoImplBase::isIrrLoopHeader(getNode(BB));
985 }
986
975987 void setBlockFreq(const BlockT *BB, uint64_t Freq);
976988
977989 Scaled64 getFloatingBlockFreq(const BlockT *BB) const {
11391151 DEBUG(dbgs() << "compute-mass-in-loop: " << getLoopName(Loop) << "\n");
11401152
11411153 if (Loop.isIrreducible()) {
1142 BlockMass Remaining = BlockMass::getFull();
1154 DEBUG(dbgs() << "isIrreducible = true\n");
1155 Distribution Dist;
1156 unsigned NumHeadersWithWeight = 0;
11431157 for (uint32_t H = 0; H < Loop.NumHeaders; ++H) {
1144 auto &Mass = Working[Loop.Nodes[H].Index].getMass();
1145 Mass = Remaining * BranchProbability(1, Loop.NumHeaders - H);
1146 Remaining -= Mass;
1147 }
1158 auto &HeaderNode = Loop.Nodes[H];
1159 const BlockT *Block = getBlock(HeaderNode);
1160 IsIrrLoopHeader.set(Loop.Nodes[H].Index);
1161 Optional HeaderWeight = Block->getIrrLoopHeaderWeight();
1162 if (!HeaderWeight)
1163 continue;
1164 DEBUG(dbgs() << getBlockName(HeaderNode)
1165 << " has irr loop header weight " << HeaderWeight.getValue()
1166 << "\n");
1167 NumHeadersWithWeight++;
1168 uint64_t HeaderWeightValue = HeaderWeight.getValue();
1169 if (HeaderWeightValue)
1170 Dist.addLocal(HeaderNode, HeaderWeightValue);
1171 }
1172 if (NumHeadersWithWeight != Loop.NumHeaders) {
1173 // Not all headers have a weight metadata. Distribute weight evenly.
1174 Dist = Distribution();
1175 for (uint32_t H = 0; H < Loop.NumHeaders; ++H) {
1176 auto &HeaderNode = Loop.Nodes[H];
1177 Dist.addLocal(HeaderNode, 1);
1178 }
1179 }
1180 distributeIrrLoopHeaderMass(Dist);
11481181 for (const BlockNode &M : Loop.Nodes)
11491182 if (!propagateMassToSuccessors(&Loop, M))
11501183 llvm_unreachable("unhandled irreducible control flow");
1151
1152 adjustLoopHeaderMass(Loop);
1184 if (NumHeadersWithWeight != Loop.NumHeaders)
1185 // Not all headers have a weight metadata. Adjust header mass.
1186 adjustLoopHeaderMass(Loop);
11531187 } else {
11541188 Working[Loop.getHeader().Index].getMass() = BlockMass::getFull();
11551189 if (!propagateMassToSuccessors(&Loop, Loop.getHeader()))
12841318 BlockFrequencyInfoImplBase::getBlockProfileCount(
12851319 *F->getFunction(), getNode(&BB)))
12861320 OS << ", count = " << ProfileCount.getValue();
1321 if (Optional IrrLoopHeaderWeight =
1322 BB.getIrrLoopHeaderWeight())
1323 OS << ", irr_loop_header_weight = " << IrrLoopHeaderWeight.getValue();
12871324 OS << "\n";
12881325 }
12891326
9595 using probability_iterator = std::vector::iterator;
9696 using const_probability_iterator =
9797 std::vector::const_iterator;
98
99 Optional IrrLoopHeaderWeight;
98100
99101 /// Keep track of the physical registers that are livein of the basicblock.
100102 using LiveInVector = std::vector;
728730 /// Return the MCSymbol for this basic block.
729731 MCSymbol *getSymbol() const;
730732
/// Return the stored IrrLoopHeaderWeight member. It is an Optional, so the
/// result is empty when no weight has been recorded for this block.
733 Optional getIrrLoopHeaderWeight() const {
734 return IrrLoopHeaderWeight;
735 }
736
/// Record \p Weight as this block's irreducible loop header weight,
/// overwriting whatever IrrLoopHeaderWeight held before.
737 void setIrrLoopHeaderWeight(uint64_t Weight) {
738 IrrLoopHeaderWeight = Weight;
739 }
740
731741 private:
732742 /// Return probability iterator corresponding to the I successor iterator.
733743 probability_iterator getProbabilityIterator(succ_iterator I);
6161 Optional getBlockProfileCount(const MachineBasicBlock *MBB) const;
6262 Optional getProfileCountFromFreq(uint64_t Freq) const;
6363
64 bool isIrrLoopHeader(const MachineBasicBlock *MBB);
65
6466 const MachineFunction *getFunction() const;
6567 const MachineBranchProbabilityInfo *getMBPI() const;
6668 void view(const Twine &Name, bool isSimple = true) const;
397397 /// \brief Return true if it is legal to hoist instructions into this block.
398398 bool isLegalToHoistInto() const;
399399
400 Optional getIrrLoopHeaderWeight() const;
401
400402 private:
401403 /// \brief Increment the internal refcount of the number of BlockAddresses
402404 /// referencing this BasicBlock by \p Amt.
100100 MD_absolute_symbol = 21, // "absolute_symbol"
101101 MD_associated = 22, // "associated"
102102 MD_callees = 23, // "callees"
103 MD_irr_loop = 24, // "irr_loop"
103104 };
104105
105106 /// Known operand bundle tag IDs, which always have the same value. All
172172 /// base type, access type and offset relative to the base type.
173173 MDNode *createTBAAStructTagNode(MDNode *BaseType, MDNode *AccessType,
174174 uint64_t Offset, bool IsConstant = false);
175
176 /// \brief Return metadata containing an irreducible loop header weight.
177 MDNode *createIrrLoopHeaderWeight(uint64_t Weight);
175178 };
176179
177180 } // end namespace llvm
6767 void setProfMetadata(Module *M, Instruction *TI, ArrayRef EdgeCounts,
6868 uint64_t MaxCount);
6969
70 void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count);
71
7072 } // end namespace llvm
7173
7274 #endif // LLVM_TRANSFORMS_PGOINSTRUMENTATION_H
215215 if (!BFI)
216216 return None;
217217 return BFI->getProfileCountFromFreq(*getFunction(), Freq);
218 }
219
// Returns whether BB is an irreducible loop header according to the
// underlying implementation object (BFI). Unlike the neighbouring query that
// returns None when BFI is missing, this one asserts that BFI is set.
220 bool BlockFrequencyInfo::isIrrLoopHeader(const BasicBlock *BB) {
221 assert(BFI && "Expected analysis to be available");
222 return BFI->isIrrLoopHeader(BB);
218223 }
219224
220225 void BlockFrequencyInfo::setBlockFreq(const BasicBlock *BB, uint64_t Freq) {
270270 // Swap with a default-constructed std::vector, since std::vector<>::clear()
271271 // does not actually clear heap storage.
272272 std::vector().swap(Freqs);
273 IsIrrLoopHeader.clear();
273274 std::vector().swap(Working);
274275 Loops.clear();
275276 }
279280 /// Releases all memory not used downstream. In particular, saves Freqs.
280281 static void cleanup(BlockFrequencyInfoImplBase &BFI) {
281282 std::vector SavedFreqs(std::move(BFI.Freqs));
283 SparseBitVector<> SavedIsIrrLoopHeader(std::move(BFI.IsIrrLoopHeader));
282284 BFI.clear();
283285 BFI.Freqs = std::move(SavedFreqs);
286 BFI.IsIrrLoopHeader = std::move(SavedIsIrrLoopHeader);
284287 }
285288
286289 bool BlockFrequencyInfoImplBase::addToDist(Distribution &Dist,
571574 return BlockCount.getLimitedValue();
572575 }
573576
// Looks up Node in the IsIrrLoopHeader bit vector, indexed by Node.Index.
// An invalid node is reported as "not a header" rather than being tested.
577 bool
578 BlockFrequencyInfoImplBase::isIrrLoopHeader(const BlockNode &Node) {
579 if (!Node.isValid())
580 return false;
581 return IsIrrLoopHeader.test(Node.Index);
582 }
583
574584 Scaled64
575585 BlockFrequencyInfoImplBase::getFloatingBlockFreq(const BlockNode &Node) const {
576586 if (!Node.isValid())
818828 DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr));
819829 }
820830 }
831
// Splits one full BlockMass among the nodes listed in Dist.Weights.
// A DitheringDistributer hands out a share per weight (takeMass(W.Amount)),
// and that share is written directly as the mass of the weight's target node
// in Working. Every weight is expected to be of type Weight::Local.
832 void BlockFrequencyInfoImplBase::distributeIrrLoopHeaderMass(Distribution &Dist) {
// Start from the full mass; D tracks what is left as shares are taken.
833 BlockMass LoopMass = BlockMass::getFull();
834 DitheringDistributer D(Dist, LoopMass);
835 for (const Weight &W : Dist.Weights) {
836 BlockMass Taken = D.takeMass(W.Amount);
837 assert(W.Type == Weight::Local && "all weights should be local");
// Overwrite (not accumulate) the target node's mass with its share.
838 Working[W.TargetNode.Index].getMass() = Taken;
839 DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr));
840 }
841 }
// Constructs a machine basic block for function MF, optionally tied to the
// IR block B. Number starts at -1 and the instruction list's Parent is
// pointed back at this block.
4141 MachineBasicBlock::MachineBasicBlock(MachineFunction &MF, const BasicBlock *B)
4242 : BB(B), Number(-1), xParent(&MF) {
4343 Insts.Parent = this;
// B may be null; only when an IR block is supplied is its irreducible loop
// header weight (possibly empty) copied into this machine block.
44 if (B)
45 IrrLoopHeaderWeight = B->getIrrLoopHeaderWeight();
4446 }
4547
4648 MachineBasicBlock::~MachineBasicBlock() {
335337 if (!Probs.empty())
336338 OS << '(' << *getProbabilityIterator(SI) << ')';
337339 }
340 OS << '\n';
341 }
342 if (IrrLoopHeaderWeight) {
343 if (Indexes) OS << '\t';
344 OS << " Irreducible loop header weight: "
345 << IrrLoopHeaderWeight.getValue();
338346 OS << '\n';
339347 }
340348 }
233233 return MBFI ? MBFI->getProfileCountFromFreq(*F, Freq) : None;
234234 }
235235
// Returns whether MBB is an irreducible loop header according to the
// underlying implementation object (MBFI). Neighbouring queries fall back to
// a default when MBFI is null; this one asserts that MBFI is set instead.
236 bool
237 MachineBlockFrequencyInfo::isIrrLoopHeader(const MachineBasicBlock *MBB) {
238 assert(MBFI && "Expected analysis to be available");
239 return MBFI->isIrrLoopHeader(MBB);
240 }
241
236242 const MachineFunction *MachineBlockFrequencyInfo::getFunction() const {
237243 return MBFI ? MBFI->getFunction() : nullptr;
238244 }
446446 const LandingPadInst *BasicBlock::getLandingPadInst() const {
447447 return dyn_cast(getFirstNonPHI());
448448 }
449
// Reads the irreducible loop header weight attached to this block's
// terminator as MD_irr_loop metadata. The metadata node is expected to have
// the string "loop_header_weight" as operand 0 and an integer constant as
// operand 1. Returns that integer (zero-extended) when the name matches, and
// an empty Optional when the metadata is absent or the name differs.
450 Optional BasicBlock::getIrrLoopHeaderWeight() const {
// NOTE(review): TI is dereferenced below without a null check — confirm
// that getTerminator() cannot return null for blocks reaching this call.
451 const TerminatorInst *TI = getTerminator();
452 if (MDNode *MDIrrLoopHeader =
453 TI->getMetadata(LLVMContext::MD_irr_loop)) {
454 MDString *MDName = cast(MDIrrLoopHeader->getOperand(0));
455 if (MDName->getString().equals("loop_header_weight")) {
456 auto *CI = mdconst::extract(MDIrrLoopHeader->getOperand(1));
457 return Optional(CI->getValue().getZExtValue());
458 }
459 }
// No irr_loop metadata, or metadata with an unrecognized name.
460 return Optional();
461 }
5959 {MD_absolute_symbol, "absolute_symbol"},
6060 {MD_associated, "associated"},
6161 {MD_callees, "callees"},
62 {MD_irr_loop, "irr_loop"},
6263 };
6364
6465 for (auto &MDKind : MDKinds) {
196196 }
197197 return MDNode::get(Context, {BaseType, AccessType, createConstant(Off)});
198198 }
199
// Builds a two-operand metadata node describing an irreducible loop header
// weight: operand 0 is the string "loop_header_weight", operand 1 is Weight
// as a 64-bit integer constant.
200 MDNode *MDBuilder::createIrrLoopHeaderWeight(uint64_t Weight) {
201 SmallVector Vals(2);
202 Vals[0] = createString("loop_header_weight");
203 Vals[1] = createConstant(ConstantInt::get(Type::getInt64Ty(Context), Weight));
204 return MDNode::get(Context, Vals);
205 }
843843 PGOUseFunc(Function &Func, Module *Modu,
844844 std::unordered_multimap &ComdatMembers,
845845 BranchProbabilityInfo *BPI = nullptr,
846 BlockFrequencyInfo *BFI = nullptr)
847 : F(Func), M(Modu), FuncInfo(Func, ComdatMembers, false, BPI, BFI),
846 BlockFrequencyInfo *BFIin = nullptr)
847 : F(Func), M(Modu), BFI(BFIin),
848 FuncInfo(Func, ComdatMembers, false, BPI, BFIin),
848849 FreqAttr(FFA_Normal) {}
849850
850851 // Read counts for the instrumented BB from profile.
861862
862863 // Annotate the value profile call sites for one value kind.
863864 void annotateValueSites(uint32_t Kind);
865
866 // Annotate the irreducible loop header weights.
867 void annotateIrrLoopHeaderWeights();
864868
865869 // The hotness of the function from the profile count.
866870 enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot };
893897 private:
894898 Function &F;
895899 Module *M;
900 BlockFrequencyInfo *BFI;
896901
897902 // This member stores the shared information with class PGOGenFunc.
898903 FuncPGOInstrumentation FuncInfo;
11821187 }
11831188 }
11841189
// Walks every basic block of F and, for each one that BFI reports as an
// irreducible loop header, attaches that block's profile count
// (getBBInfo(&BB).CountValue) to its terminator as irr_loop metadata.
// NOTE(review): BFI is dereferenced unconditionally here, while the
// constructor's BFIin parameter defaults to nullptr — confirm every caller
// that reaches this method supplies a BlockFrequencyInfo.
1190 void PGOUseFunc::annotateIrrLoopHeaderWeights() {
1191 DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n");
1192 // Find irr loop headers
1193 for (auto &BB : F) {
1194 if (BFI->isIrrLoopHeader(&BB)) {
1195 TerminatorInst *TI = BB.getTerminator();
1196 const UseBBInfo &BBCountInfo = getBBInfo(&BB);
1197 setIrrLoopHeaderMetadata(M, TI, BBCountInfo.CountValue);
1198 }
1199 }
1200 }
1201
11851202 void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) {
11861203 Module *M = F.getParent();
11871204 IRBuilder<> Builder(&SI);
14401457 Func.populateCounters();
14411458 Func.setBranchWeights();
14421459 Func.annotateValueSites();
1460 Func.annotateIrrLoopHeaderWeights();
14431461 PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr();
14441462 if (FreqAttr == PGOUseFunc::FFA_Cold)
14451463 ColdFunctions.push_back(&F);
15811599
15821600 namespace llvm {
15831601
// Sets the MD_irr_loop metadata on instruction TI to a header-weight node
// carrying Count, created through an MDBuilder on M's LLVMContext.
1602 void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count) {
1603 MDBuilder MDB(M->getContext());
1604 TI->setMetadata(llvm::LLVMContext::MD_irr_loop,
1605 MDB.createIrrLoopHeaderWeight(Count));
1606 }
1607
15841608 template <> struct GraphTraits {
15851609 using NodeRef = const BasicBlock *;
15861610 using ChildIteratorType = succ_const_iterator;
0 ; RUN: opt < %s -analyze -block-freq | FileCheck %s
1 ; RUN: opt < %s -passes='print' -disable-output 2>&1 | FileCheck %s
2
3 ; Function Attrs: noinline norecurse nounwind readnone uwtable
4 define i32 @_Z11irreducibleii(i32 %iter_outer, i32 %iter_inner) local_unnamed_addr !prof !27 {
5 entry:
6 %cmp24 = icmp sgt i32 %iter_outer, 0
7 br i1 %cmp24, label %for.body, label %entry.for.cond.cleanup_crit_edge, !prof !28
8
9 entry.for.cond.cleanup_crit_edge: ; preds = %entry
10 br label %for.cond.cleanup
11
12 for.cond.cleanup: ; preds = %for.end, %entry.for.cond.cleanup_crit_edge
13 %sum.0.lcssa = phi i32 [ 0, %entry.for.cond.cleanup_crit_edge ], [ %sum.1, %for.end ]
14 ret i32 %sum.0.lcssa
15
16 for.body: ; preds = %for.end, %entry
17 %k.026 = phi i32 [ %inc12, %for.end ], [ 0, %entry ]
18 %sum.025 = phi i32 [ %sum.1, %for.end ], [ 0, %entry ]
19 %rem23 = and i32 %k.026, 1
20 %cmp1 = icmp eq i32 %rem23, 0
21 br i1 %cmp1, label %entry8, label %for.cond2, !prof !29
22
23 for.cond2: ; preds = %if.end9, %for.body
24 %sum.1 = phi i32 [ %add10, %if.end9 ], [ %sum.025, %for.body ]
25 %i.0 = phi i32 [ %inc, %if.end9 ], [ 0, %for.body ]
26 %cmp3 = icmp slt i32 %i.0, %iter_inner
27 br i1 %cmp3, label %for.body4, label %for.end, !prof !30, !irr_loop !31
28
29 for.body4: ; preds = %for.cond2
30 %rem5 = srem i32 %k.026, 3
31 %cmp6 = icmp eq i32 %rem5, 0
32 br i1 %cmp6, label %entry8, label %if.end9, !prof !32
33
34 entry8: ; preds = %for.body4, %for.body
35 %sum.2 = phi i32 [ %sum.025, %for.body ], [ %sum.1, %for.body4 ]
36 %i.1 = phi i32 [ 0, %for.body ], [ %i.0, %for.body4 ]
37 %add = add nsw i32 %sum.2, 4
38 br label %if.end9, !irr_loop !33
39
40 if.end9: ; preds = %entry8, %for.body4
41 %sum.3 = phi i32 [ %add, %entry8 ], [ %sum.1, %for.body4 ]
42 %i.2 = phi i32 [ %i.1, %entry8 ], [ %i.0, %for.body4 ]
43 %add10 = add nsw i32 %sum.3, 1
44 %inc = add nsw i32 %i.2, 1
45 br label %for.cond2, !irr_loop !34
46
47 for.end: ; preds = %for.cond2
48 %inc12 = add nuw nsw i32 %k.026, 1
49 %exitcond = icmp eq i32 %inc12, %iter_outer
50 br i1 %exitcond, label %for.cond.cleanup, label %for.body, !prof !35
51 }
52
53 !27 = !{!"function_entry_count", i64 1}
54 !28 = !{!"branch_weights", i32 1, i32 0}
55 !29 = !{!"branch_weights", i32 50, i32 50}
56 !30 = !{!"branch_weights", i32 950, i32 100}
57 !31 = !{!"loop_header_weight", i64 1050}
58 !32 = !{!"branch_weights", i32 323, i32 627}
59 !33 = !{!"loop_header_weight", i64 373}
60 !34 = !{!"loop_header_weight", i64 1000}
61 !35 = !{!"branch_weights", i32 1, i32 99}
62
63 ; CHECK-LABEL: Printing analysis {{.*}} for function '_Z11irreducibleii':
64 ; CHECK-NEXT: block-frequency-info: _Z11irreducibleii
65 ; CHECK-NEXT: - entry: {{.*}} count = 1
66 ; CHECK-NEXT: - entry.for.cond.cleanup_crit_edge: {{.*}} count = 0
67 ; CHECK-NEXT: - for.cond.cleanup: {{.*}} count = 1
68 ; CHECK-NEXT: - for.body: {{.*}} count = 100
69 ; CHECK-NEXT: - for.cond2: {{.*}} count = 1050, irr_loop_header_weight = 1050
70 ; CHECK-NEXT: - for.body4: {{.*}} count = 950
71 ; CHECK-NEXT: - entry8: {{.*}} count = 373, irr_loop_header_weight = 373
72 ; CHECK-NEXT: - if.end9: {{.*}} count = 1000, irr_loop_header_weight = 1000
73 ; CHECK-NEXT: - for.end: {{.*}} count = 100
74
75 @targets = local_unnamed_addr global [256 x i8*] zeroinitializer, align 16
76 @tracing = local_unnamed_addr global i32 0, align 4
77
78 ; Function Attrs: noinline norecurse nounwind uwtable
79 define i32 @_Z11irreduciblePh(i8* nocapture readonly %p) !prof !27 {
80 entry:
81 store <2 x i8*> , <2 x i8*>* bitcast ([256 x i8*]* @targets to <2 x i8*>*), align 16
82 store i8* blockaddress(@_Z11irreduciblePh, %TARGET_2), i8** getelementptr inbounds ([256 x i8*], [256 x i8*]* @targets, i64 0, i64 2), align 16
83 %0 = load i32, i32* @tracing, align 4
84 %tobool = icmp eq i32 %0, 0
85 br label %for.cond1
86
87 for.cond1: ; preds = %sw.default, %entry
88 %p.addr.0 = phi i8* [ %p, %entry ], [ %p.addr.4, %sw.default ]
89 %sum.0 = phi i32 [ 0, %entry ], [ %add25, %sw.default ]
90 %incdec.ptr = getelementptr inbounds i8, i8* %p.addr.0, i64 1
91 %1 = load i8, i8* %p.addr.0, align 1
92 %incdec.ptr2 = getelementptr inbounds i8, i8* %p.addr.0, i64 2
93 %2 = load i8, i8* %incdec.ptr, align 1
94 %conv3 = zext i8 %2 to i32
95 br label %dispatch_op
96
97 dispatch_op: ; preds = %sw.bb6, %for.cond1
98 %p.addr.1 = phi i8* [ %incdec.ptr2, %for.cond1 ], [ %p.addr.2, %sw.bb6 ]
99 %op.0 = phi i8 [ %1, %for.cond1 ], [ 1, %sw.bb6 ]
100 %oparg.0 = phi i32 [ %conv3, %for.cond1 ], [ %oparg.2, %sw.bb6 ]
101 %sum.1 = phi i32 [ %sum.0, %for.cond1 ], [ %add7, %sw.bb6 ]
102 switch i8 %op.0, label %sw.default [
103 i8 0, label %sw.bb
104 i8 1, label %dispatch_op.sw.bb6_crit_edge
105 i8 2, label %sw.bb15
106 ], !prof !36
107
108 dispatch_op.sw.bb6_crit_edge: ; preds = %dispatch_op
109 br label %sw.bb6
110
111 sw.bb: ; preds = %indirectgoto, %dispatch_op
112 %oparg.1 = phi i32 [ %oparg.0, %dispatch_op ], [ 0, %indirectgoto ]
113 %sum.2 = phi i32 [ %sum.1, %dispatch_op ], [ %sum.7, %indirectgoto ]
114 %add.neg = sub i32 -5, %oparg.1
115 %sub = add i32 %add.neg, %sum.2
116 br label %exit
117
118 TARGET_1: ; preds = %indirectgoto
119 %incdec.ptr4 = getelementptr inbounds i8, i8* %add.ptr.pn, i64 2
120 %3 = load i8, i8* %p.addr.5, align 1
121 %conv5 = zext i8 %3 to i32
122 br label %sw.bb6
123
124 sw.bb6: ; preds = %TARGET_1, %dispatch_op.sw.bb6_crit_edge
125 %p.addr.2 = phi i8* [ %incdec.ptr4, %TARGET_1 ], [ %p.addr.1, %dispatch_op.sw.bb6_crit_edge ]
126 %oparg.2 = phi i32 [ %conv5, %TARGET_1 ], [ %oparg.0, %dispatch_op.sw.bb6_crit_edge ]
127 %sum.3 = phi i32 [ %sum.7, %TARGET_1 ], [ %sum.1, %dispatch_op.sw.bb6_crit_edge ]
128 %mul = mul nsw i32 %oparg.2, 7
129 %add7 = add nsw i32 %sum.3, %mul
130 %rem46 = and i32 %add7, 1
131 %cmp8 = icmp eq i32 %rem46, 0
132 br i1 %cmp8, label %dispatch_op, label %if.then, !prof !37, !irr_loop !38
133
134 if.then: ; preds = %sw.bb6
135 %mul9 = mul nsw i32 %add7, 9
136 br label %indirectgoto
137
138 TARGET_2: ; preds = %indirectgoto
139 %incdec.ptr13 = getelementptr inbounds i8, i8* %add.ptr.pn, i64 2
140 %4 = load i8, i8* %p.addr.5, align 1
141 %conv14 = zext i8 %4 to i32
142 br label %sw.bb15
143
144 sw.bb15: ; preds = %TARGET_2, %dispatch_op
145 %p.addr.3 = phi i8* [ %p.addr.1, %dispatch_op ], [ %incdec.ptr13, %TARGET_2 ]
146 %oparg.3 = phi i32 [ %oparg.0, %dispatch_op ], [ %conv14, %TARGET_2 ]
147 %sum.4 = phi i32 [ %sum.1, %dispatch_op ], [ %sum.7, %TARGET_2 ]
148 %add16 = add nsw i32 %oparg.3, 3
149 %add17 = add nsw i32 %add16, %sum.4
150 br i1 %tobool, label %if.then18, label %exit, !prof !39, !irr_loop !40
151
152 if.then18: ; preds = %sw.bb15
153 %idx.ext = sext i32 %oparg.3 to i64
154 %add.ptr = getelementptr inbounds i8, i8* %p.addr.3, i64 %idx.ext
155 %mul19 = mul nsw i32 %add17, 17
156 br label %indirectgoto
157
158 unknown_op: ; preds = %indirectgoto
159 %sub24 = add nsw i32 %sum.7, -4
160 br label %sw.default
161
162 sw.default: ; preds = %unknown_op, %dispatch_op
163 %p.addr.4 = phi i8* [ %p.addr.5, %unknown_op ], [ %p.addr.1, %dispatch_op ]
164 %sum.5 = phi i32 [ %sub24, %unknown_op ], [ %sum.1, %dispatch_op ]
165 %add25 = add nsw i32 %sum.5, 11
166 br label %for.cond1
167
168 exit: ; preds = %sw.bb15, %sw.bb
169 %sum.6 = phi i32 [ %sub, %sw.bb ], [ %add17, %sw.bb15 ]
170 ret i32 %sum.6
171
172 indirectgoto: ; preds = %if.then18, %if.then
173 %add.ptr.pn = phi i8* [ %add.ptr, %if.then18 ], [ %p.addr.2, %if.then ]
174 %sum.7 = phi i32 [ %mul19, %if.then18 ], [ %mul9, %if.then ]
175 %p.addr.5 = getelementptr inbounds i8, i8* %add.ptr.pn, i64 1
176 %5 = load i8, i8* %add.ptr.pn, align 1
177 %idxprom21 = zext i8 %5 to i64
178 %arrayidx22 = getelementptr inbounds [256 x i8*], [256 x i8*]* @targets, i64 0, i64 %idxprom21
179 %6 = load i8*, i8** %arrayidx22, align 8
180 indirectbr i8* %6, [label %unknown_op, label %sw.bb, label %TARGET_1, label %TARGET_2], !prof !41, !irr_loop !42
181 }
182
183 !36 = !{!"branch_weights", i32 0, i32 0, i32 201, i32 1}
184 !37 = !{!"branch_weights", i32 201, i32 300}
185 !38 = !{!"loop_header_weight", i64 501}
186 !39 = !{!"branch_weights", i32 100, i32 0}
187 !40 = !{!"loop_header_weight", i64 100}
188 !41 = !{!"branch_weights", i32 0, i32 1, i32 300, i32 99}
189 !42 = !{!"loop_header_weight", i64 400}
190
191 ; CHECK-LABEL: Printing analysis {{.*}} for function '_Z11irreduciblePh':
192 ; CHECK-NEXT: block-frequency-info: _Z11irreduciblePh
193 ; CHECK-NEXT: - entry: {{.*}} count = 1
194 ; CHECK-NEXT: - for.cond1: {{.*}} count = 1
195 ; CHECK-NEXT: - dispatch_op: {{.*}} count = 201
196 ; CHECK-NEXT: - dispatch_op.sw.bb6_crit_edge: {{.*}} count = 200
197 ; CHECK-NEXT: - sw.bb: {{.*}} count = 0
198 ; CHECK-NEXT: - TARGET_1: {{.*}} count = 299
199 ; CHECK-NEXT: - sw.bb6: {{.*}} count = 500, irr_loop_header_weight = 501
200 ; CHECK-NEXT: - if.then: {{.*}} count = 299
201 ; CHECK-NEXT: - TARGET_2: {{.*}} count = 98
202 ; CHECK-NEXT: - sw.bb15: {{.*}} count = 99, irr_loop_header_weight = 100
203 ; CHECK-NEXT: - if.then18: {{.*}} count = 99
204 ; CHECK-NEXT: - unknown_op: {{.*}} count = 0
205 ; CHECK-NEXT: - sw.default: {{.*}} count = 0
206 ; CHECK-NEXT: - exit: {{.*}} count = 1
207 ; CHECK-NEXT: - indirectgoto: {{.*}} count = 399, irr_loop_header_weight = 400
99 ; RUN: llvm-lto -thinlto-action=import %t2.bc -thinlto-index=%t3.bc \
1010 ; RUN: -o /dev/null -stats \
1111 ; RUN: 2>&1 | FileCheck %s -check-prefix=LAZY
12 ; LAZY: 53 bitcode-reader - Number of Metadata records loaded
12 ; LAZY: 55 bitcode-reader - Number of Metadata records loaded
1313 ; LAZY: 2 bitcode-reader - Number of MDStrings loaded
1414
1515 ; RUN: llvm-lto -thinlto-action=import %t2.bc -thinlto-index=%t3.bc \
1616 ; RUN: -o /dev/null -disable-ondemand-mds-loading -stats \
1717 ; RUN: 2>&1 | FileCheck %s -check-prefix=NOTLAZY
18 ; NOTLAZY: 62 bitcode-reader - Number of Metadata records loaded
18 ; NOTLAZY: 64 bitcode-reader - Number of Metadata records loaded
1919 ; NOTLAZY: 7 bitcode-reader - Number of MDStrings loaded
2020
2121
0 :ir
1 _Z11irreducibleii
2 # Func Hash:
3 64451410787
4 # Num Counters:
5 6
6 # Counter Values:
7 1000
8 950
9 100
10 373
11 1
12 0
13
14 _Z11irreduciblePh
15 # Func Hash:
16 104649601521
17 # Num Counters:
18 9
19 # Counter Values:
20 100
21 300
22 99
23 300
24 201
25 1
26 1
27 0
28 0
0 ; RUN: llvm-profdata merge %S/Inputs/irreducible.proftext -o %t.profdata
1 ; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s --check-prefix=USE
2 ; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s --check-prefix=USE
3
4 ; GEN: $__llvm_profile_raw_version = comdat any
5
6 ; Function Attrs: noinline norecurse nounwind readnone uwtable
7 define i32 @_Z11irreducibleii(i32 %iter_outer, i32 %iter_inner) local_unnamed_addr #0 {
8 entry:
9 %cmp24 = icmp sgt i32 %iter_outer, 0
10 br i1 %cmp24, label %for.body, label %entry.for.cond.cleanup_crit_edge
11
12 entry.for.cond.cleanup_crit_edge: ; preds = %entry
13 br label %for.cond.cleanup
14
15 for.cond.cleanup: ; preds = %entry.for.cond.cleanup_crit_edge, %for.end
16 %sum.0.lcssa = phi i32 [ 0, %entry.for.cond.cleanup_crit_edge ], [ %sum.1, %for.end ]
17 ret i32 %sum.0.lcssa
18
19 for.body: ; preds = %entry, %for.end
20 %k.026 = phi i32 [ %inc12, %for.end ], [ 0, %entry ]
21 %sum.025 = phi i32 [ %sum.1, %for.end ], [ 0, %entry ]
22 %rem23 = and i32 %k.026, 1
23 %cmp1 = icmp eq i32 %rem23, 0
24 br i1 %cmp1, label %entry8, label %for.cond2
25
26 for.cond2: ; preds = %for.body, %if.end9
27 %sum.1 = phi i32 [ %add10, %if.end9 ], [ %sum.025, %for.body ]
28 %i.0 = phi i32 [ %inc, %if.end9 ], [ 0, %for.body ]
29 %cmp3 = icmp slt i32 %i.0, %iter_inner
30 br i1 %cmp3, label %for.body4, label %for.end
31 ; USE: br i1 %cmp3, label %for.body4, label %for.end, !prof !{{[0-9]+}},
32 ; USE-SAME: !irr_loop ![[FOR_COND2_IRR_LOOP:[0-9]+]]
33
34 for.body4: ; preds = %for.cond2
35 %rem5 = srem i32 %k.026, 3
36 %cmp6 = icmp eq i32 %rem5, 0
37 br i1 %cmp6, label %entry8, label %if.end9
38
39 entry8: ; preds = %for.body4, %for.body
40 %sum.2 = phi i32 [ %sum.025, %for.body ], [ %sum.1, %for.body4 ]
41 %i.1 = phi i32 [ 0, %for.body ], [ %i.0, %for.body4 ]
42 %add = add nsw i32 %sum.2, 4
43 br label %if.end9
44 ; USE: br label %if.end9,
45 ; USE-SAME: !irr_loop ![[ENTRY8_IRR_LOOP:[0-9]+]]
46
47 if.end9: ; preds = %entry8, %for.body4
48 %sum.3 = phi i32 [ %add, %entry8 ], [ %sum.1, %for.body4 ]
49 %i.2 = phi i32 [ %i.1, %entry8 ], [ %i.0, %for.body4 ]
50 %add10 = add nsw i32 %sum.3, 1
51 %inc = add nsw i32 %i.2, 1
52 br label %for.cond2
53 ; USE: br label %for.cond2,
54 ; USE-SAME: !irr_loop ![[IF_END9_IRR_LOOP:[0-9]+]]
55
56 for.end: ; preds = %for.cond2
57 %inc12 = add nuw nsw i32 %k.026, 1
58 %exitcond = icmp eq i32 %inc12, %iter_outer
59 br i1 %exitcond, label %for.cond.cleanup, label %for.body
60 }
61
62
63
64 @targets = local_unnamed_addr global [256 x i8*] zeroinitializer, align 16
65 @tracing = local_unnamed_addr global i32 0, align 4
66
67 ; Function Attrs: noinline norecurse nounwind uwtable
68 define i32 @_Z11irreduciblePh(i8* nocapture readonly %p) {
69 entry:
70 store <2 x i8*> , <2 x i8*>* bitcast ([256 x i8*]* @targets to <2 x i8*>*), align 16
71 store i8* blockaddress(@_Z11irreduciblePh, %TARGET_2), i8** getelementptr inbounds ([256 x i8*], [256 x i8*]* @targets, i64 0, i64 2), align 16
72 %0 = load i32, i32* @tracing, align 4
73 %tobool = icmp eq i32 %0, 0
74 br label %for.cond1
75
76 for.cond1: ; preds = %sw.default, %entry
77 %p.addr.0 = phi i8* [ %p, %entry ], [ %p.addr.4, %sw.default ]
78 %sum.0 = phi i32 [ 0, %entry ], [ %add25, %sw.default ]
79 %incdec.ptr = getelementptr inbounds i8, i8* %p.addr.0, i64 1
80 %1 = load i8, i8* %p.addr.0, align 1
81 %incdec.ptr2 = getelementptr inbounds i8, i8* %p.addr.0, i64 2
82 %2 = load i8, i8* %incdec.ptr, align 1
83 %conv3 = zext i8 %2 to i32
84 br label %dispatch_op
85
86 dispatch_op: ; preds = %sw.bb6, %for.cond1
87 %p.addr.1 = phi i8* [ %incdec.ptr2, %for.cond1 ], [ %p.addr.2, %sw.bb6 ]
88 %op.0 = phi i8 [ %1, %for.cond1 ], [ 1, %sw.bb6 ]
89 %oparg.0 = phi i32 [ %conv3, %for.cond1 ], [ %oparg.2, %sw.bb6 ]
90 %sum.1 = phi i32 [ %sum.0, %for.cond1 ], [ %add7, %sw.bb6 ]
91 switch i8 %op.0, label %sw.default [
92 i8 0, label %sw.bb
93 i8 1, label %dispatch_op.sw.bb6_crit_edge
94 i8 2, label %sw.bb15
95 ]
96
97 dispatch_op.sw.bb6_crit_edge: ; preds = %dispatch_op
98 br label %sw.bb6
99
100 sw.bb: ; preds = %indirectgoto, %dispatch_op
101 %oparg.1 = phi i32 [ %oparg.0, %dispatch_op ], [ 0, %indirectgoto ]
102 %sum.2 = phi i32 [ %sum.1, %dispatch_op ], [ %sum.7, %indirectgoto ]
103 %add.neg = sub i32 -5, %oparg.1
104 %sub = add i32 %add.neg, %sum.2
105 br label %exit
106
107 TARGET_1: ; preds = %indirectgoto
108 %incdec.ptr4 = getelementptr inbounds i8, i8* %add.ptr.pn, i64 2
109 %3 = load i8, i8* %p.addr.5, align 1
110 %conv5 = zext i8 %3 to i32
111 br label %sw.bb6
112
113 sw.bb6: ; preds = %dispatch_op.sw.bb6_crit_edge, %TARGET_1
114 %p.addr.2 = phi i8* [ %incdec.ptr4, %TARGET_1 ], [ %p.addr.1, %dispatch_op.sw.bb6_crit_edge ]
115 %oparg.2 = phi i32 [ %conv5, %TARGET_1 ], [ %oparg.0, %dispatch_op.sw.bb6_crit_edge ]
116 %sum.3 = phi i32 [ %sum.7, %TARGET_1 ], [ %sum.1, %dispatch_op.sw.bb6_crit_edge ]
117 %mul = mul nsw i32 %oparg.2, 7
118 %add7 = add nsw i32 %sum.3, %mul
119 %rem46 = and i32 %add7, 1
120 %cmp8 = icmp eq i32 %rem46, 0
121 br i1 %cmp8, label %dispatch_op, label %if.then
122 ; USE: br i1 %cmp8, label %dispatch_op, label %if.then, !prof !{{[0-9]+}},
123 ; USE-SAME: !irr_loop ![[SW_BB6_IRR_LOOP:[0-9]+]]
124
125 if.then: ; preds = %sw.bb6
126 %mul9 = mul nsw i32 %add7, 9
127 br label %indirectgoto
128
129 TARGET_2: ; preds = %indirectgoto
130 %incdec.ptr13 = getelementptr inbounds i8, i8* %add.ptr.pn, i64 2
131 %4 = load i8, i8* %p.addr.5, align 1
132 %conv14 = zext i8 %4 to i32
133 br label %sw.bb15
134
135 sw.bb15: ; preds = %TARGET_2, %dispatch_op
136 %p.addr.3 = phi i8* [ %p.addr.1, %dispatch_op ], [ %incdec.ptr13, %TARGET_2 ]
137 %oparg.3 = phi i32 [ %oparg.0, %dispatch_op ], [ %conv14, %TARGET_2 ]
138 %sum.4 = phi i32 [ %sum.1, %dispatch_op ], [ %sum.7, %TARGET_2 ]
139 %add16 = add nsw i32 %oparg.3, 3
140 %add17 = add nsw i32 %add16, %sum.4
141 br i1 %tobool, label %if.then18, label %exit
142 ; USE: br i1 %tobool, label %if.then18, label %exit, !prof !{{[0-9]+}},
143 ; USE-SAME: !irr_loop ![[SW_BB15_IRR_LOOP:[0-9]+]]
144
145 if.then18: ; preds = %sw.bb15
146 %idx.ext = sext i32 %oparg.3 to i64
147 %add.ptr = getelementptr inbounds i8, i8* %p.addr.3, i64 %idx.ext
148 %mul19 = mul nsw i32 %add17, 17
149 br label %indirectgoto
150
151 unknown_op: ; preds = %indirectgoto
152 %sub24 = add nsw i32 %sum.7, -4
153 br label %sw.default
154
155 sw.default: ; preds = %unknown_op, %dispatch_op
156 %p.addr.4 = phi i8* [ %p.addr.5, %unknown_op ], [ %p.addr.1, %dispatch_op ]
157 %sum.5 = phi i32 [ %sub24, %unknown_op ], [ %sum.1, %dispatch_op ]
158 %add25 = add nsw i32 %sum.5, 11
159 br label %for.cond1
160
161 exit: ; preds = %sw.bb15, %sw.bb
162 %sum.6 = phi i32 [ %sub, %sw.bb ], [ %add17, %sw.bb15 ]
163 ret i32 %sum.6
164
165 indirectgoto: ; preds = %if.then18, %if.then
166 %add.ptr.pn = phi i8* [ %add.ptr, %if.then18 ], [ %p.addr.2, %if.then ]
167 %sum.7 = phi i32 [ %mul19, %if.then18 ], [ %mul9, %if.then ]
168 %p.addr.5 = getelementptr inbounds i8, i8* %add.ptr.pn, i64 1
169 %5 = load i8, i8* %add.ptr.pn, align 1
170 %idxprom21 = zext i8 %5 to i64
171 %arrayidx22 = getelementptr inbounds [256 x i8*], [256 x i8*]* @targets, i64 0, i64 %idxprom21
172 %6 = load i8*, i8** %arrayidx22, align 8
173 indirectbr i8* %6, [label %unknown_op, label %sw.bb, label %TARGET_1, label %TARGET_2]
174 ; USE: indirectbr i8* %6, [label %unknown_op, label %sw.bb, label %TARGET_1, label %TARGET_2], !prof !{{[0-9]+}},
175 ; USE-SAME: !irr_loop ![[INDIRECTGOTO_IRR_LOOP:[0-9]+]]
176 }
177
178 ; USE: ![[FOR_COND2_IRR_LOOP]] = !{!"loop_header_weight", i64 1050}
179 ; USE: ![[ENTRY8_IRR_LOOP]] = !{!"loop_header_weight", i64 373}
180 ; USE: ![[IF_END9_IRR_LOOP]] = !{!"loop_header_weight", i64 1000}
181 ; USE: ![[SW_BB6_IRR_LOOP]] = !{!"loop_header_weight", i64 501}
182 ; USE: ![[SW_BB15_IRR_LOOP]] = !{!"loop_header_weight", i64 100}
183 ; USE: ![[INDIRECTGOTO_IRR_LOOP]] = !{!"loop_header_weight", i64 400}