llvm.org GIT mirror llvm / 0912890
Add heuristics for irreducible loop metadata under PGO. Summary: Add the following heuristics for irreducible loop metadata: - When an irreducible loop header is missing the loop header weight metadata, give it the minimum weight seen among other headers. - Annotate indirectbr targets with the loop header weight metadata (as they are likely to become irreducible loop headers after indirectbr tail duplication). These greatly improve the accuracy of the block frequency info of the Python interpreter loop (e.g., from ~3-16x off down to ~40-55% off) and the Python performance (e.g., unpack_sequence from ~50% slower to ~8% faster than GCC) due to better register allocation under PGO. Reviewers: davidxl Reviewed By: davidxl Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D39980 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@318693 91177308-0d34-0410-b5e6-96231b3b80d8 Hiroshi Yamauchi 1 year, 10 months ago
4 changed file(s) with 112 addition(s) and 12 deletion(s). Raw diff Collapse all Expand all
1515 #define LLVM_ANALYSIS_BLOCKFREQUENCYINFOIMPL_H
1616
1717 #include "llvm/ADT/DenseMap.h"
18 #include "llvm/ADT/DenseSet.h"
1819 #include "llvm/ADT/GraphTraits.h"
1920 #include "llvm/ADT/Optional.h"
2021 #include "llvm/ADT/PostOrderIterator.h"
11541155 DEBUG(dbgs() << "isIrreducible = true\n");
11551156 Distribution Dist;
11561157 unsigned NumHeadersWithWeight = 0;
1158 Optional MinHeaderWeight;
1159 DenseSet HeadersWithoutWeight;
1160 HeadersWithoutWeight.reserve(Loop.NumHeaders);
11571161 for (uint32_t H = 0; H < Loop.NumHeaders; ++H) {
11581162 auto &HeaderNode = Loop.Nodes[H];
11591163 const BlockT *Block = getBlock(HeaderNode);
11601164 IsIrrLoopHeader.set(Loop.Nodes[H].Index);
11611165 Optional HeaderWeight = Block->getIrrLoopHeaderWeight();
1162 if (!HeaderWeight)
1166 if (!HeaderWeight) {
1167 DEBUG(dbgs() << "Missing irr loop header metadata on "
1168 << getBlockName(HeaderNode) << "\n");
1169 HeadersWithoutWeight.insert(H);
11631170 continue;
1171 }
11641172 DEBUG(dbgs() << getBlockName(HeaderNode)
11651173 << " has irr loop header weight " << HeaderWeight.getValue()
11661174 << "\n");
11671175 NumHeadersWithWeight++;
11681176 uint64_t HeaderWeightValue = HeaderWeight.getValue();
1169 if (HeaderWeightValue)
1177 if (!MinHeaderWeight || HeaderWeightValue < MinHeaderWeight)
1178 MinHeaderWeight = HeaderWeightValue;
1179 if (HeaderWeightValue) {
11701180 Dist.addLocal(HeaderNode, HeaderWeightValue);
1171 }
1172 if (NumHeadersWithWeight != Loop.NumHeaders) {
1173 // Not all headers have a weight metadata. Distribute weight evenly.
1174 Dist = Distribution();
1175 for (uint32_t H = 0; H < Loop.NumHeaders; ++H) {
1176 auto &HeaderNode = Loop.Nodes[H];
1177 Dist.addLocal(HeaderNode, 1);
11781181 }
1182 }
1183 // As a heuristic, if some headers don't have a weight, give them the
1184 // minimum weight seen (not to disrupt the existing trends too much by
1185 // using a weight that's in the general range of the other headers' weights,
1186 // and the minimum seems to perform better than the average.)
1187 // FIXME: better update in the passes that drop the header weight.
1188 // If no headers have a weight, give them even weight (use weight 1).
1189 if (!MinHeaderWeight)
1190 MinHeaderWeight = 1;
1191 for (uint32_t H : HeadersWithoutWeight) {
1192 auto &HeaderNode = Loop.Nodes[H];
1193 const BlockT *Block = getBlock(HeaderNode);
1194 assert(!Block->getIrrLoopHeaderWeight() &&
1195 "Shouldn't have a weight metadata");
1196 uint64_t MinWeight = MinHeaderWeight.getValue();
1197 DEBUG(dbgs() << "Giving weight " << MinWeight
1198 << " to " << getBlockName(HeaderNode) << "\n");
1199 if (MinWeight)
1200 Dist.addLocal(HeaderNode, MinWeight);
11791201 }
11801202 distributeIrrLoopHeaderMass(Dist);
11811203 for (const BlockNode &M : Loop.Nodes)
11821204 if (!propagateMassToSuccessors(&Loop, M))
11831205 llvm_unreachable("unhandled irreducible control flow");
1184 if (NumHeadersWithWeight != Loop.NumHeaders)
1185 // Not all headers have a weight metadata. Adjust header mass.
1206 if (NumHeadersWithWeight == 0)
1207 // No headers have a metadata. Adjust header mass.
11861208 adjustLoopHeaderMass(Loop);
11871209 } else {
11881210 Working[Loop.getHeader().Index].getMass() = BlockMass::getFull();
11871187 }
11881188 }
11891189
1190 static bool isIndirectBrTarget(BasicBlock *BB) {
1191 for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
1192 if (isa((*PI)->getTerminator()))
1193 return true;
1194 }
1195 return false;
1196 }
1197
11901198 void PGOUseFunc::annotateIrrLoopHeaderWeights() {
11911199 DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n");
11921200 // Find irr loop headers
11931201 for (auto &BB : F) {
1194 if (BFI->isIrrLoopHeader(&BB)) {
1202 // As a heuristic also annotate indirectbr targets as they have a high chance
1203 // to become an irreducible loop header after the indirectbr tail
1204 // duplication.
1205 if (BFI->isIrrLoopHeader(&BB) || isIndirectBrTarget(&BB)) {
11951206 TerminatorInst *TI = BB.getTerminator();
11961207 const UseBBInfo &BBCountInfo = getBBInfo(&BB);
11971208 setIrrLoopHeaderMetadata(M, TI, BBCountInfo.CountValue);
158158 ; CHECK-NEXT: - sw.default: {{.*}} count = 0
159159 ; CHECK-NEXT: - exit: {{.*}} count = 1
160160 ; CHECK-NEXT: - indirectgoto: {{.*}} count = 399, irr_loop_header_weight = 400
161
162 ; Missing some irr loop annotations.
163 ; Function Attrs: noinline norecurse nounwind uwtable
; Test input with partially annotated terminators: the conditional branch in
; sw.bb6 carries only !prof (its !irr_loop is intentionally left out, see the
; note on that line), while the branch in sw.bb15 (!irr_loop !40) and the
; indirectbr in indirectgoto (!irr_loop !42) keep their annotations.
164 define i32 @_Z11irreduciblePh2(i8* nocapture readonly %p) !prof !27 {
165 entry:
166 %0 = load i32, i32* @tracing, align 4
167 %1 = trunc i32 %0 to i8
168 %tobool = icmp eq i32 %0, 0
169 br label %for.cond1
170
171 for.cond1: ; preds = %sw.default, %entry
172 br label %dispatch_op
173
; Switch-based dispatch: enters sw.bb, sw.bb6 (via the crit-edge block) or
; sw.bb15 directly, or falls back to sw.default.
174 dispatch_op: ; preds = %sw.bb6, %for.cond1
175 switch i8 %1, label %sw.default [
176 i8 0, label %sw.bb
177 i8 1, label %dispatch_op.sw.bb6_crit_edge
178 i8 2, label %sw.bb15
179 ], !prof !36
180
181 dispatch_op.sw.bb6_crit_edge: ; preds = %dispatch_op
182 br label %sw.bb6
183
184 sw.bb: ; preds = %indirectgoto, %dispatch_op
185 br label %exit
186
187 TARGET_1: ; preds = %indirectgoto
188 br label %sw.bb6
189
; sw.bb6 is entered both from the switch path and from the indirectbr path
; (through TARGET_1); this is the block whose annotation is missing.
190 sw.bb6: ; preds = %TARGET_1, %dispatch_op.sw.bb6_crit_edge
191 br i1 %tobool, label %dispatch_op, label %if.then, !prof !37 ; Missing !irr_loop !38
192
193 if.then: ; preds = %sw.bb6
194 br label %indirectgoto
195
196 TARGET_2: ; preds = %indirectgoto
197 br label %sw.bb15
198
; sw.bb15 is likewise entered from both the switch and the indirectbr path
; (through TARGET_2) and still has its !irr_loop annotation.
199 sw.bb15: ; preds = %TARGET_2, %dispatch_op
200 br i1 %tobool, label %if.then18, label %exit, !prof !39, !irr_loop !40
201
202 if.then18: ; preds = %sw.bb15
203 br label %indirectgoto
204
205 unknown_op: ; preds = %indirectgoto
206 br label %sw.default
207
208 sw.default: ; preds = %unknown_op, %dispatch_op
209 br label %for.cond1
210
211 exit: ; preds = %sw.bb15, %sw.bb
212 ret i32 0
213
; Indirect dispatch: loads a label address from @targets and branches to one
; of unknown_op, sw.bb, TARGET_1 or TARGET_2.
214 indirectgoto: ; preds = %if.then18, %if.then
215 %idxprom21 = zext i32 %0 to i64
216 %arrayidx22 = getelementptr inbounds [256 x i8*], [256 x i8*]* @targets, i64 0, i64 %idxprom21
217 %target = load i8*, i8** %arrayidx22, align 8
218 indirectbr i8* %target, [label %unknown_op, label %sw.bb, label %TARGET_1, label %TARGET_2], !prof !41, !irr_loop !42
219 }
220
221 ; CHECK-LABEL: Printing analysis {{.*}} for function '_Z11irreduciblePh2':
222 ; CHECK: block-frequency-info: _Z11irreduciblePh2
223 ; CHECK: - sw.bb6: {{.*}} count = 100
224 ; CHECK: - sw.bb15: {{.*}} count = 100, irr_loop_header_weight = 100
225 ; CHECK: - indirectgoto: {{.*}} count = 400, irr_loop_header_weight = 400
9090
9191 TARGET_1: ; preds = %indirectgoto
9292 br label %sw.bb6
93 ; USE: br label %sw.bb6, !irr_loop {{.*}}
9394
9495 sw.bb6: ; preds = %TARGET_1, %dispatch_op.sw.bb6_crit_edge
9596 br i1 %tobool, label %dispatch_op, label %if.then
101102
102103 TARGET_2: ; preds = %indirectgoto
103104 br label %sw.bb15
105 ; USE: br label %sw.bb15, !irr_loop {{.*}}
104106
105107 sw.bb15: ; preds = %TARGET_2, %dispatch_op
106108 br i1 %tobool, label %if.then18, label %exit