llvm.org GIT mirror llvm / ded959f
[LoopPredication] Add profitability check based on BPI Summary: LoopPredication is not profitable when the loop is known to always exit through some block other than the latch block. A coarse-grained latch check can cause loop predication to predicate the loop and unconditionally deoptimize. However, without predicating the loop, the guard may never fail within the loop during the dynamic execution because the non-latch loop termination condition exits the loop before the latch condition causes the loop to exit. We teach LP about this using the BranchProbabilityInfo (BPI) pass. Reviewers: apilipenko, skatkov, mkazantsev, reames Reviewed by: skatkov Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D44667 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@328210 91177308-0d34-0410-b5e6-96231b3b80d8 Anna Thomas 1 year, 5 months ago
2 changed file(s) with 212 addition(s) and 3 deletion(s). Raw diff Collapse all Expand all
177177 //===----------------------------------------------------------------------===//
178178
179179 #include "llvm/Transforms/Scalar/LoopPredication.h"
180 #include "llvm/Analysis/BranchProbabilityInfo.h"
180181 #include "llvm/Analysis/LoopInfo.h"
181182 #include "llvm/Analysis/LoopPass.h"
182183 #include "llvm/Analysis/ScalarEvolution.h"
201202
// Hidden flag, enabled by default.
// NOTE(review): its use is not visible in this diff; presumably it gates
// predication of count-down loops -- confirm against the full file.
202203 static cl::opt EnableCountDownLoop("loop-predication-enable-count-down-loop",
203204 cl::Hidden, cl::init(true));
205
// Hidden flag, disabled by default. When set, isLoopProfitableToPredicate()
// returns true immediately, bypassing the BPI-based profitability heuristic.
206 static cl::opt
207 SkipProfitabilityChecks("loop-predication-skip-profitability-checks",
208 cl::Hidden, cl::init(false));
209
210 // Scale factor applied to the latch exit probability. The profitability
211 // analysis uses it to find other exiting edges that are much more likely to
212 // leave the loop than the latch exit is. The default is 2.0.
213 // Values below 1 are not used as given: the reader of this option resets them to 1.0.
214 static cl::opt LatchExitProbabilityScale(
215 "loop-predication-latch-probability-scale", cl::Hidden, cl::init(2.0),
216 cl::desc("scale factor for the latch probability. Value should be greater "
217 "than 1. Lower values are ignored"));
218
204219 namespace {
205220 class LoopPredication {
206221 /// Represents an induction variable check:
220235 };
221236
222237 ScalarEvolution *SE;
238 BranchProbabilityInfo *BPI;
223239
224240 Loop *L;
225241 const DataLayout *DL;
252268 SCEVExpander &Expander,
253269 IRBuilder<> &Builder);
254270 bool widenGuardConditions(IntrinsicInst *II, SCEVExpander &Expander);
271
272 // If the loop always exits through another block in the loop, we should not
273 // predicate based on the latch check. For example, the latch check can be a
274 // very coarse grained check and there can be more fine grained exit checks
275 // within the loop. We identify such unprofitable loops through BPI.
276 bool isLoopProfitableToPredicate();
255277
256278 // When the IV type is wider than the range operand type, we can still do loop
257279 // predication, by generating SCEVs for the range and latch that are of the
271293 Optional generateLoopLatchCheck(Type *RangeCheckType);
272294
273295 public:
274 LoopPredication(ScalarEvolution *SE) : SE(SE){};
296 LoopPredication(ScalarEvolution *SE, BranchProbabilityInfo *BPI)
297 : SE(SE), BPI(BPI){};
275298 bool runOnLoop(Loop *L);
276299 };
277300
283306 }
284307
// Declares the analyses this legacy pass needs: one explicitly required
// analysis, plus whatever getLoopAnalysisUsage() adds to AU.
// NOTE(review): the template argument of addRequired was lost when this diff
// was extracted; presumably BranchProbabilityInfoWrapperPass, matching the
// INITIALIZE_PASS_DEPENDENCY entry and the getBPI() call -- confirm.
285308 void getAnalysisUsage(AnalysisUsage &AU) const override {
309 AU.addRequired();
286310 getLoopAnalysisUsage(AU);
287311 }
288312
290314 if (skipLoop(L))
291315 return false;
292316 auto *SE = &getAnalysis().getSE();
293 LoopPredication LP(SE);
317 BranchProbabilityInfo &BPI =
318 getAnalysis().getBPI();
319 LoopPredication LP(SE, &BPI);
294320 return LP.runOnLoop(L);
295321 }
296322 };
300326
// Registers LoopPredicationLegacyPass under the name "loop-predication" and
// lists BranchProbabilityInfoWrapperPass and LoopPass as its dependencies.
301327 INITIALIZE_PASS_BEGIN(LoopPredicationLegacyPass, "loop-predication",
302328 "Loop predication", false, false)
329 INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
303330 INITIALIZE_PASS_DEPENDENCY(LoopPass)
304331 INITIALIZE_PASS_END(LoopPredicationLegacyPass, "loop-predication",
305332 "Loop predication", false, false)
311338 PreservedAnalyses LoopPredicationPass::run(Loop &L, LoopAnalysisManager &AM,
312339 LoopStandardAnalysisResults &AR,
313340 LPMUpdater &U) {
314 LoopPredication LP(&AR.SE);
341 const auto &FAM =
342 AM.getResult(L, AR).getManager();
343 Function *F = L.getHeader()->getParent();
344 auto *BPI = FAM.getCachedResult(*F);
345 LoopPredication LP(&AR.SE, BPI);
315346 if (!LP.runOnLoop(&L))
316347 return PreservedAnalyses::all();
317348
689720 Limit->getAPInt().getActiveBits() < RangeCheckTypeBitSize;
690721 }
691722
// Decides whether predicating loop L on its latch check is worthwhile.
//
// Returns true unconditionally when the profitability checks are disabled
// (-loop-predication-skip-profitability-checks), when no BPI is available, or
// when the loop has exactly one exit edge. Otherwise returns false as soon as
// some exit edge has a probability greater than the latch exit probability
// multiplied by -loop-predication-latch-probability-scale (raised to 1.0 if
// the user supplied a smaller value), and true if no edge exceeds it.
723 bool LoopPredication::isLoopProfitableToPredicate() {
724 if (SkipProfitabilityChecks || !BPI)
725 return true;
726
// NOTE(review): the element type of this SmallVector was lost when the diff
// was extracted; each element is used below via .first / .second.
727 SmallVector, 8> ExitEdges;
728 L->getExitEdges(ExitEdges);
729 // If there is only one exiting edge in the loop, it is always profitable to
730 // predicate the loop.
731 if (ExitEdges.size() == 1)
732 return true;
733
734 // Calculate the exiting probabilities of all exiting edges from the loop,
735 // starting with the LatchExitProbability.
736 // Heuristic for profitability: if any exiting edge's probability of leaving
737 // the loop is larger than the scaled probability of exiting through the
738 // latch block, it's not profitable to predicate the loop.
739 auto *LatchBlock = L->getLoopLatch();
740 assert(LatchBlock && "Should have a single latch at this point!");
741 auto *LatchTerm = LatchBlock->getTerminator();
742 assert(LatchTerm->getNumSuccessors() == 2 &&
743 "expected to be an exiting block with 2 succs!");
// The latch successor that is not the loop header is the exit successor.
744 unsigned LatchBrExitIdx =
745 LatchTerm->getSuccessor(0) == L->getHeader() ? 1 : 0;
746 BranchProbability LatchExitProbability =
747 BPI->getEdgeProbability(LatchBlock, LatchBrExitIdx);
748
749 // Protect against degenerate inputs provided by the user. A scale factor
750 // below one would invert the definition of profitable loop predication.
751 float ScaleFactor = LatchExitProbabilityScale;
752 if (ScaleFactor < 1) {
753 DEBUG(
754 dbgs()
755 << "Ignored user setting for loop-predication-latch-probability-scale: "
756 << LatchExitProbabilityScale << "\n");
757 DEBUG(dbgs() << "The value is set to 1.0\n");
758 ScaleFactor = 1.0;
759 }
// NOTE(review): ScaleFactor is a float. If BranchProbability only offers an
// integer operator*, a fractional scale such as 1.5 would be truncated before
// the multiplication -- confirm against BranchProbability.h.
760 const auto LatchProbabilityThreshold =
761 LatchExitProbability * ScaleFactor;
762
// NOTE(review): ExitEdges presumably includes the latch's own exit edge; with
// ScaleFactor >= 1 that edge should never exceed the threshold -- confirm.
763 for (const auto &ExitEdge : ExitEdges) {
764 BranchProbability ExitingBlockProbability =
765 BPI->getEdgeProbability(ExitEdge.first, ExitEdge.second);
766 // Some exiting edge is more probable than the scaled latch exiting edge.
767 // No longer profitable to predicate.
768 if (ExitingBlockProbability > LatchProbabilityThreshold)
769 return false;
770 }
771 // No exit edge is more probable than the scaled latch exit probability, so
772 // predication is treated as profitable. Per the original author, this also
773 // covers the case of no profile information with equally likely exits.
774 return true;
775 }
776
692777 bool LoopPredication::runOnLoop(Loop *Loop) {
693778 L = Loop;
694779
717802 DEBUG(dbgs() << "Latch check:\n");
718803 DEBUG(LatchCheck.dump());
719804
805 if (!isLoopProfitableToPredicate()) {
806 DEBUG(dbgs()<< "Loop not profitable to predicate!\n");
807 return false;
808 }
720809 // Collect all the guards into a vector and process later, so as not
721810 // to invalidate the instruction iterator.
722811 SmallVector Guards;
0 ; RUN: opt -S -loop-predication -loop-predication-skip-profitability-checks=false < %s 2>&1 | FileCheck %s
1 ; RUN: opt -S -loop-predication-skip-profitability-checks=false -passes='require,require,loop(loop-predication)' < %s 2>&1 | FileCheck %s
2
3 ; The latch block exits to a deoptimization block. BPI already knows (without
4 ; profile data) that a deopt block is very rarely reached, so we do not
5 ; predicate this loop using that coarse latch check.
6 ; LatchExitProbability: 0x04000000 / 0x80000000 = 3.12%
7 ; ExitingBlockProbability: 0x7ffa572a / 0x80000000 = 99.98%
; Loop with two exits: Header branches to %exit and Latch branches to %deopt.
; The loop is expected to stay unpredicated.
8 define i64 @donot_predicate(i64* nocapture readonly %arg, i32 %length, i64* nocapture readonly %arg2, i64* nocapture readonly %n_addr, i64 %i) {
9 ; CHECK-LABEL: donot_predicate(
10 entry:
11 %length.ext = zext i32 %length to i64
12 %n.pre = load i64, i64* %n_addr, align 4
13 br label %Header
14
; The guard must keep its original in-loop condition %within.bounds.
15 ; CHECK-LABEL: Header:
16 ; CHECK: %within.bounds = icmp ult i64 %j2, %length.ext
17 ; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9)
18 Header: ; preds = %entry, %Latch
19 %result.in3 = phi i64* [ %arg2, %entry ], [ %arg, %Latch ]
20 %j2 = phi i64 [ 0, %entry ], [ %j.next, %Latch ]
21 %within.bounds = icmp ult i64 %j2, %length.ext
22 call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
23 %innercmp = icmp eq i64 %j2, %n.pre
24 %j.next = add nuw nsw i64 %j2, 1
; !prof !0 carries branch weights 18 (to %Latch) and 104200 (to %exit).
25 br i1 %innercmp, label %Latch, label %exit, !prof !0
26
27 Latch: ; preds = %Header
28 %speculate_trip_count = icmp ult i64 %j.next, 1048576
; No !prof here; the exit successor is the block that calls
; @llvm.experimental.deoptimize.
29 br i1 %speculate_trip_count, label %Header, label %deopt
30
31 deopt: ; preds = %Latch
32 %counted_speculation_failed = call i64 (...) @llvm.experimental.deoptimize.i64(i64 30) [ "deopt"(i32 0) ]
33 ret i64 %counted_speculation_failed
34
35 exit: ; preds = %Header
36 %result.in3.lcssa = phi i64* [ %result.in3, %Header ]
37 %result.le = load i64, i64* %result.in3.lcssa, align 8
38 ret i64 %result.le
39 }
40 !0 = !{!"branch_weights", i32 18, i32 104200}
41
42 ; Predicate the loop: there is no profile information, and BPI concludes that
43 ; all exiting blocks have the same probability of exiting the loop.
; Same loop shape as @donot_predicate, but with no branch weights and with the
; latch exiting to a plain return block. The loop is expected to be predicated.
44 define i64 @predicate(i64* nocapture readonly %arg, i32 %length, i64* nocapture readonly %arg2, i64* nocapture readonly %n_addr, i64 %i) {
45 ; CHECK-LABEL: predicate(
; The widened condition (limit test AND first-iteration test) is expected in
; the entry block.
46 ; CHECK-LABEL: entry:
47 ; CHECK: [[limit_check:[^ ]+]] = icmp ule i64 1048576, %length.ext
48 ; CHECK-NEXT: [[first_iteration_check:[^ ]+]] = icmp ult i64 0, %length.ext
49 ; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[first_iteration_check]], [[limit_check]]
50 entry:
51 %length.ext = zext i32 %length to i64
52 %n.pre = load i64, i64* %n_addr, align 4
53 br label %Header
54
; The in-loop guard is expected to use the widened condition.
55 ; CHECK-LABEL: Header:
56 ; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ]
57 Header: ; preds = %entry, %Latch
58 %result.in3 = phi i64* [ %arg2, %entry ], [ %arg, %Latch ]
59 %j2 = phi i64 [ 0, %entry ], [ %j.next, %Latch ]
60 %within.bounds = icmp ult i64 %j2, %length.ext
61 call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
62 %innercmp = icmp eq i64 %j2, %n.pre
63 %j.next = add nuw nsw i64 %j2, 1
64 br i1 %innercmp, label %Latch, label %exit
65
66 Latch: ; preds = %Header
67 %speculate_trip_count = icmp ult i64 %j.next, 1048576
68 br i1 %speculate_trip_count, label %Header, label %exitLatch
69
70 exitLatch: ; preds = %Latch
71 ret i64 1
72
73 exit: ; preds = %Header
74 %result.in3.lcssa = phi i64* [ %result.in3, %Header ]
75 %result.le = load i64, i64* %result.in3.lcssa, align 8
76 ret i64 %result.le
77 }
78
79 ; Same as the test above, but the profile data says the most probable exit from
80 ; the loop is the header exiting block (not the latch block), so do not predicate.
81 ; LatchExitProbability: 0x000020e1 / 0x80000000 = 0.00%
82 ; ExitingBlockProbability: 0x7ffcbb86 / 0x80000000 = 99.99%
; Same loop as @predicate, but both conditional branches carry !prof branch
; weights. The loop is expected to stay unpredicated.
83 define i64 @donot_predicate_prof(i64* nocapture readonly %arg, i32 %length, i64* nocapture readonly %arg2, i64* nocapture readonly %n_addr, i64 %i) {
84 ; CHECK-LABEL: donot_predicate_prof(
85 ; CHECK-LABEL: entry:
86 entry:
87 %length.ext = zext i32 %length to i64
88 %n.pre = load i64, i64* %n_addr, align 4
89 br label %Header
90
; The guard must keep its original in-loop condition %within.bounds.
91 ; CHECK-LABEL: Header:
92 ; CHECK: %within.bounds = icmp ult i64 %j2, %length.ext
93 ; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9)
94 Header: ; preds = %entry, %Latch
95 %result.in3 = phi i64* [ %arg2, %entry ], [ %arg, %Latch ]
96 %j2 = phi i64 [ 0, %entry ], [ %j.next, %Latch ]
97 %within.bounds = icmp ult i64 %j2, %length.ext
98 call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
99 %innercmp = icmp eq i64 %j2, %n.pre
100 %j.next = add nuw nsw i64 %j2, 1
; !prof !1 carries branch weights 104 (to %Latch) and 1042861 (to %exit).
101 br i1 %innercmp, label %Latch, label %exit, !prof !1
102
103 Latch: ; preds = %Header
104 %speculate_trip_count = icmp ult i64 %j.next, 1048576
; !prof !2 carries branch weights 255129 (to %Header) and 1 (to %exitLatch).
105 br i1 %speculate_trip_count, label %Header, label %exitLatch, !prof !2
106
107 exitLatch: ; preds = %Latch
108 ret i64 1
109
110 exit: ; preds = %Header
111 %result.in3.lcssa = phi i64* [ %result.in3, %Header ]
112 %result.le = load i64, i64* %result.in3.lcssa, align 8
113 ret i64 %result.le
114 }
115 declare i64 @llvm.experimental.deoptimize.i64(...)
116 declare void @llvm.experimental.guard(i1, ...)
117
118 !1 = !{!"branch_weights", i32 104, i32 1042861}
119 !2 = !{!"branch_weights", i32 255129, i32 1}