llvm.org GIT mirror llvm / 1eba875
[SCEV] Smart range calculation for SCEVUnknown Phis The range of SCEVUnknown Phi which merges values `X1, X2, ..., XN` can be evaluated as `U(Range(X1), Range(X2), ..., Range(XN))`. Reviewed By: sanjoy Differential Revision: https://reviews.llvm.org/D43810 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@326418 91177308-0d34-0410-b5e6-96231b3b80d8 Max Kazantsev 2 years ago
5 changed file(s) with 187 addition(s) and 3 deletion(s). Raw diff Collapse all Expand all
10981098 /// Mark predicate values currently being processed by isImpliedCond.
10991099 SmallPtrSet PendingLoopPredicates;
11001100
1101 /// Mark SCEVUnknown Phis currently being processed by getRangeRef.
1102 SmallPtrSet PendingPhiRanges;
1103
11011104 /// Set to true by isLoopBackedgeGuardedByCond when we're walking the set of
11021105 /// conditions dominating the backedge of a loop.
11031106 bool WalkingBEDominatingConds = false;
55815581 APInt::getSignedMaxValue(BitWidth).ashr(NS - 1) + 1));
55825582 }
55835583
5584 // The range of a Phi is a subset of the union of the ranges of all its inputs.
5585 if (const PHINode *Phi = dyn_cast(U->getValue())) {
5586 // Make sure that we do not run over cycled Phis.
5587 if (PendingPhiRanges.insert(Phi).second) {
5588 ConstantRange RangeFromOps(BitWidth, /*isFullSet=*/false);
5589 for (auto &Op : Phi->operands()) {
5590 auto OpRange = getRangeRef(getSCEV(Op), SignHint);
5591 RangeFromOps = RangeFromOps.unionWith(OpRange);
5592 // No point in continuing if we already have a full set.
5593 if (RangeFromOps.isFullSet())
5594 break;
5595 }
5596 ConservativeResult = ConservativeResult.intersectWith(RangeFromOps);
5597 bool Erased = PendingPhiRanges.erase(Phi);
5598 assert(Erased && "Failed to erase Phi properly?");
5599 (void) Erased;
5600 }
5601 }
5602
55845603 return setRange(U, SignHint, std::move(ConservativeResult));
55855604 }
55865605
1088710906 LI(Arg.LI), CouldNotCompute(std::move(Arg.CouldNotCompute)),
1088810907 ValueExprMap(std::move(Arg.ValueExprMap)),
1088910908 PendingLoopPredicates(std::move(Arg.PendingLoopPredicates)),
10909 PendingPhiRanges(std::move(Arg.PendingPhiRanges)),
1089010910 MinTrailingZerosCache(std::move(Arg.MinTrailingZerosCache)),
1089110911 BackedgeTakenCounts(std::move(Arg.BackedgeTakenCounts)),
1089210912 PredicatedBackedgeTakenCounts(
1093010950 BTCI.second.clear();
1093110951
1093210952 assert(PendingLoopPredicates.empty() && "isImpliedCond garbage");
10953 assert(PendingPhiRanges.empty() && "getRangeRef garbage");
1093310954 assert(!WalkingBEDominatingConds && "isLoopBackedgeGuardedByCond garbage!");
1093410955 assert(!ProvingSplitPredicate && "ProvingSplitPredicate garbage!");
1093510956 }
0 ; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s
1
2 define void @merge_values_with_ranges(i32 *%a_len_ptr, i32 *%b_len_ptr, i1 %unknown_cond) {
3
4 ; CHECK-LABEL: Classifying expressions for: @merge_values_with_ranges
5 ; CHECK: %len = phi i32 [ %len_a, %if.true ], [ %len_b, %if.false ]
6 ; CHECK-NEXT: --> %len U: [0,2147483647) S: [0,2147483647)
7
8 entry:
9 br i1 %unknown_cond, label %if.true, label %if.false
10
11 if.true:
12 %len_a = load i32, i32* %a_len_ptr, !range !0
13 br label %merge
14
15 if.false:
16 %len_b = load i32, i32* %b_len_ptr, !range !0
17 br label %merge
18
19 merge:
20 %len = phi i32 [ %len_a, %if.true ], [ %len_b, %if.false ]
21 ret void
22 }
23
24 define void @merge_values_with_ranges_looped(i32 *%a_len_ptr, i32 *%b_len_ptr) {
25
26 ; TODO: We could be much smarter here. So far we just make sure that we do not
27 ; go into an infinite loop analyzing these Phis.
28
29 ; CHECK-LABEL: Classifying expressions for: @merge_values_with_ranges_looped
30 ; CHECK: %p1 = phi i32 [ %len_a, %entry ], [ %p2, %loop ]
31 ; CHECK-NEXT: --> %p1 U: full-set S: full-set
32 ; CHECK: %p2 = phi i32 [ %len_b, %entry ], [ %p1, %loop ]
33 ; CHECK-NEXT: --> %p2 U: full-set S: full-set
34
35 entry:
36 %len_a = load i32, i32* %a_len_ptr, !range !0
37 %len_b = load i32, i32* %b_len_ptr, !range !0
38 br label %loop
39
40 loop:
41 %p1 = phi i32 [ %len_a, %entry ], [ %p2, %loop ]
42 %p2 = phi i32 [ %len_b, %entry ], [ %p1, %loop ]
43 %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
44 %iv.next = add i32 %iv, 1
45 %loop.cond = icmp slt i32 %iv.next, 100
46 br i1 %loop.cond, label %loop, label %exit
47
48 exit:
49 ret void
50 }
51
52
53 !0 = !{i32 0, i32 2147483647}
178178 ; CHECK-NOT: br i1 false
179179 ; CHECK-NOT: preloop
180180
181 define void @single_access_no_preloop_no_offset_phi_len(i32 *%arr, i32 *%a_len_ptr, i32 *%b_len_ptr, i32 %n, i1 %unknown_cond) {
182 entry:
183 br i1 %unknown_cond, label %if.true, label %if.false
184
185 if.true:
186 %len_a = load i32, i32* %a_len_ptr, !range !0
187 br label %merge
188
189 if.false:
190 %len_b = load i32, i32* %b_len_ptr, !range !0
191 br label %merge
192
193 merge:
194 %len = phi i32 [ %len_a, %if.true ], [ %len_b, %if.false ]
195 %first.itr.check = icmp sgt i32 %n, 0
196 br i1 %first.itr.check, label %loop, label %exit
197
198 loop:
199 %idx = phi i32 [ 0, %merge ] , [ %idx.next, %in.bounds ]
200 %idx.next = add i32 %idx, 1
201 %abc = icmp slt i32 %idx, %len
202 br i1 %abc, label %in.bounds, label %out.of.bounds, !prof !1
203
204 in.bounds:
205 %addr = getelementptr i32, i32* %arr, i32 %idx
206 store i32 0, i32* %addr
207 %next = icmp slt i32 %idx.next, %n
208 br i1 %next, label %loop, label %exit
209
210 out.of.bounds:
211 ret void
212
213 exit:
214 ret void
215 }
216
217 ; CHECK-LABEL: @single_access_no_preloop_no_offset_phi_len(
218
219 ; CHECK: loop:
220 ; CHECK: br i1 true, label %in.bounds, label %out.of.bounds
221
222 ; CHECK: main.exit.selector:
223 ; CHECK-NEXT: %idx.next.lcssa = phi i32 [ %idx.next, %in.bounds ]
224 ; CHECK-NEXT: [[continue:%[^ ]+]] = icmp slt i32 %idx.next.lcssa, %n
225 ; CHECK-NEXT: br i1 [[continue]], label %main.pseudo.exit, label %exit.loopexit
226
227 ; CHECK: main.pseudo.exit:
228 ; CHECK-NEXT: %idx.copy = phi i32 [ 0, %loop.preheader ], [ %idx.next.lcssa, %main.exit.selector ]
229 ; CHECK-NEXT: %indvar.end = phi i32 [ 0, %loop.preheader ], [ %idx.next.lcssa, %main.exit.selector ]
230 ; CHECK-NEXT: br label %postloop
231
232 ; CHECK: postloop:
233 ; CHECK-NEXT: br label %loop.postloop
234
235 ; CHECK: loop.postloop:
236 ; CHECK-NEXT: %idx.postloop = phi i32 [ %idx.next.postloop, %in.bounds.postloop ], [ %idx.copy, %postloop ]
237 ; CHECK-NEXT: %idx.next.postloop = add i32 %idx.postloop, 1
238 ; CHECK-NEXT: %abc.postloop = icmp slt i32 %idx.postloop, %len
239 ; CHECK-NEXT: br i1 %abc.postloop, label %in.bounds.postloop, label %out.of.bounds
240
241 ; CHECK: in.bounds.postloop:
242 ; CHECK-NEXT: %addr.postloop = getelementptr i32, i32* %arr, i32 %idx.postloop
243 ; CHECK-NEXT: store i32 0, i32* %addr.postloop
244 ; CHECK-NEXT: %next.postloop = icmp slt i32 %idx.next.postloop, %n
245 ; CHECK-NEXT: br i1 %next.postloop, label %loop.postloop, label %exit.loopexit
246
181247 !0 = !{i32 0, i32 2147483647}
182248 !1 = !{!"branch_weights", i32 64, i32 4}
2828
2929 ; Verify that store is vectorized as stride-1 memory access.
3030
31 ; CHECK: vector.body:
32 ; CHECK: store <4 x i32>
31 ; CHECK-LABEL: @test_01(
32 ; CHECK-NOT: vector.body:
3333
34 ; This test was originally vectorized, but now SCEV is smart enough to prove
35 ; that its trip count is 1, so it gets ignored by the vectorizer.
3436 ; Function Attrs: uwtable
35 define void @test() {
37 define void @test_01() {
3638 br label %.outer
3739
3840 ;
6567 br i1 undef, label %2, label %._crit_edge.loopexit
6668 }
6769
70 ; After the trip count is increased, the test gets vectorized.
71 ; CHECK-LABEL: @test_02(
72 ; CHECK: vector.body:
73 ; CHECK: store <4 x i32>
74
75 ; Function Attrs: uwtable
76 define void @test_02() {
77 br label %.outer
78
79 ;
80 ret void
81
82 ;
83 %3 = add nsw i32 %.ph, -2
84 br i1 undef, label %1, label %.outer
85
86 .outer: ; preds = %2, %0
87 %.ph = phi i32 [ %3, %2 ], [ 336, %0 ]
88 %.ph2 = phi i32 [ 62, %2 ], [ 110, %0 ]
89 %4 = and i32 %.ph, 30
90 %5 = add i32 %.ph2, 1
91 br label %6
92
93 ;
94 %7 = phi i32 [ %5, %.outer ], [ %13, %6 ]
95 %8 = phi i32 [ %.ph2, %.outer ], [ %7, %6 ]
96 %9 = add i32 %8, 2
97 %10 = zext i32 %9 to i64
98 %11 = getelementptr inbounds i32, i32 addrspace(1)* undef, i64 %10
99 %12 = ashr i32 undef, %4
100 store i32 %12, i32 addrspace(1)* %11, align 4
101 %13 = add i32 %7, 1
102 %14 = icmp sgt i32 %13, 610
103 br i1 %14, label %._crit_edge.loopexit, label %6
104
105 ._crit_edge.loopexit: ; preds = %._crit_edge.loopexit, %6
106 br i1 undef, label %2, label %._crit_edge.loopexit
107 }