llvm.org GIT mirror llvm / 96c95e6
Re-enable "[SCEV] Do not fold dominated SCEVUnknown into AddRecExpr start" The patch rL303730 was reverted because test lsr-expand-quadratic.ll failed on many non-X86 configs with this patch. The reason for this is that the patch makes a correctness fix that changes the optimizer's behavior for this test. Without the change, LSR was making an overconfident simplification based on a wrong SCEV. Apparently it did not need the IV analysis to do this. With the change, it chose a different way to simplify (that wasn't so confident), and this way required the IV analysis. Now, following the right execution path, LSR tries to make a transformation relying on IV Users analysis. This analysis is target-dependent due to this code: // LSR is not APInt clean, do not touch integers bigger than 64-bits. // Also avoid creating IVs of non-native types. For example, we don't want a // 64-bit IV in 32-bit code just because the loop has one 64-bit cast. uint64_t Width = SE->getTypeSizeInBits(I->getType()); if (Width > 64 || !DL.isLegalInteger(Width)) return false; To make a proper transformation in this test case, the type i32 needs to be legal for the specified data layout. When the test runs on some non-X86 configuration (e.g. pure ARM 64), opt gets confused by the specified target and does not use it, rejecting the specified data layout as well. Instead, it uses some default layout that does not treat i32 as a legal type (currently the layout that is used when it is not specified does not have legal types at all). As a result, the transformation we expect to happen does not happen for this test. This re-enabling patch does not have any source code changes compared to the original patch rL303730. The only difference is that the failing test is moved to the X86 directory and now has a requirement to run on x86 only, to comply with the specified target triple and data layout. 
Differential Revision: https://reviews.llvm.org/D33543 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@303971 91177308-0d34-0410-b5e6-96231b3b80d8 Max Kazantsev 2 years ago
8 changed file(s) with 226 addition(s) and 57 deletion(s). Raw diff Collapse all Expand all
15321532 /// specified loop.
15331533 bool isLoopInvariant(const SCEV *S, const Loop *L);
15341534
1535 /// Determine if the SCEV can be evaluated at loop's entry. It is true if it
1536 /// doesn't depend on a SCEVUnknown of an instruction which is dominated by
1537 /// the header of loop L.
1538 bool isAvailableAtLoopEntry(const SCEV *S, const Loop *L, DominatorTree &DT,
1539 LoopInfo &LI);
1540
15351541 /// Return true if the given SCEV changes value in a known way in the
15361542 /// specified loop. This property being true implies that the value is
15371543 /// variant in the loop AND that we can emit an expression to compute the
21772177 return Flags;
21782178 }
21792179
2180 bool ScalarEvolution::isAvailableAtLoopEntry(const SCEV *S, const Loop *L,
2181 DominatorTree &DT, LoopInfo &LI) {
2182 if (!isLoopInvariant(S, L))
2183 return false;
2184 // If a value depends on a SCEVUnknown which is defined after the loop, we
2185 // conservatively assume that we cannot calculate it at the loop's entry.
2186 struct FindDominatedSCEVUnknown {
2187 bool Found = false;
2188 const Loop *L;
2189 DominatorTree &DT;
2190 LoopInfo &LI;
2191
2192 FindDominatedSCEVUnknown(const Loop *L, DominatorTree &DT, LoopInfo &LI)
2193 : L(L), DT(DT), LI(LI) {}
2194
2195 bool checkSCEVUnknown(const SCEVUnknown *SU) {
2196 if (auto *I = dyn_cast(SU->getValue())) {
2197 if (DT.dominates(L->getHeader(), I->getParent()))
2198 Found = true;
2199 else
2200 assert(DT.dominates(I->getParent(), L->getHeader()) &&
2201 "No dominance relationship between SCEV and loop?");
2202 }
2203 return false;
2204 }
2205
2206 bool follow(const SCEV *S) {
2207 switch (static_cast(S->getSCEVType())) {
2208 case scConstant:
2209 return false;
2210 case scAddRecExpr:
2211 case scTruncate:
2212 case scZeroExtend:
2213 case scSignExtend:
2214 case scAddExpr:
2215 case scMulExpr:
2216 case scUMaxExpr:
2217 case scSMaxExpr:
2218 case scUDivExpr:
2219 return true;
2220 case scUnknown:
2221 return checkSCEVUnknown(cast(S));
2222 case scCouldNotCompute:
2223 llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
2224 }
2225 return false;
2226 }
2227
2228 bool isDone() { return Found; }
2229 };
2230
2231 FindDominatedSCEVUnknown FSU(L, DT, LI);
2232 SCEVTraversal ST(FSU);
2233 ST.visitAll(S);
2234 return !FSU.Found;
2235 }
2236
21802237 /// Get a canonical add expression, or something simpler if possible.
21812238 const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops,
21822239 SCEV::NoWrapFlags Flags,
24582515 const SCEVAddRecExpr *AddRec = cast(Ops[Idx]);
24592516 const Loop *AddRecLoop = AddRec->getLoop();
24602517 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
2461 if (isLoopInvariant(Ops[i], AddRecLoop)) {
2518 if (isAvailableAtLoopEntry(Ops[i], AddRecLoop, DT, LI)) {
24622519 LIOps.push_back(Ops[i]);
24632520 Ops.erase(Ops.begin()+i);
24642521 --i; --e;
27332790 const SCEVAddRecExpr *AddRec = cast(Ops[Idx]);
27342791 const Loop *AddRecLoop = AddRec->getLoop();
27352792 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
2736 if (isLoopInvariant(Ops[i], AddRecLoop)) {
2793 if (isAvailableAtLoopEntry(Ops[i], AddRecLoop, DT, LI)) {
27372794 LIOps.push_back(Ops[i]);
27382795 Ops.erase(Ops.begin()+i);
27392796 --i; --e;
2929 ret i64 %r
3030 }
3131
32 ; PR15470: LSR miscompile. The test1 function should return '1'.
33 ; It is valid to fold SCEVUnknown into the recurrence because it
34 ; was defined before the loop.
35 ;
36 ; SCEV does not know how to denormalize chained recurrences, so make
37 ; sure they aren't marked as post-inc users.
38 ;
39 ; CHECK-LABEL: IV Users for loop %test1.loop
40 ; CHECK-NO-LCSSA: %sext.us = {0,+,(16777216 + (-16777216 * %sub.us)),+,33554432}<%test1.loop> (post-inc with loop %test1.loop) in %f = ashr i32 %sext.us, 24
41 define i32 @test1(i1 %cond) {
42 entry:
43 %sub.us = select i1 %cond, i32 0, i32 0
44 br label %test1.loop
45
46 test1.loop:
47 %inc1115.us = phi i32 [ 0, %entry ], [ %inc11.us, %test1.loop ]
48 %inc11.us = add nsw i32 %inc1115.us, 1
49 %cmp.us = icmp slt i32 %inc11.us, 2
50 br i1 %cmp.us, label %test1.loop, label %for.end
51
52 for.end:
53 %tobool.us = icmp eq i32 %inc1115.us, 0
54 %mul.us = shl i32 %inc1115.us, 24
55 %sub.cond.us = sub nsw i32 %inc1115.us, %sub.us
56 %sext.us = mul i32 %mul.us, %sub.cond.us
57 %f = ashr i32 %sext.us, 24
58 br label %exit
59
60 exit:
61 ret i32 %f
62 }
63
3264 ; PR15470: LSR miscompile. The test2 function should return '1'.
65 ; It is illegal to fold SCEVUnknown (sext.us) into the recurrence
66 ; because it is defined after the loop where this recurrence belongs.
3367 ;
3468 ; SCEV does not know how to denormalize chained recurrences, so make
3569 ; sure they aren't marked as post-inc users.
3670 ;
3771 ; CHECK-LABEL: IV Users for loop %test2.loop
38 ; CHECK-NO-LCSSA: %sext.us = {0,+,(16777216 + (-16777216 * %sub.us)),+,33554432}<%test2.loop> (post-inc with loop %test2.loop) in %f = ashr i32 %sext.us, 24
72 ; CHECK-NO-LCSSA: %sub.cond.us = ((-1 * %sub.us) + {0,+,1}<%test2.loop>) (post-inc with loop %test2.loop) in %sext.us = mul i32 %mul.us, %sub.cond.us
3973 define i32 @test2() {
4074 entry:
4175 br label %test2.loop
219219
220220 ; Mix of previous use cases that demonstrates %s3 can be incorrectly treated as
221221 ; a recurrence of loop1 because of operands order if we pick recurrences in an
222 ; incorrect order.
222 ; incorrect order. It also shows that we cannot safely fold v1 (SCEVUnknown)
223 ; because we cannot prove for sure that it doesn't use Phis of loop 2.
223224
224225 define void @test_03(i32 %a, i32 %b, i32 %c, i32* %p) {
225226
227228 ; CHECK: %v1 = load i32, i32* %p
228229 ; CHECK-NEXT: --> %v1
229230 ; CHECK: %s1 = add i32 %phi1, %v1
230 ; CHECK-NEXT: --> {(%a + %v1),+,1}<%loop1>
231 ; CHECK-NEXT: --> ({%a,+,1}<%loop1> + %v1)
231232 ; CHECK: %s2 = add i32 %s1, %b
232 ; CHECK-NEXT: --> {(%a + %b + %v1),+,1}<%loop1>
233 ; CHECK-NEXT: --> ({(%a + %b),+,1}<%loop1> + %v1)
233234 ; CHECK: %s3 = add i32 %s2, %phi2
234235 ; CHECK-NEXT: --> ({{{{}}((2 * %a) + %b),+,1}<%loop1>,+,2}<%loop2> + %v1)
235236
451452 %s6 = add i32 %phi3, %phi2
452453 ret void
453454 }
455
456 ; Make sure that a complicated Phi does not get folded with rec's start value
457 ; of a loop which is above.
458 define void @test_08() {
459
460 ; CHECK-LABEL: Classifying expressions for: @test_08
461 ; CHECK: %tmp11 = add i64 %iv.2.2, %iv.2.1
462 ; CHECK-NEXT: --> ({0,+,-1}<%loop_2> + %iv.2.1)
463 ; CHECK: %tmp12 = trunc i64 %tmp11 to i32
464 ; CHECK-NEXT: --> (trunc i64 ({0,+,-1}<%loop_2> + %iv.2.1) to i32)
465 ; CHECK: %tmp14 = mul i32 %tmp12, %tmp7
466 ; CHECK-NEXT: --> ((trunc i64 ({0,+,-1}<%loop_2> + %iv.2.1) to i32) * {-1,+,-1}<%loop_1>)
467 ; CHECK: %tmp16 = mul i64 %iv.2.1, %iv.1.1
468 ; CHECK-NEXT: --> ({2,+,1}<%loop_1> * %iv.2.1)
469
470 entry:
471 br label %loop_1
472
473 loop_1:
474 %iv.1.1 = phi i64 [ 2, %entry ], [ %iv.1.1.next, %loop_1_back_branch ]
475 %iv.1.2 = phi i32 [ -1, %entry ], [ %iv.1.2.next, %loop_1_back_branch ]
476 br label %loop_1_exit
477
478 dead:
479 br label %loop_1_exit
480
481 loop_1_exit:
482 %tmp5 = icmp sgt i64 %iv.1.1, 2
483 br i1 %tmp5, label %loop_2_preheader, label %loop_1_back_branch
484
485 loop_1_back_branch:
486 %iv.1.1.next = add nuw nsw i64 %iv.1.1, 1
487 %iv.1.2.next = add nsw i32 %iv.1.2, 1
488 br label %loop_1
489
490 loop_2_preheader:
491 %tmp6 = sub i64 1, %iv.1.1
492 %tmp7 = trunc i64 %tmp6 to i32
493 br label %loop_2
494
495 loop_2:
496 %iv.2.1 = phi i64 [ 0, %loop_2_preheader ], [ %tmp16, %loop_2 ]
497 %iv.2.2 = phi i64 [ 0, %loop_2_preheader ], [ %iv.2.2.next, %loop_2 ]
498 %iv.2.3 = phi i64 [ 2, %loop_2_preheader ], [ %iv.2.3.next, %loop_2 ]
499 %tmp11 = add i64 %iv.2.2, %iv.2.1
500 %tmp12 = trunc i64 %tmp11 to i32
501 %tmp14 = mul i32 %tmp12, %tmp7
502 %tmp16 = mul i64 %iv.2.1, %iv.1.1
503 %iv.2.3.next = add nuw nsw i64 %iv.2.3, 1
504 %iv.2.2.next = add nsw i64 %iv.2.2, -1
505 %tmp17 = icmp slt i64 %iv.2.3.next, %iv.1.1
506 br i1 %tmp17, label %loop_2, label %exit
507
508 exit:
509 %tmp10 = add i32 %iv.1.2, 3
510 ret void
511 }
2424 if6: ; preds = %idxend.8
2525 %r2 = add i64 %0, -1
2626 %r3 = load i64, i64* %1, align 8
27 ; CHECK-NOT: %r2
27 ; CHECK: %r2 = add i64 %0, -1
2828 ; CHECK: %r3 = load i64
2929 br label %ib
3030
3535 %r4 = mul i64 %r3, %r0
3636 %r5 = add i64 %r2, %r4
3737 %r6 = icmp ult i64 %r5, undef
38 ; CHECK: [[MUL1:%[0-9]+]] = mul i64 %lsr.iv, %r3
39 ; CHECK: [[ADD1:%[0-9]+]] = add i64 [[MUL1]], -1
40 ; CHECK: add i64 %{{.}}, [[ADD1]]
41 ; CHECK: %r6
38 ; CHECK: %r4 = mul i64 %r3, %lsr.iv
39 ; CHECK: %r5 = add i64 %r2, %r4
40 ; CHECK: %r6 = icmp ult i64 %r5, undef
41 ; CHECK: %r7 = getelementptr i64, i64* undef, i64 %r5
4242 %r7 = getelementptr i64, i64* undef, i64 %r5
4343 store i64 1, i64* %r7, align 8
44 ; CHECK: [[MUL2:%[0-9]+]] = mul i64 %lsr.iv, %r3
45 ; CHECK: [[ADD2:%[0-9]+]] = add i64 [[MUL2]], -1
4644 br label %L
4745 }
0 ; REQUIRES: x86
1 ; RUN: opt -loop-reduce -S < %s | FileCheck %s
2
3 ; Strength reduction analysis here relies on IV Users analysis, that
4 ; only finds users among instructions with types that are treated as
5 ; legal by the data layout. When running this test on pure non-x86
6 ; configs (for example, ARM 64), it gets confused with the target
7 ; triple and uses a default data layout instead. This default layout
8 ; does not have any legal types (even i32), so the transformation
9 ; does not happen.
10
11 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
12 target triple = "x86_64-apple-macosx"
13
14 ; PR15470: LSR miscompile. The test2 function should return '1'.
15 ;
16 ; SCEV expander cannot expand quadratic recurrences outside of the
17 ; loop. This recurrence depends on %sub.us, so can't be expanded.
18 ; We cannot fold SCEVUnknown (sub.us) with recurrences since it is
19 ; declared after the loop.
20 ;
21 ; CHECK-LABEL: @test2
22 ; CHECK-LABEL: test2.loop:
23 ; CHECK: %lsr.iv1 = phi i32 [ %lsr.iv.next2, %test2.loop ], [ -16777216, %entry ]
24 ; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %test2.loop ], [ -1, %entry ]
25 ; CHECK: %lsr.iv.next = add nsw i32 %lsr.iv, 1
26 ; CHECK: %lsr.iv.next2 = add nsw i32 %lsr.iv1, 16777216
27 ;
28 ; CHECK-LABEL: for.end:
29 ; CHECK: %tobool.us = icmp eq i32 %lsr.iv.next2, 0
30 ; CHECK: %sub.us = select i1 %tobool.us, i32 0, i32 0
31 ; CHECK: %1 = sub i32 0, %sub.us
32 ; CHECK: %2 = add i32 %1, %lsr.iv.next
33 ; CHECK: %sext.us = mul i32 %lsr.iv.next2, %2
34 ; CHECK: %f = ashr i32 %sext.us, 24
35 ; CHECK: ret i32 %f
36 define i32 @test2() {
37 entry:
38 br label %test2.loop
39
40 test2.loop:
41 %inc1115.us = phi i32 [ 0, %entry ], [ %inc11.us, %test2.loop ]
42 %inc11.us = add nsw i32 %inc1115.us, 1
43 %cmp.us = icmp slt i32 %inc11.us, 2
44 br i1 %cmp.us, label %test2.loop, label %for.end
45
46 for.end:
47 %tobool.us = icmp eq i32 %inc1115.us, 0
48 %sub.us = select i1 %tobool.us, i32 0, i32 0
49 %mul.us = shl i32 %inc1115.us, 24
50 %sub.cond.us = sub nsw i32 %inc1115.us, %sub.us
51 %sext.us = mul i32 %mul.us, %sub.cond.us
52 %f = ashr i32 %sext.us, 24
53 br label %exit
54
55 exit:
56 ret i32 %f
57 }
+0
-42
test/Transforms/LoopStrengthReduce/lsr-expand-quadratic.ll less more
None ; RUN: opt -loop-reduce -S < %s | FileCheck %s
1
2 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
3 target triple = "x86_64-apple-macosx"
4
5 ; PR15470: LSR miscompile. The test2 function should return '1'.
6 ;
7 ; SCEV expander cannot expand quadratic recurrences outside of the
8 ; loop. This recurrence depends on %sub.us, so can't be expanded.
9 ;
10 ; CHECK-LABEL: @test2
11 ; CHECK-LABEL: test2.loop:
12 ; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %test2.loop ], [ -16777216, %entry ]
13 ; CHECK: %lsr.iv.next = add nsw i32 %lsr.iv, 16777216
14 ;
15 ; CHECK-LABEL: for.end:
16 ; CHECK: %sub.cond.us = sub nsw i32 %inc1115.us, %sub.us
17 ; CHECK: %sext.us = mul i32 %lsr.iv.next, %sub.cond.us
18 ; CHECK: %f = ashr i32 %sext.us, 24
19 ; CHECK: ret i32 %f
20 define i32 @test2() {
21 entry:
22 br label %test2.loop
23
24 test2.loop:
25 %inc1115.us = phi i32 [ 0, %entry ], [ %inc11.us, %test2.loop ]
26 %inc11.us = add nsw i32 %inc1115.us, 1
27 %cmp.us = icmp slt i32 %inc11.us, 2
28 br i1 %cmp.us, label %test2.loop, label %for.end
29
30 for.end:
31 %tobool.us = icmp eq i32 %inc1115.us, 0
32 %sub.us = select i1 %tobool.us, i32 0, i32 0
33 %mul.us = shl i32 %inc1115.us, 24
34 %sub.cond.us = sub nsw i32 %inc1115.us, %sub.us
35 %sext.us = mul i32 %mul.us, %sub.cond.us
36 %f = ashr i32 %sext.us, 24
37 br label %exit
38
39 exit:
40 ret i32 %f
41 }
2424 entry:
2525 %buffer = alloca [33 x i16], align 16
2626 %add.ptr = getelementptr inbounds [33 x i16], [33 x i16]* %buffer, i64 0, i64 33
27 %sub.ptr.lhs.cast = ptrtoint i16* %add.ptr to i64
28 %sub.ptr.rhs.cast = ptrtoint i16* %add.ptr to i64
2729 br label %do.body
2830
2931 do.body: ; preds = %do.body, %entry
4547 do.end: ; preds = %do.body
4648 %xap.0 = inttoptr i64 %0 to i1*
4749 %cap.0 = ptrtoint i1* %xap.0 to i64
48 %sub.ptr.lhs.cast = ptrtoint i16* %add.ptr to i64
49 %sub.ptr.rhs.cast = ptrtoint i16* %incdec.ptr to i64
5050 %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
5151 %sub.ptr.div39 = lshr exact i64 %sub.ptr.sub, 1
5252 %conv11 = trunc i64 %sub.ptr.div39 to i32