llvm.org GIT mirror llvm / 109b3a6
[SCEV] Add nuw/nsw to mul ops in StrengthenNoWrapFlags where safe. Summary: Previously we would add them for adds, but not multiplies. Reviewers: sanjoy Subscribers: llvm-commits, hiraditya Differential Revision: https://reviews.llvm.org/D48038 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@334428 91177308-0d34-0410-b5e6-96231b3b80d8 Justin Lebar 1 year, 11 months ago
15 changed file(s) with 56 addition(s) and 28 deletion(s). Raw diff Collapse all Expand all
22042204
22052205 SignOrUnsignWrap = ScalarEvolution::maskFlags(Flags, SignOrUnsignMask);
22062206
2207 if (SignOrUnsignWrap != SignOrUnsignMask && Type == scAddExpr &&
2208 Ops.size() == 2 && isa<SCEVConstant>(Ops[0])) {
2209
2210 // (A + C) --> (A + C)<nsw> if the addition does not sign overflow
2211 // (A + C) --> (A + C)<nuw> if the addition does not unsign overflow
2207 if (SignOrUnsignWrap != SignOrUnsignMask &&
2208 (Type == scAddExpr || Type == scMulExpr) && Ops.size() == 2 &&
2209 isa<SCEVConstant>(Ops[0])) {
2210
2211 auto Opcode = [&] {
2212 switch (Type) {
2213 case scAddExpr:
2214 return Instruction::Add;
2215 case scMulExpr:
2216 return Instruction::Mul;
2217 default:
2218 llvm_unreachable("Unexpected SCEV op.");
2219 }
2220 }();
22122221
22132222 const APInt &C = cast<SCEVConstant>(Ops[0])->getAPInt();
2223
2224 // (A <opcode> C) --> (A <opcode> C)<nsw> if the op doesn't sign overflow.
22142225 if (!(SignOrUnsignWrap & SCEV::FlagNSW)) {
22152226 auto NSWRegion = ConstantRange::makeGuaranteedNoWrapRegion(
2216 Instruction::Add, C, OBO::NoSignedWrap);
2227 Opcode, C, OBO::NoSignedWrap);
22172228 if (NSWRegion.contains(SE->getSignedRange(Ops[1])))
22182229 Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNSW);
22192230 }
2231
2232 // (A <opcode> C) --> (A <opcode> C)<nuw> if the op doesn't unsign overflow.
22202233 if (!(SignOrUnsignWrap & SCEV::FlagNUW)) {
22212234 auto NUWRegion = ConstantRange::makeGuaranteedNoWrapRegion(
22222235 Instruction::Add, C, OBO::NoUnsignedWrap);
99 ; AddRec: {{{(28 + (4 * (-4 + (3 * %m)) * %o) + %A),+,(8 * %m * %o)}<%for.i>,+,(12 * %o)}<%for.j>,+,20}<%for.k>
1010 ; CHECK: Base offset: %A
1111 ; CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of 4 bytes.
12 ; CHECK: ArrayRef[{3,+,2}<%for.i>][{-4,+,3}<%for.j>][{7,+,5}<%for.k>]
12 ; CHECK: ArrayRef[{3,+,2}<nw><%for.i>][{-4,+,3}<%for.j>][{7,+,5}<%for.k>]
1313
1414 define void @foo(i64 %n, i64 %m, i64 %o, i32* nocapture %A) #0 {
1515 entry:
1010 ; AddRec: {{((%m * %b * 8) + %A),+,(2 * %m * 8)}<%for.i>,+,(2 * 8)}<%for.j>
1111 ; CHECK: Base offset: %A
1212 ; CHECK: ArrayDecl[UnknownSize][%m] with elements of 8 bytes.
13 ; CHECK: ArrayRef[{%b,+,2}<%for.i>][{0,+,2}<%for.j>]
13 ; CHECK: ArrayRef[{%b,+,2}<%for.i>][{0,+,2}<nuw><%for.j>]
1414
1515
1616 define void @foo(i64 %n, i64 %m, i64 %b, double* %A) {
6969 ; sure they aren't marked as post-inc users.
7070 ;
7171 ; CHECK-LABEL: IV Users for loop %test2.loop
72 ; CHECK-NO-LCSSA: %sub.cond.us = ((-1 * %sub.us)<nsw> + {0,+,1}<%test2.loop>) (post-inc with loop %test2.loop) in %sext.us = mul i32 %mul.us, %sub.cond.us
72 ; CHECK-NO-LCSSA: %sub.cond.us = ((-1 * %sub.us)<nuw><nsw> + {0,+,1}<%test2.loop>) (post-inc with loop %test2.loop) in %sext.us = mul i32 %mul.us, %sub.cond.us
7373 define i32 @test2() {
7474 entry:
7575 br label %test2.loop
250250 ; CHECK-NEXT: Member: {((2 * %offset) + %a),+,2}<%for.body>
251251 ; CHECK-NEXT: Group {{.*}}[[ONE]]:
252252 ; CHECK-NEXT: (Low: %a High: (10000 + %a))
253 ; CHECK-NEXT: Member: {%a,+,2}<%for.body>
253 ; CHECK-NEXT: Member: {%a,+,2}<nw><%for.body>
254254 ; CHECK-NEXT: Group {{.*}}[[TWO]]:
255255 ; CHECK-NEXT: (Low: (20000 + %a) High: (30000 + %a))
256 ; CHECK-NEXT: Member: {(20000 + %a),+,2}<%for.body>
256 ; CHECK-NEXT: Member: {(20000 + %a),+,2}<nw><%for.body>
257257
258258 define void @testi(i16* %a,
259259 i64 %offset) {
3232 ; i64 {0,+,2}<%for.body>
3333
3434 ; LAA: [PSE] %arrayidxA = getelementptr i16, i16* %a, i64 %mul_ext:
35 ; LAA-NEXT: ((2 * (zext i32 {0,+,2}<%for.body> to i64)) + %a)
35 ; LAA-NEXT: ((2 * (zext i32 {0,+,2}<%for.body> to i64)) + %a)
3636 ; LAA-NEXT: --> {%a,+,4}<%for.body>
3737
3838
121121 ; LAA: Memory dependences are safe{{$}}
122122 ; LAA: SCEV assumptions:
123123 ; LAA-NEXT: {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> Added Flags:
124 ; LAA-NEXT: {((2 * (zext i32 (2 * (trunc i64 %N to i32)) to i64)) + %a),+,-4}<%for.body> Added Flags:
124 ; LAA-NEXT: {((2 * (zext i32 (2 * (trunc i64 %N to i32)) to i64)) + %a),+,-4}<%for.body> Added Flags:
125125
126126 ; The expression for %mul_ext as analyzed by SCEV is
127127 ; (zext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64)
129129 ; i64 {zext i32 (2 * (trunc i64 %N to i32)) to i64,+,-2}<%for.body>
130130
131131 ; LAA: [PSE] %arrayidxA = getelementptr i16, i16* %a, i64 %mul_ext:
132 ; LAA-NEXT: ((2 * (zext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64)) + %a)
133 ; LAA-NEXT: --> {((2 * (zext i32 (2 * (trunc i64 %N to i32)) to i64)) + %a),+,-4}<%for.body>
132 ; LAA-NEXT: ((2 * (zext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64)) + %a)
133 ; LAA-NEXT: --> {((2 * (zext i32 (2 * (trunc i64 %N to i32)) to i64)) + %a),+,-4}<%for.body>
134134
135135 ; LV-LABEL: f2
136136 ; LV-LABEL: for.body.lver.check
320320 ; CHECK: %SQ = mul i32 %i.0, %i.0
321321 ; CHECK-NEXT: --> {4,+,5,+,2}<%bb3>
322322 ; CHECK: %tmp4 = mul i32 %i.0, 2
323 ; CHECK-NEXT: --> {4,+,2}<%bb3>
323 ; CHECK-NEXT: --> {4,+,2}<nuw><%bb3>
324324 ; CHECK: %tmp5 = sub i32 %SQ, %tmp4
325325 ; CHECK-NEXT: --> {0,+,3,+,2}<%bb3>
326326
0 ; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s
1
2 ; Check that we add nuw to multiplies by a constant where we can infer that the
3 ; multiply does not have unsigned overflow.
4 declare i32 @get_int();
5
6 define void @foo() {
7 %a = call i32 @get_int(), !range !0
8 %b = mul i32 %a, 4
9 ; CHECK: %b
10 ; CHECK-NEXT: --> (4 * %a)<nuw><nsw>
11 ret void
12 }
13
14 !0 = !{i32 0, i32 100}
7878
7979 ; Note: Without the preheader assume, there is an 'smax' in the
8080 ; backedge-taken count expression:
81 ; CHECK: Loop %bb: backedge-taken count is ((-1 + (2 * (%no /u 2))) /u 2)
81 ; CHECK: Loop %bb: backedge-taken count is ((-1 + (2 * (%no /u 2))) /u 2)
8282 ; CHECK: Loop %bb: max backedge-taken count is 1073741822
7272 ret void
7373 }
7474
75 ; CHECK: Loop %bb: backedge-taken count is ((-1 + (2 * (%no /u 2))) /u 2)
75 ; CHECK: Loop %bb: backedge-taken count is ((-1 + (2 * (%no /u 2))) /u 2)
7676 ; CHECK: Loop %bb: max backedge-taken count is 1073741822
2424 %tmp6 = sext i32 %i.01 to i64 ; [#uses=1]
2525 %tmp7 = getelementptr double, double* %p, i64 %tmp6 ; [#uses=1]
2626 ; CHECK: %tmp7
27 ; CHECK-NEXT: --> {%p,+,8}<%bb>
27 ; CHECK-NEXT: --> {%p,+,8}<nw><%bb>
2828 store double %tmp5, double* %tmp7, align 8
2929 %tmp8 = add nsw i32 %i.01, 1 ; [#uses=2]
3030 ; CHECK: %tmp8
125125 }
126126
127127 ; CHECK-LABEL: PR12375
128 ; CHECK: --> {(4 + %arg),+,4}<%bb1>{{ U: [^ ]+ S: [^ ]+}}{{ *}}Exits: (4 + (4 * ((-1 + (-1 * %arg) + ((4 + %arg) umax (8 + %arg))) /u 4)) + %arg)
128 ; CHECK: --> {(4 + %arg),+,4}<%bb1>{{ U: [^ ]+ S: [^ ]+}}{{ *}}Exits: (4 + (4 * ((-1 + (-1 * %arg) + ((4 + %arg) umax (8 + %arg))) /u 4)) + %arg)
129129 define i32 @PR12375(i32* readnone %arg) {
130130 bb:
131131 %tmp = getelementptr inbounds i32, i32* %arg, i64 2
144144 }
145145
146146 ; CHECK-LABEL: PR12376
147 ; CHECK: --> {(4 + %arg),+,4}<%bb2>{{ U: [^ ]+ S: [^ ]+}}{{ *}}Exits: (4 + (4 * ((-1 + (-1 * %arg) + ((4 + %arg) umax %arg1)) /u 4)) + %arg)
147 ; CHECK: --> {(4 + %arg),+,4}<%bb2>{{ U: [^ ]+ S: [^ ]+}}{{ *}}Exits: (4 + (4 * ((-1 + (-1 * %arg) + ((4 + %arg) umax %arg1)) /u 4)) + %arg)
148148 define void @PR12376(i32* nocapture %arg, i32* nocapture %arg1) {
149149 bb:
150150 br label %bb2
0 ; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s
11
22 ; CHECK: %tmp9 = shl i64 %tmp8, 33
3 ; CHECK-NEXT: --> {{.*}} Exits: (-8589934592 + (8589934592 * (zext i32 %arg2 to i64)))
3 ; CHECK-NEXT: --> {{.*}} Exits: (-8589934592 + (8589934592 * (zext i32 %arg2 to i64)))
44 ; CHECK: %tmp10 = ashr exact i64 %tmp9, 32
55 ; CHECK-NEXT: --> {{.*}} Exits: (sext i32 (-2 + (2 * %arg2)) to i64)
66 ; CHECK: %tmp11 = getelementptr inbounds i32, i32* %arg, i64 %tmp10
4747 }
4848
4949 ; CHECK: %t10 = ashr exact i128 %t9, 1
50 ; CHECK-NEXT: --> {{.*}} Exits: (sext i127 (-633825300114114700748351602688 + (633825300114114700748351602688 * (zext i32 %arg5 to i127))) to i128)
50 ; CHECK-NEXT: --> {{.*}} Exits: (sext i127 (-633825300114114700748351602688 + (633825300114114700748351602688 * (zext i32 %arg5 to i127))) to i128)
5151 ; CHECK: %t14 = or i128 %t10, 1
52 ; CHECK-NEXT: --> {{.*}} Exits: (1 + (sext i127 (-633825300114114700748351602688 + (633825300114114700748351602688 * (zext i32 %arg5 to i127))) to i128))
52 ; CHECK-NEXT: --> {{.*}} Exits: (1 + (sext i127 (-633825300114114700748351602688 + (633825300114114700748351602688 * (zext i32 %arg5 to i127))) to i128))
5353 ; CHECK: Loop %bb7: backedge-taken count is (-1 + (zext i32 %arg5 to i128))
5454 ; CHECK-NEXT: Loop %bb7: max backedge-taken count is -1
5555 ; CHECK-NEXT: Loop %bb7: Predicated backedge-taken count is (-1 + (zext i32 %arg5 to i128))
0 ; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s
11
22 ; CHECK: %tmp9 = shl i64 %tmp8, 33
3 ; CHECK-NEXT: --> {{.*}} Exits: (-8589934592 + (8589934592 * (zext i32 %arg2 to i64)))
3 ; CHECK-NEXT: --> {{.*}} Exits: (-8589934592 + (8589934592 * (zext i32 %arg2 to i64)))
44 ; CHECK-NEXT: %tmp10 = ashr exact i64 %tmp9, 0
5 ; CHECK-NEXT: --> {{.*}} Exits: (-8589934592 + (8589934592 * (zext i32 %arg2 to i64)))
5 ; CHECK-NEXT: --> {{.*}} Exits: (-8589934592 + (8589934592 * (zext i32 %arg2 to i64)))
66
77 define void @foo(i32* nocapture %arg, i32 %arg1, i32 %arg2) {
88 bb:
3030 ret i32 %i
3131
3232 ; CHECK-LABEL: @test2
33 ; CHECK: Loop %loop: backedge-taken count is ((-32 + (32 * (%n /u 32))) /u 32)
33 ; CHECK: Loop %loop: backedge-taken count is ((-32 + (32 * (%n /u 32))) /u 32)
3434 ; CHECK: Loop %loop: max backedge-taken count is 134217727
3535 }
3636
1919 ; CHECK: add nuw nsw i64 %indvars.iv, 1
2020 ; CHECK: sub nsw i64 %indvars.iv, 2
2121 ; CHECK: sub nsw i64 4, %indvars.iv
22 ; CHECK: mul nsw i64 %indvars.iv, 8
22 ; CHECK: mul nuw nsw i64 %indvars.iv, 8
2323 for.body170: ; preds = %for.body170, %for.body153
2424 %i2.19 = phi i32 [ %add249, %for.body170 ], [ 0, %for.body153 ]
2525