llvm.org GIT mirror llvm / 788841c
[SimplifyCFG] Defer folding unconditional branches to LateSimplifyCFG if it can destroy canonical loop structure. Summary: When simplifying unconditional branches from empty blocks, we pre-test if the BB belongs to a set of loop headers and keep the block to prevent passes from destroying canonical loop structure. However, the current algorithm fails if the destination of the branch is a loop header. In particular, when such a loop's latch block is folded into the loop header, it results in additional backedges, and LoopSimplify turns it into a nested loop, which prevents later optimizations from being applied (e.g., loop unrolling and loop interleaving). This patch augments the existing algorithm by further checking whether the destination of the branch belongs to the set of loop headers and, if so, deferring its elimination to LateSimplifyCFG. Fixes PR33605: https://bugs.llvm.org/show_bug.cgi?id=33605 Reviewers: efriedma, mcrosier, pacxx, hsung, davidxl Reviewed By: efriedma Subscribers: ashutosh.nema, gberry, javed.absar, llvm-commits Differential Revision: https://reviews.llvm.org/D35411 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@308422 91177308-0d34-0410-b5e6-96231b3b80d8 Balaram Makam 2 years ago
14 changed file(s) with 166 addition(s) and 30 deletion(s). Raw diff Collapse all Expand all
230230 // Can't thread an unconditional jump, but if the block is "almost
231231 // empty", we can replace uses of it with uses of the successor and make
232232 // this dead.
233 // We should not eliminate the loop header either, because eliminating
234 // a loop header might later prevent LoopSimplify from transforming nested
235 // loops into simplified form.
233 // We should not eliminate the loop header or latch either, because
234 // eliminating a loop header or latch might later prevent LoopSimplify
235 // from transforming nested loops into simplified form. We will rely on
236 // later passes in backend to clean up empty blocks.
236237 if (BI && BI->isUnconditional() &&
237238 BB != &BB->getParent()->getEntryBlock() &&
238239 // If the terminator is the only non-phi instruction, try to nuke it.
239 BB->getFirstNonPHIOrDbg()->isTerminator() && !LoopHeaders.count(BB)) {
240 BB->getFirstNonPHIOrDbg()->isTerminator() && !LoopHeaders.count(BB) &&
241 !LoopHeaders.count(BI->getSuccessor(0))) {
240242 // FIXME: It is always conservatively correct to drop the info
241243 // for a block even if it doesn't get erased. This isn't totally
242244 // awesome, but it allows us to use AssertingVH to prevent nasty
56555655 bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI,
56565656 IRBuilder<> &Builder) {
56575657 BasicBlock *BB = BI->getParent();
5658 BasicBlock *Succ = BI->getSuccessor(0);
56585659
56595660 if (SinkCommon && SinkThenElseCodeToEnd(BI))
56605661 return true;
56615662
56625663 // If the Terminator is the only non-phi instruction, simplify the block.
5663 // if LoopHeader is provided, check if the block is a loop header
5664 // (This is for early invocations before loop simplify and vectorization
5665 // to keep canonical loop forms for nested loops.
5666 // These blocks can be eliminated when the pass is invoked later
5667 // in the back-end.)
5664 // If LoopHeaders is provided, check if the block or its successor is a loop
5665 // header (This is for early invocations before loop simplify and
5666 // vectorization to keep canonical loop forms for nested loops. These blocks
5667 // can be eliminated when the pass is invoked later in the back-end.)
5668 bool NeedCanonicalLoop =
5669 !LateSimplifyCFG &&
5670 (LoopHeaders && (LoopHeaders->count(BB) || LoopHeaders->count(Succ)));
56685671 BasicBlock::iterator I = BB->getFirstNonPHIOrDbg()->getIterator();
56695672 if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
5670 (!LoopHeaders || !LoopHeaders->count(BB)) &&
5671 TryToSimplifyUncondBranchFromEmptyBlock(BB))
5673 !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB))
56725674 return true;
56735675
56745676 // If the only instruction in the block is a seteq/setne comparison
1818
1919 do.body.i:
2020 ; CHECK-LABEL: do.body.i:
21 ; CHECK: %uglygep2 = getelementptr i8, i8* %uglygep, i64 %3
22 ; CHECK-NEXT: %4 = bitcast i8* %uglygep2 to i32*
23 ; CHECK-NOT: %uglygep2 = getelementptr i8, i8* %uglygep, i64 1032
21 ; CHECK: %uglygep1 = getelementptr i8, i8* %uglygep, i64 %3
22 ; CHECK-NEXT: %4 = bitcast i8* %uglygep1 to i32*
23 ; CHECK-NOT: %uglygep1 = getelementptr i8, i8* %uglygep, i64 1032
2424
2525
2626 %0 = phi i32 [ 256, %entry ], [ %.be, %do.body.i.backedge ]
0 ; RUN: opt < %s -jump-threading -S | FileCheck %s
1
2 ; Skip simplifying unconditional branches from empty blocks in simplifyCFG,
3 ; when it can destroy canonical loop structure.
4
5 ; void foo();
6 ; bool test(int a, int b, int *c) {
7 ; bool changed = false;
8 ; for (unsigned int i = 2; i--;) {
9 ; int r = a | b;
10 ; if ( r != c[i]) {
11 ; c[i] = r;
12 ; foo();
13 ; changed = true;
14 ; }
15 ; }
16 ; return changed;
17 ; }
18
19 ; CHECK-LABEL: @test(
20 ; CHECK: for.cond:
21 ; CHECK-NEXT: %i.0 = phi i32 [ 2, %entry ], [ %dec, %if.end ]
22 ; CHECK: for.body:
23 ; CHECK: br i1 %cmp, label %if.end, label %if.then
24 ; CHECK-NOT: br i1 %cmp, label %for.cond, label %if.then
25 ; CHECK: if.then:
26 ; CHECK: br label %if.end
27 ; CHECK-NOT: br label %for.cond
28 ; CHECK: if.end:
29 ; CHECK: br label %for.cond
30 define i1 @test(i32 %a, i32 %b, i32* %c) {
31 entry:
32 br label %for.cond
33
34 for.cond: ; preds = %if.end, %entry
35 %i.0 = phi i32 [ 2, %entry ], [ %dec, %if.end ]
36 %changed.0.off0 = phi i1 [ false, %entry ], [ %changed.1.off0, %if.end ]
37 %dec = add nsw i32 %i.0, -1
38 %tobool = icmp eq i32 %i.0, 0
39 br i1 %tobool, label %for.cond.cleanup, label %for.body
40
41 for.cond.cleanup: ; preds = %for.cond
42 %changed.0.off0.lcssa = phi i1 [ %changed.0.off0, %for.cond ]
43 ret i1 %changed.0.off0.lcssa
44
45 for.body: ; preds = %for.cond
46 %or = or i32 %a, %b
47 %idxprom = sext i32 %dec to i64
48 %arrayidx = getelementptr inbounds i32, i32* %c, i64 %idxprom
49 %0 = load i32, i32* %arrayidx, align 4
50 %cmp = icmp eq i32 %or, %0
51 br i1 %cmp, label %if.end, label %if.then
52
53 if.then: ; preds = %for.body
54 store i32 %or, i32* %arrayidx, align 4
55 call void @foo()
56 br label %if.end
57
58 if.end: ; preds = %for.body, %if.then
59 %changed.1.off0 = phi i1 [ true, %if.then ], [ %changed.0.off0, %for.body ]
60 br label %for.cond
61 }
62
63 declare void @foo()
8585 ; Verify the new backedge:
8686 ; CHECK: check_2.thread:
8787 ; CHECK-NEXT: call void @bar()
88 ; CHECK-NEXT: br label %check_1
88 ; CHECK-NEXT: br label %check_3.thread
8989
9090 check_2:
9191 %cond2 = icmp eq i32 %v, 2
9999 ; Verify the new backedge:
100100 ; CHECK: eq_2:
101101 ; CHECK-NEXT: call void @bar()
102 ; CHECK-NEXT: br label %check_1
102 ; CHECK-NEXT: br label %check_3.thread
103103
104104 check_3:
105105 %condE = icmp eq i32 %v, 3
1717 ; CHECK: %[[INC2:.*]] = getelementptr inbounds i32, i32* %p, i64 2
1818 ; CHECK: store i32 2, i32* %[[INC2]], align 4
1919 ; CHECK: %[[CMP3:.*]] = icmp eq i32 %k, 3
20 ; CHECK: br i1 %[[CMP3]], label %for.end, label %[[LOOP:.*]]
20 ; CHECK: br i1 %[[CMP3]], label %for.end, label %[[LOOP_PH:.*]]
21 ; CHECK: [[LOOP_PH]]:
22 ; CHECK: br label %[[LOOP:.*]]
2123 ; CHECK: [[LOOP]]:
22 ; CHECK: %[[IV:.*]] = phi i32 [ {{.*}}, %[[LOOP]] ], [ 3, %[[NEXT2]] ]
24 ; CHECK: %[[IV:.*]] = phi i32 [ 3, %[[LOOP_PH]] ], [ {{.*}}, %[[LOOP]] ]
2325
2426 define void @basic(i32* %p, i32 %k) #0 {
2527 entry:
6466 ; CHECK: %[[INC2:.*]] = getelementptr inbounds i32, i32* %p, i64 2
6567 ; CHECK: store i32 2, i32* %[[INC2]], align 4
6668 ; CHECK: %[[CMP3:.*]] = icmp eq i32 %k, 3
67 ; CHECK: br i1 %[[CMP3]], label %for.end, label %[[LOOP:.*]]
69 ; CHECK: br i1 %[[CMP3]], label %for.end, label %[[LOOP_PH:.*]]
70 ; CHECK: [[LOOP_PH]]:
71 ; CHECK: br label %[[LOOP:.*]]
6872 ; CHECK: [[LOOP]]:
69 ; CHECK: %[[IV:.*]] = phi i32 [ %[[IV:.*]], %[[LOOP]] ], [ 3, %[[NEXT2]] ]
73 ; CHECK: %[[IV:.*]] = phi i32 [ 3, %[[LOOP_PH]] ], [ %[[IV:.*]], %[[LOOP]] ]
7074 ; CHECK: %ret = phi i32 [ 0, %entry ], [ 1, %[[NEXT0]] ], [ 2, %[[NEXT1]] ], [ 3, %[[NEXT2]] ], [ %[[IV]], %[[LOOP]] ]
7175 ; CHECK: ret i32 %ret
7276 define i32 @output(i32* %p, i32 %k) #0 {
1515 %cmp1 = icmp eq i32 %a, 12345
1616 br i1 %cmp1, label %if.then, label %if.else, !prof !0
1717 ; CHECK: %cmp1 = icmp eq i32 %a, 12345
18 ; CHECK-NEXT: br i1 %cmp1, label %for.body.us, label %for.body, !prof !0
18 ; CHECK-NEXT: br i1 %cmp1, label %for.body.preheader.split.us, label %for.body.preheader.split, !prof !0
1919 if.then: ; preds = %for.body
2020 ; CHECK: for.body.us:
2121 ; CHECK: add nsw i32 %{{.*}}, 123
5252 br label %for.body
5353 ;CHECK: entry:
5454 ;CHECK-NEXT: %cmp1 = icmp eq i32 1, 2
55 ;CHECK-NEXT: br i1 %cmp1, label %for.body, label %for.cond.cleanup.split, !prof !1
55 ;CHECK-NEXT: br i1 %cmp1, label %entry.split, label %for.cond.cleanup.split, !prof !1
5656 ;CHECK: for.body:
5757 for.body: ; preds = %for.inc, %entry
5858 %inc.i = phi i32 [ 0, %entry ], [ %inc, %if.then ]
55 ; Loop unswitching shouldn't trivially unswitch the true case of condition %a
66 ; in the code here because it leads to an infinite loop. While this doesn't
77 ; contain any instructions with side effects, it's still a kind of side effect.
8 ; It can trivially unswitch on the false cas of condition %a though.
8 ; It can trivially unswitch on the false case of condition %a though.
99
1010 ; STATS: 2 loop-unswitch - Number of branches unswitched
1111 ; STATS: 2 loop-unswitch - Number of unswitches that are trivial
1515 ; CHECK-NEXT: br i1 %a, label %entry.split, label %abort0.split
1616
1717 ; CHECK: entry.split:
18 ; CHECK-NEXT: br i1 %b, label %for.body, label %abort1.split
18 ; CHECK-NEXT: br i1 %b, label %entry.split.split, label %abort1.split
1919
2020 ; CHECK: for.body:
2121 ; CHECK-NEXT: br label %for.body
None ; RUN: opt < %s -O3 -mcpu=core-avx2 -mtriple=x86_64-unknown-linux-gnu -S | FileCheck --check-prefix AUTO_VEC %s
0 ; RUN: opt < %s -O3 -latesimplifycfg -mcpu=core-avx2 -mtriple=x86_64-unknown-linux-gnu -S | FileCheck --check-prefix AUTO_VEC %s
11
22 ; This test checks auto-vectorization with FP induction variable.
33 ; The FP operation is not "fast" and requires "fast-math" function attribute.
0 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck --check-prefix VEC4_INTERL1 %s
11 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=4 -dce -instcombine -S | FileCheck --check-prefix VEC4_INTERL2 %s
22 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=1 -dce -instcombine -S | FileCheck --check-prefix VEC1_INTERL2 %s
3 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -dce -simplifycfg -instcombine -S | FileCheck --check-prefix VEC2_INTERL1_PRED_STORE %s
3 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -dce -simplifycfg -instcombine -latesimplifycfg -S | FileCheck --check-prefix VEC2_INTERL1_PRED_STORE %s
44
55 @fp_inc = common global float 0.000000e+00, align 4
66
13211321 ; Speculation depth must be limited to avoid a zero-cost instruction cycle.
13221322
13231323 ; CHECK-LABEL: @PR26308(
1324 ; CHECK: while.body:
1325 ; CHECK-NEXT: br label %while.body
1324 ; CHECK: cleanup4:
1325 ; CHECK-NEXT: br label %cleanup4
13261326
13271327 define i32 @PR26308(i1 %B, i64 %load) {
13281328 entry:
None ; RUN: opt -simplifycfg -S < %s | FileCheck %s
0 ; RUN: opt -latesimplifycfg -S < %s | FileCheck %s
11
22 ; It's not worthwhile to if-convert one of the phi nodes and leave
33 ; the other behind, because that still requires a branch. If
0 ; RUN: opt < %s -simplifycfg -S | FileCheck %s
1
2 ; Skip simplifying unconditional branches from empty blocks in simplifyCFG,
3 ; when it can destroy canonical loop structure.
4
5 ; void foo();
6 ; bool test(int a, int b, int *c) {
7 ; bool changed = false;
8 ; for (unsigned int i = 2; i--;) {
9 ; int r = a | b;
10 ; if ( r != c[i]) {
11 ; c[i] = r;
12 ; foo();
13 ; changed = true;
14 ; }
15 ; }
16 ; return changed;
17 ; }
18
19 ; CHECK-LABEL: @test(
20 ; CHECK: for.cond:
21 ; CHECK-NEXT: %i.0 = phi i32 [ 2, %entry ], [ %dec, %if.end ]
22 ; CHECK: for.body:
23 ; CHECK: br i1 %cmp, label %if.end, label %if.then
24 ; CHECK-NOT: br i1 %cmp, label %for.cond, label %if.then
25 ; CHECK: if.then:
26 ; CHECK: br label %if.end
27 ; CHECK-NOT: br label %for.cond
28 ; CHECK: if.end:
29 ; CHECK: br label %for.cond
30 define i1 @test(i32 %a, i32 %b, i32* %c) {
31 entry:
32 br label %for.cond
33
34 for.cond: ; preds = %if.end, %entry
35 %i.0 = phi i32 [ 2, %entry ], [ %dec, %if.end ]
36 %changed.0.off0 = phi i1 [ false, %entry ], [ %changed.1.off0, %if.end ]
37 %dec = add nsw i32 %i.0, -1
38 %tobool = icmp eq i32 %i.0, 0
39 br i1 %tobool, label %for.cond.cleanup, label %for.body
40
41 for.cond.cleanup: ; preds = %for.cond
42 %changed.0.off0.lcssa = phi i1 [ %changed.0.off0, %for.cond ]
43 ret i1 %changed.0.off0.lcssa
44
45 for.body: ; preds = %for.cond
46 %or = or i32 %a, %b
47 %idxprom = sext i32 %dec to i64
48 %arrayidx = getelementptr inbounds i32, i32* %c, i64 %idxprom
49 %0 = load i32, i32* %arrayidx, align 4
50 %cmp = icmp eq i32 %or, %0
51 br i1 %cmp, label %if.end, label %if.then
52
53 if.then: ; preds = %for.body
54 store i32 %or, i32* %arrayidx, align 4
55 call void @foo()
56 br label %if.end
57
58 if.end: ; preds = %for.body, %if.then
59 %changed.1.off0 = phi i1 [ true, %if.then ], [ %changed.0.off0, %for.body ]
60 br label %for.cond
61 }
62
63 declare void @foo()
None ; RUN: opt -simplifycfg -S < %s | FileCheck %s
0 ; RUN: opt -latesimplifycfg -S < %s | FileCheck %s
11
22 define void @test1(i32 %n) #0 {
33 entry: