llvm.org GIT mirror llvm / aeff6e7
[MBP] Don't move bottom block before header if it can't reduce taken branches

If the bottom block BB has only one successor, OldTop, in most cases it is profitable to move it before OldTop, except in the following case:

    -->OldTop<-
    |    .    |
    |    .    |
    |    .    |
    ---Pred   |
         |    |
        BB-----

Moving BB before OldTop can't reduce the number of taken branches; this patch detects this case and prevents the move.

Differential Revision: https://reviews.llvm.org/D57067

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@352236 91177308-0d34-0410-b5e6-96231b3b80d8

Guozhi Wei 1 year, 4 months ago
8 changed file(s) with 116 addition(s) and 46 deletion(s). Raw diff Collapse all Expand all
450450
451451 void buildChain(const MachineBasicBlock *BB, BlockChain &Chain,
452452 BlockFilterSet *BlockFilter = nullptr);
/// Decide whether \p BottomBlock may be moved in front of \p OldTop.
/// Returns false only when BottomBlock has exactly one predecessor, that
/// predecessor has exactly two successors, and the successor other than
/// BottomBlock is OldTop; returns true in every other case.
453 bool canMoveBottomBlockToTop(const MachineBasicBlock *BottomBlock,
454 const MachineBasicBlock *OldTop);
453455 MachineBasicBlock *findBestLoopTop(
454456 const MachineLoop &L, const BlockFilterSet &LoopBlockSet);
455457 MachineBasicBlock *findBestLoopExit(
17551757 << getBlockName(*Chain.begin()) << "\n");
17561758 }
17571759
1760 // If the bottom block BB has only one successor, OldTop, in most cases it is
1761 // profitable to move it before OldTop, except in the following case:
1762 //
1763 //     -->OldTop<-
1764 //     |    .    |
1765 //     |    .    |
1766 //     |    .    |
1767 //     ---Pred   |
1768 //          |    |
1769 //         BB-----
1770 //
1771 // If BB is moved before OldTop, Pred needs a taken branch to BB, and the other
1772 // successor can't be laid out below Pred, so no taken branch is saved. In this
1773 // case we keep the original layout (return false); otherwise we return true.
1774 bool
1775 MachineBlockPlacement::canMoveBottomBlockToTop(
1776 const MachineBasicBlock *BottomBlock,
1777 const MachineBasicBlock *OldTop) {
1778 if (BottomBlock->pred_size() != 1) // Diagram requires exactly one predecessor.
1779 return true; // Shape doesn't match, so the move is not vetoed.
1780 MachineBasicBlock *Pred = *BottomBlock->pred_begin(); // The sole predecessor.
1781 if (Pred->succ_size() != 2) // Diagram requires Pred to have two successors.
1782 return true; // Shape doesn't match, so the move is not vetoed.
1783
1784 MachineBasicBlock *OtherBB = *Pred->succ_begin(); // Start with Pred's first successor...
1785 if (OtherBB == BottomBlock) // ...and if that one is BottomBlock itself,
1786 OtherBB = *Pred->succ_rbegin(); // take the last one (the second of the two) instead.
1787 if (OtherBB == OldTop) // Pred branches to both BottomBlock and OldTop: the diagram's case.
1788 return false; // Moving BottomBlock up would not reduce taken branches.
1789
1790 return true; // Any other shape: moving is allowed.
1791 }
1792
17581793 /// Find the best loop top block for layout.
17591794 ///
17601795 /// Look for a block which is strictly better than the loop header for laying
17971832 << Pred->succ_size() << " successors, ";
17981833 MBFI->printBlockFreq(dbgs(), Pred) << " freq\n");
17991834 if (Pred->succ_size() > 1)
1835 continue;
1836
1837 if (!canMoveBottomBlockToTop(Pred, L.getHeader()))
18001838 continue;
18011839
18021840 BlockFrequency PredFreq = MBFI->getBlockFreq(Pred);
9595 ; FUNC-LABEL: {{^}}loop_land_info_assert:
9696 ; SI: v_cmp_lt_i32_e64 [[CMP4:s\[[0-9:]+\]]], s{{[0-9]+}}, 4{{$}}
9797 ; SI: s_and_b64 [[CMP4M:s\[[0-9]+:[0-9]+\]]], exec, [[CMP4]]
98 ; SI: s_mov_b64 vcc, [[CMP4M]]
99 ; SI-NEXT: s_cbranch_vccnz [[CONVEX_EXIT:BB[0-9_]+]]
100 ; SI-NEXT: s_branch [[FOR_COND_PREHDR:BB[0-9_]+]]
98 ; SI: s_branch [[INFLOOP:BB[0-9]+_[0-9]+]]
99
100 ; SI: [[CONVEX_EXIT:BB[0-9_]+]]
101 ; SI: s_mov_b64 vcc,
102 ; SI-NEXT: s_cbranch_vccnz [[ENDPGM:BB[0-9]+_[0-9]+]]
103 ; SI: s_cbranch_vccnz [[INFLOOP]]
101104
102105 ; SI: ; %if.else
103106 ; SI: buffer_store_dword
104107
105 ; SI: [[INFLOOP:BB[0-9]+_[0-9]+]]:
108 ; SI: [[INFLOOP]]:
109 ; SI: s_cbranch_vccnz [[CONVEX_EXIT]]
106110
107 ; SI: [[CONVEX_EXIT]]:
108 ; SI: s_mov_b64 vcc,
109 ; SI-NEXT: s_cbranch_vccnz [[ENDPGM:BB[0-9]+_[0-9]+]]
110 ; SI: s_branch [[INFLOOP]]
111 ; SI-NEXT: [[FOR_COND_PREHDR]]:
111 ; SI: ; %for.cond.preheader
112112 ; SI: s_cbranch_vccz [[ENDPGM]]
113113
114114 ; SI: [[ENDPGM]]:
2323 ; CHECK-DAG: addi 25, 3, _ZN6snappy8internalL8wordmaskE@toc@l
2424 ; CHECK-DAG: addis 5, 2, _ZN6snappy8internalL10char_tableE@toc@ha
2525 ; CHECK-DAG: addi 24, 5, _ZN6snappy8internalL10char_tableE@toc@l
26 ; CHECK: b .LBB0_2
27 ; CHECK: .LBB0_2: # %for.cond
26 ; CHECK: b .[[LABEL1:[A-Z0-9_]+]]
27 ; CHECK: .[[LABEL1]]: # %for.cond
2828 ; CHECK-NOT: addis {{[0-9]+}}, 2, _ZN6snappy8internalL8wordmaskE@toc@ha
2929 ; CHECK-NOT: addis {{[0-9]+}}, 2, _ZN6snappy8internalL10char_tableE@toc@ha
3030 ; CHECK: bctrl
1919 ; CHECK-NEXT: movq %rdi, %rbx
2020 ; CHECK-NEXT: orq $2097152, %r14 ## imm = 0x200000
2121 ; CHECK-NEXT: andl $15728640, %r14d ## imm = 0xF00000
22 ; CHECK-NEXT: jmp LBB0_1
2322 ; CHECK-NEXT: .p2align 4, 0x90
24 ; CHECK-NEXT: LBB0_3: ## %bb.i
25 ; CHECK-NEXT: ## in Loop: Header=BB0_1 Depth=1
26 ; CHECK-NEXT: movl 0, %eax
27 ; CHECK-NEXT: xorps %xmm0, %xmm0
28 ; CHECK-NEXT: cvtsi2ssq %rax, %xmm0
29 ; CHECK-NEXT: movl 4, %eax
30 ; CHECK-NEXT: xorps %xmm1, %xmm1
31 ; CHECK-NEXT: cvtsi2ssq %rax, %xmm1
32 ; CHECK-NEXT: movl 8, %eax
33 ; CHECK-NEXT: xorps %xmm2, %xmm2
34 ; CHECK-NEXT: cvtsi2ssq %rax, %xmm2
35 ; CHECK-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
36 ; CHECK-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
37 ; CHECK-NEXT: movaps %xmm0, 0
3823 ; CHECK-NEXT: LBB0_1: ## %bb4
3924 ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
4025 ; CHECK-NEXT: xorl %eax, %eax
4934 ; CHECK-NEXT: ## in Loop: Header=BB0_1 Depth=1
5035 ; CHECK-NEXT: cmpq $1048576, %r14 ## imm = 0x100000
5136 ; CHECK-NEXT: jne LBB0_1
52 ; CHECK-NEXT: jmp LBB0_3
37 ; CHECK-NEXT: ## %bb.3: ## %bb.i
38 ; CHECK-NEXT: ## in Loop: Header=BB0_1 Depth=1
39 ; CHECK-NEXT: movl 0, %eax
40 ; CHECK-NEXT: xorps %xmm0, %xmm0
41 ; CHECK-NEXT: cvtsi2ssq %rax, %xmm0
42 ; CHECK-NEXT: movl 4, %eax
43 ; CHECK-NEXT: xorps %xmm1, %xmm1
44 ; CHECK-NEXT: cvtsi2ssq %rax, %xmm1
45 ; CHECK-NEXT: movl 8, %eax
46 ; CHECK-NEXT: xorps %xmm2, %xmm2
47 ; CHECK-NEXT: cvtsi2ssq %rax, %xmm2
48 ; CHECK-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
49 ; CHECK-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
50 ; CHECK-NEXT: movaps %xmm0, 0
51 ; CHECK-NEXT: jmp LBB0_1
5352 entry:
5453 br label %bb4
5554
3434 ; CHECK-NEXT: # %bb.1: # %for.cond5.preheader
3535 ; CHECK-NEXT: xorl %ebx, %ebx
3636 ; CHECK-NEXT: movb $1, %bpl
37 ; CHECK-NEXT: jmp .LBB2_2
3837 ; CHECK-NEXT: .p2align 4, 0x90
39 ; CHECK-NEXT: .LBB2_5: # %if.then
40 ; CHECK-NEXT: # in Loop: Header=BB2_2 Depth=1
41 ; CHECK-NEXT: callq scale
4238 ; CHECK-NEXT: .LBB2_2: # %for.cond5
4339 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
4440 ; CHECK-NEXT: testb %bl, %bl
5147 ; CHECK-NEXT: # in Loop: Header=BB2_2 Depth=1
5248 ; CHECK-NEXT: vucomisd {{\.LCPI.*}}, %xmm0
5349 ; CHECK-NEXT: jne .LBB2_5
54 ; CHECK-NEXT: jp .LBB2_5
50 ; CHECK-NEXT: jnp .LBB2_2
51 ; CHECK-NEXT: .LBB2_5: # %if.then
52 ; CHECK-NEXT: # in Loop: Header=BB2_2 Depth=1
53 ; CHECK-NEXT: callq scale
5554 ; CHECK-NEXT: jmp .LBB2_2
5655 ; CHECK-NEXT: .LBB2_6: # %for.end52
5756 ; CHECK-NEXT: addq $8, %rsp
1414 ; CHECK-NEXT: retq
1515 ; CHECK-NEXT: .LBB0_1: # %bb56
1616 ; CHECK-NEXT: xorl %eax, %eax
17 ; CHECK-NEXT: jmp .LBB0_2
1817 ; CHECK-NEXT: .p2align 4, 0x90
19 ; CHECK-NEXT: .LBB0_3: # %bb35
20 ; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1
21 ; CHECK-NEXT: testb %al, %al
2218 ; CHECK-NEXT: .LBB0_2: # %bb33
2319 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
2420 ; CHECK-NEXT: testb %al, %al
2521 ; CHECK-NEXT: jne .LBB0_2
26 ; CHECK-NEXT: jmp .LBB0_3
22 ; CHECK-NEXT: # %bb.3: # %bb35
23 ; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1
24 ; CHECK-NEXT: testb %al, %al
25 ; CHECK-NEXT: jmp .LBB0_2
2726 bb1:
2827 br i1 undef, label %L_10, label %L_10
2928
0 ; RUN: llc -mtriple=i686-linux < %s | FileCheck %s
1
2
3 define i32 @bar(i32 %count) {
4 ; Test checks that basic block backedge2 is not moved before the header,
5 ; because that can't reduce taken branches.
6 ; Later, backedge1 and backedge2 are rotated before the loop header.
7 ; CHECK-LABEL: bar
8 ; CHECK: %.entry
9 ; CHECK: %.backedge1
10 ; CHECK: %.backedge2
11 ; CHECK: %.header
12 ; CHECK: %.exit
13 .entry:
14 %c = shl nsw i32 %count, 2
15 br label %.header
16
17 .header:
18 %val1 = call i32 @foo()
19 %cond1 = icmp sgt i32 %val1, 1
20 br i1 %cond1, label %.exit, label %.backedge1 ; leave the loop or continue to backedge1
21
22 .backedge1:
23 %val2 = call i32 @foo()
24 %cond2 = icmp sgt i32 %val2, 1
25 br i1 %cond2, label %.header, label %.backedge2 ; two successors: the header and backedge2
26
27 .backedge2:
28 %val3 = call i32 @foo()
29 br label %.header ; single successor: the loop header
30
31 .exit:
32 ret i32 %c
33 }
34
35 declare i32 @foo()
2121 ; CHECK: #DEBUG_VALUE: main:aa <- 0
2222 ; CHECK: #DEBUG_VALUE: main:aa <- $[[REG:[0-9a-z]+]]
2323 ; CHECK: jmp .LBB0_1
24 ; CHECK: .LBB0_3:
25 ; CHECK: #DEBUG_VALUE: main:aa <- $[[REG]]
26 ; CHECK: incl %[[REG]]
27 ; CHECK: #DEBUG_VALUE: main:aa <- $[[REG]]
24 ; CHECK: .LBB0_2:
25 ; CHECK: #DEBUG_VALUE: main:aa <- $[[REG]]
26 ; CHECK: jne .LBB0_1
27 ; CHECK: # %bb.{{.*}}:
28 ; CHECK: #DEBUG_VALUE: main:aa <- $[[REG]]
29 ; CHECK: incl %[[REG]]
30 ; CHECK: #DEBUG_VALUE: main:aa <- $[[REG]]
2831 ; CHECK: .LBB0_1:
2932 ; CHECK: #DEBUG_VALUE: main:aa <- $[[REG]]
30 ; CHECK: je .LBB0_4
33 ; CHECK: jne .LBB0_2
3134 ; CHECK: # %bb.{{.*}}:
32 ; CHECK: #DEBUG_VALUE: main:aa <- $[[REG]]
33 ; CHECK: jne .LBB0_1
34 ; CHECK: jmp .LBB0_3
35 ; CHECK: .LBB0_4:
3635 ; CHECK: #DEBUG_VALUE: main:aa <- $[[REG]]
3736 ; CHECK: retq
3837