llvm.org GIT mirror llvm / afa4f41
Don't delete empty preheaders in CodeGenPrepare if it would create a critical edge. Presently, CodeGenPrepare deletes all nearly empty (only phi and branch) basic blocks. This pass can delete loop preheaders, which frequently creates critical edges. A preheader can be a convenient place to spill registers to the stack. If the entrance to a loop body is a critical edge, then spills may occur in the loop body rather than immediately before it. This patch protects loop preheaders from deletion in CodeGenPrepare even if they are nearly empty. Since the patch alters the CFG, it affects a large number of test cases. In most cases, the changes are merely cosmetic (basic blocks have different names or instruction orders change slightly). I am somewhat concerned about the test/CodeGen/Mips/brdelayslot.ll test case. If the loop preheader is not deleted, then the MIPS backend does not take advantage of a branch delay slot. Consequently, I would like some close review by a MIPS expert. The patch also partially subsumes D16893 from George Burgess IV. George correctly notes that CodeGenPrepare does not actually preserve the dominator tree. I think the dominator tree was usually not valid when CodeGenPrepare ran, but I am using LoopInfo to mark preheaders, so the dominator tree is now always valid before CodeGenPrepare. Author: Tom Jablin (tjablin). Reviewers: hfinkel, george.burgess.iv, vkalintiris, dsanders, kbarton, cycheng. http://reviews.llvm.org/D16984 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@265397 91177308-0d34-0410-b5e6-96231b3b80d8 Chuang-Yu Cheng 3 years ago
16 changed file(s) with 59 addition(s) and 35 deletion(s). Raw diff Collapse all Expand all
1717 #include "llvm/ADT/SmallSet.h"
1818 #include "llvm/ADT/Statistic.h"
1919 #include "llvm/Analysis/InstructionSimplify.h"
20 #include "llvm/Analysis/LoopInfo.h"
2021 #include "llvm/Analysis/TargetLibraryInfo.h"
2122 #include "llvm/Analysis/TargetTransformInfo.h"
2223 #include "llvm/Analysis/ValueTracking.h"
110111 cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) "
111112 "optimization in CodeGenPrepare"));
112113
114 static cl::opt DisablePreheaderProtect(
115 "disable-preheader-prot", cl::Hidden, cl::init(false),
116 cl::desc("Disable protection against removing loop preheaders"));
117
113118 namespace {
114119 typedef SmallPtrSet SetOfInstrs;
115120 typedef PointerIntPair TypeIsSExt;
121126 const TargetLowering *TLI;
122127 const TargetTransformInfo *TTI;
123128 const TargetLibraryInfo *TLInfo;
129 const LoopInfo *LI;
124130
125131 /// As we scan instructions optimizing them, this is the next instruction
126132 /// to optimize. Transforms that can invalidate this should update it.
160166 // FIXME: When we can selectively preserve passes, preserve the domtree.
161167 AU.addRequired();
162168 AU.addRequired();
169 AU.addRequired();
163170 }
164171
165172 private:
217224 TLI = TM->getSubtargetImpl(F)->getTargetLowering();
218225 TLInfo = &getAnalysis().getTLI();
219226 TTI = &getAnalysis().getTTI(F);
227 LI = &getAnalysis().getLoopInfo();
220228 OptSize = F.optForSize();
221229
222230 /// This optimization identifies DIV instructions that can be
358366 /// edges in ways that are non-optimal for isel. Start by eliminating these
359367 /// blocks so we can split them the way we want them.
360368 bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) {
369 SmallPtrSet Preheaders;
370 SmallVector LoopList(LI->begin(), LI->end());
371 while (!LoopList.empty()) {
372 Loop *L = LoopList.pop_back_val();
373 LoopList.insert(LoopList.end(), L->begin(), L->end());
374 if (BasicBlock *Preheader = L->getLoopPreheader())
375 Preheaders.insert(Preheader);
376 }
377
361378 bool MadeChange = false;
362379 // Note that this intentionally skips the entry block.
363380 for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) {
389406
390407 if (!canMergeBlocks(BB, DestBB))
391408 continue;
409
410 // Do not delete loop preheaders if doing so would create a critical edge.
411 // Loop preheaders can be good locations to spill registers. If the
412 // preheader is deleted and we create a critical edge, registers may be
413 // spilled in the loop body instead.
414 if (!DisablePreheaderProtect && Preheaders.count(BB) &&
415 !(BB->getSinglePredecessor() && BB->getSinglePredecessor()->getSingleSuccessor()))
416 continue;
392417
393418 eliminateMostlyEmptyBlock(BB);
394419 MadeChange = true;
2828 ; Set the first argument to zero.
2929 ; CHECK-NEXT: mov w0, wzr
3030 ; CHECK-NEXT: bl _doSomething
31 ;
31 ;
3232 ; Without shrink-wrapping, epilogue is in the exit block.
3333 ; DISABLE: [[EXIT_LABEL]]:
3434 ; Epilogue code.
331331 ; DISABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]]
332332 ;
333333 ; Sum is merged with the returned register.
334 ; CHECK: mov [[SUM:w0]], wzr
335 ; CHECK-NEXT: add [[VA_BASE:x[0-9]+]], sp, #16
334 ; CHECK: add [[VA_BASE:x[0-9]+]], sp, #16
336335 ; CHECK-NEXT: str [[VA_BASE]], [sp, #8]
337336 ; CHECK-NEXT: cmp w1, #1
338337 ; CHECK-NEXT: b.lt [[IFEND_LABEL:LBB[0-9_]+]]
338 ; CHECK: mov [[SUM:w0]], wzr
339339 ;
340340 ; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ; %for.body
341341 ; CHECK: ldr [[VA_ADDR:x[0-9]+]], [sp, #8]
346346 ; CHECK-NEXT: sub w1, w1, #1
347347 ; CHECK-NEXT: cbnz w1, [[LOOP_LABEL]]
348348 ;
349 ; DISABLE-NEXT: b [[IFEND_LABEL]]
349 ; DISABLE-NEXT: b
350350 ; DISABLE: [[ELSE_LABEL]]: ; %if.else
351351 ; DISABLE: lsl w0, w1, #1
352 ;
353 ; ENABLE: [[ELSE_LABEL]]: ; %if.else
354 ; ENABLE: lsl w0, w1, #1
355 ; ENABLE-NEXT: ret
352356 ;
353357 ; CHECK: [[IFEND_LABEL]]:
354358 ; Epilogue code.
355359 ; CHECK: add sp, sp, #16
356360 ; CHECK-NEXT: ret
357 ;
358 ; ENABLE: [[ELSE_LABEL]]: ; %if.else
359 ; ENABLE: lsl w0, w1, #1
360 ; ENABLE-NEXT: ret
361361 define i32 @variadicFunc(i32 %cond, i32 %count, ...) #0 {
362362 entry:
363363 %ap = alloca i8*, align 8
1111 br i1 %0, label %bb2, label %bb
1212
1313 bb:
14 ; CHECK: LBB0_1:
15 ; CHECK: bne LBB0_1
16 ; CHECK-NOT: b LBB0_1
14 ; CHECK: LBB0_2:
15 ; CHECK: bne LBB0_2
16 ; CHECK-NOT: b LBB0_2
1717 ; CHECK: bx lr
1818 %list_addr.05 = phi %struct.list_head* [ %2, %bb ], [ %list, %entry ]
1919 %next.04 = phi %struct.list_head* [ %list_addr.05, %bb ], [ null, %entry ]
7474
7575 ; CHECK-LABEL: __Z4foo1c:
7676 ; CHECK: blx __Znwm
77 ; CHECK: {{.*}}@ %entry.do.body.i.i.i_crit_edge
77 ; CHECK: {{.*}}@ %do.body.i.i.i.preheader
7878 ; CHECK: str r0, [sp, [[OFFSET:#[0-9]+]]]
7979 ; CHECK: {{.*}}@ %do.body.i.i.i
8080 ; CHECK: ldr [[R0:r[0-9]+]], [sp, [[OFFSET]]]
44 ; RUN: llc -march=mipsel -disable-mips-df-forward-search=false \
55 ; RUN: -relocation-model=static < %s | FileCheck %s -check-prefix=FORWARD
66 ; RUN: llc -march=mipsel -disable-mips-df-backward-search \
7 ; RUN: -disable-mips-df-succbb-search=false < %s | \
7 ; RUN: -disable-mips-df-succbb-search=false -disable-preheader-prot=true < %s | \
88 ; RUN: FileCheck %s -check-prefix=SUCCBB
99
1010 define void @foo1() nounwind {
1111 entry:
12 ; Default: jalr
13 ; Default-NOT: nop
14 ; Default: jr
12 ; Default: jalr
13 ; Default-NOT: nop
14 ; Default: jr
1515 ; Default-NOT: nop
1616 ; Default: .end
17 ; None: jalr
18 ; None: nop
19 ; None: jr
17 ; None: jalr
18 ; None: nop
19 ; None: jr
2020 ; None: nop
2121 ; None: .end
2222
1010 ; CHECK-LABEL: readLumaCoeff8x8_CABAC
1111
1212 ; The check for first "addiu" instruction is added so that we can match the correct "b" instruction.
13 ; CHECK: addiu ${{[0-9]+}}, $zero, -1
13 ; CHECK: andi
1414 ; CHECK: b $[[BB0:BB[0-9_]+]]
15 ; CHECK-NEXT: addiu ${{[0-9]+}}, $zero, 0
15 ; CHECK-NEXT: sll
1616
1717 ; Check that at the start of a fallthrough block there is an instruction that writes to $1.
18 ; CHECK-NEXT: {{BB[0-9_#]+}}:
18 ; CHECK-NEXT: {{BB[0-9_#]+}}:
1919 ; CHECK-NEXT: lw $[[R1:[0-9]+]], %got(assignSE2partition)($[[R2:[0-9]+]])
2020 ; CHECK-NEXT: sll $1, $[[R0:[0-9]+]], 4
2121
1818 ; reusing the pre-addition register later, or the post-addition one. Currently,
1919 ; it does the latter, so we check:
2020
21 ; CHECK: # %while.body85.i
21 ; CHECK: # %while.body85.i{{$}}
2222 ; CHECK-NOT: # %
2323 ; CHECK-NOT: add
2424 ; CHECK: movl %[[POSTR:e[abcdxi]+]], %[[PRER:e[abcdxi]+]]
602602 ;
603603 ; CHECK: test_unnatural_cfg_backwards_inner_loop
604604 ; CHECK: %entry
605 ; CHECK: [[BODY:# BB#[0-9]+]]:
606605 ; CHECK: %loop2b
607606 ; CHECK: %loop1
608 ; CHECK: %loop2a
609607
610608 entry:
611609 br i1 undef, label %loop2a, label %body
6363 declare double @llvm.sqrt.f64(double)
6464
6565 ; SSE-LABEL: loopdep1
66 ; SSE: for.body
66 ; SSE: for.body{{$}}
6767 ;
6868 ; This loop contains two cvtsi2ss instructions that update the same xmm
6969 ; register. Verify that the execution dependency fix pass breaks those
138138
139139 ; This loop contains a cvtsi2sd instruction that has a loop-carried
140140 ; false dependency on an xmm that is modified by other scalar instructions
141 ; that follow it in the loop. Additionally, the source of convert is a
141 ; that follow it in the loop. Additionally, the source of convert is a
142142 ; memory operand. Verify the execution dependency fix pass breaks this
143143 ; dependency by inserting a xor before the convert.
144144 @x = common global [1024 x double] zeroinitializer, align 16
1010 ; CHECK-NEXT: incq %rax
1111
1212
13 ; ATOM: movsd .LCPI0_0(%rip), %xmm0
1314 ; ATOM: xorl %eax, %eax
14 ; ATOM: movsd .LCPI0_0(%rip), %xmm0
1515 ; ATOM: align
1616 ; ATOM-NEXT: BB0_2:
1717 ; ATOM-NEXT: movsd A(,%rax,8)
0 ; REQUIRES: asserts
1 ; RUN: llc < %s -march=x86 -stats 2>&1 | grep "Number of blocks eliminated" | grep 6
1 ; RUN: llc < %s -disable-preheader-prot=true -march=x86 -stats 2>&1 | grep "Number of blocks eliminated" | grep 6
2 ; RUN: llc < %s -disable-preheader-prot=false -march=x86 -stats 2>&1 | grep "Number of blocks eliminated" | grep 3
23 ; PR1296
34
45 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
1212
1313 ifthen: ; preds = %entry
1414 ret i32 0
15 ; CHECK: forbody
15 ; CHECK: forbody{{$}}
1616 ; CHECK-NOT: mov
1717 forbody: ; preds = %forbody, %forcond.preheader
1818 %indvar = phi i32 [ 0, %forcond.preheader ], [ %divisor.02, %forbody ] ; [#uses=3]
2020 ; CHECK: je
2121
2222 ; There should be no moves required in the for loop body.
23 ; CHECK: %forbody
23 ; CHECK: %forbody{{$}}
2424 ; CHECK-NOT: mov
2525 ; CHECK: jbe
2626
3232
3333 define void @pr26232(i64 %a) {
3434 ; KNL-32-LABEL: pr26232:
35 ; KNL-32: # BB#0: # %for_test11.preheader
35 ; KNL-32: # BB#0: # %for_loop599.preheader
3636 ; KNL-32-NEXT: pushl %esi
3737 ; KNL-32-NEXT: .Ltmp0:
3838 ; KNL-32-NEXT: .cfi_def_cfa_offset 8
None ; RUN: llc -disable-machine-licm -machine-sink-bfi=true -mtriple=x86_64-apple-darwin < %s | FileCheck %s -check-prefix=MSINK_BFI
1 ; RUN: llc -disable-machine-licm -machine-sink-bfi=false -mtriple=x86_64-apple-darwin < %s | FileCheck %s -check-prefix=MSINK_NOBFI
0 ; RUN: llc -disable-preheader-prot=true -disable-machine-licm -machine-sink-bfi=true -mtriple=x86_64-apple-darwin < %s | FileCheck %s -check-prefix=MSINK_BFI
1 ; RUN: llc -disable-preheader-prot=true -disable-machine-licm -machine-sink-bfi=false -mtriple=x86_64-apple-darwin < %s | FileCheck %s -check-prefix=MSINK_NOBFI
22
33 ; Test that by changing BlockFrequencyInfo we change the order in which
44 ; machine-sink looks for successor blocks. By not using BFI, both G and B
101101 ; CHECK-NEXT: %for.body3.us.i
102102 ; CHECK-NEXT: Inner Loop
103103 ; CHECK: testb
104 ; CHECK: jne
104 ; CHECK: je
105105 ; CHECK: jmp
106106 define fastcc void @test3(double* nocapture %u) nounwind uwtable ssp {
107107 entry: