llvm.org GIT mirror llvm / c3f507f
Re-apply r124518 with fix. Watch out for invalidated iterator. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@124526 91177308-0d34-0410-b5e6-96231b3b80d8 Evan Cheng 8 years ago
13 changed file(s) with 229 addition(s) and 76 deletion(s). Raw diff Collapse all Expand all
2121
2222 namespace llvm {
2323
24 class AliasAnalysis;
2425 class Instruction;
2526 class Pass;
26 class AliasAnalysis;
27 class ReturnInst;
2728
2829 /// DeleteDeadBlock - Delete the specified block, which must have no
2930 /// predecessors.
170171 BasicBlock *SplitBlockPredecessors(BasicBlock *BB, BasicBlock *const *Preds,
171172 unsigned NumPreds, const char *Suffix,
172173 Pass *P = 0);
173
174
175 /// FoldReturnIntoUncondBranch - This method duplicates the specified return
176 /// instruction into a predecessor which ends in an unconditional branch. If
177 /// the return instruction returns a value defined by a PHI, propagate the
178 /// right value into the return. It returns the new return instruction in the
179 /// predecessor.
180 ReturnInst *FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
181 BasicBlock *Pred);
182
174183 } // End llvm namespace
175184
176185 #endif
464464 MaxDuplicateCount = TailDuplicateSize;
465465
466466 if (PreRegAlloc) {
467 // Pre-regalloc tail duplication hurts compile time and doesn't help
468 // much except for indirect branches.
469 if (TailBB->empty() || !TailBB->back().getDesc().isIndirectBranch())
467 if (TailBB->empty())
468 return false;
469 const TargetInstrDesc &TID = TailBB->back().getDesc();
470 // Pre-regalloc tail duplication hurts compile time and doesn't help
471 // much except for indirect branches and returns.
472 if (!TID.isIndirectBranch() && !TID.isReturn())
470473 return false;
471474 // If the target has hardware branch prediction that can handle indirect
472475 // branches, duplicating them can often make them predictable when there
501504 }
502505 // Heuristically, don't tail-duplicate calls if it would expand code size,
503506 // as it's less likely to be worth the extra cost.
504 if (InstrCount > 1 && HasCall)
507 if (InstrCount > 1 && (PreRegAlloc && HasCall))
505508 return false;
506509
507510 DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n');
5151
5252 #define DEBUG_TYPE "tailcallelim"
5353 #include "llvm/Transforms/Scalar.h"
54 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
5455 #include "llvm/Transforms/Utils/Local.h"
5556 #include "llvm/Constants.h"
5657 #include "llvm/DerivedTypes.h"
5758 #include "llvm/Function.h"
5859 #include "llvm/Instructions.h"
60 #include "llvm/IntrinsicInst.h"
5961 #include "llvm/Pass.h"
6062 #include "llvm/Analysis/CaptureTracking.h"
6163 #include "llvm/Analysis/InlineCost.h"
6365 #include "llvm/Analysis/Loads.h"
6466 #include "llvm/Support/CallSite.h"
6567 #include "llvm/Support/CFG.h"
68 #include "llvm/Support/Debug.h"
6669 #include "llvm/ADT/Statistic.h"
70 #include "llvm/ADT/STLExtras.h"
6771 using namespace llvm;
6872
6973 STATISTIC(NumEliminated, "Number of tail calls removed");
7983 virtual bool runOnFunction(Function &F);
8084
8185 private:
86 CallInst *FindTRECandidate(Instruction *I,
87 bool CannotTailCallElimCallsMarkedTail);
88 bool EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
89 BasicBlock *&OldEntry,
90 bool &TailCallsAreMarkedTail,
91 SmallVector &ArgumentPHIs,
92 bool CannotTailCallElimCallsMarkedTail);
93 bool FoldReturnAndProcessPred(BasicBlock *BB,
94 ReturnInst *Ret, BasicBlock *&OldEntry,
95 bool &TailCallsAreMarkedTail,
96 SmallVector &ArgumentPHIs,
97 bool CannotTailCallElimCallsMarkedTail);
8298 bool ProcessReturningBlock(ReturnInst *RI, BasicBlock *&OldEntry,
8399 bool &TailCallsAreMarkedTail,
84100 SmallVector &ArgumentPHIs,
135151 bool TailCallsAreMarkedTail = false;
136152 SmallVector ArgumentPHIs;
137153 bool MadeChange = false;
138
139154 bool FunctionContainsEscapingAllocas = false;
140155
141156 // CannotTCETailMarkedCall - If true, we cannot perform TCE on tail calls
162177 return false;
163178
164179 // Second pass, change any tail calls to loops.
165 for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
166 if (ReturnInst *Ret = dyn_cast(BB->getTerminator()))
167 MadeChange |= ProcessReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail,
180 for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
181 if (ReturnInst *Ret = dyn_cast(BB->getTerminator())) {
182 bool Change = ProcessReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail,
168183 ArgumentPHIs,CannotTCETailMarkedCall);
184 if (!Change && BB->getFirstNonPHIOrDbg() == Ret)
185 Change = FoldReturnAndProcessPred(BB, Ret, OldEntry,
186 TailCallsAreMarkedTail, ArgumentPHIs,
187 CannotTCETailMarkedCall);
188 MadeChange |= Change;
189 }
190 }
169191
170192 // If we eliminated any tail recursions, it's possible that we inserted some
171193 // silly PHI nodes which just merge an initial value (the incoming operand)
324346 return getCommonReturnValue(cast(I->use_back()), CI);
325347 }
326348
327 bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
328 bool &TailCallsAreMarkedTail,
329 SmallVector &ArgumentPHIs,
330 bool CannotTailCallElimCallsMarkedTail) {
331 BasicBlock *BB = Ret->getParent();
349 static Instruction *FirstNonDbg(BasicBlock::iterator I) {
350 while (isa(I))
351 ++I;
352 return &*I;
353 }
354
355 CallInst*
356 TailCallElim::FindTRECandidate(Instruction *TI,
357 bool CannotTailCallElimCallsMarkedTail) {
358 BasicBlock *BB = TI->getParent();
332359 Function *F = BB->getParent();
333360
334 if (&BB->front() == Ret) // Make sure there is something before the ret...
335 return false;
361 if (&BB->front() == TI) // Make sure there is something before the terminator.
362 return 0;
336363
337364 // Scan backwards from the return, checking to see if there is a tail call in
338365 // this block. If so, set CI to it.
339 CallInst *CI;
340 BasicBlock::iterator BBI = Ret;
341 while (1) {
366 CallInst *CI = 0;
367 BasicBlock::iterator BBI = TI;
368 while (true) {
342369 CI = dyn_cast(BBI);
343370 if (CI && CI->getCalledFunction() == F)
344371 break;
345372
346373 if (BBI == BB->begin())
347 return false; // Didn't find a potential tail call.
374 return 0; // Didn't find a potential tail call.
348375 --BBI;
349376 }
350377
351378 // If this call is marked as a tail call, and if there are dynamic allocas in
352379 // the function, we cannot perform this optimization.
353380 if (CI->isTailCall() && CannotTailCallElimCallsMarkedTail)
354 return false;
381 return 0;
355382
356383 // As a special case, detect code like this:
357384 // double fabs(double f) { return __builtin_fabs(f); } // a 'fabs' call
358385 // and disable this xform in this case, because the code generator will
359386 // lower the call to fabs into inline code.
360387 if (BB == &F->getEntryBlock() &&
361 &BB->front() == CI && &*++BB->begin() == Ret &&
388 FirstNonDbg(BB->front()) == CI &&
389 FirstNonDbg(llvm::next(BB->begin())) == TI &&
362390 callIsSmall(F)) {
363391 // A single-block function with just a call and a return. Check that
364392 // the arguments match.
369397 for (; I != E && FI != FE; ++I, ++FI)
370398 if (*I != &*FI) break;
371399 if (I == E && FI == FE)
372 return false;
373 }
374
400 return 0;
401 }
402
403 return CI;
404 }
405
406 bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
407 BasicBlock *&OldEntry,
408 bool &TailCallsAreMarkedTail,
409 SmallVector &ArgumentPHIs,
410 bool CannotTailCallElimCallsMarkedTail) {
375411 // If we are introducing accumulator recursion to eliminate operations after
376412 // the call instruction that are both associative and commutative, the initial
377413 // value for the accumulator is placed in this variable. If this value is set
389425 // tail call if all of the instructions between the call and the return are
390426 // movable to above the call itself, leaving the call next to the return.
391427 // Check that this is the case now.
392 for (BBI = CI, ++BBI; &*BBI != Ret; ++BBI) {
428 BasicBlock::iterator BBI = CI;
429 for (++BBI; &*BBI != Ret; ++BBI) {
393430 if (CanMoveAboveCall(BBI, CI)) continue;
394431
395432 // If we can't move the instruction above the call, it might be because it
426463 return false;
427464 }
428465
466 BasicBlock *BB = Ret->getParent();
467 Function *F = BB->getParent();
468
429469 // OK! We can transform this tail call. If this is the first one found,
430470 // create the new entry block, allowing us to branch back to the old entry.
431471 if (OldEntry == 0) {
535575 ++NumEliminated;
536576 return true;
537577 }
578
579 bool TailCallElim::FoldReturnAndProcessPred(BasicBlock *BB,
580 ReturnInst *Ret, BasicBlock *&OldEntry,
581 bool &TailCallsAreMarkedTail,
582 SmallVector &ArgumentPHIs,
583 bool CannotTailCallElimCallsMarkedTail) {
584 bool Change = false;
585
586 // If the return block contains nothing but the return and PHI's,
587 // there might be an opportunity to duplicate the return in its
588 // predecessors and perform TRC there. Look for predecessors that end
589 // in unconditional branch and recursive call(s).
590 SmallVector UncondBranchPreds;
591 for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
592 BasicBlock *Pred = *PI;
593 TerminatorInst *PTI = Pred->getTerminator();
594 if (BranchInst *BI = dyn_cast(PTI))
595 if (BI->isUnconditional())
596 UncondBranchPreds.push_back(BI);
597 }
598
599 while (!UncondBranchPreds.empty()) {
600 BranchInst *BI = UncondBranchPreds.pop_back_val();
601 BasicBlock *Pred = BI->getParent();
602 if (CallInst *CI = FindTRECandidate(BI, CannotTailCallElimCallsMarkedTail)){
603 DEBUG(dbgs() << "FOLDING: " << *BB
604 << "INTO UNCOND BRANCH PRED: " << *Pred);
605 EliminateRecursiveTailCall(CI, FoldReturnIntoUncondBranch(Ret, BB, Pred),
606 OldEntry, TailCallsAreMarkedTail, ArgumentPHIs,
607 CannotTailCallElimCallsMarkedTail);
608 Change = true;
609 }
610 }
611
612 return Change;
613 }
614
615 bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
616 bool &TailCallsAreMarkedTail,
617 SmallVector &ArgumentPHIs,
618 bool CannotTailCallElimCallsMarkedTail) {
619 CallInst *CI = FindTRECandidate(Ret, CannotTailCallElimCallsMarkedTail);
620 if (!CI)
621 return false;
622
623 return EliminateRecursiveTailCall(CI, Ret, OldEntry, TailCallsAreMarkedTail,
624 ArgumentPHIs,
625 CannotTailCallElimCallsMarkedTail);
626 }
508508 // Go up one level.
509509 InStack.erase(VisitStack.pop_back_val().first);
510510 }
511 } while (!VisitStack.empty());
512
513
514 }
511 } while (!VisitStack.empty());
512 }
513
514 /// FoldReturnIntoUncondBranch - This method duplicates the specified return
515 /// instruction into a predecessor which ends in an unconditional branch. If
516 /// the return instruction returns a value defined by a PHI, propagate the
517 /// right value into the return. It returns the new return instruction in the
518 /// predecessor.
519 ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
520 BasicBlock *Pred) {
521 Instruction *UncondBranch = Pred->getTerminator();
522 // Clone the return and add it to the end of the predecessor.
523 Instruction *NewRet = RI->clone();
524 Pred->getInstList().push_back(NewRet);
525
526 // If the return instruction returns a value, and if the value was a
527 // PHI node in "BB", propagate the right value into the return.
528 for (User::op_iterator i = NewRet->op_begin(), e = NewRet->op_end();
529 i != e; ++i)
530 if (PHINode *PN = dyn_cast(*i))
531 if (PN->getParent() == BB)
532 *i = PN->getIncomingValueForBlock(Pred);
533
534 // Update any PHI nodes in the returning block to realize that we no
535 // longer branch to them.
536 BB->removePredecessor(Pred);
537 UncondBranch->eraseFromParent();
538 return cast(NewRet);
539 }
2727 #include "llvm/ADT/Statistic.h"
2828 #include "llvm/ADT/STLExtras.h"
2929 #include "llvm/Support/CFG.h"
30 #include "llvm/Support/CommandLine.h"
3031 #include "llvm/Support/ConstantRange.h"
3132 #include "llvm/Support/Debug.h"
3233 #include "llvm/Support/raw_ostream.h"
3435 #include
3536 #include
3637 using namespace llvm;
38
39 static cl::opt
40 DupRet("simplifycfg-dup-ret", cl::Hidden, cl::init(false),
41 cl::desc("Duplicate return instructions into unconditional branches"));
3742
3843 STATISTIC(NumSpeculations, "Number of speculative executed instructions");
3944
20262031 }
20272032
20282033 // If we found some, do the transformation!
2029 if (!UncondBranchPreds.empty()) {
2034 if (!UncondBranchPreds.empty() && DupRet) {
20302035 while (!UncondBranchPreds.empty()) {
20312036 BasicBlock *Pred = UncondBranchPreds.pop_back_val();
20322037 DEBUG(dbgs() << "FOLDING: " << *BB
20332038 << "INTO UNCOND BRANCH PRED: " << *Pred);
2034 Instruction *UncondBranch = Pred->getTerminator();
2035 // Clone the return and add it to the end of the predecessor.
2036 Instruction *NewRet = RI->clone();
2037 Pred->getInstList().push_back(NewRet);
2038
2039 // If the return instruction returns a value, and if the value was a
2040 // PHI node in "BB", propagate the right value into the return.
2041 for (User::op_iterator i = NewRet->op_begin(), e = NewRet->op_end();
2042 i != e; ++i)
2043 if (PHINode *PN = dyn_cast(*i))
2044 if (PN->getParent() == BB)
2045 *i = PN->getIncomingValueForBlock(Pred);
2046
2047 // Update any PHI nodes in the returning block to realize that we no
2048 // longer branch to them.
2049 BB->removePredecessor(Pred);
2050 UncondBranch->eraseFromParent();
2039 (void)FoldReturnIntoUncondBranch(RI, BB, Pred);
20512040 }
20522041
20532042 // If we eliminated all predecessors of the block, delete the block now.
6969
7070 ; Same as slightly_more_involved, but block_a is now a CFG diamond with
7171 ; fallthrough edges which should be preserved.
72 ; "callq block_a_merge_func" is tail duped.
7273
7374 ; CHECK: yet_more_involved:
7475 ; CHECK: jmp .LBB2_1
7778 ; CHECK-NEXT: callq bar99
7879 ; CHECK-NEXT: callq get
7980 ; CHECK-NEXT: cmpl $2999, %eax
80 ; CHECK-NEXT: jg .LBB2_6
81 ; CHECK-NEXT: jle .LBB2_5
82 ; CHECK-NEXT: callq block_a_false_func
83 ; CHECK-NEXT: callq block_a_merge_func
84 ; CHECK-NEXT: jmp .LBB2_1
85 ; CHECK-NEXT: .LBB2_5:
8186 ; CHECK-NEXT: callq block_a_true_func
82 ; CHECK-NEXT: jmp .LBB2_7
83 ; CHECK-NEXT: .LBB2_6:
84 ; CHECK-NEXT: callq block_a_false_func
85 ; CHECK-NEXT: .LBB2_7:
8687 ; CHECK-NEXT: callq block_a_merge_func
8788 ; CHECK-NEXT: .LBB2_1:
8889 ; CHECK-NEXT: callq body
None ; RUN: opt < %s -jump-threading -mem2reg -instcombine -simplifycfg -S | grep {ret i32 %v1}
1 ; There should be no uncond branches left.
2 ; RUN: opt < %s -jump-threading -mem2reg -instcombine -simplifycfg -S | not grep {br label}
0 ; RUN: opt < %s -jump-threading -mem2reg -instcombine -simplifycfg -S | FileCheck %s
31
42 declare i32 @f1()
53 declare i32 @f2()
64 declare void @f3()
75
86 define i32 @test(i1 %cond, i1 %cond2, i1 %cond3) {
7 ; CHECK: test
98 br i1 %cond, label %T1, label %F1
109
10 ; CHECK-NOT: T1:
1111 T1:
1212 %v1 = call i32 @f1()
1313 br label %Merge
1717 br label %Merge
1818
1919 Merge:
20 ; CHECK: Merge:
21 ; CHECK: %v1 = call i32 @f1()
22 ; CHECK-NEXT: %D = and i1 %cond2, %cond3
23 ; CHECK-NEXT: br i1 %D
2024 %A = phi i1 [true, %T1], [false, %F1]
2125 %B = phi i32 [%v1, %T1], [%v2, %F1]
2226 %C = and i1 %A, %cond2
None ; RUN: opt < %s -jump-threading -mem2reg -instcombine -simplifycfg -S | grep {ret i32 %v1}
1 ; There should be no uncond branches left.
2 ; RUN: opt < %s -jump-threading -mem2reg -instcombine -simplifycfg -S | not grep {br label}
0 ; RUN: opt < %s -jump-threading -mem2reg -instcombine -simplifycfg -S | FileCheck %s
31
42 declare i32 @f1()
53 declare i32 @f2()
64 declare void @f3()
75
86 define i32 @test(i1 %cond, i1 %cond2) {
7 ; CHECK: test
98 br i1 %cond, label %T1, label %F1
109
10 ; CHECK-NOT: T1
1111 T1:
1212 %v1 = call i32 @f1()
1313 br label %Merge
1717 br label %Merge
1818
1919 Merge:
20 ; CHECK: Merge:
21 ; CHECK: %v1 = call i32 @f1()
22 ; CHECK-NEXT: br i1 %cond2
2023 %A = phi i1 [true, %T1], [false, %F1]
2124 %B = phi i32 [%v1, %T1], [%v2, %F1]
2225 %C = and i1 %A, %cond2
None ; RUN: opt < %s -jump-threading -simplifycfg -S | grep {ret i32 1}
0 ; RUN: opt < %s -jump-threading -S | FileCheck %s
11 ; rdar://6402033
22
33 ; Test that we can thread through the block with the partially redundant load (%2).
55 target triple = "i386-apple-darwin7"
66
77 define i32 @foo(i32* %P) nounwind {
8 ; CHECK: foo
89 entry:
910 %0 = tail call i32 (...)* @f1() nounwind ; [#uses=1]
1011 %1 = icmp eq i32 %0, 0 ; [#uses=1]
1112 br i1 %1, label %bb1, label %bb
1213
1314 bb: ; preds = %entry
15 ; CHECK: bb1.thread:
16 ; CHECK: store
17 ; CHECK: br label %bb3
1418 store i32 42, i32* %P, align 4
1519 br label %bb1
1620
2529 ret i32 %res.0
2630
2731 bb3: ; preds = %bb1
32 ; CHECK: bb3:
33 ; CHECK: %res.01 = phi i32 [ 1, %bb1.thread ], [ 0, %bb1 ]
34 ; CHECK: ret i32 %res.01
2835 ret i32 %res.0
2936 }
3037
77 ; CHECK: i64 2, label
88 ; CHECK: i64 3, label
99 ; CHECK: i64 4, label
10 ; CHECK-NOT: br
1110 ; CHECK: }
1211
1312 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
2424 }
2525
2626
27 define void @test4() {
28 br label %return
29 return:
30 ret void
31 ; CHECK: @test4
32 ; CHECK-NEXT: ret void
33 }
34 @test4g = global i8* blockaddress(@test4, %return)
35
36
3727 ; PR5795
3828 define void @test5(i32 %A) {
3929 switch i32 %A, label %return [
146146 ; CHECK: i32 16, label %UnifiedReturnBlock
147147 ; CHECK: i32 17, label %UnifiedReturnBlock
148148 ; CHECK: i32 18, label %UnifiedReturnBlock
149 ; CHECK: i32 19, label %switch.edge
149 ; CHECK: i32 19, label %UnifiedReturnBlock
150150 ; CHECK: ]
151151 }
152152
440440 ; CHECK-NOT: switch
441441 ; CHECK: ret void
442442 }
443
444 ; PR8675
445 ; rdar://5134905
446 ; A chain of icmp-eq comparisons OR'd together (the lowering of
446 ; `x == 1 || x == 2 || x == 3`) should be recognized by SimplifyCFG and
446 ; collapsed into a single switch on %x.
446 define zeroext i1 @test16(i32 %x) nounwind {
447 entry:
448 ; CHECK: @test16
449 ; CHECK: switch i32 %x, label %lor.rhs [
450 ; CHECK: i32 1, label %lor.end
451 ; CHECK: i32 2, label %lor.end
452 ; CHECK: i32 3, label %lor.end
453 ; CHECK: ]
454   %cmp.i = icmp eq i32 %x, 1
455   br i1 %cmp.i, label %lor.end, label %lor.lhs.false
456
457 lor.lhs.false:
458   %cmp.i2 = icmp eq i32 %x, 2
459   br i1 %cmp.i2, label %lor.end, label %lor.rhs
460
461 lor.rhs:
462   %cmp.i1 = icmp eq i32 %x, 3
463   br label %lor.end
464
465 lor.end:
466 ; The phi folds the short-circuit: true if taken from entry or
466 ; lor.lhs.false (x was 1 or 2), otherwise the x==3 result.
466   %0 = phi i1 [ true, %lor.lhs.false ], [ true, %entry ], [ %cmp.i1, %lor.rhs ]
467   ret i1 %0
468 }
None ; RUN: opt < %s -simplifycfg -S | not grep br
1
0 ; RUN: opt < %s -simplifycfg -S | FileCheck %s
21
32 %llvm.dbg.anchor.type = type { i32, i32 }
43 %llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8*, i1, i1, i8* }
1211
1312 declare void @llvm.dbg.stoppoint(i32, i32, { }*) nounwind
1413
15 define i1 @_ZN4llvm11SetCondInst7classofEPKNS_11InstructionE({ i32, i32 }* %I) {
14 define i1 @t({ i32, i32 }* %I) {
15 ; CHECK: t
16 ; CHECK: switch i32 %tmp.2.i, label %shortcirc_next.4 [
17 ; CHECK: i32 14, label %UnifiedReturnBlock
18 ; CHECK: i32 15, label %UnifiedReturnBlock
19 ; CHECK: i32 16, label %UnifiedReturnBlock
20 ; CHECK: i32 17, label %UnifiedReturnBlock
21 ; CHECK: i32 18, label %UnifiedReturnBlock
22 ; CHECK: i32 19, label %UnifiedReturnBlock
23 ; CHECK: ]
1624 entry:
1725 %tmp.1.i = getelementptr { i32, i32 }* %I, i64 0, i32 1 ; [#uses=1]
1826 %tmp.2.i = load i32* %tmp.1.i ; [#uses=6]