llvm.org GIT mirror llvm / b0a42fd
Revert r124518. It broke Linux self-host.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@124522 91177308-0d34-0410-b5e6-96231b3b80d8
Evan Cheng, 8 years ago
13 changed file(s) with 76 addition(s) and 226 deletion(s). Raw diff Collapse all Expand all
2121
2222 namespace llvm {
2323
24 class AliasAnalysis;
2524 class Instruction;
2625 class Pass;
27 class ReturnInst;
26 class AliasAnalysis;
2827
2928 /// DeleteDeadBlock - Delete the specified block, which must have no
3029 /// predecessors.
171170 BasicBlock *SplitBlockPredecessors(BasicBlock *BB, BasicBlock *const *Preds,
172171 unsigned NumPreds, const char *Suffix,
173172 Pass *P = 0);
174
175 /// FoldReturnIntoUncondBranch - This method duplicates the specified return
176 /// instruction into a predecessor which ends in an unconditional branch. If
177 /// the return instruction returns a value defined by a PHI, propagate the
178 /// right value into the return. It returns the new return instruction in the
179 /// predecessor.
180 ReturnInst *FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
181 BasicBlock *Pred);
182
173
183174 } // End llvm namespace
184175
185176 #endif
464464 MaxDuplicateCount = TailDuplicateSize;
465465
466466 if (PreRegAlloc) {
467 if (TailBB->empty())
468 return false;
469 const TargetInstrDesc &TID = TailBB->back().getDesc();
470 // Pre-regalloc tail duplication hurts compile time and doesn't help
471 // much except for indirect branches and returns.
472 if (!TID.isIndirectBranch() && !TID.isReturn())
467 // Pre-regalloc tail duplication hurts compile time and doesn't help
468 // much except for indirect branches.
469 if (TailBB->empty() || !TailBB->back().getDesc().isIndirectBranch())
473470 return false;
474471 // If the target has hardware branch prediction that can handle indirect
475472 // branches, duplicating them can often make them predictable when there
504501 }
505502 // Heuristically, don't tail-duplicate calls if it would expand code size,
506503 // as it's less likely to be worth the extra cost.
507 if (InstrCount > 1 && (PreRegAlloc && HasCall))
504 if (InstrCount > 1 && HasCall)
508505 return false;
509506
510507 DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n');
5151
5252 #define DEBUG_TYPE "tailcallelim"
5353 #include "llvm/Transforms/Scalar.h"
54 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
5554 #include "llvm/Transforms/Utils/Local.h"
5655 #include "llvm/Constants.h"
5756 #include "llvm/DerivedTypes.h"
5857 #include "llvm/Function.h"
5958 #include "llvm/Instructions.h"
60 #include "llvm/IntrinsicInst.h"
6159 #include "llvm/Pass.h"
6260 #include "llvm/Analysis/CaptureTracking.h"
6361 #include "llvm/Analysis/InlineCost.h"
6563 #include "llvm/Analysis/Loads.h"
6664 #include "llvm/Support/CallSite.h"
6765 #include "llvm/Support/CFG.h"
68 #include "llvm/Support/Debug.h"
6966 #include "llvm/ADT/Statistic.h"
70 #include "llvm/ADT/STLExtras.h"
7167 using namespace llvm;
7268
7369 STATISTIC(NumEliminated, "Number of tail calls removed");
8379 virtual bool runOnFunction(Function &F);
8480
8581 private:
86 CallInst *FindTRECandidate(Instruction *I,
87 bool CannotTailCallElimCallsMarkedTail);
88 bool EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
89 BasicBlock *&OldEntry,
90 bool &TailCallsAreMarkedTail,
91 SmallVector &ArgumentPHIs,
92 bool CannotTailCallElimCallsMarkedTail);
93 bool FoldReturnAndProcessPred(BasicBlock *BB,
94 ReturnInst *Ret, BasicBlock *&OldEntry,
95 bool &TailCallsAreMarkedTail,
96 SmallVector &ArgumentPHIs,
97 bool CannotTailCallElimCallsMarkedTail);
9882 bool ProcessReturningBlock(ReturnInst *RI, BasicBlock *&OldEntry,
9983 bool &TailCallsAreMarkedTail,
10084 SmallVector &ArgumentPHIs,
151135 bool TailCallsAreMarkedTail = false;
152136 SmallVector ArgumentPHIs;
153137 bool MadeChange = false;
138
154139 bool FunctionContainsEscapingAllocas = false;
155140
156141 // CannotTCETailMarkedCall - If true, we cannot perform TCE on tail calls
177162 return false;
178163
179164 // Second pass, change any tail calls to loops.
180 for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
181 if (ReturnInst *Ret = dyn_cast(BB->getTerminator())) {
182 bool Change = ProcessReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail,
165 for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
166 if (ReturnInst *Ret = dyn_cast(BB->getTerminator()))
167 MadeChange |= ProcessReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail,
183168 ArgumentPHIs,CannotTCETailMarkedCall);
184 if (!Change && BB->getFirstNonPHIOrDbg() == Ret)
185 Change = FoldReturnAndProcessPred(BB, Ret, OldEntry,
186 TailCallsAreMarkedTail, ArgumentPHIs,
187 CannotTCETailMarkedCall);
188 MadeChange |= Change;
189 }
190 }
191169
192170 // If we eliminated any tail recursions, it's possible that we inserted some
193171 // silly PHI nodes which just merge an initial value (the incoming operand)
346324 return getCommonReturnValue(cast(I->use_back()), CI);
347325 }
348326
349 static Instruction *FirstNonDbg(BasicBlock::iterator I) {
350 while (isa(I))
351 ++I;
352 return &*I;
353 }
354
355 CallInst*
356 TailCallElim::FindTRECandidate(Instruction *TI,
357 bool CannotTailCallElimCallsMarkedTail) {
358 BasicBlock *BB = TI->getParent();
327 bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
328 bool &TailCallsAreMarkedTail,
329 SmallVector &ArgumentPHIs,
330 bool CannotTailCallElimCallsMarkedTail) {
331 BasicBlock *BB = Ret->getParent();
359332 Function *F = BB->getParent();
360333
361 if (&BB->front() == TI) // Make sure there is something before the terminator.
362 return 0;
334 if (&BB->front() == Ret) // Make sure there is something before the ret...
335 return false;
363336
364337 // Scan backwards from the return, checking to see if there is a tail call in
365338 // this block. If so, set CI to it.
366 CallInst *CI = 0;
367 BasicBlock::iterator BBI = TI;
368 while (true) {
339 CallInst *CI;
340 BasicBlock::iterator BBI = Ret;
341 while (1) {
369342 CI = dyn_cast(BBI);
370343 if (CI && CI->getCalledFunction() == F)
371344 break;
372345
373346 if (BBI == BB->begin())
374 return 0; // Didn't find a potential tail call.
347 return false; // Didn't find a potential tail call.
375348 --BBI;
376349 }
377350
378351 // If this call is marked as a tail call, and if there are dynamic allocas in
379352 // the function, we cannot perform this optimization.
380353 if (CI->isTailCall() && CannotTailCallElimCallsMarkedTail)
381 return 0;
354 return false;
382355
383356 // As a special case, detect code like this:
384357 // double fabs(double f) { return __builtin_fabs(f); } // a 'fabs' call
385358 // and disable this xform in this case, because the code generator will
386359 // lower the call to fabs into inline code.
387360 if (BB == &F->getEntryBlock() &&
388 FirstNonDbg(BB->front()) == CI &&
389 FirstNonDbg(llvm::next(BB->begin())) == TI &&
361 &BB->front() == CI && &*++BB->begin() == Ret &&
390362 callIsSmall(F)) {
391363 // A single-block function with just a call and a return. Check that
392364 // the arguments match.
397369 for (; I != E && FI != FE; ++I, ++FI)
398370 if (*I != &*FI) break;
399371 if (I == E && FI == FE)
400 return 0;
401 }
402
403 return CI;
404 }
405
406 bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
407 BasicBlock *&OldEntry,
408 bool &TailCallsAreMarkedTail,
409 SmallVector &ArgumentPHIs,
410 bool CannotTailCallElimCallsMarkedTail) {
372 return false;
373 }
374
411375 // If we are introducing accumulator recursion to eliminate operations after
412376 // the call instruction that are both associative and commutative, the initial
413377 // value for the accumulator is placed in this variable. If this value is set
425389 // tail call if all of the instructions between the call and the return are
426390 // movable to above the call itself, leaving the call next to the return.
427391 // Check that this is the case now.
428 BasicBlock::iterator BBI = CI;
429 for (++BBI; &*BBI != Ret; ++BBI) {
392 for (BBI = CI, ++BBI; &*BBI != Ret; ++BBI) {
430393 if (CanMoveAboveCall(BBI, CI)) continue;
431394
432395 // If we can't move the instruction above the call, it might be because it
463426 return false;
464427 }
465428
466 BasicBlock *BB = Ret->getParent();
467 Function *F = BB->getParent();
468
469429 // OK! We can transform this tail call. If this is the first one found,
470430 // create the new entry block, allowing us to branch back to the old entry.
471431 if (OldEntry == 0) {
575535 ++NumEliminated;
576536 return true;
577537 }
578
579 bool TailCallElim::FoldReturnAndProcessPred(BasicBlock *BB,
580 ReturnInst *Ret, BasicBlock *&OldEntry,
581 bool &TailCallsAreMarkedTail,
582 SmallVector &ArgumentPHIs,
583 bool CannotTailCallElimCallsMarkedTail) {
584 bool Change = false;
585
586 // If the return block contains nothing but the return and PHI's,
587 // there might be an opportunity to duplicate the return in its
588 // predecessors and perform TRC there. Look for predecessors that end
589 // in unconditional branch and recursive call(s).
590 for (pred_iterator PI = pred_begin(BB), E = pred_end(BB);
591 PI != E; ++PI) {
592 BasicBlock *Pred = *PI;
593 TerminatorInst *PTI = Pred->getTerminator();
594 if (BranchInst *BI = dyn_cast(PTI)) {
595 CallInst *CI = 0;
596 if (BI->isUnconditional() &&
597 (CI = FindTRECandidate(BI, CannotTailCallElimCallsMarkedTail))) {
598 DEBUG(dbgs() << "FOLDING: " << *BB
599 << "INTO UNCOND BRANCH PRED: " << *Pred);
600 EliminateRecursiveTailCall(CI,
601 FoldReturnIntoUncondBranch(Ret, BB, Pred),
602 OldEntry, TailCallsAreMarkedTail, ArgumentPHIs,
603 CannotTailCallElimCallsMarkedTail);
604 Change = true;
605 }
606 }
607 }
608
609 return Change;
610 }
611
612 bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
613 bool &TailCallsAreMarkedTail,
614 SmallVector &ArgumentPHIs,
615 bool CannotTailCallElimCallsMarkedTail) {
616 CallInst *CI = FindTRECandidate(Ret, CannotTailCallElimCallsMarkedTail);
617 if (!CI)
618 return false;
619
620 return EliminateRecursiveTailCall(CI, Ret, OldEntry, TailCallsAreMarkedTail,
621 ArgumentPHIs,
622 CannotTailCallElimCallsMarkedTail);
623 }
508508 // Go up one level.
509509 InStack.erase(VisitStack.pop_back_val().first);
510510 }
511 } while (!VisitStack.empty());
512 }
513
514 /// FoldReturnIntoUncondBranch - This method duplicates the specified return
515 /// instruction into a predecessor which ends in an unconditional branch. If
516 /// the return instruction returns a value defined by a PHI, propagate the
517 /// right value into the return. It returns the new return instruction in the
518 /// predecessor.
519 ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
520 BasicBlock *Pred) {
521 Instruction *UncondBranch = Pred->getTerminator();
522 // Clone the return and add it to the end of the predecessor.
523 Instruction *NewRet = RI->clone();
524 Pred->getInstList().push_back(NewRet);
525
526 // If the return instruction returns a value, and if the value was a
527 // PHI node in "BB", propagate the right value into the return.
528 for (User::op_iterator i = NewRet->op_begin(), e = NewRet->op_end();
529 i != e; ++i)
530 if (PHINode *PN = dyn_cast(*i))
531 if (PN->getParent() == BB)
532 *i = PN->getIncomingValueForBlock(Pred);
533
534 // Update any PHI nodes in the returning block to realize that we no
535 // longer branch to them.
536 BB->removePredecessor(Pred);
537 UncondBranch->eraseFromParent();
538 return cast(NewRet);
539 }
511 } while (!VisitStack.empty());
512
513
514 }
2727 #include "llvm/ADT/Statistic.h"
2828 #include "llvm/ADT/STLExtras.h"
2929 #include "llvm/Support/CFG.h"
30 #include "llvm/Support/CommandLine.h"
3130 #include "llvm/Support/ConstantRange.h"
3231 #include "llvm/Support/Debug.h"
3332 #include "llvm/Support/raw_ostream.h"
3534 #include
3635 #include
3736 using namespace llvm;
38
39 static cl::opt
40 DupRet("simplifycfg-dup-ret", cl::Hidden, cl::init(false),
41 cl::desc("Duplicate return instructions into unconditional branches"));
4237
4338 STATISTIC(NumSpeculations, "Number of speculative executed instructions");
4439
20312026 }
20322027
20332028 // If we found some, do the transformation!
2034 if (!UncondBranchPreds.empty() && DupRet) {
2029 if (!UncondBranchPreds.empty()) {
20352030 while (!UncondBranchPreds.empty()) {
20362031 BasicBlock *Pred = UncondBranchPreds.pop_back_val();
20372032 DEBUG(dbgs() << "FOLDING: " << *BB
20382033 << "INTO UNCOND BRANCH PRED: " << *Pred);
2039 (void)FoldReturnIntoUncondBranch(RI, BB, Pred);
2034 Instruction *UncondBranch = Pred->getTerminator();
2035 // Clone the return and add it to the end of the predecessor.
2036 Instruction *NewRet = RI->clone();
2037 Pred->getInstList().push_back(NewRet);
2038
2039 // If the return instruction returns a value, and if the value was a
2040 // PHI node in "BB", propagate the right value into the return.
2041 for (User::op_iterator i = NewRet->op_begin(), e = NewRet->op_end();
2042 i != e; ++i)
2043 if (PHINode *PN = dyn_cast(*i))
2044 if (PN->getParent() == BB)
2045 *i = PN->getIncomingValueForBlock(Pred);
2046
2047 // Update any PHI nodes in the returning block to realize that we no
2048 // longer branch to them.
2049 BB->removePredecessor(Pred);
2050 UncondBranch->eraseFromParent();
20402051 }
20412052
20422053 // If we eliminated all predecessors of the block, delete the block now.
6969
7070 ; Same as slightly_more_involved, but block_a is now a CFG diamond with
7171 ; fallthrough edges which should be preserved.
72 ; "callq block_a_merge_func" is tail duped.
7372
7473 ; CHECK: yet_more_involved:
7574 ; CHECK: jmp .LBB2_1
7877 ; CHECK-NEXT: callq bar99
7978 ; CHECK-NEXT: callq get
8079 ; CHECK-NEXT: cmpl $2999, %eax
81 ; CHECK-NEXT: jle .LBB2_5
80 ; CHECK-NEXT: jg .LBB2_6
81 ; CHECK-NEXT: callq block_a_true_func
82 ; CHECK-NEXT: jmp .LBB2_7
83 ; CHECK-NEXT: .LBB2_6:
8284 ; CHECK-NEXT: callq block_a_false_func
83 ; CHECK-NEXT: callq block_a_merge_func
84 ; CHECK-NEXT: jmp .LBB2_1
85 ; CHECK-NEXT: .LBB2_5:
86 ; CHECK-NEXT: callq block_a_true_func
85 ; CHECK-NEXT: .LBB2_7:
8786 ; CHECK-NEXT: callq block_a_merge_func
8887 ; CHECK-NEXT: .LBB2_1:
8988 ; CHECK-NEXT: callq body
None ; RUN: opt < %s -jump-threading -mem2reg -instcombine -simplifycfg -S | FileCheck %s
0 ; RUN: opt < %s -jump-threading -mem2reg -instcombine -simplifycfg -S | grep {ret i32 %v1}
1 ; There should be no uncond branches left.
2 ; RUN: opt < %s -jump-threading -mem2reg -instcombine -simplifycfg -S | not grep {br label}
13
24 declare i32 @f1()
35 declare i32 @f2()
46 declare void @f3()
57
68 define i32 @test(i1 %cond, i1 %cond2, i1 %cond3) {
7 ; CHECK: test
89 br i1 %cond, label %T1, label %F1
910
10 ; CHECK-NOT: T1:
1111 T1:
1212 %v1 = call i32 @f1()
1313 br label %Merge
1717 br label %Merge
1818
1919 Merge:
20 ; CHECK: Merge:
21 ; CHECK: %v1 = call i32 @f1()
22 ; CHECK-NEXT: %D = and i1 %cond2, %cond3
23 ; CHECK-NEXT: br i1 %D
2420 %A = phi i1 [true, %T1], [false, %F1]
2521 %B = phi i32 [%v1, %T1], [%v2, %F1]
2622 %C = and i1 %A, %cond2
None ; RUN: opt < %s -jump-threading -mem2reg -instcombine -simplifycfg -S | FileCheck %s
0 ; RUN: opt < %s -jump-threading -mem2reg -instcombine -simplifycfg -S | grep {ret i32 %v1}
1 ; There should be no uncond branches left.
2 ; RUN: opt < %s -jump-threading -mem2reg -instcombine -simplifycfg -S | not grep {br label}
13
24 declare i32 @f1()
35 declare i32 @f2()
46 declare void @f3()
57
68 define i32 @test(i1 %cond, i1 %cond2) {
7 ; CHECK: test
89 br i1 %cond, label %T1, label %F1
910
10 ; CHECK-NOT: T1
1111 T1:
1212 %v1 = call i32 @f1()
1313 br label %Merge
1717 br label %Merge
1818
1919 Merge:
20 ; CHECK: Merge:
21 ; CHECK: %v1 = call i32 @f1()
22 ; CHECK-NEXT: br i1 %cond2
2320 %A = phi i1 [true, %T1], [false, %F1]
2421 %B = phi i32 [%v1, %T1], [%v2, %F1]
2522 %C = and i1 %A, %cond2
None ; RUN: opt < %s -jump-threading -S | FileCheck %s
0 ; RUN: opt < %s -jump-threading -simplifycfg -S | grep {ret i32 1}
11 ; rdar://6402033
22
33 ; Test that we can thread through the block with the partially redundant load (%2).
55 target triple = "i386-apple-darwin7"
66
77 define i32 @foo(i32* %P) nounwind {
8 ; CHECK: foo
98 entry:
109 %0 = tail call i32 (...)* @f1() nounwind ; [#uses=1]
1110 %1 = icmp eq i32 %0, 0 ; [#uses=1]
1211 br i1 %1, label %bb1, label %bb
1312
1413 bb: ; preds = %entry
15 ; CHECK: bb1.thread:
16 ; CHECK: store
17 ; CHECK: br label %bb3
1814 store i32 42, i32* %P, align 4
1915 br label %bb1
2016
2925 ret i32 %res.0
3026
3127 bb3: ; preds = %bb1
32 ; CHECK: bb3:
33 ; CHECK: %res.01 = phi i32 [ 1, %bb1.thread ], [ 0, %bb1 ]
34 ; CHECK: ret i32 %res.01
3528 ret i32 %res.0
3629 }
3730
77 ; CHECK: i64 2, label
88 ; CHECK: i64 3, label
99 ; CHECK: i64 4, label
10 ; CHECK-NOT: br
1011 ; CHECK: }
1112
1213 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
2424 }
2525
2626
27 define void @test4() {
28 br label %return
29 return:
30 ret void
31 ; CHECK: @test4
32 ; CHECK-NEXT: ret void
33 }
34 @test4g = global i8* blockaddress(@test4, %return)
35
36
2737 ; PR5795
2838 define void @test5(i32 %A) {
2939 switch i32 %A, label %return [
146146 ; CHECK: i32 16, label %UnifiedReturnBlock
147147 ; CHECK: i32 17, label %UnifiedReturnBlock
148148 ; CHECK: i32 18, label %UnifiedReturnBlock
149 ; CHECK: i32 19, label %UnifiedReturnBlock
149 ; CHECK: i32 19, label %switch.edge
150150 ; CHECK: ]
151151 }
152152
440440 ; CHECK-NOT: switch
441441 ; CHECK: ret void
442442 }
443
444 ; PR8675
445 ; rdar://5134905
446 define zeroext i1 @test16(i32 %x) nounwind {
447 entry:
448 ; CHECK: @test16
449 ; CHECK: switch i32 %x, label %lor.rhs [
450 ; CHECK: i32 1, label %lor.end
451 ; CHECK: i32 2, label %lor.end
452 ; CHECK: i32 3, label %lor.end
453 ; CHECK: ]
454 %cmp.i = icmp eq i32 %x, 1
455 br i1 %cmp.i, label %lor.end, label %lor.lhs.false
456
457 lor.lhs.false:
458 %cmp.i2 = icmp eq i32 %x, 2
459 br i1 %cmp.i2, label %lor.end, label %lor.rhs
460
461 lor.rhs:
462 %cmp.i1 = icmp eq i32 %x, 3
463 br label %lor.end
464
465 lor.end:
466 %0 = phi i1 [ true, %lor.lhs.false ], [ true, %entry ], [ %cmp.i1, %lor.rhs ]
467 ret i1 %0
468 }
None ; RUN: opt < %s -simplifycfg -S | FileCheck %s
0 ; RUN: opt < %s -simplifycfg -S | not grep br
1
12
23 %llvm.dbg.anchor.type = type { i32, i32 }
34 %llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8*, i1, i1, i8* }
1112
1213 declare void @llvm.dbg.stoppoint(i32, i32, { }*) nounwind
1314
14 define i1 @t({ i32, i32 }* %I) {
15 ; CHECK: t
16 ; CHECK: switch i32 %tmp.2.i, label %shortcirc_next.4 [
17 ; CHECK: i32 14, label %UnifiedReturnBlock
18 ; CHECK: i32 15, label %UnifiedReturnBlock
19 ; CHECK: i32 16, label %UnifiedReturnBlock
20 ; CHECK: i32 17, label %UnifiedReturnBlock
21 ; CHECK: i32 18, label %UnifiedReturnBlock
22 ; CHECK: i32 19, label %UnifiedReturnBlock
23 ; CHECK: ]
15 define i1 @_ZN4llvm11SetCondInst7classofEPKNS_11InstructionE({ i32, i32 }* %I) {
2416 entry:
2517 %tmp.1.i = getelementptr { i32, i32 }* %I, i64 0, i32 1 ; [#uses=1]
2618 %tmp.2.i = load i32* %tmp.1.i ; [#uses=6]