llvm.org GIT mirror llvm / 3d2125c
Enable sibling call optimization of libcalls which are expanded during legalization. Since at legalization time there is no mapping from an SDNode back to the corresponding LLVM instruction, and the return SDNode is target-specific, this requires a target hook to check for eligibility. Only x86 and ARM support this form of sibcall optimization right now.

rdar://8707777

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@120501 91177308-0d34-0410-b5e6-96231b3b80d8

Evan Cheng, 8 years ago
13 changed files with 229 additions and 98 deletions.
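For orientation before the diff: the case this commit targets is a function whose entire body reduces to a single runtime-library call, as in the README entry and the new tests touched below. A minimal C++ sketch of the effect (the function name and the exact assembly are illustrative, taken from the x86-64 example in the removed README entry):

#include <math.h>

// The whole body is one libm call; once the libcall's only user is the
// return, the legalizer may now emit it as a sibling (tail) call.
double foo(double a) { return sin(a); }

// x86-64, before:              x86-64, after:
//   foo:                         foo:
//     subq  $8, %rsp               jmp   sin
//     call  sin
//     addq  $8, %rsp
//     ret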
2222
2323 namespace llvm {
2424
25 class GlobalVariable;
2526 class TargetLowering;
26 class GlobalVariable;
27 class SDNode;
28 class SelectionDAG;
2729
2830 /// ComputeLinearIndex - Given an LLVM IR aggregate type and a sequence
2931 /// of insertvalue or extractvalue indices that identify a member, return
7476 bool isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr,
7577 const TargetLowering &TLI);
7678
79 bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
80 const TargetLowering &TLI);
81
7782 } // End llvm namespace
7883
7984 #endif
12571257 return SDValue(); // this is here to silence compiler errors
12581258 }
12591259
1260 /// isUsedByReturnOnly - Return true if result of the specified node is used
1261 /// by a return node only. This is used to determine whether it is possible
1262 /// to codegen a libcall as tail call at legalization time.
1263 virtual bool isUsedByReturnOnly(SDNode *N) const {
1264 return false;
1265 }
1266
12601267 /// LowerOperationWrapper - This callback is invoked by the type legalizer
12611268 /// to legalize nodes with an illegal operand type but legal result types.
12621269 /// It replaces the LowerOperation callback in the type Legalizer.
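The new isUsedByReturnOnly hook above is what a target overrides to opt into libcall sibcalls. As a rough sketch of the contract, a hypothetical target whose return node is MYISD::RET_FLAG could implement it much like the x86 override added later in this diff (the class and opcode names here are made up for illustration):

// Sketch only: accept the node when its single result feeds one CopyToReg
// whose users are all the target's return node.
bool MyTargetLowering::isUsedByReturnOnly(SDNode *N) const {
  if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0))
    return false;

  SDNode *Copy = *N->use_begin();
  if (Copy->getOpcode() != ISD::CopyToReg)
    return false;

  for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
       UI != UE; ++UI)
    if (UI->getOpcode() != MYISD::RET_FLAG)  // hypothetical return opcode
      return false;

  return true;
}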
1818 #include "llvm/LLVMContext.h"
1919 #include "llvm/Module.h"
2020 #include "llvm/CodeGen/MachineFunction.h"
21 #include "llvm/CodeGen/SelectionDAG.h"
2122 #include "llvm/Target/TargetData.h"
2223 #include "llvm/Target/TargetLowering.h"
2324 #include "llvm/Target/TargetOptions.h"
282283 return true;
283284 }
284285
286 bool llvm::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
287 const TargetLowering &TLI) {
288 const Function *F = DAG.getMachineFunction().getFunction();
289
290 // Conservatively require the attributes of the call to match those of
291 // the return. Ignore noalias because it doesn't affect the call sequence.
292 unsigned CallerRetAttr = F->getAttributes().getRetAttributes();
293 if (CallerRetAttr & ~Attribute::NoAlias)
294 return false;
295
296 // It's not safe to eliminate the sign / zero extension of the return value.
297 if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt))
298 return false;
299
300 // Check if the only use is a function return node.
301 return TLI.isUsedByReturnOnly(Node);
302 }
1010 //
1111 //===----------------------------------------------------------------------===//
1212
13 #include "llvm/CodeGen/SelectionDAG.h"
13 #include "llvm/Analysis/DebugInfo.h"
14 #include "llvm/CodeGen/Analysis.h"
1415 #include "llvm/CodeGen/MachineFunction.h"
1516 #include "llvm/CodeGen/MachineFrameInfo.h"
1617 #include "llvm/CodeGen/MachineJumpTableInfo.h"
1718 #include "llvm/CodeGen/MachineModuleInfo.h"
18 #include "llvm/Analysis/DebugInfo.h"
1919 #include "llvm/CodeGen/PseudoSourceValue.h"
20 #include "llvm/CodeGen/SelectionDAG.h"
2021 #include "llvm/Target/TargetFrameInfo.h"
2122 #include "llvm/Target/TargetLowering.h"
2223 #include "llvm/Target/TargetData.h"
19471948
19481949 // Splice the libcall in wherever FindInputOutputChains tells us to.
19491950 const Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
1951
1952 // isTailCall may be true since the callee does not reference caller stack
1953 // frame. Check if it's in the right position.
1954 bool isTailCall = isInTailCallPosition(DAG, Node, TLI);
19501955 std::pair<SDValue, SDValue> CallInfo =
19511956 TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
1952 0, TLI.getLibcallCallingConv(LC), false,
1957 0, TLI.getLibcallCallingConv(LC), isTailCall,
19531958 /*isReturnValueUsed=*/true,
19541959 Callee, Args, DAG, Node->getDebugLoc());
1960
1961 if (!CallInfo.second.getNode())
1962 // It's a tailcall, return the chain (which is the DAG root).
1963 return DAG.getRoot();
19551964
19561965 // Legalize the call sequence, starting with the chain. This will advance
19571966 // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that
19871996 const Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
19881997 std::pair<SDValue, SDValue> CallInfo =
19891998 TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
1990 0, TLI.getLibcallCallingConv(LC), false,
1999 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
19912000 /*isReturnValueUsed=*/true,
19922001 Callee, Args, DAG, Node->getDebugLoc());
19932002
25572566 TargetLowering::ArgListTy Args;
25582567 std::pair<SDValue, SDValue> CallResult =
25592568 TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()),
2560 false, false, false, false, 0, CallingConv::C, false,
2569 false, false, false, false, 0, CallingConv::C,
2570 /*isTailCall=*/false,
25612571 /*isReturnValueUsed=*/true,
25622572 DAG.getExternalSymbol("__sync_synchronize",
25632573 TLI.getPointerTy()),
26082618 TargetLowering::ArgListTy Args;
26092619 std::pair<SDValue, SDValue> CallResult =
26102620 TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()),
2611 false, false, false, false, 0, CallingConv::C, false,
2621 false, false, false, false, 0, CallingConv::C,
2622 /*isTailCall=*/false,
26122623 /*isReturnValueUsed=*/true,
26132624 DAG.getExternalSymbol("abort", TLI.getPointerTy()),
26142625 Args, DAG, dl);
373373 MachineOperand &JumpTarget = MBBI->getOperand(0);
374374
375375 // Jump to label or value in register.
376 if (RetOpcode == ARM::TCRETURNdi) {
377 BuildMI(MBB, MBBI, dl,
378 TII.get(STI.isThumb() ? ARM::TAILJMPdt : ARM::TAILJMPd)).
379 addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
380 JumpTarget.getTargetFlags());
381 } else if (RetOpcode == ARM::TCRETURNdiND) {
382 BuildMI(MBB, MBBI, dl,
383 TII.get(STI.isThumb() ? ARM::TAILJMPdNDt : ARM::TAILJMPdND)).
384 addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
385 JumpTarget.getTargetFlags());
376 if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNdiND) {
377 unsigned TCOpcode = (RetOpcode == ARM::TCRETURNdi)
378 ? (STI.isThumb() ? ARM::TAILJMPdt : ARM::TAILJMPd)
379 : (STI.isThumb() ? ARM::TAILJMPdNDt : ARM::TAILJMPdND);
380 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode));
381 if (JumpTarget.isGlobal())
382 MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
383 JumpTarget.getTargetFlags());
384 else {
385 assert(JumpTarget.isSymbol());
386 MIB.addExternalSymbol(JumpTarget.getSymbolName(),
387 JumpTarget.getTargetFlags());
388 }
386389 } else if (RetOpcode == ARM::TCRETURNri) {
387390 BuildMI(MBB, MBBI, dl, TII.get(ARM::TAILJMPr)).
388391 addReg(JumpTarget.getReg(), RegState::Kill);
15181518 // whether LR is going to be used. Probably the right approach is to
15191519 // generate the tail call here and turn it back into CALL/RET in
15201520 // emitEpilogue if LR is used.
1521 if (Subtarget->isThumb1Only())
1522 return false;
1523
1524 // For the moment, we can only do this to functions defined in this
1525 // compilation, or to indirect calls. A Thumb B to an ARM function,
1526 // or vice versa, is not easily fixed up in the linker unlike BL.
1527 // (We could do this by loading the address of the callee into a register;
1528 // that is an extra instruction over the direct call and burns a register
1529 // as well, so is not likely to be a win.)
1530
1531 // It might be safe to remove this restriction on non-Darwin.
15321521
15331522 // Thumb1 PIC calls to external symbols use BX, so they can be tail calls,
15341523 // but we need to make sure there are enough registers; the only valid
15351524 // registers are the 4 used for parameters. We don't currently do this
15361525 // case.
1537 if (isa<ExternalSymbolSDNode>(Callee))
1538 return false;
1539
1540 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1541 const GlobalValue *GV = G->getGlobal();
1542 if (GV->isDeclaration() || GV->isWeakForLinker())
1543 return false;
1544 }
1526 if (Subtarget->isThumb1Only())
1527 return false;
15451528
15461529 // If the calling conventions do not match, then we'd better make sure the
15471530 // results are returned in the same way as what the caller expects.
17171700 result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain);
17181701
17191702 return result;
1703 }
1704
1705 bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N) const {
1706 if (N->getNumValues() != 1)
1707 return false;
1708 if (!N->hasNUsesOfValue(1, 0))
1709 return false;
1710
1711 unsigned NumCopies = 0;
1712 SDNode* Copies[2];
1713 SDNode *Use = *N->use_begin();
1714 if (Use->getOpcode() == ISD::CopyToReg) {
1715 Copies[NumCopies++] = Use;
1716 } else if (Use->getOpcode() == ARMISD::VMOVRRD) {
1717 // f64 returned in a pair of GPRs.
1718 for (SDNode::use_iterator UI = Use->use_begin(), UE = Use->use_end();
1719 UI != UE; ++UI) {
1720 if (UI->getOpcode() != ISD::CopyToReg)
1721 return false;
1722 Copies[UI.getUse().getResNo()] = *UI;
1723 ++NumCopies;
1724 }
1725 } else if (Use->getOpcode() == ISD::BITCAST) {
1726 // f32 returned in a single GPR.
1727 if (!Use->hasNUsesOfValue(1, 0))
1728 return false;
1729 Use = *Use->use_begin();
1730 if (Use->getOpcode() != ISD::CopyToReg || !Use->hasNUsesOfValue(1, 0))
1731 return false;
1732 Copies[NumCopies++] = Use;
1733 } else {
1734 return false;
1735 }
1736
1737 if (NumCopies != 1 && NumCopies != 2)
1738 return false;
1739 for (unsigned i = 0; i < NumCopies; ++i) {
1740 SDNode *Copy = Copies[i];
1741 for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
1742 UI != UE; ++UI) {
1743 if (UI->getOpcode() == ISD::CopyToReg) {
1744 SDNode *Use = *UI;
1745 if (Use == Copies[0] || Use == Copies[1])
1746 continue;
1747 return false;
1748 }
1749 if (UI->getOpcode() != ARMISD::RET_FLAG)
1750 return false;
1751 }
1752 }
1753
1754 return true;
17201755 }
17211756
17221757 // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
420420 const SmallVectorImpl<SDValue> &OutVals,
421421 DebugLoc dl, SelectionDAG &DAG) const;
422422
423 virtual bool isUsedByReturnOnly(SDNode *N) const;
424
423425 SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
424426 SDValue &ARMcc, SelectionDAG &DAG, DebugLoc dl) const;
425427 SDValue getVFPCmp(SDValue LHS, SDValue RHS,
891891 .L4:
892892 .p2align 4,,9
893893 call abort
894
895 //===---------------------------------------------------------------------===//
896
897 Linux is missing some basic tail call support:
898
899 #include <math.h>
900 double foo(double a) { return sin(a); }
901
902 This compiles into this on x86-64 Linux (but not darwin):
903 foo:
904 subq $8, %rsp
905 call sin
906 addq $8, %rsp
907 ret
908 vs:
909
910 foo:
911 jmp sin
912894
913895 //===---------------------------------------------------------------------===//
914896
711711
712712 // Jump to label or value in register.
713713 if (RetOpcode == X86::TCRETURNdi || RetOpcode == X86::TCRETURNdi64) {
714 BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNdi)
715 ? X86::TAILJMPd : X86::TAILJMPd64)).
716 addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
717 JumpTarget.getTargetFlags());
714 MachineInstrBuilder MIB =
715 BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNdi)
716 ? X86::TAILJMPd : X86::TAILJMPd64));
717 if (JumpTarget.isGlobal())
718 MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
719 JumpTarget.getTargetFlags());
720 else {
721 assert(JumpTarget.isSymbol());
722 MIB.addExternalSymbol(JumpTarget.getSymbolName(),
723 JumpTarget.getTargetFlags());
724 }
718725 } else if (RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64) {
719726 MachineInstrBuilder MIB =
720727 BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNmi)
13371337 MVT::Other, &RetOps[0], RetOps.size());
13381338 }
13391339
1340 bool X86TargetLowering::isUsedByReturnOnly(SDNode *N) const {
1341 if (N->getNumValues() != 1)
1342 return false;
1343 if (!N->hasNUsesOfValue(1, 0))
1344 return false;
1345
1346 SDNode *Copy = *N->use_begin();
1347 if (Copy->getOpcode() != ISD::CopyToReg)
1348 return false;
1349 for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
1350 UI != UE; ++UI)
1351 if (UI->getOpcode() != X86ISD::RET_FLAG)
1352 return false;
1353
1354 return true;
1355 }
1356
13401357 /// LowerCallResult - Lower the result values of a call into the
13411358 /// appropriate copies out of appropriate physical registers.
13421359 ///
21412158 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
21422159 unsigned char OpFlags = 0;
21432160
2144 // On ELF targets, in either X86-64 or X86-32 mode, direct calls to external
2145 // symbols should go through the PLT.
2146 if (Subtarget->isTargetELF() &&
2147 getTargetMachine().getRelocationModel() == Reloc::PIC_) {
2148 OpFlags = X86II::MO_PLT;
2149 } else if (Subtarget->isPICStyleStubAny() &&
2150 Subtarget->getDarwinVers() < 9) {
2151 // PC-relative references to external symbols should go through $stub,
2152 // unless we're building with the leopard linker or later, which
2153 // automatically synthesizes these stubs.
2154 OpFlags = X86II::MO_DARWIN_STUB;
2161 if (!isTailCall) {
2162 // On ELF targets, in either X86-64 or X86-32 mode, direct calls to
2163 // external symbols should go through the PLT.
2164 if (Subtarget->isTargetELF() &&
2165 getTargetMachine().getRelocationModel() == Reloc::PIC_) {
2166 OpFlags = X86II::MO_PLT;
2167 } else if (Subtarget->isPICStyleStubAny() &&
2168 Subtarget->getDarwinVers() < 9) {
2169 // PC-relative references to external symbols should go through $stub,
2170 // unless we're building with the leopard linker or later, which
2171 // automatically synthesizes these stubs.
2172 OpFlags = X86II::MO_DARWIN_STUB;
2173 }
21552174 }
21562175
21572176 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(),
804804 const SmallVectorImpl<SDValue> &OutVals,
805805 DebugLoc dl, SelectionDAG &DAG) const;
806806
807 virtual bool isUsedByReturnOnly(SDNode *N) const;
808
807809 virtual bool
808810 CanLowerReturn(CallingConv::ID CallConv, bool isVarArg,
809811 const SmallVectorImpl<ISD::OutputArg> &Outs,
None ; RUN: llc < %s -mtriple=arm-apple-darwin -march=arm | FileCheck %s -check-prefix=CHECKV4
1 ; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin -mattr=+v5t | FileCheck %s -check-prefix=CHECKV5
2 ; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi\
3 ; RUN: -relocation-model=pic | FileCheck %s -check-prefix=CHECKELF
4 ; XFAIL: *
0 ; RUN: llc < %s -mtriple=armv6-apple-darwin -mattr=+vfp2 -arm-tail-calls | FileCheck %s -check-prefix=CHECKV6
1 ; RUN: llc < %s -mtriple=armv6-linux-gnueabi -relocation-model=pic -mattr=+vfp2 -arm-tail-calls | FileCheck %s -check-prefix=CHECKELF
52
63 @t = weak global i32 ()* null ; [#uses=1]
74
96
107 define void @t1() {
118 ; CHECKELF: t1:
12 ; CHECKELF: PLT
9 ; CHECKELF: bl g(PLT)
1310 call void @g( i32 1, i32 2, i32 3, i32 4 )
1411 ret void
1512 }
1613
1714 define void @t2() {
18 ; CHECKV4: t2:
19 ; CHECKV4: bx r0 @ TAILCALL
20 ; CHECKV5: t2:
21 ; CHECKV5: bx r0 @ TAILCALL
15 ; CHECKV6: t2:
16 ; CHECKV6: bx r0 @ TAILCALL
2217 %tmp = load i32 ()** @t ; [#uses=1]
2318 %tmp.upgrd.2 = tail call i32 %tmp( ) ; [#uses=0]
2419 ret void
2520 }
2621
27 define i32* @t3(i32, i32, i32*, i32*, i32*) nounwind {
28 ; CHECKV4: t3:
29 ; CHECKV4: bx r{{.*}}
30 BB0:
31 %5 = inttoptr i32 %0 to i32* ; [#uses=1]
32 %t35 = volatile load i32* %5 ; [#uses=1]
33 %6 = inttoptr i32 %t35 to i32** ; [#uses=1]
34 %7 = getelementptr i32** %6, i32 86 ; [#uses=1]
35 %8 = load i32** %7 ; [#uses=1]
36 %9 = bitcast i32* %8 to i32* (i32, i32*, i32, i32*, i32*, i32*)* ; [#uses=1]
37 %10 = call i32* %9(i32 %0, i32* null, i32 %1, i32* %2, i32* %3, i32* %4) ; [#uses=1]
38 ret i32* %10
39 }
40
41 define void @t4() {
42 ; CHECKV4: t4:
43 ; CHECKV4: b _t2 @ TAILCALL
44 ; CHECKV5: t4:
45 ; CHECKV5: b _t2 @ TAILCALL
22 define void @t3() {
23 ; CHECKV6: t3:
24 ; CHECKV6: b _t2 @ TAILCALL
25 ; CHECKELF: t3:
26 ; CHECKELF: b t2(PLT) @ TAILCALL
4627 tail call void @t2( ) ; [#uses=0]
4728 ret void
4829 }
30
31 ; Sibcall optimization of expanded libcalls. rdar://8707777
32 define double @t4(double %a) nounwind readonly ssp {
33 entry:
34 ; CHECKV6: t4:
35 ; CHECKV6: b _sin @ TAILCALL
36 ; CHECKELF: t4:
37 ; CHECKELF: b sin(PLT) @ TAILCALL
38 %0 = tail call double @sin(double %a) nounwind readonly ; [#uses=1]
39 ret double %0
40 }
41
42 define float @t5(float %a) nounwind readonly ssp {
43 entry:
44 ; CHECKV6: t5:
45 ; CHECKV6: b _sinf @ TAILCALL
46 ; CHECKELF: t5:
47 ; CHECKELF: b sinf(PLT) @ TAILCALL
48 %0 = tail call float @sinf(float %a) nounwind readonly ; [#uses=1]
49 ret float %0
50 }
51
52 declare float @sinf(float) nounwind readonly
53
54 declare double @sin(double) nounwind readonly
55
56 define i32 @t6(i32 %a, i32 %b) nounwind readnone {
57 entry:
58 ; CHECKV6: t6:
59 ; CHECKV6: b ___divsi3 @ TAILCALL
60 ; CHECKELF: t6:
61 ; CHECKELF: b __aeabi_idiv(PLT) @ TAILCALL
62 %0 = sdiv i32 %a, %b
63 ret i32 %0
64 }
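The new @t6 test above is the integer-division flavor of the same optimization: on ARM cores without a hardware divider, sdiv is expanded to a __divsi3 / __aeabi_idiv libcall during legalization, and that libcall can now be the tail call itself. In C++ terms the test roughly corresponds to the following (the function name is illustrative):

// Division expands to a runtime-library call on ARM subtargets without a
// hardware integer divider; the call can now be emitted as a tail branch.
int t6(int a, int b) { return a / b; }

// Expected per the CHECK lines above:
//   Darwin:     b ___divsi3 @ TAILCALL
//   Linux EABI: b __aeabi_idiv(PLT) @ TAILCALL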
0 ; RUN: llc < %s -march=x86-64 | FileCheck %s
1
2 ; Sibcall optimization of expanded libcalls.
3 ; rdar://8707777
4
5 define double @foo(double %a) nounwind readonly ssp {
6 entry:
7 ; CHECK: foo:
8 ; CHECK: jmp {{_?}}sin
9 %0 = tail call double @sin(double %a) nounwind readonly
10 ret double %0
11 }
12
13 define float @bar(float %a) nounwind readonly ssp {
14 ; CHECK: bar:
15 ; CHECK: jmp {{_?}}sinf
16 entry:
17 %0 = tail call float @sinf(float %a) nounwind readonly
18 ret float %0
19 }
20
21 declare float @sinf(float) nounwind readonly
22
23 declare double @sin(double) nounwind readonly