llvm.org GIT mirror llvm / f22f9b3
Do not emit callseq instructions around sibcalls. This eliminated some unnecessary stack adjustments. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@95475 91177308-0d34-0410-b5e6-96231b3b80d8 Evan Cheng 10 years ago
4 changed file(s) with 41 addition(s) and 24 deletion(s). Raw diff Collapse all Expand all
17481748 SDValue &OutRetAddr, SDValue Chain,
17491749 bool IsTailCall, bool Is64Bit,
17501750 int FPDiff, DebugLoc dl) {
1751 if (!IsTailCall || FPDiff==0) return Chain;
1752
17531751 // Adjust the Return address stack slot.
17541752 EVT VT = getPointerTy();
17551753 OutRetAddr = getReturnAddressFrameIndex(DAG);
17951793 // Check if it's really possible to do a tail call.
17961794 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
17971795 Outs, Ins, DAG);
1796
1797 // Sibcalls are automatically detected tailcalls which do not require
1798 // ABI changes.
17981799 if (!PerformTailCallOpt && isTailCall)
17991800 IsSibcall = true;
1801
1802 if (isTailCall)
1803 ++NumTailCalls;
18001804 }
18011805
18021806 assert(!(isVarArg && CallConv == CallingConv::Fast) &&
18101814
18111815 // Get a count of how many bytes are to be pushed on the stack.
18121816 unsigned NumBytes = CCInfo.getNextStackOffset();
1813 if (FuncIsMadeTailCallSafe(CallConv))
1814 NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
1815 else if (IsSibcall)
1817 if (IsSibcall)
18161818 // This is a sibcall. The memory operands are available in caller's
18171819 // own caller's stack.
18181820 NumBytes = 0;
1821 else if (PerformTailCallOpt && CallConv == CallingConv::Fast)
1822 NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
18191823
18201824 int FPDiff = 0;
1821 if (isTailCall) {
1822 ++NumTailCalls;
1823
1825 if (isTailCall && !IsSibcall) {
18241826 // Lower arguments at fp - stackoffset + fpdiff.
18251827 unsigned NumBytesCallerPushed =
18261828 MF.getInfo()->getBytesToPopOnReturn();
18321834 MF.getInfo()->setTCReturnAddrDelta(FPDiff);
18331835 }
18341836
1835 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
1837 if (!IsSibcall)
1838 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
18361839
18371840 SDValue RetAddrFrIdx;
18381841 // Load return address for tail calls.
1839 Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall, Is64Bit,
1840 FPDiff, dl);
1842 if (isTailCall && FPDiff)
1843 Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
1844 Is64Bit, FPDiff, dl);
18411845
18421846 SmallVector, 8> RegsToPass;
18431847 SmallVector MemOpChains;
18871891
18881892 if (VA.isRegLoc()) {
18891893 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
1890 } else if ((!isTailCall || isByVal) && !IsSibcall) {
1894 } else if (!IsSibcall && (!isTailCall || isByVal)) {
18911895 assert(VA.isMemLoc());
18921896 if (StackPtr.getNode() == 0)
18931897 StackPtr = DAG.getCopyFromReg(Chain, dl, X86StackPtr, getPointerTy());
19111915 RegsToPass[i].second, InFlag);
19121916 InFlag = Chain.getValue(1);
19131917 }
1914
19151918
19161919 if (Subtarget->isPICStyleGOT()) {
19171920 // ELF / PIC requires GOT in the EBX register before function calls via PLT
21092112 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
21102113 SmallVector Ops;
21112114
2112 if (isTailCall) {
2115 if (!IsSibcall && isTailCall) {
21132116 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
21142117 DAG.getIntPtrConstant(0, true), InFlag);
21152118 InFlag = Chain.getValue(1);
21782181 NumBytesForCalleeToPush = 0; // Callee pops nothing.
21792182
21802183 // Returns a flag for retval copy to use.
2181 Chain = DAG.getCALLSEQ_END(Chain,
2182 DAG.getIntPtrConstant(NumBytes, true),
2183 DAG.getIntPtrConstant(NumBytesForCalleeToPush,
2184 true),
2185 InFlag);
2186 InFlag = Chain.getValue(1);
2184 if (!IsSibcall) {
2185 Chain = DAG.getCALLSEQ_END(Chain,
2186 DAG.getIntPtrConstant(NumBytes, true),
2187 DAG.getIntPtrConstant(NumBytesForCalleeToPush,
2188 true),
2189 InFlag);
2190 InFlag = Chain.getValue(1);
2191 }
21872192
21882193 // Handle result values, copying them out of physregs into vregs that we
21892194 // return.
4747 /// ReturnAddrIndex - FrameIndex for return slot.
4848 int ReturnAddrIndex;
4949
50 /// TailCallReturnAddrDelta - Delta the ReturnAddr stack slot is moved
51 /// Used for creating an area before the register spill area on the stack
52 /// the returnaddr can be savely move to this area
50 /// TailCallReturnAddrDelta - The number of bytes by which the return address
51 /// stack slot is moved as a result of tail call optimization.
5352 int TailCallReturnAddrDelta;
5453
5554 /// SRetReturnReg - Some subtargets require that sret lowering includes
None ; RUN: llc < %s -mtriple=i386-apple-darwin -disable-fp-elim | grep subl | count 1
0 ; RUN: llc < %s -mtriple=i386-apple-darwin -disable-fp-elim | FileCheck %s
11
22 %struct.decode_t = type { i8, i8, i8, i8, i16, i8, i8, %struct.range_t** }
33 %struct.range_t = type { float, float, i32, i32, i32, [0 x i8] }
44
55 define i32 @decode_byte(%struct.decode_t* %decode) nounwind {
6 ; CHECK: decode_byte:
7 ; CHECK: pushl
8 ; CHECK: popl
9 ; CHECK: popl
10 ; CHECK: jmp
611 entry:
712 %tmp2 = getelementptr %struct.decode_t* %decode, i32 0, i32 4 ; [#uses=1]
813 %tmp23 = bitcast i16* %tmp2 to i32* ; [#uses=1]
145145 ; eliminated currently.
146146
147147 ; 32: t11:
148 ; 32-NOT: subl ${{[0-9]+}}, %esp
149 ; 32-NOT: addl ${{[0-9]+}}, %esp
148150 ; 32: jmp {{_?}}foo5
149151
150152 ; 64: t11:
153 ; 64-NOT: subq ${{[0-9]+}}, %rsp
154 ; 64-NOT: addq ${{[0-9]+}}, %rsp
151155 ; 64: jmp {{_?}}foo5
152156 entry:
153157 %0 = icmp eq i32 %x, 0
167171
168172 define i32 @t12(i32 %x, i32 %y, %struct.t* byval align 4 %z) nounwind ssp {
169173 ; 32: t12:
174 ; 32-NOT: subl ${{[0-9]+}}, %esp
175 ; 32-NOT: addl ${{[0-9]+}}, %esp
170176 ; 32: jmp {{_?}}foo6
171177
172178 ; 64: t12:
179 ; 64-NOT: subq ${{[0-9]+}}, %rsp
180 ; 64-NOT: addq ${{[0-9]+}}, %rsp
173181 ; 64: jmp {{_?}}foo6
174182 entry:
175183 %0 = icmp eq i32 %x, 0