llvm.org GIT mirror: llvm / commit c85e171
Added tail call optimization to the x86 back end. It can be enabled by passing -tailcallopt to llc. The optimization is performed if the following conditions are satisfied:
  * caller/callee are fastcc
  * elf/pic is disabled, OR elf/pic is enabled + the callee is in the module + the callee has visibility protected or hidden
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@42870 91177308-0d34-0410-b5e6-96231b3b80d8 Arnold Schwaighofer 12 years ago
16 changed file(s) with 929 addition(s) and 66 deletion(s).
859859 /// implement this. The default implementation of this aborts.
860860 virtual SDOperand LowerOperation(SDOperand Op, SelectionDAG &DAG);
861861
862 /// IsEligibleForTailCallOptimization - Check whether the call is eligible for
863 /// tail call optimization. Targets that want to do tail call optimization
864 /// should implement this function.
865 virtual bool IsEligibleForTailCallOptimization(SDOperand Call,
866 SDOperand Ret,
867 SelectionDAG &DAG) const {
868 return false;
869 }
870
862871 /// CustomPromoteOperation - This callback is invoked for operations that are
863872 /// unsupported by the target, are registered to use 'custom' lowering, and
864873 /// whose type needs to be promoted.
7272 /// ExceptionHandling - This flag indicates that exception information should
7373 /// be emitted.
7474 extern bool ExceptionHandling;
75
76 /// PerformTailCallOpt - This flag is enabled when -tailcallopt is specified
77 /// on the command line. When the flag is on, the target will perform tail
78 /// call optimization (pop the caller's stack), provided it supports it.
79 extern bool PerformTailCallOpt;
7580 } // End llvm namespace
7681
7782 #endif
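For illustration, this is the guard the x86 lowering changes later in this patch use to select the tail call path (a minimal excerpt of the pattern, not additional functionality):

  // Select the tail call calling convention only when the flag is on.
  if (CC == CallingConv::Fast && PerformTailCallOpt)
    CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_TailCall);
  else
    CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_C);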
44434443 }
44444444 }
44454445
4446 /// CheckDAGForTailCallsAndFixThem - This function looks for CALL nodes in the
4447 /// DAG and fixes their tailcall attribute operand.
4448 static void CheckDAGForTailCallsAndFixThem(SelectionDAG &DAG,
4449 TargetLowering& TLI) {
4450 SDNode *Ret = NULL;
4451 SDOperand Terminator = DAG.getRoot();
4452
4453 // Find RET node.
4454 if (Terminator.getOpcode() == ISD::RET) {
4455 Ret = Terminator.Val;
4456 }
4457
4458 // Fix tail call attribute of CALL nodes.
4459 for (SelectionDAG::allnodes_iterator BE = DAG.allnodes_begin(),
4460 BI = prior(DAG.allnodes_end()); BI != BE; --BI) {
4461 if (BI->getOpcode() == ISD::CALL) {
4462 SDOperand OpRet(Ret, 0);
4463 SDOperand OpCall(static_cast<SDNode*>(BI), 0);
4464 bool isMarkedTailCall =
4465 cast<ConstantSDNode>(OpCall.getOperand(3))->getValue() != 0;
4466 // If CALL node has tail call attribute set to true and the call is not
4467 // eligible (no RET or the target rejects) the attribute is fixed to
4468 // false. The TargetLowering::IsEligibleForTailCallOptimization function
4469 // must correctly identify tail call optimizable calls.
4470 if (isMarkedTailCall &&
4471 (Ret==NULL ||
4472 !TLI.IsEligibleForTailCallOptimization(OpCall, OpRet, DAG))) {
4473 SmallVector<SDOperand, 8> Ops;
4474 unsigned idx = 0;
4475 for (SDNode::op_iterator I = OpCall.Val->op_begin(),
4476 E = OpCall.Val->op_end(); I != E; I++, idx++) {
4477 if (idx!=3)
4478 Ops.push_back(*I);
4479 else
4480 Ops.push_back(DAG.getConstant(false, TLI.getPointerTy()));
4481 }
4482 DAG.UpdateNodeOperands(OpCall, Ops.begin(), Ops.size());
4483 }
4484 }
4485 }
4486 }
4487
44464488 void SelectionDAGISel::BuildSelectionDAG(SelectionDAG &DAG, BasicBlock *LLVMBB,
44474489 std::vector<std::pair<MachineInstr*, unsigned> > &PHINodesToUpdate,
44484490 FunctionLoweringInfo &FuncInfo) {
46204662
46214663 // Make sure the root of the DAG is up-to-date.
46224664 DAG.setRoot(SDL.getRoot());
4665
4666 // Check whether calls in this block are real tail calls. Fix up CALL nodes
4667 // with correct tailcall attribute so that the target can rely on the tailcall
4668 // attribute indicating whether the call is really eligible for tail call
4669 // optimization.
4670 CheckDAGForTailCallsAndFixThem(DAG, TLI);
46234671 }
46244672
46254673 void SelectionDAGISel::CodeGenAndEmitDAG(SelectionDAG &DAG) {
3232 bool ExceptionHandling;
3333 Reloc::Model RelocationModel;
3434 CodeModel::Model CMModel;
35 bool PerformTailCallOpt;
3536 }
3637 namespace {
3738 cl::opt<bool> PrintCode("print-machineinstrs",
115116 clEnumValN(CodeModel::Large, "large",
116117 " Large code model"),
117118 clEnumValEnd));
119
120 cl::opt<bool, true>
121 EnablePerformTailCallOpt("tailcallopt",
122 cl::desc("Turn on tail call optimization."),
123 cl::location(PerformTailCallOpt),
124 cl::init(false));
118125 }
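With this registration the flag can be toggled from the llc command line; for example (foo.ll is a placeholder input, mirroring the RUN lines of the test cases at the end of this diff):

  llvm-as < foo.ll | llc -tailcallopt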
119126
120127 //---------------------------------------------------------------------------
13671367 L5:
13681368
13691369 //===---------------------------------------------------------------------===//
1370 Tail call optimization improvements: Tail call optimization currently
1371 pushes all arguments on the top of the stack (their normal place if
1372 the call were not tail call optimized) before moving them to the
1373 actual stack slot. This is done to prevent overwriting of parameters
1374 (see example below) that might still be used, since the arguments of
1375 the callee overwrite the caller's arguments.
1376
1377 example:
1378
1379 int callee(int32, int64);
1380 int caller(int32 arg1, int32 arg2) {
1381 int64 local = arg2 * 2;
1382 return callee(arg2, (int64)local);
1383 }
1384
1385 [arg1] [!arg2 no longer valid since we moved local onto it]
1386 [arg2] -> [(int64)
1387 [RETADDR] local ]
1388
1389 moving local onto the stack slots of the callee function would overwrite
1390 arg2 of the caller while it might still be needed.
1391
1392 Possible optimizations:
1393
1394 - only push those arguments to the top of the stack that are actual
1395 parameters of the caller function and have no local value in the
1396 caller
1397
1398 in the above example, local does not need to be pushed onto the top of
1399 the stack as it is definitely not one of the caller's parameters
1400
1401 - analyze the actual parameters of the callee to see which would
1402 overwrite a caller parameter which is used by the callee, and only
1403 push those onto the top of the stack
1404
1405 int callee (int32 arg1, int32 arg2);
1406 int caller (int32 arg1, int32 arg2) {
1407 return callee(arg1,arg2);
1408 }
1409
1410 here we don't need to write any variables to the top of the stack
1411 since they don't overwrite each other
1412
1413 int callee (int32 arg1, int32 arg2);
1414 int caller (int32 arg1, int32 arg2) {
1415 return callee(arg2,arg1);
1416 }
1417
1418 here we need to push the arguments because they overwrite each other
1419
1420
1421 code for lowering directly onto the caller's arguments:
1422 + SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
1423 + SmallVector<SDOperand, 8> MemOpChains;
1424 +
1425 + SDOperand FramePtr;
1426 + SDOperand PtrOff;
1427 + SDOperand FIN;
1428 + int FI = 0;
1429 + // Walk the register/memloc assignments, inserting copies/loads.
1430 + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
1431 + CCValAssign &VA = ArgLocs[i];
1432 + SDOperand Arg = Op.getOperand(5+2*VA.getValNo());
1433 +
1434 + ....
1435 +
1436 + if (VA.isRegLoc()) {
1437 + RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
1438 + } else {
1439 + assert(VA.isMemLoc());
1440 + // create frame index
1441 + int32_t Offset = VA.getLocMemOffset()+FPDiff;
1442 + uint32_t OpSize = (MVT::getSizeInBits(VA.getLocVT())+7)/8;
1443 + FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset);
1444 + FIN = DAG.getFrameIndex(FI, MVT::i32);
1445 + // store relative to framepointer
1446 + MemOpChains.push_back(DAG.getStore(Chain, Arg, FIN, NULL, 0));
1447 + }
1448 + }
1449 //===---------------------------------------------------------------------===//
126126 CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 8>>
127127 ]>;
128128
129 // Tail call convention (fast): one register is reserved for the target
130 // address, namely R9.
131 def CC_X86_64_TailCall : CallingConv<[
132 // Promote i8/i16 arguments to i32.
133 CCIfType<[i8, i16], CCPromoteToType<i32>>,
134
135 CCIfStruct<CCStructAssign<[RDI, RSI, RDX, RCX, R8]>>,
136
137 // The first 5 integer arguments are passed in integer registers.
138 CCIfType<[i32], CCAssignToReg<[EDI, ESI, EDX, ECX, R8D]>>,
139 CCIfType<[i64], CCAssignToReg<[RDI, RSI, RDX, RCX, R8]>>,
140
141 // The first 8 FP/Vector arguments are passed in XMM registers.
142 CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
143 CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>,
144
145 // The first 5 MMX vector arguments are passed in GPRs.
146 CCIfType<[v8i8, v4i16, v2i32, v1i64],
147 CCAssignToReg<[RDI, RSI, RDX, RCX, R8]>>,
148
149 // The 'nest' parameter, if any, is passed in R10.
150 CCIfNest<CCAssignToReg<[R10]>>,
151
152 // Integer/FP values get stored in stack slots that are 8 bytes in size and
153 // 8-byte aligned if there are no more registers to hold them.
154 CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
155
156 // Vectors get 16-byte stack slots that are 16-byte aligned.
157 CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>,
158
159 // __m64 vectors get 8-byte stack slots that are 8-byte aligned.
160 CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 8>>
161 ]>;
162
129163
130164 //===----------------------------------------------------------------------===//
131165 // X86 C Calling Convention
172206 CCDelegateTo<CC_X86_32_Common>
173207 ]>;
174208
209 /// Same as the C calling convention except that ECX is not free: it is used
210 /// for storing a potential pointer to the tail called function.
211 def CC_X86_32_TailCall : CallingConv<[
212 // Promote i8/i16 arguments to i32.
213 CCIfType<[i8, i16], CCPromoteToType<i32>>,
214
215 // The 'nest' parameter, if any, is passed in ECX.
216 CCIfNest<CCAssignToReg<[ECX]>>,
217
218 // The first 2 integer arguments, if marked 'inreg' and if the call is not
219 // a vararg call, are passed in integer registers.
220 CCIfNotVarArg<CCIfInReg<CCIfType<[i32], CCAssignToReg<[EAX, EDX]>>>>,
221
222 // Otherwise, same as everything else.
223 CCDelegateTo<CC_X86_32_Common>
224 ]>;
175225
176226 def CC_X86_32_FastCall : CallingConv<[
177227 // Promote i8/i16 arguments to i32.
3131 #include "llvm/CodeGen/SelectionDAG.h"
3232 #include "llvm/CodeGen/SSARegMap.h"
3333 #include "llvm/Support/MathExtras.h"
34 #include "llvm/Support/CommandLine.h"
35 #include "llvm/Support/Debug.h"
3436 #include "llvm/Target/TargetOptions.h"
3537 #include "llvm/ADT/StringExtras.h"
3638 #include "llvm/ParameterAttributes.h"
4244 X86ScalarSSEf64 = Subtarget->hasSSE2();
4345 X86ScalarSSEf32 = Subtarget->hasSSE1();
4446 X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
47
4548
4649 RegInfo = TM.getRegisterInfo();
4750
640643 //===----------------------------------------------------------------------===//
641644
642645 #include "X86GenCallingConv.inc"
646
647 /// GetPossiblePreceedingTailCall - Get the preceding X86ISD::TAILCALL node if
648 /// it exists; skip a possible ISD::TokenFactor.
649 static SDOperand GetPossiblePreceedingTailCall(SDOperand Chain) {
650 if (Chain.getOpcode()==X86ISD::TAILCALL) {
651 return Chain;
652 } else if (Chain.getOpcode()==ISD::TokenFactor) {
653 if (Chain.getNumOperands() &&
654 Chain.getOperand(0).getOpcode()==X86ISD::TAILCALL)
655 return Chain.getOperand(0);
656 }
657 return Chain;
658 }
643659
644660 /// LowerRET - Lower an ISD::RET node.
645661 SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) {
650666 bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
651667 CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs);
652668 CCInfo.AnalyzeReturn(Op.Val, RetCC_X86);
653
654
669
655670 // If this is the first return lowered for this function, add the regs to the
656671 // liveout set for the function.
657672 if (DAG.getMachineFunction().liveout_empty()) {
659674 if (RVLocs[i].isRegLoc())
660675 DAG.getMachineFunction().addLiveOut(RVLocs[i].getLocReg());
661676 }
677 SDOperand Chain = Op.getOperand(0);
662678
663 SDOperand Chain = Op.getOperand(0);
679 // Handle tail call return.
680 Chain = GetPossiblePreceedingTailCall(Chain);
681 if (Chain.getOpcode() == X86ISD::TAILCALL) {
682 SDOperand TailCall = Chain;
683 SDOperand TargetAddress = TailCall.getOperand(1);
684 SDOperand StackAdjustment = TailCall.getOperand(2);
685 assert(((TargetAddress.getOpcode() == ISD::Register &&
686 (cast<RegisterSDNode>(TargetAddress)->getReg() == X86::ECX ||
687 cast<RegisterSDNode>(TargetAddress)->getReg() == X86::R9)) ||
688 TargetAddress.getOpcode() == ISD::TargetExternalSymbol ||
689 TargetAddress.getOpcode() == ISD::TargetGlobalAddress) &&
690 "Expecting a global address, external symbol, or register");
691 assert(StackAdjustment.getOpcode() == ISD::Constant &&
692 "Expecting a const value");
693
694 SmallVector<SDOperand, 8> Operands;
695 Operands.push_back(Chain.getOperand(0));
696 Operands.push_back(TargetAddress);
697 Operands.push_back(StackAdjustment);
698 // Copy registers used by the call. Last operand is a flag so it is not
699 // copied.
700 for (unsigned i = 3; i < TailCall.getNumOperands()-1; i++) {
701 Operands.push_back(Chain.getOperand(i));
702 }
703 return DAG.getNode(X86ISD::TC_RETURN, MVT::Other, &Operands[0], Operands.size());
704 }
705
706 // Regular return.
664707 SDOperand Flag;
665
708
666709 // Copy the result values into the output registers.
667710 if (RVLocs.size() != 1 || !RVLocs[0].isRegLoc() ||
668711 RVLocs[0].getLocReg() != X86::ST0) {
683726 if ((X86ScalarSSEf32 && RVLocs[0].getValVT()==MVT::f32) ||
684727 (X86ScalarSSEf64 && RVLocs[0].getValVT()==MVT::f64)) {
685728 SDOperand MemLoc;
686
729
687730 // If this is a load into a scalarsse value, don't store the loaded value
688731 // back to the stack, only to reload it: just replace the scalar-sse load.
689732 if (ISD::isNON_EXTLoad(Value.Val) &&
783826
784827
785828 //===----------------------------------------------------------------------===//
786 // C & StdCall Calling Convention implementation
829 // C & StdCall & Fast Calling Convention implementation
787830 //===----------------------------------------------------------------------===//
788831 // StdCall calling convention seems to be standard for many Windows' API
789832 // routines and around. It differs from C calling convention just a little:
790833 // callee should clean up the stack, not caller. Symbols should be also
791834 // decorated in some fancy way :) It doesn't support any vector arguments.
835 // For info on the fast calling convention see the Fast Calling Convention
836 // (tail call) implementation in LowerX86_TailCallTo.
792837
793838 /// AddLiveIn - This helper function adds the specified physical register to the
794839 /// MachineFunction as a live in value. It also creates a corresponding virtual
801846 return VReg;
802847 }
803848
849 // align stack arguments according to platform alignment needed for tail calls
850 unsigned GetAlignedArgumentStackSize(unsigned StackSize, SelectionDAG& DAG);
851
804852 SDOperand X86TargetLowering::LowerMemArgument(SDOperand Op, SelectionDAG &DAG,
805853 const CCValAssign &VA,
806854 MachineFrameInfo *MFI,
825873 MachineFrameInfo *MFI = MF.getFrameInfo();
826874 SDOperand Root = Op.getOperand(0);
827875 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
828
876 unsigned CC = MF.getFunction()->getCallingConv();
829877 // Assign locations to all of the incoming arguments.
830878 SmallVector<CCValAssign, 16> ArgLocs;
831 CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg,
879 CCState CCInfo(CC, isVarArg,
832880 getTargetMachine(), ArgLocs);
833 CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_C);
834
881 // Check for possible tail call calling convention.
882 if (CC == CallingConv::Fast && PerformTailCallOpt)
883 CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_TailCall);
884 else
885 CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_C);
886
835887 SmallVector<SDOperand, 8> ArgValues;
836888 unsigned LastVal = ~0U;
837889 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
876928 }
877929
878930 unsigned StackSize = CCInfo.getNextStackOffset();
931 // Align the stack specially for tail calls.
932 if (CC == CallingConv::Fast)
933 StackSize = GetAlignedArgumentStackSize(StackSize,DAG);
879934
880935 ArgValues.push_back(Root);
881936
884939 if (isVarArg)
885940 VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);
886941
887 if (isStdCall && !isVarArg) {
942 // Tail call calling convention (CallingConv::Fast) does not support varargs.
943 assert(!(isVarArg && CC == CallingConv::Fast) &&
944 "CallingConv::Fast does not support varargs.");
945
946 if (isStdCall && !isVarArg &&
947 ((CC == CallingConv::Fast && PerformTailCallOpt) || CC != CallingConv::Fast)) {
888948 BytesToPopOnReturn = StackSize; // Callee pops everything..
889949 BytesCallerReserves = 0;
890950 } else {
913973 unsigned CC) {
914974 SDOperand Chain = Op.getOperand(0);
914974 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
916 bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
917976 SDOperand Callee = Op.getOperand(4);
918977 unsigned NumOps = (Op.getNumOperands() - 5) / 2;
919
978
920979 // Analyze operands of the call, assigning locations to each operand.
921980 SmallVector<CCValAssign, 16> ArgLocs;
922981 CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
923 CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_C);
982 if (CC == CallingConv::Fast && PerformTailCallOpt)
983 CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_TailCall);
984 else
985 CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_C);
924986
925987 // Get a count of how many bytes are to be pushed on the stack.
926988 unsigned NumBytes = CCInfo.getNextStackOffset();
989 if (CC == CallingConv::Fast)
990 NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
927991
928992 Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));
929993
10221086
10231087 if (InFlag.Val)
10241088 Ops.push_back(InFlag);
1025
1026 Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
1027 NodeTys, &Ops[0], Ops.size());
1089
1090 Chain = DAG.getNode(X86ISD::CALL, NodeTys, &Ops[0], Ops.size());
10281091 InFlag = Chain.getValue(1);
10291092
10301093 // Create the CALLSEQ_END node.
10311094 unsigned NumBytesForCalleeToPush = 0;
10321095
1033 if (CC == CallingConv::X86_StdCall) {
1096 if (CC == CallingConv::X86_StdCall ||
1097 (CC == CallingConv::Fast && PerformTailCallOpt)) {
10341098 if (isVarArg)
10351099 NumBytesForCalleeToPush = isSRet ? 4 : 0;
10361100 else
10371101 NumBytesForCalleeToPush = NumBytes;
1102 assert(!(isVarArg && CC == CallingConv::Fast) &&
1103 "CallingConv::Fast does not support varargs.");
10381104 } else {
10391105 // If this is a call to a struct-return function, the callee
10401106 // pops the hidden struct pointer, so we have to push it back.
11311197
11321198 if (!Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows()) {
11331199 // Make sure the instruction takes 8n+4 bytes to make sure the start of the
1134 // arguments and the arguments after the retaddr has been pushed are aligned.
1200 // arguments and the arguments after the retaddr has been pushed are
1201 // aligned.
11351202 if ((StackSize & 7) == 0)
11361203 StackSize += 4;
11371204 }
11931260
11941261 if (!Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows()) {
11951262 // Make sure the instruction takes 8n+4 bytes to make sure the start of the
1196 // arguments and the arguments after the retaddr has been pushed are aligned.
1263 // arguments and the arguments after the retaddr has been pushed are
1264 // aligned.
11971265 if ((NumBytes & 7) == 0)
11981266 NumBytes += 4;
11991267 }
12911359 if (InFlag.Val)
12921360 Ops.push_back(InFlag);
12931361
1294 // FIXME: Do not generate X86ISD::TAILCALL for now.
1295 Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
1362 assert(!isTailCall && "no tail call here");
1363 Chain = DAG.getNode(X86ISD::CALL,
12961364 NodeTys, &Ops[0], Ops.size());
12971365 InFlag = Chain.getValue(1);
12981366
13111379 return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
13121380 }
13131381
1382 //===----------------------------------------------------------------------===//
1383 // Fast Calling Convention (tail call) implementation
1384 //===----------------------------------------------------------------------===//
1385
1386 // Like std call (the callee cleans up the arguments), except that ECX is
1387 // reserved for storing the tail called function's address. Only 2 registers
1388 // are free for argument passing (inreg). Tail call optimization is performed
1389 // provided:
1390 // * tailcallopt is enabled
1391 // * caller/callee are fastcc
1392 // * elf/pic is disabled OR
1393 // * elf/pic enabled + callee is in module + callee has
1394 // visibility protected or hidden
1395 // To ensure the stack is aligned according to the platform ABI, pass
1396 // tail-call-align-stack. This makes sure that the argument delta is always
1397 // a multiple of the stack alignment. (Dynamic linkers need this - darwin's
1398 // dyld, for example.)
1399 // If a tail called function (the callee) has more arguments than the caller,
1400 // the caller needs to make sure that there is room to move the RETADDR to.
1401 // This is achieved by reserving an area the size of the argument delta right
1402 // after the original RETADDR, but before the saved frame pointer or the
1403 // spilled registers, e.g. caller(arg1, arg2) calls callee(arg1, arg2, arg3, arg4)
1404 // stack layout:
1405 // arg1
1406 // arg2
1407 // RETADDR
1408 // [ new RETADDR
1409 // move area ]
1410 // (possible EBP)
1411 // ESI
1412 // EDI
1413 // local1 ..
1414
1415 /// GetAlignedArgumentStackSize - Align the stack size, e.g. to 16n + 12
1416 /// for a 16 byte alignment requirement.
1417 unsigned X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
1418 SelectionDAG& DAG) {
1419 if (PerformTailCallOpt) {
1420 MachineFunction &MF = DAG.getMachineFunction();
1421 const TargetMachine &TM = MF.getTarget();
1422 const TargetFrameInfo &TFI = *TM.getFrameInfo();
1423 unsigned StackAlignment = TFI.getStackAlignment();
1424 uint64_t AlignMask = StackAlignment - 1;
1425 int64_t Offset = StackSize;
1426 unsigned SlotSize = Subtarget->is64Bit() ? 8 : 4;
1427 if ((Offset & AlignMask) <= (StackAlignment - SlotSize)) {
1428 // Number smaller than 12, so just add the difference.
1429 Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask));
1430 } else {
1431 // Mask out lower bits, add stackalignment once plus the 12 bytes.
1432 Offset = ((~AlignMask) & Offset) + StackAlignment +
1433 (StackAlignment-SlotSize);
1434 }
1435 StackSize = Offset;
1436 }
1437 return StackSize;
1438 }
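To check what the arithmetic above produces, here is a minimal standalone sketch (not part of the patch; it hard-codes StackAlignment = 16 and SlotSize = 4, i.e. 32-bit x86 with PerformTailCallOpt enabled):

  #include <cassert>
  #include <cstdint>

  // Mirrors the alignment arithmetic of GetAlignedArgumentStackSize above.
  static unsigned AlignArgStackSize(unsigned StackSize) {
    const unsigned StackAlignment = 16;  // TFI.getStackAlignment()
    const unsigned SlotSize = 4;         // 32-bit return address slot
    uint64_t AlignMask = StackAlignment - 1;
    int64_t Offset = StackSize;
    if ((Offset & AlignMask) <= (StackAlignment - SlotSize))
      Offset += (StackAlignment - SlotSize) - (Offset & AlignMask);
    else
      Offset = ((~AlignMask) & Offset) + StackAlignment + (StackAlignment - SlotSize);
    return Offset;
  }

  int main() {
    assert(AlignArgStackSize(20) == 28);  // 20 & 15 == 4 <= 12: 20 + (12 - 4)
    assert(AlignArgStackSize(13) == 28);  // 13 & 15 == 13 > 12: 0 + 16 + 12
    assert(AlignArgStackSize(28) == 28);  // already of the form 16n + 12
    return 0;
  }

Every result is congruent to 12 modulo 16, so after the 4-byte return address is pushed the stack is 16-byte aligned at the callee's entry.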
1439
1440 /// IsEligibleForTailCallOptimization - Check to see whether the next
1441 /// instruction following the call is a return. A function is eligible if
1442 /// caller/callee calling conventions match, currently only fastcc supports
1443 /// tail calls, and the function CALL is immediately followed by a RET.
1444 bool X86TargetLowering::IsEligibleForTailCallOptimization(SDOperand Call,
1445 SDOperand Ret,
1446 SelectionDAG& DAG) const {
1447 bool IsEligible = false;
1448
1449 // Check whether the CALL node immediately precedes the RET node and whether
1450 // return uses the result of the node or is a void return.
1451 if ((Ret.getNumOperands() == 1 &&
1452 (Ret.getOperand(0) == SDOperand(Call.Val,1) ||
1453 Ret.getOperand(0) == SDOperand(Call.Val,0))) ||
1454 (Ret.getOperand(0) == SDOperand(Call.Val,Call.Val->getNumValues()-1) &&
1455 Ret.getOperand(1) == SDOperand(Call.Val,0))) {
1456 MachineFunction &MF = DAG.getMachineFunction();
1457 unsigned CallerCC = MF.getFunction()->getCallingConv();
1458 unsigned CalleeCC = cast<ConstantSDNode>(Call.getOperand(1))->getValue();
1459 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
1460 SDOperand Callee = Call.getOperand(4);
1461 // On elf/pic %ebx needs to be livein.
1462 if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
1463 Subtarget->isPICStyleGOT()) {
1464 // Can only do local tail calls with PIC.
1465 GlobalValue *GV = 0;
1466 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
1467 if (G != 0 &&
1468 (GV = G->getGlobal()) &&
1469 (GV->hasHiddenVisibility() || GV->hasProtectedVisibility()))
1470 IsEligible = true;
1471 } else {
1472 IsEligible = true;
1473 }
1474 }
1475 }
1476 return IsEligible;
1477 }
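The two -relocation-model=pic test cases at the end of this diff exercise exactly this check: the callee declared protected fastcc is accepted (grep TAILCALL), while the default-visibility one is rejected (grep -v TAILCALL).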
1478
1479 SDOperand X86TargetLowering::LowerX86_TailCallTo(SDOperand Op,
1480 SelectionDAG &DAG,
1481 unsigned CC) {
1482 SDOperand Chain = Op.getOperand(0);
1483 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
1484 bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
1485 SDOperand Callee = Op.getOperand(4);
1486 bool is64Bit = Subtarget->is64Bit();
1487
1488 assert(isTailCall && PerformTailCallOpt && "Should only emit tail calls.");
1489
1490 // Analyze operands of the call, assigning locations to each operand.
1491 SmallVector<CCValAssign, 16> ArgLocs;
1492 CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
1493 if (is64Bit)
1494 CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_64_TailCall);
1495 else
1496 CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_TailCall);
1497
1498
1499 // Lower arguments at fp - stackoffset + fpdiff.
1500 MachineFunction &MF = DAG.getMachineFunction();
1501
1502 unsigned NumBytesToBePushed =
1503 GetAlignedArgumentStackSize(CCInfo.getNextStackOffset(), DAG);
1504
1505 unsigned NumBytesCallerPushed =
1506 MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn();
1507 int FPDiff = NumBytesCallerPushed - NumBytesToBePushed;
1508
1509 // Set the delta of movement of the returnaddr stackslot.
1510 // But only set if delta is greater than previous delta.
1511 if (FPDiff < (MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta()))
1512 MF.getInfo<X86MachineFunctionInfo>()->setTCReturnAddrDelta(FPDiff);
1513
1514 // Adjust the ret address stack slot.
1515 if (FPDiff) {
1516 MVT::ValueType VT = is64Bit ? MVT::i64 : MVT::i32;
1517 SDOperand RetAddrFrIdx = getReturnAddressFrameIndex(DAG);
1518 RetAddrFrIdx =
1519 DAG.getLoad(VT, DAG.getEntryNode(), RetAddrFrIdx, NULL, 0);
1520 // Emit a store of the saved ret value to the new location.
1521 int SlotSize = is64Bit ? 8 : 4;
1522 int NewReturnAddrFI =
1523 MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize);
1524 SDOperand NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT);
1525 Chain = DAG.getStore(Chain, RetAddrFrIdx, NewRetAddrFrIdx, NULL, 0);
1526 }
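As a worked example of the delta (numbers are illustrative, not from the patch): if the caller was itself entered with 12 bytes of stack arguments (NumBytesCallerPushed = 12) and the callee needs 28 (NumBytesToBePushed = 28), then FPDiff = 12 - 28 = -16; the return address is reloaded and stored 16 bytes lower (the new fixed object sits at FPDiff - SlotSize = -20), and setTCReturnAddrDelta records the -16 so the prologue code later in this patch can reserve the move area.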
1527
1528 Chain = DAG.getCALLSEQ_START(Chain,
1529 DAG.getConstant(NumBytesToBePushed, getPointerTy()));
1530
1531 SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
1532 SmallVector<SDOperand, 8> MemOpChains;
1533 SmallVector<SDOperand, 8> MemOpChains2;
1534 SDOperand FramePtr, StackPtr;
1535 SDOperand PtrOff;
1536 SDOperand FIN;
1537 int FI = 0;
1538
1539 // Walk the register/memloc assignments, inserting copies/loads. Lower
1540 // the arguments first to the stack slots where they would normally be in
1541 // the case of a normal function call.
1542 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
1543 CCValAssign &VA = ArgLocs[i];
1544 SDOperand Arg = Op.getOperand(5+2*VA.getValNo());
1545
1546 // Promote the value if needed.
1547 switch (VA.getLocInfo()) {
1548 default: assert(0 && "Unknown loc info!");
1549 case CCValAssign::Full: break;
1550 case CCValAssign::SExt:
1551 Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
1552 break;
1553 case CCValAssign::ZExt:
1554 Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
1555 break;
1556 case CCValAssign::AExt:
1557 Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
1558 break;
1559 }
1560
1561 if (VA.isRegLoc()) {
1562 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
1563 } else {
1564 assert(VA.isMemLoc());
1565 if (StackPtr.Val == 0)
1566 StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());
1567
1568 MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain,
1569 Arg));
1570 }
1571 }
1572
1573 if (!MemOpChains.empty())
1574 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1575 &MemOpChains[0], MemOpChains.size());
1576
1577 // Build a sequence of copy-to-reg nodes chained together with token chain
1578 // and flag operands which copy the outgoing args into registers.
1579 SDOperand InFlag;
1580 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1581 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
1582 InFlag);
1583 InFlag = Chain.getValue(1);
1584 }
1585 InFlag = SDOperand();
1586 // Copy from the caller's stack slots to the stack slots of the tail called
1587 // function. This needs to be done because if we lowered the arguments
1588 // directly to their real stack slots we might end up overwriting arguments.
1589 // TODO: To make this more efficient (sometimes saving a store/load) we could
1590 // analyze the arguments and emit this store/load/store sequence only for
1591 // arguments which would be overwritten otherwise.
1592 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
1593 CCValAssign &VA = ArgLocs[i];
1594 if (!VA.isRegLoc()) {
1595 SDOperand FlagsOp = Op.getOperand(6+2*VA.getValNo());
1596 unsigned Flags = cast<ConstantSDNode>(FlagsOp)->getValue();
1597
1598 // Get source stack slot.
1599 SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(), getPointerTy());
1600 PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
1601 // Create frame index.
1602 int32_t Offset = VA.getLocMemOffset()+FPDiff;
1603 uint32_t OpSize = (MVT::getSizeInBits(VA.getLocVT())+7)/8;
1604 FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset);
1605 FIN = DAG.getFrameIndex(FI, MVT::i32);
1606 if (Flags & ISD::ParamFlags::ByVal) {
1607 // Copy relative to framepointer.
1608 unsigned Align = 1 << ((Flags & ISD::ParamFlags::ByValAlign) >>
1609 ISD::ParamFlags::ByValAlignOffs);
1610
1611 unsigned Size = (Flags & ISD::ParamFlags::ByValSize) >>
1612 ISD::ParamFlags::ByValSizeOffs;
1613
1614 SDOperand AlignNode = DAG.getConstant(Align, MVT::i32);
1615 SDOperand SizeNode = DAG.getConstant(Size, MVT::i32);
1616 // Copy relative to framepointer.
1617 MemOpChains2.push_back(DAG.getNode(ISD::MEMCPY, MVT::Other, Chain, FIN,
1618 PtrOff, SizeNode, AlignNode));
1619 } else {
1620 SDOperand LoadedArg = DAG.getLoad(VA.getValVT(), Chain, PtrOff, NULL, 0);
1621 // Store relative to framepointer.
1622 MemOpChains2.push_back(DAG.getStore(Chain, LoadedArg, FIN, NULL, 0));
1623 }
1624 }
1625 }
1626
1627 if (!MemOpChains2.empty())
1628 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1629 &MemOpChains2[0], MemOpChains2.size());
1630
1631 // ELF / PIC requires the GOT to be in the EBX register before function
1632 // calls via the PLT GOT pointer.
1633 // This does not work with tail calls, since ebx is not restored correctly
1634 // by the tail caller. TODO: at least for x86 - verify for x86-64.
1635
1636 // If the callee is a GlobalAddress node (quite common, every direct call is)
1637 // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
1638 if (GlobalAddressSDNode *G = dyn_cast(Callee)) {
1639 // We should use extra load for direct calls to dllimported functions in
1640 // non-JIT mode.
1641 if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
1642 getTargetMachine(), true))
1643 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
1644 } else if (ExternalSymbolSDNode *S = dyn_cast(Callee))
1645 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
1646 else {
1647 assert(Callee.getOpcode() == ISD::LOAD &&
1648 "Function destination must be loaded into virtual register");
1649 unsigned Opc = is64Bit ? X86::R9 : X86::ECX;
1650
1651 Chain = DAG.getCopyToReg(Chain,
1652 DAG.getRegister(Opc, getPointerTy()),
1653 Callee, InFlag);
1654 Callee = DAG.getRegister(Opc, getPointerTy());
1655 // Add register as live out.
1656 DAG.getMachineFunction().addLiveOut(Opc);
1657 }
1658
1659 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
1660 SmallVector<SDOperand, 8> Ops;
1661
1662 Ops.push_back(Chain);
1663 Ops.push_back(DAG.getConstant(NumBytesToBePushed, getPointerTy()));
1664 Ops.push_back(DAG.getConstant(0, getPointerTy()));
1665 if (InFlag.Val)
1666 Ops.push_back(InFlag);
1667 Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
1668 InFlag = Chain.getValue(1);
1669
1670 // Returns a chain & a flag for retval copy to use.
1671 NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
1672 Ops.clear();
1673 Ops.push_back(Chain);
1674 Ops.push_back(Callee);
1675 Ops.push_back(DAG.getConstant(FPDiff, MVT::i32));
1676 // Add argument registers to the end of the list so that they are known live
1677 // into the call.
1678 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1679 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1680 RegsToPass[i].second.getValueType()));
1681 if (InFlag.Val)
1682 Ops.push_back(InFlag);
1683 assert(InFlag.Val &&
1684 "Flag must be set. Depend on flag being set in LowerRET");
1685 Chain = DAG.getNode(X86ISD::TAILCALL,
1686 Op.Val->getVTList(), &Ops[0], Ops.size());
1687
1688 return SDOperand(Chain.Val, Op.ResNo);
1689 }
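In summary, the sequence implemented above: arguments are first lowered to the stack slots they would occupy in a normal call, then copied (or memcpy'd for byval) to their final slots relative to FPDiff; the target address is materialized in ECX/R9 when it is not a direct symbol; and an X86ISD::TAILCALL node is emitted. LowerRET later rewrites that node into X86ISD::TC_RETURN, and the epilogue code in X86RegisterInfo.cpp turns the TCRETURN pseudo into the actual stack adjustment plus TAILJMP.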
13141690
13151691 //===----------------------------------------------------------------------===//
13161692 // X86-64 C Calling Convention implementation
13221698 MachineFrameInfo *MFI = MF.getFrameInfo();
13231699 SDOperand Root = Op.getOperand(0);
13241700 bool isVarArg = cast(Op.getOperand(2))->getValue() != 0;
1701 unsigned CC = MF.getFunction()->getCallingConv();
13251702
13261703 static const unsigned GPR64ArgRegs[] = {
13271704 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
13341711
13351712 // Assign locations to all of the incoming arguments.
13361713 SmallVector<CCValAssign, 16> ArgLocs;
1337 CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg,
1714 CCState CCInfo(CC, isVarArg,
13381715 getTargetMachine(), ArgLocs);
1339 CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_64_C);
1716 if (CC == CallingConv::Fast && PerformTailCallOpt)
1717 CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_64_TailCall);
1718 else
1719 CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_64_C);
13401720
13411721 SmallVector<SDOperand, 8> ArgValues;
13421722 unsigned LastVal = ~0U;
13971777 }
13981778
13991779 unsigned StackSize = CCInfo.getNextStackOffset();
1780 if (CC == CallingConv::Fast)
1781 StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
14001782
14011783 // If the function takes variable number of arguments, make a frame index for
14021784 // the start of the first vararg value... for expansion of llvm.va_start.
14031785 if (isVarArg) {
1786 assert(CC != CallingConv::Fast &&
1787 "Var arg not supported with calling convention fastcc");
14041788 unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs, 6);
14051789 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
14061790
14451829 }
14461830
14471831 ArgValues.push_back(Root);
1448
1449 BytesToPopOnReturn = 0; // Callee pops nothing.
1450 BytesCallerReserves = StackSize;
1451
1832 // Tail call convention (fastcc) needs callee pop.
1833 if (CC == CallingConv::Fast && PerformTailCallOpt) {
1834 BytesToPopOnReturn = StackSize; // Callee pops everything.
1835 BytesCallerReserves = 0;
1836 } else {
1837 BytesToPopOnReturn = 0; // Callee pops nothing.
1838 BytesCallerReserves = StackSize;
1839 }
14521840 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
14531841 FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn);
14541842
14621850 unsigned CC) {
14631851 SDOperand Chain = Op.getOperand(0);
14641852 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
1465 bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
14661853 SDOperand Callee = Op.getOperand(4);
14671854
14681855 // Analyze operands of the call, assigning locations to each operand.
14691856 SmallVector<CCValAssign, 16> ArgLocs;
14701857 CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
1471 CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_64_C);
1858 if (CC == CallingConv::Fast)
1859 CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_64_TailCall);
1860 else
1861 CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_64_C);
14721862
14731863 // Get a count of how many bytes are to be pushed on the stack.
14741864 unsigned NumBytes = CCInfo.getNextStackOffset();
1865 if (CC == CallingConv::Fast)
1866 NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
1867
14751868 Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));
14761869
14771870 SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
15251918 }
15261919
15271920 if (isVarArg) {
1921 assert(CC != CallingConv::Fast &&
1922 "Var args not supported with calling convention fastcc");
1923
15281924 // From AMD64 ABI document:
15291925 // For calls that may call functions that use varargs or stdargs
15301926 // (prototype-less calls or calls to functions containing ellipsis (...) in
15731969 if (InFlag.Val)
15741970 Ops.push_back(InFlag);
15751971
1576 // FIXME: Do not generate X86ISD::TAILCALL for now.
1577 Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
1972 Chain = DAG.getNode(X86ISD::CALL,
15781973 NodeTys, &Ops[0], Ops.size());
15791974 InFlag = Chain.getValue(1);
1580
1975 int NumBytesForCalleeToPush = 0;
1976 if (CC == CallingConv::Fast) {
1977 NumBytesForCalleeToPush = NumBytes; // Callee pops everything.
1978 } else {
1979 NumBytesForCalleeToPush = 0; // Callee pops nothing.
1980 }
15811982 // Returns a flag for retval copy to use.
15821983 NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
15831984 Ops.clear();
15841985 Ops.push_back(Chain);
15851986 Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
1586 Ops.push_back(DAG.getConstant(0, getPointerTy()));
1987 Ops.push_back(DAG.getConstant(NumBytesForCalleeToPush, getPointerTy()));
15871988 Ops.push_back(InFlag);
15881989 Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
15891990 InFlag = Chain.getValue(1);
31053506 // SHUFPS the element to the lowest double word, then movss.
31063507 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
31073508 SmallVector<SDOperand, 8> IdxVec;
3108 IdxVec.push_back(DAG.getConstant(Idx, MVT::getVectorElementType(MaskVT)));
3109 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
3110 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
3111 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
3509 IdxVec.
3510 push_back(DAG.getConstant(Idx, MVT::getVectorElementType(MaskVT)));
3511 IdxVec.
3512 push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
3513 IdxVec.
3514 push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
3515 IdxVec.
3516 push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
31123517 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
31133518 &IdxVec[0], IdxVec.size());
31143519 Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
31273532 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
31283533 SmallVector<SDOperand, 8> IdxVec;
31293534 IdxVec.push_back(DAG.getConstant(1, MVT::getVectorElementType(MaskVT)));
3130 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
3535 IdxVec.
3536 push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
31313537 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
31323538 &IdxVec[0], IdxVec.size());
31333539 Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
37764182 }
37774183
37784184 SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
3779 unsigned CallingConv = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
3780
3781 if (Subtarget->is64Bit())
3782 return LowerX86_64CCCCallTo(Op, DAG, CallingConv);
4185 unsigned CallingConv = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
4186 bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
4187
4188 if (Subtarget->is64Bit())
4189 if (CallingConv == CallingConv::Fast && isTailCall && PerformTailCallOpt)
4190 return LowerX86_TailCallTo(Op, DAG, CallingConv);
4191 else
4192 return LowerX86_64CCCCallTo(Op, DAG, CallingConv);
37834193 else
37844194 switch (CallingConv) {
37854195 default:
37864196 assert(0 && "Unsupported calling convention");
37874197 case CallingConv::Fast:
3788 // TODO: Implement fastcc
3789 // Falls through
4198 if (isTailCall && PerformTailCallOpt)
4199 return LowerX86_TailCallTo(Op, DAG, CallingConv);
4200 else
4201 return LowerCCCCallTo(Op, DAG, CallingConv);
37904202 case CallingConv::C:
37914203 case CallingConv::X86_StdCall:
37924204 return LowerCCCCallTo(Op, DAG, CallingConv);
38544266 default:
38554267 assert(0 && "Unsupported calling convention");
38564268 case CallingConv::Fast:
3857 // TODO: implement fastcc.
3858
4269 return LowerCCCArguments(Op, DAG, true);
38604271 case CallingConv::C:
38614272 return LowerCCCArguments(Op, DAG);
41754586 SDOperand TheOp = Op.getOperand(0);
41764587 SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, &TheOp, 1);
41774588 if (Subtarget->is64Bit()) {
4178 SDOperand Copy1 = DAG.getCopyFromReg(rd, X86::RAX, MVT::i64, rd.getValue(1));
4589 SDOperand Copy1 =
4590 DAG.getCopyFromReg(rd, X86::RAX, MVT::i64, rd.getValue(1));
41794591 SDOperand Copy2 = DAG.getCopyFromReg(Copy1.getValue(1), X86::RDX,
41804592 MVT::i64, Copy1.getValue(2));
41814593 SDOperand Tmp = DAG.getNode(ISD::SHL, MVT::i64, Copy2,
46115023 case X86ISD::TLSADDR: return "X86ISD::TLSADDR";
46125024 case X86ISD::THREAD_POINTER: return "X86ISD::THREAD_POINTER";
46135025 case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN";
5026 case X86ISD::TC_RETURN: return "X86ISD::TC_RETURN";
46145027 }
46155028 }
46165029
48845297 i %= NumElems;
48855298 if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) {
48865299 return (i == 0)
4887 ? V.getOperand(0) : DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(VT));
5300 ? V.getOperand(0) : DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(VT));
48885301 } else if (V.getOpcode() == ISD::VECTOR_SHUFFLE) {
48895302 SDOperand Idx = PermMask.getOperand(i);
48905303 if (Idx.getOpcode() == ISD::UNDEF)
180180 TLSADDR, THREAD_POINTER,
181181
182182 // Exception Handling helpers
183 EH_RETURN
183 EH_RETURN,
184
185 // tail call return
186 // operand #0 chain
187 // operand #1 callee (register or absolute)
188 // operand #2 stack adjustment
189 // operand #3 optional in flag
190 TC_RETURN
184191 };
185192 }
186193
284291 unsigned VarArgsFPOffset; // X86-64 vararg func fp reg offset.
285292 int BytesToPopOnReturn; // Number of arg bytes ret should pop.
286293 int BytesCallerReserves; // Number of arg bytes caller makes.
294
287295 public:
288296 explicit X86TargetLowering(TargetMachine &TM);
289297
363371 virtual bool isVectorClearMaskLegal(std::vector<SDOperand> &BVOps,
364372 MVT::ValueType EVT,
365373 SelectionDAG &DAG) const;
374
375 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
376 /// for tail call optimization. Targets that want to do tail call
377 /// optimization should implement this function.
378 virtual bool IsEligibleForTailCallOptimization(SDOperand Call,
379 SDOperand Ret,
380 SelectionDAG &DAG) const;
381
366382 private:
367383 /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
368384 /// make the right decision when generating code for different targets.
371387
372388 /// X86StackPtr - X86 physical register used as stack ptr.
373389 unsigned X86StackPtr;
374
390
375391 /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
376392 /// floating point ops.
377393 /// When SSE is available, use it for f32 operations.
401417 SDOperand LowerX86_64CCCArguments(SDOperand Op, SelectionDAG &DAG);
402418 SDOperand LowerX86_64CCCCallTo(SDOperand Op, SelectionDAG &DAG,unsigned CC);
403419
420 // Fast calling convention (tail call) implementation for 32/64 bit.
421 SDOperand LowerX86_TailCallTo(SDOperand Op,
422 SelectionDAG &DAG, unsigned CC);
423 unsigned GetAlignedArgumentStackSize(unsigned StackSize, SelectionDAG &DAG);
404424 // Fast and FastCall Calling Convention implementation.
405425 SDOperand LowerFastCCArguments(SDOperand Op, SelectionDAG &DAG);
406426 SDOperand LowerFastCCCallTo(SDOperand Op, SelectionDAG &DAG, unsigned CC);
705705 if (MBB.empty()) return false;
706706
707707 switch (MBB.back().getOpcode()) {
708 case X86::TCRETURNri:
709 case X86::TCRETURNdi:
708710 case X86::RET: // Return.
709711 case X86::RETI:
710712 case X86::TAILJMPd:
5454
5555 def SDT_X86EHRET : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
5656
57 def SDT_X86TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisVT<1, i32>]>;
58
5759 def X86shld : SDNode<"X86ISD::SHLD", SDTIntShiftDOp>;
5860 def X86shrd : SDNode<"X86ISD::SHRD", SDTIntShiftDOp>;
5961
7274 [SDNPHasChain, SDNPOutFlag]>;
7375 def X86callseq_end :
7476 SDNode<"ISD::CALLSEQ_END", SDT_X86CallSeqEnd,
75 [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>;
77 [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
7678
7779 def X86call : SDNode<"X86ISD::CALL", SDT_X86Call,
7880 [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag]>;
98100 def X86ehret : SDNode<"X86ISD::EH_RETURN", SDT_X86EHRET,
99101 [SDNPHasChain]>;
100102
103 def X86tcret : SDNode<"X86ISD::TC_RETURN", SDT_X86TCRET,
104 [SDNPHasChain, SDNPOptInFlag]>;
101105
102106 //===----------------------------------------------------------------------===//
103107 // X86 Operand Definitions.
355359 }
356360
357361 // Tail call stuff.
362
363 def TAILCALL : I<0, Pseudo, (outs), (ins ),
364 "#TAILCALL",
365 []>;
366
358367 let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
359 def TAILJMPd : IBr<0xE9, (ins i32imm:$dst), "jmp\t${dst:call} # TAIL CALL",
368 def TCRETURNdi : I<0, Pseudo, (outs), (ins i32imm:$dst, i32imm:$offset),
369 "#TC_RETURN $dst $offset",
370 []>;
371
372 let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
373 def TCRETURNri : I<0, Pseudo, (outs), (ins GR32:$dst, i32imm:$offset),
374 "#TC_RETURN $dst $offset",
375 []>;
376
377 let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
378 def TAILJMPd : IBr<0xE9, (ins i32imm:$dst), "jmp\t${dst:call} # TAILCALL",
360379 []>;
361380 let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
362 def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp\t{*}$dst # TAIL CALL",
363 []>;
381 def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp{l}\t{*}$dst # TAILCALL",
382 []>;
364383 let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
365384 def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem:$dst),
366 "jmp\t{*}$dst # TAIL CALL", []>;
385 "jmp\t{*}$dst # TAILCALL", []>;
367386
368387 //===----------------------------------------------------------------------===//
369388 // Miscellaneous Instructions...
25062525 (MOV32mi addr:$dst, texternalsym:$src)>;
25072526
25082527 // Calls
2528 // tailcall stuff
25092529 def : Pat<(X86tailcall GR32:$dst),
2510 (CALL32r GR32:$dst)>;
2530 (TAILCALL)>;
25112531
25122532 def : Pat<(X86tailcall (i32 tglobaladdr:$dst)),
2513 (CALLpcrel32 tglobaladdr:$dst)>;
2533 (TAILCALL)>;
25142534 def : Pat<(X86tailcall (i32 texternalsym:$dst)),
2515 (CALLpcrel32 texternalsym:$dst)>;
2535 (TAILCALL)>;
2536
2537 def : Pat<(X86tcret GR32:$dst, imm:$off),
2538 (TCRETURNri GR32:$dst, imm:$off)>;
2539
2540 def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off),
2541 (TCRETURNdi tglobaladdr:$dst, imm:$off)>;
2542
2543 def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off),
2544 (TCRETURNdi texternalsym:$dst, imm:$off)>;
25162545
25172546 def : Pat<(X86call (i32 tglobaladdr:$dst)),
25182547 (CALLpcrel32 tglobaladdr:$dst)>;
100100 def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst, variable_ops),
101101 "call\t{*}$dst", []>;
102102 }
103
104
105
106 let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
107 def TCRETURNdi64 : I<0, Pseudo, (outs), (ins i64imm:$dst, i32imm:$offset),
108 "#TC_RETURN $dst $offset",
109 []>;
110
111 let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
112 def TCRETURNri64 : I<0, Pseudo, (outs), (ins GR64:$dst, i32imm:$offset),
113 "#TC_RETURN $dst $offset",
114 []>;
115
116
117 let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
118 def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins GR64:$dst), "jmp{q}\t{*}$dst # TAILCALL",
119 []>;
103120
104121 // Branches
105122 let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
11041121 def : Pat<(X86tailcall GR64:$dst),
11051122 (CALL64r GR64:$dst)>;
11061123
1124
1125 // tailcall stuff
1126 def : Pat<(X86tailcall GR32:$dst),
1127 (TAILCALL)>;
1128 def : Pat<(X86tailcall (i64 tglobaladdr:$dst)),
1129 (TAILCALL)>;
1130 def : Pat<(X86tailcall (i64 texternalsym:$dst)),
1131 (TAILCALL)>;
1132
1133 def : Pat<(X86tcret GR64:$dst, imm:$off),
1134 (TCRETURNri64 GR64:$dst, imm:$off)>;
1135
1136 def : Pat<(X86tcret (i64 tglobaladdr:$dst), imm:$off),
1137 (TCRETURNdi64 tglobaladdr:$dst, imm:$off)>;
1138
1139 def : Pat<(X86tcret (i64 texternalsym:$dst), imm:$off),
1140 (TCRETURNdi64 texternalsym:$dst, imm:$off)>;
1141
11071142 // Comparisons.
11081143
11091144 // TEST R,R is smaller than CMP R,0
4646
4747 // FrameIndex for return slot.
4848 int ReturnAddrIndex;
49
50 // Delta the return address stack slot is moved by. Used for creating an
51 // area before the register spill area on the stack where the return
52 // address can be safely moved to.
53 int TailCallReturnAddrDelta;
54
4955 public:
5056 X86MachineFunctionInfo() : ForceFramePointer(false),
5157 CalleeSavedFrameSize(0),
5258 BytesToPopOnReturn(0),
5359 DecorationStyle(None),
54 ReturnAddrIndex(0) {}
60 ReturnAddrIndex(0),
61 TailCallReturnAddrDelta(0) {}
5562
5663 X86MachineFunctionInfo(MachineFunction &MF) : ForceFramePointer(false),
5764 CalleeSavedFrameSize(0),
5865 BytesToPopOnReturn(0),
5966 DecorationStyle(None),
60 ReturnAddrIndex(0) {}
67 ReturnAddrIndex(0),
68 TailCallReturnAddrDelta(0) {}
6169
6270 bool getForceFramePointer() const { return ForceFramePointer;}
6371 void setForceFramePointer(bool forceFP) { ForceFramePointer = forceFP; }
7381
7482 int getRAIndex() const { return ReturnAddrIndex; }
7583 void setRAIndex(int Index) { ReturnAddrIndex = Index; }
84
85 int getTCReturnAddrDelta() const { return TailCallReturnAddrDelta; }
86 void setTCReturnAddrDelta(int delta) { TailCallReturnAddrDelta = delta; }
7687 };
7788 } // End llvm namespace
7889
14351435
14361436 if (!hasFP(MF))
14371437 Offset += MF.getFrameInfo()->getStackSize();
1438 else
1438 else {
14391439 Offset += SlotSize; // Skip the saved EBP
1440
1440 // Skip the RETADDR move area
1441 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
1442 int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
1443 if (TailCallReturnAddrDelta < 0) Offset -= TailCallReturnAddrDelta;
1444 }
1445
14411446 MI.getOperand(i+3).ChangeToImmediate(Offset);
14421447 }
14431448
14441449 void
14451450 X86RegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF) const{
1451 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
1452 int32_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
1453 if (TailCallReturnAddrDelta < 0) {
1454 // create RETURNADDR area
1455 // arg
1456 // arg
1457 // RETADDR
1458 // { ...
1459 // RETADDR area
1460 // ...
1461 // }
1462 // [EBP]
1463 MF.getFrameInfo()->
1464 CreateFixedObject(-TailCallReturnAddrDelta,
1465 (-1 * SlotSize) + TailCallReturnAddrDelta);
1466 }
14461467 if (hasFP(MF)) {
1468 assert((TailCallReturnAddrDelta <= 0) &&
1469 "The Delta should always be zero or negative");
14471470 // Create a frame entry for the EBP register that must be saved.
14481471 int FrameIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize,
1449 (int)SlotSize * -2);
1472 (int)SlotSize * -2 +
1473 TailCallReturnAddrDelta);
14501474 assert(FrameIdx == MF.getFrameInfo()->getObjectIndexBegin() &&
14511475 "Slot for EBP register must be last in order to be found!");
14521476 }
15291553 }
15301554 }
15311555
1556 /// mergeSPUpdates - Checks the instruction before/after the passed
1557 /// instruction (selected by doMergeWithPrevious). If it is an ADD/SUB on the
1558 /// stack pointer, it is deleted and the stack adjustment is returned as a
1559 /// positive value for ADD and a negative one for SUB.
1560 static int mergeSPUpdates(MachineBasicBlock &MBB,
1561 MachineBasicBlock::iterator &MBBI,
1562 unsigned StackPtr,
1563 bool doMergeWithPrevious) {
1564
1565 if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
1566 (!doMergeWithPrevious && MBBI == MBB.end()))
1567 return 0;
1568
1569 int Offset = 0;
1570
1571 MachineBasicBlock::iterator PI = doMergeWithPrevious ? prior(MBBI) : MBBI;
1572 MachineBasicBlock::iterator NI = doMergeWithPrevious ? 0 : next(MBBI);
1573 unsigned Opc = PI->getOpcode();
1574 if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
1575 Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
1576 PI->getOperand(0).getReg() == StackPtr) {
1577 Offset += PI->getOperand(2).getImm();
1578 MBB.erase(PI);
1579 if (!doMergeWithPrevious) MBBI = NI;
1580 } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
1581 Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
1582 PI->getOperand(0).getReg() == StackPtr) {
1583 Offset -= PI->getOperand(2).getImm();
1584 MBB.erase(PI);
1585 if (!doMergeWithPrevious) MBBI = NI;
1586 }
1587
1588 return Offset;
1589 }
1590
15321591 void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
15331592 MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
15341593 MachineFrameInfo *MFI = MF.getFrameInfo();
15421601 // Prepare for frame info.
15431602 unsigned FrameLabelId = 0;
15441603
1545 // Get the number of bytes to allocate from the FrameInfo
1604 // Get the number of bytes to allocate from the FrameInfo.
15461605 uint64_t StackSize = MFI->getStackSize();
1606 // Add RETADDR move area to callee saved frame size.
1607 int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
1608 if (TailCallReturnAddrDelta < 0)
1609 X86FI->setCalleeSavedFrameSize(
1610 X86FI->getCalleeSavedFrameSize() + (-TailCallReturnAddrDelta));
15471611 uint64_t NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
1612
1613 // Insert a stack pointer adjustment for the later moving of the return
1614 // addr. This only applies to tail call optimized functions where the
1615 // callee's argument stack size is bigger than the caller's.
1616 if (TailCallReturnAddrDelta < 0) {
1617 BuildMI(MBB, MBBI, TII.get(Is64Bit ? X86::SUB64ri32 : X86::SUB32ri),
1618 StackPtr).addReg(StackPtr).addImm(-TailCallReturnAddrDelta);
1619 }
15481620
15491621 if (hasFP(MF)) {
15501622 // Get the offset of the stack slot for the EBP register... which is
16141686 MBB.insert(MBBI, MI);
16151687 }
16161688 } else {
1689 // If there is a SUB32ri of ESP immediately before this instruction,
1690 // merge the two. This can be the case when tail call elimination is
1691 // enabled and the callee has more arguments than the caller.
1692 NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true);
16171693 // If there is an ADD32ri or SUB32ri of ESP immediately after this
16181694 // instruction, merge the two instructions.
16191695 mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes);
17101786 switch (RetOpcode) {
17111787 case X86::RET:
17121788 case X86::RETI:
1789 case X86::TCRETURNdi:
1790 case X86::TCRETURNri:
1791 case X86::TCRETURNri64:
1792 case X86::TCRETURNdi64:
17131793 case X86::EH_RETURN:
17141794 case X86::TAILJMPd:
17151795 case X86::TAILJMPr:
17721852 MachineOperand &DestAddr = MBBI->getOperand(0);
17731853 assert(DestAddr.isRegister() && "Offset should be in register!");
17741854 BuildMI(MBB, MBBI, TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),StackPtr).
1775 addReg(DestAddr.getReg());
1855 addReg(DestAddr.getReg());
1856 // Tail call return: adjust the stack pointer and jump to callee
1857 } else if (RetOpcode == X86::TCRETURNri || RetOpcode == X86::TCRETURNdi ||
1858 RetOpcode == X86::TCRETURNri64 || RetOpcode == X86::TCRETURNdi64) {
1859 MBBI = prior(MBB.end());
1860 MachineOperand &JumpTarget = MBBI->getOperand(0);
1861 MachineOperand &StackAdjust = MBBI->getOperand(1);
1862 assert(StackAdjust.isImmediate() && "Expecting immediate value.");
1863
1864 // Adjust stack pointer.
1865 int StackAdj = StackAdjust.getImm();
1866 int MaxTCDelta = X86FI->getTCReturnAddrDelta();
1867 int Offset = 0;
1868 assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive");
1869 // Incorporate the retaddr area.
1870 Offset = StackAdj - MaxTCDelta;
1871 assert(Offset >= 0 && "Offset should never be negative");
1872 if (Offset) {
1873 // Check for a possible merge with the preceding ADD instruction.
1874 Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true);
1875 emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, TII);
1876 }
1877 // Jump to label or value in register.
1878 if (RetOpcode == X86::TCRETURNdi || RetOpcode == X86::TCRETURNdi64)
1879 BuildMI(MBB, MBBI, TII.get(X86::TAILJMPd)).
1880 addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1881 else if (RetOpcode == X86::TCRETURNri64) {
1882 BuildMI(MBB, MBBI, TII.get(X86::TAILJMPr64), JumpTarget.getReg());
1883 } else
1884 BuildMI(MBB, MBBI, TII.get(X86::TAILJMPr), JumpTarget.getReg());
1885 // Delete the pseudo instruction TCRETURN.
1886 MBB.erase(MBBI);
1887 } else if ((RetOpcode == X86::RET || RetOpcode == X86::RETI) &&
1888 (X86FI->getTCReturnAddrDelta() < 0)) {
1889 // Add the return addr area delta back since we are not tail calling.
1890 int delta = -1 * X86FI->getTCReturnAddrDelta();
1891 MBBI = prior(MBB.end());
1892 // Check for a possible merge with the preceding ADD instruction.
1893 delta += mergeSPUpdates(MBB, MBBI, StackPtr, true);
1894 emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, TII);
17761895 }
17771896 }
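To trace the TCRETURN path with illustrative numbers: for a TCRETURNdi with StackAdjust = 0 in a function whose TCReturnAddrDelta is -16, Offset = 0 - (-16) = 16, so the epilogue emits an add of 16 to the stack pointer (possibly merged with a preceding ADD) followed by a TAILJMPd to the target, and the TCRETURN pseudo is erased.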
17781897
0 ; RUN: llvm-as < %s | llc -tailcallopt | grep TAILCALL
1 define fastcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
2 entry:
3 ret i32 %a3
4 }
5
6 define fastcc i32 @tailcaller(i32 %in1, i32 %in2) {
7 entry:
8 %tmp11 = tail call fastcc i32 @tailcallee( i32 %in1, i32 %in2, i32 %in1, i32 %in2 ) ; <i32> [#uses=1]
9 ret i32 %tmp11
10 }
0 ; RUN: llvm-as < %s | llc -tailcallopt -mtriple=i686-pc-linux-gnu -relocation-model=pic | grep TAILCALL
1
2 define protected fastcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
3 entry:
4 ret i32 %a3
5 }
6
7 define fastcc i32 @tailcaller(i32 %in1, i32 %in2) {
8 entry:
9 %tmp11 = tail call fastcc i32 @tailcallee( i32 %in1, i32 %in2, i32 %in1, i32 %in2 ) ; <i32> [#uses=1]
10 ret i32 %tmp11
11 }
0 ; RUN: llvm-as < %s | llc -tailcallopt -mtriple=i686-pc-linux-gnu -relocation-model=pic | grep -v TAILCALL
1
2 define fastcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
3 entry:
4 ret i32 %a3
5 }
6
7 define fastcc i32 @tailcaller(i32 %in1, i32 %in2) {
8 entry:
9 %tmp11 = tail call fastcc i32 @tailcallee( i32 %in1, i32 %in2, i32 %in1, i32 %in2 ) ; <i32> [#uses=1]
10 ret i32 %tmp11
11 }