llvm.org GIT mirror llvm / 726c237
This is another TLC patch for separating code for the Darwin and ELF ABIs for the PowerPC target, and factoring the results. This will ease future maintenance of both subtargets. PPCTargetLowering::LowerCall_Darwin_Or_64SVR4() has grown a lot of special-case code for the different ABIs, making maintenance difficult. This is getting worse as we repair errors in the 64-bit ELF ABI implementation, while avoiding changes to the Darwin ABI logic. This patch splits the routine into LowerCall_Darwin() and LowerCall_64SVR4(), allowing both versions to be significantly simplified. I've factored out chunks of similar code where it made sense to do so. I also performed similar factoring on LowerFormalArguments_Darwin() and LowerFormalArguments_64SVR4(). There are no functional changes in this patch, and therefore no new test cases have been developed. Built and tested on powerpc64-unknown-linux-gnu with no new regressions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@166480 91177308-0d34-0410-b5e6-96231b3b80d8 Bill Schmidt 8 years ago
2 changed file(s) with 474 addition(s) and 187 deletion(s). Raw diff Collapse all Expand all
19521952 return Chain;
19531953 }
19541954
1955 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
1956 // value to MVT::i64 and then truncate to the correct register size.
1957 SDValue
1958 PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT,
1959 SelectionDAG &DAG, SDValue ArgVal,
1960 DebugLoc dl) const {
1961 if (Flags.isSExt())
1962 ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
1963 DAG.getValueType(ObjectVT));
1964 else if (Flags.isZExt())
1965 ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
1966 DAG.getValueType(ObjectVT));
1967
1968 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
1969 }
1970
1971 // Set the size that is at least reserved in caller of this function. Tail
1972 // call optimized functions' reserved stack space needs to be aligned so that
1973 // taking the difference between two stack areas will result in an aligned
1974 // stack.
1975 void
1976 PPCTargetLowering::setMinReservedArea(MachineFunction &MF, SelectionDAG &DAG,
1977 unsigned nAltivecParamsAtEnd,
1978 unsigned MinReservedArea,
1979 bool isPPC64) const {
1980 PPCFunctionInfo *FI = MF.getInfo();
1981 // Add the Altivec parameters at the end, if needed.
1982 if (nAltivecParamsAtEnd) {
1983 MinReservedArea = ((MinReservedArea+15)/16)*16;
1984 MinReservedArea += 16*nAltivecParamsAtEnd;
1985 }
1986 MinReservedArea =
1987 std::max(MinReservedArea,
1988 PPCFrameLowering::getMinCallFrameSize(isPPC64, true));
1989 unsigned TargetAlign
1990 = DAG.getMachineFunction().getTarget().getFrameLowering()->
1991 getStackAlignment();
1992 unsigned AlignMask = TargetAlign-1;
1993 MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask;
1994 FI->setMinReservedArea(MinReservedArea);
1995 }
1996
19551997 SDValue
19561998 PPCTargetLowering::LowerFormalArguments_64SVR4(
19571999 SDValue Chain,
21002142 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
21012143 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
21022144
2103 if (ObjectVT == MVT::i32) {
2145 if (ObjectVT == MVT::i32)
21042146 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
21052147 // value to MVT::i64 and then truncate to the correct register size.
2106 if (Flags.isSExt())
2107 ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
2108 DAG.getValueType(ObjectVT));
2109 else if (Flags.isZExt())
2110 ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
2111 DAG.getValueType(ObjectVT));
2112
2113 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
2114 }
2148 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
21152149
21162150 ++GPR_idx;
21172151 } else {
21892223 }
21902224
21912225 // Set the size that is at least reserved in caller of this function. Tail
2192 // call optimized function's reserved stack space needs to be aligned so that
2226 // call optimized functions' reserved stack space needs to be aligned so that
21932227 // taking the difference between two stack areas will result in an aligned
21942228 // stack.
2195 PPCFunctionInfo *FI = MF.getInfo();
2196 // Add the Altivec parameters at the end, if needed.
2197 if (nAltivecParamsAtEnd) {
2198 MinReservedArea = ((MinReservedArea+15)/16)*16;
2199 MinReservedArea += 16*nAltivecParamsAtEnd;
2200 }
2201 MinReservedArea =
2202 std::max(MinReservedArea,
2203 PPCFrameLowering::getMinCallFrameSize(true, true));
2204 unsigned TargetAlign
2205 = DAG.getMachineFunction().getTarget().getFrameLowering()->
2206 getStackAlignment();
2207 unsigned AlignMask = TargetAlign-1;
2208 MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask;
2209 FI->setMinReservedArea(MinReservedArea);
2229 setMinReservedArea(MF, DAG, nAltivecParamsAtEnd, MinReservedArea, true);
22102230
22112231 // If the function takes variable number of arguments, make a frame index for
22122232 // the start of the first vararg value... for expansion of llvm.va_start.
22142234 int Depth = ArgOffset;
22152235
22162236 FuncInfo->setVarArgsFrameIndex(
2217 MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
2218 Depth, true));
2237 MFI->CreateFixedObject(PtrByteSize, Depth, true));
22192238 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
22202239
22212240 // If this function is vararg, store any remaining integer argument regs
22282247 MachinePointerInfo(), false, false, 0);
22292248 MemOps.push_back(Store);
22302249 // Increment the address by the pointer size for the next argument to store
2231 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
2250 SDValue PtrOff = DAG.getConstant(PtrByteSize, PtrVT);
22322251 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
22332252 }
22342253 }
23932412 else
23942413 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
23952414 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
2396 EVT ObjType = (ObjSize == 1 ? MVT::i8 :
2397 (ObjSize == 2 ? MVT::i16 : MVT::i32));
2415 EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
23982416 SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
23992417 MachinePointerInfo(FuncArg,
24002418 CurArgOffset),
24562474 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
24572475 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
24582476
2459 if (ObjectVT == MVT::i32) {
2477 if (ObjectVT == MVT::i32)
24602478 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
24612479 // value to MVT::i64 and then truncate to the correct register size.
2462 if (Flags.isSExt())
2463 ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
2464 DAG.getValueType(ObjectVT));
2465 else if (Flags.isZExt())
2466 ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
2467 DAG.getValueType(ObjectVT));
2468
2469 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
2470 }
2480 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
24712481
24722482 ++GPR_idx;
24732483 } else {
25542564 }
25552565
25562566 // Set the size that is at least reserved in caller of this function. Tail
2557 // call optimized function's reserved stack space needs to be aligned so that
2567 // call optimized functions' reserved stack space needs to be aligned so that
25582568 // taking the difference between two stack areas will result in an aligned
25592569 // stack.
2560 PPCFunctionInfo *FI = MF.getInfo();
2561 // Add the Altivec parameters at the end, if needed.
2562 if (nAltivecParamsAtEnd) {
2563 MinReservedArea = ((MinReservedArea+15)/16)*16;
2564 MinReservedArea += 16*nAltivecParamsAtEnd;
2565 }
2566 MinReservedArea =
2567 std::max(MinReservedArea,
2568 PPCFrameLowering::getMinCallFrameSize(isPPC64, true));
2569 unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameLowering()->
2570 getStackAlignment();
2571 unsigned AlignMask = TargetAlign-1;
2572 MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask;
2573 FI->setMinReservedArea(MinReservedArea);
2570 setMinReservedArea(MF, DAG, nAltivecParamsAtEnd, MinReservedArea, isPPC64);
25742571
25752572 // If the function takes variable number of arguments, make a frame index for
25762573 // the start of the first vararg value... for expansion of llvm.va_start.
30203017 // Thus for a call through a function pointer, the following actions need
30213018 // to be performed:
30223019 // 1. Save the TOC of the caller in the TOC save area of its stack
3023 // frame (this is done in LowerCall_Darwin_Or_64SVR4()).
3020 // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
30243021 // 2. Load the address of the function entry point from the function
30253022 // descriptor.
30263023 // 3. Load the TOC of the callee from the function descriptor into r2.
32703267 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
32713268 Ins, DAG);
32723269
3273 if (PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64())
3274 return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
3275 isTailCall, Outs, OutVals, Ins,
3276 dl, DAG, InVals);
3277
3278 return LowerCall_Darwin_Or_64SVR4(Chain, Callee, CallConv, isVarArg,
3279 isTailCall, Outs, OutVals, Ins,
3280 dl, DAG, InVals);
3270 if (PPCSubTarget.isSVR4ABI()) {
3271 if (PPCSubTarget.isPPC64())
3272 return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
3273 isTailCall, Outs, OutVals, Ins,
3274 dl, DAG, InVals);
3275 else
3276 return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
3277 isTailCall, Outs, OutVals, Ins,
3278 dl, DAG, InVals);
3279 }
3280
3281 return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
3282 isTailCall, Outs, OutVals, Ins,
3283 dl, DAG, InVals);
32813284 }
32823285
32833286 SDValue
34933496 Ins, InVals);
34943497 }
34953498
3499 // Copy an argument into memory, being careful to do this outside the
3500 // call sequence for the call to which the argument belongs.
34963501 SDValue
3497 PPCTargetLowering::LowerCall_Darwin_Or_64SVR4(SDValue Chain, SDValue Callee,
3502 PPCTargetLowering::createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff,
3503 SDValue CallSeqStart,
3504 ISD::ArgFlagsTy Flags,
3505 SelectionDAG &DAG,
3506 DebugLoc dl) const {
3507 SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
3508 CallSeqStart.getNode()->getOperand(0),
3509 Flags, DAG, dl);
3510 // The MEMCPY must go outside the CALLSEQ_START..END.
3511 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
3512 CallSeqStart.getNode()->getOperand(1));
3513 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
3514 NewCallSeqStart.getNode());
3515 return NewCallSeqStart;
3516 }
3517
3518 SDValue
3519 PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
34983520 CallingConv::ID CallConv, bool isVarArg,
34993521 bool isTailCall,
35003522 const SmallVectorImpl &Outs,
35033525 DebugLoc dl, SelectionDAG &DAG,
35043526 SmallVectorImpl &InVals) const {
35053527
3506 bool isSVR4ABI = PPCSubTarget.isSVR4ABI();
3507
3508 unsigned NumOps = Outs.size();
3528 unsigned NumOps = Outs.size();
35093529
35103530 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
3511 bool isPPC64 = PtrVT == MVT::i64;
3512 unsigned PtrByteSize = isPPC64 ? 8 : 4;
3531 unsigned PtrByteSize = 8;
35133532
35143533 MachineFunction &MF = DAG.getMachineFunction();
35153534
35253544 unsigned nAltivecParamsAtEnd = 0;
35263545
35273546 // Count how many bytes are to be pushed on the stack, including the linkage
3528 // area, and parameter passing area. We start with 24/48 bytes, which is
3529 // prereserved space for [SP][CR][LR][3 x unused].
3547 // area, and parameter passing area. We start with at least 48 bytes, which
3548 // is reserved space for [SP][CR][LR][3 x unused].
3549 // NOTE: For PPC64, nAltivecParamsAtEnd always remains zero as a result
3550 // of this call.
35303551 unsigned NumBytes =
3531 CalculateParameterAndLinkageAreaSize(DAG, isPPC64, isVarArg, CallConv,
3532 Outs, OutVals,
3533 nAltivecParamsAtEnd);
3552 CalculateParameterAndLinkageAreaSize(DAG, true, isVarArg, CallConv,
3553 Outs, OutVals, nAltivecParamsAtEnd);
35343554
35353555 // Calculate by how many bytes the stack has to be adjusted in case of tail
35363556 // call optimization.
35553575 // Set up a copy of the stack pointer for use loading and storing any
35563576 // arguments that may not fit in the registers available for argument
35573577 // passing.
3558 SDValue StackPtr;
3559 if (isPPC64)
3560 StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
3561 else
3562 StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
3578 SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
35633579
35643580 // Figure out which arguments are going to go in registers, and which in
35653581 // memory. Also, if this is a vararg function, floating point operations
35663582 // must be stored to our stack, and loaded into integer regs as well, if
35673583 // any integer regs are available for argument passing.
3568 unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true);
3584 unsigned ArgOffset = PPCFrameLowering::getLinkageSize(true, true);
35693585 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
35703586
3571 static const uint16_t GPR_32[] = { // 32-bit registers.
3572 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
3573 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
3574 };
3575 static const uint16_t GPR_64[] = { // 64-bit registers.
3587 static const uint16_t GPR[] = {
35763588 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
35773589 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
35783590 };
35823594 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
35833595 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
35843596 };
3585 const unsigned NumGPRs = array_lengthof(GPR_32);
3597 const unsigned NumGPRs = array_lengthof(GPR);
35863598 const unsigned NumFPRs = 13;
35873599 const unsigned NumVRs = array_lengthof(VR);
3588
3589 const uint16_t *GPR = isPPC64 ? GPR_64 : GPR_32;
35903600
35913601 SmallVector, 8> RegsToPass;
35923602 SmallVector TailCallArguments;
36043614
36053615 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
36063616
3607 // On PPC64, promote integers to 64-bit values.
3608 if (isPPC64 && Arg.getValueType() == MVT::i32) {
3617 // Promote integers to 64-bit values.
3618 if (Arg.getValueType() == MVT::i32) {
36093619 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
36103620 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
36113621 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
36193629 // struct x { short a; char b; }
36203630 // will have Size = 4. With #pragma pack(1), it will have Size = 3.
36213631 // These are the proper values we need for right-justifying the
3622 // aggregate in a parameter register for 64-bit SVR4.
3632 // aggregate in a parameter register.
36233633 unsigned Size = Flags.getByValSize();
3624 // FOR DARWIN ONLY: Very small objects are passed right-justified.
3625 // Everything else is passed left-justified.
3626 // FOR 64-BIT SVR4: All aggregates smaller than 8 bytes must
3627 // be passed right-justified.
3628 if (Size==1 || Size==2 ||
3629 (Size==4 && isSVR4ABI)) {
3634 // All aggregates smaller than 8 bytes must be passed right-justified.
3635 if (Size==1 || Size==2 || Size==4) {
36303636 EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
36313637 if (GPR_idx != NumGPRs) {
36323638 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
36363642 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
36373643
36383644 ArgOffset += PtrByteSize;
3639 } else {
3640 SDValue Const = DAG.getConstant(PtrByteSize - Size,
3641 PtrOff.getValueType());
3642 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
3643 SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, AddPtr,
3644 CallSeqStart.getNode()->getOperand(0),
3645 Flags, DAG, dl);
3646 // The MEMCPY must go outside the CALLSEQ_START..END.
3647 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
3648 CallSeqStart.getNode()->getOperand(1));
3649 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
3650 NewCallSeqStart.getNode());
3651 Chain = CallSeqStart = NewCallSeqStart;
3652 ArgOffset += PtrByteSize;
3645 continue;
36533646 }
3654 continue;
3655 } else if (isSVR4ABI && GPR_idx == NumGPRs && Size < 8) {
3656 // Case: Size is 3, 5, 6, or 7 for SVR4 and we're out of registers.
3657 // This is the same case as 1, 2, and 4 for SVR4 with no registers.
3658 // FIXME: Separate into 64-bit SVR4 and Darwin versions of this
3659 // function, and combine the duplicated code chunks.
3647 }
3648
3649 if (GPR_idx == NumGPRs && Size < 8) {
36603650 SDValue Const = DAG.getConstant(PtrByteSize - Size,
36613651 PtrOff.getValueType());
36623652 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
3663 SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, AddPtr,
3664 CallSeqStart.getNode()->getOperand(0),
3665 Flags, DAG, dl);
3666 // The MEMCPY must go outside the CALLSEQ_START..END.
3667 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
3668 CallSeqStart.getNode()->getOperand(1));
3669 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
3670 NewCallSeqStart.getNode());
3671 Chain = CallSeqStart = NewCallSeqStart;
3653 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
3654 CallSeqStart,
3655 Flags, DAG, dl);
36723656 ArgOffset += PtrByteSize;
36733657 continue;
36743658 }
36773661 // registers. (This is not what the doc says.)
36783662
36793663 // FIXME: The above statement is likely due to a misunderstanding of the
3680 // documents. At least for 64-bit SVR4, all arguments must be copied
3681 // into the parameter area BY THE CALLEE in the event that the callee
3682 // takes the address of any formal argument. That has not yet been
3683 // implemented. However, it is reasonable to use the stack area as a
3684 // staging area for the register load.
3685
3686 // Skip this for small aggregates under 64-bit SVR4, as we will use
3687 // the same slot for a right-justified copy, below.
3688 if (Size >= 8 || !isSVR4ABI) {
3689 SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
3690 CallSeqStart.getNode()->getOperand(0),
3691 Flags, DAG, dl);
3692 // This must go outside the CALLSEQ_START..END.
3693 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
3694 CallSeqStart.getNode()->getOperand(1));
3695 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
3696 NewCallSeqStart.getNode());
3697 Chain = CallSeqStart = NewCallSeqStart;
3698 }
3699
3700 // FOR 64-BIT SVR4: When a register is available, pass the
3701 // aggregate right-justified.
3702 if (isSVR4ABI && Size < 8 && GPR_idx != NumGPRs) {
3664 // documents. All arguments must be copied into the parameter area BY
3665 // THE CALLEE in the event that the callee takes the address of any
3666 // formal argument. That has not yet been implemented. However, it is
3667 // reasonable to use the stack area as a staging area for the register
3668 // load.
3669
3670 // Skip this for small aggregates, as we will use the same slot for a
3671 // right-justified copy, below.
3672 if (Size >= 8)
3673 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
3674 CallSeqStart,
3675 Flags, DAG, dl);
3676
3677 // When a register is available, pass a small aggregate right-justified.
3678 if (Size < 8 && GPR_idx != NumGPRs) {
37033679 // The easiest way to get this right-justified in a register
37043680 // is to copy the structure into the rightmost portion of a
37053681 // local variable slot, then load the whole slot into the
37103686 // parameter save area instead of a new local variable.
37113687 SDValue Const = DAG.getConstant(8 - Size, PtrOff.getValueType());
37123688 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
3713 SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, AddPtr,
3714 CallSeqStart.getNode()->getOperand(0),
3715 Flags, DAG, dl);
3716
3717 // Place the memcpy outside the CALLSEQ_START..END.
3718 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
3719 CallSeqStart.getNode()->getOperand(1));
3720 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
3721 NewCallSeqStart.getNode());
3722 Chain = CallSeqStart = NewCallSeqStart;
3689 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
3690 CallSeqStart,
3691 Flags, DAG, dl);
37233692
37243693 // Load the slot into the register.
37253694 SDValue Load = DAG.getLoad(PtrVT, dl, Chain, PtrOff,
37323701 ArgOffset += PtrByteSize;
37333702 continue;
37343703 }
3704
3705 // For aggregates larger than PtrByteSize, copy the pieces of the
3706 // object that fit into registers from the parameter save area.
3707 for (unsigned j=0; j
3708 SDValue Const = DAG.getConstant(j, PtrOff.getValueType());
3709 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
3710 if (GPR_idx != NumGPRs) {
3711 SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
3712 MachinePointerInfo(),
3713 false, false, false, 0);
3714 MemOpChains.push_back(Load.getValue(1));
3715 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
3716 ArgOffset += PtrByteSize;
3717 } else {
3718 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
3719 break;
3720 }
3721 }
3722 continue;
3723 }
3724
3725 switch (Arg.getValueType().getSimpleVT().SimpleTy) {
3726 default: llvm_unreachable("Unexpected ValueType for argument!");
3727 case MVT::i32:
3728 case MVT::i64:
3729 if (GPR_idx != NumGPRs) {
3730 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
3731 } else {
3732 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
3733 true, isTailCall, false, MemOpChains,
3734 TailCallArguments, dl);
3735 }
3736 ArgOffset += PtrByteSize;
3737 break;
3738 case MVT::f32:
3739 case MVT::f64:
3740 if (FPR_idx != NumFPRs) {
3741 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
3742
3743 if (isVarArg) {
3744 SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
3745 MachinePointerInfo(), false, false, 0);
3746 MemOpChains.push_back(Store);
3747
3748 // Float varargs are always shadowed in available integer registers
3749 if (GPR_idx != NumGPRs) {
3750 SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
3751 MachinePointerInfo(), false, false,
3752 false, 0);
3753 MemOpChains.push_back(Load.getValue(1));
3754 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
3755 }
3756 } else if (GPR_idx != NumGPRs)
3757 // If we have any FPRs remaining, we may also have GPRs remaining.
3758 ++GPR_idx;
3759 } else {
3760 // Single-precision floating-point values are mapped to the
3761 // second (rightmost) word of the stack doubleword.
3762 if (Arg.getValueType() == MVT::f32) {
3763 SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
3764 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
3765 }
3766
3767 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
3768 true, isTailCall, false, MemOpChains,
3769 TailCallArguments, dl);
3770 }
3771 ArgOffset += 8;
3772 break;
3773 case MVT::v4f32:
3774 case MVT::v4i32:
3775 case MVT::v8i16:
3776 case MVT::v16i8:
3777 if (isVarArg) {
3778 // These go aligned on the stack, or in the corresponding R registers
3779 // when within range. The Darwin PPC ABI doc claims they also go in
3780 // V registers; in fact gcc does this only for arguments that are
3781 // prototyped, not for those that match the "...". We do it for all
3782 // arguments, which seems to work.
3783 while (ArgOffset % 16 !=0) {
3784 ArgOffset += PtrByteSize;
3785 if (GPR_idx != NumGPRs)
3786 GPR_idx++;
3787 }
3788 // We could elide this store in the case where the object fits
3789 // entirely in R registers. Maybe later.
3790 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
3791 DAG.getConstant(ArgOffset, PtrVT));
3792 SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
3793 MachinePointerInfo(), false, false, 0);
3794 MemOpChains.push_back(Store);
3795 if (VR_idx != NumVRs) {
3796 SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff,
3797 MachinePointerInfo(),
3798 false, false, false, 0);
3799 MemOpChains.push_back(Load.getValue(1));
3800 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
3801 }
3802 ArgOffset += 16;
3803 for (unsigned i=0; i<16; i+=PtrByteSize) {
3804 if (GPR_idx == NumGPRs)
3805 break;
3806 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
3807 DAG.getConstant(i, PtrVT));
3808 SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
3809 false, false, false, 0);
3810 MemOpChains.push_back(Load.getValue(1));
3811 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
3812 }
3813 break;
3814 }
3815
3816 // Non-varargs Altivec params generally go in registers, but have
3817 // stack space allocated at the end.
3818 if (VR_idx != NumVRs) {
3819 // Doesn't have GPR space allocated.
3820 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
3821 } else {
3822 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
3823 true, isTailCall, true, MemOpChains,
3824 TailCallArguments, dl);
3825 ArgOffset += 16;
3826 }
3827 break;
3828 }
3829 }
3830
3831 if (!MemOpChains.empty())
3832 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
3833 &MemOpChains[0], MemOpChains.size());
3834
3835 // Check if this is an indirect call (MTCTR/BCTRL).
3836 // See PrepareCall() for more information about calls through function
3837 // pointers in the 64-bit SVR4 ABI.
3838 if (!isTailCall &&
3839 !dyn_cast(Callee) &&
3840 !dyn_cast(Callee) &&
3841 !isBLACompatibleAddress(Callee, DAG)) {
3842 // Load r2 into a virtual register and store it to the TOC save area.
3843 SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
3844 // TOC save area offset.
3845 SDValue PtrOff = DAG.getIntPtrConstant(40);
3846 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
3847 Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, MachinePointerInfo(),
3848 false, false, 0);
3849 // R12 must contain the address of an indirect callee. This does not
3850 // mean the MTCTR instruction must use R12; it's easier to model this
3851 // as an extra parameter, so do that.
3852 RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
3853 }
3854
3855 // Build a sequence of copy-to-reg nodes chained together with token chain
3856 // and flag operands which copy the outgoing args into the appropriate regs.
3857 SDValue InFlag;
3858 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
3859 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
3860 RegsToPass[i].second, InFlag);
3861 InFlag = Chain.getValue(1);
3862 }
3863
3864 if (isTailCall)
3865 PrepareTailCall(DAG, InFlag, Chain, dl, true, SPDiff, NumBytes, LROp,
3866 FPOp, true, TailCallArguments);
3867
3868 return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
3869 RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
3870 Ins, InVals);
3871 }
3872
3873 SDValue
3874 PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
3875 CallingConv::ID CallConv, bool isVarArg,
3876 bool isTailCall,
3877 const SmallVectorImpl &Outs,
3878 const SmallVectorImpl &OutVals,
3879 const SmallVectorImpl &Ins,
3880 DebugLoc dl, SelectionDAG &DAG,
3881 SmallVectorImpl &InVals) const {
3882
3883 unsigned NumOps = Outs.size();
3884
3885 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
3886 bool isPPC64 = PtrVT == MVT::i64;
3887 unsigned PtrByteSize = isPPC64 ? 8 : 4;
3888
3889 MachineFunction &MF = DAG.getMachineFunction();
3890
3891 // Mark this function as potentially containing a function that contains a
3892 // tail call. As a consequence the frame pointer will be used for dynamicalloc
3893 // and restoring the caller's stack pointer in this function's epilog. This is
3894 // done because tail calling the called function might overwrite the value
3895 // in this function's (MF) stack pointer stack slot 0(SP).
3896 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
3897 CallConv == CallingConv::Fast)
3898 MF.getInfo()->setHasFastCall();
3899
3900 unsigned nAltivecParamsAtEnd = 0;
3901
3902 // Count how many bytes are to be pushed on the stack, including the linkage
3903 // area, and parameter passing area. We start with 24/48 bytes, which is
3904 // prereserved space for [SP][CR][LR][3 x unused].
3905 unsigned NumBytes =
3906 CalculateParameterAndLinkageAreaSize(DAG, isPPC64, isVarArg, CallConv,
3907 Outs, OutVals,
3908 nAltivecParamsAtEnd);
3909
3910 // Calculate by how many bytes the stack has to be adjusted in case of tail
3911 // call optimization.
3912 int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
3913
3914 // To protect arguments on the stack from being clobbered in a tail call,
3915 // force all the loads to happen before doing any other lowering.
3916 if (isTailCall)
3917 Chain = DAG.getStackArgumentTokenFactor(Chain);
3918
3919 // Adjust the stack pointer for the new arguments...
3920 // These operations are automatically eliminated by the prolog/epilog pass
3921 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
3922 SDValue CallSeqStart = Chain;
3923
3924 // Load the return address and frame pointer so they can be moved somewhere
3925 // else later.
3926 SDValue LROp, FPOp;
3927 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true,
3928 dl);
3929
3930 // Set up a copy of the stack pointer for use loading and storing any
3931 // arguments that may not fit in the registers available for argument
3932 // passing.
3933 SDValue StackPtr;
3934 if (isPPC64)
3935 StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
3936 else
3937 StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
3938
3939 // Figure out which arguments are going to go in registers, and which in
3940 // memory. Also, if this is a vararg function, floating point operations
3941 // must be stored to our stack, and loaded into integer regs as well, if
3942 // any integer regs are available for argument passing.
3943 unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true);
3944 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
3945
3946 static const uint16_t GPR_32[] = { // 32-bit registers.
3947 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
3948 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
3949 };
3950 static const uint16_t GPR_64[] = { // 64-bit registers.
3951 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
3952 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
3953 };
3954 static const uint16_t *FPR = GetFPR();
3955
3956 static const uint16_t VR[] = {
3957 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
3958 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
3959 };
3960 const unsigned NumGPRs = array_lengthof(GPR_32);
3961 const unsigned NumFPRs = 13;
3962 const unsigned NumVRs = array_lengthof(VR);
3963
3964 const uint16_t *GPR = isPPC64 ? GPR_64 : GPR_32;
3965
3966 SmallVector, 8> RegsToPass;
3967 SmallVector TailCallArguments;
3968
3969 SmallVector MemOpChains;
3970 for (unsigned i = 0; i != NumOps; ++i) {
3971 SDValue Arg = OutVals[i];
3972 ISD::ArgFlagsTy Flags = Outs[i].Flags;
3973
3974 // PtrOff will be used to store the current argument to the stack if a
3975 // register cannot be found for it.
3976 SDValue PtrOff;
3977
3978 PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
3979
3980 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
3981
3982 // On PPC64, promote integers to 64-bit values.
3983 if (isPPC64 && Arg.getValueType() == MVT::i32) {
3984 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
3985 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
3986 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
3987 }
3988
3989 // FIXME memcpy is used way more than necessary. Correctness first.
3990 // Note: "by value" is code for passing a structure by value, not
3991 // basic types.
3992 if (Flags.isByVal()) {
3993 unsigned Size = Flags.getByValSize();
3994 // Very small objects are passed right-justified. Everything else is
3995 // passed left-justified.
3996 if (Size==1 || Size==2) {
3997 EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
3998 if (GPR_idx != NumGPRs) {
3999 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
4000 MachinePointerInfo(), VT,
4001 false, false, 0);
4002 MemOpChains.push_back(Load.getValue(1));
4003 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
4004
4005 ArgOffset += PtrByteSize;
4006 } else {
4007 SDValue Const = DAG.getConstant(PtrByteSize - Size,
4008 PtrOff.getValueType());
4009 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
4010 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
4011 CallSeqStart,
4012 Flags, DAG, dl);
4013 ArgOffset += PtrByteSize;
4014 }
4015 continue;
4016 }
4017 // Copy entire object into memory. There are cases where gcc-generated
4018 // code assumes it is there, even if it could be put entirely into
4019 // registers. (This is not what the doc says.)
4020 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
4021 CallSeqStart,
4022 Flags, DAG, dl);
37354023
37364024 // For small aggregates (Darwin only) and aggregates >= PtrByteSize,
37374025 // copy the pieces of the object that fit into registers from the
38044092 !isPPC64) // PPC64 has 64-bit GPR's obviously :)
38054093 ++GPR_idx;
38064094 }
3807 } else {
3808 // Single-precision floating-point values are mapped to the
3809 // second (rightmost) word of the stack doubleword.
3810 if (Arg.getValueType() == MVT::f32 && isPPC64 && isSVR4ABI) {
3811 SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
3812 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
3813 }
3814
4095 } else
38154096 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
38164097 isPPC64, isTailCall, false, MemOpChains,
38174098 TailCallArguments, dl);
3818 }
38194099 if (isPPC64)
38204100 ArgOffset += 8;
38214101 else
39104190 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
39114191 &MemOpChains[0], MemOpChains.size());
39124192
3913 // Check if this is an indirect call (MTCTR/BCTRL).
3914 // See PrepareCall() for more information about calls through function
3915 // pointers in the 64-bit SVR4 ABI.
3916 if (!isTailCall && isPPC64 && PPCSubTarget.isSVR4ABI() &&
3917 !dyn_cast(Callee) &&
3918 !dyn_cast(Callee) &&
3919 !isBLACompatibleAddress(Callee, DAG)) {
3920 // Load r2 into a virtual register and store it to the TOC save area.
3921 SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
3922 // TOC save area offset.
3923 SDValue PtrOff = DAG.getIntPtrConstant(40);
3924 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
3925 Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, MachinePointerInfo(),
3926 false, false, 0);
3927 }
3928
39294193 // On Darwin, R12 must contain the address of an indirect callee. This does
39304194 // not mean the MTCTR instruction must use R12; it's easier to model this as
39314195 // an extra parameter, so do that.
466466 DebugLoc dl, SelectionDAG &DAG) const;
467467
468468 SDValue
469 extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT, SelectionDAG &DAG,
470 SDValue ArgVal, DebugLoc dl) const;
471
472 void
473 setMinReservedArea(MachineFunction &MF, SelectionDAG &DAG,
474 unsigned nAltivecParamsAtEnd,
475 unsigned MinReservedArea, bool isPPC64) const;
476
477 SDValue
469478 LowerFormalArguments_Darwin(SDValue Chain,
470479 CallingConv::ID CallConv, bool isVarArg,
471480 const SmallVectorImpl &Ins,
485494 SmallVectorImpl &InVals) const;
486495
487496 SDValue
488 LowerCall_Darwin_Or_64SVR4(SDValue Chain, SDValue Callee,
497 createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff,
498 SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
499 SelectionDAG &DAG, DebugLoc dl) const;
500
501 SDValue
502 LowerCall_Darwin(SDValue Chain, SDValue Callee,
503 CallingConv::ID CallConv,
504 bool isVarArg, bool isTailCall,
505 const SmallVectorImpl &Outs,
506 const SmallVectorImpl &OutVals,
507 const SmallVectorImpl &Ins,
508 DebugLoc dl, SelectionDAG &DAG,
509 SmallVectorImpl &InVals) const;
510 SDValue
511 LowerCall_64SVR4(SDValue Chain, SDValue Callee,
489512 CallingConv::ID CallConv,
490513 bool isVarArg, bool isTailCall,
491514 const SmallVectorImpl &Outs,