llvm.org GIT mirror llvm / 7fc5011
[PowerPC] ELFv2 stack space reduction The ELFv2 ABI reduces the amount of stack required to implement an ABI-compliant function call in two ways: * the "linkage area" is reduced from 48 bytes to 32 bytes by eliminating two unused doublewords * the 64-byte "parameter save area" is now optional and need not be present in certain cases (it remains mandatory in functions with variable arguments, and functions that have any parameter that is passed on the stack) The following patch implements the required changes: - reducing the linkage area, and associated relocation of the TOC save slot, in getLinkageSize / getTOCSaveOffset (this requires updating all callers of these routines to pass in the isELFv2ABI flag). - (partially) handling the case where the parameter save area is optional This latter part requires some extra explanation: Currently, we still always allocate the parameter save area when *calling* a function. That is certainly always compliant with the ABI, but may cause code to allocate stack unnecessarily. This can be addressed by a follow-on optimization patch. On the *callee* side, in LowerFormalArguments, we *must* track correctly whether the ABI guarantees that the caller has allocated the parameter save area for our use, and the patch does so. However, there is one complication: the code that handles incoming "byval" arguments will currently *always* write to the parameter save area, because it has to force incoming register arguments to the stack since it must return an *address* to implement the byval semantics. To fix this, the patch changes the LowerFormalArguments code to write arguments to a freshly allocated stack slot on the function's own stack frame instead of the argument save area in those cases where that area is not present. Reviewed by Hal Finkel. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@213490 91177308-0d34-0410-b5e6-96231b3b80d8 Ulrich Weigand 5 years ago
6 changed file(s) with 118 addition(s) and 27 deletion(s). Raw diff Collapse all Expand all
12021202 CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, TM, ArgLocs, *Context);
12031203
12041204 // Reserve space for the linkage area on the stack.
1205 unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false);
1205 bool isELFv2ABI = PPCSubTarget->isELFv2ABI();
1206 unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false,
1207 isELFv2ABI);
12061208 CCInfo.AllocateStack(LinkageSize, 8);
12071209
12081210 CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_PPC64_ELF_FIS);
12311233 // Because we cannot tell if this is needed on the caller side, we have to
12321234 // conservatively assume that it is needed. As such, make sure we have at
12331235 // least enough stack space for the caller to store the 8 GPRs.
1236 // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area.
12341237 NumBytes = std::max(NumBytes, LinkageSize + 64);
12351238
12361239 // Issue CALLSEQ_START.
399399
400400 // Maximum call frame needs to be at least big enough for linkage area.
401401 unsigned minCallFrameSize = getLinkageSize(Subtarget.isPPC64(),
402 Subtarget.isDarwinABI());
402 Subtarget.isDarwinABI(),
403 Subtarget.isELFv2ABI());
403404 maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize);
404405
405406 // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
7575
7676 /// getTOCSaveOffset - Return the previous frame offset to save the
7777 /// TOC register -- 64-bit SVR4 ABI only.
78 static unsigned getTOCSaveOffset(void) {
79 return 40;
78 static unsigned getTOCSaveOffset(bool isELFv2ABI) {
79 return isELFv2ABI ? 24 : 40;
8080 }
8181
8282 /// getFramePointerSaveOffset - Return the previous frame offset to save the
108108
109109 /// getLinkageSize - Return the size of the PowerPC ABI linkage area.
110110 ///
111 static unsigned getLinkageSize(bool isPPC64, bool isDarwinABI) {
111 static unsigned getLinkageSize(bool isPPC64, bool isDarwinABI,
112 bool isELFv2ABI) {
112113 if (isDarwinABI || isPPC64)
113 return 6 * (isPPC64 ? 8 : 4);
114 return (isELFv2ABI ? 4 : 6) * (isPPC64 ? 8 : 4);
114115
115116 // SVR4 ABI:
116117 return 8;
21892189 return Align;
21902190 }
21912191
2192 /// CalculateStackSlotUsed - Return whether this argument will use its
2193 /// stack slot (instead of being passed in registers). ArgOffset,
2194 /// AvailableFPRs, and AvailableVRs must hold the current argument
2195 /// position, and will be updated to account for this argument.
2196 static bool CalculateStackSlotUsed(EVT ArgVT, ISD::ArgFlagsTy Flags,
2197 unsigned PtrByteSize,
2198 unsigned LinkageSize,
2199 unsigned ParamAreaSize,
2200 unsigned &ArgOffset,
2201 unsigned &AvailableFPRs,
2202 unsigned &AvailableVRs) {
2203 bool UseMemory = false;
2204
2205 // Respect alignment of argument on the stack.
2206 unsigned Align = CalculateStackSlotAlignment(ArgVT, Flags, PtrByteSize);
2207 ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
2208 // If there's no space left in the argument save area, we must
2209 // use memory (this check also catches zero-sized arguments).
2210 if (ArgOffset >= LinkageSize + ParamAreaSize)
2211 UseMemory = true;
2212
2213 // Allocate argument on the stack.
2214 ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
2215 // If we overran the argument save area, we must use memory
2216 // (this check catches arguments passed partially in memory)
2217 if (ArgOffset > LinkageSize + ParamAreaSize)
2218 UseMemory = true;
2219
2220 // However, if the argument is actually passed in an FPR or a VR,
2221 // we don't use memory after all.
2222 if (!Flags.isByVal()) {
2223 if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
2224 if (AvailableFPRs > 0) {
2225 --AvailableFPRs;
2226 return false;
2227 }
2228 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
2229 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
2230 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64)
2231 if (AvailableVRs > 0) {
2232 --AvailableVRs;
2233 return false;
2234 }
2235 }
2236
2237 return UseMemory;
2238 }
2239
21922240 /// EnsureStackAlignment - Round stack frame size up from NumBytes to
21932241 /// ensure minimum alignment required for target.
21942242 static unsigned EnsureStackAlignment(const TargetMachine &Target,
22742322 getTargetMachine(), ArgLocs, *DAG.getContext());
22752323
22762324 // Reserve space for the linkage area on the stack.
2277 unsigned LinkageSize = PPCFrameLowering::getLinkageSize(false, false);
2325 unsigned LinkageSize = PPCFrameLowering::getLinkageSize(false, false, false);
22782326 CCInfo.AllocateStack(LinkageSize, PtrByteSize);
22792327
22802328 CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
24672515 SmallVectorImpl &InVals) const {
24682516 // TODO: add description of PPC stack frame format, or at least some docs.
24692517 //
2518 bool isELFv2ABI = Subtarget.isELFv2ABI();
24702519 bool isLittleEndian = Subtarget.isLittleEndian();
24712520 MachineFunction &MF = DAG.getMachineFunction();
24722521 MachineFrameInfo *MFI = MF.getFrameInfo();
24782527 (CallConv == CallingConv::Fast));
24792528 unsigned PtrByteSize = 8;
24802529
2481 unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false);
2482 unsigned ArgOffset = LinkageSize;
2530 unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false,
2531 isELFv2ABI);
24832532
24842533 static const MCPhysReg GPR[] = {
24852534 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
25012550 const unsigned Num_FPR_Regs = 13;
25022551 const unsigned Num_VR_Regs = array_lengthof(VR);
25032552
2504 unsigned GPR_idx, FPR_idx = 0, VR_idx = 0;
2553 // Do a first pass over the arguments to determine whether the ABI
2554 // guarantees that our caller has allocated the parameter save area
2555 // on its stack frame. In the ELFv1 ABI, this is always the case;
2556 // in the ELFv2 ABI, it is true if this is a vararg function or if
2557 // any parameter is located in a stack slot.
2558
2559 bool HasParameterArea = !isELFv2ABI || isVarArg;
2560 unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
2561 unsigned NumBytes = LinkageSize;
2562 unsigned AvailableFPRs = Num_FPR_Regs;
2563 unsigned AvailableVRs = Num_VR_Regs;
2564 for (unsigned i = 0, e = Ins.size(); i != e; ++i)
2565 if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].Flags,
2566 PtrByteSize, LinkageSize, ParamAreaSize,
2567 NumBytes, AvailableFPRs, AvailableVRs))
2568 HasParameterArea = true;
25052569
25062570 // Add DAG nodes to load the arguments or copy them out of registers. On
25072571 // entry to a function on PPC, the arguments start after the linkage area,
25082572 // although the first ones are often in registers.
25092573
2574 unsigned ArgOffset = LinkageSize;
2575 unsigned GPR_idx, FPR_idx = 0, VR_idx = 0;
25102576 SmallVector MemOps;
25112577 Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
25122578 unsigned CurArgIdx = 0;
25512617 }
25522618
25532619 // Create a stack object covering all stack doublewords occupied
2554 // by the argument.
2555 int FI = MFI->CreateFixedObject(ArgSize, ArgOffset, true);
2620 // by the argument. If the argument is (fully or partially) on
2621 // the stack, or if the argument is fully in registers but the
2622 // caller has allocated the parameter save area anyway, we can refer
2623 // directly to the caller's stack frame. Otherwise, create a
2624 // local copy in our own frame.
2625 int FI;
2626 if (HasParameterArea ||
2627 ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
2628 FI = MFI->CreateFixedObject(ArgSize, ArgOffset, true);
2629 else
2630 FI = MFI->CreateStackObject(ArgSize, Align, false);
25562631 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
25572632
25582633 // Handle aggregates smaller than 8 bytes.
26962771
26972772 // Area that is at least reserved in the caller of this function.
26982773 unsigned MinReservedArea;
2699 MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
2774 if (HasParameterArea)
2775 MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
2776 else
2777 MinReservedArea = LinkageSize;
27002778
27012779 // Set the size that is at least reserved in caller of this function. Tail
27022780 // call optimized functions' reserved stack space needs to be aligned so that
27572835 (CallConv == CallingConv::Fast));
27582836 unsigned PtrByteSize = isPPC64 ? 8 : 4;
27592837
2760 unsigned LinkageSize = PPCFrameLowering::getLinkageSize(isPPC64, true);
2838 unsigned LinkageSize = PPCFrameLowering::getLinkageSize(isPPC64, true,
2839 false);
27612840 unsigned ArgOffset = LinkageSize;
27622841 // Area that is at least reserved in caller of this function.
27632842 unsigned MinReservedArea = ArgOffset;
36153694 int SPDiff, unsigned NumBytes,
36163695 const SmallVectorImpl &Ins,
36173696 SmallVectorImpl &InVals) const {
3697
3698 bool isELFv2ABI = Subtarget.isELFv2ABI();
36183699 std::vector NodeTys;
36193700 SmallVector Ops;
36203701 unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, dl, SPDiff,
36903771 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
36913772 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
36923773 SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT);
3693 unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset();
3774 unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset(isELFv2ABI);
36943775 SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset);
36953776 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff);
36963777 Chain = DAG.getNode(PPCISD::LOAD_TOC, dl, VTs, Chain, AddTOC, InFlag);
37833864 getTargetMachine(), ArgLocs, *DAG.getContext());
37843865
37853866 // Reserve space for the linkage area on the stack.
3786 CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize);
3867 CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false, false),
3868 PtrByteSize);
37873869
37883870 if (isVarArg) {
37893871 // Handle fixed and variable vector arguments differently.
40114093 MF.getInfo()->setHasFastCall();
40124094
40134095 // Count how many bytes are to be pushed on the stack, including the linkage
4014 // area, and parameter passing area. We start with at least 48 bytes, which
4015 // is reserved space for [SP][CR][LR][3 x unused].
4016 unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false);
4096 // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
4097 // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
4098 // area is 32 bytes reserved space for [SP][CR][LR][TOC].
4099 unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false,
4100 isELFv2ABI);
40174101 unsigned NumBytes = LinkageSize;
40184102
40194103 // Add up all the space actually used.
40354119 // Because we cannot tell if this is needed on the caller side, we have to
40364120 // conservatively assume that it is needed. As such, make sure we have at
40374121 // least enough stack space for the caller to store the 8 GPRs.
4122 // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area.
40384123 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
40394124
40404125 // Tail call needs the stack to be aligned.
43734458 // Load r2 into a virtual register and store it to the TOC save area.
43744459 SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
43754460 // TOC save area offset.
4376 unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset();
4461 unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset(isELFv2ABI);
43774462 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset);
43784463 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
43794464 Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, MachinePointerInfo(),
44334518 // Count how many bytes are to be pushed on the stack, including the linkage
44344519 // area, and parameter passing area. We start with 24/48 bytes, which is
44354520 // prereserved space for [SP][CR][LR][3 x unused].
4436 unsigned LinkageSize = PPCFrameLowering::getLinkageSize(isPPC64, true);
4521 unsigned LinkageSize = PPCFrameLowering::getLinkageSize(isPPC64, true,
4522 false);
44374523 unsigned NumBytes = LinkageSize;
44384524
44394525 // Add up all the space actually used.
66 define void @test_indirect(void ()* nocapture %fp) {
77 ; CHECK-LABEL: @test_indirect
88 tail call void %fp()
9 ; CHECK-DAG: std 2, 40(1)
9 ; CHECK-DAG: std 2, 24(1)
1010 ; CHECK-DAG: mr 12, 3
1111 ; CHECK-DAG: mtctr 3
1212 ; CHECK: bctrl
13 ; CHECK-NEXT: ld 2, 40(1)
13 ; CHECK-NEXT: ld 2, 24(1)
1414 ret void
1515 }
1616
2121 ret void
2222 }
2323 ; CHECK: @callee1
24 ; CHECK: lwz {{[0-9]+}}, 120(1)
24 ; CHECK: lwz {{[0-9]+}}, 104(1)
2525 ; CHECK: blr
2626
2727 define void @caller1() {
3131 ret void
3232 }
3333 ; CHECK: @caller1
34 ; CHECK: stw {{[0-9]+}}, 120(1)
34 ; CHECK: stw {{[0-9]+}}, 104(1)
3535 ; CHECK: bl test1
3636
3737 declare void @test1(%struct.small_arg* sret, %struct.large_arg* byval, %struct.small_arg* byval)
4141 ret float %x
4242 }
4343 ; CHECK: @callee2
44 ; CHECK: lfs {{[0-9]+}}, 152(1)
44 ; CHECK: lfs {{[0-9]+}}, 136(1)
4545 ; CHECK: blr
4646
4747 define void @caller2() {
5151 ret void
5252 }
5353 ; CHECK: @caller2
54 ; CHECK: stfs {{[0-9]+}}, 152(1)
54 ; CHECK: stfs {{[0-9]+}}, 136(1)
5555 ; CHECK: bl test2
5656
5757 declare float @test2(float, float, float, float, float, float, float, float, float, float, float, float, float, float)