commit 52f83a9
ARM: simplify and extend byval handling

The main issue being fixed here is that APCS targets handling a "byval
align N" parameter with N > 4 were miscounting which objects were where
on the stack, leading to FrameLowering setting the frame pointer
incorrectly and clobbering the stack.

But byval handling had grown over many years, with multiple layers of
cruft trying to compensate for each other and calculate padding
correctly. This only really needs to be done once, in the HandleByVal
function; everywhere else should just do what that call tells it.

I also stripped out the now-unnecessary APCS/AAPCS distinctions (Clang
emits byvals with the correct C ABI alignment these days), which
simplified HandleByVal.

rdar://20095672

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@231959 91177308-0d34-0410-b5e6-96231b3b80d8

Tim Northover, 4 years ago
16 changed files with 259 additions and 302 deletions.
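Before the diff, a concrete illustration of the pattern involved (hypothetical names, not part of the commit): any aggregate whose alignment exceeds 4 reaches the backend as a "byval align N" parameter with N > 4.

struct __attribute__((aligned(8))) Pair { // 16 bytes, 8-byte alignment
  int a, b, c, d;
};

// On AAPCS targets Clang lowers 'p' as "byval align 8": with 'i' in r0, r1
// is wasted to keep the byval 8-byte aligned, r2-r3 carry its first 8 bytes
// and the remainder arrives on the stack. The prologue must size that
// register save area correctly when placing the frame pointer; APCS targets
// handed the same IR miscounted it and clobbered the stack.
void callee(int i, struct Pair p);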
292292 "This emitPrologue does not support Thumb1!");
293293 bool isARM = !AFI->isThumbFunction();
294294 unsigned Align = STI.getFrameLowering()->getStackAlignment();
295 unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align);
295 unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
296296 unsigned NumBytes = MFI->getStackSize();
297297 const std::vector &CSI = MFI->getCalleeSavedInfo();
298298 DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
741741 "This emitEpilogue does not support Thumb1!");
742742 bool isARM = !AFI->isThumbFunction();
743743
744 unsigned Align = STI.getFrameLowering()->getStackAlignment();
745 unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align);
744 unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
746745 int NumBytes = (int)MFI->getStackSize();
747746 unsigned FramePtr = RegInfo->getFrameRegister(MF);
748747
@@ -1855,59 +1855,60 @@
 /// on the stack.  Remember the next parameter register to allocate,
 /// and then confiscate the rest of the parameter registers to insure
 /// this.
-void
-ARMTargetLowering::HandleByVal(
-    CCState *State, unsigned &size, unsigned Align) const {
-  unsigned reg = State->AllocateReg(GPRArgRegs);
+void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
+                                    unsigned Align) const {
   assert((State->getCallOrPrologue() == Prologue ||
           State->getCallOrPrologue() == Call) &&
          "unhandled ParmContext");

-  if ((ARM::R0 <= reg) && (reg <= ARM::R3)) {
-    if (Subtarget->isAAPCS_ABI() && Align > 4) {
-      unsigned AlignInRegs = Align / 4;
-      unsigned Waste = (ARM::R4 - reg) % AlignInRegs;
-      for (unsigned i = 0; i < Waste; ++i)
-        reg = State->AllocateReg(GPRArgRegs);
-    }
-    if (reg != 0) {
-      unsigned excess = 4 * (ARM::R4 - reg);
-
-      // Special case when NSAA != SP and parameter size greater than size of
-      // all remained GPR regs. In that case we can't split parameter, we must
-      // send it to stack. We also must set NCRN to R4, so waste all
-      // remained registers.
-      const unsigned NSAAOffset = State->getNextStackOffset();
-      if (Subtarget->isAAPCS_ABI() && NSAAOffset != 0 && size > excess) {
-        while (State->AllocateReg(GPRArgRegs))
-          ;
-        return;
-      }
-
-      // First register for byval parameter is the first register that wasn't
-      // allocated before this method call, so it would be "reg".
-      // If parameter is small enough to be saved in range [reg, r4), then
-      // the end (first after last) register would be reg + param-size-in-regs,
-      // else parameter would be splitted between registers and stack,
-      // end register would be r4 in this case.
-      unsigned ByValRegBegin = reg;
-      unsigned ByValRegEnd = (size < excess) ? reg + size/4 : (unsigned)ARM::R4;
-      State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
-      // Note, first register is allocated in the beginning of function already,
-      // allocate remained amount of registers we need.
-      for (unsigned i = reg+1; i != ByValRegEnd; ++i)
-        State->AllocateReg(GPRArgRegs);
-      // A byval parameter that is split between registers and memory needs its
-      // size truncated here.
-      // In the case where the entire structure fits in registers, we set the
-      // size in memory to zero.
-      if (size < excess)
-        size = 0;
-      else
-        size -= excess;
-    }
-  }
-}
+  // Byval (as with any stack) slots are always at least 4 byte aligned.
+  Align = std::max(Align, 4U);
+
+  unsigned Reg = State->AllocateReg(GPRArgRegs);
+  if (!Reg)
+    return;
+
+  unsigned AlignInRegs = Align / 4;
+  unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;
+  for (unsigned i = 0; i < Waste; ++i)
+    Reg = State->AllocateReg(GPRArgRegs);
+
+  if (!Reg)
+    return;
+
+  unsigned Excess = 4 * (ARM::R4 - Reg);
+
+  // Special case when NSAA != SP and parameter size greater than size of
+  // all remained GPR regs. In that case we can't split parameter, we must
+  // send it to stack. We also must set NCRN to R4, so waste all
+  // remained registers.
+  const unsigned NSAAOffset = State->getNextStackOffset();
+  if (NSAAOffset != 0 && Size > Excess) {
+    while (State->AllocateReg(GPRArgRegs))
+      ;
+    return;
+  }
+
+  // First register for byval parameter is the first register that wasn't
+  // allocated before this method call, so it would be "reg".
+  // If parameter is small enough to be saved in range [reg, r4), then
+  // the end (first after last) register would be reg + param-size-in-regs,
+  // else parameter would be splitted between registers and stack,
+  // end register would be r4 in this case.
+  unsigned ByValRegBegin = Reg;
+  unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4);
+  State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
+  // Note, first register is allocated in the beginning of function already,
+  // allocate remained amount of registers we need.
+  for (unsigned i = Reg + 1; i != ByValRegEnd; ++i)
+    State->AllocateReg(GPRArgRegs);
+  // A byval parameter that is split between registers and memory needs its
+  // size truncated here.
+  // In the case where the entire structure fits in registers, we set the
+  // size in memory to zero.
+  Size = std::max<int>(Size - Excess, 0);
+}
+

 /// MatchingStackOffset - Return true if the given stack call argument is
 /// already available in the same position (relatively) of the caller's
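Since the new HandleByVal is now the single place where the register/stack split is decided, a small standalone model of its arithmetic may help. This is an illustrative sketch only: plain integers 0-4 stand in for r0-r4, the function name is made up, and the NSAA != SP early-out above is omitted.

#include <algorithm>

// Returns the bytes of the byval left in memory; [Begin, End) is the GPR
// range (0 == r0, 4 == one past r3) that will hold the head of the object.
unsigned modelHandleByVal(unsigned FirstFreeReg, unsigned Size,
                          unsigned Align, unsigned &Begin, unsigned &End) {
  Align = std::max(Align, 4u);             // stack slots are 4-byte aligned
  unsigned Reg = FirstFreeReg + (4 - FirstFreeReg) % (Align / 4); // waste
  Begin = End = 4;
  if (Reg >= 4)
    return Size;                           // no usable GPRs: all in memory
  unsigned Excess = 4 * (4 - Reg);         // bytes the remaining GPRs hold
  Begin = Reg;
  End = std::min(Reg + Size / 4, 4u);
  return Size < Excess ? 0 : Size - Excess;
}

For a 16-byte "byval align 8" with only r0 taken, this wastes r1, puts the head in r2-r3 (Begin = 2, End = 4) and reports 8 bytes left on the stack, matching the test_align8 expectations in the new test file below.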
@@ -2817,50 +2818,6 @@
   return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
 }

-void
-ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF,
-                                  unsigned InRegsParamRecordIdx,
-                                  unsigned ArgSize,
-                                  unsigned &ArgRegsSize,
-                                  unsigned &ArgRegsSaveSize)
-  const {
-  unsigned NumGPRs;
-  if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
-    unsigned RBegin, REnd;
-    CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
-    NumGPRs = REnd - RBegin;
-  } else {
-    unsigned int firstUnalloced;
-    firstUnalloced = CCInfo.getFirstUnallocated(GPRArgRegs);
-    NumGPRs = (firstUnalloced <= 3) ? (4 - firstUnalloced) : 0;
-  }
-
-  unsigned Align = Subtarget->getFrameLowering()->getStackAlignment();
-  ArgRegsSize = NumGPRs * 4;
-
-  // If parameter is split between stack and GPRs...
-  if (NumGPRs && Align > 4 &&
-      (ArgRegsSize < ArgSize ||
-       InRegsParamRecordIdx >= CCInfo.getInRegsParamsCount())) {
-    // Add padding for part of param recovered from GPRs.  For example,
-    // if Align == 8, its last byte must be at address K*8 - 1.
-    // We need to do it, since remained (stack) part of parameter has
-    // stack alignment, and we need to "attach" "GPRs head" without gaps
-    // to it:
-    // Stack:
-    // |---- 8 bytes block ----| |---- 8 bytes block ----| |---- 8 bytes...
-    // [ [padding] [GPRs head] ] [        Tail passed via stack       ....
-    //
-    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-    unsigned Padding =
-        OffsetToAlignment(ArgRegsSize + AFI->getArgRegsSaveSize(), Align);
-    ArgRegsSaveSize = ArgRegsSize + Padding;
-  } else
-    // We don't need to extend regs save size for byval parameters if they
-    // are passed via GPRs only.
-    ArgRegsSaveSize = ArgRegsSize;
-}
-
 // The remaining GPRs hold either the beginning of variable-argument
 // data, or the beginning of an aggregate passed by value (usually
 // byval).  Either way, we allocate stack slots adjacent to the data
@@ -2874,13 +2831,8 @@
                                     SDLoc dl, SDValue &Chain,
                                     const Value *OrigArg,
                                     unsigned InRegsParamRecordIdx,
-                                    unsigned OffsetFromOrigArg,
-                                    unsigned ArgOffset,
-                                    unsigned ArgSize,
-                                    bool ForceMutable,
-                                    unsigned ByValStoreOffset,
-                                    unsigned TotalArgRegsSaveSize) const {
-
+                                    int ArgOffset,
+                                    unsigned ArgSize) const {
   // Currently, two use-cases possible:
   // Case #1. Non-var-args function, and we meet first byval parameter.
   //          Setup first unallocated register as first byval register;
@@ -2895,82 +2847,39 @@
   MachineFunction &MF = DAG.getMachineFunction();
   MachineFrameInfo *MFI = MF.getFrameInfo();
   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-  unsigned firstRegToSaveIndex, lastRegToSaveIndex;
   unsigned RBegin, REnd;
   if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
     CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
-    firstRegToSaveIndex = RBegin - ARM::R0;
-    lastRegToSaveIndex = REnd - ARM::R0;
   } else {
-    firstRegToSaveIndex = CCInfo.getFirstUnallocated(GPRArgRegs);
-    lastRegToSaveIndex = 4;
-  }
-
-  unsigned ArgRegsSize, ArgRegsSaveSize;
-  computeRegArea(CCInfo, MF, InRegsParamRecordIdx, ArgSize,
-                 ArgRegsSize, ArgRegsSaveSize);
-
-  // Store any by-val regs to their spots on the stack so that they may be
-  // loaded by deferencing the result of formal parameter pointer or va_next.
-  // Note: once stack area for byval/varargs registers
-  // was initialized, it can't be initialized again.
-  if (ArgRegsSaveSize) {
-    unsigned Padding = ArgRegsSaveSize - ArgRegsSize;
-
-    if (Padding) {
-      assert(AFI->getStoredByValParamsPadding() == 0 &&
-             "The only parameter may be padded.");
-      AFI->setStoredByValParamsPadding(Padding);
-    }
-
-    int FrameIndex = MFI->CreateFixedObject(ArgRegsSaveSize,
-                                            Padding +
-                                              ByValStoreOffset -
-                                              (int64_t)TotalArgRegsSaveSize,
-                                            false);
-    SDValue FIN = DAG.getFrameIndex(FrameIndex, getPointerTy());
-    if (Padding) {
-      MFI->CreateFixedObject(Padding,
-                             ArgOffset + ByValStoreOffset -
-                               (int64_t)ArgRegsSaveSize,
-                             false);
-    }
-
-    SmallVector<SDValue, 4> MemOps;
-    for (unsigned i = 0; firstRegToSaveIndex < lastRegToSaveIndex;
-         ++firstRegToSaveIndex, ++i) {
-      const TargetRegisterClass *RC;
-      if (AFI->isThumb1OnlyFunction())
-        RC = &ARM::tGPRRegClass;
-      else
-        RC = &ARM::GPRRegClass;
-
-      unsigned VReg = MF.addLiveIn(GPRArgRegs[firstRegToSaveIndex], RC);
-      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
-      SDValue Store =
+    unsigned RBeginIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
+    RBegin = RBeginIdx == 4 ? ARM::R4 : GPRArgRegs[RBeginIdx];
+    REnd = ARM::R4;
+  }
+
+  if (REnd != RBegin)
+    ArgOffset = -4 * (ARM::R4 - RBegin);
+
+  int FrameIndex = MFI->CreateFixedObject(ArgSize, ArgOffset, false);
+  SDValue FIN = DAG.getFrameIndex(FrameIndex, getPointerTy());
+
+  SmallVector<SDValue, 4> MemOps;
+  const TargetRegisterClass *RC =
+      AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
+
+  for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) {
+    unsigned VReg = MF.addLiveIn(Reg, RC);
+    SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
+    SDValue Store =
         DAG.getStore(Val.getValue(1), dl, Val, FIN,
-                     MachinePointerInfo(OrigArg, OffsetFromOrigArg + 4*i),
-                     false, false, 0);
-      MemOps.push_back(Store);
-      FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
-                        DAG.getConstant(4, getPointerTy()));
-    }
-
-    AFI->setArgRegsSaveSize(ArgRegsSaveSize + AFI->getArgRegsSaveSize());
-
-    if (!MemOps.empty())
-      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
-    return FrameIndex;
-  } else {
-    if (ArgSize == 0) {
-      // We cannot allocate a zero-byte object for the first variadic argument,
-      // so just make up a size.
-      ArgSize = 4;
-    }
-    // This will point to the next argument passed via stack.
-    return MFI->CreateFixedObject(
-      ArgSize, ArgOffset, !ForceMutable);
-  }
+                     MachinePointerInfo(OrigArg, 4 * i), false, false, 0);
+    MemOps.push_back(Store);
+    FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
+                      DAG.getConstant(4, getPointerTy()));
+  }
+
+  if (!MemOps.empty())
+    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
+  return FrameIndex;
 }

 // Setup stack frame, the va_list pointer will start from.
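An illustrative offset check (numbers mine, not from the commit): if HandleByVal put the head of a byval in r2-r3, then RBegin is r2 and StoreByValRegs computes ArgOffset = -4 * (ARM::R4 - ARM::R2) = -8, so the fixed object begins 8 bytes below the CFA. That is exactly where the saved r2 and r3 land, and any stack-passed tail continues contiguously from offset 0, which is what lets the old padding bookkeeping (computeRegArea, StoredByValParamsPadding) disappear.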
@@ -2988,11 +2897,9 @@
   // the result of va_next.
   // If there is no regs to be stored, just point address after last
   // argument passed via stack.
-  int FrameIndex =
-    StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr,
-                   CCInfo.getInRegsParamsCount(), 0, ArgOffset, 0, ForceMutable,
-                   0, TotalArgRegsSaveSize);
-
+  int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr,
+                                  CCInfo.getInRegsParamsCount(),
+                                  CCInfo.getNextStackOffset(), 4);
   AFI->setVarArgsFrameIndex(FrameIndex);
 }

@@ -3018,7 +2925,6 @@
                                           isVarArg));

   SmallVector<SDValue, 16> ArgValues;
-  int lastInsIndex = -1;
   SDValue ArgValue;
   Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin();
   unsigned CurArgIdx = 0;
@@ -3027,51 +2933,41 @@
   // Then we increase this value each time we meet byval parameter.
   // We also increase this value in case of varargs function.
   AFI->setArgRegsSaveSize(0);
-
-  unsigned ByValStoreOffset = 0;
-  unsigned TotalArgRegsSaveSize = 0;
-  unsigned ArgRegsSaveSizeMaxAlign = 4;

   // Calculate the amount of stack space that we need to allocate to store
   // byval and variadic arguments that are passed in registers.
   // We need to know this before we allocate the first byval or variadic
   // argument, as they will be allocated a stack slot below the CFA (Canonical
   // Frame Address, the stack pointer at entry to the function).
+  unsigned ArgRegBegin = ARM::R4;
   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+    if (CCInfo.getInRegsParamsProcessed() >= CCInfo.getInRegsParamsCount())
+      break;
+
     CCValAssign &VA = ArgLocs[i];
-    if (VA.isMemLoc()) {
-      int index = VA.getValNo();
-      if (index != lastInsIndex) {
-        ISD::ArgFlagsTy Flags = Ins[index].Flags;
-        if (Flags.isByVal()) {
-          unsigned ExtraArgRegsSize;
-          unsigned ExtraArgRegsSaveSize;
-          computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsProcessed(),
-                         Flags.getByValSize(),
-                         ExtraArgRegsSize, ExtraArgRegsSaveSize);
-
-          TotalArgRegsSaveSize += ExtraArgRegsSaveSize;
-          if (Flags.getByValAlign() > ArgRegsSaveSizeMaxAlign)
-            ArgRegsSaveSizeMaxAlign = Flags.getByValAlign();
-          CCInfo.nextInRegsParam();
-        }
-        lastInsIndex = index;
-      }
-    }
+    unsigned Index = VA.getValNo();
+    ISD::ArgFlagsTy Flags = Ins[Index].Flags;
+    if (!Flags.isByVal())
+      continue;
+
+    assert(VA.isMemLoc() && "unexpected byval pointer in reg");
+    unsigned RBegin, REnd;
+    CCInfo.getInRegsParamInfo(CCInfo.getInRegsParamsProcessed(), RBegin, REnd);
+    ArgRegBegin = std::min(ArgRegBegin, RBegin);
+
+    CCInfo.nextInRegsParam();
   }
   CCInfo.rewindByValRegsInfo();
-  lastInsIndex = -1;
+
+  int lastInsIndex = -1;
   if (isVarArg && MFI->hasVAStart()) {
-    unsigned ExtraArgRegsSize;
-    unsigned ExtraArgRegsSaveSize;
-    computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsCount(), 0,
-                   ExtraArgRegsSize, ExtraArgRegsSaveSize);
-    TotalArgRegsSaveSize += ExtraArgRegsSaveSize;
-  }
-  // If the arg regs save area contains N-byte aligned values, the
-  // bottom of it must be at least N-byte aligned.
-  TotalArgRegsSaveSize = RoundUpToAlignment(TotalArgRegsSaveSize, ArgRegsSaveSizeMaxAlign);
-  TotalArgRegsSaveSize = std::min(TotalArgRegsSaveSize, 16U);
+    unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
+    if (RegIdx != array_lengthof(GPRArgRegs))
+      ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]);
+  }
+
+  unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin);
+  AFI->setArgRegsSaveSize(TotalArgRegsSaveSize);

   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
     CCValAssign &VA = ArgLocs[i];
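Worked example for the single up-front computation above (values illustrative): if the lowest byval head register found in the loop is r2, but the function is also variadic with r1 as its first unallocated GPR, ArgRegBegin ends up as r1 and TotalArgRegsSaveSize = 4 * (ARM::R4 - ARM::R1) = 12 bytes. The prologue and epilogue then read back the same value via the now argument-less getArgRegsSaveSize(), so every consumer counts the save area the same way.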
31763072 "Byval arguments cannot be implicit");
31773073 unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();
31783074
3179 ByValStoreOffset = RoundUpToAlignment(ByValStoreOffset, Flags.getByValAlign());
3180 int FrameIndex = StoreByValRegs(
3181 CCInfo, DAG, dl, Chain, CurOrigArg,
3182 CurByValIndex,
3183 Ins[VA.getValNo()].PartOffset,
3184 VA.getLocMemOffset(),
3185 Flags.getByValSize(),
3186 true /*force mutable frames*/,
3187 ByValStoreOffset,
3188 TotalArgRegsSaveSize);
3189 ByValStoreOffset += Flags.getByValSize();
3190 ByValStoreOffset = std::min(ByValStoreOffset, 16U);
3075 int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, CurOrigArg,
3076 CurByValIndex, VA.getLocMemOffset(),
3077 Flags.getByValSize());
31913078 InVals.push_back(DAG.getFrameIndex(FrameIndex, getPointerTy()));
31923079 CCInfo.nextInRegsParam();
31933080 } else {
@@ -527,24 +527,14 @@
                         SDLoc dl, SDValue &Chain,
                         const Value *OrigArg,
                         unsigned InRegsParamRecordIdx,
-                        unsigned OffsetFromOrigArg,
-                        unsigned ArgOffset,
-                        unsigned ArgSize,
-                        bool ForceMutable,
-                        unsigned ByValStoreOffset,
-                        unsigned TotalArgRegsSaveSize) const;
+                        int ArgOffset,
+                        unsigned ArgSize) const;

     void VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
                               SDLoc dl, SDValue &Chain,
                               unsigned ArgOffset,
                               unsigned TotalArgRegsSaveSize,
                               bool ForceMutable = false) const;
-
-    void computeRegArea(CCState &CCInfo, MachineFunction &MF,
-                        unsigned InRegsParamRecordIdx,
-                        unsigned ArgSize,
-                        unsigned &ArgRegsSize,
-                        unsigned &ArgRegsSaveSize) const;

     SDValue
       LowerCall(TargetLowering::CallLoweringInfo &CLI,
@@ -148,11 +148,7 @@
   unsigned getStoredByValParamsPadding() const { return StByValParamsPadding; }
   void setStoredByValParamsPadding(unsigned p) { StByValParamsPadding = p; }

-  unsigned getArgRegsSaveSize(unsigned Align = 0) const {
-    if (!Align)
-      return ArgRegsSaveSize;
-    return (ArgRegsSaveSize + Align - 1) & ~(Align - 1);
-  }
+  unsigned getArgRegsSaveSize() const { return ArgRegsSaveSize; }
   void setArgRegsSaveSize(unsigned s) { ArgRegsSaveSize = s; }

   unsigned getReturnRegsCount() const { return ReturnRegsCount; }
@@ -93,8 +93,7 @@
   const Thumb1InstrInfo &TII =
       *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo());

-  unsigned Align = STI.getFrameLowering()->getStackAlignment();
-  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align);
+  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
   unsigned NumBytes = MFI->getStackSize();
   assert(NumBytes >= ArgRegsSaveSize &&
          "ArgRegsSaveSize is included in NumBytes");
@@ -332,8 +331,7 @@
   const Thumb1InstrInfo &TII =
       *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo());

-  unsigned Align = STI.getFrameLowering()->getStackAlignment();
-  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align);
+  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
   int NumBytes = (int)MFI->getStackSize();
   assert((unsigned)NumBytes >= ArgRegsSaveSize &&
          "ArgRegsSaveSize is included in NumBytes");
@@ -9,7 +9,9 @@
 ; CHECK-LABEL: test_byval_8_bytes_alignment:
 define void @test_byval_8_bytes_alignment(i32 %i, ...) {
 entry:
-; CHECK: stm r0, {r1, r2, r3}
+; CHECK: sub sp, sp, #12
+; CHECK: sub sp, sp, #4
+; CHECK: stmib sp, {r1, r2, r3}
   %g = alloca i8*
   %g1 = bitcast i8** %g to i8*
   call void @llvm.va_start(i8* %g1)
@@ -9,8 +9,9 @@
 entry:

 ; Here we need to only check proper start address of restored %s argument.
-; CHECK: sub sp, sp, #16
+; CHECK: sub sp, sp, #12
 ; CHECK: push {r11, lr}
+; CHECK: sub sp, sp, #4
 ; CHECK: add r0, sp, #12
 ; CHECK: stm r0, {r1, r2, r3}
 ; CHECK: add r0, sp, #12
@@ -1,26 +1,26 @@
 ;RUN: llc -mtriple=arm-linux-gnueabihf < %s | FileCheck %s

 ;CHECK-LABEL: foo:
-;CHECK: sub sp, sp, #8
+;CHECK: sub sp, sp, #16
 ;CHECK: push {r11, lr}
-;CHECK: str r0, [sp, #12]
-;CHECK: add r0, sp, #12
+;CHECK: str r0, [sp, #8]
+;CHECK: add r0, sp, #8
 ;CHECK: bl fooUseParam
 ;CHECK: pop {r11, lr}
-;CHECK: add sp, sp, #8
+;CHECK: add sp, sp, #16
 ;CHECK: mov pc, lr

 ;CHECK-LABEL: foo2:
-;CHECK: sub sp, sp, #8
+;CHECK: sub sp, sp, #16
 ;CHECK: push {r11, lr}
 ;CHECK: str r0, [sp, #8]
 ;CHECK: add r0, sp, #8
-;CHECK: str r2, [sp, #12]
+;CHECK: str r2, [sp, #16]
 ;CHECK: bl fooUseParam
-;CHECK: add r0, sp, #12
+;CHECK: add r0, sp, #16
 ;CHECK: bl fooUseParam
 ;CHECK: pop {r11, lr}
-;CHECK: add sp, sp, #8
+;CHECK: add sp, sp, #16
 ;CHECK: mov pc, lr

 ;CHECK-LABEL: doFoo:
@@ -20,11 +20,12 @@
                           i32 %p2, ; --> R3, NSAA=SP+8
                           i32 %p3) #0 { ; --> SP+4, NSAA=SP+12
 entry:
-;CHECK: sub sp, #8
+;CHECK: sub sp, #12
 ;CHECK: push.w {r11, lr}
-;CHECK: add r0, sp, #8
-;CHECK: str r2, [sp, #12]
-;CHECK: str r1, [sp, #8]
+;CHECK: sub sp, #4
+;CHECK: add r0, sp, #12
+;CHECK: str r2, [sp, #16]
+;CHECK: str r1, [sp, #12]
 ;CHECK: bl fooUseStruct
   call void @fooUseStruct(%st_t* %p1)
   ret void
@@ -3,7 +3,7 @@
 %struct.S227 = type { [49 x i32], i32 }

 define void @check227(
-                      i32 %b,
+                      i32 %b,
                       %struct.S227* byval nocapture %arg0,
                       %struct.S227* %arg1) {
 ; b --> R0
@@ -12,14 +12,16 @@

 entry:

-;CHECK: sub sp, sp, #16
+;CHECK: sub sp, sp, #12
 ;CHECK: push {r11, lr}
+;CHECK: sub sp, sp, #4
 ;CHECK: add r0, sp, #12
 ;CHECK: stm r0, {r1, r2, r3}
 ;CHECK: ldr r0, [sp, #212]
 ;CHECK: bl useInt
+;CHECK: add sp, sp, #4
 ;CHECK: pop {r11, lr}
-;CHECK: add sp, sp, #16
+;CHECK: add sp, sp, #12

   %0 = ptrtoint %struct.S227* %arg1 to i32
   tail call void @useInt(i32 %0)
@@ -12,15 +12,16 @@
 ; c -> sp+0..sp+7
 define void @foo1(i32 %a, %struct12bytes* byval %b, i64 %c) {
 ; CHECK-LABEL: foo1
-; CHECK: sub sp, sp, #16
+; CHECK: sub sp, sp, #12
 ; CHECK: push {r11, lr}
+; CHECK: sub sp, sp, #4
 ; CHECK: add [[SCRATCH:r[0-9]+]], sp, #12
 ; CHECK: stm [[SCRATCH]], {r1, r2, r3}
 ; CHECK: ldr r0, [sp, #24]
 ; CHECK: ldr r1, [sp, #28]
 ; CHECK: bl useLong
 ; CHECK: pop {r11, lr}
-; CHECK: add sp, sp, #16
+; CHECK: add sp, sp, #12

   call void @useLong(i64 %c)
   ret void
@@ -0,0 +1,76 @@
+; RUN: llc -mtriple=thumbv7-apple-ios8.0 %s -o - | FileCheck %s
+
+; This checks that alignments greater than 4 are respected by APCS
+; targets. Mostly here to make sure *some* correct code is created after some
+; simplifying refactoring; at the time of writing there were no actual APCS
+; users of byval alignments > 4, so no real calls for ABI stability.
+
+; "byval align 16" can't fit in any regs with an i8* taking up r0.
+define i32 @test_align16(i8*, [4 x i32]* byval align 16 %b) {
+; CHECK-LABEL: test_align16:
+; CHECK-NOT: sub sp
+; CHECK: push {r4, r7, lr}
+; CHECK: add r7, sp, #4
+
+; CHECK: ldr r0, [r7, #8]
+
+  call void @bar()
+  %valptr = getelementptr [4 x i32], [4 x i32]* %b, i32 0, i32 0
+  %val = load i32, i32* %valptr
+  ret i32 %val
+}
+
+; byval align 8 can, but we used to incorrectly set r7 here (miscalculating the
+; space taken up by arg regs).
+define i32 @test_align8(i8*, [4 x i32]* byval align 8 %b) {
+; CHECK-LABEL: test_align8:
+; CHECK: sub sp, #8
+; CHECK: push {r4, r7, lr}
+; CHECK: add r7, sp, #4
+
+; CHECK-DAG: str r2, [r7, #8]
+; CHECK-DAG: str r3, [r7, #12]
+
+; CHECK: ldr r0, [r7, #8]
+
+  call void @bar()
+  %valptr = getelementptr [4 x i32], [4 x i32]* %b, i32 0, i32 0
+  %val = load i32, i32* %valptr
+  ret i32 %val
+}
+
+; "byval align 32" can't fit in regs no matter what: it would be misaligned
+; unless the incoming stack was deliberately misaligned.
+define i32 @test_align32(i8*, [4 x i32]* byval align 32 %b) {
+; CHECK-LABEL: test_align32:
+; CHECK-NOT: sub sp
+; CHECK: push {r4, r7, lr}
+; CHECK: add r7, sp, #4
+
+; CHECK: ldr r0, [r7, #8]
+
+  call void @bar()
+  %valptr = getelementptr [4 x i32], [4 x i32]* %b, i32 0, i32 0
+  %val = load i32, i32* %valptr
+  ret i32 %val
+}
+
+; When passing an object "byval align N", the stack must be at least N-aligned.
+define void @test_call_align16() {
+; CHECK-LABEL: test_call_align16:
+; CHECK: push {r4, r7, lr}
+; CHECK: add r7, sp, #4
+
+; CHECK: mov [[TMP:r[0-9]+]], sp
+; CHECK: bfc [[TMP]], #0, #4
+; CHECK: mov sp, [[TMP]]
+
+; While we're here, make sure the caller also puts it at sp
+; CHECK: mov r[[BASE:[0-9]+]], sp
+; CHECK: vst1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r[[BASE]]]
+  call i32 @test_align16(i8* null, [4 x i32]* byval align 16 @var)
+  ret void
+}
+
+@var = global [4 x i32] zeroinitializer
+declare void @bar()
@@ -61,51 +61,51 @@

 ; CHECK-FP-LABEL: sum
 ; CHECK-FP: .cfi_startproc
-; CHECK-FP: sub sp, sp, #16
-; CHECK-FP: .cfi_def_cfa_offset 16
+; CHECK-FP: sub sp, sp, #12
+; CHECK-FP: .cfi_def_cfa_offset 12
 ; CHECK-FP: push {r4, lr}
+; CHECK-FP: .cfi_def_cfa_offset 20
+; CHECK-FP: .cfi_offset lr, -16
+; CHECK-FP: .cfi_offset r4, -20
+; CHECK-FP: sub sp, sp, #4
 ; CHECK-FP: .cfi_def_cfa_offset 24
-; CHECK-FP: .cfi_offset lr, -20
-; CHECK-FP: .cfi_offset r4, -24
-; CHECK-FP: sub sp, sp, #8
-; CHECK-FP: .cfi_def_cfa_offset 32

 ; CHECK-FP-ELIM-LABEL: sum
 ; CHECK-FP-ELIM: .cfi_startproc
-; CHECK-FP-ELIM: sub sp, sp, #16
-; CHECK-FP-ELIM: .cfi_def_cfa_offset 16
+; CHECK-FP-ELIM: sub sp, sp, #12
+; CHECK-FP-ELIM: .cfi_def_cfa_offset 12
 ; CHECK-FP-ELIM: push {r4, r10, r11, lr}
-; CHECK-FP-ELIM: .cfi_def_cfa_offset 32
-; CHECK-FP-ELIM: .cfi_offset lr, -20
-; CHECK-FP-ELIM: .cfi_offset r11, -24
-; CHECK-FP-ELIM: .cfi_offset r10, -28
-; CHECK-FP-ELIM: .cfi_offset r4, -32
+; CHECK-FP-ELIM: .cfi_def_cfa_offset 28
+; CHECK-FP-ELIM: .cfi_offset lr, -16
+; CHECK-FP-ELIM: .cfi_offset r11, -20
+; CHECK-FP-ELIM: .cfi_offset r10, -24
+; CHECK-FP-ELIM: .cfi_offset r4, -28
 ; CHECK-FP-ELIM: add r11, sp, #8
-; CHECK-FP-ELIM: .cfi_def_cfa r11, 24
+; CHECK-FP-ELIM: .cfi_def_cfa r11, 20

 ; CHECK-THUMB-FP-LABEL: sum
 ; CHECK-THUMB-FP: .cfi_startproc
-; CHECK-THUMB-FP: sub sp, #16
-; CHECK-THUMB-FP: .cfi_def_cfa_offset 16
+; CHECK-THUMB-FP: sub sp, #12
+; CHECK-THUMB-FP: .cfi_def_cfa_offset 12
 ; CHECK-THUMB-FP: push {r4, lr}
+; CHECK-THUMB-FP: .cfi_def_cfa_offset 20
+; CHECK-THUMB-FP: .cfi_offset lr, -16
+; CHECK-THUMB-FP: .cfi_offset r4, -20
+; CHECK-THUMB-FP: sub sp, #4
 ; CHECK-THUMB-FP: .cfi_def_cfa_offset 24
-; CHECK-THUMB-FP: .cfi_offset lr, -20
-; CHECK-THUMB-FP: .cfi_offset r4, -24
-; CHECK-THUMB-FP: sub sp, #8
-; CHECK-THUMB-FP: .cfi_def_cfa_offset 32

 ; CHECK-THUMB-FP-ELIM-LABEL: sum
 ; CHECK-THUMB-FP-ELIM: .cfi_startproc
-; CHECK-THUMB-FP-ELIM: sub sp, #16
-; CHECK-THUMB-FP-ELIM: .cfi_def_cfa_offset 16
+; CHECK-THUMB-FP-ELIM: sub sp, #12
+; CHECK-THUMB-FP-ELIM: .cfi_def_cfa_offset 12
 ; CHECK-THUMB-FP-ELIM: push {r4, r6, r7, lr}
-; CHECK-THUMB-FP-ELIM: .cfi_def_cfa_offset 32
-; CHECK-THUMB-FP-ELIM: .cfi_offset lr, -20
-; CHECK-THUMB-FP-ELIM: .cfi_offset r7, -24
-; CHECK-THUMB-FP-ELIM: .cfi_offset r6, -28
-; CHECK-THUMB-FP-ELIM: .cfi_offset r4, -32
+; CHECK-THUMB-FP-ELIM: .cfi_def_cfa_offset 28
+; CHECK-THUMB-FP-ELIM: .cfi_offset lr, -16
+; CHECK-THUMB-FP-ELIM: .cfi_offset r7, -20
+; CHECK-THUMB-FP-ELIM: .cfi_offset r6, -24
+; CHECK-THUMB-FP-ELIM: .cfi_offset r4, -28
 ; CHECK-THUMB-FP-ELIM: add r7, sp, #8
-; CHECK-THUMB-FP-ELIM: .cfi_def_cfa r7, 24
+; CHECK-THUMB-FP-ELIM: .cfi_def_cfa r7, 20

 define i32 @sum(i32 %count, ...) {
 entry:
@@ -160,7 +160,7 @@
   %coerce.dive26 = getelementptr %struct.struct_small_nonchar, %struct.struct_small_nonchar* %d, i32 0, i32 0
   %7 = bitcast [2 x i16]* %coerce.dive26 to i32*
   %8 = load i32, i32* %7, align 1
-  call void @takes_all(i64 %4, i16 %6, %struct.struct_large_nonchar* byval align 8 %c, i32 %8, i8* %arraydecay, i8* %arraydecay22, i32* %arraydecay23, i16* %arraydecay24, i32* %ptr, i32 %0, i32 %1, i32 %2)
+  call void @takes_all(i64 %4, i16 %6, %struct.struct_large_nonchar* byval align 4 %c, i32 %8, i8* %arraydecay, i8* %arraydecay22, i32* %arraydecay23, i16* %arraydecay24, i32* %ptr, i32 %0, i32 %1, i32 %2)
   ret void
 }

@@ -307,7 +307,7 @@
   %coerce.dive26 = getelementptr %struct.struct_small_nonchar, %struct.struct_small_nonchar* %d, i32 0, i32 0
   %7 = bitcast [2 x i16]* %coerce.dive26 to i32*
   %8 = load i32, i32* %7, align 1
-  call void @takes_all(i64 %4, i16 %6, %struct.struct_large_nonchar* byval align 8 %c, i32 %8, i8* %arraydecay, i8* %arraydecay22, i32* %arraydecay23, i16* %arraydecay24, i32* %ptr, i32 %0, i32 %1, i32 %2)
+  call void @takes_all(i64 %4, i16 %6, %struct.struct_large_nonchar* byval align 4 %c, i32 %8, i8* %arraydecay, i8* %arraydecay22, i32* %arraydecay23, i16* %arraydecay24, i32* %ptr, i32 %0, i32 %1, i32 %2)
   ret void
 }

@@ -442,7 +442,7 @@
   %coerce.dive26 = getelementptr %struct.struct_small_nonchar, %struct.struct_small_nonchar* %d, i32 0, i32 0
   %7 = bitcast [2 x i16]* %coerce.dive26 to i32*
   %8 = load i32, i32* %7, align 1
-  call void @takes_all(i64 %4, i16 %6, %struct.struct_large_nonchar* byval align 8 %c, i32 %8, i8* %arraydecay, i8* %arraydecay22, i32* %arraydecay23, i16* %arraydecay24, i32* %ptr, i32 %0, i32 %1, i32 %2)
+  call void @takes_all(i64 %4, i16 %6, %struct.struct_large_nonchar* byval align 4 %c, i32 %8, i8* %arraydecay, i8* %arraydecay22, i32* %arraydecay23, i16* %arraydecay24, i32* %ptr, i32 %0, i32 %1, i32 %2)
   ret void
 }

@@ -481,7 +481,7 @@
   %coerce.dive5 = getelementptr %struct.struct_small_nonchar, %struct.struct_small_nonchar* %d2, i32 0, i32 0
   %5 = bitcast [2 x i16]* %coerce.dive5 to i32*
   %6 = load i32, i32* %5, align 1
-  call void @takes_all(i64 %2, i16 %4, %struct.struct_large_nonchar* byval align 8 %d1, i32 %6, i8* null, i8* null, i32* null, i16* null, i32* null, i32 0, i32 0, i32 0)
+  call void @takes_all(i64 %2, i16 %4, %struct.struct_large_nonchar* byval align 4 %d1, i32 %6, i8* null, i8* null, i32* null, i16* null, i32* null, i32 0, i32 0, i32 0)
   ret void
 }

@@ -20,10 +20,10 @@
 ; CHECK-LABEL: varargs_func:
 ; Reserve space for the varargs save area. This currently reserves
 ; more than enough (16 bytes rather than the 12 bytes needed).
-; CHECK: sub sp, sp, #16
+; CHECK: sub sp, sp, #12
 ; CHECK: push {r11, lr}
 ; Align the stack pointer to a multiple of 16.
-; CHECK: sub sp, sp, #8
+; CHECK: sub sp, sp, #12
 ; Calculate the address of the varargs save area and save varargs
 ; arguments into it.
 ; CHECK-NEXT: add r0, sp, #20
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | FileCheck %s -check-prefix=EABI
+; RUN: llc < %s -mtriple=armv7-linux-gnueabihf | FileCheck %s -check-prefix=EABI
 ; RUN: llc < %s -march=arm -mtriple=arm-linux-gnu | FileCheck %s -check-prefix=OABI

 define i32 @f(i32 %a, ...) {
@@ -11,13 +11,17 @@
   %tmp1 = load i32, i32* %tmp ; <i32> [#uses=1]
   store i32 %tmp1, i32* %retval
   call void @llvm.va_start(i8* null)
+  call void asm sideeffect "", "~{d8}"()
   br label %return

 return: ; preds = %entry
   %retval2 = load i32, i32* %retval ; <i32> [#uses=1]
   ret i32 %retval2
+; EABI: add sp, sp, #16
+; EABI: vpop {d8}
+; EABI: add sp, sp, #4
 ; EABI: add sp, sp, #12
-; EABI: add sp, sp, #16
+
 ; OABI: add sp, sp, #12
 ; OABI: add sp, sp, #12
 }