llvm.org GIT mirror llvm / f5f9e9a
Arguments spilled on the stack before a function call may have alignment requirements, for example in the case of vectors. These requirements are exploited by the code generator by using move instructions that have similar alignment requirements, e.g., movaps on x86. Although the code generator properly aligns the arguments with respect to the displacement of the stack pointer it computes, the displacement itself may cause misalignment. For example if we have %3 = load <16 x float>, <16 x float>* %1, align 64 call void @bar(<16 x float> %3, i32 0) the x86 back-end emits: movaps 32(%ecx), %xmm2 movaps (%ecx), %xmm0 movaps 16(%ecx), %xmm1 movaps 48(%ecx), %xmm3 subl $20, %esp <-- if %esp was 16-byte aligned before this instruction, it no longer will be afterwards movaps %xmm3, (%esp) <-- movaps requires 16-byte alignment, while %esp is not aligned as such. movl $0, 16(%esp) calll __bar To solve this, we need to make sure that the computed value with which the stack pointer is changed is a multiple of the maximal alignment seen during its computation. With this change we get proper alignment: subl $32, %esp movaps %xmm3, (%esp) Differential Revision: http://reviews.llvm.org/D12337 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@248786 91177308-0d34-0410-b5e6-96231b3b80d8 Jeroen Ketema 5 years ago
5 changed file(s) with 60 addition(s) and 4 deletion(s). Raw diff Collapse all Expand all
200200 LLVMContext &Context;
201201
202202 unsigned StackOffset;
203 unsigned MaxStackArgAlign;
203204 SmallVector UsedRegs;
204205 SmallVector PendingLocs;
205206
269270 CallingConv::ID getCallingConv() const { return CallingConv; }
270271 bool isVarArg() const { return IsVarArg; }
271272
272 unsigned getNextStackOffset() const { return StackOffset; }
273 /// getNextStackOffset - Return the next stack offset such that all stack
274 /// slots satisfy their alignment requirements.
275 unsigned getNextStackOffset() const {
276 return StackOffset;
277 }
278
279 /// getAlignedCallFrameSize - Return the size of the call frame needed to
280 /// be able to store all arguments and such that the alignment requirement
281 /// of each of the arguments is satisfied.
282 unsigned getAlignedCallFrameSize() const {
283 return RoundUpToAlignment(StackOffset, MaxStackArgAlign);
284 }
273285
274286 /// isAllocated - Return true if the specified register (or an alias) is
275287 /// allocated.
399411 /// and alignment.
400412 unsigned AllocateStack(unsigned Size, unsigned Align) {
401413 assert(Align && ((Align - 1) & Align) == 0); // Align is power of 2.
402 StackOffset = ((StackOffset + Align - 1) & ~(Align - 1));
414 StackOffset = RoundUpToAlignment(StackOffset, Align);
403415 unsigned Result = StackOffset;
404416 StackOffset += Size;
417 MaxStackArgAlign = std::max(Align, MaxStackArgAlign);
405418 MF.getFrameInfo()->ensureMaxAlignment(Align);
406419 return Result;
407420 }
3131 CallOrPrologue(Unknown) {
3232 // No stack is used.
3333 StackOffset = 0;
34 MaxStackArgAlign = 1;
3435
3536 clearByValRegsInfo();
3637 UsedRegs.resize((TRI.getNumRegs()+31)/32);
191192 void CCState::getRemainingRegParmsForType(SmallVectorImpl &Regs,
192193 MVT VT, CCAssignFn Fn) {
193194 unsigned SavedStackOffset = StackOffset;
195 unsigned SavedMaxStackArgAlign = MaxStackArgAlign;
194196 unsigned NumLocs = Locs.size();
195197
196198 // Set the 'inreg' flag if it is used for this calling convention.
222224 // as allocated so that future queries don't return the same registers, i.e.
223225 // when i64 and f64 are both passed in GPRs.
224226 StackOffset = SavedStackOffset;
227 MaxStackArgAlign = SavedMaxStackArgAlign;
225228 Locs.resize(NumLocs);
226229 }
227230
29052905 CCInfo.AnalyzeCallOperands(OutVTs, OutFlags, CC_X86);
29062906
29072907 // Get a count of how many bytes are to be pushed on the stack.
2908 unsigned NumBytes = CCInfo.getNextStackOffset();
2908 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
29092909
29102910 // Issue CALLSEQ_START
29112911 unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
30183018 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
30193019
30203020 // Get a count of how many bytes are to be pushed on the stack.
3021 unsigned NumBytes = CCInfo.getNextStackOffset();
3021 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
30223022 if (IsSibcall)
30233023 // This is a sibcall. The memory operands are available in caller's
30243024 // own caller's stack.
0 ; RUN: llc -mcpu=generic -mtriple=i686-pc-windows-msvc -mattr=+sse < %s | FileCheck %s
1
2 ; Check proper alignment of spilled vector
3
4 ; CHECK-LABEL: spill_ok
5 ; CHECK: subl $32, %esp
6 ; CHECK: movaps %xmm3, (%esp)
7 ; CHECK: movl $0, 16(%esp)
8 ; CHECK: calll _bar
9 define void @spill_ok(i32, <16 x float> *) {
10 entry:
11 %2 = alloca i32, i32 %0
12 %3 = load <16 x float>, <16 x float> * %1, align 64
13 tail call void @bar(<16 x float> %3, i32 0) nounwind
14 ret void
15 }
16
17 declare void @bar(<16 x float> %a, i32 %b)
18
19 ; Check that proper alignment of spilled vector does not affect vargs
20
21 ; CHECK-LABEL: vargs_not_affected
22 ; CHECK: leal 28(%ebp), %eax
23 define i32 @vargs_not_affected(<4 x float> %v, i8* %f, ...) {
24 entry:
25 %ap = alloca i8*, align 4
26 %0 = bitcast i8** %ap to i8*
27 call void @llvm.va_start(i8* %0)
28 %argp.cur = load i8*, i8** %ap, align 4
29 %argp.next = getelementptr inbounds i8, i8* %argp.cur, i32 4
30 store i8* %argp.next, i8** %ap, align 4
31 %1 = bitcast i8* %argp.cur to i32*
32 %2 = load i32, i32* %1, align 4
33 call void @llvm.va_end(i8* %0)
34 ret i32 %2
35 }
36
37 declare void @llvm.va_start(i8*)
38
39 declare void @llvm.va_end(i8*)