llvm.org GIT mirror llvm / d7a4f74
AArch64: treat [N x Ty] as a block during procedure calls. The AAPCS treats small structs and homogeneous floating (or vector) aggregates specially, and guarantees they either get passed as a contiguous block of registers, or prevent any future use of those registers and get passed on the stack. This concept can fit quite neatly into LLVM's own type system, mapping an HFA to [N x float] and so on, and small structs to [N x i64]. Doing so allows front-ends to emit AAPCS compliant code without having to duplicate the register counting logic. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@222903 91177308-0d34-0410-b5e6-96231b3b80d8 Tim Northover 5 years ago
10 changed file(s) with 257 addition(s) and 9 deletion(s). Raw diff Collapse all Expand all
344344 /// AllocateRegBlock - Attempt to allocate a block of RegsRequired consecutive
345345 /// registers. If this is not possible, return zero. Otherwise, return the first
346346 /// register of the block that was allocated, marking the entire block as allocated.
347 unsigned AllocateRegBlock(const uint16_t *Regs, unsigned NumRegs, unsigned RegsRequired) {
348 for (unsigned StartIdx = 0; StartIdx <= NumRegs - RegsRequired; ++StartIdx) {
347 unsigned AllocateRegBlock(ArrayRef Regs,
348 unsigned RegsRequired) {
349 if (RegsRequired > Regs.size())
350 return 0;
351
352 for (unsigned StartIdx = 0; StartIdx <= Regs.size() - RegsRequired;
353 ++StartIdx) {
349354 bool BlockAvailable = true;
350355 // Check for already-allocated regs in this block
351356 for (unsigned BlockIdx = 0; BlockIdx < RegsRequired; ++BlockIdx) {
226226 bool exceedsNaturalStackAlignment(unsigned Align) const {
227227 return (StackNaturalAlign != 0) && (Align > StackNaturalAlign);
228228 }
229
230 unsigned getStackAlignment() const { return StackNaturalAlign; }
229231
230232 bool hasMicrosoftFastStdCallMangling() const {
231233 return ManglingMode == MM_WINCOFF;
0 //=== AArch64CallingConv.h - Custom Calling Convention Routines -*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the custom routines for the AArch64 Calling Convention
10 // that aren't done by tablegen.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #ifndef LLVM_LIB_TARGET_AARCH64_AARCH64CALLINGCONVENTION_H
15 #define LLVM_LIB_TARGET_AARCH64_AARCH64CALLINGCONVENTION_H
16
17 #include "AArch64.h"
18 #include "AArch64InstrInfo.h"
19 #include "AArch64Subtarget.h"
20 #include "llvm/CodeGen/CallingConvLower.h"
21 #include "llvm/IR/CallingConv.h"
22 #include "llvm/Target/TargetInstrInfo.h"
23
24 namespace {
25 using namespace llvm;
26
27 static const uint16_t XRegList[] = {AArch64::X0, AArch64::X1, AArch64::X2,
28 AArch64::X3, AArch64::X4, AArch64::X5,
29 AArch64::X6, AArch64::X7};
30 static const uint16_t SRegList[] = {AArch64::S0, AArch64::S1, AArch64::S2,
31 AArch64::S3, AArch64::S4, AArch64::S5,
32 AArch64::S6, AArch64::S7};
33 static const uint16_t DRegList[] = {AArch64::D0, AArch64::D1, AArch64::D2,
34 AArch64::D3, AArch64::D4, AArch64::D5,
35 AArch64::D6, AArch64::D7};
36 static const uint16_t QRegList[] = {AArch64::Q0, AArch64::Q1, AArch64::Q2,
37 AArch64::Q3, AArch64::Q4, AArch64::Q5,
38 AArch64::Q6, AArch64::Q7};
39
40 static bool finishStackBlock(SmallVectorImpl &PendingMembers,
41 MVT LocVT, ISD::ArgFlagsTy &ArgFlags,
42 CCState &State, unsigned SlotAlign) {
43 unsigned Size = LocVT.getSizeInBits() / 8;
44 unsigned StackAlign = State.getMachineFunction()
45 .getSubtarget()
46 .getDataLayout()
47 ->getStackAlignment();
48 unsigned Align = std::min(ArgFlags.getOrigAlign(), StackAlign);
49
50 for (auto &It : PendingMembers) {
51 It.convertToMem(State.AllocateStack(Size, std::max(Align, SlotAlign)));
52 State.addLoc(It);
53 SlotAlign = 1;
54 }
55
56 // All pending members have now been allocated
57 PendingMembers.clear();
58 return true;
59 }
60
61 /// The Darwin variadic PCS places anonymous arguments in 8-byte stack slots. An
62 /// [N x Ty] type must still be contiguous in memory though.
63 static bool CC_AArch64_Custom_Stack_Block(
64 unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo,
65 ISD::ArgFlagsTy &ArgFlags, CCState &State) {
66 SmallVectorImpl &PendingMembers = State.getPendingLocs();
67
68 // Add the argument to the list to be allocated once we know the size of the
69 // block.
70 PendingMembers.push_back(
71 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
72
73 if (!ArgFlags.isInConsecutiveRegsLast())
74 return true;
75
76 return finishStackBlock(PendingMembers, LocVT, ArgFlags, State, 8);
77 }
78
79 /// Given an [N x Ty] block, it should be passed in a consecutive sequence of
80 /// registers. If no such sequence is available, mark the rest of the registers
81 /// of that type as used and place the argument on the stack.
82 static bool CC_AArch64_Custom_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
83 CCValAssign::LocInfo &LocInfo,
84 ISD::ArgFlagsTy &ArgFlags, CCState &State) {
85 // Try to allocate a contiguous block of registers, each of the correct
86 // size to hold one member.
87 ArrayRef RegList;
88 if (LocVT.SimpleTy == MVT::i64)
89 RegList = XRegList;
90 else if (LocVT.SimpleTy == MVT::f32)
91 RegList = SRegList;
92 else if (LocVT.SimpleTy == MVT::f64)
93 RegList = DRegList;
94 else if (LocVT.SimpleTy == MVT::v2f64)
95 RegList = QRegList;
96 else {
97 // Not an array we want to split up after all.
98 return false;
99 }
100
101 SmallVectorImpl &PendingMembers = State.getPendingLocs();
102
103 // Add the argument to the list to be allocated once we know the size of the
104 // block.
105 PendingMembers.push_back(
106 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
107
108 if (!ArgFlags.isInConsecutiveRegsLast())
109 return true;
110
111 unsigned RegResult = State.AllocateRegBlock(RegList, PendingMembers.size());
112 if (RegResult) {
113 for (auto &It : PendingMembers) {
114 It.convertToReg(RegResult);
115 State.addLoc(It);
116 ++RegResult;
117 }
118 PendingMembers.clear();
119 return true;
120 }
121
122 // Mark all regs in the class as unavailable
123 for (auto Reg : RegList)
124 State.AllocateReg(Reg);
125
126 const AArch64Subtarget &Subtarget = static_cast(
127 State.getMachineFunction().getSubtarget());
128 unsigned SlotAlign = Subtarget.isTargetDarwin() ? 1 : 8;
129
130 return finishStackBlock(PendingMembers, LocVT, ArgFlags, State, SlotAlign);
131 }
132
133 }
134
135 #endif
3838 // Put ByVal arguments directly on the stack. Minimum size and alignment of a
3939 // slot is 64-bit.
4040 CCIfByVal>,
41
42 CCIfConsecutiveRegs>,
4143
4244 // Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers,
4345 // up to eight each of GPR and FPR.
117119 // Put ByVal arguments directly on the stack. Minimum size and alignment of a
118120 // slot is 64-bit.
119121 CCIfByVal>,
122
123 CCIfConsecutiveRegs>,
120124
121125 // Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers,
122126 // up to eight each of GPR and FPR.
157161 def CC_AArch64_DarwinPCS_VarArg : CallingConv<[
158162 CCIfType<[v2f32], CCBitConvertToType>,
159163 CCIfType<[v2f64, v4f32, f128], CCBitConvertToType>,
164
165 CCIfConsecutiveRegs>,
160166
161167 // Handle all scalar types as either i64 or f64.
162168 CCIfType<[i8, i16, i32], CCPromoteToType>,
1313 //===----------------------------------------------------------------------===//
1414
1515 #include "AArch64.h"
16 #include "AArch64CallingConvention.h"
1617 #include "AArch64Subtarget.h"
1718 #include "AArch64TargetMachine.h"
1819 #include "MCTargetDesc/AArch64AddressingModes.h"
1111 //===----------------------------------------------------------------------===//
1212
1313 #include "AArch64ISelLowering.h"
14 #include "AArch64CallingConvention.h"
1415 #include "AArch64MachineFunctionInfo.h"
1516 #include "AArch64PerfectShuffle.h"
1617 #include "AArch64Subtarget.h"
88418842 Val, Stxr->getFunctionType()->getParamType(0)),
88428843 Addr);
88438844 }
8845
8846 bool AArch64TargetLowering::functionArgumentNeedsConsecutiveRegisters(
8847 Type *Ty, CallingConv::ID CallConv, bool isVarArg) const {
8848 return Ty->isArrayTy();
8849 }
472472
473473 void ReplaceNodeResults(SDNode *N, SmallVectorImpl &Results,
474474 SelectionDAG &DAG) const override;
475
476 bool functionArgumentNeedsConsecutiveRegisters(Type *Ty,
477 CallingConv::ID CallConv,
478 bool isVarArg) const;
475479 };
476480
477481 namespace AArch64 {
193193
194194 // Try to allocate a contiguous block of registers, each of the correct
195195 // size to hold one member.
196 const uint16_t *RegList;
197 unsigned NumRegs;
196 ArrayRef RegList;
198197 switch (LocVT.SimpleTy) {
199198 case MVT::f32:
200199 RegList = SRegList;
201 NumRegs = 16;
202200 break;
203201 case MVT::f64:
204202 RegList = DRegList;
205 NumRegs = 8;
206203 break;
207204 case MVT::v2f64:
208205 RegList = QRegList;
209 NumRegs = 4;
210206 break;
211207 default:
212208 llvm_unreachable("Unexpected member type for HA");
214210 }
215211
216212 unsigned RegResult =
217 State.AllocateRegBlock(RegList, NumRegs, PendingHAMembers.size());
213 State.AllocateRegBlock(RegList, PendingHAMembers.size());
218214
219215 if (RegResult) {
220216 for (SmallVectorImpl::iterator It = PendingHAMembers.begin();
0 ; RUN: llc -mtriple=aarch64-apple-ios7.0 -o - %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-DARWINPCS
1 ; RUN: llc -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AAPCS
2
3 declare void @callee(...)
4
; With one unnamed float ahead of it, the [2 x float] is expected in registers:
; the CHECK line wants its two members added straight from s1 and s2.
5 define float @test_hfa_regs(float, [2 x float] %in) {
6 ; CHECK-LABEL: test_hfa_regs:
7 ; CHECK: fadd s0, s1, s2
8
9 %lhs = extractvalue [2 x float] %in, 0
10 %rhs = extractvalue [2 x float] %in, 1
11 %sum = fadd float %lhs, %rhs
12 ret float %sum
13 }
14
15 ; Check that the array gets allocated to a contiguous block on the stack (rather
16 ; than the default of 2 8-byte slots).
17 define float @test_hfa_block([7 x float], [2 x float] %in) {
18 ; CHECK-LABEL: test_hfa_block:
19 ; CHECK: ldp [[LHS:s[0-9]+]], [[RHS:s[0-9]+]], [sp]
20 ; CHECK: fadd s0, [[LHS]], [[RHS]]
21
22 %lhs = extractvalue [2 x float] %in, 0
23 %rhs = extractvalue [2 x float] %in, 1
24 %sum = fadd float %lhs, %rhs
25 ret float %sum
26 }
27
28 ; Check that an HFA prevents backfilling of VFP registers (i.e. %rhs must go on
29 ; the stack rather than in s7).
30 define float @test_hfa_block_consume([7 x float], [2 x float] %in, float %rhs) {
31 ; CHECK-LABEL: test_hfa_block_consume:
32 ; CHECK-DAG: ldr [[LHS:s[0-9]+]], [sp]
33 ; CHECK-DAG: ldr [[RHS:s[0-9]+]], [sp, #8]
34 ; CHECK: fadd s0, [[LHS]], [[RHS]]
35
36 %lhs = extractvalue [2 x float] %in, 0
37 %sum = fadd float %lhs, %rhs
38 ret float %sum
39 }
40
; Checks where a stack-passed [2 x float] lands when a [1 x float] precedes it:
; the pair is expected at [sp, #8] for AAPCS and at [sp, #4] for the Darwin PCS.
41 define float @test_hfa_stackalign([8 x float], [1 x float], [2 x float] %in) {
42 ; CHECK-LABEL: test_hfa_stackalign:
43 ; CHECK-AAPCS: ldp [[LHS:s[0-9]+]], [[RHS:s[0-9]+]], [sp, #8]
44 ; CHECK-DARWINPCS: ldp [[LHS:s[0-9]+]], [[RHS:s[0-9]+]], [sp, #4]
45 ; CHECK: fadd s0, [[LHS]], [[RHS]]
46 %lhs = extractvalue [2 x float] %in, 0
47 %rhs = extractvalue [2 x float] %in, 1
48 %sum = fadd float %lhs, %rhs
49 ret float %sum
50 }
51
52 ; An HFA that ends up on the stack should not have any effect on where
53 ; integer-based arguments go.
54 define i64 @test_hfa_ignores_gprs([7 x float], [2 x float] %in, i64, i64 %res) {
55 ; CHECK-LABEL: test_hfa_ignores_gprs:
56 ; CHECK: mov x0, x1
57 ret i64 %res
58 }
59
60 ; [2 x float] should not be promoted to double by the Darwin varargs handling,
61 ; but should go in an 8-byte aligned slot.
62 define void @test_varargs_stackalign() {
63 ; CHECK-LABEL: test_varargs_stackalign:
64 ; CHECK-DARWINPCS: stp {{w[0-9]+}}, {{w[0-9]+}}, [sp, #16]
65
66 call void(...)* @callee([3 x float] undef, [2 x float] [float 1.0, float 2.0])
67 ret void
68 }
69
; Integer counterpart of the HFA block test: the [2 x i64] that follows a
; [7 x i64] is expected to be loaded as a pair from [sp].
70 define i64 @test_smallstruct_block([7 x i64], [2 x i64] %in) {
71 ; CHECK-LABEL: test_smallstruct_block:
72 ; CHECK: ldp [[LHS:x[0-9]+]], [[RHS:x[0-9]+]], [sp]
73 ; CHECK: add x0, [[LHS]], [[RHS]]
74 %lhs = extractvalue [2 x i64] %in, 0
75 %rhs = extractvalue [2 x i64] %in, 1
76 %sum = add i64 %lhs, %rhs
77 ret i64 %sum
78 }
79
80 ; Check that a small struct prevents backfilling of registers (i.e. %rhs
81 ; must go on the stack rather than in x7).
82 define i64 @test_smallstruct_block_consume([7 x i64], [2 x i64] %in, i64 %rhs) {
83 ; CHECK-LABEL: test_smallstruct_block_consume:
84 ; CHECK-DAG: ldr [[LHS:x[0-9]+]], [sp]
85 ; CHECK-DAG: ldr [[RHS:x[0-9]+]], [sp, #16]
86 ; CHECK: add x0, [[LHS]], [[RHS]]
87
88 %lhs = extractvalue [2 x i64] %in, 0
89 %sum = add i64 %lhs, %rhs
90 ret i64 %sum
91 }
9595
9696 ; If there are non-variadic arguments on the stack (here two i64s) then the
9797 ; __stack field should point just past them.
98 define void @test_offsetstack([10 x i64], [3 x float], ...) {
98 define void @test_offsetstack([8 x i64], [2 x i64], [3 x float], ...) {
9999 ; CHECK-LABEL: test_offsetstack:
100100 ; CHECK: sub sp, sp, #80
101101 ; CHECK: add [[STACK_TOP:x[0-9]+]], sp, #96