llvm.org GIT mirror llvm / 5530ac9
ARM: treat [N x i32] and [N x i64] as AAPCS composite types The logic is almost there already, with our special homogeneous aggregate handling. Tweaking it like this allows front-ends to emit AAPCS compliant code without ever having to count registers or add discarded padding arguments. Only arrays of i32 and i64 are needed to model AAPCS rules, but I decided to apply the logic to all integer arrays for more consistency. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@230348 91177308-0d34-0410-b5e6-96231b3b80d8 Tim Northover 5 years ago
6 changed file(s) with 219 addition(s) and 79 deletion(s). Raw diff Collapse all Expand all
121121 // There is no need to differentiate between a pending CCValAssign and other
122122 // kinds, as they are stored in a different list.
123123 static CCValAssign getPending(unsigned ValNo, MVT ValVT, MVT LocVT,
124 LocInfo HTP) {
125 return getReg(ValNo, ValVT, 0, LocVT, HTP);
124 LocInfo HTP, unsigned ExtraInfo = 0) {
125 return getReg(ValNo, ValVT, ExtraInfo, LocVT, HTP);
126126 }
127127
128128 void convertToReg(unsigned RegNo) {
145145
146146 unsigned getLocReg() const { assert(isRegLoc()); return Loc; }
147147 unsigned getLocMemOffset() const { assert(isMemLoc()); return Loc; }
148 unsigned getExtraInfo() const { return Loc; }
148149 MVT getLocVT() const { return LocVT; }
149150
150151 LocInfo getLocInfo() const { return HTP; }
74627462 }
74637463 if (Args[i].isNest)
74647464 Flags.setNest();
7465 if (NeedsRegBlock) {
7465 if (NeedsRegBlock)
74667466 Flags.setInConsecutiveRegs();
7467 if (Value == NumValues - 1)
7468 Flags.setInConsecutiveRegsLast();
7469 }
74707467 Flags.setOrigAlign(OriginalAlignment);
74717468
74727469 MVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT);
75157512 CLI.Outs.push_back(MyFlags);
75167513 CLI.OutVals.push_back(Parts[j]);
75177514 }
7515
7516 if (NeedsRegBlock && Value == NumValues - 1)
7517 CLI.Outs[CLI.Outs.size() - 1].Flags.setInConsecutiveRegsLast();
75187518 }
75197519 }
75207520
77307730 }
77317731 if (F.getAttributes().hasAttribute(Idx, Attribute::Nest))
77327732 Flags.setNest();
7733 if (NeedsRegBlock) {
7733 if (NeedsRegBlock)
77347734 Flags.setInConsecutiveRegs();
7735 if (Value == NumValues - 1)
7736 Flags.setInConsecutiveRegsLast();
7737 }
77387735 Flags.setOrigAlign(OriginalAlignment);
77397736
77407737 MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
77497746 MyFlags.Flags.setOrigAlign(1);
77507747 Ins.push_back(MyFlags);
77517748 }
7749 if (NeedsRegBlock && Value == NumValues - 1)
7750 Ins[Ins.size() - 1].Flags.setInConsecutiveRegsLast();
77527751 PartBase += VT.getStoreSize();
77537752 }
77547753 }
159159 State);
160160 }
161161
162 static const uint16_t RRegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
163
162164 static const uint16_t SRegList[] = { ARM::S0, ARM::S1, ARM::S2, ARM::S3,
163165 ARM::S4, ARM::S5, ARM::S6, ARM::S7,
164166 ARM::S8, ARM::S9, ARM::S10, ARM::S11,
167169 ARM::D4, ARM::D5, ARM::D6, ARM::D7 };
168170 static const uint16_t QRegList[] = { ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3 };
169171
172
170173 // Allocate part of an AAPCS HFA or HVA. We assume that each member of the HA
171174 // has InConsecutiveRegs set, and that the last member also has
172175 // InConsecutiveRegsLast set. We must process all members of the HA before
173176 // we can allocate it, as we need to know the total number of registers that
174177 // will be needed in order to (attempt to) allocate a contiguous block.
175 static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
176 CCValAssign::LocInfo &LocInfo,
177 ISD::ArgFlagsTy &ArgFlags, CCState &State) {
178 SmallVectorImpl<CCValAssign> &PendingHAMembers = State.getPendingLocs();
178 static bool CC_ARM_AAPCS_Custom_Aggregate(unsigned &ValNo, MVT &ValVT,
179 MVT &LocVT,
180 CCValAssign::LocInfo &LocInfo,
181 ISD::ArgFlagsTy &ArgFlags,
182 CCState &State) {
183 SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
179184
180185 // AAPCS HFAs must have 1-4 elements, all of the same type
181 assert(PendingHAMembers.size() < 4);
182 if (PendingHAMembers.size() > 0)
183 assert(PendingHAMembers[0].getLocVT() == LocVT);
186 if (PendingMembers.size() > 0)
187 assert(PendingMembers[0].getLocVT() == LocVT);
184188
185189 // Add the argument to the list to be allocated once we know the size of the
186 // HA
187 PendingHAMembers.push_back(
188 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
189
190 if (ArgFlags.isInConsecutiveRegsLast()) {
191 assert(PendingHAMembers.size() > 0 && PendingHAMembers.size() <= 4 &&
192 "Homogeneous aggregates must have between 1 and 4 members");
193
194 // Try to allocate a contiguous block of registers, each of the correct
195 // size to hold one member.
196 ArrayRef<uint16_t> RegList;
197 switch (LocVT.SimpleTy) {
198 case MVT::f32:
199 RegList = SRegList;
200 break;
201 case MVT::f64:
202 RegList = DRegList;
203 break;
204 case MVT::v2f64:
205 RegList = QRegList;
206 break;
207 default:
208 llvm_unreachable("Unexpected member type for HA");
209 break;
190 // aggregate. Store the type's required alignment as extra info for later: in
191 // the [N x i64] case all trace has been removed by the time we actually get
192 // to do allocation.
193 PendingMembers.push_back(CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo,
194 ArgFlags.getOrigAlign()));
195
196 if (!ArgFlags.isInConsecutiveRegsLast())
197 return true;
198
199 // Try to allocate a contiguous block of registers, each of the correct
200 // size to hold one member.
201 unsigned Align = std::min(PendingMembers[0].getExtraInfo(), 8U);
202
203 ArrayRef<uint16_t> RegList;
204 switch (LocVT.SimpleTy) {
205 case MVT::i32: {
206 RegList = RRegList;
207 unsigned RegIdx = State.getFirstUnallocated(RegList);
208
209 // First consume all registers that would give an unaligned object. Whether
210 // we go on stack or in regs, no-one will be using them in future.
211 unsigned RegAlign = RoundUpToAlignment(Align, 4) / 4;
212 while (RegIdx % RegAlign != 0 && RegIdx < RegList.size())
213 State.AllocateReg(RegList[RegIdx++]);
214
215 break;
216 }
217 case MVT::f32:
218 RegList = SRegList;
219 break;
220 case MVT::f64:
221 RegList = DRegList;
222 break;
223 case MVT::v2f64:
224 RegList = QRegList;
225 break;
226 default:
227 llvm_unreachable("Unexpected member type for block aggregate");
228 break;
229 }
230
231 unsigned RegResult = State.AllocateRegBlock(RegList, PendingMembers.size());
232 if (RegResult) {
233 for (SmallVectorImpl<CCValAssign>::iterator It = PendingMembers.begin();
234 It != PendingMembers.end(); ++It) {
235 It->convertToReg(RegResult);
236 State.addLoc(*It);
237 ++RegResult;
210238 }
211
212 unsigned RegResult =
213 State.AllocateRegBlock(RegList, PendingHAMembers.size());
214
215 if (RegResult) {
216 for (SmallVectorImpl<CCValAssign>::iterator It = PendingHAMembers.begin();
217 It != PendingHAMembers.end(); ++It) {
218 It->convertToReg(RegResult);
219 State.addLoc(*It);
220 ++RegResult;
221 }
222 PendingHAMembers.clear();
223 return true;
224 }
225
226 // Register allocation failed, fall back to the stack
227
228 // Mark all VFP regs as unavailable (AAPCS rule C.2.vfp)
229 for (unsigned regNo = 0; regNo < 16; ++regNo)
230 State.AllocateReg(SRegList[regNo]);
231
232 unsigned Size = LocVT.getSizeInBits() / 8;
233 unsigned Align = std::min(Size, 8U);
234
235 for (auto It : PendingHAMembers) {
236 It.convertToMem(State.AllocateStack(Size, Align));
239 PendingMembers.clear();
240 return true;
241 }
242
243 // Register allocation failed, we'll be needing the stack
244 unsigned Size = LocVT.getSizeInBits() / 8;
245 if (LocVT == MVT::i32 && State.getNextStackOffset() == 0) {
246 // If nothing else has used the stack until this point, a non-HFA aggregate
247 // can be split between regs and stack.
248 unsigned RegIdx = State.getFirstUnallocated(RegList);
249 for (auto &It : PendingMembers) {
250 if (RegIdx >= RegList.size())
251 It.convertToMem(State.AllocateStack(Size, Size));
252 else
253 It.convertToReg(State.AllocateReg(RegList[RegIdx++]));
254
237255 State.addLoc(It);
238256 }
239
240 // All pending members have now been allocated
241 PendingHAMembers.clear();
242 }
243
244 // This will be allocated by the last member of the HA
257 PendingMembers.clear();
258 return true;
259 } else if (LocVT != MVT::i32)
260 RegList = SRegList;
261
262 // Mark all regs as unavailable (AAPCS rule C.2.vfp for VFP, C.6 for core)
263 for (auto Reg : RegList)
264 State.AllocateReg(Reg);
265
266 for (auto &It : PendingMembers) {
267 It.convertToMem(State.AllocateStack(Size, Align));
268 State.addLoc(It);
269
270 // After the first item has been allocated, the rest are packed as tightly
271 // as possible. (E.g. an incoming i64 would have starting Align of 8, but
272 // we'll be allocating a bunch of i32 slots).
273 Align = Size;
274 }
275
276 // All pending members have now been allocated
277 PendingMembers.clear();
278
279 // This will be allocated by the last member of the aggregate
245280 return true;
246281 }
247282
174174 CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType>,
175175
176176 // HFAs are passed in a contiguous block of registers, or on the stack
177 CCIfConsecutiveRegs<CCCustom<"CC_ARM_AAPCS_Custom_HA">>,
177 CCIfConsecutiveRegs<CCCustom<"CC_ARM_AAPCS_Custom_Aggregate">>,
178178
179179 CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
180180 CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
1136211362 return (Members > 0 && Members <= 4);
1136311363 }
1136411364
11365 /// \brief Return true if a type is an AAPCS-VFP homogeneous aggregate.
11365 /// \brief Return true if a type is an AAPCS-VFP homogeneous aggregate or one of
11366 /// [N x i32] or [N x i64]. This allows front-ends to skip emitting padding when
11367 /// passing according to AAPCS rules.
1136611368 bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters(
1136711369 Type *Ty, CallingConv::ID CallConv, bool isVarArg) const {
1136811370 if (getEffectiveCallingConv(CallConv, isVarArg) !=
1137111373
1137211374 HABaseType Base = HA_UNKNOWN;
1137311375 uint64_t Members = 0;
11374 bool result = isHomogeneousAggregate(Ty, Base, Members);
11375 DEBUG(dbgs() << "isHA: " << result << " "; Ty->dump());
11376 return result;
11377 }
11376 bool IsHA = isHomogeneousAggregate(Ty, Base, Members);
11377 DEBUG(dbgs() << "isHA: " << IsHA << " "; Ty->dump());
11378
11379 bool IsIntArray = Ty->isArrayTy() && Ty->getArrayElementType()->isIntegerTy();
11380 return IsHA || IsIntArray;
11381 }
0 ; RUN: llc -mtriple=armv7-linux-gnueabihf %s -o - | FileCheck %s
1
2 ; [2 x i64] should be contiguous when split (e.g. we shouldn't try to align all
3 ; i32 components to 64 bits). Also makes sure i64 based types are properly
4 ; aligned on the stack.
5 define i64 @test_i64_contiguous_on_stack([8 x double], float, i32 %in, [2 x i64] %arg) nounwind {
6 ; CHECK-LABEL: test_i64_contiguous_on_stack:
7 ; CHECK-DAG: ldr [[LO0:r[0-9]+]], [sp, #8]
8 ; CHECK-DAG: ldr [[HI0:r[0-9]+]], [sp, #12]
9 ; CHECK-DAG: ldr [[LO1:r[0-9]+]], [sp, #16]
10 ; CHECK-DAG: ldr [[HI1:r[0-9]+]], [sp, #20]
11 ; CHECK: adds r0, [[LO0]], [[LO1]]
12 ; CHECK: adc r1, [[HI0]], [[HI1]]
13
14 %val1 = extractvalue [2 x i64] %arg, 0
15 %val2 = extractvalue [2 x i64] %arg, 1
16 %sum = add i64 %val1, %val2
17 ret i64 %sum
18 }
19
20 ; [2 x i64] should only look for 4 regs, not 8 (which might happen if the
21 ; i64 -> i32, i32 split wasn't handled correctly).
22 define i64 @test_2xi64_uses_4_regs([8 x double], float, [2 x i64] %arg) nounwind {
23 ; CHECK-LABEL: test_2xi64_uses_4_regs:
24 ; CHECK-DAG: mov r0, r2
25 ; CHECK-DAG: mov r1, r3
26
27 %val = extractvalue [2 x i64] %arg, 1
28 ret i64 %val
29 }
30
31 ; An aggregate should be able to split between registers and stack if there is
32 ; nothing else on the stack.
33 define i32 @test_aggregates_split([8 x double], i32, [4 x i32] %arg) nounwind {
34 ; CHECK-LABEL: test_aggregates_split:
35 ; CHECK: ldr [[VAL3:r[0-9]+]], [sp]
36 ; CHECK: add r0, r1, [[VAL3]]
37
38 %val0 = extractvalue [4 x i32] %arg, 0
39 %val3 = extractvalue [4 x i32] %arg, 3
40 %sum = add i32 %val0, %val3
41 ret i32 %sum
42 }
43
44 ; If an aggregate has to be moved entirely onto the stack, nothing should be
45 ; able to use r0-r3 any more. Also checks that [2 x i64] properly aligned when
46 ; it uses regs.
47 define i32 @test_no_int_backfilling([8 x double], float, i32, [2 x i64], i32 %arg) nounwind {
48 ; CHECK-LABEL: test_no_int_backfilling:
49 ; CHECK: ldr r0, [sp, #24]
50 ret i32 %arg
51 }
52
53 ; Even if the argument was successfully allocated as reg block, there should be
54 ; no backfilling to r1.
55 define i32 @test_no_int_backfilling_regsonly(i32, [1 x i64], i32 %arg) {
56 ; CHECK-LABEL: test_no_int_backfilling_regsonly:
57 ; CHECK: ldr r0, [sp]
58 ret i32 %arg
59 }
60
61 ; If an aggregate has to be moved entirely onto the stack, nothing should be
62 ; able to use r0-r3 any more.
63 define float @test_no_float_backfilling([7 x double], [4 x i32], i32, [4 x double], float %arg) nounwind {
64 ; CHECK-LABEL: test_no_float_backfilling:
65 ; CHECK: vldr s0, [sp, #40]
66 ret float %arg
67 }
68
69 ; They're a bit pointless, but types like [N x i8] should work as well.
70 define i8 @test_i8_in_regs(i32, [3 x i8] %arg) {
71 ; CHECK-LABEL: test_i8_in_regs:
72 ; CHECK: add r0, r1, r3
73 %val0 = extractvalue [3 x i8] %arg, 0
74 %val2 = extractvalue [3 x i8] %arg, 2
75 %sum = add i8 %val0, %val2
76 ret i8 %sum
77 }
78
79 define i16 @test_i16_split(i32, i32, [3 x i16] %arg) {
80 ; CHECK-LABEL: test_i16_split:
81 ; CHECK: ldrh [[VAL2:r[0-9]+]], [sp]
82 ; CHECK: add r0, r2, [[VAL2]]
83 %val0 = extractvalue [3 x i16] %arg, 0
84 %val2 = extractvalue [3 x i16] %arg, 2
85 %sum = add i16 %val0, %val2
86 ret i16 %sum
87 }
88
89 ; Beware: on the stack each i16 still gets a 32-bit slot, the array is not
90 ; packed.
91 define i16 @test_i16_forced_stack([8 x double], double, i32, i32, [3 x i16] %arg) {
92 ; CHECK-LABEL: test_i16_forced_stack:
93 ; CHECK-DAG: ldrh [[VAL0:r[0-9]+]], [sp, #8]
94 ; CHECK-DAG: ldrh [[VAL2:r[0-9]+]], [sp, #16]
95 ; CHECK: add r0, [[VAL0]], [[VAL2]]
96 %val0 = extractvalue [3 x i16] %arg, 0
97 %val2 = extractvalue [3 x i16] %arg, 2
98 %sum = add i16 %val0, %val2
99 ret i16 %sum
100 }