llvm.org GIT mirror: llvm / 5ab8f33

AMDGPU/GlobalISel: Move kernel argument handling to separate function

Matt Arsenault, 1 year, 2 months ago
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@365782 91177308-0d34-0410-b5e6-96231b3b80d8
2 changed files with 61 additions and 42 deletions.
lib/Target/AMDGPU/AMDGPUCallLowering.cpp

   }
 }
 
+bool AMDGPUCallLowering::lowerFormalArgumentsKernel(
+    MachineIRBuilder &MIRBuilder, const Function &F,
+    ArrayRef<ArrayRef<Register>> VRegs) const {
+  MachineFunction &MF = MIRBuilder.getMF();
+  const GCNSubtarget *Subtarget = &MF.getSubtarget<GCNSubtarget>();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+  const SIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
+  const DataLayout &DL = F.getParent()->getDataLayout();
+
+  SmallVector<CCValAssign, 16> ArgLocs;
+  CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
+
+  allocateHSAUserSGPRs(CCInfo, MIRBuilder, MF, *TRI, *Info);
+
+  unsigned i = 0;
+  const unsigned KernArgBaseAlign = 16;
+  const unsigned BaseOffset = Subtarget->getExplicitKernelArgOffset(F);
+  uint64_t ExplicitArgOffset = 0;
+
+  // TODO: Align down to dword alignment and extract bits for extending loads.
+  for (auto &Arg : F.args()) {
+    Type *ArgTy = Arg.getType();
+    unsigned AllocSize = DL.getTypeAllocSize(ArgTy);
+    if (AllocSize == 0)
+      continue;
+
+    unsigned ABIAlign = DL.getABITypeAlignment(ArgTy);
+
+    uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset;
+    ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;
+
+    ArrayRef<Register> OrigArgRegs = VRegs[i];
+    Register ArgReg =
+      OrigArgRegs.size() == 1
+      ? OrigArgRegs[0]
+      : MRI.createGenericVirtualRegister(getLLTForType(*ArgTy, DL));
+    unsigned Align = MinAlign(KernArgBaseAlign, ArgOffset);
+    ArgOffset = alignTo(ArgOffset, DL.getABITypeAlignment(ArgTy));
+    lowerParameter(MIRBuilder, ArgTy, ArgOffset, Align, ArgReg);
+    if (OrigArgRegs.size() > 1)
+      unpackRegs(OrigArgRegs, ArgReg, ArgTy, MIRBuilder);
+    ++i;
+  }
+
+  allocateSpecialEntryInputVGPRs(CCInfo, MF, *TRI, *Info);
+  allocateSystemSGPRs(CCInfo, MF, *Info, F.getCallingConv(), false);
+  return true;
+}
+
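The offset arithmetic in the loop above is easy to check by hand. Below is a minimal standalone sketch, not LLVM code: alignTo and MinAlign are reimplemented with the same semantics as the llvm/Support/MathExtras.h versions, BaseOffset is assumed to be 0 (the HSA case), and the (i32, i64, <4 x i32>) argument list is a hypothetical kernel signature.

// Standalone sketch of the kernarg offset computation (assumptions above).
#include <cstdint>
#include <cstdio>

// Round Value up to the next multiple of Align (Align is a power of two).
static uint64_t alignTo(uint64_t Value, uint64_t Align) {
  return (Value + Align - 1) & ~(Align - 1);
}

// Largest power of two dividing both A and B: the alignment guaranteed for
// an access at offset B from an A-aligned base.
static uint64_t MinAlign(uint64_t A, uint64_t B) {
  return (A | B) & (1 + ~(A | B));
}

int main() {
  const uint64_t KernArgBaseAlign = 16; // kernarg segment base alignment
  const uint64_t BaseOffset = 0;        // assumed: getExplicitKernelArgOffset(F) on HSA

  struct Arg { const char *Name; uint64_t AllocSize, ABIAlign; };
  const Arg Args[] = {{"i32", 4, 4}, {"i64", 8, 8}, {"<4 x i32>", 16, 16}};

  uint64_t ExplicitArgOffset = 0;
  for (const Arg &A : Args) {
    uint64_t ArgOffset = alignTo(ExplicitArgOffset, A.ABIAlign) + BaseOffset;
    ExplicitArgOffset = alignTo(ExplicitArgOffset, A.ABIAlign) + A.AllocSize;
    uint64_t Align = MinAlign(KernArgBaseAlign, ArgOffset);
    std::printf("%-10s offset=%2llu load-align=%2llu\n", A.Name,
                (unsigned long long)ArgOffset, (unsigned long long)Align);
  }
  return 0;
  // i32        offset= 0 load-align=16
  // i64        offset= 8 load-align= 8
  // <4 x i32>  offset=16 load-align=16
}

Each argument lands at the running size rounded up to its ABI alignment; MinAlign then reports the best load alignment provable from the 16-byte alignment of the kernarg segment base, which is why the i64 at offset 8 only gets an 8-byte-aligned load.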
 bool AMDGPUCallLowering::lowerFormalArguments(
     MachineIRBuilder &MIRBuilder, const Function &F,
     ArrayRef<ArrayRef<Register>> VRegs) const {
+  // The infrastructure for normal calling convention lowering is essentially
+  // useless for kernels. We want to avoid any kind of legalization or argument
+  // splitting.
+  if (F.getCallingConv() == CallingConv::AMDGPU_KERNEL)
+    return lowerFormalArgumentsKernel(MIRBuilder, F, VRegs);
+
   // AMDGPU_GS and AMDGPU_HS are not supported yet.
   if (F.getCallingConv() == CallingConv::AMDGPU_GS ||
       F.getCallingConv() == CallingConv::AMDGPU_HS)
     return false;
 
   MachineFunction &MF = MIRBuilder.getMF();
-  const GCNSubtarget *Subtarget = &MF.getSubtarget<GCNSubtarget>();
   MachineRegisterInfo &MRI = MF.getRegInfo();
   SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
   const SIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
...
   SmallVector<CCValAssign, 16> ArgLocs;
   CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
-
-  // The infrastructure for normal calling convention lowering is essentially
-  // useless for kernels. We want to avoid any kind of legalization or argument
-  // splitting.
-  if (F.getCallingConv() == CallingConv::AMDGPU_KERNEL) {
-    allocateHSAUserSGPRs(CCInfo, MIRBuilder, MF, *TRI, *Info);
-
-    unsigned i = 0;
-    const unsigned KernArgBaseAlign = 16;
-    const unsigned BaseOffset = Subtarget->getExplicitKernelArgOffset(F);
-    uint64_t ExplicitArgOffset = 0;
-
-    // TODO: Align down to dword alignment and extract bits for extending loads.
-    for (auto &Arg : F.args()) {
-      Type *ArgTy = Arg.getType();
-      unsigned AllocSize = DL.getTypeAllocSize(ArgTy);
-      if (AllocSize == 0)
-        continue;
-
-      unsigned ABIAlign = DL.getABITypeAlignment(ArgTy);
-
-      uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset;
-      ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;
-
-      ArrayRef<Register> OrigArgRegs = VRegs[i];
-      Register ArgReg =
-        OrigArgRegs.size() == 1
-        ? OrigArgRegs[0]
-        : MRI.createGenericVirtualRegister(getLLTForType(*ArgTy, DL));
-      unsigned Align = MinAlign(KernArgBaseAlign, ArgOffset);
-      ArgOffset = alignTo(ArgOffset, DL.getABITypeAlignment(ArgTy));
-      lowerParameter(MIRBuilder, ArgTy, ArgOffset, Align, ArgReg);
-      if (OrigArgRegs.size() > 1)
-        unpackRegs(OrigArgRegs, ArgReg, ArgTy, MIRBuilder);
-      ++i;
-    }
-
-    allocateSpecialEntryInputVGPRs(CCInfo, MF, *TRI, *Info);
-    allocateSystemSGPRs(CCInfo, MF, *Info, F.getCallingConv(), IsShader);
-    return true;
-  }
 
   if (Info->hasImplicitBufferPtr()) {
     unsigned ImplicitBufferPtrReg = Info->addImplicitBufferPtr(*TRI);
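Read together, the two hunks leave the entry point with a simple dispatch at the top; everything past these checks only ever sees shader and function calling conventions. Condensed, this is a sketch assembled from the diff above, with the unchanged remainder elided, not a verbatim copy of the file:

bool AMDGPUCallLowering::lowerFormalArguments(
    MachineIRBuilder &MIRBuilder, const Function &F,
    ArrayRef<ArrayRef<Register>> VRegs) const {
  // Kernels bypass the normal calling-convention machinery entirely.
  if (F.getCallingConv() == CallingConv::AMDGPU_KERNEL)
    return lowerFormalArgumentsKernel(MIRBuilder, F, VRegs);

  // AMDGPU_GS and AMDGPU_HS are not supported yet.
  if (F.getCallingConv() == CallingConv::AMDGPU_GS ||
      F.getCallingConv() == CallingConv::AMDGPU_HS)
    return false;

  // ... shader/function argument lowering continues as before ...
}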
lib/Target/AMDGPU/AMDGPUCallLowering.h

 
   bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
                    ArrayRef<Register> VRegs) const override;
+
+  bool lowerFormalArgumentsKernel(MachineIRBuilder &MIRBuilder,
+                                  const Function &F,
+                                  ArrayRef<ArrayRef<Register>> VRegs) const;
+
   bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
                             ArrayRef<ArrayRef<Register>> VRegs) const override;
   static CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg);
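For reading these signatures: ArrayRef<ArrayRef<Register>> carries one inner array per IR-level argument, and an inner array has more than one entry when the IR translator split that argument across several virtual registers, which is exactly the case the unpackRegs call in the loop above handles. A minimal illustration with a stand-in Register type (hypothetical values, not LLVM's API):

#include <vector>

using Register = unsigned; // stand-in for llvm::Register

int main() {
  // One inner array per IR argument.
  std::vector<std::vector<Register>> VRegs = {
      {1},    // arg0: i32        -> a single virtual register
      {2, 3}, // arg1: {i64, i64} -> split into two registers; after the
              //   kernarg load, unpackRegs distributes the wide value
  };
  (void)VRegs;
  return 0;
}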