llvm.org GIT mirror llvm / a799ff4
AMDGPU: Expand frame indexes to be relative to scratch wave offset In order for an arbitrary callee to access an object in a caller's stack frame, the 32-bit offset used as the private pointer needs to be relative to the kernel's scratch wave offset register. Convert to this by finding the difference from the current stack frame and scaling by the wavefront size. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@303303 91177308-0d34-0410-b5e6-96231b3b80d8 Matt Arsenault 3 years ago
1 changed file(s) with 71 addition(s) and 6 deletion(s). Raw diff Collapse all Expand all
317317
318318 MachineOperand *FIOp = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr);
319319 assert(FIOp && FIOp->isFI() && "frame index must be address operand");
320
321320 assert(TII->isMUBUF(MI));
321 assert(TII->getNamedOperand(MI, AMDGPU::OpName::soffset)->getReg() ==
322 MF->getInfo<SIMachineFunctionInfo>()->getFrameOffsetReg() &&
323 "should only be seeing frame offset relative FrameIndex");
324
322325
323326 MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset);
324327 int64_t NewOffset = OffsetOp->getImm() + Offset;
980983 }
981984
982985 default: {
983 if (TII->isMUBUF(*MI)) {
986 const DebugLoc &DL = MI->getDebugLoc();
987 bool IsMUBUF = TII->isMUBUF(*MI);
988
989 if (!IsMUBUF &&
990 MFI->getFrameOffsetReg() != MFI->getScratchWaveOffsetReg()) {
991 // Convert to an absolute stack address by finding the offset from the
992 // scratch wave base and scaling by the wave size.
993 //
994 // In an entry function/kernel the stack address is already the absolute
995 // address relative to the scratch wave offset.
996
997 unsigned DiffReg
998 = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
999
1000 bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32;
1001 unsigned ResultReg = IsCopy ?
1002 MI->getOperand(0).getReg() :
1003 MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1004
1005 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), DiffReg)
1006 .addReg(MFI->getFrameOffsetReg())
1007 .addReg(MFI->getScratchWaveOffsetReg());
1008
1009 int64_t Offset = FrameInfo.getObjectOffset(Index);
1010 if (Offset == 0) {
1011 // XXX - This never happens because of emergency scavenging slot at 0?
1012 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ResultReg)
1013 .addImm(Log2_32(ST.getWavefrontSize()))
1014 .addReg(DiffReg);
1015 } else {
1016 unsigned CarryOut
1017 = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
1018 unsigned ScaledReg
1019 = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
1020
1021 // XXX - Should this use a vector shift?
1022 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHR_B32), ScaledReg)
1023 .addReg(DiffReg, RegState::Kill)
1024 .addImm(Log2_32(ST.getWavefrontSize()));
1025
1026 // TODO: Fold if use instruction is another add of a constant.
1027 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ADD_I32_e64), ResultReg)
1028 .addReg(CarryOut, RegState::Define | RegState::Dead)
1029 .addImm(Offset)
1030 .addReg(ScaledReg, RegState::Kill);
1031
1032 MRI.setRegAllocationHint(CarryOut, 0, AMDGPU::VCC);
1033 }
1034
1035 // Don't introduce an extra copy if we're just materializing in a mov.
1036 if (IsCopy)
1037 MI->eraseFromParent();
1038 else
1039 FIOp.ChangeToRegister(ResultReg, false, false, true);
1040 return;
1041 }
1042
1043 if (IsMUBUF) {
9841044 // Disable offen so we don't need a 0 vgpr base.
9851045 assert(static_cast<unsigned>(FIOperandNum) ==
9861046 AMDGPU::getNamedOperandIdx(MI->getOpcode(),
9871047 AMDGPU::OpName::vaddr));
9881048
1049 assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg()
1050 == MFI->getFrameOffsetReg());
1051
9891052 int64_t Offset = FrameInfo.getObjectOffset(Index);
9901053 int64_t OldImm
9911054 = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
9941057 if (isUInt<12>(NewOffset) &&
9951058 buildMUBUFOffsetLoadStore(TII, FrameInfo, MI, Index, NewOffset)) {
9961059 MI->eraseFromParent();
997 break;
1060 return;
9981061 }
9991062 }
1063
1064 // If the offset is simply too big, don't convert to a scratch wave offset
1065 // relative index.
10001066
10011067 int64_t Offset = FrameInfo.getObjectOffset(Index);
10021068 FIOp.ChangeToImmediate(Offset);
10031069 if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
10041070 unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1005 BuildMI(*MBB, MI, MI->getDebugLoc(),
1006 TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
1007 .addImm(Offset);
1071 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
1072 .addImm(Offset);
10081073 FIOp.ChangeToRegister(TmpReg, false, false, true);
10091074 }
10101075 }