llvm.org GIT mirror llvm / d544680
AMDGPU: Don't clobber VCC in MUBUF addr64 emulation

Introducing VCC defs during SIFixSGPRCopies is generally problematic. Avoid it by starting with the VOP3 form, which uses a general condition register. This is the easiest instance to fix, but it doesn't solve any specific problems I'm looking at.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@363904 91177308-0d34-0410-b5e6-96231b3b80d8

Matt Arsenault, a month ago
2 changed file(s) with 24 addition(s) and 17 deletion(s). Raw diff Collapse all Expand all
44024402 unsigned NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
44034403 unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
44044404
4405 const auto *BoolXExecRC = RI.getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
4406 unsigned CondReg0 = MRI.createVirtualRegister(BoolXExecRC);
4407 unsigned CondReg1 = MRI.createVirtualRegister(BoolXExecRC);
4408
44054409 unsigned RsrcPtr, NewSRsrc;
44064410 std::tie(RsrcPtr, NewSRsrc) = extractRsrcPtr(*this, MI, *Rsrc);
44074411
44084412 // NewVaddrLo = RsrcPtr:sub0 + VAddr:sub0
4409 DebugLoc DL = MI.getDebugLoc();
4410 fixImplicitOperands(*
4411 BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e32), NewVAddrLo)
4412 .addReg(RsrcPtr, 0, AMDGPU::sub0)
4413 .addReg(VAddr->getReg(), 0, AMDGPU::sub0));
4413 const DebugLoc &DL = MI.getDebugLoc();
4414 BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e64), NewVAddrLo)
4415 .addDef(CondReg0)
4416 .addReg(RsrcPtr, 0, AMDGPU::sub0)
4417 .addReg(VAddr->getReg(), 0, AMDGPU::sub0)
4418 .addImm(0);
44144419
44154420 // NewVaddrHi = RsrcPtr:sub1 + VAddr:sub1
4416 fixImplicitOperands(*
4417 BuildMI(MBB, MI, DL, get(AMDGPU::V_ADDC_U32_e32), NewVAddrHi)
4418 .addReg(RsrcPtr, 0, AMDGPU::sub1)
4419 .addReg(VAddr->getReg(), 0, AMDGPU::sub1));
4421 BuildMI(MBB, MI, DL, get(AMDGPU::V_ADDC_U32_e64), NewVAddrHi)
4422 .addDef(CondReg1, RegState::Dead)
4423 .addReg(RsrcPtr, 0, AMDGPU::sub1)
4424 .addReg(VAddr->getReg(), 0, AMDGPU::sub1)
4425 .addReg(CondReg0, RegState::Kill)
4426 .addImm(0);
44204427
44214428 // NewVaddr = {NewVaddrHi, NewVaddrLo}
44224429 BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewVAddr)
216216
217217 # ADDR64-LABEL: name: addr64
218218 # ADDR64-LABEL: bb.0:
219 # ADDR64: %12:vreg_64 = COPY %8.sub0_sub1
220 # ADDR64: %13:sreg_64 = S_MOV_B64 0
221 # ADDR64: %14:sgpr_32 = S_MOV_B32 0
222 # ADDR64: %15:sgpr_32 = S_MOV_B32 61440
223 # ADDR64: %16:sreg_128 = REG_SEQUENCE %13, %subreg.sub0_sub1, %14, %subreg.sub2, %15, %subreg.sub3
224 # ADDR64: %9:vgpr_32 = V_ADD_I32_e32 %12.sub0, %4.sub0, implicit-def $vcc, implicit $exec
225 # ADDR64: %10:vgpr_32 = V_ADDC_U32_e32 %12.sub1, %4.sub1, implicit-def $vcc, implicit $vcc, implicit $exec
219 # ADDR64: %14:vreg_64 = COPY %8.sub0_sub1
220 # ADDR64: %15:sreg_64 = S_MOV_B64 0
221 # ADDR64: %16:sgpr_32 = S_MOV_B32 0
222 # ADDR64: %17:sgpr_32 = S_MOV_B32 61440
223 # ADDR64: %18:sreg_128 = REG_SEQUENCE %15, %subreg.sub0_sub1, %16, %subreg.sub2, %17, %subreg.sub3
224 # ADDR64: %9:vgpr_32, %12:sreg_64_xexec = V_ADD_I32_e64 %14.sub0, %4.sub0, 0, implicit $exec
225 # ADDR64: %10:vgpr_32, dead %13:sreg_64_xexec = V_ADDC_U32_e64 %14.sub1, %4.sub1, killed %12, 0, implicit $exec
226226 # ADDR64: %11:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %10, %subreg.sub1
227 # ADDR64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_ADDR64 %11, killed %16, 0, 0, 0, 0, 0, 0, implicit $exec
227 # ADDR64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_ADDR64 %11, killed %18, 0, 0, 0, 0, 0, 0, implicit $exec
228228 ---
229229 name: addr64
230230 liveins: