llvm (llvm.org GIT mirror), commit a1d28f6
R600/SI: Handle MUBUF instructions in SIInstrInfo::moveToVALU()

Author: Tom Stellard
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@204476 91177308-0d34-0410-b5e6-96231b3b80d8

6 changed files with 157 additions and 5 deletions.
lib/Target/R600/AMDGPUTargetMachine.cpp

     addPass(createR600VectorRegMerger(*TM));
   } else {
     addPass(createSIFixSGPRCopiesPass(*TM));
+    // SIFixSGPRCopies can generate a lot of duplicate instructions,
+    // so we need to run MachineCSE afterwards.
+    addPass(&MachineCSEID);
   }
   return false;
 }
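For context, this hunk sits in AMDGPUPassConfig's pre-register-allocation hook. A sketch of the full function after the change; the subtarget check at the top is reconstructed from the surrounding code, not shown in the hunk:

    bool AMDGPUPassConfig::addPreRegAlloc() {
      const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();

      if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
        addPass(createR600VectorRegMerger(*TM));
      } else {
        addPass(createSIFixSGPRCopiesPass(*TM));
        // SIFixSGPRCopies can generate a lot of duplicate instructions,
        // so we need to run MachineCSE afterwards. MachineCSE is a stock
        // machine pass, so it is scheduled by its static pass ID rather
        // than through a target factory function.
        addPass(&MachineCSEID);
      }
      return false;
    }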
lib/Target/R600/SIISelLowering.cpp

 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/IR/Function.h"
-
-const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
 
 using namespace llvm;
 
...

   BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32), SubRegHiLo)
           .addImm(0);
   BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32), SubRegHiHi)
-          .addImm(RSRC_DATA_FORMAT >> 32);
+          .addImm(AMDGPU::RSRC_DATA_FORMAT >> 32);
   BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::REG_SEQUENCE), SubRegHi)
           .addReg(SubRegHiLo)
           .addImm(AMDGPU::sub0)
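Only the high half of the constant is needed here, but SIInstrInfo::legalizeOperands below materializes both halves, so the concrete values are worth noting. A standalone sanity check in plain C++ (variable name hypothetical; the real constant now lives in namespace AMDGPU):

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint64_t RsrcDataFormat = 0xf00000000000ULL; // AMDGPU::RSRC_DATA_FORMAT
      // Low dword is zero -> becomes sub2 of the rebuilt descriptor.
      assert((RsrcDataFormat & 0xFFFFFFFF) == 0x0);
      // High dword is 0xf000 -> becomes sub3.
      assert((RsrcDataFormat >> 32) == 0xf000);
      return 0;
    }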
lib/Target/R600/SIInstrFormats.td

   let EXP_CNT = 1;
 
   let neverHasSideEffects = 1;
+  let UseNamedOperandTable = 1;
 }
 
 class MTBUF <bits<3> op, dag outs, dag ins, string asm, list<dag> pattern> :
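Setting UseNamedOperandTable is what makes the AMDGPU::getNamedOperandIdx lookups in the new legalization code work: without it, TableGen emits no name-to-index table for these instructions. The lookup pattern, as the patch uses it in SIInstrInfo.cpp below:

    // Query operand positions by name instead of hard-coding indices.
    // getNamedOperandIdx returns -1 when the opcode has no operand with
    // that name, which doubles as the "is this a MUBUF instruction with
    // both a resource and an address operand?" test.
    int SRsrcIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                              AMDGPU::OpName::srsrc);
    int VAddrIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                              AMDGPU::OpName::vaddr);
    if (SRsrcIdx != -1 && VAddrIdx != -1) {
      // ... legalize the MUBUF operands ...
    }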
lib/Target/R600/SIInstrInfo.cpp

   MO.ChangeToRegister(Reg, false);
 }
 
+unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI,
+                                         MachineRegisterInfo &MRI,
+                                         MachineOperand &SuperReg,
+                                         const TargetRegisterClass *SuperRC,
+                                         unsigned SubIdx,
+                                         const TargetRegisterClass *SubRC)
+                                         const {
+  assert(SuperReg.isReg());
+
+  unsigned NewSuperReg = MRI.createVirtualRegister(SuperRC);
+  unsigned SubReg = MRI.createVirtualRegister(SubRC);
+
+  // Just in case the super register is itself a sub-register, copy it to a new
+  // value so we don't need to worry about merging its subreg index with the
+  // SubIdx passed to this function. The register coalescer should be able to
+  // eliminate this extra copy.
+  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(TargetOpcode::COPY),
+          NewSuperReg)
+          .addOperand(SuperReg);
+
+  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(TargetOpcode::COPY),
+          SubReg)
+          .addReg(NewSuperReg, 0, SubIdx);
+  return SubReg;
+}
+
 void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
   MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
   int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
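buildExtractSubReg expands to two plain COPYs. A sketch of what a sub0 extraction emits immediately before MI, in rough MIR notation (virtual register numbers hypothetical):

    SRsrcPtrLo = buildExtractSubReg(MI, MRI, SRsrcOp,
                                    &AMDGPU::VReg_128RegClass, AMDGPU::sub0,
                                    &AMDGPU::VReg_32RegClass);

    ; emits:
    %vreg1 = COPY %vreg0          ; fresh full-width 128-bit copy
    %vreg2 = COPY %vreg1:sub0     ; 32-bit extract via the subreg index

The leading full copy exists so that a SuperReg which is itself a subregister never needs its index composed with SubIdx; the register coalescer is expected to fold it away.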
lib/Target/R600/SIInstrInfo.cpp (continued)

               get(AMDGPU::COPY), DstReg)
               .addOperand(MI->getOperand(i));
       MI->getOperand(i).setReg(DstReg);
+    }
+  }
+
+  // Legalize MUBUF* instructions
+  // FIXME: If we start using the non-addr64 instructions for compute, we
+  // may need to legalize them here.
+
+  int SRsrcIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
+                                            AMDGPU::OpName::srsrc);
+  int VAddrIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
+                                            AMDGPU::OpName::vaddr);
+  if (SRsrcIdx != -1 && VAddrIdx != -1) {
+    const TargetRegisterClass *VAddrRC =
+        RI.getRegClass(get(MI->getOpcode()).OpInfo[VAddrIdx].RegClass);
+
+    if (VAddrRC->getSize() == 8 &&
+        MRI.getRegClass(MI->getOperand(SRsrcIdx).getReg()) != VAddrRC) {
+      // We have a MUBUF instruction that uses a 64-bit vaddr register and
+      // srsrc has the incorrect register class. In order to fix this, we
+      // need to extract the pointer from the resource descriptor (srsrc),
+      // add it to the value of vaddr, then store the result in the vaddr
+      // operand. Then, we need to set the pointer field of the resource
+      // descriptor to zero.
+
+      MachineBasicBlock &MBB = *MI->getParent();
+      MachineOperand &SRsrcOp = MI->getOperand(SRsrcIdx);
+      MachineOperand &VAddrOp = MI->getOperand(VAddrIdx);
+      unsigned SRsrcPtrLo, SRsrcPtrHi, VAddrLo, VAddrHi;
+      unsigned NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
+      unsigned NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
+      unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
+      unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+      unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+      unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+      unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
+
+      // SRsrcPtrLo = srsrc:sub0
+      SRsrcPtrLo = buildExtractSubReg(MI, MRI, SRsrcOp,
+          &AMDGPU::VReg_128RegClass, AMDGPU::sub0, &AMDGPU::VReg_32RegClass);
+
+      // SRsrcPtrHi = srsrc:sub1
+      SRsrcPtrHi = buildExtractSubReg(MI, MRI, SRsrcOp,
+          &AMDGPU::VReg_128RegClass, AMDGPU::sub1, &AMDGPU::VReg_32RegClass);
+
+      // VAddrLo = vaddr:sub0
+      VAddrLo = buildExtractSubReg(MI, MRI, VAddrOp,
+          &AMDGPU::VReg_64RegClass, AMDGPU::sub0, &AMDGPU::VReg_32RegClass);
+
+      // VAddrHi = vaddr:sub1
+      VAddrHi = buildExtractSubReg(MI, MRI, VAddrOp,
+          &AMDGPU::VReg_64RegClass, AMDGPU::sub1, &AMDGPU::VReg_32RegClass);
+
+      // NewVaddrLo = SRsrcPtrLo + VAddrLo
+      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADD_I32_e32),
+              NewVAddrLo)
+              .addReg(SRsrcPtrLo)
+              .addReg(VAddrLo)
+              .addReg(AMDGPU::VCC, RegState::Define | RegState::Implicit);
+
+      // NewVaddrHi = SRsrcPtrHi + VAddrHi
+      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADDC_U32_e32),
+              NewVAddrHi)
+              .addReg(SRsrcPtrHi)
+              .addReg(VAddrHi)
+              .addReg(AMDGPU::VCC, RegState::ImplicitDefine)
+              .addReg(AMDGPU::VCC, RegState::Implicit);
+
+      // NewVaddr = {NewVaddrHi, NewVaddrLo}
+      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
+              NewVAddr)
+              .addReg(NewVAddrLo)
+              .addImm(AMDGPU::sub0)
+              .addReg(NewVAddrHi)
+              .addImm(AMDGPU::sub1);
+
+      // Zero64 = 0
+      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B64),
+              Zero64)
+              .addImm(0);
+
+      // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0}
+      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
+              SRsrcFormatLo)
+              .addImm(AMDGPU::RSRC_DATA_FORMAT & 0xFFFFFFFF);
+
+      // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32}
+      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
+              SRsrcFormatHi)
+              .addImm(AMDGPU::RSRC_DATA_FORMAT >> 32);
+
+      // NewSRsrc = {Zero64, SRsrcFormat}
+      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
+              NewSRsrc)
+              .addReg(Zero64)
+              .addImm(AMDGPU::sub0_sub1)
+              .addReg(SRsrcFormatLo)
+              .addImm(AMDGPU::sub2)
+              .addReg(SRsrcFormatHi)
+              .addImm(AMDGPU::sub3);
+
+      // Update the instruction to use NewVaddr
+      MI->getOperand(VAddrIdx).setReg(NewVAddr);
+      // Update the instruction to use NewSRsrc
+      MI->getOperand(SRsrcIdx).setReg(NewSRsrc);
     }
   }
 }
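Taken together, the block above performs this rewrite, in rough pseudo-MIR (register names hypothetical). Note the carry chain: V_ADD_I32_e32 defines VCC implicitly (carry out) and V_ADDC_U32_e32 both defines and reads it (carry in):

    ; before: vaddr is a 64-bit VGPR pair, srsrc is in the wrong register class
    BUFFER_LOAD_* %dst, %vaddr, %srsrc, ...

    ; after:
    %ptr_lo    = COPY %srsrc:sub0
    %ptr_hi    = COPY %srsrc:sub1
    %new_lo    = V_ADD_I32_e32  %ptr_lo, %vaddr:sub0   ; implicit-def VCC
    %new_hi    = V_ADDC_U32_e32 %ptr_hi, %vaddr:sub1   ; implicit-def VCC, implicit VCC
    %new_vaddr = REG_SEQUENCE %new_lo, sub0, %new_hi, sub1
    %zero      = S_MOV_B64 0
    %fmt_lo    = S_MOV_B32 0x0     ; RSRC_DATA_FORMAT & 0xFFFFFFFF
    %fmt_hi    = S_MOV_B32 0xf000  ; RSRC_DATA_FORMAT >> 32
    %new_srsrc = REG_SEQUENCE %zero, sub0_sub1, %fmt_lo, sub2, %fmt_hi, sub3
    BUFFER_LOAD_* %dst, %new_vaddr, %new_srsrc, ...

The pointer originally held in srsrc:sub0_sub1 is folded into the address, and the rebuilt descriptor carries a zero base pointer with only the default data-format bits set.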
lib/Target/R600/SIInstrInfo.cpp (continued)

     }
 
     unsigned NewOpcode = getVALUOp(*Inst);
-    if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END)
+    if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
+      // We cannot move this instruction to the VALU, so we should try to
+      // legalize its operands instead.
+      legalizeOperands(Inst);
       continue;
+    }
 
     // Use the new VALU Opcode.
     const MCInstrDesc &NewDesc = get(NewOpcode);
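In context, this is the fallback path of the moveToVALU worklist loop: MUBUF loads and stores have no VALU counterpart (getVALUOp returns INSTRUCTION_LIST_END for them), so rather than being rewritten they now get their operands legalized in place. A simplified sketch of the loop's shape, with the surrounding lines assumed:

    while (!Worklist.empty()) {
      MachineInstr *Inst = Worklist.pop_back_val();

      unsigned NewOpcode = getVALUOp(*Inst);
      if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
        // No VALU form exists (e.g. a MUBUF load whose srsrc ended up in
        // VGPRs): keep the opcode, but force the operands into legal
        // register classes.
        legalizeOperands(Inst);
        continue;
      }

      // Otherwise swap in the VALU opcode and keep processing the users.
      Inst->setDesc(get(NewOpcode));
      // ...
    }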
lib/Target/R600/SIInstrInfo.h

 class SIInstrInfo : public AMDGPUInstrInfo {
 private:
   const SIRegisterInfo RI;
+
+  unsigned buildExtractSubReg(MachineBasicBlock::iterator MI,
+                              MachineRegisterInfo &MRI,
+                              MachineOperand &SuperReg,
+                              const TargetRegisterClass *SuperRC,
+                              unsigned SubIdx,
+                              const TargetRegisterClass *SubRC) const;
 
 public:
   explicit SIInstrInfo(AMDGPUTargetMachine &tm);

...

   int getCommuteRev(uint16_t Opcode);
   int getCommuteOrig(uint16_t Opcode);
 
+const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
+
+
 } // End namespace AMDGPU
 
 } // End namespace llvm
test/CodeGen/R600/mubuf.ll

-; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s
 
 ; In this test both the pointer and the offset operands to the
 ; BUFFER_LOAD instructions end up being stored in vgprs. This

...

 ; (low 64-bits of srsrc).
 
 ; CHECK-LABEL: @mubuf
+
 ; Make sure we aren't using VGPRs for the source operand of S_MOV_B64
 ; CHECK-NOT: S_MOV_B64 s[{{[0-9]+:[0-9]+}}], v
+
+; Make sure we aren't using VGPRs for the srsrc operand of BUFFER_LOAD_*
+; instructions
+; CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]
+; CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]
 define void @mubuf(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
 entry:
   %0 = call i32 @llvm.r600.read.tidig.x() #1