llvm.org GIT mirror llvm / 573630a
R600/SI: Emit s_mov_b32 m0, -1 before every DS instruction. This s_mov_b32 will write to a virtual register from the M0Reg class, and all the DS instructions now take an extra explicit M0Reg argument. This change is necessary to prevent issues with the scheduler mixing together instructions that expect different values in the m0 register. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@222583 91177308-0d34-0410-b5e6-96231b3b80d8 Tom Stellard 5 years ago
7 changed file(s) with 30 addition(s) and 41 deletion(s). Raw diff Collapse all Expand all
19851985 const SIInstrInfo *TII = static_cast(
19861986 getTargetMachine().getSubtargetImpl()->getInstrInfo());
19871987
1988 MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
19881989 TII->legalizeOperands(MI);
19891990
19901991 if (TII->isMIMG(MI->getOpcode())) {
20042005
20052006 unsigned NewOpcode = TII->getMaskedMIMGOp(MI->getOpcode(), BitsSet);
20062007 MI->setDesc(TII->get(NewOpcode));
2007 MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
20082008 MRI.setRegClass(VReg, RC);
20092009 return;
20102010 }
545545
546546 let LGKM_CNT = 1;
547547 let UseNamedOperandTable = 1;
548 let DisableEncoding = "$m0";
548549 }
549550
550551 class MUBUF op, dag outs, dag ins, string asm, list pattern> :
947947 class DS_Load_Helper op, string asm, RegisterClass regClass> : DS_1A <
948948 op,
949949 (outs regClass:$vdst),
950 (ins i1imm:$gds, VReg_32:$addr, ds_offset:$offset),
950 (ins i1imm:$gds, VReg_32:$addr, ds_offset:$offset, M0Reg:$m0),
951951 asm#" $vdst, $addr"#"$offset"#" [M0]",
952952 []> {
953953 let data0 = 0;
959959 class DS_Load2_Helper op, string asm, RegisterClass regClass> : DS <
960960 op,
961961 (outs regClass:$vdst),
962 (ins i1imm:$gds, VReg_32:$addr, ds_offset0:$offset0, ds_offset1:$offset1),
962 (ins i1imm:$gds, VReg_32:$addr, ds_offset0:$offset0, ds_offset1:$offset1,
963 M0Reg:$m0),
963964 asm#" $vdst, $addr"#"$offset0"#"$offset1 [M0]",
964965 []> {
965966 let data0 = 0;
972973 class DS_Store_Helper op, string asm, RegisterClass regClass> : DS_1A <
973974 op,
974975 (outs),
975 (ins i1imm:$gds, VReg_32:$addr, regClass:$data0, ds_offset:$offset),
976 (ins i1imm:$gds, VReg_32:$addr, regClass:$data0, ds_offset:$offset, M0Reg:$m0),
976977 asm#" $addr, $data0"#"$offset"#" [M0]",
977978 []> {
978979 let data1 = 0;
985986 op,
986987 (outs),
987988 (ins i1imm:$gds, VReg_32:$addr, regClass:$data0, regClass:$data1,
988 ds_offset0:$offset0, ds_offset1:$offset1),
989 ds_offset0:$offset0, ds_offset1:$offset1, M0Reg:$m0),
989990 asm#" $addr, $data0, $data1"#"$offset0"#"$offset1 [M0]",
990991 []> {
991992 let mayStore = 1;
998999 class DS_1A1D_RET op, string asm, RegisterClass rc, string noRetOp = ""> : DS_1A <
9991000 op,
10001001 (outs rc:$vdst),
1001 (ins i1imm:$gds, VReg_32:$addr, rc:$data0, ds_offset:$offset),
1002 (ins i1imm:$gds, VReg_32:$addr, rc:$data0, ds_offset:$offset, M0Reg:$m0),
10021003 asm#" $vdst, $addr, $data0"#"$offset"#" [M0]", []>,
10031004 AtomicNoRet {
10041005
10131014 class DS_1A2D_RET op, string asm, RegisterClass rc, string noRetOp = ""> : DS_1A <
10141015 op,
10151016 (outs rc:$vdst),
1016 (ins i1imm:$gds, VReg_32:$addr, rc:$data0, rc:$data1, ds_offset:$offset),
1017 (ins i1imm:$gds, VReg_32:$addr, rc:$data0, rc:$data1, ds_offset:$offset, M0Reg:$m0),
10171018 asm#" $vdst, $addr, $data0, $data1"#"$offset"#" [M0]",
10181019 []>,
10191020 AtomicNoRet {
10261027 class DS_1A2D_NORET op, string asm, RegisterClass rc, string noRetOp = asm> : DS_1A <
10271028 op,
10281029 (outs),
1029 (ins i1imm:$gds, VReg_32:$addr, rc:$data0, rc:$data1, ds_offset:$offset),
1030 (ins i1imm:$gds, VReg_32:$addr, rc:$data0, rc:$data1, ds_offset:$offset, M0Reg:$m0),
10301031 asm#" $addr, $data0, $data1"#"$offset"#" [M0]",
10311032 []>,
10321033 AtomicNoRet {
10381039 class DS_1A1D_NORET op, string asm, RegisterClass rc, string noRetOp = asm> : DS_1A <
10391040 op,
10401041 (outs),
1041 (ins i1imm:$gds, VReg_32:$addr, rc:$data0, ds_offset:$offset),
1042 (ins i1imm:$gds, VReg_32:$addr, rc:$data0, ds_offset:$offset, M0Reg:$m0),
10421043 asm#" $addr, $data0"#"$offset"#" [M0]",
10431044 []>,
10441045 AtomicNoRet {
26132613
26142614 class DSReadPat : Pat <
26152615 (vt (frag (DS1Addr1Offset i32:$ptr, i32:$offset))),
2616 (inst (i1 0), $ptr, (as_i16imm $offset))
2616 (inst (i1 0), $ptr, (as_i16imm $offset), (S_MOV_B32 -1))
26172617 >;
26182618
26192619 def : DSReadPat ;
26312631 def : Pat <
26322632 (v2i32 (local_load (DS64Bit4ByteAligned i32:$ptr, i8:$offset0,
26332633 i8:$offset1))),
2634 (DS_READ2_B32 (i1 0), $ptr, $offset0, $offset1)
2634 (DS_READ2_B32 (i1 0), $ptr, $offset0, $offset1, (S_MOV_B32 -1))
26352635 >;
26362636
26372637 class DSWritePat : Pat <
26382638 (frag vt:$value, (DS1Addr1Offset i32:$ptr, i32:$offset)),
2639 (inst (i1 0), $ptr, $value, (as_i16imm $offset))
2639 (inst (i1 0), $ptr, $value, (as_i16imm $offset), (S_MOV_B32 -1))
26402640 >;
26412641
26422642 def : DSWritePat ;
26522652 (local_store v2i32:$value, (DS64Bit4ByteAligned i32:$ptr, i8:$offset0,
26532653 i8:$offset1)),
26542654 (DS_WRITE2_B32 (i1 0), $ptr, (EXTRACT_SUBREG $value, sub0),
2655 (EXTRACT_SUBREG $value, sub1), $offset0, $offset1)
2655 (EXTRACT_SUBREG $value, sub1), $offset0, $offset1,
2656 (S_MOV_B32 -1))
26562657 >;
26572658
26582659 class DSAtomicRetPat : Pat <
26592660 (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$value),
2660 (inst (i1 0), $ptr, $value, (as_i16imm $offset))
2661 (inst (i1 0), $ptr, $value, (as_i16imm $offset), (S_MOV_B32 -1))
26612662 >;
26622663
26632664 // Special case of DSAtomicRetPat for add / sub 1 -> inc / dec
26732674 class DSAtomicIncRetPat
26742675 Instruction LoadImm, PatFrag frag> : Pat <
26752676 (frag (DS1Addr1Offset i32:$ptr, i32:$offset), (vt 1)),
2676 (inst (i1 0), $ptr, (LoadImm (vt -1)), (as_i16imm $offset))
2677 (inst (i1 0), $ptr, (LoadImm (vt -1)), (as_i16imm $offset), (S_MOV_B32 -1))
26772678 >;
26782679
26792680
26802681 class DSAtomicCmpXChg : Pat <
26812682 (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$cmp, vt:$swap),
2682 (inst (i1 0), $ptr, $cmp, $swap, (as_i16imm $offset))
2683 (inst (i1 0), $ptr, $cmp, $swap, (as_i16imm $offset), (S_MOV_B32 -1))
26832684 >;
26842685
26852686
221221 // Be careful, since the addresses could be subregisters themselves in weird
222222 // cases, like vectors of pointers.
223223 const MachineOperand *AddrReg = TII->getNamedOperand(*I, AMDGPU::OpName::addr);
224 const MachineOperand *M0Reg = TII->getNamedOperand(*I, AMDGPU::OpName::m0);
224225
225226 unsigned DestReg0 = TII->getNamedOperand(*I, AMDGPU::OpName::vdst)->getReg();
226227 unsigned DestReg1
261262 .addOperand(*AddrReg) // addr
262263 .addImm(NewOffset0) // offset0
263264 .addImm(NewOffset1) // offset1
265 .addOperand(*M0Reg) // M0
264266 .addMemOperand(*I->memoperands_begin())
265267 .addMemOperand(*Paired->memoperands_begin());
266268
278280
279281 LiveInterval &AddrRegLI = LIS->getInterval(AddrReg->getReg());
280282 LIS->shrinkToUses(&AddrRegLI);
283
284 LiveInterval &M0RegLI = LIS->getInterval(M0Reg->getReg());
285 LIS->shrinkToUses(&M0RegLI);
281286
282287 LIS->getInterval(DestReg); // Create new LI
283288
294299 // Be sure to use .addOperand(), and not .addReg() with these. We want to be
295300 // sure we preserve the subregister index and any register flags set on them.
296301 const MachineOperand *Addr = TII->getNamedOperand(*I, AMDGPU::OpName::addr);
302 const MachineOperand *M0Reg = TII->getNamedOperand(*I, AMDGPU::OpName::m0);
297303 const MachineOperand *Data0 = TII->getNamedOperand(*I, AMDGPU::OpName::data0);
298304 const MachineOperand *Data1
299305 = TII->getNamedOperand(*Paired, AMDGPU::OpName::data0);
332338 .addOperand(*Data1) // data1
333339 .addImm(NewOffset0) // offset0
334340 .addImm(NewOffset1) // offset1
341 .addOperand(*M0Reg) // m0
335342 .addMemOperand(*I->memoperands_begin())
336343 .addMemOperand(*Paired->memoperands_begin());
337344
338345 // XXX - How do we express subregisters here?
339 unsigned OrigRegs[] = { Data0->getReg(), Data1->getReg(), Addr->getReg() };
346 unsigned OrigRegs[] = { Data0->getReg(), Data1->getReg(), Addr->getReg(),
347 M0Reg->getReg()};
340348
341349 LIS->RemoveMachineInstrFromMaps(I);
342350 LIS->RemoveMachineInstrFromMaps(Paired);
8787 void Kill(MachineInstr &MI);
8888 void Branch(MachineInstr &MI);
8989
90 void InitM0ForLDS(MachineBasicBlock::iterator MI);
9190 void LoadM0(MachineInstr &MI, MachineInstr *MovRel);
9291 void IndirectSrc(MachineInstr &MI);
9392 void IndirectDst(MachineInstr &MI);
324323 MI.eraseFromParent();
325324 }
326325
327 /// The m0 register stores the maximum allowable address for LDS reads and
328 /// writes. Its value must be at least the size in bytes of LDS allocated by
329 /// the shader. For simplicity, we set it to the maximum possible value.
330 void SILowerControlFlowPass::InitM0ForLDS(MachineBasicBlock::iterator MI) {
331 BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32),
332 AMDGPU::M0).addImm(0xffffffff);
333 }
334
335326 void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) {
336327
337328 MachineBasicBlock &MBB = *MI.getParent();
390381 .addReg(Save);
391382
392383 }
393 // FIXME: Are there any values other than the LDS address clamp that need to
394 // be stored in the m0 register and may be live for more than a few
395 // instructions? If so, we should save the m0 register at the beginning
396 // of this function and restore it here.
397 // FIXME: Add support for LDS direct loads.
398 InitM0ForLDS(&MI);
399384 MI.eraseFromParent();
400385 }
401386
464449
465450 MachineInstr &MI = *I;
466451 if (TII->isDS(MI.getOpcode())) {
467 NeedM0 = true;
468452 NeedWQM = true;
469453 }
470454
543527 }
544528 }
545529
546 if (NeedM0) {
547 MachineBasicBlock &MBB = MF.front();
548 // Initialize M0 to a value that won't cause LDS access to be discarded
549 // due to offset clamping
550 InitM0ForLDS(MBB.getFirstNonPHI());
551 }
552
553530 if (NeedWQM && MFI->getShaderType() == ShaderType::PIXEL) {
554531 MachineBasicBlock &MBB = MF.front();
555532 BuildMI(MBB, MBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_WQM_B64),
6767 ; pointer can be used with an offset into the second one.
6868
6969 ; SI-LABEL: {{^}}load_shl_base_lds_2:
70 ; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
70 ; SI: s_mov_b32 m0, -1
71 ; SI-NEXT: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
7172 ; SI-NEXT: ds_read2st64_b32 {{v\[[0-9]+:[0-9]+\]}}, [[PTR]] offset0:1 offset1:9 [M0]
7273 ; SI: s_endpgm
7374 define void @load_shl_base_lds_2(float addrspace(1)* %out) #0 {