llvm.org GIT mirror llvm / 751b64f
Merging r266105:
------------------------------------------------------------------------
r266105 | thomas.stellard | 2016-04-12 11:40:43 -0700 (Tue, 12 Apr 2016) | 15 lines

AMDGPU/SI: Insert wait states required after v_readfirstlane on SI

Summary:
We will be able to handle this case much better once the hazard recognizer is finished, but this conservative implementation fixes a hang with the piglit test:
spec/arb_arrays_of_arrays/execution/sampler/fs-nested-struct-arrays-nonconst-nested-arra

Reviewers: arsenm, nhaehnle

Subscribers: arsenm, llvm-commits

Differential Revision: http://reviews.llvm.org/D18988
------------------------------------------------------------------------

git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@271731 91177308-0d34-0410-b5e6-96231b3b80d8

Tom Stellard 3 years ago
3 changed file(s) with 9 addition(s) and 1 deletion(s). Raw diff Collapse all Expand all
473473 TII = static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
474474 TRI =
475475 static_cast<const SIRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
476
476 const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
477477 MRI = &MF.getRegInfo();
478478
479479 WaitedOn = ZeroCounts;
492492 for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
493493 I != E; ++I) {
494494
495 // Insert required wait states for SMRD reading an SGPR written by a VALU
496 // instruction.
497 if (ST.getGeneration() <= AMDGPUSubtarget::SOUTHERN_ISLANDS &&
498 I->getOpcode() == AMDGPU::V_READFIRSTLANE_B32)
499 TII->insertWaitStates(std::next(I), 4);
500
495501 // Wait for everything before a barrier.
496502 if (I->getOpcode() == AMDGPU::S_BARRIER)
497503 Changes |= insertWait(MBB, I, LastIssued);
99 ; SI: buffer_store_dword
1010 ; SI: v_readfirstlane_b32 s[[PTR_LO:[0-9]+]], v{{[0-9]+}}
1111 ; SI: v_readfirstlane_b32 s[[PTR_HI:[0-9]+]], v{{[0-9]+}}
12 ; SI-NEXT: s_nop
1213 ; SI: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}
1314 ; SI: buffer_store_dword
1415 ; SI: s_endpgm
5555 ; SI-DAG: s_movk_i32 [[OFFSET:s[0-9]+]], 0x2ee0
5656 ; GCN-DAG: v_readfirstlane_b32 s[[PTR_LO:[0-9]+]], v{{[0-9]+}}
5757 ; GCN: v_readfirstlane_b32 s[[PTR_HI:[0-9]+]], v{{[0-9]+}}
58 ; SI-NEXT: s_nop
5859 ; SI: s_load_dword [[OUT:s[0-9]+]], s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, [[OFFSET]]
5960 ; CI: s_load_dword [[OUT:s[0-9]+]], s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0xbb8
6061 ; GCN: v_mov_b32_e32 [[V_OUT:v[0-9]+]], [[OUT]]