llvm.org GIT mirror llvm / e16a4c1
AMDGPU: Fold inline offset for loads properly in moveToVALU on GFX9 Summary: This enables load merging into x2, x4, which is driven by inline offsets. 6500 shaders are affected: Code Size in affected shaders: -15.14 % Reviewers: arsenm, nhaehnle Subscribers: kzhuravl, wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits Differential Revision: https://reviews.llvm.org/D42078 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@323909 91177308-0d34-0410-b5e6-96231b3b80d8 Marek Olsak 2 years ago
2 changed file(s) with 35 addition(s) and 38 deletion(s). Raw diff Collapse all Expand all
37553755 // FIXME: This isn't safe because the addressing mode doesn't work
37563756 // correctly if vaddr is negative.
37573757 //
3758 // FIXME: Handle v_add_u32 and VOP3 form. Also don't rely on immediate
3759 // being in src0.
3760 //
37613758 // FIXME: Should probably be done somewhere else, maybe SIFoldOperands.
37623759 //
37633760 // See if we can extract an immediate offset by recognizing one of these:
37643761 // V_ADD_I32_e32 dst, imm, src1
37653762 // V_ADD_I32_e32 dst, (S_MOV_B32 imm), src1
37663763 // V_ADD will be removed by "Remove dead machine instructions".
3767 if (Add && Add->getOpcode() == AMDGPU::V_ADD_I32_e32) {
3768 const MachineOperand *Src =
3769 getNamedOperand(*Add, AMDGPU::OpName::src0);
3770
3771 if (Src->isReg()) {
3772 auto Mov = MRI.getUniqueVRegDef(Src->getReg());
3773 if (Mov && Mov->getOpcode() == AMDGPU::S_MOV_B32)
3774 Src = &Mov->getOperand(1);
3764 if (Add &&
3765 (Add->getOpcode() == AMDGPU::V_ADD_I32_e32 ||
3766 Add->getOpcode() == AMDGPU::V_ADD_U32_e64)) {
3767 static const unsigned SrcNames[2] = {
3768 AMDGPU::OpName::src0,
3769 AMDGPU::OpName::src1,
3770 };
3771
3772 // Find a literal offset in one of the source operands.
3773 for (int i = 0; i < 2; i++) {
3774 const MachineOperand *Src =
3775 getNamedOperand(*Add, SrcNames[i]);
3776
3777 if (Src->isReg()) {
3778 auto Mov = MRI.getUniqueVRegDef(Src->getReg());
3779 if (Mov && Mov->getOpcode() == AMDGPU::S_MOV_B32)
3780 Src = &Mov->getOperand(1);
3781 }
3782
3783 if (Src) {
3784 if (Src->isImm())
3785 Offset = Src->getImm();
3786 else if (Src->isCImm())
3787 Offset = Src->getCImm()->getZExtValue();
3788 }
3789
3790 if (Offset && isLegalMUBUFImmOffset(Offset)) {
3791 VAddr = getNamedOperand(*Add, SrcNames[!i]);
3792 break;
3793 }
3794
3795 Offset = 0;
37753796 }
3776
3777 if (Src) {
3778 if (Src->isImm())
3779 Offset = Src->getImm();
3780 else if (Src->isCImm())
3781 Offset = Src->getCImm()->getZExtValue();
3782 }
3783
3784 if (Offset && isLegalMUBUFImmOffset(Offset))
3785 VAddr = getNamedOperand(*Add, AMDGPU::OpName::src1);
3786 else
3787 Offset = 0;
37883797 }
37893798
37903799 BuildMI(*MBB, Inst, Inst.getDebugLoc(),
193193
194194 ; GCN-LABEL: {{^}}smrd_vgpr_offset_imm:
195195 ; GCN-NEXT: %bb.
196
197 ; SICIVI-NEXT: buffer_load_dword v{{[0-9]}}, v0, s[0:3], 0 offen offset:4095 ;
198
199 ; GFX9-NEXT: v_add_u32_e32 [[ADD:v[0-9]+]], 0xfff, v0
200 ; GFX9-NEXT: buffer_load_dword v{{[0-9]}}, [[ADD]], s[0:3], 0 offen ;
196 ; GCN-NEXT: buffer_load_dword v{{[0-9]}}, v0, s[0:3], 0 offen offset:4095 ;
201197 define amdgpu_ps float @smrd_vgpr_offset_imm(<4 x i32> inreg %desc, i32 %offset) #0 {
202198 main_body:
203199 %off = add i32 %offset, 4095
243239
244240 ; GCN-LABEL: {{^}}smrd_vgpr_merged:
245241 ; GCN-NEXT: %bb.
246
247 ; SICIVI-NEXT: buffer_load_dwordx4 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:4
248 ; SICIVI-NEXT: buffer_load_dwordx2 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:28
249
250 ; GFX9: buffer_load_dword
251 ; GFX9: buffer_load_dword
252 ; GFX9: buffer_load_dword
253 ; GFX9: buffer_load_dword
254 ; GFX9: buffer_load_dword
255 ; GFX9: buffer_load_dword
242 ; GCN-NEXT: buffer_load_dwordx4 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:4
243 ; GCN-NEXT: buffer_load_dwordx2 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:28
256244 define amdgpu_ps void @smrd_vgpr_merged(<4 x i32> inreg %desc, i32 %a) #0 {
257245 main_body:
258246 %a1 = add i32 %a, 4