llvm.org GIT mirror llvm / 51588a0
AMDGPU: Fix S_BUFFER_LOAD_DWORD_SGPR moveToVALU
Author: Bas Nieuwenhuizen
https://reviews.llvm.org/D42881
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@324353 91177308-0d34-0410-b5e6-96231b3b80d8
Marek Olsak 2 years ago
2 changed file(s) with 42 addition(s) and 2 deletion(s). Raw diff Collapse all Expand all
37963796 }
37973797 }
37983798
3799 BuildMI(*MBB, Inst, Inst.getDebugLoc(),
3799 MachineInstr *NewInstr =
3800 BuildMI(*MBB, Inst, Inst.getDebugLoc(),
38003801 get(AMDGPU::BUFFER_LOAD_DWORD_OFFEN), VDst)
38013802 .add(*VAddr) // vaddr
38023803 .add(*getNamedOperand(Inst, AMDGPU::OpName::sbase)) // srsrc
38053806 .addImm(getNamedOperand(Inst, AMDGPU::OpName::glc)->getImm())
38063807 .addImm(0) // slc
38073808 .addImm(0) // tfe
3808 .setMemRefs(Inst.memoperands_begin(), Inst.memoperands_end());
3809 .setMemRefs(Inst.memoperands_begin(), Inst.memoperands_end())
3810 .getInstr();
38093811
38103812 MRI.replaceRegWith(getNamedOperand(Inst, AMDGPU::OpName::sdst)->getReg(),
38113813 VDst);
38123814 addUsersToMoveToVALUWorklist(VDst, MRI, Worklist);
38133815 Inst.eraseFromParent();
3816
3817 // Legalize all operands other than the offset. Notably, convert the srsrc
3818 // into SGPRs using v_readfirstlane if needed.
3819 legalizeOperands(*NewInstr);
38143820 continue;
38153821 }
38163822 }
260260 ret void
261261 }
262262
263 ; GCN-LABEL: {{^}}smrd_sgpr_descriptor_promoted
264 ; GCN: v_readfirstlane
265 define amdgpu_cs void @smrd_sgpr_descriptor_promoted([0 x i8] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), i32) #0 {
266 main_body:
267 %descptr = bitcast [0 x i8] addrspace(2)* %0 to <4 x i32> addrspace(2)*, !amdgpu.uniform !0
268 br label %.outer_loop_header
269
270 ret_block: ; preds = %.outer, %.label22, %main_body
271 ret void
272
273 .outer_loop_header:
274 br label %.inner_loop_header
275
276 .inner_loop_header: ; preds = %.inner_loop_body, %.outer_loop_header
277 %loopctr.1 = phi i32 [ 0, %.outer_loop_header ], [ %loopctr.2, %.inner_loop_body ]
278 %loopctr.2 = add i32 %loopctr.1, 1
279 %inner_br1 = icmp slt i32 %loopctr.2, 10
280 br i1 %inner_br1, label %.inner_loop_body, label %ret_block
281
282 .inner_loop_body:
283 %descriptor = load <4 x i32>, <4 x i32> addrspace(2)* %descptr, align 16, !invariant.load !0
284 %load1result = call float @llvm.SI.load.const.v4i32(<4 x i32> %descriptor, i32 0)
285 %inner_br2 = icmp uge i32 %1, 10
286 br i1 %inner_br2, label %.inner_loop_header, label %.outer_loop_body
287
288 .outer_loop_body:
289 %offset = shl i32 %loopctr.2, 6
290 %load2result = call float @llvm.SI.load.const.v4i32(<4 x i32> %descriptor, i32 %offset)
291 %outer_br = fcmp ueq float %load2result, 0x0
292 br i1 %outer_br, label %.outer_loop_header, label %ret_block
293 }
294
263295 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
264296 declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1
265297
266298 attributes #0 = { nounwind }
267299 attributes #1 = { nounwind readnone }
300
301 !0 = !{}