llvm.org GIT mirror llvm / c95a737
Merging r287339:
------------------------------------------------------------------------
r287339 | nhaehnle | 2016-11-18 03:55:52 -0800 (Fri, 18 Nov 2016) | 20 lines

AMDGPU: Fix legalization of MUBUF instructions in shaders

Summary:
The addr64-based legalization is incorrect for MUBUF instructions with
idxen set as well as for BUFFER_LOAD/STORE_FORMAT_* instructions. This
affects e.g. shaders that access buffer textures.

Since we never actually need the addr64-legalization in shaders, this patch
takes the easy route and keys off the calling convention. If this ever
affects (non-OpenGL) compute, the type of legalization needs to be chosen
based on some TSFlag.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98664

Reviewers: arsenm, tstellarAMD

Subscribers: kzhuravl, wdng, yaxunl, tony-tye, llvm-commits

Differential Revision: https://reviews.llvm.org/D26747
------------------------------------------------------------------------

git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_39@288106 91177308-0d34-0410-b5e6-96231b3b80d8

Tom Stellard  2 years ago
2 changed file(s) with 62 addition(s) and 5 deletion(s).
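For context, the fix keys the choice of legalization off the function's calling convention via AMDGPU::isShader, as seen in the first hunk below. The following is a minimal sketch of that kind of predicate; the exact set of calling conventions listed here is an assumption (the authoritative helper lives in AMDGPUBaseInfo), and it only illustrates why amdgpu_vs/amdgpu_ps-style entry points never need the addr64 path.

#include "llvm/IR/CallingConv.h"

using namespace llvm;

// Sketch only: true for AMDGPU shader calling conventions, i.e. the entry
// points for which addr64-based MUBUF legalization is never needed. The
// exact list is an assumption; see AMDGPU::isShader in AMDGPUBaseInfo for
// the real predicate used by the patch.
static bool isShaderCallingConv(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
    return true;
  default:
    return false;
  }
}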
--- a/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2202,7 +2202,8 @@
 }
 
 void SIInstrInfo::legalizeOperands(MachineInstr &MI) const {
-  MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+  MachineFunction &MF = *MI.getParent()->getParent();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
 
   // Legalize VOP2
   if (isVOP2(MI) || isVOPC(MI)) {
@@ -2320,8 +2321,14 @@
     return;
   }
 
-  // Legalize MIMG
-  if (isMIMG(MI)) {
+  // Legalize MIMG and MUBUF/MTBUF for shaders.
+  //
+  // Shaders only generate MUBUF/MTBUF instructions via intrinsics or via
+  // scratch memory access. In both cases, the legalization never involves
+  // conversion to the addr64 form.
+  if (isMIMG(MI) ||
+      (AMDGPU::isShader(MF.getFunction()->getCallingConv()) &&
+       (isMUBUF(MI) || isMTBUF(MI)))) {
     MachineOperand *SRsrc = getNamedOperand(MI, AMDGPU::OpName::srsrc);
     if (SRsrc && !RI.isSGPRClass(MRI.getRegClass(SRsrc->getReg()))) {
       unsigned SGPR = readlaneVGPRToSGPR(SRsrc->getReg(), MI, MRI);
@@ -2336,9 +2343,10 @@
     return;
   }
 
-  // Legalize MUBUF* instructions
+  // Legalize MUBUF* instructions by converting to addr64 form.
   // FIXME: If we start using the non-addr64 instructions for compute, we
-  // may need to legalize them here.
+  // may need to legalize them as above. This especially applies to the
+  // buffer_load_format_* variants and variants with idxen (or bothen).
   int SRsrcIdx =
       AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
   if (SRsrcIdx != -1) {
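The shader branch taken above relies on readlaneVGPRToSGPR to move a VGPR resource descriptor into SGPRs instead of rewriting the instruction to its addr64 form. Below is a rough sketch of that kind of helper, assuming the usual V_READFIRSTLANE_B32 plus REG_SEQUENCE pattern; the function name, signature, and details are illustrative and not a copy of the in-tree implementation.

#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;

// Illustrative sketch: copy each 32-bit lane of a VGPR descriptor into an
// SGPR with V_READFIRSTLANE_B32, then reassemble the pieces with a
// REG_SEQUENCE. This assumes the descriptor is wave-uniform, which is the
// case for the resource descriptors legalized above.
static unsigned copyDescriptorToSGPR(const SIInstrInfo &TII,
                                     const SIRegisterInfo &TRI,
                                     unsigned SrcReg, MachineInstr &UseMI,
                                     MachineRegisterInfo &MRI) {
  const TargetRegisterClass *VRC = MRI.getRegClass(SrcReg);
  const TargetRegisterClass *SRC = TRI.getEquivalentSGPRClass(VRC);
  unsigned DstReg = MRI.createVirtualRegister(SRC);
  unsigned NumLanes = VRC->getSize() / 4; // number of 32-bit sub-registers

  SmallVector<unsigned, 8> SRegs;
  for (unsigned i = 0; i != NumLanes; ++i) {
    unsigned SGPR = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
    BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(),
            TII.get(AMDGPU::V_READFIRSTLANE_B32), SGPR)
        .addReg(SrcReg, 0, TRI.getSubRegFromChannel(i));
    SRegs.push_back(SGPR);
  }

  MachineInstrBuilder MIB =
      BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(),
              TII.get(AMDGPU::REG_SEQUENCE), DstReg);
  for (unsigned i = 0; i != NumLanes; ++i) {
    MIB.addReg(SRegs[i]);
    MIB.addImm(TRI.getSubRegFromChannel(i));
  }
  return DstReg;
}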
New test file (49 lines added):

;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s -check-prefix=CHECK
;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s -check-prefix=CHECK

; Test that buffer_load_format with VGPR resource descriptor is properly
; legalized.

; CHECK-LABEL: {{^}}test_none:
; CHECK: buffer_load_format_x v0, off, {{s\[[0-9]+:[0-9]+\]}}, 0{{$}}
define amdgpu_vs float @test_none(<4 x i32> addrspace(2)* inreg %base, i32 %i) {
main_body:
  %ptr = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %base, i32 %i
  %tmp2 = load <4 x i32>, <4 x i32> addrspace(2)* %ptr, align 32
  %tmp7 = call float @llvm.amdgcn.buffer.load.format.f32(<4 x i32> %tmp2, i32 0, i32 0, i1 0, i1 0)
  ret float %tmp7
}

; CHECK-LABEL: {{^}}test_idxen:
; CHECK: buffer_load_format_x v0, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 idxen{{$}}
define amdgpu_vs float @test_idxen(<4 x i32> addrspace(2)* inreg %base, i32 %i) {
main_body:
  %ptr = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %base, i32 %i
  %tmp2 = load <4 x i32>, <4 x i32> addrspace(2)* %ptr, align 32
  %tmp7 = call float @llvm.amdgcn.buffer.load.format.f32(<4 x i32> %tmp2, i32 undef, i32 0, i1 0, i1 0)
  ret float %tmp7
}

; CHECK-LABEL: {{^}}test_offen:
; CHECK: buffer_load_format_x v0, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen{{$}}
define amdgpu_vs float @test_offen(<4 x i32> addrspace(2)* inreg %base, i32 %i) {
main_body:
  %ptr = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %base, i32 %i
  %tmp2 = load <4 x i32>, <4 x i32> addrspace(2)* %ptr, align 32
  %tmp7 = call float @llvm.amdgcn.buffer.load.format.f32(<4 x i32> %tmp2, i32 0, i32 undef, i1 0, i1 0)
  ret float %tmp7
}

; CHECK-LABEL: {{^}}test_both:
; CHECK: buffer_load_format_x v0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 idxen offen{{$}}
define amdgpu_vs float @test_both(<4 x i32> addrspace(2)* inreg %base, i32 %i) {
main_body:
  %ptr = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %base, i32 %i
  %tmp2 = load <4 x i32>, <4 x i32> addrspace(2)* %ptr, align 32
  %tmp7 = call float @llvm.amdgcn.buffer.load.format.f32(<4 x i32> %tmp2, i32 undef, i32 undef, i1 0, i1 0)
  ret float %tmp7
}

declare float @llvm.amdgcn.buffer.load.format.f32(<4 x i32>, i32, i32, i1, i1) nounwind readonly

attributes #0 = { nounwind readnone }