llvm.org GIT mirror llvm / ad396b6
AMDGPU: enable 128-bit for local addr space under an option Author: Samuel Pitoiset ds_read_b128 and ds_write_b128 have been recently enabled under the amdgpu-ds128 option because the performance benefit is unclear. Though, using 128-bit loads/stores for the local address space appears to introduce regressions in tessellation shaders. Not sure what is broken, but as ds_read_b128/ds_write_b128 are not enabled by default, just introduce a global option and enable 128-bit only if requested (until it's fixed/used correctly). Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105464 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@329591 91177308-0d34-0410-b5e6-96231b3b80d8 Marek Olsak 1 year, 10 months ago
11 changed file(s) with 31 addition(s) and 26 deletion(s). Raw diff Collapse all Expand all
423423 "EnableSIScheduler",
424424 "true",
425425 "Enable SI Machine Scheduler"
426 >;
427
428 def FeatureEnableDS128 : SubtargetFeature<"enable-ds128",
429 "EnableDS128",
430 "true",
431 "Use ds_{read|write}_b128"
426432 >;
427433
428434 // Unless +-flat-for-global is specified, turn on FlatForGlobal for
131131 EnableLoadStoreOpt(false),
132132 EnableUnsafeDSOffsetFolding(false),
133133 EnableSIScheduler(false),
134 EnableDS128(false),
134135 DumpCode(false),
135136
136137 FP64(false),
132132 bool EnableLoadStoreOpt;
133133 bool EnableUnsafeDSOffsetFolding;
134134 bool EnableSIScheduler;
135 bool EnableDS128;
135136 bool DumpCode;
136137
137138 // Subtarget static properties set by tablegen
411412
412413 /// \returns If target supports ds_read/write_b128 and user enables generation
413414 /// of ds_read/write_b128.
414 bool useDS128(bool UserEnable) const {
415 return CIInsts && UserEnable;
415 bool useDS128() const {
416 return CIInsts && EnableDS128;
416417 }
417418
418419 /// \returns If MUBUF instructions always perform range checking, even for
264264 return 512;
265265 }
266266
267 if (AddrSpace == AS.FLAT_ADDRESS ||
268 AddrSpace == AS.LOCAL_ADDRESS ||
267 if (AddrSpace == AS.FLAT_ADDRESS)
268 return 128;
269
270 if (AddrSpace == AS.LOCAL_ADDRESS ||
269271 AddrSpace == AS.REGION_ADDRESS)
270 return 128;
272 return ST->useDS128() ? 128 : 64;
271273
272274 if (AddrSpace == AS.PRIVATE_ADDRESS)
273275 return 8 * ST->getMaxPrivateElementSize();
9393 cl::desc("Use GPR indexing mode instead of movrel for vector indexing"),
9494 cl::init(false));
9595
96 static cl::opt<bool> EnableDS128(
97 "amdgpu-ds128",
98 cl::desc("Use DS_read/write_b128"),
99 cl::init(false));
100
10196 static cl::opt<unsigned> AssumeFrameIndexHighZeroBits(
10297 "amdgpu-frame-index-zero-bits",
10398 cl::desc("High bits of frame index assumed to be zero"),
52995294 }
53005295 } else if (AS == AMDGPUASI.LOCAL_ADDRESS) {
53015296 // Use ds_read_b128 if possible.
5302 if (Subtarget->useDS128(EnableDS128) && Load->getAlignment() >= 16 &&
5297 if (Subtarget->useDS128() && Load->getAlignment() >= 16 &&
53035298 MemVT.getStoreSize() == 16)
53045299 return SDValue();
53055300
57025697 }
57035698 } else if (AS == AMDGPUASI.LOCAL_ADDRESS) {
57045699 // Use ds_write_b128 if possible.
5705 if (Subtarget->useDS128(EnableDS128) && Store->getAlignment() >= 16 &&
5700 if (Subtarget->useDS128() && Store->getAlignment() >= 16 &&
57065701 VT.getStoreSize() == 16)
57075702 return SDValue();
57085703
22 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefixes=EG,FUNC %s
33
44 ; Testing for ds_read/write_b128
5 ; RUN: llc -march=amdgcn -mcpu=tahiti -amdgpu-ds128 < %s | FileCheck -check-prefixes=SI,FUNC %s
6 ; RUN: llc -march=amdgcn -mcpu=tonga -amdgpu-ds128 < %s | FileCheck -check-prefixes=CIVI,FUNC %s
7 ; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-ds128 < %s | FileCheck -check-prefixes=CIVI,FUNC %s
5 ; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=+enable-ds128 < %s | FileCheck -check-prefixes=SI,FUNC %s
6 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+enable-ds128 < %s | FileCheck -check-prefixes=CIVI,FUNC %s
7 ; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+enable-ds128 < %s | FileCheck -check-prefixes=CIVI,FUNC %s
88
99 ; FUNC-LABEL: {{^}}load_f32_local:
1010 ; SICIVI: s_mov_b32 m0
44 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefixes=EG,FUNC %s
55
66 ; Testing for ds_read_b128
7 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -amdgpu-ds128 < %s | FileCheck -check-prefixes=CIVI,FUNC %s
8 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -amdgpu-ds128 < %s | FileCheck -check-prefixes=CIVI,FUNC %s
7 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -mattr=+enable-ds128 < %s | FileCheck -check-prefixes=CIVI,FUNC %s
8 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=+enable-ds128 < %s | FileCheck -check-prefixes=CIVI,FUNC %s
99
1010 ; FUNC-LABEL: {{^}}local_load_f64:
1111 ; SICIV: s_mov_b32 m0
33 ; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
44
55 ; Testing for ds_read/write_b128
6 ; RUN: llc -march=amdgcn -mcpu=tonga -amdgpu-ds128 < %s | FileCheck -check-prefixes=CIVI,FUNC %s
7 ; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-ds128 < %s | FileCheck -check-prefixes=CIVI,FUNC %s
6 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+enable-ds128 < %s | FileCheck -check-prefixes=CIVI,FUNC %s
7 ; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+enable-ds128 < %s | FileCheck -check-prefixes=CIVI,FUNC %s
88
99 ; FUNC-LABEL: {{^}}local_load_i16:
1010 ; GFX9-NOT: m0
33 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
44
55 ; Testing for ds_read/write_b128
6 ; RUN: llc -march=amdgcn -mcpu=tahiti -amdgpu-ds128 < %s | FileCheck -check-prefixes=SI,FUNC %s
7 ; RUN: llc -march=amdgcn -mcpu=tonga -amdgpu-ds128 < %s | FileCheck -check-prefixes=CIVI,FUNC %s
8 ; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-ds128 < %s | FileCheck -check-prefixes=CIVI,FUNC %s
6 ; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=+enable-ds128 < %s | FileCheck -check-prefixes=SI,FUNC %s
7 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+enable-ds128 < %s | FileCheck -check-prefixes=CIVI,FUNC %s
8 ; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+enable-ds128 < %s | FileCheck -check-prefixes=CIVI,FUNC %s
99
1010 ; FUNC-LABEL: {{^}}local_load_i32:
1111 ; GCN-NOT: s_wqm_b64
44 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefixes=EG,FUNC %s
55
66 ; Testing for ds_read/write_b128
7 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -amdgpu-ds128 < %s | FileCheck -check-prefixes=CIVI,FUNC %s
8 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -amdgpu-ds128 < %s | FileCheck -check-prefixes=CIVI,FUNC %s
7 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -mattr=+enable-ds128 < %s | FileCheck -check-prefixes=CIVI,FUNC %s
8 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=+enable-ds128 < %s | FileCheck -check-prefixes=CIVI,FUNC %s
99
1010 ; FUNC-LABEL: {{^}}local_load_i64:
1111 ; SICIVI: s_mov_b32 m0
33 ; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
44
55 ; Testing for ds_read/write_b128
6 ; RUN: llc -march=amdgcn -mcpu=tonga -amdgpu-ds128 < %s | FileCheck -check-prefixes=CIVI,FUNC %s
7 ; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-ds128 < %s | FileCheck -check-prefixes=CIVI,FUNC %s
6 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+enable-ds128 < %s | FileCheck -check-prefixes=CIVI,FUNC %s
7 ; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+enable-ds128 < %s | FileCheck -check-prefixes=CIVI,FUNC %s
88
99 ; FUNC-LABEL: {{^}}local_load_i8:
1010 ; GCN-NOT: s_wqm_b64