llvm.org GIT mirror llvm / 974695d
Merging r293310: ------------------------------------------------------------------------ r293310 | arsenm | 2017-01-27 09:42:26 -0800 (Fri, 27 Jan 2017) | 8 lines AMDGPU: Enable FeatureFlatForGlobal on Volcanic Islands Accomplishes what r292982 was supposed to, which ended up only really making the necessary test changes. This should be applied to the 4.0 branch. Patch by Vedran Miletić <vedran@miletic.net> ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_40@293329 91177308-0d34-0410-b5e6-96231b3b80d8 Matt Arsenault 2 years ago
7 changed file(s) with 67 addition(s) and 42 deletion(s). Raw diff Collapse all Expand all
281281 "Enable SI Machine Scheduler"
282282 >;
283283
284 def FeatureNoAddr64 : SubtargetFeature<"mubuf-no-addr64",
285 "NoAddr64",
286 "true",
287 "MUBUF instructions have addr64 bit"
288 >;
289
290284 // Unless +-flat-for-global is specified, turn on FlatForGlobal for
291285 // all OS-es on VI and newer hardware to avoid assertion failures due
292286 // to missing ADDR64 variants of MUBUF instructions.
296290 def FeatureFlatForGlobal : SubtargetFeature<"flat-for-global",
297291 "FlatForGlobal",
298292 "true",
299 "Force to generate flat instruction for global",
300 [FeatureNoAddr64]
293 "Force to generate flat instruction for global"
301294 >;
302295
303296 // Dummy feature used to disable assembler instructions.
349342 FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
350343 FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
351344 FeatureSMemRealTime, FeatureVGPRIndexMode, FeatureMovrel,
352 FeatureScalarStores, FeatureInv2PiInlineImm,
353 FeatureNoAddr64
345 FeatureScalarStores, FeatureInv2PiInlineImm
354346 ]
355347 >;
356348
4747
4848 ParseSubtargetFeatures(GPU, FullFS);
4949
50 // Unless +-flat-for-global is specified, turn on FlatForGlobal for all OS-es
51 // on VI and newer hardware to avoid assertion failures due to missing ADDR64
52 // variants of MUBUF instructions.
53 if (!hasAddr64() && !FS.contains("flat-for-global")) {
54 FlatForGlobal = true;
55 }
56
5057 // FIXME: I don't think Evergreen has any useful support for
5158 // denormals, but should be checked. Should we issue a warning somewhere
5259 // if someone tries to enable these?
8289 FP64Denormals(false),
8390 FPExceptions(false),
8491 FlatForGlobal(false),
85 NoAddr64(false),
8692 UnalignedScratchAccess(false),
8793 UnalignedBufferAccess(false),
8894
8585 bool FP64Denormals;
8686 bool FPExceptions;
8787 bool FlatForGlobal;
88 bool NoAddr64;
8988 bool UnalignedScratchAccess;
9089 bool UnalignedBufferAccess;
9190 bool EnableXNACK;
+0
-26
test/CodeGen/AMDGPU/ci-use-flat-for-global.ll less more
0 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=+flat-for-global < %s | FileCheck -check-prefix=HSA -check-prefix=HSA-DEFAULT -check-prefix=ALL %s
1 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global < %s | FileCheck -check-prefix=HSA -check-prefix=HSA-NODEFAULT -check-prefix=ALL %s
2 ; RUN: llc -mtriple=amdgcn-- -mcpu=kaveri -mattr=-flat-for-global < %s | FileCheck -check-prefix=NOHSA-DEFAULT -check-prefix=ALL %s
3 ; RUN: llc -mtriple=amdgcn-- -mcpu=kaveri -mattr=+flat-for-global < %s | FileCheck -check-prefix=NOHSA-NODEFAULT -check-prefix=ALL %s
4
5
6 ; There are no stack objects even though flat is used by default, so
7 ; flat_scratch_init should be disabled.
8
9 ; ALL-LABEL: {{^}}test:
10 ; HSA: .amd_kernel_code_t
11 ; HSA: enable_sgpr_flat_scratch_init = 0
12 ; HSA: .end_amd_kernel_code_t
13
14 ; ALL-NOT: flat_scr
15
16 ; HSA-DEFAULT: flat_store_dword
17 ; HSA-NODEFAULT: buffer_store_dword
18
19 ; NOHSA-DEFAULT: buffer_store_dword
20 ; NOHSA-NODEFAULT: flat_store_dword
21 define void @test(i32 addrspace(1)* %out) {
22 entry:
23 store i32 0, i32 addrspace(1)* %out
24 ret void
25 }
166166 }
167167
168168 attributes #0 = { nounwind readnone }
169 attributes #1 = { nounwind }
170 attributes #2 = { nounwind "target-features"="-fp16-denormals,-fp16-denormals" }
171 attributes #3 = { nounwind "target-features"="+fp16-denormals,+fp64-denormals" }
169 attributes #1 = { nounwind "target-features"="-flat-for-global" }
170 attributes #2 = { nounwind "target-features"="-flat-for-global,-fp16-denormals,-fp16-denormals" }
171 attributes #3 = { nounwind "target-features"="-flat-for-global,+fp16-denormals,+fp64-denormals" }
0 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=+flat-for-global < %s | FileCheck -check-prefix=HSA -check-prefix=HSA-DEFAULT -check-prefix=ALL %s
1 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global < %s | FileCheck -check-prefix=HSA -check-prefix=HSA-NODEFAULT -check-prefix=ALL %s
2 ; RUN: llc -mtriple=amdgcn-- -mcpu=tonga < %s | FileCheck -check-prefix=HSA-NOADDR64 -check-prefix=ALL %s
3 ; RUN: llc -mtriple=amdgcn-- -mcpu=kaveri -mattr=-flat-for-global < %s | FileCheck -check-prefix=NOHSA-DEFAULT -check-prefix=ALL %s
4 ; RUN: llc -mtriple=amdgcn-- -mcpu=kaveri -mattr=+flat-for-global < %s | FileCheck -check-prefix=NOHSA-NODEFAULT -check-prefix=ALL %s
5 ; RUN: llc -mtriple=amdgcn-- -mcpu=tonga < %s | FileCheck -check-prefix=NOHSA-NOADDR64 -check-prefix=ALL %s
6
7
8 ; There are no stack objects even though flat is used by default, so
9 ; flat_scratch_init should be disabled.
10
11 ; ALL-LABEL: {{^}}test:
12 ; HSA: .amd_kernel_code_t
13 ; HSA: enable_sgpr_flat_scratch_init = 0
14 ; HSA: .end_amd_kernel_code_t
15
16 ; ALL-NOT: flat_scr
17
18 ; HSA-DEFAULT: flat_store_dword
19 ; HSA-NODEFAULT: buffer_store_dword
20 ; HSA-NOADDR64: flat_store_dword
21
22 ; NOHSA-DEFAULT: buffer_store_dword
23 ; NOHSA-NODEFAULT: flat_store_dword
24 ; NOHSA-NOADDR64: flat_store_dword
25 define void @test(i32 addrspace(1)* %out) {
26 entry:
27 store i32 0, i32 addrspace(1)* %out
28 ret void
29 }
30
31 ; HSA-DEFAULT: flat_store_dword
32 ; HSA-NODEFAULT: buffer_store_dword
33 ; HSA-NOADDR64: flat_store_dword
34
35 ; NOHSA-DEFAULT: buffer_store_dword
36 ; NOHSA-NODEFAULT: flat_store_dword
37 ; NOHSA-NOADDR64: flat_store_dword
38 define void @test_addr64(i32 addrspace(1)* %out) {
39 entry:
40 %out.addr = alloca i32 addrspace(1)*, align 4
41
42 store i32 addrspace(1)* %out, i32 addrspace(1)** %out.addr, align 4
43 %ld0 = load i32 addrspace(1)*, i32 addrspace(1)** %out.addr, align 4
44
45 %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %ld0, i32 0
46 store i32 1, i32 addrspace(1)* %arrayidx, align 4
47
48 %ld1 = load i32 addrspace(1)*, i32 addrspace(1)** %out.addr, align 4
49 %arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %ld1, i32 1
50 store i32 2, i32 addrspace(1)* %arrayidx1, align 4
51
52 ret void
53 }
0 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
1 ; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
1 ; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
22
33 ; GCN-LABEL: {{^}}madak_f16
44 ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]