llvm.org GIT mirror: llvm / commit 0c05ce4
AMDGPU: Disable AMDGPUPromoteAlloca pass for shader calling conventions.

Summary: The work item intrinsics are not available for the shader calling conventions, and even if we did hook them up, most shader stages have some extra restrictions on the amount of available LDS.

Reviewers: tstellarAMD, arsenm

Subscribers: nhaehnle, arsenm, llvm-commits, kzhuravl

Differential Revision: https://reviews.llvm.org/D20728

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@275779 91177308-0d34-0410-b5e6-96231b3b80d8

Author: Nicolai Haehnle
2 changed files with 35 additions and 0 deletions.
@@ -648,6 +648,12 @@

   const Function &ContainingFunction = *I.getParent()->getParent();

+  // Don't promote the alloca to LDS for shader calling conventions as the work
+  // item ID intrinsics are not supported for these calling conventions.
+  // Furthermore not all LDS is available for some of the stages.
+  if (AMDGPU::isShader(ContainingFunction.getCallingConv()))
+    return;
+
   // FIXME: We should also try to get this value from the reqd_work_group_size
   // function attribute if it is available.
   unsigned WorkGroupSize = AMDGPU::getMaximumWorkGroupSize(ContainingFunction);
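For reference, AMDGPU::isShader is a small helper that classifies a function's calling convention. A minimal sketch of such a check, assuming the graphics calling conventions AMDGPU_VS, AMDGPU_GS, AMDGPU_PS and AMDGPU_CS from llvm/IR/CallingConv.h (the exact set handled by the in-tree AMDGPU::isShader may differ), could look like this:

// Hypothetical sketch only; the real check used above is AMDGPU::isShader.
#include "llvm/IR/CallingConv.h"

static bool isShaderCallingConv(llvm::CallingConv::ID CC) {
  switch (CC) {
  case llvm::CallingConv::AMDGPU_VS: // vertex shader
  case llvm::CallingConv::AMDGPU_GS: // geometry shader
  case llvm::CallingConv::AMDGPU_PS: // pixel shader
  case llvm::CallingConv::AMDGPU_CS: // compute shader (graphics dispatch)
    return true;
  default:
    return false; // kernels and other conventions keep the LDS promotion
  }
}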
; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-promote-alloca < %s | FileCheck -check-prefix=IR %s
; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=ASM %s

; IR-LABEL: define amdgpu_vs void @promote_alloca_shaders(i32 addrspace(1)* inreg %out, i32 addrspace(1)* inreg %in) #0 {
; IR: alloca [5 x i32]
; ASM-LABEL: {{^}}promote_alloca_shaders:
; ASM: ; LDSByteSize: 0 bytes/workgroup (compile time only)

define amdgpu_vs void @promote_alloca_shaders(i32 addrspace(1)* inreg %out, i32 addrspace(1)* inreg %in) #0 {
entry:
  %stack = alloca [5 x i32], align 4
  %tmp0 = load i32, i32 addrspace(1)* %in, align 4
  %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %tmp0
  store i32 4, i32* %arrayidx1, align 4
  %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
  %tmp1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
  %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %tmp1
  store i32 5, i32* %arrayidx3, align 4
  %arrayidx4 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 0
  %tmp2 = load i32, i32* %arrayidx4, align 4
  store i32 %tmp2, i32 addrspace(1)* %out, align 4
  %arrayidx5 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 1
  %tmp3 = load i32, i32* %arrayidx5
  %arrayidx6 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
  store i32 %tmp3, i32 addrspace(1)* %arrayidx6
  ret void
}

attributes #0 = { nounwind "amdgpu-max-work-group-size"="64" }
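The two RUN lines check the same property at different levels: the opt run with -amdgpu-promote-alloca verifies (IR prefix) that the alloca of [5 x i32] survives in the amdgpu_vs function, i.e. it was not rewritten to use LDS, and the llc run for tonga verifies (ASM prefix) that the compiled shader reports "LDSByteSize: 0 bytes/workgroup", so no LDS is allocated for it.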