llvm.org GIT mirror llvm / 8cea59c
AMDGPU/InsertWaitcnts: Update VGPR/SGPR bounds when brackets are merged

Summary: Fix an issue where VGPR/SGPR bounds are not properly extended when brackets are merged. This manifests as missing waitcnt insertions when multiple brackets are forwarded to a successor block and the first forward has lower VGPR/SGPR bounds. The irreducible loop test has been extended based on a CTS failure detected for GFX9.

Reviewers: nhaehnle
Reviewed By: nhaehnle
Subscribers: arsenm, kzhuravl, jvesely, wdng, yaxunl, dstuttard, tpr, t-tye, jfb, llvm-commits
Differential Revision: https://reviews.llvm.org/D55602
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@349611 91177308-0d34-0410-b5e6-96231b3b80d8

Carl Ritson 10 months ago
2 changed file(s) with 67 addition(s) and 8 deletion(s). Raw diff Collapse all Expand all
12151215 StrictDom = true;
12161216 }
12171217
1218 VgprUB = std::max(getMaxVGPR(), Other.getMaxVGPR());
1219 SgprUB = std::max(getMaxSGPR(), Other.getMaxSGPR());
1220
12181221 return StrictDom;
12191222 }
12201223
None # RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck -check-prefixes=GCN %s
0 # RUN: llc -march=amdgcn -mcpu=gfx803 -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck -check-prefixes=GCN,GFX8 %s
1 # RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck -check-prefixes=GCN,GFX9 %s
12
2 # GCN-LABEL: name: irreducible_loop{{$}}
3 # GCN: S_LOAD_DWORDX4_IMM
4 # GCN: S_WAITCNT 127{{$}}
5 # GCN: S_BUFFER_LOAD_DWORD_IMM
6 # GCN: S_WAITCNT 127{{$}}
7 # GCN: S_CMP_GE_I32
83 --- |
94
105 define amdgpu_ps void @irreducible_loop() {
11 main:
6 ret void
7 }
8 define amdgpu_ps void @irreducible_loop_extended() {
129 ret void
1310 }
1411
1512 ...
1613 ---
14
15 # GCN-LABEL: name: irreducible_loop{{$}}
16 # GCN: S_LOAD_DWORDX4_IMM
17 # GFX8: S_WAITCNT 127{{$}}
18 # GFX9: S_WAITCNT 49279{{$}}
19 # GCN: S_BUFFER_LOAD_DWORD_IMM
20 # GFX8: S_WAITCNT 127{{$}}
21 # GFX9: S_WAITCNT 49279{{$}}
22 # GCN: S_CMP_GE_I32
1723 name: irreducible_loop
1824 body: |
1925 bb.0:
4450 S_ENDPGM
4551
4652 ...
53
54 # GCN-LABEL: name: irreducible_loop_extended
55
56 # GCN: S_LOAD_DWORDX4_IMM
57 # GFX8: S_WAITCNT 127{{$}}
58 # GFX9: S_WAITCNT 49279{{$}}
59 # GCN: BUFFER_STORE_DWORD_OFFEN_exact
60 # GFX8: S_WAITCNT 127{{$}}
61 # GFX9: S_WAITCNT 49279{{$}}
62 # GCN: BUFFER_STORE_DWORD_OFFEN_exact
63 # GCN: S_LOAD_DWORDX4_IMM
64 # GFX8: S_WAITCNT 127{{$}}
65 # GFX9: S_WAITCNT 49279{{$}}
66 # GCN: BUFFER_ATOMIC_ADD_OFFSET_RTN
67 # GCN: S_WAITCNT 3952
68 # GCN: FLAT_STORE_DWORD
69 # GCN: S_ENDPGM
70 name: irreducible_loop_extended
71
72 body: |
73 bb.0:
74 successors: %bb.1, %bb.2
75 $sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX4_IMM renamable $sgpr2_sgpr3, 0, 0
76 S_CBRANCH_VCCZ %bb.2, implicit $vcc
77
78 bb.1:
79 successors: %bb.2
80 BUFFER_STORE_DWORD_OFFEN_exact killed renamable $vgpr3, renamable $vgpr2, renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec
81
82 bb.2:
83 successors: %bb.3, %bb.6
84 S_CBRANCH_VCCNZ %bb.6, implicit $vcc
85
86 bb.3:
87 successors: %bb.4, %bb.5
88 BUFFER_STORE_DWORD_OFFEN_exact killed renamable $vgpr3, killed renamable $vgpr2, killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec
89 S_CBRANCH_VCCNZ %bb.5, implicit $vcc
90
91 bb.4:
92 successors: %bb.5
93 renamable $sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX4_IMM killed renamable $sgpr2_sgpr3, 64, 0
94 renamable $vgpr2 = BUFFER_ATOMIC_ADD_OFFSET_RTN killed renamable $vgpr2, killed renamable $sgpr12_sgpr13_sgpr14_sgpr15, 0, 0, 0, implicit $exec
95
96 bb.5:
97 successors: %bb.6
98
99 bb.6:
100 FLAT_STORE_DWORD $vgpr3_vgpr4, $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr
101 S_ENDPGM
102 ...