llvm.org GIT mirror llvm / 2df5f8c
AMDGPU: Increase vcc liveness scan threshold. Avoids a test regression in a future patch. Also add debug printing on this case, so I waste less time debugging folds in the future. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@375367 91177308-0d34-0410-b5e6-96231b3b80d8 Matt Arsenault 1 year, 1 month ago
6 changed file(s) with 12 addition(s) and 16 deletion(s). Raw diff Collapse all Expand all
238238
239239 if ((Fold.isImm() || Fold.isFI() || Fold.isGlobal()) && Fold.needsShrink()) {
240240 MachineBasicBlock *MBB = MI->getParent();
241 auto Liveness = MBB->computeRegisterLiveness(&TRI, AMDGPU::VCC, MI);
242 if (Liveness != MachineBasicBlock::LQR_Dead)
241 auto Liveness = MBB->computeRegisterLiveness(&TRI, AMDGPU::VCC, MI, 16);
242 if (Liveness != MachineBasicBlock::LQR_Dead) {
243 LLVM_DEBUG(dbgs() << "Not shrinking " << MI << " due to vcc liveness\n");
243244 return false;
245 }
244246
245247 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
246248 int Op32 = Fold.getShrinkOpcode();
244244 ; SI-NEXT: s_mov_b32 s6, s10
245245 ; SI-NEXT: s_mov_b32 s7, s11
246246 ; SI-NEXT: s_movk_i32 s1, 0xff
247 ; SI-NEXT: s_movk_i32 s2, 0x900
248247 ; SI-NEXT: s_waitcnt vmcnt(0)
249248 ; SI-NEXT: v_lshrrev_b32_e32 v1, 16, v0
250249 ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
256255 ; SI-NEXT: v_add_i32_e32 v1, vcc, 9, v1
257256 ; SI-NEXT: v_or_b32_e32 v0, v2, v0
258257 ; SI-NEXT: v_and_b32_e32 v1, s1, v1
259 ; SI-NEXT: v_add_i32_e32 v0, vcc, s2, v0
258 ; SI-NEXT: v_add_i32_e32 v0, vcc, 0x900, v0
260259 ; SI-NEXT: v_or_b32_e32 v1, v3, v1
261260 ; SI-NEXT: v_and_b32_e32 v0, 0xffff, v0
262261 ; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
325324 ; SI-NEXT: buffer_load_dword v0, v[0:1], s[12:15], 0 addr64
326325 ; SI-NEXT: s_mov_b32 s16, 0xff00
327326 ; SI-NEXT: s_movk_i32 s17, 0xff
328 ; SI-NEXT: s_movk_i32 s18, 0x900
329327 ; SI-NEXT: s_mov_b32 s10, -1
330328 ; SI-NEXT: s_mov_b32 s8, s4
331329 ; SI-NEXT: s_mov_b32 s9, s5
344342 ; SI-NEXT: v_and_b32_e32 v3, s17, v3
345343 ; SI-NEXT: v_or_b32_e32 v2, v2, v3
346344 ; SI-NEXT: v_and_b32_e32 v1, s17, v1
347 ; SI-NEXT: v_add_i32_e32 v2, vcc, s18, v2
345 ; SI-NEXT: v_add_i32_e32 v2, vcc, 0x900, v2
348346 ; SI-NEXT: v_or_b32_e32 v1, v4, v1
349347 ; SI-NEXT: v_and_b32_e32 v2, 0xffff, v2
350348 ; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
279279 ; SI-NEXT: s_mov_b32 s10, s2
280280 ; SI-NEXT: s_mov_b32 s11, s3
281281 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb
282 ; SI-NEXT: s_movk_i32 s13, 0x900
283282 ; SI-NEXT: s_waitcnt vmcnt(0)
284283 ; SI-NEXT: v_lshrrev_b32_e32 v4, 16, v1
285284 ; SI-NEXT: v_add_i32_e32 v7, vcc, 9, v1
297296 ; SI-NEXT: v_or_b32_e32 v0, v6, v7
298297 ; SI-NEXT: v_lshlrev_b32_e32 v5, 8, v5
299298 ; SI-NEXT: v_and_b32_e32 v1, s12, v4
300 ; SI-NEXT: v_add_i32_e32 v0, vcc, s13, v0
299 ; SI-NEXT: v_add_i32_e32 v0, vcc, 0x900, v0
301300 ; SI-NEXT: v_or_b32_e32 v1, v5, v1
302301 ; SI-NEXT: v_and_b32_e32 v0, 0xffff, v0
303302 ; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
66
77 ; Function Attrs: nounwind
88 ; CHECK-LABEL: {{^}}signed_ds_offset_addressing_loop:
9 ; SI: s_movk_i32 [[K_0X88:s[0-9]+]], 0x
10 ; SI: s_movk_i32 [[K_0X100:s[0-9]+]], 0x100
119 ; CHECK: BB0_1:
1210 ; CHECK: v_add_i32_e32 [[VADDR:v[0-9]+]],
1311 ; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR]]
1513 ; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR8]]
1614 ; SI-DAG: v_add_i32_e32 [[VADDR0x80:v[0-9]+]], vcc, 0x80, [[VADDR]]
1715 ; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR0x80]]
18 ; SI-DAG: v_add_i32_e32 [[VADDR0x88:v[0-9]+]], vcc, [[K_0X88]], [[VADDR]]
16 ; SI-DAG: v_add_i32_e32 [[VADDR0x88:v[0-9]+]], vcc, 0x88, [[VADDR]]
1917 ; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR0x88]]
20 ; SI-DAG: v_add_i32_e32 [[VADDR0x100:v[0-9]+]], vcc, [[K_0X100]], [[VADDR]]
18 ; SI-DAG: v_add_i32_e32 [[VADDR0x100:v[0-9]+]], vcc, 0x100, [[VADDR]]
2119 ; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR0x100]]
2220
2321 ; CI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[VADDR]] offset1:2
5353 }
5454
5555 ; GCN-LABEL: {{^}}test_global
56 ; GCN: s_movk_i32 [[K:s[0-9]+]], 0x888
57 ; GCN: v_add_u32_e32 v{{[0-9]+}}, vcc, [[K]], v{{[0-9]+}}
56 ; GCN: v_add_u32_e32 v{{[0-9]+}}, vcc, 0x888, v{{[0-9]+}}
5857 ; GCN: flat_store_dword
5958 ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
6059 ; GCN-NEXT: s_barrier
7979 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-2048
8080 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}}
8181 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048
82 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}}
83 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048
8482 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-4096
8583 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-2048
84 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}}
85 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048
8686 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off
8787 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-4096
8888 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off