llvm.org GIT mirror llvm / 25c1eb0
Merging r277500: ------------------------------------------------------------------------ r277500 | nhaehnle | 2016-08-02 12:17:37 -0700 (Tue, 02 Aug 2016) | 18 lines AMDGPU: Track physical registers in SIWholeQuadMode Summary: There are cases where uniform branch conditions are computed in VGPRs, and we didn't correctly mark those as WQM. The stray change in basic-branch.ll is because invoking the LiveIntervals analysis leads to the detection of a dead register that would otherwise not be seen at -O0. This is a candidate for the 3.9 branch, as it fixes a possible hang. Reviewers: arsenm, tstellarAMD, mareko Subscribers: arsenm, llvm-commits, kzhuravl Differential Revision: https://reviews.llvm.org/D22673 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_39@288103 91177308-0d34-0410-b5e6-96231b3b80d8 Tom Stellard 2 years ago
1 changed file(s) with 0 addition(s) and 41 deletion(s). Raw diff Collapse all Expand all
373373 ret <4 x float> %c.iv
374374 }
375375
376 ; Only intrinsic stores need exact execution -- other stores do not have
377 ; externally visible effects and may require WQM for correctness.
378 ;
379 ; CHECK-LABEL: {{^}}test_alloca:
380 ; CHECK: s_mov_b64 [[LIVE:s\[[0-9]+:[0-9]+\]]], exec
381 ; CHECK: s_wqm_b64 exec, exec
382
383 ; CHECK: s_and_b64 exec, exec, [[LIVE]]
384 ; CHECK: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0
385 ; CHECK: s_wqm_b64 exec, exec
386 ; CHECK: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen
387 ; CHECK: s_and_b64 exec, exec, [[LIVE]]
388 ; CHECK: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 idxen
389 ; CHECK: s_wqm_b64 exec, exec
390 ; CHECK: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen
391
392 ; CHECK: image_sample
393 ; CHECK: s_and_b64 exec, exec, [[LIVE]]
394 ; CHECK: buffer_store_dwordx4
; Test body: interleaves intrinsic buffer stores with ordinary accesses to a
; stack array (alloca), then feeds a value loaded from that array into an
; image sample. Uses the amdgpu_ps calling convention.
;   %data - float written by both f32 intrinsic buffer stores
;   %a    - i32 stored (volatile) into element 0 of the array
;   %idx  - i32 index of the array element that is loaded back
; The instruction order below is what the expectations preceding this
; function match against, so it must not be rearranged.
395 define amdgpu_ps void @test_alloca(float %data, i32 %a, i32 %idx) nounwind {
396 entry:
397 %array = alloca [32 x i32], align 4
398
; First intrinsic store of %data (third operand is 0).
399 call void @llvm.amdgcn.buffer.store.f32(float %data, <4 x i32> undef, i32 0, i32 0, i1 0, i1 0)
400
; Ordinary (non-intrinsic) volatile store of %a to element 0 of the array.
401 %s.gep = getelementptr [32 x i32], [32 x i32]* %array, i32 0, i32 0
402 store volatile i32 %a, i32* %s.gep, align 4
403
; Second intrinsic store of %data; same as the first except the third operand
; is 1. NOTE(review): presumably this is the index operand, which would be why
; an idxen store is expected for it -- confirm against the intrinsic definition.
404 call void @llvm.amdgcn.buffer.store.f32(float %data, <4 x i32> undef, i32 1, i32 0, i1 0, i1 0)
405
; Ordinary load of the array element selected by %idx.
406 %c.gep = getelementptr [32 x i32], [32 x i32]* %array, i32 0, i32 %idx
407 %c = load i32, i32* %c.gep, align 4
408
; The loaded value %c is the first argument of the image sample, so the load
; above feeds the sample.
409 %t = call <4 x float> @llvm.SI.image.sample.i32(i32 %c, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
410
; Final intrinsic store: writes the <4 x float> sample result %t.
411 call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> %t, <4 x i32> undef, i32 0, i32 0, i1 0, i1 0)
412
413 ret void
414 }
415
416
; Declarations of the store intrinsics; the two buffer.store variants
; (f32 and v4f32) are the ones called by test_alloca.
; NOTE(review): the image.store declaration is suffixed v4i32 but its first
; parameter is <4 x float> -- confirm this mismatch is intentional.
417376 declare void @llvm.amdgcn.image.store.v4i32(<4 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
418377 declare void @llvm.amdgcn.buffer.store.f32(float, <4 x i32>, i32, i32, i1, i1) #1
419378 declare void @llvm.amdgcn.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i1, i1) #1