llvm.org GIT mirror llvm / efccb7d
Merging r277504: ------------------------------------------------------------------------ r277504 | nhaehnle | 2016-08-02 12:31:14 -0700 (Tue, 02 Aug 2016) | 21 lines AMDGPU: Stay in WQM for non-intrinsic stores Summary: Two types of stores are possible in pixel shaders: stores to memory that are explicitly requested at the API level, and stores that are an implementation detail of register spilling or lowering of arrays. For the first kind of store, we must ensure that helper pixels have no effect and hence WQM must be disabled. The second kind of store must always be executed, because the written value may be loaded again in a way that is relevant for helper pixels as well -- and there are no externally visible effects anyway. This is a candidate for the 3.9 release branch. Reviewers: arsenm, tstellarAMD, mareko Subscribers: arsenm, kzhuravl, llvm-commits Differential Revision: https://reviews.llvm.org/D22675 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_39@288104 91177308-0d34-0410-b5e6-96231b3b80d8 Tom Stellard 2 years ago
1 changed file(s) with 41 addition(s) and 0 deletion(s). Raw diff Collapse all Expand all
373373 ret <4 x float> %c.iv
374374 }
375375
376 ; Only intrinsic stores need exact execution -- other stores do not have
377 ; externally visible effects and may require WQM for correctness.
378 ;
379 ; CHECK-LABEL: {{^}}test_alloca:
380 ; CHECK: s_mov_b64 [[LIVE:s\[[0-9]+:[0-9]+\]]], exec
381 ; CHECK: s_wqm_b64 exec, exec
382
383 ; CHECK: s_and_b64 exec, exec, [[LIVE]]
384 ; CHECK: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0
385 ; CHECK: s_wqm_b64 exec, exec
386 ; CHECK: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen
387 ; CHECK: s_and_b64 exec, exec, [[LIVE]]
388 ; CHECK: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 idxen
389 ; CHECK: s_wqm_b64 exec, exec
390 ; CHECK: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen
391
392 ; CHECK: image_sample
393 ; CHECK: s_and_b64 exec, exec, [[LIVE]]
394 ; CHECK: buffer_store_dwordx4
; amdgpu_ps test function that interleaves buffer-store intrinsic calls with
; ordinary store/load accesses to an alloca'd array. Per the CHECK lines
; preceding this function, each intrinsic store is expected after
; `s_and_b64 exec, exec, [[LIVE]]`, while the array store, the array load and
; the image_sample are expected after `s_wqm_b64 exec, exec`.
395 define amdgpu_ps void @test_alloca(float %data, i32 %a, i32 %idx) nounwind {
396 entry:
; 32-element i32 array created with alloca; its accesses below are the
; non-intrinsic memory operations this test is about.
397 %array = alloca [32 x i32], align 4
398
; First intrinsic store of %data (third argument is 0).
399 call void @llvm.amdgcn.buffer.store.f32(float %data, <4 x i32> undef, i32 0, i32 0, i1 0, i1 0)
400
; Ordinary (non-intrinsic) volatile store of %a to element 0 of %array.
401 %s.gep = getelementptr [32 x i32], [32 x i32]* %array, i32 0, i32 0
402 store volatile i32 %a, i32* %s.gep, align 4
403
; Second intrinsic store of %data; identical to the first except that the
; third argument is 1 (the matching CHECK line expects an `idxen` store).
404 call void @llvm.amdgcn.buffer.store.f32(float %data, <4 x i32> undef, i32 1, i32 0, i1 0, i1 0)
405
; Ordinary load from %array at the run-time index %idx.
406 %c.gep = getelementptr [32 x i32], [32 x i32]* %array, i32 0, i32 %idx
407 %c = load i32, i32* %c.gep, align 4
408
; The value loaded from the array is passed as the first operand of the
; image sample, so the sample result depends on the array load.
409 %t = call <4 x float> @llvm.SI.image.sample.i32(i32 %c, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
410
; Final intrinsic store writes the sampled <4 x float> result.
411 call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> %t, <4 x i32> undef, i32 0, i32 0, i1 0, i1 0)
412
413 ret void
414 }
415
416
; Declarations of AMDGPU store intrinsics used by the tests in this file.
; @test_alloca calls the two buffer.store variants (f32 and v4f32); it does
; not call image.store.
376417 declare void @llvm.amdgcn.image.store.v4i32(<4 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
377418 declare void @llvm.amdgcn.buffer.store.f32(float, <4 x i32>, i32, i32, i1, i1) #1
378419 declare void @llvm.amdgcn.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i1, i1) #1