llvm.org GIT mirror llvm / 25e2616
Merging r280589:

------------------------------------------------------------------------
r280589 | nhaehnle | 2016-09-03 05:26:32 -0700 (Sat, 03 Sep 2016) | 19 lines

AMDGPU: Fix an interaction between WQM and polygon stippling

Summary:
This fixes a rare bug in polygon stippling with non-monolithic pixel shaders.

The underlying problem is as follows: the prolog part contains the polygon
stippling sequence, i.e. a kill. The main part then enables WQM based on the
_reduced_ exec mask, effectively undoing most of the polygon stippling.

Since we cannot know whether polygon stippling will be used, the main part
of a non-monolithic shader must always return to exact mode to fix this
problem.

Reviewers: arsenm, tstellarAMD, mareko

Subscribers: arsenm, llvm-commits, kzhuravl

Differential Revision: https://reviews.llvm.org/D23131
------------------------------------------------------------------------

git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_39@288105 91177308-0d34-0410-b5e6-96231b3b80d8

Tom Stellard 2 years ago
3 changed file(s) with 46 addition(s) and 11 deletion(s). Raw diff Collapse all Expand all
20282028 let hasSideEffects = 1;
20292029 let SALU = 1;
20302030 let hasNoSchedulingInfo = 1;
2031 let DisableWQM = 1;
20312032 }
20322033
20332034 let Uses = [EXEC], Defs = [EXEC, VCC, M0],
218218 markInstruction(MI, Flags, Worklist);
219219 GlobalFlags |= Flags;
220220 }
221
222 if (WQMOutputs && MBB.succ_empty()) {
223 // This is a prolog shader. Make sure we go back to exact mode at the end.
224 Blocks[&MBB].OutNeeds = StateExact;
225 Worklist.push_back(&MBB);
226 GlobalFlags |= StateExact;
227 }
228221 }
229222
230223 return GlobalFlags;
1616 ;CHECK-LABEL: {{^}}test2:
1717 ;CHECK-NEXT: ; %main_body
1818 ;CHECK-NEXT: s_wqm_b64 exec, exec
19 ;CHECK: image_sample
2019 ;CHECK-NOT: exec
21 ;CHECK: _load_dword v0,
22 define amdgpu_ps float @test2(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float addrspace(1)* inreg %ptr, <4 x i32> %c) {
20 define amdgpu_ps void @test2(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float addrspace(1)* inreg %ptr, <4 x i32> %c) {
2321 main_body:
2422 %c.1 = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> %c, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
2523 %c.2 = bitcast <4 x float> %c.1 to <4 x i32>
2624 %c.3 = extractelement <4 x i32> %c.2, i32 0
2725 %gep = getelementptr float, float addrspace(1)* %ptr, i32 %c.3
2826 %data = load float, float addrspace(1)* %gep
29 ret float %data
27
28 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %data, float undef, float undef, float undef)
29
30 ret void
3031 }
3132
3233 ; ... but disabled for stores (and, in this simple case, not re-enabled).
413414 ret void
414415 }
415416
417 ; Must return to exact at the end of a non-void returning shader,
418 ; otherwise the EXEC mask exported by the epilog will be wrong. This is true
419 ; even if the shader has no kills, because a kill could have happened in a
420 ; previous shader fragment.
421 ;
422 ; CHECK-LABEL: {{^}}test_nonvoid_return:
423 ; CHECK: s_mov_b64 [[LIVE:s\[[0-9]+:[0-9]+\]]], exec
424 ; CHECK: s_wqm_b64 exec, exec
425 ;
426 ; CHECK: s_and_b64 exec, exec, [[LIVE]]
427 ; CHECK-NOT: exec
428 define amdgpu_ps <4 x float> @test_nonvoid_return() nounwind {
429 %tex = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
430 %tex.i = bitcast <4 x float> %tex to <4 x i32>
431 %dtex = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> %tex.i, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
432 ret <4 x float> %dtex
433 }
434
435 ; CHECK-LABEL: {{^}}test_nonvoid_return_unreachable:
436 ; CHECK: s_mov_b64 [[LIVE:s\[[0-9]+:[0-9]+\]]], exec
437 ; CHECK: s_wqm_b64 exec, exec
438 ;
439 ; CHECK: s_and_b64 exec, exec, [[LIVE]]
440 ; CHECK-NOT: exec
441 define amdgpu_ps <4 x float> @test_nonvoid_return_unreachable(i32 inreg %c) nounwind {
442 entry:
443 %tex = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
444 %tex.i = bitcast <4 x float> %tex to <4 x i32>
445 %dtex = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> %tex.i, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
446
447 %cc = icmp sgt i32 %c, 0
448 br i1 %cc, label %if, label %else
449
450 if:
451 store volatile <4 x float> %dtex, <4 x float>* undef
452 unreachable
453
454 else:
455 ret <4 x float> %dtex
456 }
416457
417458 declare void @llvm.amdgcn.image.store.v4i32(<4 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
418459 declare void @llvm.amdgcn.buffer.store.f32(float, <4 x i32>, i32, i32, i1, i1) #1