llvm.org GIT mirror llvm / beff7fe
AMDGPU: Fix not expanding control flow after some kill blocks. Also stop trying to insert skip blocks at end_cf. This was inserting them at the end of the block, which doesn't make sense. The skip should be inserted at the beginning of the block, right after the end_cf. Just remove this for now, since no tests seem to stress this and I think this can be handled more generally later. Fixes bug 28550. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@275510 91177308-0d34-0410-b5e6-96231b3b80d8 Matt Arsenault 4 years ago
2 changed file(s) with 51 addition(s) and 7 deletion(s). Raw diff Collapse all Expand all
747747 case AMDGPU::SI_END_CF:
748748 if (--Depth == 0 && HaveKill) {
749749 HaveKill = false;
750
751 if (skipIfDead(MI, *NextBB)) {
752 NextBB = std::next(BI);
753 BE = MF.end();
754 Next = MBB.end();
755 }
750 // TODO: Insert skip if exec is 0?
756751 }
752
757753 EndCf(MI);
758754 break;
759755
762758 if (skipIfDead(MI, *NextBB)) {
763759 NextBB = std::next(BI);
764760 BE = MF.end();
765 Next = MBB.end();
766761 }
767762 } else
768763 HaveKill = true;
335335 ret void
336336 }
337337
338 ; CHECK-LABEL: {{^}}if_after_kill_block:
339 ; CHECK: ; BB#0:
340 ; CHECK: s_and_saveexec_b64
341 ; CHECK: s_xor_b64
342 ; CHECK-NEXT: mask branch [[BB4:BB[0-9]+_[0-9]+]]
343
344 ; CHECK: v_cmpx_le_f32_e32 vcc, 0,
345 ; CHECK: [[BB4]]:
346 ; CHECK: s_or_b64 exec, exec
347 ; CHECK: image_sample_c
348
349 ; CHECK: v_cmp_neq_f32_e32 vcc, 0,
350 ; CHECK: s_and_b64 exec, exec,
351 ; CHECK: s_and_saveexec_b64 s{{\[[0-9]+:[0-9]+\]}}, vcc
352 ; CHECK: s_xor_b64 s{{\[[0-9]+:[0-9]+\]}}, exec
353 ; CHECK: mask branch [[END:BB[0-9]+_[0-9]+]]
354 ; CHECK-NOT: branch
355
356 ; CHECK: ; BB#3: ; %bb8
357 ; CHECK: buffer_store_dword
358
359 ; CHECK: [[END]]:
360 ; CHECK: s_or_b64 exec, exec
361 ; CHECK: s_endpgm
362 define amdgpu_ps void @if_after_kill_block(float %arg, float %arg1, <4 x i32> %arg2) #0 {
363 bb:
364 %tmp = fcmp ult float %arg1, 0.000000e+00
365 br i1 %tmp, label %bb3, label %bb4
366
367 bb3: ; preds = %bb
368 call void @llvm.AMDGPU.kill(float %arg)
369 br label %bb4
370
371 bb4: ; preds = %bb3, %bb
372 %tmp5 = call <4 x float> @llvm.SI.image.sample.c.v4i32(<4 x i32> %arg2, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
373 %tmp6 = extractelement <4 x float> %tmp5, i32 0
374 %tmp7 = fcmp une float %tmp6, 0.000000e+00
375 br i1 %tmp7, label %bb8, label %bb9
376
377 bb8: ; preds = %bb9, %bb4
378 store volatile i32 9, i32 addrspace(1)* undef
379 ret void
380
381 bb9: ; preds = %bb4
382 ret void
383 }
384
338385 declare void @llvm.AMDGPU.kill(float) #0
386 declare <4 x float> @llvm.SI.image.sample.c.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
339387
340388 attributes #0 = { nounwind }
389 attributes #1 = { nounwind readnone }