llvm.org GIT mirror llvm / e066e58
AMDGPU: Fix verifier error from partially undef copy

In this situation:

  %VGPR2<def> = BUFFER_LOAD_DWORD_OFFSET %SGPR8_SGPR9_SGPR10_SGPR11,
  %VGPR7<def,tied3> = V_MAC_F32_e32 %VGPR0<undef>, %VGPR1<kill>, %VGPR7<kill,tied0>, %EXEC<imp-use>
  %VGPR3_VGPR4_VGPR5_VGPR6<def> = COPY %VGPR0_VGPR1_VGPR2_VGPR3
  %VGPR4<def> = COPY %VGPR2

The copy for VGPR1 -> VGPR4 was an error from reading undefined VGPR1, but VGPR4 is defined immediately after this copy.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@275635 91177308-0d34-0410-b5e6-96231b3b80d8

Matt Arsenault 4 years ago
2 changed file(s) with 70 addition(s) and 8 deletion(s). Raw diff Collapse all Expand all
389389
390390 unsigned Opcode;
391391 ArrayRef SubIndices;
392 bool Forward;
393392
394393 if (AMDGPU::SReg_32RegClass.contains(DestReg)) {
395394 assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
473472 llvm_unreachable("Can't copy register!");
474473 }
475474
476 if (RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg))
477 Forward = true;
478 else
479 Forward = false;
475 bool Forward = RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg);
480476
481477 for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) {
482478 unsigned SubIdx;
495491
496492 if (Idx == 0)
497493 Builder.addReg(DestReg, RegState::Define | RegState::Implicit);
494
495 Builder.addReg(SrcReg, RegState::Implicit);
498496 }
499497 }
500498
None ; RUN: llc -verify-machineinstrs -o /dev/null %s
0 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s
11 ; We may have subregister live ranges that are undefined on some paths. The
22 ; verifier should not complain about this.
3 target triple = "amdgcn--"
43
5 define void @func() {
4
5 ; CHECK-LABEL: {{^}}func:
6 define void @func() #0 {
67 B0:
78 br i1 undef, label %B1, label %B2
89
2324 store float %ve0, float addrspace(3)* undef, align 4
2425 ret void
2526 }
27
28 ; FIXME: The extra undef subregister copy should be removed before it is
29 ; overwritten with the defined copy.
30 ; CHECK-LABEL: {{^}}valley_partially_undef_copy:
31 define amdgpu_ps float @valley_partially_undef_copy() #0 {
32 bb:
33 %tmp = load volatile i32, i32 addrspace(1)* undef, align 4
34 %tmp1 = load volatile i32, i32 addrspace(1)* undef, align 4
35 %tmp2 = insertelement <4 x i32> undef, i32 %tmp1, i32 0
36 %tmp3 = insertelement <4 x i32> %tmp2, i32 %tmp1, i32 1
37 %tmp4 = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> %tmp3, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
38 %tmp5 = extractelement <4 x float> %tmp4, i32 0
39 %tmp6 = fmul float %tmp5, undef
40 %tmp7 = fadd float %tmp6, %tmp6
41 %tmp8 = insertelement <4 x i32> %tmp2, i32 %tmp, i32 1
42 store <4 x i32> %tmp8, <4 x i32> addrspace(1)* undef, align 16
43 store float %tmp7, float addrspace(1)* undef, align 4
44 br label %bb9
45
46 bb9: ; preds = %bb9, %bb
47 %tmp10 = icmp eq i32 %tmp, 0
48 br i1 %tmp10, label %bb9, label %bb11
49
50 bb11: ; preds = %bb9
51 store <4 x i32> %tmp2, <4 x i32> addrspace(1)* undef, align 16
52 ret float undef
53 }
54
55 ; FIXME: Should be able to remove the undef copies
56
57 ; CHECK-LABEL: {{^}}partially_undef_copy:
58 ; CHECK: v_mov_b32_e32 v5, 5
59 ; CHECK: v_mov_b32_e32 v6, 6
60
61 ; CHECK: v_mov_b32_e32 v[[OUTPUT_LO:[0-9]+]], v5
62
63 ; Undef copy
64 ; CHECK: v_mov_b32_e32 v1, v6
65
66 ; undef copy
67 ; CHECK: v_mov_b32_e32 v2, v7
68
69 ; CHECK: v_mov_b32_e32 v[[OUTPUT_HI:[0-9]+]], v8
70 ; CHECK: v_mov_b32_e32 v[[OUTPUT_LO]], v6
71
72 ; CHECK: buffer_store_dwordx4 v{{\[}}[[OUTPUT_LO]]:[[OUTPUT_HI]]{{\]}}
73 define void @partially_undef_copy() #0 {
74 %tmp0 = call i32 asm sideeffect "v_mov_b32_e32 v5, 5", "={VGPR5}"()
75 %tmp1 = call i32 asm sideeffect "v_mov_b32_e32 v6, 6", "={VGPR6}"()
76
77 %partially.undef.0 = insertelement <4 x i32> undef, i32 %tmp0, i32 0
78 %partially.undef.1 = insertelement <4 x i32> %partially.undef.0, i32 %tmp1, i32 0
79
80 store volatile <4 x i32> %partially.undef.1, <4 x i32> addrspace(1)* undef, align 16
81 tail call void asm sideeffect "v_nop", "v={VGPR5_VGPR6_VGPR7_VGPR8}"(<4 x i32> %partially.undef.0)
82 ret void
83 }
84
85 declare <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
86 declare float @llvm.SI.image.sample.i32(i32, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
87
88 attributes #0 = { nounwind }
89 attributes #1 = { nounwind readnone }