llvm.org GIT mirror llvm / c33f9cd
AMDGPU: Fix a few slightly broken tests

Fix minor bugs and uses of undef which break when pointer-related optimization passes are run.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@269944 91177308-0d34-0410-b5e6-96231b3b80d8

Matt Arsenault, 3 years ago
9 changed file(s) with 72 addition(s) and 65 deletion(s). Raw diff Collapse all Expand all
1010 define amdgpu_ps void @main (<4 x float> inreg %reg0) {
1111 entry:
1212 br label %outer_loop
13
1314 outer_loop:
1415 %cnt = phi i32 [0, %entry], [%cnt_incr, %inner_loop]
1516 %cond = icmp eq i32 %cnt, 16
1617 br i1 %cond, label %outer_loop_body, label %exit
18
1719 outer_loop_body:
1820 %cnt_incr = add i32 %cnt, 1
1921 br label %inner_loop
22
2023 inner_loop:
2124 %cnt2 = phi i32 [0, %outer_loop_body], [%cnt2_incr, %inner_loop_body]
22 %cond2 = icmp eq i32 %cnt2, 16
23 br i1 %cond, label %inner_loop_body, label %outer_loop
25 %n = load volatile i32, i32 addrspace(1)* undef
26 %cond2 = icmp slt i32 %cnt2, %n
27 br i1 %cond2, label %inner_loop_body, label %outer_loop
28
2429 inner_loop_body:
2530 %cnt2_incr = add i32 %cnt2, 1
2631 br label %inner_loop
32
2733 exit:
2834 ret void
2935 }
33 ; GCN-LABEL: {{^}}main:
44 ; SI: v_lshl_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
55 ; VI: v_lshlrev_b32_e64 v{{[0-9]+}}, v{{[0-9]+}}, 1
6 define amdgpu_ps void @main() #0 {
6 define amdgpu_ps void @main(float %arg0, float %arg1) #0 {
77 bb:
8 %tmp = fptosi float undef to i32
8 %tmp = fptosi float %arg0 to i32
99 %tmp1 = call <4 x float> @llvm.SI.image.load.v4i32(<4 x i32> undef, <8 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
1010 %tmp2.f = extractelement <4 x float> %tmp1, i32 0
1111 %tmp2 = bitcast float %tmp2.f to i32
1313 %tmp4 = shl i32 1, %tmp3
1414 %tmp5 = and i32 %tmp2, %tmp4
1515 %tmp6 = icmp eq i32 %tmp5, 0
16 %tmp7 = select i1 %tmp6, float 0.000000e+00, float undef
16 %tmp7 = select i1 %tmp6, float 0.000000e+00, float %arg1
1717 %tmp8 = call i32 @llvm.SI.packf16(float undef, float %tmp7)
1818 %tmp9 = bitcast i32 %tmp8 to float
1919 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float undef, float %tmp9, float undef, float %tmp9)
2323 br i1 %tmp, label %bb4, label %bb6
2424
2525 bb4: ; preds = %bb3
26 %tmp5 = mul i32 undef, %arg
26 %val = load volatile i32, i32 addrspace(1)* undef
27 %tmp5 = mul i32 %val, %arg
2728 br label %bb6
2829
2930 bb6: ; preds = %bb4, %bb3
6161 %tmp2 = shl i32 %6, 2
6262 %tmp3 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %tmp1, i32 %tmp2, i32 64, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0)
6363 %tmp4 = add i32 %6, 16
64 %tmp5 = bitcast float 0.0 to i32
65 call void @llvm.SI.tbuffer.store.i32(<16 x i8> %tmp1, i32 %tmp5, i32 1, i32 %tmp4, i32 %4, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
64 call void @llvm.SI.tbuffer.store.i32(<16 x i8> %tmp1, i32 %tmp3, i32 1, i32 %tmp4, i32 %4, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
6665 ret void
6766 }
6867
8079 %tmp2 = shl i32 %6, 2
8180 %tmp3 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %tmp1, i32 %tmp2, i32 65, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0)
8281 %tmp4 = add i32 %6, 16
83 %tmp5 = bitcast float 0.0 to i32
84 call void @llvm.SI.tbuffer.store.i32(<16 x i8> %tmp1, i32 %tmp5, i32 1, i32 %tmp4, i32 %4, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
82 call void @llvm.SI.tbuffer.store.i32(<16 x i8> %tmp1, i32 %tmp3, i32 1, i32 %tmp4, i32 %4, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
8583 ret void
8684 }
8785
9595 ; SI: v_or_b32_e32 v{{[0-9]}}
9696 define void @vector_or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
9797 %loada = load i64, i64 addrspace(1)* %a, align 8
98 %loadb = load i64, i64 addrspace(1)* %a, align 8
98 %loadb = load i64, i64 addrspace(1)* %b, align 8
9999 %or = or i64 %loada, %loadb
100100 store i64 %or, i64 addrspace(1)* %out
101101 ret void
5050 ; SI-DAG: v_cndmask_b32_e64
5151 ; SI-DAG: v_cndmask_b32_e64
5252 ; SI: s_endpgm
53 define void @test_udivrem(i32 addrspace(1)* %out, i32 %x, i32 %y) {
53 define void @test_udivrem(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 %x, i32 %y) {
5454 %result0 = udiv i32 %x, %y
55 store i32 %result0, i32 addrspace(1)* %out
55 store i32 %result0, i32 addrspace(1)* %out0
5656 %result1 = urem i32 %x, %y
57 store i32 %result1, i32 addrspace(1)* %out
57 store i32 %result1, i32 addrspace(1)* %out1
5858 ret void
5959 }
6060
3434 br label %bb3
3535
3636 bb3: ; preds = %bb3, %bb2
37 %tmp4 = icmp eq i32 undef, %arg1
37 %val = load volatile i32, i32 addrspace(2)* undef
38 %tmp4 = icmp eq i32 %val, %arg1
3839 br i1 %tmp4, label %bb5, label %bb3
3940
4041 bb5: ; preds = %bb3, %bb
41 %tmp6 = tail call i32 @llvm.r600.read.tidig.y() #1
42 %tmp6 = tail call i32 @llvm.amdgcn.workitem.id.y() #1
4243 %tmp10 = icmp ult i32 %tmp6, %arg
4344 br i1 %tmp10, label %bb11, label %bb12
4445
5051 }
5152
5253 ; Function Attrs: nounwind readnone
53 declare i32 @llvm.r600.read.tidig.y() #1
54 declare i32 @llvm.amdgcn.workitem.id.y() #1
5455
5556 attributes #1 = { nounwind readnone }
11 ;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=VI --check-prefix=FUNC %s
22 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG --check-prefix=FUNC %s
33
4 ;FUNC-LABEL: {{^}}test_select_v2i32:
4 ; FUNC-LABEL: {{^}}test_select_v2i32:
55
6 ;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
7 ;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
6 ; EG-DAG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW]}}, KC0[3].Z
7 ; EG-DAG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW]}}, KC0[3].Y
88
9 ;SI: v_cndmask_b32_e64
10 ;SI: v_cndmask_b32_e32
9 ; SI: v_cndmask_b32_e64
10 ; SI: v_cndmask_b32_e32
1111
12 define void @test_select_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in0, <2 x i32> addrspace(1)* %in1) {
12 define void @test_select_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in0, <2 x i32> addrspace(1)* %in1, <2 x i32> %val) {
1313 entry:
14 %0 = load <2 x i32>, <2 x i32> addrspace(1)* %in0
15 %1 = load <2 x i32>, <2 x i32> addrspace(1)* %in1
16 %cmp = icmp ne <2 x i32> %0, %1
17 %result = select <2 x i1> %cmp, <2 x i32> %0, <2 x i32> %1
14 %load0 = load <2 x i32>, <2 x i32> addrspace(1)* %in0
15 %load1 = load <2 x i32>, <2 x i32> addrspace(1)* %in1
16 %cmp = icmp sgt <2 x i32> %load0, %load1
17 %result = select <2 x i1> %cmp, <2 x i32> %val, <2 x i32> %load0
1818 store <2 x i32> %result, <2 x i32> addrspace(1)* %out
1919 ret void
2020 }
2121
22 ;FUNC-LABEL: {{^}}test_select_v2f32:
22 ; FUNC-LABEL: {{^}}test_select_v2f32:
2323
24 ;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
25 ;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
24 ; EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
25 ; EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
2626
2727 ;SI: v_cndmask_b32_e64
2828 ;SI: v_cndmask_b32_e32
3939
4040 ;FUNC-LABEL: {{^}}test_select_v4i32:
4141
42 ;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
43 ;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
44 ;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
45 ;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
42 ; EG-DAG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW]}}, KC0[4].X
43 ; EG-DAG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, KC0[3].W
44 ; EG-DAG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW]}}, KC0[3].Z
45 ; EG-DAG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW]}}, KC0[3].Y
4646
4747 ; FIXME: The shrinking does not happen on tonga
4848
49 ;SI: v_cndmask_b32
50 ;SI: v_cndmask_b32
51 ;SI: v_cndmask_b32
52 ;SI: v_cndmask_b32
49 ; SI: v_cndmask_b32
50 ; SI: v_cndmask_b32
51 ; SI: v_cndmask_b32
52 ; SI: v_cndmask_b32
5353
54 define void @test_select_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in0, <4 x i32> addrspace(1)* %in1) {
54 define void @test_select_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in0, <4 x i32> addrspace(1)* %in1, <4 x i32> %val) {
5555 entry:
56 %0 = load <4 x i32>, <4 x i32> addrspace(1)* %in0
57 %1 = load <4 x i32>, <4 x i32> addrspace(1)* %in1
58 %cmp = icmp ne <4 x i32> %0, %1
59 %result = select <4 x i1> %cmp, <4 x i32> %0, <4 x i32> %1
56 %load0 = load <4 x i32>, <4 x i32> addrspace(1)* %in0
57 %load1 = load <4 x i32>, <4 x i32> addrspace(1)* %in1
58 %cmp = icmp sgt <4 x i32> %load0, %load1
59 %result = select <4 x i1> %cmp, <4 x i32> %val, <4 x i32> %load0
6060 store <4 x i32> %result, <4 x i32> addrspace(1)* %out
6161 ret void
6262 }
66
77 ; CHECK-LABEL: @local_cmp_user(
88 ; CHECK: bb11:
9 ; CHECK: %lsr.iv1 = phi i32 [ %lsr.iv.next2, %bb ], [ -2, %entry ]
10 ; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %bb ], [ undef, %entry ]
9 ; CHECK: %lsr.iv1 = phi i32 [ %lsr.iv.next2, %bb ], [ 2, %entry ]
10 ; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %bb ], [ %{{[0-9]+}}, %entry ]
1111 ; CHECK: %lsr.iv.next = add i32 %lsr.iv, -1
12 ; CHECK: %lsr.iv.next2 = add i32 %lsr.iv1, 2
12 ; CHECK: %lsr.iv.next2 = add i32 %lsr.iv1, -2
1313 ; CHECK: br i1
1414
1515 ; CHECK: bb:
16 ; CHECK: %scevgep = getelementptr i8, i8 addrspace(3)* %t, i32 %lsr.iv.next2
17 ; CHECK: %c1 = icmp ult i8 addrspace(3)* %scevgep, undef
18 define void @local_cmp_user() nounwind {
16 ; CHECK: inttoptr i32 %lsr.iv.next2 to i8 addrspace(3)*
17 ; CHECK: %c1 = icmp ne i8 addrspace(3)*
18 define void @local_cmp_user(i32 %arg0) nounwind {
1919 entry:
2020 br label %bb11
2121
2222 bb11:
2323 %i = phi i32 [ 0, %entry ], [ %i.next, %bb ]
2424 %ii = shl i32 %i, 1
25 %c0 = icmp eq i32 %i, undef
25 %c0 = icmp eq i32 %i, %arg0
2626 br i1 %c0, label %bb13, label %bb
2727
2828 bb:
2929 %t = load i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* undef
3030 %p = getelementptr i8, i8 addrspace(3)* %t, i32 %ii
31 %c1 = icmp ult i8 addrspace(3)* %p, undef
31 %c1 = icmp ne i8 addrspace(3)* %p, null
3232 %i.next = add i32 %i, 1
3333 br i1 %c1, label %bb11, label %bb13
3434
4040 ; CHECK: %lsr.iv1 = phi i64
4141 ; CHECK: %lsr.iv = phi i64
4242 ; CHECK: %lsr.iv.next = add i64 %lsr.iv, -1
43 ; CHECK: %lsr.iv.next2 = add i64 %lsr.iv1, 2
43 ; CHECK: %lsr.iv.next2 = add i64 %lsr.iv1, -2
4444 ; CHECK: br i1
4545
4646 ; CHECK: bb:
47 ; CHECK: %scevgep = getelementptr i8, i8 addrspace(1)* %t, i64 %lsr.iv.next2
48 define void @global_cmp_user() nounwind {
47 ; CHECK: inttoptr i64 %lsr.iv.next2 to i8 addrspace(1)*
48 ; CHECK: icmp ne i8 addrspace(1)* %t
49 define void @global_cmp_user(i64 %arg0) nounwind {
4950 entry:
5051 br label %bb11
5152
5253 bb11:
5354 %i = phi i64 [ 0, %entry ], [ %i.next, %bb ]
5455 %ii = shl i64 %i, 1
55 %c0 = icmp eq i64 %i, undef
56 %c0 = icmp eq i64 %i, %arg0
5657 br i1 %c0, label %bb13, label %bb
5758
5859 bb:
5960 %t = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* undef
6061 %p = getelementptr i8, i8 addrspace(1)* %t, i64 %ii
61 %c1 = icmp ult i8 addrspace(1)* %p, undef
62 %c1 = icmp ne i8 addrspace(1)* %p, null
6263 %i.next = add i64 %i, 1
6364 br i1 %c1, label %bb11, label %bb13
6465
6869
6970 ; CHECK-LABEL: @global_gep_user(
7071 ; CHECK: %lsr.iv1 = phi i32 [ %lsr.iv.next2, %bb ], [ 0, %entry ]
71 ; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %bb ], [ undef, %entry ]
72 ; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %bb ], [ %{{[0-9]+}}, %entry ]
7273 ; CHECK: %lsr.iv.next = add i32 %lsr.iv, -1
7374 ; CHECK: %lsr.iv.next2 = add i32 %lsr.iv1, 2
7475 ; CHECK: br i1
7576
7677 ; CHECK: bb:
77 ; CHECK: %p = getelementptr i8, i8 addrspace(1)* %t, i32 %lsr.iv1
78 define void @global_gep_user() nounwind {
78 ; CHECK: getelementptr i8, i8 addrspace(1)* %t, i32 %lsr.iv
79 define void @global_gep_user(i32 %arg0) nounwind {
7980 entry:
8081 br label %bb11
8182
8283 bb11:
8384 %i = phi i32 [ 0, %entry ], [ %i.next, %bb ]
8485 %ii = shl i32 %i, 1
85 %c0 = icmp eq i32 %i, undef
86 %c0 = icmp eq i32 %i, %arg0
8687 br i1 %c0, label %bb13, label %bb
8788
8889 bb:
8990 %t = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* undef
9091 %p = getelementptr i8, i8 addrspace(1)* %t, i32 %ii
91 %c1 = icmp ult i8 addrspace(1)* %p, undef
92 %c1 = icmp ne i8 addrspace(1)* %p, null
9293 %i.next = add i32 %i, 1
9394 br i1 %c1, label %bb11, label %bb13
9495
9899
99100 ; CHECK-LABEL: @global_sext_scale_user(
100101 ; CHECK: %lsr.iv1 = phi i32 [ %lsr.iv.next2, %bb ], [ 0, %entry ]
101 ; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %bb ], [ undef, %entry ]
102 ; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %bb ], [ %{{[0-9]+}}, %entry ]
102103 ; CHECK: %lsr.iv.next = add i32 %lsr.iv, -1
103104 ; CHECK: %lsr.iv.next2 = add i32 %lsr.iv1, 2
104105 ; CHECK: br i1
105106
106107 ; CHECK: bb
107108 ; CHECK: %p = getelementptr i8, i8 addrspace(1)* %t, i64 %ii.ext
108 define void @global_sext_scale_user() nounwind {
109 define void @global_sext_scale_user(i32 %arg0) nounwind {
109110 entry:
110111 br label %bb11
111112
113114 %i = phi i32 [ 0, %entry ], [ %i.next, %bb ]
114115 %ii = shl i32 %i, 1
115116 %ii.ext = sext i32 %ii to i64
116 %c0 = icmp eq i32 %i, undef
117 %c0 = icmp eq i32 %i, %arg0
117118 br i1 %c0, label %bb13, label %bb
118119
119120 bb:
120121 %t = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* undef
121122 %p = getelementptr i8, i8 addrspace(1)* %t, i64 %ii.ext
122 %c1 = icmp ult i8 addrspace(1)* %p, undef
123 %c1 = icmp ne i8 addrspace(1)* %p, null
123124 %i.next = add i32 %i, 1
124125 br i1 %c1, label %bb11, label %bb13
125126