llvm.org GIT mirror llvm / d3708b9
Merging r359899: ------------------------------------------------------------------------ r359899 | arsenm | 2019-05-03 08:37:07 -0700 (Fri, 03 May 2019) | 7 lines AMDGPU: Select VOP3 form of sub The VOP3 form should always be the preferred selection form to be shrunk later. The r600 sub test needs to be split out because it asserts on the arguments in the new test during the calling convention lowering. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_80@362654 91177308-0d34-0410-b5e6-96231b3b80d8 Matt Arsenault 2 months ago
3 changed file(s) with 199 addition(s) and 55 deletion(s). Raw diff Collapse all Expand all
515515
516516 let SubtargetPredicate = HasAddNoCarryInsts in {
517517 def : DivergentBinOp;
518 def : DivergentBinOp;
519 def : DivergentBinOp;
518
519 def : DivergentBinOp;
520520 }
521521
522522
523523 def : DivergentBinOp;
524
525 def : DivergentBinOp;
526 def : DivergentBinOp32>;
524 def : DivergentBinOp64>;
527525
528526 def : DivergentBinOp;
529527
0 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=EG,FUNC %s
1
2 declare i32 @llvm.r600.read.tidig.x() readnone
3
4 ; FUNC-LABEL: {{^}}s_sub_i32:
5 define amdgpu_kernel void @s_sub_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
6 %result = sub i32 %a, %b
7 store i32 %result, i32 addrspace(1)* %out
8 ret void
9 }
10
11 ; FUNC-LABEL: {{^}}s_sub_imm_i32:
12 define amdgpu_kernel void @s_sub_imm_i32(i32 addrspace(1)* %out, i32 %a) {
13 %result = sub i32 1234, %a
14 store i32 %result, i32 addrspace(1)* %out
15 ret void
16 }
17
18 ; FUNC-LABEL: {{^}}test_sub_i32:
19 ; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
20 define amdgpu_kernel void @test_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
21 %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
22 %a = load i32, i32 addrspace(1)* %in
23 %b = load i32, i32 addrspace(1)* %b_ptr
24 %result = sub i32 %a, %b
25 store i32 %result, i32 addrspace(1)* %out
26 ret void
27 }
28
29 ; FUNC-LABEL: {{^}}test_sub_imm_i32:
30 ; EG: SUB_INT
31 define amdgpu_kernel void @test_sub_imm_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
32 %a = load i32, i32 addrspace(1)* %in
33 %result = sub i32 123, %a
34 store i32 %result, i32 addrspace(1)* %out
35 ret void
36 }
37
38 ; FUNC-LABEL: {{^}}test_sub_v2i32:
39 ; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
40 ; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
41 define amdgpu_kernel void @test_sub_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
42 %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
43 %a = load <2 x i32>, <2 x i32> addrspace(1) * %in
44 %b = load <2 x i32>, <2 x i32> addrspace(1) * %b_ptr
45 %result = sub <2 x i32> %a, %b
46 store <2 x i32> %result, <2 x i32> addrspace(1)* %out
47 ret void
48 }
49
50 ; FUNC-LABEL: {{^}}test_sub_v4i32:
51 ; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
52 ; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
53 ; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
54 ; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
55 define amdgpu_kernel void @test_sub_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
56 %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
57 %a = load <4 x i32>, <4 x i32> addrspace(1) * %in
58 %b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr
59 %result = sub <4 x i32> %a, %b
60 store <4 x i32> %result, <4 x i32> addrspace(1)* %out
61 ret void
62 }
63
64 ; FUNC-LABEL: {{^}}test_sub_i16:
65 define amdgpu_kernel void @test_sub_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
66 %tid = call i32 @llvm.r600.read.tidig.x()
67 %gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid
68 %b_ptr = getelementptr i16, i16 addrspace(1)* %gep, i32 1
69 %a = load volatile i16, i16 addrspace(1)* %gep
70 %b = load volatile i16, i16 addrspace(1)* %b_ptr
71 %result = sub i16 %a, %b
72 store i16 %result, i16 addrspace(1)* %out
73 ret void
74 }
75
76 ; FUNC-LABEL: {{^}}test_sub_v2i16:
77 define amdgpu_kernel void @test_sub_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
78 %tid = call i32 @llvm.r600.read.tidig.x()
79 %gep = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %in, i32 %tid
80 %b_ptr = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %gep, i16 1
81 %a = load <2 x i16>, <2 x i16> addrspace(1)* %gep
82 %b = load <2 x i16>, <2 x i16> addrspace(1)* %b_ptr
83 %result = sub <2 x i16> %a, %b
84 store <2 x i16> %result, <2 x i16> addrspace(1)* %out
85 ret void
86 }
87
88 ; FUNC-LABEL: {{^}}test_sub_v4i16:
89 define amdgpu_kernel void @test_sub_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
90 %tid = call i32 @llvm.r600.read.tidig.x()
91 %gep = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %in, i32 %tid
92 %b_ptr = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %gep, i16 1
93 %a = load <4 x i16>, <4 x i16> addrspace(1) * %gep
94 %b = load <4 x i16>, <4 x i16> addrspace(1) * %b_ptr
95 %result = sub <4 x i16> %a, %b
96 store <4 x i16> %result, <4 x i16> addrspace(1)* %out
97 ret void
98 }
99
100 ; FUNC-LABEL: {{^}}s_sub_i64:
101 ; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XY
102 ; EG-DAG: SUB_INT {{[* ]*}}
103 ; EG-DAG: SUBB_UINT
104 ; EG-DAG: SUB_INT
105 ; EG-DAG: SUB_INT {{[* ]*}}
106 define amdgpu_kernel void @s_sub_i64(i64 addrspace(1)* noalias %out, i64 %a, i64 %b) nounwind {
107 %result = sub i64 %a, %b
108 store i64 %result, i64 addrspace(1)* %out, align 8
109 ret void
110 }
111
112 ; FUNC-LABEL: {{^}}v_sub_i64:
113 ; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XY
114 ; EG-DAG: SUB_INT {{[* ]*}}
115 ; EG-DAG: SUBB_UINT
116 ; EG-DAG: SUB_INT
117 ; EG-DAG: SUB_INT {{[* ]*}}
118 define amdgpu_kernel void @v_sub_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %inA, i64 addrspace(1)* noalias %inB) nounwind {
119 %tid = call i32 @llvm.r600.read.tidig.x() readnone
120 %a_ptr = getelementptr i64, i64 addrspace(1)* %inA, i32 %tid
121 %b_ptr = getelementptr i64, i64 addrspace(1)* %inB, i32 %tid
122 %a = load i64, i64 addrspace(1)* %a_ptr
123 %b = load i64, i64 addrspace(1)* %b_ptr
124 %result = sub i64 %a, %b
125 store i64 %result, i64 addrspace(1)* %out, align 8
126 ret void
127 }
128
129 ; FUNC-LABEL: {{^}}v_test_sub_v2i64:
130 define amdgpu_kernel void @v_test_sub_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* noalias %inA, <2 x i64> addrspace(1)* noalias %inB) {
131 %tid = call i32 @llvm.r600.read.tidig.x() readnone
132 %a_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %inA, i32 %tid
133 %b_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %inB, i32 %tid
134 %a = load <2 x i64>, <2 x i64> addrspace(1)* %a_ptr
135 %b = load <2 x i64>, <2 x i64> addrspace(1)* %b_ptr
136 %result = sub <2 x i64> %a, %b
137 store <2 x i64> %result, <2 x i64> addrspace(1)* %out
138 ret void
139 }
140
141 ; FUNC-LABEL: {{^}}v_test_sub_v4i64:
142 define amdgpu_kernel void @v_test_sub_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* noalias %inA, <4 x i64> addrspace(1)* noalias %inB) {
143 %tid = call i32 @llvm.r600.read.tidig.x() readnone
144 %a_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %inA, i32 %tid
145 %b_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %inB, i32 %tid
146 %a = load <4 x i64>, <4 x i64> addrspace(1)* %a_ptr
147 %b = load <4 x i64>, <4 x i64> addrspace(1)* %b_ptr
148 %result = sub <4 x i64> %a, %b
149 store <4 x i64> %result, <4 x i64> addrspace(1)* %out
150 ret void
151 }
0 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI,FUNC %s
1 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,GFX89,FUNC %s
2 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX89,FUNC %s
3 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=EG,FUNC %s
4
5 declare i32 @llvm.r600.read.tidig.x() readnone
6
7 ; FUNC-LABEL: {{^}}s_sub_i32:
0 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s
1 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,GFX89 %s
2 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX89 %s
3
4 declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
5
6 ; GCN-LABEL: {{^}}s_sub_i32:
87 ; GCN: s_load_dwordx2
98 ; GCN: s_load_dwordx2 s{{\[}}[[A:[0-9]+]]:[[B:[0-9]+]]{{\]}}
109 ; GCN: s_sub_i32 s{{[0-9]+}}, s[[A]], s[[B]]
1413 ret void
1514 }
1615
17 ; FUNC-LABEL: {{^}}s_sub_imm_i32:
16 ; GCN-LABEL: {{^}}s_sub_imm_i32:
1817 ; GCN: s_load_dword [[A:s[0-9]+]]
1918 ; GCN: s_sub_i32 s{{[0-9]+}}, 0x4d2, [[A]]
2019 define amdgpu_kernel void @s_sub_imm_i32(i32 addrspace(1)* %out, i32 %a) {
2322 ret void
2423 }
2524
26 ; FUNC-LABEL: {{^}}test_sub_i32:
27 ; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
28
25 ; GCN-LABEL: {{^}}test_sub_i32:
2926 ; SI: v_subrev_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
3027 ; GFX9: v_sub_u32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
3128 define amdgpu_kernel void @test_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
3734 ret void
3835 }
3936
40 ; FUNC-LABEL: {{^}}test_sub_imm_i32:
41 ; EG: SUB_INT
42
37 ; GCN-LABEL: {{^}}test_sub_imm_i32:
4338 ; SI: v_sub_i32_e32 v{{[0-9]+}}, vcc, 0x7b, v{{[0-9]+}}
4439 ; GFX9: v_sub_u32_e32 v{{[0-9]+}}, 0x7b, v{{[0-9]+}}
4540 define amdgpu_kernel void @test_sub_imm_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
4944 ret void
5045 }
5146
52 ; FUNC-LABEL: {{^}}test_sub_v2i32:
53 ; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
54 ; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
55
47 ; GCN-LABEL: {{^}}test_sub_v2i32:
5648 ; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
5749 ; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
5850
6759 ret void
6860 }
6961
70 ; FUNC-LABEL: {{^}}test_sub_v4i32:
71 ; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
72 ; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
73 ; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
74 ; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
75
62 ; GCN-LABEL: {{^}}test_sub_v4i32:
7663 ; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
7764 ; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
7865 ; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
9178 ret void
9279 }
9380
94 ; FUNC-LABEL: {{^}}test_sub_i16:
81 ; GCN-LABEL: {{^}}test_sub_i16:
9582 ; SI: v_sub_i32_e32 v{{[0-9]+}}, vcc,
9683 ; GFX89: v_sub_u16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
9784 define amdgpu_kernel void @test_sub_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
98 %tid = call i32 @llvm.r600.read.tidig.x()
85 %tid = call i32 @llvm.amdgcn.workitem.id.x()
9986 %gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid
10087 %b_ptr = getelementptr i16, i16 addrspace(1)* %gep, i32 1
10188 %a = load volatile i16, i16 addrspace(1)* %gep
10592 ret void
10693 }
10794
108 ; FUNC-LABEL: {{^}}test_sub_v2i16:
95 ; GCN-LABEL: {{^}}test_sub_v2i16:
10996 ; VI: v_sub_u16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
11097 ; VI: v_sub_u16_sdwa v{{[0-9]+, v[0-9]+, v[0-9]+}}
11198
11299 ; GFX9: v_pk_sub_i16
113100 define amdgpu_kernel void @test_sub_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
114 %tid = call i32 @llvm.r600.read.tidig.x()
101 %tid = call i32 @llvm.amdgcn.workitem.id.x()
115102 %gep = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %in, i32 %tid
116103 %b_ptr = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %gep, i16 1
117104 %a = load <2 x i16>, <2 x i16> addrspace(1)* %gep
121108 ret void
122109 }
123110
124 ; FUNC-LABEL: {{^}}test_sub_v4i16:
111 ; GCN-LABEL: {{^}}test_sub_v4i16:
125112 ; VI: v_sub_u16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
126113 ; VI: v_sub_u16_sdwa v{{[0-9]+, v[0-9]+, v[0-9]+}}
127114 ; VI: v_sub_u16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
130117 ; GFX9: v_pk_sub_i16
131118 ; GFX9: v_pk_sub_i16
132119 define amdgpu_kernel void @test_sub_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
133 %tid = call i32 @llvm.r600.read.tidig.x()
120 %tid = call i32 @llvm.amdgcn.workitem.id.x()
134121 %gep = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %in, i32 %tid
135122 %b_ptr = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %gep, i16 1
136123 %a = load <4 x i16>, <4 x i16> addrspace(1) * %gep
140127 ret void
141128 }
142129
143 ; FUNC-LABEL: {{^}}s_sub_i64:
130 ; GCN-LABEL: {{^}}s_sub_i64:
144131 ; GCN: s_sub_u32
145132 ; GCN: s_subb_u32
146
147 ; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XY
148 ; EG-DAG: SUB_INT {{[* ]*}}
149 ; EG-DAG: SUBB_UINT
150 ; EG-DAG: SUB_INT
151 ; EG-DAG: SUB_INT {{[* ]*}}
152133 define amdgpu_kernel void @s_sub_i64(i64 addrspace(1)* noalias %out, i64 %a, i64 %b) nounwind {
153134 %result = sub i64 %a, %b
154135 store i64 %result, i64 addrspace(1)* %out, align 8
155136 ret void
156137 }
157138
158 ; FUNC-LABEL: {{^}}v_sub_i64:
139 ; GCN-LABEL: {{^}}v_sub_i64:
159140 ; SI: v_sub_i32_e32
160141 ; SI: v_subb_u32_e32
161142
164145
165146 ; GFX9: v_sub_co_u32_e32
166147 ; GFX9: v_subb_co_u32_e32
167
168 ; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XY
169 ; EG-DAG: SUB_INT {{[* ]*}}
170 ; EG-DAG: SUBB_UINT
171 ; EG-DAG: SUB_INT
172 ; EG-DAG: SUB_INT {{[* ]*}}
173148 define amdgpu_kernel void @v_sub_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %inA, i64 addrspace(1)* noalias %inB) nounwind {
174 %tid = call i32 @llvm.r600.read.tidig.x() readnone
149 %tid = call i32 @llvm.amdgcn.workitem.id.x() readnone
175150 %a_ptr = getelementptr i64, i64 addrspace(1)* %inA, i32 %tid
176151 %b_ptr = getelementptr i64, i64 addrspace(1)* %inB, i32 %tid
177152 %a = load i64, i64 addrspace(1)* %a_ptr
181156 ret void
182157 }
183158
184 ; FUNC-LABEL: {{^}}v_test_sub_v2i64:
159 ; GCN-LABEL: {{^}}v_test_sub_v2i64:
185160 ; SI: v_sub_i32_e32 v{{[0-9]+}}, vcc,
186161 ; SI: v_subb_u32_e32 v{{[0-9]+}}, vcc,
187162 ; SI: v_sub_i32_e32 v{{[0-9]+}}, vcc,
197172 ; GFX9: v_sub_co_u32_e32 v{{[0-9]+}}, vcc,
198173 ; GFX9: v_subb_co_u32_e32 v{{[0-9]+}}, vcc,
199174 define amdgpu_kernel void @v_test_sub_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* noalias %inA, <2 x i64> addrspace(1)* noalias %inB) {
200 %tid = call i32 @llvm.r600.read.tidig.x() readnone
175 %tid = call i32 @llvm.amdgcn.workitem.id.x() readnone
201176 %a_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %inA, i32 %tid
202177 %b_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %inB, i32 %tid
203178 %a = load <2 x i64>, <2 x i64> addrspace(1)* %a_ptr
207182 ret void
208183 }
209184
210 ; FUNC-LABEL: {{^}}v_test_sub_v4i64:
185 ; GCN-LABEL: {{^}}v_test_sub_v4i64:
211186 ; SI: v_sub_i32_e32 v{{[0-9]+}}, vcc,
212187 ; SI: v_subb_u32_e32 v{{[0-9]+}}, vcc,
213188 ; SI: v_sub_i32_e32 v{{[0-9]+}}, vcc,
235210 ; GFX9: v_sub_co_u32_e32 v{{[0-9]+}}, vcc,
236211 ; GFX9: v_subb_co_u32_e32 v{{[0-9]+}}, vcc,
237212 define amdgpu_kernel void @v_test_sub_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* noalias %inA, <4 x i64> addrspace(1)* noalias %inB) {
238 %tid = call i32 @llvm.r600.read.tidig.x() readnone
213 %tid = call i32 @llvm.amdgcn.workitem.id.x() readnone
239214 %a_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %inA, i32 %tid
240215 %b_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %inB, i32 %tid
241216 %a = load <4 x i64>, <4 x i64> addrspace(1)* %a_ptr
244219 store <4 x i64> %result, <4 x i64> addrspace(1)* %out
245220 ret void
246221 }
222
223 ; Make sure the VOP3 form of sub is initially selected. Otherwise a pair
224 ; of copies from/to VCC would be necessary.
225
226 ; GCN-LABEL: {{^}}sub_select_vop3:
227 ; SI: v_subrev_i32_e64 v0, s[0:1], s0, v0
228 ; VI: v_subrev_u32_e64 v0, s[0:1], s0, v0
229 ; GFX9: v_subrev_u32_e32 v0, s0, v0
230
231 ; GCN: ; def vcc
232 ; GCN: ds_write_b32
233 ; GCN: ; use vcc
234 define amdgpu_ps void @sub_select_vop3(i32 inreg %s, i32 %v) {
235 %vcc = call i64 asm sideeffect "; def vcc", "={vcc}"()
236 %sub = sub i32 %v, %s
237 store i32 %sub, i32 addrspace(3)* undef
238 call void asm sideeffect "; use vcc", "{vcc}"(i64 %vcc)
239 ret void
240 }