llvm.org GIT mirror — llvm / commit d658843

UpdateTestChecks: fix AMDGPU handling

Summary:
Was looking into supporting `(srl (shl x, c1), c2)` with c1 != c2 in
dagcombiner; this test changes, but makes `update_llc_test_checks.py`
unhappy.

**Many** AMDGPU tests specify `-march`, not `-mtriple`, which results in
`update_llc_test_checks.py` defaulting to x86 asm function detection
heuristics, which don't work here. I propose to fix this by adding
infrastructure to map from `-march` to `-mtriple` in the UpdateTestChecks
tooling.

Reviewers: RKSimon, MaskRay, arsenm

Reviewed By: arsenm

Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62099

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@361101 91177308-0d34-0410-b5e6-96231b3b80d8

Author: Roman Lebedev, 1 year, 6 months ago
3 changed files with 1369 additions and 234 deletions.
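The mechanism the summary proposes is small enough to sketch. Below is a minimal, hypothetical illustration of deriving a default triple from a RUN line's `-march` value so the right per-target asm-scanning rules get picked instead of the x86 fallback; the names (`MARCH_TO_TRIPLE`, `guess_triple`) are illustrative, not the actual UpdateTestChecks API, and the real change lives in the UpdateTestChecks support modules touched by this commit:

```python
# Hypothetical sketch: map -march to a default triple so the update
# script stops falling back to x86 function-detection heuristics for
# AMDGPU RUN lines. Not the actual UpdateTestChecks code.
import re

MARCH_TO_TRIPLE = {
    'amdgcn': 'amdgcn',
    'r600': 'r600',
}

def guess_triple(run_line):
    """Pick the triple an llc RUN line targets."""
    m = re.search(r'-mtriple[= ]([^ ]+)', run_line)
    if m:
        return m.group(1)  # explicit -mtriple wins
    m = re.search(r'-march[= ]([^ ]+)', run_line)
    if m and m.group(1) in MARCH_TO_TRIPLE:
        return MARCH_TO_TRIPLE[m.group(1)]
    return 'x86'  # the old default, which misfired on these tests

# Both RUN lines in the diff below resolve to 'amdgcn':
print(guess_triple('llc -march=amdgcn -verify-machineinstrs'))
```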
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
-
-; GCN-LABEL: {{^}}bfe_u32_arg_arg_arg:
-; GCN: v_bfe_u32
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,SI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,VI %s
+
define amdgpu_kernel void @bfe_u32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #0 {
+; SI-LABEL: bfe_u32_arg_arg_arg:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb
+; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
+; SI-NEXT: s_mov_b32 s7, 0xf000
+; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: v_mov_b32_e32 v0, s2
+; SI-NEXT: v_bfe_u32 v0, v0, s3, s3
+; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: bfe_u32_arg_arg_arg:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
+; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2c
+; VI-NEXT: s_mov_b32 s7, 0xf000
+; VI-NEXT: s_mov_b32 s6, -1
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: v_mov_b32_e32 v0, s0
+; VI-NEXT: v_bfe_u32 v0, v0, s1, s1
+; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; VI-NEXT: s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 %src1)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

-; GCN-LABEL: {{^}}bfe_u32_arg_arg_imm:
-; GCN: v_bfe_u32
define amdgpu_kernel void @bfe_u32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 {
+; SI-LABEL: bfe_u32_arg_arg_imm:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb
+; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
+; SI-NEXT: s_mov_b32 s7, 0xf000
+; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: v_mov_b32_e32 v0, 0x7b
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: v_mov_b32_e32 v1, s3
+; SI-NEXT: v_bfe_u32 v0, s2, v1, v0
+; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: bfe_u32_arg_arg_imm:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
+; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2c
+; VI-NEXT: v_mov_b32_e32 v1, 0x7b
+; VI-NEXT: s_mov_b32 s7, 0xf000
+; VI-NEXT: s_mov_b32 s6, -1
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: v_mov_b32_e32 v0, s1
+; VI-NEXT: v_bfe_u32 v0, s0, v0, v1
+; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; VI-NEXT: s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 123)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

-; GCN-LABEL: {{^}}bfe_u32_arg_imm_arg:
-; GCN: v_bfe_u32
define amdgpu_kernel void @bfe_u32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) #0 {
+; SI-LABEL: bfe_u32_arg_imm_arg:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb
+; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
+; SI-NEXT: s_mov_b32 s7, 0xf000
+; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: v_mov_b32_e32 v0, 0x7b
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: v_mov_b32_e32 v1, s3
+; SI-NEXT: v_bfe_u32 v0, s2, v0, v1
+; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: bfe_u32_arg_imm_arg:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
+; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2c
+; VI-NEXT: v_mov_b32_e32 v0, 0x7b
+; VI-NEXT: s_mov_b32 s7, 0xf000
+; VI-NEXT: s_mov_b32 s6, -1
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: v_mov_b32_e32 v1, s1
+; VI-NEXT: v_bfe_u32 v0, s0, v0, v1
+; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; VI-NEXT: s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 123, i32 %src2)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

-; GCN-LABEL: {{^}}bfe_u32_imm_arg_arg:
-; GCN: v_bfe_u32
define amdgpu_kernel void @bfe_u32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) #0 {
+; SI-LABEL: bfe_u32_imm_arg_arg:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb
+; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
+; SI-NEXT: s_mov_b32 s7, 0xf000
+; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: s_movk_i32 s0, 0x7b
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: v_mov_b32_e32 v0, s2
+; SI-NEXT: v_mov_b32_e32 v1, s3
+; SI-NEXT: v_bfe_u32 v0, s0, v0, v1
+; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: bfe_u32_imm_arg_arg:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
+; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2c
+; VI-NEXT: s_movk_i32 s2, 0x7b
+; VI-NEXT: s_mov_b32 s7, 0xf000
+; VI-NEXT: s_mov_b32 s6, -1
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: v_mov_b32_e32 v0, s0
+; VI-NEXT: v_mov_b32_e32 v1, s1
+; VI-NEXT: v_bfe_u32 v0, s2, v0, v1
+; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; VI-NEXT: s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 123, i32 %src1, i32 %src2)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

-; GCN-LABEL: {{^}}bfe_u32_arg_0_width_reg_offset:
-; GCN-NOT: {{[^@]}}bfe
-; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 {
+; SI-LABEL: bfe_u32_arg_0_width_reg_offset:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: v_mov_b32_e32 v0, 0
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: bfe_u32_arg_0_width_reg_offset:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: s_mov_b32 s2, -1
+; VI-NEXT: v_mov_b32_e32 v0, 0
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; VI-NEXT: s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 0)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

-; GCN-LABEL: {{^}}bfe_u32_arg_0_width_imm_offset:
-; GCN-NOT: {{[^@]}}bfe
-; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 {
+; SI-LABEL: bfe_u32_arg_0_width_imm_offset:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: v_mov_b32_e32 v0, 0
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: bfe_u32_arg_0_width_imm_offset:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: s_mov_b32 s2, -1
+; VI-NEXT: v_mov_b32_e32 v0, 0
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; VI-NEXT: s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 8, i32 0)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

-; GCN-LABEL: {{^}}bfe_u32_zextload_i8:
-; GCN: buffer_load_ubyte
-; GCN-NOT: {{[^@]}}bfe
-; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_zextload_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
+; SI-LABEL: bfe_u32_zextload_i8:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SI-NEXT: s_mov_b32 s7, 0xf000
+; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: s_mov_b32 s10, s6
+; SI-NEXT: s_mov_b32 s11, s7
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_mov_b32 s8, s2
+; SI-NEXT: s_mov_b32 s9, s3
+; SI-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
+; SI-NEXT: s_mov_b32 s4, s0
+; SI-NEXT: s_mov_b32 s5, s1
+; SI-NEXT: s_waitcnt vmcnt(0)
+; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: bfe_u32_zextload_i8:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: s_mov_b32 s2, -1
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_mov_b32 s0, s4
+; VI-NEXT: s_mov_b32 s1, s5
+; VI-NEXT: s_mov_b32 s4, s6
+; VI-NEXT: s_mov_b32 s5, s7
+; VI-NEXT: s_mov_b32 s6, s2
+; VI-NEXT: s_mov_b32 s7, s3
+; VI-NEXT: buffer_load_ubyte v0, off, s[4:7], 0
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; VI-NEXT: s_endpgm
  %load = load i8, i8 addrspace(1)* %in
  %ext = zext i8 %load to i32
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 0, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

-; GCN-LABEL: {{^}}bfe_u32_zext_in_reg_i8:
-; GCN: buffer_load_dword
-; GCN: v_add_{{[iu]}}32
-; GCN-NEXT: v_and_b32_e32
; FIXME: Should be using s_add_i32
-; GCN-NOT: {{[^@]}}bfe
-; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_zext_in_reg_i8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+; SI-LABEL: bfe_u32_zext_in_reg_i8:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SI-NEXT: s_mov_b32 s7, 0xf000
+; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: s_mov_b32 s10, s6
+; SI-NEXT: s_mov_b32 s11, s7
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_mov_b32 s8, s2
+; SI-NEXT: s_mov_b32 s9, s3
+; SI-NEXT: buffer_load_dword v0, off, s[8:11], 0
+; SI-NEXT: s_mov_b32 s4, s0
+; SI-NEXT: s_mov_b32 s5, s1
+; SI-NEXT: s_waitcnt vmcnt(0)
+; SI-NEXT: v_add_i32_e32 v0, vcc, 1, v0
+; SI-NEXT: v_and_b32_e32 v0, 0xff, v0
+; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: bfe_u32_zext_in_reg_i8:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: s_mov_b32 s2, -1
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_mov_b32 s0, s4
+; VI-NEXT: s_mov_b32 s1, s5
+; VI-NEXT: s_mov_b32 s4, s6
+; VI-NEXT: s_mov_b32 s5, s7
+; VI-NEXT: s_mov_b32 s6, s2
+; VI-NEXT: s_mov_b32 s7, s3
+; VI-NEXT: buffer_load_dword v0, off, s[4:7], 0
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: v_add_u32_e32 v0, vcc, 1, v0
+; VI-NEXT: v_and_b32_e32 v0, 0xff, v0
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; VI-NEXT: s_endpgm
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 0, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

-; GCN-LABEL: {{^}}bfe_u32_zext_in_reg_i16:
-; GCN: buffer_load_dword
-; GCN: v_add_{{[iu]}}32
-; GCN-NEXT: v_and_b32_e32
-; GCN-NOT: {{[^@]}}bfe
-; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_zext_in_reg_i16(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+; SI-LABEL: bfe_u32_zext_in_reg_i16:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SI-NEXT: s_mov_b32 s7, 0xf000
+; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: s_mov_b32 s10, s6
+; SI-NEXT: s_mov_b32 s11, s7
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_mov_b32 s8, s2
+; SI-NEXT: s_mov_b32 s9, s3
+; SI-NEXT: buffer_load_dword v0, off, s[8:11], 0
+; SI-NEXT: s_mov_b32 s4, s0
+; SI-NEXT: s_mov_b32 s5, s1
+; SI-NEXT: s_waitcnt vmcnt(0)
+; SI-NEXT: v_add_i32_e32 v0, vcc, 1, v0
+; SI-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: bfe_u32_zext_in_reg_i16:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: s_mov_b32 s2, -1
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_mov_b32 s0, s4
+; VI-NEXT: s_mov_b32 s1, s5
+; VI-NEXT: s_mov_b32 s4, s6
+; VI-NEXT: s_mov_b32 s5, s7
+; VI-NEXT: s_mov_b32 s6, s2
+; VI-NEXT: s_mov_b32 s7, s3
+; VI-NEXT: buffer_load_dword v0, off, s[4:7], 0
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: v_add_u32_e32 v0, vcc, 1, v0
+; VI-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; VI-NEXT: s_endpgm
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 65535
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 0, i32 16)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

-; GCN-LABEL: {{^}}bfe_u32_zext_in_reg_i8_offset_1:
-; GCN: buffer_load_dword
-; GCN: v_add_{{[iu]}}32
-; GCN: bfe
-; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+; SI-LABEL: bfe_u32_zext_in_reg_i8_offset_1:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SI-NEXT: s_mov_b32 s7, 0xf000
+; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: s_mov_b32 s10, s6
+; SI-NEXT: s_mov_b32 s11, s7
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_mov_b32 s8, s2
+; SI-NEXT: s_mov_b32 s9, s3
+; SI-NEXT: buffer_load_dword v0, off, s[8:11], 0
+; SI-NEXT: s_mov_b32 s4, s0
+; SI-NEXT: s_mov_b32 s5, s1
+; SI-NEXT: s_waitcnt vmcnt(0)
+; SI-NEXT: v_add_i32_e32 v0, vcc, 1, v0
+; SI-NEXT: v_and_b32_e32 v0, 0xfe, v0
+; SI-NEXT: v_bfe_u32 v0, v0, 1, 8
+; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: bfe_u32_zext_in_reg_i8_offset_1:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: s_mov_b32 s2, -1
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_mov_b32 s0, s4
+; VI-NEXT: s_mov_b32 s1, s5
+; VI-NEXT: s_mov_b32 s4, s6
+; VI-NEXT: s_mov_b32 s5, s7
+; VI-NEXT: s_mov_b32 s6, s2
+; VI-NEXT: s_mov_b32 s7, s3
+; VI-NEXT: buffer_load_dword v0, off, s[4:7], 0
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: v_add_u32_e32 v0, vcc, 1, v0
+; VI-NEXT: v_and_b32_e32 v0, 0xfe, v0
+; VI-NEXT: v_bfe_u32 v0, v0, 1, 8
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; VI-NEXT: s_endpgm
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 1, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

-; GCN-LABEL: {{^}}bfe_u32_zext_in_reg_i8_offset_3:
-; GCN: buffer_load_dword
-; GCN: v_add_{{[iu]}}32
-; GCN-NEXT: v_and_b32_e32 {{v[0-9]+}}, 0xf8
-; GCN-NEXT: bfe
-; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+; SI-LABEL: bfe_u32_zext_in_reg_i8_offset_3:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SI-NEXT: s_mov_b32 s7, 0xf000
+; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: s_mov_b32 s10, s6
+; SI-NEXT: s_mov_b32 s11, s7
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_mov_b32 s8, s2
+; SI-NEXT: s_mov_b32 s9, s3
+; SI-NEXT: buffer_load_dword v0, off, s[8:11], 0
+; SI-NEXT: s_mov_b32 s4, s0
+; SI-NEXT: s_mov_b32 s5, s1
+; SI-NEXT: s_waitcnt vmcnt(0)
+; SI-NEXT: v_add_i32_e32 v0, vcc, 1, v0
+; SI-NEXT: v_and_b32_e32 v0, 0xf8, v0
+; SI-NEXT: v_bfe_u32 v0, v0, 3, 8
+; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: bfe_u32_zext_in_reg_i8_offset_3:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: s_mov_b32 s2, -1
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_mov_b32 s0, s4
+; VI-NEXT: s_mov_b32 s1, s5
+; VI-NEXT: s_mov_b32 s4, s6
+; VI-NEXT: s_mov_b32 s5, s7
+; VI-NEXT: s_mov_b32 s6, s2
+; VI-NEXT: s_mov_b32 s7, s3
+; VI-NEXT: buffer_load_dword v0, off, s[4:7], 0
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: v_add_u32_e32 v0, vcc, 1, v0
+; VI-NEXT: v_and_b32_e32 v0, 0xf8, v0
+; VI-NEXT: v_bfe_u32 v0, v0, 3, 8
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; VI-NEXT: s_endpgm
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 3, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

-; GCN-LABEL: {{^}}bfe_u32_zext_in_reg_i8_offset_7:
-; GCN: buffer_load_dword
-; GCN: v_add_{{[iu]}}32
-; GCN-NEXT: v_and_b32_e32 {{v[0-9]+}}, 0x80
-; GCN-NEXT: bfe
-; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+; SI-LABEL: bfe_u32_zext_in_reg_i8_offset_7:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SI-NEXT: s_mov_b32 s7, 0xf000
+; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: s_mov_b32 s10, s6
+; SI-NEXT: s_mov_b32 s11, s7
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_mov_b32 s8, s2
+; SI-NEXT: s_mov_b32 s9, s3
+; SI-NEXT: buffer_load_dword v0, off, s[8:11], 0
+; SI-NEXT: s_mov_b32 s4, s0
+; SI-NEXT: s_mov_b32 s5, s1
+; SI-NEXT: s_waitcnt vmcnt(0)
+; SI-NEXT: v_add_i32_e32 v0, vcc, 1, v0
+; SI-NEXT: v_and_b32_e32 v0, 0x80, v0
+; SI-NEXT: v_bfe_u32 v0, v0, 7, 8
+; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: bfe_u32_zext_in_reg_i8_offset_7:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: s_mov_b32 s2, -1
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_mov_b32 s0, s4
+; VI-NEXT: s_mov_b32 s1, s5
+; VI-NEXT: s_mov_b32 s4, s6
+; VI-NEXT: s_mov_b32 s5, s7
+; VI-NEXT: s_mov_b32 s6, s2
+; VI-NEXT: s_mov_b32 s7, s3
+; VI-NEXT: buffer_load_dword v0, off, s[4:7], 0
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: v_add_u32_e32 v0, vcc, 1, v0
+; VI-NEXT: v_and_b32_e32 v0, 0x80, v0
+; VI-NEXT: v_bfe_u32 v0, v0, 7, 8
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; VI-NEXT: s_endpgm
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 7, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

-; GCN-LABEL: {{^}}bfe_u32_zext_in_reg_i16_offset_8:
-; GCN: buffer_load_dword
-; GCN: v_add_{{[iu]}}32
-; GCN-NEXT: bfe
-; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_zext_in_reg_i16_offset_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+; SI-LABEL: bfe_u32_zext_in_reg_i16_offset_8:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SI-NEXT: s_mov_b32 s7, 0xf000
+; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: s_mov_b32 s10, s6
+; SI-NEXT: s_mov_b32 s11, s7
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_mov_b32 s8, s2
+; SI-NEXT: s_mov_b32 s9, s3
+; SI-NEXT: buffer_load_dword v0, off, s[8:11], 0
+; SI-NEXT: s_mov_b32 s4, s0
+; SI-NEXT: s_mov_b32 s5, s1
+; SI-NEXT: s_waitcnt vmcnt(0)
+; SI-NEXT: v_add_i32_e32 v0, vcc, 1, v0
+; SI-NEXT: v_bfe_u32 v0, v0, 8, 8
+; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: bfe_u32_zext_in_reg_i16_offset_8:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: s_mov_b32 s2, -1
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_mov_b32 s0, s4
+; VI-NEXT: s_mov_b32 s1, s5
+; VI-NEXT: s_mov_b32 s4, s6
+; VI-NEXT: s_mov_b32 s5, s7
+; VI-NEXT: s_mov_b32 s6, s2
+; VI-NEXT: s_mov_b32 s7, s3
+; VI-NEXT: buffer_load_dword v0, off, s[4:7], 0
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: v_add_u32_e32 v0, vcc, 1, v0
+; VI-NEXT: v_bfe_u32 v0, v0, 8, 8
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; VI-NEXT: s_endpgm
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 65535
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 8, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

-; GCN-LABEL: {{^}}bfe_u32_test_1:
-; GCN: buffer_load_dword
-; GCN: v_and_b32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}}
-; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_test_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+; SI-LABEL: bfe_u32_test_1:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SI-NEXT: s_mov_b32 s7, 0xf000
+; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: s_mov_b32 s10, s6
+; SI-NEXT: s_mov_b32 s11, s7
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_mov_b32 s8, s2
+; SI-NEXT: s_mov_b32 s9, s3
+; SI-NEXT: buffer_load_dword v0, off, s[8:11], 0
+; SI-NEXT: s_mov_b32 s4, s0
+; SI-NEXT: s_mov_b32 s5, s1
+; SI-NEXT: s_waitcnt vmcnt(0)
+; SI-NEXT: v_and_b32_e32 v0, 1, v0
+; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: bfe_u32_test_1:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: s_mov_b32 s2, -1
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_mov_b32 s0, s4
+; VI-NEXT: s_mov_b32 s1, s5
+; VI-NEXT: s_mov_b32 s4, s6
+; VI-NEXT: s_mov_b32 s5, s7
+; VI-NEXT: s_mov_b32 s6, s2
+; VI-NEXT: s_mov_b32 s7, s3
+; VI-NEXT: buffer_load_dword v0, off, s[4:7], 0
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: v_and_b32_e32 v0, 1, v0
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; VI-NEXT: s_endpgm
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 0, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_test_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+; SI-LABEL: bfe_u32_test_2:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: v_mov_b32_e32 v0, 0
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: bfe_u32_test_2:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: s_mov_b32 s2, -1
+; VI-NEXT: v_mov_b32_e32 v0, 0
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; VI-NEXT: s_endpgm
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 0, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_test_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+; SI-LABEL: bfe_u32_test_3:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: v_mov_b32_e32 v0, 0
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: bfe_u32_test_3:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: s_mov_b32 s2, -1
+; VI-NEXT: v_mov_b32_e32 v0, 0
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; VI-NEXT: s_endpgm
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 0, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

-; GCN-LABEL: {{^}}bfe_u32_test_4:
-; GCN-NOT: lshl
-; GCN-NOT: shr
-; GCN-NOT: {{[^@]}}bfe
-; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
-; GCN: buffer_store_dword [[VREG]],
-; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_test_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+; SI-LABEL: bfe_u32_test_4:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: v_mov_b32_e32 v0, 0
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: bfe_u32_test_4:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: s_mov_b32 s2, -1
+; VI-NEXT: v_mov_b32_e32 v0, 0
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; VI-NEXT: s_endpgm
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %shr = lshr i32 %shl, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shr, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

-; GCN-LABEL: {{^}}bfe_u32_test_5:
-; GCN: buffer_load_dword
-; GCN-NOT: lshl
-; GCN-NOT: shr
-; GCN: v_bfe_i32 {{v[0-9]+}}, {{v[0-9]+}}, 0, 1
-; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_test_5(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+; SI-LABEL: bfe_u32_test_5:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SI-NEXT: s_mov_b32 s7, 0xf000
+; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: s_mov_b32 s10, s6
+; SI-NEXT: s_mov_b32 s11, s7
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_mov_b32 s8, s2
+; SI-NEXT: s_mov_b32 s9, s3
+; SI-NEXT: buffer_load_dword v0, off, s[8:11], 0
+; SI-NEXT: s_mov_b32 s4, s0
+; SI-NEXT: s_mov_b32 s5, s1
+; SI-NEXT: s_waitcnt vmcnt(0)
+; SI-NEXT: v_bfe_i32 v0, v0, 0, 1
+; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: bfe_u32_test_5:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: s_mov_b32 s2, -1
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_mov_b32 s0, s4
+; VI-NEXT: s_mov_b32 s1, s5
+; VI-NEXT: s_mov_b32 s4, s6
+; VI-NEXT: s_mov_b32 s5, s7
+; VI-NEXT: s_mov_b32 s6, s2
+; VI-NEXT: s_mov_b32 s7, s3
+; VI-NEXT: buffer_load_dword v0, off, s[4:7], 0
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: v_bfe_i32 v0, v0, 0, 1
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; VI-NEXT: s_endpgm
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %shr = ashr i32 %shl, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shr, i32 0, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

-; GCN-LABEL: {{^}}bfe_u32_test_6:
-; GCN: v_lshlrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
-; GCN: v_lshrrev_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
-; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+; SI-LABEL: bfe_u32_test_6:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SI-NEXT: s_mov_b32 s7, 0xf000
+; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: s_mov_b32 s10, s6
+; SI-NEXT: s_mov_b32 s11, s7
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_mov_b32 s8, s2
+; SI-NEXT: s_mov_b32 s9, s3
+; SI-NEXT: buffer_load_dword v0, off, s[8:11], 0
+; SI-NEXT: s_mov_b32 s4, s0
+; SI-NEXT: s_mov_b32 s5, s1
+; SI-NEXT: s_waitcnt vmcnt(0)
+; SI-NEXT: v_lshlrev_b32_e32 v0, 31, v0
+; SI-NEXT: v_lshrrev_b32_e32 v0, 1, v0
+; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: bfe_u32_test_6:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: s_mov_b32 s2, -1
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_mov_b32 s0, s4
+; VI-NEXT: s_mov_b32 s1, s5
+; VI-NEXT: s_mov_b32 s4, s6
+; VI-NEXT: s_mov_b32 s5, s7
+; VI-NEXT: s_mov_b32 s6, s2
+; VI-NEXT: s_mov_b32 s7, s3
+; VI-NEXT: buffer_load_dword v0, off, s[4:7], 0
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: v_lshlrev_b32_e32 v0, 31, v0
+; VI-NEXT: v_lshrrev_b32_e32 v0, 1, v0
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; VI-NEXT: s_endpgm
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 1, i32 31)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

-; GCN-LABEL: {{^}}bfe_u32_test_7:
-; GCN: v_lshlrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
-; GCN-NOT: {{[^@]}}bfe
-; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+; SI-LABEL: bfe_u32_test_7:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SI-NEXT: s_mov_b32 s7, 0xf000
+; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: s_mov_b32 s10, s6
+; SI-NEXT: s_mov_b32 s11, s7
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_mov_b32 s8, s2
+; SI-NEXT: s_mov_b32 s9, s3
+; SI-NEXT: buffer_load_dword v0, off, s[8:11], 0
+; SI-NEXT: s_mov_b32 s4, s0
+; SI-NEXT: s_mov_b32 s5, s1
+; SI-NEXT: s_waitcnt vmcnt(0)
+; SI-NEXT: v_lshlrev_b32_e32 v0, 31, v0
+; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: bfe_u32_test_7:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: s_mov_b32 s2, -1
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_mov_b32 s0, s4
+; VI-NEXT: s_mov_b32 s1, s5
+; VI-NEXT: s_mov_b32 s4, s6
+; VI-NEXT: s_mov_b32 s5, s7
+; VI-NEXT: s_mov_b32 s6, s2
+; VI-NEXT: s_mov_b32 s7, s3
+; VI-NEXT: buffer_load_dword v0, off, s[4:7], 0
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: v_lshlrev_b32_e32 v0, 31, v0
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; VI-NEXT: s_endpgm
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 0, i32 31)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

-; GCN-LABEL: {{^}}bfe_u32_test_8:
-; GCN-NOT: {{[^@]}}bfe
-; GCN: v_and_b32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}}
-; GCN-NOT: {{[^@]}}bfe
-; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+; SI-LABEL: bfe_u32_test_8:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SI-NEXT: s_mov_b32 s7, 0xf000
+; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: s_mov_b32 s10, s6
+; SI-NEXT: s_mov_b32 s11, s7
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_mov_b32 s8, s2
+; SI-NEXT: s_mov_b32 s9, s3
+; SI-NEXT: buffer_load_dword v0, off, s[8:11], 0
+; SI-NEXT: s_mov_b32 s4, s0
+; SI-NEXT: s_mov_b32 s5, s1
+; SI-NEXT: s_waitcnt vmcnt(0)
+; SI-NEXT: v_and_b32_e32 v0, 1, v0
+; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: bfe_u32_test_8:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: s_mov_b32 s2, -1
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_mov_b32 s0, s4
+; VI-NEXT: s_mov_b32 s1, s5
+; VI-NEXT: s_mov_b32 s4, s6
+; VI-NEXT: s_mov_b32 s5, s7
+; VI-NEXT: s_mov_b32 s6, s2
+; VI-NEXT: s_mov_b32 s7, s3
+; VI-NEXT: buffer_load_dword v0, off, s[4:7], 0
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: v_and_b32_e32 v0, 1, v0
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; VI-NEXT: s_endpgm
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

-; GCN-LABEL: {{^}}bfe_u32_test_9:
-; GCN-NOT: {{[^@]}}bfe
-; GCN: v_lshrrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
-; GCN-NOT: {{[^@]}}bfe
-; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+; SI-LABEL: bfe_u32_test_9:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SI-NEXT: s_mov_b32 s7, 0xf000
+; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: s_mov_b32 s10, s6
+; SI-NEXT: s_mov_b32 s11, s7
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_mov_b32 s8, s2
+; SI-NEXT: s_mov_b32 s9, s3
+; SI-NEXT: buffer_load_dword v0, off, s[8:11], 0
+; SI-NEXT: s_mov_b32 s4, s0
+; SI-NEXT: s_mov_b32 s5, s1
+; SI-NEXT: s_waitcnt vmcnt(0)
+; SI-NEXT: v_lshrrev_b32_e32 v0, 31, v0
+; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: bfe_u32_test_9:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: s_mov_b32 s2, -1
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_mov_b32 s0, s4
+; VI-NEXT: s_mov_b32 s1, s5
+; VI-NEXT: s_mov_b32 s4, s6
+; VI-NEXT: s_mov_b32 s5, s7
+; VI-NEXT: s_mov_b32 s6, s2
+; VI-NEXT: s_mov_b32 s7, s3
+; VI-NEXT: buffer_load_dword v0, off, s[4:7], 0
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: v_lshrrev_b32_e32 v0, 31, v0
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; VI-NEXT: s_endpgm
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

-; GCN-LABEL: {{^}}bfe_u32_test_10:
-; GCN-NOT: {{[^@]}}bfe
-; GCN: v_lshrrev_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
-; GCN-NOT: {{[^@]}}bfe
-; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+; SI-LABEL: bfe_u32_test_10:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SI-NEXT: s_mov_b32 s7, 0xf000
+; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: s_mov_b32 s10, s6
+; SI-NEXT: s_mov_b32 s11, s7
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_mov_b32 s8, s2
+; SI-NEXT: s_mov_b32 s9, s3
+; SI-NEXT: buffer_load_dword v0, off, s[8:11], 0
+; SI-NEXT: s_mov_b32 s4, s0
+; SI-NEXT: s_mov_b32 s5, s1
+; SI-NEXT: s_waitcnt vmcnt(0)
+; SI-NEXT: v_lshrrev_b32_e32 v0, 1, v0
+; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: bfe_u32_test_10:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: s_mov_b32 s2, -1
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_mov_b32 s0, s4
+; VI-NEXT: s_mov_b32 s1, s5
+; VI-NEXT: s_mov_b32 s4, s6
+; VI-NEXT: s_mov_b32 s5, s7
+; VI-NEXT: s_mov_b32 s6, s2
+; VI-NEXT: s_mov_b32 s7, s3
+; VI-NEXT: buffer_load_dword v0, off, s[4:7], 0
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: v_lshrrev_b32_e32 v0, 1, v0
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; VI-NEXT: s_endpgm
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 1, i32 31)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

-; GCN-LABEL: {{^}}bfe_u32_test_11:
-; GCN-NOT: {{[^@]}}bfe
-; GCN: v_lshrrev_b32_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
-; GCN-NOT: {{[^@]}}bfe
-; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+; SI-LABEL: bfe_u32_test_11:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SI-NEXT: s_mov_b32 s7, 0xf000
+; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: s_mov_b32 s10, s6
+; SI-NEXT: s_mov_b32 s11, s7
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_mov_b32 s8, s2
+; SI-NEXT: s_mov_b32 s9, s3
+; SI-NEXT: buffer_load_dword v0, off, s[8:11], 0
+; SI-NEXT: s_mov_b32 s4, s0
+; SI-NEXT: s_mov_b32 s5, s1
+; SI-NEXT: s_waitcnt vmcnt(0)
+; SI-NEXT: v_lshrrev_b32_e32 v0, 8, v0
+; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: bfe_u32_test_11:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: s_mov_b32 s2, -1
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_mov_b32 s0, s4
+; VI-NEXT: s_mov_b32 s1, s5
+; VI-NEXT: s_mov_b32 s4, s6
+; VI-NEXT: s_mov_b32 s5, s7
+; VI-NEXT: s_mov_b32 s6, s2
+; VI-NEXT: s_mov_b32 s7, s3
+; VI-NEXT: buffer_load_dword v0, off, s[4:7], 0
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: v_lshrrev_b32_e32 v0, 8, v0
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; VI-NEXT: s_endpgm
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 8, i32 24)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

-; GCN-LABEL: {{^}}bfe_u32_test_12:
-; GCN-NOT: {{[^@]}}bfe
-; GCN: v_lshrrev_b32_e32 v{{[0-9]+}}, 24, v{{[0-9]+}}
-; GCN-NOT: {{[^@]}}bfe
-; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+; SI-LABEL: bfe_u32_test_12:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SI-NEXT: s_mov_b32 s7, 0xf000
+; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: s_mov_b32 s10, s6
+; SI-NEXT: s_mov_b32 s11, s7
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_mov_b32 s8, s2
+; SI-NEXT: s_mov_b32 s9, s3
+; SI-NEXT: buffer_load_dword v0, off, s[8:11], 0
+; SI-NEXT: s_mov_b32 s4, s0
+; SI-NEXT: s_mov_b32 s5, s1
+; SI-NEXT: s_waitcnt vmcnt(0)
+; SI-NEXT: v_lshrrev_b32_e32 v0, 24, v0
+; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: bfe_u32_test_12:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: s_mov_b32 s2, -1
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_mov_b32 s0, s4
+; VI-NEXT: s_mov_b32 s1, s5
+; VI-NEXT: s_mov_b32 s4, s6
+; VI-NEXT: s_mov_b32 s5, s7
+; VI-NEXT: s_mov_b32 s6, s2
+; VI-NEXT: s_mov_b32 s7, s3
+; VI-NEXT: buffer_load_dword v0, off, s[4:7], 0
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: v_lshrrev_b32_e32 v0, 24, v0
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; VI-NEXT: s_endpgm
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 24, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

-; GCN-LABEL: {{^}}bfe_u32_test_13:
; V_ASHRREV_U32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}}
-; GCN-NOT: {{[^@]}}bfe
-; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+; SI-LABEL: bfe_u32_test_13:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SI-NEXT: s_mov_b32 s7, 0xf000
+; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: s_mov_b32 s10, s6
+; SI-NEXT: s_mov_b32 s11, s7
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_mov_b32 s8, s2
+; SI-NEXT: s_mov_b32 s9, s3
+; SI-NEXT: buffer_load_dword v0, off, s[8:11], 0
+; SI-NEXT: s_mov_b32 s4, s0
+; SI-NEXT: s_mov_b32 s5, s1
+; SI-NEXT: s_waitcnt vmcnt(0)
+; SI-NEXT: v_lshrrev_b32_e32 v0, 31, v0
+; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: bfe_u32_test_13:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: s_mov_b32 s2, -1
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_mov_b32 s0, s4
+; VI-NEXT: s_mov_b32 s1, s5
+; VI-NEXT: s_mov_b32 s4, s6
+; VI-NEXT: s_mov_b32 s5, s7
+; VI-NEXT: s_mov_b32 s6, s2
+; VI-NEXT: s_mov_b32 s7, s3
+; VI-NEXT: buffer_load_dword v0, off, s[4:7], 0
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: v_lshrrev_b32_e32 v0, 31, v0
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; VI-NEXT: s_endpgm
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = ashr i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

-; GCN-LABEL: {{^}}bfe_u32_test_14:
-; GCN-NOT: lshr
-; GCN-NOT: {{[^@]}}bfe
-; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+; SI-LABEL: bfe_u32_test_14:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: v_mov_b32_e32 v0, 0
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: bfe_u32_test_14:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: s_mov_b32 s2, -1
+; VI-NEXT: v_mov_b32_e32 v0, 0
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; VI-NEXT: s_endpgm
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = lshr i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

-; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_0:
-; GCN-NOT: {{[^@]}}bfe
-; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
-; GCN: buffer_store_dword [[VREG]],
-; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_0(i32 addrspace(1)* %out) #0 {
+; SI-LABEL: bfe_u32_constant_fold_test_0:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: v_mov_b32_e32 v0, 0
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: bfe_u32_constant_fold_test_0:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: s_mov_b32 s2, -1
+; VI-NEXT: v_mov_b32_e32 v0, 0
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; VI-NEXT: s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 0, i32 0, i32 0)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

-; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_1:
-; GCN-NOT: {{[^@]}}bfe
-; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
-; GCN: buffer_store_dword [[VREG]],
-; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_1(i32 addrspace(1)* %out) #0 {
+; SI-LABEL: bfe_u32_constant_fold_test_1:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: v_mov_b32_e32 v0, 0
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: bfe_u32_constant_fold_test_1:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: s_mov_b32 s2, -1
+; VI-NEXT: v_mov_b32_e32 v0, 0
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; VI-NEXT: s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 12334, i32 0, i32 0)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

-; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_2:
-; GCN-NOT: {{[^@]}}bfe
-; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
-; GCN: buffer_store_dword [[VREG]],
-; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_2(i32 addrspace(1)* %out) #0 {
+; SI-LABEL: bfe_u32_constant_fold_test_2:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: v_mov_b32_e32 v0, 0
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: bfe_u32_constant_fold_test_2:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: s_mov_b32 s2, -1
+; VI-NEXT: v_mov_b32_e32 v0, 0
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; VI-NEXT: s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 0, i32 0, i32 1)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

-; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_3:
-; GCN-NOT: {{[^@]}}bfe
-; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
-; GCN: buffer_store_dword [[VREG]],
-; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_3(i32 addrspace(1)* %out) #0 {
+; SI-LABEL: bfe_u32_constant_fold_test_3:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: v_mov_b32_e32 v0, 1
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: bfe_u32_constant_fold_test_3:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: s_mov_b32 s2, -1
+; VI-NEXT: v_mov_b32_e32 v0, 1
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; VI-NEXT: s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 1, i32 0, i32 1)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

-; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_4:
-; GCN-NOT: {{[^@]}}bfe
-; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
-; GCN: buffer_store_dword [[VREG]],
-; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_4(i32 addrspace(1)* %out) #0 {
+; SI-LABEL: bfe_u32_constant_fold_test_4:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: v_mov_b32_e32 v0, -1
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: bfe_u32_constant_fold_test_4:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: s_mov_b32 s2, -1
+; VI-NEXT: v_mov_b32_e32 v0, -1
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; VI-NEXT: s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 4294967295, i32 0, i32 1)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

-; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_5:
-; GCN-NOT: {{[^@]}}bfe
-; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
-; GCN: buffer_store_dword [[VREG]],
-; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_5(i32 addrspace(1)* %out) #0 {
+; SI-LABEL: bfe_u32_constant_fold_test_5:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: v_mov_b32_e32 v0, 1
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: bfe_u32_constant_fold_test_5:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: s_mov_b32 s2, -1
+; VI-NEXT: v_mov_b32_e32 v0, 1
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; VI-NEXT: s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 128, i32 7, i32 1)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

-; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_6:
-; GCN-NOT: {{[^@]}}bfe
-; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x80
-; GCN: buffer_store_dword [[VREG]],
-; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_6(i32 addrspace(1)* %out) #0 {
+; SI-LABEL: bfe_u32_constant_fold_test_6:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: v_mov_b32_e32 v0, 0x80
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: bfe_u32_constant_fold_test_6:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: s_mov_b32 s2, -1
+; VI-NEXT: v_mov_b32_e32 v0, 0x80
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; VI-NEXT: s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 128, i32 0, i32 8)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

-; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_7:
-; GCN-NOT: {{[^@]}}bfe
-; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
-; GCN: buffer_store_dword [[VREG]],
-; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_7(i32 addrspace(1)* %out) #0 {
+; SI-LABEL: bfe_u32_constant_fold_test_7:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: v_mov_b32_e32 v0, 0x7f
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: bfe_u32_constant_fold_test_7:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: s_mov_b32 s2, -1
+; VI-NEXT: v_mov_b32_e32 v0, 0x7f
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; VI-NEXT: s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 127, i32 0, i32 8)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

-; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_8:
-; GCN-NOT: {{[^@]}}bfe
-; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
-; GCN: buffer_store_dword [[VREG]],
-; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_8(i32 addrspace(1)* %out) #0 {
+; SI-LABEL: bfe_u32_constant_fold_test_8:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: v_mov_b32_e32 v0, 1
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: bfe_u32_constant_fold_test_8:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: s_mov_b32 s2, -1
+; VI-NEXT: v_mov_b32_e32 v0, 1
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; VI-NEXT: s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 127, i32 6, i32 8)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

-; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_9:
-; GCN-NOT: {{[^@]}}bfe
-; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
-; GCN: buffer_store_dword [[VREG]],
-; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_9(i32 addrspace(1)* %out) #0 {
+; SI-LABEL: bfe_u32_constant_fold_test_9:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: v_mov_b32_e32 v0, 1
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: bfe_u32_constant_fold_test_9:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: s_mov_b32 s2, -1
+; VI-NEXT: v_mov_b32_e32 v0, 1
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; VI-NEXT: s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 65536, i32 16, i32 8)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

-; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_10:
-; GCN-NOT: {{[^@]}}bfe
-; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
-; GCN: buffer_store_dword [[VREG]],
-; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_10(i32 addrspace(1)* %out) #0 {
+; SI-LABEL: bfe_u32_constant_fold_test_10:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: v_mov_b32_e32 v0, 0
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: bfe_u32_constant_fold_test_10:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: s_mov_b32 s2, -1
+; VI-NEXT: v_mov_b32_e32 v0, 0
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; VI-NEXT: s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 65535, i32 16, i32 16)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

-; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_11:
-; GCN-NOT: {{[^@]}}bfe
-; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 10
-; GCN: buffer_store_dword [[VREG]],
-; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_11(i32 addrspace(1)* %out) #0 {
+; SI-LABEL: bfe_u32_constant_fold_test_11:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: v_mov_b32_e32 v0, 10
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: bfe_u32_constant_fold_test_11:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: s_mov_b32 s2, -1
+; VI-NEXT: v_mov_b32_e32 v0, 10
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; VI-NEXT: s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 4, i32 4)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

-; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_12:
-; GCN-NOT: {{[^@]}}bfe
-; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
-; GCN: buffer_store_dword [[VREG]],
-; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_12(i32 addrspace(1)* %out) #0 {
+; SI-LABEL: bfe_u32_constant_fold_test_12:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: v_mov_b32_e32 v0, 0
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: bfe_u32_constant_fold_test_12:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: s_mov_b32 s2, -1
+; VI-NEXT: v_mov_b32_e32 v0, 0
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; VI-NEXT: s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 31, i32 1)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

-; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_13:
-; GCN-NOT: {{[^@]}}bfe
-; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
-; GCN: buffer_store_dword [[VREG]],
-; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_13(i32 addrspace(1)* %out) #0 {
+; SI-LABEL: bfe_u32_constant_fold_test_13:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: v_mov_b32_e32 v0, 1
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: bfe_u32_constant_fold_test_13:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: s_mov_b32 s2, -1
+; VI-NEXT: v_mov_b32_e32 v0, 1
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; VI-NEXT: s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 131070, i32 16, i32 16)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

487 ; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_14:
488 ; GCN-NOT: {{[^@]}}bfe
489 ; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 40
490 ; GCN: buffer_store_dword [[VREG]],
491 ; GCN: s_endpgm
4921402 ; EG-NOT: BFE
4931403 define amdgpu_kernel void @bfe_u32_constant_fold_test_14(i32 addrspace(1)* %out) #0 {
1404 ; SI-LABEL: bfe_u32_constant_fold_test_14:
1405 ; SI: ; %bb.0:
1406 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
1407 ; SI-NEXT: s_mov_b32 s3, 0xf000
1408 ; SI-NEXT: s_mov_b32 s2, -1
1409 ; SI-NEXT: v_mov_b32_e32 v0, 40
1410 ; SI-NEXT: s_waitcnt lgkmcnt(0)
1411 ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
1412 ; SI-NEXT: s_endpgm
1413 ;
1414 ; VI-LABEL: bfe_u32_constant_fold_test_14:
1415 ; VI: ; %bb.0:
1416 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1417 ; VI-NEXT: s_mov_b32 s3, 0xf000
1418 ; VI-NEXT: s_mov_b32 s2, -1
1419 ; VI-NEXT: v_mov_b32_e32 v0, 40
1420 ; VI-NEXT: s_waitcnt lgkmcnt(0)
1421 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
1422 ; VI-NEXT: s_endpgm
4941423 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 2, i32 30)
4951424 store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
4961425 ret void
4971426 }
4981427
499 ; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_15:
500 ; GCN-NOT: {{[^@]}}bfe
501 ; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 10
502 ; GCN: buffer_store_dword [[VREG]],
503 ; GCN: s_endpgm
5041428 ; EG-NOT: BFE
5051429 define amdgpu_kernel void @bfe_u32_constant_fold_test_15(i32 addrspace(1)* %out) #0 {
1430 ; SI-LABEL: bfe_u32_constant_fold_test_15:
1431 ; SI: ; %bb.0:
1432 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
1433 ; SI-NEXT: s_mov_b32 s3, 0xf000
1434 ; SI-NEXT: s_mov_b32 s2, -1
1435 ; SI-NEXT: v_mov_b32_e32 v0, 10
1436 ; SI-NEXT: s_waitcnt lgkmcnt(0)
1437 ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
1438 ; SI-NEXT: s_endpgm
1439 ;
1440 ; VI-LABEL: bfe_u32_constant_fold_test_15:
1441 ; VI: ; %bb.0:
1442 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1443 ; VI-NEXT: s_mov_b32 s3, 0xf000
1444 ; VI-NEXT: s_mov_b32 s2, -1
1445 ; VI-NEXT: v_mov_b32_e32 v0, 10
1446 ; VI-NEXT: s_waitcnt lgkmcnt(0)
1447 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
1448 ; VI-NEXT: s_endpgm
5061449 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 4, i32 28)
5071450 store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
5081451 ret void
5091452 }
5101453
511 ; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_16:
512 ; GCN-NOT: {{[^@]}}bfe
513 ; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
514 ; GCN: buffer_store_dword [[VREG]],
515 ; GCN: s_endpgm
5161454 ; EG-NOT: BFE
5171455 define amdgpu_kernel void @bfe_u32_constant_fold_test_16(i32 addrspace(1)* %out) #0 {
1456 ; SI-LABEL: bfe_u32_constant_fold_test_16:
1457 ; SI: ; %bb.0:
1458 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
1459 ; SI-NEXT: s_mov_b32 s3, 0xf000
1460 ; SI-NEXT: s_mov_b32 s2, -1
1461 ; SI-NEXT: v_mov_b32_e32 v0, 0x7f
1462 ; SI-NEXT: s_waitcnt lgkmcnt(0)
1463 ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
1464 ; SI-NEXT: s_endpgm
1465 ;
1466 ; VI-LABEL: bfe_u32_constant_fold_test_16:
1467 ; VI: ; %bb.0:
1468 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1469 ; VI-NEXT: s_mov_b32 s3, 0xf000
1470 ; VI-NEXT: s_mov_b32 s2, -1
1471 ; VI-NEXT: v_mov_b32_e32 v0, 0x7f
1472 ; VI-NEXT: s_waitcnt lgkmcnt(0)
1473 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
1474 ; VI-NEXT: s_endpgm
5181475 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 4294967295, i32 1, i32 7)
5191476 store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
5201477 ret void
5211478 }
5221479
523 ; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_17:
524 ; GCN-NOT: {{[^@]}}bfe
525 ; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
526 ; GCN: buffer_store_dword [[VREG]],
527 ; GCN: s_endpgm
5281480 ; EG-NOT: BFE
5291481 define amdgpu_kernel void @bfe_u32_constant_fold_test_17(i32 addrspace(1)* %out) #0 {
1482 ; SI-LABEL: bfe_u32_constant_fold_test_17:
1483 ; SI: ; %bb.0:
1484 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
1485 ; SI-NEXT: s_mov_b32 s3, 0xf000
1486 ; SI-NEXT: s_mov_b32 s2, -1
1487 ; SI-NEXT: v_mov_b32_e32 v0, 0x7f
1488 ; SI-NEXT: s_waitcnt lgkmcnt(0)
1489 ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
1490 ; SI-NEXT: s_endpgm
1491 ;
1492 ; VI-LABEL: bfe_u32_constant_fold_test_17:
1493 ; VI: ; %bb.0:
1494 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1495 ; VI-NEXT: s_mov_b32 s3, 0xf000
1496 ; VI-NEXT: s_mov_b32 s2, -1
1497 ; VI-NEXT: v_mov_b32_e32 v0, 0x7f
1498 ; VI-NEXT: s_waitcnt lgkmcnt(0)
1499 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
1500 ; VI-NEXT: s_endpgm
5301501 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 255, i32 1, i32 31)
5311502 store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
5321503 ret void
5331504 }
5341505
535 ; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_18:
536 ; GCN-NOT: {{[^@]}}bfe
537 ; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
538 ; GCN: buffer_store_dword [[VREG]],
539 ; GCN: s_endpgm
5401506 ; EG-NOT: BFE
5411507 define amdgpu_kernel void @bfe_u32_constant_fold_test_18(i32 addrspace(1)* %out) #0 {
1508 ; SI-LABEL: bfe_u32_constant_fold_test_18:
1509 ; SI: ; %bb.0:
1510 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
1511 ; SI-NEXT: s_mov_b32 s3, 0xf000
1512 ; SI-NEXT: s_mov_b32 s2, -1
1513 ; SI-NEXT: v_mov_b32_e32 v0, 0
1514 ; SI-NEXT: s_waitcnt lgkmcnt(0)
1515 ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
1516 ; SI-NEXT: s_endpgm
1517 ;
1518 ; VI-LABEL: bfe_u32_constant_fold_test_18:
1519 ; VI: ; %bb.0:
1520 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1521 ; VI-NEXT: s_mov_b32 s3, 0xf000
1522 ; VI-NEXT: s_mov_b32 s2, -1
1523 ; VI-NEXT: v_mov_b32_e32 v0, 0
1524 ; VI-NEXT: s_waitcnt lgkmcnt(0)
1525 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
1526 ; VI-NEXT: s_endpgm
5421527 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 255, i32 31, i32 1)
5431528 store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
5441529 ret void
5481533 ; reduced to the bits demanded by the bfe.
5491534
5501535 ; XXX: The operand to v_bfe_u32 could also just directly be the load register.
551 ; GCN-LABEL: {{^}}simplify_bfe_u32_multi_use_arg:
552 ; GCN: buffer_load_dword [[ARG:v[0-9]+]]
553 ; GCN: v_and_b32_e32 [[AND:v[0-9]+]], 63, [[ARG]]
554 ; GCN: v_bfe_u32 [[BFE:v[0-9]+]], [[AND]], 2, 2
555 ; GCN-DAG: buffer_store_dword [[AND]]
556 ; GCN-DAG: buffer_store_dword [[BFE]]
557 ; GCN: s_endpgm
5581536 define amdgpu_kernel void @simplify_bfe_u32_multi_use_arg(i32 addrspace(1)* %out0,
1537 ; SI-LABEL: simplify_bfe_u32_multi_use_arg:
1538 ; SI: ; %bb.0:
1539 ; SI-NEXT: s_mov_b32 s3, 0xf000
1540 ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xd
1541 ; SI-NEXT: s_mov_b32 s2, -1
1542 ; SI-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x9
1543 ; SI-NEXT: s_mov_b32 s6, s2
1544 ; SI-NEXT: s_mov_b32 s7, s3
1545 ; SI-NEXT: s_waitcnt lgkmcnt(0)
1546 ; SI-NEXT: buffer_load_dword v0, off, s[4:7], 0
1547 ; SI-NEXT: s_mov_b32 s0, s10
1548 ; SI-NEXT: s_mov_b32 s1, s11
1549 ; SI-NEXT: s_mov_b32 s10, s2
1550 ; SI-NEXT: s_mov_b32 s11, s3
1551 ; SI-NEXT: s_waitcnt vmcnt(0)
1552 ; SI-NEXT: v_and_b32_e32 v0, 63, v0
1553 ; SI-NEXT: v_bfe_u32 v1, v0, 2, 2
1554 ; SI-NEXT: buffer_store_dword v1, off, s[8:11], 0
1555 ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
1556 ; SI-NEXT: s_endpgm
1557 ;
1558 ; VI-LABEL: simplify_bfe_u32_multi_use_arg:
1559 ; VI: ; %bb.0:
1560 ; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
1561 ; VI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x34
1562 ; VI-NEXT: s_mov_b32 s3, 0xf000
1563 ; VI-NEXT: s_mov_b32 s2, -1
1564 ; VI-NEXT: s_mov_b32 s10, s2
1565 ; VI-NEXT: s_mov_b32 s11, s3
1566 ; VI-NEXT: s_waitcnt lgkmcnt(0)
1567 ; VI-NEXT: buffer_load_dword v0, off, s[8:11], 0
1568 ; VI-NEXT: s_mov_b32 s0, s6
1569 ; VI-NEXT: s_mov_b32 s1, s7
1570 ; VI-NEXT: s_mov_b32 s6, s2
1571 ; VI-NEXT: s_mov_b32 s7, s3
1572 ; VI-NEXT: s_waitcnt vmcnt(0)
1573 ; VI-NEXT: v_and_b32_e32 v0, 63, v0
1574 ; VI-NEXT: v_bfe_u32 v1, v0, 2, 2
1575 ; VI-NEXT: buffer_store_dword v1, off, s[4:7], 0
1576 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
1577 ; VI-NEXT: s_endpgm
5591578 i32 addrspace(1)* %out1,
5601579 i32 addrspace(1)* %in) #0 {
5611580 %src = load i32, i32 addrspace(1)* %in, align 4
5661585 ret void
5671586 }
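This test guards a demanded-bits subtlety: the bfe with offset 2 and width 2 reads only bits 2-3 of the and result, but that same and result is also stored to %out1, so the 63 mask must not be narrowed. A quick plain-Python check (variable names are ours) of why narrowing would be wrong:

a = 0x3f
masked = a & 63             # value stored to %out1
bfe    = (masked >> 2) & 3  # value stored to %out0
# Narrowing the mask to only the bfe's demanded bits (63 -> 12)
# would preserve the bfe result but corrupt the stored value:
assert ((a & 12) >> 2) & 3 == bfe
assert  (a & 12) != masked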
5681587
569 ; GCN-LABEL: {{^}}lshr_and:
570 ; GCN: s_bfe_u32 {{s[0-9]+}}, {{s[0-9]+}}, 0x30006
571 ; GCN: buffer_store_dword
5721588 define amdgpu_kernel void @lshr_and(i32 addrspace(1)* %out, i32 %a) #0 {
1589 ; SI-LABEL: lshr_and:
1590 ; SI: ; %bb.0:
1591 ; SI-NEXT: s_load_dword s2, s[0:1], 0xb
1592 ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
1593 ; SI-NEXT: s_mov_b32 s7, 0xf000
1594 ; SI-NEXT: s_waitcnt lgkmcnt(0)
1595 ; SI-NEXT: s_bfe_u32 s0, s2, 0x30006
1596 ; SI-NEXT: s_mov_b32 s6, -1
1597 ; SI-NEXT: v_mov_b32_e32 v0, s0
1598 ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
1599 ; SI-NEXT: s_endpgm
1600 ;
1601 ; VI-LABEL: lshr_and:
1602 ; VI: ; %bb.0:
1603 ; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
1604 ; VI-NEXT: s_load_dword s0, s[0:1], 0x2c
1605 ; VI-NEXT: s_mov_b32 s7, 0xf000
1606 ; VI-NEXT: s_mov_b32 s6, -1
1607 ; VI-NEXT: s_waitcnt lgkmcnt(0)
1608 ; VI-NEXT: s_bfe_u32 s0, s0, 0x30006
1609 ; VI-NEXT: v_mov_b32_e32 v0, s0
1610 ; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
1611 ; VI-NEXT: s_endpgm
5731612 %b = lshr i32 %a, 6
5741613 %c = and i32 %b, 7
5751614 store i32 %c, i32 addrspace(1)* %out, align 8
5761615 ret void
5771616 }
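The 0x30006 immediate packs the extracted field as (width << 16) | offset: (a >> 6) & 7 is a 3-bit field starting at bit 6. A small helper (our own illustration, not part of the tooling) makes the encoding explicit:

def s_bfe_operand(offset, width):
    # s_bfe_u32 source-1 packing as evidenced by the checks:
    # offset in the low bits, width starting at bit 16.
    return (width << 16) | offset

assert s_bfe_operand(6, 3) == 0x30006    # lshr_and, and_lshr, and_lshr2
assert s_bfe_operand(2, 21) == 0x150002  # shl_lshr further down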
5781617
579 ; GCN-LABEL: {{^}}v_lshr_and:
580 ; GCN: v_bfe_u32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}, 3
581 ; GCN: buffer_store_dword
5821618 define amdgpu_kernel void @v_lshr_and(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
1619 ; SI-LABEL: v_lshr_and:
1620 ; SI: ; %bb.0:
1621 ; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb
1622 ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
1623 ; SI-NEXT: s_mov_b32 s7, 0xf000
1624 ; SI-NEXT: s_mov_b32 s6, -1
1625 ; SI-NEXT: s_waitcnt lgkmcnt(0)
1626 ; SI-NEXT: v_mov_b32_e32 v0, s3
1627 ; SI-NEXT: v_bfe_u32 v0, s2, v0, 3
1628 ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
1629 ; SI-NEXT: s_endpgm
1630 ;
1631 ; VI-LABEL: v_lshr_and:
1632 ; VI: ; %bb.0:
1633 ; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
1634 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2c
1635 ; VI-NEXT: s_mov_b32 s7, 0xf000
1636 ; VI-NEXT: s_mov_b32 s6, -1
1637 ; VI-NEXT: s_waitcnt lgkmcnt(0)
1638 ; VI-NEXT: v_mov_b32_e32 v0, s1
1639 ; VI-NEXT: v_bfe_u32 v0, s0, v0, 3
1640 ; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
1641 ; VI-NEXT: s_endpgm
5831642 %c = lshr i32 %a, %b
5841643 %d = and i32 %c, 7
5851644 store i32 %d, i32 addrspace(1)* %out, align 8
5861645 ret void
5871646 }
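With a register shift amount the same pattern selects the VALU form, v_bfe_u32, with the offset in a register and the constant width 3. An illustrative reference (the name is ours) for what the selected instruction computes here, assuming the offset stays in range as the IR's lshr requires:

def v_lshr_and_ref(a, b):
    # (a >> b) & 7: a 3-bit field of 'a' at dynamic offset 'b'.
    return ((a & 0xffffffff) >> b) & 7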
5881647
589 ; GCN-LABEL: {{^}}and_lshr:
590 ; GCN: s_bfe_u32 {{s[0-9]+}}, {{s[0-9]+}}, 0x30006
591 ; GCN: buffer_store_dword
5921648 define amdgpu_kernel void @and_lshr(i32 addrspace(1)* %out, i32 %a) #0 {
1649 ; SI-LABEL: and_lshr:
1650 ; SI: ; %bb.0:
1651 ; SI-NEXT: s_load_dword s2, s[0:1], 0xb
1652 ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
1653 ; SI-NEXT: s_mov_b32 s7, 0xf000
1654 ; SI-NEXT: s_waitcnt lgkmcnt(0)
1655 ; SI-NEXT: s_bfe_u32 s0, s2, 0x30006
1656 ; SI-NEXT: s_mov_b32 s6, -1
1657 ; SI-NEXT: v_mov_b32_e32 v0, s0
1658 ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
1659 ; SI-NEXT: s_endpgm
1660 ;
1661 ; VI-LABEL: and_lshr:
1662 ; VI: ; %bb.0:
1663 ; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
1664 ; VI-NEXT: s_load_dword s0, s[0:1], 0x2c
1665 ; VI-NEXT: s_mov_b32 s7, 0xf000
1666 ; VI-NEXT: s_mov_b32 s6, -1
1667 ; VI-NEXT: s_waitcnt lgkmcnt(0)
1668 ; VI-NEXT: s_bfe_u32 s0, s0, 0x30006
1669 ; VI-NEXT: v_mov_b32_e32 v0, s0
1670 ; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
1671 ; VI-NEXT: s_endpgm
5931672 %b = and i32 %a, 448
5941673 %c = lshr i32 %b, 6
5951674 store i32 %c, i32 addrspace(1)* %out, align 8
5961675 ret void
5971676 }
5981677
599 ; GCN-LABEL: {{^}}and_lshr2:
600 ; GCN: s_bfe_u32 {{s[0-9]+}}, {{s[0-9]+}}, 0x30006
601 ; GCN: buffer_store_dword
6021678 define amdgpu_kernel void @and_lshr2(i32 addrspace(1)* %out, i32 %a) #0 {
1679 ; SI-LABEL: and_lshr2:
1680 ; SI: ; %bb.0:
1681 ; SI-NEXT: s_load_dword s2, s[0:1], 0xb
1682 ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
1683 ; SI-NEXT: s_mov_b32 s7, 0xf000
1684 ; SI-NEXT: s_waitcnt lgkmcnt(0)
1685 ; SI-NEXT: s_bfe_u32 s0, s2, 0x30006
1686 ; SI-NEXT: s_mov_b32 s6, -1
1687 ; SI-NEXT: v_mov_b32_e32 v0, s0
1688 ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
1689 ; SI-NEXT: s_endpgm
1690 ;
1691 ; VI-LABEL: and_lshr2:
1692 ; VI: ; %bb.0:
1693 ; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
1694 ; VI-NEXT: s_load_dword s0, s[0:1], 0x2c
1695 ; VI-NEXT: s_mov_b32 s7, 0xf000
1696 ; VI-NEXT: s_mov_b32 s6, -1
1697 ; VI-NEXT: s_waitcnt lgkmcnt(0)
1698 ; VI-NEXT: s_bfe_u32 s0, s0, 0x30006
1699 ; VI-NEXT: v_mov_b32_e32 v0, s0
1700 ; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
1701 ; VI-NEXT: s_endpgm
6031702 %b = and i32 %a, 511
6041703 %c = lshr i32 %b, 6
6051704 store i32 %c, i32 addrspace(1)* %out, align 8
6061705 ret void
6071706 }
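and_lshr2 widens the mask to 511 (nine bits), but after the shift by 6 only three of those bits remain live, so codegen emits the same 0x30006 bfe as and_lshr. The equivalence is easy to confirm exhaustively over the low bits:

for a in range(1 << 10):
    assert (a & 511) >> 6 == (a >> 6) & 7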
6081707
609 ; GCN-LABEL: {{^}}shl_lshr:
610 ; GCN: s_bfe_u32 {{s[0-9]+}}, {{s[0-9]+}}, 0x150002
611 ; GCN: buffer_store_dword
6121708 define amdgpu_kernel void @shl_lshr(i32 addrspace(1)* %out, i32 %a) #0 {
1709 ; SI-LABEL: shl_lshr:
1710 ; SI: ; %bb.0:
1711 ; SI-NEXT: s_load_dword s2, s[0:1], 0xb
1712 ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
1713 ; SI-NEXT: s_mov_b32 s7, 0xf000
1714 ; SI-NEXT: s_waitcnt lgkmcnt(0)
1715 ; SI-NEXT: s_bfe_u32 s0, s2, 0x150002
1716 ; SI-NEXT: s_mov_b32 s6, -1
1717 ; SI-NEXT: v_mov_b32_e32 v0, s0
1718 ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
1719 ; SI-NEXT: s_endpgm
1720 ;
1721 ; VI-LABEL: shl_lshr:
1722 ; VI: ; %bb.0:
1723 ; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
1724 ; VI-NEXT: s_load_dword s0, s[0:1], 0x2c
1725 ; VI-NEXT: s_mov_b32 s7, 0xf000
1726 ; VI-NEXT: s_mov_b32 s6, -1
1727 ; VI-NEXT: s_waitcnt lgkmcnt(0)
1728 ; VI-NEXT: s_bfe_u32 s0, s0, 0x150002
1729 ; VI-NEXT: v_mov_b32_e32 v0, s0
1730 ; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
1731 ; VI-NEXT: s_endpgm
6131732 %b = shl i32 %a, 9
6141733 %c = lshr i32 %b, 11
6151734 store i32 %c, i32 addrspace(1)* %out, align 8
0 from __future__ import print_function
1 import re
12 import sys
23
198199 asm = common.SCRUB_TRAILING_WHITESPACE_RE.sub(r'', asm)
199200 return asm
200201
202 def get_triple_from_march(march):
203 triples = {
204 'amdgcn': 'amdgcn',
205 }
206 for prefix, triple in triples.items():
207 if march.startswith(prefix):
208 return triple
209 print("Cannot find a triple. Assume 'x86'", file=sys.stderr)
210 return 'x86'
201211
202212 def build_function_body_dictionary_for_triple(args, raw_tool_output, triple, prefixes, func_dict):
203213 target_handlers = {
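get_triple_from_march resolves a -march value to a triple by prefix match and falls back to x86 with a warning, matching the tool's old default; the driver changes below then thread the -march value from each RUN line through to it. Usage, assuming the in-tree layout where utils/UpdateTestChecks/ is importable as a package:

from UpdateTestChecks import asm

assert asm.get_triple_from_march('amdgcn') == 'amdgcn'
# Anything the table does not cover warns on stderr and
# falls back to the historical default:
assert asm.get_triple_from_march('mips') == 'x86'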
8080 if m:
8181 triple_in_cmd = m.groups()[0]
8282
83 march_in_cmd = None
84 m = common.MARCH_ARG_RE.search(llc_cmd)
85 if m:
86 march_in_cmd = m.groups()[0]
87
8388 filecheck_cmd = ''
8489 if len(commands) > 1:
8590 filecheck_cmd = commands[1]
101106
102107 # FIXME: We should use multiple check prefixes to common check lines. For
103108 # now, we just ignore all but the last.
104 run_list.append((check_prefixes, llc_cmd_args, triple_in_cmd))
109 run_list.append((check_prefixes, llc_cmd_args, triple_in_cmd, march_in_cmd))
105110
106111 func_dict = {}
107112 for p in run_list:
108113 prefixes = p[0]
109114 for prefix in prefixes:
110115 func_dict.update({prefix: dict()})
111 for prefixes, llc_args, triple_in_cmd in run_list:
116 for prefixes, llc_args, triple_in_cmd, march_in_cmd in run_list:
112117 if args.verbose:
113118 print('Extracted LLC cmd: llc ' + llc_args, file=sys.stderr)
114119 print('Extracted FileCheck prefixes: ' + str(prefixes), file=sys.stderr)
115120
116121 raw_tool_output = common.invoke_tool(args.llc_binary, llc_args, test)
117 if not (triple_in_cmd or triple_in_ir):
118 print("Cannot find a triple. Assume 'x86'", file=sys.stderr)
122 triple = triple_in_cmd or triple_in_ir
123 if not triple:
124 triple = asm.get_triple_from_march(march_in_cmd)
119125
120126 asm.build_function_body_dictionary_for_triple(args, raw_tool_output,
121 triple_in_cmd or triple_in_ir or 'x86', prefixes, func_dict)
127 triple, prefixes, func_dict)
122128
123129 is_in_function = False
124130 is_in_function_start = False
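Net effect: the triple used to pick the asm function-body parser is now resolved in a fixed order: an explicit -mtriple on the RUN line wins, then a triple in the IR, then the new -march mapping, which itself supplies the x86 default. As a sketch (the function name is ours, mirroring the variable names above):

def resolve_triple(triple_in_cmd, triple_in_ir, march_in_cmd):
    # Fallback chain from update_llc_test_checks.py; the march
    # mapping handles the final x86 default and its warning.
    triple = triple_in_cmd or triple_in_ir
    if not triple:
        triple = asm.get_triple_from_march(march_in_cmd)
    return triple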