llvm.org GIT mirror llvm / f799b25
AMDGPU/SI: Add VI patterns to select FLAT instructions for global memory ops Summary: The MUBUF addr64 bit has been removed on VI, so we must use FLAT instructions when the pointer is stored in VGPRs. Reviewers: arsenm Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D11067 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@242673 91177308-0d34-0410-b5e6-96231b3b80d8 Tom Stellard 5 years ago
6 changed file(s) with 283 addition(s) and 93 deletion(s). Raw diff Collapse all Expand all
10281028 SDValue &SLC, SDValue &TFE) const {
10291029 SDValue Ptr, Offen, Idxen, Addr64;
10301030
1031 // addr64 bit was removed for volcanic islands.
1032 if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
1033 return false;
1034
10311035 SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
10321036 GLC, SLC, TFE);
10331037
253253 return false;
254254 }
255255
256 bool SITargetLowering::isLegalFlatAddressingMode(const AddrMode &AM) const {
257 // Flat instructions do not have offsets, and only have the register
258 // address.
259 return AM.BaseOffs == 0 && (AM.Scale == 0 || AM.Scale == 1);
260 }
261
256262 bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
257263 const AddrMode &AM, Type *Ty,
258264 unsigned AS) const {
262268
263269 switch (AS) {
264270 case AMDGPUAS::GLOBAL_ADDRESS:
271 if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
272 // Assume that we will use FLAT for all global memory accesses
273 // on VI.
274 // FIXME: This assumption is currently wrong. On VI we still use
275 // MUBUF instructions for the r + i addressing mode. As currently
276 // implemented, the MUBUF instructions only work on buffers < 4GB.
277 // It may be possible to support > 4GB buffers with MUBUF instructions,
278 // by setting the stride value in the resource descriptor which would
279 // increase the size limit to (stride * 4GB). However, this is risky,
280 // because it has never been validated.
281 return isLegalFlatAddressingMode(AM);
282 }
283 // fall-through
284 case AMDGPUAS::PRIVATE_ADDRESS:
265285 case AMDGPUAS::CONSTANT_ADDRESS: // XXX - Should we assume SMRD instructions?
266 case AMDGPUAS::PRIVATE_ADDRESS:
267286 case AMDGPUAS::UNKNOWN_ADDRESS_SPACE: {
268287 // MUBUF / MTBUF instructions have a 12-bit unsigned byte offset, and
269288 // additionally can do r + r + i with addr64. 32-bit has more addressing
323342
324343 return false;
325344 }
326 case AMDGPUAS::FLAT_ADDRESS: {
327 // Flat instructions do not have offsets, and only have the register
328 // address.
329 return AM.BaseOffs == 0 && (AM.Scale == 0 || AM.Scale == 1);
330 }
345 case AMDGPUAS::FLAT_ADDRESS:
346 return isLegalFlatAddressingMode(AM);
347
331348 default:
332349 llvm_unreachable("unhandled address space");
333350 }
5555 SDValue performMin3Max3Combine(SDNode *N, DAGCombinerInfo &DCI) const;
5656 SDValue performSetCCCombine(SDNode *N, DAGCombinerInfo &DCI) const;
5757
58 bool isLegalFlatAddressingMode(const AddrMode &AM) const;
5859 public:
5960 SITargetLowering(TargetMachine &tm, const AMDGPUSubtarget &STI);
6061
102102 (S_BUFFER_LOAD_DWORD_IMM $sbase, (as_i32imm $offset))
103103 >;
104104
105 // Patterns for global loads with no offset
106 class FlatLoadPat : Pat <
107 (vt (node i64:$addr)),
108 (inst $addr, 0, 0, 0)
109 >;
110
111 def : FlatLoadPat ;
112 def : FlatLoadPat ;
113 def : FlatLoadPat ;
114 def : FlatLoadPat ;
115 def : FlatLoadPat ;
116 def : FlatLoadPat ;
117 def : FlatLoadPat ;
118
119 class FlatStorePat : Pat <
120 (node vt:$data, i64:$addr),
121 (inst $data, $addr, 0, 0, 0)
122 >;
123
124 def : FlatStorePat ;
125 def : FlatStorePat ;
126 def : FlatStorePat ;
127 def : FlatStorePat ;
128 def : FlatStorePat ;
129
130 class FlatAtomicPat : Pat <
131 (vt (node i64:$addr, vt:$data)),
132 (inst $addr, $data, 0, 0)
133 >;
134
135 def : FlatAtomicPat ;
136 def : FlatAtomicPat ;
137 def : FlatAtomicPat ;
138 def : FlatAtomicPat ;
139 def : FlatAtomicPat ;
140 def : FlatAtomicPat ;
141 def : FlatAtomicPat ;
142 def : FlatAtomicPat ;
143 def : FlatAtomicPat ;
144 def : FlatAtomicPat ;
145
146
105147 } // End Predicates = [isVI]
None ; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown < %s | FileCheck -check-prefix=OPT %s
1 ; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN %s
0 ; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=bonaire < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-CI %s
1 ; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tonga < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-VI %s
2 ; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
3 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
24
35 declare i32 @llvm.r600.read.tidig.x() #0
46
57 ; OPT-LABEL: @test_sink_global_small_offset_i32(
6 ; OPT-NOT: getelementptr i32, i32 addrspace(1)* %in
7 ; OPT: br i1
8 ; OPT: ptrtoint
8 ; OPT-CI-NOT: getelementptr i32, i32 addrspace(1)* %in
9 ; OPT-VI: getelementptr i32, i32 addrspace(1)* %in
10 ; OPT: br i1
11 ; OPT-CI: ptrtoint
912
1013 ; GCN-LABEL: {{^}}test_sink_global_small_offset_i32:
1114 ; GCN: {{^}}BB0_2:
213216 }
214217
215218 ; GCN-LABEL: {{^}}test_sink_global_vreg_sreg_i32:
216 ; GCN: s_and_saveexec_b64
217 ; GCN: buffer_load_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
219 ; VI-DAG: s_movk_i32 flat_scratch_lo, 0x0
220 ; VI-DAG: s_movk_i32 flat_scratch_hi, 0x0
221 ; GCN: s_and_saveexec_b64
222 ; CI: buffer_load_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
223 ; VI: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
218224 ; GCN: {{^}}BB7_2:
219225 define void @test_sink_global_vreg_sreg_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %offset, i32 %cond) {
220226 entry:
None ; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
0 ; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=GCN --check-prefix=SI --check-prefix=FUNC %s
1 ; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=GCN --check-prefix=VI --check-prefix=FUNC %s
2
13
24 ; FUNC-LABEL: {{^}}atomic_add_i32_offset:
3 ; SI: buffer_atomic_add v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
5 ; GCN: buffer_atomic_add v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
46 define void @atomic_add_i32_offset(i32 addrspace(1)* %out, i32 %in) {
57 entry:
68 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
911 }
1012
1113 ; FUNC-LABEL: {{^}}atomic_add_i32_ret_offset:
12 ; SI: buffer_atomic_add [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
13 ; SI: buffer_store_dword [[RET]]
14 ; GCN: buffer_atomic_add [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
15 ; GCN: buffer_store_dword [[RET]]
1416 define void @atomic_add_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
1517 entry:
1618 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
2123
2224 ; FUNC-LABEL: {{^}}atomic_add_i32_addr64_offset:
2325 ; SI: buffer_atomic_add v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
26 ; VI: s_movk_i32 flat_scratch_lo, 0x0
27 ; VI: s_movk_i32 flat_scratch_hi, 0x0
28 ; VI: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
29
2430 define void @atomic_add_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
2531 entry:
2632 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
3137
3238 ; FUNC-LABEL: {{^}}atomic_add_i32_ret_addr64_offset:
3339 ; SI: buffer_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
34 ; SI: buffer_store_dword [[RET]]
40 ; VI: s_movk_i32 flat_scratch_lo, 0x0
41 ; VI: s_movk_i32 flat_scratch_hi, 0x0
42 ; VI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
43 ; GCN: buffer_store_dword [[RET]]
3544 define void @atomic_add_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
3645 entry:
3746 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
4251 }
4352
4453 ; FUNC-LABEL: {{^}}atomic_add_i32:
45 ; SI: buffer_atomic_add v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
54 ; GCN: buffer_atomic_add v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
4655 define void @atomic_add_i32(i32 addrspace(1)* %out, i32 %in) {
4756 entry:
4857 %0 = atomicrmw volatile add i32 addrspace(1)* %out, i32 %in seq_cst
5059 }
5160
5261 ; FUNC-LABEL: {{^}}atomic_add_i32_ret:
53 ; SI: buffer_atomic_add [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
54 ; SI: buffer_store_dword [[RET]]
62 ; GCN: buffer_atomic_add [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
63 ; GCN: buffer_store_dword [[RET]]
5564 define void @atomic_add_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
5665 entry:
5766 %0 = atomicrmw volatile add i32 addrspace(1)* %out, i32 %in seq_cst
6170
6271 ; FUNC-LABEL: {{^}}atomic_add_i32_addr64:
6372 ; SI: buffer_atomic_add v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
73 ; VI: s_movk_i32 flat_scratch_lo, 0x0
74 ; VI: s_movk_i32 flat_scratch_hi, 0x0
75 ; VI: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
6476 define void @atomic_add_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
6577 entry:
6678 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
7082
7183 ; FUNC-LABEL: {{^}}atomic_add_i32_ret_addr64:
7284 ; SI: buffer_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
73 ; SI: buffer_store_dword [[RET]]
85 ; VI: s_movk_i32 flat_scratch_lo, 0x0
86 ; VI: s_movk_i32 flat_scratch_hi, 0x0
87 ; VI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
88 ; GCN: buffer_store_dword [[RET]]
7489 define void @atomic_add_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
7590 entry:
7691 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
8095 }
8196
8297 ; FUNC-LABEL: {{^}}atomic_and_i32_offset:
83 ; SI: buffer_atomic_and v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
98 ; GCN: buffer_atomic_and v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
8499 define void @atomic_and_i32_offset(i32 addrspace(1)* %out, i32 %in) {
85100 entry:
86101 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
89104 }
90105
91106 ; FUNC-LABEL: {{^}}atomic_and_i32_ret_offset:
92 ; SI: buffer_atomic_and [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
93 ; SI: buffer_store_dword [[RET]]
107 ; GCN: buffer_atomic_and [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
108 ; GCN: buffer_store_dword [[RET]]
94109 define void @atomic_and_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
95110 entry:
96111 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
101116
102117 ; FUNC-LABEL: {{^}}atomic_and_i32_addr64_offset:
103118 ; SI: buffer_atomic_and v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
119 ; VI: s_movk_i32 flat_scratch_lo, 0x0
120 ; VI: s_movk_i32 flat_scratch_hi, 0x0
121 ; VI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
104122 define void @atomic_and_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
105123 entry:
106124 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
111129
112130 ; FUNC-LABEL: {{^}}atomic_and_i32_ret_addr64_offset:
113131 ; SI: buffer_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
114 ; SI: buffer_store_dword [[RET]]
132 ; VI: s_movk_i32 flat_scratch_lo, 0x0
133 ; VI: s_movk_i32 flat_scratch_hi, 0x0
134 ; VI: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
135 ; GCN: buffer_store_dword [[RET]]
115136 define void @atomic_and_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
116137 entry:
117138 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
122143 }
123144
124145 ; FUNC-LABEL: {{^}}atomic_and_i32:
125 ; SI: buffer_atomic_and v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
146 ; GCN: buffer_atomic_and v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
126147 define void @atomic_and_i32(i32 addrspace(1)* %out, i32 %in) {
127148 entry:
128149 %0 = atomicrmw volatile and i32 addrspace(1)* %out, i32 %in seq_cst
130151 }
131152
132153 ; FUNC-LABEL: {{^}}atomic_and_i32_ret:
133 ; SI: buffer_atomic_and [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
134 ; SI: buffer_store_dword [[RET]]
154 ; GCN: buffer_atomic_and [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
155 ; GCN: buffer_store_dword [[RET]]
135156 define void @atomic_and_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
136157 entry:
137158 %0 = atomicrmw volatile and i32 addrspace(1)* %out, i32 %in seq_cst
141162
142163 ; FUNC-LABEL: {{^}}atomic_and_i32_addr64:
143164 ; SI: buffer_atomic_and v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
165 ; VI: s_movk_i32 flat_scratch_lo, 0x0
166 ; VI: s_movk_i32 flat_scratch_hi, 0x0
167 ; VI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
144168 define void @atomic_and_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
145169 entry:
146170 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
150174
151175 ; FUNC-LABEL: {{^}}atomic_and_i32_ret_addr64:
152176 ; SI: buffer_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
153 ; SI: buffer_store_dword [[RET]]
177 ; VI: s_movk_i32 flat_scratch_lo, 0x0
178 ; VI: s_movk_i32 flat_scratch_hi, 0x0
179 ; VI: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
180 ; GCN: buffer_store_dword [[RET]]
154181 define void @atomic_and_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
155182 entry:
156183 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
160187 }
161188
162189 ; FUNC-LABEL: {{^}}atomic_sub_i32_offset:
163 ; SI: buffer_atomic_sub v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
190 ; GCN: buffer_atomic_sub v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
164191 define void @atomic_sub_i32_offset(i32 addrspace(1)* %out, i32 %in) {
165192 entry:
166193 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
169196 }
170197
171198 ; FUNC-LABEL: {{^}}atomic_sub_i32_ret_offset:
172 ; SI: buffer_atomic_sub [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
173 ; SI: buffer_store_dword [[RET]]
199 ; GCN: buffer_atomic_sub [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
200 ; GCN: buffer_store_dword [[RET]]
174201 define void @atomic_sub_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
175202 entry:
176203 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
181208
182209 ; FUNC-LABEL: {{^}}atomic_sub_i32_addr64_offset:
183210 ; SI: buffer_atomic_sub v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
211 ; VI: s_movk_i32 flat_scratch_lo, 0x0
212 ; VI: s_movk_i32 flat_scratch_hi, 0x0
213 ; VI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
184214 define void @atomic_sub_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
185215 entry:
186216 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
191221
192222 ; FUNC-LABEL: {{^}}atomic_sub_i32_ret_addr64_offset:
193223 ; SI: buffer_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
194 ; SI: buffer_store_dword [[RET]]
224 ; VI: s_movk_i32 flat_scratch_lo, 0x0
225 ; VI: s_movk_i32 flat_scratch_hi, 0x0
226 ; VI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
227 ; GCN: buffer_store_dword [[RET]]
195228 define void @atomic_sub_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
196229 entry:
197230 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
202235 }
203236
204237 ; FUNC-LABEL: {{^}}atomic_sub_i32:
205 ; SI: buffer_atomic_sub v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
238 ; GCN: buffer_atomic_sub v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
206239 define void @atomic_sub_i32(i32 addrspace(1)* %out, i32 %in) {
207240 entry:
208241 %0 = atomicrmw volatile sub i32 addrspace(1)* %out, i32 %in seq_cst
210243 }
211244
212245 ; FUNC-LABEL: {{^}}atomic_sub_i32_ret:
213 ; SI: buffer_atomic_sub [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
214 ; SI: buffer_store_dword [[RET]]
246 ; GCN: buffer_atomic_sub [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
247 ; GCN: buffer_store_dword [[RET]]
215248 define void @atomic_sub_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
216249 entry:
217250 %0 = atomicrmw volatile sub i32 addrspace(1)* %out, i32 %in seq_cst
221254
222255 ; FUNC-LABEL: {{^}}atomic_sub_i32_addr64:
223256 ; SI: buffer_atomic_sub v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
257 ; VI: s_movk_i32 flat_scratch_lo, 0x0
258 ; VI: s_movk_i32 flat_scratch_hi, 0x0
259 ; VI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
224260 define void @atomic_sub_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
225261 entry:
226262 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
230266
231267 ; FUNC-LABEL: {{^}}atomic_sub_i32_ret_addr64:
232268 ; SI: buffer_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
233 ; SI: buffer_store_dword [[RET]]
269 ; VI: s_movk_i32 flat_scratch_lo, 0x0
270 ; VI: s_movk_i32 flat_scratch_hi, 0x0
271 ; VI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
272 ; GCN: buffer_store_dword [[RET]]
234273 define void @atomic_sub_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
235274 entry:
236275 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
240279 }
241280
242281 ; FUNC-LABEL: {{^}}atomic_max_i32_offset:
243 ; SI: buffer_atomic_smax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
282 ; GCN: buffer_atomic_smax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
244283 define void @atomic_max_i32_offset(i32 addrspace(1)* %out, i32 %in) {
245284 entry:
246285 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
249288 }
250289
251290 ; FUNC-LABEL: {{^}}atomic_max_i32_ret_offset:
252 ; SI: buffer_atomic_smax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
253 ; SI: buffer_store_dword [[RET]]
291 ; GCN: buffer_atomic_smax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
292 ; GCN: buffer_store_dword [[RET]]
254293 define void @atomic_max_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
255294 entry:
256295 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
261300
262301 ; FUNC-LABEL: {{^}}atomic_max_i32_addr64_offset:
263302 ; SI: buffer_atomic_smax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
303 ; VI: s_movk_i32 flat_scratch_lo, 0x0
304 ; VI: s_movk_i32 flat_scratch_hi, 0x0
305 ; VI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
264306 define void @atomic_max_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
265307 entry:
266308 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
271313
272314 ; FUNC-LABEL: {{^}}atomic_max_i32_ret_addr64_offset:
273315 ; SI: buffer_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
274 ; SI: buffer_store_dword [[RET]]
316 ; VI: s_movk_i32 flat_scratch_lo, 0x0
317 ; VI: s_movk_i32 flat_scratch_hi, 0x0
318 ; VI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
319 ; GCN: buffer_store_dword [[RET]]
275320 define void @atomic_max_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
276321 entry:
277322 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
282327 }
283328
284329 ; FUNC-LABEL: {{^}}atomic_max_i32:
285 ; SI: buffer_atomic_smax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
330 ; GCN: buffer_atomic_smax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
286331 define void @atomic_max_i32(i32 addrspace(1)* %out, i32 %in) {
287332 entry:
288333 %0 = atomicrmw volatile max i32 addrspace(1)* %out, i32 %in seq_cst
290335 }
291336
292337 ; FUNC-LABEL: {{^}}atomic_max_i32_ret:
293 ; SI: buffer_atomic_smax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
294 ; SI: buffer_store_dword [[RET]]
338 ; GCN: buffer_atomic_smax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
339 ; GCN: buffer_store_dword [[RET]]
295340 define void @atomic_max_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
296341 entry:
297342 %0 = atomicrmw volatile max i32 addrspace(1)* %out, i32 %in seq_cst
301346
302347 ; FUNC-LABEL: {{^}}atomic_max_i32_addr64:
303348 ; SI: buffer_atomic_smax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
349 ; VI: s_movk_i32 flat_scratch_lo, 0x0
350 ; VI: s_movk_i32 flat_scratch_hi, 0x0
351 ; VI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
304352 define void @atomic_max_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
305353 entry:
306354 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
310358
311359 ; FUNC-LABEL: {{^}}atomic_max_i32_ret_addr64:
312360 ; SI: buffer_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
313 ; SI: buffer_store_dword [[RET]]
361 ; VI: s_movk_i32 flat_scratch_lo, 0x0
362 ; VI: s_movk_i32 flat_scratch_hi, 0x0
363 ; VI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
364 ; GCN: buffer_store_dword [[RET]]
314365 define void @atomic_max_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
315366 entry:
316367 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
320371 }
321372
322373 ; FUNC-LABEL: {{^}}atomic_umax_i32_offset:
323 ; SI: buffer_atomic_umax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
374 ; GCN: buffer_atomic_umax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
324375 define void @atomic_umax_i32_offset(i32 addrspace(1)* %out, i32 %in) {
325376 entry:
326377 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
329380 }
330381
331382 ; FUNC-LABEL: {{^}}atomic_umax_i32_ret_offset:
332 ; SI: buffer_atomic_umax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
333 ; SI: buffer_store_dword [[RET]]
383 ; GCN: buffer_atomic_umax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
384 ; GCN: buffer_store_dword [[RET]]
334385 define void @atomic_umax_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
335386 entry:
336387 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
341392
342393 ; FUNC-LABEL: {{^}}atomic_umax_i32_addr64_offset:
343394 ; SI: buffer_atomic_umax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
395 ; VI: s_movk_i32 flat_scratch_lo, 0x0
396 ; VI: s_movk_i32 flat_scratch_hi, 0x0
397 ; VI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
344398 define void @atomic_umax_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
345399 entry:
346400 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
351405
352406 ; FUNC-LABEL: {{^}}atomic_umax_i32_ret_addr64_offset:
353407 ; SI: buffer_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
354 ; SI: buffer_store_dword [[RET]]
408 ; VI: s_movk_i32 flat_scratch_lo, 0x0
409 ; VI: s_movk_i32 flat_scratch_hi, 0x0
410 ; VI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
411 ; GCN: buffer_store_dword [[RET]]
355412 define void @atomic_umax_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
356413 entry:
357414 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
362419 }
363420
364421 ; FUNC-LABEL: {{^}}atomic_umax_i32:
365 ; SI: buffer_atomic_umax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
422 ; GCN: buffer_atomic_umax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
366423 define void @atomic_umax_i32(i32 addrspace(1)* %out, i32 %in) {
367424 entry:
368425 %0 = atomicrmw volatile umax i32 addrspace(1)* %out, i32 %in seq_cst
370427 }
371428
372429 ; FUNC-LABEL: {{^}}atomic_umax_i32_ret:
373 ; SI: buffer_atomic_umax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
374 ; SI: buffer_store_dword [[RET]]
430 ; GCN: buffer_atomic_umax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
431 ; GCN: buffer_store_dword [[RET]]
375432 define void @atomic_umax_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
376433 entry:
377434 %0 = atomicrmw volatile umax i32 addrspace(1)* %out, i32 %in seq_cst
381438
382439 ; FUNC-LABEL: {{^}}atomic_umax_i32_addr64:
383440 ; SI: buffer_atomic_umax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
441 ; VI: s_movk_i32 flat_scratch_lo, 0x0
442 ; VI: s_movk_i32 flat_scratch_hi, 0x0
443 ; VI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
384444 define void @atomic_umax_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
385445 entry:
386446 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
390450
391451 ; FUNC-LABEL: {{^}}atomic_umax_i32_ret_addr64:
392452 ; SI: buffer_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
393 ; SI: buffer_store_dword [[RET]]
453 ; VI: s_movk_i32 flat_scratch_lo, 0x0
454 ; VI: s_movk_i32 flat_scratch_hi, 0x0
455 ; VI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
456 ; GCN: buffer_store_dword [[RET]]
394457 define void @atomic_umax_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
395458 entry:
396459 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
400463 }
401464
402465 ; FUNC-LABEL: {{^}}atomic_min_i32_offset:
403 ; SI: buffer_atomic_smin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
466 ; GCN: buffer_atomic_smin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
404467 define void @atomic_min_i32_offset(i32 addrspace(1)* %out, i32 %in) {
405468 entry:
406469 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
409472 }
410473
411474 ; FUNC-LABEL: {{^}}atomic_min_i32_ret_offset:
412 ; SI: buffer_atomic_smin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
413 ; SI: buffer_store_dword [[RET]]
475 ; GCN: buffer_atomic_smin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
476 ; GCN: buffer_store_dword [[RET]]
414477 define void @atomic_min_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
415478 entry:
416479 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
421484
422485 ; FUNC-LABEL: {{^}}atomic_min_i32_addr64_offset:
423486 ; SI: buffer_atomic_smin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
487 ; VI: s_movk_i32 flat_scratch_lo, 0x0
488 ; VI: s_movk_i32 flat_scratch_hi, 0x0
489 ; VI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
424490 define void @atomic_min_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
425491 entry:
426492 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
431497
432498 ; FUNC-LABEL: {{^}}atomic_min_i32_ret_addr64_offset:
433499 ; SI: buffer_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
434 ; SI: buffer_store_dword [[RET]]
500 ; VI: s_movk_i32 flat_scratch_lo, 0x0
501 ; VI: s_movk_i32 flat_scratch_hi, 0x0
502 ; VI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
503 ; GCN: buffer_store_dword [[RET]]
435504 define void @atomic_min_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
436505 entry:
437506 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
442511 }
443512
444513 ; FUNC-LABEL: {{^}}atomic_min_i32:
445 ; SI: buffer_atomic_smin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
514 ; GCN: buffer_atomic_smin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
446515 define void @atomic_min_i32(i32 addrspace(1)* %out, i32 %in) {
447516 entry:
448517 %0 = atomicrmw volatile min i32 addrspace(1)* %out, i32 %in seq_cst
450519 }
451520
452521 ; FUNC-LABEL: {{^}}atomic_min_i32_ret:
453 ; SI: buffer_atomic_smin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
454 ; SI: buffer_store_dword [[RET]]
522 ; GCN: buffer_atomic_smin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
523 ; GCN: buffer_store_dword [[RET]]
455524 define void @atomic_min_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
456525 entry:
457526 %0 = atomicrmw volatile min i32 addrspace(1)* %out, i32 %in seq_cst
461530
462531 ; FUNC-LABEL: {{^}}atomic_min_i32_addr64:
463532 ; SI: buffer_atomic_smin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
533 ; VI: s_movk_i32 flat_scratch_lo, 0x0
534 ; VI: s_movk_i32 flat_scratch_hi, 0x0
535 ; VI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
464536 define void @atomic_min_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
465537 entry:
466538 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
470542
471543 ; FUNC-LABEL: {{^}}atomic_min_i32_ret_addr64:
472544 ; SI: buffer_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
473 ; SI: buffer_store_dword [[RET]]
545 ; VI: s_movk_i32 flat_scratch_lo, 0x0
546 ; VI: s_movk_i32 flat_scratch_hi, 0x0
547 ; VI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
548 ; GCN: buffer_store_dword [[RET]]
474549 define void @atomic_min_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
475550 entry:
476551 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
480555 }
481556
482557 ; FUNC-LABEL: {{^}}atomic_umin_i32_offset:
483 ; SI: buffer_atomic_umin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
558 ; GCN: buffer_atomic_umin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
484559 define void @atomic_umin_i32_offset(i32 addrspace(1)* %out, i32 %in) {
485560 entry:
486561 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
489564 }
490565
491566 ; FUNC-LABEL: {{^}}atomic_umin_i32_ret_offset:
492 ; SI: buffer_atomic_umin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
493 ; SI: buffer_store_dword [[RET]]
567 ; GCN: buffer_atomic_umin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
568 ; GCN: buffer_store_dword [[RET]]
494569 define void @atomic_umin_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
495570 entry:
496571 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
501576
502577 ; FUNC-LABEL: {{^}}atomic_umin_i32_addr64_offset:
503578 ; SI: buffer_atomic_umin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
579 ; VI: s_movk_i32 flat_scratch_lo, 0x0
580 ; VI: s_movk_i32 flat_scratch_hi, 0x0
581 ; VI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
504582 define void @atomic_umin_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
505583 entry:
506584 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
511589
512590 ; FUNC-LABEL: {{^}}atomic_umin_i32_ret_addr64_offset:
513591 ; SI: buffer_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
514 ; SI: buffer_store_dword [[RET]]
592 ; VI: s_movk_i32 flat_scratch_lo, 0x0
593 ; VI: s_movk_i32 flat_scratch_hi, 0x0
594 ; VI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
595 ; GCN: buffer_store_dword [[RET]]
515596 define void @atomic_umin_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
516597 entry:
517598 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
522603 }
523604
524605 ; FUNC-LABEL: {{^}}atomic_umin_i32:
525 ; SI: buffer_atomic_umin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
606 ; GCN: buffer_atomic_umin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
526607 define void @atomic_umin_i32(i32 addrspace(1)* %out, i32 %in) {
527608 entry:
528609 %0 = atomicrmw volatile umin i32 addrspace(1)* %out, i32 %in seq_cst
531612
532613 ; FUNC-LABEL: {{^}}atomic_umin_i32_ret:
533614 ; SI: buffer_atomic_umin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
534 ; SI: buffer_store_dword [[RET]]
615 ; GCN: buffer_store_dword [[RET]]
535616 define void @atomic_umin_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
536617 entry:
537618 %0 = atomicrmw volatile umin i32 addrspace(1)* %out, i32 %in seq_cst
541622
542623 ; FUNC-LABEL: {{^}}atomic_umin_i32_addr64:
543624 ; SI: buffer_atomic_umin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
625 ; VI: s_movk_i32 flat_scratch_lo, 0x0
626 ; VI: s_movk_i32 flat_scratch_hi, 0x0
627 ; VI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
544628 define void @atomic_umin_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
545629 entry:
546630 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
550634
551635 ; FUNC-LABEL: {{^}}atomic_umin_i32_ret_addr64:
552636 ; SI: buffer_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
553 ; SI: buffer_store_dword [[RET]]
637 ; VI: s_movk_i32 flat_scratch_lo, 0x0
638 ; VI: s_movk_i32 flat_scratch_hi, 0x0
639 ; VI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
640 ; GCN: buffer_store_dword [[RET]]
554641 define void @atomic_umin_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
555642 entry:
556643 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
560647 }
561648
562649 ; FUNC-LABEL: {{^}}atomic_or_i32_offset:
563 ; SI: buffer_atomic_or v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
650 ; GCN: buffer_atomic_or v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
564651 define void @atomic_or_i32_offset(i32 addrspace(1)* %out, i32 %in) {
565652 entry:
566653 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
569656 }
570657
571658 ; FUNC-LABEL: {{^}}atomic_or_i32_ret_offset:
572 ; SI: buffer_atomic_or [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
573 ; SI: buffer_store_dword [[RET]]
659 ; GCN: buffer_atomic_or [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
660 ; GCN: buffer_store_dword [[RET]]
574661 define void @atomic_or_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
575662 entry:
576663 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
581668
582669 ; FUNC-LABEL: {{^}}atomic_or_i32_addr64_offset:
583670 ; SI: buffer_atomic_or v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
671 ; VI: s_movk_i32 flat_scratch_lo, 0x0
672 ; VI: s_movk_i32 flat_scratch_hi, 0x0
673 ; VI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
584674 define void @atomic_or_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
585675 entry:
586676 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
591681
592682 ; FUNC-LABEL: {{^}}atomic_or_i32_ret_addr64_offset:
593683 ; SI: buffer_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
594 ; SI: buffer_store_dword [[RET]]
684 ; VI: s_movk_i32 flat_scratch_lo, 0x0
685 ; VI: s_movk_i32 flat_scratch_hi, 0x0
686 ; VI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
687 ; GCN: buffer_store_dword [[RET]]
595688 define void @atomic_or_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
596689 entry:
597690 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
602695 }
603696
604697 ; FUNC-LABEL: {{^}}atomic_or_i32:
605 ; SI: buffer_atomic_or v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
698 ; GCN: buffer_atomic_or v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
606699 define void @atomic_or_i32(i32 addrspace(1)* %out, i32 %in) {
607700 entry:
608701 %0 = atomicrmw volatile or i32 addrspace(1)* %out, i32 %in seq_cst
610703 }
611704
612705 ; FUNC-LABEL: {{^}}atomic_or_i32_ret:
613 ; SI: buffer_atomic_or [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
614 ; SI: buffer_store_dword [[RET]]
706 ; GCN: buffer_atomic_or [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
707 ; GCN: buffer_store_dword [[RET]]
615708 define void @atomic_or_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
616709 entry:
617710 %0 = atomicrmw volatile or i32 addrspace(1)* %out, i32 %in seq_cst
621714
622715 ; FUNC-LABEL: {{^}}atomic_or_i32_addr64:
623716 ; SI: buffer_atomic_or v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
717 ; VI: s_movk_i32 flat_scratch_lo, 0x0
718 ; VI: s_movk_i32 flat_scratch_hi, 0x0
719 ; VI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
624720 define void @atomic_or_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
625721 entry:
626722 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
630726
631727 ; FUNC-LABEL: {{^}}atomic_or_i32_ret_addr64:
632728 ; SI: buffer_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
633 ; SI: buffer_store_dword [[RET]]
729 ; VI: s_movk_i32 flat_scratch_lo, 0x0
730 ; VI: s_movk_i32 flat_scratch_hi, 0x0
731 ; VI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
732 ; GCN: buffer_store_dword [[RET]]
634733 define void @atomic_or_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
635734 entry:
636735 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
640739 }
641740
642741 ; FUNC-LABEL: {{^}}atomic_xchg_i32_offset:
643 ; SI: buffer_atomic_swap v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
742 ; GCN: buffer_atomic_swap v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
644743 define void @atomic_xchg_i32_offset(i32 addrspace(1)* %out, i32 %in) {
645744 entry:
646745 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
649748 }
650749
651750 ; FUNC-LABEL: {{^}}atomic_xchg_i32_ret_offset:
652 ; SI: buffer_atomic_swap [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
653 ; SI: buffer_store_dword [[RET]]
751 ; GCN: buffer_atomic_swap [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
752 ; GCN: buffer_store_dword [[RET]]
654753 define void @atomic_xchg_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
655754 entry:
656755 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
671770
672771 ; FUNC-LABEL: {{^}}atomic_xchg_i32_ret_addr64_offset:
673772 ; SI: buffer_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
674 ; SI: buffer_store_dword [[RET]]
773 ; VI: s_movk_i32 flat_scratch_lo, 0x0
774 ; VI: s_movk_i32 flat_scratch_hi, 0x0
775 ; VI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
776 ; GCN: buffer_store_dword [[RET]]
675777 define void @atomic_xchg_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
676778 entry:
677779 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
682784 }
683785
684786 ; FUNC-LABEL: {{^}}atomic_xchg_i32:
685 ; SI: buffer_atomic_swap v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
787 ; GCN: buffer_atomic_swap v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
686788 define void @atomic_xchg_i32(i32 addrspace(1)* %out, i32 %in) {
687789 entry:
688790 %0 = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in seq_cst
690792 }
691793
692794 ; FUNC-LABEL: {{^}}atomic_xchg_i32_ret:
693 ; SI: buffer_atomic_swap [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
694 ; SI: buffer_store_dword [[RET]]
795 ; GCN: buffer_atomic_swap [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
796 ; GCN: buffer_store_dword [[RET]]
695797 define void @atomic_xchg_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
696798 entry:
697799 %0 = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in seq_cst
701803
702804 ; FUNC-LABEL: {{^}}atomic_xchg_i32_addr64:
703805 ; SI: buffer_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
806 ; VI: s_movk_i32 flat_scratch_lo, 0x0
807 ; VI: s_movk_i32 flat_scratch_hi, 0x0
808 ; VI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
704809 define void @atomic_xchg_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
705810 entry:
706811 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
710815
711816 ; FUNC-LABEL: {{^}}atomic_xchg_i32_ret_addr64:
712817 ; SI: buffer_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
713 ; SI: buffer_store_dword [[RET]]
818 ; VI: s_movk_i32 flat_scratch_lo, 0x0
819 ; VI: s_movk_i32 flat_scratch_hi, 0x0
820 ; VI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
821 ; GCN: buffer_store_dword [[RET]]
714822 define void @atomic_xchg_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
715823 entry:
716824 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
720828 }
721829
722830 ; FUNC-LABEL: {{^}}atomic_xor_i32_offset:
723 ; SI: buffer_atomic_xor v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
831 ; GCN: buffer_atomic_xor v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
724832 define void @atomic_xor_i32_offset(i32 addrspace(1)* %out, i32 %in) {
725833 entry:
726834 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
729837 }
730838
731839 ; FUNC-LABEL: {{^}}atomic_xor_i32_ret_offset:
732 ; SI: buffer_atomic_xor [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
733 ; SI: buffer_store_dword [[RET]]
840 ; GCN: buffer_atomic_xor [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
841 ; GCN: buffer_store_dword [[RET]]
734842 define void @atomic_xor_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
735843 entry:
736844 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
741849
742850 ; FUNC-LABEL: {{^}}atomic_xor_i32_addr64_offset:
743851 ; SI: buffer_atomic_xor v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
852 ; VI: s_movk_i32 flat_scratch_lo, 0x0
853 ; VI: s_movk_i32 flat_scratch_hi, 0x0
854 ; VI: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
744855 define void @atomic_xor_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
745856 entry:
746857 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
751862
752863 ; FUNC-LABEL: {{^}}atomic_xor_i32_ret_addr64_offset:
753864 ; SI: buffer_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
754 ; SI: buffer_store_dword [[RET]]
865 ; VI: s_movk_i32 flat_scratch_lo, 0x0
866 ; VI: s_movk_i32 flat_scratch_hi, 0x0
867 ; VI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
868 ; GCN: buffer_store_dword [[RET]]
755869 define void @atomic_xor_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
756870 entry:
757871 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
762876 }
763877
764878 ; FUNC-LABEL: {{^}}atomic_xor_i32:
765 ; SI: buffer_atomic_xor v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
879 ; GCN: buffer_atomic_xor v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
766880 define void @atomic_xor_i32(i32 addrspace(1)* %out, i32 %in) {
767881 entry:
768882 %0 = atomicrmw volatile xor i32 addrspace(1)* %out, i32 %in seq_cst
770884 }
771885
772886 ; FUNC-LABEL: {{^}}atomic_xor_i32_ret:
773 ; SI: buffer_atomic_xor [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
774 ; SI: buffer_store_dword [[RET]]
887 ; GCN: buffer_atomic_xor [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
888 ; GCN: buffer_store_dword [[RET]]
775889 define void @atomic_xor_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
776890 entry:
777891 %0 = atomicrmw volatile xor i32 addrspace(1)* %out, i32 %in seq_cst
781895
782896 ; FUNC-LABEL: {{^}}atomic_xor_i32_addr64:
783897 ; SI: buffer_atomic_xor v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
898 ; VI: s_movk_i32 flat_scratch_lo, 0x0
899 ; VI: s_movk_i32 flat_scratch_hi, 0x0
900 ; VI: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
784901 define void @atomic_xor_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
785902 entry:
786903 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
790907
791908 ; FUNC-LABEL: {{^}}atomic_xor_i32_ret_addr64:
792909 ; SI: buffer_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
793 ; SI: buffer_store_dword [[RET]]
910 ; VI: s_movk_i32 flat_scratch_lo, 0x0
911 ; VI: s_movk_i32 flat_scratch_hi, 0x0
912 ; VI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
913 ; GCN: buffer_store_dword [[RET]]
794914 define void @atomic_xor_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
795915 entry:
796916 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index