llvm.org GIT mirror llvm / d74d012
AMDGPU: Start selecting global instructions git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@309470 91177308-0d34-0410-b5e6-96231b3b80d8 Matt Arsenault 2 years ago
30 changed file(s) with 1112 addition(s) and 646 deletion(s). Raw diff Collapse all Expand all
139139
140140 bool SelectFlatAtomic(SDValue Addr, SDValue &VAddr,
141141 SDValue &Offset, SDValue &SLC) const;
142 bool SelectFlatAtomicSigned(SDValue Addr, SDValue &VAddr,
143 SDValue &Offset, SDValue &SLC) const;
144
145 template <bool IsSigned>
142146 bool SelectFlatOffset(SDValue Addr, SDValue &VAddr,
143147 SDValue &Offset, SDValue &SLC) const;
144148
13231327 return true;
13241328 }
13251329
1330 template <bool IsSigned>
13261331 bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDValue Addr,
13271332 SDValue &VAddr,
13281333 SDValue &Offset,
13331338 CurDAG->isBaseWithConstantOffset(Addr)) {
13341339 SDValue N0 = Addr.getOperand(0);
13351340 SDValue N1 = Addr.getOperand(1);
1336 uint64_t COffsetVal = cast<ConstantSDNode>(N1)->getZExtValue();
1337 if (isUInt<12>(COffsetVal)) {
1341 int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();
1342
1343 if ((IsSigned && isInt<13>(COffsetVal)) ||
1344 (!IsSigned && isUInt<12>(COffsetVal))) {
13381345 Addr = N0;
13391346 OffsetVal = COffsetVal;
13401347 }
13511358 SDValue &VAddr,
13521359 SDValue &Offset,
13531360 SDValue &SLC) const {
1354 return SelectFlatOffset(Addr, VAddr, Offset, SLC);
1361 return SelectFlatOffset<false>(Addr, VAddr, Offset, SLC);
1362 }
1363
1364 bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDValue Addr,
1365 SDValue &VAddr,
1366 SDValue &Offset,
1367 SDValue &SLC) const {
1368 return SelectFlatOffset<true>(Addr, VAddr, Offset, SLC);
13551369 }
13561370
13571371 bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
246246 >;
247247
248248 def global_load : GlobalLoad ;
249 def global_atomic_load : GlobalLoad;
249250
250251 // Global address space stores
251252 class GlobalStore : GlobalMemOp <
77 //===----------------------------------------------------------------------===//
88
99 def FLATAtomic : ComplexPattern<i64, 3, "SelectFlatAtomic", [], [], -10>;
10 def FLATOffset : ComplexPattern", [], [], -10>;
10 def FLATOffset : ComplexPattern<i64, 3, "SelectFlatOffset<false>", [], [], -10>;
11
12 def FLATOffsetSigned : ComplexPattern<i64, 3, "SelectFlatOffset<true>", [], [], -10>;
13 def FLATSignedAtomic : ComplexPattern<i64, 3, "SelectFlatAtomicSigned", [], [], -10>;
1114
1215 //===----------------------------------------------------------------------===//
1316 // FLAT classes
288291 (ins VReg_64:$vaddr, data_rc:$vdata, offset_s13:$offset, slc:$slc),
289292 " $vdst, $vaddr, $vdata, off$offset glc$slc",
290293 [(set vt:$vdst,
291 (atomic (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$vdata))]>,
294 (atomic (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$vdata))]>,
292295 AtomicNoRet {
293296 let has_saddr = 1;
294297 }
613616
614617 // Patterns for global loads with no offset.
615618 class FlatLoadPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : Pat <
616 (vt (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc))),
619 (vt (node (FLATOffset i64:$vaddr, i16:$offset, i1:$slc))),
617620 (inst $vaddr, $offset, 0, $slc)
618621 >;
619622
622625 (inst $vaddr, $offset, 0, $slc)
623626 >;
624627
628 class FlatLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : Pat <
629 (vt (node (FLATOffsetSigned i64:$vaddr, i16:$offset, i1:$slc))),
630 (inst $vaddr, $offset, 0, $slc)
631 >;
632
625633 class FlatStorePat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : Pat <
626 (node vt:$data, (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc)),
634 (node vt:$data, (FLATOffset i64:$vaddr, i16:$offset, i1:$slc)),
635 (inst $vaddr, $data, $offset, 0, $slc)
636 >;
637
638 class FlatStoreSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : Pat <
639 (node vt:$data, (FLATOffsetSigned i64:$vaddr, i16:$offset, i1:$slc)),
627640 (inst $vaddr, $data, $offset, 0, $slc)
628641 >;
629642
634647 (inst $vaddr, $data, $offset, 0, $slc)
635648 >;
636649
650 class FlatStoreSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : Pat <
651 // atomic store follows atomic binop convention so the address comes
652 // first.
653 (node (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data),
654 (inst $vaddr, $data, $offset, 0, $slc)
655 >;
656
637657 class FlatAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
638658 ValueType data_vt = vt> : Pat <
639659 (vt (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$data)),
660 (inst $vaddr, $data, $offset, $slc)
661 >;
662
663 class FlatSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
664 ValueType data_vt = vt> : Pat <
665 (vt (node (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$data)),
640666 (inst $vaddr, $data, $offset, $slc)
641667 >;
642668
698724 def : FlatStorePat ;
699725 def : FlatStorePat ;
700726 }
727
728
729 let Predicates = [HasFlatGlobalInsts], AddedComplexity = 10 in {
730
731 def : FlatLoadSignedPat ;
732 def : FlatLoadSignedPat ;
733 def : FlatLoadSignedPat ;
734 def : FlatLoadSignedPat ;
735 def : FlatLoadSignedPat ;
736 def : FlatLoadSignedPat ;
737
738
739 def : FlatLoadSignedPat ;
740 def : FlatLoadSignedPat ;
741 def : FlatLoadSignedPat ;
742
743 def : FlatLoadAtomicPat ;
744 def : FlatLoadAtomicPat ;
745
746 def : FlatStoreSignedPat ;
747 def : FlatStoreSignedPat ;
748 def : FlatStoreSignedPat ;
749 def : FlatStoreSignedPat ;
750 def : FlatStoreSignedPat ;
751 def : FlatStoreSignedPat ;
752 def : FlatStoreSignedPat ;
753
754 def : FlatStoreSignedAtomicPat ;
755 def : FlatStoreSignedAtomicPat ;
756
757 def : FlatSignedAtomicPat ;
758 def : FlatSignedAtomicPat ;
759 def : FlatSignedAtomicPat ;
760 def : FlatSignedAtomicPat ;
761 def : FlatSignedAtomicPat ;
762 def : FlatSignedAtomicPat ;
763 def : FlatSignedAtomicPat ;
764 def : FlatSignedAtomicPat ;
765 def : FlatSignedAtomicPat ;
766 def : FlatSignedAtomicPat ;
767 def : FlatSignedAtomicPat ;
768 def : FlatSignedAtomicPat ;
769 def : FlatSignedAtomicPat ;
770
771 def : FlatSignedAtomicPat ;
772 def : FlatSignedAtomicPat ;
773 def : FlatSignedAtomicPat ;
774 def : FlatSignedAtomicPat ;
775 def : FlatSignedAtomicPat ;
776 def : FlatSignedAtomicPat ;
777 def : FlatSignedAtomicPat ;
778 def : FlatSignedAtomicPat ;
779 def : FlatSignedAtomicPat ;
780 def : FlatSignedAtomicPat ;
781 def : FlatSignedAtomicPat ;
782 def : FlatSignedAtomicPat ;
783 def : FlatSignedAtomicPat ;
784
785 } // End Predicates = [HasFlatGlobalInsts]
701786
702787
703788 //===----------------------------------------------------------------------===//
154154
155155 ; FIXME: Need to handle non-uniform case for function below (load without gep).
156156 ; GCN-LABEL: {{^}}v_test_add_v2i16_zext_to_v2i32:
157 ; GFX9: flat_load_dword [[A:v[0-9]+]]
158 ; GFX9: flat_load_dword [[B:v[0-9]+]]
157 ; GFX9: global_load_dword [[A:v[0-9]+]]
158 ; GFX9: global_load_dword [[B:v[0-9]+]]
159159
160160 ; GFX9: v_pk_add_u16 [[ADD:v[0-9]+]], [[A]], [[B]]
161161 ; GFX9-DAG: v_and_b32_e32 v[[ELT0:[0-9]+]], 0xffff, [[ADD]]
189189
190190 ; FIXME: Need to handle non-uniform case for function below (load without gep).
191191 ; GCN-LABEL: {{^}}v_test_add_v2i16_zext_to_v2i64:
192 ; GFX9: flat_load_dword [[A:v[0-9]+]]
193 ; GFX9: flat_load_dword [[B:v[0-9]+]]
192 ; GFX9: global_load_dword [[A:v[0-9]+]]
193 ; GFX9: global_load_dword [[B:v[0-9]+]]
194194
195195 ; GFX9: v_pk_add_u16 [[ADD:v[0-9]+]], [[A]], [[B]]
196196 ; GFX9-DAG: v_and_b32_e32 v[[ELT0:[0-9]+]], 0xffff, [[ADD]]
222222
223223 ; FIXME: Need to handle non-uniform case for function below (load without gep).
224224 ; GCN-LABEL: {{^}}v_test_add_v2i16_sext_to_v2i32:
225 ; GFX9: flat_load_dword [[A:v[0-9]+]]
226 ; GFX9: flat_load_dword [[B:v[0-9]+]]
225 ; GFX9: global_load_dword [[A:v[0-9]+]]
226 ; GFX9: global_load_dword [[B:v[0-9]+]]
227227
228228 ; GFX9: v_pk_add_u16 [[ADD:v[0-9]+]], [[A]], [[B]]
229229 ; GFX9-DAG: v_bfe_i32 v[[ELT0:[0-9]+]], [[ADD]], 0, 16
250250
251251 ; FIXME: Need to handle non-uniform case for function below (load without gep).
252252 ; GCN-LABEL: {{^}}v_test_add_v2i16_sext_to_v2i64:
253 ; GCN: flat_load_dword
254 ; GCN: flat_load_dword
253 ; GCN: {{flat|global}}_load_dword
254 ; GCN: {{flat|global}}_load_dword
255255
256256 ; GFX9: v_pk_add_u16
257257 ; GFX9: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v{{[0-9]+}}
146146 ; HSA-DAG: v_mov_b32_e32 v[[VPTRLO:[0-9]+]], s[[PTRLO]]
147147 ; HSA-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]]
148148 ; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0
149 ; HSA: flat_store_dword v{{\[}}[[VPTRLO]]:[[VPTRHI]]{{\]}}, [[K]]
149 ; HSA: {{flat|global}}_store_dword v{{\[}}[[VPTRLO]]:[[VPTRHI]]{{\]}}, [[K]]
150150 define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32 addrspace(4)* %ptr) #0 {
151151 %ftos = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(1)*
152152 store volatile i32 0, i32 addrspace(1)* %ftos
175175
176176 ; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
177177 ; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
178 ; HSA: flat_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]]
178 ; HSA: {{flat|global}}_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]]
179179 define amdgpu_kernel void @cast_0_group_to_flat_addrspacecast() #0 {
180180 %cast = addrspacecast i32 addrspace(3)* null to i32 addrspace(4)*
181181 store volatile i32 7, i32 addrspace(4)* %cast
196196 ; HSA: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
197197 ; HSA: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
198198 ; HSA: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
199 ; HSA: flat_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]]
199 ; HSA: {{flat|global}}_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]]
200200 define amdgpu_kernel void @cast_neg1_group_to_flat_addrspacecast() #0 {
201201 %cast = addrspacecast i32 addrspace(3)* inttoptr (i32 -1 to i32 addrspace(3)*) to i32 addrspace(4)*
202202 store volatile i32 7, i32 addrspace(4)* %cast
221221 ; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
222222 ; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
223223 ; HSA: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
224 ; HSA: flat_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]]
224 ; HSA: {{flat|global}}_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]]
225225 define amdgpu_kernel void @cast_0_private_to_flat_addrspacecast() #0 {
226226 %cast = addrspacecast i32* null to i32 addrspace(4)*
227227 store volatile i32 7, i32 addrspace(4)* %cast
241241 ; specialize away generic pointer accesses.
242242
243243 ; HSA-LABEL: {{^}}branch_use_flat_i32:
244 ; HSA: flat_store_dword {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}
244 ; HSA: {{flat|global}}_store_dword {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}
245245 ; HSA: s_endpgm
246246 define amdgpu_kernel void @branch_use_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* %gptr, i32 addrspace(3)* %lptr, i32 %x, i32 %c) #0 {
247247 entry:
273273 ; GFX9: s_add_u32 flat_scratch_lo, s6, s9
274274 ; GFX9: s_addc_u32 flat_scratch_hi, s7, 0
275275
276 ; HSA: flat_store_dword
276 ; HSA: {{flat|global}}_store_dword
277277 ; HSA: s_barrier
278 ; HSA: flat_load_dword
278 ; HSA: {{flat|global}}_load_dword
279279 define amdgpu_kernel void @store_flat_scratch(i32 addrspace(1)* noalias %out, i32) #0 {
280280 %alloca = alloca i32, i32 9, align 4
281281 %x = call i32 @llvm.amdgcn.workitem.id.x() #2
2222 }
2323
2424 ; GCN-LABEL: {{^}}v_ashr_v2i16:
25 ; GCN: {{buffer|flat}}_load_dword [[LHS:v[0-9]+]]
26 ; GCN: {{buffer|flat}}_load_dword [[RHS:v[0-9]+]]
25 ; GCN: {{buffer|flat|global}}_load_dword [[LHS:v[0-9]+]]
26 ; GCN: {{buffer|flat|global}}_load_dword [[RHS:v[0-9]+]]
2727 ; GFX9: v_pk_ashrrev_i16 [[RESULT:v[0-9]+]], [[RHS]], [[LHS]]
2828
2929 ; VI: v_ashrrev_i16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
5555
5656 ; GCN-LABEL: {{^}}ashr_v_s_v2i16:
5757 ; GFX9: s_load_dword [[RHS:s[0-9]+]]
58 ; GFX9: {{buffer|flat}}_load_dword [[LHS:v[0-9]+]]
58 ; GFX9: {{buffer|flat|global}}_load_dword [[LHS:v[0-9]+]]
5959 ; GFX9: v_pk_ashrrev_i16 [[RESULT:v[0-9]+]], [[RHS]], [[LHS]]
6060 define amdgpu_kernel void @ashr_v_s_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in, <2 x i16> %sgpr) #0 {
6161 %tid = call i32 @llvm.amdgcn.workitem.id.x()
7070
7171 ; GCN-LABEL: {{^}}ashr_s_v_v2i16:
7272 ; GFX9: s_load_dword [[LHS:s[0-9]+]]
73 ; GFX9: {{buffer|flat}}_load_dword [[RHS:v[0-9]+]]
73 ; GFX9: {{buffer|flat|global}}_load_dword [[RHS:v[0-9]+]]
7474 ; GFX9: v_pk_ashrrev_i16 [[RESULT:v[0-9]+]], [[RHS]], [[LHS]]
7575 define amdgpu_kernel void @ashr_s_v_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in, <2 x i16> %sgpr) #0 {
7676 %tid = call i32 @llvm.amdgcn.workitem.id.x()
8484 }
8585
8686 ; GCN-LABEL: {{^}}ashr_imm_v_v2i16:
87 ; GCN: {{buffer|flat}}_load_dword [[RHS:v[0-9]+]]
87 ; GCN: {{buffer|flat|global}}_load_dword [[RHS:v[0-9]+]]
8888 ; GFX9: v_pk_ashrrev_i16 [[RESULT:v[0-9]+]], [[RHS]], -4
8989 define amdgpu_kernel void @ashr_imm_v_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
9090 %tid = call i32 @llvm.amdgcn.workitem.id.x()
9898 }
9999
100100 ; GCN-LABEL: {{^}}ashr_v_imm_v2i16:
101 ; GCN: {{buffer|flat}}_load_dword [[LHS:v[0-9]+]]
101 ; GCN: {{buffer|flat|global}}_load_dword [[LHS:v[0-9]+]]
102102 ; GFX9: v_pk_ashrrev_i16 [[RESULT:v[0-9]+]], 8, [[LHS]]
103103 define amdgpu_kernel void @ashr_v_imm_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
104104 %tid = call i32 @llvm.amdgcn.workitem.id.x()
112112 }
113113
114114 ; GCN-LABEL: {{^}}v_ashr_v4i16:
115 ; GCN: {{buffer|flat}}_load_dwordx2
116 ; GCN: {{buffer|flat}}_load_dwordx2
115 ; GCN: {{buffer|flat|global}}_load_dwordx2
116 ; GCN: {{buffer|flat|global}}_load_dwordx2
117117 ; GFX9: v_pk_ashrrev_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
118118 ; GFX9: v_pk_ashrrev_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
119119
124124 ; VI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
125125 ; VI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
126126
127 ; GCN: {{buffer|flat}}_store_dwordx2
127 ; GCN: {{buffer|flat|global}}_store_dwordx2
128128 define amdgpu_kernel void @v_ashr_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
129129 %tid = call i32 @llvm.amdgcn.workitem.id.x()
130130 %tid.ext = sext i32 %tid to i64
139139 }
140140
141141 ; GCN-LABEL: {{^}}ashr_v_imm_v4i16:
142 ; GCN: {{buffer|flat}}_load_dwordx2
142 ; GCN: {{buffer|flat|global}}_load_dwordx2
143143 ; GFX9: v_pk_ashrrev_i16 v{{[0-9]+}}, 8, v{{[0-9]+}}
144144 ; GFX9: v_pk_ashrrev_i16 v{{[0-9]+}}, 8, v{{[0-9]+}}
145 ; GCN: {{buffer|flat}}_store_dwordx2
145 ; GCN: {{buffer|flat|global}}_store_dwordx2
146146 define amdgpu_kernel void @ashr_v_imm_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
147147 %tid = call i32 @llvm.amdgcn.workitem.id.x()
148148 %tid.ext = sext i32 %tid to i64
3535
3636 ; GCN-LABEL: {{^}}extract_vector_elt_v2i16_dynamic_vgpr:
3737 ; GCN-DAG: s_load_dword [[VEC:s[0-9]+]]
38 ; GCN-DAG: {{flat|buffer}}_load_dword [[IDX:v[0-9]+]]
38 ; GCN-DAG: {{flat|buffer|global}}_load_dword [[IDX:v[0-9]+]]
3939 ; GCN: v_lshlrev_b32_e32 [[IDX_SCALED:v[0-9]+]], 16, [[IDX]]
4040
4141 ; SI: v_lshr_b32_e32 [[ELT:v[0-9]+]], [[VEC]], [[IDX_SCALED]]
88 ; GCN-LABEL: {{^}}s_fabs_free_f16:
99 ; GCN: flat_load_ushort [[VAL:v[0-9]+]],
1010 ; GCN: v_and_b32_e32 [[RESULT:v[0-9]+]], 0x7fff, [[VAL]]
11 ; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
11 ; GCN: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
1212
1313 define amdgpu_kernel void @s_fabs_free_f16(half addrspace(1)* %out, i16 %in) {
1414 %bc= bitcast i16 %in to half
6666 ; VI-DAG: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
6767 ; VI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
6868
69 ; GCN: flat_store_dwordx2
69 ; GCN: {{flat|global}}_store_dwordx2
7070 define amdgpu_kernel void @s_fabs_v4f16(<4 x half> addrspace(1)* %out, <4 x half> %in) {
7171 %fabs = call <4 x half> @llvm.fabs.v4f16(<4 x half> %in)
7272 store <4 x half> %fabs, <4 x half> addrspace(1)* %out
9494 }
9595
9696 ; GCN-LABEL: {{^}}v_fabs_v2f16:
97 ; GCN: flat_load_dword [[VAL:v[0-9]+]]
97 ; GCN: {{flat|global}}_load_dword [[VAL:v[0-9]+]]
9898 ; GCN: v_and_b32_e32 v{{[0-9]+}}, 0x7fff7fff, [[VAL]]
9999 define amdgpu_kernel void @v_fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
100100 %tid = call i32 @llvm.amdgcn.workitem.id.x()
117117 }
118118
119119 ; GCN-LABEL: {{^}}v_fabs_fold_v2f16:
120 ; GCN: flat_load_dword [[VAL:v[0-9]+]]
120 ; GCN: {{flat|global}}_load_dword [[VAL:v[0-9]+]]
121121
122122 ; CI: v_cvt_f32_f16_e32
123123 ; CI: v_cvt_f32_f16_e32
1515
1616 ; GCN-LABEL: {{^}}test_fold_canonicalize_fmul_value_f32:
1717 ; GCN: v_mul_f32_e32 [[V:v[0-9]+]], 0x41700000, v{{[0-9]+}}
18 ; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
18 ; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
1919 ; GCN-NOT: 1.0
2020 define amdgpu_kernel void @test_fold_canonicalize_fmul_value_f32(float addrspace(1)* %arg) {
2121 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
2929
3030 ; GCN-LABEL: {{^}}test_fold_canonicalize_sub_value_f32:
3131 ; GCN: v_sub_f32_e32 [[V:v[0-9]+]], 0x41700000, v{{[0-9]+}}
32 ; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
32 ; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
3333 ; GCN-NOT: 1.0
3434 define amdgpu_kernel void @test_fold_canonicalize_sub_value_f32(float addrspace(1)* %arg) {
3535 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
4343
4444 ; GCN-LABEL: {{^}}test_fold_canonicalize_add_value_f32:
4545 ; GCN: v_add_f32_e32 [[V:v[0-9]+]], 0x41700000, v{{[0-9]+}}
46 ; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
46 ; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
4747 ; GCN-NOT: 1.0
4848 define amdgpu_kernel void @test_fold_canonicalize_add_value_f32(float addrspace(1)* %arg) {
4949 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
5757
5858 ; GCN-LABEL: {{^}}test_fold_canonicalize_sqrt_value_f32:
5959 ; GCN: v_sqrt_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
60 ; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
60 ; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
6161 ; GCN-NOT: 1.0
6262 define amdgpu_kernel void @test_fold_canonicalize_sqrt_value_f32(float addrspace(1)* %arg) {
6363 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
7171
7272 ; GCN-LABEL: test_fold_canonicalize_fceil_value_f32:
7373 ; GCN: v_ceil_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
74 ; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
74 ; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
7575 ; GCN-NOT: 1.0
7676 define amdgpu_kernel void @test_fold_canonicalize_fceil_value_f32(float addrspace(1)* %arg) {
7777 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
8585
8686 ; GCN-LABEL: test_fold_canonicalize_floor_value_f32:
8787 ; GCN: v_floor_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
88 ; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
88 ; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
8989 ; GCN-NOT: 1.0
9090 define amdgpu_kernel void @test_fold_canonicalize_floor_value_f32(float addrspace(1)* %arg) {
9191 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
9999
100100 ; GCN-LABEL: test_fold_canonicalize_fma_value_f32:
101101 ; GCN: v_fma_f32 [[V:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
102 ; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
102 ; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
103103 ; GCN-NOT: 1.0
104104 define amdgpu_kernel void @test_fold_canonicalize_fma_value_f32(float addrspace(1)* %arg) {
105105 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
114114 ; GCN-LABEL: test_fold_canonicalize_fmuladd_value_f32:
115115 ; GCN-FLUSH: v_mac_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}
116116 ; GFX9-DENORM: v_fma_f32 [[V:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
117 ; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
117 ; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
118118 ; GCN-NOT: 1.0
119119 define amdgpu_kernel void @test_fold_canonicalize_fmuladd_value_f32(float addrspace(1)* %arg) {
120120 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
127127 }
128128
129129 ; GCN-LABEL: test_fold_canonicalize_canonicalize_value_f32:
130 ; GCN: flat_load_dword [[LOAD:v[0-9]+]],
130 ; GCN: {{flat|global}}_load_dword [[LOAD:v[0-9]+]],
131131 ; GCN: v_mul_f32_e32 [[V:v[0-9]+]], 1.0, [[LOAD]]
132 ; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
132 ; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
133133 ; GCN-NOT: 1.0
134134 define amdgpu_kernel void @test_fold_canonicalize_canonicalize_value_f32(float addrspace(1)* %arg) {
135135 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
143143
144144 ; GCN-LABEL: test_fold_canonicalize_fpextend_value_f64_f32:
145145 ; GCN: v_cvt_f64_f32_e32 [[V:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}
146 ; GCN: flat_store_dwordx2 v[{{[0-9:]+}}], [[V]]
146 ; GCN: {{flat|global}}_store_dwordx2 v[{{[0-9:]+}}], [[V]]
147147 ; GCN-NOT: 1.0
148148 define amdgpu_kernel void @test_fold_canonicalize_fpextend_value_f64_f32(float addrspace(1)* %arg, double addrspace(1)* %out) {
149149 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
158158
159159 ; GCN-LABEL: test_fold_canonicalize_fpextend_value_f32_f16:
160160 ; GCN: v_cvt_f32_f16_e32 [[V:v[0-9]+]], v{{[0-9]+}}
161 ; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
161 ; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
162162 ; GCN-NOT: 1.0
163163 define amdgpu_kernel void @test_fold_canonicalize_fpextend_value_f32_f16(half addrspace(1)* %arg, float addrspace(1)* %out) {
164164 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
173173
174174 ; GCN-LABEL: test_fold_canonicalize_fpround_value_f32_f64:
175175 ; GCN: v_cvt_f32_f64_e32 [[V:v[0-9]+]], v[{{[0-9:]+}}]
176 ; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
176 ; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
177177 ; GCN-NOT: 1.0
178178 define amdgpu_kernel void @test_fold_canonicalize_fpround_value_f32_f64(double addrspace(1)* %arg, float addrspace(1)* %out) {
179179 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
188188
189189 ; GCN-LABEL: test_fold_canonicalize_fpround_value_f16_f32:
190190 ; GCN: v_cvt_f16_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
191 ; GCN: flat_store_short v[{{[0-9:]+}}], [[V]]
191 ; GCN: {{flat|global}}_store_short v[{{[0-9:]+}}], [[V]]
192192 ; GCN-NOT: 1.0
193193 define amdgpu_kernel void @test_fold_canonicalize_fpround_value_f16_f32(float addrspace(1)* %arg, half addrspace(1)* %out) {
194194 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
208208 ; GFX9: v_cvt_f16_f32_e32 [[V1:v[0-9]+]], v{{[0-9]+}}
209209 ; GFX9: v_and_b32_e32 [[V0_16:v[0-9]+]], 0xffff, [[V0]]
210210 ; GFX9: v_lshl_or_b32 [[V:v[0-9]+]], [[V1]], 16, [[V0_16]]
211 ; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
211 ; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
212212 ; GCN-NOT: 1.0
213213 define amdgpu_kernel void @test_fold_canonicalize_fpround_value_v2f16_v2f32(<2 x float> addrspace(1)* %arg, <2 x half> addrspace(1)* %out) {
214214 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
235235
236236 ; GCN-LABEL: test_fold_canonicalize_fneg_value_f32:
237237 ; GCN: v_xor_b32_e32 [[V:v[0-9]+]], 0x80000000, v{{[0-9]+}}
238 ; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
238 ; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
239239 ; GCN-NOT: 1.0
240240 define amdgpu_kernel void @test_fold_canonicalize_fneg_value_f32(float addrspace(1)* %arg) {
241241 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
262262
263263 ; GCN-LABEL: test_fold_canonicalize_fabs_value_f32:
264264 ; GCN: v_and_b32_e32 [[V:v[0-9]+]], 0x7fffffff, v{{[0-9]+}}
265 ; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
265 ; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
266266 ; GCN-NOT: 1.0
267267 define amdgpu_kernel void @test_fold_canonicalize_fabs_value_f32(float addrspace(1)* %arg) {
268268 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
277277
278278 ; GCN-LABEL: test_fold_canonicalize_sin_value_f32:
279279 ; GCN: v_sin_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
280 ; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
280 ; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
281281 ; GCN-NOT: 1.0
282282 define amdgpu_kernel void @test_fold_canonicalize_sin_value_f32(float addrspace(1)* %arg) {
283283 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
291291
292292 ; GCN-LABEL: test_fold_canonicalize_cos_value_f32:
293293 ; GCN: v_cos_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
294 ; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
294 ; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
295295 ; GCN-NOT: 1.0
296296 define amdgpu_kernel void @test_fold_canonicalize_cos_value_f32(float addrspace(1)* %arg) {
297297 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
306306 ; GCN-LABEL: test_fold_canonicalize_sin_value_f16:
307307 ; GCN: v_sin_f32_e32 [[V0:v[0-9]+]], v{{[0-9]+}}
308308 ; GCN: v_cvt_f16_f32_e32 [[V:v[0-9]+]], [[V0]]
309 ; GCN: flat_store_short v[{{[0-9:]+}}], [[V]]
309 ; GCN: {{flat|global}}_store_short v[{{[0-9:]+}}], [[V]]
310310 ; GCN-NOT: 1.0
311311 define amdgpu_kernel void @test_fold_canonicalize_sin_value_f16(half addrspace(1)* %arg) {
312312 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
321321 ; GCN-LABEL: test_fold_canonicalize_cos_value_f16:
322322 ; GCN: v_cos_f32_e32 [[V0:v[0-9]+]], v{{[0-9]+}}
323323 ; GCN: v_cvt_f16_f32_e32 [[V:v[0-9]+]], [[V0]]
324 ; GCN: flat_store_short v[{{[0-9:]+}}], [[V]]
324 ; GCN: {{flat|global}}_store_short v[{{[0-9:]+}}], [[V]]
325325 ; GCN-NOT: 1.0
326326 define amdgpu_kernel void @test_fold_canonicalize_cos_value_f16(half addrspace(1)* %arg) {
327327 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
335335
336336 ; GCN-LABEL: test_fold_canonicalize_qNaN_value_f32:
337337 ; GCN: v_mov_b32_e32 [[V:v[0-9]+]], 0x7fc00000
338 ; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
338 ; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
339339 ; GCN-NOT: 1.0
340340 define amdgpu_kernel void @test_fold_canonicalize_qNaN_value_f32(float addrspace(1)* %arg) {
341341 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
348348 ; GCN-LABEL: test_fold_canonicalize_minnum_value_from_load_f32:
349349 ; VI: v_mul_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
350350 ; GFX9: v_min_f32_e32 [[V:v[0-9]+]], 0, v{{[0-9]+}}
351 ; GFX9: flat_store_dword v[{{[0-9:]+}}], [[V]]
351 ; GFX9: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
352352 define amdgpu_kernel void @test_fold_canonicalize_minnum_value_from_load_f32(float addrspace(1)* %arg) {
353353 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
354354 %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
361361
362362 ; GCN-LABEL: test_fold_canonicalize_minnum_value_f32:
363363 ; GCN: v_min_f32_e32 [[V:v[0-9]+]], 0, v{{[0-9]+}}
364 ; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
364 ; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
365365 ; GCN-NOT: 1.0
366366 define amdgpu_kernel void @test_fold_canonicalize_minnum_value_f32(float addrspace(1)* %arg) {
367367 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
377377 ; GCN-LABEL: test_fold_canonicalize_sNaN_value_f32:
378378 ; GCN: v_min_f32_e32 [[V0:v[0-9]+]], 0x7f800001, v{{[0-9]+}}
379379 ; GCN: v_mul_f32_e32 v{{[0-9]+}}, 1.0, [[V0]]
380 ; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
380 ; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
381381 define amdgpu_kernel void @test_fold_canonicalize_sNaN_value_f32(float addrspace(1)* %arg) {
382382 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
383383 %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
392392 ; GFX9: v_min_f32_e32 [[V:v[0-9]+]], 0x7fffff, v{{[0-9]+}}
393393 ; VI: v_min_f32_e32 [[V0:v[0-9]+]], 0x7fffff, v{{[0-9]+}}
394394 ; VI: v_mul_f32_e32 v{{[0-9]+}}, 1.0, [[V0]]
395 ; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
395 ; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
396396 ; GFX9-NOT: 1.0
397397 define amdgpu_kernel void @test_fold_canonicalize_denorm_value_f32(float addrspace(1)* %arg) {
398398 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
408408 ; GFX9: v_max_f32_e32 [[V:v[0-9]+]], 0, v{{[0-9]+}}
409409 ; VI: v_max_f32_e32 [[V0:v[0-9]+]], 0, v{{[0-9]+}}
410410 ; VI: v_mul_f32_e32 v{{[0-9]+}}, 1.0, [[V0]]
411 ; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
411 ; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
412412 ; GFX9-NOT: 1.0
413413 define amdgpu_kernel void @test_fold_canonicalize_maxnum_value_from_load_f32(float addrspace(1)* %arg) {
414414 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
422422
423423 ; GCN-LABEL: test_fold_canonicalize_maxnum_value_f32:
424424 ; GCN: v_max_f32_e32 [[V:v[0-9]+]], 0, v{{[0-9]+}}
425 ; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
425 ; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
426426 ; GCN-NOT: 1.0
427427 define amdgpu_kernel void @test_fold_canonicalize_maxnum_value_f32(float addrspace(1)* %arg) {
428428 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
437437
438438 ; GCN-LABEL: test_fold_canonicalize_maxnum_value_f64:
439439 ; GCN: v_max_f64 [[V:v\[[0-9]+:[0-9]+\]]], v[{{[0-9:]+}}], 0
440 ; GCN: flat_store_dwordx2 v[{{[0-9:]+}}], [[V]]
440 ; GCN: {{flat|global}}_store_dwordx2 v[{{[0-9:]+}}], [[V]]
441441 ; GCN-NOT: 1.0
442442 define amdgpu_kernel void @test_fold_canonicalize_maxnum_value_f64(double addrspace(1)* %arg) {
443443 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
471471 }
472472
473473 ; GCN-LABEL: {{^}}test_fold_canonicalize_load_nnan_value_f32
474 ; GFX9-DENORM: flat_load_dword [[V:v[0-9]+]],
475 ; GFX9-DENORM: flat_store_dword v[{{[0-9:]+}}], [[V]]
474 ; GFX9-DENORM: global_load_dword [[V:v[0-9]+]],
475 ; GFX9-DENORM: global_store_dword v[{{[0-9:]+}}], [[V]]
476476 ; GFX9-DENORM-NOT: 1.0
477477 ; GCN-FLUSH: v_mul_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
478478 define amdgpu_kernel void @test_fold_canonicalize_load_nnan_value_f32(float addrspace(1)* %arg, float addrspace(1)* %out) #1 {
486486 }
487487
488488 ; GCN-LABEL: {{^}}test_fold_canonicalize_load_nnan_value_f64
489 ; GCN: flat_load_dwordx2 [[V:v\[[0-9:]+\]]],
490 ; GCN: flat_store_dwordx2 v[{{[0-9:]+}}], [[V]]
489 ; GCN: {{flat|global}}_load_dwordx2 [[V:v\[[0-9:]+\]]],
490 ; GCN: {{flat|global}}_store_dwordx2 v[{{[0-9:]+}}], [[V]]
491491 ; GCN-NOT: 1.0
492492 define amdgpu_kernel void @test_fold_canonicalize_load_nnan_value_f64(double addrspace(1)* %arg, double addrspace(1)* %out) #1 {
493493 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
500500 }
501501
502502 ; GCN-LABEL: {{^}}test_fold_canonicalize_load_nnan_value_f16
503 ; GCN: flat_load_ushort [[V:v[0-9]+]],
504 ; GCN: flat_store_short v[{{[0-9:]+}}], [[V]]
503 ; GCN: {{flat|global}}_load_ushort [[V:v[0-9]+]],
504 ; GCN: {{flat|global}}_store_short v[{{[0-9:]+}}], [[V]]
505505 ; GCN-NOT: 1.0
506506 define amdgpu_kernel void @test_fold_canonicalize_load_nnan_value_f16(half addrspace(1)* %arg, half addrspace(1)* %out) #1 {
507507 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
3737 }
3838
3939 ; GCN-LABEL: {{^}}test_copysign_out_f32_mag_f16_sign_f32:
40 ; GCN-DAG: {{buffer|flat}}_load_ushort v[[MAG:[0-9]+]]
41 ; GCN-DAG: {{buffer|flat}}_load_dword v[[SIGN:[0-9]+]]
40 ; GCN-DAG: {{buffer|flat|global}}_load_ushort v[[MAG:[0-9]+]]
41 ; GCN-DAG: {{buffer|flat|global}}_load_dword v[[SIGN:[0-9]+]]
4242 ; GCN-DAG: s_brev_b32 s[[CONST:[0-9]+]], -2
4343 ; GCN-DAG: v_cvt_f32_f16_e32 v[[MAG_EXT:[0-9]+]], v[[MAG]]
4444 ; GCN: v_bfi_b32 v[[OUT:[0-9]+]], s[[CONST]], v[[MAG_EXT]], v[[SIGN]]
6161 }
6262
6363 ; GCN-LABEL: {{^}}test_copysign_out_f64_mag_f16_sign_f64:
64 ; GCN-DAG: {{buffer|flat}}_load_ushort v[[MAG:[0-9]+]]
65 ; GCN-DAG: {{buffer|flat}}_load_dwordx2 v{{\[}}[[SIGN_LO:[0-9]+]]:[[SIGN_HI:[0-9]+]]{{\]}}
64 ; GCN-DAG: {{buffer|flat|global}}_load_ushort v[[MAG:[0-9]+]]
65 ; GCN-DAG: {{buffer|flat|global}}_load_dwordx2 v{{\[}}[[SIGN_LO:[0-9]+]]:[[SIGN_HI:[0-9]+]]{{\]}}
6666 ; GCN-DAG: s_brev_b32 s[[CONST:[0-9]+]], -2
6767 ; GCN-DAG: v_cvt_f32_f16_e32 v[[MAG_EXT:[0-9]+]], v[[MAG]]
6868 ; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[MAG_EXT_LO:[0-9]+]]:[[MAG_EXT_HI:[0-9]+]]{{\]}}, v[[MAG_EXT]]
8686 }
8787
8888 ; GCN-LABEL: {{^}}test_copysign_out_f32_mag_f32_sign_f16:
89 ; GCN-DAG: {{buffer|flat}}_load_dword v[[MAG:[0-9]+]]
90 ; GCN-DAG: {{buffer|flat}}_load_ushort v[[SIGN:[0-9]+]]
89 ; GCN-DAG: {{buffer|flat|global}}_load_dword v[[MAG:[0-9]+]]
90 ; GCN-DAG: {{buffer|flat|global}}_load_ushort v[[SIGN:[0-9]+]]
9191 ; GCN-DAG: s_brev_b32 s[[CONST:[0-9]+]], -2
9292 ; SI-DAG: v_cvt_f32_f16_e32 v[[SIGN_F32:[0-9]+]], v[[SIGN]]
9393 ; SI: v_bfi_b32 v[[OUT:[0-9]+]], s[[CONST]], v[[MAG]], v[[SIGN_F32]]
112112 }
113113
114114 ; GCN-LABEL: {{^}}test_copysign_out_f64_mag_f64_sign_f16:
115 ; GCN-DAG: {{buffer|flat}}_load_dwordx2 v{{\[}}[[MAG_LO:[0-9]+]]:[[MAG_HI:[0-9]+]]{{\]}}
116 ; GCN-DAG: {{buffer|flat}}_load_ushort v[[SIGN:[0-9]+]]
115 ; GCN-DAG: {{buffer|flat|global}}_load_dwordx2 v{{\[}}[[MAG_LO:[0-9]+]]:[[MAG_HI:[0-9]+]]{{\]}}
116 ; GCN-DAG: {{buffer|flat|global}}_load_ushort v[[SIGN:[0-9]+]]
117117 ; GCN-DAG: s_brev_b32 s[[CONST:[0-9]+]], -2
118118 ; SI-DAG: v_cvt_f32_f16_e32 v[[SIGN_F32:[0-9]+]], v[[SIGN]]
119119 ; SI: v_bfi_b32 v[[OUT_HI:[0-9]+]], s[[CONST]], v[[MAG_HI]], v[[SIGN_F32]]
138138 }
139139
140140 ; GCN-LABEL: {{^}}test_copysign_out_f16_mag_f16_sign_f32:
141 ; GCN-DAG: {{buffer|flat}}_load_ushort v[[MAG:[0-9]+]]
142 ; GCN-DAG: {{buffer|flat}}_load_dword v[[SIGN:[0-9]+]]
141 ; GCN-DAG: {{buffer|flat|global}}_load_ushort v[[MAG:[0-9]+]]
142 ; GCN-DAG: {{buffer|flat|global}}_load_dword v[[SIGN:[0-9]+]]
143143 ; SI-DAG: s_brev_b32 s[[CONST:[0-9]+]], -2
144144 ; SI-DAG: v_cvt_f32_f16_e32 v[[MAG_F32:[0-9]+]], v[[MAG]]
145145 ; SI: v_bfi_b32 v[[OUT_F32:[0-9]+]], s[[CONST]], v[[MAG_F32]], v[[SIGN]]
166166 }
167167
168168 ; GCN-LABEL: {{^}}test_copysign_out_f16_mag_f16_sign_f64:
169 ; GCN-DAG: {{buffer|flat}}_load_ushort v[[MAG:[0-9]+]]
170 ; GCN-DAG: {{buffer|flat}}_load_dwordx2 v{{\[}}[[SIGN_LO:[0-9]+]]:[[SIGN_HI:[0-9]+]]{{\]}}
169 ; GCN-DAG: {{buffer|flat|global}}_load_ushort v[[MAG:[0-9]+]]
170 ; GCN-DAG: {{buffer|flat|global}}_load_dwordx2 v{{\[}}[[SIGN_LO:[0-9]+]]:[[SIGN_HI:[0-9]+]]{{\]}}
171171 ; SI-DAG: s_brev_b32 s[[CONST:[0-9]+]], -2
172172 ; SI-DAG: v_cvt_f32_f16_e32 v[[MAG_F32:[0-9]+]], v[[MAG]]
173173 ; SI: v_bfi_b32 v[[OUT_F32:[0-9]+]], s[[CONST]], v[[MAG_F32]], v[[SIGN_HI]]
194194 }
195195
196196 ; GCN-LABEL: {{^}}test_copysign_out_f16_mag_f32_sign_f16:
197 ; GCN-DAG: {{buffer|flat}}_load_dword v[[MAG:[0-9]+]]
198 ; GCN-DAG: {{buffer|flat}}_load_ushort v[[SIGN:[0-9]+]]
197 ; GCN-DAG: {{buffer|flat|global}}_load_dword v[[MAG:[0-9]+]]
198 ; GCN-DAG: {{buffer|flat|global}}_load_ushort v[[SIGN:[0-9]+]]
199199 ; SI-DAG: s_brev_b32 s[[CONST:[0-9]+]], -2
200200 ; SI-DAG: v_cvt_f16_f32_e32 v[[MAG_TRUNC:[0-9]+]], v[[MAG]]
201201 ; SI-DAG: v_cvt_f32_f16_e32 v[[SIGN_F32:[0-9]+]], v[[SIGN]]
164164 }
165165
166166 ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat0_srcmod0:
167 ; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
168 ; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
169 ; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
167 ; GCN: {{buffer_|flat_|global_}}load_dword [[A:v[0-9]+]]
168 ; GCN: {{buffer_|flat_|global_}}load_dword [[B:v[0-9]+]]
169 ; GCN: {{buffer_|flat_|global_}}load_dword [[C:v[0-9]+]]
170170 ; GCN: v_med3_f32 v{{[0-9]+}}, -[[A]], [[B]], [[C]]
171171 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
172172 %tid = call i32 @llvm.amdgcn.workitem.id.x()
187187 }
188188
189189 ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat0_srcmod1:
190 ; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
191 ; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
192 ; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
190 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
191 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
192 ; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
193193 ; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], -[[B]], [[C]]
194194 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod1(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
195195 %tid = call i32 @llvm.amdgcn.workitem.id.x()
210210 }
211211
212212 ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat0_srcmod2:
213 ; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
214 ; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
215 ; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
213 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
214 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
215 ; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
216216 ; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], -[[C]]
217217 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod2(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
218218 %tid = call i32 @llvm.amdgcn.workitem.id.x()
233233 }
234234
235235 ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat0_srcmod012:
236 ; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
237 ; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
238 ; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
236 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
237 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
238 ; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
239239 ; GCN: v_med3_f32 v{{[0-9]+}}, -[[A]], |[[B]]|, -|[[C]]|
240240 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod012(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
241241 %tid = call i32 @llvm.amdgcn.workitem.id.x()
262262 }
263263
264264 ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat0_negabs012:
265 ; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
266 ; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
267 ; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
265 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
266 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
267 ; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
268268 ; GCN: v_med3_f32 v{{[0-9]+}}, -|[[A]]|, -|[[B]]|, -|[[C]]|
269269 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_negabs012(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
270270 %tid = call i32 @llvm.amdgcn.workitem.id.x()
293293 }
294294
295295 ; GCN-LABEL: {{^}}v_nnan_inputs_med3_f32_pat0:
296 ; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
297 ; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
298 ; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
296 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
297 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
298 ; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
299299 ; GCN-DAG: v_add_f32_e32 [[A_ADD:v[0-9]+]], 1.0, [[A]]
300300 ; GCN-DAG: v_add_f32_e32 [[B_ADD:v[0-9]+]], 2.0, [[B]]
301301 ; GCN-DAG: v_add_f32_e32 [[C_ADD:v[0-9]+]], 4.0, [[C]]
336336 ; + commute outermost max
337337
338338 ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat0:
339 ; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
340 ; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
341 ; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
339 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
340 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
341 ; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
342342 ; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], [[C]]
343343 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
344344 %tid = call i32 @llvm.amdgcn.workitem.id.x()
358358 }
359359
360360 ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat1:
361 ; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
362 ; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
363 ; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
361 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
362 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
363 ; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
364364 ; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], [[C]]
365365 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat1(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
366366 %tid = call i32 @llvm.amdgcn.workitem.id.x()
380380 }
381381
382382 ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat2:
383 ; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
384 ; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
385 ; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
383 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
384 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
385 ; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
386386 ; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], [[C]]
387387 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat2(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
388388 %tid = call i32 @llvm.amdgcn.workitem.id.x()
402402 }
403403
404404 ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat3:
405 ; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
406 ; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
407 ; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
405 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
406 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
407 ; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
408408 ; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], [[C]]
409409 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat3(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
410410 %tid = call i32 @llvm.amdgcn.workitem.id.x()
424424 }
425425
426426 ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat4:
427 ; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
428 ; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
429 ; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
427 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
428 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
429 ; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
430430 ; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]]
431431 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat4(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
432432 %tid = call i32 @llvm.amdgcn.workitem.id.x()
446446 }
447447
448448 ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat5:
449 ; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
450 ; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
451 ; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
449 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
450 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
451 ; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
452452 ; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]]
453453 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat5(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
454454 %tid = call i32 @llvm.amdgcn.workitem.id.x()
468468 }
469469
470470 ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat6:
471 ; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
472 ; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
473 ; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
471 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
472 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
473 ; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
474474 ; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]]
475475 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat6(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
476476 %tid = call i32 @llvm.amdgcn.workitem.id.x()
490490 }
491491
492492 ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat7:
493 ; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
494 ; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
495 ; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
493 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
494 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
495 ; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
496496 ; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]]
497497 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat7(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
498498 %tid = call i32 @llvm.amdgcn.workitem.id.x()
512512 }
513513
514514 ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat8:
515 ; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
516 ; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
517 ; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
515 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
516 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
517 ; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
518518 ; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], [[C]]
519519 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat8(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
520520 %tid = call i32 @llvm.amdgcn.workitem.id.x()
534534 }
535535
536536 ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat9:
537 ; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
538 ; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
539 ; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
537 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
538 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
539 ; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
540540 ; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]]
541541 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat9(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
542542 %tid = call i32 @llvm.amdgcn.workitem.id.x()
556556 }
557557
558558 ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat10:
559 ; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
560 ; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
561 ; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
559 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
560 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
561 ; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
562562 ; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], [[C]]
563563 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat10(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
564564 %tid = call i32 @llvm.amdgcn.workitem.id.x()
578578 }
579579
580580 ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat11:
581 ; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
582 ; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
583 ; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
581 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
582 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
583 ; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
584584 ; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]]
585585 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat11(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
586586 %tid = call i32 @llvm.amdgcn.workitem.id.x()
600600 }
601601
602602 ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat12:
603 ; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
604 ; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
605 ; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
603 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
604 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
605 ; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
606606 ; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]]
607607 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat12(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
608608 %tid = call i32 @llvm.amdgcn.workitem.id.x()
622622 }
623623
624624 ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat13:
625 ; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
626 ; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
627 ; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
625 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
626 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
627 ; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
628628 ; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]]
629629 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat13(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
630630 %tid = call i32 @llvm.amdgcn.workitem.id.x()
644644 }
645645
646646 ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat14:
647 ; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
648 ; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
649 ; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
647 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
648 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
649 ; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
650650 ; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], [[C]]
651651 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat14(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
652652 %tid = call i32 @llvm.amdgcn.workitem.id.x()
666666 }
667667
668668 ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat15:
669 ; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
670 ; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
671 ; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
669 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
670 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
671 ; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
672672 ; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]]
673673 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat15(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
674674 %tid = call i32 @llvm.amdgcn.workitem.id.x()
841841 }
842842
843843 ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat0_srcmod0_mismatch:
844 ; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
845 ; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
846 ; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
844 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
845 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
846 ; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
847847 ; GCN-DAG: v_min_f32
848848 ; GCN-DAG: v_max_f32
849849 ; GCN-DAG: v_min_f32
868868
869869 ; A simple min and max is not sufficient
870870 ; GCN-LABEL: {{^}}v_test_global_nnans_min_max_f32:
871 ; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
872 ; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
873 ; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
871 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
872 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
873 ; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
874874 ; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], [[A]], [[B]]
875875 ; GCN: v_min_f32_e32 v{{[0-9]+}}, [[MAX]], [[C]]
876876 define amdgpu_kernel void @v_test_global_nnans_min_max_f32(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
914914 }
915915
916916 ; GCN-LABEL: {{^}}v_nnan_inputs_med3_f16_pat0:
917 ; GCN: {{buffer_|flat_}}load_ushort [[A:v[0-9]+]]
918 ; GCN: {{buffer_|flat_}}load_ushort [[B:v[0-9]+]]
919 ; GCN: {{buffer_|flat_}}load_ushort [[C:v[0-9]+]]
917 ; GCN: {{buffer|flat|global}}_load_ushort [[A:v[0-9]+]]
918 ; GCN: {{buffer|flat|global}}_load_ushort [[B:v[0-9]+]]
919 ; GCN: {{buffer|flat|global}}_load_ushort [[C:v[0-9]+]]
920920
921921 ; SI: v_cvt_f32_f16
922922 ; SI: v_cvt_f32_f16
2727 }
2828
2929 ; GCN-LABEL: {{^}}fmuladd_2.0_a_b_v2f16:
30 ; GCN: {{buffer|flat}}_load_dword [[R1:v[0-9]+]],
31 ; GCN: {{buffer|flat}}_load_dword [[R2:v[0-9]+]],
30 ; GCN: {{buffer|flat|global}}_load_dword [[R1:v[0-9]+]],
31 ; GCN: {{buffer|flat|global}}_load_dword [[R2:v[0-9]+]],
3232 ; GFX9-FLUSH: v_pk_add_f16 [[ADD0:v[0-9]+]], [[R1]], [[R1]]
3333 ; GFX9-FLUSH: v_pk_add_f16 [[RESULT:v[0-9]+]], [[ADD0]], [[R2]]
3434
35 ; GFX9-FLUSH: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
35 ; GFX9-FLUSH: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
3636
3737 ; GFX9-DENORM: v_pk_fma_f16 [[RESULT:v[0-9]+]], [[R1]], 2.0, [[R2]]
38 ; GFX9-DENORM: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
38 ; GFX9-DENORM: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
3939 define amdgpu_kernel void @fmuladd_2.0_a_b_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
4040 %tid = call i32 @llvm.amdgcn.workitem.id.x()
4141 %gep.0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
5151 }
5252
5353 ; GCN-LABEL: {{^}}fmuladd_a_2.0_b_v2f16:
54 ; GCN: {{buffer|flat}}_load_dword [[R1:v[0-9]+]],
55 ; GCN: {{buffer|flat}}_load_dword [[R2:v[0-9]+]],
54 ; GCN: {{buffer|flat|global}}_load_dword [[R1:v[0-9]+]],
55 ; GCN: {{buffer|flat|global}}_load_dword [[R2:v[0-9]+]],
5656 ; GFX9-FLUSH: v_pk_add_f16 [[ADD0:v[0-9]+]], [[R1]], [[R1]]
5757 ; GFX9-FLUSH: v_pk_add_f16 [[RESULT:v[0-9]+]], [[ADD0]], [[R2]]
5858
59 ; GFX9-FLUSH: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
59 ; GFX9-FLUSH: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
6060
6161 ; GFX9-DENORM: v_pk_fma_f16 [[RESULT:v[0-9]+]], [[R1]], 2.0, [[R2]]
62 ; GFX9-DENORM: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
62 ; GFX9-DENORM: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
6363 define amdgpu_kernel void @fmuladd_a_2.0_b_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
6464 %tid = call i32 @llvm.amdgcn.workitem.id.x()
6565 %gep.0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
7575 }
7676
7777 ; GCN-LABEL: {{^}}fadd_a_a_b_v2f16:
78 ; GCN: {{buffer|flat}}_load_dword [[R1:v[0-9]+]],
79 ; GCN: {{buffer|flat}}_load_dword [[R2:v[0-9]+]],
78 ; GCN: {{buffer|flat|global}}_load_dword [[R1:v[0-9]+]],
79 ; GCN: {{buffer|flat|global}}_load_dword [[R2:v[0-9]+]],
8080 ; GFX9-FLUSH: v_pk_add_f16 [[ADD0:v[0-9]+]], [[R1]], [[R1]]
8181 ; GFX9-FLUSH: v_pk_add_f16 [[RESULT:v[0-9]+]], [[ADD0]], [[R2]]
8282
8484 ; GFX9-DENORM-STRICT: v_pk_add_f16 [[RESULT:v[0-9]+]], [[ADD0]], [[R2]]
8585
8686 ; GFX9-DENORM-CONTRACT: v_pk_fma_f16 [[RESULT:v[0-9]+]], [[R1]], 2.0, [[R2]]
87 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
87 ; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
8888 define amdgpu_kernel void @fadd_a_a_b_v2f16(<2 x half> addrspace(1)* %out,
8989 <2 x half> addrspace(1)* %in1,
9090 <2 x half> addrspace(1)* %in2) #0 {
2525 ; GFX89-NOT: _and
2626 ; GFX89: v_mul_f16_e64 [[MUL:v[0-9]+]], {{v[0-9]+}}, -|{{v[0-9]+}}|
2727 ; GFX89-NOT: [[MUL]]
28 ; GFX89: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
28 ; GFX89: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
2929 define amdgpu_kernel void @fneg_fabs_fmul_f16(half addrspace(1)* %out, half %x, half %y) {
3030 %fabs = call half @llvm.fabs.f16(half %x)
3131 %fsub = fsub half -0.0, %fabs
100100 ; GFX9: s_or_b32 s{{[0-9]+}}, [[MASK]], s{{[0-9]+}}
101101 ; GFX9: s_or_b32 s{{[0-9]+}}, [[MASK]], s{{[0-9]+}}
102102
103 ; GCN: flat_store_dwordx2
103 ; GCN: {{flat|global}}_store_dwordx2
104104 define amdgpu_kernel void @fneg_fabs_v4f16(<4 x half> addrspace(1)* %out, <4 x half> %in) {
105105 %fabs = call <4 x half> @llvm.fabs.v4f16(<4 x half> %in)
106106 %fsub = fsub <4 x half> , %fabs
1313 ; well.
1414
1515 ; GCN-LABEL: {{^}}v_fneg_f16:
16 ; GCN: flat_load_ushort [[VAL:v[0-9]+]],
16 ; GCN: {{flat|global}}_load_ushort [[VAL:v[0-9]+]],
1717 ; GCN: v_xor_b32_e32 [[XOR:v[0-9]+]], 0x8000, [[VAL]]
1818 ; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[XOR]]
1919 ; SI: buffer_store_short [[XOR]]
2828 }
2929
3030 ; GCN-LABEL: {{^}}fneg_free_f16:
31 ; GCN: flat_load_ushort [[NEG_VALUE:v[0-9]+]],
31 ; GCN: {{flat|global}}_load_ushort [[NEG_VALUE:v[0-9]+]],
3232
3333 ; XCI: s_xor_b32 [[XOR:s[0-9]+]], [[NEG_VALUE]], 0x8000{{$}}
3434 ; CI: v_xor_b32_e32 [[XOR:v[0-9]+]], 0x8000, [[NEG_VALUE]]
4141 }
4242
4343 ; GCN-LABEL: {{^}}v_fneg_fold_f16:
44 ; GCN: flat_load_ushort [[NEG_VALUE:v[0-9]+]]
44 ; GCN: {{flat|global}}_load_ushort [[NEG_VALUE:v[0-9]+]]
4545
4646 ; CI-DAG: v_cvt_f32_f16_e32 [[CVT_VAL:v[0-9]+]], [[NEG_VALUE]]
4747 ; CI-DAG: v_cvt_f32_f16_e64 [[NEG_CVT0:v[0-9]+]], -[[NEG_VALUE]]
8080 }
8181
8282 ; GCN-LABEL: {{^}}v_fneg_v2f16:
83 ; GCN: flat_load_dword [[VAL:v[0-9]+]]
83 ; GCN: {{flat|global}}_load_dword [[VAL:v[0-9]+]]
8484 ; GCN: v_xor_b32_e32 v{{[0-9]+}}, 0x80008000, [[VAL]]
8585 define amdgpu_kernel void @v_fneg_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
8686 %tid = call i32 @llvm.amdgcn.workitem.id.x()
106106 }
107107
108108 ; GCN-LABEL: {{^}}v_fneg_fold_v2f16:
109 ; GCN: flat_load_dword [[VAL:v[0-9]+]]
109 ; GCN: {{flat|global}}_load_dword [[VAL:v[0-9]+]]
110110
111111 ; CI: v_cvt_f32_f16_e64 v{{[0-9]+}}, -v{{[0-9]+}}
112112 ; CI: v_cvt_f32_f16_e64 v{{[0-9]+}}, -v{{[0-9]+}}
129129 }
130130
131131 ; GCN-LABEL: {{^}}v_extract_fneg_fold_v2f16:
132 ; GCN-DAG: flat_load_dword [[VAL:v[0-9]+]]
132 ; GCN-DAG: {{flat|global}}_load_dword [[VAL:v[0-9]+]]
133133 ; CI-DAG: v_mul_f32_e32 v{{[0-9]+}}, -4.0, v{{[0-9]+}}
134134 ; CI-DAG: v_sub_f32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}
135135
151151 }
152152
153153 ; GCN-LABEL: {{^}}v_extract_fneg_no_fold_v2f16:
154 ; GCN: flat_load_dword [[VAL:v[0-9]+]]
154 ; GCN: {{flat|global}}_load_dword [[VAL:v[0-9]+]]
155155 ; GCN: v_xor_b32_e32 [[NEG:v[0-9]+]], 0x80008000, [[VAL]]
156156 ; GCN: v_lshrrev_b32_e32 [[ELT1:v[0-9]+]], 16, [[NEG]]
157157 define amdgpu_kernel void @v_extract_fneg_no_fold_v2f16(<2 x half> addrspace(1)* %in) #0 {
None ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
1 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s
2
3 ; FUNC-LABEL: {{^}}atomic_add_i32_offset:
4 ; GCN: buffer_atomic_add v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
0 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI,SIVI %s
1 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,SIVI %s
2 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
3
4 ; GCN-LABEL: {{^}}atomic_add_i32_offset:
5 ; SIVI: buffer_atomic_add v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
6 ; GFX9: global_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
57 define amdgpu_kernel void @atomic_add_i32_offset(i32 addrspace(1)* %out, i32 %in) {
68 entry:
79 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
911 ret void
1012 }
1113
12 ; FUNC-LABEL: {{^}}atomic_add_i32_soffset:
13 ; GCN: s_mov_b32 [[SREG:s[0-9]+]], 0x8ca0
14 ; GCN: buffer_atomic_add v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], [[SREG]]{{$}}
14 ; GCN-LABEL: {{^}}atomic_add_i32_max_neg_offset:
15 ; GFX9: global_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off offset:-4096{{$}}
16 define amdgpu_kernel void @atomic_add_i32_max_neg_offset(i32 addrspace(1)* %out, i32 %in) {
17 entry:
18 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 -1024
19 %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
20 ret void
21 }
22
23 ; GCN-LABEL: {{^}}atomic_add_i32_soffset:
24 ; SIVI: s_mov_b32 [[SREG:s[0-9]+]], 0x8ca0
25 ; SIVI: buffer_atomic_add v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], [[SREG]]{{$}}
26
27 ; GFX9: global_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off{{$}}
1528 define amdgpu_kernel void @atomic_add_i32_soffset(i32 addrspace(1)* %out, i32 %in) {
1629 entry:
1730 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 9000
1932 ret void
2033 }
2134
22 ; FUNC-LABEL: {{^}}atomic_add_i32_huge_offset:
35 ; GCN-LABEL: {{^}}atomic_add_i32_huge_offset:
2336 ; SI-DAG: v_mov_b32_e32 v[[PTRLO:[0-9]+]], 0xdeac
2437 ; SI-DAG: v_mov_b32_e32 v[[PTRHI:[0-9]+]], 0xabcd
2538 ; SI: buffer_atomic_add v{{[0-9]+}}, v{{\[}}[[PTRLO]]:[[PTRHI]]{{\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
39
2640 ; VI: flat_atomic_add
41
42 ; GFX9: global_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off{{$}}
2743 define amdgpu_kernel void @atomic_add_i32_huge_offset(i32 addrspace(1)* %out, i32 %in) {
2844 entry:
2945 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 47224239175595
3248 ret void
3349 }
3450
35 ; FUNC-LABEL: {{^}}atomic_add_i32_ret_offset:
36 ; GCN: buffer_atomic_add [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
37 ; GCN: buffer_store_dword [[RET]]
51 ; GCN-LABEL: {{^}}atomic_add_i32_ret_offset:
52 ; SIVI: buffer_atomic_add [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
53 ; SIVI: buffer_store_dword [[RET]]
54
55 ; GFX9: global_atomic_add v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
3856 define amdgpu_kernel void @atomic_add_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
3957 entry:
4058 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
4361 ret void
4462 }
4563
46 ; FUNC-LABEL: {{^}}atomic_add_i32_addr64_offset:
64 ; GCN-LABEL: {{^}}atomic_add_i32_addr64_offset:
4765 ; SI: buffer_atomic_add v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
4866 ; VI: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
67 ; GFX9: global_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
4968 define amdgpu_kernel void @atomic_add_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
5069 entry:
5170 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
5473 ret void
5574 }
5675
57 ; FUNC-LABEL: {{^}}atomic_add_i32_ret_addr64_offset:
76 ; GCN-LABEL: {{^}}atomic_add_i32_ret_addr64_offset:
5877 ; SI: buffer_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
5978 ; VI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
60 ; GCN: buffer_store_dword [[RET]]
79 ; SIVI: buffer_store_dword [[RET]]
80
81 ; GFX9: global_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
82 ; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
6183 define amdgpu_kernel void @atomic_add_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
6284 entry:
6385 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
6789 ret void
6890 }
6991
70 ; FUNC-LABEL: {{^}}atomic_add_i32:
71 ; GCN: buffer_atomic_add v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
92 ; GCN-LABEL: {{^}}atomic_add_i32:
93 ; SIVI: buffer_atomic_add v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
94 ; GFX9: global_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off{{$}}
7295 define amdgpu_kernel void @atomic_add_i32(i32 addrspace(1)* %out, i32 %in) {
7396 entry:
7497 %val = atomicrmw volatile add i32 addrspace(1)* %out, i32 %in seq_cst
7598 ret void
7699 }
77100
78 ; FUNC-LABEL: {{^}}atomic_add_i32_ret:
79 ; GCN: buffer_atomic_add [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
80 ; GCN: buffer_store_dword [[RET]]
101 ; GCN-LABEL: {{^}}atomic_add_i32_ret:
102 ; SIVI: buffer_atomic_add [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
103 ; SIVI: buffer_store_dword [[RET]]
104
105 ; GFX9: global_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
106 ; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
81107 define amdgpu_kernel void @atomic_add_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
82108 entry:
83109 %val = atomicrmw volatile add i32 addrspace(1)* %out, i32 %in seq_cst
85111 ret void
86112 }
87113
88 ; FUNC-LABEL: {{^}}atomic_add_i32_addr64:
114 ; GCN-LABEL: {{^}}atomic_add_i32_addr64:
89115 ; SI: buffer_atomic_add v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
90116 ; VI: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
117 ; GFX9: global_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
91118 define amdgpu_kernel void @atomic_add_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
92119 entry:
93120 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
95122 ret void
96123 }
97124
98 ; FUNC-LABEL: {{^}}atomic_add_i32_ret_addr64:
125 ; GCN-LABEL: {{^}}atomic_add_i32_ret_addr64:
99126 ; SI: buffer_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
100127 ; VI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
101 ; GCN: buffer_store_dword [[RET]]
128 ; SIVI: buffer_store_dword [[RET]]
129
130 ; GFX9: global_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
102131 define amdgpu_kernel void @atomic_add_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
103132 entry:
104133 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
107136 ret void
108137 }
109138
110 ; FUNC-LABEL: {{^}}atomic_and_i32_offset:
111 ; GCN: buffer_atomic_and v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
139 ; GCN-LABEL: {{^}}atomic_and_i32_offset:
140 ; SIVI: buffer_atomic_and v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
141
142 ; GFX9: global_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
112143 define amdgpu_kernel void @atomic_and_i32_offset(i32 addrspace(1)* %out, i32 %in) {
113144 entry:
114145 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
116147 ret void
117148 }
118149
119 ; FUNC-LABEL: {{^}}atomic_and_i32_ret_offset:
120 ; GCN: buffer_atomic_and [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
121 ; GCN: buffer_store_dword [[RET]]
150 ; GCN-LABEL: {{^}}atomic_and_i32_ret_offset:
151 ; SIVI: buffer_atomic_and [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
152 ; SIVI: buffer_store_dword [[RET]]
153
154 ; GFX9: global_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
122155 define amdgpu_kernel void @atomic_and_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
123156 entry:
124157 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
127160 ret void
128161 }
129162
130 ; FUNC-LABEL: {{^}}atomic_and_i32_addr64_offset:
163 ; GCN-LABEL: {{^}}atomic_and_i32_addr64_offset:
131164 ; SI: buffer_atomic_and v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
132165 ; VI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
166
167 ; GFX9: global_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
133168 define amdgpu_kernel void @atomic_and_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
134169 entry:
135170 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
138173 ret void
139174 }
140175
141 ; FUNC-LABEL: {{^}}atomic_and_i32_ret_addr64_offset:
176 ; GCN-LABEL: {{^}}atomic_and_i32_ret_addr64_offset:
142177 ; SI: buffer_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
143178 ; VI: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
144 ; GCN: buffer_store_dword [[RET]]
179 ; SIVI: buffer_store_dword [[RET]]
180
181 ; GFX9: global_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
145182 define amdgpu_kernel void @atomic_and_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
146183 entry:
147184 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
151188 ret void
152189 }
153190
154 ; FUNC-LABEL: {{^}}atomic_and_i32:
155 ; GCN: buffer_atomic_and v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
191 ; GCN-LABEL: {{^}}atomic_and_i32:
192 ; SIVI: buffer_atomic_and v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
193
194 ; GFX9: global_atomic_and v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off{{$}}
156195 define amdgpu_kernel void @atomic_and_i32(i32 addrspace(1)* %out, i32 %in) {
157196 entry:
158197 %val = atomicrmw volatile and i32 addrspace(1)* %out, i32 %in seq_cst
159198 ret void
160199 }
161200
162 ; FUNC-LABEL: {{^}}atomic_and_i32_ret:
163 ; GCN: buffer_atomic_and [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
164 ; GCN: buffer_store_dword [[RET]]
201 ; GCN-LABEL: {{^}}atomic_and_i32_ret:
202 ; SIVI: buffer_atomic_and [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
203 ; SIVI: buffer_store_dword [[RET]]
204
205 ; GFX9: global_atomic_and v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
165206 define amdgpu_kernel void @atomic_and_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
166207 entry:
167208 %val = atomicrmw volatile and i32 addrspace(1)* %out, i32 %in seq_cst
169210 ret void
170211 }
171212
172 ; FUNC-LABEL: {{^}}atomic_and_i32_addr64:
213 ; GCN-LABEL: {{^}}atomic_and_i32_addr64:
173214 ; SI: buffer_atomic_and v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
174215 ; VI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
216
217 ; GFX9: global_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
175218 define amdgpu_kernel void @atomic_and_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
176219 entry:
177220 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
179222 ret void
180223 }
181224
182 ; FUNC-LABEL: {{^}}atomic_and_i32_ret_addr64:
225 ; GCN-LABEL: {{^}}atomic_and_i32_ret_addr64:
183226 ; SI: buffer_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
184227 ; VI: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
185 ; GCN: buffer_store_dword [[RET]]
228 ; SIVI: buffer_store_dword [[RET]]
229
230 ; GFX9: global_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
186231 define amdgpu_kernel void @atomic_and_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
187232 entry:
188233 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
191236 ret void
192237 }
193238
194 ; FUNC-LABEL: {{^}}atomic_sub_i32_offset:
195 ; GCN: buffer_atomic_sub v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
239 ; GCN-LABEL: {{^}}atomic_sub_i32_offset:
240 ; SIVI: buffer_atomic_sub v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
241
242 ; GFX9: global_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
196243 define amdgpu_kernel void @atomic_sub_i32_offset(i32 addrspace(1)* %out, i32 %in) {
197244 entry:
198245 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
200247 ret void
201248 }
202249
203 ; FUNC-LABEL: {{^}}atomic_sub_i32_ret_offset:
204 ; GCN: buffer_atomic_sub [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
205 ; GCN: buffer_store_dword [[RET]]
250 ; GCN-LABEL: {{^}}atomic_sub_i32_ret_offset:
251 ; SIVI: buffer_atomic_sub [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
252 ; SIVI: buffer_store_dword [[RET]]
253
254 ; GFX9: global_atomic_sub v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
206255 define amdgpu_kernel void @atomic_sub_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
207256 entry:
208257 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
211260 ret void
212261 }
213262
214 ; FUNC-LABEL: {{^}}atomic_sub_i32_addr64_offset:
263 ; GCN-LABEL: {{^}}atomic_sub_i32_addr64_offset:
215264 ; SI: buffer_atomic_sub v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
216265 ; VI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
266
267 ; GFX9: global_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
217268 define amdgpu_kernel void @atomic_sub_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
218269 entry:
219270 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
222273 ret void
223274 }
224275
225 ; FUNC-LABEL: {{^}}atomic_sub_i32_ret_addr64_offset:
276 ; GCN-LABEL: {{^}}atomic_sub_i32_ret_addr64_offset:
226277 ; SI: buffer_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
227278 ; VI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
228 ; GCN: buffer_store_dword [[RET]]
279 ; SIVI: buffer_store_dword [[RET]]
280
281 ; GFX9: global_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
229282 define amdgpu_kernel void @atomic_sub_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
230283 entry:
231284 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
235288 ret void
236289 }
237290
238 ; FUNC-LABEL: {{^}}atomic_sub_i32:
239 ; GCN: buffer_atomic_sub v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
291 ; GCN-LABEL: {{^}}atomic_sub_i32:
292 ; SIVI: buffer_atomic_sub v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
293
294 ; GFX9: global_atomic_sub v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off{{$}}
240295 define amdgpu_kernel void @atomic_sub_i32(i32 addrspace(1)* %out, i32 %in) {
241296 entry:
242297 %val = atomicrmw volatile sub i32 addrspace(1)* %out, i32 %in seq_cst
243298 ret void
244299 }
245300
246 ; FUNC-LABEL: {{^}}atomic_sub_i32_ret:
247 ; GCN: buffer_atomic_sub [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
248 ; GCN: buffer_store_dword [[RET]]
301 ; GCN-LABEL: {{^}}atomic_sub_i32_ret:
302 ; SIVI: buffer_atomic_sub [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
303 ; SIVI: buffer_store_dword [[RET]]
304
305 ; GFX9: global_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
249306 define amdgpu_kernel void @atomic_sub_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
250307 entry:
251308 %val = atomicrmw volatile sub i32 addrspace(1)* %out, i32 %in seq_cst
253310 ret void
254311 }
255312
256 ; FUNC-LABEL: {{^}}atomic_sub_i32_addr64:
313 ; GCN-LABEL: {{^}}atomic_sub_i32_addr64:
257314 ; SI: buffer_atomic_sub v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
258315 ; VI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
316
317 ; GFX9: global_atomic_sub v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off{{$}}
259318 define amdgpu_kernel void @atomic_sub_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
260319 entry:
261320 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
263322 ret void
264323 }
265324
266 ; FUNC-LABEL: {{^}}atomic_sub_i32_ret_addr64:
325 ; GCN-LABEL: {{^}}atomic_sub_i32_ret_addr64:
267326 ; SI: buffer_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
268327 ; VI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
269 ; GCN: buffer_store_dword [[RET]]
328 ; SIVI: buffer_store_dword [[RET]]
329
330 ; GFX9: global_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
270331 define amdgpu_kernel void @atomic_sub_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
271332 entry:
272333 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
275336 ret void
276337 }
277338
278 ; FUNC-LABEL: {{^}}atomic_max_i32_offset:
279 ; GCN: buffer_atomic_smax v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
339 ; GCN-LABEL: {{^}}atomic_max_i32_offset:
340 ; SIVI: buffer_atomic_smax v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
341
342 ; GFX9: global_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
280343 define amdgpu_kernel void @atomic_max_i32_offset(i32 addrspace(1)* %out, i32 %in) {
281344 entry:
282345 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
284347 ret void
285348 }
286349
287 ; FUNC-LABEL: {{^}}atomic_max_i32_ret_offset:
288 ; GCN: buffer_atomic_smax [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
289 ; GCN: buffer_store_dword [[RET]]
350 ; GCN-LABEL: {{^}}atomic_max_i32_ret_offset:
351 ; SIVI: buffer_atomic_smax [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
352 ; SIVI: buffer_store_dword [[RET]]
353
354 ; GFX9: global_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
290355 define amdgpu_kernel void @atomic_max_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
291356 entry:
292357 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
295360 ret void
296361 }
297362
298 ; FUNC-LABEL: {{^}}atomic_max_i32_addr64_offset:
363 ; GCN-LABEL: {{^}}atomic_max_i32_addr64_offset:
299364 ; SI: buffer_atomic_smax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
300365 ; VI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
366
367 ; GFX9: global_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
301368 define amdgpu_kernel void @atomic_max_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
302369 entry:
303370 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
306373 ret void
307374 }
308375
309 ; FUNC-LABEL: {{^}}atomic_max_i32_ret_addr64_offset:
376 ; GCN-LABEL: {{^}}atomic_max_i32_ret_addr64_offset:
310377 ; SI: buffer_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
311378 ; VI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
312 ; GCN: buffer_store_dword [[RET]]
379 ; SIVI: buffer_store_dword [[RET]]
380
381 ; GFX9: global_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
313382 define amdgpu_kernel void @atomic_max_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
314383 entry:
315384 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
319388 ret void
320389 }
321390
322 ; FUNC-LABEL: {{^}}atomic_max_i32:
323 ; GCN: buffer_atomic_smax v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
391 ; GCN-LABEL: {{^}}atomic_max_i32:
392 ; SIVI: buffer_atomic_smax v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
393
394 ; GFX9: global_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
324395 define amdgpu_kernel void @atomic_max_i32(i32 addrspace(1)* %out, i32 %in) {
325396 entry:
326397 %val = atomicrmw volatile max i32 addrspace(1)* %out, i32 %in seq_cst
327398 ret void
328399 }
329400
330 ; FUNC-LABEL: {{^}}atomic_max_i32_ret:
331 ; GCN: buffer_atomic_smax [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
332 ; GCN: buffer_store_dword [[RET]]
401 ; GCN-LABEL: {{^}}atomic_max_i32_ret:
402 ; SIVI: buffer_atomic_smax [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
403 ; SIVI: buffer_store_dword [[RET]]
404
405 ; GFX9: global_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
333406 define amdgpu_kernel void @atomic_max_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
334407 entry:
335408 %val = atomicrmw volatile max i32 addrspace(1)* %out, i32 %in seq_cst
337410 ret void
338411 }
339412
340 ; FUNC-LABEL: {{^}}atomic_max_i32_addr64:
413 ; GCN-LABEL: {{^}}atomic_max_i32_addr64:
341414 ; SI: buffer_atomic_smax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
342415 ; VI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
416
417 ; GFX9: global_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
343418 define amdgpu_kernel void @atomic_max_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
344419 entry:
345420 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
347422 ret void
348423 }
349424
350 ; FUNC-LABEL: {{^}}atomic_max_i32_ret_addr64:
425 ; GCN-LABEL: {{^}}atomic_max_i32_ret_addr64:
351426 ; SI: buffer_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
352427 ; VI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
353 ; GCN: buffer_store_dword [[RET]]
428 ; SIVI: buffer_store_dword [[RET]]
429
430 ; GFX9: global_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
354431 define amdgpu_kernel void @atomic_max_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
355432 entry:
356433 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
359436 ret void
360437 }
361438
362 ; FUNC-LABEL: {{^}}atomic_umax_i32_offset:
363 ; GCN: buffer_atomic_umax v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
439 ; GCN-LABEL: {{^}}atomic_umax_i32_offset:
440 ; SIVI: buffer_atomic_umax v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
441
442 ; GFX9: global_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
364443 define amdgpu_kernel void @atomic_umax_i32_offset(i32 addrspace(1)* %out, i32 %in) {
365444 entry:
366445 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
368447 ret void
369448 }
370449
371 ; FUNC-LABEL: {{^}}atomic_umax_i32_ret_offset:
372 ; GCN: buffer_atomic_umax [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
373 ; GCN: buffer_store_dword [[RET]]
450 ; GCN-LABEL: {{^}}atomic_umax_i32_ret_offset:
451 ; SIVI: buffer_atomic_umax [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
452 ; SIVI: buffer_store_dword [[RET]]
453
454 ; GFX9: global_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
374455 define amdgpu_kernel void @atomic_umax_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
375456 entry:
376457 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
379460 ret void
380461 }
381462
382 ; FUNC-LABEL: {{^}}atomic_umax_i32_addr64_offset:
463 ; GCN-LABEL: {{^}}atomic_umax_i32_addr64_offset:
383464 ; SI: buffer_atomic_umax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
384465 ; VI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
466 ; GFX9: global_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
385467 define amdgpu_kernel void @atomic_umax_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
386468 entry:
387469 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
390472 ret void
391473 }
392474
393 ; FUNC-LABEL: {{^}}atomic_umax_i32_ret_addr64_offset:
475 ; GCN-LABEL: {{^}}atomic_umax_i32_ret_addr64_offset:
394476 ; SI: buffer_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
395477 ; VI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
396 ; GCN: buffer_store_dword [[RET]]
478 ; SIVI: buffer_store_dword [[RET]]
479
480 ; GFX9: global_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
397481 define amdgpu_kernel void @atomic_umax_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
398482 entry:
399483 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
403487 ret void
404488 }
405489
406 ; FUNC-LABEL: {{^}}atomic_umax_i32:
407 ; GCN: buffer_atomic_umax v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
490 ; GCN-LABEL: {{^}}atomic_umax_i32:
491 ; SIVI: buffer_atomic_umax v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
492
493 ; GFX9: global_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
408494 define amdgpu_kernel void @atomic_umax_i32(i32 addrspace(1)* %out, i32 %in) {
409495 entry:
410496 %val = atomicrmw volatile umax i32 addrspace(1)* %out, i32 %in seq_cst
411497 ret void
412498 }
413499
414 ; FUNC-LABEL: {{^}}atomic_umax_i32_ret:
415 ; GCN: buffer_atomic_umax [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
416 ; GCN: buffer_store_dword [[RET]]
500 ; GCN-LABEL: {{^}}atomic_umax_i32_ret:
501 ; SIVI: buffer_atomic_umax [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
502 ; SIVI: buffer_store_dword [[RET]]
503
504 ; GFX9: global_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
417505 define amdgpu_kernel void @atomic_umax_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
418506 entry:
419507 %val = atomicrmw volatile umax i32 addrspace(1)* %out, i32 %in seq_cst
421509 ret void
422510 }
423511
424 ; FUNC-LABEL: {{^}}atomic_umax_i32_addr64:
512 ; GCN-LABEL: {{^}}atomic_umax_i32_addr64:
425513 ; SI: buffer_atomic_umax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
426514 ; VI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
515 ; GFX9: global_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
427516 define amdgpu_kernel void @atomic_umax_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
428517 entry:
429518 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
431520 ret void
432521 }
433522
434 ; FUNC-LABEL: {{^}}atomic_umax_i32_ret_addr64:
523 ; GCN-LABEL: {{^}}atomic_umax_i32_ret_addr64:
435524 ; SI: buffer_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
436525 ; VI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
437 ; GCN: buffer_store_dword [[RET]]
526 ; SIVI: buffer_store_dword [[RET]]
527
528 ; GFX9: global_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
438529 define amdgpu_kernel void @atomic_umax_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
439530 entry:
440531 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
443534 ret void
444535 }
445536
446 ; FUNC-LABEL: {{^}}atomic_min_i32_offset:
447 ; GCN: buffer_atomic_smin v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
537 ; GCN-LABEL: {{^}}atomic_min_i32_offset:
538 ; SIVI: buffer_atomic_smin v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
539
540 ; GFX9: global_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
448541 define amdgpu_kernel void @atomic_min_i32_offset(i32 addrspace(1)* %out, i32 %in) {
449542 entry:
450543 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
452545 ret void
453546 }
454547
455 ; FUNC-LABEL: {{^}}atomic_min_i32_ret_offset:
456 ; GCN: buffer_atomic_smin [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
457 ; GCN: buffer_store_dword [[RET]]
548 ; GCN-LABEL: {{^}}atomic_min_i32_ret_offset:
549 ; SIVI: buffer_atomic_smin [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
550 ; SIVI: buffer_store_dword [[RET]]
551
552 ; GFX9: global_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
458553 define amdgpu_kernel void @atomic_min_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
459554 entry:
460555 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
463558 ret void
464559 }
465560
466 ; FUNC-LABEL: {{^}}atomic_min_i32_addr64_offset:
561 ; GCN-LABEL: {{^}}atomic_min_i32_addr64_offset:
467562 ; SI: buffer_atomic_smin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
468563 ; VI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
564 ; GFX9: global_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
469565 define amdgpu_kernel void @atomic_min_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
470566 entry:
471567 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
474570 ret void
475571 }
476572
477 ; FUNC-LABEL: {{^}}atomic_min_i32_ret_addr64_offset:
573 ; GCN-LABEL: {{^}}atomic_min_i32_ret_addr64_offset:
478574 ; SI: buffer_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
479575 ; VI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
480 ; GCN: buffer_store_dword [[RET]]
576 ; SIVI: buffer_store_dword [[RET]]
577
578 ; GFX9: global_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
481579 define amdgpu_kernel void @atomic_min_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
482580 entry:
483581 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
487585 ret void
488586 }
489587
490 ; FUNC-LABEL: {{^}}atomic_min_i32:
491 ; GCN: buffer_atomic_smin v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
588 ; GCN-LABEL: {{^}}atomic_min_i32:
589 ; SIVI: buffer_atomic_smin v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
590
591 ; GFX9: global_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
492592 define amdgpu_kernel void @atomic_min_i32(i32 addrspace(1)* %out, i32 %in) {
493593 entry:
494594 %val = atomicrmw volatile min i32 addrspace(1)* %out, i32 %in seq_cst
495595 ret void
496596 }
497597
498 ; FUNC-LABEL: {{^}}atomic_min_i32_ret:
499 ; GCN: buffer_atomic_smin [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
500 ; GCN: buffer_store_dword [[RET]]
598 ; GCN-LABEL: {{^}}atomic_min_i32_ret:
599 ; SIVI: buffer_atomic_smin [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
600 ; SIVI: buffer_store_dword [[RET]]
601
602 ; GFX9: global_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
501603 define amdgpu_kernel void @atomic_min_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
502604 entry:
503605 %val = atomicrmw volatile min i32 addrspace(1)* %out, i32 %in seq_cst
505607 ret void
506608 }
507609
508 ; FUNC-LABEL: {{^}}atomic_min_i32_addr64:
610 ; GCN-LABEL: {{^}}atomic_min_i32_addr64:
509611 ; SI: buffer_atomic_smin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
510612 ; VI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
613 ; GFX9: global_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
511614 define amdgpu_kernel void @atomic_min_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
512615 entry:
513616 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
515618 ret void
516619 }
517620
518 ; FUNC-LABEL: {{^}}atomic_min_i32_ret_addr64:
621 ; GCN-LABEL: {{^}}atomic_min_i32_ret_addr64:
519622 ; SI: buffer_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
520623 ; VI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
521 ; GCN: buffer_store_dword [[RET]]
624 ; SIVI: buffer_store_dword [[RET]]
625
626 ; GFX9: global_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
522627 define amdgpu_kernel void @atomic_min_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
523628 entry:
524629 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
527632 ret void
528633 }
529634
530 ; FUNC-LABEL: {{^}}atomic_umin_i32_offset:
531 ; GCN: buffer_atomic_umin v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
635 ; GCN-LABEL: {{^}}atomic_umin_i32_offset:
636 ; SIVI: buffer_atomic_umin v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
637
638 ; GFX9: global_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
532639 define amdgpu_kernel void @atomic_umin_i32_offset(i32 addrspace(1)* %out, i32 %in) {
533640 entry:
534641 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
536643 ret void
537644 }
538645
539 ; FUNC-LABEL: {{^}}atomic_umin_i32_ret_offset:
540 ; GCN: buffer_atomic_umin [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
541 ; GCN: buffer_store_dword [[RET]]
646 ; GCN-LABEL: {{^}}atomic_umin_i32_ret_offset:
647 ; SIVI: buffer_atomic_umin [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
648 ; SIVI: buffer_store_dword [[RET]]
649
650 ; GFX9: global_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
542651 define amdgpu_kernel void @atomic_umin_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
543652 entry:
544653 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
547656 ret void
548657 }
549658
550 ; FUNC-LABEL: {{^}}atomic_umin_i32_addr64_offset:
659 ; GCN-LABEL: {{^}}atomic_umin_i32_addr64_offset:
551660 ; SI: buffer_atomic_umin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
552661 ; VI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
662 ; GFX9: global_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
553663 define amdgpu_kernel void @atomic_umin_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
554664 entry:
555665 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
558668 ret void
559669 }
560670
561 ; FUNC-LABEL: {{^}}atomic_umin_i32_ret_addr64_offset:
671 ; GCN-LABEL: {{^}}atomic_umin_i32_ret_addr64_offset:
562672 ; SI: buffer_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
563673 ; VI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
564 ; GCN: buffer_store_dword [[RET]]
674 ; SIVI: buffer_store_dword [[RET]]
675
676 ; GFX9: global_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
565677 define amdgpu_kernel void @atomic_umin_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
566678 entry:
567679 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
571683 ret void
572684 }
573685
574 ; FUNC-LABEL: {{^}}atomic_umin_i32:
575 ; GCN: buffer_atomic_umin v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
686 ; GCN-LABEL: {{^}}atomic_umin_i32:
687 ; SIVI: buffer_atomic_umin v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
688 ; GFX9: global_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
576689 define amdgpu_kernel void @atomic_umin_i32(i32 addrspace(1)* %out, i32 %in) {
577690 entry:
578691 %val = atomicrmw volatile umin i32 addrspace(1)* %out, i32 %in seq_cst
579692 ret void
580693 }
581694
582 ; FUNC-LABEL: {{^}}atomic_umin_i32_ret:
583 ; SI: buffer_atomic_umin [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
584 ; GCN: buffer_store_dword [[RET]]
695 ; GCN-LABEL: {{^}}atomic_umin_i32_ret:
696 ; SIVI: buffer_atomic_umin [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
697 ; SIVI: buffer_store_dword [[RET]]
698
699 ; GFX9: global_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
585700 define amdgpu_kernel void @atomic_umin_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
586701 entry:
587702 %val = atomicrmw volatile umin i32 addrspace(1)* %out, i32 %in seq_cst
589704 ret void
590705 }
591706
592 ; FUNC-LABEL: {{^}}atomic_umin_i32_addr64:
707 ; GCN-LABEL: {{^}}atomic_umin_i32_addr64:
593708 ; SI: buffer_atomic_umin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
594709 ; VI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
710 ; GFX9: global_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
595711 define amdgpu_kernel void @atomic_umin_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
596712 entry:
597713 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
599715 ret void
600716 }
601717
602 ; FUNC-LABEL: {{^}}atomic_umin_i32_ret_addr64:
718 ; GCN-LABEL: {{^}}atomic_umin_i32_ret_addr64:
603719 ; SI: buffer_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
604720 ; VI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
605 ; GCN: buffer_store_dword [[RET]]
721 ; SIVI: buffer_store_dword [[RET]]
722
723 ; GFX9: global_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
606724 define amdgpu_kernel void @atomic_umin_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
607725 entry:
608726 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
611729 ret void
612730 }
613731
614 ; FUNC-LABEL: {{^}}atomic_or_i32_offset:
615 ; GCN: buffer_atomic_or v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
732 ; GCN-LABEL: {{^}}atomic_or_i32_offset:
733 ; SIVI: buffer_atomic_or v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
734
735 ; GFX9: global_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
616736 define amdgpu_kernel void @atomic_or_i32_offset(i32 addrspace(1)* %out, i32 %in) {
617737 entry:
618738 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
620740 ret void
621741 }
622742
623 ; FUNC-LABEL: {{^}}atomic_or_i32_ret_offset:
624 ; GCN: buffer_atomic_or [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
625 ; GCN: buffer_store_dword [[RET]]
743 ; GCN-LABEL: {{^}}atomic_or_i32_ret_offset:
744 ; SIVI: buffer_atomic_or [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
745 ; SIVI: buffer_store_dword [[RET]]
746
747 ; GFX9: global_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
626748 define amdgpu_kernel void @atomic_or_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
627749 entry:
628750 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
631753 ret void
632754 }
633755
634 ; FUNC-LABEL: {{^}}atomic_or_i32_addr64_offset:
756 ; GCN-LABEL: {{^}}atomic_or_i32_addr64_offset:
635757 ; SI: buffer_atomic_or v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
636758 ; VI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
759 ; GFX9: global_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
637760 define amdgpu_kernel void @atomic_or_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
638761 entry:
639762 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
642765 ret void
643766 }
644767
645 ; FUNC-LABEL: {{^}}atomic_or_i32_ret_addr64_offset:
768 ; GCN-LABEL: {{^}}atomic_or_i32_ret_addr64_offset:
646769 ; SI: buffer_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
647770 ; VI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
648 ; GCN: buffer_store_dword [[RET]]
771 ; SIVI: buffer_store_dword [[RET]]
772
773 ; GFX9: global_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
649774 define amdgpu_kernel void @atomic_or_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
650775 entry:
651776 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
655780 ret void
656781 }
657782
658 ; FUNC-LABEL: {{^}}atomic_or_i32:
659 ; GCN: buffer_atomic_or v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
783 ; GCN-LABEL: {{^}}atomic_or_i32:
784 ; SIVI: buffer_atomic_or v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
785
786 ; GFX9: global_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
660787 define amdgpu_kernel void @atomic_or_i32(i32 addrspace(1)* %out, i32 %in) {
661788 entry:
662789 %val = atomicrmw volatile or i32 addrspace(1)* %out, i32 %in seq_cst
663790 ret void
664791 }
665792
666 ; FUNC-LABEL: {{^}}atomic_or_i32_ret:
667 ; GCN: buffer_atomic_or [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
668 ; GCN: buffer_store_dword [[RET]]
793 ; GCN-LABEL: {{^}}atomic_or_i32_ret:
794 ; SIVI: buffer_atomic_or [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
795 ; SIVI: buffer_store_dword [[RET]]
796
797 ; GFX9: global_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
669798 define amdgpu_kernel void @atomic_or_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
670799 entry:
671800 %val = atomicrmw volatile or i32 addrspace(1)* %out, i32 %in seq_cst
673802 ret void
674803 }
675804
676 ; FUNC-LABEL: {{^}}atomic_or_i32_addr64:
805 ; GCN-LABEL: {{^}}atomic_or_i32_addr64:
677806 ; SI: buffer_atomic_or v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
678807 ; VI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
808 ; GFX9: global_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
679809 define amdgpu_kernel void @atomic_or_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
680810 entry:
681811 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
683813 ret void
684814 }
685815
686 ; FUNC-LABEL: {{^}}atomic_or_i32_ret_addr64:
816 ; GCN-LABEL: {{^}}atomic_or_i32_ret_addr64:
687817 ; SI: buffer_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
688818 ; VI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
689 ; GCN: buffer_store_dword [[RET]]
819 ; SIVI: buffer_store_dword [[RET]]
820
821 ; GFX9: global_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
690822 define amdgpu_kernel void @atomic_or_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
691823 entry:
692824 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
695827 ret void
696828 }
697829
698 ; FUNC-LABEL: {{^}}atomic_xchg_i32_offset:
699 ; GCN: buffer_atomic_swap v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
830 ; GCN-LABEL: {{^}}atomic_xchg_i32_offset:
831 ; SIVI: buffer_atomic_swap v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
832
833 ; GFX9: global_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
700834 define amdgpu_kernel void @atomic_xchg_i32_offset(i32 addrspace(1)* %out, i32 %in) {
701835 entry:
702836 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
704838 ret void
705839 }
706840
707 ; FUNC-LABEL: {{^}}atomic_xchg_i32_ret_offset:
708 ; GCN: buffer_atomic_swap [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
709 ; GCN: buffer_store_dword [[RET]]
841 ; GCN-LABEL: {{^}}atomic_xchg_i32_ret_offset:
842 ; SIVI: buffer_atomic_swap [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
843 ; SIVI: buffer_store_dword [[RET]]
844
845 ; GFX9: global_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
710846 define amdgpu_kernel void @atomic_xchg_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
711847 entry:
712848 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
715851 ret void
716852 }
717853
718 ; FUNC-LABEL: {{^}}atomic_xchg_i32_addr64_offset:
854 ; GCN-LABEL: {{^}}atomic_xchg_i32_addr64_offset:
719855 ; SI: buffer_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
720
721 ; VI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}{{$}}
856 ; VI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
857 ; GFX9: global_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
722858 define amdgpu_kernel void @atomic_xchg_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
723859 entry:
724860 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
727863 ret void
728864 }
729865
730 ; FUNC-LABEL: {{^}}atomic_xchg_i32_ret_addr64_offset:
866 ; GCN-LABEL: {{^}}atomic_xchg_i32_ret_addr64_offset:
731867 ; SI: buffer_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
732
733868 ; VI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
734 ; GCN: buffer_store_dword [[RET]]
869 ; SIVI: buffer_store_dword [[RET]]
870
871 ; GFX9: global_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
735872 define amdgpu_kernel void @atomic_xchg_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
736873 entry:
737874 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
741878 ret void
742879 }
743880
744 ; FUNC-LABEL: {{^}}atomic_xchg_i32:
745 ; GCN: buffer_atomic_swap v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
881 ; GCN-LABEL: {{^}}atomic_xchg_i32:
882 ; SIVI: buffer_atomic_swap v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
883 ; GFX9: global_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
746884 define amdgpu_kernel void @atomic_xchg_i32(i32 addrspace(1)* %out, i32 %in) {
747885 entry:
748886 %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in seq_cst
749887 ret void
750888 }
751889
752 ; FUNC-LABEL: {{^}}atomic_xchg_i32_ret:
753 ; GCN: buffer_atomic_swap [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
754 ; GCN: buffer_store_dword [[RET]]
890 ; GCN-LABEL: {{^}}atomic_xchg_i32_ret:
891 ; SIVI: buffer_atomic_swap [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
892 ; SIVI: buffer_store_dword [[RET]]
893
894 ; GFX9: global_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
755895 define amdgpu_kernel void @atomic_xchg_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
756896 entry:
757897 %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in seq_cst
759899 ret void
760900 }
761901
762 ; FUNC-LABEL: {{^}}atomic_xchg_i32_addr64:
902 ; GCN-LABEL: {{^}}atomic_xchg_i32_addr64:
763903 ; SI: buffer_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
764904 ; VI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
905 ; GFX9: global_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
765906 define amdgpu_kernel void @atomic_xchg_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
766907 entry:
767908 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
769910 ret void
770911 }
771912
772 ; FUNC-LABEL: {{^}}atomic_xchg_i32_ret_addr64:
913 ; GCN-LABEL: {{^}}atomic_xchg_i32_ret_addr64:
773914 ; SI: buffer_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
774915 ; VI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
775 ; GCN: buffer_store_dword [[RET]]
916 ; SIVI: buffer_store_dword [[RET]]
917
918 ; GFX9: global_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
776919 define amdgpu_kernel void @atomic_xchg_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
777920 entry:
778921 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
781924 ret void
782925 }
783926
784 ; FUNC-LABEL: {{^}}atomic_cmpxchg_i32_offset:
785 ; GCN: buffer_atomic_cmpswap v[{{[0-9]+}}:{{[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
927 ; GCN-LABEL: {{^}}atomic_cmpxchg_i32_offset:
928 ; SIVI: buffer_atomic_cmpswap v[{{[0-9]+}}:{{[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
929
930 ; GFX9: global_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:16{{$}}
786931 define amdgpu_kernel void @atomic_cmpxchg_i32_offset(i32 addrspace(1)* %out, i32 %in, i32 %old) {
787932 entry:
788933 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
790935 ret void
791936 }
792937
793 ; FUNC-LABEL: {{^}}atomic_cmpxchg_i32_ret_offset:
794 ; GCN: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]{{:[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
795 ; GCN: buffer_store_dword v[[RET]]
938 ; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_offset:
939 ; SIVI: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]{{:[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
940 ; SIVI: buffer_store_dword v[[RET]]
941
942 ; GFX9: global_atomic_cmpswap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:16 glc{{$}}
796943 define amdgpu_kernel void @atomic_cmpxchg_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i32 %old) {
797944 entry:
798945 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
802949 ret void
803950 }
804951
805 ; FUNC-LABEL: {{^}}atomic_cmpxchg_i32_addr64_offset:
952 ; GCN-LABEL: {{^}}atomic_cmpxchg_i32_addr64_offset:
806953 ; SI: buffer_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
807954
808955 ; VI: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
956 ; GFX9: global_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], off offset:16{{$}}
809957 define amdgpu_kernel void @atomic_cmpxchg_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index, i32 %old) {
810958 entry:
811959 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
814962 ret void
815963 }
816964
817 ; FUNC-LABEL: {{^}}atomic_cmpxchg_i32_ret_addr64_offset:
965 ; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_addr64_offset:
818966 ; SI: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
819967 ; VI: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
820 ; GCN: buffer_store_dword v[[RET]]
968 ; SIVI: buffer_store_dword v[[RET]]
969
970 ; GFX9: global_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:16 glc{{$}}
821971 define amdgpu_kernel void @atomic_cmpxchg_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index, i32 %old) {
822972 entry:
823973 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
828978 ret void
829979 }
830980
831 ; FUNC-LABEL: {{^}}atomic_cmpxchg_i32:
832 ; GCN: buffer_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
981 ; GCN-LABEL: {{^}}atomic_cmpxchg_i32:
982 ; SIVI: buffer_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
983
984 ; GFX9: global_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}}
833985 define amdgpu_kernel void @atomic_cmpxchg_i32(i32 addrspace(1)* %out, i32 %in, i32 %old) {
834986 entry:
835987 %val = cmpxchg volatile i32 addrspace(1)* %out, i32 %old, i32 %in seq_cst seq_cst
836988 ret void
837989 }
838990
839 ; FUNC-LABEL: {{^}}atomic_cmpxchg_i32_ret:
840 ; GCN: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
841 ; GCN: buffer_store_dword v[[RET]]
991 ; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret:
992 ; SIVI: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
993 ; SIVI: buffer_store_dword v[[RET]]
994
995 ; GFX9: global_atomic_cmpswap [[RET:v[0-9]+]], v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], off glc{{$}}
842996 define amdgpu_kernel void @atomic_cmpxchg_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i32 %old) {
843997 entry:
844998 %val = cmpxchg volatile i32 addrspace(1)* %out, i32 %old, i32 %in seq_cst seq_cst
8471001 ret void
8481002 }
8491003
850 ; FUNC-LABEL: {{^}}atomic_cmpxchg_i32_addr64:
1004 ; GCN-LABEL: {{^}}atomic_cmpxchg_i32_addr64:
8511005 ; SI: buffer_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
8521006 ; VI: flat_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]{{$}}
1007 ; GFX9: global_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}}
8531008 define amdgpu_kernel void @atomic_cmpxchg_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index, i32 %old) {
8541009 entry:
8551010 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
8571012 ret void
8581013 }
8591014
860 ; FUNC-LABEL: {{^}}atomic_cmpxchg_i32_ret_addr64:
1015 ; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_addr64:
8611016 ; SI: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
8621017 ; VI: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
863 ; GCN: buffer_store_dword v[[RET]]
1018 ; SIVI: buffer_store_dword v[[RET]]
1019
1020 ; GFX9: global_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off glc{{$}}
8641021 define amdgpu_kernel void @atomic_cmpxchg_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index, i32 %old) {
8651022 entry:
8661023 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
8701027 ret void
8711028 }
8721029
873 ; FUNC-LABEL: {{^}}atomic_xor_i32_offset:
874 ; GCN: buffer_atomic_xor v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
1030 ; GCN-LABEL: {{^}}atomic_xor_i32_offset:
1031 ; SIVI: buffer_atomic_xor v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
1032
1033 ; GFX9: global_atomic_xor v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
8751034 define amdgpu_kernel void @atomic_xor_i32_offset(i32 addrspace(1)* %out, i32 %in) {
8761035 entry:
8771036 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
8791038 ret void
8801039 }
8811040
882 ; FUNC-LABEL: {{^}}atomic_xor_i32_ret_offset:
883 ; GCN: buffer_atomic_xor [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
884 ; GCN: buffer_store_dword [[RET]]
1041 ; GCN-LABEL: {{^}}atomic_xor_i32_ret_offset:
1042 ; SIVI: buffer_atomic_xor [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
1043 ; SIVI: buffer_store_dword [[RET]]
1044
1045 ; GFX9: global_atomic_xor v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
8851046 define amdgpu_kernel void @atomic_xor_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
8861047 entry:
8871048 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
8901051 ret void
8911052 }
8921053
893 ; FUNC-LABEL: {{^}}atomic_xor_i32_addr64_offset:
1054 ; GCN-LABEL: {{^}}atomic_xor_i32_addr64_offset:
8941055 ; SI: buffer_atomic_xor v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
8951056 ; VI: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
1057 ; GFX9: global_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
8961058 define amdgpu_kernel void @atomic_xor_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
8971059 entry:
8981060 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
9011063 ret void
9021064 }
9031065
904 ; FUNC-LABEL: {{^}}atomic_xor_i32_ret_addr64_offset:
1066 ; GCN-LABEL: {{^}}atomic_xor_i32_ret_addr64_offset:
9051067 ; SI: buffer_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
9061068 ; VI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
907 ; GCN: buffer_store_dword [[RET]]
1069 ; SIVI: buffer_store_dword [[RET]]
1070
1071 ; GFX9: global_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
9081072 define amdgpu_kernel void @atomic_xor_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
9091073 entry:
9101074 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
9141078 ret void
9151079 }
9161080
917 ; FUNC-LABEL: {{^}}atomic_xor_i32:
918 ; GCN: buffer_atomic_xor v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
1081 ; GCN-LABEL: {{^}}atomic_xor_i32:
1082 ; SIVI: buffer_atomic_xor v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
1083 ; GFX9: global_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
9191084 define amdgpu_kernel void @atomic_xor_i32(i32 addrspace(1)* %out, i32 %in) {
9201085 entry:
9211086 %val = atomicrmw volatile xor i32 addrspace(1)* %out, i32 %in seq_cst
9221087 ret void
9231088 }
9241089
925 ; FUNC-LABEL: {{^}}atomic_xor_i32_ret:
926 ; GCN: buffer_atomic_xor [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
927 ; GCN: buffer_store_dword [[RET]]
1090 ; GCN-LABEL: {{^}}atomic_xor_i32_ret:
1091 ; SIVI: buffer_atomic_xor [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
1092 ; SIVI: buffer_store_dword [[RET]]
1093
1094 ; GFX9: global_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
9281095 define amdgpu_kernel void @atomic_xor_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
9291096 entry:
9301097 %val = atomicrmw volatile xor i32 addrspace(1)* %out, i32 %in seq_cst
9321099 ret void
9331100 }
9341101
935 ; FUNC-LABEL: {{^}}atomic_xor_i32_addr64:
1102 ; GCN-LABEL: {{^}}atomic_xor_i32_addr64:
9361103 ; SI: buffer_atomic_xor v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
9371104 ; VI: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
1105 ; GFX9: global_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
9381106 define amdgpu_kernel void @atomic_xor_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
9391107 entry:
9401108 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
9421110 ret void
9431111 }
9441112
945 ; FUNC-LABEL: {{^}}atomic_xor_i32_ret_addr64:
1113 ; GCN-LABEL: {{^}}atomic_xor_i32_ret_addr64:
9461114 ; SI: buffer_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
9471115 ; VI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
948 ; GCN: buffer_store_dword [[RET]]
1116 ; SIVI: buffer_store_dword [[RET]]
1117
1118 ; GFX9: global_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
9491119 define amdgpu_kernel void @atomic_xor_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
9501120 entry:
9511121 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
9541124 ret void
9551125 }
9561126
957 ; FUNC-LABEL: {{^}}atomic_load_i32_offset:
1127 ; GCN-LABEL: {{^}}atomic_load_i32_offset:
9581128 ; SI: buffer_load_dword [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
9591129 ; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
960 ; GCN: buffer_store_dword [[RET]]
1130 ; SIVI: buffer_store_dword [[RET]]
1131
1132 ; GFX9: global_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], off offset:16 glc{{$}}
9611133 define amdgpu_kernel void @atomic_load_i32_offset(i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
9621134 entry:
9631135 %gep = getelementptr i32, i32 addrspace(1)* %in, i64 4
9661138 ret void
9671139 }
9681140
969 ; FUNC-LABEL: {{^}}atomic_load_i32:
1141 ; GCN-LABEL: {{^}}atomic_load_i32:
9701142 ; SI: buffer_load_dword [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
9711143 ; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc
972 ; GCN: buffer_store_dword [[RET]]
1144 ; SIVI: buffer_store_dword [[RET]]
1145
1146 ; GFX9: global_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], off glc
9731147 define amdgpu_kernel void @atomic_load_i32(i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
9741148 entry:
9751149 %val = load atomic i32, i32 addrspace(1)* %in seq_cst, align 4
9771151 ret void
9781152 }
9791153
980 ; FUNC-LABEL: {{^}}atomic_load_i32_addr64_offset:
1154 ; GCN-LABEL: {{^}}atomic_load_i32_addr64_offset:
9811155 ; SI: buffer_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
9821156 ; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
983 ; GCN: buffer_store_dword [[RET]]
1157 ; SIVI: buffer_store_dword [[RET]]
1158
1159 ; GFX9: global_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], off offset:16 glc{{$}}
9841160 define amdgpu_kernel void @atomic_load_i32_addr64_offset(i32 addrspace(1)* %in, i32 addrspace(1)* %out, i64 %index) {
9851161 entry:
9861162 %ptr = getelementptr i32, i32 addrspace(1)* %in, i64 %index
9901166 ret void
9911167 }
9921168
993 ; FUNC-LABEL: {{^}}atomic_load_i32_addr64:
1169 ; GCN-LABEL: {{^}}atomic_load_i32_addr64:
9941170 ; SI: buffer_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
9951171 ; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
996 ; GCN: buffer_store_dword [[RET]]
1172 ; SIVI: buffer_store_dword [[RET]]
1173
1174 ; GFX9: global_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], off glc{{$}}
9971175 define amdgpu_kernel void @atomic_load_i32_addr64(i32 addrspace(1)* %in, i32 addrspace(1)* %out, i64 %index) {
9981176 entry:
9991177 %ptr = getelementptr i32, i32 addrspace(1)* %in, i64 %index
10021180 ret void
10031181 }
10041182
1005 ; FUNC-LABEL: {{^}}atomic_store_i32_offset:
1183 ; GCN-LABEL: {{^}}atomic_store_i32_offset:
10061184 ; SI: buffer_store_dword {{v[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
1007 ; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
1185 ; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
1186 ; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}, off offset:16{{$}}
10081187 define amdgpu_kernel void @atomic_store_i32_offset(i32 %in, i32 addrspace(1)* %out) {
10091188 entry:
10101189 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
10121191 ret void
10131192 }
10141193
1015 ; FUNC-LABEL: {{^}}atomic_store_i32:
1194 ; GCN-LABEL: {{^}}atomic_store_i32:
10161195 ; SI: buffer_store_dword {{v[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
1017 ; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
1196 ; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
1197 ; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}, off{{$}}
10181198 define amdgpu_kernel void @atomic_store_i32(i32 %in, i32 addrspace(1)* %out) {
10191199 entry:
10201200 store atomic i32 %in, i32 addrspace(1)* %out seq_cst, align 4
10211201 ret void
10221202 }
10231203
1024 ; FUNC-LABEL: {{^}}atomic_store_i32_addr64_offset:
1204 ; GCN-LABEL: {{^}}atomic_store_i32_addr64_offset:
10251205 ; SI: buffer_store_dword {{v[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
1026 ; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
1206 ; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
1207 ; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}, off offset:16{{$}}
10271208 define amdgpu_kernel void @atomic_store_i32_addr64_offset(i32 %in, i32 addrspace(1)* %out, i64 %index) {
10281209 entry:
10291210 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
10321213 ret void
10331214 }
10341215
1035 ; FUNC-LABEL: {{^}}atomic_store_i32_addr64:
1216 ; GCN-LABEL: {{^}}atomic_store_i32_addr64:
10361217 ; SI: buffer_store_dword {{v[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
1037 ; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
1218 ; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
1219 ; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}, off{{$}}
10381220 define amdgpu_kernel void @atomic_store_i32_addr64(i32 %in, i32 addrspace(1)* %out, i64 %index) {
10391221 entry:
10401222 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
0 ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
1 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
0 ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,CIVI %s
1 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,CIVI %s
2 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
23
34 ; GCN-LABEL: {{^}}atomic_add_i64_offset:
4 ; GCN: buffer_atomic_add_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
5 ; CIVI: buffer_atomic_add_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
6
7 ; GFX9: global_atomic_add_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], off offset:32{{$}}
58 define amdgpu_kernel void @atomic_add_i64_offset(i64 addrspace(1)* %out, i64 %in) {
69 entry:
710 %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
1013 }
1114
1215 ; GCN-LABEL: {{^}}atomic_add_i64_ret_offset:
13 ; GCN: buffer_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
14 ; GCN: buffer_store_dwordx2 [[RET]]
16 ; CIVI: buffer_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
17 ; CIVI: buffer_store_dwordx2 [[RET]]
18
19 ; GFX9: global_atomic_add_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], off offset:32 glc{{$}}
1520 define amdgpu_kernel void @atomic_add_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
1621 entry:
1722 %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
2328 ; GCN-LABEL: {{^}}atomic_add_i64_addr64_offset:
2429 ; CI: buffer_atomic_add_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}
2530 ; VI: flat_atomic_add_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}{{$}}
31 ; GFX9: global_atomic_add_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32{{$}}
2632 define amdgpu_kernel void @atomic_add_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) {
2733 entry:
2834 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
3440 ; GCN-LABEL: {{^}}atomic_add_i64_ret_addr64_offset:
3541 ; CI: buffer_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}
3642 ; VI: flat_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
37 ; GCN: buffer_store_dwordx2 [[RET]]
43 ; CIVI: buffer_store_dwordx2 [[RET]]
44
45 ; GFX9: global_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32 glc{{$}}
3846 define amdgpu_kernel void @atomic_add_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
3947 entry:
4048 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
4553 }
4654
4755 ; GCN-LABEL: {{^}}atomic_add_i64:
48 ; GCN: buffer_atomic_add_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
56 ; CIVI: buffer_atomic_add_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
57 ; GFX9: global_atomic_add_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off{{$}}
4958 define amdgpu_kernel void @atomic_add_i64(i64 addrspace(1)* %out, i64 %in) {
5059 entry:
5160 %tmp0 = atomicrmw volatile add i64 addrspace(1)* %out, i64 %in seq_cst
5362 }
5463
5564 ; GCN-LABEL: {{^}}atomic_add_i64_ret:
56 ; GCN: buffer_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
57 ; GCN: buffer_store_dwordx2 [[RET]]
65 ; CIVI: buffer_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
66 ; CIVI: buffer_store_dwordx2 [[RET]]
67
68 ; GFX9: global_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off glc{{$}}
5869 define amdgpu_kernel void @atomic_add_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
5970 entry:
6071 %tmp0 = atomicrmw volatile add i64 addrspace(1)* %out, i64 %in seq_cst
6576 ; GCN-LABEL: {{^}}atomic_add_i64_addr64:
6677 ; CI: buffer_atomic_add_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
6778 ; VI: flat_atomic_add_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
79 ; GFX9: global_atomic_add_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off{{$}}
6880 define amdgpu_kernel void @atomic_add_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index) {
6981 entry:
7082 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
7587 ; GCN-LABEL: {{^}}atomic_add_i64_ret_addr64:
7688 ; CI: buffer_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
7789 ; VI: flat_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
78 ; GCN: buffer_store_dwordx2 [[RET]]
90 ; CIVI: buffer_store_dwordx2 [[RET]]
91
92 ; GFX9: global_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off glc{{$}}
7993 define amdgpu_kernel void @atomic_add_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
8094 entry:
8195 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
8599 }
86100
87101 ; GCN-LABEL: {{^}}atomic_and_i64_offset:
88 ; GCN: buffer_atomic_and_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
102 ; CIVI: buffer_atomic_and_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
103 ; GFX9: global_atomic_and_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32{{$}}
89104 define amdgpu_kernel void @atomic_and_i64_offset(i64 addrspace(1)* %out, i64 %in) {
90105 entry:
91106 %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
94109 }
95110
96111 ; GCN-LABEL: {{^}}atomic_and_i64_ret_offset:
97 ; GCN: buffer_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
98 ; GCN: buffer_store_dwordx2 [[RET]]
112 ; CIVI: buffer_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
113 ; CIVI: buffer_store_dwordx2 [[RET]]
114
115 ; GFX9: global_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32 glc{{$}}
99116 define amdgpu_kernel void @atomic_and_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
100117 entry:
101118 %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
107124 ; GCN-LABEL: {{^}}atomic_and_i64_addr64_offset:
108125 ; CI: buffer_atomic_and_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}
109126 ; VI: flat_atomic_and_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
127 ; GFX9: global_atomic_and_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32{{$}}
110128 define amdgpu_kernel void @atomic_and_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) {
111129 entry:
112130 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
118136 ; GCN-LABEL: {{^}}atomic_and_i64_ret_addr64_offset:
119137 ; CI: buffer_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}
120138 ; VI: flat_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
121 ; GCN: buffer_store_dwordx2 [[RET]]
139 ; CIVI: buffer_store_dwordx2 [[RET]]
140
141 ; GFX9: global_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32 glc{{$}}
122142 define amdgpu_kernel void @atomic_and_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
123143 entry:
124144 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
129149 }
130150
131151 ; GCN-LABEL: {{^}}atomic_and_i64:
132 ; GCN: buffer_atomic_and_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
152 ; CIVI: buffer_atomic_and_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
153 ; GFX9: global_atomic_and_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off{{$}}
133154 define amdgpu_kernel void @atomic_and_i64(i64 addrspace(1)* %out, i64 %in) {
134155 entry:
135156 %tmp0 = atomicrmw volatile and i64 addrspace(1)* %out, i64 %in seq_cst
137158 }
138159
139160 ; GCN-LABEL: {{^}}atomic_and_i64_ret:
140 ; GCN: buffer_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
141 ; GCN: buffer_store_dwordx2 [[RET]]
161 ; CIVI: buffer_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
162 ; CIVI: buffer_store_dwordx2 [[RET]]
163
164 ; GFX9: global_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off glc{{$}}
142165 define amdgpu_kernel void @atomic_and_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
143166 entry:
144167 %tmp0 = atomicrmw volatile and i64 addrspace(1)* %out, i64 %in seq_cst
149172 ; GCN-LABEL: {{^}}atomic_and_i64_addr64:
150173 ; CI: buffer_atomic_and_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
151174 ; VI: flat_atomic_and_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
175 ; GFX9: global_atomic_and_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off{{$}}
152176 define amdgpu_kernel void @atomic_and_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index) {
153177 entry:
154178 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
159183 ; GCN-LABEL: {{^}}atomic_and_i64_ret_addr64:
160184 ; CI: buffer_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
161185 ; VI: flat_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
162 ; GCN: buffer_store_dwordx2 [[RET]]
186 ; CIVI: buffer_store_dwordx2 [[RET]]
187
188 ; GFX9: global_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off glc{{$}}
163189 define amdgpu_kernel void @atomic_and_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
164190 entry:
165191 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
169195 }
170196
171197 ; GCN-LABEL: {{^}}atomic_sub_i64_offset:
172 ; GCN: buffer_atomic_sub_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
198 ; CIVI: buffer_atomic_sub_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
199 ; GFX9: global_atomic_sub_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32{{$}}
173200 define amdgpu_kernel void @atomic_sub_i64_offset(i64 addrspace(1)* %out, i64 %in) {
174201 entry:
175202 %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
178205 }
179206
180207 ; GCN-LABEL: {{^}}atomic_sub_i64_ret_offset:
181 ; GCN: buffer_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
182 ; GCN: buffer_store_dwordx2 [[RET]]
208 ; CIVI: buffer_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
209 ; CIVI: buffer_store_dwordx2 [[RET]]
210
211 ; GFX9: global_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32 glc{{$}}
183212 define amdgpu_kernel void @atomic_sub_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
184213 entry:
185214 %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
191220 ; GCN-LABEL: {{^}}atomic_sub_i64_addr64_offset:
192221 ; CI: buffer_atomic_sub_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}
193222 ; VI: flat_atomic_sub_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
223 ; GFX9: global_atomic_sub_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32{{$}}
194224 define amdgpu_kernel void @atomic_sub_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) {
195225 entry:
196226 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
202232 ; GCN-LABEL: {{^}}atomic_sub_i64_ret_addr64_offset:
203233 ; CI: buffer_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}
204234 ; VI: flat_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
205 ; GCN: buffer_store_dwordx2 [[RET]]
235 ; CIVI: buffer_store_dwordx2 [[RET]]
236
237 ; GFX9: global_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32 glc{{$}}
206238 define amdgpu_kernel void @atomic_sub_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
207239 entry:
208240 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
213245 }
214246
215247 ; GCN-LABEL: {{^}}atomic_sub_i64:
216 ; GCN: buffer_atomic_sub_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
248 ; CIVI: buffer_atomic_sub_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
249 ; GFX9: global_atomic_sub_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off{{$}}
217250 define amdgpu_kernel void @atomic_sub_i64(i64 addrspace(1)* %out, i64 %in) {
218251 entry:
219252 %tmp0 = atomicrmw volatile sub i64 addrspace(1)* %out, i64 %in seq_cst
221254 }
222255
223256 ; GCN-LABEL: {{^}}atomic_sub_i64_ret:
224 ; GCN: buffer_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
225 ; GCN: buffer_store_dwordx2 [[RET]]
257 ; CIVI: buffer_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
258 ; CIVI: buffer_store_dwordx2 [[RET]]
259
260 ; GFX9: global_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off glc{{$}}
226261 define amdgpu_kernel void @atomic_sub_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
227262 entry:
228263 %tmp0 = atomicrmw volatile sub i64 addrspace(1)* %out, i64 %in seq_cst
233268 ; GCN-LABEL: {{^}}atomic_sub_i64_addr64:
234269 ; CI: buffer_atomic_sub_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
235270 ; VI: flat_atomic_sub_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
271 ; GFX9: global_atomic_sub_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off{{$}}
236272 define amdgpu_kernel void @atomic_sub_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index) {
237273 entry:
238274 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
243279 ; GCN-LABEL: {{^}}atomic_sub_i64_ret_addr64:
244280 ; CI: buffer_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
245281 ; VI: flat_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
246 ; GCN: buffer_store_dwordx2 [[RET]]
282 ; CIVI: buffer_store_dwordx2 [[RET]]
283
284 ; GFX9: global_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off glc{{$}}
247285 define amdgpu_kernel void @atomic_sub_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
248286 entry:
249287 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
253291 }
254292
255293 ; GCN-LABEL: {{^}}atomic_max_i64_offset:
256 ; GCN: buffer_atomic_smax_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
294 ; CIVI: buffer_atomic_smax_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
295 ; GFX9: global_atomic_smax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32{{$}}
257296 define amdgpu_kernel void @atomic_max_i64_offset(i64 addrspace(1)* %out, i64 %in) {
258297 entry:
259298 %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
262301 }
263302
264303 ; GCN-LABEL: {{^}}atomic_max_i64_ret_offset:
265 ; GCN: buffer_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
266 ; GCN: buffer_store_dwordx2 [[RET]]
304 ; CIVI: buffer_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
305 ; CIVI: buffer_store_dwordx2 [[RET]]
306
307 ; GFX9: global_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32 glc{{$}}
267308 define amdgpu_kernel void @atomic_max_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
268309 entry:
269310 %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
275316 ; GCN-LABEL: {{^}}atomic_max_i64_addr64_offset:
276317 ; CI: buffer_atomic_smax_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}
277318 ; VI: flat_atomic_smax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
319 ; GFX9: global_atomic_smax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32{{$}}
278320 define amdgpu_kernel void @atomic_max_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) {
279321 entry:
280322 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
286328 ; GCN-LABEL: {{^}}atomic_max_i64_ret_addr64_offset:
287329 ; CI: buffer_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}
288330 ; VI: flat_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
289 ; GCN: buffer_store_dwordx2 [[RET]]
331 ; CIVI: buffer_store_dwordx2 [[RET]]
332
333 ; GFX9: global_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32 glc{{$}}
290334 define amdgpu_kernel void @atomic_max_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
291335 entry:
292336 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
297341 }
298342
299343 ; GCN-LABEL: {{^}}atomic_max_i64:
300 ; GCN: buffer_atomic_smax_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
344 ; CIVI: buffer_atomic_smax_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
345 ; GFX9: global_atomic_smax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off{{$}}
301346 define amdgpu_kernel void @atomic_max_i64(i64 addrspace(1)* %out, i64 %in) {
302347 entry:
303348 %tmp0 = atomicrmw volatile max i64 addrspace(1)* %out, i64 %in seq_cst
305350 }
306351
307352 ; GCN-LABEL: {{^}}atomic_max_i64_ret:
308 ; GCN: buffer_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
309 ; GCN: buffer_store_dwordx2 [[RET]]
353 ; CIVI: buffer_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
354 ; CIVI: buffer_store_dwordx2 [[RET]]
355
356 ; GFX9: global_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off glc{{$}}
310357 define amdgpu_kernel void @atomic_max_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
311358 entry:
312359 %tmp0 = atomicrmw volatile max i64 addrspace(1)* %out, i64 %in seq_cst
317364 ; GCN-LABEL: {{^}}atomic_max_i64_addr64:
318365 ; CI: buffer_atomic_smax_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
319366 ; VI: flat_atomic_smax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
367 ; GFX9: global_atomic_smax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off{{$}}
320368 define amdgpu_kernel void @atomic_max_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index) {
321369 entry:
322370 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
327375 ; GCN-LABEL: {{^}}atomic_max_i64_ret_addr64:
328376 ; CI: buffer_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
329377 ; VI: flat_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
330 ; GCN: buffer_store_dwordx2 [[RET]]
378 ; CIVI: buffer_store_dwordx2 [[RET]]
379
380 ; GFX9: global_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off glc{{$}}
331381 define amdgpu_kernel void @atomic_max_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
332382 entry:
333383 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
337387 }
338388
339389 ; GCN-LABEL: {{^}}atomic_umax_i64_offset:
340 ; GCN: buffer_atomic_umax_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
390 ; CIVI: buffer_atomic_umax_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
391 ; GFX9: global_atomic_umax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32{{$}}
341392 define amdgpu_kernel void @atomic_umax_i64_offset(i64 addrspace(1)* %out, i64 %in) {
342393 entry:
343394 %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
346397 }
347398
348399 ; GCN-LABEL: {{^}}atomic_umax_i64_ret_offset:
349 ; GCN: buffer_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
350 ; GCN: buffer_store_dwordx2 [[RET]]
400 ; CIVI: buffer_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
401 ; CIVI: buffer_store_dwordx2 [[RET]]
402
403 ; GFX9: global_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32 glc{{$}}
351404 define amdgpu_kernel void @atomic_umax_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
352405 entry:
353406 %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
359412 ; GCN-LABEL: {{^}}atomic_umax_i64_addr64_offset:
360413 ; CI: buffer_atomic_umax_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}
361414 ; VI: flat_atomic_umax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
415 ; GFX9: global_atomic_umax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32{{$}}
362416 define amdgpu_kernel void @atomic_umax_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) {
363417 entry:
364418 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
370424 ; GCN-LABEL: {{^}}atomic_umax_i64_ret_addr64_offset:
371425 ; CI: buffer_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}
372426 ; VI: flat_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
373 ; GCN: buffer_store_dwordx2 [[RET]]
427 ; CIVI: buffer_store_dwordx2 [[RET]]
428
429 ; GFX9: global_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32 glc{{$}}
374430 define amdgpu_kernel void @atomic_umax_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
375431 entry:
376432 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
381437 }
382438
383439 ; GCN-LABEL: {{^}}atomic_umax_i64:
384 ; GCN: buffer_atomic_umax_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
440 ; CIVI: buffer_atomic_umax_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
441 ; GFX9: global_atomic_umax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off{{$}}
385442 define amdgpu_kernel void @atomic_umax_i64(i64 addrspace(1)* %out, i64 %in) {
386443 entry:
387444 %tmp0 = atomicrmw volatile umax i64 addrspace(1)* %out, i64 %in seq_cst
389446 }
390447
391448 ; GCN-LABEL: {{^}}atomic_umax_i64_ret:
392 ; GCN: buffer_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
393 ; GCN: buffer_store_dwordx2 [[RET]]
449 ; CIVI: buffer_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
450 ; CIVI: buffer_store_dwordx2 [[RET]]
451
452 ; GFX9: global_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off glc{{$}}
394453 define amdgpu_kernel void @atomic_umax_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
395454 entry:
396455 %tmp0 = atomicrmw volatile umax i64 addrspace(1)* %out, i64 %in seq_cst
401460 ; GCN-LABEL: {{^}}atomic_umax_i64_addr64:
402461 ; CI: buffer_atomic_umax_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
403462 ; VI: flat_atomic_umax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
463 ; GFX9: global_atomic_umax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off{{$}}
404464 define amdgpu_kernel void @atomic_umax_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index) {
405465 entry:
406466 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
411471 ; GCN-LABEL: {{^}}atomic_umax_i64_ret_addr64:
412472 ; CI: buffer_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
413473 ; VI: flat_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
414 ; GCN: buffer_store_dwordx2 [[RET]]
474 ; CIVI: buffer_store_dwordx2 [[RET]]
475
476 ; GFX9: global_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off glc{{$}}
415477 define amdgpu_kernel void @atomic_umax_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
416478 entry:
417479 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
421483 }
422484
423485 ; GCN-LABEL: {{^}}atomic_min_i64_offset:
424 ; GCN: buffer_atomic_smin_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
486 ; CIVI: buffer_atomic_smin_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
487 ; GFX9: global_atomic_smin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32{{$}}
425488 define amdgpu_kernel void @atomic_min_i64_offset(i64 addrspace(1)* %out, i64 %in) {
426489 entry:
427490 %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
430493 }
431494
432495 ; GCN-LABEL: {{^}}atomic_min_i64_ret_offset:
433 ; GCN: buffer_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
434 ; GCN: buffer_store_dwordx2 [[RET]]
496 ; CIVI: buffer_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
497 ; CIVI: buffer_store_dwordx2 [[RET]]
498
499 ; GFX9: global_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32 glc{{$}}
435500 define amdgpu_kernel void @atomic_min_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
436501 entry:
437502 %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
443508 ; GCN-LABEL: {{^}}atomic_min_i64_addr64_offset:
444509 ; CI: buffer_atomic_smin_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}
445510 ; VI: flat_atomic_smin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
511 ; GFX9: global_atomic_smin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32{{$}}
446512 define amdgpu_kernel void @atomic_min_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) {
447513 entry:
448514 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
454520 ; GCN-LABEL: {{^}}atomic_min_i64_ret_addr64_offset:
455521 ; CI: buffer_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}
456522 ; VI: flat_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
457 ; GCN: buffer_store_dwordx2 [[RET]]
523 ; CIVI: buffer_store_dwordx2 [[RET]]
524
525 ; GFX9: global_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32 glc{{$}}
458526 define amdgpu_kernel void @atomic_min_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
459527 entry:
460528 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
465533 }
466534
467535 ; GCN-LABEL: {{^}}atomic_min_i64:
468 ; GCN: buffer_atomic_smin_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
536 ; CIVI: buffer_atomic_smin_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
537 ; GFX9: global_atomic_smin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off{{$}}
469538 define amdgpu_kernel void @atomic_min_i64(i64 addrspace(1)* %out, i64 %in) {
470539 entry:
471540 %tmp0 = atomicrmw volatile min i64 addrspace(1)* %out, i64 %in seq_cst
473542 }
474543
475544 ; GCN-LABEL: {{^}}atomic_min_i64_ret:
476 ; GCN: buffer_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
477 ; GCN: buffer_store_dwordx2 [[RET]]
545 ; CIVI: buffer_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
546 ; CIVI: buffer_store_dwordx2 [[RET]]
547
548 ; GFX9: global_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off glc{{$}}
478549 define amdgpu_kernel void @atomic_min_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
479550 entry:
480551 %tmp0 = atomicrmw volatile min i64 addrspace(1)* %out, i64 %in seq_cst
485556 ; GCN-LABEL: {{^}}atomic_min_i64_addr64:
486557 ; CI: buffer_atomic_smin_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
487558 ; VI: flat_atomic_smin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
559 ; GFX9: global_atomic_smin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off{{$}}
488560 define amdgpu_kernel void @atomic_min_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index) {
489561 entry:
490562 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
495567 ; GCN-LABEL: {{^}}atomic_min_i64_ret_addr64:
496568 ; CI: buffer_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
497569 ; VI: flat_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
498 ; GCN: buffer_store_dwordx2 [[RET]]
570 ; CIVI: buffer_store_dwordx2 [[RET]]
571
572 ; GFX9: global_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off glc{{$}}
499573 define amdgpu_kernel void @atomic_min_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
500574 entry:
501575 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
505579 }
506580
507581 ; GCN-LABEL: {{^}}atomic_umin_i64_offset:
508 ; GCN: buffer_atomic_umin_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
582 ; CIVI: buffer_atomic_umin_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
583
584 ; GFX9: global_atomic_umin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32{{$}}
509585 define amdgpu_kernel void @atomic_umin_i64_offset(i64 addrspace(1)* %out, i64 %in) {
510586 entry:
511587 %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
514590 }
515591
516592 ; GCN-LABEL: {{^}}atomic_umin_i64_ret_offset:
517 ; GCN: buffer_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
518 ; GCN: buffer_store_dwordx2 [[RET]]
593 ; CIVI: buffer_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
594 ; CIVI: buffer_store_dwordx2 [[RET]]
595
596 ; GFX9: global_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32 glc{{$}}
519597 define amdgpu_kernel void @atomic_umin_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
520598 entry:
521599 %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
527605 ; GCN-LABEL: {{^}}atomic_umin_i64_addr64_offset:
528606 ; CI: buffer_atomic_umin_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}
529607 ; VI: flat_atomic_umin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
608 ; GFX9: global_atomic_umin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32{{$}}
530609 define amdgpu_kernel void @atomic_umin_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) {
531610 entry:
532611 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
538617 ; GCN-LABEL: {{^}}atomic_umin_i64_ret_addr64_offset:
539618 ; CI: buffer_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}
540619 ; VI: flat_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
541 ; GCN: buffer_store_dwordx2 [[RET]]
620 ; CIVI: buffer_store_dwordx2 [[RET]]
621
622 ; GFX9: global_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32 glc{{$}}
542623 define amdgpu_kernel void @atomic_umin_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
543624 entry:
544625 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
549630 }
550631
551632 ; GCN-LABEL: {{^}}atomic_umin_i64:
552 ; GCN: buffer_atomic_umin_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
633 ; CIVI: buffer_atomic_umin_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
634 ; GFX9: global_atomic_umin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off{{$}}
553635 define amdgpu_kernel void @atomic_umin_i64(i64 addrspace(1)* %out, i64 %in) {
554636 entry:
555637 %tmp0 = atomicrmw volatile umin i64 addrspace(1)* %out, i64 %in seq_cst
557639 }
558640
559641 ; GCN-LABEL: {{^}}atomic_umin_i64_ret:
560 ; CI: buffer_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
561 ; GCN: buffer_store_dwordx2 [[RET]]
642 ; CIVI: buffer_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
643 ; CIVI: buffer_store_dwordx2 [[RET]]
644
645 ; GFX9: global_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off glc{{$}}
562646 define amdgpu_kernel void @atomic_umin_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
563647 entry:
564648 %tmp0 = atomicrmw volatile umin i64 addrspace(1)* %out, i64 %in seq_cst
569653 ; GCN-LABEL: {{^}}atomic_umin_i64_addr64:
570654 ; CI: buffer_atomic_umin_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
571655 ; VI: flat_atomic_umin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
656 ; GFX9: global_atomic_umin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off{{$}}
572657 define amdgpu_kernel void @atomic_umin_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index) {
573658 entry:
574659 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
579664 ; GCN-LABEL: {{^}}atomic_umin_i64_ret_addr64:
580665 ; CI: buffer_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
581666 ; VI: flat_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
582 ; GCN: buffer_store_dwordx2 [[RET]]
667 ; CIVI: buffer_store_dwordx2 [[RET]]
668
669 ; GFX9: global_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off glc{{$}}
583670 define amdgpu_kernel void @atomic_umin_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
584671 entry:
585672 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
589676 }
590677
591678 ; GCN-LABEL: {{^}}atomic_or_i64_offset:
592 ; GCN: buffer_atomic_or_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
679 ; CIVI: buffer_atomic_or_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
680 ; GFX9: global_atomic_or_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32{{$}}
593681 define amdgpu_kernel void @atomic_or_i64_offset(i64 addrspace(1)* %out, i64 %in) {
594682 entry:
595683 %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
598686 }
599687
600688 ; GCN-LABEL: {{^}}atomic_or_i64_ret_offset:
601 ; GCN: buffer_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
602 ; GCN: buffer_store_dwordx2 [[RET]]
689 ; CIVI: buffer_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
690 ; CIVI: buffer_store_dwordx2 [[RET]]
691
692 ; GFX9: global_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32 glc{{$}}
603693 define amdgpu_kernel void @atomic_or_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
604694 entry:
605695 %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
611701 ; GCN-LABEL: {{^}}atomic_or_i64_addr64_offset:
612702 ; CI: buffer_atomic_or_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}
613703 ; VI: flat_atomic_or_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
704 ; GFX9: global_atomic_or_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32{{$}}
614705 define amdgpu_kernel void @atomic_or_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) {
615706 entry:
616707 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
622713 ; GCN-LABEL: {{^}}atomic_or_i64_ret_addr64_offset:
623714 ; CI: buffer_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{