llvm.org GIT mirror llvm / bc0aee5
AMDGPU: Add atomic_inc + atomic_dec intrinsics These are different than atomicrmw add 1 because they have an additional input value to clamp the result. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@266074 91177308-0d34-0410-b5e6-96231b3b80d8 Matt Arsenault 3 years ago
12 changed file(s) with 615 addition(s) and 13 deletion(s). Raw diff Collapse all Expand all
148148 def int_amdgcn_cubetc : GCCBuiltin<"__builtin_amdgcn_cubetc">,
149149 Intrinsic<[llvm_float_ty],
150150 [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]
151 >;
152
153 // TODO: Do we want an ordering for these?
154 def int_amdgcn_atomic_inc : Intrinsic<[llvm_anyint_ty],
155 [llvm_anyptr_ty, LLVMMatchType<0>],
156 [IntrReadWriteArgMem, NoCapture<0>]
157 >;
158
159 def int_amdgcn_atomic_dec : Intrinsic<[llvm_anyint_ty],
160 [llvm_anyptr_ty, LLVMMatchType<0>],
161 [IntrReadWriteArgMem, NoCapture<0>]
151162 >;
152163
153164 class AMDGPUImageLoad : Intrinsic <
336336 return nullptr; // Already selected.
337337 }
338338
339 if (isa(N))
339 if (isa(N) ||
340 (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC))
340341 N = glueCopyToM0(N);
341342
342343 switch (Opc) {
28122812 NODE_NAME_CASE(STORE_MSKOR)
28132813 NODE_NAME_CASE(TBUFFER_STORE_FORMAT)
28142814 NODE_NAME_CASE(ATOMIC_CMP_SWAP)
2815 NODE_NAME_CASE(ATOMIC_INC)
2816 NODE_NAME_CASE(ATOMIC_DEC)
28152817 case AMDGPUISD::LAST_AMDGPU_ISD_NUMBER: break;
28162818 }
28172819 return nullptr;
313313 LOAD_CONSTANT,
314314 TBUFFER_STORE_FORMAT,
315315 ATOMIC_CMP_SWAP,
316 ATOMIC_INC,
317 ATOMIC_DEC,
316318 LAST_AMDGPU_ISD_NUMBER
317319 };
318320
332332 >;
333333
334334 def : FlatAtomicPat ;
335 def : FlatAtomicPat ;
336 def : FlatAtomicPat ;
337 def : FlatAtomicPat ;
335338 def : FlatAtomicPat ;
336 def : FlatAtomicPat ;
337339 def : FlatAtomicPat ;
338340 def : FlatAtomicPat ;
339341 def : FlatAtomicPat ;
343345 def : FlatAtomicPat ;
344346 def : FlatAtomicPat ;
345347
348 def : FlatAtomicPat ;
349 def : FlatAtomicPat ;
346350 def : FlatAtomicPat ;
347351
348352 } // End Predicates = [isCIVI]
126126 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f32, Custom);
127127 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v16i8, Custom);
128128 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom);
129
130 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
129131
130132 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
131133 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
306308 // TargetLowering queries
307309 //===----------------------------------------------------------------------===//
308310
311 bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
312 const CallInst &CI,
313 unsigned IntrID) const {
314 switch (IntrID) {
315 case Intrinsic::amdgcn_atomic_inc:
316 case Intrinsic::amdgcn_atomic_dec:
317 Info.opc = ISD::INTRINSIC_W_CHAIN;
318 Info.memVT = MVT::getVT(CI.getType());
319 Info.ptrVal = CI.getOperand(0);
320 Info.align = 0;
321 Info.vol = false;
322 Info.readMem = true;
323 Info.writeMem = true;
324 return true;
325 default:
326 return false;
327 }
328 }
329
309330 bool SITargetLowering::isShuffleMaskLegal(const SmallVectorImpl &,
310331 EVT) const {
311332 // SI has some legal vector types, but no legal vector operations. Say no
11721193 return LowerGlobalAddress(MFI, Op, DAG);
11731194 }
11741195 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
1196 case ISD::INTRINSIC_W_CHAIN: return LowerINTRINSIC_W_CHAIN(Op, DAG);
11751197 case ISD::INTRINSIC_VOID: return LowerINTRINSIC_VOID(Op, DAG);
11761198 }
11771199 return SDValue();
16371659 }
16381660 }
16391661
1662 SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
1663 SelectionDAG &DAG) const {
1664 unsigned IntrID = cast(Op.getOperand(1))->getZExtValue();
1665 switch (IntrID) {
1666 case Intrinsic::amdgcn_atomic_inc:
1667 case Intrinsic::amdgcn_atomic_dec: {
1668 MemSDNode *M = cast(Op);
1669 unsigned Opc = (IntrID == Intrinsic::amdgcn_atomic_inc) ?
1670 AMDGPUISD::ATOMIC_INC : AMDGPUISD::ATOMIC_DEC;
1671 SDValue Ops[] = {
1672 M->getOperand(0), // Chain
1673 M->getOperand(2), // Ptr
1674 M->getOperand(3) // Value
1675 };
1676
1677 return DAG.getMemIntrinsicNode(Opc, SDLoc(Op), M->getVTList(), Ops,
1678 M->getMemoryVT(), M->getMemOperand());
1679 }
1680 default:
1681 return SDValue();
1682 }
1683 }
1684
16401685 SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
16411686 SelectionDAG &DAG) const {
16421687 MachineFunction &MF = DAG.getMachineFunction();
26432688 case ISD::ATOMIC_LOAD_MIN:
26442689 case ISD::ATOMIC_LOAD_MAX:
26452690 case ISD::ATOMIC_LOAD_UMIN:
2646 case ISD::ATOMIC_LOAD_UMAX: { // TODO: Target mem intrinsics.
2691 case ISD::ATOMIC_LOAD_UMAX:
2692 case AMDGPUISD::ATOMIC_INC:
2693 case AMDGPUISD::ATOMIC_DEC: { // TODO: Target mem intrinsics.
26472694 if (DCI.isBeforeLegalize())
26482695 break;
26492696
2929 MVT VT, unsigned Offset) const;
3030
3131 SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
32 SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
3233 SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
3334 SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const;
3435 SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
6465 bool isCFIntrinsic(const SDNode *Intr) const;
6566 public:
6667 SITargetLowering(TargetMachine &tm, const AMDGPUSubtarget &STI);
68
69 bool getTgtMemIntrinsic(IntrinsicInfo &, const CallInst &,
70 unsigned IntrinsicID) const override;
6771
6872 bool isShuffleMaskLegal(const SmallVectorImpl &/*Mask*/,
6973 EVT /*VT*/) const override;
9292 def SIload_constant : SDNode<"AMDGPUISD::LOAD_CONSTANT",
9393 SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisVT<1, v4i32>, SDTCisVT<2, i32>]>,
9494 [SDNPMayLoad, SDNPMemOperand]
95 >;
96
97 def SIatomic_inc : SDNode<"AMDGPUISD::ATOMIC_INC", SDTAtomic2,
98 [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
99 >;
100
101 def SIatomic_dec : SDNode<"AMDGPUISD::ATOMIC_DEC", SDTAtomic2,
102 [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
95103 >;
96104
97105 def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT",
180188 }]>;
181189
182190 //===----------------------------------------------------------------------===//
191 // PatFrags for global memory operations
192 //===----------------------------------------------------------------------===//
193
194 def atomic_inc_global : global_binary_atomic_op;
195 def atomic_dec_global : global_binary_atomic_op;
196
197 //===----------------------------------------------------------------------===//
183198 // SDNodes and PatFrag for local loads and stores to enable s_mov_b32 m0, -1
184199 // to be glued to the memory instructions.
185200 //===----------------------------------------------------------------------===//
278293 return isCBranchSCC(N);
279294 }]>;
280295
281 multiclass SIAtomicM0Glue2 {
282
283 def _glue : SDNode <"ISD::ATOMIC_"#op_name, SDTAtomic2,
296 multiclass SIAtomicM0Glue2 {
297
298 def _glue : SDNode <
299 !if(is_amdgpu, "AMDGPUISD", "ISD")#"::ATOMIC_"#op_name, SDTAtomic2,
284300 [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
285301 >;
286302
288304 }
289305
290306 defm si_atomic_load_add : SIAtomicM0Glue2 <"LOAD_ADD">;
307 defm si_atomic_load_sub : SIAtomicM0Glue2 <"LOAD_SUB">;
308 defm si_atomic_inc : SIAtomicM0Glue2 <"INC", 1>;
309 defm si_atomic_dec : SIAtomicM0Glue2 <"DEC", 1>;
291310 defm si_atomic_load_and : SIAtomicM0Glue2 <"LOAD_AND">;
292311 defm si_atomic_load_min : SIAtomicM0Glue2 <"LOAD_MIN">;
293312 defm si_atomic_load_max : SIAtomicM0Glue2 <"LOAD_MAX">;
294313 defm si_atomic_load_or : SIAtomicM0Glue2 <"LOAD_OR">;
295 defm si_atomic_load_sub : SIAtomicM0Glue2 <"LOAD_SUB">;
296314 defm si_atomic_load_xor : SIAtomicM0Glue2 <"LOAD_XOR">;
297315 defm si_atomic_load_umin : SIAtomicM0Glue2 <"LOAD_UMIN">;
298316 defm si_atomic_load_umax : SIAtomicM0Glue2 <"LOAD_UMAX">;
10411041 defm BUFFER_ATOMIC_XOR : MUBUF_Atomic <
10421042 mubuf<0x3b, 0x4a>, "buffer_atomic_xor", VGPR_32, i32, atomic_xor_global
10431043 >;
1044 //def BUFFER_ATOMIC_INC : MUBUF_ , "buffer_atomic_inc", []>;
1045 //def BUFFER_ATOMIC_DEC : MUBUF_ , "buffer_atomic_dec", []>;
1044 defm BUFFER_ATOMIC_INC : MUBUF_Atomic <
1045 mubuf<0x3c, 0x4b>, "buffer_atomic_inc", VGPR_32, i32, atomic_inc_global
1046 >;
1047 defm BUFFER_ATOMIC_DEC : MUBUF_Atomic <
1048 mubuf<0x3d, 0x4c>, "buffer_atomic_dec", VGPR_32, i32, atomic_dec_global
1049 >;
1050
10461051 //def BUFFER_ATOMIC_FCMPSWAP : MUBUF_ , "buffer_atomic_fcmpswap", []>; // isn't on VI
10471052 //def BUFFER_ATOMIC_FMIN : MUBUF_ , "buffer_atomic_fmin", []>; // isn't on VI
10481053 //def BUFFER_ATOMIC_FMAX : MUBUF_ , "buffer_atomic_fmax", []>; // isn't on VI
10601065 //def BUFFER_ATOMIC_AND_X2 : MUBUF_X2 , "buffer_atomic_and_x2", []>;
10611066 //def BUFFER_ATOMIC_OR_X2 : MUBUF_X2 , "buffer_atomic_or_x2", []>;
10621067 //def BUFFER_ATOMIC_XOR_X2 : MUBUF_X2 , "buffer_atomic_xor_x2", []>;
1063 //def BUFFER_ATOMIC_INC_X2 : MUBUF_X2 , "buffer_atomic_inc_x2", []>;
1064 //def BUFFER_ATOMIC_DEC_X2 : MUBUF_X2 , "buffer_atomic_dec_x2", []>;
1068 defm BUFFER_ATOMIC_INC_X2 : MUBUF_Atomic <
1069 mubuf<0x5c, 0x6b>, "buffer_atomic_inc_x2", VReg_64, i64, atomic_inc_global
1070 >;
1071 defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Atomic <
1072 mubuf<0x5d, 0x6c>, "buffer_atomic_dec_x2", VReg_64, i64, atomic_dec_global
1073 >;
10651074 //def BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_X2 , "buffer_atomic_fcmpswap_x2", []>; // isn't on VI
10661075 //def BUFFER_ATOMIC_FMIN_X2 : MUBUF_X2 , "buffer_atomic_fmin_x2", []>; // isn't on VI
10671076 //def BUFFER_ATOMIC_FMAX_X2 : MUBUF_X2 , "buffer_atomic_fmax_x2", []>; // isn't on VI
30723081 def : DSAtomicRetPat;
30733082 def : DSAtomicRetPat;
30743083 def : DSAtomicRetPat;
3084 def : DSAtomicRetPat;
3085 def : DSAtomicRetPat;
30753086 def : DSAtomicRetPat;
30763087 def : DSAtomicRetPat;
30773088 def : DSAtomicRetPat;
30793090 def : DSAtomicRetPat;
30803091 def : DSAtomicRetPat;
30813092 def : DSAtomicRetPat;
3082
30833093 def : DSAtomicCmpXChg;
30843094
30853095 // 64-bit atomics.
30863096 def : DSAtomicRetPat;
30873097 def : DSAtomicRetPat;
30883098 def : DSAtomicRetPat;
3099 def : DSAtomicRetPat;
3100 def : DSAtomicRetPat;
30893101 def : DSAtomicRetPat;
30903102 def : DSAtomicRetPat;
30913103 def : DSAtomicRetPat;
0 ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
1 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
2
3 declare i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* nocapture, i32) #2
4 declare i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* nocapture, i32) #2
5
6 declare i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* nocapture, i64) #2
7 declare i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* nocapture, i64) #2
8
9 declare i32 @llvm.amdgcn.workitem.id.x() #1
10
11 ; GCN-LABEL: {{^}}lds_atomic_dec_ret_i32:
12 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
13 ; GCN: ds_dec_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]]
14 define void @lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) #0 {
15 %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42)
16 store i32 %result, i32 addrspace(1)* %out
17 ret void
18 }
19
20 ; GCN-LABEL: {{^}}lds_atomic_dec_ret_i32_offset:
21 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
22 ; GCN: ds_dec_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]] offset:16
23 define void @lds_atomic_dec_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) #0 {
24 %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
25 %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %gep, i32 42)
26 store i32 %result, i32 addrspace(1)* %out
27 ret void
28 }
29
30 ; FUNC-LABEL: {{^}}lds_atomic_dec_noret_i32:
31 ; GCN: s_load_dword [[SPTR:s[0-9]+]],
32 ; GCN: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
33 ; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
34 ; GCN: ds_dec_u32 [[VPTR]], [[DATA]]
35 define void @lds_atomic_dec_noret_i32(i32 addrspace(3)* %ptr) nounwind {
36 %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42)
37 ret void
38 }
39
40 ; FUNC-LABEL: {{^}}lds_atomic_dec_noret_i32_offset:
41 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
42 ; GCN: ds_dec_u32 v{{[0-9]+}}, [[K]] offset:16
43 define void @lds_atomic_dec_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
44 %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
45 %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %gep, i32 42)
46 ret void
47 }
48
49 ; GCN-LABEL: {{^}}global_atomic_dec_ret_i32:
50 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
51 ; GCN: buffer_atomic_dec [[K]], s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
52 define void @global_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
53 %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %ptr, i32 42)
54 store i32 %result, i32 addrspace(1)* %out
55 ret void
56 }
57
58 ; GCN-LABEL: {{^}}global_atomic_dec_ret_i32_offset:
59 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
60 ; GCN: buffer_atomic_dec [[K]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16 glc{{$}}
61 define void @global_atomic_dec_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
62 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
63 %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %gep, i32 42)
64 store i32 %result, i32 addrspace(1)* %out
65 ret void
66 }
67
68 ; FUNC-LABEL: {{^}}global_atomic_dec_noret_i32:
69 ; GCN: buffer_atomic_dec [[K]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
70 define void @global_atomic_dec_noret_i32(i32 addrspace(1)* %ptr) nounwind {
71 %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %ptr, i32 42)
72 ret void
73 }
74
75 ; FUNC-LABEL: {{^}}global_atomic_dec_noret_i32_offset:
76 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
77 ; GCN: buffer_atomic_dec [[K]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
78 define void @global_atomic_dec_noret_i32_offset(i32 addrspace(1)* %ptr) nounwind {
79 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
80 %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %gep, i32 42)
81 ret void
82 }
83
84 ; GCN-LABEL: {{^}}global_atomic_dec_ret_i32_offset_addr64:
85 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
86 ; CI: buffer_atomic_dec [[K]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:20 glc{{$}}
87 ; VI: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
88 define void @global_atomic_dec_ret_i32_offset_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
89 %id = call i32 @llvm.amdgcn.workitem.id.x()
90 %gep.tid = getelementptr i32, i32 addrspace(1)* %ptr, i32 %id
91 %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id
92 %gep = getelementptr i32, i32 addrspace(1)* %gep.tid, i32 5
93 %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %gep, i32 42)
94 store i32 %result, i32 addrspace(1)* %out.gep
95 ret void
96 }
97
98 ; GCN-LABEL: {{^}}global_atomic_dec_noret_i32_offset_addr64:
99 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
100 ; CI: buffer_atomic_dec [[K]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:20{{$}}
101 ; VI: flat_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
102 define void @global_atomic_dec_noret_i32_offset_addr64(i32 addrspace(1)* %ptr) #0 {
103 %id = call i32 @llvm.amdgcn.workitem.id.x()
104 %gep.tid = getelementptr i32, i32 addrspace(1)* %ptr, i32 %id
105 %gep = getelementptr i32, i32 addrspace(1)* %gep.tid, i32 5
106 %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %gep, i32 42)
107 ret void
108 }
109
110 @lds0 = addrspace(3) global [512 x i32] undef
111
112 ; SI-LABEL: {{^}}atomic_dec_shl_base_lds_0:
113 ; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
114 ; SI: ds_dec_rtn_u32 {{v[0-9]+}}, [[PTR]] offset:8
115 define void @atomic_dec_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
116 %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
117 %idx.0 = add nsw i32 %tid.x, 2
118 %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds0, i32 0, i32 %idx.0
119 %val0 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %arrayidx0, i32 9)
120 store i32 %idx.0, i32 addrspace(1)* %add_use
121 store i32 %val0, i32 addrspace(1)* %out
122 ret void
123 }
124
125 ; GCN-LABEL: {{^}}lds_atomic_dec_ret_i64:
126 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
127 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
128 ; GCN: ds_dec_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
129 define void @lds_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) #0 {
130 %result = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %ptr, i64 42)
131 store i64 %result, i64 addrspace(1)* %out
132 ret void
133 }
134
135 ; GCN-LABEL: {{^}}lds_atomic_dec_ret_i64_offset:
136 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
137 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
138 ; GCN: ds_dec_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32
139 define void @lds_atomic_dec_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) #0 {
140 %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
141 %result = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %gep, i64 42)
142 store i64 %result, i64 addrspace(1)* %out
143 ret void
144 }
145
146 ; FUNC-LABEL: {{^}}lds_atomic_dec_noret_i64:
147 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
148 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
149 ; GCN: ds_dec_u64 v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
150 define void @lds_atomic_dec_noret_i64(i64 addrspace(3)* %ptr) nounwind {
151 %result = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %ptr, i64 42)
152 ret void
153 }
154
155 ; FUNC-LABEL: {{^}}lds_atomic_dec_noret_i64_offset:
156 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
157 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
158 ; GCN: ds_dec_u64 v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32{{$}}
159 define void @lds_atomic_dec_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
160 %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
161 %result = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %gep, i64 42)
162 ret void
163 }
164
165 ; GCN-LABEL: {{^}}global_atomic_dec_ret_i64:
166 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
167 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
168 ; GCN: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
169 define void @global_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
170 %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %ptr, i64 42)
171 store i64 %result, i64 addrspace(1)* %out
172 ret void
173 }
174
175 ; GCN-LABEL: {{^}}global_atomic_dec_ret_i64_offset:
176 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
177 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
178 ; GCN: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32 glc{{$}}
179 define void @global_atomic_dec_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
180 %gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4
181 %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %gep, i64 42)
182 store i64 %result, i64 addrspace(1)* %out
183 ret void
184 }
185
186 ; FUNC-LABEL: {{^}}global_atomic_dec_noret_i64:
187 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
188 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
189 ; GCN: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
190 define void @global_atomic_dec_noret_i64(i64 addrspace(1)* %ptr) nounwind {
191 %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %ptr, i64 42)
192 ret void
193 }
194
195 ; FUNC-LABEL: {{^}}global_atomic_dec_noret_i64_offset:
196 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
197 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
198 ; GCN: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32{{$}}
199 define void @global_atomic_dec_noret_i64_offset(i64 addrspace(1)* %ptr) nounwind {
200 %gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4
201 %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %gep, i64 42)
202 ret void
203 }
204
205 ; GCN-LABEL: {{^}}global_atomic_dec_ret_i64_offset_addr64:
206 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
207 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
208 ; CI: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40 glc{{$}}
209 ; VI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
210 define void @global_atomic_dec_ret_i64_offset_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
211 %id = call i32 @llvm.amdgcn.workitem.id.x()
212 %gep.tid = getelementptr i64, i64 addrspace(1)* %ptr, i32 %id
213 %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id
214 %gep = getelementptr i64, i64 addrspace(1)* %gep.tid, i32 5
215 %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %gep, i64 42)
216 store i64 %result, i64 addrspace(1)* %out.gep
217 ret void
218 }
219
220 ; GCN-LABEL: {{^}}global_atomic_dec_noret_i64_offset_addr64:
221 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
222 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
223 ; CI: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40{{$}}
224 ; VI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
225 define void @global_atomic_dec_noret_i64_offset_addr64(i64 addrspace(1)* %ptr) #0 {
226 %id = call i32 @llvm.amdgcn.workitem.id.x()
227 %gep.tid = getelementptr i64, i64 addrspace(1)* %ptr, i32 %id
228 %gep = getelementptr i64, i64 addrspace(1)* %gep.tid, i32 5
229 %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %gep, i64 42)
230 ret void
231 }
232
233 @lds1 = addrspace(3) global [512 x i64] undef, align 8
234
235 ; GCN-LABEL: {{^}}atomic_dec_shl_base_lds_0_i64:
236 ; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 3, {{v[0-9]+}}
237 ; GCN: ds_dec_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, [[PTR]], v{{\[[0-9]+:[0-9]+\]}} offset:16
238 define void @atomic_dec_shl_base_lds_0_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
239 %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
240 %idx.0 = add nsw i32 %tid.x, 2
241 %arrayidx0 = getelementptr inbounds [512 x i64], [512 x i64] addrspace(3)* @lds1, i32 0, i32 %idx.0
242 %val0 = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %arrayidx0, i64 9)
243 store i32 %idx.0, i32 addrspace(1)* %add_use
244 store i64 %val0, i64 addrspace(1)* %out
245 ret void
246 }
247
248 attributes #0 = { nounwind }
249 attributes #1 = { nounwind readnone }
250 attributes #2 = { nounwind argmemonly }
0 ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
1 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
2
3 declare i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* nocapture, i32) #2
4 declare i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* nocapture, i32) #2
5
6 declare i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* nocapture, i64) #2
7 declare i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* nocapture, i64) #2
8
9 declare i32 @llvm.amdgcn.workitem.id.x() #1
10
11 ; GCN-LABEL: {{^}}lds_atomic_inc_ret_i32:
12 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
13 ; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]]
14 define void @lds_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) #0 {
15 %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42)
16 store i32 %result, i32 addrspace(1)* %out
17 ret void
18 }
19
20 ; GCN-LABEL: {{^}}lds_atomic_inc_ret_i32_offset:
21 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
22 ; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]] offset:16
23 define void @lds_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) #0 {
24 %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
25 %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %gep, i32 42)
26 store i32 %result, i32 addrspace(1)* %out
27 ret void
28 }
29
30 ; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i32:
31 ; GCN: s_load_dword [[SPTR:s[0-9]+]],
32 ; GCN: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
33 ; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
34 ; GCN: ds_inc_u32 [[VPTR]], [[DATA]]
35 define void @lds_atomic_inc_noret_i32(i32 addrspace(3)* %ptr) nounwind {
36 %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42)
37 ret void
38 }
39
40 ; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i32_offset:
41 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
42 ; GCN: ds_inc_u32 v{{[0-9]+}}, [[K]] offset:16
43 define void @lds_atomic_inc_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
44 %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
45 %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %gep, i32 42)
46 ret void
47 }
48
49 ; GCN-LABEL: {{^}}global_atomic_inc_ret_i32:
50 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
51 ; GCN: buffer_atomic_inc [[K]], s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
52 define void @global_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
53 %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %ptr, i32 42)
54 store i32 %result, i32 addrspace(1)* %out
55 ret void
56 }
57
58 ; GCN-LABEL: {{^}}global_atomic_inc_ret_i32_offset:
59 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
60 ; GCN: buffer_atomic_inc [[K]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16 glc{{$}}
61 define void @global_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
62 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
63 %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %gep, i32 42)
64 store i32 %result, i32 addrspace(1)* %out
65 ret void
66 }
67
68 ; FUNC-LABEL: {{^}}global_atomic_inc_noret_i32:
69 ; GCN: buffer_atomic_inc [[K]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
70 define void @global_atomic_inc_noret_i32(i32 addrspace(1)* %ptr) nounwind {
71 %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %ptr, i32 42)
72 ret void
73 }
74
75 ; FUNC-LABEL: {{^}}global_atomic_inc_noret_i32_offset:
76 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
77 ; GCN: buffer_atomic_inc [[K]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
78 define void @global_atomic_inc_noret_i32_offset(i32 addrspace(1)* %ptr) nounwind {
79 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
80 %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %gep, i32 42)
81 ret void
82 }
83
84 ; GCN-LABEL: {{^}}global_atomic_inc_ret_i32_offset_addr64:
85 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
86 ; CI: buffer_atomic_inc [[K]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:20 glc{{$}}
87 ; VI: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
88 define void @global_atomic_inc_ret_i32_offset_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
89 %id = call i32 @llvm.amdgcn.workitem.id.x()
90 %gep.tid = getelementptr i32, i32 addrspace(1)* %ptr, i32 %id
91 %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id
92 %gep = getelementptr i32, i32 addrspace(1)* %gep.tid, i32 5
93 %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %gep, i32 42)
94 store i32 %result, i32 addrspace(1)* %out.gep
95 ret void
96 }
97
98 ; GCN-LABEL: {{^}}global_atomic_inc_noret_i32_offset_addr64:
99 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
100 ; CI: buffer_atomic_inc [[K]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:20{{$}}
101 ; VI: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
102 define void @global_atomic_inc_noret_i32_offset_addr64(i32 addrspace(1)* %ptr) #0 {
103 %id = call i32 @llvm.amdgcn.workitem.id.x()
104 %gep.tid = getelementptr i32, i32 addrspace(1)* %ptr, i32 %id
105 %gep = getelementptr i32, i32 addrspace(1)* %gep.tid, i32 5
106 %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %gep, i32 42)
107 ret void
108 }
109
110 @lds0 = addrspace(3) global [512 x i32] undef, align 4
111
112 ; GCN-LABEL: {{^}}atomic_inc_shl_base_lds_0_i32:
113 ; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
114 ; GCN: ds_inc_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
115 define void @atomic_inc_shl_base_lds_0_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
116 %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
117 %idx.0 = add nsw i32 %tid.x, 2
118 %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds0, i32 0, i32 %idx.0
119 %val0 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %arrayidx0, i32 9)
120 store i32 %idx.0, i32 addrspace(1)* %add_use
121 store i32 %val0, i32 addrspace(1)* %out
122 ret void
123 }
124
125 ; GCN-LABEL: {{^}}lds_atomic_inc_ret_i64:
126 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
127 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
128 ; GCN: ds_inc_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
129 define void @lds_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) #0 {
130 %result = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %ptr, i64 42)
131 store i64 %result, i64 addrspace(1)* %out
132 ret void
133 }
134
135 ; GCN-LABEL: {{^}}lds_atomic_inc_ret_i64_offset:
136 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
137 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
138 ; GCN: ds_inc_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32
139 define void @lds_atomic_inc_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) #0 {
140 %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
141 %result = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %gep, i64 42)
142 store i64 %result, i64 addrspace(1)* %out
143 ret void
144 }
145
146 ; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i64:
147 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
148 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
149 ; GCN: ds_inc_u64 v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
150 define void @lds_atomic_inc_noret_i64(i64 addrspace(3)* %ptr) nounwind {
151 %result = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %ptr, i64 42)
152 ret void
153 }
154
155 ; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i64_offset:
156 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
157 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
158 ; GCN: ds_inc_u64 v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32{{$}}
159 define void @lds_atomic_inc_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
160 %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
161 %result = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %gep, i64 42)
162 ret void
163 }
164
165 ; GCN-LABEL: {{^}}global_atomic_inc_ret_i64:
166 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
167 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
168 ; GCN: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
169 define void @global_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
170 %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %ptr, i64 42)
171 store i64 %result, i64 addrspace(1)* %out
172 ret void
173 }
174
175 ; GCN-LABEL: {{^}}global_atomic_inc_ret_i64_offset:
176 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
177 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
178 ; GCN: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32 glc{{$}}
179 define void @global_atomic_inc_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
180 %gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4 ; 4 x i64 = 32 bytes; folded into the checked offset:32
181 %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %gep, i64 42) ; used result -> glc form
182 store i64 %result, i64 addrspace(1)* %out
183 ret void
184 }
185
186 ; GCN-LABEL: {{^}}global_atomic_inc_noret_i64:
187 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
188 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
189 ; GCN: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
190 define void @global_atomic_inc_noret_i64(i64 addrspace(1)* %ptr) #0 {
191 %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %ptr, i64 42) ; unused result, so no glc bit is expected on the atomic
192 ret void
193 }
194
195 ; GCN-LABEL: {{^}}global_atomic_inc_noret_i64_offset:
196 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
197 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
198 ; GCN: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32{{$}}
199 define void @global_atomic_inc_noret_i64_offset(i64 addrspace(1)* %ptr) #0 {
200 %gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4 ; 4 x i64 = 32 bytes; folded into the checked offset:32
201 %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %gep, i64 42) ; unused result -> no glc
202 ret void
203 }
204
205 ; GCN-LABEL: {{^}}global_atomic_inc_ret_i64_offset_addr64:
206 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
207 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
208 ; CI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40 glc{{$}}
209 ; VI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
210 define void @global_atomic_inc_ret_i64_offset_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
211 %id = call i32 @llvm.amdgcn.workitem.id.x()
212 %gep.tid = getelementptr i64, i64 addrspace(1)* %ptr, i32 %id ; variable (per-lane) base -> CI uses the addr64 buffer form
213 %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id
214 %gep = getelementptr i64, i64 addrspace(1)* %gep.tid, i32 5 ; 5 x i64 = 40 bytes, folded into offset:40 on CI; the VI flat check above carries no immediate offset
215 %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %gep, i64 42) ; used result -> glc form on both subtargets
216 store i64 %result, i64 addrspace(1)* %out.gep
217 ret void
218 }
219
220 ; GCN-LABEL: {{^}}global_atomic_inc_noret_i64_offset_addr64:
221 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
222 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
223 ; CI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40{{$}}
224 ; VI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
225 define void @global_atomic_inc_noret_i64_offset_addr64(i64 addrspace(1)* %ptr) #0 {
226 %id = call i32 @llvm.amdgcn.workitem.id.x()
227 %gep.tid = getelementptr i64, i64 addrspace(1)* %ptr, i32 %id ; variable (per-lane) base -> CI uses the addr64 buffer form
228 %gep = getelementptr i64, i64 addrspace(1)* %gep.tid, i32 5 ; 5 x i64 = 40 bytes, folded into offset:40 on CI
229 %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %gep, i64 42) ; unused result -> no glc on either subtarget
230 ret void
231 }
232
233 @lds1 = addrspace(3) global [512 x i64] undef, align 8 ; LDS buffer addressed via a shifted index in atomic_inc_shl_base_lds_0_i64
234
235 ; GCN-LABEL: {{^}}atomic_inc_shl_base_lds_0_i64:
236 ; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 3, {{v[0-9]+}}
237 ; GCN: ds_inc_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, [[PTR]], v{{\[[0-9]+:[0-9]+\]}} offset:16
238 define void @atomic_inc_shl_base_lds_0_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
239 %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
240 %idx.0 = add nsw i32 %tid.x, 2 ; +2 i64 elements = 16 bytes, expected folded into the ds_inc_rtn offset:16 while the shl-by-3 remains
241 %arrayidx0 = getelementptr inbounds [512 x i64], [512 x i64] addrspace(3)* @lds1, i32 0, i32 %idx.0
242 %val0 = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %arrayidx0, i64 9)
243 store i32 %idx.0, i32 addrspace(1)* %add_use ; second use of %idx.0 — presumably keeps the add from being folded away entirely; verify against the CHECK lines' intent
244 store i64 %val0, i64 addrspace(1)* %out
245 ret void
246 }
247
248 attributes #0 = { nounwind } ; default attribute group for the test entry points
249 attributes #1 = { nounwind readnone } ; used on the workitem.id.x calls
250 attributes #2 = { nounwind argmemonly } ; not referenced by the functions visible here — presumably used earlier in the file; verify
323323 ret void
324324 }
325325
326 ; XXX - Is it really necessary to load 4 into VGPR?
327326 ; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32:
328327 ; GCN: s_load_dword [[SPTR:s[0-9]+]],
329328 ; GCN: v_mov_b32_e32 [[DATA:v[0-9]+]], 4