llvm.org GIT mirror llvm / 17ff078
[AMDGPU] gfx1010 memory legalizer Differential Revision: https://reviews.llvm.org/D61535 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@360087 91177308-0d34-0410-b5e6-96231b3b80d8 Stanislav Mekhanoshin 1 year, 5 months ago
7 changed file(s) with 5607 addition(s) and 1707 deletion(s). Raw diff Collapse all Expand all
349349 SIAtomicAddrSpace AddrSpace,
350350 Position Pos) const override;
351351
352 };
353
/// Cache control for GFX10 targets. Derives from SIGfx7CacheControl and
/// overrides the load cache bypass, non-temporal, cache invalidate and wait
/// hooks declared below.
354 class SIGfx10CacheControl : public SIGfx7CacheControl {
355 protected:
/// True when the subtarget runs in CU mode (all waves of a work-group are on
/// the same CU); false means WGP mode, where a work-group's waves may execute
/// on either CU of the WGP.
356 bool CuMode = false;
357
358 /// Sets DLC bit to "true" if present in \p MI. Returns true if \p MI
359 /// is modified, false otherwise.
// NOTE(review): the template argument of enableNamedBit appears to have been
// lost when this text was extracted; upstream presumably names the dlc operand
// here (enableNamedBit<AMDGPU::OpName::dlc>) - confirm against the repository.
360 bool enableDLCBit(const MachineBasicBlock::iterator &MI) const {
361 return enableNamedBit(MI);
362 }
363
364 public:
365
/// Constructs the GFX10 cache control for subtarget \p ST, recording whether
/// CU mode is enabled in \p CuMode.
366 SIGfx10CacheControl(const GCNSubtarget &ST, bool CuMode) :
367 SIGfx7CacheControl(ST), CuMode(CuMode) {};
368
369 bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
370 SIAtomicScope Scope,
371 SIAtomicAddrSpace AddrSpace) const override;
372
373 bool enableNonTemporal(const MachineBasicBlock::iterator &MI) const override;
374
375 bool insertCacheInvalidate(MachineBasicBlock::iterator &MI,
376 SIAtomicScope Scope,
377 SIAtomicAddrSpace AddrSpace,
378 Position Pos) const override;
379
380 bool insertWait(MachineBasicBlock::iterator &MI,
381 SIAtomicScope Scope,
382 SIAtomicAddrSpace AddrSpace,
383 SIMemOp Op,
384 bool IsCrossAddrSpaceOrdering,
385 Position Pos) const override;
352386 };
353387
354388 class SIMemoryLegalizer final : public MachineFunctionPass {
622656 GCNSubtarget::Generation Generation = ST.getGeneration();
623657 if (Generation <= AMDGPUSubtarget::SOUTHERN_ISLANDS)
624658 return make_unique(ST);
625 return make_unique(ST);
659 if (Generation < AMDGPUSubtarget::GFX10)
660 return make_unique(ST);
661 return make_unique(ST, ST.isCuModeEnabled());
626662 }
627663
628664 bool SIGfx6CacheControl::enableLoadCacheBypass(
852888 /// memory.
853889
854890 /// Other address spaces do not have a cache.
891
892 if (Pos == Position::AFTER)
893 --MI;
894
895 return Changed;
896 }
897
/// Sets the cache bypass bits on the load \p MI as required by \p Scope when
/// \p AddrSpace includes the global address space:
///  - system/agent scope: GLC and DLC bits are enabled;
///  - workgroup scope: the GLC bit is enabled only when not in CU mode;
///  - wavefront/singlethread scope: nothing is changed.
/// Returns true if \p MI was modified, false otherwise.
898 bool SIGfx10CacheControl::enableLoadCacheBypass(
899 const MachineBasicBlock::iterator &MI,
900 SIAtomicScope Scope,
901 SIAtomicAddrSpace AddrSpace) const {
// Only instructions that load and do not store are expected here.
902 assert(MI->mayLoad() && !MI->mayStore());
903 bool Changed = false;
904
905 if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
906 /// TODO Do not set glc for rmw atomic operations as they
907 /// implicitly bypass the L0/L1 caches.
908
909 switch (Scope) {
910 case SIAtomicScope::SYSTEM:
911 case SIAtomicScope::AGENT:
912 Changed |= enableGLCBit(MI);
913 Changed |= enableDLCBit(MI);
914 break;
915 case SIAtomicScope::WORKGROUP:
916 // In WGP mode the waves of a work-group can be executing on either CU of
917 // the WGP. Therefore need to bypass the L0 which is per CU. Otherwise in
918 // CU mode and all waves of a work-group are on the same CU, and so the
919 // L0 does not need to be bypassed.
920 if (!CuMode) Changed |= enableGLCBit(MI);
921 break;
922 case SIAtomicScope::WAVEFRONT:
923 case SIAtomicScope::SINGLETHREAD:
924 // No cache to bypass.
925 break;
926 default:
927 llvm_unreachable("Unsupported synchronization scope");
928 }
929 }
930
931 /// The scratch address space does not need the global memory caches
932 /// to be bypassed as all memory operations by the same thread are
933 /// sequentially consistent, and no other thread can access scratch
934 /// memory.
935
936 /// Other address spaces do not have a cache.
937
938 return Changed;
939 }
940
/// Marks the memory instruction \p MI as non-temporal by enabling its SLC bit.
/// Returns true if \p MI was modified, false otherwise.
941 bool SIGfx10CacheControl::enableNonTemporal(
942 const MachineBasicBlock::iterator &MI) const {
// Exactly one of mayLoad/mayStore is expected to hold for MI.
943 assert(MI->mayLoad() ^ MI->mayStore());
944 bool Changed = false;
945
946 Changed |= enableSLCBit(MI);
947 /// TODO for store (non-rmw atomic) instructions also enableGLCBit(MI)
948
949 return Changed;
950 }
951
/// Builds the cache invalidate instructions required by \p Scope at \p MI when
/// \p AddrSpace includes the global address space:
///  - system/agent scope: BUFFER_GL0_INV followed by BUFFER_GL1_INV;
///  - workgroup scope: BUFFER_GL0_INV only when not in CU mode;
///  - wavefront/singlethread scope: nothing.
/// For Position::AFTER the iterator is advanced before building and stepped
/// back by one before returning. Returns true if any instruction was built.
952 bool SIGfx10CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
953 SIAtomicScope Scope,
954 SIAtomicAddrSpace AddrSpace,
955 Position Pos) const {
956 bool Changed = false;
957
// Capture the parent block and debug location before MI is moved.
958 MachineBasicBlock &MBB = *MI->getParent();
959 DebugLoc DL = MI->getDebugLoc();
960
961 if (Pos == Position::AFTER)
962 ++MI;
963
964 if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
965 switch (Scope) {
966 case SIAtomicScope::SYSTEM:
967 case SIAtomicScope::AGENT:
968 BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_GL0_INV));
969 BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_GL1_INV));
970 Changed = true;
971 break;
972 case SIAtomicScope::WORKGROUP:
973 // In WGP mode the waves of a work-group can be executing on either CU of
974 // the WGP. Therefore need to invalidate the L0 which is per CU. Otherwise
975 // in CU mode and all waves of a work-group are on the same CU, and so the
976 // L0 does not need to be invalidated.
977 if (!CuMode) {
978 BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_GL0_INV));
979 Changed = true;
980 }
981 break;
982 case SIAtomicScope::WAVEFRONT:
983 case SIAtomicScope::SINGLETHREAD:
984 // No cache to invalidate.
985 break;
986 default:
987 llvm_unreachable("Unsupported synchronization scope");
988 }
989 }
990
991 /// The scratch address space does not need the global memory cache
992 /// to be flushed as all memory operations by the same thread are
993 /// sequentially consistent, and no other thread can access scratch
994 /// memory.
995
996 /// Other address spaces do not have a cache.
997
998 if (Pos == Position::AFTER)
999 --MI;
1000
1001 return Changed;
1002 }
1003
1004 bool SIGfx10CacheControl::insertWait(MachineBasicBlock::iterator &MI,
1005 SIAtomicScope Scope,
1006 SIAtomicAddrSpace AddrSpace,
1007 SIMemOp Op,
1008 bool IsCrossAddrSpaceOrdering,
1009 Position Pos) const {
1010 bool Changed = false;
1011
1012 MachineBasicBlock &MBB = *MI->getParent();
1013 DebugLoc DL = MI->getDebugLoc();
1014
1015 if (Pos == Position::AFTER)
1016 ++MI;
1017
1018 bool VMCnt = false;
1019 bool VSCnt = false;
1020 bool LGKMCnt = false;
1021
1022 if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
1023 switch (Scope) {
1024 case SIAtomicScope::SYSTEM:
1025 case SIAtomicScope::AGENT:
1026 if ((Op & SIMemOp::LOAD) != SIMemOp::NONE)
1027 VMCnt |= true;
1028 if ((Op & SIMemOp::STORE) != SIMemOp::NONE)
1029 VSCnt |= true;
1030 break;
1031 case SIAtomicScope::WORKGROUP:
1032 // In WGP mode the waves of a work-group can be executing on either CU of
1033 // the WGP. Therefore need to wait for operations to complete to ensure
1034 // they are visible to waves in the other CU as the L0 is per CU.
1035 // Otherwise in CU mode and all waves of a work-group are on the same CU
1036 // which shares the same L0.
1037 if (!CuMode) {
1038 if ((Op & SIMemOp::LOAD) != SIMemOp::NONE)
1039 VMCnt |= true;
1040 if ((Op & SIMemOp::STORE) != SIMemOp::NONE)
1041 VSCnt |= true;
1042 }
1043 break;
1044 case SIAtomicScope::WAVEFRONT:
1045 case SIAtomicScope::SINGLETHREAD:
1046 // The L0 cache keeps all memory operations in order for
1047 // work-items in the same wavefront.
1048 break;
1049 default:
1050 llvm_unreachable("Unsupported synchronization scope");
1051 }
1052 }
1053
1054 if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
1055 switch (Scope) {
1056 case SIAtomicScope::SYSTEM:
1057 case SIAtomicScope::AGENT:
1058 case SIAtomicScope::WORKGROUP:
1059 // If no cross address space ordering then an LDS waitcnt is not
1060 // needed as LDS operations for all waves are executed in a
1061 // total global ordering as observed by all waves. Required if
1062 // also synchronizing with global/GDS memory as LDS operations
1063 // could be reordered with respect to later global/GDS memory
1064 // operations of the same wave.
1065 LGKMCnt |= IsCrossAddrSpaceOrdering;
1066 break;
1067 case SIAtomicScope::WAVEFRONT:
1068 case SIAtomicScope::SINGLETHREAD:
1069 // The LDS keeps all memory operations in order for
1070 // the same wavefront.
1071 break;
1072 default:
1073 llvm_unreachable("Unsupported synchronization scope");
1074 }
1075 }
1076
1077 if ((AddrSpace & SIAtomicAddrSpace::GDS) != SIAtomicAddrSpace::NONE) {
1078 switch (Scope) {
1079 case SIAtomicScope::SYSTEM:
1080 case SIAtomicScope::AGENT:
1081 // If no cross address space ordering then a GDS waitcnt is not
1082 // needed as GDS operations for all waves are executed in a
1083 // total global ordering as observed by all waves. Required if
1084 // also synchronizing with global/LDS memory as GDS operations
1085 // could be reordered with respect to later global/LDS memory
1086 // operations of the same wave.
1087 LGKMCnt |= IsCrossAddrSpaceOrdering;
1088 break;
1089 case SIAtomicScope::WORKGROUP:
1090 case SIAtomicScope::WAVEFRONT:
1091 case SIAtomicScope::SINGLETHREAD:
1092 // The GDS keeps all memory operations in order for
1093 // the same work-group.
1094 break;
1095 default:
1096 llvm_unreachable("Unsupported synchronization scope");
1097 }
1098 }
1099
1100 if (VMCnt || LGKMCnt) {
1101 unsigned WaitCntImmediate =
1102 AMDGPU::encodeWaitcnt(IV,
1103 VMCnt ? 0 : getVmcntBitMask(IV),
1104 getExpcntBitMask(IV),
1105 LGKMCnt ? 0 : getLgkmcntBitMask(IV));
1106 BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT)).addImm(WaitCntImmediate);
1107 Changed = true;
1108 }
1109
1110 if (VSCnt) {
1111 BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT_VSCNT))
1112 .addReg(AMDGPU::SGPR_NULL, RegState::Undef)
1113 .addImm(0);
1114 Changed = true;
1115 }
8551116
8561117 if (Pos == Position::AFTER)
8571118 --MI;
0 ; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s
11 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s
2
3 ; GCN-LABEL: {{^}}system_monotonic_monotonic:
4 ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
5 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
6 ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
7 ; GCN-NOT: buffer_wbinvl1_vol
8 define amdgpu_kernel void @system_monotonic_monotonic(
9 i32* %out, i32 %in, i32 %old) {
10 entry:
11 %gep = getelementptr i32, i32* %out, i32 4
12 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in monotonic monotonic
13 ret void
14 }
15
16 ; GCN-LABEL: {{^}}system_acquire_monotonic:
17 ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
18 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
19 ; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
20 ; GFX8-NEXT: buffer_wbinvl1_vol
21 define amdgpu_kernel void @system_acquire_monotonic(
22 i32* %out, i32 %in, i32 %old) {
23 entry:
24 %gep = getelementptr i32, i32* %out, i32 4
25 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in acquire monotonic
26 ret void
27 }
28
29 ; GCN-LABEL: {{^}}system_release_monotonic:
30 ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
31 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
32 ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
33 ; GCN-NOT: buffer_wbinvl1_vol
34 define amdgpu_kernel void @system_release_monotonic(
35 i32* %out, i32 %in, i32 %old) {
36 entry:
37 %gep = getelementptr i32, i32* %out, i32 4
38 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in release monotonic
39 ret void
40 }
41
42 ; GCN-LABEL: {{^}}system_acq_rel_monotonic:
43 ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
44 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
45 ; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
46 ; GFX8-NEXT: buffer_wbinvl1_vol
47 define amdgpu_kernel void @system_acq_rel_monotonic(
48 i32* %out, i32 %in, i32 %old) {
49 entry:
50 %gep = getelementptr i32, i32* %out, i32 4
51 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in acq_rel monotonic
52 ret void
53 }
54
55 ; GCN-LABEL: {{^}}system_seq_cst_monotonic:
56 ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
57 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
58 ; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
59 ; GFX8-NEXT: buffer_wbinvl1_vol
60 define amdgpu_kernel void @system_seq_cst_monotonic(
61 i32* %out, i32 %in, i32 %old) {
62 entry:
63 %gep = getelementptr i32, i32* %out, i32 4
64 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst monotonic
65 ret void
66 }
67
68 ; GCN-LABEL: {{^}}system_acquire_acquire:
69 ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
70 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
71 ; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
72 ; GFX8-NEXT: buffer_wbinvl1_vol
73 define amdgpu_kernel void @system_acquire_acquire(
74 i32* %out, i32 %in, i32 %old) {
75 entry:
76 %gep = getelementptr i32, i32* %out, i32 4
77 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in acquire acquire
78 ret void
79 }
80
81 ; GCN-LABEL: {{^}}system_release_acquire:
82 ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
83 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
84 ; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
85 ; GFX8-NEXT: buffer_wbinvl1_vol
86 define amdgpu_kernel void @system_release_acquire(
87 i32* %out, i32 %in, i32 %old) {
88 entry:
89 %gep = getelementptr i32, i32* %out, i32 4
90 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in release acquire
91 ret void
92 }
93
94 ; GCN-LABEL: {{^}}system_acq_rel_acquire:
95 ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
96 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
97 ; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
98 ; GFX8-NEXT: buffer_wbinvl1_vol
99 define amdgpu_kernel void @system_acq_rel_acquire(
100 i32* %out, i32 %in, i32 %old) {
101 entry:
102 %gep = getelementptr i32, i32* %out, i32 4
103 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in acq_rel acquire
104 ret void
105 }
106
107 ; GCN-LABEL: {{^}}system_seq_cst_acquire:
108 ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
109 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
110 ; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
111 ; GFX8-NEXT: buffer_wbinvl1_vol
112 define amdgpu_kernel void @system_seq_cst_acquire(
113 i32* %out, i32 %in, i32 %old) {
114 entry:
115 %gep = getelementptr i32, i32* %out, i32 4
116 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst acquire
117 ret void
118 }
119
120 ; GCN-LABEL: {{^}}system_seq_cst_seq_cst:
121 ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
122 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
123 ; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
124 ; GFX8-NEXT: buffer_wbinvl1_vol
125 define amdgpu_kernel void @system_seq_cst_seq_cst(
126 i32* %out, i32 %in, i32 %old) {
127 entry:
128 %gep = getelementptr i32, i32* %out, i32 4
129 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst seq_cst
130 ret void
131 }
2 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+code-object-v3 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10,GFX10WGP %s
3 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+code-object-v3,+cumode -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10,GFX10CU %s
1324
1335 ; GCN-LABEL: {{^}}system_one_as_monotonic_monotonic:
1346 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
135 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
136 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
137 ; GCN-NOT: buffer_wbinvl1_vol
7 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
8 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
9 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
10 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
11 ; GFX8-NOT: buffer_wbinvl1_vol
12 ; GFX10-NOT: buffer_gl{{[01]}}_inv
13 ; GFX10: .amdhsa_kernel system_one_as_monotonic_monotonic
14 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
15 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
16 ; GFX10-NOT: .amdhsa_memory_ordered 0
13817 define amdgpu_kernel void @system_one_as_monotonic_monotonic(
13918 i32* %out, i32 %in, i32 %old) {
14019 entry:
14524
14625 ; GCN-LABEL: {{^}}system_one_as_acquire_monotonic:
14726 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
27 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
14828 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
149 ; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
150 ; GFX8-NEXT: buffer_wbinvl1_vol
29 ; GFX8-NEXT: s_waitcnt vmcnt(0){{$}}
30 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
31 ; GFX8-NEXT: buffer_wbinvl1_vol
32 ; GFX10-NEXT: buffer_gl0_inv
33 ; GFX10-NEXT: buffer_gl1_inv
34 ; GFX10: .amdhsa_kernel system_one_as_acquire_monotonic
35 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
36 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
37 ; GFX10-NOT: .amdhsa_memory_ordered 0
15138 define amdgpu_kernel void @system_one_as_acquire_monotonic(
15239 i32* %out, i32 %in, i32 %old) {
15340 entry:
15845
15946 ; GCN-LABEL: {{^}}system_one_as_release_monotonic:
16047 ; GCN: s_waitcnt vmcnt(0){{$}}
48 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
16149 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
16250 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
163 ; GCN-NOT: buffer_wbinvl1_vol
51 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
52 ; GFX8-NOT: buffer_wbinvl1_vol
53 ; GFX10-NOT: buffer_gl{{[01]}}_inv
54 ; GFX10: .amdhsa_kernel system_one_as_release_monotonic
55 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
56 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
57 ; GFX10-NOT: .amdhsa_memory_ordered 0
16458 define amdgpu_kernel void @system_one_as_release_monotonic(
16559 i32* %out, i32 %in, i32 %old) {
16660 entry:
17165
17266 ; GCN-LABEL: {{^}}system_one_as_acq_rel_monotonic:
17367 ; GCN: s_waitcnt vmcnt(0){{$}}
68 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
17469 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
175 ; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
176 ; GFX8-NEXT: buffer_wbinvl1_vol
70 ; GFX8-NEXT: s_waitcnt vmcnt(0){{$}}
71 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
72 ; GFX8-NEXT: buffer_wbinvl1_vol
73 ; GFX10-NEXT: buffer_gl0_inv
74 ; GFX10-NEXT: buffer_gl1_inv
75 ; GFX10: .amdhsa_kernel system_one_as_acq_rel_monotonic
76 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
77 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
78 ; GFX10-NOT: .amdhsa_memory_ordered 0
17779 define amdgpu_kernel void @system_one_as_acq_rel_monotonic(
17880 i32* %out, i32 %in, i32 %old) {
17981 entry:
18486
18587 ; GCN-LABEL: {{^}}system_one_as_seq_cst_monotonic:
18688 ; GCN: s_waitcnt vmcnt(0){{$}}
89 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
18790 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
188 ; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
189 ; GFX8-NEXT: buffer_wbinvl1_vol
91 ; GFX8-NEXT: s_waitcnt vmcnt(0){{$}}
92 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
93 ; GFX8-NEXT: buffer_wbinvl1_vol
94 ; GFX10-NEXT: buffer_gl0_inv
95 ; GFX10-NEXT: buffer_gl1_inv
96 ; GFX10: .amdhsa_kernel system_one_as_seq_cst_monotonic
97 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
98 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
99 ; GFX10-NOT: .amdhsa_memory_ordered 0
190100 define amdgpu_kernel void @system_one_as_seq_cst_monotonic(
191101 i32* %out, i32 %in, i32 %old) {
192102 entry:
197107
198108 ; GCN-LABEL: {{^}}system_one_as_acquire_acquire:
199109 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
110 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
200111 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
201 ; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
202 ; GFX8-NEXT: buffer_wbinvl1_vol
112 ; GFX8-NEXT: s_waitcnt vmcnt(0){{$}}
113 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
114 ; GFX8-NEXT: buffer_wbinvl1_vol
115 ; GFX10-NEXT: buffer_gl0_inv
116 ; GFX10-NEXT: buffer_gl1_inv
117 ; GFX10: .amdhsa_kernel system_one_as_acquire_acquire
118 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
119 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
120 ; GFX10-NOT: .amdhsa_memory_ordered 0
203121 define amdgpu_kernel void @system_one_as_acquire_acquire(
204122 i32* %out, i32 %in, i32 %old) {
205123 entry:
210128
211129 ; GCN-LABEL: {{^}}system_one_as_release_acquire:
212130 ; GCN: s_waitcnt vmcnt(0){{$}}
131 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
213132 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
214 ; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
215 ; GFX8-NEXT: buffer_wbinvl1_vol
133 ; GFX8-NEXT: s_waitcnt vmcnt(0){{$}}
134 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
135 ; GFX8-NEXT: buffer_wbinvl1_vol
136 ; GFX10-NEXT: buffer_gl0_inv
137 ; GFX10-NEXT: buffer_gl1_inv
138 ; GFX10: .amdhsa_kernel system_one_as_release_acquire
139 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
140 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
141 ; GFX10-NOT: .amdhsa_memory_ordered 0
216142 define amdgpu_kernel void @system_one_as_release_acquire(
217143 i32* %out, i32 %in, i32 %old) {
218144 entry:
223149
224150 ; GCN-LABEL: {{^}}system_one_as_acq_rel_acquire:
225151 ; GCN: s_waitcnt vmcnt(0){{$}}
152 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
226153 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
227 ; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
228 ; GFX8-NEXT: buffer_wbinvl1_vol
154 ; GFX8-NEXT: s_waitcnt vmcnt(0){{$}}
155 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
156 ; GFX8-NEXT: buffer_wbinvl1_vol
157 ; GFX10-NEXT: buffer_gl0_inv
158 ; GFX10-NEXT: buffer_gl1_inv
159 ; GFX10: .amdhsa_kernel system_one_as_acq_rel_acquire
160 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
161 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
162 ; GFX10-NOT: .amdhsa_memory_ordered 0
229163 define amdgpu_kernel void @system_one_as_acq_rel_acquire(
230164 i32* %out, i32 %in, i32 %old) {
231165 entry:
236170
237171 ; GCN-LABEL: {{^}}system_one_as_seq_cst_acquire:
238172 ; GCN: s_waitcnt vmcnt(0){{$}}
173 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
239174 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
240 ; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
241 ; GFX8-NEXT: buffer_wbinvl1_vol
175 ; GFX8-NEXT: s_waitcnt vmcnt(0){{$}}
176 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
177 ; GFX8-NEXT: buffer_wbinvl1_vol
178 ; GFX10-NEXT: buffer_gl0_inv
179 ; GFX10-NEXT: buffer_gl1_inv
180 ; GFX10: .amdhsa_kernel system_one_as_seq_cst_acquire
181 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
182 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
183 ; GFX10-NOT: .amdhsa_memory_ordered 0
242184 define amdgpu_kernel void @system_one_as_seq_cst_acquire(
243185 i32* %out, i32 %in, i32 %old) {
244186 entry:
249191
250192 ; GCN-LABEL: {{^}}system_one_as_seq_cst_seq_cst:
251193 ; GCN: s_waitcnt vmcnt(0){{$}}
194 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
252195 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
253 ; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
254 ; GFX8-NEXT: buffer_wbinvl1_vol
196 ; GFX8-NEXT: s_waitcnt vmcnt(0){{$}}
197 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
198 ; GFX8-NEXT: buffer_wbinvl1_vol
199 ; GFX10-NEXT: buffer_gl0_inv
200 ; GFX10-NEXT: buffer_gl1_inv
201 ; GFX10: .amdhsa_kernel system_one_as_seq_cst_seq_cst
202 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
203 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
204 ; GFX10-NOT: .amdhsa_memory_ordered 0
255205 define amdgpu_kernel void @system_one_as_seq_cst_seq_cst(
256206 i32* %out, i32 %in, i32 %old) {
257207 entry:
260210 ret void
261211 }
262212
263 ; GCN-LABEL: {{^}}singlethread_monotonic_monotonic:
264 ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
265 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
266 ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
267 ; GCN-NOT: buffer_wbinvl1_vol
268 define amdgpu_kernel void @singlethread_monotonic_monotonic(
269 i32* %out, i32 %in, i32 %old) {
270 entry:
271 %gep = getelementptr i32, i32* %out, i32 4
272 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") monotonic monotonic
273 ret void
274 }
275
276 ; GCN-LABEL: {{^}}singlethread_acquire_monotonic:
277 ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
278 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
279 ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
280 ; GCN-NOT: buffer_wbinvl1_vol
281 define amdgpu_kernel void @singlethread_acquire_monotonic(
282 i32* %out, i32 %in, i32 %old) {
283 entry:
284 %gep = getelementptr i32, i32* %out, i32 4
285 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") acquire monotonic
286 ret void
287 }
288
289 ; GCN-LABEL: {{^}}singlethread_release_monotonic:
290 ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
291 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
292 ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
293 ; GCN-NOT: buffer_wbinvl1_vol
294 define amdgpu_kernel void @singlethread_release_monotonic(
295 i32* %out, i32 %in, i32 %old) {
296 entry:
297 %gep = getelementptr i32, i32* %out, i32 4
298 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") release monotonic
299 ret void
300 }
301
302 ; GCN-LABEL: {{^}}singlethread_acq_rel_monotonic:
303 ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
304 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
305 ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
306 ; GCN-NOT: buffer_wbinvl1_vol
307 define amdgpu_kernel void @singlethread_acq_rel_monotonic(
308 i32* %out, i32 %in, i32 %old) {
309 entry:
310 %gep = getelementptr i32, i32* %out, i32 4
311 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel monotonic
312 ret void
313 }
314
315 ; GCN-LABEL: {{^}}singlethread_seq_cst_monotonic:
316 ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
317 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
318 ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
319 ; GCN-NOT: buffer_wbinvl1_vol
320 define amdgpu_kernel void @singlethread_seq_cst_monotonic(
321 i32* %out, i32 %in, i32 %old) {
322 entry:
323 %gep = getelementptr i32, i32* %out, i32 4
324 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst monotonic
325 ret void
326 }
327
328 ; GCN-LABEL: {{^}}singlethread_acquire_acquire:
329 ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
330 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
331 ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
332 ; GCN-NOT: buffer_wbinvl1_vol
333 define amdgpu_kernel void @singlethread_acquire_acquire(
334 i32* %out, i32 %in, i32 %old) {
335 entry:
336 %gep = getelementptr i32, i32* %out, i32 4
337 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") acquire acquire
338 ret void
339 }
340
341 ; GCN-LABEL: {{^}}singlethread_release_acquire:
342 ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
343 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
344 ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
345 ; GCN-NOT: buffer_wbinvl1_vol
346 define amdgpu_kernel void @singlethread_release_acquire(
347 i32* %out, i32 %in, i32 %old) {
348 entry:
349 %gep = getelementptr i32, i32* %out, i32 4
350 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") release acquire
351 ret void
352 }
353
354 ; GCN-LABEL: {{^}}singlethread_acq_rel_acquire:
355 ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
356 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
357 ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
358 ; GCN-NOT: buffer_wbinvl1_vol
359 define amdgpu_kernel void @singlethread_acq_rel_acquire(
360 i32* %out, i32 %in, i32 %old) {
361 entry:
362 %gep = getelementptr i32, i32* %out, i32 4
363 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel acquire
364 ret void
365 }
366
367 ; GCN-LABEL: {{^}}singlethread_seq_cst_acquire:
368 ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
369 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
370 ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
371 ; GCN-NOT: buffer_wbinvl1_vol
372 define amdgpu_kernel void @singlethread_seq_cst_acquire(
373 i32* %out, i32 %in, i32 %old) {
374 entry:
375 %gep = getelementptr i32, i32* %out, i32 4
376 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst acquire
377 ret void
378 }
379
380 ; GCN-LABEL: {{^}}singlethread_seq_cst_seq_cst:
381 ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
382 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
383 ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
384 ; GCN-NOT: buffer_wbinvl1_vol
385 define amdgpu_kernel void @singlethread_seq_cst_seq_cst(
386 i32* %out, i32 %in, i32 %old) {
387 entry:
388 %gep = getelementptr i32, i32* %out, i32 4
389 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst seq_cst
390 ret void
391 }
392
393213 ; GCN-LABEL: {{^}}singlethread_one_as_monotonic_monotonic:
394214 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
395 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
396 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
397 ; GCN-NOT: buffer_wbinvl1_vol
215 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
216 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
217 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
218 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
219 ; GFX8-NOT: buffer_wbinvl1_vol
220 ; GFX10-NOT: buffer_gl{{[01]}}_inv
221 ; GFX10: .amdhsa_kernel singlethread_one_as_monotonic_monotonic
222 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
223 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
224 ; GFX10-NOT: .amdhsa_memory_ordered 0
398225 define amdgpu_kernel void @singlethread_one_as_monotonic_monotonic(
399226 i32* %out, i32 %in, i32 %old) {
400227 entry:
405232
406233 ; GCN-LABEL: {{^}}singlethread_one_as_acquire_monotonic:
407234 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
408 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
409 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
410 ; GCN-NOT: buffer_wbinvl1_vol
235 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
236 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
237 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
238 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
239 ; GFX8-NOT: buffer_wbinvl1_vol
240 ; GFX10-NOT: buffer_gl{{[01]}}_inv
241 ; GFX10: .amdhsa_kernel singlethread_one_as_acquire_monotonic
242 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
243 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
244 ; GFX10-NOT: .amdhsa_memory_ordered 0
411245 define amdgpu_kernel void @singlethread_one_as_acquire_monotonic(
412246 i32* %out, i32 %in, i32 %old) {
413247 entry:
418252
419253 ; GCN-LABEL: {{^}}singlethread_one_as_release_monotonic:
420254 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
421 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
422 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
423 ; GCN-NOT: buffer_wbinvl1_vol
255 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
256 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
257 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
258 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
259 ; GFX8-NOT: buffer_wbinvl1_vol
260 ; GCN-NOT: buffer_gl{{[01]}}_inv
261 ; GFX10: .amdhsa_kernel singlethread_one_as_release_monotonic
262 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
263 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
264 ; GFX10-NOT: .amdhsa_memory_ordered 0
424265 define amdgpu_kernel void @singlethread_one_as_release_monotonic(
425266 i32* %out, i32 %in, i32 %old) {
426267 entry:
431272
432273 ; GCN-LABEL: {{^}}singlethread_one_as_acq_rel_monotonic:
433274 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
434 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
435 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
436 ; GCN-NOT: buffer_wbinvl1_vol
275 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
276 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
277 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
278 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
279 ; GFX8-NOT: buffer_wbinvl1_vol
280 ; GFX10-NOT: buffer_gl{{[01]}}_inv
281 ; GFX10: .amdhsa_kernel singlethread_one_as_acq_rel_monotonic
282 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
283 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
284 ; GFX10-NOT: .amdhsa_memory_ordered 0
437285 define amdgpu_kernel void @singlethread_one_as_acq_rel_monotonic(
438286 i32* %out, i32 %in, i32 %old) {
439287 entry:
444292
445293 ; GCN-LABEL: {{^}}singlethread_one_as_seq_cst_monotonic:
446294 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
447 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
448 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
449 ; GCN-NOT: buffer_wbinvl1_vol
295 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
296 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
297 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
298 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
299 ; GFX8-NOT: buffer_wbinvl1_vol
300 ; GFX10-NOT: buffer_gl{{[01]}}_inv
301 ; GFX10: .amdhsa_kernel singlethread_one_as_seq_cst_monotonic
302 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
303 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
304 ; GFX10-NOT: .amdhsa_memory_ordered 0
450305 define amdgpu_kernel void @singlethread_one_as_seq_cst_monotonic(
451306 i32* %out, i32 %in, i32 %old) {
452307 entry:
457312
458313 ; GCN-LABEL: {{^}}singlethread_one_as_acquire_acquire:
459314 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
460 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
461 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
462 ; GCN-NOT: buffer_wbinvl1_vol
315 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
316 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
317 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
318 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
319 ; GFX8-NOT: buffer_wbinvl1_vol
320 ; GFX10-NOT: buffer_gl{{[01]}}_inv
321 ; GFX10: .amdhsa_kernel singlethread_one_as_acquire_acquire
322 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
323 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
324 ; GFX10-NOT: .amdhsa_memory_ordered 0
463325 define amdgpu_kernel void @singlethread_one_as_acquire_acquire(
464326 i32* %out, i32 %in, i32 %old) {
465327 entry:
470332
471333 ; GCN-LABEL: {{^}}singlethread_one_as_release_acquire:
472334 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
473 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
474 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
475 ; GCN-NOT: buffer_wbinvl1_vol
335 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
336 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
337 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
338 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
339 ; GFX8-NOT: buffer_wbinvl1_vol
340 ; GFX10-NOT: buffer_gl{{[01]}}_inv
341 ; GFX10: .amdhsa_kernel singlethread_one_as_release_acquire
342 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
343 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
344 ; GFX10-NOT: .amdhsa_memory_ordered 0
476345 define amdgpu_kernel void @singlethread_one_as_release_acquire(
477346 i32* %out, i32 %in, i32 %old) {
478347 entry:
483352
484353 ; GCN-LABEL: {{^}}singlethread_one_as_acq_rel_acquire:
485354 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
486 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
487 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
488 ; GCN-NOT: buffer_wbinvl1_vol
355 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
356 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
357 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
358 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
359 ; GFX8-NOT: buffer_wbinvl1_vol
360 ; GFX10-NOT: buffer_gl{{[01]}}_inv
361 ; GFX10: .amdhsa_kernel singlethread_one_as_acq_rel_acquire
362 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
363 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
364 ; GFX10-NOT: .amdhsa_memory_ordered 0
489365 define amdgpu_kernel void @singlethread_one_as_acq_rel_acquire(
490366 i32* %out, i32 %in, i32 %old) {
491367 entry:
496372
497373 ; GCN-LABEL: {{^}}singlethread_one_as_seq_cst_acquire:
498374 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
499 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
500 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
501 ; GCN-NOT: buffer_wbinvl1_vol
375 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
376 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
377 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
378 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
379 ; GFX8-NOT: buffer_wbinvl1_vol
380 ; GFX10-NOT: buffer_gl{{[01]}}_inv
381 ; GFX10: .amdhsa_kernel singlethread_one_as_seq_cst_acquire
382 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
383 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
384 ; GFX10-NOT: .amdhsa_memory_ordered 0
502385 define amdgpu_kernel void @singlethread_one_as_seq_cst_acquire(
503386 i32* %out, i32 %in, i32 %old) {
504387 entry:
509392
510393 ; GCN-LABEL: {{^}}singlethread_one_as_seq_cst_seq_cst:
511394 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
512 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
513 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
514 ; GCN-NOT: buffer_wbinvl1_vol
395 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
396 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
397 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
398 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
399 ; GFX8-NOT: buffer_wbinvl1_vol
400 ; GFX10-NOT: buffer_gl{{[01]}}_inv
401 ; GFX10: .amdhsa_kernel singlethread_one_as_seq_cst_seq_cst
402 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
403 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
404 ; GFX10-NOT: .amdhsa_memory_ordered 0
515405 define amdgpu_kernel void @singlethread_one_as_seq_cst_seq_cst(
516406 i32* %out, i32 %in, i32 %old) {
517407 entry:
520410 ret void
521411 }
522412
523 ; GCN-LABEL: {{^}}agent_monotonic_monotonic:
524 ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
525 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
526 ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
527 ; GCN-NOT: buffer_wbinvl1_vol
528 define amdgpu_kernel void @agent_monotonic_monotonic(
529 i32* %out, i32 %in, i32 %old) {
530 entry:
531 %gep = getelementptr i32, i32* %out, i32 4
532 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") monotonic monotonic
533 ret void
534 }
535
536 ; GCN-LABEL: {{^}}agent_acquire_monotonic:
537 ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
538 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
539 ; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
540 ; GFX8-NEXT: buffer_wbinvl1_vol
541 define amdgpu_kernel void @agent_acquire_monotonic(
542 i32* %out, i32 %in, i32 %old) {
543 entry:
544 %gep = getelementptr i32, i32* %out, i32 4
545 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") acquire monotonic
546 ret void
547 }
548
549 ; GCN-LABEL: {{^}}agent_release_monotonic:
550 ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
551 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
552 ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
553 ; GCN-NOT: buffer_wbinvl1_vol
554 define amdgpu_kernel void @agent_release_monotonic(
555 i32* %out, i32 %in, i32 %old) {
556 entry:
557 %gep = getelementptr i32, i32* %out, i32 4
558 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") release monotonic
559 ret void
560 }
561
562 ; GCN-LABEL: {{^}}agent_acq_rel_monotonic:
563 ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
564 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
565 ; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
566 ; GFX8-NEXT: buffer_wbinvl1_vol
567 define amdgpu_kernel void @agent_acq_rel_monotonic(
568 i32* %out, i32 %in, i32 %old) {
569 entry:
570 %gep = getelementptr i32, i32* %out, i32 4
571 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") acq_rel monotonic
572 ret void
573 }
574
575 ; GCN-LABEL: {{^}}agent_seq_cst_monotonic:
576 ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
577 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
578 ; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
579 ; GFX8-NEXT: buffer_wbinvl1_vol
580 define amdgpu_kernel void @agent_seq_cst_monotonic(
581 i32* %out, i32 %in, i32 %old) {
582 entry:
583 %gep = getelementptr i32, i32* %out, i32 4
584 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") seq_cst monotonic
585 ret void
586 }
587
588 ; GCN-LABEL: {{^}}agent_acquire_acquire:
589 ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
590 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
591 ; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
592 ; GFX8-NEXT: buffer_wbinvl1_vol
593 define amdgpu_kernel void @agent_acquire_acquire(
594 i32* %out, i32 %in, i32 %old) {
595 entry:
596 %gep = getelementptr i32, i32* %out, i32 4
597 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") acquire acquire
598 ret void
599 }
600
601 ; GCN-LABEL: {{^}}agent_release_acquire:
602 ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
603 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
604 ; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
605 ; GFX8-NEXT: buffer_wbinvl1_vol
606 define amdgpu_kernel void @agent_release_acquire(
607 i32* %out, i32 %in, i32 %old) {
608 entry:
609 %gep = getelementptr i32, i32* %out, i32 4
610 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") release acquire
611 ret void
612 }
613
614 ; GCN-LABEL: {{^}}agent_acq_rel_acquire:
615 ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
616 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
617 ; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
618 ; GFX8-NEXT: buffer_wbinvl1_vol
619 define amdgpu_kernel void @agent_acq_rel_acquire(
620 i32* %out, i32 %in, i32 %old) {
621 entry:
622 %gep = getelementptr i32, i32* %out, i32 4
623 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") acq_rel acquire
624 ret void
625 }
626
627 ; GCN-LABEL: {{^}}agent_seq_cst_acquire:
628 ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
629 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
630 ; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
631 ; GFX8-NEXT: buffer_wbinvl1_vol
632 define amdgpu_kernel void @agent_seq_cst_acquire(
633 i32* %out, i32 %in, i32 %old) {
634 entry:
635 %gep = getelementptr i32, i32* %out, i32 4
636 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") seq_cst acquire
637 ret void
638 }
639
640 ; GCN-LABEL: {{^}}agent_seq_cst_seq_cst:
641 ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
642 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
643 ; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
644 ; GFX8-NEXT: buffer_wbinvl1_vol
645 define amdgpu_kernel void @agent_seq_cst_seq_cst(
646 i32* %out, i32 %in, i32 %old) {
647 entry:
648 %gep = getelementptr i32, i32* %out, i32 4
649 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst
650 ret void
651 }
652
653413 ; GCN-LABEL: {{^}}agent_one_as_monotonic_monotonic:
654414 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
655 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
656 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
657 ; GCN-NOT: buffer_wbinvl1_vol
415 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
416 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
417 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
418 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
419 ; GFX8-NOT: buffer_wbinvl1_vol
420 ; GFX10-NOT: buffer_gl{{[01]}}_inv
421 ; GFX10: .amdhsa_kernel agent_one_as_monotonic_monotonic
422 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
423 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
424 ; GFX10-NOT: .amdhsa_memory_ordered 0
658425 define amdgpu_kernel void @agent_one_as_monotonic_monotonic(
659426 i32* %out, i32 %in, i32 %old) {
660427 entry:
665432
666433 ; GCN-LABEL: {{^}}agent_one_as_acquire_monotonic:
667434 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
435 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
668436 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
669 ; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
670 ; GFX8-NEXT: buffer_wbinvl1_vol
437 ; GFX8-NEXT: s_waitcnt vmcnt(0){{$}}
438 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
439 ; GFX8-NEXT: buffer_wbinvl1_vol
440 ; GFX10-NEXT: buffer_gl0_inv
441 ; GFX10-NEXT: buffer_gl1_inv
442 ; GFX10: .amdhsa_kernel agent_one_as_acquire_monotonic
443 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
444 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
445 ; GFX10-NOT: .amdhsa_memory_ordered 0
671446 define amdgpu_kernel void @agent_one_as_acquire_monotonic(
672447 i32* %out, i32 %in, i32 %old) {
673448 entry:
678453
679454 ; GCN-LABEL: {{^}}agent_one_as_release_monotonic:
680455 ; GCN: s_waitcnt vmcnt(0){{$}}
456 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
681457 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
682458 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
683 ; GCN-NOT: buffer_wbinvl1_vol
459 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
460 ; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
461 ; GFX10: .amdhsa_kernel agent_one_as_release_monotonic
462 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
463 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
464 ; GFX10-NOT: .amdhsa_memory_ordered 0
684465 define amdgpu_kernel void @agent_one_as_release_monotonic(
685466 i32* %out, i32 %in, i32 %old) {
686467 entry:
691472
692473 ; GCN-LABEL: {{^}}agent_one_as_acq_rel_monotonic:
693474 ; GCN: s_waitcnt vmcnt(0){{$}}
475 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
694476 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
695 ; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
696 ; GFX8-NEXT: buffer_wbinvl1_vol
477 ; GFX8-NEXT: s_waitcnt vmcnt(0){{$}}
478 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
479 ; GFX8-NEXT: buffer_wbinvl1_vol
480 ; GFX10-NEXT: buffer_gl0_inv
481 ; GFX10-NEXT: buffer_gl1_inv
482 ; GFX10: .amdhsa_kernel agent_one_as_acq_rel_monotonic
483 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
484 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
485 ; GFX10-NOT: .amdhsa_memory_ordered 0
697486 define amdgpu_kernel void @agent_one_as_acq_rel_monotonic(
698487 i32* %out, i32 %in, i32 %old) {
699488 entry:
704493
705494 ; GCN-LABEL: {{^}}agent_one_as_seq_cst_monotonic:
706495 ; GCN: s_waitcnt vmcnt(0){{$}}
496 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
707497 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
708 ; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
709 ; GFX8-NEXT: buffer_wbinvl1_vol
498 ; GFX8-NEXT: s_waitcnt vmcnt(0){{$}}
499 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
500 ; GFX8-NEXT: buffer_wbinvl1_vol
501 ; GFX10-NEXT: buffer_gl0_inv
502 ; GFX10-NEXT: buffer_gl1_inv
503 ; GFX10: .amdhsa_kernel agent_one_as_seq_cst_monotonic
504 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
505 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
506 ; GFX10-NOT: .amdhsa_memory_ordered 0
710507 define amdgpu_kernel void @agent_one_as_seq_cst_monotonic(
711508 i32* %out, i32 %in, i32 %old) {
712509 entry:
717514
718515 ; GCN-LABEL: {{^}}agent_one_as_acquire_acquire:
719516 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
517 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
720518 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
721 ; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
722 ; GFX8-NEXT: buffer_wbinvl1_vol
519 ; GFX8-NEXT: s_waitcnt vmcnt(0){{$}}
520 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
521 ; GFX8-NEXT: buffer_wbinvl1_vol
522 ; GFX10-NEXT: buffer_gl0_inv
523 ; GFX10-NEXT: buffer_gl1_inv
524 ; GFX10: .amdhsa_kernel agent_one_as_acquire_acquire
525 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
526 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
527 ; GFX10-NOT: .amdhsa_memory_ordered 0
723528 define amdgpu_kernel void @agent_one_as_acquire_acquire(
724529 i32* %out, i32 %in, i32 %old) {
725530 entry:
730535
731536 ; GCN-LABEL: {{^}}agent_one_as_release_acquire:
732537 ; GCN: s_waitcnt vmcnt(0){{$}}
538 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
733539 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
734 ; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
735 ; GFX8-NEXT: buffer_wbinvl1_vol
540 ; GFX8-NEXT: s_waitcnt vmcnt(0){{$}}
541 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
542 ; GFX8-NEXT: buffer_wbinvl1_vol
543 ; GFX10-NEXT: buffer_gl0_inv
544 ; GFX10-NEXT: buffer_gl1_inv
545 ; GFX10: .amdhsa_kernel agent_one_as_release_acquire
546 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
547 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
548 ; GFX10-NOT: .amdhsa_memory_ordered 0
736549 define amdgpu_kernel void @agent_one_as_release_acquire(
737550 i32* %out, i32 %in, i32 %old) {
738551 entry:
743556
744557 ; GCN-LABEL: {{^}}agent_one_as_acq_rel_acquire:
745558 ; GCN: s_waitcnt vmcnt(0){{$}}
559 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
746560 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
747 ; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
748 ; GFX8-NEXT: buffer_wbinvl1_vol
561 ; GFX8-NEXT: s_waitcnt vmcnt(0){{$}}
562 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
563 ; GFX8-NEXT: buffer_wbinvl1_vol
564 ; GFX10-NEXT: buffer_gl0_inv
565 ; GFX10-NEXT: buffer_gl1_inv
566 ; GFX10: .amdhsa_kernel agent_one_as_acq_rel_acquire
567 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
568 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
569 ; GFX10-NOT: .amdhsa_memory_ordered 0
749570 define amdgpu_kernel void @agent_one_as_acq_rel_acquire(
750571 i32* %out, i32 %in, i32 %old) {
751572 entry:
756577
757578 ; GCN-LABEL: {{^}}agent_one_as_seq_cst_acquire:
758579 ; GCN: s_waitcnt vmcnt(0){{$}}
580 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
759581 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
760 ; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
761 ; GFX8-NEXT: buffer_wbinvl1_vol
582 ; GFX8-NEXT: s_waitcnt vmcnt(0){{$}}
583 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
584 ; GFX8-NEXT: buffer_wbinvl1_vol
585 ; GFX10-NEXT: buffer_gl0_inv
586 ; GFX10-NEXT: buffer_gl1_inv
587 ; GFX10: .amdhsa_kernel agent_one_as_seq_cst_acquire
588 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
589 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
590 ; GFX10-NOT: .amdhsa_memory_ordered 0
762591 define amdgpu_kernel void @agent_one_as_seq_cst_acquire(
763592 i32* %out, i32 %in, i32 %old) {
764593 entry:
769598
770599 ; GCN-LABEL: {{^}}agent_one_as_seq_cst_seq_cst:
771600 ; GCN: s_waitcnt vmcnt(0){{$}}
601 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
772602 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
603 ; GFX8-NEXT: s_waitcnt vmcnt(0){{$}}
604 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
605 ; GFX8-NEXT: buffer_wbinvl1_vol
606 ; GFX10-NEXT: buffer_gl0_inv
607 ; GFX10-NEXT: buffer_gl1_inv
608 ; GFX10: .amdhsa_kernel agent_one_as_seq_cst_seq_cst
609 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
610 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
611 ; GFX10-NOT: .amdhsa_memory_ordered 0
612 define amdgpu_kernel void @agent_one_as_seq_cst_seq_cst(
613 i32* %out, i32 %in, i32 %old) {
614 entry:
615 %gep = getelementptr i32, i32* %out, i32 4
616 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst seq_cst
617 ret void
618 }
619
620 ; GCN-LABEL: {{^}}workgroup_one_as_monotonic_monotonic:
621 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
622 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
623 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
624 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
625 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
626 ; GFX8-NOT: buffer_wbinvl1_vol
627 ; GFX10-NOT: buffer_gl{{[01]}}_inv
628 ; GFX10: .amdhsa_kernel workgroup_one_as_monotonic_monotonic
629 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
630 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
631 ; GFX10-NOT: .amdhsa_memory_ordered 0
632 define amdgpu_kernel void @workgroup_one_as_monotonic_monotonic(
633 i32* %out, i32 %in, i32 %old) {
634 entry:
635 %gep = getelementptr i32, i32* %out, i32 4
636 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") monotonic monotonic
637 ret void
638 }
639
640 ; GCN-LABEL: {{^}}workgroup_one_as_acquire_monotonic:
641 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
642 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
643 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
644 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
645 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
646 ; GFX10WGP-NEXT: buffer_gl0_inv
647 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
648 ; GFX10CU-NOT: buffer_gl0_inv
649 ; GFX8-NOT: buffer_wbinvl1_vol
650 ; GFX10: .amdhsa_kernel workgroup_one_as_acquire_monotonic
651 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
652 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
653 ; GFX10-NOT: .amdhsa_memory_ordered 0
654 define amdgpu_kernel void @workgroup_one_as_acquire_monotonic(
655 i32* %out, i32 %in, i32 %old) {
656 entry:
657 %gep = getelementptr i32, i32* %out, i32 4
658 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acquire monotonic
659 ret void
660 }
661
662 ; GCN-LABEL: {{^}}workgroup_one_as_release_monotonic:
663 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
664 ; GFX10WGP: s_waitcnt vmcnt(0){{$}}
665 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
666 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
667 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
668 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
669 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
670 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
671 ; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
672 ; GFX10: .amdhsa_kernel workgroup_one_as_release_monotonic
673 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
674 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
675 ; GFX10-NOT: .amdhsa_memory_ordered 0
676 define amdgpu_kernel void @workgroup_one_as_release_monotonic(
677 i32* %out, i32 %in, i32 %old) {
678 entry:
679 %gep = getelementptr i32, i32* %out, i32 4
680 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") release monotonic
681 ret void
682 }
683
684 ; GCN-LABEL: {{^}}workgroup_one_as_acq_rel_monotonic:
685 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
686 ; GFX10WGP: s_waitcnt vmcnt(0){{$}}
687 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
688 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
689 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
690 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
691 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
692 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
693 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
694 ; GFX8-NOT: buffer_wbinvl1_vol
695 ; GFX10WGP-NEXT: buffer_gl0_inv
696 ; GFX10CU-NOT: buffer_gl0_inv
697 ; GFX10: .amdhsa_kernel workgroup_one_as_acq_rel_monotonic
698 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
699 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
700 ; GFX10-NOT: .amdhsa_memory_ordered 0
701 define amdgpu_kernel void @workgroup_one_as_acq_rel_monotonic(
702 i32* %out, i32 %in, i32 %old) {
703 entry:
704 %gep = getelementptr i32, i32* %out, i32 4
705 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acq_rel monotonic
706 ret void
707 }
708
709 ; GCN-LABEL: {{^}}workgroup_one_as_seq_cst_monotonic:
710 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
711 ; GFX10WGP: s_waitcnt vmcnt(0){{$}}
712 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
713 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
714 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
715 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
716 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
717 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
718 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
719 ; GFX8-NOT: buffer_wbinvl1_vol
720 ; GFX10WGP-NEXT: buffer_gl0_inv
721 ; GFX10CU-NOT: buffer_gl0_inv
722 ; GFX10: .amdhsa_kernel workgroup_one_as_seq_cst_monotonic
723 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
724 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
725 ; GFX10-NOT: .amdhsa_memory_ordered 0
726 define amdgpu_kernel void @workgroup_one_as_seq_cst_monotonic(
727 i32* %out, i32 %in, i32 %old) {
728 entry:
729 %gep = getelementptr i32, i32* %out, i32 4
730 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst monotonic
731 ret void
732 }
733
734 ; GCN-LABEL: {{^}}workgroup_one_as_acquire_acquire:
735 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
736 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
737 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
738 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
739 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
740 ; GFX10WGP-NEXT: buffer_gl0_inv
741 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
742 ; GFX10CU-NOT: buffer_gl0_inv
743 ; GFX8-NOT: buffer_wbinvl1_vol
744 ; GFX10: .amdhsa_kernel workgroup_one_as_acquire_acquire
745 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
746 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
747 ; GFX10-NOT: .amdhsa_memory_ordered 0
748 define amdgpu_kernel void @workgroup_one_as_acquire_acquire(
749 i32* %out, i32 %in, i32 %old) {
750 entry:
751 %gep = getelementptr i32, i32* %out, i32 4
752 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acquire acquire
753 ret void
754 }
755
756 ; GCN-LABEL: {{^}}workgroup_one_as_release_acquire:
757 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
758 ; GFX10WGP: s_waitcnt vmcnt(0){{$}}
759 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
760 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
761 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
762 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
763 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
764 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
765 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
766 ; GFX8-NOT: buffer_wbinvl1_vol
767 ; GFX10WGP-NEXT: buffer_gl0_inv
768 ; GFX10CU-NOT: buffer_gl0_inv
769 ; GFX10: .amdhsa_kernel workgroup_one_as_release_acquire
770 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
771 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
772 ; GFX10-NOT: .amdhsa_memory_ordered 0
773 define amdgpu_kernel void @workgroup_one_as_release_acquire(
774 i32* %out, i32 %in, i32 %old) {
775 entry:
776 %gep = getelementptr i32, i32* %out, i32 4
777 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") release acquire
778 ret void
779 }
780
781 ; GCN-LABEL: {{^}}workgroup_one_as_acq_rel_acquire:
782 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
783 ; GFX10WGP: s_waitcnt vmcnt(0){{$}}
784 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
785 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
786 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
787 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
788 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
789 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
790 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
791 ; GFX8-NOT: buffer_wbinvl1_vol
792 ; GFX10WGP: buffer_gl0_inv
793 ; GFX10CU-NOT: buffer_gl0_inv
794 ; GFX10: .amdhsa_kernel workgroup_one_as_acq_rel_acquire
795 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
796 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
797 ; GFX10-NOT: .amdhsa_memory_ordered 0
798 define amdgpu_kernel void @workgroup_one_as_acq_rel_acquire(
799 i32* %out, i32 %in, i32 %old) {
800 entry:
801 %gep = getelementptr i32, i32* %out, i32 4
802 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acq_rel acquire
803 ret void
804 }
805
806 ; GCN-LABEL: {{^}}workgroup_one_as_seq_cst_acquire:
807 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
808 ; GFX10WGP: s_waitcnt vmcnt(0){{$}}
809 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
810 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
811 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
812 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
813 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
814 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
815 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
816 ; GFX8-NOT: buffer_wbinvl1_vol
817 ; GFX10WGP-NEXT: buffer_gl0_inv
818 ; GFX10CU-NOT: buffer_gl0_inv
819 ; GFX10: .amdhsa_kernel workgroup_one_as_seq_cst_acquire
820 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
821 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
822 ; GFX10-NOT: .amdhsa_memory_ordered 0
823 define amdgpu_kernel void @workgroup_one_as_seq_cst_acquire(
824 i32* %out, i32 %in, i32 %old) {
825 entry:
826 %gep = getelementptr i32, i32* %out, i32 4
827 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst acquire
828 ret void
829 }
830
831 ; GCN-LABEL: {{^}}workgroup_one_as_seq_cst_seq_cst:
832 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
833 ; GFX10WGP: s_waitcnt vmcnt(0){{$}}
834 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
835 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
836 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
837 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
838 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
839 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
840 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
841 ; GFX8-NOT: buffer_wbinvl1_vol
842 ; GFX10WGP: buffer_gl0_inv
843 ; GFX10CU-NOT: buffer_gl0_inv
844 ; GFX10: .amdhsa_kernel workgroup_one_as_seq_cst_seq_cst
845 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
846 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
847 ; GFX10-NOT: .amdhsa_memory_ordered 0
848 define amdgpu_kernel void @workgroup_one_as_seq_cst_seq_cst(
849 i32* %out, i32 %in, i32 %old) {
850 entry:
851 %gep = getelementptr i32, i32* %out, i32 4
852 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst seq_cst
853 ret void
854 }
855
856 ; GCN-LABEL: {{^}}wavefront_one_as_monotonic_monotonic:
857 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
858 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
859 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
860 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
861 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
862 ; GFX8-NOT: buffer_wbinvl1_vol
863 ; GFX10-NOT: buffer_gl{{[01]}}_inv
864 ; GFX10: .amdhsa_kernel wavefront_one_as_monotonic_monotonic
865 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
866 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
867 ; GFX10-NOT: .amdhsa_memory_ordered 0
868 define amdgpu_kernel void @wavefront_one_as_monotonic_monotonic(
869 i32* %out, i32 %in, i32 %old) {
870 entry:
871 %gep = getelementptr i32, i32* %out, i32 4
872 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") monotonic monotonic
873 ret void
874 }
875
876 ; GCN-LABEL: {{^}}wavefront_one_as_acquire_monotonic:
877 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
878 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
879 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
880 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
881 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
882 ; GFX8-NOT: buffer_wbinvl1_vol
883 ; GFX10-NOT: buffer_gl{{[01]}}_inv
884 ; GFX10: .amdhsa_kernel wavefront_one_as_acquire_monotonic
885 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
886 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
887 ; GFX10-NOT: .amdhsa_memory_ordered 0
888 define amdgpu_kernel void @wavefront_one_as_acquire_monotonic(
889 i32* %out, i32 %in, i32 %old) {
890 entry:
891 %gep = getelementptr i32, i32* %out, i32 4
892 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acquire monotonic
893 ret void
894 }
895
896 ; GCN-LABEL: {{^}}wavefront_one_as_release_monotonic:
897 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
898 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
899 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
900 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
901 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
902 ; GFX8-NOT: buffer_wbinvl1_vol
903 ; GFX10-NOT: buffer_gl{{[01]}}_inv
904 ; GFX10: .amdhsa_kernel wavefront_one_as_release_monotonic
905 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
906 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
907 ; GFX10-NOT: .amdhsa_memory_ordered 0
908 define amdgpu_kernel void @wavefront_one_as_release_monotonic(
909 i32* %out, i32 %in, i32 %old) {
910 entry:
911 %gep = getelementptr i32, i32* %out, i32 4
912 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") release monotonic
913 ret void
914 }
915
916 ; GCN-LABEL: {{^}}wavefront_one_as_acq_rel_monotonic:
917 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
918 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
919 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
920 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
921 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
922 ; GFX8-NOT: buffer_wbinvl1_vol
923 ; GFX10-NOT: buffer_gl{{[01]}}_inv
924 ; GFX10: .amdhsa_kernel wavefront_one_as_acq_rel_monotonic
925 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
926 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
927 ; GFX10-NOT: .amdhsa_memory_ordered 0
928 define amdgpu_kernel void @wavefront_one_as_acq_rel_monotonic(
929 i32* %out, i32 %in, i32 %old) {
930 entry:
931 %gep = getelementptr i32, i32* %out, i32 4
932 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acq_rel monotonic
933 ret void
934 }
935
936 ; GCN-LABEL: {{^}}wavefront_one_as_seq_cst_monotonic:
937 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
938 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
939 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
940 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
941 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
942 ; GFX8-NOT: buffer_wbinvl1_vol
943 ; GFX10-NOT: buffer_gl{{[01]}}_inv
944 ; GFX10: .amdhsa_kernel wavefront_one_as_seq_cst_monotonic
945 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
946 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
947 ; GFX10-NOT: .amdhsa_memory_ordered 0
948 define amdgpu_kernel void @wavefront_one_as_seq_cst_monotonic(
949 i32* %out, i32 %in, i32 %old) {
950 entry:
951 %gep = getelementptr i32, i32* %out, i32 4
952 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst monotonic
953 ret void
954 }
955
956 ; GCN-LABEL: {{^}}wavefront_one_as_acquire_acquire:
957 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
958 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
959 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
960 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
961 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
962 ; GFX8-NOT: buffer_wbinvl1_vol
963 ; GFX10-NOT: buffer_gl{{[01]}}_inv
964 ; GFX10: .amdhsa_kernel wavefront_one_as_acquire_acquire
965 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
966 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
967 ; GFX10-NOT: .amdhsa_memory_ordered 0
968 define amdgpu_kernel void @wavefront_one_as_acquire_acquire(
969 i32* %out, i32 %in, i32 %old) {
970 entry:
971 %gep = getelementptr i32, i32* %out, i32 4
972 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acquire acquire
973 ret void
974 }
975
976 ; GCN-LABEL: {{^}}wavefront_one_as_release_acquire:
977 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
978 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
979 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
980 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
981 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
982 ; GFX8-NOT: buffer_wbinvl1_vol
983 ; GFX10-NOT: buffer_gl{{[01]}}_inv
984 ; GFX10: .amdhsa_kernel wavefront_one_as_release_acquire
985 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
986 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
987 ; GFX10-NOT: .amdhsa_memory_ordered 0
988 define amdgpu_kernel void @wavefront_one_as_release_acquire(
989 i32* %out, i32 %in, i32 %old) {
990 entry:
991 %gep = getelementptr i32, i32* %out, i32 4
992 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") release acquire
993 ret void
994 }
995
996 ; GCN-LABEL: {{^}}wavefront_one_as_acq_rel_acquire:
997 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
998 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
999 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
1000 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1001 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1002 ; GFX8-NOT: buffer_wbinvl1_vol
1003 ; GFX10-NOT: buffer_gl{{[01]}}_inv
1004 ; GFX10: .amdhsa_kernel wavefront_one_as_acq_rel_acquire
1005 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1006 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1007 ; GFX10-NOT: .amdhsa_memory_ordered 0
1008 define amdgpu_kernel void @wavefront_one_as_acq_rel_acquire(
1009 i32* %out, i32 %in, i32 %old) {
1010 entry:
1011 %gep = getelementptr i32, i32* %out, i32 4
1012 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acq_rel acquire
1013 ret void
1014 }
1015
1016 ; GCN-LABEL: {{^}}wavefront_one_as_seq_cst_acquire:
1017 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1018 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1019 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
1020 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1021 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1022 ; GFX8-NOT: buffer_wbinvl1_vol
1023 ; GFX10-NOT: buffer_gl{{[01]}}_inv
1024 ; GFX10: .amdhsa_kernel wavefront_one_as_seq_cst_acquire
1025 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1026 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1027 ; GFX10-NOT: .amdhsa_memory_ordered 0
1028 define amdgpu_kernel void @wavefront_one_as_seq_cst_acquire(
1029 i32* %out, i32 %in, i32 %old) {
1030 entry:
1031 %gep = getelementptr i32, i32* %out, i32 4
1032 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst acquire
1033 ret void
1034 }
1035
1036 ; GCN-LABEL: {{^}}wavefront_one_as_seq_cst_seq_cst:
1037 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1038 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1039 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
1040 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1041 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1042 ; GFX8-NOT: buffer_wbinvl1_vol
1043 ; GFX10-NOT: buffer_gl{{[01]}}_inv
1044 ; GFX10: .amdhsa_kernel wavefront_one_as_seq_cst_seq_cst
1045 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1046 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1047 ; GFX10-NOT: .amdhsa_memory_ordered 0
1048 define amdgpu_kernel void @wavefront_one_as_seq_cst_seq_cst(
1049 i32* %out, i32 %in, i32 %old) {
1050 entry:
1051 %gep = getelementptr i32, i32* %out, i32 4
1052 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst seq_cst
1053 ret void
1054 }
1055
1056 ; GCN-LABEL: {{^}}system_one_as_acquire_monotonic_ret:
1057 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1058 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1059 ; GCN: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
7731060 ; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
7741061 ; GFX8-NEXT: buffer_wbinvl1_vol
775 define amdgpu_kernel void @agent_one_as_seq_cst_seq_cst(
1062 ; GFX10-NEXT: buffer_gl0_inv
1063 ; GFX10-NEXT: buffer_gl1_inv
1064 ; GFX10: .amdhsa_kernel system_one_as_acquire_monotonic_ret
1065 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1066 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1067 ; GFX10-NOT: .amdhsa_memory_ordered 0
1068 define amdgpu_kernel void @system_one_as_acquire_monotonic_ret(
1069 i32* %out, i32 %in, i32 %old) {
1070 entry:
1071 %gep = getelementptr i32, i32* %out, i32 4
1072 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") acquire monotonic
1073 %val0 = extractvalue { i32, i1 } %val, 0
1074 store i32 %val0, i32* %out, align 4
1075 ret void
1076 }
1077
1078 ; GCN-LABEL: {{^}}system_one_as_acq_rel_monotonic_ret:
1079 ; GCN: s_waitcnt vmcnt(0){{$}}
1080 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
1081 ; GCN-NEXT: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
1082 ; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
1083 ; GFX8-NEXT: buffer_wbinvl1_vol
1084 ; GFX10-NEXT: buffer_gl0_inv
1085 ; GFX10-NEXT: buffer_gl1_inv
1086 ; GFX10: .amdhsa_kernel system_one_as_acq_rel_monotonic_ret
1087 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1088 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1089 ; GFX10-NOT: .amdhsa_memory_ordered 0
1090 define amdgpu_kernel void @system_one_as_acq_rel_monotonic_ret(
1091 i32* %out, i32 %in, i32 %old) {
1092 entry:
1093 %gep = getelementptr i32, i32* %out, i32 4
1094 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") acq_rel monotonic
1095 %val0 = extractvalue { i32, i1 } %val, 0
1096 store i32 %val0, i32* %out, align 4
1097 ret void
1098 }
1099
1100 ; GCN-LABEL: {{^}}system_one_as_seq_cst_monotonic_ret:
1101 ; GCN: s_waitcnt vmcnt(0){{$}}
1102 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
1103 ; GCN-NEXT: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
1104 ; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
1105 ; GFX8-NEXT: buffer_wbinvl1_vol
1106 ; GFX10-NEXT: buffer_gl0_inv
1107 ; GFX10-NEXT: buffer_gl1_inv
1108 ; GFX10: .amdhsa_kernel system_one_as_seq_cst_monotonic_ret
1109 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1110 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1111 ; GFX10-NOT: .amdhsa_memory_ordered 0
1112 define amdgpu_kernel void @system_one_as_seq_cst_monotonic_ret(
1113 i32* %out, i32 %in, i32 %old) {
1114 entry:
1115 %gep = getelementptr i32, i32* %out, i32 4
1116 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") seq_cst monotonic
1117 %val0 = extractvalue { i32, i1 } %val, 0
1118 store i32 %val0, i32* %out, align 4
1119 ret void
1120 }
1121
1122 ; GCN-LABEL: {{^}}system_one_as_acquire_acquire_ret:
1123 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1124 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1125 ; GCN: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
1126 ; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
1127 ; GFX8-NEXT: buffer_wbinvl1_vol
1128 ; GFX10-NEXT: buffer_gl0_inv
1129 ; GFX10-NEXT: buffer_gl1_inv
1130 ; GFX10: .amdhsa_kernel system_one_as_acquire_acquire_ret
1131 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1132 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1133 ; GFX10-NOT: .amdhsa_memory_ordered 0
1134 define amdgpu_kernel void @system_one_as_acquire_acquire_ret(
1135 i32* %out, i32 %in, i32 %old) {
1136 entry:
1137 %gep = getelementptr i32, i32* %out, i32 4
1138 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") acquire acquire
1139 %val0 = extractvalue { i32, i1 } %val, 0
1140 store i32 %val0, i32* %out, align 4
1141 ret void
1142 }
1143
1144 ; GCN-LABEL: {{^}}system_one_as_release_acquire_ret:
1145 ; GCN: s_waitcnt vmcnt(0){{$}}
1146 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
1147 ; GCN-NEXT: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
1148 ; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
1149 ; GFX8-NEXT: buffer_wbinvl1_vol
1150 ; GFX10-NEXT: buffer_gl0_inv
1151 ; GFX10-NEXT: buffer_gl1_inv
1152 ; GFX10: .amdhsa_kernel system_one_as_release_acquire_ret
1153 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1154 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1155 ; GFX10-NOT: .amdhsa_memory_ordered 0
1156 define amdgpu_kernel void @system_one_as_release_acquire_ret(
1157 i32* %out, i32 %in, i32 %old) {
1158 entry:
1159 %gep = getelementptr i32, i32* %out, i32 4
1160 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") release acquire
1161 %val0 = extractvalue { i32, i1 } %val, 0
1162 store i32 %val0, i32* %out, align 4
1163 ret void
1164 }
1165
1166 ; GCN-LABEL: {{^}}system_one_as_acq_rel_acquire_ret:
1167 ; GCN: s_waitcnt vmcnt(0){{$}}
1168 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
1169 ; GCN-NEXT: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
1170 ; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
1171 ; GFX8-NEXT: buffer_wbinvl1_vol
1172 ; GFX10-NEXT: buffer_gl0_inv
1173 ; GFX10-NEXT: buffer_gl1_inv
1174 ; GFX10: .amdhsa_kernel system_one_as_acq_rel_acquire_ret
1175 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1176 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1177 ; GFX10-NOT: .amdhsa_memory_ordered 0
1178 define amdgpu_kernel void @system_one_as_acq_rel_acquire_ret(
1179 i32* %out, i32 %in, i32 %old) {
1180 entry:
1181 %gep = getelementptr i32, i32* %out, i32 4
1182 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") acq_rel acquire
1183 %val0 = extractvalue { i32, i1 } %val, 0
1184 store i32 %val0, i32* %out, align 4
1185 ret void
1186 }
1187
1188 ; GCN-LABEL: {{^}}system_one_as_seq_cst_acquire_ret:
1189 ; GCN: s_waitcnt vmcnt(0){{$}}
1190 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
1191 ; GCN-NEXT: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
1192 ; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
1193 ; GFX8-NEXT: buffer_wbinvl1_vol
1194 ; GFX10-NEXT: buffer_gl0_inv
1195 ; GFX10-NEXT: buffer_gl1_inv
1196 ; GFX10: .amdhsa_kernel system_one_as_seq_cst_acquire_ret
1197 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1198 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1199 ; GFX10-NOT: .amdhsa_memory_ordered 0
1200 define amdgpu_kernel void @system_one_as_seq_cst_acquire_ret(
1201 i32* %out, i32 %in, i32 %old) {
1202 entry:
1203 %gep = getelementptr i32, i32* %out, i32 4
1204 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") seq_cst acquire
1205 %val0 = extractvalue { i32, i1 } %val, 0
1206 store i32 %val0, i32* %out, align 4
1207 ret void
1208 }
1209
1210 ; GCN-LABEL: {{^}}system_one_as_seq_cst_seq_cst_ret:
1211 ; GCN: s_waitcnt vmcnt(0){{$}}
1212 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
1213 ; GCN-NEXT: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
1214 ; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
1215 ; GFX8-NEXT: buffer_wbinvl1_vol
1216 ; GFX10-NEXT: buffer_gl0_inv
1217 ; GFX10-NEXT: buffer_gl1_inv
1218 ; GFX10: .amdhsa_kernel system_one_as_seq_cst_seq_cst_ret
1219 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1220 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1221 ; GFX10-NOT: .amdhsa_memory_ordered 0
1222 define amdgpu_kernel void @system_one_as_seq_cst_seq_cst_ret(
1223 i32* %out, i32 %in, i32 %old) {
1224 entry:
1225 %gep = getelementptr i32, i32* %out, i32 4
1226 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") seq_cst seq_cst
1227 %val0 = extractvalue { i32, i1 } %val, 0
1228 store i32 %val0, i32* %out, align 4
1229 ret void
1230 }
1231
1232 ; GCN-LABEL: {{^}}agent_one_as_acquire_monotonic_ret:
1233 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1234 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1235 ; GCN: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
1236 ; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
1237 ; GFX8-NEXT: buffer_wbinvl1_vol
1238 ; GFX10-NEXT: buffer_gl0_inv
1239 ; GFX10-NEXT: buffer_gl1_inv
1240 ; GFX10: .amdhsa_kernel agent_one_as_acquire_monotonic_ret
1241 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1242 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1243 ; GFX10-NOT: .amdhsa_memory_ordered 0
1244 define amdgpu_kernel void @agent_one_as_acquire_monotonic_ret(
1245 i32* %out, i32 %in, i32 %old) {
1246 entry:
1247 %gep = getelementptr i32, i32* %out, i32 4
1248 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire monotonic
1249 %val0 = extractvalue { i32, i1 } %val, 0
1250 store i32 %val0, i32* %out, align 4
1251 ret void
1252 }
1253
1254 ; GCN-LABEL: {{^}}agent_one_as_acq_rel_monotonic_ret:
1255 ; GCN: s_waitcnt vmcnt(0){{$}}
1256 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
1257 ; GCN-NEXT: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
1258 ; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
1259 ; GFX8-NEXT: buffer_wbinvl1_vol
1260 ; GFX10-NEXT: buffer_gl0_inv
1261 ; GFX10-NEXT: buffer_gl1_inv
1262 ; GFX10: .amdhsa_kernel agent_one_as_acq_rel_monotonic_ret
1263 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1264 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1265 ; GFX10-NOT: .amdhsa_memory_ordered 0
1266 define amdgpu_kernel void @agent_one_as_acq_rel_monotonic_ret(
1267 i32* %out, i32 %in, i32 %old) {
1268 entry:
1269 %gep = getelementptr i32, i32* %out, i32 4
1270 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel monotonic
1271 %val0 = extractvalue { i32, i1 } %val, 0
1272 store i32 %val0, i32* %out, align 4
1273 ret void
1274 }
1275
1276 ; GCN-LABEL: {{^}}agent_one_as_seq_cst_monotonic_ret:
1277 ; GCN: s_waitcnt vmcnt(0){{$}}
1278 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
1279 ; GCN-NEXT: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
1280 ; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
1281 ; GFX8-NEXT: buffer_wbinvl1_vol
1282 ; GFX10-NEXT: buffer_gl0_inv
1283 ; GFX10-NEXT: buffer_gl1_inv
1284 ; GFX10: .amdhsa_kernel agent_one_as_seq_cst_monotonic_ret
1285 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1286 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1287 ; GFX10-NOT: .amdhsa_memory_ordered 0
1288 define amdgpu_kernel void @agent_one_as_seq_cst_monotonic_ret(
1289 i32* %out, i32 %in, i32 %old) {
1290 entry:
1291 %gep = getelementptr i32, i32* %out, i32 4
1292 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst monotonic
1293 %val0 = extractvalue { i32, i1 } %val, 0
1294 store i32 %val0, i32* %out, align 4
1295 ret void
1296 }
1297
1298 ; GCN-LABEL: {{^}}agent_one_as_acquire_acquire_ret:
1299 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1300 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1301 ; GCN: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
1302 ; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
1303 ; GFX8-NEXT: buffer_wbinvl1_vol
1304 ; GFX10-NEXT: buffer_gl0_inv
1305 ; GFX10-NEXT: buffer_gl1_inv
1306 ; GFX10: .amdhsa_kernel agent_one_as_acquire_acquire_ret
1307 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1308 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1309 ; GFX10-NOT: .amdhsa_memory_ordered 0
1310 define amdgpu_kernel void @agent_one_as_acquire_acquire_ret(
1311 i32* %out, i32 %in, i32 %old) {
1312 entry:
1313 %gep = getelementptr i32, i32* %out, i32 4
1314 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire acquire
1315 %val0 = extractvalue { i32, i1 } %val, 0
1316 store i32 %val0, i32* %out, align 4
1317 ret void
1318 }
1319
1320 ; GCN-LABEL: {{^}}agent_one_as_release_acquire_ret:
1321 ; GCN: s_waitcnt vmcnt(0){{$}}
1322 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
1323 ; GCN-NEXT: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
1324 ; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
1325 ; GFX8-NEXT: buffer_wbinvl1_vol
1326 ; GFX10-NEXT: buffer_gl0_inv
1327 ; GFX10-NEXT: buffer_gl1_inv
1328 ; GFX10: .amdhsa_kernel agent_one_as_release_acquire_ret
1329 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1330 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1331 ; GFX10-NOT: .amdhsa_memory_ordered 0
1332 define amdgpu_kernel void @agent_one_as_release_acquire_ret(
1333 i32* %out, i32 %in, i32 %old) {
1334 entry:
1335 %gep = getelementptr i32, i32* %out, i32 4
1336 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") release acquire
1337 %val0 = extractvalue { i32, i1 } %val, 0
1338 store i32 %val0, i32* %out, align 4
1339 ret void
1340 }
1341
1342 ; GCN-LABEL: {{^}}agent_one_as_acq_rel_acquire_ret:
1343 ; GCN: s_waitcnt vmcnt(0){{$}}
1344 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
1345 ; GCN-NEXT: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
1346 ; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
1347 ; GFX8-NEXT: buffer_wbinvl1_vol
1348 ; GFX10-NEXT: buffer_gl0_inv
1349 ; GFX10-NEXT: buffer_gl1_inv
1350 ; GFX10: .amdhsa_kernel agent_one_as_acq_rel_acquire_ret
1351 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1352 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1353 ; GFX10-NOT: .amdhsa_memory_ordered 0
1354 define amdgpu_kernel void @agent_one_as_acq_rel_acquire_ret(
1355 i32* %out, i32 %in, i32 %old) {
1356 entry:
1357 %gep = getelementptr i32, i32* %out, i32 4
1358 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel acquire
1359 %val0 = extractvalue { i32, i1 } %val, 0
1360 store i32 %val0, i32* %out, align 4
1361 ret void
1362 }
1363
1364 ; GCN-LABEL: {{^}}agent_one_as_seq_cst_acquire_ret:
1365 ; GCN: s_waitcnt vmcnt(0){{$}}
1366 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
1367 ; GCN-NEXT: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
1368 ; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
1369 ; GFX8-NEXT: buffer_wbinvl1_vol
1370 ; GFX10-NEXT: buffer_gl0_inv
1371 ; GFX10-NEXT: buffer_gl1_inv
1372 ; GFX10: .amdhsa_kernel agent_one_as_seq_cst_acquire_ret
1373 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1374 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1375 ; GFX10-NOT: .amdhsa_memory_ordered 0
1376 define amdgpu_kernel void @agent_one_as_seq_cst_acquire_ret(
1377 i32* %out, i32 %in, i32 %old) {
1378 entry:
1379 %gep = getelementptr i32, i32* %out, i32 4
1380 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst acquire
1381 %val0 = extractvalue { i32, i1 } %val, 0
1382 store i32 %val0, i32* %out, align 4
1383 ret void
1384 }
1385
1386 ; GCN-LABEL: {{^}}agent_one_as_seq_cst_seq_cst_ret:
1387 ; GCN: s_waitcnt vmcnt(0){{$}}
1388 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
1389 ; GCN-NEXT: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
1390 ; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
1391 ; GFX8-NEXT: buffer_wbinvl1_vol
1392 ; GFX10-NEXT: buffer_gl0_inv
1393 ; GFX10-NEXT: buffer_gl1_inv
1394 ; GFX10: .amdhsa_kernel agent_one_as_seq_cst_seq_cst_ret
1395 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1396 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1397 ; GFX10-NOT: .amdhsa_memory_ordered 0
1398 define amdgpu_kernel void @agent_one_as_seq_cst_seq_cst_ret(
7761399 i32* %out, i32 %in, i32 %old) {
7771400 entry:
7781401 %gep = getelementptr i32, i32* %out, i32 4
7791402 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst seq_cst
1403 %val0 = extractvalue { i32, i1 } %val, 0
1404 store i32 %val0, i32* %out, align 4
1405 ret void
1406 }
1407
1408 ; GCN-LABEL: {{^}}workgroup_one_as_acquire_monotonic_ret:
1409 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1410 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1411 ; GCN: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
1412 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
1413 ; GFX10WGP-NEXT: s_waitcnt vmcnt(0){{$}}
1414 ; GFX10WGP-NEXT: buffer_gl0_inv
1415 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
1416 ; GFX10CU-NOT: buffer_gl0_inv
1417 ; GFX8-NOT: buffer_wbinvl1_vol
1418 ; GFX10: .amdhsa_kernel workgroup_one_as_acquire_monotonic_ret
1419 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1420 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1421 ; GFX10-NOT: .amdhsa_memory_ordered 0
1422 define amdgpu_kernel void @workgroup_one_as_acquire_monotonic_ret(
1423 i32* %out, i32 %in, i32 %old) {
1424 entry:
1425 %gep = getelementptr i32, i32* %out, i32 4
1426 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acquire monotonic
1427 %val0 = extractvalue { i32, i1 } %val, 0
1428 store i32 %val0, i32* %out, align 4
1429 ret void
1430 }
1431
1432 ; GCN-LABEL: {{^}}workgroup_one_as_acq_rel_monotonic_ret:
1433 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
1434 ; GFX10WGP: s_waitcnt vmcnt(0){{$}}
1435 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
1436 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
1437 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
1438 ; GCN: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
1439 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
1440 ; GFX10WGP-NEXT: s_waitcnt vmcnt(0){{$}}
1441 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
1442 ; GFX8-NOT: buffer_wbinvl1_vol
1443 ; GFX10WGP-NEXT: buffer_gl0_inv
1444 ; GFX10CU-NOT: buffer_gl0_inv
1445 ; GFX10: .amdhsa_kernel workgroup_one_as_acq_rel_monotonic_ret
1446 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1447 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1448 ; GFX10-NOT: .amdhsa_memory_ordered 0
1449 define amdgpu_kernel void @workgroup_one_as_acq_rel_monotonic_ret(
1450 i32* %out, i32 %in, i32 %old) {
1451 entry:
1452 %gep = getelementptr i32, i32* %out, i32 4
1453 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acq_rel monotonic
1454 %val0 = extractvalue { i32, i1 } %val, 0
1455 store i32 %val0, i32* %out, align 4
1456 ret void
1457 }
1458
1459 ; GCN-LABEL: {{^}}workgroup_one_as_seq_cst_monotonic_ret:
1460 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
1461 ; GFX10WGP: s_waitcnt vmcnt(0){{$}}
1462 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
1463 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
1464 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
1465 ; GCN: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
1466 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
1467 ; GFX10WGP-NEXT: s_waitcnt vmcnt(0){{$}}
1468 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
1469 ; GFX8-NOT: buffer_wbinvl1_vol
1470 ; GFX10WGP-NEXT: buffer_gl0_inv
1471 ; GFX10CU-NOT: buffer_gl0_inv
1472 ; GFX10: .amdhsa_kernel workgroup_one_as_seq_cst_monotonic_ret
1473 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1474 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1475 ; GFX10-NOT: .amdhsa_memory_ordered 0
1476 define amdgpu_kernel void @workgroup_one_as_seq_cst_monotonic_ret(
1477 i32* %out, i32 %in, i32 %old) {
1478 entry:
1479 %gep = getelementptr i32, i32* %out, i32 4
1480 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst monotonic
1481 %val0 = extractvalue { i32, i1 } %val, 0
1482 store i32 %val0, i32* %out, align 4
1483 ret void
1484 }
1485
1486 ; GCN-LABEL: {{^}}workgroup_one_as_acquire_acquire_ret:
1487 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1488 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1489 ; GCN: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
1490 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
1491 ; GFX10WGP-NEXT: s_waitcnt vmcnt(0){{$}}
1492 ; GFX10WGP-NEXT: buffer_gl0_inv
1493 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
1494 ; GFX10CU-NOT: buffer_gl0_inv
1495 ; GFX8-NOT: buffer_wbinvl1_vol
1496 ; GFX10: .amdhsa_kernel workgroup_one_as_acquire_acquire_ret
1497 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1498 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1499 ; GFX10-NOT: .amdhsa_memory_ordered 0
1500 define amdgpu_kernel void @workgroup_one_as_acquire_acquire_ret(
1501 i32* %out, i32 %in, i32 %old) {
1502 entry:
1503 %gep = getelementptr i32, i32* %out, i32 4
1504 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acquire acquire
1505 %val0 = extractvalue { i32, i1 } %val, 0
1506 store i32 %val0, i32* %out, align 4
1507 ret void
1508 }
1509
1510 ; GCN-LABEL: {{^}}workgroup_one_as_release_acquire_ret:
1511 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
1512 ; GFX10WGP: s_waitcnt vmcnt(0){{$}}
1513 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
1514 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
1515 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
1516 ; GCN: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
1517 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
1518 ; GFX10WGP-NEXT: s_waitcnt vmcnt(0){{$}}
1519 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
1520 ; GFX8-NOT: buffer_wbinvl1_vol
1521 ; GFX10WGP-NEXT: buffer_gl0_inv
1522 ; GFX10CU-NOT: buffer_gl0_inv
1523 ; GFX10: .amdhsa_kernel workgroup_one_as_release_acquire_ret
1524 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1525 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1526 ; GFX10-NOT: .amdhsa_memory_ordered 0
1527 define amdgpu_kernel void @workgroup_one_as_release_acquire_ret(
1528 i32* %out, i32 %in, i32 %old) {
1529 entry:
1530 %gep = getelementptr i32, i32* %out, i32 4
1531 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") release acquire
1532 %val0 = extractvalue { i32, i1 } %val, 0
1533 store i32 %val0, i32* %out, align 4
1534 ret void
1535 }
1536
1537 ; GCN-LABEL: {{^}}workgroup_one_as_acq_rel_acquire_ret:
1538 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
1539 ; GFX10WGP: s_waitcnt vmcnt(0){{$}}
1540 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
1541 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
1542 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
1543 ; GCN: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
1544 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
1545 ; GFX10WGP-NEXT: s_waitcnt vmcnt(0){{$}}
1546 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
1547 ; GFX8-NOT: buffer_wbinvl1_vol
1548 ; GFX10WGP: buffer_gl0_inv
1549 ; GFX10CU-NOT: buffer_gl0_inv
1550 ; GFX10: .amdhsa_kernel workgroup_one_as_acq_rel_acquire_ret
1551 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1552 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1553 ; GFX10-NOT: .amdhsa_memory_ordered 0
1554 define amdgpu_kernel void @workgroup_one_as_acq_rel_acquire_ret(
1555 i32* %out, i32 %in, i32 %old) {
1556 entry:
1557 %gep = getelementptr i32, i32* %out, i32 4
1558 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acq_rel acquire
1559 %val0 = extractvalue { i32, i1 } %val, 0
1560 store i32 %val0, i32* %out, align 4
1561 ret void
1562 }
1563
1564 ; GCN-LABEL: {{^}}workgroup_one_as_seq_cst_acquire_ret:
1565 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
1566 ; GFX10WGP: s_waitcnt vmcnt(0){{$}}
1567 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
1568 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
1569 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
1570 ; GCN: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
1571 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
1572 ; GFX10WGP-NEXT: s_waitcnt vmcnt(0){{$}}
1573 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
1574 ; GFX8-NOT: buffer_wbinvl1_vol
1575 ; GFX10WGP-NEXT: buffer_gl0_inv
1576 ; GFX10CU-NOT: buffer_gl0_inv
1577 ; GFX10: .amdhsa_kernel workgroup_one_as_seq_cst_acquire_ret
1578 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1579 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1580 ; GFX10-NOT: .amdhsa_memory_ordered 0
1581 define amdgpu_kernel void @workgroup_one_as_seq_cst_acquire_ret(
1582 i32* %out, i32 %in, i32 %old) {
1583 entry:
1584 %gep = getelementptr i32, i32* %out, i32 4
1585 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst acquire
1586 %val0 = extractvalue { i32, i1 } %val, 0
1587 store i32 %val0, i32* %out, align 4
1588 ret void
1589 }
1590
1591 ; GCN-LABEL: {{^}}workgroup_one_as_seq_cst_seq_cst_ret:
1592 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
1593 ; GFX10WGP: s_waitcnt vmcnt(0){{$}}
1594 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
1595 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
1596 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
1597 ; GCN: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
1598 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
1599 ; GFX10WGP-NEXT: s_waitcnt vmcnt(0){{$}}
1600 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
1601 ; GFX8-NOT: buffer_wbinvl1_vol
1602 ; GFX10WGP: buffer_gl0_inv
1603 ; GFX10CU-NOT: buffer_gl0_inv
1604 ; GFX10: .amdhsa_kernel workgroup_one_as_seq_cst_seq_cst_ret
1605 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1606 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1607 ; GFX10-NOT: .amdhsa_memory_ordered 0
1608 define amdgpu_kernel void @workgroup_one_as_seq_cst_seq_cst_ret(
1609 i32* %out, i32 %in, i32 %old) {
1610 entry:
1611 %gep = getelementptr i32, i32* %out, i32 4
1612 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst seq_cst
1613 %val0 = extractvalue { i32, i1 } %val, 0
1614 store i32 %val0, i32* %out, align 4
1615 ret void
1616 }
1617
1618 ; GCN-LABEL: {{^}}system_monotonic_monotonic:
1619 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1620 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1621 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
1622 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1623 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1624 ; GFX8-NOT: buffer_wbinvl1_vol
1625 ; GFX10-NOT: buffer_gl{{[01]}}_inv
1626 ; GFX10: .amdhsa_kernel system_monotonic_monotonic
1627 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1628 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1629 ; GFX10-NOT: .amdhsa_memory_ordered 0
1630 define amdgpu_kernel void @system_monotonic_monotonic(
1631 i32* %out, i32 %in, i32 %old) {
1632 entry:
1633 %gep = getelementptr i32, i32* %out, i32 4
1634 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in monotonic monotonic
1635 ret void
1636 }
1637
1638 ; GCN-LABEL: {{^}}system_acquire_monotonic:
1639 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1640 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1641 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
1642 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
1643 ; GFX10-NEXT: s_waitcnt lgkmcnt(0){{$}}
1644 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
1645 ; GFX8-NEXT: buffer_wbinvl1_vol
1646 ; GFX10-NEXT: buffer_gl0_inv
1647 ; GFX10-NEXT: buffer_gl1_inv
1648 ; GFX10: .amdhsa_kernel system_acquire_monotonic
1649 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1650 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1651 ; GFX10-NOT: .amdhsa_memory_ordered 0
1652 define amdgpu_kernel void @system_acquire_monotonic(
1653 i32* %out, i32 %in, i32 %old) {
1654 entry:
1655 %gep = getelementptr i32, i32* %out, i32 4
1656 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in acquire monotonic
1657 ret void
1658 }
1659
1660 ; GCN-LABEL: {{^}}system_release_monotonic:
1661 ; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
1662 ; GFX10: s_waitcnt lgkmcnt(0){{$}}
1663 ; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
1664 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
1665 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1666 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1667 ; GFX8-NOT: buffer_wbinvl1_vol
1668 ; GFX10-NOT: buffer_gl{{[01]}}_inv
1669 ; GFX10: .amdhsa_kernel system_release_monotonic
1670 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1671 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1672 ; GFX10-NOT: .amdhsa_memory_ordered 0
1673 define amdgpu_kernel void @system_release_monotonic(
1674 i32* %out, i32 %in, i32 %old) {
1675 entry:
1676 %gep = getelementptr i32, i32* %out, i32 4
1677 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in release monotonic
1678 ret void
1679 }
1680
1681 ; GCN-LABEL: {{^}}system_acq_rel_monotonic:
1682 ; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
1683 ; GFX10: s_waitcnt lgkmcnt(0){{$}}
1684 ; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
1685 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
1686 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
1687 ; GFX10-NEXT: s_waitcnt lgkmcnt(0){{$}}
1688 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
1689 ; GFX8-NEXT: buffer_wbinvl1_vol
1690 ; GFX10-NEXT: buffer_gl0_inv
1691 ; GFX10-NEXT: buffer_gl1_inv
1692 ; GFX10: .amdhsa_kernel system_acq_rel_monotonic
1693 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1694 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1695 ; GFX10-NOT: .amdhsa_memory_ordered 0
1696 define amdgpu_kernel void @system_acq_rel_monotonic(
1697 i32* %out, i32 %in, i32 %old) {
1698 entry:
1699 %gep = getelementptr i32, i32* %out, i32 4
1700 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in acq_rel monotonic
1701 ret void
1702 }
1703
1704 ; GCN-LABEL: {{^}}system_seq_cst_monotonic:
1705 ; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
1706 ; GFX10: s_waitcnt lgkmcnt(0){{$}}
1707 ; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
1708 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
1709 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
1710 ; GFX10-NEXT: s_waitcnt lgkmcnt(0){{$}}
1711 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
1712 ; GFX8-NEXT: buffer_wbinvl1_vol
1713 ; GFX10-NEXT: buffer_gl0_inv
1714 ; GFX10-NEXT: buffer_gl1_inv
1715 ; GFX10: .amdhsa_kernel system_seq_cst_monotonic
1716 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1717 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1718 ; GFX10-NOT: .amdhsa_memory_ordered 0
1719 define amdgpu_kernel void @system_seq_cst_monotonic(
1720 i32* %out, i32 %in, i32 %old) {
1721 entry:
1722 %gep = getelementptr i32, i32* %out, i32 4
1723 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst monotonic
1724 ret void
1725 }
1726
1727 ; GCN-LABEL: {{^}}system_acquire_acquire:
1728 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1729 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1730 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
1731 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
1732 ; GFX10-NEXT: s_waitcnt lgkmcnt(0){{$}}
1733 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
1734 ; GFX8-NEXT: buffer_wbinvl1_vol
1735 ; GFX10-NEXT: buffer_gl0_inv
1736 ; GFX10-NEXT: buffer_gl1_inv
1737 ; GFX10: .amdhsa_kernel system_acquire_acquire
1738 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1739 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1740 ; GFX10-NOT: .amdhsa_memory_ordered 0
1741 define amdgpu_kernel void @system_acquire_acquire(
1742 i32* %out, i32 %in, i32 %old) {
1743 entry:
1744 %gep = getelementptr i32, i32* %out, i32 4
1745 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in acquire acquire
1746 ret void
1747 }
1748
1749 ; GCN-LABEL: {{^}}system_release_acquire:
1750 ; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
1751 ; GFX10: s_waitcnt lgkmcnt(0){{$}}
1752 ; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
1753 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
1754 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
1755 ; GFX10-NEXT: s_waitcnt lgkmcnt(0){{$}}
1756 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
1757 ; GFX8-NEXT: buffer_wbinvl1_vol
1758 ; GFX10-NEXT: buffer_gl0_inv
1759 ; GFX10-NEXT: buffer_gl1_inv
1760 ; GFX10: .amdhsa_kernel system_release_acquire
1761 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1762 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1763 ; GFX10-NOT: .amdhsa_memory_ordered 0
1764 define amdgpu_kernel void @system_release_acquire(
1765 i32* %out, i32 %in, i32 %old) {
1766 entry:
1767 %gep = getelementptr i32, i32* %out, i32 4
1768 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in release acquire
1769 ret void
1770 }
1771
1772 ; GCN-LABEL: {{^}}system_acq_rel_acquire:
1773 ; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
1774 ; GFX10: s_waitcnt lgkmcnt(0){{$}}
1775 ; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
1776 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
1777 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
1778 ; GFX10-NEXT: s_waitcnt lgkmcnt(0){{$}}
1779 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
1780 ; GFX8-NEXT: buffer_wbinvl1_vol
1781 ; GFX10-NEXT: buffer_gl0_inv
1782 ; GFX10-NEXT: buffer_gl1_inv
1783 ; GFX10: .amdhsa_kernel system_acq_rel_acquire
1784 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1785 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1786 ; GFX10-NOT: .amdhsa_memory_ordered 0
1787 define amdgpu_kernel void @system_acq_rel_acquire(
1788 i32* %out, i32 %in, i32 %old) {
1789 entry:
1790 %gep = getelementptr i32, i32* %out, i32 4
1791 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in acq_rel acquire
1792 ret void
1793 }
1794
1795 ; GCN-LABEL: {{^}}system_seq_cst_acquire:
1796 ; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
1797 ; GFX10: s_waitcnt lgkmcnt(0){{$}}
1798 ; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
1799 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
1800 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
1801 ; GFX10-NEXT: s_waitcnt lgkmcnt(0){{$}}
1802 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
1803 ; GFX8-NEXT: buffer_wbinvl1_vol
1804 ; GFX10-NEXT: buffer_gl0_inv
1805 ; GFX10-NEXT: buffer_gl1_inv
1806 ; GFX10: .amdhsa_kernel system_seq_cst_acquire
1807 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1808 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1809 ; GFX10-NOT: .amdhsa_memory_ordered 0
1810 define amdgpu_kernel void @system_seq_cst_acquire(
1811 i32* %out, i32 %in, i32 %old) {
1812 entry:
1813 %gep = getelementptr i32, i32* %out, i32 4
1814 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst acquire
1815 ret void
1816 }
1817
1818 ; GCN-LABEL: {{^}}system_seq_cst_seq_cst:
1819 ; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
1820 ; GFX10: s_waitcnt lgkmcnt(0){{$}}
1821 ; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
1822 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
1823 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
1824 ; GFX10-NEXT: s_waitcnt lgkmcnt(0){{$}}
1825 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
1826 ; GFX8-NEXT: buffer_wbinvl1_vol
1827 ; GFX10-NEXT: buffer_gl0_inv
1828 ; GFX10-NEXT: buffer_gl1_inv
1829 ; GFX10: .amdhsa_kernel system_seq_cst_seq_cst
1830 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1831 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1832 ; GFX10-NOT: .amdhsa_memory_ordered 0
1833 define amdgpu_kernel void @system_seq_cst_seq_cst(
1834 i32* %out, i32 %in, i32 %old) {
1835 entry:
1836 %gep = getelementptr i32, i32* %out, i32 4
1837 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst seq_cst
1838 ret void
1839 }
1840
1841 ; GCN-LABEL: {{^}}singlethread_monotonic_monotonic:
1842 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1843 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1844 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
1845 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1846 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1847 ; GFX8-NOT: buffer_wbinvl1_vol
1848 ; GFX10-NOT: buffer_gl{{[01]}}_inv
1849 ; GFX10: .amdhsa_kernel singlethread_monotonic_monotonic
1850 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1851 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1852 ; GFX10-NOT: .amdhsa_memory_ordered 0
1853 define amdgpu_kernel void @singlethread_monotonic_monotonic(
1854 i32* %out, i32 %in, i32 %old) {
1855 entry:
1856 %gep = getelementptr i32, i32* %out, i32 4
1857 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") monotonic monotonic
1858 ret void
1859 }
1860
1861 ; GCN-LABEL: {{^}}singlethread_acquire_monotonic:
1862 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1863 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1864 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
1865 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1866 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1867 ; GFX8-NOT: buffer_wbinvl1_vol
1868 ; GFX10-NOT: buffer_gl{{[01]}}_inv
1869 ; GFX10: .amdhsa_kernel singlethread_acquire_monotonic
1870 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1871 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1872 ; GFX10-NOT: .amdhsa_memory_ordered 0
1873 define amdgpu_kernel void @singlethread_acquire_monotonic(
1874 i32* %out, i32 %in, i32 %old) {
1875 entry:
1876 %gep = getelementptr i32, i32* %out, i32 4
1877 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") acquire monotonic
1878 ret void
1879 }
1880
1881 ; GCN-LABEL: {{^}}singlethread_release_monotonic:
1882 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1883 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1884 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
1885 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1886 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1887 ; GFX8-NOT: buffer_wbinvl1_vol
1888 ; GCN-NOT: buffer_gl{{[01]}}_inv
1889 ; GFX10: .amdhsa_kernel singlethread_release_monotonic
1890 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1891 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1892 ; GFX10-NOT: .amdhsa_memory_ordered 0
1893 define amdgpu_kernel void @singlethread_release_monotonic(
1894 i32* %out, i32 %in, i32 %old) {
1895 entry:
1896 %gep = getelementptr i32, i32* %out, i32 4
1897 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") release monotonic
1898 ret void
1899 }
1900
1901 ; GCN-LABEL: {{^}}singlethread_acq_rel_monotonic:
1902 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1903 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1904 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
1905 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1906 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1907 ; GFX8-NOT: buffer_wbinvl1_vol
1908 ; GFX10-NOT: buffer_gl{{[01]}}_inv
1909 ; GFX10: .amdhsa_kernel singlethread_acq_rel_monotonic
1910 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1911 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1912 ; GFX10-NOT: .amdhsa_memory_ordered 0
1913 define amdgpu_kernel void @singlethread_acq_rel_monotonic(
1914 i32* %out, i32 %in, i32 %old) {
1915 entry:
1916 %gep = getelementptr i32, i32* %out, i32 4
1917 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel monotonic
1918 ret void
1919 }
1920
1921 ; GCN-LABEL: {{^}}singlethread_seq_cst_monotonic:
1922 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1923 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1924 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
1925 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1926 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1927 ; GFX8-NOT: buffer_wbinvl1_vol
1928 ; GFX10-NOT: buffer_gl{{[01]}}_inv
1929 ; GFX10: .amdhsa_kernel singlethread_seq_cst_monotonic
1930 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1931 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1932 ; GFX10-NOT: .amdhsa_memory_ordered 0
1933 define amdgpu_kernel void @singlethread_seq_cst_monotonic(
1934 i32* %out, i32 %in, i32 %old) {
1935 entry:
1936 %gep = getelementptr i32, i32* %out, i32 4
1937 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst monotonic
1938 ret void
1939 }
1940
1941 ; GCN-LABEL: {{^}}singlethread_acquire_acquire:
1942 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1943 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1944 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
1945 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1946 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1947 ; GFX8-NOT: buffer_wbinvl1_vol
1948 ; GFX10-NOT: buffer_gl{{[01]}}_inv
1949 ; GFX10: .amdhsa_kernel singlethread_acquire_acquire
1950 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1951 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1952 ; GFX10-NOT: .amdhsa_memory_ordered 0
1953 define amdgpu_kernel void @singlethread_acquire_acquire(
1954 i32* %out, i32 %in, i32 %old) {
1955 entry:
1956 %gep = getelementptr i32, i32* %out, i32 4
1957 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") acquire acquire
1958 ret void
1959 }
1960
1961 ; GCN-LABEL: {{^}}singlethread_release_acquire:
1962 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1963 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1964 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
1965 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1966 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1967 ; GFX8-NOT: buffer_wbinvl1_vol
1968 ; GFX10-NOT: buffer_gl{{[01]}}_inv
1969 ; GFX10: .amdhsa_kernel singlethread_release_acquire
1970 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1971 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1972 ; GFX10-NOT: .amdhsa_memory_ordered 0
1973 define amdgpu_kernel void @singlethread_release_acquire(
1974 i32* %out, i32 %in, i32 %old) {
1975 entry:
1976 %gep = getelementptr i32, i32* %out, i32 4
1977 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") release acquire
1978 ret void
1979 }
1980
1981 ; GCN-LABEL: {{^}}singlethread_acq_rel_acquire:
1982 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1983 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1984 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
1985 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
1986 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
1987 ; GFX8-NOT: buffer_wbinvl1_vol
1988 ; GFX10-NOT: buffer_gl{{[01]}}_inv
1989 ; GFX10: .amdhsa_kernel singlethread_acq_rel_acquire
1990 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
1991 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
1992 ; GFX10-NOT: .amdhsa_memory_ordered 0
1993 define amdgpu_kernel void @singlethread_acq_rel_acquire(
1994 i32* %out, i32 %in, i32 %old) {
1995 entry:
1996 %gep = getelementptr i32, i32* %out, i32 4
1997 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel acquire
1998 ret void
1999 }
2000
2001 ; GCN-LABEL: {{^}}singlethread_seq_cst_acquire:
2002 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2003 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2004 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
2005 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2006 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2007 ; GFX8-NOT: buffer_wbinvl1_vol
2008 ; GFX10-NOT: buffer_gl{{[01]}}_inv
2009 ; GFX10: .amdhsa_kernel singlethread_seq_cst_acquire
2010 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2011 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2012 ; GFX10-NOT: .amdhsa_memory_ordered 0
2013 define amdgpu_kernel void @singlethread_seq_cst_acquire(
2014 i32* %out, i32 %in, i32 %old) {
2015 entry:
2016 %gep = getelementptr i32, i32* %out, i32 4
2017 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst acquire
2018 ret void
2019 }
2020
2021 ; GCN-LABEL: {{^}}singlethread_seq_cst_seq_cst:
2022 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2023 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2024 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
2025 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2026 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2027 ; GFX8-NOT: buffer_wbinvl1_vol
2028 ; GFX10-NOT: buffer_gl{{[01]}}_inv
2029 ; GFX10: .amdhsa_kernel singlethread_seq_cst_seq_cst
2030 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2031 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2032 ; GFX10-NOT: .amdhsa_memory_ordered 0
2033 define amdgpu_kernel void @singlethread_seq_cst_seq_cst(
2034 i32* %out, i32 %in, i32 %old) {
2035 entry:
2036 %gep = getelementptr i32, i32* %out, i32 4
2037 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst seq_cst
2038 ret void
2039 }
2040
2041 ; GCN-LABEL: {{^}}agent_monotonic_monotonic:
2042 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2043 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2044 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
2045 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2046 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2047 ; GFX8-NOT: buffer_wbinvl1_vol
2048 ; GFX10-NOT: buffer_gl{{[01]}}_inv
2049 ; GFX10: .amdhsa_kernel agent_monotonic_monotonic
2050 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2051 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2052 ; GFX10-NOT: .amdhsa_memory_ordered 0
2053 define amdgpu_kernel void @agent_monotonic_monotonic(
2054 i32* %out, i32 %in, i32 %old) {
2055 entry:
2056 %gep = getelementptr i32, i32* %out, i32 4
2057 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") monotonic monotonic
2058 ret void
2059 }
2060
2061 ; GCN-LABEL: {{^}}agent_acquire_monotonic:
2062 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2063 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2064 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
2065 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2066 ; GFX10-NEXT: s_waitcnt lgkmcnt(0){{$}}
2067 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
2068 ; GFX8-NEXT: buffer_wbinvl1_vol
2069 ; GFX10-NEXT: buffer_gl0_inv
2070 ; GFX10-NEXT: buffer_gl1_inv
2071 ; GFX10: .amdhsa_kernel agent_acquire_monotonic
2072 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2073 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2074 ; GFX10-NOT: .amdhsa_memory_ordered 0
2075 define amdgpu_kernel void @agent_acquire_monotonic(
2076 i32* %out, i32 %in, i32 %old) {
2077 entry:
2078 %gep = getelementptr i32, i32* %out, i32 4
2079 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") acquire monotonic
2080 ret void
2081 }
2082
2083 ; GCN-LABEL: {{^}}agent_release_monotonic:
2084 ; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2085 ; GFX10: s_waitcnt lgkmcnt(0){{$}}
2086 ; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
2087 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
2088 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2089 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2090 ; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
2091 ; GFX10: .amdhsa_kernel agent_release_monotonic
2092 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2093 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2094 ; GFX10-NOT: .amdhsa_memory_ordered 0
2095 define amdgpu_kernel void @agent_release_monotonic(
2096 i32* %out, i32 %in, i32 %old) {
2097 entry:
2098 %gep = getelementptr i32, i32* %out, i32 4
2099 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") release monotonic
2100 ret void
2101 }
2102
2103 ; GCN-LABEL: {{^}}agent_acq_rel_monotonic:
2104 ; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2105 ; GFX10: s_waitcnt lgkmcnt(0){{$}}
2106 ; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
2107 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
2108 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2109 ; GFX10-NEXT: s_waitcnt lgkmcnt(0){{$}}
2110 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
2111 ; GFX8-NEXT: buffer_wbinvl1_vol
2112 ; GFX10-NEXT: buffer_gl0_inv
2113 ; GFX10-NEXT: buffer_gl1_inv
2114 ; GFX10: .amdhsa_kernel agent_acq_rel_monotonic
2115 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2116 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2117 ; GFX10-NOT: .amdhsa_memory_ordered 0
2118 define amdgpu_kernel void @agent_acq_rel_monotonic(
2119 i32* %out, i32 %in, i32 %old) {
2120 entry:
2121 %gep = getelementptr i32, i32* %out, i32 4
2122 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") acq_rel monotonic
2123 ret void
2124 }
2125
2126 ; GCN-LABEL: {{^}}agent_seq_cst_monotonic:
2127 ; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2128 ; GFX10: s_waitcnt lgkmcnt(0){{$}}
2129 ; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
2130 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
2131 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2132 ; GFX10-NEXT: s_waitcnt lgkmcnt(0){{$}}
2133 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
2134 ; GFX8-NEXT: buffer_wbinvl1_vol
2135 ; GFX10-NEXT: buffer_gl0_inv
2136 ; GFX10-NEXT: buffer_gl1_inv
2137 ; GFX10: .amdhsa_kernel agent_seq_cst_monotonic
2138 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2139 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2140 ; GFX10-NOT: .amdhsa_memory_ordered 0
2141 define amdgpu_kernel void @agent_seq_cst_monotonic(
2142 i32* %out, i32 %in, i32 %old) {
2143 entry:
2144 %gep = getelementptr i32, i32* %out, i32 4
2145 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") seq_cst monotonic
2146 ret void
2147 }
2148
2149 ; GCN-LABEL: {{^}}agent_acquire_acquire:
2150 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2151 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2152 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
2153 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2154 ; GFX10-NEXT: s_waitcnt lgkmcnt(0){{$}}
2155 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
2156 ; GFX8-NEXT: buffer_wbinvl1_vol
2157 ; GFX10-NEXT: buffer_gl0_inv
2158 ; GFX10-NEXT: buffer_gl1_inv
2159 ; GFX10: .amdhsa_kernel agent_acquire_acquire
2160 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2161 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2162 ; GFX10-NOT: .amdhsa_memory_ordered 0
2163 define amdgpu_kernel void @agent_acquire_acquire(
2164 i32* %out, i32 %in, i32 %old) {
2165 entry:
2166 %gep = getelementptr i32, i32* %out, i32 4
2167 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") acquire acquire
2168 ret void
2169 }
2170
2171 ; GCN-LABEL: {{^}}agent_release_acquire:
2172 ; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2173 ; GFX10: s_waitcnt lgkmcnt(0){{$}}
2174 ; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
2175 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
2176 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2177 ; GFX10-NEXT: s_waitcnt lgkmcnt(0){{$}}
2178 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
2179 ; GFX8-NEXT: buffer_wbinvl1_vol
2180 ; GFX10-NEXT: buffer_gl0_inv
2181 ; GFX10-NEXT: buffer_gl1_inv
2182 ; GFX10: .amdhsa_kernel agent_release_acquire
2183 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2184 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2185 ; GFX10-NOT: .amdhsa_memory_ordered 0
2186 define amdgpu_kernel void @agent_release_acquire(
2187 i32* %out, i32 %in, i32 %old) {
2188 entry:
2189 %gep = getelementptr i32, i32* %out, i32 4
2190 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") release acquire
2191 ret void
2192 }
2193
2194 ; GCN-LABEL: {{^}}agent_acq_rel_acquire:
2195 ; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2196 ; GFX10: s_waitcnt lgkmcnt(0){{$}}
2197 ; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
2198 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
2199 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2200 ; GFX10-NEXT: s_waitcnt lgkmcnt(0){{$}}
2201 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
2202 ; GFX8-NEXT: buffer_wbinvl1_vol
2203 ; GFX10-NEXT: buffer_gl0_inv
2204 ; GFX10-NEXT: buffer_gl1_inv
2205 ; GFX10: .amdhsa_kernel agent_acq_rel_acquire
2206 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2207 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2208 ; GFX10-NOT: .amdhsa_memory_ordered 0
2209 define amdgpu_kernel void @agent_acq_rel_acquire(
2210 i32* %out, i32 %in, i32 %old) {
2211 entry:
2212 %gep = getelementptr i32, i32* %out, i32 4
2213 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") acq_rel acquire
2214 ret void
2215 }
2216
2217 ; GCN-LABEL: {{^}}agent_seq_cst_acquire:
2218 ; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2219 ; GFX10: s_waitcnt lgkmcnt(0){{$}}
2220 ; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
2221 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
2222 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2223 ; GFX10-NEXT: s_waitcnt lgkmcnt(0){{$}}
2224 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
2225 ; GFX8-NEXT: buffer_wbinvl1_vol
2226 ; GFX10-NEXT: buffer_gl0_inv
2227 ; GFX10-NEXT: buffer_gl1_inv
2228 ; GFX10: .amdhsa_kernel agent_seq_cst_acquire
2229 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2230 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2231 ; GFX10-NOT: .amdhsa_memory_ordered 0
2232 define amdgpu_kernel void @agent_seq_cst_acquire(
2233 i32* %out, i32 %in, i32 %old) {
2234 entry:
2235 %gep = getelementptr i32, i32* %out, i32 4
2236 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") seq_cst acquire
2237 ret void
2238 }
2239
; Agent-scope cmpxchg (seq_cst success / seq_cst failure ordering).
; The checks expect zero-count waits before and directly after the
; flat_atomic_cmpswap, followed by a cache invalidate: buffer_wbinvl1_vol on
; GFX8, buffer_gl0_inv then buffer_gl1_inv on GFX10.
2240 ; GCN-LABEL: {{^}}agent_seq_cst_seq_cst:
2241 ; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2242 ; GFX10: s_waitcnt lgkmcnt(0){{$}}
2243 ; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
2244 ; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
2245 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2246 ; GFX10-NEXT: s_waitcnt lgkmcnt(0){{$}}
2247 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
2248 ; GFX8-NEXT: buffer_wbinvl1_vol
2249 ; GFX10-NEXT: buffer_gl0_inv
2250 ; GFX10-NEXT: buffer_gl1_inv
2251 ; GFX10: .amdhsa_kernel agent_seq_cst_seq_cst
2252 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2253 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2254 ; GFX10-NOT: .amdhsa_memory_ordered 0
2255 define amdgpu_kernel void @agent_seq_cst_seq_cst(
2256 i32* %out, i32 %in, i32 %old) {
2257 entry:
2258 %gep = getelementptr i32, i32* %out, i32 4
2259 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst
7802260 ret void
7812261 }
7822262
7832263 ; GCN-LABEL: {{^}}workgroup_monotonic_monotonic:
784 ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
785 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
786 ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
787 ; GCN-NOT: buffer_wbinvl1_vol
2264 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2265 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2266 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
2267 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2268 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2269 ; GFX8-NOT: buffer_wbinvl1_vol
2270 ; GFX10-NOT: buffer_gl{{[01]}}_inv
2271 ; GFX10: .amdhsa_kernel workgroup_monotonic_monotonic
2272 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2273 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2274 ; GFX10-NOT: .amdhsa_memory_ordered 0
7882275 define amdgpu_kernel void @workgroup_monotonic_monotonic(
7892276 i32* %out, i32 %in, i32 %old) {
7902277 entry:
7932280 ret void
7942281 }
7952282
796 ; GCN-LABEL: {{^}}workgroup_acquire_monotonic:
797 ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
798 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
799 ; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
800 ; GFX8-NOT: buffer_wbinvl1_vol
2283 ; GCN-LABEL: {{^}}workgroup_acquire_monotonic:
2284 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2285 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2286 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
2287 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
2288 ; GFX10WGP-NEXT: s_waitcnt lgkmcnt(0){{$}}
2289 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
2290 ; GFX10WGP-NEXT: buffer_gl0_inv
2291 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
2292 ; GFX10CU-NOT: buffer_gl0_inv
2293 ; GFX8-NOT: buffer_wbinvl1_vol
2294 ; GFX10: .amdhsa_kernel workgroup_acquire_monotonic
2295 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2296 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2297 ; GFX10-NOT: .amdhsa_memory_ordered 0
8012298 define amdgpu_kernel void @workgroup_acquire_monotonic(
8022299 i32* %out, i32 %in, i32 %old) {
8032300 entry:
8062303 ret void
8072304 }
8082305
809 ; GCN-LABEL: {{^}}workgroup_release_monotonic:
810 ; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
811 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
812 ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
813 ; GCN-NOT: buffer_wbinvl1_vol
2306 ; GCN-LABEL: {{^}}workgroup_release_monotonic:
2307 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
2308 ; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2309 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
2310 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
2311 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
2312 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
2313 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2314 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2315 ; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
2316 ; GFX10: .amdhsa_kernel workgroup_release_monotonic
2317 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2318 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2319 ; GFX10-NOT: .amdhsa_memory_ordered 0
8142320 define amdgpu_kernel void @workgroup_release_monotonic(
8152321 i32* %out, i32 %in, i32 %old) {
8162322 entry:
8192325 ret void
8202326 }
8212327
822 ; GCN-LABEL: {{^}}workgroup_acq_rel_monotonic:
823 ; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
824 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
825 ; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
826 ; GFX8-NOT: buffer_wbinvl1_vol
2328 ; GCN-LABEL: {{^}}workgroup_acq_rel_monotonic:
2329 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
2330 ; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2331 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
2332 ; GFX10CU-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2333 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
2334 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
2335 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
2336 ; GFX10WGP-NEXT: s_waitcnt lgkmcnt(0){{$}}
2337 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
2338 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
2339 ; GFX8-NOT: buffer_wbinvl1_vol
2340 ; GFX10WGP-NEXT: buffer_gl0_inv
2341 ; GFX10CU-NOT: buffer_gl0_inv
2342 ; GFX10: .amdhsa_kernel workgroup_acq_rel_monotonic
2343 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2344 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2345 ; GFX10-NOT: .amdhsa_memory_ordered 0
8272346 define amdgpu_kernel void @workgroup_acq_rel_monotonic(
8282347 i32* %out, i32 %in, i32 %old) {
8292348 entry:
8322351 ret void
8332352 }
8342353
835 ; GCN-LABEL: {{^}}workgroup_seq_cst_monotonic:
836 ; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
837 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
838 ; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
839 ; GFX8-NOT: buffer_wbinvl1_vol
2354 ; GCN-LABEL: {{^}}workgroup_seq_cst_monotonic:
2355 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
2356 ; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2357 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
2358 ; GFX10CU-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2359 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
2360 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
2361 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
2362 ; GFX10WGP-NEXT: s_waitcnt lgkmcnt(0){{$}}
2363 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
2364 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
2365 ; GFX8-NOT: buffer_wbinvl1_vol
2366 ; GFX10WGP-NEXT: buffer_gl0_inv
2367 ; GFX10CU-NOT: buffer_gl0_inv
2368 ; GFX10: .amdhsa_kernel workgroup_seq_cst_monotonic
2369 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2370 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2371 ; GFX10-NOT: .amdhsa_memory_ordered 0
8402372 define amdgpu_kernel void @workgroup_seq_cst_monotonic(
8412373 i32* %out, i32 %in, i32 %old) {
8422374 entry:
8452377 ret void
8462378 }
8472379
848 ; GCN-LABEL: {{^}}workgroup_acquire_acquire:
849 ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
850 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
851 ; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
852 ; GFX8-NOT: buffer_wbinvl1_vol
2380 ; GCN-LABEL: {{^}}workgroup_acquire_acquire:
2381 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2382 ; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2383 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
2384 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
2385 ; GFX10WGP-NEXT: s_waitcnt lgkmcnt(0){{$}}
2386 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
2387 ; GFX10WGP-NEXT: buffer_gl0_inv
2388 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
2389 ; GFX10CU-NOT: buffer_gl0_inv
2390 ; GFX8-NOT: buffer_wbinvl1_vol
2391 ; GFX10: .amdhsa_kernel workgroup_acquire_acquire
2392 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2393 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2394 ; GFX10-NOT: .amdhsa_memory_ordered 0
8532395 define amdgpu_kernel void @workgroup_acquire_acquire(
8542396 i32* %out, i32 %in, i32 %old) {
8552397 entry:
8582400 ret void
8592401 }
8602402
861 ; GCN-LABEL: {{^}}workgroup_release_acquire:
862 ; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
863 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
864 ; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
865 ; GFX8-NOT: buffer_wbinvl1_vol
2403 ; GCN-LABEL: {{^}}workgroup_release_acquire:
2404 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
2405 ; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2406 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
2407 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
2408 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
2409 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
2410 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
2411 ; GFX10WGP-NEXT: s_waitcnt lgkmcnt(0){{$}}
2412 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
2413 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
2414 ; GFX8-NOT: buffer_wbinvl1_vol
2415 ; GFX10WGP-NEXT: buffer_gl0_inv
2416 ; GFX10CU-NOT: buffer_gl0_inv
2417 ; GFX10: .amdhsa_kernel workgroup_release_acquire
2418 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2419 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2420 ; GFX10-NOT: .amdhsa_memory_ordered 0
8662421 define amdgpu_kernel void @workgroup_release_acquire(
8672422 i32* %out, i32 %in, i32 %old) {
8682423 entry:
8712426 ret void
8722427 }
8732428
874 ; GCN-LABEL: {{^}}workgroup_acq_rel_acquire:
875 ; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
876 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
877 ; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
878 ; GFX8-NOT: buffer_wbinvl1_vol
2429 ; GCN-LABEL: {{^}}workgroup_acq_rel_acquire:
2430 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
2431 ; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2432 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
2433 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
2434 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
2435 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
2436 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
2437 ; GFX10WGP-NEXT: s_waitcnt lgkmcnt(0){{$}}
2438 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
2439 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
2440 ; GFX8-NOT: buffer_wbinvl1_vol
2441 ; GFX10WGP: buffer_gl0_inv
2442 ; GFX10CU-NOT: buffer_gl0_inv
2443 ; GFX10: .amdhsa_kernel workgroup_acq_rel_acquire
2444 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2445 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2446 ; GFX10-NOT: .amdhsa_memory_ordered 0
8792447 define amdgpu_kernel void @workgroup_acq_rel_acquire(
8802448 i32* %out, i32 %in, i32 %old) {
8812449 entry:
8842452 ret void
8852453 }
8862454
887 ; GCN-LABEL: {{^}}workgroup_seq_cst_acquire:
888 ; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
889 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
890 ; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
891 ; GFX8-NOT: buffer_wbinvl1_vol
2455 ; GCN-LABEL: {{^}}workgroup_seq_cst_acquire:
2456 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
2457 ; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2458 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
2459 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
2460 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
2461 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
2462 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
2463 ; GFX10WGP-NEXT: s_waitcnt lgkmcnt(0){{$}}
2464 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
2465 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
2466 ; GFX8-NOT: buffer_wbinvl1_vol
2467 ; GFX10WGP-NEXT: buffer_gl0_inv
2468 ; GFX10CU-NOT: buffer_gl0_inv
2469 ; GFX10: .amdhsa_kernel workgroup_seq_cst_acquire
2470 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2471 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2472 ; GFX10-NOT: .amdhsa_memory_ordered 0
8922473 define amdgpu_kernel void @workgroup_seq_cst_acquire(
8932474 i32* %out, i32 %in, i32 %old) {
8942475 entry:
8972478 ret void
8982479 }
8992480
900 ; GCN-LABEL: {{^}}workgroup_seq_cst_seq_cst:
901 ; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
902 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
903 ; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
904 ; GFX8-NOT: buffer_wbinvl1_vol
2481 ; GCN-LABEL: {{^}}workgroup_seq_cst_seq_cst:
2482 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
2483 ; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
2484 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
2485 ; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
2486 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
2487 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
2488 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
2489 ; GFX10WGP-NEXT: s_waitcnt lgkmcnt(0){{$}}
2490 ; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
2491 ; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
2492 ; GFX8-NOT: buffer_wbinvl1_vol
2493 ; GFX10WGP: buffer_gl0_inv
2494 ; GFX10CU-NOT: buffer_gl0_inv
2495 ; GFX10: .amdhsa_kernel workgroup_seq_cst_seq_cst
2496 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2497 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2498 ; GFX10-NOT: .amdhsa_memory_ordered 0
9052499 define amdgpu_kernel void @workgroup_seq_cst_seq_cst(
9062500 i32* %out, i32 %in, i32 %old) {
9072501 entry:
9102504 ret void
9112505 }
9122506
; cmpxchg at syncscope("workgroup-one-as") with monotonic/monotonic ordering.
; The checks forbid `s_waitcnt vmcnt(0)` on either side of the
; flat_atomic_cmpswap and buffer_wbinvl1_vol after it.
913 ; GCN-LABEL: {{^}}workgroup_one_as_monotonic_monotonic:
914 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
915 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
916 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
917 ; GCN-NOT: buffer_wbinvl1_vol
918 define amdgpu_kernel void @workgroup_one_as_monotonic_monotonic(
919 i32* %out, i32 %in, i32 %old) {
920 entry:
921 %gep = getelementptr i32, i32* %out, i32 4
922 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") monotonic monotonic
923 ret void
924 }
925
926 ; GCN-LABEL: {{^}}workgroup_one_as_acquire_monotonic:
; Wavefront-scope cmpxchg (monotonic success / monotonic failure ordering).
; Expect a bare flat_atomic_cmpswap: no `s_waitcnt vmcnt(0)` or zero-count
; s_waitcnt_vmcnt/s_waitcnt_vscnt around it, and no cache invalidate after it.
; Text outside the double braces is matched literally, so the invalidate
; pattern must not contain a stray `.` or it can never match.
2507 ; GCN-LABEL: {{^}}wavefront_monotonic_monotonic:
2508 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2509 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2510 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
2511 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2512 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2513 ; GFX8-NOT: buffer_wbinvl1_vol
2514 ; GFX10-NOT: buffer_gl{{[01]}}_inv
2515 ; GFX10: .amdhsa_kernel wavefront_monotonic_monotonic
2516 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2517 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2518 ; GFX10-NOT: .amdhsa_memory_ordered 0
2519 define amdgpu_kernel void @wavefront_monotonic_monotonic(
2520 i32* %out, i32 %in, i32 %old) {
2521 entry:
2522 %gep = getelementptr i32, i32* %out, i32 4
2523 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") monotonic monotonic
2524 ret void
2525 }
2526
; Wavefront-scope cmpxchg (acquire success / monotonic failure ordering).
; Expect a bare flat_atomic_cmpswap: no `s_waitcnt vmcnt(0)` or zero-count
; s_waitcnt_vmcnt/s_waitcnt_vscnt around it, and no cache invalidate after it.
; Text outside the double braces is matched literally, so the invalidate
; pattern must not contain a stray `.` or it can never match.
2527 ; GCN-LABEL: {{^}}wavefront_acquire_monotonic:
2528 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2529 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2530 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
2531 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2532 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2533 ; GFX8-NOT: buffer_wbinvl1_vol
2534 ; GFX10-NOT: buffer_gl{{[01]}}_inv
2535 ; GFX10: .amdhsa_kernel wavefront_acquire_monotonic
2536 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2537 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2538 ; GFX10-NOT: .amdhsa_memory_ordered 0
2539 define amdgpu_kernel void @wavefront_acquire_monotonic(
2540 i32* %out, i32 %in, i32 %old) {
2541 entry:
2542 %gep = getelementptr i32, i32* %out, i32 4
2543 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") acquire monotonic
2544 ret void
2545 }
2546
; Wavefront-scope cmpxchg (release success / monotonic failure ordering).
; Expect a bare flat_atomic_cmpswap: no `s_waitcnt vmcnt(0)` or zero-count
; s_waitcnt_vmcnt/s_waitcnt_vscnt around it, and no cache invalidate after it.
; Text outside the double braces is matched literally, so the invalidate
; pattern must not contain a stray `.` or it can never match.
2547 ; GCN-LABEL: {{^}}wavefront_release_monotonic:
2548 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2549 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2550 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
2551 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2552 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2553 ; GFX8-NOT: buffer_wbinvl1_vol
2554 ; GFX10-NOT: buffer_gl{{[01]}}_inv
2555 ; GFX10: .amdhsa_kernel wavefront_release_monotonic
2556 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2557 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2558 ; GFX10-NOT: .amdhsa_memory_ordered 0
2559 define amdgpu_kernel void @wavefront_release_monotonic(
2560 i32* %out, i32 %in, i32 %old) {
2561 entry:
2562 %gep = getelementptr i32, i32* %out, i32 4
2563 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") release monotonic
2564 ret void
2565 }
2566
; Wavefront-scope cmpxchg (acq_rel success / monotonic failure ordering).
; Expect a bare flat_atomic_cmpswap: no `s_waitcnt vmcnt(0)` or zero-count
; s_waitcnt_vmcnt/s_waitcnt_vscnt around it, and no cache invalidate after it.
; Text outside the double braces is matched literally, so the invalidate
; pattern must not contain a stray `.` or it can never match.
2567 ; GCN-LABEL: {{^}}wavefront_acq_rel_monotonic:
2568 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2569 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2570 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
2571 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2572 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2573 ; GFX8-NOT: buffer_wbinvl1_vol
2574 ; GFX10-NOT: buffer_gl{{[01]}}_inv
2575 ; GFX10: .amdhsa_kernel wavefront_acq_rel_monotonic
2576 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2577 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2578 ; GFX10-NOT: .amdhsa_memory_ordered 0
2579 define amdgpu_kernel void @wavefront_acq_rel_monotonic(
2580 i32* %out, i32 %in, i32 %old) {
2581 entry:
2582 %gep = getelementptr i32, i32* %out, i32 4
2583 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel monotonic
2584 ret void
2585 }
2586
; Wavefront-scope cmpxchg (seq_cst success / monotonic failure ordering).
; Expect a bare flat_atomic_cmpswap: no `s_waitcnt vmcnt(0)` or zero-count
; s_waitcnt_vmcnt/s_waitcnt_vscnt around it, and no cache invalidate after it.
; Text outside the double braces is matched literally, so the invalidate
; pattern must not contain a stray `.` or it can never match.
2587 ; GCN-LABEL: {{^}}wavefront_seq_cst_monotonic:
2588 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2589 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2590 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
2591 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2592 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2593 ; GFX8-NOT: buffer_wbinvl1_vol
2594 ; GFX10-NOT: buffer_gl{{[01]}}_inv
2595 ; GFX10: .amdhsa_kernel wavefront_seq_cst_monotonic
2596 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2597 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2598 ; GFX10-NOT: .amdhsa_memory_ordered 0
2599 define amdgpu_kernel void @wavefront_seq_cst_monotonic(
2600 i32* %out, i32 %in, i32 %old) {
2601 entry:
2602 %gep = getelementptr i32, i32* %out, i32 4
2603 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst monotonic
2604 ret void
2605 }
2606
; Wavefront-scope cmpxchg (acquire success / acquire failure ordering).
; Expect a bare flat_atomic_cmpswap: no `s_waitcnt vmcnt(0)` or zero-count
; s_waitcnt_vmcnt/s_waitcnt_vscnt around it, and no cache invalidate after it.
; Text outside the double braces is matched literally, so the invalidate
; pattern must not contain a stray `.` or it can never match.
2607 ; GCN-LABEL: {{^}}wavefront_acquire_acquire:
2608 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2609 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2610 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
2611 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2612 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2613 ; GFX8-NOT: buffer_wbinvl1_vol
2614 ; GFX10-NOT: buffer_gl{{[01]}}_inv
2615 ; GFX10: .amdhsa_kernel wavefront_acquire_acquire
2616 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2617 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2618 ; GFX10-NOT: .amdhsa_memory_ordered 0
2619 define amdgpu_kernel void @wavefront_acquire_acquire(
2620 i32* %out, i32 %in, i32 %old) {
2621 entry:
2622 %gep = getelementptr i32, i32* %out, i32 4
2623 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") acquire acquire
2624 ret void
2625 }
2626
; Wavefront-scope cmpxchg (release success / acquire failure ordering).
; Expect a bare flat_atomic_cmpswap: no `s_waitcnt vmcnt(0)` or zero-count
; s_waitcnt_vmcnt/s_waitcnt_vscnt around it, and no cache invalidate after it.
; Text outside the double braces is matched literally, so the invalidate
; pattern must not contain a stray `.` or it can never match.
2627 ; GCN-LABEL: {{^}}wavefront_release_acquire:
2628 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2629 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2630 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
2631 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2632 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2633 ; GFX8-NOT: buffer_wbinvl1_vol
2634 ; GFX10-NOT: buffer_gl{{[01]}}_inv
2635 ; GFX10: .amdhsa_kernel wavefront_release_acquire
2636 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2637 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2638 ; GFX10-NOT: .amdhsa_memory_ordered 0
2639 define amdgpu_kernel void @wavefront_release_acquire(
2640 i32* %out, i32 %in, i32 %old) {
2641 entry:
2642 %gep = getelementptr i32, i32* %out, i32 4
2643 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") release acquire
2644 ret void
2645 }
2646
; Wavefront-scope cmpxchg (acq_rel success / acquire failure ordering).
; Expect a bare flat_atomic_cmpswap: no `s_waitcnt vmcnt(0)` or zero-count
; s_waitcnt_vmcnt/s_waitcnt_vscnt around it, and no cache invalidate after it.
; Text outside the double braces is matched literally, so the invalidate
; pattern must not contain a stray `.` or it can never match.
2647 ; GCN-LABEL: {{^}}wavefront_acq_rel_acquire:
2648 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2649 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2650 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
2651 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2652 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2653 ; GFX8-NOT: buffer_wbinvl1_vol
2654 ; GFX10-NOT: buffer_gl{{[01]}}_inv
2655 ; GFX10: .amdhsa_kernel wavefront_acq_rel_acquire
2656 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2657 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2658 ; GFX10-NOT: .amdhsa_memory_ordered 0
2659 define amdgpu_kernel void @wavefront_acq_rel_acquire(
2660 i32* %out, i32 %in, i32 %old) {
2661 entry:
2662 %gep = getelementptr i32, i32* %out, i32 4
2663 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel acquire
2664 ret void
2665 }
2666
; Wavefront-scope cmpxchg (seq_cst success / acquire failure ordering).
; Expect a bare flat_atomic_cmpswap: no `s_waitcnt vmcnt(0)` or zero-count
; s_waitcnt_vmcnt/s_waitcnt_vscnt around it, and no cache invalidate after it.
; Text outside the double braces is matched literally, so the invalidate
; pattern must not contain a stray `.` or it can never match.
2667 ; GCN-LABEL: {{^}}wavefront_seq_cst_acquire:
2668 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2669 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2670 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
2671 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2672 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2673 ; GFX8-NOT: buffer_wbinvl1_vol
2674 ; GFX10-NOT: buffer_gl{{[01]}}_inv
2675 ; GFX10: .amdhsa_kernel wavefront_seq_cst_acquire
2676 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2677 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2678 ; GFX10-NOT: .amdhsa_memory_ordered 0
2679 define amdgpu_kernel void @wavefront_seq_cst_acquire(
2680 i32* %out, i32 %in, i32 %old) {
2681 entry:
2682 %gep = getelementptr i32, i32* %out, i32 4
2683 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst acquire
2684 ret void
2685 }
2686
; Wavefront-scope cmpxchg (seq_cst success / seq_cst failure ordering).
; Expect a bare flat_atomic_cmpswap: no `s_waitcnt vmcnt(0)` or zero-count
; s_waitcnt_vmcnt/s_waitcnt_vscnt around it, and no cache invalidate after it.
; Text outside the double braces is matched literally, so the invalidate
; pattern must not contain a stray `.` or it can never match.
2687 ; GCN-LABEL: {{^}}wavefront_seq_cst_seq_cst:
2688 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2689 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2690 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
2691 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
2692 ; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
2693 ; GFX8-NOT: buffer_wbinvl1_vol
2694 ; GFX10-NOT: buffer_gl{{[01]}}_inv
2695 ; GFX10: .amdhsa_kernel wavefront_seq_cst_seq_cst
2696 ; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
2697 ; GFX10CU: .amdhsa_workgroup_processor_mode 0
2698 ; GFX10-NOT: .amdhsa_memory_ordered 0
2699 define amdgpu_kernel void @wavefront_seq_cst_seq_cst(
2700 i32* %out, i32 %in, i32 %old) {
2701 entry:
2702 %gep = getelementptr i32, i32* %out, i32 4
2703 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst seq_cst
2704 ret void
2705 }
2706
2707 ; GCN-LABEL: {{^}}system_acquire_monotonic_ret:
9272708 ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
928 ; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
929 ; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
930 ; GFX8-NOT: buffer_wbinvl1_vol
931 define amdgpu_kernel void @workgroup_one_as_acquire_monotonic(
932 i32* %out, i32 %in, i32 %old) {
933 entry: