llvm.org GIT mirror llvm / abf1684
[AMDGPU] Assembler: Swap operands of flat_store instructions to match AMD assembler

Historically, AMD's internal sp3 assembler has used the operand order `flat_store* addr, data`. To match existing code and to enable reuse, change the LLVM definitions to use the same order. Also update the MC and CodeGen tests.

Differential Revision: http://reviews.llvm.org/D16927

Patch by: Nikolay Haustov

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@260694 91177308-0d34-0410-b5e6-96231b3b80d8

Tom Stellard 4 years ago
16 changed file(s) with 114 addition(s) and 114 deletion(s). Raw diff Collapse all Expand all
296296
297297 class FlatStorePat : Pat <
298298 (node vt:$data, i64:$addr),
299 (inst $data, $addr, 0, 0, 0)
299 (inst $addr, $data, 0, 0, 0)
300300 >;
301301
302302 def : FlatStorePat ;
26512651 multiclass FLAT_Store_Helper
26522652 RegisterClass vdataClass,
26532653 dag outs = (outs),
2654 dag ins = (ins vdataClass:$data, VReg_64:$addr, glc_flat:$glc,
2654 dag ins = (ins VReg_64:$addr, vdataClass:$data, glc_flat:$glc,
26552655 slc_flat:$slc, tfe_flat:$tfe),
2656 string asm = asm_name#" $data, $addr"#"$glc"#"$slc"#"$tfe"> {
2656 string asm = asm_name#" $addr, $data"#"$glc"#"$slc"#"$tfe"> {
26572657
26582658 let mayLoad = 0, mayStore = 1, vdst = 0 in {
26592659
119119 ; SI-DAG: v_mov_b32_e32 [[VFFBH_HI:v[0-9]+]], [[FFBH_HI]]
120120 ; SI-DAG: v_cndmask_b32_e32 v[[CTLZ:[0-9]+]], [[VFFBH_HI]], [[VFFBH_LO]]
121121 ; SI-DAG: v_mov_b32_e32 v[[CTLZ_HI:[0-9]+]], 0{{$}}
122 ; SI: {{buffer|flat}}_store_dwordx2 v{{\[}}[[CTLZ]]:[[CTLZ_HI]]{{\]}}
122 ; SI: {{buffer|flat}}_store_dwordx2 {{.*}}v{{\[}}[[CTLZ]]:[[CTLZ_HI]]{{\]}}
123123 define void @s_ctlz_i64(i64 addrspace(1)* noalias %out, i64 %val) nounwind {
124124 %ctlz = call i64 @llvm.ctlz.i64(i64 %val, i1 false)
125125 store i64 %ctlz, i64 addrspace(1)* %out
145145 ; SI-DAG: v_cmp_eq_i32_e32 vcc, 0, [[OR]]
146146 ; SI-DAG: v_cndmask_b32_e64 v[[CLTZ_LO:[0-9]+]], v[[CTLZ:[0-9]+]], 64, vcc
147147 ; SI-DAG: v_mov_b32_e32 v[[CTLZ_HI:[0-9]+]], 0{{$}}
148 ; SI: {{buffer|flat}}_store_dwordx2 v{{\[}}[[CLTZ_LO]]:[[CTLZ_HI]]{{\]}}
148 ; SI: {{buffer|flat}}_store_dwordx2 {{.*}}v{{\[}}[[CLTZ_LO]]:[[CTLZ_HI]]{{\]}}
149149 define void @v_ctlz_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
150150 %tid = call i32 @llvm.r600.read.tidig.x()
151151 %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
122122 ; SI-DAG: v_ffbh_u32_e32 [[FFBH_HI:v[0-9]+]], v[[HI]]
123123 ; SI-DAG: v_cndmask_b32_e64 v[[CTLZ:[0-9]+]], [[FFBH_HI]], [[FFBH_LO]]
124124 ; SI-DAG: v_mov_b32_e32 v[[CTLZ_HI:[0-9]+]], 0{{$}}
125 ; SI: {{buffer|flat}}_store_dwordx2 v{{\[}}[[CTLZ]]:[[CTLZ_HI]]{{\]}}
125 ; SI: {{buffer|flat}}_store_dwordx2 {{.*}}v{{\[}}[[CTLZ]]:[[CTLZ_HI]]{{\]}}
126126 define void @v_ctlz_zero_undef_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
127127 %tid = call i32 @llvm.r600.read.tidig.x()
128128 %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
1616 ; CHECK-DAG: v_mov_b32_e32 v[[DATA:[0-9]+]], s[[SDATA]]
1717 ; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG]]
1818 ; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], s[[HI_SREG]]
19 ; CHECK: flat_store_dword v[[DATA]], v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
19 ; CHECK: flat_store_dword v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}, v[[DATA]]
2020 define void @store_flat_i32(i32 addrspace(1)* %gptr, i32 %x) #0 {
2121 %fptr = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)*
2222 store i32 %x, i32 addrspace(4)* %fptr, align 4
5050 ; On VI+ we also need to set MTYPE = 2
5151 ; HSA-VI: s_mov_b32 s[[HI:[0-9]]], 0x1100f000
5252 ; Make sure we generate flat store for HSA
53 ; HSA: flat_store_dword v{{[0-9]+}}
53 ; HSA: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}
5454
5555 ; HSA: .Lfunc_end0:
5656 ; HSA: .size simple, .Lfunc_end0-simple
2424 ; HSA: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s6{{$}}
2525
2626 ; ALL-NOT: [[VCOPY]]
27 ; ALL: {{buffer|flat}}_store_dword [[VCOPY]]
27 ; ALL: {{buffer|flat}}_store_dword {{.*}}[[VCOPY]]
2828
2929 ; HSA: COMPUTE_PGM_RSRC2:USER_SGPR: 6
3030 ; ALL-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2
5252 ; HSA: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s7{{$}}
5353
5454 ; ALL-NOT: [[VCOPY]]
55 ; ALL: {{buffer|flat}}_store_dword [[VCOPY]]
55 ; ALL: {{buffer|flat}}_store_dword {{.*}}[[VCOPY]]
5656
5757 ; HSA: COMPUTE_PGM_RSRC2:USER_SGPR: 6
5858 ; ALL-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2
8888 ; HSA: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s7{{$}}
8989
9090 ; ALL-NOT: [[VCOPY]]
91 ; ALL: {{buffer|flat}}_store_dword [[VCOPY]]
91 ; ALL: {{buffer|flat}}_store_dword {{.*}}[[VCOPY]]
9292
9393 ; HSA: COMPUTE_PGM_RSRC2:USER_SGPR: 6
9494 ; ALL-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2
1414 ; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 0
1515
1616 ; ALL-NOT: v0
17 ; ALL: {{buffer|flat}}_store_dword v0
17 ; ALL: {{buffer|flat}}_store_dword {{.*}}v0
1818 define void @test_workitem_id_x(i32 addrspace(1)* %out) #1 {
1919 %id = call i32 @llvm.amdgcn.workitem.id.x()
2020 store i32 %id, i32 addrspace(1)* %out
2929 ; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 1
3030
3131 ; ALL-NOT: v1
32 ; ALL: {{buffer|flat}}_store_dword v1
32 ; ALL: {{buffer|flat}}_store_dword {{.*}}v1
3333 define void @test_workitem_id_y(i32 addrspace(1)* %out) #1 {
3434 %id = call i32 @llvm.amdgcn.workitem.id.y()
3535 store i32 %id, i32 addrspace(1)* %out
4444 ; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 2
4545
4646 ; ALL-NOT: v2
47 ; ALL: {{buffer|flat}}_store_dword v2
47 ; ALL: {{buffer|flat}}_store_dword {{.*}}v2
4848 define void @test_workitem_id_z(i32 addrspace(1)* %out) #1 {
4949 %id = call i32 @llvm.amdgcn.workitem.id.z()
5050 store i32 %id, i32 addrspace(1)* %out
33 ; and can be eliminated
44 ; CHECK-LABEL: {{^}}test_workitem_id_x_known_max_range:
55 ; CHECK-NOT: v0
6 ; CHECK: {{flat|buffer}}_store_dword v0
6 ; CHECK: {{flat|buffer}}_store_dword {{.*}}v0
77 define void @test_workitem_id_x_known_max_range(i32 addrspace(1)* nocapture %out) #0 {
88 entry:
99 %id = tail call i32 @llvm.amdgcn.workitem.id.x(), !range !0
1414
1515 ; CHECK-LABEL: {{^}}test_workitem_id_x_known_trunc_1_bit_range:
1616 ; CHECK: v_and_b32_e32 [[MASKED:v[0-9]+]], 0x1ff, v0
17 ; CHECK: {{flat|buffer}}_store_dword [[MASKED]]
17 ; CHECK: {{flat|buffer}}_store_dword {{.*}}[[MASKED]]
1818 define void @test_workitem_id_x_known_trunc_1_bit_range(i32 addrspace(1)* nocapture %out) #0 {
1919 entry:
2020 %id = tail call i32 @llvm.amdgcn.workitem.id.x(), !range !0
2626 ; CHECK-LABEL: {{^}}test_workitem_id_x_known_max_range_m1:
2727 ; CHECK-NOT: v0
2828 ; CHECK: v_and_b32_e32 [[MASKED:v[0-9]+]], 0xff, v0
29 ; CHECK: {{flat|buffer}}_store_dword [[MASKED]]
29 ; CHECK: {{flat|buffer}}_store_dword {{.*}}[[MASKED]]
3030 define void @test_workitem_id_x_known_max_range_m1(i32 addrspace(1)* nocapture %out) #0 {
3131 entry:
3232 %id = tail call i32 @llvm.amdgcn.workitem.id.x(), !range !1
5555 ; FIXME: We should be using flat load for HSA.
5656 ; GCN: buffer_load_dword [[OUT:v[0-9]+]]
5757 ; GCN-NOHSA: buffer_store_dword [[OUT]]
58 ; GCN-HSA: flat_store_dword [[OUT]]
58 ; GCN-HSA: flat_store_dword {{.*}}, [[OUT]]
5959 define void @smrd_valu(i32 addrspace(2)* addrspace(1)* %in, i32 %a, i32 %b, i32 addrspace(1)* %out) #1 {
6060 entry:
6161 %tmp = icmp ne i32 %a, 0
103103 ; GCN-NOHSA: v_add_i32_e32
104104 ; GCN-NOHSA: buffer_store_dword
105105 ; GCN-HSA: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
106 ; GCN-HSA: flat_store_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
106 ; GCN-HSA: flat_store_dword v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}
107107 define void @smrd_valu_ci_offset(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %c) #1 {
108108 entry:
109109 %tmp = call i32 @llvm.amdgcn.workitem.id.x()
247247 ; GCN-HSA: flat_load_dword [[MOVED:v[0-9]+]], v[{{[0-9+:[0-9]+}}]
248248 ; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, s{{[0-9]+}}, [[MOVED]]
249249 ; GCN-NOHSA: buffer_store_dword [[ADD]]
250 ; GCN-HSA: flat_store_dword [[ADD]]
250 ; GCN-HSA: flat_store_dword {{.*}}, [[ADD]]
251251 define void @smrd_valu2_salu_user(i32 addrspace(1)* %out, [8 x i32] addrspace(2)* %in, i32 %a) #1 {
252252 entry:
253253 %tmp = call i32 @llvm.amdgcn.workitem.id.x()
2525
2626 ; GCN: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}}
2727 ; GCN: v_cndmask_b32_e32 [[SIGN_SEL:v[0-9]+]],
28 ; GCN: {{buffer|flat}}_store_dword [[SIGN_SEL]]
28 ; GCN: {{buffer|flat}}_store_dword {{.*}}[[SIGN_SEL]]
2929 define void @v_sint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 addrspace(1)* %in) #0 {
3030 %tid = call i32 @llvm.amdgcn.workitem.id.x()
3131 %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
102102 ; SI: v_and_b32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}}
103103 ; SI: v_cmp_eq_i32
104104 ; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1.0
105 ; SI: {{buffer|flat}}_store_dword [[RESULT]],
105 ; SI: {{buffer|flat}}_store_dword {{.*}}[[RESULT]]
106106 ; SI: s_endpgm
107107 define void @v_sint_to_fp_i1_f32_load(float addrspace(1)* %out, i1 addrspace(1)* %in) #0 {
108108 %tid = call i32 @llvm.r600.read.tidig.x()
215215 ; SI: buffer_load_dword v[[HI:[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
216216 ; VI: flat_load_dword v[[HI:[0-9]+]]
217217 ; GCN: v_ashrrev_i32_e32 v[[SHIFT:[0-9]+]], 31, v[[HI]]
218 ; GCN: {{buffer|flat}}_store_dwordx2 v{{\[}}[[HI]]:[[SHIFT]]{{\]}}
218 ; GCN: {{buffer|flat}}_store_dwordx2 {{.*}}v{{\[}}[[HI]]:[[SHIFT]]{{\]}}
219219 define void @v_ashr_32_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
220220 %tid = call i32 @llvm.r600.read.tidig.x() #0
221221 %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
244244 ; VI: flat_load_dword v[[HI:[0-9]+]]
245245 ; GCN: v_ashrrev_i32_e32 v[[SHIFT:[0-9]+]], 31, v[[HI]]
246246 ; GCN: v_mov_b32_e32 v[[COPY:[0-9]+]], v[[SHIFT]]
247 ; GCN: {{buffer|flat}}_store_dwordx2 v{{\[}}[[SHIFT]]:[[COPY]]{{\]}}
247 ; GCN: {{buffer|flat}}_store_dwordx2 {{.*}}v{{\[}}[[SHIFT]]:[[COPY]]{{\]}}
248248 define void @v_ashr_63_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
249249 %tid = call i32 @llvm.r600.read.tidig.x() #0
250250 %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
2121 ; GCN-DAG: v_cmp_lt_u64
2222
2323 ; GCN: v_add_i32_e32 [[VR:v[0-9]+]]
24 ; GCN: {{buffer|flat}}_store_dword [[VR]]
24 ; GCN: {{buffer|flat}}_store_dword {{.*}}[[VR]]
2525 define void @v_uint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 addrspace(1)* %in) #0 {
2626 %tid = call i32 @llvm.r600.read.tidig.x()
2727 %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
102102 ; SI: v_and_b32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}}
103103 ; SI: v_cmp_eq_i32
104104 ; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1.0
105 ; SI: {{buffer|flat}}_store_dword [[RESULT]],
105 ; SI: {{buffer|flat}}_store_dword {{.*}}[[RESULT]]
106106 ; SI: s_endpgm
107107 define void @v_uint_to_fp_i1_f32_load(float addrspace(1)* %out, i1 addrspace(1)* %in) #0 {
108108 %tid = call i32 @llvm.r600.read.tidig.x()
9494 // CI: flat_load_dword v1, v[3:4] glc slc tfe ; encoding: [0x00,0x00,0x33,0xdc,0x03,0x00,0x80,0x01]
9595 // VI: flat_load_dword v1, v[3:4] glc slc tfe ; encoding: [0x00,0x00,0x53,0xdc,0x03,0x00,0x80,0x01]
9696
97 flat_store_dword v1, v[3:4]
98 // NOSI: error:
99 // CIVI: flat_store_dword v1, v[3:4] ; encoding: [0x00,0x00,0x70,0xdc,0x03,0x01,0x00,0x00]
100
101 flat_store_dword v1, v[3:4] glc
102 // NOSI: error:
103 // CIVI: flat_store_dword v1, v[3:4] glc ; encoding: [0x00,0x00,0x71,0xdc,0x03,0x01,0x00,0x00]
104
105 flat_store_dword v1, v[3:4] glc slc
106 // NOSI: error:
107 // CIVI: flat_store_dword v1, v[3:4] glc slc ; encoding: [0x00,0x00,0x73,0xdc,0x03,0x01,0x00,0x00]
108
109 flat_store_dword v1, v[3:4] glc tfe
110 // NOSI: error:
111 // CIVI: flat_store_dword v1, v[3:4] glc tfe ; encoding: [0x00,0x00,0x71,0xdc,0x03,0x01,0x80,0x00]
112
113 flat_store_dword v1, v[3:4] glc slc tfe
114 // NOSI: error:
115 // CIVI: flat_store_dword v1, v[3:4] glc slc tfe ; encoding: [0x00,0x00,0x73,0xdc,0x03,0x01,0x80,0x00]
116
117 flat_store_dword v1, v[3:4] glc tfe slc
118 // NOSI: error:
119 // CIVI: flat_store_dword v1, v[3:4] glc slc tfe ; encoding: [0x00,0x00,0x73,0xdc,0x03,0x01,0x80,0x00]
120
121 flat_store_dword v1, v[3:4] slc
122 // NOSI: error:
123 // CIVI: flat_store_dword v1, v[3:4] slc ; encoding: [0x00,0x00,0x72,0xdc,0x03,0x01,0x00,0x00]
124
125 flat_store_dword v1, v[3:4] slc glc
126 // NOSI: error:
127 // CIVI: flat_store_dword v1, v[3:4] glc slc ; encoding: [0x00,0x00,0x73,0xdc,0x03,0x01,0x00,0x00]
128
129 flat_store_dword v1, v[3:4] slc tfe
130 // NOSI: error:
131 // CIVI: flat_store_dword v1, v[3:4] slc tfe ; encoding: [0x00,0x00,0x72,0xdc,0x03,0x01,0x80,0x00]
132
133 flat_store_dword v1, v[3:4] slc glc tfe
134 // NOSI: error:
135 // CIVI: flat_store_dword v1, v[3:4] glc slc tfe ; encoding: [0x00,0x00,0x73,0xdc,0x03,0x01,0x80,0x00]
136
137 flat_store_dword v1, v[3:4] slc tfe glc
138 // NOSI: error:
139 // CIVI: flat_store_dword v1, v[3:4] glc slc tfe ; encoding: [0x00,0x00,0x73,0xdc,0x03,0x01,0x80,0x00]
140
141 flat_store_dword v1, v[3:4] tfe
142 // NOSI: error:
143 // CIVI: flat_store_dword v1, v[3:4] tfe ; encoding: [0x00,0x00,0x70,0xdc,0x03,0x01,0x80,0x00]
144
145 flat_store_dword v1, v[3:4] tfe glc
146 // NOSI: error:
147 // CIVI: flat_store_dword v1, v[3:4] glc tfe ; encoding: [0x00,0x00,0x71,0xdc,0x03,0x01,0x80,0x00]
148
149 flat_store_dword v1, v[3:4] tfe slc
150 // NOSI: error:
151 // CIVI: flat_store_dword v1, v[3:4] slc tfe ; encoding: [0x00,0x00,0x72,0xdc,0x03,0x01,0x80,0x00]
152
153 flat_store_dword v1, v[3:4] tfe glc slc
154 // NOSI: error:
155 // CIVI: flat_store_dword v1, v[3:4] glc slc tfe ; encoding: [0x00,0x00,0x73,0xdc,0x03,0x01,0x80,0x00]
156
157 flat_store_dword v1, v[3:4] tfe slc glc
158 // NOSI: error:
159 // CIVI: flat_store_dword v1, v[3:4] glc slc tfe ; encoding: [0x00,0x00,0x73,0xdc,0x03,0x01,0x80,0x00]
97 flat_store_dword v[3:4], v1
98 // NOSI: error:
99 // CIVI: flat_store_dword v[3:4], v1 ; encoding: [0x00,0x00,0x70,0xdc,0x03,0x01,0x00,0x00]
100
101 flat_store_dword v[3:4], v1 glc
102 // NOSI: error:
103 // CIVI: flat_store_dword v[3:4], v1 glc ; encoding: [0x00,0x00,0x71,0xdc,0x03,0x01,0x00,0x00]
104
105 flat_store_dword v[3:4], v1 glc slc
106 // NOSI: error:
107 // CIVI: flat_store_dword v[3:4], v1 glc slc ; encoding: [0x00,0x00,0x73,0xdc,0x03,0x01,0x00,0x00]
108
109 flat_store_dword v[3:4], v1 glc tfe
110 // NOSI: error:
111 // CIVI: flat_store_dword v[3:4], v1 glc tfe ; encoding: [0x00,0x00,0x71,0xdc,0x03,0x01,0x80,0x00]
112
113 flat_store_dword v[3:4], v1 glc slc tfe
114 // NOSI: error:
115 // CIVI: flat_store_dword v[3:4], v1 glc slc tfe ; encoding: [0x00,0x00,0x73,0xdc,0x03,0x01,0x80,0x00]
116
117 flat_store_dword v[3:4], v1 glc tfe slc
118 // NOSI: error:
119 // CIVI: flat_store_dword v[3:4], v1 glc slc tfe ; encoding: [0x00,0x00,0x73,0xdc,0x03,0x01,0x80,0x00]
120
121 flat_store_dword v[3:4], v1 slc
122 // NOSI: error:
123 // CIVI: flat_store_dword v[3:4], v1 slc ; encoding: [0x00,0x00,0x72,0xdc,0x03,0x01,0x00,0x00]
124
125 flat_store_dword v[3:4], v1 slc glc
126 // NOSI: error:
127 // CIVI: flat_store_dword v[3:4], v1 glc slc ; encoding: [0x00,0x00,0x73,0xdc,0x03,0x01,0x00,0x00]
128
129 flat_store_dword v[3:4], v1 slc tfe
130 // NOSI: error:
131 // CIVI: flat_store_dword v[3:4], v1 slc tfe ; encoding: [0x00,0x00,0x72,0xdc,0x03,0x01,0x80,0x00]
132
133 flat_store_dword v[3:4], v1 slc glc tfe
134 // NOSI: error:
135 // CIVI: flat_store_dword v[3:4], v1 glc slc tfe ; encoding: [0x00,0x00,0x73,0xdc,0x03,0x01,0x80,0x00]
136
137 flat_store_dword v[3:4], v1 slc tfe glc
138 // NOSI: error:
139 // CIVI: flat_store_dword v[3:4], v1 glc slc tfe ; encoding: [0x00,0x00,0x73,0xdc,0x03,0x01,0x80,0x00]
140
141 flat_store_dword v[3:4], v1 tfe
142 // NOSI: error:
143 // CIVI: flat_store_dword v[3:4], v1 tfe ; encoding: [0x00,0x00,0x70,0xdc,0x03,0x01,0x80,0x00]
144
145 flat_store_dword v[3:4], v1 tfe glc
146 // NOSI: error:
147 // CIVI: flat_store_dword v[3:4], v1 glc tfe ; encoding: [0x00,0x00,0x71,0xdc,0x03,0x01,0x80,0x00]
148
149 flat_store_dword v[3:4], v1 tfe slc
150 // NOSI: error:
151 // CIVI: flat_store_dword v[3:4], v1 slc tfe ; encoding: [0x00,0x00,0x72,0xdc,0x03,0x01,0x80,0x00]
152
153 flat_store_dword v[3:4], v1 tfe glc slc
154 // NOSI: error:
155 // CIVI: flat_store_dword v[3:4], v1 glc slc tfe ; encoding: [0x00,0x00,0x73,0xdc,0x03,0x01,0x80,0x00]
156
157 flat_store_dword v[3:4], v1 tfe slc glc
158 // NOSI: error:
159 // CIVI: flat_store_dword v[3:4], v1 glc slc tfe ; encoding: [0x00,0x00,0x73,0xdc,0x03,0x01,0x80,0x00]
160160
161161 // FIXME: For atomic instructions, glc must be placed immediately following
162162 // the data register. These forms aren't currently supported:
247247 // CI: flat_load_dwordx3 v[5:7], v[3:4] ; encoding: [0x00,0x00,0x3c,0xdc,0x03,0x00,0x00,0x05]
248248 // VI: flat_load_dwordx3 v[5:7], v[3:4] ; encoding: [0x00,0x00,0x58,0xdc,0x03,0x00,0x00,0x05]
249249
250 flat_store_byte v1, v[3:4]
251 // NOSI: error:
252 // CIVI: flat_store_byte v1, v[3:4] ; encoding: [0x00,0x00,0x60,0xdc,0x03,0x01,0x00,0x00]
253
254 flat_store_short v1, v[3:4]
255 // NOSI: error:
256 // CIVI: flat_store_short v1, v[3:4] ; encoding: [0x00,0x00,0x68,0xdc,0x03,0x01,0x00,0x00]
257
258 flat_store_dword v1, v[3:4]
259 // NOSI: error:
260 // CIVI: flat_store_dword v1, v[3:4] ; encoding: [0x00,0x00,0x70,0xdc,0x03,0x01,0x00,0x00]
261
262 flat_store_dwordx2 v[1:2], v[3:4]
263 // NOSI: error:
264 // CIVI: flat_store_dwordx2 v[1:2], v[3:4] ; encoding: [0x00,0x00,0x74,0xdc,0x03,0x01,0x00,0x00]
265
266 flat_store_dwordx4 v[5:8], v[3:4]
267 // NOSI: error:
268 // CI: flat_store_dwordx4 v[5:8], v[3:4] ; encoding: [0x00,0x00,0x78,0xdc,0x03,0x05,0x00,0x00]
269 // VI: flat_store_dwordx4 v[5:8], v[3:4] ; encoding: [0x00,0x00,0x7c,0xdc,0x03,0x05,0x00,0x00]
270
271 flat_store_dwordx3 v[5:7], v[3:4]
272 // NOSI: error:
273 // CI: flat_store_dwordx3 v[5:7], v[3:4] ; encoding: [0x00,0x00,0x7c,0xdc,0x03,0x05,0x00,0x00]
274 // VI: flat_store_dwordx3 v[5:7], v[3:4] ; encoding: [0x00,0x00,0x78,0xdc,0x03,0x05,0x00,0x00]
250 flat_store_byte v[3:4], v1
251 // NOSI: error:
252 // CIVI: flat_store_byte v[3:4], v1 ; encoding: [0x00,0x00,0x60,0xdc,0x03,0x01,0x00,0x00]
253
254 flat_store_short v[3:4], v1
255 // NOSI: error:
256 // CIVI: flat_store_short v[3:4], v1 ; encoding: [0x00,0x00,0x68,0xdc,0x03,0x01,0x00,0x00]
257
258 flat_store_dword v[3:4], v1
259 // NOSI: error:
260 // CIVI: flat_store_dword v[3:4], v1 ; encoding: [0x00,0x00,0x70,0xdc,0x03,0x01,0x00,0x00]
261
262 flat_store_dwordx2 v[3:4], v[1:2]
263 // NOSI: error:
264 // CIVI: flat_store_dwordx2 v[3:4], v[1:2] ; encoding: [0x00,0x00,0x74,0xdc,0x03,0x01,0x00,0x00]
265
266 flat_store_dwordx4 v[3:4], v[5:8]
267 // NOSI: error:
268 // CI: flat_store_dwordx4 v[3:4], v[5:8] ; encoding: [0x00,0x00,0x78,0xdc,0x03,0x05,0x00,0x00]
269 // VI: flat_store_dwordx4 v[3:4], v[5:8] ; encoding: [0x00,0x00,0x7c,0xdc,0x03,0x05,0x00,0x00]
270
271 flat_store_dwordx3 v[3:4], v[5:7]
272 // NOSI: error:
273 // CI: flat_store_dwordx3 v[3:4], v[5:7] ; encoding: [0x00,0x00,0x7c,0xdc,0x03,0x05,0x00,0x00]
274 // VI: flat_store_dwordx3 v[3:4], v[5:7] ; encoding: [0x00,0x00,0x78,0xdc,0x03,0x05,0x00,0x00]
275275
276276 flat_atomic_swap v[3:4], v5
277277 // NOSI: error: