llvm.org GIT mirror llvm / 89e4328
R600/SI: Remove M0 from DS assembly strings. This matches the assembly syntax for the proprietary compiler. (git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@230645 91177308-0d34-0410-b5e6-96231b3b80d8) — Tom Stellard, 4 years ago
10 changed file(s) with 72 addition(s) and 72 deletion(s). Raw diff Collapse all Expand all
15221522 asm,
15231523 (outs regClass:$vdst),
15241524 (ins i1imm:$gds, VGPR_32:$addr, ds_offset:$offset, M0Reg:$m0),
1525 asm#" $vdst, $addr"#"$offset"#" [M0]",
1525 asm#" $vdst, $addr"#"$offset",
15261526 []>;
15271527
15281528 multiclass DS_Load2_m op, string opName, dag outs, dag ins, string asm,
15441544 (outs regClass:$vdst),
15451545 (ins i1imm:$gds, VGPR_32:$addr, ds_offset0:$offset0, ds_offset1:$offset1,
15461546 M0Reg:$m0),
1547 asm#" $vdst, $addr"#"$offset0"#"$offset1 [M0]",
1547 asm#" $vdst, $addr"#"$offset0"#"$offset1",
15481548 []>;
15491549
15501550 multiclass DS_1A_Store_m op, string opName, dag outs, dag ins,
15651565 asm,
15661566 (outs),
15671567 (ins i1imm:$gds, VGPR_32:$addr, regClass:$data0, ds_offset:$offset, M0Reg:$m0),
1568 asm#" $addr, $data0"#"$offset"#" [M0]",
1568 asm#" $addr, $data0"#"$offset",
15691569 []>;
15701570
15711571 multiclass DS_Store_m op, string opName, dag outs, dag ins,
15871587 (outs),
15881588 (ins i1imm:$gds, VGPR_32:$addr, regClass:$data0, regClass:$data1,
15891589 ds_offset0:$offset0, ds_offset1:$offset1, M0Reg:$m0),
1590 asm#" $addr, $data0, $data1"#"$offset0"#"$offset1 [M0]",
1590 asm#" $addr, $data0, $data1"#"$offset0"#"$offset1",
15911591 []>;
15921592
15931593 // 1 address, 1 data.
16111611 op, asm,
16121612 (outs rc:$vdst),
16131613 (ins i1imm:$gds, VGPR_32:$addr, rc:$data0, ds_offset:$offset, M0Reg:$m0),
1614 asm#" $vdst, $addr, $data0"#"$offset"#" [M0]", [], noRetOp>;
1614 asm#" $vdst, $addr, $data0"#"$offset", [], noRetOp>;
16151615
16161616 // 1 address, 2 data.
16171617 multiclass DS_1A2D_RET_m op, string opName, dag outs, dag ins,
16321632 op, asm,
16331633 (outs rc:$vdst),
16341634 (ins i1imm:$gds, VGPR_32:$addr, rc:$data0, rc:$data1, ds_offset:$offset, M0Reg:$m0),
1635 asm#" $vdst, $addr, $data0, $data1"#"$offset"#" [M0]",
1635 asm#" $vdst, $addr, $data0, $data1"#"$offset",
16361636 [], noRetOp>;
16371637
16381638 // 1 address, 2 data.
16541654 op, asm,
16551655 (outs),
16561656 (ins i1imm:$gds, VGPR_32:$addr, rc:$data0, rc:$data1, ds_offset:$offset, M0Reg:$m0),
1657 asm#" $addr, $data0, $data1"#"$offset"#" [M0]",
1657 asm#" $addr, $data0, $data1"#"$offset",
16581658 [], noRetOp>;
16591659
16601660 // 1 address, 1 data.
16761676 op, asm,
16771677 (outs),
16781678 (ins i1imm:$gds, VGPR_32:$addr, rc:$data0, ds_offset:$offset, M0Reg:$m0),
1679 asm#" $addr, $data0"#"$offset"#" [M0]",
1679 asm#" $addr, $data0"#"$offset",
16801680 [], noRetOp>;
16811681
16821682 //===----------------------------------------------------------------------===//
130130 ; FUNC-LABEL: {{^}}local_address_gep_large_const_offset_store:
131131 ; SI: s_add_i32 [[SPTR:s[0-9]]], s{{[0-9]+}}, 0x10004
132132 ; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
133 ; SI: ds_write_b32 [[VPTR]], v{{[0-9]+}} [M0]{{$}}
133 ; SI: ds_write_b32 [[VPTR]], v{{[0-9]+$}}
134134 define void @local_address_gep_large_const_offset_store(i32 addrspace(3)* %out, i32 %val) {
135135 %gep = getelementptr i32 addrspace(3)* %out, i32 16385
136136 store i32 %val, i32 addrspace(3)* %gep, align 4
99 ; VI: s_load_dword [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
1010 ; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
1111 ; GCN-DAG: v_mov_b32_e32 [[VSWAP:v[0-9]+]], [[SWAP]]
12 ; GCN: ds_cmpst_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[VCMP]], [[VSWAP]] offset:16 [M0]
12 ; GCN: ds_cmpst_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[VCMP]], [[VSWAP]] offset:16
1313 ; GCN: s_endpgm
1414 define void @lds_atomic_cmpxchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %swap) nounwind {
1515 %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
2929 ; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
3030 ; GCN-DAG: v_mov_b32_e32 v[[LOSWAPV:[0-9]+]], s[[LOSWAP]]
3131 ; GCN-DAG: v_mov_b32_e32 v[[HISWAPV:[0-9]+]], s[[HISWAP]]
32 ; GCN: ds_cmpst_rtn_b64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVCMP]]:[[HIVCMP]]{{\]}}, v{{\[}}[[LOSWAPV]]:[[HISWAPV]]{{\]}} offset:32 [M0]
32 ; GCN: ds_cmpst_rtn_b64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVCMP]]:[[HIVCMP]]{{\]}}, v{{\[}}[[LOSWAPV]]:[[HISWAPV]]{{\]}} offset:32
3333 ; GCN: buffer_store_dwordx2 [[RESULT]],
3434 ; GCN: s_endpgm
3535 define void @lds_atomic_cmpxchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr, i64 %swap) nounwind {
4242
4343 ; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_ret_i32_bad_si_offset
4444 ; SI: ds_cmpst_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
45 ; CIVI: ds_cmpst_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 [M0]
45 ; CIVI: ds_cmpst_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
4646 ; GCN: s_endpgm
4747 define void @lds_atomic_cmpxchg_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %swap, i32 %a, i32 %b) nounwind {
4848 %sub = sub i32 %a, %b
6262 ; GCN-DAG: v_mov_b32_e32 [[VCMP:v[0-9]+]], 7
6363 ; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
6464 ; GCN-DAG: v_mov_b32_e32 [[VSWAP:v[0-9]+]], [[SWAP]]
65 ; GCN: ds_cmpst_b32 [[VPTR]], [[VCMP]], [[VSWAP]] offset:16 [M0]
65 ; GCN: ds_cmpst_b32 [[VPTR]], [[VCMP]], [[VSWAP]] offset:16
6666 ; GCN: s_endpgm
6767 define void @lds_atomic_cmpxchg_noret_i32_offset(i32 addrspace(3)* %ptr, i32 %swap) nounwind {
6868 %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
8181 ; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
8282 ; GCN-DAG: v_mov_b32_e32 v[[LOSWAPV:[0-9]+]], s[[LOSWAP]]
8383 ; GCN-DAG: v_mov_b32_e32 v[[HISWAPV:[0-9]+]], s[[HISWAP]]
84 ; GCN: ds_cmpst_b64 [[VPTR]], v{{\[}}[[LOVCMP]]:[[HIVCMP]]{{\]}}, v{{\[}}[[LOSWAPV]]:[[HISWAPV]]{{\]}} offset:32 [M0]
84 ; GCN: ds_cmpst_b64 [[VPTR]], v{{\[}}[[LOVCMP]]:[[HIVCMP]]{{\]}}, v{{\[}}[[LOSWAPV]]:[[HISWAPV]]{{\]}} offset:32
8585 ; GCN: s_endpgm
8686 define void @lds_atomic_cmpxchg_noret_i64_offset(i64 addrspace(3)* %ptr, i64 %swap) nounwind {
8787 %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
66 ; SI-LABEL: @simple_write2_one_val_f32
77 ; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]]
88 ; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
9 ; SI: ds_write2_b32 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:8 [M0]
9 ; SI: ds_write2_b32 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:8
1010 ; SI: s_endpgm
1111 define void @simple_write2_one_val_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
1212 %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
2424 ; SI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
2525 ; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
2626 ; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
27 ; SI: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:8 [M0]
27 ; SI: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:8
2828 ; SI: s_endpgm
2929 define void @simple_write2_two_val_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
3030 %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
8383 ; SI: buffer_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:{{[0-9]+\]}}
8484 ; SI: buffer_load_dwordx2 v{{\[[0-9]+}}:[[VAL1:[0-9]+]]{{\]}}
8585 ; SI: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
86 ; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8 [M0]
86 ; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8
8787 ; SI: s_endpgm
8888 define void @simple_write2_two_val_subreg2_mixed_f32(float addrspace(1)* %C, <2 x float> addrspace(1)* %in) #0 {
8989 %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
104104 ; SI-LABEL: @simple_write2_two_val_subreg2_f32
105105 ; SI-DAG: buffer_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
106106 ; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
107 ; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8 [M0]
107 ; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8
108108 ; SI: s_endpgm
109109 define void @simple_write2_two_val_subreg2_f32(float addrspace(1)* %C, <2 x float> addrspace(1)* %in) #0 {
110110 %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
123123 ; SI-LABEL: @simple_write2_two_val_subreg4_f32
124124 ; SI-DAG: buffer_load_dwordx4 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
125125 ; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
126 ; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8 [M0]
126 ; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8
127127 ; SI: s_endpgm
128128 define void @simple_write2_two_val_subreg4_f32(float addrspace(1)* %C, <4 x float> addrspace(1)* %in) #0 {
129129 %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
143143 ; SI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
144144 ; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
145145 ; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
146 ; SI: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:255 [M0]
146 ; SI: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:255
147147 ; SI: s_endpgm
148148 define void @simple_write2_two_val_max_offset_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
149149 %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
267267 ; SI-LABEL: @simple_write2_one_val_f64
268268 ; SI: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]],
269269 ; SI: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
270 ; SI: ds_write2_b64 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:8 [M0]
270 ; SI: ds_write2_b64 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:8
271271 ; SI: s_endpgm
272272 define void @simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in) #0 {
273273 %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
284284 ; SI-LABEL: @misaligned_simple_write2_one_val_f64
285285 ; SI-DAG: buffer_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
286286 ; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
287 ; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:1 [M0]
288 ; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:14 offset1:15 [M0]
287 ; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:1
288 ; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:14 offset1:15
289289 ; SI: s_endpgm
290290 define void @misaligned_simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 {
291291 %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
303303 ; SI-DAG: buffer_load_dwordx2 [[VAL0:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
304304 ; SI-DAG: buffer_load_dwordx2 [[VAL1:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8
305305 ; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
306 ; SI: ds_write2_b64 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:8 [M0]
306 ; SI: ds_write2_b64 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:8
307307 ; SI: s_endpgm
308308 define void @simple_write2_two_val_f64(double addrspace(1)* %C, double addrspace(1)* %in) #0 {
309309 %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
66 ; SI-LABEL: @simple_write2st64_one_val_f32_0_1
77 ; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]]
88 ; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
9 ; SI: ds_write2st64_b32 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:1 [M0]
9 ; SI: ds_write2st64_b32 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:1
1010 ; SI: s_endpgm
1111 define void @simple_write2st64_one_val_f32_0_1(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
1212 %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
2424 ; SI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
2525 ; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
2626 ; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
27 ; SI: ds_write2st64_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:2 offset1:5 [M0]
27 ; SI: ds_write2st64_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:2 offset1:5
2828 ; SI: s_endpgm
2929 define void @simple_write2st64_two_val_f32_2_5(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
3030 %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
4545 ; SI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
4646 ; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
4747 ; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
48 ; SI: ds_write2st64_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:255 [M0]
48 ; SI: ds_write2st64_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:255
4949 ; SI: s_endpgm
5050 define void @simple_write2st64_two_val_max_offset_f32(float addrspace(1)* %C, float addrspace(1)* %in, float addrspace(3)* %lds) #0 {
5151 %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
6565 ; SI-DAG: buffer_load_dwordx2 [[VAL0:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
6666 ; SI-DAG: buffer_load_dwordx2 [[VAL1:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8
6767 ; SI-DAG: v_add_i32_e32 [[VPTR:v[0-9]+]],
68 ; SI: ds_write2st64_b64 [[VPTR]], [[VAL0]], [[VAL1]] offset0:4 offset1:127 [M0]
68 ; SI: ds_write2st64_b64 [[VPTR]], [[VAL0]], [[VAL1]] offset0:4 offset1:127
6969 ; SI: s_endpgm
7070 define void @simple_write2st64_two_val_max_offset_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 {
7171 %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
22 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck --check-prefix=CI --check-prefix=BOTH %s
33
44 ; BOTH-LABEL: {{^}}local_i32_load
5 ; BOTH: ds_read_b32 [[REG:v[0-9]+]], v{{[0-9]+}} offset:28 [M0]
5 ; BOTH: ds_read_b32 [[REG:v[0-9]+]], v{{[0-9]+}} offset:28
66 ; BOTH: buffer_store_dword [[REG]],
77 define void @local_i32_load(i32 addrspace(1)* %out, i32 addrspace(3)* %in) nounwind {
88 %gep = getelementptr i32 addrspace(3)* %in, i32 7
1212 }
1313
1414 ; BOTH-LABEL: {{^}}local_i32_load_0_offset
15 ; BOTH: ds_read_b32 [[REG:v[0-9]+]], v{{[0-9]+}} [M0]
15 ; BOTH: ds_read_b32 [[REG:v[0-9]+]], v{{[0-9]+}}
1616 ; BOTH: buffer_store_dword [[REG]],
1717 define void @local_i32_load_0_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) nounwind {
1818 %val = load i32 addrspace(3)* %in, align 4
2222
2323 ; BOTH-LABEL: {{^}}local_i8_load_i16_max_offset:
2424 ; BOTH-NOT: ADD
25 ; BOTH: ds_read_u8 [[REG:v[0-9]+]], {{v[0-9]+}} offset:65535 [M0]
25 ; BOTH: ds_read_u8 [[REG:v[0-9]+]], {{v[0-9]+}} offset:65535
2626 ; BOTH: buffer_store_byte [[REG]],
2727 define void @local_i8_load_i16_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)* %in) nounwind {
2828 %gep = getelementptr i8 addrspace(3)* %in, i32 65535
3737 ; SI: s_or_b32 [[ADDR:s[0-9]+]], s{{[0-9]+}}, 0x10000
3838 ; CI: s_add_i32 [[ADDR:s[0-9]+]], s{{[0-9]+}}, 0x10000
3939 ; BOTH: v_mov_b32_e32 [[VREGADDR:v[0-9]+]], [[ADDR]]
40 ; BOTH: ds_read_u8 [[REG:v[0-9]+]], [[VREGADDR]] [M0]
40 ; BOTH: ds_read_u8 [[REG:v[0-9]+]], [[VREGADDR]]
4141 ; BOTH: buffer_store_byte [[REG]],
4242 define void @local_i8_load_over_i16_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)* %in) nounwind {
4343 %gep = getelementptr i8 addrspace(3)* %in, i32 65536
4848
4949 ; BOTH-LABEL: {{^}}local_i64_load:
5050 ; BOTH-NOT: ADD
51 ; BOTH: ds_read_b64 [[REG:v[[0-9]+:[0-9]+]]], v{{[0-9]+}} offset:56 [M0]
51 ; BOTH: ds_read_b64 [[REG:v[[0-9]+:[0-9]+]]], v{{[0-9]+}} offset:56
5252 ; BOTH: buffer_store_dwordx2 [[REG]],
5353 define void @local_i64_load(i64 addrspace(1)* %out, i64 addrspace(3)* %in) nounwind {
5454 %gep = getelementptr i64 addrspace(3)* %in, i32 7
5858 }
5959
6060 ; BOTH-LABEL: {{^}}local_i64_load_0_offset
61 ; BOTH: ds_read_b64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}} [M0]
61 ; BOTH: ds_read_b64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}
6262 ; BOTH: buffer_store_dwordx2 [[REG]],
6363 define void @local_i64_load_0_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %in) nounwind {
6464 %val = load i64 addrspace(3)* %in, align 8
6868
6969 ; BOTH-LABEL: {{^}}local_f64_load:
7070 ; BOTH-NOT: ADD
71 ; BOTH: ds_read_b64 [[REG:v[[0-9]+:[0-9]+]]], v{{[0-9]+}} offset:56 [M0]
71 ; BOTH: ds_read_b64 [[REG:v[[0-9]+:[0-9]+]]], v{{[0-9]+}} offset:56
7272 ; BOTH: buffer_store_dwordx2 [[REG]],
7373 define void @local_f64_load(double addrspace(1)* %out, double addrspace(3)* %in) nounwind {
7474 %gep = getelementptr double addrspace(3)* %in, i32 7
7878 }
7979
8080 ; BOTH-LABEL: {{^}}local_f64_load_0_offset
81 ; BOTH: ds_read_b64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}} [M0]
81 ; BOTH: ds_read_b64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}
8282 ; BOTH: buffer_store_dwordx2 [[REG]],
8383 define void @local_f64_load_0_offset(double addrspace(1)* %out, double addrspace(3)* %in) nounwind {
8484 %val = load double addrspace(3)* %in, align 8
8888
8989 ; BOTH-LABEL: {{^}}local_i64_store:
9090 ; BOTH-NOT: ADD
91 ; BOTH: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:56 [M0]
91 ; BOTH: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:56
9292 define void @local_i64_store(i64 addrspace(3)* %out) nounwind {
9393 %gep = getelementptr i64 addrspace(3)* %out, i32 7
9494 store i64 5678, i64 addrspace(3)* %gep, align 8
9797
9898 ; BOTH-LABEL: {{^}}local_i64_store_0_offset:
9999 ; BOTH-NOT: ADD
100 ; BOTH: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} [M0]
100 ; BOTH: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}
101101 define void @local_i64_store_0_offset(i64 addrspace(3)* %out) nounwind {
102102 store i64 1234, i64 addrspace(3)* %out, align 8
103103 ret void
105105
106106 ; BOTH-LABEL: {{^}}local_f64_store:
107107 ; BOTH-NOT: ADD
108 ; BOTH: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:56 [M0]
108 ; BOTH: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:56
109109 define void @local_f64_store(double addrspace(3)* %out) nounwind {
110110 %gep = getelementptr double addrspace(3)* %out, i32 7
111111 store double 16.0, double addrspace(3)* %gep, align 8
113113 }
114114
115115 ; BOTH-LABEL: {{^}}local_f64_store_0_offset
116 ; BOTH: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} [M0]
116 ; BOTH: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}
117117 define void @local_f64_store_0_offset(double addrspace(3)* %out) nounwind {
118118 store double 20.0, double addrspace(3)* %out, align 8
119119 ret void
121121
122122 ; BOTH-LABEL: {{^}}local_v2i64_store:
123123 ; BOTH-NOT: ADD
124 ; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:112 [M0]
125 ; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:120 [M0]
124 ; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:112
125 ; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:120
126126 ; BOTH: s_endpgm
127127 define void @local_v2i64_store(<2 x i64> addrspace(3)* %out) nounwind {
128128 %gep = getelementptr <2 x i64> addrspace(3)* %out, i32 7
132132
133133 ; BOTH-LABEL: {{^}}local_v2i64_store_0_offset:
134134 ; BOTH-NOT: ADD
135 ; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} [M0]
136 ; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:8 [M0]
135 ; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}
136 ; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:8
137137 ; BOTH: s_endpgm
138138 define void @local_v2i64_store_0_offset(<2 x i64> addrspace(3)* %out) nounwind {
139139 store <2 x i64> , <2 x i64> addrspace(3)* %out, align 16
142142
143143 ; BOTH-LABEL: {{^}}local_v4i64_store:
144144 ; BOTH-NOT: ADD
145 ; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:224 [M0]
146 ; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:232 [M0]
147 ; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:240 [M0]
148 ; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:248 [M0]
145 ; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:224
146 ; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:232
147 ; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:240
148 ; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:248
149149 ; BOTH: s_endpgm
150150 define void @local_v4i64_store(<4 x i64> addrspace(3)* %out) nounwind {
151151 %gep = getelementptr <4 x i64> addrspace(3)* %out, i32 7
155155
156156 ; BOTH-LABEL: {{^}}local_v4i64_store_0_offset:
157157 ; BOTH-NOT: ADD
158 ; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} [M0]
159 ; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:8 [M0]
160 ; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:16 [M0]
161 ; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:24 [M0]
158 ; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}
159 ; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:8
160 ; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:16
161 ; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:24
162162 ; BOTH: s_endpgm
163163 define void @local_v4i64_store_0_offset(<4 x i64> addrspace(3)* %out) nounwind {
164164 store <4 x i64> , <4 x i64> addrspace(3)* %out, align 16
77 ; GCN: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
88 ; GCN: s_load_dword [[SPTR:s[0-9]+]],
99 ; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
10 ; GCN: ds_wrxchg_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]] [M0]
10 ; GCN: ds_wrxchg_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]]
1111 ; GCN: buffer_store_dword [[RESULT]],
1212 ; GCN: s_endpgm
1313 define void @lds_atomic_xchg_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
3333 ; GCN: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
3434 ; GCN: s_load_dword [[SPTR:s[0-9]+]],
3535 ; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
36 ; GCN: ds_add_rtn_u32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]] [M0]
36 ; GCN: ds_add_rtn_u32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]]
3737 ; GCN: buffer_store_dword [[RESULT]],
3838 ; GCN: s_endpgm
3939 define void @lds_atomic_add_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
5555
5656 ; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32_bad_si_offset:
5757 ; EG: LDS_ADD_RET *
58 ; SI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} [M0]
58 ; SI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
5959 ; CIVI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
6060 ; GCN: s_endpgm
6161 define void @lds_atomic_add_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
7070 ; FUNC-LABEL: {{^}}lds_atomic_inc_ret_i32:
7171 ; EG: LDS_ADD_RET *
7272 ; GCN: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
73 ; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] [M0]
73 ; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]]
7474 ; GCN: s_endpgm
7575 define void @lds_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
7676 %result = atomicrmw add i32 addrspace(3)* %ptr, i32 1 seq_cst
9292
9393 ; FUNC-LABEL: {{^}}lds_atomic_inc_ret_i32_bad_si_offset:
9494 ; EG: LDS_ADD_RET *
95 ; SI: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} [M0]
95 ; SI: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
9696 ; CIVI: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
9797 ; GCN: s_endpgm
9898 define void @lds_atomic_inc_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
128128 ; FUNC-LABEL: {{^}}lds_atomic_dec_ret_i32:
129129 ; EG: LDS_SUB_RET *
130130 ; GCN: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
131 ; GCN: ds_dec_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] [M0]
131 ; GCN: ds_dec_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]]
132132 ; GCN: s_endpgm
133133 define void @lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
134134 %result = atomicrmw sub i32 addrspace(3)* %ptr, i32 1 seq_cst
307307 ; GCN: s_load_dword [[SPTR:s[0-9]+]],
308308 ; GCN: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
309309 ; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
310 ; GCN: ds_wrxchg_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]] [M0]
310 ; GCN: ds_wrxchg_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]]
311311 ; GCN: s_endpgm
312312 define void @lds_atomic_xchg_noret_i32(i32 addrspace(3)* %ptr) nounwind {
313313 %result = atomicrmw xchg i32 addrspace(3)* %ptr, i32 4 seq_cst
328328 ; GCN: s_load_dword [[SPTR:s[0-9]+]],
329329 ; GCN: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
330330 ; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
331 ; GCN: ds_add_u32 [[VPTR]], [[DATA]] [M0]
331 ; GCN: ds_add_u32 [[VPTR]], [[DATA]]
332332 ; GCN: s_endpgm
333333 define void @lds_atomic_add_noret_i32(i32 addrspace(3)* %ptr) nounwind {
334334 %result = atomicrmw add i32 addrspace(3)* %ptr, i32 4 seq_cst
345345 }
346346
347347 ; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32_bad_si_offset
348 ; SI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} [M0]
349 ; CIVI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 [M0]
348 ; SI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}}
349 ; CIVI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
350350 ; GCN: s_endpgm
351351 define void @lds_atomic_add_noret_i32_bad_si_offset(i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
352352 %sub = sub i32 %a, %b
358358
359359 ; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i32:
360360 ; GCN: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
361 ; GCN: ds_inc_u32 v{{[0-9]+}}, [[NEGONE]] [M0]
361 ; GCN: ds_inc_u32 v{{[0-9]+}}, [[NEGONE]]
362362 ; GCN: s_endpgm
363363 define void @lds_atomic_inc_noret_i32(i32 addrspace(3)* %ptr) nounwind {
364364 %result = atomicrmw add i32 addrspace(3)* %ptr, i32 1 seq_cst
3434 ; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
3535 ; VI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
3636 ; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
37 ; GCN: ds_add_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32 [M0]
37 ; GCN: ds_add_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32
3838 ; GCN: buffer_store_dwordx2 [[RESULT]],
3939 ; GCN: s_endpgm
4040 define void @lds_atomic_add_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
279279 ; GCN: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9
280280 ; GCN: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0
281281 ; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
282 ; GCN: ds_add_u64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32 [M0]
282 ; GCN: ds_add_u64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32
283283 ; GCN: s_endpgm
284284 define void @lds_atomic_add_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
285285 %gep = getelementptr i64 addrspace(3)* %ptr, i64 4
2929 ; EG: LDS_READ_RET {{[*]*}} OQAP, {{PV|T}}[[ADDRR:[0-9]*\.[XYZW]]]
3030 ; EG-NOT: LDS_READ_RET {{[*]*}} OQAP, T[[ADDRR]]
3131 ; SI: v_add_i32_e32 [[SIPTR:v[0-9]+]], 16, v{{[0-9]+}}
32 ; SI: ds_read_b32 {{v[0-9]+}}, [[SIPTR]] [M0]
33 ; CI: ds_read_b32 {{v[0-9]+}}, [[ADDRR:v[0-9]+]] [M0]
34 ; CI: ds_read_b32 {{v[0-9]+}}, [[ADDRR]] offset:16 [M0]
32 ; SI: ds_read_b32 {{v[0-9]+}}, [[SIPTR]]
33 ; CI: ds_read_b32 {{v[0-9]+}}, [[ADDRR:v[0-9]+]]
34 ; CI: ds_read_b32 {{v[0-9]+}}, [[ADDRR]] offset:16
3535
3636 define void @local_memory_two_objects(i32 addrspace(1)* %out) {
3737 entry:
1616
1717 ; SI-LABEL: {{^}}load_shl_base_lds_0:
1818 ; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
19 ; SI: ds_read_b32 {{v[0-9]+}}, [[PTR]] offset:8 [M0]
19 ; SI: ds_read_b32 {{v[0-9]+}}, [[PTR]] offset:8
2020 ; SI: s_endpgm
2121 define void @load_shl_base_lds_0(float addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
2222 %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
3333
3434 ; SI-LABEL: {{^}}load_shl_base_lds_1:
3535 ; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
36 ; SI: ds_read_b32 [[RESULT:v[0-9]+]], [[PTR]] offset:8 [M0]
36 ; SI: ds_read_b32 [[RESULT:v[0-9]+]], [[PTR]] offset:8
3737 ; SI: v_add_i32_e32 [[ADDUSE:v[0-9]+]], 8, v{{[0-9]+}}
3838 ; SI-DAG: buffer_store_dword [[RESULT]]
3939 ; SI-DAG: buffer_store_dword [[ADDUSE]]
7070 ; SI-LABEL: {{^}}load_shl_base_lds_2:
7171 ; SI: s_mov_b32 m0, -1
7272 ; SI-NEXT: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
73 ; SI-NEXT: ds_read2st64_b32 {{v\[[0-9]+:[0-9]+\]}}, [[PTR]] offset0:1 offset1:9 [M0]
73 ; SI-NEXT: ds_read2st64_b32 {{v\[[0-9]+:[0-9]+\]}}, [[PTR]] offset0:1 offset1:9
7474 ; SI: s_endpgm
7575 define void @load_shl_base_lds_2(float addrspace(1)* %out) #0 {
7676 %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
8686
8787 ; SI-LABEL: {{^}}store_shl_base_lds_0:
8888 ; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
89 ; SI: ds_write_b32 [[PTR]], {{v[0-9]+}} offset:8 [M0]
89 ; SI: ds_write_b32 [[PTR]], {{v[0-9]+}} offset:8
9090 ; SI: s_endpgm
9191 define void @store_shl_base_lds_0(float addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
9292 %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1