llvm.org GIT mirror llvm / fc9fda5
R600/SI: Change how DS offsets are printed. Match SC by using offset/offset0/offset1 and printing in decimal. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@219537 91177308-0d34-0410-b5e6-96231b3b80d8 Matt Arsenault 5 years ago
19 changed file(s) with 274 addition(s) and 225 deletion(s). Raw diff Collapse all Expand all
4141 O << formatHex(MI->getOperand(OpNo).getImm() & 0xffffffff);
4242 }
4343
// Prints the immediate held in operand OpNo of MI as an unsigned 8-bit value.
// Only the low 8 bits of the immediate are kept (mask 0xff); the result is
// passed through formatDec and streamed to O, so it appears in decimal rather
// than hex.
44 void AMDGPUInstPrinter::printU8ImmDecOperand(const MCInst *MI, unsigned OpNo,
45 raw_ostream &O) {
46 O << formatDec(MI->getOperand(OpNo).getImm() & 0xff);
47 }
48
// Prints the immediate held in operand OpNo of MI as an unsigned 16-bit value.
// Only the low 16 bits of the immediate are kept (mask 0xffff); the result is
// passed through formatDec and streamed to O, so it appears in decimal rather
// than hex.
49 void AMDGPUInstPrinter::printU16ImmDecOperand(const MCInst *MI, unsigned OpNo,
50 raw_ostream &O) {
51 O << formatDec(MI->getOperand(OpNo).getImm() & 0xffff);
52 }
53
4454 void AMDGPUInstPrinter::printOffen(const MCInst *MI, unsigned OpNo,
4555 raw_ostream &O) {
4656 if (MI->getOperand(OpNo).getImm())
6575 O << " offset:";
6676 printU16ImmOperand(MI, OpNo, O);
6777 }
78 }
79
// Prints the single 16-bit offset operand of a DS instruction as
// " offset:<decimal>". Nothing at all is written when the offset is zero, so
// the assembly string contains no offset field in that case.
80 void AMDGPUInstPrinter::printDSOffset(const MCInst *MI, unsigned OpNo,
81 raw_ostream &O) {
// The immediate is narrowed to 16 bits before the zero test, which matches
// the 0xffff mask applied by printU16ImmDecOperand when the value is printed.
82 uint16_t Imm = MI->getOperand(OpNo).getImm();
83 if (Imm != 0) {
// The leading space separates the field from the preceding operand.
84 O << " offset:";
85 printU16ImmDecOperand(MI, OpNo, O);
86 }
87 }
88
// Prints the first 8-bit offset operand of a two-offset DS instruction as
// " offset0:<decimal>". Unlike printDSOffset, the field is always written,
// including when the value is zero.
89 void AMDGPUInstPrinter::printDSOffset0(const MCInst *MI, unsigned OpNo,
90 raw_ostream &O) {
91 O << " offset0:";
92 printU8ImmDecOperand(MI, OpNo, O);
93 }
94
// Prints the second 8-bit offset operand of a two-offset DS instruction as
// " offset1:<decimal>". The field is always written, including when the
// value is zero.
95 void AMDGPUInstPrinter::printDSOffset1(const MCInst *MI, unsigned OpNo,
96 raw_ostream &O) {
97 O << " offset1:";
98 printU8ImmDecOperand(MI, OpNo, O);
6899 }
69100
70101 void AMDGPUInstPrinter::printGLC(const MCInst *MI, unsigned OpNo,
3333 private:
3434 void printU8ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
3535 void printU16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
36 void printU8ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
37 void printU16ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
3638 void printU32ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
3739 void printOffen(const MCInst *MI, unsigned OpNo, raw_ostream &O);
3840 void printIdxen(const MCInst *MI, unsigned OpNo, raw_ostream &O);
3941 void printAddr64(const MCInst *MI, unsigned OpNo, raw_ostream &O);
4042 void printMBUFOffset(const MCInst *MI, unsigned OpNo, raw_ostream &O);
43 void printDSOffset(const MCInst *MI, unsigned OpNo, raw_ostream &O);
44 void printDSOffset0(const MCInst *MI, unsigned OpNo, raw_ostream &O);
45 void printDSOffset1(const MCInst *MI, unsigned OpNo, raw_ostream &O);
4146 void printGLC(const MCInst *MI, unsigned OpNo, raw_ostream &O);
4247 void printSLC(const MCInst *MI, unsigned OpNo, raw_ostream &O);
4348 void printTFE(const MCInst *MI, unsigned OpNo, raw_ostream &O);
199199 }
200200 def mbuf_offset : Operand {
201201 let PrintMethod = "printMBUFOffset";
202 }
203 def ds_offset : Operand {
204 let PrintMethod = "printDSOffset";
205 }
206 def ds_offset0 : Operand {
207 let PrintMethod = "printDSOffset0";
208 }
209 def ds_offset1 : Operand {
210 let PrintMethod = "printDSOffset1";
202211 }
203212 def glc : Operand {
204213 let PrintMethod = "printGLC";
925934 class DS_Load_Helper op, string asm, RegisterClass regClass> : DS_1A <
926935 op,
927936 (outs regClass:$vdst),
928 (ins i1imm:$gds, VReg_32:$addr, u16imm:$offset),
929 asm#" $vdst, $addr, $offset, [M0]",
937 (ins i1imm:$gds, VReg_32:$addr, ds_offset:$offset),
938 asm#" $vdst, $addr"#"$offset"#" [M0]",
930939 []> {
931940 let data0 = 0;
932941 let data1 = 0;
937946 class DS_Load2_Helper op, string asm, RegisterClass regClass> : DS <
938947 op,
939948 (outs regClass:$vdst),
940 (ins i1imm:$gds, VReg_32:$addr, u8imm:$offset0, u8imm:$offset1),
941 asm#" $vdst, $addr, $offset0, $offset1, [M0]",
949 (ins i1imm:$gds, VReg_32:$addr, ds_offset0:$offset0, ds_offset1:$offset1),
950 asm#" $vdst, $addr"#"$offset0"#"$offset1 [M0]",
942951 []> {
943952 let data0 = 0;
944953 let data1 = 0;
949958 class DS_Store_Helper op, string asm, RegisterClass regClass> : DS_1A <
950959 op,
951960 (outs),
952 (ins i1imm:$gds, VReg_32:$addr, regClass:$data0, u16imm:$offset),
953 asm#" $addr, $data0, $offset [M0]",
961 (ins i1imm:$gds, VReg_32:$addr, regClass:$data0, ds_offset:$offset),
962 asm#" $addr, $data0"#"$offset"#" [M0]",
954963 []> {
955964 let data1 = 0;
956965 let mayStore = 1;
962971 op,
963972 (outs),
964973 (ins i1imm:$gds, VReg_32:$addr, regClass:$data0, regClass:$data1,
965 u8imm:$offset0, u8imm:$offset1),
966 asm#" $addr, $data0, $data1, $offset0, $offset1 [M0]",
974 ds_offset0:$offset0, ds_offset1:$offset1),
975 asm#" $addr, $data0, $data1"#"$offset0"#"$offset1 [M0]",
967976 []> {
968977 let mayStore = 1;
969978 let mayLoad = 0;
974983 class DS_1A1D_RET op, string asm, RegisterClass rc, string noRetOp = ""> : DS_1A <
975984 op,
976985 (outs rc:$vdst),
977 (ins i1imm:$gds, VReg_32:$addr, rc:$data0, u16imm:$offset),
978 asm#" $vdst, $addr, $data0, $offset, [M0]", []>,
986 (ins i1imm:$gds, VReg_32:$addr, rc:$data0, ds_offset:$offset),
987 asm#" $vdst, $addr, $data0"#"$offset"#" [M0]", []>,
979988 AtomicNoRet {
980989
981990 let data1 = 0;
989998 class DS_1A2D_RET op, string asm, RegisterClass rc, string noRetOp = ""> : DS_1A <
990999 op,
9911000 (outs rc:$vdst),
992 (ins i1imm:$gds, VReg_32:$addr, rc:$data0, rc:$data1, u16imm:$offset),
993 asm#" $vdst, $addr, $data0, $data1, $offset, [M0]",
1001 (ins i1imm:$gds, VReg_32:$addr, rc:$data0, rc:$data1, ds_offset:$offset),
1002 asm#" $vdst, $addr, $data0, $data1"#"$offset"#" [M0]",
9941003 []>,
9951004 AtomicNoRet {
9961005 let mayStore = 1;
10031012 class DS_1A2D_NORET op, string asm, RegisterClass rc, string noRetOp = asm> : DS_1A <
10041013 op,
10051014 (outs),
1006 (ins i1imm:$gds, VReg_32:$addr, rc:$data0, rc:$data1, u16imm:$offset),
1007 asm#" $addr, $data0, $data1, $offset, [M0]",
1015 (ins i1imm:$gds, VReg_32:$addr, rc:$data0, rc:$data1, ds_offset:$offset),
1016 asm#" $addr, $data0, $data1"#"$offset"#" [M0]",
10081017 []>,
10091018 AtomicNoRet {
10101019 let mayStore = 1;
10151024 class DS_1A1D_NORET op, string asm, RegisterClass rc, string noRetOp = asm> : DS_1A <
10161025 op,
10171026 (outs),
1018 (ins i1imm:$gds, VReg_32:$addr, rc:$data0, u16imm:$offset),
1019 asm#" $addr, $data0, $offset, [M0]",
1027 (ins i1imm:$gds, VReg_32:$addr, rc:$data0, ds_offset:$offset),
1028 asm#" $addr, $data0"#"$offset"#" [M0]",
10201029 []>,
10211030 AtomicNoRet {
10221031
99 ; CHECK-LABEL: {{^}}do_as_ptr_calcs:
1010 ; CHECK: S_LOAD_DWORD [[SREG1:s[0-9]+]],
1111 ; CHECK: V_MOV_B32_e32 [[VREG1:v[0-9]+]], [[SREG1]]
12 ; CHECK-DAG: DS_READ_B32 v{{[0-9]+}}, [[VREG1]], 0xc
13 ; CHECK-DAG: DS_READ_B32 v{{[0-9]+}}, v{{[0-9]+}}, 0x14
12 ; CHECK-DAG: DS_READ_B32 v{{[0-9]+}}, [[VREG1]] offset:12
13 ; CHECK-DAG: DS_READ_B32 v{{[0-9]+}}, v{{[0-9]+}} offset:20
1414 define void @do_as_ptr_calcs(%struct.foo addrspace(3)* nocapture %ptr) nounwind {
1515 entry:
1616 %x = getelementptr inbounds %struct.foo addrspace(3)* %ptr, i32 0, i32 1, i32 0
0 ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
1 ; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
1 ; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=CI -check-prefix=FUNC %s
22
33 ; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_ret_i32_offset:
44 ; SI: S_LOAD_DWORD [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
66 ; SI-DAG: V_MOV_B32_e32 [[VCMP:v[0-9]+]], 7
77 ; SI-DAG: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[PTR]]
88 ; SI-DAG: V_MOV_B32_e32 [[VSWAP:v[0-9]+]], [[SWAP]]
9 ; SI: DS_CMPST_RTN_B32 [[RESULT:v[0-9]+]], [[VPTR]], [[VCMP]], [[VSWAP]], 0x10, [M0]
9 ; SI: DS_CMPST_RTN_B32 [[RESULT:v[0-9]+]], [[VPTR]], [[VCMP]], [[VSWAP]] offset:16 [M0]
1010 ; SI: S_ENDPGM
1111 define void @lds_atomic_cmpxchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %swap) nounwind {
1212 %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
2525 ; SI-DAG: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[PTR]]
2626 ; SI-DAG: V_MOV_B32_e32 v[[LOSWAPV:[0-9]+]], s[[LOSWAP]]
2727 ; SI-DAG: V_MOV_B32_e32 v[[HISWAPV:[0-9]+]], s[[HISWAP]]
28 ; SI: DS_CMPST_RTN_B64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVCMP]]:[[HIVCMP]]{{\]}}, v{{\[}}[[LOSWAPV]]:[[HISWAPV]]{{\]}}, 0x20, [M0]
28 ; SI: DS_CMPST_RTN_B64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVCMP]]:[[HIVCMP]]{{\]}}, v{{\[}}[[LOSWAPV]]:[[HISWAPV]]{{\]}} offset:32 [M0]
2929 ; SI: BUFFER_STORE_DWORDX2 [[RESULT]],
3030 ; SI: S_ENDPGM
3131 define void @lds_atomic_cmpxchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr, i64 %swap) nounwind {
3636 ret void
3737 }
3838
39 ; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_ret_i32_bad_si_offset:
40 ; SI: DS_CMPST_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x0
41 ; CI: DS_CMPST_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
39 ; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_ret_i32_bad_si_offset
40 ; SI: DS_CMPST_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
41 ; CI: DS_CMPST_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 [M0]
4242 ; SI: S_ENDPGM
4343 define void @lds_atomic_cmpxchg_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %swap, i32 %a, i32 %b) nounwind {
4444 %sub = sub i32 %a, %b
5656 ; SI-DAG: V_MOV_B32_e32 [[VCMP:v[0-9]+]], 7
5757 ; SI-DAG: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[PTR]]
5858 ; SI-DAG: V_MOV_B32_e32 [[VSWAP:v[0-9]+]], [[SWAP]]
59 ; SI: DS_CMPST_B32 [[VPTR]], [[VCMP]], [[VSWAP]], 0x10, [M0]
59 ; SI: DS_CMPST_B32 [[VPTR]], [[VCMP]], [[VSWAP]] offset:16 [M0]
6060 ; SI: S_ENDPGM
6161 define void @lds_atomic_cmpxchg_noret_i32_offset(i32 addrspace(3)* %ptr, i32 %swap) nounwind {
6262 %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
7474 ; SI-DAG: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[PTR]]
7575 ; SI-DAG: V_MOV_B32_e32 v[[LOSWAPV:[0-9]+]], s[[LOSWAP]]
7676 ; SI-DAG: V_MOV_B32_e32 v[[HISWAPV:[0-9]+]], s[[HISWAP]]
77 ; SI: DS_CMPST_B64 [[VPTR]], v{{\[}}[[LOVCMP]]:[[HIVCMP]]{{\]}}, v{{\[}}[[LOSWAPV]]:[[HISWAPV]]{{\]}}, 0x20, [M0]
77 ; SI: DS_CMPST_B64 [[VPTR]], v{{\[}}[[LOVCMP]]:[[HIVCMP]]{{\]}}, v{{\[}}[[LOSWAPV]]:[[HISWAPV]]{{\]}} offset:32 [M0]
7878 ; SI: S_ENDPGM
7979 define void @lds_atomic_cmpxchg_noret_i64_offset(i64 addrspace(3)* %ptr, i64 %swap) nounwind {
8080 %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
1010
1111 ; FUNC-LABEL: {{^}}atomic_add_local_const_offset:
1212 ; R600: LDS_ADD *
13 ; SI: DS_ADD_U32 v{{[0-9]+}}, v{{[0-9]+}}, 0x10
13 ; SI: DS_ADD_U32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
1414 define void @atomic_add_local_const_offset(i32 addrspace(3)* %local) {
1515 %gep = getelementptr i32 addrspace(3)* %local, i32 4
1616 %val = atomicrmw volatile add i32 addrspace(3)* %gep, i32 5 seq_cst
2828
2929 ; FUNC-LABEL: {{^}}atomic_add_ret_local_const_offset:
3030 ; R600: LDS_ADD_RET *
31 ; SI: DS_ADD_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x14
31 ; SI: DS_ADD_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:20
3232 define void @atomic_add_ret_local_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %local) {
3333 %gep = getelementptr i32 addrspace(3)* %local, i32 5
3434 %val = atomicrmw volatile add i32 addrspace(3)* %gep, i32 5 seq_cst
1010
1111 ; FUNC-LABEL: {{^}}atomic_sub_local_const_offset:
1212 ; R600: LDS_SUB *
13 ; SI: DS_SUB_U32 v{{[0-9]+}}, v{{[0-9]+}}, 0x10
13 ; SI: DS_SUB_U32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
1414 define void @atomic_sub_local_const_offset(i32 addrspace(3)* %local) {
1515 %gep = getelementptr i32 addrspace(3)* %local, i32 4
1616 %val = atomicrmw volatile sub i32 addrspace(3)* %gep, i32 5 seq_cst
2828
2929 ; FUNC-LABEL: {{^}}atomic_sub_ret_local_const_offset:
3030 ; R600: LDS_SUB_RET *
31 ; SI: DS_SUB_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x14
31 ; SI: DS_SUB_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:20
3232 define void @atomic_sub_ret_local_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %local) {
3333 %gep = getelementptr i32 addrspace(3)* %local, i32 5
3434 %val = atomicrmw volatile sub i32 addrspace(3)* %gep, i32 5 seq_cst
77 ; CHECK-LABEL: {{^}}signed_ds_offset_addressing_loop:
88 ; CHECK: BB0_1:
99 ; CHECK: V_ADD_I32_e32 [[VADDR:v[0-9]+]],
10 ; SI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR]], 0x0
10 ; SI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR]]
1111 ; SI-DAG: V_ADD_I32_e32 [[VADDR4:v[0-9]+]], 4, [[VADDR]]
12 ; SI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR4]], 0x0
12 ; SI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR4]]
1313 ; SI-DAG: V_ADD_I32_e32 [[VADDR0x80:v[0-9]+]], 0x80, [[VADDR]]
14 ; SI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR0x80]], 0x0
14 ; SI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR0x80]]
1515 ; SI-DAG: V_ADD_I32_e32 [[VADDR0x84:v[0-9]+]], 0x84, [[VADDR]]
16 ; SI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR0x84]], 0x0
16 ; SI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR0x84]]
1717 ; SI-DAG: V_ADD_I32_e32 [[VADDR0x100:v[0-9]+]], 0x100, [[VADDR]]
18 ; SI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR0x100]], 0x0
18 ; SI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR0x100]]
1919
20 ; CI-DAG: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[VADDR]], 0x0, 0x1
21 ; CI-DAG: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[VADDR]], 0x20, 0x21
22 ; CI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR]], 0x100
20 ; CI-DAG: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[VADDR]] offset0:0 offset1:1
21 ; CI-DAG: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[VADDR]] offset0:32 offset1:33
22 ; CI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR]] offset:256
2323 ; CHECK: S_ENDPGM
2424 define void @signed_ds_offset_addressing_loop(float addrspace(1)* noalias nocapture %out, float addrspace(3)* noalias nocapture readonly %lptr, i32 %n) #2 {
2525 entry:
None ; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -check-prefix=SI %s
0 ; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -strict-whitespace -check-prefix=SI %s
11
22 ; FIXME: We don't get cases where the address was an SGPR because we
33 ; get a copy to the address register for each one.
66 @lds.f64 = addrspace(3) global [512 x double] zeroinitializer, align 8
77
88 ; SI-LABEL: @simple_read2_f32
9 ; SI: DS_READ2_B32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}}, 0x0, 0x8
9 ; SI: DS_READ2_B32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:8
1010 ; SI: S_WAITCNT lgkmcnt(0)
1111 ; SI: V_ADD_F32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
1212 ; SI: BUFFER_STORE_DWORD [[RESULT]]
2525 }
2626
2727 ; SI-LABEL: @simple_read2_f32_max_offset
28 ; SI: DS_READ2_B32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}}, 0x0, 0xff
28 ; SI: DS_READ2_B32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:255
2929 ; SI: S_WAITCNT lgkmcnt(0)
3030 ; SI: V_ADD_F32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
3131 ; SI: BUFFER_STORE_DWORD [[RESULT]]
4545
4646 ; SI-LABEL: @simple_read2_f32_too_far
4747 ; SI-NOT: DS_READ2_B32
48 ; SI: DS_READ_B32 v{{[0-9]+}}, v{{[0-9]+}}, 0x0
49 ; SI: DS_READ_B32 v{{[0-9]+}}, v{{[0-9]+}}, 0x404
48 ; SI: DS_READ_B32 v{{[0-9]+}}, v{{[0-9]+}}
49 ; SI: DS_READ_B32 v{{[0-9]+}}, v{{[0-9]+}} offset:1028
5050 ; SI: S_ENDPGM
5151 define void @simple_read2_f32_too_far(float addrspace(1)* %out) #0 {
5252 %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
6262 }
6363
6464 ; SI-LABEL: @simple_read2_f32_x2
65 ; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]], 0x0, 0x8
66 ; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]], 0xb, 0x1b
65 ; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset0:0 offset1:8
66 ; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27
6767 ; SI: S_ENDPGM
6868 define void @simple_read2_f32_x2(float addrspace(1)* %out) #0 {
6969 %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
9393
9494 ; Make sure there is an instruction between the two sets of reads.
9595 ; SI-LABEL: @simple_read2_f32_x2_barrier
96 ; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]], 0x0, 0x8
96 ; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset0:0 offset1:8
9797 ; SI: S_BARRIER
98 ; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]], 0xb, 0x1b
98 ; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27
9999 ; SI: S_ENDPGM
100100 define void @simple_read2_f32_x2_barrier(float addrspace(1)* %out) #0 {
101101 %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
129129 ; element results in only folding the inner pair.
130130
131131 ; SI-LABEL: @simple_read2_f32_x2_nonzero_base
132 ; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]], 0x2, 0x8
133 ; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]], 0xb, 0x1b
132 ; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset0:2 offset1:8
133 ; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27
134134 ; SI: S_ENDPGM
135135 define void @simple_read2_f32_x2_nonzero_base(float addrspace(1)* %out) #0 {
136136 %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
241241
242242 ; SI-LABEL: @simple_read2_f32_volatile_0
243243 ; SI-NOT: DS_READ2_B32
244 ; SI: DS_READ_B32 v{{[0-9]+}}, v{{[0-9]+}}, 0x0
245 ; SI: DS_READ_B32 v{{[0-9]+}}, v{{[0-9]+}}, 0x20
244 ; SI: DS_READ_B32 v{{[0-9]+}}, v{{[0-9]+}}
245 ; SI: DS_READ_B32 v{{[0-9]+}}, v{{[0-9]+}} offset:32
246246 ; SI: S_ENDPGM
247247 define void @simple_read2_f32_volatile_0(float addrspace(1)* %out) #0 {
248248 %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
259259
260260 ; SI-LABEL: @simple_read2_f32_volatile_1
261261 ; SI-NOT: DS_READ2_B32
262 ; SI: DS_READ_B32 v{{[0-9]+}}, v{{[0-9]+}}, 0x0
263 ; SI: DS_READ_B32 v{{[0-9]+}}, v{{[0-9]+}}, 0x20
262 ; SI: DS_READ_B32 v{{[0-9]+}}, v{{[0-9]+}}
263 ; SI: DS_READ_B32 v{{[0-9]+}}, v{{[0-9]+}} offset:32
264264 ; SI: S_ENDPGM
265265 define void @simple_read2_f32_volatile_1(float addrspace(1)* %out) #0 {
266266 %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
312312
313313 ; SI-LABEL: @simple_read2_f64
314314 ; SI: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 3, {{v[0-9]+}}
315 ; SI: DS_READ2_B64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, [[VPTR]], 0x0, 0x8
315 ; SI: DS_READ2_B64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, [[VPTR]] offset0:0 offset1:8
316316 ; SI: V_ADD_F64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}
317317 ; SI: BUFFER_STORE_DWORDX2 [[RESULT]]
318318 ; SI: S_ENDPGM
330330 }
331331
332332 ; SI-LABEL: @simple_read2_f64_max_offset
333 ; SI: DS_READ2_B64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, 0x0, 0xff
333 ; SI: DS_READ2_B64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:0 offset1:255
334334 ; SI: S_ENDPGM
335335 define void @simple_read2_f64_max_offset(double addrspace(1)* %out) #0 {
336336 %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
347347
348348 ; SI-LABEL: @simple_read2_f64_too_far
349349 ; SI-NOT: DS_READ2_B64
350 ; SI: DS_READ_B64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, 0x0
351 ; SI: DS_READ_B64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, 0x808
350 ; SI: DS_READ_B64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}
351 ; SI: DS_READ_B64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset:2056
352352 ; SI: S_ENDPGM
353353 define void @simple_read2_f64_too_far(double addrspace(1)* %out) #0 {
354354 %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
365365
366366 ; Alignment only 4
367367 ; SI-LABEL: @misaligned_read2_f64
368 ; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, 0x0, 0x1
369 ; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, 0xe, 0xf
368 ; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:0 offset1:1
369 ; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:14 offset1:15
370370 ; SI: S_ENDPGM
371371 define void @misaligned_read2_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
372372 %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
44
55
66 ; SI-LABEL: @simple_read2st64_f32_0_1
7 ; SI: DS_READ2ST64_B32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}}, 0x0, 0x1
7 ; SI: DS_READ2ST64_B32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:1
88 ; SI: S_WAITCNT lgkmcnt(0)
99 ; SI: V_ADD_F32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
1010 ; SI: BUFFER_STORE_DWORD [[RESULT]]
2323 }
2424
2525 ; SI-LABEL: @simple_read2st64_f32_1_2
26 ; SI: DS_READ2ST64_B32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}}, 0x1, 0x2
26 ; SI: DS_READ2ST64_B32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1 offset1:2
2727 ; SI: S_WAITCNT lgkmcnt(0)
2828 ; SI: V_ADD_F32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
2929 ; SI: BUFFER_STORE_DWORD [[RESULT]]
4343 }
4444
4545 ; SI-LABEL: @simple_read2st64_f32_max_offset
46 ; SI: DS_READ2ST64_B32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}}, 0x1, 0xff
46 ; SI: DS_READ2ST64_B32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1 offset1:255
4747 ; SI: S_WAITCNT lgkmcnt(0)
4848 ; SI: V_ADD_F32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
4949 ; SI: BUFFER_STORE_DWORD [[RESULT]]
6464
6565 ; SI-LABEL: @simple_read2st64_f32_over_max_offset
6666 ; SI-NOT: DS_READ2ST64_B32
67 ; SI: DS_READ_B32 {{v[0-9]+}}, {{v[0-9]+}}, 0x100,
67 ; SI: DS_READ_B32 {{v[0-9]+}}, {{v[0-9]+}} offset:256
6868 ; SI: V_ADD_I32_e32 [[BIGADD:v[0-9]+]], 0x10000, {{v[0-9]+}}
69 ; SI: DS_READ_B32 {{v[0-9]+}}, [[BIGADD]], 0x0
69 ; SI: DS_READ_B32 {{v[0-9]+}}, [[BIGADD]]
7070 ; SI: S_ENDPGM
7171 define void @simple_read2st64_f32_over_max_offset(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
7272 %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
116116 }
117117
118118 ; SI-LABEL: @simple_read2st64_f64_0_1
119 ; SI: DS_READ2ST64_B64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}}, 0x0, 0x1
119 ; SI: DS_READ2ST64_B64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:1
120120 ; SI: S_WAITCNT lgkmcnt(0)
121121 ; SI: V_ADD_F64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}
122122 ; SI: BUFFER_STORE_DWORDX2 [[RESULT]]
135135 }
136136
137137 ; SI-LABEL: @simple_read2st64_f64_1_2
138 ; SI: DS_READ2ST64_B64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}}, 0x1, 0x2
138 ; SI: DS_READ2ST64_B64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1 offset1:2
139139 ; SI: S_WAITCNT lgkmcnt(0)
140140 ; SI: V_ADD_F64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}
141141 ; SI: BUFFER_STORE_DWORDX2 [[RESULT]]
157157 ; Alignment only
158158
159159 ; SI-LABEL: @misaligned_read2st64_f64
160 ; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, 0x0, 0x1
161 ; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, 0x80, 0x81
160 ; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:0 offset1:1
161 ; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:128 offset1:129
162162 ; SI: S_ENDPGM
163163 define void @misaligned_read2st64_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
164164 %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
175175
176176 ; The maximum is not the usual 0xff because 0xff * 8 * 64 > 0xffff
177177 ; SI-LABEL: @simple_read2st64_f64_max_offset
178 ; SI: DS_READ2ST64_B64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}}, 0x4, 0x7f
178 ; SI: DS_READ2ST64_B64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:4 offset1:127
179179 ; SI: S_WAITCNT lgkmcnt(0)
180180 ; SI: V_ADD_F64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}
181181 ; SI: BUFFER_STORE_DWORDX2 [[RESULT]]
196196
197197 ; SI-LABEL: @simple_read2st64_f64_over_max_offset
198198 ; SI-NOT: DS_READ2ST64_B64
199 ; SI: DS_READ_B64 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, 0x200,
199 ; SI: DS_READ_B64 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset:512
200200 ; SI: V_ADD_I32_e32 [[BIGADD:v[0-9]+]], 0x10000, {{v[0-9]+}}
201 ; SI: DS_READ_B64 {{v\[[0-9]+:[0-9]+\]}}, [[BIGADD]], 0x0
201 ; SI: DS_READ_B64 {{v\[[0-9]+:[0-9]+\]}}, [[BIGADD]]
202202 ; SI: S_ENDPGM
203203 define void @simple_read2st64_f64_over_max_offset(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
204204 %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
236236
237237 ; SI-LABEL: @byte_size_only_divisible_64_read2_f64
238238 ; SI-NOT: DS_READ2ST64_B64
239 ; SI: DS_READ2_B64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, 0x0, 0x8
239 ; SI: DS_READ2_B64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:0 offset1:8
240240 ; SI: S_ENDPGM
241241 define void @byte_size_only_divisible_64_read2_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
242242 %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
None ; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -check-prefix=SI %s
0 ; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -strict-whitespace -check-prefix=SI %s
11
22 @lds = addrspace(3) global [512 x float] zeroinitializer, align 4
33 @lds.f64 = addrspace(3) global [512 x double] zeroinitializer, align 8
66 ; SI-LABEL: @simple_write2_one_val_f32
77 ; SI-DAG: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]]
88 ; SI-DAG: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
9 ; SI: DS_WRITE2_B32 [[VPTR]], [[VAL]], [[VAL]], 0x0, 0x8 [M0]
9 ; SI: DS_WRITE2_B32 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:8 [M0]
1010 ; SI: S_ENDPGM
1111 define void @simple_write2_one_val_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
1212 %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
2424 ; SI-DAG: BUFFER_LOAD_DWORD [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
2525 ; SI-DAG: BUFFER_LOAD_DWORD [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
2626 ; SI-DAG: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
27 ; SI: DS_WRITE2_B32 [[VPTR]], [[VAL0]], [[VAL1]], 0x0, 0x8 [M0]
27 ; SI: DS_WRITE2_B32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:8 [M0]
2828 ; SI: S_ENDPGM
2929 define void @simple_write2_two_val_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
3030 %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
4242
4343 ; SI-LABEL: @simple_write2_two_val_f32_volatile_0
4444 ; SI-NOT: DS_WRITE2_B32
45 ; SI: DS_WRITE_B32 {{v[0-9]+}}, {{v[0-9]+}}, 0x0
46 ; SI: DS_WRITE_B32 {{v[0-9]+}}, {{v[0-9]+}}, 0x20
45 ; SI: DS_WRITE_B32 {{v[0-9]+}}, {{v[0-9]+}}
46 ; SI: DS_WRITE_B32 {{v[0-9]+}}, {{v[0-9]+}} offset:32
4747 ; SI: S_ENDPGM
4848 define void @simple_write2_two_val_f32_volatile_0(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
4949 %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
6161
6262 ; SI-LABEL: @simple_write2_two_val_f32_volatile_1
6363 ; SI-NOT: DS_WRITE2_B32
64 ; SI: DS_WRITE_B32 {{v[0-9]+}}, {{v[0-9]+}}, 0x0
65 ; SI: DS_WRITE_B32 {{v[0-9]+}}, {{v[0-9]+}}, 0x20
64 ; SI: DS_WRITE_B32 {{v[0-9]+}}, {{v[0-9]+}}
65 ; SI: DS_WRITE_B32 {{v[0-9]+}}, {{v[0-9]+}} offset:32
6666 ; SI: S_ENDPGM
6767 define void @simple_write2_two_val_f32_volatile_1(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
6868 %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
8383 ; SI: BUFFER_LOAD_DWORDX2 v{{\[}}[[VAL0:[0-9]+]]:{{[0-9]+\]}}
8484 ; SI: BUFFER_LOAD_DWORDX2 v{{\[[0-9]+}}:[[VAL1:[0-9]+]]{{\]}}
8585 ; SI: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
86 ; SI: DS_WRITE2_B32 [[VPTR]], v[[VAL0]], v[[VAL1]], 0x0, 0x8 [M0]
86 ; SI: DS_WRITE2_B32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8 [M0]
8787 ; SI: S_ENDPGM
8888 define void @simple_write2_two_val_subreg2_mixed_f32(float addrspace(1)* %C, <2 x float> addrspace(1)* %in) #0 {
8989 %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
104104 ; SI-LABEL: @simple_write2_two_val_subreg2_f32
105105 ; SI-DAG: BUFFER_LOAD_DWORDX2 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
106106 ; SI-DAG: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
107 ; SI: DS_WRITE2_B32 [[VPTR]], v[[VAL0]], v[[VAL1]], 0x0, 0x8 [M0]
107 ; SI: DS_WRITE2_B32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8 [M0]
108108 ; SI: S_ENDPGM
109109 define void @simple_write2_two_val_subreg2_f32(float addrspace(1)* %C, <2 x float> addrspace(1)* %in) #0 {
110110 %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
123123 ; SI-LABEL: @simple_write2_two_val_subreg4_f32
124124 ; SI-DAG: BUFFER_LOAD_DWORDX4 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
125125 ; SI-DAG: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
126 ; SI: DS_WRITE2_B32 [[VPTR]], v[[VAL0]], v[[VAL1]], 0x0, 0x8 [M0]
126 ; SI: DS_WRITE2_B32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8 [M0]
127127 ; SI: S_ENDPGM
128128 define void @simple_write2_two_val_subreg4_f32(float addrspace(1)* %C, <4 x float> addrspace(1)* %in) #0 {
129129 %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
143143 ; SI-DAG: BUFFER_LOAD_DWORD [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
144144 ; SI-DAG: BUFFER_LOAD_DWORD [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
145145 ; SI-DAG: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
146 ; SI: DS_WRITE2_B32 [[VPTR]], [[VAL0]], [[VAL1]], 0x0, 0xff [M0]
146 ; SI: DS_WRITE2_B32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:255 [M0]
147147 ; SI: S_ENDPGM
148148 define void @simple_write2_two_val_max_offset_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
149149 %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
160160 }
161161
162162 ; SI-LABEL: @simple_write2_two_val_too_far_f32
163 ; SI: DS_WRITE_B32 v{{[0-9]+}}, v{{[0-9]+}}, 0x0
164 ; SI: DS_WRITE_B32 v{{[0-9]+}}, v{{[0-9]+}}, 0x404
163 ; SI: DS_WRITE_B32 v{{[0-9]+}}, v{{[0-9]+}}
164 ; SI: DS_WRITE_B32 v{{[0-9]+}}, v{{[0-9]+}} offset:1028
165165 ; SI: S_ENDPGM
166166 define void @simple_write2_two_val_too_far_f32(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
167167 %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
178178 }
179179
180180 ; SI-LABEL: @simple_write2_two_val_f32_x2
181 ; SI: DS_WRITE2_B32 [[BASEADDR:v[0-9]+]], [[VAL0:v[0-9]+]], [[VAL1:v[0-9]+]], 0x0, 0x8
182 ; SI-NEXT: DS_WRITE2_B32 [[BASEADDR]], [[VAL0]], [[VAL1]], 0xb, 0x1b
181 ; SI: DS_WRITE2_B32 [[BASEADDR:v[0-9]+]], [[VAL0:v[0-9]+]], [[VAL1:v[0-9]+]] offset0:0 offset1:8
182 ; SI-NEXT: DS_WRITE2_B32 [[BASEADDR]], [[VAL0]], [[VAL1]] offset0:11 offset1:27
183183 ; SI: S_ENDPGM
184184 define void @simple_write2_two_val_f32_x2(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
185185 %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
208208 }
209209
210210 ; SI-LABEL: @simple_write2_two_val_f32_x2_nonzero_base
211 ; SI: DS_WRITE2_B32 [[BASEADDR:v[0-9]+]], [[VAL0:v[0-9]+]], [[VAL1:v[0-9]+]], 0x3, 0x8
212 ; SI-NEXT: DS_WRITE2_B32 [[BASEADDR]], [[VAL0]], [[VAL1]], 0xb, 0x1b
211 ; SI: DS_WRITE2_B32 [[BASEADDR:v[0-9]+]], [[VAL0:v[0-9]+]], [[VAL1:v[0-9]+]] offset0:3 offset1:8
212 ; SI-NEXT: DS_WRITE2_B32 [[BASEADDR]], [[VAL0]], [[VAL1]] offset0:11 offset1:27
213213 ; SI: S_ENDPGM
214214 define void @simple_write2_two_val_f32_x2_nonzero_base(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
215215 %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
267267 ; SI-LABEL: @simple_write2_one_val_f64
268268 ; SI: BUFFER_LOAD_DWORDX2 [[VAL:v\[[0-9]+:[0-9]+\]]],
269269 ; SI: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
270 ; SI: DS_WRITE2_B64 [[VPTR]], [[VAL]], [[VAL]], 0x0, 0x8 [M0]
270 ; SI: DS_WRITE2_B64 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:8 [M0]
271271 ; SI: S_ENDPGM
272272 define void @simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in) #0 {
273273 %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
284284 ; SI-LABEL: @misaligned_simple_write2_one_val_f64
285285 ; SI-DAG: BUFFER_LOAD_DWORDX2 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
286286 ; SI-DAG: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
287 ; SI: DS_WRITE2_B32 [[VPTR]], v[[VAL0]], v[[VAL1]], 0x0, 0x1 [M0]
288 ; SI: DS_WRITE2_B32 [[VPTR]], v[[VAL0]], v[[VAL1]], 0xe, 0xf [M0]
287 ; SI: DS_WRITE2_B32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:1 [M0]
288 ; SI: DS_WRITE2_B32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:14 offset1:15 [M0]
289289 ; SI: S_ENDPGM
290290 define void @misaligned_simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 {
291291 %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
303303 ; SI-DAG: BUFFER_LOAD_DWORDX2 [[VAL0:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
304304 ; SI-DAG: BUFFER_LOAD_DWORDX2 [[VAL1:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x8
305305 ; SI-DAG: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
306 ; SI: DS_WRITE2_B64 [[VPTR]], [[VAL0]], [[VAL1]], 0x0, 0x8 [M0]
306 ; SI: DS_WRITE2_B64 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:8 [M0]
307307 ; SI: S_ENDPGM
308308 define void @simple_write2_two_val_f64(double addrspace(1)* %C, double addrspace(1)* %in) #0 {
309309 %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
66 ; SI-LABEL: @simple_write2st64_one_val_f32_0_1
77 ; SI-DAG: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]]
88 ; SI-DAG: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
9 ; SI: DS_WRITE2ST64_B32 [[VPTR]], [[VAL]], [[VAL]], 0x0, 0x1 [M0]
9 ; SI: DS_WRITE2ST64_B32 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:1 [M0]
1010 ; SI: S_ENDPGM
1111 define void @simple_write2st64_one_val_f32_0_1(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
1212 %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
2424 ; SI-DAG: BUFFER_LOAD_DWORD [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
2525 ; SI-DAG: BUFFER_LOAD_DWORD [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
2626 ; SI-DAG: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
27 ; SI: DS_WRITE2ST64_B32 [[VPTR]], [[VAL0]], [[VAL1]], 0x2, 0x5 [M0]
27 ; SI: DS_WRITE2ST64_B32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:2 offset1:5 [M0]
2828 ; SI: S_ENDPGM
2929 define void @simple_write2st64_two_val_f32_2_5(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
3030 %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
4545 ; SI-DAG: BUFFER_LOAD_DWORD [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
4646 ; SI-DAG: BUFFER_LOAD_DWORD [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
4747 ; SI-DAG: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
48 ; SI: DS_WRITE2ST64_B32 [[VPTR]], [[VAL0]], [[VAL1]], 0x0, 0xff [M0]
48 ; SI: DS_WRITE2ST64_B32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:255 [M0]
4949 ; SI: S_ENDPGM
5050 define void @simple_write2st64_two_val_max_offset_f32(float addrspace(1)* %C, float addrspace(1)* %in, float addrspace(3)* %lds) #0 {
5151 %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
6565 ; SI-DAG: BUFFER_LOAD_DWORDX2 [[VAL0:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
6666 ; SI-DAG: BUFFER_LOAD_DWORDX2 [[VAL1:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x8
6767 ; SI-DAG: V_ADD_I32_e32 [[VPTR:v[0-9]+]],
68 ; SI: DS_WRITE2ST64_B64 [[VPTR]], [[VAL0]], [[VAL1]], 0x4, 0x7f [M0]
68 ; SI: DS_WRITE2ST64_B64 [[VPTR]], [[VAL0]], [[VAL1]] offset0:4 offset1:127 [M0]
6969 ; SI: S_ENDPGM
7070 define void @simple_write2st64_two_val_max_offset_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 {
7171 %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
8484
8585 ; SI-LABEL: @byte_size_only_divisible_64_write2st64_f64
8686 ; SI-NOT: DS_WRITE2ST64_B64
87 ; SI: DS_WRITE2_B64 {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, 0x0, 0x8
87 ; SI: DS_WRITE2_B64 {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset0:0 offset1:8
8888 ; SI: S_ENDPGM
8989 define void @byte_size_only_divisible_64_write2st64_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 {
9090 %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
33 define void @use_gep_address_space([1024 x i32] addrspace(3)* %array) nounwind {
44 ; CHECK-LABEL: {{^}}use_gep_address_space:
55 ; CHECK: V_MOV_B32_e32 [[PTR:v[0-9]+]], s{{[0-9]+}}
6 ; CHECK: DS_WRITE_B32 [[PTR]], v{{[0-9]+}}, 0x40
6 ; CHECK: DS_WRITE_B32 [[PTR]], v{{[0-9]+}} offset:64
77 %p = getelementptr [1024 x i32] addrspace(3)* %array, i16 0, i16 16
88 store i32 99, i32 addrspace(3)* %p
99 ret void
0 ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck --check-prefix=SI --check-prefix=BOTH %s
11 ; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs< %s | FileCheck --check-prefix=CI --check-prefix=BOTH %s
22
3 ; BOTH-LABEL: {{^}}local_i32_load:
4 ; BOTH: DS_READ_B32 [[REG:v[0-9]+]], v{{[0-9]+}}, 0x1c, [M0]
3 ; BOTH-LABEL: {{^}}local_i32_load
4 ; BOTH: DS_READ_B32 [[REG:v[0-9]+]], v{{[0-9]+}} offset:28 [M0]
55 ; BOTH: BUFFER_STORE_DWORD [[REG]],
66 define void @local_i32_load(i32 addrspace(1)* %out, i32 addrspace(3)* %in) nounwind {
77 %gep = getelementptr i32 addrspace(3)* %in, i32 7
1010 ret void
1111 }
1212
13 ; BOTH-LABEL: {{^}}local_i32_load_0_offset:
14 ; BOTH: DS_READ_B32 [[REG:v[0-9]+]], v{{[0-9]+}}, 0x0, [M0]
13 ; BOTH-LABEL: {{^}}local_i32_load_0_offset
14 ; BOTH: DS_READ_B32 [[REG:v[0-9]+]], v{{[0-9]+}} [M0]
1515 ; BOTH: BUFFER_STORE_DWORD [[REG]],
1616 define void @local_i32_load_0_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) nounwind {
1717 %val = load i32 addrspace(3)* %in, align 4
2121
2222 ; BOTH-LABEL: {{^}}local_i8_load_i16_max_offset:
2323 ; BOTH-NOT: ADD
24 ; BOTH: DS_READ_U8 [[REG:v[0-9]+]], {{v[0-9]+}}, 0xffff, [M0]
24 ; BOTH: DS_READ_U8 [[REG:v[0-9]+]], {{v[0-9]+}} offset:65535 [M0]
2525 ; BOTH: BUFFER_STORE_BYTE [[REG]],
2626 define void @local_i8_load_i16_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)* %in) nounwind {
2727 %gep = getelementptr i8 addrspace(3)* %in, i32 65535
3636 ; SI: S_OR_B32 [[ADDR:s[0-9]+]], s{{[0-9]+}}, 0x10000
3737 ; CI: S_ADD_I32 [[ADDR:s[0-9]+]], s{{[0-9]+}}, 0x10000
3838 ; BOTH: V_MOV_B32_e32 [[VREGADDR:v[0-9]+]], [[ADDR]]
39 ; BOTH: DS_READ_U8 [[REG:v[0-9]+]], [[VREGADDR]], 0x0, [M0]
39 ; BOTH: DS_READ_U8 [[REG:v[0-9]+]], [[VREGADDR]] [M0]
4040 ; BOTH: BUFFER_STORE_BYTE [[REG]],
4141 define void @local_i8_load_over_i16_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)* %in) nounwind {
4242 %gep = getelementptr i8 addrspace(3)* %in, i32 65536
4747
4848 ; BOTH-LABEL: {{^}}local_i64_load:
4949 ; BOTH-NOT: ADD
50 ; BOTH: DS_READ_B64 [[REG:v[[0-9]+:[0-9]+]]], v{{[0-9]+}}, 0x38, [M0]
50 ; BOTH: DS_READ_B64 [[REG:v[[0-9]+:[0-9]+]]], v{{[0-9]+}} offset:56 [M0]
5151 ; BOTH: BUFFER_STORE_DWORDX2 [[REG]],
5252 define void @local_i64_load(i64 addrspace(1)* %out, i64 addrspace(3)* %in) nounwind {
5353 %gep = getelementptr i64 addrspace(3)* %in, i32 7
5656 ret void
5757 }
5858
59 ; BOTH-LABEL: {{^}}local_i64_load_0_offset:
60 ; BOTH: DS_READ_B64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, 0x0, [M0]
59 ; BOTH-LABEL: {{^}}local_i64_load_0_offset
60 ; BOTH: DS_READ_B64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}} [M0]
6161 ; BOTH: BUFFER_STORE_DWORDX2 [[REG]],
6262 define void @local_i64_load_0_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %in) nounwind {
6363 %val = load i64 addrspace(3)* %in, align 8
6767
6868 ; BOTH-LABEL: {{^}}local_f64_load:
6969 ; BOTH-NOT: ADD
70 ; BOTH: DS_READ_B64 [[REG:v[[0-9]+:[0-9]+]]], v{{[0-9]+}}, 0x38, [M0]
70 ; BOTH: DS_READ_B64 [[REG:v[[0-9]+:[0-9]+]]], v{{[0-9]+}} offset:56 [M0]
7171 ; BOTH: BUFFER_STORE_DWORDX2 [[REG]],
7272 define void @local_f64_load(double addrspace(1)* %out, double addrspace(3)* %in) nounwind {
7373 %gep = getelementptr double addrspace(3)* %in, i32 7
7676 ret void
7777 }
7878
79 ; BOTH-LABEL: {{^}}local_f64_load_0_offset:
80 ; BOTH: DS_READ_B64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, 0x0, [M0]
79 ; BOTH-LABEL: {{^}}local_f64_load_0_offset
80 ; BOTH: DS_READ_B64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}} [M0]
8181 ; BOTH: BUFFER_STORE_DWORDX2 [[REG]],
8282 define void @local_f64_load_0_offset(double addrspace(1)* %out, double addrspace(3)* %in) nounwind {
8383 %val = load double addrspace(3)* %in, align 8
8787
8888 ; BOTH-LABEL: {{^}}local_i64_store:
8989 ; BOTH-NOT: ADD
90 ; BOTH: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x38 [M0]
90 ; BOTH: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:56 [M0]
9191 define void @local_i64_store(i64 addrspace(3)* %out) nounwind {
9292 %gep = getelementptr i64 addrspace(3)* %out, i32 7
9393 store i64 5678, i64 addrspace(3)* %gep, align 8
9696
9797 ; BOTH-LABEL: {{^}}local_i64_store_0_offset:
9898 ; BOTH-NOT: ADD
99 ; BOTH: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x0 [M0]
99 ; BOTH: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} [M0]
100100 define void @local_i64_store_0_offset(i64 addrspace(3)* %out) nounwind {
101101 store i64 1234, i64 addrspace(3)* %out, align 8
102102 ret void
104104
105105 ; BOTH-LABEL: {{^}}local_f64_store:
106106 ; BOTH-NOT: ADD
107 ; BOTH: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x38 [M0]
107 ; BOTH: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:56 [M0]
108108 define void @local_f64_store(double addrspace(3)* %out) nounwind {
109109 %gep = getelementptr double addrspace(3)* %out, i32 7
110110 store double 16.0, double addrspace(3)* %gep, align 8
111111 ret void
112112 }
113113
114 ; BOTH-LABEL: {{^}}local_f64_store_0_offset:
115 ; BOTH: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x0 [M0]
114 ; BOTH-LABEL: {{^}}local_f64_store_0_offset
115 ; BOTH: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} [M0]
116116 define void @local_f64_store_0_offset(double addrspace(3)* %out) nounwind {
117117 store double 20.0, double addrspace(3)* %out, align 8
118118 ret void
120120
121121 ; BOTH-LABEL: {{^}}local_v2i64_store:
122122 ; BOTH-NOT: ADD
123 ; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x78 [M0]
124 ; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x70 [M0]
123 ; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:112 [M0]
124 ; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:120 [M0]
125 ; BOTH: S_ENDPGM
125126 define void @local_v2i64_store(<2 x i64> addrspace(3)* %out) nounwind {
126127 %gep = getelementptr <2 x i64> addrspace(3)* %out, i32 7
127128 store <2 x i64> , <2 x i64> addrspace(3)* %gep, align 16
130131
131132 ; BOTH-LABEL: {{^}}local_v2i64_store_0_offset:
132133 ; BOTH-NOT: ADD
133 ; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x8 [M0]
134 ; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x0 [M0]
134 ; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} [M0]
135 ; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:8 [M0]
136 ; BOTH: S_ENDPGM
135137 define void @local_v2i64_store_0_offset(<2 x i64> addrspace(3)* %out) nounwind {
136138 store <2 x i64> , <2 x i64> addrspace(3)* %out, align 16
137139 ret void
139141
140142 ; BOTH-LABEL: {{^}}local_v4i64_store:
141143 ; BOTH-NOT: ADD
142 ; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0xf8 [M0]
143 ; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0xf0 [M0]
144 ; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0xe8 [M0]
145 ; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0xe0 [M0]
144 ; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:224 [M0]
145 ; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:232 [M0]
146 ; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:240 [M0]
147 ; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:248 [M0]
148 ; BOTH: S_ENDPGM
146149 define void @local_v4i64_store(<4 x i64> addrspace(3)* %out) nounwind {
147150 %gep = getelementptr <4 x i64> addrspace(3)* %out, i32 7
148151 store <4 x i64> , <4 x i64> addrspace(3)* %gep, align 16
151154
152155 ; BOTH-LABEL: {{^}}local_v4i64_store_0_offset:
153156 ; BOTH-NOT: ADD
154 ; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x18 [M0]
155 ; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x10 [M0]
156 ; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x8 [M0]
157 ; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x0 [M0]
157 ; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} [M0]
158 ; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:8 [M0]
159 ; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:16 [M0]
160 ; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:24 [M0]
161 ; BOTH: S_ENDPGM
158162 define void @local_v4i64_store_0_offset(<4 x i64> addrspace(3)* %out) nounwind {
159163 store <4 x i64> , <4 x i64> addrspace(3)* %out, align 16
160164 ret void
0 ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
1 ; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
1 ; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=CI -check-prefix=FUNC %s
22 ; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
33
44 ; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i32:
66 ; SI: S_LOAD_DWORD [[SPTR:s[0-9]+]],
77 ; SI: V_MOV_B32_e32 [[DATA:v[0-9]+]], 4
88 ; SI: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
9 ; SI: DS_WRXCHG_RTN_B32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]], 0x0, [M0]
9 ; SI: DS_WRXCHG_RTN_B32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]] [M0]
1010 ; SI: BUFFER_STORE_DWORD [[RESULT]],
1111 ; SI: S_ENDPGM
1212 define void @lds_atomic_xchg_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
1717
1818 ; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i32_offset:
1919 ; EG: LDS_WRXCHG_RET *
20 ; SI: DS_WRXCHG_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
20 ; SI: DS_WRXCHG_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
2121 ; SI: S_ENDPGM
2222 define void @lds_atomic_xchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
2323 %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
3232 ; SI: S_LOAD_DWORD [[SPTR:s[0-9]+]],
3333 ; SI: V_MOV_B32_e32 [[DATA:v[0-9]+]], 4
3434 ; SI: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
35 ; SI: DS_ADD_RTN_U32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]], 0x0, [M0]
35 ; SI: DS_ADD_RTN_U32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]] [M0]
3636 ; SI: BUFFER_STORE_DWORD [[RESULT]],
3737 ; SI: S_ENDPGM
3838 define void @lds_atomic_add_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
4343
4444 ; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32_offset:
4545 ; EG: LDS_ADD_RET *
46 ; SI: DS_ADD_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
46 ; SI: DS_ADD_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
4747 ; SI: S_ENDPGM
4848 define void @lds_atomic_add_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
4949 %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
5454
5555 ; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32_bad_si_offset:
5656 ; EG: LDS_ADD_RET *
57 ; SI: DS_ADD_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x0
58 ; CI: DS_ADD_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
57 ; SI: DS_ADD_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} [M0]
58 ; CI: DS_ADD_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
5959 ; SI: S_ENDPGM
6060 define void @lds_atomic_add_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
6161 %sub = sub i32 %a, %b
7070 ; EG: LDS_ADD_RET *
7171 ; SI: S_MOV_B32 [[SNEGONE:s[0-9]+]], -1
7272 ; SI: V_MOV_B32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]]
73 ; SI: DS_INC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]], 0x0
73 ; SI: DS_INC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] [M0]
7474 ; SI: S_ENDPGM
7575 define void @lds_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
7676 %result = atomicrmw add i32 addrspace(3)* %ptr, i32 1 seq_cst
8282 ; EG: LDS_ADD_RET *
8383 ; SI: S_MOV_B32 [[SNEGONE:s[0-9]+]], -1
8484 ; SI: V_MOV_B32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]]
85 ; SI: DS_INC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]], 0x10
85 ; SI: DS_INC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] offset:16
8686 ; SI: S_ENDPGM
8787 define void @lds_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
8888 %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
9393
9494 ; FUNC-LABEL: {{^}}lds_atomic_inc_ret_i32_bad_si_offset:
9595 ; EG: LDS_ADD_RET *
96 ; SI: DS_INC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x0
97 ; CI: DS_INC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
96 ; SI: DS_INC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} [M0]
97 ; CI: DS_INC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
9898 ; SI: S_ENDPGM
9999 define void @lds_atomic_inc_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
100100 %sub = sub i32 %a, %b
117117
118118 ; FUNC-LABEL: {{^}}lds_atomic_sub_ret_i32_offset:
119119 ; EG: LDS_SUB_RET *
120 ; SI: DS_SUB_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
120 ; SI: DS_SUB_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
121121 ; SI: S_ENDPGM
122122 define void @lds_atomic_sub_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
123123 %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
130130 ; EG: LDS_SUB_RET *
131131 ; SI: S_MOV_B32 [[SNEGONE:s[0-9]+]], -1
132132 ; SI: V_MOV_B32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]]
133 ; SI: DS_DEC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]], 0x0
133 ; SI: DS_DEC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] [M0]
134134 ; SI: S_ENDPGM
135135 define void @lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
136136 %result = atomicrmw sub i32 addrspace(3)* %ptr, i32 1 seq_cst
142142 ; EG: LDS_SUB_RET *
143143 ; SI: S_MOV_B32 [[SNEGONE:s[0-9]+]], -1
144144 ; SI: V_MOV_B32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]]
145 ; SI: DS_DEC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]], 0x10
145 ; SI: DS_DEC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] offset:16
146146 ; SI: S_ENDPGM
147147 define void @lds_atomic_dec_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
148148 %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
163163
164164 ; FUNC-LABEL: {{^}}lds_atomic_and_ret_i32_offset:
165165 ; EG: LDS_AND_RET *
166 ; SI: DS_AND_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
166 ; SI: DS_AND_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
167167 ; SI: S_ENDPGM
168168 define void @lds_atomic_and_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
169169 %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
184184
185185 ; FUNC-LABEL: {{^}}lds_atomic_or_ret_i32_offset:
186186 ; EG: LDS_OR_RET *
187 ; SI: DS_OR_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
187 ; SI: DS_OR_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
188188 ; SI: S_ENDPGM
189189 define void @lds_atomic_or_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
190190 %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
205205
206206 ; FUNC-LABEL: {{^}}lds_atomic_xor_ret_i32_offset:
207207 ; EG: LDS_XOR_RET *
208 ; SI: DS_XOR_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
208 ; SI: DS_XOR_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
209209 ; SI: S_ENDPGM
210210 define void @lds_atomic_xor_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
211211 %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
234234
235235 ; FUNC-LABEL: {{^}}lds_atomic_min_ret_i32_offset:
236236 ; EG: LDS_MIN_INT_RET *
237 ; SI: DS_MIN_RTN_I32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
237 ; SI: DS_MIN_RTN_I32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
238238 ; SI: S_ENDPGM
239239 define void @lds_atomic_min_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
240240 %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
255255
256256 ; FUNC-LABEL: {{^}}lds_atomic_max_ret_i32_offset:
257257 ; EG: LDS_MAX_INT_RET *
258 ; SI: DS_MAX_RTN_I32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
258 ; SI: DS_MAX_RTN_I32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
259259 ; SI: S_ENDPGM
260260 define void @lds_atomic_max_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
261261 %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
276276
277277 ; FUNC-LABEL: {{^}}lds_atomic_umin_ret_i32_offset:
278278 ; EG: LDS_MIN_UINT_RET *
279 ; SI: DS_MIN_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
279 ; SI: DS_MIN_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
280280 ; SI: S_ENDPGM
281281 define void @lds_atomic_umin_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
282282 %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
297297
298298 ; FUNC-LABEL: {{^}}lds_atomic_umax_ret_i32_offset:
299299 ; EG: LDS_MAX_UINT_RET *
300 ; SI: DS_MAX_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
300 ; SI: DS_MAX_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
301301 ; SI: S_ENDPGM
302302 define void @lds_atomic_umax_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
303303 %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
310310 ; SI: S_LOAD_DWORD [[SPTR:s[0-9]+]],
311311 ; SI: V_MOV_B32_e32 [[DATA:v[0-9]+]], 4
312312 ; SI: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
313 ; SI: DS_WRXCHG_RTN_B32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]], 0x0, [M0]
313 ; SI: DS_WRXCHG_RTN_B32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]] [M0]
314314 ; SI: S_ENDPGM
315315 define void @lds_atomic_xchg_noret_i32(i32 addrspace(3)* %ptr) nounwind {
316316 %result = atomicrmw xchg i32 addrspace(3)* %ptr, i32 4 seq_cst
318318 }
319319
320320 ; FUNC-LABEL: {{^}}lds_atomic_xchg_noret_i32_offset:
321 ; SI: DS_WRXCHG_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
321 ; SI: DS_WRXCHG_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
322322 ; SI: S_ENDPGM
323323 define void @lds_atomic_xchg_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
324324 %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
331331 ; SI: S_LOAD_DWORD [[SPTR:s[0-9]+]],
332332 ; SI: V_MOV_B32_e32 [[DATA:v[0-9]+]], 4
333333 ; SI: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
334 ; SI: DS_ADD_U32 [[VPTR]], [[DATA]], 0x0, [M0]
334 ; SI: DS_ADD_U32 [[VPTR]], [[DATA]] [M0]
335335 ; SI: S_ENDPGM
336336 define void @lds_atomic_add_noret_i32(i32 addrspace(3)* %ptr) nounwind {
337337 %result = atomicrmw add i32 addrspace(3)* %ptr, i32 4 seq_cst
339339 }
340340
341341 ; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32_offset:
342 ; SI: DS_ADD_U32 v{{[0-9]+}}, v{{[0-9]+}}, 0x10
342 ; SI: DS_ADD_U32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
343343 ; SI: S_ENDPGM
344344 define void @lds_atomic_add_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
345345 %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
347347 ret void
348348 }
349349
350 ; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32_bad_si_offset:
351 ; SI: DS_ADD_U32 v{{[0-9]+}}, v{{[0-9]+}}, 0x0
352 ; CI: DS_ADD_U32 v{{[0-9]+}}, v{{[0-9]+}}, 0x10
350 ; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32_bad_si_offset
351 ; SI: DS_ADD_U32 v{{[0-9]+}}, v{{[0-9]+}} [M0]
352 ; CI: DS_ADD_U32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 [M0]
353353 ; SI: S_ENDPGM
354354 define void @lds_atomic_add_noret_i32_bad_si_offset(i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
355355 %sub = sub i32 %a, %b
362362 ; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i32:
363363 ; SI: S_MOV_B32 [[SNEGONE:s[0-9]+]], -1
364364 ; SI: V_MOV_B32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]]
365 ; SI: DS_INC_U32 v{{[0-9]+}}, [[NEGONE]], 0x0
365 ; SI: DS_INC_U32 v{{[0-9]+}}, [[NEGONE]] [M0]
366366 ; SI: S_ENDPGM
367367 define void @lds_atomic_inc_noret_i32(i32 addrspace(3)* %ptr) nounwind {
368368 %result = atomicrmw add i32 addrspace(3)* %ptr, i32 1 seq_cst
372372 ; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i32_offset:
373373 ; SI: S_MOV_B32 [[SNEGONE:s[0-9]+]], -1
374374 ; SI: V_MOV_B32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]]
375 ; SI: DS_INC_U32 v{{[0-9]+}}, [[NEGONE]], 0x10
375 ; SI: DS_INC_U32 v{{[0-9]+}}, [[NEGONE]] offset:16
376376 ; SI: S_ENDPGM
377377 define void @lds_atomic_inc_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
378378 %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
381381 }
382382
383383 ; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i32_bad_si_offset:
384 ; SI: DS_INC_U32 v{{[0-9]+}}, v{{[0-9]+}}, 0x0
385 ; CI: DS_INC_U32 v{{[0-9]+}}, v{{[0-9]+}}, 0x10
384 ; SI: DS_INC_U32 v{{[0-9]+}}, v{{[0-9]+}}
385 ; CI: DS_INC_U32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
386386 ; SI: S_ENDPGM
387387 define void @lds_atomic_inc_noret_i32_bad_si_offset(i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
388388 %sub = sub i32 %a, %b
401401 }
402402
403403 ; FUNC-LABEL: {{^}}lds_atomic_sub_noret_i32_offset:
404 ; SI: DS_SUB_U32 v{{[0-9]+}}, v{{[0-9]+}}, 0x10
404 ; SI: DS_SUB_U32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
405405 ; SI: S_ENDPGM
406406 define void @lds_atomic_sub_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
407407 %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
412412 ; FUNC-LABEL: {{^}}lds_atomic_dec_noret_i32:
413413 ; SI: S_MOV_B32 [[SNEGONE:s[0-9]+]], -1
414414 ; SI: V_MOV_B32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]]
415 ; SI: DS_DEC_U32 v{{[0-9]+}}, [[NEGONE]], 0x0
415 ; SI: DS_DEC_U32 v{{[0-9]+}}, [[NEGONE]]
416416 ; SI: S_ENDPGM
417417 define void @lds_atomic_dec_noret_i32(i32 addrspace(3)* %ptr) nounwind {
418418 %result = atomicrmw sub i32 addrspace(3)* %ptr, i32 1 seq_cst
422422 ; FUNC-LABEL: {{^}}lds_atomic_dec_noret_i32_offset:
423423 ; SI: S_MOV_B32 [[SNEGONE:s[0-9]+]], -1
424424 ; SI: V_MOV_B32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]]
425 ; SI: DS_DEC_U32 v{{[0-9]+}}, [[NEGONE]], 0x10
425 ; SI: DS_DEC_U32 v{{[0-9]+}}, [[NEGONE]] offset:16
426426 ; SI: S_ENDPGM
427427 define void @lds_atomic_dec_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
428428 %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
439439 }
440440
441441 ; FUNC-LABEL: {{^}}lds_atomic_and_noret_i32_offset:
442 ; SI: DS_AND_B32 v{{[0-9]+}}, v{{[0-9]+}}, 0x10
442 ; SI: DS_AND_B32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
443443 ; SI: S_ENDPGM
444444 define void @lds_atomic_and_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
445445 %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
456456 }
457457
458458 ; FUNC-LABEL: {{^}}lds_atomic_or_noret_i32_offset:
459 ; SI: DS_OR_B32 v{{[0-9]+}}, v{{[0-9]+}}, 0x10
459 ; SI: DS_OR_B32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
460460 ; SI: S_ENDPGM
461461 define void @lds_atomic_or_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
462462 %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
473473 }
474474
475475 ; FUNC-LABEL: {{^}}lds_atomic_xor_noret_i32_offset:
476 ; SI: DS_XOR_B32 v{{[0-9]+}}, v{{[0-9]+}}, 0x10
476 ; SI: DS_XOR_B32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
477477 ; SI: S_ENDPGM
478478 define void @lds_atomic_xor_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
479479 %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
497497 }
498498
499499 ; FUNC-LABEL: {{^}}lds_atomic_min_noret_i32_offset:
500 ; SI: DS_MIN_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0x10
500 ; SI: DS_MIN_I32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
501501 ; SI: S_ENDPGM
502502 define void @lds_atomic_min_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
503503 %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
514514 }
515515
516516 ; FUNC-LABEL: {{^}}lds_atomic_max_noret_i32_offset:
517 ; SI: DS_MAX_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0x10
517 ; SI: DS_MAX_I32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
518518 ; SI: S_ENDPGM
519519 define void @lds_atomic_max_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
520520 %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
531531 }
532532
533533 ; FUNC-LABEL: {{^}}lds_atomic_umin_noret_i32_offset:
534 ; SI: DS_MIN_U32 v{{[0-9]+}}, v{{[0-9]+}}, 0x10
534 ; SI: DS_MIN_U32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
535535 ; SI: S_ENDPGM
536536 define void @lds_atomic_umin_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
537537 %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
548548 }
549549
550550 ; FUNC-LABEL: {{^}}lds_atomic_umax_noret_i32_offset:
551 ; SI: DS_MAX_U32 v{{[0-9]+}}, v{{[0-9]+}}, 0x10
551 ; SI: DS_MAX_U32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
552552 ; SI: S_ENDPGM
553553 define void @lds_atomic_umax_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
554554 %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
None ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
0 ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=SI %s
11
22 ; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i64:
33 ; SI: DS_WRXCHG_RTN_B64
99 }
1010
1111 ; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i64_offset:
12 ; SI: DS_WRXCHG_RTN_B64 {{.*}} 0x20
12 ; SI: DS_WRXCHG_RTN_B64 {{.*}} offset:32
1313 ; SI: S_ENDPGM
1414 define void @lds_atomic_xchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
1515 %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
3333 ; SI-DAG: V_MOV_B32_e32 v[[LOVDATA:[0-9]+]], s[[LOSDATA]]
3434 ; SI-DAG: V_MOV_B32_e32 v[[HIVDATA:[0-9]+]], s[[HISDATA]]
3535 ; SI-DAG: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[PTR]]
36 ; SI: DS_ADD_RTN_U64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}, 0x20, [M0]
36 ; SI: DS_ADD_RTN_U64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32 [M0]
3737 ; SI: BUFFER_STORE_DWORDX2 [[RESULT]],
3838 ; SI: S_ENDPGM
3939 define void @lds_atomic_add_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
4747 ; SI: S_MOV_B64 s{{\[}}[[LOSDATA:[0-9]+]]:[[HISDATA:[0-9]+]]{{\]}}, -1
4848 ; SI-DAG: V_MOV_B32_e32 v[[LOVDATA:[0-9]+]], s[[LOSDATA]]
4949 ; SI-DAG: V_MOV_B32_e32 v[[HIVDATA:[0-9]+]], s[[HISDATA]]
50 ; SI: DS_INC_RTN_U64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}},
50 ; SI: DS_INC_RTN_U64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
5151 ; SI: BUFFER_STORE_DWORDX2 [[RESULT]],
5252 ; SI: S_ENDPGM
5353 define void @lds_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
5757 }
5858
5959 ; FUNC-LABEL: {{^}}lds_atomic_inc_ret_i64_offset:
60 ; SI: DS_INC_RTN_U64 {{.*}} 0x20
60 ; SI: DS_INC_RTN_U64 {{.*}} offset:32
6161 ; SI: S_ENDPGM
6262 define void @lds_atomic_inc_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
6363 %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
7676 }
7777
7878 ; FUNC-LABEL: {{^}}lds_atomic_sub_ret_i64_offset:
79 ; SI: DS_SUB_RTN_U64 {{.*}} 0x20
79 ; SI: DS_SUB_RTN_U64 {{.*}} offset:32
8080 ; SI: S_ENDPGM
8181 define void @lds_atomic_sub_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
8282 %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
8989 ; SI: S_MOV_B64 s{{\[}}[[LOSDATA:[0-9]+]]:[[HISDATA:[0-9]+]]{{\]}}, -1
9090 ; SI-DAG: V_MOV_B32_e32 v[[LOVDATA:[0-9]+]], s[[LOSDATA]]
9191 ; SI-DAG: V_MOV_B32_e32 v[[HIVDATA:[0-9]+]], s[[HISDATA]]
92 ; SI: DS_DEC_RTN_U64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}},
92 ; SI: DS_DEC_RTN_U64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
9393 ; SI: BUFFER_STORE_DWORDX2 [[RESULT]],
9494 ; SI: S_ENDPGM
9595 define void @lds_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
9999 }
100100
101101 ; FUNC-LABEL: {{^}}lds_atomic_dec_ret_i64_offset:
102 ; SI: DS_DEC_RTN_U64 {{.*}} 0x20
102 ; SI: DS_DEC_RTN_U64 {{.*}} offset:32
103103 ; SI: S_ENDPGM
104104 define void @lds_atomic_dec_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
105105 %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
118118 }
119119
120120 ; FUNC-LABEL: {{^}}lds_atomic_and_ret_i64_offset:
121 ; SI: DS_AND_RTN_B64 {{.*}} 0x20
121 ; SI: DS_AND_RTN_B64 {{.*}} offset:32
122122 ; SI: S_ENDPGM
123123 define void @lds_atomic_and_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
124124 %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
137137 }
138138
139139 ; FUNC-LABEL: {{^}}lds_atomic_or_ret_i64_offset:
140 ; SI: DS_OR_RTN_B64 {{.*}} 0x20
140 ; SI: DS_OR_RTN_B64 {{.*}} offset:32
141141 ; SI: S_ENDPGM
142142 define void @lds_atomic_or_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
143143 %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
156156 }
157157
158158 ; FUNC-LABEL: {{^}}lds_atomic_xor_ret_i64_offset:
159 ; SI: DS_XOR_RTN_B64 {{.*}} 0x20
159 ; SI: DS_XOR_RTN_B64 {{.*}} offset:32
160160 ; SI: S_ENDPGM
161161 define void @lds_atomic_xor_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
162162 %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
183183 }
184184
185185 ; FUNC-LABEL: {{^}}lds_atomic_min_ret_i64_offset:
186 ; SI: DS_MIN_RTN_I64 {{.*}} 0x20
186 ; SI: DS_MIN_RTN_I64 {{.*}} offset:32
187187 ; SI: S_ENDPGM
188188 define void @lds_atomic_min_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
189189 %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
202202 }
203203
204204 ; FUNC-LABEL: {{^}}lds_atomic_max_ret_i64_offset:
205 ; SI: DS_MAX_RTN_I64 {{.*}} 0x20
205 ; SI: DS_MAX_RTN_I64 {{.*}} offset:32
206206 ; SI: S_ENDPGM
207207 define void @lds_atomic_max_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
208208 %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
221221 }
222222
223223 ; FUNC-LABEL: {{^}}lds_atomic_umin_ret_i64_offset:
224 ; SI: DS_MIN_RTN_U64 {{.*}} 0x20
224 ; SI: DS_MIN_RTN_U64 {{.*}} offset:32
225225 ; SI: S_ENDPGM
226226 define void @lds_atomic_umin_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
227227 %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
240240 }
241241
242242 ; FUNC-LABEL: {{^}}lds_atomic_umax_ret_i64_offset:
243 ; SI: DS_MAX_RTN_U64 {{.*}} 0x20
243 ; SI: DS_MAX_RTN_U64 {{.*}} offset:32
244244 ; SI: S_ENDPGM
245245 define void @lds_atomic_umax_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
246246 %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
258258 }
259259
260260 ; FUNC-LABEL: {{^}}lds_atomic_xchg_noret_i64_offset:
261 ; SI: DS_WRXCHG_RTN_B64 {{.*}} 0x20
261 ; SI: DS_WRXCHG_RTN_B64 {{.*}} offset:32
262262 ; SI: S_ENDPGM
263263 define void @lds_atomic_xchg_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
264264 %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
280280 ; SI-DAG: V_MOV_B32_e32 v[[LOVDATA:[0-9]+]], s[[LOSDATA]]
281281 ; SI-DAG: V_MOV_B32_e32 v[[HIVDATA:[0-9]+]], s[[HISDATA]]
282282 ; SI-DAG: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[PTR]]
283 ; SI: DS_ADD_U64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}, 0x20, [M0]
283 ; SI: DS_ADD_U64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32 [M0]
284284 ; SI: S_ENDPGM
285285 define void @lds_atomic_add_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
286286 %gep = getelementptr i64 addrspace(3)* %ptr, i64 4
292292 ; SI: S_MOV_B64 s{{\[}}[[LOSDATA:[0-9]+]]:[[HISDATA:[0-9]+]]{{\]}}, -1
293293 ; SI-DAG: V_MOV_B32_e32 v[[LOVDATA:[0-9]+]], s[[LOSDATA]]
294294 ; SI-DAG: V_MOV_B32_e32 v[[HIVDATA:[0-9]+]], s[[HISDATA]]
295 ; SI: DS_INC_U64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}},
295 ; SI: DS_INC_U64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
296296 ; SI: S_ENDPGM
297297 define void @lds_atomic_inc_noret_i64(i64 addrspace(3)* %ptr) nounwind {
298298 %result = atomicrmw add i64 addrspace(3)* %ptr, i64 1 seq_cst
300300 }
301301
302302 ; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i64_offset:
303 ; SI: DS_INC_U64 {{.*}} 0x20
303 ; SI: DS_INC_U64 {{.*}} offset:32
304304 ; SI: S_ENDPGM
305305 define void @lds_atomic_inc_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
306306 %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
317317 }
318318
319319 ; FUNC-LABEL: {{^}}lds_atomic_sub_noret_i64_offset:
320 ; SI: DS_SUB_U64 {{.*}} 0x20
320 ; SI: DS_SUB_U64 {{.*}} offset:32
321321 ; SI: S_ENDPGM
322322 define void @lds_atomic_sub_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
323323 %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
329329 ; SI: S_MOV_B64 s{{\[}}[[LOSDATA:[0-9]+]]:[[HISDATA:[0-9]+]]{{\]}}, -1
330330 ; SI-DAG: V_MOV_B32_e32 v[[LOVDATA:[0-9]+]], s[[LOSDATA]]
331331 ; SI-DAG: V_MOV_B32_e32 v[[HIVDATA:[0-9]+]], s[[HISDATA]]
332 ; SI: DS_DEC_U64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}},
332 ; SI: DS_DEC_U64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
333333 ; SI: S_ENDPGM
334334 define void @lds_atomic_dec_noret_i64(i64 addrspace(3)* %ptr) nounwind {
335335 %result = atomicrmw sub i64 addrspace(3)* %ptr, i64 1 seq_cst
337337 }
338338
339339 ; FUNC-LABEL: {{^}}lds_atomic_dec_noret_i64_offset:
340 ; SI: DS_DEC_U64 {{.*}} 0x20
340 ; SI: DS_DEC_U64 {{.*}} offset:32
341341 ; SI: S_ENDPGM
342342 define void @lds_atomic_dec_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
343343 %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
354354 }
355355
356356 ; FUNC-LABEL: {{^}}lds_atomic_and_noret_i64_offset:
357 ; SI: DS_AND_B64 {{.*}} 0x20
357 ; SI: DS_AND_B64 {{.*}} offset:32
358358 ; SI: S_ENDPGM
359359 define void @lds_atomic_and_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
360360 %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
371371 }
372372
373373 ; FUNC-LABEL: {{^}}lds_atomic_or_noret_i64_offset:
374 ; SI: DS_OR_B64 {{.*}} 0x20
374 ; SI: DS_OR_B64 {{.*}} offset:32
375375 ; SI: S_ENDPGM
376376 define void @lds_atomic_or_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
377377 %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
388388 }
389389
390390 ; FUNC-LABEL: {{^}}lds_atomic_xor_noret_i64_offset:
391 ; SI: DS_XOR_B64 {{.*}} 0x20
391 ; SI: DS_XOR_B64 {{.*}} offset:32
392392 ; SI: S_ENDPGM
393393 define void @lds_atomic_xor_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
394394 %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
412412 }
413413
414414 ; FUNC-LABEL: {{^}}lds_atomic_min_noret_i64_offset:
415 ; SI: DS_MIN_I64 {{.*}} 0x20
415 ; SI: DS_MIN_I64 {{.*}} offset:32
416416 ; SI: S_ENDPGM
417417 define void @lds_atomic_min_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
418418 %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
429429 }
430430
431431 ; FUNC-LABEL: {{^}}lds_atomic_max_noret_i64_offset:
432 ; SI: DS_MAX_I64 {{.*}} 0x20
432 ; SI: DS_MAX_I64 {{.*}} offset:32
433433 ; SI: S_ENDPGM
434434 define void @lds_atomic_max_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
435435 %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
446446 }
447447
448448 ; FUNC-LABEL: {{^}}lds_atomic_umin_noret_i64_offset:
449 ; SI: DS_MIN_U64 {{.*}} 0x20
449 ; SI: DS_MIN_U64 {{.*}} offset:32
450450 ; SI: S_ENDPGM
451451 define void @lds_atomic_umin_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
452452 %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
463463 }
464464
465465 ; FUNC-LABEL: {{^}}lds_atomic_umax_noret_i64_offset:
466 ; SI: DS_MAX_U64 {{.*}} 0x20
466 ; SI: DS_MAX_U64 {{.*}} offset:32
467467 ; SI: S_ENDPGM
468468 define void @lds_atomic_umax_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
469469 %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
2929 ; EG-CHECK: LDS_READ_RET {{[*]*}} OQAP, {{PV|T}}[[ADDRR:[0-9]*\.[XYZW]]]
3030 ; EG-CHECK-NOT: LDS_READ_RET {{[*]*}} OQAP, T[[ADDRR]]
3131 ; SI: V_ADD_I32_e32 [[SIPTR:v[0-9]+]], 16, v{{[0-9]+}}
32 ; SI: DS_READ_B32 {{v[0-9]+}}, [[SIPTR]], 0x0
33 ; CI: DS_READ_B32 {{v[0-9]+}}, [[ADDRR:v[0-9]+]], 0x10
34 ; CI: DS_READ_B32 {{v[0-9]+}}, [[ADDRR]], 0x0,
32 ; SI: DS_READ_B32 {{v[0-9]+}}, [[SIPTR]] [M0]
33 ; CI: DS_READ_B32 {{v[0-9]+}}, [[ADDRR:v[0-9]+]] offset:16 [M0]
34 ; CI: DS_READ_B32 {{v[0-9]+}}, [[ADDRR]] [M0]
3535
3636 define void @local_memory_two_objects(i32 addrspace(1)* %out) {
3737 entry:
1515
1616 ; SI-LABEL: {{^}}load_shl_base_lds_0:
1717 ; SI: V_LSHLREV_B32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
18 ; SI: DS_READ_B32 {{v[0-9]+}}, [[PTR]], 0x8, [M0]
18 ; SI: DS_READ_B32 {{v[0-9]+}}, [[PTR]] offset:8 [M0]
1919 ; SI: S_ENDPGM
2020 define void @load_shl_base_lds_0(float addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
2121 %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
3232
3333 ; SI-LABEL: {{^}}load_shl_base_lds_1:
3434 ; SI: V_LSHLREV_B32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
35 ; SI: DS_READ_B32 [[RESULT:v[0-9]+]], [[PTR]], 0x8, [M0]
35 ; SI: DS_READ_B32 [[RESULT:v[0-9]+]], [[PTR]] offset:8 [M0]
3636 ; SI: V_ADD_I32_e32 [[ADDUSE:v[0-9]+]], 8, v{{[0-9]+}}
3737 ; SI-DAG: BUFFER_STORE_DWORD [[RESULT]]
3838 ; SI-DAG: BUFFER_STORE_DWORD [[ADDUSE]]
5050
5151 @maxlds = addrspace(3) global [65536 x i8] zeroinitializer, align 4
5252
53 ; SI-LABEL: {{^}}load_shl_base_lds_max_offset:
54 ; SI: DS_READ_U8 v{{[0-9]+}}, v{{[0-9]+}}, 0xffff
53 ; SI-LABEL: {{^}}load_shl_base_lds_max_offset
54 ; SI: DS_READ_U8 v{{[0-9]+}}, v{{[0-9]+}} offset:65535
5555 ; SI: S_ENDPGM
5656 define void @load_shl_base_lds_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)* %lds, i32 addrspace(1)* %add_use) #0 {
5757 %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
6868
6969 ; SI-LABEL: {{^}}load_shl_base_lds_2:
7070 ; SI: V_LSHLREV_B32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
71 ; SI-NEXT: DS_READ2ST64_B32 {{v\[[0-9]+:[0-9]+\]}}, [[PTR]], 0x1, 0x9, [M0]
71 ; SI-NEXT: DS_READ2ST64_B32 {{v\[[0-9]+:[0-9]+\]}}, [[PTR]] offset0:1 offset1:9 [M0]
7272 ; SI: S_ENDPGM
7373 define void @load_shl_base_lds_2(float addrspace(1)* %out) #0 {
7474 %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
8484
8585 ; SI-LABEL: {{^}}store_shl_base_lds_0:
8686 ; SI: V_LSHLREV_B32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
87 ; SI: DS_WRITE_B32 [[PTR]], {{v[0-9]+}}, 0x8 [M0]
87 ; SI: DS_WRITE_B32 [[PTR]], {{v[0-9]+}} offset:8 [M0]
8888 ; SI: S_ENDPGM
8989 define void @store_shl_base_lds_0(float addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
9090 %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
114114
115115 ; SI-LABEL: {{^}}atomic_cmpxchg_shl_base_lds_0:
116116 ; SI: V_LSHLREV_B32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
117 ; SI: DS_CMPST_RTN_B32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}}, {{v[0-9]+}}, 0x8
117 ; SI: DS_CMPST_RTN_B32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}}, {{v[0-9]+}} offset:8
118118 ; SI: S_ENDPGM
119119 define void @atomic_cmpxchg_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use, i32 %swap) #0 {
120120 %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
129129
130130 ; SI-LABEL: {{^}}atomic_swap_shl_base_lds_0:
131131 ; SI: V_LSHLREV_B32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
132 ; SI: DS_WRXCHG_RTN_B32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}}, 0x8
132 ; SI: DS_WRXCHG_RTN_B32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
133133 ; SI: S_ENDPGM
134134 define void @atomic_swap_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
135135 %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
143143
144144 ; SI-LABEL: {{^}}atomic_add_shl_base_lds_0:
145145 ; SI: V_LSHLREV_B32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
146 ; SI: DS_ADD_RTN_U32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}}, 0x8
146 ; SI: DS_ADD_RTN_U32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
147147 ; SI: S_ENDPGM
148148 define void @atomic_add_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
149149 %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
157157
158158 ; SI-LABEL: {{^}}atomic_sub_shl_base_lds_0:
159159 ; SI: V_LSHLREV_B32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
160 ; SI: DS_SUB_RTN_U32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}}, 0x8
160 ; SI: DS_SUB_RTN_U32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
161161 ; SI: S_ENDPGM
162162 define void @atomic_sub_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
163163 %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
171171
172172 ; SI-LABEL: {{^}}atomic_and_shl_base_lds_0:
173173 ; SI: V_LSHLREV_B32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
174 ; SI: DS_AND_RTN_B32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}}, 0x8
174 ; SI: DS_AND_RTN_B32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
175175 ; SI: S_ENDPGM
176176 define void @atomic_and_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
177177 %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
185185
186186 ; SI-LABEL: {{^}}atomic_or_shl_base_lds_0:
187187 ; SI: V_LSHLREV_B32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
188 ; SI: DS_OR_RTN_B32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}}, 0x8
188 ; SI: DS_OR_RTN_B32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
189189 ; SI: S_ENDPGM
190190 define void @atomic_or_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
191191 %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
199199
200200 ; SI-LABEL: {{^}}atomic_xor_shl_base_lds_0:
201201 ; SI: V_LSHLREV_B32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
202 ; SI: DS_XOR_RTN_B32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}}, 0x8
202 ; SI: DS_XOR_RTN_B32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
203203 ; SI: S_ENDPGM
204204 define void @atomic_xor_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
205205 %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
223223
224224 ; SI-LABEL: {{^}}atomic_min_shl_base_lds_0:
225225 ; SI: V_LSHLREV_B32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
226 ; SI: DS_MIN_RTN_I32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}}, 0x8
226 ; SI: DS_MIN_RTN_I32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
227227 ; SI: S_ENDPGM
228228 define void @atomic_min_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
229229 %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
237237
238238 ; SI-LABEL: {{^}}atomic_max_shl_base_lds_0:
239239 ; SI: V_LSHLREV_B32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
240 ; SI: DS_MAX_RTN_I32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}}, 0x8
240 ; SI: DS_MAX_RTN_I32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
241241 ; SI: S_ENDPGM
242242 define void @atomic_max_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
243243 %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
251251
252252 ; SI-LABEL: {{^}}atomic_umin_shl_base_lds_0:
253253 ; SI: V_LSHLREV_B32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
254 ; SI: DS_MIN_RTN_U32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}}, 0x8
254 ; SI: DS_MIN_RTN_U32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
255255 ; SI: S_ENDPGM
256256 define void @atomic_umin_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
257257 %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
265265
266266 ; SI-LABEL: {{^}}atomic_umax_shl_base_lds_0:
267267 ; SI: V_LSHLREV_B32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
268 ; SI: DS_MAX_RTN_U32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}}, 0x8
268 ; SI: DS_MAX_RTN_U32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
269269 ; SI: S_ENDPGM
270270 define void @atomic_umax_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
271271 %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
4040 ret void
4141 }
4242
43 ; SI-LABEL: {{^}}load_lds_i64_align_4_with_offset:
44 ; SI: DS_READ2_B32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]}}, 0x8, 0x9
43 ; SI-LABEL: {{^}}load_lds_i64_align_4_with_offset
44 ; SI: DS_READ2_B32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]}} offset0:8 offset1:9
4545 ; SI: S_ENDPGM
4646 define void @load_lds_i64_align_4_with_offset(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
4747 %ptr = getelementptr i64 addrspace(3)* %in, i32 4
5252
5353 ; SI-LABEL: {{^}}load_lds_i64_align_4_with_split_offset:
5454 ; This tests the case where the lo offset is 8-bits, but the hi offset is 9-bits
55 ; SI: DS_READ2_B32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]}}, 0x0, 0x1
55 ; SI: DS_READ2_B32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]}} offset0:0 offset1:1
5656 ; SI: S_ENDPGM
5757 define void @load_lds_i64_align_4_with_split_offset(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
5858 %ptr = bitcast i64 addrspace(3)* %in to i32 addrspace(3)*
7878 ret void
7979 }
8080
81 ; SI-LABEL: {{^}}store_lds_i64_align_4_with_offset:
82 ; SI: DS_WRITE2_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x8, 0x9
81 ; SI-LABEL: {{^}}store_lds_i64_align_4_with_offset
82 ; SI: DS_WRITE2_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:8 offset1:9
8383 ; SI: S_ENDPGM
8484 define void @store_lds_i64_align_4_with_offset(i64 addrspace(3)* %out) #0 {
8585 %ptr = getelementptr i64 addrspace(3)* %out, i32 4
8989
9090 ; SI-LABEL: {{^}}store_lds_i64_align_4_with_split_offset:
9191 ; This tests the case where the lo offset is 8-bits, but the hi offset is 9-bits
92 ; SI: DS_WRITE2_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]}}, 0x0, 0x1
92 ; SI: DS_WRITE2_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
9393 ; SI: S_ENDPGM
9494 define void @store_lds_i64_align_4_with_split_offset(i64 addrspace(3)* %out) #0 {
9595 %ptr = bitcast i64 addrspace(3)* %out to i32 addrspace(3)*