llvm.org GIT mirror llvm / ec4cb33
R600/SI: Use a ComplexPattern for DS loads and stores git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@216278 91177308-0d34-0410-b5e6-96231b3b80d8 Tom Stellard 5 years ago
8 changed file(s) with 172 addition(s) and 123 deletion(s). Raw diff Collapse all Expand all
8787 SDValue& Offset);
8888 bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
8989 bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
90 bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
91 unsigned OffsetBits) const;
92 bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
9093 void SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
9194 SDValue &SOffset, SDValue &Offset, SDValue &Offen,
9295 SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
743746 return CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops);
744747 }
745748
749 bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
750 unsigned OffsetBits) const {
751 const AMDGPUSubtarget &ST = TM.getSubtarget();
752 if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
753 (OffsetBits == 8 && !isUInt<8>(Offset)))
754 return false;
755
756 if (ST.getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS)
757 return true;
758
759 // On Southern Islands instruction with a negative base value and an offset
760 // don't seem to work.
761 return CurDAG->SignBitIsZero(Base);
762 }
763
764 bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
765 SDValue &Offset) const {
766 if (CurDAG->isBaseWithConstantOffset(Addr)) {
767 SDValue N0 = Addr.getOperand(0);
768 SDValue N1 = Addr.getOperand(1);
769 ConstantSDNode *C1 = cast(N1);
770 if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
771 // (add n0, c0)
772 Base = N0;
773 Offset = N1;
774 return true;
775 }
776 }
777
778 // default case
779 Base = Addr;
780 Offset = CurDAG->getTargetConstant(0, MVT::i16);
781 return true;
782 }
783
746784 static SDValue wrapAddr64Rsrc(SelectionDAG *DAG, SDLoc DL, SDValue Ptr) {
747785 return SDValue(DAG->getMachineNode(AMDGPU::SI_ADDR64_RSRC, DL, MVT::v4i32,
748786 Ptr), 0);
189189 //===----------------------------------------------------------------------===//
190190 // Complex patterns
191191 //===----------------------------------------------------------------------===//
192
193 def DS1Addr1Offset : ComplexPattern<i32, 2, "SelectDS1Addr1Offset">;
192194
193195 def MUBUFAddr32 : ComplexPattern;
194196 def MUBUFAddr64 : ComplexPattern;
25192519 /********** Load/Store Patterns **********/
25202520 /********** ======================= **********/
25212521
2522 multiclass DSReadPat {
2523 def : Pat <
2524 (vt (frag (add i32:$ptr, (i32 IMM16bit:$offset)))),
2525 (inst (i1 0), $ptr, (as_i16imm $offset))
2526 >;
2527
2528 def : Pat <
2529 (frag i32:$src0),
2530 (vt (inst 0, $src0, 0))
2531 >;
2532 }
2533
2534 defm : DSReadPat ;
2535 defm : DSReadPat ;
2536 defm : DSReadPat ;
2537 defm : DSReadPat ;
2538 defm : DSReadPat ;
2539 defm : DSReadPat ;
2540
2541 multiclass DSWritePat {
2542 def : Pat <
2543 (frag vt:$value, (add i32:$ptr, (i32 IMM16bit:$offset))),
2544 (inst (i1 0), $ptr, $value, (as_i16imm $offset))
2545 >;
2546
2547 def : Pat <
2548 (frag vt:$val, i32:$ptr),
2549 (inst 0, $ptr, $val, 0)
2550 >;
2551 }
2552
2553 defm : DSWritePat ;
2554 defm : DSWritePat ;
2555 defm : DSWritePat ;
2556 defm : DSWritePat ;
2522 class DSReadPat : Pat <
2523 (vt (frag (DS1Addr1Offset i32:$ptr, i32:$offset))),
2524 (inst (i1 0), $ptr, (as_i16imm $offset))
2525 >;
2526
2527 def : DSReadPat ;
2528 def : DSReadPat ;
2529 def : DSReadPat ;
2530 def : DSReadPat ;
2531 def : DSReadPat ;
2532 def : DSReadPat ;
2533
2534 class DSWritePat : Pat <
2535 (frag vt:$value, (DS1Addr1Offset i32:$ptr, i32:$offset)),
2536 (inst (i1 0), $ptr, $value, (as_i16imm $offset))
2537 >;
2538
2539 def : DSWritePat ;
2540 def : DSWritePat ;
2541 def : DSWritePat ;
2542 def : DSWritePat ;
25572543
25582544 multiclass DSAtomicRetPat {
25592545 def : Pat <
None ; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
0 ; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
1 ; RUN: llc < %s -march=r600 -mcpu=bonaire -verify-machineinstrs | FileCheck --check-prefix=CI --check-prefix=FUNC %s
12
23 ; On Southern Islands GPUs the local address space(3) uses 32-bit pointers and
34 ; the global address space(1) uses 64-bit pointers. These tests check to make sure
89 ; Instructions with B32, U32, and I32 in their name take 32-bit operands, while
910 ; instructions with B64, U64, and I64 take 64-bit operands.
1011
11 ; CHECK-LABEL: @local_address_load
12 ; FUNC-LABEL: @local_address_load
1213 ; CHECK: V_MOV_B32_e{{32|64}} [[PTR:v[0-9]]]
1314 ; CHECK: DS_READ_B32 v{{[0-9]+}}, [[PTR]]
1415 define void @local_address_load(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
1819 ret void
1920 }
2021
21 ; CHECK-LABEL: @local_address_gep
22 ; FUNC-LABEL: @local_address_gep
2223 ; CHECK: S_ADD_I32 [[SPTR:s[0-9]]]
2324 ; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
2425 ; CHECK: DS_READ_B32 [[VPTR]]
3031 ret void
3132 }
3233
33 ; CHECK-LABEL: @local_address_gep_const_offset
34 ; FUNC-LABEL: @local_address_gep_const_offset
3435 ; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], s{{[0-9]+}}
3536 ; CHECK: DS_READ_B32 v{{[0-9]+}}, [[VPTR]], 0x4,
3637 define void @local_address_gep_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
4243 }
4344
4445 ; Offset too large, can't fold into 16-bit immediate offset.
45 ; CHECK-LABEL: @local_address_gep_large_const_offset
46 ; FUNC-LABEL: @local_address_gep_large_const_offset
4647 ; CHECK: S_ADD_I32 [[SPTR:s[0-9]]], s{{[0-9]+}}, 0x10004
4748 ; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
4849 ; CHECK: DS_READ_B32 [[VPTR]]
5455 ret void
5556 }
5657
57 ; CHECK-LABEL: @null_32bit_lds_ptr:
58 ; FUNC-LABEL: @null_32bit_lds_ptr:
5859 ; CHECK: V_CMP_NE_I32
5960 ; CHECK-NOT: V_CMP_NE_I32
6061 ; CHECK: V_CNDMASK_B32
6566 ret void
6667 }
6768
68 ; CHECK-LABEL: @mul_32bit_ptr:
69 ; FUNC-LABEL: @mul_32bit_ptr:
6970 ; CHECK: V_MUL_LO_I32
7071 ; CHECK-NEXT: V_ADD_I32_e32
7172 ; CHECK-NEXT: DS_READ_B32
7879
7980 @g_lds = addrspace(3) global float zeroinitializer, align 4
8081
81 ; CHECK-LABEL: @infer_ptr_alignment_global_offset:
82 ; FUNC-LABEL: @infer_ptr_alignment_global_offset:
8283 ; CHECK: V_MOV_B32_e32 [[REG:v[0-9]+]], 0
8384 ; CHECK: DS_READ_B32 v{{[0-9]+}}, [[REG]]
8485 define void @infer_ptr_alignment_global_offset(float addrspace(1)* %out, i32 %tid) {
9192 @ptr = addrspace(3) global i32 addrspace(3)* null
9293 @dst = addrspace(3) global [16384 x i32] zeroinitializer
9394
94 ; CHECK-LABEL: @global_ptr:
95 ; FUNC-LABEL: @global_ptr:
9596 ; CHECK: DS_WRITE_B32
9697 define void @global_ptr() nounwind {
9798 store i32 addrspace(3)* getelementptr ([16384 x i32] addrspace(3)* @dst, i32 0, i32 16), i32 addrspace(3)* addrspace(3)* @ptr
9899 ret void
99100 }
100101
101 ; CHECK-LABEL: @local_address_store
102 ; FUNC-LABEL: @local_address_store
102103 ; CHECK: DS_WRITE_B32
103104 define void @local_address_store(i32 addrspace(3)* %out, i32 %val) {
104105 store i32 %val, i32 addrspace(3)* %out
105106 ret void
106107 }
107108
108 ; CHECK-LABEL: @local_address_gep_store
109 ; FUNC-LABEL: @local_address_gep_store
109110 ; CHECK: S_ADD_I32 [[SADDR:s[0-9]+]],
110111 ; CHECK: V_MOV_B32_e32 [[ADDR:v[0-9]+]], [[SADDR]]
111112 ; CHECK: DS_WRITE_B32 [[ADDR]], v{{[0-9]+}},
115116 ret void
116117 }
117118
118 ; CHECK-LABEL: @local_address_gep_const_offset_store
119 ; FUNC-LABEL: @local_address_gep_const_offset_store
119120 ; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], s{{[0-9]+}}
120121 ; CHECK: V_MOV_B32_e32 [[VAL:v[0-9]+]], s{{[0-9]+}}
121122 ; CHECK: DS_WRITE_B32 [[VPTR]], [[VAL]], 0x4
126127 }
127128
128129 ; Offset too large, can't fold into 16-bit immediate offset.
129 ; CHECK-LABEL: @local_address_gep_large_const_offset_store
130 ; FUNC-LABEL: @local_address_gep_large_const_offset_store
130131 ; CHECK: S_ADD_I32 [[SPTR:s[0-9]]], s{{[0-9]+}}, 0x10004
131132 ; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
132133 ; CHECK: DS_WRITE_B32 [[VPTR]], v{{[0-9]+}}, 0
None ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
0 ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI --check-prefix=CHECK %s
1 ; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI --check-prefix=CHECK %s
12
23 declare i32 @llvm.r600.read.tidig.x() #0
34 declare void @llvm.AMDGPU.barrier.local() #1
45
56 ; Function Attrs: nounwind
6 ; SI-LABEL: @signed_ds_offset_addressing_loop
7 ; SI: BB0_1:
8 ; SI: V_ADD_I32_e32 [[VADDR:v[0-9]+]],
7 ; CHECK-LABEL: @signed_ds_offset_addressing_loop
8 ; CHECK: BB0_1:
9 ; CHECK: V_ADD_I32_e32 [[VADDR:v[0-9]+]],
910 ; SI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR]], 0x0
10 ; SI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR]], 0x4
11 ; SI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR]], 0x80
12 ; SI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR]], 0x84
13 ; SI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR]], 0x100
14 ; SI: S_ENDPGM
11 ; SI-DAG: V_ADD_I32_e32 [[VADDR4:v[0-9]+]], 4, [[VADDR]]
12 ; SI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR4]], 0x0
13 ; SI-DAG: V_ADD_I32_e32 [[VADDR0x80:v[0-9]+]], 0x80, [[VADDR]]
14 ; SI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR0x80]], 0x0
15 ; SI-DAG: V_ADD_I32_e32 [[VADDR0x84:v[0-9]+]], 0x84, [[VADDR]]
16 ; SI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR0x84]], 0x0
17 ; SI-DAG: V_ADD_I32_e32 [[VADDR0x100:v[0-9]+]], 0x100, [[VADDR]]
18 ; SI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR0x100]], 0x0
19
20 ; CI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR]], 0x0
21 ; CI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR]], 0x4
22 ; CI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR]], 0x80
23 ; CI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR]], 0x84
24 ; CI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR]], 0x100
25 ; CHECK: S_ENDPGM
1526 define void @signed_ds_offset_addressing_loop(float addrspace(1)* noalias nocapture %out, float addrspace(3)* noalias nocapture readonly %lptr, i32 %n) #2 {
1627 entry:
1728 %x.i = tail call i32 @llvm.r600.read.tidig.x() #0
None ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck %s
0 ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck --check-prefix=SI --check-prefix=CHECK %s
1 ; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs< %s | FileCheck --check-prefix=CI --check-prefix=CHECK %s
12
23 define void @use_gep_address_space([1024 x i32] addrspace(3)* %array) nounwind {
34 ; CHECK-LABEL: @use_gep_address_space:
1011
1112 define void @use_gep_address_space_large_offset([1024 x i32] addrspace(3)* %array) nounwind {
1213 ; CHECK-LABEL: @use_gep_address_space_large_offset:
13 ; CHECK: S_ADD_I32
14 ; The LDS offset will be 65536 bytes, which is larger than the size of LDS on
15 ; SI, which is why it is being OR'd with the base pointer.
16 ; SI: S_OR_B32
17 ; CI: S_ADD_I32
1418 ; CHECK: DS_WRITE_B32
1519 %p = getelementptr [1024 x i32] addrspace(3)* %array, i16 0, i16 16384
1620 store i32 99, i32 addrspace(3)* %p
None ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
0 ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck --check-prefix=SI --check-prefix=BOTH %s
1 ; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs< %s | FileCheck --check-prefix=CI --check-prefix=BOTH %s
12
2 ; SI-LABEL: @local_i32_load
3 ; SI: DS_READ_B32 [[REG:v[0-9]+]], v{{[0-9]+}}, 0x1c, [M0]
4 ; SI: BUFFER_STORE_DWORD [[REG]],
3 ; BOTH-LABEL: @local_i32_load
4 ; BOTH: DS_READ_B32 [[REG:v[0-9]+]], v{{[0-9]+}}, 0x1c, [M0]
5 ; BOTH: BUFFER_STORE_DWORD [[REG]],
56 define void @local_i32_load(i32 addrspace(1)* %out, i32 addrspace(3)* %in) nounwind {
67 %gep = getelementptr i32 addrspace(3)* %in, i32 7
78 %val = load i32 addrspace(3)* %gep, align 4
910 ret void
1011 }
1112
12 ; SI-LABEL: @local_i32_load_0_offset
13 ; SI: DS_READ_B32 [[REG:v[0-9]+]], v{{[0-9]+}}, 0x0, [M0]
14 ; SI: BUFFER_STORE_DWORD [[REG]],
13 ; BOTH-LABEL: @local_i32_load_0_offset
14 ; BOTH: DS_READ_B32 [[REG:v[0-9]+]], v{{[0-9]+}}, 0x0, [M0]
15 ; BOTH: BUFFER_STORE_DWORD [[REG]],
1516 define void @local_i32_load_0_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) nounwind {
1617 %val = load i32 addrspace(3)* %in, align 4
1718 store i32 %val, i32 addrspace(1)* %out, align 4
1819 ret void
1920 }
2021
21 ; SI-LABEL: @local_i8_load_i16_max_offset
22 ; SI-NOT: ADD
23 ; SI: DS_READ_U8 [[REG:v[0-9]+]], {{v[0-9]+}}, 0xffff, [M0]
24 ; SI: BUFFER_STORE_BYTE [[REG]],
22 ; BOTH-LABEL: @local_i8_load_i16_max_offset
23 ; BOTH-NOT: ADD
24 ; BOTH: DS_READ_U8 [[REG:v[0-9]+]], {{v[0-9]+}}, 0xffff, [M0]
25 ; BOTH: BUFFER_STORE_BYTE [[REG]],
2526 define void @local_i8_load_i16_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)* %in) nounwind {
2627 %gep = getelementptr i8 addrspace(3)* %in, i32 65535
2728 %val = load i8 addrspace(3)* %gep, align 4
2930 ret void
3031 }
3132
32 ; SI-LABEL: @local_i8_load_over_i16_max_offset
33 ; SI: S_ADD_I32 [[ADDR:s[0-9]+]], s{{[0-9]+}}, 0x10000
34 ; SI: V_MOV_B32_e32 [[VREGADDR:v[0-9]+]], [[ADDR]]
35 ; SI: DS_READ_U8 [[REG:v[0-9]+]], [[VREGADDR]], 0x0, [M0]
36 ; SI: BUFFER_STORE_BYTE [[REG]],
33 ; BOTH-LABEL: @local_i8_load_over_i16_max_offset
34 ; The LDS offset will be 65536 bytes, which is larger than the size of LDS on
35 ; SI, which is why it is being OR'd with the base pointer.
36 ; SI: S_OR_B32 [[ADDR:s[0-9]+]], s{{[0-9]+}}, 0x10000
37 ; CI: S_ADD_I32 [[ADDR:s[0-9]+]], s{{[0-9]+}}, 0x10000
38 ; BOTH: V_MOV_B32_e32 [[VREGADDR:v[0-9]+]], [[ADDR]]
39 ; BOTH: DS_READ_U8 [[REG:v[0-9]+]], [[VREGADDR]], 0x0, [M0]
40 ; BOTH: BUFFER_STORE_BYTE [[REG]],
3741 define void @local_i8_load_over_i16_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)* %in) nounwind {
3842 %gep = getelementptr i8 addrspace(3)* %in, i32 65536
3943 %val = load i8 addrspace(3)* %gep, align 4
4145 ret void
4246 }
4347
44 ; SI-LABEL: @local_i64_load
45 ; SI-NOT: ADD
46 ; SI: DS_READ_B64 [[REG:v[[0-9]+:[0-9]+]]], v{{[0-9]+}}, 0x38, [M0]
47 ; SI: BUFFER_STORE_DWORDX2 [[REG]],
48 ; BOTH-LABEL: @local_i64_load
49 ; BOTH-NOT: ADD
50 ; BOTH: DS_READ_B64 [[REG:v[[0-9]+:[0-9]+]]], v{{[0-9]+}}, 0x38, [M0]
51 ; BOTH: BUFFER_STORE_DWORDX2 [[REG]],
4852 define void @local_i64_load(i64 addrspace(1)* %out, i64 addrspace(3)* %in) nounwind {
4953 %gep = getelementptr i64 addrspace(3)* %in, i32 7
5054 %val = load i64 addrspace(3)* %gep, align 8
5256 ret void
5357 }
5458
55 ; SI-LABEL: @local_i64_load_0_offset
56 ; SI: DS_READ_B64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, 0x0, [M0]
57 ; SI: BUFFER_STORE_DWORDX2 [[REG]],
59 ; BOTH-LABEL: @local_i64_load_0_offset
60 ; BOTH: DS_READ_B64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, 0x0, [M0]
61 ; BOTH: BUFFER_STORE_DWORDX2 [[REG]],
5862 define void @local_i64_load_0_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %in) nounwind {
5963 %val = load i64 addrspace(3)* %in, align 8
6064 store i64 %val, i64 addrspace(1)* %out, align 8
6165 ret void
6266 }
6367
64 ; SI-LABEL: @local_f64_load
65 ; SI-NOT: ADD
66 ; SI: DS_READ_B64 [[REG:v[[0-9]+:[0-9]+]]], v{{[0-9]+}}, 0x38, [M0]
67 ; SI: BUFFER_STORE_DWORDX2 [[REG]],
68 ; BOTH-LABEL: @local_f64_load
69 ; BOTH-NOT: ADD
70 ; BOTH: DS_READ_B64 [[REG:v[[0-9]+:[0-9]+]]], v{{[0-9]+}}, 0x38, [M0]
71 ; BOTH: BUFFER_STORE_DWORDX2 [[REG]],
6872 define void @local_f64_load(double addrspace(1)* %out, double addrspace(3)* %in) nounwind {
6973 %gep = getelementptr double addrspace(3)* %in, i32 7
7074 %val = load double addrspace(3)* %gep, align 8
7276 ret void
7377 }
7478
75 ; SI-LABEL: @local_f64_load_0_offset
76 ; SI: DS_READ_B64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, 0x0, [M0]
77 ; SI: BUFFER_STORE_DWORDX2 [[REG]],
79 ; BOTH-LABEL: @local_f64_load_0_offset
80 ; BOTH: DS_READ_B64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, 0x0, [M0]
81 ; BOTH: BUFFER_STORE_DWORDX2 [[REG]],
7882 define void @local_f64_load_0_offset(double addrspace(1)* %out, double addrspace(3)* %in) nounwind {
7983 %val = load double addrspace(3)* %in, align 8
8084 store double %val, double addrspace(1)* %out, align 8
8185 ret void
8286 }
8387
84 ; SI-LABEL: @local_i64_store
85 ; SI-NOT: ADD
86 ; SI: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x38 [M0]
88 ; BOTH-LABEL: @local_i64_store
89 ; BOTH-NOT: ADD
90 ; BOTH: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x38 [M0]
8791 define void @local_i64_store(i64 addrspace(3)* %out) nounwind {
8892 %gep = getelementptr i64 addrspace(3)* %out, i32 7
8993 store i64 5678, i64 addrspace(3)* %gep, align 8
9094 ret void
9195 }
9296
93 ; SI-LABEL: @local_i64_store_0_offset
94 ; SI-NOT: ADD
95 ; SI: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x0 [M0]
97 ; BOTH-LABEL: @local_i64_store_0_offset
98 ; BOTH-NOT: ADD
99 ; BOTH: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x0 [M0]
96100 define void @local_i64_store_0_offset(i64 addrspace(3)* %out) nounwind {
97101 store i64 1234, i64 addrspace(3)* %out, align 8
98102 ret void
99103 }
100104
101 ; SI-LABEL: @local_f64_store
102 ; SI-NOT: ADD
103 ; SI: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x38 [M0]
105 ; BOTH-LABEL: @local_f64_store
106 ; BOTH-NOT: ADD
107 ; BOTH: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x38 [M0]
104108 define void @local_f64_store(double addrspace(3)* %out) nounwind {
105109 %gep = getelementptr double addrspace(3)* %out, i32 7
106110 store double 16.0, double addrspace(3)* %gep, align 8
107111 ret void
108112 }
109113
110 ; SI-LABEL: @local_f64_store_0_offset
111 ; SI: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x0 [M0]
114 ; BOTH-LABEL: @local_f64_store_0_offset
115 ; BOTH: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x0 [M0]
112116 define void @local_f64_store_0_offset(double addrspace(3)* %out) nounwind {
113117 store double 20.0, double addrspace(3)* %out, align 8
114118 ret void
115119 }
116120
117 ; SI-LABEL: @local_v2i64_store
118 ; SI-NOT: ADD
119 ; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x78 [M0]
120 ; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x70 [M0]
121 ; BOTH-LABEL: @local_v2i64_store
122 ; BOTH-NOT: ADD
123 ; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x78 [M0]
124 ; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x70 [M0]
121125 define void @local_v2i64_store(<2 x i64> addrspace(3)* %out) nounwind {
122126 %gep = getelementptr <2 x i64> addrspace(3)* %out, i32 7
123127 store <2 x i64> , <2 x i64> addrspace(3)* %gep, align 16
124128 ret void
125129 }
126130
127 ; SI-LABEL: @local_v2i64_store_0_offset
128 ; SI-NOT: ADD
129 ; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x8 [M0]
130 ; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x0 [M0]
131 ; BOTH-LABEL: @local_v2i64_store_0_offset
132 ; BOTH-NOT: ADD
133 ; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x8 [M0]
134 ; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x0 [M0]
131135 define void @local_v2i64_store_0_offset(<2 x i64> addrspace(3)* %out) nounwind {
132136 store <2 x i64> , <2 x i64> addrspace(3)* %out, align 16
133137 ret void
134138 }
135139
136 ; SI-LABEL: @local_v4i64_store
137 ; SI-NOT: ADD
138 ; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0xf8 [M0]
139 ; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0xf0 [M0]
140 ; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0xe8 [M0]
141 ; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0xe0 [M0]
140 ; BOTH-LABEL: @local_v4i64_store
141 ; BOTH-NOT: ADD
142 ; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0xf8 [M0]
143 ; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0xf0 [M0]
144 ; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0xe8 [M0]
145 ; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0xe0 [M0]
142146 define void @local_v4i64_store(<4 x i64> addrspace(3)* %out) nounwind {
143147 %gep = getelementptr <4 x i64> addrspace(3)* %out, i32 7
144148 store <4 x i64> , <4 x i64> addrspace(3)* %gep, align 16
145149 ret void
146150 }
147151
148 ; SI-LABEL: @local_v4i64_store_0_offset
149 ; SI-NOT: ADD
150 ; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x18 [M0]
151 ; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x10 [M0]
152 ; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x8 [M0]
153 ; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x0 [M0]
152 ; BOTH-LABEL: @local_v4i64_store_0_offset
153 ; BOTH-NOT: ADD
154 ; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x18 [M0]
155 ; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x10 [M0]
156 ; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x8 [M0]
157 ; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x0 [M0]
154158 define void @local_v4i64_store_0_offset(<4 x i64> addrspace(3)* %out) nounwind {
155159 store <4 x i64> , <4 x i64> addrspace(3)* %out, align 16
156160 ret void
0 ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s
1 ; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
1 ; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK --check-prefix=SI %s
2 ; RUN: llc < %s -march=r600 -mcpu=bonaire -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK --check-prefix=CI %s
23
34 @local_memory_two_objects.local_mem0 = internal unnamed_addr addrspace(3) global [4 x i32] zeroinitializer, align 4
45 @local_memory_two_objects.local_mem1 = internal unnamed_addr addrspace(3) global [4 x i32] zeroinitializer, align 4
2728 ; constant offsets.
2829 ; EG-CHECK: LDS_READ_RET {{[*]*}} OQAP, {{PV|T}}[[ADDRR:[0-9]*\.[XYZW]]]
2930 ; EG-CHECK-NOT: LDS_READ_RET {{[*]*}} OQAP, T[[ADDRR]]
30 ; SI-CHECK: DS_READ_B32 {{v[0-9]+}}, [[ADDRR:v[0-9]+]], 0x10
31 ; SI-CHECK: DS_READ_B32 {{v[0-9]+}}, [[ADDRR]], 0x0,
31 ; SI: V_ADD_I32_e32 [[SIPTR:v[0-9]+]], 16, v{{[0-9]+}}
32 ; SI: DS_READ_B32 {{v[0-9]+}}, [[SIPTR]], 0x0
33 ; CI: DS_READ_B32 {{v[0-9]+}}, [[ADDRR:v[0-9]+]], 0x10
34 ; CI: DS_READ_B32 {{v[0-9]+}}, [[ADDRR]], 0x0,
3235
3336 define void @local_memory_two_objects(i32 addrspace(1)* %out) {
3437 entry: