llvm.org GIT mirror llvm / 85979be
[AMDGPU] Handle OR as operand of raw load/store

Summary:
Use isBaseWithConstantOffset() which handles OR as an operand to llvm.amdgcn.raw.buffer.load and llvm.amdgcn.raw.buffer.store.

Change-Id: Ifefb9dc5ded8710d333df07ab1900b230e33539a
Reviewers: nhaehnle, mareko, arsenm
Reviewed By: arsenm
Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D55999

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@350208 91177308-0d34-0410-b5e6-96231b3b80d8

Piotr Sobczak 9 months ago
3 changed file(s) with 95 addition(s) and 13 deletion(s). Raw diff Collapse all Expand all
60046004 const unsigned MaxImm = 4095;
60056005 SDValue N0 = Offset;
60066006 ConstantSDNode *C1 = nullptr;
6007 if (N0.getOpcode() == ISD::ADD) {
6008 if ((C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))))
6009 N0 = N0.getOperand(0);
6010 } else if ((C1 = dyn_cast<ConstantSDNode>(N0)))
6007
6008 if ((C1 = dyn_cast<ConstantSDNode>(N0)))
60116009 N0 = SDValue();
6010 else if (DAG.isBaseWithConstantOffset(N0)) {
6011 C1 = cast<ConstantSDNode>(N0.getOperand(1));
6012 N0 = N0.getOperand(0);
6013 }
60126014
60136015 if (C1) {
60146016 unsigned ImmOffset = C1->getZExtValue();
9696 ret float %val
9797 }
9898
99 ;CHECK-LABEL: {{^}}buffer_load_x1_offen_merged:
99 ;CHECK-LABEL: {{^}}buffer_load_x1_offen_merged_and:
100100 ;CHECK-NEXT: %bb.
101101 ;CHECK-NEXT: buffer_load_dwordx4 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:4
102102 ;CHECK-NEXT: buffer_load_dwordx2 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:28
103103 ;CHECK: s_waitcnt
104 define amdgpu_ps void @buffer_load_x1_offen_merged(<4 x i32> inreg %rsrc, i32 %a) {
104 define amdgpu_ps void @buffer_load_x1_offen_merged_and(<4 x i32> inreg %rsrc, i32 %a) {
105105 main_body:
106106 %a1 = add i32 %a, 4
107107 %a2 = add i32 %a, 8
120120 ret void
121121 }
122122
123 ;CHECK-LABEL: {{^}}buffer_load_x1_offen_merged_or:
124 ;CHECK-NEXT: %bb.
125 ;CHECK-NEXT: v_lshlrev_b32_e32 v{{[0-9]}}, 6, v0
126 ;CHECK-NEXT: buffer_load_dwordx4 v[{{[0-9]}}:{{[0-9]}}], v{{[0-9]}}, s[0:3], 0 offen offset:4
127 ;CHECK-NEXT: buffer_load_dwordx2 v[{{[0-9]}}:{{[0-9]}}], v{{[0-9]}}, s[0:3], 0 offen offset:28
128 ;CHECK: s_waitcnt
129 define amdgpu_ps void @buffer_load_x1_offen_merged_or(<4 x i32> inreg %rsrc, i32 %inp) {
130 main_body:
131 %a = shl i32 %inp, 6
132 %a1 = or i32 %a, 4
133 %a2 = or i32 %a, 8
134 %a3 = or i32 %a, 12
135 %a4 = or i32 %a, 16
136 %a5 = or i32 %a, 28
137 %a6 = or i32 %a, 32
138 %r1 = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %a1, i32 0, i32 0)
139 %r2 = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %a2, i32 0, i32 0)
140 %r3 = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %a3, i32 0, i32 0)
141 %r4 = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %a4, i32 0, i32 0)
142 %r5 = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %a5, i32 0, i32 0)
143 %r6 = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %a6, i32 0, i32 0)
144 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r1, float %r2, float %r3, float %r4, i1 true, i1 true)
145 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r5, float %r6, float undef, float undef, i1 true, i1 true)
146 ret void
147 }
148
123149 ;CHECK-LABEL: {{^}}buffer_load_x1_offen_merged_glc_slc:
124150 ;CHECK-NEXT: %bb.
125151 ;CHECK-NEXT: buffer_load_dwordx2 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:4{{$}}
145171 ret void
146172 }
147173
148 ;CHECK-LABEL: {{^}}buffer_load_x2_offen_merged:
174 ;CHECK-LABEL: {{^}}buffer_load_x2_offen_merged_and:
149175 ;CHECK-NEXT: %bb.
150176 ;CHECK-NEXT: buffer_load_dwordx4 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:4
151177 ;CHECK: s_waitcnt
152 define amdgpu_ps void @buffer_load_x2_offen_merged(<4 x i32> inreg %rsrc, i32 %a) {
153 main_body:
178 define amdgpu_ps void @buffer_load_x2_offen_merged_and(<4 x i32> inreg %rsrc, i32 %a) {
179 main_body:
180 %a1 = add i32 %a, 4
181 %a2 = add i32 %a, 12
182 %vr1 = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %a1, i32 0, i32 0)
183 %vr2 = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %a2, i32 0, i32 0)
184 %r1 = extractelement <2 x float> %vr1, i32 0
185 %r2 = extractelement <2 x float> %vr1, i32 1
186 %r3 = extractelement <2 x float> %vr2, i32 0
187 %r4 = extractelement <2 x float> %vr2, i32 1
188 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r1, float %r2, float %r3, float %r4, i1 true, i1 true)
189 ret void
190 }
191
192 ;CHECK-LABEL: {{^}}buffer_load_x2_offen_merged_or:
193 ;CHECK-NEXT: %bb.
194 ;CHECK-NEXT: v_lshlrev_b32_e32 v{{[0-9]}}, 4, v0
195 ;CHECK-NEXT: buffer_load_dwordx4 v[{{[0-9]}}:{{[0-9]}}], v{{[0-9]}}, s[0:3], 0 offen offset:4
196 ;CHECK: s_waitcnt
197 define amdgpu_ps void @buffer_load_x2_offen_merged_or(<4 x i32> inreg %rsrc, i32 %inp) {
198 main_body:
199 %a = shl i32 %inp, 4
154200 %a1 = add i32 %a, 4
155201 %a2 = add i32 %a, 12
156202 %vr1 = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %a1, i32 0, i32 0)
6666 ret void
6767 }
6868
69 ;CHECK-LABEL: {{^}}buffer_store_x1_offen_merged:
69 ;CHECK-LABEL: {{^}}buffer_store_x1_offen_merged_and:
7070 ;CHECK-NOT: s_waitcnt
7171 ;CHECK-DAG: buffer_store_dwordx4 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:4
7272 ;CHECK-DAG: buffer_store_dwordx2 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:28
73 define amdgpu_ps void @buffer_store_x1_offen_merged(<4 x i32> inreg %rsrc, i32 %a, float %v1, float %v2, float %v3, float %v4, float %v5, float %v6) {
73 define amdgpu_ps void @buffer_store_x1_offen_merged_and(<4 x i32> inreg %rsrc, i32 %a, float %v1, float %v2, float %v3, float %v4, float %v5, float %v6) {
7474 %a1 = add i32 %a, 4
7575 %a2 = add i32 %a, 8
7676 %a3 = add i32 %a, 12
8686 ret void
8787 }
8888
89 ;CHECK-LABEL: {{^}}buffer_store_x1_offen_merged_or:
90 ;CHECK-NOT: s_waitcnt
91 ;CHECK-DAG: buffer_store_dwordx4 v[{{[0-9]}}:{{[0-9]}}], v{{[0-9]}}, s[0:3], 0 offen offset:4
92 ;CHECK-DAG: buffer_store_dwordx2 v[{{[0-9]}}:{{[0-9]}}], v{{[0-9]}}, s[0:3], 0 offen offset:28
93 define amdgpu_ps void @buffer_store_x1_offen_merged_or(<4 x i32> inreg %rsrc, i32 %inp, float %v1, float %v2, float %v3, float %v4, float %v5, float %v6) {
94 %a = shl i32 %inp, 6
95 %a1 = add i32 %a, 4
96 %a2 = add i32 %a, 8
97 %a3 = add i32 %a, 12
98 %a4 = add i32 %a, 16
99 %a5 = add i32 %a, 28
100 %a6 = add i32 %a, 32
101 call void @llvm.amdgcn.raw.buffer.store.f32(float %v1, <4 x i32> %rsrc, i32 %a1, i32 0, i32 0)
102 call void @llvm.amdgcn.raw.buffer.store.f32(float %v2, <4 x i32> %rsrc, i32 %a2, i32 0, i32 0)
103 call void @llvm.amdgcn.raw.buffer.store.f32(float %v3, <4 x i32> %rsrc, i32 %a3, i32 0, i32 0)
104 call void @llvm.amdgcn.raw.buffer.store.f32(float %v4, <4 x i32> %rsrc, i32 %a4, i32 0, i32 0)
105 call void @llvm.amdgcn.raw.buffer.store.f32(float %v5, <4 x i32> %rsrc, i32 %a5, i32 0, i32 0)
106 call void @llvm.amdgcn.raw.buffer.store.f32(float %v6, <4 x i32> %rsrc, i32 %a6, i32 0, i32 0)
107 ret void
108 }
109
110
89111 ;CHECK-LABEL: {{^}}buffer_store_x1_offen_merged_glc_slc:
90112 ;CHECK-NOT: s_waitcnt
91113 ;CHECK-DAG: buffer_store_dwordx2 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:4{{$}}
107129 ret void
108130 }
109131
110 ;CHECK-LABEL: {{^}}buffer_store_x2_offen_merged:
132 ;CHECK-LABEL: {{^}}buffer_store_x2_offen_merged_and:
111133 ;CHECK-NOT: s_waitcnt
112134 ;CHECK: buffer_store_dwordx4 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:4
113 define amdgpu_ps void @buffer_store_x2_offen_merged(<4 x i32> inreg %rsrc, i32 %a, <2 x float> %v1, <2 x float> %v2) {
135 define amdgpu_ps void @buffer_store_x2_offen_merged_and(<4 x i32> inreg %rsrc, i32 %a, <2 x float> %v1, <2 x float> %v2) {
136 %a1 = add i32 %a, 4
137 %a2 = add i32 %a, 12
138 call void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float> %v1, <4 x i32> %rsrc, i32 %a1, i32 0, i32 0)
139 call void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float> %v2, <4 x i32> %rsrc, i32 %a2, i32 0, i32 0)
140 ret void
141 }
142
143 ;CHECK-LABEL: {{^}}buffer_store_x2_offen_merged_or:
144 ;CHECK-NOT: s_waitcnt
145 ;CHECK: buffer_store_dwordx4 v[{{[0-9]}}:{{[0-9]}}], v{{[0-9]}}, s[0:3], 0 offen offset:4
146 define amdgpu_ps void @buffer_store_x2_offen_merged_or(<4 x i32> inreg %rsrc, i32 %inp, <2 x float> %v1, <2 x float> %v2) {
147 %a = shl i32 %inp, 4
114148 %a1 = add i32 %a, 4
115149 %a2 = add i32 %a, 12
116150 call void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float> %v1, <4 x i32> %rsrc, i32 %a1, i32 0, i32 0)