llvm.org GIT mirror llvm / 8111e8d
Merging r340959:
------------------------------------------------------------------------
r340959 | mareko | 2018-08-29 22:03:00 +0200 (Wed, 29 Aug 2018) | 9 lines

AMDGPU: Handle 32-bit address wraparounds for SMRD opcodes

Summary: This fixes GPU hangs with OpenGL bindless handle arithmetic.

Reviewers: arsenm, nhaehnle

Subscribers: kzhuravl, jvesely, wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits

Differential Revision: https://reviews.llvm.org/D51203
------------------------------------------------------------------------

git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_70@341351 91177308-0d34-0410-b5e6-96231b3b80d8

Hans Wennborg 2 years ago
2 changed file(s) with 30 addition(s) and 15 deletion(s). Raw diff Collapse all Expand all
14501450 SDValue &Offset, bool &Imm) const {
14511451 SDLoc SL(Addr);
14521452
1453 if (CurDAG->isBaseWithConstantOffset(Addr)) {
1453 // A 32-bit (address + offset) should not cause unsigned 32-bit integer
1454 // wraparound, because s_load instructions perform the addition in 64 bits.
1455 if ((Addr.getValueType() != MVT::i32 ||
1456 Addr->getFlags().hasNoUnsignedWrap()) &&
1457 CurDAG->isBaseWithConstantOffset(Addr)) {
14541458 SDValue N0 = Addr.getOperand(0);
14551459 SDValue N1 = Addr.getOperand(1);
14561460
1111 ; VIGFX9-DAG: s_load_dword s{{[0-9]}}, s[0:1], 0x0
1212 ; VIGFX9-DAG: s_load_dword s{{[0-9]}}, s[2:3], 0x8
1313 define amdgpu_vs float @load_i32(i32 addrspace(6)* inreg %p0, i32 addrspace(6)* inreg %p1) #0 {
14 %gep1 = getelementptr i32, i32 addrspace(6)* %p1, i64 2
14 %gep1 = getelementptr inbounds i32, i32 addrspace(6)* %p1, i32 2
1515 %r0 = load i32, i32 addrspace(6)* %p0
1616 %r1 = load i32, i32 addrspace(6)* %gep1
1717 %r = add i32 %r0, %r1
2828 ; VIGFX9-DAG: s_load_dwordx2 s[{{.*}}], s[0:1], 0x0
2929 ; VIGFX9-DAG: s_load_dwordx2 s[{{.*}}], s[2:3], 0x10
3030 define amdgpu_vs <2 x float> @load_v2i32(<2 x i32> addrspace(6)* inreg %p0, <2 x i32> addrspace(6)* inreg %p1) #0 {
31 %gep1 = getelementptr <2 x i32>, <2 x i32> addrspace(6)* %p1, i64 2
31 %gep1 = getelementptr inbounds <2 x i32>, <2 x i32> addrspace(6)* %p1, i32 2
3232 %r0 = load <2 x i32>, <2 x i32> addrspace(6)* %p0
3333 %r1 = load <2 x i32>, <2 x i32> addrspace(6)* %gep1
3434 %r = add <2 x i32> %r0, %r1
4545 ; VIGFX9-DAG: s_load_dwordx4 s[{{.*}}], s[0:1], 0x0
4646 ; VIGFX9-DAG: s_load_dwordx4 s[{{.*}}], s[2:3], 0x20
4747 define amdgpu_vs <4 x float> @load_v4i32(<4 x i32> addrspace(6)* inreg %p0, <4 x i32> addrspace(6)* inreg %p1) #0 {
48 %gep1 = getelementptr <4 x i32>, <4 x i32> addrspace(6)* %p1, i64 2
48 %gep1 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(6)* %p1, i32 2
4949 %r0 = load <4 x i32>, <4 x i32> addrspace(6)* %p0
5050 %r1 = load <4 x i32>, <4 x i32> addrspace(6)* %gep1
5151 %r = add <4 x i32> %r0, %r1
6262 ; VIGFX9-DAG: s_load_dwordx8 s[{{.*}}], s[0:1], 0x0
6363 ; VIGFX9-DAG: s_load_dwordx8 s[{{.*}}], s[2:3], 0x40
6464 define amdgpu_vs <8 x float> @load_v8i32(<8 x i32> addrspace(6)* inreg %p0, <8 x i32> addrspace(6)* inreg %p1) #0 {
65 %gep1 = getelementptr <8 x i32>, <8 x i32> addrspace(6)* %p1, i64 2
65 %gep1 = getelementptr inbounds <8 x i32>, <8 x i32> addrspace(6)* %p1, i32 2
6666 %r0 = load <8 x i32>, <8 x i32> addrspace(6)* %p0
6767 %r1 = load <8 x i32>, <8 x i32> addrspace(6)* %gep1
6868 %r = add <8 x i32> %r0, %r1
7979 ; VIGFX9-DAG: s_load_dwordx16 s[{{.*}}], s[0:1], 0x0
8080 ; VIGFX9-DAG: s_load_dwordx16 s[{{.*}}], s[2:3], 0x80
8181 define amdgpu_vs <16 x float> @load_v16i32(<16 x i32> addrspace(6)* inreg %p0, <16 x i32> addrspace(6)* inreg %p1) #0 {
82 %gep1 = getelementptr <16 x i32>, <16 x i32> addrspace(6)* %p1, i64 2
82 %gep1 = getelementptr inbounds <16 x i32>, <16 x i32> addrspace(6)* %p1, i32 2
8383 %r0 = load <16 x i32>, <16 x i32> addrspace(6)* %p0
8484 %r1 = load <16 x i32>, <16 x i32> addrspace(6)* %gep1
8585 %r = add <16 x i32> %r0, %r1
9696 ; VIGFX9-DAG: s_load_dword s{{[0-9]}}, s[0:1], 0x0
9797 ; VIGFX9-DAG: s_load_dword s{{[0-9]}}, s[2:3], 0x8
9898 define amdgpu_vs float @load_float(float addrspace(6)* inreg %p0, float addrspace(6)* inreg %p1) #0 {
99 %gep1 = getelementptr float, float addrspace(6)* %p1, i64 2
99 %gep1 = getelementptr inbounds float, float addrspace(6)* %p1, i32 2
100100 %r0 = load float, float addrspace(6)* %p0
101101 %r1 = load float, float addrspace(6)* %gep1
102102 %r = fadd float %r0, %r1
112112 ; VIGFX9-DAG: s_load_dwordx2 s[{{.*}}], s[0:1], 0x0
113113 ; VIGFX9-DAG: s_load_dwordx2 s[{{.*}}], s[2:3], 0x10
114114 define amdgpu_vs <2 x float> @load_v2float(<2 x float> addrspace(6)* inreg %p0, <2 x float> addrspace(6)* inreg %p1) #0 {
115 %gep1 = getelementptr <2 x float>, <2 x float> addrspace(6)* %p1, i64 2
115 %gep1 = getelementptr inbounds <2 x float>, <2 x float> addrspace(6)* %p1, i32 2
116116 %r0 = load <2 x float>, <2 x float> addrspace(6)* %p0
117117 %r1 = load <2 x float>, <2 x float> addrspace(6)* %gep1
118118 %r = fadd <2 x float> %r0, %r1
128128 ; VIGFX9-DAG: s_load_dwordx4 s[{{.*}}], s[0:1], 0x0
129129 ; VIGFX9-DAG: s_load_dwordx4 s[{{.*}}], s[2:3], 0x20
130130 define amdgpu_vs <4 x float> @load_v4float(<4 x float> addrspace(6)* inreg %p0, <4 x float> addrspace(6)* inreg %p1) #0 {
131 %gep1 = getelementptr <4 x float>, <4 x float> addrspace(6)* %p1, i64 2
131 %gep1 = getelementptr inbounds <4 x float>, <4 x float> addrspace(6)* %p1, i32 2
132132 %r0 = load <4 x float>, <4 x float> addrspace(6)* %p0
133133 %r1 = load <4 x float>, <4 x float> addrspace(6)* %gep1
134134 %r = fadd <4 x float> %r0, %r1
144144 ; VIGFX9-DAG: s_load_dwordx8 s[{{.*}}], s[0:1], 0x0
145145 ; VIGFX9-DAG: s_load_dwordx8 s[{{.*}}], s[2:3], 0x40
146146 define amdgpu_vs <8 x float> @load_v8float(<8 x float> addrspace(6)* inreg %p0, <8 x float> addrspace(6)* inreg %p1) #0 {
147 %gep1 = getelementptr <8 x float>, <8 x float> addrspace(6)* %p1, i64 2
147 %gep1 = getelementptr inbounds <8 x float>, <8 x float> addrspace(6)* %p1, i32 2
148148 %r0 = load <8 x float>, <8 x float> addrspace(6)* %p0
149149 %r1 = load <8 x float>, <8 x float> addrspace(6)* %gep1
150150 %r = fadd <8 x float> %r0, %r1
160160 ; VIGFX9-DAG: s_load_dwordx16 s[{{.*}}], s[0:1], 0x0
161161 ; VIGFX9-DAG: s_load_dwordx16 s[{{.*}}], s[2:3], 0x80
162162 define amdgpu_vs <16 x float> @load_v16float(<16 x float> addrspace(6)* inreg %p0, <16 x float> addrspace(6)* inreg %p1) #0 {
163 %gep1 = getelementptr <16 x float>, <16 x float> addrspace(6)* %p1, i64 2
163 %gep1 = getelementptr inbounds <16 x float>, <16 x float> addrspace(6)* %p1, i32 2
164164 %r0 = load <16 x float>, <16 x float> addrspace(6)* %p0
165165 %r1 = load <16 x float>, <16 x float> addrspace(6)* %gep1
166166 %r = fadd <16 x float> %r0, %r1
211211 %22 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %5) #8
212212 %23 = bitcast float %22 to i32
213213 %24 = shl i32 %23, 1
214 %25 = getelementptr [0 x <8 x i32>], [0 x <8 x i32>] addrspace(6)* %1, i32 0, i32 %24, !amdgpu.uniform !0
214 %25 = getelementptr inbounds [0 x <8 x i32>], [0 x <8 x i32>] addrspace(6)* %1, i32 0, i32 %24, !amdgpu.uniform !0
215215 %26 = load <8 x i32>, <8 x i32> addrspace(6)* %25, align 32, !invariant.load !0
216216 %27 = shl i32 %23, 2
217217 %28 = or i32 %27, 3
218218 %29 = bitcast [0 x <8 x i32>] addrspace(6)* %1 to [0 x <4 x i32>] addrspace(6)*
219 %30 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(6)* %29, i32 0, i32 %28, !amdgpu.uniform !0
219 %30 = getelementptr inbounds [0 x <4 x i32>], [0 x <4 x i32>] addrspace(6)* %29, i32 0, i32 %28, !amdgpu.uniform !0
220220 %31 = load <4 x i32>, <4 x i32> addrspace(6)* %30, align 16, !invariant.load !0
221221 %32 = call nsz <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float 0.0, <8 x i32> %26, <4 x i32> %31, i1 0, i32 0, i32 0) #8
222222 %33 = extractelement <4 x float> %32, i32 0
245245 %22 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %5) #8
246246 %23 = bitcast float %22 to i32
247247 %24 = shl i32 %23, 1
248 %25 = getelementptr [0 x <8 x i32>], [0 x <8 x i32>] addrspace(6)* %1, i32 0, i32 %24
248 %25 = getelementptr inbounds [0 x <8 x i32>], [0 x <8 x i32>] addrspace(6)* %1, i32 0, i32 %24
249249 %26 = load <8 x i32>, <8 x i32> addrspace(6)* %25, align 32, !invariant.load !0
250250 %27 = shl i32 %23, 2
251251 %28 = or i32 %27, 3
252252 %29 = bitcast [0 x <8 x i32>] addrspace(6)* %1 to [0 x <4 x i32>] addrspace(6)*
253 %30 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(6)* %29, i32 0, i32 %28
253 %30 = getelementptr inbounds [0 x <4 x i32>], [0 x <4 x i32>] addrspace(6)* %29, i32 0, i32 %28
254254 %31 = load <4 x i32>, <4 x i32> addrspace(6)* %30, align 16, !invariant.load !0
255255 %32 = call nsz <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float 0.0, <8 x i32> %26, <4 x i32> %31, i1 0, i32 0, i32 0) #8
256256 %33 = extractelement <4 x float> %32, i32 0
267267 ret <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %43
268268 }
269269
270 ; GCN-LABEL: {{^}}load_addr_no_fold:
271 ; GCN-DAG: s_add_i32 s0, s0, 4
272 ; GCN-DAG: s_mov_b32 s1, 0
273 ; GCN: s_load_dword s{{[0-9]}}, s[0:1], 0x0
274 define amdgpu_vs float @load_addr_no_fold(i32 addrspace(6)* inreg noalias %p0) #0 {
275 %gep1 = getelementptr i32, i32 addrspace(6)* %p0, i32 1
276 %r1 = load i32, i32 addrspace(6)* %gep1
277 %r2 = bitcast i32 %r1 to float
278 ret float %r2
279 }
280
270281 ; Function Attrs: nounwind readnone speculatable
271282 declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #6
272283