llvm.org GIT mirror llvm / dfa4156
[AMDGPU] Fix DPP operand order in atomic optimizer Summary: Ensure order of operands in DPP atomic optimizer final WWM step is appropriate for sub instructions. Change-Id: I631d050e1c00a3b4bc7c11a90437064403c4cf30 Reviewers: sheredom, tpr Reviewed By: sheredom Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, t-tye, jfb, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D58900 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@355394 91177308-0d34-0410-b5e6-96231b3b80d8 Carl Ritson 7 months ago
6 changed file(s) with 16 addition(s) and 7 deletion(s). Raw diff Collapse all Expand all
310310 }
311311
312312 LaneOffset = B.CreateIntrinsic(Intrinsic::amdgcn_wwm, Ty, NewV);
313 NewV = B.CreateBinOp(Op, NewV, SetInactive);
313 NewV = B.CreateBinOp(Op, SetInactive, NewV);
314314
315315 // Read the value from the last lane, which has accumlated the values of
316316 // each active lane in the wavefront. This will be our new value with which
111111 ; GFX7LESS-NOT: v_mbcnt_hi_u32_b32
112112 ; GFX7LESS-NOT: s_bcnt1_i32_b64
113113 ; GFX7LESS: buffer_atomic_sub v{{[0-9]+}}
114 ; GFX8MORE: v_mov_b32_dpp v[[wave_shr1:[0-9]+]], v{{[0-9]+}} wave_shr:1 row_mask:0xf bank_mask:0xf
114 ; GFX8MORE: v_mov_b32_dpp v[[wave_shr1:[0-9]+]], v[[sub_value:[0-9]+]] wave_shr:1 row_mask:0xf bank_mask:0xf
115115 ; GFX8MORE: v_mov_b32_dpp v{{[0-9]+}}, v[[wave_shr1]] row_shr:1 row_mask:0xf bank_mask:0xf
116116 ; GFX8MORE: v_mov_b32_dpp v{{[0-9]+}}, v[[wave_shr1]] row_shr:2 row_mask:0xf bank_mask:0xf
117117 ; GFX8MORE: v_mov_b32_dpp v{{[0-9]+}}, v[[wave_shr1]] row_shr:3 row_mask:0xf bank_mask:0xf
119119 ; GFX8MORE: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} row_shr:8 row_mask:0xf bank_mask:0xc
120120 ; GFX8MORE: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} row_bcast:15 row_mask:0xa bank_mask:0xf
121121 ; GFX8MORE: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} row_bcast:31 row_mask:0xc bank_mask:0xf
122 ; GFX8MORE: v_readlane_b32 s[[scalar_value:[0-9]+]], v{{[0-9]+}}, 63
122 ; GFX8MORE: v_sub_u32_e32 v[[sub_value]],{{( vcc,)?}} v[[sub_value]], v{{[0-9]+}}
123 ; GFX8MORE: v_readlane_b32 s[[scalar_value:[0-9]+]], v[[sub_value]], 63
123124 ; GFX8MORE: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]]
124125 ; GFX8MORE: buffer_atomic_sub v[[value]]
125126 define amdgpu_kernel void @sub_i32_varying_vdata(i32 addrspace(1)* %out, <4 x i32> %inout) {
132132 ; GFX7LESS-NOT: v_mbcnt_hi_u32_b32
133133 ; GFX7LESS-NOT: s_bcnt1_i32_b64
134134 ; GFX7LESS: buffer_atomic_sub v{{[0-9]+}}
135 ; GFX8MORE: v_readlane_b32 s[[scalar_value:[0-9]+]], v{{[0-9]+}}, 63
135 ; GFX8MORE: v_mov_b32_dpp v{{[0-9]+}}, v[[sub_value:[0-9]+]] wave_shr:1 row_mask:0xf bank_mask:0xf
136 ; GFX8MORE: v_sub_u32_e32 v[[sub_value]],{{( vcc,)?}} v[[sub_value]], v{{[0-9]+}}
137 ; GFX8MORE: v_readlane_b32 s[[scalar_value:[0-9]+]], v[[sub_value]], 63
136138 ; GFX8MORE: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]]
137139 ; GFX8MORE: buffer_atomic_sub v[[value]]
138140 define amdgpu_kernel void @sub_i32_varying(i32 addrspace(1)* %out, i32 addrspace(1)* %inout) {
135135 ; GFX7LESS-NOT: v_mbcnt_hi_u32_b32
136136 ; GFX7LESS-NOT: s_bcnt1_i32_b64
137137 ; GFX7LESS: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
138 ; GFX8MORE: v_readlane_b32 s[[scalar_value:[0-9]+]], v{{[0-9]+}}, 63
138 ; GFX8MORE: v_mov_b32_dpp v{{[0-9]+}}, v[[sub_value:[0-9]+]] wave_shr:1 row_mask:0xf bank_mask:0xf
139 ; GFX8MORE: v_sub_u32_e32 v[[sub_value]],{{( vcc,)?}} v[[sub_value]], v{{[0-9]+}}
140 ; GFX8MORE: v_readlane_b32 s[[scalar_value:[0-9]+]], v[[sub_value]], 63
139141 ; GFX8MORE: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]]
140142 ; GFX8MORE: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v[[value]]
141143 define amdgpu_kernel void @sub_i32_varying(i32 addrspace(1)* %out) {
103103 ; GFX7LESS-NOT: v_mbcnt_hi_u32_b32
104104 ; GFX7LESS-NOT: s_bcnt1_i32_b64
105105 ; GFX7LESS: buffer_atomic_sub v{{[0-9]+}}
106 ; GFX8MORE: v_readlane_b32 s[[scalar_value:[0-9]+]], v{{[0-9]+}}, 63
106 ; GFX8MORE: v_mov_b32_dpp v{{[0-9]+}}, v[[sub_value:[0-9]+]] wave_shr:1 row_mask:0xf bank_mask:0xf
107 ; GFX8MORE: v_sub_u32_e32 v[[sub_value]],{{( vcc,)?}} v[[sub_value]], v{{[0-9]+}}
108 ; GFX8MORE: v_readlane_b32 s[[scalar_value:[0-9]+]], v[[sub_value]], 63
107109 ; GFX8MORE: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]]
108110 ; GFX8MORE: buffer_atomic_sub v[[value]]
109111 define amdgpu_kernel void @sub_i32_varying_vdata(i32 addrspace(1)* %out, <4 x i32> %inout) {
116116 ; GFX7LESS-NOT: v_mbcnt_hi_u32_b32
117117 ; GFX7LESS-NOT: s_bcnt1_i32_b64
118118 ; GFX7LESS: buffer_atomic_sub v{{[0-9]+}}
119 ; GFX8MORE: v_readlane_b32 s[[scalar_value:[0-9]+]], v{{[0-9]+}}, 63
119 ; GFX8MORE: v_mov_b32_dpp v{{[0-9]+}}, v[[sub_value:[0-9]+]] wave_shr:1 row_mask:0xf bank_mask:0xf
120 ; GFX8MORE: v_sub_u32_e32 v[[sub_value]],{{( vcc,)?}} v[[sub_value]], v{{[0-9]+}}
121 ; GFX8MORE: v_readlane_b32 s[[scalar_value:[0-9]+]], v[[sub_value]], 63
120122 ; GFX8MORE: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]]
121123 ; GFX8MORE: buffer_atomic_sub v[[value]]
122124 define amdgpu_kernel void @sub_i32_varying_vdata(i32 addrspace(1)* %out, <4 x i32> %inout) {