llvm.org GIT mirror llvm / f9c9936
AMDGPU: Fix assert on trunc from bitcast of build_vector. The v2i64 argument is lowered to a bitcast of a v4i32 build_vector. The truncate-of-bitcast fold would then attempt to use a single i32 element as the source of the vector truncate, but it would really need to collect 2 elements from the build_vector to produce the intended truncate. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@353202 91177308-0d34-0410-b5e6-96231b3b80d8 Matt Arsenault 1 year, 22 days ago
2 changed file(s) with 19 addition(s) and 3 deletion(s). Raw diff Collapse all Expand all
30873087 SDValue Src = N->getOperand(0);
30883088
30893089 // vt1 (truncate (bitcast (build_vector vt0:x, ...))) -> vt1 (bitcast vt0:x)
3090 if (Src.getOpcode() == ISD::BITCAST) {
3090 if (Src.getOpcode() == ISD::BITCAST && !VT.isVector()) {
30913091 SDValue Vec = Src.getOperand(0);
30923092 if (Vec.getOpcode() == ISD::BUILD_VECTOR) {
30933093 SDValue Elt0 = Vec.getOperand(0);
0 ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs< %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s
1 ; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs< %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s
12
23 ; Make sure high constant 0 isn't pointlessly materialized
2425 ; GCN: _load_dword
2526 ; GCN-NOT: _load_dword
2627 ; GCN-NOT: v_mov_b32
27 ; GCN: v_add_u16_e32 v0, 4, v0
28 ; VI: v_add_u16_e32 v0, 4, v0
2829 define i16 @trunc_bitcast_v2i32_to_i16(<2 x i32> %bar) {
2930 %load0 = load i32, i32 addrspace(1)* undef
3031 %load1 = load i32, i32 addrspace(1)* null
4142 ; GCN: _load_dword
4243 ; GCN-NOT: _load_dword
4344 ; GCN-NOT: v_mov_b32
44 ; GCN: v_add_u16_e32 v0, 4, v0
45 ; VI: v_add_u16_e32 v0, 4, v0
4546 define i16 @trunc_bitcast_v2f32_to_i16(<2 x float> %bar) {
4647 %load0 = load float, float addrspace(1)* undef
4748 %load1 = load float, float addrspace(1)* null
7980 store <2 x i16> %tmp14, <2 x i16> addrspace(1)* %tmp15, align 4
8081 ret void
8182 }
83
84 ; GCN-LABEL: {{^}}trunc_v2i64_arg_to_v2i16:
85 ; GCN: v_lshlrev_b32_e32 v1, 16, v2
86
87 ; SI-NEXT: v_and_b32_e32 v0, 0xffff, v0
88 ; SI-NEXT: v_or_b32_e32 v0, v0, v1
89 ; SI-NEXT: v_lshrrev_b32_e32 v1, 16, v0
90
91 ; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
92
93 ; GCN-NEXT: s_setpc_b64
94 define <2 x i16> @trunc_v2i64_arg_to_v2i16(<2 x i64> %arg0) #0 {
95 %trunc = trunc <2 x i64> %arg0 to <2 x i16>
96 ret <2 x i16> %trunc
97 }