llvm.org GIT mirror llvm / d06f393
DAGCombiner: Turn truncate of a bitcasted vector into an extract. On AMDGPU, where i64 operations are often bitcast to v2i32 and back, this pattern shows up regularly and breaks some expected combines on i64, such as load width reduction. This fixes some test failures in a future commit when i64 loads are changed to promote. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@262397 91177308-0d34-0410-b5e6-96231b3b80d8 Matt Arsenault 4 years ago
3 changed file(s) with 114 addition(s) and 6 deletion(s). Raw diff Collapse all Expand all
71757175 }
71767176 }
71777177
7178 // Fold truncate of a bitcast of a vector to an extract of the low vector
7179 // element.
7180 //
7181 // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, 0
7182 if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
7183 SDValue VecSrc = N0.getOperand(0);
7184 EVT SrcVT = VecSrc.getValueType();
7185 if (SrcVT.isVector() && SrcVT.getScalarType() == VT) {
7186 SDLoc SL(N);
7187
7188 EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
7189 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT,
7190 VecSrc, DAG.getConstant(0, SL, IdxVT));
7191 }
7192 }
7193
71787194 // Simplify the operands using demanded-bits information.
71797195 if (!VT.isVector() &&
71807196 SimplifyDemandedBits(SDValue(N, 0)))
395395 ; GCN-LABEL: {{^}}global_extload_v3f16_to_v3f64:
396396
397397 ; GCN: buffer_load_dwordx2 [[LOAD:v\[[0-9]+:[0-9]+\]]]
398 ; GCN: v_lshrrev_b32_e32 {{v[0-9]+}}, 16, {{v[0-9]+}}
399
400 ; GCN: v_cvt_f32_f16_e32
401 ; GCN: v_cvt_f32_f16_e32
402 ; GCN: v_cvt_f32_f16_e32
403 ; GCN-NOT: v_cvt_f32_f16_e32
398 ; GCN: v_cvt_f32_f16_e32
399 ; GCN: v_cvt_f32_f16_e32
400 ; GCN-DAG: v_lshrrev_b32_e32 {{v[0-9]+}}, 16, {{v[0-9]+}}
401 ; GCN: v_cvt_f32_f16_e32
402 ; GCN-NOT: v_cvt_f32_f16
404403
405404 ; GCN: v_cvt_f64_f32_e32
406405 ; GCN: v_cvt_f64_f32_e32
0 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s
1 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s
2
3 ; CHECK-LABEL: {{^}}trunc_i64_bitcast_v2i32:
4 ; CHECK: buffer_load_dword v
5 ; CHECK: buffer_store_dword v
6 define void @trunc_i64_bitcast_v2i32(i32 addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
7 %ld = load <2 x i32>, <2 x i32> addrspace(1)* %in
8 %bc = bitcast <2 x i32> %ld to i64
9 %trunc = trunc i64 %bc to i32
10 store i32 %trunc, i32 addrspace(1)* %out
11 ret void
12 }
13
14 ; CHECK-LABEL: {{^}}trunc_i96_bitcast_v3i32:
15 ; CHECK: buffer_load_dword v
16 ; CHECK: buffer_store_dword v
17 define void @trunc_i96_bitcast_v3i32(i32 addrspace(1)* %out, <3 x i32> addrspace(1)* %in) {
18 %ld = load <3 x i32>, <3 x i32> addrspace(1)* %in
19 %bc = bitcast <3 x i32> %ld to i96
20 %trunc = trunc i96 %bc to i32
21 store i32 %trunc, i32 addrspace(1)* %out
22 ret void
23 }
24
25 ; CHECK-LABEL: {{^}}trunc_i128_bitcast_v4i32:
26 ; CHECK: buffer_load_dword v
27 ; CHECK: buffer_store_dword v
28 define void @trunc_i128_bitcast_v4i32(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
29 %ld = load <4 x i32>, <4 x i32> addrspace(1)* %in
30 %bc = bitcast <4 x i32> %ld to i128
31 %trunc = trunc i128 %bc to i32
32 store i32 %trunc, i32 addrspace(1)* %out
33 ret void
34 }
35
36 ; Don't want load width reduced in this case.
37 ; CHECK-LABEL: {{^}}trunc_i16_bitcast_v2i16:
38 ; CHECK: buffer_load_dword [[VAL:v[0-9]+]]
39 ; CHECK: buffer_store_short [[VAL]]
40 define void @trunc_i16_bitcast_v2i16(i16 addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
41 %ld = load <2 x i16>, <2 x i16> addrspace(1)* %in
42 %bc = bitcast <2 x i16> %ld to i32
43 %trunc = trunc i32 %bc to i16
44 store i16 %trunc, i16 addrspace(1)* %out
45 ret void
46 }
47
48 ; FIXME: Don't want load width reduced here.
49 ; CHECK-LABEL: {{^}}trunc_i16_bitcast_v4i16:
50 ; CHECK: buffer_load_ushort [[VAL:v[0-9]+]]
51 ; CHECK: buffer_store_short [[VAL]]
52 define void @trunc_i16_bitcast_v4i16(i16 addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
53 %ld = load <4 x i16>, <4 x i16> addrspace(1)* %in
54 %bc = bitcast <4 x i16> %ld to i64
55 %trunc = trunc i64 %bc to i16
56 store i16 %trunc, i16 addrspace(1)* %out
57 ret void
58 }
59
60 ; FIXME: Don't want load width reduced in this case.
61 ; CHECK-LABEL: {{^}}trunc_i8_bitcast_v2i8:
62 ; CHECK: buffer_load_ubyte [[VAL:v[0-9]+]]
63 ; CHECK: buffer_store_byte [[VAL]]
64 define void @trunc_i8_bitcast_v2i8(i8 addrspace(1)* %out, <2 x i8> addrspace(1)* %in) {
65 %ld = load <2 x i8>, <2 x i8> addrspace(1)* %in
66 %bc = bitcast <2 x i8> %ld to i16
67 %trunc = trunc i16 %bc to i8
68 store i8 %trunc, i8 addrspace(1)* %out
69 ret void
70 }
71
72 ; CHECK-LABEL: {{^}}trunc_i32_bitcast_v4i8:
73 ; CHECK: buffer_load_dword [[VAL:v[0-9]+]]
74 ; CHECK: buffer_store_byte [[VAL]]
75 define void @trunc_i32_bitcast_v4i8(i8 addrspace(1)* %out, <4 x i8> addrspace(1)* %in) {
76 %ld = load <4 x i8>, <4 x i8> addrspace(1)* %in
77 %bc = bitcast <4 x i8> %ld to i32
78 %trunc = trunc i32 %bc to i8
79 store i8 %trunc, i8 addrspace(1)* %out
80 ret void
81 }
82
83 ; CHECK-LABEL: {{^}}trunc_i24_bitcast_v3i8:
84 ; CHECK: buffer_load_dword [[VAL:v[0-9]+]]
85 ; CHECK: buffer_store_byte [[VAL]]
86 define void @trunc_i24_bitcast_v3i8(i8 addrspace(1)* %out, <3 x i8> addrspace(1)* %in) {
87 %ld = load <3 x i8>, <3 x i8> addrspace(1)* %in
88 %bc = bitcast <3 x i8> %ld to i24
89 %trunc = trunc i24 %bc to i8
90 store i8 %trunc, i8 addrspace(1)* %out
91 ret void
92 }