llvm.org GIT mirror llvm / 3031f21
AMDGPU: Remove custom BUILD_VECTOR combine

This was looping in a testcase and removing it now slightly improves a test.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@345560 91177308-0d34-0410-b5e6-96231b3b80d8

Matt Arsenault, 1 year, 3 months ago
4 changed file(s) with 33 addition(s) and 52 deletion(s). Raw diff Collapse all Expand all
678678 setTargetDAGCombine(ISD::SCALAR_TO_VECTOR);
679679 setTargetDAGCombine(ISD::ZERO_EXTEND);
680680 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
681 setTargetDAGCombine(ISD::BUILD_VECTOR);
682681
683682 // All memory operations. Some folding on the pointer operand is done to help
684683 // matching the constant offsets in the addressing modes.
81328131 return SDValue();
81338132 }
81348133
8135 static bool convertBuildVectorCastElt(SelectionDAG &DAG,
8136 SDValue &Lo, SDValue &Hi) {
8137 if (Hi.getOpcode() == ISD::BITCAST &&
8138 Hi.getOperand(0).getValueType() == MVT::f16 &&
8139 (isa<ConstantSDNode>(Lo) || Lo.isUndef())) {
8140 Lo = DAG.getNode(ISD::BITCAST, SDLoc(Lo), MVT::f16, Lo);
8141 Hi = Hi.getOperand(0);
8142 return true;
8143 }
8144
8145 return false;
8146 }
8147
8148 SDValue SITargetLowering::performBuildVectorCombine(
8149 SDNode *N, DAGCombinerInfo &DCI) const {
8150 SDLoc SL(N);
8151
8152 if (!isTypeLegal(MVT::v2i16))
8153 return SDValue();
8154 SelectionDAG &DAG = DCI.DAG;
8155 EVT VT = N->getValueType(0);
8156
8157 if (VT == MVT::v2i16) {
8158 SDValue Lo = N->getOperand(0);
8159 SDValue Hi = N->getOperand(1);
8160
8161 // v2i16 build_vector (const|undef), (bitcast f16:$x)
8162 // -> bitcast (v2f16 build_vector const|undef, $x
8163 if (convertBuildVectorCastElt(DAG, Lo, Hi)) {
8164 SDValue NewVec = DAG.getBuildVector(MVT::v2f16, SL, { Lo, Hi });
8165 return DAG.getNode(ISD::BITCAST, SL, VT, NewVec);
8166 }
8167
8168 if (convertBuildVectorCastElt(DAG, Hi, Lo)) {
8169 SDValue NewVec = DAG.getBuildVector(MVT::v2f16, SL, { Hi, Lo });
8170 return DAG.getNode(ISD::BITCAST, SL, VT, NewVec);
8171 }
8172 }
8173
8174 return SDValue();
8175 }
8176
81778134 unsigned SITargetLowering::getFusedOpcode(const SelectionDAG &DAG,
81788135 const SDNode *N0,
81798136 const SDNode *N1) const {
87828739 }
87838740 case ISD::EXTRACT_VECTOR_ELT:
87848741 return performExtractVectorEltCombine(N, DCI);
8785 case ISD::BUILD_VECTOR:
8786 return performBuildVectorCombine(N, DCI);
87878742 }
87888743 return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
87898744 }
153153 SDValue performFMed3Combine(SDNode *N, DAGCombinerInfo &DCI) const;
154154 SDValue performCvtPkRTZCombine(SDNode *N, DAGCombinerInfo &DCI) const;
155155 SDValue performExtractVectorEltCombine(SDNode *N, DAGCombinerInfo &DCI) const;
156 SDValue performBuildVectorCombine(SDNode *N, DAGCombinerInfo &DCI) const;
157156
158157 unsigned getFusedOpcode(const SelectionDAG &DAG,
159158 const SDNode *N0, const SDNode *N1) const;
0 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
1
2 ; There was an infinite loop in DAGCombiner from a target build_vector
3 ; combine and a generic insert_vector_elt combine.
4
5 ; GCN-LABEL: {{^}}combine_loop:
6 ; GCN: flat_load_ushort
7 ; GCN: flat_store_short
8 ; GCN: v_lshlrev_b32_e32 v{{[0-9]+}}, 16,
9 define amdgpu_kernel void @combine_loop(i16* %arg) #0 {
10 bb:
11 br label %bb1
12
13 bb1:
14 %tmp = phi <2 x i16> [ <i16 15360, i16 15360>, %bb ], [ %tmp5, %bb1 ]
15 %tmp2 = phi half [ 0xH0000, %bb ], [ %tmp8, %bb1 ]
16 %tmp3 = load volatile half, half* null, align 536870912
17 %tmp4 = bitcast half %tmp3 to i16
18 %tmp5 = insertelement <2 x i16> <i16 0, i16 undef>, i16 %tmp4, i32 1
19 %tmp6 = bitcast i16* %arg to half*
20 store half %tmp2, half* %tmp6, align 2
21 %tmp7 = bitcast <2 x i16> %tmp to <2 x half>
22 %tmp8 = extractelement <2 x half> %tmp7, i32 0
23 br label %bb1
24 }
25
26 attributes #0 = { nounwind }
4848 }
4949
5050 ; GCN-LABEL: {{^}}v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack:
51 ; GFX9: v_mov_b32_e32 v3, 0
52 ; GFX9-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2
53 ; GFX9-NEXT: v_mov_b32_e32 v0, v3
51 ; GFX9: s_waitcnt
52 ; GFX9-NEXT: v_mad_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1]
53 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
5454 ; GFX9-NEXT: s_setpc_b64
5555 define i32 @v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack(half %src0, half %src1, half %src2) #0 {
5656 %src0.ext = fpext half %src0 to float
6565 }
6666
6767 ; GCN-LABEL: {{^}}v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack_sext:
68 ; GFX9: v_mov_b32_e32 v3, 0
69 ; GFX9-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2
70 ; GFX9-NEXT: v_mov_b32_e32 v0, v3
68 ; GFX9: s_waitcnt
69 ; GFX9-NEXT: v_mad_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1]
70 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
7171 ; GFX9-NEXT: s_setpc_b64
7272 define i32 @v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack_sext(half %src0, half %src1, half %src2) #0 {
7373 %src0.ext = fpext half %src0 to float