llvm.org GIT mirror llvm / f20700c
SelectionDAG shuffle nodes do not allow operands with different numbers of elements than the result vector type. So, when an instruction like: %8 = shufflevector <2 x float> %4, <2 x float> %7, <4 x i32> <i32 1, i32 0, i32 3, i32 2> is translated to a DAG, each operand is changed to a concat_vectors node that appends 2 undef elements. That is: shuffle [a,b], [c,d] is changed to: shuffle [a,b,u,u], [c,d,u,u] That's probably the right thing for x86 but for NEON, we'd much rather have: shuffle [a,b,c,d], undef Teach the DAG combiner how to do that transformation for ARM. Radar 8597007. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@117482 91177308-0d34-0410-b5e6-96231b3b80d8 Bob Wilson 9 years ago
2 changed file(s) with 73 addition(s) and 0 deletion(s). Raw diff Collapse all Expand all
459459 setTargetDAGCombine(ISD::ANY_EXTEND);
460460 setTargetDAGCombine(ISD::SELECT_CC);
461461 setTargetDAGCombine(ISD::BUILD_VECTOR);
462 setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
462463 }
463464
464465 computeRegisterProperties();
45304531 return SDValue();
45314532 }
45324533
4534 /// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for
4535 /// ISD::VECTOR_SHUFFLE.
4536 static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
4537 // The LLVM shufflevector instruction does not require the shuffle mask
4538 // length to match the operand vector length, but ISD::VECTOR_SHUFFLE does
4539 // have that requirement. When translating to ISD::VECTOR_SHUFFLE, if the
4540 // operands do not match the mask length, they are extended by concatenating
4541 // them with undef vectors. That is probably the right thing for other
4542 // targets, but for NEON it is better to concatenate two double-register
4543 // size vector operands into a single quad-register size vector. Do that
4544 // transformation here:
4545 // shuffle(concat(v1, undef), concat(v2, undef)) ->
4546 // shuffle(concat(v1, v2), undef)
4547 SDValue Op0 = N->getOperand(0);
4548 SDValue Op1 = N->getOperand(1);
4549 if (Op0.getOpcode() != ISD::CONCAT_VECTORS ||
4550 Op1.getOpcode() != ISD::CONCAT_VECTORS ||
4551 Op0.getNumOperands() != 2 ||
4552 Op1.getNumOperands() != 2)
4553 return SDValue();
4554 SDValue Concat0Op1 = Op0.getOperand(1);
4555 SDValue Concat1Op1 = Op1.getOperand(1);
4556 if (Concat0Op1.getOpcode() != ISD::UNDEF ||
4557 Concat1Op1.getOpcode() != ISD::UNDEF)
4558 return SDValue();
4559 // Skip the transformation if any of the types are illegal.
4560 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4561 EVT VT = N->getValueType(0);
4562 if (!TLI.isTypeLegal(VT) ||
4563 !TLI.isTypeLegal(Concat0Op1.getValueType()) ||
4564 !TLI.isTypeLegal(Concat1Op1.getValueType()))
4565 return SDValue();
4566
4567 SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT,
4568 Op0.getOperand(0), Op1.getOperand(0));
4569 // Translate the shuffle mask.
4570 SmallVector NewMask;
4571 unsigned NumElts = VT.getVectorNumElements();
4572 unsigned HalfElts = NumElts/2;
4573 ShuffleVectorSDNode *SVN = cast(N);
4574 for (unsigned n = 0; n < NumElts; ++n) {
4575 int MaskElt = SVN->getMaskElt(n);
4576 int NewElt = -1;
4577 if (MaskElt < HalfElts)
4578 NewElt = MaskElt;
4579 else if (MaskElt >= NumElts && MaskElt < NumElts + HalfElts)
4580 NewElt = HalfElts + MaskElt - NumElts;
4581 NewMask.push_back(NewElt);
4582 }
4583 return DAG.getVectorShuffle(VT, N->getDebugLoc(), NewConcat,
4584 DAG.getUNDEF(VT), NewMask.data());
4585 }
4586
45334587 /// PerformVDUPLANECombine - Target-specific dag combine xforms for
45344588 /// ARMISD::VDUPLANE.
45354589 static SDValue PerformVDUPLANECombine(SDNode *N, SelectionDAG &DAG) {
49384992 case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI);
49394993 case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG);
49404994 case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI.DAG);
4995 case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);
49414996 case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI.DAG);
49424997 case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
49434998 case ISD::SHL:
128128 %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32>
129129 ret <8 x i16> %tmp2
130130 }
131
132 ; A vcombine feeding a VREV should not obscure things. Radar 8597007.
133
; A vcombine feeding a VREV should not obscure things.  Radar 8597007.
; The shuffle mask here was lost in the HTML scrape; it is reconstructed from
; the commit message (<i32 1, i32 0, i32 3, i32 2>), which swaps adjacent
; element pairs and therefore must lower to vrev64.32 with no vext.
define void @test_with_vcombine(<4 x float>* %v) nounwind {
;CHECK: test_with_vcombine:
;CHECK-NOT: vext
;CHECK: vrev64.32
  %tmp1 = load <4 x float>* %v, align 16
  %tmp2 = bitcast <4 x float> %tmp1 to <2 x double>
  %tmp3 = extractelement <2 x double> %tmp2, i32 0
  %tmp4 = bitcast double %tmp3 to <2 x float>
  %tmp5 = extractelement <2 x double> %tmp2, i32 1
  %tmp6 = bitcast double %tmp5 to <2 x float>
  %tmp7 = fadd <2 x float> %tmp6, %tmp6
  %tmp8 = shufflevector <2 x float> %tmp4, <2 x float> %tmp7, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
  store <4 x float> %tmp8, <4 x float>* %v, align 16
  ret void
}