llvm.org GIT mirror llvm / 189531f
Add an optimization that looks for a specific pair-wise add pattern and generates a vpaddl instruction instead of scalarizing the add. Includes a test case. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@133027 91177308-0d34-0410-b5e6-96231b3b80d8 Tanya Lattner 8 years ago
2 changed file(s) with 120 addition(s) and 5 deletion(s). Raw diff Collapse all Expand all
55225522 return SDValue();
55235523 }
55245524
5525 // AddCombineToVPADDL- For pair-wise add on neon, use the vpaddl instruction
5526 // (only after legalization).
5527 static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1,
5528 TargetLowering::DAGCombinerInfo &DCI,
5529 const ARMSubtarget *Subtarget) {
5530
5531 // Only perform optimization if after legalize, and if NEON is available. We
5532 // also expected both operands to be BUILD_VECTORs.
5533 if (DCI.isBeforeLegalize() || !Subtarget->hasNEON()
5534 || N0.getOpcode() != ISD::BUILD_VECTOR
5535 || N1.getOpcode() != ISD::BUILD_VECTOR)
5536 return SDValue();
5537
5538 // Check output type since VPADDL operand elements can only be 8, 16, or 32.
5539 EVT VT = N->getValueType(0);
5540 if (!VT.isInteger() || VT.getVectorElementType() == MVT::i64)
5541 return SDValue();
5542
5543 // Check that the vector operands are of the right form.
5544 // N0 and N1 are BUILD_VECTOR nodes with N number of EXTRACT_VECTOR
5545 // operands, where N is the size of the formed vector.
5546 // Each EXTRACT_VECTOR should have the same input vector and odd or even
5547 // index such that we have a pair wise add pattern.
5548 SDNode *V = 0;
5549 SDValue Vec;
5550 unsigned nextIndex = 0;
5551
5552 // Grab the vector that all EXTRACT_VECTOR nodes should be referencing.
5553 if (N0->getOperand(0)->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
5554 Vec = N0->getOperand(0)->getOperand(0);
5555 V = Vec.getNode();
5556 } else
5557 return SDValue();
5558
5559 // For each operands to the ADD which are BUILD_VECTORs,
5560 // check to see if each of their operands are an EXTRACT_VECTOR with
5561 // the same vector and appropriate index.
5562 for (unsigned i = 0, e = N0->getNumOperands(); i != e; ++i) {
5563 if (N0->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT
5564 && N1->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
5565
5566 SDValue ExtVec0 = N0->getOperand(i);
5567 SDValue ExtVec1 = N1->getOperand(i);
5568
5569 // First operand is the vector, verify its the same.
5570 if (V != ExtVec0->getOperand(0).getNode() ||
5571 V != ExtVec1->getOperand(0).getNode())
5572 return SDValue();
5573
5574 // Second is the constant, verify its correct.
5575 ConstantSDNode *C0 = dyn_cast(ExtVec0->getOperand(1));
5576 ConstantSDNode *C1 = dyn_cast(ExtVec1->getOperand(1));
5577
5578 // For the constant, we want to see all the even or all the odd.
5579 if (!C0 || !C1 || C0->getZExtValue() != nextIndex
5580 || C1->getZExtValue() != nextIndex+1)
5581 return SDValue();
5582
5583 // Increment index.
5584 nextIndex+=2;
5585 } else
5586 return SDValue();
5587 }
5588
5589 // Create VPADDL node.
5590 SelectionDAG &DAG = DCI.DAG;
5591 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5592 DebugLoc DL = N->getDebugLoc();
5593
5594 // Build operand list.
5595 SmallVector Ops;
5596 Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddls,
5597 TLI.getPointerTy()));
5598
5599 // Input is the vector.
5600 Ops.push_back(Vec);
5601
5602 // Get widened type and narrowed type.
5603 MVT widenType;
5604 unsigned numElem = VT.getVectorNumElements();
5605 switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
5606 case MVT::i8: widenType = MVT::getVectorVT(MVT::i16, numElem); break;
5607 case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break;
5608 case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break;
5609 default:
5610 assert(0 && "Invalid vector element type for padd optimization.");
5611 }
5612
5613 SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(),
5614 widenType, &Ops[0], Ops.size());
5615 return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, tmp);
5616 }
5617
55255618 /// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
55265619 /// operands N0 and N1. This is a helper for PerformADDCombine that is
55275620 /// called with the default operands, and if that fails, with commuted
55285621 /// operands.
55295622 static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
5530 TargetLowering::DAGCombinerInfo &DCI) {
5623 TargetLowering::DAGCombinerInfo &DCI,
5624 const ARMSubtarget *Subtarget){
5625
5626 // Attempt to create vpaddl for this add.
5627 SDValue Result = AddCombineToVPADDL(N, N0, N1, DCI, Subtarget);
5628 if (Result.getNode())
5629 return Result;
5630
55315631 // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
55325632 if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) {
55335633 SDValue Result = combineSelectAndUse(N, N0, N1, DCI);
55395639 /// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
55405640 ///
55415641 static SDValue PerformADDCombine(SDNode *N,
5542 TargetLowering::DAGCombinerInfo &DCI) {
5642 TargetLowering::DAGCombinerInfo &DCI,
5643 const ARMSubtarget *Subtarget) {
55435644 SDValue N0 = N->getOperand(0);
55445645 SDValue N1 = N->getOperand(1);
55455646
55465647 // First try with the default operand order.
5547 SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI);
5648 SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget);
55485649 if (Result.getNode())
55495650 return Result;
55505651
55515652 // If that didn't work, try again with the operands commuted.
5552 return PerformADDCombineWithOperands(N, N1, N0, DCI);
5653 return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget);
55535654 }
55545655
55555656 /// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
67546855 DAGCombinerInfo &DCI) const {
67556856 switch (N->getOpcode()) {
67566857 default: break;
6757 case ISD::ADD: return PerformADDCombine(N, DCI);
6858 case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget);
67586859 case ISD::SUB: return PerformSUBCombine(N, DCI);
67596860 case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget);
67606861 case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
137137 ret <2 x i64> %tmp2
138138 }
139139
; Test AddCombine optimization that generates a vpaddl.s
; The two shuffles select the even and the odd lanes of the same <16 x i8>
; value, so the add below is a pair-wise add of adjacent lanes.
define void @addCombineToVPADDL() nounwind ssp {
; CHECK: vpaddl.s8
  %cbcr = alloca <16 x i8>, align 16
  %X = alloca <8 x i8>, align 8
  %tmp = load <16 x i8>* %cbcr
  %tmp1 = shufflevector <16 x i8> %tmp, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %tmp2 = load <16 x i8>* %cbcr
  %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %add = add <8 x i8> %tmp3, %tmp1
  store <8 x i8> %add, <8 x i8>* %X, align 8
  ret void
}
153
140154 declare <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8>) nounwind readnone
141155 declare <2 x i32> @llvm.arm.neon.vpaddls.v2i32.v4i16(<4 x i16>) nounwind readnone
142156 declare <1 x i64> @llvm.arm.neon.vpaddls.v1i64.v2i32(<2 x i32>) nounwind readnone