llvm.org GIT mirror llvm / fa5f307
[X86][SSE] Generalized SplitBinaryOpsAndApply to SplitOpsAndApply to support any number of ops. I've kept SplitBinaryOpsAndApply as a wrapper to avoid a lot of makeArrayRef code. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@327240 91177308-0d34-0410-b5e6-96231b3b80d8 Simon Pilgrim 2 years ago
1 changed file(s) with 54 addition(s) and 44 deletion(s). Raw diff Collapse all Expand all
50945094 DAG.getIntPtrConstant(0, dl));
50955095 }
50965096
5097 // Helper for splitting operands of a binary operation to legal target size and
5097 // Helper for splitting operands of an operation to legal target size and
50985098 // apply a function on each part.
50995099 // Useful for operations that are available on SSE2 in 128-bit, on AVX2 in
5100 // 256-bit and on AVX512BW in 512-bit.
5101 // The argument VT is the type used for deciding if/how to split the operands
5102 // Op0 and Op1. Op0 and Op1 do *not* have to be of type VT.
5103 // The argument Builder is a function that will be applied on each split psrt:
5104 // SDValue Builder(SelectionDAG&G, SDLoc, SDValue, SDValue)
5100 // 256-bit and on AVX512BW in 512-bit. The argument VT is the type used for
5101 // deciding if/how to split Ops. Ops elements do *not* have to be of type VT.
5102 // The argument Builder is a function that will be applied on each split part:
5103 // SDValue Builder(SelectionDAG&G, SDLoc, ArrayRef<SDValue>)
51055104 template <typename F>
5106 SDValue SplitBinaryOpsAndApply(SelectionDAG &DAG, const X86Subtarget &Subtarget,
5107 const SDLoc &DL, EVT VT, SDValue Op0,
5108 SDValue Op1, F Builder) {
5105 SDValue SplitOpsAndApply(SelectionDAG &DAG, const X86Subtarget &Subtarget,
5106 const SDLoc &DL, EVT VT, ArrayRef<SDValue> Ops,
5107 F Builder) {
51095108 assert(Subtarget.hasSSE2() && "Target assumed to support at least SSE2");
51105109 unsigned NumSubs = 1;
51115110 if (Subtarget.useBWIRegs()) {
51265125 }
51275126
51285127 if (NumSubs == 1)
5129 return Builder(DAG, DL, Op0, Op1);
5128 return Builder(DAG, DL, Ops);
51305129
51315130 SmallVector Subs;
5132 EVT InVT = Op0.getValueType();
5133 EVT SubVT = EVT::getVectorVT(*DAG.getContext(), InVT.getScalarType(),
5134 InVT.getVectorNumElements() / NumSubs);
51355131 for (unsigned i = 0; i != NumSubs; ++i) {
5136 unsigned Idx = i * SubVT.getVectorNumElements();
5137 SDValue LHS = extractSubVector(Op0, Idx, DAG, DL, SubVT.getSizeInBits());
5138 SDValue RHS = extractSubVector(Op1, Idx, DAG, DL, SubVT.getSizeInBits());
5139 Subs.push_back(Builder(DAG, DL, LHS, RHS));
5132 SmallVector SubOps;
5133 for (SDValue Op : Ops) {
5134 EVT OpVT = Op.getValueType();
5135 unsigned NumSubElts = OpVT.getVectorNumElements() / NumSubs;
5136 unsigned SizeSub = OpVT.getSizeInBits() / NumSubs;
5137 SubOps.push_back(extractSubVector(Op, i * NumSubElts, DAG, DL, SizeSub));
5138 }
5139 Subs.push_back(Builder(DAG, DL, SubOps));
51405140 }
51415141 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Subs);
5142 }
5143
5144 // Helper for splitting operands of a binary operation to legal target size and
5145 // apply a function on each part.
5146 template <typename F>
5147 SDValue SplitBinaryOpsAndApply(SelectionDAG &DAG, const X86Subtarget &Subtarget,
5148 const SDLoc &DL, EVT VT, SDValue Op0,
5149 SDValue Op1, F Builder) {
5150 return SplitOpsAndApply(DAG, Subtarget, DL, VT, makeArrayRef({Op0, Op1}),
5151 Builder);
51425152 }
51435153
51445154 // Return true if the instruction zeroes the unused upper part of the
3124831258 SDValue SadOp1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, ExtendedVT, Ops);
3124931259
3125031260 // Actually build the SAD, split as 128/256/512 bits for SSE/AVX2/AVX512BW.
31251 auto PSADBWBuilder = [](SelectionDAG &DAG, const SDLoc &DL, SDValue Op0,
31252 SDValue Op1) {
31253 MVT VT = MVT::getVectorVT(MVT::i64, Op0.getValueSizeInBits() / 64);
31254 return DAG.getNode(X86ISD::PSADBW, DL, VT, Op0, Op1);
31261 auto PSADBWBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
31262 ArrayRef<SDValue> Ops) {
31263 MVT VT = MVT::getVectorVT(MVT::i64, Ops[0].getValueSizeInBits() / 64);
31264 return DAG.getNode(X86ISD::PSADBW, DL, VT, Ops);
3125531265 };
3125631266 MVT SadVT = MVT::getVectorVT(MVT::i64, RegSize / 64);
3125731267 return SplitBinaryOpsAndApply(DAG, Subtarget, DL, SadVT, SadOp0, SadOp1,
3207832088 SDValue OpLHS = Other->getOperand(0), OpRHS = Other->getOperand(1);
3207932089 SDValue CondRHS = Cond->getOperand(1);
3208032090
32081 auto SUBUSBuilder = [](SelectionDAG &DAG, const SDLoc &DL, SDValue Op0,
32082 SDValue Op1) {
32083 return DAG.getNode(X86ISD::SUBUS, DL, Op0.getValueType(), Op0, Op1);
32091 auto SUBUSBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
32092 ArrayRef<SDValue> Ops) {
32093 return DAG.getNode(X86ISD::SUBUS, DL, Ops[0].getValueType(), Ops);
3208432094 };
3208532095
3208632096 // Look for a general sub with unsigned saturation first.
3303433044 return SDValue();
3303533045
3303633046 // Use SplitBinaryOpsAndApply to handle AVX splitting.
33037 auto PMADDWDBuilder = [](SelectionDAG &DAG, const SDLoc &DL, SDValue Op0,
33038 SDValue Op1) {
33039 MVT VT = MVT::getVectorVT(MVT::i32, Op0.getValueSizeInBits() / 32);
33040 return DAG.getNode(X86ISD::VPMADDWD, DL, VT, Op0, Op1);
33047 auto PMADDWDBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
33048 ArrayRef<SDValue> Ops) {
33049 MVT VT = MVT::getVectorVT(MVT::i32, Ops[0].getValueSizeInBits() / 32);
33050 return DAG.getNode(X86ISD::VPMADDWD, DL, VT, Ops);
3304133051 };
3304233052 return SplitBinaryOpsAndApply(DAG, Subtarget, SDLoc(N), VT,
3304333053 DAG.getBitcast(WVT, N0),
3467134681 Operands[0] = LHS.getOperand(0);
3467234682 Operands[1] = LHS.getOperand(1);
3467334683
34674 auto AVGBuilder = [](SelectionDAG &DAG, const SDLoc &DL, SDValue Op0,
34675 SDValue Op1) {
34676 return DAG.getNode(X86ISD::AVG, DL, Op0.getValueType(), Op0, Op1);
34684 auto AVGBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
34685 ArrayRef<SDValue> Ops) {
34686 return DAG.getNode(X86ISD::AVG, DL, Ops[0].getValueType(), Ops);
3467734687 };
3467834688
3467934689 // Take care of the case when one of the operands is a constant vector whose
3770437714 SDValue N1 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, MulOp->getOperand(1));
3770537715
3770637716 // Madd vector size is half of the original vector size
37707 auto PMADDWDBuilder = [](SelectionDAG &DAG, const SDLoc &DL, SDValue Op0,
37708 SDValue Op1) {
37709 MVT VT = MVT::getVectorVT(MVT::i32, Op0.getValueSizeInBits() / 32);
37710 return DAG.getNode(X86ISD::VPMADDWD, DL, VT, Op0, Op1);
37717 auto PMADDWDBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
37718 ArrayRef<SDValue> Ops) {
37719 MVT VT = MVT::getVectorVT(MVT::i32, Ops[0].getValueSizeInBits() / 32);
37720 return DAG.getNode(X86ISD::VPMADDWD, DL, VT, Ops);
3771137721 };
3771237722 SDValue Madd = SplitBinaryOpsAndApply(DAG, Subtarget, DL, MAddVT, N0, N1,
3771337723 PMADDWDBuilder);
3790337913 if (!canReduceVMulWidth(Mul.getNode(), DAG, Mode) || Mode == MULU16)
3790437914 return SDValue();
3790537915
37906 auto PMADDBuilder = [](SelectionDAG &DAG, const SDLoc &DL, SDValue Op0,
37907 SDValue Op1) {
37916 auto PMADDBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
37917 ArrayRef<SDValue> Ops) {
3790837918 // Shrink by adding truncate nodes and let DAGCombine fold with the
3790937919 // sources.
37910 EVT InVT = Op0.getValueType();
37920 EVT InVT = Ops[0].getValueType();
3791137921 assert(InVT.getScalarType() == MVT::i32 &&
3791237922 "Unexpected scalar element type");
37913 assert(InVT == Op1.getValueType() && "Operands' types mismatch");
37923 assert(InVT == Ops[1].getValueType() && "Operands' types mismatch");
3791437924 EVT ResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32,
3791537925 InVT.getVectorNumElements() / 2);
3791637926 EVT TruncVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16,
3791737927 InVT.getVectorNumElements());
3791837928 return DAG.getNode(X86ISD::VPMADDWD, DL, ResVT,
37919 DAG.getNode(ISD::TRUNCATE, DL, TruncVT, Op0),
37920 DAG.getNode(ISD::TRUNCATE, DL, TruncVT, Op1));
37929 DAG.getNode(ISD::TRUNCATE, DL, TruncVT, Ops[0]),
37930 DAG.getNode(ISD::TRUNCATE, DL, TruncVT, Ops[1]));
3792137931 };
3792237932 return SplitBinaryOpsAndApply(DAG, Subtarget, DL, VT, Mul.getOperand(0),
3792337933 Mul.getOperand(1), PMADDBuilder);
3799338003 } else
3799438004 return SDValue();
3799538005
37996 auto SUBUSBuilder = [](SelectionDAG &DAG, const SDLoc &DL, SDValue Op0,
37997 SDValue Op1) {
37998 return DAG.getNode(X86ISD::SUBUS, DL, Op0.getValueType(), Op0, Op1);
38006 auto SUBUSBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
38007 ArrayRef<SDValue> Ops) {
38008 return DAG.getNode(X86ISD::SUBUS, DL, Ops[0].getValueType(), Ops);
3799938009 };
3800038010
3800138011 // PSUBUS doesn't support v8i32/v8i64/v16i32, but it can be enabled with