llvm.org GIT mirror llvm / 5aebbfb
[X86] combineX86ShufflesRecursively - start recursion at depth = 0. NFCI. As discussed on rL367171, we have a problem where the depth recursion used in combineX86ShufflesRecursively was subtly different from that of computeKnownBits etc. - it starts at Depth=1 instead of Depth=0 like the others and has a different maximum recursion depth. This NFC patch changes the recursion depth to start at 0, so we can more easily reuse depth values in calls from combineX86ShufflesRecursively and its helper functions to computeKnownBits etc. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@367232 91177308-0d34-0410-b5e6-96231b3b80d8 Simon Pilgrim 1 year, 17 days ago
1 changed file(s) with 18 addition(s) and 18 deletion(s). Raw diff Collapse all Expand all
3203232032 unsigned NumRootElts = RootVT.getVectorNumElements();
3203332033 unsigned BaseMaskEltSizeInBits = RootSizeInBits / NumBaseMaskElts;
3203432034 bool FloatDomain = VT1.isFloatingPoint() || VT2.isFloatingPoint() ||
32035 (RootVT.isFloatingPoint() && Depth >= 2) ||
32035 (RootVT.isFloatingPoint() && Depth >= 1) ||
3203632036 (RootVT.is256BitVector() && !Subtarget.hasAVX2());
3203732037
3203832038 // Don't combine if we are an AVX512/EVEX target and the mask element size
3207132071 if (UnaryShuffle && RootVT.is256BitVector() && NumBaseMaskElts == 2 &&
3207232072 !(Subtarget.hasAVX2() && BaseMask[0] >= -1 && BaseMask[1] >= -1) &&
3207332073 !isSequentialOrUndefOrZeroInRange(BaseMask, 0, 2, 0)) {
32074 if (Depth == 1 && Root.getOpcode() == X86ISD::VPERM2X128)
32074 if (Depth == 0 && Root.getOpcode() == X86ISD::VPERM2X128)
3207532075 return SDValue(); // Nothing to do!
3207632076 MVT ShuffleVT = (FloatDomain ? MVT::v4f64 : MVT::v4i64);
3207732077 unsigned PermMask = 0;
3211632116 // Which shuffle domains are permitted?
3211732117 // Permit domain crossing at higher combine depths.
3211832118 // TODO: Should we indicate which domain is preferred if both are allowed?
32119 bool AllowFloatDomain = FloatDomain || (Depth > 3);
32120 bool AllowIntDomain = (!FloatDomain || (Depth > 3)) && Subtarget.hasSSE2() &&
32119 bool AllowFloatDomain = FloatDomain || (Depth >= 3);
32120 bool AllowIntDomain = (!FloatDomain || (Depth >= 3)) && Subtarget.hasSSE2() &&
3212132121 (!MaskVT.is256BitVector() || Subtarget.hasAVX2());
3212232122
3212332123 // Determine zeroable mask elements.
3215232152 if (V1.getValueType() == MaskVT &&
3215332153 V1.getOpcode() == ISD::SCALAR_TO_VECTOR &&
3215432154 MayFoldLoad(V1.getOperand(0))) {
32155 if (Depth == 1 && Root.getOpcode() == X86ISD::VBROADCAST)
32155 if (Depth == 0 && Root.getOpcode() == X86ISD::VBROADCAST)
3215632156 return SDValue(); // Nothing to do!
3215732157 Res = V1.getOperand(0);
3215832158 Res = DAG.getNode(X86ISD::VBROADCAST, DL, MaskVT, Res);
3215932159 return DAG.getBitcast(RootVT, Res);
3216032160 }
3216132161 if (Subtarget.hasAVX2()) {
32162 if (Depth == 1 && Root.getOpcode() == X86ISD::VBROADCAST)
32162 if (Depth == 0 && Root.getOpcode() == X86ISD::VBROADCAST)
3216332163 return SDValue(); // Nothing to do!
3216432164 Res = DAG.getBitcast(MaskVT, V1);
3216532165 Res = DAG.getNode(X86ISD::VBROADCAST, DL, MaskVT, Res);
3217332173 DL, DAG, Subtarget, Shuffle, ShuffleSrcVT,
3217432174 ShuffleVT) &&
3217532175 (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
32176 if (Depth == 1 && Root.getOpcode() == Shuffle)
32176 if (Depth == 0 && Root.getOpcode() == Shuffle)
3217732177 return SDValue(); // Nothing to do!
3217832178 Res = DAG.getBitcast(ShuffleSrcVT, NewV1);
3217932179 Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res);
3218432184 AllowIntDomain, Subtarget, Shuffle, ShuffleVT,
3218532185 PermuteImm) &&
3218632186 (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
32187 if (Depth == 1 && Root.getOpcode() == Shuffle)
32187 if (Depth == 0 && Root.getOpcode() == Shuffle)
3218832188 return SDValue(); // Nothing to do!
3218932189 Res = DAG.getBitcast(ShuffleVT, V1);
3219032190 Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res,
3219932199 NewV2, DL, DAG, Subtarget, Shuffle, ShuffleSrcVT,
3220032200 ShuffleVT, UnaryShuffle) &&
3220132201 (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
32202 if (Depth == 1 && Root.getOpcode() == Shuffle)
32202 if (Depth == 0 && Root.getOpcode() == Shuffle)
3220332203 return SDValue(); // Nothing to do!
3220432204 NewV1 = DAG.getBitcast(ShuffleSrcVT, NewV1);
3220532205 NewV2 = DAG.getBitcast(ShuffleSrcVT, NewV2);
3221332213 MaskVT, Mask, Zeroable, AllowFloatDomain, AllowIntDomain, NewV1,
3221432214 NewV2, DL, DAG, Subtarget, Shuffle, ShuffleVT, PermuteImm) &&
3221532215 (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
32216 if (Depth == 1 && Root.getOpcode() == Shuffle)
32216 if (Depth == 0 && Root.getOpcode() == Shuffle)
3221732217 return SDValue(); // Nothing to do!
3221832218 NewV1 = DAG.getBitcast(ShuffleVT, NewV1);
3221932219 NewV2 = DAG.getBitcast(ShuffleVT, NewV2);
3223132231 uint64_t BitLen, BitIdx;
3223232232 if (matchShuffleAsEXTRQ(IntMaskVT, V1, V2, Mask, BitLen, BitIdx,
3223332233 Zeroable)) {
32234 if (Depth == 1 && Root.getOpcode() == X86ISD::EXTRQI)
32234 if (Depth == 0 && Root.getOpcode() == X86ISD::EXTRQI)
3223532235 return SDValue(); // Nothing to do!
3223632236 V1 = DAG.getBitcast(IntMaskVT, V1);
3223732237 Res = DAG.getNode(X86ISD::EXTRQI, DL, IntMaskVT, V1,
3224132241 }
3224232242
3224332243 if (matchShuffleAsINSERTQ(IntMaskVT, V1, V2, Mask, BitLen, BitIdx)) {
32244 if (Depth == 1 && Root.getOpcode() == X86ISD::INSERTQI)
32244 if (Depth == 0 && Root.getOpcode() == X86ISD::INSERTQI)
3224532245 return SDValue(); // Nothing to do!
3224632246 V1 = DAG.getBitcast(IntMaskVT, V1);
3224732247 V2 = DAG.getBitcast(IntMaskVT, V2);
3225432254
3225532255 // Don't try to re-form single instruction chains under any circumstances now
3225632256 // that we've done encoding canonicalization for them.
32257 if (Depth < 2)
32257 if (Depth < 1)
3225832258 return SDValue();
3225932259
3226032260 // Depth threshold above which we can efficiently use variable mask shuffles.
32261 int VariableShuffleDepth = Subtarget.hasFastVariableShuffle() ? 2 : 3;
32261 int VariableShuffleDepth = Subtarget.hasFastVariableShuffle() ? 1 : 2;
3226232262 AllowVariableMask &= (Depth >= VariableShuffleDepth) || HasVariableMask;
3226332263
3226432264 bool MaskContainsZeros =
3274032740 // Bound the depth of our recursive combine because this is ultimately
3274132741 // quadratic in nature.
3274232742 const unsigned MaxRecursionDepth = 8;
32743 if (Depth > MaxRecursionDepth)
32743 if (Depth >= MaxRecursionDepth)
3274432744 return SDValue();
3274532745
3274632746 // Directly rip through bitcasts to find the underlying operand.
3294332943 /// Helper entry wrapper to combineX86ShufflesRecursively.
3294432944 static SDValue combineX86ShufflesRecursively(SDValue Op, SelectionDAG &DAG,
3294532945 const X86Subtarget &Subtarget) {
32946 return combineX86ShufflesRecursively({Op}, 0, Op, {0}, {}, /*Depth*/ 1,
32946 return combineX86ShufflesRecursively({Op}, 0, Op, {0}, {}, /*Depth*/ 0,
3294732947 /*HasVarMask*/ false,
3294832948 /*AllowVarMask*/ true, DAG, Subtarget);
3294932949 }
3317833178 for (unsigned i = 0; i != Scale; ++i)
3317933179 DemandedMask[i] = i;
3318033180 if (SDValue Res = combineX86ShufflesRecursively(
33181 {BC}, 0, BC, DemandedMask, {}, /*Depth*/ 1,
33181 {BC}, 0, BC, DemandedMask, {}, /*Depth*/ 0,
3318233182 /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget))
3318333183 return DAG.getNode(X86ISD::VBROADCAST, DL, VT,
3318433184 DAG.getBitcast(SrcVT, Res));
3869638696 }
3869738697
3869838698 if (SDValue Shuffle = combineX86ShufflesRecursively(
38699 {SrcVec}, 0, SrcVec, ShuffleMask, {}, /*Depth*/ 2,
38699 {SrcVec}, 0, SrcVec, ShuffleMask, {}, /*Depth*/ 1,
3870038700 /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget))
3870138701 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), VT, Shuffle,
3870238702 N->getOperand(0).getOperand(1));