llvm.org GIT mirror llvm / 3d39cb0
[X86][SSE] Begin merging vector shuffle to BLEND for lowering and combining. Split off matchVectorShuffleAsBlend from lowerVectorShuffleAsBlend for reuse in combining. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@298914 91177308-0d34-0410-b5e6-96231b3b80d8 Simon Pilgrim 3 years ago
1 changed file(s) with 91 addition(s) and 79 deletion(s). Raw diff Collapse all Expand all
82068206 return true;
82078207 }
82088208
8209 // Merges a general DAG shuffle mask and zeroable bit mask into a target shuffle
8210 // mask.
8211 static SmallVector createTargetShuffleMask(ArrayRef Mask,
8212 const APInt &Zeroable) {
8213 int NumElts = Mask.size();
8214 assert(NumElts == Zeroable.getBitWidth() && "Mismatch mask sizes");
8215
8216 SmallVector TargetMask(NumElts, SM_SentinelUndef);
8217 for (unsigned i = 0; i != NumElts; ++i) {
8218 int M = Mask[i];
8219 if (M == SM_SentinelUndef)
8220 continue;
8221 assert(0 <= M && M < (2 * NumElts) && "Out of range shuffle index");
8222 TargetMask[i] = (Zeroable[i] ? SM_SentinelZero : M);
8223 }
8224 return TargetMask;
8225 }
8226
82098227 // Check if the shuffle mask is suitable for the AVX vpunpcklwd or vpunpckhwd
82108228 // instructions.
82118229 static bool isUnpackWdShuffleMask(ArrayRef Mask, MVT VT) {
86258643 const X86Subtarget &Subtarget,
86268644 SelectionDAG &DAG);
86278645
8646 static bool matchVectorShuffleAsBlend(SDValue V1, SDValue V2,
8647 MutableArrayRef TargetMask,
8648 bool &ForceV1Zero, bool &ForceV2Zero,
8649 uint64_t &BlendMask) {
8650 bool V1IsZeroOrUndef =
8651 V1.isUndef() || ISD::isBuildVectorAllZeros(V1.getNode());
8652 bool V2IsZeroOrUndef =
8653 V2.isUndef() || ISD::isBuildVectorAllZeros(V2.getNode());
8654
8655 BlendMask = 0;
8656 ForceV1Zero = false, ForceV2Zero = false;
8657 assert(TargetMask.size() <= 64 && "Shuffle mask too big for blend mask");
8658
8659 // Attempt to generate the binary blend mask. If an input is zero then
8660 // we can use any lane.
8661 // TODO: generalize the zero matching to any scalar like isShuffleEquivalent.
8662 for (int i = 0, Size = TargetMask.size(); i < Size; ++i) {
8663 int M = TargetMask[i];
8664 if (M == SM_SentinelUndef)
8665 continue;
8666 if (M == i)
8667 continue;
8668 if (M == i + Size) {
8669 BlendMask |= 1ull << i;
8670 continue;
8671 }
8672 if (M == SM_SentinelZero) {
8673 if (V1IsZeroOrUndef) {
8674 ForceV1Zero = true;
8675 TargetMask[i] = i;
8676 continue;
8677 }
8678 if (V2IsZeroOrUndef) {
8679 ForceV2Zero = true;
8680 BlendMask |= 1ull << i;
8681 TargetMask[i] = i + Size;
8682 continue;
8683 }
8684 }
8685 return false;
8686 }
8687 return true;
8688 }
8689
/// Widen a per-element blend mask to a blend mask over narrower elements.
///
/// Every set bit i (for i < Size) of \p BlendMask is expanded into \p Scale
/// consecutive set bits starting at bit position i * Scale. Bits of
/// \p BlendMask at or above \p Size are ignored.
///
/// \param BlendMask  One bit per original element.
/// \param Size       Number of original elements to consider.
/// \param Scale      Number of narrow elements per original element.
/// \returns the scaled mask, Size * Scale bits wide.
static uint64_t scaleVectorShuffleBlendMask(uint64_t BlendMask, int Size,
                                            int Scale) {
  // Shifting a 64-bit value by 64 or more is undefined, so the scaled mask
  // must fit in the result type.
  assert(0 < Scale && Scale < 64 && Size * Scale <= 64 &&
         "Scaled blend mask must fit in 64 bits");

  // Run of Scale low bits; loop-invariant, so build it once.
  const uint64_t LaneBits = (1ull << Scale) - 1;

  uint64_t ScaledMask = 0;
  for (int i = 0; i != Size; ++i)
    if (BlendMask & (1ull << i))
      ScaledMask |= LaneBits << (i * Scale);
  return ScaledMask;
}
8697
86288698 /// \brief Try to emit a blend instruction for a shuffle.
86298699 ///
86308700 /// This doesn't do any checks for the availability of instructions for blending
86368706 const APInt &Zeroable,
86378707 const X86Subtarget &Subtarget,
86388708 SelectionDAG &DAG) {
8639 bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
8640 bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());
8641 SmallVector8> Mask(Original.begin(), Original.end());
8709 SmallVector64> Mask = createTargetShuffleMask(Original, Zeroable);
8710
8711 uint64_t BlendMask = 0;
86428712 bool ForceV1Zero = false, ForceV2Zero = false;
8643
8644 // Attempt to generate the binary blend mask. If an input is zero then
8645 // we can use any lane.
8646 // TODO: generalize the zero matching to any scalar like isShuffleEquivalent.
8647 uint64_t BlendMask = 0;
8648 for (int i = 0, Size = Mask.size(); i < Size; ++i) {
8649 int M = Mask[i];
8650 if (M < 0)
8651 continue;
8652 if (M == i)
8653 continue;
8654 if (M == i + Size) {
8655 BlendMask |= 1ull << i;
8656 continue;
8657 }
8658 if (Zeroable[i]) {
8659 if (V1IsZero) {
8660 ForceV1Zero = true;
8661 Mask[i] = i;
8662 continue;
8663 }
8664 if (V2IsZero) {
8665 ForceV2Zero = true;
8666 BlendMask |= 1ull << i;
8667 Mask[i] = i + Size;
8668 continue;
8669 }
8670 }
8671 return SDValue(); // Shuffled input!
8672 }
8713 if (!matchVectorShuffleAsBlend(V1, V2, Mask, ForceV1Zero, ForceV2Zero,
8714 BlendMask))
8715 return SDValue();
86738716
86748717 // Create a REAL zero vector - ISD::isBuildVectorAllZeros allows UNDEFs.
86758718 if (ForceV1Zero)
86768719 V1 = getZeroVector(VT, Subtarget, DAG, DL);
86778720 if (ForceV2Zero)
86788721 V2 = getZeroVector(VT, Subtarget, DAG, DL);
8679
8680 auto ScaleBlendMask = [](uint64_t BlendMask, int Size, int Scale) {
8681 uint64_t ScaledMask = 0;
8682 for (int i = 0; i != Size; ++i)
8683 if (BlendMask & (1ull << i))
8684 ScaledMask |= ((1ull << Scale) - 1) << (i * Scale);
8685 return ScaledMask;
8686 };
86878722
86888723 switch (VT.SimpleTy) {
86898724 case MVT::v2f64:
87048739 if (Subtarget.hasAVX2()) {
87058740 // Scale the blend by the number of 32-bit dwords per element.
87068741 int Scale = VT.getScalarSizeInBits() / 32;
8707 BlendMask = ScaleBlendMask(BlendMask, Mask.size(), Scale);
8742 BlendMask = scaleVectorShuffleBlendMask(BlendMask, Mask.size(), Scale);
87088743 MVT BlendVT = VT.getSizeInBits() > 128 ? MVT::v8i32 : MVT::v4i32;
87098744 V1 = DAG.getBitcast(BlendVT, V1);
87108745 V2 = DAG.getBitcast(BlendVT, V2);
87178752 // For integer shuffles we need to expand the mask and cast the inputs to
87188753 // v8i16s prior to blending.
87198754 int Scale = 8 / VT.getVectorNumElements();
8720 BlendMask = ScaleBlendMask(BlendMask, Mask.size(), Scale);
8755 BlendMask = scaleVectorShuffleBlendMask(BlendMask, Mask.size(), Scale);
87218756 V1 = DAG.getBitcast(MVT::v8i16, V1);
87228757 V2 = DAG.getBitcast(MVT::v8i16, V2);
87238758 return DAG.getBitcast(VT,
2712327158 BlendVT = MVT::v8f32;
2712427159 }
2712527160
27126 unsigned BlendSize = BlendVT.getVectorNumElements();
27127 unsigned MaskRatio = BlendSize / NumMaskElts;
27128
27129 // Can we blend with zero?
27130 if (isSequentialOrUndefOrZeroInRange(Mask, /*Pos*/ 0, /*Size*/ NumMaskElts,
27131 /*Low*/ 0) &&
27132 NumMaskElts <= BlendVT.getVectorNumElements()) {
27133 PermuteImm = 0;
27134 for (unsigned i = 0; i != BlendSize; ++i)
27135 if (Mask[i / MaskRatio] < 0)
27136 PermuteImm |= 1u << i;
27137
27138 V2 = getZeroVector(BlendVT, Subtarget, DAG, DL);
27139 Shuffle = X86ISD::BLENDI;
27140 ShuffleVT = BlendVT;
27141 return true;
27142 }
27143
27144 // Attempt to match as a binary blend.
2714527161 if (NumMaskElts <= BlendVT.getVectorNumElements()) {
27146 bool MatchBlend = true;
27147 for (int i = 0; i != (int)NumMaskElts; ++i) {
27148 int M = Mask[i];
27149 if (M == SM_SentinelUndef)
27150 continue;
27151 if ((M == SM_SentinelZero) ||
27152 ((M != i) && (M != (i + (int)NumMaskElts)))) {
27153 MatchBlend = false;
27154 break;
27162 uint64_t BlendMask = 0;
27163 bool ForceV1Zero = false, ForceV2Zero = false;
27164 SmallVector TargetMask(Mask.begin(), Mask.end());
27165 if (matchVectorShuffleAsBlend(V1, V2, TargetMask, ForceV1Zero,
27166 ForceV2Zero, BlendMask)) {
27167 if (NumMaskElts < BlendVT.getVectorNumElements()) {
27168 int Scale = BlendVT.getVectorNumElements() / NumMaskElts;
27169 BlendMask =
27170 scaleVectorShuffleBlendMask(BlendMask, NumMaskElts, Scale);
2715527171 }
27156 }
27157
27158 if (MatchBlend) {
27159 PermuteImm = 0;
27160 for (unsigned i = 0; i != BlendSize; ++i)
27161 if ((int)NumMaskElts <= Mask[i / MaskRatio])
27162 PermuteImm |= 1u << i;
27163
27172
27173 V1 = ForceV1Zero ? getZeroVector(BlendVT, Subtarget, DAG, DL) : V1;
27174 V2 = ForceV2Zero ? getZeroVector(BlendVT, Subtarget, DAG, DL) : V2;
27175 PermuteImm = (unsigned)BlendMask;
2716427176 Shuffle = X86ISD::BLENDI;
2716527177 ShuffleVT = BlendVT;
2716627178 return true;