llvm.org GIT mirror llvm / 987756c
[x86] add helper for creating a half-width shuffle; NFC

This reduces a bit of duplication between the combining and lowering places that use it, but the primary motivation is to make it easier to rearrange the lowering logic and solve PR40434:
https://bugs.llvm.org/show_bug.cgi?id=40434

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@352280 91177308-0d34-0410-b5e6-96231b3b80d8

Sanjay Patel, 1 year, 4 months ago
1 changed file(s) with 39 addition(s) and 28 deletion(s). Raw diff Collapse all Expand all
14370 14370 return true;
14371 14371 }
14372 14372
14373 /// Given the output values from getHalfShuffleMask(), create a half width
14374 /// shuffle of extracted vectors followed by an insert back to full width.
14375 static SDValue getShuffleHalfVectors(const SDLoc &DL, SDValue V1, SDValue V2,
14376 ArrayRef<int> HalfMask, int HalfIdx1,
14377 int HalfIdx2, bool UndefLower,
14378 SelectionDAG &DAG) {
14379 assert(V1.getValueType() == V2.getValueType() && "Different sized vectors?");
14380 assert(V1.getValueType().isSimple() && "Expecting only simple types");
14381
14382 MVT VT = V1.getSimpleValueType();
14383 unsigned NumElts = VT.getVectorNumElements();
14384 unsigned HalfNumElts = NumElts / 2;
14385 MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(), HalfNumElts);
14386
14387 auto getHalfVector = [&](int HalfIdx) {
14388 if (HalfIdx < 0)
14389 return DAG.getUNDEF(HalfVT);
14390 SDValue V = (HalfIdx < 2 ? V1 : V2);
14391 HalfIdx = (HalfIdx % 2) * HalfNumElts;
14392 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V,
14393 DAG.getIntPtrConstant(HalfIdx, DL));
14394 };
14395
14396 // ins undef, (shuf (ext V1, HalfIdx1), (ext V2, HalfIdx2), HalfMask), Offset
14397 SDValue Half1 = getHalfVector(HalfIdx1);
14398 SDValue Half2 = getHalfVector(HalfIdx2);
14399 SDValue V = DAG.getVectorShuffle(HalfVT, DL, Half1, Half2, HalfMask);
14400 unsigned Offset = UndefLower ? HalfNumElts : 0;
14401 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V,
14402 DAG.getIntPtrConstant(Offset, DL));
14403 }
14404
14373 14405 /// Lower shuffles where an entire half of a 256 or 512-bit vector is UNDEF.
14374 14406 /// This allows for fast cases such as subvector extraction/insertion
14375 14407 /// or shuffling smaller vector types which can lower more efficiently.
14449 14481 if (VT.is512BitVector() && (UndefLower || NumUpperHalves != 0))
14450 14482 return SDValue();
14451 14483
14452 auto GetHalfVector = [&](int HalfIdx) {
14453 if (HalfIdx < 0)
14454 return DAG.getUNDEF(HalfVT);
14455 SDValue V = (HalfIdx < 2 ? V1 : V2);
14456 HalfIdx = (HalfIdx % 2) * HalfNumElts;
14457 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V,
14458 DAG.getIntPtrConstant(HalfIdx, DL));
14459 };
14460
14461 SDValue Half1 = GetHalfVector(HalfIdx1);
14462 SDValue Half2 = GetHalfVector(HalfIdx2);
14463 SDValue V = DAG.getVectorShuffle(HalfVT, DL, Half1, Half2, HalfMask);
14464 unsigned Offset = UndefLower ? HalfNumElts : 0;
14465 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V,
14466 DAG.getIntPtrConstant(Offset, DL));
14484 return getShuffleHalfVectors(DL, V1, V2, HalfMask, HalfIdx1, HalfIdx2,
14485 UndefLower, DAG);
14467 14486 }
14468 14487
14469 14488 /// Test whether the specified input (0 or 1) is in-place blended by the
32352 32371 (HalfIdx1 % 2 == 1) || (HalfIdx2 % 2 == 1))
32353 32372 return SDValue();
32354 32373
32355 // Create 4 instructions to replace the unnecessarily wide shuffle.
32374 // Create a half-width shuffle to replace the unnecessarily wide shuffle.
32356 32375 // The trick is knowing that all of the insert/extract are actually free
32357 // subregister (zmm->ymm or ymm->xmm) ops. That leaves us with a shuffle
32376 // subregister (zmm<->ymm or ymm<->xmm) ops. That leaves us with a shuffle
32358 32377 // of narrow inputs into a narrow output, and that is always cheaper than
32359 32378 // the wide shuffle that we started with.
32360 unsigned NumElts = Mask.size();
32361 SDValue Op0 = Shuf->getOperand(0);
32362 SDValue Op1 = Shuf->getOperand(1);
32363 SDLoc DL(Shuf);
32364 SDValue Index0 = DAG.getIntPtrConstant(0, DL);
32365 MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(), NumElts / 2);
32366 SDValue Extr0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, Op0, Index0);
32367 SDValue Extr1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, Op1, Index0);
32368 SDValue NewShuf = DAG.getVectorShuffle(HalfVT, DL, Extr0, Extr1, HalfMask);
32369 SDValue UndefV = DAG.getUNDEF(VT);
32370 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, UndefV, NewShuf, Index0);
32379 return getShuffleHalfVectors(SDLoc(Shuf), Shuf->getOperand(0),
32380 Shuf->getOperand(1), HalfMask, HalfIdx1,
32381 HalfIdx2, false, DAG);
32371 32382 }
32372 32383
32373 32384 static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,