llvm.org GIT mirror llvm / 94141a4
Separate the check for blend shuffle_vector masks Summary: Separate the check for blend shuffle_vector masks into isBlendMask. This function will also be used to check if a vector shuffle is legal. No change in functionality was intended, but we ended up improving codegen on two tests, which were being (more) optimized only if the resulting shuffle was legal. Reviewers: nadav, delena, andreadb Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D3964 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209923 91177308-0d34-0410-b5e6-96231b3b80d8 Filipe Cabecinhas 6 years ago
2 changed file(s) with 62 addition(s) and 45 deletion(s). Raw diff Collapse all Expand all
64286428 return LowerAVXCONCAT_VECTORS(Op, DAG);
64296429 }
64306430
// Decide whether the shuffle mask MaskVals, for vector type VT, has the form
// this file lowers to a blend with an immediate, and compute that immediate.
//
// MaskVals  shuffle mask, indexed per result element and read as int.
//           NOTE(review): the bare `ArrayRef` looks like extraction damage
//           (presumably ArrayRef<int>) - confirm against the real source.
// VT        vector type of the shuffle.
// hasSSE41  subtarget feature flag; when false the function returns false.
// hasInt256 subtarget feature flag; when false, v16i16 is rejected.
// MaskOut   optional out-parameter. It is written only when the function
//           returns true and the pointer is non-null; bit i is set for each
//           position i whose mask index is i + NumElems.
//
// Returns true when every checked position either keeps index i (or is
// negative) or selects index i + NumElems; returns false otherwise, and
// also for 512-bit vectors and for i8 element types.
6431 static bool isBlendMask(ArrayRef MaskVals, MVT VT, bool hasSSE41,
6432 bool hasInt256, unsigned *MaskOut = nullptr) {
6433 MVT EltVT = VT.getVectorElementType();
6434
6435 // There is no blend with immediate in AVX-512.
6436 if (VT.is512BitVector())
6437 return false;
6438
// Reject when SSE4.1 is unavailable or the elements are i8.
6439 if (!hasSSE41 || EltVT == MVT::i8)
6440 return false;
// v16i16 is only accepted when hasInt256 is set.
6441 if (!hasInt256 && VT == MVT::v16i16)
6442 return false;
6443
6444 unsigned MaskValue = 0;
6445 unsigned NumElems = VT.getVectorNumElements();
6446 // There are 2 lanes if (NumElems > 8), and 1 lane otherwise.
6447 unsigned NumLanes = (NumElems - 1) / 8 + 1;
6448 unsigned NumElemsInLane = NumElems / NumLanes;
6449
6450 // Blend for v16i16 should be symmetric for both lanes.
// Only the first lane's positions are iterated; when there are two lanes the
// matching position of the second lane is checked in the same iteration, so
// a single per-lane bit pattern describes both lanes.
6451 for (unsigned i = 0; i < NumElemsInLane; ++i) {
6452
// With a single lane, -1 stands in for the second-lane index so that the
// "SndLaneEltIdx < 0" tests below always pass.
6453 int SndLaneEltIdx = (NumLanes == 2) ? MaskVals[i + NumElemsInLane] : -1;
6454 int EltIdx = MaskVals[i];
6455
// Position keeps its own index (or is negative) in both lanes: bit i stays 0.
6456 if ((EltIdx < 0 || EltIdx == (int)i) &&
6457 (SndLaneEltIdx < 0 || SndLaneEltIdx == (int)(i + NumElemsInLane)))
6458 continue;
6459
// Position selects index i + NumElems, and the second lane (if any) agrees
// or is negative: set bit i. A negative EltIdx never matches here because of
// the unsigned comparison, so a negative first-lane index paired with a
// second-lane index of i + NumElems + NumElemsInLane is rejected.
6460 if (((unsigned)EltIdx == (i + NumElems)) &&
6461 (SndLaneEltIdx < 0 ||
6462 (unsigned)SndLaneEltIdx == i + NumElems + NumElemsInLane))
6463 MaskValue |= (1 << i);
6464 else
6465 return false;
6466 }
6467
// Publish the immediate only on success, and only if the caller asked for it.
6468 if (MaskOut)
6469 *MaskOut = MaskValue;
6470 return true;
6471 }
6472
64316473 // Try to lower a shuffle node into a simple blend instruction.
6432 static SDValue
6433 LowerVECTOR_SHUFFLEtoBlend(ShuffleVectorSDNode *SVOp,
6434 const X86Subtarget *Subtarget, SelectionDAG &DAG) {
6474 // This function assumes isBlendMask returns true for this
6475 // ShuffleVectorSDNode
6476 static SDValue LowerVECTOR_SHUFFLEtoBlend(ShuffleVectorSDNode *SVOp,
6477 unsigned MaskValue,
6478 const X86Subtarget *Subtarget,
6479 SelectionDAG &DAG) {
6480 MVT VT = SVOp->getSimpleValueType(0);
6481 MVT EltVT = VT.getVectorElementType();
6482 assert(isBlendMask(SVOp->getMask(), VT, Subtarget->hasSSE41(),
6483 Subtarget->hasInt256() && "Trying to lower a "
6484 "VECTOR_SHUFFLE to a Blend but "
6485 "with the wrong mask"));
64356486 SDValue V1 = SVOp->getOperand(0);
64366487 SDValue V2 = SVOp->getOperand(1);
64376488 SDLoc dl(SVOp);
6438 MVT VT = SVOp->getSimpleValueType(0);
6439 MVT EltVT = VT.getVectorElementType();
64406489 unsigned NumElems = VT.getVectorNumElements();
6441
6442 // There is no blend with immediate in AVX-512.
6443 if (VT.is512BitVector())
6444 return SDValue();
6445
6446 if (!Subtarget->hasSSE41() || EltVT == MVT::i8)
6447 return SDValue();
6448 if (!Subtarget->hasInt256() && VT == MVT::v16i16)
6449 return SDValue();
6450
6451 // Check the mask for BLEND and build the value.
6452 unsigned MaskValue = 0;
6453 // There are 2 lanes if (NumElems > 8), and 1 lane otherwise.
6454 unsigned NumLanes = (NumElems-1)/8 + 1;
6455 unsigned NumElemsInLane = NumElems / NumLanes;
6456
6457 // Blend for v16i16 should be symetric for the both lanes.
6458 for (unsigned i = 0; i < NumElemsInLane; ++i) {
6459
6460 int SndLaneEltIdx = (NumLanes == 2) ?
6461 SVOp->getMaskElt(i + NumElemsInLane) : -1;
6462 int EltIdx = SVOp->getMaskElt(i);
6463
6464 if ((EltIdx < 0 || EltIdx == (int)i) &&
6465 (SndLaneEltIdx < 0 || SndLaneEltIdx == (int)(i + NumElemsInLane)))
6466 continue;
6467
6468 if (((unsigned)EltIdx == (i + NumElems)) &&
6469 (SndLaneEltIdx < 0 ||
6470 (unsigned)SndLaneEltIdx == i + NumElems + NumElemsInLane))
6471 MaskValue |= (1<<i);
6472 else
6473 return SDValue();
6474 }
64756490
64766491 // Convert i32 vectors to floating point if it is not AVX2.
64776492 // AVX2 introduced VPBLENDD instruction for 128 and 256-bit vectors.
79097924 return getTargetShuffleNode(X86ISD::VPERM2X128, dl, VT, V1,
79107925 V2, getShuffleVPERM2X128Immediate(SVOp), DAG);
79117926
7912 SDValue BlendOp = LowerVECTOR_SHUFFLEtoBlend(SVOp, Subtarget, DAG);
7913 if (BlendOp.getNode())
7914 return BlendOp;
7927 unsigned MaskValue;
7928 if (isBlendMask(M, VT, Subtarget->hasSSE41(), Subtarget->hasInt256(),
7929 &MaskValue))
7930 return LowerVECTOR_SHUFFLEtoBlend(SVOp, MaskValue, Subtarget, DAG);
79157931
79167932 if (Subtarget->hasSSE41() && isINSERTPSMask(M, VT))
79177933 return getINSERTPS(SVOp, dl, DAG);
1517215188 isUNPCKLMask(M, SVT, Subtarget->hasInt256()) ||
1517315189 isUNPCKHMask(M, SVT, Subtarget->hasInt256()) ||
1517415190 isUNPCKL_v_undef_Mask(M, SVT, Subtarget->hasInt256()) ||
15175 isUNPCKH_v_undef_Mask(M, SVT, Subtarget->hasInt256()));
15191 isUNPCKH_v_undef_Mask(M, SVT, Subtarget->hasInt256()) ||
15192 isBlendMask(M, SVT, Subtarget->hasSSE41(), Subtarget->hasInt256()));
1517615193 }
1517715194
1517815195 bool
2424 }
2525 ; CHECK-LABEL: test2
2626 ; CHECK-NOT: xorps
27 ; CHECK: shufps
27 ; CHECK: movsd
2828 ; CHECK: ret
2929
3030
110110 }
111111 ; CHECK-LABEL: test9
112112 ; CHECK-NOT: xorps
113 ; CHECK: shufps
113 ; CHECK: movsd
114114 ; CHECK: ret
115115
116116