llvm.org GIT mirror llvm / 6f4f46c
[X86] Generate VPSHUFB for in-place v16i16 shuffles This used to resort to splitting the 256-bit operation into two 128-bit shuffles and then recombining the results. Fixes <rdar://problem/16167303> git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@204735 91177308-0d34-0410-b5e6-96231b3b80d8 Adam Nemet 6 years ago
2 changed file(s) with 47 addition(s) and 0 deletion(s). Raw diff Collapse all Expand all
65756575 return NewV;
65766576 }
65776577
6578 /// \brief v16i16 shuffles
6579 ///
6580 /// FIXME: We only support generation of a single pshufb currently. We can
6581 /// generalize the other applicable cases from LowerVECTOR_SHUFFLEv8i16 as
6582 /// well (e.g 2 x pshufb + 1 x por).
6583 static SDValue
6584 LowerVECTOR_SHUFFLEv16i16(SDValue Op, SelectionDAG &DAG) {
6585 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
6586 SDValue V1 = SVOp->getOperand(0);
6587 SDValue V2 = SVOp->getOperand(1);
6588 SDLoc dl(SVOp);
6589
6590 if (V2.getOpcode() != ISD::UNDEF)
6591 return SDValue();
6592
6593 SmallVector<int, 16> MaskVals(SVOp->getMask().begin(), SVOp->getMask().end());
6594 return getPSHUFB(MaskVals, V1, dl, DAG);
6595 }
6596
65786597 // v16i8 shuffles - Prefer shuffles in the following order:
65796598 // 1. [ssse3] 1 x pshufb
65806599 // 2. [ssse3] 2 x pshufb + 1 x por
76307649 // Handle v8i16 specifically since SSE can do byte extraction and insertion.
76317650 if (VT == MVT::v8i16) {
76327651 SDValue NewOp = LowerVECTOR_SHUFFLEv8i16(Op, Subtarget, DAG);
7652 if (NewOp.getNode())
7653 return NewOp;
7654 }
7655
7656 if (VT == MVT::v16i16 && Subtarget->hasInt256()) {
7657 SDValue NewOp = LowerVECTOR_SHUFFLEv16i16(Op, DAG);
76337658 if (NewOp.getNode())
76347659 return NewOp;
76357660 }
0 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 | FileCheck %s
1
2 define void @shuffle_v16i16(<16 x i16>* %a) {
3 ; CHECK-LABEL: shuffle_v16i16:
4 ; CHECK: vpshufb {{.*}}%ymm
5 ; CHECK-NOT: vpshufb {{.*}}%xmm
6 entry:
7 %0 = load <16 x i16>* %a, align 32
8 %shuffle = shufflevector <16 x i16> %0, <16 x i16> undef, <16 x i32>
9 store <16 x i16> %shuffle, <16 x i16>* %a, align 32
10 ret void
11 }
12
13 define void @shuffle_v16i16_lanecrossing(<16 x i16>* %a) {
14 ; CHECK-LABEL: shuffle_v16i16_lanecrossing:
15 ; CHECK-NOT: vpshufb {{.*}}%ymm
16 entry:
17 %0 = load <16 x i16>* %a, align 32
18 %shuffle = shufflevector <16 x i16> %0, <16 x i16> undef, <16 x i32>
19 store <16 x i16> %shuffle, <16 x i16>* %a, align 32
20 ret void
21 }