llvm.org GIT mirror llvm / ed2e13d
Add target specific ISD node types for SSE/AVX vector shuffle instructions and change all the code that used to create intrinsic nodes to create the new nodes instead. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@148664 91177308-0d34-0410-b5e6-96231b3b80d8 Craig Topper 8 years ago
4 changed file(s) with 415 addition(s) and 278 deletion(s). Raw diff Collapse all Expand all
47884788 const TargetLowering &TLI, DebugLoc dl) {
47894789 assert(VT.getSizeInBits() == 128 && "Unknown type for VShift");
47904790 EVT ShVT = MVT::v2i64;
4791 unsigned Opc = isLeft ? X86ISD::VSHL : X86ISD::VSRL;
4791 unsigned Opc = isLeft ? X86ISD::VSHLDQ : X86ISD::VSRLDQ;
47924792 SrcOp = DAG.getNode(ISD::BITCAST, dl, ShVT, SrcOp);
47934793 return DAG.getNode(ISD::BITCAST, dl, VT,
47944794 DAG.getNode(Opc, dl, ShVT, SrcOp,
65866586 return CommuteVectorShuffle(SVOp, DAG);
65876587
65886588 if (isShift) {
6589 // No better options. Use a vshl / vsrl.
6589 // No better options. Use a vshldq / vsrldq.
65906590 EVT EltVT = VT.getVectorElementType();
65916591 ShAmt *= EltVT.getSizeInBits();
65926592 return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl);
1000910009 return Res;
1001010010 }
1001110011
10012 // getTargetVShiftNOde - Handle vector element shifts where the shift amount
10013 // may or may not be a constant. Takes immediate version of shift as input.
10014 static SDValue getTargetVShiftNode(unsigned Opc, DebugLoc dl, EVT VT,
10015 SDValue SrcOp, SDValue ShAmt,
10016 SelectionDAG &DAG) {
10017 assert(ShAmt.getValueType() == MVT::i32 && "ShAmt is not i32");
10018
10019 if (isa(ShAmt)) {
10020 switch (Opc) {
10021 default: llvm_unreachable("Unknown target vector shift node");
10022 case X86ISD::VSHLI:
10023 case X86ISD::VSRLI:
10024 case X86ISD::VSRAI:
10025 return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt);
10026 }
10027 }
10028
10029 // Change opcode to non-immediate version
10030 switch (Opc) {
10031 default: llvm_unreachable("Unknown target vector shift node");
10032 case X86ISD::VSHLI: Opc = X86ISD::VSHL; break;
10033 case X86ISD::VSRLI: Opc = X86ISD::VSRL; break;
10034 case X86ISD::VSRAI: Opc = X86ISD::VSRA; break;
10035 }
10036
10037 // Need to build a vector containing shift amount
10038 // Shift amount is 32-bits, but SSE instructions read 64-bit, so fill with 0
10039 SDValue ShOps[4];
10040 ShOps[0] = ShAmt;
10041 ShOps[1] = DAG.getConstant(0, MVT::i32);
10042 ShOps[2] = DAG.getUNDEF(MVT::i32);
10043 ShOps[3] = DAG.getUNDEF(MVT::i32);
10044 ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, &ShOps[0], 4);
10045 ShAmt = DAG.getNode(ISD::BITCAST, dl, VT, ShAmt);
10046 return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt);
10047 }
10048
1001210049 SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
1001310050
1001410051 EVT VT = Op.getValueType();
1002610063 if (ConstantSDNode *C = dyn_cast(SclrAmt)) {
1002710064 uint64_t ShiftAmt = C->getZExtValue();
1002810065
10029 if (VT == MVT::v16i8 && Op.getOpcode() == ISD::SHL) {
10030 // Make a large shift.
10031 SDValue SHL =
10032 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
10033 DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32),
10034 R, DAG.getConstant(ShiftAmt, MVT::i32));
10035 // Zero out the rightmost bits.
10036 SmallVector V(16, DAG.getConstant(uint8_t(-1U << ShiftAmt),
10037 MVT::i8));
10038 return DAG.getNode(ISD::AND, dl, VT, SHL,
10039 DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 16));
10066 if (VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
10067 (Subtarget->hasAVX2() &&
10068 (VT == MVT::v4i64 || VT == MVT::v8i32 || VT == MVT::v16i16))) {
10069 if (Op.getOpcode() == ISD::SHL)
10070 return DAG.getNode(X86ISD::VSHLI, dl, VT, R,
10071 DAG.getConstant(ShiftAmt, MVT::i32));
10072 if (Op.getOpcode() == ISD::SRL)
10073 return DAG.getNode(X86ISD::VSRLI, dl, VT, R,
10074 DAG.getConstant(ShiftAmt, MVT::i32));
10075 if (Op.getOpcode() == ISD::SRA && VT != MVT::v2i64 && VT != MVT::v4i64)
10076 return DAG.getNode(X86ISD::VSRAI, dl, VT, R,
10077 DAG.getConstant(ShiftAmt, MVT::i32));
1004010078 }
1004110079
10042 if (VT == MVT::v2i64 && Op.getOpcode() == ISD::SHL)
10043 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
10044 DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32),
10045 R, DAG.getConstant(ShiftAmt, MVT::i32));
10046
10047 if (VT == MVT::v4i32 && Op.getOpcode() == ISD::SHL)
10048 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
10049 DAG.getConstant(Intrinsic::x86_sse2_pslli_d, MVT::i32),
10050 R, DAG.getConstant(ShiftAmt, MVT::i32));
10051
10052 if (VT == MVT::v8i16 && Op.getOpcode() == ISD::SHL)
10053 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
10054 DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32),
10055 R, DAG.getConstant(ShiftAmt, MVT::i32));
10056
10057 if (VT == MVT::v16i8 && Op.getOpcode() == ISD::SRL) {
10058 // Make a large shift.
10059 SDValue SRL =
10060 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
10061 DAG.getConstant(Intrinsic::x86_sse2_psrli_w, MVT::i32),
10062 R, DAG.getConstant(ShiftAmt, MVT::i32));
10063 // Zero out the leftmost bits.
10064 SmallVector V(16, DAG.getConstant(uint8_t(-1U) >> ShiftAmt,
10065 MVT::i8));
10066 return DAG.getNode(ISD::AND, dl, VT, SRL,
10067 DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 16));
10068 }
10069
10070 if (VT == MVT::v2i64 && Op.getOpcode() == ISD::SRL)
10071 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
10072 DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32),
10073 R, DAG.getConstant(ShiftAmt, MVT::i32));
10074
10075 if (VT == MVT::v4i32 && Op.getOpcode() == ISD::SRL)
10076 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
10077 DAG.getConstant(Intrinsic::x86_sse2_psrli_d, MVT::i32),
10078 R, DAG.getConstant(ShiftAmt, MVT::i32));
10079
10080 if (VT == MVT::v8i16 && Op.getOpcode() == ISD::SRL)
10081 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
10082 DAG.getConstant(Intrinsic::x86_sse2_psrli_w, MVT::i32),
10083 R, DAG.getConstant(ShiftAmt, MVT::i32));
10084
10085 if (VT == MVT::v4i32 && Op.getOpcode() == ISD::SRA)
10086 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
10087 DAG.getConstant(Intrinsic::x86_sse2_psrai_d, MVT::i32),
10088 R, DAG.getConstant(ShiftAmt, MVT::i32));
10089
10090 if (VT == MVT::v8i16 && Op.getOpcode() == ISD::SRA)
10091 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
10092 DAG.getConstant(Intrinsic::x86_sse2_psrai_w, MVT::i32),
10093 R, DAG.getConstant(ShiftAmt, MVT::i32));
10094
10095 if (VT == MVT::v16i8 && Op.getOpcode() == ISD::SRA) {
10096 if (ShiftAmt == 7) {
10097 // R s>> 7 === R s< 0
10098 SDValue Zeros = getZeroVector(VT, /* HasSSE2 */true,
10099 /* HasAVX2 */false, DAG, dl);
10100 return DAG.getNode(X86ISD::PCMPGTB, dl, VT, Zeros, R);
10080 if (VT == MVT::v16i8) {
10081 if (Op.getOpcode() == ISD::SHL) {
10082 // Make a large shift.
10083 SDValue SHL = DAG.getNode(X86ISD::VSHLI, dl, MVT::v8i16, R,
10084 DAG.getConstant(ShiftAmt, MVT::i32));
10085 SHL = DAG.getNode(ISD::BITCAST, dl, VT, SHL);
10086 // Zero out the rightmost bits.
10087 SmallVector V(16,
10088 DAG.getConstant(uint8_t(-1U << ShiftAmt),
10089 MVT::i8));
10090 return DAG.getNode(ISD::AND, dl, VT, SHL,
10091 DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 16));
1010110092 }
10102
10103 // R s>> a === ((R u>> a) ^ m) - m
10104 SDValue Res = DAG.getNode(ISD::SRL, dl, VT, R, Amt);
10105 SmallVector V(16, DAG.getConstant(128 >> ShiftAmt,
10106 MVT::i8));
10107 SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 16);
10108 Res = DAG.getNode(ISD::XOR, dl, VT, Res, Mask);
10109 Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask);
10110 return Res;
10093 if (Op.getOpcode() == ISD::SRL) {
10094 // Make a large shift.
10095 SDValue SRL = DAG.getNode(X86ISD::VSRLI, dl, MVT::v8i16, R,
10096 DAG.getConstant(ShiftAmt, MVT::i32));
10097 SRL = DAG.getNode(ISD::BITCAST, dl, VT, SRL);
10098 // Zero out the leftmost bits.
10099 SmallVector V(16,
10100 DAG.getConstant(uint8_t(-1U) >> ShiftAmt,
10101 MVT::i8));
10102 return DAG.getNode(ISD::AND, dl, VT, SRL,
10103 DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 16));
10104 }
10105 if (Op.getOpcode() == ISD::SRA) {
10106 if (ShiftAmt == 7) {
10107 // R s>> 7 === R s< 0
10108 SDValue Zeros = getZeroVector(VT, /* HasSSE2 */true,
10109 /* HasAVX2 */false, DAG, dl);
10110 return DAG.getNode(X86ISD::PCMPGTB, dl, VT, Zeros, R);
10111 }
10112
10113 // R s>> a === ((R u>> a) ^ m) - m
10114 SDValue Res = DAG.getNode(ISD::SRL, dl, VT, R, Amt);
10115 SmallVector V(16, DAG.getConstant(128 >> ShiftAmt,
10116 MVT::i8));
10117 SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 16);
10118 Res = DAG.getNode(ISD::XOR, dl, VT, Res, Mask);
10119 Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask);
10120 return Res;
10121 }
1011110122 }
1011210123
1011310124 if (Subtarget->hasAVX2() && VT == MVT::v32i8) {
1011410125 if (Op.getOpcode() == ISD::SHL) {
1011510126 // Make a large shift.
10116 SDValue SHL =
10117 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
10118 DAG.getConstant(Intrinsic::x86_avx2_pslli_w, MVT::i32),
10119 R, DAG.getConstant(ShiftAmt, MVT::i32));
10127 SDValue SHL = DAG.getNode(X86ISD::VSHLI, dl, MVT::v16i16, R,
10128 DAG.getConstant(ShiftAmt, MVT::i32));
10129 SHL = DAG.getNode(ISD::BITCAST, dl, VT, SHL);
1012010130 // Zero out the rightmost bits.
10121 SmallVector V(32, DAG.getConstant(uint8_t(-1U << ShiftAmt),
10122 MVT::i8));
10131 SmallVector V(32,
10132 DAG.getConstant(uint8_t(-1U << ShiftAmt),
10133 MVT::i8));
1012310134 return DAG.getNode(ISD::AND, dl, VT, SHL,
1012410135 DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 32));
1012510136 }
1012610137 if (Op.getOpcode() == ISD::SRL) {
1012710138 // Make a large shift.
10128 SDValue SRL =
10129 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
10130 DAG.getConstant(Intrinsic::x86_avx2_psrli_w, MVT::i32),
10131 R, DAG.getConstant(ShiftAmt, MVT::i32));
10139 SDValue SRL = DAG.getNode(X86ISD::VSRLI, dl, MVT::v16i16, R,
10140 DAG.getConstant(ShiftAmt, MVT::i32));
10141 SRL = DAG.getNode(ISD::BITCAST, dl, VT, SRL);
1013210142 // Zero out the leftmost bits.
10133 SmallVector V(32, DAG.getConstant(uint8_t(-1U) >> ShiftAmt,
10134 MVT::i8));
10143 SmallVector V(32,
10144 DAG.getConstant(uint8_t(-1U) >> ShiftAmt,
10145 MVT::i8));
1013510146 return DAG.getNode(ISD::AND, dl, VT, SRL,
1013610147 DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 32));
1013710148 }
1015810169
1015910170 // Lower SHL with variable shift amount.
1016010171 if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SHL) {
10161 Op = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
10162 DAG.getConstant(Intrinsic::x86_sse2_pslli_d, MVT::i32),
10163 Op.getOperand(1), DAG.getConstant(23, MVT::i32));
10172 Op = DAG.getNode(X86ISD::VSHLI, dl, VT, Op.getOperand(1),
10173 DAG.getConstant(23, MVT::i32));
1016410174
1016510175 ConstantInt *CI = ConstantInt::get(*Context, APInt(32, 0x3f800000U));
1016610176
1018010190 assert(Subtarget->hasSSE2() && "Need SSE2 for pslli/pcmpeq.");
1018110191
1018210192 // a = a << 5;
10183 Op = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
10184 DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32),
10185 Op.getOperand(1), DAG.getConstant(5, MVT::i32));
10193 Op = DAG.getNode(X86ISD::VSHLI, dl, MVT::v8i16, Op.getOperand(1),
10194 DAG.getConstant(5, MVT::i32));
10195 Op = DAG.getNode(ISD::BITCAST, dl, VT, Op);
1018610196
1018710197 // Turn 'a' into a mask suitable for VSELECT
1018810198 SDValue VSelM = DAG.getConstant(0x80, VT);
1018910199 SDValue OpVSel = DAG.getNode(ISD::AND, dl, VT, VSelM, Op);
10190 OpVSel = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
10191 DAG.getConstant(Intrinsic::x86_sse2_pcmpeq_b, MVT::i32),
10192 OpVSel, VSelM);
10200 OpVSel = DAG.getNode(X86ISD::PCMPEQB, dl, VT, OpVSel, VSelM);
1019310201
1019410202 SDValue CM1 = DAG.getConstant(0x0f, VT);
1019510203 SDValue CM2 = DAG.getConstant(0x3f, VT);
1019610204
1019710205 // r = VSELECT(r, psllw(r & (char16)15, 4), a);
1019810206 SDValue M = DAG.getNode(ISD::AND, dl, VT, R, CM1);
10199 M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
10200 DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M,
10201 DAG.getConstant(4, MVT::i32));
10207 M = getTargetVShiftNode(X86ISD::VSHLI, dl, MVT::v8i16, M,
10208 DAG.getConstant(4, MVT::i32), DAG);
10209 M = DAG.getNode(ISD::BITCAST, dl, VT, M);
1020210210 R = DAG.getNode(ISD::VSELECT, dl, VT, OpVSel, M, R);
1020310211
1020410212 // a += a
1020510213 Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
1020610214 OpVSel = DAG.getNode(ISD::AND, dl, VT, VSelM, Op);
10207 OpVSel = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
10208 DAG.getConstant(Intrinsic::x86_sse2_pcmpeq_b, MVT::i32),
10209 OpVSel, VSelM);
10215 OpVSel = DAG.getNode(X86ISD::PCMPEQB, dl, VT, OpVSel, VSelM);
1021010216
1021110217 // r = VSELECT(r, psllw(r & (char16)63, 2), a);
1021210218 M = DAG.getNode(ISD::AND, dl, VT, R, CM2);
10213 M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
10214 DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M,
10215 DAG.getConstant(2, MVT::i32));
10219 M = getTargetVShiftNode(X86ISD::VSHLI, dl, MVT::v8i16, M,
10220 DAG.getConstant(2, MVT::i32), DAG);
10221 M = DAG.getNode(ISD::BITCAST, dl, VT, M);
1021610222 R = DAG.getNode(ISD::VSELECT, dl, VT, OpVSel, M, R);
1021710223
1021810224 // a += a
1021910225 Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
1022010226 OpVSel = DAG.getNode(ISD::AND, dl, VT, VSelM, Op);
10221 OpVSel = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
10222 DAG.getConstant(Intrinsic::x86_sse2_pcmpeq_b, MVT::i32),
10223 OpVSel, VSelM);
10227 OpVSel = DAG.getNode(X86ISD::PCMPEQB, dl, VT, OpVSel, VSelM);
1022410228
1022510229 // return VSELECT(r, r+r, a);
1022610230 R = DAG.getNode(ISD::VSELECT, dl, VT, OpVSel,
1023010234
1023110235 // Decompose 256-bit shifts into smaller 128-bit shifts.
1023210236 if (VT.getSizeInBits() == 256) {
10233 int NumElems = VT.getVectorNumElements();
10237 unsigned NumElems = VT.getVectorNumElements();
1023410238 MVT EltVT = VT.getVectorElementType().getSimpleVT();
1023510239 EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
1023610240
1024510249 // Constant shift amount
1024610250 SmallVector Amt1Csts;
1024710251 SmallVector Amt2Csts;
10248 for (int i = 0; i < NumElems/2; ++i)
10252 for (unsigned i = 0; i != NumElems/2; ++i)
1024910253 Amt1Csts.push_back(Amt->getOperand(i));
10250 for (int i = NumElems/2; i < NumElems; ++i)
10254 for (unsigned i = NumElems/2; i != NumElems; ++i)
1025110255 Amt2Csts.push_back(Amt->getOperand(i));
1025210256
1025310257 Amt1 = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT,
1035310357 EVT ExtraVT = cast(Op.getOperand(1))->getVT();
1035410358 EVT VT = Op.getValueType();
1035510359
10356 if (Subtarget->hasSSE2() && VT.isVector()) {
10357 unsigned BitsDiff = VT.getScalarType().getSizeInBits() -
10358 ExtraVT.getScalarType().getSizeInBits();
10359 SDValue ShAmt = DAG.getConstant(BitsDiff, MVT::i32);
10360
10361 unsigned SHLIntrinsicsID = 0;
10362 unsigned SRAIntrinsicsID = 0;
10363 switch (VT.getSimpleVT().SimpleTy) {
10364 default:
10360 if (!Subtarget->hasSSE2() || !VT.isVector())
10361 return SDValue();
10362
10363 unsigned BitsDiff = VT.getScalarType().getSizeInBits() -
10364 ExtraVT.getScalarType().getSizeInBits();
10365 SDValue ShAmt = DAG.getConstant(BitsDiff, MVT::i32);
10366
10367 switch (VT.getSimpleVT().SimpleTy) {
10368 default: return SDValue();
10369 case MVT::v8i32:
10370 case MVT::v16i16:
10371 if (!Subtarget->hasAVX())
1036510372 return SDValue();
10366 case MVT::v4i32:
10367 SHLIntrinsicsID = Intrinsic::x86_sse2_pslli_d;
10368 SRAIntrinsicsID = Intrinsic::x86_sse2_psrai_d;
10369 break;
10370 case MVT::v8i16:
10371 SHLIntrinsicsID = Intrinsic::x86_sse2_pslli_w;
10372 SRAIntrinsicsID = Intrinsic::x86_sse2_psrai_w;
10373 break;
10374 case MVT::v8i32:
10375 case MVT::v16i16:
10376 if (!Subtarget->hasAVX())
10377 return SDValue();
10378 if (!Subtarget->hasAVX2()) {
10379 // needs to be split
10380 int NumElems = VT.getVectorNumElements();
10381 SDValue Idx0 = DAG.getConstant(0, MVT::i32);
10382 SDValue Idx1 = DAG.getConstant(NumElems/2, MVT::i32);
10383
10384 // Extract the LHS vectors
10385 SDValue LHS = Op.getOperand(0);
10386 SDValue LHS1 = Extract128BitVector(LHS, Idx0, DAG, dl);
10387 SDValue LHS2 = Extract128BitVector(LHS, Idx1, DAG, dl);
10388
10389 MVT EltVT = VT.getVectorElementType().getSimpleVT();
10390 EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
10391
10392 EVT ExtraEltVT = ExtraVT.getVectorElementType();
10393 int ExtraNumElems = ExtraVT.getVectorNumElements();
10394 ExtraVT = EVT::getVectorVT(*DAG.getContext(), ExtraEltVT,
10395 ExtraNumElems/2);
10396 SDValue Extra = DAG.getValueType(ExtraVT);
10397
10398 LHS1 = DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, Extra);
10399 LHS2 = DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, Extra);
10400
10401 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, LHS1, LHS2);;
10402 }
10403 if (VT == MVT::v8i32) {
10404 SHLIntrinsicsID = Intrinsic::x86_avx2_pslli_d;
10405 SRAIntrinsicsID = Intrinsic::x86_avx2_psrai_d;
10406 } else {
10407 SHLIntrinsicsID = Intrinsic::x86_avx2_pslli_w;
10408 SRAIntrinsicsID = Intrinsic::x86_avx2_psrai_w;
10409 }
10410 }
10411
10412 SDValue Tmp1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
10413 DAG.getConstant(SHLIntrinsicsID, MVT::i32),
10414 Op.getOperand(0), ShAmt);
10415
10416 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
10417 DAG.getConstant(SRAIntrinsicsID, MVT::i32),
10418 Tmp1, ShAmt);
10419 }
10420
10421 return SDValue();
10373 if (!Subtarget->hasAVX2()) {
10374 // needs to be split
10375 int NumElems = VT.getVectorNumElements();
10376 SDValue Idx0 = DAG.getConstant(0, MVT::i32);
10377 SDValue Idx1 = DAG.getConstant(NumElems/2, MVT::i32);
10378
10379 // Extract the LHS vectors
10380 SDValue LHS = Op.getOperand(0);
10381 SDValue LHS1 = Extract128BitVector(LHS, Idx0, DAG, dl);
10382 SDValue LHS2 = Extract128BitVector(LHS, Idx1, DAG, dl);
10383
10384 MVT EltVT = VT.getVectorElementType().getSimpleVT();
10385 EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
10386
10387 EVT ExtraEltVT = ExtraVT.getVectorElementType();
10388 int ExtraNumElems = ExtraVT.getVectorNumElements();
10389 ExtraVT = EVT::getVectorVT(*DAG.getContext(), ExtraEltVT,
10390 ExtraNumElems/2);
10391 SDValue Extra = DAG.getValueType(ExtraVT);
10392
10393 LHS1 = DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, Extra);
10394 LHS2 = DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, Extra);
10395
10396 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, LHS1, LHS2);;
10397 }
10398 // fall through
10399 case MVT::v4i32:
10400 case MVT::v8i16: {
10401 SDValue Tmp1 = getTargetVShiftNode(X86ISD::VSHLI, dl, VT,
10402 Op.getOperand(0), ShAmt, DAG);
10403 return getTargetVShiftNode(X86ISD::VSRAI, dl, VT, Tmp1, ShAmt, DAG);
10404 }
10405 }
1042210406 }
1042310407
1042410408
1095010934 case X86ISD::ATOMNAND64_DAG: return "X86ISD::ATOMNAND64_DAG";
1095110935 case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL";
1095210936 case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD";
10937 case X86ISD::VSHLDQ: return "X86ISD::VSHLDQ";
10938 case X86ISD::VSRLDQ: return "X86ISD::VSRLDQ";
1095310939 case X86ISD::VSHL: return "X86ISD::VSHL";
1095410940 case X86ISD::VSRL: return "X86ISD::VSRL";
10941 case X86ISD::VSRA: return "X86ISD::VSRA";
10942 case X86ISD::VSHLI: return "X86ISD::VSHLI";
10943 case X86ISD::VSRLI: return "X86ISD::VSRLI";
10944 case X86ISD::VSRAI: return "X86ISD::VSRAI";
1095510945 case X86ISD::CMPPD: return "X86ISD::CMPPD";
1095610946 case X86ISD::CMPPS: return "X86ISD::CMPPS";
1095710947 case X86ISD::PCMPEQB: return "X86ISD::PCMPEQB";
1348413474 default:
1348513475 llvm_unreachable("Unknown shift opcode!");
1348613476 case ISD::SHL:
13487 if (VT == MVT::v2i64)
13488 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
13489 DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32),
13490 ValOp, BaseShAmt);
13491 if (VT == MVT::v4i32)
13492 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
13493 DAG.getConstant(Intrinsic::x86_sse2_pslli_d, MVT::i32),
13494 ValOp, BaseShAmt);
13495 if (VT == MVT::v8i16)
13496 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
13497 DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32),
13498 ValOp, BaseShAmt);
13499 if (VT == MVT::v4i64)
13500 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
13501 DAG.getConstant(Intrinsic::x86_avx2_pslli_q, MVT::i32),
13502 ValOp, BaseShAmt);
13503 if (VT == MVT::v8i32)
13504 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
13505 DAG.getConstant(Intrinsic::x86_avx2_pslli_d, MVT::i32),
13506 ValOp, BaseShAmt);
13507 if (VT == MVT::v16i16)
13508 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
13509 DAG.getConstant(Intrinsic::x86_avx2_pslli_w, MVT::i32),
13510 ValOp, BaseShAmt);
13511 break;
13477 switch (VT.getSimpleVT().SimpleTy) {
13478 default: return SDValue();
13479 case MVT::v2i64:
13480 case MVT::v4i32:
13481 case MVT::v8i16:
13482 case MVT::v4i64:
13483 case MVT::v8i32:
13484 case MVT::v16i16:
13485 return getTargetVShiftNode(X86ISD::VSHLI, DL, VT, ValOp, BaseShAmt, DAG);
13486 }
1351213487 case ISD::SRA:
13513 if (VT == MVT::v4i32)
13514 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
13515 DAG.getConstant(Intrinsic::x86_sse2_psrai_d, MVT::i32),
13516 ValOp, BaseShAmt);
13517 if (VT == MVT::v8i16)
13518 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
13519 DAG.getConstant(Intrinsic::x86_sse2_psrai_w, MVT::i32),
13520 ValOp, BaseShAmt);
13521 if (VT == MVT::v8i32)
13522 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
13523 DAG.getConstant(Intrinsic::x86_avx2_psrai_d, MVT::i32),
13524 ValOp, BaseShAmt);
13525 if (VT == MVT::v16i16)
13526 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
13527 DAG.getConstant(Intrinsic::x86_avx2_psrai_w, MVT::i32),
13528 ValOp, BaseShAmt);
13529 break;
13488 switch (VT.getSimpleVT().SimpleTy) {
13489 default: return SDValue();
13490 case MVT::v4i32:
13491 case MVT::v8i16:
13492 case MVT::v8i32:
13493 case MVT::v16i16:
13494 return getTargetVShiftNode(X86ISD::VSRAI, DL, VT, ValOp, BaseShAmt, DAG);
13495 }
1353013496 case ISD::SRL:
13531 if (VT == MVT::v2i64)
13532 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
13533 DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32),
13534 ValOp, BaseShAmt);
13535 if (VT == MVT::v4i32)
13536 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
13537 DAG.getConstant(Intrinsic::x86_sse2_psrli_d, MVT::i32),
13538 ValOp, BaseShAmt);
13539 if (VT == MVT::v8i16)
13540 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
13541 DAG.getConstant(Intrinsic::x86_sse2_psrli_w, MVT::i32),
13542 ValOp, BaseShAmt);
13543 if (VT == MVT::v4i64)
13544 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
13545 DAG.getConstant(Intrinsic::x86_avx2_psrli_q, MVT::i32),
13546 ValOp, BaseShAmt);
13547 if (VT == MVT::v8i32)
13548 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
13549 DAG.getConstant(Intrinsic::x86_avx2_psrli_d, MVT::i32),
13550 ValOp, BaseShAmt);
13551 if (VT == MVT::v16i16)
13552 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
13553 DAG.getConstant(Intrinsic::x86_avx2_psrli_w, MVT::i32),
13554 ValOp, BaseShAmt);
13555 break;
13556 }
13557 return SDValue();
13497 switch (VT.getSimpleVT().SimpleTy) {
13498 default: return SDValue();
13499 case MVT::v2i64:
13500 case MVT::v4i32:
13501 case MVT::v8i16:
13502 case MVT::v4i64:
13503 case MVT::v8i32:
13504 case MVT::v16i16:
13505 return getTargetVShiftNode(X86ISD::VSRLI, DL, VT, ValOp, BaseShAmt, DAG);
13506 }
13507 }
1355813508 }
1355913509
1356013510
1379013740 Mask = Mask.getOperand(0);
1379113741 EVT MaskVT = Mask.getValueType();
1379213742
13793 // Validate that the Mask operand is a vector sra node. The sra node
13794 // will be an intrinsic.
13795 if (Mask.getOpcode() != ISD::INTRINSIC_WO_CHAIN)
13796 return SDValue();
13797
13743 // Validate that the Mask operand is a vector sra node.
1379813744 // FIXME: what to do for bytes, since there is a psignb/pblendvb, but
1379913745 // there is no psrai.b
13800 switch (cast(Mask.getOperand(0))->getZExtValue()) {
13801 case Intrinsic::x86_sse2_psrai_w:
13802 case Intrinsic::x86_sse2_psrai_d:
13803 case Intrinsic::x86_avx2_psrai_w:
13804 case Intrinsic::x86_avx2_psrai_d:
13805 break;
13806 default: return SDValue();
13807 }
13746 SDValue SraSrc, SraC;
13747 if (Mask.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
13748 switch (cast(Mask.getOperand(0))->getZExtValue()) {
13749 case Intrinsic::x86_sse2_psrai_w:
13750 case Intrinsic::x86_sse2_psrai_d:
13751 case Intrinsic::x86_avx2_psrai_w:
13752 case Intrinsic::x86_avx2_psrai_d:
13753 break;
13754 default: return SDValue();
13755 }
13756
13757 SraSrc = Mask.getOperand(1);
13758 SraC = Mask.getOperand(2);
13759 } else if (Mask.getOpcode() == X86ISD::VSRAI) {
13760 SraSrc = Mask.getOperand(0);
13761 SraC = Mask.getOperand(1);
13762 } else
13763 return SDValue();
1380813764
1380913765 // Check that the SRA is all signbits.
13810 SDValue SraC = Mask.getOperand(2);
1381113766 unsigned SraAmt = cast(SraC)->getZExtValue();
1381213767 unsigned EltBits = MaskVT.getVectorElementType().getSizeInBits();
1381313768 if ((SraAmt + 1) != EltBits)
1382213777 Y = Y.getOperand(0);
1382313778 if (Y.getOpcode() == ISD::SUB && Y.getOperand(1) == X &&
1382413779 ISD::isBuildVectorAllZeros(Y.getOperand(0).getNode()) &&
13825 X.getValueType() == MaskVT && X.getValueType() == Y.getValueType() &&
13826 (EltBits == 8 || EltBits == 16 || EltBits == 32)) {
13827 SDValue Sign = DAG.getNode(X86ISD::PSIGN, DL, MaskVT, X,
13828 Mask.getOperand(1));
13829 return DAG.getNode(ISD::BITCAST, DL, VT, Sign);
13780 X.getValueType() == MaskVT && Y.getValueType() == MaskVT) {
13781 assert((EltBits == 8 || EltBits == 16 || EltBits == 32) &&
13782 "Unsupported VT for PSIGN");
13783 Mask = DAG.getNode(X86ISD::PSIGN, DL, MaskVT, X, SraSrc);
13784 return DAG.getNode(ISD::BITCAST, DL, VT, Mask);
1383013785 }
1383113786 // PBLENDVB only available on SSE 4.1
1383213787 if (!Subtarget->hasSSE41())
218218 // VZEXT_MOVL - Vector move low and zero extend.
219219 VZEXT_MOVL,
220220
221 // VSHL, VSRL - Vector logical left / right shift.
222 VSHL, VSRL,
221 // VSHLDQ, VSRLDQ - 128-bit vector logical left / right shift
222 VSHLDQ, VSRLDQ,
223
224 // VSHL, VSRL, VSRA - Vector shift elements
225 VSHL, VSRL, VSRA,
226
227 // VSHLI, VSRLI, VSRAI - Vector shift elements by immediate
228 VSHLI, VSRLI, VSRAI,
223229
224230 // CMPPD, CMPPS - Vector double/float comparison.
225231 // CMPPD, CMPPS - Vector double/float comparison.
7272 SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>;
7373 def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
7474 [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
75 def X86vshl : SDNode<"X86ISD::VSHL", SDTIntShiftOp>;
76 def X86vshr : SDNode<"X86ISD::VSRL", SDTIntShiftOp>;
75 def X86vshldq : SDNode<"X86ISD::VSHLDQ", SDTIntShiftOp>;
76 def X86vshrdq : SDNode<"X86ISD::VSRLDQ", SDTIntShiftOp>;
7777 def X86cmpps : SDNode<"X86ISD::CMPPS", SDTX86VFCMP>;
7878 def X86cmppd : SDNode<"X86ISD::CMPPD", SDTX86VFCMP>;
7979 def X86pcmpeqb : SDNode<"X86ISD::PCMPEQB", SDTIntBinOp, [SDNPCommutative]>;
8484 def X86pcmpgtw : SDNode<"X86ISD::PCMPGTW", SDTIntBinOp>;
8585 def X86pcmpgtd : SDNode<"X86ISD::PCMPGTD", SDTIntBinOp>;
8686 def X86pcmpgtq : SDNode<"X86ISD::PCMPGTQ", SDTIntBinOp>;
87
88 def X86vshl : SDNode<"X86ISD::VSHL",
89 SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
90 SDTCisVec<2>]>>;
91 def X86vsrl : SDNode<"X86ISD::VSRL",
92 SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
93 SDTCisVec<2>]>>;
94 def X86vsra : SDNode<"X86ISD::VSRA",
95 SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
96 SDTCisVec<2>]>>;
97
98 def X86vshli : SDNode<"X86ISD::VSHLI", SDTIntShiftOp>;
99 def X86vsrli : SDNode<"X86ISD::VSRLI", SDTIntShiftOp>;
100 def X86vsrai : SDNode<"X86ISD::VSRAI", SDTIntShiftOp>;
87101
88102 def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
89103 SDTCisVec<1>,
38833883 (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
38843884
38853885 // Shift up / down and insert zero's.
3886 def : Pat<(v2i64 (X86vshl VR128:$src, (i8 imm:$amt))),
3886 def : Pat<(v2i64 (X86vshldq VR128:$src, (i8 imm:$amt))),
38873887 (VPSLLDQri VR128:$src, (BYTE_imm imm:$amt))>;
3888 def : Pat<(v2i64 (X86vshr VR128:$src, (i8 imm:$amt))),
3888 def : Pat<(v2i64 (X86vshrdq VR128:$src, (i8 imm:$amt))),
38893889 (VPSRLDQri VR128:$src, (BYTE_imm imm:$amt))>;
3890
3891 def : Pat<(v8i16 (X86vshli VR128:$src1, (i32 imm:$src2))),
3892 (VPSLLWri VR128:$src1, imm:$src2)>;
3893 def : Pat<(v4i32 (X86vshli VR128:$src1, (i32 imm:$src2))),
3894 (VPSLLDri VR128:$src1, imm:$src2)>;
3895 def : Pat<(v2i64 (X86vshli VR128:$src1, (i32 imm:$src2))),
3896 (VPSLLQri VR128:$src1, imm:$src2)>;
3897
3898 def : Pat<(v8i16 (X86vsrli VR128:$src1, (i32 imm:$src2))),
3899 (VPSRLWri VR128:$src1, imm:$src2)>;
3900 def : Pat<(v4i32 (X86vsrli VR128:$src1, (i32 imm:$src2))),
3901 (VPSRLDri VR128:$src1, imm:$src2)>;
3902 def : Pat<(v2i64 (X86vsrli VR128:$src1, (i32 imm:$src2))),
3903 (VPSRLQri VR128:$src1, imm:$src2)>;
3904
3905 def : Pat<(v8i16 (X86vsrai VR128:$src1, (i32 imm:$src2))),
3906 (VPSRAWri VR128:$src1, imm:$src2)>;
3907 def : Pat<(v4i32 (X86vsrai VR128:$src1, (i32 imm:$src2))),
3908 (VPSRADri VR128:$src1, imm:$src2)>;
3909
3910 def : Pat<(v8i16 (X86vshl VR128:$src1, (v8i16 VR128:$src2))),
3911 (VPSLLWrr VR128:$src1, VR128:$src2)>;
3912 def : Pat<(v8i16 (X86vshl VR128:$src1, (bc_v8i16 (memopv2i64 addr:$src2)))),
3913 (VPSLLWrm VR128:$src1, addr:$src2)>;
3914 def : Pat<(v4i32 (X86vshl VR128:$src1, (v4i32 VR128:$src2))),
3915 (VPSLLDrr VR128:$src1, VR128:$src2)>;
3916 def : Pat<(v4i32 (X86vshl VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))),
3917 (VPSLLDrm VR128:$src1, addr:$src2)>;
3918 def : Pat<(v2i64 (X86vshl VR128:$src1, (v2i64 VR128:$src2))),
3919 (VPSLLQrr VR128:$src1, VR128:$src2)>;
3920 def : Pat<(v2i64 (X86vshl VR128:$src1, (memopv2i64 addr:$src2))),
3921 (VPSLLQrm VR128:$src1, addr:$src2)>;
3922
3923 def : Pat<(v8i16 (X86vsrl VR128:$src1, (v8i16 VR128:$src2))),
3924 (VPSRLWrr VR128:$src1, VR128:$src2)>;
3925 def : Pat<(v8i16 (X86vsrl VR128:$src1, (bc_v8i16 (memopv2i64 addr:$src2)))),
3926 (VPSRLWrm VR128:$src1, addr:$src2)>;
3927 def : Pat<(v4i32 (X86vsrl VR128:$src1, (v4i32 VR128:$src2))),
3928 (VPSRLDrr VR128:$src1, VR128:$src2)>;
3929 def : Pat<(v4i32 (X86vsrl VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))),
3930 (VPSRLDrm VR128:$src1, addr:$src2)>;
3931 def : Pat<(v2i64 (X86vsrl VR128:$src1, (v2i64 VR128:$src2))),
3932 (VPSRLQrr VR128:$src1, VR128:$src2)>;
3933 def : Pat<(v2i64 (X86vsrl VR128:$src1, (memopv2i64 addr:$src2))),
3934 (VPSRLQrm VR128:$src1, addr:$src2)>;
3935
3936 def : Pat<(v8i16 (X86vsra VR128:$src1, (v8i16 VR128:$src2))),
3937 (VPSRAWrr VR128:$src1, VR128:$src2)>;
3938 def : Pat<(v8i16 (X86vsra VR128:$src1, (bc_v8i16 (memopv2i64 addr:$src2)))),
3939 (VPSRAWrm VR128:$src1, addr:$src2)>;
3940 def : Pat<(v4i32 (X86vsra VR128:$src1, (v4i32 VR128:$src2))),
3941 (VPSRADrr VR128:$src1, VR128:$src2)>;
3942 def : Pat<(v4i32 (X86vsra VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))),
3943 (VPSRADrm VR128:$src1, addr:$src2)>;
38903944 }
38913945
38923946 let Predicates = [HasAVX2] in {
38943948 (VPSLLDQYri VR256:$src1, (BYTE_imm imm:$src2))>;
38953949 def : Pat<(int_x86_avx2_psrl_dq VR256:$src1, imm:$src2),
38963950 (VPSRLDQYri VR256:$src1, (BYTE_imm imm:$src2))>;
3951
3952 def : Pat<(v16i16 (X86vshli VR256:$src1, (i32 imm:$src2))),
3953 (VPSLLWYri VR256:$src1, imm:$src2)>;
3954 def : Pat<(v8i32 (X86vshli VR256:$src1, (i32 imm:$src2))),
3955 (VPSLLDYri VR256:$src1, imm:$src2)>;
3956 def : Pat<(v4i64 (X86vshli VR256:$src1, (i32 imm:$src2))),
3957 (VPSLLQYri VR256:$src1, imm:$src2)>;
3958
3959 def : Pat<(v16i16 (X86vsrli VR256:$src1, (i32 imm:$src2))),
3960 (VPSRLWYri VR256:$src1, imm:$src2)>;
3961 def : Pat<(v8i32 (X86vsrli VR256:$src1, (i32 imm:$src2))),
3962 (VPSRLDYri VR256:$src1, imm:$src2)>;
3963 def : Pat<(v4i64 (X86vsrli VR256:$src1, (i32 imm:$src2))),
3964 (VPSRLQYri VR256:$src1, imm:$src2)>;
3965
3966 def : Pat<(v16i16 (X86vsrai VR256:$src1, (i32 imm:$src2))),
3967 (VPSRAWYri VR256:$src1, imm:$src2)>;
3968 def : Pat<(v8i32 (X86vsrai VR256:$src1, (i32 imm:$src2))),
3969 (VPSRADYri VR256:$src1, imm:$src2)>;
3970
// X86vshl on 256-bit vectors where the second operand is a 128-bit vector:
// either a VR128 register ("rr") or a folded memopv2i64 load ("rm"), bitcast
// to v8i16 / v4i32 where the pattern type requires it.
// v16i16 / v8i32 / v4i64 select VPSLLWY / VPSLLDY / VPSLLQY.
3971 def : Pat<(v16i16 (X86vshl VR256:$src1, (v8i16 VR128:$src2))),
3972 (VPSLLWYrr VR256:$src1, VR128:$src2)>;
3973 def : Pat<(v16i16 (X86vshl VR256:$src1, (bc_v8i16 (memopv2i64 addr:$src2)))),
3974 (VPSLLWYrm VR256:$src1, addr:$src2)>;
3975 def : Pat<(v8i32 (X86vshl VR256:$src1, (v4i32 VR128:$src2))),
3976 (VPSLLDYrr VR256:$src1, VR128:$src2)>;
3977 def : Pat<(v8i32 (X86vshl VR256:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))),
3978 (VPSLLDYrm VR256:$src1, addr:$src2)>;
3979 def : Pat<(v4i64 (X86vshl VR256:$src1, (v2i64 VR128:$src2))),
3980 (VPSLLQYrr VR256:$src1, VR128:$src2)>;
3981 def : Pat<(v4i64 (X86vshl VR256:$src1, (memopv2i64 addr:$src2))),
3982 (VPSLLQYrm VR256:$src1, addr:$src2)>;
3983
// X86vsrl on 256-bit vectors where the second operand is a 128-bit vector:
// either a VR128 register ("rr") or a folded memopv2i64 load ("rm"), bitcast
// to v8i16 / v4i32 where the pattern type requires it.
// v16i16 / v8i32 / v4i64 select VPSRLWY / VPSRLDY / VPSRLQY.
3984 def : Pat<(v16i16 (X86vsrl VR256:$src1, (v8i16 VR128:$src2))),
3985 (VPSRLWYrr VR256:$src1, VR128:$src2)>;
3986 def : Pat<(v16i16 (X86vsrl VR256:$src1, (bc_v8i16 (memopv2i64 addr:$src2)))),
3987 (VPSRLWYrm VR256:$src1, addr:$src2)>;
3988 def : Pat<(v8i32 (X86vsrl VR256:$src1, (v4i32 VR128:$src2))),
3989 (VPSRLDYrr VR256:$src1, VR128:$src2)>;
3990 def : Pat<(v8i32 (X86vsrl VR256:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))),
3991 (VPSRLDYrm VR256:$src1, addr:$src2)>;
3992 def : Pat<(v4i64 (X86vsrl VR256:$src1, (v2i64 VR128:$src2))),
3993 (VPSRLQYrr VR256:$src1, VR128:$src2)>;
3994 def : Pat<(v4i64 (X86vsrl VR256:$src1, (memopv2i64 addr:$src2))),
3995 (VPSRLQYrm VR256:$src1, addr:$src2)>;
3996
// X86vsra on 256-bit vectors where the second operand is a 128-bit vector
// (VR128 register for "rr", bitcast memopv2i64 load for "rm").
// Only v16i16 and v8i32 forms are listed, selecting VPSRAWY / VPSRADY.
3997 def : Pat<(v16i16 (X86vsra VR256:$src1, (v8i16 VR128:$src2))),
3998 (VPSRAWYrr VR256:$src1, VR128:$src2)>;
3999 def : Pat<(v16i16 (X86vsra VR256:$src1, (bc_v8i16 (memopv2i64 addr:$src2)))),
4000 (VPSRAWYrm VR256:$src1, addr:$src2)>;
4001 def : Pat<(v8i32 (X86vsra VR256:$src1, (v4i32 VR128:$src2))),
4002 (VPSRADYrr VR256:$src1, VR128:$src2)>;
4003 def : Pat<(v8i32 (X86vsra VR256:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))),
4004 (VPSRADYrm VR256:$src1, addr:$src2)>;
38974005 }
38984006
38994007 let Predicates = [HasSSE2] in {
39054013 (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
39064014
39074015 // Shift up / down and insert zeros.
// v2i64 patterns with an i8 immediate amount, selecting PSLLDQri / PSRLDQri
// with the amount passed through BYTE_imm.
// NOTE(review): this is a rendered diff. Lines carrying a single old number
// (3908, 3910) appear to be the removed X86vshl / X86vshr forms, and the
// lines numbered 4016 / 4018 their X86vshldq / X86vshrdq replacements; the
// result lines (fused old+new numbers) are shared context -- confirm against
// the applied file.
3908 def : Pat<(v2i64 (X86vshl VR128:$src, (i8 imm:$amt))),
4016 def : Pat<(v2i64 (X86vshldq VR128:$src, (i8 imm:$amt))),
39094017 (PSLLDQri VR128:$src, (BYTE_imm imm:$amt))>;
3910 def : Pat<(v2i64 (X86vshr VR128:$src, (i8 imm:$amt))),
4018 def : Pat<(v2i64 (X86vshrdq VR128:$src, (i8 imm:$amt))),
39114019 (PSRLDQri VR128:$src, (BYTE_imm imm:$amt))>;
4020
// X86vshli with an i32 immediate second operand on 128-bit vectors.
// v8i16 / v4i32 / v2i64 select PSLLWri / PSLLDri / PSLLQri; the immediate is
// forwarded to the instruction unchanged.
4021 def : Pat<(v8i16 (X86vshli VR128:$src1, (i32 imm:$src2))),
4022 (PSLLWri VR128:$src1, imm:$src2)>;
4023 def : Pat<(v4i32 (X86vshli VR128:$src1, (i32 imm:$src2))),
4024 (PSLLDri VR128:$src1, imm:$src2)>;
4025 def : Pat<(v2i64 (X86vshli VR128:$src1, (i32 imm:$src2))),
4026 (PSLLQri VR128:$src1, imm:$src2)>;
4027
// X86vsrli with an i32 immediate second operand on 128-bit vectors.
// v8i16 / v4i32 / v2i64 select PSRLWri / PSRLDri / PSRLQri; the immediate is
// forwarded to the instruction unchanged.
4028 def : Pat<(v8i16 (X86vsrli VR128:$src1, (i32 imm:$src2))),
4029 (PSRLWri VR128:$src1, imm:$src2)>;
4030 def : Pat<(v4i32 (X86vsrli VR128:$src1, (i32 imm:$src2))),
4031 (PSRLDri VR128:$src1, imm:$src2)>;
4032 def : Pat<(v2i64 (X86vsrli VR128:$src1, (i32 imm:$src2))),
4033 (PSRLQri VR128:$src1, imm:$src2)>;
4034
// X86vsrai with an i32 immediate second operand on 128-bit vectors.
// Only v8i16 and v4i32 forms are listed, selecting PSRAWri / PSRADri;
// no v2i64 pattern appears in this group.
4035 def : Pat<(v8i16 (X86vsrai VR128:$src1, (i32 imm:$src2))),
4036 (PSRAWri VR128:$src1, imm:$src2)>;
4037 def : Pat<(v4i32 (X86vsrai VR128:$src1, (i32 imm:$src2))),
4038 (PSRADri VR128:$src1, imm:$src2)>;
4039
// X86vshl on 128-bit vectors with a vector second operand of the same type:
// a VR128 register ("rr") or a folded memopv2i64 load ("rm"), bitcast to
// v8i16 / v4i32 where the pattern type requires it.
// v8i16 / v4i32 / v2i64 select PSLLW / PSLLD / PSLLQ.
4040 def : Pat<(v8i16 (X86vshl VR128:$src1, (v8i16 VR128:$src2))),
4041 (PSLLWrr VR128:$src1, VR128:$src2)>;
4042 def : Pat<(v8i16 (X86vshl VR128:$src1, (bc_v8i16 (memopv2i64 addr:$src2)))),
4043 (PSLLWrm VR128:$src1, addr:$src2)>;
4044 def : Pat<(v4i32 (X86vshl VR128:$src1, (v4i32 VR128:$src2))),
4045 (PSLLDrr VR128:$src1, VR128:$src2)>;
4046 def : Pat<(v4i32 (X86vshl VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))),
4047 (PSLLDrm VR128:$src1, addr:$src2)>;
4048 def : Pat<(v2i64 (X86vshl VR128:$src1, (v2i64 VR128:$src2))),
4049 (PSLLQrr VR128:$src1, VR128:$src2)>;
4050 def : Pat<(v2i64 (X86vshl VR128:$src1, (memopv2i64 addr:$src2))),
4051 (PSLLQrm VR128:$src1, addr:$src2)>;
4052
// X86vsrl on 128-bit vectors with a vector second operand of the same type:
// a VR128 register ("rr") or a folded memopv2i64 load ("rm"), bitcast to
// v8i16 / v4i32 where the pattern type requires it.
// v8i16 / v4i32 / v2i64 select PSRLW / PSRLD / PSRLQ.
4053 def : Pat<(v8i16 (X86vsrl VR128:$src1, (v8i16 VR128:$src2))),
4054 (PSRLWrr VR128:$src1, VR128:$src2)>;
4055 def : Pat<(v8i16 (X86vsrl VR128:$src1, (bc_v8i16 (memopv2i64 addr:$src2)))),
4056 (PSRLWrm VR128:$src1, addr:$src2)>;
4057 def : Pat<(v4i32 (X86vsrl VR128:$src1, (v4i32 VR128:$src2))),
4058 (PSRLDrr VR128:$src1, VR128:$src2)>;
4059 def : Pat<(v4i32 (X86vsrl VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))),
4060 (PSRLDrm VR128:$src1, addr:$src2)>;
4061 def : Pat<(v2i64 (X86vsrl VR128:$src1, (v2i64 VR128:$src2))),
4062 (PSRLQrr VR128:$src1, VR128:$src2)>;
4063 def : Pat<(v2i64 (X86vsrl VR128:$src1, (memopv2i64 addr:$src2))),
4064 (PSRLQrm VR128:$src1, addr:$src2)>;
4065
// X86vsra on 128-bit vectors with a vector second operand of the same type
// (VR128 register for "rr", bitcast memopv2i64 load for "rm").
// Only v8i16 and v4i32 forms are listed, selecting PSRAW / PSRAD.
4066 def : Pat<(v8i16 (X86vsra VR128:$src1, (v8i16 VR128:$src2))),
4067 (PSRAWrr VR128:$src1, VR128:$src2)>;
4068 def : Pat<(v8i16 (X86vsra VR128:$src1, (bc_v8i16 (memopv2i64 addr:$src2)))),
4069 (PSRAWrm VR128:$src1, addr:$src2)>;
4070 def : Pat<(v4i32 (X86vsra VR128:$src1, (v4i32 VR128:$src2))),
4071 (PSRADrr VR128:$src1, VR128:$src2)>;
4072 def : Pat<(v4i32 (X86vsra VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))),
4073 (PSRADrm VR128:$src1, addr:$src2)>;
39124074 }
39134075
39144076 //===---------------------------------------------------------------------===//