llvm.org GIT mirror llvm / 1184a94
[X86] Make v2i1 and v4i1 legal types without VLX Summary: There are a few oddities that occur due to v1i1, v8i1, v16i1 being legal without v2i1 and v4i1 being legal when we don't have VLX. Particularly during legalization of v2i32/v4i32/v2i64/v4i64 masked gather/scatter/load/store. We end up promoting the mask argument to these during type legalization and then have to widen the promoted type to v8iX/v16iX and truncate it to get the element size back down to v8i1/v16i1 to use a 512-bit operation. Since we need to fill the upper bits of the mask, we have to fill with 0s at the promoted type. It would be better if we could just have the v2i1/v4i1 types as legal so they don't undergo any promotion. Then we can just widen with 0s directly in a k register. There are no real v4i1/v2i1 instructions anyway. Everything is done on a larger register anyway. This also fixes an issue that we couldn't implement a masked vextractf32x4 from zmm to xmm properly. We now have to support widening more compares to 512-bit to get a mask result out so new tablegen patterns got added. I had to hack the legalizer for widening the operand of a setcc a bit so it didn't try to create a setcc returning v4i32, extract from it, then try to promote it using a sign extend to v2i1. Now we create the setcc with v4i1 if the original setcc's result type is v2i1. Then extract that and don't sign extend it at all. There's definitely room for improvement with some follow-up patches. Reviewers: RKSimon, zvi, guyblank Reviewed By: RKSimon Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D41560 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@321967 91177308-0d34-0410-b5e6-96231b3b80d8 Craig Topper 2 years ago
23 changed file(s) with 5200 addition(s) and 8742 deletion(s). Raw diff Collapse all Expand all
36153615 SDValue InOp0 = GetWidenedVector(N->getOperand(0));
36163616 SDValue InOp1 = GetWidenedVector(N->getOperand(1));
36173617 SDLoc dl(N);
3618 EVT VT = N->getValueType(0);
36183619
36193620 // WARNING: In this code we widen the compare instruction with garbage.
36203621 // This garbage may contain denormal floats which may be slow. Is this a real
36243625 // Only some of the compared elements are legal.
36253626 EVT SVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
36263627 InOp0.getValueType());
3628 // The result type is legal, if it's vXi1, keep vXi1 for the new SETCC.
3629 if (VT.getScalarType() == MVT::i1)
3630 SVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
3631 SVT.getVectorNumElements());
3632
36273633 SDValue WideSETCC = DAG.getNode(ISD::SETCC, SDLoc(N),
3628 SVT, InOp0, InOp1, N->getOperand(2));
3634 SVT, InOp0, InOp1, N->getOperand(2));
36293635
36303636 // Extract the needed results from the result vector.
36313637 EVT ResVT = EVT::getVectorVT(*DAG.getContext(),
36323638 SVT.getVectorElementType(),
3633 N->getValueType(0).getVectorNumElements());
3639 VT.getVectorNumElements());
36343640 SDValue CC = DAG.getNode(
36353641 ISD::EXTRACT_SUBVECTOR, dl, ResVT, WideSETCC,
36363642 DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
36373643
3638 return PromoteTargetBoolean(CC, N->getValueType(0));
3644 return PromoteTargetBoolean(CC, VT);
36393645 }
36403646
36413647
459459 // this happens we will use 512-bit operations and the mask will not be
460460 // zero extended.
461461 EVT OpVT = N->getOperand(0).getValueType();
462 if (OpVT == MVT::v8i32 || OpVT == MVT::v8f32)
462 if (OpVT.is256BitVector() || OpVT.is128BitVector())
463463 return Subtarget->hasVLX();
464464
465465 return true;
11431143 addRegisterClass(MVT::v8f64, &X86::VR512RegClass);
11441144
11451145 addRegisterClass(MVT::v1i1, &X86::VK1RegClass);
1146 addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
1147 addRegisterClass(MVT::v4i1, &X86::VK4RegClass);
11461148 addRegisterClass(MVT::v8i1, &X86::VK8RegClass);
11471149 addRegisterClass(MVT::v16i1, &X86::VK16RegClass);
11481150
11701172 setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Custom);
11711173 }
11721174
1173 // Extends of v16i1/v8i1 to 128-bit vectors.
1174 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i8, Custom);
1175 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i8, Custom);
1176 setOperationAction(ISD::ANY_EXTEND, MVT::v16i8, Custom);
1177 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i16, Custom);
1178 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i16, Custom);
1179 setOperationAction(ISD::ANY_EXTEND, MVT::v8i16, Custom);
1180
1181 for (auto VT : { MVT::v8i1, MVT::v16i1 }) {
1175 // Extends of v16i1/v8i1/v4i1/v2i1 to 128-bit vectors.
1176 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1177 setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
1178 setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
1179 setOperationAction(ISD::ANY_EXTEND, VT, Custom);
1180 }
1181
1182 for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) {
11821183 setOperationAction(ISD::ADD, VT, Custom);
11831184 setOperationAction(ISD::SUB, VT, Custom);
11841185 setOperationAction(ISD::MUL, VT, Custom);
11941195 }
11951196
11961197 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i1, Custom);
1198 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i1, Custom);
1199 setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i1, Custom);
1200 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4i1, Custom);
11971201 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i1, Custom);
11981202 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v16i1, Custom);
1199 for (auto VT : { MVT::v1i1, MVT::v8i1 })
1203 for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1 })
12001204 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
12011205
12021206 for (MVT VT : MVT::fp_vector_valuetypes())
15271531 }
15281532
15291533 if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) {
1530 addRegisterClass(MVT::v4i1, &X86::VK4RegClass);
1531 addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
1532
1533 for (auto VT : { MVT::v2i1, MVT::v4i1 }) {
1534 setOperationAction(ISD::ADD, VT, Custom);
1535 setOperationAction(ISD::SUB, VT, Custom);
1536 setOperationAction(ISD::MUL, VT, Custom);
1537 setOperationAction(ISD::VSELECT, VT, Expand);
1538
1539 setOperationAction(ISD::TRUNCATE, VT, Custom);
1540 setOperationAction(ISD::SETCC, VT, Custom);
1541 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1542 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1543 setOperationAction(ISD::SELECT, VT, Custom);
1544 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1545 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1546 }
1547
1548 // TODO: v8i1 concat should be legal without VLX to support concats of
1549 // v1i1, but we won't legalize it correctly currently without introducing
1550 // a v4i1 concat in the middle.
1551 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i1, Custom);
1552 setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i1, Custom);
1553 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4i1, Custom);
1554 for (auto VT : { MVT::v2i1, MVT::v4i1 })
1555 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1556
1557 // Extends from v2i1/v4i1 masks to 128-bit vectors.
1558 setOperationAction(ISD::ZERO_EXTEND, MVT::v4i32, Custom);
1559 setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Custom);
1560 setOperationAction(ISD::SIGN_EXTEND, MVT::v4i32, Custom);
1561 setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Custom);
1562 setOperationAction(ISD::ANY_EXTEND, MVT::v4i32, Custom);
1563 setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Custom);
1564
15651534 setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal);
15661535 setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal);
15671536 setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal);
49434912 Vec = DAG.getConstantFP(+0.0, dl, MVT::v4f32);
49444913 } else if (VT.getVectorElementType() == MVT::i1) {
49454914 assert((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) &&
4946 "Unexpected vector type");
4947 assert((Subtarget.hasVLX() || VT.getVectorNumElements() >= 8) &&
49484915 "Unexpected vector type");
49494916 Vec = DAG.getConstant(0, dl, VT);
49504917 } else {
1777817745 assert(EltVT == MVT::f32 || EltVT == MVT::f64);
1777917746 #endif
1778017747
17748 // Custom widen MVT::v2f32 to prevent the default widening
17749 // from getting a result type of v4i32, extracting it to v2i32 and then
17750 // trying to sign extend that to v2i1.
17751 if (VT == MVT::v2i1 && Op1.getValueType() == MVT::v2f32) {
17752 Op0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Op0,
17753 DAG.getUNDEF(MVT::v2f32));
17754 Op1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Op1,
17755 DAG.getUNDEF(MVT::v2f32));
17756 SDValue NewOp = DAG.getNode(ISD::SETCC, dl, MVT::v4i1, Op0, Op1, CC);
17757 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i1, NewOp,
17758 DAG.getIntPtrConstant(0, dl));
17759 }
17760
1778117761 unsigned Opc;
1778217762 if (Subtarget.hasAVX512() && VT.getVectorElementType() == MVT::i1) {
1778317763 assert(VT.getVectorNumElements() <= 16);
2441624396
2441724397 // Mask
2441824398 // At this point the mask operand is expected to be a vXi1 vector
24419 assert(MaskVT.getScalarSizeInBits() >= 32 && "unexpected mask type");
24420 MVT ExtMaskVT = MVT::getVectorVT(MaskVT.getScalarType(), NumElts);
24399 assert(MaskVT.getScalarType() == MVT::i1 && "unexpected mask type");
24400 MVT ExtMaskVT = MVT::getVectorVT(MVT::i1, NumElts);
2442124401 // Use the original mask here, do not modify the mask twice
2442224402 Mask = ExtendToType(N->getMask(), ExtMaskVT, DAG, true);
2442324403
2442624406 Src = ExtendToType(Src, NewVT, DAG);
2442724407 }
2442824408 }
24429 // If the mask is "wide" at this point - truncate it to i1 vector
24430 MVT BitMaskVT = MVT::getVectorVT(MVT::i1, NumElts);
24431 Mask = DAG.getNode(ISD::TRUNCATE, dl, BitMaskVT, Mask);
2443224409
2443324410 // The mask is killed by scatter, add it to the values
24434 SDVTList VTs = DAG.getVTList(BitMaskVT, MVT::Other);
24411 SDVTList VTs = DAG.getVTList(Mask.getValueType(), MVT::Other);
2443524412 SDValue Ops[] = {Chain, Src, Mask, BasePtr, Index};
2443624413 SDValue NewScatter = DAG.getTargetMemSDNode(
2443724414 VTs, Ops, dl, N->getMemoryVT(), N->getMemOperand());
2445324430
2445424431 assert((!N->isExpandingLoad() || ScalarVT.getSizeInBits() >= 32) &&
2445524432 "Expanding masked load is supported for 32 and 64-bit types only!");
24456
24457 // 4x32, 4x64 and 2x64 vectors of non-expanding loads are legal regardless of
24458 // VLX. These types for exp-loads are handled here.
24459 if (!N->isExpandingLoad() && VT.getVectorNumElements() <= 4)
24460 return Op;
2446124433
2446224434 assert(Subtarget.hasAVX512() && !Subtarget.hasVLX() && !VT.is512BitVector() &&
2446324435 "Cannot lower masked load op.");
2447524447 Src0 = ExtendToType(Src0, WideDataVT, DAG);
2447624448
2447724449 // Mask element has to be i1.
24478 MVT MaskEltTy = Mask.getSimpleValueType().getScalarType();
24479 assert((MaskEltTy == MVT::i1 || VT.getVectorNumElements() <= 4) &&
24480 "We handle 4x32, 4x64 and 2x64 vectors only in this case");
24481
24482 MVT WideMaskVT = MVT::getVectorVT(MaskEltTy, NumEltsInWideVec);
24450 assert(Mask.getSimpleValueType().getScalarType() == MVT::i1 &&
24451 "Unexpected mask type");
24452
24453 MVT WideMaskVT = MVT::getVectorVT(MVT::i1, NumEltsInWideVec);
2448324454
2448424455 Mask = ExtendToType(Mask, WideMaskVT, DAG, true);
24485 if (MaskEltTy != MVT::i1)
24486 Mask = DAG.getNode(ISD::TRUNCATE, dl,
24487 MVT::getVectorVT(MVT::i1, NumEltsInWideVec), Mask);
2448824456 SDValue NewLoad = DAG.getMaskedLoad(WideDataVT, dl, N->getChain(),
2448924457 N->getBasePtr(), Mask, Src0,
2449024458 N->getMemoryVT(), N->getMemOperand(),
2451324481 assert((!N->isCompressingStore() || ScalarVT.getSizeInBits() >= 32) &&
2451424482 "Expanding masked load is supported for 32 and 64-bit types only!");
2451524483
24516 // 4x32 and 2x64 vectors of non-compressing stores are legal regardless to VLX.
24517 if (!N->isCompressingStore() && VT.getVectorNumElements() <= 4)
24518 return Op;
24519
2452024484 assert(Subtarget.hasAVX512() && !Subtarget.hasVLX() && !VT.is512BitVector() &&
2452124485 "Cannot lower masked store op.");
2452224486
2453124495 MVT WideDataVT = MVT::getVectorVT(ScalarVT, NumEltsInWideVec);
2453224496
2453324497 // Mask element has to be i1.
24534 MVT MaskEltTy = Mask.getSimpleValueType().getScalarType();
24535 assert((MaskEltTy == MVT::i1 || VT.getVectorNumElements() <= 4) &&
24536 "We handle 4x32, 4x64 and 2x64 vectors only in this case");
24537
24538 MVT WideMaskVT = MVT::getVectorVT(MaskEltTy, NumEltsInWideVec);
24498 assert(Mask.getSimpleValueType().getScalarType() == MVT::i1 &&
24499 "Unexpected mask type");
24500
24501 MVT WideMaskVT = MVT::getVectorVT(MVT::i1, NumEltsInWideVec);
2453924502
2454024503 DataToStore = ExtendToType(DataToStore, WideDataVT, DAG);
2454124504 Mask = ExtendToType(Mask, WideMaskVT, DAG, true);
24542 if (MaskEltTy != MVT::i1)
24543 Mask = DAG.getNode(ISD::TRUNCATE, dl,
24544 MVT::getVectorVT(MVT::i1, NumEltsInWideVec), Mask);
2454524505 return DAG.getMaskedStore(N->getChain(), dl, DataToStore, N->getBasePtr(),
2454624506 Mask, N->getMemoryVT(), N->getMemOperand(),
2454724507 N->isTruncatingStore(), N->isCompressingStore());
2459124551 Index = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i64, Index);
2459224552
2459324553 // Mask
24594 MVT MaskBitVT = MVT::getVectorVT(MVT::i1, NumElts);
24595 // At this point we have promoted mask operand
24596 assert(MaskVT.getScalarSizeInBits() >= 32 && "unexpected mask type");
24597 MVT ExtMaskVT = MVT::getVectorVT(MaskVT.getScalarType(), NumElts);
24598 Mask = ExtendToType(Mask, ExtMaskVT, DAG, true);
24599 Mask = DAG.getNode(ISD::TRUNCATE, dl, MaskBitVT, Mask);
24554 assert(MaskVT.getScalarType() == MVT::i1 && "unexpected mask type");
24555 MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
24556 Mask = ExtendToType(Mask, MaskVT, DAG, true);
2460024557
2460124558 // The pass-through value
2460224559 MVT NewVT = MVT::getVectorVT(VT.getScalarType(), NumElts);
2460424561
2460524562 SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index };
2460624563 SDValue NewGather = DAG.getTargetMemSDNode(
24607 DAG.getVTList(NewVT, MaskBitVT, MVT::Other), Ops, dl, N->getMemoryVT(),
24564 DAG.getVTList(NewVT, MaskVT, MVT::Other), Ops, dl, N->getMemoryVT(),
2460824565 N->getMemOperand());
2460924566 SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
2461024567 NewGather.getValue(0),
3044630403 // If this is a bitcast between a MVT::v4i1/v2i1 and an illegal integer
3044730404 // type, widen both sides to avoid a trip through memory.
3044830405 if ((VT == MVT::v4i1 || VT == MVT::v2i1) && SrcVT.isScalarInteger() &&
30449 Subtarget.hasVLX()) {
30406 Subtarget.hasAVX512()) {
3045030407 SDLoc dl(N);
3045130408 N0 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i8, N0);
3045230409 N0 = DAG.getBitcast(MVT::v8i1, N0);
3045730414 // If this is a bitcast between a MVT::v4i1/v2i1 and an illegal integer
3045830415 // type, widen both sides to avoid a trip through memory.
3045930416 if ((SrcVT == MVT::v4i1 || SrcVT == MVT::v2i1) && VT.isScalarInteger() &&
30460 Subtarget.hasVLX()) {
30417 Subtarget.hasAVX512()) {
3046130418 SDLoc dl(N);
3046230419 unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
3046330420 SmallVector Ops(NumConcats, DAG.getUNDEF(SrcVT));
29612961 defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, SSE_PSHUF>;
29622962 defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, SSE_PSHUF>;
29632963
2964 multiclass axv512_icmp_packed_no_vlx_lowering {
2965 def : Pat<(v8i1 (OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
2966 (COPY_TO_REGCLASS (!cast(InstStr##Zrr)
2967 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
2968 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), VK8)>;
2969
2970 def : Pat<(v8i1 (and VK8:$mask,
2971 (OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2)))),
2964 multiclass axv512_icmp_packed_no_vlx_lowering,
2965 X86VectorVTInfo Narrow,
2966 X86VectorVTInfo Wide> {
2967 def : Pat<(Narrow.KVT (OpNode (Narrow.VT Narrow.RC:$src1),
2968 (Narrow.VT Narrow.RC:$src2))),
2969 (COPY_TO_REGCLASS
2970 (!cast(InstStr##Zrr)
2971 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
2972 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx))),
2973 Narrow.KRC)>;
2974
2975 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
2976 (OpNode (Narrow.VT Narrow.RC:$src1),
2977 (Narrow.VT Narrow.RC:$src2)))),
29722978 (COPY_TO_REGCLASS
29732979 (!cast(InstStr##Zrrk)
2974 (COPY_TO_REGCLASS VK8:$mask, VK16),
2975 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
2976 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))),
2977 VK8)>;
2980 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
2981 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
2982 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx))),
2983 Narrow.KRC)>;
29782984 }
29792985
29802986 multiclass axv512_icmp_packed_cc_no_vlx_lowering
2981 AVX512VLVectorVTInfo _> {
2982 def : Pat<(v8i1 (OpNode (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src2), imm:$cc)),
2983 (COPY_TO_REGCLASS (!cast(InstStr##Zrri)
2984 (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
2985 (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
2986 imm:$cc), VK8)>;
2987
2988 def : Pat<(v8i1 (and VK8:$mask, (OpNode (_.info256.VT VR256X:$src1),
2989 (_.info256.VT VR256X:$src2), imm:$cc))),
2990 (COPY_TO_REGCLASS (!cast(InstStr##Zrrik)
2991 (COPY_TO_REGCLASS VK8:$mask, VK16),
2992 (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
2993 (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
2994 imm:$cc), VK8)>;
2987 X86VectorVTInfo Narrow,
2988 X86VectorVTInfo Wide> {
2989 def : Pat<(Narrow.KVT (OpNode (Narrow.VT Narrow.RC:$src1),
2990 (Narrow.VT Narrow.RC:$src2), imm:$cc)),
2991 (COPY_TO_REGCLASS
2992 (!cast(InstStr##Zrri)
2993 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
2994 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
2995 imm:$cc), Narrow.KRC)>;
2996
2997 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
2998 (OpNode (Narrow.VT Narrow.RC:$src1),
2999 (Narrow.VT Narrow.RC:$src2), imm:$cc))),
3000 (COPY_TO_REGCLASS (!cast(InstStr##Zrrik)
3001 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3002 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3003 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3004 imm:$cc), Narrow.KRC)>;
29953005 }
29963006
29973007 let Predicates = [HasAVX512, NoVLX] in {
2998 defm : axv512_icmp_packed_no_vlx_lowering;
2999 defm : axv512_icmp_packed_no_vlx_lowering;
3000
3001 defm : axv512_icmp_packed_cc_no_vlx_lowering;
3002 defm : axv512_icmp_packed_cc_no_vlx_lowering;
3003 defm : axv512_icmp_packed_cc_no_vlx_loweringi32_info>;
3008 defm : axv512_icmp_packed_no_vlx_loweringi32_info>;
3009 defm : axv512_icmp_packed_no_vlx_lowering;
3010
3011 defm : axv512_icmp_packed_no_vlx_lowering;
3012 defm : axv512_icmp_packed_no_vlx_lowering;
3013
3014 defm : axv512_icmp_packed_no_vlx_lowering;
3015 defm : axv512_icmp_packed_no_vlx_lowering;
3016
3017 defm : axv512_icmp_packed_no_vlx_lowering;
3018 defm : axv512_icmp_packed_no_vlx_lowering;
3019
3020 defm : axv512_icmp_packed_cc_no_vlx_lowering;
3021 defm : axv512_icmp_packed_cc_no_vlx_lowering;
3022 defm : axv512_icmp_packed_cc_no_vlx_lowering;
3023
3024 defm : axv512_icmp_packed_cc_no_vlx_lowering;
3025 defm : axv512_icmp_packed_cc_no_vlx_lowering;
3026 defm : axv512_icmp_packed_cc_no_vlx_lowering;
3027
3028 defm : axv512_icmp_packed_cc_no_vlx_lowering;
3029 defm : axv512_icmp_packed_cc_no_vlx_lowering;
3030 defm : axv512_icmp_packed_cc_no_vlx_lowering;
3031
3032 defm : axv512_icmp_packed_cc_no_vlx_lowering;
3033 defm : axv512_icmp_packed_cc_no_vlx_lowering;
3034 defm : axv512_icmp_packed_cc_no_vlx_lowering;
30043035 }
30053036
30063037 // Mask setting all 0s or 1s
33753406 // Patterns for handling v2i1/v4i1/v8i1 selects of 128/256-bit vectors when
33763407 // VLX isn't available. Use a 512-bit operation and extract.
33773408 let Predicates = [HasAVX512, NoVLX] in {
3409 defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>;
3410 defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>;
33783411 defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>;
33793412 defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>;
3413
3414 defm : mask_move_lowering<"VMOVAPDZ", v2f64x_info, v8f64_info>;
3415 defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>;
3416 defm : mask_move_lowering<"VMOVAPDZ", v4f64x_info, v8f64_info>;
3417 defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>;
33803418 }
33813419
33823420 let Predicates = [HasAVX512] in {
494494
495495 // If the bits are not zero we have to fall back to explicitly zeroing by
496496 // using shifts.
497 let Predicates = [HasAVX512] in {
498 def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
499 (v2i1 VK2:$mask), (iPTR 0))),
500 (KSHIFTRWri (KSHIFTLWri (COPY_TO_REGCLASS VK2:$mask, VK16),
501 (i8 14)), (i8 14))>;
502
503 def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
504 (v4i1 VK4:$mask), (iPTR 0))),
505 (KSHIFTRWri (KSHIFTLWri (COPY_TO_REGCLASS VK4:$mask, VK16),
506 (i8 12)), (i8 12))>;
507 }
508
497509 let Predicates = [HasAVX512, NoDQI] in {
498510 def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
499511 (v8i1 VK8:$mask), (iPTR 0))),
505517 def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
506518 (v8i1 VK8:$mask), (iPTR 0))),
507519 (COPY_TO_REGCLASS (KMOVBkk VK8:$mask), VK16)>;
508 }
509
510 let Predicates = [HasVLX, HasDQI] in {
520
511521 def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV),
512522 (v2i1 VK2:$mask), (iPTR 0))),
513523 (KSHIFTRBri (KSHIFTLBri (COPY_TO_REGCLASS VK2:$mask, VK8),
518528 (i8 4)), (i8 4))>;
519529 }
520530
521 let Predicates = [HasVLX] in {
522 def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
523 (v2i1 VK2:$mask), (iPTR 0))),
524 (KSHIFTRWri (KSHIFTLWri (COPY_TO_REGCLASS VK2:$mask, VK16),
525 (i8 14)), (i8 14))>;
526 def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
527 (v4i1 VK4:$mask), (iPTR 0))),
528 (KSHIFTRWri (KSHIFTLWri (COPY_TO_REGCLASS VK4:$mask, VK16),
529 (i8 12)), (i8 12))>;
530 }
531
532531 let Predicates = [HasBWI] in {
533532 def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
534533 (v16i1 VK16:$mask), (iPTR 0))),
77 define i32 @add(i32 %arg) {
88 ; CHECK-LABEL: for function 'add'
99 ; -- Same size registers --
10 ;CHECK: cost of 1 {{.*}} zext
10 ;CHECK-AVX512: cost of 12 {{.*}} zext
11 ;CHECK-AVX2: cost of 1 {{.*}} zext
12 ;CHECK-AVX: cost of 1 {{.*}} zext
1113 %A = zext <4 x i1> undef to <4 x i32>
12 ;CHECK: cost of 2 {{.*}} sext
14 ;CHECK-AVX512: cost of 12 {{.*}} sext
15 ;CHECK-AVX2: cost of 2 {{.*}} sext
16 ;CHECK-AVX: cost of 2 {{.*}} sext
1317 %B = sext <4 x i1> undef to <4 x i32>
14 ;CHECK: cost of 0 {{.*}} trunc
18 ;CHECK-AVX512: cost of 0 {{.*}} trunc
19 ;CHECK-AVX2: cost of 0 {{.*}} trunc
20 ;CHECK-AVX: cost of 0 {{.*}} trunc
1521 %C = trunc <4 x i32> undef to <4 x i1>
1622
1723 ; -- Different size registers --
701701 ; NOVL-LABEL: f64to4f32_mask:
702702 ; NOVL: # %bb.0:
703703 ; NOVL-NEXT: vpslld $31, %xmm1, %xmm1
704 ; NOVL-NEXT: vpsrad $31, %xmm1, %xmm1
704 ; NOVL-NEXT: vptestmd %zmm1, %zmm1, %k1
705705 ; NOVL-NEXT: vcvtpd2ps %ymm0, %xmm0
706 ; NOVL-NEXT: vpand %xmm0, %xmm1, %xmm0
706 ; NOVL-NEXT: vmovaps %zmm0, %zmm0 {%k1} {z}
707 ; NOVL-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0
707708 ; NOVL-NEXT: vzeroupper
708709 ; NOVL-NEXT: retq
709710 ;
742743 define <4 x double> @f32to4f64_mask(<4 x float> %b, <4 x double> %b1, <4 x double> %a1) {
743744 ; NOVL-LABEL: f32to4f64_mask:
744745 ; NOVL: # %bb.0:
746 ; NOVL-NEXT: # kill: def %ymm2 killed %ymm2 def %zmm2
747 ; NOVL-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
745748 ; NOVL-NEXT: vcvtps2pd %xmm0, %ymm0
746 ; NOVL-NEXT: vcmpltpd %ymm2, %ymm1, %ymm1
747 ; NOVL-NEXT: vandpd %ymm0, %ymm1, %ymm0
749 ; NOVL-NEXT: vcmpltpd %zmm2, %zmm1, %k1
750 ; NOVL-NEXT: vmovapd %zmm0, %zmm0 {%k1} {z}
751 ; NOVL-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0
748752 ; NOVL-NEXT: retq
749753 ;
750754 ; VL-LABEL: f32to4f64_mask:
15901594 }
15911595
15921596 define <4 x float> @sbto4f32(<4 x float> %a) {
1593 ; NOVL-LABEL: sbto4f32:
1594 ; NOVL: # %bb.0:
1595 ; NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1
1596 ; NOVL-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
1597 ; NOVL-NEXT: vcvtdq2ps %xmm0, %xmm0
1598 ; NOVL-NEXT: retq
1597 ; NOVLDQ-LABEL: sbto4f32:
1598 ; NOVLDQ: # %bb.0:
1599 ; NOVLDQ-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
1600 ; NOVLDQ-NEXT: vxorps %xmm1, %xmm1, %xmm1
1601 ; NOVLDQ-NEXT: vcmpltps %zmm0, %zmm1, %k1
1602 ; NOVLDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1603 ; NOVLDQ-NEXT: vcvtdq2ps %xmm0, %xmm0
1604 ; NOVLDQ-NEXT: vzeroupper
1605 ; NOVLDQ-NEXT: retq
15991606 ;
16001607 ; VLDQ-LABEL: sbto4f32:
16011608 ; VLDQ: # %bb.0:
16131620 ; VLNODQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
16141621 ; VLNODQ-NEXT: vcvtdq2ps %xmm0, %xmm0
16151622 ; VLNODQ-NEXT: retq
1623 ;
1624 ; AVX512DQ-LABEL: sbto4f32:
1625 ; AVX512DQ: # %bb.0:
1626 ; AVX512DQ-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
1627 ; AVX512DQ-NEXT: vxorps %xmm1, %xmm1, %xmm1
1628 ; AVX512DQ-NEXT: vcmpltps %zmm0, %zmm1, %k0
1629 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
1630 ; AVX512DQ-NEXT: vcvtdq2ps %xmm0, %xmm0
1631 ; AVX512DQ-NEXT: vzeroupper
1632 ; AVX512DQ-NEXT: retq
16161633 %cmpres = fcmp ogt <4 x float> %a, zeroinitializer
16171634 %1 = sitofp <4 x i1> %cmpres to <4 x float>
16181635 ret <4 x float> %1
16191636 }
16201637
16211638 define <4 x double> @sbto4f64(<4 x double> %a) {
1622 ; NOVL-LABEL: sbto4f64:
1623 ; NOVL: # %bb.0:
1624 ; NOVL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
1625 ; NOVL-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
1626 ; NOVL-NEXT: vpmovqd %zmm0, %ymm0
1627 ; NOVL-NEXT: vcvtdq2pd %xmm0, %ymm0
1628 ; NOVL-NEXT: retq
1639 ; NOVLDQ-LABEL: sbto4f64:
1640 ; NOVLDQ: # %bb.0:
1641 ; NOVLDQ-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
1642 ; NOVLDQ-NEXT: vxorpd %xmm1, %xmm1, %xmm1
1643 ; NOVLDQ-NEXT: vcmpltpd %zmm0, %zmm1, %k1
1644 ; NOVLDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1645 ; NOVLDQ-NEXT: vcvtdq2pd %xmm0, %ymm0
1646 ; NOVLDQ-NEXT: retq
16291647 ;
16301648 ; VLDQ-LABEL: sbto4f64:
16311649 ; VLDQ: # %bb.0:
16431661 ; VLNODQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
16441662 ; VLNODQ-NEXT: vcvtdq2pd %xmm0, %ymm0
16451663 ; VLNODQ-NEXT: retq
1664 ;
1665 ; AVX512DQ-LABEL: sbto4f64:
1666 ; AVX512DQ: # %bb.0:
1667 ; AVX512DQ-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
1668 ; AVX512DQ-NEXT: vxorpd %xmm1, %xmm1, %xmm1
1669 ; AVX512DQ-NEXT: vcmpltpd %zmm0, %zmm1, %k0
1670 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
1671 ; AVX512DQ-NEXT: vcvtdq2pd %xmm0, %ymm0
1672 ; AVX512DQ-NEXT: retq
16461673 %cmpres = fcmp ogt <4 x double> %a, zeroinitializer
16471674 %1 = sitofp <4 x i1> %cmpres to <4 x double>
16481675 ret <4 x double> %1
16491676 }
16501677
16511678 define <2 x float> @sbto2f32(<2 x float> %a) {
1652 ; NOVL-LABEL: sbto2f32:
1653 ; NOVL: # %bb.0:
1654 ; NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1
1655 ; NOVL-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
1656 ; NOVL-NEXT: vcvtdq2ps %xmm0, %xmm0
1657 ; NOVL-NEXT: retq
1679 ; NOVLDQ-LABEL: sbto2f32:
1680 ; NOVLDQ: # %bb.0:
1681 ; NOVLDQ-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
1682 ; NOVLDQ-NEXT: vxorps %xmm1, %xmm1, %xmm1
1683 ; NOVLDQ-NEXT: vcmpltps %zmm0, %zmm1, %k1
1684 ; NOVLDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1685 ; NOVLDQ-NEXT: vcvtdq2ps %xmm0, %xmm0
1686 ; NOVLDQ-NEXT: vzeroupper
1687 ; NOVLDQ-NEXT: retq
16581688 ;
16591689 ; VLDQ-LABEL: sbto2f32:
16601690 ; VLDQ: # %bb.0:
16721702 ; VLNODQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
16731703 ; VLNODQ-NEXT: vcvtdq2ps %xmm0, %xmm0
16741704 ; VLNODQ-NEXT: retq
1705 ;
1706 ; AVX512DQ-LABEL: sbto2f32:
1707 ; AVX512DQ: # %bb.0:
1708 ; AVX512DQ-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
1709 ; AVX512DQ-NEXT: vxorps %xmm1, %xmm1, %xmm1
1710 ; AVX512DQ-NEXT: vcmpltps %zmm0, %zmm1, %k0
1711 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
1712 ; AVX512DQ-NEXT: vcvtdq2ps %xmm0, %xmm0
1713 ; AVX512DQ-NEXT: vzeroupper
1714 ; AVX512DQ-NEXT: retq
16751715 %cmpres = fcmp ogt <2 x float> %a, zeroinitializer
16761716 %1 = sitofp <2 x i1> %cmpres to <2 x float>
16771717 ret <2 x float> %1
16781718 }
16791719
16801720 define <2 x double> @sbto2f64(<2 x double> %a) {
1681 ; NOVL-LABEL: sbto2f64:
1682 ; NOVL: # %bb.0:
1683 ; NOVL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
1684 ; NOVL-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
1685 ; NOVL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
1686 ; NOVL-NEXT: vcvtdq2pd %xmm0, %xmm0
1687 ; NOVL-NEXT: retq
1721 ; NOVLDQ-LABEL: sbto2f64:
1722 ; NOVLDQ: # %bb.0:
1723 ; NOVLDQ-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
1724 ; NOVLDQ-NEXT: vxorpd %xmm1, %xmm1, %xmm1
1725 ; NOVLDQ-NEXT: vcmpltpd %zmm0, %zmm1, %k1
1726 ; NOVLDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1727 ; NOVLDQ-NEXT: vcvtdq2pd %xmm0, %xmm0
1728 ; NOVLDQ-NEXT: vzeroupper
1729 ; NOVLDQ-NEXT: retq
16881730 ;
16891731 ; VLDQ-LABEL: sbto2f64:
16901732 ; VLDQ: # %bb.0:
17021744 ; VLNODQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
17031745 ; VLNODQ-NEXT: vcvtdq2pd %xmm0, %xmm0
17041746 ; VLNODQ-NEXT: retq
1747 ;
1748 ; AVX512DQ-LABEL: sbto2f64:
1749 ; AVX512DQ: # %bb.0:
1750 ; AVX512DQ-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
1751 ; AVX512DQ-NEXT: vxorpd %xmm1, %xmm1, %xmm1
1752 ; AVX512DQ-NEXT: vcmpltpd %zmm0, %zmm1, %k0
1753 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
1754 ; AVX512DQ-NEXT: vcvtdq2pd %xmm0, %xmm0
1755 ; AVX512DQ-NEXT: vzeroupper
1756 ; AVX512DQ-NEXT: retq
17051757 %cmpres = fcmp ogt <2 x double> %a, zeroinitializer
17061758 %1 = sitofp <2 x i1> %cmpres to <2 x double>
17071759 ret <2 x double> %1
19241976 define <4 x float> @ubto4f32(<4 x i32> %a) {
19251977 ; NOVL-LABEL: ubto4f32:
19261978 ; NOVL: # %bb.0:
1979 ; NOVL-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
19271980 ; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
1928 ; NOVL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
1929 ; NOVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
1930 ; NOVL-NEXT: vpand %xmm1, %xmm0, %xmm0
1981 ; NOVL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
1982 ; NOVL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
1983 ; NOVL-NEXT: vcvtdq2ps %xmm0, %xmm0
1984 ; NOVL-NEXT: vzeroupper
19311985 ; NOVL-NEXT: retq
19321986 ;
19331987 ; VL-LABEL: ubto4f32:
19451999 define <4 x double> @ubto4f64(<4 x i32> %a) {
19462000 ; NOVL-LABEL: ubto4f64:
19472001 ; NOVL: # %bb.0:
2002 ; NOVL-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
19482003 ; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
1949 ; NOVL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
1950 ; NOVL-NEXT: vpsrld $31, %xmm0, %xmm0
2004 ; NOVL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
2005 ; NOVL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
19512006 ; NOVL-NEXT: vcvtdq2pd %xmm0, %ymm0
19522007 ; NOVL-NEXT: retq
19532008 ;
19682023 ; NOVL: # %bb.0:
19692024 ; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
19702025 ; NOVL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
1971 ; NOVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
1972 ; NOVL-NEXT: vpextrb $8, %xmm0, %eax
1973 ; NOVL-NEXT: andl $1, %eax
1974 ; NOVL-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm1
1975 ; NOVL-NEXT: vpextrb $0, %xmm0, %eax
1976 ; NOVL-NEXT: andl $1, %eax
1977 ; NOVL-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm0
1978 ; NOVL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
2026 ; NOVL-NEXT: vpcmpltuq %zmm1, %zmm0, %k1
2027 ; NOVL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
2028 ; NOVL-NEXT: vcvtdq2ps %xmm0, %xmm0
2029 ; NOVL-NEXT: vzeroupper
19792030 ; NOVL-NEXT: retq
19802031 ;
19812032 ; VL-LABEL: ubto2f32:
19962047 ; NOVL: # %bb.0:
19972048 ; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
19982049 ; NOVL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
1999 ; NOVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
2000 ; NOVL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
2001 ; NOVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
2002 ; NOVL-NEXT: vpand %xmm1, %xmm0, %xmm0
2050 ; NOVL-NEXT: vpcmpltuq %zmm1, %zmm0, %k1
2051 ; NOVL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
20032052 ; NOVL-NEXT: vcvtudq2pd %ymm0, %zmm0
20042053 ; NOVL-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0
20052054 ; NOVL-NEXT: vzeroupper
20192068 }
20202069
20212070 define <2 x i64> @test_2f64toub(<2 x double> %a, <2 x i64> %passthru) {
2022 ; NOVLDQ-LABEL: test_2f64toub:
2023 ; NOVLDQ: # %bb.0:
2024 ; NOVLDQ-NEXT: vcvttsd2usi %xmm0, %rax
2025 ; NOVLDQ-NEXT: vmovq %rax, %xmm2
2026 ; NOVLDQ-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
2027 ; NOVLDQ-NEXT: vcvttsd2usi %xmm0, %rax
2028 ; NOVLDQ-NEXT: vmovq %rax, %xmm0
2029 ; NOVLDQ-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
2030 ; NOVLDQ-NEXT: vpsllq $63, %xmm0, %xmm0
2031 ; NOVLDQ-NEXT: vpsraq $63, %zmm0, %zmm0
2032 ; NOVLDQ-NEXT: vpand %xmm1, %xmm0, %xmm0
2033 ; NOVLDQ-NEXT: vzeroupper
2034 ; NOVLDQ-NEXT: retq
2071 ; KNL-LABEL: test_2f64toub:
2072 ; KNL: # %bb.0:
2073 ; KNL-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
2074 ; KNL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
2075 ; KNL-NEXT: vcvttsd2si %xmm2, %eax
2076 ; KNL-NEXT: kmovw %eax, %k0
2077 ; KNL-NEXT: vcvttsd2si %xmm0, %eax
2078 ; KNL-NEXT: andl $1, %eax
2079 ; KNL-NEXT: kmovw %eax, %k1
2080 ; KNL-NEXT: kshiftrw $1, %k0, %k2
2081 ; KNL-NEXT: kshiftlw $1, %k2, %k2
2082 ; KNL-NEXT: korw %k1, %k2, %k1
2083 ; KNL-NEXT: kshiftrw $1, %k1, %k2
2084 ; KNL-NEXT: kxorw %k0, %k2, %k0
2085 ; KNL-NEXT: kshiftlw $15, %k0, %k0
2086 ; KNL-NEXT: kshiftrw $14, %k0, %k0
2087 ; KNL-NEXT: kxorw %k1, %k0, %k1
2088 ; KNL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z}
2089 ; KNL-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0
2090 ; KNL-NEXT: vzeroupper
2091 ; KNL-NEXT: retq
20352092 ;
20362093 ; VL-LABEL: test_2f64toub:
20372094 ; VL: # %bb.0:
20432100 ;
20442101 ; AVX512DQ-LABEL: test_2f64toub:
20452102 ; AVX512DQ: # %bb.0:
2046 ; AVX512DQ-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
2047 ; AVX512DQ-NEXT: vcvttpd2uqq %zmm0, %zmm0
2048 ; AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0
2049 ; AVX512DQ-NEXT: vpsraq $63, %zmm0, %zmm0
2050 ; AVX512DQ-NEXT: vpand %xmm1, %xmm0, %xmm0
2103 ; AVX512DQ-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
2104 ; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
2105 ; AVX512DQ-NEXT: vcvttsd2si %xmm2, %eax
2106 ; AVX512DQ-NEXT: kmovw %eax, %k0
2107 ; AVX512DQ-NEXT: vcvttsd2si %xmm0, %eax
2108 ; AVX512DQ-NEXT: andl $1, %eax
2109 ; AVX512DQ-NEXT: kmovw %eax, %k1
2110 ; AVX512DQ-NEXT: kshiftrw $1, %k0, %k2
2111 ; AVX512DQ-NEXT: kshiftlw $1, %k2, %k2
2112 ; AVX512DQ-NEXT: korw %k1, %k2, %k1
2113 ; AVX512DQ-NEXT: kshiftrw $1, %k1, %k2
2114 ; AVX512DQ-NEXT: kxorw %k0, %k2, %k0
2115 ; AVX512DQ-NEXT: kshiftlw $15, %k0, %k0
2116 ; AVX512DQ-NEXT: kshiftrw $14, %k0, %k0
2117 ; AVX512DQ-NEXT: kxorw %k1, %k0, %k1
2118 ; AVX512DQ-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z}
2119 ; AVX512DQ-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0
20512120 ; AVX512DQ-NEXT: vzeroupper
20522121 ; AVX512DQ-NEXT: retq
2122 ;
2123 ; AVX512BW-LABEL: test_2f64toub:
2124 ; AVX512BW: # %bb.0:
2125 ; AVX512BW-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
2126 ; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
2127 ; AVX512BW-NEXT: vcvttsd2si %xmm2, %eax
2128 ; AVX512BW-NEXT: kmovd %eax, %k0
2129 ; AVX512BW-NEXT: vcvttsd2si %xmm0, %eax
2130 ; AVX512BW-NEXT: andl $1, %eax
2131 ; AVX512BW-NEXT: kmovw %eax, %k1
2132 ; AVX512BW-NEXT: kshiftrw $1, %k0, %k2
2133 ; AVX512BW-NEXT: kshiftlw $1, %k2, %k2
2134 ; AVX512BW-NEXT: korw %k1, %k2, %k1
2135 ; AVX512BW-NEXT: kshiftrw $1, %k1, %k2
2136 ; AVX512BW-NEXT: kxorw %k0, %k2, %k0
2137 ; AVX512BW-NEXT: kshiftlw $15, %k0, %k0
2138 ; AVX512BW-NEXT: kshiftrw $14, %k0, %k0
2139 ; AVX512BW-NEXT: kxorw %k1, %k0, %k1
2140 ; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z}
2141 ; AVX512BW-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0
2142 ; AVX512BW-NEXT: vzeroupper
2143 ; AVX512BW-NEXT: retq
20532144 %mask = fptoui <2 x double> %a to <2 x i1>
20542145 %select = select <2 x i1> %mask, <2 x i64> %passthru, <2 x i64> zeroinitializer
20552146 ret <2 x i64> %select
20582149 define <4 x i64> @test_4f64toub(<4 x double> %a, <4 x i64> %passthru) {
20592150 ; NOVL-LABEL: test_4f64toub:
20602151 ; NOVL: # %bb.0:
2061 ; NOVL-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
2062 ; NOVL-NEXT: vcvttpd2udq %zmm0, %ymm0
2152 ; NOVL-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
2153 ; NOVL-NEXT: vcvttpd2dq %ymm0, %xmm0
20632154 ; NOVL-NEXT: vpslld $31, %xmm0, %xmm0
2064 ; NOVL-NEXT: vpsrad $31, %xmm0, %xmm0
2065 ; NOVL-NEXT: vpmovsxdq %xmm0, %ymm0
2066 ; NOVL-NEXT: vpand %ymm1, %ymm0, %ymm0
2155 ; NOVL-NEXT: vptestmd %zmm0, %zmm0, %k1
2156 ; NOVL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z}
2157 ; NOVL-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0
20672158 ; NOVL-NEXT: retq
20682159 ;
20692160 ; VL-LABEL: test_4f64toub:
21002191 }
21012192
21022193 define <2 x i64> @test_2f32toub(<2 x float> %a, <2 x i64> %passthru) {
2103 ; NOVLDQ-LABEL: test_2f32toub:
2104 ; NOVLDQ: # %bb.0:
2105 ; NOVLDQ-NEXT: vcvttss2usi %xmm0, %rax
2106 ; NOVLDQ-NEXT: vmovq %rax, %xmm2
2107 ; NOVLDQ-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
2108 ; NOVLDQ-NEXT: vcvttss2usi %xmm0, %rax
2109 ; NOVLDQ-NEXT: vmovq %rax, %xmm0
2110 ; NOVLDQ-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
2111 ; NOVLDQ-NEXT: vpsllq $63, %xmm0, %xmm0
2112 ; NOVLDQ-NEXT: vpsraq $63, %zmm0, %zmm0
2113 ; NOVLDQ-NEXT: vpand %xmm1, %xmm0, %xmm0
2114 ; NOVLDQ-NEXT: vzeroupper
2115 ; NOVLDQ-NEXT: retq
2194 ; NOVL-LABEL: test_2f32toub:
2195 ; NOVL: # %bb.0:
2196 ; NOVL-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
2197 ; NOVL-NEXT: vcvttps2dq %xmm0, %xmm0
2198 ; NOVL-NEXT: vpslld $31, %xmm0, %xmm0
2199 ; NOVL-NEXT: vptestmd %zmm0, %zmm0, %k1
2200 ; NOVL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z}
2201 ; NOVL-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0
2202 ; NOVL-NEXT: vzeroupper
2203 ; NOVL-NEXT: retq
21162204 ;
21172205 ; VL-LABEL: test_2f32toub:
21182206 ; VL: # %bb.0:
21212209 ; VL-NEXT: vptestmd %xmm0, %xmm0, %k1
21222210 ; VL-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} {z}
21232211 ; VL-NEXT: retq
2124 ;
2125 ; AVX512DQ-LABEL: test_2f32toub:
2126 ; AVX512DQ: # %bb.0:
2127 ; AVX512DQ-NEXT: # kill: def %xmm0 killed %xmm0 def %ymm0
2128 ; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0
2129 ; AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0
2130 ; AVX512DQ-NEXT: vpsraq $63, %zmm0, %zmm0
2131 ; AVX512DQ-NEXT: vpand %xmm1, %xmm0, %xmm0
2132 ; AVX512DQ-NEXT: vzeroupper
2133 ; AVX512DQ-NEXT: retq
21342212 %mask = fptoui <2 x float> %a to <2 x i1>
21352213 %select = select <2 x i1> %mask, <2 x i64> %passthru, <2 x i64> zeroinitializer
21362214 ret <2 x i64> %select
21392217 define <4 x i64> @test_4f32toub(<4 x float> %a, <4 x i64> %passthru) {
21402218 ; NOVL-LABEL: test_4f32toub:
21412219 ; NOVL: # %bb.0:
2142 ; NOVL-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
2143 ; NOVL-NEXT: vcvttps2udq %zmm0, %zmm0
2220 ; NOVL-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
2221 ; NOVL-NEXT: vcvttps2dq %xmm0, %xmm0
21442222 ; NOVL-NEXT: vpslld $31, %xmm0, %xmm0
2145 ; NOVL-NEXT: vpsrad $31, %xmm0, %xmm0
2146 ; NOVL-NEXT: vpmovsxdq %xmm0, %ymm0
2147 ; NOVL-NEXT: vpand %ymm1, %ymm0, %ymm0
2223 ; NOVL-NEXT: vptestmd %zmm0, %zmm0, %k1
2224 ; NOVL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z}
2225 ; NOVL-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0
21482226 ; NOVL-NEXT: retq
21492227 ;
21502228 ; VL-LABEL: test_4f32toub:
21942272 }
21952273
21962274 define <2 x i64> @test_2f64tosb(<2 x double> %a, <2 x i64> %passthru) {
2197 ; NOVLDQ-LABEL: test_2f64tosb:
2198 ; NOVLDQ: # %bb.0:
2199 ; NOVLDQ-NEXT: vcvttsd2si %xmm0, %rax
2200 ; NOVLDQ-NEXT: vmovq %rax, %xmm2
2201 ; NOVLDQ-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
2202 ; NOVLDQ-NEXT: vcvttsd2si %xmm0, %rax
2203 ; NOVLDQ-NEXT: vmovq %rax, %xmm0
2204 ; NOVLDQ-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
2205 ; NOVLDQ-NEXT: vpand %xmm1, %xmm0, %xmm0
2206 ; NOVLDQ-NEXT: retq
2275 ; KNL-LABEL: test_2f64tosb:
2276 ; KNL: # %bb.0:
2277 ; KNL-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
2278 ; KNL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
2279 ; KNL-NEXT: vcvttsd2si %xmm2, %eax
2280 ; KNL-NEXT: kmovw %eax, %k0
2281 ; KNL-NEXT: vcvttsd2si %xmm0, %eax
2282 ; KNL-NEXT: andl $1, %eax
2283 ; KNL-NEXT: kmovw %eax, %k1
2284 ; KNL-NEXT: kshiftrw $1, %k0, %k2
2285 ; KNL-NEXT: kshiftlw $1, %k2, %k2
2286 ; KNL-NEXT: korw %k1, %k2, %k1
2287 ; KNL-NEXT: kshiftrw $1, %k1, %k2
2288 ; KNL-NEXT: kxorw %k0, %k2, %k0
2289 ; KNL-NEXT: kshiftlw $15, %k0, %k0
2290 ; KNL-NEXT: kshiftrw $14, %k0, %k0
2291 ; KNL-NEXT: kxorw %k1, %k0, %k1
2292 ; KNL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z}
2293 ; KNL-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0
2294 ; KNL-NEXT: vzeroupper
2295 ; KNL-NEXT: retq
22072296 ;
22082297 ; VL-LABEL: test_2f64tosb:
22092298 ; VL: # %bb.0:
22152304 ;
22162305 ; AVX512DQ-LABEL: test_2f64tosb:
22172306 ; AVX512DQ: # %bb.0:
2218 ; AVX512DQ-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
2219 ; AVX512DQ-NEXT: vcvttpd2qq %zmm0, %zmm0
2220 ; AVX512DQ-NEXT: vandps %xmm1, %xmm0, %xmm0
2307 ; AVX512DQ-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
2308 ; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
2309 ; AVX512DQ-NEXT: vcvttsd2si %xmm2, %eax
2310 ; AVX512DQ-NEXT: kmovw %eax, %k0
2311 ; AVX512DQ-NEXT: vcvttsd2si %xmm0, %eax
2312 ; AVX512DQ-NEXT: andl $1, %eax
2313 ; AVX512DQ-NEXT: kmovw %eax, %k1
2314 ; AVX512DQ-NEXT: kshiftrw $1, %k0, %k2
2315 ; AVX512DQ-NEXT: kshiftlw $1, %k2, %k2
2316 ; AVX512DQ-NEXT: korw %k1, %k2, %k1
2317 ; AVX512DQ-NEXT: kshiftrw $1, %k1, %k2
2318 ; AVX512DQ-NEXT: kxorw %k0, %k2, %k0
2319 ; AVX512DQ-NEXT: kshiftlw $15, %k0, %k0
2320 ; AVX512DQ-NEXT: kshiftrw $14, %k0, %k0
2321 ; AVX512DQ-NEXT: kxorw %k1, %k0, %k1
2322 ; AVX512DQ-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z}
2323 ; AVX512DQ-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0
22212324 ; AVX512DQ-NEXT: vzeroupper
22222325 ; AVX512DQ-NEXT: retq
2326 ;
2327 ; AVX512BW-LABEL: test_2f64tosb:
2328 ; AVX512BW: # %bb.0:
2329 ; AVX512BW-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
2330 ; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
2331 ; AVX512BW-NEXT: vcvttsd2si %xmm2, %eax
2332 ; AVX512BW-NEXT: kmovd %eax, %k0
2333 ; AVX512BW-NEXT: vcvttsd2si %xmm0, %eax
2334 ; AVX512BW-NEXT: andl $1, %eax
2335 ; AVX512BW-NEXT: kmovw %eax, %k1
2336 ; AVX512BW-NEXT: kshiftrw $1, %k0, %k2
2337 ; AVX512BW-NEXT: kshiftlw $1, %k2, %k2
2338 ; AVX512BW-NEXT: korw %k1, %k2, %k1
2339 ; AVX512BW-NEXT: kshiftrw $1, %k1, %k2
2340 ; AVX512BW-NEXT: kxorw %k0, %k2, %k0
2341 ; AVX512BW-NEXT: kshiftlw $15, %k0, %k0
2342 ; AVX512BW-NEXT: kshiftrw $14, %k0, %k0
2343 ; AVX512BW-NEXT: kxorw %k1, %k0, %k1
2344 ; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z}
2345 ; AVX512BW-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0
2346 ; AVX512BW-NEXT: vzeroupper
2347 ; AVX512BW-NEXT: retq
22232348 %mask = fptosi <2 x double> %a to <2 x i1>
22242349 %select = select <2 x i1> %mask, <2 x i64> %passthru, <2 x i64> zeroinitializer
22252350 ret <2 x i64> %select
22282353 define <4 x i64> @test_4f64tosb(<4 x double> %a, <4 x i64> %passthru) {
22292354 ; NOVL-LABEL: test_4f64tosb:
22302355 ; NOVL: # %bb.0:
2356 ; NOVL-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
22312357 ; NOVL-NEXT: vcvttpd2dq %ymm0, %xmm0
2232 ; NOVL-NEXT: vpmovsxdq %xmm0, %ymm0
2233 ; NOVL-NEXT: vpand %ymm1, %ymm0, %ymm0
2358 ; NOVL-NEXT: vptestmd %zmm0, %zmm0, %k1
2359 ; NOVL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z}
2360 ; NOVL-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0
22342361 ; NOVL-NEXT: retq
22352362 ;
22362363 ; VL-LABEL: test_4f64tosb:
22642391 }
22652392
22662393 define <2 x i64> @test_2f32tosb(<2 x float> %a, <2 x i64> %passthru) {
2267 ; NOVLDQ-LABEL: test_2f32tosb:
2268 ; NOVLDQ: # %bb.0:
2269 ; NOVLDQ-NEXT: vcvttss2si %xmm0, %rax
2270 ; NOVLDQ-NEXT: vmovq %rax, %xmm2
2271 ; NOVLDQ-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
2272 ; NOVLDQ-NEXT: vcvttss2si %xmm0, %rax
2273 ; NOVLDQ-NEXT: vmovq %rax, %xmm0
2274 ; NOVLDQ-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
2275 ; NOVLDQ-NEXT: vpand %xmm1, %xmm0, %xmm0
2276 ; NOVLDQ-NEXT: retq
2394 ; NOVL-LABEL: test_2f32tosb:
2395 ; NOVL: # %bb.0:
2396 ; NOVL-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
2397 ; NOVL-NEXT: vcvttps2dq %xmm0, %xmm0
2398 ; NOVL-NEXT: vptestmd %zmm0, %zmm0, %k1
2399 ; NOVL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z}
2400 ; NOVL-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0
2401 ; NOVL-NEXT: vzeroupper
2402 ; NOVL-NEXT: retq
22772403 ;
22782404 ; VL-LABEL: test_2f32tosb:
22792405 ; VL: # %bb.0:
22812407 ; VL-NEXT: vptestmd %xmm0, %xmm0, %k1
22822408 ; VL-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} {z}
22832409 ; VL-NEXT: retq
2284 ;
2285 ; AVX512DQ-LABEL: test_2f32tosb:
2286 ; AVX512DQ: # %bb.0:
2287 ; AVX512DQ-NEXT: # kill: def %xmm0 killed %xmm0 def %ymm0
2288 ; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0
2289 ; AVX512DQ-NEXT: vandps %xmm1, %xmm0, %xmm0
2290 ; AVX512DQ-NEXT: vzeroupper
2291 ; AVX512DQ-NEXT: retq
22922410 %mask = fptosi <2 x float> %a to <2 x i1>
22932411 %select = select <2 x i1> %mask, <2 x i64> %passthru, <2 x i64> zeroinitializer
22942412 ret <2 x i64> %select
22972415 define <4 x i64> @test_4f32tosb(<4 x float> %a, <4 x i64> %passthru) {
22982416 ; NOVL-LABEL: test_4f32tosb:
22992417 ; NOVL: # %bb.0:
2418 ; NOVL-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
23002419 ; NOVL-NEXT: vcvttps2dq %xmm0, %xmm0
2301 ; NOVL-NEXT: vpmovsxdq %xmm0, %ymm0
2302 ; NOVL-NEXT: vpand %ymm1, %ymm0, %ymm0
2420 ; NOVL-NEXT: vptestmd %zmm0, %zmm0, %k1
2421 ; NOVL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z}
2422 ; NOVL-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0
23032423 ; NOVL-NEXT: retq
23042424 ;
23052425 ; VL-LABEL: test_4f32tosb:
300300 ; KNL-LABEL: zext_4x8mem_to_4x32:
301301 ; KNL: # %bb.0:
302302 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
303 ; KNL-NEXT: vpsrad $31, %xmm0, %xmm0
304 ; KNL-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
305 ; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
303 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
304 ; KNL-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
305 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
306 ; KNL-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0
306307 ; KNL-NEXT: retq
307308 ;
308309 ; SKX-LABEL: zext_4x8mem_to_4x32:
321322 ; KNL-LABEL: sext_4x8mem_to_4x32:
322323 ; KNL: # %bb.0:
323324 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
324 ; KNL-NEXT: vpsrad $31, %xmm0, %xmm0
325 ; KNL-NEXT: vpmovsxbd (%rdi), %xmm1
326 ; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
325 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
326 ; KNL-NEXT: vpmovsxbd (%rdi), %xmm0
327 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
328 ; KNL-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0
327329 ; KNL-NEXT: retq
328330 ;
329331 ; SKX-LABEL: sext_4x8mem_to_4x32:
488490 ; KNL-LABEL: zext_2x8mem_to_2x64:
489491 ; KNL: # %bb.0:
490492 ; KNL-NEXT: vpsllq $63, %xmm0, %xmm0
491 ; KNL-NEXT: vpsraq $63, %zmm0, %zmm0
492 ; KNL-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
493 ; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
493 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
494 ; KNL-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
495 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
496 ; KNL-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0
494497 ; KNL-NEXT: retq
495498 ;
496499 ; SKX-LABEL: zext_2x8mem_to_2x64:
508511 ; KNL-LABEL: sext_2x8mem_to_2x64mask:
509512 ; KNL: # %bb.0:
510513 ; KNL-NEXT: vpsllq $63, %xmm0, %xmm0
511 ; KNL-NEXT: vpsraq $63, %zmm0, %zmm0
512 ; KNL-NEXT: vpmovsxbq (%rdi), %xmm1
513 ; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
514 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
515 ; KNL-NEXT: vpmovsxbq (%rdi), %xmm0
516 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
517 ; KNL-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0
514518 ; KNL-NEXT: retq
515519 ;
516520 ; SKX-LABEL: sext_2x8mem_to_2x64mask:
538542 ; KNL-LABEL: zext_4x8mem_to_4x64:
539543 ; KNL: # %bb.0:
540544 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
541 ; KNL-NEXT: vpsrad $31, %xmm0, %xmm0
542 ; KNL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
543 ; KNL-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
544 ; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0
545 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
546 ; KNL-NEXT: vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
547 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
548 ; KNL-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0
545549 ; KNL-NEXT: retq
546550 ;
547551 ; SKX-LABEL: zext_4x8mem_to_4x64:
560564 ; KNL-LABEL: sext_4x8mem_to_4x64mask:
561565 ; KNL: # %bb.0:
562566 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
563 ; KNL-NEXT: vpsrad $31, %xmm0, %xmm0
564 ; KNL-NEXT: vpmovsxdq %xmm0, %ymm0
565 ; KNL-NEXT: vpmovsxbq (%rdi), %ymm1
566 ; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0
567 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
568 ; KNL-NEXT: vpmovsxbq (%rdi), %ymm0
569 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
570 ; KNL-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0
567571 ; KNL-NEXT: retq
568572 ;
569573 ; SKX-LABEL: sext_4x8mem_to_4x64mask:
644648 ; KNL-LABEL: zext_4x16mem_to_4x32:
645649 ; KNL: # %bb.0:
646650 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
647 ; KNL-NEXT: vpsrad $31, %xmm0, %xmm0
648 ; KNL-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
649 ; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
651 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
652 ; KNL-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
653 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
654 ; KNL-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0
650655 ; KNL-NEXT: retq
651656 ;
652657 ; SKX-LABEL: zext_4x16mem_to_4x32:
665670 ; KNL-LABEL: sext_4x16mem_to_4x32mask:
666671 ; KNL: # %bb.0:
667672 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
668 ; KNL-NEXT: vpsrad $31, %xmm0, %xmm0
669 ; KNL-NEXT: vpmovsxwd (%rdi), %xmm1
670 ; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
673 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
674 ; KNL-NEXT: vpmovsxwd (%rdi), %xmm0
675 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
676 ; KNL-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0
671677 ; KNL-NEXT: retq
672678 ;
673679 ; SKX-LABEL: sext_4x16mem_to_4x32mask:
864870 ; KNL-LABEL: zext_2x16mem_to_2x64:
865871 ; KNL: # %bb.0:
866872 ; KNL-NEXT: vpsllq $63, %xmm0, %xmm0
867 ; KNL-NEXT: vpsraq $63, %zmm0, %zmm0
868 ; KNL-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
869 ; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
873 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
874 ; KNL-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
875 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
876 ; KNL-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0
870877 ; KNL-NEXT: retq
871878 ;
872879 ; SKX-LABEL: zext_2x16mem_to_2x64:
885892 ; KNL-LABEL: sext_2x16mem_to_2x64mask:
886893 ; KNL: # %bb.0:
887894 ; KNL-NEXT: vpsllq $63, %xmm0, %xmm0
888 ; KNL-NEXT: vpsraq $63, %zmm0, %zmm0
889 ; KNL-NEXT: vpmovsxwq (%rdi), %xmm1
890 ; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
895 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
896 ; KNL-NEXT: vpmovsxwq (%rdi), %xmm0
897 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
898 ; KNL-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0
891899 ; KNL-NEXT: retq
892900 ;
893901 ; SKX-LABEL: sext_2x16mem_to_2x64mask:
916924 ; KNL-LABEL: zext_4x16mem_to_4x64:
917925 ; KNL: # %bb.0:
918926 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
919 ; KNL-NEXT: vpsrad $31, %xmm0, %xmm0
920 ; KNL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
921 ; KNL-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
922 ; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0
927 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
928 ; KNL-NEXT: vpmovzxwq {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
929 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
930 ; KNL-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0
923931 ; KNL-NEXT: retq
924932 ;
925933 ; SKX-LABEL: zext_4x16mem_to_4x64:
938946 ; KNL-LABEL: sext_4x16mem_to_4x64mask:
939947 ; KNL: # %bb.0:
940948 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
941 ; KNL-NEXT: vpsrad $31, %xmm0, %xmm0
942 ; KNL-NEXT: vpmovsxdq %xmm0, %ymm0
943 ; KNL-NEXT: vpmovsxwq (%rdi), %ymm1
944 ; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0
949 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
950 ; KNL-NEXT: vpmovsxwq (%rdi), %ymm0
951 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
952 ; KNL-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0
945953 ; KNL-NEXT: retq
946954 ;
947955 ; SKX-LABEL: sext_4x16mem_to_4x64mask:
10511059 ; KNL-LABEL: zext_2x32mem_to_2x64:
10521060 ; KNL: # %bb.0:
10531061 ; KNL-NEXT: vpsllq $63, %xmm0, %xmm0
1054 ; KNL-NEXT: vpsraq $63, %zmm0, %zmm0
1055 ; KNL-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
1056 ; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
1062 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
1063 ; KNL-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
1064 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
1065 ; KNL-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0
10571066 ; KNL-NEXT: retq
10581067 ;
10591068 ; SKX-LABEL: zext_2x32mem_to_2x64:
10721081 ; KNL-LABEL: sext_2x32mem_to_2x64mask:
10731082 ; KNL: # %bb.0:
10741083 ; KNL-NEXT: vpsllq $63, %xmm0, %xmm0
1075 ; KNL-NEXT: vpsraq $63, %zmm0, %zmm0
1076 ; KNL-NEXT: vpmovsxdq (%rdi), %xmm1
1077 ; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
1084 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
1085 ; KNL-NEXT: vpmovsxdq (%rdi), %xmm0
1086 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
1087 ; KNL-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0
10781088 ; KNL-NEXT: retq
10791089 ;
10801090 ; SKX-LABEL: sext_2x32mem_to_2x64mask:
11031113 ; KNL-LABEL: zext_4x32mem_to_4x64:
11041114 ; KNL: # %bb.0:
11051115 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
1106 ; KNL-NEXT: vpsrad $31, %xmm0, %xmm0
1107 ; KNL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1108 ; KNL-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1109 ; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0
1116 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
1117 ; KNL-NEXT: vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1118 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
1119 ; KNL-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0
11101120 ; KNL-NEXT: retq
11111121 ;
11121122 ; SKX-LABEL: zext_4x32mem_to_4x64:
11251135 ; KNL-LABEL: sext_4x32mem_to_4x64mask:
11261136 ; KNL: # %bb.0:
11271137 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
1128 ; KNL-NEXT: vpsrad $31, %xmm0, %xmm0
1129 ; KNL-NEXT: vpmovsxdq %xmm0, %ymm0
1130 ; KNL-NEXT: vpmovsxdq (%rdi), %ymm1
1131 ; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0
1138 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
1139 ; KNL-NEXT: vpmovsxdq (%rdi), %ymm0
1140 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
1141 ; KNL-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0
11321142 ; KNL-NEXT: retq
11331143 ;
11341144 ; SKX-LABEL: sext_4x32mem_to_4x64mask:
11661176 ; KNL-LABEL: zext_4x32_to_4x64mask:
11671177 ; KNL: # %bb.0:
11681178 ; KNL-NEXT: vpslld $31, %xmm1, %xmm1
1169 ; KNL-NEXT: vpsrad $31, %xmm1, %xmm1
1170 ; KNL-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
1179 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1
11711180 ; KNL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1172 ; KNL-NEXT: vpand %ymm0, %ymm1, %ymm0
1181 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
1182 ; KNL-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0
11731183 ; KNL-NEXT: retq
11741184 ;
11751185 ; SKX-LABEL: zext_4x32_to_4x64mask:
843843 define i8 @test_iinsertelement_v4i1(i32 %a, i32 %b, <4 x i32> %x , <4 x i32> %y) {
844844 ; KNL-LABEL: test_iinsertelement_v4i1:
845845 ; KNL: ## %bb.0:
846 ; KNL-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1
847 ; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0
846848 ; KNL-NEXT: cmpl %esi, %edi
847849 ; KNL-NEXT: setb %al
848 ; KNL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
849 ; KNL-NEXT: vpxor %xmm2, %xmm0, %xmm0
850 ; KNL-NEXT: vpxor %xmm2, %xmm1, %xmm1
851 ; KNL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
852 ; KNL-NEXT: vpextrb $4, %xmm0, %ecx
853 ; KNL-NEXT: kmovw %ecx, %k0
854 ; KNL-NEXT: vpextrb $0, %xmm0, %ecx
855 ; KNL-NEXT: andl $1, %ecx
856 ; KNL-NEXT: kmovw %ecx, %k1
857 ; KNL-NEXT: kshiftrw $1, %k0, %k2
858 ; KNL-NEXT: kshiftlw $1, %k2, %k2
859 ; KNL-NEXT: korw %k1, %k2, %k1
860 ; KNL-NEXT: kshiftrw $1, %k1, %k2
861 ; KNL-NEXT: kxorw %k0, %k2, %k0
862 ; KNL-NEXT: kshiftlw $15, %k0, %k0
863 ; KNL-NEXT: kshiftrw $14, %k0, %k0
864 ; KNL-NEXT: kxorw %k1, %k0, %k0
850 ; KNL-NEXT: vpcmpltud %zmm1, %zmm0, %k0
865851 ; KNL-NEXT: kshiftrw $2, %k0, %k1
866852 ; KNL-NEXT: kmovw %eax, %k2
867853 ; KNL-NEXT: kxorw %k2, %k1, %k1
868854 ; KNL-NEXT: kshiftlw $15, %k1, %k1
869855 ; KNL-NEXT: kshiftrw $13, %k1, %k1
870856 ; KNL-NEXT: kxorw %k0, %k1, %k0
871 ; KNL-NEXT: kshiftrw $3, %k0, %k1
872 ; KNL-NEXT: vpextrb $12, %xmm0, %eax
873 ; KNL-NEXT: kmovw %eax, %k2
874 ; KNL-NEXT: kxorw %k2, %k1, %k1
875 ; KNL-NEXT: kshiftlw $15, %k1, %k1
876 ; KNL-NEXT: kshiftrw $12, %k1, %k1
877 ; KNL-NEXT: kxorw %k0, %k1, %k0
878857 ; KNL-NEXT: kmovw %k0, %eax
879858 ; KNL-NEXT: ## kill: def %al killed %al killed %eax
859 ; KNL-NEXT: vzeroupper
880860 ; KNL-NEXT: retq
881861 ;
882862 ; SKX-LABEL: test_iinsertelement_v4i1:
904884 define i8 @test_iinsertelement_v2i1(i32 %a, i32 %b, <2 x i64> %x , <2 x i64> %y) {
905885 ; KNL-LABEL: test_iinsertelement_v2i1:
906886 ; KNL: ## %bb.0:
887 ; KNL-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1
888 ; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0
907889 ; KNL-NEXT: cmpl %esi, %edi
908890 ; KNL-NEXT: setb %al
909 ; KNL-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
910 ; KNL-NEXT: vpxor %xmm2, %xmm0, %xmm0
911 ; KNL-NEXT: vpxor %xmm2, %xmm1, %xmm1
912 ; KNL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
913 ; KNL-NEXT: vpextrb $0, %xmm0, %ecx
914 ; KNL-NEXT: andl $1, %ecx
915 ; KNL-NEXT: kmovw %ecx, %k0
916 ; KNL-NEXT: kshiftrw $1, %k0, %k1
917 ; KNL-NEXT: kshiftlw $1, %k1, %k1
918 ; KNL-NEXT: korw %k0, %k1, %k0
891 ; KNL-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
919892 ; KNL-NEXT: kshiftrw $1, %k0, %k1
920893 ; KNL-NEXT: kmovw %eax, %k2
921894 ; KNL-NEXT: kxorw %k2, %k1, %k1
924897 ; KNL-NEXT: kxorw %k0, %k1, %k0
925898 ; KNL-NEXT: kmovw %k0, %eax
926899 ; KNL-NEXT: ## kill: def %al killed %al killed %eax
900 ; KNL-NEXT: vzeroupper
927901 ; KNL-NEXT: retq
928902 ;
929903 ; SKX-LABEL: test_iinsertelement_v2i1:
951925 define zeroext i8 @test_extractelement_v2i1(<2 x i64> %a, <2 x i64> %b) {
952926 ; KNL-LABEL: test_extractelement_v2i1:
953927 ; KNL: ## %bb.0:
954 ; KNL-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
955 ; KNL-NEXT: vpxor %xmm2, %xmm1, %xmm1
956 ; KNL-NEXT: vpxor %xmm2, %xmm0, %xmm0
957 ; KNL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
958 ; KNL-NEXT: vpextrb $0, %xmm0, %eax
928 ; KNL-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1
929 ; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0
930 ; KNL-NEXT: vpcmpnleuq %zmm1, %zmm0, %k0
931 ; KNL-NEXT: kmovw %k0, %eax
959932 ; KNL-NEXT: andb $1, %al
960933 ; KNL-NEXT: movb $4, %cl
961934 ; KNL-NEXT: subb %al, %cl
962935 ; KNL-NEXT: movzbl %cl, %eax
936 ; KNL-NEXT: vzeroupper
963937 ; KNL-NEXT: retq
964938 ;
965939 ; SKX-LABEL: test_extractelement_v2i1:
980954 define zeroext i8 @extractelement_v2i1_alt(<2 x i64> %a, <2 x i64> %b) {
981955 ; KNL-LABEL: extractelement_v2i1_alt:
982956 ; KNL: ## %bb.0:
983 ; KNL-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
984 ; KNL-NEXT: vpxor %xmm2, %xmm1, %xmm1
985 ; KNL-NEXT: vpxor %xmm2, %xmm0, %xmm0
986 ; KNL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
987 ; KNL-NEXT: vpextrb $0, %xmm0, %eax
957 ; KNL-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1
958 ; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0
959 ; KNL-NEXT: vpcmpnleuq %zmm1, %zmm0, %k0
960 ; KNL-NEXT: kmovw %k0, %eax
988961 ; KNL-NEXT: andb $1, %al
989962 ; KNL-NEXT: movb $4, %cl
990963 ; KNL-NEXT: subb %al, %cl
991964 ; KNL-NEXT: movzbl %cl, %eax
965 ; KNL-NEXT: vzeroupper
992966 ; KNL-NEXT: retq
993967 ;
994968 ; SKX-LABEL: extractelement_v2i1_alt:
1010984 define zeroext i8 @test_extractelement_v4i1(<4 x i32> %a, <4 x i32> %b) {
1011985 ; KNL-LABEL: test_extractelement_v4i1:
1012986 ; KNL: ## %bb.0:
1013 ; KNL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
1014 ; KNL-NEXT: vpxor %xmm2, %xmm1, %xmm1
1015 ; KNL-NEXT: vpxor %xmm2, %xmm0, %xmm0
1016 ; KNL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
1017 ; KNL-NEXT: vpextrd $3, %xmm0, %eax
987 ; KNL-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1
988 ; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0
989 ; KNL-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
990 ; KNL-NEXT: kshiftrw $3, %k0, %k0
991 ; KNL-NEXT: kmovw %k0, %eax
1018992 ; KNL-NEXT: andl $1, %eax
993 ; KNL-NEXT: vzeroupper
1019994 ; KNL-NEXT: retq
1020995 ;
1021996 ; SKX-LABEL: test_extractelement_v4i1:
15491524 ; KNL-LABEL: test_extractelement_varible_v2i1:
15501525 ; KNL: ## %bb.0:
15511526 ; KNL-NEXT: ## kill: def %edi killed %edi def %rdi
1552 ; KNL-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
1553 ; KNL-NEXT: vpxor %xmm2, %xmm1, %xmm1
1554 ; KNL-NEXT: vpxor %xmm2, %xmm0, %xmm0
1555 ; KNL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
1556 ; KNL-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
1527 ; KNL-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1
1528 ; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0
1529 ; KNL-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1
1530 ; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1531 ; KNL-NEXT: vextracti32x4 $0, %zmm0, -{{[0-9]+}}(%rsp)
15571532 ; KNL-NEXT: andl $1, %edi
1558 ; KNL-NEXT: movl -24(%rsp,%rdi,8), %eax
1533 ; KNL-NEXT: movzbl -24(%rsp,%rdi,8), %eax
15591534 ; KNL-NEXT: andl $1, %eax
1535 ; KNL-NEXT: vzeroupper
15601536 ; KNL-NEXT: retq
15611537 ;
15621538 ; SKX-LABEL: test_extractelement_varible_v2i1:
15791555 ; KNL-LABEL: test_extractelement_varible_v4i1:
15801556 ; KNL: ## %bb.0:
15811557 ; KNL-NEXT: ## kill: def %edi killed %edi def %rdi
1582 ; KNL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
1583 ; KNL-NEXT: vpxor %xmm2, %xmm1, %xmm1
1584 ; KNL-NEXT: vpxor %xmm2, %xmm0, %xmm0
1585 ; KNL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
1586 ; KNL-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
1558 ; KNL-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1
1559 ; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0
1560 ; KNL-NEXT: vpcmpnleud %zmm1, %zmm0, %k1
1561 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1562 ; KNL-NEXT: vextracti32x4 $0, %zmm0, -{{[0-9]+}}(%rsp)
15871563 ; KNL-NEXT: andl $3, %edi
1588 ; KNL-NEXT: movl -24(%rsp,%rdi,4), %eax
1564 ; KNL-NEXT: movzbl -24(%rsp,%rdi,4), %eax
15891565 ; KNL-NEXT: andl $1, %eax
1566 ; KNL-NEXT: vzeroupper
15901567 ; KNL-NEXT: retq
15911568 ;
15921569 ; SKX-LABEL: test_extractelement_varible_v4i1:
30033003 define <4 x float> @test_mask_vextractf32x4(<4 x float> %b, <16 x float> %a, i8 %mask) {
30043004 ; CHECK-LABEL: test_mask_vextractf32x4:
30053005 ; CHECK: ## %bb.0:
3006 ; CHECK-NEXT: vmovd %edi, %xmm2
3007 ; CHECK-NEXT: kmovw %edi, %k0
3008 ; CHECK-NEXT: kshiftrw $3, %k0, %k1
3009 ; CHECK-NEXT: kmovw %k1, %eax
3010 ; CHECK-NEXT: kshiftrw $2, %k0, %k1
3011 ; CHECK-NEXT: kmovw %k1, %ecx
3012 ; CHECK-NEXT: kshiftrw $1, %k0, %k0
3013 ; CHECK-NEXT: kmovw %k0, %edx
3014 ; CHECK-NEXT: vpinsrb $4, %edx, %xmm2, %xmm2
3015 ; CHECK-NEXT: vpinsrb $8, %ecx, %xmm2, %xmm2
3016 ; CHECK-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
3017 ; CHECK-NEXT: vextractf32x4 $2, %zmm1, %xmm1
3018 ; CHECK-NEXT: vpslld $31, %xmm2, %xmm2
3019 ; CHECK-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0
3006 ; CHECK-NEXT: kmovw %edi, %k1
3007 ; CHECK-NEXT: vextractf32x4 $2, %zmm1, %xmm0 {%k1}
30203008 ; CHECK-NEXT: retq
30213009 %res = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float> %a, i32 2, <4 x float> %b, i8 %mask)
30223010 ret <4 x float> %res
30273015 define <4 x i64> @test_mask_vextracti64x4(<4 x i64> %b, <8 x i64> %a, i8 %mask) {
30283016 ; CHECK-LABEL: test_mask_vextracti64x4:
30293017 ; CHECK: ## %bb.0:
3030 ; CHECK-NEXT: vextractf64x4 $1, %zmm1, %ymm1
3031 ; CHECK-NEXT: vmovd %edi, %xmm2
3032 ; CHECK-NEXT: kmovw %edi, %k0
3033 ; CHECK-NEXT: kshiftrw $3, %k0, %k1
3034 ; CHECK-NEXT: kmovw %k1, %eax
3035 ; CHECK-NEXT: kshiftrw $2, %k0, %k1
3036 ; CHECK-NEXT: kmovw %k1, %ecx
3037 ; CHECK-NEXT: kshiftrw $1, %k0, %k0
3038 ; CHECK-NEXT: kmovw %k0, %edx
3039 ; CHECK-NEXT: vpinsrb $4, %edx, %xmm2, %xmm2
3040 ; CHECK-NEXT: vpinsrb $8, %ecx, %xmm2, %xmm2
3041 ; CHECK-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
3042 ; CHECK-NEXT: vpslld $31, %xmm2, %xmm2
3043 ; CHECK-NEXT: vpmovsxdq %xmm2, %ymm2
3044 ; CHECK-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0
3018 ; CHECK-NEXT: kmovw %edi, %k1
3019 ; CHECK-NEXT: vextracti64x4 $1, %zmm1, %ymm0 {%k1}
30453020 ; CHECK-NEXT: retq
30463021 %res = call <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64> %a, i32 1, <4 x i64> %b, i8 %mask)
30473022 ret <4 x i64> %res
30523027 define <4 x i32> @test_maskz_vextracti32x4(<16 x i32> %a, i8 %mask) {
30533028 ; CHECK-LABEL: test_maskz_vextracti32x4:
30543029 ; CHECK: ## %bb.0:
3055 ; CHECK-NEXT: vmovd %edi, %xmm1
3056 ; CHECK-NEXT: kmovw %edi, %k0
3057 ; CHECK-NEXT: kshiftrw $3, %k0, %k1
3058 ; CHECK-NEXT: kmovw %k1, %eax
3059 ; CHECK-NEXT: kshiftrw $2, %k0, %k1
3060 ; CHECK-NEXT: kmovw %k1, %ecx
3061 ; CHECK-NEXT: kshiftrw $1, %k0, %k0
3062 ; CHECK-NEXT: kmovw %k0, %edx
3063 ; CHECK-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
3064 ; CHECK-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
3065 ; CHECK-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
3066 ; CHECK-NEXT: vextracti32x4 $2, %zmm0, %xmm0
3067 ; CHECK-NEXT: vpslld $31, %xmm1, %xmm1
3068 ; CHECK-NEXT: vpsrad $31, %xmm1, %xmm1
3069 ; CHECK-NEXT: vpand %xmm0, %xmm1, %xmm0
3030 ; CHECK-NEXT: kmovw %edi, %k1
3031 ; CHECK-NEXT: vextracti32x4 $2, %zmm0, %xmm0 {%k1} {z}
30703032 ; CHECK-NEXT: retq
30713033 %res = call <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32> %a, i32 2, <4 x i32> zeroinitializer, i8 %mask)
30723034 ret <4 x i32> %res
497497 define <4 x i32> @test4(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64> %y1) {
498498 ; KNL-LABEL: test4:
499499 ; KNL: ## %bb.0:
500 ; KNL-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
501 ; KNL-NEXT: vpmovqd %zmm0, %ymm0
502 ; KNL-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm1
503 ; KNL-NEXT: vpmovqd %zmm1, %ymm1
504 ; KNL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
500 ; KNL-NEXT: ## kill: def %ymm3 killed %ymm3 def %zmm3
501 ; KNL-NEXT: ## kill: def %ymm2 killed %ymm2 def %zmm2
502 ; KNL-NEXT: ## kill: def %ymm1 killed %ymm1 def %zmm1
503 ; KNL-NEXT: ## kill: def %ymm0 killed %ymm0 def %zmm0
504 ; KNL-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
505 ; KNL-NEXT: vpcmpgtq %zmm3, %zmm2, %k1
506 ; KNL-NEXT: kandnw %k0, %k1, %k1
507 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
508 ; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0
505509 ; KNL-NEXT: vzeroupper
506510 ; KNL-NEXT: retq
507511 ;
516520 ;
517521 ; AVX512BW-LABEL: test4:
518522 ; AVX512BW: ## %bb.0:
519 ; AVX512BW-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
520 ; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
521 ; AVX512BW-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm1
522 ; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1
523 ; AVX512BW-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
523 ; AVX512BW-NEXT: ## kill: def %ymm3 killed %ymm3 def %zmm3
524 ; AVX512BW-NEXT: ## kill: def %ymm2 killed %ymm2 def %zmm2
525 ; AVX512BW-NEXT: ## kill: def %ymm1 killed %ymm1 def %zmm1
526 ; AVX512BW-NEXT: ## kill: def %ymm0 killed %ymm0 def %zmm0
527 ; AVX512BW-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
528 ; AVX512BW-NEXT: vpcmpgtq %zmm3, %zmm2, %k1
529 ; AVX512BW-NEXT: kandnw %k0, %k1, %k1
530 ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
531 ; AVX512BW-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0
524532 ; AVX512BW-NEXT: vzeroupper
525533 ; AVX512BW-NEXT: retq
526534 ;
527535 ; AVX512DQ-LABEL: test4:
528536 ; AVX512DQ: ## %bb.0:
529 ; AVX512DQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
530 ; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0
531 ; AVX512DQ-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm1
532 ; AVX512DQ-NEXT: vpmovqd %zmm1, %ymm1
533 ; AVX512DQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
537 ; AVX512DQ-NEXT: ## kill: def %ymm3 killed %ymm3 def %zmm3
538 ; AVX512DQ-NEXT: ## kill: def %ymm2 killed %ymm2 def %zmm2
539 ; AVX512DQ-NEXT: ## kill: def %ymm1 killed %ymm1 def %zmm1
540 ; AVX512DQ-NEXT: ## kill: def %ymm0 killed %ymm0 def %zmm0
541 ; AVX512DQ-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
542 ; AVX512DQ-NEXT: vpcmpgtq %zmm3, %zmm2, %k1
543 ; AVX512DQ-NEXT: kandnw %k0, %k1, %k0
544 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
545 ; AVX512DQ-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0
534546 ; AVX512DQ-NEXT: vzeroupper
535547 ; AVX512DQ-NEXT: retq
536548 %x_gt_y = icmp sgt <4 x i64> %x, %y
543555 define <2 x i64> @test5(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64> %y1) {
544556 ; KNL-LABEL: test5:
545557 ; KNL: ## %bb.0:
546 ; KNL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
547 ; KNL-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm1
548 ; KNL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
558 ; KNL-NEXT: ## kill: def %xmm3 killed %xmm3 def %zmm3
559 ; KNL-NEXT: ## kill: def %xmm2 killed %xmm2 def %zmm2
560 ; KNL-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1
561 ; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0
562 ; KNL-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
563 ; KNL-NEXT: vpcmpgtq %zmm3, %zmm2, %k1
564 ; KNL-NEXT: kandnw %k1, %k0, %k1
565 ; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
566 ; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0
567 ; KNL-NEXT: vzeroupper
549568 ; KNL-NEXT: retq
550569 ;
551570 ; SKX-LABEL: test5:
558577 ;
559578 ; AVX512BW-LABEL: test5:
560579 ; AVX512BW: ## %bb.0:
561 ; AVX512BW-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
562 ; AVX512BW-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm1
563 ; AVX512BW-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
580 ; AVX512BW-NEXT: ## kill: def %xmm3 killed %xmm3 def %zmm3
581 ; AVX512BW-NEXT: ## kill: def %xmm2 killed %xmm2 def %zmm2
582 ; AVX512BW-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1
583 ; AVX512BW-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0
584 ; AVX512BW-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
585 ; AVX512BW-NEXT: vpcmpgtq %zmm3, %zmm2, %k1
586 ; AVX512BW-NEXT: kandnw %k1, %k0, %k1
587 ; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
588 ; AVX512BW-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0
589 ; AVX512BW-NEXT: vzeroupper
564590 ; AVX512BW-NEXT: retq
565591 ;
566592 ; AVX512DQ-LABEL: test5:
567593 ; AVX512DQ: ## %bb.0:
568 ; AVX512DQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
569 ; AVX512DQ-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm1
570 ; AVX512DQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
594 ; AVX512DQ-NEXT: ## kill: def %xmm3 killed %xmm3 def %zmm3
595 ; AVX512DQ-NEXT: ## kill: def %xmm2 killed %xmm2 def %zmm2
596 ; AVX512DQ-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1
597 ; AVX512DQ-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0
598 ; AVX512DQ-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
599 ; AVX512DQ-NEXT: vpcmpgtq %zmm3, %zmm2, %k1
600 ; AVX512DQ-NEXT: kandnw %k1, %k0, %k0
601 ; AVX512DQ-NEXT: vpmovm2q %k0, %zmm0
602 ; AVX512DQ-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0
603 ; AVX512DQ-NEXT: vzeroupper
571604 ; AVX512DQ-NEXT: retq
572605 %x_gt_y = icmp slt <2 x i64> %x, %y
573606 %x1_gt_y1 = icmp sgt <2 x i64> %x1, %y1
794827 ; KNL-LABEL: test11:
795828 ; KNL: ## %bb.0:
796829 ; KNL-NEXT: cmpl %esi, %edi
797 ; KNL-NEXT: jg LBB20_2
798 ; KNL-NEXT: ## %bb.1:
799 ; KNL-NEXT: vmovaps %xmm1, %xmm0
800 ; KNL-NEXT: LBB20_2:
830 ; KNL-NEXT: jg LBB20_1
831 ; KNL-NEXT: ## %bb.2:
832 ; KNL-NEXT: vpslld $31, %xmm1, %xmm0
833 ; KNL-NEXT: jmp LBB20_3
834 ; KNL-NEXT: LBB20_1:
835 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
836 ; KNL-NEXT: LBB20_3:
837 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
838 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
839 ; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0
840 ; KNL-NEXT: vzeroupper
801841 ; KNL-NEXT: retq
802842 ;
803843 ; SKX-LABEL: test11:
817857 ; AVX512BW-LABEL: test11:
818858 ; AVX512BW: ## %bb.0:
819859 ; AVX512BW-NEXT: cmpl %esi, %edi
820 ; AVX512BW-NEXT: jg LBB20_2
821 ; AVX512BW-NEXT: ## %bb.1:
822 ; AVX512BW-NEXT: vmovaps %xmm1, %xmm0
823 ; AVX512BW-NEXT: LBB20_2:
860 ; AVX512BW-NEXT: jg LBB20_1
861 ; AVX512BW-NEXT: ## %bb.2:
862 ; AVX512BW-NEXT: vpslld $31, %xmm1, %xmm0
863 ; AVX512BW-NEXT: jmp LBB20_3
864 ; AVX512BW-NEXT: LBB20_1:
865 ; AVX512BW-NEXT: vpslld $31, %xmm0, %xmm0
866 ; AVX512BW-NEXT: LBB20_3:
867 ; AVX512BW-NEXT: vptestmd %zmm0, %zmm0, %k1
868 ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
869 ; AVX512BW-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0
870 ; AVX512BW-NEXT: vzeroupper
824871 ; AVX512BW-NEXT: retq
825872 ;
826873 ; AVX512DQ-LABEL: test11:
827874 ; AVX512DQ: ## %bb.0:
828875 ; AVX512DQ-NEXT: cmpl %esi, %edi
829 ; AVX512DQ-NEXT: jg LBB20_2
830 ; AVX512DQ-NEXT: ## %bb.1:
831 ; AVX512DQ-NEXT: vmovaps %xmm1, %xmm0
832 ; AVX512DQ-NEXT: LBB20_2:
876 ; AVX512DQ-NEXT: jg LBB20_1
877 ; AVX512DQ-NEXT: ## %bb.2:
878 ; AVX512DQ-NEXT: vpslld $31, %xmm1, %xmm0
879 ; AVX512DQ-NEXT: jmp LBB20_3
880 ; AVX512DQ-NEXT: LBB20_1:
881 ; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
882 ; AVX512DQ-NEXT: LBB20_3:
883 ; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k0
884 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
885 ; AVX512DQ-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0
886 ; AVX512DQ-NEXT: vzeroupper
833887 ; AVX512DQ-NEXT: retq
834888 %mask = icmp sgt i32 %a1, %b1
835889 %c = select i1 %mask, <4 x i1>%a, <4 x i1>%b
12701324 define void @test22(<4 x i1> %a, <4 x i1>* %addr) {
12711325 ; KNL-LABEL: test22:
12721326 ; KNL: ## %bb.0:
1273 ; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 def %ymm0
1274 ; KNL-NEXT: vpslld $31, %ymm0, %ymm0
1327 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
12751328 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
12761329 ; KNL-NEXT: kmovw %k0, %eax
12771330 ; KNL-NEXT: movb %al, (%rdi)
12871340 ;
12881341 ; AVX512BW-LABEL: test22:
12891342 ; AVX512BW: ## %bb.0:
1290 ; AVX512BW-NEXT: ## kill: def %xmm0 killed %xmm0 def %ymm0
1291 ; AVX512BW-NEXT: vpslld $31, %ymm0, %ymm0
1343 ; AVX512BW-NEXT: vpslld $31, %xmm0, %xmm0
12921344 ; AVX512BW-NEXT: vptestmd %zmm0, %zmm0, %k0
12931345 ; AVX512BW-NEXT: kmovd %k0, %eax
12941346 ; AVX512BW-NEXT: movb %al, (%rdi)
12971349 ;
12981350 ; AVX512DQ-LABEL: test22:
12991351 ; AVX512DQ: ## %bb.0:
1300 ; AVX512DQ-NEXT: ## kill: def %xmm0 killed %xmm0 def %ymm0
1301 ; AVX512DQ-NEXT: vpslld $31, %ymm0, %ymm0
1352 ; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
13021353 ; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k0
13031354 ; AVX512DQ-NEXT: kmovb %k0, (%rdi)
13041355 ; AVX512DQ-NEXT: vzeroupper
13101361 define void @test23(<2 x i1> %a, <2 x i1>* %addr) {
13111362 ; KNL-LABEL: test23:
13121363 ; KNL: ## %bb.0:
1313 ; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0
1314 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
1364 ; KNL-NEXT: vpsllq $63, %xmm0, %xmm0
13151365 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
13161366 ; KNL-NEXT: kmovw %k0, %eax
13171367 ; KNL-NEXT: movb %al, (%rdi)
13271377 ;
13281378 ; AVX512BW-LABEL: test23:
13291379 ; AVX512BW: ## %bb.0:
1330 ; AVX512BW-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0
1331 ; AVX512BW-NEXT: vpsllq $63, %zmm0, %zmm0
1380 ; AVX512BW-NEXT: vpsllq $63, %xmm0, %xmm0
13321381 ; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k0
13331382 ; AVX512BW-NEXT: kmovd %k0, %eax
13341383 ; AVX512BW-NEXT: movb %al, (%rdi)
13371386 ;
13381387 ; AVX512DQ-LABEL: test23:
13391388 ; AVX512DQ: ## %bb.0:
1340 ; AVX512DQ-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0
1341 ; AVX512DQ-NEXT: vpsllq $63, %zmm0, %zmm0
1389 ; AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0
13421390 ; AVX512DQ-NEXT: vptestmq %zmm0, %zmm0, %k0
13431391 ; AVX512DQ-NEXT: kmovb %k0, (%rdi)
13441392 ; AVX512DQ-NEXT: vzeroupper
13891437 define void @store_v2i1(<2 x i1> %c , <2 x i1>* %ptr) {
13901438 ; KNL-LABEL: store_v2i1:
13911439 ; KNL: ## %bb.0:
1392 ; KNL-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
1393 ; KNL-NEXT: vpxor %xmm1, %xmm0, %xmm0
1394 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
1440 ; KNL-NEXT: vpsllq $63, %xmm0, %xmm0
13951441 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
1442 ; KNL-NEXT: knotw %k0, %k0
13961443 ; KNL-NEXT: kmovw %k0, %eax
13971444 ; KNL-NEXT: movb %al, (%rdi)
13981445 ; KNL-NEXT: vzeroupper
14081455 ;
14091456 ; AVX512BW-LABEL: store_v2i1:
14101457 ; AVX512BW: ## %bb.0:
1411 ; AVX512BW-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
1412 ; AVX512BW-NEXT: vpxor %xmm1, %xmm0, %xmm0
1413 ; AVX512BW-NEXT: vpsllq $63, %zmm0, %zmm0
1458 ; AVX512BW-NEXT: vpsllq $63, %xmm0, %xmm0
14141459 ; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k0
1460 ; AVX512BW-NEXT: knotw %k0, %k0
14151461 ; AVX512BW-NEXT: kmovd %k0, %eax
14161462 ; AVX512BW-NEXT: movb %al, (%rdi)
14171463 ; AVX512BW-NEXT: vzeroupper
14191465 ;
14201466 ; AVX512DQ-LABEL: store_v2i1:
14211467 ; AVX512DQ: ## %bb.0:
1422 ; AVX512DQ-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
1423 ; AVX512DQ-NEXT: vpxor %xmm1, %xmm0, %xmm0
1424 ; AVX512DQ-NEXT: vpsllq $63, %zmm0, %zmm0
1468 ; AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0
14251469 ; AVX512DQ-NEXT: vptestmq %zmm0, %zmm0, %k0
1470 ; AVX512DQ-NEXT: knotw %k0, %k0
14261471 ; AVX512DQ-NEXT: kmovb %k0, (%rdi)
14271472 ; AVX512DQ-NEXT: vzeroupper
14281473 ; AVX512DQ-NEXT: retq
14341479 define void @store_v4i1(<4 x i1> %c , <4 x i1>* %ptr) {
14351480 ; KNL-LABEL: store_v4i1:
14361481 ; KNL: ## %bb.0:
1437 ; KNL-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
1438 ; KNL-NEXT: vpxor %xmm1, %xmm0, %xmm0
1439 ; KNL-NEXT: vpslld $31, %ymm0, %ymm0
1482 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
14401483 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
1484 ; KNL-NEXT: knotw %k0, %k0
14411485 ; KNL-NEXT: kmovw %k0, %eax
14421486 ; KNL-NEXT: movb %al, (%rdi)
14431487 ; KNL-NEXT: vzeroupper
14531497 ;
14541498 ; AVX512BW-LABEL: store_v4i1:
14551499 ; AVX512BW: ## %bb.0:
1456 ; AVX512BW-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
1457 ; AVX512BW-NEXT: vpxor %xmm1, %xmm0, %xmm0
1458 ; AVX512BW-NEXT: vpslld $31, %ymm0, %ymm0
1500 ; AVX512BW-NEXT: vpslld $31, %xmm0, %xmm0
14591501 ; AVX512BW-NEXT: vptestmd %zmm0, %zmm0, %k0
1502 ; AVX512BW-NEXT: knotw %k0, %k0
14601503 ; AVX512BW-NEXT: kmovd %k0, %eax
14611504 ; AVX512BW-NEXT: movb %al, (%rdi)
14621505 ; AVX512BW-NEXT: vzeroupper
14641507 ;
14651508 ; AVX512DQ-LABEL: store_v4i1:
14661509 ; AVX512DQ: ## %bb.0:
1467 ; AVX512DQ-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
1468 ; AVX512DQ-NEXT: vpxor %xmm1, %xmm0, %xmm0
1469 ; AVX512DQ-NEXT: vpslld $31, %ymm0, %ymm0
1510 ; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
14701511 ; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k0
1512 ; AVX512DQ-NEXT: knotw %k0, %k0
14711513 ; AVX512DQ-NEXT: kmovb %k0, (%rdi)
14721514 ; AVX512DQ-NEXT: vzeroupper
14731515 ; AVX512DQ-NEXT: retq
7171 define <4 x float> @test7(<4 x float> %a, <4 x float> %b) {
7272 ; KNL-LABEL: test7:
7373 ; KNL: ## %bb.0:
74 ; KNL-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1
75 ; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0
7476 ; KNL-NEXT: vxorps %xmm2, %xmm2, %xmm2
75 ; KNL-NEXT: vcmpltps %xmm2, %xmm0, %xmm2
76 ; KNL-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
77 ; KNL-NEXT: vcmpltps %zmm2, %zmm0, %k1
78 ; KNL-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
79 ; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0
80 ; KNL-NEXT: vzeroupper
7781 ; KNL-NEXT: retq
7882 ;
7983 ; SKX-LABEL: test7:
9195 define <2 x double> @test8(<2 x double> %a, <2 x double> %b) {
9296 ; KNL-LABEL: test8:
9397 ; KNL: ## %bb.0:
98 ; KNL-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1
99 ; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0
94100 ; KNL-NEXT: vxorpd %xmm2, %xmm2, %xmm2
95 ; KNL-NEXT: vcmpltpd %xmm2, %xmm0, %xmm2
96 ; KNL-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
101 ; KNL-NEXT: vcmpltpd %zmm2, %zmm0, %k1
102 ; KNL-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
103 ; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0
104 ; KNL-NEXT: vzeroupper
97105 ; KNL-NEXT: retq
98106 ;
99107 ; SKX-LABEL: test8:
536544 define <4 x double> @test30(<4 x double> %x, <4 x double> %y) nounwind {
537545 ; KNL-LABEL: test30:
538546 ; KNL: ## %bb.0:
539 ; KNL-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm2
540 ; KNL-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
547 ; KNL-NEXT: ## kill: def %ymm1 killed %ymm1 def %zmm1
548 ; KNL-NEXT: ## kill: def %ymm0 killed %ymm0 def %zmm0
549 ; KNL-NEXT: vcmpeqpd %zmm1, %zmm0, %k1
550 ; KNL-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
551 ; KNL-NEXT: ## kill: def %ymm0 killed %ymm0 killed %zmm0
541552 ; KNL-NEXT: retq
542553 ;
543554 ; SKX-LABEL: test30:
554565 define <2 x double> @test31(<2 x double> %x, <2 x double> %x1, <2 x double>* %yp) nounwind {
555566 ; KNL-LABEL: test31:
556567 ; KNL: ## %bb.0:
557 ; KNL-NEXT: vcmpltpd (%rdi), %xmm0, %xmm2
558 ; KNL-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
568 ; KNL-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1
569 ; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0
570 ; KNL-NEXT: vmovupd (%rdi), %xmm2
571 ; KNL-NEXT: vcmpltpd %zmm2, %zmm0, %k1
572 ; KNL-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
573 ; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0
574 ; KNL-NEXT: vzeroupper
559575 ; KNL-NEXT: retq
560576 ;
561577 ; SKX-LABEL: test31:
573589 define <4 x double> @test32(<4 x double> %x, <4 x double> %x1, <4 x double>* %yp) nounwind {
574590 ; KNL-LABEL: test32:
575591 ; KNL: ## %bb.0:
576 ; KNL-NEXT: vcmpltpd (%rdi), %ymm0, %ymm2
577 ; KNL-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
592 ; KNL-NEXT: ## kill: def %ymm1 killed %ymm1 def %zmm1
593 ; KNL-NEXT: ## kill: def %ymm0 killed %ymm0 def %zmm0
594 ; KNL-NEXT: vmovupd (%rdi), %ymm2
595 ; KNL-NEXT: vcmpltpd %zmm2, %zmm0, %k1
596 ; KNL-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
597 ; KNL-NEXT: ## kill: def %ymm0 killed %ymm0 killed %zmm0
578598 ; KNL-NEXT: retq
579599 ;
580600 ; SKX-LABEL: test32:
604624 define <4 x float> @test34(<4 x float> %x, <4 x float> %x1, <4 x float>* %yp) nounwind {
605625 ; KNL-LABEL: test34:
606626 ; KNL: ## %bb.0:
607 ; KNL-NEXT: vcmpltps (%rdi), %xmm0, %xmm2
608 ; KNL-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
627 ; KNL-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1
628 ; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0
629 ; KNL-NEXT: vmovups (%rdi), %xmm2
630 ; KNL-NEXT: vcmpltps %zmm2, %zmm0, %k1
631 ; KNL-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
632 ; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0
633 ; KNL-NEXT: vzeroupper
609634 ; KNL-NEXT: retq
610635 ;
611636 ; SKX-LABEL: test34:
673698 define <4 x double> @test38(<4 x double> %x, <4 x double> %x1, double* %ptr) nounwind {
674699 ; KNL-LABEL: test38:
675700 ; KNL: ## %bb.0:
701 ; KNL-NEXT: ## kill: def %ymm1 killed %ymm1 def %zmm1
702 ; KNL-NEXT: ## kill: def %ymm0 killed %ymm0 def %zmm0
676703 ; KNL-NEXT: vbroadcastsd (%rdi), %ymm2
677 ; KNL-NEXT: vcmpltpd %ymm2, %ymm0, %ymm2
678 ; KNL-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
704 ; KNL-NEXT: vcmpltpd %zmm2, %zmm0, %k1
705 ; KNL-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
706 ; KNL-NEXT: ## kill: def %ymm0 killed %ymm0 killed %zmm0
679707 ; KNL-NEXT: retq
680708 ;
681709 ; SKX-LABEL: test38:
696724 define <2 x double> @test39(<2 x double> %x, <2 x double> %x1, double* %ptr) nounwind {
697725 ; KNL-LABEL: test39:
698726 ; KNL: ## %bb.0:
727 ; KNL-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1
728 ; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0
699729 ; KNL-NEXT: vmovddup {{.*#+}} xmm2 = mem[0,0]
700 ; KNL-NEXT: vcmpltpd %xmm2, %xmm0, %xmm2
701 ; KNL-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
730 ; KNL-NEXT: vcmpltpd %zmm2, %zmm0, %k1
731 ; KNL-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
732 ; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0
733 ; KNL-NEXT: vzeroupper
702734 ; KNL-NEXT: retq
703735 ;
704736 ; SKX-LABEL: test39:
762794 define <4 x float> @test42(<4 x float> %x, <4 x float> %x1, float* %ptr) nounwind {
763795 ; KNL-LABEL: test42:
764796 ; KNL: ## %bb.0:
797 ; KNL-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1
798 ; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0
765799 ; KNL-NEXT: vbroadcastss (%rdi), %xmm2
766 ; KNL-NEXT: vcmpltps %xmm2, %xmm0, %xmm2
767 ; KNL-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
800 ; KNL-NEXT: vcmpltps %zmm2, %zmm0, %k1
801 ; KNL-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
802 ; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0
803 ; KNL-NEXT: vzeroupper
768804 ; KNL-NEXT: retq
769805 ;
770806 ; SKX-LABEL: test42:
55 define <2 x double>@test_int_x86_avx512_mask_vextractf64x2_512(<8 x double> %x0, <2 x double> %x2, i8 %x3) {
66 ; CHECK-LABEL: test_int_x86_avx512_mask_vextractf64x2_512:
77 ; CHECK: ## %bb.0:
8 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
9 ; CHECK-NEXT: vmovd %edi, %xmm2
10 ; CHECK-NEXT: kmovw %edi, %k0
11 ; CHECK-NEXT: kshiftrb $1, %k0, %k0
12 ; CHECK-NEXT: kmovw %k0, %eax
13 ; CHECK-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
14 ; CHECK-NEXT: vpsllq $63, %xmm2, %xmm2
15 ; CHECK-NEXT: vpsraq $63, %zmm2, %zmm2
16 ; CHECK-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm1
17 ; CHECK-NEXT: vandpd %xmm0, %xmm2, %xmm2
18 ; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
19 ; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0
8 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm2
9 ; CHECK-NEXT: kmovw %edi, %k1
10 ; CHECK-NEXT: vextractf64x2 $1, %zmm0, %xmm1 {%k1}
11 ; CHECK-NEXT: vextractf64x2 $1, %zmm0, %xmm0 {%k1} {z}
12 ; CHECK-NEXT: vaddpd %xmm2, %xmm1, %xmm1
13 ; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0
2014 ; CHECK-NEXT: retq
2115 %res = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.512(<8 x double> %x0,i32 1, <2 x double> %x2, i8 %x3)
2216 %res2 = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.512(<8 x double> %x0,i32 1, <2 x double> zeroinitializer, i8 %x3)
1010 ;
1111 ; NoVLX-LABEL: test256_1:
1212 ; NoVLX: # %bb.0:
13 ; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm2
14 ; NoVLX-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
13 ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
14 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
15 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k1
16 ; NoVLX-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
17 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0
1518 ; NoVLX-NEXT: retq
1619 %mask = icmp eq <4 x i64> %x, %y
1720 %max = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> %y
2730 ;
2831 ; NoVLX-LABEL: test256_2:
2932 ; NoVLX: # %bb.0:
30 ; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
31 ; NoVLX-NEXT: vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
33 ; NoVLX-NEXT: # kill: def %ymm2 killed %ymm2 def %zmm2
34 ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
35 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
36 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k1
37 ; NoVLX-NEXT: vpblendmq %zmm2, %zmm1, %zmm0 {%k1}
38 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0
3239 ; NoVLX-NEXT: retq
3340 %mask = icmp sgt <4 x i64> %x, %y
3441 %max = select <4 x i1> %mask, <4 x i64> %x1, <4 x i64> %y
6572 ;
6673 ; NoVLX-LABEL: test256_4:
6774 ; NoVLX: # %bb.0:
68 ; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm3 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
69 ; NoVLX-NEXT: vpxor %ymm3, %ymm1, %ymm4
70 ; NoVLX-NEXT: vpxor %ymm3, %ymm0, %ymm0
71 ; NoVLX-NEXT: vpcmpgtq %ymm4, %ymm0, %ymm0
72 ; NoVLX-NEXT: vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
75 ; NoVLX-NEXT: # kill: def %ymm2 killed %ymm2 def %zmm2
76 ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
77 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
78 ; NoVLX-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1
79 ; NoVLX-NEXT: vpblendmq %zmm2, %zmm1, %zmm0 {%k1}
80 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0
7381 ; NoVLX-NEXT: retq
7482 %mask = icmp ugt <4 x i64> %x, %y
7583 %max = select <4 x i1> %mask, <4 x i64> %x1, <4 x i64> %y
288296 ;
289297 ; NoVLX-LABEL: test256_10:
290298 ; NoVLX: # %bb.0:
291 ; NoVLX-NEXT: vpcmpgtq %ymm2, %ymm3, %ymm3
292 ; NoVLX-NEXT: vpcmpeqd %ymm4, %ymm4, %ymm4
293 ; NoVLX-NEXT: vpxor %ymm4, %ymm3, %ymm3
294 ; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm1
295 ; NoVLX-NEXT: vpandn %ymm3, %ymm1, %ymm1
296 ; NoVLX-NEXT: vblendvpd %ymm1, %ymm0, %ymm2, %ymm0
299 ; NoVLX-NEXT: # kill: def %ymm3 killed %ymm3 def %zmm3
300 ; NoVLX-NEXT: # kill: def %ymm2 killed %ymm2 def %zmm2
301 ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
302 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
303 ; NoVLX-NEXT: vpcmpleq %zmm1, %zmm0, %k1
304 ; NoVLX-NEXT: vpcmpleq %zmm2, %zmm3, %k1 {%k1}
305 ; NoVLX-NEXT: vpblendmq %zmm0, %zmm2, %zmm0 {%k1}
306 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0
297307 ; NoVLX-NEXT: retq
298308 %mask1 = icmp sge <4 x i64> %x1, %y1
299309 %mask0 = icmp sle <4 x i64> %x, %y
312322 ;
313323 ; NoVLX-LABEL: test256_11:
314324 ; NoVLX: # %bb.0:
315 ; NoVLX-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm3
316 ; NoVLX-NEXT: vpcmpgtq %ymm2, %ymm1, %ymm2
317 ; NoVLX-NEXT: vpand %ymm2, %ymm3, %ymm2
318 ; NoVLX-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
325 ; NoVLX-NEXT: # kill: def %ymm2 killed %ymm2 def %zmm2
326 ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
327 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
328 ; NoVLX-NEXT: vmovdqu (%rdi), %ymm3
329 ; NoVLX-NEXT: vpcmpgtq %zmm3, %zmm0, %k1
330 ; NoVLX-NEXT: vpcmpgtq %zmm2, %zmm1, %k1 {%k1}
331 ; NoVLX-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
332 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0
319333 ; NoVLX-NEXT: retq
320334 %mask1 = icmp sgt <4 x i64> %x1, %y1
321335 %y = load <4 x i64>, <4 x i64>* %y.ptr, align 4
361375 ;
362376 ; NoVLX-LABEL: test256_13:
363377 ; NoVLX: # %bb.0:
378 ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
379 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
364380 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm2
365 ; NoVLX-NEXT: vpcmpeqq %ymm2, %ymm0, %ymm2
366 ; NoVLX-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
381 ; NoVLX-NEXT: vpcmpeqq %zmm2, %zmm0, %k1
382 ; NoVLX-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
383 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0
367384 ; NoVLX-NEXT: retq
368385 %yb = load i64, i64* %yb.ptr, align 4
369386 %y.0 = insertelement <4 x i64> undef, i64 %yb, i32 0
436453 ;
437454 ; NoVLX-LABEL: test256_16:
438455 ; NoVLX: # %bb.0:
439 ; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm2, %ymm2
456 ; NoVLX-NEXT: # kill: def %ymm2 killed %ymm2 def %zmm2
457 ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
458 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
440459 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm3
441 ; NoVLX-NEXT: vpcmpgtq %ymm3, %ymm0, %ymm3
442 ; NoVLX-NEXT: vpandn %ymm3, %ymm2, %ymm2
443 ; NoVLX-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
460 ; NoVLX-NEXT: vpcmpgtq %zmm3, %zmm0, %k1
461 ; NoVLX-NEXT: vpcmpleq %zmm1, %zmm2, %k1 {%k1}
462 ; NoVLX-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
463 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0
444464 ; NoVLX-NEXT: retq
445465 %mask1 = icmp sge <4 x i64> %x1, %y1
446466 %yb = load i64, i64* %yb.ptr, align 4
549569 ;
550570 ; NoVLX-LABEL: test128_1:
551571 ; NoVLX: # %bb.0:
552 ; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm2
553 ; NoVLX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
572 ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
573 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
574 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k1
575 ; NoVLX-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
576 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0
554577 ; NoVLX-NEXT: retq
555578 %mask = icmp eq <2 x i64> %x, %y
556579 %max = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> %y
566589 ;
567590 ; NoVLX-LABEL: test128_2:
568591 ; NoVLX: # %bb.0:
569 ; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
570 ; NoVLX-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
592 ; NoVLX-NEXT: # kill: def %xmm2 killed %xmm2 def %zmm2
593 ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
594 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
595 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k1
596 ; NoVLX-NEXT: vpblendmq %zmm2, %zmm1, %zmm0 {%k1}
597 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0
571598 ; NoVLX-NEXT: retq
572599 %mask = icmp sgt <2 x i64> %x, %y
573600 %max = select <2 x i1> %mask, <2 x i64> %x1, <2 x i64> %y
583610 ;
584611 ; NoVLX-LABEL: test128_3:
585612 ; NoVLX: # %bb.0:
586 ; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
587 ; NoVLX-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
588 ; NoVLX-NEXT: vpxor %xmm3, %xmm0, %xmm0
589 ; NoVLX-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
613 ; NoVLX-NEXT: # kill: def %xmm2 killed %xmm2 def %zmm2
614 ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
615 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
616 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k1
617 ; NoVLX-NEXT: vpblendmd %zmm2, %zmm1, %zmm0 {%k1}
618 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0
590619 ; NoVLX-NEXT: retq
591620 %mask = icmp sge <4 x i32> %x, %y
592621 %max = select <4 x i1> %mask, <4 x i32> %x1, <4 x i32> %y
602631 ;
603632 ; NoVLX-LABEL: test128_4:
604633 ; NoVLX: # %bb.0:
605 ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
606 ; NoVLX-NEXT: vpxor %xmm3, %xmm1, %xmm4
607 ; NoVLX-NEXT: vpxor %xmm3, %xmm0, %xmm0
608 ; NoVLX-NEXT: vpcmpgtq %xmm4, %xmm0, %xmm0
609 ; NoVLX-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
634 ; NoVLX-NEXT: # kill: def %xmm2 killed %xmm2 def %zmm2
635 ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
636 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
637 ; NoVLX-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1
638 ; NoVLX-NEXT: vpblendmq %zmm2, %zmm1, %zmm0 {%k1}
639 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0
610640 ; NoVLX-NEXT: retq
611641 %mask = icmp ugt <2 x i64> %x, %y
612642 %max = select <2 x i1> %mask, <2 x i64> %x1, <2 x i64> %y
622652 ;
623653 ; NoVLX-LABEL: test128_5:
624654 ; NoVLX: # %bb.0:
625 ; NoVLX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm2
626 ; NoVLX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
655 ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
656 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
657 ; NoVLX-NEXT: vmovdqu (%rdi), %xmm2
658 ; NoVLX-NEXT: vpcmpeqd %zmm2, %zmm0, %k1
659 ; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
660 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0
627661 ; NoVLX-NEXT: retq
628662 %y = load <4 x i32>, <4 x i32>* %yp, align 4
629663 %mask = icmp eq <4 x i32> %x, %y
640674 ;
641675 ; NoVLX-LABEL: test128_5b:
642676 ; NoVLX: # %bb.0:
643 ; NoVLX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm2
644 ; NoVLX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
677 ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
678 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
679 ; NoVLX-NEXT: vmovdqu (%rdi), %xmm2
680 ; NoVLX-NEXT: vpcmpeqd %zmm0, %zmm2, %k1
681 ; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
682 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0
645683 ; NoVLX-NEXT: retq
646684 %y = load <4 x i32>, <4 x i32>* %yp, align 4
647685 %mask = icmp eq <4 x i32> %y, %x
658696 ;
659697 ; NoVLX-LABEL: test128_6:
660698 ; NoVLX: # %bb.0:
661 ; NoVLX-NEXT: vpcmpgtd (%rdi), %xmm0, %xmm2
662 ; NoVLX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
699 ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
700 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
701 ; NoVLX-NEXT: vmovdqu (%rdi), %xmm2
702 ; NoVLX-NEXT: vpcmpgtd %zmm2, %zmm0, %k1
703 ; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
704 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0
663705 ; NoVLX-NEXT: retq
664706 %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4
665707 %mask = icmp sgt <4 x i32> %x, %y
676718 ;
677719 ; NoVLX-LABEL: test128_6b:
678720 ; NoVLX: # %bb.0:
679 ; NoVLX-NEXT: vpcmpgtd (%rdi), %xmm0, %xmm2
680 ; NoVLX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
721 ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
722 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
723 ; NoVLX-NEXT: vmovdqu (%rdi), %xmm2
724 ; NoVLX-NEXT: vpcmpgtd %zmm2, %zmm0, %k1
725 ; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
726 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0
681727 ; NoVLX-NEXT: retq
682728 %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4
683729 %mask = icmp slt <4 x i32> %y, %x
694740 ;
695741 ; NoVLX-LABEL: test128_7:
696742 ; NoVLX: # %bb.0:
697 ; NoVLX-NEXT: vpcmpgtd (%rdi), %xmm0, %xmm2
698 ; NoVLX-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
699 ; NoVLX-NEXT: vpxor %xmm3, %xmm2, %xmm2
700 ; NoVLX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
743 ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
744 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
745 ; NoVLX-NEXT: vmovdqu (%rdi), %xmm2
746 ; NoVLX-NEXT: vpcmpled %zmm2, %zmm0, %k1
747 ; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
748 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0
701749 ; NoVLX-NEXT: retq
702750 %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4
703751 %mask = icmp sle <4 x i32> %x, %y
714762 ;
715763 ; NoVLX-LABEL: test128_7b:
716764 ; NoVLX: # %bb.0:
717 ; NoVLX-NEXT: vpcmpgtd (%rdi), %xmm0, %xmm2
718 ; NoVLX-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
719 ; NoVLX-NEXT: vpxor %xmm3, %xmm2, %xmm2
720 ; NoVLX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
765 ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
766 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
767 ; NoVLX-NEXT: vmovdqu (%rdi), %xmm2
768 ; NoVLX-NEXT: vpcmpled %zmm2, %zmm0, %k1
769 ; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
770 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0
721771 ; NoVLX-NEXT: retq
722772 %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4
723773 %mask = icmp sge <4 x i32> %y, %x
734784 ;
735785 ; NoVLX-LABEL: test128_8:
736786 ; NoVLX: # %bb.0:
737 ; NoVLX-NEXT: vpminud (%rdi), %xmm0, %xmm2
738 ; NoVLX-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm2
739 ; NoVLX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
787 ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
788 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
789 ; NoVLX-NEXT: vmovdqu (%rdi), %xmm2
790 ; NoVLX-NEXT: vpcmpleud %zmm2, %zmm0, %k1
791 ; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
792 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0
740793 ; NoVLX-NEXT: retq
741794 %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4
742795 %mask = icmp ule <4 x i32> %x, %y
753806 ;
754807 ; NoVLX-LABEL: test128_8b:
755808 ; NoVLX: # %bb.0:
809 ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
810 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
756811 ; NoVLX-NEXT: vmovdqu (%rdi), %xmm2
757 ; NoVLX-NEXT: vpmaxud %xmm0, %xmm2, %xmm3
758 ; NoVLX-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2
759 ; NoVLX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
812 ; NoVLX-NEXT: vpcmpnltud %zmm0, %zmm2, %k1
813 ; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
814 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0
760815 ; NoVLX-NEXT: retq
761816 %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4
762817 %mask = icmp uge <4 x i32> %y, %x
774829 ;
775830 ; NoVLX-LABEL: test128_9:
776831 ; NoVLX: # %bb.0:
777 ; NoVLX-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2
778 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm3
779 ; NoVLX-NEXT: vpand %xmm2, %xmm3, %xmm2
780 ; NoVLX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
832 ; NoVLX-NEXT: # kill: def %xmm3 killed %xmm3 def %zmm3
833 ; NoVLX-NEXT: # kill: def %xmm2 killed %xmm2 def %zmm2
834 ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
835 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
836 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k1
837 ; NoVLX-NEXT: vpcmpeqd %zmm3, %zmm2, %k1 {%k1}
838 ; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
839 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0
781840 ; NoVLX-NEXT: retq
782841 %mask1 = icmp eq <4 x i32> %x1, %y1
783842 %mask0 = icmp eq <4 x i32> %x, %y
796855 ;
797856 ; NoVLX-LABEL: test128_10:
798857 ; NoVLX: # %bb.0:
799 ; NoVLX-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm3
800 ; NoVLX-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
801 ; NoVLX-NEXT: vpxor %xmm4, %xmm3, %xmm3
802 ; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm1
803 ; NoVLX-NEXT: vpandn %xmm3, %xmm1, %xmm1
804 ; NoVLX-NEXT: vblendvpd %xmm1, %xmm0, %xmm2, %xmm0
858 ; NoVLX-NEXT: # kill: def %xmm3 killed %xmm3 def %zmm3
859 ; NoVLX-NEXT: # kill: def %xmm2 killed %xmm2 def %zmm2
860 ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
861 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
862 ; NoVLX-NEXT: vpcmpleq %zmm1, %zmm0, %k1
863 ; NoVLX-NEXT: vpcmpleq %zmm2, %zmm3, %k1 {%k1}
864 ; NoVLX-NEXT: vpblendmq %zmm0, %zmm2, %zmm0 {%k1}
865 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0
805866 ; NoVLX-NEXT: retq
806867 %mask1 = icmp sge <2 x i64> %x1, %y1
807868 %mask0 = icmp sle <2 x i64> %x, %y
820881 ;
821882 ; NoVLX-LABEL: test128_11:
822883 ; NoVLX: # %bb.0:
823 ; NoVLX-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm3
824 ; NoVLX-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm2
825 ; NoVLX-NEXT: vpand %xmm2, %xmm3, %xmm2
826 ; NoVLX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
884 ; NoVLX-NEXT: # kill: def %xmm2 killed %xmm2 def %zmm2
885 ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
886 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
887 ; NoVLX-NEXT: vmovdqu (%rdi), %xmm3
888 ; NoVLX-NEXT: vpcmpgtq %zmm3, %zmm0, %k1
889 ; NoVLX-NEXT: vpcmpgtq %zmm2, %zmm1, %k1 {%k1}
890 ; NoVLX-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
891 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0
827892 ; NoVLX-NEXT: retq
828893 %mask1 = icmp sgt <2 x i64> %x1, %y1
829894 %y = load <2 x i64>, <2 x i64>* %y.ptr, align 4
843908 ;
844909 ; NoVLX-LABEL: test128_12:
845910 ; NoVLX: # %bb.0:
846 ; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm2
847 ; NoVLX-NEXT: vpminud (%rdi), %xmm0, %xmm3
848 ; NoVLX-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm3
849 ; NoVLX-NEXT: vpandn %xmm3, %xmm2, %xmm2
850 ; NoVLX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
911 ; NoVLX-NEXT: # kill: def %xmm2 killed %xmm2 def %zmm2
912 ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
913 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
914 ; NoVLX-NEXT: vmovdqu (%rdi), %xmm3
915 ; NoVLX-NEXT: vpcmpleud %zmm3, %zmm0, %k1
916 ; NoVLX-NEXT: vpcmpled %zmm1, %zmm2, %k1 {%k1}
917 ; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
918 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0
851919 ; NoVLX-NEXT: retq
852920 %mask1 = icmp sge <4 x i32> %x1, %y1
853921 %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4
866934 ;
867935 ; NoVLX-LABEL: test128_13:
868936 ; NoVLX: # %bb.0:
937 ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
938 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
869939 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm2
870 ; NoVLX-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm2
871 ; NoVLX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
940 ; NoVLX-NEXT: vpcmpeqq %zmm2, %zmm0, %k1
941 ; NoVLX-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
942 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0
872943 ; NoVLX-NEXT: retq
873944 %yb = load i64, i64* %yb.ptr, align 4
874945 %y.0 = insertelement <2 x i64> undef, i64 %yb, i32 0
887958 ;
888959 ; NoVLX-LABEL: test128_14:
889960 ; NoVLX: # %bb.0:
961 ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
962 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
890963 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm2
891 ; NoVLX-NEXT: vpcmpgtd %xmm2, %xmm0, %xmm2
892 ; NoVLX-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
893 ; NoVLX-NEXT: vpxor %xmm3, %xmm2, %xmm2
894 ; NoVLX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
964 ; NoVLX-NEXT: vpcmpled %zmm2, %zmm0, %k1
965 ; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
966 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0
895967 ; NoVLX-NEXT: retq
896968 %yb = load i32, i32* %yb.ptr, align 4
897969 %y.0 = insertelement <4 x i32> undef, i32 %yb, i32 0
911983 ;
912984 ; NoVLX-LABEL: test128_15:
913985 ; NoVLX: # %bb.0:
914 ; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm2
986 ; NoVLX-NEXT: # kill: def %xmm2 killed %xmm2 def %zmm2
987 ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
988 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
915989 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm3
916 ; NoVLX-NEXT: vpcmpgtd %xmm3, %xmm0, %xmm3
917 ; NoVLX-NEXT: vpandn %xmm3, %xmm2, %xmm2
918 ; NoVLX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
990 ; NoVLX-NEXT: vpcmpgtd %zmm3, %zmm0, %k1
991 ; NoVLX-NEXT: vpcmpled %zmm1, %zmm2, %k1 {%k1}
992 ; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
993 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0
919994 ; NoVLX-NEXT: retq
920995 %mask1 = icmp sge <4 x i32> %x1, %y1
921996 %yb = load i32, i32* %yb.ptr, align 4
9371012 ;
9381013 ; NoVLX-LABEL: test128_16:
9391014 ; NoVLX: # %bb.0:
940 ; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm2
1015 ; NoVLX-NEXT: # kill: def %xmm2 killed %xmm2 def %zmm2
1016 ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
1017 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
9411018 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm3
942 ; NoVLX-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm3
943 ; NoVLX-NEXT: vpandn %xmm3, %xmm2, %xmm2
944 ; NoVLX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
1019 ; NoVLX-NEXT: vpcmpgtq %zmm3, %zmm0, %k1
1020 ; NoVLX-NEXT: vpcmpleq %zmm1, %zmm2, %k1 {%k1}
1021 ; NoVLX-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
1022 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0
9451023 ; NoVLX-NEXT: retq
9461024 %mask1 = icmp sge <2 x i64> %x1, %y1
9471025 %yb = load i64, i64* %yb.ptr, align 4
9621040 ;
9631041 ; NoVLX-LABEL: test128_17:
9641042 ; NoVLX: # %bb.0:
965 ; NoVLX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm2
966 ; NoVLX-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
967 ; NoVLX-NEXT: vpxor %xmm3, %xmm2, %xmm2
968 ; NoVLX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
1043 ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
1044 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
1045 ; NoVLX-NEXT: vmovdqu (%rdi), %xmm2
1046 ; NoVLX-NEXT: vpcmpneqd %zmm2, %zmm0, %k1
1047 ; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
1048 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0
9691049 ; NoVLX-NEXT: retq
9701050 %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4
9711051 %mask = icmp ne <4 x i32> %x, %y
9821062 ;
9831063 ; NoVLX-LABEL: test128_18:
9841064 ; NoVLX: # %bb.0:
985 ; NoVLX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm2
986 ; NoVLX-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
987 ; NoVLX-NEXT: vpxor %xmm3, %xmm2, %xmm2
988 ; NoVLX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
1065 ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
1066 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
1067 ; NoVLX-NEXT: vmovdqu (%rdi), %xmm2
1068 ; NoVLX-NEXT: vpcmpneqd %zmm0, %zmm2, %k1
1069 ; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
1070 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0
9891071 ; NoVLX-NEXT: retq
9901072 %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4
9911073 %mask = icmp ne <4 x i32> %y, %x
10021084 ;
10031085 ; NoVLX-LABEL: test128_19:
10041086 ; NoVLX: # %bb.0:
1005 ; NoVLX-NEXT: vpmaxud (%rdi), %xmm0, %xmm2
1006 ; NoVLX-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm2
1007 ; NoVLX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
1087 ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
1088 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
1089 ; NoVLX-NEXT: vmovdqu (%rdi), %xmm2
1090 ; NoVLX-NEXT: vpcmpnltud %zmm2, %zmm0, %k1
1091 ; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
1092 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0
10081093 ; NoVLX-NEXT: retq
10091094 %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4
10101095 %mask = icmp uge <4 x i32> %x, %y
10211106 ;
10221107 ; NoVLX-LABEL: test128_20:
10231108 ; NoVLX: # %bb.0:
1109 ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
1110 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
10241111 ; NoVLX-NEXT: vmovdqu (%rdi), %xmm2
1025 ; NoVLX-NEXT: vpmaxud %xmm0, %xmm2, %xmm3
1026 ; NoVLX-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2
1027 ; NoVLX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
1112 ; NoVLX-NEXT: vpcmpnltud %zmm0, %zmm2, %k1
1113 ; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
1114 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0
10281115 ; NoVLX-NEXT: retq
10291116 %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4
10301117 %mask = icmp uge <4 x i32> %y, %x
23282328 ;
23292329 ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask:
23302330 ; NoVLX: # %bb.0: # %entry
2331 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
2332 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
2333 ; NoVLX-NEXT: kmovw %eax, %k0
2334 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
2335 ; NoVLX-NEXT: andl $1, %eax
2336 ; NoVLX-NEXT: kmovw %eax, %k1
2337 ; NoVLX-NEXT: kxorw %k0, %k0, %k2
2338 ; NoVLX-NEXT: kshiftrw $1, %k2, %k2
2339 ; NoVLX-NEXT: kshiftlw $1, %k2, %k2
2340 ; NoVLX-NEXT: korw %k1, %k2, %k1
2341 ; NoVLX-NEXT: kshiftrw $1, %k1, %k2
2342 ; NoVLX-NEXT: kxorw %k0, %k2, %k0
2343 ; NoVLX-NEXT: kshiftlw $15, %k0, %k0
2344 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
2345 ; NoVLX-NEXT: kxorw %k1, %k0, %k0
2346 ; NoVLX-NEXT: kshiftrw $2, %k0, %k1
2347 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
2348 ; NoVLX-NEXT: kmovw %eax, %k2
2349 ; NoVLX-NEXT: kxorw %k2, %k1, %k1
2350 ; NoVLX-NEXT: kshiftlw $15, %k1, %k1
2351 ; NoVLX-NEXT: kshiftrw $13, %k1, %k1
2352 ; NoVLX-NEXT: kxorw %k0, %k1, %k0
2353 ; NoVLX-NEXT: kshiftrw $3, %k0, %k1
2354 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
2355 ; NoVLX-NEXT: kmovw %eax, %k2
2356 ; NoVLX-NEXT: kxorw %k2, %k1, %k1
2357 ; NoVLX-NEXT: kshiftlw $15, %k1, %k1
2358 ; NoVLX-NEXT: kshiftrw $12, %k1, %k1
2359 ; NoVLX-NEXT: kxorw %k0, %k1, %k0
2331 ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
2332 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
2333 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2334 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
2335 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
23602336 ; NoVLX-NEXT: kmovw %k0, %eax
23612337 ; NoVLX-NEXT: # kill: def %al killed %al killed %eax
2338 ; NoVLX-NEXT: vzeroupper
23622339 ; NoVLX-NEXT: retq
23632340 entry:
23642341 %0 = bitcast <2 x i64> %__a to <4 x i32>
23792356 ;
23802357 ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem:
23812358 ; NoVLX: # %bb.0: # %entry
2382 ; NoVLX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0
2383 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
2384 ; NoVLX-NEXT: kmovw %eax, %k0
2385 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
2386 ; NoVLX-NEXT: andl $1, %eax
2387 ; NoVLX-NEXT: kmovw %eax, %k1
2388 ; NoVLX-NEXT: kxorw %k0, %k0, %k2
2389 ; NoVLX-NEXT: kshiftrw $1, %k2, %k2
2390 ; NoVLX-NEXT: kshiftlw $1, %k2, %k2
2391 ; NoVLX-NEXT: korw %k1, %k2, %k1
2392 ; NoVLX-NEXT: kshiftrw $1, %k1, %k2
2393 ; NoVLX-NEXT: kxorw %k0, %k2, %k0
2394 ; NoVLX-NEXT: kshiftlw $15, %k0, %k0
2395 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
2396 ; NoVLX-NEXT: kxorw %k1, %k0, %k0
2397 ; NoVLX-NEXT: kshiftrw $2, %k0, %k1
2398 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
2399 ; NoVLX-NEXT: kmovw %eax, %k2
2400 ; NoVLX-NEXT: kxorw %k2, %k1, %k1
2401 ; NoVLX-NEXT: kshiftlw $15, %k1, %k1
2402 ; NoVLX-NEXT: kshiftrw $13, %k1, %k1
2403 ; NoVLX-NEXT: kxorw %k0, %k1, %k0
2404 ; NoVLX-NEXT: kshiftrw $3, %k0, %k1
2405 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
2406 ; NoVLX-NEXT: kmovw %eax, %k2
2407 ; NoVLX-NEXT: kxorw %k2, %k1, %k1
2408 ; NoVLX-NEXT: kshiftlw $15, %k1, %k1
2409 ; NoVLX-NEXT: kshiftrw $12, %k1, %k1
2410 ; NoVLX-NEXT: kxorw %k0, %k1, %k0
2359 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
2360 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
2361 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2362 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
2363 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
24112364 ; NoVLX-NEXT: kmovw %k0, %eax
24122365 ; NoVLX-NEXT: # kill: def %al killed %al killed %eax
2366 ; NoVLX-NEXT: vzeroupper
24132367 ; NoVLX-NEXT: retq
24142368 entry:
24152369 %0 = bitcast <2 x i64> %__a to <4 x i32>
24322386 ;
24332387 ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask:
24342388 ; NoVLX: # %bb.0: # %entry
2435 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
2436 ; NoVLX-NEXT: kmovw %edi, %k0
2437 ; NoVLX-NEXT: kshiftrw $3, %k0, %k1
2438 ; NoVLX-NEXT: kmovw %k1, %eax
2439 ; NoVLX-NEXT: kshiftrw $2, %k0, %k1
2440 ; NoVLX-NEXT: kmovw %k1, %ecx
2441 ; NoVLX-NEXT: kshiftrw $1, %k0, %k1
2442 ; NoVLX-NEXT: kmovw %k1, %edx
2443 ; NoVLX-NEXT: kmovw %k0, %esi
2444 ; NoVLX-NEXT: vmovd %esi, %xmm1
2445 ; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
2446 ; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
2447 ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
2448 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
2449 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
2450 ; NoVLX-NEXT: kmovw %eax, %k0
2451 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
2452 ; NoVLX-NEXT: andl $1, %eax
2453 ; NoVLX-NEXT: kmovw %eax, %k1
2454 ; NoVLX-NEXT: kxorw %k0, %k0, %k2
2455 ; NoVLX-NEXT: kshiftrw $1, %k2, %k2
2456 ; NoVLX-NEXT: kshiftlw $1, %k2, %k2
2457 ; NoVLX-NEXT: korw %k1, %k2, %k1
2458 ; NoVLX-NEXT: kshiftrw $1, %k1, %k2
2459 ; NoVLX-NEXT: kxorw %k0, %k2, %k0
2460 ; NoVLX-NEXT: kshiftlw $15, %k0, %k0
2461 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
2462 ; NoVLX-NEXT: kxorw %k1, %k0, %k0
2463 ; NoVLX-NEXT: kshiftrw $2, %k0, %k1
2464 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
2465 ; NoVLX-NEXT: kmovw %eax, %k2
2466 ; NoVLX-NEXT: kxorw %k2, %k1, %k1
2467 ; NoVLX-NEXT: kshiftlw $15, %k1, %k1
2468 ; NoVLX-NEXT: kshiftrw $13, %k1, %k1
2469 ; NoVLX-NEXT: kxorw %k0, %k1, %k0
2470 ; NoVLX-NEXT: kshiftrw $3, %k0, %k1
2471 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
2472 ; NoVLX-NEXT: kmovw %eax, %k2
2473 ; NoVLX-NEXT: kxorw %k2, %k1, %k1
2474 ; NoVLX-NEXT: kshiftlw $15, %k1, %k1
2475 ; NoVLX-NEXT: kshiftrw $12, %k1, %k1
2476 ; NoVLX-NEXT: kxorw %k0, %k1, %k0
2389 ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
2390 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
2391 ; NoVLX-NEXT: kmovw %edi, %k1
2392 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
2393 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
2394 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
24772395 ; NoVLX-NEXT: kmovw %k0, %eax
24782396 ; NoVLX-NEXT: # kill: def %al killed %al killed %eax
2397 ; NoVLX-NEXT: vzeroupper
24792398 ; NoVLX-NEXT: retq
24802399 entry:
24812400 %0 = bitcast <2 x i64> %__a to <4 x i32>
25002419 ;
25012420 ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem:
25022421 ; NoVLX: # %bb.0: # %entry
2503 ; NoVLX-NEXT: vpcmpeqd (%rsi), %xmm0, %xmm0
2504 ; NoVLX-NEXT: kmovw %edi, %k0
2505 ; NoVLX-NEXT: kshiftrw $3, %k0, %k1
2506 ; NoVLX-NEXT: kmovw %k1, %eax
2507 ; NoVLX-NEXT: kshiftrw $2, %k0, %k1
2508 ; NoVLX-NEXT: kmovw %k1, %ecx
2509 ; NoVLX-NEXT: kshiftrw $1, %k0, %k1
2510 ; NoVLX-NEXT: kmovw %k1, %edx
2511 ; NoVLX-NEXT: kmovw %k0, %esi
2512 ; NoVLX-NEXT: vmovd %esi, %xmm1
2513 ; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
2514 ; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
2515 ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
2516 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
2517 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
2518 ; NoVLX-NEXT: kmovw %eax, %k0
2519 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
2520 ; NoVLX-NEXT: andl $1, %eax
2521 ; NoVLX-NEXT: kmovw %eax, %k1
2522 ; NoVLX-NEXT: kxorw %k0, %k0, %k2
2523 ; NoVLX-NEXT: kshiftrw $1, %k2, %k2
2524 ; NoVLX-NEXT: kshiftlw $1, %k2, %k2
2525 ; NoVLX-NEXT: korw %k1, %k2, %k1
2526 ; NoVLX-NEXT: kshiftrw $1, %k1, %k2
2527 ; NoVLX-NEXT: kxorw %k0, %k2, %k0
2528 ; NoVLX-NEXT: kshiftlw $15, %k0, %k0
2529 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
2530 ; NoVLX-NEXT: kxorw %k1, %k0, %k0
2531 ; NoVLX-NEXT: kshiftrw $2, %k0, %k1
2532 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
2533 ; NoVLX-NEXT: kmovw %eax, %k2
2534 ; NoVLX-NEXT: kxorw %k2, %k1, %k1
2535 ; NoVLX-NEXT: kshiftlw $15, %k1, %k1
2536 ; NoVLX-NEXT: kshiftrw $13, %k1, %k1
2537 ; NoVLX-NEXT: kxorw %k0, %k1, %k0
2538 ; NoVLX-NEXT: kshiftrw $3, %k0, %k1
2539 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
2540 ; NoVLX-NEXT: kmovw %eax, %k2
2541 ; NoVLX-NEXT: kxorw %k2, %k1, %k1
2542 ; NoVLX-NEXT: kshiftlw $15, %k1, %k1
2543 ; NoVLX-NEXT: kshiftrw $12, %k1, %k1
2544 ; NoVLX-NEXT: kxorw %k0, %k1, %k0
2422 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
2423 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
2424 ; NoVLX-NEXT: kmovw %edi, %k1
2425 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
2426 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
2427 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
25452428 ; NoVLX-NEXT: kmovw %k0, %eax
25462429 ; NoVLX-NEXT: # kill: def %al killed %al killed %eax
2430 ; NoVLX-NEXT: vzeroupper
25472431 ; NoVLX-NEXT: retq
25482432 entry:
25492433 %0 = bitcast <2 x i64> %__a to <4 x i32>
25692453 ;
25702454 ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem_b:
25712455 ; NoVLX: # %bb.0: # %entry
2456 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
25722457 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
2573 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
2574 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
2575 ; NoVLX-NEXT: kmovw %eax, %k0
2576 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
2577 ; NoVLX-NEXT: andl $1, %eax
2578 ; NoVLX-NEXT: kmovw %eax, %k1
2579 ; NoVLX-NEXT: kxorw %k0, %k0, %k2
2580 ; NoVLX-NEXT: kshiftrw $1, %k2, %k2
2581 ; NoVLX-NEXT: kshiftlw $1, %k2, %k2
2582 ; NoVLX-NEXT: korw %k1, %k2, %k1
2583 ; NoVLX-NEXT: kshiftrw $1, %k1, %k2
2584 ; NoVLX-NEXT: kxorw %k0, %k2, %k0
2585 ; NoVLX-NEXT: kshiftlw $15, %k0, %k0
2586 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
2587 ; NoVLX-NEXT: kxorw %k1, %k0, %k0
2588 ; NoVLX-NEXT: kshiftrw $2, %k0, %k1
2589 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
2590 ; NoVLX-NEXT: kmovw %eax, %k2
2591 ; NoVLX-NEXT: kxorw %k2, %k1, %k1
2592 ; NoVLX-NEXT: kshiftlw $15, %k1, %k1
2593 ; NoVLX-NEXT: kshiftrw $13, %k1, %k1
2594 ; NoVLX-NEXT: kxorw %k0, %k1, %k0
2595 ; NoVLX-NEXT: kshiftrw $3, %k0, %k1
2596 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
2597 ; NoVLX-NEXT: kmovw %eax, %k2
2598 ; NoVLX-NEXT: kxorw %k2, %k1, %k1
2599 ; NoVLX-NEXT: kshiftlw $15, %k1, %k1
2600 ; NoVLX-NEXT: kshiftrw $12, %k1, %k1
2601 ; NoVLX-NEXT: kxorw %k0, %k1, %k0
2458 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2459 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
2460 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
26022461 ; NoVLX-NEXT: kmovw %k0, %eax
26032462 ; NoVLX-NEXT: # kill: def %al killed %al killed %eax
2463 ; NoVLX-NEXT: vzeroupper
26042464 ; NoVLX-NEXT: retq
26052465 entry:
26062466 %0 = bitcast <2 x i64> %__a to <4 x i32>
26242484 ;
26252485 ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem_b:
26262486 ; NoVLX: # %bb.0: # %entry
2487 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
26272488 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
2628 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
2629 ; NoVLX-NEXT: kmovw %edi, %k0
2630 ; NoVLX-NEXT: kshiftrw $3, %k0, %k1
2631 ; NoVLX-NEXT: kmovw %k1, %eax
2632 ; NoVLX-NEXT: kshiftrw $2, %k0, %k1
2633 ; NoVLX-NEXT: kmovw %k1, %ecx
2634 ; NoVLX-NEXT: kshiftrw $1, %k0, %k1
2635 ; NoVLX-NEXT: kmovw %k1, %edx
2636 ; NoVLX-NEXT: kmovw %k0, %esi
2637 ; NoVLX-NEXT: vmovd %esi, %xmm1
2638 ; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
2639 ; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
2640 ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
2641 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
2642 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
2643 ; NoVLX-NEXT: kmovw %eax, %k0
2644 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
2645 ; NoVLX-NEXT: andl $1, %eax
2646 ; NoVLX-NEXT: kmovw %eax, %k1
2647 ; NoVLX-NEXT: kxorw %k0, %k0, %k2
2648 ; NoVLX-NEXT: kshiftrw $1, %k2, %k2
2649 ; NoVLX-NEXT: kshiftlw $1, %k2, %k2
2650 ; NoVLX-NEXT: korw %k1, %k2, %k1
2651 ; NoVLX-NEXT: kshiftrw $1, %k1, %k2
2652 ; NoVLX-NEXT: kxorw %k0, %k2, %k0
2653 ; NoVLX-NEXT: kshiftlw $15, %k0, %k0
2654 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
2655 ; NoVLX-NEXT: kxorw %k1, %k0, %k0
2656 ; NoVLX-NEXT: kshiftrw $2, %k0, %k1
2657 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
2658 ; NoVLX-NEXT: kmovw %eax, %k2
2659 ; NoVLX-NEXT: kxorw %k2, %k1, %k1
2660 ; NoVLX-NEXT: kshiftlw $15, %k1, %k1
2661 ; NoVLX-NEXT: kshiftrw $13, %k1, %k1
2662 ; NoVLX-NEXT: kxorw %k0, %k1, %k0
2663 ; NoVLX-NEXT: kshiftrw $3, %k0, %k1
2664 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
2665 ; NoVLX-NEXT: kmovw %eax, %k2
2666 ; NoVLX-NEXT: kxorw %k2, %k1, %k1
2667 ; NoVLX-NEXT: kshiftlw $15, %k1, %k1
2668 ; NoVLX-NEXT: kshiftrw $12, %k1, %k1
2669 ; NoVLX-NEXT: kxorw %k0, %k1, %k0
2489 ; NoVLX-NEXT: kmovw %edi, %k1
2490 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
2491 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
2492 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
26702493 ; NoVLX-NEXT: kmovw %k0, %eax
26712494 ; NoVLX-NEXT: # kill: def %al killed %al killed %eax
2495 ; NoVLX-NEXT: vzeroupper
26722496 ; NoVLX-NEXT: retq
26732497 entry:
26742498 %0 = bitcast <2 x i64> %__a to <4 x i32>
26952519 ;
26962520 ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask:
26972521 ; NoVLX: # %bb.0: # %entry
2698 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
2699 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
2700 ; NoVLX-NEXT: kmovw %eax, %k0
2701 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
2702 ; NoVLX-NEXT: andl $1, %eax
2703 ; NoVLX-NEXT: kmovw %eax, %k1
2704 ; NoVLX-NEXT: kxorw %k0, %k0, %k2
2705 ; NoVLX-NEXT: kshiftrw $1, %k2, %k2
2706 ; NoVLX-NEXT: kshiftlw $1, %k2, %k2
2707 ; NoVLX-NEXT: korw %k1, %k2, %k1
2708 ; NoVLX-NEXT: kshiftrw $1, %k1, %k2
2709 ; NoVLX-NEXT: kxorw %k0, %k2, %k0
2710 ; NoVLX-NEXT: kshiftlw $15, %k0, %k0
2711 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
2712 ; NoVLX-NEXT: kxorw %k1, %k0, %k0
2713 ; NoVLX-NEXT: kshiftrw $2, %k0, %k1
2714 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
2715 ; NoVLX-NEXT: kmovw %eax, %k2
2716 ; NoVLX-NEXT: kxorw %k2, %k1, %k1
2717 ; NoVLX-NEXT: kshiftlw $15, %k1, %k1
2718 ; NoVLX-NEXT: kshiftrw $13, %k1, %k1
2719 ; NoVLX-NEXT: kxorw %k0, %k1, %k0
2720 ; NoVLX-NEXT: kshiftrw $3, %k0, %k1
2721 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
2722 ; NoVLX-NEXT: kmovw %eax, %k2
2723 ; NoVLX-NEXT: kxorw %k2, %k1, %k1
2724 ; NoVLX-NEXT: kshiftlw $15, %k1, %k1
2725 ; NoVLX-NEXT: kshiftrw $12, %k1, %k1
2726 ; NoVLX-NEXT: kxorw %k0, %k1, %k0
2522 ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
2523 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
2524 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2525 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
2526 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
27272527 ; NoVLX-NEXT: kmovw %k0, %eax
27282528 ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
2529 ; NoVLX-NEXT: vzeroupper
27292530 ; NoVLX-NEXT: retq
27302531 entry:
27312532 %0 = bitcast <2 x i64> %__a to <4 x i32>
27462547 ;
27472548 ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem:
27482549 ; NoVLX: # %bb.0: # %entry
2749 ; NoVLX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0
2750 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
2751 ; NoVLX-NEXT: kmovw %eax, %k0
2752 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
2753 ; NoVLX-NEXT: andl $1, %eax
2754 ; NoVLX-NEXT: kmovw %eax, %k1
2755 ; NoVLX-NEXT: kxorw %k0, %k0, %k2
2756 ; NoVLX-NEXT: kshiftrw $1, %k2, %k2
2757 ; NoVLX-NEXT: kshiftlw $1, %k2, %k2
2758 ; NoVLX-NEXT: korw %k1, %k2, %k1
2759 ; NoVLX-NEXT: kshiftrw $1, %k1, %k2
2760 ; NoVLX-NEXT: kxorw %k0, %k2, %k0
2761 ; NoVLX-NEXT: kshiftlw $15, %k0, %k0
2762 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
2763 ; NoVLX-NEXT: kxorw %k1, %k0, %k0
2764 ; NoVLX-NEXT: kshiftrw $2, %k0, %k1
2765 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
2766 ; NoVLX-NEXT: kmovw %eax, %k2
2767 ; NoVLX-NEXT: kxorw %k2, %k1, %k1
2768 ; NoVLX-NEXT: kshiftlw $15, %k1, %k1
2769 ; NoVLX-NEXT: kshiftrw $13, %k1, %k1
2770 ; NoVLX-NEXT: kxorw %k0, %k1, %k0
2771 ; NoVLX-NEXT: kshiftrw $3, %k0, %k1
2772 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
2773 ; NoVLX-NEXT: kmovw %eax, %k2
2774 ; NoVLX-NEXT: kxorw %k2, %k1, %k1
2775 ; NoVLX-NEXT: kshiftlw $15, %k1, %k1
2776 ; NoVLX-NEXT: kshiftrw $12, %k1, %k1
2777 ; NoVLX-NEXT: kxorw %k0, %k1, %k0
2550 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
2551 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
2552 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2553 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
2554 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
27782555 ; NoVLX-NEXT: kmovw %k0, %eax
27792556 ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
2557 ; NoVLX-NEXT: vzeroupper
27802558 ; NoVLX-NEXT: retq
27812559 entry:
27822560 %0 = bitcast <2 x i64> %__a to <4 x i32>
27992577 ;
28002578 ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask:
28012579 ; NoVLX: # %bb.0: # %entry
2802 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
2803 ; NoVLX-NEXT: kmovw %edi, %k0
2804 ; NoVLX-NEXT: kshiftrw $3, %k0, %k1
2805 ; NoVLX-NEXT: kmovw %k1, %eax
2806 ; NoVLX-NEXT: kshiftrw $2, %k0, %k1
2807 ; NoVLX-NEXT: kmovw %k1, %ecx
2808 ; NoVLX-NEXT: kshiftrw $1, %k0, %k1
2809 ; NoVLX-NEXT: kmovw %k1, %edx
2810 ; NoVLX-NEXT: kmovw %k0, %esi
2811 ; NoVLX-NEXT: vmovd %esi, %xmm1
2812 ; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
2813 ; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
2814 ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
2815 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
2816 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
2817 ; NoVLX-NEXT: kmovw %eax, %k0
2818 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
2819 ; NoVLX-NEXT: andl $1, %eax
2820 ; NoVLX-NEXT: kmovw %eax, %k1
2821 ; NoVLX-NEXT: kxorw %k0, %k0, %k2
2822 ; NoVLX-NEXT: kshiftrw $1, %k2, %k2
2823 ; NoVLX-NEXT: kshiftlw $1, %k2, %k2
2824 ; NoVLX-NEXT: korw %k1, %k2, %k1
2825 ; NoVLX-NEXT: kshiftrw $1, %k1, %k2
2826 ; NoVLX-NEXT: kxorw %k0, %k2, %k0
2827 ; NoVLX-NEXT: kshiftlw $15, %k0, %k0
2828 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
2829 ; NoVLX-NEXT: kxorw %k1, %k0, %k0
2830 ; NoVLX-NEXT: kshiftrw $2, %k0, %k1
2831 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
2832 ; NoVLX-NEXT: kmovw %eax, %k2
2833 ; NoVLX-NEXT: kxorw %k2, %k1, %k1
2834 ; NoVLX-NEXT: kshiftlw $15, %k1, %k1
2835 ; NoVLX-NEXT: kshiftrw $13, %k1, %k1
2836 ; NoVLX-NEXT: kxorw %k0, %k1, %k0
2837 ; NoVLX-NEXT: kshiftrw $3, %k0, %k1
2838 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
2839 ; NoVLX-NEXT: kmovw %eax, %k2
2840 ; NoVLX-NEXT: kxorw %k2, %k1, %k1
2841 ; NoVLX-NEXT: kshiftlw $15, %k1, %k1
2842 ; NoVLX-NEXT: kshiftrw $12, %k1, %k1
2843 ; NoVLX-NEXT: kxorw %k0, %k1, %k0
2580 ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
2581 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
2582 ; NoVLX-NEXT: kmovw %edi, %k1
2583 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
2584 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
2585 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
28442586 ; NoVLX-NEXT: kmovw %k0, %eax
28452587 ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
2588 ; NoVLX-NEXT: vzeroupper
28462589 ; NoVLX-NEXT: retq
28472590 entry:
28482591 %0 = bitcast <2 x i64> %__a to <4 x i32>
28672610 ;
28682611 ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem:
28692612 ; NoVLX: # %bb.0: # %entry
2870 ; NoVLX-NEXT: vpcmpeqd (%rsi), %xmm0, %xmm0
2871 ; NoVLX-NEXT: kmovw %edi, %k0
2872 ; NoVLX-NEXT: kshiftrw $3, %k0, %k1
2873 ; NoVLX-NEXT: kmovw %k1, %eax
2874 ; NoVLX-NEXT: kshiftrw $2, %k0, %k1
2875 ; NoVLX-NEXT: kmovw %k1, %ecx
2876 ; NoVLX-NEXT: kshiftrw $1, %k0, %k1
2877 ; NoVLX-NEXT: kmovw %k1, %edx
2878 ; NoVLX-NEXT: kmovw %k0, %esi
2879 ; NoVLX-NEXT: vmovd %esi, %xmm1
2880 ; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
2881 ; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
2882 ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
2883 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
2884 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
2885 ; NoVLX-NEXT: kmovw %eax, %k0
2886 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
2887 ; NoVLX-NEXT: andl $1, %eax
2888 ; NoVLX-NEXT: kmovw %eax, %k1
2889 ; NoVLX-NEXT: kxorw %k0, %k0, %k2
2890 ; NoVLX-NEXT: kshiftrw $1, %k2, %k2
2891 ; NoVLX-NEXT: kshiftlw $1, %k2, %k2
2892 ; NoVLX-NEXT: korw %k1, %k2, %k1
2893 ; NoVLX-NEXT: kshiftrw $1, %k1, %k2
2894 ; NoVLX-NEXT: kxorw %k0, %k2, %k0
2895 ; NoVLX-NEXT: kshiftlw $15, %k0, %k0
2896 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
2897 ; NoVLX-NEXT: kxorw %k1, %k0, %k0
2898 ; NoVLX-NEXT: kshiftrw $2, %k0, %k1
2899 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
2900 ; NoVLX-NEXT: kmovw %eax, %k2
2901 ; NoVLX-NEXT: kxorw %k2, %k1, %k1
2902 ; NoVLX-NEXT: kshiftlw $15, %k1, %k1
2903 ; NoVLX-NEXT: kshiftrw $13, %k1, %k1
2904 ; NoVLX-NEXT: kxorw %k0, %k1, %k0
2905 ; NoVLX-NEXT: kshiftrw $3, %k0, %k1
2906 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
2907 ; NoVLX-NEXT: kmovw %eax, %k2
2908 ; NoVLX-NEXT: kxorw %k2, %k1, %k1
2909 ; NoVLX-NEXT: kshiftlw $15, %k1, %k1
2910 ; NoVLX-NEXT: kshiftrw $12, %k1, %k1
2911 ; NoVLX-NEXT: kxorw %k0, %k1, %k0
2613 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
2614 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
2615 ; NoVLX-NEXT: kmovw %edi, %k1
2616 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
2617 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
2618 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
29122619 ; NoVLX-NEXT: kmovw %k0, %eax
29132620 ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
2621 ; NoVLX-NEXT: vzeroupper
29142622 ; NoVLX-NEXT: retq
29152623 entry:
29162624 %0 = bitcast <2 x i64> %__a to <4 x i32>
29362644 ;
29372645 ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem_b:
29382646 ; NoVLX: # %bb.0: # %entry
2647 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
29392648 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
2940 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
2941 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
2942 ; NoVLX-NEXT: kmovw %eax, %k0
2943 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
2944 ; NoVLX-NEXT: andl $1, %eax
2945 ; NoVLX-NEXT: kmovw %eax, %k1
2946 ; NoVLX-NEXT: kxorw %k0, %k0, %k2
2947 ; NoVLX-NEXT: kshiftrw $1, %k2, %k2
2948 ; NoVLX-NEXT: kshiftlw $1, %k2, %k2
2949 ; NoVLX-NEXT: korw %k1, %k2, %k1
2950 ; NoVLX-NEXT: kshiftrw $1, %k1, %k2
2951 ; NoVLX-NEXT: kxorw %k0, %k2, %k0
2952 ; NoVLX-NEXT: kshiftlw $15, %k0, %k0
2953 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
2954 ; NoVLX-NEXT: kxorw %k1, %k0, %k0
2955 ; NoVLX-NEXT: kshiftrw $2, %k0, %k1
2956 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
2957 ; NoVLX-NEXT: kmovw %eax, %k2
2958 ; NoVLX-NEXT: kxorw %k2, %k1, %k1
2959 ; NoVLX-NEXT: kshiftlw $15, %k1, %k1
2960 ; NoVLX-NEXT: kshiftrw $13, %k1, %k1
2961 ; NoVLX-NEXT: kxorw %k0, %k1, %k0
2962 ; NoVLX-NEXT: kshiftrw $3, %k0, %k1
2963 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
2964 ; NoVLX-NEXT: kmovw %eax, %k2
2965 ; NoVLX-NEXT: kxorw %k2, %k1, %k1
2966 ; NoVLX-NEXT: kshiftlw $15, %k1, %k1
2967 ; NoVLX-NEXT: kshiftrw $12, %k1, %k1
2968 ; NoVLX-NEXT: kxorw %k0, %k1, %k0
2649 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2650 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
2651 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
29692652 ; NoVLX-NEXT: kmovw %k0, %eax
29702653 ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
2654 ; NoVLX-NEXT: vzeroupper
29712655 ; NoVLX-NEXT: retq
29722656 entry:
29732657 %0 = bitcast <2 x i64> %__a to <4 x i32>
29912675 ;
29922676 ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem_b:
29932677 ; NoVLX: # %bb.0: # %entry
2678 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
29942679 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
2995 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
2996 ; NoVLX-NEXT: kmovw %edi, %k0
2997 ; NoVLX-NEXT: kshiftrw $3, %k0, %k1
2998 ; NoVLX-NEXT: kmovw %k1, %eax
2999 ; NoVLX-NEXT: kshiftrw $2, %k0, %k1
3000 ; NoVLX-NEXT: kmovw %k1, %ecx
3001 ; NoVLX-NEXT: kshiftrw $1, %k0, %k1
3002 ; NoVLX-NEXT: kmovw %k1, %edx
3003 ; NoVLX-NEXT: kmovw %k0, %esi
3004 ; NoVLX-NEXT: vmovd %esi, %xmm1
3005 ; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
3006 ; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
3007 ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
3008 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
3009 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
3010 ; NoVLX-NEXT: kmovw %eax, %k0
3011 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
3012 ; NoVLX-NEXT: andl $1, %eax
3013 ; NoVLX-NEXT: kmovw %eax, %k1
3014 ; NoVLX-NEXT: kxorw %k0, %k0, %k2
3015 ; NoVLX-NEXT: kshiftrw $1, %k2, %k2
3016 ; NoVLX-NEXT: kshiftlw $1, %k2, %k2
3017 ; NoVLX-NEXT: korw %k1, %k2, %k1
3018 ; NoVLX-NEXT: kshiftrw $1, %k1, %k2
3019 ; NoVLX-NEXT: kxorw %k0, %k2, %k0
3020 ; NoVLX-NEXT: kshiftlw $15, %k0, %k0
3021 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3022 ; NoVLX-NEXT: kxorw %k1, %k0, %k0
3023 ; NoVLX-NEXT: kshiftrw $2, %k0, %k1
3024 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
3025 ; NoVLX-NEXT: kmovw %eax, %k2
3026 ; NoVLX-NEXT: kxorw %k2, %k1, %k1
3027 ; NoVLX-NEXT: kshiftlw $15, %k1, %k1
3028 ; NoVLX-NEXT: kshiftrw $13, %k1, %k1
3029 ; NoVLX-NEXT: kxorw %k0, %k1, %k0
3030 ; NoVLX-NEXT: kshiftrw $3, %k0, %k1
3031 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
3032 ; NoVLX-NEXT: kmovw %eax, %k2
3033 ; NoVLX-NEXT: kxorw %k2, %k1, %k1
3034 ; NoVLX-NEXT: kshiftlw $15, %k1, %k1
3035 ; NoVLX-NEXT: kshiftrw $12, %k1, %k1
3036 ; NoVLX-NEXT: kxorw %k0, %k1, %k0
2680 ; NoVLX-NEXT: kmovw %edi, %k1
2681 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
2682 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
2683 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
30372684 ; NoVLX-NEXT: kmovw %k0, %eax
30382685 ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
2686 ; NoVLX-NEXT: vzeroupper
30392687 ; NoVLX-NEXT: retq
30402688 entry:
30412689 %0 = bitcast <2 x i64> %__a to <4 x i32>
30682716 ; NoVLX-NEXT: .cfi_def_cfa_register %rbp
30692717 ; NoVLX-NEXT: andq $-32, %rsp
30702718 ; NoVLX-NEXT: subq $32, %rsp
3071 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
3072 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
3073 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
3074 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
3075 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
3076 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
3077 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
2719 ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
2720 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
2721 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2722 ; NoVLX-NEXT: kshiftrw $3, %k0, %k1
2723 ; NoVLX-NEXT: kmovw %k1, %eax
2724 ; NoVLX-NEXT: kshiftrw $2, %k0, %k1
2725 ; NoVLX-NEXT: kmovw %k1, %ecx
2726 ; NoVLX-NEXT: kshiftrw $1, %k0, %k1
2727 ; NoVLX-NEXT: kmovw %k1, %edx
2728 ; NoVLX-NEXT: kmovw %k0, %esi
2729 ; NoVLX-NEXT: kxorw %k0, %k0, %k0
2730 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
2731 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
2732 ; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
2733 ; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
2734 ; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
2735 ; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
2736 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
2737 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
30782738 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
30792739 ; NoVLX-NEXT: kmovw %k0, (%rsp)
30802740 ; NoVLX-NEXT: movl (%rsp), %eax
31072767 ; NoVLX-NEXT: .cfi_def_cfa_register %rbp
31082768 ; NoVLX-NEXT: andq $-32, %rsp
31092769 ; NoVLX-NEXT: subq $32, %rsp
3110 ; NoVLX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0
3111 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
3112 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
3113 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
3114 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
3115 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
3116 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
2770 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
2771 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
2772 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2773 ; NoVLX-NEXT: kshiftrw $3, %k0, %k1
2774 ; NoVLX-NEXT: kmovw %k1, %eax
2775 ; NoVLX-NEXT: kshiftrw $2, %k0, %k1
2776 ; NoVLX-NEXT: kmovw %k1, %ecx
2777 ; NoVLX-NEXT: kshiftrw $1, %k0, %k1
2778 ; NoVLX-NEXT: kmovw %k1, %edx
2779 ; NoVLX-NEXT: kmovw %k0, %esi
2780 ; NoVLX-NEXT: kxorw %k0, %k0, %k0
2781 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
2782 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
2783 ; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
2784 ; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
2785 ; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
2786 ; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
2787 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
2788 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
31172789 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
31182790 ; NoVLX-NEXT: kmovw %k0, (%rsp)
31192791 ; NoVLX-NEXT: movl (%rsp), %eax
31482820 ; NoVLX-NEXT: .cfi_def_cfa_register %rbp
31492821 ; NoVLX-NEXT: andq $-32, %rsp
31502822 ; NoVLX-NEXT: subq $32, %rsp
3151 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
3152 ; NoVLX-NEXT: kmovw %edi, %k0
2823 ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
2824 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
2825 ; NoVLX-NEXT: kmovw %edi, %k1
2826 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
31532827 ; NoVLX-NEXT: kshiftrw $3, %k0, %k1
31542828 ; NoVLX-NEXT: kmovw %k1, %eax
31552829 ; NoVLX-NEXT: kshiftrw $2, %k0, %k1
31572831 ; NoVLX-NEXT: kshiftrw $1, %k0, %k1
31582832 ; NoVLX-NEXT: kmovw %k1, %edx
31592833 ; NoVLX-NEXT: kmovw %k0, %esi
3160 ; NoVLX-NEXT: vmovd %esi, %xmm1
3161 ; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
3162 ; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
3163 ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
3164 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
3165 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
3166 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
3167 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
3168 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
3169 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
2834 ; NoVLX-NEXT: kxorw %k0, %k0, %k0
2835 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
2836 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
2837 ; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
2838 ; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
2839 ; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
2840 ; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
31702841 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
31712842 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
31722843 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
32052876 ; NoVLX-NEXT: .cfi_def_cfa_register %rbp
32062877 ; NoVLX-NEXT: andq $-32, %rsp
32072878 ; NoVLX-NEXT: subq $32, %rsp
3208 ; NoVLX-NEXT: vpcmpeqd (%rsi), %xmm0, %xmm0
3209 ; NoVLX-NEXT: kmovw %edi, %k0
2879 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
2880 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
2881 ; NoVLX-NEXT: kmovw %edi, %k1
2882 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
32102883 ; NoVLX-NEXT: kshiftrw $3, %k0, %k1
32112884 ; NoVLX-NEXT: kmovw %k1, %eax
32122885 ; NoVLX-NEXT: kshiftrw $2, %k0, %k1
32142887 ; NoVLX-NEXT: kshiftrw $1, %k0, %k1
32152888 ; NoVLX-NEXT: kmovw %k1, %edx
32162889 ; NoVLX-NEXT: kmovw %k0, %esi
3217 ; NoVLX-NEXT: vmovd %esi, %xmm1
3218 ; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
3219 ; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
3220 ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
3221 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
3222 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
3223 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
3224 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
3225 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
3226 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
2890 ; NoVLX-NEXT: kxorw %k0, %k0, %k0
2891 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
2892 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
2893 ; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
2894 ; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
2895 ; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
2896 ; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
32272897 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
32282898 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
32292899 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
32632933 ; NoVLX-NEXT: .cfi_def_cfa_register %rbp
32642934 ; NoVLX-NEXT: andq $-32, %rsp
32652935 ; NoVLX-NEXT: subq $32, %rsp
2936 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
32662937 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
3267 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
3268 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
3269 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
3270 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
3271 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
3272 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
3273 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
2938 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2939 ; NoVLX-NEXT: kshiftrw $3, %k0, %k1
2940 ; NoVLX-NEXT: kmovw %k1, %eax
2941 ; NoVLX-NEXT: kshiftrw $2, %k0, %k1
2942 ; NoVLX-NEXT: kmovw %k1, %ecx
2943 ; NoVLX-NEXT: kshiftrw $1, %k0, %k1
2944 ; NoVLX-NEXT: kmovw %k1, %edx
2945 ; NoVLX-NEXT: kmovw %k0, %esi
2946 ; NoVLX-NEXT: kxorw %k0, %k0, %k0
2947 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
2948 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
2949 ; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
2950 ; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
2951 ; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
2952 ; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
2953 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
2954 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
32742955 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
32752956 ; NoVLX-NEXT: kmovw %k0, (%rsp)
32762957 ; NoVLX-NEXT: movl (%rsp), %eax
33062987 ; NoVLX-NEXT: .cfi_def_cfa_register %rbp
33072988 ; NoVLX-NEXT: andq $-32, %rsp
33082989 ; NoVLX-NEXT: subq $32, %rsp
2990 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
33092991 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
3310 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
3311 ; NoVLX-NEXT: kmovw %edi, %k0
2992 ; NoVLX-NEXT: kmovw %edi, %k1
2993 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}