[X86] Add VP2INTERSECT instructions

Support Intel AVX512 VP2INTERSECT instructions in llvm

Patch by Xiang Zhang (xiangzhangllvm)

Differential Revision: https://reviews.llvm.org/D62366

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@362188 91177308-0d34-0410-b5e6-96231b3b80d8

Pengfei Wang, 2 months ago
39 changed files with 2816 additions and 1 deletion.
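For context, VP2INTERSECT computes the intersection of two source vectors into a pair of mask registers: bit i of the first mask is set when element i of the first source occurs anywhere in the second source, and bit j of the second mask is set when element j of the second source occurs anywhere in the first. A scalar sketch of that semantics (illustrative only; the function below is not part of this patch):

// Scalar model of vp2intersectd on two 512-bit sources (16 dword elements),
// returning the destination mask pair as two 16-bit values.
#include <cstdint>
#include <utility>

std::pair<uint16_t, uint16_t> vp2intersect_ref(const uint32_t a[16],
                                               const uint32_t b[16]) {
  uint16_t m0 = 0, m1 = 0;
  for (int i = 0; i < 16; ++i)
    for (int j = 0; j < 16; ++j)
      if (a[i] == b[j]) {
        m0 |= uint16_t(1u << i); // a[i] occurs somewhere in b
        m1 |= uint16_t(1u << j); // b[j] occurs somewhere in a
      }
  return {m0, m1};
}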
46964696 [IntrNoMem, ImmArg<3>]>;
46974697 }
46984698
4699 // vp2intersect
4700 let TargetPrefix = "x86" in {
4701 def int_x86_avx512_vp2intersect_q_512 :
4702 Intrinsic<[llvm_v8i1_ty, llvm_v8i1_ty],
4703 [llvm_v8i64_ty, llvm_v8i64_ty],
4704 [IntrNoMem]>;
4705 def int_x86_avx512_vp2intersect_q_256 :
4706 Intrinsic<[llvm_v4i1_ty, llvm_v4i1_ty],
4707 [llvm_v4i64_ty, llvm_v4i64_ty],
4708 [IntrNoMem]>;
4709 def int_x86_avx512_vp2intersect_q_128 :
4710 Intrinsic<[llvm_v2i1_ty, llvm_v2i1_ty],
4711 [llvm_v2i64_ty, llvm_v2i64_ty],
4712 [IntrNoMem]>;
4713 def int_x86_avx512_vp2intersect_d_512 :
4714 Intrinsic<[llvm_v16i1_ty, llvm_v16i1_ty],
4715 [llvm_v16i32_ty, llvm_v16i32_ty],
4716 [IntrNoMem]>;
4717 def int_x86_avx512_vp2intersect_d_256 :
4718 Intrinsic<[llvm_v8i1_ty, llvm_v8i1_ty],
4719 [llvm_v8i32_ty, llvm_v8i32_ty],
4720 [IntrNoMem]>;
4721 def int_x86_avx512_vp2intersect_d_128 :
4722 Intrinsic<[llvm_v4i1_ty, llvm_v4i1_ty],
4723 [llvm_v4i32_ty, llvm_v4i32_ty],
4724 [IntrNoMem]>;
4725 }
4726
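Each intrinsic returns both masks as a two-element struct of i1 vectors. A hedged sketch of emitting one of these from C++ through IRBuilder (assuming a Module *M, an IRBuilder<> B at a valid insertion point, and two <16 x i32> values A and Bv; none of these names come from the patch):

// Illustration only: call the 512-bit dword form and split out both masks.
llvm::Function *Decl = llvm::Intrinsic::getDeclaration(
    M, llvm::Intrinsic::x86_avx512_vp2intersect_d_512);
llvm::Value *Pair = B.CreateCall(Decl, {A, Bv});
llvm::Value *K0 = B.CreateExtractValue(Pair, 0); // <16 x i1>, first mask
llvm::Value *K1 = B.CreateExtractValue(Pair, 1); // <16 x i1>, second mask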
46994727 // Misc.
47004728 let TargetPrefix = "x86" in {
47014729 // NOTE: These comparison intrinsics are not used by clang as long as the
428428 ENUM_ENTRY(TYPE_YMM, "32-byte") \
429429 ENUM_ENTRY(TYPE_ZMM, "64-byte") \
430430 ENUM_ENTRY(TYPE_VK, "mask register") \
431 ENUM_ENTRY(TYPE_VK_PAIR, "mask register pair") \
431432 ENUM_ENTRY(TYPE_SEGMENTREG, "Segment register operand") \
432433 ENUM_ENTRY(TYPE_DEBUGREG, "Debug register operand") \
433434 ENUM_ENTRY(TYPE_CONTROLREG, "Control register operand") \
450450 X86MCRegisterClasses[X86::GR64RegClassID].contains(getReg()));
451451 }
452452
453 bool isVK1Pair() const {
454 return Kind == Register &&
455 X86MCRegisterClasses[X86::VK1RegClassID].contains(getReg());
456 }
457
458 bool isVK2Pair() const {
459 return Kind == Register &&
460 X86MCRegisterClasses[X86::VK2RegClassID].contains(getReg());
461 }
462
463 bool isVK4Pair() const {
464 return Kind == Register &&
465 X86MCRegisterClasses[X86::VK4RegClassID].contains(getReg());
466 }
467
468 bool isVK8Pair() const {
469 return Kind == Register &&
470 X86MCRegisterClasses[X86::VK8RegClassID].contains(getReg());
471 }
472
473 bool isVK16Pair() const {
474 return Kind == Register &&
475 X86MCRegisterClasses[X86::VK16RegClassID].contains(getReg());
476 }
477
453478 void addExpr(MCInst &Inst, const MCExpr *Expr) const {
454479 // Add as immediates when possible.
455480 if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
479504 void addImmOperands(MCInst &Inst, unsigned N) const {
480505 assert(N == 1 && "Invalid number of operands!");
481506 addExpr(Inst, getImm());
507 }
508
509 void addMaskPairOperands(MCInst &Inst, unsigned N) const {
510 assert(N == 1 && "Invalid number of operands!");
511 unsigned Reg = getReg();
512 switch (Reg) {
513 case X86::K0:
514 case X86::K1:
515 Reg = X86::K0_K1;
516 break;
517 case X86::K2:
518 case X86::K3:
519 Reg = X86::K2_K3;
520 break;
521 case X86::K4:
522 case X86::K5:
523 Reg = X86::K4_K5;
524 break;
525 case X86::K6:
526 case X86::K7:
527 Reg = X86::K6_K7;
528 break;
529 }
530 Inst.addOperand(MCOperand::createReg(Reg));
482531 }
483532
484533 void addMemOperands(MCInst &Inst, unsigned N) const {
693693 case TYPE_XMM:
694694 case TYPE_YMM:
695695 case TYPE_ZMM:
696 case TYPE_VK_PAIR:
696697 case TYPE_VK:
697698 case TYPE_DEBUGREG:
698699 case TYPE_CONTROLREG:
14671467 if (index > 7) \
14681468 *valid = 0; \
14691469 return prefix##_K0 + index; \
1470 case TYPE_VK_PAIR: \
1471 if (index > 7) \
1472 *valid = 0; \
1473 return prefix##_K0_K1 + (index / 2); \
14701474 case TYPE_MM64: \
14711475 return prefix##_MM0 + (index & 0x7); \
14721476 case TYPE_SEGMENTREG: \
323323 ENTRY(K6) \
324324 ENTRY(K7)
325325
326 #define REGS_MASK_PAIRS \
327 ENTRY(K0_K1) \
328 ENTRY(K2_K3) \
329 ENTRY(K4_K5) \
330 ENTRY(K6_K7)
331
326332 #define REGS_SEGMENT \
327333 ENTRY(ES) \
328334 ENTRY(CS) \
392398 REGS_YMM \
393399 REGS_ZMM \
394400 REGS_MASKS \
401 REGS_MASK_PAIRS \
395402 REGS_SEGMENT \
396403 REGS_DEBUG \
397404 REGS_CONTROL \
334334 else if (Flags & X86::IP_HAS_REPEAT)
335335 O << "\trep\t";
336336 }
337
338 void X86InstPrinterCommon::printVKPair(const MCInst *MI, unsigned OpNo,
339 raw_ostream &OS) {
340 // In assembly listings, a pair is represented by either of its
341 // members; here we print the even register (k0, k2, k4, k6), though
342 // printing K2_K3 as "k3" would be just as valid. It would arguably be
343 // clearer if the assembly spelled the pair out, e.g.:
344 //   "vp2intersect %zmm5, %zmm7, {%k2, %k3}"
345 // but this form works too.
346 switch (MI->getOperand(OpNo).getReg()) {
347 case X86::K0_K1:
348 printRegName(OS, X86::K0);
349 return;
350 case X86::K2_K3:
351 printRegName(OS, X86::K2);
352 return;
353 case X86::K4_K5:
354 printRegName(OS, X86::K4);
355 return;
356 case X86::K6_K7:
357 printRegName(OS, X86::K6);
358 return;
359 }
360 llvm_unreachable("Unknown mask pair register name");
361 }
3232 protected:
3333 void printInstFlags(const MCInst *MI, raw_ostream &O);
3434 void printOptionalSegReg(const MCInst *MI, unsigned OpNo, raw_ostream &O);
35 void printVKPair(const MCInst *MI, unsigned OpNo, raw_ostream &OS);
3536 };
3637
3738 } // end namespace llvm
172172 def FeatureBITALG : SubtargetFeature<"avx512bitalg", "HasBITALG", "true",
173173 "Enable AVX-512 Bit Algorithms",
174174 [FeatureBWI]>;
175 def FeatureVP2INTERSECT : SubtargetFeature<"avx512vp2intersect",
176 "HasVP2INTERSECT", "true",
177 "Enable AVX-512 vp2intersect",
178 [FeatureAVX512]>;
175179 def FeaturePCLMUL : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
176180 "Enable packed carry-less multiplication instructions",
177181 [FeatureSSE2]>;
2294222942 Reg = RegInfo->getPtrSizedFrameRegister(MF);
2294322943 }
2294422944 return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
22945 }
22946
22947 case Intrinsic::x86_avx512_vp2intersect_q_512:
22948 case Intrinsic::x86_avx512_vp2intersect_q_256:
22949 case Intrinsic::x86_avx512_vp2intersect_q_128:
22950 case Intrinsic::x86_avx512_vp2intersect_d_512:
22951 case Intrinsic::x86_avx512_vp2intersect_d_256:
22952 case Intrinsic::x86_avx512_vp2intersect_d_128: {
22953 MVT MaskVT = Op.getSimpleValueType();
22954
22955 SDVTList VTs = DAG.getVTList(MVT::Untyped, MVT::Other);
22956 SDLoc DL(Op);
22957
22958 SDValue Operation =
22959 DAG.getNode(X86ISD::VP2INTERSECT, DL, VTs,
22960 Op->getOperand(1), Op->getOperand(2));
22961
22962 SDValue Result0 = DAG.getTargetExtractSubreg(X86::sub_mask_0, DL,
22963 MaskVT, Operation);
22964 SDValue Result1 = DAG.getTargetExtractSubreg(X86::sub_mask_1, DL,
22965 MaskVT, Operation);
22966 return DAG.getMergeValues({Result0, Result1}, DL);
2294522967 }
2294622968 }
2294722969 }
2828328305 case X86ISD::TPAUSE: return "X86ISD::TPAUSE";
2828428306 case X86ISD::ENQCMD: return "X86ISD::ENQCMD";
2828528307 case X86ISD::ENQCMDS: return "X86ISD::ENQCMDS";
28308 case X86ISD::VP2INTERSECT: return "X86ISD::VP2INTERSECT";
2828628309 }
2828728310 return nullptr;
2828828311 }
591591 // Enqueue Stores Instructions
592592 ENQCMD, ENQCMDS,
593593
594 // For avx512-vp2intersect
595 VP2INTERSECT,
596
594597 // Compare and swap.
595598 LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
596599 LCMPXCHG8_DAG,
2424
2525 // Corresponding mask register class.
2626 RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);
27
28 // Corresponding mask register pair class.
29 RegisterOperand KRPC = !if (!gt(NumElts, 16), ?,
30 !cast<RegisterOperand>("VK" # NumElts # "Pair"));
2731
2832 // Corresponding write-mask register class.
2933 RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");
1255512559 Sched<[SchedWriteFMA.ZMM.Folded]>;
1255612560 }
1255712561
12562 let hasSideEffects = 0 in {
12563 def MASKPAIR16STORE : PseudoI<(outs), (ins anymem:$dst, VK16PAIR:$src),
12564 [(store VK16PAIR:$src, addr:$dst)]>;
12565 def MASKPAIR16LOAD : PseudoI<(outs VK16PAIR:$dst), (ins anymem:$src),
12566 [(set VK16PAIR:$dst, (load addr:$src))]>;
12567 }
12568
12569 //===----------------------------------------------------------------------===//
12570 // VP2INTERSECT
12571 //===----------------------------------------------------------------------===//
12572
12573 multiclass avx512_vp2intersect_modes<X86VectorVTInfo _> {
12574 def rr : I<0x68, MRMSrcReg,
12575 (outs _.KRPC:$dst),
12576 (ins _.RC:$src1, _.RC:$src2),
12577 !strconcat("vp2intersect", _.Suffix,
12578 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
12579 [(set _.KRPC:$dst, (X86vp2intersect
12580 _.RC:$src1, (_.VT _.RC:$src2)))]>,
12581 EVEX_4V, T8XD;
12582
12583 def rm : I<0x68, MRMSrcMem,
12584 (outs _.KRPC:$dst),
12585 (ins _.RC:$src1, _.MemOp:$src2),
12586 !strconcat("vp2intersect", _.Suffix,
12587 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
12588 [(set _.KRPC:$dst, (X86vp2intersect
12589 _.RC:$src1, (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>,
12590 EVEX_4V, T8XD, EVEX_CD8<_.EltSize, CD8VF>;
12591
12592 def rmb : I<0x68, MRMSrcMem,
12593 (outs _.KRPC:$dst),
12594 (ins _.RC:$src1, _.ScalarMemOp:$src2),
12595 !strconcat("vp2intersect", _.Suffix, "\t{${src2}", _.BroadcastStr,
12596 ", $src1, $dst|$dst, $src1, ${src2}", _.BroadcastStr ,"}"),
12597 [(set _.KRPC:$dst, (X86vp2intersect
12598 _.RC:$src1, (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src2)))))]>,
12599 EVEX_4V, T8XD, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>;
12600 }
12601
12602 multiclass avx512_vp2intersect<AVX512VLVectorVTInfo _> {
12603 let Predicates = [HasAVX512, HasVP2INTERSECT] in
12604 defm Z : avx512_vp2intersect_modes<_.info512>, EVEX_V512;
12605
12606 let Predicates = [HasAVX512, HasVP2INTERSECT, HasVLX] in {
12607 defm Z256 : avx512_vp2intersect_modes<_.info256>, EVEX_V256;
12608 defm Z128 : avx512_vp2intersect_modes<_.info128>, EVEX_V128;
12609 }
12610 }
12611
12612 defm VP2INTERSECTD : avx512_vp2intersect<avx512vl_i32_info>;
12613 defm VP2INTERSECTQ : avx512_vp2intersect<avx512vl_i64_info>, VEX_W;
12614
1255812615 multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr,
1255912616 X86SchedWriteWidths sched,
1256012617 AVX512VLVectorVTInfo _SrcVTInfo,
504504 def X86FmaddsubRnd : SDNode<"X86ISD::FMADDSUB_RND", SDTFmaRound, [SDNPCommutative]>;
505505 def X86FmsubaddRnd : SDNode<"X86ISD::FMSUBADD_RND", SDTFmaRound, [SDNPCommutative]>;
506506
507 def X86vp2intersect : SDNode<"X86ISD::VP2INTERSECT",
508 SDTypeProfile<1, 2, [SDTCisVT<0, untyped>,
509 SDTCisVec<1>, SDTCisSameAs<1, 2>]>>;
510
507511 def SDTIFma : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0,1>,
508512 SDTCisSameAs<1,2>, SDTCisSameAs<1,3>]>;
509513 def x86vpmadd52l : SDNode<"X86ISD::VPMADD52L", SDTIFma, [SDNPCommutative]>;
28762876 assert(STI.hasBWI() && "KMOVD requires BWI");
28772877 return load ? X86::KMOVDkm : X86::KMOVDmk;
28782878 }
2879 // All of these mask pair classes have the same spill size, so the same
2880 // kmov instructions can be used with all of them.
2881 if (X86::VK1PAIRRegClass.hasSubClassEq(RC) ||
2882 X86::VK2PAIRRegClass.hasSubClassEq(RC) ||
2883 X86::VK4PAIRRegClass.hasSubClassEq(RC) ||
2884 X86::VK8PAIRRegClass.hasSubClassEq(RC) ||
2885 X86::VK16PAIRRegClass.hasSubClassEq(RC))
2886 return load ? X86::MASKPAIR16LOAD : X86::MASKPAIR16STORE;
28792887 llvm_unreachable("Unknown 4-byte regclass");
28802888 case 8:
28812889 if (X86::GR64RegClass.hasSubClassEq(RC))
756756 let ParserMatchClass = X86MemAsmOperand;
757757 }
758758
759 let RenderMethod = "addMaskPairOperands" in {
760 def VK1PairAsmOperand : AsmOperandClass { let Name = "VK1Pair"; }
761 def VK2PairAsmOperand : AsmOperandClass { let Name = "VK2Pair"; }
762 def VK4PairAsmOperand : AsmOperandClass { let Name = "VK4Pair"; }
763 def VK8PairAsmOperand : AsmOperandClass { let Name = "VK8Pair"; }
764 def VK16PairAsmOperand : AsmOperandClass { let Name = "VK16Pair"; }
765 }
766
767 def VK1Pair : RegisterOperand<VK1PAIR, "printVKPair"> {
768 let ParserMatchClass = VK1PairAsmOperand;
769 }
770
771 def VK2Pair : RegisterOperand<VK2PAIR, "printVKPair"> {
772 let ParserMatchClass = VK2PairAsmOperand;
773 }
774
775 def VK4Pair : RegisterOperand<VK4PAIR, "printVKPair"> {
776 let ParserMatchClass = VK4PairAsmOperand;
777 }
778
779 def VK8Pair : RegisterOperand<VK8PAIR, "printVKPair"> {
780 let ParserMatchClass = VK8PairAsmOperand;
781 }
782
783 def VK16Pair : RegisterOperand<VK16PAIR, "printVKPair"> {
784 let ParserMatchClass = VK16PairAsmOperand;
785 }
759786
760787 //===----------------------------------------------------------------------===//
761788 // X86 Complex Pattern Definitions.
842869 def NoVLX_Or_NoDQI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasDQI()">;
843870 def PKU : Predicate<"Subtarget->hasPKU()">;
844871 def HasVNNI : Predicate<"Subtarget->hasVNNI()">;
872 def HasVP2INTERSECT : Predicate<"Subtarget->hasVP2INTERSECT()">;
845873 def HasBF16 : Predicate<"Subtarget->hasBF16()">;
846874
847875 def HasBITALG : Predicate<"Subtarget->hasBITALG()">;
16791679 case X86::TLS_base_addr64:
16801680 return LowerTlsAddr(MCInstLowering, *MI);
16811681
1682 // Loading or storing a mask pair requires two kmov operations. The second
1683 // one needs a 2-byte displacement relative to the specified address, since
1684 // the spill size is 32 bits. Pairs of 1-bit up to 16-bit masks all share
1685 // that spill size, so all are stored with MASKPAIR16STORE and loaded with MASKPAIR16LOAD.
1686 //
1687 // Adding 2 to the displacement could overflow in theory, hence the asserts
1688 // in both cases.
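// Illustration (not from the patch): for a VK16PAIR pair K0_K1 spilled to a
// slot addressed by (%rsp), the store expansion emits two word-sized moves,
//   kmovw %k0, (%rsp)     # sub_mask_0, displacement Disp
//   kmovw %k1, 2(%rsp)    # sub_mask_1, displacement Disp + 2
// and the load expansion mirrors it with two KMOVWkm instructions.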
1689 case X86::MASKPAIR16LOAD: {
1690 int64_t Disp = MI->getOperand(1 + X86::AddrDisp).getImm();
1691 assert(Disp >= 0 && Disp <= INT32_MAX - 2 && "Unexpected displacement");
1692 const X86RegisterInfo *RI =
1693 MF->getSubtarget<X86Subtarget>().getRegisterInfo();
1694 unsigned Reg = MI->getOperand(0).getReg();
1695 unsigned Reg0 = RI->getSubReg(Reg, X86::sub_mask_0);
1696 unsigned Reg1 = RI->getSubReg(Reg, X86::sub_mask_1);
1697
1698 // Load the first mask register
1699 MCInstBuilder MIB = MCInstBuilder(X86::KMOVWkm);
1700 MIB.addReg(Reg0);
1701 for (int i = 0; i < X86::AddrNumOperands; ++i) {
1702 auto Op = MCInstLowering.LowerMachineOperand(MI, MI->getOperand(1 + i));
1703 MIB.addOperand(Op.getValue());
1704 }
1705 EmitAndCountInstruction(MIB);
1706
1707 // Load the second mask register of the pair
1708 MIB = MCInstBuilder(X86::KMOVWkm);
1709 MIB.addReg(Reg1);
1710 for (int i = 0; i < X86::AddrNumOperands; ++i) {
1711 if (i == X86::AddrDisp) {
1712 MIB.addImm(Disp + 2);
1713 } else {
1714 auto Op = MCInstLowering.LowerMachineOperand(MI, MI->getOperand(1 + i));
1715 MIB.addOperand(Op.getValue());
1716 }
1717 }
1718 EmitAndCountInstruction(MIB);
1719 return;
1720 }
1721
1722 case X86::MASKPAIR16STORE: {
1723 int64_t Disp = MI->getOperand(X86::AddrDisp).getImm();
1724 assert(Disp >= 0 && Disp <= INT32_MAX - 2 && "Unexpected displacement");
1725 const X86RegisterInfo *RI =
1726 MF->getSubtarget<X86Subtarget>().getRegisterInfo();
1727 unsigned Reg = MI->getOperand(X86::AddrNumOperands).getReg();
1728 unsigned Reg0 = RI->getSubReg(Reg, X86::sub_mask_0);
1729 unsigned Reg1 = RI->getSubReg(Reg, X86::sub_mask_1);
1730
1731 // Store the first mask register
1732 MCInstBuilder MIB = MCInstBuilder(X86::KMOVWmk);
1733 for (int i = 0; i < X86::AddrNumOperands; ++i)
1734 MIB.addOperand(MCInstLowering.LowerMachineOperand(MI, MI->getOperand(i)).getValue());
1735 MIB.addReg(Reg0);
1736 EmitAndCountInstruction(MIB);
1737
1738 // Store the second mask register of the pair
1739 MIB = MCInstBuilder(X86::KMOVWmk);
1740 for (int i = 0; i < X86::AddrNumOperands; ++i) {
1741 if (i == X86::AddrDisp) {
1742 MIB.addImm(Disp + 2);
1743 } else {
1744 auto Op = MCInstLowering.LowerMachineOperand(MI, MI->getOperand(0 + i));
1745 MIB.addOperand(Op.getValue());
1746 }
1747 }
1748 MIB.addReg(Reg1);
1749 EmitAndCountInstruction(MIB);
1750 return;
1751 }
1752
16821753 case X86::MOVPC32r: {
16831754 // This is a pseudo op for a two instruction sequence with a label, which
16841755 // looks like:
2727 def sub_32bit : SubRegIndex<32>;
2828 def sub_xmm : SubRegIndex<128>;
2929 def sub_ymm : SubRegIndex<256>;
30 def sub_mask_0 : SubRegIndex<-1>;
31 def sub_mask_1 : SubRegIndex<-1, -1>;
3032 }
3133
3234 //===----------------------------------------------------------------------===//
593595 def VK32 : RegisterClass<"X86", [v32i1], 32, (add VK16)> {let Size = 32;}
594596 def VK64 : RegisterClass<"X86", [v64i1], 64, (add VK32)> {let Size = 64;}
595597
598 // Mask register pairs
599 def KPAIRS : RegisterTuples<[sub_mask_0, sub_mask_1],
600 [(add K0, K2, K4, K6), (add K1, K3, K5, K7)]>;
601
602 def VK1PAIR : RegisterClass<"X86", [untyped], 16, (add KPAIRS)> {let Size = 32;}
603 def VK2PAIR : RegisterClass<"X86", [untyped], 16, (add KPAIRS)> {let Size = 32;}
604 def VK4PAIR : RegisterClass<"X86", [untyped], 16, (add KPAIRS)> {let Size = 32;}
605 def VK8PAIR : RegisterClass<"X86", [untyped], 16, (add KPAIRS)> {let Size = 32;}
606 def VK16PAIR : RegisterClass<"X86", [untyped], 16, (add KPAIRS)> {let Size = 32;}
607
596608 def VK1WM : RegisterClass<"X86", [v1i1], 16, (sub VK1, K0)> {let Size = 16;}
597609 def VK2WM : RegisterClass<"X86", [v2i1], 16, (sub VK2, K0)> {let Size = 16;}
598610 def VK4WM : RegisterClass<"X86", [v4i1], 16, (sub VK4, K0)> {let Size = 16;}
360360
361361 /// Processor has AVX-512 Bit Algorithms instructions
362362 bool HasBITALG = false;
363
364 /// Processor has AVX-512 vp2intersect instructions
365 bool HasVP2INTERSECT = false;
363366
364367 /// Processor supports MPX - Memory Protection Extensions
365368 bool HasMPX = false;
678681 bool hasPKU() const { return HasPKU; }
679682 bool hasVNNI() const { return HasVNNI; }
680683 bool hasBF16() const { return HasBF16; }
684 bool hasVP2INTERSECT() const { return HasVP2INTERSECT; }
681685 bool hasBITALG() const { return HasBITALG; }
682686 bool hasMPX() const { return HasMPX; }
683687 bool hasSHSTK() const { return HasSHSTK; }
0 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vp2intersect,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vp2intersect,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
3
4 define void @test_mm256_2intersect_epi32(<4 x i64> %a, <4 x i64> %b, i8* nocapture %m0, i8* nocapture %m1) {
5 ; X86-LABEL: test_mm256_2intersect_epi32:
6 ; X86: # %bb.0: # %entry
7 ; X86-NEXT: movl 4(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8 ; X86-NEXT: vp2intersectd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0x7f,0x28,0x68,0xc1]
9 ; X86-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
10 ; X86-NEXT: kmovw %k0, %edx # encoding: [0xc5,0xf8,0x93,0xd0]
11 ; X86-NEXT: movb %dl, (%eax) # encoding: [0x88,0x10]
12 ; X86-NEXT: movl 8(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
13 ; X86-NEXT: movb %cl, (%eax) # encoding: [0x88,0x08]
14 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
15 ; X86-NEXT: retl # encoding: [0xc3]
16 ;
17 ; X64-LABEL: test_mm256_2intersect_epi32:
18 ; X64: # %bb.0: # %entry
19 ; X64-NEXT: vp2intersectd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0x7f,0x28,0x68,0xc1]
20 ; X64-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
21 ; X64-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
22 ; X64-NEXT: movb %cl, (%rdi) # encoding: [0x88,0x0f]
23 ; X64-NEXT: movb %al, (%rsi) # encoding: [0x88,0x06]
24 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
25 ; X64-NEXT: retq # encoding: [0xc3]
26 entry:
27 %0 = bitcast <4 x i64> %a to <8 x i32>
28 %1 = bitcast <4 x i64> %b to <8 x i32>
29 %2 = tail call { <8 x i1>, <8 x i1> } @llvm.x86.avx512.vp2intersect.d.256(<8 x i32> %0, <8 x i32> %1)
30 %3 = extractvalue { <8 x i1>, <8 x i1> } %2, 0
31 %4 = bitcast i8* %m0 to <8 x i1>*
32 store <8 x i1> %3, <8 x i1>* %4, align 8
33 %5 = extractvalue { <8 x i1>, <8 x i1> } %2, 1
34 %6 = bitcast i8* %m1 to <8 x i1>*
35 store <8 x i1> %5, <8 x i1>* %6, align 8
36 ret void
37 }
38
39 define void @test_mm256_2intersect_epi64(<4 x i64> %a, <4 x i64> %b, i8* nocapture %m0, i8* nocapture %m1) {
40 ; X86-LABEL: test_mm256_2intersect_epi64:
41 ; X86: # %bb.0: # %entry
42 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
43 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
44 ; X86-NEXT: vp2intersectq %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xff,0x28,0x68,0xc1]
45 ; X86-NEXT: kshiftlw $12, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd0,0x0c]
46 ; X86-NEXT: kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
47 ; X86-NEXT: kmovw %k2, %edx # encoding: [0xc5,0xf8,0x93,0xd2]
48 ; X86-NEXT: movb %dl, (%ecx) # encoding: [0x88,0x11]
49 ; X86-NEXT: kshiftlw $12, %k1, %k0 # encoding: [0xc4,0xe3,0xf9,0x32,0xc1,0x0c]
50 ; X86-NEXT: kshiftrw $12, %k0, %k0 # encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
51 ; X86-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
52 ; X86-NEXT: movb %cl, (%eax) # encoding: [0x88,0x08]
53 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
54 ; X86-NEXT: retl # encoding: [0xc3]
55 ;
56 ; X64-LABEL: test_mm256_2intersect_epi64:
57 ; X64: # %bb.0: # %entry
58 ; X64-NEXT: vp2intersectq %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xff,0x28,0x68,0xc1]
59 ; X64-NEXT: kshiftlw $12, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd0,0x0c]
60 ; X64-NEXT: kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
61 ; X64-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
62 ; X64-NEXT: movb %al, (%rdi) # encoding: [0x88,0x07]
63 ; X64-NEXT: kshiftlw $12, %k1, %k0 # encoding: [0xc4,0xe3,0xf9,0x32,0xc1,0x0c]
64 ; X64-NEXT: kshiftrw $12, %k0, %k0 # encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
65 ; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
66 ; X64-NEXT: movb %al, (%rsi) # encoding: [0x88,0x06]
67 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
68 ; X64-NEXT: retq # encoding: [0xc3]
69 entry:
70 %0 = tail call { <4 x i1>, <4 x i1> } @llvm.x86.avx512.vp2intersect.q.256(<4 x i64> %a, <4 x i64> %b)
71 %1 = extractvalue { <4 x i1>, <4 x i1> } %0, 0
72 %2 = shufflevector <4 x i1> %1, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
73 %3 = bitcast <8 x i1> %2 to i8
74 store i8 %3, i8* %m0, align 1
75 %4 = extractvalue { <4 x i1>, <4 x i1> } %0, 1
76 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
77 %6 = bitcast <8 x i1> %5 to i8
78 store i8 %6, i8* %m1, align 1
79 ret void
80 }
81
82 define void @test_mm256_2intersect_epi32_p(<4 x i64>* nocapture readonly %a, <4 x i64>* nocapture readonly %b, i8* nocapture %m0, i8* nocapture %m1) {
83 ; X86-LABEL: test_mm256_2intersect_epi32_p:
84 ; X86: # %bb.0: # %entry
85 ; X86-NEXT: movl 12(%esp), %eax # encoding: [0x8b,0x44,0x24,0x0c]
86 ; X86-NEXT: movl 8(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x08]
87 ; X86-NEXT: movl 4(%esp), %edx # encoding: [0x8b,0x54,0x24,0x04]
88 ; X86-NEXT: vmovaps (%edx), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x02]
89 ; X86-NEXT: vp2intersectd (%ecx), %ymm0, %k0 # encoding: [0x62,0xf2,0x7f,0x28,0x68,0x01]
90 ; X86-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
91 ; X86-NEXT: kmovw %k0, %edx # encoding: [0xc5,0xf8,0x93,0xd0]
92 ; X86-NEXT: movb %dl, (%eax) # encoding: [0x88,0x10]
93 ; X86-NEXT: movl 16(%esp), %eax # encoding: [0x8b,0x44,0x24,0x10]
94 ; X86-NEXT: movb %cl, (%eax) # encoding: [0x88,0x08]
95 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
96 ; X86-NEXT: retl # encoding: [0xc3]
97 ;
98 ; X64-LABEL: test_mm256_2intersect_epi32_p:
99 ; X64: # %bb.0: # %entry
100 ; X64-NEXT: vmovaps (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x07]
101 ; X64-NEXT: vp2intersectd (%rsi), %ymm0, %k0 # encoding: [0x62,0xf2,0x7f,0x28,0x68,0x06]
102 ; X64-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
103 ; X64-NEXT: kmovw %k0, %esi # encoding: [0xc5,0xf8,0x93,0xf0]
104 ; X64-NEXT: movb %sil, (%rdx) # encoding: [0x40,0x88,0x32]
105 ; X64-NEXT: movb %al, (%rcx) # encoding: [0x88,0x01]
106 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
107 ; X64-NEXT: retq # encoding: [0xc3]
108 entry:
109 %0 = bitcast <4 x i64>* %a to <8 x i32>*
110 %1 = load <8 x i32>, <8 x i32>* %0, align 32
111 %2 = bitcast <4 x i64>* %b to <8 x i32>*
112 %3 = load <8 x i32>, <8 x i32>* %2, align 32
113 %4 = tail call { <8 x i1>, <8 x i1> } @llvm.x86.avx512.vp2intersect.d.256(<8 x i32> %1, <8 x i32> %3)
114 %5 = extractvalue { <8 x i1>, <8 x i1> } %4, 0
115 %6 = bitcast i8* %m0 to <8 x i1>*
116 store <8 x i1> %5, <8 x i1>* %6, align 8
117 %7 = extractvalue { <8 x i1>, <8 x i1> } %4, 1
118 %8 = bitcast i8* %m1 to <8 x i1>*
119 store <8 x i1> %7, <8 x i1>* %8, align 8
120 ret void
121 }
122
123 define void @test_mm256_2intersect_epi64_p(<4 x i64>* nocapture readonly %a, <4 x i64>* nocapture readonly %b, i8* nocapture %m0, i8* nocapture %m1) {
124 ; X86-LABEL: test_mm256_2intersect_epi64_p:
125 ; X86: .cfi_startproc
126 ; X86-NEXT: # %bb.0: # %entry
127 ; X86-NEXT: pushl %esi # encoding: [0x56]
128 ; X86-NEXT: .cfi_def_cfa_offset 8
129 ; X86-NEXT: .cfi_offset %esi, -8
130 ; X86-NEXT: movl 20(%esp), %eax # encoding: [0x8b,0x44,0x24,0x14]
131 ; X86-NEXT: movl 16(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x10]
132 ; X86-NEXT: movl 12(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c]
133 ; X86-NEXT: movl 8(%esp), %esi # encoding: [0x8b,0x74,0x24,0x08]
134 ; X86-NEXT: vmovaps (%esi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x06]
135 ; X86-NEXT: vp2intersectq (%edx), %ymm0, %k0 # encoding: [0x62,0xf2,0xff,0x28,0x68,0x02]
136 ; X86-NEXT: kshiftlw $12, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd0,0x0c]
137 ; X86-NEXT: kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
138 ; X86-NEXT: kmovw %k2, %edx # encoding: [0xc5,0xf8,0x93,0xd2]
139 ; X86-NEXT: movb %dl, (%ecx) # encoding: [0x88,0x11]
140 ; X86-NEXT: kshiftlw $12, %k1, %k0 # encoding: [0xc4,0xe3,0xf9,0x32,0xc1,0x0c]
141 ; X86-NEXT: kshiftrw $12, %k0, %k0 # encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
142 ; X86-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
143 ; X86-NEXT: movb %cl, (%eax) # encoding: [0x88,0x08]
144 ; X86-NEXT: popl %esi # encoding: [0x5e]
145 ; X86-NEXT: .cfi_def_cfa_offset 4
146 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
147 ; X86-NEXT: retl # encoding: [0xc3]
148 ;
149 ; X64-LABEL: test_mm256_2intersect_epi64_p:
150 ; X64: # %bb.0: # %entry
151 ; X64-NEXT: vmovaps (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x07]
152 ; X64-NEXT: vp2intersectq (%rsi), %ymm0, %k0 # encoding: [0x62,0xf2,0xff,0x28,0x68,0x06]
153 ; X64-NEXT: kshiftlw $12, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd0,0x0c]
154 ; X64-NEXT: kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
155 ; X64-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
156 ; X64-NEXT: movb %al, (%rdx) # encoding: [0x88,0x02]
157 ; X64-NEXT: kshiftlw $12, %k1, %k0 # encoding: [0xc4,0xe3,0xf9,0x32,0xc1,0x0c]
158 ; X64-NEXT: kshiftrw $12, %k0, %k0 # encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
159 ; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
160 ; X64-NEXT: movb %al, (%rcx) # encoding: [0x88,0x01]
161 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
162 ; X64-NEXT: retq # encoding: [0xc3]
163 entry:
164 %0 = load <4 x i64>, <4 x i64>* %a, align 32
165 %1 = load <4 x i64>, <4 x i64>* %b, align 32
166 %2 = tail call { <4 x i1>, <4 x i1> } @llvm.x86.avx512.vp2intersect.q.256(<4 x i64> %0, <4 x i64> %1)
167 %3 = extractvalue { <4 x i1>, <4 x i1> } %2, 0
168 %4 = shufflevector <4 x i1> %3, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
169 %5 = bitcast <8 x i1> %4 to i8
170 store i8 %5, i8* %m0, align 1
171 %6 = extractvalue { <4 x i1>, <4 x i1> } %2, 1
172 %7 = shufflevector <4 x i1> %6, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
173 %8 = bitcast <8 x i1> %7 to i8
174 store i8 %8, i8* %m1, align 1
175 ret void
176 }
177
178 define void @test_mm256_2intersect_epi32_b(i32* nocapture readonly %a, i32* nocapture readonly %b, i8* nocapture %m0, i8* nocapture %m1) {
179 ; X86-LABEL: test_mm256_2intersect_epi32_b:
180 ; X86: .cfi_startproc
181 ; X86-NEXT: # %bb.0: # %entry
182 ; X86-NEXT: movl 12(%esp), %eax # encoding: [0x8b,0x44,0x24,0x0c]
183 ; X86-NEXT: movl 8(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x08]
184 ; X86-NEXT: movl 4(%esp), %edx # encoding: [0x8b,0x54,0x24,0x04]
185 ; X86-NEXT: vbroadcastss (%edx), %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x18,0x02]
186 ; X86-NEXT: vp2intersectd (%ecx){1to8}, %ymm0, %k0 # encoding: [0x62,0xf2,0x7f,0x38,0x68,0x01]
187 ; X86-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
188 ; X86-NEXT: kmovw %k0, %edx # encoding: [0xc5,0xf8,0x93,0xd0]
189 ; X86-NEXT: movb %dl, (%eax) # encoding: [0x88,0x10]
190 ; X86-NEXT: movl 16(%esp), %eax # encoding: [0x8b,0x44,0x24,0x10]
191 ; X86-NEXT: movb %cl, (%eax) # encoding: [0x88,0x08]
192 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
193 ; X86-NEXT: retl # encoding: [0xc3]
194 ;
195 ; X64-LABEL: test_mm256_2intersect_epi32_b:
196 ; X64: # %bb.0: # %entry
197 ; X64-NEXT: vbroadcastss (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x18,0x07]
198 ; X64-NEXT: vp2intersectd (%rsi){1to8}, %ymm0, %k0 # encoding: [0x62,0xf2,0x7f,0x38,0x68,0x06]
199 ; X64-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
200 ; X64-NEXT: kmovw %k0, %esi # encoding: [0xc5,0xf8,0x93,0xf0]
201 ; X64-NEXT: movb %sil, (%rdx) # encoding: [0x40,0x88,0x32]
202 ; X64-NEXT: movb %al, (%rcx) # encoding: [0x88,0x01]
203 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
204 ; X64-NEXT: retq # encoding: [0xc3]
205 entry:
206 %0 = load i32, i32* %a, align 4
207 %vecinit.i.i = insertelement <8 x i32> undef, i32 %0, i32 0
208 %vecinit7.i.i = shufflevector <8 x i32> %vecinit.i.i, <8 x i32> undef, <8 x i32> zeroinitializer
209 %1 = load i32, i32* %b, align 4
210 %vecinit.i.i2 = insertelement <8 x i32> undef, i32 %1, i32 0
211 %vecinit7.i.i3 = shufflevector <8 x i32> %vecinit.i.i2, <8 x i32> undef, <8 x i32> zeroinitializer
212 %2 = tail call { <8 x i1>, <8 x i1> } @llvm.x86.avx512.vp2intersect.d.256(<8 x i32> %vecinit7.i.i, <8 x i32> %vecinit7.i.i3)
213 %3 = extractvalue { <8 x i1>, <8 x i1> } %2, 0
214 %4 = bitcast i8* %m0 to <8 x i1>*
215 store <8 x i1> %3, <8 x i1>* %4, align 8
216 %5 = extractvalue { <8 x i1>, <8 x i1> } %2, 1
217 %6 = bitcast i8* %m1 to <8 x i1>*
218 store <8 x i1> %5, <8 x i1>* %6, align 8
219 ret void
220 }
221
222 define void @test_mm256_2intersect_epi64_b(i64* nocapture readonly %a, i64* nocapture readonly %b, i8* nocapture %m0, i8* nocapture %m1) {
223 ; X86-LABEL: test_mm256_2intersect_epi64_b:
224 ; X86: .cfi_startproc
225 ; X86-NEXT: # %bb.0: # %entry
226 ; X86-NEXT: pushl %esi # encoding: [0x56]
227 ; X86-NEXT: .cfi_def_cfa_offset 8
228 ; X86-NEXT: .cfi_offset %esi, -8
229 ; X86-NEXT: movl 20(%esp), %eax # encoding: [0x8b,0x44,0x24,0x14]
230 ; X86-NEXT: movl 16(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x10]
231 ; X86-NEXT: movl 12(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c]
232 ; X86-NEXT: movl 8(%esp), %esi # encoding: [0x8b,0x74,0x24,0x08]
233 ; X86-NEXT: vbroadcastsd (%esi), %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x19,0x06]
234 ; X86-NEXT: vbroadcastsd (%edx), %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x19,0x0a]
235 ; X86-NEXT: vp2intersectq %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xff,0x28,0x68,0xc1]
236 ; X86-NEXT: kshiftlw $12, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd0,0x0c]
237 ; X86-NEXT: kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
238 ; X86-NEXT: kmovw %k2, %edx # encoding: [0xc5,0xf8,0x93,0xd2]
239 ; X86-NEXT: movb %dl, (%ecx) # encoding: [0x88,0x11]
240 ; X86-NEXT: kshiftlw $12, %k1, %k0 # encoding: [0xc4,0xe3,0xf9,0x32,0xc1,0x0c]
241 ; X86-NEXT: kshiftrw $12, %k0, %k0 # encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
242 ; X86-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
243 ; X86-NEXT: movb %cl, (%eax) # encoding: [0x88,0x08]
244 ; X86-NEXT: popl %esi # encoding: [0x5e]
245 ; X86-NEXT: .cfi_def_cfa_offset 4
246 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
247 ; X86-NEXT: retl # encoding: [0xc3]
248 ;
249 ; X64-LABEL: test_mm256_2intersect_epi64_b:
250 ; X64: # %bb.0: # %entry
251 ; X64-NEXT: vbroadcastsd (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x19,0x07]
252 ; X64-NEXT: vp2intersectq (%rsi){1to4}, %ymm0, %k0 # encoding: [0x62,0xf2,0xff,0x38,0x68,0x06]
253 ; X64-NEXT: kshiftlw $12, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd0,0x0c]
254 ; X64-NEXT: kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
255 ; X64-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
256 ; X64-NEXT: movb %al, (%rdx) # encoding: [0x88,0x02]
257 ; X64-NEXT: kshiftlw $12, %k1, %k0 # encoding: [0xc4,0xe3,0xf9,0x32,0xc1,0x0c]
258 ; X64-NEXT: kshiftrw $12, %k0, %k0 # encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
259 ; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
260 ; X64-NEXT: movb %al, (%rcx) # encoding: [0x88,0x01]
261 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
262 ; X64-NEXT: retq # encoding: [0xc3]
263 entry:
264 %0 = load i64, i64* %a, align 8
265 %vecinit.i.i = insertelement <4 x i64> undef, i64 %0, i32 0
266 %vecinit3.i.i = shufflevector <4 x i64> %vecinit.i.i, <4 x i64> undef, <4 x i32> zeroinitializer
267 %1 = load i64, i64* %b, align 8
268 %vecinit.i.i2 = insertelement <4 x i64> undef, i64 %1, i32 0
269 %vecinit3.i.i3 = shufflevector <4 x i64> %vecinit.i.i2, <4 x i64> undef, <4 x i32> zeroinitializer
270 %2 = tail call { <4 x i1>, <4 x i1> } @llvm.x86.avx512.vp2intersect.q.256(<4 x i64> %vecinit3.i.i, <4 x i64> %vecinit3.i.i3)
271 %3 = extractvalue { <4 x i1>, <4 x i1> } %2, 0
272 %4 = shufflevector <4 x i1> %3, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
273 %5 = bitcast <8 x i1> %4 to i8
274 store i8 %5, i8* %m0, align 1
275 %6 = extractvalue { <4 x i1>, <4 x i1> } %2, 1
276 %7 = shufflevector <4 x i1> %6, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
277 %8 = bitcast <8 x i1> %7 to i8
278 store i8 %8, i8* %m1, align 1
279 ret void
280 }
281
282 define void @test_mm_2intersect_epi32(<2 x i64> %a, <2 x i64> %b, i8* nocapture %m0, i8* nocapture %m1) {
283 ; X86-LABEL: test_mm_2intersect_epi32:
284 ; X86: .cfi_startproc
285 ; X86-NEXT: # %bb.0: # %entry
286 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
287 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
288 ; X86-NEXT: vp2intersectd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0x7f,0x08,0x68,0xc1]
289 ; X86-NEXT: kshiftlw $12, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd0,0x0c]
290 ; X86-NEXT: kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
291 ; X86-NEXT: kmovw %k2, %edx # encoding: [0xc5,0xf8,0x93,0xd2]
292 ; X86-NEXT: movb %dl, (%ecx) # encoding: [0x88,0x11]
293 ; X86-NEXT: kshiftlw $12, %k1, %k0 # encoding: [0xc4,0xe3,0xf9,0x32,0xc1,0x0c]
294 ; X86-NEXT: kshiftrw $12, %k0, %k0 # encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
295 ; X86-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
296 ; X86-NEXT: movb %cl, (%eax) # encoding: [0x88,0x08]
297 ; X86-NEXT: retl # encoding: [0xc3]
298 ;
299 ; X64-LABEL: test_mm_2intersect_epi32:
300 ; X64: # %bb.0: # %entry
301 ; X64-NEXT: vp2intersectd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0x7f,0x08,0x68,0xc1]
302 ; X64-NEXT: kshiftlw $12, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd0,0x0c]
303 ; X64-NEXT: kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
304 ; X64-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
305 ; X64-NEXT: movb %al, (%rdi) # encoding: [0x88,0x07]
306 ; X64-NEXT: kshiftlw $12, %k1, %k0 # encoding: [0xc4,0xe3,0xf9,0x32,0xc1,0x0c]
307 ; X64-NEXT: kshiftrw $12, %k0, %k0 # encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
308 ; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
309 ; X64-NEXT: movb %al, (%rsi) # encoding: [0x88,0x06]
310 ; X64-NEXT: retq # encoding: [0xc3]
311 entry:
312 %0 = bitcast <2 x i64> %a to <4 x i32>
313 %1 = bitcast <2 x i64> %b to <4 x i32>
314 %2 = tail call { <4 x i1>, <4 x i1> } @llvm.x86.avx512.vp2intersect.d.128(<4 x i32> %0, <4 x i32> %1)
315 %3 = extractvalue { <4 x i1>, <4 x i1> } %2, 0
316 %4 = shufflevector <4 x i1> %3, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
317 %5 = bitcast <8 x i1> %4 to i8
318 store i8 %5, i8* %m0, align 1
319 %6 = extractvalue { <4 x i1>, <4 x i1> } %2, 1
320 %7 = shufflevector <4 x i1> %6, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
321 %8 = bitcast <8 x i1> %7 to i8
322 store i8 %8, i8* %m1, align 1
323 ret void
324 }
325
326 define void @test_mm_2intersect_epi64(<2 x i64> %a, <2 x i64> %b, i8* nocapture %m0, i8* nocapture %m1) {
327 ; X86-LABEL: test_mm_2intersect_epi64:
328 ; X86: # %bb.0: # %entry
329 ; X86-NEXT: movl 8(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
330 ; X86-NEXT: movl 4(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
331 ; X86-NEXT: vp2intersectq %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xff,0x08,0x68,0xc1]
332 ; X86-NEXT: kshiftlw $14, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd0,0x0e]
333 ; X86-NEXT: kshiftrw $14, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0e]
334 ; X86-NEXT: kmovw %k2, %edx # encoding: [0xc5,0xf8,0x93,0xd2]
335 ; X86-NEXT: movb %dl, (%ecx) # encoding: [0x88,0x11]
336 ; X86-NEXT: kshiftlw $14, %k1, %k0 # encoding: [0xc4,0xe3,0xf9,0x32,0xc1,0x0e]
337 ; X86-NEXT: kshiftrw $14, %k0, %k0 # encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0e]
338 ; X86-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
339 ; X86-NEXT: movb %cl, (%eax) # encoding: [0x88,0x08]
340 ; X86-NEXT: retl # encoding: [0xc3]
341 ;
342 ; X64-LABEL: test_mm_2intersect_epi64:
343 ; X64: # %bb.0: # %entry
344 ; X64-NEXT: vp2intersectq %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xff,0x08,0x68,0xc1]
345 ; X64-NEXT: kshiftlw $14, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd0,0x0e]
346 ; X64-NEXT: kshiftrw $14, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0e]
347 ; X64-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
348 ; X64-NEXT: movb %al, (%rdi) # encoding: [0x88,0x07]
349 ; X64-NEXT: kshiftlw $14, %k1, %k0 # encoding: [0xc4,0xe3,0xf9,0x32,0xc1,0x0e]
350 ; X64-NEXT: kshiftrw $14, %k0, %k0 # encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0e]
351 ; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
352 ; X64-NEXT: movb %al, (%rsi) # encoding: [0x88,0x06]
353 ; X64-NEXT: retq # encoding: [0xc3]
354 entry:
355 %0 = tail call { <2 x i1>, <2 x i1> } @llvm.x86.avx512.vp2intersect.q.128(<2 x i64> %a, <2 x i64> %b)
356 %1 = extractvalue { <2 x i1>, <2 x i1> } %0, 0
357 %2 = shufflevector <2 x i1> %1, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
358 %3 = bitcast <8 x i1> %2 to i8
359 store i8 %3, i8* %m0, align 1
360 %4 = extractvalue { <2 x i1>, <2 x i1> } %0, 1
361 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
362 %6 = bitcast <8 x i1> %5 to i8
363 store i8 %6, i8* %m1, align 1
364 ret void
365 }
366
367 define void @test_mm_2intersect_epi32_p(<2 x i64>* nocapture readonly %a, <2 x i64>* nocapture readonly %b, i8* nocapture %m0, i8* nocapture %m1) {
368 ; X86-LABEL: test_mm_2intersect_epi32_p:
369 ; X86: # %bb.0: # %entry
370 ; X86-NEXT: pushl %esi # encoding: [0x56]
371 ; X86-NEXT: .cfi_def_cfa_offset 8
372 ; X86-NEXT: .cfi_offset %esi, -8
373 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x14]
374 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x10]
375 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c]
376 ; X86-NEXT: movl 8(%esp), %esi # encoding: [0x8b,0x74,0x24,0x08]
377 ; X86-NEXT: vmovaps (%esi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x06]
378 ; X86-NEXT: vp2intersectd (%edx), %xmm0, %k0 # encoding: [0x62,0xf2,0x7f,0x08,0x68,0x02]
379 ; X86-NEXT: kshiftlw $12, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd0,0x0c]
380 ; X86-NEXT: kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
381 ; X86-NEXT: kmovw %k2, %edx # encoding: [0xc5,0xf8,0x93,0xd2]
382 ; X86-NEXT: movb %dl, (%ecx) # encoding: [0x88,0x11]
383 ; X86-NEXT: kshiftlw $12, %k1, %k0 # encoding: [0xc4,0xe3,0xf9,0x32,0xc1,0x0c]
384 ; X86-NEXT: kshiftrw $12, %k0, %k0 # encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
385 ; X86-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
386 ; X86-NEXT: movb %cl, (%eax) # encoding: [0x88,0x08]
387 ; X86-NEXT: popl %esi # encoding: [0x5e]
388 ; X86-NEXT: .cfi_def_cfa_offset 4
389 ; X86-NEXT: retl # encoding: [0xc3]
390 ;
391 ; X64-LABEL: test_mm_2intersect_epi32_p:
392 ; X64: # %bb.0: # %entry
393 ; X64-NEXT: vmovaps (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07]
394 ; X64-NEXT: vp2intersectd (%rsi), %xmm0, %k0 # encoding: [0x62,0xf2,0x7f,0x08,0x68,0x06]
395 ; X64-NEXT: kshiftlw $12, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd0,0x0c]
396 ; X64-NEXT: kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
397 ; X64-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
398 ; X64-NEXT: movb %al, (%rdx) # encoding: [0x88,0x02]
399 ; X64-NEXT: kshiftlw $12, %k1, %k0 # encoding: [0xc4,0xe3,0xf9,0x32,0xc1,0x0c]
400 ; X64-NEXT: kshiftrw $12, %k0, %k0 # encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
401 ; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
402 ; X64-NEXT: movb %al, (%rcx) # encoding: [0x88,0x01]
403 ; X64-NEXT: retq # encoding: [0xc3]
404 entry:
405 %0 = bitcast <2 x i64>* %a to <4 x i32>*
406 %1 = load <4 x i32>, <4 x i32>* %0, align 16
407 %2 = bitcast <2 x i64>* %b to <4 x i32>*
408 %3 = load <4 x i32>, <4 x i32>* %2, align 16
409 %4 = tail call { <4 x i1>, <4 x i1> } @llvm.x86.avx512.vp2intersect.d.128(<4 x i32> %1, <4 x i32> %3)
410 %5 = extractvalue { <4 x i1>, <4 x i1> } %4, 0
411 %6 = shufflevector <4 x i1> %5, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
412 %7 = bitcast <8 x i1> %6 to i8
413 store i8 %7, i8* %m0, align 1
414 %8 = extractvalue { <4 x i1>, <4 x i1> } %4, 1
415 %9 = shufflevector <4 x i1> %8, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
416 %10 = bitcast <8 x i1> %9 to i8
417 store i8 %10, i8* %m1, align 1
418 ret void
419 }
420
421 define void @test_mm_2intersect_epi64_p(<2 x i64>* nocapture readonly %a, <2 x i64>* nocapture readonly %b, i8* nocapture %m0, i8* nocapture %m1) {
422 ; X86-LABEL: test_mm_2intersect_epi64_p:
423 ; X86: # %bb.0: # %entry
424 ; X86-NEXT: pushl %esi # encoding: [0x56]
425 ; X86-NEXT: .cfi_def_cfa_offset 8
426 ; X86-NEXT: .cfi_offset %esi, -8
427 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x14]
428 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x10]
429 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c]
430 ; X86-NEXT: movl 8(%esp), %esi # encoding: [0x8b,0x74,0x24,0x08]
431 ; X86-NEXT: vmovaps (%esi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x06]
432 ; X86-NEXT: vp2intersectq (%edx), %xmm0, %k0 # encoding: [0x62,0xf2,0xff,0x08,0x68,0x02]
433 ; X86-NEXT: kshiftlw $14, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd0,0x0e]
434 ; X86-NEXT: kshiftrw $14, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0e]
435 ; X86-NEXT: kmovw %k2, %edx # encoding: [0xc5,0xf8,0x93,0xd2]
436 ; X86-NEXT: movb %dl, (%ecx) # encoding: [0x88,0x11]
437 ; X86-NEXT: kshiftlw $14, %k1, %k0 # encoding: [0xc4,0xe3,0xf9,0x32,0xc1,0x0e]
438 ; X86-NEXT: kshiftrw $14, %k0, %k0 # encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0e]
439 ; X86-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
440 ; X86-NEXT: movb %cl, (%eax) # encoding: [0x88,0x08]
441 ; X86-NEXT: popl %esi # encoding: [0x5e]
442 ; X86-NEXT: .cfi_def_cfa_offset 4
443 ; X86-NEXT: retl # encoding: [0xc3]
444 ;
445 ; X64-LABEL: test_mm_2intersect_epi64_p:
446 ; X64: # %bb.0: # %entry
447 ; X64-NEXT: vmovaps (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07]
448 ; X64-NEXT: vp2intersectq (%rsi), %xmm0, %k0 # encoding: [0x62,0xf2,0xff,0x08,0x68,0x06]
449 ; X64-NEXT: kshiftlw $14, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd0,0x0e]
450 ; X64-NEXT: kshiftrw $14, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0e]
451 ; X64-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
452 ; X64-NEXT: movb %al, (%rdx) # encoding: [0x88,0x02]
453 ; X64-NEXT: kshiftlw $14, %k1, %k0 # encoding: [0xc4,0xe3,0xf9,0x32,0xc1,0x0e]
454 ; X64-NEXT: kshiftrw $14, %k0, %k0 # encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0e]
455 ; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
456 ; X64-NEXT: movb %al, (%rcx) # encoding: [0x88,0x01]
457 ; X64-NEXT: retq # encoding: [0xc3]
458 entry:
459 %0 = load <2 x i64>, <2 x i64>* %a, align 16
460 %1 = load <2 x i64>, <2 x i64>* %b, align 16
461 %2 = tail call { <2 x i1>, <2 x i1> } @llvm.x86.avx512.vp2intersect.q.128(<2 x i64> %0, <2 x i64> %1)
462 %3 = extractvalue { <2 x i1>, <2 x i1> } %2, 0
463 %4 = shufflevector <2 x i1> %3, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
464 %5 = bitcast <8 x i1> %4 to i8
465 store i8 %5, i8* %m0, align 1
466 %6 = extractvalue { <2 x i1>, <2 x i1> } %2, 1
467 %7 = shufflevector <2 x i1> %6, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
468 %8 = bitcast <8 x i1> %7 to i8
469 store i8 %8, i8* %m1, align 1
470 ret void
471 }
472
473 define void @test_mm_2intersect_epi32_b(i32* nocapture readonly %a, i32* nocapture readonly %b, i8* nocapture %m0, i8* nocapture %m1) {
474 ; X86-LABEL: test_mm_2intersect_epi32_b:
475 ; X86: # %bb.0: # %entry
476 ; X86-NEXT: pushl %esi # encoding: [0x56]
477 ; X86-NEXT: .cfi_def_cfa_offset 8
478 ; X86-NEXT: .cfi_offset %esi, -8
479 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x14]
480 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x10]
481 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c]
482 ; X86-NEXT: movl 8(%esp), %esi # encoding: [0x8b,0x74,0x24,0x08]
483 ; X86-NEXT: vbroadcastss (%esi), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0x06]
484 ; X86-NEXT: vp2intersectd (%edx){1to4}, %xmm0, %k0 # encoding: [0x62,0xf2,0x7f,0x18,0x68,0x02]
485 ; X86-NEXT: kshiftlw $12, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd0,0x0c]
486 ; X86-NEXT: kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
487 ; X86-NEXT: kmovw %k2, %edx # encoding: [0xc5,0xf8,0x93,0xd2]
488 ; X86-NEXT: movb %dl, (%ecx) # encoding: [0x88,0x11]
489 ; X86-NEXT: kshiftlw $12, %k1, %k0 # encoding: [0xc4,0xe3,0xf9,0x32,0xc1,0x0c]
490 ; X86-NEXT: kshiftrw $12, %k0, %k0 # encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
491 ; X86-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
492 ; X86-NEXT: movb %cl, (%eax) # encoding: [0x88,0x08]
493 ; X86-NEXT: popl %esi # encoding: [0x5e]
494 ; X86-NEXT: .cfi_def_cfa_offset 4
495 ; X86-NEXT: retl # encoding: [0xc3]
496 ;
497 ; X64-LABEL: test_mm_2intersect_epi32_b:
498 ; X64: # %bb.0: # %entry
499 ; X64-NEXT: vbroadcastss (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0x07]
500 ; X64-NEXT: vp2intersectd (%rsi){1to4}, %xmm0, %k0 # encoding: [0x62,0xf2,0x7f,0x18,0x68,0x06]
501 ; X64-NEXT: kshiftlw $12, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd0,0x0c]
502 ; X64-NEXT: kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
503 ; X64-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
504 ; X64-NEXT: movb %al, (%rdx) # encoding: [0x88,0x02]
505 ; X64-NEXT: kshiftlw $12, %k1, %k0 # encoding: [0xc4,0xe3,0xf9,0x32,0xc1,0x0c]
506 ; X64-NEXT: kshiftrw $12, %k0, %k0 # encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
507 ; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
508 ; X64-NEXT: movb %al, (%rcx) # encoding: [0x88,0x01]
509 ; X64-NEXT: retq # encoding: [0xc3]
510 entry:
511 %0 = load i32, i32* %a, align 4
512 %vecinit.i.i = insertelement <4 x i32> undef, i32 %0, i32 0
513 %vecinit3.i.i = shufflevector <4 x i32> %vecinit.i.i, <4 x i32> undef, <4 x i32> zeroinitializer
514 %1 = load i32, i32* %b, align 4
515 %vecinit.i.i2 = insertelement <4 x i32> undef, i32 %1, i32 0
516 %vecinit3.i.i3 = shufflevector <4 x i32> %vecinit.i.i2, <4 x i32> undef, <4 x i32> zeroinitializer
517 %2 = tail call { <4 x i1>, <4 x i1> } @llvm.x86.avx512.vp2intersect.d.128(<4 x i32> %vecinit3.i.i, <4 x i32> %vecinit3.i.i3)
518 %3 = extractvalue { <4 x i1>, <4 x i1> } %2, 0
519 %4 = shufflevector <4 x i1> %3, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
520 %5 = bitcast <8 x i1> %4 to i8
521 store i8 %5, i8* %m0, align 1
522 %6 = extractvalue { <4 x i1>, <4 x i1> } %2, 1
523 %7 = shufflevector <4 x i1> %6, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
524 %8 = bitcast <8 x i1> %7 to i8
525 store i8 %8, i8* %m1, align 1
526 ret void
527 }
528
529 define void @test_mm_2intersect_epi64_b(i64* nocapture readonly %a, i64* nocapture readonly %b, i8* nocapture %m0, i8* nocapture %m1) {
530 ; X86-LABEL: test_mm_2intersect_epi64_b:
531 ; X86: # %bb.0: # %entry
532 ; X86-NEXT: pushl %esi # encoding: [0x56]
533 ; X86-NEXT: .cfi_def_cfa_offset 8
534 ; X86-NEXT: .cfi_offset %esi, -8
535 ; X86-NEXT: movl 20(%esp), %eax # encoding: [0x8b,0x44,0x24,0x14]
536 ; X86-NEXT: movl 16(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x10]
537 ; X86-NEXT: movl 12(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c]
538 ; X86-NEXT: movl 8(%esp), %esi # encoding: [0x8b,0x74,0x24,0x08]
539 ; X86-NEXT: vmovddup (%esi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0x06]
540 ; X86-NEXT: # xmm0 = mem[0,0]
541 ; X86-NEXT: vmovddup (%edx), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0x0a]
542 ; X86-NEXT: # xmm1 = mem[0,0]
543 ; X86-NEXT: vp2intersectq %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xff,0x08,0x68,0xc1]
544 ; X86-NEXT: kshiftlw $14, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd0,0x0e]
545 ; X86-NEXT: kshiftrw $14, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0e]
546 ; X86-NEXT: kmovw %k2, %edx # encoding: [0xc5,0xf8,0x93,0xd2]
547 ; X86-NEXT: movb %dl, (%ecx) # encoding: [0x88,0x11]
548 ; X86-NEXT: kshiftlw $14, %k1, %k0 # encoding: [0xc4,0xe3,0xf9,0x32,0xc1,0x0e]
549 ; X86-NEXT: kshiftrw $14, %k0, %k0 # encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0e]
550 ; X86-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
551 ; X86-NEXT: movb %cl, (%eax) # encoding: [0x88,0x08]
552 ; X86-NEXT: popl %esi # encoding: [0x5e]
553 ; X86-NEXT: .cfi_def_cfa_offset 4
554 ; X86-NEXT: retl # encoding: [0xc3]
555 ;
556 ; X64-LABEL: test_mm_2intersect_epi64_b:
557 ; X64: # %bb.0: # %entry
558 ; X64-NEXT: vmovddup (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0x07]
559 ; X64-NEXT: # xmm0 = mem[0,0]
560 ; X64-NEXT: vp2intersectq (%rsi){1to2}, %xmm0, %k0 # encoding: [0x62,0xf2,0xff,0x18,0x68,0x06]
561 ; X64-NEXT: kshiftlw $14, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd0,0x0e]
562 ; X64-NEXT: kshiftrw $14, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0e]
563 ; X64-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
564 ; X64-NEXT: movb %al, (%rdx) # encoding: [0x88,0x02]
565 ; X64-NEXT: kshiftlw $14, %k1, %k0 # encoding: [0xc4,0xe3,0xf9,0x32,0xc1,0x0e]
566 ; X64-NEXT: kshiftrw $14, %k0, %k0 # encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0e]
567 ; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
568 ; X64-NEXT: movb %al, (%rcx) # encoding: [0x88,0x01]
569 ; X64-NEXT: retq # encoding: [0xc3]
570 entry:
571 %0 = load i64, i64* %a, align 8
572 %vecinit.i.i = insertelement <2 x i64> undef, i64 %0, i32 0
573 %vecinit1.i.i = shufflevector <2 x i64> %vecinit.i.i, <2 x i64> undef, <2 x i32> zeroinitializer
574 %1 = load i64, i64* %b, align 8
575 %vecinit.i.i2 = insertelement <2 x i64> undef, i64 %1, i32 0
576 %vecinit1.i.i3 = shufflevector <2 x i64> %vecinit.i.i2, <2 x i64> undef, <2 x i32> zeroinitializer
577 %2 = tail call { <2 x i1>, <2 x i1> } @llvm.x86.avx512.vp2intersect.q.128(<2 x i64> %vecinit1.i.i, <2 x i64> %vecinit1.i.i3)
578 %3 = extractvalue { <2 x i1>, <2 x i1> } %2, 0
579 %4 = shufflevector <2 x i1> %3, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
580 %5 = bitcast <8 x i1> %4 to i8
581 store i8 %5, i8* %m0, align 1
582 %6 = extractvalue { <2 x i1>, <2 x i1> } %2, 1
583 %7 = shufflevector <2 x i1> %6, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
584 %8 = bitcast <8 x i1> %7 to i8
585 store i8 %8, i8* %m1, align 1
586 ret void
587 }
588
589 declare { <8 x i1>, <8 x i1> } @llvm.x86.avx512.vp2intersect.d.256(<8 x i32>, <8 x i32>)
590 declare { <4 x i1>, <4 x i1> } @llvm.x86.avx512.vp2intersect.q.256(<4 x i64>, <4 x i64>)
591 declare { <4 x i1>, <4 x i1> } @llvm.x86.avx512.vp2intersect.d.128(<4 x i32>, <4 x i32>)
592 declare { <2 x i1>, <2 x i1> } @llvm.x86.avx512.vp2intersect.q.128(<2 x i64>, <2 x i64>)
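The test functions above mirror the user-level intrinsics that the companion clang change exposes. A hedged example of end-user C code (assuming the usual immintrin.h naming; the exact clang-side spelling is outside this llvm patch):

#include <immintrin.h>

// Requires -mavx512vp2intersect -mavx512vl. m0/m1 receive the two masks:
// bit i of *m0 is set if a's lane i occurs in b, and vice versa for *m1.
void intersect8(__m256i a, __m256i b, __mmask8 *m0, __mmask8 *m1) {
  _mm256_2intersect_epi32(a, b, m0, m1);
}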
0 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vp2intersect --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vp2intersect --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
3
4 define void @test_mm512_2intersect_epi32(<8 x i64> %a, <8 x i64> %b, i16* nocapture %m0, i16* nocapture %m1) {
5 ; X86-LABEL: test_mm512_2intersect_epi32:
6 ; X86: # %bb.0: # %entry
7 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
8 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
9 ; X86-NEXT: vp2intersectd %zmm1, %zmm0, %k0 # encoding: [0x62,0xf2,0x7f,0x48,0x68,0xc1]
10 ; X86-NEXT: kmovw %k0, (%ecx) # encoding: [0xc5,0xf8,0x91,0x01]
11 ; X86-NEXT: kmovw %k1, (%eax) # encoding: [0xc5,0xf8,0x91,0x08]
12 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
13 ; X86-NEXT: retl # encoding: [0xc3]
14 ;
15 ; X64-LABEL: test_mm512_2intersect_epi32:
16 ; X64: # %bb.0: # %entry
17 ; X64-NEXT: vp2intersectd %zmm1, %zmm0, %k0 # encoding: [0x62,0xf2,0x7f,0x48,0x68,0xc1]
18 ; X64-NEXT: kmovw %k0, (%rdi) # encoding: [0xc5,0xf8,0x91,0x07]
19 ; X64-NEXT: kmovw %k1, (%rsi) # encoding: [0xc5,0xf8,0x91,0x0e]
20 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
21 ; X64-NEXT: retq # encoding: [0xc3]
22 entry:
23 %0 = bitcast <8 x i64> %a to <16 x i32>
24 %1 = bitcast <8 x i64> %b to <16 x i32>
25 %2 = tail call { <16 x i1>, <16 x i1> } @llvm.x86.avx512.vp2intersect.d.512(<16 x i32> %0, <16 x i32> %1)
26 %3 = extractvalue { <16 x i1>, <16 x i1> } %2, 0
27 %4 = bitcast i16* %m0 to <16 x i1>*
28 store <16 x i1> %3, <16 x i1>* %4, align 16
29 %5 = extractvalue { <16 x i1>, <16 x i1> } %2, 1
30 %6 = bitcast i16* %m1 to <16 x i1>*
31 store <16 x i1> %5, <16 x i1>* %6, align 16
32 ret void
33 }
34
35 define void @test_mm512_2intersect_epi64(<8 x i64> %a, <8 x i64> %b, i8* nocapture %m0, i8* nocapture %m1) {
36 ; X86-LABEL: test_mm512_2intersect_epi64:
37 ; X86: # %bb.0: # %entry
38 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
39 ; X86-NEXT: vp2intersectq %zmm1, %zmm0, %k0 # encoding: [0x62,0xf2,0xff,0x48,0x68,0xc1]
40 ; X86-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
41 ; X86-NEXT: kmovw %k0, %edx # encoding: [0xc5,0xf8,0x93,0xd0]
42 ; X86-NEXT: movb %dl, (%eax) # encoding: [0x88,0x10]
43 ; X86-NEXT: movl 8(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
44 ; X86-NEXT: movb %cl, (%eax) # encoding: [0x88,0x08]
45 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
46 ; X86-NEXT: retl # encoding: [0xc3]
47 ;
48 ; X64-LABEL: test_mm512_2intersect_epi64:
49 ; X64: # %bb.0: # %entry
50 ; X64-NEXT: vp2intersectq %zmm1, %zmm0, %k0 # encoding: [0x62,0xf2,0xff,0x48,0x68,0xc1]
51 ; X64-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
52 ; X64-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
53 ; X64-NEXT: movb %cl, (%rdi) # encoding: [0x88,0x0f]
54 ; X64-NEXT: movb %al, (%rsi) # encoding: [0x88,0x06]
55 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
56 ; X64-NEXT: retq # encoding: [0xc3]
57 entry:
58 %0 = tail call { <8 x i1>, <8 x i1> } @llvm.x86.avx512.vp2intersect.q.512(<8 x i64> %a, <8 x i64> %b)
59 %1 = extractvalue { <8 x i1>, <8 x i1> } %0, 0
60 %2 = bitcast i8* %m0 to <8 x i1>*
61 store <8 x i1> %1, <8 x i1>* %2, align 8
62 %3 = extractvalue { <8 x i1>, <8 x i1> } %0, 1
63 %4 = bitcast i8* %m1 to <8 x i1>*
64 store <8 x i1> %3, <8 x i1>* %4, align 8
65 ret void
66 }
67
68 define void @test_mm512_2intersect_epi32_p(<8 x i64>* nocapture readonly %a, <8 x i64>* nocapture readonly %b, i16* nocapture %m0, i16* nocapture %m1) {
69 ; X86-LABEL: test_mm512_2intersect_epi32_p:
70 ; X86: # %bb.0: # %entry
71 ; X86-NEXT: pushl %esi # encoding: [0x56]
72 ; X86-NEXT: .cfi_def_cfa_offset 8
73 ; X86-NEXT: .cfi_offset %esi, -8
74 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x14]
75 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x10]
76 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c]
77 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi # encoding: [0x8b,0x74,0x24,0x08]
78 ; X86-NEXT: vmovaps (%esi), %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0x06]
79 ; X86-NEXT: vp2intersectd (%edx), %zmm0, %k0 # encoding: [0x62,0xf2,0x7f,0x48,0x68,0x02]
80 ; X86-NEXT: kmovw %k0, (%ecx) # encoding: [0xc5,0xf8,0x91,0x01]
81 ; X86-NEXT: kmovw %k1, (%eax) # encoding: [0xc5,0xf8,0x91,0x08]
82 ; X86-NEXT: popl %esi # encoding: [0x5e]
83 ; X86-NEXT: .cfi_def_cfa_offset 4
84 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
85 ; X86-NEXT: retl # encoding: [0xc3]
86 ;
87 ; X64-LABEL: test_mm512_2intersect_epi32_p:
88 ; X64: # %bb.0: # %entry
89 ; X64-NEXT: vmovaps (%rdi), %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0x07]
90 ; X64-NEXT: vp2intersectd (%rsi), %zmm0, %k0 # encoding: [0x62,0xf2,0x7f,0x48,0x68,0x06]
91 ; X64-NEXT: kmovw %k0, (%rdx) # encoding: [0xc5,0xf8,0x91,0x02]
92 ; X64-NEXT: kmovw %k1, (%rcx) # encoding: [0xc5,0xf8,0x91,0x09]
93 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
94 ; X64-NEXT: retq # encoding: [0xc3]
95 entry:
96 %0 = bitcast <8 x i64>* %a to <16 x i32>*
97 %1 = load <16 x i32>, <16 x i32>* %0, align 64
98 %2 = bitcast <8 x i64>* %b to <16 x i32>*
99 %3 = load <16 x i32>, <16 x i32>* %2, align 64
100 %4 = tail call { <16 x i1>, <16 x i1> } @llvm.x86.avx512.vp2intersect.d.512(<16 x i32> %1, <16 x i32> %3)
101 %5 = extractvalue { <16 x i1>, <16 x i1> } %4, 0
102 %6 = bitcast i16* %m0 to <16 x i1>*
103 store <16 x i1> %5, <16 x i1>* %6, align 16
104 %7 = extractvalue { <16 x i1>, <16 x i1> } %4, 1
105 %8 = bitcast i16* %m1 to <16 x i1>*
106 store <16 x i1> %7, <16 x i1>* %8, align 16
107 ret void
108 }
109
110 define void @test_mm512_2intersect_epi64_p(<8 x i64>* nocapture readonly %a, <8 x i64>* nocapture readonly %b, i8* nocapture %m0, i8* nocapture %m1) {
111 ; X86-LABEL: test_mm512_2intersect_epi64_p:
112 ; X86: # %bb.0: # %entry
113 ; X86-NEXT: movl 12(%esp), %eax # encoding: [0x8b,0x44,0x24,0x0c]
114 ; X86-NEXT: movl 8(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x08]
115 ; X86-NEXT: movl 4(%esp), %edx # encoding: [0x8b,0x54,0x24,0x04]
116 ; X86-NEXT: vmovaps (%edx), %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0x02]
117 ; X86-NEXT: vp2intersectq (%ecx), %zmm0, %k0 # encoding: [0x62,0xf2,0xff,0x48,0x68,0x01]
118 ; X86-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
119 ; X86-NEXT: kmovw %k0, %edx # encoding: [0xc5,0xf8,0x93,0xd0]
120 ; X86-NEXT: movb %dl, (%eax) # encoding: [0x88,0x10]
121 ; X86-NEXT: movl 16(%esp), %eax # encoding: [0x8b,0x44,0x24,0x10]
122 ; X86-NEXT: movb %cl, (%eax) # encoding: [0x88,0x08]
123 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
124 ; X86-NEXT: retl # encoding: [0xc3]
126 ;
127 ; X64-LABEL: test_mm512_2intersect_epi64_p:
128 ; X64: # %bb.0: # %entry
129 ; X64-NEXT: vmovaps (%rdi), %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0x07]
130 ; X64-NEXT: vp2intersectq (%rsi), %zmm0, %k0 # encoding: [0x62,0xf2,0xff,0x48,0x68,0x06]
131 ; X64-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
132 ; X64-NEXT: kmovw %k0, %esi # encoding: [0xc5,0xf8,0x93,0xf0]
133 ; X64-NEXT: movb %sil, (%rdx) # encoding: [0x40,0x88,0x32]
134 ; X64-NEXT: movb %al, (%rcx) # encoding: [0x88,0x01]
135 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
136 ; X64-NEXT: retq # encoding: [0xc3]
137 entry:
138 %0 = load <8 x i64>, <8 x i64>* %a, align 64
139 %1 = load <8 x i64>, <8 x i64>* %b, align 64
140 %2 = tail call { <8 x i1>, <8 x i1> } @llvm.x86.avx512.vp2intersect.q.512(<8 x i64> %0, <8 x i64> %1)
141 %3 = extractvalue { <8 x i1>, <8 x i1> } %2, 0
142 %4 = bitcast i8* %m0 to <8 x i1>*
143 store <8 x i1> %3, <8 x i1>* %4, align 8
144 %5 = extractvalue { <8 x i1>, <8 x i1> } %2, 1
145 %6 = bitcast i8* %m1 to <8 x i1>*
146 store <8 x i1> %5, <8 x i1>* %6, align 8
147 ret void
148 }
149
150 define void @test_mm512_2intersect_epi32_b(i32* nocapture readonly %a, i32* nocapture readonly %b, i16* nocapture %m0, i16* nocapture %m1) {
151 ; X86-LABEL: test_mm512_2intersect_epi32_b:
152 ; X86: # %bb.0: # %entry
153 ; X86-NEXT: pushl %esi # encoding: [0x56]
154 ; X86-NEXT: .cfi_def_cfa_offset 8
155 ; X86-NEXT: .cfi_offset %esi, -8
156 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x14]
157 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x10]
158 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c]
159 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi # encoding: [0x8b,0x74,0x24,0x08]
160 ; X86-NEXT: vbroadcastss (%esi), %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x18,0x06]
161 ; X86-NEXT: vp2intersectd (%edx){1to16}, %zmm0, %k0 # encoding: [0x62,0xf2,0x7f,0x58,0x68,0x02]
162 ; X86-NEXT: kmovw %k0, (%ecx) # encoding: [0xc5,0xf8,0x91,0x01]
163 ; X86-NEXT: kmovw %k1, (%eax) # encoding: [0xc5,0xf8,0x91,0x08]
164 ; X86-NEXT: popl %esi # encoding: [0x5e]
165 ; X86-NEXT: .cfi_def_cfa_offset 4
166 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
167 ; X86-NEXT: retl # encoding: [0xc3]
168 ;
169 ; X64-LABEL: test_mm512_2intersect_epi32_b:
170 ; X64: # %bb.0: # %entry
171 ; X64-NEXT: vbroadcastss (%rdi), %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x18,0x07]
172 ; X64-NEXT: vp2intersectd (%rsi){1to16}, %zmm0, %k0 # encoding: [0x62,0xf2,0x7f,0x58,0x68,0x06]
173 ; X64-NEXT: kmovw %k0, (%rdx) # encoding: [0xc5,0xf8,0x91,0x02]
174 ; X64-NEXT: kmovw %k1, (%rcx) # encoding: [0xc5,0xf8,0x91,0x09]
175 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
176 ; X64-NEXT: retq # encoding: [0xc3]
177 entry:
178 %0 = load i32, i32* %a, align 4
179 %vecinit.i = insertelement <16 x i32> undef, i32 %0, i32 0
180 %vecinit15.i = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
181 %1 = load i32, i32* %b, align 4
182 %vecinit.i2 = insertelement <16 x i32> undef, i32 %1, i32 0
183 %vecinit15.i3 = shufflevector <16 x i32> %vecinit.i2, <16 x i32> undef, <16 x i32> zeroinitializer
184 %2 = tail call { <16 x i1>, <16 x i1> } @llvm.x86.avx512.vp2intersect.d.512(<16 x i32> %vecinit15.i, <16 x i32> %vecinit15.i3)
185 %3 = extractvalue { <16 x i1>, <16 x i1> } %2, 0
186 %4 = bitcast i16* %m0 to <16 x i1>*
187 store <16 x i1> %3, <16 x i1>* %4, align 16
188 %5 = extractvalue { <16 x i1>, <16 x i1> } %2, 1
189 %6 = bitcast i16* %m1 to <16 x i1>*
190 store <16 x i1> %5, <16 x i1>* %6, align 16
191 ret void
192 }
193
194 define void @test_mm512_2intersect_epi64_b(i64* nocapture readonly %a, i64* nocapture readonly %b, i8* nocapture %m0, i8* nocapture %m1) {
195 ; X86-LABEL: test_mm512_2intersect_epi64_b:
196 ; X86: # %bb.0: # %entry
197 ; X86-NEXT: movl 12(%esp), %eax # encoding: [0x8b,0x44,0x24,0x0c]
198 ; X86-NEXT: movl 8(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x08]
199 ; X86-NEXT: movl 4(%esp), %edx # encoding: [0x8b,0x54,0x24,0x04]
200 ; X86-NEXT: vbroadcastsd (%edx), %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x19,0x02]
201 ; X86-NEXT: vbroadcastsd (%ecx), %zmm1 # encoding: [0x62,0xf2,0xfd,0x48,0x19,0x09]
202 ; X86-NEXT: vp2intersectq %zmm1, %zmm0, %k0 # encoding: [0x62,0xf2,0xff,0x48,0x68,0xc1]
203 ; X86-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
204 ; X86-NEXT: kmovw %k0, %edx # encoding: [0xc5,0xf8,0x93,0xd0]
205 ; X86-NEXT: movb %dl, (%eax) # encoding: [0x88,0x10]
206 ; X86-NEXT: movl 16(%esp), %eax # encoding: [0x8b,0x44,0x24,0x10]
207 ; X86-NEXT: movb %cl, (%eax) # encoding: [0x88,0x08]
208 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
209 ; X86-NEXT: retl # encoding: [0xc3]
210 ;
211 ; X64-LABEL: test_mm512_2intersect_epi64_b:
212 ; X64: # %bb.0: # %entry
213 ; X64-NEXT: vbroadcastsd (%rdi), %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x19,0x07]
214 ; X64-NEXT: vp2intersectq (%rsi){1to8}, %zmm0, %k0 # encoding: [0x62,0xf2,0xff,0x58,0x68,0x06]
215 ; X64-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
216 ; X64-NEXT: kmovw %k0, %esi # encoding: [0xc5,0xf8,0x93,0xf0]
217 ; X64-NEXT: movb %sil, (%rdx) # encoding: [0x40,0x88,0x32]
218 ; X64-NEXT: movb %al, (%rcx) # encoding: [0x88,0x01]
219 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
220 ; X64-NEXT: retq # encoding: [0xc3]
221 entry:
222 %0 = load i64, i64* %a, align 8
223 %vecinit.i = insertelement <8 x i64> undef, i64 %0, i32 0
224 %vecinit7.i = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
225 %1 = load i64, i64* %b, align 8
226 %vecinit.i2 = insertelement <8 x i64> undef, i64 %1, i32 0
227 %vecinit7.i3 = shufflevector <8 x i64> %vecinit.i2, <8 x i64> undef, <8 x i32> zeroinitializer
228 %2 = tail call { <8 x i1>, <8 x i1> } @llvm.x86.avx512.vp2intersect.q.512(<8 x i64> %vecinit7.i, <8 x i64> %vecinit7.i3)
229 %3 = extractvalue { <8 x i1>, <8 x i1> } %2, 0
230 %4 = bitcast i8* %m0 to <8 x i1>*
231 store <8 x i1> %3, <8 x i1>* %4, align 8
232 %5 = extractvalue { <8 x i1>, <8 x i1> } %2, 1
233 %6 = bitcast i8* %m1 to <8 x i1>*
234 store <8 x i1> %5, <8 x i1>* %6, align 8
235 ret void
236 }
237
238 declare { <16 x i1>, <16 x i1> } @llvm.x86.avx512.vp2intersect.d.512(<16 x i32>, <16 x i32>)
239 declare { <8 x i1>, <8 x i1> } @llvm.x86.avx512.vp2intersect.q.512(<8 x i64>, <8 x i64>)
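
For orientation before the next file: the IR above maps one-to-one onto the user-level intrinsic. A minimal C sketch, assuming the immintrin.h name _mm512_2intersect_epi32 (which lowers to llvm.x86.avx512.vp2intersect.d.512) and a build with -mavx512vp2intersect:

#include <immintrin.h>

/* vp2intersectd produces TWO masks in one shot: ka flags lanes of va that
   match some lane of vb, and kb flags lanes of vb that match some lane of
   va. The two stores mirror the kmovw stores checked above. */
void intersect16(const void *a, const void *b,
                 unsigned short *m0, unsigned short *m1) {
  __m512i va = _mm512_loadu_si512(a);
  __m512i vb = _mm512_loadu_si512(b);
  __mmask16 ka, kb;
  _mm512_2intersect_epi32(va, vb, &ka, &kb);  /* one vp2intersectd */
  *m0 = (unsigned short)ka;
  *m1 = (unsigned short)kb;
}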
11
22 ; CHECK: %[[REG1:.*]]:vr512_0_15 = COPY %1
33 ; CHECK: %[[REG2:.*]]:vr512_0_15 = COPY %2
4 ; CHECK: INLINEASM &"vpaddq\09$3, $2, $0 {$1}", 0, 7340042, def %{{.*}}, 1179657, %{{.*}}, 7340041, %[[REG1]], 7340041, %[[REG2]], 12, implicit-def early-clobber $df, 12, implicit-def early-clobber $fpsw, 12, implicit-def early-clobber $eflags
4 ; CHECK: INLINEASM &"vpaddq\09$3, $2, $0 {$1}", 0, {{.*}}, def %{{.*}}, {{.*}}, %{{.*}}, {{.*}}, %[[REG1]], {{.*}}, %[[REG2]], 12, implicit-def early-clobber $df, 12, implicit-def early-clobber $fpsw, 12, implicit-def early-clobber $eflags
5
56 define <8 x i64> @mask_Yk_i8(i8 signext %msk, <8 x i64> %x, <8 x i64> %y) {
67 entry:
78 %0 = tail call <8 x i64> asm "vpaddq\09$3, $2, $0 {$1}", "=x,^Yk,x,x,~{dirflag},~{fpsr},~{flags}"(i8 %msk, <8 x i64> %x, <8 x i64> %y)
0 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vp2intersect | FileCheck %s --check-prefixes=CHECK,X86
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vp2intersect | FileCheck %s --check-prefixes=CHECK,X64
3
4 ; Test with more than four live mask pairs
5
6 define void @test(<16 x i32> %a0, <16 x i32> %b0,
7 <16 x i32> %a1, <16 x i32> %b1,
8 <16 x i32> %a2, <16 x i32> %b2,
9 <16 x i32> %a3, <16 x i32> %b3,
10 <16 x i32> %a4, <16 x i32> %b4,
11 i16* nocapture %m0, i16* nocapture %m1) {
12 ; X86-LABEL: test:
13 ; X86: # %bb.0: # %entry
14 ; X86-NEXT: pushl %ebp
15 ; X86-NEXT: .cfi_def_cfa_offset 8
16 ; X86-NEXT: .cfi_offset %ebp, -8
17 ; X86-NEXT: movl %esp, %ebp
18 ; X86-NEXT: .cfi_def_cfa_register %ebp
19 ; X86-NEXT: pushl %edi
20 ; X86-NEXT: pushl %esi
21 ; X86-NEXT: andl $-64, %esp
22 ; X86-NEXT: subl $64, %esp
23 ; X86-NEXT: .cfi_offset %esi, -16
24 ; X86-NEXT: .cfi_offset %edi, -12
25 ; X86-NEXT: movl 456(%ebp), %esi
26 ; X86-NEXT: vmovaps 328(%ebp), %zmm3
27 ; X86-NEXT: vmovaps 200(%ebp), %zmm4
28 ; X86-NEXT: vmovaps 72(%ebp), %zmm5
29 ; X86-NEXT: vp2intersectd %zmm1, %zmm0, %k0
30 ; X86-NEXT: kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
31 ; X86-NEXT: kmovw %k1, {{[0-9]+}}(%esp)
32 ; X86-NEXT: vp2intersectd 8(%ebp), %zmm2, %k0
33 ; X86-NEXT: kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
34 ; X86-NEXT: kmovw %k1, {{[0-9]+}}(%esp)
35 ; X86-NEXT: vp2intersectd 136(%ebp), %zmm5, %k0
36 ; X86-NEXT: kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
37 ; X86-NEXT: kmovw %k1, {{[0-9]+}}(%esp)
38 ; X86-NEXT: vp2intersectd 264(%ebp), %zmm4, %k0
39 ; X86-NEXT: kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
40 ; X86-NEXT: kmovw %k1, {{[0-9]+}}(%esp)
41 ; X86-NEXT: vp2intersectd 392(%ebp), %zmm3, %k0
42 ; X86-NEXT: kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
43 ; X86-NEXT: kmovw %k1, {{[0-9]+}}(%esp)
44 ; X86-NEXT: vzeroupper
45 ; X86-NEXT: calll dummy
46 ; X86-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k0 # 4-byte Folded Reload
47 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
48 ; X86-NEXT: kmovw %k0, %eax
49 ; X86-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k0 # 4-byte Folded Reload
50 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
51 ; X86-NEXT: kmovw %k0, %ecx
52 ; X86-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k0 # 4-byte Folded Reload
53 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
54 ; X86-NEXT: kmovw %k0, %edx
55 ; X86-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k0 # 4-byte Folded Reload
56 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
57 ; X86-NEXT: kmovw %k0, %edi
58 ; X86-NEXT: addl %edi, %eax
59 ; X86-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k2 # 4-byte Folded Reload
60 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k3
61 ; X86-NEXT: kmovw %k2, %edi
62 ; X86-NEXT: addl %ecx, %edx
63 ; X86-NEXT: kmovw %k1, %ecx
64 ;
65 ; X64-LABEL: test:
66 ; X64: # %bb.0: # %entry
67 ; X64-NEXT: pushq %rbp
68 ; X64-NEXT: .cfi_def_cfa_offset 16
69 ; X64-NEXT: .cfi_offset %rbp, -16
70 ; X64-NEXT: movq %rsp, %rbp
71 ; X64-NEXT: .cfi_def_cfa_register %rbp
72 ; X64-NEXT: pushq %r14
73 ; X64-NEXT: pushq %rbx
74 ; X64-NEXT: andq $-64, %rsp
75 ; X64-NEXT: subq $64, %rsp
76 ; X64-NEXT: .cfi_offset %rbx, -32
77 ; X64-NEXT: .cfi_offset %r14, -24
78 ; X64-NEXT: movq %rdi, %r14
79 ; X64-NEXT: vmovaps 16(%rbp), %zmm8
80 ; X64-NEXT: vp2intersectd %zmm1, %zmm0, %k0
81 ; X64-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
82 ; X64-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
83 ; X64-NEXT: vp2intersectd %zmm3, %zmm2, %k0
84 ; X64-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
85 ; X64-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
86 ; X64-NEXT: vp2intersectd %zmm5, %zmm4, %k0
87 ; X64-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
88 ; X64-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
89 ; X64-NEXT: vp2intersectd %zmm7, %zmm6, %k0
90 ; X64-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
91 ; X64-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
92 ; X64-NEXT: vp2intersectd 80(%rbp), %zmm8, %k0
93 ; X64-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
94 ; X64-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
95 ; X64-NEXT: vzeroupper
96 ; X64-NEXT: callq dummy
97 ; X64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 4-byte Folded Reload
98 ; X64-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
99 ; X64-NEXT: kmovw %k0, %eax
100 ; X64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 4-byte Folded Reload
101 ; X64-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
102 ; X64-NEXT: kmovw %k0, %ecx
103 ; X64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 4-byte Folded Reload
104 ; X64-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
105 ; X64-NEXT: kmovw %k0, %edx
106 ; X64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 4-byte Folded Reload
107 ; X64-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
108 ; X64-NEXT: kmovw %k0, %esi
109 ; X64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 4-byte Folded Reload
110 ; X64-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
111 ; X64-NEXT: kmovw %k0, %edi
112 ; X64-NEXT: kmovw %k1, %ebx
113 entry:
114 %0 = call { <16 x i1>, <16 x i1> } @llvm.x86.avx512.vp2intersect.d.512(<16 x i32> %a0, <16 x i32> %b0)
115 %1 = call { <16 x i1>, <16 x i1> } @llvm.x86.avx512.vp2intersect.d.512(<16 x i32> %a1, <16 x i32> %b1)
116 %2 = call { <16 x i1>, <16 x i1> } @llvm.x86.avx512.vp2intersect.d.512(<16 x i32> %a2, <16 x i32> %b2)
117 %3 = call { <16 x i1>, <16 x i1> } @llvm.x86.avx512.vp2intersect.d.512(<16 x i32> %a3, <16 x i32> %b3)
118 %4 = call { <16 x i1>, <16 x i1> } @llvm.x86.avx512.vp2intersect.d.512(<16 x i32> %a4, <16 x i32> %b4)
119
120 %5 = extractvalue { <16 x i1>, <16 x i1> } %0, 0
121 %6 = extractvalue { <16 x i1>, <16 x i1> } %1, 0
122 %7 = extractvalue { <16 x i1>, <16 x i1> } %2, 0
123 %8 = extractvalue { <16 x i1>, <16 x i1> } %3, 0
124 %9 = extractvalue { <16 x i1>, <16 x i1> } %4, 0
125 %10 = extractvalue { <16 x i1>, <16 x i1> } %0, 1
126 %11 = extractvalue { <16 x i1>, <16 x i1> } %1, 1
127
128 call void @dummy()
129
130 %12 = bitcast <16 x i1> %5 to i16
131 %13 = bitcast <16 x i1> %6 to i16
132 %14 = bitcast <16 x i1> %7 to i16
133 %15 = bitcast <16 x i1> %8 to i16
134 %16 = bitcast <16 x i1> %9 to i16
135 %17 = bitcast <16 x i1> %10 to i16
136 %18 = bitcast <16 x i1> %11 to i16
137
138 %19 = add i16 %12, %13
139 %20 = add i16 %14, %15
140 %21 = add i16 %16, %17
141 %22 = add i16 %19, %21
142 %23 = add i16 %22, %20
143
144 store i16 %23, i16* %m0, align 16
145 ret void
146 }
147
148 declare { <16 x i1>, <16 x i1> } @llvm.x86.avx512.vp2intersect.d.512(<16 x i32>, <16 x i32>)
149 declare void @dummy()
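
The spill/reload pattern checked above can be provoked directly from C; here is a hedged sketch (the external function name "opaque" is hypothetical) that keeps five result pairs live across a call, one more than the four architectural pairs k0_k1 through k6_k7:

#include <immintrin.h>

void opaque(void);  /* hypothetical call the results must survive */

unsigned short five_live_pairs(const __m512i *a, const __m512i *b) {
  __mmask16 lo0, hi0, lo1, hi1, lo2, hi2, lo3, hi3, lo4, hi4;
  _mm512_2intersect_epi32(a[0], b[0], &lo0, &hi0);
  _mm512_2intersect_epi32(a[1], b[1], &lo1, &hi1);
  _mm512_2intersect_epi32(a[2], b[2], &lo2, &hi2);
  _mm512_2intersect_epi32(a[3], b[3], &lo3, &hi3);
  _mm512_2intersect_epi32(a[4], b[4], &lo4, &hi4);
  opaque();  /* only four pairs exist, so some results spill to the stack */
  return (unsigned short)(lo0 + lo1 + lo2 + lo3 + lo4 + hi0 + hi1);
}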
0 # RUN: llvm-mc --disassemble %s -triple=i686-apple-darwin9 | FileCheck %s
1
2 # Coverage
3 #CHECK: vp2intersectq %zmm2, %zmm1, %k0
4 0x62 0xf2 0xf7 0x48 0x68 0xc2
5
6 # The instruction encodes k1, but the destination is a mask-register pair,
7 # so we print the even register k0. Not sure if GNU binutils does the same.
8 #CHECK: vp2intersectq %zmm2, %zmm1, %k0
9 0x62 0xf2 0xf7 0x48 0x68 0xca
10
11 #CHECK: vp2intersectq (%esi){1to8}, %zmm4, %k6
12 0x62 0xf2 0xdf 0x58 0x68 0x36
13
14 #CHECK: vp2intersectd %xmm7, %xmm4, %k6
15 0x62 0xf2 0x5f 0x08 0x68 0xff
0 # RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s
1
2 # Coverage
3 #CHECK: vp2intersectq %zmm2, %zmm1, %k0
4 0x62 0xf2 0xf7 0x48 0x68 0xc2
5
6 # The instruction encodes k1, but the destination is a mask-register pair,
7 # so we print the even register k0. Not sure if GNU binutils does the same.
8 #CHECK: vp2intersectq %zmm2, %zmm1, %k0
9 0x62 0xf2 0xf7 0x48 0x68 0xca
10
11 #CHECK: vp2intersectq (%rsi){1to8}, %zmm9, %k6
12 0x62 0xf2 0xb7 0x58 0x68 0x36
13
14 #CHECK: vp2intersectd %xmm7, %xmm9, %k6
15 0x62 0xf2 0x37 0x08 0x68 0xff
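
One point worth restating about the two files above: the destination of vp2intersect is effectively a pair of mask registers, and the disassembler prints the even member regardless of which register number the encoding carries. A tiny C sketch of that normalization, inferred from the checks above rather than taken from the decoder itself:

/* Round an encoded mask register number down to the even member of its
   pair: k1 -> k0, k3 -> k2, k5 -> k4, k7 -> k6. */
static unsigned mask_pair_base(unsigned k) {
  return k & ~1u;
}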
0 # RUN: llvm-mc --disassemble %s -triple=i386 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s
1 # CHECK: vp2intersectd k4, zmm3, zmm4
2 0x62,0xf2,0x67,0x48,0x68,0xe4
3
4 # CHECK: vp2intersectd k4, zmm3, zmmword ptr [esp + 8*esi + 268435456]
5 0x62,0xf2,0x67,0x48,0x68,0xa4,0xf4,0x00,0x00,0x00,0x10
6
7 # CHECK: vp2intersectd k4, zmm3, zmmword ptr [edi + 4*eax + 291]
8 0x62,0xf2,0x67,0x48,0x68,0xa4,0x87,0x23,0x01,0x00,0x00
9
10 # CHECK: vp2intersectd k4, zmm3, dword ptr [eax]{1to16}
11 0x62,0xf2,0x67,0x58,0x68,0x20
12
13 # CHECK: vp2intersectd k4, zmm3, zmmword ptr [2*ebp - 2048]
14 0x62,0xf2,0x67,0x48,0x68,0x24,0x6d,0x00,0xf8,0xff,0xff
15
16 # CHECK: vp2intersectd k4, zmm3, zmmword ptr [ecx + 8128]
17 0x62,0xf2,0x67,0x48,0x68,0x61,0x7f
18
19 # CHECK: vp2intersectd k4, zmm3, dword ptr [edx - 512]{1to16}
20 0x62,0xf2,0x67,0x58,0x68,0x62,0x80
21
22 # CHECK: vp2intersectq k4, zmm3, zmm4
23 0x62,0xf2,0xe7,0x48,0x68,0xe4
24
25 # CHECK: vp2intersectq k4, zmm3, zmmword ptr [esp + 8*esi + 268435456]
26 0x62,0xf2,0xe7,0x48,0x68,0xa4,0xf4,0x00,0x00,0x00,0x10
27
28 # CHECK: vp2intersectq k4, zmm3, zmmword ptr [edi + 4*eax + 291]
29 0x62,0xf2,0xe7,0x48,0x68,0xa4,0x87,0x23,0x01,0x00,0x00
30
31 # CHECK: vp2intersectq k4, zmm3, qword ptr [eax]{1to8}
32 0x62,0xf2,0xe7,0x58,0x68,0x20
33
34 # CHECK: vp2intersectq k4, zmm3, zmmword ptr [2*ebp - 2048]
35 0x62,0xf2,0xe7,0x48,0x68,0x24,0x6d,0x00,0xf8,0xff,0xff
36
37 # CHECK: vp2intersectq k4, zmm3, zmmword ptr [ecx + 8128]
38 0x62,0xf2,0xe7,0x48,0x68,0x61,0x7f
39
40 # CHECK: vp2intersectq k4, zmm3, qword ptr [edx - 1024]{1to8}
41 0x62,0xf2,0xe7,0x58,0x68,0x62,0x80
42
0 # RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s
1 # CHECK: vp2intersectd k4, zmm23, zmm24
2 0x62,0x92,0x47,0x40,0x68,0xe0
3
4 # CHECK: vp2intersectd k4, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
5 0x62,0xb2,0x47,0x40,0x68,0xa4,0xf5,0x00,0x00,0x00,0x10
6
7 # CHECK: vp2intersectd k4, zmm23, zmmword ptr [r8 + 4*rax + 291]
8 0x62,0xd2,0x47,0x40,0x68,0xa4,0x80,0x23,0x01,0x00,0x00
9
10 # CHECK: vp2intersectd k4, zmm23, dword ptr [rip]{1to16}
11 0x62,0xf2,0x47,0x50,0x68,0x25,0x00,0x00,0x00,0x00
12
13 # CHECK: vp2intersectd k4, zmm23, zmmword ptr [2*rbp - 2048]
14 0x62,0xf2,0x47,0x40,0x68,0x24,0x6d,0x00,0xf8,0xff,0xff
15
16 # CHECK: vp2intersectd k4, zmm23, zmmword ptr [rcx + 8128]
17 0x62,0xf2,0x47,0x40,0x68,0x61,0x7f
18
19 # CHECK: vp2intersectd k4, zmm23, dword ptr [rdx - 512]{1to16}
20 0x62,0xf2,0x47,0x50,0x68,0x62,0x80
21
22 # CHECK: vp2intersectq k4, zmm23, zmm24
23 0x62,0x92,0xc7,0x40,0x68,0xe0
24
25 # CHECK: vp2intersectq k4, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
26 0x62,0xb2,0xc7,0x40,0x68,0xa4,0xf5,0x00,0x00,0x00,0x10
27
28 # CHECK: vp2intersectq k4, zmm23, zmmword ptr [r8 + 4*rax + 291]
29 0x62,0xd2,0xc7,0x40,0x68,0xa4,0x80,0x23,0x01,0x00,0x00
30
31 # CHECK: vp2intersectq k4, zmm23, qword ptr [rip]{1to8}
32 0x62,0xf2,0xc7,0x50,0x68,0x25,0x00,0x00,0x00,0x00
33
34 # CHECK: vp2intersectq k4, zmm23, zmmword ptr [2*rbp - 2048]
35 0x62,0xf2,0xc7,0x40,0x68,0x24,0x6d,0x00,0xf8,0xff,0xff
36
37 # CHECK: vp2intersectq k4, zmm23, zmmword ptr [rcx + 8128]
38 0x62,0xf2,0xc7,0x40,0x68,0x61,0x7f
39
40 # CHECK: vp2intersectq k4, zmm23, qword ptr [rdx - 1024]{1to8}
41 0x62,0xf2,0xc7,0x50,0x68,0x62,0x80
42
0 # RUN: llvm-mc --disassemble %s -triple=i686-apple-darwin9 | FileCheck %s
1
2 # CHECK: vp2intersectd %ymm4, %ymm3, %k6
3 0x62,0xf2,0x67,0x28,0x68,0xf4
4
5 # CHECK: vp2intersectd %xmm4, %xmm3, %k6
6 0x62,0xf2,0x67,0x08,0x68,0xf4
7
8 # CHECK: vp2intersectd 268435456(%esp,%esi,8), %ymm3, %k6
9 0x62,0xf2,0x67,0x28,0x68,0xb4,0xf4,0x00,0x00,0x00,0x10
10
11 # CHECK: vp2intersectd 291(%edi,%eax,4), %ymm3, %k6
12 0x62,0xf2,0x67,0x28,0x68,0xb4,0x87,0x23,0x01,0x00,0x00
13
14 # CHECK: vp2intersectd (%eax){1to8}, %ymm3, %k6
15 0x62,0xf2,0x67,0x38,0x68,0x30
16
17 # CHECK: vp2intersectd -1024(,%ebp,2), %ymm3, %k6
18 0x62,0xf2,0x67,0x28,0x68,0x34,0x6d,0x00,0xfc,0xff,0xff
19
20 # CHECK: vp2intersectd 4064(%ecx), %ymm3, %k6
21 0x62,0xf2,0x67,0x28,0x68,0x71,0x7f
22
23 # CHECK: vp2intersectd -512(%edx){1to8}, %ymm3, %k6
24 0x62,0xf2,0x67,0x38,0x68,0x72,0x80
25
26 # CHECK: vp2intersectd 268435456(%esp,%esi,8), %xmm3, %k6
27 0x62,0xf2,0x67,0x08,0x68,0xb4,0xf4,0x00,0x00,0x00,0x10
28
29 # CHECK: vp2intersectd 291(%edi,%eax,4), %xmm3, %k6
30 0x62,0xf2,0x67,0x08,0x68,0xb4,0x87,0x23,0x01,0x00,0x00
31
32 # CHECK: vp2intersectd (%eax){1to4}, %xmm3, %k6
33 0x62,0xf2,0x67,0x18,0x68,0x30
34
35 # CHECK: vp2intersectd -512(,%ebp,2), %xmm3, %k6
36 0x62,0xf2,0x67,0x08,0x68,0x34,0x6d,0x00,0xfe,0xff,0xff
37
38 # CHECK: vp2intersectd 2032(%ecx), %xmm3, %k6
39 0x62,0xf2,0x67,0x08,0x68,0x71,0x7f
40
41 # CHECK: vp2intersectd -512(%edx){1to4}, %xmm3, %k6
42 0x62,0xf2,0x67,0x18,0x68,0x72,0x80
43
44 # CHECK: vp2intersectq %ymm4, %ymm3, %k6
45 0x62,0xf2,0xe7,0x28,0x68,0xf4
46
47 # CHECK: vp2intersectq %xmm4, %xmm3, %k6
48 0x62,0xf2,0xe7,0x08,0x68,0xf4
49
50 # CHECK: vp2intersectq 268435456(%esp,%esi,8), %ymm3, %k6
51 0x62,0xf2,0xe7,0x28,0x68,0xb4,0xf4,0x00,0x00,0x00,0x10
52
53 # CHECK: vp2intersectq 291(%edi,%eax,4), %ymm3, %k6
54 0x62,0xf2,0xe7,0x28,0x68,0xb4,0x87,0x23,0x01,0x00,0x00
55
56 # CHECK: vp2intersectq (%eax){1to4}, %ymm3, %k6
57 0x62,0xf2,0xe7,0x38,0x68,0x30
58
59 # CHECK: vp2intersectq -1024(,%ebp,2), %ymm3, %k6
60 0x62,0xf2,0xe7,0x28,0x68,0x34,0x6d,0x00,0xfc,0xff,0xff
61
62 # CHECK: vp2intersectq 4064(%ecx), %ymm3, %k6
63 0x62,0xf2,0xe7,0x28,0x68,0x71,0x7f
64
65 # CHECK: vp2intersectq -1024(%edx){1to4}, %ymm3, %k6
66 0x62,0xf2,0xe7,0x38,0x68,0x72,0x80
67
68 # CHECK: vp2intersectq 268435456(%esp,%esi,8), %xmm3, %k6
69 0x62,0xf2,0xe7,0x08,0x68,0xb4,0xf4,0x00,0x00,0x00,0x10
70
71 # CHECK: vp2intersectq 291(%edi,%eax,4), %xmm3, %k6
72 0x62,0xf2,0xe7,0x08,0x68,0xb4,0x87,0x23,0x01,0x00,0x00
73
74 # CHECK: vp2intersectq (%eax){1to2}, %xmm3, %k6
75 0x62,0xf2,0xe7,0x18,0x68,0x30
76
77 # CHECK: vp2intersectq -512(,%ebp,2), %xmm3, %k6
78 0x62,0xf2,0xe7,0x08,0x68,0x34,0x6d,0x00,0xfe,0xff,0xff
79
80 # CHECK: vp2intersectq 2032(%ecx), %xmm3, %k6
81 0x62,0xf2,0xe7,0x08,0x68,0x71,0x7f
82
83 # CHECK: vp2intersectq -1024(%edx){1to2}, %xmm3, %k6
84 0x62,0xf2,0xe7,0x18,0x68,0x72,0x80
85
0 # RUN: llvm-mc --disassemble %s -triple=i386 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s
1
2 # CHECK: vp2intersectd k6, ymm3, ymm4
3 0x62,0xf2,0x67,0x28,0x68,0xf4
4
5 # CHECK: vp2intersectd k6, xmm3, xmm4
6 0x62,0xf2,0x67,0x08,0x68,0xf4
7
8 # CHECK: vp2intersectd k6, ymm3, ymmword ptr [esp + 8*esi + 268435456]
9 0x62,0xf2,0x67,0x28,0x68,0xb4,0xf4,0x00,0x00,0x00,0x10
10
11 # CHECK: vp2intersectd k6, ymm3, ymmword ptr [edi + 4*eax + 291]
12 0x62,0xf2,0x67,0x28,0x68,0xb4,0x87,0x23,0x01,0x00,0x00
13
14 # CHECK: vp2intersectd k6, ymm3, dword ptr [eax]{1to8}
15 0x62,0xf2,0x67,0x38,0x68,0x30
16
17 # CHECK: vp2intersectd k6, ymm3, ymmword ptr [2*ebp - 1024]
18 0x62,0xf2,0x67,0x28,0x68,0x34,0x6d,0x00,0xfc,0xff,0xff
19
20 # CHECK: vp2intersectd k6, ymm3, ymmword ptr [ecx + 4064]
21 0x62,0xf2,0x67,0x28,0x68,0x71,0x7f
22
23 # CHECK: vp2intersectd k6, ymm3, dword ptr [edx - 512]{1to8}
24 0x62,0xf2,0x67,0x38,0x68,0x72,0x80
25
26 # CHECK: vp2intersectd k6, xmm3, xmmword ptr [esp + 8*esi + 268435456]
27 0x62,0xf2,0x67,0x08,0x68,0xb4,0xf4,0x00,0x00,0x00,0x10
28
29 # CHECK: vp2intersectd k6, xmm3, xmmword ptr [edi + 4*eax + 291]
30 0x62,0xf2,0x67,0x08,0x68,0xb4,0x87,0x23,0x01,0x00,0x00
31
32 # CHECK: vp2intersectd k6, xmm3, dword ptr [eax]{1to4}
33 0x62,0xf2,0x67,0x18,0x68,0x30
34
35 # CHECK: vp2intersectd k6, xmm3, xmmword ptr [2*ebp - 512]
36 0x62,0xf2,0x67,0x08,0x68,0x34,0x6d,0x00,0xfe,0xff,0xff
37
38 # CHECK: vp2intersectd k6, xmm3, xmmword ptr [ecx + 2032]
39 0x62,0xf2,0x67,0x08,0x68,0x71,0x7f
40
41 # CHECK: vp2intersectd k6, xmm3, dword ptr [edx - 512]{1to4}
42 0x62,0xf2,0x67,0x18,0x68,0x72,0x80
43
44 # CHECK: vp2intersectq k6, ymm3, ymm4
45 0x62,0xf2,0xe7,0x28,0x68,0xf4
46
47 # CHECK: vp2intersectq k6, xmm3, xmm4
48 0x62,0xf2,0xe7,0x08,0x68,0xf4
49
50 # CHECK: vp2intersectq k6, ymm3, ymmword ptr [esp + 8*esi + 268435456]
51 0x62,0xf2,0xe7,0x28,0x68,0xb4,0xf4,0x00,0x00,0x00,0x10
52
53 # CHECK: vp2intersectq k6, ymm3, ymmword ptr [edi + 4*eax + 291]
54 0x62,0xf2,0xe7,0x28,0x68,0xb4,0x87,0x23,0x01,0x00,0x00
55
56 # CHECK: vp2intersectq k6, ymm3, qword ptr [eax]{1to4}
57 0x62,0xf2,0xe7,0x38,0x68,0x30
58
59 # CHECK: vp2intersectq k6, ymm3, ymmword ptr [2*ebp - 1024]
60 0x62,0xf2,0xe7,0x28,0x68,0x34,0x6d,0x00,0xfc,0xff,0xff
61
62 # CHECK: vp2intersectq k6, ymm3, ymmword ptr [ecx + 4064]
63 0x62,0xf2,0xe7,0x28,0x68,0x71,0x7f
64
65 # CHECK: vp2intersectq k6, ymm3, qword ptr [edx - 1024]{1to4}
66 0x62,0xf2,0xe7,0x38,0x68,0x72,0x80
67
68 # CHECK: vp2intersectq k6, xmm3, xmmword ptr [esp + 8*esi + 268435456]
69 0x62,0xf2,0xe7,0x08,0x68,0xb4,0xf4,0x00,0x00,0x00,0x10
70
71 # CHECK: vp2intersectq k6, xmm3, xmmword ptr [edi + 4*eax + 291]
72 0x62,0xf2,0xe7,0x08,0x68,0xb4,0x87,0x23,0x01,0x00,0x00
73
74 # CHECK: vp2intersectq k6, xmm3, qword ptr [eax]{1to2}
75 0x62,0xf2,0xe7,0x18,0x68,0x30
76
77 # CHECK: vp2intersectq k6, xmm3, xmmword ptr [2*ebp - 512]
78 0x62,0xf2,0xe7,0x08,0x68,0x34,0x6d,0x00,0xfe,0xff,0xff
79
80 # CHECK: vp2intersectq k6, xmm3, xmmword ptr [ecx + 2032]
81 0x62,0xf2,0xe7,0x08,0x68,0x71,0x7f
82
83 # CHECK: vp2intersectq k6, xmm3, qword ptr [edx - 1024]{1to2}
84 0x62,0xf2,0xe7,0x18,0x68,0x72,0x80
0 # RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s
1
2 # CHECK: vp2intersectd %ymm24, %ymm23, %k6
3 0x62,0x92,0x47,0x20,0x68,0xf0
4
5 # CHECK: vp2intersectd %xmm24, %xmm23, %k6
6 0x62,0x92,0x47,0x00,0x68,0xf0
7
8 # CHECK: vp2intersectd 268435456(%rbp,%r14,8), %ymm23, %k6
9 0x62,0xb2,0x47,0x20,0x68,0xb4,0xf5,0x00,0x00,0x00,0x10
10
11 # CHECK: vp2intersectd 291(%r8,%rax,4), %ymm23, %k6
12 0x62,0xd2,0x47,0x20,0x68,0xb4,0x80,0x23,0x01,0x00,0x00
13
14 # CHECK: vp2intersectd (%rip){1to8}, %ymm23, %k6
15 0x62,0xf2,0x47,0x30,0x68,0x35,0x00,0x00,0x00,0x00
16
17 # CHECK: vp2intersectd -1024(,%rbp,2), %ymm23, %k6
18 0x62,0xf2,0x47,0x20,0x68,0x34,0x6d,0x00,0xfc,0xff,0xff
19
20 # CHECK: vp2intersectd 4064(%rcx), %ymm23, %k6
21 0x62,0xf2,0x47,0x20,0x68,0x71,0x7f
22
23 # CHECK: vp2intersectd -512(%rdx){1to8}, %ymm23, %k6
24 0x62,0xf2,0x47,0x30,0x68,0x72,0x80
25
26 # CHECK: vp2intersectd 268435456(%rbp,%r14,8), %xmm23, %k6
27 0x62,0xb2,0x47,0x00,0x68,0xb4,0xf5,0x00,0x00,0x00,0x10
28
29 # CHECK: vp2intersectd 291(%r8,%rax,4), %xmm23, %k6
30 0x62,0xd2,0x47,0x00,0x68,0xb4,0x80,0x23,0x01,0x00,0x00
31
32 # CHECK: vp2intersectd (%rip){1to4}, %xmm23, %k6
33 0x62,0xf2,0x47,0x10,0x68,0x35,0x00,0x00,0x00,0x00
34
35 # CHECK: vp2intersectd -512(,%rbp,2), %xmm23, %k6
36 0x62,0xf2,0x47,0x00,0x68,0x34,0x6d,0x00,0xfe,0xff,0xff
37
38 # CHECK: vp2intersectd 2032(%rcx), %xmm23, %k6
39 0x62,0xf2,0x47,0x00,0x68,0x71,0x7f
40
41 # CHECK: vp2intersectd -512(%rdx){1to4}, %xmm23, %k6
42 0x62,0xf2,0x47,0x10,0x68,0x72,0x80
43
44 # CHECK: vp2intersectq %ymm24, %ymm23, %k6
45 0x62,0x92,0xc7,0x20,0x68,0xf0
46
47 # CHECK: vp2intersectq %xmm24, %xmm23, %k6
48 0x62,0x92,0xc7,0x00,0x68,0xf0
49
50 # CHECK: vp2intersectq 268435456(%rbp,%r14,8), %ymm23, %k6
51 0x62,0xb2,0xc7,0x20,0x68,0xb4,0xf5,0x00,0x00,0x00,0x10
52
53 # CHECK: vp2intersectq 291(%r8,%rax,4), %ymm23, %k6
54 0x62,0xd2,0xc7,0x20,0x68,0xb4,0x80,0x23,0x01,0x00,0x00
55
56 # CHECK: vp2intersectq (%rip){1to4}, %ymm23, %k6
57 0x62,0xf2,0xc7,0x30,0x68,0x35,0x00,0x00,0x00,0x00
58
59 # CHECK: vp2intersectq -1024(,%rbp,2), %ymm23, %k6
60 0x62,0xf2,0xc7,0x20,0x68,0x34,0x6d,0x00,0xfc,0xff,0xff
61
62 # CHECK: vp2intersectq 4064(%rcx), %ymm23, %k6
63 0x62,0xf2,0xc7,0x20,0x68,0x71,0x7f
64
65 # CHECK: vp2intersectq -1024(%rdx){1to4}, %ymm23, %k6
66 0x62,0xf2,0xc7,0x30,0x68,0x72,0x80
67
68 # CHECK: vp2intersectq 268435456(%rbp,%r14,8), %xmm23, %k6
69 0x62,0xb2,0xc7,0x00,0x68,0xb4,0xf5,0x00,0x00,0x00,0x10
70
71 # CHECK: vp2intersectq 291(%r8,%rax,4), %xmm23, %k6
72 0x62,0xd2,0xc7,0x00,0x68,0xb4,0x80,0x23,0x01,0x00,0x00
73
74 # CHECK: vp2intersectq (%rip){1to2}, %xmm23, %k6
75 0x62,0xf2,0xc7,0x10,0x68,0x35,0x00,0x00,0x00,0x00
76
77 # CHECK: vp2intersectq -512(,%rbp,2), %xmm23, %k6
78 0x62,0xf2,0xc7,0x00,0x68,0x34,0x6d,0x00,0xfe,0xff,0xff
79
80 # CHECK: vp2intersectq 2032(%rcx), %xmm23, %k6
81 0x62,0xf2,0xc7,0x00,0x68,0x71,0x7f
82
83 # CHECK: vp2intersectq -1024(%rdx){1to2}, %xmm23, %k6
84 0x62,0xf2,0xc7,0x10,0x68,0x72,0x80
0 # RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s
1
2 # CHECK: vp2intersectd k6, ymm23, ymm24
3 0x62,0x92,0x47,0x20,0x68,0xf0
4
5 # CHECK: vp2intersectd k6, xmm23, xmm24
6 0x62,0x92,0x47,0x00,0x68,0xf0
7
8 # CHECK: vp2intersectd k6, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
9 0x62,0xb2,0x47,0x20,0x68,0xb4,0xf5,0x00,0x00,0x00,0x10
10
11 # CHECK: vp2intersectd k6, ymm23, ymmword ptr [r8 + 4*rax + 291]
12 0x62,0xd2,0x47,0x20,0x68,0xb4,0x80,0x23,0x01,0x00,0x00
13
14 # CHECK: vp2intersectd k6, ymm23, dword ptr [rip]{1to8}
15 0x62,0xf2,0x47,0x30,0x68,0x35,0x00,0x00,0x00,0x00
16
17 # CHECK: vp2intersectd k6, ymm23, ymmword ptr [2*rbp - 1024]
18 0x62,0xf2,0x47,0x20,0x68,0x34,0x6d,0x00,0xfc,0xff,0xff
19
20 # CHECK: vp2intersectd k6, ymm23, ymmword ptr [rcx + 4064]
21 0x62,0xf2,0x47,0x20,0x68,0x71,0x7f
22
23 # CHECK: vp2intersectd k6, ymm23, dword ptr [rdx - 512]{1to8}
24 0x62,0xf2,0x47,0x30,0x68,0x72,0x80
25
26 # CHECK: vp2intersectd k6, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
27 0x62,0xb2,0x47,0x00,0x68,0xb4,0xf5,0x00,0x00,0x00,0x10
28
29 # CHECK: vp2intersectd k6, xmm23, xmmword ptr [r8 + 4*rax + 291]
30 0x62,0xd2,0x47,0x00,0x68,0xb4,0x80,0x23,0x01,0x00,0x00
31
32 # CHECK: vp2intersectd k6, xmm23, dword ptr [rip]{1to4}
33 0x62,0xf2,0x47,0x10,0x68,0x35,0x00,0x00,0x00,0x00
34
35 # CHECK: vp2intersectd k6, xmm23, xmmword ptr [2*rbp - 512]
36 0x62,0xf2,0x47,0x00,0x68,0x34,0x6d,0x00,0xfe,0xff,0xff
37
38 # CHECK: vp2intersectd k6, xmm23, xmmword ptr [rcx + 2032]
39 0x62,0xf2,0x47,0x00,0x68,0x71,0x7f
40
41 # CHECK: vp2intersectd k6, xmm23, dword ptr [rdx - 512]{1to4}
42 0x62,0xf2,0x47,0x10,0x68,0x72,0x80
43
44 # CHECK: vp2intersectq k6, ymm23, ymm24
45 0x62,0x92,0xc7,0x20,0x68,0xf0
46
47 # CHECK: vp2intersectq k6, xmm23, xmm24
48 0x62,0x92,0xc7,0x00,0x68,0xf0
49
50 # CHECK: vp2intersectq k6, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
51 0x62,0xb2,0xc7,0x20,0x68,0xb4,0xf5,0x00,0x00,0x00,0x10
52
53 # CHECK: vp2intersectq k6, ymm23, ymmword ptr [r8 + 4*rax + 291]
54 0x62,0xd2,0xc7,0x20,0x68,0xb4,0x80,0x23,0x01,0x00,0x00
55
56 # CHECK: vp2intersectq k6, ymm23, qword ptr [rip]{1to4}
57 0x62,0xf2,0xc7,0x30,0x68,0x35,0x00,0x00,0x00,0x00
58
59 # CHECK: vp2intersectq k6, ymm23, ymmword ptr [2*rbp - 1024]
60 0x62,0xf2,0xc7,0x20,0x68,0x34,0x6d,0x00,0xfc,0xff,0xff
61
62 # CHECK: vp2intersectq k6, ymm23, ymmword ptr [rcx + 4064]
63 0x62,0xf2,0xc7,0x20,0x68,0x71,0x7f
64
65 # CHECK: vp2intersectq k6, ymm23, qword ptr [rdx - 1024]{1to4}
66 0x62,0xf2,0xc7,0x30,0x68,0x72,0x80
67
68 # CHECK: vp2intersectq k6, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
69 0x62,0xb2,0xc7,0x00,0x68,0xb4,0xf5,0x00,0x00,0x00,0x10
70
71 # CHECK: vp2intersectq k6, xmm23, xmmword ptr [r8 + 4*rax + 291]
72 0x62,0xd2,0xc7,0x00,0x68,0xb4,0x80,0x23,0x01,0x00,0x00
73
74 # CHECK: vp2intersectq k6, xmm23, qword ptr [rip]{1to2}
75 0x62,0xf2,0xc7,0x10,0x68,0x35,0x00,0x00,0x00,0x00
76
77 # CHECK: vp2intersectq k6, xmm23, xmmword ptr [2*rbp - 512]
78 0x62,0xf2,0xc7,0x00,0x68,0x34,0x6d,0x00,0xfe,0xff,0xff
79
80 # CHECK: vp2intersectq k6, xmm23, xmmword ptr [rcx + 2032]
81 0x62,0xf2,0xc7,0x00,0x68,0x71,0x7f
82
83 # CHECK: vp2intersectq k6, xmm23, qword ptr [rdx - 1024]{1to2}
84 0x62,0xf2,0xc7,0x10,0x68,0x72,0x80
0 // RUN: llvm-mc -triple i386-unknown-unknown --show-encoding %s | FileCheck %s
1
2 // CHECK: vp2intersectd %ymm4, %ymm3, %k6
3 // CHECK: encoding: [0x62,0xf2,0x67,0x28,0x68,0xf4]
4 vp2intersectd %ymm4, %ymm3, %k6
5
6 // CHECK: vp2intersectd %xmm4, %xmm3, %k6
7 // CHECK: encoding: [0x62,0xf2,0x67,0x08,0x68,0xf4]
8 vp2intersectd %xmm4, %xmm3, %k6
9
10 // CHECK: vp2intersectd 268435456(%esp,%esi,8), %ymm3, %k6
11 // CHECK: encoding: [0x62,0xf2,0x67,0x28,0x68,0xb4,0xf4,0x00,0x00,0x00,0x10]
12 vp2intersectd 268435456(%esp,%esi,8), %ymm3, %k6
13
14 // CHECK: vp2intersectd 291(%edi,%eax,4), %ymm3, %k6
15 // CHECK: encoding: [0x62,0xf2,0x67,0x28,0x68,0xb4,0x87,0x23,0x01,0x00,0x00]
16 vp2intersectd 291(%edi,%eax,4), %ymm3, %k6
17
18 // CHECK: vp2intersectd (%eax){1to8}, %ymm3, %k6
19 // CHECK: encoding: [0x62,0xf2,0x67,0x38,0x68,0x30]
20 vp2intersectd (%eax){1to8}, %ymm3, %k6
21
22 // CHECK: vp2intersectd -1024(,%ebp,2), %ymm3, %k6
23 // CHECK: encoding: [0x62,0xf2,0x67,0x28,0x68,0x34,0x6d,0x00,0xfc,0xff,0xff]
24 vp2intersectd -1024(,%ebp,2), %ymm3, %k6
25
26 // CHECK: vp2intersectd 4064(%ecx), %ymm3, %k6
27 // CHECK: encoding: [0x62,0xf2,0x67,0x28,0x68,0x71,0x7f]
28 vp2intersectd 4064(%ecx), %ymm3, %k6
29
30 // CHECK: vp2intersectd -512(%edx){1to8}, %ymm3, %k6
31 // CHECK: encoding: [0x62,0xf2,0x67,0x38,0x68,0x72,0x80]
32 vp2intersectd -512(%edx){1to8}, %ymm3, %k6
33
34 // CHECK: vp2intersectd 268435456(%esp,%esi,8), %xmm3, %k6
35 // CHECK: encoding: [0x62,0xf2,0x67,0x08,0x68,0xb4,0xf4,0x00,0x00,0x00,0x10]
36 vp2intersectd 268435456(%esp,%esi,8), %xmm3, %k6
37
38 // CHECK: vp2intersectd 291(%edi,%eax,4), %xmm3, %k6
39 // CHECK: encoding: [0x62,0xf2,0x67,0x08,0x68,0xb4,0x87,0x23,0x01,0x00,0x00]
40 vp2intersectd 291(%edi,%eax,4), %xmm3, %k6
41
42 // CHECK: vp2intersectd (%eax){1to4}, %xmm3, %k6
43 // CHECK: encoding: [0x62,0xf2,0x67,0x18,0x68,0x30]
44 vp2intersectd (%eax){1to4}, %xmm3, %k6
45
46 // CHECK: vp2intersectd -512(,%ebp,2), %xmm3, %k6
47 // CHECK: encoding: [0x62,0xf2,0x67,0x08,0x68,0x34,0x6d,0x00,0xfe,0xff,0xff]
48 vp2intersectd -512(,%ebp,2), %xmm3, %k6
49
50 // CHECK: vp2intersectd 2032(%ecx), %xmm3, %k6
51 // CHECK: encoding: [0x62,0xf2,0x67,0x08,0x68,0x71,0x7f]
52 vp2intersectd 2032(%ecx), %xmm3, %k6
53
54 // CHECK: vp2intersectd -512(%edx){1to4}, %xmm3, %k6
55 // CHECK: encoding: [0x62,0xf2,0x67,0x18,0x68,0x72,0x80]
56 vp2intersectd -512(%edx){1to4}, %xmm3, %k6
57
58 // CHECK: vp2intersectq %ymm4, %ymm3, %k6
59 // CHECK: encoding: [0x62,0xf2,0xe7,0x28,0x68,0xf4]
60 vp2intersectq %ymm4, %ymm3, %k6
61
62 // CHECK: vp2intersectq %xmm4, %xmm3, %k6
63 // CHECK: encoding: [0x62,0xf2,0xe7,0x08,0x68,0xf4]
64 vp2intersectq %xmm4, %xmm3, %k6
65
66 // CHECK: vp2intersectq 268435456(%esp,%esi,8), %ymm3, %k6
67 // CHECK: encoding: [0x62,0xf2,0xe7,0x28,0x68,0xb4,0xf4,0x00,0x00,0x00,0x10]
68 vp2intersectq 268435456(%esp,%esi,8), %ymm3, %k6
69
70 // CHECK: vp2intersectq 291(%edi,%eax,4), %ymm3, %k6
71 // CHECK: encoding: [0x62,0xf2,0xe7,0x28,0x68,0xb4,0x87,0x23,0x01,0x00,0x00]
72 vp2intersectq 291(%edi,%eax,4), %ymm3, %k6
73
74 // CHECK: vp2intersectq (%eax){1to4}, %ymm3, %k6
75 // CHECK: encoding: [0x62,0xf2,0xe7,0x38,0x68,0x30]
76 vp2intersectq (%eax){1to4}, %ymm3, %k6
77
78 // CHECK: vp2intersectq -1024(,%ebp,2), %ymm3, %k6
79 // CHECK: encoding: [0x62,0xf2,0xe7,0x28,0x68,0x34,0x6d,0x00,0xfc,0xff,0xff]
80 vp2intersectq -1024(,%ebp,2), %ymm3, %k6
81
82 // CHECK: vp2intersectq 4064(%ecx), %ymm3, %k6
83 // CHECK: encoding: [0x62,0xf2,0xe7,0x28,0x68,0x71,0x7f]
84 vp2intersectq 4064(%ecx), %ymm3, %k6
85
86 // CHECK: vp2intersectq -1024(%edx){1to4}, %ymm3, %k6
87 // CHECK: encoding: [0x62,0xf2,0xe7,0x38,0x68,0x72,0x80]
88 vp2intersectq -1024(%edx){1to4}, %ymm3, %k6
89
90 // CHECK: vp2intersectq 268435456(%esp,%esi,8), %xmm3, %k6
91 // CHECK: encoding: [0x62,0xf2,0xe7,0x08,0x68,0xb4,0xf4,0x00,0x00,0x00,0x10]
92 vp2intersectq 268435456(%esp,%esi,8), %xmm3, %k6
93
94 // CHECK: vp2intersectq 291(%edi,%eax,4), %xmm3, %k6
95 // CHECK: encoding: [0x62,0xf2,0xe7,0x08,0x68,0xb4,0x87,0x23,0x01,0x00,0x00]
96 vp2intersectq 291(%edi,%eax,4), %xmm3, %k6
97
98 // CHECK: vp2intersectq (%eax){1to2}, %xmm3, %k6
99 // CHECK: encoding: [0x62,0xf2,0xe7,0x18,0x68,0x30]
100 vp2intersectq (%eax){1to2}, %xmm3, %k6
101
102 // CHECK: vp2intersectq -512(,%ebp,2), %xmm3, %k6
103 // CHECK: encoding: [0x62,0xf2,0xe7,0x08,0x68,0x34,0x6d,0x00,0xfe,0xff,0xff]
104 vp2intersectq -512(,%ebp,2), %xmm3, %k6
105
106 // CHECK: vp2intersectq 2032(%ecx), %xmm3, %k6
107 // CHECK: encoding: [0x62,0xf2,0xe7,0x08,0x68,0x71,0x7f]
108 vp2intersectq 2032(%ecx), %xmm3, %k6
109
110 // CHECK: vp2intersectq -1024(%edx){1to2}, %xmm3, %k6
111 // CHECK: encoding: [0x62,0xf2,0xe7,0x18,0x68,0x72,0x80]
112 vp2intersectq -1024(%edx){1to2}, %xmm3, %k6
0 // RUN: llvm-mc -triple i386-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
1
2 // CHECK: vp2intersectd k6, ymm3, ymm4
3 // CHECK: encoding: [0x62,0xf2,0x67,0x28,0x68,0xf4]
4 vp2intersectd k6, ymm3, ymm4
5
6 // CHECK: vp2intersectd k6, xmm3, xmm4
7 // CHECK: encoding: [0x62,0xf2,0x67,0x08,0x68,0xf4]
8 vp2intersectd k6, xmm3, xmm4
9
10 // CHECK: vp2intersectd k6, ymm3, ymmword ptr [esp + 8*esi + 268435456]
11 // CHECK: encoding: [0x62,0xf2,0x67,0x28,0x68,0xb4,0xf4,0x00,0x00,0x00,0x10]
12 vp2intersectd k6, ymm3, ymmword ptr [esp + 8*esi + 268435456]
13
14 // CHECK: vp2intersectd k6, ymm3, ymmword ptr [edi + 4*eax + 291]
15 // CHECK: encoding: [0x62,0xf2,0x67,0x28,0x68,0xb4,0x87,0x23,0x01,0x00,0x00]
16 vp2intersectd k6, ymm3, ymmword ptr [edi + 4*eax + 291]
17
18 // CHECK: vp2intersectd k6, ymm3, dword ptr [eax]{1to8}
19 // CHECK: encoding: [0x62,0xf2,0x67,0x38,0x68,0x30]
20 vp2intersectd k6, ymm3, dword ptr [eax]{1to8}
21
22 // CHECK: vp2intersectd k6, ymm3, ymmword ptr [2*ebp - 1024]
23 // CHECK: encoding: [0x62,0xf2,0x67,0x28,0x68,0x34,0x6d,0x00,0xfc,0xff,0xff]
24 vp2intersectd k6, ymm3, ymmword ptr [2*ebp - 1024]
25
26 // CHECK: vp2intersectd k6, ymm3, ymmword ptr [ecx + 4064]
27 // CHECK: encoding: [0x62,0xf2,0x67,0x28,0x68,0x71,0x7f]
28 vp2intersectd k6, ymm3, ymmword ptr [ecx + 4064]
29
30 // CHECK: vp2intersectd k6, ymm3, dword ptr [edx - 512]{1to8}
31 // CHECK: encoding: [0x62,0xf2,0x67,0x38,0x68,0x72,0x80]
32 vp2intersectd k6, ymm3, dword ptr [edx - 512]{1to8}
33
34 // CHECK: vp2intersectd k6, xmm3, xmmword ptr [esp + 8*esi + 268435456]
35 // CHECK: encoding: [0x62,0xf2,0x67,0x08,0x68,0xb4,0xf4,0x00,0x00,0x00,0x10]
36 vp2intersectd k6, xmm3, xmmword ptr [esp + 8*esi + 268435456]
37
38 // CHECK: vp2intersectd k6, xmm3, xmmword ptr [edi + 4*eax + 291]
39 // CHECK: encoding: [0x62,0xf2,0x67,0x08,0x68,0xb4,0x87,0x23,0x01,0x00,0x00]
40 vp2intersectd k6, xmm3, xmmword ptr [edi + 4*eax + 291]
41
42 // CHECK: vp2intersectd k6, xmm3, dword ptr [eax]{1to4}
43 // CHECK: encoding: [0x62,0xf2,0x67,0x18,0x68,0x30]
44 vp2intersectd k6, xmm3, dword ptr [eax]{1to4}
45
46 // CHECK: vp2intersectd k6, xmm3, xmmword ptr [2*ebp - 512]
47 // CHECK: encoding: [0x62,0xf2,0x67,0x08,0x68,0x34,0x6d,0x00,0xfe,0xff,0xff]
48 vp2intersectd k6, xmm3, xmmword ptr [2*ebp - 512]
49
50 // CHECK: vp2intersectd k6, xmm3, xmmword ptr [ecx + 2032]
51 // CHECK: encoding: [0x62,0xf2,0x67,0x08,0x68,0x71,0x7f]
52 vp2intersectd k6, xmm3, xmmword ptr [ecx + 2032]
53
54 // CHECK: vp2intersectd k6, xmm3, dword ptr [edx - 512]{1to4}
55 // CHECK: encoding: [0x62,0xf2,0x67,0x18,0x68,0x72,0x80]
56 vp2intersectd k6, xmm3, dword ptr [edx - 512]{1to4}
57
58 // CHECK: vp2intersectq k6, ymm3, ymm4
59 // CHECK: encoding: [0x62,0xf2,0xe7,0x28,0x68,0xf4]
60 vp2intersectq k6, ymm3, ymm4
61
62 // CHECK: vp2intersectq k6, xmm3, xmm4
63 // CHECK: encoding: [0x62,0xf2,0xe7,0x08,0x68,0xf4]
64 vp2intersectq k6, xmm3, xmm4
65
66 // CHECK: vp2intersectq k6, ymm3, ymmword ptr [esp + 8*esi + 268435456]
67 // CHECK: encoding: [0x62,0xf2,0xe7,0x28,0x68,0xb4,0xf4,0x00,0x00,0x00,0x10]
68 vp2intersectq k6, ymm3, ymmword ptr [esp + 8*esi + 268435456]
69
70 // CHECK: vp2intersectq k6, ymm3, ymmword ptr [edi + 4*eax + 291]
71 // CHECK: encoding: [0x62,0xf2,0xe7,0x28,0x68,0xb4,0x87,0x23,0x01,0x00,0x00]
72 vp2intersectq k6, ymm3, ymmword ptr [edi + 4*eax + 291]
73
74 // CHECK: vp2intersectq k6, ymm3, qword ptr [eax]{1to4}
75 // CHECK: encoding: [0x62,0xf2,0xe7,0x38,0x68,0x30]
76 vp2intersectq k6, ymm3, qword ptr [eax]{1to4}
77
78 // CHECK: vp2intersectq k6, ymm3, ymmword ptr [2*ebp - 1024]
79 // CHECK: encoding: [0x62,0xf2,0xe7,0x28,0x68,0x34,0x6d,0x00,0xfc,0xff,0xff]
80 vp2intersectq k6, ymm3, ymmword ptr [2*ebp - 1024]
81
82 // CHECK: vp2intersectq k6, ymm3, ymmword ptr [ecx + 4064]
83 // CHECK: encoding: [0x62,0xf2,0xe7,0x28,0x68,0x71,0x7f]
84 vp2intersectq k6, ymm3, ymmword ptr [ecx + 4064]
85
86 // CHECK: vp2intersectq k6, ymm3, qword ptr [edx - 1024]{1to4}
87 // CHECK: encoding: [0x62,0xf2,0xe7,0x38,0x68,0x72,0x80]
88 vp2intersectq k6, ymm3, qword ptr [edx - 1024]{1to4}
89
90 // CHECK: vp2intersectq k6, xmm3, xmmword ptr [esp + 8*esi + 268435456]
91 // CHECK: encoding: [0x62,0xf2,0xe7,0x08,0x68,0xb4,0xf4,0x00,0x00,0x00,0x10]
92 vp2intersectq k6, xmm3, xmmword ptr [esp + 8*esi + 268435456]
93
94 // CHECK: vp2intersectq k6, xmm3, xmmword ptr [edi + 4*eax + 291]
95 // CHECK: encoding: [0x62,0xf2,0xe7,0x08,0x68,0xb4,0x87,0x23,0x01,0x00,0x00]
96 vp2intersectq k6, xmm3, xmmword ptr [edi + 4*eax + 291]
97
98 // CHECK: vp2intersectq k6, xmm3, qword ptr [eax]{1to2}
99 // CHECK: encoding: [0x62,0xf2,0xe7,0x18,0x68,0x30]
100 vp2intersectq k6, xmm3, qword ptr [eax]{1to2}
101
102 // CHECK: vp2intersectq k6, xmm3, xmmword ptr [2*ebp - 512]
103 // CHECK: encoding: [0x62,0xf2,0xe7,0x08,0x68,0x34,0x6d,0x00,0xfe,0xff,0xff]
104 vp2intersectq k6, xmm3, xmmword ptr [2*ebp - 512]
105
106 // CHECK: vp2intersectq k6, xmm3, xmmword ptr [ecx + 2032]
107 // CHECK: encoding: [0x62,0xf2,0xe7,0x08,0x68,0x71,0x7f]
108 vp2intersectq k6, xmm3, xmmword ptr [ecx + 2032]
109
110 // CHECK: vp2intersectq k6, xmm3, qword ptr [edx - 1024]{1to2}
111 // CHECK: encoding: [0x62,0xf2,0xe7,0x18,0x68,0x72,0x80]
112 vp2intersectq k6, xmm3, qword ptr [edx - 1024]{1to2}
0 // RUN: llvm-mc -triple i386-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
1 // CHECK: vp2intersectd k4, zmm3, zmm4
2 // CHECK: encoding: [0x62,0xf2,0x67,0x48,0x68,0xe4]
3 vp2intersectd k4, zmm3, zmm4
4
5 // CHECK: vp2intersectd k4, zmm3, zmmword ptr [esp + 8*esi + 268435456]
6 // CHECK: encoding: [0x62,0xf2,0x67,0x48,0x68,0xa4,0xf4,0x00,0x00,0x00,0x10]
7 vp2intersectd k4, zmm3, zmmword ptr [esp + 8*esi + 268435456]
8
9 // CHECK: vp2intersectd k4, zmm3, zmmword ptr [edi + 4*eax + 291]
10 // CHECK: encoding: [0x62,0xf2,0x67,0x48,0x68,0xa4,0x87,0x23,0x01,0x00,0x00]
11 vp2intersectd k4, zmm3, zmmword ptr [edi + 4*eax + 291]
12
13 // CHECK: vp2intersectd k4, zmm3, dword ptr [eax]{1to16}
14 // CHECK: encoding: [0x62,0xf2,0x67,0x58,0x68,0x20]
15 vp2intersectd k4, zmm3, dword ptr [eax]{1to16}
16
17 // CHECK: vp2intersectd k4, zmm3, zmmword ptr [2*ebp - 2048]
18 // CHECK: encoding: [0x62,0xf2,0x67,0x48,0x68,0x24,0x6d,0x00,0xf8,0xff,0xff]
19 vp2intersectd k4, zmm3, zmmword ptr [2*ebp - 2048]
20
21 // CHECK: vp2intersectd k4, zmm3, zmmword ptr [ecx + 8128]
22 // CHECK: encoding: [0x62,0xf2,0x67,0x48,0x68,0x61,0x7f]
23 vp2intersectd k4, zmm3, zmmword ptr [ecx + 8128]
24
25 // CHECK: vp2intersectd k4, zmm3, dword ptr [edx - 512]{1to16}
26 // CHECK: encoding: [0x62,0xf2,0x67,0x58,0x68,0x62,0x80]
27 vp2intersectd k4, zmm3, dword ptr [edx - 512]{1to16}
28
29 // CHECK: vp2intersectq k4, zmm3, zmm4
30 // CHECK: encoding: [0x62,0xf2,0xe7,0x48,0x68,0xe4]
31 vp2intersectq k4, zmm3, zmm4
32
33 // CHECK: vp2intersectq k4, zmm3, zmmword ptr [esp + 8*esi + 268435456]
34 // CHECK: encoding: [0x62,0xf2,0xe7,0x48,0x68,0xa4,0xf4,0x00,0x00,0x00,0x10]
35 vp2intersectq k4, zmm3, zmmword ptr [esp + 8*esi + 268435456]
36
37 // CHECK: vp2intersectq k4, zmm3, zmmword ptr [edi + 4*eax + 291]
38 // CHECK: encoding: [0x62,0xf2,0xe7,0x48,0x68,0xa4,0x87,0x23,0x01,0x00,0x00]
39 vp2intersectq k4, zmm3, zmmword ptr [edi + 4*eax + 291]
40
41 // CHECK: vp2intersectq k4, zmm3, qword ptr [eax]{1to8}
42 // CHECK: encoding: [0x62,0xf2,0xe7,0x58,0x68,0x20]
43 vp2intersectq k4, zmm3, qword ptr [eax]{1to8}
44
45 // CHECK: vp2intersectq k4, zmm3, zmmword ptr [2*ebp - 2048]
46 // CHECK: encoding: [0x62,0xf2,0xe7,0x48,0x68,0x24,0x6d,0x00,0xf8,0xff,0xff]
47 vp2intersectq k4, zmm3, zmmword ptr [2*ebp - 2048]
48
49 // CHECK: vp2intersectq k4, zmm3, zmmword ptr [ecx + 8128]
50 // CHECK: encoding: [0x62,0xf2,0xe7,0x48,0x68,0x61,0x7f]
51 vp2intersectq k4, zmm3, zmmword ptr [ecx + 8128]
52
53 // CHECK: vp2intersectq k4, zmm3, qword ptr [edx - 1024]{1to8}
54 // CHECK: encoding: [0x62,0xf2,0xe7,0x58,0x68,0x62,0x80]
55 vp2intersectq k4, zmm3, qword ptr [edx - 1024]{1to8}
56
0 // RUN: llvm-mc -triple i386-unknown-unknown --show-encoding %s | FileCheck %s
1
2 // CHECK: vp2intersectq %zmm2, %zmm1, %k0
3 // CHECK: encoding: [0x62,0xf2,0xf7,0x48,0x68,0xc2]
4 vp2intersectq %zmm2, %zmm1, %k0
5
6 // CHECK: vp2intersectq (%edi), %zmm1, %k0
7 // CHECK: encoding: [0x62,0xf2,0xf7,0x48,0x68,0x07]
8 vp2intersectq (%edi), %zmm1, %k0
9
10 // CHECK: vp2intersectq (%edi){1to8}, %zmm1, %k0
11 // CHECK: encoding: [0x62,0xf2,0xf7,0x58,0x68,0x07]
12 vp2intersectq (%edi){1to8}, %zmm1, %k0
13
14 // CHECK: vp2intersectq %zmm2, %zmm1, %k0
15 // CHECK: encoding: [0x62,0xf2,0xf7,0x48,0x68,0xc2]
16 vp2intersectq %zmm2, %zmm1, %k1
17
18 // CHECK: vp2intersectq (%edi), %zmm1, %k0
19 // CHECK: encoding: [0x62,0xf2,0xf7,0x48,0x68,0x07]
20 vp2intersectq (%edi), %zmm1, %k1
21
22 // CHECK: vp2intersectq (%edi){1to8}, %zmm1, %k0
23 // CHECK: encoding: [0x62,0xf2,0xf7,0x58,0x68,0x07]
24 vp2intersectq (%edi){1to8}, %zmm1, %k1
25
26 // CHECK: vp2intersectq %zmm7, %zmm4, %k6
27 // CHECK: encoding: [0x62,0xf2,0xdf,0x48,0x68,0xf7]
28 vp2intersectq %zmm7, %zmm4, %k6
29
30 // CHECK: vp2intersectq (%esi), %zmm4, %k6
31 // CHECK: encoding: [0x62,0xf2,0xdf,0x48,0x68,0x36]
32 vp2intersectq (%esi), %zmm4, %k6
33
34 // CHECK: vp2intersectq (%esi){1to8}, %zmm4, %k6
35 // CHECK: encoding: [0x62,0xf2,0xdf,0x58,0x68,0x36]
36 vp2intersectq (%esi){1to8}, %zmm4, %k6
37
38 // CHECK: vp2intersectq %zmm7, %zmm4, %k6
39 // CHECK: encoding: [0x62,0xf2,0xdf,0x48,0x68,0xf7]
40 vp2intersectq %zmm7, %zmm4, %k7
41
42 // CHECK: vp2intersectq (%esi), %zmm4, %k6
43 // CHECK: encoding: [0x62,0xf2,0xdf,0x48,0x68,0x36]
44 vp2intersectq (%esi), %zmm4, %k7
45
46 // CHECK: vp2intersectq (%esi){1to8}, %zmm4, %k6
47 // CHECK: encoding: [0x62,0xf2,0xdf,0x58,0x68,0x36]
48 vp2intersectq (%esi){1to8}, %zmm4, %k7
49
50 // CHECK: vp2intersectq %ymm2, %ymm1, %k0
51 // CHECK: encoding: [0x62,0xf2,0xf7,0x28,0x68,0xc2]
52 vp2intersectq %ymm2, %ymm1, %k0
53
54 // CHECK: vp2intersectq (%edi), %ymm1, %k0
55 // CHECK: encoding: [0x62,0xf2,0xf7,0x28,0x68,0x07]
56 vp2intersectq (%edi), %ymm1, %k0
57
58 // CHECK: vp2intersectq (%edi){1to4}, %ymm1, %k0
59 // CHECK: encoding: [0x62,0xf2,0xf7,0x38,0x68,0x07]
60 vp2intersectq (%edi){1to4}, %ymm1, %k0
61
62 // CHECK: vp2intersectq %ymm2, %ymm1, %k0
63 // CHECK: encoding: [0x62,0xf2,0xf7,0x28,0x68,0xc2]
64 vp2intersectq %ymm2, %ymm1, %k1
65
66 // CHECK: vp2intersectq (%edi), %ymm1, %k0
67 // CHECK: encoding: [0x62,0xf2,0xf7,0x28,0x68,0x07]
68 vp2intersectq (%edi), %ymm1, %k1
69
70 // CHECK: vp2intersectq (%edi){1to4}, %ymm1, %k0
71 // CHECK: encoding: [0x62,0xf2,0xf7,0x38,0x68,0x07]
72 vp2intersectq (%edi){1to4}, %ymm1, %k1
73
74 // CHECK: vp2intersectq %ymm7, %ymm4, %k6
75 // CHECK: encoding: [0x62,0xf2,0xdf,0x28,0x68,0xf7]
76 vp2intersectq %ymm7, %ymm4, %k6
77
78 // CHECK: vp2intersectq (%esi), %ymm4, %k6
79 // CHECK: encoding: [0x62,0xf2,0xdf,0x28,0x68,0x36]
80 vp2intersectq (%esi), %ymm4, %k6
81
82 // CHECK: vp2intersectq (%esi){1to4}, %ymm4, %k6
83 // CHECK: encoding: [0x62,0xf2,0xdf,0x38,0x68,0x36]
84 vp2intersectq (%esi){1to4}, %ymm4, %k6
85
86 // CHECK: vp2intersectq %ymm7, %ymm4, %k6
87 // CHECK: encoding: [0x62,0xf2,0xdf,0x28,0x68,0xf7]
88 vp2intersectq %ymm7, %ymm4, %k7
89
90 // CHECK: vp2intersectq (%esi), %ymm4, %k6
91 // CHECK: encoding: [0x62,0xf2,0xdf,0x28,0x68,0x36]
92 vp2intersectq (%esi), %ymm4, %k7
93
94 // CHECK: vp2intersectq %xmm2, %xmm1, %k0
95 // CHECK: encoding: [0x62,0xf2,0xf7,0x08,0x68,0xc2]
96 vp2intersectq %xmm2, %xmm1, %k0
97
98 // CHECK: vp2intersectq (%edi), %xmm1, %k0
99 // CHECK: encoding: [0x62,0xf2,0xf7,0x08,0x68,0x07]
100 vp2intersectq (%edi), %xmm1, %k0
101
102 // CHECK: vp2intersectq (%edi){1to2}, %xmm1, %k0
103 // CHECK: encoding: [0x62,0xf2,0xf7,0x18,0x68,0x07]
104 vp2intersectq (%edi){1to2}, %xmm1, %k0
105
106 // CHECK: vp2intersectq %xmm2, %xmm1, %k0
107 // CHECK: encoding: [0x62,0xf2,0xf7,0x08,0x68,0xc2]
108 vp2intersectq %xmm2, %xmm1, %k1
109
110 // CHECK: vp2intersectq (%edi), %xmm1, %k0
111 // CHECK: encoding: [0x62,0xf2,0xf7,0x08,0x68,0x07]
112 vp2intersectq (%edi), %xmm1, %k1
113
114 // CHECK: vp2intersectq %xmm7, %xmm4, %k6
115 // CHECK: encoding: [0x62,0xf2,0xdf,0x08,0x68,0xf7]
116 vp2intersectq %xmm7, %xmm4, %k6
117
118 // CHECK: vp2intersectq (%esi), %xmm4, %k6
119 // CHECK: encoding: [0x62,0xf2,0xdf,0x08,0x68,0x36]
120 vp2intersectq (%esi), %xmm4, %k6
121
122 // CHECK: vp2intersectq %xmm7, %xmm4, %k6
123 // CHECK: encoding: [0x62,0xf2,0xdf,0x08,0x68,0xf7]
124 vp2intersectq %xmm7, %xmm4, %k7
125
126 // CHECK: vp2intersectq (%esi), %xmm4, %k6
127 // CHECK: encoding: [0x62,0xf2,0xdf,0x08,0x68,0x36]
128 vp2intersectq (%esi), %xmm4, %k7
129
130 // CHECK: vp2intersectd %zmm2, %zmm1, %k0
131 // CHECK: encoding: [0x62,0xf2,0x77,0x48,0x68,0xc2]
132 vp2intersectd %zmm2, %zmm1, %k0
133
134 // CHECK: vp2intersectd (%edi), %zmm1, %k0
135 // CHECK: encoding: [0x62,0xf2,0x77,0x48,0x68,0x07]
136 vp2intersectd (%edi), %zmm1, %k0
137
138 // CHECK: vp2intersectd %zmm2, %zmm1, %k0
139 // CHECK: encoding: [0x62,0xf2,0x77,0x48,0x68,0xc2]
140 vp2intersectd %zmm2, %zmm1, %k1
141
142 // CHECK: vp2intersectd (%edi), %zmm1, %k0
143 // CHECK: encoding: [0x62,0xf2,0x77,0x48,0x68,0x07]
144 vp2intersectd (%edi), %zmm1, %k1
145
146 // CHECK: vp2intersectd %zmm7, %zmm4, %k6
147 // CHECK: encoding: [0x62,0xf2,0x5f,0x48,0x68,0xf7]
148 vp2intersectd %zmm7, %zmm4, %k6
149
150 // CHECK: vp2intersectd (%esi), %zmm4, %k6
151 // CHECK: encoding: [0x62,0xf2,0x5f,0x48,0x68,0x36]
152 vp2intersectd (%esi), %zmm4, %k6
153
154 // CHECK: vp2intersectd %zmm7, %zmm4, %k6
155 // CHECK: encoding: [0x62,0xf2,0x5f,0x48,0x68,0xf7]
156 vp2intersectd %zmm7, %zmm4, %k7
157
158 // CHECK: vp2intersectd (%esi), %zmm4, %k6
159 // CHECK: encoding: [0x62,0xf2,0x5f,0x48,0x68,0x36]
160 vp2intersectd (%esi), %zmm4, %k7
161
162 // CHECK: vp2intersectd %ymm2, %ymm1, %k0
163 // CHECK: encoding: [0x62,0xf2,0x77,0x28,0x68,0xc2]
164 vp2intersectd %ymm2, %ymm1, %k0
165
166 // CHECK: vp2intersectd (%edi), %ymm1, %k0
167 // CHECK: encoding: [0x62,0xf2,0x77,0x28,0x68,0x07]
168 vp2intersectd (%edi), %ymm1, %k0
169
170 // CHECK: vp2intersectd %ymm2, %ymm1, %k0
171 // CHECK: encoding: [0x62,0xf2,0x77,0x28,0x68,0xc2]
172 vp2intersectd %ymm2, %ymm1, %k1
173
174 // CHECK: vp2intersectd (%edi), %ymm1, %k0
175 // CHECK: encoding: [0x62,0xf2,0x77,0x28,0x68,0x07]
176 vp2intersectd (%edi), %ymm1, %k1
177
178 // CHECK: vp2intersectd %ymm7, %ymm4, %k6
179 // CHECK: encoding: [0x62,0xf2,0x5f,0x28,0x68,0xf7]
180 vp2intersectd %ymm7, %ymm4, %k6
181
182 // CHECK: vp2intersectd (%esi), %ymm4, %k6
183 // CHECK: encoding: [0x62,0xf2,0x5f,0x28,0x68,0x36]
184 vp2intersectd (%esi), %ymm4, %k6
185
186 // CHECK: vp2intersectd %ymm7, %ymm4, %k6
187 // CHECK: encoding: [0x62,0xf2,0x5f,0x28,0x68,0xf7]
188 vp2intersectd %ymm7, %ymm4, %k7
189
190 // CHECK: vp2intersectd (%esi), %ymm4, %k6
191 // CHECK: encoding: [0x62,0xf2,0x5f,0x28,0x68,0x36]
192 vp2intersectd (%esi), %ymm4, %k7
193
194 // CHECK: vp2intersectd %xmm2, %xmm1, %k0
195 // CHECK: encoding: [0x62,0xf2,0x77,0x08,0x68,0xc2]
196 vp2intersectd %xmm2, %xmm1, %k0
197
198 // CHECK: vp2intersectd (%edi), %xmm1, %k0
199 // CHECK: encoding: [0x62,0xf2,0x77,0x08,0x68,0x07]
200 vp2intersectd (%edi), %xmm1, %k0
201
202 // CHECK: vp2intersectd %xmm2, %xmm1, %k0
203 // CHECK: encoding: [0x62,0xf2,0x77,0x08,0x68,0xc2]
204 vp2intersectd %xmm2, %xmm1, %k1
205
206 // CHECK: vp2intersectd (%edi), %xmm1, %k0
207 // CHECK: encoding: [0x62,0xf2,0x77,0x08,0x68,0x07]
208 vp2intersectd (%edi), %xmm1, %k1
209
210 // CHECK: vp2intersectd %xmm7, %xmm4, %k6
211 // CHECK: encoding: [0x62,0xf2,0x5f,0x08,0x68,0xf7]
212 vp2intersectd %xmm7, %xmm4, %k6
213
214 // CHECK: vp2intersectd (%esi), %xmm4, %k6
215 // CHECK: encoding: [0x62,0xf2,0x5f,0x08,0x68,0x36]
216 vp2intersectd (%esi), %xmm4, %k6
217
218 // CHECK: vp2intersectd %xmm7, %xmm4, %k6
219 // CHECK: encoding: [0x62,0xf2,0x5f,0x08,0x68,0xf7]
220 vp2intersectd %xmm7, %xmm4, %k7
221
222 // CHECK: vp2intersectd (%esi), %xmm4, %k6
223 // CHECK: encoding: [0x62,0xf2,0x5f,0x08,0x68,0x36]
224 vp2intersectd (%esi), %xmm4, %k7
0 // RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
1 // CHECK: vp2intersectd k4, zmm23, zmm24
2 // CHECK: encoding: [0x62,0x92,0x47,0x40,0x68,0xe0]
3 vp2intersectd k4, zmm23, zmm24
4
5 // CHECK: vp2intersectd k4, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
6 // CHECK: encoding: [0x62,0xb2,0x47,0x40,0x68,0xa4,0xf5,0x00,0x00,0x00,0x10]
7 vp2intersectd k4, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
8
9 // CHECK: vp2intersectd k4, zmm23, zmmword ptr [r8 + 4*rax + 291]
10 // CHECK: encoding: [0x62,0xd2,0x47,0x40,0x68,0xa4,0x80,0x23,0x01,0x00,0x00]
11 vp2intersectd k4, zmm23, zmmword ptr [r8 + 4*rax + 291]
12
13 // CHECK: vp2intersectd k4, zmm23, dword ptr [rip]{1to16}
14 // CHECK: encoding: [0x62,0xf2,0x47,0x50,0x68,0x25,0x00,0x00,0x00,0x00]
15 vp2intersectd k4, zmm23, dword ptr [rip]{1to16}
16
17 // CHECK: vp2intersectd k4, zmm23, zmmword ptr [2*rbp - 2048]
18 // CHECK: encoding: [0x62,0xf2,0x47,0x40,0x68,0x24,0x6d,0x00,0xf8,0xff,0xff]
19 vp2intersectd k4, zmm23, zmmword ptr [2*rbp - 2048]
20
21 // CHECK: vp2intersectd k4, zmm23, zmmword ptr [rcx + 8128]
22 // CHECK: encoding: [0x62,0xf2,0x47,0x40,0x68,0x61,0x7f]
23 vp2intersectd k4, zmm23, zmmword ptr [rcx + 8128]
24
25 // CHECK: vp2intersectd k4, zmm23, dword ptr [rdx - 512]{1to16}
26 // CHECK: encoding: [0x62,0xf2,0x47,0x50,0x68,0x62,0x80]
27 vp2intersectd k4, zmm23, dword ptr [rdx - 512]{1to16}
28
29 // CHECK: vp2intersectq k4, zmm23, zmm24
30 // CHECK: encoding: [0x62,0x92,0xc7,0x40,0x68,0xe0]
31 vp2intersectq k4, zmm23, zmm24
32
33 // CHECK: vp2intersectq k4, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
34 // CHECK: encoding: [0x62,0xb2,0xc7,0x40,0x68,0xa4,0xf5,0x00,0x00,0x00,0x10]
35 vp2intersectq k4, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
36
37 // CHECK: vp2intersectq k4, zmm23, zmmword ptr [r8 + 4*rax + 291]
38 // CHECK: encoding: [0x62,0xd2,0xc7,0x40,0x68,0xa4,0x80,0x23,0x01,0x00,0x00]
39 vp2intersectq k4, zmm23, zmmword ptr [r8 + 4*rax + 291]
40
41 // CHECK: vp2intersectq k4, zmm23, qword ptr [rip]{1to8}
42 // CHECK: encoding: [0x62,0xf2,0xc7,0x50,0x68,0x25,0x00,0x00,0x00,0x00]
43 vp2intersectq k4, zmm23, qword ptr [rip]{1to8}
44
45 // CHECK: vp2intersectq k4, zmm23, zmmword ptr [2*rbp - 2048]
46 // CHECK: encoding: [0x62,0xf2,0xc7,0x40,0x68,0x24,0x6d,0x00,0xf8,0xff,0xff]
47 vp2intersectq k4, zmm23, zmmword ptr [2*rbp - 2048]
48
49 // CHECK: vp2intersectq k4, zmm23, zmmword ptr [rcx + 8128]
50 // CHECK: encoding: [0x62,0xf2,0xc7,0x40,0x68,0x61,0x7f]
51 vp2intersectq k4, zmm23, zmmword ptr [rcx + 8128]
52
53 // CHECK: vp2intersectq k4, zmm23, qword ptr [rdx - 1024]{1to8}
54 // CHECK: encoding: [0x62,0xf2,0xc7,0x50,0x68,0x62,0x80]
55 vp2intersectq k4, zmm23, qword ptr [rdx - 1024]{1to8}
56
0 // RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
1
2 // v8i64 vectors
3 // CHECK: vp2intersectq %zmm2, %zmm1, %k0
4 // CHECK: encoding: [0x62,0xf2,0xf7,0x48,0x68,0xc2]
5 vp2intersectq %zmm2, %zmm1, %k0
6
7 // CHECK: vp2intersectq (%rdi), %zmm1, %k0
8 // CHECK: encoding: [0x62,0xf2,0xf7,0x48,0x68,0x07]
9 vp2intersectq (%rdi), %zmm1, %k0
10
11 // CHECK: vp2intersectq (%rdi){1to8}, %zmm1, %k0
12 // CHECK: encoding: [0x62,0xf2,0xf7,0x58,0x68,0x07]
13 vp2intersectq (%rdi){1to8}, %zmm1, %k0
14
15 // CHECK: vp2intersectq %zmm2, %zmm1, %k0
16 // CHECK: encoding: [0x62,0xf2,0xf7,0x48,0x68,0xc2]
17 vp2intersectq %zmm2, %zmm1, %k1
18
19 // CHECK: vp2intersectq (%rdi), %zmm1, %k0
20 // CHECK: encoding: [0x62,0xf2,0xf7,0x48,0x68,0x07]
21 vp2intersectq (%rdi), %zmm1, %k1
22
23 // CHECK: vp2intersectq (%rdi){1to8}, %zmm1, %k0
24 // CHECK: encoding: [0x62,0xf2,0xf7,0x58,0x68,0x07]
25 vp2intersectq (%rdi){1to8}, %zmm1, %k1
26
27 // CHECK: vp2intersectq %zmm7, %zmm9, %k6
28 // CHECK: encoding: [0x62,0xf2,0xb7,0x48,0x68,0xf7]
29 vp2intersectq %zmm7, %zmm9, %k6
30
31 // CHECK: vp2intersectq (%rsi), %zmm9, %k6
32 // CHECK: encoding: [0x62,0xf2,0xb7,0x48,0x68,0x36]
33 vp2intersectq (%rsi), %zmm9, %k6
34
35 // CHECK: vp2intersectq (%rsi){1to8}, %zmm9, %k6
36 // CHECK: encoding: [0x62,0xf2,0xb7,0x58,0x68,0x36]
37 vp2intersectq (%rsi){1to8}, %zmm9, %k6
38
39 // CHECK: vp2intersectq %zmm7, %zmm9, %k6
40 // CHECK: encoding: [0x62,0xf2,0xb7,0x48,0x68,0xf7]
41 vp2intersectq %zmm7, %zmm9, %k7
42
43 // CHECK: vp2intersectq (%rsi), %zmm9, %k6
44 // CHECK: encoding: [0x62,0xf2,0xb7,0x48,0x68,0x36]
45 vp2intersectq (%rsi), %zmm9, %k7
46
47 // CHECK: vp2intersectq (%rsi){1to8}, %zmm9, %k6
48 // CHECK: encoding: [0x62,0xf2,0xb7,0x58,0x68,0x36]
49 vp2intersectq (%rsi){1to8}, %zmm9, %k7
50
51 // v4i64 vectors
52 // CHECK: vp2intersectq %ymm2, %ymm1, %k0
53 // CHECK: encoding: [0x62,0xf2,0xf7,0x28,0x68,0xc2]
54 vp2intersectq %ymm2, %ymm1, %k0
55
56 // CHECK: vp2intersectq (%rdi), %ymm1, %k0
57 // CHECK: encoding: [0x62,0xf2,0xf7,0x28,0x68,0x07]
58 vp2intersectq (%rdi), %ymm1, %k0
59
60 // CHECK: vp2intersectq (%rdi){1to4}, %ymm1, %k0
61 // CHECK: encoding: [0x62,0xf2,0xf7,0x38,0x68,0x07]
62 vp2intersectq (%rdi){1to4}, %ymm1, %k0
63
64 // CHECK: vp2intersectq %ymm2, %ymm1, %k0
65 // CHECK: encoding: [0x62,0xf2,0xf7,0x28,0x68,0xc2]
66 vp2intersectq %ymm2, %ymm1, %k1
67
68 // CHECK: vp2intersectq (%rdi), %ymm1, %k0
69 // CHECK: encoding: [0x62,0xf2,0xf7,0x28,0x68,0x07]
70 vp2intersectq (%rdi), %ymm1, %k1
71
72 // CHECK: vp2intersectq (%rdi){1to4}, %ymm1, %k0
73 // CHECK: encoding: [0x62,0xf2,0xf7,0x38,0x68,0x07]
74 vp2intersectq (%rdi){1to4}, %ymm1, %k1
75
76 // CHECK: vp2intersectq %ymm7, %ymm9, %k6
77 // CHECK: encoding: [0x62,0xf2,0xb7,0x28,0x68,0xf7]
78 vp2intersectq %ymm7, %ymm9, %k6
79
80 // CHECK: vp2intersectq (%rsi), %ymm9, %k6
81 // CHECK: encoding: [0x62,0xf2,0xb7,0x28,0x68,0x36]
82 vp2intersectq (%rsi), %ymm9, %k6
83
84 // CHECK: vp2intersectq (%rsi){1to4}, %ymm9, %k6
85 // CHECK: encoding: [0x62,0xf2,0xb7,0x38,0x68,0x36]
86 vp2intersectq (%rsi){1to4}, %ymm9, %k6
87
88 // CHECK: vp2intersectq %ymm7, %ymm9, %k6
89 // CHECK: encoding: [0x62,0xf2,0xb7,0x28,0x68,0xf7]
90 vp2intersectq %ymm7, %ymm9, %k7
91
92 // CHECK: vp2intersectq (%rsi), %ymm9, %k6
93 // CHECK: encoding: [0x62,0xf2,0xb7,0x28,0x68,0x36]
94 vp2intersectq (%rsi), %ymm9, %k7
95
96 // v2i64 vectors
97 // CHECK: vp2intersectq %xmm2, %xmm1, %k0
98 // CHECK: encoding: [0x62,0xf2,0xf7,0x08,0x68,0xc2]
99 vp2intersectq %xmm2, %xmm1, %k0
100
101 // CHECK: vp2intersectq (%rdi), %xmm1, %k0
102 // CHECK: encoding: [0x62,0xf2,0xf7,0x08,0x68,0x07]
103 vp2intersectq (%rdi), %xmm1, %k0
104
105 // CHECK: vp2intersectq (%rdi){1to2}, %xmm1, %k0
106 // CHECK: encoding: [0x62,0xf2,0xf7,0x18,0x68,0x07]
107 vp2intersectq (%rdi){1to2}, %xmm1, %k0
108
109 // CHECK: vp2intersectq %xmm2, %xmm1, %k0
110 // CHECK: encoding: [0x62,0xf2,0xf7,0x08,0x68,0xc2]
111 vp2intersectq %xmm2, %xmm1, %k1
112
113 // CHECK: vp2intersectq (%rdi), %xmm1, %k0
114 // CHECK: encoding: [0x62,0xf2,0xf7,0x08,0x68,0x07]
115 vp2intersectq (%rdi), %xmm1, %k1
116
117 // CHECK: vp2intersectq %xmm7, %xmm9, %k6
118 // CHECK: encoding: [0x62,0xf2,0xb7,0x08,0x68,0xf7]
119 vp2intersectq %xmm7, %xmm9, %k6
120
121 // CHECK: vp2intersectq (%rsi), %xmm9, %k6
122 // CHECK: encoding: [0x62,0xf2,0xb7,0x08,0x68,0x36]
123 vp2intersectq (%rsi), %xmm9, %k6
124
125 // CHECK: vp2intersectq %xmm7, %xmm9, %k6
126 // CHECK: encoding: [0x62,0xf2,0xb7,0x08,0x68,0xf7]
127 vp2intersectq %xmm7, %xmm9, %k7
128
129 // CHECK: vp2intersectq (%rsi), %xmm9, %k6
130 // CHECK: encoding: [0x62,0xf2,0xb7,0x08,0x68,0x36]
131 vp2intersectq (%rsi), %xmm9, %k7
132
133 // v16i32 vectors
134 // CHECK: vp2intersectd %zmm2, %zmm1, %k0
135 // CHECK: encoding: [0x62,0xf2,0x77,0x48,0x68,0xc2]
136 vp2intersectd %zmm2, %zmm1, %k0
137
138 // CHECK: vp2intersectd (%rdi), %zmm1, %k0
139 // CHECK: encoding: [0x62,0xf2,0x77,0x48,0x68,0x07]
140 vp2intersectd (%rdi), %zmm1, %k0
141
142 // CHECK: vp2intersectd %zmm2, %zmm1, %k0
143 // CHECK: encoding: [0x62,0xf2,0x77,0x48,0x68,0xc2]
144 vp2intersectd %zmm2, %zmm1, %k1
145
146 // CHECK: vp2intersectd (%rdi), %zmm1, %k0
147 // CHECK: encoding: [0x62,0xf2,0x77,0x48,0x68,0x07]
148 vp2intersectd (%rdi), %zmm1, %k1
149
150 // CHECK: vp2intersectd %zmm7, %zmm9, %k6
151 // CHECK: encoding: [0x62,0xf2,0x37,0x48,0x68,0xf7]
152 vp2intersectd %zmm7, %zmm9, %k6
153
154 // CHECK: vp2intersectd (%rsi), %zmm9, %k6
155 // CHECK: encoding: [0x62,0xf2,0x37,0x48,0x68,0x36]
156 vp2intersectd (%rsi), %zmm9, %k6
157
158 // CHECK: vp2intersectd %zmm7, %zmm9, %k6
159 // CHECK: encoding: [0x62,0xf2,0x37,0x48,0x68,0xf7]
160 vp2intersectd %zmm7, %zmm9, %k7
161
162 // CHECK: vp2intersectd (%rsi), %zmm9, %k6
163 // CHECK: encoding: [0x62,0xf2,0x37,0x48,0x68,0x36]
164 vp2intersectd (%rsi), %zmm9, %k7
165
166 // v8i32 vectors
167 // CHECK: vp2intersectd %ymm2, %ymm1, %k0
168 // CHECK: encoding: [0x62,0xf2,0x77,0x28,0x68,0xc2]
169 vp2intersectd %ymm2, %ymm1, %k0
170
171 // CHECK: vp2intersectd (%rdi), %ymm1, %k0
172 // CHECK: encoding: [0x62,0xf2,0x77,0x28,0x68,0x07]
173 vp2intersectd (%rdi), %ymm1, %k0
174
175 // CHECK: vp2intersectd %ymm2, %ymm1, %k0
176 // CHECK: encoding: [0x62,0xf2,0x77,0x28,0x68,0xc2]
177 vp2intersectd %ymm2, %ymm1, %k1
178
179 // CHECK: vp2intersectd (%rdi), %ymm1, %k0
180 // CHECK: encoding: [0x62,0xf2,0x77,0x28,0x68,0x07]
181 vp2intersectd (%rdi), %ymm1, %k1
182
183 // CHECK: vp2intersectd %ymm7, %ymm9, %k6
184 // CHECK: encoding: [0x62,0xf2,0x37,0x28,0x68,0xf7]
185 vp2intersectd %ymm7, %ymm9, %k6
186
187 // CHECK: vp2intersectd (%rsi), %ymm9, %k6
188 // CHECK: encoding: [0x62,0xf2,0x37,0x28,0x68,0x36]
189 vp2intersectd (%rsi), %ymm9, %k6
190
191 // CHECK: vp2intersectd %ymm7, %ymm9, %k6
192 // CHECK: encoding: [0x62,0xf2,0x37,0x28,0x68,0xf7]
193 vp2intersectd %ymm7, %ymm9, %k7
194
195 // CHECK: vp2intersectd (%rsi), %ymm9, %k6
196 // CHECK: encoding: [0x62,0xf2,0x37,0x28,0x68,0x36]
197 vp2intersectd (%rsi), %ymm9, %k7
198
199 // v4i32 vectors
200 // CHECK: vp2intersectd %xmm2, %xmm1, %k0
201 // CHECK: encoding: [0x62,0xf2,0x77,0x08,0x68,0xc2]
202 vp2intersectd %xmm2, %xmm1, %k0
203
204 // CHECK: vp2intersectd (%rdi), %xmm1, %k0
205 // CHECK: encoding: [0x62,0xf2,0x77,0x08,0x68,0x07]
206 vp2intersectd (%rdi), %xmm1, %k0
207
208 // CHECK: vp2intersectd %xmm2, %xmm1, %k0
209 // CHECK: encoding: [0x62,0xf2,0x77,0x08,0x68,0xc2]
210 vp2intersectd %xmm2, %xmm1, %k1
211
212 // CHECK: vp2intersectd (%rdi), %xmm1, %k0
213 // CHECK: encoding: [0x62,0xf2,0x77,0x08,0x68,0x07]
214 vp2intersectd (%rdi), %xmm1, %k1
215
216 // CHECK: vp2intersectd %xmm7, %xmm9, %k6
217 // CHECK: encoding: [0x62,0xf2,0x37,0x08,0x68,0xf7]
218 vp2intersectd %xmm7, %xmm9, %k6
219
220 // CHECK: vp2intersectd (%rsi), %xmm9, %k6
221 // CHECK: encoding: [0x62,0xf2,0x37,0x08,0x68,0x36]
222 vp2intersectd (%rsi), %xmm9, %k6
223
224 // CHECK: vp2intersectd %xmm7, %xmm9, %k6
225 // CHECK: encoding: [0x62,0xf2,0x37,0x08,0x68,0xf7]
226 vp2intersectd %xmm7, %xmm9, %k7
227
228 // CHECK: vp2intersectd (%rsi), %xmm9, %k6
229 // CHECK: encoding: [0x62,0xf2,0x37,0x08,0x68,0x36]
230 vp2intersectd (%rsi), %xmm9, %k7
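The only difference between the d and q encodings in the file above is EVEX.W in the second payload byte (P1), which also carries the inverted vvvv field for the first source and the mandatory-prefix bits (F2 gives pp = 0b11): 0x77 vs. 0xf7 with %zmm1, 0x37 vs. 0xb7 with %zmm9. A sketch of that byte under the usual EVEX layout (illustrative helper, not the LLVM encoder):

#include <cstdint>
#include <cassert>

// EVEX P1 layout: [7]=W, [6:3]=~vvvv (first source, low 4 bits), [2]=1, [1:0]=pp.
uint8_t evexP1(bool W, unsigned Src1, unsigned PP) {
  return (uint8_t)(((unsigned)W << 7) | ((~Src1 & 0xfu) << 3) | (1u << 2) |
                   (PP & 3u));
}

int main() {
  assert(evexP1(false, /*zmm1*/ 1, /*F2*/ 3) == 0x77); // vp2intersectd
  assert(evexP1(true,  /*zmm1*/ 1, /*F2*/ 3) == 0xf7); // vp2intersectq
  assert(evexP1(true,  /*zmm9*/ 9, /*F2*/ 3) == 0xb7);
}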
0 // RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding < %s | FileCheck %s
1
2 // CHECK: vp2intersectd %ymm24, %ymm23, %k6
3 // CHECK: encoding: [0x62,0x92,0x47,0x20,0x68,0xf0]
4 vp2intersectd %ymm24, %ymm23, %k6
5
6 // CHECK: vp2intersectd %xmm24, %xmm23, %k6
7 // CHECK: encoding: [0x62,0x92,0x47,0x00,0x68,0xf0]
8 vp2intersectd %xmm24, %xmm23, %k6
9
10 // CHECK: vp2intersectd 268435456(%rbp,%r14,8), %ymm23, %k6
11 // CHECK: encoding: [0x62,0xb2,0x47,0x20,0x68,0xb4,0xf5,0x00,0x00,0x00,0x10]
12 vp2intersectd 268435456(%rbp,%r14,8), %ymm23, %k6
13
14 // CHECK: vp2intersectd 291(%r8,%rax,4), %ymm23, %k6
15 // CHECK: encoding: [0x62,0xd2,0x47,0x20,0x68,0xb4,0x80,0x23,0x01,0x00,0x00]
16 vp2intersectd 291(%r8,%rax,4), %ymm23, %k6
17
18 // CHECK: vp2intersectd (%rip){1to8}, %ymm23, %k6
19 // CHECK: encoding: [0x62,0xf2,0x47,0x30,0x68,0x35,0x00,0x00,0x00,0x00]
20 vp2intersectd (%rip){1to8}, %ymm23, %k6
21
22 // CHECK: vp2intersectd -1024(,%rbp,2), %ymm23, %k6
23 // CHECK: encoding: [0x62,0xf2,0x47,0x20,0x68,0x34,0x6d,0x00,0xfc,0xff,0xff]
24 vp2intersectd -1024(,%rbp,2), %ymm23, %k6
25
26 // CHECK: vp2intersectd 4064(%rcx), %ymm23, %k6
27 // CHECK: encoding: [0x62,0xf2,0x47,0x20,0x68,0x71,0x7f]
28 vp2intersectd 4064(%rcx), %ymm23, %k6
29
30 // CHECK: vp2intersectd -512(%rdx){1to8}, %ymm23, %k6
31 // CHECK: encoding: [0x62,0xf2,0x47,0x30,0x68,0x72,0x80]
32 vp2intersectd -512(%rdx){1to8}, %ymm23, %k6
33
34 // CHECK: vp2intersectd 268435456(%rbp,%r14,8), %xmm23, %k6
35 // CHECK: encoding: [0x62,0xb2,0x47,0x00,0x68,0xb4,0xf5,0x00,0x00,0x00,0x10]
36 vp2intersectd 268435456(%rbp,%r14,8), %xmm23, %k6
37
38 // CHECK: vp2intersectd 291(%r8,%rax,4), %xmm23, %k6
39 // CHECK: encoding: [0x62,0xd2,0x47,0x00,0x68,0xb4,0x80,0x23,0x01,0x00,0x00]
40 vp2intersectd 291(%r8,%rax,4), %xmm23, %k6
41
42 // CHECK: vp2intersectd (%rip){1to4}, %xmm23, %k6
43 // CHECK: encoding: [0x62,0xf2,0x47,0x10,0x68,0x35,0x00,0x00,0x00,0x00]
44 vp2intersectd (%rip){1to4}, %xmm23, %k6
45
46 // CHECK: vp2intersectd -512(,%rbp,2), %xmm23, %k6
47 // CHECK: encoding: [0x62,0xf2,0x47,0x00,0x68,0x34,0x6d,0x00,0xfe,0xff,0xff]
48 vp2intersectd -512(,%rbp,2), %xmm23, %k6
49
50 // CHECK: vp2intersectd 2032(%rcx), %xmm23, %k6
51 // CHECK: encoding: [0x62,0xf2,0x47,0x00,0x68,0x71,0x7f]
52 vp2intersectd 2032(%rcx), %xmm23, %k6
53
54 // CHECK: vp2intersectd -512(%rdx){1to4}, %xmm23, %k6
55 // CHECK: encoding: [0x62,0xf2,0x47,0x10,0x68,0x72,0x80]
56 vp2intersectd -512(%rdx){1to4}, %xmm23, %k6
57
58 // CHECK: vp2intersectq %ymm24, %ymm23, %k6
59 // CHECK: encoding: [0x62,0x92,0xc7,0x20,0x68,0xf0]
60 vp2intersectq %ymm24, %ymm23, %k6
61
62 // CHECK: vp2intersectq %xmm24, %xmm23, %k6
63 // CHECK: encoding: [0x62,0x92,0xc7,0x00,0x68,0xf0]
64 vp2intersectq %xmm24, %xmm23, %k6
65
66 // CHECK: vp2intersectq 268435456(%rbp,%r14,8), %ymm23, %k6
67 // CHECK: encoding: [0x62,0xb2,0xc7,0x20,0x68,0xb4,0xf5,0x00,0x00,0x00,0x10]
68 vp2intersectq 268435456(%rbp,%r14,8), %ymm23, %k6
69
70 // CHECK: vp2intersectq 291(%r8,%rax,4), %ymm23, %k6
71 // CHECK: encoding: [0x62,0xd2,0xc7,0x20,0x68,0xb4,0x80,0x23,0x01,0x00,0x00]
72 vp2intersectq 291(%r8,%rax,4), %ymm23, %k6
73
74 // CHECK: vp2intersectq (%rip){1to4}, %ymm23, %k6
75 // CHECK: encoding: [0x62,0xf2,0xc7,0x30,0x68,0x35,0x00,0x00,0x00,0x00]
76 vp2intersectq (%rip){1to4}, %ymm23, %k6
77
78 // CHECK: vp2intersectq -1024(,%rbp,2), %ymm23, %k6
79 // CHECK: encoding: [0x62,0xf2,0xc7,0x20,0x68,0x34,0x6d,0x00,0xfc,0xff,0xff]
80 vp2intersectq -1024(,%rbp,2), %ymm23, %k6
81
82 // CHECK: vp2intersectq 4064(%rcx), %ymm23, %k6
83 // CHECK: encoding: [0x62,0xf2,0xc7,0x20,0x68,0x71,0x7f]
84 vp2intersectq 4064(%rcx), %ymm23, %k6
85
86 // CHECK: vp2intersectq -1024(%rdx){1to4}, %ymm23, %k6
87 // CHECK: encoding: [0x62,0xf2,0xc7,0x30,0x68,0x72,0x80]
88 vp2intersectq -1024(%rdx){1to4}, %ymm23, %k6
89
90 // CHECK: vp2intersectq 268435456(%rbp,%r14,8), %xmm23, %k6
91 // CHECK: encoding: [0x62,0xb2,0xc7,0x00,0x68,0xb4,0xf5,0x00,0x00,0x00,0x10]
92 vp2intersectq 268435456(%rbp,%r14,8), %xmm23, %k6
93
94 // CHECK: vp2intersectq 291(%r8,%rax,4), %xmm23, %k6
95 // CHECK: encoding: [0x62,0xd2,0xc7,0x00,0x68,0xb4,0x80,0x23,0x01,0x00,0x00]
96 vp2intersectq 291(%r8,%rax,4), %xmm23, %k6
97
98 // CHECK: vp2intersectq (%rip){1to2}, %xmm23, %k6
99 // CHECK: encoding: [0x62,0xf2,0xc7,0x10,0x68,0x35,0x00,0x00,0x00,0x00]
100 vp2intersectq (%rip){1to2}, %xmm23, %k6
101
102 // CHECK: vp2intersectq -512(,%rbp,2), %xmm23, %k6
103 // CHECK: encoding: [0x62,0xf2,0xc7,0x00,0x68,0x34,0x6d,0x00,0xfe,0xff,0xff]
104 vp2intersectq -512(,%rbp,2), %xmm23, %k6
105
106 // CHECK: vp2intersectq 2032(%rcx), %xmm23, %k6
107 // CHECK: encoding: [0x62,0xf2,0xc7,0x00,0x68,0x71,0x7f]
108 vp2intersectq 2032(%rcx), %xmm23, %k6
109
110 // CHECK: vp2intersectq -1024(%rdx){1to2}, %xmm23, %k6
111 // CHECK: encoding: [0x62,0xf2,0xc7,0x10,0x68,0x72,0x80]
112 vp2intersectq -1024(%rdx){1to2}, %xmm23, %k6
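The xmm23/ymm24 operands in the file above exercise the first EVEX payload byte (P0), whose inverted R/X/B/R' bits extend the ModRM fields to 32 registers (for a register rm operand, X carries the register's bit 4). Under that layout, the 0x62,0x92 prefix of the %ymm24, %ymm23 forms falls out as below (sketch only, not the LLVM encoder):

#include <cstdint>
#include <cassert>

// EVEX P0 layout: [7]=~R, [6]=~X, [5]=~B, [4]=~R', [3:2]=0, [1:0]=map.
uint8_t evexP0(unsigned ModRMReg, unsigned ModRMRm, unsigned Map) {
  uint8_t P0 = (uint8_t)(Map & 3u);
  if (!(ModRMReg & 8))  P0 |= 0x80; // ~R: bit 3 of the reg-field register
  if (!(ModRMRm  & 16)) P0 |= 0x40; // ~X: bit 4 of a register rm operand
  if (!(ModRMRm  & 8))  P0 |= 0x20; // ~B: bit 3 of the rm-field register
  if (!(ModRMReg & 16)) P0 |= 0x10; // ~R': bit 4 of the reg-field register
  return P0;
}

int main() {
  // vp2intersectd %ymm24, %ymm23, %k6: reg field = k6, rm field = ymm24, map 0F38.
  assert(evexP0(/*k6*/ 6, /*ymm24*/ 24, /*0F38*/ 2) == 0x92);
}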
0 // RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
1
2 // CHECK: vp2intersectd k6, ymm23, ymm24
3 // CHECK: encoding: [0x62,0x92,0x47,0x20,0x68,0xf0]
4 vp2intersectd k6, ymm23, ymm24
5
6 // CHECK: vp2intersectd k6, xmm23, xmm24
7 // CHECK: encoding: [0x62,0x92,0x47,0x00,0x68,0xf0]
8 vp2intersectd k6, xmm23, xmm24
9
10 // CHECK: vp2intersectd k6, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
11 // CHECK: encoding: [0x62,0xb2,0x47,0x20,0x68,0xb4,0xf5,0x00,0x00,0x00,0x10]
12 vp2intersectd k6, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
13
14 // CHECK: vp2intersectd k6, ymm23, ymmword ptr [r8 + 4*rax + 291]
15 // CHECK: encoding: [0x62,0xd2,0x47,0x20,0x68,0xb4,0x80,0x23,0x01,0x00,0x00]
16 vp2intersectd k6, ymm23, ymmword ptr [r8 + 4*rax + 291]
17
18 // CHECK: vp2intersectd k6, ymm23, dword ptr [rip]{1to8}
19 // CHECK: encoding: [0x62,0xf2,0x47,0x30,0x68,0x35,0x00,0x00,0x00,0x00]
20 vp2intersectd k6, ymm23, dword ptr [rip]{1to8}
21
22 // CHECK: vp2intersectd k6, ymm23, ymmword ptr [2*rbp - 1024]
23 // CHECK: encoding: [0x62,0xf2,0x47,0x20,0x68,0x34,0x6d,0x00,0xfc,0xff,0xff]
24 vp2intersectd k6, ymm23, ymmword ptr [2*rbp - 1024]
25
26 // CHECK: vp2intersectd k6, ymm23, ymmword ptr [rcx + 4064]
27 // CHECK: encoding: [0x62,0xf2,0x47,0x20,0x68,0x71,0x7f]
28 vp2intersectd k6, ymm23, ymmword ptr [rcx + 4064]
29
30 // CHECK: vp2intersectd k6, ymm23, dword ptr [rdx - 512]{1to8}
31 // CHECK: encoding: [0x62,0xf2,0x47,0x30,0x68,0x72,0x80]
32 vp2intersectd k6, ymm23, dword ptr [rdx - 512]{1to8}
33
34 // CHECK: vp2intersectd k6, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
35 // CHECK: encoding: [0x62,0xb2,0x47,0x00,0x68,0xb4,0xf5,0x00,0x00,0x00,0x10]
36 vp2intersectd k6, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
37
38 // CHECK: vp2intersectd k6, xmm23, xmmword ptr [r8 + 4*rax + 291]
39 // CHECK: encoding: [0x62,0xd2,0x47,0x00,0x68,0xb4,0x80,0x23,0x01,0x00,0x00]
40 vp2intersectd k6, xmm23, xmmword ptr [r8 + 4*rax + 291]
41
42 // CHECK: vp2intersectd k6, xmm23, dword ptr [rip]{1to4}
43 // CHECK: encoding: [0x62,0xf2,0x47,0x10,0x68,0x35,0x00,0x00,0x00,0x00]
44 vp2intersectd k6, xmm23, dword ptr [rip]{1to4}
45
46 // CHECK: vp2intersectd k6, xmm23, xmmword ptr [2*rbp - 512]
47 // CHECK: encoding: [0x62,0xf2,0x47,0x00,0x68,0x34,0x6d,0x00,0xfe,0xff,0xff]
48 vp2intersectd k6, xmm23, xmmword ptr [2*rbp - 512]
49
50 // CHECK: vp2intersectd k6, xmm23, xmmword ptr [rcx + 2032]
51 // CHECK: encoding: [0x62,0xf2,0x47,0x00,0x68,0x71,0x7f]
52 vp2intersectd k6, xmm23, xmmword ptr [rcx + 2032]
53
54 // CHECK: vp2intersectd k6, xmm23, dword ptr [rdx - 512]{1to4}
55 // CHECK: encoding: [0x62,0xf2,0x47,0x10,0x68,0x72,0x80]
56 vp2intersectd k6, xmm23, dword ptr [rdx - 512]{1to4}
57
58 // CHECK: vp2intersectq k6, ymm23, ymm24
59 // CHECK: encoding: [0x62,0x92,0xc7,0x20,0x68,0xf0]
60 vp2intersectq k6, ymm23, ymm24
61
62 // CHECK: vp2intersectq k6, xmm23, xmm24
63 // CHECK: encoding: [0x62,0x92,0xc7,0x00,0x68,0xf0]
64 vp2intersectq k6, xmm23, xmm24
65
66 // CHECK: vp2intersectq k6, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
67 // CHECK: encoding: [0x62,0xb2,0xc7,0x20,0x68,0xb4,0xf5,0x00,0x00,0x00,0x10]
68 vp2intersectq k6, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
69
70 // CHECK: vp2intersectq k6, ymm23, ymmword ptr [r8 + 4*rax + 291]
71 // CHECK: encoding: [0x62,0xd2,0xc7,0x20,0x68,0xb4,0x80,0x23,0x01,0x00,0x00]
72 vp2intersectq k6, ymm23, ymmword ptr [r8 + 4*rax + 291]
73
74 // CHECK: vp2intersectq k6, ymm23, qword ptr [rip]{1to4}
75 // CHECK: encoding: [0x62,0xf2,0xc7,0x30,0x68,0x35,0x00,0x00,0x00,0x00]
76 vp2intersectq k6, ymm23, qword ptr [rip]{1to4}
77
78 // CHECK: vp2intersectq k6, ymm23, ymmword ptr [2*rbp - 1024]
79 // CHECK: encoding: [0x62,0xf2,0xc7,0x20,0x68,0x34,0x6d,0x00,0xfc,0xff,0xff]
80 vp2intersectq k6, ymm23, ymmword ptr [2*rbp - 1024]
81
82 // CHECK: vp2intersectq k6, ymm23, ymmword ptr [rcx + 4064]
83 // CHECK: encoding: [0x62,0xf2,0xc7,0x20,0x68,0x71,0x7f]
84 vp2intersectq k6, ymm23, ymmword ptr [rcx + 4064]
85
86 // CHECK: vp2intersectq k6, ymm23, qword ptr [rdx - 1024]{1to4}
87 // CHECK: encoding: [0x62,0xf2,0xc7,0x30,0x68,0x72,0x80]
88 vp2intersectq k6, ymm23, qword ptr [rdx - 1024]{1to4}
89
90 // CHECK: vp2intersectq k6, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
91 // CHECK: encoding: [0x62,0xb2,0xc7,0x00,0x68,0xb4,0xf5,0x00,0x00,0x00,0x10]
92 vp2intersectq k6, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
93
94 // CHECK: vp2intersectq k6, xmm23, xmmword ptr [r8 + 4*rax + 291]
95 // CHECK: encoding: [0x62,0xd2,0xc7,0x00,0x68,0xb4,0x80,0x23,0x01,0x00,0x00]
96 vp2intersectq k6, xmm23, xmmword ptr [r8 + 4*rax + 291]
97
98 // CHECK: vp2intersectq k6, xmm23, qword ptr [rip]{1to2}
99 // CHECK: encoding: [0x62,0xf2,0xc7,0x10,0x68,0x35,0x00,0x00,0x00,0x00]
100 vp2intersectq k6, xmm23, qword ptr [rip]{1to2}
101
102 // CHECK: vp2intersectq k6, xmm23, xmmword ptr [2*rbp - 512]
103 // CHECK: encoding: [0x62,0xf2,0xc7,0x00,0x68,0x34,0x6d,0x00,0xfe,0xff,0xff]
104 vp2intersectq k6, xmm23, xmmword ptr [2*rbp - 512]
105
106 // CHECK: vp2intersectq k6, xmm23, xmmword ptr [rcx + 2032]
107 // CHECK: encoding: [0x62,0xf2,0xc7,0x00,0x68,0x71,0x7f]
108 vp2intersectq k6, xmm23, xmmword ptr [rcx + 2032]
109
110 // CHECK: vp2intersectq k6, xmm23, qword ptr [rdx - 1024]{1to2}
111 // CHECK: encoding: [0x62,0xf2,0xc7,0x10,0x68,0x72,0x80]
112 vp2intersectq k6, xmm23, qword ptr [rdx - 1024]{1to2}
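A pattern worth noting in the memory encodings of these VL tests: EVEX compresses small displacements as disp8 scaled by the access width N, so ymmword ptr [rcx + 4064] encodes as the single byte 0x7f (127 x 32), while the broadcast form dword ptr [rdx - 512]{1to8} scales by the 4-byte element instead (-128 x 4 gives 0x80). A sketch of that compression rule, with an assumed helper name (this is not the LLVM encoder API):

#include <cstdint>
#include <optional>
#include <cassert>

// EVEX disp8*N: a displacement fits in one byte iff it is a multiple of N
// (the memory access width, or the element width for broadcasts) and the
// scaled value fits in a signed byte; otherwise a full disp32 is emitted.
std::optional<int8_t> compressDisp8(int32_t Disp, int32_t N) {
  if (Disp % N != 0)
    return std::nullopt;
  int32_t Scaled = Disp / N;
  if (Scaled < -128 || Scaled > 127)
    return std::nullopt;
  return (int8_t)Scaled;
}

int main() {
  assert(*compressDisp8(4064, 32) == 0x7f);        // ymmword ptr [rcx + 4064]
  assert(*compressDisp8(-512, 4) == (int8_t)0x80); // dword ptr [rdx - 512]{1to8}
  assert(!compressDisp8(291, 32));                 // [r8 + 4*rax + 291]: disp32
}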
931931 TYPE("VK32WM", TYPE_VK)
932932 TYPE("VK64", TYPE_VK)
933933 TYPE("VK64WM", TYPE_VK)
934 TYPE("VK1Pair", TYPE_VK_PAIR)
935 TYPE("VK2Pair", TYPE_VK_PAIR)
936 TYPE("VK4Pair", TYPE_VK_PAIR)
937 TYPE("VK8Pair", TYPE_VK_PAIR)
938 TYPE("VK16Pair", TYPE_VK_PAIR)
934939 TYPE("vx64mem", TYPE_MVSIBX)
935940 TYPE("vx128mem", TYPE_MVSIBX)
936941 TYPE("vx256mem", TYPE_MVSIBX)
10151020 ENCODING("VK16", ENCODING_RM)
10161021 ENCODING("VK32", ENCODING_RM)
10171022 ENCODING("VK64", ENCODING_RM)
1023 ENCODING("VK1PAIR", ENCODING_RM)
1024 ENCODING("VK2PAIR", ENCODING_RM)
1025 ENCODING("VK4PAIR", ENCODING_RM)
1026 ENCODING("VK8PAIR", ENCODING_RM)
1027 ENCODING("VK16PAIR", ENCODING_RM)
10181028 ENCODING("BNDR", ENCODING_RM)
10191029 errs() << "Unhandled R/M register encoding " << s << "\n";
10201030 llvm_unreachable("Unhandled R/M register encoding");
10491059 ENCODING("VK16", ENCODING_REG)
10501060 ENCODING("VK32", ENCODING_REG)
10511061 ENCODING("VK64", ENCODING_REG)
1062 ENCODING("VK1Pair", ENCODING_REG)
1063 ENCODING("VK2Pair", ENCODING_REG)
1064 ENCODING("VK4Pair", ENCODING_REG)
1065 ENCODING("VK8Pair", ENCODING_REG)
1066 ENCODING("VK16Pair", ENCODING_REG)
10521067 ENCODING("VK1WM", ENCODING_REG)
10531068 ENCODING("VK2WM", ENCODING_REG)
10541069 ENCODING("VK4WM", ENCODING_REG)
10831098 ENCODING("VK16", ENCODING_VVVV)
10841099 ENCODING("VK32", ENCODING_VVVV)
10851100 ENCODING("VK64", ENCODING_VVVV)
1101 ENCODING("VK1PAIR", ENCODING_VVVV)
1102 ENCODING("VK2PAIR", ENCODING_VVVV)
1103 ENCODING("VK4PAIR", ENCODING_VVVV)
1104 ENCODING("VK8PAIR", ENCODING_VVVV)
1105 ENCODING("VK16PAIR", ENCODING_VVVV)
10861106 errs() << "Unhandled VEX.vvvv register encoding " << s << "\n";
10871107 llvm_unreachable("Unhandled VEX.vvvv register encoding");
10881108 }
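Finally, the TYPE_VK_PAIR and ENCODING entries above teach the disassembler-table generator that mask-pair operands occupy the ModRM reg and VEX.vvvv fields exactly like ordinary mask registers; the pair itself can then be recovered by halving the decoded register number. A hypothetical illustration of that mapping (names are not the actual LLVM decoder API):

#include <cassert>

// The four architectural mask-register pairs addressed by VP2INTERSECT.
enum KPair { K0_K1, K2_K3, K4_K5, K6_K7 };

// Recover the pair from a decoded k-register number (0..7).
KPair decodeMaskPair(unsigned KRegNo) {
  assert(KRegNo < 8 && "mask register field holds k0..k7");
  return (KPair)(KRegNo >> 1); // k4 or k5 -> K4_K5
}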