llvm.org GIT mirror llvm / 376a81d
AVX-512: Added legal type MVT::i1 and VK1 register for it. Added scalar compare VCMPSS, VCMPSD. Implemented LowerSELECT for scalar FP operations. I replaced FSETCCss, FSETCCsd with one node type FSETCC. Node extract_vector_elt(v16i1/v8i1, idx) returns an element of type i1. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@197384 91177308-0d34-0410-b5e6-96231b3b80d8 Elena Demikhovsky 6 years ago
13 changed file(s) with 220 addition(s) and 101 deletion(s). Raw diff Collapse all Expand all
477477 ENUM_ENTRY(TYPE_XMM128, "16-byte") \
478478 ENUM_ENTRY(TYPE_XMM256, "32-byte") \
479479 ENUM_ENTRY(TYPE_XMM512, "64-byte") \
480 ENUM_ENTRY(TYPE_VK1, "1-bit") \
480481 ENUM_ENTRY(TYPE_VK8, "8-bit") \
481482 ENUM_ENTRY(TYPE_VK16, "16-bit") \
482483 ENUM_ENTRY(TYPE_XMM0, "Implicit use of XMM0") \
13051305 addRegisterClass(MVT::v8i64, &X86::VR512RegClass);
13061306 addRegisterClass(MVT::v8f64, &X86::VR512RegClass);
13071307
1308 addRegisterClass(MVT::i1, &X86::VK1RegClass);
13081309 addRegisterClass(MVT::v8i1, &X86::VK8RegClass);
13091310 addRegisterClass(MVT::v16i1, &X86::VK16RegClass);
13101311
1312 setOperationAction(ISD::BR_CC, MVT::i1, Expand);
1313 setOperationAction(ISD::SETCC, MVT::i1, Custom);
1314 setOperationAction(ISD::XOR, MVT::i1, Legal);
13111315 setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, Legal);
13121316 setOperationAction(ISD::LOAD, MVT::v16f32, Legal);
13131317 setOperationAction(ISD::LOAD, MVT::v8f64, Legal);
13751379
13761380 setOperationAction(ISD::MUL, MVT::v8i64, Custom);
13771381
1382 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i1, Custom);
1383 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i1, Custom);
13781384 setOperationAction(ISD::BUILD_VECTOR, MVT::v8i1, Custom);
13791385 setOperationAction(ISD::BUILD_VECTOR, MVT::v16i1, Custom);
13801386 setOperationAction(ISD::SELECT, MVT::v8f64, Custom);
22202226 RC = &X86::VR128RegClass;
22212227 else if (RegVT == MVT::x86mmx)
22222228 RC = &X86::VR64RegClass;
2229 else if (RegVT == MVT::i1)
2230 RC = &X86::VK1RegClass;
22232231 else if (RegVT == MVT::v8i1)
22242232 RC = &X86::VK8RegClass;
22252233 else if (RegVT == MVT::v16i1)
76687676 return SDValue();
76697677 }
76707678
7679 /// Extract one bit from mask vector, like v16i1 or v8i1.
7680 /// AVX-512 feature.
7681 static SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG) {
7682 SDValue Vec = Op.getOperand(0);
7683 SDLoc dl(Vec);
7684 MVT VecVT = Vec.getSimpleValueType();
7685 SDValue Idx = Op.getOperand(1);
7686 MVT EltVT = Op.getSimpleValueType();
7687
7688 assert((EltVT == MVT::i1) && "Unexpected operands in ExtractBitFromMaskVector");
7689
7690 // A variable index can't be handled in mask registers:
7691 // zero-extend the mask to a 512-bit vector (VR512), extract the element there, and truncate it back.
7692 if (!isa(Idx)) {
7693 MVT ExtVT = (VecVT == MVT::v8i1 ? MVT::v8i64 : MVT::v16i32);
7694 SDValue Ext = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtVT, Vec);
7695 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
7696 ExtVT.getVectorElementType(), Ext, Idx);
7697 return DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt);
7698 }
7699
7700 unsigned IdxVal = cast(Idx)->getZExtValue();
7701 if (IdxVal) {
7702 unsigned MaxSift = VecVT.getSizeInBits() - 1;
7703 Vec = DAG.getNode(X86ISD::VSHLI, dl, VecVT, Vec,
7704 DAG.getConstant(MaxSift - IdxVal, MVT::i8));
7705 Vec = DAG.getNode(X86ISD::VSRLI, dl, VecVT, Vec,
7706 DAG.getConstant(MaxSift, MVT::i8));
7707 }
7708 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i1, Vec,
7709 DAG.getIntPtrConstant(0));
7710 }
7711
76717712 SDValue
76727713 X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
76737714 SelectionDAG &DAG) const {
76757716 SDValue Vec = Op.getOperand(0);
76767717 MVT VecVT = Vec.getSimpleValueType();
76777718 SDValue Idx = Op.getOperand(1);
7719
7720 if (Op.getSimpleValueType() == MVT::i1)
7721 return ExtractBitFromMaskVector(Op, DAG);
7722
76787723 if (!isa(Idx)) {
76797724 if (VecVT.is512BitVector() ||
76807725 (VecVT.is256BitVector() && Subtarget->hasInt256() &&
96809725 /// equivalent.
96819726 SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
96829727 SelectionDAG &DAG) const {
9683 if (ConstantSDNode *C = dyn_cast(Op1))
9728 SDLoc dl(Op0);
9729 if (ConstantSDNode *C = dyn_cast(Op1)) {
96849730 if (C->getAPIntValue() == 0)
96859731 return EmitTest(Op0, X86CC, DAG);
96869732
9687 SDLoc dl(Op0);
9733 if (Op0.getValueType() == MVT::i1) {
9734 Op0 = DAG.getNode(ISD::XOR, dl, MVT::i1, Op0, DAG.getConstant(-1, MVT::i1));
9735 return DAG.getNode(X86ISD::CMP, dl, MVT::i1, Op0, Op0);
9736 }
9737 }
9738
96889739 if ((Op0.getValueType() == MVT::i8 || Op0.getValueType() == MVT::i16 ||
96899740 Op0.getValueType() == MVT::i32 || Op0.getValueType() == MVT::i64)) {
96909741 // Do the comparison at i32 if it's smaller. This avoids subregister
1012010171
1012110172 if (VT.isVector()) return LowerVSETCC(Op, Subtarget, DAG);
1012210173
10123 assert(VT == MVT::i8 && "SetCC type must be 8-bit integer");
10174 assert((VT == MVT::i8 || (Subtarget->hasAVX512() && VT == MVT::i1))
10175 && "SetCC type must be 8-bit or 1-bit integer");
1012410176 SDValue Op0 = Op.getOperand(0);
1012510177 SDValue Op1 = Op.getOperand(1);
1012610178 SDLoc dl(Op);
1023310285 cast(Cond.getOperand(2))->get(), CondOp0, CondOp1);
1023410286
1023510287 if (SSECC != 8) {
10236 unsigned Opcode = VT == MVT::f32 ? X86ISD::FSETCCss : X86ISD::FSETCCsd;
10237 SDValue Cmp = DAG.getNode(Opcode, DL, VT, CondOp0, CondOp1,
10288 if (Subtarget->hasAVX512()) {
10289 SDValue Cmp = DAG.getNode(X86ISD::FSETCC, DL, MVT::i1, CondOp0, CondOp1,
10290 DAG.getConstant(SSECC, MVT::i8));
10291 return DAG.getNode(X86ISD::SELECT, DL, VT, Cmp, Op1, Op2);
10292 }
10293 SDValue Cmp = DAG.getNode(X86ISD::FSETCC, DL, VT, CondOp0, CondOp1,
1023810294 DAG.getConstant(SSECC, MVT::i8));
1023910295 SDValue AndN = DAG.getNode(X86ISD::FANDN, DL, VT, Cmp, Op2);
1024010296 SDValue And = DAG.getNode(X86ISD::FAND, DL, VT, Cmp, Op1);
1377313829 case X86ISD::CMPMU: return "X86ISD::CMPMU";
1377413830 case X86ISD::SETCC: return "X86ISD::SETCC";
1377513831 case X86ISD::SETCC_CARRY: return "X86ISD::SETCC_CARRY";
13776 case X86ISD::FSETCCsd: return "X86ISD::FSETCCsd";
13777 case X86ISD::FSETCCss: return "X86ISD::FSETCCss";
13832 case X86ISD::FSETCC: return "X86ISD::FSETCC";
1377813833 case X86ISD::CMOV: return "X86ISD::CMOV";
1377913834 case X86ISD::BRCOND: return "X86ISD::BRCOND";
1378013835 case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG";
1386913924 case X86ISD::TESTP: return "X86ISD::TESTP";
1387013925 case X86ISD::TESTM: return "X86ISD::TESTM";
1387113926 case X86ISD::KORTEST: return "X86ISD::KORTEST";
13872 case X86ISD::KTEST: return "X86ISD::KTEST";
1387313927 case X86ISD::PALIGNR: return "X86ISD::PALIGNR";
1387413928 case X86ISD::PSHUFD: return "X86ISD::PSHUFD";
1387513929 case X86ISD::PSHUFHW: return "X86ISD::PSHUFHW";
1641916473 EltNo);
1642016474 }
1642116475
16422 /// Extract one bit from mask vector, like v16i1 or v8i1.
16423 /// AVX-512 feature.
16424 static SDValue ExtractBitFromMaskVector(SDNode *N, SelectionDAG &DAG) {
16425 SDValue Vec = N->getOperand(0);
16426 SDLoc dl(Vec);
16427 MVT VecVT = Vec.getSimpleValueType();
16428 SDValue Idx = N->getOperand(1);
16429 MVT EltVT = N->getSimpleValueType(0);
16430
16431 assert((VecVT.getVectorElementType() == MVT::i1 && EltVT == MVT::i8) ||
16432 "Unexpected operands in ExtractBitFromMaskVector");
16433
16434 // variable index
16435 if (!isa(Idx)) {
16436 MVT ExtVT = (VecVT == MVT::v8i1 ? MVT::v8i64 : MVT::v16i32);
16437 SDValue Ext = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtVT, Vec);
16438 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
16439 ExtVT.getVectorElementType(), Ext);
16440 return DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt);
16441 }
16442
16443 unsigned IdxVal = cast(Idx)->getZExtValue();
16444
16445 MVT ScalarVT = MVT::getIntegerVT(VecVT.getSizeInBits());
16446 unsigned MaxShift = VecVT.getSizeInBits() - 1;
16447 Vec = DAG.getNode(ISD::BITCAST, dl, ScalarVT, Vec);
16448 Vec = DAG.getNode(ISD::SHL, dl, ScalarVT, Vec,
16449 DAG.getConstant(MaxShift - IdxVal, ScalarVT));
16450 Vec = DAG.getNode(ISD::SRL, dl, ScalarVT, Vec,
16451 DAG.getConstant(MaxShift, ScalarVT));
16452
16453 if (VecVT == MVT::v16i1) {
16454 Vec = DAG.getNode(ISD::BITCAST, dl, MVT::i16, Vec);
16455 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Vec);
16456 }
16457 return DAG.getNode(ISD::BITCAST, dl, MVT::i8, Vec);
16458 }
16459
1646016476 /// PerformEXTRACT_VECTOR_ELTCombine - Detect vector gather/scatter index
1646116477 /// generation and convert it from being a bunch of shuffles and extracts
1646216478 /// to a simple store and scalar loads to extract the elements.
1646716483 return NewOp;
1646816484
1646916485 SDValue InputVector = N->getOperand(0);
16470
16471 if (InputVector.getValueType().getVectorElementType() == MVT::i1 &&
16472 !DCI.isBeforeLegalize())
16473 return ExtractBitFromMaskVector(N, DAG);
1647416486
1647516487 // Detect whether we are trying to convert from mmx to i32 and the bitcast
1647616488 // from mmx to v2i32 has a single usage.
1761517627 if ((cc0 == X86::COND_E && cc1 == X86::COND_NP) ||
1761617628 (cc0 == X86::COND_NE && cc1 == X86::COND_P)) {
1761717629 bool is64BitFP = (CMP00.getValueType() == MVT::f64);
17618 X86ISD::NodeType NTOperator = is64BitFP ?
17619 X86ISD::FSETCCsd : X86ISD::FSETCCss;
1762017630 // FIXME: need symbolic constants for these magic numbers.
1762117631 // See X86ATTInstPrinter.cpp:printSSECC().
1762217632 unsigned x86cc = (cc0 == X86::COND_E) ? 0 : 4;
17623 SDValue OnesOrZeroesF = DAG.getNode(NTOperator, DL, MVT::f32, CMP00, CMP01,
17633 SDValue OnesOrZeroesF = DAG.getNode(X86ISD::FSETCC, DL, CMP00.getValueType(), CMP00, CMP01,
1762417634 DAG.getConstant(x86cc, MVT::i8));
17625 SDValue OnesOrZeroesI = DAG.getNode(ISD::BITCAST, DL, MVT::i32,
17635 MVT IntVT = (is64BitFP ? MVT::i64 : MVT::i32);
17636 SDValue OnesOrZeroesI = DAG.getNode(ISD::BITCAST, DL, IntVT,
1762617637 OnesOrZeroesF);
17627 SDValue ANDed = DAG.getNode(ISD::AND, DL, MVT::i32, OnesOrZeroesI,
17628 DAG.getConstant(1, MVT::i32));
17638 SDValue ANDed = DAG.getNode(ISD::AND, DL, IntVT, OnesOrZeroesI,
17639 DAG.getConstant(1, IntVT));
1762917640 SDValue OneBitOfTruth = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ANDed);
1763017641 return OneBitOfTruth;
1763117642 }
9393 /// operand, usually produced by a CMP instruction.
9494 SETCC,
9595
96 /// X86 Select
97 SELECT,
98
9699 // Same as SETCC except it's materialized with a sbb and the value is all
97100 // one's or all zero's.
98101 SETCC_CARRY, // R = carry_bit ? ~0 : 0
100103 /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
101104 /// Operands are two FP values to compare; result is a mask of
102105 /// 0s or 1s. Generally DTRT for C/C++ with NaNs.
103 FSETCCss, FSETCCsd,
106 FSETCC,
104107
105108 /// X86 MOVMSK{pd|ps}, extracts sign bits of two or four FP values,
106109 /// result in an integer GPR. Needs masking for scalar result.
313316
314317 // OR/AND test for masks
315318 KORTEST,
316 KTEST,
317319
318320 // Several flavors of instructions with vector shuffle behaviors.
319321 PALIGNR,
682682 (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
683683 (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;
684684 }
685 //===----------------------------------------------------------------------===//
686 // Compare Instructions
687 //===----------------------------------------------------------------------===//
688
689 // avx512_cmp_scalar - AVX512 CMPSS and CMPSD
690 multiclass avx512_cmp_scalar
691 Operand CC, SDNode OpNode, ValueType VT,
692 PatFrag ld_frag, string asm, string asm_alt> {
693 def rr : AVX512Ii8<0xC2, MRMSrcReg,
694 (outs VK1:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
695 [(set VK1:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))],
696 IIC_SSE_ALU_F32S_RR>, EVEX_4V;
697 def rm : AVX512Ii8<0xC2, MRMSrcMem,
698 (outs VK1:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
699 [(set VK1:$dst, (OpNode (VT RC:$src1),
700 (ld_frag addr:$src2), imm:$cc))], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
701 let neverHasSideEffects = 1 in {
702 def rri_alt : AVX512Ii8<0xC2, MRMSrcReg,
703 (outs VK1:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
704 asm_alt, [], IIC_SSE_ALU_F32S_RR>, EVEX_4V;
705 def rmi_alt : AVX512Ii8<0xC2, MRMSrcMem,
706 (outs VK1:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
707 asm_alt, [], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
708 }
709 }
710
711 let Predicates = [HasAVX512] in {
712 defm VCMPSSZ : avx512_cmp_scalar
713 "vcmp${cc}ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
714 "vcmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
715 XS;
716 defm VCMPSDZ : avx512_cmp_scalar
717 "vcmp${cc}sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
718 "vcmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
719 XD, VEX_W;
720 }
685721
686722 multiclass avx512_icmp_packed opc, string OpcodeStr, RegisterClass KRC,
687723 RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag,
733769 // Accept explicit immediate argument form instead of comparison code.
734770 let neverHasSideEffects = 1 in {
735771 def rri_alt : AVX512AIi8
736 (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
772 (outs KRC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
737773 asm_alt, [], IIC_SSE_ALU_F32P_RR>, EVEX_4V;
738774 def rmi_alt : AVX512AIi8
739 (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
775 (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
740776 asm_alt, [], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
741777 }
742778 }
863899 def : Pat<(store (v16i1 VK16:$src), addr:$dst),
864900 (KMOVWmk addr:$dst, VK16:$src)>;
865901
866 def : Pat<(store (v8i1 VK8:$src), addr:$dst),
867 (KMOVWmk addr:$dst, (v16i1 (COPY_TO_REGCLASS VK8:$src, VK16)))>;
902 def : Pat<(store VK8:$src, addr:$dst),
903 (KMOVWmk addr:$dst, (COPY_TO_REGCLASS VK8:$src, VK16))>;
904
905 def : Pat<(i1 (load addr:$src)),
906 (COPY_TO_REGCLASS (KMOVWkm addr:$src), VK1)>;
907
908 def : Pat<(v8i1 (load addr:$src)),
909 (COPY_TO_REGCLASS (KMOVWkm addr:$src), VK8)>;
868910 }
869911 // With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
870912 let Predicates = [HasAVX512] in {
877919 (EXTRACT_SUBREG
878920 (KMOVWrk (COPY_TO_REGCLASS VK8:$src, VK16)),
879921 sub_8bit)>;
922
923 def : Pat<(i1 (extractelt VK16:$src, (iPTR 0))),
924 (COPY_TO_REGCLASS VK16:$src, VK1)>;
925 def : Pat<(i1 (extractelt VK8:$src, (iPTR 0))),
926 (COPY_TO_REGCLASS VK8:$src, VK1)>;
927
880928 }
881929
882930 // Mask unary operation
944992 defm KXOR : avx512_mask_binop_w<0x47, "kxor", xor>;
945993 }
946994
995 def : Pat<(xor VK1:$src1, VK1:$src2),
996 (COPY_TO_REGCLASS (KXORWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
997 (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
998
999 def : Pat<(or VK1:$src1, VK1:$src2),
1000 (COPY_TO_REGCLASS (KORWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
1001 (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
1002
1003 def : Pat<(not VK1:$src),
1004 (COPY_TO_REGCLASS (KXORWrr (COPY_TO_REGCLASS VK1:$src, VK16),
1005 (COPY_TO_REGCLASS (VCMPSSZrr (f32 (IMPLICIT_DEF)),
1006 (f32 (IMPLICIT_DEF)), (i8 0)), VK16)), VK1)>;
1007
9471008 multiclass avx512_mask_binop_int {
9481009 let Predicates = [HasAVX512] in
9491010 def : Pat<(!cast("int_x86_avx512_"##IntName##"_w")
10151076 }
10161077
10171078 defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest>;
1018 defm KTEST : avx512_mask_testop_w<0x99, "ktest", X86ktest>;
1079
1080 def : Pat<(X86cmp VK1:$src1, VK1:$src2),
1081 (KORTESTWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
1082 (COPY_TO_REGCLASS VK1:$src2, VK16))>;
10191083
10201084 // Mask shift
10211085 multiclass avx512_mask_shiftop opc, string OpcodeStr, RegisterClass KRC,
10331097 VEX, OpSize, TA, VEX_W;
10341098 }
10351099
1036 defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", shl>;
1037 defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", srl>;
1100 defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86vshli>;
1101 defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86vsrli>;
10381102
10391103 // Mask setting all 0s or 1s
10401104 multiclass avx512_mask_setop {
10451109 }
10461110
10471111 multiclass avx512_mask_setop_w {
1048 defm B : avx512_mask_setopv8i1, Val>;
1112 defm B : avx512_mask_setop v8i1, Val>;
10491113 defm W : avx512_mask_setop;
10501114 }
10511115
13401404 [(set VR128X:$dst, (vt (OpNode VR128X:$src1,
13411405 (scalar_to_vector RC:$src2))))],
13421406 IIC_SSE_MOV_S_RR>, EVEX_4V, VEX_LIG;
1407 let Constraints = "$src1 = $dst" in
1408 def rrk : SI<0x10, MRMSrcReg, (outs VR128X:$dst),
1409 (ins VR128X:$src1, VK1WM:$mask, RC:$src2, RC:$src3),
1410 !strconcat(asm,
1411 "\t{$src3, $src2, $dst {${mask}}|$dst {${mask}}, $src2, $src3}"),
1412 [], IIC_SSE_MOV_S_RR>, EVEX_4V, VEX_LIG, EVEX_K;
13431413 def rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
13441414 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
13451415 [(set RC:$dst, (mem_pat addr:$src))], IIC_SSE_MOV_S_RM>,
13581428 defm VMOVSDZ : avx512_move_scalar<"movsd", FR64X, X86Movsd, v2f64, f64mem,
13591429 loadf64>, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
13601430
1431 def : Pat<(f32 (X86select VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
1432 (COPY_TO_REGCLASS (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X),
1433 VK1WM:$mask, (f32 (IMPLICIT_DEF)), FR32X:$src1), FR32X)>;
1434
1435 def : Pat<(f64 (X86select VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
1436 (COPY_TO_REGCLASS (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X),
1437 VK1WM:$mask, (f64 (IMPLICIT_DEF)), FR64X:$src1), FR64X)>;
13611438
13621439 // For the disassembler
13631440 let isCodeGenOnly = 1 in {
5858 def X86hsub : SDNode<"X86ISD::HSUB", SDTIntBinOp>;
5959 def X86comi : SDNode<"X86ISD::COMI", SDTX86CmpTest>;
6060 def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>;
61 def X86cmpss : SDNode<"X86ISD::FSETCCss", SDTX86Cmpss>;
62 def X86cmpsd : SDNode<"X86ISD::FSETCCsd", SDTX86Cmpsd>;
61 def X86cmps : SDNode<"X86ISD::FSETCC", SDTX86Cmps>;
62 //def X86cmpsd : SDNode<"X86ISD::FSETCCsd", SDTX86Cmpsd>;
6363 def X86pshufb : SDNode<"X86ISD::PSHUFB",
6464 SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
6565 SDTCisSameAs<0,2>]>>;
129129 def X86pcmpeqm : SDNode<"X86ISD::PCMPEQM", X86IntCmpMask, [SDNPCommutative]>;
130130 def X86pcmpgtm : SDNode<"X86ISD::PCMPGTM", X86IntCmpMask>;
131131
132 def X86CmpMaskCC : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
132 def X86CmpMaskCC :
133 SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
134 def X86CmpMaskCCScalar :
135 SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
136
133137 def X86cmpm : SDNode<"X86ISD::CMPM", X86CmpMaskCC>;
134138 def X86cmpmu : SDNode<"X86ISD::CMPMU", X86CmpMaskCC>;
139 def X86cmpms : SDNode<"X86ISD::FSETCC", X86CmpMaskCCScalar>;
135140
136141 def X86vshl : SDNode<"X86ISD::VSHL",
137142 SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
154159 def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>;
155160 def X86testp : SDNode<"X86ISD::TESTP", SDTX86CmpPTest>;
156161 def X86kortest : SDNode<"X86ISD::KORTEST", SDTX86CmpPTest>;
157 def X86ktest : SDNode<"X86ISD::KTEST", SDTX86CmpPTest>;
158 def X86testm : SDNode<"X86ISD::TESTM", SDTypeProfile<1, 2, [SDTCisVec<0>,
162 def X86testm : SDNode<"X86ISD::TESTM", SDTypeProfile<1, 2, [SDTCisVec<0>,
159163 SDTCisVec<1>,
160164 SDTCisSameAs<2, 1>]>>;
165 def X86select : SDNode<"X86ISD::SELECT" , SDTSelect>;
161166
162167 def X86pmuludq : SDNode<"X86ISD::PMULUDQ",
163168 SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
30143014 return 0;
30153015 }
30163016
3017 inline static bool MaskRegClassContains(unsigned Reg) {
3018 return X86::VK8RegClass.contains(Reg) ||
3019 X86::VK16RegClass.contains(Reg) ||
3020 X86::VK1RegClass.contains(Reg);
3021 }
30173022 static
30183023 unsigned copyPhysRegOpcode_AVX512(unsigned& DestReg, unsigned& SrcReg) {
30193024 if (X86::VR128XRegClass.contains(DestReg, SrcReg) ||
30233028 SrcReg = get512BitSuperRegister(SrcReg);
30243029 return X86::VMOVAPSZrr;
30253030 }
3026 if ((X86::VK8RegClass.contains(DestReg) ||
3027 X86::VK16RegClass.contains(DestReg)) &&
3028 (X86::VK8RegClass.contains(SrcReg) ||
3029 X86::VK16RegClass.contains(SrcReg)))
3031 if (MaskRegClassContains(DestReg) &&
3032 MaskRegClassContains(SrcReg))
30303033 return X86::KMOVWkk;
3031 if ((X86::VK8RegClass.contains(DestReg) ||
3032 X86::VK16RegClass.contains(DestReg)) &&
3034 if (MaskRegClassContains(DestReg) &&
30333035 (X86::GR32RegClass.contains(SrcReg) ||
30343036 X86::GR16RegClass.contains(SrcReg) ||
30353037 X86::GR8RegClass.contains(SrcReg))) {
30393041 if ((X86::GR32RegClass.contains(DestReg) ||
30403042 X86::GR16RegClass.contains(DestReg) ||
30413043 X86::GR8RegClass.contains(DestReg)) &&
3042 (X86::VK8RegClass.contains(SrcReg) ||
3043 X86::VK16RegClass.contains(SrcReg))) {
3044 MaskRegClassContains(SrcReg)) {
30443045 DestReg = getX86SubSuperRegister(DestReg, MVT::i32);
30453046 return X86::KMOVWrk;
30463047 }
2222
2323 def SDTX86CmpTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisSameAs<1, 2>]>;
2424
25 def SDTX86Cmpsd : SDTypeProfile<1, 3, [SDTCisVT<0, f64>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
26 def SDTX86Cmpss : SDTypeProfile<1, 3, [SDTCisVT<0, f32>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
25 def SDTX86Cmps : SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
26 //def SDTX86Cmpss : SDTypeProfile<1, 3, [SDTCisVT<0, f32>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
2727
2828 def SDTX86Cmov : SDTypeProfile<1, 4,
2929 [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>,
22982298 }
22992299 }
23002300
2301 defm VCMPSS : sse12_cmp_scalars, f32, loadf32,
2301 defm VCMPSS : sse12_cmp_scalar, f32, loadf32,
23022302 "cmp${cc}ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
23032303 "cmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
23042304 SSE_ALU_F32S>,
23052305 XS, VEX_4V, VEX_LIG;
2306 defm VCMPSD : sse12_cmp_scalard, f64, loadf64,
2306 defm VCMPSD : sse12_cmp_scalar, f64, loadf64,
23072307 "cmp${cc}sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
23082308 "cmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
23092309 SSE_ALU_F32S>, // same latency as 32 bit compare
23102310 XD, VEX_4V, VEX_LIG;
23112311
23122312 let Constraints = "$src1 = $dst" in {
2313 defm CMPSS : sse12_cmp_scalars, f32, loadf32,
2313 defm CMPSS : sse12_cmp_scalar, f32, loadf32,
23142314 "cmp${cc}ss\t{$src2, $dst|$dst, $src2}",
23152315 "cmpss\t{$cc, $src2, $dst|$dst, $src2, $cc}", SSE_ALU_F32S>,
23162316 XS;
2317 defm CMPSD : sse12_cmp_scalard, f64, loadf64,
2317 defm CMPSD : sse12_cmp_scalar, f64, loadf64,
23182318 "cmp${cc}sd\t{$src2, $dst|$dst, $src2}",
23192319 "cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
23202320 SSE_ALU_F64S>,
462462 def VR256X : RegisterClass<"X86", [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
463463 256, (sequence "YMM%u", 0, 31)>;
464464
465 def VK1 : RegisterClass<"X86", [i1], 1, (sequence "K%u", 0, 7)>;
465466 def VK8 : RegisterClass<"X86", [v8i1], 8, (sequence "K%u", 0, 7)>;
466467 def VK16 : RegisterClass<"X86", [v16i1], 16, (add VK8)>;
467468
469 def VK1WM : RegisterClass<"X86", [i1], 1, (sub VK1, K0)>;
468470 def VK8WM : RegisterClass<"X86", [v8i1], 8, (sub VK8, K0)>;
469471 def VK16WM : RegisterClass<"X86", [v16i1], 16, (add VK8WM)>;
470472
9898 }
9999
100100 ;CHECK-LABEL: test11
101 ;CHECK: movl $260
102 ;CHECK: bextrl
103 ;CHECK: movl $268
104 ;CHECK: bextrl
101 ;CHECK: vpcmpltud
102 ;CHECK: kshiftlw $11
103 ;CHECK: kshiftrw $15
104 ;CHECK: kxorw
105 ;CHECK: kortestw
106 ;CHECK: jne
107 ;CHECK: ret
105108 ;CHECK: ret
106109 define <16 x i32> @test11(<16 x i32>%a, <16 x i32>%b) {
107110 %cmp_res = icmp ult <16 x i32> %a, %b
108111 %ia = extractelement <16 x i1> %cmp_res, i32 4
109 %ib = extractelement <16 x i1> %cmp_res, i32 12
110
111112 br i1 %ia, label %A, label %B
112
113113 A:
114114 ret <16 x i32>%b
115115 B:
116116 %c = add <16 x i32>%b, %a
117 br i1 %ib, label %C, label %D
118 C:
119 %c1 = sub <16 x i32>%c, %a
120 ret <16 x i32>%c1
121 D:
122 %c2 = mul <16 x i32>%c, %a
123 ret <16 x i32>%c2
117 ret <16 x i32>%c
124118 }
1919 ret <8 x i64> %res
2020 }
2121
22 ; CHECK-LABEL: @select02
23 ; CHECK: cmpless %xmm0, %xmm3, %k1
24 ; CHECK-NEXT: vmovss %xmm2, {{.*}}%xmm1 {%k1}
25 ; CHECK: ret
26 define float @select02(float %a, float %b, float %c, float %eps) {
27 %cmp = fcmp oge float %a, %eps
28 %cond = select i1 %cmp, float %c, float %b
29 ret float %cond
30 }
31
32 ; CHECK-LABEL: @select03
33 ; CHECK: cmplesd %xmm0, %xmm3, %k1
34 ; CHECK-NEXT: vmovsd %xmm2, {{.*}}%xmm1 {%k1}
35 ; CHECK: ret
36 define double @select03(double %a, double %b, double %c, double %eps) {
37 %cmp = fcmp oge double %a, %eps
38 %cond = select i1 %cmp, double %c, double %b
39 ret double %cond
40 }
77 %e = sitofp i32 %i to double
88 ; CHECK: cmpeqsd
99 %c = fcmp oeq double %d, %e
10 ; CHECK-NEXT: movd
11 ; CHECK-NEXT: andl
10 ; CHECK-NEXT: movq
11 ; CHECK-NEXT: andq
1212 %z = zext i1 %c to i32
1313 ret i32 %z
1414 }
12681268 TYPE("VR256", TYPE_XMM256)
12691269 TYPE("VR256X", TYPE_XMM256)
12701270 TYPE("VR512", TYPE_XMM512)
1271 TYPE("VK1", TYPE_VK1)
1272 TYPE("VK1WM", TYPE_VK1)
12711273 TYPE("VK8", TYPE_VK8)
12721274 TYPE("VK8WM", TYPE_VK8)
12731275 TYPE("VK16", TYPE_VK16)
13391341 ENCODING("VR256", ENCODING_RM)
13401342 ENCODING("VR256X", ENCODING_RM)
13411343 ENCODING("VR512", ENCODING_RM)
1344 ENCODING("VK1", ENCODING_RM)
13421345 ENCODING("VK8", ENCODING_RM)
13431346 ENCODING("VK16", ENCODING_RM)
13441347 errs() << "Unhandled R/M register encoding " << s << "\n";
13661369 ENCODING("FR64X", ENCODING_REG)
13671370 ENCODING("FR32X", ENCODING_REG)
13681371 ENCODING("VR512", ENCODING_REG)
1372 ENCODING("VK1", ENCODING_REG)
13691373 ENCODING("VK8", ENCODING_REG)
13701374 ENCODING("VK16", ENCODING_REG)
1375 ENCODING("VK1WM", ENCODING_REG)
13711376 ENCODING("VK8WM", ENCODING_REG)
13721377 ENCODING("VK16WM", ENCODING_REG)
13731378 errs() << "Unhandled reg/opcode register encoding " << s << "\n";
13881393 ENCODING("VR128X", ENCODING_VVVV)
13891394 ENCODING("VR256X", ENCODING_VVVV)
13901395 ENCODING("VR512", ENCODING_VVVV)
1396 ENCODING("VK1", ENCODING_VVVV)
13911397 ENCODING("VK8", ENCODING_VVVV)
13921398 ENCODING("VK16", ENCODING_VVVV)
13931399 errs() << "Unhandled VEX.vvvv register encoding " << s << "\n";
13971403 OperandEncoding RecognizableInstr::writemaskRegisterEncodingFromString
13981404 (const std::string &s,
13991405 bool hasOpSizePrefix) {
1406 ENCODING("VK1WM", ENCODING_WRITEMASK)
14001407 ENCODING("VK8WM", ENCODING_WRITEMASK)
14011408 ENCODING("VK16WM", ENCODING_WRITEMASK)
14021409 errs() << "Unhandled mask register encoding " << s << "\n";