llvm.org GIT mirror llvm / 9c7ae01
Cleanup pcmp(e/i)str(m/i) instruction definitions and load folding support. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@167652 91177308-0d34-0410-b5e6-96231b3b80d8 Craig Topper 7 years ago
5 changed file(s) with 245 addition(s) and 172 deletion(s). Raw diff Collapse all Expand all
26782678
26792679 return Result;
26802680 }
2681
2682 // FIXME: Custom handling because TableGen doesn't support multiple implicit
2683 // defs in an instruction pattern
2684 case X86ISD::PCMPESTRI: {
2685 SDValue N0 = Node->getOperand(0);
2686 SDValue N1 = Node->getOperand(1);
2687 SDValue N2 = Node->getOperand(2);
2688 SDValue N3 = Node->getOperand(3);
2689 SDValue N4 = Node->getOperand(4);
2690
2691 // Make sure last argument is a constant
2692 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N4);
2693 if (!Cst)
2694 break;
2695
2696 uint64_t Imm = Cst->getZExtValue();
2697
2698 SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl,
2699 X86::EAX, N1, SDValue()).getValue(1);
2700 InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EDX,
2701 N3, InFlag).getValue(1);
2702
2703 SDValue Ops[] = { N0, N2, getI8Imm(Imm), InFlag };
2704 unsigned Opc = Subtarget->hasAVX() ? X86::VPCMPESTRIrr :
2705 X86::PCMPESTRIrr;
2706 InFlag = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Ops,
2707 array_lengthof(Ops)), 0);
2708
2709 if (!SDValue(Node, 0).use_empty()) {
2710 SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
2711 X86::ECX, NVT, InFlag);
2712 InFlag = Result.getValue(2);
2713 ReplaceUses(SDValue(Node, 0), Result);
2714 }
2715 if (!SDValue(Node, 1).use_empty()) {
2716 SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
2717 X86::EFLAGS, NVT, InFlag);
2718 InFlag = Result.getValue(2);
2719 ReplaceUses(SDValue(Node, 1), Result);
2720 }
2721
2722 return NULL;
2723 }
2724
2725 // FIXME: Custom handling because TableGen doesn't support multiple implicit
2726 // defs in an instruction pattern
2727 case X86ISD::PCMPISTRI: {
2728 SDValue N0 = Node->getOperand(0);
2729 SDValue N1 = Node->getOperand(1);
2730 SDValue N2 = Node->getOperand(2);
2731
2732 // Make sure last argument is a constant
2733 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N2);
2734 if (!Cst)
2735 break;
2736
2737 uint64_t Imm = Cst->getZExtValue();
2738
2739 SDValue Ops[] = { N0, N1, getI8Imm(Imm) };
2740 unsigned Opc = Subtarget->hasAVX() ? X86::VPCMPISTRIrr :
2741 X86::PCMPISTRIrr;
2742 SDValue InFlag = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Ops,
2743 array_lengthof(Ops)), 0);
2744
2745 if (!SDValue(Node, 0).use_empty()) {
2746 SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
2747 X86::ECX, NVT, InFlag);
2748 InFlag = Result.getValue(2);
2749 ReplaceUses(SDValue(Node, 0), Result);
2750 }
2751 if (!SDValue(Node, 1).use_empty()) {
2752 SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
2753 X86::EFLAGS, NVT, InFlag);
2754 InFlag = Result.getValue(2);
2755 ReplaceUses(SDValue(Node, 1), Result);
2756 }
2757
2758 return NULL;
2759 }
27602681 }
27612682
27622683 SDNode *ResNode = SelectCode(Node);
1204412044 case X86ISD::FNMSUB: return "X86ISD::FNMSUB";
1204512045 case X86ISD::FMADDSUB: return "X86ISD::FMADDSUB";
1204612046 case X86ISD::FMSUBADD: return "X86ISD::FMSUBADD";
12047 case X86ISD::PCMPESTRI: return "X86ISD::PCMPESTRI";
12048 case X86ISD::PCMPISTRI: return "X86ISD::PCMPISTRI";
1204712049 }
1204812050 }
1204912051
1283812840 // or XMM0_V32I8 in AVX all of this code can be replaced with that
1283912841 // in the .td file.
1284012842 MachineBasicBlock *
12841 X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB,
12842 unsigned numArgs, bool memArg) const {
12843 X86TargetLowering::EmitPCMPSTRM(MachineInstr *MI, MachineBasicBlock *BB,
12844 bool Implicit, bool MemArg) const {
1284312845 assert(Subtarget->hasSSE42() &&
1284412846 "Target must have SSE4.2 or AVX features enabled");
1284512847
1284712849 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
1284812850 unsigned Opc;
1284912851 if (!Subtarget->hasAVX()) {
12850 if (memArg)
12851 Opc = numArgs == 3 ? X86::PCMPISTRM128rm : X86::PCMPESTRM128rm;
12852 if (MemArg)
12853 Opc = Implicit ? X86::PCMPISTRM128rm : X86::PCMPESTRM128rm;
1285212854 else
12853 Opc = numArgs == 3 ? X86::PCMPISTRM128rr : X86::PCMPESTRM128rr;
12855 Opc = Implicit ? X86::PCMPISTRM128rr : X86::PCMPESTRM128rr;
1285412856 } else {
12855 if (memArg)
12856 Opc = numArgs == 3 ? X86::VPCMPISTRM128rm : X86::VPCMPESTRM128rm;
12857 if (MemArg)
12858 Opc = Implicit ? X86::VPCMPISTRM128rm : X86::VPCMPESTRM128rm;
1285712859 else
12858 Opc = numArgs == 3 ? X86::VPCMPISTRM128rr : X86::VPCMPESTRM128rr;
12859 }
12860 Opc = Implicit ? X86::VPCMPISTRM128rr : X86::VPCMPESTRM128rr;
12861 }
12862
12863 unsigned NumArgs = Implicit ? 3 : 5;
12864 if (MemArg)
12865 NumArgs += X86::AddrNumOperands;
1286012866
1286112867 MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(Opc));
12862 for (unsigned i = 0; i < numArgs; ++i) {
12868 for (unsigned i = 0; i < NumArgs; ++i) {
1286312869 MachineOperand &Op = MI->getOperand(i+1);
1286412870 if (!(Op.isReg() && Op.isImplicit()))
1286512871 MIB.addOperand(Op);
1286612872 }
12873 if (MemArg)
12874 MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
12875
1286712876 BuildMI(*BB, MI, dl,
1286812877 TII->get(TargetOpcode::COPY), MI->getOperand(0).getReg())
1286912878 .addReg(X86::XMM0);
12879
12880 MI->eraseFromParent();
12881 return BB;
12882 }
12883
12884 // FIXME: Custom handling because TableGen doesn't support multiple implicit
12885 // defs in an instruction pattern
12886 MachineBasicBlock *
12887 X86TargetLowering::EmitPCMPSTRI(MachineInstr *MI, MachineBasicBlock *BB,
12888 bool Implicit, bool MemArg) const {
12889 assert(Subtarget->hasSSE42() &&
12890 "Target must have SSE4.2 or AVX features enabled");
12891
12892 DebugLoc dl = MI->getDebugLoc();
12893 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
12894 unsigned Opc;
12895 if (!Subtarget->hasAVX()) {
12896 if (MemArg)
12897 Opc = Implicit ? X86::PCMPISTRIrm : X86::PCMPESTRIrm;
12898 else
12899 Opc = Implicit ? X86::PCMPISTRIrr : X86::PCMPESTRIrr;
12900 } else {
12901 if (MemArg)
12902 Opc = Implicit ? X86::VPCMPISTRIrm : X86::VPCMPESTRIrm;
12903 else
12904 Opc = Implicit ? X86::VPCMPISTRIrr : X86::VPCMPESTRIrr;
12905 }
12906
12907 unsigned NumArgs = Implicit ? 3 : 5;
12908 if (MemArg)
12909 NumArgs += X86::AddrNumOperands;
12910
12911 MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(Opc));
12912 for (unsigned i = 0; i < NumArgs; ++i) {
12913 MachineOperand &Op = MI->getOperand(i+1);
12914 if (!(Op.isReg() && Op.isImplicit()))
12915 MIB.addOperand(Op);
12916 }
12917 if (MemArg)
12918 MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
12919
12920 BuildMI(*BB, MI, dl,
12921 TII->get(TargetOpcode::COPY), MI->getOperand(0).getReg())
12922 .addReg(X86::ECX);
1287012923
1287112924 MI->eraseFromParent();
1287212925 return BB;
1389013943 case X86::VPCMPESTRM128REG:
1389113944 case X86::PCMPESTRM128MEM:
1389213945 case X86::VPCMPESTRM128MEM: {
13893 unsigned NumArgs;
13894 bool MemArg;
13946 bool Implicit, MemArg;
1389513947 switch (MI->getOpcode()) {
1389613948 default: llvm_unreachable("illegal opcode!");
1389713949 case X86::PCMPISTRM128REG:
1389813950 case X86::VPCMPISTRM128REG:
13899 NumArgs = 3; MemArg = false; break;
13951 Implicit = true; MemArg = false; break;
1390013952 case X86::PCMPISTRM128MEM:
1390113953 case X86::VPCMPISTRM128MEM:
13902 NumArgs = 3; MemArg = true; break;
13954 Implicit = true; MemArg = true; break;
1390313955 case X86::PCMPESTRM128REG:
1390413956 case X86::VPCMPESTRM128REG:
13905 NumArgs = 5; MemArg = false; break;
13957 Implicit = false; MemArg = false; break;
1390613958 case X86::PCMPESTRM128MEM:
1390713959 case X86::VPCMPESTRM128MEM:
13908 NumArgs = 5; MemArg = true; break;
13909 }
13910 return EmitPCMP(MI, BB, NumArgs, MemArg);
13960 Implicit = false; MemArg = true; break;
13961 }
13962 return EmitPCMPSTRM(MI, BB, Implicit, MemArg);
13963 }
13964
13965 // String/text processing lowering.
13966 case X86::PCMPISTRIREG:
13967 case X86::VPCMPISTRIREG:
13968 case X86::PCMPISTRIMEM:
13969 case X86::VPCMPISTRIMEM:
13970 case X86::PCMPESTRIREG:
13971 case X86::VPCMPESTRIREG:
13972 case X86::PCMPESTRIMEM:
13973 case X86::VPCMPESTRIMEM: {
13974 bool Implicit, MemArg;
13975 switch (MI->getOpcode()) {
13976 default: llvm_unreachable("illegal opcode!");
13977 case X86::PCMPISTRIREG:
13978 case X86::VPCMPISTRIREG:
13979 Implicit = true; MemArg = false; break;
13980 case X86::PCMPISTRIMEM:
13981 case X86::VPCMPISTRIMEM:
13982 Implicit = true; MemArg = true; break;
13983 case X86::PCMPESTRIREG:
13984 case X86::VPCMPESTRIREG:
13985 Implicit = false; MemArg = false; break;
13986 case X86::PCMPESTRIMEM:
13987 case X86::VPCMPESTRIMEM:
13988 Implicit = false; MemArg = true; break;
13989 }
13990 return EmitPCMPSTRI(MI, BB, Implicit, MemArg);
1391113991 }
1391213992
1391313993 // Thread synchronization.
870870 const SmallVectorImpl<ISD::OutputArg> &Outs,
871871 LLVMContext &Context) const;
872872
873 /// Utility function to emit string processing sse4.2 instructions
873 /// Utility functions to emit string processing sse4.2 instructions
874874 /// that return in xmm0.
875875 /// This takes the instruction to expand, the associated machine basic
876876 /// block, the number of args, and whether or not the second arg is
877877 /// in memory or not.
878 MachineBasicBlock *EmitPCMP(MachineInstr *BInstr, MachineBasicBlock *BB,
879 unsigned argNum, bool inMem) const;
878 MachineBasicBlock *EmitPCMPSTRM(MachineInstr *MI, MachineBasicBlock *BB,
879 bool Implicit, bool MemArg) const;
880 MachineBasicBlock *EmitPCMPSTRI(MachineInstr *MI, MachineBasicBlock *BB,
881 bool Implicit, bool MemArg) const;
880882
881883 /// Utility functions to emit monitor and mwait instructions. These
882884 /// need to make sure that the arguments to the intrinsic are in the
70017001 imm:$src3))]>;
70027002 def MEM : PseudoI<(outs VR128:$dst),
70037003 (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
7004 [(set VR128:$dst, (int_x86_sse42_pcmpistrm128
7005 VR128:$src1, (load addr:$src2), imm:$src3))]>;
7004 [(set VR128:$dst, (int_x86_sse42_pcmpistrm128 VR128:$src1,
7005 (bc_v16i8 (memopv2i64 addr:$src2)), imm:$src3))]>;
70067006 }
70077007
70087008 let Defs = [EFLAGS], usesCustomInserter = 1 in {
70107010 defm PCMPISTRM128 : pseudo_pcmpistrm<"#PCMPISTRM128">, Requires<[UseSSE42]>;
70117011 }
70127012
7013 let Defs = [XMM0, EFLAGS], neverHasSideEffects = 1, Predicates = [HasAVX] in {
7014 def VPCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs),
7015 (ins VR128:$src1, VR128:$src2, i8imm:$src3),
7016 "vpcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize, VEX;
7013 multiclass pcmpistrm_SS42AI<string asm> {
7014 def rr : SS42AI<0x62, MRMSrcReg, (outs),
7015 (ins VR128:$src1, VR128:$src2, i8imm:$src3),
7016 !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
7017 []>, OpSize;
70177018 let mayLoad = 1 in
7018 def VPCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs),
7019 (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
7020 "vpcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize, VEX;
7019 def rm :SS42AI<0x62, MRMSrcMem, (outs),
7020 (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
7021 !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
7022 []>, OpSize;
70217023 }
70227024
70237025 let Defs = [XMM0, EFLAGS], neverHasSideEffects = 1 in {
7024 def PCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs),
7025 (ins VR128:$src1, VR128:$src2, i8imm:$src3),
7026 "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize;
7027 let mayLoad = 1 in
7028 def PCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs),
7029 (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
7030 "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize;
7026 let Predicates = [HasAVX] in
7027 defm VPCMPISTRM128 : pcmpistrm_SS42AI<"vpcmpistrm">, VEX;
7028 defm PCMPISTRM128 : pcmpistrm_SS42AI<"pcmpistrm"> ;
70317029 }
70327030
70337031 // Packed Compare Explicit Length Strings, Return Mask
70387036 VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>;
70397037 def MEM : PseudoI<(outs VR128:$dst),
70407038 (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
7041 [(set VR128:$dst, (int_x86_sse42_pcmpestrm128
7042 VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5))]>;
7039 [(set VR128:$dst, (int_x86_sse42_pcmpestrm128 VR128:$src1, EAX,
7040 (bc_v16i8 (memopv2i64 addr:$src3)), EDX, imm:$src5))]>;
70437041 }
70447042
70457043 let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in {
70477045 defm PCMPESTRM128 : pseudo_pcmpestrm<"#PCMPESTRM128">, Requires<[UseSSE42]>;
70487046 }
70497047
7050 let Predicates = [HasAVX],
7051 Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in {
7052 def VPCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs),
7053 (ins VR128:$src1, VR128:$src3, i8imm:$src5),
7054 "vpcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize, VEX;
7048 multiclass SS42AI_pcmpestrm<string asm> {
7049 def rr : SS42AI<0x60, MRMSrcReg, (outs),
7050 (ins VR128:$src1, VR128:$src3, i8imm:$src5),
7051 !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
7052 []>, OpSize;
70557053 let mayLoad = 1 in
7056 def VPCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs),
7057 (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
7058 "vpcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize, VEX;
7054 def rm : SS42AI<0x60, MRMSrcMem, (outs),
7055 (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
7056 !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
7057 []>, OpSize;
70597058 }
70607059
70617060 let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in {
7062 def PCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs),
7063 (ins VR128:$src1, VR128:$src3, i8imm:$src5),
7064 "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize;
7061 let Predicates = [HasAVX] in
7062 defm VPCMPESTRM128 : SS42AI_pcmpestrm<"vpcmpestrm">, VEX;
7063 defm PCMPESTRM128 : SS42AI_pcmpestrm<"pcmpestrm">;
7064 }
7065
7066 // Packed Compare Implicit Length Strings, Return Index
7067 multiclass pseudo_pcmpistri<string asm> {
7068 def REG : PseudoI<(outs GR32:$dst),
7069 (ins VR128:$src1, VR128:$src2, i8imm:$src3),
7070 [(set GR32:$dst, EFLAGS,
7071 (X86pcmpistri VR128:$src1, VR128:$src2, imm:$src3))]>;
7072 def MEM : PseudoI<(outs GR32:$dst),
7073 (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
7074 [(set GR32:$dst, EFLAGS, (X86pcmpistri VR128:$src1,
7075 (bc_v16i8 (memopv2i64 addr:$src2)), imm:$src3))]>;
7076 }
7077
7078 let Defs = [EFLAGS], usesCustomInserter = 1 in {
7079 defm VPCMPISTRI : pseudo_pcmpistri<"#VPCMPISTRI">, Requires<[HasAVX]>;
7080 defm PCMPISTRI : pseudo_pcmpistri<"#PCMPISTRI">, Requires<[UseSSE42]>;
7081 }
7082
7083 multiclass SS42AI_pcmpistri<string asm> {
7084 def rr : SS42AI<0x63, MRMSrcReg, (outs),
7085 (ins VR128:$src1, VR128:$src2, i8imm:$src3),
7086 !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
7087 []>, OpSize;
70657088 let mayLoad = 1 in
7066 def PCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs),
7067 (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
7068 "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize;
7069 }
7070
7071 // Packed Compare Implicit Length Strings, Return Index
7089 def rm : SS42AI<0x63, MRMSrcMem, (outs),
7090 (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
7091 !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
7092 []>, OpSize;
7093 }
7094
70727095 let Defs = [ECX, EFLAGS], neverHasSideEffects = 1 in {
7073 multiclass SS42AI_pcmpistri<string asm> {
7074 def rr : SS42AI<0x63, MRMSrcReg, (outs),
7075 (ins VR128:$src1, VR128:$src2, i8imm:$src3),
7076 !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
7077 []>, OpSize;
7078 let mayLoad = 1 in
7079 def rm : SS42AI<0x63, MRMSrcMem, (outs),
7080 (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
7081 !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
7082 []>, OpSize;
7083 }
7084 }
7085
7086 let Predicates = [HasAVX] in
7087 defm VPCMPISTRI : SS42AI_pcmpistri<"vpcmpistri">, VEX;
7088 defm PCMPISTRI : SS42AI_pcmpistri<"pcmpistri">;
7096 let Predicates = [HasAVX] in
7097 defm VPCMPISTRI : SS42AI_pcmpistri<"vpcmpistri">, VEX;
7098 defm PCMPISTRI : SS42AI_pcmpistri<"pcmpistri">;
7099 }
70897100
70907101 // Packed Compare Explicit Length Strings, Return Index
7102 multiclass pseudo_pcmpestri<string asm> {
7103 def REG : PseudoI<(outs GR32:$dst),
7104 (ins VR128:$src1, VR128:$src3, i8imm:$src5),
7105 [(set GR32:$dst, EFLAGS,
7106 (X86pcmpestri VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>;
7107 def MEM : PseudoI<(outs GR32:$dst),
7108 (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
7109 [(set GR32:$dst, EFLAGS,
7110 (X86pcmpestri VR128:$src1, EAX, (bc_v16i8 (memopv2i64 addr:$src3)), EDX,
7111 imm:$src5))]>;
7112 }
7113
7114 let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in {
7115 defm VPCMPESTRI : pseudo_pcmpestri<"#VPCMPESTRI">, Requires<[HasAVX]>;
7116 defm PCMPESTRI : pseudo_pcmpestri<"#PCMPESTRI">, Requires<[UseSSE42]>;
7117 }
7118
7119 multiclass SS42AI_pcmpestri<string asm> {
7120 def rr : SS42AI<0x61, MRMSrcReg, (outs),
7121 (ins VR128:$src1, VR128:$src3, i8imm:$src5),
7122 !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
7123 []>, OpSize;
7124 let mayLoad = 1 in
7125 def rm : SS42AI<0x61, MRMSrcMem, (outs),
7126 (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
7127 !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
7128 []>, OpSize;
7129 }
7130
70917131 let Defs = [ECX, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in {
7092 multiclass SS42AI_pcmpestri<string asm> {
7093 def rr : SS42AI<0x61, MRMSrcReg, (outs),
7094 (ins VR128:$src1, VR128:$src3, i8imm:$src5),
7095 !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
7096 []>, OpSize;
7097 let mayLoad = 1 in
7098 def rm : SS42AI<0x61, MRMSrcMem, (outs),
7099 (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
7100 !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
7101 []>, OpSize;
7102 }
7103 }
7104
7105 let Predicates = [HasAVX] in
7106 defm VPCMPESTRI : SS42AI_pcmpestri<"vpcmpestri">, VEX;
7107 defm PCMPESTRI : SS42AI_pcmpestri<"pcmpestri">;
7132 let Predicates = [HasAVX] in
7133 defm VPCMPESTRI : SS42AI_pcmpestri<"vpcmpestri">, VEX;
7134 defm PCMPESTRI : SS42AI_pcmpestri<"pcmpestri">;
7135 }
71087136
71097137 //===----------------------------------------------------------------------===//
71107138 // SSE4.2 - CRC Instructions
11391139
11401140
11411141 define i32 @test_x86_sse42_pcmpestri128(<16 x i8> %a0, <16 x i8> %a2) {
1142 ; CHECK: movl
1143 ; CHECK: movl
1144 ; CHECK: vpcmpestri
1142 ; CHECK: movl $7
1143 ; CHECK: movl $7
1144 ; CHECK: vpcmpestri $7
11451145 ; CHECK: movl
11461146 %res = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
11471147 ret i32 %res
11481148 }
11491149 declare i32 @llvm.x86.sse42.pcmpestri128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
1150
1151
1152 define i32 @test_x86_sse42_pcmpestri128_load(<16 x i8>* %a0, <16 x i8>* %a2) {
1153 ; CHECK: movl $7
1154 ; CHECK: movl $7
1155 ; CHECK: vpcmpestri $7, (
1156 ; CHECK: movl
1157 %1 = load <16 x i8>* %a0
1158 %2 = load <16 x i8>* %a2
1159 %res = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %1, i32 7, <16 x i8> %2, i32 7, i8 7) ; <i32> [#uses=1]
1160 ret i32 %res
1161 }
11501162
11511163
11521164 define i32 @test_x86_sse42_pcmpestria128(<16 x i8> %a0, <16 x i8> %a2) {
12151227 declare <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
12161228
12171229
1230 define <16 x i8> @test_x86_sse42_pcmpestrm128_load(<16 x i8> %a0, <16 x i8>* %a2) {
1231 ; CHECK: movl $7
1232 ; CHECK: movl $7
1233 ; CHECK: vpcmpestrm $7,
1234 ; CHECK-NOT: vmov
1235 %1 = load <16 x i8>* %a2
1236 %res = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %1, i32 7, i8 7) ; <<16 x i8>> [#uses=1]
1237 ret <16 x i8> %res
1238 }
1239
1240
12181241 define i32 @test_x86_sse42_pcmpistri128(<16 x i8> %a0, <16 x i8> %a1) {
1219 ; CHECK: vpcmpistri
1242 ; CHECK: vpcmpistri $7
12201243 ; CHECK: movl
12211244 %res = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
12221245 ret i32 %res
12231246 }
12241247 declare i32 @llvm.x86.sse42.pcmpistri128(<16 x i8>, <16 x i8>, i8) nounwind readnone
1248
1249
1250 define i32 @test_x86_sse42_pcmpistri128_load(<16 x i8>* %a0, <16 x i8>* %a1) {
1251 ; CHECK: vpcmpistri $7, (
1252 ; CHECK: movl
1253 %1 = load <16 x i8>* %a0
1254 %2 = load <16 x i8>* %a1
1255 %res = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %1, <16 x i8> %2, i8 7) ; <i32> [#uses=1]
1256 ret i32 %res
1257 }
12251258
12261259
12271260 define i32 @test_x86_sse42_pcmpistria128(<16 x i8> %a0, <16 x i8> %a1) {
12701303
12711304
12721305 define <16 x i8> @test_x86_sse42_pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1) {
1273 ; CHECK: vpcmpistrm
1306 ; CHECK: vpcmpistrm $7
12741307 ; CHECK-NOT: vmov
12751308 %res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<16 x i8>> [#uses=1]
12761309 ret <16 x i8> %res
12771310 }
12781311 declare <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8>, <16 x i8>, i8) nounwind readnone
1312
1313
1314 define <16 x i8> @test_x86_sse42_pcmpistrm128_load(<16 x i8> %a0, <16 x i8>* %a1) {
1315 ; CHECK: vpcmpistrm $7, (
1316 ; CHECK-NOT: vmov
1317 %1 = load <16 x i8>* %a1
1318 %res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %1, i8 7) ; <<16 x i8>> [#uses=1]
1319 ret <16 x i8> %res
1320 }
12791321
12801322
12811323 define <4 x float> @test_x86_sse_add_ss(<4 x float> %a0, <4 x float> %a1) {