llvm.org GIT mirror llvm / 7925e25
Custom lower PCMPEQ/PCMPGT intrinsics to target specific nodes and remove the intrinsic patterns. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@148687 91177308-0d34-0410-b5e6-96231b3b80d8 Craig Topper 8 years ago
2 changed file(s) with 172 addition(s) and 308 deletion(s). Raw diff Collapse all Expand all
93179317 case Intrinsic::x86_avx2_psrav_d_256:
93189318 return DAG.getNode(ISD::SRA, dl, Op.getValueType(),
93199319 Op.getOperand(1), Op.getOperand(2));
9320 case Intrinsic::x86_sse2_pcmpeq_b:
9321 case Intrinsic::x86_sse2_pcmpeq_w:
9322 case Intrinsic::x86_sse2_pcmpeq_d:
9323 case Intrinsic::x86_sse41_pcmpeqq:
9324 case Intrinsic::x86_avx2_pcmpeq_b:
9325 case Intrinsic::x86_avx2_pcmpeq_w:
9326 case Intrinsic::x86_avx2_pcmpeq_d:
9327 case Intrinsic::x86_avx2_pcmpeq_q:
9328 return DAG.getNode(X86ISD::PCMPEQ, dl, Op.getValueType(),
9329 Op.getOperand(1), Op.getOperand(2));
9330 case Intrinsic::x86_sse2_pcmpgt_b:
9331 case Intrinsic::x86_sse2_pcmpgt_w:
9332 case Intrinsic::x86_sse2_pcmpgt_d:
9333 case Intrinsic::x86_sse42_pcmpgtq:
9334 case Intrinsic::x86_avx2_pcmpgt_b:
9335 case Intrinsic::x86_avx2_pcmpgt_w:
9336 case Intrinsic::x86_avx2_pcmpgt_d:
9337 case Intrinsic::x86_avx2_pcmpgt_q:
9338 return DAG.getNode(X86ISD::PCMPGT, dl, Op.getValueType(),
9339 Op.getOperand(1), Op.getOperand(2));
93209340
93219341 // ptest and testp intrinsics. The intrinsic these come from are designed to
93229342 // return an integer value, not just an instruction so lower it to the ptest
35093509 [(set RC:$dst, (IntId RC:$src1, (bitconvert (memop_frag addr:$src2))))]>;
35103510 }
35113511
3512 multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
3513 string OpcodeStr, SDNode OpNode,
3514 SDNode OpNode2, RegisterClass RC,
3515 ValueType DstVT, ValueType SrcVT, PatFrag bc_frag,
3516 bit Is2Addr = 1> {
3512 multiclass PDI_binop_rmi<bits<8> opc, bits<8> opc2, Format ImmForm,
3513 string OpcodeStr, SDNode OpNode,
3514 SDNode OpNode2, RegisterClass RC,
3515 ValueType DstVT, ValueType SrcVT, PatFrag bc_frag,
3516 bit Is2Addr = 1> {
35173517 // src2 is always 128-bit
35183518 def rr : PDI
35193519 (ins RC:$src1, VR128:$src2),
35203520 !if(Is2Addr,
35213521 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
35223522 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
3523 [(set RC:$dst, (OpNode (DstVT RC:$src1), (SrcVT VR128:$src2)))]>;
3523 [(set RC:$dst, (DstVT (OpNode RC:$src1, (SrcVT VR128:$src2))))]>;
35243524 def rm : PDI
35253525 (ins RC:$src1, i128mem:$src2),
35263526 !if(Is2Addr,
35273527 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
35283528 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
3529 [(set RC:$dst, (OpNode (DstVT RC:$src1),
3530 (bc_frag (memopv2i64 addr:$src2))))]>;
3529 [(set RC:$dst, (DstVT (OpNode RC:$src1,
3530 (bc_frag (memopv2i64 addr:$src2)))))]>;
35313531 def ri : PDIi8
35323532 (ins RC:$src1, i32i8imm:$src2),
35333533 !if(Is2Addr,
35343534 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
35353535 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
3536 [(set RC:$dst, (OpNode2 (DstVT RC:$src1), (i32 imm:$src2)))]>;
3536 [(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i32 imm:$src2))))]>;
35373537 }
35383538
35393539 } // ExeDomain = SSEPackedInt
37293729 //===---------------------------------------------------------------------===//
37303730
37313731 let Predicates = [HasAVX] in {
3732 defm VPSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli,
3733 VR128, v8i16, v8i16, bc_v8i16, 0>, VEX_4V;
3734 defm VPSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "vpslld", X86vshl, X86vshli,
3735 VR128, v4i32, v4i32, bc_v4i32, 0>, VEX_4V;
3736 defm VPSLLQ : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "vpsllq", X86vshl, X86vshli,
3737 VR128, v2i64, v2i64, bc_v2i64, 0>, VEX_4V;
3738
3739 defm VPSRLW : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "vpsrlw", X86vsrl, X86vsrli,
3740 VR128, v8i16, v8i16, bc_v8i16, 0>, VEX_4V;
3741 defm VPSRLD : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "vpsrld", X86vsrl, X86vsrli,
3742 VR128, v4i32, v4i32, bc_v4i32, 0>, VEX_4V;
3743 defm VPSRLQ : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "vpsrlq", X86vsrl, X86vsrli,
3744 VR128, v2i64, v2i64, bc_v2i64, 0>, VEX_4V;
3745
3746 defm VPSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai,
3747 VR128, v8i16, v8i16, bc_v8i16, 0>, VEX_4V;
3748 defm VPSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai,
3749 VR128, v4i32, v4i32, bc_v4i32, 0>, VEX_4V;
3732 defm VPSLLW : PDI_binop_rmi<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli,
3733 VR128, v8i16, v8i16, bc_v8i16, 0>, VEX_4V;
3734 defm VPSLLD : PDI_binop_rmi<0xF2, 0x72, MRM6r, "vpslld", X86vshl, X86vshli,
3735 VR128, v4i32, v4i32, bc_v4i32, 0>, VEX_4V;
3736 defm VPSLLQ : PDI_binop_rmi<0xF3, 0x73, MRM6r, "vpsllq", X86vshl, X86vshli,
3737 VR128, v2i64, v2i64, bc_v2i64, 0>, VEX_4V;
3738
3739 defm VPSRLW : PDI_binop_rmi<0xD1, 0x71, MRM2r, "vpsrlw", X86vsrl, X86vsrli,
3740 VR128, v8i16, v8i16, bc_v8i16, 0>, VEX_4V;
3741 defm VPSRLD : PDI_binop_rmi<0xD2, 0x72, MRM2r, "vpsrld", X86vsrl, X86vsrli,
3742 VR128, v4i32, v4i32, bc_v4i32, 0>, VEX_4V;
3743 defm VPSRLQ : PDI_binop_rmi<0xD3, 0x73, MRM2r, "vpsrlq", X86vsrl, X86vsrli,
3744 VR128, v2i64, v2i64, bc_v2i64, 0>, VEX_4V;
3745
3746 defm VPSRAW : PDI_binop_rmi<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai,
3747 VR128, v8i16, v8i16, bc_v8i16, 0>, VEX_4V;
3748 defm VPSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai,
3749 VR128, v4i32, v4i32, bc_v4i32, 0>, VEX_4V;
37503750
37513751 let ExeDomain = SSEPackedInt in {
37523752 // 128-bit logical shifts.
37673767 } // Predicates = [HasAVX]
37683768
37693769 let Predicates = [HasAVX2] in {
3770 defm VPSLLWY : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli,
3771 VR256, v16i16, v8i16, bc_v8i16, 0>, VEX_4V;
3772 defm VPSLLDY : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "vpslld", X86vshl, X86vshli,
3773 VR256, v8i32, v4i32, bc_v4i32, 0>, VEX_4V;
3774 defm VPSLLQY : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "vpsllq", X86vshl, X86vshli,
3775 VR256, v4i64, v2i64, bc_v2i64, 0>, VEX_4V;
3776
3777 defm VPSRLWY : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "vpsrlw", X86vsrl, X86vsrli,
3778 VR256, v16i16, v8i16, bc_v8i16, 0>, VEX_4V;
3779 defm VPSRLDY : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "vpsrld", X86vsrl, X86vsrli,
3780 VR256, v8i32, v4i32, bc_v4i32, 0>, VEX_4V;
3781 defm VPSRLQY : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "vpsrlq", X86vsrl, X86vsrli,
3782 VR256, v4i64, v2i64, bc_v2i64, 0>, VEX_4V;
3783
3784 defm VPSRAWY : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai,
3785 VR256, v16i16, v8i16, bc_v8i16, 0>, VEX_4V;
3786 defm VPSRADY : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai,
3787 VR256, v8i32, v4i32, bc_v4i32, 0>, VEX_4V;
3770 defm VPSLLWY : PDI_binop_rmi<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli,
3771 VR256, v16i16, v8i16, bc_v8i16, 0>, VEX_4V;
3772 defm VPSLLDY : PDI_binop_rmi<0xF2, 0x72, MRM6r, "vpslld", X86vshl, X86vshli,
3773 VR256, v8i32, v4i32, bc_v4i32, 0>, VEX_4V;
3774 defm VPSLLQY : PDI_binop_rmi<0xF3, 0x73, MRM6r, "vpsllq", X86vshl, X86vshli,
3775 VR256, v4i64, v2i64, bc_v2i64, 0>, VEX_4V;
3776
3777 defm VPSRLWY : PDI_binop_rmi<0xD1, 0x71, MRM2r, "vpsrlw", X86vsrl, X86vsrli,
3778 VR256, v16i16, v8i16, bc_v8i16, 0>, VEX_4V;
3779 defm VPSRLDY : PDI_binop_rmi<0xD2, 0x72, MRM2r, "vpsrld", X86vsrl, X86vsrli,
3780 VR256, v8i32, v4i32, bc_v4i32, 0>, VEX_4V;
3781 defm VPSRLQY : PDI_binop_rmi<0xD3, 0x73, MRM2r, "vpsrlq", X86vsrl, X86vsrli,
3782 VR256, v4i64, v2i64, bc_v2i64, 0>, VEX_4V;
3783
3784 defm VPSRAWY : PDI_binop_rmi<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai,
3785 VR256, v16i16, v8i16, bc_v8i16, 0>, VEX_4V;
3786 defm VPSRADY : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai,
3787 VR256, v8i32, v4i32, bc_v4i32, 0>, VEX_4V;
37883788
37893789 let ExeDomain = SSEPackedInt in {
37903790 // 256-bit logical shifts.
38053805 } // Predicates = [HasAVX2]
38063806
38073807 let Constraints = "$src1 = $dst" in {
3808 defm PSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw", X86vshl, X86vshli,
3809 VR128, v8i16, v8i16, bc_v8i16>;
3810 defm PSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld", X86vshl, X86vshli,
3811 VR128, v4i32, v4i32, bc_v4i32>;
3812 defm PSLLQ : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "psllq", X86vshl, X86vshli,
3813 VR128, v2i64, v2i64, bc_v2i64>;
3814
3815 defm PSRLW : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw", X86vsrl, X86vsrli,
3816 VR128, v8i16, v8i16, bc_v8i16>;
3817 defm PSRLD : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "psrld", X86vsrl, X86vsrli,
3818 VR128, v4i32, v4i32, bc_v4i32>;
3819 defm PSRLQ : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "psrlq", X86vsrl, X86vsrli,
3820 VR128, v2i64, v2i64, bc_v2i64>;
3821
3822 defm PSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw", X86vsra, X86vsrai,
3823 VR128, v8i16, v8i16, bc_v8i16>;
3824 defm PSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad", X86vsra, X86vsrai,
3825 VR128, v4i32, v4i32, bc_v4i32>;
3808 defm PSLLW : PDI_binop_rmi<0xF1, 0x71, MRM6r, "psllw", X86vshl, X86vshli,
3809 VR128, v8i16, v8i16, bc_v8i16>;
3810 defm PSLLD : PDI_binop_rmi<0xF2, 0x72, MRM6r, "pslld", X86vshl, X86vshli,
3811 VR128, v4i32, v4i32, bc_v4i32>;
3812 defm PSLLQ : PDI_binop_rmi<0xF3, 0x73, MRM6r, "psllq", X86vshl, X86vshli,
3813 VR128, v2i64, v2i64, bc_v2i64>;
3814
3815 defm PSRLW : PDI_binop_rmi<0xD1, 0x71, MRM2r, "psrlw", X86vsrl, X86vsrli,
3816 VR128, v8i16, v8i16, bc_v8i16>;
3817 defm PSRLD : PDI_binop_rmi<0xD2, 0x72, MRM2r, "psrld", X86vsrl, X86vsrli,
3818 VR128, v4i32, v4i32, bc_v4i32>;
3819 defm PSRLQ : PDI_binop_rmi<0xD3, 0x73, MRM2r, "psrlq", X86vsrl, X86vsrli,
3820 VR128, v2i64, v2i64, bc_v2i64>;
3821
3822 defm PSRAW : PDI_binop_rmi<0xE1, 0x71, MRM4r, "psraw", X86vsra, X86vsrai,
3823 VR128, v8i16, v8i16, bc_v8i16>;
3824 defm PSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "psrad", X86vsra, X86vsrai,
3825 VR128, v4i32, v4i32, bc_v4i32>;
38263826
38273827 let ExeDomain = SSEPackedInt in {
38283828 // 128-bit logical shifts.
38823882 //===---------------------------------------------------------------------===//
38833883
38843884 let Predicates = [HasAVX] in {
3885 defm VPCMPEQB : PDI_binop_rm_int<0x74, "vpcmpeqb", int_x86_sse2_pcmpeq_b,
3886 VR128, memopv2i64, i128mem, 1, 0>, VEX_4V;
3887 defm VPCMPEQW : PDI_binop_rm_int<0x75, "vpcmpeqw", int_x86_sse2_pcmpeq_w,
3888 VR128, memopv2i64, i128mem, 1, 0>, VEX_4V;
3889 defm VPCMPEQD : PDI_binop_rm_int<0x76, "vpcmpeqd", int_x86_sse2_pcmpeq_d,
3890 VR128, memopv2i64, i128mem, 1, 0>, VEX_4V;
3891 defm VPCMPGTB : PDI_binop_rm_int<0x64, "vpcmpgtb", int_x86_sse2_pcmpgt_b,
3892 VR128, memopv2i64, i128mem, 0, 0>, VEX_4V;
3893 defm VPCMPGTW : PDI_binop_rm_int<0x65, "vpcmpgtw", int_x86_sse2_pcmpgt_w,
3894 VR128, memopv2i64, i128mem, 0, 0>, VEX_4V;
3895 defm VPCMPGTD : PDI_binop_rm_int<0x66, "vpcmpgtd", int_x86_sse2_pcmpgt_d,
3896 VR128, memopv2i64, i128mem, 0, 0>, VEX_4V;
3897
3898 def : Pat<(v16i8 (X86pcmpeq VR128:$src1, VR128:$src2)),
3899 (VPCMPEQBrr VR128:$src1, VR128:$src2)>;
3900 def : Pat<(v16i8 (X86pcmpeq VR128:$src1,
3901 (bc_v16i8 (memopv2i64 addr:$src2)))),
3902 (VPCMPEQBrm VR128:$src1, addr:$src2)>;
3903 def : Pat<(v8i16 (X86pcmpeq VR128:$src1, VR128:$src2)),
3904 (VPCMPEQWrr VR128:$src1, VR128:$src2)>;
3905 def : Pat<(v8i16 (X86pcmpeq VR128:$src1,
3906 (bc_v8i16 (memopv2i64 addr:$src2)))),
3907 (VPCMPEQWrm VR128:$src1, addr:$src2)>;
3908 def : Pat<(v4i32 (X86pcmpeq VR128:$src1, VR128:$src2)),
3909 (VPCMPEQDrr VR128:$src1, VR128:$src2)>;
3910 def : Pat<(v4i32 (X86pcmpeq VR128:$src1,
3911 (bc_v4i32 (memopv2i64 addr:$src2)))),
3912 (VPCMPEQDrm VR128:$src1, addr:$src2)>;
3913
3914 def : Pat<(v16i8 (X86pcmpgt VR128:$src1, VR128:$src2)),
3915 (VPCMPGTBrr VR128:$src1, VR128:$src2)>;
3916 def : Pat<(v16i8 (X86pcmpgt VR128:$src1,
3917 (bc_v16i8 (memopv2i64 addr:$src2)))),
3918 (VPCMPGTBrm VR128:$src1, addr:$src2)>;
3919 def : Pat<(v8i16 (X86pcmpgt VR128:$src1, VR128:$src2)),
3920 (VPCMPGTWrr VR128:$src1, VR128:$src2)>;
3921 def : Pat<(v8i16 (X86pcmpgt VR128:$src1,
3922 (bc_v8i16 (memopv2i64 addr:$src2)))),
3923 (VPCMPGTWrm VR128:$src1, addr:$src2)>;
3924 def : Pat<(v4i32 (X86pcmpgt VR128:$src1, VR128:$src2)),
3925 (VPCMPGTDrr VR128:$src1, VR128:$src2)>;
3926 def : Pat<(v4i32 (X86pcmpgt VR128:$src1,
3927 (bc_v4i32 (memopv2i64 addr:$src2)))),
3928 (VPCMPGTDrm VR128:$src1, addr:$src2)>;
3885 defm VPCMPEQB : PDI_binop_rm<0x74, "vpcmpeqb", X86pcmpeq, v16i8,
3886 VR128, memopv2i64, i128mem, 1, 0>, VEX_4V;
3887 defm VPCMPEQW : PDI_binop_rm<0x75, "vpcmpeqw", X86pcmpeq, v8i16,
3888 VR128, memopv2i64, i128mem, 1, 0>, VEX_4V;
3889 defm VPCMPEQD : PDI_binop_rm<0x76, "vpcmpeqd", X86pcmpeq, v4i32,
3890 VR128, memopv2i64, i128mem, 1, 0>, VEX_4V;
3891 defm VPCMPGTB : PDI_binop_rm<0x64, "vpcmpgtb", X86pcmpgt, v16i8,
3892 VR128, memopv2i64, i128mem, 0, 0>, VEX_4V;
3893 defm VPCMPGTW : PDI_binop_rm<0x65, "vpcmpgtw", X86pcmpgt, v8i16,
3894 VR128, memopv2i64, i128mem, 0, 0>, VEX_4V;
3895 defm VPCMPGTD : PDI_binop_rm<0x66, "vpcmpgtd", X86pcmpgt, v4i32,
3896 VR128, memopv2i64, i128mem, 0, 0>, VEX_4V;
39293897 }
39303898
39313899 let Predicates = [HasAVX2] in {
3932 defm VPCMPEQBY : PDI_binop_rm_int<0x74, "vpcmpeqb", int_x86_avx2_pcmpeq_b,
3933 VR256, memopv4i64, i256mem, 1, 0>, VEX_4V;
3934 defm VPCMPEQWY : PDI_binop_rm_int<0x75, "vpcmpeqw", int_x86_avx2_pcmpeq_w,
3935 VR256, memopv4i64, i256mem, 1, 0>, VEX_4V;
3936 defm VPCMPEQDY : PDI_binop_rm_int<0x76, "vpcmpeqd", int_x86_avx2_pcmpeq_d,
3937 VR256, memopv4i64, i256mem, 1, 0>, VEX_4V;
3938 defm VPCMPGTBY : PDI_binop_rm_int<0x64, "vpcmpgtb", int_x86_avx2_pcmpgt_b,
3939 VR256, memopv4i64, i256mem, 0, 0>, VEX_4V;
3940 defm VPCMPGTWY : PDI_binop_rm_int<0x65, "vpcmpgtw", int_x86_avx2_pcmpgt_w,
3941 VR256, memopv4i64, i256mem, 0, 0>, VEX_4V;
3942 defm VPCMPGTDY : PDI_binop_rm_int<0x66, "vpcmpgtd", int_x86_avx2_pcmpgt_d,
3943 VR256, memopv4i64, i256mem, 0, 0>, VEX_4V;
3944
3945 def : Pat<(v32i8 (X86pcmpeq VR256:$src1, VR256:$src2)),
3946 (VPCMPEQBYrr VR256:$src1, VR256:$src2)>;
3947 def : Pat<(v32i8 (X86pcmpeq VR256:$src1,
3948 (bc_v32i8 (memopv4i64 addr:$src2)))),
3949 (VPCMPEQBYrm VR256:$src1, addr:$src2)>;
3950 def : Pat<(v16i16 (X86pcmpeq VR256:$src1, VR256:$src2)),
3951 (VPCMPEQWYrr VR256:$src1, VR256:$src2)>;
3952 def : Pat<(v16i16 (X86pcmpeq VR256:$src1,
3953 (bc_v16i16 (memopv4i64 addr:$src2)))),
3954 (VPCMPEQWYrm VR256:$src1, addr:$src2)>;
3955 def : Pat<(v8i32 (X86pcmpeq VR256:$src1, VR256:$src2)),
3956 (VPCMPEQDYrr VR256:$src1, VR256:$src2)>;
3957 def : Pat<(v8i32 (X86pcmpeq VR256:$src1,
3958 (bc_v8i32 (memopv4i64 addr:$src2)))),
3959 (VPCMPEQDYrm VR256:$src1, addr:$src2)>;
3960
3961 def : Pat<(v32i8 (X86pcmpgt VR256:$src1, VR256:$src2)),
3962 (VPCMPGTBYrr VR256:$src1, VR256:$src2)>;
3963 def : Pat<(v32i8 (X86pcmpgt VR256:$src1,
3964 (bc_v32i8 (memopv4i64 addr:$src2)))),
3965 (VPCMPGTBYrm VR256:$src1, addr:$src2)>;
3966 def : Pat<(v16i16 (X86pcmpgt VR256:$src1, VR256:$src2)),
3967 (VPCMPGTWYrr VR256:$src1, VR256:$src2)>;
3968 def : Pat<(v16i16 (X86pcmpgt VR256:$src1,
3969 (bc_v16i16 (memopv4i64 addr:$src2)))),
3970 (VPCMPGTWYrm VR256:$src1, addr:$src2)>;
3971 def : Pat<(v8i32 (X86pcmpgt VR256:$src1, VR256:$src2)),
3972 (VPCMPGTDYrr VR256:$src1, VR256:$src2)>;
3973 def : Pat<(v8i32 (X86pcmpgt VR256:$src1,
3974 (bc_v8i32 (memopv4i64 addr:$src2)))),
3975 (VPCMPGTDYrm VR256:$src1, addr:$src2)>;
3900 defm VPCMPEQBY : PDI_binop_rm<0x74, "vpcmpeqb", X86pcmpeq, v32i8,
3901 VR256, memopv4i64, i256mem, 1, 0>, VEX_4V;
3902 defm VPCMPEQWY : PDI_binop_rm<0x75, "vpcmpeqw", X86pcmpeq, v16i16,
3903 VR256, memopv4i64, i256mem, 1, 0>, VEX_4V;
3904 defm VPCMPEQDY : PDI_binop_rm<0x76, "vpcmpeqd", X86pcmpeq, v8i32,
3905 VR256, memopv4i64, i256mem, 1, 0>, VEX_4V;
3906 defm VPCMPGTBY : PDI_binop_rm<0x64, "vpcmpgtb", X86pcmpgt, v32i8,
3907 VR256, memopv4i64, i256mem, 0, 0>, VEX_4V;
3908 defm VPCMPGTWY : PDI_binop_rm<0x65, "vpcmpgtw", X86pcmpgt, v16i16,
3909 VR256, memopv4i64, i256mem, 0, 0>, VEX_4V;
3910 defm VPCMPGTDY : PDI_binop_rm<0x66, "vpcmpgtd", X86pcmpgt, v8i32,
3911 VR256, memopv4i64, i256mem, 0, 0>, VEX_4V;
39763912 }
39773913
39783914 let Constraints = "$src1 = $dst" in {
3979 defm PCMPEQB : PDI_binop_rm_int<0x74, "pcmpeqb", int_x86_sse2_pcmpeq_b,
3980 VR128, memopv2i64, i128mem, 1>;
3981 defm PCMPEQW : PDI_binop_rm_int<0x75, "pcmpeqw", int_x86_sse2_pcmpeq_w,
3982 VR128, memopv2i64, i128mem, 1>;
3983 defm PCMPEQD : PDI_binop_rm_int<0x76, "pcmpeqd", int_x86_sse2_pcmpeq_d,
3984 VR128, memopv2i64, i128mem, 1>;
3985 defm PCMPGTB : PDI_binop_rm_int<0x64, "pcmpgtb", int_x86_sse2_pcmpgt_b,
3986 VR128, memopv2i64, i128mem>;
3987 defm PCMPGTW : PDI_binop_rm_int<0x65, "pcmpgtw", int_x86_sse2_pcmpgt_w,
3988 VR128, memopv2i64, i128mem>;
3989 defm PCMPGTD : PDI_binop_rm_int<0x66, "pcmpgtd", int_x86_sse2_pcmpgt_d,
3990 VR128, memopv2i64, i128mem>;
3915 defm PCMPEQB : PDI_binop_rm<0x74, "pcmpeqb", X86pcmpeq, v16i8,
3916 VR128, memopv2i64, i128mem, 1>;
3917 defm PCMPEQW : PDI_binop_rm<0x75, "pcmpeqw", X86pcmpeq, v8i16,
3918 VR128, memopv2i64, i128mem, 1>;
3919 defm PCMPEQD : PDI_binop_rm<0x76, "pcmpeqd", X86pcmpeq, v4i32,
3920 VR128, memopv2i64, i128mem, 1>;
3921 defm PCMPGTB : PDI_binop_rm<0x64, "pcmpgtb", X86pcmpgt, v16i8,
3922 VR128, memopv2i64, i128mem>;
3923 defm PCMPGTW : PDI_binop_rm<0x65, "pcmpgtw", X86pcmpgt, v8i16,
3924 VR128, memopv2i64, i128mem>;
3925 defm PCMPGTD : PDI_binop_rm<0x66, "pcmpgtd", X86pcmpgt, v4i32,
3926 VR128, memopv2i64, i128mem>;
39913927 } // Constraints = "$src1 = $dst"
3992
3993 let Predicates = [HasSSE2] in {
3994 def : Pat<(v16i8 (X86pcmpeq VR128:$src1, VR128:$src2)),
3995 (PCMPEQBrr VR128:$src1, VR128:$src2)>;
3996 def : Pat<(v16i8 (X86pcmpeq VR128:$src1,
3997 (bc_v16i8 (memopv2i64 addr:$src2)))),
3998 (PCMPEQBrm VR128:$src1, addr:$src2)>;
3999 def : Pat<(v8i16 (X86pcmpeq VR128:$src1, VR128:$src2)),
4000 (PCMPEQWrr VR128:$src1, VR128:$src2)>;
4001 def : Pat<(v8i16 (X86pcmpeq VR128:$src1,
4002 (bc_v8i16 (memopv2i64 addr:$src2)))),
4003 (PCMPEQWrm VR128:$src1, addr:$src2)>;
4004 def : Pat<(v4i32 (X86pcmpeq VR128:$src1, VR128:$src2)),
4005 (PCMPEQDrr VR128:$src1, VR128:$src2)>;
4006 def : Pat<(v4i32 (X86pcmpeq VR128:$src1,
4007 (bc_v4i32 (memopv2i64 addr:$src2)))),
4008 (PCMPEQDrm VR128:$src1, addr:$src2)>;
4009
4010 def : Pat<(v16i8 (X86pcmpgt VR128:$src1, VR128:$src2)),
4011 (PCMPGTBrr VR128:$src1, VR128:$src2)>;
4012 def : Pat<(v16i8 (X86pcmpgt VR128:$src1,
4013 (bc_v16i8 (memopv2i64 addr:$src2)))),
4014 (PCMPGTBrm VR128:$src1, addr:$src2)>;
4015 def : Pat<(v8i16 (X86pcmpgt VR128:$src1, VR128:$src2)),
4016 (PCMPGTWrr VR128:$src1, VR128:$src2)>;
4017 def : Pat<(v8i16 (X86pcmpgt VR128:$src1,
4018 (bc_v8i16 (memopv2i64 addr:$src2)))),
4019 (PCMPGTWrm VR128:$src1, addr:$src2)>;
4020 def : Pat<(v4i32 (X86pcmpgt VR128:$src1, VR128:$src2)),
4021 (PCMPGTDrr VR128:$src1, VR128:$src2)>;
4022 def : Pat<(v4i32 (X86pcmpgt VR128:$src1,
4023 (bc_v4i32 (memopv2i64 addr:$src2)))),
4024 (PCMPGTDrm VR128:$src1, addr:$src2)>;
4025 }
40263928
40273929 //===---------------------------------------------------------------------===//
40283930 // SSE2 - Packed Integer Pack Instructions
63716273 let isCommutable = 0 in
63726274 defm VPACKUSDW : SS41I_binop_rm_int<0x2B, "vpackusdw", int_x86_sse41_packusdw,
63736275 0>, VEX_4V;
6374 defm VPCMPEQQ : SS41I_binop_rm_int<0x29, "vpcmpeqq", int_x86_sse41_pcmpeqq,
6375 0>, VEX_4V;
63766276 defm VPMINSB : SS41I_binop_rm_int<0x38, "vpminsb", int_x86_sse41_pminsb,
63776277 0>, VEX_4V;
63786278 defm VPMINSD : SS41I_binop_rm_int<0x39, "vpminsd", int_x86_sse41_pminsd,
63916291 0>, VEX_4V;
63926292 defm VPMULDQ : SS41I_binop_rm_int<0x28, "vpmuldq", int_x86_sse41_pmuldq,
63936293 0>, VEX_4V;
6394
6395 def : Pat<(v2i64 (X86pcmpeq VR128:$src1, VR128:$src2)),
6396 (VPCMPEQQrr VR128:$src1, VR128:$src2)>;
6397 def : Pat<(v2i64 (X86pcmpeq VR128:$src1, (memop addr:$src2))),
6398 (VPCMPEQQrm VR128:$src1, addr:$src2)>;
63996294 }
64006295
64016296 let Predicates = [HasAVX2] in {
64026297 let isCommutable = 0 in
64036298 defm VPACKUSDW : SS41I_binop_rm_int_y<0x2B, "vpackusdw",
64046299 int_x86_avx2_packusdw>, VEX_4V;
6405 defm VPCMPEQQ : SS41I_binop_rm_int_y<0x29, "vpcmpeqq",
6406 int_x86_avx2_pcmpeq_q>, VEX_4V;
64076300 defm VPMINSB : SS41I_binop_rm_int_y<0x38, "vpminsb",
64086301 int_x86_avx2_pmins_b>, VEX_4V;
64096302 defm VPMINSD : SS41I_binop_rm_int_y<0x39, "vpminsd",
64226315 int_x86_avx2_pmaxu_w>, VEX_4V;
64236316 defm VPMULDQ : SS41I_binop_rm_int_y<0x28, "vpmuldq",
64246317 int_x86_avx2_pmul_dq>, VEX_4V;
6425
6426 def : Pat<(v4i64 (X86pcmpeq VR256:$src1, VR256:$src2)),
6427 (VPCMPEQQYrr VR256:$src1, VR256:$src2)>;
6428 def : Pat<(v4i64 (X86pcmpeq VR256:$src1, (memop addr:$src2))),
6429 (VPCMPEQQYrm VR256:$src1, addr:$src2)>;
64306318 }
64316319
64326320 let Constraints = "$src1 = $dst" in {
64336321 let isCommutable = 0 in
64346322 defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw", int_x86_sse41_packusdw>;
6435 defm PCMPEQQ : SS41I_binop_rm_int<0x29, "pcmpeqq", int_x86_sse41_pcmpeqq>;
64366323 defm PMINSB : SS41I_binop_rm_int<0x38, "pminsb", int_x86_sse41_pminsb>;
64376324 defm PMINSD : SS41I_binop_rm_int<0x39, "pminsd", int_x86_sse41_pminsd>;
64386325 defm PMINUD : SS41I_binop_rm_int<0x3B, "pminud", int_x86_sse41_pminud>;
64446331 defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq>;
64456332 }
64466333
6447 let Predicates = [HasSSE41] in {
6448 def : Pat<(v2i64 (X86pcmpeq VR128:$src1, VR128:$src2)),
6449 (PCMPEQQrr VR128:$src1, VR128:$src2)>;
6450 def : Pat<(v2i64 (X86pcmpeq VR128:$src1, (memop addr:$src2))),
6451 (PCMPEQQrm VR128:$src1, addr:$src2)>;
6452 }
6453
64546334 /// SS48I_binop_rm - Simple SSE41 binary operator.
64556335 multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6456 ValueType OpVT, bit Is2Addr = 1> {
6336 ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
6337 X86MemOperand x86memop, bit Is2Addr = 1> {
64576338 let isCommutable = 1 in
6458 def rr : SS48I
6459 (ins VR128:$src1, VR128:$src2),
6339 def rr : SS48I),
6340 (ins RC:$src1, RC:$src2),
64606341 !if(Is2Addr,
64616342 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
64626343 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
6463 [(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]>,
6464 OpSize;
6465 def rm : SS48I
6466 (ins VR128:$src1, i128mem:$src2),
6344 [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>, OpSize;
6345 def rm : SS48I
6346 (ins RC:$src1, x86memop:$src2),
64676347 !if(Is2Addr,
64686348 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
64696349 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
6470 [(set VR128:$dst, (OpNode VR128:$src1,
6471 (bc_v4i32 (memopv2i64 addr:$src2))))]>,
6472 OpSize;
6473 }
6474
6475 /// SS48I_binop_rm - Simple SSE41 binary operator.
6476 multiclass SS48I_binop_rm_y<bits<8> opc, string OpcodeStr, SDNode OpNode,
6477 ValueType OpVT> {
6478 let isCommutable = 1 in
6479 def Yrr : SS48I
6480 (ins VR256:$src1, VR256:$src2),
6481 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6482 [(set VR256:$dst, (OpVT (OpNode VR256:$src1, VR256:$src2)))]>,
6483 OpSize;
6484 def Yrm : SS48I
6485 (ins VR256:$src1, i256mem:$src2),
6486 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6487 [(set VR256:$dst, (OpNode VR256:$src1,
6488 (bc_v8i32 (memopv4i64 addr:$src2))))]>,
6489 OpSize;
6490 }
6491
6492 let Predicates = [HasAVX] in
6493 defm VPMULLD : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, 0>, VEX_4V;
6494 let Predicates = [HasAVX2] in
6495 defm VPMULLD : SS48I_binop_rm_y<0x40, "vpmulld", mul, v8i32>, VEX_4V;
6496 let Constraints = "$src1 = $dst" in
6497 defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32>;
6350 [(set RC:$dst,
6351 (OpVT (OpNode RC:$src1,
6352 (bitconvert (memop_frag addr:$src2)))))]>, OpSize;
6353 }
6354
6355 let Predicates = [HasAVX] in {
6356 defm VPMULLD : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, VR128,
6357 memopv2i64, i128mem, 0>, VEX_4V;
6358 defm VPCMPEQQ : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v2i64, VR128,
6359 memopv2i64, i128mem, 0>, VEX_4V;
6360 }
6361 let Predicates = [HasAVX2] in {
6362 defm VPMULLDY : SS48I_binop_rm<0x40, "vpmulld", mul, v8i32, VR256,
6363 memopv4i64, i256mem, 0>, VEX_4V;
6364 defm VPCMPEQQY : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v4i64, VR256,
6365 memopv4i64, i256mem, 0>, VEX_4V;
6366 }
6367
6368 let Constraints = "$src1 = $dst" in {
6369 defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32, VR128,
6370 memopv2i64, i128mem>;
6371 defm PCMPEQQ : SS48I_binop_rm<0x29, "pcmpeqq", X86pcmpeq, v2i64, VR128,
6372 memopv2i64, i128mem>;
6373 }
64986374
64996375 /// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate
65006376 multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
67296605 // SSE4.2 - Compare Instructions
67306606 //===----------------------------------------------------------------------===//
67316607
6732 /// SS42I_binop_rm_int - Simple SSE 4.2 binary operator
6733 multiclass SS42I_binop_rm_int<bits<8> opc, string OpcodeStr,
6734 Intrinsic IntId128, bit Is2Addr = 1> {
6735 def rr : SS428I
6736 (ins VR128:$src1, VR128:$src2),
6608 /// SS42I_binop_rm - Simple SSE 4.2 binary operator
6609 multiclass SS42I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6610 ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
6611 X86MemOperand x86memop, bit Is2Addr = 1> {
6612 def rr : SS428I
6613 (ins RC:$src1, RC:$src2),
67376614 !if(Is2Addr,
67386615 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
67396616 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
6740 [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
6617 [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>,
67416618 OpSize;
6742 def rm : SS428I
6743 (ins VR128:$src1, i128mem:$src2),
6619 def rm : SS428I),
6620 (ins RC:$src1, x86memop:$src2),
67446621 !if(Is2Addr,
67456622 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
67466623 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
6747 [(set VR128:$dst,
6748 (IntId128 VR128:$src1, (memopv2i64 addr:$src2)))]>, OpSize;
6749 }
6750
6751 /// SS42I_binop_rm_int - Simple SSE 4.2 binary operator
6752 multiclass SS42I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
6753 Intrinsic IntId256> {
6754 def Yrr : SS428I
6755 (ins VR256:$src1, VR256:$src2),
6756 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6757 [(set VR256:$dst, (IntId256 VR256:$src1, VR256:$src2))]>,
6758 OpSize;
6759 def Yrm : SS428I
6760 (ins VR256:$src1, i256mem:$src2),
6761 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6762 [(set VR256:$dst,
6763 (IntId256 VR256:$src1, (memopv4i64 addr:$src2)))]>, OpSize;
6764 }
6765
6766 let Predicates = [HasAVX] in {
6767 defm VPCMPGTQ : SS42I_binop_rm_int<0x37, "vpcmpgtq", int_x86_sse42_pcmpgtq,
6768 0>, VEX_4V;
6769
6770 def : Pat<(v2i64 (X86pcmpgt VR128:$src1, VR128:$src2)),
6771 (VPCMPGTQrr VR128:$src1, VR128:$src2)>;
6772 def : Pat<(v2i64 (X86pcmpgt VR128:$src1, (memop addr:$src2))),
6773 (VPCMPGTQrm VR128:$src1, addr:$src2)>;
6774 }
6775
6776 let Predicates = [HasAVX2] in {
6777 defm VPCMPGTQ : SS42I_binop_rm_int_y<0x37, "vpcmpgtq", int_x86_avx2_pcmpgt_q>,
6778 VEX_4V;
6779
6780 def : Pat<(v4i64 (X86pcmpgt VR256:$src1, VR256:$src2)),
6781 (VPCMPGTQYrr VR256:$src1, VR256:$src2)>;
6782 def : Pat<(v4i64 (X86pcmpgt VR256:$src1, (memop addr:$src2))),
6783 (VPCMPGTQYrm VR256:$src1, addr:$src2)>;
6784 }
6624 [(set RC:$dst,
6625 (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>, OpSize;
6626 }
6627
6628 let Predicates = [HasAVX] in
6629 defm VPCMPGTQ : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v2i64, VR128,
6630 memopv2i64, i128mem, 0>, VEX_4V;
6631
6632 let Predicates = [HasAVX2] in
6633 defm VPCMPGTQY : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v4i64, VR256,
6634 memopv4i64, i256mem, 0>, VEX_4V;
67856635
67866636 let Constraints = "$src1 = $dst" in
6787 defm PCMPGTQ : SS42I_binop_rm_int<0x37, "pcmpgtq", int_x86_sse42_pcmpgtq>;
6788
6789 let Predicates = [HasSSE42] in {
6790 def : Pat<(v2i64 (X86pcmpgt VR128:$src1, VR128:$src2)),
6791 (PCMPGTQrr VR128:$src1, VR128:$src2)>;
6792 def : Pat<(v2i64 (X86pcmpgt VR128:$src1, (memop addr:$src2))),
6793 (PCMPGTQrm VR128:$src1, addr:$src2)>;
6794 }
6637 defm PCMPGTQ : SS42I_binop_rm<0x37, "pcmpgtq", X86pcmpgt, v2i64, VR128,
6638 memopv2i64, i128mem>;
67956639
67966640 //===----------------------------------------------------------------------===//
67976641 // SSE4.2 - String/text Processing Instructions