llvm.org GIT mirror llvm / ffe3e7d
Add X86-SSE4 codegen support for vector-select. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@139285 91177308-0d34-0410-b5e6-96231b3b80d8 Nadav Rotem 9 years ago
5 changed file(s) with 52 addition(s) and 5 deletion(s). Raw diff Collapse all Expand all
916916 setOperationAction(ISD::SHL, MVT::v4i32, Custom);
917917 setOperationAction(ISD::SHL, MVT::v16i8, Custom);
918918
919 setOperationAction(ISD::VSELECT, MVT::v2f64, Custom);
920 setOperationAction(ISD::VSELECT, MVT::v2i64, Custom);
921 setOperationAction(ISD::VSELECT, MVT::v16i8, Custom);
922 setOperationAction(ISD::VSELECT, MVT::v8i16, Custom);
923 setOperationAction(ISD::VSELECT, MVT::v4i32, Custom);
924 setOperationAction(ISD::VSELECT, MVT::v4f32, Custom);
925
919926 // i8 and i16 vectors are custom, because the source register and the
920927 // source memory operand types are not the same width. f32 vectors are
921928 // custom since the immediate controlling the insert encodes additional
86828689 SDValue Ops[] = { Op2, Op1, CC, Cond };
86838690 return DAG.getNode(X86ISD::CMOV, DL, VTs, Ops, array_lengthof(Ops));
86848691 }
8692
8693 SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
8694 SDValue Cond = Op.getOperand(0);
8695 SDValue Op1 = Op.getOperand(1);
8696 SDValue Op2 = Op.getOperand(2);
8697 DebugLoc DL = Op.getDebugLoc();
8698
8699 SDValue Ops[] = {Cond, Op1, Op2};
8700
8701 assert(Op1.getValueType().isVector() && "Op1 must be a vector");
8702 assert(Op2.getValueType().isVector() && "Op2 must be a vector");
8703 assert(Cond.getValueType().isVector() && "Cond must be a vector");
8704 assert(Op1.getValueType() == Op2.getValueType() && "Type mismatch");
8705
8706 switch (Op1.getValueType().getSimpleVT().SimpleTy) {
8707 default: break;
8708 case MVT::v2i64: return DAG.getNode(X86ISD::BLENDVPD, DL, Op1.getValueType(), Ops, array_lengthof(Ops));
8709 case MVT::v2f64: return DAG.getNode(X86ISD::BLENDVPD, DL, Op1.getValueType(), Ops, array_lengthof(Ops));
8710 case MVT::v4i32: return DAG.getNode(X86ISD::BLENDVPS, DL, Op1.getValueType(), Ops, array_lengthof(Ops));
8711 case MVT::v4f32: return DAG.getNode(X86ISD::BLENDVPS, DL, Op1.getValueType(), Ops, array_lengthof(Ops));
8712 case MVT::v16i8: return DAG.getNode(X86ISD::PBLENDVB, DL, Op1.getValueType(), Ops, array_lengthof(Ops));
8713 }
8714
8715 return SDValue();
8716 }
8717
86858718
86868719 // isAndOrOfSingleUseSetCCs - Return true if node is an ISD::AND or
86878720 // ISD::OR of two X86ISD::SETCC nodes each of which has no other use apart
1034910382 case ISD::FGETSIGN: return LowerFGETSIGN(Op, DAG);
1035010383 case ISD::SETCC: return LowerSETCC(Op, DAG);
1035110384 case ISD::SELECT: return LowerSELECT(Op, DAG);
10385 case ISD::VSELECT: return LowerVSELECT(Op, DAG);
1035210386 case ISD::BRCOND: return LowerBRCOND(Op, DAG);
1035310387 case ISD::JumpTable: return LowerJumpTable(Op, DAG);
1035410388 case ISD::VASTART: return LowerVASTART(Op, DAG);
174174 /// PSIGNB/W/D - Copy integer sign.
175175 PSIGNB, PSIGNW, PSIGND,
176176
177 /// PBLENDVB - Variable blend
177 /// BLENDVXX family of opcodes
178178 PBLENDVB,
179 BLENDVPD,
180 BLENDVPS,
179181
180182 /// FMAX, FMIN - Floating point max and min.
181183 ///
808810 SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
809811 SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
810812 SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
813 SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
811814 SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
812815 SDValue LowerMEMSET(SDValue Op, SelectionDAG &DAG) const;
813816 SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
5757 def X86psignd : SDNode<"X86ISD::PSIGND",
5858 SDTypeProfile<1, 2, [SDTCisVT<0, v4i32>, SDTCisSameAs<0,1>,
5959 SDTCisSameAs<0,2>]>>;
60 def X86pblendv : SDNode<"X86ISD::PBLENDVB",
60 def X86pblendvb : SDNode<"X86ISD::PBLENDVB",
6161 SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
62 SDTCisSameAs<0,2>, SDTCisSameAs<0,3>]>>;
63 def X86blendvpd : SDNode<"X86ISD::BLENDVPD",
64 SDTypeProfile<1, 3, [SDTCisVT<0, v2i64>, SDTCisSameAs<0,1>,
65 SDTCisSameAs<0,2>, SDTCisSameAs<0,3>]>>;
66 def X86blendvps : SDNode<"X86ISD::BLENDVPS",
67 SDTypeProfile<1, 3, [SDTCisVT<0, v4i32>, SDTCisSameAs<0,1>,
6268 SDTCisSameAs<0,2>, SDTCisSameAs<0,3>]>>;
6369 def X86pextrb : SDNode<"X86ISD::PEXTRB",
6470 SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>;
58425842 defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, i256mem,
58435843 memopv32i8, int_x86_avx_blendv_ps_256>;
58445844
5845 def : Pat<(X86pblendv VR128:$src1, VR128:$src2, VR128:$src3),
5845 def : Pat<(X86pblendvb VR128:$src1, VR128:$src2, VR128:$src3),
58465846 (VPBLENDVBrr VR128:$src1, VR128:$src2, VR128:$src3)>,
58475847 Requires<[HasAVX]>;
58485848
58705870 defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>;
58715871 defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>;
58725872
5873 def : Pat<(X86pblendv VR128:$src1, VR128:$src2, XMM0),
5873 def : Pat<(X86pblendvb VR128:$src1, VR128:$src2, XMM0),
58745874 (PBLENDVBrr0 VR128:$src1, VR128:$src2)>, Requires<[HasSSE41]>;
5875 def : Pat<(X86blendvpd XMM0, VR128:$src1, VR128:$src2),
5876 (BLENDVPDrr0 VR128:$src1, VR128:$src2)>, Requires<[HasSSE41]>;
5877 def : Pat<(X86blendvps XMM0, VR128:$src1, VR128:$src2),
5878 (BLENDVPSrr0 VR128:$src1, VR128:$src2)>, Requires<[HasSSE41]>;
58755879
58765880 let Predicates = [HasAVX] in
58775881 def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
22
33 ; This test is the poster-child for integer-element-promotion.
44 ; Until this feature is complete, we mark this test as expected to fail.
5 ; XFAIL: *
65 ; CHECK: vector_code
6 ; CHECK: blend
77 ; CHECK: ret
88 define <4 x float> @vector_code(<4 x i64> %A, <4 x i64> %B, <4 x float> %R0, <4 x float> %R1 ) {
99 %C = icmp eq <4 x i64> %A, %B