llvm.org GIT mirror llvm / 8ffad56
Implement vector-select support for AVX256. Refactor the vblend implementation so that TableGen matches the instruction by the node type. (git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@139400 91177308-0d34-0410-b5e6-96231b3b80d8) — Nadav Rotem, 8 years ago
5 changed file(s) with 101 addition(s) and 30 deletion(s). Raw diff Collapse all Expand all
10181018 setOperationAction(ISD::SELECT, MVT::v4i64, Custom);
10191019 setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
10201020
1021 setOperationAction(ISD::VSELECT, MVT::v4f64, Custom);
1022 setOperationAction(ISD::VSELECT, MVT::v4i64, Custom);
1023 setOperationAction(ISD::VSELECT, MVT::v8i32, Custom);
1024 setOperationAction(ISD::VSELECT, MVT::v8f32, Custom);
1025
10211026 setOperationAction(ISD::ADD, MVT::v4i64, Custom);
10221027 setOperationAction(ISD::ADD, MVT::v8i32, Custom);
10231028 setOperationAction(ISD::ADD, MVT::v16i16, Custom);
87058710 EVT VT = Op1.getValueType();
87068711 switch (VT.getSimpleVT().SimpleTy) {
87078712 default: break;
8713 // SSE4:
87088714 case MVT::v2i64:
87098715 case MVT::v2f64:
8710 return DAG.getNode(X86ISD::BLENDVPD, DL, VT, Ops, array_lengthof(Ops));
87118716 case MVT::v4i32:
87128717 case MVT::v4f32:
8713 return DAG.getNode(X86ISD::BLENDVPS, DL, VT , Ops, array_lengthof(Ops));
87148718 case MVT::v16i8:
8715 return DAG.getNode(X86ISD::PBLENDVB, DL, VT , Ops, array_lengthof(Ops));
8719 case MVT::v8i16:
8720 // AVX:
8721 case MVT::v4i64:
8722 case MVT::v4f64:
8723 case MVT::v8i32:
8724 case MVT::v8f32:
8725 case MVT::v32i8:
8726 case MVT::v16i16:
8727 return DAG.getNode(X86ISD::BLENDV, DL, VT, Ops, array_lengthof(Ops));
87168728 }
87178729
87188730 return SDValue();
99729984 M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
99739985 DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M,
99749986 DAG.getConstant(4, MVT::i32));
9975 R = DAG.getNode(X86ISD::PBLENDVB, dl, VT, R, M, Op);
9987 R = DAG.getNode(X86ISD::BLENDV, dl, VT, R, M, Op);
99769988 // a += a
99779989 Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
99789990
99879999 M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
998810000 DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M,
998910001 DAG.getConstant(2, MVT::i32));
9990 R = DAG.getNode(X86ISD::PBLENDVB, dl, VT, R, M, Op);
10002 R = DAG.getNode(X86ISD::BLENDV, dl, VT, R, M, Op);
999110003 // a += a
999210004 Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
999310005
999410006 // return pblendv(r, r+r, a);
9995 R = DAG.getNode(X86ISD::PBLENDVB, dl, VT,
10007 R = DAG.getNode(X86ISD::BLENDV, dl, VT,
999610008 R, DAG.getNode(ISD::ADD, dl, VT, R, R), Op);
999710009 return R;
999810010 }
1063010642 case X86ISD::PSIGNB: return "X86ISD::PSIGNB";
1063110643 case X86ISD::PSIGNW: return "X86ISD::PSIGNW";
1063210644 case X86ISD::PSIGND: return "X86ISD::PSIGND";
10633 case X86ISD::PBLENDVB: return "X86ISD::PBLENDVB";
10645 case X86ISD::BLENDV: return "X86ISD::BLENDV";
1063410646 case X86ISD::FMAX: return "X86ISD::FMAX";
1063510647 case X86ISD::FMIN: return "X86ISD::FMIN";
1063610648 case X86ISD::FRSQRT: return "X86ISD::FRSQRT";
1336013372 X = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, X);
1336113373 Y = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Y);
1336213374 Mask = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Mask);
13363 Mask = DAG.getNode(X86ISD::PBLENDVB, DL, MVT::v16i8, X, Y, Mask);
13375 Mask = DAG.getNode(X86ISD::BLENDV, DL, MVT::v16i8, X, Y, Mask);
1336413376 return DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Mask);
1336513377 }
1336613378 }
174174 /// PSIGNB/W/D - Copy integer sign.
175175 PSIGNB, PSIGNW, PSIGND,
176176
177 /// BLENDVXX family of opcodes
178 PBLENDVB,
179 BLENDVPD,
180 BLENDVPS,
177 /// BLEND family of opcodes
178 BLENDV,
181179
182180 /// FMAX, FMIN - Floating point max and min.
183181 ///
5757 def X86psignd : SDNode<"X86ISD::PSIGND",
5858 SDTypeProfile<1, 2, [SDTCisVT<0, v4i32>, SDTCisSameAs<0,1>,
5959 SDTCisSameAs<0,2>]>>;
60 def X86pblendvb : SDNode<"X86ISD::PBLENDVB",
61 SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
62 SDTCisSameAs<0,2>, SDTCisSameAs<0,3>]>>;
63 def X86blendvpd : SDNode<"X86ISD::BLENDVPD",
64 SDTypeProfile<1, 3, [SDTCisVT<0, v2i64>, SDTCisSameAs<0,1>,
65 SDTCisSameAs<0,2>, SDTCisSameAs<0,3>]>>;
66 def X86blendvps : SDNode<"X86ISD::BLENDVPS",
67 SDTypeProfile<1, 3, [SDTCisVT<0, v4i32>, SDTCisSameAs<0,1>,
60 def X86blendv : SDNode<"X86ISD::BLENDV",
61 SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisVec<1>,
6862 SDTCisSameAs<0,2>, SDTCisSameAs<0,3>]>>;
6963 def X86pextrb : SDNode<"X86ISD::PEXTRB",
7064 SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>;
58675867 memopv32i8, int_x86_avx_blendv_ps_256>;
58685868
58695869 let Predicates = [HasAVX] in {
5870 def : Pat<(X86pblendvb VR128:$src1, VR128:$src2, VR128:$mask),
5871 (VPBLENDVBrr VR128:$src1, VR128:$src2, VR128:$mask)>;
5872 def : Pat<(X86blendvpd VR128:$src1, VR128:$src2, VR128:$mask),
5873 (VBLENDVPDrr VR128:$src1, VR128:$src2, VR128:$mask)>;
5874 def : Pat<(X86blendvps VR128:$src1, VR128:$src2, VR128:$mask),
5875 (VBLENDVPSrr VR128:$src1, VR128:$src2, VR128:$mask)>;
5870 def : Pat<(v16i8 (X86blendv (v16i8 VR128:$src1), (v16i8 VR128:$src2),
5871 VR128:$mask)),
5872 (VPBLENDVBrr VR128:$src1, VR128:$src2, VR128:$mask)>;
5873 def : Pat<(v4i32 (X86blendv (v4i32 VR128:$src1), (v4i32 VR128:$src2),
5874 VR128:$mask)),
5875 (VBLENDVPSrr VR128:$src1, VR128:$src2, VR128:$mask)>;
5876 def : Pat<(v4f32 (X86blendv (v4f32 VR128:$src1), (v4f32 VR128:$src2),
5877 VR128:$mask)),
5878 (VBLENDVPSrr VR128:$src1, VR128:$src2, VR128:$mask)>;
5879 def : Pat<(v2i64 (X86blendv (v2i64 VR128:$src1), (v2i64 VR128:$src2),
5880 VR128:$mask)),
5881 (VBLENDVPDrr VR128:$src1, VR128:$src2, VR128:$mask)>;
5882 def : Pat<(v2f64 (X86blendv (v2f64 VR128:$src1), (v2f64 VR128:$src2),
5883 VR128:$mask)),
5884 (VBLENDVPDrr VR128:$src1, VR128:$src2, VR128:$mask)>;
5885
5886
5887 def : Pat<(v8i32 (X86blendv (v8i32 VR256:$src1), (v8i32 VR256:$src2),
5888 VR256:$mask)),
5889 (VBLENDVPSYrr VR256:$src1, VR256:$src2, VR256:$mask)>;
5890 def : Pat<(v8f32 (X86blendv (v8f32 VR256:$src1), (v8f32 VR256:$src2),
5891 VR256:$mask)),
5892 (VBLENDVPSYrr VR256:$src1, VR256:$src2, VR256:$mask)>;
5893
5894
5895 def : Pat<(v4i64 (X86blendv (v4i64 VR256:$src1), (v4i64 VR256:$src2),
5896 VR256:$mask)),
5897 (VBLENDVPDYrr VR256:$src1, VR256:$src2, VR256:$mask)>;
5898 def : Pat<(v4f64 (X86blendv (v4f64 VR256:$src1), (v4f64 VR256:$src2),
5899 VR256:$mask)),
5900 (VBLENDVPDYrr VR256:$src1, VR256:$src2, VR256:$mask)>;
58765901 }
58775902
58785903 /// SS41I_ternary_int - SSE 4.1 ternary operator
59005925 defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>;
59015926
59025927 let Predicates = [HasSSE41] in {
5903 def : Pat<(X86pblendvb VR128:$src1, VR128:$src2, XMM0),
5928 def : Pat<(v16i8 (X86blendv (v16i8 VR128:$src1), (v16i8 VR128:$src2), XMM0)),
59045929 (PBLENDVBrr0 VR128:$src1, VR128:$src2)>;
5905 def : Pat<(X86blendvpd VR128:$src1, VR128:$src2, XMM0),
5930 def : Pat<(v4i32 (X86blendv (v4i32 VR128:$src1), (v4i32 VR128:$src2), XMM0)),
5931 (BLENDVPSrr0 VR128:$src1, VR128:$src2)>;
5932 def : Pat<(v4f32 (X86blendv (v4f32 VR128:$src1), (v4f32 VR128:$src2), XMM0)),
5933 (BLENDVPSrr0 VR128:$src1, VR128:$src2)>;
5934 def : Pat<(v2i64 (X86blendv (v2i64 VR128:$src1), (v2i64 VR128:$src2), XMM0)),
59065935 (BLENDVPDrr0 VR128:$src1, VR128:$src2)>;
5907 def : Pat<(X86blendvps VR128:$src1, VR128:$src2, XMM0),
5908 (BLENDVPSrr0 VR128:$src1, VR128:$src2)>;
5936 def : Pat<(v2f64 (X86blendv (v2f64 VR128:$src1), (v2f64 VR128:$src2), XMM0)),
5937 (BLENDVPDrr0 VR128:$src1, VR128:$src2)>;
59095938 }
59105939
59115940 let Predicates = [HasAVX] in
0 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -promote-elements -mattr=+avx | FileCheck %s
1
2 ; AVX128 tests:
13
24 ;CHECK: vsel_float
35 ;CHECK: vblendvps
4446 }
4547
4648
49 ; AVX256 tests:
50
51
52 ;CHECK: vsel_float
53 ;CHECK: vblendvps
54 ;CHECK: ret
55 define <8 x float> @vsel_float8(<8 x float> %v1, <8 x float> %v2) {
56 %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x float> %v1, <8 x float> %v2
57 ret <8 x float> %vsel
58 }
59
60 ;CHECK: vsel_i32
61 ;CHECK: vblendvps
62 ;CHECK: ret
63 define <8 x i32> @vsel_i328(<8 x i32> %v1, <8 x i32> %v2) {
64 %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i32> %v1, <8 x i32> %v2
65 ret <8 x i32> %vsel
66 }
67
68 ;CHECK: vsel_double
69 ;CHECK: vblendvpd
70 ;CHECK: ret
71 define <8 x double> @vsel_double8(<8 x double> %v1, <8 x double> %v2) {
72 %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x double> %v1, <8 x double> %v2
73 ret <8 x double> %vsel
74 }
75
76 ;CHECK: vsel_i64
77 ;CHECK: vblendvpd
78 ;CHECK: ret
79 define <8 x i64> @vsel_i648(<8 x i64> %v1, <8 x i64> %v2) {
80 %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i64> %v1, <8 x i64> %v2
81 ret <8 x i64> %vsel
82 }
83
84