llvm.org GIT mirror llvm / 5363e7a
Use new vector insert half-word and byte instructions when we see insertelement on '8 x i16' and '16 x i8' types. Also extended existing lit testcase to cover these cases. Differential Revision: https://reviews.llvm.org/D34630 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@317613 91177308-0d34-0410-b5e6-96231b3b80d8 Graham Yiu 1 year, 11 months ago
3 changed file(s) with 277 addition(s) and 3 deletion(s). Raw diff Collapse all Expand all
781781 setOperationAction(ISD::SHL, MVT::v1i128, Legal);
782782 setOperationAction(ISD::SRL, MVT::v1i128, Legal);
783783 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
784 }
785
786 if (Subtarget.hasP9Altivec()) {
787 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
788 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
784789 }
785790 }
786791
88418846 SelectionDAG &DAG) const {
88428847 assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
88438848 "Should only be called for ISD::INSERT_VECTOR_ELT");
8849
88448850 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
88458851 // We have legal lowering for constant indices but not for variable ones.
8846 if (C)
8847 return Op;
8848 return SDValue();
8852 if (!C)
8853 return SDValue();
8854
8855 EVT VT = Op.getValueType();
8856 SDLoc dl(Op);
8857 SDValue V1 = Op.getOperand(0);
8858 SDValue V2 = Op.getOperand(1);
8859 // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
8860 if (VT == MVT::v8i16 || VT == MVT::v16i8) {
8861 SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
8862 unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
8863 unsigned InsertAtElement = C->getZExtValue();
8864 unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
8865 if (Subtarget.isLittleEndian()) {
8866 InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
8867 }
8868 return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
8869 DAG.getConstant(InsertAtByte, dl, MVT::i32));
8870 }
8871 return Op;
88498872 }
88508873
88518874 SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
25942594 (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 3)), VSFRC))>;
25952595 }
25962596
2597 // Alternate patterns for PPCmtvsrz where the output is v8i16 or v16i8 instead
2598 // of f64
2599 def : Pat<(v8i16 (PPCmtvsrz i32:$A)),
2600 (v8i16 (SUBREG_TO_REG (i64 1), (MTVSRWZ $A), sub_64))>;
2601 def : Pat<(v16i8 (PPCmtvsrz i32:$A)),
2602 (v16i8 (SUBREG_TO_REG (i64 1), (MTVSRWZ $A), sub_64))>;
2603
25972604 // Patterns for which instructions from ISA 3.0 are a better match
25982605 let Predicates = [IsLittleEndian, HasP9Vector] in {
25992606 def : Pat<(f32 (PPCfcfidus (PPCmtvsrz (i32 (extractelt v4i32:$A, 0))))),
875875 %vecins = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32>
876876 ret <16 x i8> %vecins
877877 }
878
879 ; The following tests try to insert one halfword element into the vector. We
880 ; should always be using the 'vinserth' instruction.
881 define <8 x i16> @insert_halfword_0(<8 x i16> %a, i16 %b) {
882 entry:
883 ; CHECK-LABEL: insert_halfword_0
884 ; CHECK: vinserth 2, 3, 14
885 ; CHECK-BE-LABEL: insert_halfword_0
886 ; CHECK-BE: vinserth 2, 3, 0
887 %vecins = insertelement <8 x i16> %a, i16 %b, i32 0
888 ret <8 x i16> %vecins
889 }
890
891 define <8 x i16> @insert_halfword_1(<8 x i16> %a, i16 %b) {
892 entry:
893 ; CHECK-LABEL: insert_halfword_1
894 ; CHECK: vinserth 2, 3, 12
895 ; CHECK-BE-LABEL: insert_halfword_1
896 ; CHECK-BE: vinserth 2, 3, 2
897 %vecins = insertelement <8 x i16> %a, i16 %b, i32 1
898 ret <8 x i16> %vecins
899 }
900
901 define <8 x i16> @insert_halfword_2(<8 x i16> %a, i16 %b) {
902 entry:
903 ; CHECK-LABEL: insert_halfword_2
904 ; CHECK: vinserth 2, 3, 10
905 ; CHECK-BE-LABEL: insert_halfword_2
906 ; CHECK-BE: vinserth 2, 3, 4
907 %vecins = insertelement <8 x i16> %a, i16 %b, i32 2
908 ret <8 x i16> %vecins
909 }
910
911 define <8 x i16> @insert_halfword_3(<8 x i16> %a, i16 %b) {
912 entry:
913 ; CHECK-LABEL: insert_halfword_3
914 ; CHECK: vinserth 2, 3, 8
915 ; CHECK-BE-LABEL: insert_halfword_3
916 ; CHECK-BE: vinserth 2, 3, 6
917 %vecins = insertelement <8 x i16> %a, i16 %b, i32 3
918 ret <8 x i16> %vecins
919 }
920
921 define <8 x i16> @insert_halfword_4(<8 x i16> %a, i16 %b) {
922 entry:
923 ; CHECK-LABEL: insert_halfword_4
924 ; CHECK: vinserth 2, 3, 6
925 ; CHECK-BE-LABEL: insert_halfword_4
926 ; CHECK-BE: vinserth 2, 3, 8
927 %vecins = insertelement <8 x i16> %a, i16 %b, i32 4
928 ret <8 x i16> %vecins
929 }
930
931 define <8 x i16> @insert_halfword_5(<8 x i16> %a, i16 %b) {
932 entry:
933 ; CHECK-LABEL: insert_halfword_5
934 ; CHECK: vinserth 2, 3, 4
935 ; CHECK-BE-LABEL: insert_halfword_5
936 ; CHECK-BE: vinserth 2, 3, 10
937 %vecins = insertelement <8 x i16> %a, i16 %b, i32 5
938 ret <8 x i16> %vecins
939 }
940
941 define <8 x i16> @insert_halfword_6(<8 x i16> %a, i16 %b) {
942 entry:
943 ; CHECK-LABEL: insert_halfword_6
944 ; CHECK: vinserth 2, 3, 2
945 ; CHECK-BE-LABEL: insert_halfword_6
946 ; CHECK-BE: vinserth 2, 3, 12
947 %vecins = insertelement <8 x i16> %a, i16 %b, i32 6
948 ret <8 x i16> %vecins
949 }
950
951 define <8 x i16> @insert_halfword_7(<8 x i16> %a, i16 %b) {
952 entry:
953 ; CHECK-LABEL: insert_halfword_7
954 ; CHECK: vinserth 2, 3, 0
955 ; CHECK-BE-LABEL: insert_halfword_7
956 ; CHECK-BE: vinserth 2, 3, 14
957 %vecins = insertelement <8 x i16> %a, i16 %b, i32 7
958 ret <8 x i16> %vecins
959 }
960
961 ; The following tests try to insert one byte element into the vector. We
962 ; should always be using the 'vinsertb' instruction.
963 define <16 x i8> @insert_byte_0(<16 x i8> %a, i8 %b) {
964 entry:
965 ; CHECK-LABEL: insert_byte_0
966 ; CHECK: vinsertb 2, 3, 15
967 ; CHECK-BE-LABEL: insert_byte_0
968 ; CHECK-BE: vinsertb 2, 3, 0
969 %vecins = insertelement <16 x i8> %a, i8 %b, i32 0
970 ret <16 x i8> %vecins
971 }
972
973 define <16 x i8> @insert_byte_1(<16 x i8> %a, i8 %b) {
974 entry:
975 ; CHECK-LABEL: insert_byte_1
976 ; CHECK: vinsertb 2, 3, 14
977 ; CHECK-BE-LABEL: insert_byte_1
978 ; CHECK-BE: vinsertb 2, 3, 1
979 %vecins = insertelement <16 x i8> %a, i8 %b, i32 1
980 ret <16 x i8> %vecins
981 }
982
983 define <16 x i8> @insert_byte_2(<16 x i8> %a, i8 %b) {
984 entry:
985 ; CHECK-LABEL: insert_byte_2
986 ; CHECK: vinsertb 2, 3, 13
987 ; CHECK-BE-LABEL: insert_byte_2
988 ; CHECK-BE: vinsertb 2, 3, 2
989 %vecins = insertelement <16 x i8> %a, i8 %b, i32 2
990 ret <16 x i8> %vecins
991 }
992
993 define <16 x i8> @insert_byte_3(<16 x i8> %a, i8 %b) {
994 entry:
995 ; CHECK-LABEL: insert_byte_3
996 ; CHECK: vinsertb 2, 3, 12
997 ; CHECK-BE-LABEL: insert_byte_3
998 ; CHECK-BE: vinsertb 2, 3, 3
999 %vecins = insertelement <16 x i8> %a, i8 %b, i32 3
1000 ret <16 x i8> %vecins
1001 }
1002
1003 define <16 x i8> @insert_byte_4(<16 x i8> %a, i8 %b) {
1004 entry:
1005 ; CHECK-LABEL: insert_byte_4
1006 ; CHECK: vinsertb 2, 3, 11
1007 ; CHECK-BE-LABEL: insert_byte_4
1008 ; CHECK-BE: vinsertb 2, 3, 4
1009 %vecins = insertelement <16 x i8> %a, i8 %b, i32 4
1010 ret <16 x i8> %vecins
1011 }
1012
1013 define <16 x i8> @insert_byte_5(<16 x i8> %a, i8 %b) {
1014 entry:
1015 ; CHECK-LABEL: insert_byte_5
1016 ; CHECK: vinsertb 2, 3, 10
1017 ; CHECK-BE-LABEL: insert_byte_5
1018 ; CHECK-BE: vinsertb 2, 3, 5
1019 %vecins = insertelement <16 x i8> %a, i8 %b, i32 5
1020 ret <16 x i8> %vecins
1021 }
1022
1023 define <16 x i8> @insert_byte_6(<16 x i8> %a, i8 %b) {
1024 entry:
1025 ; CHECK-LABEL: insert_byte_6
1026 ; CHECK: vinsertb 2, 3, 9
1027 ; CHECK-BE-LABEL: insert_byte_6
1028 ; CHECK-BE: vinsertb 2, 3, 6
1029 %vecins = insertelement <16 x i8> %a, i8 %b, i32 6
1030 ret <16 x i8> %vecins
1031 }
1032
1033 define <16 x i8> @insert_byte_7(<16 x i8> %a, i8 %b) {
1034 entry:
1035 ; CHECK-LABEL: insert_byte_7
1036 ; CHECK: vinsertb 2, 3, 8
1037 ; CHECK-BE-LABEL: insert_byte_7
1038 ; CHECK-BE: vinsertb 2, 3, 7
1039 %vecins = insertelement <16 x i8> %a, i8 %b, i32 7
1040 ret <16 x i8> %vecins
1041 }
1042
1043 define <16 x i8> @insert_byte_8(<16 x i8> %a, i8 %b) {
1044 entry:
1045 ; CHECK-LABEL: insert_byte_8
1046 ; CHECK: vinsertb 2, 3, 7
1047 ; CHECK-BE-LABEL: insert_byte_8
1048 ; CHECK-BE: vinsertb 2, 3, 8
1049 %vecins = insertelement <16 x i8> %a, i8 %b, i32 8
1050 ret <16 x i8> %vecins
1051 }
1052
1053 define <16 x i8> @insert_byte_9(<16 x i8> %a, i8 %b) {
1054 entry:
1055 ; CHECK-LABEL: insert_byte_9
1056 ; CHECK: vinsertb 2, 3, 6
1057 ; CHECK-BE-LABEL: insert_byte_9
1058 ; CHECK-BE: vinsertb 2, 3, 9
1059 %vecins = insertelement <16 x i8> %a, i8 %b, i32 9
1060 ret <16 x i8> %vecins
1061 }
1062
1063 define <16 x i8> @insert_byte_10(<16 x i8> %a, i8 %b) {
1064 entry:
1065 ; CHECK-LABEL: insert_byte_10
1066 ; CHECK: vinsertb 2, 3, 5
1067 ; CHECK-BE-LABEL: insert_byte_10
1068 ; CHECK-BE: vinsertb 2, 3, 10
1069 %vecins = insertelement <16 x i8> %a, i8 %b, i32 10
1070 ret <16 x i8> %vecins
1071 }
1072
1073 define <16 x i8> @insert_byte_11(<16 x i8> %a, i8 %b) {
1074 entry:
1075 ; CHECK-LABEL: insert_byte_11
1076 ; CHECK: vinsertb 2, 3, 4
1077 ; CHECK-BE-LABEL: insert_byte_11
1078 ; CHECK-BE: vinsertb 2, 3, 11
1079 %vecins = insertelement <16 x i8> %a, i8 %b, i32 11
1080 ret <16 x i8> %vecins
1081 }
1082
1083 define <16 x i8> @insert_byte_12(<16 x i8> %a, i8 %b) {
1084 entry:
1085 ; CHECK-LABEL: insert_byte_12
1086 ; CHECK: vinsertb 2, 3, 3
1087 ; CHECK-BE-LABEL: insert_byte_12
1088 ; CHECK-BE: vinsertb 2, 3, 12
1089 %vecins = insertelement <16 x i8> %a, i8 %b, i32 12
1090 ret <16 x i8> %vecins
1091 }
1092
1093 define <16 x i8> @insert_byte_13(<16 x i8> %a, i8 %b) {
1094 entry:
1095 ; CHECK-LABEL: insert_byte_13
1096 ; CHECK: vinsertb 2, 3, 2
1097 ; CHECK-BE-LABEL: insert_byte_13
1098 ; CHECK-BE: vinsertb 2, 3, 13
1099 %vecins = insertelement <16 x i8> %a, i8 %b, i32 13
1100 ret <16 x i8> %vecins
1101 }
1102
1103 define <16 x i8> @insert_byte_14(<16 x i8> %a, i8 %b) {
1104 entry:
1105 ; CHECK-LABEL: insert_byte_14
1106 ; CHECK: vinsertb 2, 3, 1
1107 ; CHECK-BE-LABEL: insert_byte_14
1108 ; CHECK-BE: vinsertb 2, 3, 14
1109 %vecins = insertelement <16 x i8> %a, i8 %b, i32 14
1110 ret <16 x i8> %vecins
1111 }
1112
1113 define <16 x i8> @insert_byte_15(<16 x i8> %a, i8 %b) {
1114 entry:
1115 ; CHECK-LABEL: insert_byte_15
1116 ; CHECK: vinsertb 2, 3, 0
1117 ; CHECK-BE-LABEL: insert_byte_15
1118 ; CHECK-BE: vinsertb 2, 3, 15
1119 %vecins = insertelement <16 x i8> %a, i8 %b, i32 15
1120 ret <16 x i8> %vecins
1121 }