llvm.org GIT mirror llvm / 49cfd1f
[AVX-512] Improve lowering of sign_extend of v4i1 to v4i32 and v2i1 to v2i64 when avx512vl is available, but not avx512dq. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@291746 91177308-0d34-0410-b5e6-96231b3b80d8 Craig Topper 3 years ago
2 changed file(s) with 73 addition(s) and 154 deletion(s). Raw diff Collapse all Expand all
12791279 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
12801280 setOperationAction(ISD::ZERO_EXTEND, MVT::v4i32, Custom);
12811281 setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Custom);
1282 setOperationAction(ISD::SIGN_EXTEND, MVT::v4i32, Custom);
1283 setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Custom);
12821284
12831285 // FIXME. These commands are available on SSE/AVX2, add relevant patterns.
12841286 setLoadExtAction(ISD::EXTLOAD, MVT::v8i32, MVT::v8i8, Legal);
13051307 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i8, Custom);
13061308 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i16, Custom);
13071309 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom);
1308 if (Subtarget.hasDQI()) {
1309 setOperationAction(ISD::SIGN_EXTEND, MVT::v4i32, Custom);
1310 setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Custom);
1311 }
1310
13121311 for (auto VT : { MVT::v16f32, MVT::v8f64 }) {
13131312 setOperationAction(ISD::FFLOOR, VT, Legal);
13141313 setOperationAction(ISD::FCEIL, VT, Legal);
1739117390
1739217391 unsigned NumElts = VT.getVectorNumElements();
1739317392
17394 if (NumElts != 8 && NumElts != 16 && !Subtarget.hasBWI())
17395 return SDValue();
17396
17397 if (VT.is512BitVector() && InVTElt != MVT::i1) {
17393 if (VT.is512BitVector() && InVTElt != MVT::i1 &&
17394 (NumElts == 8 || NumElts == 16 || Subtarget.hasBWI())) {
1739817395 if (In.getOpcode() == X86ISD::VSEXT || In.getOpcode() == X86ISD::VZEXT)
1739917396 return DAG.getNode(In.getOpcode(), dl, VT, In.getOperand(0));
1740017397 return DAG.getNode(X86ISD::VSEXT, dl, VT, In);
1740117398 }
1740217399
17403 assert (InVTElt == MVT::i1 && "Unexpected vector type");
17404 MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(512/NumElts), NumElts);
17400 if (InVTElt != MVT::i1)
17401 return SDValue();
17402
17403 MVT ExtVT = VT;
17404 if (!VT.is512BitVector() && !Subtarget.hasVLX())
17405 ExtVT = MVT::getVectorVT(MVT::getIntegerVT(512/NumElts), NumElts);
17406
1740517407 SDValue V;
1740617408 if (Subtarget.hasDQI()) {
1740717409 V = DAG.getNode(X86ISD::VSEXT, dl, ExtVT, In);
1741017412 SDValue NegOne = getOnesVector(ExtVT, Subtarget, DAG, dl);
1741117413 SDValue Zero = getZeroVector(ExtVT, Subtarget, DAG, dl);
1741217414 V = DAG.getNode(ISD::VSELECT, dl, ExtVT, In, NegOne, Zero);
17413 if (VT.is512BitVector())
17415 if (ExtVT == VT)
1741417416 return V;
1741517417 }
1741617418
875875 }
876876
877877 define <16 x double> @sitofp_16i1_double(<16 x double> %a) {
878 ; NODQ-LABEL: sitofp_16i1_double:
879 ; NODQ: ## BB#0:
880 ; NODQ-NEXT: vpxord %zmm2, %zmm2, %zmm2
881 ; NODQ-NEXT: vcmpltpd %zmm1, %zmm2, %k1
882 ; NODQ-NEXT: vcmpltpd %zmm0, %zmm2, %k2
883 ; NODQ-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
884 ; NODQ-NEXT: vpmovqd %zmm0, %ymm0
885 ; NODQ-NEXT: vcvtdq2pd %ymm0, %zmm0
886 ; NODQ-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
887 ; NODQ-NEXT: vpmovqd %zmm1, %ymm1
888 ; NODQ-NEXT: vcvtdq2pd %ymm1, %zmm1
889 ; NODQ-NEXT: retq
878 ; NOVLDQ-LABEL: sitofp_16i1_double:
879 ; NOVLDQ: ## BB#0:
880 ; NOVLDQ-NEXT: vpxord %zmm2, %zmm2, %zmm2
881 ; NOVLDQ-NEXT: vcmpltpd %zmm1, %zmm2, %k1
882 ; NOVLDQ-NEXT: vcmpltpd %zmm0, %zmm2, %k2
883 ; NOVLDQ-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
884 ; NOVLDQ-NEXT: vpmovqd %zmm0, %ymm0
885 ; NOVLDQ-NEXT: vcvtdq2pd %ymm0, %zmm0
886 ; NOVLDQ-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
887 ; NOVLDQ-NEXT: vpmovqd %zmm1, %ymm1
888 ; NOVLDQ-NEXT: vcvtdq2pd %ymm1, %zmm1
889 ; NOVLDQ-NEXT: retq
890890 ;
891891 ; VLDQ-LABEL: sitofp_16i1_double:
892892 ; VLDQ: ## BB#0:
898898 ; VLDQ-NEXT: vpmovm2d %k0, %ymm1
899899 ; VLDQ-NEXT: vcvtdq2pd %ymm1, %zmm1
900900 ; VLDQ-NEXT: retq
901 ;
902 ; VLNODQ-LABEL: sitofp_16i1_double:
903 ; VLNODQ: ## BB#0:
904 ; VLNODQ-NEXT: vpxord %zmm2, %zmm2, %zmm2
905 ; VLNODQ-NEXT: vcmpltpd %zmm1, %zmm2, %k1
906 ; VLNODQ-NEXT: vcmpltpd %zmm0, %zmm2, %k2
907 ; VLNODQ-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
908 ; VLNODQ-NEXT: vmovdqa32 %ymm1, %ymm0 {%k2} {z}
909 ; VLNODQ-NEXT: vcvtdq2pd %ymm0, %zmm0
910 ; VLNODQ-NEXT: vmovdqa32 %ymm1, %ymm1 {%k1} {z}
911 ; VLNODQ-NEXT: vcvtdq2pd %ymm1, %zmm1
912 ; VLNODQ-NEXT: retq
901913 ;
902914 ; AVX512DQ-LABEL: sitofp_16i1_double:
903915 ; AVX512DQ: ## BB#0:
917929 }
918930
919931 define <8 x double> @sitofp_8i1_double(<8 x double> %a) {
920 ; NODQ-LABEL: sitofp_8i1_double:
921 ; NODQ: ## BB#0:
922 ; NODQ-NEXT: vpxord %zmm1, %zmm1, %zmm1
923 ; NODQ-NEXT: vcmpltpd %zmm0, %zmm1, %k1
924 ; NODQ-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
925 ; NODQ-NEXT: vpmovqd %zmm0, %ymm0
926 ; NODQ-NEXT: vcvtdq2pd %ymm0, %zmm0
927 ; NODQ-NEXT: retq
932 ; NOVLDQ-LABEL: sitofp_8i1_double:
933 ; NOVLDQ: ## BB#0:
934 ; NOVLDQ-NEXT: vpxord %zmm1, %zmm1, %zmm1
935 ; NOVLDQ-NEXT: vcmpltpd %zmm0, %zmm1, %k1
936 ; NOVLDQ-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
937 ; NOVLDQ-NEXT: vpmovqd %zmm0, %ymm0
938 ; NOVLDQ-NEXT: vcvtdq2pd %ymm0, %zmm0
939 ; NOVLDQ-NEXT: retq
928940 ;
929941 ; VLDQ-LABEL: sitofp_8i1_double:
930942 ; VLDQ: ## BB#0:
933945 ; VLDQ-NEXT: vpmovm2d %k0, %ymm0
934946 ; VLDQ-NEXT: vcvtdq2pd %ymm0, %zmm0
935947 ; VLDQ-NEXT: retq
948 ;
949 ; VLNODQ-LABEL: sitofp_8i1_double:
950 ; VLNODQ: ## BB#0:
951 ; VLNODQ-NEXT: vpxord %zmm1, %zmm1, %zmm1
952 ; VLNODQ-NEXT: vcmpltpd %zmm0, %zmm1, %k1
953 ; VLNODQ-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
954 ; VLNODQ-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
955 ; VLNODQ-NEXT: vcvtdq2pd %ymm0, %zmm0
956 ; VLNODQ-NEXT: retq
936957 ;
937958 ; AVX512DQ-LABEL: sitofp_8i1_double:
938959 ; AVX512DQ: ## BB#0:
970991 ; VLNODQ: ## BB#0:
971992 ; VLNODQ-NEXT: vpxor %ymm1, %ymm1, %ymm1
972993 ; VLNODQ-NEXT: vcmpltps %ymm0, %ymm1, %k1
973 ; VLNODQ-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
974 ; VLNODQ-NEXT: vpmovqd %zmm0, %ymm0
994 ; VLNODQ-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
995 ; VLNODQ-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
975996 ; VLNODQ-NEXT: vcvtdq2ps %ymm0, %ymm0
976997 ; VLNODQ-NEXT: retq
977998 ;
10081029 ; VLNODQ-LABEL: sitofp_4i1_float:
10091030 ; VLNODQ: ## BB#0:
10101031 ; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
1011 ; VLNODQ-NEXT: vcmpltps %xmm0, %xmm1, %k2
1012 ; VLNODQ-NEXT: kshiftlw $12, %k2, %k0
1013 ; VLNODQ-NEXT: kshiftrw $15, %k0, %k0
1014 ; VLNODQ-NEXT: kshiftlw $13, %k2, %k1
1015 ; VLNODQ-NEXT: kshiftrw $15, %k1, %k1
1016 ; VLNODQ-NEXT: kshiftlw $15, %k2, %k3
1017 ; VLNODQ-NEXT: kshiftrw $15, %k3, %k3
1018 ; VLNODQ-NEXT: kshiftlw $14, %k2, %k2
1019 ; VLNODQ-NEXT: kshiftrw $15, %k2, %k2
1020 ; VLNODQ-NEXT: kmovw %k2, %eax
1021 ; VLNODQ-NEXT: andl $1, %eax
1022 ; VLNODQ-NEXT: xorl %ecx, %ecx
1023 ; VLNODQ-NEXT: testb %al, %al
1024 ; VLNODQ-NEXT: movl $-1, %eax
1025 ; VLNODQ-NEXT: movl $0, %edx
1026 ; VLNODQ-NEXT: cmovnel %eax, %edx
1027 ; VLNODQ-NEXT: kmovw %k3, %esi
1028 ; VLNODQ-NEXT: andl $1, %esi
1029 ; VLNODQ-NEXT: testb %sil, %sil
1030 ; VLNODQ-NEXT: movl $0, %esi
1031 ; VLNODQ-NEXT: cmovnel %eax, %esi
1032 ; VLNODQ-NEXT: vmovd %esi, %xmm0
1033 ; VLNODQ-NEXT: vpinsrd $1, %edx, %xmm0, %xmm0
1034 ; VLNODQ-NEXT: kmovw %k1, %edx
1035 ; VLNODQ-NEXT: andl $1, %edx
1036 ; VLNODQ-NEXT: testb %dl, %dl
1037 ; VLNODQ-NEXT: movl $0, %edx
1038 ; VLNODQ-NEXT: cmovnel %eax, %edx
1039 ; VLNODQ-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0
1040 ; VLNODQ-NEXT: kmovw %k0, %edx
1041 ; VLNODQ-NEXT: andl $1, %edx
1042 ; VLNODQ-NEXT: testb %dl, %dl
1043 ; VLNODQ-NEXT: cmovnel %eax, %ecx
1044 ; VLNODQ-NEXT: vpinsrd $3, %ecx, %xmm0, %xmm0
1032 ; VLNODQ-NEXT: vcmpltps %xmm0, %xmm1, %k1
1033 ; VLNODQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
1034 ; VLNODQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
10451035 ; VLNODQ-NEXT: vcvtdq2ps %xmm0, %xmm0
10461036 ; VLNODQ-NEXT: retq
10471037 %cmpres = fcmp ogt <4 x float> %a, zeroinitializer
10691059 ; VLNODQ-LABEL: sitofp_4i1_double:
10701060 ; VLNODQ: ## BB#0:
10711061 ; VLNODQ-NEXT: vpxor %ymm1, %ymm1, %ymm1
1072 ; VLNODQ-NEXT: vcmpltpd %ymm0, %ymm1, %k2
1073 ; VLNODQ-NEXT: kshiftlw $12, %k2, %k0
1074 ; VLNODQ-NEXT: kshiftrw $15, %k0, %k0
1075 ; VLNODQ-NEXT: kshiftlw $13, %k2, %k1
1076 ; VLNODQ-NEXT: kshiftrw $15, %k1, %k1
1077 ; VLNODQ-NEXT: kshiftlw $15, %k2, %k3
1078 ; VLNODQ-NEXT: kshiftrw $15, %k3, %k3
1079 ; VLNODQ-NEXT: kshiftlw $14, %k2, %k2
1080 ; VLNODQ-NEXT: kshiftrw $15, %k2, %k2
1081 ; VLNODQ-NEXT: kmovw %k2, %eax
1082 ; VLNODQ-NEXT: andl $1, %eax
1083 ; VLNODQ-NEXT: xorl %ecx, %ecx
1084 ; VLNODQ-NEXT: testb %al, %al
1085 ; VLNODQ-NEXT: movl $-1, %eax
1086 ; VLNODQ-NEXT: movl $0, %edx
1087 ; VLNODQ-NEXT: cmovnel %eax, %edx
1088 ; VLNODQ-NEXT: kmovw %k3, %esi
1089 ; VLNODQ-NEXT: andl $1, %esi
1090 ; VLNODQ-NEXT: testb %sil, %sil
1091 ; VLNODQ-NEXT: movl $0, %esi
1092 ; VLNODQ-NEXT: cmovnel %eax, %esi
1093 ; VLNODQ-NEXT: vmovd %esi, %xmm0
1094 ; VLNODQ-NEXT: vpinsrd $1, %edx, %xmm0, %xmm0
1095 ; VLNODQ-NEXT: kmovw %k1, %edx
1096 ; VLNODQ-NEXT: andl $1, %edx
1097 ; VLNODQ-NEXT: testb %dl, %dl
1098 ; VLNODQ-NEXT: movl $0, %edx
1099 ; VLNODQ-NEXT: cmovnel %eax, %edx
1100 ; VLNODQ-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0
1101 ; VLNODQ-NEXT: kmovw %k0, %edx
1102 ; VLNODQ-NEXT: andl $1, %edx
1103 ; VLNODQ-NEXT: testb %dl, %dl
1104 ; VLNODQ-NEXT: cmovnel %eax, %ecx
1105 ; VLNODQ-NEXT: vpinsrd $3, %ecx, %xmm0, %xmm0
1062 ; VLNODQ-NEXT: vcmpltpd %ymm0, %ymm1, %k1
1063 ; VLNODQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
1064 ; VLNODQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
11061065 ; VLNODQ-NEXT: vcvtdq2pd %xmm0, %ymm0
11071066 ; VLNODQ-NEXT: retq
11081067 %cmpres = fcmp ogt <4 x double> %a, zeroinitializer
11301089 ; VLNODQ-LABEL: sitofp_2i1_float:
11311090 ; VLNODQ: ## BB#0:
11321091 ; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
1133 ; VLNODQ-NEXT: vcmpltps %xmm0, %xmm1, %k2
1134 ; VLNODQ-NEXT: kshiftlw $12, %k2, %k0
1135 ; VLNODQ-NEXT: kshiftrw $15, %k0, %k0
1136 ; VLNODQ-NEXT: kshiftlw $13, %k2, %k1
1137 ; VLNODQ-NEXT: kshiftrw $15, %k1, %k1
1138 ; VLNODQ-NEXT: kshiftlw $15, %k2, %k3
1139 ; VLNODQ-NEXT: kshiftrw $15, %k3, %k3
1140 ; VLNODQ-NEXT: kshiftlw $14, %k2, %k2
1141 ; VLNODQ-NEXT: kshiftrw $15, %k2, %k2
1142 ; VLNODQ-NEXT: kmovw %k2, %eax
1143 ; VLNODQ-NEXT: andl $1, %eax
1144 ; VLNODQ-NEXT: xorl %ecx, %ecx
1145 ; VLNODQ-NEXT: testb %al, %al
1146 ; VLNODQ-NEXT: movl $-1, %eax
1147 ; VLNODQ-NEXT: movl $0, %edx
1148 ; VLNODQ-NEXT: cmovnel %eax, %edx
1149 ; VLNODQ-NEXT: kmovw %k3, %esi
1150 ; VLNODQ-NEXT: andl $1, %esi
1151 ; VLNODQ-NEXT: testb %sil, %sil
1152 ; VLNODQ-NEXT: movl $0, %esi
1153 ; VLNODQ-NEXT: cmovnel %eax, %esi
1154 ; VLNODQ-NEXT: vmovd %esi, %xmm0
1155 ; VLNODQ-NEXT: vpinsrd $1, %edx, %xmm0, %xmm0
1156 ; VLNODQ-NEXT: kmovw %k1, %edx
1157 ; VLNODQ-NEXT: andl $1, %edx
1158 ; VLNODQ-NEXT: testb %dl, %dl
1159 ; VLNODQ-NEXT: movl $0, %edx
1160 ; VLNODQ-NEXT: cmovnel %eax, %edx
1161 ; VLNODQ-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0
1162 ; VLNODQ-NEXT: kmovw %k0, %edx
1163 ; VLNODQ-NEXT: andl $1, %edx
1164 ; VLNODQ-NEXT: testb %dl, %dl
1165 ; VLNODQ-NEXT: cmovnel %eax, %ecx
1166 ; VLNODQ-NEXT: vpinsrd $3, %ecx, %xmm0, %xmm0
1092 ; VLNODQ-NEXT: vcmpltps %xmm0, %xmm1, %k1
1093 ; VLNODQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
1094 ; VLNODQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
11671095 ; VLNODQ-NEXT: vcvtdq2ps %xmm0, %xmm0
11681096 ; VLNODQ-NEXT: retq
11691097 %cmpres = fcmp ogt <2 x float> %a, zeroinitializer
11911119 ; VLNODQ-LABEL: sitofp_2i1_double:
11921120 ; VLNODQ: ## BB#0:
11931121 ; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
1194 ; VLNODQ-NEXT: vcmpltpd %xmm0, %xmm1, %k0
1195 ; VLNODQ-NEXT: kshiftlw $15, %k0, %k1
1196 ; VLNODQ-NEXT: kshiftrw $15, %k1, %k1
1197 ; VLNODQ-NEXT: kshiftlw $14, %k0, %k0
1198 ; VLNODQ-NEXT: kshiftrw $15, %k0, %k0
1199 ; VLNODQ-NEXT: kmovw %k0, %eax
1200 ; VLNODQ-NEXT: andl $1, %eax
1201 ; VLNODQ-NEXT: xorl %ecx, %ecx
1202 ; VLNODQ-NEXT: testb %al, %al
1203 ; VLNODQ-NEXT: movl $-1, %eax
1204 ; VLNODQ-NEXT: movl $0, %edx
1205 ; VLNODQ-NEXT: cmovnel %eax, %edx
1206 ; VLNODQ-NEXT: vcvtsi2sdl %edx, %xmm2, %xmm0
1207 ; VLNODQ-NEXT: kmovw %k1, %edx
1208 ; VLNODQ-NEXT: andl $1, %edx
1209 ; VLNODQ-NEXT: testb %dl, %dl
1210 ; VLNODQ-NEXT: cmovnel %eax, %ecx
1211 ; VLNODQ-NEXT: vcvtsi2sdl %ecx, %xmm2, %xmm1
1212 ; VLNODQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1122 ; VLNODQ-NEXT: vcmpltpd %xmm0, %xmm1, %k1
1123 ; VLNODQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
1124 ; VLNODQ-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
1125 ; VLNODQ-NEXT: vpextrq $1, %xmm0, %rax
1126 ; VLNODQ-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm1
1127 ; VLNODQ-NEXT: vmovq %xmm0, %rax
1128 ; VLNODQ-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm0
1129 ; VLNODQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
12131130 ; VLNODQ-NEXT: retq
12141131 %cmpres = fcmp ogt <2 x double> %a, zeroinitializer
12151132 %1 = sitofp <2 x i1> %cmpres to <2 x double>