llvm.org GIT mirror llvm / b55453a
Merging r276648: ------------------------------------------------------------------------ r276648 | delena | 2016-07-25 09:51:00 -0700 (Mon, 25 Jul 2016) | 6 lines AVX-512: Fixed [US]INT_TO_FP selection for i1 vectors. It failed with an assertion before this patch. Differential Revision: https://reviews.llvm.org/D22735 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_39@277508 91177308-0d34-0410-b5e6-96231b3b80d8 Hans Wennborg 3 years ago
2 changed file(s) with 368 addition(s) and 0 deletion(s). Raw diff Collapse all Expand all
11861186 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
11871187 setOperationAction(ISD::UINT_TO_FP, MVT::v16i8, Custom);
11881188 setOperationAction(ISD::UINT_TO_FP, MVT::v16i16, Custom);
1189 setOperationAction(ISD::SINT_TO_FP, MVT::v16i1, Custom);
1190 setOperationAction(ISD::UINT_TO_FP, MVT::v16i1, Custom);
1191 setOperationAction(ISD::SINT_TO_FP, MVT::v8i1, Custom);
1192 setOperationAction(ISD::UINT_TO_FP, MVT::v8i1, Custom);
1193 setOperationAction(ISD::SINT_TO_FP, MVT::v4i1, Custom);
1194 setOperationAction(ISD::UINT_TO_FP, MVT::v4i1, Custom);
1195 setOperationAction(ISD::SINT_TO_FP, MVT::v2i1, Custom);
1196 setOperationAction(ISD::UINT_TO_FP, MVT::v2i1, Custom);
11891197 setOperationAction(ISD::FP_ROUND, MVT::v8f32, Legal);
11901198 setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Legal);
11911199
1337213380 MVT VT = Op.getSimpleValueType();
1337313381 SDLoc dl(Op);
1337413382
13383 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
1337513384 if (SrcVT.isVector()) {
1337613385 if (SrcVT == MVT::v2i32 && VT == MVT::v2f64) {
1337713386 return DAG.getNode(X86ISD::CVTDQ2PD, dl, VT,
1337913388 DAG.getUNDEF(SrcVT)));
1338013389 }
1338113390 if (SrcVT.getVectorElementType() == MVT::i1) {
13391 if (SrcVT == MVT::v2i1 && TLI.isTypeLegal(SrcVT))
13392 return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(),
13393 DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v2i64, Src));
1338213394 MVT IntegerVT = MVT::getVectorVT(MVT::i32, SrcVT.getVectorNumElements());
1338313395 return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(),
1338413396 DAG.getNode(ISD::SIGN_EXTEND, dl, IntegerVT, Src));
1369213704 SDValue N0 = Op.getOperand(0);
1369313705 MVT SVT = N0.getSimpleValueType();
1369413706 SDLoc dl(Op);
13707
13708 if (SVT.getVectorElementType() == MVT::i1) {
13709 if (SVT == MVT::v2i1)
13710 return DAG.getNode(ISD::UINT_TO_FP, dl, Op.getValueType(),
13711 DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v2i64, N0));
13712 MVT IntegerVT = MVT::getVectorVT(MVT::i32, SVT.getVectorNumElements());
13713 return DAG.getNode(ISD::UINT_TO_FP, dl, Op.getValueType(),
13714 DAG.getNode(ISD::ZERO_EXTEND, dl, IntegerVT, N0));
13715 }
1369513716
1369613717 switch (SVT.SimpleTy) {
1369713718 default:
743743 ret <8 x double> %1
744744 }
745745
746 define <16 x double> @sitofp_16i1_double(<16 x double> %a) {
747 ; KNL-LABEL: sitofp_16i1_double:
748 ; KNL: ## BB#0:
749 ; KNL-NEXT: vpxord %zmm2, %zmm2, %zmm2
750 ; KNL-NEXT: vcmpltpd %zmm1, %zmm2, %k1
751 ; KNL-NEXT: vcmpltpd %zmm0, %zmm2, %k2
752 ; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
753 ; KNL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k2} {z}
754 ; KNL-NEXT: vpmovqd %zmm0, %ymm0
755 ; KNL-NEXT: vcvtdq2pd %ymm0, %zmm0
756 ; KNL-NEXT: vmovdqa64 %zmm1, %zmm1 {%k1} {z}
757 ; KNL-NEXT: vpmovqd %zmm1, %ymm1
758 ; KNL-NEXT: vcvtdq2pd %ymm1, %zmm1
759 ; KNL-NEXT: retq
760 ;
761 ; SKX-LABEL: sitofp_16i1_double:
762 ; SKX: ## BB#0:
763 ; SKX-NEXT: vpxord %zmm2, %zmm2, %zmm2
764 ; SKX-NEXT: vcmpltpd %zmm1, %zmm2, %k0
765 ; SKX-NEXT: vcmpltpd %zmm0, %zmm2, %k1
766 ; SKX-NEXT: vpmovm2d %k1, %ymm0
767 ; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0
768 ; SKX-NEXT: vpmovm2d %k0, %ymm1
769 ; SKX-NEXT: vcvtdq2pd %ymm1, %zmm1
770 ; SKX-NEXT: retq
771 %cmpres = fcmp ogt <16 x double> %a, zeroinitializer
772 %1 = sitofp <16 x i1> %cmpres to <16 x double>
773 ret <16 x double> %1
774 }
775
746776 define <8 x double> @sitofp_8i1_double(<8 x double> %a) {
747777 ; KNL-LABEL: sitofp_8i1_double:
748778 ; KNL: ## BB#0:
766796 ret <8 x double> %1
767797 }
768798
799 define <8 x float> @sitofp_8i1_float(<8 x float> %a) {
800 ; KNL-LABEL: sitofp_8i1_float:
801 ; KNL: ## BB#0:
802 ; KNL-NEXT: ## kill: %YMM0 %YMM0 %ZMM0
803 ; KNL-NEXT: vxorps %ymm1, %ymm1, %ymm1
804 ; KNL-NEXT: vcmpltps %zmm0, %zmm1, %k1
805 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
806 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
807 ; KNL-NEXT: vpmovqd %zmm0, %ymm0
808 ; KNL-NEXT: vcvtdq2ps %ymm0, %ymm0
809 ; KNL-NEXT: retq
810 ;
811 ; SKX-LABEL: sitofp_8i1_float:
812 ; SKX: ## BB#0:
813 ; SKX-NEXT: vpxord %ymm1, %ymm1, %ymm1
814 ; SKX-NEXT: vcmpltps %ymm0, %ymm1, %k0
815 ; SKX-NEXT: vpmovm2d %k0, %ymm0
816 ; SKX-NEXT: vcvtdq2ps %ymm0, %ymm0
817 ; SKX-NEXT: retq
818 %cmpres = fcmp ogt <8 x float> %a, zeroinitializer
819 %1 = sitofp <8 x i1> %cmpres to <8 x float>
820 ret <8 x float> %1
821 }
822
823 define <4 x float> @sitofp_4i1_float(<4 x float> %a) {
824 ; KNL-LABEL: sitofp_4i1_float:
825 ; KNL: ## BB#0:
826 ; KNL-NEXT: vxorps %xmm1, %xmm1, %xmm1
827 ; KNL-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
828 ; KNL-NEXT: vcvtdq2ps %xmm0, %xmm0
829 ; KNL-NEXT: retq
830 ;
831 ; SKX-LABEL: sitofp_4i1_float:
832 ; SKX: ## BB#0:
833 ; SKX-NEXT: vpxord %xmm1, %xmm1, %xmm1
834 ; SKX-NEXT: vcmpltps %xmm0, %xmm1, %k0
835 ; SKX-NEXT: vpmovm2d %k0, %xmm0
836 ; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0
837 ; SKX-NEXT: retq
838 %cmpres = fcmp ogt <4 x float> %a, zeroinitializer
839 %1 = sitofp <4 x i1> %cmpres to <4 x float>
840 ret <4 x float> %1
841 }
842
843 define <4 x double> @sitofp_4i1_double(<4 x double> %a) {
844 ; KNL-LABEL: sitofp_4i1_double:
845 ; KNL: ## BB#0:
846 ; KNL-NEXT: vxorpd %ymm1, %ymm1, %ymm1
847 ; KNL-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
848 ; KNL-NEXT: vpmovqd %zmm0, %ymm0
849 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
850 ; KNL-NEXT: vpsrad $31, %xmm0, %xmm0
851 ; KNL-NEXT: vcvtdq2pd %xmm0, %ymm0
852 ; KNL-NEXT: retq
853 ;
854 ; SKX-LABEL: sitofp_4i1_double:
855 ; SKX: ## BB#0:
856 ; SKX-NEXT: vpxord %ymm1, %ymm1, %ymm1
857 ; SKX-NEXT: vcmpltpd %ymm0, %ymm1, %k0
858 ; SKX-NEXT: vpmovm2d %k0, %xmm0
859 ; SKX-NEXT: vcvtdq2pd %xmm0, %ymm0
860 ; SKX-NEXT: retq
861 %cmpres = fcmp ogt <4 x double> %a, zeroinitializer
862 %1 = sitofp <4 x i1> %cmpres to <4 x double>
863 ret <4 x double> %1
864 }
865
866 define <2 x float> @sitofp_2i1_float(<2 x float> %a) {
867 ; KNL-LABEL: sitofp_2i1_float:
868 ; KNL: ## BB#0:
869 ; KNL-NEXT: vxorps %xmm1, %xmm1, %xmm1
870 ; KNL-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
871 ; KNL-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
872 ; KNL-NEXT: vpsllq $32, %xmm0, %xmm0
873 ; KNL-NEXT: vpsrad $31, %xmm0, %xmm1
874 ; KNL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
875 ; KNL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
876 ; KNL-NEXT: vpextrq $1, %xmm0, %rax
877 ; KNL-NEXT: xorl %ecx, %ecx
878 ; KNL-NEXT: testb $1, %al
879 ; KNL-NEXT: movl $-1, %eax
880 ; KNL-NEXT: movl $0, %edx
881 ; KNL-NEXT: cmovnel %eax, %edx
882 ; KNL-NEXT: vcvtsi2ssl %edx, %xmm0, %xmm1
883 ; KNL-NEXT: vmovq %xmm0, %rdx
884 ; KNL-NEXT: testb $1, %dl
885 ; KNL-NEXT: cmovnel %eax, %ecx
886 ; KNL-NEXT: vcvtsi2ssl %ecx, %xmm0, %xmm0
887 ; KNL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
888 ; KNL-NEXT: retq
889 ;
890 ; SKX-LABEL: sitofp_2i1_float:
891 ; SKX: ## BB#0:
892 ; SKX-NEXT: vpxord %xmm1, %xmm1, %xmm1
893 ; SKX-NEXT: vcmpltps %xmm0, %xmm1, %k0
894 ; SKX-NEXT: vpmovm2d %k0, %xmm0
895 ; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0
896 ; SKX-NEXT: retq
897 %cmpres = fcmp ogt <2 x float> %a, zeroinitializer
898 %1 = sitofp <2 x i1> %cmpres to <2 x float>
899 ret <2 x float> %1
900 }
901
902 define <2 x double> @sitofp_2i1_double(<2 x double> %a) {
903 ; KNL-LABEL: sitofp_2i1_double:
904 ; KNL: ## BB#0:
905 ; KNL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
906 ; KNL-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
907 ; KNL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
908 ; KNL-NEXT: vcvtdq2pd %xmm0, %xmm0
909 ; KNL-NEXT: retq
910 ;
911 ; SKX-LABEL: sitofp_2i1_double:
912 ; SKX: ## BB#0:
913 ; SKX-NEXT: vpxord %xmm1, %xmm1, %xmm1
914 ; SKX-NEXT: vcmpltpd %xmm0, %xmm1, %k0
915 ; SKX-NEXT: vpmovm2q %k0, %xmm0
916 ; SKX-NEXT: vcvtqq2pd %xmm0, %xmm0
917 ; SKX-NEXT: retq
918 %cmpres = fcmp ogt <2 x double> %a, zeroinitializer
919 %1 = sitofp <2 x i1> %cmpres to <2 x double>
920 ret <2 x double> %1
921 }
922
769923 define <16 x float> @uitofp_16i8(<16 x i8>%a) {
770924 ; ALL-LABEL: uitofp_16i8:
771925 ; ALL: ## BB#0:
786940 ret <16 x float>%b
787941 }
788942
943 define <16 x float> @uitofp_16i1_float(<16 x i32> %a) {
944 ; ALL-LABEL: uitofp_16i1_float:
945 ; ALL: ## BB#0:
946 ; ALL-NEXT: vpxord %zmm1, %zmm1, %zmm1
947 ; ALL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
948 ; ALL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
949 ; ALL-NEXT: vcvtudq2ps %zmm0, %zmm0
950 ; ALL-NEXT: retq
951 %mask = icmp slt <16 x i32> %a, zeroinitializer
952 %1 = uitofp <16 x i1> %mask to <16 x float>
953 ret <16 x float> %1
954 }
955
956 define <16 x double> @uitofp_16i1_double(<16 x i32> %a) {
957 ; KNL-LABEL: uitofp_16i1_double:
958 ; KNL: ## BB#0:
959 ; KNL-NEXT: vpxord %zmm1, %zmm1, %zmm1
960 ; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
961 ; KNL-NEXT: movq {{.*}}(%rip), %rax
962 ; KNL-NEXT: vpbroadcastq %rax, %zmm0 {%k1} {z}
963 ; KNL-NEXT: vpmovqd %zmm0, %ymm0
964 ; KNL-NEXT: vcvtudq2pd %ymm0, %zmm0
965 ; KNL-NEXT: kshiftrw $8, %k1, %k1
966 ; KNL-NEXT: vpbroadcastq %rax, %zmm1 {%k1} {z}
967 ; KNL-NEXT: vpmovqd %zmm1, %ymm1
968 ; KNL-NEXT: vcvtudq2pd %ymm1, %zmm1
969 ; KNL-NEXT: retq
970 ;
971 ; SKX-LABEL: uitofp_16i1_double:
972 ; SKX: ## BB#0:
973 ; SKX-NEXT: vpxord %zmm1, %zmm1, %zmm1
974 ; SKX-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
975 ; SKX-NEXT: movl {{.*}}(%rip), %eax
976 ; SKX-NEXT: vpbroadcastd %eax, %ymm0 {%k1} {z}
977 ; SKX-NEXT: vcvtudq2pd %ymm0, %zmm0
978 ; SKX-NEXT: kshiftrw $8, %k1, %k1
979 ; SKX-NEXT: vpbroadcastd %eax, %ymm1 {%k1} {z}
980 ; SKX-NEXT: vcvtudq2pd %ymm1, %zmm1
981 ; SKX-NEXT: retq
982 %mask = icmp slt <16 x i32> %a, zeroinitializer
983 %1 = uitofp <16 x i1> %mask to <16 x double>
984 ret <16 x double> %1
985 }
986
987 define <8 x float> @uitofp_8i1_float(<8 x i32> %a) {
988 ; KNL-LABEL: uitofp_8i1_float:
989 ; KNL: ## BB#0:
990 ; KNL-NEXT: ## kill: %YMM0 %YMM0 %ZMM0
991 ; KNL-NEXT: vpxor %ymm1, %ymm1, %ymm1
992 ; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
993 ; KNL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
994 ; KNL-NEXT: vpmovqd %zmm0, %ymm0
995 ; KNL-NEXT: vcvtudq2ps %zmm0, %zmm0
996 ; KNL-NEXT: ## kill: %YMM0 %YMM0 %ZMM0
997 ; KNL-NEXT: retq
998 ;
999 ; SKX-LABEL: uitofp_8i1_float:
1000 ; SKX: ## BB#0:
1001 ; SKX-NEXT: vpxord %ymm1, %ymm1, %ymm1
1002 ; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %k1
1003 ; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z}
1004 ; SKX-NEXT: vcvtudq2ps %ymm0, %ymm0
1005 ; SKX-NEXT: retq
1006 %mask = icmp slt <8 x i32> %a, zeroinitializer
1007 %1 = uitofp <8 x i1> %mask to <8 x float>
1008 ret <8 x float> %1
1009 }
1010
1011 define <8 x double> @uitofp_8i1_double(<8 x i32> %a) {
1012 ; KNL-LABEL: uitofp_8i1_double:
1013 ; KNL: ## BB#0:
1014 ; KNL-NEXT: ## kill: %YMM0 %YMM0 %ZMM0
1015 ; KNL-NEXT: vpxor %ymm1, %ymm1, %ymm1
1016 ; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
1017 ; KNL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
1018 ; KNL-NEXT: vpmovqd %zmm0, %ymm0
1019 ; KNL-NEXT: vcvtudq2pd %ymm0, %zmm0
1020 ; KNL-NEXT: retq
1021 ;
1022 ; SKX-LABEL: uitofp_8i1_double:
1023 ; SKX: ## BB#0:
1024 ; SKX-NEXT: vpxord %ymm1, %ymm1, %ymm1
1025 ; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %k1
1026 ; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z}
1027 ; SKX-NEXT: vcvtudq2pd %ymm0, %zmm0
1028 ; SKX-NEXT: retq
1029 %mask = icmp slt <8 x i32> %a, zeroinitializer
1030 %1 = uitofp <8 x i1> %mask to <8 x double>
1031 ret <8 x double> %1
1032 }
1033
1034 define <4 x float> @uitofp_4i1_float(<4 x i32> %a) {
1035 ; KNL-LABEL: uitofp_4i1_float:
1036 ; KNL: ## BB#0:
1037 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
1038 ; KNL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
1039 ; KNL-NEXT: vpsrld $31, %xmm0, %xmm0
1040 ; KNL-NEXT: vcvtudq2ps %zmm0, %zmm0
1041 ; KNL-NEXT: ## kill: %XMM0 %XMM0 %ZMM0
1042 ; KNL-NEXT: retq
1043 ;
1044 ; SKX-LABEL: uitofp_4i1_float:
1045 ; SKX: ## BB#0:
1046 ; SKX-NEXT: vpxord %xmm1, %xmm1, %xmm1
1047 ; SKX-NEXT: vpcmpgtd %xmm0, %xmm1, %k1
1048 ; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
1049 ; SKX-NEXT: vcvtudq2ps %xmm0, %xmm0
1050 ; SKX-NEXT: retq
1051 %mask = icmp slt <4 x i32> %a, zeroinitializer
1052 %1 = uitofp <4 x i1> %mask to <4 x float>
1053 ret <4 x float> %1
1054 }
1055
1056 define <4 x double> @uitofp_4i1_double(<4 x i32> %a) {
1057 ; KNL-LABEL: uitofp_4i1_double:
1058 ; KNL: ## BB#0:
1059 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
1060 ; KNL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
1061 ; KNL-NEXT: vpsrld $31, %xmm0, %xmm0
1062 ; KNL-NEXT: vcvtudq2pd %ymm0, %zmm0
1063 ; KNL-NEXT: ## kill: %YMM0 %YMM0 %ZMM0
1064 ; KNL-NEXT: retq
1065 ;
1066 ; SKX-LABEL: uitofp_4i1_double:
1067 ; SKX: ## BB#0:
1068 ; SKX-NEXT: vpxord %xmm1, %xmm1, %xmm1
1069 ; SKX-NEXT: vpcmpgtd %xmm0, %xmm1, %k1
1070 ; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
1071 ; SKX-NEXT: vcvtudq2pd %xmm0, %ymm0
1072 ; SKX-NEXT: retq
1073 %mask = icmp slt <4 x i32> %a, zeroinitializer
1074 %1 = uitofp <4 x i1> %mask to <4 x double>
1075 ret <4 x double> %1
1076 }
1077
1078 define <2 x float> @uitofp_2i1_float(<2 x i32> %a) {
1079 ; KNL-LABEL: uitofp_2i1_float:
1080 ; KNL: ## BB#0:
1081 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
1082 ; KNL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
1083 ; KNL-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
1084 ; KNL-NEXT: vpxor %xmm1, %xmm0, %xmm0
1085 ; KNL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
1086 ; KNL-NEXT: vpextrq $1, %xmm0, %rax
1087 ; KNL-NEXT: andl $1, %eax
1088 ; KNL-NEXT: vcvtsi2ssl %eax, %xmm0, %xmm1
1089 ; KNL-NEXT: vmovq %xmm0, %rax
1090 ; KNL-NEXT: andl $1, %eax
1091 ; KNL-NEXT: vcvtsi2ssl %eax, %xmm0, %xmm0
1092 ; KNL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
1093 ; KNL-NEXT: retq
1094 ;
1095 ; SKX-LABEL: uitofp_2i1_float:
1096 ; SKX: ## BB#0:
1097 ; SKX-NEXT: vpxord %xmm1, %xmm1, %xmm1
1098 ; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
1099 ; SKX-NEXT: vpcmpltuq %xmm1, %xmm0, %k1
1100 ; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
1101 ; SKX-NEXT: vcvtudq2ps %xmm0, %xmm0
1102 ; SKX-NEXT: retq
1103 %mask = icmp ult <2 x i32> %a, zeroinitializer
1104 %1 = uitofp <2 x i1> %mask to <2 x float>
1105 ret <2 x float> %1
1106 }
1107
1108 define <2 x double> @uitofp_2i1_double(<2 x i32> %a) {
1109 ; KNL-LABEL: uitofp_2i1_double:
1110 ; KNL: ## BB#0:
1111 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
1112 ; KNL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
1113 ; KNL-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
1114 ; KNL-NEXT: vpxor %xmm1, %xmm0, %xmm0
1115 ; KNL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
1116 ; KNL-NEXT: vpsrlq $63, %xmm0, %xmm0
1117 ; KNL-NEXT: vpextrq $1, %xmm0, %rax
1118 ; KNL-NEXT: vcvtusi2sdq %rax, %xmm0, %xmm1
1119 ; KNL-NEXT: vmovq %xmm0, %rax
1120 ; KNL-NEXT: vcvtusi2sdq %rax, %xmm0, %xmm0
1121 ; KNL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1122 ; KNL-NEXT: retq
1123 ;
1124 ; SKX-LABEL: uitofp_2i1_double:
1125 ; SKX: ## BB#0:
1126 ; SKX-NEXT: vpxord %xmm1, %xmm1, %xmm1
1127 ; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
1128 ; SKX-NEXT: vpcmpltuq %xmm1, %xmm0, %k1
1129 ; SKX-NEXT: vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z}
1130 ; SKX-NEXT: vcvtuqq2pd %xmm0, %xmm0
1131 ; SKX-NEXT: retq
1132 %mask = icmp ult <2 x i32> %a, zeroinitializer
1133 %1 = uitofp <2 x i1> %mask to <2 x double>
1134 ret <2 x double> %1
1135 }