llvm.org GIT mirror llvm / 5e6ffb9
[X86] Legalize zero extends from vXi1 to vXi16/vXi32/vXi64 using a sign extend and a shift. This avoids a constant pool load to create 1. The int->float tests now show conversions to a mask and back. We probably need to widen the inputs to sint_to_fp/uint_to_fp before type legalization. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@324805 91177308-0d34-0410-b5e6-96231b3b80d8 Craig Topper 2 years ago
7 changed file(s) with 341 addition(s) and 199 deletion(s). Raw diff Collapse all Expand all
1647116471 SDLoc DL(Op);
1647216472 unsigned NumElts = VT.getVectorNumElements();
1647316473
16474 // Extend VT if the scalar type is v8/v16 and BWI is not supported.
16474 // For all vectors except vXi8, we can just emit a sign_extend and a shift. This
16475 // avoids a constant pool load.
16476 if (VT.getVectorElementType() != MVT::i8) {
16477 SDValue Extend = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, In);
16478 return DAG.getNode(ISD::SRL, DL, VT, Extend,
16479 DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT));
16480 }
16481
16482 // Extend VT if BWI is not supported.
1647516483 MVT ExtVT = VT;
16476 if (!Subtarget.hasBWI() &&
16477 (VT.getVectorElementType().getSizeInBits() <= 16)) {
16484 if (!Subtarget.hasBWI()) {
1647816485 // If v16i32 is to be avoided, we'll need to split and concatenate.
1647916486 if (NumElts == 16 && !Subtarget.canExtendTo512DQ())
1648016487 return SplitAndExtendv16i1(ISD::ZERO_EXTEND, VT, In, DL, DAG);
1649816505
1649916506 SDValue SelectedVal = DAG.getSelect(DL, WideVT, In, One, Zero);
1650016507
16501 // Truncate if we had to extend i16/i8 above.
16508 // Truncate if we had to extend above.
1650216509 if (VT != ExtVT) {
16503 WideVT = MVT::getVectorVT(VT.getVectorElementType(), NumElts);
16510 WideVT = MVT::getVectorVT(MVT::i8, NumElts);
1650416511 SelectedVal = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SelectedVal);
1650516512 }
1650616513
18911891 ; NODQ: # %bb.0:
18921892 ; NODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
18931893 ; NODQ-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
1894 ; NODQ-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
1894 ; NODQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1895 ; NODQ-NEXT: vpsrld $31, %zmm0, %zmm0
18951896 ; NODQ-NEXT: vcvtdq2ps %zmm0, %zmm0
18961897 ; NODQ-NEXT: retq
18971898 ;
18981899 ; DQ-LABEL: ubto16f32:
18991900 ; DQ: # %bb.0:
1900 ; DQ-NEXT: vpmovd2m %zmm0, %k1
1901 ; DQ-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
1901 ; DQ-NEXT: vpmovd2m %zmm0, %k0
1902 ; DQ-NEXT: vpmovm2d %k0, %zmm0
1903 ; DQ-NEXT: vpsrld $31, %zmm0, %zmm0
19021904 ; DQ-NEXT: vcvtdq2ps %zmm0, %zmm0
19031905 ; DQ-NEXT: retq
19041906 %mask = icmp slt <16 x i32> %a, zeroinitializer
19111913 ; NOVLDQ: # %bb.0:
19121914 ; NOVLDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
19131915 ; NOVLDQ-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
1914 ; NOVLDQ-NEXT: movl {{.*}}(%rip), %eax
1915 ; NOVLDQ-NEXT: vpbroadcastd %eax, %zmm0 {%k1} {z}
1916 ; NOVLDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1917 ; NOVLDQ-NEXT: vpsrld $31, %ymm0, %ymm0
19161918 ; NOVLDQ-NEXT: vcvtdq2pd %ymm0, %zmm0
19171919 ; NOVLDQ-NEXT: kshiftrw $8, %k1, %k1
1918 ; NOVLDQ-NEXT: vpbroadcastd %eax, %zmm1 {%k1} {z}
1920 ; NOVLDQ-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
1921 ; NOVLDQ-NEXT: vpsrld $31, %ymm1, %ymm1
19191922 ; NOVLDQ-NEXT: vcvtdq2pd %ymm1, %zmm1
19201923 ; NOVLDQ-NEXT: retq
19211924 ;
19221925 ; VLDQ-LABEL: ubto16f64:
19231926 ; VLDQ: # %bb.0:
1924 ; VLDQ-NEXT: vpmovd2m %zmm0, %k1
1925 ; VLDQ-NEXT: movl {{.*}}(%rip), %eax
1926 ; VLDQ-NEXT: vpbroadcastd %eax, %ymm0 {%k1} {z}
1927 ; VLDQ-NEXT: vpmovd2m %zmm0, %k0
1928 ; VLDQ-NEXT: vpmovm2d %k0, %ymm0
1929 ; VLDQ-NEXT: vpsrld $31, %ymm0, %ymm0
19271930 ; VLDQ-NEXT: vcvtdq2pd %ymm0, %zmm0
1928 ; VLDQ-NEXT: kshiftrw $8, %k1, %k1
1929 ; VLDQ-NEXT: vpbroadcastd %eax, %ymm1 {%k1} {z}
1931 ; VLDQ-NEXT: kshiftrw $8, %k0, %k0
1932 ; VLDQ-NEXT: vpmovm2d %k0, %ymm1
1933 ; VLDQ-NEXT: vpsrld $31, %ymm1, %ymm1
19301934 ; VLDQ-NEXT: vcvtdq2pd %ymm1, %zmm1
19311935 ; VLDQ-NEXT: retq
19321936 ;
19341938 ; VLNODQ: # %bb.0:
19351939 ; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
19361940 ; VLNODQ-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
1937 ; VLNODQ-NEXT: movl {{.*}}(%rip), %eax
1938 ; VLNODQ-NEXT: vpbroadcastd %eax, %ymm0 {%k1} {z}
1941 ; VLNODQ-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
1942 ; VLNODQ-NEXT: vmovdqa32 %ymm1, %ymm0 {%k1} {z}
1943 ; VLNODQ-NEXT: vpsrld $31, %ymm0, %ymm0
19391944 ; VLNODQ-NEXT: vcvtdq2pd %ymm0, %zmm0
19401945 ; VLNODQ-NEXT: kshiftrw $8, %k1, %k1
1941 ; VLNODQ-NEXT: vpbroadcastd %eax, %ymm1 {%k1} {z}
1946 ; VLNODQ-NEXT: vmovdqa32 %ymm1, %ymm1 {%k1} {z}
1947 ; VLNODQ-NEXT: vpsrld $31, %ymm1, %ymm1
19421948 ; VLNODQ-NEXT: vcvtdq2pd %ymm1, %zmm1
19431949 ; VLNODQ-NEXT: retq
19441950 ;
19451951 ; AVX512DQ-LABEL: ubto16f64:
19461952 ; AVX512DQ: # %bb.0:
1947 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k1
1948 ; AVX512DQ-NEXT: movl {{.*}}(%rip), %eax
1949 ; AVX512DQ-NEXT: vpbroadcastd %eax, %zmm0 {%k1} {z}
1953 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
1954 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
1955 ; AVX512DQ-NEXT: vpsrld $31, %ymm0, %ymm0
19501956 ; AVX512DQ-NEXT: vcvtdq2pd %ymm0, %zmm0
1951 ; AVX512DQ-NEXT: kshiftrw $8, %k1, %k1
1952 ; AVX512DQ-NEXT: vpbroadcastd %eax, %zmm1 {%k1} {z}
1957 ; AVX512DQ-NEXT: kshiftrw $8, %k0, %k0
1958 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm1
1959 ; AVX512DQ-NEXT: vpsrld $31, %ymm1, %ymm1
19531960 ; AVX512DQ-NEXT: vcvtdq2pd %ymm1, %zmm1
19541961 ; AVX512DQ-NEXT: retq
19551962 %mask = icmp slt <16 x i32> %a, zeroinitializer
19631970 ; NOVLDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
19641971 ; NOVLDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
19651972 ; NOVLDQ-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
1966 ; NOVLDQ-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
1973 ; NOVLDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1974 ; NOVLDQ-NEXT: vpsrld $31, %ymm0, %ymm0
19671975 ; NOVLDQ-NEXT: vcvtdq2ps %ymm0, %ymm0
19681976 ; NOVLDQ-NEXT: retq
19691977 ;
19701978 ; VLDQ-LABEL: ubto8f32:
19711979 ; VLDQ: # %bb.0:
1972 ; VLDQ-NEXT: vpmovd2m %ymm0, %k1
1973 ; VLDQ-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z}
1980 ; VLDQ-NEXT: vpmovd2m %ymm0, %k0
1981 ; VLDQ-NEXT: vpmovm2d %k0, %ymm0
1982 ; VLDQ-NEXT: vpsrld $31, %ymm0, %ymm0
19741983 ; VLDQ-NEXT: vcvtdq2ps %ymm0, %ymm0
19751984 ; VLDQ-NEXT: retq
19761985 ;
19781987 ; VLNODQ: # %bb.0:
19791988 ; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
19801989 ; VLNODQ-NEXT: vpcmpgtd %ymm0, %ymm1, %k1
1981 ; VLNODQ-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z}
1990 ; VLNODQ-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
1991 ; VLNODQ-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
1992 ; VLNODQ-NEXT: vpsrld $31, %ymm0, %ymm0
19821993 ; VLNODQ-NEXT: vcvtdq2ps %ymm0, %ymm0
19831994 ; VLNODQ-NEXT: retq
19841995 ;
19851996 ; AVX512DQ-LABEL: ubto8f32:
19861997 ; AVX512DQ: # %bb.0:
19871998 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1988 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k1
1989 ; AVX512DQ-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
1999 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
2000 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
2001 ; AVX512DQ-NEXT: vpsrld $31, %ymm0, %ymm0
19902002 ; AVX512DQ-NEXT: vcvtdq2ps %ymm0, %ymm0
19912003 ; AVX512DQ-NEXT: retq
19922004 %mask = icmp slt <8 x i32> %a, zeroinitializer
20002012 ; NOVLDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20012013 ; NOVLDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
20022014 ; NOVLDQ-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
2003 ; NOVLDQ-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
2015 ; NOVLDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
2016 ; NOVLDQ-NEXT: vpsrld $31, %ymm0, %ymm0
20042017 ; NOVLDQ-NEXT: vcvtdq2pd %ymm0, %zmm0
20052018 ; NOVLDQ-NEXT: retq
20062019 ;
20072020 ; VLDQ-LABEL: ubto8f64:
20082021 ; VLDQ: # %bb.0:
2009 ; VLDQ-NEXT: vpmovd2m %ymm0, %k1
2010 ; VLDQ-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z}
2022 ; VLDQ-NEXT: vpmovd2m %ymm0, %k0
2023 ; VLDQ-NEXT: vpmovm2d %k0, %ymm0
2024 ; VLDQ-NEXT: vpsrld $31, %ymm0, %ymm0
20112025 ; VLDQ-NEXT: vcvtdq2pd %ymm0, %zmm0
20122026 ; VLDQ-NEXT: retq
20132027 ;
20152029 ; VLNODQ: # %bb.0:
20162030 ; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
20172031 ; VLNODQ-NEXT: vpcmpgtd %ymm0, %ymm1, %k1
2018 ; VLNODQ-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z}
2032 ; VLNODQ-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
2033 ; VLNODQ-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
2034 ; VLNODQ-NEXT: vpsrld $31, %ymm0, %ymm0
20192035 ; VLNODQ-NEXT: vcvtdq2pd %ymm0, %zmm0
20202036 ; VLNODQ-NEXT: retq
20212037 ;
20222038 ; AVX512DQ-LABEL: ubto8f64:
20232039 ; AVX512DQ: # %bb.0:
20242040 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2025 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k1
2026 ; AVX512DQ-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
2041 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
2042 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
2043 ; AVX512DQ-NEXT: vpsrld $31, %ymm0, %ymm0
20272044 ; AVX512DQ-NEXT: vcvtdq2pd %ymm0, %zmm0
20282045 ; AVX512DQ-NEXT: retq
20292046 %mask = icmp slt <8 x i32> %a, zeroinitializer
20372054 ; NOVLDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
20382055 ; NOVLDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
20392056 ; NOVLDQ-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
2040 ; NOVLDQ-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
2057 ; NOVLDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
2058 ; NOVLDQ-NEXT: vpsrld $31, %xmm0, %xmm0
20412059 ; NOVLDQ-NEXT: vcvtdq2ps %xmm0, %xmm0
20422060 ; NOVLDQ-NEXT: vzeroupper
20432061 ; NOVLDQ-NEXT: retq
20442062 ;
20452063 ; VLDQ-LABEL: ubto4f32:
20462064 ; VLDQ: # %bb.0:
2047 ; VLDQ-NEXT: vpmovd2m %xmm0, %k1
2048 ; VLDQ-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
2065 ; VLDQ-NEXT: vpmovd2m %xmm0, %k0
2066 ; VLDQ-NEXT: vpmovm2d %k0, %xmm0
2067 ; VLDQ-NEXT: vpsrld $31, %xmm0, %xmm0
20492068 ; VLDQ-NEXT: vcvtdq2ps %xmm0, %xmm0
20502069 ; VLDQ-NEXT: retq
20512070 ;
20532072 ; VLNODQ: # %bb.0:
20542073 ; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
20552074 ; VLNODQ-NEXT: vpcmpgtd %xmm0, %xmm1, %k1
2056 ; VLNODQ-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
2075 ; VLNODQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
2076 ; VLNODQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
2077 ; VLNODQ-NEXT: vpsrld $31, %xmm0, %xmm0
20572078 ; VLNODQ-NEXT: vcvtdq2ps %xmm0, %xmm0
20582079 ; VLNODQ-NEXT: retq
20592080 ;
20602081 ; AVX512DQ-LABEL: ubto4f32:
20612082 ; AVX512DQ: # %bb.0:
20622083 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2063 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k1
2064 ; AVX512DQ-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
2084 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
2085 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
2086 ; AVX512DQ-NEXT: vpsrld $31, %xmm0, %xmm0
20652087 ; AVX512DQ-NEXT: vcvtdq2ps %xmm0, %xmm0
20662088 ; AVX512DQ-NEXT: vzeroupper
20672089 ; AVX512DQ-NEXT: retq
20762098 ; NOVLDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
20772099 ; NOVLDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
20782100 ; NOVLDQ-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
2079 ; NOVLDQ-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
2101 ; NOVLDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
2102 ; NOVLDQ-NEXT: vpsrld $31, %xmm0, %xmm0
20802103 ; NOVLDQ-NEXT: vcvtdq2pd %xmm0, %ymm0
20812104 ; NOVLDQ-NEXT: retq
20822105 ;
20832106 ; VLDQ-LABEL: ubto4f64:
20842107 ; VLDQ: # %bb.0:
2085 ; VLDQ-NEXT: vpmovd2m %xmm0, %k1
2086 ; VLDQ-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
2108 ; VLDQ-NEXT: vpmovd2m %xmm0, %k0
2109 ; VLDQ-NEXT: vpmovm2d %k0, %xmm0
2110 ; VLDQ-NEXT: vpsrld $31, %xmm0, %xmm0
20872111 ; VLDQ-NEXT: vcvtdq2pd %xmm0, %ymm0
20882112 ; VLDQ-NEXT: retq
20892113 ;
20912115 ; VLNODQ: # %bb.0:
20922116 ; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
20932117 ; VLNODQ-NEXT: vpcmpgtd %xmm0, %xmm1, %k1
2094 ; VLNODQ-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
2118 ; VLNODQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
2119 ; VLNODQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
2120 ; VLNODQ-NEXT: vpsrld $31, %xmm0, %xmm0
20952121 ; VLNODQ-NEXT: vcvtdq2pd %xmm0, %ymm0
20962122 ; VLNODQ-NEXT: retq
20972123 ;
20982124 ; AVX512DQ-LABEL: ubto4f64:
20992125 ; AVX512DQ: # %bb.0:
21002126 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2101 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k1
2102 ; AVX512DQ-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
2127 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
2128 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
2129 ; AVX512DQ-NEXT: vpsrld $31, %xmm0, %xmm0
21032130 ; AVX512DQ-NEXT: vcvtdq2pd %xmm0, %ymm0
21042131 ; AVX512DQ-NEXT: retq
21052132 %mask = icmp slt <4 x i32> %a, zeroinitializer
21082135 }
21092136
21102137 define <2 x float> @ubto2f32(<2 x i32> %a) {
2111 ; NOVL-LABEL: ubto2f32:
2112 ; NOVL: # %bb.0:
2113 ; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
2114 ; NOVL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
2115 ; NOVL-NEXT: vptestmq %zmm0, %zmm0, %k1
2116 ; NOVL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
2117 ; NOVL-NEXT: vcvtdq2ps %xmm0, %xmm0
2118 ; NOVL-NEXT: vzeroupper
2119 ; NOVL-NEXT: retq
2120 ;
2121 ; VL-LABEL: ubto2f32:
2122 ; VL: # %bb.0:
2123 ; VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
2124 ; VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
2125 ; VL-NEXT: vptestmq %xmm0, %xmm0, %k1
2126 ; VL-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
2127 ; VL-NEXT: vcvtdq2ps %xmm0, %xmm0
2128 ; VL-NEXT: retq
2138 ; NOVLDQ-LABEL: ubto2f32:
2139 ; NOVLDQ: # %bb.0:
2140 ; NOVLDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
2141 ; NOVLDQ-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
2142 ; NOVLDQ-NEXT: vptestmq %zmm0, %zmm0, %k1
2143 ; NOVLDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
2144 ; NOVLDQ-NEXT: vpsrld $31, %xmm0, %xmm0
2145 ; NOVLDQ-NEXT: vcvtdq2ps %xmm0, %xmm0
2146 ; NOVLDQ-NEXT: vzeroupper
2147 ; NOVLDQ-NEXT: retq
2148 ;
2149 ; VLDQ-LABEL: ubto2f32:
2150 ; VLDQ: # %bb.0:
2151 ; VLDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
2152 ; VLDQ-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
2153 ; VLDQ-NEXT: vptestmq %xmm0, %xmm0, %k0
2154 ; VLDQ-NEXT: vpmovm2d %k0, %xmm0
2155 ; VLDQ-NEXT: vpsrld $31, %xmm0, %xmm0
2156 ; VLDQ-NEXT: vcvtdq2ps %xmm0, %xmm0
2157 ; VLDQ-NEXT: retq
2158 ;
2159 ; VLNODQ-LABEL: ubto2f32:
2160 ; VLNODQ: # %bb.0:
2161 ; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
2162 ; VLNODQ-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
2163 ; VLNODQ-NEXT: vptestmq %xmm0, %xmm0, %k1
2164 ; VLNODQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
2165 ; VLNODQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
2166 ; VLNODQ-NEXT: vpsrld $31, %xmm0, %xmm0
2167 ; VLNODQ-NEXT: vcvtdq2ps %xmm0, %xmm0
2168 ; VLNODQ-NEXT: retq
2169 ;
2170 ; AVX512DQ-LABEL: ubto2f32:
2171 ; AVX512DQ: # %bb.0:
2172 ; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
2173 ; AVX512DQ-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
2174 ; AVX512DQ-NEXT: vptestmq %zmm0, %zmm0, %k0
2175 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
2176 ; AVX512DQ-NEXT: vpsrld $31, %xmm0, %xmm0
2177 ; AVX512DQ-NEXT: vcvtdq2ps %xmm0, %xmm0
2178 ; AVX512DQ-NEXT: vzeroupper
2179 ; AVX512DQ-NEXT: retq
21292180 %mask = icmp ne <2 x i32> %a, zeroinitializer
21302181 %1 = uitofp <2 x i1> %mask to <2 x float>
21312182 ret <2 x float> %1
21322183 }
21332184
21342185 define <2 x double> @ubto2f64(<2 x i32> %a) {
2135 ; NOVL-LABEL: ubto2f64:
2136 ; NOVL: # %bb.0:
2137 ; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
2138 ; NOVL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
2139 ; NOVL-NEXT: vptestmq %zmm0, %zmm0, %k1
2140 ; NOVL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
2141 ; NOVL-NEXT: vcvtudq2pd %ymm0, %zmm0
2142 ; NOVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
2143 ; NOVL-NEXT: vzeroupper
2144 ; NOVL-NEXT: retq
2145 ;
2146 ; VL-LABEL: ubto2f64:
2147 ; VL: # %bb.0:
2148 ; VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
2149 ; VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
2150 ; VL-NEXT: vptestmq %xmm0, %xmm0, %k1
2151 ; VL-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
2152 ; VL-NEXT: vcvtudq2pd %xmm0, %xmm0
2153 ; VL-NEXT: retq
2186 ; NOVLDQ-LABEL: ubto2f64:
2187 ; NOVLDQ: # %bb.0:
2188 ; NOVLDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
2189 ; NOVLDQ-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
2190 ; NOVLDQ-NEXT: vptestmq %zmm0, %zmm0, %k1
2191 ; NOVLDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
2192 ; NOVLDQ-NEXT: vpsrld $31, %xmm0, %xmm0
2193 ; NOVLDQ-NEXT: vcvtudq2pd %ymm0, %zmm0
2194 ; NOVLDQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
2195 ; NOVLDQ-NEXT: vzeroupper
2196 ; NOVLDQ-NEXT: retq
2197 ;
2198 ; VLDQ-LABEL: ubto2f64:
2199 ; VLDQ: # %bb.0:
2200 ; VLDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
2201 ; VLDQ-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
2202 ; VLDQ-NEXT: vptestmq %xmm0, %xmm0, %k0
2203 ; VLDQ-NEXT: vpmovm2d %k0, %xmm0
2204 ; VLDQ-NEXT: vpsrld $31, %xmm0, %xmm0
2205 ; VLDQ-NEXT: vcvtudq2pd %xmm0, %xmm0
2206 ; VLDQ-NEXT: retq
2207 ;
2208 ; VLNODQ-LABEL: ubto2f64:
2209 ; VLNODQ: # %bb.0:
2210 ; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
2211 ; VLNODQ-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
2212 ; VLNODQ-NEXT: vptestmq %xmm0, %xmm0, %k1
2213 ; VLNODQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
2214 ; VLNODQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
2215 ; VLNODQ-NEXT: vpsrld $31, %xmm0, %xmm0
2216 ; VLNODQ-NEXT: vcvtudq2pd %xmm0, %xmm0
2217 ; VLNODQ-NEXT: retq
2218 ;
2219 ; AVX512DQ-LABEL: ubto2f64:
2220 ; AVX512DQ: # %bb.0:
2221 ; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
2222 ; AVX512DQ-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
2223 ; AVX512DQ-NEXT: vptestmq %zmm0, %zmm0, %k0
2224 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
2225 ; AVX512DQ-NEXT: vpsrld $31, %xmm0, %xmm0
2226 ; AVX512DQ-NEXT: vcvtudq2pd %ymm0, %zmm0
2227 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
2228 ; AVX512DQ-NEXT: vzeroupper
2229 ; AVX512DQ-NEXT: retq
21542230 %mask = icmp ne <2 x i32> %a, zeroinitializer
21552231 %1 = uitofp <2 x i1> %mask to <2 x double>
21562232 ret <2 x double> %1
12931293 ; KNL-LABEL: zext_16i1_to_16xi32:
12941294 ; KNL: # %bb.0:
12951295 ; KNL-NEXT: kmovw %edi, %k1
1296 ; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
1296 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1297 ; KNL-NEXT: vpsrld $31, %zmm0, %zmm0
12971298 ; KNL-NEXT: retq
12981299 ;
12991300 ; SKX-LABEL: zext_16i1_to_16xi32:
13001301 ; SKX: # %bb.0:
1301 ; SKX-NEXT: kmovd %edi, %k1
1302 ; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
1302 ; SKX-NEXT: kmovd %edi, %k0
1303 ; SKX-NEXT: vpmovm2d %k0, %zmm0
1304 ; SKX-NEXT: vpsrld $31, %zmm0, %zmm0
13031305 ; SKX-NEXT: retq
13041306 %a = bitcast i16 %b to <16 x i1>
13051307 %c = zext <16 x i1> %a to <16 x i32>
13101312 ; KNL-LABEL: zext_8i1_to_8xi64:
13111313 ; KNL: # %bb.0:
13121314 ; KNL-NEXT: kmovw %edi, %k1
1313 ; KNL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
1315 ; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1316 ; KNL-NEXT: vpsrlq $63, %zmm0, %zmm0
13141317 ; KNL-NEXT: retq
13151318 ;
13161319 ; SKX-LABEL: zext_8i1_to_8xi64:
13171320 ; SKX: # %bb.0:
1318 ; SKX-NEXT: kmovd %edi, %k1
1319 ; SKX-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
1321 ; SKX-NEXT: kmovd %edi, %k0
1322 ; SKX-NEXT: vpmovm2q %k0, %zmm0
1323 ; SKX-NEXT: vpsrlq $63, %zmm0, %zmm0
13201324 ; SKX-NEXT: retq
13211325 %a = bitcast i8 %b to <8 x i1>
13221326 %c = zext <8 x i1> %a to <8 x i64>
16841688 ;
16851689 ; SKX-LABEL: zext_32xi1_to_32xi16:
16861690 ; SKX: # %bb.0:
1687 ; SKX-NEXT: vpcmpeqw %zmm1, %zmm0, %k1
1688 ; SKX-NEXT: vmovdqu16 {{.*}}(%rip), %zmm0 {%k1} {z}
1691 ; SKX-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
1692 ; SKX-NEXT: vpmovm2w %k0, %zmm0
1693 ; SKX-NEXT: vpsrlw $15, %zmm0, %zmm0
16891694 ; SKX-NEXT: retq
16901695 %mask = icmp eq <32 x i16> %x, %y
16911696 %1 = zext <32 x i1> %mask to <32 x i16>
27862786 define <16 x float> @ubto16f32(<16 x i32> %a) {
27872787 ; GENERIC-LABEL: ubto16f32:
27882788 ; GENERIC: # %bb.0:
2789 ; GENERIC-NEXT: vpmovd2m %zmm0, %k1 # sched: [1:0.33]
2790 ; GENERIC-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [5:1.00]
2789 ; GENERIC-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:0.33]
2790 ; GENERIC-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.33]
2791 ; GENERIC-NEXT: vpsrld $31, %zmm0, %zmm0 # sched: [3:1.00]
27912792 ; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:1.00]
27922793 ; GENERIC-NEXT: retq # sched: [1:1.00]
27932794 ;
27942795 ; SKX-LABEL: ubto16f32:
27952796 ; SKX: # %bb.0:
2796 ; SKX-NEXT: vpmovd2m %zmm0, %k1 # sched: [1:1.00]
2797 ; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [8:0.50]
2797 ; SKX-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:1.00]
2798 ; SKX-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.25]
2799 ; SKX-NEXT: vpsrld $31, %zmm0, %zmm0 # sched: [1:0.50]
27982800 ; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.33]
27992801 ; SKX-NEXT: retq # sched: [7:1.00]
28002802 %mask = icmp slt <16 x i32> %a, zeroinitializer
28052807 define <16 x double> @ubto16f64(<16 x i32> %a) {
28062808 ; GENERIC-LABEL: ubto16f64:
28072809 ; GENERIC: # %bb.0:
2808 ; GENERIC-NEXT: vpmovd2m %zmm0, %k1 # sched: [1:0.33]
2809 ; GENERIC-NEXT: movl {{.*}}(%rip), %eax # sched: [5:0.50]
2810 ; GENERIC-NEXT: vpbroadcastd %eax, %ymm0 {%k1} {z} # sched: [1:1.00]
2810 ; GENERIC-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:0.33]
2811 ; GENERIC-NEXT: vpmovm2d %k0, %ymm0 # sched: [1:0.33]
2812 ; GENERIC-NEXT: vpsrld $31, %ymm0, %ymm0 # sched: [1:1.00]
28112813 ; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00]
2812 ; GENERIC-NEXT: kshiftrw $8, %k1, %k1 # sched: [1:1.00]
2813 ; GENERIC-NEXT: vpbroadcastd %eax, %ymm1 {%k1} {z} # sched: [1:1.00]
2814 ; GENERIC-NEXT: kshiftrw $8, %k0, %k0 # sched: [1:1.00]
2815 ; GENERIC-NEXT: vpmovm2d %k0, %ymm1 # sched: [1:0.33]
2816 ; GENERIC-NEXT: vpsrld $31, %ymm1, %ymm1 # sched: [1:1.00]
28142817 ; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00]
28152818 ; GENERIC-NEXT: retq # sched: [1:1.00]
28162819 ;
28172820 ; SKX-LABEL: ubto16f64:
28182821 ; SKX: # %bb.0:
2819 ; SKX-NEXT: vpmovd2m %zmm0, %k1 # sched: [1:1.00]
2820 ; SKX-NEXT: movl {{.*}}(%rip), %eax # sched: [5:0.50]
2821 ; SKX-NEXT: vpbroadcastd %eax, %ymm0 {%k1} {z} # sched: [3:1.00]
2822 ; SKX-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:1.00]
2823 ; SKX-NEXT: vpmovm2d %k0, %ymm0 # sched: [1:0.25]
2824 ; SKX-NEXT: vpsrld $31, %ymm0, %ymm0 # sched: [1:0.50]
28222825 ; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00]
2823 ; SKX-NEXT: kshiftrw $8, %k1, %k1 # sched: [3:1.00]
2824 ; SKX-NEXT: vpbroadcastd %eax, %ymm1 {%k1} {z} # sched: [3:1.00]
2826 ; SKX-NEXT: kshiftrw $8, %k0, %k0 # sched: [3:1.00]
2827 ; SKX-NEXT: vpmovm2d %k0, %ymm1 # sched: [1:0.25]
2828 ; SKX-NEXT: vpsrld $31, %ymm1, %ymm1 # sched: [1:0.50]
28252829 ; SKX-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00]
28262830 ; SKX-NEXT: retq # sched: [7:1.00]
28272831 %mask = icmp slt <16 x i32> %a, zeroinitializer
28322836 define <8 x float> @ubto8f32(<8 x i32> %a) {
28332837 ; GENERIC-LABEL: ubto8f32:
28342838 ; GENERIC: # %bb.0:
2835 ; GENERIC-NEXT: vpmovd2m %ymm0, %k1 # sched: [1:0.33]
2836 ; GENERIC-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z} # sched: [5:1.00]
2839 ; GENERIC-NEXT: vpmovd2m %ymm0, %k0 # sched: [1:0.33]
2840 ; GENERIC-NEXT: vpmovm2d %k0, %ymm0 # sched: [1:0.33]
2841 ; GENERIC-NEXT: vpsrld $31, %ymm0, %ymm0 # sched: [1:1.00]
28372842 ; GENERIC-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00]
28382843 ; GENERIC-NEXT: retq # sched: [1:1.00]
28392844 ;
28402845 ; SKX-LABEL: ubto8f32:
28412846 ; SKX: # %bb.0:
2842 ; SKX-NEXT: vpmovd2m %ymm0, %k1 # sched: [1:1.00]
2843 ; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z} # sched: [8:0.50]
2847 ; SKX-NEXT: vpmovd2m %ymm0, %k0 # sched: [1:1.00]
2848 ; SKX-NEXT: vpmovm2d %k0, %ymm0 # sched: [1:0.25]
2849 ; SKX-NEXT: vpsrld $31, %ymm0, %ymm0 # sched: [1:0.50]
28442850 ; SKX-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [4:0.33]
28452851 ; SKX-NEXT: retq # sched: [7:1.00]
28462852 %mask = icmp slt <8 x i32> %a, zeroinitializer
28512857 define <8 x double> @ubto8f64(<8 x i32> %a) {
28522858 ; GENERIC-LABEL: ubto8f64:
28532859 ; GENERIC: # %bb.0:
2854 ; GENERIC-NEXT: vpmovd2m %ymm0, %k1 # sched: [1:0.33]
2855 ; GENERIC-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z} # sched: [5:1.00]
2860 ; GENERIC-NEXT: vpmovd2m %ymm0, %k0 # sched: [1:0.33]
2861 ; GENERIC-NEXT: vpmovm2d %k0, %ymm0 # sched: [1:0.33]
2862 ; GENERIC-NEXT: vpsrld $31, %ymm0, %ymm0 # sched: [1:1.00]
28562863 ; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00]
28572864 ; GENERIC-NEXT: retq # sched: [1:1.00]
28582865 ;
28592866 ; SKX-LABEL: ubto8f64:
28602867 ; SKX: # %bb.0:
2861 ; SKX-NEXT: vpmovd2m %ymm0, %k1 # sched: [1:1.00]
2862 ; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z} # sched: [8:0.50]
2868 ; SKX-NEXT: vpmovd2m %ymm0, %k0 # sched: [1:1.00]
2869 ; SKX-NEXT: vpmovm2d %k0, %ymm0 # sched: [1:0.25]
2870 ; SKX-NEXT: vpsrld $31, %ymm0, %ymm0 # sched: [1:0.50]
28632871 ; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00]
28642872 ; SKX-NEXT: retq # sched: [7:1.00]
28652873 %mask = icmp slt <8 x i32> %a, zeroinitializer
28702878 define <4 x float> @ubto4f32(<4 x i32> %a) {
28712879 ; GENERIC-LABEL: ubto4f32:
28722880 ; GENERIC: # %bb.0:
2873 ; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33]
2874 ; GENERIC-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [5:1.00]
2881 ; GENERIC-NEXT: vpmovd2m %xmm0, %k0 # sched: [1:0.33]
2882 ; GENERIC-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.33]
2883 ; GENERIC-NEXT: vpsrld $31, %xmm0, %xmm0 # sched: [1:1.00]
28752884 ; GENERIC-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00]
28762885 ; GENERIC-NEXT: retq # sched: [1:1.00]
28772886 ;
28782887 ; SKX-LABEL: ubto4f32:
28792888 ; SKX: # %bb.0:
2880 ; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00]
2881 ; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [7:0.50]
2889 ; SKX-NEXT: vpmovd2m %xmm0, %k0 # sched: [1:1.00]
2890 ; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25]
2891 ; SKX-NEXT: vpsrld $31, %xmm0, %xmm0 # sched: [1:0.50]
28822892 ; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.33]
28832893 ; SKX-NEXT: retq # sched: [7:1.00]
28842894 %mask = icmp slt <4 x i32> %a, zeroinitializer
28892899 define <4 x double> @ubto4f64(<4 x i32> %a) {
28902900 ; GENERIC-LABEL: ubto4f64:
28912901 ; GENERIC: # %bb.0:
2892 ; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33]
2893 ; GENERIC-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [5:1.00]
2902 ; GENERIC-NEXT: vpmovd2m %xmm0, %k0 # sched: [1:0.33]
2903 ; GENERIC-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.33]
2904 ; GENERIC-NEXT: vpsrld $31, %xmm0, %xmm0 # sched: [1:1.00]
28942905 ; GENERIC-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [4:1.00]
28952906 ; GENERIC-NEXT: retq # sched: [1:1.00]
28962907 ;
28972908 ; SKX-LABEL: ubto4f64:
28982909 ; SKX: # %bb.0:
2899 ; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00]
2900 ; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [7:0.50]
2910 ; SKX-NEXT: vpmovd2m %xmm0, %k0 # sched: [1:1.00]
2911 ; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25]
2912 ; SKX-NEXT: vpsrld $31, %xmm0, %xmm0 # sched: [1:0.50]
29012913 ; SKX-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [7:1.00]
29022914 ; SKX-NEXT: retq # sched: [7:1.00]
29032915 %mask = icmp slt <4 x i32> %a, zeroinitializer
29102922 ; GENERIC: # %bb.0:
29112923 ; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
29122924 ; GENERIC-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] sched: [1:0.50]
2913 ; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k1 # sched: [1:1.00]
2914 ; GENERIC-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [5:1.00]
2925 ; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k0 # sched: [1:1.00]
2926 ; GENERIC-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.33]
2927 ; GENERIC-NEXT: vpsrld $31, %xmm0, %xmm0 # sched: [1:1.00]
29152928 ; GENERIC-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00]
29162929 ; GENERIC-NEXT: retq # sched: [1:1.00]
29172930 ;
29192932 ; SKX: # %bb.0:
29202933 ; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
29212934 ; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] sched: [1:0.33]
2922 ; SKX-NEXT: vptestmq %xmm0, %xmm0, %k1 # sched: [3:1.00]
2923 ; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [7:0.50]
2935 ; SKX-NEXT: vptestmq %xmm0, %xmm0, %k0 # sched: [3:1.00]
2936 ; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25]
2937 ; SKX-NEXT: vpsrld $31, %xmm0, %xmm0 # sched: [1:0.50]
29242938 ; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.33]
29252939 ; SKX-NEXT: retq # sched: [7:1.00]
29262940 %mask = icmp ne <2 x i32> %a, zeroinitializer
29332947 ; GENERIC: # %bb.0:
29342948 ; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
29352949 ; GENERIC-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] sched: [1:0.50]
2936 ; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k1 # sched: [1:1.00]
2937 ; GENERIC-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [5:1.00]
2950 ; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k0 # sched: [1:1.00]
2951 ; GENERIC-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.33]
2952 ; GENERIC-NEXT: vpsrld $31, %xmm0, %xmm0 # sched: [1:1.00]
29382953 ; GENERIC-NEXT: vcvtudq2pd %xmm0, %xmm0 # sched: [4:1.00]
29392954 ; GENERIC-NEXT: retq # sched: [1:1.00]
29402955 ;
29422957 ; SKX: # %bb.0:
29432958 ; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
29442959 ; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] sched: [1:0.33]
2945 ; SKX-NEXT: vptestmq %xmm0, %xmm0, %k1 # sched: [3:1.00]
2946 ; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [7:0.50]
2960 ; SKX-NEXT: vptestmq %xmm0, %xmm0, %k0 # sched: [3:1.00]
2961 ; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25]
2962 ; SKX-NEXT: vpsrld $31, %xmm0, %xmm0 # sched: [1:0.50]
29472963 ; SKX-NEXT: vcvtudq2pd %xmm0, %xmm0 # sched: [5:1.00]
29482964 ; SKX-NEXT: retq # sched: [7:1.00]
29492965 %mask = icmp ne <2 x i32> %a, zeroinitializer
42434259 define <16 x i32> @zext_16i1_to_16xi32(i16 %b) {
42444260 ; GENERIC-LABEL: zext_16i1_to_16xi32:
42454261 ; GENERIC: # %bb.0:
4246 ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
4247 ; GENERIC-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [5:1.00]
4262 ; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33]
4263 ; GENERIC-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.33]
4264 ; GENERIC-NEXT: vpsrld $31, %zmm0, %zmm0 # sched: [3:1.00]
42484265 ; GENERIC-NEXT: retq # sched: [1:1.00]
42494266 ;
42504267 ; SKX-LABEL: zext_16i1_to_16xi32:
42514268 ; SKX: # %bb.0:
4252 ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
4253 ; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [8:0.50]
4269 ; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00]
4270 ; SKX-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.25]
4271 ; SKX-NEXT: vpsrld $31, %zmm0, %zmm0 # sched: [1:0.50]
42544272 ; SKX-NEXT: retq # sched: [7:1.00]
42554273 %a = bitcast i16 %b to <16 x i1>
42564274 %c = zext <16 x i1> %a to <16 x i32>
42604278 define <8 x i64> @zext_8i1_to_8xi64(i8 %b) {
42614279 ; GENERIC-LABEL: zext_8i1_to_8xi64:
42624280 ; GENERIC: # %bb.0:
4263 ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
4264 ; GENERIC-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [5:1.00]
4281 ; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33]
4282 ; GENERIC-NEXT: vpmovm2q %k0, %zmm0 # sched: [1:0.33]
4283 ; GENERIC-NEXT: vpsrlq $63, %zmm0, %zmm0 # sched: [3:1.00]
42654284 ; GENERIC-NEXT: retq # sched: [1:1.00]
42664285 ;
42674286 ; SKX-LABEL: zext_8i1_to_8xi64:
42684287 ; SKX: # %bb.0:
4269 ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
4270 ; SKX-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [8:0.50]
4288 ; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00]
4289 ; SKX-NEXT: vpmovm2q %k0, %zmm0 # sched: [1:0.25]
4290 ; SKX-NEXT: vpsrlq $63, %zmm0, %zmm0 # sched: [1:0.50]
42714291 ; SKX-NEXT: retq # sched: [7:1.00]
42724292 %a = bitcast i8 %b to <8 x i1>
42734293 %c = zext <8 x i1> %a to <8 x i64>
46524672 define <32 x i16> @zext_32xi1_to_32xi16(<32 x i16> %x, <32 x i16> %y) #0 {
46534673 ; GENERIC-LABEL: zext_32xi1_to_32xi16:
46544674 ; GENERIC: # %bb.0:
4655 ; GENERIC-NEXT: vpcmpeqw %zmm1, %zmm0, %k1 # sched: [3:1.00]
4656 ; GENERIC-NEXT: vmovdqu16 {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [4:0.50]
4675 ; GENERIC-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 # sched: [3:1.00]
4676 ; GENERIC-NEXT: vpmovm2w %k0, %zmm0 # sched: [1:0.33]
4677 ; GENERIC-NEXT: vpsrlw $15, %zmm0, %zmm0 # sched: [3:1.00]
46574678 ; GENERIC-NEXT: retq # sched: [1:1.00]
46584679 ;
46594680 ; SKX-LABEL: zext_32xi1_to_32xi16:
46604681 ; SKX: # %bb.0:
4661 ; SKX-NEXT: vpcmpeqw %zmm1, %zmm0, %k1 # sched: [3:1.00]
4662 ; SKX-NEXT: vmovdqu16 {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [8:0.50]
4682 ; SKX-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 # sched: [3:1.00]
4683 ; SKX-NEXT: vpmovm2w %k0, %zmm0 # sched: [1:0.25]
4684 ; SKX-NEXT: vpsrlw $15, %zmm0, %zmm0 # sched: [1:0.50]
46634685 ; SKX-NEXT: retq # sched: [7:1.00]
46644686 %mask = icmp eq <32 x i16> %x, %y
46654687 %1 = zext <32 x i1> %mask to <32 x i16>
287287 }
288288
289289 define <16 x i32> @test13(<16 x float>%a, <16 x float>%b)
290 ; CHECK-LABEL: test13:
291 ; CHECK: ## %bb.0:
292 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
293 ; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
294 ; CHECK-NEXT: retq
290 ; AVX512-LABEL: test13:
291 ; AVX512: ## %bb.0:
292 ; AVX512-NEXT: vcmpeqps %zmm1, %zmm0, %k1
293 ; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
294 ; AVX512-NEXT: vpsrld $31, %zmm0, %zmm0
295 ; AVX512-NEXT: retq
296 ;
297 ; SKX-LABEL: test13:
298 ; SKX: ## %bb.0:
299 ; SKX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
300 ; SKX-NEXT: vpmovm2d %k0, %zmm0
301 ; SKX-NEXT: vpsrld $31, %zmm0, %zmm0
302 ; SKX-NEXT: retq
295303 {
296304 %cmpvector_i = fcmp oeq <16 x float> %a, %b
297305 %conv = zext <16 x i1> %cmpvector_i to <16 x i32>
905913 ;
906914 ; SKX-LABEL: test46:
907915 ; SKX: ## %bb.0:
908 ; SKX-NEXT: vcmpeqps %xmm1, %xmm0, %k1
909 ; SKX-NEXT: vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z}
916 ; SKX-NEXT: vcmpeqps %xmm1, %xmm0, %k0
917 ; SKX-NEXT: vpmovm2q %k0, %xmm0
918 ; SKX-NEXT: vpsrlq $63, %xmm0, %xmm0
910919 ; SKX-NEXT: retq
911920 %mask = fcmp oeq <2 x float> %x, %y
912921 %1 = zext <2 x i1> %mask to <2 x i64>
4949 ; AVX512F-LABEL: ext_i2_2i64:
5050 ; AVX512F: # %bb.0:
5151 ; AVX512F-NEXT: kmovw %edi, %k1
52 ; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
53 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
52 ; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
53 ; AVX512F-NEXT: vpsrlq $63, %xmm0, %xmm0
5454 ; AVX512F-NEXT: vzeroupper
5555 ; AVX512F-NEXT: retq
5656 ;
5757 ; AVX512VLBW-LABEL: ext_i2_2i64:
5858 ; AVX512VLBW: # %bb.0:
5959 ; AVX512VLBW-NEXT: kmovd %edi, %k1
60 ; AVX512VLBW-NEXT: vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z}
60 ; AVX512VLBW-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
61 ; AVX512VLBW-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
62 ; AVX512VLBW-NEXT: vpsrlq $63, %xmm0, %xmm0
6163 ; AVX512VLBW-NEXT: retq
6264 %1 = bitcast i2 %a0 to <2 x i1>
6365 %2 = zext <2 x i1> %1 to <2 x i64>
98100 ; AVX512F-LABEL: ext_i4_4i32:
99101 ; AVX512F: # %bb.0:
100102 ; AVX512F-NEXT: kmovw %edi, %k1
101 ; AVX512F-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
102 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
103 ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
104 ; AVX512F-NEXT: vpsrld $31, %xmm0, %xmm0
103105 ; AVX512F-NEXT: vzeroupper
104106 ; AVX512F-NEXT: retq
105107 ;
106108 ; AVX512VLBW-LABEL: ext_i4_4i32:
107109 ; AVX512VLBW: # %bb.0:
108110 ; AVX512VLBW-NEXT: kmovd %edi, %k1
109 ; AVX512VLBW-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
111 ; AVX512VLBW-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
112 ; AVX512VLBW-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
113 ; AVX512VLBW-NEXT: vpsrld $31, %xmm0, %xmm0
110114 ; AVX512VLBW-NEXT: retq
111115 %1 = bitcast i4 %a0 to <4 x i1>
112116 %2 = zext <4 x i1> %1 to <4 x i32>
149153 ; AVX512F-LABEL: ext_i8_8i16:
150154 ; AVX512F: # %bb.0:
151155 ; AVX512F-NEXT: kmovw %edi, %k1
152 ; AVX512F-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
156 ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
153157 ; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
154 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
158 ; AVX512F-NEXT: vpsrlw $15, %xmm0, %xmm0
155159 ; AVX512F-NEXT: vzeroupper
156160 ; AVX512F-NEXT: retq
157161 ;
158162 ; AVX512VLBW-LABEL: ext_i8_8i16:
159163 ; AVX512VLBW: # %bb.0:
160 ; AVX512VLBW-NEXT: kmovd %edi, %k1
161 ; AVX512VLBW-NEXT: vmovdqu16 {{.*}}(%rip), %xmm0 {%k1} {z}
164 ; AVX512VLBW-NEXT: kmovd %edi, %k0
165 ; AVX512VLBW-NEXT: vpmovm2w %k0, %xmm0
166 ; AVX512VLBW-NEXT: vpsrlw $15, %xmm0, %xmm0
162167 ; AVX512VLBW-NEXT: retq
163168 %1 = bitcast i8 %a0 to <8 x i1>
164169 %2 = zext <8 x i1> %1 to <8 x i16>
288293 ; AVX512F-LABEL: ext_i4_4i64:
289294 ; AVX512F: # %bb.0:
290295 ; AVX512F-NEXT: kmovw %edi, %k1
291 ; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
292 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
296 ; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
297 ; AVX512F-NEXT: vpsrlq $63, %ymm0, %ymm0
293298 ; AVX512F-NEXT: retq
294299 ;
295300 ; AVX512VLBW-LABEL: ext_i4_4i64:
296301 ; AVX512VLBW: # %bb.0:
297302 ; AVX512VLBW-NEXT: kmovd %edi, %k1
298 ; AVX512VLBW-NEXT: vpbroadcastq {{.*}}(%rip), %ymm0 {%k1} {z}
303 ; AVX512VLBW-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
304 ; AVX512VLBW-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
305 ; AVX512VLBW-NEXT: vpsrlq $63, %ymm0, %ymm0
299306 ; AVX512VLBW-NEXT: retq
300307 %1 = bitcast i4 %a0 to <4 x i1>
301308 %2 = zext <4 x i1> %1 to <4 x i64>
349356 ; AVX512F-LABEL: ext_i8_8i32:
350357 ; AVX512F: # %bb.0:
351358 ; AVX512F-NEXT: kmovw %edi, %k1
352 ; AVX512F-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
353 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
359 ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
360 ; AVX512F-NEXT: vpsrld $31, %ymm0, %ymm0
354361 ; AVX512F-NEXT: retq
355362 ;
356363 ; AVX512VLBW-LABEL: ext_i8_8i32:
357364 ; AVX512VLBW: # %bb.0:
358365 ; AVX512VLBW-NEXT: kmovd %edi, %k1
359 ; AVX512VLBW-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z}
366 ; AVX512VLBW-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
367 ; AVX512VLBW-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
368 ; AVX512VLBW-NEXT: vpsrld $31, %ymm0, %ymm0
360369 ; AVX512VLBW-NEXT: retq
361370 %1 = bitcast i8 %a0 to <8 x i1>
362371 %2 = zext <8 x i1> %1 to <8 x i32>
412421 ; AVX512F-LABEL: ext_i16_16i16:
413422 ; AVX512F: # %bb.0:
414423 ; AVX512F-NEXT: kmovw %edi, %k1
415 ; AVX512F-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
424 ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
416425 ; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
426 ; AVX512F-NEXT: vpsrlw $15, %ymm0, %ymm0
417427 ; AVX512F-NEXT: retq
418428 ;
419429 ; AVX512VLBW-LABEL: ext_i16_16i16:
420430 ; AVX512VLBW: # %bb.0:
421 ; AVX512VLBW-NEXT: kmovd %edi, %k1
422 ; AVX512VLBW-NEXT: vmovdqu16 {{.*}}(%rip), %ymm0 {%k1} {z}
431 ; AVX512VLBW-NEXT: kmovd %edi, %k0
432 ; AVX512VLBW-NEXT: vpmovm2w %k0, %ymm0
433 ; AVX512VLBW-NEXT: vpsrlw $15, %ymm0, %ymm0
423434 ; AVX512VLBW-NEXT: retq
424435 %1 = bitcast i16 %a0 to <16 x i1>
425436 %2 = zext <16 x i1> %1 to <16 x i16>
610621 ; AVX512F-LABEL: ext_i8_8i64:
611622 ; AVX512F: # %bb.0:
612623 ; AVX512F-NEXT: kmovw %edi, %k1
613 ; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
624 ; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
625 ; AVX512F-NEXT: vpsrlq $63, %zmm0, %zmm0
614626 ; AVX512F-NEXT: retq
615627 ;
616628 ; AVX512VLBW-LABEL: ext_i8_8i64:
617629 ; AVX512VLBW: # %bb.0:
618630 ; AVX512VLBW-NEXT: kmovd %edi, %k1
619 ; AVX512VLBW-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
631 ; AVX512VLBW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
632 ; AVX512VLBW-NEXT: vpsrlq $63, %zmm0, %zmm0
620633 ; AVX512VLBW-NEXT: retq
621634 %1 = bitcast i8 %a0 to <8 x i1>
622635 %2 = zext <8 x i1> %1 to <8 x i64>
693706 ; AVX512F-LABEL: ext_i16_16i32:
694707 ; AVX512F: # %bb.0:
695708 ; AVX512F-NEXT: kmovw %edi, %k1
696 ; AVX512F-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
709 ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
710 ; AVX512F-NEXT: vpsrld $31, %zmm0, %zmm0
697711 ; AVX512F-NEXT: retq
698712 ;
699713 ; AVX512VLBW-LABEL: ext_i16_16i32:
700714 ; AVX512VLBW: # %bb.0:
701715 ; AVX512VLBW-NEXT: kmovd %edi, %k1
702 ; AVX512VLBW-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
716 ; AVX512VLBW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
717 ; AVX512VLBW-NEXT: vpsrld $31, %zmm0, %zmm0
703718 ; AVX512VLBW-NEXT: retq
704719 %1 = bitcast i16 %a0 to <16 x i1>
705720 %2 = zext <16 x i1> %1 to <16 x i32>
785800 ; AVX512F-NEXT: kmovw %edi, %k1
786801 ; AVX512F-NEXT: shrl $16, %edi
787802 ; AVX512F-NEXT: kmovw %edi, %k2
788 ; AVX512F-NEXT: movl {{.*}}(%rip), %eax
789 ; AVX512F-NEXT: vpbroadcastd %eax, %zmm0 {%k1} {z}
803 ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
790804 ; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
791 ; AVX512F-NEXT: vpbroadcastd %eax, %zmm1 {%k2} {z}
805 ; AVX512F-NEXT: vpsrlw $15, %ymm0, %ymm0
806 ; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
792807 ; AVX512F-NEXT: vpmovdw %zmm1, %ymm1
808 ; AVX512F-NEXT: vpsrlw $15, %ymm1, %ymm1
793809 ; AVX512F-NEXT: retq
794810 ;
795811 ; AVX512VLBW-LABEL: ext_i32_32i16:
796812 ; AVX512VLBW: # %bb.0:
797 ; AVX512VLBW-NEXT: kmovd %edi, %k1
798 ; AVX512VLBW-NEXT: vmovdqu16 {{.*}}(%rip), %zmm0 {%k1} {z}
813 ; AVX512VLBW-NEXT: kmovd %edi, %k0
814 ; AVX512VLBW-NEXT: vpmovm2w %k0, %zmm0
815 ; AVX512VLBW-NEXT: vpsrlw $15, %zmm0, %zmm0
799816 ; AVX512VLBW-NEXT: retq
800817 %1 = bitcast i32 %a0 to <32 x i1>
801818 %2 = zext <32 x i1> %1 to <32 x i16>
134134 ; AVX256: # %bb.0:
135135 ; AVX256-NEXT: vpxor %xmm0, %xmm0, %xmm0
136136 ; AVX256-NEXT: vpcmpeqd (%rdi), %ymm0, %k1
137 ; AVX256-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z}
138 ; AVX256-NEXT: vpmovdw %ymm0, %xmm0
137 ; AVX256-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
138 ; AVX256-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
139 ; AVX256-NEXT: vpmovdw %ymm0, %xmm0
140 ; AVX256-NEXT: vpsrlw $15, %xmm0, %xmm0
139141 ; AVX256-NEXT: vzeroupper
140142 ; AVX256-NEXT: retq
141143 ;
143145 ; AVX512VL: # %bb.0:
144146 ; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0
145147 ; AVX512VL-NEXT: vpcmpeqd (%rdi), %ymm0, %k1
146 ; AVX512VL-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z}
148 ; AVX512VL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
149 ; AVX512VL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
147150 ; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
151 ; AVX512VL-NEXT: vpsrlw $15, %xmm0, %xmm0
148152 ; AVX512VL-NEXT: vzeroupper
149153 ; AVX512VL-NEXT: retq
150154 ;
168172 ; AVX256-NEXT: vpxor %xmm0, %xmm0, %xmm0
169173 ; AVX256-NEXT: vpcmpeqd (%rdi), %ymm0, %k1
170174 ; AVX256-NEXT: vpcmpeqd (%rsi), %ymm0, %k2
171 ; AVX256-NEXT: movl {{.*}}(%rip), %eax
172 ; AVX256-NEXT: vpbroadcastd %eax, %ymm0 {%k2} {z}
173 ; AVX256-NEXT: vpmovdw %ymm0, %xmm0
174 ; AVX256-NEXT: vmovdqa {{.*#+}} xmm1 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
175 ; AVX256-NEXT: vpshufb %xmm1, %xmm0, %xmm0
176 ; AVX256-NEXT: vpbroadcastd %eax, %ymm2 {%k1} {z}
177 ; AVX256-NEXT: vpmovdw %ymm2, %xmm2
178 ; AVX256-NEXT: vpshufb %xmm1, %xmm2, %xmm1
179 ; AVX256-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
175 ; AVX256-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
176 ; AVX256-NEXT: vmovdqa32 %ymm0, %ymm1 {%k2} {z}
177 ; AVX256-NEXT: vpmovdw %ymm1, %xmm1
178 ; AVX256-NEXT: vpsrlw $15, %xmm1, %xmm1
179 ; AVX256-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
180 ; AVX256-NEXT: vpmovdw %ymm0, %xmm0
181 ; AVX256-NEXT: vpsrlw $15, %xmm0, %xmm0
182 ; AVX256-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
180183 ; AVX256-NEXT: vzeroupper
181184 ; AVX256-NEXT: retq
182185 ;
217220 ; AVX256-NEXT: vpxor %xmm0, %xmm0, %xmm0
218221 ; AVX256-NEXT: vpcmpeqd (%rdi), %ymm0, %k1
219222 ; AVX256-NEXT: vpcmpeqd (%rsi), %ymm0, %k2
220 ; AVX256-NEXT: movl {{.*}}(%rip), %eax
221 ; AVX256-NEXT: vpbroadcastd %eax, %ymm0 {%k1} {z}
222 ; AVX256-NEXT: vpmovdw %ymm0, %xmm0
223 ; AVX256-NEXT: vpbroadcastd %eax, %ymm1 {%k2} {z}
224 ; AVX256-NEXT: vpmovdw %ymm1, %xmm1
225 ; AVX256-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
223 ; AVX256-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
224 ; AVX256-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} {z}
225 ; AVX256-NEXT: vpmovdw %ymm1, %xmm1
226 ; AVX256-NEXT: vmovdqa32 %ymm0, %ymm0 {%k2} {z}
227 ; AVX256-NEXT: vpmovdw %ymm0, %xmm0
228 ; AVX256-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
229 ; AVX256-NEXT: vpsrlw $15, %ymm0, %ymm0
226230 ; AVX256-NEXT: retq
227231 ;
228232 ; AVX512VL-LABEL: testv16i1_zext_v16i16:
231235 ; AVX512VL-NEXT: vpcmpeqd (%rdi), %ymm0, %k0
232236 ; AVX512VL-NEXT: vpcmpeqd (%rsi), %ymm0, %k1
233237 ; AVX512VL-NEXT: kunpckbw %k0, %k1, %k1
234 ; AVX512VL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
238 ; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
235239 ; AVX512VL-NEXT: vpmovdw %zmm0, %ymm0
240 ; AVX512VL-NEXT: vpsrlw $15, %ymm0, %ymm0
236241 ; AVX512VL-NEXT: retq
237242 ;
238243 ; AVX512F-LABEL: testv16i1_zext_v16i16:
242247 ; AVX512F-NEXT: vmovdqa (%rsi), %ymm0
243248 ; AVX512F-NEXT: vptestnmd %zmm0, %zmm0, %k1
244249 ; AVX512F-NEXT: kunpckbw %k0, %k1, %k1
245 ; AVX512F-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
246 ; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
250 ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
251 ; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
252 ; AVX512F-NEXT: vpsrlw $15, %ymm0, %ymm0
247253 ; AVX512F-NEXT: retq
248254 %in = load <8 x i32>, <8 x i32>* %p
249255 %cmp = icmp eq <8 x i32> %in, zeroinitializer