llvm.org GIT mirror llvm / b85ce7e
[X86][SSE] Added constant SMAX/SMIN/UMAX/UMIN tests. Constant folding patch to follow soon. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@245276 91177308-0d34-0410-b5e6-96231b3b80d8 Simon Pilgrim 4 years ago
2 changed file(s) with 3260 addition(s) and 0 deletion(s). Raw diff Collapse all Expand all
15941594 %2 = select <32 x i1> %1, <32 x i8> %a, <32 x i8> %b
15951595 ret <32 x i8> %2
15961596 }
1597
1598 ;
1599 ; Constant Folding
1600 ;
1601
; Signed max written as (a > b) ? a : b over two constant <2 x i64> vectors.
; Lane 0 of each base constant is rewritten by the insertelement, so only the
; remaining lanes of the literals reach codegen. The expectations below record
; the compare+blend sequence that is emitted while the operation is not yet
; constant-folded.
define <2 x i64> @max_gt_v2i64c() {
; SSE2-LABEL: max_gt_v2i64c:
; SSE2:       # BB#0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [18446744073709551609,7]
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [18446744073709551615,1]
; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [2147483648,0,2147483648,0]
; SSE2-NEXT:    movdqa %xmm0, %xmm3
; SSE2-NEXT:    pxor %xmm2, %xmm3
; SSE2-NEXT:    pxor %xmm1, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm4
; SSE2-NEXT:    pcmpgtd %xmm3, %xmm4
; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm3, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm5, %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
; SSE2-NEXT:    por %xmm3, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm3
; SSE2-NEXT:    pandn %xmm2, %xmm3
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    por %xmm3, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: max_gt_v2i64c:
; SSE41:       # BB#0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [18446744073709551609,7]
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [18446744073709551615,1]
; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [2147483648,0,2147483648,0]
; SSE41-NEXT:    movdqa %xmm0, %xmm3
; SSE41-NEXT:    pxor %xmm1, %xmm3
; SSE41-NEXT:    pxor %xmm2, %xmm0
; SSE41-NEXT:    movdqa %xmm0, %xmm4
; SSE41-NEXT:    pcmpgtd %xmm3, %xmm4
; SSE41-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm3, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm5, %xmm3
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
; SSE41-NEXT:    por %xmm3, %xmm0
; SSE41-NEXT:    blendvpd %xmm2, %xmm1
; SSE41-NEXT:    movapd %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; SSE42-LABEL: max_gt_v2i64c:
; SSE42:       # BB#0:
; SSE42-NEXT:    movdqa {{.*#+}} xmm2 = [18446744073709551609,7]
; SSE42-NEXT:    movdqa {{.*#+}} xmm1 = [18446744073709551615,1]
; SSE42-NEXT:    movdqa %xmm2, %xmm0
; SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
; SSE42-NEXT:    blendvpd %xmm2, %xmm1
; SSE42-NEXT:    movapd %xmm1, %xmm0
; SSE42-NEXT:    retq
;
; AVX-LABEL: max_gt_v2i64c:
; AVX:       # BB#0:
; AVX-NEXT:    vmovdqa {{.*#+}} xmm0 = [18446744073709551609,7]
; AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [18446744073709551615,1]
; AVX-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
; AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  ; Base constants match the register loads expected above (-7 = 2^64-7, etc.).
  %1 = insertelement <2 x i64> <i64 -7, i64 7>, i64 -7, i32 0
  %2 = insertelement <2 x i64> <i64 -1, i64 1>, i64 -1, i32 0
  %3 = icmp sgt <2 x i64> %1, %2
  %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
  ret <2 x i64> %4
}
1668
; Signed max written as (a > b) ? a : b over two constant <4 x i64> vectors.
; Lane 0 of each base constant is rewritten by the insertelement. On 128-bit
; subtargets the vectors are split into two halves, which is why each constant
; shows up as two separate loads in the expectations.
define <4 x i64> @max_gt_v4i64c() {
; SSE2-LABEL: max_gt_v4i64c:
; SSE2:       # BB#0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [18446744073709551609,18446744073709551615]
; SSE2-NEXT:    movdqa {{.*#+}} xmm8 = [1,7]
; SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [18446744073709551615,18446744073709551609]
; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [7,1]
; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [2147483648,0,2147483648,0]
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    pxor %xmm3, %xmm1
; SSE2-NEXT:    movdqa %xmm0, %xmm6
; SSE2-NEXT:    pxor %xmm8, %xmm6
; SSE2-NEXT:    movdqa %xmm6, %xmm7
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm7
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm7[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm6
; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
; SSE2-NEXT:    pand %xmm2, %xmm6
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm7[1,1,3,3]
; SSE2-NEXT:    por %xmm6, %xmm1
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pxor %xmm5, %xmm2
; SSE2-NEXT:    pxor %xmm4, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm6
; SSE2-NEXT:    pcmpgtd %xmm2, %xmm6
; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm2, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm7, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3]
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pandn %xmm5, %xmm2
; SSE2-NEXT:    pand %xmm4, %xmm0
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pandn %xmm3, %xmm2
; SSE2-NEXT:    pand %xmm8, %xmm1
; SSE2-NEXT:    por %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; SSE41-LABEL: max_gt_v4i64c:
; SSE41:       # BB#0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm5 = [18446744073709551609,18446744073709551615]
; SSE41-NEXT:    movdqa {{.*#+}} xmm8 = [1,7]
; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [18446744073709551615,18446744073709551609]
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [7,1]
; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [2147483648,0,2147483648,0]
; SSE41-NEXT:    movdqa %xmm0, %xmm3
; SSE41-NEXT:    pxor %xmm1, %xmm3
; SSE41-NEXT:    movdqa %xmm0, %xmm6
; SSE41-NEXT:    pxor %xmm8, %xmm6
; SSE41-NEXT:    movdqa %xmm6, %xmm7
; SSE41-NEXT:    pcmpgtd %xmm3, %xmm7
; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm7[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm3, %xmm6
; SSE41-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
; SSE41-NEXT:    pand %xmm4, %xmm6
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm7[1,1,3,3]
; SSE41-NEXT:    por %xmm6, %xmm3
; SSE41-NEXT:    movdqa %xmm0, %xmm4
; SSE41-NEXT:    pxor %xmm2, %xmm4
; SSE41-NEXT:    pxor %xmm5, %xmm0
; SSE41-NEXT:    movdqa %xmm0, %xmm6
; SSE41-NEXT:    pcmpgtd %xmm4, %xmm6
; SSE41-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm4, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm7, %xmm4
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3]
; SSE41-NEXT:    por %xmm4, %xmm0
; SSE41-NEXT:    blendvpd %xmm5, %xmm2
; SSE41-NEXT:    movdqa %xmm3, %xmm0
; SSE41-NEXT:    blendvpd %xmm8, %xmm1
; SSE41-NEXT:    movapd %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; SSE42-LABEL: max_gt_v4i64c:
; SSE42:       # BB#0:
; SSE42-NEXT:    movdqa {{.*#+}} xmm4 = [18446744073709551609,18446744073709551615]
; SSE42-NEXT:    movdqa {{.*#+}} xmm5 = [1,7]
; SSE42-NEXT:    movdqa {{.*#+}} xmm2 = [18446744073709551615,18446744073709551609]
; SSE42-NEXT:    movdqa {{.*#+}} xmm1 = [7,1]
; SSE42-NEXT:    movdqa %xmm5, %xmm3
; SSE42-NEXT:    pcmpgtq %xmm1, %xmm3
; SSE42-NEXT:    movdqa %xmm4, %xmm0
; SSE42-NEXT:    pcmpgtq %xmm2, %xmm0
; SSE42-NEXT:    blendvpd %xmm4, %xmm2
; SSE42-NEXT:    movdqa %xmm3, %xmm0
; SSE42-NEXT:    blendvpd %xmm5, %xmm1
; SSE42-NEXT:    movapd %xmm2, %xmm0
; SSE42-NEXT:    retq
;
; AVX1-LABEL: max_gt_v4i64c:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovapd {{.*#+}} ymm0 = [18446744073709551615,18446744073709551609,7,1]
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [18446744073709551609,18446744073709551615]
; AVX1-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm1, %xmm1
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [1,7]
; AVX1-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm2, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:    vblendvpd %ymm1, {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: max_gt_v4i64c:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm0 = [18446744073709551609,18446744073709551615,1,7]
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [18446744073709551615,18446744073709551609,7,1]
; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm2
; AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: max_gt_v4i64c:
; AVX512:       # BB#0:
; AVX512-NEXT:    vmovdqa {{.*#+}} ymm0 = [18446744073709551609,18446744073709551615,1,7]
; AVX512-NEXT:    vmovdqa {{.*#+}} ymm1 = [18446744073709551615,18446744073709551609,7,1]
; AVX512-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm2
; AVX512-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX512-NEXT:    retq
  ; Base constants match the ymm loads expected for the 256-bit subtargets.
  %1 = insertelement <4 x i64> <i64 -7, i64 -1, i64 1, i64 7>, i64 -7, i32 0
  %2 = insertelement <4 x i64> <i64 -1, i64 -7, i64 7, i64 1>, i64 -1, i32 0
  %3 = icmp sgt <4 x i64> %1, %2
  %4 = select <4 x i1> %3, <4 x i64> %1, <4 x i64> %2
  ret <4 x i64> %4
}
1794
; Signed max written as (a > b) ? a : b over two constant <4 x i32> vectors.
; Lane 0 of each base constant is rewritten by the insertelement (to -7 and 1
; respectively). Subtargets newer than SSE2 are expected to match a single
; packed signed-max instruction with the second constant as a memory operand.
define <4 x i32> @max_gt_v4i32c() {
; SSE2-LABEL: max_gt_v4i32c:
; SSE2:       # BB#0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [4294967289,4294967295,1,7]
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [1,4294967289,7,1]
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    pcmpgtd %xmm2, %xmm0
; SSE2-NEXT:    pand %xmm0, %xmm1
; SSE2-NEXT:    pandn %xmm2, %xmm0
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: max_gt_v4i32c:
; SSE41:       # BB#0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7]
; SSE41-NEXT:    pmaxsd {{.*}}(%rip), %xmm0
; SSE41-NEXT:    retq
;
; SSE42-LABEL: max_gt_v4i32c:
; SSE42:       # BB#0:
; SSE42-NEXT:    movdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7]
; SSE42-NEXT:    pmaxsd {{.*}}(%rip), %xmm0
; SSE42-NEXT:    retq
;
; AVX-LABEL: max_gt_v4i32c:
; AVX:       # BB#0:
; AVX-NEXT:    vmovdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7]
; AVX-NEXT:    vpmaxsd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  ; Lanes 1..3 of both constants are taken from the SSE2 register loads above;
  ; lane 0 of the second literal is overwritten, so -1 simply continues the
  ; visible pattern.
  %1 = insertelement <4 x i32> <i32 -7, i32 -1, i32 1, i32 7>, i32 -7, i32 0
  %2 = insertelement <4 x i32> <i32 -1, i32 -7, i32 7, i32 1>, i32 1, i32 0
  %3 = icmp sgt <4 x i32> %1, %2
  %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
  ret <4 x i32> %4
}
1830
; Signed max written as (a > b) ? a : b over two constant <8 x i32> vectors.
; Lane 0 of each base constant is rewritten by the insertelement (to -7 and 1
; respectively). 128-bit subtargets process the value as two halves.
define <8 x i32> @max_gt_v8i32c() {
; SSE2-LABEL: max_gt_v8i32c:
; SSE2:       # BB#0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [4294967289,4294967291,4294967293,4294967295]
; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [1,3,5,7]
; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [1,4294967293,4294967291,4294967289]
; SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [7,5,3,1]
; SSE2-NEXT:    movdqa %xmm3, %xmm1
; SSE2-NEXT:    pcmpgtd %xmm5, %xmm1
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    pcmpgtd %xmm4, %xmm0
; SSE2-NEXT:    pand %xmm0, %xmm2
; SSE2-NEXT:    pandn %xmm4, %xmm0
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm3
; SSE2-NEXT:    pandn %xmm5, %xmm1
; SSE2-NEXT:    por %xmm3, %xmm1
; SSE2-NEXT:    retq
;
; SSE41-LABEL: max_gt_v8i32c:
; SSE41:       # BB#0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [1,3,5,7]
; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295]
; SSE41-NEXT:    pmaxsd {{.*}}(%rip), %xmm0
; SSE41-NEXT:    pmaxsd {{.*}}(%rip), %xmm1
; SSE41-NEXT:    retq
;
; SSE42-LABEL: max_gt_v8i32c:
; SSE42:       # BB#0:
; SSE42-NEXT:    movdqa {{.*#+}} xmm1 = [1,3,5,7]
; SSE42-NEXT:    movdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295]
; SSE42-NEXT:    pmaxsd {{.*}}(%rip), %xmm0
; SSE42-NEXT:    pmaxsd {{.*}}(%rip), %xmm1
; SSE42-NEXT:    retq
;
; AVX1-LABEL: max_gt_v8i32c:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295]
; AVX1-NEXT:    vpmaxsd {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,3,5,7]
; AVX1-NEXT:    vpmaxsd {{.*}}(%rip), %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: max_gt_v8i32c:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm0 = [4294967289,4294967291,4294967293,4294967295,1,3,5,7]
; AVX2-NEXT:    vpmaxsd {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: max_gt_v8i32c:
; AVX512:       # BB#0:
; AVX512-NEXT:    vmovdqa {{.*#+}} ymm0 = [4294967289,4294967291,4294967293,4294967295,1,3,5,7]
; AVX512-NEXT:    vpmaxsd {{.*}}(%rip), %ymm0, %ymm0
; AVX512-NEXT:    retq
  ; Lanes 1..7 of both constants are taken from the SSE2 register loads above;
  ; lane 0 of the second literal is overwritten, so -1 simply continues the
  ; visible pattern.
  %1 = insertelement <8 x i32> <i32 -7, i32 -5, i32 -3, i32 -1, i32 1, i32 3, i32 5, i32 7>, i32 -7, i32 0
  %2 = insertelement <8 x i32> <i32 -1, i32 -3, i32 -5, i32 -7, i32 7, i32 5, i32 3, i32 1>, i32 1, i32 0
  %3 = icmp sgt <8 x i32> %1, %2
  %4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2
  ret <8 x i32> %4
}
1892
; Signed max written as (a > b) ? a : b over two constant <8 x i16> vectors.
; Lane 0 of each base constant is rewritten by the insertelement (to -7 and 1
; respectively). Every subtarget is expected to match a single packed
; signed-max instruction with the second constant as a memory operand.
define <8 x i16> @max_gt_v8i16c() {
; SSE-LABEL: max_gt_v8i16c:
; SSE:       # BB#0:
; SSE-NEXT:    movdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7]
; SSE-NEXT:    pmaxsw {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: max_gt_v8i16c:
; AVX:       # BB#0:
; AVX-NEXT:    vmovdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7]
; AVX-NEXT:    vpmaxsw {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  ; The first constant matches the register load expected above.
  ; NOTE(review): the second constant only ever appears as a rip-relative
  ; memory operand, so its lanes are reconstructed here by analogy with the
  ; v8i32 variant of this test - confirm against the upstream file.
  %1 = insertelement <8 x i16> <i16 -7, i16 -5, i16 -3, i16 -1, i16 1, i16 3, i16 5, i16 7>, i16 -7, i16 0
  %2 = insertelement <8 x i16> <i16 -1, i16 -3, i16 -5, i16 -7, i16 7, i16 5, i16 3, i16 1>, i16 1, i16 0
  %3 = icmp sgt <8 x i16> %1, %2
  %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
  ret <8 x i16> %4
}
1911
; Signed max written as (a > b) ? a : b over two constant <16 x i16> vectors.
; Lane 0 of each base constant is rewritten by the insertelement (to -7 and 1
; respectively). 128-bit subtargets process the value as two halves.
define <16 x i16> @max_gt_v16i16c() {
; SSE-LABEL: max_gt_v16i16c:
; SSE:       # BB#0:
; SSE-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8]
; SSE-NEXT:    movdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0]
; SSE-NEXT:    pmaxsw {{.*}}(%rip), %xmm0
; SSE-NEXT:    pmaxsw {{.*}}(%rip), %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: max_gt_v16i16c:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0]
; AVX1-NEXT:    vpmaxsw {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8]
; AVX1-NEXT:    vpmaxsw {{.*}}(%rip), %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: max_gt_v16i16c:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm0 = [65529,65530,65531,65532,65533,65534,65535,0,1,2,3,4,5,6,7,8]
; AVX2-NEXT:    vpmaxsw {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: max_gt_v16i16c:
; AVX512:       # BB#0:
; AVX512-NEXT:    vmovdqa {{.*#+}} ymm0 = [65529,65530,65531,65532,65533,65534,65535,0,1,2,3,4,5,6,7,8]
; AVX512-NEXT:    vpmaxsw {{.*}}(%rip), %ymm0, %ymm0
; AVX512-NEXT:    retq
  ; The first constant matches the register loads expected above.
  ; NOTE(review): the second constant only ever appears as a rip-relative
  ; memory operand, so its lanes are reconstructed here by analogy with the
  ; v16i8 variant of this test - confirm against the upstream file.
  %1 = insertelement <16 x i16> <i16 -7, i16 -6, i16 -5, i16 -4, i16 -3, i16 -2, i16 -1, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i16 -7, i16 0
  %2 = insertelement <16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 0, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 1, i16 0
  %3 = icmp sgt <16 x i16> %1, %2
  %4 = select <16 x i1> %3, <16 x i16> %1, <16 x i16> %2
  ret <16 x i16> %4
}
1947
; Signed max written as (a > b) ? a : b over two constant <16 x i8> vectors.
; Lane 0 of each base constant is rewritten by the insertelement (to -7 and 1
; respectively). Subtargets newer than SSE2 are expected to match a single
; packed signed-max instruction with the second constant as a memory operand.
define <16 x i8> @max_gt_v16i8c() {
; SSE2-LABEL: max_gt_v16i8c:
; SSE2:       # BB#0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8]
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [1,254,253,252,251,250,249,0,7,6,5,4,3,2,1,0]
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    pcmpgtb %xmm2, %xmm0
; SSE2-NEXT:    pand %xmm0, %xmm1
; SSE2-NEXT:    pandn %xmm2, %xmm0
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: max_gt_v16i8c:
; SSE41:       # BB#0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8]
; SSE41-NEXT:    pmaxsb {{.*}}(%rip), %xmm0
; SSE41-NEXT:    retq
;
; SSE42-LABEL: max_gt_v16i8c:
; SSE42:       # BB#0:
; SSE42-NEXT:    movdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8]
; SSE42-NEXT:    pmaxsb {{.*}}(%rip), %xmm0
; SSE42-NEXT:    retq
;
; AVX-LABEL: max_gt_v16i8c:
; AVX:       # BB#0:
; AVX-NEXT:    vmovdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8]
; AVX-NEXT:    vpmaxsb {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  ; Lanes 1..15 of both constants are taken from the SSE2 register loads above;
  ; lane 0 of the second literal is overwritten, so -1 simply continues the
  ; visible pattern.
  %1 = insertelement <16 x i8> <i8 -7, i8 -6, i8 -5, i8 -4, i8 -3, i8 -2, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i8 -7, i8 0
  %2 = insertelement <16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, i8 1, i8 0
  %3 = icmp sgt <16 x i8> %1, %2
  %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
  ret <16 x i8> %4
}
1983
; Signed max written as (a >= b) ? a : b over two constant <2 x i64> vectors.
; Lane 0 of each base constant is rewritten by the insertelement. The
; expectations show the >= compare being emitted as an inverted "b > a"
; (compare, then xor with all-ones) ahead of the blend.
define <2 x i64> @max_ge_v2i64c() {
; SSE2-LABEL: max_ge_v2i64c:
; SSE2:       # BB#0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [18446744073709551609,7]
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [18446744073709551615,1]
; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [2147483648,0,2147483648,0]
; SSE2-NEXT:    movdqa %xmm0, %xmm3
; SSE2-NEXT:    pxor %xmm1, %xmm3
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm4
; SSE2-NEXT:    pcmpgtd %xmm3, %xmm4
; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm3, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm5, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; SSE2-NEXT:    por %xmm0, %xmm3
; SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
; SSE2-NEXT:    pxor %xmm3, %xmm0
; SSE2-NEXT:    pandn %xmm1, %xmm3
; SSE2-NEXT:    pandn %xmm2, %xmm0
; SSE2-NEXT:    por %xmm3, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: max_ge_v2i64c:
; SSE41:       # BB#0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [18446744073709551609,7]
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [18446744073709551615,1]
; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [2147483648,0,2147483648,0]
; SSE41-NEXT:    movdqa %xmm0, %xmm3
; SSE41-NEXT:    pxor %xmm2, %xmm3
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    movdqa %xmm0, %xmm4
; SSE41-NEXT:    pcmpgtd %xmm3, %xmm4
; SSE41-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm3, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm5, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; SSE41-NEXT:    por %xmm0, %xmm3
; SSE41-NEXT:    pcmpeqd %xmm0, %xmm0
; SSE41-NEXT:    pxor %xmm3, %xmm0
; SSE41-NEXT:    blendvpd %xmm2, %xmm1
; SSE41-NEXT:    movapd %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; SSE42-LABEL: max_ge_v2i64c:
; SSE42:       # BB#0:
; SSE42-NEXT:    movdqa {{.*#+}} xmm2 = [18446744073709551609,7]
; SSE42-NEXT:    movdqa {{.*#+}} xmm1 = [18446744073709551615,1]
; SSE42-NEXT:    movdqa %xmm1, %xmm3
; SSE42-NEXT:    pcmpgtq %xmm2, %xmm3
; SSE42-NEXT:    pcmpeqd %xmm0, %xmm0
; SSE42-NEXT:    pxor %xmm3, %xmm0
; SSE42-NEXT:    blendvpd %xmm2, %xmm1
; SSE42-NEXT:    movapd %xmm1, %xmm0
; SSE42-NEXT:    retq
;
; AVX-LABEL: max_ge_v2i64c:
; AVX:       # BB#0:
; AVX-NEXT:    vmovdqa {{.*#+}} xmm0 = [18446744073709551609,7]
; AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [18446744073709551615,1]
; AVX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
; AVX-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX-NEXT:    vpxor %xmm3, %xmm2, %xmm2
; AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  ; Base constants match the register loads expected above (-7 = 2^64-7, etc.).
  %1 = insertelement <2 x i64> <i64 -7, i64 7>, i64 -7, i32 0
  %2 = insertelement <2 x i64> <i64 -1, i64 1>, i64 -1, i32 0
  %3 = icmp sge <2 x i64> %1, %2
  %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
  ret <2 x i64> %4
}
2057
; Signed max written as (a >= b) ? a : b over two constant <4 x i64> vectors.
; Lane 0 of each base constant is rewritten by the insertelement. The
; expectations show the >= compare being emitted as an inverted "b > a"
; (compare, then xor with all-ones); 128-bit subtargets work on two halves.
define <4 x i64> @max_ge_v4i64c() {
; SSE2-LABEL: max_ge_v4i64c:
; SSE2:       # BB#0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm10 = [18446744073709551609,18446744073709551615]
; SSE2-NEXT:    movdqa {{.*#+}} xmm8 = [1,7]
; SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [18446744073709551615,18446744073709551609]
; SSE2-NEXT:    movdqa {{.*#+}} xmm9 = [7,1]
; SSE2-NEXT:    movdqa {{.*#+}} xmm7 = [2147483648,0,2147483648,0]
; SSE2-NEXT:    movdqa %xmm7, %xmm0
; SSE2-NEXT:    pxor %xmm8, %xmm0
; SSE2-NEXT:    movdqa %xmm7, %xmm1
; SSE2-NEXT:    pxor %xmm9, %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm6
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm6
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm6[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
; SSE2-NEXT:    por %xmm0, %xmm6
; SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
; SSE2-NEXT:    movdqa %xmm6, %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm1
; SSE2-NEXT:    movdqa %xmm7, %xmm2
; SSE2-NEXT:    pxor %xmm10, %xmm2
; SSE2-NEXT:    pxor %xmm5, %xmm7
; SSE2-NEXT:    movdqa %xmm7, %xmm3
; SSE2-NEXT:    pcmpgtd %xmm2, %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm2, %xmm7
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm7[1,1,3,3]
; SSE2-NEXT:    pand %xmm4, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; SSE2-NEXT:    por %xmm2, %xmm3
; SSE2-NEXT:    pxor %xmm3, %xmm0
; SSE2-NEXT:    pandn %xmm10, %xmm3
; SSE2-NEXT:    pandn %xmm5, %xmm0
; SSE2-NEXT:    por %xmm3, %xmm0
; SSE2-NEXT:    pandn %xmm8, %xmm6
; SSE2-NEXT:    pandn %xmm9, %xmm1
; SSE2-NEXT:    por %xmm6, %xmm1
; SSE2-NEXT:    retq
;
; SSE41-LABEL: max_ge_v4i64c:
; SSE41:       # BB#0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm9 = [18446744073709551609,18446744073709551615]
; SSE41-NEXT:    movdqa {{.*#+}} xmm8 = [1,7]
; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [18446744073709551615,18446744073709551609]
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [7,1]
; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [2147483648,0,2147483648,0]
; SSE41-NEXT:    movdqa %xmm0, %xmm3
; SSE41-NEXT:    pxor %xmm8, %xmm3
; SSE41-NEXT:    movdqa %xmm0, %xmm6
; SSE41-NEXT:    pxor %xmm1, %xmm6
; SSE41-NEXT:    movdqa %xmm6, %xmm7
; SSE41-NEXT:    pcmpgtd %xmm3, %xmm7
; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm7[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm3, %xmm6
; SSE41-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
; SSE41-NEXT:    pand %xmm4, %xmm6
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm7[1,1,3,3]
; SSE41-NEXT:    por %xmm6, %xmm3
; SSE41-NEXT:    pcmpeqd %xmm4, %xmm4
; SSE41-NEXT:    pxor %xmm4, %xmm3
; SSE41-NEXT:    movdqa %xmm0, %xmm6
; SSE41-NEXT:    pxor %xmm9, %xmm6
; SSE41-NEXT:    pxor %xmm2, %xmm0
; SSE41-NEXT:    movdqa %xmm0, %xmm7
; SSE41-NEXT:    pcmpgtd %xmm6, %xmm7
; SSE41-NEXT:    pshufd {{.*#+}} xmm5 = xmm7[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm6, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm5, %xmm6
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm7[1,1,3,3]
; SSE41-NEXT:    por %xmm6, %xmm0
; SSE41-NEXT:    pxor %xmm4, %xmm0
; SSE41-NEXT:    blendvpd %xmm9, %xmm2
; SSE41-NEXT:    movdqa %xmm3, %xmm0
; SSE41-NEXT:    blendvpd %xmm8, %xmm1
; SSE41-NEXT:    movapd %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; SSE42-LABEL: max_ge_v4i64c:
; SSE42:       # BB#0:
; SSE42-NEXT:    movdqa {{.*#+}} xmm4 = [18446744073709551609,18446744073709551615]
; SSE42-NEXT:    movdqa {{.*#+}} xmm5 = [1,7]
; SSE42-NEXT:    movdqa {{.*#+}} xmm2 = [18446744073709551615,18446744073709551609]
; SSE42-NEXT:    movdqa {{.*#+}} xmm1 = [7,1]
; SSE42-NEXT:    movdqa %xmm1, %xmm3
; SSE42-NEXT:    pcmpgtq %xmm5, %xmm3
; SSE42-NEXT:    pcmpeqd %xmm6, %xmm6
; SSE42-NEXT:    pxor %xmm6, %xmm3
; SSE42-NEXT:    movdqa %xmm2, %xmm0
; SSE42-NEXT:    pcmpgtq %xmm4, %xmm0
; SSE42-NEXT:    pxor %xmm6, %xmm0
; SSE42-NEXT:    blendvpd %xmm4, %xmm2
; SSE42-NEXT:    movdqa %xmm3, %xmm0
; SSE42-NEXT:    blendvpd %xmm5, %xmm1
; SSE42-NEXT:    movapd %xmm2, %xmm0
; SSE42-NEXT:    retq
;
; AVX1-LABEL: max_ge_v4i64c:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovapd {{.*#+}} ymm0 = [18446744073709551615,18446744073709551609,7,1]
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [7,1]
; AVX1-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpxor %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [18446744073709551615,18446744073709551609]
; AVX1-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT:    vblendvpd %ymm1, {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: max_ge_v4i64c:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm0 = [18446744073709551609,18446744073709551615,1,7]
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [18446744073709551615,18446744073709551609,7,1]
; AVX2-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm2
; AVX2-NEXT:    vpcmpeqd %ymm3, %ymm3, %ymm3
; AVX2-NEXT:    vpxor %ymm3, %ymm2, %ymm2
; AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: max_ge_v4i64c:
; AVX512:       # BB#0:
; AVX512-NEXT:    vmovdqa {{.*#+}} ymm0 = [18446744073709551609,18446744073709551615,1,7]
; AVX512-NEXT:    vmovdqa {{.*#+}} ymm1 = [18446744073709551615,18446744073709551609,7,1]
; AVX512-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm2
; AVX512-NEXT:    vpcmpeqd %ymm3, %ymm3, %ymm3
; AVX512-NEXT:    vpxor %ymm3, %ymm2, %ymm2
; AVX512-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX512-NEXT:    retq
  ; Base constants match the ymm loads expected for the 256-bit subtargets.
  %1 = insertelement <4 x i64> <i64 -7, i64 -1, i64 1, i64 7>, i64 -7, i32 0
  %2 = insertelement <4 x i64> <i64 -1, i64 -7, i64 7, i64 1>, i64 -1, i32 0
  %3 = icmp sge <4 x i64> %1, %2
  %4 = select <4 x i1> %3, <4 x i64> %1, <4 x i64> %2
  ret <4 x i64> %4
}
2198
; Signed max written as (a >= b) ? a : b over two constant <4 x i32> vectors.
; Lane 0 of each base constant is rewritten by the insertelement (to -7 and 1
; respectively). Subtargets newer than SSE2 are expected to match a single
; packed signed-max instruction with the second constant as a memory operand.
define <4 x i32> @max_ge_v4i32c() {
; SSE2-LABEL: max_ge_v4i32c:
; SSE2:       # BB#0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [4294967289,4294967295,1,7]
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [1,4294967289,7,1]
; SSE2-NEXT:    movdqa %xmm2, %xmm3
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm3
; SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
; SSE2-NEXT:    pxor %xmm3, %xmm0
; SSE2-NEXT:    pandn %xmm1, %xmm3
; SSE2-NEXT:    pandn %xmm2, %xmm0
; SSE2-NEXT:    por %xmm3, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: max_ge_v4i32c:
; SSE41:       # BB#0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7]
; SSE41-NEXT:    pmaxsd {{.*}}(%rip), %xmm0
; SSE41-NEXT:    retq
;
; SSE42-LABEL: max_ge_v4i32c:
; SSE42:       # BB#0:
; SSE42-NEXT:    movdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7]
; SSE42-NEXT:    pmaxsd {{.*}}(%rip), %xmm0
; SSE42-NEXT:    retq
;
; AVX-LABEL: max_ge_v4i32c:
; AVX:       # BB#0:
; AVX-NEXT:    vmovdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7]
; AVX-NEXT:    vpmaxsd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  ; Lanes 1..3 of both constants are taken from the SSE2 register loads above;
  ; lane 0 of the second literal is overwritten, so -1 simply continues the
  ; visible pattern.
  %1 = insertelement <4 x i32> <i32 -7, i32 -1, i32 1, i32 7>, i32 -7, i32 0
  %2 = insertelement <4 x i32> <i32 -1, i32 -7, i32 7, i32 1>, i32 1, i32 0
  %3 = icmp sge <4 x i32> %1, %2
  %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
  ret <4 x i32> %4
}
2236
; Signed max written as (a >= b) ? a : b over two constant <8 x i32> vectors.
; Lane 0 of each base constant is rewritten by the insertelement (to -7 and 1
; respectively). 128-bit subtargets process the value as two halves.
define <8 x i32> @max_ge_v8i32c() {
; SSE2-LABEL: max_ge_v8i32c:
; SSE2:       # BB#0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [4294967289,4294967291,4294967293,4294967295]
; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [1,3,5,7]
; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [1,4294967293,4294967291,4294967289]
; SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [7,5,3,1]
; SSE2-NEXT:    movdqa %xmm5, %xmm6
; SSE2-NEXT:    pcmpgtd %xmm3, %xmm6
; SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
; SSE2-NEXT:    movdqa %xmm6, %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm1
; SSE2-NEXT:    movdqa %xmm4, %xmm7
; SSE2-NEXT:    pcmpgtd %xmm2, %xmm7
; SSE2-NEXT:    pxor %xmm7, %xmm0
; SSE2-NEXT:    pandn %xmm2, %xmm7
; SSE2-NEXT:    pandn %xmm4, %xmm0
; SSE2-NEXT:    por %xmm7, %xmm0
; SSE2-NEXT:    pandn %xmm3, %xmm6
; SSE2-NEXT:    pandn %xmm5, %xmm1
; SSE2-NEXT:    por %xmm6, %xmm1
; SSE2-NEXT:    retq
;
; SSE41-LABEL: max_ge_v8i32c:
; SSE41:       # BB#0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [1,3,5,7]
; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295]
; SSE41-NEXT:    pmaxsd {{.*}}(%rip), %xmm0
; SSE41-NEXT:    pmaxsd {{.*}}(%rip), %xmm1
; SSE41-NEXT:    retq
;
; SSE42-LABEL: max_ge_v8i32c:
; SSE42:       # BB#0:
; SSE42-NEXT:    movdqa {{.*#+}} xmm1 = [1,3,5,7]
; SSE42-NEXT:    movdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295]
; SSE42-NEXT:    pmaxsd {{.*}}(%rip), %xmm0
; SSE42-NEXT:    pmaxsd {{.*}}(%rip), %xmm1
; SSE42-NEXT:    retq
;
; AVX1-LABEL: max_ge_v8i32c:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295]
; AVX1-NEXT:    vpmaxsd {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,3,5,7]
; AVX1-NEXT:    vpmaxsd {{.*}}(%rip), %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: max_ge_v8i32c:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm0 = [4294967289,4294967291,4294967293,4294967295,1,3,5,7]
; AVX2-NEXT:    vpmaxsd {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: max_ge_v8i32c:
; AVX512:       # BB#0:
; AVX512-NEXT:    vmovdqa {{.*#+}} ymm0 = [4294967289,4294967291,4294967293,4294967295,1,3,5,7]
; AVX512-NEXT:    vpmaxsd {{.*}}(%rip), %ymm0, %ymm0
; AVX512-NEXT:    retq
  ; Lanes 1..7 of both constants are taken from the SSE2 register loads above;
  ; lane 0 of the second literal is overwritten, so -1 simply continues the
  ; visible pattern.
  %1 = insertelement <8 x i32> <i32 -7, i32 -5, i32 -3, i32 -1, i32 1, i32 3, i32 5, i32 7>, i32 -7, i32 0
  %2 = insertelement <8 x i32> <i32 -1, i32 -3, i32 -5, i32 -7, i32 7, i32 5, i32 3, i32 1>, i32 1, i32 0
  %3 = icmp sge <8 x i32> %1, %2
  %4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2
  ret <8 x i32> %4
}
2302
; Signed max written as (a >= b) ? a : b over two constant <8 x i16> vectors.
; Lane 0 of each base constant is rewritten by the insertelement (to -7 and 1
; respectively). Every subtarget is expected to match a single packed
; signed-max instruction with the second constant as a memory operand.
define <8 x i16> @max_ge_v8i16c() {
; SSE-LABEL: max_ge_v8i16c:
; SSE:       # BB#0:
; SSE-NEXT:    movdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7]
; SSE-NEXT:    pmaxsw {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: max_ge_v8i16c:
; AVX:       # BB#0:
; AVX-NEXT:    vmovdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7]
; AVX-NEXT:    vpmaxsw {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  ; The first constant matches the register load expected above.
  ; NOTE(review): the second constant only ever appears as a rip-relative
  ; memory operand, so its lanes are reconstructed here by analogy with the
  ; v8i32 variant of this test - confirm against the upstream file.
  %1 = insertelement <8 x i16> <i16 -7, i16 -5, i16 -3, i16 -1, i16 1, i16 3, i16 5, i16 7>, i16 -7, i16 0
  %2 = insertelement <8 x i16> <i16 -1, i16 -3, i16 -5, i16 -7, i16 7, i16 5, i16 3, i16 1>, i16 1, i16 0
  %3 = icmp sge <8 x i16> %1, %2
  %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
  ret <8 x i16> %4
}
2321
; Signed max written as (a >= b) ? a : b over two constant <16 x i16> vectors.
; Lane 0 of each base constant is rewritten by the insertelement (to -7 and 1
; respectively). 128-bit subtargets process the value as two halves.
define <16 x i16> @max_ge_v16i16c() {
; SSE-LABEL: max_ge_v16i16c:
; SSE:       # BB#0:
; SSE-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8]
; SSE-NEXT:    movdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0]
; SSE-NEXT:    pmaxsw {{.*}}(%rip), %xmm0
; SSE-NEXT:    pmaxsw {{.*}}(%rip), %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: max_ge_v16i16c:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0]
; AVX1-NEXT:    vpmaxsw {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8]
; AVX1-NEXT:    vpmaxsw {{.*}}(%rip), %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: max_ge_v16i16c:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm0 = [65529,65530,65531,65532,65533,65534,65535,0,1,2,3,4,5,6,7,8]
; AVX2-NEXT:    vpmaxsw {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: max_ge_v16i16c:
; AVX512:       # BB#0:
; AVX512-NEXT:    vmovdqa {{.*#+}} ymm0 = [65529,65530,65531,65532,65533,65534,65535,0,1,2,3,4,5,6,7,8]
; AVX512-NEXT:    vpmaxsw {{.*}}(%rip), %ymm0, %ymm0
; AVX512-NEXT:    retq
  ; The first constant matches the register loads expected above.
  ; NOTE(review): the second constant only ever appears as a rip-relative
  ; memory operand, so its lanes are reconstructed here by analogy with the
  ; v16i8 variant of this test - confirm against the upstream file.
  %1 = insertelement <16 x i16> <i16 -7, i16 -6, i16 -5, i16 -4, i16 -3, i16 -2, i16 -1, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i16 -7, i16 0
  %2 = insertelement <16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 0, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 1, i16 0
  %3 = icmp sge <16 x i16> %1, %2
  %4 = select <16 x i1> %3, <16 x i16> %1, <16 x i16> %2
  ret <16 x i16> %4
}
2357
; Signed max written as (a >= b) ? a : b over two constant <16 x i8> vectors.
; Lane 0 of each base constant is rewritten by the insertelement (to -7 and 1
; respectively). Subtargets newer than SSE2 are expected to match a single
; packed signed-max instruction with the second constant as a memory operand.
define <16 x i8> @max_ge_v16i8c() {
; SSE2-LABEL: max_ge_v16i8c:
; SSE2:       # BB#0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8]
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [1,254,253,252,251,250,249,0,7,6,5,4,3,2,1,0]
; SSE2-NEXT:    movdqa %xmm2, %xmm3
; SSE2-NEXT:    pcmpgtb %xmm1, %xmm3
; SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
; SSE2-NEXT:    pxor %xmm3, %xmm0
; SSE2-NEXT:    pandn %xmm1, %xmm3
; SSE2-NEXT:    pandn %xmm2, %xmm0
; SSE2-NEXT:    por %xmm3, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: max_ge_v16i8c:
; SSE41:       # BB#0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8]
; SSE41-NEXT:    pmaxsb {{.*}}(%rip), %xmm0
; SSE41-NEXT:    retq
;
; SSE42-LABEL: max_ge_v16i8c:
; SSE42:       # BB#0:
; SSE42-NEXT:    movdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8]
; SSE42-NEXT:    pmaxsb {{.*}}(%rip), %xmm0
; SSE42-NEXT:    retq
;
; AVX-LABEL: max_ge_v16i8c:
; AVX:       # BB#0:
; AVX-NEXT:    vmovdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8]
; AVX-NEXT:    vpmaxsb {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  ; Lanes 1..15 of both constants are taken from the SSE2 register loads above;
  ; lane 0 of the second literal is overwritten, so -1 simply continues the
  ; visible pattern.
  %1 = insertelement <16 x i8> <i8 -7, i8 -6, i8 -5, i8 -4, i8 -3, i8 -2, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i8 -7, i8 0
  %2 = insertelement <16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, i8 1, i8 0
  %3 = icmp sge <16 x i8> %1, %2
  %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
  ret <16 x i8> %4
}
2395
2396 define <2 x i64> @max_lt_v2i64c() { ; NOTE: despite the max_ name, slt + select(%1, %2) keeps the smaller lane (signed min)
2397 ; SSE2-LABEL: max_lt_v2i64c:
2398 ; SSE2: # BB#0:
2399 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [18446744073709551609,7]
2400 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [18446744073709551615,1]
2401 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,0,2147483648,0]
2402 ; SSE2-NEXT: movdqa %xmm0, %xmm3
2403 ; SSE2-NEXT: pxor %xmm1, %xmm3
2404 ; SSE2-NEXT: pxor %xmm2, %xmm0
2405 ; SSE2-NEXT: movdqa %xmm0, %xmm4
2406 ; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
2407 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
2408 ; SSE2-NEXT: pcmpeqd %xmm3, %xmm0
2409 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2410 ; SSE2-NEXT: pand %xmm5, %xmm3
2411 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
2412 ; SSE2-NEXT: por %xmm3, %xmm0
2413 ; SSE2-NEXT: movdqa %xmm0, %xmm3
2414 ; SSE2-NEXT: pandn %xmm2, %xmm3
2415 ; SSE2-NEXT: pand %xmm1, %xmm0
2416 ; SSE2-NEXT: por %xmm3, %xmm0
2417 ; SSE2-NEXT: retq
2418 ;
2419 ; SSE41-LABEL: max_lt_v2i64c:
2420 ; SSE41: # BB#0:
2421 ; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [18446744073709551609,7]
2422 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [18446744073709551615,1]
2423 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,0,2147483648,0]
2424 ; SSE41-NEXT: movdqa %xmm0, %xmm3
2425 ; SSE41-NEXT: pxor %xmm2, %xmm3
2426 ; SSE41-NEXT: pxor %xmm1, %xmm0
2427 ; SSE41-NEXT: movdqa %xmm0, %xmm4
2428 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm4
2429 ; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
2430 ; SSE41-NEXT: pcmpeqd %xmm3, %xmm0
2431 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2432 ; SSE41-NEXT: pand %xmm5, %xmm3
2433 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
2434 ; SSE41-NEXT: por %xmm3, %xmm0
2435 ; SSE41-NEXT: blendvpd %xmm2, %xmm1
2436 ; SSE41-NEXT: movapd %xmm1, %xmm0
2437 ; SSE41-NEXT: retq
2438 ;
2439 ; SSE42-LABEL: max_lt_v2i64c:
2440 ; SSE42: # BB#0:
2441 ; SSE42-NEXT: movdqa {{.*#+}} xmm2 = [18446744073709551609,7]
2442 ; SSE42-NEXT: movdqa {{.*#+}} xmm1 = [18446744073709551615,1]
2443 ; SSE42-NEXT: movdqa %xmm1, %xmm0
2444 ; SSE42-NEXT: pcmpgtq %xmm2, %xmm0
2445 ; SSE42-NEXT: blendvpd %xmm2, %xmm1
2446 ; SSE42-NEXT: movapd %xmm1, %xmm0
2447 ; SSE42-NEXT: retq
2448 ;
2449 ; AVX-LABEL: max_lt_v2i64c:
2450 ; AVX: # BB#0:
2451 ; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [18446744073709551609,7]
2452 ; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744073709551615,1]
2453 ; AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
2454 ; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
2455 ; AVX-NEXT: retq
2456 %1 = insertelement <2 x i64> <i64 -7, i64 7>, i64 -7, i32 0 ; lane 0 rewritten with -7
2457 %2 = insertelement <2 x i64> <i64 -1, i64 1>, i64 -1, i32 0 ; lane 0 rewritten with -1
2458 %3 = icmp slt <2 x i64> %1, %2
2459 %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2 ; keep %1 where %1 < %2 (signed)
2460 ret <2 x i64> %4
2461 }
2462
2463 define <4 x i64> @max_lt_v4i64c() { ; NOTE: despite the max_ name, slt + select(%1, %2) keeps the smaller lane (signed min)
2464 ; SSE2-LABEL: max_lt_v4i64c:
2465 ; SSE2: # BB#0:
2466 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [18446744073709551609,18446744073709551615]
2467 ; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [1,7]
2468 ; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [18446744073709551615,18446744073709551609]
2469 ; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [7,1]
2470 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,0,2147483648,0]
2471 ; SSE2-NEXT: movdqa %xmm0, %xmm1
2472 ; SSE2-NEXT: pxor %xmm8, %xmm1
2473 ; SSE2-NEXT: movdqa %xmm0, %xmm6
2474 ; SSE2-NEXT: pxor %xmm3, %xmm6
2475 ; SSE2-NEXT: movdqa %xmm6, %xmm7
2476 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm7
2477 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm7[0,0,2,2]
2478 ; SSE2-NEXT: pcmpeqd %xmm1, %xmm6
2479 ; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
2480 ; SSE2-NEXT: pand %xmm2, %xmm6
2481 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm7[1,1,3,3]
2482 ; SSE2-NEXT: por %xmm6, %xmm1
2483 ; SSE2-NEXT: movdqa %xmm0, %xmm2
2484 ; SSE2-NEXT: pxor %xmm4, %xmm2
2485 ; SSE2-NEXT: pxor %xmm5, %xmm0
2486 ; SSE2-NEXT: movdqa %xmm0, %xmm6
2487 ; SSE2-NEXT: pcmpgtd %xmm2, %xmm6
2488 ; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
2489 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
2490 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
2491 ; SSE2-NEXT: pand %xmm7, %xmm2
2492 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3]
2493 ; SSE2-NEXT: por %xmm2, %xmm0
2494 ; SSE2-NEXT: movdqa %xmm0, %xmm2
2495 ; SSE2-NEXT: pandn %xmm5, %xmm2
2496 ; SSE2-NEXT: pand %xmm4, %xmm0
2497 ; SSE2-NEXT: por %xmm2, %xmm0
2498 ; SSE2-NEXT: movdqa %xmm1, %xmm2
2499 ; SSE2-NEXT: pandn %xmm3, %xmm2
2500 ; SSE2-NEXT: pand %xmm8, %xmm1
2501 ; SSE2-NEXT: por %xmm2, %xmm1
2502 ; SSE2-NEXT: retq
2503 ;
2504 ; SSE41-LABEL: max_lt_v4i64c:
2505 ; SSE41: # BB#0:
2506 ; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [18446744073709551609,18446744073709551615]
2507 ; SSE41-NEXT: movdqa {{.*#+}} xmm8 = [1,7]
2508 ; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [18446744073709551615,18446744073709551609]
2509 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [7,1]
2510 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,0,2147483648,0]
2511 ; SSE41-NEXT: movdqa %xmm0, %xmm3
2512 ; SSE41-NEXT: pxor %xmm8, %xmm3
2513 ; SSE41-NEXT: movdqa %xmm0, %xmm6
2514 ; SSE41-NEXT: pxor %xmm1, %xmm6
2515 ; SSE41-NEXT: movdqa %xmm6, %xmm7
2516 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm7
2517 ; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm7[0,0,2,2]
2518 ; SSE41-NEXT: pcmpeqd %xmm3, %xmm6
2519 ; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
2520 ; SSE41-NEXT: pand %xmm4, %xmm6
2521 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm7[1,1,3,3]
2522 ; SSE41-NEXT: por %xmm6, %xmm3
2523 ; SSE41-NEXT: movdqa %xmm0, %xmm4
2524 ; SSE41-NEXT: pxor %xmm5, %xmm4
2525 ; SSE41-NEXT: pxor %xmm2, %xmm0
2526 ; SSE41-NEXT: movdqa %xmm0, %xmm6
2527 ; SSE41-NEXT: pcmpgtd %xmm4, %xmm6
2528 ; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
2529 ; SSE41-NEXT: pcmpeqd %xmm4, %xmm0
2530 ; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
2531 ; SSE41-NEXT: pand %xmm7, %xmm4
2532 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3]
2533 ; SSE41-NEXT: por %xmm4, %xmm0
2534 ; SSE41-NEXT: blendvpd %xmm5, %xmm2
2535 ; SSE41-NEXT: movdqa %xmm3, %xmm0
2536 ; SSE41-NEXT: blendvpd %xmm8, %xmm1
2537 ; SSE41-NEXT: movapd %xmm2, %xmm0
2538 ; SSE41-NEXT: retq
2539 ;
2540 ; SSE42-LABEL: max_lt_v4i64c:
2541 ; SSE42: # BB#0:
2542 ; SSE42-NEXT: movdqa {{.*#+}} xmm4 = [18446744073709551609,18446744073709551615]
2543 ; SSE42-NEXT: movdqa {{.*#+}} xmm5 = [1,7]
2544 ; SSE42-NEXT: movdqa {{.*#+}} xmm2 = [18446744073709551615,18446744073709551609]
2545 ; SSE42-NEXT: movdqa {{.*#+}} xmm1 = [7,1]
2546 ; SSE42-NEXT: movdqa %xmm1, %xmm3
2547 ; SSE42-NEXT: pcmpgtq %xmm5, %xmm3
2548 ; SSE42-NEXT: movdqa %xmm2, %xmm0
2549 ; SSE42-NEXT: pcmpgtq %xmm4, %xmm0
2550 ; SSE42-NEXT: blendvpd %xmm4, %xmm2
2551 ; SSE42-NEXT: movdqa %xmm3, %xmm0
2552 ; SSE42-NEXT: blendvpd %xmm5, %xmm1
2553 ; SSE42-NEXT: movapd %xmm2, %xmm0
2554 ; SSE42-NEXT: retq
2555 ;
2556 ; AVX1-LABEL: max_lt_v4i64c:
2557 ; AVX1: # BB#0:
2558 ; AVX1-NEXT: vmovapd {{.*#+}} ymm0 = [18446744073709551615,18446744073709551609,7,1]
2559 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744073709551615,18446744073709551609]
2560 ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm1, %xmm1
2561 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [7,1]
2562 ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm2, %xmm2
2563 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
2564 ; AVX1-NEXT: vblendvpd %ymm1, {{.*}}(%rip), %ymm0, %ymm0
2565 ; AVX1-NEXT: retq
2566 ;
2567 ; AVX2-LABEL: max_lt_v4i64c:
2568 ; AVX2: # BB#0:
2569 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [18446744073709551609,18446744073709551615,1,7]
2570 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [18446744073709551615,18446744073709551609,7,1]
2571 ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
2572 ; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
2573 ; AVX2-NEXT: retq
2574 ;
2575 ; AVX512-LABEL: max_lt_v4i64c:
2576 ; AVX512: # BB#0:
2577 ; AVX512-NEXT: vmovdqa {{.*#+}} ymm0 = [18446744073709551609,18446744073709551615,1,7]
2578 ; AVX512-NEXT: vmovdqa {{.*#+}} ymm1 = [18446744073709551615,18446744073709551609,7,1]
2579 ; AVX512-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
2580 ; AVX512-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
2581 ; AVX512-NEXT: retq
2582 %1 = insertelement <4 x i64> <i64 -7, i64 -1, i64 1, i64 7>, i64 -7, i32 0 ; lane 0 rewritten with -7
2583 %2 = insertelement <4 x i64> <i64 -1, i64 -7, i64 7, i64 1>, i64 -1, i32 0 ; lane 0 rewritten with -1
2584 %3 = icmp slt <4 x i64> %1, %2
2585 %4 = select <4 x i1> %3, <4 x i64> %1, <4 x i64> %2 ; keep %1 where %1 < %2 (signed)
2586 ret <4 x i64> %4
2587 }
2588
2589 define <4 x i32> @max_lt_v4i32c() { ; NOTE: despite the max_ name, slt + select(%1, %2) keeps the smaller lane (signed min, lowered to pminsd)
2590 ; SSE2-LABEL: max_lt_v4i32c:
2591 ; SSE2: # BB#0:
2592 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [4294967289,4294967295,1,7]
2593 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1,4294967289,7,1]
2594 ; SSE2-NEXT: movdqa %xmm2, %xmm0
2595 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
2596 ; SSE2-NEXT: pand %xmm0, %xmm1
2597 ; SSE2-NEXT: pandn %xmm2, %xmm0
2598 ; SSE2-NEXT: por %xmm1, %xmm0
2599 ; SSE2-NEXT: retq
2600 ;
2601 ; SSE41-LABEL: max_lt_v4i32c:
2602 ; SSE41: # BB#0:
2603 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7]
2604 ; SSE41-NEXT: pminsd {{.*}}(%rip), %xmm0
2605 ; SSE41-NEXT: retq
2606 ;
2607 ; SSE42-LABEL: max_lt_v4i32c:
2608 ; SSE42: # BB#0:
2609 ; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7]
2610 ; SSE42-NEXT: pminsd {{.*}}(%rip), %xmm0
2611 ; SSE42-NEXT: retq
2612 ;
2613 ; AVX-LABEL: max_lt_v4i32c:
2614 ; AVX: # BB#0:
2615 ; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7]
2616 ; AVX-NEXT: vpminsd {{.*}}(%rip), %xmm0, %xmm0
2617 ; AVX-NEXT: retq
2618 %1 = insertelement <4 x i32> <i32 -7, i32 -1, i32 1, i32 7>, i32 -7, i32 0 ; lane 0 rewritten with -7
2619 %2 = insertelement <4 x i32> <i32 -1, i32 -7, i32 7, i32 1>, i32 1, i32 0 ; lane 0 is overwritten with 1, so its base value is arbitrary
2620 %3 = icmp slt <4 x i32> %1, %2
2621 %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2 ; keep %1 where %1 < %2 (signed)
2622 ret <4 x i32> %4
2623 }
2624
2625 define <8 x i32> @max_lt_v8i32c() { ; NOTE: despite the max_ name, slt + select(%1, %2) keeps the smaller lane (signed min, lowered to pminsd)
2626 ; SSE2-LABEL: max_lt_v8i32c:
2627 ; SSE2: # BB#0:
2628 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [4294967289,4294967291,4294967293,4294967295]
2629 ; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [1,3,5,7]
2630 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [1,4294967293,4294967291,4294967289]
2631 ; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [7,5,3,1]
2632 ; SSE2-NEXT: movdqa %xmm5, %xmm1
2633 ; SSE2-NEXT: pcmpgtd %xmm3, %xmm1
2634 ; SSE2-NEXT: movdqa %xmm4, %xmm0
2635 ; SSE2-NEXT: pcmpgtd %xmm2, %xmm0
2636 ; SSE2-NEXT: pand %xmm0, %xmm2
2637 ; SSE2-NEXT: pandn %xmm4, %xmm0
2638 ; SSE2-NEXT: por %xmm2, %xmm0
2639 ; SSE2-NEXT: pand %xmm1, %xmm3
2640 ; SSE2-NEXT: pandn %xmm5, %xmm1
2641 ; SSE2-NEXT: por %xmm3, %xmm1
2642 ; SSE2-NEXT: retq
2643 ;
2644 ; SSE41-LABEL: max_lt_v8i32c:
2645 ; SSE41: # BB#0:
2646 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,3,5,7]
2647 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295]
2648 ; SSE41-NEXT: pminsd {{.*}}(%rip), %xmm0
2649 ; SSE41-NEXT: pminsd {{.*}}(%rip), %xmm1
2650 ; SSE41-NEXT: retq
2651 ;
2652 ; SSE42-LABEL: max_lt_v8i32c:
2653 ; SSE42: # BB#0:
2654 ; SSE42-NEXT: movdqa {{.*#+}} xmm1 = [1,3,5,7]
2655 ; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295]
2656 ; SSE42-NEXT: pminsd {{.*}}(%rip), %xmm0
2657 ; SSE42-NEXT: pminsd {{.*}}(%rip), %xmm1
2658 ; SSE42-NEXT: retq
2659 ;
2660 ; AVX1-LABEL: max_lt_v8i32c:
2661 ; AVX1: # BB#0:
2662 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295]
2663 ; AVX1-NEXT: vpminsd {{.*}}(%rip), %xmm0, %xmm0
2664 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,3,5,7]
2665 ; AVX1-NEXT: vpminsd {{.*}}(%rip), %xmm1, %xmm1
2666 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2667 ; AVX1-NEXT: retq
2668 ;
2669 ; AVX2-LABEL: max_lt_v8i32c:
2670 ; AVX2: # BB#0:
2671 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [4294967289,4294967291,4294967293,4294967295,1,3,5,7]
2672 ; AVX2-NEXT: vpminsd {{.*}}(%rip), %ymm0, %ymm0
2673 ; AVX2-NEXT: retq
2674 ;
2675 ; AVX512-LABEL: max_lt_v8i32c:
2676 ; AVX512: # BB#0:
2677 ; AVX512-NEXT: vmovdqa {{.*#+}} ymm0 = [4294967289,4294967291,4294967293,4294967295,1,3,5,7]
2678 ; AVX512-NEXT: vpminsd {{.*}}(%rip), %ymm0, %ymm0
2679 ; AVX512-NEXT: retq
2680 %1 = insertelement <8 x i32> <i32 -7, i32 -5, i32 -3, i32 -1, i32 1, i32 3, i32 5, i32 7>, i32 -7, i32 0 ; lane 0 rewritten with -7
2681 %2 = insertelement <8 x i32> <i32 -1, i32 -3, i32 -5, i32 -7, i32 7, i32 5, i32 3, i32 1>, i32 1, i32 0 ; lane 0 is overwritten with 1, so its base value is arbitrary
2682 %3 = icmp slt <8 x i32> %1, %2
2683 %4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2 ; keep %1 where %1 < %2 (signed)
2684 ret <8 x i32> %4
2685 }
2686
2687 define <8 x i16> @max_lt_v8i16c() { ; NOTE: despite the max_ name, slt + select(%1, %2) keeps the smaller lane (signed min, lowered to pminsw)
2688 ; SSE-LABEL: max_lt_v8i16c:
2689 ; SSE: # BB#0:
2690 ; SSE-NEXT: movdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7]
2691 ; SSE-NEXT: pminsw {{.*}}(%rip), %xmm0
2692 ; SSE-NEXT: retq
2693 ;
2694 ; AVX-LABEL: max_lt_v8i16c:
2695 ; AVX: # BB#0:
2696 ; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7]
2697 ; AVX-NEXT: vpminsw {{.*}}(%rip), %xmm0, %xmm0
2698 ; AVX-NEXT: retq
2699 %1 = insertelement <8 x i16> <i16 -7, i16 -5, i16 -3, i16 -1, i16 1, i16 3, i16 5, i16 7>, i16 -7, i16 0 ; lane 0 rewritten with -7
2700 %2 = insertelement <8 x i16> <i16 -1, i16 -3, i16 -5, i16 -7, i16 7, i16 5, i16 3, i16 1>, i16 1, i16 0 ; NOTE(review): this operand is folded into a memory operand in the expected asm; restored by analogy with the v8i32 variant - confirm against upstream
2701 %3 = icmp slt <8 x i16> %1, %2
2702 %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2 ; keep %1 where %1 < %2 (signed)
2703 ret <8 x i16> %4
2704 }
2705
2706 define <16 x i16> @max_lt_v16i16c() { ; NOTE: despite the max_ name, slt + select(%1, %2) keeps the smaller lane (signed min, lowered to pminsw)
2707 ; SSE-LABEL: max_lt_v16i16c:
2708 ; SSE: # BB#0:
2709 ; SSE-NEXT: movdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8]
2710 ; SSE-NEXT: movdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0]
2711 ; SSE-NEXT: pminsw {{.*}}(%rip), %xmm0
2712 ; SSE-NEXT: pminsw {{.*}}(%rip), %xmm1
2713 ; SSE-NEXT: retq
2714 ;
2715 ; AVX1-LABEL: max_lt_v16i16c:
2716 ; AVX1: # BB#0:
2717 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0]
2718 ; AVX1-NEXT: vpminsw {{.*}}(%rip), %xmm0, %xmm0
2719 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8]
2720 ; AVX1-NEXT: vpminsw {{.*}}(%rip), %xmm1, %xmm1
2721 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2722 ; AVX1-NEXT: retq
2723 ;
2724 ; AVX2-LABEL: max_lt_v16i16c:
2725 ; AVX2: # BB#0:
2726 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [65529,65530,65531,65532,65533,65534,65535,0,1,2,3,4,5,6,7,8]
2727 ; AVX2-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0
2728 ; AVX2-NEXT: retq
2729 ;
2730 ; AVX512-LABEL: max_lt_v16i16c:
2731 ; AVX512: # BB#0:
2732 ; AVX512-NEXT: vmovdqa {{.*#+}} ymm0 = [65529,65530,65531,65532,65533,65534,65535,0,1,2,3,4,5,6,7,8]
2733 ; AVX512-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0
2734 ; AVX512-NEXT: retq
2735 %1 = insertelement <16 x i16> <i16 -7, i16 -6, i16 -5, i16 -4, i16 -3, i16 -2, i16 -1, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i16 -7, i16 0 ; lane 0 rewritten with -7
2736 %2 = insertelement <16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 0, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 1, i16 0 ; NOTE(review): this operand is folded into a memory operand in the expected asm; restored by analogy with the v16i8 variant - confirm against upstream
2737 %3 = icmp slt <16 x i16> %1, %2
2738 %4 = select <16 x i1> %3, <16 x i16> %1, <16 x i16> %2 ; keep %1 where %1 < %2 (signed)
2739 ret <16 x i16> %4
2740 }
2741
2742 define <16 x i8> @max_lt_v16i8c() { ; NOTE: despite the max_ name, slt + select(%1, %2) keeps the smaller lane (signed min, lowered to pminsb)
2743 ; SSE2-LABEL: max_lt_v16i8c:
2744 ; SSE2: # BB#0:
2745 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8]
2746 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1,254,253,252,251,250,249,0,7,6,5,4,3,2,1,0]
2747 ; SSE2-NEXT: movdqa %xmm2, %xmm0
2748 ; SSE2-NEXT: pcmpgtb %xmm1, %xmm0
2749 ; SSE2-NEXT: pand %xmm0, %xmm1
2750 ; SSE2-NEXT: pandn %xmm2, %xmm0
2751 ; SSE2-NEXT: por %xmm1, %xmm0
2752 ; SSE2-NEXT: retq
2753 ;
2754 ; SSE41-LABEL: max_lt_v16i8c:
2755 ; SSE41: # BB#0:
2756 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8]
2757 ; SSE41-NEXT: pminsb {{.*}}(%rip), %xmm0
2758 ; SSE41-NEXT: retq
2759 ;
2760 ; SSE42-LABEL: max_lt_v16i8c:
2761 ; SSE42: # BB#0:
2762 ; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8]
2763 ; SSE42-NEXT: pminsb {{.*}}(%rip), %xmm0
2764 ; SSE42-NEXT: retq
2765 ;
2766 ; AVX-LABEL: max_lt_v16i8c:
2767 ; AVX: # BB#0:
2768 ; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8]
2769 ; AVX-NEXT: vpminsb {{.*}}(%rip), %xmm0, %xmm0
2770 ; AVX-NEXT: retq
2771 %1 = insertelement <16 x i8> <i8 -7, i8 -6, i8 -5, i8 -4, i8 -3, i8 -2, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i8 -7, i8 0 ; lane 0 rewritten with -7
2772 %2 = insertelement <16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, i8 1, i8 0 ; lane 0 is overwritten with 1, so its base value is arbitrary
2773 %3 = icmp slt <16 x i8> %1, %2
2774 %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2 ; keep %1 where %1 < %2 (signed)
2775 ret <16 x i8> %4
2776 }
2777
2778 define <2 x i64> @max_le_v2i64c() { ; NOTE: despite the max_ name, sle + select(%1, %2) keeps the smaller lane (signed min)
2779 ; SSE2-LABEL: max_le_v2i64c:
2780 ; SSE2: # BB#0:
2781 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [18446744073709551609,7]
2782 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [18446744073709551615,1]
2783 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,0,2147483648,0]
2784 ; SSE2-NEXT: movdqa %xmm0, %xmm3
2785 ; SSE2-NEXT: pxor %xmm2, %xmm3
2786 ; SSE2-NEXT: pxor %xmm1, %xmm0
2787 ; SSE2-NEXT: movdqa %xmm0, %xmm4
2788 ; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
2789 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
2790 ; SSE2-NEXT: pcmpeqd %xmm3, %xmm0
2791 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
2792 ; SSE2-NEXT: pand %xmm5, %xmm0
2793 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
2794 ; SSE2-NEXT: por %xmm0, %xmm3
2795 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
2796 ; SSE2-NEXT: pxor %xmm3, %xmm0
2797 ; SSE2-NEXT: pandn %xmm1, %xmm3
2798 ; SSE2-NEXT: pandn %xmm2, %xmm0
2799 ; SSE2-NEXT: por %xmm3, %xmm0
2800 ; SSE2-NEXT: retq
2801 ;
2802 ; SSE41-LABEL: max_le_v2i64c:
2803 ; SSE41: # BB#0:
2804 ; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [18446744073709551609,7]
2805 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [18446744073709551615,1]
2806 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,0,2147483648,0]
2807 ; SSE41-NEXT: movdqa %xmm0, %xmm3
2808 ; SSE41-NEXT: pxor %xmm1, %xmm3
2809 ; SSE41-NEXT: pxor %xmm2, %xmm0
2810 ; SSE41-NEXT: movdqa %xmm0, %xmm4
2811 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm4
2812 ; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
2813 ; SSE41-NEXT: pcmpeqd %xmm3, %xmm0
2814 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
2815 ; SSE41-NEXT: pand %xmm5, %xmm0
2816 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
2817 ; SSE41-NEXT: por %xmm0, %xmm3
2818 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
2819 ; SSE41-NEXT: pxor %xmm3, %xmm0
2820 ; SSE41-NEXT: blendvpd %xmm2, %xmm1
2821 ; SSE41-NEXT: movapd %xmm1, %xmm0
2822 ; SSE41-NEXT: retq
2823 ;
2824 ; SSE42-LABEL: max_le_v2i64c:
2825 ; SSE42: # BB#0:
2826 ; SSE42-NEXT: movdqa {{.*#+}} xmm2 = [18446744073709551609,7]
2827 ; SSE42-NEXT: movdqa {{.*#+}} xmm1 = [18446744073709551615,1]
2828 ; SSE42-NEXT: movdqa %xmm2, %xmm3
2829 ; SSE42-NEXT: pcmpgtq %xmm1, %xmm3
2830 ; SSE42-NEXT: pcmpeqd %xmm0, %xmm0
2831 ; SSE42-NEXT: pxor %xmm3, %xmm0
2832 ; SSE42-NEXT: blendvpd %xmm2, %xmm1
2833 ; SSE42-NEXT: movapd %xmm1, %xmm0
2834 ; SSE42-NEXT: retq
2835 ;
2836 ; AVX-LABEL: max_le_v2i64c:
2837 ; AVX: # BB#0:
2838 ; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [18446744073709551609,7]
2839 ; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744073709551615,1]
2840 ; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
2841 ; AVX-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
2842 ; AVX-NEXT: vpxor %xmm3, %xmm2, %xmm2
2843 ; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
2844 ; AVX-NEXT: retq
2845 %1 = insertelement <2 x i64> <i64 -7, i64 7>, i64 -7, i32 0 ; lane 0 rewritten with -7
2846 %2 = insertelement <2 x i64> <i64 -1, i64 1>, i64 -1, i32 0 ; lane 0 rewritten with -1
2847 %3 = icmp sle <2 x i64> %1, %2
2848 %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2 ; keep %1 where %1 <= %2 (signed)
2849 ret <2 x i64> %4
2850 }
2851
2852 define <4 x i64> @max_le_v4i64c() { ; NOTE: despite the max_ name, sle + select(%1, %2) keeps the smaller lane (signed min)
2853 ; SSE2-LABEL: max_le_v4i64c:
2854 ; SSE2: # BB#0:
2855 ; SSE2-NEXT: movdqa {{.*#+}} xmm10 = [18446744073709551609,18446744073709551615]
2856 ; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [1,7]
2857 ; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [18446744073709551615,18446744073709551609]
2858 ; SSE2-NEXT: movdqa {{.*#+}} xmm9 = [7,1]
2859 ; SSE2-NEXT: movdqa {{.*#+}} xmm7 = [2147483648,0,2147483648,0]
2860 ; SSE2-NEXT: movdqa %xmm7, %xmm0
2861 ; SSE2-NEXT: pxor %xmm9, %xmm0
2862 ; SSE2-NEXT: movdqa %xmm7, %xmm1
2863 ; SSE2-NEXT: pxor %xmm8, %xmm1
2864 ; SSE2-NEXT: movdqa %xmm1, %xmm6
2865 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm6
2866 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm6[0,0,2,2]
2867 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm1
2868 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
2869 ; SSE2-NEXT: pand %xmm2, %xmm0
2870 ; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
2871 ; SSE2-NEXT: por %xmm0, %xmm6
2872 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
2873 ; SSE2-NEXT: movdqa %xmm6, %xmm1
2874 ; SSE2-NEXT: pxor %xmm0, %xmm1
2875 ; SSE2-NEXT: movdqa %xmm7, %xmm2
2876 ; SSE2-NEXT: pxor %xmm5, %xmm2
2877 ; SSE2-NEXT: pxor %xmm10, %xmm7
2878 ; SSE2-NEXT: movdqa %xmm7, %xmm3
2879 ; SSE2-NEXT: pcmpgtd %xmm2, %xmm3
2880 ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
2881 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm7
2882 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm7[1,1,3,3]
2883 ; SSE2-NEXT: pand %xmm4, %xmm2
2884 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
2885 ; SSE2-NEXT: por %xmm2, %xmm3
2886 ; SSE2-NEXT: pxor %xmm3, %xmm0
2887 ; SSE2-NEXT: pandn %xmm10, %xmm3
2888 ; SSE2-NEXT: pandn %xmm5, %xmm0
2889 ; SSE2-NEXT: por %xmm3, %xmm0
2890 ; SSE2-NEXT: pandn %xmm8, %xmm6
2891 ; SSE2-NEXT: pandn %xmm9, %xmm1
2892 ; SSE2-NEXT: por %xmm6, %xmm1
2893 ; SSE2-NEXT: retq
2894 ;
2895 ; SSE41-LABEL: max_le_v4i64c:
2896 ; SSE41: # BB#0:
2897 ; SSE41-NEXT: movdqa {{.*#+}} xmm9 = [18446744073709551609,18446744073709551615]
2898 ; SSE41-NEXT: movdqa {{.*#+}} xmm8 = [1,7]
2899 ; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [18446744073709551615,18446744073709551609]
2900 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [7,1]
2901 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,0,2147483648,0]
2902 ; SSE41-NEXT: movdqa %xmm0, %xmm3
2903 ; SSE41-NEXT: pxor %xmm1, %xmm3
2904 ; SSE41-NEXT: movdqa %xmm0, %xmm6
2905 ; SSE41-NEXT: pxor %xmm8, %xmm6
2906 ; SSE41-NEXT: movdqa %xmm6, %xmm7
2907 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm7
2908 ; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm7[0,0,2,2]
2909 ; SSE41-NEXT: pcmpeqd %xmm3, %xmm6
2910 ; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
2911 ; SSE41-NEXT: pand %xmm4, %xmm6
2912 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm7[1,1,3,3]
2913 ; SSE41-NEXT: por %xmm6, %xmm3
2914 ; SSE41-NEXT: pcmpeqd %xmm4, %xmm4
2915 ; SSE41-NEXT: pxor %xmm4, %xmm3
2916 ; SSE41-NEXT: movdqa %xmm0, %xmm6
2917 ; SSE41-NEXT: pxor %xmm2, %xmm6
2918 ; SSE41-NEXT: pxor %xmm9, %xmm0
2919 ; SSE41-NEXT: movdqa %xmm0, %xmm7
2920 ; SSE41-NEXT: pcmpgtd %xmm6, %xmm7
2921 ; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm7[0,0,2,2]
2922 ; SSE41-NEXT: pcmpeqd %xmm6, %xmm0
2923 ; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
2924 ; SSE41-NEXT: pand %xmm5, %xmm6
2925 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm7[1,1,3,3]
2926 ; SSE41-NEXT: por %xmm6, %xmm0
2927 ; SSE41-NEXT: pxor %xmm4, %xmm0
2928 ; SSE41-NEXT: blendvpd %xmm9, %xmm2
2929 ; SSE41-NEXT: movdqa %xmm3, %xmm0
2930 ; SSE41-NEXT: blendvpd %xmm8, %xmm1
2931 ; SSE41-NEXT: movapd %xmm2, %xmm0
2932 ; SSE41-NEXT: retq
2933 ;
2934 ; SSE42-LABEL: max_le_v4i64c:
2935 ; SSE42: # BB#0:
2936 ; SSE42-NEXT: movdqa {{.*#+}} xmm4 = [18446744073709551609,18446744073709551615]
2937 ; SSE42-NEXT: movdqa {{.*#+}} xmm5 = [1,7]
2938 ; SSE42-NEXT: movdqa {{.*#+}} xmm2 = [18446744073709551615,18446744073709551609]
2939 ; SSE42-NEXT: movdqa {{.*#+}} xmm1 = [7,1]
2940 ; SSE42-NEXT: movdqa %xmm5, %xmm3
2941 ; SSE42-NEXT: pcmpgtq %xmm1, %xmm3
2942 ; SSE42-NEXT: pcmpeqd %xmm6, %xmm6
2943 ; SSE42-NEXT: pxor %xmm6, %xmm3
2944 ; SSE42-NEXT: movdqa %xmm4, %xmm0
2945 ; SSE42-NEXT: pcmpgtq %xmm2, %xmm0
2946 ; SSE42-NEXT: pxor %xmm6, %xmm0
2947 ; SSE42-NEXT: blendvpd %xmm4, %xmm2
2948 ; SSE42-NEXT: movdqa %xmm3, %xmm0
2949 ; SSE42-NEXT: blendvpd %xmm5, %xmm1
2950 ; SSE42-NEXT: movapd %xmm2, %xmm0
2951 ; SSE42-NEXT: retq
2952 ;
2953 ; AVX1-LABEL: max_le_v4i64c:
2954 ; AVX1: # BB#0:
2955 ; AVX1-NEXT: vmovapd {{.*#+}} ymm0 = [18446744073709551615,18446744073709551609,7,1]
2956 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,7]
2957 ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm1, %xmm1
2958 ; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
2959 ; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm1
2960 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [18446744073709551609,18446744073709551615]
2961 ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm3, %xmm3
2962 ; AVX1-NEXT: vpxor %xmm2, %xmm3, %xmm2
2963 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
2964 ; AVX1-NEXT: vblendvpd %ymm1, {{.*}}(%rip), %ymm0, %ymm0
2965 ; AVX1-NEXT: retq
2966 ;
2967 ; AVX2-LABEL: max_le_v4i64c:
2968 ; AVX2: # BB#0:
2969 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [18446744073709551609,18446744073709551615,1,7]
2970 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [18446744073709551615,18446744073709551609,7,1]
2971 ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
2972 ; AVX2-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
2973 ; AVX2-NEXT: vpxor %ymm3, %ymm2, %ymm2
2974 ; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
2975 ; AVX2-NEXT: retq
2976 ;
2977 ; AVX512-LABEL: max_le_v4i64c:
2978 ; AVX512: # BB#0:
2979 ; AVX512-NEXT: vmovdqa {{.*#+}} ymm0 = [18446744073709551609,18446744073709551615,1,7]
2980 ; AVX512-NEXT: vmovdqa {{.*#+}} ymm1 = [18446744073709551615,18446744073709551609,7,1]
2981 ; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
2982 ; AVX512-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
2983 ; AVX512-NEXT: vpxor %ymm3, %ymm2, %ymm2
2984 ; AVX512-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
2985 ; AVX512-NEXT: retq
2986 %1 = insertelement <4 x i64> <i64 -7, i64 -1, i64 1, i64 7>, i64 -7, i32 0 ; lane 0 rewritten with -7
2987 %2 = insertelement <4 x i64> <i64 -1, i64 -7, i64 7, i64 1>, i64 -1, i32 0 ; lane 0 rewritten with -1
2988 %3 = icmp sle <4 x i64> %1, %2
2989 %4 = select <4 x i1> %3, <4 x i64> %1, <4 x i64> %2 ; keep %1 where %1 <= %2 (signed)
2990 ret <4 x i64> %4
2991 }
2992
2993 define <4 x i32> @max_le_v4i32c() { ; NOTE: despite the max_ name, sle + select(%1, %2) keeps the smaller lane (signed min, lowered to pminsd)
2994 ; SSE2-LABEL: max_le_v4i32c:
2995 ; SSE2: # BB#0:
2996 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [4294967289,4294967295,1,7]
2997 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1,4294967289,7,1]
2998 ; SSE2-NEXT: movdqa %xmm1, %xmm3
2999 ; SSE2-NEXT: pcmpgtd %xmm2, %xmm3
3000 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
3001 ; SSE2-NEXT: pxor %xmm3, %xmm0
3002 ; SSE2-NEXT: pandn %xmm1, %xmm3
3003 ; SSE2-NEXT: pandn %xmm2, %xmm0
3004 ; SSE2-NEXT: por %xmm3, %xmm0
3005 ; SSE2-NEXT: retq
3006 ;
3007 ; SSE41-LABEL: max_le_v4i32c:
3008 ; SSE41: # BB#0:
3009 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7]
3010 ; SSE41-NEXT: pminsd {{.*}}(%rip), %xmm0
3011 ; SSE41-NEXT: retq
3012 ;
3013 ; SSE42-LABEL: max_le_v4i32c:
3014 ; SSE42: # BB#0:
3015 ; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7]
3016 ; SSE42-NEXT: pminsd {{.*}}(%rip), %xmm0
3017 ; SSE42-NEXT: retq
3018 ;
3019 ; AVX-LABEL: max_le_v4i32c:
3020 ; AVX: # BB#0:
3021 ; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7]
3022 ; AVX-NEXT: vpminsd {{.*}}(%rip), %xmm0, %xmm0
3023 ; AVX-NEXT: retq
3024 %1 = insertelement <4 x i32> <i32 -7, i32 -1, i32 1, i32 7>, i32 -7, i32 0 ; lane 0 rewritten with -7
3025 %2 = insertelement <4 x i32> <i32 -1, i32 -7, i32 7, i32 1>, i32 1, i32 0 ; lane 0 is overwritten with 1, so its base value is arbitrary
3026 %3 = icmp sle <4 x i32> %1, %2
3027 %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2 ; keep %1 where %1 <= %2 (signed)
3028 ret <4 x i32> %4
3029 }
3030
3031 define <8 x i32> @max_le_v8i32c() { ; NOTE: despite the max_ name, sle + select(%1, %2) keeps the smaller lane (signed min, lowered to pminsd)
3032 ; SSE2-LABEL: max_le_v8i32c:
3033 ; SSE2: # BB#0:
3034 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [4294967289,4294967291,4294967293,4294967295]
3035 ; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [1,3,5,7]
3036 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [1,4294967293,4294967291,4294967289]
3037 ; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [7,5,3,1]
3038 ; SSE2-NEXT: movdqa %xmm3, %xmm6
3039 ; SSE2-NEXT: pcmpgtd %xmm5, %xmm6
3040 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
3041 ; SSE2-NEXT: movdqa %xmm6, %xmm1
3042 ; SSE2-NEXT: pxor %xmm0, %xmm1
3043 ; SSE2-NEXT: movdqa %xmm2, %xmm7
3044 ; SSE2-NEXT: pcmpgtd %xmm4, %xmm7
3045 ; SSE2-NEXT: pxor %xmm7, %xmm0
3046 ; SSE2-NEXT: pandn %xmm2, %xmm7
3047 ; SSE2-NEXT: pandn %xmm4, %xmm0
3048 ; SSE2-NEXT: por %xmm7, %xmm0
3049 ; SSE2-NEXT: pandn %xmm3, %xmm6
3050 ; SSE2-NEXT: pandn %xmm5, %xmm1
3051 ; SSE2-NEXT: por %xmm6, %xmm1
3052 ; SSE2-NEXT: retq
3053 ;
3054 ; SSE41-LABEL: max_le_v8i32c:
3055 ; SSE41: # BB#0:
3056 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,3,5,7]
3057 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295]
3058 ; SSE41-NEXT: pminsd {{.*}}(%rip), %xmm0
3059 ; SSE41-NEXT: pminsd {{.*}}(%rip), %xmm1
3060 ; SSE41-NEXT: retq
3061 ;
3062 ; SSE42-LABEL: max_le_v8i32c:
3063 ; SSE42: # BB#0:
3064 ; SSE42-NEXT: movdqa {{.*#+}} xmm1 = [1,3,5,7]
3065 ; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295]
3066 ; SSE42-NEXT: pminsd {{.*}}(%rip), %xmm0
3067 ; SSE42-NEXT: pminsd {{.*}}(%rip), %xmm1
3068 ; SSE42-NEXT: retq
3069 ;
3070 ; AVX1-LABEL: max_le_v8i32c:
3071 ; AVX1: # BB#0:
3072 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295]
3073 ; AVX1-NEXT: vpminsd {{.*}}(%rip), %xmm0, %xmm0
3074 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,3,5,7]
3075 ; AVX1-NEXT: vpminsd {{.*}}(%rip), %xmm1, %xmm1
3076 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3077 ; AVX1-NEXT: retq
3078 ;
3079 ; AVX2-LABEL: max_le_v8i32c:
3080 ; AVX2: # BB#0:
3081 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [4294967289,4294967291,4294967293,4294967295,1,3,5,7]
3082 ; AVX2-NEXT: vpminsd {{.*}}(%rip), %ymm0, %ymm0
3083 ; AVX2-NEXT: retq
3084 ;
3085 ; AVX512-LABEL: max_le_v8i32c:
3086 ; AVX512: # BB#0:
3087 ; AVX512-NEXT: vmovdqa {{.*#+}} ymm0 = [4294967289,4294967291,4294967293,4294967295,1,3,5,7]
3088 ; AVX512-NEXT: vpminsd {{.*}}(%rip), %ymm0, %ymm0
3089 ; AVX512-NEXT: retq
3090 %1 = insertelement <8 x i32> <i32 -7, i32 -5, i32 -3, i32 -1, i32 1, i32 3, i32 5, i32 7>, i32 -7, i32 0 ; lane 0 rewritten with -7
3091 %2 = insertelement <8 x i32> <i32 -1, i32 -3, i32 -5, i32 -7, i32 7, i32 5, i32 3, i32 1>, i32 1, i32 0 ; lane 0 is overwritten with 1, so its base value is arbitrary
3092 %3 = icmp sle <8 x i32> %1, %2
3093 %4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2 ; keep %1 where %1 <= %2 (signed)
3094 ret <8 x i32> %4
3095 }
3096
3097 define <8 x i16> @max_le_v8i16c() { ; NOTE: despite the max_ name, sle + select(%1, %2) keeps the smaller lane (signed min, lowered to pminsw)
3098 ; SSE-LABEL: max_le_v8i16c:
3099 ; SSE: # BB#0:
3100 ; SSE-NEXT: movdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7]
3101 ; SSE-NEXT: pminsw {{.*}}(%rip), %xmm0
3102 ; SSE-NEXT: retq
3103 ;
3104 ; AVX-LABEL: max_le_v8i16c:
3105 ; AVX: # BB#0:
3106 ; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7]
3107 ; AVX-NEXT: vpminsw {{.*}}(%rip), %xmm0, %xmm0
3108 ; AVX-NEXT: retq
3109 %1 = insertelement <8 x i16> <i16 -7, i16 -5, i16 -3, i16 -1, i16 1, i16 3, i16 5, i16 7>, i16 -7, i16 0 ; lane 0 rewritten with -7
3110 %2 = insertelement <8 x i16> <i16 -1, i16 -3, i16 -5, i16 -7, i16 7, i16 5, i16 3, i16 1>, i16 1, i16 0 ; NOTE(review): this operand is folded into a memory operand in the expected asm; restored by analogy with the v8i32 variant - confirm against upstream
3111 %3 = icmp sle <8 x i16> %1, %2
3112 %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2 ; keep %1 where %1 <= %2 (signed)
3113 ret <8 x i16> %4
3114 }
3115
3116 define <16 x i16> @max_le_v16i16c() { ; NOTE: despite the max_ name, sle + select(%1, %2) keeps the smaller lane (signed min, lowered to pminsw)
3117 ; SSE-LABEL: max_le_v16i16c:
3118 ; SSE: # BB#0:
3119 ; SSE-NEXT: movdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8]
3120 ; SSE-NEXT: movdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0]
3121 ; SSE-NEXT: pminsw {{.*}}(%rip), %xmm0
3122 ; SSE-NEXT: pminsw {{.*}}(%rip), %xmm1
3123 ; SSE-NEXT: retq
3124 ;
3125 ; AVX1-LABEL: max_le_v16i16c:
3126 ; AVX1: # BB#0:
3127 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0]
3128 ; AVX1-NEXT: vpminsw {{.*}}(%rip), %xmm0, %xmm0
3129 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8]
3130 ; AVX1-NEXT: vpminsw {{.*}}(%rip), %xmm1, %xmm1
3131 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3132 ; AVX1-NEXT: retq
3133 ;
3134 ; AVX2-LABEL: max_le_v16i16c:
3135 ; AVX2: # BB#0:
3136 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [65529,65530,65531,65532,65533,65534,65535,0,1,2,3,4,5,6,7,8]
3137 ; AVX2-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0
3138 ; AVX2-NEXT: retq
3139 ;
3140 ; AVX512-LABEL: max_le_v16i16c:
3141 ; AVX512: # BB#0:
3142 ; AVX512-NEXT: vmovdqa {{.*#+}} ymm0 = [65529,65530,65531,65532,65533,65534,65535,0,1,2,3,4,5,6,7,8]
3143 ; AVX512-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0
3144 ; AVX512-NEXT: retq
3145 %1 = insertelement <16 x i16> <i16 -7, i16 -6, i16 -5, i16 -4, i16 -3, i16 -2, i16 -1, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i16 -7, i16 0 ; lane 0 rewritten with -7
3146 %2 = insertelement <16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 0, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 1, i16 0 ; NOTE(review): this operand is folded into a memory operand in the expected asm; restored by analogy with the v16i8 variant - confirm against upstream
3147 %3 = icmp sle <16 x i16> %1, %2
3148 %4 = select <16 x i1> %3, <16 x i16> %1, <16 x i16> %2 ; keep %1 where %1 <= %2 (signed)
3149 ret <16 x i16> %4
3150 }
3151
3152 define <16 x i8> @max_le_v16i8c() { ; NOTE: despite the max_ name, sle + select(%1, %2) keeps the smaller lane (signed min, lowered to pminsb)
3153 ; SSE2-LABEL: max_le_v16i8c:
3154 ; SSE2: # BB#0:
3155 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8]
3156 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1,254,253,252,251,250,249,0,7,6,5,4,3,2,1,0]
3157 ; SSE2-NEXT: movdqa %xmm1, %xmm3
3158 ; SSE2-NEXT: pcmpgtb %xmm2, %xmm3
3159 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
3160 ; SSE2-NEXT: pxor %xmm3, %xmm0
3161 ; SSE2-NEXT: pandn %xmm1, %xmm3
3162 ; SSE2-NEXT: pandn %xmm2, %xmm0
3163 ; SSE2-NEXT: por %xmm3, %xmm0
3164 ; SSE2-NEXT: retq
3165 ;
3166 ; SSE41-LABEL: max_le_v16i8c:
3167 ; SSE41: # BB#0:
3168 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8]
3169 ; SSE41-NEXT: pminsb {{.*}}(%rip), %xmm0
3170 ; SSE41-NEXT: retq
3171 ;
3172 ; SSE42-LABEL: max_le_v16i8c:
3173 ; SSE42: # BB#0:
3174 ; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8]
3175 ; SSE42-NEXT: pminsb {{.*}}(%rip), %xmm0
3176 ; SSE42-NEXT: retq
3177 ;
3178 ; AVX-LABEL: max_le_v16i8c:
3179 ; AVX: # BB#0:
3180 ; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8]
3181 ; AVX-NEXT: vpminsb {{.*}}(%rip), %xmm0, %xmm0
3182 ; AVX-NEXT: retq
3183 %1 = insertelement <16 x i8> <i8 -7, i8 -6, i8 -5, i8 -4, i8 -3, i8 -2, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i8 -7, i8 0 ; lane 0 rewritten with -7
3184 %2 = insertelement <16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, i8 1, i8 0 ; lane 0 is overwritten with 1, so its base value is arbitrary
3185 %3 = icmp sle <16 x i8> %1, %2
3186 %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2 ; keep %1 where %1 <= %2 (signed)
3187 ret <16 x i8> %4
3188 }
17331733 %2 = select <32 x i1> %1, <32 x i8> %a, <32 x i8> %b
17341734 ret <32 x i8> %2
17351735 }
1736
1737 ;
1738 ; Constant Folding
1739 ;
1740
; max_gt_v2i64c - constant-operand unsigned-greater-than compare+select test
; on <2 x i64>. Takes no arguments. None of the runs below expects a single
; max instruction: the SSE2 and SSE41 runs XOR both inputs with 2147483648 in
; every 32-bit lane and combine pcmpgtd/pcmpeqd results, while the SSE42 and
; AVX runs use pcmpgtq followed by blendvpd.
; NOTE(review): the XOR with 2147483648 presumably biases the values so the
; signed dword compares give the unsigned ordering - confirm.
; NOTE(review): the vector operand of each insertelement is absent in this
; copy of the test - confirm against the upstream file.
1741 define <2 x i64> @max_gt_v2i64c() {
1742 ; SSE2-LABEL: max_gt_v2i64c:
1743 ; SSE2: # BB#0:
1744 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [18446744073709551609,7]
1745 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [18446744073709551615,1]
1746 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
1747 ; SSE2-NEXT: movdqa %xmm0, %xmm3
1748 ; SSE2-NEXT: pxor %xmm2, %xmm3
1749 ; SSE2-NEXT: pxor %xmm1, %xmm0
1750 ; SSE2-NEXT: movdqa %xmm0, %xmm4
1751 ; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
1752 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
1753 ; SSE2-NEXT: pcmpeqd %xmm3, %xmm0
1754 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
1755 ; SSE2-NEXT: pand %xmm5, %xmm3
1756 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
1757 ; SSE2-NEXT: por %xmm3, %xmm0
1758 ; SSE2-NEXT: movdqa %xmm0, %xmm3
1759 ; SSE2-NEXT: pandn %xmm2, %xmm3
1760 ; SSE2-NEXT: pand %xmm1, %xmm0
1761 ; SSE2-NEXT: por %xmm3, %xmm0
1762 ; SSE2-NEXT: retq
1763 ;
1764 ; SSE41-LABEL: max_gt_v2i64c:
1765 ; SSE41: # BB#0:
1766 ; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [18446744073709551609,7]
1767 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [18446744073709551615,1]
1768 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
1769 ; SSE41-NEXT: movdqa %xmm0, %xmm3
1770 ; SSE41-NEXT: pxor %xmm1, %xmm3
1771 ; SSE41-NEXT: pxor %xmm2, %xmm0
1772 ; SSE41-NEXT: movdqa %xmm0, %xmm4
1773 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm4
1774 ; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
1775 ; SSE41-NEXT: pcmpeqd %xmm3, %xmm0
1776 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
1777 ; SSE41-NEXT: pand %xmm5, %xmm3
1778 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
1779 ; SSE41-NEXT: por %xmm3, %xmm0
1780 ; SSE41-NEXT: blendvpd %xmm2, %xmm1
1781 ; SSE41-NEXT: movapd %xmm1, %xmm0
1782 ; SSE41-NEXT: retq
1783 ;
1784 ; SSE42-LABEL: max_gt_v2i64c:
1785 ; SSE42: # BB#0:
1786 ; SSE42-NEXT: movapd {{.*#+}} xmm1 = [18446744073709551615,1]
1787 ; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775801,9223372036854775815]
1788 ; SSE42-NEXT: pcmpgtq {{.*}}(%rip), %xmm0
1789 ; SSE42-NEXT: blendvpd {{.*}}(%rip), %xmm1
1790 ; SSE42-NEXT: movapd %xmm1, %xmm0
1791 ; SSE42-NEXT: retq
1792 ;
1793 ; AVX-LABEL: max_gt_v2i64c:
1794 ; AVX: # BB#0:
1795 ; AVX-NEXT: vmovapd {{.*#+}} xmm0 = [18446744073709551615,1]
1796 ; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775801,9223372036854775815]
1797 ; AVX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm1, %xmm1
1798 ; AVX-NEXT: vblendvpd %xmm1, {{.*}}(%rip), %xmm0, %xmm0
1799 ; AVX-NEXT: retq
; IR under test: write i64 -7 into lane 0 of the first input and i64 -1 into
; lane 0 of the second, compare with unsigned greater-than, then pick %1
; where the compare is true and %2 elsewhere.
1800 %1 = insertelement <2 x i64> , i64 -7, i32 0
1801 %2 = insertelement <2 x i64> , i64 -1, i32 0
1802 %3 = icmp ugt <2 x i64> %1, %2
1803 %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
1804 ret <2 x i64> %4
1805 }
1806
; max_gt_v4i64c - constant-operand unsigned-greater-than compare+select test
; on <4 x i64>. Takes no arguments. The SSE runs below handle the value as
; two 128-bit halves (results left in xmm0 and xmm1); the AVX1 run compares
; per 128-bit half and joins the masks with vinsertf128; the AVX2 and AVX512
; runs compare and blend on a full ymm register. No run expects a single max
; instruction.
; NOTE(review): the XOR with 2147483648 on the SSE2/SSE41 runs presumably
; biases the values so signed dword compares give the unsigned ordering -
; confirm.
; NOTE(review): the vector operand of each insertelement is absent in this
; copy of the test - confirm against the upstream file.
1807 define <4 x i64> @max_gt_v4i64c() {
1808 ; SSE2-LABEL: max_gt_v4i64c:
1809 ; SSE2: # BB#0:
1810 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [18446744073709551609,18446744073709551615]
1811 ; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [1,7]
1812 ; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [18446744073709551615,18446744073709551609]
1813 ; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [7,1]
1814 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
1815 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1816 ; SSE2-NEXT: pxor %xmm3, %xmm1
1817 ; SSE2-NEXT: movdqa %xmm0, %xmm6
1818 ; SSE2-NEXT: pxor %xmm8, %xmm6
1819 ; SSE2-NEXT: movdqa %xmm6, %xmm7
1820 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm7
1821 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm7[0,0,2,2]
1822 ; SSE2-NEXT: pcmpeqd %xmm1, %xmm6
1823 ; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
1824 ; SSE2-NEXT: pand %xmm2, %xmm6
1825 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm7[1,1,3,3]
1826 ; SSE2-NEXT: por %xmm6, %xmm1
1827 ; SSE2-NEXT: movdqa %xmm0, %xmm2
1828 ; SSE2-NEXT: pxor %xmm5, %xmm2
1829 ; SSE2-NEXT: pxor %xmm4, %xmm0
1830 ; SSE2-NEXT: movdqa %xmm0, %xmm6
1831 ; SSE2-NEXT: pcmpgtd %xmm2, %xmm6
1832 ; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
1833 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
1834 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
1835 ; SSE2-NEXT: pand %xmm7, %xmm2
1836 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3]
1837 ; SSE2-NEXT: por %xmm2, %xmm0
1838 ; SSE2-NEXT: movdqa %xmm0, %xmm2
1839 ; SSE2-NEXT: pandn %xmm5, %xmm2
1840 ; SSE2-NEXT: pand %xmm4, %xmm0
1841 ; SSE2-NEXT: por %xmm2, %xmm0
1842 ; SSE2-NEXT: movdqa %xmm1, %xmm2
1843 ; SSE2-NEXT: pandn %xmm3, %xmm2
1844 ; SSE2-NEXT: pand %xmm8, %xmm1
1845 ; SSE2-NEXT: por %xmm2, %xmm1
1846 ; SSE2-NEXT: retq
1847 ;
1848 ; SSE41-LABEL: max_gt_v4i64c:
1849 ; SSE41: # BB#0:
1850 ; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [18446744073709551609,18446744073709551615]
1851 ; SSE41-NEXT: movdqa {{.*#+}} xmm8 = [1,7]
1852 ; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [18446744073709551615,18446744073709551609]
1853 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [7,1]
1854 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
1855 ; SSE41-NEXT: movdqa %xmm0, %xmm3
1856 ; SSE41-NEXT: pxor %xmm1, %xmm3
1857 ; SSE41-NEXT: movdqa %xmm0, %xmm6
1858 ; SSE41-NEXT: pxor %xmm8, %xmm6
1859 ; SSE41-NEXT: movdqa %xmm6, %xmm7
1860 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm7
1861 ; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm7[0,0,2,2]
1862 ; SSE41-NEXT: pcmpeqd %xmm3, %xmm6
1863 ; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
1864 ; SSE41-NEXT: pand %xmm4, %xmm6
1865 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm7[1,1,3,3]
1866 ; SSE41-NEXT: por %xmm6, %xmm3
1867 ; SSE41-NEXT: movdqa %xmm0, %xmm4
1868 ; SSE41-NEXT: pxor %xmm2, %xmm4
1869 ; SSE41-NEXT: pxor %xmm5, %xmm0
1870 ; SSE41-NEXT: movdqa %xmm0, %xmm6
1871 ; SSE41-NEXT: pcmpgtd %xmm4, %xmm6
1872 ; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
1873 ; SSE41-NEXT: pcmpeqd %xmm4, %xmm0
1874 ; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
1875 ; SSE41-NEXT: pand %xmm7, %xmm4
1876 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3]
1877 ; SSE41-NEXT: por %xmm4, %xmm0
1878 ; SSE41-NEXT: blendvpd %xmm5, %xmm2
1879 ; SSE41-NEXT: movdqa %xmm3, %xmm0
1880 ; SSE41-NEXT: blendvpd %xmm8, %xmm1
1881 ; SSE41-NEXT: movapd %xmm2, %xmm0
1882 ; SSE41-NEXT: retq
1883 ;
1884 ; SSE42-LABEL: max_gt_v4i64c:
1885 ; SSE42: # BB#0:
1886 ; SSE42-NEXT: movapd {{.*#+}} xmm1 = [7,1]
1887 ; SSE42-NEXT: movapd {{.*#+}} xmm2 = [18446744073709551615,18446744073709551609]
1888 ; SSE42-NEXT: movdqa {{.*#+}} xmm3 = [9223372036854775809,9223372036854775815]
1889 ; SSE42-NEXT: pcmpgtq {{.*}}(%rip), %xmm3
1890 ; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775801,9223372036854775807]
1891 ; SSE42-NEXT: pcmpgtq {{.*}}(%rip), %xmm0
1892 ; SSE42-NEXT: blendvpd {{.*}}(%rip), %xmm2
1893 ; SSE42-NEXT: movdqa %xmm3, %xmm0
1894 ; SSE42-NEXT: blendvpd {{.*}}(%rip), %xmm1
1895 ; SSE42-NEXT: movapd %xmm2, %xmm0
1896 ; SSE42-NEXT: retq
1897 ;
1898 ; AVX1-LABEL: max_gt_v4i64c:
1899 ; AVX1: # BB#0:
1900 ; AVX1-NEXT: vmovapd {{.*#+}} ymm0 = [18446744073709551615,18446744073709551609,7,1]
1901 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775801,9223372036854775807]
1902 ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm1, %xmm1
1903 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775809,9223372036854775815]
1904 ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm2, %xmm2
1905 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
1906 ; AVX1-NEXT: vblendvpd %ymm1, {{.*}}(%rip), %ymm0, %ymm0
1907 ; AVX1-NEXT: retq
1908 ;
1909 ; AVX2-LABEL: max_gt_v4i64c:
1910 ; AVX2: # BB#0:
1911 ; AVX2-NEXT: vmovapd {{.*#+}} ymm0 = [18446744073709551615,18446744073709551609,7,1]
1912 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [9223372036854775801,9223372036854775807,9223372036854775809,9223372036854775815]
1913 ; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %ymm1, %ymm1
1914 ; AVX2-NEXT: vblendvpd %ymm1, {{.*}}(%rip), %ymm0, %ymm0
1915 ; AVX2-NEXT: retq
1916 ;
1917 ; AVX512-LABEL: max_gt_v4i64c:
1918 ; AVX512: # BB#0:
1919 ; AVX512-NEXT: vmovapd {{.*#+}} ymm0 = [18446744073709551615,18446744073709551609,7,1]
1920 ; AVX512-NEXT: vmovdqa {{.*#+}} ymm1 = [9223372036854775801,9223372036854775807,9223372036854775809,9223372036854775815]
1921 ; AVX512-NEXT: vpcmpgtq {{.*}}(%rip), %ymm1, %ymm1
1922 ; AVX512-NEXT: vblendvpd %ymm1, {{.*}}(%rip), %ymm0, %ymm0
1923 ; AVX512-NEXT: retq
; IR under test: write i64 -7 into lane 0 of the first input and i64 -1 into
; lane 0 of the second, compare with unsigned greater-than, then pick %1
; where the compare is true and %2 elsewhere.
1924 %1 = insertelement <4 x i64> , i64 -7, i32 0
1925 %2 = insertelement <4 x i64> , i64 -1, i32 0
1926 %3 = icmp ugt <4 x i64> %1, %2
1927 %4 = select <4 x i1> %3, <4 x i64> %1, <4 x i64> %2
1928 ret <4 x i64> %4
1929 }
1930
; max_gt_v4i32c - constant-operand unsigned-greater-than compare+select test
; on <4 x i32>. Takes no arguments. The SSE2 run below expects pcmpgtd plus a
; pandn/pand/por mask blend; every other run expects a single pmaxud
; (vpmaxud on the AVX run) against a memory constant.
; NOTE(review): the vector operand of each insertelement is absent in this
; copy of the test - confirm against the upstream file.
1931 define <4 x i32> @max_gt_v4i32c() {
1932 ; SSE2-LABEL: max_gt_v4i32c:
1933 ; SSE2: # BB#0:
1934 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483641,2147483647,2147483649,2147483655]
1935 ; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm0
1936 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1937 ; SSE2-NEXT: pandn {{.*}}(%rip), %xmm1
1938 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
1939 ; SSE2-NEXT: por %xmm1, %xmm0
1940 ; SSE2-NEXT: retq
1941 ;
1942 ; SSE41-LABEL: max_gt_v4i32c:
1943 ; SSE41: # BB#0:
1944 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7]
1945 ; SSE41-NEXT: pmaxud {{.*}}(%rip), %xmm0
1946 ; SSE41-NEXT: retq
1947 ;
1948 ; SSE42-LABEL: max_gt_v4i32c:
1949 ; SSE42: # BB#0:
1950 ; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7]
1951 ; SSE42-NEXT: pmaxud {{.*}}(%rip), %xmm0
1952 ; SSE42-NEXT: retq
1953 ;
1954 ; AVX-LABEL: max_gt_v4i32c:
1955 ; AVX: # BB#0:
1956 ; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7]
1957 ; AVX-NEXT: vpmaxud {{.*}}(%rip), %xmm0, %xmm0
1958 ; AVX-NEXT: retq
; IR under test: write i32 -7 into lane 0 of the first input and i32 1 into
; lane 0 of the second, compare with unsigned greater-than, then pick %1
; where the compare is true and %2 elsewhere.
1959 %1 = insertelement <4 x i32> , i32 -7, i32 0
1960 %2 = insertelement <4 x i32> , i32 1, i32 0
1961 %3 = icmp ugt <4 x i32> %1, %2
1962 %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
1963 ret <4 x i32> %4
1964 }
1965
; max_gt_v8i32c - constant-operand unsigned-greater-than compare+select test
; on <8 x i32>. Takes no arguments. The SSE2 run below expects two pcmpgtd
; compares each followed by a pandn/pand/por blend; the SSE41 and SSE42 runs
; expect one pmaxud per 128-bit half; the AVX1 run expects two vpmaxud joined
; by vinsertf128; the AVX2 and AVX512 runs expect one ymm vpmaxud.
; NOTE(review): the vector operand of each insertelement is absent in this
; copy of the test - confirm against the upstream file.
1966 define <8 x i32> @max_gt_v8i32c() {
1967 ; SSE2-LABEL: max_gt_v8i32c:
1968 ; SSE2: # BB#0:
1969 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483649,2147483651,2147483653,2147483655]
1970 ; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm1
1971 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483641,2147483643,2147483645,2147483647]
1972 ; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm0
1973 ; SSE2-NEXT: movdqa %xmm0, %xmm2
1974 ; SSE2-NEXT: pandn {{.*}}(%rip), %xmm2
1975 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
1976 ; SSE2-NEXT: por %xmm2, %xmm0
1977 ; SSE2-NEXT: movdqa %xmm1, %xmm2
1978 ; SSE2-NEXT: pandn {{.*}}(%rip), %xmm2
1979 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
1980 ; SSE2-NEXT: por %xmm2, %xmm1
1981 ; SSE2-NEXT: retq
1982 ;
1983 ; SSE41-LABEL: max_gt_v8i32c:
1984 ; SSE41: # BB#0:
1985 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,3,5,7]
1986 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295]
1987 ; SSE41-NEXT: pmaxud {{.*}}(%rip), %xmm0
1988 ; SSE41-NEXT: pmaxud {{.*}}(%rip), %xmm1
1989 ; SSE41-NEXT: retq
1990 ;
1991 ; SSE42-LABEL: max_gt_v8i32c:
1992 ; SSE42: # BB#0:
1993 ; SSE42-NEXT: movdqa {{.*#+}} xmm1 = [1,3,5,7]
1994 ; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295]
1995 ; SSE42-NEXT: pmaxud {{.*}}(%rip), %xmm0
1996 ; SSE42-NEXT: pmaxud {{.*}}(%rip), %xmm1
1997 ; SSE42-NEXT: retq
1998 ;
1999 ; AVX1-LABEL: max_gt_v8i32c:
2000 ; AVX1: # BB#0:
2001 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295]
2002 ; AVX1-NEXT: vpmaxud {{.*}}(%rip), %xmm0, %xmm0
2003 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,3,5,7]
2004 ; AVX1-NEXT: vpmaxud {{.*}}(%rip), %xmm1, %xmm1
2005 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2006 ; AVX1-NEXT: retq
2007 ;
2008 ; AVX2-LABEL: max_gt_v8i32c:
2009 ; AVX2: # BB#0:
2010 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [4294967289,4294967291,4294967293,4294967295,1,3,5,7]
2011 ; AVX2-NEXT: vpmaxud {{.*}}(%rip), %ymm0, %ymm0
2012 ; AVX2-NEXT: retq
2013 ;
2014 ; AVX512-LABEL: max_gt_v8i32c:
2015 ; AVX512: # BB#0:
2016 ; AVX512-NEXT: vmovdqa {{.*#+}} ymm0 = [4294967289,4294967291,4294967293,4294967295,1,3,5,7]
2017 ; AVX512-NEXT: vpmaxud {{.*}}(%rip), %ymm0, %ymm0
2018 ; AVX512-NEXT: retq
; IR under test: write i32 -7 into lane 0 of the first input and i32 1 into
; lane 0 of the second, compare with unsigned greater-than, then pick %1
; where the compare is true and %2 elsewhere.
2019 %1 = insertelement <8 x i32> , i32 -7, i32 0
2020 %2 = insertelement <8 x i32> , i32 1, i32 0
2021 %3 = icmp ugt <8 x i32> %1, %2
2022 %4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2
2023 ret <8 x i32> %4
2024 }
2025
; max_gt_v8i16c - constant-operand unsigned-greater-than compare+select test
; on <8 x i16>. Takes no arguments. The SSE2 run below expects pcmpgtw plus a
; pandn/pand/por mask blend; every other run expects a single pmaxuw
; (vpmaxuw on the AVX run) against a memory constant.
; NOTE(review): the vector operand of each insertelement is absent in this
; copy of the test - confirm against the upstream file.
2026 define <8 x i16> @max_gt_v8i16c() {
2027 ; SSE2-LABEL: max_gt_v8i16c:
2028 ; SSE2: # BB#0:
2029 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [32761,32763,32765,32767,32769,32771,32773,32775]
2030 ; SSE2-NEXT: pcmpgtw {{.*}}(%rip), %xmm0
2031 ; SSE2-NEXT: movdqa %xmm0, %xmm1
2032 ; SSE2-NEXT: pandn {{.*}}(%rip), %xmm1
2033 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
2034 ; SSE2-NEXT: por %xmm1, %xmm0
2035 ; SSE2-NEXT: retq
2036 ;
2037 ; SSE41-LABEL: max_gt_v8i16c:
2038 ; SSE41: # BB#0:
2039 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7]
2040 ; SSE41-NEXT: pmaxuw {{.*}}(%rip), %xmm0
2041 ; SSE41-NEXT: retq
2042 ;
2043 ; SSE42-LABEL: max_gt_v8i16c:
2044 ; SSE42: # BB#0:
2045 ; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7]
2046 ; SSE42-NEXT: pmaxuw {{.*}}(%rip), %xmm0
2047 ; SSE42-NEXT: retq
2048 ;
2049 ; AVX-LABEL: max_gt_v8i16c:
2050 ; AVX: # BB#0:
2051 ; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7]
2052 ; AVX-NEXT: vpmaxuw {{.*}}(%rip), %xmm0, %xmm0
2053 ; AVX-NEXT: retq
; IR under test: write i16 -7 into lane 0 of the first input and i16 1 into
; lane 0 of the second, compare with unsigned greater-than, then pick %1
; where the compare is true and %2 elsewhere.
2054 %1 = insertelement <8 x i16> , i16 -7, i16 0
2055 %2 = insertelement <8 x i16> , i16 1, i16 0
2056 %3 = icmp ugt <8 x i16> %1, %2
2057 %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
2058 ret <8 x i16> %4
2059 }
2060
; max_gt_v16i16c - constant-operand unsigned-greater-than compare+select test
; on <16 x i16>. Takes no arguments. The SSE2 run below expects two pcmpgtw
; compares each followed by a pandn/pand/por blend; the SSE41 and SSE42 runs
; expect one pmaxuw per 128-bit half; the AVX1 run expects two vpmaxuw joined
; by vinsertf128; the AVX2 and AVX512 runs expect one ymm vpmaxuw.
; NOTE(review): the vector operand of each insertelement is absent in this
; copy of the test - confirm against the upstream file.
2061 define <16 x i16> @max_gt_v16i16c() {
2062 ; SSE2-LABEL: max_gt_v16i16c:
2063 ; SSE2: # BB#0:
2064 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [32769,32770,32771,32772,32773,32774,32775,32776]
2065 ; SSE2-NEXT: pcmpgtw {{.*}}(%rip), %xmm1
2066 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [32761,32762,32763,32764,32765,32766,32767,32768]
2067 ; SSE2-NEXT: pcmpgtw {{.*}}(%rip), %xmm0
2068 ; SSE2-NEXT: movdqa %xmm0, %xmm2
2069 ; SSE2-NEXT: pandn {{.*}}(%rip), %xmm2
2070 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
2071 ; SSE2-NEXT: por %xmm2, %xmm0
2072 ; SSE2-NEXT: movdqa %xmm1, %xmm2
2073 ; SSE2-NEXT: pandn {{.*}}(%rip), %xmm2
2074 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
2075 ; SSE2-NEXT: por %xmm2, %xmm1
2076 ; SSE2-NEXT: retq
2077 ;
2078 ; SSE41-LABEL: max_gt_v16i16c:
2079 ; SSE41: # BB#0:
2080 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8]
2081 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0]
2082 ; SSE41-NEXT: pmaxuw {{.*}}(%rip), %xmm0
2083 ; SSE41-NEXT: pmaxuw {{.*}}(%rip), %xmm1
2084 ; SSE41-NEXT: retq
2085 ;
2086 ; SSE42-LABEL: max_gt_v16i16c:
2087 ; SSE42: # BB#0:
2088 ; SSE42-NEXT: movdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8]
2089 ; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0]
2090 ; SSE42-NEXT: pmaxuw {{.*}}(%rip), %xmm0
2091 ; SSE42-NEXT: pmaxuw {{.*}}(%rip), %xmm1
2092 ; SSE42-NEXT: retq
2093 ;
2094 ; AVX1-LABEL: max_gt_v16i16c:
2095 ; AVX1: # BB#0:
2096 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0]
2097 ; AVX1-NEXT: vpmaxuw {{.*}}(%rip), %xmm0, %xmm0
2098 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8]
2099 ; AVX1-NEXT: vpmaxuw {{.*}}(%rip), %xmm1, %xmm1
2100 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2101 ; AVX1-NEXT: retq
2102 ;
2103 ; AVX2-LABEL: max_gt_v16i16c:
2104 ; AVX2: # BB#0:
2105 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [65529,65530,65531,65532,65533,65534,65535,0,1,2,3,4,5,6,7,8]
2106 ; AVX2-NEXT: vpmaxuw {{.*}}(%rip), %ymm0, %ymm0
2107 ; AVX2-NEXT: retq
2108 ;
2109 ; AVX512-LABEL: max_gt_v16i16c:
2110 ; AVX512: # BB#0:
2111 ; AVX512-NEXT: vmovdqa {{.*#+}} ymm0 = [65529,65530,65531,65532,65533,65534,65535,0,1,2,3,4,5,6,7,8]
2112 ; AVX512-NEXT: vpmaxuw {{.*}}(%rip), %ymm0, %ymm0
2113 ; AVX512-NEXT: retq
; IR under test: write i16 -7 into lane 0 of the first input and i16 1 into
; lane 0 of the second, compare with unsigned greater-than, then pick %1
; where the compare is true and %2 elsewhere.
2114 %1 = insertelement <16 x i16> , i16 -7, i16 0
2115 %2 = insertelement <16 x i16> , i16 1, i16 0
2116 %3 = icmp ugt <16 x i16> %1, %2
2117 %4 = select <16 x i1> %3, <16 x i16> %1, <16 x i16> %2
2118 ret <16 x i16> %4
2119 }
2120
; max_gt_v16i8c - constant-operand unsigned-greater-than compare+select test
; on <16 x i8>. Takes no arguments. Only two expectation groups appear below
; (a shared SSE one and an AVX one); both want a single pmaxub / vpmaxub
; against a memory constant.
; NOTE(review): the vector operand of each insertelement is absent in this
; copy of the test - confirm against the upstream file.
2121 define <16 x i8> @max_gt_v16i8c() {
2122 ; SSE-LABEL: max_gt_v16i8c:
2123 ; SSE: # BB#0:
2124 ; SSE-NEXT: movdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8]
2125 ; SSE-NEXT: pmaxub {{.*}}(%rip), %xmm0
2126 ; SSE-NEXT: retq
2127 ;
2128 ; AVX-LABEL: max_gt_v16i8c:
2129 ; AVX: # BB#0:
2130 ; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8]
2131 ; AVX-NEXT: vpmaxub {{.*}}(%rip), %xmm0, %xmm0
2132 ; AVX-NEXT: retq
; IR under test: write i8 -7 into lane 0 of the first input and i8 1 into
; lane 0 of the second, compare with unsigned greater-than, then pick %1
; where the compare is true and %2 elsewhere.
2133 %1 = insertelement <16 x i8> , i8 -7, i8 0
2134 %2 = insertelement <16 x i8> , i8 1, i8 0
2135 %3 = icmp ugt <16 x i8> %1, %2
2136 %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
2137 ret <16 x i8> %4
2138 }
2139
; max_ge_v2i64c - constant-operand unsigned-greater-or-equal compare+select
; test on <2 x i64>. Takes no arguments. Every run below expects a
; greater-than compare whose mask is then inverted by XOR with an all-ones
; register (pcmpeqd of a register with itself) before the blend; no run
; expects a single max instruction.
; NOTE(review): the XOR with 2147483648 on the SSE2/SSE41 runs presumably
; biases the values so signed dword compares give the unsigned ordering -
; confirm.
; NOTE(review): the vector operand of each insertelement is absent in this
; copy of the test - confirm against the upstream file.
2140 define <2 x i64> @max_ge_v2i64c() {
2141 ; SSE2-LABEL: max_ge_v2i64c:
2142 ; SSE2: # BB#0:
2143 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [18446744073709551609,7]
2144 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [18446744073709551615,1]
2145 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
2146 ; SSE2-NEXT: movdqa %xmm0, %xmm3
2147 ; SSE2-NEXT: pxor %xmm1, %xmm3
2148 ; SSE2-NEXT: pxor %xmm2, %xmm0
2149 ; SSE2-NEXT: movdqa %xmm0, %xmm4
2150 ; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
2151 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
2152 ; SSE2-NEXT: pcmpeqd %xmm3, %xmm0
2153 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
2154 ; SSE2-NEXT: pand %xmm5, %xmm0
2155 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
2156 ; SSE2-NEXT: por %xmm0, %xmm3
2157 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
2158 ; SSE2-NEXT: pxor %xmm3, %xmm0
2159 ; SSE2-NEXT: pandn %xmm1, %xmm3
2160 ; SSE2-NEXT: pandn %xmm2, %xmm0
2161 ; SSE2-NEXT: por %xmm3, %xmm0
2162 ; SSE2-NEXT: retq
2163 ;
2164 ; SSE41-LABEL: max_ge_v2i64c:
2165 ; SSE41: # BB#0:
2166 ; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [18446744073709551609,7]
2167 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [18446744073709551615,1]
2168 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
2169 ; SSE41-NEXT: movdqa %xmm0, %xmm3
2170 ; SSE41-NEXT: pxor %xmm2, %xmm3
2171 ; SSE41-NEXT: pxor %xmm1, %xmm0
2172 ; SSE41-NEXT: movdqa %xmm0, %xmm4
2173 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm4
2174 ; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
2175 ; SSE41-NEXT: pcmpeqd %xmm3, %xmm0
2176 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
2177 ; SSE41-NEXT: pand %xmm5, %xmm0
2178 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
2179 ; SSE41-NEXT: por %xmm0, %xmm3
2180 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
2181 ; SSE41-NEXT: pxor %xmm3, %xmm0
2182 ; SSE41-NEXT: blendvpd %xmm2, %xmm1
2183 ; SSE41-NEXT: movapd %xmm1, %xmm0
2184 ; SSE41-NEXT: retq
2185 ;
2186 ; SSE42-LABEL: max_ge_v2i64c:
2187 ; SSE42: # BB#0:
2188 ; SSE42-NEXT: movapd {{.*#+}} xmm1 = [18446744073709551615,1]
2189 ; SSE42-NEXT: movdqa {{.*#+}} xmm2 = [9223372036854775807,9223372036854775809]
2190 ; SSE42-NEXT: pcmpgtq {{.*}}(%rip), %xmm2
2191 ; SSE42-NEXT: pcmpeqd %xmm0, %xmm0
2192 ; SSE42-NEXT: pxor %xmm2, %xmm0
2193 ; SSE42-NEXT: blendvpd {{.*}}(%rip), %xmm1
2194 ; SSE42-NEXT: movapd %xmm1, %xmm0
2195 ; SSE42-NEXT: retq
2196 ;
2197 ; AVX-LABEL: max_ge_v2i64c:
2198 ; AVX: # BB#0:
2199 ; AVX-NEXT: vmovapd {{.*#+}} xmm0 = [18446744073709551615,1]
2200 ; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775807,9223372036854775809]
2201 ; AVX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm1, %xmm1
2202 ; AVX-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
2203 ; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1
2204 ; AVX-NEXT: vblendvpd %xmm1, {{.*}}(%rip), %xmm0, %xmm0
2205 ; AVX-NEXT: retq
; IR under test: write i64 -7 into lane 0 of the first input and i64 -1 into
; lane 0 of the second, compare with unsigned greater-or-equal, then pick %1
; where the compare is true and %2 elsewhere.
2206 %1 = insertelement <2 x i64> , i64 -7, i32 0
2207 %2 = insertelement <2 x i64> , i64 -1, i32 0
2208 %3 = icmp uge <2 x i64> %1, %2
2209 %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
2210 ret <2 x i64> %4
2211 }
2212
; max_ge_v4i64c - constant-operand unsigned-greater-or-equal compare+select
; test on <4 x i64>. Takes no arguments. The SSE runs below handle the value
; as two 128-bit halves (results left in xmm0 and xmm1); the AVX1 run
; compares per 128-bit half and joins the masks with vinsertf128; the AVX2
; and AVX512 runs work on a full ymm register. Every run inverts the
; greater-than mask by XOR with an all-ones register before blending, and no
; run expects a single max instruction.
; NOTE(review): the XOR with 2147483648 on the SSE2/SSE41 runs presumably
; biases the values so signed dword compares give the unsigned ordering -
; confirm.
; NOTE(review): the vector operand of each insertelement is absent in this
; copy of the test - confirm against the upstream file.
2213 define <4 x i64> @max_ge_v4i64c() {
2214 ; SSE2-LABEL: max_ge_v4i64c:
2215 ; SSE2: # BB#0:
2216 ; SSE2-NEXT: movdqa {{.*#+}} xmm10 = [18446744073709551609,18446744073709551615]
2217 ; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [1,7]
2218 ; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [18446744073709551615,18446744073709551609]
2219 ; SSE2-NEXT: movdqa {{.*#+}} xmm9 = [7,1]
2220 ; SSE2-NEXT: movdqa {{.*#+}} xmm7 = [2147483648,2147483648,2147483648,2147483648]
2221 ; SSE2-NEXT: movdqa %xmm7, %xmm0
2222 ; SSE2-NEXT: pxor %xmm8, %xmm0
2223 ; SSE2-NEXT: movdqa %xmm7, %xmm1
2224 ; SSE2-NEXT: pxor %xmm9, %xmm1
2225 ; SSE2-NEXT: movdqa %xmm1, %xmm6
2226 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm6
2227 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm6[0,0,2,2]
2228 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm1
2229 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
2230 ; SSE2-NEXT: pand %xmm2, %xmm0
2231 ; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
2232 ; SSE2-NEXT: por %xmm0, %xmm6
2233 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
2234 ; SSE2-NEXT: movdqa %xmm6, %xmm1
2235 ; SSE2-NEXT: pxor %xmm0, %xmm1
2236 ; SSE2-NEXT: movdqa %xmm7, %xmm2
2237 ; SSE2-NEXT: pxor %xmm10, %xmm2
2238 ; SSE2-NEXT: pxor %xmm5, %xmm7
2239 ; SSE2-NEXT: movdqa %xmm7, %xmm3
2240 ; SSE2-NEXT: pcmpgtd %xmm2, %xmm3
2241 ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
2242 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm7
2243 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm7[1,1,3,3]
2244 ; SSE2-NEXT: pand %xmm4, %xmm2
2245 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
2246 ; SSE2-NEXT: por %xmm2, %xmm3
2247 ; SSE2-NEXT: pxor %xmm3, %xmm0
2248 ; SSE2-NEXT: pandn %xmm10, %xmm3
2249 ; SSE2-NEXT: pandn %xmm5, %xmm0
2250 ; SSE2-NEXT: por %xmm3, %xmm0
2251 ; SSE2-NEXT: pandn %xmm8, %xmm6
2252 ; SSE2-NEXT: pandn %xmm9, %xmm1
2253 ; SSE2-NEXT: por %xmm6, %xmm1
2254 ; SSE2-NEXT: retq
2255 ;
2256 ; SSE41-LABEL: max_ge_v4i64c:
2257 ; SSE41: # BB#0:
2258 ; SSE41-NEXT: movdqa {{.*#+}} xmm9 = [18446744073709551609,18446744073709551615]
2259 ; SSE41-NEXT: movdqa {{.*#+}} xmm8 = [1,7]
2260 ; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [18446744073709551615,18446744073709551609]
2261 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [7,1]
2262 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
2263 ; SSE41-NEXT: movdqa %xmm0, %xmm3
2264 ; SSE41-NEXT: pxor %xmm8, %xmm3
2265 ; SSE41-NEXT: movdqa %xmm0, %xmm6
2266 ; SSE41-NEXT: pxor %xmm1, %xmm6
2267 ; SSE41-NEXT: movdqa %xmm6, %xmm7
2268 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm7
2269 ; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm7[0,0,2,2]
2270 ; SSE41-NEXT: pcmpeqd %xmm3, %xmm6
2271 ; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
2272 ; SSE41-NEXT: pand %xmm4, %xmm6
2273 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm7[1,1,3,3]
2274 ; SSE41-NEXT: por %xmm6, %xmm3
2275 ; SSE41-NEXT: pcmpeqd %xmm4, %xmm4
2276 ; SSE41-NEXT: pxor %xmm4, %xmm3
2277 ; SSE41-NEXT: movdqa %xmm0, %xmm6
2278 ; SSE41-NEXT: pxor %xmm9, %xmm6
2279 ; SSE41-NEXT: pxor %xmm2, %xmm0
2280 ; SSE41-NEXT: movdqa %xmm0, %xmm7
2281 ; SSE41-NEXT: pcmpgtd %xmm6, %xmm7
2282 ; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm7[0,0,2,2]
2283 ; SSE41-NEXT: pcmpeqd %xmm6, %xmm0
2284 ; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
2285 ; SSE41-NEXT: pand %xmm5, %xmm6
2286 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm7[1,1,3,3]
2287 ; SSE41-NEXT: por %xmm6, %xmm0
2288 ; SSE41-NEXT: pxor %xmm4, %xmm0
2289 ; SSE41-NEXT: blendvpd %xmm9, %xmm2
2290 ; SSE41-NEXT: movdqa %xmm3, %xmm0
2291 ; SSE41-NEXT: blendvpd %xmm8, %xmm1
2292 ; SSE41-NEXT: movapd %xmm2, %xmm0
2293 ; SSE41-NEXT: retq
2294 ;
2295 ; SSE42-LABEL: max_ge_v4i64c:
2296 ; SSE42: # BB#0:
2297 ; SSE42-NEXT: movapd {{.*#+}} xmm1 = [7,1]
2298 ; SSE42-NEXT: movapd {{.*#+}} xmm2 = [18446744073709551615,18446744073709551609]
2299 ; SSE42-NEXT: movdqa {{.*#+}} xmm3 = [9223372036854775815,9223372036854775809]
2300 ; SSE42-NEXT: pcmpgtq {{.*}}(%rip), %xmm3
2301 ; SSE42-NEXT: pcmpeqd %xmm4, %xmm4
2302 ; SSE42-NEXT: pxor %xmm4, %xmm3
2303 ; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775807,9223372036854775801]
2304 ; SSE42-NEXT: pcmpgtq {{.*}}(%rip), %xmm0
2305 ; SSE42-NEXT: pxor %xmm4, %xmm0
2306 ; SSE42-NEXT: blendvpd {{.*}}(%rip), %xmm2
2307 ; SSE42-NEXT: movdqa %xmm3, %xmm0
2308 ; SSE42-NEXT: blendvpd {{.*}}(%rip), %xmm1
2309 ; SSE42-NEXT: movapd %xmm2, %xmm0
2310 ; SSE42-NEXT: retq
2311 ;
2312 ; AVX1-LABEL: max_ge_v4i64c:
2313 ; AVX1: # BB#0:
2314 ; AVX1-NEXT: vmovapd {{.*#+}} ymm0 = [18446744073709551615,18446744073709551609,7,1]
2315 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775815,9223372036854775809]
2316 ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm1, %xmm1
2317 ; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
2318 ; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm1
2319 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854775807,9223372036854775801]
2320 ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm3, %xmm3
2321 ; AVX1-NEXT: vpxor %xmm2, %xmm3, %xmm2
2322 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
2323 ; AVX1-NEXT: vblendvpd %ymm1, {{.*}}(%rip), %ymm0, %ymm0
2324 ; AVX1-NEXT: retq
2325 ;
2326 ; AVX2-LABEL: max_ge_v4i64c:
2327 ; AVX2: # BB#0:
2328 ; AVX2-NEXT: vmovapd {{.*#+}} ymm0 = [18446744073709551615,18446744073709551609,7,1]
2329 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [9223372036854775807,9223372036854775801,9223372036854775815,9223372036854775809]
2330 ; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %ymm1, %ymm1
2331 ; AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
2332 ; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm1
2333 ; AVX2-NEXT: vblendvpd %ymm1, {{.*}}(%rip), %ymm0, %ymm0
2334 ; AVX2-NEXT: retq
2335 ;
2336 ; AVX512-LABEL: max_ge_v4i64c:
2337 ; AVX512: # BB#0:
2338 ; AVX512-NEXT: vmovapd {{.*#+}} ymm0 = [18446744073709551615,18446744073709551609,7,1]
2339 ; AVX512-NEXT: vmovdqa {{.*#+}} ymm1 = [9223372036854775807,9223372036854775801,9223372036854775815,9223372036854775809]
2340 ; AVX512-NEXT: vpcmpgtq {{.*}}(%rip), %ymm1, %ymm1
2341 ; AVX512-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
2342 ; AVX512-NEXT: vpxor %ymm2, %ymm1, %ymm1
2343 ; AVX512-NEXT: vblendvpd %ymm1, {{.*}}(%rip), %ymm0, %ymm0
2344 ; AVX512-NEXT: retq
; IR under test: write i64 -7 into lane 0 of the first input and i64 -1 into
; lane 0 of the second, compare with unsigned greater-or-equal, then pick %1
; where the compare is true and %2 elsewhere.
2345 %1 = insertelement <4 x i64> , i64 -7, i32 0
2346 %2 = insertelement <4 x i64> , i64 -1, i32 0
2347 %3 = icmp uge <4 x i64> %1, %2
2348 %4 = select <4 x i1> %3, <4 x i64> %1, <4 x i64> %2
2349 ret <4 x i64> %4
2350 }
2351
; max_ge_v4i32c - constant-operand unsigned-greater-or-equal compare+select
; test on <4 x i32>. Takes no arguments. The SSE2 run below expects pcmpgtd,
; a mask inversion by XOR with an all-ones register, and a pandn/pandn/por
; blend; every other run expects a single pmaxud (vpmaxud on the AVX run)
; against a memory constant.
; NOTE(review): the vector operand of each insertelement is absent in this
; copy of the test - confirm against the upstream file.
2352 define <4 x i32> @max_ge_v4i32c() {
2353 ; SSE2-LABEL: max_ge_v4i32c:
2354 ; SSE2: # BB#0:
2355 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483649,2147483641,2147483655,2147483649]
2356 ; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm0
2357 ; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
2358 ; SSE2-NEXT: pxor %xmm0, %xmm1
2359 ; SSE2-NEXT: pandn {{.*}}(%rip), %xmm1
2360 ; SSE2-NEXT: pandn {{.*}}(%rip), %xmm0
2361 ; SSE2-NEXT: por %xmm1, %xmm0
2362 ; SSE2-NEXT: retq
2363 ;
2364 ; SSE41-LABEL: max_ge_v4i32c:
2365 ; SSE41: # BB#0:
2366 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7]
2367 ; SSE41-NEXT: pmaxud {{.*}}(%rip), %xmm0
2368 ; SSE41-NEXT: retq
2369 ;
2370 ; SSE42-LABEL: max_ge_v4i32c:
2371 ; SSE42: # BB#0:
2372 ; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7]
2373 ; SSE42-NEXT: pmaxud {{.*}}(%rip), %xmm0
2374 ; SSE42-NEXT: retq
2375 ;
2376 ; AVX-LABEL: max_ge_v4i32c:
2377 ; AVX: # BB#0:
2378 ; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7]
2379 ; AVX-NEXT: vpmaxud {{.*}}(%rip), %xmm0, %xmm0
2380 ; AVX-NEXT: retq
; IR under test: write i32 -7 into lane 0 of the first input and i32 1 into
; lane 0 of the second, compare with unsigned greater-or-equal, then pick %1
; where the compare is true and %2 elsewhere.
2381 %1 = insertelement <4 x i32> , i32 -7, i32 0
2382 %2 = insertelement <4 x i32> , i32 1, i32 0
2383 %3 = icmp uge <4 x i32> %1, %2
2384 %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
2385 ret <4 x i32> %4
2386 }
2387
; max_ge_v8i32c - constant-operand unsigned-greater-or-equal compare+select
; test on <8 x i32>. Takes no arguments. The SSE2 run below expects two
; pcmpgtd compares with their masks inverted via XOR against a shared
; all-ones register, each followed by a pandn/pandn/por blend; the SSE41 and
; SSE42 runs expect one pmaxud per 128-bit half; the AVX1 run expects two
; vpmaxud joined by vinsertf128; the AVX2 and AVX512 runs expect one ymm
; vpmaxud.
; NOTE(review): the vector operand of each insertelement is absent in this
; copy of the test - confirm against the upstream file.
2388 define <8 x i32> @max_ge_v8i32c() {
2389 ; SSE2-LABEL: max_ge_v8i32c:
2390 ; SSE2: # BB#0:
2391 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483655,2147483653,2147483651,2147483649]
2392 ; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm1
2393 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
2394 ; SSE2-NEXT: movdqa %xmm1, %xmm3
2395 ; SSE2-NEXT: pxor %xmm2, %xmm3
2396 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483649,2147483645,2147483643,2147483641]
2397 ; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm0
2398 ; SSE2-NEXT: pxor %xmm0, %xmm2
2399 ; SSE2-NEXT: pandn {{.*}}(%rip), %xmm2
2400 ; SSE2-NEXT: pandn {{.*}}(%rip), %xmm0
2401 ; SSE2-NEXT: por %xmm2, %xmm0
2402 ; SSE2-NEXT: pandn {{.*}}(%rip), %xmm3
2403 ; SSE2-NEXT: pandn {{.*}}(%rip), %xmm1
2404 ; SSE2-NEXT: por %xmm3, %xmm1
2405 ; SSE2-NEXT: retq
2406 ;
2407 ; SSE41-LABEL: max_ge_v8i32c:
2408 ; SSE41: # BB#0:
2409 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,3,5,7]
2410 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295]
2411 ; SSE41-NEXT: pmaxud {{.*}}(%rip), %xmm0
2412 ; SSE41-NEXT: pmaxud {{.*}}(%rip), %xmm1
2413 ; SSE41-NEXT: retq
2414 ;
2415 ; SSE42-LABEL: max_ge_v8i32c:
2416 ; SSE42: # BB#0:
2417 ; SSE42-NEXT: movdqa {{.*#+}} xmm1 = [1,3,5,7]
2418 ; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295]
2419 ; SSE42-NEXT: pmaxud {{.*}}(%rip), %xmm0
2420 ; SSE42-NEXT: pmaxud {{.*}}(%rip), %xmm1
2421 ; SSE42-NEXT: retq
2422 ;
2423 ; AVX1-LABEL: max_ge_v8i32c:
2424 ; AVX1: # BB#0:
2425 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295]
2426 ; AVX1-NEXT: vpmaxud {{.*}}(%rip), %xmm0, %xmm0
2427 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,3,5,7]
2428 ; AVX1-NEXT: vpmaxud {{.*}}(%rip), %xmm1, %xmm1
2429 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2430 ; AVX1-NEXT: retq
2431 ;
2432 ; AVX2-LABEL: max_ge_v8i32c:
2433 ; AVX2: # BB#0:
2434 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [4294967289,4294967291,4294967293,4294967295,1,3,5,7]
2435 ; AVX2-NEXT: vpmaxud {{.*}}(%rip), %ymm0, %ymm0
2436 ; AVX2-NEXT: retq
2437 ;
2438 ; AVX512-LABEL: max_ge_v8i32c:
2439 ; AVX512: # BB#0:
2440 ; AVX512-NEXT: vmovdqa {{.*#+}} ymm0 = [4294967289,4294967291,4294967293,4294967295,1,3,5,7]
2441 ; AVX512-NEXT: vpmaxud {{.*}}(%rip), %ymm0, %ymm0
2442 ; AVX512-NEXT: retq
; IR under test: write i32 -7 into lane 0 of the first input and i32 1 into
; lane 0 of the second, compare with unsigned greater-or-equal, then pick %1
; where the compare is true and %2 elsewhere.
2443 %1 = insertelement <8 x i32> , i32 -7, i32 0
2444 %2 = insertelement <8 x i32> , i32 1, i32 0
2445 %3 = icmp uge <8 x i32> %1, %2
2446 %4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2
2447 ret <8 x i32> %4
2448 }
2449
; max_ge_v8i16c - constant-operand unsigned-greater-or-equal compare+select
; test on <8 x i16>. Takes no arguments. The SSE2 run below expects a
; saturating subtract (psubusw) of the first input from the second, a
; pcmpeqw against zero to form the mask, and a pand/pandn/por blend; every
; other run expects a single pmaxuw (vpmaxuw on the AVX run) against a
; memory constant.
; NOTE(review): the vector operand of each insertelement is absent in this
; copy of the test - confirm against the upstream file.
2450 define <8 x i16> @max_ge_v8i16c() {
2451 ; SSE2-LABEL: max_ge_v8i16c:
2452 ; SSE2: # BB#0:
2453 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [65529,65531,65533,65535,1,3,5,7]
2454 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1,65533,65531,65529,7,5,3,1]
2455 ; SSE2-NEXT: movdqa %xmm2, %xmm3
2456 ; SSE2-NEXT: psubusw %xmm1, %xmm3
2457 ; SSE2-NEXT: pxor %xmm0, %xmm0
2458 ; SSE2-NEXT: pcmpeqw %xmm3, %xmm0
2459 ; SSE2-NEXT: pand %xmm0, %xmm1
2460 ; SSE2-NEXT: pandn %xmm2, %xmm0
2461 ; SSE2-NEXT: por %xmm1, %xmm0
2462 ; SSE2-NEXT: retq
2463 ;
2464 ; SSE41-LABEL: max_ge_v8i16c:
2465 ; SSE41: # BB#0:
2466 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7]
2467 ; SSE41-NEXT: pmaxuw {{.*}}(%rip), %xmm0
2468 ; SSE41-NEXT: retq
2469 ;
2470 ; SSE42-LABEL: max_ge_v8i16c:
2471 ; SSE42: # BB#0:
2472 ; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7]
2473 ; SSE42-NEXT: pmaxuw {{.*}}(%rip), %xmm0
2474 ; SSE42-NEXT: retq
2475 ;
2476 ; AVX-LABEL: max_ge_v8i16c:
2477 ; AVX: # BB#0:
2478 ; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7]
2479 ; AVX-NEXT: vpmaxuw {{.*}}(%rip), %xmm0, %xmm0
2480 ; AVX-NEXT: retq
; IR under test: write i16 -7 into lane 0 of the first input and i16 1 into
; lane 0 of the second, compare with unsigned greater-or-equal, then pick %1
; where the compare is true and %2 elsewhere.
2481 %1 = insertelement <8 x i16> , i16 -7, i16 0
2482 %2 = insertelement <8 x i16> , i16 1, i16 0
2483 %3 = icmp uge <8 x i16> %1, %2
2484 %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
2485 ret <8 x i16> %4
2486 }
2487
; Constant-input test: unsigned max (icmp uge + select) of two <16 x i16>
; values built from constants. The 128-bit targets are expected to handle the
; result as two xmm halves (xmm0/xmm1); AVX1 computes two halves and joins them
; with vinsertf128; AVX2 and AVX512 use a single ymm vpmaxuw.
; NOTE(review): both insertelement lines have no vector operand before the
; first comma, so they do not parse as written -- the constant vectors appear
; to have been dropped; restore them from the original test before running.
2488 define <16 x i16> @max_ge_v16i16c() {
2489 ; SSE2-LABEL: max_ge_v16i16c:
2490 ; SSE2: # BB#0:
2491 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65529,65530,65531,65532,65533,65534,65535,0]
2492 ; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [1,2,3,4,5,6,7,8]
2493 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [1,65534,65533,65532,65531,65530,65529,0]
2494 ; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [7,6,5,4,3,2,1,0]
2495 ; SSE2-NEXT: movdqa %xmm5, %xmm1
2496 ; SSE2-NEXT: psubusw %xmm3, %xmm1
2497 ; SSE2-NEXT: pxor %xmm6, %xmm6
2498 ; SSE2-NEXT: pcmpeqw %xmm6, %xmm1
2499 ; SSE2-NEXT: movdqa %xmm4, %xmm0
2500 ; SSE2-NEXT: psubusw %xmm2, %xmm0
2501 ; SSE2-NEXT: pcmpeqw %xmm6, %xmm0
2502 ; SSE2-NEXT: pand %xmm0, %xmm2
2503 ; SSE2-NEXT: pandn %xmm4, %xmm0
2504 ; SSE2-NEXT: por %xmm2, %xmm0
2505 ; SSE2-NEXT: pand %xmm1, %xmm3
2506 ; SSE2-NEXT: pandn %xmm5, %xmm1
2507 ; SSE2-NEXT: por %xmm3, %xmm1
2508 ; SSE2-NEXT: retq
2509 ;
2510 ; SSE41-LABEL: max_ge_v16i16c:
2511 ; SSE41: # BB#0:
2512 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8]
2513 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0]
2514 ; SSE41-NEXT: pmaxuw {{.*}}(%rip), %xmm0
2515 ; SSE41-NEXT: pmaxuw {{.*}}(%rip), %xmm1
2516 ; SSE41-NEXT: retq
2517 ;
2518 ; SSE42-LABEL: max_ge_v16i16c:
2519 ; SSE42: # BB#0:
2520 ; SSE42-NEXT: movdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8]
2521 ; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0]
2522 ; SSE42-NEXT: pmaxuw {{.*}}(%rip), %xmm0
2523 ; SSE42-NEXT: pmaxuw {{.*}}(%rip), %xmm1
2524 ; SSE42-NEXT: retq
2525 ;
2526 ; AVX1-LABEL: max_ge_v16i16c:
2527 ; AVX1: # BB#0:
2528 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0]
2529 ; AVX1-NEXT: vpmaxuw {{.*}}(%rip), %xmm0, %xmm0
2530 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8]
2531 ; AVX1-NEXT: vpmaxuw {{.*}}(%rip), %xmm1, %xmm1
2532 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2533 ; AVX1-NEXT: retq
2534 ;
2535 ; AVX2-LABEL: max_ge_v16i16c:
2536 ; AVX2: # BB#0:
2537 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [65529,65530,65531,65532,65533,65534,65535,0,1,2,3,4,5,6,7,8]
2538 ; AVX2-NEXT: vpmaxuw {{.*}}(%rip), %ymm0, %ymm0
2539 ; AVX2-NEXT: retq
2540 ;
2541 ; AVX512-LABEL: max_ge_v16i16c:
2542 ; AVX512: # BB#0:
2543 ; AVX512-NEXT: vmovdqa {{.*#+}} ymm0 = [65529,65530,65531,65532,65533,65534,65535,0,1,2,3,4,5,6,7,8]
2544 ; AVX512-NEXT: vpmaxuw {{.*}}(%rip), %ymm0, %ymm0
2545 ; AVX512-NEXT: retq
2546 %1 = insertelement <16 x i16> , i16 -7, i16 0
2547 %2 = insertelement <16 x i16> , i16 1, i16 0
2548 %3 = icmp uge <16 x i16> %1, %2
2549 %4 = select <16 x i1> %3, <16 x i16> %1, <16 x i16> %2
2550 ret <16 x i16> %4
2551 }
2552
; Constant-input test: unsigned max (icmp uge + select) of two <16 x i8>
; values built from constants. Both check groups expect a constant load
; followed by a (v)pmaxub against a second constant in memory.
; NOTE(review): both insertelement lines have no vector operand before the
; first comma, so they do not parse as written -- the constant vectors appear
; to have been dropped; restore them from the original test before running.
2553 define <16 x i8> @max_ge_v16i8c() {
2554 ; SSE-LABEL: max_ge_v16i8c:
2555 ; SSE: # BB#0:
2556 ; SSE-NEXT: movdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8]
2557 ; SSE-NEXT: pmaxub {{.*}}(%rip), %xmm0
2558 ; SSE-NEXT: retq
2559 ;
2560 ; AVX-LABEL: max_ge_v16i8c:
2561 ; AVX: # BB#0:
2562 ; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8]
2563 ; AVX-NEXT: vpmaxub {{.*}}(%rip), %xmm0, %xmm0
2564 ; AVX-NEXT: retq
2565 %1 = insertelement <16 x i8> , i8 -7, i8 0
2566 %2 = insertelement <16 x i8> , i8 1, i8 0
2567 %3 = icmp uge <16 x i8> %1, %2
2568 %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
2569 ret <16 x i8> %4
2570 }
2571
; Constant-input test on <2 x i64>: icmp ult + select %3, %1, %2.
; NOTE(review): despite the max_ prefix, selecting %1 when %1 <u %2 yields the
; unsigned MIN of the two inputs; the name looks like a copy/paste slip.
; Renaming would also require updating every -LABEL directive below.
; The SSE2 and SSE41 expectations build the 64-bit unsigned compare from
; 32-bit pcmpgtd/pcmpeqd after xor-ing both inputs with 2147483648 per dword;
; the SSE42 and AVX expectations use (v)pcmpgtq on pre-biased constants.
; NOTE(review): both insertelement lines have no vector operand before the
; first comma, so they do not parse as written -- the constant vectors appear
; to have been dropped; restore them from the original test before running.
2572 define <2 x i64> @max_lt_v2i64c() {
2573 ; SSE2-LABEL: max_lt_v2i64c:
2574 ; SSE2: # BB#0:
2575 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [18446744073709551609,7]
2576 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [18446744073709551615,1]
2577 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
2578 ; SSE2-NEXT: movdqa %xmm0, %xmm3
2579 ; SSE2-NEXT: pxor %xmm1, %xmm3
2580 ; SSE2-NEXT: pxor %xmm2, %xmm0
2581 ; SSE2-NEXT: movdqa %xmm0, %xmm4
2582 ; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
2583 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
2584 ; SSE2-NEXT: pcmpeqd %xmm3, %xmm0
2585 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2586 ; SSE2-NEXT: pand %xmm5, %xmm3
2587 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
2588 ; SSE2-NEXT: por %xmm3, %xmm0
2589 ; SSE2-NEXT: movdqa %xmm0, %xmm3
2590 ; SSE2-NEXT: pandn %xmm2, %xmm3
2591 ; SSE2-NEXT: pand %xmm1, %xmm0
2592 ; SSE2-NEXT: por %xmm3, %xmm0
2593 ; SSE2-NEXT: retq
2594 ;
2595 ; SSE41-LABEL: max_lt_v2i64c:
2596 ; SSE41: # BB#0:
2597 ; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [18446744073709551609,7]
2598 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [18446744073709551615,1]
2599 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
2600 ; SSE41-NEXT: movdqa %xmm0, %xmm3
2601 ; SSE41-NEXT: pxor %xmm2, %xmm3
2602 ; SSE41-NEXT: pxor %xmm1, %xmm0
2603 ; SSE41-NEXT: movdqa %xmm0, %xmm4
2604 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm4
2605 ; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
2606 ; SSE41-NEXT: pcmpeqd %xmm3, %xmm0
2607 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2608 ; SSE41-NEXT: pand %xmm5, %xmm3
2609 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
2610 ; SSE41-NEXT: por %xmm3, %xmm0
2611 ; SSE41-NEXT: blendvpd %xmm2, %xmm1
2612 ; SSE41-NEXT: movapd %xmm1, %xmm0
2613 ; SSE41-NEXT: retq
2614 ;
2615 ; SSE42-LABEL: max_lt_v2i64c:
2616 ; SSE42: # BB#0:
2617 ; SSE42-NEXT: movapd {{.*#+}} xmm1 = [18446744073709551615,1]
2618 ; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775807,9223372036854775809]
2619 ; SSE42-NEXT: pcmpgtq {{.*}}(%rip), %xmm0
2620 ; SSE42-NEXT: blendvpd {{.*}}(%rip), %xmm1
2621 ; SSE42-NEXT: movapd %xmm1, %xmm0
2622 ; SSE42-NEXT: retq
2623 ;
2624 ; AVX-LABEL: max_lt_v2i64c:
2625 ; AVX: # BB#0:
2626 ; AVX-NEXT: vmovapd {{.*#+}} xmm0 = [18446744073709551615,1]
2627 ; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775807,9223372036854775809]
2628 ; AVX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm1, %xmm1
2629 ; AVX-NEXT: vblendvpd %xmm1, {{.*}}(%rip), %xmm0, %xmm0
2630 ; AVX-NEXT: retq
2631 %1 = insertelement <2 x i64> , i64 -7, i32 0
2632 %2 = insertelement <2 x i64> , i64 -1, i32 0
2633 %3 = icmp ult <2 x i64> %1, %2
2634 %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
2635 ret <2 x i64> %4
2636 }
2637
; Constant-input test on <4 x i64>: icmp ult + select %3, %1, %2.
; NOTE(review): despite the max_ prefix, selecting %1 when %1 <u %2 yields the
; unsigned MIN of the two inputs; the name looks like a copy/paste slip.
; Renaming would also require updating every -LABEL directive below.
; The 128-bit targets are expected to process the value as two xmm halves.
; SSE2 and SSE41 emulate the 64-bit unsigned compare with 32-bit
; pcmpgtd/pcmpeqd after xor-ing with 2147483648 per dword; SSE42 and the AVX
; variants use (v)pcmpgtq on pre-biased constants followed by a blend.
; NOTE(review): both insertelement lines have no vector operand before the
; first comma, so they do not parse as written -- the constant vectors appear
; to have been dropped; restore them from the original test before running.
2638 define <4 x i64> @max_lt_v4i64c() {
2639 ; SSE2-LABEL: max_lt_v4i64c:
2640 ; SSE2: # BB#0:
2641 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [18446744073709551609,18446744073709551615]
2642 ; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [1,7]
2643 ; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [18446744073709551615,18446744073709551609]
2644 ; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [7,1]
2645 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
2646 ; SSE2-NEXT: movdqa %xmm0, %xmm1
2647 ; SSE2-NEXT: pxor %xmm8, %xmm1
2648 ; SSE2-NEXT: movdqa %xmm0, %xmm6
2649 ; SSE2-NEXT: pxor %xmm3, %xmm6
2650 ; SSE2-NEXT: movdqa %xmm6, %xmm7
2651 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm7
2652 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm7[0,0,2,2]
2653 ; SSE2-NEXT: pcmpeqd %xmm1, %xmm6
2654 ; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
2655 ; SSE2-NEXT: pand %xmm2, %xmm6
2656 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm7[1,1,3,3]
2657 ; SSE2-NEXT: por %xmm6, %xmm1
2658 ; SSE2-NEXT: movdqa %xmm0, %xmm2
2659 ; SSE2-NEXT: pxor %xmm4, %xmm2
2660 ; SSE2-NEXT: pxor %xmm5, %xmm0
2661 ; SSE2-NEXT: movdqa %xmm0, %xmm6
2662 ; SSE2-NEXT: pcmpgtd %xmm2, %xmm6
2663 ; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
2664 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
2665 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
2666 ; SSE2-NEXT: pand %xmm7, %xmm2
2667 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3]
2668 ; SSE2-NEXT: por %xmm2, %xmm0
2669 ; SSE2-NEXT: movdqa %xmm0, %xmm2
2670 ; SSE2-NEXT: pandn %xmm5, %xmm2
2671 ; SSE2-NEXT: pand %xmm4, %xmm0
2672 ; SSE2-NEXT: por %xmm2, %xmm0
2673 ; SSE2-NEXT: movdqa %xmm1, %xmm2
2674 ; SSE2-NEXT: pandn %xmm3, %xmm2
2675 ; SSE2-NEXT: pand %xmm8, %xmm1
2676 ; SSE2-NEXT: por %xmm2, %xmm1
2677 ; SSE2-NEXT: retq
2678 ;
2679 ; SSE41-LABEL: max_lt_v4i64c:
2680 ; SSE41: # BB#0:
2681 ; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [18446744073709551609,18446744073709551615]
2682 ; SSE41-NEXT: movdqa {{.*#+}} xmm8 = [1,7]
2683 ; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [18446744073709551615,18446744073709551609]
2684 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [7,1]
2685 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
2686 ; SSE41-NEXT: movdqa %xmm0, %xmm3
2687 ; SSE41-NEXT: pxor %xmm8, %xmm3
2688 ; SSE41-NEXT: movdqa %xmm0, %xmm6
2689 ; SSE41-NEXT: pxor %xmm1, %xmm6
2690 ; SSE41-NEXT: movdqa %xmm6, %xmm7
2691 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm7
2692 ; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm7[0,0,2,2]
2693 ; SSE41-NEXT: pcmpeqd %xmm3, %xmm6
2694 ; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
2695 ; SSE41-NEXT: pand %xmm4, %xmm6
2696 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm7[1,1,3,3]
2697 ; SSE41-NEXT: por %xmm6, %xmm3
2698 ; SSE41-NEXT: movdqa %xmm0, %xmm4
2699 ; SSE41-NEXT: pxor %xmm5, %xmm4
2700 ; SSE41-NEXT: pxor %xmm2, %xmm0
2701 ; SSE41-NEXT: movdqa %xmm0, %xmm6
2702 ; SSE41-NEXT: pcmpgtd %xmm4, %xmm6
2703 ; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
2704 ; SSE41-NEXT: pcmpeqd %xmm4, %xmm0
2705 ; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
2706 ; SSE41-NEXT: pand %xmm7, %xmm4
2707 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3]
2708 ; SSE41-NEXT: por %xmm4, %xmm0
2709 ; SSE41-NEXT: blendvpd %xmm5, %xmm2
2710 ; SSE41-NEXT: movdqa %xmm3, %xmm0
2711 ; SSE41-NEXT: blendvpd %xmm8, %xmm1
2712 ; SSE41-NEXT: movapd %xmm2, %xmm0
2713 ; SSE41-NEXT: retq
2714 ;
2715 ; SSE42-LABEL: max_lt_v4i64c:
2716 ; SSE42: # BB#0:
2717 ; SSE42-NEXT: movapd {{.*#+}} xmm1 = [7,1]
2718 ; SSE42-NEXT: movapd {{.*#+}} xmm2 = [18446744073709551615,18446744073709551609]
2719 ; SSE42-NEXT: movdqa {{.*#+}} xmm3 = [9223372036854775815,9223372036854775809]
2720 ; SSE42-NEXT: pcmpgtq {{.*}}(%rip), %xmm3
2721 ; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775807,9223372036854775801]
2722 ; SSE42-NEXT: pcmpgtq {{.*}}(%rip), %xmm0
2723 ; SSE42-NEXT: blendvpd {{.*}}(%rip), %xmm2
2724 ; SSE42-NEXT: movdqa %xmm3, %xmm0
2725 ; SSE42-NEXT: blendvpd {{.*}}(%rip), %xmm1
2726 ; SSE42-NEXT: movapd %xmm2, %xmm0
2727 ; SSE42-NEXT: retq
2728 ;
2729 ; AVX1-LABEL: max_lt_v4i64c:
2730 ; AVX1: # BB#0:
2731 ; AVX1-NEXT: vmovapd {{.*#+}} ymm0 = [18446744073709551615,18446744073709551609,7,1]
2732 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775807,9223372036854775801]
2733 ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm1, %xmm1
2734 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775815,9223372036854775809]
2735 ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm2, %xmm2
2736 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
2737 ; AVX1-NEXT: vblendvpd %ymm1, {{.*}}(%rip), %ymm0, %ymm0
2738 ; AVX1-NEXT: retq
2739 ;
2740 ; AVX2-LABEL: max_lt_v4i64c:
2741 ; AVX2: # BB#0:
2742 ; AVX2-NEXT: vmovapd {{.*#+}} ymm0 = [18446744073709551615,18446744073709551609,7,1]
2743 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [9223372036854775807,9223372036854775801,9223372036854775815,9223372036854775809]
2744 ; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %ymm1, %ymm1
2745 ; AVX2-NEXT: vblendvpd %ymm1, {{.*}}(%rip), %ymm0, %ymm0
2746 ; AVX2-NEXT: retq
2747 ;
2748 ; AVX512-LABEL: max_lt_v4i64c:
2749 ; AVX512: # BB#0:
2750 ; AVX512-NEXT: vmovapd {{.*#+}} ymm0 = [18446744073709551615,18446744073709551609,7,1]
2751 ; AVX512-NEXT: vmovdqa {{.*#+}} ymm1 = [9223372036854775807,9223372036854775801,9223372036854775815,9223372036854775809]
2752 ; AVX512-NEXT: vpcmpgtq {{.*}}(%rip), %ymm1, %ymm1
2753 ; AVX512-NEXT: vblendvpd %ymm1, {{.*}}(%rip), %ymm0, %ymm0
2754 ; AVX512-NEXT: retq
2755 %1 = insertelement <4 x i64> , i64 -7, i32 0
2756 %2 = insertelement <4 x i64> , i64 -1, i32 0
2757 %3 = icmp ult <4 x i64> %1, %2
2758 %4 = select <4 x i1> %3, <4 x i64> %1, <4 x i64> %2
2759 ret <4 x i64> %4
2760 }
2761
; Constant-input test on <4 x i32>: icmp ult + select %3, %1, %2.
; NOTE(review): despite the max_ prefix, selecting %1 when %1 <u %2 yields the
; unsigned MIN of the two inputs (the non-SSE2 expectations use pminud); the
; name looks like a copy/paste slip. Renaming would also require updating
; every -LABEL directive below.
; The SSE2 expectation uses pcmpgtd on pre-biased constants and a
; pand/pandn/por blend.
; NOTE(review): both insertelement lines have no vector operand before the
; first comma, so they do not parse as written -- the constant vectors appear
; to have been dropped; restore them from the original test before running.
2762 define <4 x i32> @max_lt_v4i32c() {
2763 ; SSE2-LABEL: max_lt_v4i32c:
2764 ; SSE2: # BB#0:
2765 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483649,2147483641,2147483655,2147483649]
2766 ; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm0
2767 ; SSE2-NEXT: movdqa %xmm0, %xmm1
2768 ; SSE2-NEXT: pandn {{.*}}(%rip), %xmm1
2769 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
2770 ; SSE2-NEXT: por %xmm1, %xmm0
2771 ; SSE2-NEXT: retq
2772 ;
2773 ; SSE41-LABEL: max_lt_v4i32c:
2774 ; SSE41: # BB#0:
2775 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7]
2776 ; SSE41-NEXT: pminud {{.*}}(%rip), %xmm0
2777 ; SSE41-NEXT: retq
2778 ;
2779 ; SSE42-LABEL: max_lt_v4i32c:
2780 ; SSE42: # BB#0:
2781 ; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7]
2782 ; SSE42-NEXT: pminud {{.*}}(%rip), %xmm0
2783 ; SSE42-NEXT: retq
2784 ;
2785 ; AVX-LABEL: max_lt_v4i32c:
2786 ; AVX: # BB#0:
2787 ; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7]
2788 ; AVX-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm0
2789 ; AVX-NEXT: retq
2790 %1 = insertelement <4 x i32> , i32 -7, i32 0
2791 %2 = insertelement <4 x i32> , i32 1, i32 0
2792 %3 = icmp ult <4 x i32> %1, %2
2793 %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
2794 ret <4 x i32> %4
2795 }
2796
; Constant-input test on <8 x i32>: icmp ult + select %3, %1, %2.
; NOTE(review): despite the max_ prefix, selecting %1 when %1 <u %2 yields the
; unsigned MIN of the two inputs (the non-SSE2 expectations use pminud); the
; name looks like a copy/paste slip. Renaming would also require updating
; every -LABEL directive below.
; The 128-bit targets are expected to process the value as two xmm halves;
; AVX1 joins two halves with vinsertf128; AVX2/AVX512 use one ymm vpminud.
; NOTE(review): both insertelement lines have no vector operand before the
; first comma, so they do not parse as written -- the constant vectors appear
; to have been dropped; restore them from the original test before running.
2797 define <8 x i32> @max_lt_v8i32c() {
2798 ; SSE2-LABEL: max_lt_v8i32c:
2799 ; SSE2: # BB#0:
2800 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483655,2147483653,2147483651,2147483649]
2801 ; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm1
2802 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483649,2147483645,2147483643,2147483641]
2803 ; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm0
2804 ; SSE2-NEXT: movdqa %xmm0, %xmm2
2805 ; SSE2-NEXT: pandn {{.*}}(%rip), %xmm2
2806 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
2807 ; SSE2-NEXT: por %xmm2, %xmm0
2808 ; SSE2-NEXT: movdqa %xmm1, %xmm2
2809 ; SSE2-NEXT: pandn {{.*}}(%rip), %xmm2
2810 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
2811 ; SSE2-NEXT: por %xmm2, %xmm1
2812 ; SSE2-NEXT: retq
2813 ;
2814 ; SSE41-LABEL: max_lt_v8i32c:
2815 ; SSE41: # BB#0:
2816 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,3,5,7]
2817 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295]
2818 ; SSE41-NEXT: pminud {{.*}}(%rip), %xmm0
2819 ; SSE41-NEXT: pminud {{.*}}(%rip), %xmm1
2820 ; SSE41-NEXT: retq
2821 ;
2822 ; SSE42-LABEL: max_lt_v8i32c:
2823 ; SSE42: # BB#0:
2824 ; SSE42-NEXT: movdqa {{.*#+}} xmm1 = [1,3,5,7]
2825 ; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295]
2826 ; SSE42-NEXT: pminud {{.*}}(%rip), %xmm0
2827 ; SSE42-NEXT: pminud {{.*}}(%rip), %xmm1
2828 ; SSE42-NEXT: retq
2829 ;
2830 ; AVX1-LABEL: max_lt_v8i32c:
2831 ; AVX1: # BB#0:
2832 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295]
2833 ; AVX1-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm0
2834 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,3,5,7]
2835 ; AVX1-NEXT: vpminud {{.*}}(%rip), %xmm1, %xmm1
2836 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2837 ; AVX1-NEXT: retq
2838 ;
2839 ; AVX2-LABEL: max_lt_v8i32c:
2840 ; AVX2: # BB#0:
2841 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [4294967289,4294967291,4294967293,4294967295,1,3,5,7]
2842 ; AVX2-NEXT: vpminud {{.*}}(%rip), %ymm0, %ymm0
2843 ; AVX2-NEXT: retq
2844 ;
2845 ; AVX512-LABEL: max_lt_v8i32c:
2846 ; AVX512: # BB#0:
2847 ; AVX512-NEXT: vmovdqa {{.*#+}} ymm0 = [4294967289,4294967291,4294967293,4294967295,1,3,5,7]
2848 ; AVX512-NEXT: vpminud {{.*}}(%rip), %ymm0, %ymm0
2849 ; AVX512-NEXT: retq
2850 %1 = insertelement <8 x i32> , i32 -7, i32 0
2851 %2 = insertelement <8 x i32> , i32 1, i32 0
2852 %3 = icmp ult <8 x i32> %1, %2
2853 %4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2
2854 ret <8 x i32> %4
2855 }
2856
; Constant-input test on <8 x i16>: icmp ult + select %3, %1, %2.
; NOTE(review): despite the max_ prefix, selecting %1 when %1 <u %2 yields the
; unsigned MIN of the two inputs (the non-SSE2 expectations use pminuw); the
; name looks like a copy/paste slip. Renaming would also require updating
; every -LABEL directive below.
; The SSE2 expectation derives the select mask from psubusw + pcmpeqw against
; zero, then blends with pand/pandn/por.
; NOTE(review): both insertelement lines have no vector operand before the
; first comma, so they do not parse as written -- the constant vectors appear
; to have been dropped; restore them from the original test before running.
2857 define <8 x i16> @max_lt_v8i16c() {
2858 ; SSE2-LABEL: max_lt_v8i16c:
2859 ; SSE2: # BB#0:
2860 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [65529,65531,65533,65535,1,3,5,7]
2861 ; SSE2-NEXT: movdqa %xmm1, %xmm2
2862 ; SSE2-NEXT: psubusw {{.*}}(%rip), %xmm2
2863 ; SSE2-NEXT: pxor %xmm0, %xmm0
2864 ; SSE2-NEXT: pcmpeqw %xmm2, %xmm0
2865 ; SSE2-NEXT: pand %xmm0, %xmm1
2866 ; SSE2-NEXT: pandn {{.*}}(%rip), %xmm0
2867 ; SSE2-NEXT: por %xmm1, %xmm0
2868 ; SSE2-NEXT: retq
2869 ;
2870 ; SSE41-LABEL: max_lt_v8i16c:
2871 ; SSE41: # BB#0:
2872 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7]
2873 ; SSE41-NEXT: pminuw {{.*}}(%rip), %xmm0
2874 ; SSE41-NEXT: retq
2875 ;
2876 ; SSE42-LABEL: max_lt_v8i16c:
2877 ; SSE42: # BB#0:
2878 ; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7]
2879 ; SSE42-NEXT: pminuw {{.*}}(%rip), %xmm0
2880 ; SSE42-NEXT: retq
2881 ;
2882 ; AVX-LABEL: max_lt_v8i16c:
2883 ; AVX: # BB#0:
2884 ; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7]
2885 ; AVX-NEXT: vpminuw {{.*}}(%rip), %xmm0, %xmm0
2886 ; AVX-NEXT: retq
2887 %1 = insertelement <8 x i16> , i16 -7, i16 0
2888 %2 = insertelement <8 x i16> , i16 1, i16 0
2889 %3 = icmp ult <8 x i16> %1, %2
2890 %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
2891 ret <8 x i16> %4
2892 }
2893
; Constant-input test on <16 x i16>: icmp ult + select %3, %1, %2.
; NOTE(review): despite the max_ prefix, selecting %1 when %1 <u %2 yields the
; unsigned MIN of the two inputs (the non-SSE2 expectations use pminuw); the
; name looks like a copy/paste slip. Renaming would also require updating
; every -LABEL directive below.
; The 128-bit targets are expected to process the value as two xmm halves.
; The SSE2 expectation uses pcmpgtw on pre-biased constants and a
; pand/pandn/por blend per half.
; NOTE(review): both insertelement lines have no vector operand before the
; first comma, so they do not parse as written -- the constant vectors appear
; to have been dropped; restore them from the original test before running.
2894 define <16 x i16> @max_lt_v16i16c() {
2895 ; SSE2-LABEL: max_lt_v16i16c:
2896 ; SSE2: # BB#0:
2897 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [32775,32774,32773,32772,32771,32770,32769,32768]
2898 ; SSE2-NEXT: pcmpgtw {{.*}}(%rip), %xmm1
2899 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [32769,32766,32765,32764,32763,32762,32761,32768]
2900 ; SSE2-NEXT: pcmpgtw {{.*}}(%rip), %xmm0
2901 ; SSE2-NEXT: movdqa %xmm0, %xmm2
2902 ; SSE2-NEXT: pandn {{.*}}(%rip), %xmm2
2903 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
2904 ; SSE2-NEXT: por %xmm2, %xmm0
2905 ; SSE2-NEXT: movdqa %xmm1, %xmm2
2906 ; SSE2-NEXT: pandn {{.*}}(%rip), %xmm2
2907 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
2908 ; SSE2-NEXT: por %xmm2, %xmm1
2909 ; SSE2-NEXT: retq
2910 ;
2911 ; SSE41-LABEL: max_lt_v16i16c:
2912 ; SSE41: # BB#0:
2913 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8]
2914 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0]
2915 ; SSE41-NEXT: pminuw {{.*}}(%rip), %xmm0
2916 ; SSE41-NEXT: pminuw {{.*}}(%rip), %xmm1
2917 ; SSE41-NEXT: retq
2918 ;
2919 ; SSE42-LABEL: max_lt_v16i16c:
2920 ; SSE42: # BB#0:
2921 ; SSE42-NEXT: movdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8]
2922 ; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0]
2923 ; SSE42-NEXT: pminuw {{.*}}(%rip), %xmm0
2924 ; SSE42-NEXT: pminuw {{.*}}(%rip), %xmm1
2925 ; SSE42-NEXT: retq
2926 ;
2927 ; AVX1-LABEL: max_lt_v16i16c:
2928 ; AVX1: # BB#0:
2929 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0]
2930 ; AVX1-NEXT: vpminuw {{.*}}(%rip), %xmm0, %xmm0
2931 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8]
2932 ; AVX1-NEXT: vpminuw {{.*}}(%rip), %xmm1, %xmm1
2933 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2934 ; AVX1-NEXT: retq
2935 ;
2936 ; AVX2-LABEL: max_lt_v16i16c:
2937 ; AVX2: # BB#0:
2938 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [65529,65530,65531,65532,65533,65534,65535,0,1,2,3,4,5,6,7,8]
2939 ; AVX2-NEXT: vpminuw {{.*}}(%rip), %ymm0, %ymm0
2940 ; AVX2-NEXT: retq
2941 ;
2942 ; AVX512-LABEL: max_lt_v16i16c:
2943 ; AVX512: # BB#0:
2944 ; AVX512-NEXT: vmovdqa {{.*#+}} ymm0 = [65529,65530,65531,65532,65533,65534,65535,0,1,2,3,4,5,6,7,8]
2945 ; AVX512-NEXT: vpminuw {{.*}}(%rip), %ymm0, %ymm0
2946 ; AVX512-NEXT: retq
2947 %1 = insertelement <16 x i16> , i16 -7, i16 0
2948 %2 = insertelement <16 x i16> , i16 1, i16 0
2949 %3 = icmp ult <16 x i16> %1, %2
2950 %4 = select <16 x i1> %3, <16 x i16> %1, <16 x i16> %2
2951 ret <16 x i16> %4
2952 }
2953
; Constant-input test on <16 x i8>: icmp ult + select %3, %1, %2.
; NOTE(review): despite the max_ prefix, selecting %1 when %1 <u %2 yields the
; unsigned MIN of the two inputs (both check groups expect pminub); the name
; looks like a copy/paste slip. Renaming would also require updating every
; -LABEL directive below.
; NOTE(review): both insertelement lines have no vector operand before the
; first comma, so they do not parse as written -- the constant vectors appear
; to have been dropped; restore them from the original test before running.
2954 define <16 x i8> @max_lt_v16i8c() {
2955 ; SSE-LABEL: max_lt_v16i8c:
2956 ; SSE: # BB#0:
2957 ; SSE-NEXT: movdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8]
2958 ; SSE-NEXT: pminub {{.*}}(%rip), %xmm0
2959 ; SSE-NEXT: retq
2960 ;
2961 ; AVX-LABEL: max_lt_v16i8c:
2962 ; AVX: # BB#0:
2963 ; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8]
2964 ; AVX-NEXT: vpminub {{.*}}(%rip), %xmm0, %xmm0
2965 ; AVX-NEXT: retq
2966 %1 = insertelement <16 x i8> , i8 -7, i8 0
2967 %2 = insertelement <16 x i8> , i8 1, i8 0
2968 %3 = icmp ult <16 x i8> %1, %2
2969 %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
2970 ret <16 x i8> %4
2971 }
2972
; Constant-input test on <2 x i64>: icmp ule + select %3, %1, %2.
; NOTE(review): despite the max_ prefix, selecting %1 when %1 <=u %2 yields
; the unsigned MIN of the two inputs; the name looks like a copy/paste slip.
; Renaming would also require updating every -LABEL directive below.
; Every expectation computes a greater-than mask and then inverts it by
; xor-ing with an all-ones register produced by (v)pcmpeqd of a register with
; itself. SSE2 and SSE41 build the 64-bit compare from 32-bit
; pcmpgtd/pcmpeqd; SSE42 and AVX use (v)pcmpgtq on pre-biased constants.
; NOTE(review): both insertelement lines have no vector operand before the
; first comma, so they do not parse as written -- the constant vectors appear
; to have been dropped; restore them from the original test before running.
2973 define <2 x i64> @max_le_v2i64c() {
2974 ; SSE2-LABEL: max_le_v2i64c:
2975 ; SSE2: # BB#0:
2976 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [18446744073709551609,7]
2977 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [18446744073709551615,1]
2978 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
2979 ; SSE2-NEXT: movdqa %xmm0, %xmm3
2980 ; SSE2-NEXT: pxor %xmm2, %xmm3
2981 ; SSE2-NEXT: pxor %xmm1, %xmm0
2982 ; SSE2-NEXT: movdqa %xmm0, %xmm4
2983 ; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
2984 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
2985 ; SSE2-NEXT: pcmpeqd %xmm3, %xmm0
2986 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
2987 ; SSE2-NEXT: pand %xmm5, %xmm0
2988 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
2989 ; SSE2-NEXT: por %xmm0, %xmm3
2990 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
2991 ; SSE2-NEXT: pxor %xmm3, %xmm0
2992 ; SSE2-NEXT: pandn %xmm1, %xmm3
2993 ; SSE2-NEXT: pandn %xmm2, %xmm0
2994 ; SSE2-NEXT: por %xmm3, %xmm0
2995 ; SSE2-NEXT: retq
2996 ;
2997 ; SSE41-LABEL: max_le_v2i64c:
2998 ; SSE41: # BB#0:
2999 ; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [18446744073709551609,7]
3000 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [18446744073709551615,1]
3001 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
3002 ; SSE41-NEXT: movdqa %xmm0, %xmm3
3003 ; SSE41-NEXT: pxor %xmm1, %xmm3
3004 ; SSE41-NEXT: pxor %xmm2, %xmm0
3005 ; SSE41-NEXT: movdqa %xmm0, %xmm4
3006 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm4
3007 ; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
3008 ; SSE41-NEXT: pcmpeqd %xmm3, %xmm0
3009 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
3010 ; SSE41-NEXT: pand %xmm5, %xmm0
3011 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
3012 ; SSE41-NEXT: por %xmm0, %xmm3
3013 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
3014 ; SSE41-NEXT: pxor %xmm3, %xmm0
3015 ; SSE41-NEXT: blendvpd %xmm2, %xmm1
3016 ; SSE41-NEXT: movapd %xmm1, %xmm0
3017 ; SSE41-NEXT: retq
3018 ;
3019 ; SSE42-LABEL: max_le_v2i64c:
3020 ; SSE42: # BB#0:
3021 ; SSE42-NEXT: movapd {{.*#+}} xmm1 = [18446744073709551615,1]
3022 ; SSE42-NEXT: movdqa {{.*#+}} xmm2 = [9223372036854775801,9223372036854775815]
3023 ; SSE42-NEXT: pcmpgtq {{.*}}(%rip), %xmm2
3024 ; SSE42-NEXT: pcmpeqd %xmm0, %xmm0
3025 ; SSE42-NEXT: pxor %xmm2, %xmm0
3026 ; SSE42-NEXT: blendvpd {{.*}}(%rip), %xmm1
3027 ; SSE42-NEXT: movapd %xmm1, %xmm0
3028 ; SSE42-NEXT: retq
3029 ;
3030 ; AVX-LABEL: max_le_v2i64c:
3031 ; AVX: # BB#0:
3032 ; AVX-NEXT: vmovapd {{.*#+}} xmm0 = [18446744073709551615,1]
3033 ; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775801,9223372036854775815]
3034 ; AVX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm1, %xmm1
3035 ; AVX-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
3036 ; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1
3037 ; AVX-NEXT: vblendvpd %xmm1, {{.*}}(%rip), %xmm0, %xmm0
3038 ; AVX-NEXT: retq
3039 %1 = insertelement <2 x i64> , i64 -7, i32 0
3040 %2 = insertelement <2 x i64> , i64 -1, i32 0
3041 %3 = icmp ule <2 x i64> %1, %2
3042 %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
3043 ret <2 x i64> %4
3044 }
3045
; Constant-input test on <4 x i64>: icmp ule + select %3, %1, %2.
; NOTE(review): despite the max_ prefix, selecting %1 when %1 <=u %2 yields
; the unsigned MIN of the two inputs; the name looks like a copy/paste slip.
; Renaming would also require updating every -LABEL directive below.
; The 128-bit targets are expected to process the value as two xmm halves.
; Every expectation computes a greater-than mask and inverts it by xor-ing
; with an all-ones register produced by (v)pcmpeqd of a register with itself.
; SSE2 and SSE41 build the 64-bit compare from 32-bit pcmpgtd/pcmpeqd; SSE42
; and the AVX variants use (v)pcmpgtq on pre-biased constants.
; NOTE(review): both insertelement lines have no vector operand before the
; first comma, so they do not parse as written -- the constant vectors appear
; to have been dropped; restore them from the original test before running.
3046 define <4 x i64> @max_le_v4i64c() {
3047 ; SSE2-LABEL: max_le_v4i64c:
3048 ; SSE2: # BB#0:
3049 ; SSE2-NEXT: movdqa {{.*#+}} xmm10 = [18446744073709551609,18446744073709551615]
3050 ; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [1,7]
3051 ; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [18446744073709551615,18446744073709551609]
3052 ; SSE2-NEXT: movdqa {{.*#+}} xmm9 = [7,1]
3053 ; SSE2-NEXT: movdqa {{.*#+}} xmm7 = [2147483648,2147483648,2147483648,2147483648]
3054 ; SSE2-NEXT: movdqa %xmm7, %xmm0
3055 ; SSE2-NEXT: pxor %xmm9, %xmm0
3056 ; SSE2-NEXT: movdqa %xmm7, %xmm1
3057 ; SSE2-NEXT: pxor %xmm8, %xmm1
3058 ; SSE2-NEXT: movdqa %xmm1, %xmm6
3059 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm6
3060 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm6[0,0,2,2]
3061 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm1
3062 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
3063 ; SSE2-NEXT: pand %xmm2, %xmm0
3064 ; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
3065 ; SSE2-NEXT: por %xmm0, %xmm6
3066 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
3067 ; SSE2-NEXT: movdqa %xmm6, %xmm1
3068 ; SSE2-NEXT: pxor %xmm0, %xmm1
3069 ; SSE2-NEXT: movdqa %xmm7, %xmm2
3070 ; SSE2-NEXT: pxor %xmm5, %xmm2
3071 ; SSE2-NEXT: pxor %xmm10, %xmm7
3072 ; SSE2-NEXT: movdqa %xmm7, %xmm3
3073 ; SSE2-NEXT: pcmpgtd %xmm2, %xmm3
3074 ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
3075 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm7
3076 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm7[1,1,3,3]
3077 ; SSE2-NEXT: pand %xmm4, %xmm2
3078 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
3079 ; SSE2-NEXT: por %xmm2, %xmm3
3080 ; SSE2-NEXT: pxor %xmm3, %xmm0
3081 ; SSE2-NEXT: pandn %xmm10, %xmm3
3082 ; SSE2-NEXT: pandn %xmm5, %xmm0
3083 ; SSE2-NEXT: por %xmm3, %xmm0
3084 ; SSE2-NEXT: pandn %xmm8, %xmm6
3085 ; SSE2-NEXT: pandn %xmm9, %xmm1
3086 ; SSE2-NEXT: por %xmm6, %xmm1
3087 ; SSE2-NEXT: retq
3088 ;
3089 ; SSE41-LABEL: max_le_v4i64c:
3090 ; SSE41: # BB#0:
3091 ; SSE41-NEXT: movdqa {{.*#+}} xmm9 = [18446744073709551609,18446744073709551615]
3092 ; SSE41-NEXT: movdqa {{.*#+}} xmm8 = [1,7]
3093 ; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [18446744073709551615,18446744073709551609]
3094 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [7,1]
3095 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
3096 ; SSE41-NEXT: movdqa %xmm0, %xmm3
3097 ; SSE41-NEXT: pxor %xmm1, %xmm3
3098 ; SSE41-NEXT: movdqa %xmm0, %xmm6
3099 ; SSE41-NEXT: pxor %xmm8, %xmm6
3100 ; SSE41-NEXT: movdqa %xmm6, %xmm7
3101 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm7
3102 ; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm7[0,0,2,2]
3103 ; SSE41-NEXT: pcmpeqd %xmm3, %xmm6
3104 ; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
3105 ; SSE41-NEXT: pand %xmm4, %xmm6
3106 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm7[1,1,3,3]
3107 ; SSE41-NEXT: por %xmm6, %xmm3
3108 ; SSE41-NEXT: pcmpeqd %xmm4, %xmm4
3109 ; SSE41-NEXT: pxor %xmm4, %xmm3
3110 ; SSE41-NEXT: movdqa %xmm0, %xmm6
3111 ; SSE41-NEXT: pxor %xmm2, %xmm6
3112 ; SSE41-NEXT: pxor %xmm9, %xmm0
3113 ; SSE41-NEXT: movdqa %xmm0, %xmm7
3114 ; SSE41-NEXT: pcmpgtd %xmm6, %xmm7
3115 ; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm7[0,0,2,2]
3116 ; SSE41-NEXT: pcmpeqd %xmm6, %xmm0
3117 ; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
3118 ; SSE41-NEXT: pand %xmm5, %xmm6
3119 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm7[1,1,3,3]
3120 ; SSE41-NEXT: por %xmm6, %xmm0
3121 ; SSE41-NEXT: pxor %xmm4, %xmm0
3122 ; SSE41-NEXT: blendvpd %xmm9, %xmm2
3123 ; SSE41-NEXT: movdqa %xmm3, %xmm0
3124 ; SSE41-NEXT: blendvpd %xmm8, %xmm1
3125 ; SSE41-NEXT: movapd %xmm2, %xmm0
3126 ; SSE41-NEXT: retq
3127 ;
3128 ; SSE42-LABEL: max_le_v4i64c:
3129 ; SSE42: # BB#0:
3130 ; SSE42-NEXT: movapd {{.*#+}} xmm1 = [7,1]
3131 ; SSE42-NEXT: movapd {{.*#+}} xmm2 = [18446744073709551615,18446744073709551609]
3132 ; SSE42-NEXT: movdqa {{.*#+}} xmm3 = [9223372036854775809,9223372036854775815]
3133 ; SSE42-NEXT: pcmpgtq {{.*}}(%rip), %xmm3
3134 ; SSE42-NEXT: pcmpeqd %xmm4, %xmm4
3135 ; SSE42-NEXT: pxor %xmm4, %xmm3
3136 ; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775801,9223372036854775807]
3137 ; SSE42-NEXT: pcmpgtq {{.*}}(%rip), %xmm0
3138 ; SSE42-NEXT: pxor %xmm4, %xmm0
3139 ; SSE42-NEXT: blendvpd {{.*}}(%rip), %xmm2
3140 ; SSE42-NEXT: movdqa %xmm3, %xmm0
3141 ; SSE42-NEXT: blendvpd {{.*}}(%rip), %xmm1
3142 ; SSE42-NEXT: movapd %xmm2, %xmm0
3143 ; SSE42-NEXT: retq
3144 ;
3145 ; AVX1-LABEL: max_le_v4i64c:
3146 ; AVX1: # BB#0:
3147 ; AVX1-NEXT: vmovapd {{.*#+}} ymm0 = [18446744073709551615,18446744073709551609,7,1]
3148 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775809,9223372036854775815]
3149 ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm1, %xmm1
3150 ; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
3151 ; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm1
3152 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854775801,9223372036854775807]
3153 ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm3, %xmm3
3154 ; AVX1-NEXT: vpxor %xmm2, %xmm3, %xmm2
3155 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
3156 ; AVX1-NEXT: vblendvpd %ymm1, {{.*}}(%rip), %ymm0, %ymm0
3157 ; AVX1-NEXT: retq
3158 ;
3159 ; AVX2-LABEL: max_le_v4i64c:
3160 ; AVX2: # BB#0:
3161 ; AVX2-NEXT: vmovapd {{.*#+}} ymm0 = [18446744073709551615,18446744073709551609,7,1]
3162 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [9223372036854775801,9223372036854775807,9223372036854775809,9223372036854775815]
3163 ; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %ymm1, %ymm1
3164 ; AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
3165 ; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm1
3166 ; AVX2-NEXT: vblendvpd %ymm1, {{.*}}(%rip), %ymm0, %ymm0
3167 ; AVX2-NEXT: retq
3168 ;
3169 ; AVX512-LABEL: max_le_v4i64c:
3170 ; AVX512: # BB#0:
3171 ; AVX512-NEXT: vmovapd {{.*#+}} ymm0 = [18446744073709551615,18446744073709551609,7,1]
3172 ; AVX512-NEXT: vmovdqa {{.*#+}} ymm1 = [9223372036854775801,9223372036854775807,9223372036854775809,9223372036854775815]
3173 ; AVX512-NEXT: vpcmpgtq {{.*}}(%rip), %ymm1, %ymm1
3174 ; AVX512-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
3175 ; AVX512-NEXT: vpxor %ymm2, %ymm1, %ymm1
3176 ; AVX512-NEXT: vblendvpd %ymm1, {{.*}}(%rip), %ymm0, %ymm0
3177 ; AVX512-NEXT: retq
3178 %1 = insertelement <4 x i64> , i64 -7, i32 0
3179 %2 = insertelement <4 x i64> , i64 -1, i32 0
3180 %3 = icmp ule <4 x i64> %1, %2
3181 %4 = select <4 x i1> %3, <4 x i64> %1, <4 x i64> %2
3182 ret <4 x i64> %4
3183 }
3184
; Constant-input test on <4 x i32>: icmp ule + select %3, %1, %2.
; NOTE(review): despite the max_ prefix, selecting %1 when %1 <=u %2 yields
; the unsigned MIN of the two inputs (the non-SSE2 expectations use pminud);
; the name looks like a copy/paste slip. Renaming would also require updating
; every -LABEL directive below.
; The SSE2 expectation uses pcmpgtd on pre-biased constants, inverts the mask
; by xor-ing with an all-ones register (pcmpeqd of xmm1 with itself), and
; blends with two pandn plus por.
; NOTE(review): both insertelement lines have no vector operand before the
; first comma, so they do not parse as written -- the constant vectors appear
; to have been dropped; restore them from the original test before running.
3185 define <4 x i32> @max_le_v4i32c() {
3186 ; SSE2-LABEL: max_le_v4i32c:
3187 ; SSE2: # BB#0:
3188 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483641,2147483647,2147483649,2147483655]
3189 ; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm0
3190 ; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
3191 ; SSE2-NEXT: pxor %xmm0, %xmm1
3192 ; SSE2-NEXT: pandn {{.*}}(%rip), %xmm1
3193 ; SSE2-NEXT: pandn {{.*}}(%rip), %xmm0
3194 ; SSE2-NEXT: por %xmm1, %xmm0
3195 ; SSE2-NEXT: retq
3196 ;
3197 ; SSE41-LABEL: max_le_v4i32c:
3198 ; SSE41: # BB#0:
3199 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7]
3200 ; SSE41-NEXT: pminud {{.*}}(%rip), %xmm0
3201 ; SSE41-NEXT: retq
3202 ;
3203 ; SSE42-LABEL: max_le_v4i32c:
3204 ; SSE42: # BB#0:
3205 ; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7]
3206 ; SSE42-NEXT: pminud {{.*}}(%rip), %xmm0
3207 ; SSE42-NEXT: retq
3208 ;
3209 ; AVX-LABEL: max_le_v4i32c:
3210 ; AVX: # BB#0:
3211 ; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7]
3212 ; AVX-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm0
3213 ; AVX-NEXT: retq
3214 %1 = insertelement <4 x i32> , i32 -7, i32 0
3215 %2 = insertelement <4 x i32> , i32 1, i32 0
3216 %3 = icmp ule <4 x i32> %1, %2
3217 %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
3218 ret <4 x i32> %4
3219 }
3220
; Constant-input test on <8 x i32>: icmp ule + select %3, %1, %2.
; NOTE(review): despite the max_ prefix, selecting %1 when %1 <=u %2 yields
; the unsigned MIN of the two inputs (the non-SSE2 expectations use pminud);
; the name looks like a copy/paste slip. Renaming would also require updating
; every -LABEL directive below.
; The 128-bit targets are expected to process the value as two xmm halves.
; The SSE2 expectation uses pcmpgtd on pre-biased constants, inverts each mask
; by xor-ing with an all-ones register, and blends with pandn/pandn/por.
; NOTE(review): both insertelement lines have no vector operand before the
; first comma, so they do not parse as written -- the constant vectors appear
; to have been dropped; restore them from the original test before running.
3221 define <8 x i32> @max_le_v8i32c() {
3222 ; SSE2-LABEL: max_le_v8i32c:
3223 ; SSE2: # BB#0:
3224 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483649,2147483651,2147483653,2147483655]
3225 ; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm1
3226 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
3227 ; SSE2-NEXT: movdqa %xmm1, %xmm3
3228 ; SSE2-NEXT: pxor %xmm2, %xmm3
3229 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483641,2147483643,2147483645,2147483647]
3230 ; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm0
3231 ; SSE2-NEXT: pxor %xmm0, %xmm2
3232 ; SSE2-NEXT: pandn {{.*}}(%rip), %xmm2
3233 ; SSE2-NEXT: pandn {{.*}}(%rip), %xmm0
3234 ; SSE2-NEXT: por %xmm2, %xmm0
3235 ; SSE2-NEXT: pandn {{.*}}(%rip), %xmm3
3236 ; SSE2-NEXT: pandn {{.*}}(%rip), %xmm1
3237 ; SSE2-NEXT: por %xmm3, %xmm1
3238 ; SSE2-NEXT: retq
3239 ;
3240 ; SSE41-LABEL: max_le_v8i32c:
3241 ; SSE41: # BB#0:
3242 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,3,5,7]
3243 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295]
3244 ; SSE41-NEXT: pminud {{.*}}(%rip), %xmm0
3245 ; SSE41-NEXT: pminud {{.*}}(%rip), %xmm1
3246 ; SSE41-NEXT: retq
3247 ;
3248 ; SSE42-LABEL: max_le_v8i32c:
3249 ; SSE42: # BB#0:
3250 ; SSE42-NEXT: movdqa {{.*#+}} xmm1 = [1,3,5,7]
3251 ; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295]
3252 ; SSE42-NEXT: pminud {{.*}}(%rip), %xmm0
3253 ; SSE42-NEXT: pminud {{.*}}(%rip), %xmm1
3254 ; SSE42-NEXT: retq
3255 ;
3256 ; AVX1-LABEL: max_le_v8i32c:
3257 ; AVX1: # BB#0:
3258 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295]
3259 ; AVX1-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm0
3260 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,3,5,7]
3261 ; AVX1-NEXT: vpminud {{.*}}(%rip), %xmm1, %xmm1
3262 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3263 ; AVX1-NEXT: retq
3264 ;
3265 ; AVX2-LABEL: max_le_v8i32c:
3266 ; AVX2: # BB#0:
3267 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [4294967289,4294967291,4294967293,4294967295,1,3,5,7]
3268 ; AVX2-NEXT: vpminud {{.*}}(%rip), %ymm0, %ymm0
3269 ; AVX2-NEXT: retq
3270 ;
3271 ; AVX512-LABEL: max_le_v8i32c:
3272 ; AVX512: # BB#0:
3273 ; AVX512-NEXT: vmovdqa {{.*#+}} ymm0 = [4294967289,4294967291,4294967293,4294967295,1,3,5,7]
3274 ; AVX512-NEXT: vpminud {{.*}}(%rip), %ymm0, %ymm0
3275 ; AVX512-NEXT: retq
3276 %1 = insertelement <8 x i32> , i32 -7, i32 0
3277 %2 = insertelement <8 x i32> , i32 1, i32 0
3278 %3 = icmp ule <8 x i32> %1, %2
3279 %4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2
3280 ret <8 x i32> %4
3281 }
3282
; Constant-operand test on <8 x i16>. `icmp ule` followed by
; `select %3, %1, %2` keeps the unsigned-smaller lane of each pair, so despite
; the max_ prefix in its name this function computes an unsigned minimum,
; which is what the pminuw/vpminuw expectations below reflect.
3283 define <8 x i16> @max_le_v8i16c() {
3284 ; SSE2-LABEL: max_le_v8i16c:
3285 ; SSE2: # BB#0:
3286 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [65529,65531,65533,65535,1,3,5,7]
3287 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1,65533,65531,65529,7,5,3,1]
3288 ; SSE2-NEXT: movdqa %xmm1, %xmm3
3289 ; SSE2-NEXT: psubusw %xmm2, %xmm3
3290 ; SSE2-NEXT: pxor %xmm0, %xmm0
3291 ; SSE2-NEXT: pcmpeqw %xmm3, %xmm0
3292 ; SSE2-NEXT: pand %xmm0, %xmm1
3293 ; SSE2-NEXT: pandn %xmm2, %xmm0
3294 ; SSE2-NEXT: por %xmm1, %xmm0
3295 ; SSE2-NEXT: retq
3296 ;
3297 ; SSE41-LABEL: max_le_v8i16c:
3298 ; SSE41: # BB#0:
3299 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7]
3300 ; SSE41-NEXT: pminuw {{.*}}(%rip), %xmm0
3301 ; SSE41-NEXT: retq
3302 ;
3303 ; SSE42-LABEL: max_le_v8i16c:
3304 ; SSE42: # BB#0:
3305 ; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7]
3306 ; SSE42-NEXT: pminuw {{.*}}(%rip), %xmm0
3307 ; SSE42-NEXT: retq
3308 ;
3309 ; AVX-LABEL: max_le_v8i16c:
3310 ; AVX: # BB#0:
3311 ; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7]
3312 ; AVX-NEXT: vpminuw {{.*}}(%rip), %xmm0, %xmm0
3313 ; AVX-NEXT: retq
; Both base vectors correspond to the constants that the SSE2 expectations
; load into xmm1 (%1) and xmm2 (%2); 65529 is -7 and 65533 is -3 as unsigned
; i16. Lane 0 of each base vector is overwritten by the insertelement, so its
; written value does not reach the result (xmm2 shows the inserted 1).
3314 %1 = insertelement <8 x i16> <i16 -7, i16 -5, i16 -3, i16 -1, i16 1, i16 3, i16 5, i16 7>, i16 -7, i16 0
3315 %2 = insertelement <8 x i16> <i16 -1, i16 -3, i16 -5, i16 -7, i16 7, i16 5, i16 3, i16 1>, i16 1, i16 0
3316 %3 = icmp ule <8 x i16> %1, %2
3317 %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
3318 ret <8 x i16> %4
3319 }
3320
; Constant-operand test on <16 x i16>. `icmp ule` followed by
; `select %3, %1, %2` keeps the unsigned-smaller lane of each pair, so despite
; the max_ prefix in its name this function computes an unsigned minimum,
; which is what the pminuw/vpminuw expectations below reflect.
3321 define <16 x i16> @max_le_v16i16c() {
3322 ; SSE2-LABEL: max_le_v16i16c:
3323 ; SSE2: # BB#0:
3324 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65529,65530,65531,65532,65533,65534,65535,0]
3325 ; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [1,2,3,4,5,6,7,8]
3326 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [1,65534,65533,65532,65531,65530,65529,0]
3327 ; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [7,6,5,4,3,2,1,0]
3328 ; SSE2-NEXT: movdqa %xmm3, %xmm1
3329 ; SSE2-NEXT: psubusw %xmm5, %xmm1
3330 ; SSE2-NEXT: pxor %xmm6, %xmm6
3331 ; SSE2-NEXT: pcmpeqw %xmm6, %xmm1
3332 ; SSE2-NEXT: movdqa %xmm2, %xmm0
3333 ; SSE2-NEXT: psubusw %xmm4, %xmm0
3334 ; SSE2-NEXT: pcmpeqw %xmm6, %xmm0
3335 ; SSE2-NEXT: pand %xmm0, %xmm2
3336 ; SSE2-NEXT: pandn %xmm4, %xmm0
3337 ; SSE2-NEXT: por %xmm2, %xmm0
3338 ; SSE2-NEXT: pand %xmm1, %xmm3
3339 ; SSE2-NEXT: pandn %xmm5, %xmm1
3340 ; SSE2-NEXT: por %xmm3, %xmm1
3341 ; SSE2-NEXT: retq
3342 ;
3343 ; SSE41-LABEL: max_le_v16i16c:
3344 ; SSE41: # BB#0:
3345 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8]
3346 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0]
3347 ; SSE41-NEXT: pminuw {{.*}}(%rip), %xmm0
3348 ; SSE41-NEXT: pminuw {{.*}}(%rip), %xmm1
3349 ; SSE41-NEXT: retq
3350 ;
3351 ; SSE42-LABEL: max_le_v16i16c:
3352 ; SSE42: # BB#0:
3353 ; SSE42-NEXT: movdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8]
3354 ; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0]
3355 ; SSE42-NEXT: pminuw {{.*}}(%rip), %xmm0
3356 ; SSE42-NEXT: pminuw {{.*}}(%rip), %xmm1
3357 ; SSE42-NEXT: retq
3358 ;
3359 ; AVX1-LABEL: max_le_v16i16c:
3360 ; AVX1: # BB#0:
3361 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0]
3362 ; AVX1-NEXT: vpminuw {{.*}}(%rip), %xmm0, %xmm0
3363 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8]
3364 ; AVX1-NEXT: vpminuw {{.*}}(%rip), %xmm1, %xmm1
3365 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3366 ; AVX1-NEXT: retq
3367 ;
3368 ; AVX2-LABEL: max_le_v16i16c:
3369 ; AVX2: # BB#0:
3370 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [65529,65530,65531,65532,65533,65534,65535,0,1,2,3,4,5,6,7,8]
3371 ; AVX2-NEXT: vpminuw {{.*}}(%rip), %ymm0, %ymm0
3372 ; AVX2-NEXT: retq
3373 ;
3374 ; AVX512-LABEL: max_le_v16i16c:
3375 ; AVX512: # BB#0:
3376 ; AVX512-NEXT: vmovdqa {{.*#+}} ymm0 = [65529,65530,65531,65532,65533,65534,65535,0,1,2,3,4,5,6,7,8]
3377 ; AVX512-NEXT: vpminuw {{.*}}(%rip), %ymm0, %ymm0
3378 ; AVX512-NEXT: retq
; Both base vectors correspond to the four constants in the SSE2 expectations
; above, where xmm2/xmm3 hold the low/high halves of %1 and xmm4/xmm5 hold the
; low/high halves of %2 (65529 is -7 as an unsigned i16). Lane 0 of each base
; vector is overwritten by the insertelement, so its written value does not
; reach the result (xmm4 shows the inserted 1).
3379 %1 = insertelement <16 x i16> <i16 -7, i16 -6, i16 -5, i16 -4, i16 -3, i16 -2, i16 -1, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i16 -7, i16 0
3380 %2 = insertelement <16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 0, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 1, i16 0
3381 %3 = icmp ule <16 x i16> %1, %2
3382 %4 = select <16 x i1> %3, <16 x i16> %1, <16 x i16> %2
3383 ret <16 x i16> %4
3384 }
3385
; Constant-operand test on <16 x i8>. `icmp ule` followed by
; `select %3, %1, %2` keeps the unsigned-smaller lane of each pair, so despite
; the max_ prefix in its name this function computes an unsigned minimum,
; which is what the pminub/vpminub expectations below reflect.
3386 define <16 x i8> @max_le_v16i8c() {
3387 ; SSE-LABEL: max_le_v16i8c:
3388 ; SSE: # BB#0:
3389 ; SSE-NEXT: movdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8]
3390 ; SSE-NEXT: pminub {{.*}}(%rip), %xmm0
3391 ; SSE-NEXT: retq
3392 ;
3393 ; AVX-LABEL: max_le_v16i8c:
3394 ; AVX: # BB#0:
3395 ; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8]
3396 ; AVX-NEXT: vpminub {{.*}}(%rip), %xmm0, %xmm0
3397 ; AVX-NEXT: retq
; The base vector of %1 matches the constant the expectations above load
; (249 is -7 as an unsigned i8). Lane 0 of each base vector is overwritten by
; the insertelement, so its written value does not reach the result.
; NOTE(review) the base vector of %2 is only ever referenced through a
; (%rip) memory operand above, so its lanes are not visible here; they are
; reconstructed by analogy with the xmm4/xmm5 constants in the
; max_le_v16i16c expectations - confirm against the upstream test before
; relying on them.
3398 %1 = insertelement <16 x i8> <i8 -7, i8 -6, i8 -5, i8 -4, i8 -3, i8 -2, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i8 -7, i8 0
3399 %2 = insertelement <16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, i8 1, i8 0
3400 %3 = icmp ule <16 x i8> %1, %2
3401 %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
3402 ret <16 x i8> %4
3403 }