llvm.org GIT mirror llvm / 29bd117
[X86] Add custom execution domain fixing for 128/256-bit integer logic operations with AVX512F, but not AVX512DQ. AVX512F only has integer domain logic instructions. AVX512DQ added FP domain logic instructions. Execution domain fixing runs before EVEX->VEX. So if we have AVX512F and not AVX512DQ we fail to do execution domain switching of the logic operations. This leads to mismatches in execution domain and more test differences. This patch adds custom domain fixing that switches EVEX integer logic operations to VEX fp logic operations if XMM16-31 are not used. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@337137 91177308-0d34-0410-b5e6-96231b3b80d8 Craig Topper 1 year, 7 months ago
28 changed file(s) with 1428 addition(s) and 1807 deletion(s). Raw diff Collapse all Expand all
63106310 { X86::VBLENDPSYrri, X86::VBLENDPDYrri, X86::VPBLENDDYrri },
63116311 };
63126312
6313 // Special table for changing EVEX integer logic instructions (the Z128/Z256
6314 // opcodes in the last two columns) to the VEX opcodes in the first two columns.
6315 static const uint16_t ReplaceableCustomAVX512LogicInstrs[][4] = {
6316 // Two integer columns: 64-bit (Q) elements first, then 32-bit (D) elements.
6317 //PackedSingle PackedDouble PackedInt(Q) PackedInt(D)
{ X86::VANDNPSrm, X86::VANDNPDrm, X86::VPANDNQZ128rm, X86::VPANDNDZ128rm },
{ X86::VANDNPSrr, X86::VANDNPDrr, X86::VPANDNQZ128rr, X86::VPANDNDZ128rr },
{ X86::VANDPSrm, X86::VANDPDrm, X86::VPANDQZ128rm, X86::VPANDDZ128rm },
{ X86::VANDPSrr, X86::VANDPDrr, X86::VPANDQZ128rr, X86::VPANDDZ128rr },
{ X86::VORPSrm, X86::VORPDrm, X86::VPORQZ128rm, X86::VPORDZ128rm },
{ X86::VORPSrr, X86::VORPDrr, X86::VPORQZ128rr, X86::VPORDZ128rr },
{ X86::VXORPSrm, X86::VXORPDrm, X86::VPXORQZ128rm, X86::VPXORDZ128rm },
{ X86::VXORPSrr, X86::VXORPDrr, X86::VPXORQZ128rr, X86::VPXORDZ128rr },
{ X86::VANDNPSYrm, X86::VANDNPDYrm, X86::VPANDNQZ256rm, X86::VPANDNDZ256rm },
{ X86::VANDNPSYrr, X86::VANDNPDYrr, X86::VPANDNQZ256rr, X86::VPANDNDZ256rr },
{ X86::VANDPSYrm, X86::VANDPDYrm, X86::VPANDQZ256rm, X86::VPANDDZ256rm },
{ X86::VANDPSYrr, X86::VANDPDYrr, X86::VPANDQZ256rr, X86::VPANDDZ256rr },
{ X86::VORPSYrm, X86::VORPDYrm, X86::VPORQZ256rm, X86::VPORDZ256rm },
{ X86::VORPSYrr, X86::VORPDYrr, X86::VPORQZ256rr, X86::VPORDZ256rr },
{ X86::VXORPSYrm, X86::VXORPDYrm, X86::VPXORQZ256rm, X86::VPXORDZ256rm },
{ X86::VXORPSYrr, X86::VXORPDYrr, X86::VPXORQZ256rr, X86::VPXORDZ256rr },
6334 };
6335
63136336 // FIXME: Some shuffle and unpack instructions have equivalents in different
63146337 // domains, but they require a bit more work than just switching opcodes.
63156338
64096432 case X86::VPBLENDWYrmi:
64106433 case X86::VPBLENDWYrri:
64116434 return GetBlendDomains(8, false);
6435 case X86::VPANDDZ128rr: case X86::VPANDDZ128rm:
6436 case X86::VPANDDZ256rr: case X86::VPANDDZ256rm:
6437 case X86::VPANDQZ128rr: case X86::VPANDQZ128rm:
6438 case X86::VPANDQZ256rr: case X86::VPANDQZ256rm:
6439 case X86::VPANDNDZ128rr: case X86::VPANDNDZ128rm:
6440 case X86::VPANDNDZ256rr: case X86::VPANDNDZ256rm:
6441 case X86::VPANDNQZ128rr: case X86::VPANDNQZ128rm:
6442 case X86::VPANDNQZ256rr: case X86::VPANDNQZ256rm:
6443 case X86::VPORDZ128rr: case X86::VPORDZ128rm:
6444 case X86::VPORDZ256rr: case X86::VPORDZ256rm:
6445 case X86::VPORQZ128rr: case X86::VPORQZ128rm:
6446 case X86::VPORQZ256rr: case X86::VPORQZ256rm:
6447 case X86::VPXORDZ128rr: case X86::VPXORDZ128rm:
6448 case X86::VPXORDZ256rr: case X86::VPXORDZ256rm:
6449 case X86::VPXORQZ128rr: case X86::VPXORQZ128rm:
6450 case X86::VPXORQZ256rr: case X86::VPXORQZ256rm:
6451 // If we don't have DQI see if we can still switch from an EVEX integer
6452 // instruction to a VEX floating point instruction.
6453 if (Subtarget.hasDQI())
6454 return 0;
6455
6456 if (RI.getEncodingValue(MI.getOperand(0).getReg()) >= 16)
6457 return 0;
6458 if (RI.getEncodingValue(MI.getOperand(1).getReg()) >= 16)
6459 return 0;
6460 // Register forms will have 3 operands. Memory form will have more.
6461 if (NumOperands == 3 &&
6462 RI.getEncodingValue(MI.getOperand(2).getReg()) >= 16)
6463 return 0;
6464
6465 // All domains are valid.
6466 return 0xe;
64126467 }
64136468 return 0;
64146469 }
64856540 case X86::VPBLENDWYrmi:
64866541 case X86::VPBLENDWYrri:
64876542 return SetBlendDomain(16, true);
6543 case X86::VPANDDZ128rr: case X86::VPANDDZ128rm:
6544 case X86::VPANDDZ256rr: case X86::VPANDDZ256rm:
6545 case X86::VPANDQZ128rr: case X86::VPANDQZ128rm:
6546 case X86::VPANDQZ256rr: case X86::VPANDQZ256rm:
6547 case X86::VPANDNDZ128rr: case X86::VPANDNDZ128rm:
6548 case X86::VPANDNDZ256rr: case X86::VPANDNDZ256rm:
6549 case X86::VPANDNQZ128rr: case X86::VPANDNQZ128rm:
6550 case X86::VPANDNQZ256rr: case X86::VPANDNQZ256rm:
6551 case X86::VPORDZ128rr: case X86::VPORDZ128rm:
6552 case X86::VPORDZ256rr: case X86::VPORDZ256rm:
6553 case X86::VPORQZ128rr: case X86::VPORQZ128rm:
6554 case X86::VPORQZ256rr: case X86::VPORQZ256rm:
6555 case X86::VPXORDZ128rr: case X86::VPXORDZ128rm:
6556 case X86::VPXORDZ256rr: case X86::VPXORDZ256rm:
6557 case X86::VPXORQZ128rr: case X86::VPXORQZ128rm:
6558 case X86::VPXORQZ256rr: case X86::VPXORQZ256rm: {
6559 // Without DQI, convert EVEX instructions to VEX instructions.
6560 if (Subtarget.hasDQI())
6561 return false;
6562
6563 const uint16_t *table = lookupAVX512(MI.getOpcode(), dom,
6564 ReplaceableCustomAVX512LogicInstrs);
6565 assert(table && "Instruction not found in table?");
6566 // Don't change integer Q instructions to D instructions and
6567 // use D instructions if we started with a PS instruction.
6568 if (Domain == 3 && (dom == 1 || table[3] == MI.getOpcode()))
6569 Domain = 4;
6570 MI.setDesc(get(table[Domain - 1]));
6571 return true;
6572 }
64886573 }
64896574 return false;
64906575 }
15411541 }
15421542
15431543 define <16 x double> @sbto16f64(<16 x double> %a) {
1544 ; NOVLDQ-LABEL: sbto16f64:
1545 ; NOVLDQ: # %bb.0:
1546 ; NOVLDQ-NEXT: vxorpd %xmm2, %xmm2, %xmm2
1547 ; NOVLDQ-NEXT: vcmpltpd %zmm0, %zmm2, %k0
1548 ; NOVLDQ-NEXT: vcmpltpd %zmm1, %zmm2, %k1
1549 ; NOVLDQ-NEXT: kunpckbw %k0, %k1, %k1
1550 ; NOVLDQ-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
1551 ; NOVLDQ-NEXT: vcvtdq2pd %ymm1, %zmm0
1552 ; NOVLDQ-NEXT: vextracti64x4 $1, %zmm1, %ymm1
1553 ; NOVLDQ-NEXT: vcvtdq2pd %ymm1, %zmm1
1554 ; NOVLDQ-NEXT: retq
1544 ; NODQ-LABEL: sbto16f64:
1545 ; NODQ: # %bb.0:
1546 ; NODQ-NEXT: vxorpd %xmm2, %xmm2, %xmm2
1547 ; NODQ-NEXT: vcmpltpd %zmm0, %zmm2, %k0
1548 ; NODQ-NEXT: vcmpltpd %zmm1, %zmm2, %k1
1549 ; NODQ-NEXT: kunpckbw %k0, %k1, %k1
1550 ; NODQ-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
1551 ; NODQ-NEXT: vcvtdq2pd %ymm1, %zmm0
1552 ; NODQ-NEXT: vextracti64x4 $1, %zmm1, %ymm1
1553 ; NODQ-NEXT: vcvtdq2pd %ymm1, %zmm1
1554 ; NODQ-NEXT: retq
15551555 ;
15561556 ; VLDQ-LABEL: sbto16f64:
15571557 ; VLDQ: # %bb.0:
15651565 ; VLDQ-NEXT: vcvtdq2pd %ymm1, %zmm1
15661566 ; VLDQ-NEXT: retq
15671567 ;
1568 ; VLNODQ-LABEL: sbto16f64:
1569 ; VLNODQ: # %bb.0:
1570 ; VLNODQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
1571 ; VLNODQ-NEXT: vcmpltpd %zmm0, %zmm2, %k0
1572 ; VLNODQ-NEXT: vcmpltpd %zmm1, %zmm2, %k1
1573 ; VLNODQ-NEXT: kunpckbw %k0, %k1, %k1
1574 ; VLNODQ-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
1575 ; VLNODQ-NEXT: vcvtdq2pd %ymm1, %zmm0
1576 ; VLNODQ-NEXT: vextracti64x4 $1, %zmm1, %ymm1
1577 ; VLNODQ-NEXT: vcvtdq2pd %ymm1, %zmm1
1578 ; VLNODQ-NEXT: retq
1579 ;
15801568 ; DQNOVL-LABEL: sbto16f64:
15811569 ; DQNOVL: # %bb.0:
15821570 ; DQNOVL-NEXT: vxorpd %xmm2, %xmm2, %xmm2
16121600 ;
16131601 ; VLNODQ-LABEL: sbto8f64:
16141602 ; VLNODQ: # %bb.0:
1615 ; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
1603 ; VLNODQ-NEXT: vxorpd %xmm1, %xmm1, %xmm1
16161604 ; VLNODQ-NEXT: vcmpltpd %zmm0, %zmm1, %k1
16171605 ; VLNODQ-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
16181606 ; VLNODQ-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
16321620 }
16331621
16341622 define <8 x float> @sbto8f32(<8 x float> %a) {
1635 ; NOVL-LABEL: sbto8f32:
1636 ; NOVL: # %bb.0:
1637 ; NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1
1638 ; NOVL-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
1639 ; NOVL-NEXT: vcvtdq2ps %ymm0, %ymm0
1640 ; NOVL-NEXT: retq
1641 ;
1642 ; VLDQ-LABEL: sbto8f32:
1643 ; VLDQ: # %bb.0:
1644 ; VLDQ-NEXT: vxorps %xmm1, %xmm1, %xmm1
1645 ; VLDQ-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
1646 ; VLDQ-NEXT: vcvtdq2ps %ymm0, %ymm0
1647 ; VLDQ-NEXT: retq
1648 ;
1649 ; VLNODQ-LABEL: sbto8f32:
1650 ; VLNODQ: # %bb.0:
1651 ; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
1652 ; VLNODQ-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
1653 ; VLNODQ-NEXT: vcvtdq2ps %ymm0, %ymm0
1654 ; VLNODQ-NEXT: retq
1623 ; ALL-LABEL: sbto8f32:
1624 ; ALL: # %bb.0:
1625 ; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1
1626 ; ALL-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
1627 ; ALL-NEXT: vcvtdq2ps %ymm0, %ymm0
1628 ; ALL-NEXT: retq
16551629 %cmpres = fcmp ogt <8 x float> %a, zeroinitializer
16561630 %1 = sitofp <8 x i1> %cmpres to <8 x float>
16571631 ret <8 x float> %1
16581632 }
16591633
16601634 define <4 x float> @sbto4f32(<4 x float> %a) {
1661 ; NOVL-LABEL: sbto4f32:
1662 ; NOVL: # %bb.0:
1663 ; NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1
1664 ; NOVL-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
1665 ; NOVL-NEXT: vcvtdq2ps %xmm0, %xmm0
1666 ; NOVL-NEXT: retq
1667 ;
1668 ; VLDQ-LABEL: sbto4f32:
1669 ; VLDQ: # %bb.0:
1670 ; VLDQ-NEXT: vxorps %xmm1, %xmm1, %xmm1
1671 ; VLDQ-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
1672 ; VLDQ-NEXT: vcvtdq2ps %xmm0, %xmm0
1673 ; VLDQ-NEXT: retq
1674 ;
1675 ; VLNODQ-LABEL: sbto4f32:
1676 ; VLNODQ: # %bb.0:
1677 ; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
1678 ; VLNODQ-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
1679 ; VLNODQ-NEXT: vcvtdq2ps %xmm0, %xmm0
1680 ; VLNODQ-NEXT: retq
1635 ; ALL-LABEL: sbto4f32:
1636 ; ALL: # %bb.0:
1637 ; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1
1638 ; ALL-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
1639 ; ALL-NEXT: vcvtdq2ps %xmm0, %xmm0
1640 ; ALL-NEXT: retq
16811641 %cmpres = fcmp ogt <4 x float> %a, zeroinitializer
16821642 %1 = sitofp <4 x i1> %cmpres to <4 x float>
16831643 ret <4 x float> %1
17021662 ;
17031663 ; VLNODQ-LABEL: sbto4f64:
17041664 ; VLNODQ: # %bb.0:
1705 ; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
1665 ; VLNODQ-NEXT: vxorpd %xmm1, %xmm1, %xmm1
17061666 ; VLNODQ-NEXT: vcmpltpd %ymm0, %ymm1, %k1
17071667 ; VLNODQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
17081668 ; VLNODQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
17141674 }
17151675
17161676 define <2 x float> @sbto2f32(<2 x float> %a) {
1717 ; NOVL-LABEL: sbto2f32:
1718 ; NOVL: # %bb.0:
1719 ; NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1
1720 ; NOVL-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
1721 ; NOVL-NEXT: vcvtdq2ps %xmm0, %xmm0
1722 ; NOVL-NEXT: retq
1723 ;
1724 ; VLDQ-LABEL: sbto2f32:
1725 ; VLDQ: # %bb.0:
1726 ; VLDQ-NEXT: vxorps %xmm1, %xmm1, %xmm1
1727 ; VLDQ-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
1728 ; VLDQ-NEXT: vcvtdq2ps %xmm0, %xmm0
1729 ; VLDQ-NEXT: retq
1730 ;
1731 ; VLNODQ-LABEL: sbto2f32:
1732 ; VLNODQ: # %bb.0:
1733 ; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
1734 ; VLNODQ-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
1735 ; VLNODQ-NEXT: vcvtdq2ps %xmm0, %xmm0
1736 ; VLNODQ-NEXT: retq
1677 ; ALL-LABEL: sbto2f32:
1678 ; ALL: # %bb.0:
1679 ; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1
1680 ; ALL-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
1681 ; ALL-NEXT: vcvtdq2ps %xmm0, %xmm0
1682 ; ALL-NEXT: retq
17371683 %cmpres = fcmp ogt <2 x float> %a, zeroinitializer
17381684 %1 = sitofp <2 x i1> %cmpres to <2 x float>
17391685 ret <2 x float> %1
17401686 }
17411687
17421688 define <2 x double> @sbto2f64(<2 x double> %a) {
1743 ; NOVL-LABEL: sbto2f64:
1744 ; NOVL: # %bb.0:
1745 ; NOVL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
1746 ; NOVL-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
1747 ; NOVL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
1748 ; NOVL-NEXT: vcvtdq2pd %xmm0, %xmm0
1749 ; NOVL-NEXT: retq
1750 ;
1751 ; VLDQ-LABEL: sbto2f64:
1752 ; VLDQ: # %bb.0:
1753 ; VLDQ-NEXT: vxorpd %xmm1, %xmm1, %xmm1
1754 ; VLDQ-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
1755 ; VLDQ-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
1756 ; VLDQ-NEXT: vcvtdq2pd %xmm0, %xmm0
1757 ; VLDQ-NEXT: retq
1758 ;
1759 ; VLNODQ-LABEL: sbto2f64:
1760 ; VLNODQ: # %bb.0:
1761 ; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
1762 ; VLNODQ-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
1763 ; VLNODQ-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
1764 ; VLNODQ-NEXT: vcvtdq2pd %xmm0, %xmm0
1765 ; VLNODQ-NEXT: retq
1689 ; ALL-LABEL: sbto2f64:
1690 ; ALL: # %bb.0:
1691 ; ALL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
1692 ; ALL-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
1693 ; ALL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
1694 ; ALL-NEXT: vcvtdq2pd %xmm0, %xmm0
1695 ; ALL-NEXT: retq
17661696 %cmpres = fcmp ogt <2 x double> %a, zeroinitializer
17671697 %1 = sitofp <2 x i1> %cmpres to <2 x double>
17681698 ret <2 x double> %1
1212 define <4 x double> @test_masked_double_to_4_mask0(double %s, <4 x double> %default, <4 x double> %mask) {
1313 ; CHECK-LABEL: test_masked_double_to_4_mask0:
1414 ; CHECK: # %bb.0:
15 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
15 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
1616 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
1717 ; CHECK-NEXT: vbroadcastsd %xmm0, %ymm1 {%k1}
1818 ; CHECK-NEXT: vmovapd %ymm1, %ymm0
2727 define <4 x double> @test_masked_z_double_to_4_mask0(double %s, <4 x double> %mask) {
2828 ; CHECK-LABEL: test_masked_z_double_to_4_mask0:
2929 ; CHECK: # %bb.0:
30 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
30 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
3131 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
3232 ; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0 {%k1} {z}
3333 ; CHECK-NEXT: retq
4040 define <4 x double> @test_masked_double_to_4_mask1(double %s, <4 x double> %default, <4 x double> %mask) {
4141 ; CHECK-LABEL: test_masked_double_to_4_mask1:
4242 ; CHECK: # %bb.0:
43 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
43 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
4444 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
4545 ; CHECK-NEXT: vbroadcastsd %xmm0, %ymm1 {%k1}
4646 ; CHECK-NEXT: vmovapd %ymm1, %ymm0
5555 define <4 x double> @test_masked_z_double_to_4_mask1(double %s, <4 x double> %mask) {
5656 ; CHECK-LABEL: test_masked_z_double_to_4_mask1:
5757 ; CHECK: # %bb.0:
58 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
58 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
5959 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
6060 ; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0 {%k1} {z}
6161 ; CHECK-NEXT: retq
6868 define <4 x double> @test_masked_double_to_4_mask2(double %s, <4 x double> %default, <4 x double> %mask) {
6969 ; CHECK-LABEL: test_masked_double_to_4_mask2:
7070 ; CHECK: # %bb.0:
71 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
71 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
7272 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
7373 ; CHECK-NEXT: vbroadcastsd %xmm0, %ymm1 {%k1}
7474 ; CHECK-NEXT: vmovapd %ymm1, %ymm0
8383 define <4 x double> @test_masked_z_double_to_4_mask2(double %s, <4 x double> %mask) {
8484 ; CHECK-LABEL: test_masked_z_double_to_4_mask2:
8585 ; CHECK: # %bb.0:
86 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
86 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
8787 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
8888 ; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0 {%k1} {z}
8989 ; CHECK-NEXT: retq
9696 define <4 x double> @test_masked_double_to_4_mask3(double %s, <4 x double> %default, <4 x double> %mask) {
9797 ; CHECK-LABEL: test_masked_double_to_4_mask3:
9898 ; CHECK: # %bb.0:
99 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
99 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
100100 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
101101 ; CHECK-NEXT: vbroadcastsd %xmm0, %ymm1 {%k1}
102102 ; CHECK-NEXT: vmovapd %ymm1, %ymm0
111111 define <4 x double> @test_masked_z_double_to_4_mask3(double %s, <4 x double> %mask) {
112112 ; CHECK-LABEL: test_masked_z_double_to_4_mask3:
113113 ; CHECK: # %bb.0:
114 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
114 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
115115 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
116116 ; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0 {%k1} {z}
117117 ; CHECK-NEXT: retq
133133 define <8 x double> @test_masked_double_to_8_mask0(double %s, <8 x double> %default, <8 x double> %mask) {
134134 ; CHECK-LABEL: test_masked_double_to_8_mask0:
135135 ; CHECK: # %bb.0:
136 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
136 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
137137 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
138138 ; CHECK-NEXT: vbroadcastsd %xmm0, %zmm1 {%k1}
139139 ; CHECK-NEXT: vmovapd %zmm1, %zmm0
148148 define <8 x double> @test_masked_z_double_to_8_mask0(double %s, <8 x double> %mask) {
149149 ; CHECK-LABEL: test_masked_z_double_to_8_mask0:
150150 ; CHECK: # %bb.0:
151 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
151 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
152152 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
153153 ; CHECK-NEXT: vbroadcastsd %xmm0, %zmm0 {%k1} {z}
154154 ; CHECK-NEXT: retq
161161 define <8 x double> @test_masked_double_to_8_mask1(double %s, <8 x double> %default, <8 x double> %mask) {
162162 ; CHECK-LABEL: test_masked_double_to_8_mask1:
163163 ; CHECK: # %bb.0:
164 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
164 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
165165 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
166166 ; CHECK-NEXT: vbroadcastsd %xmm0, %zmm1 {%k1}
167167 ; CHECK-NEXT: vmovapd %zmm1, %zmm0
176176 define <8 x double> @test_masked_z_double_to_8_mask1(double %s, <8 x double> %mask) {
177177 ; CHECK-LABEL: test_masked_z_double_to_8_mask1:
178178 ; CHECK: # %bb.0:
179 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
179 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
180180 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
181181 ; CHECK-NEXT: vbroadcastsd %xmm0, %zmm0 {%k1} {z}
182182 ; CHECK-NEXT: retq
189189 define <8 x double> @test_masked_double_to_8_mask2(double %s, <8 x double> %default, <8 x double> %mask) {
190190 ; CHECK-LABEL: test_masked_double_to_8_mask2:
191191 ; CHECK: # %bb.0:
192 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
192 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
193193 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
194194 ; CHECK-NEXT: vbroadcastsd %xmm0, %zmm1 {%k1}
195195 ; CHECK-NEXT: vmovapd %zmm1, %zmm0
204204 define <8 x double> @test_masked_z_double_to_8_mask2(double %s, <8 x double> %mask) {
205205 ; CHECK-LABEL: test_masked_z_double_to_8_mask2:
206206 ; CHECK: # %bb.0:
207 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
207 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
208208 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
209209 ; CHECK-NEXT: vbroadcastsd %xmm0, %zmm0 {%k1} {z}
210210 ; CHECK-NEXT: retq
217217 define <8 x double> @test_masked_double_to_8_mask3(double %s, <8 x double> %default, <8 x double> %mask) {
218218 ; CHECK-LABEL: test_masked_double_to_8_mask3:
219219 ; CHECK: # %bb.0:
220 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
220 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
221221 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
222222 ; CHECK-NEXT: vbroadcastsd %xmm0, %zmm1 {%k1}
223223 ; CHECK-NEXT: vmovapd %zmm1, %zmm0
232232 define <8 x double> @test_masked_z_double_to_8_mask3(double %s, <8 x double> %mask) {
233233 ; CHECK-LABEL: test_masked_z_double_to_8_mask3:
234234 ; CHECK: # %bb.0:
235 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
235 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
236236 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
237237 ; CHECK-NEXT: vbroadcastsd %xmm0, %zmm0 {%k1} {z}
238238 ; CHECK-NEXT: retq
254254 define <4 x float> @test_masked_float_to_4_mask0(float %s, <4 x float> %default, <4 x float> %mask) {
255255 ; CHECK-LABEL: test_masked_float_to_4_mask0:
256256 ; CHECK: # %bb.0:
257 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
257 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
258258 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
259259 ; CHECK-NEXT: vbroadcastss %xmm0, %xmm1 {%k1}
260260 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
269269 define <4 x float> @test_masked_z_float_to_4_mask0(float %s, <4 x float> %mask) {
270270 ; CHECK-LABEL: test_masked_z_float_to_4_mask0:
271271 ; CHECK: # %bb.0:
272 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
272 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
273273 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
274274 ; CHECK-NEXT: vbroadcastss %xmm0, %xmm0 {%k1} {z}
275275 ; CHECK-NEXT: retq
282282 define <4 x float> @test_masked_float_to_4_mask1(float %s, <4 x float> %default, <4 x float> %mask) {
283283 ; CHECK-LABEL: test_masked_float_to_4_mask1:
284284 ; CHECK: # %bb.0:
285 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
285 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
286286 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
287287 ; CHECK-NEXT: vbroadcastss %xmm0, %xmm1 {%k1}
288288 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
297297 define <4 x float> @test_masked_z_float_to_4_mask1(float %s, <4 x float> %mask) {
298298 ; CHECK-LABEL: test_masked_z_float_to_4_mask1:
299299 ; CHECK: # %bb.0:
300 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
300 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
301301 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
302302 ; CHECK-NEXT: vbroadcastss %xmm0, %xmm0 {%k1} {z}
303303 ; CHECK-NEXT: retq
310310 define <4 x float> @test_masked_float_to_4_mask2(float %s, <4 x float> %default, <4 x float> %mask) {
311311 ; CHECK-LABEL: test_masked_float_to_4_mask2:
312312 ; CHECK: # %bb.0:
313 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
313 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
314314 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
315315 ; CHECK-NEXT: vbroadcastss %xmm0, %xmm1 {%k1}
316316 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
325325 define <4 x float> @test_masked_z_float_to_4_mask2(float %s, <4 x float> %mask) {
326326 ; CHECK-LABEL: test_masked_z_float_to_4_mask2:
327327 ; CHECK: # %bb.0:
328 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
328 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
329329 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
330330 ; CHECK-NEXT: vbroadcastss %xmm0, %xmm0 {%k1} {z}
331331 ; CHECK-NEXT: retq
338338 define <4 x float> @test_masked_float_to_4_mask3(float %s, <4 x float> %default, <4 x float> %mask) {
339339 ; CHECK-LABEL: test_masked_float_to_4_mask3:
340340 ; CHECK: # %bb.0:
341 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
341 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
342342 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
343343 ; CHECK-NEXT: vbroadcastss %xmm0, %xmm1 {%k1}
344344 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
353353 define <4 x float> @test_masked_z_float_to_4_mask3(float %s, <4 x float> %mask) {
354354 ; CHECK-LABEL: test_masked_z_float_to_4_mask3:
355355 ; CHECK: # %bb.0:
356 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
356 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
357357 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
358358 ; CHECK-NEXT: vbroadcastss %xmm0, %xmm0 {%k1} {z}
359359 ; CHECK-NEXT: retq
375375 define <8 x float> @test_masked_float_to_8_mask0(float %s, <8 x float> %default, <8 x float> %mask) {
376376 ; CHECK-LABEL: test_masked_float_to_8_mask0:
377377 ; CHECK: # %bb.0:
378 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
378 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
379379 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
380380 ; CHECK-NEXT: vbroadcastss %xmm0, %ymm1 {%k1}
381381 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
390390 define <8 x float> @test_masked_z_float_to_8_mask0(float %s, <8 x float> %mask) {
391391 ; CHECK-LABEL: test_masked_z_float_to_8_mask0:
392392 ; CHECK: # %bb.0:
393 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
393 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
394394 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
395395 ; CHECK-NEXT: vbroadcastss %xmm0, %ymm0 {%k1} {z}
396396 ; CHECK-NEXT: retq
403403 define <8 x float> @test_masked_float_to_8_mask1(float %s, <8 x float> %default, <8 x float> %mask) {
404404 ; CHECK-LABEL: test_masked_float_to_8_mask1:
405405 ; CHECK: # %bb.0:
406 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
406 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
407407 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
408408 ; CHECK-NEXT: vbroadcastss %xmm0, %ymm1 {%k1}
409409 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
418418 define <8 x float> @test_masked_z_float_to_8_mask1(float %s, <8 x float> %mask) {
419419 ; CHECK-LABEL: test_masked_z_float_to_8_mask1:
420420 ; CHECK: # %bb.0:
421 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
421 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
422422 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
423423 ; CHECK-NEXT: vbroadcastss %xmm0, %ymm0 {%k1} {z}
424424 ; CHECK-NEXT: retq
431431 define <8 x float> @test_masked_float_to_8_mask2(float %s, <8 x float> %default, <8 x float> %mask) {
432432 ; CHECK-LABEL: test_masked_float_to_8_mask2:
433433 ; CHECK: # %bb.0:
434 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
434 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
435435 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
436436 ; CHECK-NEXT: vbroadcastss %xmm0, %ymm1 {%k1}
437437 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
446446 define <8 x float> @test_masked_z_float_to_8_mask2(float %s, <8 x float> %mask) {
447447 ; CHECK-LABEL: test_masked_z_float_to_8_mask2:
448448 ; CHECK: # %bb.0:
449 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
449 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
450450 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
451451 ; CHECK-NEXT: vbroadcastss %xmm0, %ymm0 {%k1} {z}
452452 ; CHECK-NEXT: retq
459459 define <8 x float> @test_masked_float_to_8_mask3(float %s, <8 x float> %default, <8 x float> %mask) {
460460 ; CHECK-LABEL: test_masked_float_to_8_mask3:
461461 ; CHECK: # %bb.0:
462 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
462 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
463463 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
464464 ; CHECK-NEXT: vbroadcastss %xmm0, %ymm1 {%k1}
465465 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
474474 define <8 x float> @test_masked_z_float_to_8_mask3(float %s, <8 x float> %mask) {
475475 ; CHECK-LABEL: test_masked_z_float_to_8_mask3:
476476 ; CHECK: # %bb.0:
477 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
477 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
478478 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
479479 ; CHECK-NEXT: vbroadcastss %xmm0, %ymm0 {%k1} {z}
480480 ; CHECK-NEXT: retq
496496 define <16 x float> @test_masked_float_to_16_mask0(float %s, <16 x float> %default, <16 x float> %mask) {
497497 ; CHECK-LABEL: test_masked_float_to_16_mask0:
498498 ; CHECK: # %bb.0:
499 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
499 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
500500 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
501501 ; CHECK-NEXT: vbroadcastss %xmm0, %zmm1 {%k1}
502502 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
511511 define <16 x float> @test_masked_z_float_to_16_mask0(float %s, <16 x float> %mask) {
512512 ; CHECK-LABEL: test_masked_z_float_to_16_mask0:
513513 ; CHECK: # %bb.0:
514 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
514 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
515515 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
516516 ; CHECK-NEXT: vbroadcastss %xmm0, %zmm0 {%k1} {z}
517517 ; CHECK-NEXT: retq
524524 define <16 x float> @test_masked_float_to_16_mask1(float %s, <16 x float> %default, <16 x float> %mask) {
525525 ; CHECK-LABEL: test_masked_float_to_16_mask1:
526526 ; CHECK: # %bb.0:
527 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
527 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
528528 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
529529 ; CHECK-NEXT: vbroadcastss %xmm0, %zmm1 {%k1}
530530 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
539539 define <16 x float> @test_masked_z_float_to_16_mask1(float %s, <16 x float> %mask) {
540540 ; CHECK-LABEL: test_masked_z_float_to_16_mask1:
541541 ; CHECK: # %bb.0:
542 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
542 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
543543 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
544544 ; CHECK-NEXT: vbroadcastss %xmm0, %zmm0 {%k1} {z}
545545 ; CHECK-NEXT: retq
552552 define <16 x float> @test_masked_float_to_16_mask2(float %s, <16 x float> %default, <16 x float> %mask) {
553553 ; CHECK-LABEL: test_masked_float_to_16_mask2:
554554 ; CHECK: # %bb.0:
555 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
555 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
556556 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
557557 ; CHECK-NEXT: vbroadcastss %xmm0, %zmm1 {%k1}
558558 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
567567 define <16 x float> @test_masked_z_float_to_16_mask2(float %s, <16 x float> %mask) {
568568 ; CHECK-LABEL: test_masked_z_float_to_16_mask2:
569569 ; CHECK: # %bb.0:
570 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
570 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
571571 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
572572 ; CHECK-NEXT: vbroadcastss %xmm0, %zmm0 {%k1} {z}
573573 ; CHECK-NEXT: retq
580580 define <16 x float> @test_masked_float_to_16_mask3(float %s, <16 x float> %default, <16 x float> %mask) {
581581 ; CHECK-LABEL: test_masked_float_to_16_mask3:
582582 ; CHECK: # %bb.0:
583 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
583 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
584584 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
585585 ; CHECK-NEXT: vbroadcastss %xmm0, %zmm1 {%k1}
586586 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
595595 define <16 x float> @test_masked_z_float_to_16_mask3(float %s, <16 x float> %mask) {
596596 ; CHECK-LABEL: test_masked_z_float_to_16_mask3:
597597 ; CHECK: # %bb.0:
598 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
598 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
599599 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
600600 ; CHECK-NEXT: vbroadcastss %xmm0, %zmm0 {%k1} {z}
601601 ; CHECK-NEXT: retq
618618 define <4 x double> @test_masked_double_to_4_mem_mask0(double* %p, <4 x double> %default, <4 x double> %mask) {
619619 ; CHECK-LABEL: test_masked_double_to_4_mem_mask0:
620620 ; CHECK: # %bb.0:
621 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
621 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
622622 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
623623 ; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0 {%k1}
624624 ; CHECK-NEXT: retq
633633 define <4 x double> @test_masked_z_double_to_4_mem_mask0(double* %p, <4 x double> %mask) {
634634 ; CHECK-LABEL: test_masked_z_double_to_4_mem_mask0:
635635 ; CHECK: # %bb.0:
636 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
636 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
637637 ; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1
638638 ; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0 {%k1} {z}
639639 ; CHECK-NEXT: retq
647647 define <4 x double> @test_masked_double_to_4_mem_mask1(double* %p, <4 x double> %default, <4 x double> %mask) {
648648 ; CHECK-LABEL: test_masked_double_to_4_mem_mask1:
649649 ; CHECK: # %bb.0:
650 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
650 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
651651 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
652652 ; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0 {%k1}
653653 ; CHECK-NEXT: retq
662662 define <4 x double> @test_masked_z_double_to_4_mem_mask1(double* %p, <4 x double> %mask) {
663663 ; CHECK-LABEL: test_masked_z_double_to_4_mem_mask1:
664664 ; CHECK: # %bb.0:
665 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
665 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
666666 ; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1
667667 ; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0 {%k1} {z}
668668 ; CHECK-NEXT: retq
676676 define <4 x double> @test_masked_double_to_4_mem_mask2(double* %p, <4 x double> %default, <4 x double> %mask) {
677677 ; CHECK-LABEL: test_masked_double_to_4_mem_mask2:
678678 ; CHECK: # %bb.0:
679 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
679 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
680680 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
681681 ; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0 {%k1}
682682 ; CHECK-NEXT: retq
691691 define <4 x double> @test_masked_z_double_to_4_mem_mask2(double* %p, <4 x double> %mask) {
692692 ; CHECK-LABEL: test_masked_z_double_to_4_mem_mask2:
693693 ; CHECK: # %bb.0:
694 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
694 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
695695 ; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1
696696 ; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0 {%k1} {z}
697697 ; CHECK-NEXT: retq
705705 define <4 x double> @test_masked_double_to_4_mem_mask3(double* %p, <4 x double> %default, <4 x double> %mask) {
706706 ; CHECK-LABEL: test_masked_double_to_4_mem_mask3:
707707 ; CHECK: # %bb.0:
708 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
708 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
709709 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
710710 ; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0 {%k1}
711711 ; CHECK-NEXT: retq
720720 define <4 x double> @test_masked_z_double_to_4_mem_mask3(double* %p, <4 x double> %mask) {
721721 ; CHECK-LABEL: test_masked_z_double_to_4_mem_mask3:
722722 ; CHECK: # %bb.0:
723 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
723 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
724724 ; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1
725725 ; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0 {%k1} {z}
726726 ; CHECK-NEXT: retq
744744 define <8 x double> @test_masked_double_to_8_mem_mask0(double* %p, <8 x double> %default, <8 x double> %mask) {
745745 ; CHECK-LABEL: test_masked_double_to_8_mem_mask0:
746746 ; CHECK: # %bb.0:
747 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
747 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
748748 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
749749 ; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1}
750750 ; CHECK-NEXT: retq
759759 define <8 x double> @test_masked_z_double_to_8_mem_mask0(double* %p, <8 x double> %mask) {
760760 ; CHECK-LABEL: test_masked_z_double_to_8_mem_mask0:
761761 ; CHECK: # %bb.0:
762 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
762 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
763763 ; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1
764764 ; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} {z}
765765 ; CHECK-NEXT: retq
773773 define <8 x double> @test_masked_double_to_8_mem_mask1(double* %p, <8 x double> %default, <8 x double> %mask) {
774774 ; CHECK-LABEL: test_masked_double_to_8_mem_mask1:
775775 ; CHECK: # %bb.0:
776 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
776 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
777777 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
778778 ; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1}
779779 ; CHECK-NEXT: retq
788788 define <8 x double> @test_masked_z_double_to_8_mem_mask1(double* %p, <8 x double> %mask) {
789789 ; CHECK-LABEL: test_masked_z_double_to_8_mem_mask1:
790790 ; CHECK: # %bb.0:
791 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
791 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
792792 ; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1
793793 ; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} {z}
794794 ; CHECK-NEXT: retq
802802 define <8 x double> @test_masked_double_to_8_mem_mask2(double* %p, <8 x double> %default, <8 x double> %mask) {
803803 ; CHECK-LABEL: test_masked_double_to_8_mem_mask2:
804804 ; CHECK: # %bb.0:
805 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
805 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
806806 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
807807 ; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1}
808808 ; CHECK-NEXT: retq
817817 define <8 x double> @test_masked_z_double_to_8_mem_mask2(double* %p, <8 x double> %mask) {
818818 ; CHECK-LABEL: test_masked_z_double_to_8_mem_mask2:
819819 ; CHECK: # %bb.0:
820 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
820 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
821821 ; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1
822822 ; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} {z}
823823 ; CHECK-NEXT: retq
831831 define <8 x double> @test_masked_double_to_8_mem_mask3(double* %p, <8 x double> %default, <8 x double> %mask) {
832832 ; CHECK-LABEL: test_masked_double_to_8_mem_mask3:
833833 ; CHECK: # %bb.0:
834 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
834 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
835835 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
836836 ; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1}
837837 ; CHECK-NEXT: retq
846846 define <8 x double> @test_masked_z_double_to_8_mem_mask3(double* %p, <8 x double> %mask) {
847847 ; CHECK-LABEL: test_masked_z_double_to_8_mem_mask3:
848848 ; CHECK: # %bb.0:
849 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
849 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
850850 ; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1
851851 ; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} {z}
852852 ; CHECK-NEXT: retq
870870 define <4 x float> @test_masked_float_to_4_mem_mask0(float* %p, <4 x float> %default, <4 x float> %mask) {
871871 ; CHECK-LABEL: test_masked_float_to_4_mem_mask0:
872872 ; CHECK: # %bb.0:
873 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
873 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
874874 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
875875 ; CHECK-NEXT: vbroadcastss (%rdi), %xmm0 {%k1}
876876 ; CHECK-NEXT: retq
885885 define <4 x float> @test_masked_z_float_to_4_mem_mask0(float* %p, <4 x float> %mask) {
886886 ; CHECK-LABEL: test_masked_z_float_to_4_mem_mask0:
887887 ; CHECK: # %bb.0:
888 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
888 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
889889 ; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1
890890 ; CHECK-NEXT: vbroadcastss (%rdi), %xmm0 {%k1} {z}
891891 ; CHECK-NEXT: retq
899899 define <4 x float> @test_masked_float_to_4_mem_mask1(float* %p, <4 x float> %default, <4 x float> %mask) {
900900 ; CHECK-LABEL: test_masked_float_to_4_mem_mask1:
901901 ; CHECK: # %bb.0:
902 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
902 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
903903 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
904904 ; CHECK-NEXT: vbroadcastss (%rdi), %xmm0 {%k1}
905905 ; CHECK-NEXT: retq
914914 define <4 x float> @test_masked_z_float_to_4_mem_mask1(float* %p, <4 x float> %mask) {
915915 ; CHECK-LABEL: test_masked_z_float_to_4_mem_mask1:
916916 ; CHECK: # %bb.0:
917 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
917 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
918918 ; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1
919919 ; CHECK-NEXT: vbroadcastss (%rdi), %xmm0 {%k1} {z}
920920 ; CHECK-NEXT: retq
928928 define <4 x float> @test_masked_float_to_4_mem_mask2(float* %p, <4 x float> %default, <4 x float> %mask) {
929929 ; CHECK-LABEL: test_masked_float_to_4_mem_mask2:
930930 ; CHECK: # %bb.0:
931 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
931 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
932932 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
933933 ; CHECK-NEXT: vbroadcastss (%rdi), %xmm0 {%k1}
934934 ; CHECK-NEXT: retq
943943 define <4 x float> @test_masked_z_float_to_4_mem_mask2(float* %p, <4 x float> %mask) {
944944 ; CHECK-LABEL: test_masked_z_float_to_4_mem_mask2:
945945 ; CHECK: # %bb.0:
946 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
946 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
947947 ; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1
948948 ; CHECK-NEXT: vbroadcastss (%rdi), %xmm0 {%k1} {z}
949949 ; CHECK-NEXT: retq
957957 define <4 x float> @test_masked_float_to_4_mem_mask3(float* %p, <4 x float> %default, <4 x float> %mask) {
958958 ; CHECK-LABEL: test_masked_float_to_4_mem_mask3:
959959 ; CHECK: # %bb.0:
960 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
960 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
961961 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
962962 ; CHECK-NEXT: vbroadcastss (%rdi), %xmm0 {%k1}
963963 ; CHECK-NEXT: retq
972972 define <4 x float> @test_masked_z_float_to_4_mem_mask3(float* %p, <4 x float> %mask) {
973973 ; CHECK-LABEL: test_masked_z_float_to_4_mem_mask3:
974974 ; CHECK: # %bb.0:
975 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
975 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
976976 ; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1
977977 ; CHECK-NEXT: vbroadcastss (%rdi), %xmm0 {%k1} {z}
978978 ; CHECK-NEXT: retq
996996 define <8 x float> @test_masked_float_to_8_mem_mask0(float* %p, <8 x float> %default, <8 x float> %mask) {
997997 ; CHECK-LABEL: test_masked_float_to_8_mem_mask0:
998998 ; CHECK: # %bb.0:
999 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
999 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
10001000 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
10011001 ; CHECK-NEXT: vbroadcastss (%rdi), %ymm0 {%k1}
10021002 ; CHECK-NEXT: retq
10111011 define <8 x float> @test_masked_z_float_to_8_mem_mask0(float* %p, <8 x float> %mask) {
10121012 ; CHECK-LABEL: test_masked_z_float_to_8_mem_mask0:
10131013 ; CHECK: # %bb.0:
1014 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
1014 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
10151015 ; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1
10161016 ; CHECK-NEXT: vbroadcastss (%rdi), %ymm0 {%k1} {z}
10171017 ; CHECK-NEXT: retq
10251025 define <8 x float> @test_masked_float_to_8_mem_mask1(float* %p, <8 x float> %default, <8 x float> %mask) {
10261026 ; CHECK-LABEL: test_masked_float_to_8_mem_mask1:
10271027 ; CHECK: # %bb.0:
1028 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
1028 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
10291029 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
10301030 ; CHECK-NEXT: vbroadcastss (%rdi), %ymm0 {%k1}
10311031 ; CHECK-NEXT: retq
10401040 define <8 x float> @test_masked_z_float_to_8_mem_mask1(float* %p, <8 x float> %mask) {
10411041 ; CHECK-LABEL: test_masked_z_float_to_8_mem_mask1:
10421042 ; CHECK: # %bb.0:
1043 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
1043 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
10441044 ; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1
10451045 ; CHECK-NEXT: vbroadcastss (%rdi), %ymm0 {%k1} {z}
10461046 ; CHECK-NEXT: retq
10541054 define <8 x float> @test_masked_float_to_8_mem_mask2(float* %p, <8 x float> %default, <8 x float> %mask) {
10551055 ; CHECK-LABEL: test_masked_float_to_8_mem_mask2:
10561056 ; CHECK: # %bb.0:
1057 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
1057 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
10581058 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
10591059 ; CHECK-NEXT: vbroadcastss (%rdi), %ymm0 {%k1}
10601060 ; CHECK-NEXT: retq
10691069 define <8 x float> @test_masked_z_float_to_8_mem_mask2(float* %p, <8 x float> %mask) {
10701070 ; CHECK-LABEL: test_masked_z_float_to_8_mem_mask2:
10711071 ; CHECK: # %bb.0:
1072 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
1072 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
10731073 ; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1
10741074 ; CHECK-NEXT: vbroadcastss (%rdi), %ymm0 {%k1} {z}
10751075 ; CHECK-NEXT: retq
10831083 define <8 x float> @test_masked_float_to_8_mem_mask3(float* %p, <8 x float> %default, <8 x float> %mask) {
10841084 ; CHECK-LABEL: test_masked_float_to_8_mem_mask3:
10851085 ; CHECK: # %bb.0:
1086 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
1086 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
10871087 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
10881088 ; CHECK-NEXT: vbroadcastss (%rdi), %ymm0 {%k1}
10891089 ; CHECK-NEXT: retq
10981098 define <8 x float> @test_masked_z_float_to_8_mem_mask3(float* %p, <8 x float> %mask) {
10991099 ; CHECK-LABEL: test_masked_z_float_to_8_mem_mask3:
11001100 ; CHECK: # %bb.0:
1101 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
1101 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
11021102 ; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1
11031103 ; CHECK-NEXT: vbroadcastss (%rdi), %ymm0 {%k1} {z}
11041104 ; CHECK-NEXT: retq
11221122 define <16 x float> @test_masked_float_to_16_mem_mask0(float* %p, <16 x float> %default, <16 x float> %mask) {
11231123 ; CHECK-LABEL: test_masked_float_to_16_mem_mask0:
11241124 ; CHECK: # %bb.0:
1125 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
1125 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
11261126 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
11271127 ; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 {%k1}
11281128 ; CHECK-NEXT: retq
11371137 define <16 x float> @test_masked_z_float_to_16_mem_mask0(float* %p, <16 x float> %mask) {
11381138 ; CHECK-LABEL: test_masked_z_float_to_16_mem_mask0:
11391139 ; CHECK: # %bb.0:
1140 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
1140 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
11411141 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
11421142 ; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} {z}
11431143 ; CHECK-NEXT: retq
11511151 define <16 x float> @test_masked_float_to_16_mem_mask1(float* %p, <16 x float> %default, <16 x float> %mask) {
11521152 ; CHECK-LABEL: test_masked_float_to_16_mem_mask1:
11531153 ; CHECK: # %bb.0:
1154 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
1154 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
11551155 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
11561156 ; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 {%k1}
11571157 ; CHECK-NEXT: retq
11661166 define <16 x float> @test_masked_z_float_to_16_mem_mask1(float* %p, <16 x float> %mask) {
11671167 ; CHECK-LABEL: test_masked_z_float_to_16_mem_mask1:
11681168 ; CHECK: # %bb.0:
1169 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
1169 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
11701170 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
11711171 ; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} {z}
11721172 ; CHECK-NEXT: retq
11801180 define <16 x float> @test_masked_float_to_16_mem_mask2(float* %p, <16 x float> %default, <16 x float> %mask) {
11811181 ; CHECK-LABEL: test_masked_float_to_16_mem_mask2:
11821182 ; CHECK: # %bb.0:
1183 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
1183 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
11841184 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
11851185 ; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 {%k1}
11861186 ; CHECK-NEXT: retq
11951195 define <16 x float> @test_masked_z_float_to_16_mem_mask2(float* %p, <16 x float> %mask) {
11961196 ; CHECK-LABEL: test_masked_z_float_to_16_mem_mask2:
11971197 ; CHECK: # %bb.0:
1198 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
1198 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
11991199 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
12001200 ; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} {z}
12011201 ; CHECK-NEXT: retq
12091209 define <16 x float> @test_masked_float_to_16_mem_mask3(float* %p, <16 x float> %default, <16 x float> %mask) {
12101210 ; CHECK-LABEL: test_masked_float_to_16_mem_mask3:
12111211 ; CHECK: # %bb.0:
1212 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
1212 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
12131213 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
12141214 ; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 {%k1}
12151215 ; CHECK-NEXT: retq
12241224 define <16 x float> @test_masked_z_float_to_16_mem_mask3(float* %p, <16 x float> %mask) {
12251225 ; CHECK-LABEL: test_masked_z_float_to_16_mem_mask3:
12261226 ; CHECK: # %bb.0:
1227 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
1227 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
12281228 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
12291229 ; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} {z}
12301230 ; CHECK-NEXT: retq
1111 define <4 x float> @test_masked_4xfloat_dup_high_mask0(<4 x float> %vec, <4 x float> %vec2, <4 x float> %mask) {
1212 ; CHECK-LABEL: test_masked_4xfloat_dup_high_mask0:
1313 ; CHECK: # %bb.0:
14 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
14 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
1515 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
1616 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm1 {%k1} = xmm0[1,1,3,3]
1717 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
2525 define <4 x float> @test_masked_z_4xfloat_dup_high_mask0(<4 x float> %vec, <4 x float> %mask) {
2626 ; CHECK-LABEL: test_masked_z_4xfloat_dup_high_mask0:
2727 ; CHECK: # %bb.0:
28 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
28 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
2929 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
3030 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,3,3]
3131 ; CHECK-NEXT: retq
3737 define <4 x float> @test_masked_4xfloat_dup_high_mask1(<4 x float> %vec, <4 x float> %vec2, <4 x float> %mask) {
3838 ; CHECK-LABEL: test_masked_4xfloat_dup_high_mask1:
3939 ; CHECK: # %bb.0:
40 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
40 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
4141 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
4242 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm1 {%k1} = xmm0[1,1,3,3]
4343 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
5151 define <4 x float> @test_masked_z_4xfloat_dup_high_mask1(<4 x float> %vec, <4 x float> %mask) {
5252 ; CHECK-LABEL: test_masked_z_4xfloat_dup_high_mask1:
5353 ; CHECK: # %bb.0:
54 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
54 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
5555 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
5656 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,3,3]
5757 ; CHECK-NEXT: retq
6363 define <4 x float> @test_masked_4xfloat_dup_high_mask2(<4 x float> %vec, <4 x float> %vec2, <4 x float> %mask) {
6464 ; CHECK-LABEL: test_masked_4xfloat_dup_high_mask2:
6565 ; CHECK: # %bb.0:
66 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
66 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
6767 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
6868 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm1 {%k1} = xmm0[1,1,3,3]
6969 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
7777 define <4 x float> @test_masked_z_4xfloat_dup_high_mask2(<4 x float> %vec, <4 x float> %mask) {
7878 ; CHECK-LABEL: test_masked_z_4xfloat_dup_high_mask2:
7979 ; CHECK: # %bb.0:
80 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
80 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
8181 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
8282 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,3,3]
8383 ; CHECK-NEXT: retq
8989 define <4 x float> @test_masked_4xfloat_dup_high_mask3(<4 x float> %vec, <4 x float> %vec2, <4 x float> %mask) {
9090 ; CHECK-LABEL: test_masked_4xfloat_dup_high_mask3:
9191 ; CHECK: # %bb.0:
92 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
92 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
9393 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
9494 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm1 {%k1} = xmm0[1,1,3,3]
9595 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
103103 define <4 x float> @test_masked_z_4xfloat_dup_high_mask3(<4 x float> %vec, <4 x float> %mask) {
104104 ; CHECK-LABEL: test_masked_z_4xfloat_dup_high_mask3:
105105 ; CHECK: # %bb.0:
106 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
106 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
107107 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
108108 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,3,3]
109109 ; CHECK-NEXT: retq
115115 define <4 x float> @test_masked_4xfloat_dup_high_mask4(<4 x float> %vec, <4 x float> %vec2, <4 x float> %mask) {
116116 ; CHECK-LABEL: test_masked_4xfloat_dup_high_mask4:
117117 ; CHECK: # %bb.0:
118 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
118 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
119119 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
120120 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm1 {%k1} = xmm0[1,1,3,3]
121121 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
129129 define <4 x float> @test_masked_z_4xfloat_dup_high_mask4(<4 x float> %vec, <4 x float> %mask) {
130130 ; CHECK-LABEL: test_masked_z_4xfloat_dup_high_mask4:
131131 ; CHECK: # %bb.0:
132 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
132 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
133133 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
134134 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,3,3]
135135 ; CHECK-NEXT: retq
150150 define <4 x float> @test_masked_4xfloat_dup_high_mem_mask0(<4 x float>* %vp, <4 x float> %vec2, <4 x float> %mask) {
151151 ; CHECK-LABEL: test_masked_4xfloat_dup_high_mem_mask0:
152152 ; CHECK: # %bb.0:
153 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
153 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
154154 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
155155 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} = mem[1,1,3,3]
156156 ; CHECK-NEXT: retq
164164 define <4 x float> @test_masked_z_4xfloat_dup_high_mem_mask0(<4 x float>* %vp, <4 x float> %mask) {
165165 ; CHECK-LABEL: test_masked_z_4xfloat_dup_high_mem_mask0:
166166 ; CHECK: # %bb.0:
167 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
167 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
168168 ; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1
169169 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = mem[1,1,3,3]
170170 ; CHECK-NEXT: retq
177177 define <4 x float> @test_masked_4xfloat_dup_high_mem_mask1(<4 x float>* %vp, <4 x float> %vec2, <4 x float> %mask) {
178178 ; CHECK-LABEL: test_masked_4xfloat_dup_high_mem_mask1:
179179 ; CHECK: # %bb.0:
180 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
180 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
181181 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
182182 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} = mem[1,1,3,3]
183183 ; CHECK-NEXT: retq
191191 define <4 x float> @test_masked_z_4xfloat_dup_high_mem_mask1(<4 x float>* %vp, <4 x float> %mask) {
192192 ; CHECK-LABEL: test_masked_z_4xfloat_dup_high_mem_mask1:
193193 ; CHECK: # %bb.0:
194 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
194 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
195195 ; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1
196196 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = mem[1,1,3,3]
197197 ; CHECK-NEXT: retq
204204 define <4 x float> @test_masked_4xfloat_dup_high_mem_mask2(<4 x float>* %vp, <4 x float> %vec2, <4 x float> %mask) {
205205 ; CHECK-LABEL: test_masked_4xfloat_dup_high_mem_mask2:
206206 ; CHECK: # %bb.0:
207 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
207 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
208208 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
209209 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} = mem[1,1,3,3]
210210 ; CHECK-NEXT: retq
218218 define <4 x float> @test_masked_z_4xfloat_dup_high_mem_mask2(<4 x float>* %vp, <4 x float> %mask) {
219219 ; CHECK-LABEL: test_masked_z_4xfloat_dup_high_mem_mask2:
220220 ; CHECK: # %bb.0:
221 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
221 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
222222 ; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1
223223 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = mem[1,1,3,3]
224224 ; CHECK-NEXT: retq
231231 define <4 x float> @test_masked_4xfloat_dup_high_mem_mask3(<4 x float>* %vp, <4 x float> %vec2, <4 x float> %mask) {
232232 ; CHECK-LABEL: test_masked_4xfloat_dup_high_mem_mask3:
233233 ; CHECK: # %bb.0:
234 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
234 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
235235 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
236236 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} = mem[1,1,3,3]
237237 ; CHECK-NEXT: retq
245245 define <4 x float> @test_masked_z_4xfloat_dup_high_mem_mask3(<4 x float>* %vp, <4 x float> %mask) {
246246 ; CHECK-LABEL: test_masked_z_4xfloat_dup_high_mem_mask3:
247247 ; CHECK: # %bb.0:
248 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
248 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
249249 ; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1
250250 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = mem[1,1,3,3]
251251 ; CHECK-NEXT: retq
258258 define <4 x float> @test_masked_4xfloat_dup_high_mem_mask4(<4 x float>* %vp, <4 x float> %vec2, <4 x float> %mask) {
259259 ; CHECK-LABEL: test_masked_4xfloat_dup_high_mem_mask4:
260260 ; CHECK: # %bb.0:
261 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
261 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
262262 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
263263 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} = mem[1,1,3,3]
264264 ; CHECK-NEXT: retq
272272 define <4 x float> @test_masked_z_4xfloat_dup_high_mem_mask4(<4 x float>* %vp, <4 x float> %mask) {
273273 ; CHECK-LABEL: test_masked_z_4xfloat_dup_high_mem_mask4:
274274 ; CHECK: # %bb.0:
275 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
275 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
276276 ; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1
277277 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = mem[1,1,3,3]
278278 ; CHECK-NEXT: retq
293293 define <8 x float> @test_masked_8xfloat_dup_high_mask0(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) {
294294 ; CHECK-LABEL: test_masked_8xfloat_dup_high_mask0:
295295 ; CHECK: # %bb.0:
296 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
296 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
297297 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
298298 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm1 {%k1} = ymm0[1,1,3,3,5,5,7,7]
299299 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
307307 define <8 x float> @test_masked_z_8xfloat_dup_high_mask0(<8 x float> %vec, <8 x float> %mask) {
308308 ; CHECK-LABEL: test_masked_z_8xfloat_dup_high_mask0:
309309 ; CHECK: # %bb.0:
310 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
310 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
311311 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
312312 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = ymm0[1,1,3,3,5,5,7,7]
313313 ; CHECK-NEXT: retq
319319 define <8 x float> @test_masked_8xfloat_dup_high_mask1(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) {
320320 ; CHECK-LABEL: test_masked_8xfloat_dup_high_mask1:
321321 ; CHECK: # %bb.0:
322 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
322 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
323323 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
324324 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm1 {%k1} = ymm0[1,1,3,3,5,5,7,7]
325325 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
333333 define <8 x float> @test_masked_z_8xfloat_dup_high_mask1(<8 x float> %vec, <8 x float> %mask) {
334334 ; CHECK-LABEL: test_masked_z_8xfloat_dup_high_mask1:
335335 ; CHECK: # %bb.0:
336 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
336 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
337337 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
338338 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = ymm0[1,1,3,3,5,5,7,7]
339339 ; CHECK-NEXT: retq
345345 define <8 x float> @test_masked_8xfloat_dup_high_mask2(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) {
346346 ; CHECK-LABEL: test_masked_8xfloat_dup_high_mask2:
347347 ; CHECK: # %bb.0:
348 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
348 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
349349 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
350350 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm1 {%k1} = ymm0[1,1,3,3,5,5,7,7]
351351 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
359359 define <8 x float> @test_masked_z_8xfloat_dup_high_mask2(<8 x float> %vec, <8 x float> %mask) {
360360 ; CHECK-LABEL: test_masked_z_8xfloat_dup_high_mask2:
361361 ; CHECK: # %bb.0:
362 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
362 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
363363 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
364364 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = ymm0[1,1,3,3,5,5,7,7]
365365 ; CHECK-NEXT: retq
371371 define <8 x float> @test_masked_8xfloat_dup_high_mask3(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) {
372372 ; CHECK-LABEL: test_masked_8xfloat_dup_high_mask3:
373373 ; CHECK: # %bb.0:
374 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
374 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
375375 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
376376 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm1 {%k1} = ymm0[1,1,3,3,5,5,7,7]
377377 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
385385 define <8 x float> @test_masked_z_8xfloat_dup_high_mask3(<8 x float> %vec, <8 x float> %mask) {
386386 ; CHECK-LABEL: test_masked_z_8xfloat_dup_high_mask3:
387387 ; CHECK: # %bb.0:
388 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
388 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
389389 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
390390 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = ymm0[1,1,3,3,5,5,7,7]
391391 ; CHECK-NEXT: retq
397397 define <8 x float> @test_masked_8xfloat_dup_high_mask4(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) {
398398 ; CHECK-LABEL: test_masked_8xfloat_dup_high_mask4:
399399 ; CHECK: # %bb.0:
400 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
400 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
401401 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
402402 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm1 {%k1} = ymm0[1,1,3,3,5,5,7,7]
403403 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
411411 define <8 x float> @test_masked_z_8xfloat_dup_high_mask4(<8 x float> %vec, <8 x float> %mask) {
412412 ; CHECK-LABEL: test_masked_z_8xfloat_dup_high_mask4:
413413 ; CHECK: # %bb.0:
414 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
414 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
415415 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
416416 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = ymm0[1,1,3,3,5,5,7,7]
417417 ; CHECK-NEXT: retq
432432 define <8 x float> @test_masked_8xfloat_dup_high_mem_mask0(<8 x float>* %vp, <8 x float> %vec2, <8 x float> %mask) {
433433 ; CHECK-LABEL: test_masked_8xfloat_dup_high_mem_mask0:
434434 ; CHECK: # %bb.0:
435 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
435 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
436436 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
437437 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} = mem[1,1,3,3,5,5,7,7]
438438 ; CHECK-NEXT: retq
446446 define <8 x float> @test_masked_z_8xfloat_dup_high_mem_mask0(<8 x float>* %vp, <8 x float> %mask) {
447447 ; CHECK-LABEL: test_masked_z_8xfloat_dup_high_mem_mask0:
448448 ; CHECK: # %bb.0:
449 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
449 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
450450 ; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1
451451 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = mem[1,1,3,3,5,5,7,7]
452452 ; CHECK-NEXT: retq
459459 define <8 x float> @test_masked_8xfloat_dup_high_mem_mask1(<8 x float>* %vp, <8 x float> %vec2, <8 x float> %mask) {
460460 ; CHECK-LABEL: test_masked_8xfloat_dup_high_mem_mask1:
461461 ; CHECK: # %bb.0:
462 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
462 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
463463 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
464464 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} = mem[1,1,3,3,5,5,7,7]
465465 ; CHECK-NEXT: retq
473473 define <8 x float> @test_masked_z_8xfloat_dup_high_mem_mask1(<8 x float>* %vp, <8 x float> %mask) {
474474 ; CHECK-LABEL: test_masked_z_8xfloat_dup_high_mem_mask1:
475475 ; CHECK: # %bb.0:
476 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
476 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
477477 ; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1
478478 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = mem[1,1,3,3,5,5,7,7]
479479 ; CHECK-NEXT: retq
486486 define <8 x float> @test_masked_8xfloat_dup_high_mem_mask2(<8 x float>* %vp, <8 x float> %vec2, <8 x float> %mask) {
487487 ; CHECK-LABEL: test_masked_8xfloat_dup_high_mem_mask2:
488488 ; CHECK: # %bb.0:
489 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
489 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
490490 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
491491 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} = mem[1,1,3,3,5,5,7,7]
492492 ; CHECK-NEXT: retq
500500 define <8 x float> @test_masked_z_8xfloat_dup_high_mem_mask2(<8 x float>* %vp, <8 x float> %mask) {
501501 ; CHECK-LABEL: test_masked_z_8xfloat_dup_high_mem_mask2:
502502 ; CHECK: # %bb.0:
503 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
503 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
504504 ; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1
505505 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = mem[1,1,3,3,5,5,7,7]
506506 ; CHECK-NEXT: retq
513513 define <8 x float> @test_masked_8xfloat_dup_high_mem_mask3(<8 x float>* %vp, <8 x float> %vec2, <8 x float> %mask) {
514514 ; CHECK-LABEL: test_masked_8xfloat_dup_high_mem_mask3:
515515 ; CHECK: # %bb.0:
516 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
516 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
517517 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
518518 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} = mem[1,1,3,3,5,5,7,7]
519519 ; CHECK-NEXT: retq
527527 define <8 x float> @test_masked_z_8xfloat_dup_high_mem_mask3(<8 x float>* %vp, <8 x float> %mask) {
528528 ; CHECK-LABEL: test_masked_z_8xfloat_dup_high_mem_mask3:
529529 ; CHECK: # %bb.0:
530 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
530 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
531531 ; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1
532532 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = mem[1,1,3,3,5,5,7,7]
533533 ; CHECK-NEXT: retq
540540 define <8 x float> @test_masked_8xfloat_dup_high_mem_mask4(<8 x float>* %vp, <8 x float> %vec2, <8 x float> %mask) {
541541 ; CHECK-LABEL: test_masked_8xfloat_dup_high_mem_mask4:
542542 ; CHECK: # %bb.0:
543 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
543 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
544544 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
545545 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} = mem[1,1,3,3,5,5,7,7]
546546 ; CHECK-NEXT: retq
554554 define <8 x float> @test_masked_z_8xfloat_dup_high_mem_mask4(<8 x float>* %vp, <8 x float> %mask) {
555555 ; CHECK-LABEL: test_masked_z_8xfloat_dup_high_mem_mask4:
556556 ; CHECK: # %bb.0:
557 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
557 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
558558 ; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1
559559 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = mem[1,1,3,3,5,5,7,7]
560560 ; CHECK-NEXT: retq
575575 define <16 x float> @test_masked_16xfloat_dup_high_mask0(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) {
576576 ; CHECK-LABEL: test_masked_16xfloat_dup_high_mask0:
577577 ; CHECK: # %bb.0:
578 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
578 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
579579 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
580580 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm1 {%k1} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
581581 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
589589 define <16 x float> @test_masked_z_16xfloat_dup_high_mask0(<16 x float> %vec, <16 x float> %mask) {
590590 ; CHECK-LABEL: test_masked_z_16xfloat_dup_high_mask0:
591591 ; CHECK: # %bb.0:
592 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
592 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
593593 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
594594 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
595595 ; CHECK-NEXT: retq
601601 define <16 x float> @test_masked_16xfloat_dup_high_mask1(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) {
602602 ; CHECK-LABEL: test_masked_16xfloat_dup_high_mask1:
603603 ; CHECK: # %bb.0:
604 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
604 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
605605 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
606606 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm1 {%k1} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
607607 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
615615 define <16 x float> @test_masked_z_16xfloat_dup_high_mask1(<16 x float> %vec, <16 x float> %mask) {
616616 ; CHECK-LABEL: test_masked_z_16xfloat_dup_high_mask1:
617617 ; CHECK: # %bb.0:
618 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
618 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
619619 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
620620 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
621621 ; CHECK-NEXT: retq
627627 define <16 x float> @test_masked_16xfloat_dup_high_mask2(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) {
628628 ; CHECK-LABEL: test_masked_16xfloat_dup_high_mask2:
629629 ; CHECK: # %bb.0:
630 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
630 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
631631 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
632632 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm1 {%k1} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
633633 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
641641 define <16 x float> @test_masked_z_16xfloat_dup_high_mask2(<16 x float> %vec, <16 x float> %mask) {
642642 ; CHECK-LABEL: test_masked_z_16xfloat_dup_high_mask2:
643643 ; CHECK: # %bb.0:
644 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
644 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
645645 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
646646 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
647647 ; CHECK-NEXT: retq
653653 define <16 x float> @test_masked_16xfloat_dup_high_mask3(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) {
654654 ; CHECK-LABEL: test_masked_16xfloat_dup_high_mask3:
655655 ; CHECK: # %bb.0:
656 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
656 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
657657 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
658658 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm1 {%k1} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
659659 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
667667 define <16 x float> @test_masked_z_16xfloat_dup_high_mask3(<16 x float> %vec, <16 x float> %mask) {
668668 ; CHECK-LABEL: test_masked_z_16xfloat_dup_high_mask3:
669669 ; CHECK: # %bb.0:
670 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
670 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
671671 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
672672 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
673673 ; CHECK-NEXT: retq
679679 define <16 x float> @test_masked_16xfloat_dup_high_mask4(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) {
680680 ; CHECK-LABEL: test_masked_16xfloat_dup_high_mask4:
681681 ; CHECK: # %bb.0:
682 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
682 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
683683 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
684684 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm1 {%k1} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
685685 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
693693 define <16 x float> @test_masked_z_16xfloat_dup_high_mask4(<16 x float> %vec, <16 x float> %mask) {
694694 ; CHECK-LABEL: test_masked_z_16xfloat_dup_high_mask4:
695695 ; CHECK: # %bb.0:
696 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
696 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
697697 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
698698 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
699699 ; CHECK-NEXT: retq
714714 define <16 x float> @test_masked_16xfloat_dup_high_mem_mask0(<16 x float>* %vp, <16 x float> %vec2, <16 x float> %mask) {
715715 ; CHECK-LABEL: test_masked_16xfloat_dup_high_mem_mask0:
716716 ; CHECK: # %bb.0:
717 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
717 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
718718 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
719719 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} = mem[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
720720 ; CHECK-NEXT: retq
728728 define <16 x float> @test_masked_z_16xfloat_dup_high_mem_mask0(<16 x float>* %vp, <16 x float> %mask) {
729729 ; CHECK-LABEL: test_masked_z_16xfloat_dup_high_mem_mask0:
730730 ; CHECK: # %bb.0:
731 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
731 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
732732 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
733733 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = mem[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
734734 ; CHECK-NEXT: retq
741741 define <16 x float> @test_masked_16xfloat_dup_high_mem_mask1(<16 x float>* %vp, <16 x float> %vec2, <16 x float> %mask) {
742742 ; CHECK-LABEL: test_masked_16xfloat_dup_high_mem_mask1:
743743 ; CHECK: # %bb.0:
744 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
744 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
745745 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
746746 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} = mem[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
747747 ; CHECK-NEXT: retq
755755 define <16 x float> @test_masked_z_16xfloat_dup_high_mem_mask1(<16 x float>* %vp, <16 x float> %mask) {
756756 ; CHECK-LABEL: test_masked_z_16xfloat_dup_high_mem_mask1:
757757 ; CHECK: # %bb.0:
758 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
758 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
759759 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
760760 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = mem[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
761761 ; CHECK-NEXT: retq
768768 define <16 x float> @test_masked_16xfloat_dup_high_mem_mask2(<16 x float>* %vp, <16 x float> %vec2, <16 x float> %mask) {
769769 ; CHECK-LABEL: test_masked_16xfloat_dup_high_mem_mask2:
770770 ; CHECK: # %bb.0:
771 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
771 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
772772 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
773773 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} = mem[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
774774 ; CHECK-NEXT: retq
782782 define <16 x float> @test_masked_z_16xfloat_dup_high_mem_mask2(<16 x float>* %vp, <16 x float> %mask) {
783783 ; CHECK-LABEL: test_masked_z_16xfloat_dup_high_mem_mask2:
784784 ; CHECK: # %bb.0:
785 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
785 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
786786 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
787787 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = mem[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
788788 ; CHECK-NEXT: retq
795795 define <16 x float> @test_masked_16xfloat_dup_high_mem_mask3(<16 x float>* %vp, <16 x float> %vec2, <16 x float> %mask) {
796796 ; CHECK-LABEL: test_masked_16xfloat_dup_high_mem_mask3:
797797 ; CHECK: # %bb.0:
798 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
798 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
799799 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
800800 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} = mem[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
801801 ; CHECK-NEXT: retq
809809 define <16 x float> @test_masked_z_16xfloat_dup_high_mem_mask3(<16 x float>* %vp, <16 x float> %mask) {
810810 ; CHECK-LABEL: test_masked_z_16xfloat_dup_high_mem_mask3:
811811 ; CHECK: # %bb.0:
812 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
812 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
813813 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
814814 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = mem[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
815815 ; CHECK-NEXT: retq
822822 define <16 x float> @test_masked_16xfloat_dup_high_mem_mask4(<16 x float>* %vp, <16 x float> %vec2, <16 x float> %mask) {
823823 ; CHECK-LABEL: test_masked_16xfloat_dup_high_mem_mask4:
824824 ; CHECK: # %bb.0:
825 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
825 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
826826 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
827827 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} = mem[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
828828 ; CHECK-NEXT: retq
836836 define <16 x float> @test_masked_z_16xfloat_dup_high_mem_mask4(<16 x float>* %vp, <16 x float> %mask) {
837837 ; CHECK-LABEL: test_masked_z_16xfloat_dup_high_mem_mask4:
838838 ; CHECK: # %bb.0:
839 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
839 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
840840 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
841841 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = mem[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
842842 ; CHECK-NEXT: retq
1111 define <2 x double> @test_masked_2xdouble_dup_low_mask0(<2 x double> %vec, <2 x double> %vec2, <2 x double> %mask) {
1212 ; CHECK-LABEL: test_masked_2xdouble_dup_low_mask0:
1313 ; CHECK: # %bb.0:
14 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
14 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
1515 ; CHECK-NEXT: vcmpeqpd %xmm3, %xmm2, %k1
1616 ; CHECK-NEXT: vmovddup {{.*#+}} xmm1 {%k1} = xmm0[0,0]
1717 ; CHECK-NEXT: vmovapd %xmm1, %xmm0
2525 define <2 x double> @test_masked_z_2xdouble_dup_low_mask0(<2 x double> %vec, <2 x double> %mask) {
2626 ; CHECK-LABEL: test_masked_z_2xdouble_dup_low_mask0:
2727 ; CHECK: # %bb.0:
28 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
28 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
2929 ; CHECK-NEXT: vcmpeqpd %xmm2, %xmm1, %k1
3030 ; CHECK-NEXT: vmovddup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0]
3131 ; CHECK-NEXT: retq
3737 define <2 x double> @test_masked_2xdouble_dup_low_mask1(<2 x double> %vec, <2 x double> %vec2, <2 x double> %mask) {
3838 ; CHECK-LABEL: test_masked_2xdouble_dup_low_mask1:
3939 ; CHECK: # %bb.0:
40 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
40 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
4141 ; CHECK-NEXT: vcmpeqpd %xmm3, %xmm2, %k1
4242 ; CHECK-NEXT: vmovddup {{.*#+}} xmm1 {%k1} = xmm0[0,0]
4343 ; CHECK-NEXT: vmovapd %xmm1, %xmm0
5151 define <2 x double> @test_masked_z_2xdouble_dup_low_mask1(<2 x double> %vec, <2 x double> %mask) {
5252 ; CHECK-LABEL: test_masked_z_2xdouble_dup_low_mask1:
5353 ; CHECK: # %bb.0:
54 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
54 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
5555 ; CHECK-NEXT: vcmpeqpd %xmm2, %xmm1, %k1
5656 ; CHECK-NEXT: vmovddup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0]
5757 ; CHECK-NEXT: retq
7272 define <2 x double> @test_masked_2xdouble_dup_low_mem_mask0(<2 x double>* %vp, <2 x double> %vec2, <2 x double> %mask) {
7373 ; CHECK-LABEL: test_masked_2xdouble_dup_low_mem_mask0:
7474 ; CHECK: # %bb.0:
75 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
75 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
7676 ; CHECK-NEXT: vcmpeqpd %xmm2, %xmm1, %k1
7777 ; CHECK-NEXT: vmovddup {{.*#+}} xmm0 {%k1} = mem[0,0]
7878 ; CHECK-NEXT: retq
8686 define <2 x double> @test_masked_z_2xdouble_dup_low_mem_mask0(<2 x double>* %vp, <2 x double> %mask) {
8787 ; CHECK-LABEL: test_masked_z_2xdouble_dup_low_mem_mask0:
8888 ; CHECK: # %bb.0:
89 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
89 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
9090 ; CHECK-NEXT: vcmpeqpd %xmm1, %xmm0, %k1
9191 ; CHECK-NEXT: vmovddup {{.*#+}} xmm0 {%k1} {z} = mem[0,0]
9292 ; CHECK-NEXT: retq
9999 define <2 x double> @test_masked_2xdouble_dup_low_mem_mask1(<2 x double>* %vp, <2 x double> %vec2, <2 x double> %mask) {
100100 ; CHECK-LABEL: test_masked_2xdouble_dup_low_mem_mask1:
101101 ; CHECK: # %bb.0:
102 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
102 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
103103 ; CHECK-NEXT: vcmpeqpd %xmm2, %xmm1, %k1
104104 ; CHECK-NEXT: vmovddup {{.*#+}} xmm0 {%k1} = mem[0,0]
105105 ; CHECK-NEXT: retq
113113 define <2 x double> @test_masked_z_2xdouble_dup_low_mem_mask1(<2 x double>* %vp, <2 x double> %mask) {
114114 ; CHECK-LABEL: test_masked_z_2xdouble_dup_low_mem_mask1:
115115 ; CHECK: # %bb.0:
116 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
116 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
117117 ; CHECK-NEXT: vcmpeqpd %xmm1, %xmm0, %k1
118118 ; CHECK-NEXT: vmovddup {{.*#+}} xmm0 {%k1} {z} = mem[0,0]
119119 ; CHECK-NEXT: retq
134134 define <4 x double> @test_masked_4xdouble_dup_low_mask0(<4 x double> %vec, <4 x double> %vec2, <4 x double> %mask) {
135135 ; CHECK-LABEL: test_masked_4xdouble_dup_low_mask0:
136136 ; CHECK: # %bb.0:
137 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
137 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
138138 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
139139 ; CHECK-NEXT: vmovddup {{.*#+}} ymm1 {%k1} = ymm0[0,0,2,2]
140140 ; CHECK-NEXT: vmovapd %ymm1, %ymm0
148148 define <4 x double> @test_masked_z_4xdouble_dup_low_mask0(<4 x double> %vec, <4 x double> %mask) {
149149 ; CHECK-LABEL: test_masked_z_4xdouble_dup_low_mask0:
150150 ; CHECK: # %bb.0:
151 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
151 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
152152 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
153153 ; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2]
154154 ; CHECK-NEXT: retq
160160 define <4 x double> @test_masked_4xdouble_dup_low_mask1(<4 x double> %vec, <4 x double> %vec2, <4 x double> %mask) {
161161 ; CHECK-LABEL: test_masked_4xdouble_dup_low_mask1:
162162 ; CHECK: # %bb.0:
163 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
163 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
164164 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
165165 ; CHECK-NEXT: vmovddup {{.*#+}} ymm1 {%k1} = ymm0[0,0,2,2]
166166 ; CHECK-NEXT: vmovapd %ymm1, %ymm0
174174 define <4 x double> @test_masked_z_4xdouble_dup_low_mask1(<4 x double> %vec, <4 x double> %mask) {
175175 ; CHECK-LABEL: test_masked_z_4xdouble_dup_low_mask1:
176176 ; CHECK: # %bb.0:
177 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
177 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
178178 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
179179 ; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2]
180180 ; CHECK-NEXT: retq
186186 define <4 x double> @test_masked_4xdouble_dup_low_mask2(<4 x double> %vec, <4 x double> %vec2, <4 x double> %mask) {
187187 ; CHECK-LABEL: test_masked_4xdouble_dup_low_mask2:
188188 ; CHECK: # %bb.0:
189 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
189 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
190190 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
191191 ; CHECK-NEXT: vmovddup {{.*#+}} ymm1 {%k1} = ymm0[0,0,2,2]
192192 ; CHECK-NEXT: vmovapd %ymm1, %ymm0
200200 define <4 x double> @test_masked_z_4xdouble_dup_low_mask2(<4 x double> %vec, <4 x double> %mask) {
201201 ; CHECK-LABEL: test_masked_z_4xdouble_dup_low_mask2:
202202 ; CHECK: # %bb.0:
203 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
203 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
204204 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
205205 ; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2]
206206 ; CHECK-NEXT: retq
212212 define <4 x double> @test_masked_4xdouble_dup_low_mask3(<4 x double> %vec, <4 x double> %vec2, <4 x double> %mask) {
213213 ; CHECK-LABEL: test_masked_4xdouble_dup_low_mask3:
214214 ; CHECK: # %bb.0:
215 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
215 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
216216 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
217217 ; CHECK-NEXT: vmovddup {{.*#+}} ymm1 {%k1} = ymm0[0,0,2,2]
218218 ; CHECK-NEXT: vmovapd %ymm1, %ymm0
226226 define <4 x double> @test_masked_z_4xdouble_dup_low_mask3(<4 x double> %vec, <4 x double> %mask) {
227227 ; CHECK-LABEL: test_masked_z_4xdouble_dup_low_mask3:
228228 ; CHECK: # %bb.0:
229 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
229 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
230230 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
231231 ; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2]
232232 ; CHECK-NEXT: retq
238238 define <4 x double> @test_masked_4xdouble_dup_low_mask4(<4 x double> %vec, <4 x double> %vec2, <4 x double> %mask) {
239239 ; CHECK-LABEL: test_masked_4xdouble_dup_low_mask4:
240240 ; CHECK: # %bb.0:
241 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
241 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
242242 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
243243 ; CHECK-NEXT: vmovddup {{.*#+}} ymm1 {%k1} = ymm0[0,0,2,2]
244244 ; CHECK-NEXT: vmovapd %ymm1, %ymm0
252252 define <4 x double> @test_masked_z_4xdouble_dup_low_mask4(<4 x double> %vec, <4 x double> %mask) {
253253 ; CHECK-LABEL: test_masked_z_4xdouble_dup_low_mask4:
254254 ; CHECK: # %bb.0:
255 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
255 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
256256 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
257257 ; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2]
258258 ; CHECK-NEXT: retq
273273 define <4 x double> @test_masked_4xdouble_dup_low_mem_mask0(<4 x double>* %vp, <4 x double> %vec2, <4 x double> %mask) {
274274 ; CHECK-LABEL: test_masked_4xdouble_dup_low_mem_mask0:
275275 ; CHECK: # %bb.0:
276 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
276 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
277277 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
278278 ; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} = mem[0,0,2,2]
279279 ; CHECK-NEXT: retq
287287 define <4 x double> @test_masked_z_4xdouble_dup_low_mem_mask0(<4 x double>* %vp, <4 x double> %mask) {
288288 ; CHECK-LABEL: test_masked_z_4xdouble_dup_low_mem_mask0:
289289 ; CHECK: # %bb.0:
290 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
290 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
291291 ; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1
292292 ; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,2]
293293 ; CHECK-NEXT: retq
300300 define <4 x double> @test_masked_4xdouble_dup_low_mem_mask1(<4 x double>* %vp, <4 x double> %vec2, <4 x double> %mask) {
301301 ; CHECK-LABEL: test_masked_4xdouble_dup_low_mem_mask1:
302302 ; CHECK: # %bb.0:
303 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
303 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
304304 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
305305 ; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} = mem[0,0,2,2]
306306 ; CHECK-NEXT: retq
314314 define <4 x double> @test_masked_z_4xdouble_dup_low_mem_mask1(<4 x double>* %vp, <4 x double> %mask) {
315315 ; CHECK-LABEL: test_masked_z_4xdouble_dup_low_mem_mask1:
316316 ; CHECK: # %bb.0:
317 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
317 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
318318 ; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1
319319 ; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,2]
320320 ; CHECK-NEXT: retq
327327 define <4 x double> @test_masked_4xdouble_dup_low_mem_mask2(<4 x double>* %vp, <4 x double> %vec2, <4 x double> %mask) {
328328 ; CHECK-LABEL: test_masked_4xdouble_dup_low_mem_mask2:
329329 ; CHECK: # %bb.0:
330 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
330 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
331331 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
332332 ; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} = mem[0,0,2,2]
333333 ; CHECK-NEXT: retq
341341 define <4 x double> @test_masked_z_4xdouble_dup_low_mem_mask2(<4 x double>* %vp, <4 x double> %mask) {
342342 ; CHECK-LABEL: test_masked_z_4xdouble_dup_low_mem_mask2:
343343 ; CHECK: # %bb.0:
344 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
344 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
345345 ; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1
346346 ; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,2]
347347 ; CHECK-NEXT: retq
354354 define <4 x double> @test_masked_4xdouble_dup_low_mem_mask3(<4 x double>* %vp, <4 x double> %vec2, <4 x double> %mask) {
355355 ; CHECK-LABEL: test_masked_4xdouble_dup_low_mem_mask3:
356356 ; CHECK: # %bb.0:
357 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
357 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
358358 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
359359 ; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} = mem[0,0,2,2]
360360 ; CHECK-NEXT: retq
368368 define <4 x double> @test_masked_z_4xdouble_dup_low_mem_mask3(<4 x double>* %vp, <4 x double> %mask) {
369369 ; CHECK-LABEL: test_masked_z_4xdouble_dup_low_mem_mask3:
370370 ; CHECK: # %bb.0:
371 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
371 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
372372 ; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1
373373 ; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,2]
374374 ; CHECK-NEXT: retq
381381 define <4 x double> @test_masked_4xdouble_dup_low_mem_mask4(<4 x double>* %vp, <4 x double> %vec2, <4 x double> %mask) {
382382 ; CHECK-LABEL: test_masked_4xdouble_dup_low_mem_mask4:
383383 ; CHECK: # %bb.0:
384 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
384 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
385385 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
386386 ; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} = mem[0,0,2,2]
387387 ; CHECK-NEXT: retq
395395 define <4 x double> @test_masked_z_4xdouble_dup_low_mem_mask4(<4 x double>* %vp, <4 x double> %mask) {
396396 ; CHECK-LABEL: test_masked_z_4xdouble_dup_low_mem_mask4:
397397 ; CHECK: # %bb.0:
398 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
398 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
399399 ; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1
400400 ; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,2]
401401 ; CHECK-NEXT: retq
416416 define <8 x double> @test_masked_8xdouble_dup_low_mask0(<8 x double> %vec, <8 x double> %vec2, <8 x double> %mask) {
417417 ; CHECK-LABEL: test_masked_8xdouble_dup_low_mask0:
418418 ; CHECK: # %bb.0:
419 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
419 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
420420 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
421421 ; CHECK-NEXT: vmovddup {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6]
422422 ; CHECK-NEXT: vmovapd %zmm1, %zmm0
430430 define <8 x double> @test_masked_z_8xdouble_dup_low_mask0(<8 x double> %vec, <8 x double> %mask) {
431431 ; CHECK-LABEL: test_masked_z_8xdouble_dup_low_mask0:
432432 ; CHECK: # %bb.0:
433 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
433 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
434434 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
435435 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6]
436436 ; CHECK-NEXT: retq
442442 define <8 x double> @test_masked_8xdouble_dup_low_mask1(<8 x double> %vec, <8 x double> %vec2, <8 x double> %mask) {
443443 ; CHECK-LABEL: test_masked_8xdouble_dup_low_mask1:
444444 ; CHECK: # %bb.0:
445 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
445 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
446446 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
447447 ; CHECK-NEXT: vmovddup {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6]
448448 ; CHECK-NEXT: vmovapd %zmm1, %zmm0
456456 define <8 x double> @test_masked_z_8xdouble_dup_low_mask1(<8 x double> %vec, <8 x double> %mask) {
457457 ; CHECK-LABEL: test_masked_z_8xdouble_dup_low_mask1:
458458 ; CHECK: # %bb.0:
459 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
459 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
460460 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
461461 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6]
462462 ; CHECK-NEXT: retq
468468 define <8 x double> @test_masked_8xdouble_dup_low_mask2(<8 x double> %vec, <8 x double> %vec2, <8 x double> %mask) {
469469 ; CHECK-LABEL: test_masked_8xdouble_dup_low_mask2:
470470 ; CHECK: # %bb.0:
471 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
471 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
472472 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
473473 ; CHECK-NEXT: vmovddup {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6]
474474 ; CHECK-NEXT: vmovapd %zmm1, %zmm0
482482 define <8 x double> @test_masked_z_8xdouble_dup_low_mask2(<8 x double> %vec, <8 x double> %mask) {
483483 ; CHECK-LABEL: test_masked_z_8xdouble_dup_low_mask2:
484484 ; CHECK: # %bb.0:
485 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
485 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
486486 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
487487 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6]
488488 ; CHECK-NEXT: retq
494494 define <8 x double> @test_masked_8xdouble_dup_low_mask3(<8 x double> %vec, <8 x double> %vec2, <8 x double> %mask) {
495495 ; CHECK-LABEL: test_masked_8xdouble_dup_low_mask3:
496496 ; CHECK: # %bb.0:
497 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
497 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
498498 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
499499 ; CHECK-NEXT: vmovddup {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6]
500500 ; CHECK-NEXT: vmovapd %zmm1, %zmm0
508508 define <8 x double> @test_masked_z_8xdouble_dup_low_mask3(<8 x double> %vec, <8 x double> %mask) {
509509 ; CHECK-LABEL: test_masked_z_8xdouble_dup_low_mask3:
510510 ; CHECK: # %bb.0:
511 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
511 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
512512 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
513513 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6]
514514 ; CHECK-NEXT: retq
520520 define <8 x double> @test_masked_8xdouble_dup_low_mask4(<8 x double> %vec, <8 x double> %vec2, <8 x double> %mask) {
521521 ; CHECK-LABEL: test_masked_8xdouble_dup_low_mask4:
522522 ; CHECK: # %bb.0:
523 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
523 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
524524 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
525525 ; CHECK-NEXT: vmovddup {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6]
526526 ; CHECK-NEXT: vmovapd %zmm1, %zmm0
534534 define <8 x double> @test_masked_z_8xdouble_dup_low_mask4(<8 x double> %vec, <8 x double> %mask) {
535535 ; CHECK-LABEL: test_masked_z_8xdouble_dup_low_mask4:
536536 ; CHECK: # %bb.0:
537 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
537 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
538538 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
539539 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6]
540540 ; CHECK-NEXT: retq
555555 define <8 x double> @test_masked_8xdouble_dup_low_mem_mask0(<8 x double>* %vp, <8 x double> %vec2, <8 x double> %mask) {
556556 ; CHECK-LABEL: test_masked_8xdouble_dup_low_mem_mask0:
557557 ; CHECK: # %bb.0:
558 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
558 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
559559 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
560560 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} = mem[0,0,2,2,4,4,6,6]
561561 ; CHECK-NEXT: retq
569569 define <8 x double> @test_masked_z_8xdouble_dup_low_mem_mask0(<8 x double>* %vp, <8 x double> %mask) {
570570 ; CHECK-LABEL: test_masked_z_8xdouble_dup_low_mem_mask0:
571571 ; CHECK: # %bb.0:
572 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
572 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
573573 ; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1
574574 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6]
575575 ; CHECK-NEXT: retq
582582 define <8 x double> @test_masked_8xdouble_dup_low_mem_mask1(<8 x double>* %vp, <8 x double> %vec2, <8 x double> %mask) {
583583 ; CHECK-LABEL: test_masked_8xdouble_dup_low_mem_mask1:
584584 ; CHECK: # %bb.0:
585 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
585 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
586586 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
587587 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} = mem[0,0,2,2,4,4,6,6]
588588 ; CHECK-NEXT: retq
596596 define <8 x double> @test_masked_z_8xdouble_dup_low_mem_mask1(<8 x double>* %vp, <8 x double> %mask) {
597597 ; CHECK-LABEL: test_masked_z_8xdouble_dup_low_mem_mask1:
598598 ; CHECK: # %bb.0:
599 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
599 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
600600 ; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1
601601 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6]
602602 ; CHECK-NEXT: retq
609609 define <8 x double> @test_masked_8xdouble_dup_low_mem_mask2(<8 x double>* %vp, <8 x double> %vec2, <8 x double> %mask) {
610610 ; CHECK-LABEL: test_masked_8xdouble_dup_low_mem_mask2:
611611 ; CHECK: # %bb.0:
612 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
612 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
613613 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
614614 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} = mem[0,0,2,2,4,4,6,6]
615615 ; CHECK-NEXT: retq
623623 define <8 x double> @test_masked_z_8xdouble_dup_low_mem_mask2(<8 x double>* %vp, <8 x double> %mask) {
624624 ; CHECK-LABEL: test_masked_z_8xdouble_dup_low_mem_mask2:
625625 ; CHECK: # %bb.0:
626 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
626 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
627627 ; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1
628628 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6]
629629 ; CHECK-NEXT: retq
636636 define <8 x double> @test_masked_8xdouble_dup_low_mem_mask3(<8 x double>* %vp, <8 x double> %vec2, <8 x double> %mask) {
637637 ; CHECK-LABEL: test_masked_8xdouble_dup_low_mem_mask3:
638638 ; CHECK: # %bb.0:
639 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
639 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
640640 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
641641 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} = mem[0,0,2,2,4,4,6,6]
642642 ; CHECK-NEXT: retq
650650 define <8 x double> @test_masked_z_8xdouble_dup_low_mem_mask3(<8 x double>* %vp, <8 x double> %mask) {
651651 ; CHECK-LABEL: test_masked_z_8xdouble_dup_low_mem_mask3:
652652 ; CHECK: # %bb.0:
653 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
653 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
654654 ; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1
655655 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6]
656656 ; CHECK-NEXT: retq
663663 define <8 x double> @test_masked_8xdouble_dup_low_mem_mask4(<8 x double>* %vp, <8 x double> %vec2, <8 x double> %mask) {
664664 ; CHECK-LABEL: test_masked_8xdouble_dup_low_mem_mask4:
665665 ; CHECK: # %bb.0:
666 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
666 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
667667 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
668668 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} = mem[0,0,2,2,4,4,6,6]
669669 ; CHECK-NEXT: retq
677677 define <8 x double> @test_masked_z_8xdouble_dup_low_mem_mask4(<8 x double>* %vp, <8 x double> %mask) {
678678 ; CHECK-LABEL: test_masked_z_8xdouble_dup_low_mem_mask4:
679679 ; CHECK: # %bb.0:
680 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
680 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
681681 ; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1
682682 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6]
683683 ; CHECK-NEXT: retq
698698 define <4 x float> @test_masked_4xfloat_dup_low_mask0(<4 x float> %vec, <4 x float> %vec2, <4 x float> %mask) {
699699 ; CHECK-LABEL: test_masked_4xfloat_dup_low_mask0:
700700 ; CHECK: # %bb.0:
701 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
701 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
702702 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
703703 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm1 {%k1} = xmm0[0,0,2,2]
704704 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
712712 define <4 x float> @test_masked_z_4xfloat_dup_low_mask0(<4 x float> %vec, <4 x float> %mask) {
713713 ; CHECK-LABEL: test_masked_z_4xfloat_dup_low_mask0:
714714 ; CHECK: # %bb.0:
715 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
715 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
716716 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
717717 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0,2,2]
718718 ; CHECK-NEXT: retq
724724 define <4 x float> @test_masked_4xfloat_dup_low_mask1(<4 x float> %vec, <4 x float> %vec2, <4 x float> %mask) {
725725 ; CHECK-LABEL: test_masked_4xfloat_dup_low_mask1:
726726 ; CHECK: # %bb.0:
727 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
727 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
728728 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
729729 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm1 {%k1} = xmm0[0,0,2,2]
730730 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
738738 define <4 x float> @test_masked_z_4xfloat_dup_low_mask1(<4 x float> %vec, <4 x float> %mask) {
739739 ; CHECK-LABEL: test_masked_z_4xfloat_dup_low_mask1:
740740 ; CHECK: # %bb.0:
741 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
741 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
742742 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
743743 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0,2,2]
744744 ; CHECK-NEXT: retq
750750 define <4 x float> @test_masked_4xfloat_dup_low_mask2(<4 x float> %vec, <4 x float> %vec2, <4 x float> %mask) {
751751 ; CHECK-LABEL: test_masked_4xfloat_dup_low_mask2:
752752 ; CHECK: # %bb.0:
753 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
753 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
754754 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
755755 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm1 {%k1} = xmm0[0,0,2,2]
756756 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
764764 define <4 x float> @test_masked_z_4xfloat_dup_low_mask2(<4 x float> %vec, <4 x float> %mask) {
765765 ; CHECK-LABEL: test_masked_z_4xfloat_dup_low_mask2:
766766 ; CHECK: # %bb.0:
767 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
767 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
768768 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
769769 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0,2,2]
770770 ; CHECK-NEXT: retq
776776 define <4 x float> @test_masked_4xfloat_dup_low_mask3(<4 x float> %vec, <4 x float> %vec2, <4 x float> %mask) {
777777 ; CHECK-LABEL: test_masked_4xfloat_dup_low_mask3:
778778 ; CHECK: # %bb.0:
779 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
779 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
780780 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
781781 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm1 {%k1} = xmm0[0,0,2,2]
782782 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
790790 define <4 x float> @test_masked_z_4xfloat_dup_low_mask3(<4 x float> %vec, <4 x float> %mask) {
791791 ; CHECK-LABEL: test_masked_z_4xfloat_dup_low_mask3:
792792 ; CHECK: # %bb.0:
793 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
793 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
794794 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
795795 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0,2,2]
796796 ; CHECK-NEXT: retq
802802 define <4 x float> @test_masked_4xfloat_dup_low_mask4(<4 x float> %vec, <4 x float> %vec2, <4 x float> %mask) {
803803 ; CHECK-LABEL: test_masked_4xfloat_dup_low_mask4:
804804 ; CHECK: # %bb.0:
805 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
805 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
806806 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
807807 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm1 {%k1} = xmm0[0,0,2,2]
808808 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
816816 define <4 x float> @test_masked_z_4xfloat_dup_low_mask4(<4 x float> %vec, <4 x float> %mask) {
817817 ; CHECK-LABEL: test_masked_z_4xfloat_dup_low_mask4:
818818 ; CHECK: # %bb.0:
819 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
819 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
820820 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
821821 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0,2,2]
822822 ; CHECK-NEXT: retq
837837 define <4 x float> @test_masked_4xfloat_dup_low_mem_mask0(<4 x float>* %vp, <4 x float> %vec2, <4 x float> %mask) {
838838 ; CHECK-LABEL: test_masked_4xfloat_dup_low_mem_mask0:
839839 ; CHECK: # %bb.0:
840 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
840 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
841841 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
842842 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} = mem[0,0,2,2]
843843 ; CHECK-NEXT: retq
851851 define <4 x float> @test_masked_z_4xfloat_dup_low_mem_mask0(<4 x float>* %vp, <4 x float> %mask) {
852852 ; CHECK-LABEL: test_masked_z_4xfloat_dup_low_mem_mask0:
853853 ; CHECK: # %bb.0:
854 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
854 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
855855 ; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1
856856 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = mem[0,0,2,2]
857857 ; CHECK-NEXT: retq
864864 define <4 x float> @test_masked_4xfloat_dup_low_mem_mask1(<4 x float>* %vp, <4 x float> %vec2, <4 x float> %mask) {
865865 ; CHECK-LABEL: test_masked_4xfloat_dup_low_mem_mask1:
866866 ; CHECK: # %bb.0:
867 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
867 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
868868 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
869869 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} = mem[0,0,2,2]
870870 ; CHECK-NEXT: retq
878878 define <4 x float> @test_masked_z_4xfloat_dup_low_mem_mask1(<4 x float>* %vp, <4 x float> %mask) {
879879 ; CHECK-LABEL: test_masked_z_4xfloat_dup_low_mem_mask1:
880880 ; CHECK: # %bb.0:
881 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
881 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
882882 ; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1
883883 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = mem[0,0,2,2]
884884 ; CHECK-NEXT: retq
891891 define <4 x float> @test_masked_4xfloat_dup_low_mem_mask2(<4 x float>* %vp, <4 x float> %vec2, <4 x float> %mask) {
892892 ; CHECK-LABEL: test_masked_4xfloat_dup_low_mem_mask2:
893893 ; CHECK: # %bb.0:
894 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
894 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
895895 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
896896 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} = mem[0,0,2,2]
897897 ; CHECK-NEXT: retq
905905 define <4 x float> @test_masked_z_4xfloat_dup_low_mem_mask2(<4 x float>* %vp, <4 x float> %mask) {
906906 ; CHECK-LABEL: test_masked_z_4xfloat_dup_low_mem_mask2:
907907 ; CHECK: # %bb.0:
908 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
908 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
909909 ; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1
910910 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = mem[0,0,2,2]
911911 ; CHECK-NEXT: retq
918918 define <4 x float> @test_masked_4xfloat_dup_low_mem_mask3(<4 x float>* %vp, <4 x float> %vec2, <4 x float> %mask) {
919919 ; CHECK-LABEL: test_masked_4xfloat_dup_low_mem_mask3:
920920 ; CHECK: # %bb.0:
921 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
921 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
922922 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
923923 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} = mem[0,0,2,2]
924924 ; CHECK-NEXT: retq
932932 define <4 x float> @test_masked_z_4xfloat_dup_low_mem_mask3(<4 x float>* %vp, <4 x float> %mask) {
933933 ; CHECK-LABEL: test_masked_z_4xfloat_dup_low_mem_mask3:
934934 ; CHECK: # %bb.0:
935 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
935 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
936936 ; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1
937937 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = mem[0,0,2,2]
938938 ; CHECK-NEXT: retq
945945 define <4 x float> @test_masked_4xfloat_dup_low_mem_mask4(<4 x float>* %vp, <4 x float> %vec2, <4 x float> %mask) {
946946 ; CHECK-LABEL: test_masked_4xfloat_dup_low_mem_mask4:
947947 ; CHECK: # %bb.0:
948 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
948 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
949949 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
950950 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} = mem[0,0,2,2]
951951 ; CHECK-NEXT: retq
959959 define <4 x float> @test_masked_z_4xfloat_dup_low_mem_mask4(<4 x float>* %vp, <4 x float> %mask) {
960960 ; CHECK-LABEL: test_masked_z_4xfloat_dup_low_mem_mask4:
961961 ; CHECK: # %bb.0:
962 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
962 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
963963 ; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1
964964 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = mem[0,0,2,2]
965965 ; CHECK-NEXT: retq
980980 define <8 x float> @test_masked_8xfloat_dup_low_mask0(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) {
981981 ; CHECK-LABEL: test_masked_8xfloat_dup_low_mask0:
982982 ; CHECK: # %bb.0:
983 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
983 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
984984 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
985985 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm1 {%k1} = ymm0[0,0,2,2,4,4,6,6]
986986 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
994994 define <8 x float> @test_masked_z_8xfloat_dup_low_mask0(<8 x float> %vec, <8 x float> %mask) {
995995 ; CHECK-LABEL: test_masked_z_8xfloat_dup_low_mask0:
996996 ; CHECK: # %bb.0:
997 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
997 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
998998 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
999999 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2,4,4,6,6]
10001000 ; CHECK-NEXT: retq
10061006 define <8 x float> @test_masked_8xfloat_dup_low_mask1(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) {
10071007 ; CHECK-LABEL: test_masked_8xfloat_dup_low_mask1:
10081008 ; CHECK: # %bb.0:
1009 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
1009 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
10101010 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
10111011 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm1 {%k1} = ymm0[0,0,2,2,4,4,6,6]
10121012 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
10201020 define <8 x float> @test_masked_z_8xfloat_dup_low_mask1(<8 x float> %vec, <8 x float> %mask) {
10211021 ; CHECK-LABEL: test_masked_z_8xfloat_dup_low_mask1:
10221022 ; CHECK: # %bb.0:
1023 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
1023 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
10241024 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
10251025 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2,4,4,6,6]
10261026 ; CHECK-NEXT: retq
10321032 define <8 x float> @test_masked_8xfloat_dup_low_mask2(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) {
10331033 ; CHECK-LABEL: test_masked_8xfloat_dup_low_mask2:
10341034 ; CHECK: # %bb.0:
1035 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
1035 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
10361036 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
10371037 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm1 {%k1} = ymm0[0,0,2,2,4,4,6,6]
10381038 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
10461046 define <8 x float> @test_masked_z_8xfloat_dup_low_mask2(<8 x float> %vec, <8 x float> %mask) {
10471047 ; CHECK-LABEL: test_masked_z_8xfloat_dup_low_mask2:
10481048 ; CHECK: # %bb.0:
1049 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
1049 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
10501050 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
10511051 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2,4,4,6,6]
10521052 ; CHECK-NEXT: retq
10581058 define <8 x float> @test_masked_8xfloat_dup_low_mask3(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) {
10591059 ; CHECK-LABEL: test_masked_8xfloat_dup_low_mask3:
10601060 ; CHECK: # %bb.0:
1061 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
1061 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
10621062 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
10631063 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm1 {%k1} = ymm0[0,0,2,2,4,4,6,6]
10641064 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
10721072 define <8 x float> @test_masked_z_8xfloat_dup_low_mask3(<8 x float> %vec, <8 x float> %mask) {
10731073 ; CHECK-LABEL: test_masked_z_8xfloat_dup_low_mask3:
10741074 ; CHECK: # %bb.0:
1075 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
1075 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
10761076 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
10771077 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2,4,4,6,6]
10781078 ; CHECK-NEXT: retq
10841084 define <8 x float> @test_masked_8xfloat_dup_low_mask4(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) {
10851085 ; CHECK-LABEL: test_masked_8xfloat_dup_low_mask4:
10861086 ; CHECK: # %bb.0:
1087 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
1087 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
10881088 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
10891089 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm1 {%k1} = ymm0[0,0,2,2,4,4,6,6]
10901090 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
10981098 define <8 x float> @test_masked_z_8xfloat_dup_low_mask4(<8 x float> %vec, <8 x float> %mask) {
10991099 ; CHECK-LABEL: test_masked_z_8xfloat_dup_low_mask4:
11001100 ; CHECK: # %bb.0:
1101 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
1101 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
11021102 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
11031103 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2,4,4,6,6]
11041104 ; CHECK-NEXT: retq
11191119 define <8 x float> @test_masked_8xfloat_dup_low_mem_mask0(<8 x float>* %vp, <8 x float> %vec2, <8 x float> %mask) {
11201120 ; CHECK-LABEL: test_masked_8xfloat_dup_low_mem_mask0:
11211121 ; CHECK: # %bb.0:
1122 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
1122 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
11231123 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
11241124 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} = mem[0,0,2,2,4,4,6,6]
11251125 ; CHECK-NEXT: retq
11331133 define <8 x float> @test_masked_z_8xfloat_dup_low_mem_mask0(<8 x float>* %vp, <8 x float> %mask) {
11341134 ; CHECK-LABEL: test_masked_z_8xfloat_dup_low_mem_mask0:
11351135 ; CHECK: # %bb.0:
1136 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
1136 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
11371137 ; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1
11381138 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6]
11391139 ; CHECK-NEXT: retq
11461146 define <8 x float> @test_masked_8xfloat_dup_low_mem_mask1(<8 x float>* %vp, <8 x float> %vec2, <8 x float> %mask) {
11471147 ; CHECK-LABEL: test_masked_8xfloat_dup_low_mem_mask1:
11481148 ; CHECK: # %bb.0:
1149 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
1149 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
11501150 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
11511151 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} = mem[0,0,2,2,4,4,6,6]
11521152 ; CHECK-NEXT: retq
11601160 define <8 x float> @test_masked_z_8xfloat_dup_low_mem_mask1(<8 x float>* %vp, <8 x float> %mask) {
11611161 ; CHECK-LABEL: test_masked_z_8xfloat_dup_low_mem_mask1:
11621162 ; CHECK: # %bb.0:
1163 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
1163 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
11641164 ; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1
11651165 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6]
11661166 ; CHECK-NEXT: retq
11731173 define <8 x float> @test_masked_8xfloat_dup_low_mem_mask2(<8 x float>* %vp, <8 x float> %vec2, <8 x float> %mask) {
11741174 ; CHECK-LABEL: test_masked_8xfloat_dup_low_mem_mask2:
11751175 ; CHECK: # %bb.0:
1176 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
1176 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
11771177 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
11781178 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} = mem[0,0,2,2,4,4,6,6]
11791179 ; CHECK-NEXT: retq
11871187 define <8 x float> @test_masked_z_8xfloat_dup_low_mem_mask2(<8 x float>* %vp, <8 x float> %mask) {
11881188 ; CHECK-LABEL: test_masked_z_8xfloat_dup_low_mem_mask2:
11891189 ; CHECK: # %bb.0:
1190 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
1190 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
11911191 ; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1
11921192 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6]
11931193 ; CHECK-NEXT: retq
12001200 define <8 x float> @test_masked_8xfloat_dup_low_mem_mask3(<8 x float>* %vp, <8 x float> %vec2, <8 x float> %mask) {
12011201 ; CHECK-LABEL: test_masked_8xfloat_dup_low_mem_mask3:
12021202 ; CHECK: # %bb.0:
1203 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
1203 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
12041204 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
12051205 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} = mem[0,0,2,2,4,4,6,6]
12061206 ; CHECK-NEXT: retq
12141214 define <8 x float> @test_masked_z_8xfloat_dup_low_mem_mask3(<8 x float>* %vp, <8 x float> %mask) {
12151215 ; CHECK-LABEL: test_masked_z_8xfloat_dup_low_mem_mask3:
12161216 ; CHECK: # %bb.0:
1217 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
1217 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
12181218 ; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1
12191219 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6]
12201220 ; CHECK-NEXT: retq
12271227 define <8 x float> @test_masked_8xfloat_dup_low_mem_mask4(<8 x float>* %vp, <8 x float> %vec2, <8 x float> %mask) {
12281228 ; CHECK-LABEL: test_masked_8xfloat_dup_low_mem_mask4:
12291229 ; CHECK: # %bb.0:
1230 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
1230 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
12311231 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
12321232 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} = mem[0,0,2,2,4,4,6,6]
12331233 ; CHECK-NEXT: retq
12411241 define <8 x float> @test_masked_z_8xfloat_dup_low_mem_mask4(<8 x float>* %vp, <8 x float> %mask) {
12421242 ; CHECK-LABEL: test_masked_z_8xfloat_dup_low_mem_mask4:
12431243 ; CHECK: # %bb.0:
1244 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
1244 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
12451245 ; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1
12461246 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6]
12471247 ; CHECK-NEXT: retq
12621262 define <16 x float> @test_masked_16xfloat_dup_low_mask0(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) {
12631263 ; CHECK-LABEL: test_masked_16xfloat_dup_low_mask0:
12641264 ; CHECK: # %bb.0:
1265 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
1265 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
12661266 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
12671267 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
12681268 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
12761276 define <16 x float> @test_masked_z_16xfloat_dup_low_mask0(<16 x float> %vec, <16 x float> %mask) {
12771277 ; CHECK-LABEL: test_masked_z_16xfloat_dup_low_mask0:
12781278 ; CHECK: # %bb.0:
1279 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
1279 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
12801280 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
12811281 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
12821282 ; CHECK-NEXT: retq
12881288 define <16 x float> @test_masked_16xfloat_dup_low_mask1(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) {
12891289 ; CHECK-LABEL: test_masked_16xfloat_dup_low_mask1:
12901290 ; CHECK: # %bb.0:
1291 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
1291 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
12921292 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
12931293 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
12941294 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
13021302 define <16 x float> @test_masked_z_16xfloat_dup_low_mask1(<16 x float> %vec, <16 x float> %mask) {
13031303 ; CHECK-LABEL: test_masked_z_16xfloat_dup_low_mask1:
13041304 ; CHECK: # %bb.0:
1305 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
1305 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
13061306 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
13071307 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
13081308 ; CHECK-NEXT: retq
13141314 define <16 x float> @test_masked_16xfloat_dup_low_mask2(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) {
13151315 ; CHECK-LABEL: test_masked_16xfloat_dup_low_mask2:
13161316 ; CHECK: # %bb.0:
1317 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
1317 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
13181318 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
13191319 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
13201320 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
13281328 define <16 x float> @test_masked_z_16xfloat_dup_low_mask2(<16 x float> %vec, <16 x float> %mask) {
13291329 ; CHECK-LABEL: test_masked_z_16xfloat_dup_low_mask2:
13301330 ; CHECK: # %bb.0:
1331 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
1331 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
13321332 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
13331333 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
13341334 ; CHECK-NEXT: retq
13401340 define <16 x float> @test_masked_16xfloat_dup_low_mask3(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) {
13411341 ; CHECK-LABEL: test_masked_16xfloat_dup_low_mask3:
13421342 ; CHECK: # %bb.0:
1343 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
1343 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
13441344 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
13451345 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
13461346 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
13541354 define <16 x float> @test_masked_z_16xfloat_dup_low_mask3(<16 x float> %vec, <16 x float> %mask) {
13551355 ; CHECK-LABEL: test_masked_z_16xfloat_dup_low_mask3:
13561356 ; CHECK: # %bb.0:
1357 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
1357 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
13581358 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
13591359 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
13601360 ; CHECK-NEXT: retq
13661366 define <16 x float> @test_masked_16xfloat_dup_low_mask4(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) {
13671367 ; CHECK-LABEL: test_masked_16xfloat_dup_low_mask4:
13681368 ; CHECK: # %bb.0:
1369 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
1369 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
13701370 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
13711371 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
13721372 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
13801380 define <16 x float> @test_masked_z_16xfloat_dup_low_mask4(<16 x float> %vec, <16 x float> %mask) {
13811381 ; CHECK-LABEL: test_masked_z_16xfloat_dup_low_mask4:
13821382 ; CHECK: # %bb.0:
1383 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
1383 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
13841384 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
13851385 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
13861386 ; CHECK-NEXT: retq
14011401 define <16 x float> @test_masked_16xfloat_dup_low_mem_mask0(<16 x float>* %vp, <16 x float> %vec2, <16 x float> %mask) {
14021402 ; CHECK-LABEL: test_masked_16xfloat_dup_low_mem_mask0:
14031403 ; CHECK: # %bb.0:
1404 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
1404 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
14051405 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
14061406 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
14071407 ; CHECK-NEXT: retq
14151415 define <16 x float> @test_masked_z_16xfloat_dup_low_mem_mask0(<16 x float>* %vp, <16 x float> %mask) {
14161416 ; CHECK-LABEL: test_masked_z_16xfloat_dup_low_mem_mask0:
14171417 ; CHECK: # %bb.0:
1418 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
1418 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
14191419 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
14201420 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
14211421 ; CHECK-NEXT: retq
14281428 define <16 x float> @test_masked_16xfloat_dup_low_mem_mask1(<16 x float>* %vp, <16 x float> %vec2, <16 x float> %mask) {
14291429 ; CHECK-LABEL: test_masked_16xfloat_dup_low_mem_mask1:
14301430 ; CHECK: # %bb.0:
1431 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
1431 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
14321432 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
14331433 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
14341434 ; CHECK-NEXT: retq
14421442 define <16 x float> @test_masked_z_16xfloat_dup_low_mem_mask1(<16 x float>* %vp, <16 x float> %mask) {
14431443 ; CHECK-LABEL: test_masked_z_16xfloat_dup_low_mem_mask1:
14441444 ; CHECK: # %bb.0:
1445 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
1445 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
14461446 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
14471447 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
14481448 ; CHECK-NEXT: retq
14551455 define <16 x float> @test_masked_16xfloat_dup_low_mem_mask2(<16 x float>* %vp, <16 x float> %vec2, <16 x float> %mask) {
14561456 ; CHECK-LABEL: test_masked_16xfloat_dup_low_mem_mask2:
14571457 ; CHECK: # %bb.0:
1458 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
1458 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
14591459 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
14601460 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
14611461 ; CHECK-NEXT: retq
14691469 define <16 x float> @test_masked_z_16xfloat_dup_low_mem_mask2(<16 x float>* %vp, <16 x float> %mask) {
14701470 ; CHECK-LABEL: test_masked_z_16xfloat_dup_low_mem_mask2:
14711471 ; CHECK: # %bb.0:
1472 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
1472 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
14731473 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
14741474 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
14751475 ; CHECK-NEXT: retq
14821482 define <16 x float> @test_masked_16xfloat_dup_low_mem_mask3(<16 x float>* %vp, <16 x float> %vec2, <16 x float> %mask) {
14831483 ; CHECK-LABEL: test_masked_16xfloat_dup_low_mem_mask3:
14841484 ; CHECK: # %bb.0:
1485 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
1485 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
14861486 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
14871487 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
14881488 ; CHECK-NEXT: retq
14961496 define <16 x float> @test_masked_z_16xfloat_dup_low_mem_mask3(<16 x float>* %vp, <16 x float> %mask) {
14971497 ; CHECK-LABEL: test_masked_z_16xfloat_dup_low_mem_mask3:
14981498 ; CHECK: # %bb.0:
1499 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
1499 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
15001500 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
15011501 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
15021502 ; CHECK-NEXT: retq
15091509 define <16 x float> @test_masked_16xfloat_dup_low_mem_mask4(<16 x float>* %vp, <16 x float> %vec2, <16 x float> %mask) {
15101510 ; CHECK-LABEL: test_masked_16xfloat_dup_low_mem_mask4:
15111511 ; CHECK: # %bb.0:
1512 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
1512 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
15131513 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
15141514 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
15151515 ; CHECK-NEXT: retq
15231523 define <16 x float> @test_masked_z_16xfloat_dup_low_mem_mask4(<16 x float>* %vp, <16 x float> %mask) {
15241524 ; CHECK-LABEL: test_masked_z_16xfloat_dup_low_mem_mask4:
15251525 ; CHECK: # %bb.0:
1526 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
1526 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
15271527 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
15281528 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
15291529 ; CHECK-NEXT: retq
1313 define <4 x float> @test_masked_4xfloat_perm_mask0(<4 x float> %vec, <4 x float> %vec2, <4 x float> %mask) {
1414 ; CHECK-LABEL: test_masked_4xfloat_perm_mask0:
1515 ; CHECK: # %bb.0:
16 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
16 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
1717 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
1818 ; CHECK-NEXT: vpermilps {{.*#+}} xmm1 {%k1} = xmm0[2,1,3,1]
1919 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
2727 define <4 x float> @test_masked_z_4xfloat_perm_mask0(<4 x float> %vec, <4 x float> %mask) {
2828 ; CHECK-LABEL: test_masked_z_4xfloat_perm_mask0:
2929 ; CHECK: # %bb.0:
30 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
30 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
3131 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
3232 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} {z} = xmm0[2,1,3,1]
3333 ; CHECK-NEXT: retq
3939 define <4 x float> @test_masked_4xfloat_perm_mask1(<4 x float> %vec, <4 x float> %vec2, <4 x float> %mask) {
4040 ; CHECK-LABEL: test_masked_4xfloat_perm_mask1:
4141 ; CHECK: # %bb.0:
42 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
42 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
4343 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
4444 ; CHECK-NEXT: vpermilps {{.*#+}} xmm1 {%k1} = xmm0[1,2,3,2]
4545 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
5353 define <4 x float> @test_masked_z_4xfloat_perm_mask1(<4 x float> %vec, <4 x float> %mask) {
5454 ; CHECK-LABEL: test_masked_z_4xfloat_perm_mask1:
5555 ; CHECK: # %bb.0:
56 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
56 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
5757 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
5858 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} {z} = xmm0[1,2,3,2]
5959 ; CHECK-NEXT: retq
6565 define <4 x float> @test_masked_4xfloat_perm_mask2(<4 x float> %vec, <4 x float> %vec2, <4 x float> %mask) {
6666 ; CHECK-LABEL: test_masked_4xfloat_perm_mask2:
6767 ; CHECK: # %bb.0:
68 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
68 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
6969 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
7070 ; CHECK-NEXT: vpermilps {{.*#+}} xmm1 {%k1} = xmm0[1,3,2,1]
7171 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
7979 define <4 x float> @test_masked_z_4xfloat_perm_mask2(<4 x float> %vec, <4 x float> %mask) {
8080 ; CHECK-LABEL: test_masked_z_4xfloat_perm_mask2:
8181 ; CHECK: # %bb.0:
82 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
82 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
8383 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
8484 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} {z} = xmm0[1,3,2,1]
8585 ; CHECK-NEXT: retq
9999 define <4 x float> @test_masked_4xfloat_perm_mask3(<4 x float> %vec, <4 x float> %vec2, <4 x float> %mask) {
100100 ; CHECK-LABEL: test_masked_4xfloat_perm_mask3:
101101 ; CHECK: # %bb.0:
102 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
102 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
103103 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
104104 ; CHECK-NEXT: vpermilps {{.*#+}} xmm1 {%k1} = xmm0[1,2,3,2]
105105 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
113113 define <4 x float> @test_masked_z_4xfloat_perm_mask3(<4 x float> %vec, <4 x float> %mask) {
114114 ; CHECK-LABEL: test_masked_z_4xfloat_perm_mask3:
115115 ; CHECK: # %bb.0:
116 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
116 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
117117 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
118118 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} {z} = xmm0[1,2,3,2]
119119 ; CHECK-NEXT: retq
134134 define <4 x float> @test_masked_4xfloat_perm_mem_mask0(<4 x float>* %vp, <4 x float> %vec2, <4 x float> %mask) {
135135 ; CHECK-LABEL: test_masked_4xfloat_perm_mem_mask0:
136136 ; CHECK: # %bb.0:
137 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
137 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
138138 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
139139 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} = mem[3,3,1,3]
140140 ; CHECK-NEXT: retq
148148 define <4 x float> @test_masked_z_4xfloat_perm_mem_mask0(<4 x float>* %vp, <4 x float> %mask) {
149149 ; CHECK-LABEL: test_masked_z_4xfloat_perm_mem_mask0:
150150 ; CHECK: # %bb.0:
151 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
151 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
152152 ; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1
153153 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} {z} = mem[3,3,1,3]
154154 ; CHECK-NEXT: retq
162162 define <4 x float> @test_masked_4xfloat_perm_mem_mask1(<4 x float>* %vp, <4 x float> %vec2, <4 x float> %mask) {
163163 ; CHECK-LABEL: test_masked_4xfloat_perm_mem_mask1:
164164 ; CHECK: # %bb.0:
165 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
165 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
166166 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
167167 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} = mem[1,3,2,0]
168168 ; CHECK-NEXT: retq
176176 define <4 x float> @test_masked_z_4xfloat_perm_mem_mask1(<4 x float>* %vp, <4 x float> %mask) {
177177 ; CHECK-LABEL: test_masked_z_4xfloat_perm_mem_mask1:
178178 ; CHECK: # %bb.0:
179 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
179 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
180180 ; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1
181181 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} {z} = mem[1,3,2,0]
182182 ; CHECK-NEXT: retq
190190 define <4 x float> @test_masked_4xfloat_perm_mem_mask2(<4 x float>* %vp, <4 x float> %vec2, <4 x float> %mask) {
191191 ; CHECK-LABEL: test_masked_4xfloat_perm_mem_mask2:
192192 ; CHECK: # %bb.0:
193 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
193 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
194194 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
195195 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} = mem[2,1,3,2]
196196 ; CHECK-NEXT: retq
204204 define <4 x float> @test_masked_z_4xfloat_perm_mem_mask2(<4 x float>* %vp, <4 x float> %mask) {
205205 ; CHECK-LABEL: test_masked_z_4xfloat_perm_mem_mask2:
206206 ; CHECK: # %bb.0:
207 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
207 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
208208 ; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1
209209 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} {z} = mem[2,1,3,2]
210210 ; CHECK-NEXT: retq
227227 define <4 x float> @test_masked_4xfloat_perm_mem_mask3(<4 x float>* %vp, <4 x float> %vec2, <4 x float> %mask) {
228228 ; CHECK-LABEL: test_masked_4xfloat_perm_mem_mask3:
229229 ; CHECK: # %bb.0:
230 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
230 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
231231 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
232232 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} = mem[0,1,3,0]
233233 ; CHECK-NEXT: retq
241241 define <4 x float> @test_masked_z_4xfloat_perm_mem_mask3(<4 x float>* %vp, <4 x float> %mask) {
242242 ; CHECK-LABEL: test_masked_z_4xfloat_perm_mem_mask3:
243243 ; CHECK: # %bb.0:
244 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
244 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
245245 ; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1
246246 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} {z} = mem[0,1,3,0]
247247 ; CHECK-NEXT: retq
263263 define <8 x float> @test_masked_8xfloat_perm_mask0(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) {
264264 ; CHECK-LABEL: test_masked_8xfloat_perm_mask0:
265265 ; CHECK: # %bb.0:
266 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
266 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
267267 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
268268 ; CHECK-NEXT: vpermilps {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,4,6,6,6]
269269 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
277277 define <8 x float> @test_masked_z_8xfloat_perm_mask0(<8 x float> %vec, <8 x float> %mask) {
278278 ; CHECK-LABEL: test_masked_z_8xfloat_perm_mask0:
279279 ; CHECK: # %bb.0:
280 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
280 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
281281 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
282282 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,4,6,6,6]
283283 ; CHECK-NEXT: retq
289289 define <8 x float> @test_masked_8xfloat_perm_imm_mask1(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) {
290290 ; CHECK-LABEL: test_masked_8xfloat_perm_imm_mask1:
291291 ; CHECK: # %bb.0:
292 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
292 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
293293 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
294294 ; CHECK-NEXT: vpermilps {{.*#+}} ymm1 {%k1} = ymm0[3,2,3,2,7,6,7,6]
295295 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
303303 define <8 x float> @test_masked_z_8xfloat_perm_imm_mask1(<8 x float> %vec, <8 x float> %mask) {
304304 ; CHECK-LABEL: test_masked_z_8xfloat_perm_imm_mask1:
305305 ; CHECK: # %bb.0:
306 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
306 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
307307 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
308308 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm0[3,2,3,2,7,6,7,6]
309309 ; CHECK-NEXT: retq
315315 define <8 x float> @test_masked_8xfloat_perm_mask2(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) {
316316 ; CHECK-LABEL: test_masked_8xfloat_perm_mask2:
317317 ; CHECK: # %bb.0:
318 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
318 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
319319 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
320320 ; CHECK-NEXT: vpermilps {{.*#+}} ymm1 {%k1} = ymm0[2,1,2,1,6,5,4,4]
321321 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
329329 define <8 x float> @test_masked_z_8xfloat_perm_mask2(<8 x float> %vec, <8 x float> %mask) {
330330 ; CHECK-LABEL: test_masked_z_8xfloat_perm_mask2:
331331 ; CHECK: # %bb.0:
332 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
332 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
333333 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
334334 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm0[2,1,2,1,6,5,4,4]
335335 ; CHECK-NEXT: retq
349349 define <8 x float> @test_masked_8xfloat_perm_imm_mask3(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) {
350350 ; CHECK-LABEL: test_masked_8xfloat_perm_imm_mask3:
351351 ; CHECK: # %bb.0:
352 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
352 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
353353 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
354354 ; CHECK-NEXT: vpermilps {{.*#+}} ymm1 {%k1} = ymm0[2,2,1,0,6,6,5,4]
355355 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
363363 define <8 x float> @test_masked_z_8xfloat_perm_imm_mask3(<8 x float> %vec, <8 x float> %mask) {
364364 ; CHECK-LABEL: test_masked_z_8xfloat_perm_imm_mask3:
365365 ; CHECK: # %bb.0:
366 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
366 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
367367 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
368368 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm0[2,2,1,0,6,6,5,4]
369369 ; CHECK-NEXT: retq
375375 define <8 x float> @test_masked_8xfloat_perm_mask4(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) {
376376 ; CHECK-LABEL: test_masked_8xfloat_perm_mask4:
377377 ; CHECK: # %bb.0:
378 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
378 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
379379 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
380380 ; CHECK-NEXT: vpermilps {{.*#+}} ymm1 {%k1} = ymm0[3,3,3,3,7,7,6,5]
381381 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
389389 define <8 x float> @test_masked_z_8xfloat_perm_mask4(<8 x float> %vec, <8 x float> %mask) {
390390 ; CHECK-LABEL: test_masked_z_8xfloat_perm_mask4:
391391 ; CHECK: # %bb.0:
392 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
392 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
393393 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
394394 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm0[3,3,3,3,7,7,6,5]
395395 ; CHECK-NEXT: retq
401401 define <8 x float> @test_masked_8xfloat_perm_imm_mask5(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) {
402402 ; CHECK-LABEL: test_masked_8xfloat_perm_imm_mask5:
403403 ; CHECK: # %bb.0:
404 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
404 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
405405 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
406406 ; CHECK-NEXT: vpermilps {{.*#+}} ymm1 {%k1} = ymm0[2,1,3,3,6,5,7,7]
407407 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
415415 define <8 x float> @test_masked_z_8xfloat_perm_imm_mask5(<8 x float> %vec, <8 x float> %mask) {
416416 ; CHECK-LABEL: test_masked_z_8xfloat_perm_imm_mask5:
417417 ; CHECK: # %bb.0:
418 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
418 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
419419 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
420420 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm0[2,1,3,3,6,5,7,7]
421421 ; CHECK-NEXT: retq
435435 define <8 x float> @test_masked_8xfloat_perm_mask6(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) {
436436 ; CHECK-LABEL: test_masked_8xfloat_perm_mask6:
437437 ; CHECK: # %bb.0:
438 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
438 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
439439 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
440440 ; CHECK-NEXT: vpermilps {{.*#+}} ymm1 {%k1} = ymm0[3,2,3,2,5,6,7,7]
441441 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
449449 define <8 x float> @test_masked_z_8xfloat_perm_mask6(<8 x float> %vec, <8 x float> %mask) {
450450 ; CHECK-LABEL: test_masked_z_8xfloat_perm_mask6:
451451 ; CHECK: # %bb.0:
452 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
452 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
453453 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
454454 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm0[3,2,3,2,5,6,7,7]
455455 ; CHECK-NEXT: retq
461461 define <8 x float> @test_masked_8xfloat_perm_imm_mask7(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) {
462462 ; CHECK-LABEL: test_masked_8xfloat_perm_imm_mask7:
463463 ; CHECK: # %bb.0:
464 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
464 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
465465 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
466466 ; CHECK-NEXT: vpermilps {{.*#+}} ymm1 {%k1} = ymm0[3,0,2,1,7,4,6,5]
467467 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
475475 define <8 x float> @test_masked_z_8xfloat_perm_imm_mask7(<8 x float> %vec, <8 x float> %mask) {
476476 ; CHECK-LABEL: test_masked_z_8xfloat_perm_imm_mask7:
477477 ; CHECK: # %bb.0:
478 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
478 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
479479 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
480480 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm0[3,0,2,1,7,4,6,5]
481481 ; CHECK-NEXT: retq
498498 ; CHECK-LABEL: test_masked_8xfloat_perm_mem_mask0:
499499 ; CHECK: # %bb.0:
500500 ; CHECK-NEXT: vmovaps (%rdi), %ymm2
501 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
501 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
502502 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm1, %k1
503503 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} = ymm2[3,0,0,2,4,6,7,6]
504504 ; CHECK-NEXT: retq
513513 ; CHECK-LABEL: test_masked_z_8xfloat_perm_mem_mask0:
514514 ; CHECK: # %bb.0:
515515 ; CHECK-NEXT: vmovaps (%rdi), %ymm1
516 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
516 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
517517 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm0, %k1
518518 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm1[3,0,0,2,4,6,7,6]
519519 ; CHECK-NEXT: retq
527527 define <8 x float> @test_masked_8xfloat_perm_imm_mem_mask1(<8 x float>* %vp, <8 x float> %vec2, <8 x float> %mask) {
528528 ; CHECK-LABEL: test_masked_8xfloat_perm_imm_mem_mask1:
529529 ; CHECK: # %bb.0:
530 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
530 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
531531 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
532532 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} = mem[2,0,2,2,6,4,6,6]
533533 ; CHECK-NEXT: retq
541541 define <8 x float> @test_masked_z_8xfloat_perm_imm_mem_mask1(<8 x float>* %vp, <8 x float> %mask) {
542542 ; CHECK-LABEL: test_masked_z_8xfloat_perm_imm_mem_mask1:
543543 ; CHECK: # %bb.0:
544 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
544 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
545545 ; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1
546546 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = mem[2,0,2,2,6,4,6,6]
547547 ; CHECK-NEXT: retq
556556 ; CHECK-LABEL: test_masked_8xfloat_perm_mem_mask2:
557557 ; CHECK: # %bb.0:
558558 ; CHECK-NEXT: vmovaps (%rdi), %ymm2
559 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
559 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
560560 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm1, %k1
561561 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} = ymm2[2,1,1,3,4,4,7,4]
562562 ; CHECK-NEXT: retq
571571 ; CHECK-LABEL: test_masked_z_8xfloat_perm_mem_mask2:
572572 ; CHECK: # %bb.0:
573573 ; CHECK-NEXT: vmovaps (%rdi), %ymm1
574 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
574 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
575575 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm0, %k1
576576 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm1[2,1,1,3,4,4,7,4]
577577 ; CHECK-NEXT: retq
594594 define <8 x float> @test_masked_8xfloat_perm_imm_mem_mask3(<8 x float>* %vp, <8 x float> %vec2, <8 x float> %mask) {
595595 ; CHECK-LABEL: test_masked_8xfloat_perm_imm_mem_mask3:
596596 ; CHECK: # %bb.0:
597 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
597 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
598598 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
599599 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} = mem[0,0,3,3,4,4,7,7]
600600 ; CHECK-NEXT: retq
608608 define <8 x float> @test_masked_z_8xfloat_perm_imm_mem_mask3(<8 x float>* %vp, <8 x float> %mask) {
609609 ; CHECK-LABEL: test_masked_z_8xfloat_perm_imm_mem_mask3:
610610 ; CHECK: # %bb.0:
611 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
611 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
612612 ; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1
613613 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = mem[0,0,3,3,4,4,7,7]
614614 ; CHECK-NEXT: retq
623623 ; CHECK-LABEL: test_masked_8xfloat_perm_mem_mask4:
624624 ; CHECK: # %bb.0:
625625 ; CHECK-NEXT: vmovaps (%rdi), %ymm2
626 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
626 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
627627 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm1, %k1
628628 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} = ymm2[0,1,0,1,4,6,5,4]
629629 ; CHECK-NEXT: retq
638638 ; CHECK-LABEL: test_masked_z_8xfloat_perm_mem_mask4:
639639 ; CHECK: # %bb.0:
640640 ; CHECK-NEXT: vmovaps (%rdi), %ymm1
641 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
641 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
642642 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm0, %k1
643643 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm1[0,1,0,1,4,6,5,4]
644644 ; CHECK-NEXT: retq
652652 define <8 x float> @test_masked_8xfloat_perm_imm_mem_mask5(<8 x float>* %vp, <8 x float> %vec2, <8 x float> %mask) {
653653 ; CHECK-LABEL: test_masked_8xfloat_perm_imm_mem_mask5:
654654 ; CHECK: # %bb.0:
655 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
655 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
656656 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
657657 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} = mem[2,0,0,3,6,4,4,7]
658658 ; CHECK-NEXT: retq
666666 define <8 x float> @test_masked_z_8xfloat_perm_imm_mem_mask5(<8 x float>* %vp, <8 x float> %mask) {
667667 ; CHECK-LABEL: test_masked_z_8xfloat_perm_imm_mem_mask5:
668668 ; CHECK: # %bb.0:
669 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
669 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
670670 ; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1
671671 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = mem[2,0,0,3,6,4,4,7]
672672 ; CHECK-NEXT: retq
691691 ; CHECK-LABEL: test_masked_8xfloat_perm_mem_mask6:
692692 ; CHECK: # %bb.0:
693693 ; CHECK-NEXT: vmovaps (%rdi), %ymm2
694 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
694 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
695695 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm1, %k1
696696 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} = ymm2[0,1,2,3,7,4,6,7]
697697 ; CHECK-NEXT: retq
706706 ; CHECK-LABEL: test_masked_z_8xfloat_perm_mem_mask6:
707707 ; CHECK: # %bb.0:
708708 ; CHECK-NEXT: vmovaps (%rdi), %ymm1
709 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
709 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
710710 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm0, %k1
711711 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm1[0,1,2,3,7,4,6,7]
712712 ; CHECK-NEXT: retq
720720 define <8 x float> @test_masked_8xfloat_perm_imm_mem_mask7(<8 x float>* %vp, <8 x float> %vec2, <8 x float> %mask) {
721721 ; CHECK-LABEL: test_masked_8xfloat_perm_imm_mem_mask7:
722722 ; CHECK: # %bb.0:
723 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
723 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
724724 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
725725 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} = mem[0,2,3,1,4,6,7,5]
726726 ; CHECK-NEXT: retq
734734 define <8 x float> @test_masked_z_8xfloat_perm_imm_mem_mask7(<8 x float>* %vp, <8 x float> %mask) {
735735 ; CHECK-LABEL: test_masked_z_8xfloat_perm_imm_mem_mask7:
736736 ; CHECK: # %bb.0:
737 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
737 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
738738 ; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1
739739 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = mem[0,2,3,1,4,6,7,5]
740740 ; CHECK-NEXT: retq
756756 define <16 x float> @test_masked_16xfloat_perm_mask0(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) {
757757 ; CHECK-LABEL: test_masked_16xfloat_perm_mask0:
758758 ; CHECK: # %bb.0:
759 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
759 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
760760 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
761761 ; CHECK-NEXT: vpermilps {{.*#+}} zmm1 {%k1} = zmm0[1,1,3,1,6,4,6,5,8,9,8,11,13,13,13,15]
762762 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
770770 define <16 x float> @test_masked_z_16xfloat_perm_mask0(<16 x float> %vec, <16 x float> %mask) {
771771 ; CHECK-LABEL: test_masked_z_16xfloat_perm_mask0:
772772 ; CHECK: # %bb.0:
773 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
773 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
774774 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
775775 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[1,1,3,1,6,4,6,5,8,9,8,11,13,13,13,15]
776776 ; CHECK-NEXT: retq
782782 define <16 x float> @test_masked_16xfloat_perm_imm_mask1(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) {
783783 ; CHECK-LABEL: test_masked_16xfloat_perm_imm_mask1:
784784 ; CHECK: # %bb.0:
785 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
785 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
786786 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
787787 ; CHECK-NEXT: vpermilps {{.*#+}} zmm1 {%k1} = zmm0[2,2,2,1,6,6,6,5,10,10,10,9,14,14,14,13]
788788 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
796796 define <16 x float> @test_masked_z_16xfloat_perm_imm_mask1(<16 x float> %vec, <16 x float> %mask) {
797797 ; CHECK-LABEL: test_masked_z_16xfloat_perm_imm_mask1:
798798 ; CHECK: # %bb.0:
799 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
799 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
800800 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
801801 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[2,2,2,1,6,6,6,5,10,10,10,9,14,14,14,13]
802802 ; CHECK-NEXT: retq
808808 define <16 x float> @test_masked_16xfloat_perm_mask2(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) {
809809 ; CHECK-LABEL: test_masked_16xfloat_perm_mask2:
810810 ; CHECK: # %bb.0:
811 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
811 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
812812 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
813813 ; CHECK-NEXT: vpermilps {{.*#+}} zmm1 {%k1} = zmm0[1,2,0,0,5,4,6,5,11,10,9,9,14,13,14,12]
814814 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
822822 define <16 x float> @test_masked_z_16xfloat_perm_mask2(<16 x float> %vec, <16 x float> %mask) {
823823 ; CHECK-LABEL: test_masked_z_16xfloat_perm_mask2:
824824 ; CHECK: # %bb.0:
825 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
825 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
826826 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
827827 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[1,2,0,0,5,4,6,5,11,10,9,9,14,13,14,12]
828828 ; CHECK-NEXT: retq
842842 define <16 x float> @test_masked_16xfloat_perm_imm_mask3(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) {
843843 ; CHECK-LABEL: test_masked_16xfloat_perm_imm_mask3:
844844 ; CHECK: # %bb.0:
845 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
845 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
846846 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
847847 ; CHECK-NEXT: vpermilps {{.*#+}} zmm1 {%k1} = zmm0[1,1,0,2,5,5,4,6,9,9,8,10,13,13,12,14]
848848 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
856856 define <16 x float> @test_masked_z_16xfloat_perm_imm_mask3(<16 x float> %vec, <16 x float> %mask) {
857857 ; CHECK-LABEL: test_masked_z_16xfloat_perm_imm_mask3:
858858 ; CHECK: # %bb.0:
859 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
859 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
860860 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
861861 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[1,1,0,2,5,5,4,6,9,9,8,10,13,13,12,14]
862862 ; CHECK-NEXT: retq
868868 define <16 x float> @test_masked_16xfloat_perm_mask4(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) {
869869 ; CHECK-LABEL: test_masked_16xfloat_perm_mask4:
870870 ; CHECK: # %bb.0:
871 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
871 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
872872 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
873873 ; CHECK-NEXT: vpermilps {{.*#+}} zmm1 {%k1} = zmm0[1,2,3,3,5,5,5,7,11,11,8,11,14,12,14,15]
874874 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
882882 define <16 x float> @test_masked_z_16xfloat_perm_mask4(<16 x float> %vec, <16 x float> %mask) {
883883 ; CHECK-LABEL: test_masked_z_16xfloat_perm_mask4:
884884 ; CHECK: # %bb.0:
885 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
885 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
886886 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
887887 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[1,2,3,3,5,5,5,7,11,11,8,11,14,12,14,15]
888888 ; CHECK-NEXT: retq
894894 define <16 x float> @test_masked_16xfloat_perm_imm_mask5(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) {
895895 ; CHECK-LABEL: test_masked_16xfloat_perm_imm_mask5:
896896 ; CHECK: # %bb.0:
897 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
897 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
898898 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
899899 ; CHECK-NEXT: vpermilps {{.*#+}} zmm1 {%k1} = zmm0[1,2,1,0,5,6,5,4,9,10,9,8,13,14,13,12]
900900 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
908908 define <16 x float> @test_masked_z_16xfloat_perm_imm_mask5(<16 x float> %vec, <16 x float> %mask) {
909909 ; CHECK-LABEL: test_masked_z_16xfloat_perm_imm_mask5:
910910 ; CHECK: # %bb.0:
911 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
911 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
912912 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
913913 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[1,2,1,0,5,6,5,4,9,10,9,8,13,14,13,12]
914914 ; CHECK-NEXT: retq
928928 define <16 x float> @test_masked_16xfloat_perm_mask6(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) {
929929 ; CHECK-LABEL: test_masked_16xfloat_perm_mask6:
930930 ; CHECK: # %bb.0:
931 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
931 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
932932 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
933933 ; CHECK-NEXT: vpermilps {{.*#+}} zmm1 {%k1} = zmm0[2,0,3,2,4,4,6,7,9,11,8,11,13,12,13,13]
934934 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
942942 define <16 x float> @test_masked_z_16xfloat_perm_mask6(<16 x float> %vec, <16 x float> %mask) {
943943 ; CHECK-LABEL: test_masked_z_16xfloat_perm_mask6:
944944 ; CHECK: # %bb.0:
945 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
945 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
946946 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
947947 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[2,0,3,2,4,4,6,7,9,11,8,11,13,12,13,13]
948948 ; CHECK-NEXT: retq
954954 define <16 x float> @test_masked_16xfloat_perm_imm_mask7(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) {
955955 ; CHECK-LABEL: test_masked_16xfloat_perm_imm_mask7:
956956 ; CHECK: # %bb.0:
957 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
957 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
958958 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
959959 ; CHECK-NEXT: vpermilps {{.*#+}} zmm1 {%k1} = zmm0[3,3,0,2,7,7,4,6,11,11,8,10,15,15,12,14]
960960 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
968968 define <16 x float> @test_masked_z_16xfloat_perm_imm_mask7(<16 x float> %vec, <16 x float> %mask) {
969969 ; CHECK-LABEL: test_masked_z_16xfloat_perm_imm_mask7:
970970 ; CHECK: # %bb.0:
971 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
971 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
972972 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
973973 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,0,2,7,7,4,6,11,11,8,10,15,15,12,14]
974974 ; CHECK-NEXT: retq
991991 ; CHECK-LABEL: test_masked_16xfloat_perm_mem_mask0:
992992 ; CHECK: # %bb.0:
993993 ; CHECK-NEXT: vmovaps (%rdi), %zmm2
994 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
994 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
995995 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm1, %k1
996996 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} = zmm2[3,3,3,0,6,6,6,6,11,10,9,10,12,14,12,12]
997997 ; CHECK-NEXT: retq
10061006 ; CHECK-LABEL: test_masked_z_16xfloat_perm_mem_mask0:
10071007 ; CHECK: # %bb.0:
10081008 ; CHECK-NEXT: vmovaps (%rdi), %zmm1
1009 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
1009 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
10101010 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm0, %k1
10111011 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm1[3,3,3,0,6,6,6,6,11,10,9,10,12,14,12,12]
10121012 ; CHECK-NEXT: retq
10201020 define <16 x float> @test_masked_16xfloat_perm_imm_mem_mask1(<16 x float>* %vp, <16 x float> %vec2, <16 x float> %mask) {
10211021 ; CHECK-LABEL: test_masked_16xfloat_perm_imm_mem_mask1:
10221022 ; CHECK: # %bb.0:
1023 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
1023 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
10241024 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
10251025 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} = mem[1,3,2,1,5,7,6,5,9,11,10,9,13,15,14,13]
10261026 ; CHECK-NEXT: retq
10341034 define <16 x float> @test_masked_z_16xfloat_perm_imm_mem_mask1(<16 x float>* %vp, <16 x float> %mask) {
10351035 ; CHECK-LABEL: test_masked_z_16xfloat_perm_imm_mem_mask1:
10361036 ; CHECK: # %bb.0:
1037 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
1037 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
10381038 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
10391039 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = mem[1,3,2,1,5,7,6,5,9,11,10,9,13,15,14,13]
10401040 ; CHECK-NEXT: retq
10491049 ; CHECK-LABEL: test_masked_16xfloat_perm_mem_mask2:
10501050 ; CHECK: # %bb.0:
10511051 ; CHECK-NEXT: vmovaps (%rdi), %zmm2
1052 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
1052 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
10531053 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm1, %k1
10541054 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} = zmm2[2,0,0,3,5,5,6,5,9,8,8,8,14,12,13,13]
10551055 ; CHECK-NEXT: retq
10641064 ; CHECK-LABEL: test_masked_z_16xfloat_perm_mem_mask2:
10651065 ; CHECK: # %bb.0:
10661066 ; CHECK-NEXT: vmovaps (%rdi), %zmm1
1067 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
1067 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
10681068 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm0, %k1
10691069 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm1[2,0,0,3,5,5,6,5,9,8,8,8,14,12,13,13]
10701070 ; CHECK-NEXT: retq
10871087 define <16 x float> @test_masked_16xfloat_perm_imm_mem_mask3(<16 x float>* %vp, <16 x float> %vec2, <16 x float> %mask) {
10881088 ; CHECK-LABEL: test_masked_16xfloat_perm_imm_mem_mask3:
10891089 ; CHECK: # %bb.0:
1090 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
1090 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
10911091 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
10921092 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} = mem[1,0,3,1,5,4,7,5,9,8,11,9,13,12,15,13]
10931093 ; CHECK-NEXT: retq
11011101 define <16 x float> @test_masked_z_16xfloat_perm_imm_mem_mask3(<16 x float>* %vp, <16 x float> %mask) {
11021102 ; CHECK-LABEL: test_masked_z_16xfloat_perm_imm_mem_mask3:
11031103 ; CHECK: # %bb.0:
1104 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
1104 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
11051105 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
11061106 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = mem[1,0,3,1,5,4,7,5,9,8,11,9,13,12,15,13]
11071107 ; CHECK-NEXT: retq
11161116 ; CHECK-LABEL: test_masked_16xfloat_perm_mem_mask4:
11171117 ; CHECK: # %bb.0:
11181118 ; CHECK-NEXT: vmovaps (%rdi), %zmm2
1119 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
1119 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
11201120 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm1, %k1
11211121 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} = zmm2[3,3,1,1,6,5,5,6,11,11,10,9,15,14,12,12]
11221122 ; CHECK-NEXT: retq
11311131 ; CHECK-LABEL: test_masked_z_16xfloat_perm_mem_mask4:
11321132 ; CHECK: # %bb.0:
11331133 ; CHECK-NEXT: vmovaps (%rdi), %zmm1
1134 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
1134 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
11351135 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm0, %k1
11361136 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm1[3,3,1,1,6,5,5,6,11,11,10,9,15,14,12,12]
11371137 ; CHECK-NEXT: retq
11451145 define <16 x float> @test_masked_16xfloat_perm_imm_mem_mask5(<16 x float>* %vp, <16 x float> %vec2, <16 x float> %mask) {
11461146 ; CHECK-LABEL: test_masked_16xfloat_perm_imm_mem_mask5:
11471147 ; CHECK: # %bb.0:
1148 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
1148 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
11491149 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
11501150 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} = mem[2,0,0,1,6,4,4,5,10,8,8,9,14,12,12,13]
11511151 ; CHECK-NEXT: retq
11591159 define <16 x float> @test_masked_z_16xfloat_perm_imm_mem_mask5(<16 x float>* %vp, <16 x float> %mask) {
11601160 ; CHECK-LABEL: test_masked_z_16xfloat_perm_imm_mem_mask5:
11611161 ; CHECK: # %bb.0:
1162 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
1162 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
11631163 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
11641164 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = mem[2,0,0,1,6,4,4,5,10,8,8,9,14,12,12,13]
11651165 ; CHECK-NEXT: retq
11841184 ; CHECK-LABEL: test_masked_16xfloat_perm_mem_mask6:
11851185 ; CHECK: # %bb.0:
11861186 ; CHECK-NEXT: vmovaps (%rdi), %zmm2
1187 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
1187 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
11881188 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm1, %k1
11891189 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} = zmm2[2,1,1,2,6,5,5,7,9,11,9,9,12,15,14,15]
11901190 ; CHECK-NEXT: retq
11991199 ; CHECK-LABEL: test_masked_z_16xfloat_perm_mem_mask6:
12001200 ; CHECK: # %bb.0:
12011201 ; CHECK-NEXT: vmovaps (%rdi), %zmm1
1202 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
1202 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
12031203 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm0, %k1
12041204 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm1[2,1,1,2,6,5,5,7,9,11,9,9,12,15,14,15]
12051205 ; CHECK-NEXT: retq
12131213 define <16 x float> @test_masked_16xfloat_perm_imm_mem_mask7(<16 x float>* %vp, <16 x float> %vec2, <16 x float> %mask) {
12141214 ; CHECK-LABEL: test_masked_16xfloat_perm_imm_mem_mask7:
12151215 ; CHECK: # %bb.0:
1216 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
1216 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
12171217 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
12181218 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} = mem[1,2,0,1,5,6,4,5,9,10,8,9,13,14,12,13]
12191219 ; CHECK-NEXT: retq
12271227 define <16 x float> @test_masked_z_16xfloat_perm_imm_mem_mask7(<16 x float>* %vp, <16 x float> %mask) {
12281228 ; CHECK-LABEL: test_masked_z_16xfloat_perm_imm_mem_mask7:
12291229 ; CHECK: # %bb.0:
1230 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
1230 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
12311231 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
12321232 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = mem[1,2,0,1,5,6,4,5,9,10,8,9,13,14,12,13]
12331233 ; CHECK-NEXT: retq
12491249 define <2 x double> @test_masked_2xdouble_perm_mask0(<2 x double> %vec, <2 x double> %vec2, <2 x double> %mask) {
12501250 ; CHECK-LABEL: test_masked_2xdouble_perm_mask0:
12511251 ; CHECK: # %bb.0:
1252 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
1252 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
12531253 ; CHECK-NEXT: vcmpeqpd %xmm3, %xmm2, %k1
12541254 ; CHECK-NEXT: vpermilpd {{.*#+}} xmm1 {%k1} = xmm0[1,0]
12551255 ; CHECK-NEXT: vmovapd %xmm1, %xmm0
12631263 define <2 x double> @test_masked_z_2xdouble_perm_mask0(<2 x double> %vec, <2 x double> %mask) {
12641264 ; CHECK-LABEL: test_masked_z_2xdouble_perm_mask0:
12651265 ; CHECK: # %bb.0:
1266 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
1266 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
12671267 ; CHECK-NEXT: vcmpeqpd %xmm2, %xmm1, %k1
12681268 ; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0]
12691269 ; CHECK-NEXT: retq
12751275 define <2 x double> @test_masked_2xdouble_perm_mask1(<2 x double> %vec, <2 x double> %vec2, <2 x double> %mask) {
12761276 ; CHECK-LABEL: test_masked_2xdouble_perm_mask1:
12771277 ; CHECK: # %bb.0:
1278 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
1278<