llvm.org GIT mirror llvm / 527dfff
[X86] Add test cases for opportunities to use KTEST when checking if the result of ANDing two mask registers is zero. The test cases are constructed to avoid folding the AND into a masked compare operation. Currently we emit a KAND and a KORTEST for these cases. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@350287 91177308-0d34-0410-b5e6-96231b3b80d8 Craig Topper 8 months ago
1 changed file(s) with 726 addition(s) and 0 deletion(s). Raw diff Collapse all Expand all
34783478 ret void
34793479 }
; Declaration of the masked-store intrinsic specialised for <16 x i32> data.
; Operands, in order: the <16 x i32> value, a <16 x i32>* destination pointer,
; an i32 (presumably the alignment operand -- confirm against the LangRef), and
; the <16 x i1> per-lane mask.
34803480 declare void @llvm.masked.store.v16i32.p0v16i32(<16 x i32>, <16 x i32>*, i32, <16 x i1>)
3481
; ktest_3: "is (a | b) & (c | d) all-zero?" on 8-lane masks built from
; <8 x i32> compares.
;
; Each of the four <8 x i32> arguments is compared for equality with zero, the
; resulting <8 x i1> masks are combined as (w==0 | x==0) & (y==0 | z==0), the
; mask is bitcast to i8 and tested against 0. @foo is called only when the
; combined mask is zero (no lane set).
;
; Both AND operands are ORs of two compares, so the AND stays a separate mask
; operation rather than being folded into a masked compare.
;
; Expected output recorded below:
;   SKX / AVX512DQ / X86 : kandb followed by kortestb on the result.
;   KNL / AVX512BW       : kandw, then the mask is moved to a GPR and tested
;                          with testb.
; The CHECK lines are autogenerated; regenerate them instead of editing by hand.
3482 define void @ktest_3(<8 x i32> %w, <8 x i32> %x, <8 x i32> %y, <8 x i32> %z) {
3483 ; KNL-LABEL: ktest_3:
3484 ; KNL: ## %bb.0:
3485 ; KNL-NEXT: pushq %rax
3486 ; KNL-NEXT: .cfi_def_cfa_offset 16
3487 ; KNL-NEXT: ## kill: def $ymm3 killed $ymm3 def $zmm3
3488 ; KNL-NEXT: ## kill: def $ymm2 killed $ymm2 def $zmm2
3489 ; KNL-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
3490 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
3491 ; KNL-NEXT: vptestnmd %zmm0, %zmm0, %k0
3492 ; KNL-NEXT: vptestnmd %zmm1, %zmm1, %k1
3493 ; KNL-NEXT: vptestnmd %zmm2, %zmm2, %k2
3494 ; KNL-NEXT: vptestnmd %zmm3, %zmm3, %k3
3495 ; KNL-NEXT: korw %k1, %k0, %k0
3496 ; KNL-NEXT: korw %k3, %k2, %k1
3497 ; KNL-NEXT: kandw %k1, %k0, %k0
3498 ; KNL-NEXT: kmovw %k0, %eax
3499 ; KNL-NEXT: testb %al, %al
3500 ; KNL-NEXT: je LBB71_1
3501 ; KNL-NEXT: ## %bb.2: ## %exit
3502 ; KNL-NEXT: popq %rax
3503 ; KNL-NEXT: vzeroupper
3504 ; KNL-NEXT: retq
3505 ; KNL-NEXT: LBB71_1: ## %bar
3506 ; KNL-NEXT: vzeroupper
3507 ; KNL-NEXT: callq _foo
3508 ; KNL-NEXT: popq %rax
3509 ; KNL-NEXT: retq
3510 ;
3511 ; SKX-LABEL: ktest_3:
3512 ; SKX: ## %bb.0:
3513 ; SKX-NEXT: pushq %rax
3514 ; SKX-NEXT: .cfi_def_cfa_offset 16
3515 ; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k0
3516 ; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1
3517 ; SKX-NEXT: korb %k1, %k0, %k0
3518 ; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1
3519 ; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k2
3520 ; SKX-NEXT: korb %k2, %k1, %k1
3521 ; SKX-NEXT: kandb %k1, %k0, %k0
3522 ; SKX-NEXT: kortestb %k0, %k0
3523 ; SKX-NEXT: je LBB71_1
3524 ; SKX-NEXT: ## %bb.2: ## %exit
3525 ; SKX-NEXT: popq %rax
3526 ; SKX-NEXT: vzeroupper
3527 ; SKX-NEXT: retq
3528 ; SKX-NEXT: LBB71_1: ## %bar
3529 ; SKX-NEXT: vzeroupper
3530 ; SKX-NEXT: callq _foo
3531 ; SKX-NEXT: popq %rax
3532 ; SKX-NEXT: retq
3533 ;
3534 ; AVX512BW-LABEL: ktest_3:
3535 ; AVX512BW: ## %bb.0:
3536 ; AVX512BW-NEXT: pushq %rax
3537 ; AVX512BW-NEXT: .cfi_def_cfa_offset 16
3538 ; AVX512BW-NEXT: ## kill: def $ymm3 killed $ymm3 def $zmm3
3539 ; AVX512BW-NEXT: ## kill: def $ymm2 killed $ymm2 def $zmm2
3540 ; AVX512BW-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
3541 ; AVX512BW-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
3542 ; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0
3543 ; AVX512BW-NEXT: vptestnmd %zmm1, %zmm1, %k1
3544 ; AVX512BW-NEXT: vptestnmd %zmm2, %zmm2, %k2
3545 ; AVX512BW-NEXT: vptestnmd %zmm3, %zmm3, %k3
3546 ; AVX512BW-NEXT: korw %k1, %k0, %k0
3547 ; AVX512BW-NEXT: korw %k3, %k2, %k1
3548 ; AVX512BW-NEXT: kandw %k1, %k0, %k0
3549 ; AVX512BW-NEXT: kmovd %k0, %eax
3550 ; AVX512BW-NEXT: testb %al, %al
3551 ; AVX512BW-NEXT: je LBB71_1
3552 ; AVX512BW-NEXT: ## %bb.2: ## %exit
3553 ; AVX512BW-NEXT: popq %rax
3554 ; AVX512BW-NEXT: vzeroupper
3555 ; AVX512BW-NEXT: retq
3556 ; AVX512BW-NEXT: LBB71_1: ## %bar
3557 ; AVX512BW-NEXT: vzeroupper
3558 ; AVX512BW-NEXT: callq _foo
3559 ; AVX512BW-NEXT: popq %rax
3560 ; AVX512BW-NEXT: retq
3561 ;
3562 ; AVX512DQ-LABEL: ktest_3:
3563 ; AVX512DQ: ## %bb.0:
3564 ; AVX512DQ-NEXT: pushq %rax
3565 ; AVX512DQ-NEXT: .cfi_def_cfa_offset 16
3566 ; AVX512DQ-NEXT: ## kill: def $ymm3 killed $ymm3 def $zmm3
3567 ; AVX512DQ-NEXT: ## kill: def $ymm2 killed $ymm2 def $zmm2
3568 ; AVX512DQ-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
3569 ; AVX512DQ-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
3570 ; AVX512DQ-NEXT: vptestnmd %zmm0, %zmm0, %k0
3571 ; AVX512DQ-NEXT: vptestnmd %zmm1, %zmm1, %k1
3572 ; AVX512DQ-NEXT: vptestnmd %zmm2, %zmm2, %k2
3573 ; AVX512DQ-NEXT: vptestnmd %zmm3, %zmm3, %k3
3574 ; AVX512DQ-NEXT: korb %k1, %k0, %k0
3575 ; AVX512DQ-NEXT: korb %k3, %k2, %k1
3576 ; AVX512DQ-NEXT: kandb %k1, %k0, %k0
3577 ; AVX512DQ-NEXT: kortestb %k0, %k0
3578 ; AVX512DQ-NEXT: je LBB71_1
3579 ; AVX512DQ-NEXT: ## %bb.2: ## %exit
3580 ; AVX512DQ-NEXT: popq %rax
3581 ; AVX512DQ-NEXT: vzeroupper
3582 ; AVX512DQ-NEXT: retq
3583 ; AVX512DQ-NEXT: LBB71_1: ## %bar
3584 ; AVX512DQ-NEXT: vzeroupper
3585 ; AVX512DQ-NEXT: callq _foo
3586 ; AVX512DQ-NEXT: popq %rax
3587 ; AVX512DQ-NEXT: retq
3588 ;
3589 ; X86-LABEL: ktest_3:
3590 ; X86: ## %bb.0:
3591 ; X86-NEXT: subl $12, %esp
3592 ; X86-NEXT: .cfi_def_cfa_offset 16
3593 ; X86-NEXT: vptestnmd %ymm0, %ymm0, %k0
3594 ; X86-NEXT: vptestnmd %ymm1, %ymm1, %k1
3595 ; X86-NEXT: korb %k1, %k0, %k0
3596 ; X86-NEXT: vptestnmd %ymm2, %ymm2, %k1
3597 ; X86-NEXT: vptestnmd %ymm3, %ymm3, %k2
3598 ; X86-NEXT: korb %k2, %k1, %k1
3599 ; X86-NEXT: kandb %k1, %k0, %k0
3600 ; X86-NEXT: kortestb %k0, %k0
3601 ; X86-NEXT: je LBB71_1
3602 ; X86-NEXT: ## %bb.2: ## %exit
3603 ; X86-NEXT: addl $12, %esp
3604 ; X86-NEXT: vzeroupper
3605 ; X86-NEXT: retl
3606 ; X86-NEXT: LBB71_1: ## %bar
3607 ; X86-NEXT: vzeroupper
3608 ; X86-NEXT: calll _foo
3609 ; X86-NEXT: addl $12, %esp
3610 ; X86-NEXT: retl
; Four independent "lane == 0" masks, one per argument.
3611 %a = icmp eq <8 x i32> %w, zeroinitializer
3612 %b = icmp eq <8 x i32> %x, zeroinitializer
3613 %c = icmp eq <8 x i32> %y, zeroinitializer
3614 %d = icmp eq <8 x i32> %z, zeroinitializer
; OR the masks pairwise first so that neither AND operand is a bare compare.
3615 %e = or <8 x i1> %a, %b
3616 %f = or <8 x i1> %c, %d
3617 %g = and <8 x i1> %e, %f
; View the 8-lane mask as an i8 and branch to %bar when it is all-zero.
3618 %h = bitcast <8 x i1> %g to i8
3619 %i = icmp eq i8 %h, 0
3620 br i1 %i, label %bar, label %exit
3621
3622 bar:
3623 call void @foo()
3624 br label %exit
3625
3626 exit:
3627 ret void
3628 }
3629
; ktest_4: same shape as an 8-lane "(a | b) & (c | d) == 0" test, but the
; masks come from <8 x i64> compares, so every prefix tests full zmm registers
; with vptestnmq.
;
; Each argument is compared for equality with zero, the <8 x i1> results are
; combined as (w==0 | x==0) & (y==0 | z==0), bitcast to i8 and compared with 0.
; @foo is called only when the combined mask is zero.
;
; Expected output recorded below:
;   SKX / AVX512DQ / X86 : korb, korb, kandb, kortestb.
;   KNL / AVX512BW       : korw, korw, kandw, then kmov to a GPR and testb.
; The CHECK lines are autogenerated; regenerate them instead of editing by hand.
3630 define void @ktest_4(<8 x i64> %w, <8 x i64> %x, <8 x i64> %y, <8 x i64> %z) {
3631 ; KNL-LABEL: ktest_4:
3632 ; KNL: ## %bb.0:
3633 ; KNL-NEXT: pushq %rax
3634 ; KNL-NEXT: .cfi_def_cfa_offset 16
3635 ; KNL-NEXT: vptestnmq %zmm0, %zmm0, %k0
3636 ; KNL-NEXT: vptestnmq %zmm1, %zmm1, %k1
3637 ; KNL-NEXT: vptestnmq %zmm2, %zmm2, %k2
3638 ; KNL-NEXT: vptestnmq %zmm3, %zmm3, %k3
3639 ; KNL-NEXT: korw %k1, %k0, %k0
3640 ; KNL-NEXT: korw %k3, %k2, %k1
3641 ; KNL-NEXT: kandw %k1, %k0, %k0
3642 ; KNL-NEXT: kmovw %k0, %eax
3643 ; KNL-NEXT: testb %al, %al
3644 ; KNL-NEXT: je LBB72_1
3645 ; KNL-NEXT: ## %bb.2: ## %exit
3646 ; KNL-NEXT: popq %rax
3647 ; KNL-NEXT: vzeroupper
3648 ; KNL-NEXT: retq
3649 ; KNL-NEXT: LBB72_1: ## %bar
3650 ; KNL-NEXT: vzeroupper
3651 ; KNL-NEXT: callq _foo
3652 ; KNL-NEXT: popq %rax
3653 ; KNL-NEXT: retq
3654 ;
3655 ; SKX-LABEL: ktest_4:
3656 ; SKX: ## %bb.0:
3657 ; SKX-NEXT: pushq %rax
3658 ; SKX-NEXT: .cfi_def_cfa_offset 16
3659 ; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k0
3660 ; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1
3661 ; SKX-NEXT: korb %k1, %k0, %k0
3662 ; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1
3663 ; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k2
3664 ; SKX-NEXT: korb %k2, %k1, %k1
3665 ; SKX-NEXT: kandb %k1, %k0, %k0
3666 ; SKX-NEXT: kortestb %k0, %k0
3667 ; SKX-NEXT: je LBB72_1
3668 ; SKX-NEXT: ## %bb.2: ## %exit
3669 ; SKX-NEXT: popq %rax
3670 ; SKX-NEXT: vzeroupper
3671 ; SKX-NEXT: retq
3672 ; SKX-NEXT: LBB72_1: ## %bar
3673 ; SKX-NEXT: vzeroupper
3674 ; SKX-NEXT: callq _foo
3675 ; SKX-NEXT: popq %rax
3676 ; SKX-NEXT: retq
3677 ;
3678 ; AVX512BW-LABEL: ktest_4:
3679 ; AVX512BW: ## %bb.0:
3680 ; AVX512BW-NEXT: pushq %rax
3681 ; AVX512BW-NEXT: .cfi_def_cfa_offset 16
3682 ; AVX512BW-NEXT: vptestnmq %zmm0, %zmm0, %k0
3683 ; AVX512BW-NEXT: vptestnmq %zmm1, %zmm1, %k1
3684 ; AVX512BW-NEXT: vptestnmq %zmm2, %zmm2, %k2
3685 ; AVX512BW-NEXT: vptestnmq %zmm3, %zmm3, %k3
3686 ; AVX512BW-NEXT: korw %k1, %k0, %k0
3687 ; AVX512BW-NEXT: korw %k3, %k2, %k1
3688 ; AVX512BW-NEXT: kandw %k1, %k0, %k0
3689 ; AVX512BW-NEXT: kmovd %k0, %eax
3690 ; AVX512BW-NEXT: testb %al, %al
3691 ; AVX512BW-NEXT: je LBB72_1
3692 ; AVX512BW-NEXT: ## %bb.2: ## %exit
3693 ; AVX512BW-NEXT: popq %rax
3694 ; AVX512BW-NEXT: vzeroupper
3695 ; AVX512BW-NEXT: retq
3696 ; AVX512BW-NEXT: LBB72_1: ## %bar
3697 ; AVX512BW-NEXT: vzeroupper
3698 ; AVX512BW-NEXT: callq _foo
3699 ; AVX512BW-NEXT: popq %rax
3700 ; AVX512BW-NEXT: retq
3701 ;
3702 ; AVX512DQ-LABEL: ktest_4:
3703 ; AVX512DQ: ## %bb.0:
3704 ; AVX512DQ-NEXT: pushq %rax
3705 ; AVX512DQ-NEXT: .cfi_def_cfa_offset 16
3706 ; AVX512DQ-NEXT: vptestnmq %zmm0, %zmm0, %k0
3707 ; AVX512DQ-NEXT: vptestnmq %zmm1, %zmm1, %k1
3708 ; AVX512DQ-NEXT: korb %k1, %k0, %k0
3709 ; AVX512DQ-NEXT: vptestnmq %zmm2, %zmm2, %k1
3710 ; AVX512DQ-NEXT: vptestnmq %zmm3, %zmm3, %k2
3711 ; AVX512DQ-NEXT: korb %k2, %k1, %k1
3712 ; AVX512DQ-NEXT: kandb %k1, %k0, %k0
3713 ; AVX512DQ-NEXT: kortestb %k0, %k0
3714 ; AVX512DQ-NEXT: je LBB72_1
3715 ; AVX512DQ-NEXT: ## %bb.2: ## %exit
3716 ; AVX512DQ-NEXT: popq %rax
3717 ; AVX512DQ-NEXT: vzeroupper
3718 ; AVX512DQ-NEXT: retq
3719 ; AVX512DQ-NEXT: LBB72_1: ## %bar
3720 ; AVX512DQ-NEXT: vzeroupper
3721 ; AVX512DQ-NEXT: callq _foo
3722 ; AVX512DQ-NEXT: popq %rax
3723 ; AVX512DQ-NEXT: retq
3724 ;
3725 ; X86-LABEL: ktest_4:
3726 ; X86: ## %bb.0:
3727 ; X86-NEXT: subl $12, %esp
3728 ; X86-NEXT: .cfi_def_cfa_offset 16
3729 ; X86-NEXT: vptestnmq %zmm0, %zmm0, %k0
3730 ; X86-NEXT: vptestnmq %zmm1, %zmm1, %k1
3731 ; X86-NEXT: korb %k1, %k0, %k0
3732 ; X86-NEXT: vptestnmq %zmm2, %zmm2, %k1
3733 ; X86-NEXT: vptestnmq %zmm3, %zmm3, %k2
3734 ; X86-NEXT: korb %k2, %k1, %k1
3735 ; X86-NEXT: kandb %k1, %k0, %k0
3736 ; X86-NEXT: kortestb %k0, %k0
3737 ; X86-NEXT: je LBB72_1
3738 ; X86-NEXT: ## %bb.2: ## %exit
3739 ; X86-NEXT: addl $12, %esp
3740 ; X86-NEXT: vzeroupper
3741 ; X86-NEXT: retl
3742 ; X86-NEXT: LBB72_1: ## %bar
3743 ; X86-NEXT: vzeroupper
3744 ; X86-NEXT: calll _foo
3745 ; X86-NEXT: addl $12, %esp
3746 ; X86-NEXT: retl
; Four independent "lane == 0" masks, one per argument.
3747 %a = icmp eq <8 x i64> %w, zeroinitializer
3748 %b = icmp eq <8 x i64> %x, zeroinitializer
3749 %c = icmp eq <8 x i64> %y, zeroinitializer
3750 %d = icmp eq <8 x i64> %z, zeroinitializer
; OR the masks pairwise first so that neither AND operand is a bare compare.
3751 %e = or <8 x i1> %a, %b
3752 %f = or <8 x i1> %c, %d
3753 %g = and <8 x i1> %e, %f
; View the 8-lane mask as an i8 and branch to %bar when it is all-zero.
3754 %h = bitcast <8 x i1> %g to i8
3755 %i = icmp eq i8 %h, 0
3756 br i1 %i, label %bar, label %exit
3757
3758 bar:
3759 call void @foo()
3760 br label %exit
3761
3762 exit:
3763 ret void
3764 }
3765
; ktest_5: 16-lane variant of the "(a | b) & (c | d) == 0" test, using
; <16 x i32> compares and an i16 bitcast of the combined mask.
;
; Each argument is compared for equality with zero, the <16 x i1> results are
; combined as (w==0 | x==0) & (y==0 | z==0), bitcast to i16 and compared with 0.
; @foo is called only when the combined mask is zero.
;
; The expected mask sequence is the same under both check prefixes (the shared
; CHECK prefix and X86): korw, korw, kandw, kortestw. Only the stack
; adjustment and call/return mnemonics differ between the two.
; The CHECK lines are autogenerated; regenerate them instead of editing by hand.
3766 define void @ktest_5(<16 x i32> %w, <16 x i32> %x, <16 x i32> %y, <16 x i32> %z) {
3767 ; CHECK-LABEL: ktest_5:
3768 ; CHECK: ## %bb.0:
3769 ; CHECK-NEXT: pushq %rax
3770 ; CHECK-NEXT: .cfi_def_cfa_offset 16
3771 ; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k0
3772 ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
3773 ; CHECK-NEXT: korw %k1, %k0, %k0
3774 ; CHECK-NEXT: vptestnmd %zmm2, %zmm2, %k1
3775 ; CHECK-NEXT: vptestnmd %zmm3, %zmm3, %k2
3776 ; CHECK-NEXT: korw %k2, %k1, %k1
3777 ; CHECK-NEXT: kandw %k1, %k0, %k0
3778 ; CHECK-NEXT: kortestw %k0, %k0
3779 ; CHECK-NEXT: je LBB73_1
3780 ; CHECK-NEXT: ## %bb.2: ## %exit
3781 ; CHECK-NEXT: popq %rax
3782 ; CHECK-NEXT: vzeroupper
3783 ; CHECK-NEXT: retq
3784 ; CHECK-NEXT: LBB73_1: ## %bar
3785 ; CHECK-NEXT: vzeroupper
3786 ; CHECK-NEXT: callq _foo
3787 ; CHECK-NEXT: popq %rax
3788 ; CHECK-NEXT: retq
3789 ;
3790 ; X86-LABEL: ktest_5:
3791 ; X86: ## %bb.0:
3792 ; X86-NEXT: subl $12, %esp
3793 ; X86-NEXT: .cfi_def_cfa_offset 16
3794 ; X86-NEXT: vptestnmd %zmm0, %zmm0, %k0
3795 ; X86-NEXT: vptestnmd %zmm1, %zmm1, %k1
3796 ; X86-NEXT: korw %k1, %k0, %k0
3797 ; X86-NEXT: vptestnmd %zmm2, %zmm2, %k1
3798 ; X86-NEXT: vptestnmd %zmm3, %zmm3, %k2
3799 ; X86-NEXT: korw %k2, %k1, %k1
3800 ; X86-NEXT: kandw %k1, %k0, %k0
3801 ; X86-NEXT: kortestw %k0, %k0
3802 ; X86-NEXT: je LBB73_1
3803 ; X86-NEXT: ## %bb.2: ## %exit
3804 ; X86-NEXT: addl $12, %esp
3805 ; X86-NEXT: vzeroupper
3806 ; X86-NEXT: retl
3807 ; X86-NEXT: LBB73_1: ## %bar
3808 ; X86-NEXT: vzeroupper
3809 ; X86-NEXT: calll _foo
3810 ; X86-NEXT: addl $12, %esp
3811 ; X86-NEXT: retl
; Four independent "lane == 0" masks, one per argument.
3812 %a = icmp eq <16 x i32> %w, zeroinitializer
3813 %b = icmp eq <16 x i32> %x, zeroinitializer
3814 %c = icmp eq <16 x i32> %y, zeroinitializer
3815 %d = icmp eq <16 x i32> %z, zeroinitializer
; OR the masks pairwise first so that neither AND operand is a bare compare.
3816 %e = or <16 x i1> %a, %b
3817 %f = or <16 x i1> %c, %d
3818 %g = and <16 x i1> %e, %f
; View the 16-lane mask as an i16 and branch to %bar when it is all-zero.
3819 %h = bitcast <16 x i1> %g to i16
3820 %i = icmp eq i16 %h, 0
3821 br i1 %i, label %bar, label %exit
3822
3823 bar:
3824 call void @foo()
3825 br label %exit
3826
3827 exit:
3828 ret void
3829 }
3830
; ktest_6: 32-lane variant of the "(a | b) & (c | d) == 0" test, using
; <32 x i16> compares and an i32 bitcast of the combined mask.
;
; Each argument is compared for equality with zero, the <32 x i1> results are
; combined as (w==0 | x==0) & (y==0 | z==0), bitcast to i32 and compared with 0.
; @foo is called only when the combined mask is zero.
;
; Expected output recorded below:
;   SKX / AVX512BW / X86 : vptestnmw into mask registers, then kord, kord,
;                          kandd, kortestd.
;   KNL / AVX512DQ       : the compares and the OR/AND are done on ymm vectors
;                          (vpcmpeqw / vpor / vpand); each 16-lane half is then
;                          sign-extended to zmm, turned into a mask
;                          (vptestmd for KNL, vpmovd2m for AVX512DQ), moved to
;                          a GPR, and the halves are merged with shll $16 + orl,
;                          whose flags feed the je.
; The CHECK lines are autogenerated; regenerate them instead of editing by hand.
3831 define void @ktest_6(<32 x i16> %w, <32 x i16> %x, <32 x i16> %y, <32 x i16> %z) {
3832 ; KNL-LABEL: ktest_6:
3833 ; KNL: ## %bb.0:
3834 ; KNL-NEXT: pushq %rax
3835 ; KNL-NEXT: .cfi_def_cfa_offset 16
3836 ; KNL-NEXT: vpxor %xmm8, %xmm8, %xmm8
3837 ; KNL-NEXT: vpcmpeqw %ymm8, %ymm0, %ymm0
3838 ; KNL-NEXT: vpcmpeqw %ymm8, %ymm1, %ymm1
3839 ; KNL-NEXT: vpcmpeqw %ymm8, %ymm2, %ymm2
3840 ; KNL-NEXT: vpor %ymm2, %ymm0, %ymm0
3841 ; KNL-NEXT: vpcmpeqw %ymm8, %ymm3, %ymm2
3842 ; KNL-NEXT: vpor %ymm2, %ymm1, %ymm1
3843 ; KNL-NEXT: vpcmpeqw %ymm8, %ymm4, %ymm2
3844 ; KNL-NEXT: vpcmpeqw %ymm8, %ymm5, %ymm3
3845 ; KNL-NEXT: vpcmpeqw %ymm8, %ymm6, %ymm4
3846 ; KNL-NEXT: vpor %ymm4, %ymm2, %ymm2
3847 ; KNL-NEXT: vpand %ymm2, %ymm0, %ymm0
3848 ; KNL-NEXT: vpcmpeqw %ymm8, %ymm7, %ymm2
3849 ; KNL-NEXT: vpor %ymm2, %ymm3, %ymm2
3850 ; KNL-NEXT: vpand %ymm2, %ymm1, %ymm1
3851 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
3852 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
3853 ; KNL-NEXT: kmovw %k0, %eax
3854 ; KNL-NEXT: vpmovsxwd %ymm1, %zmm0
3855 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
3856 ; KNL-NEXT: kmovw %k0, %ecx
3857 ; KNL-NEXT: shll $16, %ecx
3858 ; KNL-NEXT: orl %eax, %ecx
3859 ; KNL-NEXT: je LBB74_1
3860 ; KNL-NEXT: ## %bb.2: ## %exit
3861 ; KNL-NEXT: popq %rax
3862 ; KNL-NEXT: vzeroupper
3863 ; KNL-NEXT: retq
3864 ; KNL-NEXT: LBB74_1: ## %bar
3865 ; KNL-NEXT: vzeroupper
3866 ; KNL-NEXT: callq _foo
3867 ; KNL-NEXT: popq %rax
3868 ; KNL-NEXT: retq
3869 ;
3870 ; SKX-LABEL: ktest_6:
3871 ; SKX: ## %bb.0:
3872 ; SKX-NEXT: pushq %rax
3873 ; SKX-NEXT: .cfi_def_cfa_offset 16
3874 ; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k0
3875 ; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1
3876 ; SKX-NEXT: kord %k1, %k0, %k0
3877 ; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1
3878 ; SKX-NEXT: vptestnmw %zmm3, %zmm3, %k2
3879 ; SKX-NEXT: kord %k2, %k1, %k1
3880 ; SKX-NEXT: kandd %k1, %k0, %k0
3881 ; SKX-NEXT: kortestd %k0, %k0
3882 ; SKX-NEXT: je LBB74_1
3883 ; SKX-NEXT: ## %bb.2: ## %exit
3884 ; SKX-NEXT: popq %rax
3885 ; SKX-NEXT: vzeroupper
3886 ; SKX-NEXT: retq
3887 ; SKX-NEXT: LBB74_1: ## %bar
3888 ; SKX-NEXT: vzeroupper
3889 ; SKX-NEXT: callq _foo
3890 ; SKX-NEXT: popq %rax
3891 ; SKX-NEXT: retq
3892 ;
3893 ; AVX512BW-LABEL: ktest_6:
3894 ; AVX512BW: ## %bb.0:
3895 ; AVX512BW-NEXT: pushq %rax
3896 ; AVX512BW-NEXT: .cfi_def_cfa_offset 16
3897 ; AVX512BW-NEXT: vptestnmw %zmm0, %zmm0, %k0
3898 ; AVX512BW-NEXT: vptestnmw %zmm1, %zmm1, %k1
3899 ; AVX512BW-NEXT: kord %k1, %k0, %k0
3900 ; AVX512BW-NEXT: vptestnmw %zmm2, %zmm2, %k1
3901 ; AVX512BW-NEXT: vptestnmw %zmm3, %zmm3, %k2
3902 ; AVX512BW-NEXT: kord %k2, %k1, %k1
3903 ; AVX512BW-NEXT: kandd %k1, %k0, %k0
3904 ; AVX512BW-NEXT: kortestd %k0, %k0
3905 ; AVX512BW-NEXT: je LBB74_1
3906 ; AVX512BW-NEXT: ## %bb.2: ## %exit
3907 ; AVX512BW-NEXT: popq %rax
3908 ; AVX512BW-NEXT: vzeroupper
3909 ; AVX512BW-NEXT: retq
3910 ; AVX512BW-NEXT: LBB74_1: ## %bar
3911 ; AVX512BW-NEXT: vzeroupper
3912 ; AVX512BW-NEXT: callq _foo
3913 ; AVX512BW-NEXT: popq %rax
3914 ; AVX512BW-NEXT: retq
3915 ;
3916 ; AVX512DQ-LABEL: ktest_6:
3917 ; AVX512DQ: ## %bb.0:
3918 ; AVX512DQ-NEXT: pushq %rax
3919 ; AVX512DQ-NEXT: .cfi_def_cfa_offset 16
3920 ; AVX512DQ-NEXT: vpxor %xmm8, %xmm8, %xmm8
3921 ; AVX512DQ-NEXT: vpcmpeqw %ymm8, %ymm0, %ymm0
3922 ; AVX512DQ-NEXT: vpcmpeqw %ymm8, %ymm1, %ymm1
3923 ; AVX512DQ-NEXT: vpcmpeqw %ymm8, %ymm2, %ymm2
3924 ; AVX512DQ-NEXT: vpor %ymm2, %ymm0, %ymm0
3925 ; AVX512DQ-NEXT: vpcmpeqw %ymm8, %ymm3, %ymm2
3926 ; AVX512DQ-NEXT: vpor %ymm2, %ymm1, %ymm1
3927 ; AVX512DQ-NEXT: vpcmpeqw %ymm8, %ymm4, %ymm2
3928 ; AVX512DQ-NEXT: vpcmpeqw %ymm8, %ymm5, %ymm3
3929 ; AVX512DQ-NEXT: vpcmpeqw %ymm8, %ymm6, %ymm4
3930 ; AVX512DQ-NEXT: vpor %ymm4, %ymm2, %ymm2
3931 ; AVX512DQ-NEXT: vpand %ymm2, %ymm0, %ymm0
3932 ; AVX512DQ-NEXT: vpcmpeqw %ymm8, %ymm7, %ymm2
3933 ; AVX512DQ-NEXT: vpor %ymm2, %ymm3, %ymm2
3934 ; AVX512DQ-NEXT: vpand %ymm2, %ymm1, %ymm1
3935 ; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0
3936 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
3937 ; AVX512DQ-NEXT: kmovw %k0, %eax
3938 ; AVX512DQ-NEXT: vpmovsxwd %ymm1, %zmm0
3939 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
3940 ; AVX512DQ-NEXT: kmovw %k0, %ecx
3941 ; AVX512DQ-NEXT: shll $16, %ecx
3942 ; AVX512DQ-NEXT: orl %eax, %ecx
3943 ; AVX512DQ-NEXT: je LBB74_1
3944 ; AVX512DQ-NEXT: ## %bb.2: ## %exit
3945 ; AVX512DQ-NEXT: popq %rax
3946 ; AVX512DQ-NEXT: vzeroupper
3947 ; AVX512DQ-NEXT: retq
3948 ; AVX512DQ-NEXT: LBB74_1: ## %bar
3949 ; AVX512DQ-NEXT: vzeroupper
3950 ; AVX512DQ-NEXT: callq _foo
3951 ; AVX512DQ-NEXT: popq %rax
3952 ; AVX512DQ-NEXT: retq
3953 ;
3954 ; X86-LABEL: ktest_6:
3955 ; X86: ## %bb.0:
3956 ; X86-NEXT: subl $12, %esp
3957 ; X86-NEXT: .cfi_def_cfa_offset 16
3958 ; X86-NEXT: vptestnmw %zmm0, %zmm0, %k0
3959 ; X86-NEXT: vptestnmw %zmm1, %zmm1, %k1
3960 ; X86-NEXT: kord %k1, %k0, %k0
3961 ; X86-NEXT: vptestnmw %zmm2, %zmm2, %k1
3962 ; X86-NEXT: vptestnmw %zmm3, %zmm3, %k2
3963 ; X86-NEXT: kord %k2, %k1, %k1
3964 ; X86-NEXT: kandd %k1, %k0, %k0
3965 ; X86-NEXT: kortestd %k0, %k0
3966 ; X86-NEXT: je LBB74_1
3967 ; X86-NEXT: ## %bb.2: ## %exit
3968 ; X86-NEXT: addl $12, %esp
3969 ; X86-NEXT: vzeroupper
3970 ; X86-NEXT: retl
3971 ; X86-NEXT: LBB74_1: ## %bar
3972 ; X86-NEXT: vzeroupper
3973 ; X86-NEXT: calll _foo
3974 ; X86-NEXT: addl $12, %esp
3975 ; X86-NEXT: retl
; Four independent "lane == 0" masks, one per argument.
3976 %a = icmp eq <32 x i16> %w, zeroinitializer
3977 %b = icmp eq <32 x i16> %x, zeroinitializer
3978 %c = icmp eq <32 x i16> %y, zeroinitializer
3979 %d = icmp eq <32 x i16> %z, zeroinitializer
; OR the masks pairwise first so that neither AND operand is a bare compare.
3980 %e = or <32 x i1> %a, %b
3981 %f = or <32 x i1> %c, %d
3982 %g = and <32 x i1> %e, %f
; View the 32-lane mask as an i32 and branch to %bar when it is all-zero.
3983 %h = bitcast <32 x i1> %g to i32
3984 %i = icmp eq i32 %h, 0
3985 br i1 %i, label %bar, label %exit
3986
3987 bar:
3988 call void @foo()
3989 br label %exit
3990
3991 exit:
3992 ret void
3993 }
3994
; ktest_7: 64-lane variant of the "(a | b) & (c | d) == 0" test, using
; <64 x i8> compares and an i64 bitcast of the combined mask.
;
; Each argument is compared for equality with zero, the <64 x i1> results are
; combined as (w==0 | x==0) & (y==0 | z==0), bitcast to i64 and compared with 0.
; @foo is called only when the combined mask is zero.
;
; Expected output recorded below:
;   SKX / AVX512BW : vptestnmb into mask registers, then korq, korq, kandq,
;                    kortestq.
;   X86            : same korq/korq/kandq, but the zero test is done as
;                    kshiftrq $32 to get the upper half followed by a kortestd
;                    of the upper and lower halves.
;   KNL / AVX512DQ : compares and OR/AND are done on ymm/xmm vectors
;                    (vpcmpeqb / vextracti128 / vpor / vpand); four 16-lane
;                    pieces are each sign-extended to zmm, turned into a mask
;                    (vptestmd for KNL, vpmovd2m for AVX512DQ) and moved to a
;                    GPR, then merged with shll $16 / orl and shlq $32 / orq,
;                    whose flags feed the je.
; The CHECK lines are autogenerated; regenerate them instead of editing by hand.
3995 define void @ktest_7(<64 x i8> %w, <64 x i8> %x, <64 x i8> %y, <64 x i8> %z) {
3996 ; KNL-LABEL: ktest_7:
3997 ; KNL: ## %bb.0:
3998 ; KNL-NEXT: pushq %rax
3999 ; KNL-NEXT: .cfi_def_cfa_offset 16
4000 ; KNL-NEXT: vpxor %xmm8, %xmm8, %xmm8
4001 ; KNL-NEXT: vpcmpeqb %ymm8, %ymm0, %ymm9
4002 ; KNL-NEXT: vextracti128 $1, %ymm9, %xmm0
4003 ; KNL-NEXT: vpcmpeqb %ymm8, %ymm1, %ymm10
4004 ; KNL-NEXT: vextracti128 $1, %ymm10, %xmm1
4005 ; KNL-NEXT: vpcmpeqb %ymm8, %ymm2, %ymm11
4006 ; KNL-NEXT: vextracti128 $1, %ymm11, %xmm2
4007 ; KNL-NEXT: vpor %xmm2, %xmm0, %xmm13
4008 ; KNL-NEXT: vpcmpeqb %ymm8, %ymm3, %ymm2
4009 ; KNL-NEXT: vextracti128 $1, %ymm2, %xmm3
4010 ; KNL-NEXT: vpor %xmm3, %xmm1, %xmm12
4011 ; KNL-NEXT: vpcmpeqb %ymm8, %ymm4, %ymm3
4012 ; KNL-NEXT: vextracti128 $1, %ymm3, %xmm4
4013 ; KNL-NEXT: vpcmpeqb %ymm8, %ymm5, %ymm5
4014 ; KNL-NEXT: vextracti128 $1, %ymm5, %xmm1
4015 ; KNL-NEXT: vpcmpeqb %ymm8, %ymm6, %ymm6
4016 ; KNL-NEXT: vextracti128 $1, %ymm6, %xmm0
4017 ; KNL-NEXT: vpor %xmm0, %xmm4, %xmm0
4018 ; KNL-NEXT: vpand %xmm0, %xmm13, %xmm0
4019 ; KNL-NEXT: vpcmpeqb %ymm8, %ymm7, %ymm4
4020 ; KNL-NEXT: vextracti128 $1, %ymm4, %xmm7
4021 ; KNL-NEXT: vpor %xmm7, %xmm1, %xmm1
4022 ; KNL-NEXT: vpand %xmm1, %xmm12, %xmm1
4023 ; KNL-NEXT: vpor %xmm2, %xmm10, %xmm2
4024 ; KNL-NEXT: vpor %xmm11, %xmm9, %xmm7
4025 ; KNL-NEXT: vpor %xmm4, %xmm5, %xmm4
4026 ; KNL-NEXT: vpand %xmm4, %xmm2, %xmm2
4027 ; KNL-NEXT: vpor %xmm6, %xmm3, %xmm3
4028 ; KNL-NEXT: vpand %xmm3, %xmm7, %xmm3
4029 ; KNL-NEXT: vpmovsxbd %xmm3, %zmm3
4030 ; KNL-NEXT: vptestmd %zmm3, %zmm3, %k0
4031 ; KNL-NEXT: kmovw %k0, %eax
4032 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
4033 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
4034 ; KNL-NEXT: kmovw %k0, %ecx
4035 ; KNL-NEXT: shll $16, %ecx
4036 ; KNL-NEXT: orl %eax, %ecx
4037 ; KNL-NEXT: vpmovsxbd %xmm2, %zmm0
4038 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
4039 ; KNL-NEXT: kmovw %k0, %eax
4040 ; KNL-NEXT: vpmovsxbd %xmm1, %zmm0
4041 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
4042 ; KNL-NEXT: kmovw %k0, %edx
4043 ; KNL-NEXT: shll $16, %edx
4044 ; KNL-NEXT: orl %eax, %edx
4045 ; KNL-NEXT: shlq $32, %rdx
4046 ; KNL-NEXT: orq %rcx, %rdx
4047 ; KNL-NEXT: je LBB75_1
4048 ; KNL-NEXT: ## %bb.2: ## %exit
4049 ; KNL-NEXT: popq %rax
4050 ; KNL-NEXT: vzeroupper
4051 ; KNL-NEXT: retq
4052 ; KNL-NEXT: LBB75_1: ## %bar
4053 ; KNL-NEXT: vzeroupper
4054 ; KNL-NEXT: callq _foo
4055 ; KNL-NEXT: popq %rax
4056 ; KNL-NEXT: retq
4057 ;
4058 ; SKX-LABEL: ktest_7:
4059 ; SKX: ## %bb.0:
4060 ; SKX-NEXT: pushq %rax
4061 ; SKX-NEXT: .cfi_def_cfa_offset 16
4062 ; SKX-NEXT: vptestnmb %zmm0, %zmm0, %k0
4063 ; SKX-NEXT: vptestnmb %zmm1, %zmm1, %k1
4064 ; SKX-NEXT: korq %k1, %k0, %k0
4065 ; SKX-NEXT: vptestnmb %zmm2, %zmm2, %k1
4066 ; SKX-NEXT: vptestnmb %zmm3, %zmm3, %k2
4067 ; SKX-NEXT: korq %k2, %k1, %k1
4068 ; SKX-NEXT: kandq %k1, %k0, %k0
4069 ; SKX-NEXT: kortestq %k0, %k0
4070 ; SKX-NEXT: je LBB75_1
4071 ; SKX-NEXT: ## %bb.2: ## %exit
4072 ; SKX-NEXT: popq %rax
4073 ; SKX-NEXT: vzeroupper
4074 ; SKX-NEXT: retq
4075 ; SKX-NEXT: LBB75_1: ## %bar
4076 ; SKX-NEXT: vzeroupper
4077 ; SKX-NEXT: callq _foo
4078 ; SKX-NEXT: popq %rax
4079 ; SKX-NEXT: retq
4080 ;
4081 ; AVX512BW-LABEL: ktest_7:
4082 ; AVX512BW: ## %bb.0:
4083 ; AVX512BW-NEXT: pushq %rax
4084 ; AVX512BW-NEXT: .cfi_def_cfa_offset 16
4085 ; AVX512BW-NEXT: vptestnmb %zmm0, %zmm0, %k0
4086 ; AVX512BW-NEXT: vptestnmb %zmm1, %zmm1, %k1
4087 ; AVX512BW-NEXT: korq %k1, %k0, %k0
4088 ; AVX512BW-NEXT: vptestnmb %zmm2, %zmm2, %k1
4089 ; AVX512BW-NEXT: vptestnmb %zmm3, %zmm3, %k2
4090 ; AVX512BW-NEXT: korq %k2, %k1, %k1
4091 ; AVX512BW-NEXT: kandq %k1, %k0, %k0
4092 ; AVX512BW-NEXT: kortestq %k0, %k0
4093 ; AVX512BW-NEXT: je LBB75_1
4094 ; AVX512BW-NEXT: ## %bb.2: ## %exit
4095 ; AVX512BW-NEXT: popq %rax
4096 ; AVX512BW-NEXT: vzeroupper
4097 ; AVX512BW-NEXT: retq
4098 ; AVX512BW-NEXT: LBB75_1: ## %bar
4099 ; AVX512BW-NEXT: vzeroupper
4100 ; AVX512BW-NEXT: callq _foo
4101 ; AVX512BW-NEXT: popq %rax
4102 ; AVX512BW-NEXT: retq
4103 ;
4104 ; AVX512DQ-LABEL: ktest_7:
4105 ; AVX512DQ: ## %bb.0:
4106 ; AVX512DQ-NEXT: pushq %rax
4107 ; AVX512DQ-NEXT: .cfi_def_cfa_offset 16
4108 ; AVX512DQ-NEXT: vpxor %xmm8, %xmm8, %xmm8
4109 ; AVX512DQ-NEXT: vpcmpeqb %ymm8, %ymm0, %ymm9
4110 ; AVX512DQ-NEXT: vextracti128 $1, %ymm9, %xmm0
4111 ; AVX512DQ-NEXT: vpcmpeqb %ymm8, %ymm1, %ymm10
4112 ; AVX512DQ-NEXT: vextracti128 $1, %ymm10, %xmm1
4113 ; AVX512DQ-NEXT: vpcmpeqb %ymm8, %ymm2, %ymm11
4114 ; AVX512DQ-NEXT: vextracti128 $1, %ymm11, %xmm2
4115 ; AVX512DQ-NEXT: vpor %xmm2, %xmm0, %xmm13
4116 ; AVX512DQ-NEXT: vpcmpeqb %ymm8, %ymm3, %ymm2
4117 ; AVX512DQ-NEXT: vextracti128 $1, %ymm2, %xmm3
4118 ; AVX512DQ-NEXT: vpor %xmm3, %xmm1, %xmm12
4119 ; AVX512DQ-NEXT: vpcmpeqb %ymm8, %ymm4, %ymm3
4120 ; AVX512DQ-NEXT: vextracti128 $1, %ymm3, %xmm4
4121 ; AVX512DQ-NEXT: vpcmpeqb %ymm8, %ymm5, %ymm5
4122 ; AVX512DQ-NEXT: vextracti128 $1, %ymm5, %xmm1
4123 ; AVX512DQ-NEXT: vpcmpeqb %ymm8, %ymm6, %ymm6
4124 ; AVX512DQ-NEXT: vextracti128 $1, %ymm6, %xmm0
4125 ; AVX512DQ-NEXT: vpor %xmm0, %xmm4, %xmm0
4126 ; AVX512DQ-NEXT: vpand %xmm0, %xmm13, %xmm0
4127 ; AVX512DQ-NEXT: vpcmpeqb %ymm8, %ymm7, %ymm4
4128 ; AVX512DQ-NEXT: vextracti128 $1, %ymm4, %xmm7
4129 ; AVX512DQ-NEXT: vpor %xmm7, %xmm1, %xmm1
4130 ; AVX512DQ-NEXT: vpand %xmm1, %xmm12, %xmm1
4131 ; AVX512DQ-NEXT: vpor %xmm2, %xmm10, %xmm2
4132 ; AVX512DQ-NEXT: vpor %xmm11, %xmm9, %xmm7
4133 ; AVX512DQ-NEXT: vpor %xmm4, %xmm5, %xmm4
4134 ; AVX512DQ-NEXT: vpand %xmm4, %xmm2, %xmm2
4135 ; AVX512DQ-NEXT: vpor %xmm6, %xmm3, %xmm3
4136 ; AVX512DQ-NEXT: vpand %xmm3, %xmm7, %xmm3
4137 ; AVX512DQ-NEXT: vpmovsxbd %xmm3, %zmm3
4138 ; AVX512DQ-NEXT: vpmovd2m %zmm3, %k0
4139 ; AVX512DQ-NEXT: kmovw %k0, %eax
4140 ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
4141 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
4142 ; AVX512DQ-NEXT: kmovw %k0, %ecx
4143 ; AVX512DQ-NEXT: shll $16, %ecx
4144 ; AVX512DQ-NEXT: orl %eax, %ecx
4145 ; AVX512DQ-NEXT: vpmovsxbd %xmm2, %zmm0
4146 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
4147 ; AVX512DQ-NEXT: kmovw %k0, %eax
4148 ; AVX512DQ-NEXT: vpmovsxbd %xmm1, %zmm0
4149 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
4150 ; AVX512DQ-NEXT: kmovw %k0, %edx
4151 ; AVX512DQ-NEXT: shll $16, %edx
4152 ; AVX512DQ-NEXT: orl %eax, %edx
4153 ; AVX512DQ-NEXT: shlq $32, %rdx
4154 ; AVX512DQ-NEXT: orq %rcx, %rdx
4155 ; AVX512DQ-NEXT: je LBB75_1
4156 ; AVX512DQ-NEXT: ## %bb.2: ## %exit
4157 ; AVX512DQ-NEXT: popq %rax
4158 ; AVX512DQ-NEXT: vzeroupper
4159 ; AVX512DQ-NEXT: retq
4160 ; AVX512DQ-NEXT: LBB75_1: ## %bar
4161 ; AVX512DQ-NEXT: vzeroupper
4162 ; AVX512DQ-NEXT: callq _foo
4163 ; AVX512DQ-NEXT: popq %rax
4164 ; AVX512DQ-NEXT: retq
4165 ;
4166 ; X86-LABEL: ktest_7:
4167 ; X86: ## %bb.0:
4168 ; X86-NEXT: subl $12, %esp
4169 ; X86-NEXT: .cfi_def_cfa_offset 16
4170 ; X86-NEXT: vptestnmb %zmm0, %zmm0, %k0
4171 ; X86-NEXT: vptestnmb %zmm1, %zmm1, %k1
4172 ; X86-NEXT: korq %k1, %k0, %k0
4173 ; X86-NEXT: vptestnmb %zmm2, %zmm2, %k1
4174 ; X86-NEXT: vptestnmb %zmm3, %zmm3, %k2
4175 ; X86-NEXT: korq %k2, %k1, %k1
4176 ; X86-NEXT: kandq %k1, %k0, %k0
4177 ; X86-NEXT: kshiftrq $32, %k0, %k1
4178 ; X86-NEXT: kortestd %k1, %k0
4179 ; X86-NEXT: je LBB75_1
4180 ; X86-NEXT: ## %bb.2: ## %exit
4181 ; X86-NEXT: addl $12, %esp
4182 ; X86-NEXT: vzeroupper
4183 ; X86-NEXT: retl
4184 ; X86-NEXT: LBB75_1: ## %bar
4185 ; X86-NEXT: vzeroupper
4186 ; X86-NEXT: calll _foo
4187 ; X86-NEXT: addl $12, %esp
4188 ; X86-NEXT: retl
; Four independent "lane == 0" masks, one per argument.
4189 %a = icmp eq <64 x i8> %w, zeroinitializer
4190 %b = icmp eq <64 x i8> %x, zeroinitializer
4191 %c = icmp eq <64 x i8> %y, zeroinitializer
4192 %d = icmp eq <64 x i8> %z, zeroinitializer
; OR the masks pairwise first so that neither AND operand is a bare compare.
4193 %e = or <64 x i1> %a, %b
4194 %f = or <64 x i1> %c, %d
4195 %g = and <64 x i1> %e, %f
; View the 64-lane mask as an i64 and branch to %bar when it is all-zero.
4196 %h = bitcast <64 x i1> %g to i64
4197 %i = icmp eq i64 %h, 0
4198 br i1 %i, label %bar, label %exit
4199
4200 bar:
4201 call void @foo()
4202 br label %exit
4203
4204 exit:
4205 ret void
4206 }