llvm.org GIT mirror llvm / commit 4fc2808
[x86] add more tests for potential horizontal ops; NFC. As discussed in D56011 — add RUN lines for AVX512 and tests with extra uses. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@350221 91177308-0d34-0410-b5e6-96231b3b80d8 (Sanjay Patel, 9 months ago)
1 changed file with 376 additions and 140 deletions.
0 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse3 | FileCheck %s --check-prefixes=SSE3,SSE3-SLOW
2 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse3,fast-hops | FileCheck %s --check-prefixes=SSE3,SSE3-FAST
3 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX-SLOW
4 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx,fast-hops | FileCheck %s --check-prefixes=AVX,AVX-FAST
1 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse3 | FileCheck %s --check-prefixes=SSE3,SSE3-SLOW
2 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse3,fast-hops | FileCheck %s --check-prefixes=SSE3,SSE3-FAST
3 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX-SLOW,AVX1-SLOW
4 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx,fast-hops | FileCheck %s --check-prefixes=AVX,AVX-FAST,AVX1-FAST
5 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX-SLOW,AVX512-SLOW
6 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,fast-hops | FileCheck %s --check-prefixes=AVX,AVX-FAST,AVX512-FAST
57
68 define <2 x double> @haddpd1(<2 x double> %x, <2 x double> %y) {
79 ; SSE3-LABEL: haddpd1:
582584 ret <2 x float> %res1
583585 }
584586
587 ; 128-bit vectors, float/double, fadd/fsub
588
585589 define float @extract_extract_v4f32_fadd_f32(<4 x float> %x) {
586590 ; SSE3-LABEL: extract_extract_v4f32_fadd_f32:
587591 ; SSE3: # %bb.0:
615619 %x0 = extractelement <4 x float> %x, i32 0
616620 %x1 = extractelement <4 x float> %x, i32 1
617621 %x01 = fadd float %x1, %x0
618 ret float %x01
619 }
620
; NOTE(review): Each test below extracts lanes 0 and 1 of a float vector and
; combines them with a scalar fadd/fsub; per the commit message these are
; candidates for horizontal-op formation. The CHECK lines are autogenerated
; (utils/update_llc_test_checks.py, see file header) and currently show
; shuffle (movshdup) + scalar add/sub codegen, not a horizontal op.
; "_commute" variants swap the scalar operand order (x1 op x0).
; NOTE(review): The digits fused onto the start of every line are diff-viewer
; line numbers, not part of the test -- TODO strip them before this file can
; be parsed/run as a real .ll test.
621 define float @extract_extract_v8f32_fadd_f32(<8 x float> %x) {
622 ; SSE3-LABEL: extract_extract_v8f32_fadd_f32:
623 ; SSE3: # %bb.0:
624 ; SSE3-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
625 ; SSE3-NEXT: addss %xmm1, %xmm0
626 ; SSE3-NEXT: retq
627 ;
628 ; AVX-LABEL: extract_extract_v8f32_fadd_f32:
629 ; AVX: # %bb.0:
630 ; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
631 ; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
632 ; AVX-NEXT: vzeroupper
633 ; AVX-NEXT: retq
634 %x0 = extractelement <8 x float> %x, i32 0
635 %x1 = extractelement <8 x float> %x, i32 1
636 %x01 = fadd float %x0, %x1
637 ret float %x01
638 }
639
640 define float @extract_extract_v8f32_fadd_f32_commute(<8 x float> %x) {
641 ; SSE3-LABEL: extract_extract_v8f32_fadd_f32_commute:
642 ; SSE3: # %bb.0:
643 ; SSE3-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
644 ; SSE3-NEXT: addss %xmm1, %xmm0
645 ; SSE3-NEXT: retq
646 ;
647 ; AVX-LABEL: extract_extract_v8f32_fadd_f32_commute:
648 ; AVX: # %bb.0:
649 ; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
650 ; AVX-NEXT: vaddss %xmm0, %xmm1, %xmm0
651 ; AVX-NEXT: vzeroupper
652 ; AVX-NEXT: retq
653 %x0 = extractelement <8 x float> %x, i32 0
654 %x1 = extractelement <8 x float> %x, i32 1
655 %x01 = fadd float %x1, %x0
656 ret float %x01
657 }
658
659 define float @extract_extract_v4f32_fsub_f32(<4 x float> %x) {
660 ; SSE3-LABEL: extract_extract_v4f32_fsub_f32:
661 ; SSE3: # %bb.0:
662 ; SSE3-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
663 ; SSE3-NEXT: subss %xmm1, %xmm0
664 ; SSE3-NEXT: retq
665 ;
666 ; AVX-LABEL: extract_extract_v4f32_fsub_f32:
667 ; AVX: # %bb.0:
668 ; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
669 ; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm0
670 ; AVX-NEXT: retq
671 %x0 = extractelement <4 x float> %x, i32 0
672 %x1 = extractelement <4 x float> %x, i32 1
673 %x01 = fsub float %x0, %x1
674 ret float %x01
675 }
676
677 define float @extract_extract_v4f32_fsub_f32_commute(<4 x float> %x) {
678 ; SSE3-LABEL: extract_extract_v4f32_fsub_f32_commute:
679 ; SSE3: # %bb.0:
680 ; SSE3-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
681 ; SSE3-NEXT: subss %xmm0, %xmm1
682 ; SSE3-NEXT: movaps %xmm1, %xmm0
683 ; SSE3-NEXT: retq
684 ;
685 ; AVX-LABEL: extract_extract_v4f32_fsub_f32_commute:
686 ; AVX: # %bb.0:
687 ; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
688 ; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm0
689 ; AVX-NEXT: retq
690 %x0 = extractelement <4 x float> %x, i32 0
691 %x1 = extractelement <4 x float> %x, i32 1
692 %x01 = fsub float %x1, %x0
693 ret float %x01
694 }
695
696 define float @extract_extract_v8f32_fsub_f32(<8 x float> %x) {
697 ; SSE3-LABEL: extract_extract_v8f32_fsub_f32:
698 ; SSE3: # %bb.0:
699 ; SSE3-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
700 ; SSE3-NEXT: subss %xmm1, %xmm0
701 ; SSE3-NEXT: retq
702 ;
703 ; AVX-LABEL: extract_extract_v8f32_fsub_f32:
704 ; AVX: # %bb.0:
705 ; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
706 ; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm0
707 ; AVX-NEXT: vzeroupper
708 ; AVX-NEXT: retq
709 %x0 = extractelement <8 x float> %x, i32 0
710 %x1 = extractelement <8 x float> %x, i32 1
711 %x01 = fsub float %x0, %x1
712 ret float %x01
713 }
714
715 define float @extract_extract_v8f32_fsub_f32_commute(<8 x float> %x) {
716 ; SSE3-LABEL: extract_extract_v8f32_fsub_f32_commute:
717 ; SSE3: # %bb.0:
718 ; SSE3-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
719 ; SSE3-NEXT: subss %xmm0, %xmm1
720 ; SSE3-NEXT: movaps %xmm1, %xmm0
721 ; SSE3-NEXT: retq
722 ;
723 ; AVX-LABEL: extract_extract_v8f32_fsub_f32_commute:
724 ; AVX: # %bb.0:
725 ; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
726 ; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm0
727 ; AVX-NEXT: vzeroupper
728 ; AVX-NEXT: retq
729 %x0 = extractelement <8 x float> %x, i32 0
730 %x1 = extractelement <8 x float> %x, i32 1
731 %x01 = fsub float %x1, %x0
732622 ret float %x01
733623 }
734624
772662 ret double %x01
773663 }
774664
; NOTE(review): More extract-lane-0/1 + scalar fadd/fsub tests (128-bit f32/f64,
; then the start of the 256-bit section). CHECK lines are autogenerated
; (utils/update_llc_test_checks.py). Several functions here duplicate names seen
; earlier in this scrape -- this text is a diff view showing both old and new
; hunks, so it will not parse as a single .ll module without deduplication.
; The leading digits on each line are diff-viewer line numbers, not test content.
665 define float @extract_extract_v4f32_fsub_f32(<4 x float> %x) {
666 ; SSE3-LABEL: extract_extract_v4f32_fsub_f32:
667 ; SSE3: # %bb.0:
668 ; SSE3-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
669 ; SSE3-NEXT: subss %xmm1, %xmm0
670 ; SSE3-NEXT: retq
671 ;
672 ; AVX-LABEL: extract_extract_v4f32_fsub_f32:
673 ; AVX: # %bb.0:
674 ; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
675 ; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm0
676 ; AVX-NEXT: retq
677 %x0 = extractelement <4 x float> %x, i32 0
678 %x1 = extractelement <4 x float> %x, i32 1
679 %x01 = fsub float %x0, %x1
680 ret float %x01
681 }
682
683 define float @extract_extract_v4f32_fsub_f32_commute(<4 x float> %x) {
684 ; SSE3-LABEL: extract_extract_v4f32_fsub_f32_commute:
685 ; SSE3: # %bb.0:
686 ; SSE3-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
687 ; SSE3-NEXT: subss %xmm0, %xmm1
688 ; SSE3-NEXT: movaps %xmm1, %xmm0
689 ; SSE3-NEXT: retq
690 ;
691 ; AVX-LABEL: extract_extract_v4f32_fsub_f32_commute:
692 ; AVX: # %bb.0:
693 ; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
694 ; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm0
695 ; AVX-NEXT: retq
696 %x0 = extractelement <4 x float> %x, i32 0
697 %x1 = extractelement <4 x float> %x, i32 1
698 %x01 = fsub float %x1, %x0
699 ret float %x01
700 }
701
702 define double @extract_extract_v2f64_fsub_f64(<2 x double> %x) {
703 ; SSE3-LABEL: extract_extract_v2f64_fsub_f64:
704 ; SSE3: # %bb.0:
705 ; SSE3-NEXT: movapd %xmm0, %xmm1
706 ; SSE3-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
707 ; SSE3-NEXT: subsd %xmm1, %xmm0
708 ; SSE3-NEXT: retq
709 ;
710 ; AVX-LABEL: extract_extract_v2f64_fsub_f64:
711 ; AVX: # %bb.0:
712 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
713 ; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm0
714 ; AVX-NEXT: retq
715 %x0 = extractelement <2 x double> %x, i32 0
716 %x1 = extractelement <2 x double> %x, i32 1
717 %x01 = fsub double %x0, %x1
718 ret double %x01
719 }
720
721 define double @extract_extract_v2f64_fsub_f64_commute(<2 x double> %x) {
722 ; SSE3-LABEL: extract_extract_v2f64_fsub_f64_commute:
723 ; SSE3: # %bb.0:
724 ; SSE3-NEXT: movapd %xmm0, %xmm1
725 ; SSE3-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
726 ; SSE3-NEXT: subsd %xmm0, %xmm1
727 ; SSE3-NEXT: movapd %xmm1, %xmm0
728 ; SSE3-NEXT: retq
729 ;
730 ; AVX-LABEL: extract_extract_v2f64_fsub_f64_commute:
731 ; AVX: # %bb.0:
732 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
733 ; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0
734 ; AVX-NEXT: retq
735 %x0 = extractelement <2 x double> %x, i32 0
736 %x1 = extractelement <2 x double> %x, i32 1
737 %x01 = fsub double %x1, %x0
738 ret double %x01
739 }
740
741 ; 256-bit vectors, float/double, fadd/fsub
742
743 define float @extract_extract_v8f32_fadd_f32(<8 x float> %x) {
744 ; SSE3-LABEL: extract_extract_v8f32_fadd_f32:
745 ; SSE3: # %bb.0:
746 ; SSE3-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
747 ; SSE3-NEXT: addss %xmm1, %xmm0
748 ; SSE3-NEXT: retq
749 ;
750 ; AVX-LABEL: extract_extract_v8f32_fadd_f32:
751 ; AVX: # %bb.0:
752 ; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
753 ; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
754 ; AVX-NEXT: vzeroupper
755 ; AVX-NEXT: retq
756 %x0 = extractelement <8 x float> %x, i32 0
757 %x1 = extractelement <8 x float> %x, i32 1
758 %x01 = fadd float %x0, %x1
759 ret float %x01
760 }
761
762 define float @extract_extract_v8f32_fadd_f32_commute(<8 x float> %x) {
763 ; SSE3-LABEL: extract_extract_v8f32_fadd_f32_commute:
764 ; SSE3: # %bb.0:
765 ; SSE3-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
766 ; SSE3-NEXT: addss %xmm1, %xmm0
767 ; SSE3-NEXT: retq
768 ;
769 ; AVX-LABEL: extract_extract_v8f32_fadd_f32_commute:
770 ; AVX: # %bb.0:
771 ; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
772 ; AVX-NEXT: vaddss %xmm0, %xmm1, %xmm0
773 ; AVX-NEXT: vzeroupper
774 ; AVX-NEXT: retq
775 %x0 = extractelement <8 x float> %x, i32 0
776 %x1 = extractelement <8 x float> %x, i32 1
777 %x01 = fadd float %x1, %x0
778 ret float %x01
779 }
780
775781 define double @extract_extract_v4f64_fadd_f64(<4 x double> %x) {
776782 ; SSE3-LABEL: extract_extract_v4f64_fadd_f64:
777783 ; SSE3: # %bb.0:
814820 ret double %x01
815821 }
816822
817 define double @extract_extract_v2f64_fsub_f64(<2 x double> %x) {
818 ; SSE3-LABEL: extract_extract_v2f64_fsub_f64:
; NOTE(review): 256-bit fsub tests. This region of the scrape interleaves the
; diff's removed side with its added side: lines carrying old labels such as
; "extract_extract_v2f64_fsub_f64" sit next to the new
; "extract_extract_v4f64_fsub_f64" labels, and some lines carry two fused line
; numbers (e.g. "819864"). TODO reconcile against the upstream commit before
; treating this as a runnable test; content is preserved verbatim below.
823 define float @extract_extract_v8f32_fsub_f32(<8 x float> %x) {
824 ; SSE3-LABEL: extract_extract_v8f32_fsub_f32:
825 ; SSE3: # %bb.0:
826 ; SSE3-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
827 ; SSE3-NEXT: subss %xmm1, %xmm0
828 ; SSE3-NEXT: retq
829 ;
830 ; AVX-LABEL: extract_extract_v8f32_fsub_f32:
831 ; AVX: # %bb.0:
832 ; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
833 ; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm0
834 ; AVX-NEXT: vzeroupper
835 ; AVX-NEXT: retq
836 %x0 = extractelement <8 x float> %x, i32 0
837 %x1 = extractelement <8 x float> %x, i32 1
838 %x01 = fsub float %x0, %x1
839 ret float %x01
840 }
841
842 define float @extract_extract_v8f32_fsub_f32_commute(<8 x float> %x) {
843 ; SSE3-LABEL: extract_extract_v8f32_fsub_f32_commute:
844 ; SSE3: # %bb.0:
845 ; SSE3-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
846 ; SSE3-NEXT: subss %xmm0, %xmm1
847 ; SSE3-NEXT: movaps %xmm1, %xmm0
848 ; SSE3-NEXT: retq
849 ;
850 ; AVX-LABEL: extract_extract_v8f32_fsub_f32_commute:
851 ; AVX: # %bb.0:
852 ; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
853 ; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm0
854 ; AVX-NEXT: vzeroupper
855 ; AVX-NEXT: retq
856 %x0 = extractelement <8 x float> %x, i32 0
857 %x1 = extractelement <8 x float> %x, i32 1
858 %x01 = fsub float %x1, %x0
859 ret float %x01
860 }
861
862 define double @extract_extract_v4f64_fsub_f64(<4 x double> %x) {
863 ; SSE3-LABEL: extract_extract_v4f64_fsub_f64:
819864 ; SSE3: # %bb.0:
820865 ; SSE3-NEXT: movapd %xmm0, %xmm1
821866 ; SSE3-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
822867 ; SSE3-NEXT: subsd %xmm1, %xmm0
823868 ; SSE3-NEXT: retq
824869 ;
825 ; AVX-LABEL: extract_extract_v2f64_fsub_f64:
870 ; AVX-LABEL: extract_extract_v4f64_fsub_f64:
826871 ; AVX: # %bb.0:
827872 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
828873 ; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm0
829 ; AVX-NEXT: retq
830 %x0 = extractelement <2 x double> %x, i32 0
831 %x1 = extractelement <2 x double> %x, i32 1
874 ; AVX-NEXT: vzeroupper
875 ; AVX-NEXT: retq
876 %x0 = extractelement <4 x double> %x, i32 0
877 %x1 = extractelement <4 x double> %x, i32 1
832878 %x01 = fsub double %x0, %x1
833879 ret double %x01
834880 }
835881
836 define double @extract_extract_v2f64_fsub_f64_commute(<2 x double> %x) {
837 ; SSE3-LABEL: extract_extract_v2f64_fsub_f64_commute:
882 define double @extract_extract_v4f64_fsub_f64_commute(<4 x double> %x) {
883 ; SSE3-LABEL: extract_extract_v4f64_fsub_f64_commute:
838884 ; SSE3: # %bb.0:
839885 ; SSE3-NEXT: movapd %xmm0, %xmm1
840886 ; SSE3-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
842888 ; SSE3-NEXT: movapd %xmm1, %xmm0
843889 ; SSE3-NEXT: retq
844890 ;
845 ; AVX-LABEL: extract_extract_v2f64_fsub_f64_commute:
891 ; AVX-LABEL: extract_extract_v4f64_fsub_f64_commute:
846892 ; AVX: # %bb.0:
847893 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
848894 ; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0
849 ; AVX-NEXT: retq
850 %x0 = extractelement <2 x double> %x, i32 0
851 %x1 = extractelement <2 x double> %x, i32 1
895 ; AVX-NEXT: vzeroupper
896 ; AVX-NEXT: retq
897 %x0 = extractelement <4 x double> %x, i32 0
898 %x1 = extractelement <4 x double> %x, i32 1
852899 %x01 = fsub double %x1, %x0
853900 ret double %x01
854901 }
855902
856 define double @extract_extract_v4f64_fsub_f64(<4 x double> %x) {
857 ; SSE3-LABEL: extract_extract_v4f64_fsub_f64:
; NOTE(review): 512-bit section (v16f32 / v8f64), added in this commit for the
; new AVX512 RUN lines visible in the file header. Same pattern as above:
; extract lanes 0 and 1, combine with fadd/fsub ("_commute" swaps operand
; order); CHECK lines are autogenerated. The v8f64_fsub pair below again
; interleaves the diff's removed side (old "v4f64_fsub" labels, fused line
; numbers like "8581026") with the added side -- TODO reconcile against the
; upstream commit; content preserved verbatim.
903 ; 512-bit vectors, float/double, fadd/fsub
904
905 define float @extract_extract_v16f32_fadd_f32(<16 x float> %x) {
906 ; SSE3-LABEL: extract_extract_v16f32_fadd_f32:
907 ; SSE3: # %bb.0:
908 ; SSE3-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
909 ; SSE3-NEXT: addss %xmm1, %xmm0
910 ; SSE3-NEXT: retq
911 ;
912 ; AVX-LABEL: extract_extract_v16f32_fadd_f32:
913 ; AVX: # %bb.0:
914 ; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
915 ; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
916 ; AVX-NEXT: vzeroupper
917 ; AVX-NEXT: retq
918 %x0 = extractelement <16 x float> %x, i32 0
919 %x1 = extractelement <16 x float> %x, i32 1
920 %x01 = fadd float %x0, %x1
921 ret float %x01
922 }
923
924 define float @extract_extract_v16f32_fadd_f32_commute(<16 x float> %x) {
925 ; SSE3-LABEL: extract_extract_v16f32_fadd_f32_commute:
926 ; SSE3: # %bb.0:
927 ; SSE3-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
928 ; SSE3-NEXT: addss %xmm1, %xmm0
929 ; SSE3-NEXT: retq
930 ;
931 ; AVX-LABEL: extract_extract_v16f32_fadd_f32_commute:
932 ; AVX: # %bb.0:
933 ; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
934 ; AVX-NEXT: vaddss %xmm0, %xmm1, %xmm0
935 ; AVX-NEXT: vzeroupper
936 ; AVX-NEXT: retq
937 %x0 = extractelement <16 x float> %x, i32 0
938 %x1 = extractelement <16 x float> %x, i32 1
939 %x01 = fadd float %x1, %x0
940 ret float %x01
941 }
942
943 define double @extract_extract_v8f64_fadd_f64(<8 x double> %x) {
944 ; SSE3-LABEL: extract_extract_v8f64_fadd_f64:
945 ; SSE3: # %bb.0:
946 ; SSE3-NEXT: movapd %xmm0, %xmm1
947 ; SSE3-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
948 ; SSE3-NEXT: addsd %xmm0, %xmm1
949 ; SSE3-NEXT: movapd %xmm1, %xmm0
950 ; SSE3-NEXT: retq
951 ;
952 ; AVX-LABEL: extract_extract_v8f64_fadd_f64:
953 ; AVX: # %bb.0:
954 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
955 ; AVX-NEXT: vaddsd %xmm1, %xmm0, %xmm0
956 ; AVX-NEXT: vzeroupper
957 ; AVX-NEXT: retq
958 %x0 = extractelement <8 x double> %x, i32 0
959 %x1 = extractelement <8 x double> %x, i32 1
960 %x01 = fadd double %x0, %x1
961 ret double %x01
962 }
963
964 define double @extract_extract_v8f64_fadd_f64_commute(<8 x double> %x) {
965 ; SSE3-LABEL: extract_extract_v8f64_fadd_f64_commute:
966 ; SSE3: # %bb.0:
967 ; SSE3-NEXT: movapd %xmm0, %xmm1
968 ; SSE3-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
969 ; SSE3-NEXT: addsd %xmm0, %xmm1
970 ; SSE3-NEXT: movapd %xmm1, %xmm0
971 ; SSE3-NEXT: retq
972 ;
973 ; AVX-LABEL: extract_extract_v8f64_fadd_f64_commute:
974 ; AVX: # %bb.0:
975 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
976 ; AVX-NEXT: vaddsd %xmm0, %xmm1, %xmm0
977 ; AVX-NEXT: vzeroupper
978 ; AVX-NEXT: retq
979 %x0 = extractelement <8 x double> %x, i32 0
980 %x1 = extractelement <8 x double> %x, i32 1
981 %x01 = fadd double %x1, %x0
982 ret double %x01
983 }
984
985 define float @extract_extract_v16f32_fsub_f32(<16 x float> %x) {
986 ; SSE3-LABEL: extract_extract_v16f32_fsub_f32:
987 ; SSE3: # %bb.0:
988 ; SSE3-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
989 ; SSE3-NEXT: subss %xmm1, %xmm0
990 ; SSE3-NEXT: retq
991 ;
992 ; AVX-LABEL: extract_extract_v16f32_fsub_f32:
993 ; AVX: # %bb.0:
994 ; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
995 ; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm0
996 ; AVX-NEXT: vzeroupper
997 ; AVX-NEXT: retq
998 %x0 = extractelement <16 x float> %x, i32 0
999 %x1 = extractelement <16 x float> %x, i32 1
1000 %x01 = fsub float %x0, %x1
1001 ret float %x01
1002 }
1003
1004 define float @extract_extract_v16f32_fsub_f32_commute(<16 x float> %x) {
1005 ; SSE3-LABEL: extract_extract_v16f32_fsub_f32_commute:
1006 ; SSE3: # %bb.0:
1007 ; SSE3-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
1008 ; SSE3-NEXT: subss %xmm0, %xmm1
1009 ; SSE3-NEXT: movaps %xmm1, %xmm0
1010 ; SSE3-NEXT: retq
1011 ;
1012 ; AVX-LABEL: extract_extract_v16f32_fsub_f32_commute:
1013 ; AVX: # %bb.0:
1014 ; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
1015 ; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm0
1016 ; AVX-NEXT: vzeroupper
1017 ; AVX-NEXT: retq
1018 %x0 = extractelement <16 x float> %x, i32 0
1019 %x1 = extractelement <16 x float> %x, i32 1
1020 %x01 = fsub float %x1, %x0
1021 ret float %x01
1022 }
1023
1024 define double @extract_extract_v8f64_fsub_f64(<8 x double> %x) {
1025 ; SSE3-LABEL: extract_extract_v8f64_fsub_f64:
8581026 ; SSE3: # %bb.0:
8591027 ; SSE3-NEXT: movapd %xmm0, %xmm1
8601028 ; SSE3-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
8611029 ; SSE3-NEXT: subsd %xmm1, %xmm0
8621030 ; SSE3-NEXT: retq
8631031 ;
864 ; AVX-LABEL: extract_extract_v4f64_fsub_f64:
1032 ; AVX-LABEL: extract_extract_v8f64_fsub_f64:
8651033 ; AVX: # %bb.0:
8661034 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
8671035 ; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm0
8681036 ; AVX-NEXT: vzeroupper
8691037 ; AVX-NEXT: retq
870 %x0 = extractelement <4 x double> %x, i32 0
871 %x1 = extractelement <4 x double> %x, i32 1
1038 %x0 = extractelement <8 x double> %x, i32 0
1039 %x1 = extractelement <8 x double> %x, i32 1
8721040 %x01 = fsub double %x0, %x1
8731041 ret double %x01
8741042 }
8751043
876 define double @extract_extract_v4f64_fsub_f64_commute(<4 x double> %x) {
877 ; SSE3-LABEL: extract_extract_v4f64_fsub_f64_commute:
1044 define double @extract_extract_v8f64_fsub_f64_commute(<8 x double> %x) {
1045 ; SSE3-LABEL: extract_extract_v8f64_fsub_f64_commute:
8781046 ; SSE3: # %bb.0:
8791047 ; SSE3-NEXT: movapd %xmm0, %xmm1
8801048 ; SSE3-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
8821050 ; SSE3-NEXT: movapd %xmm1, %xmm0
8831051 ; SSE3-NEXT: retq
8841052 ;
885 ; AVX-LABEL: extract_extract_v4f64_fsub_f64_commute:
1053 ; AVX-LABEL: extract_extract_v8f64_fsub_f64_commute:
8861054 ; AVX: # %bb.0:
8871055 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
8881056 ; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0
8891057 ; AVX-NEXT: vzeroupper
8901058 ; AVX-NEXT: retq
891 %x0 = extractelement <4 x double> %x, i32 0
892 %x1 = extractelement <4 x double> %x, i32 1
1059 %x0 = extractelement <8 x double> %x, i32 0
1060 %x1 = extractelement <8 x double> %x, i32 1
8931061 %x01 = fsub double %x1, %x0
8941062 ret double %x01
8951063 }
8961064
; NOTE(review): Extra-use tests added by this commit: one or both extracted
; scalars are also stored through a pointer argument, giving the extract a
; second use besides the fadd. uses1 stores %x0, uses2 stores %x1, uses3
; stores both. CHECK lines are autogenerated (utils/update_llc_test_checks.py).
; The leading digits on each line are diff-viewer line numbers, not test
; content -- TODO strip before running.
1065 ; Check output when 1 or both extracts have extra uses.
1066
1067 define float @extract_extract_v4f32_fadd_f32_uses1(<4 x float> %x, float* %p) {
1068 ; SSE3-LABEL: extract_extract_v4f32_fadd_f32_uses1:
1069 ; SSE3: # %bb.0:
1070 ; SSE3-NEXT: movss %xmm0, (%rdi)
1071 ; SSE3-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
1072 ; SSE3-NEXT: addss %xmm1, %xmm0
1073 ; SSE3-NEXT: retq
1074 ;
1075 ; AVX-LABEL: extract_extract_v4f32_fadd_f32_uses1:
1076 ; AVX: # %bb.0:
1077 ; AVX-NEXT: vmovss %xmm0, (%rdi)
1078 ; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
1079 ; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
1080 ; AVX-NEXT: retq
1081 %x0 = extractelement <4 x float> %x, i32 0
1082 store float %x0, float* %p
1083 %x1 = extractelement <4 x float> %x, i32 1
1084 %x01 = fadd float %x0, %x1
1085 ret float %x01
1086 }
1087
1088 define float @extract_extract_v4f32_fadd_f32_uses2(<4 x float> %x, float* %p) {
1089 ; SSE3-LABEL: extract_extract_v4f32_fadd_f32_uses2:
1090 ; SSE3: # %bb.0:
1091 ; SSE3-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
1092 ; SSE3-NEXT: movss %xmm1, (%rdi)
1093 ; SSE3-NEXT: addss %xmm1, %xmm0
1094 ; SSE3-NEXT: retq
1095 ;
1096 ; AVX-LABEL: extract_extract_v4f32_fadd_f32_uses2:
1097 ; AVX: # %bb.0:
1098 ; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
1099 ; AVX-NEXT: vmovss %xmm1, (%rdi)
1100 ; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
1101 ; AVX-NEXT: retq
1102 %x0 = extractelement <4 x float> %x, i32 0
1103 %x1 = extractelement <4 x float> %x, i32 1
1104 store float %x1, float* %p
1105 %x01 = fadd float %x0, %x1
1106 ret float %x01
1107 }
1108
1109 define float @extract_extract_v4f32_fadd_f32_uses3(<4 x float> %x, float* %p1, float* %p2) {
1110 ; SSE3-LABEL: extract_extract_v4f32_fadd_f32_uses3:
1111 ; SSE3: # %bb.0:
1112 ; SSE3-NEXT: movss %xmm0, (%rdi)
1113 ; SSE3-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
1114 ; SSE3-NEXT: movss %xmm1, (%rsi)
1115 ; SSE3-NEXT: addss %xmm1, %xmm0
1116 ; SSE3-NEXT: retq
1117 ;
1118 ; AVX-LABEL: extract_extract_v4f32_fadd_f32_uses3:
1119 ; AVX: # %bb.0:
1120 ; AVX-NEXT: vmovss %xmm0, (%rdi)
1121 ; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
1122 ; AVX-NEXT: vmovss %xmm1, (%rsi)
1123 ; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
1124 ; AVX-NEXT: retq
1125 %x0 = extractelement <4 x float> %x, i32 0
1126 store float %x0, float* %p1
1127 %x1 = extractelement <4 x float> %x, i32 1
1128 store float %x1, float* %p2
1129 %x01 = fadd float %x0, %x1
1130 ret float %x01
1131 }
1132