llvm.org GIT mirror llvm / c032bad
[X86][AVX512] Add support for lowering shuffles to VPERMILPD git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@274450 91177308-0d34-0410-b5e6-96231b3b80d8 Simon Pilgrim 4 years ago
2 changed file(s) with 37 addition(s) and 52 deletion(s). Raw diff Collapse all Expand all
1172011720 // Use low duplicate instructions for masks that match their pattern.
1172111721 if (isShuffleEquivalent(V1, V2, Mask, {0, 0, 2, 2, 4, 4, 6, 6}))
1172211722 return DAG.getNode(X86ISD::MOVDDUP, DL, MVT::v8f64, V1);
11723
11724 if (!is128BitLaneCrossingShuffleMask(MVT::v8f64, Mask)) {
11725 // Non-half-crossing single input shuffles can be lowered with an
11726 // interleaved permutation.
11727 unsigned VPERMILPMask = (Mask[0] == 1) | ((Mask[1] == 1) << 1) |
11728 ((Mask[2] == 3) << 2) | ((Mask[3] == 3) << 3) |
11729 ((Mask[4] == 5) << 4) | ((Mask[5] == 5) << 5) |
11730 ((Mask[6] == 7) << 6) | ((Mask[7] == 7) << 7);
11731 return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v8f64, V1,
11732 DAG.getConstant(VPERMILPMask, DL, MVT::i8));
11733 }
1172311734 }
1172411735
1172511736 if (SDValue Shuf128 =
553553 ;
554554 ; AVX512F-LABEL: shuffle_v8f64_00234467:
555555 ; AVX512F: # BB#0:
556 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,2,3,4,4,6,7]
557 ; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
556 ; AVX512F-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[0,0,2,3,4,4,6,7]
558557 ; AVX512F-NEXT: retq
559558 ;
560559 ; AVX512F-32-LABEL: shuffle_v8f64_00234467:
561560 ; AVX512F-32: # BB#0:
562 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,2,0,3,0,4,0,4,0,6,0,7,0]
563 ; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
561 ; AVX512F-32-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[0,0,2,3,4,4,6,7]
564562 ; AVX512F-32-NEXT: retl
565563 %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7>
566564 ret <8 x double> %shuffle
585583 ;
586584 ; AVX512F-LABEL: shuffle_v8f64_10325476:
587585 ; AVX512F: # BB#0:
588 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,3,2,5,4,7,6]
589 ; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
586 ; AVX512F-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[1,0,3,2,5,4,7,6]
590587 ; AVX512F-NEXT: retq
591588 ;
592589 ; AVX512F-32-LABEL: shuffle_v8f64_10325476:
593590 ; AVX512F-32: # BB#0:
594 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,0,0,3,0,2,0,5,0,4,0,7,0,6,0]
595 ; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
591 ; AVX512F-32-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[1,0,3,2,5,4,7,6]
596592 ; AVX512F-32-NEXT: retl
597593 %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
598594 ret <8 x double> %shuffle
602598 ;
603599 ; AVX512F-LABEL: shuffle_v8f64_11335577:
604600 ; AVX512F: # BB#0:
605 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,1,3,3,5,5,7,7]
606 ; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
601 ; AVX512F-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[1,1,3,3,5,5,7,7]
607602 ; AVX512F-NEXT: retq
608603 ;
609604 ; AVX512F-32-LABEL: shuffle_v8f64_11335577:
610605 ; AVX512F-32: # BB#0:
611 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,1,0,3,0,3,0,5,0,5,0,7,0,7,0]
612 ; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
606 ; AVX512F-32-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[1,1,3,3,5,5,7,7]
613607 ; AVX512F-32-NEXT: retl
614608 %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
615609 ret <8 x double> %shuffle
619613 ;
620614 ; AVX512F-LABEL: shuffle_v8f64_10235467:
621615 ; AVX512F: # BB#0:
622 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,2,3,5,4,6,7]
623 ; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
616 ; AVX512F-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[1,0,2,3,5,4,6,7]
624617 ; AVX512F-NEXT: retq
625618 ;
626619 ; AVX512F-32-LABEL: shuffle_v8f64_10235467:
627620 ; AVX512F-32: # BB#0:
628 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,0,0,2,0,3,0,5,0,4,0,6,0,7,0]
629 ; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
621 ; AVX512F-32-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[1,0,2,3,5,4,6,7]
630622 ; AVX512F-32-NEXT: retl
631623 %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
632624 ret <8 x double> %shuffle
636628 ;
637629 ; AVX512F-LABEL: shuffle_v8f64_10225466:
638630 ; AVX512F: # BB#0:
639 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,2,2,5,4,6,6]
640 ; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
631 ; AVX512F-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[1,0,2,2,5,4,6,6]
641632 ; AVX512F-NEXT: retq
642633 ;
643634 ; AVX512F-32-LABEL: shuffle_v8f64_10225466:
644635 ; AVX512F-32: # BB#0:
645 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,0,0,2,0,2,0,5,0,4,0,6,0,6,0]
646 ; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
636 ; AVX512F-32-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[1,0,2,2,5,4,6,6]
647637 ; AVX512F-32-NEXT: retl
648638 %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6>
649639 ret <8 x double> %shuffle
806796 ;
807797 ; AVX512F-LABEL: shuffle_v8f64_10324567:
808798 ; AVX512F: # BB#0:
809 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,3,2,4,5,6,7]
810 ; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
799 ; AVX512F-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[1,0,3,2,4,5,6,7]
811800 ; AVX512F-NEXT: retq
812801 ;
813802 ; AVX512F-32-LABEL: shuffle_v8f64_10324567:
814803 ; AVX512F-32: # BB#0:
815 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,0,0,3,0,2,0,4,0,5,0,6,0,7,0]
816 ; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
804 ; AVX512F-32-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[1,0,3,2,4,5,6,7]
817805 ; AVX512F-32-NEXT: retl
818806 %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
819807 ret <8 x double> %shuffle
823811 ;
824812 ; AVX512F-LABEL: shuffle_v8f64_11334567:
825813 ; AVX512F: # BB#0:
826 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,1,3,3,4,5,6,7]
827 ; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
814 ; AVX512F-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[1,1,3,3,4,5,6,7]
828815 ; AVX512F-NEXT: retq
829816 ;
830817 ; AVX512F-32-LABEL: shuffle_v8f64_11334567:
831818 ; AVX512F-32: # BB#0:
832 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,1,0,3,0,3,0,4,0,5,0,6,0,7,0]
833 ; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
819 ; AVX512F-32-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[1,1,3,3,4,5,6,7]
834820 ; AVX512F-32-NEXT: retl
835821 %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7>
836822 ret <8 x double> %shuffle
840826 ;
841827 ; AVX512F-LABEL: shuffle_v8f64_01235467:
842828 ; AVX512F: # BB#0:
843 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,1,2,3,5,4,6,7]
844 ; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
829 ; AVX512F-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[0,1,2,3,5,4,6,7]
845830 ; AVX512F-NEXT: retq
846831 ;
847832 ; AVX512F-32-LABEL: shuffle_v8f64_01235467:
848833 ; AVX512F-32: # BB#0:
849 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,1,0,2,0,3,0,5,0,4,0,6,0,7,0]
850 ; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
834 ; AVX512F-32-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[0,1,2,3,5,4,6,7]
851835 ; AVX512F-32-NEXT: retl
852836 %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
853837 ret <8 x double> %shuffle
857841 ;
858842 ; AVX512F-LABEL: shuffle_v8f64_01235466:
859843 ; AVX512F: # BB#0:
860 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,1,2,3,5,4,6,6]
861 ; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
844 ; AVX512F-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[0,1,2,3,5,4,6,6]
862845 ; AVX512F-NEXT: retq
863846 ;
864847 ; AVX512F-32-LABEL: shuffle_v8f64_01235466:
865848 ; AVX512F-32: # BB#0:
866 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,1,0,2,0,3,0,5,0,4,0,6,0,6,0]
867 ; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
849 ; AVX512F-32-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[0,1,2,3,5,4,6,6]
868850 ; AVX512F-32-NEXT: retl
869851 %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6>
870852 ret <8 x double> %shuffle
908890 ;
909891 ; AVX512F-LABEL: shuffle_v8f64_103245uu:
910892 ; AVX512F: # BB#0:
911 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = <1,0,3,2,4,5,u,u>
912 ; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
893 ; AVX512F-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[1,0,3,2,4,5,6,6]
913894 ; AVX512F-NEXT: retq
914895 ;
915896 ; AVX512F-32-LABEL: shuffle_v8f64_103245uu:
916897 ; AVX512F-32: # BB#0:
917 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = <1,0,0,0,3,0,2,0,4,0,5,0,u,u,u,u>
918 ; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
898 ; AVX512F-32-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[1,0,3,2,4,5,6,6]
919899 ; AVX512F-32-NEXT: retl
920900 %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef>
921901 ret <8 x double> %shuffle
925905 ;
926906 ; AVX512F-LABEL: shuffle_v8f64_1133uu67:
927907 ; AVX512F: # BB#0:
928 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = <1,1,3,3,u,u,6,7>
929 ; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
908 ; AVX512F-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[1,1,3,3,4,4,6,7]
930909 ; AVX512F-NEXT: retq
931910 ;
932911 ; AVX512F-32-LABEL: shuffle_v8f64_1133uu67:
933912 ; AVX512F-32: # BB#0:
934 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = <1,0,1,0,3,0,3,0,u,u,u,u,6,0,7,0>
935 ; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
913 ; AVX512F-32-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[1,1,3,3,4,4,6,7]
936914 ; AVX512F-32-NEXT: retl
937915 %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7>
938916 ret <8 x double> %shuffle
942920 ;
943921 ; AVX512F-LABEL: shuffle_v8f64_0uu354uu:
944922 ; AVX512F: # BB#0:
945 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = <0,u,u,3,5,4,u,u>
946 ; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
923 ; AVX512F-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[0,0,2,3,5,4,6,6]
947924 ; AVX512F-NEXT: retq
948925 ;
949926 ; AVX512F-32-LABEL: shuffle_v8f64_0uu354uu:
950927 ; AVX512F-32: # BB#0:
951 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = <0,0,u,u,u,u,3,0,5,0,4,0,u,u,u,u>
952 ; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
928 ; AVX512F-32-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[0,0,2,3,5,4,6,6]
953929 ; AVX512F-32-NEXT: retl
954930 %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef>
955931 ret <8 x double> %shuffle
959935 ;
960936 ; AVX512F-LABEL: shuffle_v8f64_uuu3uu66:
961937 ; AVX512F: # BB#0:
962 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = <u,u,u,3,u,u,6,6>
963 ; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
938 ; AVX512F-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[0,0,2,3,4,4,6,6]
964939 ; AVX512F-NEXT: retq
965940 ;
966941 ; AVX512F-32-LABEL: shuffle_v8f64_uuu3uu66:
967942 ; AVX512F-32: # BB#0:
968 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = <u,u,u,u,u,u,3,0,u,u,u,u,6,0,6,0>
969 ; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
943 ; AVX512F-32-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[0,0,2,3,4,4,6,6]
970944 ; AVX512F-32-NEXT: retl
971945 %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6>
972946 ret <8 x double> %shuffle