llvm.org GIT mirror llvm / b349d9e
[X86] Add test cases to show that we fail to fold loads into i8 smulo and i8/i16/i32/i64 umulo lowering without the assistance of the peephole pass. NFC git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@350271 91177308-0d34-0410-b5e6-96231b3b80d8 Craig Topper 8 months ago
1 changed file(s) with 440 addition(s) and 3 deletion(s). Raw diff Collapse all Expand all
0 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1 ; RUN: llc -mtriple=x86_64-darwin-unknown < %s | FileCheck %s --check-prefix=SDAG
2 ; RUN: llc -mtriple=x86_64-darwin-unknown -fast-isel -fast-isel-abort=1 < %s | FileCheck %s --check-prefix=FAST
3 ; RUN: llc -mtriple=x86_64-darwin-unknown -mcpu=knl < %s | FileCheck %s --check-prefix=SDAG --check-prefix=KNL
1 ; RUN: llc -disable-peephole -mtriple=x86_64-darwin-unknown < %s | FileCheck %s --check-prefix=SDAG
2 ; RUN: llc -disable-peephole -mtriple=x86_64-darwin-unknown -fast-isel -fast-isel-abort=1 < %s | FileCheck %s --check-prefix=FAST
3 ; RUN: llc -disable-peephole -mtriple=x86_64-darwin-unknown -mcpu=knl < %s | FileCheck %s --check-prefix=SDAG --check-prefix=KNL
44
55 define {i64, i1} @t1() nounwind {
66 ; SDAG-LABEL: t1:
719719 %mul.overflow = extractvalue { i64, i1 } %mul, 1
720720 %x1 = or i1 %c2, %mul.overflow
721721 ret i1 %x1
722 }
723
; i8 signed multiply-with-overflow whose first operand (%v1) is loaded from
; %ptr1. The product is stored to %res and the overflow bit is returned.
; Both check blocks below expect the load as a separate movb ahead of the
; one-operand imulb, i.e. the load is not folded into the multiply.
724 define zeroext i1 @smuloi8_load(i8* %ptr1, i8 %v2, i8* %res) {
725 ; SDAG-LABEL: smuloi8_load:
726 ; SDAG: ## %bb.0:
727 ; SDAG-NEXT: movb (%rdi), %al
728 ; SDAG-NEXT: imulb %sil
729 ; SDAG-NEXT: seto %cl
730 ; SDAG-NEXT: movb %al, (%rdx)
731 ; SDAG-NEXT: movl %ecx, %eax
732 ; SDAG-NEXT: retq
733 ;
734 ; FAST-LABEL: smuloi8_load:
735 ; FAST: ## %bb.0:
736 ; FAST-NEXT: movb (%rdi), %al
737 ; FAST-NEXT: imulb %sil
738 ; FAST-NEXT: seto %cl
739 ; FAST-NEXT: movb %al, (%rdx)
740 ; FAST-NEXT: andb $1, %cl
741 ; FAST-NEXT: movzbl %cl, %eax
742 ; FAST-NEXT: retq
743 %v1 = load i8, i8* %ptr1
744 %t = call {i8, i1} @llvm.smul.with.overflow.i8(i8 %v1, i8 %v2)
745 %val = extractvalue {i8, i1} %t, 0
746 %obit = extractvalue {i8, i1} %t, 1
747 store i8 %val, i8* %res
748 ret i1 %obit
749 }
750
; i8 signed multiply-with-overflow whose second operand (%v2) is loaded from
; %ptr2. The product is stored to %res and the overflow bit is returned.
; The SelectionDAG checks expect a separate movb load feeding imulb, whereas
; the fast-isel checks expect the load folded as a memory operand of imulb.
751 define zeroext i1 @smuloi8_load2(i8 %v1, i8* %ptr2, i8* %res) {
752 ; SDAG-LABEL: smuloi8_load2:
753 ; SDAG: ## %bb.0:
754 ; SDAG-NEXT: movl %edi, %eax
755 ; SDAG-NEXT: movb (%rsi), %cl
756 ; SDAG-NEXT: ## kill: def $al killed $al killed $eax
757 ; SDAG-NEXT: imulb %cl
758 ; SDAG-NEXT: seto %cl
759 ; SDAG-NEXT: movb %al, (%rdx)
760 ; SDAG-NEXT: movl %ecx, %eax
761 ; SDAG-NEXT: retq
762 ;
763 ; FAST-LABEL: smuloi8_load2:
764 ; FAST: ## %bb.0:
765 ; FAST-NEXT: movl %edi, %eax
766 ; FAST-NEXT: ## kill: def $al killed $al killed $eax
767 ; FAST-NEXT: imulb (%rsi)
768 ; FAST-NEXT: seto %cl
769 ; FAST-NEXT: movb %al, (%rdx)
770 ; FAST-NEXT: andb $1, %cl
771 ; FAST-NEXT: movzbl %cl, %eax
772 ; FAST-NEXT: retq
773 %v2 = load i8, i8* %ptr2
774 %t = call {i8, i1} @llvm.smul.with.overflow.i8(i8 %v1, i8 %v2)
775 %val = extractvalue {i8, i1} %t, 0
776 %obit = extractvalue {i8, i1} %t, 1
777 store i8 %val, i8* %res
778 ret i1 %obit
779 }
780
; i16 signed multiply-with-overflow whose first operand (%v1) is loaded from
; %ptr1. The product is stored to %res and the overflow bit is returned.
; Both check blocks expect the load folded into the two-operand imulw as a
; memory source operand.
781 define zeroext i1 @smuloi16_load(i16* %ptr1, i16 %v2, i16* %res) {
782 ; SDAG-LABEL: smuloi16_load:
783 ; SDAG: ## %bb.0:
784 ; SDAG-NEXT: imulw (%rdi), %si
785 ; SDAG-NEXT: seto %al
786 ; SDAG-NEXT: movw %si, (%rdx)
787 ; SDAG-NEXT: retq
788 ;
789 ; FAST-LABEL: smuloi16_load:
790 ; FAST: ## %bb.0:
791 ; FAST-NEXT: imulw (%rdi), %si
792 ; FAST-NEXT: seto %al
793 ; FAST-NEXT: movw %si, (%rdx)
794 ; FAST-NEXT: andb $1, %al
795 ; FAST-NEXT: movzbl %al, %eax
796 ; FAST-NEXT: retq
797 %v1 = load i16, i16* %ptr1
798 %t = call {i16, i1} @llvm.smul.with.overflow.i16(i16 %v1, i16 %v2)
799 %val = extractvalue {i16, i1} %t, 0
800 %obit = extractvalue {i16, i1} %t, 1
801 store i16 %val, i16* %res
802 ret i1 %obit
803 }
804
; i16 signed multiply-with-overflow whose second operand (%v2) is loaded from
; %ptr2. The product is stored to %res and the overflow bit is returned.
; Both check blocks expect the load folded into the two-operand imulw as a
; memory source operand.
805 define zeroext i1 @smuloi16_load2(i16 %v1, i16* %ptr2, i16* %res) {
806 ; SDAG-LABEL: smuloi16_load2:
807 ; SDAG: ## %bb.0:
808 ; SDAG-NEXT: imulw (%rsi), %di
809 ; SDAG-NEXT: seto %al
810 ; SDAG-NEXT: movw %di, (%rdx)
811 ; SDAG-NEXT: retq
812 ;
813 ; FAST-LABEL: smuloi16_load2:
814 ; FAST: ## %bb.0:
815 ; FAST-NEXT: imulw (%rsi), %di
816 ; FAST-NEXT: seto %al
817 ; FAST-NEXT: movw %di, (%rdx)
818 ; FAST-NEXT: andb $1, %al
819 ; FAST-NEXT: movzbl %al, %eax
820 ; FAST-NEXT: retq
821 %v2 = load i16, i16* %ptr2
822 %t = call {i16, i1} @llvm.smul.with.overflow.i16(i16 %v1, i16 %v2)
823 %val = extractvalue {i16, i1} %t, 0
824 %obit = extractvalue {i16, i1} %t, 1
825 store i16 %val, i16* %res
826 ret i1 %obit
827 }
828
; i32 signed multiply-with-overflow whose first operand (%v1) is loaded from
; %ptr1. The product is stored to %res and the overflow bit is returned.
; Both check blocks expect the load folded into the two-operand imull as a
; memory source operand.
829 define zeroext i1 @smuloi32_load(i32* %ptr1, i32 %v2, i32* %res) {
830 ; SDAG-LABEL: smuloi32_load:
831 ; SDAG: ## %bb.0:
832 ; SDAG-NEXT: imull (%rdi), %esi
833 ; SDAG-NEXT: seto %al
834 ; SDAG-NEXT: movl %esi, (%rdx)
835 ; SDAG-NEXT: retq
836 ;
837 ; FAST-LABEL: smuloi32_load:
838 ; FAST: ## %bb.0:
839 ; FAST-NEXT: imull (%rdi), %esi
840 ; FAST-NEXT: seto %al
841 ; FAST-NEXT: movl %esi, (%rdx)
842 ; FAST-NEXT: andb $1, %al
843 ; FAST-NEXT: movzbl %al, %eax
844 ; FAST-NEXT: retq
845 %v1 = load i32, i32* %ptr1
846 %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
847 %val = extractvalue {i32, i1} %t, 0
848 %obit = extractvalue {i32, i1} %t, 1
849 store i32 %val, i32* %res
850 ret i1 %obit
851 }
852
; i32 signed multiply-with-overflow whose second operand (%v2) is loaded from
; %ptr2. The product is stored to %res and the overflow bit is returned.
; Both check blocks expect the load folded into the two-operand imull as a
; memory source operand.
853 define zeroext i1 @smuloi32_load2(i32 %v1, i32* %ptr2, i32* %res) {
854 ; SDAG-LABEL: smuloi32_load2:
855 ; SDAG: ## %bb.0:
856 ; SDAG-NEXT: imull (%rsi), %edi
857 ; SDAG-NEXT: seto %al
858 ; SDAG-NEXT: movl %edi, (%rdx)
859 ; SDAG-NEXT: retq
860 ;
861 ; FAST-LABEL: smuloi32_load2:
862 ; FAST: ## %bb.0:
863 ; FAST-NEXT: imull (%rsi), %edi
864 ; FAST-NEXT: seto %al
865 ; FAST-NEXT: movl %edi, (%rdx)
866 ; FAST-NEXT: andb $1, %al
867 ; FAST-NEXT: movzbl %al, %eax
868 ; FAST-NEXT: retq
869 %v2 = load i32, i32* %ptr2
870 %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
871 %val = extractvalue {i32, i1} %t, 0
872 %obit = extractvalue {i32, i1} %t, 1
873 store i32 %val, i32* %res
874 ret i1 %obit
875 }
876
; i64 signed multiply-with-overflow whose first operand (%v1) is loaded from
; %ptr1. The product is stored to %res and the overflow bit is returned.
; Both check blocks expect the load folded into the two-operand imulq as a
; memory source operand.
877 define zeroext i1 @smuloi64_load(i64* %ptr1, i64 %v2, i64* %res) {
878 ; SDAG-LABEL: smuloi64_load:
879 ; SDAG: ## %bb.0:
880 ; SDAG-NEXT: imulq (%rdi), %rsi
881 ; SDAG-NEXT: seto %al
882 ; SDAG-NEXT: movq %rsi, (%rdx)
883 ; SDAG-NEXT: retq
884 ;
885 ; FAST-LABEL: smuloi64_load:
886 ; FAST: ## %bb.0:
887 ; FAST-NEXT: imulq (%rdi), %rsi
888 ; FAST-NEXT: seto %al
889 ; FAST-NEXT: movq %rsi, (%rdx)
890 ; FAST-NEXT: andb $1, %al
891 ; FAST-NEXT: movzbl %al, %eax
892 ; FAST-NEXT: retq
893 %v1 = load i64, i64* %ptr1
894 %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)
895 %val = extractvalue {i64, i1} %t, 0
896 %obit = extractvalue {i64, i1} %t, 1
897 store i64 %val, i64* %res
898 ret i1 %obit
899 }
900
; i64 signed multiply-with-overflow whose second operand (%v2) is loaded from
; %ptr2. The product is stored to %res and the overflow bit is returned.
; Both check blocks expect the load folded into the two-operand imulq as a
; memory source operand.
901 define zeroext i1 @smuloi64_load2(i64 %v1, i64* %ptr2, i64* %res) {
902 ; SDAG-LABEL: smuloi64_load2:
903 ; SDAG: ## %bb.0:
904 ; SDAG-NEXT: imulq (%rsi), %rdi
905 ; SDAG-NEXT: seto %al
906 ; SDAG-NEXT: movq %rdi, (%rdx)
907 ; SDAG-NEXT: retq
908 ;
909 ; FAST-LABEL: smuloi64_load2:
910 ; FAST: ## %bb.0:
911 ; FAST-NEXT: imulq (%rsi), %rdi
912 ; FAST-NEXT: seto %al
913 ; FAST-NEXT: movq %rdi, (%rdx)
914 ; FAST-NEXT: andb $1, %al
915 ; FAST-NEXT: movzbl %al, %eax
916 ; FAST-NEXT: retq
917 %v2 = load i64, i64* %ptr2
918 %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)
919 %val = extractvalue {i64, i1} %t, 0
920 %obit = extractvalue {i64, i1} %t, 1
921 store i64 %val, i64* %res
922 ret i1 %obit
923 }
924
; i8 unsigned multiply-with-overflow whose first operand (%v1) is loaded from
; %ptr1. The product is stored to %res and the overflow bit is returned.
; Both check blocks expect the load as a separate movb ahead of mulb, i.e.
; the load is not folded into the multiply.
925 define zeroext i1 @umuloi8_load(i8* %ptr1, i8 %v2, i8* %res) {
926 ; SDAG-LABEL: umuloi8_load:
927 ; SDAG: ## %bb.0:
928 ; SDAG-NEXT: movb (%rdi), %al
929 ; SDAG-NEXT: mulb %sil
930 ; SDAG-NEXT: seto %cl
931 ; SDAG-NEXT: movb %al, (%rdx)
932 ; SDAG-NEXT: movl %ecx, %eax
933 ; SDAG-NEXT: retq
934 ;
935 ; FAST-LABEL: umuloi8_load:
936 ; FAST: ## %bb.0:
937 ; FAST-NEXT: movb (%rdi), %al
938 ; FAST-NEXT: mulb %sil
939 ; FAST-NEXT: seto %cl
940 ; FAST-NEXT: movb %al, (%rdx)
941 ; FAST-NEXT: andb $1, %cl
942 ; FAST-NEXT: movzbl %cl, %eax
943 ; FAST-NEXT: retq
944 %v1 = load i8, i8* %ptr1
945 %t = call {i8, i1} @llvm.umul.with.overflow.i8(i8 %v1, i8 %v2)
946 %val = extractvalue {i8, i1} %t, 0
947 %obit = extractvalue {i8, i1} %t, 1
948 store i8 %val, i8* %res
949 ret i1 %obit
950 }
951
; i8 unsigned multiply-with-overflow whose second operand (%v2) is loaded from
; %ptr2. The product is stored to %res and the overflow bit is returned.
; The SelectionDAG checks expect a separate movb load feeding mulb, whereas
; the fast-isel checks expect the load folded as a memory operand of mulb.
952 define zeroext i1 @umuloi8_load2(i8 %v1, i8* %ptr2, i8* %res) {
953 ; SDAG-LABEL: umuloi8_load2:
954 ; SDAG: ## %bb.0:
955 ; SDAG-NEXT: movl %edi, %eax
956 ; SDAG-NEXT: movb (%rsi), %cl
957 ; SDAG-NEXT: ## kill: def $al killed $al killed $eax
958 ; SDAG-NEXT: mulb %cl
959 ; SDAG-NEXT: seto %cl
960 ; SDAG-NEXT: movb %al, (%rdx)
961 ; SDAG-NEXT: movl %ecx, %eax
962 ; SDAG-NEXT: retq
963 ;
964 ; FAST-LABEL: umuloi8_load2:
965 ; FAST: ## %bb.0:
966 ; FAST-NEXT: movl %edi, %eax
967 ; FAST-NEXT: ## kill: def $al killed $al killed $eax
968 ; FAST-NEXT: mulb (%rsi)
969 ; FAST-NEXT: seto %cl
970 ; FAST-NEXT: movb %al, (%rdx)
971 ; FAST-NEXT: andb $1, %cl
972 ; FAST-NEXT: movzbl %cl, %eax
973 ; FAST-NEXT: retq
974 %v2 = load i8, i8* %ptr2
975 %t = call {i8, i1} @llvm.umul.with.overflow.i8(i8 %v1, i8 %v2)
976 %val = extractvalue {i8, i1} %t, 0
977 %obit = extractvalue {i8, i1} %t, 1
978 store i8 %val, i8* %res
979 ret i1 %obit
980 }
981
; i16 unsigned multiply-with-overflow whose first operand (%v1) is loaded from
; %ptr1. The product is stored to %res and the overflow bit is returned.
; Both check blocks expect the load as a separate movzwl ahead of mulw, i.e.
; the load is not folded into the multiply.
982 define zeroext i1 @umuloi16_load(i16* %ptr1, i16 %v2, i16* %res) {
983 ; SDAG-LABEL: umuloi16_load:
984 ; SDAG: ## %bb.0:
985 ; SDAG-NEXT: movq %rdx, %rcx
986 ; SDAG-NEXT: movzwl (%rdi), %eax
987 ; SDAG-NEXT: mulw %si
988 ; SDAG-NEXT: seto %dl
989 ; SDAG-NEXT: movw %ax, (%rcx)
990 ; SDAG-NEXT: movl %edx, %eax
991 ; SDAG-NEXT: retq
992 ;
993 ; FAST-LABEL: umuloi16_load:
994 ; FAST: ## %bb.0:
995 ; FAST-NEXT: movq %rdx, %rcx
996 ; FAST-NEXT: movzwl (%rdi), %eax
997 ; FAST-NEXT: mulw %si
998 ; FAST-NEXT: seto %dl
999 ; FAST-NEXT: movw %ax, (%rcx)
1000 ; FAST-NEXT: andb $1, %dl
1001 ; FAST-NEXT: movzbl %dl, %eax
1002 ; FAST-NEXT: retq
1003 %v1 = load i16, i16* %ptr1
1004 %t = call {i16, i1} @llvm.umul.with.overflow.i16(i16 %v1, i16 %v2)
1005 %val = extractvalue {i16, i1} %t, 0
1006 %obit = extractvalue {i16, i1} %t, 1
1007 store i16 %val, i16* %res
1008 ret i1 %obit
1009 }
1010
; i16 unsigned multiply-with-overflow whose second operand (%v2) is loaded
; from %ptr2. The product is stored to %res and the overflow bit is returned.
; The SelectionDAG checks expect a separate movzwl load feeding mulw, whereas
; the fast-isel checks expect the load folded as a memory operand of mulw.
1011 define zeroext i1 @umuloi16_load2(i16 %v1, i16* %ptr2, i16* %res) {
1012 ; SDAG-LABEL: umuloi16_load2:
1013 ; SDAG: ## %bb.0:
1014 ; SDAG-NEXT: movq %rdx, %rcx
1015 ; SDAG-NEXT: movl %edi, %eax
1016 ; SDAG-NEXT: movzwl (%rsi), %edx
1017 ; SDAG-NEXT: ## kill: def $ax killed $ax killed $eax
1018 ; SDAG-NEXT: mulw %dx
1019 ; SDAG-NEXT: seto %dl
1020 ; SDAG-NEXT: movw %ax, (%rcx)
1021 ; SDAG-NEXT: movl %edx, %eax
1022 ; SDAG-NEXT: retq
1023 ;
1024 ; FAST-LABEL: umuloi16_load2:
1025 ; FAST: ## %bb.0:
1026 ; FAST-NEXT: movq %rdx, %rcx
1027 ; FAST-NEXT: movl %edi, %eax
1028 ; FAST-NEXT: ## kill: def $ax killed $ax killed $eax
1029 ; FAST-NEXT: mulw (%rsi)
1030 ; FAST-NEXT: seto %dl
1031 ; FAST-NEXT: movw %ax, (%rcx)
1032 ; FAST-NEXT: andb $1, %dl
1033 ; FAST-NEXT: movzbl %dl, %eax
1034 ; FAST-NEXT: retq
1035 %v2 = load i16, i16* %ptr2
1036 %t = call {i16, i1} @llvm.umul.with.overflow.i16(i16 %v1, i16 %v2)
1037 %val = extractvalue {i16, i1} %t, 0
1038 %obit = extractvalue {i16, i1} %t, 1
1039 store i16 %val, i16* %res
1040 ret i1 %obit
1041 }
1042
; i32 unsigned multiply-with-overflow whose first operand (%v1) is loaded from
; %ptr1. The product is stored to %res and the overflow bit is returned.
; Both check blocks expect the load as a separate movl ahead of mull, i.e.
; the load is not folded into the multiply.
1043 define zeroext i1 @umuloi32_load(i32* %ptr1, i32 %v2, i32* %res) {
1044 ; SDAG-LABEL: umuloi32_load:
1045 ; SDAG: ## %bb.0:
1046 ; SDAG-NEXT: movq %rdx, %rcx
1047 ; SDAG-NEXT: movl (%rdi), %eax
1048 ; SDAG-NEXT: mull %esi
1049 ; SDAG-NEXT: seto %dl
1050 ; SDAG-NEXT: movl %eax, (%rcx)
1051 ; SDAG-NEXT: movl %edx, %eax
1052 ; SDAG-NEXT: retq
1053 ;
1054 ; FAST-LABEL: umuloi32_load:
1055 ; FAST: ## %bb.0:
1056 ; FAST-NEXT: movq %rdx, %rcx
1057 ; FAST-NEXT: movl (%rdi), %eax
1058 ; FAST-NEXT: mull %esi
1059 ; FAST-NEXT: seto %dl
1060 ; FAST-NEXT: movl %eax, (%rcx)
1061 ; FAST-NEXT: andb $1, %dl
1062 ; FAST-NEXT: movzbl %dl, %eax
1063 ; FAST-NEXT: retq
1064 %v1 = load i32, i32* %ptr1
1065 %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2)
1066 %val = extractvalue {i32, i1} %t, 0
1067 %obit = extractvalue {i32, i1} %t, 1
1068 store i32 %val, i32* %res
1069 ret i1 %obit
1070 }
1071
; i32 unsigned multiply-with-overflow whose second operand (%v2) is loaded
; from %ptr2. The product is stored to %res and the overflow bit is returned.
; The SelectionDAG checks expect a separate movl load feeding mull, whereas
; the fast-isel checks expect the load folded as a memory operand of mull.
1072 define zeroext i1 @umuloi32_load2(i32 %v1, i32* %ptr2, i32* %res) {
1073 ; SDAG-LABEL: umuloi32_load2:
1074 ; SDAG: ## %bb.0:
1075 ; SDAG-NEXT: movq %rdx, %rcx
1076 ; SDAG-NEXT: movl %edi, %eax
1077 ; SDAG-NEXT: movl (%rsi), %edx
1078 ; SDAG-NEXT: mull %edx
1079 ; SDAG-NEXT: seto %dl
1080 ; SDAG-NEXT: movl %eax, (%rcx)
1081 ; SDAG-NEXT: movl %edx, %eax
1082 ; SDAG-NEXT: retq
1083 ;
1084 ; FAST-LABEL: umuloi32_load2:
1085 ; FAST: ## %bb.0:
1086 ; FAST-NEXT: movq %rdx, %rcx
1087 ; FAST-NEXT: movl %edi, %eax
1088 ; FAST-NEXT: mull (%rsi)
1089 ; FAST-NEXT: seto %dl
1090 ; FAST-NEXT: movl %eax, (%rcx)
1091 ; FAST-NEXT: andb $1, %dl
1092 ; FAST-NEXT: movzbl %dl, %eax
1093 ; FAST-NEXT: retq
1094 %v2 = load i32, i32* %ptr2
1095 %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2)
1096 %val = extractvalue {i32, i1} %t, 0
1097 %obit = extractvalue {i32, i1} %t, 1
1098 store i32 %val, i32* %res
1099 ret i1 %obit
1100 }
1101
; i64 unsigned multiply-with-overflow whose first operand (%v1) is loaded from
; %ptr1. The product is stored to %res and the overflow bit is returned.
; Both check blocks expect the load as a separate movq ahead of mulq, i.e.
; the load is not folded into the multiply.
1102 define zeroext i1 @umuloi64_load(i64* %ptr1, i64 %v2, i64* %res) {
1103 ; SDAG-LABEL: umuloi64_load:
1104 ; SDAG: ## %bb.0:
1105 ; SDAG-NEXT: movq %rdx, %rcx
1106 ; SDAG-NEXT: movq (%rdi), %rax
1107 ; SDAG-NEXT: mulq %rsi
1108 ; SDAG-NEXT: seto %dl
1109 ; SDAG-NEXT: movq %rax, (%rcx)
1110 ; SDAG-NEXT: movl %edx, %eax
1111 ; SDAG-NEXT: retq
1112 ;
1113 ; FAST-LABEL: umuloi64_load:
1114 ; FAST: ## %bb.0:
1115 ; FAST-NEXT: movq %rdx, %rcx
1116 ; FAST-NEXT: movq (%rdi), %rax
1117 ; FAST-NEXT: mulq %rsi
1118 ; FAST-NEXT: seto %dl
1119 ; FAST-NEXT: movq %rax, (%rcx)
1120 ; FAST-NEXT: andb $1, %dl
1121 ; FAST-NEXT: movzbl %dl, %eax
1122 ; FAST-NEXT: retq
1123 %v1 = load i64, i64* %ptr1
1124 %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2)
1125 %val = extractvalue {i64, i1} %t, 0
1126 %obit = extractvalue {i64, i1} %t, 1
1127 store i64 %val, i64* %res
1128 ret i1 %obit
1129 }
1130
; i64 unsigned multiply-with-overflow whose second operand (%v2) is loaded
; from %ptr2. The product is stored to %res and the overflow bit is returned.
; The SelectionDAG checks expect a separate movq load feeding mulq, whereas
; the fast-isel checks expect the load folded as a memory operand of mulq.
1131 define zeroext i1 @umuloi64_load2(i64 %v1, i64* %ptr2, i64* %res) {
1132 ; SDAG-LABEL: umuloi64_load2:
1133 ; SDAG: ## %bb.0:
1134 ; SDAG-NEXT: movq %rdx, %rcx
1135 ; SDAG-NEXT: movq %rdi, %rax
1136 ; SDAG-NEXT: movq (%rsi), %rdx
1137 ; SDAG-NEXT: mulq %rdx
1138 ; SDAG-NEXT: seto %dl
1139 ; SDAG-NEXT: movq %rax, (%rcx)
1140 ; SDAG-NEXT: movl %edx, %eax
1141 ; SDAG-NEXT: retq
1142 ;
1143 ; FAST-LABEL: umuloi64_load2:
1144 ; FAST: ## %bb.0:
1145 ; FAST-NEXT: movq %rdx, %rcx
1146 ; FAST-NEXT: movq %rdi, %rax
1147 ; FAST-NEXT: mulq (%rsi)
1148 ; FAST-NEXT: seto %dl
1149 ; FAST-NEXT: movq %rax, (%rcx)
1150 ; FAST-NEXT: andb $1, %dl
1151 ; FAST-NEXT: movzbl %dl, %eax
1152 ; FAST-NEXT: retq
1153 %v2 = load i64, i64* %ptr2
1154 %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2)
1155 %val = extractvalue {i64, i1} %t, 0
1156 %obit = extractvalue {i64, i1} %t, 1
1157 store i64 %val, i64* %res
1158 ret i1 %obit
7221159 }
7231160
7241161 declare {i8, i1} @llvm.smul.with.overflow.i8 (i8, i8 ) nounwind readnone