llvm.org GIT mirror of llvm — commit 2869204
"Add AVX2 VPMOVMASK instructions and intrinsics." git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143904 91177308-0d34-0410-b5e6-96231b3b80d8 — Craig Topper, 8 years ago
3 changed files with 129 additions and 0 deletions.
17431743 Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
17441744 llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
17451745 }
1746
// Conditional load ops
// AVX2 masked-load intrinsics (VPMASKMOVD/VPMASKMOVQ load forms).
// Each takes a raw pointer plus an integer-vector mask and returns the
// loaded vector.  Marked IntrReadMem: they only read memory, so they can
// be CSE'd/hoisted like other read-only operations.
let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
  def int_x86_avx2_maskload_d : GCCBuiltin<"__builtin_ia32_maskloadd">,
        Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_v4i32_ty], [IntrReadMem]>;
  def int_x86_avx2_maskload_q : GCCBuiltin<"__builtin_ia32_maskloadq">,
        Intrinsic<[llvm_v2i64_ty], [llvm_ptr_ty, llvm_v2i64_ty], [IntrReadMem]>;
  def int_x86_avx2_maskload_d_256 : GCCBuiltin<"__builtin_ia32_maskloadd256">,
        Intrinsic<[llvm_v8i32_ty], [llvm_ptr_ty, llvm_v8i32_ty], [IntrReadMem]>;
  def int_x86_avx2_maskload_q_256 : GCCBuiltin<"__builtin_ia32_maskloadq256">,
        Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty, llvm_v4i64_ty], [IntrReadMem]>;
}
1758
// Conditional store ops
// AVX2 masked-store intrinsics (VPMASKMOVD/VPMASKMOVQ store forms).
// Each takes a raw pointer, an integer-vector mask, and the data vector,
// and returns nothing.  The attribute list is empty (no IntrNoMem /
// IntrReadMem) because these write memory and must not be reordered or
// eliminated.
let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
  def int_x86_avx2_maskstore_d : GCCBuiltin<"__builtin_ia32_maskstored">,
        Intrinsic<[], [llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i32_ty], []>;
  def int_x86_avx2_maskstore_q : GCCBuiltin<"__builtin_ia32_maskstoreq">,
        Intrinsic<[], [llvm_ptr_ty, llvm_v2i64_ty, llvm_v2i64_ty], []>;
  def int_x86_avx2_maskstore_d_256 :
        GCCBuiltin<"__builtin_ia32_maskstored256">,
        Intrinsic<[], [llvm_ptr_ty, llvm_v8i32_ty, llvm_v8i32_ty], []>;
  def int_x86_avx2_maskstore_q_256 :
        GCCBuiltin<"__builtin_ia32_maskstoreq256">,
        Intrinsic<[], [llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i64_ty], []>;
}
1772
17461773 // Misc.
17471774 let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
17481775 def int_x86_avx2_pmovmskb : GCCBuiltin<"__builtin_ia32_pmovmskb256">,
// Memory-destination form of VEXTRACTI128.  The pattern list is empty,
// so instruction selection for it is handled elsewhere (not via this def).
def VEXTRACTI128mr : AVX2AIi8<0x39, MRMDestMem, (outs),
          (ins i128mem:$dst, VR256:$src1, i8imm:$src2),
          "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, VEX;
7565
//===----------------------------------------------------------------------===//
// VPMASKMOV - Conditional SIMD Integer Packed Loads and Stores
//
// NOTE(review): the scraped header read "multiclass avx2_pmovmask" with no
// template parameter list — the "<string OpcodeStr," span was eaten by the
// HTML rendering.  Restored here: the body uses OpcodeStr, and the remaining
// parameters (IntLd128, ... pf256>) continued the lost list.
// pf128/pf256 are accepted but not yet referenced by any pattern below.
multiclass avx2_pmovmask<string OpcodeStr,
                         Intrinsic IntLd128, Intrinsic IntLd256,
                         Intrinsic IntSt128, Intrinsic IntSt256,
                         PatFrag pf128, PatFrag pf256> {
  // 128-bit masked load: dst = masked load from $src2 under mask $src1.
  def rm  : AVX28I<0x8c, MRMSrcMem, (outs VR128:$dst),
             (ins VR128:$src1, i128mem:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set VR128:$dst, (IntLd128 addr:$src2, VR128:$src1))]>, VEX_4V;
  // 256-bit masked load.
  def Yrm : AVX28I<0x8c, MRMSrcMem, (outs VR256:$dst),
             (ins VR256:$src1, i256mem:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>, VEX_4V;
  // 128-bit masked store: store $src2 to $dst under mask $src1.
  def mr  : AVX28I<0x8e, MRMDestMem, (outs),
             (ins i128mem:$dst, VR128:$src1, VR128:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(IntSt128 addr:$dst, VR128:$src1, VR128:$src2)]>, VEX_4V;
  // 256-bit masked store.
  def Ymr : AVX28I<0x8e, MRMDestMem, (outs),
             (ins i256mem:$dst, VR256:$src1, VR256:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>, VEX_4V;
}
7590
// Instantiate the dword and qword variants.  VEX_W selects the 64-bit
// element form (vpmaskmovq).
defm VPMASKMOVD : avx2_pmovmask<"vpmaskmovd",
                                int_x86_avx2_maskload_d,
                                int_x86_avx2_maskload_d_256,
                                int_x86_avx2_maskstore_d,
                                int_x86_avx2_maskstore_d_256,
                                memopv4i32, memopv8i32>;
defm VPMASKMOVQ : avx2_pmovmask<"vpmaskmovq",
                                int_x86_avx2_maskload_q,
                                int_x86_avx2_maskload_q_256,
                                int_x86_avx2_maskstore_q,
                                int_x86_avx2_maskstore_q_256,
                                memopv2i64, memopv4i64>, VEX_W;
901901 ret <4 x i64> %res
902902 }
903903 declare <4 x i64> @llvm.x86.avx2.vinserti128(<4 x i64>, <2 x i64>, i8) nounwind readnone
904
905
; Check that the 128-bit qword masked-load intrinsic selects vpmaskmovq.
define <2 x i64> @test_x86_avx2_maskload_q(i8* %a0, <2 x i64> %a1) {
  ; CHECK: vpmaskmovq
  %res = call <2 x i64> @llvm.x86.avx2.maskload.q(i8* %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx2.maskload.q(i8*, <2 x i64>) nounwind readonly
912
913
; Check that the 256-bit qword masked-load intrinsic selects vpmaskmovq.
define <4 x i64> @test_x86_avx2_maskload_q_256(i8* %a0, <4 x i64> %a1) {
  ; CHECK: vpmaskmovq
  %res = call <4 x i64> @llvm.x86.avx2.maskload.q.256(i8* %a0, <4 x i64> %a1) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.maskload.q.256(i8*, <4 x i64>) nounwind readonly
920
921
; Check that the 128-bit dword masked-load intrinsic selects vpmaskmovd.
define <4 x i32> @test_x86_avx2_maskload_d(i8* %a0, <4 x i32> %a1) {
  ; CHECK: vpmaskmovd
  %res = call <4 x i32> @llvm.x86.avx2.maskload.d(i8* %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.maskload.d(i8*, <4 x i32>) nounwind readonly
928
929
; Check that the 256-bit dword masked-load intrinsic selects vpmaskmovd.
define <8 x i32> @test_x86_avx2_maskload_d_256(i8* %a0, <8 x i32> %a1) {
  ; CHECK: vpmaskmovd
  %res = call <8 x i32> @llvm.x86.avx2.maskload.d.256(i8* %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.maskload.d.256(i8*, <8 x i32>) nounwind readonly
936
937
; Check that the 128-bit qword masked-store intrinsic selects vpmaskmovq.
define void @test_x86_avx2_maskstore_q(i8* %a0, <2 x i64> %a1, <2 x i64> %a2) {
  ; CHECK: vpmaskmovq
  call void @llvm.x86.avx2.maskstore.q(i8* %a0, <2 x i64> %a1, <2 x i64> %a2)
  ret void
}
declare void @llvm.x86.avx2.maskstore.q(i8*, <2 x i64>, <2 x i64>) nounwind
944
945
; Check that the 256-bit qword masked-store intrinsic selects vpmaskmovq.
define void @test_x86_avx2_maskstore_q_256(i8* %a0, <4 x i64> %a1, <4 x i64> %a2) {
  ; CHECK: vpmaskmovq
  call void @llvm.x86.avx2.maskstore.q.256(i8* %a0, <4 x i64> %a1, <4 x i64> %a2)
  ret void
}
declare void @llvm.x86.avx2.maskstore.q.256(i8*, <4 x i64>, <4 x i64>) nounwind
952
953
; Check that the 128-bit dword masked-store intrinsic selects vpmaskmovd.
define void @test_x86_avx2_maskstore_d(i8* %a0, <4 x i32> %a1, <4 x i32> %a2) {
  ; CHECK: vpmaskmovd
  call void @llvm.x86.avx2.maskstore.d(i8* %a0, <4 x i32> %a1, <4 x i32> %a2)
  ret void
}
declare void @llvm.x86.avx2.maskstore.d(i8*, <4 x i32>, <4 x i32>) nounwind
960
961
; Check that the 256-bit dword masked-store intrinsic selects vpmaskmovd.
define void @test_x86_avx2_maskstore_d_256(i8* %a0, <8 x i32> %a1, <8 x i32> %a2) {
  ; CHECK: vpmaskmovd
  call void @llvm.x86.avx2.maskstore.d.256(i8* %a0, <8 x i32> %a1, <8 x i32> %a2)
  ret void
}
declare void @llvm.x86.avx2.maskstore.d.256(i8*, <8 x i32>, <8 x i32>) nounwind