llvm.org GIT mirror llvm / 4c763ee
Add AVX2 variable shift instructions and intrinsics. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143915 91177308-0d34-0410-b5e6-96231b3b80d8 Craig Topper 8 years ago
3 changed file(s) with 156 addition(s) and 0 deletion(s). Raw diff Collapse all Expand all
17701770 Intrinsic<[], [llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i64_ty], []>;
17711771 }
17721772
1773 // Variable bit shift ops: each intrinsic takes two vectors of one type and returns that same type; all are IntrNoMem.
1774 let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
1775 def int_x86_avx2_psllv_d : GCCBuiltin<"__builtin_ia32_psllv4si">,
1776 Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
1777 [IntrNoMem]>;
1778 def int_x86_avx2_psllv_d_256 : GCCBuiltin<"__builtin_ia32_psllv8si">,
1779 Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty],
1780 [IntrNoMem]>;
1781 def int_x86_avx2_psllv_q : GCCBuiltin<"__builtin_ia32_psllv2di">,
1782 Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
1783 [IntrNoMem]>;
1784 def int_x86_avx2_psllv_q_256 : GCCBuiltin<"__builtin_ia32_psllv4di">,
1785 Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty],
1786 [IntrNoMem]>;
1787 // psrlv: the same four variants as psllv above (d = i32 elements, q = i64 elements; _256 = 256-bit vectors).
1788 def int_x86_avx2_psrlv_d : GCCBuiltin<"__builtin_ia32_psrlv4si">,
1789 Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
1790 [IntrNoMem]>;
1791 def int_x86_avx2_psrlv_d_256 : GCCBuiltin<"__builtin_ia32_psrlv8si">,
1792 Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty],
1793 [IntrNoMem]>;
1794 def int_x86_avx2_psrlv_q : GCCBuiltin<"__builtin_ia32_psrlv2di">,
1795 Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
1796 [IntrNoMem]>;
1797 def int_x86_avx2_psrlv_q_256 : GCCBuiltin<"__builtin_ia32_psrlv4di">,
1798 Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty],
1799 [IntrNoMem]>;
1800 // psrav: only the d variants (v4i32 and v8i32) are defined in this block; there is no q form here.
1801 def int_x86_avx2_psrav_d : GCCBuiltin<"__builtin_ia32_psrav4si">,
1802 Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
1803 [IntrNoMem]>;
1804 def int_x86_avx2_psrav_d_256 : GCCBuiltin<"__builtin_ia32_psrav8si">,
1805 Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty],
1806 [IntrNoMem]>;
1807 }
1808
17731809 // Misc.
17741810 let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
17751811 def int_x86_avx2_pmovmskb : GCCBuiltin<"__builtin_ia32_pmovmskb256">,
76007600 int_x86_avx2_maskstore_q,
76017601 int_x86_avx2_maskstore_q_256,
76027602 memopv2i64, memopv4i64>, VEX_W;
7603
7604
7605 //===----------------------------------------------------------------------===//
7606 // Variable Bit Shifts
7607 // avx2_var_shift emits rr/rm (VR128) and Yrr/Yrm (VR256) records that select Int128/Int256; rm/Yrm read $src2 from memory through pf128/pf256.
7608 multiclass avx2_var_shift<bits<8> opc, string OpcodeStr,
7609 PatFrag pf128, PatFrag pf256,
7610 Intrinsic Int128, Intrinsic Int256> {
7611 def rr : AVX28I<opc, MRMSrcReg, (outs VR128:$dst), // 128-bit form, both sources in registers
7612 (ins VR128:$src1, VR128:$src2),
7613 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7614 [(set VR128:$dst, (Int128 VR128:$src1, VR128:$src2))]>, VEX_4V;
7615 def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst), // 128-bit form, second source loaded via pf128
7616 (ins VR128:$src1, i128mem:$src2),
7617 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7618 [(set VR128:$dst, (Int128 VR128:$src1, (pf128 addr:$src2)))]>,
7619 VEX_4V;
7620 def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst), // 256-bit form, both sources in registers
7621 (ins VR256:$src1, VR256:$src2),
7622 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7623 [(set VR256:$dst, (Int256 VR256:$src1, VR256:$src2))]>, VEX_4V;
7624 def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst), // 256-bit form, second source loaded via pf256
7625 (ins VR256:$src1, i256mem:$src2),
7626 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7627 [(set VR256:$dst, (Int256 VR256:$src1, (pf256 addr:$src2)))]>,
7628 VEX_4V;
7629 }
7630
7631 defm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", memopv4i32, memopv8i32,
7632 int_x86_avx2_psllv_d, int_x86_avx2_psllv_d_256>;
7633 defm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", memopv2i64, memopv4i64,
7634 int_x86_avx2_psllv_q, int_x86_avx2_psllv_q_256>,
7635 VEX_W; // same opcode (0x47) as VPSLLVD; the Q form is distinguished by VEX_W
7636 defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", memopv4i32, memopv8i32,
7637 int_x86_avx2_psrlv_d, int_x86_avx2_psrlv_d_256>;
7638 defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", memopv2i64, memopv4i64,
7639 int_x86_avx2_psrlv_q, int_x86_avx2_psrlv_q_256>,
7640 VEX_W; // same opcode (0x45) as VPSRLVD; the Q form is distinguished by VEX_W
7641 defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", memopv4i32, memopv8i32,
7642 int_x86_avx2_psrav_d, int_x86_avx2_psrav_d_256>; // only a D form is instantiated for vpsrav in this hunk
965965 ret void
966966 }
967967 declare void @llvm.x86.avx2.maskstore.d.256(i8*, <8 x i32>, <8 x i32>) nounwind
968
969 ; Calls llvm.x86.avx2.psllv.d on <4 x i32> operands; the CHECK line requires vpsllvd in the checked output.
970 define <4 x i32> @test_x86_avx2_psllv_d(<4 x i32> %a0, <4 x i32> %a1) {
971 ; CHECK: vpsllvd
972 %res = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
973 ret <4 x i32> %res
974 }
975 declare <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32>, <4 x i32>) nounwind readnone
976
977 ; Calls llvm.x86.avx2.psllv.d.256 on <8 x i32> operands; the CHECK line requires vpsllvd in the checked output.
978 define <8 x i32> @test_x86_avx2_psllv_d_256(<8 x i32> %a0, <8 x i32> %a1) {
979 ; CHECK: vpsllvd
980 %res = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
981 ret <8 x i32> %res
982 }
983 declare <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32>, <8 x i32>) nounwind readnone
984
985 ; Calls llvm.x86.avx2.psllv.q on <2 x i64> operands; the CHECK line requires vpsllvq in the checked output.
986 define <2 x i64> @test_x86_avx2_psllv_q(<2 x i64> %a0, <2 x i64> %a1) {
987 ; CHECK: vpsllvq
988 %res = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
989 ret <2 x i64> %res
990 }
991 declare <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64>, <2 x i64>) nounwind readnone
992
993 ; Calls llvm.x86.avx2.psllv.q.256 on <4 x i64> operands; the CHECK line requires vpsllvq in the checked output.
994 define <4 x i64> @test_x86_avx2_psllv_q_256(<4 x i64> %a0, <4 x i64> %a1) {
995 ; CHECK: vpsllvq
996 %res = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %a0, <4 x i64> %a1) ; <<4 x i64>> [#uses=1]
997 ret <4 x i64> %res
998 }
999 declare <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64>, <4 x i64>) nounwind readnone
1000
1001 ; Calls llvm.x86.avx2.psrlv.d on <4 x i32> operands; the CHECK line requires vpsrlvd in the checked output.
1002 define <4 x i32> @test_x86_avx2_psrlv_d(<4 x i32> %a0, <4 x i32> %a1) {
1003 ; CHECK: vpsrlvd
1004 %res = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
1005 ret <4 x i32> %res
1006 }
1007 declare <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32>, <4 x i32>) nounwind readnone
1008
1009 ; Calls llvm.x86.avx2.psrlv.d.256 on <8 x i32> operands; the CHECK line requires vpsrlvd in the checked output.
1010 define <8 x i32> @test_x86_avx2_psrlv_d_256(<8 x i32> %a0, <8 x i32> %a1) {
1011 ; CHECK: vpsrlvd
1012 %res = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
1013 ret <8 x i32> %res
1014 }
1015 declare <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32>, <8 x i32>) nounwind readnone
1016
1017 ; Calls llvm.x86.avx2.psrlv.q on <2 x i64> operands; the CHECK line requires vpsrlvq in the checked output.
1018 define <2 x i64> @test_x86_avx2_psrlv_q(<2 x i64> %a0, <2 x i64> %a1) {
1019 ; CHECK: vpsrlvq
1020 %res = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
1021 ret <2 x i64> %res
1022 }
1023 declare <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64>, <2 x i64>) nounwind readnone
1024
1025 ; Calls llvm.x86.avx2.psrlv.q.256 on <4 x i64> operands; the CHECK line requires vpsrlvq in the checked output.
1026 define <4 x i64> @test_x86_avx2_psrlv_q_256(<4 x i64> %a0, <4 x i64> %a1) {
1027 ; CHECK: vpsrlvq
1028 %res = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %a0, <4 x i64> %a1) ; <<4 x i64>> [#uses=1]
1029 ret <4 x i64> %res
1030 }
1031 declare <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64>, <4 x i64>) nounwind readnone
1032
1033 ; Calls llvm.x86.avx2.psrav.d on <4 x i32> operands; the CHECK line requires vpsravd in the checked output.
1034 define <4 x i32> @test_x86_avx2_psrav_d(<4 x i32> %a0, <4 x i32> %a1) {
1035 ; CHECK: vpsravd
1036 %res = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
1037 ret <4 x i32> %res
1038 }
1039 declare <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32>, <4 x i32>) nounwind readnone
1040
1041 ; Calls llvm.x86.avx2.psrav.d.256 on <8 x i32> operands; the CHECK line requires vpsravd in the checked output.
1042 define <8 x i32> @test_x86_avx2_psrav_d_256(<8 x i32> %a0, <8 x i32> %a1) {
1043 ; CHECK: vpsravd
1044 %res = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
1045 ret <8 x i32> %res
1046 }
1047 declare <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32>, <8 x i32>) nounwind readnone