llvm.org GIT mirror llvm / 22b942a
Add separate intrinsics for MMX / SSE shifts with i32 integer operands. This allows us to simplify the horribly complicated matching code. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@50601 91177308-0d34-0410-b5e6-96231b3b80d8 Evan Cheng 12 years ago
5 changed file(s) with 110 addition(s) and 84 deletion(s). Raw diff Collapse all Expand all
323323 def int_x86_sse2_psll_q : GCCBuiltin<"__builtin_ia32_psllq128">,
324324 Intrinsic<[llvm_v2i64_ty, llvm_v2i64_ty,
325325 llvm_v2i64_ty], [IntrNoMem]>;
326 def int_x86_sse2_psll_dq : GCCBuiltin<"__builtin_ia32_pslldqi128">,
327 Intrinsic<[llvm_v2i64_ty, llvm_v2i64_ty,
328 llvm_i32_ty], [IntrNoMem]>;
329326 def int_x86_sse2_psrl_w : GCCBuiltin<"__builtin_ia32_psrlw128">,
330327 Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty,
331328 llvm_v8i16_ty], [IntrNoMem]>;
335332 def int_x86_sse2_psrl_q : GCCBuiltin<"__builtin_ia32_psrlq128">,
336333 Intrinsic<[llvm_v2i64_ty, llvm_v2i64_ty,
337334 llvm_v2i64_ty], [IntrNoMem]>;
335 def int_x86_sse2_psra_w : GCCBuiltin<"__builtin_ia32_psraw128">,
336 Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty,
337 llvm_v8i16_ty], [IntrNoMem]>;
338 def int_x86_sse2_psra_d : GCCBuiltin<"__builtin_ia32_psrad128">,
339 Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty,
340 llvm_v4i32_ty], [IntrNoMem]>;
341
342 def int_x86_sse2_pslli_w : GCCBuiltin<"__builtin_ia32_psllwi128">,
343 Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty,
344 llvm_i32_ty], [IntrNoMem]>;
345 def int_x86_sse2_pslli_d : GCCBuiltin<"__builtin_ia32_pslldi128">,
346 Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty,
347 llvm_i32_ty], [IntrNoMem]>;
348 def int_x86_sse2_pslli_q : GCCBuiltin<"__builtin_ia32_psllqi128">,
349 Intrinsic<[llvm_v2i64_ty, llvm_v2i64_ty,
350 llvm_i32_ty], [IntrNoMem]>;
351 def int_x86_sse2_psrli_w : GCCBuiltin<"__builtin_ia32_psrlwi128">,
352 Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty,
353 llvm_i32_ty], [IntrNoMem]>;
354 def int_x86_sse2_psrli_d : GCCBuiltin<"__builtin_ia32_psrldi128">,
355 Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty,
356 llvm_i32_ty], [IntrNoMem]>;
357 def int_x86_sse2_psrli_q : GCCBuiltin<"__builtin_ia32_psrlqi128">,
358 Intrinsic<[llvm_v2i64_ty, llvm_v2i64_ty,
359 llvm_i32_ty], [IntrNoMem]>;
360 def int_x86_sse2_psrai_w : GCCBuiltin<"__builtin_ia32_psrawi128">,
361 Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty,
362 llvm_i32_ty], [IntrNoMem]>;
363 def int_x86_sse2_psrai_d : GCCBuiltin<"__builtin_ia32_psradi128">,
364 Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty,
365 llvm_i32_ty], [IntrNoMem]>;
366
367 def int_x86_sse2_psll_dq : GCCBuiltin<"__builtin_ia32_pslldqi128">,
368 Intrinsic<[llvm_v2i64_ty, llvm_v2i64_ty,
369 llvm_i32_ty], [IntrNoMem]>;
338370 def int_x86_sse2_psrl_dq : GCCBuiltin<"__builtin_ia32_psrldqi128">,
339371 Intrinsic<[llvm_v2i64_ty, llvm_v2i64_ty,
340372 llvm_i32_ty], [IntrNoMem]>;
341 def int_x86_sse2_psra_w : GCCBuiltin<"__builtin_ia32_psraw128">,
342 Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty,
343 llvm_v8i16_ty], [IntrNoMem]>;
344 def int_x86_sse2_psra_d : GCCBuiltin<"__builtin_ia32_psrad128">,
345 Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty,
346 llvm_v4i32_ty], [IntrNoMem]>;
347373 }
348374
349375 // Integer comparison ops
938964 def int_x86_mmx_psra_d : GCCBuiltin<"__builtin_ia32_psrad">,
939965 Intrinsic<[llvm_v2i32_ty, llvm_v2i32_ty,
940966 llvm_v1i64_ty], [IntrNoMem]>;
967
968 def int_x86_mmx_pslli_w : GCCBuiltin<"__builtin_ia32_psllwi">,
969 Intrinsic<[llvm_v4i16_ty, llvm_v4i16_ty,
970 llvm_i32_ty], [IntrNoMem]>;
971 def int_x86_mmx_pslli_d : GCCBuiltin<"__builtin_ia32_pslldi">,
972 Intrinsic<[llvm_v2i32_ty, llvm_v2i32_ty,
973 llvm_i32_ty], [IntrNoMem]>;
974 def int_x86_mmx_pslli_q : GCCBuiltin<"__builtin_ia32_psllqi">,
975 Intrinsic<[llvm_v1i64_ty, llvm_v1i64_ty,
976 llvm_i32_ty], [IntrNoMem]>;
977
978 def int_x86_mmx_psrli_w : GCCBuiltin<"__builtin_ia32_psrlwi">,
979 Intrinsic<[llvm_v4i16_ty, llvm_v4i16_ty,
980 llvm_i32_ty], [IntrNoMem]>;
981 def int_x86_mmx_psrli_d : GCCBuiltin<"__builtin_ia32_psrldi">,
982 Intrinsic<[llvm_v2i32_ty, llvm_v2i32_ty,
983 llvm_i32_ty], [IntrNoMem]>;
984 def int_x86_mmx_psrli_q : GCCBuiltin<"__builtin_ia32_psrlqi">,
985 Intrinsic<[llvm_v1i64_ty, llvm_v1i64_ty,
986 llvm_i32_ty], [IntrNoMem]>;
987
988 def int_x86_mmx_psrai_w : GCCBuiltin<"__builtin_ia32_psrawi">,
989 Intrinsic<[llvm_v4i16_ty, llvm_v4i16_ty,
990 llvm_i32_ty], [IntrNoMem]>;
991 def int_x86_mmx_psrai_d : GCCBuiltin<"__builtin_ia32_psradi">,
992 Intrinsic<[llvm_v2i32_ty, llvm_v2i32_ty,
993 llvm_i32_ty], [IntrNoMem]>;
941994 }
942995
943996 // Pack ops.
117117 }
118118
119119 multiclass MMXI_binop_rmi_int opc, bits<8> opc2, Format ImmForm,
120 string OpcodeStr, Intrinsic IntId> {
120 string OpcodeStr, Intrinsic IntId,
121 Intrinsic IntId2> {
121122 def rr : MMXI
122123 (ins VR64:$src1, VR64:$src2),
123124 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
130131 def ri : MMXIi8
131132 (ins VR64:$src1, i32i8imm:$src2),
132133 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
133 [(set VR64:$dst, (IntId VR64:$src1,
134 (v1i64 (bitconvert
135 (v2i32 (vector_shuffle immAllZerosV,
136 (v2i32 (scalar_to_vector (i32 imm:$src2))),
137 MMX_MOVL_shuffle_mask))))))]>;
134 [(set VR64:$dst, (IntId2 VR64:$src1, (i32 imm:$src2)))]>;
138135 }
139136 }
140137
282279
283280 // Shift Instructions
284281 defm MMX_PSRLW : MMXI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw",
285 int_x86_mmx_psrl_w>;
282 int_x86_mmx_psrl_w, int_x86_mmx_psrli_w>;
286283 defm MMX_PSRLD : MMXI_binop_rmi_int<0xD2, 0x72, MRM2r, "psrld",
287 int_x86_mmx_psrl_d>;
284 int_x86_mmx_psrl_d, int_x86_mmx_psrli_d>;
288285 defm MMX_PSRLQ : MMXI_binop_rmi_int<0xD3, 0x73, MRM2r, "psrlq",
289 int_x86_mmx_psrl_q>;
286 int_x86_mmx_psrl_q, int_x86_mmx_psrli_q>;
290287
291288 defm MMX_PSLLW : MMXI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw",
292 int_x86_mmx_psll_w>;
289 int_x86_mmx_psll_w, int_x86_mmx_pslli_w>;
293290 defm MMX_PSLLD : MMXI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld",
294 int_x86_mmx_psll_d>;
291 int_x86_mmx_psll_d, int_x86_mmx_pslli_d>;
295292 defm MMX_PSLLQ : MMXI_binop_rmi_int<0xF3, 0x73, MRM6r, "psllq",
296 int_x86_mmx_psll_q>;
293 int_x86_mmx_psll_q, int_x86_mmx_pslli_q>;
297294
298295 defm MMX_PSRAW : MMXI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw",
299 int_x86_mmx_psra_w>;
296 int_x86_mmx_psra_w, int_x86_mmx_psrai_w>;
300297 defm MMX_PSRAD : MMXI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad",
301 int_x86_mmx_psra_d>;
298 int_x86_mmx_psra_d, int_x86_mmx_psrai_d>;
302299
303300 // Comparison Instructions
304301 defm MMX_PCMPEQB : MMXI_binop_rm_int<0x74, "pcmpeqb", int_x86_mmx_pcmpeq_b>;
17791779 (bitconvert (memopv2i64 addr:$src2))))]>;
17801780 }
17811781
1782 multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm, // opc: rr/rm opcode; opc2 + ImmForm: ri (immediate) encoding
1783 string OpcodeStr,
1784 Intrinsic IntId, Intrinsic IntId2> { // IntId: vector-operand intrinsic (rr/rm); IntId2: i32-immediate intrinsic (ri)
1785 def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), // NOTE(review): operand list restored after markup stripping - confirm format
1786 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
1787 [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>;
1788 def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), // NOTE(review): operand list restored after markup stripping - confirm format and memory operand class
1789 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
1790 [(set VR128:$dst, (IntId VR128:$src1,
1791 (bitconvert (memopv2i64 addr:$src2))))]>;
1792 def ri : PDIi8<opc2, ImmForm, (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), // NOTE(review): restored; same outs/ins shape as the hand-written PDIi8 *ri defs this multiclass replaces
1793 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
1794 [(set VR128:$dst, (IntId2 VR128:$src1, (i32 imm:$src2)))]>;
1795 }
1796
17821797 /// PDI_binop_rm - Simple SSE2 binary operator.
17831798 multiclass PDI_binop_rm opc, string OpcodeStr, SDNode OpNode,
17841799 ValueType OpVT, bit Commutable = 0> {
18531868 defm PSADBW : PDI_binop_rm_int<0xE0, "psadbw", int_x86_sse2_psad_bw, 1>;
18541869
18551870
1856 defm PSLLW : PDI_binop_rm_int<0xF1, "psllw", int_x86_sse2_psll_w>;
1857 defm PSLLD : PDI_binop_rm_int<0xF2, "pslld", int_x86_sse2_psll_d>;
1858 defm PSLLQ : PDI_binop_rm_int<0xF3, "psllq", int_x86_sse2_psll_q>;
1859
1860 defm PSRLW : PDI_binop_rm_int<0xD1, "psrlw", int_x86_sse2_psrl_w>;
1861 defm PSRLD : PDI_binop_rm_int<0xD2, "psrld", int_x86_sse2_psrl_d>;
1862 defm PSRLQ : PDI_binop_rm_int<0xD3, "psrlq", int_x86_sse2_psrl_q>;
1863
1864 defm PSRAW : PDI_binop_rm_int<0xE1, "psraw", int_x86_sse2_psra_w>;
1865 defm PSRAD : PDI_binop_rm_int<0xE2, "psrad", int_x86_sse2_psra_d>;
1866
1867 // Some immediate variants need to match a bit_convert.
1868 let Constraints = "$src1 = $dst" in {
1869 def PSLLWri : PDIi8<0x71, MRM6r, (outs VR128:$dst),
1870 (ins VR128:$src1, i32i8imm:$src2),
1871 "psllw\t{$src2, $dst|$dst, $src2}",
1872 [(set VR128:$dst, (int_x86_sse2_psll_w VR128:$src1,
1873 (bc_v8i16 (v4i32 (scalar_to_vector (i32 imm:$src2))))))]>;
1874 def PSLLDri : PDIi8<0x72, MRM6r, (outs VR128:$dst),
1875 (ins VR128:$src1, i32i8imm:$src2),
1876 "pslld\t{$src2, $dst|$dst, $src2}",
1877 [(set VR128:$dst, (int_x86_sse2_psll_d VR128:$src1,
1878 (scalar_to_vector (i32 imm:$src2))))]>;
1879 def PSLLQri : PDIi8<0x73, MRM6r, (outs VR128:$dst),
1880 (ins VR128:$src1, i32i8imm:$src2),
1881 "psllq\t{$src2, $dst|$dst, $src2}",
1882 [(set VR128:$dst, (int_x86_sse2_psll_q VR128:$src1,
1883 (bc_v2i64 (v4i32 (scalar_to_vector (i32 imm:$src2))))))]>;
1884
1885 def PSRLWri : PDIi8<0x71, MRM2r, (outs VR128:$dst),
1886 (ins VR128:$src1, i32i8imm:$src2),
1887 "psrlw\t{$src2, $dst|$dst, $src2}",
1888 [(set VR128:$dst, (int_x86_sse2_psrl_w VR128:$src1,
1889 (bc_v8i16 (v4i32 (scalar_to_vector (i32 imm:$src2))))))]>;
1890 def PSRLDri : PDIi8<0x72, MRM2r, (outs VR128:$dst),
1891 (ins VR128:$src1, i32i8imm:$src2),
1892 "psrld\t{$src2, $dst|$dst, $src2}",
1893 [(set VR128:$dst, (int_x86_sse2_psrl_d VR128:$src1,
1894 (scalar_to_vector (i32 imm:$src2))))]>;
1895 def PSRLQri : PDIi8<0x73, MRM2r, (outs VR128:$dst),
1896 (ins VR128:$src1, i32i8imm:$src2),
1897 "psrlq\t{$src2, $dst|$dst, $src2}",
1898 [(set VR128:$dst, (int_x86_sse2_psrl_q VR128:$src1,
1899 (bc_v2i64 (v4i32 (scalar_to_vector (i32 imm:$src2))))))]>;
1900
1901 def PSRAWri : PDIi8<0x71, MRM4r, (outs VR128:$dst),
1902 (ins VR128:$src1, i32i8imm:$src2),
1903 "psraw\t{$src2, $dst|$dst, $src2}",
1904 [(set VR128:$dst, (int_x86_sse2_psra_w VR128:$src1,
1905 (bc_v8i16 (v4i32 (scalar_to_vector (i32 imm:$src2))))))]>;
1906 def PSRADri : PDIi8<0x72, MRM4r, (outs VR128:$dst),
1907 (ins VR128:$src1, i32i8imm:$src2),
1908 "psrad\t{$src2, $dst|$dst, $src2}",
1909 [(set VR128:$dst, (int_x86_sse2_psra_d VR128:$src1,
1910 (scalar_to_vector (i32 imm:$src2))))]>;
1911 }
1912
1913 // PSRAQ doesn't exist in SSE[1-3].
1871 defm PSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw",
1872 int_x86_sse2_psll_w, int_x86_sse2_pslli_w>;
1873 defm PSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld",
1874 int_x86_sse2_psll_d, int_x86_sse2_pslli_d>;
1875 defm PSLLQ : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "psllq",
1876 int_x86_sse2_psll_q, int_x86_sse2_pslli_q>;
1877
1878 defm PSRLW : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw",
1879 int_x86_sse2_psrl_w, int_x86_sse2_psrli_w>;
1880 defm PSRLD : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "psrld",
1881 int_x86_sse2_psrl_d, int_x86_sse2_psrli_d>;
1882 defm PSRLQ : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "psrlq", // ri opcode must be 0x73: 0x72 + MRM2r is already psrld's immediate encoding
1883 int_x86_sse2_psrl_q, int_x86_sse2_psrli_q>;
1884
1885 defm PSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw",
1886 int_x86_sse2_psra_w, int_x86_sse2_psrai_w>;
1887 defm PSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad", // ri opcode must be 0x72: 0x71 + MRM4r is already psraw's immediate encoding
1888 int_x86_sse2_psra_d, int_x86_sse2_psrai_d>;
19141889
19151890 // 128-bit logical shifts.
19161891 let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in {
121121 if (Name.compare(5,10,"x86.mmx.ps",10) == 0 &&
122122 (Name.compare(13,4,"psll", 4) == 0 ||
123123 Name.compare(13,4,"psra", 4) == 0 ||
124 Name.compare(13,4,"psrl", 4) == 0)) {
124 Name.compare(13,4,"psrl", 4) == 0) && Name[17] != 'i') {
125125
126126 const llvm::Type *VT = VectorType::get(IntegerType::get(64), 1);
127127
0 ; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx | grep psllq | grep 32
1 ; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+mmx | grep psllq | grep 32
12 ; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx | grep psrad
23
34 define i64 @t1(<1 x i64> %mm1) nounwind {
45 entry:
5 %tmp6 = tail call <1 x i64> @llvm.x86.mmx.psll.q( <1 x i64> %mm1, <1 x i64> ) ; <<1 x i64>> [#uses=1]
6 %tmp6 = tail call <1 x i64> @llvm.x86.mmx.pslli.q( <1 x i64> %mm1, i32 32 ) ; <<1 x i64>> [#uses=1]
67 %retval1112 = bitcast <1 x i64> %tmp6 to i64 ; [#uses=1]
78 ret i64 %retval1112
89 }
910
10 declare <1 x i64> @llvm.x86.mmx.psll.q(<1 x i64>, <1 x i64>) nounwind readnone
11 declare <1 x i64> @llvm.x86.mmx.pslli.q(<1 x i64>, i32) nounwind readnone
1112
1213 define i64 @t2(<2 x i32> %mm1, <2 x i32> %mm2) nounwind {
1314 entry: