llvm.org GIT mirror: llvm / eebc8a1

Add support for the v1i64 type. This makes better code for this:

    #include <mmintrin.h>

    extern __m64 C;

    void baz(__v2si *A, __v2si *B)
    {
      *A = C;
      _mm_empty();
    }

We get this:

    _baz:
            call "L1$pb"
    "L1$pb":
            popl %eax
            movl L_C$non_lazy_ptr-"L1$pb"(%eax), %eax
            movq (%eax), %mm0
            movl 4(%esp), %eax
            movq %mm0, (%eax)
            emms
            ret

GCC gives us this:

    _baz:
            pushl %ebx
            call L3
    "L00000000001$pb":
    L3:
            popl %ebx
            subl $8, %esp
            movl L_C$non_lazy_ptr-"L00000000001$pb"(%ebx), %eax
            movl (%eax), %edx
            movl 4(%eax), %ecx
            movl 16(%esp), %eax
            movl %edx, (%eax)
            movl %ecx, 4(%eax)
            emms
            addl $8, %esp
            popl %ebx
            ret

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@35351 91177308-0d34-0410-b5e6-96231b3b80d8

Bill Wendling · 13 years ago
5 changed files with 62 additions and 29 deletions.
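Before the per-file hunks, a minimal sketch of the kind of source this change improves (illustrative only, not part of the commit; the names copy64 and mask64 are made up): once MVT::v1i64 is a legal MMX type, a whole-register __m64 copy or bitwise operation can be selected as a single 64-bit MMX instruction instead of a pair of 32-bit integer moves.

    /* Illustrative sketch, not from the commit. Assumes an MMX-capable
     * x86-32 target like the Darwin example in the commit message. */
    #include <mmintrin.h>

    extern __m64 C;

    /* Whole-register copy: with v1i64 legal this is one movq load
     * plus one movq store, as shown in the commit message. */
    void copy64(__m64 *dst) {
      *dst = C;
      _mm_empty();
    }

    /* 64-bit bitwise AND: typed at v1i64 after this patch, selects pand. */
    __m64 mask64(__m64 a, __m64 b) {
      return _mm_and_si64(a, b);
    }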
include/llvm/Intrinsics.td:

  def llvm_v8i16_ty      : LLVMVectorType<v8i16,  8, llvm_i16_ty>;    //  8 x i16
  def llvm_v2i64_ty      : LLVMVectorType<v2i64,  2, llvm_i64_ty>;    //  2 x i64
  def llvm_v2i32_ty      : LLVMVectorType<v2i32,  2, llvm_i32_ty>;    //  2 x i32
+ def llvm_v1i64_ty      : LLVMVectorType<v1i64,  1, llvm_i64_ty>;    //  1 x i64
  def llvm_v4i32_ty      : LLVMVectorType<v4i32,  4, llvm_i32_ty>;    //  4 x i32
  def llvm_v4f32_ty      : LLVMVectorType<v4f32,  4, llvm_float_ty>;  //  4 x float
  def llvm_v2f64_ty      : LLVMVectorType<v2f64,  2, llvm_double_ty>; //  2 x double
lib/Target/X86/X86ISelLowering.cpp:

  addRegisterClass(MVT::v8i8,  X86::VR64RegisterClass);
  addRegisterClass(MVT::v4i16, X86::VR64RegisterClass);
  addRegisterClass(MVT::v2i32, X86::VR64RegisterClass);
+ addRegisterClass(MVT::v1i64, X86::VR64RegisterClass);

  // FIXME: add MMX packed arithmetics

  ...

  setOperationAction(ISD::XOR, MVT::v2i32, Legal);

  setOperationAction(ISD::LOAD, MVT::v8i8, Promote);
- AddPromotedToType (ISD::LOAD, MVT::v8i8, MVT::v2i32);
+ AddPromotedToType (ISD::LOAD, MVT::v8i8, MVT::v1i64);
  setOperationAction(ISD::LOAD, MVT::v4i16, Promote);
- AddPromotedToType (ISD::LOAD, MVT::v4i16, MVT::v2i32);
- setOperationAction(ISD::LOAD, MVT::v2i32, Legal);
+ AddPromotedToType (ISD::LOAD, MVT::v4i16, MVT::v1i64);
+ setOperationAction(ISD::LOAD, MVT::v2i32, Promote);
+ AddPromotedToType (ISD::LOAD, MVT::v2i32, MVT::v1i64);
+ setOperationAction(ISD::LOAD, MVT::v1i64, Legal);

  setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Expand);
  setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Expand);
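The promotion calls above route every 64-bit MMX load through the one legal load type, v1i64, so loads of v8i8, v4i16, and v2i32 data all select movq. A hedged sketch of what that means at the source level (add_bytes and add_words are hypothetical names):

    #include <mmintrin.h>

    /* Each dereference below is a 64-bit MMX load; after this change it
     * is promoted to a v1i64 load and selected as a single movq. */
    __m64 add_bytes(__m64 a, const __m64 *p) {
      return _mm_add_pi8(a, *p);   /* v8i8 data */
    }

    __m64 add_words(__m64 a, const __m64 *p) {
      return _mm_add_pi16(a, *p);  /* v4i16 data */
    }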
lib/Target/X86/X86InstrMMX.td:

  def : Pat<(v8i8  (undef)), (IMPLICIT_DEF_VR64)>;
  def : Pat<(v4i16 (undef)), (IMPLICIT_DEF_VR64)>;
  def : Pat<(v2i32 (undef)), (IMPLICIT_DEF_VR64)>;
+ def : Pat<(v1i64 (undef)), (IMPLICIT_DEF_VR64)>;

  //===----------------------------------------------------------------------===//
  // MMX Pattern Fragments
  //===----------------------------------------------------------------------===//

- def loadv2i32 : PatFrag<(ops node:$ptr), (v2i32 (load node:$ptr))>;
+ def loadv1i64 : PatFrag<(ops node:$ptr), (v1i64 (load node:$ptr))>;

  def bc_v8i8  : PatFrag<(ops node:$in), (v8i8  (bitconvert node:$in))>;
  def bc_v4i16 : PatFrag<(ops node:$in), (v4i16 (bitconvert node:$in))>;
  ...

                !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
                [(set VR64:$dst, (OpVT (OpNode VR64:$src1,
                                        (bitconvert
-                                        (loadv2i32 addr:$src2)))))]>;
+                                        (loadv1i64 addr:$src2)))))]>;
  }

  multiclass MMXI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,

  ...

    def rm : MMXI<opc, MRMSrcMem, (ops VR64:$dst, VR64:$src1, i64mem:$src2),
                  !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
                  [(set VR64:$dst, (IntId VR64:$src1,
-                                   (bitconvert (loadv2i32 addr:$src2))))]>;
+                                   (bitconvert (loadv1i64 addr:$src2))))]>;
  }

- // MMXI_binop_rm_v2i32 - Simple MMX binary operator whose type is v2i32.
+ // MMXI_binop_rm_v1i64 - Simple MMX binary operator whose type is v1i64.
  //
  // FIXME: we could eliminate this and use MMXI_binop_rm instead if tblgen knew
  // to collapse (bitconvert VT to VT) into its operand.
  //
- multiclass MMXI_binop_rm_v2i32<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ multiclass MMXI_binop_rm_v1i64<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 bit Commutable = 0> {
    def rr : MMXI<opc, MRMSrcReg, (ops VR64:$dst, VR64:$src1, VR64:$src2),
                  !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
-                 [(set VR64:$dst, (v2i32 (OpNode VR64:$src1, VR64:$src2)))]> {
+                 [(set VR64:$dst, (v1i64 (OpNode VR64:$src1, VR64:$src2)))]> {
      let isCommutable = Commutable;
    }
    def rm : MMXI<opc, MRMSrcMem, (ops VR64:$dst, VR64:$src1, i64mem:$src2),
                  !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
                  [(set VR64:$dst,
-                  (OpNode VR64:$src1,(loadv2i32 addr:$src2)))]>;
+                  (OpNode VR64:$src1,(loadv1i64 addr:$src2)))]>;
  }

  multiclass MMXI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,

  ...

    def rm : MMXI<opc, MRMSrcMem, (ops VR64:$dst, VR64:$src1, i64mem:$src2),
                  !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
                  [(set VR64:$dst, (IntId VR64:$src1,
-                                   (bitconvert (loadv2i32 addr:$src2))))]>;
+                                   (bitconvert (loadv1i64 addr:$src2))))]>;
    def ri : MMXIi8<opc2, ImmForm, (ops VR64:$dst, VR64:$src1, i32i8imm:$src2),
                    !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
                    [(set VR64:$dst, (IntId VR64:$src1,
176177 "punpckhbw {$src2, $dst|$dst, $src2}",
177178 [(set VR64:$dst,
178179 (v8i8 (vector_shuffle VR64:$src1,
179 (bc_v8i8 (loadv2i32 addr:$src2)),
180 (bc_v8i8 (loadv1i64 addr:$src2)),
180181 MMX_UNPCKH_shuffle_mask)))]>;
181182 def MMX_PUNPCKHWDrr : MMXI<0x69, MRMSrcReg,
182183 (ops VR64:$dst, VR64:$src1, VR64:$src2),
189190 "punpckhwd {$src2, $dst|$dst, $src2}",
190191 [(set VR64:$dst,
191192 (v4i16 (vector_shuffle VR64:$src1,
192 (bc_v4i16 (loadv2i32 addr:$src2)),
193 (bc_v4i16 (loadv1i64 addr:$src2)),
193194 MMX_UNPCKH_shuffle_mask)))]>;
194195 def MMX_PUNPCKHDQrr : MMXI<0x6A, MRMSrcReg,
195196 (ops VR64:$dst, VR64:$src1, VR64:$src2),
196197 "punpckhdq {$src2, $dst|$dst, $src2}",
197198 [(set VR64:$dst,
198 (v2i32 (vector_shuffle VR64:$src1, VR64:$src2,
199 (v1i64 (vector_shuffle VR64:$src1, VR64:$src2,
199200 MMX_UNPCKH_shuffle_mask)))]>;
200201 def MMX_PUNPCKHDQrm : MMXI<0x6A, MRMSrcMem,
201202 (ops VR64:$dst, VR64:$src1, i64mem:$src2),
202203 "punpckhdq {$src2, $dst|$dst, $src2}",
203204 [(set VR64:$dst,
204 (v2i32 (vector_shuffle VR64:$src1,
205 (loadv2i32 addr:$src2),
205 (v1i64 (vector_shuffle VR64:$src1,
206 (loadv1i64 addr:$src2),
206207 MMX_UNPCKH_shuffle_mask)))]>;
207208 }
208209
  // Logical Instructions
- defm MMX_PAND : MMXI_binop_rm_v2i32<0xDB, "pand", and, 1>;
- defm MMX_POR  : MMXI_binop_rm_v2i32<0xEB, "por" , or,  1>;
- defm MMX_PXOR : MMXI_binop_rm_v2i32<0xEF, "pxor", xor, 1>;
+ defm MMX_PAND : MMXI_binop_rm_v1i64<0xDB, "pand", and, 1>;
+ defm MMX_POR  : MMXI_binop_rm_v1i64<0xEB, "por" , or,  1>;
+ defm MMX_PXOR : MMXI_binop_rm_v1i64<0xEF, "pxor", xor, 1>;

  let isTwoAddress = 1 in {
    def MMX_PANDNrr : MMXI<0xDF, MRMSrcReg,
                           (ops VR64:$dst, VR64:$src1, VR64:$src2),
                           "pandn {$src2, $dst|$dst, $src2}",
-                          [(set VR64:$dst, (v2i32 (and (vnot VR64:$src1),
+                          [(set VR64:$dst, (v1i64 (and (vnot VR64:$src1),
                                            VR64:$src2)))]>;
    def MMX_PANDNrm : MMXI<0xDF, MRMSrcMem,
                           (ops VR64:$dst, VR64:$src1, i64mem:$src2),
                           "pandn {$src2, $dst|$dst, $src2}",
-                          [(set VR64:$dst, (v2i32 (and (vnot VR64:$src1),
+                          [(set VR64:$dst, (v1i64 (and (vnot VR64:$src1),
                                            (load addr:$src2))))]>;
  }

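With pand, pandn, por, and pxor retyped at v1i64, the usual MMX bitwise-select idiom is covered by the patterns above. A sketch (select_bits is a made-up name; the intrinsic-to-instruction mapping is the standard mmintrin.h one):

    #include <mmintrin.h>

    /* (mask & a) | (~mask & b): pand + pandn + por, all v1i64-typed. */
    __m64 select_bits(__m64 mask, __m64 a, __m64 b) {
      return _mm_or_si64(_mm_and_si64(mask, a), _mm_andnot_si64(mask, b));
    }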
261262 "movq {$src, $dst|$dst, $src}", []>;
262263 def MOVQ64rm : MMXI<0x6F, MRMSrcMem, (ops VR64:$dst, i64mem:$src),
263264 "movq {$src, $dst|$dst, $src}",
264 [(set VR64:$dst, (loadv2i32 addr:$src))]>;
265 [(set VR64:$dst, (loadv1i64 addr:$src))]>;
265266 def MOVQ64mr : MMXI<0x7F, MRMDestMem, (ops i64mem:$dst, VR64:$src),
266267 "movq {$src, $dst|$dst, $src}",
267 [(store (v2i32 VR64:$src), addr:$dst)]>;
268 [(store (v1i64 VR64:$src), addr:$dst)]>;
268269
269270 // Conversion instructions
270271 def CVTPI2PSrr : MMXI<0x2A, MRMSrcReg, (ops VR128:$dst, VR64:$src),
  ...

                        Requires<[HasMMX]>;

  //===----------------------------------------------------------------------===//
+ // Alias Instructions
+ //===----------------------------------------------------------------------===//
+
+ // Alias instructions that map zero vector to pxor.
+ // FIXME: remove when we can teach regalloc that xor reg, reg is ok.
+ let isReMaterializable = 1 in {
+   def MMX_V_SET0 : MMXI<0xEF, MRMInitReg, (ops VR64:$dst),
+                         "pxor $dst, $dst",
+                         [(set VR64:$dst, (v1i64 immAllZerosV))]>;
+ }
+
+ //===----------------------------------------------------------------------===//
  // Non-Instruction Patterns
  //===----------------------------------------------------------------------===//

  ...

            (MOVQ64mr addr:$dst, VR64:$src)>;
  def : Pat<(store (v4i16 VR64:$src), addr:$dst),
            (MOVQ64mr addr:$dst, VR64:$src)>;
+ def : Pat<(store (v2i32 VR64:$src), addr:$dst),
+           (MOVQ64mr addr:$dst, VR64:$src)>;
+
+ // 64-bit all-zero vectors.
+ def : Pat<(v8i8  immAllZerosV), (MMX_V_SET0)>;
+ def : Pat<(v4i16 immAllZerosV), (MMX_V_SET0)>;
+ def : Pat<(v2i32 immAllZerosV), (MMX_V_SET0)>;
+ def : Pat<(v1i64 immAllZerosV), (MMX_V_SET0)>;
  // Bit convert.
+ def : Pat<(v8i8  (bitconvert (v1i64 VR64:$src))), (v8i8  VR64:$src)>;
  def : Pat<(v8i8  (bitconvert (v2i32 VR64:$src))), (v8i8  VR64:$src)>;
  def : Pat<(v8i8  (bitconvert (v4i16 VR64:$src))), (v8i8  VR64:$src)>;
+ def : Pat<(v4i16 (bitconvert (v1i64 VR64:$src))), (v4i16 VR64:$src)>;
  def : Pat<(v4i16 (bitconvert (v2i32 VR64:$src))), (v4i16 VR64:$src)>;
  def : Pat<(v4i16 (bitconvert (v8i8  VR64:$src))), (v4i16 VR64:$src)>;
+ def : Pat<(v2i32 (bitconvert (v1i64 VR64:$src))), (v2i32 VR64:$src)>;
  def : Pat<(v2i32 (bitconvert (v4i16 VR64:$src))), (v2i32 VR64:$src)>;
  def : Pat<(v2i32 (bitconvert (v8i8  VR64:$src))), (v2i32 VR64:$src)>;
+ def : Pat<(v1i64 (bitconvert (v2i32 VR64:$src))), (v1i64 VR64:$src)>;
+ def : Pat<(v1i64 (bitconvert (v4i16 VR64:$src))), (v1i64 VR64:$src)>;
+ def : Pat<(v1i64 (bitconvert (v8i8  VR64:$src))), (v1i64 VR64:$src)>;

- // Splat v2i32
+ // Splat v1i64
  let AddedComplexity = 10 in {
-   def : Pat<(vector_shuffle (v2i32 VR64:$src), (undef),
+   def : Pat<(vector_shuffle (v1i64 VR64:$src), (undef),
              MMX_splat_mask:$sm),
            (MMX_PUNPCKHDQrr VR64:$src, VR64:$src)>;
-   def : Pat<(vector_shuffle (v2i32 VR64:$src), (undef),
+   def : Pat<(vector_shuffle (v1i64 VR64:$src), (undef),
              MMX_UNPCKH_shuffle_mask:$sm),
            (MMX_PUNPCKHDQrr VR64:$src, VR64:$src)>;
  }
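The splat patterns above select MMX_PUNPCKHDQrr; at the source level that corresponds to unpacking a register with itself (splat_hi32 is a made-up name):

    #include <mmintrin.h>

    /* punpckhdq v, v duplicates the high 32-bit element into both lanes. */
    __m64 splat_hi32(__m64 v) {
      return _mm_unpackhi_pi32(v, v);
    }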
  ...

  // Scalar to v4i16 / v8i8. The source may be a GR32, but only the lower 8 or
  // 16-bits matter.
+ def : Pat<(v8i8  (MMX_X86s2vec GR32:$src)), (MOVD64rr GR32:$src)>;
  def : Pat<(v4i16 (MMX_X86s2vec GR32:$src)), (MOVD64rr GR32:$src)>;
- def : Pat<(v8i8  (MMX_X86s2vec GR32:$src)), (MOVD64rr GR32:$src)>;
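These scalar-to-vector patterns back the movd path from a 32-bit GPR into an MMX register. A hedged example (whether a given frontend routes this exact intrinsic through MMX_X86s2vec is an assumption):

    #include <mmintrin.h>

    /* Move a 32-bit integer into the low half of an MMX register: movd. */
    __m64 scalar_to_mmx(int x) {
      return _mm_cvtsi32_si64(x);
    }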
lib/Target/X86/X86RegisterInfo.td:

  }

  // Generic vector registers: VR64 and VR128.
- def VR64  : RegisterClass<"X86", [v8i8, v4i16, v2i32], 64,
+ def VR64  : RegisterClass<"X86", [v8i8, v4i16, v2i32, v1i64], 64,
                            [MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7]>;
  def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128,
                            [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
utils/TableGen/CodeGenTarget.cpp:

    case MVT::v8i8:  return "MVT::v8i8";
    case MVT::v4i16: return "MVT::v4i16";
    case MVT::v2i32: return "MVT::v2i32";
+   case MVT::v1i64: return "MVT::v1i64";
    case MVT::v16i8: return "MVT::v16i8";
    case MVT::v8i16: return "MVT::v8i16";
    case MVT::v4i32: return "MVT::v4i32";

  ...

    case MVT::v8i8:  return "MVT::v8i8";
    case MVT::v4i16: return "MVT::v4i16";
    case MVT::v2i32: return "MVT::v2i32";
+   case MVT::v1i64: return "MVT::v1i64";
    case MVT::v16i8: return "MVT::v16i8";
    case MVT::v8i16: return "MVT::v8i16";
    case MVT::v4i32: return "MVT::v4i32";