llvm.org GIT mirror llvm / a348c56
Support added for shifts and unpacking MMX instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@35266 91177308-0d34-0410-b5e6-96231b3b80d8 Bill Wendling 13 years ago
5 changed file(s) with 250 addition(s) and 3 deletion(s). Raw diff Collapse all Expand all
584584 Intrinsic<[llvm_v2i32_ty, llvm_v4i16_ty,
585585 llvm_v4i16_ty], [IntrNoMem]>;
586586 }
587
588 // Integer shift ops.
589 let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
590 // Shift left logical
591 def int_x86_mmx_psll_w : GCCBuiltin<"__builtin_ia32_psllw">,
592 Intrinsic<[llvm_v4i16_ty, llvm_v4i16_ty,
593 llvm_v2i32_ty], [IntrNoMem]>;
594 def int_x86_mmx_psll_d : GCCBuiltin<"__builtin_ia32_pslld">,
595 Intrinsic<[llvm_v2i32_ty, llvm_v2i32_ty,
596 llvm_v2i32_ty], [IntrNoMem]>;
597 def int_x86_mmx_psll_q : GCCBuiltin<"__builtin_ia32_psllq">,
598 Intrinsic<[llvm_v2i32_ty, llvm_v2i32_ty,
599 llvm_v2i32_ty], [IntrNoMem]>;
600
601 def int_x86_mmx_psrl_w : GCCBuiltin<"__builtin_ia32_psrlw">,
602 Intrinsic<[llvm_v4i16_ty, llvm_v4i16_ty,
603 llvm_v2i32_ty], [IntrNoMem]>;
604 def int_x86_mmx_psrl_d : GCCBuiltin<"__builtin_ia32_psrld">,
605 Intrinsic<[llvm_v2i32_ty, llvm_v2i32_ty,
606 llvm_v2i32_ty], [IntrNoMem]>;
607 def int_x86_mmx_psrl_q : GCCBuiltin<"__builtin_ia32_psrlq">,
608 Intrinsic<[llvm_v2i32_ty, llvm_v2i32_ty,
609 llvm_v2i32_ty], [IntrNoMem]>;
610
611 def int_x86_mmx_psra_w : GCCBuiltin<"__builtin_ia32_psraw">,
612 Intrinsic<[llvm_v4i16_ty, llvm_v4i16_ty,
613 llvm_v2i32_ty], [IntrNoMem]>;
614 def int_x86_mmx_psra_d : GCCBuiltin<"__builtin_ia32_psrad">,
615 Intrinsic<[llvm_v2i32_ty, llvm_v2i32_ty,
616 llvm_v2i32_ty], [IntrNoMem]>;
617 }
618
619 // Vector pack/unpack ops.
620 let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
621 def int_x86_mmx_punpckh_dq : GCCBuiltin<"__builtin_ia32_punpckhdq">,
622 Intrinsic<[llvm_v2i32_ty, llvm_v2i32_ty,
623 llvm_v2i32_ty], [IntrNoMem]>;
624 }
0 //===---------------------------------------------------------------------===//
1 // Random ideas for the X86 backend: MMX-specific stuff.
2 //===---------------------------------------------------------------------===//
3
4 //===---------------------------------------------------------------------===//
5
6 We should compile
7
8 #include <mmintrin.h>
9
10 extern __m64 C;
11
12 void baz(__v2si *A, __v2si *B)
13 {
14 *A = __builtin_ia32_psllq(*B, C);
15 _mm_empty();
16 }
17
18 to:
19
20 .globl _baz
21 _baz:
22 call L3
23 "L00000000001$pb":
24 L3:
25 popl %ecx
26 subl $12, %esp
27 movl 20(%esp), %eax
28 movq (%eax), %mm0
29 movl L_C$non_lazy_ptr-"L00000000001$pb"(%ecx), %eax
30 movq (%eax), %mm1
31 movl 16(%esp), %eax
32 psllq %mm1, %mm0
33 movq %mm0, (%eax)
34 emms
35 addl $12, %esp
36 ret
37
38 not:
39
40 _baz:
41 subl $12, %esp
42 call "L1$pb"
43 "L1$pb":
44 popl %eax
45 movl L_C$non_lazy_ptr-"L1$pb"(%eax), %eax
46 movl (%eax), %ecx
47 movl %ecx, (%esp)
48 movl 4(%eax), %eax
49 movl %eax, 4(%esp)
50 movl 20(%esp), %eax
51 movq (%eax), %mm0
52 movq (%esp), %mm1
53 psllq %mm1, %mm0
54 movl 16(%esp), %eax
55 movq %mm0, (%eax)
56 emms
57 addl $12, %esp
58 ret
570570 movaps %xmm0, (%eax)
571571 ret
572572
573
573 //===---------------------------------------------------------------------===//
574
575 We should compile this:
576
577 #include <emmintrin.h>
578
579 void foo(__m128i *A, __m128i *B) {
580 *A = _mm_sll_epi16 (*A, *B);
581 }
582
583 to:
584
585 _foo:
586 subl $12, %esp
587 movl 16(%esp), %edx
588 movl 20(%esp), %eax
589 movdqa (%edx), %xmm1
590 movdqa (%eax), %xmm0
591 psllw %xmm0, %xmm1
592 movdqa %xmm1, (%edx)
593 addl $12, %esp
594 ret
595
596 not:
597
598 _foo:
599 movl 8(%esp), %eax
600 movdqa (%eax), %xmm0
601 #IMPLICIT_DEF %eax
602 pinsrw $2, %eax, %xmm0
603 xorl %ecx, %ecx
604 pinsrw $3, %ecx, %xmm0
605 pinsrw $4, %eax, %xmm0
606 pinsrw $5, %ecx, %xmm0
607 pinsrw $6, %eax, %xmm0
608 pinsrw $7, %ecx, %xmm0
609 movl 4(%esp), %eax
610 movdqa (%eax), %xmm1
611 psllw %xmm0, %xmm1
612 movdqa %xmm1, (%eax)
613 ret
354354 setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Expand);
355355 setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Expand);
356356 setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Expand);
357
358 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom);
359 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
360 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i32, Custom);
357361 }
358362
359363 if (Subtarget->hasSSE1()) {
23112315 return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, V);
23122316 }
23132317
2314 /// LowerBuildVectorv16i8 - Custom lower build_vector of v8i16.
2318 /// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16.
23152319 ///
23162320 static SDOperand LowerBuildVectorv8i16(SDOperand Op, unsigned NonZeros,
23172321 unsigned NumNonZero, unsigned NumZero,
4242 //===----------------------------------------------------------------------===//
4343
4444 def loadv2i32 : PatFrag<(ops node:$ptr), (v2i32 (load node:$ptr))>;
45
46 def bc_v8i8 : PatFrag<(ops node:$in), (v8i8 (bitconvert node:$in))>;
47 def bc_v4i16 : PatFrag<(ops node:$in), (v4i16 (bitconvert node:$in))>;
48 def bc_v2i32 : PatFrag<(ops node:$in), (v2i32 (bitconvert node:$in))>;
4549
4650 //===----------------------------------------------------------------------===//
4751 // MMX Multiclasses
9397 [(set VR64:$dst,
9498 (OpNode VR64:$src1,(loadv2i32 addr:$src2)))]>;
9599 }
100
101 multiclass MMXI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
102 string OpcodeStr, Intrinsic IntId> {
103 def rr : MMXI<opc, MRMSrcReg, (ops VR64:$dst, VR64:$src1, VR64:$src2),
104 !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
105 [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))]>;
106 def rm : MMXI<opc, MRMSrcMem, (ops VR64:$dst, VR64:$src1, i64mem:$src2),
107 !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
108 [(set VR64:$dst, (IntId VR64:$src1,
109 (bitconvert (loadv2i32 addr:$src2))))]>;
110 def ri : MMXIi8<opc2, ImmForm, (ops VR64:$dst, VR64:$src1, i32i8imm:$src2),
111 !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
112 [(set VR64:$dst, (IntId VR64:$src1,
113 (scalar_to_vector (i32 imm:$src2))))]>;
114 }
96115 }
97116
98117 //===----------------------------------------------------------------------===//
99118 // MMX EMMS Instruction
100119 //===----------------------------------------------------------------------===//
101120
102 def EMMS : MMXI<0x77, RawFrm, (ops), "emms", [(int_x86_mmx_emms)]>;
121 def MMX_EMMS : MMXI<0x77, RawFrm, (ops), "emms", [(int_x86_mmx_emms)]>;
103122
104123 //===----------------------------------------------------------------------===//
105124 // MMX Scalar Instructions
130149
131150 defm MMX_PMULHW : MMXI_binop_rm_int<0xE5, "pmulhw" , int_x86_mmx_pmulh_w , 1>;
132151 defm MMX_PMADDWD : MMXI_binop_rm_int<0xF5, "pmaddwd", int_x86_mmx_pmadd_wd, 1>;
152
153
154 def MMX_UNPCKH_shuffle_mask : PatLeaf<(build_vector), [{
155 return X86::isUNPCKHMask(N);
156 }]>;
157
158 let isTwoAddress = 1 in {
159 def MMX_PUNPCKHBWrr : MMXI<0x68, MRMSrcReg,
160 (ops VR64:$dst, VR64:$src1, VR64:$src2),
161 "punpckhbw {$src2, $dst|$dst, $src2}",
162 [(set VR64:$dst,
163 (v8i8 (vector_shuffle VR64:$src1, VR64:$src2,
164 MMX_UNPCKH_shuffle_mask)))]>;
165 def MMX_PUNPCKHBWrm : MMXI<0x68, MRMSrcMem,
166 (ops VR64:$dst, VR64:$src1, i64mem:$src2),
167 "punpckhbw {$src2, $dst|$dst, $src2}",
168 [(set VR64:$dst,
169 (v8i8 (vector_shuffle VR64:$src1,
170 (bc_v8i8 (loadv2i32 addr:$src2)),
171 MMX_UNPCKH_shuffle_mask)))]>;
172 def MMX_PUNPCKHWDrr : MMXI<0x69, MRMSrcReg,
173 (ops VR64:$dst, VR64:$src1, VR64:$src2),
174 "punpckhwd {$src2, $dst|$dst, $src2}",
175 [(set VR64:$dst,
176 (v4i16 (vector_shuffle VR64:$src1, VR64:$src2,
177 MMX_UNPCKH_shuffle_mask)))]>;
178 def MMX_PUNPCKHWDrm : MMXI<0x69, MRMSrcMem,
179 (ops VR64:$dst, VR64:$src1, i64mem:$src2),
180 "punpckhwd {$src2, $dst|$dst, $src2}",
181 [(set VR64:$dst,
182 (v4i16 (vector_shuffle VR64:$src1,
183 (bc_v4i16 (loadv2i32 addr:$src2)),
184 MMX_UNPCKH_shuffle_mask)))]>;
185 def MMX_PUNPCKHDQrr : MMXI<0x6A, MRMSrcReg,
186 (ops VR64:$dst, VR64:$src1, VR64:$src2),
187 "punpckhdq {$src2, $dst|$dst, $src2}",
188 [(set VR64:$dst,
189 (v2i32 (vector_shuffle VR64:$src1, VR64:$src2,
190 MMX_UNPCKH_shuffle_mask)))]>;
191 def MMX_PUNPCKHDQrm : MMXI<0x6A, MRMSrcMem,
192 (ops VR64:$dst, VR64:$src1, i64mem:$src2),
193 "punpckhdq {$src2, $dst|$dst, $src2}",
194 [(set VR64:$dst,
195 (v2i32 (vector_shuffle VR64:$src1,
196 (loadv2i32 addr:$src2),
197 MMX_UNPCKH_shuffle_mask)))]>;
198 }
133199
134200 // Logical Instructions
135201 defm MMX_PAND : MMXI_binop_rm_v2i32<0xDB, "pand", and, 1>;
148214 [(set VR64:$dst, (v2i32 (and (vnot VR64:$src1),
149215 (load addr:$src2))))]>;
150216 }
217
218 // Shift Instructions
219 defm MMX_PSRLW : MMXI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw",
220 int_x86_mmx_psrl_w>;
221 defm MMX_PSRLD : MMXI_binop_rmi_int<0xD2, 0x72, MRM2r, "psrld",
222 int_x86_mmx_psrl_d>;
223 defm MMX_PSRLQ : MMXI_binop_rmi_int<0xD3, 0x73, MRM2r, "psrlq",
224 int_x86_mmx_psrl_q>;
225
226 defm MMX_PSLLW : MMXI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw",
227 int_x86_mmx_psll_w>;
228 defm MMX_PSLLD : MMXI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld",
229 int_x86_mmx_psll_d>;
230 defm MMX_PSLLQ : MMXI_binop_rmi_int<0xF3, 0x73, MRM6r, "psllq",
231 int_x86_mmx_psll_q>;
232
233 defm MMX_PSRAW : MMXI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw",
234 int_x86_mmx_psra_w>;
235 defm MMX_PSRAD : MMXI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad",
236 int_x86_mmx_psra_d>;
151237
152238 // Move Instructions
153239 def MOVD64rr : MMXI<0x6E, MRMSrcReg, (ops VR64:$dst, GR32:$src),
224310 def : Pat<(v4i16 (bitconvert (v8i8 VR64:$src))), (v4i16 VR64:$src)>;
225311 def : Pat<(v2i32 (bitconvert (v4i16 VR64:$src))), (v2i32 VR64:$src)>;
226312 def : Pat<(v2i32 (bitconvert (v8i8 VR64:$src))), (v2i32 VR64:$src)>;
313
314 // Splat v2i32
315 let AddedComplexity = 10 in {
316 def : Pat<(vector_shuffle (v2i32 VR64:$src), (undef),
317 MMX_UNPCKH_shuffle_mask:$sm),
318 (MMX_PUNPCKHDQrr VR64:$src, VR64:$src)>;
319 }
320
321 // FIXME: Temporary workaround because 2-wide shuffle is broken.
322 def : Pat<(int_x86_mmx_punpckh_dq VR64:$src1, VR64:$src2),
323 (v2i32 (MMX_PUNPCKHDQrr VR64:$src1, VR64:$src2))>;
324 def : Pat<(int_x86_mmx_punpckh_dq VR64:$src1, (load addr:$src2)),
325 (v2i32 (MMX_PUNPCKHDQrm VR64:$src1, addr:$src2))>;
326
327 def MMX_X86s2vec : SDNode<"X86ISD::S2VEC", SDTypeProfile<1, 1, []>, []>;
328
329 // Scalar to v4i16 / v8i8. The source may be a GR32, but only the lower 8 or
330 // 16 bits matter.
331 def : Pat<(v4i16 (MMX_X86s2vec GR32:$src)), (MOVD64rr GR32:$src)>;
332 def : Pat<(v8i8 (MMX_X86s2vec GR32:$src)), (MOVD64rr GR32:$src)>;