llvm.org GIT mirror llvm / b058971
[x86] Give movss and movsd execution domains in the x86 backend. This associates movss and movsd with the packed single and packed double execution domains, respectively. While this is largely cosmetic, as we now don't have weird ping-ponging between single and double precision, it is also useful because it prevents the domain-fixing algorithm from seeing domain breaks that don't actually exist. It will also be much more important if we have an execution domain default other than packed single, as that would cause us to mix movss and movsd with integer vector code on a regular basis, a very bad mixture. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@228135 91177308-0d34-0410-b5e6-96231b3b80d8 Chandler Carruth 5 years ago
11 changed file(s) with 77 addition(s) and 70 deletion(s). Raw diff Collapse all Expand all
421421
422422 // SI - SSE 1 & 2 scalar instructions
423423 class SI o, Format F, dag outs, dag ins, string asm,
424 list pattern, InstrItinClass itin = NoItinerary>
425 : I {
424 list pattern, InstrItinClass itin = NoItinerary,
425 Domain d = GenericDomain>
426 : I {
426427 let Predicates = !if(!eq(OpEnc.Value, EncEVEX.Value), [HasAVX512],
427428 !if(!eq(OpEnc.Value, EncVEX.Value), [UseAVX],
428429 !if(!eq(OpPrefix.Value, XS.Value), [UseSSE1],
547547
548548 multiclass sse12_move_rr
549549 X86MemOperand x86memop, string base_opc,
550 string asm_opr> {
550 string asm_opr, Domain d = GenericDomain> {
551551 def rr : SI<0x10, MRMSrcReg, (outs VR128:$dst),
552552 (ins VR128:$src1, RC:$src2),
553553 !strconcat(base_opc, asm_opr),
554554 [(set VR128:$dst, (vt (OpNode VR128:$src1,
555555 (scalar_to_vector RC:$src2))))],
556 IIC_SSE_MOV_S_RR>, Sched<[WriteFShuffle]>;
556 IIC_SSE_MOV_S_RR, d>, Sched<[WriteFShuffle]>;
557557
558558 // For the disassembler
559559 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
564564 }
565565
566566 multiclass sse12_move
567 X86MemOperand x86memop, string OpcodeStr> {
567 X86MemOperand x86memop, string OpcodeStr,
568 Domain d = GenericDomain> {
568569 // AVX
569570 defm V#NAME : sse12_move_rr
570 "\t{$src2, $src1, $dst|$dst, $src1, $src2}">,
571 "\t{$src2, $src1, $dst|$dst, $src1, $src2}", d>,
571572 VEX_4V, VEX_LIG;
572573
573574 def V#NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
574575 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
575 [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
576 [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR, d>,
576577 VEX, VEX_LIG, Sched<[WriteStore]>;
577578 // SSE1 & 2
578579 let Constraints = "$src1 = $dst" in {
579580 defm NAME : sse12_move_rr
580 "\t{$src2, $dst|$dst, $src2}">;
581 "\t{$src2, $dst|$dst, $src2}", d>;
581582 }
582583
583584 def NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
584585 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
585 [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
586 [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR, d>,
586587 Sched<[WriteStore]>;
587588 }
588589
589590 // Loading from memory automatically zeroing upper bits.
590591 multiclass sse12_move_rm
591 PatFrag mem_pat, string OpcodeStr> {
592 PatFrag mem_pat, string OpcodeStr,
593 Domain d = GenericDomain> {
592594 def V#NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
593595 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
594596 [(set RC:$dst, (mem_pat addr:$src))],
595 IIC_SSE_MOV_S_RM>, VEX, VEX_LIG, Sched<[WriteLoad]>;
597 IIC_SSE_MOV_S_RM, d>, VEX, VEX_LIG, Sched<[WriteLoad]>;
596598 def NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
597599 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
598600 [(set RC:$dst, (mem_pat addr:$src))],
599 IIC_SSE_MOV_S_RM>, Sched<[WriteLoad]>;
600 }
601
602 defm MOVSS : sse12_move, XS;
603 defm MOVSD : sse12_move, XD;
601 IIC_SSE_MOV_S_RM, d>, Sched<[WriteLoad]>;
602 }
603
604 defm MOVSS : sse12_move
605 SSEPackedSingle>, XS;
606 defm MOVSD : sse12_move
607 SSEPackedDouble>, XD;
604608
605609 let canFoldAsLoad = 1, isReMaterializable = 1 in {
606 defm MOVSS : sse12_move_rm>, XS;
610 defm MOVSS : sse12_move_rm,
611 SSEPackedSingle>, XS;
607612
608613 let AddedComplexity = 20 in
609 defm MOVSD : sse12_move_rm>, XD;
614 defm MOVSD : sse12_move_rm,
615 SSEPackedDouble>, XD;
610616 }
611617
612618 // Patterns
178178 ; CHECK-LABEL: test12:
179179 ; CHECK: ## BB#0:
180180 ; CHECK-NEXT: movapd 0, %xmm0
181 ; CHECK-NEXT: movaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
181 ; CHECK-NEXT: movapd {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
182182 ; CHECK-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
183183 ; CHECK-NEXT: xorpd %xmm2, %xmm2
184184 ; CHECK-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm2[1]
66 ; CHECK-NEXT: pushl %eax
77 ; CHECK-NEXT: movsd .LCPI0_0, %xmm0
88 ; CHECK-NEXT: movd {{[0-9]+}}(%esp), %xmm1
9 ; CHECK-NEXT: orps %xmm0, %xmm1
9 ; CHECK-NEXT: orpd %xmm0, %xmm1
1010 ; CHECK-NEXT: subsd %xmm0, %xmm1
1111 ; CHECK-NEXT: xorps %xmm0, %xmm0
1212 ; CHECK-NEXT: cvtsd2ss %xmm1, %xmm0
167167 ; SSE2-LABEL: vsel_double:
168168 ; SSE2: # BB#0: # %entry
169169 ; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
170 ; SSE2-NEXT: movaps %xmm1, %xmm0
170 ; SSE2-NEXT: movapd %xmm1, %xmm0
171171 ; SSE2-NEXT: retq
172172 ;
173173 ; SSSE3-LABEL: vsel_double:
174174 ; SSSE3: # BB#0: # %entry
175175 ; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
176 ; SSSE3-NEXT: movaps %xmm1, %xmm0
176 ; SSSE3-NEXT: movapd %xmm1, %xmm0
177177 ; SSSE3-NEXT: retq
178178 ;
179179 ; SSE41-LABEL: vsel_double:
194194 ; SSE2-LABEL: vsel_i64:
195195 ; SSE2: # BB#0: # %entry
196196 ; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
197 ; SSE2-NEXT: movaps %xmm1, %xmm0
197 ; SSE2-NEXT: movapd %xmm1, %xmm0
198198 ; SSE2-NEXT: retq
199199 ;
200200 ; SSSE3-LABEL: vsel_i64:
201201 ; SSSE3: # BB#0: # %entry
202202 ; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
203 ; SSSE3-NEXT: movaps %xmm1, %xmm0
203 ; SSSE3-NEXT: movapd %xmm1, %xmm0
204204 ; SSSE3-NEXT: retq
205205 ;
206206 ; SSE41-LABEL: vsel_i64:
361361 ; SSE2: # BB#0: # %entry
362362 ; SSE2-NEXT: movsd {{.*#+}} xmm4 = xmm0[0],xmm4[1]
363363 ; SSE2-NEXT: movsd {{.*#+}} xmm6 = xmm2[0],xmm6[1]
364 ; SSE2-NEXT: movaps %xmm4, %xmm0
364 ; SSE2-NEXT: movapd %xmm4, %xmm0
365365 ; SSE2-NEXT: movaps %xmm5, %xmm1
366 ; SSE2-NEXT: movaps %xmm6, %xmm2
366 ; SSE2-NEXT: movapd %xmm6, %xmm2
367367 ; SSE2-NEXT: movaps %xmm7, %xmm3
368368 ; SSE2-NEXT: retq
369369 ;
371371 ; SSSE3: # BB#0: # %entry
372372 ; SSSE3-NEXT: movsd {{.*#+}} xmm4 = xmm0[0],xmm4[1]
373373 ; SSSE3-NEXT: movsd {{.*#+}} xmm6 = xmm2[0],xmm6[1]
374 ; SSSE3-NEXT: movaps %xmm4, %xmm0
374 ; SSSE3-NEXT: movapd %xmm4, %xmm0
375375 ; SSSE3-NEXT: movaps %xmm5, %xmm1
376 ; SSSE3-NEXT: movaps %xmm6, %xmm2
376 ; SSSE3-NEXT: movapd %xmm6, %xmm2
377377 ; SSSE3-NEXT: movaps %xmm7, %xmm3
378378 ; SSSE3-NEXT: retq
379379 ;
400400 ; SSE2: # BB#0: # %entry
401401 ; SSE2-NEXT: movsd {{.*#+}} xmm4 = xmm0[0],xmm4[1]
402402 ; SSE2-NEXT: movsd {{.*#+}} xmm6 = xmm2[0],xmm6[1]
403 ; SSE2-NEXT: movaps %xmm4, %xmm0
403 ; SSE2-NEXT: movapd %xmm4, %xmm0
404404 ; SSE2-NEXT: movaps %xmm5, %xmm1
405 ; SSE2-NEXT: movaps %xmm6, %xmm2
405 ; SSE2-NEXT: movapd %xmm6, %xmm2
406406 ; SSE2-NEXT: movaps %xmm7, %xmm3
407407 ; SSE2-NEXT: retq
408408 ;
410410 ; SSSE3: # BB#0: # %entry
411411 ; SSSE3-NEXT: movsd {{.*#+}} xmm4 = xmm0[0],xmm4[1]
412412 ; SSSE3-NEXT: movsd {{.*#+}} xmm6 = xmm2[0],xmm6[1]
413 ; SSSE3-NEXT: movaps %xmm4, %xmm0
413 ; SSSE3-NEXT: movapd %xmm4, %xmm0
414414 ; SSSE3-NEXT: movaps %xmm5, %xmm1
415 ; SSSE3-NEXT: movaps %xmm6, %xmm2
415 ; SSSE3-NEXT: movapd %xmm6, %xmm2
416416 ; SSSE3-NEXT: movaps %xmm7, %xmm3
417417 ; SSSE3-NEXT: retq
418418 ;
445445 ; SSE2: # BB#0: # %entry
446446 ; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
447447 ; SSE2-NEXT: movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1]
448 ; SSE2-NEXT: movaps %xmm2, %xmm0
449 ; SSE2-NEXT: movaps %xmm3, %xmm1
448 ; SSE2-NEXT: movapd %xmm2, %xmm0
449 ; SSE2-NEXT: movapd %xmm3, %xmm1
450450 ; SSE2-NEXT: retq
451451 ;
452452 ; SSSE3-LABEL: vsel_double4:
453453 ; SSSE3: # BB#0: # %entry
454454 ; SSSE3-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
455455 ; SSSE3-NEXT: movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1]
456 ; SSSE3-NEXT: movaps %xmm2, %xmm0
457 ; SSSE3-NEXT: movaps %xmm3, %xmm1
456 ; SSSE3-NEXT: movapd %xmm2, %xmm0
457 ; SSSE3-NEXT: movapd %xmm3, %xmm1
458458 ; SSSE3-NEXT: retq
459459 ;
460460 ; SSE41-LABEL: vsel_double4:
557557 ; SSE2: # BB#0: # %entry
558558 ; SSE2-NEXT: movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1]
559559 ; SSE2-NEXT: movaps %xmm2, %xmm0
560 ; SSE2-NEXT: movaps %xmm3, %xmm1
560 ; SSE2-NEXT: movapd %xmm3, %xmm1
561561 ; SSE2-NEXT: retq
562562 ;
563563 ; SSSE3-LABEL: constant_blendvpd_avx:
564564 ; SSSE3: # BB#0: # %entry
565565 ; SSSE3-NEXT: movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1]
566566 ; SSSE3-NEXT: movaps %xmm2, %xmm0
567 ; SSSE3-NEXT: movaps %xmm3, %xmm1
567 ; SSSE3-NEXT: movapd %xmm3, %xmm1
568568 ; SSSE3-NEXT: retq
569569 ;
570570 ; SSE41-LABEL: constant_blendvpd_avx:
743743 ; SSE2-LABEL: blend_shufflevector_4xdouble:
744744 ; SSE2: # BB#0: # %entry
745745 ; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
746 ; SSE2-NEXT: movaps %xmm2, %xmm0
746 ; SSE2-NEXT: movapd %xmm2, %xmm0
747747 ; SSE2-NEXT: retq
748748 ;
749749 ; SSSE3-LABEL: blend_shufflevector_4xdouble:
750750 ; SSSE3: # BB#0: # %entry
751751 ; SSSE3-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
752 ; SSSE3-NEXT: movaps %xmm2, %xmm0
752 ; SSSE3-NEXT: movapd %xmm2, %xmm0
753753 ; SSSE3-NEXT: retq
754754 ;
755755 ; SSE41-LABEL: blend_shufflevector_4xdouble:
211211 ; SSE2-LABEL: shuffle_v2f64_03:
212212 ; SSE2: # BB#0:
213213 ; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
214 ; SSE2-NEXT: movaps %xmm1, %xmm0
214 ; SSE2-NEXT: movapd %xmm1, %xmm0
215215 ; SSE2-NEXT: retq
216216 ;
217217 ; SSE3-LABEL: shuffle_v2f64_03:
218218 ; SSE3: # BB#0:
219219 ; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
220 ; SSE3-NEXT: movaps %xmm1, %xmm0
220 ; SSE3-NEXT: movapd %xmm1, %xmm0
221221 ; SSE3-NEXT: retq
222222 ;
223223 ; SSSE3-LABEL: shuffle_v2f64_03:
224224 ; SSSE3: # BB#0:
225225 ; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
226 ; SSSE3-NEXT: movaps %xmm1, %xmm0
226 ; SSSE3-NEXT: movapd %xmm1, %xmm0
227227 ; SSSE3-NEXT: retq
228228 ;
229229 ; SSE41-LABEL: shuffle_v2f64_03:
299299 ; SSE2-LABEL: shuffle_v2i64_03:
300300 ; SSE2: # BB#0:
301301 ; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
302 ; SSE2-NEXT: movaps %xmm1, %xmm0
302 ; SSE2-NEXT: movapd %xmm1, %xmm0
303303 ; SSE2-NEXT: retq
304304 ;
305305 ; SSE3-LABEL: shuffle_v2i64_03:
306306 ; SSE3: # BB#0:
307307 ; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
308 ; SSE3-NEXT: movaps %xmm1, %xmm0
308 ; SSE3-NEXT: movapd %xmm1, %xmm0
309309 ; SSE3-NEXT: retq
310310 ;
311311 ; SSSE3-LABEL: shuffle_v2i64_03:
312312 ; SSSE3: # BB#0:
313313 ; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
314 ; SSSE3-NEXT: movaps %xmm1, %xmm0
314 ; SSSE3-NEXT: movapd %xmm1, %xmm0
315315 ; SSSE3-NEXT: retq
316316 ;
317317 ; SSE41-LABEL: shuffle_v2i64_03:
335335 ; SSE2-LABEL: shuffle_v2i64_03_copy:
336336 ; SSE2: # BB#0:
337337 ; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
338 ; SSE2-NEXT: movaps %xmm2, %xmm0
338 ; SSE2-NEXT: movapd %xmm2, %xmm0
339339 ; SSE2-NEXT: retq
340340 ;
341341 ; SSE3-LABEL: shuffle_v2i64_03_copy:
342342 ; SSE3: # BB#0:
343343 ; SSE3-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
344 ; SSE3-NEXT: movaps %xmm2, %xmm0
344 ; SSE3-NEXT: movapd %xmm2, %xmm0
345345 ; SSE3-NEXT: retq
346346 ;
347347 ; SSSE3-LABEL: shuffle_v2i64_03_copy:
348348 ; SSSE3: # BB#0:
349349 ; SSSE3-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
350 ; SSSE3-NEXT: movaps %xmm2, %xmm0
350 ; SSSE3-NEXT: movapd %xmm2, %xmm0
351351 ; SSSE3-NEXT: retq
352352 ;
353353 ; SSE41-LABEL: shuffle_v2i64_03_copy:
522522 ; SSE2-LABEL: shuffle_v2i64_21_copy:
523523 ; SSE2: # BB#0:
524524 ; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
525 ; SSE2-NEXT: movaps %xmm1, %xmm0
525 ; SSE2-NEXT: movapd %xmm1, %xmm0
526526 ; SSE2-NEXT: retq
527527 ;
528528 ; SSE3-LABEL: shuffle_v2i64_21_copy:
529529 ; SSE3: # BB#0:
530530 ; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
531 ; SSE3-NEXT: movaps %xmm1, %xmm0
531 ; SSE3-NEXT: movapd %xmm1, %xmm0
532532 ; SSE3-NEXT: retq
533533 ;
534534 ; SSSE3-LABEL: shuffle_v2i64_21_copy:
535535 ; SSSE3: # BB#0:
536536 ; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
537 ; SSSE3-NEXT: movaps %xmm1, %xmm0
537 ; SSSE3-NEXT: movapd %xmm1, %xmm0
538538 ; SSSE3-NEXT: retq
539539 ;
540540 ; SSE41-LABEL: shuffle_v2i64_21_copy:
691691 define <2 x i64> @shuffle_v2i64_z1(<2 x i64> %a) {
692692 ; SSE2-LABEL: shuffle_v2i64_z1:
693693 ; SSE2: # BB#0:
694 ; SSE2-NEXT: xorps %xmm1, %xmm1
694 ; SSE2-NEXT: xorpd %xmm1, %xmm1
695695 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
696696 ; SSE2-NEXT: retq
697697 ;
698698 ; SSE3-LABEL: shuffle_v2i64_z1:
699699 ; SSE3: # BB#0:
700 ; SSE3-NEXT: xorps %xmm1, %xmm1
700 ; SSE3-NEXT: xorpd %xmm1, %xmm1
701701 ; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
702702 ; SSE3-NEXT: retq
703703 ;
704704 ; SSSE3-LABEL: shuffle_v2i64_z1:
705705 ; SSSE3: # BB#0:
706 ; SSSE3-NEXT: xorps %xmm1, %xmm1
706 ; SSSE3-NEXT: xorpd %xmm1, %xmm1
707707 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
708708 ; SSSE3-NEXT: retq
709709 ;
778778 define <2 x double> @shuffle_v2f64_z1(<2 x double> %a) {
779779 ; SSE2-LABEL: shuffle_v2f64_z1:
780780 ; SSE2: # BB#0:
781 ; SSE2-NEXT: xorps %xmm1, %xmm1
781 ; SSE2-NEXT: xorpd %xmm1, %xmm1
782782 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
783783 ; SSE2-NEXT: retq
784784 ;
785785 ; SSE3-LABEL: shuffle_v2f64_z1:
786786 ; SSE3: # BB#0:
787 ; SSE3-NEXT: xorps %xmm1, %xmm1
787 ; SSE3-NEXT: xorpd %xmm1, %xmm1
788788 ; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
789789 ; SSE3-NEXT: retq
790790 ;
791791 ; SSSE3-LABEL: shuffle_v2f64_z1:
792792 ; SSSE3: # BB#0:
793 ; SSSE3-NEXT: xorps %xmm1, %xmm1
793 ; SSSE3-NEXT: xorpd %xmm1, %xmm1
794794 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
795795 ; SSSE3-NEXT: retq
796796 ;
990990 ; SSE-LABEL: insert_reg_lo_v2f64:
991991 ; SSE: # BB#0:
992992 ; SSE-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
993 ; SSE-NEXT: movaps %xmm1, %xmm0
993 ; SSE-NEXT: movapd %xmm1, %xmm0
994994 ; SSE-NEXT: retq
995995 ;
996996 ; AVX-LABEL: insert_reg_lo_v2f64:
14661466 ; SSE-LABEL: insert_reg_lo_v4f32:
14671467 ; SSE: # BB#0:
14681468 ; SSE-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
1469 ; SSE-NEXT: movaps %xmm1, %xmm0
1469 ; SSE-NEXT: movapd %xmm1, %xmm0
14701470 ; SSE-NEXT: retq
14711471 ;
14721472 ; AVX-LABEL: insert_reg_lo_v4f32:
800800 define <4 x double> @insert_reg_and_zero_v4f64(double %a) {
801801 ; ALL-LABEL: insert_reg_and_zero_v4f64:
802802 ; ALL: # BB#0:
803 ; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1
803 ; ALL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
804804 ; ALL-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
805805 ; ALL-NEXT: retq
806806 %v = insertelement <4 x double> undef, double %a, i32 0
21232123 ; SSE2-LABEL: combine_undef_input_test5:
21242124 ; SSE2: # BB#0:
21252125 ; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
2126 ; SSE2-NEXT: movaps %xmm1, %xmm0
2126 ; SSE2-NEXT: movapd %xmm1, %xmm0
21272127 ; SSE2-NEXT: retq
21282128 ;
21292129 ; SSSE3-LABEL: combine_undef_input_test5:
21302130 ; SSSE3: # BB#0:
21312131 ; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
2132 ; SSSE3-NEXT: movaps %xmm1, %xmm0
2132 ; SSSE3-NEXT: movapd %xmm1, %xmm0
21332133 ; SSSE3-NEXT: retq
21342134 ;
21352135 ; SSE41-LABEL: combine_undef_input_test5:
23082308 ; SSE2-LABEL: combine_undef_input_test15:
23092309 ; SSE2: # BB#0:
23102310 ; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
2311 ; SSE2-NEXT: movaps %xmm1, %xmm0
2311 ; SSE2-NEXT: movapd %xmm1, %xmm0
23122312 ; SSE2-NEXT: retq
23132313 ;
23142314 ; SSSE3-LABEL: combine_undef_input_test15:
23152315 ; SSSE3: # BB#0:
23162316 ; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
2317 ; SSSE3-NEXT: movaps %xmm1, %xmm0
2317 ; SSSE3-NEXT: movapd %xmm1, %xmm0
23182318 ; SSSE3-NEXT: retq
23192319 ;
23202320 ; SSE41-LABEL: combine_undef_input_test15:
44 ; SSE2-LABEL: test1:
55 ; SSE2: # BB#0:
66 ; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
7 ; SSE2-NEXT: movaps %xmm1, %xmm0
7 ; SSE2-NEXT: movapd %xmm1, %xmm0
88 ; SSE2-NEXT: retq
99 ;
1010 ; SSE41-LABEL: test1:
3333 ; SSE2-LABEL: test3:
3434 ; SSE2: # BB#0:
3535 ; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
36 ; SSE2-NEXT: movaps %xmm1, %xmm0
36 ; SSE2-NEXT: movapd %xmm1, %xmm0
3737 ; SSE2-NEXT: retq
3838 ;
3939 ; SSE41-LABEL: test3:
1717 ; CHECK-LABEL: test2:
1818 ; CHECK: # BB#0:
1919 ; CHECK-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
20 ; CHECK-NEXT: movaps %xmm1, %xmm0
20 ; CHECK-NEXT: movapd %xmm1, %xmm0
2121 ; CHECK-NEXT: retq
2222 %1 = select <4 x i1> , <4 x float> %a, <4 x float> %b
2323 ret <4 x float> %1
225225 ; CHECK-LABEL: test24:
226226 ; CHECK: # BB#0:
227227 ; CHECK-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
228 ; CHECK-NEXT: movaps %xmm1, %xmm0
228 ; CHECK-NEXT: movapd %xmm1, %xmm0
229229 ; CHECK-NEXT: retq
230230 %1 = select <2 x i1> , <2 x double> %a, <2 x double> %b
231231 ret <2 x double> %1
235235 ; CHECK-LABEL: test25:
236236 ; CHECK: # BB#0:
237237 ; CHECK-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
238 ; CHECK-NEXT: movaps %xmm1, %xmm0
238 ; CHECK-NEXT: movapd %xmm1, %xmm0
239239 ; CHECK-NEXT: retq
240240 %1 = select <2 x i1> , <2 x i64> %a, <2 x i64> %b
241241 ret <2 x i64> %1