llvm.org GIT mirror: llvm / d28a1ea

[X86] Teach the execution domain fixing tables to use movlhps in place of unpcklpd for the packed single domain.

MOVLHPS has a smaller encoding than UNPCKLPD in the legacy encodings. With VEX and EVEX encodings it doesn't matter.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@313509 91177308-0d34-0410-b5e6-96231b3b80d8
Craig Topper, 2 years ago
42 changed files with 363 additions and 363 deletions.
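A note on why the encoding shrinks (added for context; the byte sequences below are read off the standard x86 opcode maps, they are not part of the commit): the legacy-SSE form of UNPCKLPD carries a mandatory 66 operand-size prefix that MOVLHPS does not need, so every register-to-register replacement saves one byte. Under VEX and EVEX the prefix information moves into the VEX/EVEX prefix fields and both forms encode to the same length, which is why the swap only pays off for the legacy forms:

    movlhps   %xmm1, %xmm0           # 0F 16 C1     = 3 bytes (legacy SSE)
    unpcklpd  %xmm1, %xmm0           # 66 0F 14 C1  = 4 bytes (legacy SSE)
    vmovlhps  %xmm1, %xmm0, %xmm0    # C5 F8 16 C1  = 4 bytes (VEX)
    vunpcklpd %xmm1, %xmm0, %xmm0    # C5 F9 14 C1  = 4 bytes (VEX)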
 { X86::XORPSrm, X86::XORPDrm, X86::PXORrm },
 { X86::XORPSrr, X86::XORPDrr, X86::PXORrr },
 { X86::UNPCKLPDrm, X86::UNPCKLPDrm, X86::PUNPCKLQDQrm },
-{ X86::UNPCKLPDrr, X86::UNPCKLPDrr, X86::PUNPCKLQDQrr },
+{ X86::MOVLHPSrr, X86::UNPCKLPDrr, X86::PUNPCKLQDQrr },
 { X86::UNPCKHPDrm, X86::UNPCKHPDrm, X86::PUNPCKHQDQrm },
 { X86::UNPCKHPDrr, X86::UNPCKHPDrr, X86::PUNPCKHQDQrr },
 { X86::UNPCKLPSrm, X86::UNPCKLPSrm, X86::PUNPCKLDQrm },
...
 { X86::VXORPSrm, X86::VXORPDrm, X86::VPXORrm },
 { X86::VXORPSrr, X86::VXORPDrr, X86::VPXORrr },
 { X86::VUNPCKLPDrm, X86::VUNPCKLPDrm, X86::VPUNPCKLQDQrm },
-{ X86::VUNPCKLPDrr, X86::VUNPCKLPDrr, X86::VPUNPCKLQDQrr },
+{ X86::VMOVLHPSrr, X86::VUNPCKLPDrr, X86::VPUNPCKLQDQrr },
 { X86::VUNPCKHPDrm, X86::VUNPCKHPDrm, X86::VPUNPCKHQDQrm },
 { X86::VUNPCKHPDrr, X86::VUNPCKHPDrr, X86::VPUNPCKHQDQrr },
 { X86::VUNPCKLPSrm, X86::VUNPCKLPSrm, X86::VPUNPCKLDQrm },
...
 { X86::VUNPCKHPSZ256rm, X86::VUNPCKHPSZ256rm, X86::VPUNPCKHDQZ256rm },
 { X86::VUNPCKHPSZ256rr, X86::VUNPCKHPSZ256rr, X86::VPUNPCKHDQZ256rr },
 { X86::VUNPCKLPDZ128rm, X86::VUNPCKLPDZ128rm, X86::VPUNPCKLQDQZ128rm },
-{ X86::VUNPCKLPDZ128rr, X86::VUNPCKLPDZ128rr, X86::VPUNPCKLQDQZ128rr },
+{ X86::VMOVLHPSZrr, X86::VUNPCKLPDZ128rr, X86::VPUNPCKLQDQZ128rr },
 { X86::VUNPCKHPDZ128rm, X86::VUNPCKHPDZ128rm, X86::VPUNPCKHQDQZ128rm },
 { X86::VUNPCKHPDZ128rr, X86::VUNPCKHPDZ128rr, X86::VPUNPCKHQDQZ128rr },
 { X86::VUNPCKLPSZ128rm, X86::VUNPCKLPSZ128rm, X86::VPUNPCKLDQZ128rm },
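Background on the tables above (a paraphrase of how the execution domain fix in X86InstrInfo.cpp works, not text from the commit): each row lists one equivalent opcode per execution domain, in the column order packed single, packed double, packed integer, and the pass may rewrite an instruction to whichever column matches the domain of the surrounding instructions. A row is only legal if all three opcodes perform the same data movement, which holds here because MOVLHPS, UNPCKLPD, and PUNPCKLQDQ all interleave the low 64-bit halves of their two register operands. A hypothetical rewrite affected by this change:

    punpcklqdq %xmm1, %xmm0    # integer-domain input
    movlhps    %xmm1, %xmm0    # same shuffle, re-emitted in the packed single
                               # domain; before this patch: unpcklpd %xmm1, %xmm0

The remainder of the patch is the resulting churn in the CodeGen test expectations.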
...
 ; X32-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
 ; X32-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
 ; X32-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero
-; X32-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
-; X32-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; X32-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X32-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_mm256_set_pd:
 ; X64: # BB#0:
-; X64-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; X64-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm3[0],xmm2[0]
+; X64-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; X64-NEXT: vmovlhps {{.*#+}} xmm1 = xmm3[0],xmm2[0]
 ; X64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; X64-NEXT: retq
 %res0 = insertelement <4 x double> undef, double %a3, i32 0
...
 ; X32-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
 ; X32-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
 ; X32-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero
-; X32-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; X32-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm3[0],xmm2[0]
+; X32-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; X32-NEXT: vmovlhps {{.*#+}} xmm1 = xmm3[0],xmm2[0]
 ; X32-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_mm256_setr_pd:
 ; X64: # BB#0:
-; X64-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
-; X64-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X64-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; X64-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; X64-NEXT: retq
 %res0 = insertelement <4 x double> undef, double %a0, i32 0
...
 ; CHECK-LABEL: unpacklopd_not:
 ; CHECK: # BB#0:
 ; CHECK-NEXT: vunpckhpd {{.*#+}} xmm2 = xmm0[1],xmm1[1]
-; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; CHECK-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; CHECK-NEXT: retq
 %shuffle.i = shufflevector <4 x double> %src1, <4 x double> %src2, <4 x i32>
...
 ; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm1
 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
 ; CHECK-NEXT: vunpckhpd {{.*#+}} xmm2 = xmm0[1],xmm1[1]
-; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; CHECK-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; CHECK-NEXT: retq
 %shuffle.i = shufflevector <4 x double> %src1, <4 x double> %src2, <4 x i32>
...
 ; NODQ-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2
 ; NODQ-NEXT: vmovq %xmm1, %rax
 ; NODQ-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm1
-; NODQ-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; NODQ-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; NODQ-NEXT: vextracti32x4 $2, %zmm0, %xmm2
 ; NODQ-NEXT: vpextrq $1, %xmm2, %rax
 ; NODQ-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm3
 ; NODQ-NEXT: vmovq %xmm2, %rax
 ; NODQ-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm2
-; NODQ-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; NODQ-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; NODQ-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
 ; NODQ-NEXT: vextracti128 $1, %ymm0, %xmm2
 ; NODQ-NEXT: vpextrq $1, %xmm2, %rax
 ; NODQ-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm3
 ; NODQ-NEXT: vmovq %xmm2, %rax
 ; NODQ-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm2
-; NODQ-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; NODQ-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; NODQ-NEXT: vpextrq $1, %xmm0, %rax
 ; NODQ-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm3
 ; NODQ-NEXT: vmovq %xmm0, %rax
 ; NODQ-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm0
-; NODQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0]
+; NODQ-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm3[0]
 ; NODQ-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; NODQ-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
 ; NODQ-NEXT: retq
...
 ; NODQ-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2
 ; NODQ-NEXT: vmovq %xmm1, %rax
 ; NODQ-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm1
-; NODQ-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; NODQ-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; NODQ-NEXT: vpextrq $1, %xmm0, %rax
 ; NODQ-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm2
 ; NODQ-NEXT: vmovq %xmm0, %rax
 ; NODQ-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm0
-; NODQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; NODQ-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; NODQ-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; NODQ-NEXT: retq
 ;
...
 ; NODQ-NEXT: vcvtsi2sdq %rax, %xmm1, %xmm1
 ; NODQ-NEXT: vmovq %xmm0, %rax
 ; NODQ-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm0
-; NODQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; NODQ-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; NODQ-NEXT: retq
 ;
 ; VLDQ-LABEL: slto2f64:
...
 ; NODQ-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm2
 ; NODQ-NEXT: vmovq %xmm1, %rax
 ; NODQ-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm1
-; NODQ-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; NODQ-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; NODQ-NEXT: vextracti32x4 $2, %zmm0, %xmm2
 ; NODQ-NEXT: vpextrq $1, %xmm2, %rax
 ; NODQ-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm3
 ; NODQ-NEXT: vmovq %xmm2, %rax
 ; NODQ-NEXT: vcvtusi2sdq %rax, %xmm4, %xmm2
-; NODQ-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; NODQ-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; NODQ-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
 ; NODQ-NEXT: vextracti128 $1, %ymm0, %xmm2
 ; NODQ-NEXT: vpextrq $1, %xmm2, %rax
 ; NODQ-NEXT: vcvtusi2sdq %rax, %xmm4, %xmm3
 ; NODQ-NEXT: vmovq %xmm2, %rax
 ; NODQ-NEXT: vcvtusi2sdq %rax, %xmm4, %xmm2
-; NODQ-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; NODQ-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; NODQ-NEXT: vpextrq $1, %xmm0, %rax
 ; NODQ-NEXT: vcvtusi2sdq %rax, %xmm4, %xmm3
 ; NODQ-NEXT: vmovq %xmm0, %rax
 ; NODQ-NEXT: vcvtusi2sdq %rax, %xmm4, %xmm0
-; NODQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0]
+; NODQ-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm3[0]
 ; NODQ-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; NODQ-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
 ; NODQ-NEXT: retq
...
 ; KNL-NEXT: vcvtusi2sdq %rax, %xmm4, %xmm2
 ; KNL-NEXT: vextracti32x4 $2, %zmm0, %xmm4
 ; KNL-NEXT: vpextrq $1, %xmm4, %rax
-; KNL-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; KNL-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; KNL-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm3
 ; KNL-NEXT: vmovq %xmm4, %rax
 ; KNL-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm4
-; KNL-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm4[0],xmm3[0]
+; KNL-NEXT: vmovlhps {{.*#+}} xmm3 = xmm4[0],xmm3[0]
 ; KNL-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm3
 ; KNL-NEXT: vpextrq $1, %xmm3, %rax
...
 ; KNL-NEXT: vmovq %xmm3, %rax
 ; KNL-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm3
 ; KNL-NEXT: vpextrq $1, %xmm0, %rax
-; KNL-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; KNL-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
 ; KNL-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm4
 ; KNL-NEXT: vmovq %xmm0, %rax
 ; KNL-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm0
-; KNL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm4[0]
+; KNL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm4[0]
 ; KNL-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
 ; KNL-NEXT: vextracti32x4 $3, %zmm1, %xmm3
 ; KNL-NEXT: vpextrq $1, %xmm3, %rax
...
 ; KNL-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm2
 ; KNL-NEXT: vmovq %xmm3, %rax
 ; KNL-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm3
-; KNL-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm3[0],xmm2[0]
+; KNL-NEXT: vmovlhps {{.*#+}} xmm2 = xmm3[0],xmm2[0]
 ; KNL-NEXT: vextracti32x4 $2, %zmm1, %xmm3
 ; KNL-NEXT: vpextrq $1, %xmm3, %rax
 ; KNL-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm4
 ; KNL-NEXT: vmovq %xmm3, %rax
 ; KNL-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm3
-; KNL-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; KNL-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
 ; KNL-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
 ; KNL-NEXT: vextracti128 $1, %ymm1, %xmm3
 ; KNL-NEXT: vpextrq $1, %xmm3, %rax
 ; KNL-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm4
 ; KNL-NEXT: vmovq %xmm3, %rax
 ; KNL-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm3
-; KNL-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; KNL-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
 ; KNL-NEXT: vpextrq $1, %xmm1, %rax
 ; KNL-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm4
 ; KNL-NEXT: vmovq %xmm1, %rax
 ; KNL-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm1
-; KNL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm4[0]
+; KNL-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm4[0]
 ; KNL-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
 ; KNL-NEXT: vinsertf64x4 $1, %ymm2, %zmm1, %zmm1
 ; KNL-NEXT: retq
...
 ; VLNODQ-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm3
 ; VLNODQ-NEXT: vmovq %xmm2, %rax
 ; VLNODQ-NEXT: vcvtusi2sdq %rax, %xmm4, %xmm2
-; VLNODQ-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; VLNODQ-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; VLNODQ-NEXT: vextracti32x4 $2, %zmm0, %xmm3
 ; VLNODQ-NEXT: vpextrq $1, %xmm3, %rax
 ; VLNODQ-NEXT: vcvtusi2sdq %rax, %xmm4, %xmm4
 ; VLNODQ-NEXT: vmovq %xmm3, %rax
 ; VLNODQ-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm3
-; VLNODQ-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; VLNODQ-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
 ; VLNODQ-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
 ; VLNODQ-NEXT: vextracti128 $1, %ymm0, %xmm3
 ; VLNODQ-NEXT: vpextrq $1, %xmm3, %rax
 ; VLNODQ-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm4
 ; VLNODQ-NEXT: vmovq %xmm3, %rax
 ; VLNODQ-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm3
-; VLNODQ-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; VLNODQ-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
 ; VLNODQ-NEXT: vpextrq $1, %xmm0, %rax
 ; VLNODQ-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm4
 ; VLNODQ-NEXT: vmovq %xmm0, %rax
 ; VLNODQ-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm0
-; VLNODQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm4[0]
+; VLNODQ-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm4[0]
 ; VLNODQ-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
 ; VLNODQ-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
 ; VLNODQ-NEXT: vextracti32x4 $3, %zmm1, %xmm2
...
 ; VLNODQ-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm3
 ; VLNODQ-NEXT: vmovq %xmm2, %rax
 ; VLNODQ-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm2
-; VLNODQ-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; VLNODQ-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; VLNODQ-NEXT: vextracti32x4 $2, %zmm1, %xmm3
 ; VLNODQ-NEXT: vpextrq $1, %xmm3, %rax
 ; VLNODQ-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm4
 ; VLNODQ-NEXT: vmovq %xmm3, %rax
 ; VLNODQ-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm3
-; VLNODQ-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; VLNODQ-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
 ; VLNODQ-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
 ; VLNODQ-NEXT: vextracti128 $1, %ymm1, %xmm3
 ; VLNODQ-NEXT: vpextrq $1, %xmm3, %rax
 ; VLNODQ-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm4
 ; VLNODQ-NEXT: vmovq %xmm3, %rax
 ; VLNODQ-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm3
-; VLNODQ-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; VLNODQ-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
 ; VLNODQ-NEXT: vpextrq $1, %xmm1, %rax
 ; VLNODQ-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm4
 ; VLNODQ-NEXT: vmovq %xmm1, %rax
 ; VLNODQ-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm1
-; VLNODQ-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm4[0]
+; VLNODQ-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm4[0]
 ; VLNODQ-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
 ; VLNODQ-NEXT: vinsertf64x4 $1, %ymm2, %zmm1, %zmm1
 ; VLNODQ-NEXT: retq
...
 ; AVX512BW-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm3
 ; AVX512BW-NEXT: vmovq %xmm2, %rax
 ; AVX512BW-NEXT: vcvtusi2sdq %rax, %xmm4, %xmm2
-; AVX512BW-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX512BW-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, %xmm3
 ; AVX512BW-NEXT: vpextrq $1, %xmm3, %rax
 ; AVX512BW-NEXT: vcvtusi2sdq %rax, %xmm4, %xmm4
 ; AVX512BW-NEXT: vmovq %xmm3, %rax
 ; AVX512BW-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm3
-; AVX512BW-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; AVX512BW-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
 ; AVX512BW-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
 ; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm3
 ; AVX512BW-NEXT: vpextrq $1, %xmm3, %rax
 ; AVX512BW-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm4
 ; AVX512BW-NEXT: vmovq %xmm3, %rax
 ; AVX512BW-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm3
-; AVX512BW-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; AVX512BW-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
 ; AVX512BW-NEXT: vpextrq $1, %xmm0, %rax
 ; AVX512BW-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm4
 ; AVX512BW-NEXT: vmovq %xmm0, %rax
 ; AVX512BW-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm0
-; AVX512BW-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm4[0]
+; AVX512BW-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm4[0]
 ; AVX512BW-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
 ; AVX512BW-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
 ; AVX512BW-NEXT: vextracti32x4 $3, %zmm1, %xmm2
...
 ; AVX512BW-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm3
 ; AVX512BW-NEXT: vmovq %xmm2, %rax
 ; AVX512BW-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm2
-; AVX512BW-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX512BW-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; AVX512BW-NEXT: vextracti32x4 $2, %zmm1, %xmm3
 ; AVX512BW-NEXT: vpextrq $1, %xmm3, %rax
 ; AVX512BW-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm4
 ; AVX512BW-NEXT: vmovq %xmm3, %rax
 ; AVX512BW-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm3
-; AVX512BW-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; AVX512BW-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
 ; AVX512BW-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
 ; AVX512BW-NEXT: vextracti128 $1, %ymm1, %xmm3
 ; AVX512BW-NEXT: vpextrq $1, %xmm3, %rax
 ; AVX512BW-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm4
 ; AVX512BW-NEXT: vmovq %xmm3, %rax
 ; AVX512BW-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm3
-; AVX512BW-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; AVX512BW-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
 ; AVX512BW-NEXT: vpextrq $1, %xmm1, %rax
 ; AVX512BW-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm4
 ; AVX512BW-NEXT: vmovq %xmm1, %rax
 ; AVX512BW-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm1
-; AVX512BW-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm4[0]
+; AVX512BW-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm4[0]
 ; AVX512BW-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
 ; AVX512BW-NEXT: vinsertf64x4 $1, %ymm2, %zmm1, %zmm1
 ; AVX512BW-NEXT: retq
...
 ; NODQ-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2
 ; NODQ-NEXT: vmovq %xmm1, %rax
 ; NODQ-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm1
-; NODQ-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; NODQ-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; NODQ-NEXT: vextracti32x4 $2, %zmm0, %xmm2
 ; NODQ-NEXT: vpextrq $1, %xmm2, %rax
 ; NODQ-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm3
 ; NODQ-NEXT: vmovq %xmm2, %rax
 ; NODQ-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm2
-; NODQ-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; NODQ-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; NODQ-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
 ; NODQ-NEXT: vextracti128 $1, %ymm0, %xmm2
 ; NODQ-NEXT: vpextrq $1, %xmm2, %rax
 ; NODQ-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm3
 ; NODQ-NEXT: vmovq %xmm2, %rax
 ; NODQ-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm2
-; NODQ-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; NODQ-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; NODQ-NEXT: vpextrq $1, %xmm0, %rax
 ; NODQ-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm3
 ; NODQ-NEXT: vmovq %xmm0, %rax
 ; NODQ-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm0
-; NODQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0]
+; NODQ-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm3[0]
 ; NODQ-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; NODQ-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
 ; NODQ-NEXT: retq
...
 ; KNL-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm2
 ; KNL-NEXT: vextracti32x4 $2, %zmm0, %xmm4
 ; KNL-NEXT: vpextrq $1, %xmm4, %rax
-; KNL-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; KNL-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; KNL-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm3
 ; KNL-NEXT: vmovq %xmm4, %rax
 ; KNL-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm4
-; KNL-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm4[0],xmm3[0]
+; KNL-NEXT: vmovlhps {{.*#+}} xmm3 = xmm4[0],xmm3[0]
 ; KNL-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm3
 ; KNL-NEXT: vpextrq $1, %xmm3, %rax
...
 ; KNL-NEXT: vmovq %xmm3, %rax
 ; KNL-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm3
 ; KNL-NEXT: vpextrq $1, %xmm0, %rax
-; KNL-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; KNL-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
 ; KNL-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm4
 ; KNL-NEXT: vmovq %xmm0, %rax
 ; KNL-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm0
-; KNL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm4[0]
+; KNL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm4[0]
 ; KNL-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
 ; KNL-NEXT: vextracti32x4 $3, %zmm1, %xmm3
 ; KNL-NEXT: vpextrq $1, %xmm3, %rax
...
 ; KNL-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm2
 ; KNL-NEXT: vmovq %xmm3, %rax
 ; KNL-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm3
-; KNL-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm3[0],xmm2[0]
+; KNL-NEXT: vmovlhps {{.*#+}} xmm2 = xmm3[0],xmm2[0]
 ; KNL-NEXT: vextracti32x4 $2, %zmm1, %xmm3
 ; KNL-NEXT: vpextrq $1, %xmm3, %rax
 ; KNL-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm4
 ; KNL-NEXT: vmovq %xmm3, %rax
 ; KNL-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm3
-; KNL-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; KNL-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
 ; KNL-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
 ; KNL-NEXT: vextracti128 $1, %ymm1, %xmm3
 ; KNL-NEXT: vpextrq $1, %xmm3, %rax
 ; KNL-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm4
 ; KNL-NEXT: vmovq %xmm3, %rax
 ; KNL-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm3
-; KNL-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; KNL-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
 ; KNL-NEXT: vpextrq $1, %xmm1, %rax
 ; KNL-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm4
 ; KNL-NEXT: vmovq %xmm1, %rax
 ; KNL-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm1
-; KNL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm4[0]
+; KNL-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm4[0]
 ; KNL-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
 ; KNL-NEXT: vinsertf64x4 $1, %ymm2, %zmm1, %zmm1
 ; KNL-NEXT: retq
...
 ; VLNODQ-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm3
 ; VLNODQ-NEXT: vmovq %xmm2, %rax
 ; VLNODQ-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm2
-; VLNODQ-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; VLNODQ-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; VLNODQ-NEXT: vextracti32x4 $2, %zmm0, %xmm3
 ; VLNODQ-NEXT: vpextrq $1, %xmm3, %rax
 ; VLNODQ-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm4
 ; VLNODQ-NEXT: vmovq %xmm3, %rax
 ; VLNODQ-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm3
-; VLNODQ-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; VLNODQ-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
 ; VLNODQ-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
 ; VLNODQ-NEXT: vextracti128 $1, %ymm0, %xmm3
 ; VLNODQ-NEXT: vpextrq $1, %xmm3, %rax
 ; VLNODQ-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm4
 ; VLNODQ-NEXT: vmovq %xmm3, %rax
 ; VLNODQ-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm3
-; VLNODQ-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; VLNODQ-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
 ; VLNODQ-NEXT: vpextrq $1, %xmm0, %rax
 ; VLNODQ-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm4
 ; VLNODQ-NEXT: vmovq %xmm0, %rax
 ; VLNODQ-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm0
-; VLNODQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm4[0]
+; VLNODQ-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm4[0]
 ; VLNODQ-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
 ; VLNODQ-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
 ; VLNODQ-NEXT: vextracti32x4 $3, %zmm1, %xmm2
...
 ; VLNODQ-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm3
 ; VLNODQ-NEXT: vmovq %xmm2, %rax
 ; VLNODQ-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm2
-; VLNODQ-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; VLNODQ-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; VLNODQ-NEXT: vextracti32x4 $2, %zmm1, %xmm3
 ; VLNODQ-NEXT: vpextrq $1, %xmm3, %rax
 ; VLNODQ-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm4
 ; VLNODQ-NEXT: vmovq %xmm3, %rax
 ; VLNODQ-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm3
-; VLNODQ-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; VLNODQ-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
 ; VLNODQ-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
 ; VLNODQ-NEXT: vextracti128 $1, %ymm1, %xmm3
 ; VLNODQ-NEXT: vpextrq $1, %xmm3, %rax
 ; VLNODQ-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm4
 ; VLNODQ-NEXT: vmovq %xmm3, %rax
 ; VLNODQ-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm3
-; VLNODQ-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; VLNODQ-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
 ; VLNODQ-NEXT: vpextrq $1, %xmm1, %rax
 ; VLNODQ-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm4
 ; VLNODQ-NEXT: vmovq %xmm1, %rax
 ; VLNODQ-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm1
-; VLNODQ-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm4[0]
+; VLNODQ-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm4[0]
 ; VLNODQ-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
 ; VLNODQ-NEXT: vinsertf64x4 $1, %ymm2, %zmm1, %zmm1
 ; VLNODQ-NEXT: retq
...
 ; AVX512BW-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm3
 ; AVX512BW-NEXT: vmovq %xmm2, %rax
 ; AVX512BW-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm2
-; AVX512BW-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX512BW-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, %xmm3
 ; AVX512BW-NEXT: vpextrq $1, %xmm3, %rax
 ; AVX512BW-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm4
 ; AVX512BW-NEXT: vmovq %xmm3, %rax
 ; AVX512BW-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm3
-; AVX512BW-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; AVX512BW-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
 ; AVX512BW-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
 ; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm3
 ; AVX512BW-NEXT: vpextrq $1, %xmm3, %rax
 ; AVX512BW-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm4
 ; AVX512BW-NEXT: vmovq %xmm3, %rax
 ; AVX512BW-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm3
-; AVX512BW-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; AVX512BW-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
 ; AVX512BW-NEXT: vpextrq $1, %xmm0, %rax
 ; AVX512BW-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm4
 ; AVX512BW-NEXT: vmovq %xmm0, %rax
 ; AVX512BW-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm0
-; AVX512BW-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm4[0]
+; AVX512BW-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm4[0]
 ; AVX512BW-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
 ; AVX512BW-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
 ; AVX512BW-NEXT: vextracti32x4 $3, %zmm1, %xmm2
...
 ; AVX512BW-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm3
 ; AVX512BW-NEXT: vmovq %xmm2, %rax
 ; AVX512BW-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm2
-; AVX512BW-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX512BW-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; AVX512BW-NEXT: vextracti32x4 $2, %zmm1, %xmm3
 ; AVX512BW-NEXT: vpextrq $1, %xmm3, %rax
 ; AVX512BW-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm4
 ; AVX512BW-NEXT: vmovq %xmm3, %rax
 ; AVX512BW-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm3
-; AVX512BW-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; AVX512BW-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
 ; AVX512BW-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
 ; AVX512BW-NEXT: vextracti128 $1, %ymm1, %xmm3
 ; AVX512BW-NEXT: vpextrq $1, %xmm3, %rax
 ; AVX512BW-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm4
 ; AVX512BW-NEXT: vmovq %xmm3, %rax
 ; AVX512BW-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm3
-; AVX512BW-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; AVX512BW-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
 ; AVX512BW-NEXT: vpextrq $1, %xmm1, %rax
 ; AVX512BW-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm4
 ; AVX512BW-NEXT: vmovq %xmm1, %rax
 ; AVX512BW-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm1
-; AVX512BW-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm4[0]
+; AVX512BW-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm4[0]
 ; AVX512BW-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
 ; AVX512BW-NEXT: vinsertf64x4 $1, %ymm2, %zmm1, %zmm1
 ; AVX512BW-NEXT: retq
...
 ; VLNODQ-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm1
 ; VLNODQ-NEXT: vmovq %xmm0, %rax
 ; VLNODQ-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm0
-; VLNODQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; VLNODQ-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; VLNODQ-NEXT: retq
 %cmpres = fcmp ogt <2 x double> %a, zeroinitializer
 %1 = sitofp <2 x i1> %cmpres to <2 x double>
...
 ; VLNODQ-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm1
 ; VLNODQ-NEXT: vmovq %xmm0, %rax
 ; VLNODQ-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm0
-; VLNODQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; VLNODQ-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; VLNODQ-NEXT: retq
 %mask = icmp ult <2 x i32> %a, zeroinitializer
 %1 = uitofp <2 x i1> %mask to <2 x double>
...
 define <8 x double> @test_insert_128_v8f64(<8 x double> %x, double %y) {
 ; CHECK-LABEL: test_insert_128_v8f64:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0]
+; CHECK-NEXT: vmovlhps {{.*#+}} xmm1 = xmm0[0],xmm1[0]
 ; CHECK-NEXT: vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
 ; CHECK-NEXT: retq
 %r = insertelement <8 x double> %x, double %y, i32 1
...
 ; CHECK-LABEL: test_4xi64_to_2xi64_perm_mask0:
 ; CHECK: # BB#0:
 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
-; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; CHECK-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; CHECK-NEXT: vzeroupper
 ; CHECK-NEXT: retq
 %res = shufflevector <4 x i64> %vec, <4 x i64> undef, <2 x i32>
...
 ; CHECK: # BB#0:
 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
 ; CHECK-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; CHECK-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; CHECK-NEXT: vzeroupper
 ; CHECK-NEXT: retq
 %res = shufflevector <8 x i64> %vec, <8 x i64> undef, <2 x i32>
...
 ; CHECK-NEXT: vmovaps (%rdi), %zmm0
 ; CHECK-NEXT: vextractf32x4 $2, %zmm0, %xmm1
 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; CHECK-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; CHECK-NEXT: vzeroupper
 ; CHECK-NEXT: retq
 %vec = load <8 x i64>, <8 x i64>* %vp
...
 ; CHECK-LABEL: test_4xdouble_to_2xdouble_perm_mask0:
 ; CHECK: # BB#0:
 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
-; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; CHECK-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; CHECK-NEXT: vzeroupper
 ; CHECK-NEXT: retq
 %res = shufflevector <4 x double> %vec, <4 x double> undef, <2 x i32>
...
 define <2 x double> @test_2xdouble_unpack_low_mask0(<2 x double> %vec1, <2 x double> %vec2) {
 ; CHECK-LABEL: test_2xdouble_unpack_low_mask0:
 ; CHECK: # BB#0:
-; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; CHECK-NEXT: retq
 %res = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32>
 ret <2 x double> %res
...
 ;
 ; SSE-64-LABEL: test_buildvector_v2f64:
 ; SSE-64: # BB#0:
-; SSE-64-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE-64-NEXT: retq
 ;
 ; AVX-32-LABEL: test_buildvector_v2f64:
...
 ;
 ; AVX-64-LABEL: test_buildvector_v2f64:
 ; AVX-64: # BB#0:
-; AVX-64-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-64-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX-64-NEXT: retq
 %ins0 = insertelement <2 x double> undef, double %a0, i32 0
 %ins1 = insertelement <2 x double> %ins0, double %a1, i32 1
...
 ; SSE2-64: # BB#0:
 ; SSE2-64-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
 ; SSE2-64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE2-64-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; SSE2-64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; SSE2-64-NEXT: retq
 ;
 ; SSE41-64-LABEL: test_buildvector_v4f32:
...
 ;
 ; AVX-64-LABEL: test_buildvector_v4f64:
 ; AVX-64: # BB#0:
-; AVX-64-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
-; AVX-64-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-64-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX-64-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX-64-NEXT: retq
 %ins0 = insertelement <4 x double> undef, double %a0, i32 0
...
 ;
 ; AVX-64-LABEL: test_buildvector_v8f64:
 ; AVX-64: # BB#0:
-; AVX-64-NEXT: vunpcklpd {{.*#+}} xmm6 = xmm6[0],xmm7[0]
-; AVX-64-NEXT: vunpcklpd {{.*#+}} xmm4 = xmm4[0],xmm5[0]
+; AVX-64-NEXT: vmovlhps {{.*#+}} xmm6 = xmm6[0],xmm7[0]
+; AVX-64-NEXT: vmovlhps {{.*#+}} xmm4 = xmm4[0],xmm5[0]
 ; AVX-64-NEXT: vinsertf128 $1, %xmm6, %ymm4, %ymm4
-; AVX-64-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
-; AVX-64-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-64-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX-64-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX-64-NEXT: vinsertf64x4 $1, %ymm4, %zmm0, %zmm0
 ; AVX-64-NEXT: retq
...
 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
 ; SSE2-NEXT: xorps %xmm2, %xmm2
 ; SSE2-NEXT: movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
-; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; SSE2-NEXT: retq
 ;
 ; SSE41-LABEL: test_negative_zero_1:
...
 ; SSE2: # BB#0:
 ; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; SSE2-NEXT: retq
 ;
 ; SSE41-LABEL: test_buildvector_v4f32_register:
...
 ; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE2-NEXT: retq
 ;
 ; SSE41-LABEL: test_buildvector_v4f32_load:
...
 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
 ; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; SSE2-NEXT: retq
 ;
 ; SSE41-LABEL: test_buildvector_v4f32_partial_load:
...
 ; SSE-NEXT: movb $0, -{{[0-9]+}}(%rsp)
 ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
 ; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE-NEXT: popq %rbx
 ; SSE-NEXT: popq %r14
 ; SSE-NEXT: retq
...
 ; SSE-NEXT: movb $0, -{{[0-9]+}}(%rsp)
 ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
 ; SSE-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
-; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; SSE-NEXT: popq %rbx
 ; SSE-NEXT: popq %r14
 ; SSE-NEXT: retq
...
 ; SSE-NEXT: cvtss2sd %xmm5, %xmm4
 ; SSE-NEXT: andps %xmm8, %xmm4
 ; SSE-NEXT: orps %xmm0, %xmm4
-; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm4[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm2 = xmm2[0],xmm4[0]
 ; SSE-NEXT: movaps %xmm1, %xmm0
 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE-NEXT: andps %xmm7, %xmm0
...
 ; SSE-NEXT: cvtss2sd %xmm6, %xmm0
 ; SSE-NEXT: andps %xmm8, %xmm0
 ; SSE-NEXT: orps %xmm0, %xmm1
-; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm3[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm3[0]
 ; SSE-NEXT: movaps %xmm2, %xmm0
 ; SSE-NEXT: retq
 ;
...
 define <2 x i64> @test14(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test14:
 ; CHECK: # BB#0:
-; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; CHECK-NEXT: retq
 %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32>
 %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32>
...
 define <2 x i64> @test16(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test16:
 ; CHECK: # BB#0:
-; CHECK-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
 ; CHECK-NEXT: movaps %xmm1, %xmm0
 ; CHECK-NEXT: retq
 %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32>
...
 ; CHECK-LABEL: test:
 ; CHECK: # BB#0: # %entry
 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; CHECK-NEXT: movaps %xmm0, (%eax)
 ; CHECK-NEXT: retl
 entry:
...
 ; SSE-NEXT: subss %xmm4, %xmm1
 ; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
 ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: not_a_hsub_2:
...
 ; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1]
 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
 ; SSE-NEXT: addss %xmm2, %xmm0
-; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
 ; SSE-NEXT: movaps %xmm1, %xmm0
 ; SSE-NEXT: retq
 ;
...
 ; CHECK-LIBCALL-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
 ; CHECK-LIBCALL-NEXT: unpcklps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Folded Reload
 ; CHECK-LIBCALL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
-; CHECK-LIBCALL-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-LIBCALL-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; CHECK-LIBCALL-NEXT: addq $48, %rsp
 ; CHECK-LIBCALL-NEXT: popq %rbx
 ; CHECK-LIBCALL-NEXT: retq
...
 ; CHECK-I686-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; CHECK-I686-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; CHECK-I686-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-I686-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; CHECK-I686-NEXT: addl $56, %esp
 ; CHECK-I686-NEXT: popl %esi
 ; CHECK-I686-NEXT: retl
...
 ; CHECK-LIBCALL-NEXT: movss {{[0-9]+}}(%rsp), %xmm0 # 4-byte Reload
 ; CHECK-LIBCALL-NEXT: # xmm0 = mem[0],zero,zero,zero
 ; CHECK-LIBCALL-NEXT: cvtss2sd %xmm0, %xmm0
-; CHECK-LIBCALL-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-LIBCALL-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; CHECK-LIBCALL-NEXT: movss {{[0-9]+}}(%rsp), %xmm1 # 4-byte Reload
 ; CHECK-LIBCALL-NEXT: # xmm1 = mem[0],zero,zero,zero
 ; CHECK-LIBCALL-NEXT: cvtss2sd %xmm1, %xmm2
 ; CHECK-LIBCALL-NEXT: movss {{[0-9]+}}(%rsp), %xmm1 # 4-byte Reload
 ; CHECK-LIBCALL-NEXT: # xmm1 = mem[0],zero,zero,zero
 ; CHECK-LIBCALL-NEXT: cvtss2sd %xmm1, %xmm1
-; CHECK-LIBCALL-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; CHECK-LIBCALL-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; CHECK-LIBCALL-NEXT: addq $16, %rsp
 ; CHECK-LIBCALL-NEXT: popq %rbx
 ; CHECK-LIBCALL-NEXT: retq
...
 ; BWON-F16C-NEXT: vcvtph2ps %xmm3, %xmm3
 ; BWON-F16C-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
 ; BWON-F16C-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
-; BWON-F16C-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; BWON-F16C-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; BWON-F16C-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
 ; BWON-F16C-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
-; BWON-F16C-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; BWON-F16C-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; BWON-F16C-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; BWON-F16C-NEXT: retq
 ;
...
 ; X32: ## BB#0:
 ; X32-NEXT: vhaddps %xmm1, %xmm0, %xmm0
 ; X32-NEXT: vhaddps %xmm3, %xmm2, %xmm1
-; X32-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_unpackl_fhadd_128:
 ; X64: ## BB#0:
 ; X64-NEXT: vhaddps %xmm1, %xmm0, %xmm0
 ; X64-NEXT: vhaddps %xmm3, %xmm2, %xmm1
-; X64-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X64-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X64-NEXT: retq
 %1 = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1)
 %2 = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a2, <4 x float> %a3)
...
 ; X64-SSE-NEXT: movq %xmm1, %rax
 ; X64-SSE-NEXT: xorps %xmm1, %xmm1
 ; X64-SSE-NEXT: cvtsi2sdq %rax, %xmm1
-; X64-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X64-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X64-SSE-NEXT: retq
 ;
 ; X64-AVX-LABEL: clamp_sitofp_2i64_2f64:
...
 ; X64-AVX-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm1
 ; X64-AVX-NEXT: vmovq %xmm0, %rax
 ; X64-AVX-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm0
-; X64-AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X64-AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X64-AVX-NEXT: retq
 %clo = icmp slt <2 x i64> %a,
 %lo = select <2 x i1> %clo, <2 x i64> , <2 x i64> %a
...
 ; AVX: ## BB#0:
 ; AVX-NEXT: vextractf128 $1, %ymm1, %xmm2
 ; AVX-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero
-; AVX-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
 ; AVX-NEXT: retq
 ;
...
 ; SSE2: # BB#0:
 ; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE2-NEXT: retq
 ;
 ; SSE41-LABEL: merge_4f32_f32_012u:
...
 ; SSE2: # BB#0:
 ; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE2-NEXT: retq
 ;
 ; SSE41-LABEL: merge_4f32_f32_019u:
...
 ; SSE: # BB#0:
 ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
 ; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: merge_2i64_i64_12_volatile:
 ; AVX: # BB#0:
 ; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
 ; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX-NEXT: retq
 ;
 ; X32-SSE1-LABEL: merge_2i64_i64_12_volatile:
...
 ; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
 ; SSE2-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE2-NEXT: retq
 ;
 ; SSE41-LABEL: merge_4f32_f32_2345_volatile:
...
 define <4 x float> @test_movlhps(<4 x float> %a0, <4 x float> %a1) {
 ; GENERIC-LABEL: test_movlhps:
 ; GENERIC: # BB#0:
-; GENERIC-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; GENERIC-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
 ; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_movlhps:
 ; ATOM: # BB#0:
-; ATOM-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; ATOM-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
 ; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00]
 ; ATOM-NEXT: retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_movlhps:
 ; SLM: # BB#0:
-; SLM-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; SLM-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
 ; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
 ; SLM-NEXT: retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_movlhps:
 ; SANDY: # BB#0:
-; SANDY-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; SANDY-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
 ; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT: retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_movlhps:
 ; HASWELL: # BB#0:
-; HASWELL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; HASWELL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
 ; HASWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
 ; HASWELL-NEXT: retq # sched: [2:1.00]
 ;
 ; SKYLAKE-LABEL: test_movlhps:
 ; SKYLAKE: # BB#0:
-; SKYLAKE-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; SKYLAKE-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
 ; SKYLAKE-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
 ; SKYLAKE-NEXT: retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_movlhps:
 ; BTVER2: # BB#0:
-; BTVER2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
+; BTVER2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
 ; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
 ; BTVER2-NEXT: retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_movlhps:
 ; ZNVER1: # BB#0:
-; ZNVER1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
+; ZNVER1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
 ; ZNVER1-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
 ; ZNVER1-NEXT: retq # sched: [1:0.50]
 %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32>
...
 ; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X32-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_mm_set_epi32:
...
 ; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X32-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_mm_set_epi64x:
...
 ; X32: # BB#0:
 ; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
 ; X32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; X32-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_mm_set_pd:
 ; X64: # BB#0:
-; X64-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; X64-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
 ; X64-NEXT: movaps %xmm1, %xmm0
 ; X64-NEXT: retq
 %res0 = insertelement <2 x double> undef, double %a1, i32 0
...
 ; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X32-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_mm_setr_epi32:
...
 ; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X32-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_mm_setr_epi64x:
...
 ; X32: # BB#0:
 ; X32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
 ; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; X32-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_mm_setr_pd:
 ; X64: # BB#0:
-; X64-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X64-NEXT: retq
 %res0 = insertelement <2 x double> undef, double %a0, i32 0
 %res1 = insertelement <2 x double> %res0, double %a1, i32 1
...
 define <2 x i64> @test_mm_unpacklo_epi64(<2 x i64> %a0, <2 x i64> %a1) {
 ; X32-LABEL: test_mm_unpacklo_epi64:
 ; X32: # BB#0:
-; X32-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_mm_unpacklo_epi64:
 ; X64: # BB#0:
-; X64-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X64-NEXT: retq
 %res = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32>
 ret <2 x i64> %res
...
 define <2 x double> @test_mm_unpacklo_pd(<2 x double> %a0, <2 x double> %a1) {
 ; X32-LABEL: test_mm_unpacklo_pd:
 ; X32: # BB#0:
-; X32-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_mm_unpacklo_pd:
 ; X64: # BB#0:
-; X64-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X64-NEXT: retq
 %res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32>
 ret <2 x double> %res
...
 define <2 x double> @test_movsd_reg(<2 x double> %a0, <2 x double> %a1) {
 ; GENERIC-LABEL: test_movsd_reg:
 ; GENERIC: # BB#0:
-; GENERIC-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
+; GENERIC-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
 ; GENERIC-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_movsd_reg:
 ; ATOM: # BB#0:
-; ATOM-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
+; ATOM-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
 ; ATOM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
 ; ATOM-NEXT: nop # sched: [1:0.50]
 ; ATOM-NEXT: nop # sched: [1:0.50]
...
 ;
 ; SLM-LABEL: test_movsd_reg:
 ; SLM: # BB#0:
-; SLM-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
+; SLM-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
 ; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
 ; SLM-NEXT: retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_movsd_reg:
 ; SANDY: # BB#0:
-; SANDY-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00]
+; SANDY-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00]
 ; SANDY-NEXT: retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_movsd_reg:
 ; HASWELL: # BB#0:
-; HASWELL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00]
+; HASWELL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00]
 ; HASWELL-NEXT: retq # sched: [2:1.00]
 ;
 ; SKYLAKE-LABEL: test_movsd_reg:
 ; SKYLAKE: # BB#0:
-; SKYLAKE-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00]
+; SKYLAKE-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00]
 ; SKYLAKE-NEXT: retq # sched: [2:1.00]
 ;
 ; BTVER2-LABEL: test_movsd_reg:
 ; BTVER2: # BB#0:
-; BTVER2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:0.50]
+; BTVER2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:0.50]
 ; BTVER2-NEXT: retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_movsd_reg:
 ; ZNVER1: # BB#0:
-; ZNVER1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:0.50]
+; ZNVER1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:0.50]
 ; ZNVER1-NEXT: retq # sched: [1:0.50]
 %1 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32>
 ret <2 x double> %1
...
 ; X64-LABEL: test2:
 ; X64: # BB#0:
 ; X64-NEXT: movaps (%rsi), %xmm1
-; X64-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; X64-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
 ; X64-NEXT: movaps %xmm1, (%rdi)
 ; X64-NEXT: retq
 %tmp3 = load <2 x double>, <2 x double>* %A, align 16
...
 ; X64: # BB#0:
 ; X64-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
 ; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X64-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; X64-NEXT: retq
 %tmp = insertelement <4 x float> undef, float %a, i32 0 ; <<4 x float>> [#uses=1]
 %tmp11 = insertelement <4 x float> %tmp, float %b, i32 1 ; <<4 x float>> [#uses=1]
...
 ; X64: # BB#0:
 ; X64-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
 ; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X64-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; X64-NEXT: retq
 %tmp = insertelement <4 x float> undef, float %a, i32 0 ; <<4 x float>> [#uses=1]
 %tmp11 = insertelement <4 x float> %tmp, float %b, i32 1 ; <<4 x float>> [#uses=1]
...
 ;
 ; X64-LABEL: test11:
 ; X64: # BB#0:
-; X64-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X64-NEXT: retq
 %tmp = insertelement <2 x double> undef, double %a, i32 0 ; <<2 x double>> [#uses=1]
 %tmp7 = insertelement <2 x double> %tmp, double %b, i32 1 ; <<2 x double>> [#uses=1]
...
 ; X86-NEXT: movaps %xmm2, %xmm0
 ; X86-NEXT: addps %xmm1, %xmm0
 ; X86-NEXT: subps %xmm1, %xmm2
-; X86-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; X86-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: test14:
...
 ; X64-NEXT: movaps %xmm2, %xmm0
 ; X64-NEXT: addps %xmm1, %xmm0
 ; X64-NEXT: subps %xmm1, %xmm2
-; X64-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; X64-NEXT: retq
 %tmp = load <4 x float>, <4 x float>* %y ; <<4 x float>> [#uses=2]
 %tmp5 = load <4 x float>, <4 x float>* %x ; <<4 x float>> [#uses=2]
...
 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
 ; SSE-NEXT: subss %xmm1, %xmm0
-; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm0[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm2 = xmm2[0],xmm0[0]
 ; SSE-NEXT: movaps %xmm2, %xmm0
 ; SSE-NEXT: retq
 ;
...
 ; SSE-NEXT: addss %xmm0, %xmm1
 ; SSE-NEXT: unpcklps {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
 ; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
-; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; SSE-NEXT: movaps %xmm2, %xmm0
 ; SSE-NEXT: retq
 ;
934934 ; X32: ## BB#0:
935935 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
936936 ; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
937 ; X32-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
937 ; X32-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
938938 ; X32-NEXT: movaps %xmm1, %xmm0
939939 ; X32-NEXT: retl
940940 ;
941941 ; X64-LABEL: insertps_with_undefs:
942942 ; X64: ## BB#0:
943943 ; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
944 ; X64-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
944 ; X64-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
945945 ; X64-NEXT: movaps %xmm1, %xmm0
946946 ; X64-NEXT: retq
947947 %1 = load float, float* %b, align 4
23742374 ; SSE-NEXT: fldcw -{{[0-9]+}}(%rsp)
23752375 ; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
23762376 ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2377 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2377 ; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
23782378 ; SSE-NEXT: xorps %xmm1, %xmm1
23792379 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3]
23802380 ; SSE-NEXT: retq
23872387 ; AVX-NEXT: fisttpll -{{[0-9]+}}(%rsp)
23882388 ; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
23892389 ; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
2390 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
2390 ; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
23912391 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
23922392 ; AVX-NEXT: retq
23932393 %cvt = fptosi <2 x x86_fp80> %a to <2 x i32>
4545 ;
4646 ; X64-LABEL: t3:
4747 ; X64: # BB#0:
48 ; X64-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
48 ; X64-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
4949 ; X64-NEXT: movaps %xmm1, %xmm0
5050 ; X64-NEXT: retq
5151 %tmp1 = insertelement <2 x double> %tmp, double %s, i32 1
2424 ; SSE-NEXT: movq %xmm0, %rax
2525 ; SSE-NEXT: xorps %xmm0, %xmm0
2626 ; SSE-NEXT: cvtsi2sdq %rax, %xmm0
27 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
27 ; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
2828 ; SSE-NEXT: movaps %xmm1, %xmm0
2929 ; SSE-NEXT: retq
3030 ;
3434 ; VEX-NEXT: vcvtsi2sdq %rax, %xmm1, %xmm1
3535 ; VEX-NEXT: vmovq %xmm0, %rax
3636 ; VEX-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm0
37 ; VEX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
37 ; VEX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
3838 ; VEX-NEXT: retq
3939 ;
4040 ; AVX512F-LABEL: sitofp_2i64_to_2f64:
4343 ; AVX512F-NEXT: vcvtsi2sdq %rax, %xmm1, %xmm1
4444 ; AVX512F-NEXT: vmovq %xmm0, %rax
4545 ; AVX512F-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm0
46 ; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
46 ; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
4747 ; AVX512F-NEXT: retq
4848 ;
4949 ; AVX512VL-LABEL: sitofp_2i64_to_2f64:
5252 ; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm1, %xmm1
5353 ; AVX512VL-NEXT: vmovq %xmm0, %rax
5454 ; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm0
55 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
55 ; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
5656 ; AVX512VL-NEXT: retq
5757 ;
5858 ; AVX512DQ-LABEL: sitofp_2i64_to_2f64:
222222 ; SSE-NEXT: movq %xmm0, %rax
223223 ; SSE-NEXT: xorps %xmm0, %xmm0
224224 ; SSE-NEXT: cvtsi2sdq %rax, %xmm0
225 ; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm0[0]
225 ; SSE-NEXT: movlhps {{.*#+}} xmm2 = xmm2[0],xmm0[0]
226226 ; SSE-NEXT: movq %xmm1, %rax
227227 ; SSE-NEXT: cvtsi2sdq %rax, %xmm3
228228 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
229229 ; SSE-NEXT: movq %xmm0, %rax
230230 ; SSE-NEXT: xorps %xmm0, %xmm0
231231 ; SSE-NEXT: cvtsi2sdq %rax, %xmm0
232 ; SSE-NEXT: unpcklpd {{.*#+}} xmm3 = xmm3[0],xmm0[0]
232 ; SSE-NEXT: movlhps {{.*#+}} xmm3 = xmm3[0],xmm0[0]
233233 ; SSE-NEXT: movaps %xmm2, %xmm0
234234 ; SSE-NEXT: movaps %xmm3, %xmm1
235235 ; SSE-NEXT: retq
241241 ; AVX1-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2
242242 ; AVX1-NEXT: vmovq %xmm1, %rax
243243 ; AVX1-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm1
244 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
244 ; AVX1-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
245245 ; AVX1-NEXT: vpextrq $1, %xmm0, %rax
246246 ; AVX1-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm2
247247 ; AVX1-NEXT: vmovq %xmm0, %rax
248248 ; AVX1-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm0
249 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
249 ; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
250250 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
251251 ; AVX1-NEXT: retq
252252 ;
257257 ; AVX2-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2
258258 ; AVX2-NEXT: vmovq %xmm1, %rax
259259 ; AVX2-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm1
260 ; AVX2-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
260 ; AVX2-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
261261 ; AVX2-NEXT: vpextrq $1, %xmm0, %rax
262262 ; AVX2-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm2
263263 ; AVX2-NEXT: vmovq %xmm0, %rax
264264 ; AVX2-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm0
265 ; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
265 ; AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
266266 ; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
267267 ; AVX2-NEXT: retq
268268 ;
273273 ; AVX512F-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2
274274 ; AVX512F-NEXT: vmovq %xmm1, %rax
275275 ; AVX512F-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm1
276 ; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
276 ; AVX512F-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
277277 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
278278 ; AVX512F-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm2
279279 ; AVX512F-NEXT: vmovq %xmm0, %rax
280280 ; AVX512F-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm0
281 ; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
281 ; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
282282 ; AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
283283 ; AVX512F-NEXT: retq
284284 ;
289289 ; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2
290290 ; AVX512VL-NEXT: vmovq %xmm1, %rax
291291 ; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm1
292 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
292 ; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
293293 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
294294 ; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm2
295295 ; AVX512VL-NEXT: vmovq %xmm0, %rax
296296 ; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm0
297 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
297 ; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
298298 ; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
299299 ; AVX512VL-NEXT: retq
300300 ;
477477 ; AVX512F-NEXT: vcvtusi2sdq %rax, %xmm1, %xmm1
478478 ; AVX512F-NEXT: vmovq %xmm0, %rax
479479 ; AVX512F-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm0
480 ; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
480 ; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
481481 ; AVX512F-NEXT: retq
482482 ;
483483 ; AVX512VL-LABEL: uitofp_2i64_to_2f64:
486486 ; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm1, %xmm1
487487 ; AVX512VL-NEXT: vmovq %xmm0, %rax
488488 ; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm0
489 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
489 ; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
490490 ; AVX512VL-NEXT: retq
491491 ;
492492 ; AVX512DQ-LABEL: uitofp_2i64_to_2f64:
795795 ; AVX512F-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm2
796796 ; AVX512F-NEXT: vmovq %xmm1, %rax
797797 ; AVX512F-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm1
798 ; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
798 ; AVX512F-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
799799 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
800800 ; AVX512F-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm2
801801 ; AVX512F-NEXT: vmovq %xmm0, %rax
802802 ; AVX512F-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm0
803 ; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
803 ; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
804804 ; AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
805805 ; AVX512F-NEXT: retq
806806 ;
811811 ; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm2
812812 ; AVX512VL-NEXT: vmovq %xmm1, %rax
813813 ; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm1
814 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
814 ; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
815815 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
816816 ; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm2
817817 ; AVX512VL-NEXT: vmovq %xmm0, %rax
818818 ; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm0
819 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
819 ; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
820820 ; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
821821 ; AVX512VL-NEXT: retq
822822 ;
13611361 ; SSE-NEXT: xorps %xmm0, %xmm0
13621362 ; SSE-NEXT: cvtsi2ssq %rax, %xmm0
13631363 ; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
1364 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
1364 ; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
13651365 ; SSE-NEXT: movaps %xmm1, %xmm0
13661366 ; SSE-NEXT: retq
13671367 ;
21882188 ; SSE-NEXT: addss %xmm0, %xmm0
21892189 ; SSE-NEXT: .LBB47_12:
21902190 ; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2191 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
2191 ; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
21922192 ; SSE-NEXT: movaps %xmm1, %xmm0
21932193 ; SSE-NEXT: retq
21942194 ;
25762576 ; SSE-NEXT: movq %xmm1, %rax
25772577 ; SSE-NEXT: xorps %xmm1, %xmm1
25782578 ; SSE-NEXT: cvtsi2sdq %rax, %xmm1
2579 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2579 ; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
25802580 ; SSE-NEXT: retq
25812581 ;
25822582 ; VEX-LABEL: sitofp_load_2i64_to_2f64:
25862586 ; VEX-NEXT: vcvtsi2sdq %rax, %xmm1, %xmm1
25872587 ; VEX-NEXT: vmovq %xmm0, %rax
25882588 ; VEX-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm0
2589 ; VEX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2589 ; VEX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
25902590 ; VEX-NEXT: retq
25912591 ;
25922592 ; AVX512F-LABEL: sitofp_load_2i64_to_2f64:
25962596 ; AVX512F-NEXT: vcvtsi2sdq %rax, %xmm1, %xmm1
25972597 ; AVX512F-NEXT: vmovq %xmm0, %rax
25982598 ; AVX512F-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm0
2599 ; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2599 ; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
26002600 ; AVX512F-NEXT: retq
26012601 ;
26022602 ; AVX512VL-LABEL: sitofp_load_2i64_to_2f64:
26062606 ; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm1, %xmm1
26072607 ; AVX512VL-NEXT: vmovq %xmm0, %rax
26082608 ; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm0
2609 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2609 ; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
26102610 ; AVX512VL-NEXT: retq
26112611 ;
26122612 ; AVX512DQ-LABEL: sitofp_load_2i64_to_2f64:
27182718 ; SSE-NEXT: movq %xmm1, %rax
27192719 ; SSE-NEXT: xorps %xmm1, %xmm1
27202720 ; SSE-NEXT: cvtsi2sdq %rax, %xmm1
2721 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2721 ; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
27222722 ; SSE-NEXT: movq %xmm2, %rax
27232723 ; SSE-NEXT: xorps %xmm1, %xmm1
27242724 ; SSE-NEXT: cvtsi2sdq %rax, %xmm1
27262726 ; SSE-NEXT: movq %xmm2, %rax
27272727 ; SSE-NEXT: xorps %xmm2, %xmm2
27282728 ; SSE-NEXT: cvtsi2sdq %rax, %xmm2
2729 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
2729 ; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
27302730 ; SSE-NEXT: retq
27312731 ;
27322732 ; AVX1-LABEL: sitofp_load_4i64_to_4f64:
27372737 ; AVX1-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2
27382738 ; AVX1-NEXT: vmovq %xmm1, %rax
27392739 ; AVX1-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm1
2740 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
2740 ; AVX1-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
27412741 ; AVX1-NEXT: vpextrq $1, %xmm0, %rax
27422742 ; AVX1-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm2
27432743 ; AVX1-NEXT: vmovq %xmm0, %rax
27442744 ; AVX1-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm0
2745 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
2745 ; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
27462746 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
27472747 ; AVX1-NEXT: retq
27482748 ;
27542754 ; AVX2-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2
27552755 ; AVX2-NEXT: vmovq %xmm1, %rax
27562756 ; AVX2-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm1
2757 ; AVX2-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
2757 ; AVX2-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
27582758 ; AVX2-NEXT: vpextrq $1, %xmm0, %rax
27592759 ; AVX2-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm2
27602760 ; AVX2-NEXT: vmovq %xmm0, %rax
27612761 ; AVX2-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm0
2762 ; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
2762 ; AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
27632763 ; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
27642764 ; AVX2-NEXT: retq
27652765 ;
27712771 ; AVX512F-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2
27722772 ; AVX512F-NEXT: vmovq %xmm1, %rax
27732773 ; AVX512F-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm1
2774 ; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
2774 ; AVX512F-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
27752775 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
27762776 ; AVX512F-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm2
27772777 ; AVX512F-NEXT: vmovq %xmm0, %rax
27782778 ; AVX512F-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm0
2779 ; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
2779 ; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
27802780 ; AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
27812781 ; AVX512F-NEXT: retq
27822782 ;
27882788 ; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2
27892789 ; AVX512VL-NEXT: vmovq %xmm1, %rax
27902790 ; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm1
2791 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
2791 ; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
27922792 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
27932793 ; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm2
27942794 ; AVX512VL-NEXT: vmovq %xmm0, %rax
27952795 ; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm0
2796 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
2796 ; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
27972797 ; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
27982798 ; AVX512VL-NEXT: retq
27992799 ;
29162916 ; AVX512F-NEXT: vcvtusi2sdq %rax, %xmm1, %xmm1
29172917 ; AVX512F-NEXT: vmovq %xmm0, %rax
29182918 ; AVX512F-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm0
2919 ; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2919 ; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
29202920 ; AVX512F-NEXT: retq
29212921 ;
29222922 ; AVX512VL-LABEL: uitofp_load_2i64_to_2f64:
29262926 ; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm1, %xmm1
29272927 ; AVX512VL-NEXT: vmovq %xmm0, %rax
29282928 ; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm0
2929 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2929 ; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
29302930 ; AVX512VL-NEXT: retq
29312931 ;
29322932 ; AVX512DQ-LABEL: uitofp_load_2i64_to_2f64:
31373137 ; AVX512F-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm2
31383138 ; AVX512F-NEXT: vmovq %xmm1, %rax
31393139 ; AVX512F-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm1
3140 ; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
3140 ; AVX512F-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
31413141 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
31423142 ; AVX512F-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm2
31433143 ; AVX512F-NEXT: vmovq %xmm0, %rax
31443144 ; AVX512F-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm0
3145 ; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
3145 ; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
31463146 ; AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
31473147 ; AVX512F-NEXT: retq
31483148 ;
31543154 ; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm2
31553155 ; AVX512VL-NEXT: vmovq %xmm1, %rax
31563156 ; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm1
3157 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
3157 ; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
31583158 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
31593159 ; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm2
31603160 ; AVX512VL-NEXT: vmovq %xmm0, %rax
31613161 ; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm0
3162 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
3162 ; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
31633163 ; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
31643164 ; AVX512VL-NEXT: retq
31653165 ;
33223322 ; SSE-NEXT: xorps %xmm1, %xmm1
33233323 ; SSE-NEXT: cvtsi2ssq %rax, %xmm1
33243324 ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
3325 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
3325 ; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
33263326 ; SSE-NEXT: retq
33273327 ;
33283328 ; AVX1-LABEL: sitofp_load_4i64_to_4f32:
34903490 ; SSE-NEXT: xorps %xmm1, %xmm1
34913491 ; SSE-NEXT: cvtsi2ssq %rax, %xmm1
34923492 ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
3493 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm4[0]
3493 ; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm4[0]
34943494 ; SSE-NEXT: movq %xmm3, %rax
34953495 ; SSE-NEXT: xorps %xmm4, %xmm4
34963496 ; SSE-NEXT: cvtsi2ssq %rax, %xmm4
35073507 ; SSE-NEXT: xorps %xmm2, %xmm2
35083508 ; SSE-NEXT: cvtsi2ssq %rax, %xmm2
35093509 ; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
3510 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm4[0]
3510 ; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm4[0]
35113511 ; SSE-NEXT: retq
35123512 ;
35133513 ; AVX1-LABEL: sitofp_load_8i64_to_8f32:
38153815 ; SSE-NEXT: addss %xmm2, %xmm2
38163816 ; SSE-NEXT: .LBB76_12:
38173817 ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
3818 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
3818 ; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
38193819 ; SSE-NEXT: retq
38203820 ;
38213821 ; AVX1-LABEL: uitofp_load_4i64_to_4f32:
42264226 ; SSE-NEXT: cvtsi2ssq %rax, %xmm1
42274227 ; SSE-NEXT: addss %xmm1, %xmm1
42284228 ; SSE-NEXT: .LBB80_21:
4229 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0]
4229 ; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm3[0]
42304230 ; SSE-NEXT: unpcklps {{.*#+}} xmm5 = xmm5[0],xmm7[0],xmm5[1],xmm7[1]
42314231 ; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
42324232 ; SSE-NEXT: movq %xmm2, %rax
42464246 ; SSE-NEXT: addss %xmm2, %xmm2
42474247 ; SSE-NEXT: .LBB80_24:
42484248 ; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
4249 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm5[0]
4249 ; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm5[0]
42504250 ; SSE-NEXT: retq
42514251 ;
42524252 ; AVX1-LABEL: uitofp_load_8i64_to_8f32:
15691569 ; AVX1-NEXT: vcvtph2ps %xmm1, %xmm1
15701570 ; AVX1-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
15711571 ; AVX1-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
1572 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1572 ; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
15731573 ; AVX1-NEXT: retq
15741574 ;
15751575 ; AVX2-LABEL: cvt_2i16_to_2f64:
15861586 ; AVX2-NEXT: vcvtph2ps %xmm1, %xmm1
15871587 ; AVX2-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
15881588 ; AVX2-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
1589 ; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1589 ; AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
15901590 ; AVX2-NEXT: retq
15911591 ;
15921592 ; AVX512F-LABEL: cvt_2i16_to_2f64:
16031603 ; AVX512F-NEXT: vcvtph2ps %ymm1, %zmm1
16041604 ; AVX512F-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
16051605 ; AVX512F-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
1606 ; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1606 ; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
16071607 ; AVX512F-NEXT: vzeroupper
16081608 ; AVX512F-NEXT: retq
16091609 ;
16201620 ; AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1
16211621 ; AVX512VL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
16221622 ; AVX512VL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
1623 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1623 ; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
16241624 ; AVX512VL-NEXT: retq
16251625 %1 = bitcast <2 x i16> %a0 to <2 x half>
16261626 %2 = fpext <2 x half> %1 to <2 x double>
16511651 ; AVX1-NEXT: vcvtph2ps %xmm3, %xmm3
16521652 ; AVX1-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
16531653 ; AVX1-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
1654 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
1654 ; AVX1-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
16551655 ; AVX1-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
16561656 ; AVX1-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
1657 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1657 ; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
16581658 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
16591659 ; AVX1-NEXT: retq
16601660 ;
16811681 ; AVX2-NEXT: vcvtph2ps %xmm3, %xmm3
16821682 ; AVX2-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
16831683 ; AVX2-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
1684 ; AVX2-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
1684 ; AVX2-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
16851685 ; AVX2-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
16861686 ; AVX2-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
1687 ; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1687 ; AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
16881688 ; AVX2-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
16891689 ; AVX2-NEXT: retq
16901690 ;
17111711 ; AVX512F-NEXT: vcvtph2ps %ymm3, %zmm3
17121712 ; AVX512F-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
17131713 ; AVX512F-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
1714 ; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
1714 ; AVX512F-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
17151715 ; AVX512F-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
17161716 ; AVX512F-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
1717 ; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1717 ; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
17181718 ; AVX512F-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
17191719 ; AVX512F-NEXT: retq
17201720 ;
17411741 ; AVX512VL-NEXT: vcvtph2ps %xmm3, %xmm3
17421742 ; AVX512VL-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
17431743 ; AVX512VL-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
1744 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
1744 ; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
17451745 ; AVX512VL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
17461746 ; AVX512VL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
1747 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1747 ; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
17481748 ; AVX512VL-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
17491749 ; AVX512VL-NEXT: retq
17501750 %1 = bitcast <4 x i16> %a0 to <4 x half>
17651765 ; AVX1-NEXT: vcvtph2ps %xmm1, %xmm1
17661766 ; AVX1-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
17671767 ; AVX1-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
1768 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1768 ; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
17691769 ; AVX1-NEXT: retq
17701770 ;
17711771 ; AVX2-LABEL: cvt_8i16_to_2f64:
17801780 ; AVX2-NEXT: vcvtph2ps %xmm1, %xmm1
17811781 ; AVX2-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
17821782 ; AVX2-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
1783 ; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1783 ; AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
17841784 ; AVX2-NEXT: retq
17851785 ;
17861786 ; AVX512F-LABEL: cvt_8i16_to_2f64:
17951795 ; AVX512F-NEXT: vcvtph2ps %ymm1, %zmm1
17961796 ; AVX512F-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
17971797 ; AVX512F-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
1798 ; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1798 ; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
17991799 ; AVX512F-NEXT: vzeroupper
18001800 ; AVX512F-NEXT: retq
18011801 ;
18131813 ; AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1
18141814 ; AVX512VL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
18151815 ; AVX512VL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
1816 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1816 ; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
18171817 ; AVX512VL-NEXT: retq
18181818 %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
18191819 %2 = bitcast <2 x i16> %1 to <2 x half>
18441844 ; AVX1-NEXT: vcvtph2ps %xmm3, %xmm3
18451845 ; AVX1-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
18461846 ; AVX1-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
1847 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
1847 ; AVX1-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
18481848 ; AVX1-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
18491849 ; AVX1-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
1850 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1850 ; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
18511851 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
18521852 ; AVX1-NEXT: retq
18531853 ;
18731873 ; AVX2-NEXT: vcvtph2ps %xmm3, %xmm3
18741874 ; AVX2-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
18751875 ; AVX2-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
1876 ; AVX2-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
1876 ; AVX2-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
18771877 ; AVX2-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
18781878 ; AVX2-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
1879 ; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1879 ; AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
18801880 ; AVX2-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
18811881 ; AVX2-NEXT: retq
18821882 ;
19021902 ; AVX512F-NEXT: vcvtph2ps %ymm3, %zmm3
19031903 ; AVX512F-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
19041904 ; AVX512F-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
1905 ; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
1905 ; AVX512F-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
19061906 ; AVX512F-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
19071907 ; AVX512F-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
1908 ; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1908 ; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
19091909 ; AVX512F-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
19101910 ; AVX512F-NEXT: retq
19111911 ;
19331933 ; AVX512VL-NEXT: vcvtph2ps %xmm3, %xmm3
19341934 ; AVX512VL-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
19351935 ; AVX512VL-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
1936 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
1936 ; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
19371937 ; AVX512VL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
19381938 ; AVX512VL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
1939 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1939 ; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
19401940 ; AVX512VL-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
19411941 ; AVX512VL-NEXT: retq
19421942 %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
19861986 ; AVX1-NEXT: vcvtph2ps %xmm7, %xmm7
19871987 ; AVX1-NEXT: vcvtss2sd %xmm7, %xmm7, %xmm7
19881988 ; AVX1-NEXT: vcvtss2sd %xmm6, %xmm6, %xmm6
1989 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm6 = xmm6[0],xmm7[0]
1989 ; AVX1-NEXT: vmovlhps {{.*#+}} xmm6 = xmm6[0],xmm7[0]
19901990 ; AVX1-NEXT: vcvtss2sd %xmm5, %xmm5, %xmm5
19911991 ; AVX1-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
1992 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm5[0],xmm0[0]
1992 ; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm5[0],xmm0[0]
19931993 ; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm0, %ymm0
19941994 ; AVX1-NEXT: vcvtss2sd %xmm4, %xmm4, %xmm4
19951995 ; AVX1-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
1996 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
1996 ; AVX1-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
19971997 ; AVX1-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
19981998 ; AVX1-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
1999 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
1999 ; AVX1-NEXT: vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
20002000 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
20012001 ; AVX1-NEXT: retq
20022002 ;
20402040 ; AVX2-NEXT: vcvtph2ps %xmm7, %xmm7
20412041 ; AVX2-NEXT: vcvtss2sd %xmm7, %xmm7, %xmm7
20422042 ; AVX2-NEXT: vcvtss2sd %xmm6, %xmm6, %xmm6
2043 ; AVX2-NEXT: vunpcklpd {{.*#+}} xmm6 = xmm6[0],xmm7[0]
2043 ; AVX2-NEXT: vmovlhps {{.*#+}} xmm6 = xmm6[0],xmm7[0]
20442044 ; AVX2-NEXT: vcvtss2sd %xmm5, %xmm5, %xmm5
20452045 ; AVX2-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
2046 ; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm5[0],xmm0[0]
2046 ; AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm5[0],xmm0[0]
20472047 ; AVX2-NEXT: vinsertf128 $1, %xmm6, %ymm0, %ymm0
20482048 ; AVX2-NEXT: vcvtss2sd %xmm4, %xmm4, %xmm4
20492049 ; AVX2-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
2050 ; AVX2-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
2050 ; AVX2-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
20512051 ; AVX2-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
20522052 ; AVX2-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
2053 ; AVX2-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
2053 ; AVX2-NEXT: vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
20542054 ; AVX2-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
20552055 ; AVX2-NEXT: retq
20562056 ;
20942094 ; AVX512F-NEXT: vcvtph2ps %ymm7, %zmm7
20952095 ; AVX512F-NEXT: vcvtss2sd %xmm7, %xmm7, %xmm7
20962096 ; AVX512F-NEXT: vcvtss2sd %xmm6, %xmm6, %xmm6
2097 ; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm6 = xmm6[0],xmm7[0]
2097 ; AVX512F-NEXT: vmovlhps {{.*#+}} xmm6 = xmm6[0],xmm7[0]
20982098 ; AVX512F-NEXT: vcvtss2sd %xmm5, %xmm5, %xmm5
20992099 ; AVX512F-NEXT: vcvtss2sd %xmm4, %xmm4, %xmm4
2100 ; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm4 = xmm5[0],xmm4[0]
2100 ; AVX512F-NEXT: vmovlhps {{.*#+}} xmm4 = xmm5[0],xmm4[0]
21012101 ; AVX512F-NEXT: vinsertf128 $1, %xmm6, %ymm4, %ymm4
21022102 ; AVX512F-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
21032103 ; AVX512F-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
2104 ; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
2104 ; AVX512F-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
21052105 ; AVX512F-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
21062106 ; AVX512F-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
2107 ; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
2107 ; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
21082108 ; AVX512F-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
21092109 ; AVX512F-NEXT: vinsertf64x4 $1, %ymm4, %zmm0, %zmm0
21102110 ; AVX512F-NEXT: retq
21492149 ; AVX512VL-NEXT: vcvtph2ps %xmm7, %xmm7
21502150 ; AVX512VL-NEXT: vcvtss2sd %xmm7, %xmm7, %xmm7
21512151 ; AVX512VL-NEXT: vcvtss2sd %xmm6, %xmm6, %xmm6
2152 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm6 = xmm6[0],xmm7[0]
2152 ; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm6 = xmm6[0],xmm7[0]
21532153 ; AVX512VL-NEXT: vcvtss2sd %xmm5, %xmm5, %xmm5
21542154 ; AVX512VL-NEXT: vcvtss2sd %xmm4, %xmm4, %xmm4
2155 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm4 = xmm5[0],xmm4[0]
2155 ; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm4 = xmm5[0],xmm4[0]
21562156 ; AVX512VL-NEXT: vinsertf128 $1, %xmm6, %ymm4, %ymm4
21572157 ; AVX512VL-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
21582158 ; AVX512VL-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
2159 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
2159 ; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
21602160 ; AVX512VL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
21612161 ; AVX512VL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
2162 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
2162 ; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
21632163 ; AVX512VL-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
21642164 ; AVX512VL-NEXT: vinsertf64x4 $1, %ymm4, %zmm0, %zmm0
21652165 ; AVX512VL-NEXT: retq
22222222 ; AVX1-NEXT: vcvtph2ps %xmm1, %xmm1
22232223 ; AVX1-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
22242224 ; AVX1-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
2225 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2225 ; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
22262226 ; AVX1-NEXT: retq
22272227 ;
22282228 ; AVX2-LABEL: load_cvt_2i16_to_2f64:
22352235 ; AVX2-NEXT: vcvtph2ps %xmm1, %xmm1
22362236 ; AVX2-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
22372237 ; AVX2-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
2238 ; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2238 ; AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
22392239 ; AVX2-NEXT: retq
22402240 ;
22412241 ; AVX512F-LABEL: load_cvt_2i16_to_2f64:
22482248 ; AVX512F-NEXT: vcvtph2ps %ymm1, %zmm1
22492249 ; AVX512F-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
22502250 ; AVX512F-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
2251 ; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2251 ; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
22522252 ; AVX512F-NEXT: vzeroupper
22532253 ; AVX512F-NEXT: retq
22542254 ;
22622262 ; AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1
22632263 ; AVX512VL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
22642264 ; AVX512VL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
2265 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2265 ; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
22662266 ; AVX512VL-NEXT: retq
22672267 %1 = load <2 x i16>, <2 x i16>* %a0
22682268 %2 = bitcast <2 x i16> %1 to <2 x half>
22872287 ; AVX1-NEXT: vcvtph2ps %xmm3, %xmm3
22882288 ; AVX1-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
22892289 ; AVX1-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
2290 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
2290 ; AVX1-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
22912291 ; AVX1-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
22922292 ; AVX1-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
2293 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2293 ; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
22942294 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
22952295 ; AVX1-NEXT: retq
22962296 ;
23102310 ; AVX2-NEXT: vcvtph2ps %xmm3, %xmm3
23112311 ; AVX2-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
23122312 ; AVX2-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
2313 ; AVX2-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
2313 ; AVX2-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
23142314 ; AVX2-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
23152315 ; AVX2-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
2316 ; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2316 ; AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
23172317 ; AVX2-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
23182318 ; AVX2-NEXT: retq
23192319 ;
23332333 ; AVX512F-NEXT: vcvtph2ps %ymm3, %zmm3
23342334 ; AVX512F-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
23352335 ; AVX512F-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
2336 ; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
2336 ; AVX512F-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
23372337 ; AVX512F-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
23382338 ; AVX512F-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
2339 ; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2339 ; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
23402340 ; AVX512F-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
23412341 ; AVX512F-NEXT: retq
23422342 ;
23562356 ; AVX512VL-NEXT: vcvtph2ps %xmm3, %xmm3
23572357 ; AVX512VL-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
23582358 ; AVX512VL-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
2359 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
2359 ; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
23602360 ; AVX512VL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
23612361 ; AVX512VL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
2362 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2362 ; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
23632363 ; AVX512VL-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
23642364 ; AVX512VL-NEXT: retq
23652365 %1 = load <4 x i16>, <4 x i16>* %a0
23912391 ; AVX1-NEXT: vcvtph2ps %xmm3, %xmm3
23922392 ; AVX1-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
23932393 ; AVX1-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
2394 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
2394 ; AVX1-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
23952395 ; AVX1-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
23962396 ; AVX1-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
2397 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
2397 ; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
23982398 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
23992399 ; AVX1-NEXT: retq
24002400 ;
24202420 ; AVX2-NEXT: vcvtph2ps %xmm3, %xmm3
24212421 ; AVX2-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
24222422 ; AVX2-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
2423 ; AVX2-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
2423 ; AVX2-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
24242424 ; AVX2-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
24252425 ; AVX2-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
2426 ; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
2426 ; AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
24272427 ; AVX2-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
24282428 ; AVX2-NEXT: retq
24292429 ;
24492449 ; AVX512F-NEXT: vcvtph2ps %ymm3, %zmm3
24502450 ; AVX512F-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
24512451 ; AVX512F-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
2452 ; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
2452 ; AVX512F-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
24532453 ; AVX512F-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
24542454 ; AVX512F-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
2455 ; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
2455 ; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
24562456 ; AVX512F-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
24572457 ; AVX512F-NEXT: retq
24582458 ;
24802480 ; AVX512VL-NEXT: vcvtph2ps %xmm3, %xmm3
24812481 ; AVX512VL-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
24822482 ; AVX512VL-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
2483 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
2483 ; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
24842484 ; AVX512VL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
24852485 ; AVX512VL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
2486 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
2486 ; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
24872487 ; AVX512VL-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
24882488 ; AVX512VL-NEXT: retq
24892489 %1 = load <8 x i16>, <8 x i16>* %a0
25222522 ; AVX1-NEXT: vcvtph2ps %xmm7, %xmm7
25232523 ; AVX1-NEXT: vcvtss2sd %xmm7, %xmm7, %xmm7
25242524 ; AVX1-NEXT: vcvtss2sd %xmm6, %xmm6, %xmm6
2525 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm6 = xmm6[0],xmm7[0]
2525 ; AVX1-NEXT: vmovlhps {{.*#+}} xmm6 = xmm6[0],xmm7[0]
25262526 ; AVX1-NEXT: vcvtss2sd %xmm5, %xmm5, %xmm5
25272527 ; AVX1-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
2528 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm5[0]
2528 ; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm5[0]
25292529 ; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm0, %ymm0
25302530 ; AVX1-NEXT: vcvtss2sd %xmm4, %xmm4, %xmm4
25312531 ; AVX1-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
2532 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
2532 ; AVX1-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
25332533 ; AVX1-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
25342534 ; AVX1-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
2535 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
2535 ; AVX1-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
25362536 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
25372537 ; AVX1-NEXT: retq
25382538 ;
25642564 ; AVX2-NEXT: vcvtph2ps %xmm7, %xmm7
25652565 ; AVX2-NEXT: vcvtss2sd %xmm7, %xmm7, %xmm7
25662566 ; AVX2-NEXT: vcvtss2sd %xmm6, %xmm6, %xmm6
2567 ; AVX2-NEXT: vunpcklpd {{.*#+}} xmm6 = xmm6[0],xmm7[0]
2567 ; AVX2-NEXT: vmovlhps {{.*#+}} xmm6 = xmm6[0],xmm7[0]
25682568 ; AVX2-NEXT: vcvtss2sd %xmm5, %xmm5, %xmm5
25692569 ; AVX2-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
2570 ; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm5[0]
2570 ; AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm5[0]
25712571 ; AVX2-NEXT: vinsertf128 $1, %xmm6, %ymm0, %ymm0
25722572 ; AVX2-NEXT: vcvtss2sd %xmm4, %xmm4, %xmm4
25732573 ; AVX2-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
2574 ; AVX2-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
2574 ; AVX2-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
25752575 ; AVX2-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
25762576 ; AVX2-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
2577 ; AVX2-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
2577 ; AVX2-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
25782578 ; AVX2-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
25792579 ; AVX2-NEXT: retq
25802580 ;
26062606 ; AVX512F-NEXT: vcvtph2ps %ymm7, %zmm7
26072607 ; AVX512F-NEXT: vcvtss2sd %xmm7, %xmm7, %xmm7
26082608 ; AVX512F-NEXT: vcvtss2sd %xmm6, %xmm6, %xmm6
2609 ; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm6 = xmm6[0],xmm7[0]
2609 ; AVX512F-NEXT: vmovlhps {{.*#+}} xmm6 = xmm6[0],xmm7[0]
26102610 ; AVX512F-NEXT: vcvtss2sd %xmm5, %xmm5, %xmm5
26112611 ; AVX512F-NEXT: vcvtss2sd %xmm4, %xmm4, %xmm4
2612 ; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm4 = xmm4[0],xmm5[0]
2612 ; AVX512F-NEXT: vmovlhps {{.*#+}} xmm4 = xmm4[0],xmm5[0]
26132613 ; AVX512F-NEXT: vinsertf128 $1, %xmm6, %ymm4, %ymm4
26142614 ; AVX512F-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
26152615 ; AVX512F-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
2616 ; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
2616 ; AVX512F-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
26172617 ; AVX512F-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
26182618 ; AVX512F-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
2619 ; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2619 ; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
26202620 ; AVX512F-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
26212621 ; AVX512F-NEXT: vinsertf64x4 $1, %ymm4, %zmm0, %zmm0
26222622 ; AVX512F-NEXT: retq
26492649 ; AVX512VL-NEXT: vcvtph2ps %xmm7, %xmm7
26502650 ; AVX512VL-NEXT: vcvtss2sd %xmm7, %xmm7, %xmm7
26512651 ; AVX512VL-NEXT: vcvtss2sd %xmm6, %xmm6, %xmm6
2652 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm6 = xmm6[0],xmm7[0]
2652 ; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm6 = xmm6[0],xmm7[0]
26532653 ; AVX512VL-NEXT: vcvtss2sd %xmm5, %xmm5, %xmm5
26542654 ; AVX512VL-NEXT: vcvtss2sd %xmm4, %xmm4, %xmm4
2655 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm4 = xmm4[0],xmm5[0]
2655 ; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm4 = xmm4[0],xmm5[0]
26562656 ; AVX512VL-NEXT: vinsertf128 $1, %xmm6, %ymm4, %ymm4
26572657 ; AVX512VL-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
26582658 ; AVX512VL-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
2659 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
2659 ; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
26602660 ; AVX512VL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
26612661 ; AVX512VL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
2662 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2662 ; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
26632663 ; AVX512VL-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
26642664 ; AVX512VL-NEXT: vinsertf64x4 $1, %ymm4, %zmm0, %zmm0
26652665 ; AVX512VL-NEXT: retq
302302 define <2 x i64> @shuffle_v2i64_02(<2 x i64> %a, <2 x i64> %b) {
303303 ; SSE-LABEL: shuffle_v2i64_02:
304304 ; SSE: # BB#0:
305 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
305 ; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
306306 ; SSE-NEXT: retq
307307 ;
308308 ; AVX-LABEL: shuffle_v2i64_02:
309309 ; AVX: # BB#0:
310 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
310 ; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
311311 ; AVX-NEXT: retq
312312 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
313313 ret <2 x i64> %shuffle
315315 define <2 x i64> @shuffle_v2i64_02_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
316316 ; SSE-LABEL: shuffle_v2i64_02_copy:
317317 ; SSE: # BB#0:
318 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
318 ; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
319319 ; SSE-NEXT: movaps %xmm1, %xmm0
320320 ; SSE-NEXT: retq
321321 ;
322322 ; AVX-LABEL: shuffle_v2i64_02_copy:
323323 ; AVX: # BB#0:
324 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm2[0]
324 ; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm2[0]
325325 ; AVX-NEXT: retq
326326 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
327327 ret <2 x i64> %shuffle
501501 define <2 x i64> @shuffle_v2i64_20(<2 x i64> %a, <2 x i64> %b) {
502502 ; SSE-LABEL: shuffle_v2i64_20:
503503 ; SSE: # BB#0:
504 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
504 ; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
505505 ; SSE-NEXT: movaps %xmm1, %xmm0
506506 ; SSE-NEXT: retq
507507 ;
508508 ; AVX-LABEL: shuffle_v2i64_20:
509509 ; AVX: # BB#0:
510 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
510 ; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
511511 ; AVX-NEXT: retq
512512 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
513513 ret <2 x i64> %shuffle
515515 define <2 x i64> @shuffle_v2i64_20_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
516516 ; SSE-LABEL: shuffle_v2i64_20_copy:
517517 ; SSE: # BB#0:
518 ; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0]
518 ; SSE-NEXT: movlhps {{.*#+}} xmm2 = xmm2[0],xmm1[0]
519519 ; SSE-NEXT: movaps %xmm2, %xmm0
520520 ; SSE-NEXT: retq
521521 ;
522522 ; AVX-LABEL: shuffle_v2i64_20_copy:
523523 ; AVX: # BB#0:
524 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm2[0],xmm1[0]
524 ; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm2[0],xmm1[0]
525525 ; AVX-NEXT: retq
526526 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
527527 ret <2 x i64> %shuffle
831831 ; SSE-LABEL: shuffle_v2f64_z0:
832832 ; SSE: # BB#0:
833833 ; SSE-NEXT: xorps %xmm1, %xmm1
834 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
834 ; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
835835 ; SSE-NEXT: movaps %xmm1, %xmm0
836836 ; SSE-NEXT: retq
837837 ;
838838 ; AVX1-LABEL: shuffle_v2f64_z0:
839839 ; AVX1: # BB#0:
840840 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
841 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
841 ; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
842842 ; AVX1-NEXT: retq
843843 ;
844844 ; AVX2-LABEL: shuffle_v2f64_z0:
845845 ; AVX2: # BB#0:
846846 ; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
847 ; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
847 ; AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
848848 ; AVX2-NEXT: retq
849849 ;
850850 ; AVX512VL-LABEL: shuffle_v2f64_z0:
11541154 ; SSE-LABEL: insert_mem_hi_v2i64:
11551155 ; SSE: # BB#0:
11561156 ; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
1157 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1157 ; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
11581158 ; SSE-NEXT: retq
11591159 ;
11601160 ; AVX-LABEL: insert_mem_hi_v2i64:
11611161 ; AVX: # BB#0:
11621162 ; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
1163 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1163 ; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
11641164 ; AVX-NEXT: retq
11651165 %a = load i64, i64* %ptr
11661166 %v = insertelement <2 x i64> undef, i64 %a, i32 0
12301230 define <2 x double> @insert_reg_hi_v2f64(double %a, <2 x double> %b) {
12311231 ; SSE-LABEL: insert_reg_hi_v2f64:
12321232 ; SSE: # BB#0:
1233 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
1233 ; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
12341234 ; SSE-NEXT: movaps %xmm1, %xmm0
12351235 ; SSE-NEXT: retq
12361236 ;
12371237 ; AVX-LABEL: insert_reg_hi_v2f64:
12381238 ; AVX: # BB#0:
1239 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1239 ; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
12401240 ; AVX-NEXT: retq
12411241 %v = insertelement <2 x double> undef, double %a, i32 0
12421242 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
302302 define <4 x float> @shuffle_v4f32_0145(<4 x float> %a, <4 x float> %b) {
303303 ; SSE-LABEL: shuffle_v4f32_0145:
304304 ; SSE: # BB#0:
305 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
305 ; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
306306 ; SSE-NEXT: retq
307307 ;
308308 ; AVX-LABEL: shuffle_v4f32_0145:
309309 ; AVX: # BB#0:
310 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
310 ; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
311311 ; AVX-NEXT: retq
312312 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
313313 ret <4 x float> %shuffle
498498 define <4 x i32> @shuffle_v4i32_0145(<4 x i32> %a, <4 x i32> %b) {
499499 ; SSE-LABEL: shuffle_v4i32_0145:
500500 ; SSE: # BB#0:
501 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
501 ; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
502502 ; SSE-NEXT: retq
503503 ;
504504 ; AVX-LABEL: shuffle_v4i32_0145:
505505 ; AVX: # BB#0:
506 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
506 ; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
507507 ; AVX-NEXT: retq
508508 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
509509 ret <4 x i32> %shuffle
553553 define <4 x i32> @shuffle_v4i32_4501(<4 x i32> %a, <4 x i32> %b) {
554554 ; SSE-LABEL: shuffle_v4i32_4501:
555555 ; SSE: # BB#0:
556 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
556 ; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
557557 ; SSE-NEXT: movaps %xmm1, %xmm0
558558 ; SSE-NEXT: retq
559559 ;
560560 ; AVX-LABEL: shuffle_v4i32_4501:
561561 ; AVX: # BB#0:
562 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
562 ; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
563563 ; AVX-NEXT: retq
564564 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
565565 ret <4 x i32> %shuffle
18241824 ; AVX512VL-LABEL: shuffle_v4f32_bitcast_4401:
18251825 ; AVX512VL: # BB#0:
18261826 ; AVX512VL-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,1,1]
1827 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1827 ; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
18281828 ; AVX512VL-NEXT: retq
18291829 %1 = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
18301830 %2 = bitcast <4 x i32> %1 to <2 x double>
22062206 ; SSE-LABEL: insert_mem_hi_v4i32:
22072207 ; SSE: # BB#0:
22082208 ; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
2209 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2209 ; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
22102210 ; SSE-NEXT: retq
22112211 ;
22122212 ; AVX1OR2-LABEL: insert_mem_hi_v4i32:
22132213 ; AVX1OR2: # BB#0:
22142214 ; AVX1OR2-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
2215 ; AVX1OR2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2215 ; AVX1OR2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
22162216 ; AVX1OR2-NEXT: retq
22172217 ;
22182218 ; AVX512VL-LABEL: insert_mem_hi_v4i32:
22842284 define <4 x float> @insert_reg_hi_v4f32(double %a, <4 x float> %b) {
22852285 ; SSE-LABEL: insert_reg_hi_v4f32:
22862286 ; SSE: # BB#0:
2287 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
2287 ; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
22882288 ; SSE-NEXT: movaps %xmm1, %xmm0
22892289 ; SSE-NEXT: retq
22902290 ;
22912291 ; AVX-LABEL: insert_reg_hi_v4f32:
22922292 ; AVX: # BB#0:
2293 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
2293 ; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
22942294 ; AVX-NEXT: retq
22952295 %a.cast = bitcast double %a to <2 x float>
22962296 %v = shufflevector <2 x float> %a.cast, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
429429 ; AVX1-LABEL: shuffle_v4f64_0415:
430430 ; AVX1: # BB#0:
431431 ; AVX1-NEXT: vunpckhpd {{.*#+}} xmm2 = xmm0[1],xmm1[1]
432 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
432 ; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
433433 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
434434 ; AVX1-NEXT: retq
435435 ;
613613 ; AVX1-LABEL: shuffle_v4i64_0020:
614614 ; AVX1: # BB#0:
615615 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
616 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
616 ; AVX1-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
617617 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1]
618618 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
619619 ; AVX1-NEXT: retq
868868 ; AVX1-LABEL: shuffle_v4i64_0451:
869869 ; AVX1: # BB#0:
870870 ; AVX1-NEXT: vunpckhpd {{.*#+}} xmm2 = xmm1[1],xmm0[1]
871 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
871 ; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
872872 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
873873 ; AVX1-NEXT: retq
874874 ;
902902 ; AVX1-LABEL: shuffle_v4i64_4015:
903903 ; AVX1: # BB#0:
904904 ; AVX1-NEXT: vunpckhpd {{.*#+}} xmm2 = xmm0[1],xmm1[1]
905 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
905 ; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
906906 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
907907 ; AVX1-NEXT: retq
908908 ;
10651065 ; AVX1-LABEL: shuffle_v4i64_0415:
10661066 ; AVX1: # BB#0:
10671067 ; AVX1-NEXT: vunpckhpd {{.*#+}} xmm2 = xmm0[1],xmm1[1]
1068 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1068 ; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
10691069 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
10701070 ; AVX1-NEXT: retq
10711071 ;
12761276 define <4 x float> @combine_test3(<4 x float> %a, <4 x float> %b) {
12771277 ; SSE-LABEL: combine_test3:
12781278 ; SSE: # BB#0:
1279 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1279 ; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
12801280 ; SSE-NEXT: retq
12811281 ;
12821282 ; AVX-LABEL: combine_test3:
12831283 ; AVX: # BB#0:
1284 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1284 ; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
12851285 ; AVX-NEXT: retq
12861286 %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32>
12871287 %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32>
13801380 define <4 x i32> @combine_test8(<4 x i32> %a, <4 x i32> %b) {
13811381 ; SSE-LABEL: combine_test8:
13821382 ; SSE: # BB#0:
1383 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1383 ; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
13841384 ; SSE-NEXT: retq
13851385 ;
13861386 ; AVX-LABEL: combine_test8:
13871387 ; AVX: # BB#0:
1388 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1388 ; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
13891389 ; AVX-NEXT: retq
13901390 %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32>
13911391 %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32>
14791479 define <4 x float> @combine_test13(<4 x float> %a, <4 x float> %b) {
14801480 ; SSE-LABEL: combine_test13:
14811481 ; SSE: # BB#0:
1482 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1482 ; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
14831483 ; SSE-NEXT: retq
14841484 ;
14851485 ; AVX-LABEL: combine_test13:
14861486 ; AVX: # BB#0:
1487 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1487 ; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
14881488 ; AVX-NEXT: retq
14891489 %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32>
14901490 %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32>
15771577 define <4 x i32> @combine_test18(<4 x i32> %a, <4 x i32> %b) {
15781578 ; SSE-LABEL: combine_test18:
15791579 ; SSE: # BB#0:
1580 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1580 ; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
15811581 ; SSE-NEXT: retq
15821582 ;
15831583 ; AVX-LABEL: combine_test18:
15841584 ; AVX: # BB#0:
1585 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1585 ; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
15861586 ; AVX-NEXT: retq
15871587 %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32>
15881588 %2 = shufflevector <4 x i32> %1, <4 x i32> %a, <4 x i32>
16401640 ; SSE-LABEL: combine_test21:
16411641 ; SSE: # BB#0:
16421642 ; SSE-NEXT: movaps %xmm0, %xmm2
1643 ; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0]
1643 ; SSE-NEXT: movlhps {{.*#+}} xmm2 = xmm2[0],xmm1[0]
16441644 ; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
16451645 ; SSE-NEXT: movaps %xmm2, (%rdi)
16461646 ; SSE-NEXT: retq
16481648 ; AVX-LABEL: combine_test21:
16491649 ; AVX: # BB#0:
16501650 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
1651 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm0[0],xmm1[0]
1651 ; AVX-NEXT: vmovlhps {{.*#+}} xmm2 = xmm0[0],xmm1[0]
16521652 ; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
16531653 ; AVX-NEXT: vmovaps %xmm2, (%rdi)
16541654 ; AVX-NEXT: vzeroupper
21672167 define <4 x float> @combine_undef_input_test2(<4 x float> %a, <4 x float> %b) {
21682168 ; SSE-LABEL: combine_undef_input_test2:
21692169 ; SSE: # BB#0:
2170 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2170 ; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
21712171 ; SSE-NEXT: retq
21722172 ;
21732173 ; AVX-LABEL: combine_undef_input_test2:
21742174 ; AVX: # BB#0:
2175 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2175 ; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
21762176 ; AVX-NEXT: retq
21772177 %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32>
21782178 %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32>
21822182 define <4 x float> @combine_undef_input_test3(<4 x float> %a, <4 x float> %b) {
21832183 ; SSE-LABEL: combine_undef_input_test3:
21842184 ; SSE: # BB#0:
2185 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2185 ; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
21862186 ; SSE-NEXT: retq
21872187 ;
21882188 ; AVX-LABEL: combine_undef_input_test3:
21892189 ; AVX: # BB#0:
2190 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2190 ; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
21912191 ; AVX-NEXT: retq
21922192 %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32>
21932193 %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32>
23512351 define <4 x float> @combine_undef_input_test12(<4 x float> %a, <4 x float> %b) {
23522352 ; SSE-LABEL: combine_undef_input_test12:
23532353 ; SSE: # BB#0:
2354 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2354 ; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
23552355 ; SSE-NEXT: retq
23562356 ;
23572357 ; AVX-LABEL: combine_undef_input_test12:
23582358 ; AVX: # BB#0:
2359 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2359 ; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
23602360 ; AVX-NEXT: retq
23612361 %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32>
23622362 %2 = shufflevector <4 x float> %b, <4 x float> %1, <4 x i32>
23662366 define <4 x float> @combine_undef_input_test13(<4 x float> %a, <4 x float> %b) {
23672367 ; SSE-LABEL: combine_undef_input_test13:
23682368 ; SSE: # BB#0:
2369 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2369 ; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
23702370 ; SSE-NEXT: retq
23712371 ;
23722372 ; AVX-LABEL: combine_undef_input_test13:
23732373 ; AVX: # BB#0:
2374 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2374 ; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
23752375 ; AVX-NEXT: retq
23762376 %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32>
23772377 %2 = shufflevector <4 x float> %b, <4 x float> %1, <4 x i32>
4343 ; SSE-NEXT: andl $1, %esi
4444 ; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
4545 ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
46 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
46 ; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
4747 ; SSE-NEXT: retq
4848 ;
4949 ; AVX-LABEL: var_shuffle_v2i64_v2i64_xx_i64:
5555 ; AVX-NEXT: andl $1, %esi
5656 ; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
5757 ; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
58 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
58 ; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
5959 ; AVX-NEXT: retq
6060 %x0 = extractelement <2 x i64> %x, i32 %i0
6161 %x1 = extractelement <2 x i64> %x, i32 %i1
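The variable-index tests above rebuild a vector from two scalar element extracts; a hedged sketch of that pattern (function and value names are mine, not the test's):

define <2 x i64> @var_shuffle_sketch(<2 x i64> %x, i32 %i0, i32 %i1) {
  %x0 = extractelement <2 x i64> %x, i32 %i0
  %x1 = extractelement <2 x i64> %x, i32 %i1
  %v0 = insertelement <2 x i64> undef, i64 %x0, i32 0
  %v1 = insertelement <2 x i64> %v0, i64 %x1, i32 1
  ret <2 x i64> %v1
}
; The two movsd loads land in xmm0/xmm1 and the final concatenation,
; previously an unpcklpd, is now the equivalent movlhps.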
8282 ; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
8383 ; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
8484 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
85 ; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
85 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
8686 ; SSE2-NEXT: retq
8787 ;
8888 ; SSSE3-LABEL: var_shuffle_v4f32_v4f32_xxxx_i32:
102102 ; SSSE3-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
103103 ; SSSE3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
104104 ; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
105 ; SSSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
105 ; SSSE3-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
106106 ; SSSE3-NEXT: retq
107107 ;
108108 ; SSE41-LABEL: var_shuffle_v4f32_v4f32_xxxx_i32:
167167 ; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
168168 ; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
169169 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
170 ; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
170 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
171171 ; SSE2-NEXT: retq
172172 ;
173173 ; SSSE3-LABEL: var_shuffle_v4i32_v4i32_xxxx_i32:
187187 ; SSSE3-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
188188 ; SSSE3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
189189 ; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
190 ; SSSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
190 ; SSSE3-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
191191 ; SSSE3-NEXT: retq
192192 ;
193193 ; SSE41-LABEL: var_shuffle_v4i32_v4i32_xxxx_i32:
738738 ; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
739739 ; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
740740 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
741 ; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
741 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
742742 ; SSE2-NEXT: retq
743743 ;
744744 ; SSSE3-LABEL: mem_shuffle_v4i32_v4i32_xxxx_i32:
758758 ; SSSE3-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
759759 ; SSSE3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
760760 ; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
761 ; SSSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
761 ; SSSE3-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
762762 ; SSSE3-NEXT: retq
763763 ;
764764 ; SSE41-LABEL: mem_shuffle_v4i32_v4i32_xxxx_i32:
11791179 ; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
11801180 ; SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
11811181 ; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
1182 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1182 ; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
11831183 ; SSE-NEXT: retq
11841184 ;
11851185 ; AVX-LABEL: var_shuffle_v4f32_v4f32_x0yx_i32:
11961196 ; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
11971197 ; AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
11981198 ; AVX-NEXT: vunpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
1199 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1199 ; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
12001200 ; AVX-NEXT: retq
12011201 %x0 = extractelement <4 x float> %x, i32 %i0
12021202 %x1 = extractelement <4 x float> %x, i32 %i1
103103 ; ALL-NEXT: vmovaps %ymm0, (%rsp)
104104 ; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
105105 ; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
106 ; ALL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
106 ; ALL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
107107 ; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
108108 ; ALL-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
109 ; ALL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
109 ; ALL-NEXT: vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
110110 ; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
111111 ; ALL-NEXT: movq %rbp, %rsp
112112 ; ALL-NEXT: popq %rbp
134134 ; ALL-NEXT: vmovaps %ymm0, (%rsp)
135135 ; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
136136 ; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
137 ; ALL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
137 ; ALL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
138138 ; ALL-NEXT: vmovaps %xmm0, %xmm0
139139 ; ALL-NEXT: movq %rbp, %rsp
140140 ; ALL-NEXT: popq %rbp
160160 ; ALL-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
161161 ; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
162162 ; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
163 ; ALL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
163 ; ALL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
164164 ; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
165165 ; ALL-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
166 ; ALL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
166 ; ALL-NEXT: vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
167167 ; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
168168 ; ALL-NEXT: retq
169169 %x0 = extractelement <2 x i64> %x, i64 %i0
609609 ; ALL-NEXT: vmovaps %ymm0, (%rsp)
610610 ; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
611611 ; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
612 ; ALL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
612 ; ALL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
613613 ; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
614614 ; ALL-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
615 ; ALL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
615 ; ALL-NEXT: vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
616616 ; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
617617 ; ALL-NEXT: movq %rbp, %rsp
618618 ; ALL-NEXT: popq %rbp
650650 ; ALL-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
651651 ; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
652652 ; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
653 ; ALL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
653 ; ALL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
654654 ; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
655655 ; ALL-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
656 ; ALL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
656 ; ALL-NEXT: vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
657657 ; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
658658 ; ALL-NEXT: retq
659659 %p0 = getelementptr inbounds i64, i64* %i, i32 0
1313 ; NOTE: This operation is collapsed to a single truncate, so this test no longer covers
1414 ; what it originally intended to.
1515
16 ; CHECK: MOVLHPSrr
16 ; CHECK: PUNPCKLQDQrr
1717 ; CHECK: PSHUFHWri
1818 ; CHECK: PACKUSWBrr
1919 ; CHECK: PACKUSWBrr
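Note this hunk runs the opposite way from the rest: MOVLHPSrr becomes PUNPCKLQDQrr. Plausibly that is because MOVLHPSrr, once present in the packed-single column of a replacement row, can now be moved by the domain fixer into the integer domain when the surrounding computation is integer. A hedged LLVM IR sketch of that situation (names and mask are illustrative, not from this test):

; A v2i64 low-half concat consumed by integer ops; ISel may pick movlhps,
; and the domain fixer can now rewrite it to the integer-domain punpcklqdq.
define <2 x i64> @int_domain_shape(<2 x i64> %a, <2 x i64> %b) {
  %cat = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
  %sum = add <2 x i64> %cat, %b
  ret <2 x i64> %sum
}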
461461 define <4 x float> @select_of_shuffles_0(<2 x float> %a0, <2 x float> %b0, <2 x float> %a1, <2 x float> %b1) {
462462 ; SSE-LABEL: select_of_shuffles_0:
463463 ; SSE: # BB#0:
464 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
465 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm3[0]
464 ; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
465 ; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm3[0]
466466 ; SSE-NEXT: subps %xmm1, %xmm0
467467 ; SSE-NEXT: retq
468468 ;
469469 ; AVX-LABEL: select_of_shuffles_0:
470470 ; AVX: # BB#0:
471 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
472 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm3[0]
471 ; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
472 ; AVX-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm3[0]
473473 ; AVX-NEXT: vsubps %xmm1, %xmm0, %xmm0
474474 ; AVX-NEXT: retq
475475 %1 = shufflevector <2 x float> %a0, <2 x float> undef, <4 x i32>
1313 ;
1414 ; X64-LABEL: convert:
1515 ; X64: # BB#0: # %entry
16 ; X64-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
16 ; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1717 ; X64-NEXT: movaps %xmm0, (%rdi)
1818 ; X64-NEXT: retq
1919 entry:
100100 ; X32-LABEL: vpermil2pd_21:
101101 ; X32: # BB#0:
102102 ; X32-NEXT: vxorps %xmm1, %xmm1, %xmm1
103 ; X32-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
103 ; X32-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
104104 ; X32-NEXT: retl
105105 ;
106106 ; X64-LABEL: vpermil2pd_21:
107107 ; X64: # BB#0:
108108 ; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
109 ; X64-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
109 ; X64-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
110110 ; X64-NEXT: retq
111111 %1 = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x i64> , i8 2)
112112 ret <2 x double> %1
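For concreteness on the legacy-encoding size difference driving the packed-single column swap: unpcklpd carries the 66 operand-size prefix that movlhps omits, so the reg-reg form saves one byte (byte sequences below are for the xmm0/xmm1 reg-reg forms; under VEX and EVEX both instructions encode to the same length):

movlhps  %xmm1, %xmm0    # 0F 16 C1      3 bytes (NP 0F 16 /r)
unpcklpd %xmm1, %xmm0    # 66 0F 14 C1   4 bytes (66 0F 14 /r)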