llvm.org GIT mirror llvm / 2dc0224
[X86] Teach execution domain fixing to convert between FP and int unpack instructions.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@313508 91177308-0d34-0410-b5e6-96231b3b80d8
Craig Topper, 2 years ago
43 changed file(s) with 623 addition(s) and 745 deletion(s). Raw diff Collapse all Expand all
93349334 { X86::ORPSrr, X86::ORPDrr, X86::PORrr },
93359335 { X86::XORPSrm, X86::XORPDrm, X86::PXORrm },
93369336 { X86::XORPSrr, X86::XORPDrr, X86::PXORrr },
9337 { X86::UNPCKLPDrm, X86::UNPCKLPDrm, X86::PUNPCKLQDQrm },
9338 { X86::UNPCKLPDrr, X86::UNPCKLPDrr, X86::PUNPCKLQDQrr },
9339 { X86::UNPCKHPDrm, X86::UNPCKHPDrm, X86::PUNPCKHQDQrm },
9340 { X86::UNPCKHPDrr, X86::UNPCKHPDrr, X86::PUNPCKHQDQrr },
9341 { X86::UNPCKLPSrm, X86::UNPCKLPSrm, X86::PUNPCKLDQrm },
9342 { X86::UNPCKLPSrr, X86::UNPCKLPSrr, X86::PUNPCKLDQrr },
9343 { X86::UNPCKHPSrm, X86::UNPCKHPSrm, X86::PUNPCKHDQrm },
9344 { X86::UNPCKHPSrr, X86::UNPCKHPSrr, X86::PUNPCKHDQrr },
93379345 // AVX 128-bit support
93389346 { X86::VMOVAPSmr, X86::VMOVAPDmr, X86::VMOVDQAmr },
93399347 { X86::VMOVAPSrm, X86::VMOVAPDrm, X86::VMOVDQArm },
93549362 { X86::VORPSrr, X86::VORPDrr, X86::VPORrr },
93559363 { X86::VXORPSrm, X86::VXORPDrm, X86::VPXORrm },
93569364 { X86::VXORPSrr, X86::VXORPDrr, X86::VPXORrr },
9365 { X86::VUNPCKLPDrm, X86::VUNPCKLPDrm, X86::VPUNPCKLQDQrm },
9366 { X86::VUNPCKLPDrr, X86::VUNPCKLPDrr, X86::VPUNPCKLQDQrr },
9367 { X86::VUNPCKHPDrm, X86::VUNPCKHPDrm, X86::VPUNPCKHQDQrm },
9368 { X86::VUNPCKHPDrr, X86::VUNPCKHPDrr, X86::VPUNPCKHQDQrr },
9369 { X86::VUNPCKLPSrm, X86::VUNPCKLPSrm, X86::VPUNPCKLDQrm },
9370 { X86::VUNPCKLPSrr, X86::VUNPCKLPSrr, X86::VPUNPCKLDQrr },
9371 { X86::VUNPCKHPSrm, X86::VUNPCKHPSrm, X86::VPUNPCKHDQrm },
9372 { X86::VUNPCKHPSrr, X86::VUNPCKHPSrr, X86::VPUNPCKHDQrr },
93579373 // AVX 256-bit support
93589374 { X86::VMOVAPSYmr, X86::VMOVAPDYmr, X86::VMOVDQAYmr },
93599375 { X86::VMOVAPSYrm, X86::VMOVAPDYrm, X86::VMOVDQAYrm },
94129428 { X86::VPERMILPSZ256ri, X86::VPERMILPSZ256ri, X86::VPSHUFDZ256ri },
94139429 { X86::VPERMILPSZmi, X86::VPERMILPSZmi, X86::VPSHUFDZmi },
94149430 { X86::VPERMILPSZri, X86::VPERMILPSZri, X86::VPSHUFDZri },
9431 { X86::VUNPCKLPDZ256rm, X86::VUNPCKLPDZ256rm, X86::VPUNPCKLQDQZ256rm },
9432 { X86::VUNPCKLPDZ256rr, X86::VUNPCKLPDZ256rr, X86::VPUNPCKLQDQZ256rr },
9433 { X86::VUNPCKHPDZ256rm, X86::VUNPCKHPDZ256rm, X86::VPUNPCKHQDQZ256rm },
9434 { X86::VUNPCKHPDZ256rr, X86::VUNPCKHPDZ256rr, X86::VPUNPCKHQDQZ256rr },
9435 { X86::VUNPCKLPSZ256rm, X86::VUNPCKLPSZ256rm, X86::VPUNPCKLDQZ256rm },
9436 { X86::VUNPCKLPSZ256rr, X86::VUNPCKLPSZ256rr, X86::VPUNPCKLDQZ256rr },
9437 { X86::VUNPCKHPSZ256rm, X86::VUNPCKHPSZ256rm, X86::VPUNPCKHDQZ256rm },
9438 { X86::VUNPCKHPSZ256rr, X86::VUNPCKHPSZ256rr, X86::VPUNPCKHDQZ256rr },
9439 { X86::VUNPCKLPDZ128rm, X86::VUNPCKLPDZ128rm, X86::VPUNPCKLQDQZ128rm },
9440 { X86::VUNPCKLPDZ128rr, X86::VUNPCKLPDZ128rr, X86::VPUNPCKLQDQZ128rr },
9441 { X86::VUNPCKHPDZ128rm, X86::VUNPCKHPDZ128rm, X86::VPUNPCKHQDQZ128rm },
9442 { X86::VUNPCKHPDZ128rr, X86::VUNPCKHPDZ128rr, X86::VPUNPCKHQDQZ128rr },
9443 { X86::VUNPCKLPSZ128rm, X86::VUNPCKLPSZ128rm, X86::VPUNPCKLDQZ128rm },
9444 { X86::VUNPCKLPSZ128rr, X86::VUNPCKLPSZ128rr, X86::VPUNPCKLDQZ128rr },
9445 { X86::VUNPCKHPSZ128rm, X86::VUNPCKHPSZ128rm, X86::VPUNPCKHDQZ128rm },
9446 { X86::VUNPCKHPSZ128rr, X86::VUNPCKHPSZ128rr, X86::VPUNPCKHDQZ128rr },
9447 { X86::VUNPCKLPDZrm, X86::VUNPCKLPDZrm, X86::VPUNPCKLQDQZrm },
9448 { X86::VUNPCKLPDZrr, X86::VUNPCKLPDZrr, X86::VPUNPCKLQDQZrr },
9449 { X86::VUNPCKHPDZrm, X86::VUNPCKHPDZrm, X86::VPUNPCKHQDQZrm },
9450 { X86::VUNPCKHPDZrr, X86::VUNPCKHPDZrr, X86::VPUNPCKHQDQZrr },
9451 { X86::VUNPCKLPSZrm, X86::VUNPCKLPSZrm, X86::VPUNPCKLDQZrm },
9452 { X86::VUNPCKLPSZrr, X86::VUNPCKLPSZrr, X86::VPUNPCKLDQZrr },
9453 { X86::VUNPCKHPSZrm, X86::VUNPCKHPSZrm, X86::VPUNPCKHDQZrm },
9454 { X86::VUNPCKHPSZrr, X86::VUNPCKHPSZrr, X86::VPUNPCKHDQZrr },
94159455 };
94169456
94179457 static const uint16_t ReplaceableInstrsAVX2[][3] = {
94399479 { X86::VBLENDPSYrmi, X86::VBLENDPSYrmi, X86::VPBLENDDYrmi },
94409480 { X86::VPERMILPSYmi, X86::VPERMILPSYmi, X86::VPSHUFDYmi },
94419481 { X86::VPERMILPSYri, X86::VPERMILPSYri, X86::VPSHUFDYri },
9482 { X86::VUNPCKLPDYrm, X86::VUNPCKLPDYrm, X86::VPUNPCKLQDQYrm },
9483 { X86::VUNPCKLPDYrr, X86::VUNPCKLPDYrr, X86::VPUNPCKLQDQYrr },
9484 { X86::VUNPCKHPDYrm, X86::VUNPCKHPDYrm, X86::VPUNPCKHQDQYrm },
9485 { X86::VUNPCKHPDYrr, X86::VUNPCKHPDYrr, X86::VPUNPCKHQDQYrr },
9486 { X86::VUNPCKLPSYrm, X86::VUNPCKLPSYrm, X86::VPUNPCKLDQYrm },
9487 { X86::VUNPCKLPSYrr, X86::VUNPCKLPSYrr, X86::VPUNPCKLDQYrr },
9488 { X86::VUNPCKHPSYrm, X86::VUNPCKHPSYrm, X86::VPUNPCKHDQYrm },
9489 { X86::VUNPCKHPSYrr, X86::VUNPCKHPSYrr, X86::VPUNPCKHDQYrr },
94429490 };
94439491
94449492 static const uint16_t ReplaceableInstrsAVX2InsertExtract[][3] = {
55 ; CHECK-LABEL: test1:
66 ; CHECK: # BB#0:
77 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
8 ; CHECK-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
8 ; CHECK-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
99 ; CHECK-NEXT: retl
1010 %b = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32>
1111 ret <4 x i64>%b
119119 define <4 x i64> @unpackhipd2(<4 x i64>* %src1, <4 x i64>* %src2) nounwind uwtable readnone ssp {
120120 ; CHECK-LABEL: unpackhipd2:
121121 ; CHECK: # BB#0:
122 ; CHECK-NEXT: vmovapd (%rdi), %ymm0
122 ; CHECK-NEXT: vmovaps (%rdi), %ymm0
123123 ; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3]
124124 ; CHECK-NEXT: retq
125125 %a = load <4 x i64>, <4 x i64>* %src1
161161 define <4 x i64> @unpacklopd2(<4 x i64>* %src1, <4 x i64>* %src2) nounwind uwtable readnone ssp {
162162 ; CHECK-LABEL: unpacklopd2:
163163 ; CHECK: # BB#0:
164 ; CHECK-NEXT: vmovapd (%rdi), %ymm0
164 ; CHECK-NEXT: vmovaps (%rdi), %ymm0
165165 ; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2]
166166 ; CHECK-NEXT: retq
167167 %a = load <4 x i64>, <4 x i64>* %src1
33133313 define <4 x i64> @test_mm256_unpackhi_epi32(<4 x i64> %a0, <4 x i64> %a1) nounwind {
33143314 ; X32-LABEL: test_mm256_unpackhi_epi32:
33153315 ; X32: # BB#0:
3316 ; X32-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
3316 ; X32-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
33173317 ; X32-NEXT: retl
33183318 ;
33193319 ; X64-LABEL: test_mm256_unpackhi_epi32:
33203320 ; X64: # BB#0:
3321 ; X64-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
3321 ; X64-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
33223322 ; X64-NEXT: retq
33233323 %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
33243324 %arg1 = bitcast <4 x i64> %a1 to <8 x i32>
33303330 define <4 x i64> @test_mm256_unpackhi_epi64(<4 x i64> %a0, <4 x i64> %a1) nounwind {
33313331 ; X32-LABEL: test_mm256_unpackhi_epi64:
33323332 ; X32: # BB#0:
3333 ; X32-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
3333 ; X32-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
33343334 ; X32-NEXT: retl
33353335 ;
33363336 ; X64-LABEL: test_mm256_unpackhi_epi64:
33373337 ; X64: # BB#0:
3338 ; X64-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
3338 ; X64-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
33393339 ; X64-NEXT: retq
33403340 %res = shufflevector <4 x i64> %a0, <4 x i64> %a1, <4 x i32>
33413341 ret <4 x i64> %res
33783378 define <4 x i64> @test_mm256_unpacklo_epi32(<4 x i64> %a0, <4 x i64> %a1) nounwind {
33793379 ; X32-LABEL: test_mm256_unpacklo_epi32:
33803380 ; X32: # BB#0:
3381 ; X32-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
3381 ; X32-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
33823382 ; X32-NEXT: retl
33833383 ;
33843384 ; X64-LABEL: test_mm256_unpacklo_epi32:
33853385 ; X64: # BB#0:
3386 ; X64-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
3386 ; X64-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
33873387 ; X64-NEXT: retq
33883388 %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
33893389 %arg1 = bitcast <4 x i64> %a1 to <8 x i32>
33953395 define <4 x i64> @test_mm256_unpacklo_epi64(<4 x i64> %a0, <4 x i64> %a1) nounwind {
33963396 ; X32-LABEL: test_mm256_unpacklo_epi64:
33973397 ; X32: # BB#0:
3398 ; X32-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
3398 ; X32-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
33993399 ; X32-NEXT: retl
34003400 ;
34013401 ; X64-LABEL: test_mm256_unpacklo_epi64:
34023402 ; X64: # BB#0:
3403 ; X64-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
3403 ; X64-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
34043404 ; X64-NEXT: retq
34053405 %res = shufflevector <4 x i64> %a0, <4 x i64> %a1, <4 x i32>
34063406 ret <4 x i64> %res
696696 define <8 x i64> @test_mm512_unpackhi_epi32(<8 x i64> %a0, <8 x i64> %a1) {
697697 ; X32-LABEL: test_mm512_unpackhi_epi32:
698698 ; X32: # BB#0:
699 ; X32-NEXT: vpunpckhdq {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
699 ; X32-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
700700 ; X32-NEXT: retl
701701 ;
702702 ; X64-LABEL: test_mm512_unpackhi_epi32:
703703 ; X64: # BB#0:
704 ; X64-NEXT: vpunpckhdq {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
704 ; X64-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
705705 ; X64-NEXT: retq
706706 %arg0 = bitcast <8 x i64> %a0 to <16 x i32>
707707 %arg1 = bitcast <8 x i64> %a1 to <16 x i32>
758758 define <8 x i64> @test_mm512_unpackhi_epi64(<8 x i64> %a0, <8 x i64> %a1) {
759759 ; X32-LABEL: test_mm512_unpackhi_epi64:
760760 ; X32: # BB#0:
761 ; X32-NEXT: vpunpckhqdq {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
761 ; X32-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
762762 ; X32-NEXT: retl
763763 ;
764764 ; X64-LABEL: test_mm512_unpackhi_epi64:
765765 ; X64: # BB#0:
766 ; X64-NEXT: vpunpckhqdq {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
766 ; X64-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
767767 ; X64-NEXT: retq
768768 %res = shufflevector <8 x i64> %a0, <8 x i64> %a1, <8 x i32>
769769 ret <8 x i64> %res
914914 define <8 x i64> @test_mm512_unpacklo_epi32(<8 x i64> %a0, <8 x i64> %a1) {
915915 ; X32-LABEL: test_mm512_unpacklo_epi32:
916916 ; X32: # BB#0:
917 ; X32-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
917 ; X32-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
918918 ; X32-NEXT: retl
919919 ;
920920 ; X64-LABEL: test_mm512_unpacklo_epi32:
921921 ; X64: # BB#0:
922 ; X64-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
922 ; X64-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
923923 ; X64-NEXT: retq
924924 %arg0 = bitcast <8 x i64> %a0 to <16 x i32>
925925 %arg1 = bitcast <8 x i64> %a1 to <16 x i32>
976976 define <8 x i64> @test_mm512_unpacklo_epi64(<8 x i64> %a0, <8 x i64> %a1) {
977977 ; X32-LABEL: test_mm512_unpacklo_epi64:
978978 ; X32: # BB#0:
979 ; X32-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
979 ; X32-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
980980 ; X32-NEXT: retl
981981 ;
982982 ; X64-LABEL: test_mm512_unpacklo_epi64:
983983 ; X64: # BB#0:
984 ; X64-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
984 ; X64-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
985985 ; X64-NEXT: retq
986986 %res = shufflevector <8 x i64> %a0, <8 x i64> %a1, <8 x i32>
987987 ret <8 x i64> %res
19041904 define <2 x i64> @test_4xi64_to_2xi64_perm_mask0(<4 x i64> %vec) {
19051905 ; CHECK-LABEL: test_4xi64_to_2xi64_perm_mask0:
19061906 ; CHECK: # BB#0:
1907 ; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm1
1908 ; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1907 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
1908 ; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
19091909 ; CHECK-NEXT: vzeroupper
19101910 ; CHECK-NEXT: retq
19111911 %res = shufflevector <4 x i64> %vec, <4 x i64> undef, <2 x i32>
19711971 define <2 x i64> @test_4xi64_to_2xi64_perm_mem_mask0(<4 x i64>* %vp) {
19721972 ; CHECK-LABEL: test_4xi64_to_2xi64_perm_mem_mask0:
19731973 ; CHECK: # BB#0:
1974 ; CHECK-NEXT: vmovdqa (%rdi), %ymm0
1975 ; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm1
1976 ; CHECK-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
1974 ; CHECK-NEXT: vmovaps (%rdi), %ymm0
1975 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
1976 ; CHECK-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
19771977 ; CHECK-NEXT: vzeroupper
19781978 ; CHECK-NEXT: retq
19791979 %vec = load <4 x i64>, <4 x i64>* %vp
23102310 define <2 x i64> @test_8xi64_to_2xi64_perm_mask0(<8 x i64> %vec) {
23112311 ; CHECK-LABEL: test_8xi64_to_2xi64_perm_mask0:
23122312 ; CHECK: # BB#0:
2313 ; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm1
2314 ; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
2315 ; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
2313 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
2314 ; CHECK-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[2,3,0,1]
2315 ; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
23162316 ; CHECK-NEXT: vzeroupper
23172317 ; CHECK-NEXT: retq
23182318 %res = shufflevector <8 x i64> %vec, <8 x i64> undef, <2 x i32>
26802680 define <2 x i64> @test_8xi64_to_2xi64_perm_mem_mask0(<8 x i64>* %vp) {
26812681 ; CHECK-LABEL: test_8xi64_to_2xi64_perm_mem_mask0:
26822682 ; CHECK: # BB#0:
2683 ; CHECK-NEXT: vmovdqa64 (%rdi), %zmm0
2684 ; CHECK-NEXT: vextracti32x4 $2, %zmm0, %xmm1
2685 ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
2686 ; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
2683 ; CHECK-NEXT: vmovaps (%rdi), %zmm0
2684 ; CHECK-NEXT: vextractf32x4 $2, %zmm0, %xmm1
2685 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
2686 ; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
26872687 ; CHECK-NEXT: vzeroupper
26882688 ; CHECK-NEXT: retq
26892689 %vec = load <8 x i64>, <8 x i64>* %vp
852852 ; SSE-NEXT: andb $15, %r11b
853853 ; SSE-NEXT: movb %r11b, -{{[0-9]+}}(%rsp)
854854 ; SSE-NEXT: movb $0, -{{[0-9]+}}(%rsp)
855 ; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
856 ; SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
857 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
855 ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
856 ; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
857 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
858858 ; SSE-NEXT: popq %rbx
859859 ; SSE-NEXT: popq %r14
860860 ; SSE-NEXT: retq
10301030 ; SSE-NEXT: andb $15, %r11b
10311031 ; SSE-NEXT: movb %r11b, -{{[0-9]+}}(%rsp)
10321032 ; SSE-NEXT: movb $0, -{{[0-9]+}}(%rsp)
1033 ; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
1034 ; SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero
1035 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
1033 ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1034 ; SSE-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
1035 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
10361036 ; SSE-NEXT: popq %rbx
10371037 ; SSE-NEXT: popq %r14
10381038 ; SSE-NEXT: retq
257257 ; SSE-NEXT: andps %xmm8, %xmm0
258258 ; SSE-NEXT: orps %xmm0, %xmm1
259259 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm3[0]
260 ; SSE-NEXT: movapd %xmm2, %xmm0
260 ; SSE-NEXT: movaps %xmm2, %xmm0
261261 ; SSE-NEXT: retq
262262 ;
263263 ; AVX-LABEL: combine_vec_fcopysign_fpext_sgn:
181181 define <2 x i64> @test14(<2 x i64> %a, <2 x i64> %b) {
182182 ; CHECK-LABEL: test14:
183183 ; CHECK: # BB#0:
184 ; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
184 ; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
185185 ; CHECK-NEXT: retq
186186 %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32>
187187 %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32>
206206 define <2 x i64> @test16(<2 x i64> %a, <2 x i64> %b) {
207207 ; CHECK-LABEL: test16:
208208 ; CHECK: # BB#0:
209 ; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
210 ; CHECK-NEXT: movdqa %xmm1, %xmm0
209 ; CHECK-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
210 ; CHECK-NEXT: movaps %xmm1, %xmm0
211211 ; CHECK-NEXT: retq
212212 %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32>
213213 %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32>
88 ; CHECK: # BB#0: # %entry
99 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
1010 ; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
11 ; CHECK-NEXT: movapd %xmm0, (%eax)
11 ; CHECK-NEXT: movaps %xmm0, (%eax)
1212 ; CHECK-NEXT: retl
1313 entry:
1414 %tmp7.i = shufflevector <4 x double> %src, <4 x double> undef, <2 x i32>
171171 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
172172 ; SSE-NEXT: addss %xmm2, %xmm0
173173 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
174 ; SSE-NEXT: movapd %xmm1, %xmm0
174 ; SSE-NEXT: movaps %xmm1, %xmm0
175175 ; SSE-NEXT: retq
176176 ;
177177 ; AVX-LABEL: test8_undef:
901901 define <2 x i64> @merge_2i64_i64_12_volatile(i64* %ptr) nounwind uwtable noinline ssp {
902902 ; SSE-LABEL: merge_2i64_i64_12_volatile:
903903 ; SSE: # BB#0:
904 ; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
905 ; SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
906 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
904 ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
905 ; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
906 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
907907 ; SSE-NEXT: retq
908908 ;
909909 ; AVX-LABEL: merge_2i64_i64_12_volatile:
910910 ; AVX: # BB#0:
911 ; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
912 ; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
913 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
911 ; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
912 ; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
913 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
914914 ; AVX-NEXT: retq
915915 ;
916916 ; X32-SSE1-LABEL: merge_2i64_i64_12_volatile:
307307 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
308308 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
309309 ; AVX1-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
310 ; AVX1-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm2[1],xmm0[1]
311 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm2[0],xmm0[0]
310 ; AVX1-NEXT: vpunpckhqdq {{.*#+}} xmm1 = xmm2[1],xmm0[1]
311 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
312312 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
313313 ; AVX1-NEXT: retq
314314 ;
2121 ;
2222 ; AVX1-LABEL: v3i64:
2323 ; AVX1: # BB#0:
24 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0]
24 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm0[0],xmm1[0]
2525 ; AVX1-NEXT: vpextrq $1, %xmm0, 16(%rdi)
26 ; AVX1-NEXT: vmovapd %xmm1, (%rdi)
26 ; AVX1-NEXT: vmovdqa %xmm1, (%rdi)
2727 ; AVX1-NEXT: retq
2828 ;
2929 ; AVX2-LABEL: v3i64:
501501 ; AVX512_32-NEXT: movl %esp, %ebp
502502 ; AVX512_32-NEXT: andl $-8, %esp
503503 ; AVX512_32-NEXT: subl $8, %esp
504 ; AVX512_32-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
505 ; AVX512_32-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
504 ; AVX512_32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
505 ; AVX512_32-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
506506 ; AVX512_32-NEXT: vsubpd {{\.LCPI.*}}, %xmm0, %xmm0
507507 ; AVX512_32-NEXT: vhaddpd %xmm0, %xmm0, %xmm0
508508 ; AVX512_32-NEXT: vmovlpd %xmm0, (%esp)
522522 ; SSE2_32-NEXT: movl %esp, %ebp
523523 ; SSE2_32-NEXT: andl $-8, %esp
524524 ; SSE2_32-NEXT: subl $8, %esp
525 ; SSE2_32-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
526 ; SSE2_32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
525 ; SSE2_32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
526 ; SSE2_32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
527527 ; SSE2_32-NEXT: subpd {{\.LCPI.*}}, %xmm0
528528 ; SSE2_32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
529529 ; SSE2_32-NEXT: addpd %xmm0, %xmm1
5353 define <2 x double> @d(<2 x double>* %y, <2 x double> %z) nounwind {
5454 ; CHECK-LABEL: d:
5555 ; CHECK: # BB#0:
56 ; CHECK-NEXT: movupd (%rdi), %xmm1
56 ; CHECK-NEXT: movups (%rdi), %xmm1
5757 ; CHECK-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
5858 ; CHECK-NEXT: retq
5959 %x = load <2 x double>, <2 x double>* %y, align 8
22552255 define <2 x i64> @test_mm_set_epi32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind {
22562256 ; X32-LABEL: test_mm_set_epi32:
22572257 ; X32: # BB#0:
2258 ; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2259 ; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2260 ; X32-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2261 ; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
2262 ; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2263 ; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
2264 ; X32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2258 ; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2259 ; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
2260 ; X32-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2261 ; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
2262 ; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2263 ; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
2264 ; X32-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
22652265 ; X32-NEXT: retl
22662266 ;
22672267 ; X64-LABEL: test_mm_set_epi32:
22872287 define <2 x i64> @test_mm_set_epi64x(i64 %a0, i64 %a1) nounwind {
22882288 ; X32-LABEL: test_mm_set_epi64x:
22892289 ; X32: # BB#0:
2290 ; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2291 ; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2292 ; X32-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2293 ; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2294 ; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
2295 ; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
2296 ; X32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2290 ; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
2291 ; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2292 ; X32-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2293 ; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2294 ; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
2295 ; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
2296 ; X32-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
22972297 ; X32-NEXT: retl
22982298 ;
22992299 ; X64-LABEL: test_mm_set_epi64x:
23182318 ; X64-LABEL: test_mm_set_pd:
23192319 ; X64: # BB#0:
23202320 ; X64-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
2321 ; X64-NEXT: movapd %xmm1, %xmm0
2321 ; X64-NEXT: movaps %xmm1, %xmm0
23222322 ; X64-NEXT: retq
23232323 %res0 = insertelement <2 x double> undef, double %a1, i32 0
23242324 %res1 = insertelement <2 x double> %res0, double %a0, i32 1
26642664 define <2 x i64> @test_mm_setr_epi32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind {
26652665 ; X32-LABEL: test_mm_setr_epi32:
26662666 ; X32: # BB#0:
2667 ; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2668 ; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2669 ; X32-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2670 ; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
2671 ; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2672 ; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
2673 ; X32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2667 ; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2668 ; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
2669 ; X32-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2670 ; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
2671 ; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2672 ; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
2673 ; X32-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
26742674 ; X32-NEXT: retl
26752675 ;
26762676 ; X64-LABEL: test_mm_setr_epi32:
26962696 define <2 x i64> @test_mm_setr_epi64x(i64 %a0, i64 %a1) nounwind {
26972697 ; X32-LABEL: test_mm_setr_epi64x:
26982698 ; X32: # BB#0:
2699 ; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2700 ; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2701 ; X32-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2702 ; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2703 ; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
2704 ; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
2705 ; X32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2699 ; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
2700 ; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2701 ; X32-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2702 ; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2703 ; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
2704 ; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
2705 ; X32-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
27062706 ; X32-NEXT: retl
27072707 ;
27082708 ; X64-LABEL: test_mm_setr_epi64x:
37423742 define <2 x i64> @test_mm_unpackhi_epi32(<2 x i64> %a0, <2 x i64> %a1) {
37433743 ; X32-LABEL: test_mm_unpackhi_epi32:
37443744 ; X32: # BB#0:
3745 ; X32-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
3745 ; X32-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
37463746 ; X32-NEXT: retl
37473747 ;
37483748 ; X64-LABEL: test_mm_unpackhi_epi32:
37493749 ; X64: # BB#0:
3750 ; X64-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
3750 ; X64-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
37513751 ; X64-NEXT: retq
37523752 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
37533753 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
37593759 define <2 x i64> @test_mm_unpackhi_epi64(<2 x i64> %a0, <2 x i64> %a1) {
37603760 ; X32-LABEL: test_mm_unpackhi_epi64:
37613761 ; X32: # BB#0:
3762 ; X32-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
3762 ; X32-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
37633763 ; X32-NEXT: retl
37643764 ;
37653765 ; X64-LABEL: test_mm_unpackhi_epi64:
37663766 ; X64: # BB#0:
3767 ; X64-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
3767 ; X64-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
37683768 ; X64-NEXT: retq
37693769 %res = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32>
37703770 ret <2 x i64> %res
38213821 define <2 x i64> @test_mm_unpacklo_epi32(<2 x i64> %a0, <2 x i64> %a1) {
38223822 ; X32-LABEL: test_mm_unpacklo_epi32:
38233823 ; X32: # BB#0:
3824 ; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
3824 ; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
38253825 ; X32-NEXT: retl
38263826 ;
38273827 ; X64-LABEL: test_mm_unpacklo_epi32:
38283828 ; X64: # BB#0:
3829 ; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
3829 ; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
38303830 ; X64-NEXT: retq
38313831 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
38323832 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
38383838 define <2 x i64> @test_mm_unpacklo_epi64(<2 x i64> %a0, <2 x i64> %a1) {
38393839 ; X32-LABEL: test_mm_unpacklo_epi64:
38403840 ; X32: # BB#0:
3841 ; X32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
3841 ; X32-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
38423842 ; X32-NEXT: retl
38433843 ;
38443844 ; X64-LABEL: test_mm_unpacklo_epi64:
38453845 ; X64: # BB#0:
3846 ; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
3846 ; X64-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
38473847 ; X64-NEXT: retq
38483848 %res = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32>
38493849 ret <2 x i64> %res
28632863 ; GENERIC-LABEL: test_movsd_reg:
28642864 ; GENERIC: # BB#0:
28652865 ; GENERIC-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
2866 ; GENERIC-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00]
2866 ; GENERIC-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
28672867 ; GENERIC-NEXT: retq # sched: [1:1.00]
28682868 ;
28692869 ; ATOM-LABEL: test_movsd_reg:
28702870 ; ATOM: # BB#0:
28712871 ; ATOM-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
2872 ; ATOM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50]
2872 ; ATOM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
28732873 ; ATOM-NEXT: nop # sched: [1:0.50]
28742874 ; ATOM-NEXT: nop # sched: [1:0.50]
28752875 ; ATOM-NEXT: nop # sched: [1:0.50]
28792879 ; SLM-LABEL: test_movsd_reg:
28802880 ; SLM: # BB#0:
28812881 ; SLM-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
2882 ; SLM-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00]
2882 ; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
28832883 ; SLM-NEXT: retq # sched: [4:1.00]
28842884 ;
28852885 ; SANDY-LABEL: test_movsd_reg:
3838 ;
3939 ; X64-LABEL: test2:
4040 ; X64: # BB#0:
41 ; X64-NEXT: movapd (%rsi), %xmm1
41 ; X64-NEXT: movaps (%rsi), %xmm1
4242 ; X64-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
43 ; X64-NEXT: movapd %xmm1, (%rdi)
43 ; X64-NEXT: movaps %xmm1, (%rdi)
4444 ; X64-NEXT: retq
4545 %tmp3 = load <2 x double>, <2 x double>* %A, align 16
4646 %tmp7 = insertelement <2 x double> undef, double %B, i32 0
339339 ; X86: # BB#0: # %entry
340340 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
341341 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
342 ; X86-NEXT: movapd (%ecx), %xmm0
342 ; X86-NEXT: movaps (%ecx), %xmm0
343343 ; X86-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],mem[1]
344344 ; X86-NEXT: retl
345345 ;
346346 ; X64-LABEL: test15:
347347 ; X64: # BB#0: # %entry
348 ; X64-NEXT: movapd (%rdi), %xmm0
348 ; X64-NEXT: movaps (%rdi), %xmm0
349349 ; X64-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],mem[1]
350350 ; X64-NEXT: retq
351351 entry:
361361 ; X86-LABEL: test16:
362362 ; X86: # BB#0:
363363 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
364 ; X86-NEXT: movapd 96(%eax), %xmm0
364 ; X86-NEXT: movaps 96(%eax), %xmm0
365365 ; X86-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0]
366366 ; X86-NEXT: retl
367367 ;
368368 ; X64-LABEL: test16:
369369 ; X64: # BB#0:
370 ; X64-NEXT: movapd 96(%rdi), %xmm0
370 ; X64-NEXT: movaps 96(%rdi), %xmm0
371371 ; X64-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0]
372372 ; X64-NEXT: retq
373373 %i5 = getelementptr inbounds <4 x double>, <4 x double>* %srcA, i32 3
342342 ; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
343343 ; SSE-NEXT: subss %xmm1, %xmm0
344344 ; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm0[0]
345 ; SSE-NEXT: movapd %xmm2, %xmm0
345 ; SSE-NEXT: movaps %xmm2, %xmm0
346346 ; SSE-NEXT: retq
347347 ;
348348 ; AVX-LABEL: test14:
417417 ; SSE-NEXT: unpcklps {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
418418 ; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
419419 ; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
420 ; SSE-NEXT: movapd %xmm2, %xmm0
420 ; SSE-NEXT: movaps %xmm2, %xmm0
421421 ; SSE-NEXT: retq
422422 ;
423423 ; AVX-LABEL: test16:
127127 define <8 x i16> @t5(<8 x i16> %A, <8 x i16> %B) nounwind {
128128 ; X86-LABEL: t5:
129129 ; X86: # BB#0:
130 ; X86-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
131 ; X86-NEXT: movdqa %xmm1, %xmm0
130 ; X86-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
131 ; X86-NEXT: movaps %xmm1, %xmm0
132132 ; X86-NEXT: retl
133133 ;
134134 ; X64-LABEL: t5:
135135 ; X64: # BB#0:
136 ; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
137 ; X64-NEXT: movdqa %xmm1, %xmm0
136 ; X64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
137 ; X64-NEXT: movaps %xmm1, %xmm0
138138 ; X64-NEXT: retq
139139 %tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 8, i32 9, i32 0, i32 1, i32 10, i32 11, i32 2, i32 3 >
140140 ret <8 x i16> %tmp
401401 ; X86: # BB#0: # %entry
402402 ; X86-NEXT: movaps (%eax), %xmm0
403403 ; X86-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0,0,1,1]
404 ; X86-NEXT: pxor %xmm1, %xmm1
405 ; X86-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
404 ; X86-NEXT: xorps %xmm1, %xmm1
405 ; X86-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
406406 ; X86-NEXT: retl
407407 ;
408408 ; X64-LABEL: t17:
409409 ; X64: # BB#0: # %entry
410410 ; X64-NEXT: movaps (%rax), %xmm0
411411 ; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0,0,1,1]
412 ; X64-NEXT: pxor %xmm1, %xmm1
413 ; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
412 ; X64-NEXT: xorps %xmm1, %xmm1
413 ; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
414414 ; X64-NEXT: retq
415415 entry:
416416 %tmp1 = load <4 x float>, <4 x float>* undef, align 16
935935 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
936936 ; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
937937 ; X32-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
938 ; X32-NEXT: movapd %xmm1, %xmm0
938 ; X32-NEXT: movaps %xmm1, %xmm0
939939 ; X32-NEXT: retl
940940 ;
941941 ; X64-LABEL: insertps_with_undefs:
942942 ; X64: ## BB#0:
943943 ; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
944944 ; X64-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
945 ; X64-NEXT: movapd %xmm1, %xmm0
945 ; X64-NEXT: movaps %xmm1, %xmm0
946946 ; X64-NEXT: retq
947947 %1 = load float, float* %b, align 4
948948 %2 = insertelement <4 x float> undef, float %1, i32 0
109109 ; SSE: # BB#0:
110110 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm1
111111 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
112 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
112 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
113113 ; SSE-NEXT: retq
114114 ;
115115 ; AVX-LABEL: fptosi_4f64_to_2i32:
234234 ; SSE: # BB#0:
235235 ; SSE-NEXT: cvttpd2dq %xmm1, %xmm1
236236 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
237 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
237 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
238238 ; SSE-NEXT: retq
239239 ;
240240 ; AVX-LABEL: fptosi_4f64_to_4i32:
23722372 ; SSE-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
23732373 ; SSE-NEXT: fistpll -{{[0-9]+}}(%rsp)
23742374 ; SSE-NEXT: fldcw -{{[0-9]+}}(%rsp)
2375 ; SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
2376 ; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
2377 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2378 ; SSE-NEXT: pxor %xmm1, %xmm1
2375 ; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
2376 ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2377 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2378 ; SSE-NEXT: xorps %xmm1, %xmm1
23792379 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3]
23802380 ; SSE-NEXT: retq
23812381 ;
23852385 ; AVX-NEXT: fldt {{[0-9]+}}(%rsp)
23862386 ; AVX-NEXT: fisttpll -{{[0-9]+}}(%rsp)
23872387 ; AVX-NEXT: fisttpll -{{[0-9]+}}(%rsp)
2388 ; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
2389 ; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
2390 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
2388 ; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2389 ; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
2390 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
23912391 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
23922392 ; AVX-NEXT: retq
23932393 %cvt = fptosi <2 x x86_fp80> %a to <2 x i32>
4646 ; X64-LABEL: t3:
4747 ; X64: # BB#0:
4848 ; X64-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
49 ; X64-NEXT: movapd %xmm1, %xmm0
49 ; X64-NEXT: movaps %xmm1, %xmm0
5050 ; X64-NEXT: retq
5151 %tmp1 = insertelement <2 x double> %tmp, double %s, i32 1
5252 ret <2 x double> %tmp1
2525 ; SSE-NEXT: xorps %xmm0, %xmm0
2626 ; SSE-NEXT: cvtsi2sdq %rax, %xmm0
2727 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
28 ; SSE-NEXT: movapd %xmm1, %xmm0
28 ; SSE-NEXT: movaps %xmm1, %xmm0
2929 ; SSE-NEXT: retq
3030 ;
3131 ; VEX-LABEL: sitofp_2i64_to_2f64:
230230 ; SSE-NEXT: xorps %xmm0, %xmm0
231231 ; SSE-NEXT: cvtsi2sdq %rax, %xmm0
232232 ; SSE-NEXT: unpcklpd {{.*#+}} xmm3 = xmm3[0],xmm0[0]
233 ; SSE-NEXT: movapd %xmm2, %xmm0
234 ; SSE-NEXT: movapd %xmm3, %xmm1
233 ; SSE-NEXT: movaps %xmm2, %xmm0
234 ; SSE-NEXT: movaps %xmm3, %xmm1
235235 ; SSE-NEXT: retq
236236 ;
237237 ; AVX1-LABEL: sitofp_4i64_to_4f64:
461461 ;
462462 ; VEX-LABEL: uitofp_2i64_to_2f64:
463463 ; VEX: # BB#0:
464 ; VEX-NEXT: vmovdqa {{.*#+}} xmm1 = [1127219200,1160773632,0,0]
465 ; VEX-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
464 ; VEX-NEXT: vmovapd {{.*#+}} xmm1 = [1127219200,1160773632,0,0]
465 ; VEX-NEXT: vunpcklps {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
466466 ; VEX-NEXT: vmovapd {{.*#+}} xmm3 = [4.503600e+15,1.934281e+25]
467467 ; VEX-NEXT: vsubpd %xmm3, %xmm2, %xmm2
468 ; VEX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
469 ; VEX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
468 ; VEX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
469 ; VEX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
470470 ; VEX-NEXT: vsubpd %xmm3, %xmm0, %xmm0
471471 ; VEX-NEXT: vhaddpd %xmm0, %xmm2, %xmm0
472472 ; VEX-NEXT: retq
768768 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
769769 ; SSE-NEXT: retq
770770 ;
771 ; AVX1-LABEL: uitofp_4i64_to_4f64:
772 ; AVX1: # BB#0:
773 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
774 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [1127219200,1160773632,0,0]
775 ; AVX1-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
776 ; AVX1-NEXT: vmovapd {{.*#+}} xmm4 = [4.503600e+15,1.934281e+25]
777 ; AVX1-NEXT: vsubpd %xmm4, %xmm3, %xmm3
778 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
779 ; AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
780 ; AVX1-NEXT: vsubpd %xmm4, %xmm1, %xmm1
781 ; AVX1-NEXT: vhaddpd %xmm1, %xmm3, %xmm1
782 ; AVX1-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
783 ; AVX1-NEXT: vsubpd %xmm4, %xmm3, %xmm3
784 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
785 ; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
786 ; AVX1-NEXT: vsubpd %xmm4, %xmm0, %xmm0
787 ; AVX1-NEXT: vhaddpd %xmm0, %xmm3, %xmm0
788 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
789 ; AVX1-NEXT: retq
790 ;
791 ; AVX2-LABEL: uitofp_4i64_to_4f64:
792 ; AVX2: # BB#0:
793 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
794 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [1127219200,1160773632,0,0]
795 ; AVX2-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
796 ; AVX2-NEXT: vmovapd {{.*#+}} xmm4 = [4.503600e+15,1.934281e+25]
797 ; AVX2-NEXT: vsubpd %xmm4, %xmm3, %xmm3
798 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
799 ; AVX2-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
800 ; AVX2-NEXT: vsubpd %xmm4, %xmm1, %xmm1
801 ; AVX2-NEXT: vhaddpd %xmm1, %xmm3, %xmm1
802 ; AVX2-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
803 ; AVX2-NEXT: vsubpd %xmm4, %xmm3, %xmm3
804 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
805 ; AVX2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
806 ; AVX2-NEXT: vsubpd %xmm4, %xmm0, %xmm0
807 ; AVX2-NEXT: vhaddpd %xmm0, %xmm3, %xmm0
808 ; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
809 ; AVX2-NEXT: retq
771 ; VEX-LABEL: uitofp_4i64_to_4f64:
772 ; VEX: # BB#0:
773 ; VEX-NEXT: vextractf128 $1, %ymm0, %xmm1
774 ; VEX-NEXT: vmovapd {{.*#+}} xmm2 = [1127219200,1160773632,0,0]
775 ; VEX-NEXT: vunpcklps {{.*#+}} xmm3 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
776 ; VEX-NEXT: vmovapd {{.*#+}} xmm4 = [4.503600e+15,1.934281e+25]
777 ; VEX-NEXT: vsubpd %xmm4, %xmm3, %xmm3
778 ; VEX-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[2,3,0,1]
779 ; VEX-NEXT: vunpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
780 ; VEX-NEXT: vsubpd %xmm4, %xmm1, %xmm1
781 ; VEX-NEXT: vhaddpd %xmm1, %xmm3, %xmm1
782 ; VEX-NEXT: vunpcklps {{.*#+}} xmm3 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
783 ; VEX-NEXT: vsubpd %xmm4, %xmm3, %xmm3
784 ; VEX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
785 ; VEX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
786 ; VEX-NEXT: vsubpd %xmm4, %xmm0, %xmm0
787 ; VEX-NEXT: vhaddpd %xmm0, %xmm3, %xmm0
788 ; VEX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
789 ; VEX-NEXT: retq
810790 ;
811791 ; AVX512F-LABEL: uitofp_4i64_to_4f64:
812792 ; AVX512F: # BB#0:
11161096 ; SSE-NEXT: movq %xmm0, %rax
11171097 ; SSE-NEXT: xorps %xmm0, %xmm0
11181098 ; SSE-NEXT: cvtsi2ssq %rax, %xmm0
1119 ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1099 ; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
11201100 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
11211101 ; SSE-NEXT: retq
11221102 ;
13821362 ; SSE-NEXT: cvtsi2ssq %rax, %xmm0
13831363 ; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
13841364 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
1385 ; SSE-NEXT: movapd %xmm1, %xmm0
1365 ; SSE-NEXT: movaps %xmm1, %xmm0
13861366 ; SSE-NEXT: retq
13871367 ;
13881368 ; AVX1-LABEL: sitofp_4i64_to_4f32:
22092189 ; SSE-NEXT: .LBB47_12:
22102190 ; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
22112191 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
2212 ; SSE-NEXT: movapd %xmm1, %xmm0
2192 ; SSE-NEXT: movaps %xmm1, %xmm0
22132193 ; SSE-NEXT: retq
22142194 ;
22152195 ; AVX1-LABEL: uitofp_4i64_to_4f32:
29182898 ;
29192899 ; VEX-LABEL: uitofp_load_2i64_to_2f64:
29202900 ; VEX: # BB#0:
2921 ; VEX-NEXT: vmovdqa (%rdi), %xmm0
2922 ; VEX-NEXT: vmovdqa {{.*#+}} xmm1 = [1127219200,1160773632,0,0]
2923 ; VEX-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2901 ; VEX-NEXT: vmovapd (%rdi), %xmm0
2902 ; VEX-NEXT: vmovapd {{.*#+}} xmm1 = [1127219200,1160773632,0,0]
2903 ; VEX-NEXT: vunpcklps {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
29242904 ; VEX-NEXT: vmovapd {{.*#+}} xmm3 = [4.503600e+15,1.934281e+25]
29252905 ; VEX-NEXT: vsubpd %xmm3, %xmm2, %xmm2
2926 ; VEX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
2927 ; VEX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2906 ; VEX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
2907 ; VEX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
29282908 ; VEX-NEXT: vsubpd %xmm3, %xmm0, %xmm0
29292909 ; VEX-NEXT: vhaddpd %xmm0, %xmm2, %xmm0
29302910 ; VEX-NEXT: retq
31283108 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
31293109 ; SSE-NEXT: retq
31303110 ;
3131 ; AVX1-LABEL: uitofp_load_4i64_to_4f64:
3132 ; AVX1: # BB#0:
3133 ; AVX1-NEXT: vmovdqa (%rdi), %ymm0
3134 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3135 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [1127219200,1160773632,0,0]
3136 ; AVX1-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
3137 ; AVX1-NEXT: vmovapd {{.*#+}} xmm4 = [4.503600e+15,1.934281e+25]
3138 ; AVX1-NEXT: vsubpd %xmm4, %xmm3, %xmm3
3139 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
3140 ; AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
3141 ; AVX1-NEXT: vsubpd %xmm4, %xmm1, %xmm1
3142 ; AVX1-NEXT: vhaddpd %xmm1, %xmm3, %xmm1
3143 ; AVX1-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
3144 ; AVX1-NEXT: vsubpd %xmm4, %xmm3, %xmm3
3145 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
3146 ; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
3147 ; AVX1-NEXT: vsubpd %xmm4, %xmm0, %xmm0
3148 ; AVX1-NEXT: vhaddpd %xmm0, %xmm3, %xmm0
3149 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3150 ; AVX1-NEXT: retq
3151 ;
3152 ; AVX2-LABEL: uitofp_load_4i64_to_4f64:
3153 ; AVX2: # BB#0:
3154 ; AVX2-NEXT: vmovdqa (%rdi), %ymm0
3155 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
3156 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [1127219200,1160773632,0,0]
3157 ; AVX2-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
3158 ; AVX2-NEXT: vmovapd {{.*#+}} xmm4 = [4.503600e+15,1.934281e+25]
3159 ; AVX2-NEXT: vsubpd %xmm4, %xmm3, %xmm3
3160 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
3161 ; AVX2-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
3162 ; AVX2-NEXT: vsubpd %xmm4, %xmm1, %xmm1
3163 ; AVX2-NEXT: vhaddpd %xmm1, %xmm3, %xmm1
3164 ; AVX2-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
3165 ; AVX2-NEXT: vsubpd %xmm4, %xmm3, %xmm3
3166 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
3167 ; AVX2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
3168 ; AVX2-NEXT: vsubpd %xmm4, %xmm0, %xmm0
3169 ; AVX2-NEXT: vhaddpd %xmm0, %xmm3, %xmm0
3170 ; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3171 ; AVX2-NEXT: retq
3111 ; VEX-LABEL: uitofp_load_4i64_to_4f64:
3112 ; VEX: # BB#0:
3113 ; VEX-NEXT: vmovapd (%rdi), %ymm0
3114 ; VEX-NEXT: vextractf128 $1, %ymm0, %xmm1
3115 ; VEX-NEXT: vmovapd {{.*#+}} xmm2 = [1127219200,1160773632,0,0]
3116 ; VEX-NEXT: vunpcklps {{.*#+}} xmm3 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
3117 ; VEX-NEXT: vmovapd {{.*#+}} xmm4 = [4.503600e+15,1.934281e+25]
3118 ; VEX-NEXT: vsubpd %xmm4, %xmm3, %xmm3
3119 ; VEX-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[2,3,0,1]
3120 ; VEX-NEXT: vunpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
3121 ; VEX-NEXT: vsubpd %xmm4, %xmm1, %xmm1
3122 ; VEX-NEXT: vhaddpd %xmm1, %xmm3, %xmm1
3123 ; VEX-NEXT: vunpcklps {{.*#+}} xmm3 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
3124 ; VEX-NEXT: vsubpd %xmm4, %xmm3, %xmm3
3125 ; VEX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
3126 ; VEX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
3127 ; VEX-NEXT: vsubpd %xmm4, %xmm0, %xmm0
3128 ; VEX-NEXT: vhaddpd %xmm0, %xmm3, %xmm0
3129 ; VEX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3130 ; VEX-NEXT: retq
31723131 ;
31733132 ; AVX512F-LABEL: uitofp_load_4i64_to_4f64:
31743133 ; AVX512F: # BB#0:
6262 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
6363 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
6464 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
65 ; AVX1-NEXT: vunpckhps {{.*#+}} xmm9 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
66 ; AVX1-NEXT: vunpcklps {{.*#+}} xmm2 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
67 ; AVX1-NEXT: vunpckhps {{.*#+}} xmm3 = xmm8[2],xmm1[2],xmm8[3],xmm1[3]
68 ; AVX1-NEXT: vunpcklps {{.*#+}} xmm0 = xmm8[0],xmm1[0],xmm8[1],xmm1[1]
65 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm9 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
66 ; AVX1-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
67 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm3 = xmm8[2],xmm1[2],xmm8[3],xmm1[3]
68 ; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm8[0],xmm1[0],xmm8[1],xmm1[1]
6969 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3]
7070 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm4 = xmm4[4],xmm5[4],xmm4[5],xmm5[5],xmm4[6],xmm5[6],xmm4[7],xmm5[7]
7171 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm5 = xmm7[0],xmm6[0],xmm7[1],xmm6[1],xmm7[2],xmm6[2],xmm7[3],xmm6[3]
7272 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm6 = xmm7[4],xmm6[4],xmm7[5],xmm6[5],xmm7[6],xmm6[6],xmm7[7],xmm6[7]
73 ; AVX1-NEXT: vunpckhps {{.*#+}} xmm7 = xmm4[2],xmm6[2],xmm4[3],xmm6[3]
74 ; AVX1-NEXT: vunpcklps {{.*#+}} xmm4 = xmm4[0],xmm6[0],xmm4[1],xmm6[1]
75 ; AVX1-NEXT: vunpckhps {{.*#+}} xmm6 = xmm1[2],xmm5[2],xmm1[3],xmm5[3]
76 ; AVX1-NEXT: vunpcklps {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1]
73 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm7 = xmm4[2],xmm6[2],xmm4[3],xmm6[3]
74 ; AVX1-NEXT: vpunpckldq {{.*#+}} xmm4 = xmm4[0],xmm6[0],xmm4[1],xmm6[1]
75 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm6 = xmm1[2],xmm5[2],xmm1[3],xmm5[3]
76 ; AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1]
7777 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm5 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
7878 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
7979 ; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm0, %ymm0
107107 ; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
108108 ; CHECK-NEXT: unpcklpd (%rsp), %xmm1 # 16-byte Folded Reload
109109 ; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
110 ; CHECK-NEXT: movapd %xmm1, %xmm0
110 ; CHECK-NEXT: movaps %xmm1, %xmm0
111111 ; CHECK-NEXT: addq $72, %rsp
112112 ; CHECK-NEXT: retq
113113 %m = frem <4 x float> %t, %u
302302 define <2 x i64> @shuffle_v2i64_02(<2 x i64> %a, <2 x i64> %b) {
303303 ; SSE-LABEL: shuffle_v2i64_02:
304304 ; SSE: # BB#0:
305 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
305 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
306306 ; SSE-NEXT: retq
307307 ;
308308 ; AVX-LABEL: shuffle_v2i64_02:
309309 ; AVX: # BB#0:
310 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
310 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
311311 ; AVX-NEXT: retq
312312 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32>
313313 ret <2 x i64> %shuffle
315315 define <2 x i64> @shuffle_v2i64_02_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
316316 ; SSE-LABEL: shuffle_v2i64_02_copy:
317317 ; SSE: # BB#0:
318 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
319 ; SSE-NEXT: movdqa %xmm1, %xmm0
318 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
319 ; SSE-NEXT: movaps %xmm1, %xmm0
320320 ; SSE-NEXT: retq
321321 ;
322322 ; AVX-LABEL: shuffle_v2i64_02_copy:
323323 ; AVX: # BB#0:
324 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm2[0]
324 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm2[0]
325325 ; AVX-NEXT: retq
326326 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32>
327327 ret <2 x i64> %shuffle
474474 define <2 x i64> @shuffle_v2i64_13(<2 x i64> %a, <2 x i64> %b) {
475475 ; SSE-LABEL: shuffle_v2i64_13:
476476 ; SSE: # BB#0:
477 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
477 ; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
478478 ; SSE-NEXT: retq
479479 ;
480480 ; AVX-LABEL: shuffle_v2i64_13:
481481 ; AVX: # BB#0:
482 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
482 ; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
483483 ; AVX-NEXT: retq
484484 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32>
485485 ret <2 x i64> %shuffle
487487 define <2 x i64> @shuffle_v2i64_13_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
488488 ; SSE-LABEL: shuffle_v2i64_13_copy:
489489 ; SSE: # BB#0:
490 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm2[1]
491 ; SSE-NEXT: movdqa %xmm1, %xmm0
490 ; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
491 ; SSE-NEXT: movaps %xmm1, %xmm0
492492 ; SSE-NEXT: retq
493493 ;
494494 ; AVX-LABEL: shuffle_v2i64_13_copy:
495495 ; AVX: # BB#0:
496 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm2[1]
496 ; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm2[1]
497497 ; AVX-NEXT: retq
498498 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32>
499499 ret <2 x i64> %shuffle
501501 define <2 x i64> @shuffle_v2i64_20(<2 x i64> %a, <2 x i64> %b) {
502502 ; SSE-LABEL: shuffle_v2i64_20:
503503 ; SSE: # BB#0:
504 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
505 ; SSE-NEXT: movdqa %xmm1, %xmm0
504 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
505 ; SSE-NEXT: movaps %xmm1, %xmm0
506506 ; SSE-NEXT: retq
507507 ;
508508 ; AVX-LABEL: shuffle_v2i64_20:
509509 ; AVX: # BB#0:
510 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
510 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
511511 ; AVX-NEXT: retq
512512 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32>
513513 ret <2 x i64> %shuffle
515515 define <2 x i64> @shuffle_v2i64_20_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
516516 ; SSE-LABEL: shuffle_v2i64_20_copy:
517517 ; SSE: # BB#0:
518 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
519 ; SSE-NEXT: movdqa %xmm2, %xmm0
518 ; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0]
519 ; SSE-NEXT: movaps %xmm2, %xmm0
520520 ; SSE-NEXT: retq
521521 ;
522522 ; AVX-LABEL: shuffle_v2i64_20_copy:
523523 ; AVX: # BB#0:
524 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm1[0]
524 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm2[0],xmm1[0]
525525 ; AVX-NEXT: retq
526526 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32>
527527 ret <2 x i64> %shuffle
671671 define <2 x i64> @shuffle_v2i64_31(<2 x i64> %a, <2 x i64> %b) {
672672 ; SSE-LABEL: shuffle_v2i64_31:
673673 ; SSE: # BB#0:
674 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1]
675 ; SSE-NEXT: movdqa %xmm1, %xmm0
674 ; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
675 ; SSE-NEXT: movaps %xmm1, %xmm0
676676 ; SSE-NEXT: retq
677677 ;
678678 ; AVX-LABEL: shuffle_v2i64_31:
679679 ; AVX: # BB#0:
680 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
680 ; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1]
681681 ; AVX-NEXT: retq
682682 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32>
683683 ret <2 x i64> %shuffle
685685 define <2 x i64> @shuffle_v2i64_31_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
686686 ; SSE-LABEL: shuffle_v2i64_31_copy:
687687 ; SSE: # BB#0:
688 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm2 = xmm2[1],xmm1[1]
689 ; SSE-NEXT: movdqa %xmm2, %xmm0
688 ; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
689 ; SSE-NEXT: movaps %xmm2, %xmm0
690690 ; SSE-NEXT: retq
691691 ;
692692 ; AVX-LABEL: shuffle_v2i64_31_copy:
693693 ; AVX: # BB#0:
694 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm2[1],xmm1[1]
694 ; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm2[1],xmm1[1]
695695 ; AVX-NEXT: retq
696696 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32>
697697 ret <2 x i64> %shuffle
802802 define <2 x double> @shuffle_v2f64_1z(<2 x double> %a) {
803803 ; SSE-LABEL: shuffle_v2f64_1z:
804804 ; SSE: # BB#0:
805 ; SSE-NEXT: xorpd %xmm1, %xmm1
805 ; SSE-NEXT: xorps %xmm1, %xmm1
806806 ; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
807807 ; SSE-NEXT: retq
808808 ;
809809 ; AVX1-LABEL: shuffle_v2f64_1z:
810810 ; AVX1: # BB#0:
811 ; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
811 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
812812 ; AVX1-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
813813 ; AVX1-NEXT: retq
814814 ;
815815 ; AVX2-LABEL: shuffle_v2f64_1z:
816816 ; AVX2: # BB#0:
817 ; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
817 ; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
818818 ; AVX2-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
819819 ; AVX2-NEXT: retq
820820 ;
821821 ; AVX512VL-LABEL: shuffle_v2f64_1z:
822822 ; AVX512VL: # BB#0:
823823 ; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
824 ; AVX512VL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
824 ; AVX512VL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
825825 ; AVX512VL-NEXT: retq
826826 %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32>
827827 ret <2 x double> %shuffle
830830 define <2 x double> @shuffle_v2f64_z0(<2 x double> %a) {
831831 ; SSE-LABEL: shuffle_v2f64_z0:
832832 ; SSE: # BB#0:
833 ; SSE-NEXT: xorpd %xmm1, %xmm1
833 ; SSE-NEXT: xorps %xmm1, %xmm1
834834 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
835 ; SSE-NEXT: movapd %xmm1, %xmm0
835 ; SSE-NEXT: movaps %xmm1, %xmm0
836836 ; SSE-NEXT: retq
837837 ;
838838 ; AVX1-LABEL: shuffle_v2f64_z0:
839839 ; AVX1: # BB#0:
840 ; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
840 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
841841 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
842842 ; AVX1-NEXT: retq
843843 ;
844844 ; AVX2-LABEL: shuffle_v2f64_z0:
845845 ; AVX2: # BB#0:
846 ; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
846 ; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
847847 ; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
848848 ; AVX2-NEXT: retq
849849 ;
850850 ; AVX512VL-LABEL: shuffle_v2f64_z0:
851851 ; AVX512VL: # BB#0:
852852 ; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
853 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
853 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
854854 ; AVX512VL-NEXT: retq
855855 %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32>
856856 ret <2 x double> %shuffle
905905 define <2 x double> @shuffle_v2f64_bitcast_1z(<2 x double> %a) {
906906 ; SSE-LABEL: shuffle_v2f64_bitcast_1z:
907907 ; SSE: # BB#0:
908 ; SSE-NEXT: xorpd %xmm1, %xmm1
908 ; SSE-NEXT: xorps %xmm1, %xmm1
909909 ; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
910910 ; SSE-NEXT: retq
911911 ;
912912 ; AVX1-LABEL: shuffle_v2f64_bitcast_1z:
913913 ; AVX1: # BB#0:
914 ; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
914 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
915915 ; AVX1-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
916916 ; AVX1-NEXT: retq
917917 ;
918918 ; AVX2-LABEL: shuffle_v2f64_bitcast_1z:
919919 ; AVX2: # BB#0:
920 ; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
920 ; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
921921 ; AVX2-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
922922 ; AVX2-NEXT: retq
923923 ;
924924 ; AVX512VL-LABEL: shuffle_v2f64_bitcast_1z:
925925 ; AVX512VL: # BB#0:
926926 ; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
927 ; AVX512VL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
927 ; AVX512VL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
928928 ; AVX512VL-NEXT: retq
929929 %shuffle64 = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32>
930930 %bitcast32 = bitcast <2 x double> %shuffle64 to <4 x float>
11531153 define <2 x i64> @insert_mem_hi_v2i64(i64* %ptr, <2 x i64> %b) {
11541154 ; SSE-LABEL: insert_mem_hi_v2i64:
11551155 ; SSE: # BB#0:
1156 ; SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
1157 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1156 ; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
1157 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
11581158 ; SSE-NEXT: retq
11591159 ;
11601160 ; AVX-LABEL: insert_mem_hi_v2i64:
11611161 ; AVX: # BB#0:
1162 ; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
1163 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1162 ; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
1163 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
11641164 ; AVX-NEXT: retq
11651165 %a = load i64, i64* %ptr
11661166 %v = insertelement <2 x i64> undef, i64 %a, i32 0
12311231 ; SSE-LABEL: insert_reg_hi_v2f64:
12321232 ; SSE: # BB#0:
12331233 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
1234 ; SSE-NEXT: movapd %xmm1, %xmm0
1234 ; SSE-NEXT: movaps %xmm1, %xmm0
12351235 ; SSE-NEXT: retq
12361236 ;
12371237 ; AVX-LABEL: insert_reg_hi_v2f64:
498498 define <4 x i32> @shuffle_v4i32_0145(<4 x i32> %a, <4 x i32> %b) {
499499 ; SSE-LABEL: shuffle_v4i32_0145:
500500 ; SSE: # BB#0:
501 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
501 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
502502 ; SSE-NEXT: retq
503503 ;
504504 ; AVX-LABEL: shuffle_v4i32_0145:
505505 ; AVX: # BB#0:
506 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
506 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
507507 ; AVX-NEXT: retq
508508 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32>
509509 ret <4 x i32> %shuffle
553553 define <4 x i32> @shuffle_v4i32_4501(<4 x i32> %a, <4 x i32> %b) {
554554 ; SSE-LABEL: shuffle_v4i32_4501:
555555 ; SSE: # BB#0:
556 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
557 ; SSE-NEXT: movdqa %xmm1, %xmm0
556 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
557 ; SSE-NEXT: movaps %xmm1, %xmm0
558558 ; SSE-NEXT: retq
559559 ;
560560 ; AVX-LABEL: shuffle_v4i32_4501:
561561 ; AVX: # BB#0:
562 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
562 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
563563 ; AVX-NEXT: retq
564564 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32>
565565 ret <4 x i32> %shuffle
15241524 define <4 x i32> @shuffle_v4i32_40u1(<4 x i32> %a, <4 x i32> %b) {
15251525 ; SSE-LABEL: shuffle_v4i32_40u1:
15261526 ; SSE: # BB#0:
1527 ; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
1528 ; SSE-NEXT: movdqa %xmm1, %xmm0
1527 ; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
1528 ; SSE-NEXT: movaps %xmm1, %xmm0
15291529 ; SSE-NEXT: retq
15301530 ;
15311531 ; AVX-LABEL: shuffle_v4i32_40u1:
15321532 ; AVX: # BB#0:
1533 ; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
1533 ; AVX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
15341534 ; AVX-NEXT: retq
15351535 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32>
15361536 ret <4 x i32> %shuffle
16011601 define <4 x i32> @shuffle_v4i32_0z1z(<4 x i32> %a) {
16021602 ; SSE2-LABEL: shuffle_v4i32_0z1z:
16031603 ; SSE2: # BB#0:
1604 ; SSE2-NEXT: pxor %xmm1, %xmm1
1605 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1604 ; SSE2-NEXT: xorps %xmm1, %xmm1
1605 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
16061606 ; SSE2-NEXT: retq
16071607 ;
16081608 ; SSE3-LABEL: shuffle_v4i32_0z1z:
16091609 ; SSE3: # BB#0:
1610 ; SSE3-NEXT: pxor %xmm1, %xmm1
1611 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1610 ; SSE3-NEXT: xorps %xmm1, %xmm1
1611 ; SSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
16121612 ; SSE3-NEXT: retq
16131613 ;
16141614 ; SSSE3-LABEL: shuffle_v4i32_0z1z:
16151615 ; SSSE3: # BB#0:
1616 ; SSSE3-NEXT: pxor %xmm1, %xmm1
1617 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1616 ; SSSE3-NEXT: xorps %xmm1, %xmm1
1617 ; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
16181618 ; SSSE3-NEXT: retq
16191619 ;
16201620 ; SSE41-LABEL: shuffle_v4i32_0z1z:
17951795 define <4 x i32> @shuffle_v4i32_bitcast_0415(<4 x i32> %a, <4 x i32> %b) {
17961796 ; SSE-LABEL: shuffle_v4i32_bitcast_0415:
17971797 ; SSE: # BB#0:
1798 ; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1798 ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
17991799 ; SSE-NEXT: retq
18001800 ;
18011801 ; AVX-LABEL: shuffle_v4i32_bitcast_0415:
18021802 ; AVX: # BB#0:
1803 ; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1803 ; AVX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
18041804 ; AVX-NEXT: retq
18051805 %shuffle32 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32>
18061806 %bitcast64 = bitcast <4 x i32> %shuffle32 to <2 x double>
22052205 define <4 x i32> @insert_mem_hi_v4i32(<2 x i32>* %ptr, <4 x i32> %b) {
22062206 ; SSE-LABEL: insert_mem_hi_v4i32:
22072207 ; SSE: # BB#0:
2208 ; SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
2209 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2208 ; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
2209 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
22102210 ; SSE-NEXT: retq
22112211 ;
22122212 ; AVX1OR2-LABEL: insert_mem_hi_v4i32:
22132213 ; AVX1OR2: # BB#0:
2214 ; AVX1OR2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
2215 ; AVX1OR2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2214 ; AVX1OR2-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
2215 ; AVX1OR2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
22162216 ; AVX1OR2-NEXT: retq
22172217 ;
22182218 ; AVX512VL-LABEL: insert_mem_hi_v4i32:
22852285 ; SSE-LABEL: insert_reg_hi_v4f32:
22862286 ; SSE: # BB#0:
22872287 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
2288 ; SSE-NEXT: movapd %xmm1, %xmm0
2288 ; SSE-NEXT: movaps %xmm1, %xmm0
22892289 ; SSE-NEXT: retq
22902290 ;
22912291 ; AVX-LABEL: insert_reg_hi_v4f32:
10891089 define <4 x i64> @shuffle_v4i64_z4z6(<4 x i64> %a) {
10901090 ; AVX1-LABEL: shuffle_v4i64_z4z6:
10911091 ; AVX1: # BB#0:
1092 ; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
1092 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
10931093 ; AVX1-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
10941094 ; AVX1-NEXT: retq
10951095 ;
11091109 define <4 x i64> @shuffle_v4i64_5zuz(<4 x i64> %a) {
11101110 ; AVX1-LABEL: shuffle_v4i64_5zuz:
11111111 ; AVX1: # BB#0:
1112 ; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
1112 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
11131113 ; AVX1-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
11141114 ; AVX1-NEXT: retq
11151115 ;
11271127 }
11281128
11291129 define <4 x i64> @shuffle_v4i64_40u2(<4 x i64> %a, <4 x i64> %b) {
1130 ; AVX1-LABEL: shuffle_v4i64_40u2:
1131 ; AVX1: # BB#0:
1132 ; AVX1-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
1133 ; AVX1-NEXT: retq
1134 ;
1135 ; AVX2-LABEL: shuffle_v4i64_40u2:
1136 ; AVX2: # BB#0:
1137 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
1138 ; AVX2-NEXT: retq
1139 ;
1140 ; AVX512VL-LABEL: shuffle_v4i64_40u2:
1141 ; AVX512VL: # BB#0:
1142 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
1143 ; AVX512VL-NEXT: retq
1130 ; ALL-LABEL: shuffle_v4i64_40u2:
1131 ; ALL: # BB#0:
1132 ; ALL-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
1133 ; ALL-NEXT: retq
11441134 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32>
11451135 ret <4 x i64> %shuffle
11461136 }
11481138 define <4 x i64> @shuffle_v4i64_15uu(<4 x i64> %a, <4 x i64> %b) {
11491139 ; ALL-LABEL: shuffle_v4i64_15uu:
11501140 ; ALL: # BB#0:
1151 ; ALL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
1141 ; ALL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
11521142 ; ALL-NEXT: retq
11531143 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32>
11541144 ret <4 x i64> %shuffle
14221412 }
14231413
14241414 define <4 x double> @bitcast_v4f64_0426(<4 x double> %a, <4 x double> %b) {
1425 ; AVX1-LABEL: bitcast_v4f64_0426:
1426 ; AVX1: # BB#0:
1427 ; AVX1-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
1428 ; AVX1-NEXT: retq
1429 ;
1430 ; AVX2-LABEL: bitcast_v4f64_0426:
1431 ; AVX2: # BB#0:
1432 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
1433 ; AVX2-NEXT: retq
1434 ;
1435 ; AVX512VL-LABEL: bitcast_v4f64_0426:
1436 ; AVX512VL: # BB#0:
1437 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
1438 ; AVX512VL-NEXT: retq
1415 ; ALL-LABEL: bitcast_v4f64_0426:
1416 ; ALL: # BB#0:
1417 ; ALL-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
1418 ; ALL-NEXT: retq
14391419 %shuffle64 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32>
14401420 %bitcast32 = bitcast <4 x double> %shuffle64 to <8 x float>
14411421 %shuffle32 = shufflevector <8 x float> %bitcast32, <8 x float> undef, <8 x i32>
11251125 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
11261126 ; AVX1-NEXT: retq
11271127 ;
1128 ; AVX2OR512VL-LABEL: shuffle_v8i32_08080808:
1129 ; AVX2OR512VL: # BB#0:
1130 ; AVX2OR512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1131 ; AVX2OR512VL-NEXT: vpbroadcastq %xmm0, %ymm0
1132 ; AVX2OR512VL-NEXT: retq
1128 ; AVX2-LABEL: shuffle_v8i32_08080808:
1129 ; AVX2: # BB#0:
1130 ; AVX2-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1131 ; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
1132 ; AVX2-NEXT: retq
1133 ;
1134 ; AVX512VL-LABEL: shuffle_v8i32_08080808:
1135 ; AVX512VL: # BB#0:
1136 ; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1137 ; AVX512VL-NEXT: vpbroadcastq %xmm0, %ymm0
1138 ; AVX512VL-NEXT: retq
11331139 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32>
11341140 ret <8 x i32> %shuffle
11351141 }
11791185 }
11801186
11811187 define <8 x i32> @shuffle_v8i32_08194c5d(<8 x i32> %a, <8 x i32> %b) {
1182 ; AVX1-LABEL: shuffle_v8i32_08194c5d:
1183 ; AVX1: # BB#0:
1184 ; AVX1-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
1185 ; AVX1-NEXT: retq
1186 ;
1187 ; AVX2OR512VL-LABEL: shuffle_v8i32_08194c5d:
1188 ; AVX2OR512VL: # BB#0:
1189 ; AVX2OR512VL-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
1190 ; AVX2OR512VL-NEXT: retq
1188 ; ALL-LABEL: shuffle_v8i32_08194c5d:
1189 ; ALL: # BB#0:
1190 ; ALL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
1191 ; ALL-NEXT: retq
11911192 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32>
11921193 ret <8 x i32> %shuffle
11931194 }
11941195
11951196 define <8 x i32> @shuffle_v8i32_2a3b6e7f(<8 x i32> %a, <8 x i32> %b) {
1196 ; AVX1-LABEL: shuffle_v8i32_2a3b6e7f:
1197 ; AVX1: # BB#0:
1198 ; AVX1-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
1199 ; AVX1-NEXT: retq
1200 ;
1201 ; AVX2OR512VL-LABEL: shuffle_v8i32_2a3b6e7f:
1202 ; AVX2OR512VL: # BB#0:
1203 ; AVX2OR512VL-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
1204 ; AVX2OR512VL-NEXT: retq
1197 ; ALL-LABEL: shuffle_v8i32_2a3b6e7f:
1198 ; ALL: # BB#0:
1199 ; ALL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
1200 ; ALL-NEXT: retq
12051201 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32>
12061202 ret <8 x i32> %shuffle
12071203 }
12081204
12091205 define <8 x i32> @shuffle_v8i32_08192a3b(<8 x i32> %a, <8 x i32> %b) {
1210 ; AVX1-LABEL: shuffle_v8i32_08192a3b:
1211 ; AVX1: # BB#0:
1212 ; AVX1-NEXT: vunpckhps {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1213 ; AVX1-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1214 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1215 ; AVX1-NEXT: retq
1216 ;
1217 ; AVX2-LABEL: shuffle_v8i32_08192a3b:
1218 ; AVX2: # BB#0:
1219 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1220 ; AVX2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1221 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
1222 ; AVX2-NEXT: retq
1206 ; AVX1OR2-LABEL: shuffle_v8i32_08192a3b:
1207 ; AVX1OR2: # BB#0:
1208 ; AVX1OR2-NEXT: vunpckhps {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1209 ; AVX1OR2-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1210 ; AVX1OR2-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1211 ; AVX1OR2-NEXT: retq
12231212 ;
12241213 ; AVX512VL-LABEL: shuffle_v8i32_08192a3b:
12251214 ; AVX512VL: # BB#0:
19001889 }
19011890
19021891 define <8 x i32> @shuffle_v8i32_80u1b4uu(<8 x i32> %a, <8 x i32> %b) {
1903 ; AVX1-LABEL: shuffle_v8i32_80u1b4uu:
1904 ; AVX1: # BB#0:
1905 ; AVX1-NEXT: vunpcklps {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5]
1906 ; AVX1-NEXT: retq
1907 ;
1908 ; AVX2OR512VL-LABEL: shuffle_v8i32_80u1b4uu:
1909 ; AVX2OR512VL: # BB#0:
1910 ; AVX2OR512VL-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5]
1911 ; AVX2OR512VL-NEXT: retq
1892 ; ALL-LABEL: shuffle_v8i32_80u1b4uu:
1893 ; ALL: # BB#0:
1894 ; ALL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5]
1895 ; ALL-NEXT: retq
19121896 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32>
19131897 ret <8 x i32> %shuffle
19141898 }
19351919 define <8 x i32> @shuffle_v8i32_2A3Buuuu(<8 x i32> %a, <8 x i32> %b) {
19361920 ; ALL-LABEL: shuffle_v8i32_2A3Buuuu:
19371921 ; ALL: # BB#0:
1938 ; ALL-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1922 ; ALL-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
19391923 ; ALL-NEXT: retq
19401924 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32>
19411925 ret <8 x i32> %shuffle
7575 define <16 x i32> @shuffle_v16i32_00_10_01_11_04_14_05_15_08_18_09_19_0c_1c_0d_1d(<16 x i32> %a, <16 x i32> %b) {
7676 ; ALL-LABEL: shuffle_v16i32_00_10_01_11_04_14_05_15_08_18_09_19_0c_1c_0d_1d:
7777 ; ALL: # BB#0:
78 ; ALL-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
78 ; ALL-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
7979 ; ALL-NEXT: retq
8080 %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32>
8181 ret <16 x i32> %shuffle
8484 define <16 x i32> @shuffle_v16i32_zz_10_zz_11_zz_14_zz_15_zz_18_zz_19_zz_1c_zz_1d(<16 x i32> %a, <16 x i32> %b) {
8585 ; ALL-LABEL: shuffle_v16i32_zz_10_zz_11_zz_14_zz_15_zz_18_zz_19_zz_1c_zz_1d:
8686 ; ALL: # BB#0:
87 ; ALL-NEXT: vpxor %xmm0, %xmm0, %xmm0
88 ; ALL-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
87 ; ALL-NEXT: vxorps %xmm0, %xmm0, %xmm0
88 ; ALL-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
8989 ; ALL-NEXT: retq
9090 %shuffle = shufflevector <16 x i32> zeroinitializer, <16 x i32> %b, <16 x i32>
9191 ret <16 x i32> %shuffle
177177 define <16 x i32> @shuffle_v16i32_02_12_03_13_06_16_07_17_0a_1a_0b_1b_0e_1e_0f_1f(<16 x i32> %a, <16 x i32> %b) {
178178 ; ALL-LABEL: shuffle_v16i32_02_12_03_13_06_16_07_17_0a_1a_0b_1b_0e_1e_0f_1f:
179179 ; ALL: # BB#0:
180 ; ALL-NEXT: vpunpckhdq {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
180 ; ALL-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
181181 ; ALL-NEXT: retq
182182 %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32>
183183 ret <16 x i32> %shuffle
186186 define <16 x i32> @shuffle_v16i32_02_zz_03_zz_06_zz_07_zz_0a_zz_0b_zz_0e_zz_0f_zz(<16 x i32> %a, <16 x i32> %b) {
187187 ; ALL-LABEL: shuffle_v16i32_02_zz_03_zz_06_zz_07_zz_0a_zz_0b_zz_0e_zz_0f_zz:
188188 ; ALL: # BB#0:
189 ; ALL-NEXT: vpxor %xmm1, %xmm1, %xmm1
190 ; ALL-NEXT: vpunpckhdq {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
189 ; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1
190 ; ALL-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
191191 ; ALL-NEXT: retq
192192 %shuffle = shufflevector <16 x i32> %a, <16 x i32> zeroinitializer, <16 x i32>
193193 ret <16 x i32> %shuffle
19821982 ;
19831983 ; AVX512F-LABEL: shuffle_v8f64_0z2z4z6z:
19841984 ; AVX512F: # BB#0:
1985 ; AVX512F-NEXT: vxorpd %xmm1, %xmm1, %xmm1
1985 ; AVX512F-NEXT: vxorps %xmm1, %xmm1, %xmm1
19861986 ; AVX512F-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
19871987 ; AVX512F-NEXT: retq
19881988 ;
19891989 ; AVX512F-32-LABEL: shuffle_v8f64_0z2z4z6z:
19901990 ; AVX512F-32: # BB#0:
1991 ; AVX512F-32-NEXT: vxorpd %xmm1, %xmm1, %xmm1
1991 ; AVX512F-32-NEXT: vxorps %xmm1, %xmm1, %xmm1
19921992 ; AVX512F-32-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
19931993 ; AVX512F-32-NEXT: retl
19941994 %shuffle = shufflevector <8 x double> %a, <8 x double> zeroinitializer, <8 x i32>
19991999 ;
20002000 ; AVX512F-LABEL: shuffle_v8i64_082a4c6e:
20012001 ; AVX512F: # BB#0:
2002 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
2002 ; AVX512F-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
20032003 ; AVX512F-NEXT: retq
20042004 ;
20052005 ; AVX512F-32-LABEL: shuffle_v8i64_082a4c6e:
20062006 ; AVX512F-32: # BB#0:
2007 ; AVX512F-32-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
2007 ; AVX512F-32-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
20082008 ; AVX512F-32-NEXT: retl
20092009 %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32>
20102010 ret <8 x i64> %shuffle
20142014 ;
20152015 ; AVX512F-LABEL: shuffle_v8i64_z8zazcze:
20162016 ; AVX512F: # BB#0:
2017 ; AVX512F-NEXT: vpxor %xmm0, %xmm0, %xmm0
2018 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
2017 ; AVX512F-NEXT: vxorps %xmm0, %xmm0, %xmm0
2018 ; AVX512F-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
20192019 ; AVX512F-NEXT: retq
20202020 ;
20212021 ; AVX512F-32-LABEL: shuffle_v8i64_z8zazcze:
20222022 ; AVX512F-32: # BB#0:
2023 ; AVX512F-32-NEXT: vpxor %xmm0, %xmm0, %xmm0
2024 ; AVX512F-32-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
2023 ; AVX512F-32-NEXT: vxorps %xmm0, %xmm0, %xmm0
2024 ; AVX512F-32-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
20252025 ; AVX512F-32-NEXT: retl
20262026 %shuffle = shufflevector <8 x i64> zeroinitializer, <8 x i64> %b, <8 x i32>
20272027 ret <8 x i64> %shuffle
20462046 ;
20472047 ; AVX512F-LABEL: shuffle_v8f64_z9zbzdzf:
20482048 ; AVX512F: # BB#0:
2049 ; AVX512F-NEXT: vxorpd %xmm0, %xmm0, %xmm0
2049 ; AVX512F-NEXT: vxorps %xmm0, %xmm0, %xmm0
20502050 ; AVX512F-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
20512051 ; AVX512F-NEXT: retq
20522052 ;
20532053 ; AVX512F-32-LABEL: shuffle_v8f64_z9zbzdzf:
20542054 ; AVX512F-32: # BB#0:
2055 ; AVX512F-32-NEXT: vxorpd %xmm0, %xmm0, %xmm0
2055 ; AVX512F-32-NEXT: vxorps %xmm0, %xmm0, %xmm0
20562056 ; AVX512F-32-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
20572057 ; AVX512F-32-NEXT: retl
20582058 %shuffle = shufflevector <8 x double> zeroinitializer, <8 x double> %b, <8 x i32>
20632063 ;
20642064 ; AVX512F-LABEL: shuffle_v8i64_193b5d7f:
20652065 ; AVX512F: # BB#0:
2066 ; AVX512F-NEXT: vpunpckhqdq {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
2066 ; AVX512F-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
20672067 ; AVX512F-NEXT: retq
20682068 ;
20692069 ; AVX512F-32-LABEL: shuffle_v8i64_193b5d7f:
20702070 ; AVX512F-32: # BB#0:
2071 ; AVX512F-32-NEXT: vpunpckhqdq {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
2071 ; AVX512F-32-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
20722072 ; AVX512F-32-NEXT: retl
20732073 %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32>
20742074 ret <8 x i64> %shuffle
20782078 ;
20792079 ; AVX512F-LABEL: shuffle_v8i64_1z3z5z7z:
20802080 ; AVX512F: # BB#0:
2081 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
2082 ; AVX512F-NEXT: vpunpckhqdq {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
2081 ; AVX512F-NEXT: vxorps %xmm1, %xmm1, %xmm1
2082 ; AVX512F-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
20832083 ; AVX512F-NEXT: retq
20842084 ;
20852085 ; AVX512F-32-LABEL: shuffle_v8i64_1z3z5z7z:
20862086 ; AVX512F-32: # BB#0:
2087 ; AVX512F-32-NEXT: vpxor %xmm1, %xmm1, %xmm1
2088 ; AVX512F-32-NEXT: vpunpckhqdq {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
2087 ; AVX512F-32-NEXT: vxorps %xmm1, %xmm1, %xmm1
2088 ; AVX512F-32-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
20892089 ; AVX512F-32-NEXT: retl
20902090 %shuffle = shufflevector <8 x i64> %a, <8 x i64> zeroinitializer, <8 x i32>
20912091 ret <8 x i64> %shuffle
913913 define <16 x i32> @vpermt2var_vpermi2var_16i32_as_unpckldq(<16 x i32> %a0, <16 x i32> %a1) {
914914 ; X32-LABEL: vpermt2var_vpermi2var_16i32_as_unpckldq:
915915 ; X32: # BB#0:
916 ; X32-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
916 ; X32-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
917917 ; X32-NEXT: retl
918918 ;
919919 ; X64-LABEL: vpermt2var_vpermi2var_16i32_as_unpckldq:
920920 ; X64: # BB#0:
921 ; X64-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
921 ; X64-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
922922 ; X64-NEXT: retq
923923 %res0 = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %a0, <16 x i32> , <16 x i32> %a1, i16 -1)
924924 ret <16 x i32> %res0
509509 define <16 x i8> @combine_pshufb_as_unpacklo_zero(<16 x i8> %a0) {
510510 ; SSE-LABEL: combine_pshufb_as_unpacklo_zero:
511511 ; SSE: # BB#0:
512 ; SSE-NEXT: pxor %xmm1, %xmm1
513 ; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
514 ; SSE-NEXT: movdqa %xmm1, %xmm0
512 ; SSE-NEXT: xorps %xmm1, %xmm1
513 ; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
514 ; SSE-NEXT: movaps %xmm1, %xmm0
515515 ; SSE-NEXT: retq
516516 ;
517517 ; AVX-LABEL: combine_pshufb_as_unpacklo_zero:
518518 ; AVX: # BB#0:
519 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
520 ; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
519 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
520 ; AVX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
521521 ; AVX-NEXT: retq
522522 %1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> )
523523 ret <16 x i8> %1
13801380 define <4 x i32> @combine_test8(<4 x i32> %a, <4 x i32> %b) {
13811381 ; SSE-LABEL: combine_test8:
13821382 ; SSE: # BB#0:
1383 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1383 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
13841384 ; SSE-NEXT: retq
13851385 ;
13861386 ; AVX-LABEL: combine_test8:
13871387 ; AVX: # BB#0:
1388 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1388 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
13891389 ; AVX-NEXT: retq
13901390 %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32>
13911391 %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32>
13951395 define <4 x i32> @combine_test9(<4 x i32> %a, <4 x i32> %b) {
13961396 ; SSE-LABEL: combine_test9:
13971397 ; SSE: # BB#0:
1398 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1]
1399 ; SSE-NEXT: movdqa %xmm1, %xmm0
1398 ; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
1399 ; SSE-NEXT: movaps %xmm1, %xmm0
14001400 ; SSE-NEXT: retq
14011401 ;
14021402 ; AVX-LABEL: combine_test9:
14031403 ; AVX: # BB#0:
1404 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
1404 ; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1]
14051405 ; AVX-NEXT: retq
14061406 %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32>
14071407 %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32>
15771577 define <4 x i32> @combine_test18(<4 x i32> %a, <4 x i32> %b) {
15781578 ; SSE-LABEL: combine_test18:
15791579 ; SSE: # BB#0:
1580 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1580 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
15811581 ; SSE-NEXT: retq
15821582 ;
15831583 ; AVX-LABEL: combine_test18:
15841584 ; AVX: # BB#0:
1585 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1585 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
15861586 ; AVX-NEXT: retq
15871587 %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32>
15881588 %2 = shufflevector <4 x i32> %1, <4 x i32> %a, <4 x i32>
15921592 define <4 x i32> @combine_test19(<4 x i32> %a, <4 x i32> %b) {
15931593 ; SSE-LABEL: combine_test19:
15941594 ; SSE: # BB#0:
1595 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
1595 ; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
15961596 ; SSE-NEXT: retq
15971597 ;
15981598 ; AVX-LABEL: combine_test19:
15991599 ; AVX: # BB#0:
1600 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
1600 ; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
16011601 ; AVX-NEXT: retq
16021602 %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32>
16031603 %2 = shufflevector <4 x i32> %1, <4 x i32> %a, <4 x i32>
16391639 define <4 x i32> @combine_test21(<8 x i32> %a, <4 x i32>* %ptr) {
16401640 ; SSE-LABEL: combine_test21:
16411641 ; SSE: # BB#0:
1642 ; SSE-NEXT: movdqa %xmm0, %xmm2
1643 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
1644 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
1645 ; SSE-NEXT: movdqa %xmm2, (%rdi)
1646 ; SSE-NEXT: retq
1647 ;
1648 ; AVX1-LABEL: combine_test21:
1649 ; AVX1: # BB#0:
1650 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1651 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm0[0],xmm1[0]
1652 ; AVX1-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
1653 ; AVX1-NEXT: vmovdqa %xmm2, (%rdi)
1654 ; AVX1-NEXT: vzeroupper
1655 ; AVX1-NEXT: retq
1656 ;
1657 ; AVX2-LABEL: combine_test21:
1658 ; AVX2: # BB#0:
1659 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1660 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm0[0],xmm1[0]
1661 ; AVX2-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
1662 ; AVX2-NEXT: vmovdqa %xmm2, (%rdi)
1663 ; AVX2-NEXT: vzeroupper
1664 ; AVX2-NEXT: retq
1642 ; SSE-NEXT: movaps %xmm0, %xmm2
1643 ; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0]
1644 ; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
1645 ; SSE-NEXT: movaps %xmm2, (%rdi)
1646 ; SSE-NEXT: retq
1647 ;
1648 ; AVX-LABEL: combine_test21:
1649 ; AVX: # BB#0:
1650 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
1651 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm0[0],xmm1[0]
1652 ; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
1653 ; AVX-NEXT: vmovaps %xmm2, (%rdi)
1654 ; AVX-NEXT: vzeroupper
1655 ; AVX-NEXT: retq
16651656 %1 = shufflevector <8 x i32> %a, <8 x i32> %a, <4 x i32>
16661657 %2 = shufflevector <8 x i32> %a, <8 x i32> %a, <4 x i32>
16671658 store <4 x i32> %1, <4 x i32>* %ptr, align 16
20992090 define <4 x i32> @combine_test_movhl_1(<4 x i32> %a, <4 x i32> %b) {
21002091 ; SSE-LABEL: combine_test_movhl_1:
21012092 ; SSE: # BB#0:
2102 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1]
2103 ; SSE-NEXT: movdqa %xmm1, %xmm0
2093 ; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
2094 ; SSE-NEXT: movaps %xmm1, %xmm0
21042095 ; SSE-NEXT: retq
21052096 ;
21062097 ; AVX-LABEL: combine_test_movhl_1:
21072098 ; AVX: # BB#0:
2108 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
2099 ; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1]
21092100 ; AVX-NEXT: retq
21102101 %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32>
21112102 %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32>
21152106 define <4 x i32> @combine_test_movhl_2(<4 x i32> %a, <4 x i32> %b) {
21162107 ; SSE-LABEL: combine_test_movhl_2:
21172108 ; SSE: # BB#0:
2118 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1]
2119 ; SSE-NEXT: movdqa %xmm1, %xmm0
2109 ; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
2110 ; SSE-NEXT: movaps %xmm1, %xmm0
21202111 ; SSE-NEXT: retq
21212112 ;
21222113 ; AVX-LABEL: combine_test_movhl_2:
21232114 ; AVX: # BB#0:
2124 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
2115 ; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1]
21252116 ; AVX-NEXT: retq
21262117 %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32>
21272118 %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32>
21312122 define <4 x i32> @combine_test_movhl_3(<4 x i32> %a, <4 x i32> %b) {
21322123 ; SSE-LABEL: combine_test_movhl_3:
21332124 ; SSE: # BB#0:
2134 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1]
2135 ; SSE-NEXT: movdqa %xmm1, %xmm0
2125 ; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
2126 ; SSE-NEXT: movaps %xmm1, %xmm0
21362127 ; SSE-NEXT: retq
21372128 ;
21382129 ; AVX-LABEL: combine_test_movhl_3:
21392130 ; AVX: # BB#0:
2140 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
2131 ; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1]
21412132 ; AVX-NEXT: retq
21422133 %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32>
21432134 %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32>
211211 define <4 x i32> @shuf_0z1z(<4 x i32> %a0) {
212212 ; AMD10H-LABEL: shuf_0z1z:
213213 ; AMD10H: # BB#0:
214 ; AMD10H-NEXT: pxor %xmm1, %xmm1
215 ; AMD10H-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
214 ; AMD10H-NEXT: xorps %xmm1, %xmm1
215 ; AMD10H-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
216216 ; AMD10H-NEXT: retq
217217 ;
218218 ; BTVER1-LABEL: shuf_0z1z:
219219 ; BTVER1: # BB#0:
220 ; BTVER1-NEXT: pxor %xmm1, %xmm1
221 ; BTVER1-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
220 ; BTVER1-NEXT: xorps %xmm1, %xmm1
221 ; BTVER1-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
222222 ; BTVER1-NEXT: retq
223223 ;
224224 ; BTVER2-LABEL: shuf_0z1z:
4141 ; SSE-NEXT: andl $1, %edi
4242 ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
4343 ; SSE-NEXT: andl $1, %esi
44 ; SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
45 ; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
46 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
44 ; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
45 ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
46 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
4747 ; SSE-NEXT: retq
4848 ;
4949 ; AVX-LABEL: var_shuffle_v2i64_v2i64_xx_i64:
5353 ; AVX-NEXT: andl $1, %edi
5454 ; AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
5555 ; AVX-NEXT: andl $1, %esi
56 ; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
57 ; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
58 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
56 ; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
57 ; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
58 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
5959 ; AVX-NEXT: retq
6060 %x0 = extractelement <2 x i64> %x, i32 %i0
6161 %x1 = extractelement <2 x i64> %x, i32 %i1
161161 ; SSE2-NEXT: andl $3, %edx
162162 ; SSE2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
163163 ; SSE2-NEXT: andl $3, %ecx
164 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
165 ; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
166 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
167 ; SSE2-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
168 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
169 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
170 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
164 ; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
165 ; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
166 ; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
167 ; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
168 ; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
169 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
170 ; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
171171 ; SSE2-NEXT: retq
172172 ;
173173 ; SSSE3-LABEL: var_shuffle_v4i32_v4i32_xxxx_i32:
181181 ; SSSE3-NEXT: andl $3, %edx
182182 ; SSSE3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
183183 ; SSSE3-NEXT: andl $3, %ecx
184 ; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
185 ; SSSE3-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
186 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
187 ; SSSE3-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
188 ; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
189 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
190 ; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
184 ; SSSE3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
185 ; SSSE3-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
186 ; SSSE3-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
187 ; SSSE3-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
188 ; SSSE3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
189 ; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
190 ; SSSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
191191 ; SSSE3-NEXT: retq
192192 ;
193193 ; SSE41-LABEL: var_shuffle_v4i32_v4i32_xxxx_i32:
732732 ; SSE2-NEXT: andl $3, %edx
733733 ; SSE2-NEXT: movl 12(%rdi), %esi
734734 ; SSE2-NEXT: andl $3, %esi
735 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
736 ; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
737 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
738 ; SSE2-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
739 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
740 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
741 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
735 ; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
736 ; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
737 ; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
738 ; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
739 ; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
740 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
741 ; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
742742 ; SSE2-NEXT: retq
743743 ;
744744 ; SSSE3-LABEL: mem_shuffle_v4i32_v4i32_xxxx_i32:
752752 ; SSSE3-NEXT: andl $3, %edx
753753 ; SSSE3-NEXT: movl 12(%rdi), %esi
754754 ; SSSE3-NEXT: andl $3, %esi
755 ; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
756 ; SSSE3-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
757 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
758 ; SSSE3-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
759 ; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
760 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
761 ; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
755 ; SSSE3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
756 ; SSSE3-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
757 ; SSSE3-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
758 ; SSSE3-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
759 ; SSSE3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
760 ; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
761 ; SSSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
762762 ; SSSE3-NEXT: retq
763763 ;
764764 ; SSE41-LABEL: mem_shuffle_v4i32_v4i32_xxxx_i32:
9090 }
9191
9292 define <4 x i64> @var_shuffle_v4i64_v4i64_xxxx_i64(<4 x i64> %x, i64 %i0, i64 %i1, i64 %i2, i64 %i3) nounwind {
93 ; AVX1-LABEL: var_shuffle_v4i64_v4i64_xxxx_i64:
94 ; AVX1: # BB#0:
95 ; AVX1-NEXT: pushq %rbp
96 ; AVX1-NEXT: movq %rsp, %rbp
97 ; AVX1-NEXT: andq $-32, %rsp
98 ; AVX1-NEXT: subq $64, %rsp
99 ; AVX1-NEXT: andl $3, %edi
100 ; AVX1-NEXT: andl $3, %esi
101 ; AVX1-NEXT: andl $3, %edx
102 ; AVX1-NEXT: andl $3, %ecx
103 ; AVX1-NEXT: vmovaps %ymm0, (%rsp)
104 ; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
105 ; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
106 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
107 ; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
108 ; AVX1-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
109 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
110 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
111 ; AVX1-NEXT: movq %rbp, %rsp
112 ; AVX1-NEXT: popq %rbp
113 ; AVX1-NEXT: retq
114 ;
115 ; AVX2-LABEL: var_shuffle_v4i64_v4i64_xxxx_i64:
116 ; AVX2: # BB#0:
117 ; AVX2-NEXT: pushq %rbp
118 ; AVX2-NEXT: movq %rsp, %rbp
119 ; AVX2-NEXT: andq $-32, %rsp
120 ; AVX2-NEXT: subq $64, %rsp
121 ; AVX2-NEXT: andl $3, %edi
122 ; AVX2-NEXT: andl $3, %esi
123 ; AVX2-NEXT: andl $3, %edx
124 ; AVX2-NEXT: andl $3, %ecx
125 ; AVX2-NEXT: vmovaps %ymm0, (%rsp)
126 ; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
127 ; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
128 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
129 ; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
130 ; AVX2-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
131 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
132 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
133 ; AVX2-NEXT: movq %rbp, %rsp
134 ; AVX2-NEXT: popq %rbp
135 ; AVX2-NEXT: retq
93 ; ALL-LABEL: var_shuffle_v4i64_v4i64_xxxx_i64:
94 ; ALL: # BB#0:
95 ; ALL-NEXT: pushq %rbp
96 ; ALL-NEXT: movq %rsp, %rbp
97 ; ALL-NEXT: andq $-32, %rsp
98 ; ALL-NEXT: subq $64, %rsp
99 ; ALL-NEXT: andl $3, %edi
100 ; ALL-NEXT: andl $3, %esi
101 ; ALL-NEXT: andl $3, %edx
102 ; ALL-NEXT: andl $3, %ecx
103 ; ALL-NEXT: vmovaps %ymm0, (%rsp)
104 ; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
105 ; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
106 ; ALL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
107 ; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
108 ; ALL-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
109 ; ALL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
110 ; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
111 ; ALL-NEXT: movq %rbp, %rsp
112 ; ALL-NEXT: popq %rbp
113 ; ALL-NEXT: retq
136114 %x0 = extractelement <4 x i64> %x, i64 %i0
137115 %x1 = extractelement <4 x i64> %x, i64 %i1
138116 %x2 = extractelement <4 x i64> %x, i64 %i2
154132 ; ALL-NEXT: andl $3, %edi
155133 ; ALL-NEXT: andl $3, %esi
156134 ; ALL-NEXT: vmovaps %ymm0, (%rsp)
157 ; ALL-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
158 ; ALL-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
159 ; ALL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
160 ; ALL-NEXT: vmovdqa %xmm0, %xmm0
135 ; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
136 ; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
137 ; ALL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
138 ; ALL-NEXT: vmovaps %xmm0, %xmm0
161139 ; ALL-NEXT: movq %rbp, %rsp
162140 ; ALL-NEXT: popq %rbp
163141 ; ALL-NEXT: retq
173151 }
174152
175153 define <4 x i64> @var_shuffle_v4i64_v2i64_xxxx_i64(<2 x i64> %x, i64 %i0, i64 %i1, i64 %i2, i64 %i3) nounwind {
176 ; AVX1-LABEL: var_shuffle_v4i64_v2i64_xxxx_i64:
177 ; AVX1: # BB#0:
178 ; AVX1-NEXT: andl $1, %edi
179 ; AVX1-NEXT: andl $1, %esi
180 ; AVX1-NEXT: andl $1, %edx
181 ; AVX1-NEXT: andl $1, %ecx
182 ; AVX1-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
183 ; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
184 ; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
185 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
186 ; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
187 ; AVX1-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
188 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
189 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
190 ; AVX1-NEXT: retq
191 ;
192 ; AVX2-LABEL: var_shuffle_v4i64_v2i64_xxxx_i64:
193 ; AVX2: # BB#0:
194 ; AVX2-NEXT: andl $1, %edi
195 ; AVX2-NEXT: andl $1, %esi
196 ; AVX2-NEXT: andl $1, %edx
197 ; AVX2-NEXT: andl $1, %ecx
198 ; AVX2-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
199 ; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
200 ; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
201 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
202 ; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
203 ; AVX2-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
204 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
205 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
206 ; AVX2-NEXT: retq
154 ; ALL-LABEL: var_shuffle_v4i64_v2i64_xxxx_i64:
155 ; ALL: # BB#0:
156 ; ALL-NEXT: andl $1, %edi
157 ; ALL-NEXT: andl $1, %esi
158 ; ALL-NEXT: andl $1, %edx
159 ; ALL-NEXT: andl $1, %ecx
160 ; ALL-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
161 ; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
162 ; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
163 ; ALL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
164 ; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
165 ; ALL-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
166 ; ALL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
167 ; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
168 ; ALL-NEXT: retq
207169 %x0 = extractelement <2 x i64> %x, i64 %i0
208170 %x1 = extractelement <2 x i64> %x, i64 %i1
209171 %x2 = extractelement <2 x i64> %x, i64 %i2
630592 ;
631593
632594 define <4 x i64> @mem_shuffle_v4i64_v4i64_xxxx_i64(<4 x i64> %x, i64* %i) nounwind {
633 ; AVX1-LABEL: mem_shuffle_v4i64_v4i64_xxxx_i64:
634 ; AVX1: # BB#0:
635 ; AVX1-NEXT: pushq %rbp
636 ; AVX1-NEXT: movq %rsp, %rbp
637 ; AVX1-NEXT: andq $-32, %rsp
638 ; AVX1-NEXT: subq $64, %rsp
639 ; AVX1-NEXT: movq (%rdi), %rax
640 ; AVX1-NEXT: movq 8(%rdi), %rcx
641 ; AVX1-NEXT: andl $3, %eax
642 ; AVX1-NEXT: andl $3, %ecx
643 ; AVX1-NEXT: movq 16(%rdi), %rdx
644 ; AVX1-NEXT: andl $3, %edx
645 ; AVX1-NEXT: movq 24(%rdi), %rsi
646 ; AVX1-NEXT: andl $3, %esi
647 ; AVX1-NEXT: vmovaps %ymm0, (%rsp)
648 ; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
649 ; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
650 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
651 ; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
652 ; AVX1-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
653 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
654 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
655 ; AVX1-NEXT: movq %rbp, %rsp
656 ; AVX1-NEXT: popq %rbp
657 ; AVX1-NEXT: retq
658 ;
659 ; AVX2-LABEL: mem_shuffle_v4i64_v4i64_xxxx_i64:
660 ; AVX2: # BB#0:
661 ; AVX2-NEXT: pushq %rbp
662 ; AVX2-NEXT: movq %rsp, %rbp
663 ; AVX2-NEXT: andq $-32, %rsp
664 ; AVX2-NEXT: subq $64, %rsp
665 ; AVX2-NEXT: movq (%rdi), %rax
666 ; AVX2-NEXT: movq 8(%rdi), %rcx
667 ; AVX2-NEXT: andl $3, %eax
668 ; AVX2-NEXT: andl $3, %ecx
669 ; AVX2-NEXT: movq 16(%rdi), %rdx
670 ; AVX2-NEXT: andl $3, %edx
671 ; AVX2-NEXT: movq 24(%rdi), %rsi
672 ; AVX2-NEXT: andl $3, %esi
673 ; AVX2-NEXT: vmovaps %ymm0, (%rsp)
674 ; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
675 ; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
676 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
677 ; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
678 ; AVX2-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
679 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
680 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
681 ; AVX2-NEXT: movq %rbp, %rsp
682 ; AVX2-NEXT: popq %rbp
683 ; AVX2-NEXT: retq
595 ; ALL-LABEL: mem_shuffle_v4i64_v4i64_xxxx_i64:
596 ; ALL: # BB#0:
597 ; ALL-NEXT: pushq %rbp
598 ; ALL-NEXT: movq %rsp, %rbp
599 ; ALL-NEXT: andq $-32, %rsp
600 ; ALL-NEXT: subq $64, %rsp
601 ; ALL-NEXT: movq (%rdi), %rax
602 ; ALL-NEXT: movq 8(%rdi), %rcx
603 ; ALL-NEXT: andl $3, %eax
604 ; ALL-NEXT: andl $3, %ecx
605 ; ALL-NEXT: movq 16(%rdi), %rdx
606 ; ALL-NEXT: andl $3, %edx
607 ; ALL-NEXT: movq 24(%rdi), %rsi
608 ; ALL-NEXT: andl $3, %esi
609 ; ALL-NEXT: vmovaps %ymm0, (%rsp)
610 ; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
611 ; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
612 ; ALL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
613 ; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
614 ; ALL-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
615 ; ALL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
616 ; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
617 ; ALL-NEXT: movq %rbp, %rsp
618 ; ALL-NEXT: popq %rbp
619 ; ALL-NEXT: retq
684620 %p0 = getelementptr inbounds i64, i64* %i, i32 0
685621 %p1 = getelementptr inbounds i64, i64* %i, i32 1
686622 %p2 = getelementptr inbounds i64, i64* %i, i32 2
701637 }
702638
703639 define <4 x i64> @mem_shuffle_v4i64_v2i64_xxxx_i64(<2 x i64> %x, i64* %i) nounwind {
704 ; AVX1-LABEL: mem_shuffle_v4i64_v2i64_xxxx_i64:
705 ; AVX1: # BB#0:
706 ; AVX1-NEXT: movq (%rdi), %rax
707 ; AVX1-NEXT: movq 8(%rdi), %rcx
708 ; AVX1-NEXT: andl $1, %eax
709 ; AVX1-NEXT: andl $1, %ecx
710 ; AVX1-NEXT: movq 16(%rdi), %rdx
711 ; AVX1-NEXT: andl $1, %edx
712 ; AVX1-NEXT: movq 24(%rdi), %rsi
713 ; AVX1-NEXT: andl $1, %esi
714 ; AVX1-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
715 ; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
716 ; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
717 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
718 ; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
719 ; AVX1-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
720 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
721 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
722 ; AVX1-NEXT: retq
723 ;
724 ; AVX2-LABEL: mem_shuffle_v4i64_v2i64_xxxx_i64:
725 ; AVX2: # BB#0:
726 ; AVX2-NEXT: movq (%rdi), %rax
727 ; AVX2-NEXT: movq 8(%rdi), %rcx
728 ; AVX2-NEXT: andl $1, %eax
729 ; AVX2-NEXT: andl $1, %ecx
730 ; AVX2-NEXT: movq 16(%rdi), %rdx
731 ; AVX2-NEXT: andl $1, %edx
732 ; AVX2-NEXT: movq 24(%rdi), %rsi
733 ; AVX2-NEXT: andl $1, %esi
734 ; AVX2-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
735 ; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
736 ; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
737 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
738 ; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
739 ; AVX2-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
740 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
741 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
742 ; AVX2-NEXT: retq
640 ; ALL-LABEL: mem_shuffle_v4i64_v2i64_xxxx_i64:
641 ; ALL: # BB#0:
642 ; ALL-NEXT: movq (%rdi), %rax
643 ; ALL-NEXT: movq 8(%rdi), %rcx
644 ; ALL-NEXT: andl $1, %eax
645 ; ALL-NEXT: andl $1, %ecx
646 ; ALL-NEXT: movq 16(%rdi), %rdx
647 ; ALL-NEXT: andl $1, %edx
648 ; ALL-NEXT: movq 24(%rdi), %rsi
649 ; ALL-NEXT: andl $1, %esi
650 ; ALL-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
651 ; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
652 ; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
653 ; ALL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
654 ; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
655 ; ALL-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
656 ; ALL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
657 ; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
658 ; ALL-NEXT: retq
743659 %p0 = getelementptr inbounds i64, i64* %i, i32 0
744660 %p1 = getelementptr inbounds i64, i64* %i, i32 1
745661 %p2 = getelementptr inbounds i64, i64* %i, i32 2
771771 define <2 x i64> @zext_4i32_to_2i64(<4 x i32> %A) nounwind uwtable readnone ssp {
772772 ; SSE2-LABEL: zext_4i32_to_2i64:
773773 ; SSE2: # BB#0: # %entry
774 ; SSE2-NEXT: pxor %xmm1, %xmm1
775 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
774 ; SSE2-NEXT: xorps %xmm1, %xmm1
775 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
776776 ; SSE2-NEXT: retq
777777 ;
778778 ; SSSE3-LABEL: zext_4i32_to_2i64:
779779 ; SSSE3: # BB#0: # %entry
780 ; SSSE3-NEXT: pxor %xmm1, %xmm1
781 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
780 ; SSSE3-NEXT: xorps %xmm1, %xmm1
781 ; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
782782 ; SSSE3-NEXT: retq
783783 ;
784784 ; SSE41-LABEL: zext_4i32_to_2i64:
799799 define <4 x i64> @zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
800800 ; SSE2-LABEL: zext_4i32_to_4i64:
801801 ; SSE2: # BB#0: # %entry
802 ; SSE2-NEXT: movdqa %xmm0, %xmm1
803 ; SSE2-NEXT: pxor %xmm2, %xmm2
804 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
805 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
802 ; SSE2-NEXT: movaps %xmm0, %xmm1
803 ; SSE2-NEXT: xorps %xmm2, %xmm2
804 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
805 ; SSE2-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
806806 ; SSE2-NEXT: retq
807807 ;
808808 ; SSSE3-LABEL: zext_4i32_to_4i64:
809809 ; SSSE3: # BB#0: # %entry
810 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
811 ; SSSE3-NEXT: pxor %xmm2, %xmm2
812 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
813 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
810 ; SSSE3-NEXT: movaps %xmm0, %xmm1
811 ; SSSE3-NEXT: xorps %xmm2, %xmm2
812 ; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
813 ; SSSE3-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
814814 ; SSSE3-NEXT: retq
815815 ;
816816 ; SSE41-LABEL: zext_4i32_to_4i64:
846846 define <8 x i64> @zext_8i32_to_8i64(<8 x i32> %A) nounwind uwtable readnone ssp {
847847 ; SSE2-LABEL: zext_8i32_to_8i64:
848848 ; SSE2: # BB#0: # %entry
849 ; SSE2-NEXT: movdqa %xmm1, %xmm3
850 ; SSE2-NEXT: movdqa %xmm0, %xmm1
851 ; SSE2-NEXT: pxor %xmm4, %xmm4
852 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
853 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
854 ; SSE2-NEXT: movdqa %xmm3, %xmm2
855 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
856 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
849 ; SSE2-NEXT: movaps %xmm1, %xmm3
850 ; SSE2-NEXT: movaps %xmm0, %xmm1
851 ; SSE2-NEXT: xorps %xmm4, %xmm4
852 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
853 ; SSE2-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
854 ; SSE2-NEXT: movaps %xmm3, %xmm2
855 ; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
856 ; SSE2-NEXT: unpckhps {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
857857 ; SSE2-NEXT: retq
858858 ;
859859 ; SSSE3-LABEL: zext_8i32_to_8i64:
860860 ; SSSE3: # BB#0: # %entry
861 ; SSSE3-NEXT: movdqa %xmm1, %xmm3
862 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
863 ; SSSE3-NEXT: pxor %xmm4, %xmm4
864 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
865 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
866 ; SSSE3-NEXT: movdqa %xmm3, %xmm2
867 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
868 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
861 ; SSSE3-NEXT: movaps %xmm1, %xmm3
862 ; SSSE3-NEXT: movaps %xmm0, %xmm1
863 ; SSSE3-NEXT: xorps %xmm4, %xmm4
864 ; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
865 ; SSSE3-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
866 ; SSSE3-NEXT: movaps %xmm3, %xmm2
867 ; SSSE3-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
868 ; SSSE3-NEXT: unpckhps {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
869869 ; SSSE3-NEXT: retq
870870 ;
871871 ; SSE41-LABEL: zext_8i32_to_8i64:
14311431 define <2 x i64> @load_zext_2i32_to_2i64(<2 x i32> *%ptr) {
14321432 ; SSE2-LABEL: load_zext_2i32_to_2i64:
14331433 ; SSE2: # BB#0: # %entry
1434 ; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
1435 ; SSE2-NEXT: pxor %xmm1, %xmm1
1436 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1434 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1435 ; SSE2-NEXT: xorps %xmm1, %xmm1
1436 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
14371437 ; SSE2-NEXT: retq
14381438 ;
14391439 ; SSSE3-LABEL: load_zext_2i32_to_2i64:
14401440 ; SSSE3: # BB#0: # %entry
1441 ; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
1442 ; SSSE3-NEXT: pxor %xmm1, %xmm1
1443 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1441 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1442 ; SSSE3-NEXT: xorps %xmm1, %xmm1
1443 ; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
14441444 ; SSSE3-NEXT: retq
14451445 ;
14461446 ; SSE41-LABEL: load_zext_2i32_to_2i64:
14611461 define <4 x i64> @load_zext_4i32_to_4i64(<4 x i32> *%ptr) {
14621462 ; SSE2-LABEL: load_zext_4i32_to_4i64:
14631463 ; SSE2: # BB#0: # %entry
1464 ; SSE2-NEXT: movdqa (%rdi), %xmm1
1465 ; SSE2-NEXT: pxor %xmm2, %xmm2
1466 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1467 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1468 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1464 ; SSE2-NEXT: movaps (%rdi), %xmm1
1465 ; SSE2-NEXT: xorps %xmm2, %xmm2
1466 ; SSE2-NEXT: movaps %xmm1, %xmm0
1467 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1468 ; SSE2-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
14691469 ; SSE2-NEXT: retq
14701470 ;
14711471 ; SSSE3-LABEL: load_zext_4i32_to_4i64:
14721472 ; SSSE3: # BB#0: # %entry
1473 ; SSSE3-NEXT: movdqa (%rdi), %xmm1
1474 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1475 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1476 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1477 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1473 ; SSSE3-NEXT: movaps (%rdi), %xmm1
1474 ; SSSE3-NEXT: xorps %xmm2, %xmm2
1475 ; SSSE3-NEXT: movaps %xmm1, %xmm0
1476 ; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1477 ; SSSE3-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
14781478 ; SSSE3-NEXT: retq
14791479 ;
14801480 ; SSE41-LABEL: load_zext_4i32_to_4i64:
16111611 define <4 x i64> @shuf_zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
16121612 ; SSE2-LABEL: shuf_zext_4i32_to_4i64:
16131613 ; SSE2: # BB#0: # %entry
1614 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1615 ; SSE2-NEXT: pxor %xmm2, %xmm2
1616 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1617 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1614 ; SSE2-NEXT: movaps %xmm0, %xmm1
1615 ; SSE2-NEXT: xorps %xmm2, %xmm2
1616 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1617 ; SSE2-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
16181618 ; SSE2-NEXT: retq
16191619 ;
16201620 ; SSSE3-LABEL: shuf_zext_4i32_to_4i64:
16211621 ; SSSE3: # BB#0: # %entry
1622 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
1623 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1624 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1625 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1622 ; SSSE3-NEXT: movaps %xmm0, %xmm1
1623 ; SSSE3-NEXT: xorps %xmm2, %xmm2
1624 ; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1625 ; SSSE3-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
16261626 ; SSSE3-NEXT: retq
16271627 ;
16281628 ; SSE41-LABEL: shuf_zext_4i32_to_4i64:
20312031 define <2 x i64> @shuf_zext_4i32_to_2i64_offset2(<4 x i32> %A) nounwind uwtable readnone ssp {
20322032 ; SSE-LABEL: shuf_zext_4i32_to_2i64_offset2:
20332033 ; SSE: # BB#0: # %entry
2034 ; SSE-NEXT: pxor %xmm1, %xmm1
2035 ; SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2034 ; SSE-NEXT: xorps %xmm1, %xmm1
2035 ; SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
20362036 ; SSE-NEXT: retq
20372037 ;
20382038 ; AVX-LABEL: shuf_zext_4i32_to_2i64_offset2:
20392039 ; AVX: # BB#0: # %entry
2040 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
2041 ; AVX-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2040 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
2041 ; AVX-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
20422042 ; AVX-NEXT: retq
20432043 entry:
20442044 %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <4 x i32>
1414 ; X64-LABEL: convert:
1515 ; X64: # BB#0: # %entry
1616 ; X64-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
17 ; X64-NEXT: movapd %xmm0, (%rdi)
17 ; X64-NEXT: movaps %xmm0, (%rdi)
1818 ; X64-NEXT: retq
1919 entry:
2020 %val = shufflevector <3 x double> %src, <3 x double> undef, <2 x i32>
114114 define <4 x i64> @load_factori64_4(<16 x i64>* %ptr) {
115115 ; AVX1-LABEL: load_factori64_4:
116116 ; AVX1: # BB#0:
117 ; AVX1-NEXT: vmovupd (%rdi), %ymm0
118 ; AVX1-NEXT: vmovupd 32(%rdi), %ymm1
119 ; AVX1-NEXT: vmovupd 64(%rdi), %ymm2
120 ; AVX1-NEXT: vmovupd 96(%rdi), %ymm3
117 ; AVX1-NEXT: vmovups (%rdi), %ymm0
118 ; AVX1-NEXT: vmovups 32(%rdi), %ymm1
119 ; AVX1-NEXT: vmovups 64(%rdi), %ymm2
120 ; AVX1-NEXT: vmovups 96(%rdi), %ymm3
121121 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm0[0,1],ymm2[0,1]
122122 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm5 = ymm1[0,1],ymm3[0,1]
123123 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm2[2,3]
179179 ; AVX1-NEXT: vunpcklpd {{.*#+}} ymm3 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
180180 ; AVX1-NEXT: vunpckhpd {{.*#+}} ymm4 = ymm4[1],ymm5[1],ymm4[3],ymm5[3]
181181 ; AVX1-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
182 ; AVX1-NEXT: vmovupd %ymm0, 96(%rdi)
183 ; AVX1-NEXT: vmovupd %ymm3, 64(%rdi)
184 ; AVX1-NEXT: vmovupd %ymm4, 32(%rdi)
185 ; AVX1-NEXT: vmovupd %ymm2, (%rdi)
182 ; AVX1-NEXT: vmovups %ymm0, 96(%rdi)
183 ; AVX1-NEXT: vmovups %ymm3, 64(%rdi)
184 ; AVX1-NEXT: vmovups %ymm4, 32(%rdi)
185 ; AVX1-NEXT: vmovups %ymm2, (%rdi)
186186 ; AVX1-NEXT: vzeroupper
187187 ; AVX1-NEXT: retq
188188 ;
196196 ; AVX2-NEXT: vunpcklpd {{.*#+}} ymm3 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
197197 ; AVX2-NEXT: vunpckhpd {{.*#+}} ymm4 = ymm4[1],ymm5[1],ymm4[3],ymm5[3]
198198 ; AVX2-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
199 ; AVX2-NEXT: vmovupd %ymm0, 96(%rdi)
200 ; AVX2-NEXT: vmovupd %ymm3, 64(%rdi)
201 ; AVX2-NEXT: vmovupd %ymm4, 32(%rdi)
202 ; AVX2-NEXT: vmovupd %ymm2, (%rdi)
199 ; AVX2-NEXT: vmovups %ymm0, 96(%rdi)
200 ; AVX2-NEXT: vmovups %ymm3, 64(%rdi)
201 ; AVX2-NEXT: vmovups %ymm4, 32(%rdi)
202 ; AVX2-NEXT: vmovups %ymm2, (%rdi)
203203 ; AVX2-NEXT: vzeroupper
204204 ; AVX2-NEXT: retq
205205 ;
215215 ; AVX512-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
216216 ; AVX512-NEXT: vinsertf64x4 $1, %ymm4, %zmm2, %zmm1
217217 ; AVX512-NEXT: vinsertf64x4 $1, %ymm0, %zmm3, %zmm0
218 ; AVX512-NEXT: vmovupd %zmm0, 64(%rdi)
219 ; AVX512-NEXT: vmovupd %zmm1, (%rdi)
218 ; AVX512-NEXT: vmovups %zmm0, 64(%rdi)
219 ; AVX512-NEXT: vmovups %zmm1, (%rdi)
220220 ; AVX512-NEXT: vzeroupper
221221 ; AVX512-NEXT: retq
222222 %s0 = shufflevector <4 x double> %v0, <4 x double> %v1, <8 x i32>
237237 ; AVX1-NEXT: vunpcklpd {{.*#+}} ymm3 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
238238 ; AVX1-NEXT: vunpckhpd {{.*#+}} ymm4 = ymm4[1],ymm5[1],ymm4[3],ymm5[3]
239239 ; AVX1-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
240 ; AVX1-NEXT: vmovupd %ymm0, 96(%rdi)
241 ; AVX1-NEXT: vmovupd %ymm3, 64(%rdi)
242 ; AVX1-NEXT: vmovupd %ymm4, 32(%rdi)
243 ; AVX1-NEXT: vmovupd %ymm2, (%rdi)
240 ; AVX1-NEXT: vmovups %ymm0, 96(%rdi)
241 ; AVX1-NEXT: vmovups %ymm3, 64(%rdi)
242 ; AVX1-NEXT: vmovups %ymm4, 32(%rdi)
243 ; AVX1-NEXT: vmovups %ymm2, (%rdi)
244244 ; AVX1-NEXT: vzeroupper
245245 ; AVX1-NEXT: retq
246246 ;
247247 ; AVX2-LABEL: store_factori64_4:
248248 ; AVX2: # BB#0:
249 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm4
250 ; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm5
251 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm2[2,3]
252 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm1[2,3],ymm3[2,3]
253 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} ymm2 = ymm4[0],ymm5[0],ymm4[2],ymm5[2]
254 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} ymm3 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
255 ; AVX2-NEXT: vpunpckhqdq {{.*#+}} ymm4 = ymm4[1],ymm5[1],ymm4[3],ymm5[3]
256 ; AVX2-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
257 ; AVX2-NEXT: vmovdqu %ymm0, 96(%rdi)
258 ; AVX2-NEXT: vmovdqu %ymm3, 64(%rdi)
259 ; AVX2-NEXT: vmovdqu %ymm4, 32(%rdi)
260 ; AVX2-NEXT: vmovdqu %ymm2, (%rdi)
249 ; AVX2-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm4
250 ; AVX2-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm5
251 ; AVX2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm2[2,3]
252 ; AVX2-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm1[2,3],ymm3[2,3]
253 ; AVX2-NEXT: vunpcklpd {{.*#+}} ymm2 = ymm4[0],ymm5[0],ymm4[2],ymm5[2]
254 ; AVX2-NEXT: vunpcklpd {{.*#+}} ymm3 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
255 ; AVX2-NEXT: vunpckhpd {{.*#+}} ymm4 = ymm4[1],ymm5[1],ymm4[3],ymm5[3]
256 ; AVX2-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
257 ; AVX2-NEXT: vmovups %ymm0, 96(%rdi)
258 ; AVX2-NEXT: vmovups %ymm3, 64(%rdi)
259 ; AVX2-NEXT: vmovups %ymm4, 32(%rdi)
260 ; AVX2-NEXT: vmovups %ymm2, (%rdi)
261261 ; AVX2-NEXT: vzeroupper
262262 ; AVX2-NEXT: retq
263263 ;
264264 ; AVX512-LABEL: store_factori64_4:
265265 ; AVX512: # BB#0:
266 ; AVX512-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm4
267 ; AVX512-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm5
268 ; AVX512-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm2[2,3]
269 ; AVX512-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm1[2,3],ymm3[2,3]
270 ; AVX512-NEXT: vpunpcklqdq {{.*#+}} ymm2 = ymm4[0],ymm5[0],ymm4[2],ymm5[2]
271 ; AVX512-NEXT: vpunpcklqdq {{.*#+}} ymm3 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
272 ; AVX512-NEXT: vpunpckhqdq {{.*#+}} ymm4 = ymm4[1],ymm5[1],ymm4[3],ymm5[3]
273 ; AVX512-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
274 ; AVX512-NEXT: vinserti64x4 $1, %ymm4, %zmm2, %zmm1
275 ; AVX512-NEXT: vinserti64x4 $1, %ymm0, %zmm3, %zmm0
276 ; AVX512-NEXT: vmovdqu64 %zmm0, 64(%rdi)
277 ; AVX512-NEXT: vmovdqu64 %zmm1, (%rdi)
266 ; AVX512-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm4
267 ; AVX512-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm5
268 ; AVX512-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm2[2,3]
269 ; AVX512-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm1[2,3],ymm3[2,3]
270 ; AVX512-NEXT: vunpcklpd {{.*#+}} ymm2 = ymm4[0],ymm5[0],ymm4[2],ymm5[2]
271 ; AVX512-NEXT: vunpcklpd {{.*#+}} ymm3 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
272 ; AVX512-NEXT: vunpckhpd {{.*#+}} ymm4 = ymm4[1],ymm5[1],ymm4[3],ymm5[3]
273 ; AVX512-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
274 ; AVX512-NEXT: vinsertf64x4 $1, %ymm4, %zmm2, %zmm1
275 ; AVX512-NEXT: vinsertf64x4 $1, %ymm0, %zmm3, %zmm0
276 ; AVX512-NEXT: vmovups %zmm0, 64(%rdi)
277 ; AVX512-NEXT: vmovups %zmm1, (%rdi)
278278 ; AVX512-NEXT: vzeroupper
279279 ; AVX512-NEXT: retq
280280 %s0 = shufflevector <4 x i64> %v0, <4 x i64> %v1, <8 x i32>
9999 define <2 x double> @vpermil2pd_21(<2 x double> %a0, <2 x double> %a1) {
100100 ; X32-LABEL: vpermil2pd_21:
101101 ; X32: # BB#0:
102 ; X32-NEXT: vxorpd %xmm1, %xmm1, %xmm1
102 ; X32-NEXT: vxorps %xmm1, %xmm1, %xmm1
103103 ; X32-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
104104 ; X32-NEXT: retl
105105 ;
106106 ; X64-LABEL: vpermil2pd_21:
107107 ; X64: # BB#0:
108 ; X64-NEXT: vxorpd %xmm1, %xmm1, %xmm1
108 ; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
109109 ; X64-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
110110 ; X64-NEXT: retq
111111 %1 = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x i64> , i8 2)