llvm.org GIT mirror llvm / 41188e6
[X86][AVX512] Add support for 512-bit shuffle lowering to VPERMPD/VPERMQ git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@274473 91177308-0d34-0410-b5e6-96231b3b80d8 Simon Pilgrim 4 years ago
2 changed file(s) with 78 addition(s) and 89 deletion(s). Raw diff Collapse all Expand all
70397039 return false;
70407040 }
70417041
7042 /// \brief Test whether a shuffle mask is equivalent within each 128-bit lane.
7042 /// \brief Test whether a shuffle mask is equivalent within each sub-lane.
70437043 ///
70447044 /// This checks a shuffle mask to see if it is performing the same
7045 /// 128-bit lane-relative shuffle in each 128-bit lane. This trivially implies
7045 /// lane-relative shuffle in each sub-lane. This trivially implies
70467046 /// that it is also not lane-crossing. It may however involve a blend from the
70477047 /// same lane of a second vector.
70487048 ///
70507050 /// non-trivial to compute in the face of undef lanes. The representation is
70517051 /// suitable for use with existing 128-bit shuffles as entries from the second
70527052 /// vector have been remapped to [LaneSize, 2*LaneSize).
7053 static bool
7054 is128BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask,
7055 SmallVectorImpl<int> &RepeatedMask) {
7056 int LaneSize = 128 / VT.getScalarSizeInBits();
7053 static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT,
7054 ArrayRef<int> Mask,
7055 SmallVectorImpl<int> &RepeatedMask) {
7056 int LaneSize = LaneSizeInBits / VT.getScalarSizeInBits();
70577057 RepeatedMask.assign(LaneSize, -1);
70587058 int Size = Mask.size();
70597059 for (int i = 0; i < Size; ++i) {
70757075 return false;
70767076 }
70777077 return true;
7078 }
7079
7080 /// Test whether a shuffle mask is equivalent within each 128-bit lane.
7081 static bool
7082 is128BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask,
7083 SmallVectorImpl<int> &RepeatedMask) {
7084 return isRepeatedShuffleMask(128, VT, Mask, RepeatedMask);
7085 }
7086
7087 /// Test whether a shuffle mask is equivalent within each 256-bit lane.
7088 static bool
7089 is256BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask,
7090 SmallVectorImpl<int> &RepeatedMask) {
7091 return isRepeatedShuffleMask(256, VT, Mask, RepeatedMask);
70787092 }
70797093
70807094 /// \brief Checks whether a shuffle mask is equivalent to an explicit list of
1173111745 return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v8f64, V1,
1173211746 DAG.getConstant(VPERMILPMask, DL, MVT::i8));
1173311747 }
11748
11749 SmallVector<int, 4> RepeatedMask;
11750 if (is256BitLaneRepeatedShuffleMask(MVT::v8f64, Mask, RepeatedMask))
11751 return DAG.getNode(X86ISD::VPERMI, DL, MVT::v8f64, V1,
11752 getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG));
1173411753 }
1173511754
1173611755 if (SDValue Shuf128 =
1179011809 lowerV4X128VectorShuffle(DL, MVT::v8i64, Mask, V1, V2, DAG))
1179111810 return Shuf128;
1179211811
11793 // When the shuffle is mirrored between the 128-bit lanes of the unit, we can
11794 // use lower latency instructions that will operate on both 128-bit lanes.
11795 SmallVector<int, 2> RepeatedMask;
11796 if (is128BitLaneRepeatedShuffleMask(MVT::v8i64, Mask, RepeatedMask)) {
11797 if (V2.isUndef()) {
11812 if (V2.isUndef()) {
11813 // When the shuffle is mirrored between the 128-bit lanes of the unit, we
11814 // can use lower latency instructions that will operate on all four
11815 // 128-bit lanes.
11816 SmallVector<int, 2> Repeated128Mask;
11817 if (is128BitLaneRepeatedShuffleMask(MVT::v8i64, Mask, Repeated128Mask)) {
1179811818 int PSHUFDMask[] = {-1, -1, -1, -1};
1179911819 for (int i = 0; i < 2; ++i)
11800 if (RepeatedMask[i] >= 0) {
11801 PSHUFDMask[2 * i] = 2 * RepeatedMask[i];
11802 PSHUFDMask[2 * i + 1] = 2 * RepeatedMask[i] + 1;
11820 if (Repeated128Mask[i] >= 0) {
11821 PSHUFDMask[2 * i] = 2 * Repeated128Mask[i];
11822 PSHUFDMask[2 * i + 1] = 2 * Repeated128Mask[i] + 1;
1180311823 }
1180411824 return DAG.getBitcast(
1180511825 MVT::v8i64,
1180711827 DAG.getBitcast(MVT::v16i32, V1),
1180811828 getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG)));
1180911829 }
11830
11831 SmallVector<int, 4> Repeated256Mask;
11832 if (is256BitLaneRepeatedShuffleMask(MVT::v8i64, Mask, Repeated256Mask))
11833 return DAG.getNode(X86ISD::VPERMI, DL, MVT::v8i64, V1,
11834 getV4X86ShuffleImm8ForMask(Repeated256Mask, DL, DAG));
1181011835 }
1181111836
1181211837 // Try to use shift instructions.
0 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1 ; RUN: llc < %s -mcpu=x86-64 -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=AVX512F
1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=AVX512F
22 ; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=AVX512F-32
3
4 target triple = "x86_64-unknown-unknown"
53
64 define <8 x double> @shuffle_v8f64_00000000(<8 x double> %a, <8 x double> %b) {
75 ; AVX512F-LABEL: shuffle_v8f64_00000000:
171169 define <8 x double> @shuffle_v8f64_01014545(<8 x double> %a, <8 x double> %b) {
172170 ; AVX512F-LABEL: shuffle_v8f64_01014545:
173171 ; AVX512F: # BB#0:
174 ; AVX512F-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,4,5,4,5]
172 ; AVX512F-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[0,1,0,1,4,5,4,5]
175173 ; AVX512F-NEXT: retq
176174 ;
177175 ; AVX512F-32-LABEL: shuffle_v8f64_01014545:
178176 ; AVX512F-32: # BB#0:
179 ; AVX512F-32-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,4,5,4,5]
177 ; AVX512F-32-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[0,1,0,1,4,5,4,5]
180178 ; AVX512F-32-NEXT: retl
181179 %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
182180 ret <8 x double> %shuffle
434432 ;
435433 ; AVX512F-LABEL: shuffle_v8f64_00014445:
436434 ; AVX512F: # BB#0:
437 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,1,4,4,4,5]
438 ; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
435 ; AVX512F-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[0,0,0,1,4,4,4,5]
439436 ; AVX512F-NEXT: retq
440437 ;
441438 ; AVX512F-32-LABEL: shuffle_v8f64_00014445:
442439 ; AVX512F-32: # BB#0:
443 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,1,0,4,0,4,0,4,0,5,0]
444 ; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
440 ; AVX512F-32-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[0,0,0,1,4,4,4,5]
445441 ; AVX512F-32-NEXT: retl
446442 %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5>
447443 ret <8 x double> %shuffle
451447 ;
452448 ; AVX512F-LABEL: shuffle_v8f64_00204464:
453449 ; AVX512F: # BB#0:
454 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,2,0,4,4,6,4]
455 ; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
450 ; AVX512F-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[0,0,2,0,4,4,6,4]
456451 ; AVX512F-NEXT: retq
457452 ;
458453 ; AVX512F-32-LABEL: shuffle_v8f64_00204464:
459454 ; AVX512F-32: # BB#0:
460 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,2,0,0,0,4,0,4,0,6,0,4,0]
461 ; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
455 ; AVX512F-32-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[0,0,2,0,4,4,6,4]
462456 ; AVX512F-32-NEXT: retl
463457 %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4>
464458 ret <8 x double> %shuffle
468462 ;
469463 ; AVX512F-LABEL: shuffle_v8f64_03004744:
470464 ; AVX512F: # BB#0:
471 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,3,0,0,4,7,4,4]
472 ; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
465 ; AVX512F-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[0,3,0,0,4,7,4,4]
473466 ; AVX512F-NEXT: retq
474467 ;
475468 ; AVX512F-32-LABEL: shuffle_v8f64_03004744:
476469 ; AVX512F-32: # BB#0:
477 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,3,0,0,0,0,0,4,0,7,0,4,0,4,0]
478 ; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
470 ; AVX512F-32-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[0,3,0,0,4,7,4,4]
479471 ; AVX512F-32-NEXT: retl
480472 %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4>
481473 ret <8 x double> %shuffle
485477 ;
486478 ; AVX512F-LABEL: shuffle_v8f64_10005444:
487479 ; AVX512F: # BB#0:
488 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,0,0,5,4,4,4]
489 ; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
480 ; AVX512F-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[1,0,0,0,5,4,4,4]
490481 ; AVX512F-NEXT: retq
491482 ;
492483 ; AVX512F-32-LABEL: shuffle_v8f64_10005444:
493484 ; AVX512F-32: # BB#0:
494 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,0,0,0,0,0,0,5,0,4,0,4,0,4,0]
495 ; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
485 ; AVX512F-32-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[1,0,0,0,5,4,4,4]
496486 ; AVX512F-32-NEXT: retl
497487 %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
498488 ret <8 x double> %shuffle
502492 ;
503493 ; AVX512F-LABEL: shuffle_v8f64_22006644:
504494 ; AVX512F: # BB#0:
505 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [2,2,0,0,6,6,4,4]
506 ; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
495 ; AVX512F-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[2,2,0,0,6,6,4,4]
507496 ; AVX512F-NEXT: retq
508497 ;
509498 ; AVX512F-32-LABEL: shuffle_v8f64_22006644:
510499 ; AVX512F-32: # BB#0:
511 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [2,0,2,0,0,0,0,0,6,0,6,0,4,0,4,0]
512 ; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
500 ; AVX512F-32-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[2,2,0,0,6,6,4,4]
513501 ; AVX512F-32-NEXT: retl
514502 %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4>
515503 ret <8 x double> %shuffle
519507 ;
520508 ; AVX512F-LABEL: shuffle_v8f64_33307774:
521509 ; AVX512F: # BB#0:
522 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,3,3,0,7,7,7,4]
523 ; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
510 ; AVX512F-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[3,3,3,0,7,7,7,4]
524511 ; AVX512F-NEXT: retq
525512 ;
526513 ; AVX512F-32-LABEL: shuffle_v8f64_33307774:
527514 ; AVX512F-32: # BB#0:
528 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,0,3,0,3,0,0,0,7,0,7,0,7,0,4,0]
529 ; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
515 ; AVX512F-32-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[3,3,3,0,7,7,7,4]
530516 ; AVX512F-32-NEXT: retl
531517 %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4>
532518 ret <8 x double> %shuffle
536522 ;
537523 ; AVX512F-LABEL: shuffle_v8f64_32107654:
538524 ; AVX512F: # BB#0:
539 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,2,1,0,7,6,5,4]
540 ; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
525 ; AVX512F-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[3,2,1,0,7,6,5,4]
541526 ; AVX512F-NEXT: retq
542527 ;
543528 ; AVX512F-32-LABEL: shuffle_v8f64_32107654:
544529 ; AVX512F-32: # BB#0:
545 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,0,2,0,1,0,0,0,7,0,6,0,5,0,4,0]
546 ; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
530 ; AVX512F-32-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[3,2,1,0,7,6,5,4]
547531 ; AVX512F-32-NEXT: retl
548532 %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
549533 ret <8 x double> %shuffle
14241408 ;
14251409 ; AVX512F-LABEL: shuffle_v8i64_00014445:
14261410 ; AVX512F: # BB#0:
1427 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,1,4,4,4,5]
1428 ; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
1411 ; AVX512F-NEXT: vpermq {{.*#+}} zmm0 = zmm0[0,0,0,1,4,4,4,5]
14291412 ; AVX512F-NEXT: retq
14301413 ;
14311414 ; AVX512F-32-LABEL: shuffle_v8i64_00014445:
14321415 ; AVX512F-32: # BB#0:
1433 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,1,0,4,0,4,0,4,0,5,0]
1434 ; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
1416 ; AVX512F-32-NEXT: vpermq {{.*#+}} zmm0 = zmm0[0,0,0,1,4,4,4,5]
14351417 ; AVX512F-32-NEXT: retl
14361418 %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5>
14371419 ret <8 x i64> %shuffle
14411423 ;
14421424 ; AVX512F-LABEL: shuffle_v8i64_00204464:
14431425 ; AVX512F: # BB#0:
1444 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,2,0,4,4,6,4]
1445 ; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
1426 ; AVX512F-NEXT: vpermq {{.*#+}} zmm0 = zmm0[0,0,2,0,4,4,6,4]
14461427 ; AVX512F-NEXT: retq
14471428 ;
14481429 ; AVX512F-32-LABEL: shuffle_v8i64_00204464:
14491430 ; AVX512F-32: # BB#0:
1450 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,2,0,0,0,4,0,4,0,6,0,4,0]
1451 ; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
1431 ; AVX512F-32-NEXT: vpermq {{.*#+}} zmm0 = zmm0[0,0,2,0,4,4,6,4]
14521432 ; AVX512F-32-NEXT: retl
14531433 %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4>
14541434 ret <8 x i64> %shuffle
14581438 ;
14591439 ; AVX512F-LABEL: shuffle_v8i64_03004744:
14601440 ; AVX512F: # BB#0:
1461 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,3,0,0,4,7,4,4]
1462 ; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
1441 ; AVX512F-NEXT: vpermq {{.*#+}} zmm0 = zmm0[0,3,0,0,4,7,4,4]
14631442 ; AVX512F-NEXT: retq
14641443 ;
14651444 ; AVX512F-32-LABEL: shuffle_v8i64_03004744:
14661445 ; AVX512F-32: # BB#0:
1467 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,3,0,0,0,0,0,4,0,7,0,4,0,4,0]
1468 ; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
1446 ; AVX512F-32-NEXT: vpermq {{.*#+}} zmm0 = zmm0[0,3,0,0,4,7,4,4]
14691447 ; AVX512F-32-NEXT: retl
14701448 %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4>
14711449 ret <8 x i64> %shuffle
14751453 ;
14761454 ; AVX512F-LABEL: shuffle_v8i64_10005444:
14771455 ; AVX512F: # BB#0:
1478 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,0,0,5,4,4,4]
1479 ; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
1456 ; AVX512F-NEXT: vpermq {{.*#+}} zmm0 = zmm0[1,0,0,0,5,4,4,4]
14801457 ; AVX512F-NEXT: retq
14811458 ;
14821459 ; AVX512F-32-LABEL: shuffle_v8i64_10005444:
14831460 ; AVX512F-32: # BB#0:
1484 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,0,0,0,0,0,0,5,0,4,0,4,0,4,0]
1485 ; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
1461 ; AVX512F-32-NEXT: vpermq {{.*#+}} zmm0 = zmm0[1,0,0,0,5,4,4,4]
14861462 ; AVX512F-32-NEXT: retl
14871463 %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
14881464 ret <8 x i64> %shuffle
14921468 ;
14931469 ; AVX512F-LABEL: shuffle_v8i64_22006644:
14941470 ; AVX512F: # BB#0:
1495 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [2,2,0,0,6,6,4,4]
1496 ; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
1471 ; AVX512F-NEXT: vpermq {{.*#+}} zmm0 = zmm0[2,2,0,0,6,6,4,4]
14971472 ; AVX512F-NEXT: retq
14981473 ;
14991474 ; AVX512F-32-LABEL: shuffle_v8i64_22006644:
15001475 ; AVX512F-32: # BB#0:
1501 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [2,0,2,0,0,0,0,0,6,0,6,0,4,0,4,0]
1502 ; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
1476 ; AVX512F-32-NEXT: vpermq {{.*#+}} zmm0 = zmm0[2,2,0,0,6,6,4,4]
15031477 ; AVX512F-32-NEXT: retl
15041478 %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4>
15051479 ret <8 x i64> %shuffle
15091483 ;
15101484 ; AVX512F-LABEL: shuffle_v8i64_33307774:
15111485 ; AVX512F: # BB#0:
1512 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,3,3,0,7,7,7,4]
1513 ; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
1486 ; AVX512F-NEXT: vpermq {{.*#+}} zmm0 = zmm0[3,3,3,0,7,7,7,4]
15141487 ; AVX512F-NEXT: retq
15151488 ;
15161489 ; AVX512F-32-LABEL: shuffle_v8i64_33307774:
15171490 ; AVX512F-32: # BB#0:
1518 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,0,3,0,3,0,0,0,7,0,7,0,7,0,4,0]
1519 ; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
1491 ; AVX512F-32-NEXT: vpermq {{.*#+}} zmm0 = zmm0[3,3,3,0,7,7,7,4]
15201492 ; AVX512F-32-NEXT: retl
15211493 %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4>
15221494 ret <8 x i64> %shuffle
15261498 ;
15271499 ; AVX512F-LABEL: shuffle_v8i64_32107654:
15281500 ; AVX512F: # BB#0:
1529 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,2,1,0,7,6,5,4]
1530 ; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
1501 ; AVX512F-NEXT: vpermq {{.*#+}} zmm0 = zmm0[3,2,1,0,7,6,5,4]
15311502 ; AVX512F-NEXT: retq
15321503 ;
15331504 ; AVX512F-32-LABEL: shuffle_v8i64_32107654:
15341505 ; AVX512F-32: # BB#0:
1535 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,0,2,0,1,0,0,0,7,0,6,0,5,0,4,0]
1536 ; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
1506 ; AVX512F-32-NEXT: vpermq {{.*#+}} zmm0 = zmm0[3,2,1,0,7,6,5,4]
15371507 ; AVX512F-32-NEXT: retl
15381508 %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
15391509 ret <8 x i64> %shuffle
15431513 ;
15441514 ; AVX512F-LABEL: shuffle_v8i64_00234467:
15451515 ; AVX512F: # BB#0:
1546 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,2,3,4,4,6,7]
1547 ; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
1516 ; AVX512F-NEXT: vpermq {{.*#+}} zmm0 = zmm0[0,0,2,3,4,4,6,7]
15481517 ; AVX512F-NEXT: retq
15491518 ;
15501519 ; AVX512F-32-LABEL: shuffle_v8i64_00234467:
15511520 ; AVX512F-32: # BB#0:
1552 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,2,0,3,0,4,0,4,0,6,0,7,0]
1553 ; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
1521 ; AVX512F-32-NEXT: vpermq {{.*#+}} zmm0 = zmm0[0,0,2,3,4,4,6,7]
15541522 ; AVX512F-32-NEXT: retl
15551523 %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7>
15561524 ret <8 x i64> %shuffle
16051573 ;
16061574 ; AVX512F-LABEL: shuffle_v8i64_10235467:
16071575 ; AVX512F: # BB#0:
1608 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,2,3,5,4,6,7]
1609 ; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
1576 ; AVX512F-NEXT: vpermq {{.*#+}} zmm0 = zmm0[1,0,2,3,5,4,6,7]
16101577 ; AVX512F-NEXT: retq
16111578 ;
16121579 ; AVX512F-32-LABEL: shuffle_v8i64_10235467:
16131580 ; AVX512F-32: # BB#0:
1614 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,0,0,2,0,3,0,5,0,4,0,6,0,7,0]
1615 ; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
1581 ; AVX512F-32-NEXT: vpermq {{.*#+}} zmm0 = zmm0[1,0,2,3,5,4,6,7]
16161582 ; AVX512F-32-NEXT: retl
16171583 %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
16181584 ret <8 x i64> %shuffle
16221588 ;
16231589 ; AVX512F-LABEL: shuffle_v8i64_10225466:
16241590 ; AVX512F: # BB#0:
1625 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,2,2,5,4,6,6]
1626 ; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
1591 ; AVX512F-NEXT: vpermq {{.*#+}} zmm0 = zmm0[1,0,2,2,5,4,6,6]
16271592 ; AVX512F-NEXT: retq
16281593 ;
16291594 ; AVX512F-32-LABEL: shuffle_v8i64_10225466:
16301595 ; AVX512F-32: # BB#0:
1631 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,0,0,2,0,2,0,5,0,4,0,6,0,6,0]
1632 ; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
1596 ; AVX512F-32-NEXT: vpermq {{.*#+}} zmm0 = zmm0[1,0,2,2,5,4,6,6]
16331597 ; AVX512F-32-NEXT: retl
16341598 %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6>
16351599 ret <8 x i64> %shuffle
22682232 define <8 x double> @shuffle_v8f64_2301uuuu(<8 x double> %a0, <8 x double> %a1) {
22692233 ; AVX512F-LABEL: shuffle_v8f64_2301uuuu:
22702234 ; AVX512F: # BB#0:
2271 ; AVX512F-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm1[2,3,0,1],zmm0[0,1,0,1]
2235 ; AVX512F-NEXT: vpermpd {{.*#+}} zmm0 = zmm1[2,3,0,1,6,7,4,5]
22722236 ; AVX512F-NEXT: retq
22732237 ;
22742238 ; AVX512F-32-LABEL: shuffle_v8f64_2301uuuu:
22752239 ; AVX512F-32: # BB#0:
2276 ; AVX512F-32-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm1[2,3,0,1],zmm0[0,1,0,1]
2240 ; AVX512F-32-NEXT: vpermpd {{.*#+}} zmm0 = zmm1[2,3,0,1,6,7,4,5]
22772241 ; AVX512F-32-NEXT: retl
22782242 %1 = shufflevector <8 x double> %a1, <8 x double> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef>
22792243 ret <8 x double> %1