llvm.org GIT mirror: llvm / 2e1720f

[X86][AVX512] Add support for lowering shuffles to MOVDDUP/MOVSLDUP/MOVSHDUP

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@274436 91177308-0d34-0410-b5e6-96231b3b80d8
Simon Pilgrim · 4 years ago

3 changed files with 23 additions and 8 deletions.
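Background (not part of the commit itself): vmovddup duplicates the even-numbered double of each 128-bit pair, while vmovsldup and vmovshdup duplicate the even- and odd-numbered single-precision elements respectively. Below is a minimal C++ sketch of the mask patterns these instructions implement; dupMask is a hypothetical helper for illustration, not anything in the LLVM tree.

// Sketch only (hypothetical helper, not LLVM code): the shuffle masks
// implemented by the duplicate instructions this commit targets.
#include <cstdio>
#include <vector>

// MOVDDUP (f64) and MOVSLDUP (f32) read the even index at or below each
// element, giving 0,0,2,2,4,4,...; MOVSHDUP reads the odd index above it.
static std::vector<int> dupMask(int NumElts, bool Odd) {
  std::vector<int> Mask(NumElts);
  for (int I = 0; I != NumElts; ++I)
    Mask[I] = (I & ~1) + (Odd ? 1 : 0);
  return Mask;
}

int main() {
  for (int M : dupMask(8, /*Odd=*/false)) // MOVDDUP on v8f64: 0 0 2 2 4 4 6 6
    std::printf("%d ", M);
  std::printf("\n");
  for (int M : dupMask(16, /*Odd=*/true)) // MOVSHDUP on v16f32: 1 1 3 3 ... 15 15
    std::printf("%d ", M);
  std::printf("\n");
  return 0;
}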
lib/Target/X86/X86ISelLowering.cpp
   assert(V2.getSimpleValueType() == MVT::v8f64 && "Bad operand type!");
   assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");

+  if (V2.isUndef()) {
+    // Use low duplicate instructions for masks that match their pattern.
+    if (isShuffleEquivalent(V1, V2, Mask, {0, 0, 2, 2, 4, 4, 6, 6}))
+      return DAG.getNode(X86ISD::MOVDDUP, DL, MVT::v8f64, V1);
+  }
+
   if (SDValue Shuf128 =
           lowerV4X128VectorShuffle(DL, MVT::v8f64, Mask, V1, V2, DAG))
     return Shuf128;
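The match above goes through isShuffleEquivalent, which treats undef mask elements as wildcards, so a partially-undefined mask such as {0, -1, 2, 2, -1, 4, 6, 6} still lowers to MOVDDUP. Here is a stand-alone sketch of that rule, assuming -1 encodes undef; the in-tree helper additionally reasons about which operand each element selects.

// Simplified stand-in for the matching rule; not LLVM's isShuffleEquivalent.
#include <cassert>
#include <initializer_list>
#include <vector>

static bool masksAreEquivalent(const std::vector<int> &Mask,
                               std::initializer_list<int> Expected) {
  if (Mask.size() != Expected.size())
    return false;
  size_t I = 0;
  for (int Want : Expected) {
    int Got = Mask[I++];
    if (Got != -1 && Got != Want) // -1 (undef) acts as a wildcard
      return false;
  }
  return true;
}

int main() {
  // A partially-undef mask still qualifies for the MOVDDUP pattern...
  assert(masksAreEquivalent({0, -1, 2, 2, -1, 4, 6, 6},
                            {0, 0, 2, 2, 4, 4, 6, 6}));
  // ...but any defined element that disagrees rules it out.
  assert(!masksAreEquivalent({0, 1, 2, 2, 4, 4, 6, 6},
                             {0, 0, 2, 2, 4, 4, 6, 6}));
  return 0;
}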
   assert(V1.getSimpleValueType() == MVT::v16f32 && "Bad operand type!");
   assert(V2.getSimpleValueType() == MVT::v16f32 && "Bad operand type!");
   assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
+
+  // If the shuffle mask is repeated in each 128-bit lane, we have many more
+  // options to efficiently lower the shuffle.
+  SmallVector<int, 4> RepeatedMask;
+  if (is128BitLaneRepeatedShuffleMask(MVT::v16f32, Mask, RepeatedMask)) {
+    assert(RepeatedMask.size() == 4 && "Unexpected repeated mask size!");
+
+    // Use even/odd duplicate instructions for masks that match their pattern.
+    if (isShuffleEquivalent(V1, V2, RepeatedMask, {0, 0, 2, 2}))
+      return DAG.getNode(X86ISD::MOVSLDUP, DL, MVT::v16f32, V1);
+    if (isShuffleEquivalent(V1, V2, RepeatedMask, {1, 1, 3, 3}))
+      return DAG.getNode(X86ISD::MOVSHDUP, DL, MVT::v16f32, V1);
+  }

   if (SDValue Unpck =
           lowerVectorShuffleWithUNPCK(DL, MVT::v16f32, Mask, V1, V2, DAG))
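The v16f32 path first collapses the 16-element mask to a per-128-bit-lane pattern and only then compares it against the 4-element MOVSLDUP/MOVSHDUP shapes. A simplified sketch of what such a lane-repetition check does follows (assumed semantics; the in-tree is128BitLaneRepeatedShuffleMask also handles masks that select from both operands).

// Sketch of a 128-bit-lane repetition check; not the LLVM implementation.
// For v16f32 a 128-bit lane holds 4 floats, so LaneSize is 4.
#include <cassert>
#include <vector>

static bool isLaneRepeatedMask(int LaneSize, const std::vector<int> &Mask,
                               std::vector<int> &RepeatedMask) {
  RepeatedMask.assign(LaneSize, -1);
  for (int I = 0, E = (int)Mask.size(); I != E; ++I) {
    int M = Mask[I];
    if (M < 0)
      continue;                       // undef element matches anything
    if (M / LaneSize != I / LaneSize)
      return false;                   // element crosses a 128-bit lane
    int &Slot = RepeatedMask[I % LaneSize];
    if (Slot < 0)
      Slot = M % LaneSize;            // first lane to define this slot
    else if (Slot != M % LaneSize)
      return false;                   // a later lane disagrees
  }
  return true;
}

int main() {
  std::vector<int> Mask = {0, 0, 2,  2,  4,  4,  6,  6,
                           8, 8, 10, 10, 12, 12, 14, 14};
  std::vector<int> Repeated;
  assert(isLaneRepeatedMask(4, Mask, Repeated));
  assert((Repeated == std::vector<int>{0, 0, 2, 2})); // the MOVSLDUP shape
  return 0;
}

With the mask from the first test below, the repeated mask comes out as {0, 0, 2, 2}, exactly the pattern the MOVSLDUP check above looks for.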
test/CodeGen/X86/vector-shuffle-512-v16.ll
 define <16 x float> @shuffle_v16f32_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14(<16 x float> %a, <16 x float> %b) {
 ; ALL-LABEL: shuffle_v16f32_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vmovdqa32 {{.*#+}} zmm1 = [0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
-; ALL-NEXT:    vpermps %zmm0, %zmm1, %zmm0
+; ALL-NEXT:    vmovsldup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
   ret <16 x float> %shuffle

 define <16 x float> @shuffle_v16f32_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15(<16 x float> %a, <16 x float> %b) {
 ; ALL-LABEL: shuffle_v16f32_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vmovdqa32 {{.*#+}} zmm1 = [1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
-; ALL-NEXT:    vpermps %zmm0, %zmm1, %zmm0
+; ALL-NEXT:    vmovshdup {{.*#+}} zmm0 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
   ret <16 x float> %shuffle

test/CodeGen/X86/vector-shuffle-512-v8.ll
 ;
 ; AVX512F-LABEL: shuffle_v8f64_00224466:
 ; AVX512F:       # BB#0:
-; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,2,2,4,4,6,6]
-; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT:    vmovddup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6]
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512F-32-LABEL: shuffle_v8f64_00224466:
 ; AVX512F-32:       # BB#0:
-; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,2,0,2,0,4,0,4,0,6,0,6,0]
-; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT:    vmovddup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6]
 ; AVX512F-32-NEXT:    retl
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
   ret <8 x double> %shuffle
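
In all three tests the old lowering needed a constant-pool load of the index vector (vmovdqa32/vmovdqa64 into zmm1) followed by a variable permute (vpermps/vpermpd); the new lowering is a single duplicate instruction with no constant load. The odd-looking old AVX512F-32 constant [0,0,0,0,2,0,2,0,...] is simply [0,0,2,2,4,4,6,6] with each 64-bit index printed as two little-endian dwords.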