llvm.org GIT mirror: llvm, commit e37a539
Merging r371305 and r371307:

------------------------------------------------------------------------
r371305 | nikic | 2019-09-07 14:03:48 +0200 (Sat, 07 Sep 2019) | 1 line

[X86] Add test for PR43230; NFC
------------------------------------------------------------------------

------------------------------------------------------------------------
r371307 | nikic | 2019-09-07 14:13:44 +0200 (Sat, 07 Sep 2019) | 9 lines

[X86] Fix pshuflw formation from repeated shuffle mask (PR43230)

Fix for https://bugs.llvm.org/show_bug.cgi?id=43230. When creating
PSHUFLW from a repeated shuffle mask, we have to apply the checks to the
repeated mask, not the original one. For the test case from PR43230 the
inspected part of the original mask is all undef.

Differential Revision: https://reviews.llvm.org/D67314
------------------------------------------------------------------------

git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_90@371378 91177308-0d34-0410-b5e6-96231b3b80d8

Committed by Hans Wennborg
2 changed files with 43 additions and 2 deletions.
lib/Target/X86/X86ISelLowering.cpp
@@ -31663,8 +31663,8 @@
   if (!ContainsZeros && AllowIntDomain && MaskScalarSizeInBits == 16) {
     SmallVector<int, 4> RepeatedMask;
     if (is128BitLaneRepeatedShuffleMask(MaskEltVT, Mask, RepeatedMask)) {
-      ArrayRef<int> LoMask(Mask.data() + 0, 4);
-      ArrayRef<int> HiMask(Mask.data() + 4, 4);
+      ArrayRef<int> LoMask(RepeatedMask.data() + 0, 4);
+      ArrayRef<int> HiMask(RepeatedMask.data() + 4, 4);
 
       // PSHUFLW: permute lower 4 elements only.
       if (isUndefOrInRange(LoMask, 0, 4) &&
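
The hunk above moves the PSHUFLW checks from Mask onto RepeatedMask. To make the failure mode concrete, what follows is a minimal standalone sketch in plain C++, not the LLVM API: getRepeatedMask and canFormPshuflw are hypothetical stand-ins for is128BitLaneRepeatedShuffleMask and the isUndefOrInRange/isSequentialOrUndefInRange checks, and the 16-element mask is made up to exhibit the bug pattern (it is not the actual PR43230 mask).

// Standalone sketch of the bug (plain C++, not the LLVM implementation).
// For a v16i16 shuffle, the repeated-mask helper folds the two 128-bit
// lanes (8 x i16 each) into one 8-element mask; -1 marks an undef element.
#include <array>
#include <cstdio>
#include <optional>

constexpr int Undef = -1;
constexpr int LaneElts = 8; // a 128-bit lane holds 8 x i16

// Fold both lanes into one 8-element mask: every defined element must stay
// within its 128-bit lane, and both lanes must use the same in-lane pattern.
std::optional<std::array<int, LaneElts>>
getRepeatedMask(const std::array<int, 16> &Mask) {
  std::array<int, LaneElts> Repeated;
  Repeated.fill(Undef);
  for (int I = 0; I != 16; ++I) {
    if (Mask[I] == Undef)
      continue;
    if (Mask[I] / LaneElts != I / LaneElts)
      return std::nullopt; // crosses a lane boundary
    int LocalIdx = Mask[I] % LaneElts;
    int Slot = I % LaneElts;
    if (Repeated[Slot] != Undef && Repeated[Slot] != LocalIdx)
      return std::nullopt; // the two lanes disagree
    Repeated[Slot] = LocalIdx;
  }
  return Repeated;
}

// PSHUFLW legality: elements 0-3 may be permuted within [0,4); elements
// 4-7 must stay in place. Undef elements never constrain the check.
bool canFormPshuflw(const int *M) {
  for (int I = 0; I != 4; ++I)
    if (M[I] != Undef && (M[I] < 0 || M[I] >= 4))
      return false;
  for (int I = 4; I != 8; ++I)
    if (M[I] != Undef && M[I] != I)
      return false;
  return true;
}

int main() {
  // Hypothetical mask: the low four elements of the original mask are all
  // undef, but lane 1 wants source element 13 (local index 5) in slot 0,
  // which PSHUFLW cannot do.
  std::array<int, 16> Mask = {Undef, Undef, Undef, Undef, 4,  5,  6,  7,
                              13,    9,     8,     11,    12, 13, 14, 15};
  auto Repeated = getRepeatedMask(Mask);
  // Pre-fix check inspected Mask[0..7]: the all-undef low half passes
  // vacuously, so PSHUFLW was formed and the shuffle miscompiled.
  std::printf("original mask passes: %d\n", canFormPshuflw(Mask.data()));
  // Fixed check inspects the repeated mask: local index 5 in slot 0 rejects.
  std::printf("repeated mask passes: %d\n",
              Repeated && canFormPshuflw(Repeated->data()));
}

With this mask the first check succeeds only because the inspected part of the original mask is all undef, exactly the situation the commit message describes; the repeated mask carries the real constraint and correctly rejects PSHUFLW.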
test/CodeGen/X86/vector-shift-lshr-256.ll
@@ -4753,3 +4753,44 @@
   ret <16 x i16> %unpckh
 }
 
+define <16 x i16> @pr43230(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: pr43230:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT:    vpsllw $12, %xmm1, %xmm2
+; AVX1-NEXT:    vpsllw $4, %xmm1, %xmm1
+; AVX1-NEXT:    vpor %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpaddw %xmm1, %xmm1, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm3
+; AVX1-NEXT:    vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm1
+; AVX1-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpsrlw $2, %xmm0, %xmm1
+; AVX1-NEXT:    vpaddw %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpsrlw $1, %xmm0, %xmm1
+; AVX1-NEXT:    vpaddw %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: pr43230:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX2-NEXT:    vpunpckhwd {{.*#+}} ymm1 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
+; AVX2-NEXT:    vpunpckhwd {{.*#+}} ymm0 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
+; AVX2-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[26,27],zero,zero
+; AVX2-NEXT:    retq
+;
+; AVX512VL-LABEL: pr43230:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpsrlvw %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
+; AVX512VL-NEXT:    retq
+  %shr = lshr <16 x i16> %a, %b
+  %shuf = shufflevector <16 x i16> zeroinitializer, <16 x i16> %shr, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 30, i32 0>
+  ret <16 x i16> %shuf
+}
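
The hunk does not include the test file's RUN lines. Assuming the usual setup for these check prefixes (the actual triple and prefix grouping in the file may differ; note that vpsrlvw needs AVX512BW with VL), invocations of this shape would exercise all three outputs shown above:

; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s --check-prefix=AVX512VL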