llvm.org GIT mirror llvm / 28a9d63
Merging r354034 and r354117: ------------------------------------------------------------------------ r354034 | rksimon | 2019-02-14 15:45:32 +0100 (Thu, 14 Feb 2019) | 1 line [X86][AVX] Add PR40730 test case ------------------------------------------------------------------------ ------------------------------------------------------------------------ r354117 | rksimon | 2019-02-15 12:39:21 +0100 (Fri, 15 Feb 2019) | 9 lines [X86][AVX] lowerShuffleAsLanePermuteAndPermute - fully populate the lane shuffle mask (PR40730) As detailed on PR40730, we are not correctly filling in the lane shuffle mask (D53148/rL344446) - we fill in for the correct src lane but don't add it to the correct mask element, so any reference to the correct element is likely to see an UNDEF mask index. This allows constant folding to propagate UNDEFs prior to the lane mask being (correctly) lowered to vperm2f128. This patch fixes the issue by fully populating the lane shuffle mask - this is more than is necessary (if we only filled in the required mask elements we might be able to match other shuffle instructions - broadcasts etc.), but it's the most cautious approach as this needs to be cherry-picked into the 8.0.0 release branch. Differential Revision: https://reviews.llvm.org/D58237 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_80@354260 91177308-0d34-0410-b5e6-96231b3b80d8 Hans Wennborg 1 year, 9 months ago
2 changed file(s) with 47 addition(s) and 2 deletion(s). Raw diff Collapse all Expand all
1388313883 int NumEltsPerLane = NumElts / NumLanes;
1388413884
1388513885 SmallVector<int, 4> SrcLaneMask(NumLanes, SM_SentinelUndef);
13886 SmallVector<int, 16> LaneMask(NumElts, SM_SentinelUndef);
1388713886 SmallVector<int, 16> PermMask(NumElts, SM_SentinelUndef);
1388813887
1388913888 for (int i = 0; i != NumElts; ++i) {
1389813897 return SDValue();
1389913898 SrcLaneMask[DstLane] = SrcLane;
1390013899
13901 LaneMask[i] = (SrcLane * NumEltsPerLane) + (i % NumEltsPerLane);
1390213900 PermMask[i] = (DstLane * NumEltsPerLane) + (M % NumEltsPerLane);
13901 }
13902
13903 // Make sure we set all elements of the lane mask, to avoid undef propagation.
13904 SmallVector<int, 16> LaneMask(NumElts, SM_SentinelUndef);
13905 for (int DstLane = 0; DstLane != NumLanes; ++DstLane) {
13906 int SrcLane = SrcLaneMask[DstLane];
13907 if (0 <= SrcLane)
13908 for (int j = 0; j != NumEltsPerLane; ++j) {
13909 LaneMask[(DstLane * NumEltsPerLane) + j] =
13910 (SrcLane * NumEltsPerLane) + j;
13911 }
1390313912 }
1390413913
1390513914 // If we're only shuffling a single lowest lane and the rest are identity
0 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
2
3 define <8 x i32> @shuffle_v8i32_0dcd3f14(<8 x i32> %a, <8 x i32> %b) {
4 ; CHECK-LABEL: shuffle_v8i32_0dcd3f14:
5 ; CHECK: # %bb.0:
6 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm2
7 ; CHECK-NEXT: vblendps {{.*#+}} xmm2 = xmm2[0],xmm0[1,2,3]
8 ; CHECK-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,1,0]
9 ; CHECK-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
10 ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm1[2,3,2,3]
11 ; CHECK-NEXT: vpermilpd {{.*#+}} ymm1 = ymm1[0,0,3,2]
12 ; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5],ymm0[6,7]
13 ; CHECK-NEXT: retq
14 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 13, i32 12, i32 13, i32 3, i32 15, i32 1, i32 4>
15 ret <8 x i32> %shuffle
16 }
17
18 ; CHECK: .LCPI1_0:
19 ; CHECK-NEXT: .quad 60129542157
20 ; CHECK-NEXT: .quad 60129542157
21 ; CHECK-NEXT: .quad 68719476736
22 ; CHECK-NEXT: .quad 60129542157
23
24 define <8 x i32> @shuffle_v8i32_0dcd3f14_constant(<8 x i32> %a0) {
25 ; CHECK-LABEL: shuffle_v8i32_0dcd3f14_constant:
26 ; CHECK: # %bb.0:
27 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
28 ; CHECK-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3]
29 ; CHECK-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,1,0]
30 ; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
31 ; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],mem[1,2,3],ymm0[4],mem[5],ymm0[6,7]
32 ; CHECK-NEXT: retq
33 %res = shufflevector <8 x i32> %a0, <8 x i32> , <8 x i32>
34 ret <8 x i32> %res
35 }