llvm.org GIT mirror llvm / 0e2037b
Add support for selecting 256-bit PALIGNR.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@148532 91177308-0d34-0410-b5e6-96231b3b80d8
Author: Craig Topper, 8 years ago
3 changed file(s) with 143 addition(s) and 29 deletion(s). Raw diff Collapse all Expand all
32523252
32533253 /// isPALIGNRMask - Return true if the node specifies a shuffle of elements that
32543254 /// is suitable for input to PALIGNR.
3255 static bool isPALIGNRMask(ArrayRef<int> Mask, EVT VT, bool hasSSSE3) {
3256 int i, e = VT.getVectorNumElements();
3257 if (VT.getSizeInBits() != 128)
3255 static bool isPALIGNRMask(ArrayRef<int> Mask, EVT VT,
3256 const X86Subtarget *Subtarget) {
3257 if ((VT.getSizeInBits() == 128 && !Subtarget->hasSSSE3()) ||
3258 (VT.getSizeInBits() == 256 && !Subtarget->hasAVX2()))
32583259 return false;
32593260
3260 // Do not handle v2i64 / v2f64 shuffles with palignr.
3261 if (e < 4 || !hasSSSE3)
3261 unsigned NumElts = VT.getVectorNumElements();
3262 unsigned NumLanes = VT.getSizeInBits()/128;
3263 unsigned NumLaneElts = NumElts/NumLanes;
3264
3265 // Do not handle 64-bit element shuffles with palignr.
3266 if (NumLaneElts == 2)
32623267 return false;
32633268
3264 for (i = 0; i != e; ++i)
3265 if (Mask[i] >= 0)
3266 break;
3267
3268 // All undef, not a palignr.
3269 if (i == e)
3270 return false;
3271
3272 // Make sure we're shifting in the right direction.
3273 if (Mask[i] <= i)
3274 return false;
3275
3276 int s = Mask[i] - i;
3277
3278 // Check the rest of the elements to see if they are consecutive.
3279 for (++i; i != e; ++i) {
3280 int m = Mask[i];
3281 if (m >= 0 && m != s+i)
3269 for (unsigned l = 0; l != NumElts; l+=NumLaneElts) {
3270 unsigned i;
3271 for (i = 0; i != NumLaneElts; ++i) {
3272 if (Mask[i+l] >= 0)
3273 break;
3274 }
3275
3276 // Lane is all undef, go to next lane
3277 if (i == NumLaneElts)
3278 continue;
3279
3280 int Start = Mask[i+l];
3281
3282 // Make sure it's in this lane in one of the sources
3283 if (!isUndefOrInRange(Start, l, l+NumLaneElts) &&
3284 !isUndefOrInRange(Start, l+NumElts, l+NumElts+NumLaneElts))
32823285 return false;
3283 }
3286
3287 // If not lane 0, then we must match lane 0
3288 if (l != 0 && Mask[i] >= 0 && !isUndefOrEqual(Start, Mask[i]+l))
3289 return false;
3290
3291 // Correct second source to be contiguous with first source
3292 if (Start >= (int)NumElts)
3293 Start -= NumElts - NumLaneElts;
3294
3295 // Make sure we're shifting in the right direction.
3296 if (Start <= (int)(i+l))
3297 return false;
3298
3299 Start -= i;
3300
3301 // Check the rest of the elements to see if they are consecutive.
3302 for (++i; i != NumLaneElts; ++i) {
3303 int Idx = Mask[i+l];
3304
3305 // Make sure it's in this lane
3306 if (!isUndefOrInRange(Idx, l, l+NumLaneElts) &&
3307 !isUndefOrInRange(Idx, l+NumElts, l+NumElts+NumLaneElts))
3308 return false;
3309
3310 // If not lane 0, then we must match lane 0
3311 if (l != 0 && Mask[i] >= 0 && !isUndefOrEqual(Idx, Mask[i]+l))
3312 return false;
3313
3314 if (Idx >= (int)NumElts)
3315 Idx -= NumElts - NumLaneElts;
3316
3317 if (!isUndefOrEqual(Idx, Start+i))
3318 return false;
3319
3320 }
3321 }
3322
32843323 return true;
32853324 }
32863325
39824021 static unsigned getShufflePALIGNRImmediate(ShuffleVectorSDNode *SVOp) {
39834022 EVT VT = SVOp->getValueType(0);
39844023 unsigned EltSize = VT.getVectorElementType().getSizeInBits() >> 3;
4024
4025 unsigned NumElts = VT.getVectorNumElements();
4026 unsigned NumLanes = VT.getSizeInBits()/128;
4027 unsigned NumLaneElts = NumElts/NumLanes;
4028
39854029 int Val = 0;
3986
3987 unsigned i, e;
3988 for (i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
4030 unsigned i;
4031 for (i = 0; i != NumElts; ++i) {
39894032 Val = SVOp->getMaskElt(i);
39904033 if (Val >= 0)
39914034 break;
39924035 }
4036 if (Val >= (int)NumElts)
4037 Val -= NumElts - NumLaneElts;
4038
39934039 assert(Val - i > 0 && "PALIGNR imm should be positive");
39944040 return (Val - i) * EltSize;
39954041 }
66256671 // inlined here right now to enable us to directly emit target specific
66266672 // nodes, and remove one by one until they don't return Op anymore.
66276673
6628 if (isPALIGNRMask(M, VT, Subtarget->hasSSSE3()))
6674 if (isPALIGNRMask(M, VT, Subtarget))
66296675 return getTargetShuffleNode(X86ISD::PALIGN, dl, VT, V1, V2,
66306676 getShufflePALIGNRImmediate(SVOp),
66316677 DAG);
1108811134 isPSHUFDMask(M, VT) ||
1108911135 isPSHUFHWMask(M, VT) ||
1109011136 isPSHUFLWMask(M, VT) ||
11091 isPALIGNRMask(M, VT, Subtarget->hasSSSE3()) ||
11137 isPALIGNRMask(M, VT, Subtarget) ||
1109211138 isUNPCKLMask(M, VT, Subtarget->hasAVX2()) ||
1109311139 isUNPCKHMask(M, VT, Subtarget->hasAVX2()) ||
1109411140 isUNPCKL_v_undef_Mask(M, VT, Subtarget->hasAVX2()) ||
54745474 defm VPALIGN : ssse3_palign_y<"vpalignr", 0>, VEX_4V;
54755475 let Constraints = "$src1 = $dst", Predicates = [HasSSSE3] in
54765476 defm PALIGN : ssse3_palign<"palignr">;
5477
5478 let Predicates = [HasAVX2] in {
5479 def : Pat<(v8i32 (X86PAlign VR256:$src1, VR256:$src2, (i8 imm:$imm))),
5480 (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
5481 def : Pat<(v8f32 (X86PAlign VR256:$src1, VR256:$src2, (i8 imm:$imm))),
5482 (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
5483 def : Pat<(v16i16 (X86PAlign VR256:$src1, VR256:$src2, (i8 imm:$imm))),
5484 (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
5485 def : Pat<(v32i8 (X86PAlign VR256:$src1, VR256:$src2, (i8 imm:$imm))),
5486 (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
5487 }
54775488
54785489 let Predicates = [HasAVX] in {
54795490 def : Pat<(v4i32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
0 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
1
2 define <8 x i32> @test1(<8 x i32> %A, <8 x i32> %B) nounwind {
3 ; CHECK: test1:
4 ; CHECK: vpalignr $4
5 %C = shufflevector <8 x i32> %A, <8 x i32> %B, <8 x i32>
6 ret <8 x i32> %C
7 }
8
9 define <8 x i32> @test2(<8 x i32> %A, <8 x i32> %B) nounwind {
10 ; CHECK: test2:
11 ; CHECK: vpalignr $4
12 %C = shufflevector <8 x i32> %A, <8 x i32> %B, <8 x i32>
13 ret <8 x i32> %C
14 }
15
16 define <8 x i32> @test3(<8 x i32> %A, <8 x i32> %B) nounwind {
17 ; CHECK: test3:
18 ; CHECK: vpalignr $4
19 %C = shufflevector <8 x i32> %A, <8 x i32> %B, <8 x i32>
20 ret <8 x i32> %C
21 }
22 ;
23 define <8 x i32> @test4(<8 x i32> %A, <8 x i32> %B) nounwind {
24 ; CHECK: test4:
25 ; CHECK: vpalignr $8
26 %C = shufflevector <8 x i32> %A, <8 x i32> %B, <8 x i32>
27 ret <8 x i32> %C
28 }
29
30 define <16 x i16> @test5(<16 x i16> %A, <16 x i16> %B) nounwind {
31 ; CHECK: test5:
32 ; CHECK: vpalignr $6
33 %C = shufflevector <16 x i16> %A, <16 x i16> %B, <16 x i32>
34 ret <16 x i16> %C
35 }
36
37 define <16 x i16> @test6(<16 x i16> %A, <16 x i16> %B) nounwind {
38 ; CHECK: test6:
39 ; CHECK: vpalignr $6
40 %C = shufflevector <16 x i16> %A, <16 x i16> %B, <16 x i32>
41 ret <16 x i16> %C
42 }
43
44 define <16 x i16> @test7(<16 x i16> %A, <16 x i16> %B) nounwind {
45 ; CHECK: test7:
46 ; CHECK: vpalignr $6
47 %C = shufflevector <16 x i16> %A, <16 x i16> %B, <16 x i32>
48 ret <16 x i16> %C
49 }
50
51 define <32 x i8> @test8(<32 x i8> %A, <32 x i8> %B) nounwind {
52 ; CHECK: test8:
53 ; CHECK: palignr $5
54 %C = shufflevector <32 x i8> %A, <32 x i8> %B, <32 x i32>
55 ret <32 x i8> %C
56 }