llvm.org GIT mirror: llvm, commit c692cb7
Match VTRN, VZIP, and VUZP shuffles. Restore the tests for these operations, now using shuffles instead of intrinsics.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@79673 91177308-0d34-0410-b5e6-96231b3b80d8

Bob Wilson, 10 years ago
5 changed files with 331 additions and 9 deletions.
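For orientation (this note and the example are not part of the commit): with d-register operands a = [a0 ... a7] and b = [b0 ... b7], the two results of vtrn are [a0 b0 a2 b2 a4 b4 a6 b6] and [a1 b1 a3 b3 a5 b5 a7 b7], the two results of vuzp are [a0 a2 a4 a6 b0 b2 b4 b6] and [a1 a3 a5 a7 b1 b3 b5 b7], and the two results of vzip are [a0 b0 a1 b1 a2 b2 a3 b3] and [a4 b4 a5 b5 a6 b6 a7 b7]. The restored tests express each result as a plain shufflevector rather than a NEON intrinsic; for example, with placeholder values %a and %b, the first vtrn result is:

  %trn0 = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>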
lib/Target/ARM/ARMISelLowering.cpp
@@ -2402 +2402 @@
   return true;
 }
 
+static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT,
+                       unsigned &WhichResult) {
+  unsigned NumElts = VT.getVectorNumElements();
+  WhichResult = (M[0] == 0 ? 0 : 1);
+  for (unsigned i = 0; i < NumElts; i += 2) {
+    if ((unsigned) M[i] != i + WhichResult ||
+        (unsigned) M[i+1] != i + NumElts + WhichResult)
+      return false;
+  }
+  return true;
+}
+
+static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT,
+                       unsigned &WhichResult) {
+  unsigned NumElts = VT.getVectorNumElements();
+  WhichResult = (M[0] == 0 ? 0 : 1);
+  for (unsigned i = 0; i != NumElts; ++i) {
+    if ((unsigned) M[i] != 2 * i + WhichResult)
+      return false;
+  }
+
+  // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
+  if (VT.is64BitVector() && VT.getVectorElementType().getSizeInBits() == 32)
+    return false;
+
+  return true;
+}
+
+static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT,
+                       unsigned &WhichResult) {
+  unsigned NumElts = VT.getVectorNumElements();
+  WhichResult = (M[0] == 0 ? 0 : 1);
+  unsigned Idx = WhichResult * NumElts / 2;
+  for (unsigned i = 0; i != NumElts; i += 2) {
+    if ((unsigned) M[i] != Idx ||
+        (unsigned) M[i+1] != Idx + NumElts)
+      return false;
+    Idx += 1;
+  }
+
+  // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
+  if (VT.is64BitVector() && VT.getVectorElementType().getSizeInBits() == 32)
+    return false;
+
+  return true;
+}
+
 static SDValue BuildSplat(SDValue Val, EVT VT, SelectionDAG &DAG, DebugLoc dl) {
   // Canonicalize all-zeros and all-ones vectors.
   ConstantSDNode *ConstVal = cast<ConstantSDNode>(Val.getNode());
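As a sketch of what the new predicates accept (not part of the diff; %a and %b are placeholder <4 x i16> values), each of the following masks is matched with WhichResult == 0:

  %trn0 = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>   ; isVTRNMask
  %uzp0 = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>   ; isVUZPMask
  %zip0 = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>   ; isVZIPMask

The masks selecting the second result of each operation are matched the same way, with WhichResult == 1.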
@@ -2503 +2550 @@
   }
 
   bool ReverseVEXT;
-  unsigned Imm;
+  unsigned Imm, WhichResult;
 
   return (ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
           isVREVMask(M, VT, 64) ||
           isVREVMask(M, VT, 32) ||
           isVREVMask(M, VT, 16) ||
-          isVEXTMask(M, VT, ReverseVEXT, Imm));
+          isVEXTMask(M, VT, ReverseVEXT, Imm) ||
+          isVTRNMask(M, VT, WhichResult) ||
+          isVUZPMask(M, VT, WhichResult) ||
+          isVZIPMask(M, VT, WhichResult));
 }
 
 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
@@ -2609 +2659 @@
   bool ReverseVEXT;
   unsigned Imm;
   if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
-    SDValue Op0 = SVN->getOperand(0);
-    SDValue Op1 = SVN->getOperand(1);
     if (ReverseVEXT)
-      std::swap(Op0, Op1);
-    return DAG.getNode(ARMISD::VEXT, dl, VT, Op0, Op1,
+      std::swap(V1, V2);
+    return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
                        DAG.getConstant(Imm, MVT::i32));
   }
 
@@ -2624 +2672 @@
   if (isVREVMask(ShuffleMask, VT, 16))
     return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
 
+  // Check for Neon shuffles that modify both input vectors in place.
+  // If both results are used, i.e., if there are two shuffles with the same
+  // source operands and with masks corresponding to both results of one of
+  // these operations, DAG memoization will ensure that a single node is
+  // used for both shuffles.
+  unsigned WhichResult;
+  if (isVTRNMask(ShuffleMask, VT, WhichResult))
+    return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
+                       V1, V2).getValue(WhichResult);
+  if (isVUZPMask(ShuffleMask, VT, WhichResult))
+    return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
+                       V1, V2).getValue(WhichResult);
+  if (isVZIPMask(ShuffleMask, VT, WhichResult))
+    return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
+                       V1, V2).getValue(WhichResult);
+
+  // If the shuffle is not directly supported and it has 4 elements, use
+  // the PerfectShuffle-generated table to synthesize it from other shuffles.
   if (VT.getVectorNumElements() == 4 &&
       (VT.is128BitVector() || VT.is64BitVector())) {
     unsigned PFIndexes[4];
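To make the "both results are used" case concrete (a sketch, not code from the commit; %a and %b are placeholder <8 x i8> values): the two shuffles below carry the masks of the two vuzp results, so they lower to getValue(0) and getValue(1) of one ARMISD::VUZP node, and DAG memoization ensures that node is only created once. The restored tests below exercise exactly this pattern.

  %even = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %odd  = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>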
lib/Target/ARM/ARMISelLowering.h
@@ -131 +131 @@
       VREV64,       // reverse elements within 64-bit doublewords
       VREV32,       // reverse elements within 32-bit words
       VREV16,       // reverse elements within 16-bit halfwords
-
-      VZIP,         // zip
-      VUZP,         // unzip
+      VZIP,         // zip (interleave)
+      VUZP,         // unzip (deinterleave)
       VTRN          // transpose
     };
   }
test/CodeGen/ARM/vtrn.ll (new file)

; RUN: llvm-as < %s | llc -march=arm -mattr=+neon | FileCheck %s

%struct.__builtin_neon_v8qi2 = type { <8 x i8>, <8 x i8> }
%struct.__builtin_neon_v4hi2 = type { <4 x i16>, <4 x i16> }
%struct.__builtin_neon_v2si2 = type { <2 x i32>, <2 x i32> }
%struct.__builtin_neon_v2sf2 = type { <2 x float>, <2 x float> }

%struct.__builtin_neon_v16qi2 = type { <16 x i8>, <16 x i8> }
%struct.__builtin_neon_v8hi2 = type { <8 x i16>, <8 x i16> }
%struct.__builtin_neon_v4si2 = type { <4 x i32>, <4 x i32> }
%struct.__builtin_neon_v4sf2 = type { <4 x float>, <4 x float> }

define <8 x i8> @vtrni8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vtrni8:
;CHECK: vtrn.8
  %tmp1 = load <8 x i8>* %A
  %tmp2 = load <8 x i8>* %B
  %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
  %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %tmp5 = add <8 x i8> %tmp3, %tmp4
  ret <8 x i8> %tmp5
}

define <4 x i16> @vtrni16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: vtrni16:
;CHECK: vtrn.16
  %tmp1 = load <4 x i16>* %A
  %tmp2 = load <4 x i16>* %B
  %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  %tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  %tmp5 = add <4 x i16> %tmp3, %tmp4
  ret <4 x i16> %tmp5
}

define <2 x i32> @vtrni32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK: vtrni32:
;CHECK: vtrn.32
  %tmp1 = load <2 x i32>* %A
  %tmp2 = load <2 x i32>* %B
  %tmp3 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> <i32 0, i32 2>
  %tmp4 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 3>
  %tmp5 = add <2 x i32> %tmp3, %tmp4
  ret <2 x i32> %tmp5
}

define <2 x float> @vtrnf(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK: vtrnf:
;CHECK: vtrn.32
  %tmp1 = load <2 x float>* %A
  %tmp2 = load <2 x float>* %B
  %tmp3 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <2 x i32> <i32 0, i32 2>
  %tmp4 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <2 x i32> <i32 1, i32 3>
  %tmp5 = add <2 x float> %tmp3, %tmp4
  ret <2 x float> %tmp5
}

define <16 x i8> @vtrnQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK: vtrnQi8:
;CHECK: vtrn.8
  %tmp1 = load <16 x i8>* %A
  %tmp2 = load <16 x i8>* %B
  %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
  %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
  %tmp5 = add <16 x i8> %tmp3, %tmp4
  ret <16 x i8> %tmp5
}

define <8 x i16> @vtrnQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK: vtrnQi16:
;CHECK: vtrn.16
  %tmp1 = load <8 x i16>* %A
  %tmp2 = load <8 x i16>* %B
  %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
  %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %tmp5 = add <8 x i16> %tmp3, %tmp4
  ret <8 x i16> %tmp5
}

define <4 x i32> @vtrnQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK: vtrnQi32:
;CHECK: vtrn.32
  %tmp1 = load <4 x i32>* %A
  %tmp2 = load <4 x i32>* %B
  %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  %tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  %tmp5 = add <4 x i32> %tmp3, %tmp4
  ret <4 x i32> %tmp5
}

define <4 x float> @vtrnQf(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK: vtrnQf:
;CHECK: vtrn.32
  %tmp1 = load <4 x float>* %A
  %tmp2 = load <4 x float>* %B
  %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  %tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  %tmp5 = add <4 x float> %tmp3, %tmp4
  ret <4 x float> %tmp5
}
test/CodeGen/ARM/vuzp.ll (new file)

; RUN: llvm-as < %s | llc -march=arm -mattr=+neon | FileCheck %s

%struct.__builtin_neon_v8qi2 = type { <8 x i8>, <8 x i8> }
%struct.__builtin_neon_v4hi2 = type { <4 x i16>, <4 x i16> }
%struct.__builtin_neon_v2si2 = type { <2 x i32>, <2 x i32> }
%struct.__builtin_neon_v2sf2 = type { <2 x float>, <2 x float> }

%struct.__builtin_neon_v16qi2 = type { <16 x i8>, <16 x i8> }
%struct.__builtin_neon_v8hi2 = type { <8 x i16>, <8 x i16> }
%struct.__builtin_neon_v4si2 = type { <4 x i32>, <4 x i32> }
%struct.__builtin_neon_v4sf2 = type { <4 x float>, <4 x float> }

define <8 x i8> @vuzpi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vuzpi8:
;CHECK: vuzp.8
  %tmp1 = load <8 x i8>* %A
  %tmp2 = load <8 x i8>* %B
  %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %tmp5 = add <8 x i8> %tmp3, %tmp4
  ret <8 x i8> %tmp5
}

define <4 x i16> @vuzpi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: vuzpi16:
;CHECK: vuzp.16
  %tmp1 = load <4 x i16>* %A
  %tmp2 = load <4 x i16>* %B
  %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %tmp5 = add <4 x i16> %tmp3, %tmp4
  ret <4 x i16> %tmp5
}

; VUZP.32 is equivalent to VTRN.32 for 64-bit vectors.

define <16 x i8> @vuzpQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK: vuzpQi8:
;CHECK: vuzp.8
  %tmp1 = load <16 x i8>* %A
  %tmp2 = load <16 x i8>* %B
  %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
  %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
  %tmp5 = add <16 x i8> %tmp3, %tmp4
  ret <16 x i8> %tmp5
}

define <8 x i16> @vuzpQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK: vuzpQi16:
;CHECK: vuzp.16
  %tmp1 = load <8 x i16>* %A
  %tmp2 = load <8 x i16>* %B
  %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %tmp5 = add <8 x i16> %tmp3, %tmp4
  ret <8 x i16> %tmp5
}

define <4 x i32> @vuzpQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK: vuzpQi32:
;CHECK: vuzp.32
  %tmp1 = load <4 x i32>* %A
  %tmp2 = load <4 x i32>* %B
  %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %tmp5 = add <4 x i32> %tmp3, %tmp4
  ret <4 x i32> %tmp5
}

define <4 x float> @vuzpQf(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK: vuzpQf:
;CHECK: vuzp.32
  %tmp1 = load <4 x float>* %A
  %tmp2 = load <4 x float>* %B
  %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %tmp5 = add <4 x float> %tmp3, %tmp4
  ret <4 x float> %tmp5
}
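To illustrate the comment above about the missing d-register case (a sketch only, not part of the test file; %a and %b are placeholder <2 x i32> values): the unzip masks for two-element vectors coincide with the transpose masks, so isVUZPMask rejects them, isVTRNMask matches them, and the backend emits vtrn.32 rather than vuzp.32. The same reasoning applies to the corresponding case in vzip.ll.

  %uzp0 = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
  %uzp1 = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>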
test/CodeGen/ARM/vzip.ll (new file)

; RUN: llvm-as < %s | llc -march=arm -mattr=+neon | FileCheck %s

%struct.__builtin_neon_v8qi2 = type { <8 x i8>, <8 x i8> }
%struct.__builtin_neon_v4hi2 = type { <4 x i16>, <4 x i16> }
%struct.__builtin_neon_v2si2 = type { <2 x i32>, <2 x i32> }
%struct.__builtin_neon_v2sf2 = type { <2 x float>, <2 x float> }

%struct.__builtin_neon_v16qi2 = type { <16 x i8>, <16 x i8> }
%struct.__builtin_neon_v8hi2 = type { <8 x i16>, <8 x i16> }
%struct.__builtin_neon_v4si2 = type { <4 x i32>, <4 x i32> }
%struct.__builtin_neon_v4sf2 = type { <4 x float>, <4 x float> }

define <8 x i8> @vzipi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vzipi8:
;CHECK: vzip.8
  %tmp1 = load <8 x i8>* %A
  %tmp2 = load <8 x i8>* %B
  %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  %tmp5 = add <8 x i8> %tmp3, %tmp4
  ret <8 x i8> %tmp5
}

define <4 x i16> @vzipi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: vzipi16:
;CHECK: vzip.16
  %tmp1 = load <4 x i16>* %A
  %tmp2 = load <4 x i16>* %B
  %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  %tmp5 = add <4 x i16> %tmp3, %tmp4
  ret <4 x i16> %tmp5
}

; VZIP.32 is equivalent to VTRN.32 for 64-bit vectors.

define <16 x i8> @vzipQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK: vzipQi8:
;CHECK: vzip.8
  %tmp1 = load <16 x i8>* %A
  %tmp2 = load <16 x i8>* %B
  %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
  %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
  %tmp5 = add <16 x i8> %tmp3, %tmp4
  ret <16 x i8> %tmp5
}

define <8 x i16> @vzipQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK: vzipQi16:
;CHECK: vzip.16
  %tmp1 = load <8 x i16>* %A
  %tmp2 = load <8 x i16>* %B
  %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  %tmp5 = add <8 x i16> %tmp3, %tmp4
  ret <8 x i16> %tmp5
}

define <4 x i32> @vzipQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK: vzipQi32:
;CHECK: vzip.32
  %tmp1 = load <4 x i32>* %A
  %tmp2 = load <4 x i32>* %B
  %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  %tmp5 = add <4 x i32> %tmp3, %tmp4
  ret <4 x i32> %tmp5
}

define <4 x float> @vzipQf(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK: vzipQf:
;CHECK: vzip.32
  %tmp1 = load <4 x float>* %A
  %tmp2 = load <4 x float>* %B
  %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  %tmp5 = add <4 x float> %tmp3, %tmp4
  ret <4 x float> %tmp5
}