llvm.org GIT mirror llvm / 8bb9e48
Add support for ARM Neon VREV instructions. Patch by Anton Korzh, with some modifications from me. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@77101 91177308-0d34-0410-b5e6-96231b3b80d8 Bob Wilson 10 years ago
4 changed file(s) with 214 addition(s) and 0 deletion(s). Raw diff Collapse all Expand all
21872187 SplatBitSize, DAG);
21882188 }
21892189
2190 /// isVREVMask - Check if a vector shuffle corresponds to a VREV
2191 /// instruction with the specified blocksize. (The order of the elements
2192 /// within each block of the vector is reversed.)
2193 bool ARM::isVREVMask(ShuffleVectorSDNode *N, unsigned BlockSize) {
2194 assert((BlockSize==16 || BlockSize==32 || BlockSize==64) &&
2195 "Only possible block sizes for VREV are: 16, 32, 64");
2196
2197 MVT VT = N->getValueType(0);
2198 unsigned NumElts = VT.getVectorNumElements();
2199 unsigned EltSz = VT.getVectorElementType().getSizeInBits();
2200 unsigned BlockElts = N->getMaskElt(0) + 1;
2201
2202 if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
2203 return false;
2204
2205 for (unsigned i = 0; i < NumElts; ++i) {
2206 if ((unsigned) N->getMaskElt(i) !=
2207 (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))
2208 return false;
2209 }
2210
2211 return true;
2212 }
2213
21902214 static SDValue BuildSplat(SDValue Val, MVT VT, SelectionDAG &DAG, DebugLoc dl) {
21912215 // Canonicalize all-zeros and all-ones vectors.
21922216 ConstantSDNode *ConstVal = dyn_cast(Val.getNode());
124124 /// return the constant being splatted. The ByteSize field indicates the
125125 /// number of bytes of each element [1248].
126126 SDValue getVMOVImm(SDNode *N, unsigned ByteSize, SelectionDAG &DAG);
127
128 /// isVREVMask - Check if a vector shuffle corresponds to a VREV
129 /// instruction with the specified blocksize. (The order of the elements
130 /// within each block of the vector is reversed.)
131 bool isVREVMask(ShuffleVectorSDNode *N, unsigned blocksize);
127132 }
128133
129134 //===--------------------------------------------------------------------===//
16611661 def VCVTxu2fq : N2VCvtQ<1, 1, 0b000000, 0b1110, 0, 1, "vcvt.f32.u32",
16621662 v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;
16631663
// VREV : Vector Reverse
//
// PatFrags matching a single-input vector_shuffle whose mask reverses the
// elements within each 64-, 32-, or 16-bit block (checked by ARM::isVREVMask).

def vrev64_shuffle : PatFrag<(ops node:$in),
                             (vector_shuffle node:$in, undef), [{
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
  return ARM::isVREVMask(SVOp, 64);
}]>;

def vrev32_shuffle : PatFrag<(ops node:$in),
                             (vector_shuffle node:$in, undef), [{
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
  return ARM::isVREVMask(SVOp, 32);
}]>;

def vrev16_shuffle : PatFrag<(ops node:$in),
                             (vector_shuffle node:$in, undef), [{
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
  return ARM::isVREVMask(SVOp, 16);
}]>;
1683
// VREV64 : Vector Reverse elements within 64-bit doublewords

class VREV64D<bits<2> op19_18, string OpcodeStr, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0, (outs DPR:$dst),
        (ins DPR:$src), !strconcat(OpcodeStr, "\t$dst, $src"), "",
        [(set DPR:$dst, (Ty (vrev64_shuffle (Ty DPR:$src))))]>;
class VREV64Q<bits<2> op19_18, string OpcodeStr, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$dst),
        (ins QPR:$src), !strconcat(OpcodeStr, "\t$dst, $src"), "",
        [(set QPR:$dst, (Ty (vrev64_shuffle (Ty QPR:$src))))]>;

def VREV64d8  : VREV64D<0b00, "vrev64.8", v8i8>;
def VREV64d16 : VREV64D<0b01, "vrev64.16", v4i16>;
def VREV64d32 : VREV64D<0b10, "vrev64.32", v2i32>;
def VREV64df  : VREV64D<0b10, "vrev64.32", v2f32>;

def VREV64q8  : VREV64Q<0b00, "vrev64.8", v16i8>;
def VREV64q16 : VREV64Q<0b01, "vrev64.16", v8i16>;
def VREV64q32 : VREV64Q<0b10, "vrev64.32", v4i32>;
def VREV64qf  : VREV64Q<0b10, "vrev64.32", v4f32>;
1704
// VREV32 : Vector Reverse elements within 32-bit words

class VREV32D<bits<2> op19_18, string OpcodeStr, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0, (outs DPR:$dst),
        (ins DPR:$src), !strconcat(OpcodeStr, "\t$dst, $src"), "",
        [(set DPR:$dst, (Ty (vrev32_shuffle (Ty DPR:$src))))]>;
class VREV32Q<bits<2> op19_18, string OpcodeStr, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$dst),
        (ins QPR:$src), !strconcat(OpcodeStr, "\t$dst, $src"), "",
        [(set QPR:$dst, (Ty (vrev32_shuffle (Ty QPR:$src))))]>;

def VREV32d8  : VREV32D<0b00, "vrev32.8", v8i8>;
def VREV32d16 : VREV32D<0b01, "vrev32.16", v4i16>;

def VREV32q8  : VREV32Q<0b00, "vrev32.8", v16i8>;
def VREV32q16 : VREV32Q<0b01, "vrev32.16", v8i16>;
1721
// VREV16 : Vector Reverse elements within 16-bit halfwords

class VREV16D<bits<2> op19_18, string OpcodeStr, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0, (outs DPR:$dst),
        (ins DPR:$src), !strconcat(OpcodeStr, "\t$dst, $src"), "",
        [(set DPR:$dst, (Ty (vrev16_shuffle (Ty DPR:$src))))]>;
class VREV16Q<bits<2> op19_18, string OpcodeStr, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$dst),
        (ins QPR:$src), !strconcat(OpcodeStr, "\t$dst, $src"), "",
        [(set QPR:$dst, (Ty (vrev16_shuffle (Ty QPR:$src))))]>;

def VREV16d8 : VREV16D<0b00, "vrev16.8", v8i8>;
def VREV16q8 : VREV16Q<0b00, "vrev16.8", v16i8>;
1735
16641736 //===----------------------------------------------------------------------===//
16651737 // Non-Instruction Patterns
16661738 //===----------------------------------------------------------------------===//
0 ; RUN: llvm-as < %s | llc -march=arm -mattr=+neon | FileCheck %s
1
; VREV64, 64-bit D registers: reverse all elements of the single doubleword.
define arm_apcscc <8 x i8> @test_vrev64D8(<8 x i8>* %A) nounwind {
;CHECK: test_vrev64D8:
;CHECK: vrev64.8
	%tmp1 = load <8 x i8>* %A
	%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
	ret <8 x i8> %tmp2
}

define arm_apcscc <4 x i16> @test_vrev64D16(<4 x i16>* %A) nounwind {
;CHECK: test_vrev64D16:
;CHECK: vrev64.16
	%tmp1 = load <4 x i16>* %A
	%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
	ret <4 x i16> %tmp2
}

define arm_apcscc <2 x i32> @test_vrev64D32(<2 x i32>* %A) nounwind {
;CHECK: test_vrev64D32:
;CHECK: vrev64.32
	%tmp1 = load <2 x i32>* %A
	%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
	ret <2 x i32> %tmp2
}

define arm_apcscc <2 x float> @test_vrev64Df(<2 x float>* %A) nounwind {
;CHECK: test_vrev64Df:
;CHECK: vrev64.32
	%tmp1 = load <2 x float>* %A
	%tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> <i32 1, i32 0>
	ret <2 x float> %tmp2
}
33
; VREV64, 128-bit Q registers: reverse elements within each 64-bit doubleword.
define arm_apcscc <16 x i8> @test_vrev64Q8(<16 x i8>* %A) nounwind {
;CHECK: test_vrev64Q8:
;CHECK: vrev64.8
	%tmp1 = load <16 x i8>* %A
	%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
	ret <16 x i8> %tmp2
}

define arm_apcscc <8 x i16> @test_vrev64Q16(<8 x i16>* %A) nounwind {
;CHECK: test_vrev64Q16:
;CHECK: vrev64.16
	%tmp1 = load <8 x i16>* %A
	%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
	ret <8 x i16> %tmp2
}

define arm_apcscc <4 x i32> @test_vrev64Q32(<4 x i32>* %A) nounwind {
;CHECK: test_vrev64Q32:
;CHECK: vrev64.32
	%tmp1 = load <4 x i32>* %A
	%tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
	ret <4 x i32> %tmp2
}

define arm_apcscc <4 x float> @test_vrev64Qf(<4 x float>* %A) nounwind {
;CHECK: test_vrev64Qf:
;CHECK: vrev64.32
	%tmp1 = load <4 x float>* %A
	%tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
	ret <4 x float> %tmp2
}
65
; VREV32: reverse elements within each 32-bit word.
define arm_apcscc <8 x i8> @test_vrev32D8(<8 x i8>* %A) nounwind {
;CHECK: test_vrev32D8:
;CHECK: vrev32.8
	%tmp1 = load <8 x i8>* %A
	%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
	ret <8 x i8> %tmp2
}

define arm_apcscc <4 x i16> @test_vrev32D16(<4 x i16>* %A) nounwind {
;CHECK: test_vrev32D16:
;CHECK: vrev32.16
	%tmp1 = load <4 x i16>* %A
	%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
	ret <4 x i16> %tmp2
}

define arm_apcscc <16 x i8> @test_vrev32Q8(<16 x i8>* %A) nounwind {
;CHECK: test_vrev32Q8:
;CHECK: vrev32.8
	%tmp1 = load <16 x i8>* %A
	%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
	ret <16 x i8> %tmp2
}

define arm_apcscc <8 x i16> @test_vrev32Q16(<8 x i16>* %A) nounwind {
;CHECK: test_vrev32Q16:
;CHECK: vrev32.16
	%tmp1 = load <8 x i16>* %A
	%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
	ret <8 x i16> %tmp2
}
97
; VREV16: reverse elements within each 16-bit halfword (byte vectors only).
define arm_apcscc <8 x i8> @test_vrev16D8(<8 x i8>* %A) nounwind {
;CHECK: test_vrev16D8:
;CHECK: vrev16.8
	%tmp1 = load <8 x i8>* %A
	%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
	ret <8 x i8> %tmp2
}

define arm_apcscc <16 x i8> @test_vrev16Q8(<16 x i8>* %A) nounwind {
;CHECK: test_vrev16Q8:
;CHECK: vrev16.8
	%tmp1 = load <16 x i8>* %A
	%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
	ret <16 x i8> %tmp2
}