llvm.org GIT mirror llvm / 76c0edf
[ARM] Generate MVE VHADDs/VHSUBs git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@368146 91177308-0d34-0410-b5e6-96231b3b80d8 Oliver Cruickshank 3 months ago
2 changed file(s) with 335 addition(s) and 0 deletion(s). Raw diff Collapse all Expand all
// Instantiations of MVE_VHSUB with the "u8"/"u16"/"u32" suffixes.
// NOTE(review): the second and third template arguments look like an
// unsigned flag and an element-size field -- confirm against the MVE_VHSUB
// class definition, which is not visible in this hunk.
15591559 def MVE_VHSUBu8 : MVE_VHSUB<"u8", 0b1, 0b00>;
15601560 def MVE_VHSUBu16 : MVE_VHSUB<"u16", 0b1, 0b01>;
15611561 def MVE_VHSUBu32 : MVE_VHSUB<"u32", 0b1, 0b10>;
1562
// VHADD/VHSUB halve the *full-precision* sum/difference of each lane. A
// plain add/sub in the IR wraps at the element width, so
//   (shift (add a, b), 1)
// only equals the halving instruction when the narrow add/sub cannot
// overflow. Example with u8 lanes: 200 + 100 wraps to 44, and 44 >> 1 = 22,
// whereas vhadd.u8 yields 150. The fragments below therefore match only
// add/sub nodes that carry the relevant no-wrap flag.
// NOTE(review): if fragments with these names already exist elsewhere in the
// target's .td files, reuse those instead of redefining them.
def addnsw : PatFrag<(ops node:$lhs, node:$rhs),
                     (add node:$lhs, node:$rhs), [{
  return N->getFlags().hasNoSignedWrap();
}]>;

def addnuw : PatFrag<(ops node:$lhs, node:$rhs),
                     (add node:$lhs, node:$rhs), [{
  return N->getFlags().hasNoUnsignedWrap();
}]>;

def subnsw : PatFrag<(ops node:$lhs, node:$rhs),
                     (sub node:$lhs, node:$rhs), [{
  return N->getFlags().hasNoSignedWrap();
}]>;

def subnuw : PatFrag<(ops node:$lhs, node:$rhs),
                     (sub node:$lhs, node:$rhs), [{
  return N->getFlags().hasNoUnsignedWrap();
}]>;

// Selects Inst for "(ShiftOp (ArithOp $v1, $v2), 1)" on vector type VT.
//   VT      - the 128-bit vector type of both operands and the result.
//   ShiftOp - the immediate right-shift node (arithmetic for the signed
//             instructions, logical for the unsigned ones).
//   ArithOp - the no-wrap add/sub fragment whose flag matches the
//             signedness of Inst.
//   Inst    - the halving add/sub instruction to emit.
multiclass MVE_VHalvingPat<ValueType VT, SDPatternOperator ShiftOp,
                           SDPatternOperator ArithOp, Instruction Inst> {
  def : Pat<(VT (ShiftOp (VT (ArithOp (VT MQPR:$v1), (VT MQPR:$v2))), 1)),
            (VT (Inst (VT MQPR:$v1), (VT MQPR:$v2)))>;
}

let Predicates = [HasMVEInt] in {
  // Signed halving add: arithmetic shift of a no-signed-wrap add.
  defm : MVE_VHalvingPat<v16i8, ARMvshrsImm, addnsw, MVE_VHADDs8>;
  defm : MVE_VHalvingPat<v8i16, ARMvshrsImm, addnsw, MVE_VHADDs16>;
  defm : MVE_VHalvingPat<v4i32, ARMvshrsImm, addnsw, MVE_VHADDs32>;

  // Unsigned halving add: logical shift of a no-unsigned-wrap add.
  defm : MVE_VHalvingPat<v16i8, ARMvshruImm, addnuw, MVE_VHADDu8>;
  defm : MVE_VHalvingPat<v8i16, ARMvshruImm, addnuw, MVE_VHADDu16>;
  defm : MVE_VHalvingPat<v4i32, ARMvshruImm, addnuw, MVE_VHADDu32>;

  // Signed halving subtract: arithmetic shift of a no-signed-wrap sub.
  defm : MVE_VHalvingPat<v16i8, ARMvshrsImm, subnsw, MVE_VHSUBs8>;
  defm : MVE_VHalvingPat<v8i16, ARMvshrsImm, subnsw, MVE_VHSUBs16>;
  defm : MVE_VHalvingPat<v4i32, ARMvshrsImm, subnsw, MVE_VHSUBs32>;

  // Unsigned halving subtract: logical shift of a no-unsigned-wrap sub.
  defm : MVE_VHalvingPat<v16i8, ARMvshruImm, subnuw, MVE_VHSUBu8>;
  defm : MVE_VHalvingPat<v8i16, ARMvshruImm, subnuw, MVE_VHSUBu16>;
  defm : MVE_VHalvingPat<v4i32, ARMvshruImm, subnuw, MVE_VHSUBu32>;
}
15621616
15631617 class MVE_VDUP pattern=[]>
15641618 : MVE_p<(outs MQPR:$Qd), (ins rGPR:$Rt), NoItinerary,
0 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1 ; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve %s -o - | FileCheck %s
2
; ashr(add nsw a, b), 1 -> vhadd.s8. The nsw flag is what makes the fold
; sound: VHADD halves the full-precision sum, which differs from a wrapping
; i8 add followed by a shift whenever the sum overflows.
define arm_aapcs_vfpcc <16 x i8> @add_ashr_v16i8(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: add_ashr_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vhadd.s8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = add nsw <16 x i8> %src1, %src2
  %1 = ashr <16 x i8> %0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %1
}
13
; ashr(add nsw a, b), 1 -> vhadd.s16. nsw guarantees the i16 sum does not
; wrap, so halving it matches VHADD's full-precision result.
define arm_aapcs_vfpcc <8 x i16> @add_ashr_v8i16(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: add_ashr_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vhadd.s16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = add nsw <8 x i16> %src1, %src2
  %1 = ashr <8 x i16> %0, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %1
}
24
; ashr(add nsw a, b), 1 -> vhadd.s32.
define arm_aapcs_vfpcc <4 x i32> @add_ashr_v4i32(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: add_ashr_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vhadd.s32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = add nsw <4 x i32> %src1, %src2
  %1 = ashr <4 x i32> %0, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %1
}
35
; lshr(add nuw a, b), 1 -> vhadd.u8. The unsigned fold needs nuw: without it
; a wrapping i8 sum (e.g. 200 + 100) halves to a different value than VHADD.
define arm_aapcs_vfpcc <16 x i8> @add_lshr_v16i8(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: add_lshr_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vhadd.u8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = add nuw <16 x i8> %src1, %src2
  %1 = lshr <16 x i8> %0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %1
}
46
; lshr(add nuw a, b), 1 -> vhadd.u16. nuw rules out unsigned wrap of the
; i16 sum, which is required for the halving-add fold to be correct.
define arm_aapcs_vfpcc <8 x i16> @add_lshr_v8i16(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: add_lshr_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vhadd.u16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = add nuw <8 x i16> %src1, %src2
  %1 = lshr <8 x i16> %0, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %1
}
57
; lshr(add nuw a, b), 1 -> vhadd.u32. The unsigned halving add is only
; equivalent when the add cannot wrap unsigned, so the flag is nuw (a signed
; no-wrap guarantee says nothing about unsigned overflow).
define arm_aapcs_vfpcc <4 x i32> @add_lshr_v4i32(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: add_lshr_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vhadd.u32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = add nuw <4 x i32> %src1, %src2
  %1 = lshr <4 x i32> %0, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %1
}
68
; ashr(sub nsw a, b), 1 -> vhsub.s8. nsw ensures the i8 difference does not
; wrap, so halving it matches VHSUB's full-precision result.
define arm_aapcs_vfpcc <16 x i8> @sub_ashr_v16i8(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: sub_ashr_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vhsub.s8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = sub nsw <16 x i8> %src1, %src2
  %1 = ashr <16 x i8> %0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %1
}
79
; ashr(sub nsw a, b), 1 -> vhsub.s16. nsw is required for the halving-sub
; fold to be correct.
define arm_aapcs_vfpcc <8 x i16> @sub_ashr_v8i16(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: sub_ashr_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vhsub.s16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = sub nsw <8 x i16> %src1, %src2
  %1 = ashr <8 x i16> %0, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %1
}
90
; ashr(sub nsw a, b), 1 -> vhsub.s32.
define arm_aapcs_vfpcc <4 x i32> @sub_ashr_v4i32(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: sub_ashr_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vhsub.s32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = sub nsw <4 x i32> %src1, %src2
  %1 = ashr <4 x i32> %0, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %1
}
101
; lshr(sub nuw a, b), 1 -> vhsub.u8. nuw (a >= b in every lane) is needed:
; a wrapping i8 difference halves to a different value than VHSUB produces.
define arm_aapcs_vfpcc <16 x i8> @sub_lshr_v16i8(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: sub_lshr_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vhsub.u8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = sub nuw <16 x i8> %src1, %src2
  %1 = lshr <16 x i8> %0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %1
}
112
; lshr(sub nuw a, b), 1 -> vhsub.u16. nuw is required for the unsigned
; halving-sub fold to be correct.
define arm_aapcs_vfpcc <8 x i16> @sub_lshr_v8i16(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: sub_lshr_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vhsub.u16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = sub nuw <8 x i16> %src1, %src2
  %1 = lshr <8 x i16> %0, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %1
}
123
; lshr(sub nuw a, b), 1 -> vhsub.u32. The unsigned fold needs an unsigned
; no-wrap guarantee, so the flag is nuw rather than nsw.
define arm_aapcs_vfpcc <4 x i32> @sub_lshr_v4i32(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: sub_lshr_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vhsub.u32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = sub nuw <4 x i32> %src1, %src2
  %1 = lshr <4 x i32> %0, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %1
}
134
135
136
; Signed divide of a sum by 2. The CHECK lines show the divide expanded as
; "add the sign bit (x >>u 7), then halve", with the final step selected as
; vhadd.s8; the explicit vadd.i8 is kept.
define arm_aapcs_vfpcc <16 x i8> @add_sdiv_v16i8(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: add_sdiv_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vadd.i8 q0, q0, q1
; CHECK-NEXT:    vshr.u8 q1, q0, #7
; CHECK-NEXT:    vhadd.s8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = add <16 x i8> %src1, %src2
  %1 = sdiv <16 x i8> %0, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
  ret <16 x i8> %1
}
149
; Signed divide of a sum by 2: sign bit (x >>u 15) is added back and the
; result halved, with the last step selected as vhadd.s16.
define arm_aapcs_vfpcc <8 x i16> @add_sdiv_v8i16(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: add_sdiv_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vadd.i16 q0, q0, q1
; CHECK-NEXT:    vshr.u16 q1, q0, #15
; CHECK-NEXT:    vhadd.s16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = add <8 x i16> %src1, %src2
  %1 = sdiv <8 x i16> %0, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <8 x i16> %1
}
162
; Signed divide of a sum by 2: sign bit (x >>u 31) is added back and the
; result halved, with the last step selected as vhadd.s32.
define arm_aapcs_vfpcc <4 x i32> @add_sdiv_v4i32(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: add_sdiv_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vadd.i32 q0, q0, q1
; CHECK-NEXT:    vshr.u32 q1, q0, #31
; CHECK-NEXT:    vhadd.s32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = add nsw <4 x i32> %src1, %src2
  %1 = sdiv <4 x i32> %0, <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i32> %1
}
175
; udiv(add nuw a, b), 2 -> vhadd.u8. Unsigned divide by 2 is a logical shift
; right by 1; the add carries nuw so that folding it into the full-precision
; halving add is sound.
define arm_aapcs_vfpcc <16 x i8> @add_udiv_v16i8(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: add_udiv_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vhadd.u8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = add nuw <16 x i8> %src1, %src2
  %1 = udiv <16 x i8> %0, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
  ret <16 x i8> %1
}
186
; udiv(add nuw a, b), 2 -> vhadd.u16. nuw on the add makes the halving-add
; fold sound.
define arm_aapcs_vfpcc <8 x i16> @add_udiv_v8i16(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: add_udiv_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vhadd.u16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = add nuw <8 x i16> %src1, %src2
  %1 = udiv <8 x i16> %0, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <8 x i16> %1
}
197
; udiv(add nuw a, b), 2 -> vhadd.u32. The unsigned fold needs nuw; a signed
; no-wrap flag does not rule out unsigned overflow of the sum.
define arm_aapcs_vfpcc <4 x i32> @add_udiv_v4i32(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: add_udiv_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vhadd.u32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = add nuw <4 x i32> %src1, %src2
  %1 = udiv <4 x i32> %0, <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i32> %1
}
208
; Signed divide of a difference by 2. The CHECK lines show the divide
; expanded as "add the sign bit (x >>u 7), then halve", with the final step
; selected as vhadd.s8; the explicit vsub.i8 is kept.
define arm_aapcs_vfpcc <16 x i8> @sub_sdiv_v16i8(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: sub_sdiv_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vsub.i8 q0, q0, q1
; CHECK-NEXT:    vshr.u8 q1, q0, #7
; CHECK-NEXT:    vhadd.s8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = sub <16 x i8> %src1, %src2
  %1 = sdiv <16 x i8> %0, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
  ret <16 x i8> %1
}
221
; Signed divide of a difference by 2: sign bit (x >>u 15) is added back and
; the result halved, with the last step selected as vhadd.s16.
define arm_aapcs_vfpcc <8 x i16> @sub_sdiv_v8i16(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: sub_sdiv_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vsub.i16 q0, q0, q1
; CHECK-NEXT:    vshr.u16 q1, q0, #15
; CHECK-NEXT:    vhadd.s16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = sub <8 x i16> %src1, %src2
  %1 = sdiv <8 x i16> %0, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <8 x i16> %1
}
234
; Signed divide of a difference by 2: sign bit (x >>u 31) is added back and
; the result halved, with the last step selected as vhadd.s32.
define arm_aapcs_vfpcc <4 x i32> @sub_sdiv_v4i32(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: sub_sdiv_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vsub.i32 q0, q0, q1
; CHECK-NEXT:    vshr.u32 q1, q0, #31
; CHECK-NEXT:    vhadd.s32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = sub nsw <4 x i32> %src1, %src2
  %1 = sdiv <4 x i32> %0, <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i32> %1
}
247
; udiv(sub nuw a, b), 2 -> vhsub.u8. Unsigned divide by 2 is a logical shift
; right by 1; the sub carries nuw so that folding it into the full-precision
; halving subtract is sound.
define arm_aapcs_vfpcc <16 x i8> @sub_udiv_v16i8(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: sub_udiv_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vhsub.u8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = sub nuw <16 x i8> %src1, %src2
  %1 = udiv <16 x i8> %0, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
  ret <16 x i8> %1
}
258
; udiv(sub nuw a, b), 2 -> vhsub.u16. nuw on the sub makes the halving-sub
; fold sound.
define arm_aapcs_vfpcc <8 x i16> @sub_udiv_v8i16(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: sub_udiv_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vhsub.u16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = sub nuw <8 x i16> %src1, %src2
  %1 = udiv <8 x i16> %0, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <8 x i16> %1
}
269
; udiv(sub nuw a, b), 2 -> vhsub.u32. The unsigned fold needs nuw; a signed
; no-wrap flag does not rule out unsigned wrap of the difference.
define arm_aapcs_vfpcc <4 x i32> @sub_udiv_v4i32(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: sub_udiv_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vhsub.u32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = sub nuw <4 x i32> %src1, %src2
  %1 = udiv <4 x i32> %0, <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i32> %1
}
280