llvm.org GIT mirror llvm / 68567be
[SDAG][AArch64] Boolean and/or reduce to umax/min reduce (PR41635) This addresses one half of https://bugs.llvm.org/show_bug.cgi?id=41635 by combining a VECREDUCE_AND/OR into VECREDUCE_UMIN/UMAX (if latter is legal but former is not) for zero-or-all-ones boolean reductions (which are detected based on sign bits). Differential Revision: https://reviews.llvm.org/D61398 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@360054 91177308-0d34-0410-b5e6-96231b3b80d8 Nikita Popov 1 year, 4 months ago
2 changed file(s) with 32 addition(s) and 118 deletion(s). Raw diff Collapse all Expand all
1866918669 SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
1867018670 SDValue N0 = N->getOperand(0);
1867118671 EVT VT = N0.getValueType();
18672 unsigned Opcode = N->getOpcode();
1867218673
1867318674 // VECREDUCE over 1-element vector is just an extract.
1867418675 if (VT.getVectorNumElements() == 1) {
1867918680 if (Res.getValueType() != N->getValueType(0))
1868018681 Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
1868118682 return Res;
18683 }
18684
18685 // On a boolean vector an and/or reduction is the same as a umin/umax
18686 // reduction. Convert them if the latter is legal while the former isn't.
18687 if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
18688 unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
18689 ? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
18690 if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
18691 TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
18692 DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
18693 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
1868218694 }
1868318695
1868418696 return SDValue();
3434 ; CHECK-NEXT: shl v0.2s, v0.2s, #24
3535 ; CHECK-NEXT: sshr v0.2s, v0.2s, #24
3636 ; CHECK-NEXT: cmlt v0.2s, v0.2s, #0
37 ; CHECK-NEXT: mov w8, v0.s[1]
38 ; CHECK-NEXT: fmov w9, s0
39 ; CHECK-NEXT: and w8, w9, w8
37 ; CHECK-NEXT: uminp v0.2s, v0.2s, v0.2s
38 ; CHECK-NEXT: fmov w8, s0
4039 ; CHECK-NEXT: tst w8, #0x1
4140 ; CHECK-NEXT: csel w0, w0, w1, ne
4241 ; CHECK-NEXT: ret
5251 ; CHECK-NEXT: shl v0.4h, v0.4h, #8
5352 ; CHECK-NEXT: sshr v0.4h, v0.4h, #8
5453 ; CHECK-NEXT: cmlt v0.4h, v0.4h, #0
55 ; CHECK-NEXT: umov w10, v0.h[1]
56 ; CHECK-NEXT: umov w11, v0.h[0]
57 ; CHECK-NEXT: umov w9, v0.h[2]
58 ; CHECK-NEXT: and w10, w11, w10
59 ; CHECK-NEXT: umov w8, v0.h[3]
60 ; CHECK-NEXT: and w9, w10, w9
61 ; CHECK-NEXT: and w8, w9, w8
54 ; CHECK-NEXT: uminv h0, v0.4h
55 ; CHECK-NEXT: fmov w8, s0
6256 ; CHECK-NEXT: tst w8, #0x1
6357 ; CHECK-NEXT: csel w0, w0, w1, ne
6458 ; CHECK-NEXT: ret
7266 ; CHECK-LABEL: reduce_and_v8:
7367 ; CHECK: // %bb.0:
7468 ; CHECK-NEXT: cmlt v0.8b, v0.8b, #0
75 ; CHECK-NEXT: umov w14, v0.b[1]
76 ; CHECK-NEXT: umov w15, v0.b[0]
77 ; CHECK-NEXT: umov w13, v0.b[2]
78 ; CHECK-NEXT: and w14, w15, w14
79 ; CHECK-NEXT: umov w12, v0.b[3]
80 ; CHECK-NEXT: and w13, w14, w13
81 ; CHECK-NEXT: umov w11, v0.b[4]
82 ; CHECK-NEXT: and w12, w13, w12
83 ; CHECK-NEXT: umov w10, v0.b[5]
84 ; CHECK-NEXT: and w11, w12, w11
85 ; CHECK-NEXT: umov w9, v0.b[6]
86 ; CHECK-NEXT: and w10, w11, w10
87 ; CHECK-NEXT: umov w8, v0.b[7]
88 ; CHECK-NEXT: and w9, w10, w9
89 ; CHECK-NEXT: and w8, w9, w8
69 ; CHECK-NEXT: uminv b0, v0.8b
70 ; CHECK-NEXT: fmov w8, s0
9071 ; CHECK-NEXT: tst w8, #0x1
9172 ; CHECK-NEXT: csel w0, w0, w1, ne
9273 ; CHECK-NEXT: ret
10081 ; CHECK-LABEL: reduce_and_v16:
10182 ; CHECK: // %bb.0:
10283 ; CHECK-NEXT: cmlt v0.16b, v0.16b, #0
103 ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
104 ; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
105 ; CHECK-NEXT: umov w8, v0.b[1]
106 ; CHECK-NEXT: umov w9, v0.b[0]
107 ; CHECK-NEXT: and w8, w9, w8
108 ; CHECK-NEXT: umov w9, v0.b[2]
109 ; CHECK-NEXT: and w8, w8, w9
110 ; CHECK-NEXT: umov w9, v0.b[3]
111 ; CHECK-NEXT: and w8, w8, w9
112 ; CHECK-NEXT: umov w9, v0.b[4]
113 ; CHECK-NEXT: and w8, w8, w9
114 ; CHECK-NEXT: umov w9, v0.b[5]
115 ; CHECK-NEXT: and w8, w8, w9
116 ; CHECK-NEXT: umov w9, v0.b[6]
117 ; CHECK-NEXT: and w8, w8, w9
118 ; CHECK-NEXT: umov w9, v0.b[7]
119 ; CHECK-NEXT: and w8, w8, w9
84 ; CHECK-NEXT: uminv b0, v0.16b
85 ; CHECK-NEXT: fmov w8, s0
12086 ; CHECK-NEXT: tst w8, #0x1
12187 ; CHECK-NEXT: csel w0, w0, w1, ne
12288 ; CHECK-NEXT: ret
13298 ; CHECK-NEXT: cmlt v1.16b, v1.16b, #0
13399 ; CHECK-NEXT: cmlt v0.16b, v0.16b, #0
134100 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
135 ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
136 ; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
137 ; CHECK-NEXT: umov w8, v0.b[1]
138 ; CHECK-NEXT: umov w9, v0.b[0]
139 ; CHECK-NEXT: and w8, w9, w8
140 ; CHECK-NEXT: umov w9, v0.b[2]
141 ; CHECK-NEXT: and w8, w8, w9
142 ; CHECK-NEXT: umov w9, v0.b[3]
143 ; CHECK-NEXT: and w8, w8, w9
144 ; CHECK-NEXT: umov w9, v0.b[4]
145 ; CHECK-NEXT: and w8, w8, w9
146 ; CHECK-NEXT: umov w9, v0.b[5]
147 ; CHECK-NEXT: and w8, w8, w9
148 ; CHECK-NEXT: umov w9, v0.b[6]
149 ; CHECK-NEXT: and w8, w8, w9
150 ; CHECK-NEXT: umov w9, v0.b[7]
151 ; CHECK-NEXT: and w8, w8, w9
101 ; CHECK-NEXT: uminv b0, v0.16b
102 ; CHECK-NEXT: fmov w8, s0
152103 ; CHECK-NEXT: tst w8, #0x1
153104 ; CHECK-NEXT: csel w0, w0, w1, ne
154105 ; CHECK-NEXT: ret
178129 ; CHECK-NEXT: shl v0.2s, v0.2s, #24
179130 ; CHECK-NEXT: sshr v0.2s, v0.2s, #24
180131 ; CHECK-NEXT: cmlt v0.2s, v0.2s, #0
181 ; CHECK-NEXT: mov w8, v0.s[1]
182 ; CHECK-NEXT: fmov w9, s0
183 ; CHECK-NEXT: orr w8, w9, w8
132 ; CHECK-NEXT: umaxp v0.2s, v0.2s, v0.2s
133 ; CHECK-NEXT: fmov w8, s0
184134 ; CHECK-NEXT: tst w8, #0x1
185135 ; CHECK-NEXT: csel w0, w0, w1, ne
186136 ; CHECK-NEXT: ret
196146 ; CHECK-NEXT: shl v0.4h, v0.4h, #8
197147 ; CHECK-NEXT: sshr v0.4h, v0.4h, #8
198148 ; CHECK-NEXT: cmlt v0.4h, v0.4h, #0
199 ; CHECK-NEXT: umov w10, v0.h[1]
200 ; CHECK-NEXT: umov w11, v0.h[0]
201 ; CHECK-NEXT: umov w9, v0.h[2]
202 ; CHECK-NEXT: orr w10, w11, w10
203 ; CHECK-NEXT: umov w8, v0.h[3]
204 ; CHECK-NEXT: orr w9, w10, w9
205 ; CHECK-NEXT: orr w8, w9, w8
149 ; CHECK-NEXT: umaxv h0, v0.4h
150 ; CHECK-NEXT: fmov w8, s0
206151 ; CHECK-NEXT: tst w8, #0x1
207152 ; CHECK-NEXT: csel w0, w0, w1, ne
208153 ; CHECK-NEXT: ret
216161 ; CHECK-LABEL: reduce_or_v8:
217162 ; CHECK: // %bb.0:
218163 ; CHECK-NEXT: cmlt v0.8b, v0.8b, #0
219 ; CHECK-NEXT: umov w14, v0.b[1]
220 ; CHECK-NEXT: umov w15, v0.b[0]
221 ; CHECK-NEXT: umov w13, v0.b[2]
222 ; CHECK-NEXT: orr w14, w15, w14
223 ; CHECK-NEXT: umov w12, v0.b[3]
224 ; CHECK-NEXT: orr w13, w14, w13
225 ; CHECK-NEXT: umov w11, v0.b[4]
226 ; CHECK-NEXT: orr w12, w13, w12
227 ; CHECK-NEXT: umov w10, v0.b[5]
228 ; CHECK-NEXT: orr w11, w12, w11
229 ; CHECK-NEXT: umov w9, v0.b[6]
230 ; CHECK-NEXT: orr w10, w11, w10
231 ; CHECK-NEXT: umov w8, v0.b[7]
232 ; CHECK-NEXT: orr w9, w10, w9
233 ; CHECK-NEXT: orr w8, w9, w8
164 ; CHECK-NEXT: umaxv b0, v0.8b
165 ; CHECK-NEXT: fmov w8, s0
234166 ; CHECK-NEXT: tst w8, #0x1
235167 ; CHECK-NEXT: csel w0, w0, w1, ne
236168 ; CHECK-NEXT: ret
244176 ; CHECK-LABEL: reduce_or_v16:
245177 ; CHECK: // %bb.0:
246178 ; CHECK-NEXT: cmlt v0.16b, v0.16b, #0
247 ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
248 ; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
249 ; CHECK-NEXT: umov w8, v0.b[1]
250 ; CHECK-NEXT: umov w9, v0.b[0]
251 ; CHECK-NEXT: orr w8, w9, w8
252 ; CHECK-NEXT: umov w9, v0.b[2]
253 ; CHECK-NEXT: orr w8, w8, w9
254 ; CHECK-NEXT: umov w9, v0.b[3]
255 ; CHECK-NEXT: orr w8, w8, w9
256 ; CHECK-NEXT: umov w9, v0.b[4]
257 ; CHECK-NEXT: orr w8, w8, w9
258 ; CHECK-NEXT: umov w9, v0.b[5]
259 ; CHECK-NEXT: orr w8, w8, w9
260 ; CHECK-NEXT: umov w9, v0.b[6]
261 ; CHECK-NEXT: orr w8, w8, w9
262 ; CHECK-NEXT: umov w9, v0.b[7]
263 ; CHECK-NEXT: orr w8, w8, w9
179 ; CHECK-NEXT: umaxv b0, v0.16b
180 ; CHECK-NEXT: fmov w8, s0
264181 ; CHECK-NEXT: tst w8, #0x1
265182 ; CHECK-NEXT: csel w0, w0, w1, ne
266183 ; CHECK-NEXT: ret
276193 ; CHECK-NEXT: cmlt v1.16b, v1.16b, #0
277194 ; CHECK-NEXT: cmlt v0.16b, v0.16b, #0
278195 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
279 ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
280 ; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
281 ; CHECK-NEXT: umov w8, v0.b[1]
282 ; CHECK-NEXT: umov w9, v0.b[0]
283 ; CHECK-NEXT: orr w8, w9, w8
284 ; CHECK-NEXT: umov w9, v0.b[2]
285 ; CHECK-NEXT: orr w8, w8, w9
286 ; CHECK-NEXT: umov w9, v0.b[3]
287 ; CHECK-NEXT: orr w8, w8, w9
288 ; CHECK-NEXT: umov w9, v0.b[4]
289 ; CHECK-NEXT: orr w8, w8, w9
290 ; CHECK-NEXT: umov w9, v0.b[5]
291 ; CHECK-NEXT: orr w8, w8, w9
292 ; CHECK-NEXT: umov w9, v0.b[6]
293 ; CHECK-NEXT: orr w8, w8, w9
294 ; CHECK-NEXT: umov w9, v0.b[7]
295 ; CHECK-NEXT: orr w8, w8, w9
196 ; CHECK-NEXT: umaxv b0, v0.16b
197 ; CHECK-NEXT: fmov w8, s0
296198 ; CHECK-NEXT: tst w8, #0x1
297199 ; CHECK-NEXT: csel w0, w0, w1, ne
298200 ; CHECK-NEXT: ret