llvm.org GIT mirror llvm / 825b93b
[X86] Teach how to combine a vselect into a movss/movsd Add target specific rules for combining vselect dag nodes into movss/movsd when possible. If the vector type of the vselect dag node in input is either MVT::v4i32 or MVT::v4f32, then try to fold according to rules: 1) fold (vselect (build_vector (0, -1, -1, -1)), A, B) -> (movss A, B) 2) fold (vselect (build_vector (-1, 0, 0, 0)), A, B) -> (movss B, A) If the vector type of the vselect dag node in input is either MVT::v2i64 or MVT::v2f64 (and we have SSE2), then try to fold according to rules: 3) fold (vselect (build_vector (0, -1)), A, B) -> (movsd A, B) 4) fold (vselect (build_vector (-1, 0)), A, B) -> (movsd B, A) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@199683 91177308-0d34-0410-b5e6-96231b3b80d8 Andrea Di Biagio 6 years ago
7 changed file(s) with 355 addition(s) and 27 deletion(s). Raw diff Collapse all Expand all
1715417154 }
1715517155 }
1715617156
17157 // Try to fold this VSELECT into a MOVSS/MOVSD
17158 if (N->getOpcode() == ISD::VSELECT &&
17159 Cond.getOpcode() == ISD::BUILD_VECTOR && !DCI.isBeforeLegalize()) {
17160 if (VT == MVT::v4i32 || VT == MVT::v4f32 ||
17161 (Subtarget->hasSSE2() && (VT == MVT::v2i64 || VT == MVT::v2f64))) {
17162 bool CanFold = false;
17163 unsigned NumElems = Cond.getNumOperands();
17164 SDValue A = LHS;
17165 SDValue B = RHS;
17166
17167 if (isZero(Cond.getOperand(0))) {
17168 CanFold = true;
17169
17170 // fold (vselect <0,-1,-1,-1>, A, B) -> (movss A, B)
17171 // fold (vselect <0,-1>, A, B) -> (movsd A, B)
17172 for (unsigned i = 1, e = NumElems; i != e && CanFold; ++i)
17173 CanFold = isAllOnes(Cond.getOperand(i));
17174 } else if (isAllOnes(Cond.getOperand(0))) {
17175 CanFold = true;
17176 std::swap(A, B);
17177
17178 // fold (vselect <-1,0,0,0>, A, B) -> (movss B, A)
17179 // fold (vselect <-1,0>, A, B) -> (movsd B, A)
17180 for (unsigned i = 1, e = NumElems; i != e && CanFold; ++i)
17181 CanFold = isZero(Cond.getOperand(i));
17182 }
17183
17184 if (CanFold) {
17185 if (VT == MVT::v4i32 || VT == MVT::v4f32)
17186 return getTargetShuffleNode(X86ISD::MOVSS, DL, VT, A, B, DAG);
17187 return getTargetShuffleNode(X86ISD::MOVSD, DL, VT, A, B, DAG);
17188 }
17189 }
17190 }
17191
1715717192 // If we know that this node is legal then we know that it is going to be
1715817193 // matched by one of the SSE/AVX BLEND instructions. These instructions only
1715917194 // depend on the highest bit in each word. Try to use SimplifyDemandedBits
55 ;CHECK: vblendvps
66 ;CHECK: ret
77 define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
8 %vsel = select <4 x i1> false, i1 false>, <4 x float> %v1, <4 x float> %v2
8 %vsel = select <4 x i1> true, i1 false>, <4 x float> %v1, <4 x float> %v2
99 ret <4 x float> %vsel
1010 }
1111
1414 ;CHECK: vblendvps
1515 ;CHECK: ret
1616 define <4 x i32> @vsel_i32(<4 x i32> %v1, <4 x i32> %v2) {
17 %vsel = select <4 x i1> false, i1 false>, <4 x i32> %v1, <4 x i32> %v2
17 %vsel = select <4 x i1> true, i1 false>, <4 x i32> %v1, <4 x i32> %v2
1818 ret <4 x i32> %vsel
1919 }
2020
2121
2222 ;CHECK-LABEL: vsel_double:
23 ;CHECK: vblendvpd
23 ;CHECK: vmovsd
2424 ;CHECK: ret
2525 define <2 x double> @vsel_double(<2 x double> %v1, <2 x double> %v2) {
2626 %vsel = select <2 x i1> , <2 x double> %v1, <2 x double> %v2
2929
3030
3131 ;CHECK-LABEL: vsel_i64:
32 ;CHECK: vblendvpd
32 ;CHECK: vmovsd
3333 ;CHECK: ret
3434 define <2 x i64> @vsel_i64(<2 x i64> %v1, <2 x i64> %v2) {
3535 %vsel = select <2 x i1> , <2 x i64> %v1, <2 x i64> %v2
0 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -mattr=+sse4.1 | FileCheck %s
11
22
3 ; In this test we check that sign-extend of the mask bit is performed by
4 ; shifting the needed bit to the MSB, and not using shl+sra.
3 ; Verify that we produce movss instead of blendvps when possible.
54
65 ;CHECK-LABEL: vsel_float:
7 ;CHECK: movl $-1
8 ;CHECK-NEXT: movd
9 ;CHECK-NEXT: blendvps
6 ;CHECK-NOT: blendvps
7 ;CHECK: movss
108 ;CHECK: ret
119 define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
1210 %vsel = select <4 x i1> , <4 x float> %v1, <4 x float> %v2
1412 }
1513
1614 ;CHECK-LABEL: vsel_4xi8:
17 ;CHECK: movl $-1
18 ;CHECK-NEXT: movd
19 ;CHECK-NEXT: blendvps
15 ;CHECK-NOT: blendvps
16 ;CHECK: movss
2017 ;CHECK: ret
2118 define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) {
2219 %vsel = select <4 x i1> , <4 x i8> %v1, <4 x i8> %v2
212212 ; CHECK-NOT: movsd
213213 ; CHECK: ret
214214
215
216 define <4 x float> @test3_add_ss(<4 x float> %a, <4 x float> %b) {
217 %1 = fadd <4 x float> %a, %b
218 %2 = select <4 x i1> , <4 x float> %a, <4 x float> %1
219 ret <4 x float> %2
220 }
221
222 ; CHECK-LABEL: test3_add_ss
223 ; SSE2: addss %xmm1, %xmm0
224 ; AVX: vaddss %xmm1, %xmm0, %xmm0
225 ; CHECK-NOT: movss
226 ; CHECK: ret
227
228
229 define <4 x float> @test3_sub_ss(<4 x float> %a, <4 x float> %b) {
230 %1 = fsub <4 x float> %a, %b
231 %2 = select <4 x i1> , <4 x float> %a, <4 x float> %1
232 ret <4 x float> %2
233 }
234
235 ; CHECK-LABEL: test3_sub_ss
236 ; SSE2: subss %xmm1, %xmm0
237 ; AVX: vsubss %xmm1, %xmm0, %xmm0
238 ; CHECK-NOT: movss
239 ; CHECK: ret
240
241
242 define <4 x float> @test3_mul_ss(<4 x float> %a, <4 x float> %b) {
243 %1 = fmul <4 x float> %a, %b
244 %2 = select <4 x i1> , <4 x float> %a, <4 x float> %1
245 ret <4 x float> %2
246 }
247
248 ; CHECK-LABEL: test3_mul_ss
249 ; SSE2: mulss %xmm1, %xmm0
250 ; AVX: vmulss %xmm1, %xmm0, %xmm0
251 ; CHECK-NOT: movss
252 ; CHECK: ret
253
254
255 define <4 x float> @test3_div_ss(<4 x float> %a, <4 x float> %b) {
256 %1 = fdiv <4 x float> %a, %b
257 %2 = select <4 x i1> , <4 x float> %a, <4 x float> %1
258 ret <4 x float> %2
259 }
260
261 ; CHECK-LABEL: test3_div_ss
262 ; SSE2: divss %xmm1, %xmm0
263 ; AVX: vdivss %xmm1, %xmm0, %xmm0
264 ; CHECK-NOT: movss
265 ; CHECK: ret
266
267
268 define <2 x double> @test3_add_sd(<2 x double> %a, <2 x double> %b) {
269 %1 = fadd <2 x double> %a, %b
270 %2 = select <2 x i1> , <2 x double> %a, <2 x double> %1
271 ret <2 x double> %2
272 }
273
274 ; CHECK-LABEL: test3_add_sd
275 ; SSE2: addsd %xmm1, %xmm0
276 ; AVX: vaddsd %xmm1, %xmm0, %xmm0
277 ; CHECK-NOT: movsd
278 ; CHECK: ret
279
280
281 define <2 x double> @test3_sub_sd(<2 x double> %a, <2 x double> %b) {
282 %1 = fsub <2 x double> %a, %b
283 %2 = select <2 x i1> , <2 x double> %a, <2 x double> %1
284 ret <2 x double> %2
285 }
286
287 ; CHECK-LABEL: test3_sub_sd
288 ; SSE2: subsd %xmm1, %xmm0
289 ; AVX: vsubsd %xmm1, %xmm0, %xmm0
290 ; CHECK-NOT: movsd
291 ; CHECK: ret
292
293
294 define <2 x double> @test3_mul_sd(<2 x double> %a, <2 x double> %b) {
295 %1 = fmul <2 x double> %a, %b
296 %2 = select <2 x i1> , <2 x double> %a, <2 x double> %1
297 ret <2 x double> %2
298 }
299
300 ; CHECK-LABEL: test3_mul_sd
301 ; SSE2: mulsd %xmm1, %xmm0
302 ; AVX: vmulsd %xmm1, %xmm0, %xmm0
303 ; CHECK-NOT: movsd
304 ; CHECK: ret
305
306
307 define <2 x double> @test3_div_sd(<2 x double> %a, <2 x double> %b) {
308 %1 = fdiv <2 x double> %a, %b
309 %2 = select <2 x i1> , <2 x double> %a, <2 x double> %1
310 ret <2 x double> %2
311 }
312
313 ; CHECK-LABEL: test3_div_sd
314 ; SSE2: divsd %xmm1, %xmm0
315 ; AVX: vdivsd %xmm1, %xmm0, %xmm0
316 ; CHECK-NOT: movsd
317 ; CHECK: ret
318
319
320 define <4 x float> @test4_add_ss(<4 x float> %a, <4 x float> %b) {
321 %1 = fadd <4 x float> %b, %a
322 %2 = select <4 x i1> , <4 x float> %b, <4 x float> %1
323 ret <4 x float> %2
324 }
325
326 ; CHECK-LABEL: test4_add_ss
327 ; SSE2: addss %xmm0, %xmm1
328 ; AVX: vaddss %xmm0, %xmm1, %xmm0
329 ; CHECK-NOT: movss
330 ; CHECK: ret
331
332
333 define <4 x float> @test4_sub_ss(<4 x float> %a, <4 x float> %b) {
334 %1 = fsub <4 x float> %b, %a
335 %2 = select <4 x i1> , <4 x float> %b, <4 x float> %1
336 ret <4 x float> %2
337 }
338
339 ; CHECK-LABEL: test4_sub_ss
340 ; SSE2: subss %xmm0, %xmm1
341 ; AVX: vsubss %xmm0, %xmm1, %xmm0
342 ; CHECK-NOT: movss
343 ; CHECK: ret
344
345
346 define <4 x float> @test4_mul_ss(<4 x float> %a, <4 x float> %b) {
347 %1 = fmul <4 x float> %b, %a
348 %2 = select <4 x i1> , <4 x float> %b, <4 x float> %1
349 ret <4 x float> %2
350 }
351
352 ; CHECK-LABEL: test4_mul_ss
353 ; SSE2: mulss %xmm0, %xmm1
354 ; AVX: vmulss %xmm0, %xmm1, %xmm0
355 ; CHECK-NOT: movss
356 ; CHECK: ret
357
358
359 define <4 x float> @test4_div_ss(<4 x float> %a, <4 x float> %b) {
360 %1 = fdiv <4 x float> %b, %a
361 %2 = select <4 x i1> , <4 x float> %b, <4 x float> %1
362 ret <4 x float> %2
363 }
364
365 ; CHECK-LABEL: test4_div_ss
366 ; SSE2: divss %xmm0, %xmm1
367 ; AVX: vdivss %xmm0, %xmm1, %xmm0
368 ; CHECK-NOT: movss
369 ; CHECK: ret
370
371
372 define <2 x double> @test4_add_sd(<2 x double> %a, <2 x double> %b) {
373 %1 = fadd <2 x double> %b, %a
374 %2 = select <2 x i1> , <2 x double> %b, <2 x double> %1
375 ret <2 x double> %2
376 }
377
378 ; CHECK-LABEL: test4_add_sd
379 ; SSE2: addsd %xmm0, %xmm1
380 ; AVX: vaddsd %xmm0, %xmm1, %xmm0
381 ; CHECK-NOT: movsd
382 ; CHECK: ret
383
384
385 define <2 x double> @test4_sub_sd(<2 x double> %a, <2 x double> %b) {
386 %1 = fsub <2 x double> %b, %a
387 %2 = select <2 x i1> , <2 x double> %b, <2 x double> %1
388 ret <2 x double> %2
389 }
390
391 ; CHECK-LABEL: test4_sub_sd
392 ; SSE2: subsd %xmm0, %xmm1
393 ; AVX: vsubsd %xmm0, %xmm1, %xmm0
394 ; CHECK-NOT: movsd
395 ; CHECK: ret
396
397
398 define <2 x double> @test4_mul_sd(<2 x double> %a, <2 x double> %b) {
399 %1 = fmul <2 x double> %b, %a
400 %2 = select <2 x i1> , <2 x double> %b, <2 x double> %1
401 ret <2 x double> %2
402 }
403
404 ; CHECK-LABEL: test4_mul_sd
405 ; SSE2: mulsd %xmm0, %xmm1
406 ; AVX: vmulsd %xmm0, %xmm1, %xmm0
407 ; CHECK-NOT: movsd
408 ; CHECK: ret
409
410
411 define <2 x double> @test4_div_sd(<2 x double> %a, <2 x double> %b) {
412 %1 = fdiv <2 x double> %b, %a
413 %2 = select <2 x i1> , <2 x double> %b, <2 x double> %1
414 ret <2 x double> %2
415 }
416
417 ; CHECK-LABEL: test4_div_sd
418 ; SSE2: divsd %xmm0, %xmm1
419 ; AVX: vdivsd %xmm0, %xmm1, %xmm0
420 ; CHECK-NOT: movsd
421 ; CHECK: ret
422
0 ; RUN: llc < %s -march=x86 -mcpu=yonah -mattr=+sse2,-sse4.1 | FileCheck %s
11
2 ; CHECK: vsel_float
3 ; CHECK: xorps
2 ; CHECK-LABEL: vsel_float
3 ; CHECK-NOT: xorps
44 ; CHECK: movss
5 ; CHECK: orps
5 ; CHECK-NOT: orps
66 ; CHECK: ret
77 define void@vsel_float(<4 x float>* %v1, <4 x float>* %v2) {
88 %A = load <4 x float>* %v1
99 %B = load <4 x float>* %v2
10 %vsel = select <4 x i1> true, i1 false, i1 false, i1 false>, <4 x float> %A, <4 x float> %B
10 %vsel = select <4 x i1> false, i1 true, i1 true, i1 true>, <4 x float> %A, <4 x float> %B
1111 store <4 x float > %vsel, <4 x float>* %v1
1212 ret void
1313 }
1414
15 ; CHECK: vsel_i32
16 ; CHECK: xorps
15 ; CHECK-LABEL: vsel_i32
16 ; CHECK-NOT: xorps
1717 ; CHECK: movss
18 ; CHECK: orps
18 ; CHECK-NOT: orps
1919 ; CHECK: ret
2020 define void@vsel_i32(<4 x i32>* %v1, <4 x i32>* %v2) {
2121 %A = load <4 x i32>* %v1
2626 }
2727
2828 ; Without forcing instructions, fall back to the preferred PS domain.
29 ; CHECK: vsel_i64
29 ; CHECK-LABEL: vsel_i64
3030 ; CHECK: andnps
3131 ; CHECK: orps
3232 ; CHECK: ret
4040 }
4141
4242 ; Without forcing instructions, fall back to the preferred PS domain.
43 ; CHECK: vsel_double
43 ; CHECK-LABEL: vsel_double
4444 ; CHECK: andnps
4545 ; CHECK: orps
4646 ; CHECK: ret
33 ;CHECK: blendvps
44 ;CHECK: ret
55 define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
6 %vsel = select <4 x i1> false, i1 false>, <4 x float> %v1, <4 x float> %v2
6 %vsel = select <4 x i1> true, i1 true>, <4 x float> %v1, <4 x float> %v2
77 ret <4 x float> %vsel
88 }
99
1212 ;CHECK: blendvps
1313 ;CHECK: ret
1414 define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) {
15 %vsel = select <4 x i1> false, i1 false, i1 false>, <4 x i8> %v1, <4 x i8> %v2
15 %vsel = select <4 x i1> true, i1 false, i1 false>, <4 x i8> %v1, <4 x i8> %v2
1616 ret <4 x i8> %vsel
1717 }
1818
2020 ;CHECK: blendvps
2121 ;CHECK: ret
2222 define <4 x i16> @vsel_4xi16(<4 x i16> %v1, <4 x i16> %v2) {
23 %vsel = select <4 x i1> false, i1 false>, <4 x i16> %v1, <4 x i16> %v2
23 %vsel = select <4 x i1> true, i1 true>, <4 x i16> %v1, <4 x i16> %v2
2424 ret <4 x i16> %vsel
2525 }
2626
2929 ;CHECK: blendvps
3030 ;CHECK: ret
3131 define <4 x i32> @vsel_i32(<4 x i32> %v1, <4 x i32> %v2) {
32 %vsel = select <4 x i1> false, i1 false, i1 false>, <4 x i32> %v1, <4 x i32> %v2
32 %vsel = select <4 x i1> true, i1 false, i1 false>, <4 x i32> %v1, <4 x i32> %v2
3333 ret <4 x i32> %vsel
3434 }
3535
3636
3737 ;CHECK-LABEL: vsel_double:
38 ;CHECK: blendvpd
38 ;CHECK: movsd
3939 ;CHECK: ret
4040 define <4 x double> @vsel_double(<4 x double> %v1, <4 x double> %v2) {
4141 %vsel = select <4 x i1> , <4 x double> %v1, <4 x double> %v2
4444
4545
4646 ;CHECK-LABEL: vsel_i64:
47 ;CHECK: blendvpd
47 ;CHECK: movsd
4848 ;CHECK: ret
4949 define <4 x i64> @vsel_i64(<4 x i64> %v1, <4 x i64> %v2) {
5050 %vsel = select <4 x i1> , <4 x i64> %v1, <4 x i64> %v2
173173 ; CHECK-NOT: xorps
174174 ; CHECK: ret
175175
176 define <4 x float> @test18(<4 x float> %a, <4 x float> %b) {
177 %1 = select <4 x i1> , <4 x float> %a, <4 x float> %b
178 ret <4 x float> %1
179 }
180 ; CHECK-LABEL: test18
181 ; CHECK-NOT: psllw
182 ; CHECK-NOT: psraw
183 ; CHECK-NOT: xorps
184 ; CHECK: movss
185 ; CHECK: ret
186
187 define <4 x i32> @test19(<4 x i32> %a, <4 x i32> %b) {
188 %1 = select <4 x i1> , <4 x i32> %a, <4 x i32> %b
189 ret <4 x i32> %1
190 }
191 ; CHECK-LABEL: test19
192 ; CHECK-NOT: psllw
193 ; CHECK-NOT: psraw
194 ; CHECK-NOT: xorps
195 ; CHECK: movss
196 ; CHECK: ret
197
198 define <2 x double> @test20(<2 x double> %a, <2 x double> %b) {
199 %1 = select <2 x i1> , <2 x double> %a, <2 x double> %b
200 ret <2 x double> %1
201 }
202 ; CHECK-LABEL: test20
203 ; CHECK-NOT: psllw
204 ; CHECK-NOT: psraw
205 ; CHECK-NOT: xorps
206 ; CHECK: movsd
207 ; CHECK: ret
208
209 define <2 x i64> @test21(<2 x i64> %a, <2 x i64> %b) {
210 %1 = select <2 x i1> , <2 x i64> %a, <2 x i64> %b
211 ret <2 x i64> %1
212 }
213 ; CHECK-LABEL: test21
214 ; CHECK-NOT: psllw
215 ; CHECK-NOT: psraw
216 ; CHECK-NOT: xorps
217 ; CHECK: movsd
218 ; CHECK: ret
219
220 define <4 x float> @test22(<4 x float> %a, <4 x float> %b) {
221 %1 = select <4 x i1> , <4 x float> %a, <4 x float> %b
222 ret <4 x float> %1
223 }
224 ; CHECK-LABEL: test22
225 ; CHECK-NOT: psllw
226 ; CHECK-NOT: psraw
227 ; CHECK-NOT: xorps
228 ; CHECK: movss
229 ; CHECK: ret
230
231 define <4 x i32> @test23(<4 x i32> %a, <4 x i32> %b) {
232 %1 = select <4 x i1> , <4 x i32> %a, <4 x i32> %b
233 ret <4 x i32> %1
234 }
235 ; CHECK-LABEL: test23
236 ; CHECK-NOT: psllw
237 ; CHECK-NOT: psraw
238 ; CHECK-NOT: xorps
239 ; CHECK: movss
240 ; CHECK: ret
241
242 define <2 x double> @test24(<2 x double> %a, <2 x double> %b) {
243 %1 = select <2 x i1> , <2 x double> %a, <2 x double> %b
244 ret <2 x double> %1
245 }
246 ; CHECK-LABEL: test24
247 ; CHECK-NOT: psllw
248 ; CHECK-NOT: psraw
249 ; CHECK-NOT: xorps
250 ; CHECK: movsd
251 ; CHECK: ret
252
253 define <2 x i64> @test25(<2 x i64> %a, <2 x i64> %b) {
254 %1 = select <2 x i1> , <2 x i64> %a, <2 x i64> %b
255 ret <2 x i64> %1
256 }
257 ; CHECK-LABEL: test25
258 ; CHECK-NOT: psllw
259 ; CHECK-NOT: psraw
260 ; CHECK-NOT: xorps
261 ; CHECK: movsd
262 ; CHECK: ret
263