llvm.org GIT mirror llvm / commit 3c8bef9

[X86] Promote fp_to_sint v16f32->v16i16/v16i8 to avoid scalarization.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@319266 91177308-0d34-0410-b5e6-96231b3b80d8

Author: Craig Topper

3 changed files with 8 additions and 97 deletions.
@@ -1172,6 +1172,8 @@
 }

 setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);
+setOperationAction(ISD::FP_TO_SINT, MVT::v16i16, Promote);
+setOperationAction(ISD::FP_TO_SINT, MVT::v16i8, Promote);
 setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
 setOperationAction(ISD::FP_TO_UINT, MVT::v16i8, Promote);
 setOperationAction(ISD::FP_TO_UINT, MVT::v16i16, Promote);
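Note on the Promote action used above: the two added lines ask the SelectionDAG legalizer to perform these narrow FP_TO_SINT conversions at the already-Legal v16i32 type and then truncate the result, instead of scalarizing into sixteen vcvttss2si/vpinsr pairs as the old test output below shows. A minimal sketch of the same request, written with the explicit TargetLoweringBase helper setOperationPromotedToType purely for illustration (the commit itself uses the plain Promote action as in the hunk above, and the surrounding X86 constructor code is omitted):

  // Illustrative fragment for a target's TargetLowering constructor; not the
  // literal X86ISelLowering.cpp context.
  setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);   // vcvttps2dq on a zmm register
  // Perform the narrow conversions at v16i32, then truncate the result
  // (vpmovdw / vpmovdb) rather than scalarizing each lane.
  setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i16, MVT::v16i32);
  setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i8,  MVT::v16i32);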
@@ -223,7 +223,7 @@
 ; SSE42: cost of 7 {{.*}} %V16I16 = fptosi
 ; AVX1: cost of 3 {{.*}} %V16I16 = fptosi
 ; AVX2: cost of 3 {{.*}} %V16I16 = fptosi
-; AVX512: cost of 48 {{.*}} %V16I16 = fptosi
+; AVX512: cost of 1 {{.*}} %V16I16 = fptosi
 %V16I16 = fptosi <16 x float> undef to <16 x i16>

 ret i32 undef
@@ -253,7 +253,7 @@
 ; SSE42: cost of 7 {{.*}} %V16I8 = fptosi
 ; AVX1: cost of 15 {{.*}} %V16I8 = fptosi
 ; AVX2: cost of 15 {{.*}} %V16I8 = fptosi
-; AVX512: cost of 48 {{.*}} %V16I8 = fptosi
+; AVX512: cost of 1 {{.*}} %V16I8 = fptosi
 %V16I8 = fptosi <16 x float> undef to <16 x i8>

 ret i32 undef
@@ -432,53 +432,8 @@
 define <16 x i8> @f32to16sc(<16 x float> %f) {
 ; ALL-LABEL: f32to16sc:
 ; ALL: # BB#0:
-; ALL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; ALL-NEXT: vcvttss2si %xmm1, %eax
-; ALL-NEXT: vcvttss2si %xmm0, %ecx
-; ALL-NEXT: vmovd %ecx, %xmm1
-; ALL-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
-; ALL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
-; ALL-NEXT: vcvttss2si %xmm2, %eax
-; ALL-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
-; ALL-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
-; ALL-NEXT: vcvttss2si %xmm2, %eax
-; ALL-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
-; ALL-NEXT: vextractf128 $1, %ymm0, %xmm2
-; ALL-NEXT: vcvttss2si %xmm2, %eax
-; ALL-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
-; ALL-NEXT: vmovshdup {{.*#+}} xmm3 = xmm2[1,1,3,3]
-; ALL-NEXT: vcvttss2si %xmm3, %eax
-; ALL-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
-; ALL-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0]
-; ALL-NEXT: vcvttss2si %xmm3, %eax
-; ALL-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
-; ALL-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3]
-; ALL-NEXT: vcvttss2si %xmm2, %eax
-; ALL-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
-; ALL-NEXT: vextractf32x4 $2, %zmm0, %xmm2
-; ALL-NEXT: vcvttss2si %xmm2, %eax
-; ALL-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; ALL-NEXT: vmovshdup {{.*#+}} xmm3 = xmm2[1,1,3,3]
-; ALL-NEXT: vcvttss2si %xmm3, %eax
-; ALL-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
-; ALL-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0]
-; ALL-NEXT: vcvttss2si %xmm3, %eax
-; ALL-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
-; ALL-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3]
-; ALL-NEXT: vcvttss2si %xmm2, %eax
-; ALL-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
-; ALL-NEXT: vextractf32x4 $3, %zmm0, %xmm0
-; ALL-NEXT: vcvttss2si %xmm0, %eax
-; ALL-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; ALL-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; ALL-NEXT: vcvttss2si %xmm2, %eax
-; ALL-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
-; ALL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
-; ALL-NEXT: vcvttss2si %xmm2, %eax
-; ALL-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
-; ALL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
-; ALL-NEXT: vcvttss2si %xmm0, %eax
-; ALL-NEXT: vpinsrb $15, %eax, %xmm1, %xmm0
+; ALL-NEXT: vcvttps2dq %zmm0, %zmm0
+; ALL-NEXT: vpmovdb %zmm0, %xmm0
 ; ALL-NEXT: vzeroupper
 ; ALL-NEXT: retq
 %res = fptosi <16 x float> %f to <16 x i8>
@@ -488,54 +443,8 @@
 define <16 x i16> @f32to16ss(<16 x float> %f) {
 ; ALL-LABEL: f32to16ss:
 ; ALL: # BB#0:
-; ALL-NEXT: vextractf32x4 $2, %zmm0, %xmm1
-; ALL-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
-; ALL-NEXT: vcvttss2si %xmm2, %eax
-; ALL-NEXT: vcvttss2si %xmm1, %ecx
-; ALL-NEXT: vmovd %ecx, %xmm2
-; ALL-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; ALL-NEXT: vpermilpd {{.*#+}} xmm3 = xmm1[1,0]
-; ALL-NEXT: vcvttss2si %xmm3, %eax
-; ALL-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; ALL-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3]
-; ALL-NEXT: vcvttss2si %xmm1, %eax
-; ALL-NEXT: vpinsrw $3, %eax, %xmm2, %xmm1
-; ALL-NEXT: vextractf32x4 $3, %zmm0, %xmm2
-; ALL-NEXT: vcvttss2si %xmm2, %eax
-; ALL-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
-; ALL-NEXT: vmovshdup {{.*#+}} xmm3 = xmm2[1,1,3,3]
-; ALL-NEXT: vcvttss2si %xmm3, %eax
-; ALL-NEXT: vpinsrw $5, %eax, %xmm1, %xmm1
-; ALL-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0]
-; ALL-NEXT: vcvttss2si %xmm3, %eax
-; ALL-NEXT: vpinsrw $6, %eax, %xmm1, %xmm1
-; ALL-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3]
-; ALL-NEXT: vcvttss2si %xmm2, %eax
-; ALL-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
-; ALL-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; ALL-NEXT: vcvttss2si %xmm2, %eax
-; ALL-NEXT: vcvttss2si %xmm0, %ecx
-; ALL-NEXT: vmovd %ecx, %xmm2
-; ALL-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; ALL-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
-; ALL-NEXT: vcvttss2si %xmm3, %eax
-; ALL-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; ALL-NEXT: vpermilps {{.*#+}} xmm3 = xmm0[3,1,2,3]
-; ALL-NEXT: vcvttss2si %xmm3, %eax
-; ALL-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2
-; ALL-NEXT: vextractf128 $1, %ymm0, %xmm0
-; ALL-NEXT: vcvttss2si %xmm0, %eax
-; ALL-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; ALL-NEXT: vmovshdup {{.*#+}} xmm3 = xmm0[1,1,3,3]
-; ALL-NEXT: vcvttss2si %xmm3, %eax
-; ALL-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2
-; ALL-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
-; ALL-NEXT: vcvttss2si %xmm3, %eax
-; ALL-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2
-; ALL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
-; ALL-NEXT: vcvttss2si %xmm0, %eax
-; ALL-NEXT: vpinsrw $7, %eax, %xmm2, %xmm0
-; ALL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; ALL-NEXT: vcvttps2dq %zmm0, %zmm0
+; ALL-NEXT: vpmovdw %zmm0, %ymm0
 ; ALL-NEXT: retq
 %res = fptosi <16 x float> %f to <16 x i16>
 ret <16 x i16> %res
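Side note on why the promoted lowering is sound: fptosi is only defined when the value fits in the destination type, so converting through i32 and then truncating to i16/i8 agrees with a direct per-lane conversion for every well-defined input. A small standalone C++ illustration of that per-lane equivalence (not part of the commit):

  #include <cstdint>
  #include <cstdio>

  int main() {
    // A few in-range lanes; out-of-range values have no defined fptosi result,
    // so the backend is free to lower them however the promoted sequence does.
    const float In[8] = {0.5f, -1.9f, 2.0f, 3.7f, -4.2f, 127.9f, -128.0f, 15.5f};
    for (int I = 0; I != 8; ++I) {
      int16_t Direct = static_cast<int16_t>(In[I]);                          // scalar vcvttss2si-style path
      int16_t Promoted = static_cast<int16_t>(static_cast<int32_t>(In[I]));  // convert to i32, then truncate
      std::printf("%8.2f -> direct %d, promoted %d\n", In[I], Direct, Promoted);
    }
    return 0;
  }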