llvm (llvm.org GIT mirror) / commit f5c1edb
Author: Craig Topper, 1 year, 1 month ago

[X86] Check Subtarget.hasSSE3() before calling shouldUseHorizontalOp and emitting X86ISD::FHADD in LowerUINT_TO_FP_i64.

This was a regression from r375341. Fixes PR43729.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@375381 91177308-0d34-0410-b5e6-96231b3b80d8
2 changed files with 105 additions and 1 deletion.
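X86ISD::FHADD is lowered to the SSE3 horizontal-add instructions (haddpd/haddps), so creating the node on a target without SSE3 leaves the instruction selector with nothing to match it against. The horizontal-op heuristic can still fire on such targets because it also triggers when the function is optimizing for size, which is why the regression surfaces in an optsize function compiled for plain SSE2. A rough sketch of that decision, assuming the helper keys off size optimization and the fast-horizontal-ops tuning flag (illustrative only, not the verbatim upstream shouldUseHorizontalOp):

// Illustrative sketch only -- not the verbatim shouldUseHorizontalOp().
// Assumption: a horizontal op is preferred when optimizing for size or when
// the subtarget advertises fast horizontal ops, so an SSE2-only optsize
// function could reach the X86ISD::FHADD path before this fix.
static bool shouldUseHorizontalOpSketch(SelectionDAG &DAG,
                                        const X86Subtarget &Subtarget) {
  bool OptForSize = DAG.getMachineFunction().getFunction().hasOptSize();
  return OptForSize || Subtarget.hasFastHorizontalOps();
}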
@@ -18590,7 +18590,7 @@
   SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1);
   SDValue Result;
 
-  if (shouldUseHorizontalOp(true, DAG, Subtarget)) {
+  if (Subtarget.hasSSE3() && shouldUseHorizontalOp(true, DAG, Subtarget)) {
     Result = DAG.getNode(X86ISD::FHADD, dl, MVT::v2f64, Sub, Sub);
   } else {
     SDValue Shuffle = DAG.getVectorShuffle(MVT::v2f64, dl, Sub, Sub, {1,-1});
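With the added guard, the SSE3 horizontal add is only one of two strategies for summing the two v2f64 lanes of Sub; targets without SSE3 take the existing shuffle-plus-add path instead. A minimal sketch of the two paths, assuming the fallback branch completes with a plain ISD::FADD and that only lane 0 of Result is consumed afterwards (the hunk above shows only the first line of the else branch):

if (Subtarget.hasSSE3() && shouldUseHorizontalOp(true, DAG, Subtarget)) {
  // SSE3 path: a single horizontal add (haddpd) sums both lanes of Sub.
  Result = DAG.getNode(X86ISD::FHADD, dl, MVT::v2f64, Sub, Sub);
} else {
  // Pre-SSE3 fallback: move the high lane down with a shuffle, then use a
  // regular vector FADD (assumed continuation; only the shuffle appears in
  // the hunk above). Only element 0 of Result is used afterwards.
  SDValue Shuffle = DAG.getVectorShuffle(MVT::v2f64, dl, Sub, Sub, {1, -1});
  Result = DAG.getNode(ISD::FADD, dl, MVT::v2f64, Shuffle, Sub);
}

In the regression test added below, the SSE2_32 and SSE2_64 check lines (unpckhpd followed by addsd) correspond to this fallback, while the AVX512F_32 run, whose feature set includes SSE3, still gets vhaddpd.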
@@ -669,6 +669,110 @@
   ret double %r
 }
 
+define double @u64_to_d_optsize(i64 %a) nounwind optsize {
+; AVX512DQVL_32-LABEL: u64_to_d_optsize:
+; AVX512DQVL_32: # %bb.0:
+; AVX512DQVL_32-NEXT: pushl %ebp
+; AVX512DQVL_32-NEXT: movl %esp, %ebp
+; AVX512DQVL_32-NEXT: andl $-8, %esp
+; AVX512DQVL_32-NEXT: subl $8, %esp
+; AVX512DQVL_32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX512DQVL_32-NEXT: vcvtuqq2pd %ymm0, %ymm0
+; AVX512DQVL_32-NEXT: vmovlps %xmm0, (%esp)
+; AVX512DQVL_32-NEXT: fldl (%esp)
+; AVX512DQVL_32-NEXT: movl %ebp, %esp
+; AVX512DQVL_32-NEXT: popl %ebp
+; AVX512DQVL_32-NEXT: vzeroupper
+; AVX512DQVL_32-NEXT: retl
+;
+; AVX512_64-LABEL: u64_to_d_optsize:
+; AVX512_64: # %bb.0:
+; AVX512_64-NEXT: vcvtusi2sd %rdi, %xmm0, %xmm0
+; AVX512_64-NEXT: retq
+;
+; AVX512DQ_32-LABEL: u64_to_d_optsize:
+; AVX512DQ_32: # %bb.0:
+; AVX512DQ_32-NEXT: pushl %ebp
+; AVX512DQ_32-NEXT: movl %esp, %ebp
+; AVX512DQ_32-NEXT: andl $-8, %esp
+; AVX512DQ_32-NEXT: subl $8, %esp
+; AVX512DQ_32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX512DQ_32-NEXT: vcvtuqq2pd %zmm0, %zmm0
+; AVX512DQ_32-NEXT: vmovlps %xmm0, (%esp)
+; AVX512DQ_32-NEXT: fldl (%esp)
+; AVX512DQ_32-NEXT: movl %ebp, %esp
+; AVX512DQ_32-NEXT: popl %ebp
+; AVX512DQ_32-NEXT: vzeroupper
+; AVX512DQ_32-NEXT: retl
+;
+; AVX512F_32-LABEL: u64_to_d_optsize:
+; AVX512F_32: # %bb.0:
+; AVX512F_32-NEXT: pushl %ebp
+; AVX512F_32-NEXT: movl %esp, %ebp
+; AVX512F_32-NEXT: andl $-8, %esp
+; AVX512F_32-NEXT: subl $8, %esp
+; AVX512F_32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX512F_32-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; AVX512F_32-NEXT: vsubpd {{\.LCPI.*}}, %xmm0, %xmm0
+; AVX512F_32-NEXT: vhaddpd %xmm0, %xmm0, %xmm0
+; AVX512F_32-NEXT: vmovlpd %xmm0, (%esp)
+; AVX512F_32-NEXT: fldl (%esp)
+; AVX512F_32-NEXT: movl %ebp, %esp
+; AVX512F_32-NEXT: popl %ebp
+; AVX512F_32-NEXT: retl
+;
+; SSE2_32-LABEL: u64_to_d_optsize:
+; SSE2_32: # %bb.0:
+; SSE2_32-NEXT: pushl %ebp
+; SSE2_32-NEXT: movl %esp, %ebp
+; SSE2_32-NEXT: andl $-8, %esp
+; SSE2_32-NEXT: subl $8, %esp
+; SSE2_32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE2_32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; SSE2_32-NEXT: subpd {{\.LCPI.*}}, %xmm0
+; SSE2_32-NEXT: movapd %xmm0, %xmm1
+; SSE2_32-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
+; SSE2_32-NEXT: addsd %xmm0, %xmm1
+; SSE2_32-NEXT: movsd %xmm1, (%esp)
+; SSE2_32-NEXT: fldl (%esp)
+; SSE2_32-NEXT: movl %ebp, %esp
+; SSE2_32-NEXT: popl %ebp
+; SSE2_32-NEXT: retl
+;
+; SSE2_64-LABEL: u64_to_d_optsize:
+; SSE2_64: # %bb.0:
+; SSE2_64-NEXT: movq %rdi, %xmm1
+; SSE2_64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
+; SSE2_64-NEXT: subpd {{.*}}(%rip), %xmm1
+; SSE2_64-NEXT: movapd %xmm1, %xmm0
+; SSE2_64-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; SSE2_64-NEXT: addsd %xmm1, %xmm0
+; SSE2_64-NEXT: retq
+;
+; X87-LABEL: u64_to_d_optsize:
+; X87: # %bb.0:
+; X87-NEXT: pushl %ebp
+; X87-NEXT: movl %esp, %ebp
+; X87-NEXT: andl $-8, %esp
+; X87-NEXT: subl $16, %esp
+; X87-NEXT: movl 8(%ebp), %eax
+; X87-NEXT: movl 12(%ebp), %ecx
+; X87-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X87-NEXT: movl %eax, (%esp)
+; X87-NEXT: xorl %eax, %eax
+; X87-NEXT: testl %ecx, %ecx
+; X87-NEXT: setns %al
+; X87-NEXT: fildll (%esp)
+; X87-NEXT: fadds {{\.LCPI.*}}(,%eax,4)
+; X87-NEXT: fstpl {{[0-9]+}}(%esp)
+; X87-NEXT: fldl {{[0-9]+}}(%esp)
+; X87-NEXT: movl %ebp, %esp
+; X87-NEXT: popl %ebp
+; X87-NEXT: retl
+  %r = uitofp i64 %a to double
+  ret double %r
+}
+
 define double @s64_to_d(i64 %a) nounwind {
 ; AVX512DQVL_32-LABEL: s64_to_d:
 ; AVX512DQVL_32: # %bb.0: