llvm.org GIT mirror llvm / 8596002
[X86] Custom legalize (v2i32 (setcc (v2f32))) so that we don't end up with a (v4i1 (setcc (v4f32))) Under VLX, getSetCCResultType returns v2i1/v4i1 for v2f32/v4f32, so default type legalization will end up changing the setcc result type back to vXi1 if it had been extended. The resulting extend gets messed up further by type legalization and is difficult to recombine back to (v4i32 (setcc (v4f32))) after legalization. I went ahead and enabled this for SSE2 and later since it's always the result we want and this helps type legalization get there in fewer steps. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@324822 91177308-0d34-0410-b5e6-96231b3b80d8 Craig Topper 2 years ago
4 changed file(s) with 38 addition(s) and 15 deletion(s). Raw diff Collapse all Expand all
797797 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
798798 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
799799 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
800
801 // Provide custom widening for v2f32 setcc. This is really for VLX when
802 // setcc result type returns v2i1/v4i1 vector for v2f32/v4f32 leading to
803 // type legalization changing the result type to v4i1 during widening.
804 // It works fine for SSE2 and is probably faster so no need to qualify with
805 // VLX support.
806 setOperationAction(ISD::SETCC, MVT::v2i32, Custom);
800807
801808 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
802809 setOperationAction(ISD::SETCC, VT, Custom);
1792617933 "Expected operands with same type!");
1792717934 assert(VT.getVectorNumElements() == VTOp0.getVectorNumElements() &&
1792817935 "Invalid number of packed elements for source and destination!");
17936
17937 // This is being called by type legalization because v2i32 is marked custom
17938 // for result type legalization for v2f32.
17939 if (VTOp0 == MVT::v2i32)
17940 return SDValue();
1792917941
1793017942 if (VT.is128BitVector() && VTOp0.is256BitVector()) {
1793117943 // On non-AVX512 targets, a vector of MVT::i1 is promoted by the type
2473524747 SDValue Res = DAG.getNode(X86ISD::AVG, dl, RegVT, InVec0, InVec1);
2473624748 if (!ExperimentalVectorWideningLegalization)
2473724749 Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InVT, Res,
24750 DAG.getIntPtrConstant(0, dl));
24751 Results.push_back(Res);
24752 return;
24753 }
24754 case ISD::SETCC: {
24755 // Widen v2i32 (setcc v2f32). This is really needed for AVX512VL when
24756 // setCC result type is v2i1 because type legalization will end up with
24757 // a v4i1 setcc plus an extend.
24758 assert(N->getValueType(0) == MVT::v2i32 && "Unexpected type");
24759 if (N->getOperand(0).getValueType() != MVT::v2f32)
24760 return;
24761 SDValue UNDEF = DAG.getUNDEF(MVT::v2f32);
24762 SDValue LHS = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32,
24763 N->getOperand(0), UNDEF);
24764 SDValue RHS = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32,
24765 N->getOperand(1), UNDEF);
24766 SDValue Res = DAG.getNode(ISD::SETCC, dl, MVT::v4i32, LHS, RHS,
24767 N->getOperand(2));
24768 if (!ExperimentalVectorWideningLegalization)
24769 Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res,
2473824770 DAG.getIntPtrConstant(0, dl));
2473924771 Results.push_back(Res);
2474024772 return;
17061706 ; VLDQ-LABEL: sbto2f32:
17071707 ; VLDQ: # %bb.0:
17081708 ; VLDQ-NEXT: vxorps %xmm1, %xmm1, %xmm1
1709 ; VLDQ-NEXT: vcmpltps %xmm0, %xmm1, %k0
1710 ; VLDQ-NEXT: vpmovm2q %k0, %xmm0
1711 ; VLDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
1709 ; VLDQ-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
17121710 ; VLDQ-NEXT: vcvtdq2ps %xmm0, %xmm0
17131711 ; VLDQ-NEXT: retq
17141712 ;
17151713 ; VLNODQ-LABEL: sbto2f32:
17161714 ; VLNODQ: # %bb.0:
17171715 ; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
1718 ; VLNODQ-NEXT: vcmpltps %xmm0, %xmm1, %k1
1719 ; VLNODQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
1720 ; VLNODQ-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
1721 ; VLNODQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
1716 ; VLNODQ-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
17221717 ; VLNODQ-NEXT: vcvtdq2ps %xmm0, %xmm0
17231718 ; VLNODQ-NEXT: retq
17241719 %cmpres = fcmp ogt <2 x float> %a, zeroinitializer
25352535 ; GENERIC-LABEL: sbto2f32:
25362536 ; GENERIC: # %bb.0:
25372537 ; GENERIC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
2538 ; GENERIC-NEXT: vcmpltps %xmm0, %xmm1, %k0 # sched: [3:1.00]
2539 ; GENERIC-NEXT: vpmovm2q %k0, %xmm0 # sched: [1:0.33]
2540 ; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] sched: [1:0.50]
2538 ; GENERIC-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
25412539 ; GENERIC-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00]
25422540 ; GENERIC-NEXT: retq # sched: [1:1.00]
25432541 ;
25442542 ; SKX-LABEL: sbto2f32:
25452543 ; SKX: # %bb.0:
25462544 ; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
2547 ; SKX-NEXT: vcmpltps %xmm0, %xmm1, %k0 # sched: [3:1.00]
2548 ; SKX-NEXT: vpmovm2q %k0, %xmm0 # sched: [1:0.25]
2549 ; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] sched: [1:1.00]
2545 ; SKX-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 # sched: [4:0.33]
25502546 ; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.33]
25512547 ; SKX-NEXT: retq # sched: [7:1.00]
25522548 %cmpres = fcmp ogt <2 x float> %a, zeroinitializer
907907 ; AVX512-LABEL: test46:
908908 ; AVX512: ## %bb.0:
909909 ; AVX512-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
910 ; AVX512-NEXT: vpmovsxdq %xmm0, %xmm0
911 ; AVX512-NEXT: vpsrlq $63, %xmm0, %xmm0
910 ; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
911 ; AVX512-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
912912 ; AVX512-NEXT: retq
913913 ;
914914 ; SKX-LABEL: test46: