llvm.org GIT mirror llvm / d717471
[X86] Perform VSELECT DAG combines also before DAG type legalization.

If the DAG already has only legal types, then the second round of DAG combines is skipped. In this case VSELECT+SETCC patterns that match a more efficient instruction (e.g. min/max) are never recognized. This fix allows VSELECT+SETCC combines if the types are already legal before DAG type legalization.

Reviewer: Nadav

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190105 91177308-0d34-0410-b5e6-96231b3b80d8

Juergen Ributzka, 6 years ago
2 changed file(s) with 12 addition(s) and 12 deletion(s). Raw diff Collapse all Expand all
1641816418 SDValue LHS = N->getOperand(1);
1641916419 SDValue RHS = N->getOperand(2);
1642016420 EVT VT = LHS.getValueType();
16421 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
1642116422
1642216423 // If we have SSE[12] support, try to form min/max nodes. SSE min/max
1642316424 // instructions match the semantics of the common C idiom x<y?x:y but not
1642416425 // x<=y?x:y, because of how they handle negative zero (which can be
1642516426 // ignored in unsafe-math mode).
1642616427 if (Cond.getOpcode() == ISD::SETCC && VT.isFloatingPoint() &&
16427 VT != MVT::f80 && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
16428 VT != MVT::f80 && TLI.isTypeLegal(VT) &&
1642816429 (Subtarget->hasSSE2() ||
1642916430 (Subtarget->hasSSE1() && VT.getScalarType() == MVT::f32))) {
1643016431 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
1657716578 DCI.AddToWorklist(Cond.getNode());
1657816579 return DAG.getNode(N->getOpcode(), DL, OpVT, Cond, LHS, RHS);
1657916580 }
16580 else
16581 return SDValue();
1658216581 }
1658316582 // If this is a select between two integer constants, try to do some
1658416583 // optimizations.
1670416703 }
1670516704 }
1670616705
16706 // Early exit check
16707 if (!TLI.isTypeLegal(VT))
16708 return SDValue();
16709
1670716710 // Match VSELECTs into subs with unsigned saturation.
16708 if (!DCI.isBeforeLegalize() &&
16709 N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC &&
16711 if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC &&
1671016712 // psubus is available in SSE2 and AVX2 for i8 and i16 vectors.
1671116713 ((Subtarget->hasSSE2() && (VT == MVT::v16i8 || VT == MVT::v8i16)) ||
1671216714 (Subtarget->hasAVX2() && (VT == MVT::v32i8 || VT == MVT::v16i16)))) {
1676016762 }
1676116763
1676216764 // Try to match a min/max vector operation.
16763 if (!DCI.isBeforeLegalize() &&
16764 N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC)
16765 if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC)
1676516766 if (unsigned Op = matchIntegerMINMAX(Cond, VT, LHS, RHS, DAG, Subtarget))
1676616767 return DAG.getNode(Op, DL, N->getValueType(0), LHS, RHS);
1676716768
1676816769 // Simplify vector selection if the selector will be produced by CMPP*/PCMP*.
16769 if (!DCI.isBeforeLegalize() && N->getOpcode() == ISD::VSELECT &&
16770 Cond.getOpcode() == ISD::SETCC) {
16770 if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC &&
16771 // Check if SETCC has already been promoted
16772 TLI.getSetCCResultType(*DAG.getContext(), VT) == Cond.getValueType()) {
1677116773
1677216774 assert(Cond.getValueType().isVector() &&
1677316775 "vector select expects a vector selector!");
1681416816 // matched by one of the SSE/AVX BLEND instructions. These instructions only
1681516817 // depend on the highest bit in each word. Try to use SimplifyDemandedBits
1681616818 // to simplify previous instructions.
16817 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
1681816819 if (N->getOpcode() == ISD::VSELECT && DCI.isBeforeLegalizeOps() &&
1681916820 !DCI.isBeforeLegalize() && TLI.isOperationLegal(ISD::VSELECT, VT)) {
1682016821 unsigned BitWidth = Cond.getValueType().getScalarType().getSizeInBits();
103103 }
104104
105105 ; CHECK-LABEL: test11_unsigned
106 ; CHECK: vpcmpnleud %zmm
107 ; CHECK: vpblendmd %zmm
106 ; CHECK: vpmaxud
108107 ; CHECK: ret
109108 define <8 x i32> @test11_unsigned(<8 x i32> %x, <8 x i32> %y) nounwind {
110109 %mask = icmp ugt <8 x i32> %x, %y