llvm.org GIT mirror llvm / 251b4a0
Revert 132424 to fix PR10068. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@132479 91177308-0d34-0410-b5e6-96231b3b80d8 Rafael Espindola 8 years ago
9 changed file(s) with 22 addition(s) and 152 deletion(s). Raw diff Collapse all Expand all
17581758 if (NewMask == APInt::getSignBit(Op.getValueType().getSizeInBits()) &&
17591759 Op.getOperand(0).getValueType().isFloatingPoint() &&
17601760 !Op.getOperand(0).getValueType().isVector()) {
1761 bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, Op.getValueType());
1762 bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
1763 if (OpVTLegal || i32Legal) {
1764 EVT Ty = OpVTLegal ? Op.getValueType() : MVT::i32;
1761 if (isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32)) {
1762 EVT Ty = (isOperationLegalOrCustom(ISD::FGETSIGN, Op.getValueType())) ?
1763 Op.getValueType() : MVT::i32;
17651764 // Make a FGETSIGN + SHL to move the sign bit into the appropriate
17661765 // place. We expect the SHL to be eliminated by other optimizations.
17671766 SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Op.getOperand(0));
1768 if (!OpVTLegal)
1767 if (Ty != Op.getValueType())
17691768 Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, Op.getValueType(), Sign);
17701769 unsigned ShVal = Op.getValueType().getSizeInBits()-1;
17711770 SDValue ShAmt = TLO.DAG.getConstant(ShVal, Op.getValueType());
94019401 case X86ISD::UCOMI: return "X86ISD::UCOMI";
94029402 case X86ISD::SETCC: return "X86ISD::SETCC";
94039403 case X86ISD::SETCC_CARRY: return "X86ISD::SETCC_CARRY";
9404 case X86ISD::FSETCCsd: return "X86ISD::FSETCCsd";
9405 case X86ISD::FSETCCss: return "X86ISD::FSETCCss";
94069404 case X86ISD::CMOV: return "X86ISD::CMOV";
94079405 case X86ISD::BRCOND: return "X86ISD::BRCOND";
94089406 case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG";
1168011678 }
1168111679
1168211680
11683 // CMPEQCombine - Recognize the distinctive (AND (setcc ...) (setcc ...))
11684 // where both setccs reference the same FP CMP, and rewrite it as a single FSETCCss/FSETCCsd node (CMPEQSS
11685 // and friends). Likewise for OR -> CMPNEQSS. Returns an i8 holding bit 0 of the compare mask, or an empty SDValue() when no rewrite is done.
11686 static SDValue CMPEQCombine(SDNode *N, SelectionDAG &DAG,
11687 TargetLowering::DAGCombinerInfo &DCI, // DCI is not referenced in the body below
11688 const X86Subtarget *Subtarget) {
11689 unsigned opcode; // handed to isAndOrOfSetCCs below; never read afterwards in this function
11690
11691 // SSE1 supports CMP{eq|ne}SS, and SSE2 added CMP{eq|ne}SD, but
11692 // we're requiring SSE2 for both.
11693 if (Subtarget->hasSSE2() && isAndOrOfSetCCs(SDValue(N, 0U), opcode)) {
11694 SDValue N0 = N->getOperand(0);
11695 SDValue N1 = N->getOperand(1);
11696 SDValue CMP = N0->getOperand(1); // compare feeding the first setcc; presumably shared with N1 (relies on isAndOrOfSetCCs -- TODO confirm)
11697 SDValue CMP0 = CMP->getOperand(0);
11698 SDValue CMP1 = CMP->getOperand(1);
11699 EVT VT = CMP0.getValueType();
11700 DebugLoc DL = N->getDebugLoc();
11701
11702 if (VT == MVT::f32 || VT == MVT::f64) { // only scalar f32/f64 compares are rewritten
11703 bool ExpectingFlags = false;
11704 // Check for any users that want flags; if one is found the rewrite below is skipped:
11705 for (SDNode::use_iterator UI = N->use_begin(),
11706 UE = N->use_end();
11707 !ExpectingFlags && UI != UE; ++UI) // stops at the first user that sets ExpectingFlags
11708 switch (UI->getOpcode()) {
11709 default: // any user opcode not listed further down is handled like the three cases that follow
11710 case ISD::BR_CC:
11711 case ISD::BRCOND:
11712 case ISD::SELECT:
11713 ExpectingFlags = true;
11714 break;
11715 case ISD::CopyToReg:
11716 case ISD::SIGN_EXTEND:
11717 case ISD::ZERO_EXTEND:
11718 case ISD::ANY_EXTEND:
11719 break; // these users leave ExpectingFlags unchanged
11720 }
11721
11722 if (!ExpectingFlags) {
11723 enum X86::CondCode cc0 = (enum X86::CondCode)N0.getConstantOperandVal(0); // operand 0 of each setcc is read as its X86 condition code
11724 enum X86::CondCode cc1 = (enum X86::CondCode)N1.getConstantOperandVal(0);
11725
11726 if (cc1 == X86::COND_E || cc1 == X86::COND_NE) { // canonicalize: move an E/NE condition into cc0
11727 X86::CondCode tmp = cc0;
11728 cc0 = cc1;
11729 cc1 = tmp;
11730 }
11731
11732 if ((cc0 == X86::COND_E && cc1 == X86::COND_NP) || // NOTE(review): opcode (AND vs OR) is not consulted here -- confirm
11733 (cc0 == X86::COND_NE && cc1 == X86::COND_P)) { // isAndOrOfSetCCs guarantees E/NP pairs with AND and NE/P with OR
11734 bool is64BitFP = (CMP0.getValueType() == MVT::f64);
11735 X86ISD::NodeType NTOperator = is64BitFP ?
11736 X86ISD::FSETCCsd : X86ISD::FSETCCss;
11737 // FIXME: need symbolic constants for these magic numbers.
11738 // See X86ATTInstPrinter.cpp:printSSECC().
11739 unsigned x86cc = (cc0 == X86::COND_E) ? 0 : 4; // presumably 0 = "eq" and 4 = "neq" SSE compare predicates -- see the FIXME above
11740 SDValue OnesOrZeroesF = DAG.getNode(NTOperator, DL, MVT::f32, CMP0, CMP1, // NOTE(review): result type is f32 even for FSETCCsd,
11741 DAG.getConstant(x86cc, MVT::i8)); // while SDTX86Cmpsd constrains result 0 to f64 -- confirm this is intended
11742 SDValue OnesOrZeroesI = DAG.getNode(ISD::BITCAST, DL, MVT::i32,
11743 OnesOrZeroesF);
11744 SDValue ANDed = DAG.getNode(ISD::AND, DL, MVT::i32, OnesOrZeroesI, // keep only bit 0 of the compare mask
11745 DAG.getConstant(1, MVT::i32));
11746 SDValue OneBitOfTruth = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ANDed); // narrow the 0/1 value to i8
11747 return OneBitOfTruth;
11748 }
11749 }
11750 }
11751 }
11752 return SDValue(); // no rewrite performed
11753 }
11754
1175511681 static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
1175611682 TargetLowering::DAGCombinerInfo &DCI,
1175711683 const X86Subtarget *Subtarget) {
1175811684 if (DCI.isBeforeLegalizeOps())
1175911685 return SDValue();
1176011686
11761 SDValue R = CMPEQCombine(N, DAG, DCI, Subtarget);
11762 if (R.getNode())
11763 return R;
11764
1176511687 // Want to form PANDN nodes, in the hopes of then easily combining them with
1176611688 // OR and AND nodes to form PBLEND/PSIGN.
1176711689 EVT VT = N->getValueType(0);
1179011712 const X86Subtarget *Subtarget) {
1179111713 if (DCI.isBeforeLegalizeOps())
1179211714 return SDValue();
11793
11794 SDValue R = CMPEQCombine(N, DAG, DCI, Subtarget);
11795 if (R.getNode())
11796 return R;
1179711715
1179811716 EVT VT = N->getValueType(0);
1179911717 if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64 && VT != MVT::v2i64)
9292 // Same as SETCC except it's materialized with an sbb and the value is all
9393 // ones or all zeros.
9494 SETCC_CARRY, // R = carry_bit ? ~0 : 0
95
96 /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
97 /// Operands are two FP values to compare; result is a mask of
98 /// 0s or 1s. Generally does the right thing for C/C++ with NaNs.
99 FSETCCss, FSETCCsd,
10095
10196 /// X86 MOVMSK{pd|ps}, extracts sign bits of two or four FP values,
10297 /// result in an integer GPR. Needs masking for scalar result.
4040 def X86fgetsign: SDNode<"X86ISD::FGETSIGNx86",SDTFPToIntOp>;
4141 def X86comi : SDNode<"X86ISD::COMI", SDTX86CmpTest>;
4242 def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>;
43 def X86cmpss : SDNode<"X86ISD::FSETCCss", SDTX86Cmpss>;
44 def X86cmpsd : SDNode<"X86ISD::FSETCCsd", SDTX86Cmpsd>;
4543 def X86pshufb : SDNode<"X86ISD::PSHUFB",
4644 SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
4745 SDTCisSameAs<0,2>]>>;
2121 SDTCisInt<0>, SDTCisInt<3>]>;
2222
2323 def SDTX86CmpTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisSameAs<1, 2>]>;
24
25 def SDTX86Cmpsd : SDTypeProfile<1, 3, [SDTCisVT<0, f64>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
26 def SDTX86Cmpss : SDTypeProfile<1, 3, [SDTCisVT<0, f32>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
2724
2825 def SDTX86Cmov : SDTypeProfile<1, 4,
2926 [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>,
10551055 XD, VEX_4V;
10561056 }
10571057
1058 let Constraints = "$src1 = $dst" in {
1059 def CMPSSrr : SIi8<0xC2, MRMSrcReg,
1060 (outs FR32:$dst), (ins FR32:$src1, FR32:$src2, SSECC:$cc),
1061 "cmp${cc}ss\t{$src2, $dst|$dst, $src2}",
1062 [(set FR32:$dst, (X86cmpss (f32 FR32:$src1), FR32:$src2, imm:$cc))]>, XS;
1063 def CMPSSrm : SIi8<0xC2, MRMSrcMem,
1064 (outs FR32:$dst), (ins FR32:$src1, f32mem:$src2, SSECC:$cc),
1065 "cmp${cc}ss\t{$src2, $dst|$dst, $src2}",
1066 [(set FR32:$dst, (X86cmpss (f32 FR32:$src1), (loadf32 addr:$src2), imm:$cc))]>, XS;
1067 def CMPSDrr : SIi8<0xC2, MRMSrcReg,
1068 (outs FR64:$dst), (ins FR64:$src1, FR64:$src2, SSECC:$cc),
1069 "cmp${cc}sd\t{$src2, $dst|$dst, $src2}",
1070 [(set FR64:$dst, (X86cmpsd (f64 FR64:$src1), FR64:$src2, imm:$cc))]>, XD;
1071 def CMPSDrm : SIi8<0xC2, MRMSrcMem,
1072 (outs FR64:$dst), (ins FR64:$src1, f64mem:$src2, SSECC:$cc),
1073 "cmp${cc}sd\t{$src2, $dst|$dst, $src2}",
1074 [(set FR64:$dst, (X86cmpsd (f64 FR64:$src1), (loadf64 addr:$src2), imm:$cc))]>, XD;
1075 }
10761058 let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in {
1077 def CMPSSrr_alt : SIi8<0xC2, MRMSrcReg,
1078 (outs FR32:$dst), (ins FR32:$src1, FR32:$src, i8imm:$src2),
1079 "cmpss\t{$src2, $src, $dst|$dst, $src, $src2}", []>, XS;
1080 def CMPSSrm_alt : SIi8<0xC2, MRMSrcMem,
1081 (outs FR32:$dst), (ins FR32:$src1, f32mem:$src, i8imm:$src2),
1082 "cmpss\t{$src2, $src, $dst|$dst, $src, $src2}", []>, XS;
1083 def CMPSDrr_alt : SIi8<0xC2, MRMSrcReg,
1084 (outs FR64:$dst), (ins FR64:$src1, FR64:$src, i8imm:$src2),
1085 "cmpsd\t{$src2, $src, $dst|$dst, $src, $src2}", []>, XD;
1086 def CMPSDrm_alt : SIi8<0xC2, MRMSrcMem,
1087 (outs FR64:$dst), (ins FR64:$src1, f64mem:$src, i8imm:$src2),
1088 "cmpsd\t{$src2, $src, $dst|$dst, $src, $src2}", []>, XD;
1059 defm CMPSS : sse12_cmp_scalar
1060 "cmp${cc}ss\t{$src, $dst|$dst, $src}",
1061 "cmpss\t{$src2, $src, $dst|$dst, $src, $src2}">, XS;
1062 defm CMPSD : sse12_cmp_scalar
1063 "cmp${cc}sd\t{$src, $dst|$dst, $src}",
1064 "cmpsd\t{$src2, $src, $dst|$dst, $src, $src2}">, XD;
10891065 }
10901066
10911067 multiclass sse12_cmp_scalar_int
None ; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
0 ; RUN: llc < %s -march=x86 -mattr=+sse2 > %t
1 ; RUN: not grep cmp %t
2 ; RUN: not grep xor %t
3 ; RUN: grep jne %t | count 1
4 ; RUN: grep jp %t | count 1
5 ; RUN: grep setnp %t | count 1
6 ; RUN: grep sete %t | count 1
7 ; RUN: grep and %t | count 1
8 ; RUN: grep cvt %t | count 4
19
210 define i32 @isint_return(double %d) nounwind {
3 ; CHECK-NOT: xor
4 ; CHECK: cvt
511 %i = fptosi double %d to i32
6 ; CHECK-NEXT: cvt
712 %e = sitofp i32 %i to double
8 ; CHECK: cmpeqsd
913 %c = fcmp oeq double %d, %e
10 ; CHECK-NEXT: movd
11 ; CHECK-NEXT: andl
1214 %z = zext i1 %c to i32
1315 ret i32 %z
1416 }
1618 declare void @foo()
1719
1820 define void @isint_branch(double %d) nounwind {
19 ; CHECK: cvt
2021 %i = fptosi double %d to i32
21 ; CHECK-NEXT: cvt
2222 %e = sitofp i32 %i to double
23 ; CHECK: ucomisd
2423 %c = fcmp oeq double %d, %e
25 ; CHECK-NEXT: jne
26 ; CHECK-NEXT: jp
2724 br i1 %c, label %true, label %false
2825 true:
2926 call void @foo()
99 }
1010
1111 ; test that the load is folded.
12 ; CHECK: cmpeqsd (%{{rdi|rdx}}), %xmm0
12 ; CHECK: ucomisd (%{{rdi|rdx}}), %xmm0
None ; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
0 ; RUN: llc < %s -march=x86 | grep set | count 2
1 ; RUN: llc < %s -march=x86 | grep and
12
23 define zeroext i8 @t(double %x) nounwind readnone {
34 entry:
56 %1 = sitofp i32 %0 to double ; [#uses=1]
67 %2 = fcmp oeq double %1, %x ; [#uses=1]
78 %retval12 = zext i1 %2 to i8 ; [#uses=1]
8 ; CHECK: cmpeqsd
99 ret i8 %retval12
1010 }
11
12 define zeroext i8 @u(double %x) nounwind readnone {
13 entry:
14 %0 = fptosi double %x to i32 ; [#uses=1]
15 %1 = sitofp i32 %0 to double ; [#uses=1]
16 %2 = fcmp une double %1, %x ; [#uses=1]
17 %retval12 = zext i1 %2 to i8 ; [#uses=1]
18 ; CHECK: cmpneqsd
19 ret i8 %retval12
20 }