llvm.org GIT mirror llvm / 218977b
Extend the r107852 optimization, which turns some fp compares into code sequences using only i32 operations. It now optimizes some f64 compares when fp compare is exceptionally slow (e.g. cortex-a8). It also catches comparisons against 0.0. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@108258 91177308-0d34-0410-b5e6-96231b3b80d8 Evan Cheng 10 years ago
4 changed file(s) with 302 addition(s) and 80 deletion(s). Raw diff Collapse all Expand all
564564 case ARMISD::CMPZ: return "ARMISD::CMPZ";
565565 case ARMISD::CMPFP: return "ARMISD::CMPFP";
566566 case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0";
567 case ARMISD::BCC_i64: return "ARMISD::BCC_i64";
567568 case ARMISD::FMSTAT: return "ARMISD::FMSTAT";
568569 case ARMISD::CMOV: return "ARMISD::CMOV";
569570 case ARMISD::CNEG: return "ARMISD::CNEG";
22152216 /// the given operands.
22162217 SDValue
22172218 ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2218 SDValue &ARMCC, SelectionDAG &DAG,
2219 SDValue &ARMcc, SelectionDAG &DAG,
22192220 DebugLoc dl) const {
22202221 if (ConstantSDNode *RHSC = dyn_cast(RHS.getNode())) {
22212222 unsigned C = RHSC->getZExtValue();
22672268 CompareType = ARMISD::CMPZ;
22682269 break;
22692270 }
2270 ARMCC = DAG.getConstant(CondCode, MVT::i32);
2271 ARMcc = DAG.getConstant(CondCode, MVT::i32);
22712272 return DAG.getNode(CompareType, dl, MVT::Flag, LHS, RHS);
2272 }
2273
2274 static bool canBitcastToInt(SDNode *Op) {
2275 return Op->hasOneUse() &&
2276 ISD::isNormalLoad(Op) &&
2277 Op->getValueType(0) == MVT::f32;
2278 }
2279
2280 static SDValue bitcastToInt(SDValue Op, SelectionDAG &DAG) {
2281 if (LoadSDNode *Ld = dyn_cast(Op))
2282 return DAG.getLoad(MVT::i32, Op.getDebugLoc(),
2283 Ld->getChain(), Ld->getBasePtr(),
2284 Ld->getSrcValue(), Ld->getSrcValueOffset(),
2285 Ld->isVolatile(), Ld->isNonTemporal(),
2286 Ld->getAlignment());
2287
2288 llvm_unreachable("Unknown VFP cmp argument!");
22892273 }
22902274
22912275 /// Returns a appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
22922276 SDValue
2293 ARMTargetLowering::getVFPCmp(SDValue &LHS, SDValue &RHS, ISD::CondCode CC,
2294 SDValue &ARMCC, SelectionDAG &DAG,
2277 ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
22952278 DebugLoc dl) const {
2296 if (UnsafeFPMath && FiniteOnlyFPMath() &&
2297 (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
2298 CC == ISD::SETNE || CC == ISD::SETUNE) &&
2299 canBitcastToInt(LHS.getNode()) && canBitcastToInt(RHS.getNode())) {
2300 // If unsafe fp math optimization is enabled and there are no othter uses of
2301 // the CMP operands, and the condition code is EQ oe NE, we can optimize it
2302 // to an integer comparison.
2303 if (CC == ISD::SETOEQ)
2304 CC = ISD::SETEQ;
2305 else if (CC == ISD::SETUNE)
2306 CC = ISD::SETNE;
2307 LHS = bitcastToInt(LHS, DAG);
2308 RHS = bitcastToInt(RHS, DAG);
2309 return getARMCmp(LHS, RHS, CC, ARMCC, DAG, dl);
2310 }
2311
23122279 SDValue Cmp;
23132280 if (!isFloatingPointZero(RHS))
23142281 Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Flag, LHS, RHS);
23272294 DebugLoc dl = Op.getDebugLoc();
23282295
23292296 if (LHS.getValueType() == MVT::i32) {
2330 SDValue ARMCC;
2297 SDValue ARMcc;
23312298 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2332 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, dl);
2333 return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMCC, CCR,Cmp);
2299 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
2300 return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,Cmp);
23342301 }
23352302
23362303 ARMCC::CondCodes CondCode, CondCode2;
23372304 FPCCToARMCC(CC, CondCode, CondCode2);
23382305
2339 SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32);
2306 SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
2307 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
23402308 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2341 SDValue Cmp = getVFPCmp(LHS, RHS, CC, ARMCC, DAG, dl);
23422309 SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
2343 ARMCC, CCR, Cmp);
2310 ARMcc, CCR, Cmp);
23442311 if (CondCode2 != ARMCC::AL) {
2345 SDValue ARMCC2 = DAG.getConstant(CondCode2, MVT::i32);
2312 SDValue ARMcc2 = DAG.getConstant(CondCode2, MVT::i32);
23462313 // FIXME: Needs another CMP because flag can have but one use.
2347 SDValue Cmp2 = getVFPCmp(LHS, RHS, CC, ARMCC2, DAG, dl);
2314 SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
23482315 Result = DAG.getNode(ARMISD::CMOV, dl, VT,
2349 Result, TrueVal, ARMCC2, CCR, Cmp2);
2316 Result, TrueVal, ARMcc2, CCR, Cmp2);
23502317 }
23512318 return Result;
23522319 }
23532320
2321 /// canChangeToInt - Given the fp compare operand, return true if it is suitable
2322 /// to morph to an integer compare sequence.
///
/// Op        - the floating-point compare operand being examined.
/// SeenZero  - set to true when Op is a floating-point zero; it is never
///             cleared here, so it accumulates across calls that share it.
/// Subtarget - queried via isFPBrccSlow() to decide whether non-f32 operands
///             are worth converting.
///
/// Returns true only when the node has a single use, produces at least one
/// value, is f32 (or the subtarget reports slow FP branches), and is either a
/// floating-point zero or a normal load.
2323 static bool canChangeToInt(SDValue Op, bool &SeenZero,
2324 const ARMSubtarget *Subtarget) {
2325 SDNode *N = Op.getNode();
2326 if (!N->hasOneUse())
2327 // Otherwise it requires moving the value from fp to integer registers.
2328 return false;
// Reject nodes that produce no values at all.
2329 if (!N->getNumValues())
2330 return false;
2331 EVT VT = Op.getValueType();
2332 if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
2333 // f32 case is generally profitable. f64 case only makes sense when vcmpe +
2334 // vmrs are very slow, e.g. cortex-a8.
2335 return false;
2336
// A zero operand is always acceptable; record it for the caller.
2337 if (isFloatingPointZero(Op)) {
2338 SeenZero = true;
2339 return true;
2340 }
// Any other operand must be a normal load.
2341 return ISD::isNormalLoad(N);
2342 }
2343
/// bitcastf32Toi32 - Produce an i32 value standing in for the fp operand Op.
/// A floating-point zero becomes the i32 constant 0. A load is re-issued as an
/// i32 load that reuses the original load's chain, base pointer, source value
/// and offset, volatile / non-temporal flags and alignment. Any other operand
/// reaches llvm_unreachable.
2344 static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
2345 if (isFloatingPointZero(Op))
2346 return DAG.getConstant(0, MVT::i32);
2347
// NOTE(review): the template argument of dyn_cast appears to have been dropped
// by this rendering -- presumably dyn_cast<LoadSDNode>; confirm against the
// real file.
2348 if (LoadSDNode *Ld = dyn_cast(Op))
2349 return DAG.getLoad(MVT::i32, Op.getDebugLoc(),
2350 Ld->getChain(), Ld->getBasePtr(),
2351 Ld->getSrcValue(), Ld->getSrcValueOffset(),
2352 Ld->isVolatile(), Ld->isNonTemporal(),
2353 Ld->getAlignment());
2354
2355 llvm_unreachable("Unknown VFP cmp argument!");
2356 }
2357
/// expandf64Toi32 - Split the fp operand Op into two i32 values, returned
/// through RetVal1 and RetVal2. A floating-point zero yields two i32 zero
/// constants. A load is replaced by two i32 loads: RetVal1 from the original
/// base pointer and RetVal2 from base pointer + 4. Any other operand reaches
/// llvm_unreachable.
///
/// NOTE(review): RetVal1 is the word at the lower address; which half of the
/// double that is depends on the target's memory layout and is not decided
/// here -- confirm against the consumer.
2358 static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
2359 SDValue &RetVal1, SDValue &RetVal2) {
2360 if (isFloatingPointZero(Op)) {
2361 RetVal1 = DAG.getConstant(0, MVT::i32);
2362 RetVal2 = DAG.getConstant(0, MVT::i32);
2363 return;
2364 }
2365
// NOTE(review): the template argument of dyn_cast appears to have been dropped
// by this rendering -- presumably dyn_cast<LoadSDNode>; confirm against the
// real file.
2366 if (LoadSDNode *Ld = dyn_cast(Op)) {
2367 SDValue Ptr = Ld->getBasePtr();
// First word: same address and attributes as the original load.
2368 RetVal1 = DAG.getLoad(MVT::i32, Op.getDebugLoc(),
2369 Ld->getChain(), Ptr,
2370 Ld->getSrcValue(), Ld->getSrcValueOffset(),
2371 Ld->isVolatile(), Ld->isNonTemporal(),
2372 Ld->getAlignment());
2373
// Second word: address + 4, source-value offset + 4, and an alignment
// recomputed as MinAlign(original alignment, 4). Both loads hang off the
// original load's input chain.
2374 EVT PtrType = Ptr.getValueType();
2375 unsigned NewAlign = MinAlign(Ld->getAlignment(), 4);
2376 SDValue NewPtr = DAG.getNode(ISD::ADD, Op.getDebugLoc(),
2377 PtrType, Ptr, DAG.getConstant(4, PtrType));
2378 RetVal2 = DAG.getLoad(MVT::i32, Op.getDebugLoc(),
2379 Ld->getChain(), NewPtr,
2380 Ld->getSrcValue(), Ld->getSrcValueOffset() + 4,
2381 Ld->isVolatile(), Ld->isNonTemporal(),
2382 NewAlign);
2383 return;
2384 }
2385
2386 llvm_unreachable("Unknown VFP cmp argument!");
2387 }
2388
2389 /// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
2390 /// f32 and even f64 comparisons to integer ones.
///
/// Op is read as a five-operand branch node: (chain, condition code, LHS, RHS,
/// destination). Returns the replacement branch node, or an empty SDValue when
/// either operand fails canChangeToInt, or when neither FiniteOnlyFPMath()
/// holds nor a zero operand was seen. The function itself does not check the
/// condition code or the unsafe-fp-math setting.
2391 SDValue
2392 ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
2393 SDValue Chain = Op.getOperand(0);
// NOTE(review): the template argument of cast appears to have been dropped by
// this rendering -- presumably cast<CondCodeSDNode>; confirm against the real
// file.
2394 ISD::CondCode CC = cast(Op.getOperand(1))->get();
2395 SDValue LHS = Op.getOperand(2);
2396 SDValue RHS = Op.getOperand(3);
2397 SDValue Dest = Op.getOperand(4);
2398 DebugLoc dl = Op.getDebugLoc();
2399
// SeenZero is shared by both canChangeToInt calls, so it ends up true if
// either operand is a floating-point zero.
2400 bool SeenZero = false;
2401 if (canChangeToInt(LHS, SeenZero, Subtarget) &&
2402 canChangeToInt(RHS, SeenZero, Subtarget) &&
2403 // If one of the operand is zero, it's safe to ignore the NaN case.
2404 (FiniteOnlyFPMath() || SeenZero)) {
2405 // If unsafe fp math optimization is enabled and there are no other uses of
2406 // the CMP operands, and the condition code is EQ or NE, we can optimize it
2407 // to an integer comparison.
// Fold the ordered-equal / unordered-not-equal codes onto plain SETEQ / SETNE.
2408 if (CC == ISD::SETOEQ)
2409 CC = ISD::SETEQ;
2410 else if (CC == ISD::SETUNE)
2411 CC = ISD::SETNE;
2412
2413 SDValue ARMcc;
// f32: a single i32 compare feeding an ordinary BRCOND.
2414 if (LHS.getValueType() == MVT::f32) {
2415 LHS = bitcastf32Toi32(LHS, DAG);
2416 RHS = bitcastf32Toi32(RHS, DAG);
2417 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
2418 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2419 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
2420 Chain, Dest, ARMcc, CCR, Cmp);
2421 }
2422
// Otherwise: split each operand into two i32 words and emit a BCC_i64 node
// carrying (chain, cc, LHS words, RHS words, destination).
2423 SDValue LHS1, LHS2;
2424 SDValue RHS1, RHS2;
2425 expandf64Toi32(LHS, DAG, LHS1, LHS2);
2426 expandf64Toi32(RHS, DAG, RHS1, RHS2);
2427 ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
2428 ARMcc = DAG.getConstant(CondCode, MVT::i32);
2429 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag);
2430 SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
2431 return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops, 7);
2432 }
2433
// Not applicable: hand back an empty SDValue.
2434 return SDValue();
2435 }
2436
23542437 SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
2355 SDValue Chain = Op.getOperand(0);
2438 SDValue Chain = Op.getOperand(0);
23562439 ISD::CondCode CC = cast(Op.getOperand(1))->get();
2357 SDValue LHS = Op.getOperand(2);
2358 SDValue RHS = Op.getOperand(3);
2359 SDValue Dest = Op.getOperand(4);
2440 SDValue LHS = Op.getOperand(2);
2441 SDValue RHS = Op.getOperand(3);
2442 SDValue Dest = Op.getOperand(4);
23602443 DebugLoc dl = Op.getDebugLoc();
23612444
23622445 if (LHS.getValueType() == MVT::i32) {
2363 SDValue ARMCC;
2446 SDValue ARMcc;
2447 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
23642448 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2365 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, dl);
23662449 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
2367 Chain, Dest, ARMCC, CCR,Cmp);
2450 Chain, Dest, ARMcc, CCR, Cmp);
23682451 }
23692452
23702453 assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
2454
2455 if (UnsafeFPMath &&
2456 (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
2457 CC == ISD::SETNE || CC == ISD::SETUNE)) {
2458 SDValue Result = OptimizeVFPBrcond(Op, DAG);
2459 if (Result.getNode())
2460 return Result;
2461 }
2462
23712463 ARMCC::CondCodes CondCode, CondCode2;
23722464 FPCCToARMCC(CC, CondCode, CondCode2);
23732465
2374 SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32);
2375 SDValue Cmp = getVFPCmp(LHS, RHS, CC, ARMCC, DAG, dl);
2466 SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
2467 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
23762468 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
23772469 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag);
2378 SDValue Ops[] = { Chain, Dest, ARMCC, CCR, Cmp };
2470 SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
23792471 SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
23802472 if (CondCode2 != ARMCC::AL) {
2381 ARMCC = DAG.getConstant(CondCode2, MVT::i32);
2382 SDValue Ops[] = { Res, Dest, ARMCC, CCR, Res.getValue(1) };
2473 ARMcc = DAG.getConstant(CondCode2, MVT::i32);
2474 SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
23832475 Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
23842476 }
23852477 return Res;
24682560 EVT VT = Op.getValueType();
24692561 EVT SrcVT = Tmp1.getValueType();
24702562 SDValue AbsVal = DAG.getNode(ISD::FABS, dl, VT, Tmp0);
2471 SDValue ARMCC = DAG.getConstant(ARMCC::LT, MVT::i32);
2563 SDValue ARMcc = DAG.getConstant(ARMCC::LT, MVT::i32);
24722564 SDValue FP0 = DAG.getConstantFP(0.0, SrcVT);
2473 SDValue Cmp = getVFPCmp(Tmp1, FP0,
2474 ISD::SETLT, ARMCC, DAG, dl);
2565 SDValue Cmp = getVFPCmp(Tmp1, FP0, DAG, dl);
24752566 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2476 return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMCC, CCR, Cmp);
2567 return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMcc, CCR, Cmp);
24772568 }
24782569
24792570 SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
26102701 SDValue ShOpLo = Op.getOperand(0);
26112702 SDValue ShOpHi = Op.getOperand(1);
26122703 SDValue ShAmt = Op.getOperand(2);
2613 SDValue ARMCC;
2704 SDValue ARMcc;
26142705 unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
26152706
26162707 assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
26262717
26272718 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
26282719 SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
2629 ARMCC, DAG, dl);
2720 ARMcc, DAG, dl);
26302721 SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
2631 SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMCC,
2722 SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc,
26322723 CCR, Cmp);
26332724
26342725 SDValue Ops[2] = { Lo, Hi };
26462737 SDValue ShOpLo = Op.getOperand(0);
26472738 SDValue ShOpHi = Op.getOperand(1);
26482739 SDValue ShAmt = Op.getOperand(2);
2649 SDValue ARMCC;
2740 SDValue ARMcc;
26502741
26512742 assert(Op.getOpcode() == ISD::SHL_PARTS);
26522743 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
26602751 SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
26612752 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
26622753 SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
2663 ARMCC, DAG, dl);
2754 ARMcc, DAG, dl);
26642755 SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
2665 SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMCC,
2756 SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMcc,
26662757 CCR, Cmp);
26672758
26682759 SDValue Ops[2] = { Lo, Hi };
38243915 return BB;
38253916 }
38263917
/// OtherSucc - Return the first successor of MBB that is not Succ. Reaches
/// llvm_unreachable when MBB has no successor other than Succ.
3918 static
3919 MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
3920 for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
3921 E = MBB->succ_end(); I != E; ++I)
3922 if (*I != Succ)
3923 return *I;
3924 llvm_unreachable("Expecting a BB with two successors!");
3925 }
3926
38273927 MachineBasicBlock *
38283928 ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
38293929 MachineBasicBlock *BB) const {
39354035 TII->get(ARM::PHI), MI->getOperand(0).getReg())
39364036 .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
39374037 .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
4038
4039 MI->eraseFromParent(); // The pseudo instruction is gone now.
4040 return BB;
4041 }
4042
4043 case ARM::BCCi64:
4044 case ARM::BCCZi64: {
4045 // Compare both parts that make up the double comparison separately for
4046 // equality.
4047 bool RHSisZero = MI->getOpcode() == ARM::BCCZi64;
4048
4049 unsigned LHS1 = MI->getOperand(1).getReg();
4050 unsigned LHS2 = MI->getOperand(2).getReg();
4051 if (RHSisZero) {
4052 AddDefaultPred(BuildMI(BB, dl,
4053 TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
4054 .addReg(LHS1).addImm(0));
4055 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
4056 .addReg(LHS2).addImm(0)
4057 .addImm(ARMCC::EQ).addReg(ARM::CPSR);
4058 } else {
4059 unsigned RHS1 = MI->getOperand(3).getReg();
4060 unsigned RHS2 = MI->getOperand(4).getReg();
4061 AddDefaultPred(BuildMI(BB, dl,
4062 TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
4063 .addReg(LHS1).addReg(RHS1));
4064 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
4065 .addReg(LHS2).addReg(RHS2)
4066 .addImm(ARMCC::EQ).addReg(ARM::CPSR);
4067 }
4068
4069 MachineBasicBlock *destMBB = MI->getOperand(RHSisZero ? 3 : 5).getMBB();
4070 MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB);
4071 if (MI->getOperand(0).getImm() == ARMCC::NE)
4072 std::swap(destMBB, exitMBB);
4073
4074 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
4075 .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR);
4076 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2B : ARM::B))
4077 .addMBB(exitMBB);
39384078
39394079 MI->eraseFromParent(); // The pseudo instruction is gone now.
39404080 return BB;
5151 FMSTAT, // ARM fmstat instruction.
5252 CMOV, // ARM conditional move instructions.
5353 CNEG, // ARM conditional negate instructions.
54
55 BCC_i64,
5456
5557 RBIT, // ARM bitreverse instruction
5658
362364 DebugLoc dl, SelectionDAG &DAG) const;
363365
364366 SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
365 SDValue &ARMCC, SelectionDAG &DAG, DebugLoc dl) const;
366 SDValue getVFPCmp(SDValue &LHS, SDValue &RHS, ISD::CondCode CC,
367 SDValue &ARMCC, SelectionDAG &DAG, DebugLoc dl) const;
367 SDValue &ARMcc, SelectionDAG &DAG, DebugLoc dl) const;
368 SDValue getVFPCmp(SDValue LHS, SDValue RHS,
369 SelectionDAG &DAG, DebugLoc dl) const;
370
371 SDValue OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const;
368372
369373 MachineBasicBlock *EmitAtomicCmpSwap(MachineInstr *MI,
370374 MachineBasicBlock *BB,
3636 def SDT_ARMBr2JT : SDTypeProfile<0, 4,
3737 [SDTCisPtrTy<0>, SDTCisVT<1, i32>,
3838 SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
39
// Type profile for the BCC_i64 node: no results and six operands. Operands
// 0-4 are i32 and operand 5 is OtherVT.
40 def SDT_ARMBCC_i64 : SDTypeProfile<0, 6,
41 [SDTCisVT<0, i32>,
42 SDTCisVT<1, i32>, SDTCisVT<2, i32>,
43 SDTCisVT<3, i32>, SDTCisVT<4, i32>,
44 SDTCisVT<5, OtherVT>]>;
3945
4046 def SDT_ARMCmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>;
4147
8793 def ARMbrjt : SDNode<"ARMISD::BR_JT", SDT_ARMBrJT,
8894 [SDNPHasChain]>;
8995 def ARMbr2jt : SDNode<"ARMISD::BR2_JT", SDT_ARMBr2JT,
96 [SDNPHasChain]>;
97
// Selection-DAG node for ARMISD::BCC_i64; it uses the SDT_ARMBCC_i64 profile
// and carries a chain.
98 def ARMBcci64 : SDNode<"ARMISD::BCC_i64", SDT_ARMBCC_i64,
9099 [SDNPHasChain]>;
91100
92101 def ARMcmp : SDNode<"ARMISD::CMP", SDT_ARMCmp,
22772286
22782287 def : ARMPat<(ARMcmpZ GPR:$src, so_imm_neg:$imm),
22792288 (CMNzri GPR:$src, so_imm_neg:$imm)>;
2289
2290 // Pseudo i64 compares for some floating point compares.
// Both pseudos are branch terminators that define CPSR and are marked
// usesCustomInserter. The C++ custom inserter has a case for ARM::BCCi64 and
// ARM::BCCZi64 that emits the compare and branch instructions.
2291 let usesCustomInserter = 1, isBranch = 1, isTerminator = 1,
2292 Defs = [CPSR] in {
// General form: condition code, two LHS registers, two RHS registers, target.
2293 def BCCi64 : PseudoInst<(outs),
2294 (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, brtarget:$dst),
2295 IIC_Br,
2296 "${:comment} B\t$dst GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, imm:$cc",
2297 [(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, bb:$dst)]>;
2298
// Zero form: matches ARMBcci64 when both RHS operands are the literal 0, so
// only the two LHS registers are taken as inputs.
2299 def BCCZi64 : PseudoInst<(outs),
2300 (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, brtarget:$dst),
2301 IIC_Br,
2302 "${:comment} B\t$dst GPR:$lhs1, GPR:$lhs2, 0, 0, imm:$cc",
2303 [(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, 0, 0, bb:$dst)]>;
2304 } // usesCustomInserter
22802305
22812306
22822307 // Conditional moves
None ; RUN: llc < %s -march=arm -mattr=+vfp2 -enable-unsafe-fp-math -enable-finite-only-fp-math | FileCheck %s
0 ; RUN: llc < %s -march=arm -mcpu=cortex-a8 -mattr=+vfp2 -enable-unsafe-fp-math -enable-finite-only-fp-math | FileCheck -check-prefix=FINITE %s
1 ; RUN: llc < %s -march=arm -mcpu=cortex-a8 -mattr=+vfp2 -enable-unsafe-fp-math | FileCheck -check-prefix=NAN %s
12 ; rdar://7461510
23
34 define arm_apcscc i32 @t1(float* %a, float* %b) nounwind {
45 entry:
5 ; CHECK: t1:
6 ; CHECK-NOT: vldr
7 ; CHECK: ldr
8 ; CHECK: ldr
9 ; CHECK: cmp r0, r1
10 ; CHECK-NOT: vcmpe.f32
11 ; CHECK-NOT: vmrs
12 ; CHECK: beq
6 ; FINITE: t1:
7 ; FINITE-NOT: vldr
8 ; FINITE: ldr
9 ; FINITE: ldr
10 ; FINITE: cmp r0, r1
11 ; FINITE-NOT: vcmpe.f32
12 ; FINITE-NOT: vmrs
13 ; FINITE: beq
14
15 ; NAN: t1:
16 ; NAN: vldr.32 s0,
17 ; NAN: vldr.32 s1,
18 ; NAN: vcmpe.f32 s1, s0
19 ; NAN: vmrs apsr_nzcv, fpscr
20 ; NAN: beq
1321 %0 = load float* %a
1422 %1 = load float* %b
1523 %2 = fcmp une float %0, %1
2432 ret i32 %4
2533 }
2634
; t2 loads a double and branches on an ordered-equal compare against 0.0.
; The checks expect integer loads and compares with no VFP load or vmrs.
; REVIEW NOTE - the compare here is on doubles, so the negative check on
; vcmpe.f32 below can never fire; excluding vcmpe.f64 looks like the intended
; check. Confirm before changing the directive.
35 define arm_apcscc i32 @t2(double* %a, double* %b) nounwind {
36 entry:
37 ; FINITE: t2:
38 ; FINITE-NOT: vldr
39 ; FINITE: ldrd r0, [r0]
40 ; FINITE: cmp r0, #0
41 ; FINITE: cmpeq r1, #0
42 ; FINITE-NOT: vcmpe.f32
43 ; FINITE-NOT: vmrs
44 ; FINITE: bne
45 %0 = load double* %a
46 %1 = fcmp oeq double %0, 0.000000e+00
47 br i1 %1, label %bb1, label %bb2
48
49 bb1:
50 %2 = call i32 @bar()
51 ret i32 %2
52
53 bb2:
54 %3 = call i32 @foo()
55 ret i32 %3
56 }
57
; t3 loads a float and branches on an ordered-equal compare against 0.0.
; The checks expect an integer load and compare with no VFP load, vcmpe.f32
; or vmrs.
58 define arm_apcscc i32 @t3(float* %a, float* %b) nounwind {
59 entry:
60 ; FINITE: t3:
61 ; FINITE-NOT: vldr
62 ; FINITE: ldr r0, [r0]
63 ; FINITE: cmp r0, #0
64 ; FINITE-NOT: vcmpe.f32
65 ; FINITE-NOT: vmrs
66 ; FINITE: bne
67 %0 = load float* %a
68 %1 = fcmp oeq float %0, 0.000000e+00
69 br i1 %1, label %bb1, label %bb2
70
71 bb1:
72 %2 = call i32 @bar()
73 ret i32 %2
74
75 bb2:
76 %3 = call i32 @foo()
77 ret i32 %3
78 }
79
2780 declare i32 @bar()
2881 declare i32 @foo()