llvm.org GIT mirror llvm / c5e9935
[X86] Remove X86ISD::INC/DEC. Just select them from X86ISD::ADD/SUB at isel time. INC/DEC are pretty much the same as ADD/SUB except that they don't update the C flag. This patch removes the special nodes and just pattern matches from ADD/SUB during isel if the C flag isn't being used. I had to avoid selecting DEC if the result isn't used. This will become a SUB immediate which will be turned into a CMP later by optimizeCompareInstr. This led to the one test change where we use a CMP instead of a DEC for an overflow intrinsic since we only checked the flag. This also exposed a hole in our RMW flag matching use of hasNoCarryFlagUses. Our root node for the match is a store and there's no guarantee that all the flag users have been selected yet. So hasNoCarryFlagUses needs to check copyToReg and machine opcodes, but it also needs to check for the pre-match SETCC, SETCC_CARRY, BRCOND, and CMOV opcodes. Differential Revision: https://reviews.llvm.org/D55975 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@350245 91177308-0d34-0410-b5e6-96231b3b80d8 Craig Topper 9 months ago
8 changed file(s) with 184 addition(s) and 216 deletion(s). Raw diff Collapse all Expand all
28992899 isCommutativeIntrinsic(II))
29002900 std::swap(LHS, RHS);
29012901
2902 bool UseIncDec = false;
2903 if (isa(RHS) && cast(RHS)->isOne())
2904 UseIncDec = true;
2905
29062902 unsigned BaseOpc, CondOpc;
29072903 switch (II->getIntrinsicID()) {
29082904 default: llvm_unreachable("Unexpected intrinsic!");
29092905 case Intrinsic::sadd_with_overflow:
2910 BaseOpc = UseIncDec ? unsigned(X86ISD::INC) : unsigned(ISD::ADD);
2911 CondOpc = X86::SETOr;
2912 break;
2906 BaseOpc = ISD::ADD; CondOpc = X86::SETOr; break;
29132907 case Intrinsic::uadd_with_overflow:
29142908 BaseOpc = ISD::ADD; CondOpc = X86::SETBr; break;
29152909 case Intrinsic::ssub_with_overflow:
2916 BaseOpc = UseIncDec ? unsigned(X86ISD::DEC) : unsigned(ISD::SUB);
2917 CondOpc = X86::SETOr;
2918 break;
2910 BaseOpc = ISD::SUB; CondOpc = X86::SETOr; break;
29192911 case Intrinsic::usub_with_overflow:
29202912 BaseOpc = ISD::SUB; CondOpc = X86::SETBr; break;
29212913 case Intrinsic::smul_with_overflow:
29372929 { X86::DEC8r, X86::DEC16r, X86::DEC32r, X86::DEC64r }
29382930 };
29392931
2940 if (BaseOpc == X86ISD::INC || BaseOpc == X86ISD::DEC) {
2932 if (CI->isOne() && (BaseOpc == ISD::ADD || BaseOpc == ISD::SUB) &&
2933 CondOpc == X86::SETOr) {
2934 // We can use INC/DEC.
29412935 ResultReg = createResultReg(TLI.getRegClassFor(VT));
2942 bool IsDec = BaseOpc == X86ISD::DEC;
2936 bool IsDec = BaseOpc == ISD::SUB;
29432937 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
29442938 TII.get(Opc[IsDec][VT.SimpleTy-MVT::i8]), ResultReg)
29452939 .addReg(LHSReg, getKillRegState(LHSIsKill));
23262326 return true;
23272327 }
23282328
2329 static bool mayUseCarryFlag(X86::CondCode CC) {
2330 switch (CC) {
2331 // Comparisons which don't examine the CF flag.
2332 case X86::COND_O: case X86::COND_NO:
2333 case X86::COND_E: case X86::COND_NE:
2334 case X86::COND_S: case X86::COND_NS:
2335 case X86::COND_P: case X86::COND_NP:
2336 case X86::COND_L: case X86::COND_GE:
2337 case X86::COND_G: case X86::COND_LE:
2338 return false;
2339 // Anything else: assume conservatively.
2340 default:
2341 return true;
2342 }
2343 }
2344
23292345 /// Test whether the given node which sets flags has any uses which require the
23302346 /// CF flag to be accurate.
23312347 bool X86DAGToDAGISel::hasNoCarryFlagUses(SDValue Flags) const {
23352351 // Only check things that use the flags.
23362352 if (UI.getUse().getResNo() != Flags.getResNo())
23372353 continue;
2338 // Only examine CopyToReg uses that copy to EFLAGS.
2339 if (UI->getOpcode() != ISD::CopyToReg ||
2340 cast(UI->getOperand(1))->getReg() != X86::EFLAGS)
2354
2355 unsigned UIOpc = UI->getOpcode();
2356
2357 if (UIOpc == ISD::CopyToReg) {
2358 // Only examine CopyToReg uses that copy to EFLAGS.
2359 if (cast(UI->getOperand(1))->getReg() != X86::EFLAGS)
2360 return false;
2361 // Examine each user of the CopyToReg use.
2362 for (SDNode::use_iterator FlagUI = UI->use_begin(), FlagUE = UI->use_end();
2363 FlagUI != FlagUE; ++FlagUI) {
2364 // Only examine the Flag result.
2365 if (FlagUI.getUse().getResNo() != 1)
2366 continue;
2367 // Anything unusual: assume conservatively.
2368 if (!FlagUI->isMachineOpcode())
2369 return false;
2370 // Examine the condition code of the user.
2371 X86::CondCode CC = getCondFromOpc(FlagUI->getMachineOpcode());
2372
2373 if (mayUseCarryFlag(CC))
2374 return false;
2375 }
2376
2377 // This CopyToReg is ok. Move on to the next user.
2378 continue;
2379 }
2380
2381 // This might be an unselected node. So look for the pre-isel opcodes that
2382 // use flags.
2383 unsigned CCOpNo;
2384 switch (UIOpc) {
2385 default:
2386 // Something unusual. Be conservative.
23412387 return false;
2342 // Examine each user of the CopyToReg use.
2343 for (SDNode::use_iterator FlagUI = UI->use_begin(), FlagUE = UI->use_end();
2344 FlagUI != FlagUE; ++FlagUI) {
2345 // Only examine the Flag result.
2346 if (FlagUI.getUse().getResNo() != 1)
2347 continue;
2348 // Anything unusual: assume conservatively.
2349 if (!FlagUI->isMachineOpcode())
2350 return false;
2351 // Examine the condition code of the user.
2352 X86::CondCode CC = getCondFromOpc(FlagUI->getMachineOpcode());
2353
2354 switch (CC) {
2355 // Comparisons which don't examine the CF flag.
2356 case X86::COND_O: case X86::COND_NO:
2357 case X86::COND_E: case X86::COND_NE:
2358 case X86::COND_S: case X86::COND_NS:
2359 case X86::COND_P: case X86::COND_NP:
2360 case X86::COND_L: case X86::COND_GE:
2361 case X86::COND_G: case X86::COND_LE:
2362 continue;
2363 // Anything else: assume conservatively.
2364 default:
2365 return false;
2366 }
2367 }
2388 case X86ISD::SETCC: CCOpNo = 0; break;
2389 case X86ISD::SETCC_CARRY: CCOpNo = 0; break;
2390 case X86ISD::CMOV: CCOpNo = 2; break;
2391 case X86ISD::BRCOND: CCOpNo = 2; break;
2392 }
2393
2394 X86::CondCode CC = (X86::CondCode)UI->getConstantOperandVal(CCOpNo);
2395 if (mayUseCarryFlag(CC))
2396 return false;
23682397 }
23692398 return true;
23702399 }
25202549 switch (Opc) {
25212550 default:
25222551 return false;
2523 case X86ISD::INC:
2524 case X86ISD::DEC:
25252552 case X86ISD::SUB:
25262553 case X86ISD::SBB:
25272554 break;
25722599
25732600 MachineSDNode *Result;
25742601 switch (Opc) {
2575 case X86ISD::INC:
2576 case X86ISD::DEC: {
2577 unsigned NewOpc =
2578 Opc == X86ISD::INC
2579 ? SelectOpcode(X86::INC64m, X86::INC32m, X86::INC16m, X86::INC8m)
2580 : SelectOpcode(X86::DEC64m, X86::DEC32m, X86::DEC16m, X86::DEC8m);
2581 const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain};
2582 Result =
2583 CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other, Ops);
2584 break;
2585 }
25862602 case X86ISD::ADD:
2603 case X86ISD::SUB:
2604 // Try to match inc/dec.
2605 if (!Subtarget->slowIncDec() ||
2606 CurDAG->getMachineFunction().getFunction().optForSize()) {
2607 bool IsOne = isOneConstant(StoredVal.getOperand(1));
2608 bool IsNegOne = isAllOnesConstant(StoredVal.getOperand(1));
2609 // ADD/SUB with 1/-1 and carry flag isn't used can use inc/dec.
2610 if ((IsOne || IsNegOne) && hasNoCarryFlagUses(StoredVal.getValue(1))) {
2611 unsigned NewOpc =
2612 ((Opc == X86ISD::ADD) == IsOne)
2613 ? SelectOpcode(X86::INC64m, X86::INC32m, X86::INC16m, X86::INC8m)
2614 : SelectOpcode(X86::DEC64m, X86::DEC32m, X86::DEC16m, X86::DEC8m);
2615 const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain};
2616 Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32,
2617 MVT::Other, Ops);
2618 break;
2619 }
2620 }
2621 LLVM_FALLTHROUGH;
25872622 case X86ISD::ADC:
2588 case X86ISD::SUB:
25892623 case X86ISD::SBB:
25902624 case X86ISD::AND:
25912625 case X86ISD::OR:
1864318643 // which may be the result of a CAST. We use the variable 'Op', which is the
1864418644 // non-casted variable when we check for possible users.
1864518645 switch (ArithOp.getOpcode()) {
18646 case ISD::ADD:
18647 // We only want to rewrite this as a target-specific node with attached
18648 // flags if there is a reasonable chance of either using that to do custom
18649 // instructions selection that can fold some of the memory operands, or if
18650 // only the flags are used. If there are other uses, leave the node alone
18651 // and emit a test instruction.
18652 for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
18653 UE = Op.getNode()->use_end(); UI != UE; ++UI)
18654 if (UI->getOpcode() != ISD::CopyToReg &&
18655 UI->getOpcode() != ISD::SETCC &&
18656 UI->getOpcode() != ISD::STORE)
18657 goto default_case;
18658
18659 if (auto *C = dyn_cast(ArithOp.getOperand(1))) {
18660 // An add of one will be selected as an INC.
18661 if (C->isOne() &&
18662 (!Subtarget.slowIncDec() ||
18663 DAG.getMachineFunction().getFunction().optForSize())) {
18664 Opcode = X86ISD::INC;
18665 NumOperands = 1;
18666 break;
18667 }
18668
18669 // An add of negative one (subtract of one) will be selected as a DEC.
18670 if (C->isAllOnesValue() &&
18671 (!Subtarget.slowIncDec() ||
18672 DAG.getMachineFunction().getFunction().optForSize())) {
18673 Opcode = X86ISD::DEC;
18674 NumOperands = 1;
18675 break;
18676 }
18677 }
18678
18679 // Otherwise use a regular EFLAGS-setting add.
18680 Opcode = X86ISD::ADD;
18681 NumOperands = 2;
18682 break;
18683
1868418646 case ISD::AND:
1868518647 // If the primary 'and' result isn't used, don't bother using X86ISD::AND,
1868618648 // because a TEST instruction will be better.
1868818650 break;
1868918651
1869018652 LLVM_FALLTHROUGH;
18653 case ISD::ADD:
1869118654 case ISD::SUB:
1869218655 case ISD::OR:
1869318656 case ISD::XOR:
18694 // Similar to ISD::ADD above, check if the uses will preclude useful
18695 // lowering of the target-specific node.
18657 // Transform to an x86-specific ALU node with flags if there is a chance of
18658 // using an RMW op or only the flags are used. Otherwise, leave
18659 // the node alone and emit a 'test' instruction.
1869618660 for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
1869718661 UE = Op.getNode()->use_end(); UI != UE; ++UI)
1869818662 if (UI->getOpcode() != ISD::CopyToReg &&
1870318667 // Otherwise use a regular EFLAGS-setting instruction.
1870418668 switch (ArithOp.getOpcode()) {
1870518669 default: llvm_unreachable("unexpected operator!");
18670 case ISD::ADD: Opcode = X86ISD::ADD; break;
1870618671 case ISD::SUB: Opcode = X86ISD::SUB; break;
1870718672 case ISD::XOR: Opcode = X86ISD::XOR; break;
1870818673 case ISD::AND: Opcode = X86ISD::AND; break;
1871318678 break;
1871418679 case X86ISD::ADD:
1871518680 case X86ISD::SUB:
18716 case X86ISD::INC:
18717 case X86ISD::DEC:
1871818681 case X86ISD::OR:
1871918682 case X86ISD::XOR:
1872018683 case X86ISD::AND:
1960219565 switch (Op.getOpcode()) {
1960319566 default: llvm_unreachable("Unknown ovf instruction!");
1960419567 case ISD::SADDO:
19605 // A subtract of one will be selected as a INC. Note that INC doesn't
19606 // set CF, so we can't do this for UADDO.
19607 if (isOneConstant(RHS)) {
19608 BaseOp = X86ISD::INC;
19609 Cond = X86::COND_O;
19610 break;
19611 }
1961219568 BaseOp = X86ISD::ADD;
1961319569 Cond = X86::COND_O;
1961419570 break;
1961719573 Cond = X86::COND_B;
1961819574 break;
1961919575 case ISD::SSUBO:
19620 // A subtract of one will be selected as a DEC. Note that DEC doesn't
19621 // set CF, so we can't do this for USUBO.
19622 if (isOneConstant(RHS)) {
19623 BaseOp = X86ISD::DEC;
19624 Cond = X86::COND_O;
19625 break;
19626 }
1962719576 BaseOp = X86ISD::SUB;
1962819577 Cond = X86::COND_O;
1962919578 break;
1967419623 if (Op.getResNo() == 1 &&
1967519624 (Opc == X86ISD::ADD || Opc == X86ISD::SUB || Opc == X86ISD::ADC ||
1967619625 Opc == X86ISD::SBB || Opc == X86ISD::SMUL || Opc == X86ISD::UMUL ||
19677 Opc == X86ISD::INC || Opc == X86ISD::DEC || Opc == X86ISD::OR ||
19678 Opc == X86ISD::XOR || Opc == X86ISD::AND))
19626 Opc == X86ISD::OR || Opc == X86ISD::XOR || Opc == X86ISD::AND))
1967919627 return true;
1968019628
1968119629 return false;
2551025458 }
2551125459
2551225460 static SDValue lowerAtomicArithWithLOCK(SDValue N, SelectionDAG &DAG,
25513 const X86Subtarget &Subtarget,
25514 bool AllowIncDec = true) {
25461 const X86Subtarget &Subtarget) {
2551525462 unsigned NewOpc = 0;
2551625463 switch (N->getOpcode()) {
2551725464 case ISD::ATOMIC_LOAD_ADD:
2553425481 }
2553525482
2553625483 MachineMemOperand *MMO = cast(N)->getMemOperand();
25537
25538 if (auto *C = dyn_cast(N->getOperand(2))) {
25539 // Convert to inc/dec if they aren't slow or we are optimizing for size.
25540 if (AllowIncDec && (!Subtarget.slowIncDec() ||
25541 DAG.getMachineFunction().getFunction().optForSize())) {
25542 if ((NewOpc == X86ISD::LADD && C->isOne()) ||
25543 (NewOpc == X86ISD::LSUB && C->isAllOnesValue()))
25544 return DAG.getMemIntrinsicNode(X86ISD::LINC, SDLoc(N),
25545 DAG.getVTList(MVT::i32, MVT::Other),
25546 {N->getOperand(0), N->getOperand(1)},
25547 /*MemVT=*/N->getSimpleValueType(0), MMO);
25548 if ((NewOpc == X86ISD::LSUB && C->isOne()) ||
25549 (NewOpc == X86ISD::LADD && C->isAllOnesValue()))
25550 return DAG.getMemIntrinsicNode(X86ISD::LDEC, SDLoc(N),
25551 DAG.getVTList(MVT::i32, MVT::Other),
25552 {N->getOperand(0), N->getOperand(1)},
25553 /*MemVT=*/N->getSimpleValueType(0), MMO);
25554 }
25555 }
2555625484
2555725485 return DAG.getMemIntrinsicNode(
2555825486 NewOpc, SDLoc(N), DAG.getVTList(MVT::i32, MVT::Other),
2703326961 case X86ISD::LOR: return "X86ISD::LOR";
2703426962 case X86ISD::LXOR: return "X86ISD::LXOR";
2703526963 case X86ISD::LAND: return "X86ISD::LAND";
27036 case X86ISD::LINC: return "X86ISD::LINC";
27037 case X86ISD::LDEC: return "X86ISD::LDEC";
2703826964 case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL";
2703926965 case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD";
2704026966 case X86ISD::VTRUNC: return "X86ISD::VTRUNC";
2707226998 case X86ISD::SBB: return "X86ISD::SBB";
2707326999 case X86ISD::SMUL: return "X86ISD::SMUL";
2707427000 case X86ISD::UMUL: return "X86ISD::UMUL";
27075 case X86ISD::INC: return "X86ISD::INC";
27076 case X86ISD::DEC: return "X86ISD::DEC";
2707727001 case X86ISD::OR: return "X86ISD::OR";
2707827002 case X86ISD::XOR: return "X86ISD::XOR";
2707927003 case X86ISD::AND: return "X86ISD::AND";
3429634220 /*Chain*/ CmpLHS.getOperand(0), /*LHS*/ CmpLHS.getOperand(1),
3429734221 /*RHS*/ DAG.getConstant(-Addend, SDLoc(CmpRHS), CmpRHS.getValueType()),
3429834222 AN->getMemOperand());
34299 // If the comparision uses the CF flag we can't use INC/DEC instructions.
34300 bool NeedCF = false;
34301 switch (CC) {
34302 default: break;
34303 case X86::COND_A: case X86::COND_AE:
34304 case X86::COND_B: case X86::COND_BE:
34305 NeedCF = true;
34306 break;
34307 }
34308 auto LockOp = lowerAtomicArithWithLOCK(AtomicSub, DAG, Subtarget, !NeedCF);
34223 auto LockOp = lowerAtomicArithWithLOCK(AtomicSub, DAG, Subtarget);
3430934224 DAG.ReplaceAllUsesOfValueWith(CmpLHS.getValue(0),
3431034225 DAG.getUNDEF(CmpLHS.getValueType()));
3431134226 DAG.ReplaceAllUsesOfValueWith(CmpLHS.getValue(1), LockOp.getValue(1));
336336
337337 // Arithmetic operations with FLAGS results.
338338 ADD, SUB, ADC, SBB, SMUL, UMUL,
339 INC, DEC, OR, XOR, AND,
339 OR, XOR, AND,
340340
341341 // Bit field extract.
342342 BEXTR,
567567
568568 /// LOCK-prefixed arithmetic read-modify-write instructions.
569569 /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
570 LADD, LSUB, LOR, LXOR, LAND, LINC, LDEC,
570 LADD, LSUB, LOR, LXOR, LAND,
571571
572572 // Load, scalar_to_vector, and zero extend.
573573 VZEXT_LOAD,
421421 } // SchedRW
422422 } // CodeSize
423423
424 def X86add_flag_nocf : PatFrag<(ops node:$lhs, node:$rhs),
425 (X86add_flag node:$lhs, node:$rhs), [{
426 return hasNoCarryFlagUses(SDValue(N, 1));
427 }]>;
428
429 def X86sub_flag_nocf : PatFrag<(ops node:$lhs, node:$rhs),
430 (X86sub_flag node:$lhs, node:$rhs), [{
431 // Only use DEC if the result is used.
432 return !SDValue(N, 0).use_empty() && hasNoCarryFlagUses(SDValue(N, 1));
433 }]>;
434
424435 // TODO: inc/dec is slow for P4, but fast for Pentium-M.
425436 let Defs = [EFLAGS] in {
426437 let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in {
427438 let CodeSize = 2 in
428439 def INC8r : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
429440 "inc{b}\t$dst",
430 [(set GR8:$dst, EFLAGS, (X86inc_flag GR8:$src1))]>;
441 [(set GR8:$dst, EFLAGS, (X86add_flag_nocf GR8:$src1, 1))]>;
431442 let isConvertibleToThreeAddress = 1, CodeSize = 2 in { // Can xform into LEA.
432443 def INC16r : I<0xFF, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
433444 "inc{w}\t$dst",
434 [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))]>, OpSize16;
445 [(set GR16:$dst, EFLAGS, (X86add_flag_nocf GR16:$src1, 1))]>,
446 OpSize16;
435447 def INC32r : I<0xFF, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
436448 "inc{l}\t$dst",
437 [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))]>, OpSize32;
449 [(set GR32:$dst, EFLAGS, (X86add_flag_nocf GR32:$src1, 1))]>,
450 OpSize32;
438451 def INC64r : RI<0xFF, MRM0r, (outs GR64:$dst), (ins GR64:$src1), "inc{q}\t$dst",
439 [(set GR64:$dst, EFLAGS, (X86inc_flag GR64:$src1))]>;
452 [(set GR64:$dst, EFLAGS, (X86add_flag_nocf GR64:$src1, 1))]>;
440453 } // isConvertibleToThreeAddress = 1, CodeSize = 2
441454
442455 // Short forms only valid in 32-bit mode. Selected during MCInst lowering.
473486 let CodeSize = 2 in
474487 def DEC8r : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
475488 "dec{b}\t$dst",
476 [(set GR8:$dst, EFLAGS, (X86dec_flag GR8:$src1))]>;
489 [(set GR8:$dst, EFLAGS, (X86sub_flag_nocf GR8:$src1, 1))]>;
477490 let isConvertibleToThreeAddress = 1, CodeSize = 2 in { // Can xform into LEA.
478491 def DEC16r : I<0xFF, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
479492 "dec{w}\t$dst",
480 [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))]>, OpSize16;
493 [(set GR16:$dst, EFLAGS, (X86sub_flag_nocf GR16:$src1, 1))]>,
494 OpSize16;
481495 def DEC32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
482496 "dec{l}\t$dst",
483 [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))]>, OpSize32;
497 [(set GR32:$dst, EFLAGS, (X86sub_flag_nocf GR32:$src1, 1))]>,
498 OpSize32;
484499 def DEC64r : RI<0xFF, MRM1r, (outs GR64:$dst), (ins GR64:$src1), "dec{q}\t$dst",
485 [(set GR64:$dst, EFLAGS, (X86dec_flag GR64:$src1))]>;
500 [(set GR64:$dst, EFLAGS, (X86sub_flag_nocf GR64:$src1, 1))]>;
486501 } // isConvertibleToThreeAddress = 1, CodeSize = 2
487502
488503 // Short forms only valid in 32-bit mode. Selected during MCInst lowering.
775775 defm LOCK_AND : LOCK_ArithBinOp<0x20, 0x80, 0x83, MRM4m, X86lock_and, "and">;
776776 defm LOCK_XOR : LOCK_ArithBinOp<0x30, 0x80, 0x83, MRM6m, X86lock_xor, "xor">;
777777
778 multiclass LOCK_ArithUnOp Opc8, bits<8> Opc, Format Form,
779 string frag, string mnemonic> {
780 let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1,
781 SchedRW = [WriteALURMW] in {
782 def NAME#8m : I
783 !strconcat(mnemonic, "{b}\t$dst"),
784 [(set EFLAGS, (!cast(frag # "_8") addr:$dst))]>,
785 LOCK;
786 def NAME#16m : I
787 !strconcat(mnemonic, "{w}\t$dst"),
788 [(set EFLAGS, (!cast(frag # "_16") addr:$dst))]>,
789 OpSize16, LOCK;
790 def NAME#32m : I
791 !strconcat(mnemonic, "{l}\t$dst"),
792 [(set EFLAGS, (!cast(frag # "_32") addr:$dst))]>,
793 OpSize32, LOCK;
794 def NAME#64m : RI
795 !strconcat(mnemonic, "{q}\t$dst"),
796 [(set EFLAGS, (!cast(frag # "_64") addr:$dst))]>,
797 LOCK;
798 }
799 }
800
801 multiclass unary_atomic_intrin {
802 def _8 : PatFrag<(ops node:$ptr),
803 (atomic_op node:$ptr), [{
804 return cast(N)->getMemoryVT() == MVT::i8;
805 }]>;
806 def _16 : PatFrag<(ops node:$ptr),
807 (atomic_op node:$ptr), [{
808 return cast(N)->getMemoryVT() == MVT::i16;
809 }]>;
810 def _32 : PatFrag<(ops node:$ptr),
811 (atomic_op node:$ptr), [{
812 return cast(N)->getMemoryVT() == MVT::i32;
813 }]>;
814 def _64 : PatFrag<(ops node:$ptr),
815 (atomic_op node:$ptr), [{
816 return cast(N)->getMemoryVT() == MVT::i64;
817 }]>;
818 }
819
820 defm X86lock_inc : unary_atomic_intrin;
821 defm X86lock_dec : unary_atomic_intrin;
822
823 defm LOCK_INC : LOCK_ArithUnOp<0xFE, 0xFF, MRM0m, "X86lock_inc", "inc">;
824 defm LOCK_DEC : LOCK_ArithUnOp<0xFE, 0xFF, MRM1m, "X86lock_dec", "dec">;
778 def X86lock_add_nocf : PatFrag<(ops node:$lhs, node:$rhs),
779 (X86lock_add node:$lhs, node:$rhs), [{
780 return hasNoCarryFlagUses(SDValue(N, 0));
781 }]>;
782
783 def X86lock_sub_nocf : PatFrag<(ops node:$lhs, node:$rhs),
784 (X86lock_sub node:$lhs, node:$rhs), [{
785 return hasNoCarryFlagUses(SDValue(N, 0));
786 }]>;
787
788 let Predicates = [UseIncDec] in {
789 let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1,
790 SchedRW = [WriteALURMW] in {
791 def LOCK_INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst),
792 "inc{b}\t$dst",
793 [(set EFLAGS, (X86lock_add_nocf addr:$dst, (i8 1)))]>,
794 LOCK;
795 def LOCK_INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst),
796 "inc{w}\t$dst",
797 [(set EFLAGS, (X86lock_add_nocf addr:$dst, (i16 1)))]>,
798 OpSize16, LOCK;
799 def LOCK_INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst),
800 "inc{l}\t$dst",
801 [(set EFLAGS, (X86lock_add_nocf addr:$dst, (i32 1)))]>,
802 OpSize32, LOCK;
803 def LOCK_INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst),
804 "inc{q}\t$dst",
805 [(set EFLAGS, (X86lock_add_nocf addr:$dst, (i64 1)))]>,
806 LOCK;
807
808 def LOCK_DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst),
809 "dec{b}\t$dst",
810 [(set EFLAGS, (X86lock_sub_nocf addr:$dst, (i8 1)))]>,
811 LOCK;
812 def LOCK_DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst),
813 "dec{w}\t$dst",
814 [(set EFLAGS, (X86lock_sub_nocf addr:$dst, (i16 1)))]>,
815 OpSize16, LOCK;
816 def LOCK_DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst),
817 "dec{l}\t$dst",
818 [(set EFLAGS, (X86lock_sub_nocf addr:$dst, (i32 1)))]>,
819 OpSize32, LOCK;
820 def LOCK_DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst),
821 "dec{q}\t$dst",
822 [(set EFLAGS, (X86lock_sub_nocf addr:$dst, (i64 1)))]>,
823 LOCK;
824 }
825
826 // Additional patterns for -1 constant.
827 def : Pat<(X86lock_add addr:$dst, (i8 -1)), (LOCK_DEC8m addr:$dst)>;
828 def : Pat<(X86lock_add addr:$dst, (i16 -1)), (LOCK_DEC16m addr:$dst)>;
829 def : Pat<(X86lock_add addr:$dst, (i32 -1)), (LOCK_DEC32m addr:$dst)>;
830 def : Pat<(X86lock_add addr:$dst, (i64 -1)), (LOCK_DEC64m addr:$dst)>;
831 def : Pat<(X86lock_sub addr:$dst, (i8 -1)), (LOCK_INC8m addr:$dst)>;
832 def : Pat<(X86lock_sub addr:$dst, (i16 -1)), (LOCK_INC16m addr:$dst)>;
833 def : Pat<(X86lock_sub addr:$dst, (i32 -1)), (LOCK_INC32m addr:$dst)>;
834 def : Pat<(X86lock_sub addr:$dst, (i64 -1)), (LOCK_INC64m addr:$dst)>;
835 }
825836
826837 // Atomic compare and swap.
827838 multiclass LCMPXCHG_UnOp Opc, Format Form, string mnemonic,
20172028 def : Pat<(add GR16:$src, -1), (DEC16r GR16:$src)>;
20182029 def : Pat<(add GR32:$src, -1), (DEC32r GR32:$src)>;
20192030 def : Pat<(add GR64:$src, -1), (DEC64r GR64:$src)>;
2031
2032 def : Pat<(X86add_flag_nocf GR8:$src, -1), (DEC8r GR8:$src)>;
2033 def : Pat<(X86add_flag_nocf GR16:$src, -1), (DEC16r GR16:$src)>;
2034 def : Pat<(X86add_flag_nocf GR32:$src, -1), (DEC32r GR32:$src)>;
2035 def : Pat<(X86add_flag_nocf GR64:$src, -1), (DEC64r GR64:$src)>;
2036 def : Pat<(X86sub_flag_nocf GR8:$src, -1), (INC8r GR8:$src)>;
2037 def : Pat<(X86sub_flag_nocf GR16:$src, -1), (INC16r GR16:$src)>;
2038 def : Pat<(X86sub_flag_nocf GR32:$src, -1), (INC32r GR32:$src)>;
2039 def : Pat<(X86sub_flag_nocf GR64:$src, -1), (INC64r GR64:$src)>;
20202040 }
20212041
20222042 // or reg/reg.
252252 def X86adc_flag : SDNode<"X86ISD::ADC", SDTBinaryArithWithFlagsInOut>;
253253 def X86sbb_flag : SDNode<"X86ISD::SBB", SDTBinaryArithWithFlagsInOut>;
254254
255 def X86inc_flag : SDNode<"X86ISD::INC", SDTUnaryArithWithFlags>;
256 def X86dec_flag : SDNode<"X86ISD::DEC", SDTUnaryArithWithFlags>;
257255 def X86or_flag : SDNode<"X86ISD::OR", SDTBinaryArithWithFlags,
258256 [SDNPCommutative]>;
259257 def X86xor_flag : SDNode<"X86ISD::XOR", SDTBinaryArithWithFlags,
274272 [SDNPHasChain, SDNPMayStore, SDNPMayLoad,
275273 SDNPMemOperand]>;
276274 def X86lock_and : SDNode<"X86ISD::LAND", SDTLockBinaryArithWithFlags,
277 [SDNPHasChain, SDNPMayStore, SDNPMayLoad,
278 SDNPMemOperand]>;
279
280 def X86lock_inc : SDNode<"X86ISD::LINC", SDTLockUnaryArithWithFlags,
281 [SDNPHasChain, SDNPMayStore, SDNPMayLoad,
282 SDNPMemOperand]>;
283 def X86lock_dec : SDNode<"X86ISD::LDEC", SDTLockUnaryArithWithFlags,
284275 [SDNPHasChain, SDNPMayStore, SDNPMayLoad,
285276 SDNPMemOperand]>;
286277
8282 define i1 @func3(i32 %x) nounwind {
8383 ; CHECK-LABEL: func3:
8484 ; CHECK: # %bb.0: # %entry
85 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
86 ; CHECK-NEXT: decl %eax
85 ; CHECK-NEXT: cmpl $1, {{[0-9]+}}(%esp)
8786 ; CHECK-NEXT: seto %al
8887 ; CHECK-NEXT: retl
8988 entry: