llvm.org GIT mirror llvm / cd98f42
Merging r348444: ------------------------------------------------------------------------ r348444 | matze | 2018-12-05 17:40:23 -0800 (Wed, 05 Dec 2018) | 15 lines AArch64: Fix invalid CCMP emission The code emitting AND-subtrees used to check whether any of the operands was an OR in order to figure out if the result needs to be negated. However the OR could be hidden in further subtrees and not immediately visible. Change the code so that canEmitConjunction() determines whether the result of the generated subtree needs to be negated. Cleanup emission logic to use this. I also changed the code a bit to make all negation decisions early before we actually emit the subtrees. This fixes http://llvm.org/PR39550 Differential Revision: https://reviews.llvm.org/D54137 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_70@348642 91177308-0d34-0410-b5e6-96231b3b80d8 Tom Stellard 1 year, 10 months ago
2 changed file(s) with 195 addition(s) and 100 deletion(s). Raw diff Collapse all Expand all
15201520 /// ccmp B, inv(CB), CA
15211521 /// check for CB flags
15221522 ///
1523 /// In general we can create code for arbitrary "... (and (and A B) C)"
1524 /// sequences. We can also implement some "or" expressions, because "(or A B)"
1525 /// is equivalent to "not (and (not A) (not B))" and we can implement some
1526 /// negation operations:
1527 /// We can negate the results of a single comparison by inverting the flags
1528 /// used when the predicate fails and inverting the flags tested in the next
1529 /// instruction; We can also negate the results of the whole previous
1530 /// conditional compare sequence by inverting the flags tested in the next
1531 /// instruction. However there is no way to negate the result of a partial
1532 /// sequence.
1523 /// This naturally lets us implement chains of AND operations with SETCC
1524 /// operands. And we can even implement some other situations by transforming
1525 /// them:
1526 /// - We can implement (NEG SETCC) i.e. negating a single comparison by
1527 /// negating the flags used in CCMP/FCCMP operations.
1528 /// - We can negate the result of a whole chain of CMP/CCMP/FCCMP operations
1529 /// by negating the flags we test for afterwards. i.e.
1530 /// NEG (CMP CCMP CCMP ...) can be implemented.
1531 /// - Note that we can only ever negate all previously processed results.
1532 /// What we can not implement by flipping the flags to test is a negation
1533 /// of two sub-trees (because the negation affects all sub-trees emitted so
1534 /// far, so the 2nd sub-tree we emit would also affect the first).
1535 /// With those tools we can implement some OR operations:
1536 /// - (OR (SETCC A) (SETCC B)) can be implemented via:
1537 /// NEG (AND (NEG (SETCC A)) (NEG (SETCC B)))
1538 /// - After transforming OR to NEG/AND combinations we may be able to use NEG
1539 /// elimination rules from earlier to implement the whole thing as a
1540 /// CCMP/FCCMP chain.
15331541 ///
1534 /// Therefore on encountering an "or" expression we can negate the subtree on
1535 /// one side and have to be able to push the negate to the leafs of the subtree
1536 /// on the other side (see also the comments in code). As complete example:
1537 /// "or (or (setCA (cmp A)) (setCB (cmp B)))
1538 /// (and (setCC (cmp C)) (setCD (cmp D)))"
1539 /// is transformed to
1540 /// "not (and (not (and (setCC (cmp C)) (setCC (cmp D))))
1541 /// (and (not (setCA (cmp A)) (not (setCB (cmp B))))))"
1542 /// and implemented as:
1542 /// As complete example:
1543 /// or (or (setCA (cmp A)) (setCB (cmp B)))
1544 /// (and (setCC (cmp C)) (setCD (cmp D)))
1545 /// can be reassociated to:
1546 /// or (and (setCC (cmp C)) (setCD (cmp D)))
1547 /// (or (setCA (cmp A)) (setCB (cmp B)))
1548 /// can be transformed to:
1549 /// not (and (not (and (setCC (cmp C)) (setCD (cmp D))))
1550 /// (and (not (setCA (cmp A))) (not (setCB (cmp B)))))
1551 /// which can be implemented as:
15431552 /// cmp C
15441553 /// ccmp D, inv(CD), CC
15451554 /// ccmp A, CA, inv(CD)
15461555 /// ccmp B, CB, inv(CA)
15471556 /// check for CB flags
1548 /// A counterexample is "or (and A B) (and C D)" which cannot be implemented
1549 /// by conditional compare sequences.
1557 ///
1558 /// A counterexample is "or (and A B) (and C D)" which translates to
1559 /// not (and (not (and (not A) (not B))) (not (and (not C) (not D)))), we
1560 /// can only implement 1 of the inner (not) operations, but not both!
15501561 /// @{
15511562
15521563 /// Create a conditional comparison; Use CCMP, CCMN or FCCMP as appropriate.
15861597
15871598 /// Returns true if @p Val is a tree of AND/OR/SETCC operations that can be
15881599 /// expressed as a conjunction. See \ref AArch64CCMP.
1589 /// \param CanNegate Set to true if we can also emit the negation of the
1590 /// tree as a conjunction.
1600 /// \param CanNegate Set to true if we can negate the whole sub-tree just by
1601 /// changing the conditions on the SETCC tests.
1602 /// (this means we can call emitConjunctionRec() with
1603 /// Negate==true on this sub-tree)
1604 /// \param MustBeFirst Set to true if this subtree needs to be negated and we
1605 /// cannot do the negation naturally. We are required to
1606 /// emit the subtree first in this case.
1607 /// \param WillNegate Is true if we are called when the result of this
1608 /// subexpression must be negated. This happens when the
1609 /// outer expression is an OR. We can use this fact to know
1610 /// that we have a double negation (or (or ...) ...) that
1611 /// can be implemented for free.
15911612 static bool canEmitConjunction(const SDValue Val, bool &CanNegate,
1613 bool &MustBeFirst, bool WillNegate,
15921614 unsigned Depth = 0) {
15931615 if (!Val.hasOneUse())
15941616 return false;
15971619 if (Val->getOperand(0).getValueType() == MVT::f128)
15981620 return false;
15991621 CanNegate = true;
1622 MustBeFirst = false;
16001623 return true;
16011624 }
16021625 // Protect against exponential runtime and stack overflow.
16031626 if (Depth > 6)
16041627 return false;
16051628 if (Opcode == ISD::AND || Opcode == ISD::OR) {
1629 bool IsOR = Opcode == ISD::OR;
16061630 SDValue O0 = Val->getOperand(0);
16071631 SDValue O1 = Val->getOperand(1);
16081632 bool CanNegateL;
1609 if (!canEmitConjunction(O0, CanNegateL, Depth+1))
1633 bool MustBeFirstL;
1634 if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, Depth+1))
16101635 return false;
16111636 bool CanNegateR;
1612 if (!canEmitConjunction(O1, CanNegateR, Depth+1))
1637 bool MustBeFirstR;
1638 if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, Depth+1))
16131639 return false;
16141640
1615 if (Opcode == ISD::OR) {
1616 // For an OR expression we need to be able to negate at least one side or
1617 // we cannot do the transformation at all.
1641 if (MustBeFirstL && MustBeFirstR)
1642 return false;
1643
1644 if (IsOR) {
1645 // For an OR expression we need to be able to naturally negate at least
1646 // one side or we cannot do the transformation at all.
16181647 if (!CanNegateL && !CanNegateR)
16191648 return false;
1620 // However if we can negate x and y, then we can change
1621 // (not (or x y))
1622 // into
1623 // (and (not x) (not y))
1624 // to eliminate the outer negation.
1625 CanNegate = CanNegateL && CanNegateR;
1649 // If the result of the OR will be negated and we can naturally negate
1650 // the leaves, then this sub-tree as a whole negates naturally.
1651 CanNegate = WillNegate && CanNegateL && CanNegateR;
1652 // If we cannot naturally negate the whole sub-tree, then this must be
1653 // emitted first.
1654 MustBeFirst = !CanNegate;
16261655 } else {
1627 // If the operands are OR expressions then we finally need to negate their
1628 // outputs, we can only do that for the operand with emitted last by
1629 // negating OutCC, not for both operands.
1630 bool NeedsNegOutL = O0->getOpcode() == ISD::OR;
1631 bool NeedsNegOutR = O1->getOpcode() == ISD::OR;
1632 if (NeedsNegOutL && NeedsNegOutR)
1633 return false;
1634 // We cannot negate an AND operation.
1656 assert(Opcode == ISD::AND && "Must be OR or AND");
1657 // We cannot naturally negate an AND operation.
16351658 CanNegate = false;
1659 MustBeFirst = MustBeFirstL || MustBeFirstR;
16361660 }
16371661 return true;
16381662 }
16451669 /// and conditional compare operations. @returns an NZCV flags producing node
16461670 /// and sets @p OutCC to the flags that should be tested or returns SDValue() if
16471671 /// transformation was not possible.
1648 /// On recursive invocations @p PushNegate may be set to true to have negation
1649 /// effects pushed to the tree leafs; @p Predicate is an NZCV flag predicate
1650 /// for the comparisons in the current subtree; @p Depth limits the search
1651 /// depth to avoid stack overflow.
1672 /// \p Negate is true if we want this sub-tree to be negated just by changing
1673 /// SETCC conditions.
16521674 static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val,
16531675 AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp,
16541676 AArch64CC::CondCode Predicate) {
16901712 return emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate, OutCC, DL,
16911713 DAG);
16921714 }
1693 assert((Opcode == ISD::AND || (Opcode == ISD::OR && Val->hasOneUse())) &&
1694 "Valid conjunction/disjunction tree");
1695
1696 // Check if both sides can be transformed.
1715 assert(Val->hasOneUse() && "Valid conjunction/disjunction tree");
1716
1717 bool IsOR = Opcode == ISD::OR;
1718
16971719 SDValue LHS = Val->getOperand(0);
1720 bool CanNegateL;
1721 bool MustBeFirstL;
1722 bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR);
1723 assert(ValidL && "Valid conjunction/disjunction tree");
1724 (void)ValidL;
1725
16981726 SDValue RHS = Val->getOperand(1);
1699
1700 // In case of an OR we need to negate our operands and the result.
1701 // (A v B) <=> not(not(A) ^ not(B))
1702 bool NegateOpsAndResult = Opcode == ISD::OR;
1703 // We can negate the results of all previous operations by inverting the
1704 // predicate flags giving us a free negation for one side. The other side
1705 // must be negatable by itself.
1706 if (NegateOpsAndResult) {
1707 // See which side we can negate.
1708 bool CanNegateL;
1709 bool isValidL = canEmitConjunction(LHS, CanNegateL);
1710 assert(isValidL && "Valid conjunction/disjunction tree");
1711 (void)isValidL;
1712
1713 #ifndef NDEBUG
1714 bool CanNegateR;
1715 bool isValidR = canEmitConjunction(RHS, CanNegateR);
1716 assert(isValidR && "Valid conjunction/disjunction tree");
1717 assert((CanNegateL || CanNegateR) && "Valid conjunction/disjunction tree");
1718 #endif
1719
1720 // Order the side which we cannot negate to RHS so we can emit it first.
1721 if (!CanNegateL)
1727 bool CanNegateR;
1728 bool MustBeFirstR;
1729 bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR);
1730 assert(ValidR && "Valid conjunction/disjunction tree");
1731 (void)ValidR;
1732
1733 // Swap sub-tree that must come first to the right side.
1734 if (MustBeFirstL) {
1735 assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
1736 std::swap(LHS, RHS);
1737 std::swap(CanNegateL, CanNegateR);
1738 std::swap(MustBeFirstL, MustBeFirstR);
1739 }
1740
1741 bool NegateR;
1742 bool NegateAfterR;
1743 bool NegateL;
1744 bool NegateAfterAll;
1745 if (Opcode == ISD::OR) {
1746 // Swap the sub-tree that we can negate naturally to the left.
1747 if (!CanNegateL) {
1748 assert(CanNegateR && "at least one side must be negatable");
1749 assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
1750 assert(!Negate);
17221751 std::swap(LHS, RHS);
1752 NegateR = false;
1753 NegateAfterR = true;
1754 } else {
1755 // Negate the right sub-tree if possible, otherwise negate the result.
1756 NegateR = CanNegateR;
1757 NegateAfterR = !CanNegateR;
1758 }
1759 NegateL = true;
1760 NegateAfterAll = !Negate;
17231761 } else {
1724 bool NeedsNegOutL = LHS->getOpcode() == ISD::OR;
1725 assert((!NeedsNegOutL || RHS->getOpcode() != ISD::OR) &&
1726 "Valid conjunction/disjunction tree");
1727 // Order the side where we need to negate the output flags to RHS so it
1728 // gets emitted first.
1729 if (NeedsNegOutL)
1730 std::swap(LHS, RHS);
1731 }
1732
1733 // Emit RHS. If we want to negate the tree we only need to push a negate
1734 // through if we are already in a PushNegate case, otherwise we can negate
1735 // the "flags to test" afterwards.
1762 assert(Opcode == ISD::AND && "Valid conjunction/disjunction tree");
1763 assert(!Negate && "Valid conjunction/disjunction tree");
1764
1765 NegateL = false;
1766 NegateR = false;
1767 NegateAfterR = false;
1768 NegateAfterAll = false;
1769 }
1770
1771 // Emit sub-trees.
17361772 AArch64CC::CondCode RHSCC;
1737 SDValue CmpR = emitConjunctionRec(DAG, RHS, RHSCC, Negate,
1738 CCOp, Predicate);
1739 if (NegateOpsAndResult && !Negate)
1773 SDValue CmpR = emitConjunctionRec(DAG, RHS, RHSCC, NegateR, CCOp, Predicate);
1774 if (NegateAfterR)
17401775 RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
1741 // Emit LHS. We may need to negate it.
1742 SDValue CmpL = emitConjunctionRec(DAG, LHS, OutCC,
1743 NegateOpsAndResult, CmpR,
1744 RHSCC);
1745 // If we transformed an OR to and AND then we have to negate the result
1746 // (or absorb the Negate parameter).
1747 if (NegateOpsAndResult && !Negate)
1776 SDValue CmpL = emitConjunctionRec(DAG, LHS, OutCC, NegateL, CmpR, RHSCC);
1777 if (NegateAfterAll)
17481778 OutCC = AArch64CC::getInvertedCondCode(OutCC);
17491779 return CmpL;
17501780 }
17561786 static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val,
17571787 AArch64CC::CondCode &OutCC) {
17581788 bool DummyCanNegate;
1759 if (!canEmitConjunction(Val, DummyCanNegate))
1789 bool DummyMustBeFirst;
1790 if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false))
17601791 return SDValue();
17611792
17621793 return emitConjunctionRec(DAG, Val, OutCC, false, SDValue(), AArch64CC::AL);
525525 ; CHECK-LABEL: select_or_one_olt:
526526 ; CHECK-LABEL: ; %bb.0:
527527 ; CHECK-NEXT: fcmp d0, d1
528 ; CHECK-NEXT: fccmp d0, d1, #1, ne
529 ; CHECK-NEXT: fccmp d2, d3, #8, vs
528 ; CHECK-NEXT: fccmp d0, d1, #8, le
529 ; CHECK-NEXT: fccmp d2, d3, #8, pl
530530 ; CHECK-NEXT: csel w0, w0, w1, mi
531531 ; CHECK-NEXT: ret
532532 define i32 @select_or_one_olt(double %v0, double %v1, double %v2, double %v3, i32 %a, i32 %b) #0 {
555555 ; CHECK-LABEL: select_or_ueq_olt:
556556 ; CHECK-LABEL: ; %bb.0:
557557 ; CHECK-NEXT: fcmp d0, d1
558 ; CHECK-NEXT: fccmp d0, d1, #8, le
559 ; CHECK-NEXT: fccmp d2, d3, #8, mi
558 ; CHECK-NEXT: fccmp d0, d1, #1, ne
559 ; CHECK-NEXT: fccmp d2, d3, #8, vc
560560 ; CHECK-NEXT: csel w0, w0, w1, mi
561561 ; CHECK-NEXT: ret
562562 define i32 @select_or_ueq_olt(double %v0, double %v1, double %v2, double %v3, i32 %a, i32 %b) #0 {
655655 ret i32 %sel
656656 }
657657
658 ; This testcase resembles the core problem of http://llvm.org/PR39550
659 ; (an OR operation is 2 levels deep but needs to be implemented first)
660 ; CHECK-LABEL: deep_or
661 ; CHECK: cmp w2, #20
662 ; CHECK-NEXT: ccmp w2, #15, #4, ne
663 ; CHECK-NEXT: ccmp w1, #0, #4, eq
664 ; CHECK-NEXT: ccmp w0, #0, #4, ne
665 ; CHECK-NEXT: csel w0, w4, w5, ne
666 ; CHECK-NEXT: ret
667 define i32 @deep_or(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %x, i32 %y) {
668 %c0 = icmp ne i32 %a0, 0
669 %c1 = icmp ne i32 %a1, 0
670 %c2 = icmp eq i32 %a2, 15
671 %c3 = icmp eq i32 %a2, 20
672
673 %or = or i1 %c2, %c3
674 %and0 = and i1 %or, %c1
675 %and1 = and i1 %and0, %c0
676 %sel = select i1 %and1, i32 %x, i32 %y
677 ret i32 %sel
678 }
679
680 ; Variation of deep_or, we still need to implement the OR first though.
681 ; CHECK-LABEL: deep_or1
682 ; CHECK: cmp w2, #20
683 ; CHECK-NEXT: ccmp w2, #15, #4, ne
684 ; CHECK-NEXT: ccmp w0, #0, #4, eq
685 ; CHECK-NEXT: ccmp w1, #0, #4, ne
686 ; CHECK-NEXT: csel w0, w4, w5, ne
687 ; CHECK-NEXT: ret
688 define i32 @deep_or1(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %x, i32 %y) {
689 %c0 = icmp ne i32 %a0, 0
690 %c1 = icmp ne i32 %a1, 0
691 %c2 = icmp eq i32 %a2, 15
692 %c3 = icmp eq i32 %a2, 20
693
694 %or = or i1 %c2, %c3
695 %and0 = and i1 %c0, %or
696 %and1 = and i1 %and0, %c1
697 %sel = select i1 %and1, i32 %x, i32 %y
698 ret i32 %sel
699 }
700
701 ; Variation of deep_or, we still need to implement the OR first though.
702 ; CHECK-LABEL: deep_or2
703 ; CHECK: cmp w2, #20
704 ; CHECK-NEXT: ccmp w2, #15, #4, ne
705 ; CHECK-NEXT: ccmp w1, #0, #4, eq
706 ; CHECK-NEXT: ccmp w0, #0, #4, ne
707 ; CHECK-NEXT: csel w0, w4, w5, ne
708 ; CHECK-NEXT: ret
709 define i32 @deep_or2(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %x, i32 %y) {
710 %c0 = icmp ne i32 %a0, 0
711 %c1 = icmp ne i32 %a1, 0
712 %c2 = icmp eq i32 %a2, 15
713 %c3 = icmp eq i32 %a2, 20
714
715 %or = or i1 %c2, %c3
716 %and0 = and i1 %c0, %c1
717 %and1 = and i1 %and0, %or
718 %sel = select i1 %and1, i32 %x, i32 %y
719 ret i32 %sel
720 }
721
658722 attributes #0 = { nounwind }