llvm.org GIT mirror llvm / fc933c0
indvars: LinearFunctionTestReplace for non-canonical IVs.

For -disable-iv-rewrite, perform LFTR without generating a new "canonical" induction variable. Instead, find the "best" existing induction variable for use in the loop exit test and compute the final value of that IV for use in the new loop exit test. In short, convert to a simple eq/ne exit test as long as it's cheap to do so.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@135420 91177308-0d34-0410-b5e6-96231b3b80d8

Andrew Trick, 8 years ago
3 changed file(s) with 526 addition(s) and 53 deletion(s).
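To make the change concrete, here is a minimal sketch (assumed for illustration, not taken from the commit) of what LFTR now does without a canonical IV: a guarded loop that exits on a signed compare is given a simple ne exit test against the trip count, reusing the existing induction variable.

; Before: the latch exits on a signed compare against the limit.
define void @lftr_sketch(i32 %n) nounwind {
entry:
  %guard = icmp sgt i32 %n, 0
  br i1 %guard, label %loop, label %exit
loop:
  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
  %iv.next = add nsw i32 %iv, 1
  %cmp = icmp slt i32 %iv.next, %n
  br i1 %cmp, label %loop, label %exit
exit:
  ret void
}
; After LFTR (sketch): the backedge-taken count is %n - 1, so the trip count
; is %n and the rewritten exit test compares the incremented IV with ne:
;   %exitcond = icmp ne i32 %iv.next, %n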
7878 "disable-iv-rewrite", cl::Hidden,
7979 cl::desc("Disable canonical induction variable rewriting"));
8080
81 // Temporary flag for use with -disable-iv-rewrite to force a canonical IV for
82 // LFTR purposes.
83 static cl::opt<bool> ForceLFTR(
84 "force-lftr", cl::Hidden,
85 cl::desc("Enable forced linear function test replacement"));
86
8187 namespace {
8288 class IndVarSimplify : public LoopPass {
8389 IVUsers *IU;
139145
140146 void RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter);
141147
142 ICmpInst *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
143 PHINode *IndVar,
144 SCEVExpander &Rewriter);
148 Value *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
149 PHINode *IndVar, SCEVExpander &Rewriter);
145150
146151 void SinkUnusedInvariants(Loop *L);
147152 };
10131018 NarrowUse->replaceUsesOfWith(NarrowDef, Trunc);
10141019 return 0;
10151020 }
1016 // We assume that block terminators are not SCEVable. We wouldn't want to
1021 // Assume block terminators cannot evaluate to a recurrence. We can't
10171022 // insert a Trunc after a terminator if there happens to be a critical edge.
10181023 assert(NarrowUse != NarrowUse->getParent()->getTerminator() &&
10191024 "SCEV is not expected to evaluate a block terminator");
13001305
13011306 // Get the symbolic expression for this instruction.
13021307 const SCEV *S = SE->getSCEV(I);
1303
1304 // We assume that terminators are not SCEVable.
1305 assert((!S || I != I->getParent()->getTerminator()) &&
1306 "can't fold terminators");
13071308
13081309 // Only consider affine recurrences.
13091310 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S);
14701471 }
14711472 }
14721473
1473 if (!DisableIVRewrite)
1474 if (!DisableIVRewrite || ForceLFTR)
14741475 return false;
14751476
14761477 // Recurse past add expressions, which commonly occur in the
15211522 /// getBackedgeIVType - Get the widest type used by the loop test after peeking
15221523 /// through Truncs.
15231524 ///
1524 /// TODO: Unnecessary if LFTR does not force a canonical IV.
1525 /// TODO: Unnecessary when ForceLFTR is removed.
15251526 static Type *getBackedgeIVType(Loop *L) {
15261527 if (!L->getExitingBlock())
15271528 return 0;
15481549 return Ty;
15491550 }
15501551
1552 /// isLoopInvariant - Perform a quick domtree based check for loop invariance
1553 /// assuming that V is used within the loop. LoopInfo::isLoopInvariant() seems
1554 /// gratuitous for this purpose.
1555 static bool isLoopInvariant(Value *V, Loop *L, DominatorTree *DT) {
1556 Instruction *Inst = dyn_cast<Instruction>(V);
1557 if (!Inst)
1558 return true;
1559
1560 return DT->properlyDominates(Inst->getParent(), L->getHeader());
1561 }
1562
1563 /// getLoopPhiForCounter - Return the loop header phi IFF IncV adds a loop
1564 /// invariant value to the phi.
1565 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) {
1566 Instruction *IncI = dyn_cast<Instruction>(IncV);
1567 if (!IncI)
1568 return 0;
1569
1570 switch (IncI->getOpcode()) {
1571 case Instruction::Add:
1572 case Instruction::Sub:
1573 break;
1574 case Instruction::GetElementPtr:
1575 // An IV counter must preserve its type.
1576 if (IncI->getNumOperands() == 2)
1577 break;
1578 default:
1579 return 0;
1580 }
1581
1582 PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
1583 if (Phi && Phi->getParent() == L->getHeader()) {
1584 if (isLoopInvariant(IncI->getOperand(1), L, DT))
1585 return Phi;
1586 return 0;
1587 }
1588 if (IncI->getOpcode() == Instruction::GetElementPtr)
1589 return 0;
1590
1591 // Allow add/sub to be commuted.
1592 Phi = dyn_cast<PHINode>(IncI->getOperand(1));
1593 if (Phi && Phi->getParent() == L->getHeader()) {
1594 if (isLoopInvariant(IncI->getOperand(0), L, DT))
1595 return Phi;
1596 }
1597 return 0;
1598 }
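As a rough illustration (shapes inferred from the code above, not from the commit), the increments getLoopPhiForCounter recognizes look like this, with the phi in the loop header and the other operand loop-invariant:

define void @counter_shapes(i8* %base, i32 %n) nounwind {
entry:
  %guard = icmp sgt i32 %n, 0
  br i1 %guard, label %loop, label %exit
loop:
  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
  %p = phi i8* [ %base, %entry ], [ %p.next, %loop ]
  %iv.next = add i32 %iv, 1                       ; add of phi and invariant step
  %p.next = getelementptr inbounds i8* %p, i32 1  ; two-operand GEP with phi as base
  store i8 0, i8* %p
  %cmp = icmp slt i32 %iv.next, %n
  br i1 %cmp, label %loop, label %exit
exit:
  ret void
}

Per the switch above, add and sub are accepted with the phi in either operand position, while a GEP is matched only with the phi as its pointer operand.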
1599
1600 /// needsLFTR - LinearFunctionTestReplace policy. Return true unless we can show
1601 /// that the current exit test is already sufficiently canonical.
1602 static bool needsLFTR(Loop *L, DominatorTree *DT) {
1603 assert(L->getExitingBlock() && "expected loop exit");
1604
1605 BasicBlock *LatchBlock = L->getLoopLatch();
1606 // Don't bother with LFTR if the loop is not properly simplified.
1607 if (!LatchBlock)
1608 return false;
1609
1610 BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
1611 assert(BI && "expected exit branch");
1612
1613 // Do LFTR to simplify the exit condition to an ICMP.
1614 ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
1615 if (!Cond)
1616 return true;
1617
1618 // Do LFTR to simplify the exit ICMP to EQ/NE
1619 ICmpInst::Predicate Pred = Cond->getPredicate();
1620 if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
1621 return true;
1622
1623 // Look for a loop invariant RHS
1624 Value *LHS = Cond->getOperand(0);
1625 Value *RHS = Cond->getOperand(1);
1626 if (!isLoopInvariant(RHS, L, DT)) {
1627 if (!isLoopInvariant(LHS, L, DT))
1628 return true;
1629 std::swap(LHS, RHS);
1630 }
1631 // Look for a simple IV counter LHS
1632 PHINode *Phi = dyn_cast<PHINode>(LHS);
1633 if (!Phi)
1634 Phi = getLoopPhiForCounter(LHS, L, DT);
1635
1636 if (!Phi)
1637 return true;
1638
1639 // Do LFTR if the exit condition's IV is *not* a simple counter.
1640 Value *IncV = Phi->getIncomingValueForBlock(L->getLoopLatch());
1641 return Phi != getLoopPhiForCounter(IncV, L, DT);
1642 }
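For contrast, a hedged example of a loop that needsLFTR leaves alone: the exit branch already tests the increment of a simple counter against a loop-invariant RHS with ne, so the function returns false.

define void @already_canonical(i32 %n) nounwind {
entry:
  %guard = icmp sgt i32 %n, 0
  br i1 %guard, label %loop, label %exit
loop:
  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
  %iv.next = add i32 %iv, 1
  %exitcond = icmp ne i32 %iv.next, %n
  br i1 %exitcond, label %loop, label %exit
exit:
  ret void
}

Note that %iv here is also "almost dead" in the sense of AlmostDeadIV below: its only uses are its own increment and, through it, the exit compare.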
1643
1644 /// AlmostDeadIV - Return true if this IV has no uses other than the (soon to
1645 /// be rewritten) loop exit test and its own increment.
1646 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
1647 int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
1648 Value *IncV = Phi->getIncomingValue(LatchIdx);
1649
1650 for (Value::use_iterator UI = Phi->use_begin(), UE = Phi->use_end();
1651 UI != UE; ++UI) {
1652 if (*UI != Cond && *UI != IncV) return false;
1653 }
1654
1655 for (Value::use_iterator UI = IncV->use_begin(), UE = IncV->use_end();
1656 UI != UE; ++UI) {
1657 if (*UI != Cond && *UI != Phi) return false;
1658 }
1659 return true;
1660 }
1661
1662 /// FindLoopCounter - Find an affine IV in canonical form.
1663 ///
1664 /// FIXME: Accept -1 stride and set IVLimit = IVInit - BECount
1665 ///
1666 /// FIXME: Accept non-unit stride as long as SCEV can reduce BECount * Stride.
1667 /// This is difficult in general for SCEV because of potential overflow. But we
1668 /// could at least handle constant BECounts.
1669 static PHINode *
1670 FindLoopCounter(Loop *L, const SCEV *BECount,
1671 ScalarEvolution *SE, DominatorTree *DT, const TargetData *TD) {
1672 // I'm not sure how BECount could be a pointer type, but we definitely don't
1673 // want to LFTR that.
1674 if (BECount->getType()->isPointerTy())
1675 return 0;
1676
1677 uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
1678
1679 Value *Cond =
1680 cast<BranchInst>(L->getExitingBlock()->getTerminator())->getCondition();
1681
1682 // Loop over all of the PHI nodes, looking for a simple counter.
1683 PHINode *BestPhi = 0;
1684 const SCEV *BestInit = 0;
1685 BasicBlock *LatchBlock = L->getLoopLatch();
1686 assert(LatchBlock && "needsLFTR should guarantee a loop latch");
1687
1688 for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
1689 PHINode *Phi = cast<PHINode>(I);
1690 if (!SE->isSCEVable(Phi->getType()))
1691 continue;
1692
1693 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
1694 if (!AR || AR->getLoop() != L || !AR->isAffine())
1695 continue;
1696
1697 // AR may be a pointer type, while BECount is an integer type.
1698 // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
1699 // AR must not be narrower than BECount, or we may never exit.
1700 uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
1701 if (PhiWidth < BCWidth || (TD && !TD->isLegalInteger(PhiWidth)))
1702 continue;
1703
1704 const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
1705 if (!Step || !Step->isOne())
1706 continue;
1707
1708 int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
1709 Value *IncV = Phi->getIncomingValue(LatchIdx);
1710 if (getLoopPhiForCounter(IncV, L, DT) != Phi)
1711 continue;
1712
1713 const SCEV *Init = AR->getStart();
1714
1715 if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
1716 // Don't force a live loop counter if another IV can be used.
1717 if (AlmostDeadIV(Phi, LatchBlock, Cond))
1718 continue;
1719
1720 // Prefer counting from zero, the more "canonical" counter form. As a side
1721 // effect, this also prefers integer IVs over pointer IVs.
1722 if (BestInit->isZero() != Init->isZero()) {
1723 if (BestInit->isZero())
1724 continue;
1725 }
1726 // If two IVs both count from zero or both count from nonzero then the
1727 // narrower is likely a dead phi that has been widened. Use the wider phi
1728 // to allow the other to be eliminated.
1729 if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
1730 continue;
1731 }
1732 BestPhi = Phi;
1733 BestInit = Init;
1734 }
1735 return BestPhi;
1736 }
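A sketch of the selection heuristic (names and shapes assumed, not from the commit): both phis below are affine unit-stride counters, but %iv32 is almost dead while %iv64 is live and wider, so FindLoopCounter settles on %iv64 and LFTR can let the narrow counter be deleted.

define void @pick_counter(i32 %n, i64* %out) nounwind {
entry:
  %guard = icmp sgt i32 %n, 0
  br i1 %guard, label %loop, label %exit
loop:
  %iv32 = phi i32 [ 0, %entry ], [ %iv32.next, %loop ]
  %iv64 = phi i64 [ 0, %entry ], [ %iv64.next, %loop ]
  store i64 %iv64, i64* %out                ; keeps the wide counter live
  %iv32.next = add i32 %iv32, 1
  %iv64.next = add i64 %iv64, 1
  %cmp = icmp slt i32 %iv32.next, %n
  br i1 %cmp, label %loop, label %exit
exit:
  ret void
}

Because the backedge-taken count here is only 32 bits wide, LinearFunctionTestReplace would then compare a trunc of %iv64.next (the "lftr.wideiv" truncation emitted below) against %n.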
1737
15511738 /// LinearFunctionTestReplace - This method rewrites the exit condition of the
15521739 /// loop to be a canonical != comparison against the incremented loop induction
15531740 /// variable. This pass is able to rewrite the exit tests of any loop where the
15541741 /// SCEV analysis can determine a loop-invariant trip count of the loop, which
15551742 /// is actually a much broader range than just linear tests.
1556 ICmpInst *IndVarSimplify::
1743 Value *IndVarSimplify::
15571744 LinearFunctionTestReplace(Loop *L,
15581745 const SCEV *BackedgeTakenCount,
15591746 PHINode *IndVar,
15611748 assert(canExpandBackedgeTakenCount(L, SE) && "precondition");
15621749 BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
15631750
1751 // In DisableIVRewrite mode, IndVar is not necessarily a canonical IV. In this
1752 // mode, LFTR can ignore IV overflow and truncate to the width of
1753 // BECount. This avoids materializing the add(zext(add)) expression.
1754 Type *CntTy = DisableIVRewrite ?
1755 BackedgeTakenCount->getType() : IndVar->getType();
1756
1757 const SCEV *IVLimit = BackedgeTakenCount;
1758
15641759 // If the exiting block is not the same as the backedge block, we must compare
15651760 // against the preincremented value, otherwise we prefer to compare against
15661761 // the post-incremented value.
15671762 Value *CmpIndVar;
1568 const SCEV *RHS = BackedgeTakenCount;
15691763 if (L->getExitingBlock() == L->getLoopLatch()) {
15701764 // Add one to the "backedge-taken" count to get the trip count.
15711765 // If this addition may overflow, we have to be more pessimistic and
15721766 // cast the induction variable before doing the add.
1573 const SCEV *Zero = SE->getConstant(BackedgeTakenCount->getType(), 0);
15741767 const SCEV *N =
1575 SE->getAddExpr(BackedgeTakenCount,
1576 SE->getConstant(BackedgeTakenCount->getType(), 1));
1577 if ((isa<SCEVConstant>(N) && !N->isZero()) ||
1578 SE->isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, N, Zero)) {
1579 // No overflow. Cast the sum.
1580 RHS = SE->getTruncateOrZeroExtend(N, IndVar->getType());
1581 } else {
1582 // Potential overflow. Cast before doing the add.
1583 RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount,
1584 IndVar->getType());
1585 RHS = SE->getAddExpr(RHS,
1586 SE->getConstant(IndVar->getType(), 1));
1587 }
1588
1768 SE->getAddExpr(IVLimit, SE->getConstant(IVLimit->getType(), 1));
1769 if (CntTy == IVLimit->getType())
1770 IVLimit = N;
1771 else {
1772 const SCEV *Zero = SE->getConstant(IVLimit->getType(), 0);
1773 if ((isa<SCEVConstant>(N) && !N->isZero()) ||
1774 SE->isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, N, Zero)) {
1775 // No overflow. Cast the sum.
1776 IVLimit = SE->getTruncateOrZeroExtend(N, CntTy);
1777 } else {
1778 // Potential overflow. Cast before doing the add.
1779 IVLimit = SE->getTruncateOrZeroExtend(IVLimit, CntTy);
1780 IVLimit = SE->getAddExpr(IVLimit, SE->getConstant(CntTy, 1));
1781 }
1782 }
15891783 // The BackedgeTaken expression contains the number of times that the
15901784 // backedge branches to the loop header. This is one less than the
15911785 // number of times the loop executes, so use the incremented indvar.
15921786 CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock());
15931787 } else {
15941788 // We have to use the preincremented value...
1595 RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount,
1596 IndVar->getType());
1789 IVLimit = SE->getTruncateOrZeroExtend(IVLimit, CntTy);
15971790 CmpIndVar = IndVar;
15981791 }
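// Worked example (illustrative): for a unit-stride counter with Start == 5
// and BECount == 9, the trip count is 10. Since we exit from the latch, the
// incremented IV is compared, and after the non-zero-start adjustment below
// the rewritten exit test is "icmp ne i32 %iv.next, 15", where
// 15 == Start + trip count.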
15991792
1793 // For unit stride, IVLimit = Start + BECount with 2's complement overflow.
1794 // So, for a non-zero start, compute the IVLimit here.
1795 bool isPtrIV = false;
1796 Type *CmpTy = CntTy;
1797 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
1798 assert(AR && AR->getLoop() == L && AR->isAffine() && "bad loop counter");
1799 if (!AR->getStart()->isZero()) {
1800 assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
1801 const SCEV *IVInit = AR->getStart();
1802
1803 // For pointer types, sign extend BECount in order to materialize a GEP.
1804 // Note that for DisableIVRewrite, we never run SCEVExpander on a
1805 // pointer type, because we must preserve the existing GEPs. Instead we
1806 // directly generate a GEP later.
1807 if (IVInit->getType()->isPointerTy()) {
1808 isPtrIV = true;
1809 CmpTy = SE->getEffectiveSCEVType(IVInit->getType());
1810 IVLimit = SE->getTruncateOrSignExtend(IVLimit, CmpTy);
1811 }
1812 // For integer types, truncate the IV before computing IVInit + BECount.
1813 else {
1814 if (SE->getTypeSizeInBits(IVInit->getType())
1815 > SE->getTypeSizeInBits(CmpTy))
1816 IVInit = SE->getTruncateExpr(IVInit, CmpTy);
1817
1818 IVLimit = SE->getAddExpr(IVInit, IVLimit);
1819 }
1820 }
16001821 // Expand the code for the iteration count.
1601 assert(SE->isLoopInvariant(RHS, L) &&
1822 IRBuilder<> Builder(BI);
1823
1824 assert(SE->isLoopInvariant(IVLimit, L) &&
16021825 "Computed iteration count is not loop invariant!");
1603 Value *ExitCnt = Rewriter.expandCodeFor(RHS, IndVar->getType(), BI);
1826 Value *ExitCnt = Rewriter.expandCodeFor(IVLimit, CmpTy, BI);
1827
1828 // Create a GEP for IVInit + IVLimit from an existing pointer base.
1829 assert(isPtrIV == IndVar->getType()->isPointerTy() &&
1830 "IndVar type must match IVInit type");
1831 if (isPtrIV) {
1832 Value *IVStart = IndVar->getIncomingValueForBlock(L->getLoopPreheader());
1833 assert(AR->getStart() == SE->getSCEV(IVStart) && "bad loop counter");
1834 const PointerType *PointerTy = cast<PointerType>(IVStart->getType());
1835 assert(SE->getSizeOfExpr(PointerTy->getElementType())->isOne() &&
1836 "unit stride pointer IV must be i8*");
1837
1838 Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());
1839 ExitCnt = Builder.CreateGEP(IVStart, ExitCnt, "lftr.limit");
1840 Builder.SetInsertPoint(BI);
1841 }
16041842
16051843 // Insert a new icmp_ne or icmp_eq instruction before the branch.
1606 ICmpInst::Predicate Opcode;
1844 ICmpInst::Predicate P;
16071845 if (L->contains(BI->getSuccessor(0)))
1608 Opcode = ICmpInst::ICMP_NE;
1846 P = ICmpInst::ICMP_NE;
16091847 else
1610 Opcode = ICmpInst::ICMP_EQ;
1848 P = ICmpInst::ICMP_EQ;
16111849
16121850 DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
16131851 << " LHS:" << *CmpIndVar << '\n'
16141852 << " op:\t"
1615 << (Opcode == ICmpInst::ICMP_NE ? "!=" : "==") << "\n"
1616 << " RHS:\t" << *RHS << "\n");
1617
1618 ICmpInst *Cond = new ICmpInst(BI, Opcode, CmpIndVar, ExitCnt, "exitcond");
1619 Cond->setDebugLoc(BI->getDebugLoc());
1853 << (P == ICmpInst::ICMP_NE ? "!=" : "==") << "\n"
1854 << " RHS:\t" << *ExitCnt << "\n"
1855 << " Expr:\t" << *IVLimit << "\n");
1856
1857 if (SE->getTypeSizeInBits(CmpIndVar->getType())
1858 > SE->getTypeSizeInBits(CmpTy)) {
1859 CmpIndVar = Builder.CreateTrunc(CmpIndVar, CmpTy, "lftr.wideiv");
1860 }
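// For example (sketch, value names assumed): a widened 64-bit IV tested
// against a 32-bit count would be rewritten as:
//   %lftr.wideiv = trunc i64 %iv.next to i32
//   %exitcond = icmp ne i32 %lftr.wideiv, %exit.count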
1861
1862 Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
16201863 Value *OrigCond = BI->getCondition();
16211864 // It's tempting to use replaceAllUsesWith here to fully replace the old
16221865 // comparison, but that's not immediately safe, since users of the old
17832026 // a canonical induction variable should be inserted.
17842027 Type *LargestType = 0;
17852028 bool NeedCannIV = false;
2029 bool ReuseIVForExit = DisableIVRewrite && !ForceLFTR;
17862030 bool ExpandBECount = canExpandBackedgeTakenCount(L, SE);
1787 if (ExpandBECount) {
2031 if (ExpandBECount && !ReuseIVForExit) {
17882032 // If we have a known trip count and a single exit block, we'll be
17892033 // rewriting the loop exit test condition below, which requires a
17902034 // canonical induction variable.
18472091 OldCannIV->insertBefore(L->getHeader()->getFirstNonPHI());
18482092 }
18492093 }
1850
2094 else if (ExpandBECount && ReuseIVForExit && needsLFTR(L, DT)) {
2095 IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT, TD);
2096 }
18512097 // If we have a trip count expression, rewrite the loop's exit condition
18522098 // using it. We can currently only handle loops with a single exit.
1853 ICmpInst *NewICmp = 0;
1854 if (ExpandBECount) {
1855 assert(canExpandBackedgeTakenCount(L, SE) &&
1856 "canonical IV disrupted BackedgeTaken expansion");
1857 assert(NeedCannIV &&
1858 "LinearFunctionTestReplace requires a canonical induction variable");
2099 Value *NewICmp = 0;
2100 if (ExpandBECount && IndVar) {
18592101 // Check preconditions for proper SCEVExpander operation. SCEV does not
18602102 // express SCEVExpander's dependencies, such as LoopSimplify. Instead any
18612103 // pass that uses the SCEVExpander must do it. This does not work well for
18932135
18942136 // For completeness, inform IVUsers of the IV use in the newly-created
18952137 // loop exit test instruction.
1896 if (NewICmp && IU)
1897 IU->AddUsersIfInteresting(cast<Instruction>(NewICmp->getOperand(0)));
1898
2138 if (IU && NewICmp) {
2139 ICmpInst *NewICmpInst = dyn_cast<ICmpInst>(NewICmp);
2140 if (NewICmpInst)
2141 IU->AddUsersIfInteresting(cast<Instruction>(NewICmpInst->getOperand(0)));
2142 }
18992143 // Clean up dead instructions.
19002144 Changed |= DeleteDeadPHIs(L->getHeader());
19012145 // Check a post-condition.
88 ; Note that all four functions should actually be converted to
99 ; memset. However, this test case validates indvars behavior. We
1010 ; don't check that phis are "folded together" because that is a job
11 ; for loop strength reduction. But indvars must remove sext, zext,
12 ; trunc, and add i8.
11 ; for loop strength reduction. But indvars must remove sext, zext, and add i8.
1312 ;
14 ; CHECK-NOT: {{sext|zext|trunc|add i8}}
13 ; CHECK-NOT: {{sext|zext|add i8}}
1514
1615 ; ModuleID = 'ada.bc'
1716 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-n:8:16:32"
0 ; RUN: opt < %s -indvars -disable-iv-rewrite -S | FileCheck %s
1 ;
2 ; Make sure that indvars can perform LFTR without a canonical IV.
3
4 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
5
6 ; Perform LFTR using the original pointer-type IV.
7
8 ; for(char* p = base; p < base + n; ++p) {
9 ; *p = p-base;
10 ; }
11 define void @ptriv(i8* %base, i32 %n) nounwind {
12 entry:
13 %idx.ext = sext i32 %n to i64
14 %add.ptr = getelementptr inbounds i8* %base, i64 %idx.ext
15 %cmp1 = icmp ult i8* %base, %add.ptr
16 br i1 %cmp1, label %for.body, label %for.end
17
18 ; CHECK: for.body:
19 ; CHECK: phi i8*
20 ; CHECK-NOT: phi
21 ; CHECK-NOT: add
22 ; CHECK: icmp ne i8*
23 ; CHECK: br i1
24 for.body:
25 %p.02 = phi i8* [ %base, %entry ], [ %incdec.ptr, %for.body ]
26 ; cruft to make the IV useful
27 %sub.ptr.lhs.cast = ptrtoint i8* %p.02 to i64
28 %sub.ptr.rhs.cast = ptrtoint i8* %base to i64
29 %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
30 %conv = trunc i64 %sub.ptr.sub to i8
31 store i8 %conv, i8* %p.02
32 %incdec.ptr = getelementptr inbounds i8* %p.02, i32 1
33 %cmp = icmp ult i8* %incdec.ptr, %add.ptr
34 br i1 %cmp, label %for.body, label %for.end
35
36 for.end:
37 ret void
38 }
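; A sketch of the expected rewrite for @ptriv (value names assumed, guided by
; the CHECK lines above): the limit is expanded as a GEP off the IV's start in
; the preheader, and the exit test becomes roughly:
;   %lftr.limit = getelementptr i8* %base, i64 %idx.ext
;   %exitcond = icmp ne i8* %incdec.ptr, %lftr.limit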
39
40 ; It would be nice if SCEV and any loop analysis could assume that
41 ; preheaders exist. Unfortunately, that is not always the case. This test
42 ; checks that SCEVExpander can handle an outer loop that has not yet
43 ; been simplified. As a result, the inner loop's exit test will not be
44 ; rewritten.
45 define void @expandOuterRecurrence(i32 %arg) nounwind {
46 entry:
47 %sub1 = sub nsw i32 %arg, 1
48 %cmp1 = icmp slt i32 0, %sub1
49 br i1 %cmp1, label %outer, label %exit
50
51 outer:
52 %i = phi i32 [ 0, %entry ], [ %i.inc, %outer.inc ]
53 %sub2 = sub nsw i32 %arg, %i
54 %sub3 = sub nsw i32 %sub2, 1
55 %cmp2 = icmp slt i32 0, %sub3
56 br i1 %cmp2, label %inner.ph, label %outer.inc
57
58 inner.ph:
59 br label %inner
60
61 ; CHECK: inner:
62 ; CHECK: icmp slt
63 ; CHECK: br i1
64 inner:
65 %j = phi i32 [ 0, %inner.ph ], [ %j.inc, %inner ]
66 %j.inc = add nsw i32 %j, 1
67 %cmp3 = icmp slt i32 %j.inc, %sub3
68 br i1 %cmp3, label %inner, label %outer.inc
69
70 ; CHECK: outer.inc:
71 ; CHECK: icmp ne
72 ; CHECK: br i1
73 outer.inc:
74 %i.inc = add nsw i32 %i, 1
75 %cmp4 = icmp slt i32 %i.inc, %sub1
76 br i1 %cmp4, label %outer, label %exit
77
78 exit:
79 ret void
80 }
81
82 ; Force SCEVExpander to look for an existing well-formed phi.
83 ; Perform LFTR without generating extra preheader code.
84 define void @guardedloop([0 x double]* %matrix, [0 x double]* %vector,
85 i32 %irow, i32 %ilead) nounwind {
86 ; CHECK: entry:
87 ; CHECK-NOT: zext
88 ; CHECK-NOT: add
89 ; CHECK: loop:
90 ; CHECK: phi i64
91 ; CHECK: phi i64
92 ; CHECK-NOT: phi
93 ; CHECK: icmp ne
94 ; CHECK: br i1
95 entry:
96 %cmp = icmp slt i32 1, %irow
97 br i1 %cmp, label %loop, label %return
98
99 loop:
100 %rowidx = phi i32 [ 0, %entry ], [ %row.inc, %loop ]
101 %i = phi i32 [ 0, %entry ], [ %i.inc, %loop ]
102 %diagidx = add nsw i32 %rowidx, %i
103 %diagidxw = sext i32 %diagidx to i64
104 %matrixp = getelementptr inbounds [0 x double]* %matrix, i32 0, i64 %diagidxw
105 %v1 = load double* %matrixp
106 %iw = sext i32 %i to i64
107 %vectorp = getelementptr inbounds [0 x double]* %vector, i32 0, i64 %iw
108 %v2 = load double* %vectorp
109 %row.inc = add nsw i32 %rowidx, %ilead
110 %i.inc = add nsw i32 %i, 1
111 %cmp196 = icmp slt i32 %i.inc, %irow
112 br i1 %cmp196, label %loop, label %return
113
114 return:
115 ret void
116 }
117
118 ; Avoid generating extra code to materialize a trip count. Skip LFTR.
119 define void @unguardedloop([0 x double]* %matrix, [0 x double]* %vector,
120 i32 %irow, i32 %ilead) nounwind {
121 entry:
122 br label %loop
123
124 ; CHECK: entry:
125 ; CHECK-NOT: zext
126 ; CHECK-NOT: add
127 ; CHECK: loop:
128 ; CHECK: phi i64
129 ; CHECK: phi i64
130 ; CHECK-NOT: phi
131 ; CHECK: icmp slt
132 ; CHECK: br i1
133 loop:
134 %rowidx = phi i32 [ 0, %entry ], [ %row.inc, %loop ]
135 %i = phi i32 [ 0, %entry ], [ %i.inc, %loop ]
136 %diagidx = add nsw i32 %rowidx, %i
137 %diagidxw = sext i32 %diagidx to i64
138 %matrixp = getelementptr inbounds [0 x double]* %matrix, i32 0, i64 %diagidxw
139 %v1 = load double* %matrixp
140 %iw = sext i32 %i to i64
141 %vectorp = getelementptr inbounds [0 x double]* %vector, i32 0, i64 %iw
142 %v2 = load double* %vectorp
143 %row.inc = add nsw i32 %rowidx, %ilead
144 %i.inc = add nsw i32 %i, 1
145 %cmp196 = icmp slt i32 %i.inc, %irow
146 br i1 %cmp196, label %loop, label %return
147
148 return:
149 ret void
150 }
151
152 ; Remove %i which is only used by the exit test.
153 ; Verify that SCEV can still compute a backedge count from the sign
154 ; extended %n, used for pointer comparison by LFTR.
155 define void @geplftr(i8* %base, i32 %x, i32 %y, i32 %n) nounwind {
156 entry:
157 %x.ext = sext i32 %x to i64
158 %add.ptr = getelementptr inbounds i8* %base, i64 %x.ext
159 %y.ext = sext i32 %y to i64
160 %add.ptr10 = getelementptr inbounds i8* %add.ptr, i64 %y.ext
161 %lim = add i32 %x, %n
162 %cmp.ph = icmp ult i32 %x, %lim
163 br i1 %cmp.ph, label %loop, label %exit
164
165 ; CHECK: loop:
166 ; CHECK: phi i8*
167 ; CHECK-NOT: phi
168 ; CHECK: getelementptr
169 ; CHECK: store
170 ; CHECK: icmp ne i8*
171 ; CHECK: br i1
172 loop:
173 %i = phi i32 [ %x, %entry ], [ %inc, %loop ]
174 %aptr = phi i8* [ %add.ptr10, %entry ], [ %incdec.ptr, %loop ]
175 %incdec.ptr = getelementptr inbounds i8* %aptr, i32 1
176 store i8 3, i8* %aptr
177 %inc = add i32 %i, 1
178 %cmp = icmp ult i32 %inc, %lim
179 br i1 %cmp, label %loop, label %exit
180
181 exit:
182 ret void
183 }
184
185 ; Exercise backedge taken count verification with a never-taken loop.
186 define void @nevertaken() nounwind uwtable ssp {
187 entry:
188 br label %loop
189
190 ; CHECK: loop:
191 ; CHECK-NOT: phi
192 ; CHECK-NOT: add
193 ; CHECK-NOT: icmp
194 ; CHECK: exit:
195 loop:
196 %i = phi i32 [ 0, %entry ], [ %inc, %loop ]
197 %inc = add nsw i32 %i, 1
198 %cmp = icmp sle i32 %inc, 0
199 br i1 %cmp, label %loop, label %exit
200
201 exit:
202 ret void
203 }
204
205 ; Test LFTR on an IV whose recurrence start is a non-unit pointer type.
206 define void @aryptriv([256 x i8]* %base, i32 %n) nounwind {
207 entry:
208 %ivstart = getelementptr inbounds [256 x i8]* %base, i32 0, i32 0
209 %ivend = getelementptr inbounds [256 x i8]* %base, i32 0, i32 %n
210 %cmp.ph = icmp ult i8* %ivstart, %ivend
211 br i1 %cmp.ph, label %loop, label %exit
212
213 ; CHECK: loop:
214 ; CHECK: phi i8*
215 ; CHECK-NOT: phi
216 ; CHECK: getelementptr
217 ; CHECK: store
218 ; CHECK: icmp ne i8*
219 ; CHECK: br i1
220 loop:
221 %aptr = phi i8* [ %ivstart, %entry ], [ %incdec.ptr, %loop ]
222 %incdec.ptr = getelementptr inbounds i8* %aptr, i32 1
223 store i8 3, i8* %aptr
224 %cmp = icmp ult i8* %incdec.ptr, %ivend
225 br i1 %cmp, label %loop, label %exit
226
227 exit:
228 ret void
229 }