llvm.org GIT mirror llvm / caba756
[LoopReroll] Introduce the concept of DAGRootSets.

A DAGRootSet models an induction variable being used in a rerollable loop. For example:

    x[i*3+0] = y1
    x[i*3+1] = y2
    x[i*3+2] = y3

    Base instruction -> i*3
                     +---+----+
                    /    |     \
                ST[y1]  +1     +2  <-- Roots
                         |      |
                       ST[y2] ST[y3]

There may be multiple DAGRootSets, for example:

    x[i*2+0] = ...   (1)
    x[i*2+1] = ...   (1)
    x[i*2+4] = ...   (2)
    x[i*2+5] = ...   (2)
    x[(i+1234)*2+5678] = ... (3)
    x[(i+1234)*2+5679] = ... (3)

This concept is similar to the "Scale" member used previously, but allows multiple independent sets of roots based off the same induction variable.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@228821 91177308-0d34-0410-b5e6-96231b3b80d8

James Molloy 4 years ago
2 changed file(s) with 567 addition(s) and 226 deletion(s). Raw diff Collapse all Expand all
125125 /// has to be less than 25 in order to fit into a SmallBitVector.
126126 IL_MaxRerollIterations = 16,
127127 /// The bitvector index used by loop induction variables and other
128 /// instructions that belong to no one particular iteration.
129 IL_LoopIncIdx,
128 /// instructions that belong to all iterations.
129 IL_All,
130130 IL_End
131131 };
132132
322322 DenseSet Reds;
323323 };
324324
325 // A DAGRootSet models an induction variable being used in a rerollable
326 // loop. For example,
327 //
328 // x[i*3+0] = y1
329 // x[i*3+1] = y2
330 // x[i*3+2] = y3
331 //
332 // Base instruction -> i*3
333 // +---+----+
334 // / | \
335 // ST[y1] +1 +2 <-- Roots
336 // | |
337 // ST[y2] ST[y3]
338 //
339 // There may be multiple DAGRoots, for example:
340 //
341 // x[i*2+0] = ... (1)
342 // x[i*2+1] = ... (1)
343 // x[i*2+4] = ... (2)
344 // x[i*2+5] = ... (2)
345 // x[(i+1234)*2+5678] = ... (3)
346 // x[(i+1234)*2+5679] = ... (3)
347 //
348 // The loop will be rerolled by adding a new loop induction variable,
349 // one for the Base instruction in each DAGRootSet.
350 //
351 struct DAGRootSet {
352 Instruction *BaseInst;
353 SmallInstructionVector Roots;
354 // The instructions between IV and BaseInst (but not including BaseInst).
355 SmallInstructionSet SubsumedInsts;
356 };
357
325358 // The set of all DAG roots, and state tracking of all roots
326359 // for a particular induction variable.
327360 struct DAGRootTracker {
344377 protected:
345378 typedef MapVector UsesTy;
346379
347 bool findScaleFromMul();
348 bool collectAllRoots();
380 bool findRootsRecursive(Instruction *IVU,
381 SmallInstructionSet SubsumedInsts);
382 bool findRootsBase(Instruction *IVU, SmallInstructionSet SubsumedInsts);
383 bool collectPossibleRoots(Instruction *Base,
384 std::map &Roots);
349385
350386 bool collectUsedInstructions(SmallInstructionSet &PossibleRedSet);
351387 void collectInLoopUserSet(const SmallInstructionVector &Roots,
358394 DenseSet &Users);
359395
360396 UsesTy::iterator nextInstr(int Val, UsesTy &In, UsesTy::iterator I);
397 bool isBaseInst(Instruction *I);
398 bool isRootInst(Instruction *I);
361399
362400 LoopReroll *Parent;
363401
376414 // to the indvar: a[i*2+0] = ...; a[i*2+1] = ... ;
377415 // If Inc is not 1, Scale = Inc.
378416 uint64_t Scale;
379 // If Scale != Inc, then RealIV is IV after its multiplication.
380 Instruction *RealIV;
381417 // The roots themselves.
382 SmallInstructionVector Roots;
418 SmallVector RootSets;
383419 // All increment instructions for IV.
384420 SmallInstructionVector LoopIncs;
385421 // Map of all instructions in the loop (in order) to the iterations
386 // they are used in (or specially, IL_LoopIncIdx for instructions
422 // they are used in (or specially, IL_All for instructions
387423 // used in the loop increment mechanism).
388424 UsesTy Uses;
389425 };
585621 return false;
586622 }
587623
624 /// Return true if IVU is a "simple" arithmetic operation.
625 /// This is used for narrowing the search space for DAGRoots; only arithmetic
626 /// and GEPs can be part of a DAGRoot.
627 static bool isSimpleArithmeticOp(User *IVU) {
628 if (Instruction *I = dyn_cast(IVU)) {
629 switch (I->getOpcode()) {
630 default: return false;
631 case Instruction::Add:
632 case Instruction::Sub:
633 case Instruction::Mul:
634 case Instruction::Shl:
635 case Instruction::AShr:
636 case Instruction::LShr:
637 case Instruction::GetElementPtr:
638 case Instruction::Trunc:
639 case Instruction::ZExt:
640 case Instruction::SExt:
641 return true;
642 }
643 }
644 return false;
645 }
646
647 static bool isLoopIncrement(User *U, Instruction *IV) {
648 BinaryOperator *BO = dyn_cast(U);
649 if (!BO || BO->getOpcode() != Instruction::Add)
650 return false;
651
652 for (auto *UU : BO->users()) {
653 PHINode *PN = dyn_cast(UU);
654 if (PN && PN == IV)
655 return true;
656 }
657 return false;
658 }
659
660 bool LoopReroll::DAGRootTracker::
661 collectPossibleRoots(Instruction *Base, std::map &Roots) {
662 SmallInstructionVector BaseUsers;
663
664 for (auto *I : Base->users()) {
665 ConstantInt *CI = nullptr;
666
667 if (isLoopIncrement(I, IV)) {
668 LoopIncs.push_back(cast(I));
669 continue;
670 }
671
672 // The root nodes must be either GEPs, ORs or ADDs.
673 if (auto *BO = dyn_cast(I)) {
674 if (BO->getOpcode() == Instruction::Add ||
675 BO->getOpcode() == Instruction::Or)
676 CI = dyn_cast(BO->getOperand(1));
677 } else if (auto *GEP = dyn_cast(I)) {
678 Value *LastOperand = GEP->getOperand(GEP->getNumOperands()-1);
679 CI = dyn_cast(LastOperand);
680 }
681
682 if (!CI) {
683 if (Instruction *II = dyn_cast(I)) {
684 BaseUsers.push_back(II);
685 continue;
686 } else {
687 DEBUG(dbgs() << "LRR: Aborting due to non-instruction: " << *I << "\n");
688 return false;
689 }
690 }
691
692 int64_t V = CI->getValue().getSExtValue();
693 if (Roots.find(V) != Roots.end())
694 // No duplicates, please.
695 return false;
696
697 // FIXME: Add support for negative values.
698 if (V < 0) {
699 DEBUG(dbgs() << "LRR: Aborting due to negative value: " << V << "\n");
700 return false;
701 }
702
703 Roots[V] = cast(I);
704 }
705
706 if (Roots.empty())
707 return false;
708
709 assert(Roots.find(0) == Roots.end() && "Didn't expect a zero index!");
710
711 // If we found non-loop-inc, non-root users of Base, assume they are
712 // for the zeroth root index. This is because "add %a, 0" gets optimized
713 // away.
714 if (BaseUsers.size())
715 Roots[0] = Base;
716
717 // Calculate the number of users of the base, or lowest indexed, iteration.
718 unsigned NumBaseUses = BaseUsers.size();
719 if (NumBaseUses == 0)
720 NumBaseUses = Roots.begin()->second->getNumUses();
721
722 // Check that every node has the same number of users.
723 for (auto &KV : Roots) {
724 if (KV.first == 0)
725 continue;
726 if (KV.second->getNumUses() != NumBaseUses) {
727 DEBUG(dbgs() << "LRR: Aborting - Root and Base #users not the same: "
728 << "#Base=" << NumBaseUses << ", #Root=" <<
729 KV.second->getNumUses() << "\n");
730 return false;
731 }
732 }
733
734 return true;
735 }
736
737 bool LoopReroll::DAGRootTracker::
738 findRootsRecursive(Instruction *I, SmallInstructionSet SubsumedInsts) {
739 // Does the user look like it could be part of a root set?
740 // All its users must be simple arithmetic ops.
741 if (I->getNumUses() > IL_MaxRerollIterations)
742 return false;
743
744 if ((I->getOpcode() == Instruction::Mul ||
745 I->getOpcode() == Instruction::PHI) &&
746 I != IV &&
747 findRootsBase(I, SubsumedInsts))
748 return true;
749
750 SubsumedInsts.insert(I);
751
752 for (User *V : I->users()) {
753 Instruction *I = dyn_cast(V);
754 if (std::find(LoopIncs.begin(), LoopIncs.end(), I) != LoopIncs.end())
755 continue;
756
757 if (!I || !isSimpleArithmeticOp(I) ||
758 !findRootsRecursive(I, SubsumedInsts))
759 return false;
760 }
761 return true;
762 }
763
764 bool LoopReroll::DAGRootTracker::
765 findRootsBase(Instruction *IVU, SmallInstructionSet SubsumedInsts) {
766
767 // The base instruction needs to be a multiply so
768 // that we can erase it.
769 if (IVU->getOpcode() != Instruction::Mul &&
770 IVU->getOpcode() != Instruction::PHI)
771 return false;
772
773 std::map V;
774 if (!collectPossibleRoots(IVU, V))
775 return false;
776
777 // If we didn't get a root for index zero, then IVU must be
778 // subsumed.
779 if (V.find(0) == V.end())
780 SubsumedInsts.insert(IVU);
781
782 // Partition the vector into monotonically increasing indexes.
783 DAGRootSet DRS;
784 DRS.BaseInst = nullptr;
785
786 for (auto &KV : V) {
787 if (!DRS.BaseInst) {
788 DRS.BaseInst = KV.second;
789 DRS.SubsumedInsts = SubsumedInsts;
790 } else if (DRS.Roots.empty()) {
791 DRS.Roots.push_back(KV.second);
792 } else if (V.find(KV.first - 1) != V.end()) {
793 DRS.Roots.push_back(KV.second);
794 } else {
795 // Linear sequence terminated.
796 RootSets.push_back(DRS);
797 DRS.BaseInst = KV.second;
798 DRS.SubsumedInsts = SubsumedInsts;
799 DRS.Roots.clear();
800 }
801 }
802 RootSets.push_back(DRS);
803
804 return true;
805 }
806
588807 bool LoopReroll::DAGRootTracker::findRoots() {
589808
590809 const SCEVAddRecExpr *RealIVSCEV = cast(SE->getSCEV(IV));
591810 Inc = cast(RealIVSCEV->getOperand(1))->
592811 getValue()->getZExtValue();
593812
594 // The effective induction variable, IV, is normally also the real induction
595 // variable. When we're dealing with a loop like:
596 // for (int i = 0; i < 500; ++i)
597 // x[3*i] = ...;
598 // x[3*i+1] = ...;
599 // x[3*i+2] = ...;
600 // then the real IV is still i, but the effective IV is (3*i).
601 Scale = Inc;
602 RealIV = IV;
603 if (Inc == 1 && !findScaleFromMul())
813 assert(RootSets.empty() && "Unclean state!");
814 if (Inc == 1) {
815 for (auto *IVU : IV->users()) {
816 if (isLoopIncrement(IVU, IV))
817 LoopIncs.push_back(cast(IVU));
818 }
819 if (!findRootsRecursive(IV, SmallInstructionSet()))
820 return false;
821 LoopIncs.push_back(IV);
822 } else {
823 if (!findRootsBase(IV, SmallInstructionSet()))
824 return false;
825 }
826
827 // Ensure all sets have the same size.
828 if (RootSets.empty()) {
829 DEBUG(dbgs() << "LRR: Aborting because no root sets found!\n");
604830 return false;
605
606 // The set of increment instructions for each increment value.
607 if (!collectAllRoots())
608 return false;
609
610 if (Roots.size() > IL_MaxRerollIterations) {
831 }
832 for (auto &V : RootSets) {
833 if (V.Roots.empty() || V.Roots.size() != RootSets[0].Roots.size()) {
834 DEBUG(dbgs()
835 << "LRR: Aborting because not all root sets have the same size\n");
836 return false;
837 }
838 }
839
840 // And ensure all loop iterations are consecutive. We rely on std::map
841 // providing ordered traversal.
842 for (auto &V : RootSets) {
843 const auto *ADR = dyn_cast(SE->getSCEV(V.BaseInst));
844 if (!ADR)
845 return false;
846
847 // Consider a DAGRootSet with N-1 roots (so N different values including
848 // BaseInst).
849 // Define d = Roots[0] - BaseInst, which should be the same as
850 // Roots[I] - Roots[I-1] for all I in [1..N).
851 // Define D = BaseInst@J - BaseInst@J-1, where "@J" means the value at the
852 // loop iteration J.
853 //
854 // Now, For the loop iterations to be consecutive:
855 // D = d * N
856
857 unsigned N = V.Roots.size() + 1;
858 const SCEV *StepSCEV = SE->getMinusSCEV(SE->getSCEV(V.Roots[0]), ADR);
859 const SCEV *ScaleSCEV = SE->getConstant(StepSCEV->getType(), N);
860 if (ADR->getStepRecurrence(*SE) != SE->getMulExpr(StepSCEV, ScaleSCEV)) {
861 DEBUG(dbgs() << "LRR: Aborting because iterations are not consecutive\n");
862 return false;
863 }
864 }
865 Scale = RootSets[0].Roots.size() + 1;
866
867 if (Scale > IL_MaxRerollIterations) {
611868 DEBUG(dbgs() << "LRR: Aborting - too many iterations found. "
612 << "#Found=" << Roots.size() << ", #Max=" << IL_MaxRerollIterations
869 << "#Found=" << Scale << ", #Max=" << IL_MaxRerollIterations
613870 << "\n");
614871 return false;
615872 }
616873
617 return true;
618 }
619
620 // Recognize loops that are setup like this:
621 //
622 // %iv = phi [ (preheader, ...), (body, %iv.next) ]
623 // %scaled.iv = mul %iv, scale
624 // f(%scaled.iv)
625 // %scaled.iv.1 = add %scaled.iv, 1
626 // f(%scaled.iv.1)
627 // %scaled.iv.2 = add %scaled.iv, 2
628 // f(%scaled.iv.2)
629 // %scaled.iv.scale_m_1 = add %scaled.iv, scale-1
630 // f(%scaled.iv.scale_m_1)
631 // ...
632 // %iv.next = add %iv, 1
633 // %cmp = icmp(%iv, ...)
634 // br %cmp, header, exit
635 //
636 // and, if found, set IV = %scaled.iv, and add %iv.next to LoopIncs.
637 bool LoopReroll::DAGRootTracker::findScaleFromMul() {
638
639 // This is a special case: here we're looking for all uses (except for
640 // the increment) to be multiplied by a common factor. The increment must
641 // be by one. This is to capture loops like:
642 // for (int i = 0; i < 500; ++i) {
643 // foo(3*i); foo(3*i+1); foo(3*i+2);
644 // }
645 if (RealIV->getNumUses() != 2)
646 return false;
647 const SCEVAddRecExpr *RealIVSCEV = cast(SE->getSCEV(RealIV));
648 Instruction *User1 = cast(*RealIV->user_begin()),
649 *User2 = cast(*std::next(RealIV->user_begin()));
650 if (!SE->isSCEVable(User1->getType()) || !SE->isSCEVable(User2->getType()))
651 return false;
652 const SCEVAddRecExpr *User1SCEV =
653 dyn_cast(SE->getSCEV(User1)),
654 *User2SCEV =
655 dyn_cast(SE->getSCEV(User2));
656 if (!User1SCEV || !User1SCEV->isAffine() ||
657 !User2SCEV || !User2SCEV->isAffine())
658 return false;
659
660 // We assume below that User1 is the scale multiply and User2 is the
661 // increment. If this can't be true, then swap them.
662 if (User1SCEV == RealIVSCEV->getPostIncExpr(*SE)) {
663 std::swap(User1, User2);
664 std::swap(User1SCEV, User2SCEV);
665 }
666
667 if (User2SCEV != RealIVSCEV->getPostIncExpr(*SE))
668 return false;
669 assert(User2SCEV->getStepRecurrence(*SE)->isOne() &&
670 "Invalid non-unit step for multiplicative scaling");
671 LoopIncs.push_back(User2);
672
673 if (const SCEVConstant *MulScale =
674 dyn_cast(User1SCEV->getStepRecurrence(*SE))) {
675 // Make sure that both the start and step have the same multiplier.
676 if (RealIVSCEV->getStart()->getType() != MulScale->getType())
677 return false;
678 if (SE->getMulExpr(RealIVSCEV->getStart(), MulScale) !=
679 User1SCEV->getStart())
680 return false;
681
682 ConstantInt *MulScaleCI = MulScale->getValue();
683 if (!MulScaleCI->uge(2) || MulScaleCI->uge(MaxInc))
684 return false;
685 Scale = MulScaleCI->getZExtValue();
686 IV = User1;
687 } else
688 return false;
689
690 DEBUG(dbgs() << "LRR: Found possible scaling " << *User1 << "\n");
691
692 assert(Scale <= MaxInc && "Scale is too large");
693 assert(Scale > 1 && "Scale must be at least 2");
694
695 return true;
696 }
697
698 // Collect all root increments with respect to the provided induction variable
699 // (normally the PHI, but sometimes a multiply). A root increment is an
700 // instruction, normally an add, with a positive constant less than Scale. In a
701 // rerollable loop, each of these increments is the root of an instruction
702 // graph isomorphic to the others. Also, we collect the final induction
703 // increment (the increment equal to the Scale), and its users in LoopIncs.
704 bool LoopReroll::DAGRootTracker::collectAllRoots() {
705 Roots.resize(Scale-1);
706
707 for (User *U : IV->users()) {
708 Instruction *UI = cast(U);
709 if (!SE->isSCEVable(UI->getType()))
710 continue;
711 if (UI->getType() != IV->getType())
712 continue;
713 if (!L->contains(UI))
714 continue;
715 if (hasUsesOutsideLoop(UI, L))
716 continue;
717
718 if (const SCEVConstant *Diff = dyn_cast(SE->getMinusSCEV(
719 SE->getSCEV(UI), SE->getSCEV(IV)))) {
720 uint64_t Idx = Diff->getValue()->getValue().getZExtValue();
721 if (Idx > 0 && Idx < Scale) {
722 if (Roots[Idx-1])
723 // No duplicates allowed.
724 return false;
725 Roots[Idx-1] = UI;
726 } else if (Idx == Scale && Inc > 1) {
727 LoopIncs.push_back(UI);
728 }
729 }
730 }
731
732 for (unsigned i = 0; i < Scale-1; ++i) {
733 if (!Roots[i])
734 return false;
735 }
874 DEBUG(dbgs() << "LRR: Successfully found roots: Scale=" << Scale << "\n");
736875
737876 return true;
738877 }
745884 }
746885
747886 SmallInstructionSet Exclude;
748 Exclude.insert(Roots.begin(), Roots.end());
887 for (auto &DRS : RootSets) {
888 Exclude.insert(DRS.Roots.begin(), DRS.Roots.end());
889 Exclude.insert(DRS.SubsumedInsts.begin(), DRS.SubsumedInsts.end());
890 Exclude.insert(DRS.BaseInst);
891 }
749892 Exclude.insert(LoopIncs.begin(), LoopIncs.end());
750893
751 DenseSet VBase;
752 collectInLoopUserSet(IV, Exclude, PossibleRedSet, VBase);
753 for (auto *I : VBase) {
754 Uses[I].set(0);
755 }
756
757 unsigned Idx = 1;
758 for (auto *Root : Roots) {
759 DenseSet V;
760 collectInLoopUserSet(Root, Exclude, PossibleRedSet, V);
761
762 // While we're here, check the use sets are the same size.
763 if (V.size() != VBase.size()) {
764 DEBUG(dbgs() << "LRR: Aborting - use sets are different sizes\n");
765 return false;
766 }
767
768 for (auto *I : V) {
769 Uses[I].set(Idx);
770 }
771 ++Idx;
894 for (auto &DRS : RootSets) {
895 DenseSet VBase;
896 collectInLoopUserSet(DRS.BaseInst, Exclude, PossibleRedSet, VBase);
897 for (auto *I : VBase) {
898 Uses[I].set(0);
899 }
900
901 unsigned Idx = 1;
902 for (auto *Root : DRS.Roots) {
903 DenseSet V;
904 collectInLoopUserSet(Root, Exclude, PossibleRedSet, V);
905
906 // While we're here, check the use sets are the same size.
907 if (V.size() != VBase.size()) {
908 DEBUG(dbgs() << "LRR: Aborting - use sets are different sizes\n");
909 return false;
910 }
911
912 for (auto *I : V) {
913 Uses[I].set(Idx);
914 }
915 ++Idx;
916 }
917
918 // Make sure our subsumed instructions are remembered too.
919 for (auto *I : DRS.SubsumedInsts) {
920 Uses[I].set(IL_All);
921 }
772922 }
773923
774924 // Make sure the loop increments are also accounted for.
925
775926 Exclude.clear();
776 Exclude.insert(Roots.begin(), Roots.end());
927 for (auto &DRS : RootSets) {
928 Exclude.insert(DRS.Roots.begin(), DRS.Roots.end());
929 Exclude.insert(DRS.SubsumedInsts.begin(), DRS.SubsumedInsts.end());
930 Exclude.insert(DRS.BaseInst);
931 }
777932
778933 DenseSet V;
779934 collectInLoopUserSet(LoopIncs, Exclude, PossibleRedSet, V);
780935 for (auto *I : V) {
781 Uses[I].set(IL_LoopIncIdx);
782 }
783 if (IV != RealIV)
784 Uses[RealIV].set(IL_LoopIncIdx);
936 Uses[I].set(IL_All);
937 }
785938
786939 return true;
787940
793946 while (I != In.end() && I->second.test(Val) == 0)
794947 ++I;
795948 return I;
949 }
950
951 bool LoopReroll::DAGRootTracker::isBaseInst(Instruction *I) {
952 for (auto &DRS : RootSets) {
953 if (DRS.BaseInst == I)
954 return true;
955 }
956 return false;
957 }
958
959 bool LoopReroll::DAGRootTracker::isRootInst(Instruction *I) {
960 for (auto &DRS : RootSets) {
961 if (std::find(DRS.Roots.begin(), DRS.Roots.end(), I) != DRS.Roots.end())
962 return true;
963 }
964 return false;
796965 }
797966
798967 bool LoopReroll::DAGRootTracker::validate(ReductionTracker &Reductions) {
822991
823992 // Make sure we mark the reduction PHIs as used in all iterations.
824993 for (auto *I : PossibleRedPHISet) {
825 Uses[I].set(IL_LoopIncIdx);
994 Uses[I].set(IL_All);
826995 }
827996
828997 // Make sure all instructions in the loop are in one and only one
8621031
8631032 // Skip over the IV or root instructions; only match their users.
8641033 bool Continue = false;
865 if (BaseInst == RealIV || BaseInst == IV) {
1034 if (isBaseInst(BaseInst)) {
8661035 BaseIt = nextInstr(0, Uses, ++BaseIt);
8671036 Continue = true;
8681037 }
869 if (std::find(Roots.begin(), Roots.end(), RootInst) != Roots.end()) {
1038 if (isRootInst(RootInst)) {
8701039 LastRootIt = RootIt;
8711040 RootIt = nextInstr(Iter, Uses, ++RootIt);
8721041 Continue = true;
9601129 continue;
9611130
9621131 DenseMap::iterator BMI = BaseMap.find(Op2);
963 if (BMI != BaseMap.end())
1132 if (BMI != BaseMap.end()) {
9641133 Op2 = BMI->second;
965 else if (Roots[Iter-1] == (Instruction*) Op2)
966 Op2 = IV;
1134 } else {
1135 for (auto &DRS : RootSets) {
1136 if (DRS.Roots[Iter-1] == (Instruction*) Op2) {
1137 Op2 = DRS.BaseInst;
1138 break;
1139 }
1140 }
1141 }
9671142
9681143 if (BaseInst->getOperand(Swapped ? unsigned(!j) : j) != Op2) {
9691144 // If we've not already decided to swap the matched operands, and
10061181 }
10071182
10081183 DEBUG(dbgs() << "LRR: Matched all iteration increments for " <<
1009 *RealIV << "\n");
1184 *IV << "\n");
10101185
10111186 return true;
10121187 }
10171192 for (BasicBlock::reverse_iterator J = Header->rbegin();
10181193 J != Header->rend();) {
10191194 unsigned I = Uses[&*J].find_first();
1020 if (I > 0 && I < IL_LoopIncIdx) {
1195 if (I > 0 && I < IL_All) {
10211196 Instruction *D = &*J;
10221197 DEBUG(dbgs() << "LRR: removing: " << *D << "\n");
10231198 D->eraseFromParent();
10271202 ++J;
10281203 }
10291204
1030 // Insert the new induction variable.
1031 const SCEVAddRecExpr *RealIVSCEV = cast(SE->getSCEV(RealIV));
1032 const SCEV *Start = RealIVSCEV->getStart();
1033 if (Inc == 1)
1034 Start = SE->getMulExpr(Start,
1035 SE->getConstant(Start->getType(), Scale));
1036 const SCEVAddRecExpr *H =
1037 cast(SE->getAddRecExpr(Start,
1038 SE->getConstant(RealIVSCEV->getType(), 1),
1039 L, SCEV::FlagAnyWrap));
1040 { // Limit the lifetime of SCEVExpander.
1041 SCEVExpander Expander(*SE, "reroll");
1042 Value *NewIV = Expander.expandCodeFor(H, IV->getType(), Header->begin());
1043
1044 for (auto &KV : Uses) {
1045 if (KV.second.find_first() == 0)
1046 KV.first->replaceUsesOfWith(IV, NewIV);
1047 }
1048
1049 if (BranchInst *BI = dyn_cast(Header->getTerminator())) {
1050 // FIXME: Why do we need this check?
1051 if (Uses[BI].find_first() == IL_LoopIncIdx) {
1052 const SCEV *ICSCEV = RealIVSCEV->evaluateAtIteration(IterCount, *SE);
1053 if (Inc == 1)
1054 ICSCEV =
1055 SE->getMulExpr(ICSCEV, SE->getConstant(ICSCEV->getType(), Scale));
1056 // Iteration count SCEV minus 1
1057 const SCEV *ICMinus1SCEV =
1058 SE->getMinusSCEV(ICSCEV, SE->getConstant(ICSCEV->getType(), 1));
1059
1060 Value *ICMinus1; // Iteration count minus 1
1061 if (isa(ICMinus1SCEV)) {
1062 ICMinus1 = Expander.expandCodeFor(ICMinus1SCEV, NewIV->getType(), BI);
1063 } else {
1064 BasicBlock *Preheader = L->getLoopPreheader();
1065 if (!Preheader)
1066 Preheader = InsertPreheaderForLoop(L, Parent);
1067
1068 ICMinus1 = Expander.expandCodeFor(ICMinus1SCEV, NewIV->getType(),
1069 Preheader->getTerminator());
1205 // We need to create a new induction variable for each different BaseInst.
1206 for (auto &DRS : RootSets) {
1207 // Insert the new induction variable.
1208 const SCEVAddRecExpr *RealIVSCEV =
1209 cast(SE->getSCEV(DRS.BaseInst));
1210 const SCEV *Start = RealIVSCEV->getStart();
1211 const SCEVAddRecExpr *H = cast
1212 (SE->getAddRecExpr(Start,
1213 SE->getConstant(RealIVSCEV->getType(), 1),
1214 L, SCEV::FlagAnyWrap));
1215 { // Limit the lifetime of SCEVExpander.
1216 SCEVExpander Expander(*SE, "reroll");
1217 Value *NewIV = Expander.expandCodeFor(H, IV->getType(), Header->begin());
1218
1219 for (auto &KV : Uses) {
1220 if (KV.second.find_first() == 0)
1221 KV.first->replaceUsesOfWith(DRS.BaseInst, NewIV);
1222 }
1223
1224 if (BranchInst *BI = dyn_cast(Header->getTerminator())) {
1225 // FIXME: Why do we need this check?
1226 if (Uses[BI].find_first() == IL_All) {
1227 const SCEV *ICSCEV = RealIVSCEV->evaluateAtIteration(IterCount, *SE);
1228
1229 // Iteration count SCEV minus 1
1230 const SCEV *ICMinus1SCEV =
1231 SE->getMinusSCEV(ICSCEV, SE->getConstant(ICSCEV->getType(), 1));
1232
1233 Value *ICMinus1; // Iteration count minus 1
1234 if (isa(ICMinus1SCEV)) {
1235 ICMinus1 = Expander.expandCodeFor(ICMinus1SCEV, NewIV->getType(), BI);
1236 } else {
1237 BasicBlock *Preheader = L->getLoopPreheader();
1238 if (!Preheader)
1239 Preheader = InsertPreheaderForLoop(L, Parent);
1240
1241 ICMinus1 = Expander.expandCodeFor(ICMinus1SCEV, NewIV->getType(),
1242 Preheader->getTerminator());
1243 }
1244
1245 Value *Cond =
1246 new ICmpInst(BI, CmpInst::ICMP_EQ, NewIV, ICMinus1, "exitcond");
1247 BI->setCondition(Cond);
1248
1249 if (BI->getSuccessor(1) != Header)
1250 BI->swapSuccessors();
10701251 }
1071
1072 Value *Cond =
1073 new ICmpInst(BI, CmpInst::ICMP_EQ, NewIV, ICMinus1, "exitcond");
1074 BI->setCondition(Cond);
1075
1076 if (BI->getSuccessor(1) != Header)
1077 BI->swapSuccessors();
10781252 }
10791253 }
10801254 }
321321 ret void
322322 }
323323
; Test case: two groups of three stores that all index off the same multiply
; (%0 = 3 * %indvars.iv), one group at offsets 0/1/2 and one at offsets 6/7/8.
; The expected output below keeps one store per group, indexed by
; %indvars.iv and by %indvars.iv + 6, and compares the latter against 1505
; for the loop exit.
324 ; void multi1(int *x) {
325 ; y = foo(0)
326 ; for (int i = 0; i < 500; ++i) {
327 ; x[3*i] = y;
328 ; x[3*i+1] = y;
329 ; x[3*i+2] = y;
330 ; x[3*i+6] = y;
331 ; x[3*i+7] = y;
332 ; x[3*i+8] = y;
333 ; }
334 ; }
335
336 ; Function Attrs: nounwind uwtable
337 define void @multi1(i32* nocapture %x) #0 {
338 entry:
339 %call = tail call i32 @foo(i32 0) #1
340 br label %for.body
341
342 for.body: ; preds = %for.body, %entry
343 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
344 %0 = mul nsw i64 %indvars.iv, 3
345 %arrayidx = getelementptr inbounds i32* %x, i64 %0
346 store i32 %call, i32* %arrayidx, align 4
347 %1 = add nsw i64 %0, 1
348 %arrayidx4 = getelementptr inbounds i32* %x, i64 %1
349 store i32 %call, i32* %arrayidx4, align 4
350 %2 = add nsw i64 %0, 2
351 %arrayidx9 = getelementptr inbounds i32* %x, i64 %2
352 store i32 %call, i32* %arrayidx9, align 4
353 %3 = add nsw i64 %0, 6
354 %arrayidx6 = getelementptr inbounds i32* %x, i64 %3
355 store i32 %call, i32* %arrayidx6, align 4
356 %4 = add nsw i64 %0, 7
357 %arrayidx7 = getelementptr inbounds i32* %x, i64 %4
358 store i32 %call, i32* %arrayidx7, align 4
359 %5 = add nsw i64 %0, 8
360 %arrayidx8 = getelementptr inbounds i32* %x, i64 %5
361 store i32 %call, i32* %arrayidx8, align 4
362 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
363 %exitcond = icmp eq i64 %indvars.iv.next, 500
364 br i1 %exitcond, label %for.end, label %for.body
365
366 ; CHECK-LABEL: @multi1
367
368 ; CHECK:for.body:
369 ; CHECK: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
370 ; CHECK: %0 = add i64 %indvars.iv, 6
371 ; CHECK: %arrayidx = getelementptr inbounds i32* %x, i64 %indvars.iv
372 ; CHECK: store i32 %call, i32* %arrayidx, align 4
373 ; CHECK: %arrayidx6 = getelementptr inbounds i32* %x, i64 %0
374 ; CHECK: store i32 %call, i32* %arrayidx6, align 4
375 ; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
376 ; CHECK: %exitcond2 = icmp eq i64 %0, 1505
377 ; CHECK: br i1 %exitcond2, label %for.end, label %for.body
378
379 for.end: ; preds = %for.body
380 ret void
381 }
382
; Test case: two groups of three stores built on two different multiplies,
; %0 = 3 * %indvars.iv and %newmul = 3 * (%indvars.iv + 1), each used at
; offsets 0/1/2. The expected output below keeps one store per group, indexed
; by %indvars.iv and by %indvars.iv + 3, and exits when %indvars.iv reaches
; 1499.
383 ; void multi2(int *x) {
384 ; y = foo(0)
385 ; for (int i = 0; i < 500; ++i) {
386 ; x[3*i] = y;
387 ; x[3*i+1] = y;
388 ; x[3*i+2] = y;
389 ; x[3*(i+1)] = y;
390 ; x[3*(i+1)+1] = y;
391 ; x[3*(i+1)+2] = y;
392 ; }
393 ; }
394
395 ; Function Attrs: nounwind uwtable
396 define void @multi2(i32* nocapture %x) #0 {
397 entry:
398 %call = tail call i32 @foo(i32 0) #1
399 br label %for.body
400
401 for.body: ; preds = %for.body, %entry
402 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
403 %0 = mul nsw i64 %indvars.iv, 3
404 %add = add nsw i64 %indvars.iv, 1
405 %newmul = mul nsw i64 %add, 3
406 %arrayidx = getelementptr inbounds i32* %x, i64 %0
407 store i32 %call, i32* %arrayidx, align 4
408 %1 = add nsw i64 %0, 1
409 %arrayidx4 = getelementptr inbounds i32* %x, i64 %1
410 store i32 %call, i32* %arrayidx4, align 4
411 %2 = add nsw i64 %0, 2
412 %arrayidx9 = getelementptr inbounds i32* %x, i64 %2
413 store i32 %call, i32* %arrayidx9, align 4
414 %arrayidx6 = getelementptr inbounds i32* %x, i64 %newmul
415 store i32 %call, i32* %arrayidx6, align 4
416 %3 = add nsw i64 %newmul, 1
417 %arrayidx7 = getelementptr inbounds i32* %x, i64 %3
418 store i32 %call, i32* %arrayidx7, align 4
419 %4 = add nsw i64 %newmul, 2
420 %arrayidx8 = getelementptr inbounds i32* %x, i64 %4
421 store i32 %call, i32* %arrayidx8, align 4
422 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
423 %exitcond = icmp eq i64 %indvars.iv.next, 500
424 br i1 %exitcond, label %for.end, label %for.body
425
426 ; CHECK-LABEL: @multi2
427
428 ; CHECK:for.body:
429 ; CHECK: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
430 ; CHECK: %0 = add i64 %indvars.iv, 3
431 ; CHECK: %arrayidx = getelementptr inbounds i32* %x, i64 %indvars.iv
432 ; CHECK: store i32 %call, i32* %arrayidx, align 4
433 ; CHECK: %arrayidx6 = getelementptr inbounds i32* %x, i64 %0
434 ; CHECK: store i32 %call, i32* %arrayidx6, align 4
435 ; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
436 ; CHECK: %exitcond2 = icmp eq i64 %indvars.iv, 1499
437 ; CHECK: br i1 %exitcond2, label %for.end, label %for.body
438
439 for.end: ; preds = %for.body
440 ret void
441 }
442
; Test case: a single group of three stores at offsets 3/4/5 from the
; multiply %0 = 3 * %indvars.iv; no store uses %0 directly (no zero index).
; The expected output below keeps one store, indexed by %indvars.iv + 3, and
; compares that value against 1502 for the loop exit.
; NOTE(review): %add below has no users inside this function - presumably
; left in on purpose as an extra user of the induction variable; confirm.
443 ; void multi3(int *x) {
444 ; y = foo(0)
445 ; for (int i = 0; i < 500; ++i) {
446 ; // Note: No zero index
447 ; x[3*i+3] = y;
448 ; x[3*i+4] = y;
449 ; x[3*i+5] = y;
450 ; }
451 ; }
452
453 ; Function Attrs: nounwind uwtable
454 define void @multi3(i32* nocapture %x) #0 {
455 entry:
456 %call = tail call i32 @foo(i32 0) #1
457 br label %for.body
458
459 for.body: ; preds = %for.body, %entry
460 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
461 %0 = mul nsw i64 %indvars.iv, 3
462 %x0 = add nsw i64 %0, 3
463 %add = add nsw i64 %indvars.iv, 1
464 %arrayidx = getelementptr inbounds i32* %x, i64 %x0
465 store i32 %call, i32* %arrayidx, align 4
466 %1 = add nsw i64 %0, 4
467 %arrayidx4 = getelementptr inbounds i32* %x, i64 %1
468 store i32 %call, i32* %arrayidx4, align 4
469 %2 = add nsw i64 %0, 5
470 %arrayidx9 = getelementptr inbounds i32* %x, i64 %2
471 store i32 %call, i32* %arrayidx9, align 4
472 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
473 %exitcond = icmp eq i64 %indvars.iv.next, 500
474 br i1 %exitcond, label %for.end, label %for.body
475
476 ; CHECK-LABEL: @multi3
477 ; CHECK: for.body:
478 ; CHECK: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
479 ; CHECK: %0 = add i64 %indvars.iv, 3
480 ; CHECK: %arrayidx = getelementptr inbounds i32* %x, i64 %0
481 ; CHECK: store i32 %call, i32* %arrayidx, align 4
482 ; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
483 ; CHECK: %exitcond1 = icmp eq i64 %0, 1502
484 ; CHECK: br i1 %exitcond1, label %for.end, label %for.body
485
486 for.end: ; preds = %for.body
487 ret void
488 }
489
490
324491 attributes #0 = { nounwind uwtable }
325492 attributes #1 = { nounwind }
326493