llvm.org GIT mirror llvm / 1e10f37
[LoopReroll] Make root-finding more aggressive. Allow using an instruction other than a mul or phi as the base for root-finding. For example, the included testcase contains a loop that requires using a getelementptr as the base for root-finding. Differential Revision: https://reviews.llvm.org/D26529 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@287588 91177308-0d34-0410-b5e6-96231b3b80d8 Eli Friedman 2 years ago
2 changed file(s) with 91 addition(s) and 52 deletion(s). Raw diff Collapse all Expand all
370370 protected:
371371 typedef MapVector UsesTy;
372372
373 bool findRootsRecursive(Instruction *IVU,
373 void findRootsRecursive(Instruction *IVU,
374374 SmallInstructionSet SubsumedInsts);
375375 bool findRootsBase(Instruction *IVU, SmallInstructionSet SubsumedInsts);
376376 bool collectPossibleRoots(Instruction *Base,
377377 std::map &Roots);
378 bool validateRootSet(DAGRootSet &DRS);
378379
379380 bool collectUsedInstructions(SmallInstructionSet &PossibleRedSet);
380381 void collectInLoopUserSet(const SmallInstructionVector &Roots,
826827 Roots[V] = cast(I);
827828 }
828829
829 if (Roots.empty())
830 // Make sure we have at least two roots.
831 if (Roots.empty() || (Roots.size() == 1 && BaseUsers.empty()))
830832 return false;
831833
832834 // If we found non-loop-inc, non-root users of Base, assume they are
860862 return true;
861863 }
862864
863 bool LoopReroll::DAGRootTracker::
865 void LoopReroll::DAGRootTracker::
864866 findRootsRecursive(Instruction *I, SmallInstructionSet SubsumedInsts) {
865867 // Does the user look like it could be part of a root set?
866868 // All its users must be simple arithmetic ops.
867869 if (I->getNumUses() > IL_MaxRerollIterations)
868 return false;
869
870 if ((I->getOpcode() == Instruction::Mul ||
871 I->getOpcode() == Instruction::PHI) &&
872 I != IV &&
873 findRootsBase(I, SubsumedInsts))
874 return true;
870 return;
871
872 if (I != IV && findRootsBase(I, SubsumedInsts))
873 return;
875874
876875 SubsumedInsts.insert(I);
877876
878877 for (User *V : I->users()) {
879 Instruction *I = dyn_cast(V);
878 Instruction *I = cast(V);
880879 if (is_contained(LoopIncs, I))
881880 continue;
882881
883 if (!I || !isSimpleArithmeticOp(I) ||
884 !findRootsRecursive(I, SubsumedInsts))
885 return false;
886 }
882 if (!isSimpleArithmeticOp(I))
883 continue;
884
885 // The recursive call makes a copy of SubsumedInsts.
886 findRootsRecursive(I, SubsumedInsts);
887 }
888 }
889
890 bool LoopReroll::DAGRootTracker::validateRootSet(DAGRootSet &DRS) {
891 if (DRS.Roots.empty())
892 return false;
893
894 // Consider a DAGRootSet with N-1 roots (so N different values including
895 // BaseInst).
896 // Define d = Roots[0] - BaseInst, which should be the same as
897 // Roots[I] - Roots[I-1] for all I in [1..N).
898 // Define D = BaseInst@J - BaseInst@J-1, where "@J" means the value at the
899 // loop iteration J.
900 //
901 // Now, For the loop iterations to be consecutive:
902 // D = d * N
903 const auto *ADR = dyn_cast(SE->getSCEV(DRS.BaseInst));
904 if (!ADR)
905 return false;
906 unsigned N = DRS.Roots.size() + 1;
907 const SCEV *StepSCEV = SE->getMinusSCEV(SE->getSCEV(DRS.Roots[0]), ADR);
908 const SCEV *ScaleSCEV = SE->getConstant(StepSCEV->getType(), N);
909 if (ADR->getStepRecurrence(*SE) != SE->getMulExpr(StepSCEV, ScaleSCEV))
910 return false;
911
887912 return true;
888913 }
889914
890915 bool LoopReroll::DAGRootTracker::
891916 findRootsBase(Instruction *IVU, SmallInstructionSet SubsumedInsts) {
892
893 // The base instruction needs to be a multiply so
894 // that we can erase it.
895 if (IVU->getOpcode() != Instruction::Mul &&
896 IVU->getOpcode() != Instruction::PHI)
917 // The base of a RootSet must be an AddRec, so it can be erased.
918 const auto *IVU_ADR = dyn_cast(SE->getSCEV(IVU));
919 if (!IVU_ADR || IVU_ADR->getLoop() != L)
897920 return false;
898921
899922 std::map V;
908931 // Partition the vector into monotonically increasing indexes.
909932 DAGRootSet DRS;
910933 DRS.BaseInst = nullptr;
934
935 SmallVector PotentialRootSets;
911936
912937 for (auto &KV : V) {
913938 if (!DRS.BaseInst) {
919944 DRS.Roots.push_back(KV.second);
920945 } else {
921946 // Linear sequence terminated.
922 RootSets.push_back(DRS);
947 if (!validateRootSet(DRS))
948 return false;
949
950 // Construct a new DAGRootSet with the next sequence.
951 PotentialRootSets.push_back(DRS);
923952 DRS.BaseInst = KV.second;
924 DRS.SubsumedInsts = SubsumedInsts;
925953 DRS.Roots.clear();
926954 }
927955 }
928 RootSets.push_back(DRS);
956
957 if (!validateRootSet(DRS))
958 return false;
959
960 PotentialRootSets.push_back(DRS);
961
962 RootSets.append(PotentialRootSets.begin(), PotentialRootSets.end());
929963
930964 return true;
931965 }
939973 if (isLoopIncrement(IVU, IV))
940974 LoopIncs.push_back(cast(IVU));
941975 }
942 if (!findRootsRecursive(IV, SmallInstructionSet()))
943 return false;
976 findRootsRecursive(IV, SmallInstructionSet());
944977 LoopIncs.push_back(IV);
945978 } else {
946979 if (!findRootsBase(IV, SmallInstructionSet()))
960993 }
961994 }
962995
963 // And ensure all loop iterations are consecutive. We rely on std::map
964 // providing ordered traversal.
965 for (auto &V : RootSets) {
966 const auto *ADR = dyn_cast(SE->getSCEV(V.BaseInst));
967 if (!ADR)
968 return false;
969
970 // Consider a DAGRootSet with N-1 roots (so N different values including
971 // BaseInst).
972 // Define d = Roots[0] - BaseInst, which should be the same as
973 // Roots[I] - Roots[I-1] for all I in [1..N).
974 // Define D = BaseInst@J - BaseInst@J-1, where "@J" means the value at the
975 // loop iteration J.
976 //
977 // Now, For the loop iterations to be consecutive:
978 // D = d * N
979
980 unsigned N = V.Roots.size() + 1;
981 const SCEV *StepSCEV = SE->getMinusSCEV(SE->getSCEV(V.Roots[0]), ADR);
982 const SCEV *ScaleSCEV = SE->getConstant(StepSCEV->getType(), N);
983 if (ADR->getStepRecurrence(*SE) != SE->getMulExpr(StepSCEV, ScaleSCEV)) {
984 DEBUG(dbgs() << "LRR: Aborting because iterations are not consecutive\n");
985 return false;
986 }
987 }
988996 Scale = RootSets[0].Roots.size() + 1;
989997
990998 if (Scale > IL_MaxRerollIterations) {
14971505 { // Limit the lifetime of SCEVExpander.
14981506 const DataLayout &DL = Header->getModule()->getDataLayout();
14991507 SCEVExpander Expander(*SE, DL, "reroll");
1500 Value *NewIV =
1501 Expander.expandCodeFor(NewIVSCEV, InstIV->getType(), &Header->front());
1508 Value *NewIV = Expander.expandCodeFor(NewIVSCEV, Inst->getType(),
1509 Header->getFirstNonPHIOrDbg());
15021510
15031511 for (auto &KV : Uses)
15041512 if (KV.second.find_first() == 0)
574574 ret void
575575 }
576576
577 define void @gep-indexing(i32* nocapture %x) {
578 entry:
579 %call = tail call i32 @foo(i32 0) #1
580 br label %for.body
581
582 for.body: ; preds = %for.body, %entry
583 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
584 %0 = mul nsw i64 %indvars.iv, 3
585 %arrayidx = getelementptr inbounds i32, i32* %x, i64 %0
586 store i32 %call, i32* %arrayidx, align 4
587 %arrayidx4 = getelementptr inbounds i32, i32* %arrayidx, i64 1
588 store i32 %call, i32* %arrayidx4, align 4
589 %arrayidx9 = getelementptr inbounds i32, i32* %arrayidx, i64 2
590 store i32 %call, i32* %arrayidx9, align 4
591 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
592 %exitcond = icmp eq i64 %indvars.iv.next, 500
593 br i1 %exitcond, label %for.end, label %for.body
594
595 ; CHECK-LABEL: @gep-indexing
596 ; CHECK: for.body:
597 ; CHECK-NEXT: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
598 ; CHECK-NEXT: %scevgep = getelementptr i32, i32* %x, i64 %indvars.iv
599 ; CHECK-NEXT: store i32 %call, i32* %scevgep, align 4
600 ; CHECK-NEXT: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
601 ; CHECK-NEXT: %exitcond2 = icmp eq i32* %scevgep, %scevgep1
602 ; CHECK-NEXT: br i1 %exitcond2, label %for.end, label %for.body
603
604 for.end: ; preds = %for.body
605 ret void
606 }
607
577608
578609 define void @unordered_atomic_ops(i32* noalias %buf_0, i32* noalias %buf_1) {
579610 ; CHECK-LABEL: @unordered_atomic_ops(