llvm.org GIT mirror llvm / e32bfe2
[SCEV] Try to reuse existing value during SCEV expansion Current SCEV expansion will expand SCEV as a sequence of operations and doesn't utilize values that already exist. This will introduce redundant computation which may not be cleaned up thoroughly by following optimizations. This patch introduces an ExprValueMap which is a map from SCEV to the set of equal values with the same SCEV. When a SCEV is expanded, the set of values is checked and reused whenever possible before generating a sequence of operations. Differential Revision: http://reviews.llvm.org/D12090 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@259662 91177308-0d34-0410-b5e6-96231b3b80d8 Wei Mi 3 years ago
12 changed file(s) with 205 addition(s) and 30 deletion(s). Raw diff Collapse all Expand all
2222
2323 #include "llvm/ADT/DenseSet.h"
2424 #include "llvm/ADT/FoldingSet.h"
25 #include "llvm/ADT/SetVector.h"
2526 #include "llvm/Analysis/LoopInfo.h"
2627 #include "llvm/IR/ConstantRange.h"
2728 #include "llvm/IR/Function.h"
381382 /// This SCEV is used to represent unknown trip counts and things.
382383 std::unique_ptr CouldNotCompute;
383384
385 /// HasRecMapType - The typedef for HasRecMap.
386 ///
387 typedef DenseMap HasRecMapType;
388
389 /// HasRecMap -- This is a cache to record whether a SCEV contains
390 /// any scAddRecExpr.
391 HasRecMapType HasRecMap;
392
393 /// ExprValueMapType - The typedef for ExprValueMap.
394 ///
395 typedef DenseMap> ExprValueMapType;
396
397 /// ExprValueMap -- This map records the original values from which
398 /// the SCEV expr is generated.
399 ExprValueMapType ExprValueMap;
400
384401 /// The typedef for ValueExprMap.
385402 ///
386403 typedef DenseMap >
819836 /// represents how SCEV will treat the given type, for which isSCEVable must
820837 /// return true. For pointer types, this is the pointer-sized integer type.
821838 Type *getEffectiveSCEVType(Type *Ty) const;
839
840 /// containsAddRecurrence - Return true if the SCEV is a scAddRecExpr or
841 /// it contains scAddRecExpr. The result will be cached in HasRecMap.
842 ///
843 bool containsAddRecurrence(const SCEV *S);
844
845 /// getSCEVValues - Return the Value set from which the SCEV expr is
846 /// generated.
847 SetVector *getSCEVValues(const SCEV *S);
848
849 /// eraseValueFromMap - Erase Value from ValueExprMap and ExprValueMap.
850 void eraseValueFromMap(Value *V);
822851
823852 /// Return a SCEV expression for the full generality of the specified
824853 /// expression.
114114 static cl::opt
115115 VerifySCEV("verify-scev",
116116 cl::desc("Verify ScalarEvolution's backedge taken counts (slow)"));
// Command-line flag "verify-scev-maps". In builds without NDEBUG,
// getSCEVValues checks this flag and, when it is set, asserts that every
// Value in the set it returns is still present in ValueExprMap.
// The first literal ends with a space so the concatenated help text reads
// "ScalarEvolution's ExprValueMap" rather than "ScalarEvolution'sExprValueMap".
117 static cl::opt
118 VerifySCEVMap("verify-scev-maps",
119 cl::desc("Verify no dangling value in ScalarEvolution's "
120 "ExprValueMap (slow)"));
117121
118122 //===----------------------------------------------------------------------===//
119123 // SCEV class definitions
33093313 return !F.FindOne;
33103314 }
33113315
3316 namespace {
3317 // Helper class working with SCEVTraversal to figure out if a SCEV contains
3318 // a sub SCEV of scAddRecExpr type. FindAddRecurrence::FoundOne is set
3319 // iff such a sub scAddRecExpr type SCEV is found.
3320 struct FindAddRecurrence {
3321 bool FoundOne; // Starts false; set to true by follow() on an scAddRecExpr.
3322 FindAddRecurrence() : FoundOne(false) {}
3323
3324 bool follow(const SCEV *S) { // NOTE(review): return value presumably tells SCEVTraversal whether to visit S's operands -- confirm.
3325 switch (static_cast(S->getSCEVType())) {
3326 case scAddRecExpr:
3327 FoundOne = true; // No break: falls through to the `return false` below.
3328 case scConstant:
3329 case scUnknown:
3330 case scCouldNotCompute:
3331 return false;
3332 default:
3333 return true;
3334 }
3335 }
3336 bool isDone() const { return FoundOne; } // Reports true once an scAddRecExpr has been seen.
3337 };
3338 }
3339
/// Return the FoundOne result of running FindAddRecurrence over S via
/// SCEVTraversal. The result is memoized in HasRecMap, keyed by S.
3340 bool ScalarEvolution::containsAddRecurrence(const SCEV *S) {
3341 HasRecMapType::iterator I = HasRecMap.find_as(S);
3342 if (I != HasRecMap.end()) // Cache hit: reuse the stored answer.
3343 return I->second;
3344
3345 FindAddRecurrence F;
3346 SCEVTraversal ST(F);
3347 ST.visitAll(S);
3348 HasRecMap.insert(std::make_pair(S, F.FoundOne)); // Remember the answer for later queries on S.
3349 return F.FoundOne;
3350 }
3351
3352 /// getSCEVValues - Return a pointer to the Value set stored in ExprValueMap
/// for S, or nullptr when ExprValueMap has no entry for S. The pointer refers
/// to the set held inside the map, not to a copy.
3353 SetVector *ScalarEvolution::getSCEVValues(const SCEV *S) {
3354 ExprValueMapType::iterator SI = ExprValueMap.find_as(S);
3355 if (SI == ExprValueMap.end())
3356 return nullptr;
3357 #ifndef NDEBUG
3358 if (VerifySCEVMap) { // Checked only when the verify-scev-maps option is set.
3359 // Check there is no dangling Value in the set returned: every Value in
// the set must still have an entry in ValueExprMap.
3360 for (const auto &VE : SI->second)
3361 assert(ValueExprMap.count(VE));
3362 }
3363 #endif
3364 return &SI->second;
3365 }
3366
3367 /// eraseValueFromMap - Erase Value from ValueExprMap and ExprValueMap.
3368 /// If ValueExprMap.erase(V) is not used together with forgetMemoizedResults(S),
3369 /// eraseValueFromMap should be used instead to ensure that whenever V->S is
3370 /// removed from ValueExprMap, V is also removed from the set of ExprValueMap[S].
/// Does nothing when V has no entry in ValueExprMap.
3371 void ScalarEvolution::eraseValueFromMap(Value *V) {
3372 ValueExprMapType::iterator I = ValueExprMap.find_as(V);
3373 if (I != ValueExprMap.end()) {
3374 const SCEV *S = I->second; // The SCEV that V currently maps to.
3375 SetVector *SV = getSCEVValues(S); // nullptr when ExprValueMap has no entry for S.
3376 // Remove V from the set of ExprValueMap[S]
3377 if (SV)
3378 SV->remove(V);
3379 ValueExprMap.erase(V); // Finally drop the V->S mapping itself.
3380 }
3381 }
3382
33123383 /// getSCEV - Return an existing SCEV if it exists, otherwise analyze the
33133384 /// expression and create a new one.
33143385 const SCEV *ScalarEvolution::getSCEV(Value *V) {
33173388 const SCEV *S = getExistingSCEV(V);
33183389 if (S == nullptr) {
33193390 S = createSCEV(V);
3320 ValueExprMap.insert(std::make_pair(SCEVCallbackVH(V, this), S));
3391 // During PHI resolution, it is possible to create two SCEVs for the same
3392 // V, so we need to double-check whether V->S was inserted into
3393 // ValueExprMap before inserting S->V into ExprValueMap.
3394 std::pair Pair =
3395 ValueExprMap.insert(std::make_pair(SCEVCallbackVH(V, this), S));
3396 if (Pair.second)
3397 ExprValueMap[S].insert(V);
33213398 }
33223399 return S;
33233400 }
33303407 const SCEV *S = I->second;
33313408 if (checkValidity(S))
33323409 return S;
3410 forgetMemoizedResults(S);
33333411 ValueExprMap.erase(I);
33343412 }
33353413 return nullptr;
89669044 assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!");
89679045 if (PHINode *PN = dyn_cast(getValPtr()))
89689046 SE->ConstantEvolutionLoopExitValue.erase(PN);
8969 SE->ValueExprMap.erase(getValPtr());
9047 SE->eraseValueFromMap(getValPtr());
89709048 // this now dangles!
89719049 }
89729050
89899067 continue;
89909068 if (PHINode *PN = dyn_cast(U))
89919069 SE->ConstantEvolutionLoopExitValue.erase(PN);
8992 SE->ValueExprMap.erase(U);
9070 SE->eraseValueFromMap(U);
89939071 Worklist.insert(Worklist.end(), U->user_begin(), U->user_end());
89949072 }
89959073 // Delete the Old value.
89969074 if (PHINode *PN = dyn_cast(Old))
89979075 SE->ConstantEvolutionLoopExitValue.erase(PN);
8998 SE->ValueExprMap.erase(Old);
9076 SE->eraseValueFromMap(Old);
89999077 // this now dangles!
90009078 }
90019079
90459123 }
90469124 FirstUnknown = nullptr;
90479125
9126 ExprValueMap.clear();
90489127 ValueExprMap.clear();
9128 HasRecMap.clear();
90499129
90509130 // Free any extra memory created for ExitNotTakenInfo in the unlikely event
90519131 // that a loop had multiple computable exits.
93749454 BlockDispositions.erase(S);
93759455 UnsignedRanges.erase(S);
93769456 SignedRanges.erase(S);
9457 ExprValueMap.erase(S);
9458 HasRecMap.erase(S);
93779459
93789460 for (DenseMap::iterator I =
93799461 BackedgeTakenCounts.begin(), E = BackedgeTakenCounts.end(); I != E; ) {
15991599 return V;
16001600 }
16011601
1602 // The expansion of SCEV will either reuse a previous Value in ExprValueMap,
1603 // or expand the SCEV literally. Specifically, if the expansion is in LSRMode,
1604 // and the SCEV contains any sub scAddRecExpr type SCEV, it will be expanded
1605 // literally, to prevent LSR's transformed SCEV from being reverted. Otherwise,
1606 // the expansion will try to reuse Value from ExprValueMap, and only when it
1607 // fails, expand the SCEV literally.
16021608 Value *SCEVExpander::expand(const SCEV *S) {
16031609 // Compute an insertion point for this SCEV object. Hoist the instructions
16041610 // as far out in the loop nest as possible.
16381644 Builder.SetInsertPoint(InsertPt);
16391645
16401646 // Expand the expression into instructions.
1641 Value *V = visit(S);
1647 SetVector *Set = SE.getSCEVValues(S);
1648 Value *V = nullptr;
1649 // If the expansion is in LSRMode, and the SCEV contains any sub scAddRecExpr
1650 // type SCEV, it will be expanded literally, to prevent LSR's transformed SCEV
1651 // from being reverted.
1652 if (!(LSRMode && SE.containsAddRecurrence(S))) {
1653 if (Set) {
1654 // Choose a Value from the set which dominates the insertPt.
1655 for (auto const &Ent : *Set) {
1656 if (Ent && isa(Ent) && S->getType() == Ent->getType() &&
1657 SE.DT.dominates(cast(Ent), InsertPt)) {
1658 V = Ent;
1659 break;
1660 }
1661 }
1662 }
1663 }
1664 if (!V)
1665 V = visit(S);
16421666
16431667 // Remember the expanded value for this SCEV at this location.
16441668 //
27172717
27182718 BasicBlock *NewBB = BB->splitBasicBlock(BB->getTerminator(),
27192719 "min.iters.checked");
2720 // Update dominator tree immediately if the generated block is a
2721 // LoopBypassBlock because SCEV expansions to generate loop bypass
2722 // checks may query it before the current function is finished.
2723 DT->addNewBlock(NewBB, BB);
27202724 if (L->getParentLoop())
27212725 L->getParentLoop()->addBasicBlockToLoop(NewBB, *LI);
27222726 ReplaceInstWithInst(BB->getTerminator(),
27392743 // adding one to the backedge-taken count will not overflow.
27402744 BasicBlock *NewBB = BB->splitBasicBlock(BB->getTerminator(),
27412745 "vector.ph");
2746 // Update dominator tree immediately if the generated block is a
2747 // LoopBypassBlock because SCEV expansions to generate loop bypass
2748 // checks may query it before the current function is finished.
2749 DT->addNewBlock(NewBB, BB);
27422750 if (L->getParentLoop())
27432751 L->getParentLoop()->addBasicBlockToLoop(NewBB, *LI);
27442752 ReplaceInstWithInst(BB->getTerminator(),
27642772 // Create a new block containing the stride check.
27652773 BB->setName("vector.scevcheck");
27662774 auto *NewBB = BB->splitBasicBlock(BB->getTerminator(), "vector.ph");
2775 // Update dominator tree immediately if the generated block is a
2776 // LoopBypassBlock because SCEV expansions to generate loop bypass
2777 // checks may query it before the current function is finished.
2778 DT->addNewBlock(NewBB, BB);
27672779 if (L->getParentLoop())
27682780 L->getParentLoop()->addBasicBlockToLoop(NewBB, *LI);
27692781 ReplaceInstWithInst(BB->getTerminator(),
27892801 // Create a new block containing the memory check.
27902802 BB->setName("vector.memcheck");
27912803 auto *NewBB = BB->splitBasicBlock(BB->getTerminator(), "vector.ph");
2804 // Update dominator tree immediately if the generated block is a
2805 // LoopBypassBlock because SCEV expansions to generate loop bypass
2806 // checks may query it before the current function is finished.
2807 DT->addNewBlock(NewBB, BB);
27922808 if (L->getParentLoop())
27932809 L->getParentLoop()->addBasicBlockToLoop(NewBB, *LI);
27942810 ReplaceInstWithInst(BB->getTerminator(),
39563972 assert(DT->properlyDominates(LoopBypassBlocks.front(), LoopExitBlock) &&
39573973 "Entry does not dominate exit.");
39583974
3959 for (unsigned I = 1, E = LoopBypassBlocks.size(); I != E; ++I)
3960 DT->addNewBlock(LoopBypassBlocks[I], LoopBypassBlocks[I-1]);
3961 DT->addNewBlock(LoopVectorPreHeader, LoopBypassBlocks.back());
3962
39633975 // We don't predicate stores by this point, so the vector body should be a
39643976 // single loop.
39653977 assert(LoopVectorBody.size() == 1 && "Expected single block loop!");
0 ; RUN: opt < %s -loop-vectorize -force-vector-width=4 -verify-scev-maps -S |FileCheck %s
1
2 ; SCEV expansion uses existing value when the SCEV has no AddRec expr.
3 ; CHECK: select
4 ; CHECK-NOT: select
5
6 @a = common global [1000 x i16] zeroinitializer, align 16
7
8 define i32 @foo(i32 %x, i32 %y) {
9 entry:
10 %cmp = icmp slt i32 %x, %y
11 %cond = select i1 %cmp, i32 %x, i32 %y
12 %cmp1.10 = icmp sgt i32 %cond, 0
13 br i1 %cmp1.10, label %for.body.lr.ph, label %for.end
14
15 for.body.lr.ph: ; preds = %entry
16 %tmp = sext i32 %cond to i64
17 br label %for.body
18
19 for.body: ; preds = %for.body, %for.body.lr.ph
20 %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
21 %total.011 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ]
22 %arrayidx = getelementptr inbounds [1000 x i16], [1000 x i16]* @a, i64 0, i64 %indvars.iv
23 %tmp1 = load i16, i16* %arrayidx, align 2
24 %conv = sext i16 %tmp1 to i32
25 %add = add nsw i32 %conv, %total.011
26 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
27 %cmp1 = icmp slt i64 %indvars.iv.next, %tmp
28 br i1 %cmp1, label %for.body, label %for.end.loopexit
29
30 for.end.loopexit: ; preds = %for.body
31 %add.lcssa = phi i32 [ %add, %for.body ]
32 br label %for.end
33
34 for.end: ; preds = %for.end.loopexit, %entry
35 %total.0.lcssa = phi i32 [ 0, %entry ], [ %add.lcssa, %for.end.loopexit ]
36 ret i32 %total.0.lcssa
37 }
77 ; -- The loop following the load should only use a single add-literation
88 ; instruction.
99 ; CHECK: vldr
10 ; CHECK: adds r{{[0-9]+.*}}#1
1110 ; CHECK-NOT: adds
1211 ; CHECK: subsections_via_symbols
1312
2727 ret void
2828
2929 ; CHECK: loop.preheader:
30 ; CHECK: [[indvar_start:[^ ]+]] = add i32 %n, -1
3130 ; CHECK: [[not_len:[^ ]+]] = sub i32 -1, %len
3231 ; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n
3332 ; CHECK: [[not_len_hiclamp_cmp:[^ ]+]] = icmp sgt i32 [[not_len]], [[not_n]]
1010 br i1 %cmp1, label %for.body, label %for.end
1111
1212 ; Make sure the added GEP has the right index type
13 ; CHECK: %lftr.limit = getelementptr i8, i8 addrspace(2)* %base, i8 %0
13 ; CHECK: %lftr.limit = getelementptr i8, i8 addrspace(2)* %base, i8 %idx.trunc
1414
1515 ; CHECK: for.body:
1616 ; CHECK: phi i8 addrspace(2)*
4242 br i1 %cmp1, label %for.body, label %for.end
4343
4444 ; Make sure the added GEP has the right index type
45 ; CHECK: %lftr.limit = getelementptr i8, i8 addrspace(3)* %base, i16 %0
45 ; CHECK: %lftr.limit = getelementptr i8, i8 addrspace(3)* %base, i16 %idx.trunc
4646
4747 ; CHECK: for.body:
4848 ; CHECK: phi i8 addrspace(3)*
55 define void @f(i32* %end.s, i8** %loc, i32 %p) {
66 ; CHECK-LABEL: @f(
77 entry:
8 ; CHECK: [[P_SEXT:%[0-9a-z]+]] = sext i32 %p to i64
9 ; CHECK: [[END:%[0-9a-z]+]] = getelementptr i32, i32* %end.s, i64 [[P_SEXT]]
10
118 %end = getelementptr inbounds i32, i32* %end.s, i32 %p
129 %init = bitcast i32* %end.s to i8*
1310 br label %while.body.i
2118
2219 loop.exit:
2320 ; CHECK: loop.exit:
24 ; CHECK: [[END_BCASTED:%[a-z0-9]+]] = bitcast i32* %scevgep to i8*
21 ; CHECK: [[END_BCASTED:%[a-z0-9]+]] = bitcast i32* %end to i8*
2522 ; CHECK: store i8* [[END_BCASTED]], i8** %loc
2623 %ptr.inc.lcssa = phi i8* [ %ptr.inc, %while.body.i ]
2724 store i8* %ptr.inc.lcssa, i8** %loc
126126
127127 declare i32 @printf(i8* nocapture, ...) nounwind
128128
129 ; IndVars shouldn't be afraid to emit a udiv here, since there's a udiv in
130 ; the original code.
129 ; IndVars doesn't emit a udiv in for.body.preheader since SCEVExpander::expand will
130 ; find out there's already a udiv in the original code.
131131
132132 ; CHECK-LABEL: @foo(
133133 ; CHECK: for.body.preheader:
134 ; CHECK-NEXT: udiv
134 ; CHECK-NOT: udiv
135135
136136 define void @foo(double* %p, i64 %n) nounwind {
137137 entry:
3131
3232 ; CHECK-LABEL: @test1(
3333
34 ; First check that we move the sub into the preheader, it doesn't have to be
35 ; executed if %cmp4 == false
36 ; CHECK: for.body.preheader:
37 ; CHECK: sub i32 %data_len, %sample
38 ; CHECK: br label %for.body
39
40 ; Second, check that we turn the IV test into an eq.
34 ; check that we turn the IV test into an eq.
4135 ; CHECK: %lftr.wideiv = trunc i64 %indvars.iv.next to i32
42 ; CHECK: %exitcond = icmp ne i32 %lftr.wideiv, %0
36 ; CHECK: %exitcond = icmp ne i32 %lftr.wideiv, %sub
4337 ; CHECK: br i1 %exitcond, label %for.body, label %for.end.loopexit
4438 }
4539
33 ; LSR should properly handle the post-inc offset when folding the
44 ; non-IV operand of an icmp into the IV.
55
6 ; CHECK: [[r1:%[a-z0-9]+]] = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
7 ; CHECK: [[r2:%[a-z0-9]+]] = lshr i64 [[r1]], 1
6 ; CHECK: [[r1:%[a-z0-9\.]+]] = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
7 ; CHECK: [[r2:%[a-z0-9\.]+]] = lshr exact i64 [[r1]], 1
8 ; CHECK: for.body.lr.ph:
89 ; CHECK: [[r3:%[a-z0-9]+]] = shl i64 [[r2]], 1
910 ; CHECK: br label %for.body
1011 ; CHECK: for.body: