llvm.org GIT mirror: llvm commit 2f46bb8
Fix the time regression I introduced in 464.h264ref with my earlier patch to this file.

The issue there was that all uses of an IV inside a loop are actually references to Base[IV*2], and there was one use outside that was the same, but LSR didn't see the base or the scaling because it didn't recurse into uses outside the loop; thus, it used base+IV*scale mode inside the loop instead of pulling base out of the loop. This was extra bad because register pressure later forced both base and IV into memory. Doing that recursion, at least enough to figure out addressing modes, is a good idea in general; the change in AddUsersIfInteresting does this. However, there were side effects:

It is also possible for recursing outside the loop to introduce another IV where there was only 1 before (if the refs inside are not scaled and the ref outside is). I don't think this is a common case, but it's in the testsuite. It is right to be very aggressive about getting rid of such introduced IVs (CheckForIVReuse and the handling of nonzero RewriteFactor in StrengthReduceStridedIVUsers). In the testcase in question the new IV produced this way has both a nonconstant stride and a nonzero base, neither of which was handled before.

And when inserting new code that feeds into a PHI, it's right to put such code at the original location rather than in the PHI's immediate predecessor(s) when the original location is outside the loop (a case that couldn't happen before) (RewriteInstructionToUseNewBase); better to avoid making multiple copies of it in this case.

Also, the mechanism for keeping SCEVs corresponding to GEPs no longer works, as the GEP might change after its SCEV is remembered, invalidating the SCEV, and we might get a bad SCEV value when looking up the GEP again for a later loop. This also couldn't happen before, as we weren't recursing into GEPs outside the loop.

Also, when we build an expression that involves a (possibly non-affine) IV from a different loop as well as an IV from the one we're interested in (containsAddRecFromDifferentLoop), don't recurse into that. We can't do much with it and will get in trouble if we try to create new non-affine IVs or something.

More testcases are coming.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@62212 91177308-0d34-0410-b5e6-96231b3b80d8

Dale Johannesen
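For readers without the benchmark in front of them, the pattern the message describes looks roughly like the following. This is a minimal, hypothetical C++ sketch; the function and variable names are invented, and it is not code from 464.h264ref or from LLVM itself.

// Illustration only: every use of the IV inside the loop is really a
// reference to Base[IV*2], and one more use after the loop has the same
// form.  Before this patch LSR did not look through the out-of-loop use,
// so it kept base+IV*scale addressing inside the loop; seeing the whole
// expression lets it fold the base into a strength-reduced pointer that
// starts at Base and advances by two elements per iteration.
#include <cstddef>

int SumEvenElements(const int *Base, std::size_t N, int *OutLast) {
  // Assumes Base points to at least 2*N+1 elements.
  int Sum = 0;
  std::size_t IV = 0;
  for (; IV < N; ++IV)
    Sum += Base[IV * 2];   // in-loop uses: Base[IV*2]
  *OutLast = Base[IV * 2]; // same form, but outside the loop
  return Sum;
}

With the recursion in AddUsersIfInteresting extended past the loop, LSR sees that the out-of-loop reference uses the same base and scale, so the base no longer needs its own register live across the loop body.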
3 changed file(s) with 230 addition(s) and 48 deletion(s).
 
   // Internals
 
-  static bool isNotAlreadyContainedIn(LoopBase *SubLoop,
-                                      LoopBase *ParentLoop) {
+  static bool isNotAlreadyContainedIn(const LoopBase *SubLoop,
+                                      const LoopBase *ParentLoop) {
     if (SubLoop == 0) return true;
     if (SubLoop == ParentLoop) return false;
     return isNotAlreadyContainedIn(SubLoop->getParentLoop(), ParentLoop);
...
   /// dependent on random ordering of pointers in the process.
   SmallVector<SCEVHandle, 16> StrideOrder;
 
+  /// GEPlist - A list of the GEP's that have been remembered in the SCEV
+  /// data structures. SCEV does not know to update these when the operands
+  /// of the GEP are changed, which means we cannot leave them live across
+  /// loops.
+  SmallVector<GetElementPtrInst *, 16> GEPlist;
+
   /// CastedValues - As we need to cast values to uintptr_t, this keeps track
   /// of the casted version of each value. This is accessed by
   /// getCastedVersionOf.
...
     bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse,
                            const SCEVHandle *&CondStride);
     bool RequiresTypeConversion(const Type *Ty, const Type *NewTy);
-    int64_t CheckForIVReuse(bool, bool, bool, const SCEVHandle&,
+    SCEVHandle CheckForIVReuse(bool, bool, bool, const SCEVHandle&,
                             IVExpr&, const Type*,
                             const std::vector<BasedUser>& UsersToProcess);
     bool ValidStride(bool, int64_t,
...
   }
 
   SE->setSCEV(GEP, GEPVal);
+  GEPlist.push_back(GEP);
   return GEPVal;
+}
+
+/// containsAddRecFromDifferentLoop - Determine whether expression S involves a
+/// subexpression that is an AddRec from a loop other than L. An outer loop
+/// of L is OK, but not an inner loop nor a disjoint loop.
+static bool containsAddRecFromDifferentLoop(SCEVHandle S, Loop *L) {
+  // This is very common, put it first.
+  if (isa<SCEVConstant>(S))
+    return false;
+  if (SCEVCommutativeExpr *AE = dyn_cast<SCEVCommutativeExpr>(S)) {
+    for (unsigned int i=0; i< AE->getNumOperands(); i++)
+      if (containsAddRecFromDifferentLoop(AE->getOperand(i), L))
+        return true;
+    return false;
+  }
+  if (SCEVAddRecExpr *AE = dyn_cast<SCEVAddRecExpr>(S)) {
+    if (const Loop *newLoop = AE->getLoop()) {
+      if (newLoop == L)
+        return false;
+      // if newLoop is an outer loop of L, this is OK.
+      if (!LoopInfoBase::isNotAlreadyContainedIn(L, newLoop))
+        return false;
+    }
+    return true;
+  }
+  if (SCEVUDivExpr *DE = dyn_cast<SCEVUDivExpr>(S))
+    return containsAddRecFromDifferentLoop(DE->getLHS(), L) ||
+           containsAddRecFromDifferentLoop(DE->getRHS(), L);
+#if 0
+  // SCEVSDivExpr has been backed out temporarily, but will be back; we'll
+  // need this when it is.
+  if (SCEVSDivExpr *DE = dyn_cast<SCEVSDivExpr>(S))
+    return containsAddRecFromDifferentLoop(DE->getLHS(), L) ||
+           containsAddRecFromDifferentLoop(DE->getRHS(), L);
+#endif
+  if (SCEVTruncateExpr *TE = dyn_cast<SCEVTruncateExpr>(S))
+    return containsAddRecFromDifferentLoop(TE->getOperand(), L);
+  if (SCEVZeroExtendExpr *ZE = dyn_cast<SCEVZeroExtendExpr>(S))
+    return containsAddRecFromDifferentLoop(ZE->getOperand(), L);
+  if (SCEVSignExtendExpr *SE = dyn_cast<SCEVSignExtendExpr>(S))
+    return containsAddRecFromDifferentLoop(SE->getOperand(), L);
+  return false;
 }
 
 /// getSCEVStartAndStride - Compute the start and stride of this expression,
 /// returning false if the expression is not a start/stride pair, or true if it
 /// is. The stride must be a loop invariant expression, but the start may be
-/// a mix of loop invariant and loop variant expressions.
+/// a mix of loop invariant and loop variant expressions. The start cannot,
+/// however, contain an AddRec from a different loop, unless that loop is an
+/// outer loop of the current loop.
 static bool getSCEVStartAndStride(const SCEVHandle &SH, Loop *L,
                                   SCEVHandle &Start, SCEVHandle &Stride,
                                   ScalarEvolution *SE) {
...
 
   // FIXME: Generalize to non-affine IV's.
   if (!AddRec->isAffine()) return false;
+
+  // If Start contains an SCEVAddRecExpr from a different loop, other than an
+  // outer loop of the current loop, reject it. SCEV has no concept of operating
+  // on one loop at a time so don't confuse it with such expressions.
+  if (containsAddRecFromDifferentLoop(Start, L))
+    return false;
 
   Start = SE->getAddExpr(Start, AddRec->getOperand(0));
 
...
     if (isa<PHINode>(User) && Processed.count(User))
       continue;
 
-    // If this is an instruction defined in a nested loop, or outside this loop,
-    // don't recurse into it.
+    // Descend recursively, but not into PHI nodes outside the current loop.
+    // It's important to see the entire expression outside the loop to get
+    // choices that depend on addressing mode use right, although we won't
+    // consider references outside the loop in all cases.
+    // If User is already in Processed, we don't want to recurse into it again,
+    // but do want to record a second reference in the same instruction.
     bool AddUserToIVUsers = false;
     if (LI->getLoopFor(User->getParent()) != L) {
-      DOUT << "FOUND USER in other loop: " << *User
-           << " OF SCEV: " << *ISE << "\n";
-      AddUserToIVUsers = true;
-    } else if (!AddUsersIfInteresting(User, L, Processed)) {
+      if (isa<PHINode>(User) || Processed.count(User) ||
+          !AddUsersIfInteresting(User, L, Processed)) {
+        DOUT << "FOUND USER in other loop: " << *User
+             << " OF SCEV: " << *ISE << "\n";
+        AddUserToIVUsers = true;
+      }
+    } else if (Processed.count(User) ||
+               !AddUsersIfInteresting(User, L, Processed)) {
       DOUT << "FOUND USER: " << *User
            << " OF SCEV: " << *ISE << "\n";
       AddUserToIVUsers = true;
...
   PHINode *PN = cast<PHINode>(Inst);
   for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
     if (PN->getIncomingValue(i) == OperandValToReplace) {
-      // If this is a critical edge, split the edge so that we do not insert the
-      // code on all predecessor/successor paths. We do this unless this is the
-      // canonical backedge for this loop, as this can make some inserted code
-      // be in an illegal position.
-      BasicBlock *PHIPred = PN->getIncomingBlock(i);
-      if (e != 1 && PHIPred->getTerminator()->getNumSuccessors() > 1 &&
-          (PN->getParent() != L->getHeader() || !L->contains(PHIPred))) {
-
-        // First step, split the critical edge.
-        SplitCriticalEdge(PHIPred, PN->getParent(), P, false);
-
-        // Next step: move the basic block. In particular, if the PHI node
-        // is outside of the loop, and PredTI is in the loop, we want to
-        // move the block to be immediately before the PHI block, not
-        // immediately after PredTI.
-        if (L->contains(PHIPred) && !L->contains(PN->getParent())) {
-          BasicBlock *NewBB = PN->getIncomingBlock(i);
-          NewBB->moveBefore(PN->getParent());
+      // If the original expression is outside the loop, put the replacement
+      // code in the same place as the original expression,
+      // which need not be an immediate predecessor of this PHI. This way we
+      // need only one copy of it even if it is referenced multiple times in
+      // the PHI. We don't do this when the original expression is inside the
+      // loop because multiple copies sometimes do useful sinking of code in that
+      // case(?).
+      Instruction *OldLoc = dyn_cast<Instruction>(OperandValToReplace);
+      if (L->contains(OldLoc->getParent())) {
+        // If this is a critical edge, split the edge so that we do not insert the
+        // code on all predecessor/successor paths. We do this unless this is the
+        // canonical backedge for this loop, as this can make some inserted code
+        // be in an illegal position.
+        BasicBlock *PHIPred = PN->getIncomingBlock(i);
+        if (e != 1 && PHIPred->getTerminator()->getNumSuccessors() > 1 &&
+            (PN->getParent() != L->getHeader() || !L->contains(PHIPred))) {
+
+          // First step, split the critical edge.
+          SplitCriticalEdge(PHIPred, PN->getParent(), P, false);
+
+          // Next step: move the basic block. In particular, if the PHI node
+          // is outside of the loop, and PredTI is in the loop, we want to
+          // move the block to be immediately before the PHI block, not
+          // immediately after PredTI.
+          if (L->contains(PHIPred) && !L->contains(PN->getParent())) {
+            BasicBlock *NewBB = PN->getIncomingBlock(i);
+            NewBB->moveBefore(PN->getParent());
+          }
+
+          // Splitting the edge can reduce the number of PHI entries we have.
+          e = PN->getNumIncomingValues();
         }
-
-        // Splitting the edge can reduce the number of PHI entries we have.
-        e = PN->getNumIncomingValues();
-      }
-
+      }
       Value *&Code = InsertedCode[PN->getIncomingBlock(i)];
       if (!Code) {
         // Insert the code into the end of the predecessor block.
-        Instruction *InsertPt = PN->getIncomingBlock(i)->getTerminator();
+        Instruction *InsertPt = (L->contains(OldLoc->getParent())) ?
+          PN->getIncomingBlock(i)->getTerminator() :
+          OldLoc->getParent()->getTerminator();
         Code = InsertCodeForBaseAtPosition(NewBase, Rewriter, InsertPt, L);
 
         // Adjust the type back to match the PHI. Note that we can't use
...
 /// mode scale component and optional base reg. This allows the users of
 /// this stride to be rewritten as prev iv * factor. It returns 0 if no
 /// reuse is possible. Factors can be negative on some targets, e.g. ARM.
-int64_t LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg,
+///
+/// If all uses are outside the loop, we don't require that all multiplies
+/// be folded into the addressing mode, nor even that the factor be constant;
+/// a multiply (executed once) outside the loop is better than another IV
+/// within. Well, usually.
+SCEVHandle LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg,
                                 bool AllUsesAreAddresses,
                                 bool AllUsesAreOutsideLoop,
                                 const SCEVHandle &Stride,
...
        ++NewStride) {
     std::map<SCEVHandle, IVsOfOneStride>::iterator SI =
       IVsByStride.find(StrideOrder[NewStride]);
-    if (SI == IVsByStride.end())
+    if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first))
       continue;
     int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue();
     if (SI->first != Stride &&
...
         if (II->Base->isZero() &&
             !RequiresTypeConversion(II->Base->getType(), Ty)) {
           IV = *II;
-          return Scale;
+          return SE->getIntegerSCEV(Scale, Stride->getType());
         }
     }
-  }
-  return 0;
+  } else if (AllUsesAreOutsideLoop) {
+    // Accept nonconstant strides here; it is really really right to substitute
+    // an existing IV if we can.
+    for (unsigned NewStride = 0, e = StrideOrder.size(); NewStride != e;
+         ++NewStride) {
+      std::map<SCEVHandle, IVsOfOneStride>::iterator SI =
+        IVsByStride.find(StrideOrder[NewStride]);
+      if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first))
+        continue;
+      int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue();
+      if (SI->first != Stride && SSInt != 1)
+        continue;
+      for (std::vector<IVExpr>::iterator II = SI->second.IVs.begin(),
+             IE = SI->second.IVs.end(); II != IE; ++II)
+        // Accept nonzero base here.
+        // Only reuse previous IV if it would not require a type conversion.
+        if (!RequiresTypeConversion(II->Base->getType(), Ty)) {
+          IV = *II;
+          return Stride;
+        }
+    }
+    // Special case, old IV is -1*x and this one is x. Can treat this one as
+    // -1*old.
+    for (unsigned NewStride = 0, e = StrideOrder.size(); NewStride != e;
+         ++NewStride) {
+      std::map<SCEVHandle, IVsOfOneStride>::iterator SI =
+        IVsByStride.find(StrideOrder[NewStride]);
+      if (SI == IVsByStride.end())
+        continue;
+      if (SCEVMulExpr *ME = dyn_cast<SCEVMulExpr>(SI->first))
+        if (SCEVConstant *SC = dyn_cast<SCEVConstant>(ME->getOperand(0)))
+          if (Stride == ME->getOperand(1) &&
+              SC->getValue()->getSExtValue() == -1LL)
+            for (std::vector<IVExpr>::iterator II = SI->second.IVs.begin(),
+                   IE = SI->second.IVs.end(); II != IE; ++II)
+              // Accept nonzero base here.
+              // Only reuse previous IV if it would not require type conversion.
+              if (!RequiresTypeConversion(II->Base->getType(), Ty)) {
+                IV = *II;
+                return SE->getIntegerSCEV(-1LL, Stride->getType());
+              }
+    }
+  }
+  return SE->getIntegerSCEV(0, Stride->getType());
 }
 
 /// PartitionByIsUseOfPostIncrementedValue - Simple boolean predicate that
...
   IVExpr ReuseIV(SE->getIntegerSCEV(0, Type::Int32Ty),
                  SE->getIntegerSCEV(0, Type::Int32Ty),
                  0, 0);
-  int64_t RewriteFactor = 0;
-  RewriteFactor = CheckForIVReuse(HaveCommonExprs, AllUsesAreAddresses,
+  SCEVHandle RewriteFactor =
+                  CheckForIVReuse(HaveCommonExprs, AllUsesAreAddresses,
                                   AllUsesAreOutsideLoop,
                                   Stride, ReuseIV, CommonExprs->getType(),
                                   UsersToProcess);
-  if (RewriteFactor != 0) {
+  if (!isa<SCEVConstant>(RewriteFactor) ||
+      !cast<SCEVConstant>(RewriteFactor)->isZero()) {
     DOUT << "BASED ON IV of STRIDE " << *ReuseIV.Stride
          << " and BASE " << *ReuseIV.Base << " :\n";
     NewPHI = ReuseIV.PHI;
...
   Value *CommonBaseV
     = PreheaderRewriter.expandCodeFor(CommonExprs, PreInsertPt);
 
-  if (RewriteFactor == 0) {
+  if (isa<SCEVConstant>(RewriteFactor) &&
+      cast<SCEVConstant>(RewriteFactor)->isZero()) {
     // Create a new Phi for this base, and stick it in the loop header.
     NewPHI = PHINode::Create(ReplacedTy, "iv.", PhiInsertBefore);
     ++NumInserted;
...
 
     // If we are reusing the iv, then it must be multiplied by a constant
     // factor to take advantage of the addressing mode scale component.
-    if (RewriteFactor != 0) {
-      RewriteExpr = SE->getMulExpr(SE->getIntegerSCEV(RewriteFactor,
-                                                      RewriteExpr->getType()),
+    if (!isa<SCEVConstant>(RewriteFactor) ||
+        !cast<SCEVConstant>(RewriteFactor)->isZero()) {
+      // If we're reusing an IV with a nonzero base (currently this happens
+      // only when all reuses are outside the loop) subtract that base here.
+      // The base has been used to initialize the PHI node but we don't want
+      // it here.
+      if (!ReuseIV.Base->isZero())
+        RewriteExpr = SE->getMinusSCEV(RewriteExpr, ReuseIV.Base);
+
+      // Multiply old variable, with base removed, by new scale factor.
+      RewriteExpr = SE->getMulExpr(RewriteFactor,
                                    RewriteExpr);
 
       // The common base is emitted in the loop preheader. But since we
       // are reusing an IV, it has not been used to initialize the PHI node.
       // Add it to the expression used to rewrite the uses.
+      // When this use is outside the loop, we earlier subtracted the
+      // common base, and are adding it back here. Use the same expression
+      // as before, rather than CommonBaseV, so DAGCombiner will zap it.
       if (!isa<ConstantInt>(CommonBaseV) ||
-          !cast<ConstantInt>(CommonBaseV)->isZero())
-        RewriteExpr = SE->getAddExpr(RewriteExpr,
+          !cast<ConstantInt>(CommonBaseV)->isZero()) {
+        if (L->contains(User.Inst->getParent()))
+          RewriteExpr = SE->getAddExpr(RewriteExpr,
                                      SE->getUnknown(CommonBaseV));
+        else
+          RewriteExpr = SE->getAddExpr(RewriteExpr, CommonExprs);
+      }
     }
 
     // Now that we know what we need to do, insert code before User for the
...
   IVUsesByStride.clear();
   IVsByStride.clear();
   StrideOrder.clear();
+  for (unsigned i=0; i<GEPlist.size(); i++)
+    SE->deleteValueFromRecords(GEPlist[i]);
+  GEPlist.clear();
 
   // Clean up after ourselves
   if (!DeadInsts.empty()) {
; RUN: llvm-as < %s | opt -loop-reduce | llvm-dis | grep phi | count 1
; RUN: llvm-as < %s | opt -loop-reduce | llvm-dis | grep mul | count 1
; ModuleID = ''
; Make sure examining a fuller expression outside the loop doesn't cause us to create a second
; IV of stride %3.
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin9.5"
%struct.anon = type { %struct.obj*, %struct.obj* }
%struct.obj = type { i16, i16, { %struct.anon } }
@heap_size = external global i32		; [#uses=1]
@"\01LC85" = external constant [39 x i8]		; <[39 x i8]*> [#uses=1]

declare i32 @sprintf(i8*, i8*, ...) nounwind

define %struct.obj* @gc_status(%struct.obj* %args) nounwind {
entry:
  br label %bb1.i

bb.i2:		; preds = %bb2.i3
  %indvar.next24 = add i32 %m.0.i, 1		; [#uses=1]
  br label %bb1.i

bb1.i:		; preds = %bb.i2, %entry
  %m.0.i = phi i32 [ 0, %entry ], [ %indvar.next24, %bb.i2 ]		; [#uses=4]
  %0 = icmp slt i32 %m.0.i, 0		; [#uses=1]
  br i1 %0, label %bb2.i3, label %nactive_heaps.exit

bb2.i3:		; preds = %bb1.i
  %1 = load %struct.obj** null, align 4		; <%struct.obj*> [#uses=1]
  %2 = icmp eq %struct.obj* %1, null		; [#uses=1]
  br i1 %2, label %nactive_heaps.exit, label %bb.i2

nactive_heaps.exit:		; preds = %bb2.i3, %bb1.i
  %3 = load i32* @heap_size, align 4		; [#uses=1]
  %4 = mul i32 %3, %m.0.i		; [#uses=1]
  %5 = sub i32 %4, 0		; [#uses=1]
  %6 = tail call i32 (i8*, i8*, ...)* @sprintf(i8* null, i8* getelementptr ([39 x i8]* @"\01LC85", i32 0, i32 0), i32 %m.0.i, i32 0, i32 %5, i32 0) nounwind		; [#uses=0]
  ret %struct.obj* null
}