llvm.org GIT mirror llvm / c9b1e25
Enable the new LoopInfo algorithm by default. The primary advantage is that loop optimizations will be applied in a stable order. This helps debugging and unit test creation. It is also a better overall implementation without pathologically bad performance on deep functions. On large functions (llvm-stress --size=200000 | opt -loops) Before: 0.1263s After: 0.0225s On deep functions (after tweaking llvm-stress, thanks Nadav): Before: 0.2281s After: 0.0227s See r158790 for more comments. The loop tree is now consistently generated in forward order, but loop passes are applied in reverse order over the program. If we have a loop optimization that prefers forward order, that can easily be achieved by adding a different type of LoopPassManager. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@159183 91177308-0d34-0410-b5e6-96231b3b80d8 Andrew Trick 7 years ago
10 changed file(s) with 52 addition(s) and 244 deletion(s). Raw diff Collapse all Expand all
126126 const std::vector &getSubLoops() const { return SubLoops; }
127127 std::vector &getSubLoopsVector() { return SubLoops; }
128128 typedef typename std::vector::const_iterator iterator;
129 typedef typename std::vector::const_reverse_iterator
130 reverse_iterator;
129131 iterator begin() const { return SubLoops.begin(); }
130132 iterator end() const { return SubLoops.end(); }
133 reverse_iterator rbegin() const { return SubLoops.rbegin(); }
134 reverse_iterator rend() const { return SubLoops.rend(); }
131135 bool empty() const { return SubLoops.empty(); }
132136
133137 /// getBlocks - Get a list of the basic blocks which make up this loop.
430434 /// function.
431435 ///
432436 typedef typename std::vector::const_iterator iterator;
437 typedef typename std::vector::const_reverse_iterator
438 reverse_iterator;
433439 iterator begin() const { return TopLevelLoops.begin(); }
434440 iterator end() const { return TopLevelLoops.end(); }
441 reverse_iterator rbegin() const { return TopLevelLoops.rbegin(); }
442 reverse_iterator rend() const { return TopLevelLoops.rend(); }
435443 bool empty() const { return TopLevelLoops.empty(); }
436444
437445 /// getLoopFor - Return the inner most loop that BB lives in. If a basic
524532 return isNotAlreadyContainedIn(SubLoop->getParentLoop(), ParentLoop);
525533 }
526534
527 void Calculate(DominatorTreeBase &DT);
528
529 LoopT *ConsiderForLoop(BlockT *BB, DominatorTreeBase &DT);
530
531 /// MoveSiblingLoopInto - This method moves the NewChild loop to live inside
532 /// of the NewParent Loop, instead of being a sibling of it.
533 void MoveSiblingLoopInto(LoopT *NewChild, LoopT *NewParent);
534
535 /// InsertLoopInto - This inserts loop L into the specified parent loop. If
536 /// the parent loop contains a loop which should contain L, the loop gets
537 /// inserted into L instead.
538 void InsertLoopInto(LoopT *L, LoopT *Parent);
539
540535 /// Create the loop forest using a stable algorithm.
541536 void Analyze(DominatorTreeBase &DomTree);
542537
569564 /// function.
570565 ///
571566 typedef LoopInfoBase::iterator iterator;
567 typedef LoopInfoBase::reverse_iterator reverse_iterator;
572568 inline iterator begin() const { return LI.begin(); }
573569 inline iterator end() const { return LI.end(); }
570 inline reverse_iterator rbegin() const { return LI.rbegin(); }
571 inline reverse_iterator rend() const { return LI.rend(); }
574572 bool empty() const { return LI.empty(); }
575573
576574 /// getLoopFor - Return the inner most loop that BB lives in. If a basic
353353 }
354354
355355 //===----------------------------------------------------------------------===//
356 /// LoopInfo - This class builds and contains all of the top level loop
357 /// structures in the specified function.
358 ///
359
360 template
361 void LoopInfoBase::Calculate(DominatorTreeBase &DT) {
362 BlockT *RootNode = DT.getRootNode()->getBlock();
363
364 for (df_iterator NI = df_begin(RootNode),
365 NE = df_end(RootNode); NI != NE; ++NI)
366 if (LoopT *L = ConsiderForLoop(*NI, DT))
367 TopLevelLoops.push_back(L);
368 }
369
370 template
371 LoopT *LoopInfoBase::
372 ConsiderForLoop(BlockT *BB, DominatorTreeBase &DT) {
373 if (BBMap.count(BB)) return 0; // Haven't processed this node?
374
375 std::vector TodoStack;
376
377 // Scan the predecessors of BB, checking to see if BB dominates any of
378 // them. This identifies backedges which target this node...
379 typedef GraphTraits > InvBlockTraits;
380 for (typename InvBlockTraits::ChildIteratorType I =
381 InvBlockTraits::child_begin(BB), E = InvBlockTraits::child_end(BB);
382 I != E; ++I) {
383 typename InvBlockTraits::NodeType *N = *I;
384 // If BB dominates its predecessor...
385 if (DT.dominates(BB, N) && DT.isReachableFromEntry(N))
386 TodoStack.push_back(N);
387 }
388
389 if (TodoStack.empty()) return 0; // No backedges to this block...
390
391 // Create a new loop to represent this basic block...
392 LoopT *L = new LoopT(BB);
393 BBMap[BB] = L;
394
395 while (!TodoStack.empty()) { // Process all the nodes in the loop
396 BlockT *X = TodoStack.back();
397 TodoStack.pop_back();
398
399 if (!L->contains(X) && // As of yet unprocessed??
400 DT.isReachableFromEntry(X)) {
401 // Check to see if this block already belongs to a loop. If this occurs
402 // then we have a case where a loop that is supposed to be a child of
403 // the current loop was processed before the current loop. When this
404 // occurs, this child loop gets added to a part of the current loop,
405 // making it a sibling to the current loop. We have to reparent this
406 // loop.
407 if (LoopT *SubLoop =
408 const_cast(getLoopFor(X)))
409 if (SubLoop->getHeader() == X && isNotAlreadyContainedIn(SubLoop, L)){
410 // Remove the subloop from its current parent...
411 assert(SubLoop->ParentLoop && SubLoop->ParentLoop != L);
412 LoopT *SLP = SubLoop->ParentLoop; // SubLoopParent
413 typename std::vector::iterator I =
414 std::find(SLP->SubLoops.begin(), SLP->SubLoops.end(), SubLoop);
415 assert(I != SLP->SubLoops.end() &&"SubLoop not a child of parent?");
416 SLP->SubLoops.erase(I); // Remove from parent...
417
418 // Add the subloop to THIS loop...
419 SubLoop->ParentLoop = L;
420 L->SubLoops.push_back(SubLoop);
421 }
422
423 // Normal case, add the block to our loop...
424 L->Blocks.push_back(X);
425
426 typedef GraphTraits > InvBlockTraits;
427
428 // Add all of the predecessors of X to the end of the work stack...
429 TodoStack.insert(TodoStack.end(), InvBlockTraits::child_begin(X),
430 InvBlockTraits::child_end(X));
431 }
432 }
433
434 // If there are any loops nested within this loop, create them now!
435 for (typename std::vector::iterator I = L->Blocks.begin(),
436 E = L->Blocks.end(); I != E; ++I)
437 if (LoopT *NewLoop = ConsiderForLoop(*I, DT)) {
438 L->SubLoops.push_back(NewLoop);
439 NewLoop->ParentLoop = L;
440 }
441
442 // Add the basic blocks that comprise this loop to the BBMap so that this
443 // loop can be found for them.
444 //
445 for (typename std::vector::iterator I = L->Blocks.begin(),
446 E = L->Blocks.end(); I != E; ++I)
447 BBMap.insert(std::make_pair(*I, L));
448
449 // Now that we have a list of all of the child loops of this loop, check to
450 // see if any of them should actually be nested inside of each other. We
451 // can accidentally pull loops out of their parents, so we must make sure to
452 // organize the loop nests correctly now.
453 {
454 std::map ContainingLoops;
455 for (unsigned i = 0; i != L->SubLoops.size(); ++i) {
456 LoopT *Child = L->SubLoops[i];
457 assert(Child->getParentLoop() == L && "Not proper child loop?");
458
459 if (LoopT *ContainingLoop = ContainingLoops[Child->getHeader()]) {
460 // If there is already a loop which contains this loop, move this loop
461 // into the containing loop.
462 MoveSiblingLoopInto(Child, ContainingLoop);
463 --i; // The loop got removed from the SubLoops list.
464 } else {
465 // This is currently considered to be a top-level loop. Check to see
466 // if any of the contained blocks are loop headers for subloops we
467 // have already processed.
468 for (unsigned b = 0, e = Child->Blocks.size(); b != e; ++b) {
469 LoopT *&BlockLoop = ContainingLoops[Child->Blocks[b]];
470 if (BlockLoop == 0) { // Child block not processed yet...
471 BlockLoop = Child;
472 } else if (BlockLoop != Child) {
473 LoopT *SubLoop = BlockLoop;
474 // Reparent all of the blocks which used to belong to BlockLoops
475 for (unsigned j = 0, f = SubLoop->Blocks.size(); j != f; ++j)
476 ContainingLoops[SubLoop->Blocks[j]] = Child;
477
478 // There is already a loop which contains this block; that means
479 // that we should reparent the loop which the block is currently
480 // considered to belong to, making it a child of this loop.
481 MoveSiblingLoopInto(SubLoop, Child);
482 --i; // We just shrunk the SubLoops list.
483 }
484 }
485 }
486 }
487 }
488
489 return L;
490 }
491
492 /// MoveSiblingLoopInto - This method moves the NewChild loop to live inside
493 /// of the NewParent Loop, instead of being a sibling of it.
494 template
495 void LoopInfoBase::
496 MoveSiblingLoopInto(LoopT *NewChild, LoopT *NewParent) {
497 LoopT *OldParent = NewChild->getParentLoop();
498 assert(OldParent && OldParent == NewParent->getParentLoop() &&
499 NewChild != NewParent && "Not sibling loops!");
500
501 // Remove NewChild from being a child of OldParent
502 typename std::vector::iterator I =
503 std::find(OldParent->SubLoops.begin(), OldParent->SubLoops.end(),
504 NewChild);
505 assert(I != OldParent->SubLoops.end() && "Parent fields incorrect??");
506 OldParent->SubLoops.erase(I); // Remove from parent's subloops list
507 NewChild->ParentLoop = 0;
508
509 InsertLoopInto(NewChild, NewParent);
510 }
511
512 /// InsertLoopInto - This inserts loop L into the specified parent loop. If
513 /// the parent loop contains a loop which should contain L, the loop gets
514 /// inserted into L instead.
515 template
516 void LoopInfoBase::InsertLoopInto(LoopT *L, LoopT *Parent) {
517 BlockT *LHeader = L->getHeader();
518 assert(Parent->contains(LHeader) &&
519 "This loop should not be inserted here!");
520
521 // Check to see if it belongs in a child loop...
522 for (unsigned i = 0, e = static_cast(Parent->SubLoops.size());
523 i != e; ++i)
524 if (Parent->SubLoops[i]->contains(LHeader)) {
525 InsertLoopInto(L, Parent->SubLoops[i]);
526 return;
527 }
528
529 // If not, insert it here!
530 Parent->SubLoops.push_back(L);
531 L->ParentLoop = Parent;
532 }
533
534 //===----------------------------------------------------------------------===//
535356 /// Stable LoopInfo Analysis - Build a loop tree using stable iterators so the
536357 /// result does not depend on use list (block predecessor) order.
537358 ///
4242 static cl::opt
4343 VerifyLoopInfoX("verify-loop-info", cl::location(VerifyLoopInfo),
4444 cl::desc("Verify loop info (time consuming)"));
45
46 static cl::opt
47 StableLoopInfo("stable-loops", cl::Hidden, cl::init(false),
48 cl::desc("Compute a stable loop tree."));
4945
5046 char LoopInfo::ID = 0;
5147 INITIALIZE_PASS_BEGIN(LoopInfo, "loops", "Natural Loop Information", true, true)
515511 //
516512 bool LoopInfo::runOnFunction(Function &) {
517513 releaseMemory();
518 if (StableLoopInfo)
519 LI.Analyze(getAnalysis().getBase());
520 else
521 LI.Calculate(getAnalysis().getBase()); // Update
514 LI.Analyze(getAnalysis().getBase());
522515 return false;
523516 }
524517
161161 // Recurse through all subloops and all loops into LQ.
162162 static void addLoopIntoQueue(Loop *L, std::deque &LQ) {
163163 LQ.push_back(L);
164 for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
164 for (Loop::reverse_iterator I = L->rbegin(), E = L->rend(); I != E; ++I)
165165 addLoopIntoQueue(*I, LQ);
166166 }
167167
182182 // Collect inherited analysis from Module level pass manager.
183183 populateInheritedAnalysis(TPM->activeStack);
184184
185 // Populate Loop Queue
186 for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
185 // Populate the loop queue in reverse program order. There is no clear need to
186 // process sibling loops in either forward or reverse order. There may be some
187 // advantage in deleting uses in a later loop before optimizing the
188 // definitions in an earlier loop. If we find a clear reason to process in
189 // forward order, then a forward variant of LoopPassManager should be created.
190 for (LoopInfo::reverse_iterator I = LI->rbegin(), E = LI->rend(); I != E; ++I)
187191 addLoopIntoQueue(*I, LQ);
188192
189193 if (LQ.empty()) // No loops, skip calling finalizers
1717 #include "llvm/CodeGen/MachineDominators.h"
1818 #include "llvm/CodeGen/Passes.h"
1919 #include "llvm/Analysis/LoopInfoImpl.h"
20 #include "llvm/Support/CommandLine.h"
2120 #include "llvm/Support/Debug.h"
2221 using namespace llvm;
2322
2423 // Explicitly instantiate methods in LoopInfoImpl.h for MI-level Loops.
2524 template class llvm::LoopBase;
2625 template class llvm::LoopInfoBase;
27
28 static cl::opt
29 StableLoopInfo("stable-machine-loops", cl::Hidden, cl::init(false),
30 cl::desc("Compute a stable loop tree."));
3126
3227 char MachineLoopInfo::ID = 0;
3328 INITIALIZE_PASS_BEGIN(MachineLoopInfo, "machine-loops",
4035
4136 bool MachineLoopInfo::runOnMachineFunction(MachineFunction &) {
4237 releaseMemory();
43 if (StableLoopInfo)
44 LI.Analyze(getAnalysis().getBase());
45 else
46 LI.Calculate(getAnalysis().getBase()); // Update
38 LI.Analyze(getAnalysis().getBase());
4739 return false;
4840 }
4941
0 ; RUN: llc -asm-verbose=false -disable-branch-fold -disable-code-place -disable-tail-duplicate -march=x86-64 -mcpu=nehalem < %s | FileCheck %s
11 ; rdar://7236213
2
3 ; XFAILed: now that the scheduler's 2-address hack is disabled, a lea is generated.
4 ; The code isn't any worse though.
5 ; XFAIL: *
2 ;
3 ; The scheduler's 2-address hack has been disabled, so there is
4 ; currently no good guarantee that this test will pass until the
5 ; machine scheduler develops an equivalent heuristic.
66
77 ; CodeGen shouldn't require any lea instructions inside the marked loop.
88 ; It should properly set up post-increment uses and do coalescing for
11 ; CHECK: exit1:
22 ; CHECK: .lcssa =
33 ; CHECK: exit2:
4 ; CHECK: .lcssa2 =
4 ; CHECK: .lcssa1 =
55 ; CHECK: exit3:
6 ; CHECK-NOT: .lcssa1 =
6 ; CHECK-NOT: .lcssa
77
88 ; Test to ensure that when there are multiple exit blocks, PHI nodes are
99 ; only inserted by LCSSA when there is a use dominated by a given exit
3434 ; CHECK: loop_begin.us1: ; preds = %loop_begin.backedge.us5, %.split.split.us
3535 ; CHECK-NEXT: %var_val.us2 = load i32* %var
3636 ; CHECK-NEXT: switch i32 2, label %default.us-lcssa.us-lcssa.us [
37 ; CHECK-NEXT: i32 1, label %inc.us3
38 ; CHECK-NEXT: i32 2, label %dec.us4
37 ; CHECK-NEXT: i32 1, label %inc.us4
38 ; CHECK-NEXT: i32 2, label %dec.us3
3939 ; CHECK-NEXT: ]
4040
41 ; CHECK: dec.us4: ; preds = %loop_begin.us1
41 ; CHECK: dec.us3: ; preds = %loop_begin.us1
4242 ; CHECK-NEXT: call void @decf() noreturn nounwind
4343 ; CHECK-NEXT: br label %loop_begin.backedge.us5
4444
8080 dec:
8181 call void @decf() noreturn nounwind
8282 br label %loop_begin
83 default:
83 default:
8484 br label %loop_exit
8585 loop_exit:
8686 ret i32 0
1818 ; CHECK: switch i32 1, label %second_switch.us [
1919 ; CHECK-NEXT: i32 1, label %inc.us
2020
21 ; CHECK: inc.us: ; preds = %second_switch.us, %loop_begin.us
22 ; CHECK-NEXT: call void @incf() noreturn nounwind
23 ; CHECK-NEXT: br label %loop_begin.backedge.us
24
2521 ; CHECK: second_switch.us: ; preds = %loop_begin.us
2622 ; CHECK-NEXT: switch i32 %d, label %default.us [
2723 ; CHECK-NEXT: i32 1, label %inc.us
2824 ; CHECK-NEXT: ]
25
26 ; CHECK: inc.us: ; preds = %second_switch.us, %loop_begin.us
27 ; CHECK-NEXT: call void @incf() noreturn nounwind
28 ; CHECK-NEXT: br label %loop_begin.backedge.us
2929
3030 ; CHECK: .split: ; preds = %..split_crit_edge
3131 ; CHECK-NEXT: br label %loop_begin
7272 call void @incf() noreturn nounwind
7373 br label %loop_begin
7474
75 default:
75 default:
7676 br label %loop_begin
7777
7878 loop_exit:
2424 ; CHECK-NEXT: switch i32 1, label %second_switch.us.us [
2525 ; CHECK-NEXT: i32 1, label %inc.us.us
2626
27 ; CHECK: second_switch.us.us: ; preds = %loop_begin.us.us
28 ; CHECK-NEXT: switch i32 1, label %default.us.us [
29 ; CHECK-NEXT: i32 1, label %inc.us.us
30
2731 ; CHECK: inc.us.us: ; preds = %second_switch.us.us, %loop_begin.us.us
2832 ; CHECK-NEXT: call void @incf() noreturn nounwind
2933 ; CHECK-NEXT: br label %loop_begin.backedge.us.us
30
31 ; CHECK: second_switch.us.us: ; preds = %loop_begin.us.us
32 ; CHECK-NEXT: switch i32 1, label %default.us.us [
33 ; CHECK-NEXT: i32 1, label %inc.us.us
3434
3535 ; CHECK: .split.us.split: ; preds = %.split.us..split.us.split_crit_edge
3636 ; CHECK-NEXT: br label %loop_begin.us
4040 ; CHECK-NEXT: switch i32 1, label %second_switch.us [
4141 ; CHECK-NEXT: i32 1, label %inc.us
4242
43 ; CHECK: inc.us: ; preds = %second_switch.us.inc.us_crit_edge, %loop_begin.us
44 ; CHECK-NEXT: call void @incf() noreturn nounwind
45 ; CHECK-NEXT: br label %loop_begin.backedge.us
46
4743 ; CHECK: second_switch.us: ; preds = %loop_begin.us
4844 ; CHECK-NEXT: switch i32 %d, label %default.us [
4945 ; CHECK-NEXT: i32 1, label %second_switch.us.inc.us_crit_edge
5147
5248 ; CHECK: second_switch.us.inc.us_crit_edge: ; preds = %second_switch.us
5349 ; CHECK-NEXT: br i1 true, label %us-unreachable8, label %inc.us
50
51 ; CHECK: inc.us: ; preds = %second_switch.us.inc.us_crit_edge, %loop_begin.us
52 ; CHECK-NEXT: call void @incf() noreturn nounwind
53 ; CHECK-NEXT: br label %loop_begin.backedge.us
5454
5555 ; CHECK: .split: ; preds = %..split_crit_edge
5656 ; CHECK-NEXT: %3 = icmp eq i32 %d, 1
6464
6565 ; CHECK: loop_begin.us1: ; preds = %loop_begin.backedge.us6, %.split.split.us
6666 ; CHECK-NEXT: %var_val.us2 = load i32* %var
67 ; CHECK-NEXT: switch i32 %c, label %second_switch.us4 [
67 ; CHECK-NEXT: switch i32 %c, label %second_switch.us3 [
6868 ; CHECK-NEXT: i32 1, label %loop_begin.inc_crit_edge.us
6969 ; CHECK-NEXT: ]
7070
71 ; CHECK: inc.us3: ; preds = %loop_begin.inc_crit_edge.us, %second_switch.us4
71 ; CHECK: second_switch.us3: ; preds = %loop_begin.us1
72 ; CHECK-NEXT: switch i32 1, label %default.us5 [
73 ; CHECK-NEXT: i32 1, label %inc.us4
74 ; CHECK-NEXT: ]
75
76 ; CHECK: inc.us4: ; preds = %loop_begin.inc_crit_edge.us, %second_switch.us3
7277 ; CHECK-NEXT: call void @incf() noreturn nounwind
7378 ; CHECK-NEXT: br label %loop_begin.backedge.us6
7479
75 ; CHECK: second_switch.us4: ; preds = %loop_begin.us1
76 ; CHECK-NEXT: switch i32 1, label %default.us5 [
77 ; CHECK-NEXT: i32 1, label %inc.us3
78 ; CHECK-NEXT: ]
79
8080 ; CHECK: loop_begin.inc_crit_edge.us: ; preds = %loop_begin.us1
81 ; CHECK-NEXT: br i1 true, label %us-unreachable.us-lcssa.us, label %inc.us3
81 ; CHECK-NEXT: br i1 true, label %us-unreachable.us-lcssa.us, label %inc.us4
8282
8383 ; CHECK: .split.split: ; preds = %.split..split.split_crit_edge
8484 ; CHECK-NEXT: br label %loop_begin
126126 call void @incf() noreturn nounwind
127127 br label %loop_begin
128128
129 default:
129 default:
130130 br label %loop_begin
131131
132132 loop_exit: