llvm.org GIT mirror llvm / 2aaad36
[PM/LoopUnswitch] Fix how the cloned loops are handled when updating analyses. Summary: I noticed this issue because we didn't put the primary cloned loop into the `NonChildClonedLoops` vector and so never iterated on it. Once I fixed that, it made it clear why I had to do a really complicated and unnecessary dance when updating the loops to remain in canonical form -- I was unwittingly working around the fact that the primary cloned loop wasn't in the expected list of cloned loops. Doh! Now that we include it in this vector, we don't need to return it and we can consolidate the update logic as we correctly have a single place where it can be handled. I've just added a test for the iteration order aspect as every time I changed the update logic partially or incorrectly here, an existing test failed and caught it so that seems well covered (which is also evidenced by the extensive working around of this missing update). Reviewers: asbirlea, sanjoy Subscribers: mcrosier, hiraditya, llvm-commits Differential Revision: https://reviews.llvm.org/D47647 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@333811 91177308-0d34-0410-b5e6-96231b3b80d8 Chandler Carruth 1 year, 3 months ago
2 changed file(s) with 132 addition(s) and 44 deletion(s). Raw diff Collapse all Expand all
791791 /// original loop, multiple cloned sibling loops may be created. All of them
792792 /// are returned so that the newly introduced loop nest roots can be
793793 /// identified.
794 static Loop *buildClonedLoops(Loop &OrigL, ArrayRef ExitBlocks,
795 const ValueToValueMapTy &VMap, LoopInfo &LI,
796 SmallVectorImpl &NonChildClonedLoops) {
794 static void buildClonedLoops(Loop &OrigL, ArrayRef ExitBlocks,
795 const ValueToValueMapTy &VMap, LoopInfo &LI,
796 SmallVectorImpl &NonChildClonedLoops) {
797797 Loop *ClonedL = nullptr;
798798
799799 auto *OrigPH = OrigL.getLoopPreheader();
886886 } else {
887887 LI.addTopLevelLoop(ClonedL);
888888 }
889 NonChildClonedLoops.push_back(ClonedL);
889890
890891 ClonedL->reserveBlocks(BlocksInClonedLoop.size());
891892 // We don't want to just add the cloned loop blocks based on how we
10391040 NonChildClonedLoops.push_back(cloneLoopNest(
10401041 *ChildL, ExitLoopMap.lookup(ClonedChildHeader), VMap, LI));
10411042 }
1042
1043 // Return the main cloned loop if any.
1044 return ClonedL;
10451043 }
10461044
10471045 static void
16071605 // different from the original structure due to the simplified CFG. This also
16081606 // handles inserting all the cloned blocks into the correct loops.
16091607 SmallVector NonChildClonedLoops;
1610 Loop *ClonedL =
1611 buildClonedLoops(L, ExitBlocks, VMap, LI, NonChildClonedLoops);
1608 buildClonedLoops(L, ExitBlocks, VMap, LI, NonChildClonedLoops);
16121609
16131610 // Delete anything that was made dead in the original loop due to
16141611 // unswitching.
16371634 // also need to cover any intervening loops. We add all of these loops to
16381635 // a list and sort them by loop depth to achieve this without updating
16391636 // unnecessary loops.
1640 auto UpdateLCSSA = [&](Loop &UpdateL) {
1637 auto UpdateLoop = [&](Loop &UpdateL) {
16411638 #ifndef NDEBUG
16421639 UpdateL.verifyLoop();
16431640 for (Loop *ChildL : UpdateL) {
16461643 "Perturbed a child loop's LCSSA form!");
16471644 }
16481645 #endif
1646 // First build LCSSA for this loop so that we can preserve it when
1647 // forming dedicated exits. We don't want to perturb some other loop's
1648 // LCSSA while doing that CFG edit.
16491649 formLCSSA(UpdateL, DT, &LI, nullptr);
1650
1651 // For loops reached by this loop's original exit blocks we may have
1652 // introduced new, non-dedicated exits. At least try to re-form dedicated
1653 // exits for these loops. This may fail if they couldn't have dedicated
1654 // exits to start with.
1655 formDedicatedExitBlocks(&UpdateL, &DT, &LI, /*PreserveLCSSA*/ true);
16501656 };
16511657
16521658 // For non-child cloned loops and hoisted loops, we just need to update LCSSA
16531659 // and we can do it in any order as they don't nest relative to each other.
1654 for (Loop *UpdatedL : llvm::concat(NonChildClonedLoops, HoistedLoops))
1655 UpdateLCSSA(*UpdatedL);
1660 //
1661 // Also check if any of the loops we have updated have become top-level loops
1662 // as that will necessitate widening the outer loop scope.
1663 for (Loop *UpdatedL :
1664 llvm::concat(NonChildClonedLoops, HoistedLoops)) {
1665 UpdateLoop(*UpdatedL);
1666 if (!UpdatedL->getParentLoop())
1667 OuterExitL = nullptr;
1668 }
1669 if (IsStillLoop) {
1670 UpdateLoop(L);
1671 if (!L.getParentLoop())
1672 OuterExitL = nullptr;
1673 }
16561674
16571675 // If the original loop had exit blocks, walk up through the outer most loop
16581676 // of those exit blocks to update LCSSA and form updated dedicated exits.
1659 if (OuterExitL != &L) {
1660 SmallVector OuterLoops;
1661 // We start with the cloned loop and the current loop if they are loops and
1662 // move toward OuterExitL. Also, if either the cloned loop or the current
1663 // loop have become top level loops we need to walk all the way out.
1664 if (ClonedL) {
1665 OuterLoops.push_back(ClonedL);
1666 if (!ClonedL->getParentLoop())
1667 OuterExitL = nullptr;
1668 }
1669 if (IsStillLoop) {
1670 OuterLoops.push_back(&L);
1671 if (!L.getParentLoop())
1672 OuterExitL = nullptr;
1673 }
1674 // Grab all of the enclosing loops now.
1677 if (OuterExitL != &L)
16751678 for (Loop *OuterL = ParentL; OuterL != OuterExitL;
16761679 OuterL = OuterL->getParentLoop())
1677 OuterLoops.push_back(OuterL);
1678
1679 // Finally, update our list of outer loops. This is nicely ordered to work
1680 // inside-out.
1681 for (Loop *OuterL : OuterLoops) {
1682 // First build LCSSA for this loop so that we can preserve it when
1683 // forming dedicated exits. We don't want to perturb some other loop's
1684 // LCSSA while doing that CFG edit.
1685 UpdateLCSSA(*OuterL);
1686
1687 // For loops reached by this loop's original exit blocks we may
1688 // introduced new, non-dedicated exits. At least try to re-form dedicated
1689 // exits for these loops. This may fail if they couldn't have dedicated
1690 // exits to start with.
1691 formDedicatedExitBlocks(OuterL, &DT, &LI, /*PreserveLCSSA*/ true);
1692 }
1693 }
1680 UpdateLoop(*OuterL);
16941681
16951682 #ifndef NDEBUG
16961683 // Verify the entire loop structure to catch any incorrect updates before we
25612561 exit:
25622562 ret void
25632563 }
2564
2565 ; Non-trivial loop unswitching where there are two invariant conditions, but the
2566 ; second one is only in the cloned copy of the loop after unswitching.
2567 define i32 @test24(i1* %ptr, i1 %cond1, i1 %cond2) {
2568 ; CHECK-LABEL: @test24(
2569 entry:
2570 br label %loop_begin
2571 ; CHECK-NEXT: entry:
2572 ; CHECK-NEXT: br i1 %cond1, label %entry.split.us, label %entry.split
2573
2574 loop_begin:
2575 br i1 %cond1, label %loop_a, label %loop_b
2576
2577 loop_a:
2578 br i1 %cond2, label %loop_a_a, label %loop_a_c
2579 ; The second unswitched condition.
2580 ;
2581 ; CHECK: entry.split.us:
2582 ; CHECK-NEXT: br i1 %cond2, label %entry.split.us.split.us, label %entry.split.us.split
2583
2584 loop_a_a:
2585 call void @a()
2586 br label %latch
2587 ; The 'loop_a_a' unswitched loop.
2588 ;
2589 ; CHECK: entry.split.us.split.us:
2590 ; CHECK-NEXT: br label %loop_begin.us.us
2591 ;
2592 ; CHECK: loop_begin.us.us:
2593 ; CHECK-NEXT: br label %loop_a.us.us
2594 ;
2595 ; CHECK: loop_a.us.us:
2596 ; CHECK-NEXT: br label %loop_a_a.us.us
2597 ;
2598 ; CHECK: loop_a_a.us.us:
2599 ; CHECK-NEXT: call void @a()
2600 ; CHECK-NEXT: br label %latch.us.us
2601 ;
2602 ; CHECK: latch.us.us:
2603 ; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
2604 ; CHECK-NEXT: br i1 %[[V]], label %loop_begin.us.us, label %loop_exit.split.us.split.us
2605 ;
2606 ; CHECK: loop_exit.split.us.split.us:
2607 ; CHECK-NEXT: br label %loop_exit.split
2608
2609 loop_a_c:
2610 call void @c()
2611 br label %latch
2612 ; The 'loop_a_c' unswitched loop.
2613 ;
2614 ; CHECK: entry.split.us.split:
2615 ; CHECK-NEXT: br label %loop_begin.us
2616 ;
2617 ; CHECK: loop_begin.us:
2618 ; CHECK-NEXT: br label %loop_a.us
2619 ;
2620 ; CHECK: loop_a.us:
2621 ; CHECK-NEXT: br label %loop_a_c.us
2622 ;
2623 ; CHECK: loop_a_c.us:
2624 ; CHECK-NEXT: call void @c()
2625 ; CHECK-NEXT: br label %latch
2626 ;
2627 ; CHECK: latch.us:
2628 ; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
2629 ; CHECK-NEXT: br i1 %[[V]], label %loop_begin.us, label %loop_exit.split.us.split
2630 ;
2631 ; CHECK: loop_exit.split.us.split:
2632 ; CHECK-NEXT: br label %loop_exit.split
2633
2634 loop_b:
2635 call void @b()
2636 br label %latch
2637 ; The 'loop_b' unswitched loop.
2638 ;
2639 ; CHECK: entry.split:
2640 ; CHECK-NEXT: br label %loop_begin
2641 ;
2642 ; CHECK: loop_begin:
2643 ; CHECK-NEXT: br label %loop_b
2644 ;
2645 ; CHECK: loop_b:
2646 ; CHECK-NEXT: call void @b()
2647 ; CHECK-NEXT: br label %latch
2648 ;
2649 ; CHECK: latch:
2650 ; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
2651 ; CHECK-NEXT: br i1 %[[V]], label %loop_begin, label %loop_exit.split
2652 ;
2653 ; CHECK: loop_exit.split:
2654 ; CHECK-NEXT: br label %loop_exit
2655
2656 latch:
2657 %v = load i1, i1* %ptr
2658 br i1 %v, label %loop_begin, label %loop_exit
2659
2660 loop_exit:
2661 ret i32 0
2662 ; CHECK: loop_exit:
2663 ; CHECK-NEXT: ret
2664 }