llvm.org GIT mirror llvm / 86d5365
[PM] Introduce a devirtualization iteration layer for the new PM. This is an orthogonal and separated layer instead of being embedded inside the pass manager. While it adds a small amount of complexity, it is fairly minimal and the composability and control seems worth the cost. The logic for this ends up being nicely isolated and targeted. It should be easy to experiment with different iteration strategies wrapped around the CGSCC bottom-up walk using this kind of facility. The mechanism used to track devirtualization is the simplest one I came up with. I think it handles most of the cases the existing iteration machinery handles, but I haven't done a *very* in depth analysis. It does however match the basic intended semantics, and we can tweak or tune its exact behavior incrementally as necessary. One thing that we may want to revisit is freshly building the value handle set on each iteration. While I don't think this will be a significant cost (it is strictly fewer value handles but more churn of value handles than the old call graph), it is conceivable that we'll want a somewhat more clever tracking mechanism. My hope is to layer that on as a follow up patch with data supporting any implementation complexity it adds. This code also provides for a basic count heuristic: if the number of indirect calls decreases and the number of direct calls increases for a given function in the SCC, we assume devirtualization is responsible. This matches the heuristics currently used in the legacy pass manager. Differential Revision: https://reviews.llvm.org/D23114 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@290665 91177308-0d34-0410-b5e6-96231b3b80d8 Chandler Carruth 2 years ago
5 changed file(s) with 384 addition(s) and 94 deletion(s). Raw diff Collapse all Expand all
9090
9191 #include "llvm/ADT/PriorityWorklist.h"
9292 #include "llvm/Analysis/LazyCallGraph.h"
93 #include "llvm/IR/CallSite.h"
94 #include "llvm/IR/InstIterator.h"
9395 #include "llvm/IR/PassManager.h"
96 #include "llvm/IR/ValueHandle.h"
9497
9598 namespace llvm {
9699
606609 return CGSCCToFunctionPassAdaptor(std::move(Pass),
607610 DebugLogging);
608611 }
612
613 /// A helper that repeats an SCC pass each time an indirect call is refined to
614 /// a direct call by that pass.
615 ///
616 /// While the CGSCC pass manager works to re-visit SCCs and RefSCCs as they
617 /// change shape, we may also want to repeat an SCC pass if it simply refines
618 /// an indirect call to a direct call, even if doing so does not alter the
619 /// shape of the graph. Note that this only pertains to direct calls to
620 /// functions where IPO across the SCC may be able to compute more precise
621 /// results. For intrinsics, we assume scalar optimizations already can fully
622 /// reason about them.
623 ///
624 /// This repetition has the potential to be very large however, as each one
625 /// might refine a single call site. As a consequence, in practice we use an
626 /// upper bound on the number of repetitions to limit things.
627 template
628 class DevirtSCCRepeatedPass
629 : public PassInfoMixin> {
630 public:
631 explicit DevirtSCCRepeatedPass(PassT Pass, int MaxIterations,
632 bool DebugLogging = false)
633 : Pass(std::move(Pass)), MaxIterations(MaxIterations),
634 DebugLogging(DebugLogging) {}
635
636 /// Runs the wrapped pass up to \c MaxIterations on the SCC, iterating
637 /// whenever an indirect call is refined.
638 PreservedAnalyses run(LazyCallGraph::SCC &InitialC, CGSCCAnalysisManager &AM,
639 LazyCallGraph &CG, CGSCCUpdateResult &UR) {
640 PreservedAnalyses PA = PreservedAnalyses::all();
641
642 // The SCC may be refined while we are running passes over it, so set up
643 // a pointer that we can update.
644 LazyCallGraph::SCC *C = &InitialC;
645
646 // Collect value handles for all of the indirect call sites.
647 SmallVector CallHandles;
648
649 // Struct to track the counts of direct and indirect calls in each function
650 // of the SCC.
651 struct CallCount {
652 int Direct;
653 int Indirect;
654 };
655
656 // Put value handles on all of the indirect calls and return the number of
657 // direct calls for each function in the SCC.
658 auto ScanSCC = [](LazyCallGraph::SCC &C,
659 SmallVectorImpl &CallHandles) {
660 assert(CallHandles.empty() && "Must start with a clear set of handles.");
661
662 SmallVector CallCounts;
663 for (LazyCallGraph::Node &N : C) {
664 CallCounts.push_back({0, 0});
665 CallCount &Count = CallCounts.back();
666 for (Instruction &I : instructions(N.getFunction()))
667 if (auto CS = CallSite(&I)) {
668 if (CS.getCalledFunction()) {
669 ++Count.Direct;
670 } else {
671 ++Count.Indirect;
672 CallHandles.push_back(WeakVH(&I));
673 }
674 }
675 }
676
677 return CallCounts;
678 };
679
680 // Populate the initial call handles and get the initial call counts.
681 auto CallCounts = ScanSCC(*C, CallHandles);
682
683 for (int Iteration = 0;; ++Iteration) {
684 PreservedAnalyses PassPA = Pass.run(*C, AM, CG, UR);
685
686 // If the SCC structure has changed, bail immediately and let the outer
687 // CGSCC layer handle any iteration to reflect the refined structure.
688 if (UR.UpdatedC && UR.UpdatedC != C) {
689 PA.intersect(std::move(PassPA));
690 break;
691 }
692
693 // Check that we didn't miss any update scenario.
694 assert(!UR.InvalidatedSCCs.count(C) && "Processing an invalid SCC!");
695 assert(C->begin() != C->end() && "Cannot have an empty SCC!");
696 assert((int)CallCounts.size() == C->size() &&
697 "Cannot have changed the size of the SCC!");
698
699 // Check whether any of the handles were devirtualized.
700 auto IsDevirtualizedHandle = [&](WeakVH &CallH) {
701 if (!CallH)
702 return false;
703 auto CS = CallSite(CallH);
704 if (!CS)
705 return false;
706
707 // If the call is still indirect, leave it alone.
708 Function *F = CS.getCalledFunction();
709 if (!F)
710 return false;
711
712 if (DebugLogging)
713 dbgs() << "Found devirutalized call from "
714 << CS.getParent()->getParent()->getName() << " to "
715 << F->getName() << "\n";
716
717 // We now have a direct call where previously we had an indirect call,
718 // so iterate to process this devirtualization site.
719 return true;
720 };
721 bool Devirt = any_of(CallHandles, IsDevirtualizedHandle);
722
723 // Rescan to build up a new set of handles and count how many direct
724 // calls remain. If we decide to iterate, this also sets up the input to
725 // the next iteration.
726 CallHandles.clear();
727 auto NewCallCounts = ScanSCC(*C, CallHandles);
728
729 // If we haven't found an explicit devirtualization already see if we
730 // have decreased the number of indirect calls and increased the number
731 // of direct calls for any function in the SCC. This can be fooled by all
732 // manner of transformations such as DCE and other things, but seems to
733 // work well in practice.
734 if (!Devirt)
735 for (int i = 0, Size = C->size(); i < Size; ++i)
736 if (CallCounts[i].Indirect > NewCallCounts[i].Indirect &&
737 CallCounts[i].Direct < NewCallCounts[i].Direct) {
738 Devirt = true;
739 break;
740 }
741
742 if (!Devirt) {
743 PA.intersect(std::move(PassPA));
744 break;
745 }
746
747 // Otherwise, if we've already hit our max, we're done.
748 if (Iteration >= MaxIterations) {
749 if (DebugLogging)
750 dbgs() << "Found another devirtualization after hitting the max "
751 "number of repetitions ("
752 << MaxIterations << ") on SCC: " << *C << "\n";
753 PA.intersect(std::move(PassPA));
754 break;
755 }
756
757 if (DebugLogging)
758 dbgs() << "Repeating an SCC pass after finding a devirtualization in: "
759 << *C << "\n";
760
761 // Move over the new call counts in preparation for iterating.
762 CallCounts = std::move(NewCallCounts);
763
764 // Update the analysis manager with each run and intersect the total set
765 // of preserved analyses so we're ready to iterate.
766 AM.invalidate(*C, PassPA);
767 PA.intersect(std::move(PassPA));
768 }
769
770 // Note that we don't add any preserved entries here unlike a more normal
771 // "pass manager" because we only handle invalidation *between* iterations,
772 // not after the last iteration.
773 return PA;
774 }
775
776 private:
777 PassT Pass;
778 int MaxIterations;
779 bool DebugLogging;
780 };
781
782 /// \brief A function to deduce a function pass type and wrap it in the
783 /// templated adaptor.
784 template
785 DevirtSCCRepeatedPass
786 createDevirtSCCRepeatedPass(PassT Pass, int MaxIterations,
787 bool DebugLogging = false) {
788 return DevirtSCCRepeatedPass(std::move(Pass), MaxIterations,
789 DebugLogging);
609790 }
791 }
610792
611793 #endif
591591 return Count;
592592 }
593593
594 static Optional parseDevirtPassName(StringRef Name) {
595 if (!Name.consume_front("devirt<") || !Name.consume_back(">"))
596 return None;
597 int Count;
598 if (Name.getAsInteger(0, Count) || Count <= 0)
599 return None;
600 return Count;
601 }
602
594603 static bool isModulePassName(StringRef Name) {
595604 // Manually handle aliases for pre-configured pipeline fragments.
596605 if (Name.startswith("default") || Name.startswith("lto"))
628637
629638 // Explicitly handle custom-parsed pass names.
630639 if (parseRepeatPassName(Name))
640 return true;
641 if (parseDevirtPassName(Name))
631642 return true;
632643
633644 #define CGSCC_PASS(NAME, CREATE_PASS) \
870881 DebugLogging))
871882 return false;
872883 CGPM.addPass(createRepeatedPass(*Count, std::move(NestedCGPM)));
884 return true;
885 }
886 if (auto MaxRepetitions = parseDevirtPassName(Name)) {
887 CGSCCPassManager NestedCGPM(DebugLogging);
888 if (!parseCGSCCPassPipeline(NestedCGPM, InnerPipeline, VerifyEachPass,
889 DebugLogging))
890 return false;
891 CGPM.addPass(createDevirtSCCRepeatedPass(std::move(NestedCGPM),
892 *MaxRepetitions, DebugLogging));
873893 return true;
874894 }
875895 // Normal passes can't have pipelines.
0 ; The CGSCC pass manager includes an SCC iteration utility that tracks indirect
1 ; calls that are turned into direct calls (devirtualization) and re-visits the
2 ; SCC to expose those calls to the SCC-based IPO passes. We trigger
3 ; devirtualization here with GVN which forwards a store through a load and to
4 ; an indirect call.
5 ;
6 ; RUN: opt -aa-pipeline=basic-aa -passes='cgscc(function-attrs,function(gvn,instcombine))' -S < %s | FileCheck %s --check-prefix=CHECK --check-prefix=BEFORE
7 ; RUN: opt -aa-pipeline=basic-aa -passes='cgscc(devirt<1>(function-attrs,function(gvn,instcombine)))' -S < %s | FileCheck %s --check-prefix=CHECK --check-prefix=AFTER --check-prefix=AFTER1
8 ; RUN: opt -aa-pipeline=basic-aa -passes='cgscc(devirt<2>(function-attrs,function(gvn,instcombine)))' -S < %s | FileCheck %s --check-prefix=CHECK --check-prefix=AFTER --check-prefix=AFTER2
9
10 declare void @readnone() readnone
11 ; CHECK: Function Attrs: readnone
12 ; CHECK: declare void @readnone()
13
14 declare void @unknown()
15 ; CHECK-NOT: Function Attrs
16 ; CHECK: declare void @unknown()
17
18 ; The @test1 function checks that when we refine an indirect call to a direct
19 ; call we revisit the SCC passes to reflect the more precise information. This
20 ; is the basic functionality.
21
22 define void @test1() {
23 ; BEFORE-NOT: Function Attrs
24 ; AFTER: Function Attrs: readnone
25 ; CHECK: define void @test1()
26 entry:
27 %fptr = alloca void ()*
28 store void ()* @readnone, void ()** %fptr
29 %f = load void ()*, void ()** %fptr
30 call void %f()
31 ret void
32 }
33
34 ; The @test2_* functions check that when we need multiple (in this case 2)
35 ; repetitions to compute some state that is incrementally exposed with each
36 ; one, the limit on repetitions is enforced. So we make progress with
37 ; one repetition but not as much as with two.
38 ;
39 ; This is somewhat awkward to test because we have to contrive to have a state
40 ; repetition triggered and observed with very few passes. The technique here
41 ; is to have one indirect call that can only be resolved when the entire SCC is
42 ; deduced as readonly, and mark that indirect call at the call site as readonly
43 ; to make that possible. This forces us to first deduce readonly, then
44 ; devirtualize again, and then deduce readnone.
45
46 declare void @readnone_with_arg(void ()**) readnone
47 ; CHECK: Function Attrs: readnone
48 ; CHECK: declare void @readnone_with_arg(void ()**)
49
50 define void @test2_a(void ()** %ignore) {
51 ; BEFORE-NOT: Function Attrs
52 ; AFTER1: Function Attrs: readonly
53 ; AFTER2: Function Attrs: readnone
54 ; BEFORE: define void @test2_a(void ()** %ignore)
55 ; AFTER: define void @test2_a(void ()** readnone %ignore)
56 entry:
57 %f1ptr = alloca void (void ()**)*
58 store void (void ()**)* @readnone_with_arg, void (void ()**)** %f1ptr
59 %f1 = load void (void ()**)*, void (void ()**)** %f1ptr
60 ; This indirect call is the first to be resolved, allowing us to deduce
61 ; readonly but not (yet) readnone.
62 call void %f1(void ()** %ignore)
63 ; CHECK: call void @readnone_with_arg(void ()** %ignore)
64
65 ; Bogus call to test2_b to make this a cycle.
66 call void @test2_b()
67
68 ret void
69 }
70
71 define void @test2_b() {
72 ; BEFORE-NOT: Function Attrs
73 ; AFTER1: Function Attrs: readonly
74 ; AFTER2: Function Attrs: readnone
75 ; CHECK: define void @test2_b()
76 entry:
77 %f2ptr = alloca void ()*
78 store void ()* @readnone, void ()** %f2ptr
79 ; Call the other function here to prevent forwarding until the SCC has had
80 ; function attrs deduced.
81 call void @test2_a(void ()** %f2ptr)
82
83 %f2 = load void ()*, void ()** %f2ptr
84 ; This is the second indirect call to be resolved, and can only be resolved
85 ; after we deduce 'readonly' for the rest of the SCC. Once it is
86 ; devirtualized, we can deduce readnone for the SCC.
87 call void %f2() readonly
88 ; BEFORE: call void %f2()
89 ; AFTER: call void @readnone()
90
91 ret void
92 }
93
94 declare i8* @memcpy(i8*, i8*, i64)
95 ; CHECK-NOT: Function Attrs
96 ; CHECK: declare i8* @memcpy(i8*, i8*, i64)
97
98 ; The @test3 function checks that when we refine an indirect call to an
99 ; intrinsic we still revisit the SCC pass. This also covers cases where the
100 ; value handle itself doesn't persist due to the nature of how instcombine
101 ; creates the memcpy intrinsic call, and we rely on the count of indirect calls
102 ; decreasing and the count of direct calls increasing.
103 define void @test3(i8* %src, i8* %dest, i64 %size) {
104 ; CHECK-NOT: Function Attrs
105 ; BEFORE: define void @test3(i8* %src, i8* %dest, i64 %size)
106 ; AFTER: define void @test3(i8* nocapture readonly %src, i8* nocapture %dest, i64 %size)
107 %fptr = alloca i8* (i8*, i8*, i64)*
108 store i8* (i8*, i8*, i64)* @memcpy, i8* (i8*, i8*, i64)** %fptr
109 %f = load i8* (i8*, i8*, i64)*, i8* (i8*, i8*, i64)** %fptr
110 call i8* %f(i8* %dest, i8* %src, i64 %size)
111 ; CHECK: call void @llvm.memcpy
112 ret void
113 }
0 ; Make sure that even without some external devirtualization iteration tool,
1 ; the CGSCC pass manager correctly observes and re-visits SCCs that change
2 ; structure due to devirtualization. We trigger devirtualization here with GVN
3 ; which forwards a store through a load and to an indirect call.
4 ;
5 ; RUN: opt -aa-pipeline=basic-aa -passes='cgscc(function-attrs)' -S < %s | FileCheck %s --check-prefix=BEFORE
16 ; RUN: opt -aa-pipeline=basic-aa -passes='cgscc(function-attrs,function(gvn))' -S < %s | FileCheck %s --check-prefix=AFTER
27 ;
38 ; Also check that adding an extra CGSCC pass after the function update but
49 ; without requiring the outer manager to iterate doesn't break any invariant.
5 ; RUN: opt -aa-pipeline=basic-aa -passes='cgscc(function-attrs,function(gvn),function-attrs)' -S < %s | FileCheck %s --check-prefix=AFTER2
10 ; RUN: opt -aa-pipeline=basic-aa -passes='cgscc(function-attrs,function(gvn),function-attrs)' -S < %s | FileCheck %s --check-prefix=AFTER
611
712 declare void @readnone() readnone
813 declare void @unknown()
914
10 ; The @test1_* functions check that when we refine an indirect call to a direct
11 ; call, even if it doesn't change the call graph structure, we revisit the SCC
12 ; passes to reflect the more precise information.
13 ; FIXME: Currently, this isn't implemented in the new pass manager and so we
14 ; only get this with AFTER2, not with AFTER.
15 ; The @test1_* checks that if we refine an indirect call to a direct call and
16 ; in the process change the very structure of the call graph we also revisit
17 ; that component of the graph and do so in an up-to-date fashion.
1518
16 ; BEFORE: define void @test1_a() {
17 ; AFTER: define void @test1_a() {
18 ; AFTER2: define void @test1_a() {
19 define void @test1_a() {
19 ; BEFORE: define void @test1_a1() {
20 ; AFTER: define void @test1_a1() {
21 define void @test1_a1() {
2022 %fptr = alloca void()*
21 store void()* @unknown, void()** %fptr
23 store void()* @test1_b2, void()** %fptr
24 store void()* @test1_b1, void()** %fptr
2225 %f = load void()*, void()** %fptr
2326 call void %f()
2427 ret void
2528 }
2629
27 ; BEFORE: define void @test1_b() {
28 ; AFTER: define void @test1_b() {
29 ; AFTER2: define void @test1_b() #0 {
30 define void @test1_b() {
30 ; BEFORE: define void @test1_b1() {
31 ; AFTER: define void @test1_b1() {
32 define void @test1_b1() {
33 call void @unknown()
34 call void @test1_a1()
35 ret void
36 }
37
38 ; BEFORE: define void @test1_a2() {
39 ; AFTER: define void @test1_a2() #0 {
40 define void @test1_a2() {
3141 %fptr = alloca void()*
42 store void()* @test1_b1, void()** %fptr
43 store void()* @test1_b2, void()** %fptr
44 %f = load void()*, void()** %fptr
45 call void %f()
46 ret void
47 }
48
49 ; BEFORE: define void @test1_b2() {
50 ; AFTER: define void @test1_b2() #0 {
51 define void @test1_b2() {
52 call void @readnone()
53 call void @test1_a2()
54 ret void
55 }
56
57
58 ; The @test2_* set of functions exercise a case where running function passes
59 ; introduces a new post-order relationship that was not present originally and
60 ; makes sure we walk across the SCCs in that order.
61
62 ; CHECK: define void @test2_a() {
63 define void @test2_a() {
64 call void @test2_b1()
65 call void @test2_b2()
66 call void @test2_b3()
67 call void @unknown()
68 ret void
69 }
70
71 ; CHECK: define void @test2_b1() #0 {
72 define void @test2_b1() {
73 %fptr = alloca void()*
74 store void()* @test2_a, void()** %fptr
3275 store void()* @readnone, void()** %fptr
3376 %f = load void()*, void()** %fptr
3477 call void %f()
3578 ret void
3679 }
3780
38 ; The @test2_* checks that if we refine an indirect call to a direct call and
39 ; in the process change the very structure of the call graph we also revisit
40 ; that component of the graph and do so in an up-to-date fashion.
41
42 ; BEFORE: define void @test2_a1() {
43 ; AFTER: define void @test2_a1() {
44 ; AFTER2: define void @test2_a1() {
45 define void @test2_a1() {
81 ; CHECK: define void @test2_b2() #0 {
82 define void @test2_b2() {
4683 %fptr = alloca void()*
84 store void()* @test2_a, void()** %fptr
4785 store void()* @test2_b2, void()** %fptr
86 store void()* @test2_b3, void()** %fptr
4887 store void()* @test2_b1, void()** %fptr
4988 %f = load void()*, void()** %fptr
5089 call void %f()
5190 ret void
5291 }
5392
54 ; BEFORE: define void @test2_b1() {
55 ; AFTER: define void @test2_b1() {
56 ; AFTER2: define void @test2_b1() {
57 define void @test2_b1() {
58 call void @unknown()
59 call void @test2_a1()
60 ret void
61 }
62
63 ; BEFORE: define void @test2_a2() {
64 ; AFTER: define void @test2_a2() #0 {
65 ; AFTER2: define void @test2_a2() #0 {
66 define void @test2_a2() {
93 ; CHECK: define void @test2_b3() #0 {
94 define void @test2_b3() {
6795 %fptr = alloca void()*
96 store void()* @test2_a, void()** %fptr
97 store void()* @test2_b2, void()** %fptr
98 store void()* @test2_b3, void()** %fptr
6899 store void()* @test2_b1, void()** %fptr
69 store void()* @test2_b2, void()** %fptr
70 %f = load void()*, void()** %fptr
71 call void %f()
72 ret void
73 }
74
75 ; BEFORE: define void @test2_b2() {
76 ; AFTER: define void @test2_b2() #0 {
77 ; AFTER2: define void @test2_b2() #0 {
78 define void @test2_b2() {
79 call void @readnone()
80 call void @test2_a2()
81 ret void
82 }
83
84
85 ; The @test3_* set of functions exercise a case where running function passes
86 ; introduces a new post-order relationship that was not present originally and
87 ; makes sure we walk across the SCCs in that order.
88
89 ; CHECK: define void @test3_a() {
90 define void @test3_a() {
91 call void @test3_b1()
92 call void @test3_b2()
93 call void @test3_b3()
94 call void @unknown()
95 ret void
96 }
97
98 ; CHECK: define void @test3_b1() #0 {
99 define void @test3_b1() {
100 %fptr = alloca void()*
101 store void()* @test3_a, void()** %fptr
102 store void()* @readnone, void()** %fptr
103 %f = load void()*, void()** %fptr
104 call void %f()
105 ret void
106 }
107
108 ; CHECK: define void @test3_b2() #0 {
109 define void @test3_b2() {
110 %fptr = alloca void()*
111 store void()* @test3_a, void()** %fptr
112 store void()* @test3_b2, void()** %fptr
113 store void()* @test3_b3, void()** %fptr
114 store void()* @test3_b1, void()** %fptr
115 %f = load void()*, void()** %fptr
116 call void %f()
117 ret void
118 }
119
120 ; CHECK: define void @test3_b3() #0 {
121 define void @test3_b3() {
122 %fptr = alloca void()*
123 store void()* @test3_a, void()** %fptr
124 store void()* @test3_b2, void()** %fptr
125 store void()* @test3_b3, void()** %fptr
126 store void()* @test3_b1, void()** %fptr
127100 %f = load void()*, void()** %fptr
128101 call void %f()
129102 ret void
0 ; RUN: opt < %s -inline -S | FileCheck %s
1 ; RUN: opt < %s -passes='cgscc(devirt<4>(inline))' -S | FileCheck %s
12 ; PR4834
23
34 define i32 @test1() {