llvm.org GIT mirror llvm / 8e4472e
[INLINER] allow inlining of blockaddresses if sole uses are callbrs Summary: It was supposed that Ref LazyCallGraph::Edge's were being inserted by inlining, but that doesn't seem to be the case. Instead, it seems that there was no test for a blockaddress Constant in an instruction that referenced the function that contained the instruction. Ex: ``` define void @f() { %1 = alloca i8*, align 8 2: store i8* blockaddress(@f, %2), i8** %1, align 8 ret void } ``` When iterating blockaddresses, do not add the function they refer to back to the worklist if the blockaddress is referring to the contained function (as opposed to an external function). Because blockaddress has slightly different semantics than GNU C's address of labels, there are 3 cases that can occur with blockaddress, where only 1 can happen in GNU C due to C's scoping rules: * blockaddress is within the function it refers to (possible in GNU C). * blockaddress is within a different function than the one it refers to (not possible in GNU C). * blockaddress is used to declare a global (not possible in GNU C). The second case is tested in: ``` $ ./llvm/build/unittests/Analysis/AnalysisTests \ --gtest_filter=LazyCallGraphTest.HandleBlockAddress ``` This patch adjusts the iteration of blockaddresses in LazyCallGraph::visitReferences to not revisit the blockaddress's function in the first case. The Linux kernel contains code that's not semantically valid at -O0; specifically code passed to asm goto. It requires that asm goto be inline-able. This patch conservatively does not attempt to handle the more general case of inlining blockaddresses that have non-callbr users (pr/39560). 
https://bugs.llvm.org/show_bug.cgi?id=39560 https://bugs.llvm.org/show_bug.cgi?id=40722 https://github.com/ClangBuiltLinux/linux/issues/6 https://reviews.llvm.org/rL212077 Reviewers: jyknight, eli.friedman, chandlerc Reviewed By: chandlerc Subscribers: george.burgess.iv, nathanchance, mgorny, craig.topper, mengxu.gatech, void, mehdi_amini, E5ten, chandlerc, efriedma, eraman, hiraditya, haicheng, pirama, llvm-commits, srhines Tags: #llvm Differential Revision: https://reviews.llvm.org/D58260 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@361173 91177308-0d34-0410-b5e6-96231b3b80d8 Nick Desaulniers 2 months ago
5 changed file(s) with 199 addition(s) and 15 deletion(s). Raw diff Collapse all Expand all
3737 #include "llvm/ADT/DenseMap.h"
3838 #include "llvm/ADT/Optional.h"
3939 #include "llvm/ADT/PointerIntPair.h"
40 #include "llvm/ADT/STLExtras.h"
4041 #include "llvm/ADT/SetVector.h"
4142 #include "llvm/ADT/SmallPtrSet.h"
4243 #include "llvm/ADT/SmallVector.h"
10811082 continue;
10821083 }
10831084
1085 // The blockaddress constant expression is a weird special case, we can't
1086 // generically walk its operands the way we do for all other constants.
10841087 if (BlockAddress *BA = dyn_cast(C)) {
1085 // The blockaddress constant expression is a weird special case, we
1086 // can't generically walk its operands the way we do for all other
1087 // constants.
1088 if (Visited.insert(BA->getFunction()).second)
1089 Worklist.push_back(BA->getFunction());
1088 // If we've already visited the function referred to by the block
1089 // address, we don't need to revisit it.
1090 if (Visited.count(BA->getFunction()))
1091 continue;
1092
1093 // If all of the blockaddress' users are instructions within the
1094 // referred to function, we don't need to insert a cycle.
1095 if (llvm::all_of(BA->users(), [&](User *U) {
1096 if (Instruction *I = dyn_cast(U))
1097 return I->getFunction() == BA->getFunction();
1098 return false;
1099 }))
1100 continue;
1101
1102 // Otherwise we should go visit the referred to function.
1103 Visited.insert(BA->getFunction());
1104 Worklist.push_back(BA->getFunction());
10901105 continue;
10911106 }
10921107
18291829 if (BB->empty())
18301830 continue;
18311831
1832 // Disallow inlining a blockaddress. A blockaddress only has defined
1833 // behavior for an indirect branch in the same function, and we do not
1834 // currently support inlining indirect branches. But, the inliner may not
1835 // see an indirect branch that ends up being dead code at a particular call
1836 // site. If the blockaddress escapes the function, e.g., via a global
1837 // variable, inlining may lead to an invalid cross-function reference.
1832 // Disallow inlining a blockaddress with uses other than strictly callbr.
1833 // A blockaddress only has defined behavior for an indirect branch in the
1834 // same function, and we do not currently support inlining indirect
1835 // branches. But, the inliner may not see an indirect branch that ends up
1836 // being dead code at a particular call site. If the blockaddress escapes
1837 // the function, e.g., via a global variable, inlining may lead to an
1838 // invalid cross-function reference.
1839 // FIXME: pr/39560: continue relaxing this overt restriction.
18381840 if (BB->hasAddressTaken())
1839 return "blockaddress";
1841 for (User *U : BlockAddress::get(&*BB)->users())
1842 if (!isa(*U))
1843 return "blockaddress used outside of callbr";
18401844
18411845 // Analyze the cost of this block. If we blow through the threshold, this
18421846 // returns false, and we can bail on out.
20802084 InlineResult llvm::isInlineViable(Function &F) {
20812085 bool ReturnsTwice = F.hasFnAttribute(Attribute::ReturnsTwice);
20822086 for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
2083 // Disallow inlining of functions which contain indirect branches or
2084 // blockaddresses.
2087 // Disallow inlining of functions which contain indirect branches.
20852088 if (isa(BI->getTerminator()))
20862089 return "contains indirect branches";
20872090
2091 // Disallow inlining of blockaddresses which are used by non-callbr
2092 // instructions.
20882093 if (BI->hasAddressTaken())
2089 return "uses block address";
2094 for (User *U : BlockAddress::get(&*BI)->users())
2095 if (!isa(*U))
2096 return "blockaddress used outside of callbr";
20902097
20912098 for (auto &II : *BI) {
20922099 CallBase *Call = dyn_cast(&II);
4848 }
4949
5050 @run.bb = global [1 x i8*] zeroinitializer
51
52 ; Check that a function referenced by a global blockaddress won't be inlined,
53 ; even if it contains a callbr. We might be able to relax this in the future
54 ; as long as the global blockaddress is updated correctly.
55 @ba = internal global i8* blockaddress(@foo, %7), align 8
56 define internal i32 @foo(i32) {
57 %2 = alloca i32, align 4
58 %3 = alloca i32, align 4
59 store i32 %0, i32* %3, align 4
60 %4 = load i32, i32* %3, align 4
61 callbr void asm sideeffect "testl $0, $0; jne ${1:l};", "r,X,X,~{dirflag},~{fpsr},~{flags}"(i32 %4, i8* blockaddress(@foo, %7), i8* blockaddress(@foo, %6)) #1
62 to label %5 [label %7, label %6]
63
64 ;
65 store i32 0, i32* %2, align 4
66 br label %8
67
68 ;
69 store i32 1, i32* %2, align 4
70 br label %8
71
72 ;
73 store i32 2, i32* %2, align 4
74 br label %8
75
76 ;
77 %9 = load i32, i32* %2, align 4
78 ret i32 %9
79 }
80 define dso_local i32 @bar() {
81 %1 = call i32 @foo(i32 0)
82 ret i32 %1
83 }
84
85 ; CHECK: define dso_local i32 @bar() {
86 ; CHECK: %1 = call i32 @foo(i32 0)
87 ; CHECK: ret i32 %1
88 ; CHECK: }
89
90 ; Triple check that even with a global aggregate whose member is a blockaddress,
91 ; we still don't inline referred to functions.
92
93 %struct.foo = type { i8* }
94
95 @my_foo = dso_local global %struct.foo { i8* blockaddress(@baz, %7) }
96
97 define internal i32 @baz(i32) {
98 %2 = alloca i32, align 4
99 %3 = alloca i32, align 4
100 store i32 %0, i32* %3, align 4
101 %4 = load i32, i32* %3, align 4
102 callbr void asm sideeffect "testl $0, $0; jne ${1:l};", "r,X,X,~{dirflag},~{fpsr},~{flags}"(i32 %4, i8* blockaddress(@baz, %7), i8* blockaddress(@baz, %6)) #1
103 to label %5 [label %7, label %6]
104
105 ;
106 store i32 0, i32* %2, align 4
107 br label %8
108
109 ;
110 store i32 1, i32* %2, align 4
111 br label %8
112
113 ;
114 store i32 2, i32* %2, align 4
115 br label %8
116
117 ;
118 %9 = load i32, i32* %2, align 4
119 ret i32 %9
120 }
121 define dso_local i32 @quux() {
122 %1 = call i32 @baz(i32 0)
123 ret i32 %1
124 }
125
126 ; CHECK: define dso_local i32 @quux() {
127 ; CHECK: %1 = call i32 @baz(i32 0)
128 ; CHECK: ret i32 %1
129 ; CHECK: }
0 ; RUN: opt -inline -S < %s | FileCheck %s
1 ; RUN: opt -passes='cgscc(inline)' -S < %s | FileCheck %s
2
3 define dso_local i32 @main() #0 {
4 %1 = alloca i32, align 4
5 store i32 0, i32* %1, align 4
6 %2 = call i32 @t32(i32 0)
7 ret i32 %2
8 }
9
10 define internal i32 @t32(i32) #0 {
11 %2 = alloca i32, align 4
12 %3 = alloca i32, align 4
13 store i32 %0, i32* %3, align 4
14 %4 = load i32, i32* %3, align 4
15 callbr void asm sideeffect "testl $0, $0; jne ${1:l};", "r,X,X,~{dirflag},~{fpsr},~{flags}"(i32 %4, i8* blockaddress(@t32, %7), i8* blockaddress(@t32, %6)) #1
16 to label %5 [label %7, label %6]
17
18 ;
19 store i32 0, i32* %2, align 4
20 br label %8
21
22 ;
23 store i32 1, i32* %2, align 4
24 br label %8
25
26 ;
27 store i32 2, i32* %2, align 4
28 br label %8
29
30 ;
31 %9 = load i32, i32* %2, align 4
32 ret i32 %9
33 }
34
35 ; Check that @t32 no longer exists after inlining, as it has now been inlined
36 ; into @main.
37
38 ; CHECK-NOT: @t32
39 ; CHECK: define dso_local i32 @main
40 ; CHECK: callbr void asm sideeffect "testl $0, $0; jne ${1:l};", "r,X,X,~{dirflag},~{fpsr},~{flags}"(i32 %6, i8* blockaddress(@main, %9), i8* blockaddress(@main, %8))
41 ; CHECK: to label %7 [label %9, label %8]
42 ; CHECK: 7:
43 ; CHECK-NEXT: store i32 0, i32* %1, align 4
44 ; CHECK-NEXT: br label %t32.exit
45 ; CHECK: 8:
46 ; CHECK-NEXT: store i32 1, i32* %1, align 4
47 ; CHECK-NEXT: br label %t32.exit
48 ; CHECK: 9:
49 ; CHECK-NEXT: store i32 2, i32* %1, align 4
50 ; CHECK-NEXT: br label %t32.exit
51 ; CHECK: t32.exit:
52 ; CHECK-NEXT: %10 = load i32, i32* %1, align 4
53 ; CHECK: ret i32 %10
19741974 EXPECT_EQ(&FRC, CG.lookupRefSCC(F));
19751975 EXPECT_EQ(&GRC, CG.lookupRefSCC(G));
19761976 EXPECT_TRUE(GRC.isParentOf(FRC));
1977 }
1978
1979 // Test that a blockaddress that refers to itself creates no new RefSCC
1980 // connections. https://bugs.llvm.org/show_bug.cgi?id=40722
1981 TEST(LazyCallGraphTest, HandleBlockAddress2) {
1982 LLVMContext Context;
1983 std::unique_ptr M =
1984 parseAssembly(Context, "define void @f() {\n"
1985 " ret void\n"
1986 "}\n"
1987 "define void @g(i8** %ptr) {\n"
1988 "bb:\n"
1989 " store i8* blockaddress(@g, %bb), i8** %ptr\n"
1990 " ret void\n"
1991 "}\n");
1992 LazyCallGraph CG = buildCG(*M);
1993
1994 CG.buildRefSCCs();
1995 auto I = CG.postorder_ref_scc_begin();
1996 LazyCallGraph::RefSCC &GRC = *I++;
1997 LazyCallGraph::RefSCC &FRC = *I++;
1998 EXPECT_EQ(CG.postorder_ref_scc_end(), I);
1999
2000 LazyCallGraph::Node &F = *CG.lookup(lookupFunction(*M, "f"));
2001 LazyCallGraph::Node &G = *CG.lookup(lookupFunction(*M, "g"));
2002 EXPECT_EQ(&FRC, CG.lookupRefSCC(F));
2003 EXPECT_EQ(&GRC, CG.lookupRefSCC(G));
2004 EXPECT_FALSE(GRC.isParentOf(FRC));
2005 EXPECT_FALSE(FRC.isParentOf(GRC));
19772006 }
19782007
19792008 TEST(LazyCallGraphTest, ReplaceNodeFunction) {