llvm.org GIT mirror llvm / cbd4ff9
[PartialInlining] Shrinkwrap allocas with live range contained in outline region. Differential Revision: http://reviews.llvm.org/D33618 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@304245 91177308-0d34-0410-b5e6-96231b3b80d8 Xinliang David Li 2 years ago
6 changed file(s) with 355 addition(s) and 8 deletion(s). Raw diff Collapse all Expand all
2424 class BranchProbabilityInfo;
2525 class DominatorTree;
2626 class Function;
27 class Instruction;
2728 class Loop;
2829 class Module;
2930 class RegionNode;
102103 /// a code sequence, that sequence is modified, including changing these
103104 /// sets, before extraction occurs. These modifications won't have any
104105 /// significant impact on the cost however.
105 void findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs) const;
106 void findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs,
107 const ValueSet &Allocas) const;
108 /// Find the set of allocas whose live ranges are contained within the
109 /// outlined region.
110 ///
111 /// Allocas which have lifetime markers contained in the outlined region
112 /// should be pushed to the outlined function. The address bitcasts that
113 /// are used by the lifetime markers are also candidates for shrink-
114 /// wrapping. The instructions that need to be sunk are collected in
115 /// 'Allocas'.
116 void findAllocas(ValueSet &Allocas) const;
106117
107118 private:
108119 void severSplitPHINodes(BasicBlock *&Header);
2626 #include "llvm/IR/DerivedTypes.h"
2727 #include "llvm/IR/Dominators.h"
2828 #include "llvm/IR/Instructions.h"
29 #include "llvm/IR/IntrinsicInst.h"
2930 #include "llvm/IR/Intrinsics.h"
3031 #include "llvm/IR/LLVMContext.h"
3132 #include "llvm/IR/MDBuilder.h"
140141 return false;
141142 }
142143
143 void CodeExtractor::findInputsOutputs(ValueSet &Inputs,
144 ValueSet &Outputs) const {
144 void CodeExtractor::findAllocas(ValueSet &SinkCands) const {
145 Function *Func = (*Blocks.begin())->getParent();
146 for (BasicBlock &BB : *Func) {
147 if (Blocks.count(&BB))
148 continue;
149 for (Instruction &II : BB) {
150 auto *AI = dyn_cast<AllocaInst>(&II);
151 if (!AI)
152 continue;
153
154 // Returns true if matching lifetime markers are found within
155 // the outlined region.
156 auto GetLifeTimeMarkers = [&](Instruction *Addr) {
157 Instruction *LifeStart = nullptr, *LifeEnd = nullptr;
158 for (User *U : Addr->users()) {
159 if (!definedInRegion(Blocks, U))
160 return false;
161
162 IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(U);
163 if (IntrInst) {
164 if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start)
165 LifeStart = IntrInst;
166 if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_end)
167 LifeEnd = IntrInst;
168 }
169 }
170 return LifeStart && LifeEnd;
171 };
172
173 if (GetLifeTimeMarkers(AI)) {
174 SinkCands.insert(AI);
175 continue;
176 }
177
178 // Follow the bitcast:
179 Instruction *MarkerAddr = nullptr;
180 for (User *U : AI->users()) {
181 if (U->stripPointerCasts() == AI) {
182 Instruction *Bitcast = cast<Instruction>(U);
183 if (GetLifeTimeMarkers(Bitcast)) {
184 MarkerAddr = Bitcast;
185 continue;
186 }
187 }
188 if (!definedInRegion(Blocks, U)) {
189 MarkerAddr = nullptr;
190 break;
191 }
192 }
193 if (MarkerAddr) {
194 if (!definedInRegion(Blocks, MarkerAddr))
195 SinkCands.insert(MarkerAddr);
196 SinkCands.insert(AI);
197 }
198 }
199 }
200 }
201
202 void CodeExtractor::findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs,
203 const ValueSet &SinkCands) const {
204
145205 for (BasicBlock *BB : Blocks) {
146206 // If a used value is defined outside the region, it's an input. If an
147207 // instruction is used outside the region, it's an output.
148208 for (Instruction &II : *BB) {
149209 for (User::op_iterator OI = II.op_begin(), OE = II.op_end(); OI != OE;
150 ++OI)
151 if (definedInCaller(Blocks, *OI))
152 Inputs.insert(*OI);
210 ++OI) {
211 Value *V = *OI;
212 if (!SinkCands.count(V) && definedInCaller(Blocks, V))
213 Inputs.insert(V);
214 }
153215
154216 for (User *U : II.users())
155217 if (!definedInRegion(Blocks, U)) {
717779 if (!isEligible())
718780 return nullptr;
719781
720 ValueSet inputs, outputs;
782 ValueSet inputs, outputs, SinkingCands;
721783
722784 // Assumption: this is a single-entry code region, and the header is the first
723785 // block in the region.
756818 "newFuncRoot");
757819 newFuncRoot->getInstList().push_back(BranchInst::Create(header));
758820
821 findAllocas(SinkingCands);
822
759823 // Find inputs to, outputs from the code region.
760 findInputsOutputs(inputs, outputs);
824 findInputsOutputs(inputs, outputs, SinkingCands);
825
826 // Now sink all instructions which only have non-phi uses inside the region
827 for (auto *II : SinkingCands)
828 cast<Instruction>(II)->moveBefore(*newFuncRoot,
829 newFuncRoot->getFirstInsertionPt());
761830
762831 // Calculate the exit blocks for the extracted region and the total exit
763832 // weights for each of those blocks.
0
1 ; RUN: opt < %s -partial-inliner -skip-partial-inlining-cost-analysis -S | FileCheck %s
2 ; RUN: opt < %s -passes=partial-inliner -skip-partial-inlining-cost-analysis -S | FileCheck %s
3
4 %"class.base" = type { %"struct.base"* }
5 %"struct.base" = type opaque
6
7 @g = external local_unnamed_addr global i32, align 4
8
9 ; Function Attrs: nounwind uwtable
10 define i32 @callee_sinkable_bitcast(i32 %arg) local_unnamed_addr #0 {
11 ; CHECK-LABEL:define{{.*}}@callee_sinkable_bitcast.{{[0-9]}}
12 ; CHECK: alloca
13 ; CHECK-NEXT: bitcast
14 ; CHECK: call void @llvm.lifetime
15 bb:
16 %tmp = alloca %"class.base", align 4
17 %tmp1 = bitcast %"class.base"* %tmp to i8*
18 %tmp2 = load i32, i32* @g, align 4, !tbaa !2
19 %tmp3 = add nsw i32 %tmp2, 1
20 %tmp4 = icmp slt i32 %arg, 0
21 br i1 %tmp4, label %bb6, label %bb5
22
23 bb5: ; preds = %bb
24 call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %tmp1) #2
25 %tmp11 = bitcast %"class.base"* %tmp to i32*
26 store i32 %tmp3, i32* %tmp11, align 4, !tbaa !2
27 store i32 %tmp3, i32* @g, align 4, !tbaa !2
28 call void @bar(i32* nonnull %tmp11) #2
29 call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %tmp1) #2
30 br label %bb6
31
32 bb6: ; preds = %bb5, %bb
33 %tmp7 = phi i32 [ 1, %bb5 ], [ 0, %bb ]
34 ret i32 %tmp7
35 }
36
37 declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #1
38
39 declare void @bar(i32*) local_unnamed_addr #2
40 declare void @bar2(i32*, i32*) local_unnamed_addr #1
41
42
43 ; Function Attrs: argmemonly nounwind
44 declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #1
45
46 ; Function Attrs: nounwind uwtable
47 define i32 @caller(i32 %arg) local_unnamed_addr #0 {
48 bb:
49 %tmp = tail call i32 @callee_sinkable_bitcast(i32 %arg)
50 ret i32 %tmp
51 }
52
53 attributes #0 = { nounwind uwtable}
54 attributes #1 = { argmemonly nounwind }
55 attributes #2 = { nounwind }
56
57 !llvm.module.flags = !{!0}
58 !llvm.ident = !{!1}
59
60 !0 = !{i32 1, !"wchar_size", i32 4}
61 !1 = !{!"clang version 5.0.0 (trunk 303574)"}
62 !2 = !{!3, !3, i64 0}
63 !3 = !{!"int", !4, i64 0}
64 !4 = !{!"omnipotent char", !5, i64 0}
65 !5 = !{!"Simple C/C++ TBAA"}
66
67
0 ; RUN: opt < %s -partial-inliner -skip-partial-inlining-cost-analysis -S | FileCheck %s
1 ; RUN: opt < %s -passes=partial-inliner -skip-partial-inlining-cost-analysis -S | FileCheck %s
2
3 %"class.base" = type { %"struct.base"* }
4 %"struct.base" = type opaque
5
6 @g = external local_unnamed_addr global i32, align 4
7
8 define i32 @callee_no_bitcast(i32 %arg) local_unnamed_addr #0 {
9 ; CHECK-LABEL:define{{.*}}@callee_no_bitcast.{{[0-9]}}
10 ; CHECK: alloca
11 ; CHECK: call void @llvm.lifetime
12 bb:
13 %tmp = alloca i8, align 4
14 %tmp2 = load i32, i32* @g, align 4, !tbaa !2
15 %tmp3 = add nsw i32 %tmp2, 1
16 %tmp4 = icmp slt i32 %arg, 0
17 br i1 %tmp4, label %bb6, label %bb5
18
19 bb5: ; preds = %bb
20 call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %tmp) #2
21 store i32 %tmp3, i32* @g, align 4, !tbaa !2
22 %tmp11 = bitcast i8 * %tmp to i32*
23 call void @bar(i32* nonnull %tmp11) #2
24 call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %tmp) #2
25 br label %bb6
26
27 bb6: ; preds = %bb5, %bb
28 %tmp7 = phi i32 [ 1, %bb5 ], [ 0, %bb ]
29 ret i32 %tmp7
30 }
31
32 ; Function Attrs: argmemonly nounwind
33 declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #1
34
35 declare void @bar(i32*) local_unnamed_addr #2
36 declare void @bar2(i32*, i32*) local_unnamed_addr #1
37
38
39 ; Function Attrs: argmemonly nounwind
40 declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #1
41
42 ; Function Attrs: nounwind uwtable
43 define i32 @caller(i32 %arg) local_unnamed_addr #0 {
44 bb:
45 %tmp = tail call i32 @callee_no_bitcast(i32 %arg)
46 ret i32 %tmp
47 }
48
49 attributes #0 = { nounwind uwtable}
50 attributes #1 = { argmemonly nounwind }
51 attributes #2 = { nounwind }
52
53 !llvm.module.flags = !{!0}
54 !llvm.ident = !{!1}
55
56 !0 = !{i32 1, !"wchar_size", i32 4}
57 !1 = !{!"clang version 5.0.0 (trunk 303574)"}
58 !2 = !{!3, !3, i64 0}
59 !3 = !{!"int", !4, i64 0}
60 !4 = !{!"omnipotent char", !5, i64 0}
61 !5 = !{!"Simple C/C++ TBAA"}
62
63
64
0 ; RUN: opt < %s -partial-inliner -skip-partial-inlining-cost-analysis -S | FileCheck %s
1 ; RUN: opt < %s -passes=partial-inliner -skip-partial-inlining-cost-analysis -S | FileCheck %s
2
3 %"class.base" = type { %"struct.base"* }
4 %"struct.base" = type opaque
5
6 @g = external local_unnamed_addr global i32, align 4
7
8 define i32 @callee_unknown_use1(i32 %arg) local_unnamed_addr #0 {
9 ; CHECK-LABEL:define{{.*}}@callee_unknown_use1.{{[0-9]}}
10 ; CHECK-NOT: alloca
11 ; CHECK: call void @llvm.lifetime
12 bb:
13 %tmp = alloca i8, align 4
14 %tmp2 = load i32, i32* @g, align 4, !tbaa !2
15 %tmp3 = add nsw i32 %tmp2, 1
16 %tmp4 = icmp slt i32 %arg, 0
17 br i1 %tmp4, label %bb6, label %bb5
18
19 bb5: ; preds = %bb
20 call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %tmp) #2
21 store i32 %tmp3, i32* @g, align 4, !tbaa !2
22 %tmp11 = bitcast i8* %tmp to i32*
23 call void @bar(i32* nonnull %tmp11) #2
24 call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %tmp) #2
25 br label %bb6
26
27 bb6: ; preds = %bb5, %bb
28 %tmp7 = phi i32 [ 1, %bb5 ], [ 0, %bb ]
29 %tmp1 = bitcast i8* %tmp to i32*
30 ret i32 %tmp7
31 }
32
33
34 ; Function Attrs: argmemonly nounwind
35 declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #1
36
37 declare void @bar(i32*) local_unnamed_addr #2
38 declare void @bar2(i32*, i32*) local_unnamed_addr #1
39
40
41 ; Function Attrs: argmemonly nounwind
42 declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #1
43
44 ; Function Attrs: nounwind uwtable
45 define i32 @caller(i32 %arg) local_unnamed_addr #0 {
46 bb:
47 %tmp = tail call i32 @callee_unknown_use1(i32 %arg)
48 ret i32 %tmp
49 }
50
51 attributes #0 = { nounwind uwtable}
52 attributes #1 = { argmemonly nounwind }
53 attributes #2 = { nounwind }
54
55 !llvm.module.flags = !{!0}
56 !llvm.ident = !{!1}
57
58 !0 = !{i32 1, !"wchar_size", i32 4}
59 !1 = !{!"clang version 5.0.0 (trunk 303574)"}
60 !2 = !{!3, !3, i64 0}
61 !3 = !{!"int", !4, i64 0}
62 !4 = !{!"omnipotent char", !5, i64 0}
63 !5 = !{!"Simple C/C++ TBAA"}
64
65
66
0 ; RUN: opt < %s -partial-inliner -skip-partial-inlining-cost-analysis -S | FileCheck %s
1 ; RUN: opt < %s -passes=partial-inliner -skip-partial-inlining-cost-analysis -S | FileCheck %s
2
3 %"class.base" = type { %"struct.base"* }
4 %"struct.base" = type opaque
5
6 @g = external local_unnamed_addr global i32, align 4
7
8 define i32 @callee_unknown_use2(i32 %arg) local_unnamed_addr #0 {
9 ; CHECK-LABEL:define{{.*}}@callee_unknown_use2.{{[0-9]}}
10 ; CHECK-NOT: alloca
11 ; CHECK: call void @llvm.lifetime
12 bb:
13 %tmp = alloca i32, align 4
14 %tmp1 = bitcast i32* %tmp to i8*
15 %tmp2 = load i32, i32* @g, align 4, !tbaa !2
16 %tmp3 = add nsw i32 %tmp2, 1
17 %tmp4 = icmp slt i32 %arg, 0
18 br i1 %tmp4, label %bb6, label %bb5
19
20 bb5: ; preds = %bb
21 call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %tmp1) #2
22 store i32 %tmp3, i32* %tmp, align 4, !tbaa !2
23 store i32 %tmp3, i32* @g, align 4, !tbaa !2
24 call void @bar(i32* nonnull %tmp) #2
25 call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %tmp1) #2
26 br label %bb6
27
28 bb6: ; preds = %bb5, %bb
29 %tmp7 = phi i32 [ 1, %bb5 ], [ 0, %bb ]
30 %tmp10 = bitcast i8* %tmp1 to i32*
31 ret i32 %tmp7
32 }
33
34 ; Function Attrs: argmemonly nounwind
35 declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #1
36
37 declare void @bar(i32*) local_unnamed_addr #2
38 declare void @bar2(i32*, i32*) local_unnamed_addr #1
39
40
41 ; Function Attrs: argmemonly nounwind
42 declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #1
43
44 ; Function Attrs: nounwind uwtable
45 define i32 @caller(i32 %arg) local_unnamed_addr #0 {
46 bb:
47 %tmp = tail call i32 @callee_unknown_use2(i32 %arg)
48 ret i32 %tmp
49 }
50
51 attributes #0 = { nounwind uwtable}
52 attributes #1 = { argmemonly nounwind }
53 attributes #2 = { nounwind }
54
55 !llvm.module.flags = !{!0}
56 !llvm.ident = !{!1}
57
58 !0 = !{i32 1, !"wchar_size", i32 4}
59 !1 = !{!"clang version 5.0.0 (trunk 303574)"}
60 !2 = !{!3, !3, i64 0}
61 !3 = !{!"int", !4, i64 0}
62 !4 = !{!"omnipotent char", !5, i64 0}
63 !5 = !{!"Simple C/C++ TBAA"}
64
65
66