llvm.org GIT mirror llvm / 7574b7e
[PartialInlining] Fix Crash from holding a reference to a destructed ORE. The callback used to create an ORE for the legacy PI pass caches the allocated object in a unique_ptr in the runOnModule function, and returns a reference to that object. Under certain circumstances we can end up holding onto that reference after the ORE's destruction. Rather than allowing the new and legacy passes to create ORE objects in different ways, create the ORE at the point of use. Differential Revision: https://reviews.llvm.org/D43219 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@330473 91177308-0d34-0410-b5e6-96231b3b80d8 Sean Fertile 1 year, 4 months ago
2 changed file(s) with 190 addition(s) and 37 deletion(s). Raw diff Collapse all Expand all
201201 std::function *GetAC,
202202 std::function *GTTI,
203203 Optional> GBFI,
204 ProfileSummaryInfo *ProfSI,
205 std::function *GORE)
206 : GetAssumptionCache(GetAC), GetTTI(GTTI), GetBFI(GBFI), PSI(ProfSI),
207 GetORE(GORE) {}
204 ProfileSummaryInfo *ProfSI)
205 : GetAssumptionCache(GetAC), GetTTI(GTTI), GetBFI(GBFI), PSI(ProfSI) {}
208206
209207 bool run(Module &M);
210208 // Main part of the transformation that calls helper functions to find
270268 std::function *GetTTI;
271269 Optional> GetBFI;
272270 ProfileSummaryInfo *PSI;
273 std::function *GetORE;
274271
275272 // Return the frequency of the OutliningBB relative to F's entry point.
276273 // The result is no larger than 1 and is represented using BP.
281278 // Return true if the callee of CS should be partially inlined with
282279 // profit.
283280 bool shouldPartialInline(CallSite CS, FunctionCloner &Cloner,
284 BlockFrequency WeightedOutliningRcost);
281 BlockFrequency WeightedOutliningRcost,
282 OptimizationRemarkEmitter &ORE);
285283
286284 // Try to inline DuplicateFunction (cloned from F with call to
287285 // the OutlinedFunction into its callers. Return true
336334
337335 std::unique_ptr computeOutliningInfo(Function *F);
338336 std::unique_ptr
339 computeOutliningColdRegionsInfo(Function *F);
337 computeOutliningColdRegionsInfo(Function *F, OptimizationRemarkEmitter &ORE);
340338 };
341339
342340 struct PartialInlinerLegacyPass : public ModulePass {
361359 &getAnalysis();
362360 ProfileSummaryInfo *PSI =
363361 getAnalysis().getPSI();
364 std::unique_ptr UPORE;
365362
366363 std::function GetAssumptionCache =
367364 [&ACT](Function &F) -> AssumptionCache & {
373370 return TTIWP->getTTI(F);
374371 };
375372
376 std::function GetORE =
377 [&UPORE](Function &F) -> OptimizationRemarkEmitter & {
378 UPORE.reset(new OptimizationRemarkEmitter(&F));
379 return *UPORE.get();
380 };
381
382 return PartialInlinerImpl(&GetAssumptionCache, &GetTTI, NoneType::None, PSI,
383 &GetORE)
373 return PartialInlinerImpl(&GetAssumptionCache, &GetTTI, NoneType::None, PSI)
384374 .run(M);
385375 }
386376 };
388378 } // end anonymous namespace
389379
390380 std::unique_ptr
391 PartialInlinerImpl::computeOutliningColdRegionsInfo(Function *F) {
381 PartialInlinerImpl::computeOutliningColdRegionsInfo(Function *F,
382 OptimizationRemarkEmitter &ORE) {
392383 BasicBlock *EntryBlock = &F->front();
393384
394385 DominatorTree DT(*F);
401392 BFI = ScopedBFI.get();
402393 } else
403394 BFI = &(*GetBFI)(*F);
404
405 auto &ORE = (*GetORE)(*F);
406395
407396 // Return if we don't have profiling information.
408397 if (!PSI->hasInstrumentationProfile())
765754
766755 bool PartialInlinerImpl::shouldPartialInline(
767756 CallSite CS, FunctionCloner &Cloner,
768 BlockFrequency WeightedOutliningRcost) {
757 BlockFrequency WeightedOutliningRcost,
758 OptimizationRemarkEmitter &ORE) {
769759 using namespace ore;
770760
771761 Instruction *Call = CS.getInstruction();
777767
778768 Function *Caller = CS.getCaller();
779769 auto &CalleeTTI = (*GetTTI)(*Callee);
780 auto &ORE = (*GetORE)(*Caller);
781770 InlineCost IC = getInlineCost(CS, getInlineParams(), CalleeTTI,
782771 *GetAssumptionCache, GetBFI, PSI, &ORE);
783772
12691258 if (F->user_begin() == F->user_end())
12701259 return {false, nullptr};
12711260
1272 auto &ORE = (*GetORE)(*F);
1261 OptimizationRemarkEmitter ORE(F);
12731262
12741263 // Only try to outline cold regions if we have a profile summary, which
12751264 // implies we have profiling information.
12761265 if (PSI->hasProfileSummary() && F->hasProfileData() &&
12771266 !DisableMultiRegionPartialInline) {
12781267 std::unique_ptr OMRI =
1279 computeOutliningColdRegionsInfo(F);
1268 computeOutliningColdRegionsInfo(F, ORE);
12801269 if (OMRI) {
12811270 FunctionCloner Cloner(F, OMRI.get(), ORE);
12821271
13561345 // inlining the function with outlining (The inliner uses the size increase to
13571346 // model the cost of inlining a callee).
13581347 if (!SkipCostAnalysis && Cloner.OutlinedRegionCost < SizeCost) {
1359 auto &ORE = (*GetORE)(*Cloner.OrigFunc);
1348 OptimizationRemarkEmitter OrigFuncORE(Cloner.OrigFunc);
13601349 DebugLoc DLoc;
13611350 BasicBlock *Block;
13621351 std::tie(DLoc, Block) = getOneDebugLoc(Cloner.ClonedFunc);
1363 ORE.emit([&]() {
1352 OrigFuncORE.emit([&]() {
13641353 return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutlineRegionTooSmall",
13651354 DLoc, Block)
13661355 << ore::NV("Function", Cloner.OrigFunc)
13931382 if (IsLimitReached())
13941383 continue;
13951384
1396
1397 if (!shouldPartialInline(CS, Cloner, WeightedRcost))
1385 OptimizationRemarkEmitter CallerORE(CS.getCaller());
1386 if (!shouldPartialInline(CS, Cloner, WeightedRcost, CallerORE))
13981387 continue;
13991388
1400 auto &ORE = (*GetORE)(*CS.getCaller());
14011389 // Construct remark before doing the inlining, as after successful inlining
14021390 // the callsite is removed.
14031391 OptimizationRemark OR(DEBUG_TYPE, "PartiallyInlined", CS.getInstruction());
14121400 : nullptr)))
14131401 continue;
14141402
1415 ORE.emit(OR);
1403 CallerORE.emit(OR);
14161404
14171405 // Now update the entry count:
14181406 if (CalleeEntryCountV && CallSiteToProfCountMap.count(User)) {
14351423 if (CalleeEntryCount)
14361424 Cloner.OrigFunc->setEntryCount(
14371425 CalleeEntryCount.setCount(CalleeEntryCountV));
1438 auto &ORE = (*GetORE)(*Cloner.OrigFunc);
1439 ORE.emit([&]() {
1426 OptimizationRemarkEmitter OrigFuncORE(Cloner.OrigFunc);
1427 OrigFuncORE.emit([&]() {
14401428 return OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", Cloner.OrigFunc)
14411429 << "Partially inlined into at least one caller";
14421430 });
15181506 return FAM.getResult(F);
15191507 };
15201508
1521 std::function GetORE =
1522 [&FAM](Function &F) -> OptimizationRemarkEmitter & {
1523 return FAM.getResult(F);
1524 };
1525
15261509 ProfileSummaryInfo *PSI = &AM.getResult(M);
15271510
1528 if (PartialInlinerImpl(&GetAssumptionCache, &GetTTI, {GetBFI}, PSI, &GetORE)
1511 if (PartialInlinerImpl(&GetAssumptionCache, &GetTTI, {GetBFI}, PSI)
15291512 .run(M))
15301513 return PreservedAnalyses::none();
15311514 return PreservedAnalyses::all();
0 ; RUN: opt < %s -partial-inliner -skip-partial-inlining-cost-analysis -inline-threshold=0 -disable-output
1
2 target datalayout = "e-m:e-i64:64-n32:64"
3 target triple = "powerpc64le-unknown-linux-gnu"
4
5 %0 = type { i32 (...)**, %1, %1, %3, %3, %3, i8, float, %4*, %5*, %5*, i32, i32, i32, i32, float, float, float, i8*, i32, float, float, float, i8, [7 x i8] }
6 %1 = type { %2, %3 }
7 %2 = type { [3 x %3] }
8 %3 = type { [4 x float] }
9 %4 = type <{ i8*, i16, i16, [4 x i8], i8*, i32, %3, %3, [4 x i8] }>
10 %5 = type { i32 (...)**, i32, i8* }
11 %6 = type <{ %7, [4 x i8], %19*, %20*, %30, %35, %3, float, i8, i8, i8, i8, %37, i32, [4 x i8] }>
12 %7 = type <{ %8, [7 x i8], void (%16*, float)*, void (%16*, float)*, i8*, %17 }>
13 %8 = type <{ i32 (...)**, %9, %11*, %12, %13*, %14*, %15*, i8 }>
14 %9 = type <{ i8, [3 x i8], i32, i32, [4 x i8], %0**, i8, [7 x i8] }>
15 %11 = type { i32 (...)** }
16 %12 = type { float, i32, i32, float, i8, %15*, i8, i8, i8, float, i8, float, %13* }
17 %13 = type opaque
18 %14 = type { i32 (...)** }
19 %15 = type { i32 (...)** }
20 %16 = type <{ %8, [7 x i8], void (%16*, float)*, void (%16*, float)*, i8*, %17, [4 x i8] }>
21 %17 = type { %18 }
22 %18 = type { float, float, float, float, float, i32, float, float, float, float, float, i32, float, float, float, i32, i32 }
23 %19 = type { i32 (...)** }
24 %20 = type <{ i32 (...)**, %21, %25, %9, i8, [7 x i8] }>
25 %21 = type { %22 }
26 %22 = type <{ i8, [3 x i8], i32, i32, [4 x i8], %24*, i8, [7 x i8] }>
27 %24 = type { i32, i32 }
28 %25 = type <{ i8, [3 x i8], i32, i32, [4 x i8], %27**, i8, [7 x i8] }>
29 %27 = type { i32, [4 x i8], [4 x %29], i8*, i8*, i32, float, float, i32 }
30 %29 = type <{ %3, %3, %3, %3, %3, float, float, float, i32, i32, i32, i32, [4 x i8], i8*, float, i8, [3 x i8], float, float, i32, %3, %3, [4 x i8] }>
31 %30 = type <{ i8, [3 x i8], i32, i32, [4 x i8], %32**, i8, [7 x i8] }>
32 %32 = type { i32 (...)**, i32, i32, i32, i8, %33*, %33*, float, float, %3, %3, %3 }
33 %33 = type <{ %0, %2, %3, %3, float, %3, %3, %3, %3, %3, %3, %3, float, float, i8, [3 x i8], float, float, float, float, float, float, %34*, %30, i32, i32, i32, [4 x i8] }>
34 %34 = type { i32 (...)** }
35 %35 = type <{ i8, [3 x i8], i32, i32, [4 x i8], %33**, i8, [7 x i8] }>
36 %37 = type <{ i8, [3 x i8], i32, i32, [4 x i8], %39**, i8, [7 x i8] }>
37 %39 = type { i32 (...)** }
38 %40 = type <{ i32 (...)**, %9, %11*, %12, %13*, %14*, %15*, i8, [7 x i8] }>
39
40 @gDisableDeactivation = external local_unnamed_addr global i8, align 1
41 @0 = external dso_local unnamed_addr constant [29 x i8], align 1
42 @1 = external dso_local unnamed_addr constant [14 x i8], align 1
43 @2 = external dso_local unnamed_addr constant [22 x i8], align 1
44 @gDeactivationTime = external local_unnamed_addr global float, align 4
45
46 declare void @_ZN15CProfileManager12Stop_ProfileEv() local_unnamed_addr
47
48 declare void @_ZN15CProfileManager13Start_ProfileEPKc(i8*) local_unnamed_addr
49
50 declare void @_ZN17btCollisionObject18setActivationStateEi(%0*, i32 signext) local_unnamed_addr
51
52 declare hidden void @__clang_call_terminate(i8*) local_unnamed_addr
53
54 declare i32 @__gxx_personality_v0(...)
55
56 ; Function Attrs: argmemonly nounwind
57 declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #0
58
59 define void @_ZN23btDiscreteDynamicsWorld28internalSingleStepSimulationEf(%6*, float) unnamed_addr align 2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !prof !27 {
60 invoke void null(%6* nonnull %0, float %1)
61 to label %5 unwind label %3
62
63 ; <label>:3:
64 %4 = landingpad { i8*, i32 }
65 cleanup
66 br label %16
67
68 ; <label>:5:
69 %6 = invoke %15* null(%40* null)
70 to label %11 unwind label %13
71
72 ; <label>:7:
73 invoke void null(%40* null)
74 to label %8 unwind label %13
75
76 ; <label>:8:
77 invoke void null(%6* nonnull %0)
78 to label %9 unwind label %13
79
80 ; <label>:9:
81 invoke void null(%6* nonnull %0, %17* nonnull dereferenceable(68) null)
82 to label %10 unwind label %13
83
84 ; <label>:10:
85 invoke void null(%6* nonnull %0, float %1)
86 to label %11 unwind label %13
87
88 ; <label>:11:
89 invoke void @_ZN23btDiscreteDynamicsWorld21updateActivationStateEf(%6* nonnull %0, float %1)
90 to label %12 unwind label %13
91
92 ; <label>:12:
93 ret void
94
95 ; <label>:13:
96 %14 = landingpad { i8*, i32 }
97 cleanup
98 %15 = extractvalue { i8*, i32 } %14, 0
99 br label %16
100
101
102 ; <label>:16:
103 call void @_ZN15CProfileManager12Stop_ProfileEv()
104 resume { i8*, i32 } zeroinitializer
105 }
106
107 define void @_ZN23btDiscreteDynamicsWorld21updateActivationStateEf(%6* nocapture readonly, float) local_unnamed_addr align 2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !prof !27 {
108 %3 = icmp sgt i32 0, 0
109 br i1 %3, label %4, label %5, !prof !29
110
111 ; <label>:4:
112 br i1 false, label %5, label %6, !prof !30
113
114 ; <label>:5:
115 ret void
116
117 ; <label>:6:
118 invoke void @_ZN17btCollisionObject18setActivationStateEi(%0* nonnull null, i32 signext 0)
119 to label %7 unwind label %8
120
121 ; <label>:7:
122 invoke void @_ZN17btCollisionObject18setActivationStateEi(%0* nonnull null, i32 signext 1)
123 to label %5 unwind label %8
124
125 ;
126 %9 = landingpad { i8*, i32 }
127 cleanup
128 resume { i8*, i32 } %9
129 }
130
131 ; Function Attrs: noreturn nounwind
132 declare void @llvm.trap() #1
133
134 attributes #0 = { argmemonly nounwind }
135 attributes #1 = { noreturn nounwind }
136
137 !llvm.module.flags = !{!0}
138
139 !0 = !{i32 1, !"ProfileSummary", !1}
140 !1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
141 !2 = !{!"ProfileFormat", !"InstrProf"}
142 !3 = !{!"TotalCount", i64 6540578580}
143 !4 = !{!"MaxCount", i64 629805108}
144 !5 = !{!"MaxInternalCount", i64 40670372}
145 !6 = !{!"MaxFunctionCount", i64 629805108}
146 !7 = !{!"NumCounts", i64 8554}
147 !8 = !{!"NumFunctions", i64 3836}
148 !9 = !{!"DetailedSummary", !10}
149 !10 = !{!11, !12, !13, !14, !15, !16, !16, !17, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26}
150 !11 = !{i32 10000, i64 629805108, i32 1}
151 !12 = !{i32 100000, i64 366853677, i32 2}
152 !13 = !{i32 200000, i64 196816893, i32 4}
153 !14 = !{i32 300000, i64 192575561, i32 7}
154 !15 = !{i32 400000, i64 130688163, i32 11}
155 !16 = !{i32 500000, i64 74857169, i32 19}
156 !17 = !{i32 600000, i64 48184151, i32 30}
157 !18 = !{i32 700000, i64 21298588, i32 49}
158 !19 = !{i32 800000, i64 10721033, i32 90}
159 !20 = !{i32 900000, i64 3301634, i32 202}
160 !21 = !{i32 950000, i64 1454952, i32 362}
161 !22 = !{i32 990000, i64 343872, i32 675}
162 !23 = !{i32 999000, i64 46009, i32 1112}
163 !24 = !{i32 999900, i64 6067, i32 1435}
164 !25 = !{i32 999990, i64 700, i32 1721}
165 !26 = !{i32 999999, i64 72, i32 1955}
166 !27 = !{!"function_entry_count", i64 700}
167 !28 = !{!"branch_weights", i32 701, i32 1}
168 !29 = !{!"branch_weights", i32 954001, i32 701}
169 !30 = !{!"branch_weights", i32 1, i32 954001}