llvm.org GIT mirror llvm / 650f9d4
[PartialInlining] Profile based cost analysis Implemented frequency based cost/saving analysis and related options. The pass is now in a state ready to be turned on in the pipeline (in a follow-up). Differential Revision: http://reviews.llvm.org/D32783 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@302967 91177308-0d34-0410-b5e6-96231b3b80d8 Xinliang David Li 3 years ago
10 changed file(s) with 538 addition(s) and 72 deletion(s). Raw diff Collapse all Expand all
1515 #include "llvm/ADT/Statistic.h"
1616 #include "llvm/Analysis/BlockFrequencyInfo.h"
1717 #include "llvm/Analysis/BranchProbabilityInfo.h"
18 #include "llvm/Analysis/CodeMetrics.h"
1819 #include "llvm/Analysis/InlineCost.h"
1920 #include "llvm/Analysis/LoopInfo.h"
2021 #include "llvm/Analysis/OptimizationDiagnosticInfo.h"
4142 static cl::opt
4243 DisablePartialInlining("disable-partial-inlining", cl::init(false),
4344 cl::Hidden, cl::desc("Disable partial ininling"));
45 // This is an option used by testing:
46 static cl::opt SkipCostAnalysis("skip-partial-inlining-cost-analysis",
47 cl::init(false), cl::ZeroOrMore,
48 cl::ReallyHidden,
49 cl::desc("Skip Cost Analysis"));
4450
4551 static cl::opt MaxNumInlineBlocks(
4652 "max-num-inline-blocks", cl::init(5), cl::Hidden,
5157 static cl::opt MaxNumPartialInlining(
5258 "max-partial-inlining", cl::init(-1), cl::Hidden, cl::ZeroOrMore,
5359 cl::desc("Max number of partial inlining. The default is unlimited"));
60
61 // Used only when PGO or user-annotated branch data is absent. It is
62 // the minimum value used to weigh the outline region. If BFI
63 // produces a larger value, the BFI value will be used.
64 static cl::opt
65 OutlineRegionFreqPercent("outline-region-freq-percent", cl::init(75),
66 cl::Hidden, cl::ZeroOrMore,
67 cl::desc("Relative frequency of outline region to "
68 "the entry block"));
5469
5570 namespace {
5671
8398 bool run(Module &M);
8499 Function *unswitchFunction(Function *F);
85100
86 std::unique_ptr computeOutliningInfo(Function *F);
87
88101 private:
89102 int NumPartialInlining = 0;
90103 std::function *GetAssumptionCache;
92105 Optional> GetBFI;
93106 ProfileSummaryInfo *PSI;
94107
95 bool shouldPartialInline(CallSite CS, OptimizationRemarkEmitter &ORE);
108 // Return the frequency of OutliningCallBB relative to F's entry point.
109 // The result is no larger than 1 and is represented using BP.
110 // (Note that the outlined region's 'head' block can only have incoming
111 // edges from the guarding entry blocks).
112 BranchProbability getOutliningCallBBRelativeFreq(Function *F,
113 FunctionOutliningInfo *OI,
114 Function *DuplicateFunction,
115 BlockFrequencyInfo *BFI,
116 BasicBlock *OutliningCallBB);
117
118 // Return true if the callee of CS should be partially inlined with
119 // profit.
120 bool shouldPartialInline(CallSite CS, Function *F, FunctionOutliningInfo *OI,
121 BlockFrequencyInfo *CalleeBFI,
122 BasicBlock *OutliningCallBB,
123 int OutliningCallOverhead,
124 OptimizationRemarkEmitter &ORE);
125
126 // Try to inline DuplicateFunction (cloned from F, with a call to
127 // the OutlinedFunction) into its callers. Return true
128 // if there is any successful inlining.
129 bool tryPartialInline(Function *DuplicateFunction,
130 Function *F, /*orignal function */
131 FunctionOutliningInfo *OI, Function *OutlinedFunction,
132 BlockFrequencyInfo *CalleeBFI);
133
134 // Compute the mapping from each use site of DuplicateFunction to the enclosing
135 // BB's profile count.
136 void computeCallsiteToProfCountMap(Function *DuplicateFunction,
137 DenseMap &SiteCountMap);
138
96139 bool IsLimitReached() {
97140 return (MaxNumPartialInlining != -1 &&
98141 NumPartialInlining >= MaxNumPartialInlining);
99142 }
143
144 CallSite getCallSite(User *U) {
145 CallSite CS;
146 if (CallInst *CI = dyn_cast(U))
147 CS = CallSite(CI);
148 else if (InvokeInst *II = dyn_cast(U))
149 CS = CallSite(II);
150 else
151 llvm_unreachable("All uses must be calls");
152 return CS;
153 }
154
155 CallSite getOneCallSiteTo(Function *F) {
156 User *User = *F->user_begin();
157 return getCallSite(User);
158 }
159
160 std::tuple getOneDebugLoc(Function *F) {
161 CallSite CS = getOneCallSiteTo(F);
162 DebugLoc DLoc = CS.getInstruction()->getDebugLoc();
163 BasicBlock *Block = CS.getParent();
164 return std::make_tuple(DLoc, Block);
165 }
166
167 // Returns the costs associated with function outlining:
168 // - The first value is the estimated size of the new call sequence in
169 // basic block 'OutliningCallBB';
170 // - The second value is the non-weighted runtime cost for making the call
171 // to the outlined function 'OutlinedFunction', including the additional
172 // setup cost in the outlined function itself;
173 // - The third value is the estimated size of the original code from
174 // function 'F' that is extracted into the outlined function.
175 std::tuple
176 computeOutliningCosts(Function *F, const FunctionOutliningInfo *OutliningInfo,
177 Function *OutlinedFunction,
178 BasicBlock *OutliningCallBB);
179 // Compute the 'InlineCost' of block BB. InlineCost is a proxy used to
180 // approximate both the size and runtime cost (Note that in the current
181 // inline cost analysis, there is no clear distinction there either).
182 int computeBBInlineCost(BasicBlock *BB);
183
184 std::unique_ptr computeOutliningInfo(Function *F);
185
100186 };
101187
102188 struct PartialInlinerLegacyPass : public ModulePass {
222308 // Do sanity check of the entries: there should not
223309 // be any successors (not in the entry set) other than
224310 // {ReturnBlock, NonReturnBlock}
225 assert(OutliningInfo->Entries[0] == &F->front());
311 assert(OutliningInfo->Entries[0] == &F->front() &&
312 "Function Entry must be the first in Entries vector");
226313 DenseSet Entries;
227314 for (BasicBlock *E : OutliningInfo->Entries)
228315 Entries.insert(E);
288375 return OutliningInfo;
289376 }
290377
291 bool PartialInlinerImpl::shouldPartialInline(CallSite CS,
292 OptimizationRemarkEmitter &ORE) {
293 // TODO : more sharing with shouldInline in Inliner.cpp
378 // Check if there is PGO data or user-annotated branch data:
379 static bool hasProfileData(Function *F, FunctionOutliningInfo *OI) {
380 if (F->getEntryCount())
381 return true;
382 // Now check if any of the entry block has MD_prof data:
383 for (auto *E : OI->Entries) {
384 BranchInst *BR = dyn_cast(E->getTerminator());
385 if (!BR || BR->isUnconditional())
386 continue;
387 uint64_t T, F;
388 if (BR->extractProfMetadata(T, F))
389 return true;
390 }
391 return false;
392 }
393
394 BranchProbability PartialInlinerImpl::getOutliningCallBBRelativeFreq(
395 Function *F, FunctionOutliningInfo *OI, Function *DuplicateFunction,
396 BlockFrequencyInfo *BFI, BasicBlock *OutliningCallBB) {
397
398 auto EntryFreq =
399 BFI->getBlockFreq(&DuplicateFunction->getEntryBlock());
400 auto OutliningCallFreq = BFI->getBlockFreq(OutliningCallBB);
401
402 auto OutlineRegionRelFreq =
403 BranchProbability::getBranchProbability(OutliningCallFreq.getFrequency(),
404 EntryFreq.getFrequency());
405
406 if (hasProfileData(F, OI))
407 return OutlineRegionRelFreq;
408
409 // When profile data is not available, we need to be very
410 // conservative in estimating the overall savings. We need to make sure
411 // the outline region relative frequency is not below the threshold
412 // specified by the option.
413 OutlineRegionRelFreq = std::max(OutlineRegionRelFreq, BranchProbability(OutlineRegionFreqPercent, 100));
414
415 return OutlineRegionRelFreq;
416 }
417
418 bool PartialInlinerImpl::shouldPartialInline(
419 CallSite CS, Function *F /* Original Callee */, FunctionOutliningInfo *OI,
420 BlockFrequencyInfo *CalleeBFI, BasicBlock *OutliningCallBB,
421 int NonWeightedOutliningRcost, OptimizationRemarkEmitter &ORE) {
294422 using namespace ore;
423 if (SkipCostAnalysis)
424 return true;
425
295426 Instruction *Call = CS.getInstruction();
296427 Function *Callee = CS.getCalledFunction();
297428 Function *Caller = CS.getCaller();
301432
302433 if (IC.isAlways()) {
303434 ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", Call)
304 << NV("Callee", Callee)
435 << NV("Callee", F)
305436 << " should always be fully inlined, not partially");
306437 return false;
307438 }
308439
309440 if (IC.isNever()) {
310441 ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call)
311 << NV("Callee", Callee) << " not partially inlined into "
442 << NV("Callee", F) << " not partially inlined into "
312443 << NV("Caller", Caller)
313444 << " because it should never be inlined (cost=never)");
314445 return false;
315446 }
316447
317448 if (!IC) {
318 ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "TooCostly", Call)
319 << NV("Callee", Callee) << " not partially inlined into "
449 ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", Call)
450 << NV("Callee", F) << " not partially inlined into "
320451 << NV("Caller", Caller) << " because too costly to inline (cost="
321452 << NV("Cost", IC.getCost()) << ", threshold="
322453 << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")");
323454 return false;
324455 }
456 const DataLayout &DL = Caller->getParent()->getDataLayout();
457 // The savings of eliminating the call:
458 int NonWeightedSavings = getCallsiteCost(CS, DL);
459 BlockFrequency NormWeightedSavings(NonWeightedSavings);
460
461 auto RelativeFreq =
462 getOutliningCallBBRelativeFreq(F, OI, Callee, CalleeBFI, OutliningCallBB);
463 auto NormWeightedRcost =
464 BlockFrequency(NonWeightedOutliningRcost) * RelativeFreq;
465
466 // Weighted saving is smaller than weighted cost, return false
467 if (NormWeightedSavings < NormWeightedRcost) {
468 ORE.emit(
469 OptimizationRemarkAnalysis(DEBUG_TYPE, "OutliningCallcostTooHigh", Call)
470 << NV("Callee", F) << " not partially inlined into "
471 << NV("Caller", Caller) << " runtime overhead (overhead="
472 << NV("Overhead", (unsigned)NormWeightedRcost.getFrequency())
473 << ", savings="
474 << NV("Savings", (unsigned)NormWeightedSavings.getFrequency()) << ")"
475 << " of making the outlined call is too high");
476
477 return false;
478 }
325479
326480 ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", Call)
327 << NV("Callee", Callee) << " can be partially inlined into "
481 << NV("Callee", F) << " can be partially inlined into "
328482 << NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost())
329483 << " (threshold="
330484 << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")");
331485 return true;
332486 }
333487
488 // TODO: Ideally we should share Inliner's InlineCost Analysis code.
489 // For now use a simplified version. The returned 'InlineCost' will be used
490 // to estimate the size cost as well as runtime cost of the BB.
491 int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB) {
492 int InlineCost = 0;
493 const DataLayout &DL = BB->getParent()->getParent()->getDataLayout();
494 for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
495 if (isa(I))
496 continue;
497
498 if (CallInst *CI = dyn_cast(I)) {
499 InlineCost += getCallsiteCost(CallSite(CI), DL);
500 continue;
501 }
502
503 if (InvokeInst *II = dyn_cast(I)) {
504 InlineCost += getCallsiteCost(CallSite(II), DL);
505 continue;
506 }
507
508 if (SwitchInst *SI = dyn_cast(I)) {
509 InlineCost += (SI->getNumCases() + 1) * InlineConstants::InstrCost;
510 continue;
511 }
512 InlineCost += InlineConstants::InstrCost;
513 }
514 return InlineCost;
515 }
516
517 std::tuple PartialInlinerImpl::computeOutliningCosts(
518 Function *F, const FunctionOutliningInfo *OI, Function *OutlinedFunction,
519 BasicBlock *OutliningCallBB) {
520 // First compute the cost of the outlined region 'OI' in the original
521 // function 'F':
522 int OutlinedRegionCost = 0;
523 for (BasicBlock &BB : *F) {
524 if (&BB != OI->ReturnBlock &&
525 // Assuming Entry set is small -- do a linear search here:
526 std::find(OI->Entries.begin(), OI->Entries.end(), &BB) ==
527 OI->Entries.end()) {
528 OutlinedRegionCost += computeBBInlineCost(&BB);
529 }
530 }
531
532 // Now compute the cost of the call sequence to the outlined function
533 // 'OutlinedFunction' in BB 'OutliningCallBB':
534 int OutliningFuncCallCost = computeBBInlineCost(OutliningCallBB);
535
536 // Now compute the cost of the extracted/outlined function itself:
537 int OutlinedFunctionCost = 0;
538 for (BasicBlock &BB : *OutlinedFunction) {
539 OutlinedFunctionCost += computeBBInlineCost(&BB);
540 }
541
542 assert(OutlinedFunctionCost >= OutlinedRegionCost &&
543 "Outlined function cost should be no less than the outlined region");
544 int OutliningRuntimeOverhead =
545 OutliningFuncCallCost + (OutlinedFunctionCost - OutlinedRegionCost);
546
547 return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead,
548 OutlinedRegionCost);
549 }
550
551 // Create the callsite to profile count map which is
552 // used to update the original function's entry count,
553 // after the function is partially inlined into the callsite.
554 void PartialInlinerImpl::computeCallsiteToProfCountMap(
555 Function *DuplicateFunction,
556 DenseMap &CallSiteToProfCountMap) {
557 std::vector Users(DuplicateFunction->user_begin(),
558 DuplicateFunction->user_end());
559 Function *CurrentCaller = nullptr;
560 BlockFrequencyInfo *CurrentCallerBFI = nullptr;
561
562 auto ComputeCurrBFI = [&,this](Function *Caller) {
563 // For the old pass manager:
564 if (!GetBFI) {
565 if (CurrentCallerBFI)
566 delete CurrentCallerBFI;
567 DominatorTree DT(*Caller);
568 LoopInfo LI(DT);
569 BranchProbabilityInfo BPI(*Caller, LI);
570 CurrentCallerBFI = new BlockFrequencyInfo(*Caller, BPI, LI);
571 } else {
572 // New pass manager:
573 CurrentCallerBFI = &(*GetBFI)(*Caller);
574 }
575 };
576
577 for (User *User : Users) {
578 CallSite CS = getCallSite(User);
579 Function *Caller = CS.getCaller();
580 if (CurrentCaller != Caller) {
581 CurrentCaller = Caller;
582 ComputeCurrBFI(Caller);
583 } else {
584 assert(CurrentCallerBFI && "CallerBFI is not set");
585 }
586 BasicBlock *CallBB = CS.getInstruction()->getParent();
587 auto Count = CurrentCallerBFI->getBlockProfileCount(CallBB);
588 if (Count)
589 CallSiteToProfCountMap[User] = *Count;
590 else
591 CallSiteToProfCountMap[User] = 0;
592 }
593 }
594
334595 Function *PartialInlinerImpl::unswitchFunction(Function *F) {
335596
336597 if (F->hasAddressTaken())
346607 if (PSI->isFunctionEntryCold(F))
347608 return nullptr;
348609
349 std::unique_ptr OutliningInfo =
350 computeOutliningInfo(F);
351
352 if (!OutliningInfo)
610 if (F->user_begin() == F->user_end())
611 return nullptr;
612
613 std::unique_ptr OI = computeOutliningInfo(F);
614
615 if (!OI)
353616 return nullptr;
354617
355618 // Clone the function, so that we can hack away on it.
356619 ValueToValueMapTy VMap;
357620 Function *DuplicateFunction = CloneFunction(F, VMap);
358 BasicBlock *NewReturnBlock =
359 cast(VMap[OutliningInfo->ReturnBlock]);
360 BasicBlock *NewNonReturnBlock =
361 cast(VMap[OutliningInfo->NonReturnBlock]);
621 BasicBlock *NewReturnBlock = cast(VMap[OI->ReturnBlock]);
622 BasicBlock *NewNonReturnBlock = cast(VMap[OI->NonReturnBlock]);
362623 DenseSet NewEntries;
363 for (BasicBlock *BB : OutliningInfo->Entries) {
624 for (BasicBlock *BB : OI->Entries) {
364625 NewEntries.insert(cast(VMap[BB]));
365626 }
366627
389650 BasicBlock *PreReturn = NewReturnBlock;
390651 // only split block when necessary:
391652 PHINode *FirstPhi = getFirstPHI(PreReturn);
392 unsigned NumPredsFromEntries = OutliningInfo->ReturnBlockPreds.size();
653 unsigned NumPredsFromEntries = OI->ReturnBlockPreds.size();
393654 if (FirstPhi && FirstPhi->getNumIncomingValues() > NumPredsFromEntries + 1) {
394655
395656 NewReturnBlock = NewReturnBlock->splitBasicBlock(
407668 Ins = NewReturnBlock->getFirstNonPHI();
408669
409670 RetPhi->addIncoming(&*I, PreReturn);
410 for (BasicBlock *E : OutliningInfo->ReturnBlockPreds) {
671 for (BasicBlock *E : OI->ReturnBlockPreds) {
411672 BasicBlock *NewE = cast(VMap[E]);
412673 RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(NewE), NewE);
413674 OldPhi->removeIncomingValue(NewE);
414675 }
415676 ++I;
416677 }
417 for (auto E : OutliningInfo->ReturnBlockPreds) {
678 for (auto E : OI->ReturnBlockPreds) {
418679 BasicBlock *NewE = cast(VMap[E]);
419680 NewE->getTerminator()->replaceUsesOfWith(PreReturn, NewReturnBlock);
420681 }
442703 BlockFrequencyInfo BFI(*DuplicateFunction, BPI, LI);
443704
444705 // Extract the body of the if.
445 Function *ExtractedFunction =
706 Function *OutlinedFunction =
446707 CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false, &BFI, &BPI)
447708 .extractCodeRegion();
448709
449 // Inline the top-level if test into all callers.
450 std::vector Users(DuplicateFunction->user_begin(),
451 DuplicateFunction->user_end());
452
453 for (User *User : Users) {
454 CallSite CS;
455 if (CallInst *CI = dyn_cast(User))
456 CS = CallSite(CI);
457 else if (InvokeInst *II = dyn_cast(User))
458 CS = CallSite(II);
459 else
460 llvm_unreachable("All uses must be calls");
461
462 if (IsLimitReached())
463 continue;
464
465 OptimizationRemarkEmitter ORE(CS.getCaller());
466 if (!shouldPartialInline(CS, ORE))
467 continue;
468
469 DebugLoc DLoc = CS.getInstruction()->getDebugLoc();
470 BasicBlock *Block = CS.getParent();
471 ORE.emit(OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", DLoc, Block)
472 << ore::NV("Callee", F) << " partially inlined into "
473 << ore::NV("Caller", CS.getCaller()));
474
475 InlineFunctionInfo IFI(nullptr, GetAssumptionCache, PSI);
476 InlineFunction(CS, IFI);
477 NumPartialInlining++;
478 // update stats
479 NumPartialInlined++;
480 }
710 bool AnyInline =
711 tryPartialInline(DuplicateFunction, F, OI.get(), OutlinedFunction, &BFI);
481712
482713 // Ditch the duplicate, since we're done with it, and rewrite all remaining
483714 // users (function pointers, etc.) back to the original function.
484715 DuplicateFunction->replaceAllUsesWith(F);
485716 DuplicateFunction->eraseFromParent();
486
487
488 return ExtractedFunction;
717 if (!AnyInline && OutlinedFunction)
718 OutlinedFunction->eraseFromParent();
719 return OutlinedFunction;
720 }
721
722 bool PartialInlinerImpl::tryPartialInline(Function *DuplicateFunction,
723 Function *F,
724 FunctionOutliningInfo *OI,
725 Function *OutlinedFunction,
726 BlockFrequencyInfo *CalleeBFI) {
727 if (OutlinedFunction == nullptr)
728 return false;
729
730 int NonWeightedRcost;
731 int SizeCost;
732 int OutlinedRegionSizeCost;
733
734 auto OutliningCallBB =
735 getOneCallSiteTo(OutlinedFunction).getInstruction()->getParent();
736
737 std::tie(SizeCost, NonWeightedRcost, OutlinedRegionSizeCost) =
738 computeOutliningCosts(F, OI, OutlinedFunction, OutliningCallBB);
739
740 // If the call sequence to the outlined function is larger than the original
741 // outlined region size, outlining does not increase the chances of inlining
742 // 'F' (the inliner uses the size increase to model the
743 // cost of inlining a callee).
744 if (!SkipCostAnalysis && OutlinedRegionSizeCost < SizeCost) {
745 OptimizationRemarkEmitter ORE(F);
746 DebugLoc DLoc;
747 BasicBlock *Block;
748 std::tie(DLoc, Block) = getOneDebugLoc(DuplicateFunction);
749 ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "OutlineRegionTooSmall",
750 DLoc, Block)
751 << ore::NV("Function", F)
752 << " not partially inlined into callers (Original Size = "
753 << ore::NV("OutlinedRegionOriginalSize", OutlinedRegionSizeCost)
754 << ", Size of call sequence to outlined function = "
755 << ore::NV("NewSize", SizeCost) << ")");
756 return false;
757 }
758
759 assert(F->user_begin() == F->user_end() &&
760 "F's users should all be replaced!");
761 std::vector Users(DuplicateFunction->user_begin(),
762 DuplicateFunction->user_end());
763
764 DenseMap CallSiteToProfCountMap;
765 if (F->getEntryCount())
766 computeCallsiteToProfCountMap(DuplicateFunction, CallSiteToProfCountMap);
767
768 auto CalleeEntryCount = F->getEntryCount();
769 uint64_t CalleeEntryCountV = (CalleeEntryCount ? *CalleeEntryCount : 0);
770 bool AnyInline = false;
771 for (User *User : Users) {
772 CallSite CS = getCallSite(User);
773
774 if (IsLimitReached())
775 continue;
776
777 OptimizationRemarkEmitter ORE(CS.getCaller());
778
779 if (!shouldPartialInline(CS, F, OI, CalleeBFI, OutliningCallBB,
780 NonWeightedRcost, ORE))
781 continue;
782
783 ORE.emit(
784 OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", CS.getInstruction())
785 << ore::NV("Callee", F) << " partially inlined into "
786 << ore::NV("Caller", CS.getCaller()));
787
788 InlineFunctionInfo IFI(nullptr, GetAssumptionCache, PSI);
789 InlineFunction(CS, IFI);
790
791 // Now update the entry count:
792 if (CalleeEntryCountV && CallSiteToProfCountMap.count(User)) {
793 uint64_t CallSiteCount = CallSiteToProfCountMap[User];
794 CalleeEntryCountV -= std::min(CalleeEntryCountV, CallSiteCount);
795 }
796
797 AnyInline = true;
798 NumPartialInlining++;
799 // Update the stats
800 NumPartialInlined++;
801 }
802
803 if (AnyInline && CalleeEntryCount)
804 F->setEntryCount(CalleeEntryCountV);
805
806 return AnyInline;
489807 }
490808
491809 bool PartialInlinerImpl::run(Module &M) {
None ; RUN: opt < %s -partial-inliner -S | FileCheck %s
0 ; RUN: opt < %s -partial-inliner -skip-partial-inlining-cost-analysis -S | FileCheck %s
11
22 ; This test checks to make sure that the CodeExtractor
33 ; properly sets the entry count for the function that is
None ; RUN: opt < %s -partial-inliner -max-num-inline-blocks=2 -S | FileCheck %s
0 ; RUN: opt < %s -partial-inliner -max-num-inline-blocks=2 -skip-partial-inlining-cost-analysis -S | FileCheck %s
11
22 ; This test checks to make sure that CodeExtractor updates
33 ; the exit branch probabilities for multiple exit blocks.
0 ; RUN: opt < %s -partial-inliner -S | FileCheck %s
11 ; RUN: opt < %s -passes=partial-inliner -S | FileCheck %s
2 ; RUN: opt < %s -partial-inliner -max-num-inline-blocks=2 -S | FileCheck --check-prefix=LIMIT %s
3 ; RUN: opt < %s -passes=partial-inliner -max-num-inline-blocks=2 -S | FileCheck --check-prefix=LIMIT %s
2 ; RUN: opt < %s -partial-inliner -skip-partial-inlining-cost-analysis -max-num-inline-blocks=2 -S | FileCheck --check-prefix=LIMIT %s
3 ; RUN: opt < %s -passes=partial-inliner -skip-partial-inlining-cost-analysis -max-num-inline-blocks=2 -S | FileCheck --check-prefix=LIMIT %s
44
55 ; Function Attrs: nounwind uwtable
66 define i32 @bar(i32 %arg) local_unnamed_addr #0 {
0 ; RUN: opt < %s -skip-partial-inlining-cost-analysis -partial-inliner -S | FileCheck %s
1 ; RUN: opt < %s -skip-partial-inlining-cost-analysis -passes=partial-inliner -S | FileCheck %s
2
3 define i32 @Func(i1 %cond, i32* align 4 %align.val) !prof !1 {
4 ; CHECK: @Func({{.*}}) !prof [[REMAINCOUNT:![0-9]+]]
5 entry:
6 br i1 %cond, label %if.then, label %return
7 if.then:
8 ; Dummy store to have more than 0 uses
9 store i32 10, i32* %align.val, align 4
10 br label %return
11 return: ; preds = %entry
12 ret i32 0
13 }
14
15 define internal i32 @Caller1(i1 %cond, i32* align 2 %align.val) !prof !3{
16 entry:
17 ; CHECK-LABEL: @Caller1
18 ; CHECK: br
19 ; CHECK: call void @Func.1_
20 ; CHECK: br
21 ; CHECK: call void @Func.1_
22 %val = call i32 @Func(i1 %cond, i32* %align.val)
23 %val2 = call i32 @Func(i1 %cond, i32* %align.val)
24 ret i32 %val
25 }
26
27 define internal i32 @Caller2(i1 %cond, i32* align 2 %align.val) !prof !2{
28 entry:
29 ; CHECK-LABEL: @Caller2
30 ; CHECK: br
31 ; CHECK: call void @Func.1_
32 %val = call i32 @Func(i1 %cond, i32* %align.val)
33 ret i32 %val
34 }
35
36 ; CHECK: [[REMAINCOUNT]] = !{!"function_entry_count", i64 150}
37 !1 = !{!"function_entry_count", i64 200}
38 !2 = !{!"function_entry_count", i64 10}
39 !3 = !{!"function_entry_count", i64 20}
40
0 ; The outlined region has high frequency and the outlining
1 ; call sequence is expensive (input, output, multiple exit etc)
2 ; RUN: opt < %s -partial-inliner -max-num-inline-blocks=2 -S | FileCheck %s
3 ; RUN: opt < %s -passes=partial-inliner -max-num-inline-blocks=2 -S | FileCheck %s
4 ; RUN: opt < %s -partial-inliner -skip-partial-inlining-cost-analysis -max-num-inline-blocks=2 -S | FileCheck --check-prefix=NOCOST %s
5 ; RUN: opt < %s -passes=partial-inliner -skip-partial-inlining-cost-analysis -max-num-inline-blocks=2 -S | FileCheck --check-prefix=NOCOST %s
6
7
8 ; Function Attrs: nounwind
9 define i32 @bar_hot_outline_region(i32 %arg) local_unnamed_addr #0 {
10 bb:
11 %tmp = icmp slt i32 %arg, 0
12 br i1 %tmp, label %bb1, label %bb16, !prof !1
13
14 bb1: ; preds = %bb
15 %tmp2 = tail call i32 (...) @foo() #0
16 %tmp3 = tail call i32 (...) @foo() #0
17 %tmp4 = tail call i32 (...) @foo() #0
18 %tmp5 = tail call i32 (...) @foo() #0
19 %tmp6 = tail call i32 (...) @foo() #0
20 %tmp7 = tail call i32 (...) @foo() #0
21 %tmp8 = add nsw i32 %arg, 1
22 %tmp9 = tail call i32 @goo(i32 %tmp8) #0
23 %tmp10 = tail call i32 (...) @foo() #0
24 %tmp11 = icmp eq i32 %tmp10, 0
25 br i1 %tmp11, label %bb12, label %bb16
26
27 bb12: ; preds = %bb1
28 %tmp13 = tail call i32 (...) @foo() #0
29 %tmp14 = icmp eq i32 %tmp13, 0
30 %tmp15 = select i1 %tmp14, i32 0, i32 3
31 br label %bb16
32
33 bb16: ; preds = %bb12, %bb1, %bb
34 %tmp17 = phi i32 [ 2, %bb1 ], [ %tmp15, %bb12 ], [ 0, %bb ]
35 ret i32 %tmp17
36 }
37
38 define i32 @bar_cold_outline_region(i32 %arg) local_unnamed_addr #0 {
39 bb:
40 %tmp = icmp slt i32 %arg, 0
41 br i1 %tmp, label %bb1, label %bb16, !prof !2
42
43 bb1: ; preds = %bb
44 %tmp2 = tail call i32 (...) @foo() #0
45 %tmp3 = tail call i32 (...) @foo() #0
46 %tmp4 = tail call i32 (...) @foo() #0
47 %tmp5 = tail call i32 (...) @foo() #0
48 %tmp6 = tail call i32 (...) @foo() #0
49 %tmp7 = tail call i32 (...) @foo() #0
50 %tmp8 = add nsw i32 %arg, 1
51 %tmp9 = tail call i32 @goo(i32 %tmp8) #0
52 %tmp10 = tail call i32 (...) @foo() #0
53 %tmp11 = icmp eq i32 %tmp10, 0
54 br i1 %tmp11, label %bb12, label %bb16
55
56 bb12: ; preds = %bb1
57 %tmp13 = tail call i32 (...) @foo() #0
58 %tmp14 = icmp eq i32 %tmp13, 0
59 %tmp15 = select i1 %tmp14, i32 0, i32 3
60 br label %bb16
61
62 bb16: ; preds = %bb12, %bb1, %bb
63 %tmp17 = phi i32 [ 2, %bb1 ], [ %tmp15, %bb12 ], [ 0, %bb ]
64 ret i32 %tmp17
65 }
66
67 ; Function Attrs: nounwind
68 declare i32 @foo(...) local_unnamed_addr #0
69
70 ; Function Attrs: nounwind
71 declare i32 @goo(i32) local_unnamed_addr #0
72
73 ; Function Attrs: nounwind
74 define i32 @dummy_caller(i32 %arg) local_unnamed_addr #0 {
75 bb:
76 ; CHECK-LABEL: @dummy_caller
77 ; CHECK-NOT: br i1
78 ; CHECK-NOT: call{{.*}}bar_hot_outline_region.
79 ; NOCOST-LABEL: @dummy_caller
80 ; NOCOST: br i1
81 ; NOCOST: call{{.*}}bar_hot_outline_region.
82
83 %tmp = tail call i32 @bar_hot_outline_region(i32 %arg)
84 ret i32 %tmp
85 }
86
87 define i32 @dummy_caller2(i32 %arg) local_unnamed_addr #0 {
88 bb:
89 ; CHECK-LABEL: @dummy_caller2
90 ; CHECK: br i1
91 ; CHECK: call{{.*}}bar_cold_outline_region.
92 ; NOCOST-LABEL: @dummy_caller2
93 ; NOCOST: br i1
94 ; NOCOST: call{{.*}}bar_cold_outline_region.
95
96 %tmp = tail call i32 @bar_cold_outline_region(i32 %arg)
97 ret i32 %tmp
98 }
99
100 attributes #0 = { nounwind }
101
102 !llvm.ident = !{!0}
103
104 !0 = !{!"clang version 5.0.0 (trunk 301898)"}
105 !1 = !{!"branch_weights", i32 2000, i32 1}
106 !2 = !{!"branch_weights", i32 1, i32 100}
None ; RUN: opt < %s -partial-inliner -S | FileCheck %s
1 ; RUN: opt < %s -passes=partial-inliner -S | FileCheck %s
0 ; RUN: opt < %s -partial-inliner -skip-partial-inlining-cost-analysis -S | FileCheck %s
1 ; RUN: opt < %s -passes=partial-inliner -skip-partial-inlining-cost-analysis -S | FileCheck %s
22 ; RUN: opt < %s -partial-inliner -max-num-inline-blocks=2 -S | FileCheck --check-prefix=LIMIT %s
33 ; RUN: opt < %s -passes=partial-inliner -max-num-inline-blocks=2 -S | FileCheck --check-prefix=LIMIT %s
44
0 ; RUN: opt < %s -partial-inliner -S | FileCheck %s
11 ; RUN: opt < %s -passes=partial-inliner -S | FileCheck %s
2 ; RUN: opt < %s -partial-inliner -max-num-inline-blocks=3 -S | FileCheck --check-prefix=LIMIT3 %s
3 ; RUN: opt < %s -passes=partial-inliner -max-num-inline-blocks=3 -S | FileCheck --check-prefix=LIMIT3 %s
2 ; RUN: opt < %s -partial-inliner -max-num-inline-blocks=3 -skip-partial-inlining-cost-analysis -S | FileCheck --check-prefix=LIMIT3 %s
3 ; RUN: opt < %s -passes=partial-inliner -max-num-inline-blocks=3 -skip-partial-inlining-cost-analysis -S | FileCheck --check-prefix=LIMIT3 %s
44 ; RUN: opt < %s -partial-inliner -max-num-inline-blocks=2 -S | FileCheck --check-prefix=LIMIT2 %s
55 ; RUN: opt < %s -passes=partial-inliner -max-num-inline-blocks=2 -S | FileCheck --check-prefix=LIMIT2 %s
66
None ; RUN: opt < %s -partial-inliner -S | FileCheck %s
1 ; RUN: opt < %s -passes=partial-inliner -S | FileCheck %s
0 ; RUN: opt < %s -skip-partial-inlining-cost-analysis -partial-inliner -S | FileCheck %s
1 ; RUN: opt < %s -skip-partial-inlining-cost-analysis -passes=partial-inliner -S | FileCheck %s
22
33 define internal i32 @inlinedFunc(i1 %cond, i32* align 4 %align.val) {
44 entry:
None ; RUN: opt < %s -partial-inliner | llc -filetype=null
1 ; RUN: opt < %s -partial-inliner -S | FileCheck %s
0 ; RUN: opt < %s -partial-inliner -skip-partial-inlining-cost-analysis | llc -filetype=null
1 ; RUN: opt < %s -partial-inliner -skip-partial-inlining-cost-analysis -S | FileCheck %s
22 ; This testcase checks to see if CodeExtractor properly inherits
33 ; target specific attributes for the extracted function. This can
44 ; cause certain instructions that depend on the attributes to not