llvm.org GIT mirror llvm / 118cef0
[PartialInlining] Minor cost analysis tuning Also added a test option and 2 cost analysis related tests. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@304599 91177308-0d34-0410-b5e6-96231b3b80d8 Xinliang David Li 2 years ago
3 changed file(s) with 161 addition(s) and 9 deletion(s). Raw diff Collapse all Expand all
6767 cl::desc("Relative frequency of outline region to "
6868 "the entry block"));
6969
70 static cl::opt ExtraOutliningPenalty(
71 "partial-inlining-extra-penalty", cl::init(0), cl::Hidden,
72 cl::desc("A debug option to add additional penalty to the computed one."));
73
7074 namespace {
7175
7276 struct FunctionOutliningInfo {
8286 SmallVector Entries;
8387 // The return block that is not included in the outlined region.
8488 BasicBlock *ReturnBlock;
85 // The dominating block of the region ot be outlined.
89 // The dominating block of the region to be outlined.
8690 BasicBlock *NonReturnBlock;
8791 // The set of blocks in Entries that are predecessors to ReturnBlock
8892 SmallVector ReturnBlockPreds;
406410 if (hasProfileData(F, OI))
407411 return OutlineRegionRelFreq;
408412
409 // When profile data is not available, we need to be very
410 // conservative in estimating the overall savings. We need to make sure
411 // the outline region relative frequency is not below the threshold
412 // specified by the option.
413 OutlineRegionRelFreq = std::max(OutlineRegionRelFreq, BranchProbability(OutlineRegionFreqPercent, 100));
413 // When profile data is not available, we need to be conservative in
414 // estimating the overall savings. Static branch prediction can usually
415 // guess the branch direction right (taken/non-taken), but the guessed
416 // branch probability is usually not biased enough. In case when the
417 // outlined region is predicted to be likely, its probability needs
418 // to be made higher (more biased) to not under-estimate the cost of
419 // function outlining. On the other hand, if the outlined region
420 // is predicted to be less likely, the predicted probability is usually
421 // higher than the actual. For instance, the actual probability of the
422 // less likely target is only 5%, but the guessed probability can be
423 // 40%. In the latter case, there is no need for further adjustment.
424 // FIXME: add an option for this.
425 if (OutlineRegionRelFreq < BranchProbability(45, 100))
426 return OutlineRegionRelFreq;
427
428 OutlineRegionRelFreq = std::max(
429 OutlineRegionRelFreq, BranchProbability(OutlineRegionFreqPercent, 100));
414430
415431 return OutlineRegionRelFreq;
416432 }
495511 if (isa(I))
496512 continue;
497513
514 switch (I->getOpcode()) {
515 case Instruction::BitCast:
516 case Instruction::PtrToInt:
517 case Instruction::IntToPtr:
518 case Instruction::Alloca:
519 continue;
520 case Instruction::GetElementPtr:
521 if (cast(I)->hasAllZeroIndices())
522 continue;
523 default:
524 break;
525 }
526
527 IntrinsicInst *IntrInst = dyn_cast(I);
528 if (IntrInst) {
529 if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start ||
530 IntrInst->getIntrinsicID() == Intrinsic::lifetime_end)
531 continue;
532 }
533
498534 if (CallInst *CI = dyn_cast(I)) {
499535 InlineCost += getCallsiteCost(CallSite(CI), DL);
500536 continue;
518554 Function *F, const FunctionOutliningInfo *OI, Function *OutlinedFunction,
519555 BasicBlock *OutliningCallBB) {
520556 // First compute the cost of the outlined region 'OI' in the original
521 // function 'F':
557 // function 'F'.
558 // FIXME: The code extractor (outliner) can now do code sinking/hoisting
559 // to reduce outlining cost. The hoisted/sunk code currently does not
560 // incur any runtime cost so it is still OK to compare the outlined
561 // function cost with the outlined region in the original function.
562 // If this ever changes, we will need to introduce new extractor api
563 // to pass the information.
522564 int OutlinedRegionCost = 0;
523565 for (BasicBlock &BB : *F) {
524566 if (&BB != OI->ReturnBlock &&
538580 for (BasicBlock &BB : *OutlinedFunction) {
539581 OutlinedFunctionCost += computeBBInlineCost(&BB);
540582 }
583 // The code extractor introduces a new root and exit stub blocks with
584 // additional unconditional branches. Those branches will be eliminated
585 // later with bb layout. The cost should be adjusted accordingly:
586 OutlinedFunctionCost -= 2 * InlineConstants::InstrCost;
541587
542588 assert(OutlinedFunctionCost >= OutlinedRegionCost &&
543589 "Outlined function cost should be no less than the outlined region");
544 int OutliningRuntimeOverhead =
545 OutliningFuncCallCost + (OutlinedFunctionCost - OutlinedRegionCost);
590 int OutliningRuntimeOverhead = OutliningFuncCallCost +
591 (OutlinedFunctionCost - OutlinedRegionCost) +
592 ExtraOutliningPenalty;
546593
547594 return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead,
548595 OutlinedRegionCost);
0 ; RUN: opt -S < %s -partial-inliner -partial-inlining-extra-penalty=10 | FileCheck %s
1 ; RUN: opt -S < %s -passes=partial-inliner -partial-inlining-extra-penalty=10 | FileCheck %s
2 define i32 @outline_region_notlikely(i32* %arg) local_unnamed_addr {
3 bb:
4 ; ptr != null is predicted to be true
5 %tmp = icmp ne i32* %arg, null
6 br i1 %tmp, label %bb8, label %bb1
7
8 ; bb1 is not likely
9 bb1: ; preds = %bb
10 %tmp2 = tail call i32 @foo(i32* nonnull %arg)
11 %tmp3 = tail call i32 @foo(i32* nonnull %arg)
12 %tmp4 = tail call i32 @foo(i32* nonnull %arg)
13 %tmp5 = tail call i32 @foo(i32* nonnull %arg)
14 %tmp6 = tail call i32 @foo(i32* nonnull %arg)
15 %tmp7 = tail call i32 @foo(i32* nonnull %arg)
16 br label %bb8
17
18 bb8: ; preds = %bb1, %bb
19 %tmp9 = phi i32 [ 0, %bb1 ], [ 1, %bb ]
20 ret i32 %tmp9
21 }
22
23 define i32 @outline_region_likely(i32* %arg) local_unnamed_addr {
24 bb:
25 ; ptr == null is predicted to be false
26 %tmp = icmp eq i32* %arg, null
27 br i1 %tmp, label %bb8, label %bb1
28
29 ; bb1 is likely
30 bb1: ; preds = %bb
31 %tmp2 = tail call i32 @foo(i32* nonnull %arg)
32 %tmp3 = tail call i32 @foo(i32* nonnull %arg)
33 %tmp4 = tail call i32 @foo(i32* nonnull %arg)
34 %tmp5 = tail call i32 @foo(i32* nonnull %arg)
35 %tmp6 = tail call i32 @foo(i32* nonnull %arg)
36 %tmp7 = tail call i32 @foo(i32* nonnull %arg)
37 br label %bb8
38
39 bb8: ; preds = %bb1, %bb
40 %tmp9 = phi i32 [ 0, %bb1 ], [ 1, %bb ]
41 ret i32 %tmp9
42 }
43
44 declare i32 @foo(i32* %arg)
45
46 define i32 @dummy_caller(i32* %arg) local_unnamed_addr {
47 ; CHECK-LABEL: @dummy_caller
48 %tmp = call i32 @outline_region_notlikely(i32* %arg)
49 ; CHECK: call void @outline_region_notlikely.2_bb1
50 %tmp2 = tail call i32 @outline_region_likely(i32* %arg)
51 ; CHECK: %tmp2 = tail call i32 @outline_region_likely(i32* %arg)
52 ret i32 %tmp
53
54 }
55
56 ; CHECK-LABEL: define internal void @outline_region_notlikely.2_bb1(i32* %arg) {
57 ; CHECK-NEXT: newFuncRoot:
58
59 !llvm.module.flags = !{!0}
60 !llvm.ident = !{!1}
61
62 !0 = !{i32 1, !"wchar_size", i32 4}
63 !1 = !{!"clang version 5.0.0 (trunk 304489)"}
0 ; RUN: opt -S < %s -partial-inliner -partial-inlining-extra-penalty=2000 | FileCheck %s
1 ; RUN: opt -S < %s -passes=partial-inliner -partial-inlining-extra-penalty=2000 | FileCheck %s
2 define i32 @outline_region_notlikely(i32* %arg) local_unnamed_addr {
3 bb:
4 ; ptr != null is predicted to be true
5 %tmp = icmp ne i32* %arg, null
6 br i1 %tmp, label %bb8, label %bb1, !prof !2
7
8 ; bb1 is not likely
9 bb1: ; preds = %bb
10 %tmp2 = tail call i32 @foo(i32* nonnull %arg)
11 %tmp3 = tail call i32 @foo(i32* nonnull %arg)
12 %tmp4 = tail call i32 @foo(i32* nonnull %arg)
13 %tmp5 = tail call i32 @foo(i32* nonnull %arg)
14 %tmp6 = tail call i32 @foo(i32* nonnull %arg)
15 %tmp7 = tail call i32 @foo(i32* nonnull %arg)
16 br label %bb8
17
18 bb8: ; preds = %bb1, %bb
19 %tmp9 = phi i32 [ 0, %bb1 ], [ 1, %bb ]
20 ret i32 %tmp9
21 }
22
23 define i32 @dummy_caller(i32* %arg) local_unnamed_addr {
24 ; CHECK-LABEL: @dummy_caller
25 %tmp = call i32 @outline_region_notlikely(i32* %arg)
26 ret i32 %tmp
27 }
28
29
30 ; CHECK-LABEL: define internal void @outline_region_notlikely.1_bb1(i32* %arg) {
31 ; CHECK-NEXT: newFuncRoot:
32
33 declare i32 @foo(i32 * %arg)
34
35 !llvm.module.flags = !{!0}
36 !llvm.ident = !{!1}
37
38 !0 = !{i32 1, !"wchar_size", i32 4}
39 !1 = !{!"clang version 5.0.0 (trunk 304489)"}
40 !2 = !{!"branch_weights", i32 2000, i32 1}