llvm.org GIT mirror llvm / 0e6bdca
[PartialInliner] Inline vararg functions that forward varargs.

Summary:
This patch extends the partial inliner to support inlining parts of vararg functions, if the vararg handling is done in the outlined part.

It adds a `ForwardVarArgsTo` argument to InlineFunction. If it is non-null, all varargs passed to the inlined function will be added to all calls to `ForwardVarArgsTo`.

The partial inliner takes care to only pass `ForwardVarArgsTo` if the vararg handling is done in the outlined function: it checks that vastart is not used in any block that will be inlined. `test/Transforms/CodeExtractor/PartialInlineNoInline.ll` (already part of the repo) checks we do not do partial inlining if vastart is used in a basic block that will be inlined.

Reviewers: davide, davidxl, grosser

Reviewed By: davide, davidxl, grosser

Subscribers: gyiu, grosser, eraman, llvm-commits

Differential Revision: https://reviews.llvm.org/D39607

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@318028 91177308-0d34-0410-b5e6-96231b3b80d8

Florian Hahn, 1 year, 10 months ago
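Not part of the commit, but for orientation: a minimal C++ sketch of how a client such as the partial inliner is expected to combine the two new knobs. The helper name, the call site CS, and the block list ColdBlocks are hypothetical, and the cloning step the real partial inliner performs before outlining is omitted.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Dominators.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/CodeExtractor.h"
using namespace llvm;

// Hypothetical client: ColdBlocks is the region of the vararg callee that
// contains all of its va_start/va_end handling, and CS is a call to that
// callee. (The real partial inliner works on a clone of the callee; that
// step is left out here.)
static bool partiallyInlineVarArgCallee(CallSite CS,
                                        ArrayRef<BasicBlock *> ColdBlocks,
                                        DominatorTree &DT) {
  // Outline the cold region. AllowVarArgs lets the extractor accept blocks of
  // a vararg function and gives the outlined function a vararg signature.
  CodeExtractor CE(ColdBlocks, &DT, /*AggregateArgs=*/false, /*BFI=*/nullptr,
                   /*BPI=*/nullptr, /*AllowVarArgs=*/true);
  Function *Outlined = CE.extractCodeRegion();
  if (!Outlined)
    return false;

  // Inline what is left of the callee at CS. The varargs passed at CS are
  // appended to every call to Outlined that ends up in the caller.
  InlineFunctionInfo IFI;
  return InlineFunction(CS, IFI, /*CalleeAAR=*/nullptr,
                        /*InsertLifetime=*/true,
                        /*ForwardVarArgsTo=*/Outlined);
}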
6 changed file(s) with 176 addition(s) and 29 deletion(s).
226226 /// *inlined* code to minimize the actual inserted code, it must not delete
227227 /// code in the caller as users of this routine may have pointers to
228228 /// instructions in the caller that need to remain stable.
229 ///
230 /// If ForwardVarArgsTo is passed, inlining a function with varargs is allowed
231 /// and all varargs at the callsite will be passed to any calls to
232 /// ForwardVarArgsTo. The caller of InlineFunction has to make sure any varargs
233 /// are only used by ForwardVarArgsTo.
229234 bool InlineFunction(CallInst *C, InlineFunctionInfo &IFI,
230235 AAResults *CalleeAAR = nullptr, bool InsertLifetime = true);
231236 bool InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI,
232237 AAResults *CalleeAAR = nullptr, bool InsertLifetime = true);
233238 bool InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
234 AAResults *CalleeAAR = nullptr, bool InsertLifetime = true);
239 AAResults *CalleeAAR = nullptr, bool InsertLifetime = true,
240 Function *ForwardVarArgsTo = nullptr);
235241
236242 /// \brief Clones a loop \p OrigLoop. Returns the loop and the blocks in \p
237243 /// Blocks.
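To illustrate the contract documented above, here is a hedged sketch (not from the patch; the helper and OutlinedFn are hypothetical): with the default ForwardVarArgsTo of nullptr the old behavior is preserved and vararg callees are still rejected, while passing the outlined function opts into forwarding.

#include "llvm/IR/CallSite.h"
#include "llvm/Transforms/Utils/Cloning.h"
using namespace llvm;

// Hypothetical helper: CS calls a vararg function; OutlinedFn is the function
// that contains all of the callee's va_start/va_end handling.
static bool inlineResidualPart(CallSite CS, Function *OutlinedFn) {
  InlineFunctionInfo IFI;

  // With the default ForwardVarArgsTo (nullptr) a vararg callee is still
  // rejected, so existing callers of InlineFunction are unaffected.
  if (!OutlinedFn)
    return InlineFunction(CS, IFI); // returns false for a vararg callee

  // Opting in: every vararg passed at CS is appended to each inlined call
  // to OutlinedFn.
  return InlineFunction(CS, IFI, /*CalleeAAR=*/nullptr, /*InsertLifetime=*/true,
                        /*ForwardVarArgsTo=*/OutlinedFn);
}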
5555 BlockFrequencyInfo *BFI;
5656 BranchProbabilityInfo *BPI;
5757
58 // If true, varargs functions can be extracted.
59 bool AllowVarArgs;
60
5861 // Bits of intermediate state computed at various phases of extraction.
5962 SetVector<BasicBlock *> Blocks;
6063 unsigned NumExitBlocks = std::numeric_limits<unsigned>::max();
6669 /// Given a sequence of basic blocks where the first block in the sequence
6770 /// dominates the rest, prepare a code extractor object for pulling this
6871 /// sequence out into its new function. When a DominatorTree is also given,
69 /// extra checking and transformations are enabled.
72 /// extra checking and transformations are enabled. If AllowVarArgs is true,
73 /// vararg functions can be extracted. This is safe if all vararg handling
74 /// code is extracted, including vastart.
7075 CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT = nullptr,
7176 bool AggregateArgs = false, BlockFrequencyInfo *BFI = nullptr,
72 BranchProbabilityInfo *BPI = nullptr);
77 BranchProbabilityInfo *BPI = nullptr,
78 bool AllowVarArgs = false);
7379
7480 /// \brief Create a code extractor for a loop body.
7581 ///
8187
8288 /// \brief Check to see if a block is valid for extraction.
8389 ///
84 /// Blocks containing EHPads, allocas, invokes, or vastarts are not valid.
85 static bool isBlockValidForExtraction(const BasicBlock &BB);
90 /// Blocks containing EHPads, allocas, and invokes are not valid. If
91 /// AllowVarArgs is true, blocks with vastart can be extracted. This is
92 /// safe if all vararg handling code is extracted, including vastart.
93 static bool isBlockValidForExtraction(const BasicBlock &BB,
94 bool AllowVarArgs);
8695
8796 /// \brief Perform the extraction, returning the new function.
8897 ///
148148 // the return block.
149149 void NormalizeReturnBlock();
150150
151 // Do function outlining:
151 // Do function outlining.
152 // NOTE: For vararg functions that do the vararg handling in the outlined
153 // function, we temporarily generate IR that does not properly
154 // forward varargs to the outlined function. Calling InlineFunction
155 // will update calls to the outlined functions to properly forward
156 // the varargs.
152157 Function *doFunctionOutlining();
153158
154159 Function *OrigFunc = nullptr;
812817
813818 // Extract the body of the if.
814819 OutlinedFunc = CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false,
815 ClonedFuncBFI.get(), &BPI)
820 ClonedFuncBFI.get(), &BPI,
821 /* AllowVarArgs */ true)
816822 .extractCodeRegion();
817823
818824 if (OutlinedFunc) {
937943 << ore::NV("Caller", CS.getCaller());
938944
939945 InlineFunctionInfo IFI(nullptr, GetAssumptionCache, PSI);
940 if (!InlineFunction(CS, IFI))
946 if (!InlineFunction(CS, IFI, nullptr, true, Cloner.OutlinedFunc))
941947 continue;
942948
943949 ORE.emit(OR);
7777 cl::desc("Aggregate arguments to code-extracted functions"));
7878
7979 /// \brief Test whether a block is valid for extraction.
80 bool CodeExtractor::isBlockValidForExtraction(const BasicBlock &BB) {
80 bool CodeExtractor::isBlockValidForExtraction(const BasicBlock &BB,
81 bool AllowVarArgs) {
8182 // Landing pads must be in the function where they were inserted for cleanup.
8283 if (BB.isEHPad())
8384 return false;
109110 }
110111 }
111112
112 // Don't hoist code containing allocas, invokes, or vastarts.
113 // Don't hoist code containing allocas or invokes. If explicitly requested,
114 // allow vastart.
113115 for (BasicBlock::const_iterator I = BB.begin(), E = BB.end(); I != E; ++I) {
114116 if (isa<AllocaInst>(I) || isa<InvokeInst>(I))
115117 return false;
116118 if (const CallInst *CI = dyn_cast<CallInst>(I))
117119 if (const Function *F = CI->getCalledFunction())
118 if (F->getIntrinsicID() == Intrinsic::vastart)
119 return false;
120 if (F->getIntrinsicID() == Intrinsic::vastart) {
121 if (AllowVarArgs)
122 continue;
123 else
124 return false;
125 }
120126 }
121127
122128 return true;
124130
125131 /// \brief Build a set of blocks to extract if the input blocks are viable.
126132 static SetVector<BasicBlock *>
127 buildExtractionBlockSet(ArrayRef<BasicBlock *> BBs, DominatorTree *DT) {
133 buildExtractionBlockSet(ArrayRef<BasicBlock *> BBs, DominatorTree *DT,
134 bool AllowVarArgs) {
128135 assert(!BBs.empty() && "The set of blocks to extract must be non-empty");
129136 SetVector<BasicBlock *> Result;
130137
137144
138145 if (!Result.insert(BB))
139146 llvm_unreachable("Repeated basic blocks in extraction input");
140 if (!CodeExtractor::isBlockValidForExtraction(*BB)) {
147 if (!CodeExtractor::isBlockValidForExtraction(*BB, AllowVarArgs)) {
141148 Result.clear();
142149 return Result;
143150 }
159166
160167 CodeExtractor::CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT,
161168 bool AggregateArgs, BlockFrequencyInfo *BFI,
162 BranchProbabilityInfo *BPI)
169 BranchProbabilityInfo *BPI, bool AllowVarArgs)
163170 : DT(DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI),
164 BPI(BPI), Blocks(buildExtractionBlockSet(BBs, DT)) {}
171 BPI(BPI), AllowVarArgs(AllowVarArgs),
172 Blocks(buildExtractionBlockSet(BBs, DT, AllowVarArgs)) {}
165173
166174 CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs,
167175 BlockFrequencyInfo *BFI,
168176 BranchProbabilityInfo *BPI)
169177 : DT(&DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI),
170 BPI(BPI), Blocks(buildExtractionBlockSet(L.getBlocks(), &DT)) {}
178 BPI(BPI), Blocks(buildExtractionBlockSet(L.getBlocks(), &DT,
179 /* AllowVarArgs */ false)) {}
171180
172181 /// definedInRegion - Return true if the specified value is defined in the
173182 /// extracted region.
593602 paramTy.push_back(PointerType::getUnqual(StructTy));
594603 }
595604 FunctionType *funcType =
596 FunctionType::get(RetTy, paramTy, false);
605 FunctionType::get(RetTy, paramTy,
606 AllowVarArgs && oldFunction->isVarArg());
597607
598608 // Create the new function
599609 Function *newFunction = Function::Create(funcType,
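The important detail in the hunk above is the third argument to FunctionType::get: the outlined function only becomes vararg when AllowVarArgs was requested and the original function is itself vararg. A standalone sketch of that API call (hypothetical helper; the i32/void parameter and return types are made up for illustration):

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
using namespace llvm;

// Build "void (i32, ...)" versus "void (i32)" depending on the flags,
// mirroring how the extractor decides the outlined function's signature.
static FunctionType *makeOutlinedType(LLVMContext &Ctx, bool AllowVarArgs,
                                      bool OriginalIsVarArg) {
  Type *Params[] = {Type::getInt32Ty(Ctx)};
  return FunctionType::get(Type::getVoidTy(Ctx), Params,
                           /*isVarArg=*/AllowVarArgs && OriginalIsVarArg);
}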
956966 if (!isEligible())
957967 return nullptr;
958968
959 ValueSet inputs, outputs, SinkingCands, HoistingCands;
960 BasicBlock *CommonExit = nullptr;
961
962969 // Assumption: this is a single-entry code region, and the header is the first
963970 // block in the region.
964971 BasicBlock *header = *Blocks.begin();
972 Function *oldFunction = header->getParent();
973
974 // For functions with varargs, check that varargs handling is only done in the
975 // outlined function, i.e. vastart and vaend are only used in outlined blocks.
976 if (AllowVarArgs && oldFunction->getFunctionType()->isVarArg()) {
977 auto containsVarArgIntrinsic = [](Instruction &I) {
978 if (const CallInst *CI = dyn_cast<CallInst>(&I))
979 if (const Function *F = CI->getCalledFunction())
980 return F->getIntrinsicID() == Intrinsic::vastart ||
981 F->getIntrinsicID() == Intrinsic::vaend;
982 return false;
983 };
984
985 for (auto &BB : *oldFunction) {
986 if (Blocks.count(&BB))
987 continue;
988 if (llvm::any_of(BB, containsVarArgIntrinsic))
989 return nullptr;
990 }
991 }
992 ValueSet inputs, outputs, SinkingCands, HoistingCands;
993 BasicBlock *CommonExit = nullptr;
965994
966995 // Calculate the entry frequency of the new function before we change the root
967996 // block.
9821011 // If we have any return instructions in the region, split those blocks so
9831012 // that the return is not in the region.
9841013 splitReturnBlocks();
985
986 Function *oldFunction = header->getParent();
9871014
9881015 // This takes place of the original loop
9891016 BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(),
14891489 /// exists in the instruction stream. Similarly this will inline a recursive
14901490 /// function by one level.
14911491 bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
1492 AAResults *CalleeAAR, bool InsertLifetime) {
1492 AAResults *CalleeAAR, bool InsertLifetime,
1493 Function *ForwardVarArgsTo) {
14931494 Instruction *TheCall = CS.getInstruction();
14941495 assert(TheCall->getParent() && TheCall->getFunction()
14951496 && "Instruction not in function!");
14991500
15001501 Function *CalledFunc = CS.getCalledFunction();
15011502 if (!CalledFunc || // Can't inline external function or indirect
1502 CalledFunc->isDeclaration() || // call, or call to a vararg function!
1503 CalledFunc->getFunctionType()->isVarArg()) return false;
1503 CalledFunc->isDeclaration() ||
1504 (!ForwardVarArgsTo && CalledFunc->isVarArg())) // call, or call to a vararg function!
1505 return false;
15041506
15051507 // The inliner does not know how to inline through calls with operand bundles
15061508 // in general ...
16271629
16281630 auto &DL = Caller->getParent()->getDataLayout();
16291631
1630 assert(CalledFunc->arg_size() == CS.arg_size() &&
1631 "No varargs calls can be inlined!");
1632 assert((CalledFunc->arg_size() == CS.arg_size() || ForwardVarArgsTo) &&
1633 "Varargs calls can only be inlined if the Varargs are forwarded!");
16321634
16331635 // Calculate the vector of arguments to pass into the function cloner, which
16341636 // matches up the formal to the actual argument values.
18101812 replaceDbgDeclareForAlloca(AI, AI, DIB, /*Deref=*/false);
18111813 }
18121814
1815 SmallVector<Value *, 4> VarArgsToForward;
1816 for (unsigned i = CalledFunc->getFunctionType()->getNumParams();
1817 i < CS.getNumArgOperands(); i++)
1818 VarArgsToForward.push_back(CS.getArgOperand(i));
1819
18131820 bool InlinedMustTailCalls = false, InlinedDeoptimizeCalls = false;
18141821 if (InlinedFunctionInfo.ContainsCalls) {
18151822 CallInst::TailCallKind CallSiteTailKind = CallInst::TCK_None;
18181825
18191826 for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E;
18201827 ++BB) {
1821 for (Instruction &I : *BB) {
1828 for (auto II = BB->begin(); II != BB->end();) {
1829 Instruction &I = *II++;
18221830 CallInst *CI = dyn_cast<CallInst>(&I);
18231831 if (!CI)
18241832 continue;
18491857 // 'nounwind'.
18501858 if (MarkNoUnwind)
18511859 CI->setDoesNotThrow();
1860
1861 if (ForwardVarArgsTo && CI->getCalledFunction() == ForwardVarArgsTo) {
1862 SmallVector<Value *, 6> Params(CI->arg_operands());
1863 Params.append(VarArgsToForward.begin(), VarArgsToForward.end());
1864 CallInst *Call = CallInst::Create(CI->getCalledFunction(), Params, "", CI);
1865 CI->replaceAllUsesWith(Call);
1866 CI->eraseFromParent();
1867 }
18521868 }
18531869 }
18541870 }
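One detail worth noting in the hunk above: the loop over the inlined blocks switched from a range-based for to an explicit iterator that is advanced before the body runs, because the body may now erase the current call when it rewrites it to forward the varargs. A hedged sketch of that erase-safe iteration idiom (forEachInstSafe and Rewrite are hypothetical names, not from the patch):

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Safely visit every instruction in BB even when Rewrite may erase the
// current one: advance the iterator before touching the instruction.
template <typename RewriteFn>
static void forEachInstSafe(BasicBlock &BB, RewriteFn Rewrite) {
  for (auto It = BB.begin(); It != BB.end();) {
    Instruction &I = *It++; // It now points past I, so erasing I is fine.
    Rewrite(I);             // May call I.eraseFromParent().
  }
}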
0 ; RUN: opt < %s -partial-inliner -S -skip-partial-inlining-cost-analysis | FileCheck %s
1 ; RUN: opt < %s -passes=partial-inliner -S -skip-partial-inlining-cost-analysis | FileCheck %s
2
3 @stat = external global i32, align 4
4
5 define i32 @vararg(i32 %count, ...) {
6 entry:
7 %vargs = alloca i8*, align 8
8 %stat1 = load i32, i32* @stat, align 4
9 %cmp = icmp slt i32 %stat1, 0
10 br i1 %cmp, label %bb2, label %bb1
11
12 bb1: ; preds = %entry
13 %vg1 = add nsw i32 %stat1, 1
14 store i32 %vg1, i32* @stat, align 4
15 %vargs1 = bitcast i8** %vargs to i8*
16 call void @llvm.va_start(i8* %vargs1)
17 %va1 = va_arg i8** %vargs, i32
18 call void @foo(i32 %count, i32 %va1) #2
19 call void @llvm.va_end(i8* %vargs1)
20 br label %bb2
21
22 bb2: ; preds = %bb1, %entry
23 %res = phi i32 [ 1, %bb1 ], [ 0, %entry ]
24 ret i32 %res
25 }
26
27 declare void @foo(i32, i32)
28 declare void @llvm.va_start(i8*)
29 declare void @llvm.va_end(i8*)
30
31 define i32 @caller1(i32 %arg) {
32 bb:
33 %tmp = tail call i32 (i32, ...) @vararg(i32 %arg)
34 ret i32 %tmp
35 }
36 ; CHECK-LABEL: @caller1
37 ; CHECK: codeRepl.i:
38 ; CHECK-NEXT: call void (i32, i8**, i32, ...) @vararg.2_bb1(i32 %stat1.i, i8** %vargs.i, i32 %arg)
39
40 define i32 @caller2(i32 %arg, float %arg2) {
41 bb:
42 %tmp = tail call i32 (i32, ...) @vararg(i32 %arg, i32 10, float %arg2)
43 ret i32 %tmp
44 }
45
46 ; CHECK-LABEL: @caller2
47 ; CHECK: codeRepl.i:
48 ; CHECK-NEXT: call void (i32, i8**, i32, ...) @vararg.2_bb1(i32 %stat1.i, i8** %vargs.i, i32 %arg, i32 10, float %arg2)
49
50 ; Test case to check that we do not extract a vararg function if va_end is in
51 ; a block that is not outlined.
52 define i32 @vararg_not_legal(i32 %count, ...) {
53 entry:
54 %vargs = alloca i8*, align 8
55 %vargs0 = bitcast i8** %vargs to i8*
56 %stat1 = load i32, i32* @stat, align 4
57 %cmp = icmp slt i32 %stat1, 0
58 br i1 %cmp, label %bb2, label %bb1
59
60 bb1: ; preds = %entry
61 %vg1 = add nsw i32 %stat1, 1
62 store i32 %vg1, i32* @stat, align 4
63 %vargs1 = bitcast i8** %vargs to i8*
64 call void @llvm.va_start(i8* %vargs1)
65 %va1 = va_arg i8** %vargs, i32
66 call void @foo(i32 %count, i32 %va1)
67 br label %bb2
68
69 bb2: ; preds = %bb1, %entry
70 %res = phi i32 [ 1, %bb1 ], [ 0, %entry ]
71 %ptr = phi i8* [ %vargs1, %bb1 ], [ %vargs0, %entry]
72 call void @llvm.va_end(i8* %ptr)
73 ret i32 %res
74 }
75
76 ; CHECK-LABEL: @caller3
77 ; CHECK: tail call i32 (i32, ...) @vararg_not_legal(i32 %arg, i32 %arg)
78 define i32 @caller3(i32 %arg) {
79 bb:
80 %res = tail call i32 (i32, ...) @vararg_not_legal(i32 %arg, i32 %arg)
81 ret i32 %res
82 }