llvm.org GIT mirror llvm / 1564d12
[PGO] Promote indirect calls to conditional direct calls with value-profile This patch implements the transformation that promotes indirect calls to conditional direct calls when the indirect-call value profile meta-data is available. Differential Revision: http://reviews.llvm.org/D17864 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@267815 91177308-0d34-0410-b5e6-96231b3b80d8 Rong Xu 3 years ago
13 changed file(s) with 1092 addition(s) and 1 deletion(s). Raw diff Collapse all Expand all
123123 void initializeGCOVProfilerPass(PassRegistry&);
124124 void initializePGOInstrumentationGenPass(PassRegistry&);
125125 void initializePGOInstrumentationUsePass(PassRegistry&);
126 void initializePGOIndirectCallPromotionPass(PassRegistry&);
126127 void initializeInstrProfilingLegacyPassPass(PassRegistry &);
127128 void initializeAddressSanitizerPass(PassRegistry&);
128129 void initializeAddressSanitizerModulePass(PassRegistry&);
9090 (void) llvm::createGCOVProfilerPass();
9191 (void) llvm::createPGOInstrumentationGenPass();
9292 (void) llvm::createPGOInstrumentationUsePass();
93 (void) llvm::createPGOIndirectCallPromotionPass();
9394 (void) llvm::createInstrProfilingLegacyPass();
9495 (void) llvm::createFunctionImportPass();
9596 (void) llvm::createFunctionInliningPass();
8282 ModulePass *createPGOInstrumentationGenPass();
8383 ModulePass *
8484 createPGOInstrumentationUsePass(StringRef Filename = StringRef(""));
85 ModulePass *createPGOIndirectCallPromotionPass(bool InLTO = false);
8586
8687 /// Options for the frontend instrumentation based profiling pass.
8788 struct InstrProfOptions {
370370 MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE
371371 }
372372
373 if (!PerformThinLTO)
373 if (!PerformThinLTO) {
374374 /// PGO instrumentation is added during the compile phase for ThinLTO, do
375375 /// not run it a second time
376376 addPGOInstrPasses(MPM);
377 // Indirect call promotion that promotes intra-module targets only.
378 MPM.add(createPGOIndirectCallPromotionPass());
379 }
377380
378381 if (EnableNonLTOGlobalsModRef)
379382 // We add a module alias analysis pass here. In part due to bugs in the
584587 // Infer attributes about declarations if possible.
585588 PM.add(createInferFunctionAttrsLegacyPass());
586589
590 // Indirect call promotion. This should promote all the targets that are left
591 // by the earlier promotion pass that promotes intra-module targets.
592 // This two-step promotion is to save the compile time. For LTO, it should
593 // produce the same result as if we only do promotion here.
594 PM.add(createPGOIndirectCallPromotionPass(true));
595
587596 // Propagate constants at call sites into the functions they call. This
588597 // opens opportunities for globalopt (and inlining) by substituting function
589598 // pointers passed as arguments to direct uses of functions.
33 DataFlowSanitizer.cpp
44 GCOVProfiling.cpp
55 MemorySanitizer.cpp
6 IndirectCallPromotion.cpp
67 Instrumentation.cpp
78 InstrProfiling.cpp
89 PGOInstrumentation.cpp
0 //===-- IndirectCallPromotion.cpp - Promote indirect calls to direct calls ===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the transformation that promotes indirect calls to
10 // conditional direct calls when the indirect-call value profile metadata is
11 // available.
12 //
13 //===----------------------------------------------------------------------===//
14
#include "IndirectCallSiteVisitor.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <memory>
#include <string>
#include <utility>
#include <vector>
37
38 using namespace llvm;
39
40 #define DEBUG_TYPE "icall-promotion"
41
42 STATISTIC(NumOfPGOICallPromotion, "Number of indirect call promotions.");
43 STATISTIC(NumOfPGOICallsites, "Number of indirect call candidate sites.");
44
45 // Command line option to disable indirect-call promotion with the default as
46 // false. This is for debug purpose.
47 static cl::opt DisableICP("disable-icp", cl::init(false), cl::Hidden,
48 cl::desc("Disable indirect call promotion"));
49
50 // The minimum call count for the direct-call target to be considered as the
51 // promotion candidate.
52 static cl::opt
53 ICPCountThreshold("icp-count-threshold", cl::Hidden, cl::ZeroOrMore,
54 cl::init(1000),
55 cl::desc("The minimum count to the direct call target "
56 "for the promotion"));
57
58 // The percent threshold for the direct-call target (this call site vs the
59 // total call count) for it to be considered as the promotion target.
60 static cl::opt
61 ICPPercentThreshold("icp-percent-threshold", cl::init(33), cl::Hidden,
62 cl::ZeroOrMore,
63 cl::desc("The percentage threshold for the promotion"));
64
65 // Set the maximum number of targets to promote for a single indirect-call
66 // callsite.
67 static cl::opt
68 MaxNumPromotions("icp-max-prom", cl::init(2), cl::Hidden, cl::ZeroOrMore,
69 cl::desc("Max number of promotions for a single indirect "
70 "call callsite"));
71
72 // Set the cutoff value for the promotion. If the value is other than 0, we
73 // stop the transformation once the total number of promotions equals the cutoff
74 // value.
75 // For debug use only.
76 static cl::opt
77 ICPCutOff("icp-cutoff", cl::init(0), cl::Hidden, cl::ZeroOrMore,
78 cl::desc("Max number of promotions for this compilaiton"));
79
80 // If ICPCSSkip is non zero, the first ICPCSSkip callsites will be skipped.
81 // For debug use only.
82 static cl::opt
83 ICPCSSkip("icp-csskip", cl::init(0), cl::Hidden, cl::ZeroOrMore,
84 cl::desc("Skip Callsite up to this number for this compilaiton"));
85
86 // Set if the pass is called in LTO optimization. The difference for LTO mode
87 // is the pass won't prefix the source module name to the internal linkage
88 // symbols.
89 static cl::opt ICPLTOMode("icp-lto", cl::init(false), cl::Hidden,
90 cl::desc("Run indirect-call promotion in LTO "
91 "mode"));
92 // If the option is set to true, only call instructions will be considered for
93 // transformation -- invoke instructions will be ignored.
94 static cl::opt
95 ICPCallOnly("icp-call-only", cl::init(false), cl::Hidden,
96 cl::desc("Run indirect-call promotion for call instructions "
97 "only"));
98
99 // If the option is set to true, only invoke instructions will be considered for
100 // transformation -- call instructions will be ignored.
101 static cl::opt ICPInvokeOnly("icp-invoke-only", cl::init(false),
102 cl::Hidden,
103 cl::desc("Run indirect-call promotion for "
104 "invoke instruction only"));
105
106 // Dump the function level IR if the transformation happened in this
107 // function. For debug use only.
108 static cl::opt
109 ICPDUMPAFTER("icp-dumpafter", cl::init(false), cl::Hidden,
110 cl::desc("Dump IR after transformation happens"));
111
112 namespace {
113 class PGOIndirectCallPromotion : public ModulePass {
114 public:
115 static char ID;
116
117 PGOIndirectCallPromotion(bool InLTO = false) : ModulePass(ID), InLTO(InLTO) {
118 initializePGOIndirectCallPromotionPass(*PassRegistry::getPassRegistry());
119 }
120
121 const char *getPassName() const override {
122 return "PGOIndirectCallPromotion";
123 }
124
125 private:
126 bool runOnModule(Module &M) override;
127
128 // If this pass is called in LTO. We need to special handling the PGOFuncName
129 // for the static variables due to LTO's internalization.
130 bool InLTO;
131 };
132 } // end anonymous namespace
133
134 char PGOIndirectCallPromotion::ID = 0;
135 INITIALIZE_PASS(PGOIndirectCallPromotion, "pgo-icall-prom",
136 "Use PGO instrumentation profile to promote indirect calls to "
137 "direct calls.",
138 false, false)
139
140 ModulePass *llvm::createPGOIndirectCallPromotionPass(bool InLTO) {
141 return new PGOIndirectCallPromotion(InLTO);
142 }
143
144 // The class for main data structure to promote indirect calls to conditional
145 // direct calls.
146 class ICallPromotionFunc {
147 private:
148 Function &F;
149 Module *M;
150
151 // Symtab that maps indirect call profile values to function names and
152 // defines.
153 InstrProfSymtab *Symtab;
154
155 // Allocate space to read the profile annotation.
156 std::unique_ptr ValueDataArray;
157
158 // Count is the call count for the direct-call target and
159 // TotalCount is the call count for the indirect-call callsite.
160 // Return true we should promote this indirect-call target.
161 bool isPromotionProfitable(uint64_t Count, uint64_t TotalCount);
162
163 enum TargetStatus {
164 OK, // Should be able to promote.
165 NotAvailableInModule, // Cannot find the target in current module.
166 ReturnTypeMismatch, // Return type mismatch b/w target and indirect-call.
167 NumArgsMismatch, // Number of arguments does not match.
168 ArgTypeMismatch // Type mismatch in the arguments (cannot bitcast).
169 };
170
171 // Test if we can legally promote this direct-call of Target.
172 TargetStatus isPromotionLegal(Instruction *Inst, uint64_t Target,
173 Function *&F);
174
175 // A struct that records the direct target and it's call count.
176 struct PromotionCandidate {
177 Function *TargetFunction;
178 uint64_t Count;
179 PromotionCandidate(Function *F, uint64_t C) : TargetFunction(F), Count(C) {}
180 };
181
182 // Check if the indirect-call call site should be promoted. Return the number
183 // of promotions.
184 std::vector getPromotionCandidatesForCallSite(
185 Instruction *Inst, const ArrayRef &ValueDataRef,
186 uint64_t TotalCount);
187
188 // Main function that transforms Inst (either a indirect-call instruction, or
189 // an invoke instruction , to a conditional call to F. This is like:
190 // if (Inst.CalledValue == F)
191 // F(...);
192 // else
193 // Inst(...);
194 // end
195 // TotalCount is the profile count value that the instruction executes.
196 // Count is the profile count value that F is the target function.
197 // These two values are being used to update the branch weight.
198 void promote(Instruction *Inst, Function *F, uint64_t Count,
199 uint64_t TotalCount);
200
201 // Promote a list of targets for one indirect-call callsite. Return
202 // the number of promotions.
203 uint32_t tryToPromote(Instruction *Inst,
204 const std::vector &Candidates,
205 uint64_t &TotalCount);
206
207 static const char *StatusToString(const TargetStatus S) {
208 switch (S) {
209 case OK:
210 return "OK to promote";
211 case NotAvailableInModule:
212 return "Cannot find the target";
213 case ReturnTypeMismatch:
214 return "Return type mismatch";
215 case NumArgsMismatch:
216 return "The number of arguments mismatch";
217 case ArgTypeMismatch:
218 return "Argument Type mismatch";
219 }
220 llvm_unreachable("Should not reach here");
221 }
222
223 // Noncopyable
224 ICallPromotionFunc(const ICallPromotionFunc &other) = delete;
225 ICallPromotionFunc &operator=(const ICallPromotionFunc &other) = delete;
226
227 public:
228 ICallPromotionFunc(Function &Func, Module *Modu, InstrProfSymtab *Symtab)
229 : F(Func), M(Modu), Symtab(Symtab) {
230 ValueDataArray = llvm::make_unique(MaxNumPromotions);
231 }
232 bool processFunction();
233 };
234
235 bool ICallPromotionFunc::isPromotionProfitable(uint64_t Count,
236 uint64_t TotalCount) {
237 if (Count < ICPCountThreshold)
238 return false;
239
240 unsigned Percentage = (Count * 100) / TotalCount;
241 return (Percentage >= ICPPercentThreshold);
242 }
243
244 ICallPromotionFunc::TargetStatus
245 ICallPromotionFunc::isPromotionLegal(Instruction *Inst, uint64_t Target,
246 Function *&TargetFunction) {
247 Function *DirectCallee = Symtab->getFunction(Target);
248 if (DirectCallee == nullptr)
249 return NotAvailableInModule;
250 // Check the return type.
251 Type *CallRetType = Inst->getType();
252 if (!CallRetType->isVoidTy()) {
253 Type *FuncRetType = DirectCallee->getReturnType();
254 if (FuncRetType != CallRetType &&
255 !CastInst::isBitCastable(FuncRetType, CallRetType))
256 return ReturnTypeMismatch;
257 }
258
259 // Check if the arguments are compatible with the parameters
260 FunctionType *DirectCalleeType = DirectCallee->getFunctionType();
261 unsigned ParamNum = DirectCalleeType->getFunctionNumParams();
262 CallSite CS(Inst);
263 unsigned ArgNum = CS.arg_size();
264
265 if (ParamNum != ArgNum && !DirectCalleeType->isVarArg())
266 return NumArgsMismatch;
267
268 for (unsigned I = 0; I < ParamNum; ++I) {
269 Type *PTy = DirectCalleeType->getFunctionParamType(I);
270 Type *ATy = CS.getArgument(I)->getType();
271 if (PTy == ATy)
272 continue;
273 if (!CastInst::castIsValid(Instruction::BitCast, CS.getArgument(I), PTy))
274 return ArgTypeMismatch;
275 }
276
277 DEBUG(dbgs() << " #" << NumOfPGOICallPromotion << " Promote the icall to "
278 << Symtab->getFuncName(Target) << "\n");
279 TargetFunction = DirectCallee;
280 return OK;
281 }
282
283 // Indirect-call promotion heuristic. The direct targets are sorted based on
284 // the count. Stop at the first target that is not promoted.
285 std::vector
286 ICallPromotionFunc::getPromotionCandidatesForCallSite(
287 Instruction *Inst, const ArrayRef &ValueDataRef,
288 uint64_t TotalCount) {
289 uint32_t NumVals = ValueDataRef.size();
290 std::vector Ret;
291
292 DEBUG(dbgs() << " \nWork on callsite #" << NumOfPGOICallsites << *Inst
293 << " Num_targets: " << NumVals << "\n");
294 NumOfPGOICallsites++;
295 if (ICPCSSkip != 0 && NumOfPGOICallsites <= ICPCSSkip) {
296 DEBUG(dbgs() << " Skip: User options.\n");
297 return Ret;
298 }
299
300 for (uint32_t I = 0; I < MaxNumPromotions && I < NumVals; I++) {
301 uint64_t Count = ValueDataRef[I].Count;
302 assert(Count <= TotalCount);
303 uint64_t Target = ValueDataRef[I].Value;
304 DEBUG(dbgs() << " Candidate " << I << " Count=" << Count
305 << " Target_func: " << Target << "\n");
306
307 if (ICPInvokeOnly && dyn_cast(Inst)) {
308 DEBUG(dbgs() << " Not promote: User options.\n");
309 break;
310 }
311 if (ICPCallOnly && dyn_cast(Inst)) {
312 DEBUG(dbgs() << " Not promote: User option.\n");
313 break;
314 }
315 if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) {
316 DEBUG(dbgs() << " Not promote: Cutoff reached.\n");
317 break;
318 }
319 if (!isPromotionProfitable(Count, TotalCount)) {
320 DEBUG(dbgs() << " Not promote: Cold target.\n");
321 break;
322 }
323 Function *TargetFunction = nullptr;
324 TargetStatus Status = isPromotionLegal(Inst, Target, TargetFunction);
325 if (Status != OK) {
326 StringRef TargetFuncName = Symtab->getFuncName(Target);
327 const char *Reason = StatusToString(Status);
328 DEBUG(dbgs() << " Not promote: " << Reason << "\n");
329 Twine Msg =
330 Twine("Cannot promote indirect call to ") +
331 (TargetFuncName.empty() ? Twine(Target) : Twine(TargetFuncName)) +
332 Twine(" with count of ") + Twine(Count) + ": " + Reason;
333 emitOptimizationRemarkMissed(F.getContext(), "PGOIndirectCallPromotion",
334 F, Inst->getDebugLoc(), Msg);
335 break;
336 }
337 Ret.push_back(PromotionCandidate(TargetFunction, Count));
338 TotalCount -= Count;
339 }
340 return Ret;
341 }
342
343 // Create a diamond structure for If_Then_Else. Also update the profile
344 // count. Do the fix-up for the invoke instruction.
345 static void createIfThenElse(Instruction *Inst, Function *DirectCallee,
346 uint64_t Count, uint64_t TotalCount,
347 BasicBlock **DirectCallBB,
348 BasicBlock **IndirectCallBB,
349 BasicBlock **MergeBB) {
350 CallSite CS(Inst);
351 Value *OrigCallee = CS.getCalledValue();
352
353 IRBuilder<> BBBuilder(Inst);
354 LLVMContext &Ctx = Inst->getContext();
355 Value *BCI1 =
356 BBBuilder.CreateBitCast(OrigCallee, Type::getInt8PtrTy(Ctx), "");
357 Value *BCI2 =
358 BBBuilder.CreateBitCast(DirectCallee, Type::getInt8PtrTy(Ctx), "");
359 Value *PtrCmp = BBBuilder.CreateICmpEQ(BCI1, BCI2, "");
360
361 uint64_t ElseCount = TotalCount - Count;
362 uint64_t MaxCount = (Count >= ElseCount ? Count : ElseCount);
363 uint64_t Scale = calculateCountScale(MaxCount);
364 MDBuilder MDB(Inst->getContext());
365 MDNode *BranchWeights = MDB.createBranchWeights(
366 scaleBranchCount(Count, Scale), scaleBranchCount(ElseCount, Scale));
367 TerminatorInst *ThenTerm, *ElseTerm;
368 SplitBlockAndInsertIfThenElse(PtrCmp, Inst, &ThenTerm, &ElseTerm,
369 BranchWeights);
370 *DirectCallBB = ThenTerm->getParent();
371 (*DirectCallBB)->setName("if.true.direct_targ");
372 *IndirectCallBB = ElseTerm->getParent();
373 (*IndirectCallBB)->setName("if.false.orig_indirect");
374 *MergeBB = Inst->getParent();
375 (*MergeBB)->setName("if.end.icp");
376
377 // Special handing of Invoke instructions.
378 InvokeInst *II = dyn_cast(Inst);
379 if (!II)
380 return;
381
382 // We don't need branch instructions for invoke.
383 ThenTerm->eraseFromParent();
384 ElseTerm->eraseFromParent();
385
386 // Add jump from Merge BB to the NormalDest. This is needed for the newly
387 // created direct invoke stmt -- as its NormalDst will be fixed up to MergeBB.
388 BranchInst::Create(II->getNormalDest(), *MergeBB);
389 }
390
391 // Find the PHI in BB that have the CallResult as the operand.
392 static bool getCallRetPHINode(BasicBlock *BB, Instruction *Inst) {
393 BasicBlock *From = Inst->getParent();
394 for (auto &I : *BB) {
395 PHINode *PHI = dyn_cast(&I);
396 if (!PHI)
397 continue;
398 int IX = PHI->getBasicBlockIndex(From);
399 if (IX == -1)
400 continue;
401 Value *V = PHI->getIncomingValue(IX);
402 if (dyn_cast(V) == Inst)
403 return true;
404 }
405 return false;
406 }
407
408 // This method fixes up PHI nodes in BB where BB is the UnwindDest of an
409 // invoke instruction. In BB, there may be PHIs with incoming block being
410 // OrigBB (the MergeBB after if-then-else splitting). After moving the invoke
411 // instructions to its own BB, OrigBB is no longer the predecessor block of BB.
412 // Instead two new predecessors are added: IndirectCallBB and DirectCallBB,
413 // so the PHI node's incoming BBs need to be fixed up accordingly.
414 static void fixupPHINodeForUnwind(Instruction *Inst, BasicBlock *BB,
415 BasicBlock *OrigBB,
416 BasicBlock *IndirectCallBB,
417 BasicBlock *DirectCallBB) {
418 for (auto &I : *BB) {
419 PHINode *PHI = dyn_cast(&I);
420 if (!PHI)
421 continue;
422 int IX = PHI->getBasicBlockIndex(OrigBB);
423 if (IX == -1)
424 continue;
425 Value *V = PHI->getIncomingValue(IX);
426 PHI->addIncoming(V, IndirectCallBB);
427 PHI->setIncomingBlock(IX, DirectCallBB);
428 }
429 }
430
431 // This method fixes up PHI nodes in BB where BB is the NormalDest of an
432 // invoke instruction. In BB, there may be PHIs with incoming block being
433 // OrigBB (the MergeBB after if-then-else splitting). After moving the invoke
434 // instructions to its own BB, a new incoming edge will be added to the original
435 // NormalDstBB from the IndirectCallBB.
436 static void fixupPHINodeForNormalDest(Instruction *Inst, BasicBlock *BB,
437 BasicBlock *OrigBB,
438 BasicBlock *IndirectCallBB,
439 Instruction *NewInst) {
440 for (auto &I : *BB) {
441 PHINode *PHI = dyn_cast(&I);
442 if (!PHI)
443 continue;
444 int IX = PHI->getBasicBlockIndex(OrigBB);
445 if (IX == -1)
446 continue;
447 Value *V = PHI->getIncomingValue(IX);
448 if (dyn_cast(V) == Inst) {
449 PHI->setIncomingBlock(IX, IndirectCallBB);
450 PHI->addIncoming(NewInst, OrigBB);
451 continue;
452 }
453 PHI->addIncoming(V, IndirectCallBB);
454 }
455 }
456
457 // Add a bitcast instruction to the direct-call return value if needed.
458 // Add a bitcast instruction to the direct-call return value if needed.
459 static Instruction *insertCallRetCast(const Instruction *Inst,
460 Instruction *DirectCallInst,
461 Function *DirectCallee) {
462 if (Inst->getType()->isVoidTy())
463 return DirectCallInst;
464
465 Type *CallRetType = Inst->getType();
466 Type *FuncRetType = DirectCallee->getReturnType();
467 if (FuncRetType == CallRetType)
468 return DirectCallInst;
469
470 BasicBlock *InsertionBB;
471 if (CallInst *CI = dyn_cast(DirectCallInst))
472 InsertionBB = CI->getParent();
473 else
474 InsertionBB = (dyn_cast(DirectCallInst))->getNormalDest();
475
476 return (new BitCastInst(DirectCallInst, CallRetType, "",
477 InsertionBB->getTerminator()));
478 }
479
480 // Create a DirectCall instruction in the DirectCallBB.
481 // Parameter Inst is the indirect-call (invoke) instruction.
482 // DirectCallee is the decl of the direct-call (invoke) target.
483 // DirecallBB is the BB that the direct-call (invoke) instruction is inserted.
484 // MergeBB is the bottom BB of the if-then-else-diamond after the
485 // transformation. For invoke instruction, the edges from DirectCallBB and
486 // IndirectCallBB to MergeBB are removed before this call (during
487 // createIfThenElse).
488 static Instruction *createDirectCallInst(const Instruction *Inst,
489 Function *DirectCallee,
490 BasicBlock *DirectCallBB,
491 BasicBlock *MergeBB) {
492 Instruction *NewInst = Inst->clone();
493 if (CallInst *CI = dyn_cast(NewInst)) {
494 CI->setCalledFunction(DirectCallee);
495 CI->mutateFunctionType(DirectCallee->getFunctionType());
496 } else {
497 // Must be an invoke instruction. Direct invoke's normal destination is
498 // fixed up to MergeBB. MergeBB is the place where return cast is inserted.
499 // Also since IndirectCallBB does not have an edge to MergeBB, there is no
500 // need to insert new PHIs into MergeBB.
501 InvokeInst *II = dyn_cast(NewInst);
502 assert(II);
503 II->setCalledFunction(DirectCallee);
504 II->mutateFunctionType(DirectCallee->getFunctionType());
505 II->setNormalDest(MergeBB);
506 }
507
508 DirectCallBB->getInstList().insert(DirectCallBB->getFirstInsertionPt(),
509 NewInst);
510
511 // Clear the value profile data.
512 NewInst->setMetadata(LLVMContext::MD_prof, 0);
513 CallSite NewCS(NewInst);
514 FunctionType *DirectCalleeType = DirectCallee->getFunctionType();
515 unsigned ParamNum = DirectCalleeType->getFunctionNumParams();
516 for (unsigned I = 0; I < ParamNum; ++I) {
517 Type *ATy = NewCS.getArgument(I)->getType();
518 Type *PTy = DirectCalleeType->getParamType(I);
519 if (ATy != PTy) {
520 BitCastInst *BI = new BitCastInst(NewCS.getArgument(I), PTy, "", NewInst);
521 NewCS.setArgument(I, BI);
522 }
523 }
524
525 return insertCallRetCast(Inst, NewInst, DirectCallee);
526 }
527
528 // Create a PHI to unify the return values of calls.
529 static void insertCallRetPHI(Instruction *Inst, Instruction *CallResult,
530 Function *DirectCallee) {
531 if (Inst->getType()->isVoidTy())
532 return;
533
534 BasicBlock *RetValBB = CallResult->getParent();
535
536 BasicBlock *PHIBB;
537 if (InvokeInst *II = dyn_cast(CallResult))
538 RetValBB = II->getNormalDest();
539
540 PHIBB = RetValBB->getSingleSuccessor();
541 if (getCallRetPHINode(PHIBB, Inst))
542 return;
543
544 PHINode *CallRetPHI = PHINode::Create(Inst->getType(), 0);
545 PHIBB->getInstList().push_front(CallRetPHI);
546 Inst->replaceAllUsesWith(CallRetPHI);
547 CallRetPHI->addIncoming(Inst, Inst->getParent());
548 CallRetPHI->addIncoming(CallResult, RetValBB);
549 }
550
551 // This function does the actual indirect-call promotion transformation:
552 // For an indirect-call like:
553 // Ret = (*Foo)(Args);
554 // It transforms to:
555 // if (Foo == DirectCallee)
556 // Ret1 = DirectCallee(Args);
557 // else
558 // Ret2 = (*Foo)(Args);
559 // Ret = phi(Ret1, Ret2);
560 // It adds type casts for the args do not match the parameters and the return
561 // value. Branch weights metadata also updated.
562 void ICallPromotionFunc::promote(Instruction *Inst, Function *DirectCallee,
563 uint64_t Count, uint64_t TotalCount) {
564 assert(DirectCallee != nullptr);
565 BasicBlock *BB = Inst->getParent();
566 // Just to suppress the non-debug build warning.
567 (void)BB;
568 DEBUG(dbgs() << "\n\n== Basic Block Before ==\n");
569 DEBUG(dbgs() << *BB << "\n");
570
571 BasicBlock *DirectCallBB, *IndirectCallBB, *MergeBB;
572 createIfThenElse(Inst, DirectCallee, Count, TotalCount, &DirectCallBB,
573 &IndirectCallBB, &MergeBB);
574
575 Instruction *NewInst =
576 createDirectCallInst(Inst, DirectCallee, DirectCallBB, MergeBB);
577
578 // Move Inst from MergeBB to IndirectCallBB.
579 Inst->removeFromParent();
580 IndirectCallBB->getInstList().insert(IndirectCallBB->getFirstInsertionPt(),
581 Inst);
582
583 if (InvokeInst *II = dyn_cast(Inst)) {
584 // At this point, the original indirect invoke instruction has the original
585 // UnwindDest and NormalDest. For the direct invoke instruction, the
586 // NormalDest points to MergeBB, and MergeBB jumps to the original
587 // NormalDest. MergeBB might have a new bitcast instruction for the return
588 // value. The PHIs are with the original NormalDest. Since we now have two
589 // incoming edges to NormalDest and UnwindDest, we have to do some fixups.
590 //
591 // UnwindDest will not use the return value. So pass nullptr here.
592 fixupPHINodeForUnwind(Inst, II->getUnwindDest(), MergeBB, IndirectCallBB,
593 DirectCallBB);
594 // We don't need to update the operand from NormalDest for DirectCallBB.
595 // Pass nullptr here.
596 fixupPHINodeForNormalDest(Inst, II->getNormalDest(), MergeBB,
597 IndirectCallBB, NewInst);
598 }
599
600 insertCallRetPHI(Inst, NewInst, DirectCallee);
601
602 DEBUG(dbgs() << "\n== Basic Blocks After ==\n");
603 DEBUG(dbgs() << *BB << *DirectCallBB << *IndirectCallBB << *MergeBB << "\n");
604
605 Twine Msg = Twine("Promote indirect call to ") + DirectCallee->getName() +
606 " with count " + Twine(Count) + " out of " + Twine(TotalCount);
607 emitOptimizationRemark(F.getContext(), "PGOIndirectCallPromotion", F,
608 Inst->getDebugLoc(), Msg);
609 }
610
611 // Promote indirect-call to conditional direct-call for one callsite.
612 uint32_t ICallPromotionFunc::tryToPromote(
613 Instruction *Inst, const std::vector &Candidates,
614 uint64_t &TotalCount) {
615 uint32_t NumPromoted = 0;
616
617 for (auto &C : Candidates) {
618 uint64_t Count = C.Count;
619 promote(Inst, C.TargetFunction, Count, TotalCount);
620 assert(TotalCount >= Count);
621 TotalCount -= Count;
622 NumOfPGOICallPromotion++;
623 NumPromoted++;
624 }
625 return NumPromoted;
626 }
627
628 // Traverse all the indirect-call callsite and get the value profile
629 // annotation to perform indirect-call promotion.
630 bool ICallPromotionFunc::processFunction() {
631 bool Changed = false;
632 for (auto &I : findIndirectCallSites(F)) {
633 uint32_t NumVals;
634 uint64_t TotalCount;
635 bool Res =
636 getValueProfDataFromInst(*I, IPVK_IndirectCallTarget, MaxNumPromotions,
637 ValueDataArray.get(), NumVals, TotalCount);
638 if (!Res)
639 continue;
640 ArrayRef ValueDataArrayRef(ValueDataArray.get(),
641 NumVals);
642 auto PromotionCandidates =
643 getPromotionCandidatesForCallSite(I, ValueDataArrayRef, TotalCount);
644 uint32_t NumPromoted = tryToPromote(I, PromotionCandidates, TotalCount);
645 if (NumPromoted == 0)
646 continue;
647
648 Changed = true;
649 // Adjust the MD.prof metadata. First delete the old one.
650 I->setMetadata(LLVMContext::MD_prof, 0);
651 // If all promoted, we don't need the MD.prof metadata.
652 if (TotalCount == 0 || NumPromoted == NumVals)
653 continue;
654 // Otherwise we need update with the un-promoted records back.
655 annotateValueSite(*M, *I, ValueDataArrayRef.slice(NumPromoted), TotalCount,
656 IPVK_IndirectCallTarget, MaxNumPromotions);
657 }
658 return Changed;
659 }
660
661 // A wrapper function that does the actual work.
662 static bool promoteIndirectCalls(Module &M, bool InLTO) {
663 if (DisableICP)
664 return false;
665 InstrProfSymtab Symtab;
666 Symtab.create(M, InLTO);
667 bool Changed = false;
668 for (auto &F : M) {
669 if (F.isDeclaration())
670 continue;
671 if (F.hasFnAttribute(Attribute::OptimizeNone))
672 continue;
673 ICallPromotionFunc ICallPromotion(F, &M, &Symtab);
674 bool FuncChanged = ICallPromotion.processFunction();
675 if (ICPDUMPAFTER && FuncChanged) {
676 DEBUG(dbgs() << "\n== IR Dump After =="; F.print(dbgs()));
677 DEBUG(dbgs() << "\n");
678 }
679 Changed |= FuncChanged;
680 if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) {
681 DEBUG(dbgs() << " Stop: Cutoff reached.\n");
682 break;
683 }
684 }
685 return Changed;
686 }
687
688 bool PGOIndirectCallPromotion::runOnModule(Module &M) {
689 // Command-line option has the priority for InLTO.
690 InLTO |= ICPLTOMode;
691 return promoteIndirectCalls(M, InLTO);
692 }
6161 initializeGCOVProfilerPass(Registry);
6262 initializePGOInstrumentationGenPass(Registry);
6363 initializePGOInstrumentationUsePass(Registry);
64 initializePGOIndirectCallPromotionPass(Registry);
6465 initializeInstrProfilingLegacyPassPass(Registry);
6566 initializeMemorySanitizerPass(Registry);
6667 initializeThreadSanitizerPass(Registry);
0 ; RUN: opt < %s -pgo-icall-prom -S | FileCheck %s --check-prefix=ICALL-PROM
1 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
2 target triple = "x86_64-unknown-linux-gnu"
3
4 %struct.D = type { %struct.B }
5 %struct.B = type { i32 (...)** }
6 %struct.Base = type { i8 }
7 %struct.Derived = type { i8 }
8
9 declare noalias i8* @_Znwm(i64)
10 declare void @_ZN1DC2Ev(%struct.D*);
11 declare %struct.Derived* @_ZN1D4funcEv(%struct.D*);
12
13 define i32 @bar() {
14 entry:
15 %call = call noalias i8* @_Znwm(i64 8)
16 %tmp = bitcast i8* %call to %struct.D*
17 call void @_ZN1DC2Ev(%struct.D* %tmp)
18 %tmp1 = bitcast %struct.D* %tmp to %struct.B*
19 %tmp2 = bitcast %struct.B* %tmp1 to %struct.Base* (%struct.B*)***
20 %vtable = load %struct.Base* (%struct.B*)**, %struct.Base* (%struct.B*)*** %tmp2, align 8
21 %vfn = getelementptr inbounds %struct.Base* (%struct.B*)*, %struct.Base* (%struct.B*)** %vtable, i64 0
22 %tmp3 = load %struct.Base* (%struct.B*)*, %struct.Base* (%struct.B*)** %vfn, align 8
23 ; ICALL-PROM: [[BITCAST:%[0-9]+]] = bitcast %struct.Base* (%struct.B*)* %tmp3 to i8*
24 ; ICALL-PROM: [[CMP:%[0-9]+]] = icmp eq i8* [[BITCAST]], bitcast (%struct.Derived* (%struct.D*)* @_ZN1D4funcEv to i8*)
25 ; ICALL-PROM: br i1 [[CMP]], label %if.true.direct_targ, label %if.false.orig_indirect, !prof [[BRANCH_WEIGHT:![0-9]+]]
26 ; ICALL-PROM:if.true.direct_targ:
27 ; ICALL-PROM: [[ARG_BITCAST:%[0-9]+]] = bitcast %struct.B* %tmp1 to %struct.D*
28 ; ICALL-PROM: [[DIRCALL_RET:%[0-9]+]] = call %struct.Derived* @_ZN1D4funcEv(%struct.D* [[ARG_BITCAST]])
29 ; ICALL-PROM: [[DIRCALL_RET_CAST:%[0-9]+]] = bitcast %struct.Derived* [[DIRCALL_RET]] to %struct.Base*
30 ; ICALL-PROM: br label %if.end.icp
31 ; ICALL-PROM:if.false.orig_indirect:
32 ; ICALL-PROM: %call1 = call %struct.Base* %tmp3(%struct.B* %tmp1)
33 ; ICALL-PROM: br label %if.end.icp
34 ; ICALL-PROM:if.end.icp:
35 ; ICALL-PROM: [[PHI_RET:%[0-9]+]] = phi %struct.Base* [ %call1, %if.false.orig_indirect ], [ [[DIRCALL_RET_CAST]], %if.true.direct_targ ]
36 %call1 = call %struct.Base* %tmp3(%struct.B* %tmp1), !prof !1
37 ret i32 0
38 }
39
40 !1 = !{!"VP", i32 0, i64 12345, i64 -3913987384944532146, i64 12345}
41 ; ICALL-PROM-NOT: !1 = !{!"VP", i32 0, i64 12345, i64 -3913987384944532146, i64 12345}
42 ; ICALL-PROM: [[BRANCH_WEIGHT]] = !{!"branch_weights", i32 12345, i32 0}
43 ; ICALL-PROM-NOT: !1 = !{!"VP", i32 0, i64 12345, i64 -3913987384944532146, i64 12345}
0 ; RUN: opt < %s -pgo-icall-prom -S | FileCheck %s --check-prefix=ICALL-PROM
1 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
2 target triple = "x86_64-unknown-linux-gnu"
3 %struct.D = type { %struct.B }
4 %struct.B = type { i32 (...)** }
5 %struct.Derived = type { %struct.Base, i32 }
6 %struct.Base = type { i32 }
7
8 @_ZTIi = external constant i8*
9 declare i8* @_Znwm(i64)
10 declare void @_ZN1DC2Ev(%struct.D*)
11 declare %struct.Derived* @_ZN1D4funcEv(%struct.D*)
12 declare void @_ZN1DD0Ev(%struct.D*)
13 declare void @_ZdlPv(i8*)
14 declare i32 @__gxx_personality_v0(...)
15 declare i32 @llvm.eh.typeid.for(i8*)
16 declare i8* @__cxa_begin_catch(i8*)
17 declare void @__cxa_end_catch()
18
19
; Indirect invoke through a vtable slot whose profiled target, @_ZN1D4funcEv,
; takes %struct.D* and returns %struct.Derived*, while the call site passes
; %struct.B* and expects %struct.Base*. The directives below expect a pointer
; compare, a direct invoke with a bitcast argument, a bitcast of the result in
; %if.end.icp, the untouched indirect invoke in %if.false.orig_indirect, and a
; phi in %invoke.cont1 that merges both results.
20 define i32 @foo() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
21 entry:
22 %call = invoke i8* @_Znwm(i64 8)
23 to label %invoke.cont unwind label %lpad
24
25 invoke.cont:
26 %tmp = bitcast i8* %call to %struct.D*
27 call void @_ZN1DC2Ev(%struct.D* %tmp)
28 %tmp1 = bitcast %struct.D* %tmp to %struct.B*
29 %tmp2 = bitcast %struct.B* %tmp1 to %struct.Base* (%struct.B*)***
30 %vtable = load %struct.Base* (%struct.B*)**, %struct.Base* (%struct.B*)*** %tmp2, align 8
31 %vfn = getelementptr inbounds %struct.Base* (%struct.B*)*, %struct.Base* (%struct.B*)** %vtable, i64 0
32 %tmp3 = load %struct.Base* (%struct.B*)*, %struct.Base* (%struct.B*)** %vfn, align 8
33 ; ICALL-PROM: [[BITCAST:%[0-9]+]] = bitcast %struct.Base* (%struct.B*)* %tmp3 to i8*
34 ; ICALL-PROM: [[CMP:%[0-9]+]] = icmp eq i8* [[BITCAST]], bitcast (%struct.Derived* (%struct.D*)* @_ZN1D4funcEv to i8*)
35 ; ICALL-PROM: br i1 [[CMP]], label %if.true.direct_targ, label %if.false.orig_indirect, !prof [[BRANCH_WEIGHT:![0-9]+]]
36 ; ICALL-PROM:if.true.direct_targ:
37 ; ICALL-PROM: [[ARG_BITCAST:%[0-9]+]] = bitcast %struct.B* %tmp1 to %struct.D*
38 ; ICALL-PROM: [[DIRCALL_RET:%[0-9]+]] = invoke %struct.Derived* @_ZN1D4funcEv(%struct.D* [[ARG_BITCAST]])
39 ; ICALL-PROM: to label %if.end.icp unwind label %lpad
40 ; ICALL-PROM:if.false.orig_indirect:
41 ; ICALL-PROM: %call2 = invoke %struct.Base* %tmp3(%struct.B* %tmp1)
42 ; ICALL-PROM: to label %invoke.cont1 unwind label %lpad
43 ; ICALL-PROM:if.end.icp:
44 ; ICALL-PROM: [[DIRCALL_RET_CAST:%[0-9]+]] = bitcast %struct.Derived* [[DIRCALL_RET]] to %struct.Base*
45 ; ICALL-PROM: br label %invoke.cont1
46 %call2 = invoke %struct.Base* %tmp3(%struct.B* %tmp1)
47 to label %invoke.cont1 unwind label %lpad, !prof !1
48
49 invoke.cont1:
50 ; ICALL-PROM: [[PHI_RET:%[0-9]+]] = phi %struct.Base* [ %call2, %if.false.orig_indirect ], [ [[DIRCALL_RET_CAST]], %if.end.icp ]
51 ; ICALL-PROM: %isnull = icmp eq %struct.Base* [[PHI_RET]], null
52 %isnull = icmp eq %struct.Base* %call2, null
53 br i1 %isnull, label %delete.end, label %delete.notnull
54
55 delete.notnull:
56 %tmp4 = bitcast %struct.Base* %call2 to i8*
57 call void @_ZdlPv(i8* %tmp4)
58 br label %delete.end
59
60 delete.end:
61 %isnull3 = icmp eq %struct.B* %tmp1, null
62 br i1 %isnull3, label %delete.end8, label %delete.notnull4
63
64 delete.notnull4:
65 %tmp5 = bitcast %struct.B* %tmp1 to void (%struct.B*)***
66 %vtable5 = load void (%struct.B*)**, void (%struct.B*)*** %tmp5, align 8
67 %vfn6 = getelementptr inbounds void (%struct.B*)*, void (%struct.B*)** %vtable5, i64 2
68 %tmp6 = load void (%struct.B*)*, void (%struct.B*)** %vfn6, align 8
69 invoke void %tmp6(%struct.B* %tmp1)
70 to label %invoke.cont7 unwind label %lpad
71
72 invoke.cont7:
73 br label %delete.end8
74
75 delete.end8:
76 br label %try.cont
77
78 lpad:
79 %tmp7 = landingpad { i8*, i32 }
80 catch i8* bitcast (i8** @_ZTIi to i8*)
81 %tmp8 = extractvalue { i8*, i32 } %tmp7, 0
82 %tmp9 = extractvalue { i8*, i32 } %tmp7, 1
83 br label %catch.dispatch
84
85 catch.dispatch:
86 %tmp10 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*))
87 %matches = icmp eq i32 %tmp9, %tmp10
88 br i1 %matches, label %catch, label %eh.resume
89
90 catch:
91 %tmp11 = call i8* @__cxa_begin_catch(i8* %tmp8)
92 %tmp12 = bitcast i8* %tmp11 to i32*
93 %tmp13 = load i32, i32* %tmp12, align 4
94 call void @__cxa_end_catch()
95 br label %try.cont
96
97 try.cont:
98 ret i32 0
99
100 eh.resume:
101 %lpad.val = insertvalue { i8*, i32 } undef, i8* %tmp8, 0
102 %lpad.val11 = insertvalue { i8*, i32 } %lpad.val, i32 %tmp9, 1
103 resume { i8*, i32 } %lpad.val11
104 }
105
106 !1 = !{!"VP", i32 0, i64 12345, i64 -3913987384944532146, i64 12345}
107 ; ICALL-PROM-NOT: !1 = !{!"VP", i32 0, i64 12345, i64 -3913987384944532146, i64 12345}
108 ; ICALL-PROM: [[BRANCH_WEIGHT]] = !{!"branch_weights", i32 12345, i32 0}
109 ; ICALL-PROM-NOT: !1 = !{!"VP", i32 0, i64 12345, i64 -3913987384944532146, i64 12345}
0 ; RUN: opt < %s -icp-lto -pgo-icall-prom -S -icp-count-threshold=0 | FileCheck %s --check-prefix=ICP
1 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
2 target triple = "x86_64-unknown-linux-gnu"
3
4 @foo1 = global void ()* null, align 8
5 @foo2 = global i32 ()* null, align 8
6 @_ZTIi = external constant i8*
7
; Internal callee returning void. Its !PGOFuncName !0 carries the string
; "invoke.ll:_ZL4bar1v"; the directives in @_Z3goov expect the first indirect
; invoke to be promoted to a direct invoke of this function.
8 define internal void @_ZL4bar1v() !PGOFuncName !0 {
9 entry:
10 ret void
11 }
12
; Internal callee returning the constant 100. Its !PGOFuncName !1 carries the
; string "invoke.ll:_ZL4bar2v"; the directives in @_Z3goov expect the second
; indirect invoke to be promoted to a direct invoke of this function.
13 define internal i32 @_ZL4bar2v() !PGOFuncName !1 {
14 entry:
15 ret i32 100
16 }
17
; Two indirect invokes, loaded from @foo1 and @foo2, each with !prof
; value-profile metadata and its own landing pad. The directives expect each
; one to become a function-pointer compare followed by a direct invoke whose
; normal destination is a new block branching to the original successor, and
; expect the %i.0 phi to gain an incoming value for the direct call's result.
18 define i32 @_Z3goov() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
19 entry:
20 %tmp = load void ()*, void ()** @foo1, align 8
21 ; ICP: [[BITCAST_IC1:%[0-9]+]] = bitcast void ()* %tmp to i8*
22 ; ICP: [[CMP_IC1:%[0-9]+]] = icmp eq i8* [[BITCAST_IC1]], bitcast (void ()* @_ZL4bar1v to i8*)
23 ; ICP: br i1 [[CMP_IC1]], label %[[TRUE_LABEL_IC1:.*]], label %[[FALSE_LABEL_IC1:.*]], !prof [[BRANCH_WEIGHT:![0-9]+]]
24 ; ICP:[[TRUE_LABEL_IC1]]:
25 ; ICP: invoke void @_ZL4bar1v()
26 ; ICP: to label %[[DCALL_NORMAL_DEST_IC1:.*]] unwind label %lpad
27 ; ICP:[[FALSE_LABEL_IC1]]:
28 invoke void %tmp()
29 to label %try.cont unwind label %lpad, !prof !2
30
31 ; ICP:[[DCALL_NORMAL_DEST_IC1]]:
32 ; ICP: br label %try.cont
33
34 lpad:
35 %tmp1 = landingpad { i8*, i32 }
36 catch i8* bitcast (i8** @_ZTIi to i8*)
37 %tmp2 = extractvalue { i8*, i32 } %tmp1, 0
38 %tmp3 = extractvalue { i8*, i32 } %tmp1, 1
39 %tmp4 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*))
40 %matches = icmp eq i32 %tmp3, %tmp4
41 br i1 %matches, label %catch, label %eh.resume
42
43 catch:
44 %tmp5 = tail call i8* @__cxa_begin_catch(i8* %tmp2)
45 tail call void @__cxa_end_catch()
46 br label %try.cont
47
48 try.cont:
49 %tmp6 = load i32 ()*, i32 ()** @foo2, align 8
50 ; ICP: [[BITCAST_IC2:%[0-9]+]] = bitcast i32 ()* %tmp6 to i8*
51 ; ICP: [[CMP_IC2:%[0-9]+]] = icmp eq i8* [[BITCAST_IC2]], bitcast (i32 ()* @_ZL4bar2v to i8*)
52 ; ICP: br i1 [[CMP_IC2]], label %[[TRUE_LABEL_IC2:.*]], label %[[FALSE_LABEL_IC2:.*]], !prof [[BRANCH_WEIGHT:![0-9]+]]
53 ; ICP:[[TRUE_LABEL_IC2]]:
54 ; ICP: [[RESULT_IC2:%[0-9]+]] = invoke i32 @_ZL4bar2v()
55 ; ICP: to label %[[DCALL_NORMAL_DEST_IC2:.*]] unwind label %lpad1
56 ; ICP:[[FALSE_LABEL_IC2]]:
57 %call = invoke i32 %tmp6()
58 to label %try.cont8 unwind label %lpad1, !prof !3
59
60 ; ICP:[[DCALL_NORMAL_DEST_IC2]]:
61 ; ICP: br label %try.cont8
62 lpad1:
63 %tmp7 = landingpad { i8*, i32 }
64 catch i8* bitcast (i8** @_ZTIi to i8*)
65 %tmp8 = extractvalue { i8*, i32 } %tmp7, 0
66 %tmp9 = extractvalue { i8*, i32 } %tmp7, 1
67 %tmp10 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*))
68 %matches5 = icmp eq i32 %tmp9, %tmp10
69 br i1 %matches5, label %catch6, label %eh.resume
70
71 catch6:
72 %tmp11 = tail call i8* @__cxa_begin_catch(i8* %tmp8)
73 tail call void @__cxa_end_catch()
74 br label %try.cont8
75
76 try.cont8:
77 %i.0 = phi i32 [ undef, %catch6 ], [ %call, %try.cont ]
78 ; ICP: %i.0 = phi i32 [ undef, %catch6 ], [ %call, %[[FALSE_LABEL_IC2]] ], [ [[RESULT_IC2]], %[[DCALL_NORMAL_DEST_IC2]] ]
79 ret i32 %i.0
80
81 eh.resume:
82 %ehselector.slot.0 = phi i32 [ %tmp9, %lpad1 ], [ %tmp3, %lpad ]
83 %exn.slot.0 = phi i8* [ %tmp8, %lpad1 ], [ %tmp2, %lpad ]
84 %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn.slot.0, 0
85 %lpad.val11 = insertvalue { i8*, i32 } %lpad.val, i32 %ehselector.slot.0, 1
86 resume { i8*, i32 } %lpad.val11
87 }
88
89 declare i32 @__gxx_personality_v0(...)
90
91 declare i32 @llvm.eh.typeid.for(i8*)
92
93 declare i8* @__cxa_begin_catch(i8*)
94
95 declare void @__cxa_end_catch()
96
; !0 and !1 are the PGO names attached to the two internal callees; !2 and !3
; are the value-profile records attached to the two indirect invokes in
; @_Z3goov. The negative directives assert that the listed value-profile text
; is absent from the output before the branch-weight node is matched.
97 !0 = !{!"invoke.ll:_ZL4bar1v"}
98 !1 = !{!"invoke.ll:_ZL4bar2v"}
99 !2 = !{!"VP", i32 0, i64 1, i64 -2732222848796217051, i64 1}
100 !3 = !{!"VP", i32 0, i64 1, i64 -6116256810522035449, i64 1}
101 ; ICP-NOT: !3 = !{!"VP", i32 0, i64 1, i64 -2732222848796217051, i64 1}
102 ; ICP-NOT: !4 = !{!"VP", i32 0, i64 1, i64 -6116256810522035449, i64 1}
103 ; ICP: [[BRANCH_WEIGHT]] = !{!"branch_weights", i32 1, i32 0}
0 ; RUN: opt < %s -pgo-icall-prom -pass-remarks-missed=PGOIndirectCallPromotion -S 2>&1 | FileCheck %s
1
2 ; CHECK: remark: :0:0: Cannot promote indirect call to func4 with count of 1234: The number of arguments mismatch
3 ; CHECK: remark: :0:0: Cannot promote indirect call to 11517462787082255043 with count of 2345: Cannot find the target
4 ; CHECK: remark: :0:0: Cannot promote indirect call to func2 with count of 7890: Return type mismatch
5
6 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
7 target triple = "x86_64-unknown-linux-gnu"
8
9 @foo = common global i32 ()* null, align 8
10 @foo2 = common global i32 ()* null, align 8
11 @foo3 = common global i32 ()* null, align 8
12
; Takes one i32 and returns it. The call site in @bar whose profile records a
; count of 1234 passes no arguments, which is the argument-count mismatch the
; first expected remark reports.
13 define i32 @func4(i32 %i) {
14 entry:
15 ret i32 %i
16 }
17
; Returns void, whereas every call site in @bar produces an i32. This is the
; return-type mismatch the last expected remark reports (count 7890).
18 define void @func2() {
19 entry:
20 ret void
21 }
22
; Three profiled indirect calls through @foo, @foo2 and @foo3. The expected
; remarks at the top of the file report one failed promotion per call site,
; in this order: argument-count mismatch (!1), target not found (!2), and
; return-type mismatch (!3).
23 define i32 @bar() {
24 entry:
25 %tmp = load i32 ()*, i32 ()** @foo, align 8
26 %call = call i32 %tmp(), !prof !1
27 %tmp2 = load i32 ()*, i32 ()** @foo2, align 8
28 %call1 = call i32 %tmp2(), !prof !2
29 %add = add nsw i32 %call1, %call
30 %tmp3 = load i32 ()*, i32 ()** @foo3, align 8
31 %call2 = call i32 %tmp3(), !prof !3
32 %add2 = add nsw i32 %add, %call2
33 ret i32 %add2
34 }
35
36 !1 = !{!"VP", i32 0, i64 1801, i64 7651369219802541373, i64 1234, i64 -4377547752858689819, i64 567}
37 !2 = !{!"VP", i32 0, i64 3023, i64 -6929281286627296573, i64 2345, i64 -4377547752858689819, i64 678}
38 !3 = !{!"VP", i32 0, i64 7890, i64 -4377547752858689819, i64 7890}
0 ; RUN: opt < %s -pgo-icall-prom -S | FileCheck %s --check-prefix=ICALL-PROM
1 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
2 target triple = "x86_64-unknown-linux-gnu"
3
4 @foo = common global i32 (i32, ...)* null, align 8
5
; Variadic callee returning 0; the directives in @bar expect the profiled
; indirect call to be promoted to a direct call of this function.
6 define i32 @va_func(i32 %num, ...) {
7 entry:
8 ret i32 0
9 }
10
; Indirect call through @foo with a variadic function type and four i32
; arguments. The directives expect a pointer compare against @va_func, a
; direct call that keeps the full argument list, the original indirect call on
; the fall-back path, and a phi merging the two results before the return.
11 define i32 @bar() #1 {
12 entry:
13 %tmp = load i32 (i32, ...)*, i32 (i32, ...)** @foo, align 8
14 ; ICALL-PROM: [[BITCAST:%[0-9]+]] = bitcast i32 (i32, ...)* %tmp to i8*
15 ; ICALL-PROM: [[CMP:%[0-9]+]] = icmp eq i8* [[BITCAST]], bitcast (i32 (i32, ...)* @va_func to i8*)
16 ; ICALL-PROM: br i1 [[CMP]], label %if.true.direct_targ, label %if.false.orig_indirect, !prof [[BRANCH_WEIGHT:![0-9]+]]
17 ; ICALL-PROM:if.true.direct_targ:
18 ; ICALL-PROM: [[DIRCALL_RET:%[0-9]+]] = call i32 (i32, ...) @va_func(i32 3, i32 12, i32 22, i32 4)
19 ; ICALL-PROM: br label %if.end.icp
20 %call = call i32 (i32, ...) %tmp(i32 3, i32 12, i32 22, i32 4), !prof !1
21 ; ICALL-PROM:if.false.orig_indirect:
22 ; ICALL-PROM: %call = call i32 (i32, ...) %tmp(i32 3, i32 12, i32 22, i32 4)
23 ; ICALL-PROM: br label %if.end.icp
24 ret i32 %call
25 ; ICALL-PROM:if.end.icp:
26 ; ICALL-PROM: [[PHI_RET:%[0-9]+]] = phi i32 [ %call, %if.false.orig_indirect ], [ [[DIRCALL_RET]], %if.true.direct_targ ]
27 ; ICALL-PROM: ret i32 [[PHI_RET]]
28
29 }
30
31 !1 = !{!"VP", i32 0, i64 12345, i64 989055279648259519, i64 12345}
32 ; ICALL-PROM: [[BRANCH_WEIGHT]] = !{!"branch_weights", i32 12345, i32 0}
0 ; RUN: opt < %s -pgo-icall-prom -S | FileCheck %s --check-prefix=ICALL-PROM
1 ; RUN: opt < %s -pgo-icall-prom -S -pass-remarks=PGOIndirectCallPromotion -icp-count-threshold=0 -icp-percent-threshold=0 -icp-max-prom=4 2>&1 | FileCheck %s --check-prefix=PASS-REMARK
2 ; PASS-REMARK: remark: :0:0: Promote indirect call to func4 with count 1030 out of 1600
3 ; PASS-REMARK: remark: :0:0: Promote indirect call to func2 with count 410 out of 570
4 ; PASS-REMARK: remark: :0:0: Promote indirect call to func3 with count 150 out of 160
5 ; PASS-REMARK: remark: :0:0: Promote indirect call to func1 with count 10 out of 10
6
7 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
8 target triple = "x86_64-unknown-linux-gnu"
9
10 @foo = common global i32 ()* null, align 8
11
; Candidate target returning 0. The second RUN line expects a promotion remark
; for it with count 10 out of 10.
12 define i32 @func1() {
13 entry:
14 ret i32 0
15 }
16
; Candidate target returning 1. The second RUN line expects a promotion remark
; for it with count 410 out of 570.
17 define i32 @func2() {
18 entry:
19 ret i32 1
20 }
21
; Candidate target returning 2. The second RUN line expects a promotion remark
; for it with count 150 out of 160.
22 define i32 @func3() {
23 entry:
24 ret i32 2
25 }
26
; Candidate target returning 3. It has the largest profiled count (1030 out of
; 1600) and is the only target the first RUN line's directives expect to be
; promoted.
27 define i32 @func4() {
28 entry:
29 ret i32 3
30 }
31
; Indirect call through @foo whose value profile (!1) lists four targets.
; Under the first RUN line the directives expect only @func4 to be promoted,
; with the remaining indirect call carrying rewritten value-profile metadata
; and a phi merging both results. Under the second RUN line, which zeroes the
; thresholds and allows four promotions, a remark is expected for each target.
32 define i32 @bar() {
33 entry:
34 %tmp = load i32 ()*, i32 ()** @foo, align 8
35 ; ICALL-PROM: [[BITCAST:%[0-9]+]] = bitcast i32 ()* %tmp to i8*
36 ; ICALL-PROM: [[CMP:%[0-9]+]] = icmp eq i8* [[BITCAST]], bitcast (i32 ()* @func4 to i8*)
37 ; ICALL-PROM: br i1 [[CMP]], label %if.true.direct_targ, label %if.false.orig_indirect, !prof [[BRANCH_WEIGHT:![0-9]+]]
38 ; ICALL-PROM: if.true.direct_targ:
39 ; ICALL-PROM: [[DIRCALL_RET:%[0-9]+]] = call i32 @func4()
40 ; ICALL-PROM: br label %if.end.icp
41 %call = call i32 %tmp(), !prof !1
42 ; ICALL-PROM: if.false.orig_indirect:
43 ; ICALL-PROM: %call = call i32 %tmp(), !prof [[NEW_VP_METADATA:![0-9]+]]
44 ret i32 %call
45 ; ICALL-PROM: if.end.icp:
46 ; ICALL-PROM: [[PHI_RET:%[0-9]+]] = phi i32 [ %call, %if.false.orig_indirect ], [ [[DIRCALL_RET]], %if.true.direct_targ ]
47 ; ICALL-PROM: ret i32 [[PHI_RET]]
48 }
49
50 !1 = !{!"VP", i32 0, i64 1600, i64 7651369219802541373, i64 1030, i64 -4377547752858689819, i64 410, i64 -6929281286627296573, i64 150, i64 -2545542355363006406, i64 10}
51
52 ; ICALL-PROM: [[BRANCH_WEIGHT]] = !{!"branch_weights", i32 1030, i32 570}
53 ; ICALL-PROM: [[NEW_VP_METADATA]] = !{!"VP", i32 0, i64 570, i64 -4377547752858689819, i64 410}