llvm.org GIT mirror llvm / 7d6a3b6
Add CalledValuePropagation pass This patch adds a new pass for attaching !callees metadata to indirect call sites. The pass propagates values to call sites by performing an IPSCCP-like analysis using the generic sparse propagation solver. For indirect call sites having a small set of possible callees, the attached metadata indicates what those callees are. The metadata can be used to facilitate optimizations like intersecting the function attributes of the possible callees, refining the call graph, performing indirect call promotion, etc. Differential Revision: https://reviews.llvm.org/D37355 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@316576 91177308-0d34-0410-b5e6-96231b3b80d8 Matthew Simpson 1 year, 10 months ago
17 changed file(s) with 674 addition(s) and 0 deletion(s). Raw diff Collapse all Expand all
9292 void initializeCallGraphWrapperPassPass(PassRegistry&);
9393 void initializeCodeGenPreparePass(PassRegistry&);
9494 void initializeConstantHoistingLegacyPassPass(PassRegistry&);
95 void initializeCalledValuePropagationLegacyPassPass(PassRegistry &);
9596 void initializeConstantMergeLegacyPassPass(PassRegistry&);
9697 void initializeConstantPropagationPass(PassRegistry&);
9798 void initializeCorrelatedValuePropagationPass(PassRegistry&);
7979 (void) llvm::createCFLSteensAAWrapperPass();
8080 (void) llvm::createStructurizeCFGPass();
8181 (void) llvm::createLibCallsShrinkWrapPass();
82 (void) llvm::createCalledValuePropagationPass();
8283 (void) llvm::createConstantMergePass();
8384 (void) llvm::createConstantPropagationPass();
8485 (void) llvm::createCostModelAnalysisPass();
0 //===- CalledValuePropagation.h - Propagate called values -------*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a transformation that attaches !callees metadata to
10 // indirect call sites. For a given call site, the metadata, if present,
11 // indicates the set of functions the call site could possibly target at
12 // run-time. This metadata is added to indirect call sites when the set of
13 // possible targets can be determined by analysis and is known to be small. The
14 // analysis driving the transformation is similar to constant propagation and
15 // makes use of the generic sparse propagation solver.
16 //
17 //===----------------------------------------------------------------------===//
18
19 #ifndef LLVM_TRANSFORMS_IPO_CALLEDVALUEPROPAGATION_H
20 #define LLVM_TRANSFORMS_IPO_CALLEDVALUEPROPAGATION_H
21
22 #include "llvm/IR/Module.h"
23 #include "llvm/IR/PassManager.h"
24
25 namespace llvm {
26
27 class CalledValuePropagationPass
28 : public PassInfoMixin {
29 public:
30 PreservedAnalyses run(Module &M, ModuleAnalysisManager &);
31 };
32 } // namespace llvm
33
34 #endif // LLVM_TRANSFORMS_IPO_CALLEDVALUEPROPAGATION_H
214214 /// createBarrierNoopPass - This pass is purely a module pass barrier in a pass
215215 /// manager.
216216 ModulePass *createBarrierNoopPass();
217
218 /// createCalledValuePropagationPass - Attach metadata to indirect call sites
219 /// indicating the set of functions they may target at run-time.
220 ModulePass *createCalledValuePropagationPass();
217221
218222 /// What to do with the summary when running passes that operate on it.
219223 enum class PassSummaryAction {
3232
3333 /** See llvm::createConstantMergePass function. */
3434 void LLVMAddConstantMergePass(LLVMPassManagerRef PM);
35
36 /** See llvm::createCalledValuePropagationPass function. */
37 void LLVMAddCalledValuePropagationPass(LLVMPassManagerRef PM);
3538
3639 /** See llvm::createDeadArgEliminationPass function. */
3740 void LLVMAddDeadArgEliminationPass(LLVMPassManagerRef PM);
6262 #include "llvm/Transforms/GCOVProfiler.h"
6363 #include "llvm/Transforms/IPO/AlwaysInliner.h"
6464 #include "llvm/Transforms/IPO/ArgumentPromotion.h"
65 #include "llvm/Transforms/IPO/CalledValuePropagation.h"
6566 #include "llvm/Transforms/IPO/ConstantMerge.h"
6667 #include "llvm/Transforms/IPO/CrossDSOCFI.h"
6768 #include "llvm/Transforms/IPO/DeadArgumentElimination.h"
579580 // years, it should be re-analyzed.
580581 MPM.addPass(IPSCCPPass());
581582
583 // Attach metadata to indirect call sites indicating the set of functions
584 // they may target at run-time. This should follow IPSCCP.
585 MPM.addPass(CalledValuePropagationPass());
586
582587 // Optimize globals to try and fold them into constants.
583588 MPM.addPass(GlobalOptPass());
584589
920925 // opens opportunities for globalopt (and inlining) by substituting function
921926 // pointers passed as arguments to direct uses of functions.
922927 MPM.addPass(IPSCCPPass());
928
929 // Attach metadata to indirect call sites indicating the set of functions
930 // they may target at run-time. This should follow IPSCCP.
931 MPM.addPass(CalledValuePropagationPass());
923932 }
924933
925934 // Now deduce any function attributes based in the current code.
3838 #define MODULE_PASS(NAME, CREATE_PASS)
3939 #endif
4040 MODULE_PASS("always-inline", AlwaysInlinerPass())
41 MODULE_PASS("called-value-propagation", CalledValuePropagationPass())
4142 MODULE_PASS("constmerge", ConstantMergePass())
4243 MODULE_PASS("cross-dso-cfi", CrossDSOCFIPass())
4344 MODULE_PASS("deadargelim", DeadArgumentEliminationPass())
11 AlwaysInliner.cpp
22 ArgumentPromotion.cpp
33 BarrierNoopPass.cpp
4 CalledValuePropagation.cpp
45 ConstantMerge.cpp
56 CrossDSOCFI.cpp
67 DeadArgumentElimination.cpp
0 //===- CalledValuePropagation.cpp - Propagate called values -----*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a transformation that attaches !callees metadata to
10 // indirect call sites. For a given call site, the metadata, if present,
11 // indicates the set of functions the call site could possibly target at
12 // run-time. This metadata is added to indirect call sites when the set of
13 // possible targets can be determined by analysis and is known to be small. The
14 // analysis driving the transformation is similar to constant propagation and
15 // makes use of the generic sparse propagation solver.
16 //
17 //===----------------------------------------------------------------------===//
18
19 #include "llvm/Transforms/IPO/CalledValuePropagation.h"
20 #include "llvm/Analysis/SparsePropagation.h"
21 #include "llvm/Analysis/ValueLatticeUtils.h"
22 #include "llvm/IR/InstVisitor.h"
23 #include "llvm/IR/MDBuilder.h"
24 #include "llvm/Transforms/IPO.h"
25 using namespace llvm;
26
27 #define DEBUG_TYPE "called-value-propagation"
28
29 /// The maximum number of functions to track per lattice value. Once the number
30 /// of functions a call site can possibly target exceeds this threshold, it's
31 /// lattice value becomes overdefined. The number of possible lattice values is
32 /// bounded by Ch(F, M), where F is the number of functions in the module and M
33 /// is MaxFunctionsPerValue. As such, this value should be kept very small. We
34 /// likely can't do anything useful for call sites with a large number of
35 /// possible targets, anyway.
36 static cl::opt MaxFunctionsPerValue(
37 "cvp-max-functions-per-value", cl::Hidden, cl::init(4),
38 cl::desc("The maximum number of functions to track per lattice value"));
39
40 namespace {
41 /// To enable interprocedural analysis, we assign LLVM values to the following
42 /// groups. The register group represents SSA registers, the return group
43 /// represents the return values of functions, and the memory group represents
44 /// in-memory values. An LLVM Value can technically be in more than one group.
45 /// It's necessary to distinguish these groups so we can, for example, track a
46 /// global variable separately from the value stored at its location.
47 enum class IPOGrouping { Register, Return, Memory };
48
49 /// Our LatticeKeys are PointerIntPairs composed of LLVM values and groupings.
50 using CVPLatticeKey = PointerIntPair;
51
52 /// The lattice value type used by our custom lattice function. It holds the
53 /// lattice state, and a set of functions.
54 class CVPLatticeVal {
55 public:
56 /// The states of the lattice values. Only the FunctionSet state is
57 /// interesting. It indicates the set of functions to which an LLVM value may
58 /// refer.
59 enum CVPLatticeStateTy { Undefined, FunctionSet, Overdefined, Untracked };
60
61 /// Comparator for sorting the functions set. We want to keep the order
62 /// deterministic for testing, etc.
63 struct Compare {
64 bool operator()(const Function *LHS, const Function *RHS) const {
65 return LHS->getName() < RHS->getName();
66 }
67 };
68
69 CVPLatticeVal() : LatticeState(Undefined) {}
70 CVPLatticeVal(CVPLatticeStateTy LatticeState) : LatticeState(LatticeState) {}
71 CVPLatticeVal(std::set &&Functions)
72 : LatticeState(FunctionSet), Functions(Functions) {}
73
74 /// Get a reference to the functions held by this lattice value. The number
75 /// of functions will be zero for states other than FunctionSet.
76 const std::set &getFunctions() const {
77 return Functions;
78 }
79
80 /// Returns true if the lattice value is in the FunctionSet state.
81 bool isFunctionSet() const { return LatticeState == FunctionSet; }
82
83 bool operator==(const CVPLatticeVal &RHS) const {
84 return LatticeState == RHS.LatticeState && Functions == RHS.Functions;
85 }
86
87 bool operator!=(const CVPLatticeVal &RHS) const {
88 return LatticeState != RHS.LatticeState || Functions != RHS.Functions;
89 }
90
91 private:
92 /// Holds the state this lattice value is in.
93 CVPLatticeStateTy LatticeState;
94
95 /// Holds functions indicating the possible targets of call sites. This set
96 /// is empty for lattice values in the undefined, overdefined, and untracked
97 /// states. The maximum size of the set is controlled by
98 /// MaxFunctionsPerValue. Since most LLVM values are expected to be in
99 /// uninteresting states (i.e., overdefined), CVPLatticeVal objects should be
100 /// small and efficiently copyable.
101 std::set Functions;
102 };
103
104 /// The custom lattice function used by the generic sparse propagation solver.
105 /// It handles merging lattice values and computing new lattice values for
106 /// constants, arguments, values returned from trackable functions, and values
107 /// located in trackable global variables. It also computes the lattice values
108 /// that change as a result of executing instructions.
109 class CVPLatticeFunc
110 : public AbstractLatticeFunction {
111 public:
112 CVPLatticeFunc()
113 : AbstractLatticeFunction(CVPLatticeVal(CVPLatticeVal::Undefined),
114 CVPLatticeVal(CVPLatticeVal::Overdefined),
115 CVPLatticeVal(CVPLatticeVal::Untracked)) {}
116
117 /// Compute and return a CVPLatticeVal for the given CVPLatticeKey.
118 CVPLatticeVal ComputeLatticeVal(CVPLatticeKey Key) override {
119 switch (Key.getInt()) {
120 case IPOGrouping::Register:
121 if (isa(Key.getPointer())) {
122 return getUndefVal();
123 } else if (auto *A = dyn_cast(Key.getPointer())) {
124 if (canTrackArgumentsInterprocedurally(A->getParent()))
125 return getUndefVal();
126 } else if (auto *C = dyn_cast(Key.getPointer())) {
127 return computeConstant(C);
128 }
129 return getOverdefinedVal();
130 case IPOGrouping::Memory:
131 case IPOGrouping::Return:
132 if (auto *GV = dyn_cast(Key.getPointer())) {
133 if (canTrackGlobalVariableInterprocedurally(GV))
134 return computeConstant(GV->getInitializer());
135 } else if (auto *F = cast(Key.getPointer()))
136 if (canTrackReturnsInterprocedurally(F))
137 return getUndefVal();
138 }
139 return getOverdefinedVal();
140 }
141
142 /// Merge the two given lattice values. The interesting cases are merging two
143 /// FunctionSet values and a FunctionSet value with an Undefined value. For
144 /// these cases, we simply union the function sets. If the size of the union
145 /// is greater than the maximum functions we track, the merged value is
146 /// overdefined.
147 CVPLatticeVal MergeValues(CVPLatticeVal X, CVPLatticeVal Y) override {
148 if (X == getOverdefinedVal() || Y == getOverdefinedVal())
149 return getOverdefinedVal();
150 if (X == getUndefVal() && Y == getUndefVal())
151 return getUndefVal();
152 std::set Union;
153 std::set_union(X.getFunctions().begin(), X.getFunctions().end(),
154 Y.getFunctions().begin(), Y.getFunctions().end(),
155 std::inserter(Union, Union.begin()));
156 if (Union.size() > MaxFunctionsPerValue)
157 return getOverdefinedVal();
158 return CVPLatticeVal(std::move(Union));
159 }
160
161 /// Compute the lattice values that change as a result of executing the given
162 /// instruction. The changed values are stored in \p ChangedValues. We handle
163 /// just a few kinds of instructions since we're only propagating values that
164 /// can be called.
165 void ComputeInstructionState(
166 Instruction &I, DenseMap &ChangedValues,
167 SparseSolver &SS) override {
168 switch (I.getOpcode()) {
169 case Instruction::Call:
170 return visitCallSite(cast(&I), ChangedValues, SS);
171 case Instruction::Invoke:
172 return visitCallSite(cast(&I), ChangedValues, SS);
173 case Instruction::Load:
174 return visitLoad(*cast(&I), ChangedValues, SS);
175 case Instruction::Ret:
176 return visitReturn(*cast(&I), ChangedValues, SS);
177 case Instruction::Select:
178 return visitSelect(*cast(&I), ChangedValues, SS);
179 case Instruction::Store:
180 return visitStore(*cast(&I), ChangedValues, SS);
181 default:
182 return visitInst(I, ChangedValues, SS);
183 }
184 }
185
186 /// Print the given CVPLatticeVal to the specified stream.
187 void PrintLatticeVal(CVPLatticeVal LV, raw_ostream &OS) override {
188 if (LV == getUndefVal())
189 OS << "Undefined ";
190 else if (LV == getOverdefinedVal())
191 OS << "Overdefined";
192 else if (LV == getUntrackedVal())
193 OS << "Untracked ";
194 else
195 OS << "FunctionSet";
196 }
197
198 /// Print the given CVPLatticeKey to the specified stream.
199 void PrintLatticeKey(CVPLatticeKey Key, raw_ostream &OS) override {
200 if (Key.getInt() == IPOGrouping::Register)
201 OS << " ";
202 else if (Key.getInt() == IPOGrouping::Memory)
203 OS << " ";
204 else if (Key.getInt() == IPOGrouping::Return)
205 OS << " ";
206 if (isa(Key.getPointer()))
207 OS << Key.getPointer()->getName();
208 else
209 OS << *Key.getPointer();
210 }
211
212 /// We collect a set of indirect calls when visiting call sites. This method
213 /// returns a reference to that set.
214 SmallPtrSetImpl &getIndirectCalls() { return IndirectCalls; }
215
216 private:
217 /// Holds the indirect calls we encounter during the analysis. We will attach
218 /// metadata to these calls after the analysis indicating the functions the
219 /// calls can possibly target.
220 SmallPtrSet IndirectCalls;
221
222 /// Compute a new lattice value for the given constant. The constant, after
223 /// stripping any pointer casts, should be a Function. We ignore null
224 /// pointers as an optimization, since calling these values is undefined
225 /// behavior.
226 CVPLatticeVal computeConstant(Constant *C) {
227 if (isa(C))
228 return CVPLatticeVal(CVPLatticeVal::FunctionSet);
229 if (auto *F = dyn_cast(C->stripPointerCasts()))
230 return CVPLatticeVal({F});
231 return getOverdefinedVal();
232 }
233
234 /// Handle return instructions. The function's return state is the merge of
235 /// the returned value state and the function's return state.
236 void visitReturn(ReturnInst &I,
237 DenseMap &ChangedValues,
238 SparseSolver &SS) {
239 Function *F = I.getParent()->getParent();
240 if (F->getReturnType()->isVoidTy())
241 return;
242 auto RegI = CVPLatticeKey(I.getReturnValue(), IPOGrouping::Register);
243 auto RetF = CVPLatticeKey(F, IPOGrouping::Return);
244 ChangedValues[RetF] =
245 MergeValues(SS.getValueState(RegI), SS.getValueState(RetF));
246 }
247
248 /// Handle call sites. The state of a called function's formal arguments is
249 /// the merge of the argument state with the call sites corresponding actual
250 /// argument state. The call site state is the merge of the call site state
251 /// with the returned value state of the called function.
252 void visitCallSite(CallSite CS,
253 DenseMap &ChangedValues,
254 SparseSolver &SS) {
255 Function *F = CS.getCalledFunction();
256 Instruction *I = CS.getInstruction();
257 auto RegI = CVPLatticeKey(I, IPOGrouping::Register);
258
259 // If this is an indirect call, save it so we can quickly revisit it when
260 // attaching metadata.
261 if (!F)
262 IndirectCalls.insert(I);
263
264 // If we can't track the function's return values, there's nothing to do.
265 if (!F || !canTrackReturnsInterprocedurally(F)) {
266 ChangedValues[RegI] = getOverdefinedVal();
267 return;
268 }
269
270 // Inform the solver that the called function is executable, and perform
271 // the merges for the arguments and return value.
272 SS.MarkBlockExecutable(&F->front());
273 auto RetF = CVPLatticeKey(F, IPOGrouping::Return);
274 for (Argument &A : F->args()) {
275 auto RegFormal = CVPLatticeKey(&A, IPOGrouping::Register);
276 auto RegActual =
277 CVPLatticeKey(CS.getArgument(A.getArgNo()), IPOGrouping::Register);
278 ChangedValues[RegFormal] =
279 MergeValues(SS.getValueState(RegFormal), SS.getValueState(RegActual));
280 }
281 ChangedValues[RegI] =
282 MergeValues(SS.getValueState(RegI), SS.getValueState(RetF));
283 }
284
285 /// Handle select instructions. The select instruction state is the merge the
286 /// true and false value states.
287 void visitSelect(SelectInst &I,
288 DenseMap &ChangedValues,
289 SparseSolver &SS) {
290 auto RegI = CVPLatticeKey(&I, IPOGrouping::Register);
291 auto RegT = CVPLatticeKey(I.getTrueValue(), IPOGrouping::Register);
292 auto RegF = CVPLatticeKey(I.getFalseValue(), IPOGrouping::Register);
293 ChangedValues[RegI] =
294 MergeValues(SS.getValueState(RegT), SS.getValueState(RegF));
295 }
296
297 /// Handle load instructions. If the pointer operand of the load is a global
298 /// variable, we attempt to track the value. The loaded value state is the
299 /// merge of the loaded value state with the global variable state.
300 void visitLoad(LoadInst &I,
301 DenseMap &ChangedValues,
302 SparseSolver &SS) {
303 auto RegI = CVPLatticeKey(&I, IPOGrouping::Register);
304 if (auto *GV = dyn_cast(I.getPointerOperand())) {
305 auto MemGV = CVPLatticeKey(GV, IPOGrouping::Memory);
306 ChangedValues[RegI] =
307 MergeValues(SS.getValueState(RegI), SS.getValueState(MemGV));
308 } else {
309 ChangedValues[RegI] = getOverdefinedVal();
310 }
311 }
312
313 /// Handle store instructions. If the pointer operand of the store is a
314 /// global variable, we attempt to track the value. The global variable state
315 /// is the merge of the stored value state with the global variable state.
316 void visitStore(StoreInst &I,
317 DenseMap &ChangedValues,
318 SparseSolver &SS) {
319 auto *GV = dyn_cast(I.getPointerOperand());
320 if (!GV)
321 return;
322 auto RegI = CVPLatticeKey(I.getValueOperand(), IPOGrouping::Register);
323 auto MemGV = CVPLatticeKey(GV, IPOGrouping::Memory);
324 ChangedValues[MemGV] =
325 MergeValues(SS.getValueState(RegI), SS.getValueState(MemGV));
326 }
327
328 /// Handle all other instructions. All other instructions are marked
329 /// overdefined.
330 void visitInst(Instruction &I,
331 DenseMap &ChangedValues,
332 SparseSolver &SS) {
333 auto RegI = CVPLatticeKey(&I, IPOGrouping::Register);
334 ChangedValues[RegI] = getOverdefinedVal();
335 }
336 };
337 } // namespace
338
339 namespace llvm {
340 /// A specialization of LatticeKeyInfo for CVPLatticeKeys. The generic solver
341 /// must translate between LatticeKeys and LLVM Values when adding Values to
342 /// its work list and inspecting the state of control-flow related values.
343 template <> struct LatticeKeyInfo {
344 static inline Value *getValueFromLatticeKey(CVPLatticeKey Key) {
345 return Key.getPointer();
346 }
347 static inline CVPLatticeKey getLatticeKeyFromValue(Value *V) {
348 return CVPLatticeKey(V, IPOGrouping::Register);
349 }
350 };
351 } // namespace llvm
352
353 static bool runCVP(Module &M) {
354 // Our custom lattice function and generic sparse propagation solver.
355 CVPLatticeFunc Lattice;
356 SparseSolver Solver(&Lattice);
357
358 // For each function in the module, if we can't track its arguments, let the
359 // generic solver assume it is executable.
360 for (Function &F : M)
361 if (!F.isDeclaration() && !canTrackArgumentsInterprocedurally(&F))
362 Solver.MarkBlockExecutable(&F.front());
363
364 // Solver our custom lattice. In doing so, we will also build a set of
365 // indirect call sites.
366 Solver.Solve();
367
368 // Attach metadata to the indirect call sites that were collected indicating
369 // the set of functions they can possibly target.
370 bool Changed = false;
371 MDBuilder MDB(M.getContext());
372 for (Instruction *C : Lattice.getIndirectCalls()) {
373 CallSite CS(C);
374 auto RegI = CVPLatticeKey(CS.getCalledValue(), IPOGrouping::Register);
375 CVPLatticeVal LV = Solver.getExistingValueState(RegI);
376 if (!LV.isFunctionSet() || LV.getFunctions().empty())
377 continue;
378 MDNode *Callees = MDB.createCallees(SmallVector(
379 LV.getFunctions().begin(), LV.getFunctions().end()));
380 C->setMetadata(LLVMContext::MD_callees, Callees);
381 Changed = true;
382 }
383
384 return Changed;
385 }
386
387 PreservedAnalyses CalledValuePropagationPass::run(Module &M,
388 ModuleAnalysisManager &) {
389 runCVP(M);
390 return PreservedAnalyses::all();
391 }
392
393 namespace {
394 class CalledValuePropagationLegacyPass : public ModulePass {
395 public:
396 static char ID;
397
398 void getAnalysisUsage(AnalysisUsage &AU) const override {
399 AU.setPreservesAll();
400 }
401
402 CalledValuePropagationLegacyPass() : ModulePass(ID) {
403 initializeCalledValuePropagationLegacyPassPass(
404 *PassRegistry::getPassRegistry());
405 }
406
407 bool runOnModule(Module &M) override {
408 if (skipModule(M))
409 return false;
410 return runCVP(M);
411 }
412 };
413 } // namespace
414
415 char CalledValuePropagationLegacyPass::ID = 0;
416 INITIALIZE_PASS(CalledValuePropagationLegacyPass, "called-value-propagation",
417 "Called Value Propagation", false, false)
418
419 ModulePass *llvm::createCalledValuePropagationPass() {
420 return new CalledValuePropagationLegacyPass();
421 }
2424
2525 void llvm::initializeIPO(PassRegistry &Registry) {
2626 initializeArgPromotionPass(Registry);
27 initializeCalledValuePropagationLegacyPassPass(Registry);
2728 initializeConstantMergeLegacyPassPass(Registry);
2829 initializeCrossDSOCFIPass(Registry);
2930 initializeDAEPass(Registry);
6465
6566 void LLVMAddArgumentPromotionPass(LLVMPassManagerRef PM) {
6667 unwrap(PM)->add(createArgumentPromotionPass());
68 }
69
70 void LLVMAddCalledValuePropagationPass(LLVMPassManagerRef PM) {
71 unwrap(PM)->add(createCalledValuePropagationPass());
6772 }
6873
6974 void LLVMAddConstantMergePass(LLVMPassManagerRef PM) {
459459 addExtensionsToPM(EP_ModuleOptimizerEarly, MPM);
460460
461461 MPM.add(createIPSCCPPass()); // IP SCCP
462 MPM.add(createCalledValuePropagationPass());
462463 MPM.add(createGlobalOptimizerPass()); // Optimize out global vars
463464 // Promote any localized global vars.
464465 MPM.add(createPromoteMemoryToRegisterPass());
702703 // opens opportunities for globalopt (and inlining) by substituting function
703704 // pointers passed as arguments to direct uses of functions.
704705 PM.add(createIPSCCPPass());
706
707 // Attach metadata to indirect call sites indicating the set of functions
708 // they may target at run-time. This should follow IPSCCP.
709 PM.add(createCalledValuePropagationPass());
705710 }
706711
707712 // Infer attributes about definitions. The readnone attribute in particular is
7777 ; CHECK-O-NEXT: Running pass: LowerExpectIntrinsicPass
7878 ; CHECK-O-NEXT: Finished llvm::Function pass manager run.
7979 ; CHECK-O-NEXT: Running pass: IPSCCPPass
80 ; CHECK-O-NEXT: Running pass: CalledValuePropagationPass
8081 ; CHECK-O-NEXT: Running pass: GlobalOptPass
8182 ; CHECK-O-NEXT: Running pass: ModuleToFunctionPassAdaptor<{{.*}}PromotePass>
8283 ; CHECK-O-NEXT: Running pass: DeadArgumentEliminationPass
3333 ; CHECK-O2-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}Function
3434 ; CHECK-O2-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis
3535 ; CHECK-O2-NEXT: Running pass: IPSCCPPass
36 ; CHECK-O2-NEXT: Running pass: CalledValuePropagationPass
3637 ; CHECK-O-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}PostOrderFunctionAttrsPass>
3738 ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}SCC
3839 ; CHECK-O1-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}Function
7373 ; CHECK-O-NEXT: Running pass: LowerExpectIntrinsicPass
7474 ; CHECK-O-NEXT: Finished llvm::Function pass manager run.
7575 ; CHECK-O-NEXT: Running pass: IPSCCPPass
76 ; CHECK-O-NEXT: Running pass: CalledValuePropagationPass
7677 ; CHECK-O-NEXT: Running pass: GlobalOptPass
7778 ; CHECK-O-NEXT: Running pass: ModuleToFunctionPassAdaptor<{{.*}}PromotePass>
7879 ; CHECK-O-NEXT: Running pass: DeadArgumentEliminationPass
0 ; RUN: opt -called-value-propagation -S < %s | FileCheck %s
1
2 target triple = "aarch64-unknown-linux-gnueabi"
3
4
5 ; This test checks that we propagate the functions through arguments and attach
6 ; !callees metadata to the call. Such metadata can enable optimizations of this
7 ; code sequence.
8 ;
9 ; For example, the code below illustrates a contrived sort-like algorithm
10 ; that accepts a pointer to a comparison function. Since the indirect call to
11 ; the comparison function has only two targets, the call can be promoted to two
12 ; direct calls using an if-then-else. The loop can then be unswitched and the
13 ; called functions inlined. This essentially produces two loops, one
14 ; specialized for each comparison.
15 ;
16 ; CHECK: %tmp3 = call i1 %cmp(i64* %tmp1, i64* %tmp2), !callees ![[MD:[0-9]+]]
17 ; CHECK: ![[MD]] = !{i1 (i64*, i64*)* @ugt, i1 (i64*, i64*)* @ule}
18 ;
19 define void @test_argument(i64* %x, i64 %n, i1 %flag) {
20 entry:
21 %tmp0 = sub i64 %n, 1
22 br i1 %flag, label %then, label %else
23
24 then:
25 call void @arrange_data(i64* %x, i64 %tmp0, i1 (i64*, i64*)* @ugt)
26 br label %merge
27
28 else:
29 call void @arrange_data(i64* %x, i64 %tmp0, i1 (i64*, i64*)* @ule)
30 br label %merge
31
32 merge:
33 ret void
34 }
35
36 define internal void @arrange_data(i64* %x, i64 %n, i1 (i64*, i64*)* %cmp) {
37 entry:
38 %tmp0 = icmp eq i64 %n, 1
39 br i1 %tmp0, label %merge, label %for.body
40
41 for.body:
42 %i = phi i64 [ 0, %entry ], [ %i.next, %cmp.false ]
43 %i.next = add nuw nsw i64 %i, 1
44 %tmp1 = getelementptr inbounds i64, i64* %x, i64 %i
45 %tmp2 = getelementptr inbounds i64, i64* %x, i64 %i.next
46 %tmp3 = call i1 %cmp(i64* %tmp1, i64* %tmp2)
47 br i1 %tmp3, label %cmp.true, label %cmp.false
48
49 cmp.true:
50 call void @swap(i64* %tmp1, i64* %tmp2)
51 br label %cmp.false
52
53 cmp.false:
54 %cond = icmp slt i64 %i.next, %n
55 br i1 %cond, label %for.body, label %for.end
56
57 for.end:
58 %tmp4 = sub i64 %n, 1
59 call void @arrange_data(i64* %x, i64 %tmp4, i1 (i64*, i64*)* %cmp)
60 br label %merge
61
62 merge:
63 ret void
64 }
65
66 define internal i1 @ugt(i64* %a, i64* %b) {
67 entry:
68 %tmp0 = load i64, i64* %a
69 %tmp1 = load i64, i64* %b
70 %tmp2 = icmp ugt i64 %tmp0, %tmp1
71 ret i1 %tmp2
72 }
73
74 define internal i1 @ule(i64* %a, i64* %b) {
75 entry:
76 %tmp0 = load i64, i64* %a
77 %tmp1 = load i64, i64* %b
78 %tmp2 = icmp ule i64 %tmp0, %tmp1
79 ret i1 %tmp2
80 }
81
82 declare void @swap(i64*, i64*)
0 ; RUN: opt -called-value-propagation -S < %s | FileCheck %s
1
2 target triple = "aarch64-unknown-linux-gnueabi"
3
4 @global_function = internal unnamed_addr global void ()* null, align 8
5 @global_array = common unnamed_addr global i64* null, align 8
6
7 ; This test checks that we propagate the functions through an internal global
8 ; variable, and attach !callees metadata to the call. Such metadata can enable
9 ; optimizations of this code sequence.
10 ;
11 ; For example, since both of the targeted functions have the "nounwind" and
12 ; "readnone" function attributes, LICM can be made to move the call and the
13 ; function pointer load outside the loop. This would then enable the loop
14 ; vectorizer to vectorize the sum reduction.
15 ;
16 ; CHECK: call void %tmp0(), !callees ![[MD:[0-9]+]]
17 ; CHECK: ![[MD]] = !{void ()* @invariant_1, void ()* @invariant_2}
18 ;
19 define i64 @test_memory_entry(i64 %n, i1 %flag) {
20 entry:
21 br i1 %flag, label %then, label %else
22
23 then:
24 store void ()* @invariant_1, void ()** @global_function
25 br label %merge
26
27 else:
28 store void ()* @invariant_2, void ()** @global_function
29 br label %merge
30
31 merge:
32 %tmp1 = call i64 @test_memory(i64 %n)
33 ret i64 %tmp1
34 }
35
36 define internal i64 @test_memory(i64 %n) {
37 entry:
38 %array = load i64*, i64** @global_array
39 br label %for.body
40
41 for.body:
42 %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
43 %r = phi i64 [ 0, %entry ], [ %tmp3, %for.body ]
44 %tmp0 = load void ()*, void ()** @global_function
45 call void %tmp0()
46 %tmp1 = getelementptr inbounds i64, i64* %array, i64 %i
47 %tmp2 = load i64, i64* %tmp1
48 %tmp3 = add i64 %tmp2, %r
49 %i.next = add nuw nsw i64 %i, 1
50 %cond = icmp slt i64 %i.next, %n
51 br i1 %cond, label %for.body, label %for.end
52
53 for.end:
54 %tmp4 = phi i64 [ %tmp3, %for.body ]
55 ret i64 %tmp4
56 }
57
58 declare void @invariant_1() #0
59 declare void @invariant_2() #0
60
61 attributes #0 = { nounwind readnone }
0 ; RUN: opt -called-value-propagation -S < %s | FileCheck %s
1
2 target triple = "aarch64-unknown-linux-gnueabi"
3
4 @global_function = internal unnamed_addr global void ()* null, align 8
5 @global_scalar = internal unnamed_addr global i64 zeroinitializer
6
7 ; This test checks that we propagate the functions through a select
8 ; instruction, and attach !callees metadata to the call. Such metadata can
9 ; enable optimizations of this code sequence.
10 ;
11 ; For example, since both of the targeted functions have the "norecurse"
12 ; attribute, the function attributes pass can be made to infer that
13 ; "@test_select" is also norecurse. This would allow the globals optimizer to
14 ; localize "@global_scalar". The function could then be further simplified to
15 ; always return the constant "1", eliminating the load and store instructions.
16 ;
17 ; CHECK: call void %tmp0(), !callees ![[MD:[0-9]+]]
18 ; CHECK: ![[MD]] = !{void ()* @norecurse_1, void ()* @norecurse_2}
19 ;
20 define i64 @test_select_entry(i1 %flag) {
21 entry:
22 %tmp0 = call i64 @test_select(i1 %flag)
23 ret i64 %tmp0
24 }
25
26 define internal i64 @test_select(i1 %flag) {
27 entry:
28 %tmp0 = select i1 %flag, void ()* @norecurse_1, void ()* @norecurse_2
29 store i64 1, i64* @global_scalar
30 call void %tmp0()
31 %tmp1 = load i64, i64* @global_scalar
32 ret i64 %tmp1
33 }
34
35 declare void @norecurse_1() #0
36 declare void @norecurse_2() #0
37
38 attributes #0 = { norecurse }