llvm.org GIT mirror llvm / 1a87474
Define the ThinLTO Pipeline Summary: Contrary to Full LTO, ThinLTO can afford to shift compile time from the frontend to the linker: both phases are parallel. This pipeline is based on the proposal in D13443 for full LTO. We didn't move forward on this proposal because the link was far too long after that. This patch refactors the "function simplification" passes that are part of the inliner loop in a helper function (this part is NFC and can be committed separately to simplify the diff). The ThinLTO pipeline integrates in the regular O2/O3 flow: - The compile phase performs the inliner with a somewhat lighter function simplification. (TODO: tune the inliner thresholds here) This is intended to simplify the IR and get rid of obvious things like linkonce_odr that will be inlined. - The link phase will run the pipeline from the start, extended with some specific passes that leverage the augmented knowledge we have during LTO. Especially after the inliner is done, a sequence of globalDCE/globalOpt is performed, followed by another run of the "function simplification" passes. The measurements on the public test suite as well as on our internal suite show an overall net improvement. The binary size for the clang executable is reduced by 5%. We're still tuning it with the bringup of ThinLTO but this should provide a good starting point. Reviewers: tejohnson Subscribers: joker.eph, llvm-commits, dexonsmith Differential Revision: http://reviews.llvm.org/D17115 From: Mehdi Amini <mehdi.amini@apple.com> git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@260604 91177308-0d34-0410-b5e6-96231b3b80d8 Mehdi Amini 3 years ago
2 changed file(s) with 46 addition(s) and 1 deletion(s). Raw diff Collapse all Expand all
132132 bool VerifyOutput;
133133 bool MergeFunctions;
134134 bool PrepareForLTO;
135 bool PrepareForThinLTO;
136 bool PerformThinLTO;
135137
136138 /// Profile data file name that the instrumentation will be written to.
137139 std::string PGOInstrGen;
169171 /// populateModulePassManager - This sets up the primary pass manager.
170172 void populateModulePassManager(legacy::PassManagerBase &MPM);
171173 void populateLTOPassManager(legacy::PassManagerBase &PM);
174 void populateThinLTOPassManager(legacy::PassManagerBase &PM);
172175 };
173176
174177 /// Registers a function for adding a standard set of passes. This should be
139139 PrepareForLTO = false;
140140 PGOInstrGen = RunPGOInstrGen;
141141 PGOInstrUse = RunPGOInstrUse;
142 PrepareForThinLTO = false;
143 PerformThinLTO = false;
142144 }
143145
144146 PassManagerBuilder::~PassManagerBuilder() {
232234 MPM.add(createTailCallEliminationPass()); // Eliminate tail calls
233235 MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
234236 MPM.add(createReassociatePass()); // Reassociate expressions
237 if (PrepareForThinLTO) {
238 MPM.add(createAggressiveDCEPass()); // Delete dead instructions
239 MPM.add(createInstructionCombiningPass()); // Combine silly seq's
240 return;
241 }
235242 // Rotate Loop - disable header duplication at -Oz
236243 MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1));
237244 MPM.add(createLICMPass()); // Hoist loop invariants
345352
346353 MPM.add(createIPSCCPPass()); // IP SCCP
347354 MPM.add(createGlobalOptimizerPass()); // Optimize out global vars
355
356 if (PerformThinLTO)
357 // Linking modules together can lead to duplicated global constants, only
358 // keep one copy of each constant.
359 MPM.add(createConstantMergePass());
360
348361 // Promote any localized global vars
349362 MPM.add(createPromoteMemoryToRegisterPass());
350363
376389 MPM.add(createArgumentPromotionPass()); // Scalarize uninlined fn args
377390
378391 addFunctionSimplificationPasses(MPM);
392
393 // If we are planning to perform ThinLTO later, let's not bloat the code with
394 // unrolling/vectorization/... now. We'll first run the inliner + CGSCC passes
395 // during ThinLTO and performs the rest of the optimizations afterward.
396 if (PrepareForThinLTO)
397 return;
379398
380399 // FIXME: This is a HACK! The inliner pass above implicitly creates a CGSCC
381400 // pass manager that we are specifically trying to avoid. To prevent this
395414 if (!DisableUnitAtATime)
396415 MPM.add(createReversePostOrderFunctionAttrsPass());
397416
398 if (!DisableUnitAtATime && OptLevel > 1 && !PrepareForLTO) {
417 if (!DisableUnitAtATime && OptLevel > 1 && !PrepareForLTO)
399418 // Remove avail extern fns and globals definitions if we aren't
400419 // compiling an object file for later LTO. For LTO we want to preserve
401420 // these so they are eligible for inlining at link-time. Note if they
406425 // globals referenced by available external functions dead
407426 // and saves running remaining passes on the eliminated functions.
408427 MPM.add(createEliminateAvailableExternallyPass());
428
429 if (PerformThinLTO) {
430 // Remove dead fns and globals. Removing unreferenced functions could lead
431 // to more opportunities for globalopt
432 MPM.add(createGlobalDCEPass());
433 MPM.add(createGlobalOptimizerPass());
434 // Remove dead fns and globals after globalopt
435 MPM.add(createGlobalDCEPass());
436 addFunctionSimplificationPasses(MPM);
409437 }
410438
411439 if (EnableNonLTOGlobalsModRef)
681709 PM.add(createMergeFunctionsPass());
682710 }
683711
712 void PassManagerBuilder::populateThinLTOPassManager( // Populates PM with the ThinLTO link-phase pipeline.
713 legacy::PassManagerBase &PM) { // PM: pass manager that receives the passes.
714 PerformThinLTO = true; // Enables the PerformThinLTO-guarded passes in populateModulePassManager.
715
716 if (VerifyInput) // Optionally add a verifier pass ahead of the pipeline.
717 PM.add(createVerifierPass());
718
719 populateModulePassManager(PM); // Reuse the regular module pipeline; it reads PerformThinLTO.
720
721 if (VerifyOutput) // Optionally add a verifier pass after the pipeline.
722 PM.add(createVerifierPass());
723 PerformThinLTO = false; // Reset the flag so later populate* calls on this builder are not affected.
724 }
725
684726 void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) {
685727 if (LibraryInfo)
686728 PM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo));