llvm.org GIT mirror llvm / 7716d65
[PGO] Profile guided code size optimization. Summary: Enable some of the existing size optimizations for cold code under PGO. A ~5% code size saving in big internal app under PGO. The way it gets BFI/PSI is discussed in the RFC thread http://lists.llvm.org/pipermail/llvm-dev/2019-March/130894.html Note it doesn't currently touch loop passes. Reviewers: davidxl, eraman Reviewed By: eraman Subscribers: mgorny, javed.absar, smeenai, mehdi_amini, eraman, zzheng, steven_wu, dexonsmith, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D59514 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@358422 91177308-0d34-0410-b5e6-96231b3b80d8 Hiroshi Yamauchi 4 months ago
28 changed file(s) with 473 addition(s) and 44 deletion(s). Raw diff Collapse all Expand all
5454 class Function;
5555 class GlobalVariable;
5656 class Instruction;
57 class ProfileSummaryInfo;
5758 class TargetTransformInfo;
5859
5960 /// A private "module" namespace for types and utilities used by
123124
124125 // Glue for old PM.
125126 bool runImpl(Function &F, TargetTransformInfo &TTI, DominatorTree &DT,
126 BlockFrequencyInfo *BFI, BasicBlock &Entry);
127 BlockFrequencyInfo *BFI, BasicBlock &Entry,
128 ProfileSummaryInfo *PSI);
127129
128130 void cleanup() {
129131 ClonedCastMap.clear();
147149 LLVMContext *Ctx;
148150 const DataLayout *DL;
149151 BasicBlock *Entry;
152 ProfileSummaryInfo *PSI;
150153
151154 /// Keeps track of constant candidates found in the function.
152155 using ConstCandVecType = std::vector;
2727 class BasicBlock;
2828 class Function;
2929 class OptimizationRemarkEmitter;
30 class BlockFrequencyInfo;
31 class ProfileSummaryInfo;
3032
3133 /// This class implements simplifications for calls to fortified library
3234 /// functions (__st*cpy_chk, __memcpy_chk, __memmove_chk, __memset_chk), to,
7375 const DataLayout &DL;
7476 const TargetLibraryInfo *TLI;
7577 OptimizationRemarkEmitter &ORE;
78 BlockFrequencyInfo *BFI;
79 ProfileSummaryInfo *PSI;
7680 bool UnsafeFPShrink;
7781 function_ref Replacer;
7882 function_ref Eraser;
100104 LibCallSimplifier(
101105 const DataLayout &DL, const TargetLibraryInfo *TLI,
102106 OptimizationRemarkEmitter &ORE,
107 BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
103108 function_ref Replacer =
104109 &replaceAllUsesWithDefault,
105110 function_ref Eraser = &eraseFromParentDefault);
0 //===- llvm/Transforms/Utils/SizeOpts.h - size optimization -----*- C++ -*-===//
1 //
2 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
3 // See https://llvm.org/LICENSE.txt for license information.
4 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
5 //
6 //===----------------------------------------------------------------------===//
7 //
8 // This file contains some shared code size optimization related code.
9 //
10 //===----------------------------------------------------------------------===//
11
// Include guard: the macro tested by #ifndef and the macro defined by #define
// must be spelled identically, otherwise the guard never takes effect.
#ifndef LLVM_TRANSFORMS_UTILS_SIZEOPTS_H
#define LLVM_TRANSFORMS_UTILS_SIZEOPTS_H

namespace llvm {

// Forward declarations only; this header does not need the full definitions.
class BasicBlock;
class BlockFrequencyInfo;
class Function;
class ProfileSummaryInfo;

/// Returns true if function \p F is suggested to be size-optimized based on
/// the profile.
///
/// \param F   The function to query.
/// \param PSI Profile summary information; may be null.
/// \param BFI Block frequency information for \p F; may be null.
bool shouldOptimizeForSize(Function *F, ProfileSummaryInfo *PSI,
                           BlockFrequencyInfo *BFI);

/// Returns true if basic block \p BB is suggested to be size-optimized based
/// on the profile.
///
/// \param BB  The basic block to query.
/// \param PSI Profile summary information; may be null.
/// \param BFI Block frequency information for the enclosing function; may be
///            null.
bool shouldOptimizeForSize(BasicBlock *BB, ProfileSummaryInfo *PSI,
                           BlockFrequencyInfo *BFI);

} // end namespace llvm

#endif // LLVM_TRANSFORMS_UTILS_SIZEOPTS_H
2323
2424 class AssumptionCache;
2525 class BasicBlock;
26 class BlockFrequencyInfo;
2627 class DependenceInfo;
2728 class DominatorTree;
2829 class Loop;
2930 class LoopInfo;
3031 class MDNode;
32 class ProfileSummaryInfo;
3133 class OptimizationRemarkEmitter;
3234 class ScalarEvolution;
3335
119121 MDNode *GetUnrollMetadata(MDNode *LoopID, StringRef Name);
120122
121123 TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
122 Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, int OptLevel,
124 Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI,
125 BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, int OptLevel,
123126 Optional UserThreshold, Optional UserCount,
124127 Optional UserAllowPartial, Optional UserRuntime,
125128 Optional UserUpperBound, Optional UserAllowPeeling);
7070 class LoopAccessInfo;
7171 class LoopInfo;
7272 class OptimizationRemarkEmitter;
73 class ProfileSummaryInfo;
7374 class ScalarEvolution;
7475 class TargetLibraryInfo;
7576 class TargetTransformInfo;
9596 AssumptionCache *AC;
9697 std::function *GetLAA;
9798 OptimizationRemarkEmitter *ORE;
99 ProfileSummaryInfo *PSI;
98100
99101 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
100102
104106 BlockFrequencyInfo &BFI_, TargetLibraryInfo *TLI_,
105107 DemandedBits &DB_, AliasAnalysis &AA_, AssumptionCache &AC_,
106108 std::function &GetLAA_,
107 OptimizationRemarkEmitter &ORE);
109 OptimizationRemarkEmitter &ORE_, ProfileSummaryInfo *PSI_);
108110
109111 bool processLoop(Loop *L);
110112 };
574574 Options.DoCounterPromotion = true;
575575 Options.UseBFIInPromotion = IsCS;
576576 MPM.addPass(InstrProfiling(Options, IsCS));
577 } else if (!ProfileFile.empty())
577 } else if (!ProfileFile.empty()) {
578578 MPM.addPass(PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS));
579 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
580 // RequireAnalysisPass for PSI before subsequent non-module passes.
581 MPM.addPass(RequireAnalysisPass());
582 }
579583 }
580584
581585 static InlineParams
648652 MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
649653 PGOOpt->ProfileRemappingFile,
650654 Phase == ThinLTOPhase::PreLink));
655 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
656 // RequireAnalysisPass for PSI before subsequent non-module passes.
657 MPM.addPass(RequireAnalysisPass());
651658 // Do not invoke ICP in the ThinLTOPrelink phase as it makes it hard
652659 // for the profile annotation to be accurate in the ThinLTO backend.
653660 if (Phase != ThinLTOPhase::PreLink)
10641071 MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
10651072 PGOOpt->ProfileRemappingFile,
10661073 false /* ThinLTOPhase::PreLink */));
1074 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1075 // RequireAnalysisPass for PSI before subsequent non-module passes.
1076 MPM.addPass(RequireAnalysisPass());
10671077 }
10681078
10691079 // Remove unused virtual tables to improve the quality of code generated by
41774177 auto InstCombineErase = [this](Instruction *I) {
41784178 eraseInstFromFunction(*I);
41794179 };
4180 LibCallSimplifier Simplifier(DL, &TLI, ORE, InstCombineRAUW,
4180 LibCallSimplifier Simplifier(DL, &TLI, ORE, BFI, PSI, InstCombineRAUW,
41814181 InstCombineErase);
41824182 if (Value *With = Simplifier.optimizeCall(CI)) {
41834183 ++NumSimplified;
5151
5252 class APInt;
5353 class AssumptionCache;
54 class BlockFrequencyInfo;
5455 class DataLayout;
5556 class DominatorTree;
5657 class GEPOperator;
5758 class GlobalVariable;
5859 class LoopInfo;
5960 class OptimizationRemarkEmitter;
61 class ProfileSummaryInfo;
6062 class TargetLibraryInfo;
6163 class User;
6264
303305 const DataLayout &DL;
304306 const SimplifyQuery SQ;
305307 OptimizationRemarkEmitter &ORE;
308 BlockFrequencyInfo *BFI;
309 ProfileSummaryInfo *PSI;
306310
307311 // Optional analyses. When non-null, these can both be used to do better
308312 // combining and will be updated to reflect any changes.
314318 InstCombiner(InstCombineWorklist &Worklist, BuilderTy &Builder,
315319 bool MinimizeSize, bool ExpensiveCombines, AliasAnalysis *AA,
316320 AssumptionCache &AC, TargetLibraryInfo &TLI, DominatorTree &DT,
317 OptimizationRemarkEmitter &ORE, const DataLayout &DL,
318 LoopInfo *LI)
321 OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
322 ProfileSummaryInfo *PSI, const DataLayout &DL, LoopInfo *LI)
319323 : Worklist(Worklist), Builder(Builder), MinimizeSize(MinimizeSize),
320324 ExpensiveCombines(ExpensiveCombines), AA(AA), AC(AC), TLI(TLI), DT(DT),
321 DL(DL), SQ(DL, &TLI, &DT, &AC), ORE(ORE), LI(LI) {}
325 DL(DL), SQ(DL, &TLI, &DT, &AC), ORE(ORE), BFI(BFI), PSI(PSI), LI(LI) {}
322326
323327 /// Run the combiner over the entire worklist until it is empty.
324328 ///
4545 #include "llvm/Analysis/AliasAnalysis.h"
4646 #include "llvm/Analysis/AssumptionCache.h"
4747 #include "llvm/Analysis/BasicAliasAnalysis.h"
48 #include "llvm/Analysis/BlockFrequencyInfo.h"
4849 #include "llvm/Analysis/CFG.h"
4950 #include "llvm/Analysis/ConstantFolding.h"
5051 #include "llvm/Analysis/EHPersonalities.h"
5152 #include "llvm/Analysis/GlobalsModRef.h"
5253 #include "llvm/Analysis/InstructionSimplify.h"
54 #include "llvm/Analysis/LazyBlockFrequencyInfo.h"
5355 #include "llvm/Analysis/LoopInfo.h"
5456 #include "llvm/Analysis/MemoryBuiltins.h"
5557 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
58 #include "llvm/Analysis/ProfileSummaryInfo.h"
5659 #include "llvm/Analysis/TargetFolder.h"
5760 #include "llvm/Analysis/TargetLibraryInfo.h"
5861 #include "llvm/Analysis/ValueTracking.h"
34773480 static bool combineInstructionsOverFunction(
34783481 Function &F, InstCombineWorklist &Worklist, AliasAnalysis *AA,
34793482 AssumptionCache &AC, TargetLibraryInfo &TLI, DominatorTree &DT,
3480 OptimizationRemarkEmitter &ORE, bool ExpensiveCombines = true,
3483 OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
3484 ProfileSummaryInfo *PSI, bool ExpensiveCombines = true,
34813485 LoopInfo *LI = nullptr) {
34823486 auto &DL = F.getParent()->getDataLayout();
34833487 ExpensiveCombines |= EnableExpensiveCombines;
35083512 MadeIRChange |= prepareICWorklistFromFunction(F, DL, &TLI, Worklist);
35093513
35103514 InstCombiner IC(Worklist, Builder, F.hasMinSize(), ExpensiveCombines, AA,
3511 AC, TLI, DT, ORE, DL, LI);
3515 AC, TLI, DT, ORE, BFI, PSI, DL, LI);
35123516 IC.MaxArraySizeForCombine = MaxArraySize;
35133517
35143518 if (!IC.run())
35283532 auto *LI = AM.getCachedResult(F);
35293533
35303534 auto *AA = &AM.getResult(F);
3535 const ModuleAnalysisManager &MAM =
3536 AM.getResult(F).getManager();
3537 ProfileSummaryInfo *PSI =
3538 MAM.getCachedResult(*F.getParent());
3539 auto *BFI = (PSI && PSI->hasProfileSummary()) ?
3540 &AM.getResult(F) : nullptr;
3541
35313542 if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, DT, ORE,
3532 ExpensiveCombines, LI))
3543 BFI, PSI, ExpensiveCombines, LI))
35333544 // No changes, all analyses are preserved.
35343545 return PreservedAnalyses::all();
35353546
35533564 AU.addPreserved();
35543565 AU.addPreserved();
35553566 AU.addPreserved();
3567 AU.addRequired();
3568 LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
35563569 }
35573570
35583571 bool InstructionCombiningPass::runOnFunction(Function &F) {
35693582 // Optional analyses.
35703583 auto *LIWP = getAnalysisIfAvailable();
35713584 auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
3585 ProfileSummaryInfo *PSI =
3586 &getAnalysis().getPSI();
3587 BlockFrequencyInfo *BFI =
3588 (PSI && PSI->hasProfileSummary()) ?
3589 &getAnalysis().getBFI() :
3590 nullptr;
35723591
35733592 return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, DT, ORE,
3574 ExpensiveCombines, LI);
3593 BFI, PSI, ExpensiveCombines, LI);
35753594 }
35763595
35773596 char InstructionCombiningPass::ID = 0;
35843603 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
35853604 INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
35863605 INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
3606 INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass)
3607 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
35873608 INITIALIZE_PASS_END(InstructionCombiningPass, "instcombine",
35883609 "Combine redundant instructions", false, false)
35893610
4040 #include "llvm/ADT/SmallVector.h"
4141 #include "llvm/ADT/Statistic.h"
4242 #include "llvm/Analysis/BlockFrequencyInfo.h"
43 #include "llvm/Analysis/ProfileSummaryInfo.h"
4344 #include "llvm/Analysis/TargetTransformInfo.h"
4445 #include "llvm/Transforms/Utils/Local.h"
4546 #include "llvm/IR/BasicBlock.h"
5960 #include "llvm/Support/Debug.h"
6061 #include "llvm/Support/raw_ostream.h"
6162 #include "llvm/Transforms/Scalar.h"
63 #include "llvm/Transforms/Utils/SizeOpts.h"
6264 #include
6365 #include
6466 #include
110112 if (ConstHoistWithBlockFrequency)
111113 AU.addRequired();
112114 AU.addRequired();
115 AU.addRequired();
113116 AU.addRequired();
114117 }
115118
125128 "Constant Hoisting", false, false)
126129 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
127130 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
131 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
128132 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
129133 INITIALIZE_PASS_END(ConstantHoistingLegacyPass, "consthoist",
130134 "Constant Hoisting", false, false)
147151 ConstHoistWithBlockFrequency
148152 ? &getAnalysis().getBFI()
149153 : nullptr,
150 Fn.getEntryBlock());
154 Fn.getEntryBlock(),
155 &getAnalysis().getPSI());
151156
152157 if (MadeChange) {
153158 LLVM_DEBUG(dbgs() << "********** Function after Constant Hoisting: "
547552 ConstCandVecType::iterator &MaxCostItr) {
548553 unsigned NumUses = 0;
549554
550 if(!Entry->getParent()->hasOptSize() || std::distance(S,E) > 100) {
555 bool OptForSize = Entry->getParent()->hasOptSize() ||
556 llvm::shouldOptimizeForSize(Entry->getParent(), PSI, BFI);
557 if (!OptForSize || std::distance(S,E) > 100) {
551558 for (auto ConstCand = S; ConstCand != E; ++ConstCand) {
552559 NumUses += ConstCand->Uses.size();
553560 if (ConstCand->CumulativeCost > MaxCostItr->CumulativeCost)
918925 /// Optimize expensive integer constants in the given function.
919926 bool ConstantHoistingPass::runImpl(Function &Fn, TargetTransformInfo &TTI,
920927 DominatorTree &DT, BlockFrequencyInfo *BFI,
921 BasicBlock &Entry) {
928 BasicBlock &Entry, ProfileSummaryInfo *PSI) {
922929 this->TTI = &TTI;
923930 this->DT = &DT;
924931 this->BFI = BFI;
925932 this->DL = &Fn.getParent()->getDataLayout();
926933 this->Ctx = &Fn.getContext();
927934 this->Entry = &Entry;
935 this->PSI = PSI;
928936 // Collect all constant candidates.
929937 collectConstantCandidates(Fn);
930938
961969 auto BFI = ConstHoistWithBlockFrequency
962970 ? &AM.getResult(F)
963971 : nullptr;
964 if (!runImpl(F, TTI, DT, BFI, F.getEntryBlock()))
972 auto &MAM = AM.getResult(F).getManager();
973 auto *PSI = MAM.getCachedResult(*F.getParent());
974 if (!runImpl(F, TTI, DT, BFI, F.getEntryBlock(), PSI))
965975 return PreservedAnalyses::all();
966976
967977 PreservedAnalyses PA;
2828 #include "llvm/ADT/Statistic.h"
2929 #include "llvm/Analysis/AliasAnalysis.h"
3030 #include "llvm/Analysis/AssumptionCache.h"
31 #include "llvm/Analysis/BlockFrequencyInfo.h"
3132 #include "llvm/Analysis/GlobalsModRef.h"
33 #include "llvm/Analysis/LazyBlockFrequencyInfo.h"
3234 #include "llvm/Analysis/LoopAccessAnalysis.h"
3335 #include "llvm/Analysis/LoopAnalysisManager.h"
3436 #include "llvm/Analysis/LoopInfo.h"
3537 #include "llvm/Analysis/MemorySSA.h"
38 #include "llvm/Analysis/ProfileSummaryInfo.h"
3639 #include "llvm/Analysis/ScalarEvolution.h"
3740 #include "llvm/Analysis/ScalarEvolutionExpander.h"
3841 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
5356 #include "llvm/Transforms/Scalar.h"
5457 #include "llvm/Transforms/Utils.h"
5558 #include "llvm/Transforms/Utils/LoopVersioning.h"
59 #include "llvm/Transforms/Utils/SizeOpts.h"
5660 #include
5761 #include
5862 #include
158162 class LoadEliminationForLoop {
159163 public:
160164 LoadEliminationForLoop(Loop *L, LoopInfo *LI, const LoopAccessInfo &LAI,
161 DominatorTree *DT)
162 : L(L), LI(LI), LAI(LAI), DT(DT), PSE(LAI.getPSE()) {}
165 DominatorTree *DT, BlockFrequencyInfo *BFI,
166 ProfileSummaryInfo* PSI)
167 : L(L), LI(LI), LAI(LAI), DT(DT), BFI(BFI), PSI(PSI), PSE(LAI.getPSE()) {}
163168
164169 /// Look through the loop-carried and loop-independent dependences in
165170 /// this loop and find store->load dependences.
528533 }
529534
530535 if (!Checks.empty() || !LAI.getPSE().getUnionPredicate().isAlwaysTrue()) {
531 if (L->getHeader()->getParent()->hasOptSize()) {
536 auto *HeaderBB = L->getHeader();
537 auto *F = HeaderBB->getParent();
538 bool OptForSize = F->hasOptSize() ||
539 llvm::shouldOptimizeForSize(HeaderBB, PSI, BFI);
540 if (OptForSize) {
532541 LLVM_DEBUG(
533542 dbgs() << "Versioning is needed but not allowed when optimizing "
534543 "for size.\n");
571580 LoopInfo *LI;
572581 const LoopAccessInfo &LAI;
573582 DominatorTree *DT;
583 BlockFrequencyInfo *BFI;
584 ProfileSummaryInfo *PSI;
574585 PredicatedScalarEvolution PSE;
575586 };
576587
578589
579590 static bool
580591 eliminateLoadsAcrossLoops(Function &F, LoopInfo &LI, DominatorTree &DT,
592 BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
581593 function_ref GetLAI) {
582594 // Build up a worklist of inner-loops to transform to avoid iterator
583595 // invalidation.
596608 bool Changed = false;
597609 for (Loop *L : Worklist) {
598610 // The actual work is performed by LoadEliminationForLoop.
599 LoadEliminationForLoop LEL(L, &LI, GetLAI(*L), &DT);
611 LoadEliminationForLoop LEL(L, &LI, GetLAI(*L), &DT, BFI, PSI);
600612 Changed |= LEL.processLoop();
601613 }
602614 return Changed;
621633 auto &LI = getAnalysis().getLoopInfo();
622634 auto &LAA = getAnalysis();
623635 auto &DT = getAnalysis().getDomTree();
636 auto *PSI = &getAnalysis().getPSI();
637 auto *BFI = (PSI && PSI->hasProfileSummary()) ?
638 &getAnalysis().getBFI() :
639 nullptr;
624640
625641 // Process each loop nest in the function.
626642 return eliminateLoadsAcrossLoops(
627 F, LI, DT,
643 F, LI, DT, BFI, PSI,
628644 [&LAA](Loop &L) -> const LoopAccessInfo & { return LAA.getInfo(&L); });
629645 }
630646
637653 AU.addRequired();
638654 AU.addPreserved();
639655 AU.addPreserved();
656 AU.addRequired();
657 LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
640658 }
641659 };
642660
652670 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
653671 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
654672 INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
673 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
674 INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass)
655675 INITIALIZE_PASS_END(LoopLoadElimination, LLE_OPTION, LLE_name, false, false)
656676
657677 FunctionPass *llvm::createLoopLoadEliminationPass() {
667687 auto &TLI = AM.getResult(F);
668688 auto &AA = AM.getResult(F);
669689 auto &AC = AM.getResult(F);
690 auto &MAM = AM.getResult(F).getManager();
691 auto *PSI = MAM.getCachedResult(*F.getParent());
692 auto *BFI = (PSI && PSI->hasProfileSummary()) ?
693 &AM.getResult(F) : nullptr;
670694 MemorySSA *MSSA = EnableMSSALoopDependency
671695 ? &AM.getResult(F).getMSSA()
672696 : nullptr;
673697
674698 auto &LAM = AM.getResult(F).getManager();
675699 bool Changed = eliminateLoadsAcrossLoops(
676 F, LI, DT, [&](Loop &L) -> const LoopAccessInfo & {
700 F, LI, DT, BFI, PSI, [&](Loop &L) -> const LoopAccessInfo & {
677701 LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI, MSSA};
678702 return LAM.getResult(L, AR);
679703 });
293293 return LoopUnrollResult::Unmodified;
294294
295295 TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences(
296 L, SE, TTI, OptLevel, None, None, None, None, None, None);
296 L, SE, TTI, nullptr, nullptr, OptLevel,
297 None, None, None, None, None, None);
297298 if (AllowUnrollAndJam.getNumOccurrences() > 0)
298299 UP.UnrollAndJam = AllowUnrollAndJam;
299300 if (UnrollAndJamThreshold.getNumOccurrences() > 0)
2222 #include "llvm/ADT/SmallVector.h"
2323 #include "llvm/ADT/StringRef.h"
2424 #include "llvm/Analysis/AssumptionCache.h"
25 #include "llvm/Analysis/BlockFrequencyInfo.h"
2526 #include "llvm/Analysis/CodeMetrics.h"
27 #include "llvm/Analysis/LazyBlockFrequencyInfo.h"
2628 #include "llvm/Analysis/LoopAnalysisManager.h"
2729 #include "llvm/Analysis/LoopInfo.h"
2830 #include "llvm/Analysis/LoopPass.h"
5456 #include "llvm/Transforms/Utils.h"
5557 #include "llvm/Transforms/Utils/LoopSimplify.h"
5658 #include "llvm/Transforms/Utils/LoopUtils.h"
59 #include "llvm/Transforms/Utils/SizeOpts.h"
5760 #include "llvm/Transforms/Utils/UnrollLoop.h"
5861 #include
5962 #include
164167 /// Gather the various unrolling parameters based on the defaults, compiler
165168 /// flags, TTI overrides and user specified parameters.
166169 TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
167 Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, int OptLevel,
170 Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI,
171 BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, int OptLevel,
168172 Optional UserThreshold, Optional UserCount,
169173 Optional UserAllowPartial, Optional UserRuntime,
170174 Optional UserUpperBound, Optional UserAllowPeeling) {
197201 TTI.getUnrollingPreferences(L, SE, UP);
198202
199203 // Apply size attributes
200 if (L->getHeader()->getParent()->hasOptSize()) {
204 bool OptForSize = L->getHeader()->getParent()->hasOptSize() ||
205 llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI);
206 if (OptForSize) {
201207 UP.Threshold = UP.OptSizeThreshold;
202208 UP.PartialThreshold = UP.PartialOptSizeThreshold;
203209 }
962968 static LoopUnrollResult tryToUnrollLoop(
963969 Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
964970 const TargetTransformInfo &TTI, AssumptionCache &AC,
965 OptimizationRemarkEmitter &ORE, bool PreserveLCSSA, int OptLevel,
971 OptimizationRemarkEmitter &ORE,
972 BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
973 bool PreserveLCSSA, int OptLevel,
966974 bool OnlyWhenForced, bool ForgetAllSCEV, Optional ProvidedCount,
967975 Optional ProvidedThreshold, Optional ProvidedAllowPartial,
968976 Optional ProvidedRuntime, Optional ProvidedUpperBound,
988996 bool NotDuplicatable;
989997 bool Convergent;
990998 TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences(
991 L, SE, TTI, OptLevel, ProvidedThreshold, ProvidedCount,
999 L, SE, TTI, BFI, PSI, OptLevel, ProvidedThreshold, ProvidedCount,
9921000 ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound,
9931001 ProvidedAllowPeeling);
9941002 // Exit early if unrolling is disabled.
11751183 bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
11761184
11771185 LoopUnrollResult Result = tryToUnrollLoop(
1178 L, DT, LI, SE, TTI, AC, ORE, PreserveLCSSA, OptLevel, OnlyWhenForced,
1186 L, DT, LI, SE, TTI, AC, ORE, nullptr, nullptr,
1187 PreserveLCSSA, OptLevel, OnlyWhenForced,
11791188 ForgetAllSCEV, ProvidedCount, ProvidedThreshold, ProvidedAllowPartial,
11801189 ProvidedRuntime, ProvidedUpperBound, ProvidedAllowPeeling);
11811190
12561265
12571266 bool Changed =
12581267 tryToUnrollLoop(&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, *ORE,
1268 /*BFI*/ nullptr, /*PSI*/ nullptr,
12591269 /*PreserveLCSSA*/ true, OptLevel, OnlyWhenForced,
12601270 /*ForgetAllSCEV*/ false, /*Count*/ None,
12611271 /*Threshold*/ None, /*AllowPartial*/ false,
13581368 AM.getResult(F).getManager();
13591369 ProfileSummaryInfo *PSI =
13601370 MAM.getCachedResult(*F.getParent());
1371 auto *BFI = (PSI && PSI->hasProfileSummary()) ?
1372 &AM.getResult(F) : nullptr;
13611373
13621374 bool Changed = false;
13631375
13931405 // The API here is quite complex to call and we allow to select some
13941406 // flavors of unrolling during construction time (by setting UnrollOpts).
13951407 LoopUnrollResult Result = tryToUnrollLoop(
1396 &L, DT, &LI, SE, TTI, AC, ORE,
1408 &L, DT, &LI, SE, TTI, AC, ORE, BFI, PSI,
13971409 /*PreserveLCSSA*/ true, UnrollOpts.OptLevel, UnrollOpts.OnlyWhenForced,
13981410 /*ForgetAllSCEV*/ false, /*Count*/ None,
13991411 /*Threshold*/ None, UnrollOpts.AllowPartial, UnrollOpts.AllowRuntime,
5050 SimplifyCFG.cpp
5151 SimplifyIndVar.cpp
5252 SimplifyLibCalls.cpp
53 SizeOpts.cpp
5354 SplitModule.cpp
5455 StripNonLineTableDebugInfo.cpp
5556 SymbolRewriter.cpp
1515 #include "llvm/ADT/SmallString.h"
1616 #include "llvm/ADT/StringMap.h"
1717 #include "llvm/ADT/Triple.h"
18 #include "llvm/Analysis/BlockFrequencyInfo.h"
1819 #include "llvm/Analysis/ConstantFolding.h"
1920 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
21 #include "llvm/Analysis/ProfileSummaryInfo.h"
2022 #include "llvm/Analysis/TargetLibraryInfo.h"
2123 #include "llvm/Transforms/Utils/Local.h"
2224 #include "llvm/Analysis/ValueTracking.h"
3335 #include "llvm/Support/CommandLine.h"
3436 #include "llvm/Support/KnownBits.h"
3537 #include "llvm/Transforms/Utils/BuildLibCalls.h"
38 #include "llvm/Transforms/Utils/SizeOpts.h"
3639
3740 using namespace llvm;
3841 using namespace PatternMatch;
23742377
23752378 // Don't rewrite fputs to fwrite when optimising for size because fwrite
23762379 // requires more arguments and thus extra MOVs are required.
2377 if (CI->getFunction()->hasOptSize())
2380 bool OptForSize = CI->getFunction()->hasOptSize() ||
2381 llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI);
2382 if (OptForSize)
23782383 return nullptr;
23792384
23802385 // Check if has any use
27492754 LibCallSimplifier::LibCallSimplifier(
27502755 const DataLayout &DL, const TargetLibraryInfo *TLI,
27512756 OptimizationRemarkEmitter &ORE,
2757 BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
27522758 function_ref Replacer,
27532759 function_ref Eraser)
2754 : FortifiedSimplifier(TLI), DL(DL), TLI(TLI), ORE(ORE),
2760 : FortifiedSimplifier(TLI), DL(DL), TLI(TLI), ORE(ORE), BFI(BFI), PSI(PSI),
27552761 UnsafeFPShrink(false), Replacer(Replacer), Eraser(Eraser) {}
27562762
27572763 void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) {
0 //===-- SizeOpts.cpp - code size optimization related code ----------------===//
1 //
2 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
3 // See https://llvm.org/LICENSE.txt for license information.
4 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
5 //
6 //===----------------------------------------------------------------------===//
7 //
8 // This file contains some shared code size optimization related code.
9 //
10 //===----------------------------------------------------------------------===//
11
12 #include "llvm/Analysis/BlockFrequencyInfo.h"
13 #include "llvm/Analysis/ProfileSummaryInfo.h"
14 #include "llvm/Support/CommandLine.h"
15 #include "llvm/Transforms/Utils/SizeOpts.h"
16 using namespace llvm;
17
18 static cl::opt ProfileGuidedSizeOpt(
19 "pgso", cl::Hidden, cl::init(true),
20 cl::desc("Enable the profile guided size optimization. "));
21
22 bool llvm::shouldOptimizeForSize(Function *F, ProfileSummaryInfo *PSI,
23 BlockFrequencyInfo *BFI) {
24 assert(F);
25 if (!PSI || !BFI || !PSI->hasProfileSummary())
26 return false;
27 return ProfileGuidedSizeOpt && PSI->isFunctionColdInCallGraph(F, *BFI);
28 }
29
30 bool llvm::shouldOptimizeForSize(BasicBlock *BB, ProfileSummaryInfo *PSI,
31 BlockFrequencyInfo *BFI) {
32 assert(BB);
33 if (!PSI || !BFI || !PSI->hasProfileSummary())
34 return false;
35 return ProfileGuidedSizeOpt && PSI->isColdBlock(BB, BFI);
36 }
8787 #include "llvm/Analysis/LoopIterator.h"
8888 #include "llvm/Analysis/MemorySSA.h"
8989 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
90 #include "llvm/Analysis/ProfileSummaryInfo.h"
9091 #include "llvm/Analysis/ScalarEvolution.h"
9192 #include "llvm/Analysis/ScalarEvolutionExpander.h"
9293 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
133134 #include "llvm/Transforms/Utils/LoopSimplify.h"
134135 #include "llvm/Transforms/Utils/LoopUtils.h"
135136 #include "llvm/Transforms/Utils/LoopVersioning.h"
137 #include "llvm/Transforms/Utils/SizeOpts.h"
136138 #include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
137139 #include
138140 #include
14511453 auto *LAA = &getAnalysis();
14521454 auto *DB = &getAnalysis().getDemandedBits();
14531455 auto *ORE = &getAnalysis().getORE();
1456 auto *PSI = &getAnalysis().getPSI();
14541457
14551458 std::function GetLAA =
14561459 [&](Loop &L) -> const LoopAccessInfo & { return LAA->getInfo(&L); };
14571460
14581461 return Impl.runImpl(F, *SE, *LI, *TTI, *DT, *BFI, TLI, *DB, *AA, *AC,
1459 GetLAA, *ORE);
1462 GetLAA, *ORE, PSI);
14601463 }
14611464
14621465 void getAnalysisUsage(AnalysisUsage &AU) const override {
14821485
14831486 AU.addPreserved();
14841487 AU.addPreserved();
1488 AU.addRequired();
14851489 }
14861490 };
14871491
60536057 INITIALIZE_PASS_DEPENDENCY(LoopAccessLegacyAnalysis)
60546058 INITIALIZE_PASS_DEPENDENCY(DemandedBitsWrapperPass)
60556059 INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
6060 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
60566061 INITIALIZE_PASS_END(LoopVectorize, LV_NAME, lv_name, false, false)
60576062
60586063 namespace llvm {
71467151 Loop *L, PredicatedScalarEvolution &PSE, LoopInfo *LI, DominatorTree *DT,
71477152 LoopVectorizationLegality *LVL, TargetTransformInfo *TTI,
71487153 TargetLibraryInfo *TLI, DemandedBits *DB, AssumptionCache *AC,
7149 OptimizationRemarkEmitter *ORE, LoopVectorizeHints &Hints) {
7154 OptimizationRemarkEmitter *ORE, BlockFrequencyInfo *BFI,
7155 ProfileSummaryInfo *PSI, LoopVectorizeHints &Hints) {
71507156
71517157 assert(EnableVPlanNativePath && "VPlan-native path is disabled.");
71527158 Function *F = L->getHeader()->getParent();
71617167 // Get user vectorization factor.
71627168 const unsigned UserVF = Hints.getWidth();
71637169
7164 // Check the function attributes to find out if this function should be
7165 // optimized for size.
7170 // Check the function attributes and profiles to find out if this function
7171 // should be optimized for size.
71667172 bool OptForSize =
7167 Hints.getForce() != LoopVectorizeHints::FK_Enabled && F->hasOptSize();
7173 Hints.getForce() != LoopVectorizeHints::FK_Enabled &&
7174 (F->hasOptSize() ||
7175 llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI));
71687176
71697177 // Plan how to best vectorize, return the best VF and its cost.
71707178 const VectorizationFactor VF = LVP.planInVPlanNativePath(OptForSize, UserVF);
72447252 return false;
72457253 }
72467254
7247 // Check the function attributes to find out if this function should be
7248 // optimized for size.
7255 // Check the function attributes and profiles to find out if this function
7256 // should be optimized for size.
72497257 bool OptForSize =
7250 Hints.getForce() != LoopVectorizeHints::FK_Enabled && F->hasOptSize();
7258 Hints.getForce() != LoopVectorizeHints::FK_Enabled &&
7259 (F->hasOptSize() ||
7260 llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI));
72517261
72527262 // Entrance to the VPlan-native vectorization path. Outer loops are processed
72537263 // here. They may require CFG and instruction level transformations before
72567266 // pipeline.
72577267 if (!L->empty())
72587268 return processLoopInVPlanNativePath(L, PSE, LI, DT, &LVL, TTI, TLI, DB, AC,
7259 ORE, Hints);
7269 ORE, BFI, PSI, Hints);
72607270
72617271 assert(L->empty() && "Inner loop expected.");
72627272 // Check the loop for a trip count threshold: vectorize loops with a tiny trip
75227532 DominatorTree &DT_, BlockFrequencyInfo &BFI_, TargetLibraryInfo *TLI_,
75237533 DemandedBits &DB_, AliasAnalysis &AA_, AssumptionCache &AC_,
75247534 std::function &GetLAA_,
7525 OptimizationRemarkEmitter &ORE_) {
7535 OptimizationRemarkEmitter &ORE_, ProfileSummaryInfo *PSI_) {
75267536 SE = &SE_;
75277537 LI = &LI_;
75287538 TTI = &TTI_;
75347544 GetLAA = &GetLAA_;
75357545 DB = &DB_;
75367546 ORE = &ORE_;
7547 PSI = PSI_;
75377548
75387549 // Don't attempt if
75397550 // 1. the target claims to have no vector registers, and
76027613 LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI, MSSA};
76037614 return LAM.getResult(L, AR);
76047615 };
7616 const ModuleAnalysisManager &MAM =
7617 AM.getResult<ModuleAnalysisManagerFunctionProxy>(F).getManager();
7618 ProfileSummaryInfo *PSI =
7619 MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
76057620 bool Changed =
7606 runImpl(F, SE, LI, TTI, DT, BFI, &TLI, DB, AA, AC, GetLAA, ORE);
7621 runImpl(F, SE, LI, TTI, DT, BFI, &TLI, DB, AA, AC, GetLAA, ORE, PSI);
76077622 if (!Changed)
76087623 return PreservedAnalyses::all();
76097624 PreservedAnalyses PA;
105105 ; CHECK-O-NEXT: Running pass: InstCombinePass
106106 ; CHECK-O-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis
107107 ; CHECK-O-NEXT: Running analysis: AAManager
108 ; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
108109 ; CHECK-EP-PEEPHOLE-NEXT: Running pass: NoOpFunctionPass
109110 ; CHECK-O-NEXT: Running pass: SimplifyCFGPass
110111 ; CHECK-O-NEXT: Finished llvm::Function pass manager run.
244245 ; CHECK-O-NEXT: Running pass: SLPVectorizerPass
245246 ; CHECK-O-NEXT: Running pass: InstCombinePass
246247 ; CHECK-O-NEXT: Running pass: LoopUnrollPass
247 ; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
248248 ; CHECK-O-NEXT: Running pass: WarnMissedTransformationsPass
249249 ; CHECK-O-NEXT: Running pass: InstCombinePass
250250 ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}OptimizationRemarkEmitterAnalysis
6868 ; CHECK-O2-NEXT: Starting llvm::Function pass manager run.
6969 ; CHECK-O3-NEXT: Running pass: AggressiveInstCombinePass
7070 ; CHECK-O2-NEXT: Running pass: InstCombinePass
71 ; CHECK-O2-NEXT: Running analysis: OuterAnalysisManagerProxy
7172 ; CHECK-EP-Peephole-NEXT: Running pass: NoOpFunctionPass
7273 ; CHECK-O2-NEXT: Finished llvm::Function pass manager run.
7374 ; CHECK-O2-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}InlinerPass>
8787 ; CHECK-O-NEXT: Running pass: InstCombinePass
8888 ; CHECK-PRELINK-O-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis
8989 ; CHECK-O-NEXT: Running analysis: AAManager
90 ; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
9091 ; CHECK-O-NEXT: Running pass: SimplifyCFGPass
9192 ; CHECK-O-NEXT: Finished llvm::Function pass manager run.
9293 ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA
218219 ; CHECK-POSTLINK-O-NEXT: Running pass: SLPVectorizerPass
219220 ; CHECK-POSTLINK-O-NEXT: Running pass: InstCombinePass
220221 ; CHECK-POSTLINK-O-NEXT: Running pass: LoopUnrollPass
221 ; CHECK-POSTLINK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
222222 ; CHECK-POSTLINK-O-NEXT: Running pass: WarnMissedTransformationsPass
223223 ; CHECK-POSTLINK-O-NEXT: Running pass: InstCombinePass
224224 ; CHECK-POSTLINK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}OptimizationRemarkEmitterAnalysis
213213 ; CHECK-NEXT: Scalar Evolution Analysis
214214 ; CHECK-NEXT: Function Alias Analysis Results
215215 ; CHECK-NEXT: Loop Access Analysis
216 ; CHECK-NEXT: Lazy Branch Probability Analysis
217 ; CHECK-NEXT: Lazy Block Frequency Analysis
216218 ; CHECK-NEXT: Loop Load Elimination
217219 ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
218220 ; CHECK-NEXT: Function Alias Analysis Results
218218 ; CHECK-NEXT: Scalar Evolution Analysis
219219 ; CHECK-NEXT: Function Alias Analysis Results
220220 ; CHECK-NEXT: Loop Access Analysis
221 ; CHECK-NEXT: Lazy Branch Probability Analysis
222 ; CHECK-NEXT: Lazy Block Frequency Analysis
221223 ; CHECK-NEXT: Loop Load Elimination
222224 ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
223225 ; CHECK-NEXT: Function Alias Analysis Results
200200 ; CHECK-NEXT: Scalar Evolution Analysis
201201 ; CHECK-NEXT: Function Alias Analysis Results
202202 ; CHECK-NEXT: Loop Access Analysis
203 ; CHECK-NEXT: Lazy Branch Probability Analysis
204 ; CHECK-NEXT: Lazy Block Frequency Analysis
203205 ; CHECK-NEXT: Loop Load Elimination
204206 ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
205207 ; CHECK-NEXT: Function Alias Analysis Results
0 ; RUN: opt -mtriple=arm-arm-none-eabi -consthoist -S < %s | FileCheck %s
1 ; RUN: opt -mtriple=arm-arm-none-eabi -consthoist -pgso -S < %s | FileCheck %s -check-prefix=PGSO
2 ; RUN: opt -mtriple=arm-arm-none-eabi -consthoist -pgso=false -S < %s | FileCheck %s -check-prefix=NPGSO
13
24 ; There are different candidates here for the base constant: 1073876992 and
35 ; 1073876996. But we don't want to see the latter because it results in
79 entry:
810 ; CHECK-LABEL: @foo
911 ; CHECK-NOT: [[CONST1:%const_mat[0-9]*]] = add i32 %const, -4
12 ; CHECK-LABEL: @foo_pgso
1013 %0 = load volatile i32, i32* inttoptr (i32 1073876992 to i32*), align 4096
1114 %or = or i32 %0, 1
1215 store volatile i32 %or, i32* inttoptr (i32 1073876992 to i32*), align 4096
3942 }
4043
4144 attributes #0 = { minsize norecurse nounwind optsize readnone uwtable }
45
46 define void @foo_pgso() #1 !prof !14 {
47 entry:
48 ; PGSO-LABEL: @foo_pgso
49 ; PGSO-NOT: [[CONST2:%const_mat[0-9]*]] = add i32 %const, -4
50 ; NPGSO-LABEL: @foo_pgso
51 ; NPGSO: [[CONST2:%const_mat[0-9]*]] = add i32 %const, -4
52 %0 = load volatile i32, i32* inttoptr (i32 1073876992 to i32*), align 4096
53 %or = or i32 %0, 1
54 store volatile i32 %or, i32* inttoptr (i32 1073876992 to i32*), align 4096
55 %1 = load volatile i32, i32* inttoptr (i32 1073876996 to i32*), align 4
56 %and = and i32 %1, -117506048
57 store volatile i32 %and, i32* inttoptr (i32 1073876996 to i32*), align 4
58 %2 = load volatile i32, i32* inttoptr (i32 1073876992 to i32*), align 4096
59 %and1 = and i32 %2, -17367041
60 store volatile i32 %and1, i32* inttoptr (i32 1073876996 to i32*), align 4096
61 %3 = load volatile i32, i32* inttoptr (i32 1073876992 to i32*), align 4096
62 %and2 = and i32 %3, -262145
63 store volatile i32 %and2, i32* inttoptr (i32 1073876992 to i32*), align 4096
64 %4 = load volatile i32, i32* inttoptr (i32 1073876996 to i32*), align 4
65 %and3 = and i32 %4, -8323073
66 store volatile i32 %and3, i32* inttoptr (i32 1073876996 to i32*), align 4
67 store volatile i32 10420224, i32* inttoptr (i32 1073877000 to i32*), align 8
68 %5 = load volatile i32, i32* inttoptr (i32 1073876996 to i32*), align 4096
69 %or4 = or i32 %5, 65536
70 store volatile i32 %or4, i32* inttoptr (i32 1073876996 to i32*), align 4096
71 %6 = load volatile i32, i32* inttoptr (i32 1073881088 to i32*), align 8192
72 %or6.i.i = or i32 %6, 16
73 store volatile i32 %or6.i.i, i32* inttoptr (i32 1073881088 to i32*), align 8192
74 %7 = load volatile i32, i32* inttoptr (i32 1073881088 to i32*), align 8192
75 %and7.i.i = and i32 %7, -4
76 store volatile i32 %and7.i.i, i32* inttoptr (i32 1073881088 to i32*), align 8192
77 %8 = load volatile i32, i32* inttoptr (i32 1073881088 to i32*), align 8192
78 %or8.i.i = or i32 %8, 2
79 store volatile i32 %or8.i.i, i32* inttoptr (i32 1073881088 to i32*), align 8192
80 ret void
81 }
82
83 attributes #1 = { norecurse nounwind readnone uwtable } ; no optsize or minsize
84
85 !llvm.module.flags = !{!0}
86 !0 = !{i32 1, !"ProfileSummary", !1}
87 !1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
88 !2 = !{!"ProfileFormat", !"InstrProf"}
89 !3 = !{!"TotalCount", i64 10000}
90 !4 = !{!"MaxCount", i64 10}
91 !5 = !{!"MaxInternalCount", i64 1}
92 !6 = !{!"MaxFunctionCount", i64 1000}
93 !7 = !{!"NumCounts", i64 3}
94 !8 = !{!"NumFunctions", i64 3}
95 !9 = !{!"DetailedSummary", !10}
96 !10 = !{!11, !12, !13}
97 !11 = !{i32 10000, i64 100, i32 1}
98 !12 = !{i32 999000, i64 100, i32 1}
99 !13 = !{i32 999999, i64 1, i32 2}
100 !14 = !{!"function_entry_count", i64 0}
11 ; because it requires more arguments and thus extra MOVs are required.
22 ;
33 ; RUN: opt < %s -instcombine -S | FileCheck %s
4 ; RUN: opt < %s -instcombine -pgso -S | FileCheck %s -check-prefix=PGSO
5 ; RUN: opt < %s -instcombine -pgso=false -S | FileCheck %s -check-prefix=NPGSO
46
57 %struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i32, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i32, i32, [40 x i8] }
68 %struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }
2527
2628 attributes #0 = { nounwind optsize }
2729 attributes #1 = { nounwind optsize }
30
31 define i32 @main_pgso() local_unnamed_addr !prof !14 {
32 entry:
33 ; PGSO-LABEL: @main_pgso(
34 ; PGSO-NOT: call i64 @fwrite
35 ; PGSO: call i32 @fputs
36 ; NPGSO-LABEL: @main_pgso(
37 ; NPGSO: call i64 @fwrite
38 ; NPGSO-NOT: call i32 @fputs
39
40 %call = tail call %struct._IO_FILE* @fopen(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i32 0, i32 0)) #2
41 %call1 = tail call i32 @fputs(i8* getelementptr inbounds ([27 x i8], [27 x i8]* @.str.2, i32 0, i32 0), %struct._IO_FILE* %call) #2
42 ret i32 0
43 }
44
45 !llvm.module.flags = !{!0}
46 !0 = !{i32 1, !"ProfileSummary", !1}
47 !1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
48 !2 = !{!"ProfileFormat", !"InstrProf"}
49 !3 = !{!"TotalCount", i64 10000}
50 !4 = !{!"MaxCount", i64 10}
51 !5 = !{!"MaxInternalCount", i64 1}
52 !6 = !{!"MaxFunctionCount", i64 1000}
53 !7 = !{!"NumCounts", i64 3}
54 !8 = !{!"NumFunctions", i64 3}
55 !9 = !{!"DetailedSummary", !10}
56 !10 = !{!11, !12, !13}
57 !11 = !{i32 10000, i64 100, i32 1}
58 !12 = !{i32 999000, i64 100, i32 1}
59 !13 = !{i32 999999, i64 1, i32 2}
60 !14 = !{!"function_entry_count", i64 0}
0 ; RUN: opt -basicaa -loop-load-elim -S < %s | FileCheck %s
1 ; RUN: opt -basicaa -loop-load-elim -pgso -S < %s | FileCheck %s -check-prefix=PGSO
2 ; RUN: opt -basicaa -loop-load-elim -pgso=false -S < %s | FileCheck %s -check-prefix=NPGSO
13
24 ; When optimizing for size don't eliminate in this loop because the loop would
35 ; have to be versioned first because A and C may alias.
7375 for.end: ; preds = %for.body
7476 ret void
7577 }
78
79
80 ; PGSO-LABEL: @f_pgso(
81 ; NPGSO-LABEL: @f_pgso(
82 define void @f_pgso(i32* %A, i32* %B, i32* %C, i64 %N) !prof !14 {
83
84 entry:
85 br label %for.body
86
87 for.body: ; preds = %for.body, %entry
88 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
89 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
90
91 %Aidx_next = getelementptr inbounds i32, i32* %A, i64 %indvars.iv.next
92 %Bidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
93 %Cidx = getelementptr inbounds i32, i32* %C, i64 %indvars.iv
94 %Aidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
95
96 %b = load i32, i32* %Bidx, align 4
97 %a_p1 = add i32 %b, 2
98 store i32 %a_p1, i32* %Aidx_next, align 4
99
100 %a = load i32, i32* %Aidx, align 4
101 ; PGSO: %c = mul i32 %a, 2
102 ; NPGSO-NOT: %c = mul i32 %a, 2
103 %c = mul i32 %a, 2
104 store i32 %c, i32* %Cidx, align 4
105
106 %exitcond = icmp eq i64 %indvars.iv.next, %N
107 br i1 %exitcond, label %for.end, label %for.body
108
109 for.end: ; preds = %for.body
110 ret void
111 }
112
113 !llvm.module.flags = !{!0}
114 !0 = !{i32 1, !"ProfileSummary", !1}
115 !1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
116 !2 = !{!"ProfileFormat", !"InstrProf"}
117 !3 = !{!"TotalCount", i64 10000}
118 !4 = !{!"MaxCount", i64 10}
119 !5 = !{!"MaxInternalCount", i64 1}
120 !6 = !{!"MaxFunctionCount", i64 1000}
121 !7 = !{!"NumCounts", i64 3}
122 !8 = !{!"NumFunctions", i64 3}
123 !9 = !{!"DetailedSummary", !10}
124 !10 = !{!11, !12, !13}
125 !11 = !{i32 10000, i64 100, i32 1}
126 !12 = !{i32 999000, i64 100, i32 1}
127 !13 = !{i32 999999, i64 1, i32 2}
128 !14 = !{!"function_entry_count", i64 0}
0 ; RUN: opt < %s -S -loop-unroll -unroll-count=4 | FileCheck -check-prefix=CHECK_COUNT4 %s
11 ; RUN: opt < %s -S -loop-unroll | FileCheck -check-prefix=CHECK_NOCOUNT %s
2 ; RUN: opt < %s -S -passes='require<profile-summary>,function(unroll)' -pgso | FileCheck -check-prefix=PGSO %s
3 ; RUN: opt < %s -S -passes='require<profile-summary>,function(unroll)' -pgso=false | FileCheck -check-prefix=NPGSO %s
24
35
46 ;///////////////////// TEST 1 //////////////////////////////
127129 ; CHECK_NOCOUNT-LABEL: @Test4
128130 ; CHECK_NOCOUNT: phi
129131 ; CHECK_NOCOUNT: icmp
132
133 ;///////////////////// TEST 5 //////////////////////////////
134
135 ; This test shows that with PGO, this loop is cold and not unrolled.
136
137 define i32 @Test5() !prof !14 {
138 entry:
139 br label %for.body
140
141 for.body: ; preds = %for.body, %entry
142 %i.05 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
143 %arrayidx = getelementptr inbounds [24 x i32], [24 x i32]* @tab, i32 0, i32 %i.05
144 store i32 %i.05, i32* %arrayidx, align 4
145 %inc = add nuw nsw i32 %i.05, 1
146 %exitcond = icmp eq i32 %inc, 24
147 br i1 %exitcond, label %for.end, label %for.body
148
149 for.end: ; preds = %for.body
150 ret i32 42
151 }
152
153 ; PGSO-LABEL: @Test5
154 ; PGSO: phi
155 ; PGSO: icmp
156 ; NPGSO-LABEL: @Test5
157 ; NPGSO-NOT: phi
158 ; NPGSO-NOT: icmp
159
160 !llvm.module.flags = !{!0}
161 !0 = !{i32 1, !"ProfileSummary", !1}
162 !1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
163 !2 = !{!"ProfileFormat", !"InstrProf"}
164 !3 = !{!"TotalCount", i64 10000}
165 !4 = !{!"MaxCount", i64 10}
166 !5 = !{!"MaxInternalCount", i64 1}
167 !6 = !{!"MaxFunctionCount", i64 1000}
168 !7 = !{!"NumCounts", i64 3}
169 !8 = !{!"NumFunctions", i64 3}
170 !9 = !{!"DetailedSummary", !10}
171 !10 = !{!11, !12, !13}
172 !11 = !{i32 10000, i64 100, i32 1}
173 !12 = !{i32 999000, i64 100, i32 1}
174 !13 = !{i32 999999, i64 1, i32 2}
175 !14 = !{!"function_entry_count", i64 0}
11 ; loop with the optimize for size or the minimize size attributes.
22 ; REQUIRES: asserts
33 ; RUN: opt < %s -loop-vectorize -S | FileCheck %s
4 ; RUN: opt < %s -loop-vectorize -pgso -S | FileCheck %s -check-prefix=PGSO
5 ; RUN: opt < %s -loop-vectorize -pgso=false -S | FileCheck %s -check-prefix=NPGSO
46
57 target datalayout = "E-m:e-p:32:32-i64:32-f64:32:64-a:0:32-n32-S128"
68
3537 ; CHECK-LABEL: @foo_minsize(
3638 ; CHECK-NOT: <2 x i8>
3739 ; CHECK-NOT: <4 x i8>
40 ; CHECK-LABEL: @foo_pgso(
3841
3942 entry:
4043 br label %for.body
5659
5760 attributes #1 = { minsize }
5861
62 define i32 @foo_pgso() !prof !14 {
63 ; PGSO-LABEL: @foo_pgso(
64 ; PGSO-NOT: <{{[0-9]+}} x i8>
65 ; NPGSO-LABEL: @foo_pgso(
66 ; NPGSO: <{{[0-9]+}} x i8>
67
68 entry:
69 br label %for.body
70
71 for.body: ; preds = %for.body, %entry
72 %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
73 %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
74 %0 = load i8, i8* %arrayidx, align 1
75 %cmp1 = icmp eq i8 %0, 0
76 %. = select i1 %cmp1, i8 2, i8 1
77 store i8 %., i8* %arrayidx, align 1
78 %inc = add nsw i32 %i.08, 1
79 %exitcond = icmp eq i32 %i.08, 202
80 br i1 %exitcond, label %for.end, label %for.body
81
82 for.end: ; preds = %for.body
83 ret i32 0
84 }
85
86 !llvm.module.flags = !{!0}
87 !0 = !{i32 1, !"ProfileSummary", !1}
88 !1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
89 !2 = !{!"ProfileFormat", !"InstrProf"}
90 !3 = !{!"TotalCount", i64 10000}
91 !4 = !{!"MaxCount", i64 10}
92 !5 = !{!"MaxInternalCount", i64 1}
93 !6 = !{!"MaxFunctionCount", i64 1000}
94 !7 = !{!"NumCounts", i64 3}
95 !8 = !{!"NumFunctions", i64 3}
96 !9 = !{!"DetailedSummary", !10}
97 !10 = !{!11, !12, !13}
98 !11 = !{i32 10000, i64 100, i32 1}
99 !12 = !{i32 999000, i64 100, i32 1}
100 !13 = !{i32 999999, i64 1, i32 2}
101 !14 = !{!"function_entry_count", i64 0}