llvm.org GIT mirror llvm / 51d3739
[PGOMemOPSize] Preserve the DominatorTree Summary: PGOMemOPSize only modifies the CFG in a couple of places; thus we can preserve the DominatorTree with little effort. When optimizing SQLite with -O3, this patch reduces the number of nodes traversed by DFS by 3.8% and the number of times DominatorTreeBase::recalculation is called by 5.7%. Reviewers: kuhar, davide, dmgreen Reviewed By: dmgreen Subscribers: mzolotukhin, vsk, llvm-commits Differential Revision: https://reviews.llvm.org/D48914 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@336522 91177308-0d34-0410-b5e6-96231b3b80d8 Chijun Sima 1 year, 2 months ago
6 changed file(s) with 36 addition(s) and 17 deletion(s). Raw diff Collapse all Expand all
2424 #include "llvm/IR/BasicBlock.h"
2525 #include "llvm/IR/CallSite.h"
2626 #include "llvm/IR/DerivedTypes.h"
27 #include "llvm/IR/DomTreeUpdater.h"
28 #include "llvm/IR/Dominators.h"
2729 #include "llvm/IR/Function.h"
2830 #include "llvm/IR/IRBuilder.h"
2931 #include "llvm/IR/InstVisitor.h"
111113 AU.addRequired();
112114 AU.addRequired();
113115 AU.addPreserved();
116 AU.addPreserved();
114117 }
115118 };
116119 } // end anonymous namespace
132135 class MemOPSizeOpt : public InstVisitor {
133136 public:
134137 MemOPSizeOpt(Function &Func, BlockFrequencyInfo &BFI,
135 OptimizationRemarkEmitter &ORE)
136 : Func(Func), BFI(BFI), ORE(ORE), Changed(false) {
138 OptimizationRemarkEmitter &ORE, DominatorTree *DT)
139 : Func(Func), BFI(BFI), ORE(ORE), DT(DT), Changed(false) {
137140 ValueDataArray =
138141 llvm::make_unique(MemOPMaxVersion + 2);
139142 // Get the MemOPSize range information from option MemOPSizeRange,
169172 Function &Func;
170173 BlockFrequencyInfo &BFI;
171174 OptimizationRemarkEmitter &ORE;
175 DominatorTree *DT;
172176 bool Changed;
173177 std::vector WorkList;
174178 // Start of the precise range.
335339 LLVM_DEBUG(dbgs() << *BB << "\n");
336340 auto OrigBBFreq = BFI.getBlockFreq(BB);
337341
338 BasicBlock *DefaultBB = SplitBlock(BB, MI);
342 BasicBlock *DefaultBB = SplitBlock(BB, MI, DT);
339343 BasicBlock::iterator It(*MI);
340344 ++It;
341345 assert(It != DefaultBB->end());
342 BasicBlock *MergeBB = SplitBlock(DefaultBB, &(*It));
346 BasicBlock *MergeBB = SplitBlock(DefaultBB, &(*It), DT);
343347 MergeBB->setName("MemOP.Merge");
344348 BFI.setBlockFreq(MergeBB, OrigBBFreq.getFrequency());
345349 DefaultBB->setName("MemOP.Default");
346350
351 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
347352 auto &Ctx = Func.getContext();
348353 IRBuilder<> IRB(BB);
349354 BB->getTerminator()->eraseFromParent();
359364 SavedRemainCount, IPVK_MemOPSize, NumVals);
360365
361366 LLVM_DEBUG(dbgs() << "\n\n== Basic Block After==\n");
367
368 std::vector Updates;
369 if (DT)
370 Updates.reserve(2 * SizeIds.size());
362371
363372 for (uint64_t SizeId : SizeIds) {
364373 BasicBlock *CaseBB = BasicBlock::Create(
374383 IRBuilder<> IRBCase(CaseBB);
375384 IRBCase.CreateBr(MergeBB);
376385 SI->addCase(CaseSizeId, CaseBB);
386 if (DT) {
387 Updates.push_back({DominatorTree::Insert, CaseBB, MergeBB});
388 Updates.push_back({DominatorTree::Insert, BB, CaseBB});
389 }
377390 LLVM_DEBUG(dbgs() << *CaseBB << "\n");
378391 }
392 DTU.applyUpdates(Updates);
393 Updates.clear();
394
379395 setProfMetadata(Func.getParent(), SI, CaseCounts, MaxCount);
380396
381397 LLVM_DEBUG(dbgs() << *BB << "\n");
396412 } // namespace
397413
398414 static bool PGOMemOPSizeOptImpl(Function &F, BlockFrequencyInfo &BFI,
399 OptimizationRemarkEmitter &ORE) {
415 OptimizationRemarkEmitter &ORE,
416 DominatorTree *DT) {
400417 if (DisableMemOPOPT)
401418 return false;
402419
403420 if (F.hasFnAttribute(Attribute::OptimizeForSize))
404421 return false;
405 MemOPSizeOpt MemOPSizeOpt(F, BFI, ORE);
422 MemOPSizeOpt MemOPSizeOpt(F, BFI, ORE, DT);
406423 MemOPSizeOpt.perform();
407424 return MemOPSizeOpt.isChanged();
408425 }
411428 BlockFrequencyInfo &BFI =
412429 getAnalysis().getBFI();
413430 auto &ORE = getAnalysis().getORE();
414 return PGOMemOPSizeOptImpl(F, BFI, ORE);
431 auto *DTWP = getAnalysisIfAvailable();
432 DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
433 return PGOMemOPSizeOptImpl(F, BFI, ORE, DT);
415434 }
416435
417436 namespace llvm {
421440 FunctionAnalysisManager &FAM) {
422441 auto &BFI = FAM.getResult(F);
423442 auto &ORE = FAM.getResult(F);
424 bool Changed = PGOMemOPSizeOptImpl(F, BFI, ORE);
443 auto *DT = FAM.getCachedResult(F);
444 bool Changed = PGOMemOPSizeOptImpl(F, BFI, ORE, DT);
425445 if (!Changed)
426446 return PreservedAnalyses::all();
427447 auto PA = PreservedAnalyses();
428448 PA.preserve();
449 PA.preserve();
429450 return PA;
430451 }
431452 } // namespace llvm
7979 ; CHECK-NEXT: Lazy Block Frequency Analysis
8080 ; CHECK-NEXT: Optimization Remark Emitter
8181 ; CHECK-NEXT: PGOMemOPSize
82 ; CHECK-NEXT: Dominator Tree Construction
8382 ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
8483 ; CHECK-NEXT: Function Alias Analysis Results
8584 ; CHECK-NEXT: Natural Loop Information
8383 ; CHECK-NEXT: Lazy Block Frequency Analysis
8484 ; CHECK-NEXT: Optimization Remark Emitter
8585 ; CHECK-NEXT: PGOMemOPSize
86 ; CHECK-NEXT: Dominator Tree Construction
8786 ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
8887 ; CHECK-NEXT: Function Alias Analysis Results
8988 ; CHECK-NEXT: Natural Loop Information
None ; RUN: opt < %s -pgo-memop-opt -S | FileCheck %s
0 ; RUN: opt < %s -pgo-memop-opt -verify-dom-info -S | FileCheck %s
11
22 define i32 @test(i8* %a, i8* %b) !prof !1 {
33 ; CHECK_LABEL: test
None ; RUN: opt < %s -pgo-memop-opt -pgo-memop-count-threshold=90 -pgo-memop-percent-threshold=15 -S | FileCheck %s --check-prefix=MEMOP_OPT
1 ; RUN: opt < %s -passes=pgo-memop-opt -pgo-memop-count-threshold=90 -pgo-memop-percent-threshold=15 -S | FileCheck %s --check-prefix=MEMOP_OPT
2 ; RUN: opt < %s -pgo-memop-opt -pgo-memop-count-threshold=90 -pgo-memop-percent-threshold=15 -pass-remarks-with-hotness -pass-remarks-output=%t.opt.yaml -S | FileCheck %s --check-prefix=MEMOP_OPT
0 ; RUN: opt < %s -pgo-memop-opt -verify-dom-info -pgo-memop-count-threshold=90 -pgo-memop-percent-threshold=15 -S | FileCheck %s --check-prefix=MEMOP_OPT
1 ; RUN: opt < %s -passes=pgo-memop-opt -verify-dom-info -pgo-memop-count-threshold=90 -pgo-memop-percent-threshold=15 -S | FileCheck %s --check-prefix=MEMOP_OPT
2 ; RUN: opt < %s -pgo-memop-opt -verify-dom-info -pgo-memop-count-threshold=90 -pgo-memop-percent-threshold=15 -pass-remarks-with-hotness -pass-remarks-output=%t.opt.yaml -S | FileCheck %s --check-prefix=MEMOP_OPT
33 ; RUN: FileCheck %s -input-file=%t.opt.yaml --check-prefix=YAML
4 ; RUN: opt < %s -passes=pgo-memop-opt -pgo-memop-count-threshold=90 -pgo-memop-percent-threshold=15 -pass-remarks-with-hotness -pass-remarks-output=%t.opt.yaml -S | FileCheck %s --check-prefix=MEMOP_OPT
4 ; RUN: opt < %s -passes=pgo-memop-opt -verify-dom-info -pgo-memop-count-threshold=90 -pgo-memop-percent-threshold=15 -pass-remarks-with-hotness -pass-remarks-output=%t.opt.yaml -S | FileCheck %s --check-prefix=MEMOP_OPT
55 ; RUN: FileCheck %s -input-file=%t.opt.yaml --check-prefix=YAML
66
77
0 ; Test to ensure the pgo memop optimization pass doesn't try to scale
11 ; up a value profile with a 0 count, which would lead to divide by 0.
2 ; RUN: opt < %s -passes=pgo-memop-opt -pgo-memop-count-threshold=1 -S | FileCheck %s --check-prefix=MEMOP_OPT
3 ; RUN: opt < %s -pgo-memop-opt -pgo-memop-count-threshold=1 -S | FileCheck %s --check-prefix=MEMOP_OPT
2 ; RUN: opt < %s -passes=pgo-memop-opt -verify-dom-info -pgo-memop-count-threshold=1 -S | FileCheck %s --check-prefix=MEMOP_OPT
3 ; RUN: opt < %s -pgo-memop-opt -verify-dom-info -pgo-memop-count-threshold=1 -S | FileCheck %s --check-prefix=MEMOP_OPT
44
55 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
66 target triple = "x86_64-unknown-linux-gnu"