llvm.org GIT mirror llvm / aa61209
[EarlyCSE] Optionally use MemorySSA. NFC. Summary: Use MemorySSA, if requested, to do less conservative memory dependency checking. This change doesn't enable the MemorySSA enhanced EarlyCSE in the default pipelines, so should be NFC. Reviewers: dberlin, sanjoy, reames, majnemer Subscribers: mcrosier, llvm-commits Differential Revision: http://reviews.llvm.org/D19821 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@280279 91177308-0d34-0410-b5e6-96231b3b80d8 Geoff Berry 3 years ago
22 changed file(s) with 188 addition(s) and 24 deletion(s). Raw diff Collapse all Expand all
118118 void initializeDominatorTreeWrapperPassPass(PassRegistry&);
119119 void initializeDwarfEHPreparePass(PassRegistry&);
120120 void initializeEarlyCSELegacyPassPass(PassRegistry &);
121 void initializeEarlyCSEMemSSALegacyPassPass(PassRegistry &);
121122 void initializeEarlyIfConverterPass(PassRegistry&);
122123 void initializeEdgeBundlesPass(PassRegistry&);
123124 void initializeEfficiencySanitizerPass(PassRegistry&);
2626 /// cases so that instcombine and other passes are more effective. It is
2727 /// expected that a later pass of GVN will catch the interesting/hard cases.
2828 struct EarlyCSEPass : PassInfoMixin<EarlyCSEPass> {
29 EarlyCSEPass(bool UseMemorySSA = false) : UseMemorySSA(UseMemorySSA) {}
30
2931 /// \brief Run the pass over the function.
3032 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
33
34 bool UseMemorySSA;
3135 };
3236
3337 }
321321 // EarlyCSE - This pass performs a simple and fast CSE pass over the dominator
322322 // tree.
323323 //
324 FunctionPass *createEarlyCSEPass();
324 FunctionPass *createEarlyCSEPass(bool UseMemorySSA = false);
325325
326326 //===----------------------------------------------------------------------===//
327327 //
132132 void LLVMAddCorrelatedValuePropagationPass(LLVMPassManagerRef PM);
133133
134134 /** See llvm::createEarlyCSEPass function */
135 void LLVMAddEarlyCSEPass(LLVMPassManagerRef PM);
135 void LLVMAddEarlyCSEPass(LLVMPassManagerRef PM, int UseMemorySSA);
136136
137137 /** See llvm::createLowerExpectIntrinsicPass function */
138138 void LLVMAddLowerExpectIntrinsicPass(LLVMPassManagerRef PM);
138138 FUNCTION_PASS("correlated-propagation", CorrelatedValuePropagationPass())
139139 FUNCTION_PASS("dce", DCEPass())
140140 FUNCTION_PASS("dse", DSEPass())
141 FUNCTION_PASS("early-cse", EarlyCSEPass())
141 FUNCTION_PASS("early-cse", EarlyCSEPass(/*UseMemorySSA=*/false))
142 FUNCTION_PASS("early-cse-memssa", EarlyCSEPass(/*UseMemorySSA=*/true))
142143 FUNCTION_PASS("gvn-hoist", GVNHoistPass())
143144 FUNCTION_PASS("instcombine", InstCombinePass())
144145 FUNCTION_PASS("instsimplify", InstSimplifierPass())
3131 #include "llvm/Support/raw_ostream.h"
3232 #include "llvm/Transforms/Scalar.h"
3333 #include "llvm/Transforms/Utils/Local.h"
34 #include "llvm/Transforms/Utils/MemorySSA.h"
3435 #include <deque>
3536 using namespace llvm;
3637 using namespace llvm::PatternMatch;
250251 const TargetTransformInfo &TTI;
251252 DominatorTree &DT;
252253 AssumptionCache &AC;
254 MemorySSA *MSSA;
253255 typedef RecyclingAllocator<
254256 BumpPtrAllocator, ScopedHashTableVal<SimpleValue, Value *>> AllocatorTy;
255257 typedef ScopedHashTable<SimpleValue, Value *, DenseMapInfo<SimpleValue>,
311313
312314 /// \brief Set up the EarlyCSE runner for a particular function.
313315 EarlyCSE(const TargetLibraryInfo &TLI, const TargetTransformInfo &TTI,
314 DominatorTree &DT, AssumptionCache &AC)
315 : TLI(TLI), TTI(TTI), DT(DT), AC(AC), CurrentGeneration(0) {}
316 DominatorTree &DT, AssumptionCache &AC, MemorySSA *MSSA)
317 : TLI(TLI), TTI(TTI), DT(DT), AC(AC), MSSA(MSSA), CurrentGeneration(0) {}
316318
317319 bool run();
318320
486488 return TTI.getOrCreateResultFromMemIntrinsic(cast<IntrinsicInst>(Inst),
487489 ExpectedType);
488490 }
491
492 bool isSameMemGeneration(unsigned EarlierGeneration, unsigned LaterGeneration,
493 Instruction *EarlierInst, Instruction *LaterInst);
494
495 void removeMSSA(Instruction *Inst) {
496 if (!MSSA)
497 return;
498 // FIXME: Removing a store here can leave MemorySSA in an unoptimized state
499 // by creating MemoryPhis that have identical arguments and by creating
500 // MemoryUses whose defining access is not an actual clobber.
501 if (MemoryAccess *MA = MSSA->getMemoryAccess(Inst))
502 MSSA->removeMemoryAccess(MA);
503 }
489504 };
505 }
506
507 /// Determine if the memory referenced by LaterInst is from the same heap version
508 /// as EarlierInst.
509 /// This is currently called in two scenarios:
510 ///
511 /// load p
512 /// ...
513 /// load p
514 ///
515 /// and
516 ///
517 /// x = load p
518 /// ...
519 /// store x, p
520 ///
521 /// in both cases we want to verify that there are no possible writes to the
522 /// memory referenced by p between the earlier and later instruction.
523 bool EarlyCSE::isSameMemGeneration(unsigned EarlierGeneration,
524 unsigned LaterGeneration,
525 Instruction *EarlierInst,
526 Instruction *LaterInst) {
527 // Check the simple memory generation tracking first.
528 if (EarlierGeneration == LaterGeneration)
529 return true;
530
531 if (!MSSA)
532 return false;
533
534 // Since we know LaterDef dominates LaterInst and EarlierInst dominates
535 // LaterInst, if LaterDef dominates EarlierInst then it can't occur between
536 // EarlierInst and LaterInst and neither can any other write that potentially
537 // clobbers LaterInst.
538 // FIXME: This is currently fairly expensive since it does an AA check even
539 // for MemoryUses that were already optimized by MemorySSA construction.
540 // Re-visit once MemorySSA optimized use tracking change has been committed.
541 MemoryAccess *LaterDef =
542 MSSA->getWalker()->getClobberingMemoryAccess(LaterInst);
543 return MSSA->dominates(LaterDef, MSSA->getMemoryAccess(EarlierInst));
490544 }
491545
492546 bool EarlyCSE::processNode(DomTreeNode *Node) {
546600 // Dead instructions should just be removed.
547601 if (isInstructionTriviallyDead(Inst, &TLI)) {
548602 DEBUG(dbgs() << "EarlyCSE DCE: " << *Inst << '\n');
603 removeMSSA(Inst);
549604 Inst->eraseFromParent();
550605 Changed = true;
551606 ++NumSimplify;
600655 Changed = true;
601656 }
602657 if (isInstructionTriviallyDead(Inst, &TLI)) {
658 removeMSSA(Inst);
603659 Inst->eraseFromParent();
604660 Changed = true;
605661 Killed = true;
618674 if (auto *I = dyn_cast<Instruction>(V))
619675 I->andIRFlags(Inst);
620676 Inst->replaceAllUsesWith(V);
677 removeMSSA(Inst);
621678 Inst->eraseFromParent();
622679 Changed = true;
623680 ++NumCSE;
648705 // load.
649706 LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
650707 if (InVal.DefInst != nullptr &&
651 (InVal.Generation == CurrentGeneration ||
652 InVal.IsInvariant || MemInst.isInvariantLoad()) &&
653708 InVal.MatchingId == MemInst.getMatchingId() &&
654709 // We don't yet handle removing loads with ordering of any kind.
655710 !MemInst.isVolatile() && MemInst.isUnordered() &&
656711 // We can't replace an atomic load with one which isn't also atomic.
657 InVal.IsAtomic >= MemInst.isAtomic()) {
712 InVal.IsAtomic >= MemInst.isAtomic() &&
713 (InVal.IsInvariant || MemInst.isInvariantLoad() ||
714 isSameMemGeneration(InVal.Generation, CurrentGeneration,
715 InVal.DefInst, Inst))) {
658716 Value *Op = getOrCreateResult(InVal.DefInst, Inst->getType());
659717 if (Op != nullptr) {
660718 DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << *Inst
661719 << " to: " << *InVal.DefInst << '\n');
662720 if (!Inst->use_empty())
663721 Inst->replaceAllUsesWith(Op);
722 removeMSSA(Inst);
664723 Inst->eraseFromParent();
665724 Changed = true;
666725 ++NumCSELoad;
691750 // If we have an available version of this call, and if it is the right
692751 // generation, replace this instruction.
692751 std::pair<Instruction *, unsigned> InVal = AvailableCalls.lookup(Inst);
694 if (InVal.first != nullptr && InVal.second == CurrentGeneration) {
753 if (InVal.first != nullptr &&
754 isSameMemGeneration(InVal.second, CurrentGeneration, InVal.first,
755 Inst)) {
695756 DEBUG(dbgs() << "EarlyCSE CSE CALL: " << *Inst
696757 << " to: " << *InVal.first << '\n');
697758 if (!Inst->use_empty())
698759 Inst->replaceAllUsesWith(InVal.first);
760 removeMSSA(Inst);
699761 Inst->eraseFromParent();
700762 Changed = true;
701763 ++NumCSECall;
728790 LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
729791 if (InVal.DefInst &&
730792 InVal.DefInst == getOrCreateResult(Inst, InVal.DefInst->getType()) &&
731 InVal.Generation == CurrentGeneration &&
732793 InVal.MatchingId == MemInst.getMatchingId() &&
733794 // We don't yet handle removing stores with ordering of any kind.
734 !MemInst.isVolatile() && MemInst.isUnordered()) {
795 !MemInst.isVolatile() && MemInst.isUnordered() &&
796 isSameMemGeneration(InVal.Generation, CurrentGeneration,
797 InVal.DefInst, Inst)) {
798 // It is okay to have a LastStore to a different pointer here if MemorySSA
799 // tells us that the load and store are from the same memory generation.
800 // In that case, LastStore should keep its present value since we're
801 // removing the current store.
735802 assert((!LastStore ||
736803 ParseMemoryInst(LastStore, TTI).getPointerOperand() ==
737 MemInst.getPointerOperand()) &&
738 "can't have an intervening store!");
804 MemInst.getPointerOperand() ||
805 MSSA) &&
806 "can't have an intervening store if not using MemorySSA!");
739807 DEBUG(dbgs() << "EarlyCSE DSE (writeback): " << *Inst << '\n');
808 removeMSSA(Inst);
740809 Inst->eraseFromParent();
741810 Changed = true;
742811 ++NumDSE;
768837 if (LastStoreMemInst.isMatchingMemLoc(MemInst)) {
769838 DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore
770839 << " due to: " << *Inst << '\n');
840 removeMSSA(LastStore);
771841 LastStore->eraseFromParent();
772842 Changed = true;
773843 ++NumDSE;
864934 auto &TTI = AM.getResult<TargetIRAnalysis>(F);
865935 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
866936 auto &AC = AM.getResult<AssumptionAnalysis>(F);
867
868 EarlyCSE CSE(TLI, TTI, DT, AC);
937 auto *MSSA =
938 UseMemorySSA ? &AM.getResult<MemorySSAAnalysis>(F).getMSSA() : nullptr;
939
940 EarlyCSE CSE(TLI, TTI, DT, AC, MSSA);
869941
870942 if (!CSE.run())
871943 return PreservedAnalyses::all();
875947 PreservedAnalyses PA;
876948 PA.preserve<DominatorTreeAnalysis>();
877949 PA.preserve<GlobalsAA>();
950 if (UseMemorySSA)
951 PA.preserve<MemorySSAAnalysis>();
878952 return PA;
879953 }
880954
886960 /// canonicalize things as it goes. It is intended to be fast and catch obvious
887961 /// cases so that instcombine and other passes are more effective. It is
888962 /// expected that a later pass of GVN will catch the interesting/hard cases.
889 class EarlyCSELegacyPass : public FunctionPass {
963 template<bool UseMemorySSA>
964 class EarlyCSELegacyCommonPass : public FunctionPass {
890965 public:
891966 static char ID;
892967
893 EarlyCSELegacyPass() : FunctionPass(ID) {
894 initializeEarlyCSELegacyPassPass(*PassRegistry::getPassRegistry());
968 EarlyCSELegacyCommonPass() : FunctionPass(ID) {
969 if (UseMemorySSA)
970 initializeEarlyCSEMemSSALegacyPassPass(*PassRegistry::getPassRegistry());
971 else
972 initializeEarlyCSELegacyPassPass(*PassRegistry::getPassRegistry());
895973 }
896974
897975 bool runOnFunction(Function &F) override {
902980 auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
903981 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
904982 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
905
906 EarlyCSE CSE(TLI, TTI, DT, AC);
983 auto *MSSA =
984 UseMemorySSA ? &getAnalysis<MemorySSAWrapperPass>().getMSSA() : nullptr;
985
986 EarlyCSE CSE(TLI, TTI, DT, AC, MSSA);
907987
908988 return CSE.run();
909989 }
913993 AU.addRequired<DominatorTreeWrapperPass>();
914994 AU.addRequired<TargetLibraryInfoWrapperPass>();
915995 AU.addRequired<TargetTransformInfoWrapperPass>();
996 if (UseMemorySSA) {
997 AU.addRequired<MemorySSAWrapperPass>();
998 AU.addPreserved<MemorySSAWrapperPass>();
999 }
9161000 AU.addPreserved<GlobalsAAWrapperPass>();
9171001 AU.setPreservesCFG();
9181002 }
9191003 };
9201004 }
9211005
1006 using EarlyCSELegacyPass = EarlyCSELegacyCommonPass</*UseMemorySSA=*/false>;
1007
1008 template<>
9221009 char EarlyCSELegacyPass::ID = 0;
923
924 FunctionPass *llvm::createEarlyCSEPass() { return new EarlyCSELegacyPass(); }
9251010
9261011 INITIALIZE_PASS_BEGIN(EarlyCSELegacyPass, "early-cse", "Early CSE", false,
9271012 false)
9301015 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
9311016 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
9321017 INITIALIZE_PASS_END(EarlyCSELegacyPass, "early-cse", "Early CSE", false, false)
1018
1019 using EarlyCSEMemSSALegacyPass =
1020 EarlyCSELegacyCommonPass</*UseMemorySSA=*/true>;
1021
1022 template<>
1023 char EarlyCSEMemSSALegacyPass::ID = 0;
1024
1025 FunctionPass *llvm::createEarlyCSEPass(bool UseMemorySSA) {
1026 if (UseMemorySSA)
1027 return new EarlyCSEMemSSALegacyPass();
1028 else
1029 return new EarlyCSELegacyPass();
1030 }
1031
1032 INITIALIZE_PASS_BEGIN(EarlyCSEMemSSALegacyPass, "early-cse-memssa",
1033 "Early CSE w/ MemorySSA", false, false)
1034 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
1035 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
1036 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
1037 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
1038 INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
1039 INITIALIZE_PASS_END(EarlyCSEMemSSALegacyPass, "early-cse-memssa",
1040 "Early CSE w/ MemorySSA", false, false)
4343 initializeGuardWideningLegacyPassPass(Registry);
4444 initializeGVNLegacyPassPass(Registry);
4545 initializeEarlyCSELegacyPassPass(Registry);
46 initializeEarlyCSEMemSSALegacyPassPass(Registry);
4647 initializeGVNHoistLegacyPassPass(Registry);
4748 initializeFlattenCFGPassPass(Registry);
4849 initializeInductiveRangeCheckEliminationPass(Registry);
232233 unwrap(PM)->add(createCorrelatedValuePropagationPass());
233234 }
234235
235 void LLVMAddEarlyCSEPass(LLVMPassManagerRef PM) {
236 unwrap(PM)->add(createEarlyCSEPass());
236 void LLVMAddEarlyCSEPass(LLVMPassManagerRef PM, int UseMemorySSA) {
237 unwrap(PM)->add(createEarlyCSEPass(UseMemorySSA));
237238 }
238239
239240 void LLVMAddGVNHoistLegacyPass(LLVMPassManagerRef PM) {
0 ; RUN: opt < %s -S -mtriple=aarch64-none-linux-gnu -mattr=+neon -early-cse | FileCheck %s
1 ; RUN: opt < %s -S -mtriple=aarch64-none-linux-gnu -mattr=+neon -basicaa -early-cse-memssa | FileCheck %s
12 ; RUN: opt < %s -S -mtriple=aarch64-none-linux-gnu -mattr=+neon -passes=early-cse | FileCheck %s
3 ; RUN: opt < %s -S -mtriple=aarch64-none-linux-gnu -mattr=+neon -aa-pipeline=basic-aa -passes=early-cse-memssa | FileCheck %s
24
35 define <4 x i32> @test_cse(i32* %a, [2 x <4 x i32>] %s.coerce, i32 %n) {
46 entry:
0 ; RUN: opt -S -early-cse < %s | FileCheck %s
1 ; RUN: opt -S -basicaa -early-cse-memssa < %s | FileCheck %s
12 target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
23 target triple = "aarch64--linux-gnu"
34
0 ; RUN: opt < %s -S -early-cse | FileCheck %s
1 ; RUN: opt < %s -S -basicaa -early-cse-memssa | FileCheck %s
12
23 ; CHECK-LABEL: @test12(
34 define i32 @test12(i1 %B, i32* %P1, i32* %P2) {
0 ; RUN: opt < %s -S -early-cse | FileCheck %s
1 ; RUN: opt < %s -S -basicaa -early-cse-memssa | FileCheck %s
12 ; RUN: opt < %s -S -passes=early-cse | FileCheck %s
23
34 declare void @llvm.assume(i1) nounwind
0 ; RUN: opt < %s -S -early-cse | FileCheck %s
1 ; RUN: opt < %s -S -basicaa -early-cse-memssa | FileCheck %s
12
23 ; CHECK-LABEL: @test1(
34 define void @test1(float %A, float %B, float* %PA, float* %PB) {
0 ; RUN: opt -early-cse -S < %s | FileCheck %s
1 ; RUN: opt -basicaa -early-cse-memssa -S < %s | FileCheck %s
12
23 ; Can we CSE a known condition to a constant?
34 define i1 @test(i8* %p) {
0 ; RUN: opt -early-cse -S < %s | FileCheck %s
1 ; RUN: opt -basicaa -early-cse-memssa -S < %s | FileCheck %s
12 ; Same as GVN/edge.ll, but updated to reflect EarlyCSE's less powerful
23 ; implementation. EarlyCSE currently doesn't exploit equality comparisons
34 ; against constants.
0 ; RUN: opt -S -early-cse < %s | FileCheck %s
1 ; RUN: opt < %s -S -basicaa -early-cse-memssa | FileCheck %s
12 ; NOTE: This file is testing the current implementation. Some of
23 ; the transforms used as negative tests below would be legal, but
34 ; only if reached through a chain of logic which EarlyCSE is incapable
0 ; RUN: opt -early-cse -S < %s | FileCheck %s
1 ; RUN: opt -basicaa -early-cse-memssa -S < %s | FileCheck %s
12
23 declare void @use(i1)
34
0 ; RUN: opt < %s -S -early-cse | FileCheck %s
1 ; RUN: opt < %s -S -basicaa -early-cse-memssa | FileCheck %s
12
23 ; Ensure we don't simplify away additions vectors of +0.0's (same as scalars).
34 define <4 x float> @fV( <4 x float> %a) {
0 ; RUN: opt -S -early-cse < %s | FileCheck %s
1 ; RUN: opt < %s -S -basicaa -early-cse-memssa | FileCheck %s
12
23 declare void @llvm.experimental.guard(i1,...)
34
0 ; RUN: opt -early-cse -S < %s | FileCheck %s
1 ; RUN: opt -basicaa -early-cse-memssa -S < %s | FileCheck %s
12 ; PR12231
23
34 declare i32 @f()
0 ; RUN: opt -S -early-cse < %s | FileCheck %s
1 ; RUN: opt -S -basicaa -early-cse-memssa < %s | FileCheck %s
12
23 declare void @clobber_and_use(i32)
34
0 ; RUN: opt < %s -S -early-cse | FileCheck %s --check-prefix=CHECK-NOMEMSSA
1 ; RUN: opt < %s -S -basicaa -early-cse-memssa | FileCheck %s
2 ; RUN: opt < %s -S -passes='early-cse' | FileCheck %s --check-prefix=CHECK-NOMEMSSA
3 ; RUN: opt < %s -S -aa-pipeline=basic-aa -passes='early-cse-memssa' | FileCheck %s
4
5 @G1 = global i32 zeroinitializer
6 @G2 = global i32 zeroinitializer
7
8 ;; Simple load value numbering across non-clobbering store.
9 ; CHECK-LABEL: @test1(
10 ; CHECK-NOMEMSSA-LABEL: @test1(
11 define i32 @test1() {
12 %V1 = load i32, i32* @G1
13 store i32 0, i32* @G2
14 %V2 = load i32, i32* @G1
15 ; CHECK-NOMEMSSA: sub i32 %V1, %V2
16 %Diff = sub i32 %V1, %V2
17 ret i32 %Diff
18 ; CHECK: ret i32 0
19 }
20
21 ;; Simple dead store elimination across non-clobbering store.
22 ; CHECK-LABEL: @test2(
23 ; CHECK-NOMEMSSA-LABEL: @test2(
24 define void @test2() {
25 entry:
26 %V1 = load i32, i32* @G1
27 ; CHECK: store i32 0, i32* @G2
28 store i32 0, i32* @G2
29 ; CHECK-NOT: store
30 ; CHECK-NOMEMSSA: store i32 %V1, i32* @G1
31 store i32 %V1, i32* @G1
32 ret void
33 }
0 ; RUN: opt -S -early-cse < %s | FileCheck %s
1 ; RUN: opt -S -basicaa -early-cse-memssa < %s | FileCheck %s
12 target datalayout = "E-m:e-i64:64-n32:64"
23 target triple = "powerpc64-unknown-linux-gnu"
34