llvm.org GIT mirror llvm / 23c0f3b
Revert r255247, r255265, and r255286 due to serious compile-time regressions. Revert "[DSE] Disable non-local DSE to see if the bots go green." Revert "[DeadStoreElimination] Use range-based loops. NFC." Revert "[DeadStoreElimination] Add support for non-local DSE." git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@255354 91177308-0d34-0410-b5e6-96231b3b80d8 Chad Rosier 3 years ago
6 changed file(s) with 96 addition(s) and 396 deletion(s). Raw diff Collapse all Expand all
66 //
77 //===----------------------------------------------------------------------===//
88 //
9 // This file implements dead store elimination that considers redundant stores
10 // within a basic-block as well as across basic blocks in a reverse CFG order.
9 // This file implements a trivial dead store elimination that only considers
10 // basic-block local redundant stores.
11 //
12 // FIXME: This should eventually be extended to be a post-dominator tree
13 // traversal. Doing so would be pretty trivial.
1114 //
1215 //===----------------------------------------------------------------------===//
1316
4043 STATISTIC(NumRedundantStores, "Number of redundant stores deleted");
4144 STATISTIC(NumFastStores, "Number of stores deleted");
4245 STATISTIC(NumFastOther , "Number of other instrs removed");
43 STATISTIC(NumNonLocalStores, "Number of non-local stores deleted");
44
45 static cl::opt EnableNonLocalDSE("enable-nonlocal-dse", cl::init(false));
46
47 /// MaxNonLocalAttempts is an arbitrary threshold that provides
48 /// an early opportunity for bail out to control compile time.
49 static const unsigned MaxNonLocalAttempts = 100;
5046
5147 namespace {
5248 struct DSE : public FunctionPass {
8379 bool runOnBasicBlock(BasicBlock &BB);
8480 bool MemoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI);
8581 bool HandleFree(CallInst *F);
86 bool handleNonLocalDependency(Instruction *Inst);
8782 bool handleEndBlock(BasicBlock &BB);
8883 void RemoveAccessedObjects(const MemoryLocation &LoadedLoc,
8984 SmallSetVector &DeadStackObjects,
489484 bool DSE::runOnBasicBlock(BasicBlock &BB) {
490485 const DataLayout &DL = BB.getModule()->getDataLayout();
491486 bool MadeChange = false;
492 unsigned NumNonLocalAttempts = 0;
493487
494488 // Do a top-down walk on the BB.
495489 for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) {
559553
560554 MemDepResult InstDep = MD->getDependency(Inst);
561555
562 if (InstDep.isDef() || InstDep.isClobber()) {
563 // Figure out what location is being stored to.
564 MemoryLocation Loc = getLocForWrite(Inst, *AA);
565
566 // If we didn't get a useful location, fail.
567 if (!Loc.Ptr)
568 continue;
569
570 while (InstDep.isDef() || InstDep.isClobber()) {
571 // Get the memory clobbered by the instruction we depend on. MemDep
572 // will skip any instructions that 'Loc' clearly doesn't interact with.
573 // If we end up depending on a may- or must-aliased load, then we can't
574 // optimize away the store and we bail out. However, if we depend on
575 // something that overwrites the memory location we *can* potentially
576 // optimize it.
577 //
578 // Find out what memory location the dependent instruction stores.
579 Instruction *DepWrite = InstDep.getInst();
580 MemoryLocation DepLoc = getLocForWrite(DepWrite, *AA);
581 // If we didn't get a useful location, or if it isn't a size, bail out.
582 if (!DepLoc.Ptr)
556 // Ignore any store where we can't find a local dependence.
557 // FIXME: cross-block DSE would be fun. :)
558 if (!InstDep.isDef() && !InstDep.isClobber())
559 continue;
560
561 // Figure out what location is being stored to.
562 MemoryLocation Loc = getLocForWrite(Inst, *AA);
563
564 // If we didn't get a useful location, fail.
565 if (!Loc.Ptr)
566 continue;
567
568 while (InstDep.isDef() || InstDep.isClobber()) {
569 // Get the memory clobbered by the instruction we depend on. MemDep will
570 // skip any instructions that 'Loc' clearly doesn't interact with. If we
571 // end up depending on a may- or must-aliased load, then we can't optimize
572 // away the store and we bail out. However, if we depend on something
573 // that overwrites the memory location we *can* potentially optimize it.
574 //
575 // Find out what memory location the dependent instruction stores.
576 Instruction *DepWrite = InstDep.getInst();
577 MemoryLocation DepLoc = getLocForWrite(DepWrite, *AA);
578 // If we didn't get a useful location, or if it isn't a size, bail out.
579 if (!DepLoc.Ptr)
580 break;
581
582 // If we find a write that is a) removable (i.e., non-volatile), b) is
583 // completely obliterated by the store to 'Loc', and c) which we know that
584 // 'Inst' doesn't load from, then we can remove it.
585 if (isRemovable(DepWrite) &&
586 !isPossibleSelfRead(Inst, Loc, DepWrite, *TLI, *AA)) {
587 int64_t InstWriteOffset, DepWriteOffset;
588 OverwriteResult OR =
589 isOverwrite(Loc, DepLoc, DL, *TLI, DepWriteOffset, InstWriteOffset);
590 if (OR == OverwriteComplete) {
591 DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: "
592 << *DepWrite << "\n KILLER: " << *Inst << '\n');
593
594 // Delete the store and now-dead instructions that feed it.
595 DeleteDeadInstruction(DepWrite, *MD, *TLI);
596 ++NumFastStores;
597 MadeChange = true;
598
599 // DeleteDeadInstruction can delete the current instruction in loop
600 // cases, reset BBI.
601 BBI = Inst->getIterator();
602 if (BBI != BB.begin())
603 --BBI;
583604 break;
584
585 // If we find a write that is a) removable (i.e., non-volatile), b) is
586 // completely obliterated by the store to 'Loc', and c) which we know
587 // that 'Inst' doesn't load from, then we can remove it.
588 if (isRemovable(DepWrite) &&
589 !isPossibleSelfRead(Inst, Loc, DepWrite, *TLI, *AA)) {
590 int64_t InstWriteOffset, DepWriteOffset;
591 OverwriteResult OR = isOverwrite(Loc, DepLoc, DL, *TLI,
592 DepWriteOffset, InstWriteOffset);
593 if (OR == OverwriteComplete) {
594 DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DepWrite
595 << "\n KILLER: " << *Inst << '\n');
596
597 // Delete the store and now-dead instructions that feed it.
598 DeleteDeadInstruction(DepWrite, *MD, *TLI);
599 ++NumFastStores;
605 } else if (OR == OverwriteEnd && isShortenable(DepWrite)) {
606 // TODO: base this on the target vector size so that if the earlier
607 // store was too small to get vector writes anyway then it's likely
608 // a good idea to shorten it
609 // Power of 2 vector writes are probably always a bad idea to optimize
610 // as any store/memset/memcpy is likely using vector instructions so
611 // shortening it to not vector size is likely to be slower
612 MemIntrinsic* DepIntrinsic = cast(DepWrite);
613 unsigned DepWriteAlign = DepIntrinsic->getAlignment();
614 if (llvm::isPowerOf2_64(InstWriteOffset) ||
615 ((DepWriteAlign != 0) && InstWriteOffset % DepWriteAlign == 0)) {
616
617 DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW END: "
618 << *DepWrite << "\n KILLER (offset "
619 << InstWriteOffset << ", "
620 << DepLoc.Size << ")"
621 << *Inst << '\n');
622
623 Value* DepWriteLength = DepIntrinsic->getLength();
624 Value* TrimmedLength = ConstantInt::get(DepWriteLength->getType(),
625 InstWriteOffset -
626 DepWriteOffset);
627 DepIntrinsic->setLength(TrimmedLength);
600628 MadeChange = true;
601
602 // DeleteDeadInstruction can delete the current instruction in loop
603 // cases, reset BBI.
604 BBI = Inst->getIterator();
605 if (BBI != BB.begin())
606 --BBI;
607 break;
608 } else if (OR == OverwriteEnd && isShortenable(DepWrite)) {
609 // TODO: base this on the target vector size so that if the earlier
610 // store was too small to get vector writes anyway then it's likely a
611 // good idea to shorten it.
612
613 // Power of 2 vector writes are probably always a bad idea to
614 // optimize as any store/memset/memcpy is likely using vector
615 // instructions so shortening it to not vector size is likely to be
616 // slower.
617 MemIntrinsic *DepIntrinsic = cast(DepWrite);
618 unsigned DepWriteAlign = DepIntrinsic->getAlignment();
619 if (llvm::isPowerOf2_64(InstWriteOffset) ||
620 ((DepWriteAlign != 0) &&
621 InstWriteOffset % DepWriteAlign == 0)) {
622
623 DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW END: " << *DepWrite
624 << "\n KILLER (offset " << InstWriteOffset << ", "
625 << DepLoc.Size << ")" << *Inst << '\n');
626
627 Value *DepWriteLength = DepIntrinsic->getLength();
628 Value *TrimmedLength = ConstantInt::get(
629 DepWriteLength->getType(), InstWriteOffset - DepWriteOffset);
630 DepIntrinsic->setLength(TrimmedLength);
631 MadeChange = true;
632 }
633629 }
634630 }
635
636 // If this is a may-aliased store that is clobbering the store value, we
637 // can keep searching past it for another must-aliased pointer that
638 // stores to the same location. For example, in
639 // store -> P
640 // store -> Q
641 // store -> P
642 // we can remove the first store to P even though we don't know if P and
643 // Q alias.
644 if (DepWrite == &BB.front())
645 break;
646
647 // Can't look past this instruction if it might read 'Loc'.
648 if (AA->getModRefInfo(DepWrite, Loc) & MRI_Ref)
649 break;
650
651 InstDep = MD->getPointerDependencyFrom(Loc, false,
652 DepWrite->getIterator(), &BB);
653631 }
654 } else if (EnableNonLocalDSE && InstDep.isNonLocal()) { // DSE across BB
655 if (++NumNonLocalAttempts < MaxNonLocalAttempts)
656 MadeChange |= handleNonLocalDependency(Inst);
632
633 // If this is a may-aliased store that is clobbering the store value, we
634 // can keep searching past it for another must-aliased pointer that stores
635 // to the same location. For example, in:
636 // store -> P
637 // store -> Q
638 // store -> P
639 // we can remove the first store to P even though we don't know if P and Q
640 // alias.
641 if (DepWrite == &BB.front()) break;
642
643 // Can't look past this instruction if it might read 'Loc'.
644 if (AA->getModRefInfo(DepWrite, Loc) & MRI_Ref)
645 break;
646
647 InstDep = MD->getPointerDependencyFrom(Loc, false,
648 DepWrite->getIterator(), &BB);
657649 }
658650 }
659651
661653 // dead at its end, which means stores to them are also dead.
662654 if (BB.getTerminator()->getNumSuccessors() == 0)
663655 MadeChange |= handleEndBlock(BB);
664
665 return MadeChange;
666 }
667
668 /// A helper for handleNonLocalDependency() function to find all blocks
669 /// that lead to the input block BB and append them to the output PredBlocks.
670 /// PredBlocks will include not only predecessors of BB that unconditionally
671 /// lead to BB but also:
672 /// - single-block loops that lead to BB, and
673 /// - if-blocks for which one edge goes to BB and the other edge goes to
674 /// a block in the input SafeBlocks.
675 /// PredBlocks will not include blocks unreachable from the entry block, nor
676 /// blocks that form cycles with BB.
677 static void findSafePreds(SmallVectorImpl &PredBlocks,
678 SmallSetVector &SafeBlocks,
679 BasicBlock *BB, DominatorTree *DT) {
680 for (auto *Pred : predecessors(BB)) {
681 if (Pred == BB)
682 continue;
683 // The second check below prevents adding blocks that form a cycle with BB
684 // in order to avoid potential problems due to MemoryDependenceAnalysis,
685 // isOverwrite, etc. being not loop-aware.
686 if (!DT->isReachableFromEntry(Pred) || DT->dominates(BB, Pred))
687 continue;
688
689 bool PredIsSafe = true;
690 for (auto *Succ : successors(Pred)) {
691 if (Succ == BB || Succ == Pred) // shortcut, BB should be in SafeBlocks
692 continue;
693 if (!SafeBlocks.count(Succ)) {
694 PredIsSafe = false;
695 break;
696 }
697 }
698 if (PredIsSafe)
699 PredBlocks.push_back(Pred);
700 }
701 }
702
703 static bool underlyingObjectsDoNotAlias(StoreInst *SI, LoadInst *LI,
704 const DataLayout &DL,
705 AliasAnalysis &AA) {
706 Value *AObj = GetUnderlyingObject(SI->getPointerOperand(), DL);
707 SmallVector Pointers;
708 GetUnderlyingObjects(LI->getPointerOperand(), Pointers, DL);
709
710 for (auto *BObj : Pointers) {
711 if (!AA.isNoAlias(AObj, DL.getTypeStoreSize(AObj->getType()), BObj,
712 DL.getTypeStoreSize(BObj->getType())))
713 return false;
714 }
715 return true;
716 }
717
718 /// handleNonLocalDependency - Handle a non-local dependency on
719 /// the input instruction Inst located in BB in attempt to remove
720 /// redundant stores outside BB.
721 bool DSE::handleNonLocalDependency(Instruction *Inst) {
722 auto *SI = dyn_cast(Inst);
723 if (!SI)
724 return false;
725 // Get the location being stored to.
726 // If we don't get a useful location, bail out.
727 MemoryLocation Loc = getLocForWrite(SI, *AA);
728 if (!Loc.Ptr)
729 return false;
730
731 bool MadeChange = false;
732 BasicBlock *BB = Inst->getParent();
733 const DataLayout &DL = BB->getModule()->getDataLayout();
734
735 // Worklist of predecessor blocks of BB
736 SmallVector Blocks;
737 // Keep track of all predecessor blocks that are safe to search through
738 SmallSetVector SafeBlocks;
739 SafeBlocks.insert(BB);
740 findSafePreds(Blocks, SafeBlocks, BB, DT);
741
742 while (!Blocks.empty()) {
743 BasicBlock *PB = Blocks.pop_back_val();
744 MemDepResult Dep =
745 MD->getPointerDependencyFrom(Loc, false, PB->end(), PB, SI);
746 while (Dep.isDef() || Dep.isClobber()) {
747 Instruction *Dependency = Dep.getInst();
748
749 // Filter out false dependency from a load to SI looking through phis.
750 if (auto *LI = dyn_cast(Dependency)) {
751 if (underlyingObjectsDoNotAlias(SI, LI, DL, *AA)) {
752 Dep = MD->getPointerDependencyFrom(Loc, false,
753 Dependency->getIterator(), PB, SI);
754 continue;
755 }
756 }
757
758 // If we don't get a useful location for the dependent instruction,
759 // it doesn't write memory, it is not removable, or it might read Loc,
760 // then bail out.
761 MemoryLocation DepLoc = getLocForWrite(Dependency, *AA);
762 if (!DepLoc.Ptr || !hasMemoryWrite(Dependency, *TLI) ||
763 !isRemovable(Dependency) ||
764 (AA->getModRefInfo(Dependency, Loc) & MRI_Ref))
765 break;
766
767 // Don't remove a store within single-block loops;
768 // we need more analysis: e.g. looking for an interfering load
769 // above the store within the loop, etc.
770 bool SingleBlockLoop = false;
771 for (auto I = succ_begin(PB), E = succ_end(PB); I != E; ++I) {
772 BasicBlock *Succ = *I;
773 if (Succ == PB) {
774 SingleBlockLoop = true;
775 break;
776 }
777 }
778 if (SingleBlockLoop)
779 break;
780
781 int64_t InstWriteOffset, DepWriteOffset;
782 OverwriteResult OR =
783 isOverwrite(Loc, DepLoc, DL, *TLI, DepWriteOffset, InstWriteOffset);
784 if (OR == OverwriteComplete) {
785 DEBUG(dbgs() << "DSE: Remove Non-Local Dead Store:\n DEAD: "
786 << *Dependency << "\n KILLER: " << *SI << '\n');
787
788 // Delete redundant store and now-dead instructions that feed it.
789 auto Next = std::next(Dependency->getIterator());
790 DeleteDeadInstruction(Dependency, *MD, *TLI);
791 ++NumNonLocalStores;
792 MadeChange = true;
793 Dep = MD->getPointerDependencyFrom(Loc, false, Next, PB, SI);
794 continue;
795 }
796 // TODO: attempt shortening of Dependency inst as in the local case
797 break;
798 }
799
800 if (Dep.isNonLocal()) {
801 SafeBlocks.insert(PB);
802 findSafePreds(Blocks, SafeBlocks, PB, DT);
803 }
804 }
805656
806657 return MadeChange;
807658 }
854705 if (B != FirstBB) {
855706 assert(B != &FirstBB->getParent()->getEntryBlock() &&
856707 "Should not hit the entry block because SI must be dominated by LI");
857 for (auto *PredI : predecessors(B)) {
858 if (!Visited.insert(PredI).second)
708 for (auto PredI = pred_begin(B), PE = pred_end(B); PredI != PE; ++PredI) {
709 if (!Visited.insert(*PredI).second)
859710 continue;
860 WorkList.push_back(PredI);
711 WorkList.push_back(*PredI);
861712 }
862713 }
863714 }
+0
-26
test/Transforms/DeadStoreElimination/cycle.ll less more
None ; RUN: opt < %s -basicaa -dse -S | FileCheck %s
1
2 @Table = global [535 x i32] zeroinitializer, align 4
3
4 ; The store in for.inc block should NOT be removed by non-local DSE.
5 ; CHECK: store i32 64, i32* %arrayidx
6 ;
7 define void @foo() {
8 entry:
9 br label %for.body
10
11 for.body:
12 %i = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
13 %arrayidx = getelementptr inbounds [535 x i32], [535 x i32]* @Table, i32 0, i32 %i
14 store i32 %i, i32* %arrayidx, align 4
15 %cmp1 = icmp slt i32 %i, 64
16 br i1 %cmp1, label %for.inc, label %for.end
17
18 for.inc:
19 store i32 64, i32* %arrayidx, align 4
20 %inc = add nsw i32 %i, 1
21 br label %for.body
22
23 for.end:
24 ret void
25 }
+0
-22
test/Transforms/DeadStoreElimination/ifthen.ll less more
None ; RUN: opt < %s -basicaa -dse -enable-nonlocal-dse -S | FileCheck %s
1
2 ; The store and add in if.then block should be removed by non-local DSE.
3 ; CHECK-NOT: %stval = add
4 ; CHECK-NOT: store i32 %stval
5 ;
6 define void @foo(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32 %c) {
7 entry:
8 %cmp = icmp sgt i32 %c, 0
9 br i1 %cmp, label %if.then, label %if.end
10
11 if.then:
12 %0 = load i32, i32* %b, align 4
13 %stval = add nsw i32 %0, 1
14 store i32 %stval, i32* %a, align 4
15 br label %if.end
16
17 if.end:
18 %m.0 = phi i32 [ 13, %if.then ], [ 10, %entry ]
19 store i32 %m.0, i32* %a, align 4
20 ret void
21 }
+0
-27
test/Transforms/DeadStoreElimination/ifthenelse.ll less more
None ; RUN: opt < %s -basicaa -dse -enable-nonlocal-dse -S | FileCheck %s
1
2 ; The add and store in entry block should be removed by non-local DSE.
3 ; CHECK-NOT: %stval = add
4 ; CHECK-NOT: store i32 %stval
5 ;
6 define void @foo(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32 %c) {
7 entry:
8 %0 = load i32, i32* %b, align 4
9 %stval = add nsw i32 %0, 1
10 store i32 %stval, i32* %a, align 4
11 %cmp = icmp sgt i32 %c, 0
12 br i1 %cmp, label %if.then, label %if.else
13
14 if.then:
15 %1 = add nsw i32 %c, 10
16 br label %if.end
17
18 if.else:
19 %2 = add nsw i32 %c, 13
20 br label %if.end
21
22 if.end:
23 %3 = phi i32 [ %1, %if.then ], [ %2, %if.else ]
24 store i32 %3, i32* %a, align 4
25 ret void
26 }
+0
-34
test/Transforms/DeadStoreElimination/ifthenelse2.ll less more
None ; RUN: opt < %s -basicaa -dse -enable-nonlocal-dse -S | FileCheck %s
1
2 ; The add and store in entry block should be removed by non-local DSE.
3 ; CHECK-NOT: %stval = add
4 ; CHECK-NOT: store i32 %stval
5 ;
6 ; The stores in if.then and if.else blocks should be removed by non-local DSE.
7 ; CHECK-NOT: store i32 %1
8 ; CHECK-NOT: store i32 %2
9 ;
10 define void @foo(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32 %c) {
11 entry:
12 %0 = load i32, i32* %b, align 4
13 %stval = add nsw i32 %0, 1
14 store i32 %stval, i32* %a, align 4
15 %cmp = icmp sgt i32 %c, 0
16 br i1 %cmp, label %if.then, label %if.else
17
18 if.then:
19 %1 = add nsw i32 %c, 10
20 store i32 %1, i32* %a, align 4
21 br label %if.end
22
23 if.else:
24 %2 = add nsw i32 %c, 13
25 store i32 %2, i32* %a, align 4
26 br label %if.end
27
28 if.end:
29 %3 = phi i32 [ %1, %if.then ], [ %2, %if.else ]
30 %4 = sub nsw i32 %3, 6
31 store i32 %4, i32* %a, align 4
32 ret void
33 }
+0
-42
test/Transforms/DeadStoreElimination/loop.ll less more
None ; RUN: opt < %s -basicaa -dse -enable-nonlocal-dse -S | FileCheck %s
1
2 ; The store in for.body block should be removed by non-local DSE.
3 ; CHECK-NOT: store i32 0, i32* %arrayidx
4 ;
5 define void @sum(i32 %N, i32* noalias nocapture %C, i32* noalias nocapture readonly %A, i32* noalias nocapture readonly %B) {
6 entry:
7 %cmp24 = icmp eq i32 %N, 0
8 br i1 %cmp24, label %for.end11, label %for.body
9
10 for.body:
11 %i.025 = phi i32 [ %inc10, %for.cond1.for.inc9_crit_edge ], [ 0, %entry ]
12 %arrayidx = getelementptr inbounds i32, i32* %C, i32 %i.025
13 store i32 0, i32* %arrayidx, align 4
14 %mul = mul i32 %i.025, %N
15 %arrayidx4.gep = getelementptr i32, i32* %A, i32 %mul
16 br label %for.body3
17
18 for.body3:
19 %0 = phi i32 [ 0, %for.body ], [ %add8, %for.body3 ]
20 %arrayidx4.phi = phi i32* [ %arrayidx4.gep, %for.body ], [ %arrayidx4.inc, %for.body3 ]
21 %arrayidx5.phi = phi i32* [ %B, %for.body ], [ %arrayidx5.inc, %for.body3 ]
22 %j.023 = phi i32 [ 0, %for.body ], [ %inc, %for.body3 ]
23 %1 = load i32, i32* %arrayidx4.phi, align 4
24 %2 = load i32, i32* %arrayidx5.phi, align 4
25 %add6 = add nsw i32 %2, %1
26 %add8 = add nsw i32 %add6, %0
27 %inc = add i32 %j.023, 1
28 %exitcond = icmp ne i32 %inc, %N
29 %arrayidx4.inc = getelementptr i32, i32* %arrayidx4.phi, i32 1
30 %arrayidx5.inc = getelementptr i32, i32* %arrayidx5.phi, i32 1
31 br i1 %exitcond, label %for.body3, label %for.cond1.for.inc9_crit_edge
32
33 for.cond1.for.inc9_crit_edge:
34 store i32 %add8, i32* %arrayidx, align 4
35 %inc10 = add i32 %i.025, 1
36 %exitcond26 = icmp ne i32 %inc10, %N
37 br i1 %exitcond26, label %for.body, label %for.end11
38
39 for.end11:
40 ret void
41 }