Recommit - [DSE] Enhance shortening of MemIntrinsics based on OverlapIntervals

Recommitting r275571 after fixing the crash reported in PR28270: elements of IOL are now erased in deleteDeadInstruction().

Original summary: This change uses the overlap interval map built by partial overwrite tracking to shorten MemIntrinsics. It also adds test cases for opportunities that were previously missed.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@276452 91177308-0d34-0410-b5e6-96231b3b80d8

Jun Bum Lim, 3 years ago
4 changed files with 218 additions and 62 deletions.
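To make the intent concrete before the diff: here is a minimal C++ sketch of the transformation. DSE operates on LLVM IR memset/memcpy intrinsics, not C source, and the buffer and offsets below are invented for illustration (they mirror the write16To23AndThen24To31 test added at the bottom of this commit).

#include <cstdint>
#include <cstring>

// Illustration only: the pass rewrites IR intrinsics, not C calls.
void example(unsigned char *buf) { // buf points to at least 32 bytes
  std::memset(buf, 0, 32);             // earlier write covers [0, 32)
  uint64_t v = 3;
  std::memcpy(buf + 16, &v, sizeof v); // later write covers [16, 24)
  std::memcpy(buf + 24, &v, sizeof v); // later write covers [24, 32)
  // The later writes jointly cover [16, 32), so with this patch DSE can
  // shorten the memset to std::memset(buf, 0, 16).
}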
5858 //===----------------------------------------------------------------------===//
5959 // Helper functions
6060 //===----------------------------------------------------------------------===//
61 typedef std::map<int64_t, int64_t> OverlapIntervalsTy;
62 typedef DenseMap<Instruction *, OverlapIntervalsTy> InstOverlapIntervalsTy;
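A minimal sketch of the encoding these typedefs imply (offsets invented; assumes the typedefs above): each overlap interval is keyed by its end offset, with its start offset as the mapped value, so the std::map keeps the intervals ordered by where they finish.

OverlapIntervalsTy IM;
IM[24] = 16; // a later store covered bytes [16, 24) of the earlier write
IM[32] = 24; // another later store covered bytes [24, 32)
auto Last = --IM.end();                       // interval ending last: [24, 32)
int64_t LaterStart = Last->second;            // 24
int64_t LaterSize = Last->first - LaterStart; // 8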
6163
6264 /// Delete this instruction. Before we do, go through and zero out all the
6365 /// operands of this instruction. If any of them become dead, delete them and
6668 static void
6769 deleteDeadInstruction(Instruction *I, BasicBlock::iterator *BBI,
6870 MemoryDependenceResults &MD, const TargetLibraryInfo &TLI,
71 InstOverlapIntervalsTy &IOL,
6972 SmallSetVector<Value *, 16> *ValueSet = nullptr) {
7073 SmallVector<Instruction *, 32> NowDeadInsts;
7174
103106 NewIter = DeadInst->eraseFromParent();
104107 else
105108 DeadInst->eraseFromParent();
109
110 IOL.erase(DeadInst);
106111
107112 if (ValueSet) ValueSet->remove(DeadInst);
108113 } while (!NowDeadInsts.empty());
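The IOL.erase(DeadInst) call above is the substance of the PR28270 fix. A minimal sketch of the hazard it removes, assuming a map keyed by Instruction* as in InstOverlapIntervalsTy (the helper name is invented):

// Once an instruction is erased from its parent block it is freed, so any
// map still keyed by its address holds a dangling pointer; a later lookup
// or iteration over that map is a use-after-free. Dropping the key first
// avoids the crash.
void eraseSafely(Instruction *DeadInst, InstOverlapIntervalsTy &IOL) {
  IOL.erase(DeadInst);         // drop the stale key...
  DeadInst->eraseFromParent(); // ...then delete the instruction
}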
288293 OverwriteUnknown
289294 };
290295 }
291
292 typedef DenseMap<Instruction *,
293                  std::map<int64_t, int64_t>> InstOverlapIntervalsTy;
294296
295297 /// Return 'OverwriteComplete' if a store to the 'Later' location completely
296298 /// overwrites a store to the 'Earlier' location, 'OverwriteEnd' if the end of
437439 //
438440 // In this case we may want to trim the size of earlier to avoid generating
439441 // writes to addresses which will definitely be overwritten later
440 if (LaterOff > EarlierOff &&
441 LaterOff < int64_t(EarlierOff + Earlier.Size) &&
442 int64_t(LaterOff + Later.Size) >= int64_t(EarlierOff + Earlier.Size))
442 if (!EnablePartialOverwriteTracking &&
443 (LaterOff > EarlierOff && LaterOff < int64_t(EarlierOff + Earlier.Size) &&
444 int64_t(LaterOff + Later.Size) >= int64_t(EarlierOff + Earlier.Size)))
443445 return OverwriteEnd;
444446
445447 // Finally, we also need to check if the later store overwrites the beginning
451453 // In this case we may want to move the destination address and trim the size
452454 // of earlier to avoid generating writes to addresses which will definitely
453455 // be overwritten later.
454 if (LaterOff <= EarlierOff && int64_t(LaterOff + Later.Size) > EarlierOff) {
455 assert (int64_t(LaterOff + Later.Size) < int64_t(EarlierOff + Earlier.Size)
456 && "Expect to be handled as OverwriteComplete" );
456 if (!EnablePartialOverwriteTracking &&
457 (LaterOff <= EarlierOff && int64_t(LaterOff + Later.Size) > EarlierOff)) {
458 assert(int64_t(LaterOff + Later.Size) <
459 int64_t(EarlierOff + Earlier.Size) &&
460 "Expect to be handled as OverwriteComplete");
457461 return OverwriteBegin;
458462 }
459463 // Otherwise, they don't completely overlap.
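A worked instance of the end-overlap check above, with invented values:

#include <cstdint>

bool coversEndExample() {
  // Earlier write covers [0, 32); later write covers [16, 32).
  int64_t EarlierOff = 0, EarlierSize = 32;
  int64_t LaterOff = 16, LaterSize = 16;
  return LaterOff > EarlierOff &&
         LaterOff < EarlierOff + EarlierSize &&            // 16 < 32: true
         LaterOff + LaterSize >= EarlierOff + EarlierSize; // 32 >= 32: true
}
// Returns true, so without partial overwrite tracking isOverwrite reports
// OverwriteEnd here; with tracking enabled the interval [16, 32) is
// recorded in IOL instead and the trim is attempted after the scan.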
584588 /// to a field of that structure.
585589 static bool handleFree(CallInst *F, AliasAnalysis *AA,
586590 MemoryDependenceResults *MD, DominatorTree *DT,
587 const TargetLibraryInfo *TLI) {
591 const TargetLibraryInfo *TLI,
592 InstOverlapIntervalsTy &IOL) {
588593 bool MadeChange = false;
589594
590595 MemoryLocation Loc = MemoryLocation(F->getOperand(0));
616621
617622 // DCE instructions only used to calculate that store.
618623 BasicBlock::iterator BBI(Dependency);
619 deleteDeadInstruction(Dependency, &BBI, *MD, *TLI);
624 deleteDeadInstruction(Dependency, &BBI, *MD, *TLI, IOL);
620625 ++NumFastStores;
621626 MadeChange = true;
622627
671676 /// ret void
672677 static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA,
673678 MemoryDependenceResults *MD,
674 const TargetLibraryInfo *TLI) {
679 const TargetLibraryInfo *TLI,
680 InstOverlapIntervalsTy &IOL) {
675681 bool MadeChange = false;
676682
677683 // Keep track of all of the stack objects that are dead at the end of the
730736 dbgs() << '\n');
731737
732738 // DCE instructions only used to calculate that store.
733 deleteDeadInstruction(Dead, &BBI, *MD, *TLI, &DeadStackObjects);
739 deleteDeadInstruction(Dead, &BBI, *MD, *TLI, IOL, &DeadStackObjects);
734740 ++NumFastStores;
735741 MadeChange = true;
736742 continue;
741747 if (isInstructionTriviallyDead(&*BBI, TLI)) {
742748 DEBUG(dbgs() << "DSE: Removing trivially dead instruction:\n DEAD: "
743749 << *&*BBI << '\n');
744 deleteDeadInstruction(&*BBI, &BBI, *MD, *TLI, &DeadStackObjects);
750 deleteDeadInstruction(&*BBI, &BBI, *MD, *TLI, IOL, &DeadStackObjects);
745751 ++NumFastOther;
746752 MadeChange = true;
747753 continue;
823829 return MadeChange;
824830 }
825831
832 static bool tryToShorten(Instruction *EarlierWrite, int64_t &EarlierOffset,
833 int64_t &EarlierSize, int64_t LaterOffset,
834 int64_t LaterSize, bool IsOverwriteEnd) {
835 // TODO: base this on the target vector size so that if the earlier
836 // store was too small to get vector writes anyway then it's likely
837 // a good idea to shorten it.
838 // Power-of-2 vector writes are probably always a bad idea to optimize,
839 // as any store/memset/memcpy is likely using vector instructions, so
840 // shortening it to a non-vector size is likely to be slower.
841 MemIntrinsic *EarlierIntrinsic = cast<MemIntrinsic>(EarlierWrite);
842 unsigned EarlierWriteAlign = EarlierIntrinsic->getAlignment();
843 if (!IsOverwriteEnd)
844 LaterOffset = int64_t(LaterOffset + LaterSize);
845
846 if (!(llvm::isPowerOf2_64(LaterOffset) && EarlierWriteAlign <= LaterOffset) &&
847 !((EarlierWriteAlign != 0) && LaterOffset % EarlierWriteAlign == 0))
848 return false;
849
850 DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW "
851 << (IsOverwriteEnd ? "END" : "BEGIN") << ": " << *EarlierWrite
852 << "\n KILLER (offset " << LaterOffset << ", " << EarlierSize
853 << ")\n");
854
855 int64_t NewLength = IsOverwriteEnd
856 ? LaterOffset - EarlierOffset
857 : EarlierSize - (LaterOffset - EarlierOffset);
858
859 Value *EarlierWriteLength = EarlierIntrinsic->getLength();
860 Value *TrimmedLength =
861 ConstantInt::get(EarlierWriteLength->getType(), NewLength);
862 EarlierIntrinsic->setLength(TrimmedLength);
863
864 EarlierSize = NewLength;
865 if (!IsOverwriteEnd) {
866 int64_t OffsetMoved = (LaterOffset - EarlierOffset);
867 Value *Indices[1] = {
868 ConstantInt::get(EarlierWriteLength->getType(), OffsetMoved)};
869 GetElementPtrInst *NewDestGEP = GetElementPtrInst::CreateInBounds(
870 EarlierIntrinsic->getRawDest(), Indices, "", EarlierWrite);
871 EarlierIntrinsic->setDest(NewDestGEP);
872 EarlierOffset = EarlierOffset + OffsetMoved;
873 }
874 return true;
875 }
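A worked instance of tryToShorten's arithmetic for a begin trim, with invented values (the end-trim case keeps the original destination and only shrinks the length):

#include <cassert>
#include <cstdint>

void trimBeginExample() {
  // Earlier memset covers [0, 32); a later write covers [0, 8).
  int64_t EarlierOffset = 0, EarlierSize = 32;
  int64_t LaterOffset = 0, LaterSize = 8;
  LaterOffset += LaterSize; // begin trims measure from the overwrite's end: 8
  int64_t NewLength = EarlierSize - (LaterOffset - EarlierOffset); // 24
  int64_t OffsetMoved = LaterOffset - EarlierOffset;               // 8
  assert(NewLength == 24 && OffsetMoved == 8);
  // The pass sets the memset length to 24 and GEPs the destination by +8,
  // provided the new start offset keeps the write suitably aligned.
}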
876
877 static bool tryToShortenEnd(Instruction *EarlierWrite,
878 OverlapIntervalsTy &IntervalMap,
879 int64_t &EarlierStart, int64_t &EarlierSize) {
880 if (IntervalMap.empty() || !isShortenableAtTheEnd(EarlierWrite))
881 return false;
882
883 OverlapIntervalsTy::iterator OII = --IntervalMap.end();
884 int64_t LaterStart = OII->second;
885 int64_t LaterSize = OII->first - LaterStart;
886
887 if (LaterStart > EarlierStart && LaterStart < EarlierStart + EarlierSize &&
888 LaterStart + LaterSize >= EarlierStart + EarlierSize) {
889 if (tryToShorten(EarlierWrite, EarlierStart, EarlierSize, LaterStart,
890 LaterSize, true)) {
891 IntervalMap.erase(OII);
892 return true;
893 }
894 }
895 return false;
896 }
897
898 static bool tryToShortenBegin(Instruction *EarlierWrite,
899 OverlapIntervalsTy &IntervalMap,
900 int64_t &EarlierStart, int64_t &EarlierSize) {
901 if (IntervalMap.empty() || !isShortenableAtTheBeginning(EarlierWrite))
902 return false;
903
904 OverlapIntervalsTy::iterator OII = IntervalMap.begin();
905 int64_t LaterStart = OII->second;
906 int64_t LaterSize = OII->first - LaterStart;
907
908 if (LaterStart <= EarlierStart && LaterStart + LaterSize > EarlierStart) {
909 assert(LaterStart + LaterSize < EarlierStart + EarlierSize &&
910 "Should have been handled as OverwriteComplete");
911 if (tryToShorten(EarlierWrite, EarlierStart, EarlierSize, LaterStart,
912 LaterSize, false)) {
913 IntervalMap.erase(OII);
914 return true;
915 }
916 }
917 return false;
918 }
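Because intervals are keyed by their end offset, the two helpers each inspect one end of the map. A sketch with invented values, assuming OverlapIntervalsTy as defined above:

OverlapIntervalsTy IM;
IM[8] = 0;   // later write covered [0, 8):   candidate for tryToShortenBegin
IM[32] = 24; // later write covered [24, 32): candidate for tryToShortenEnd
auto BeginTrim = IM.begin(); // leftmost interval, starts earliest
auto EndTrim = --IM.end();   // rightmost interval, ends latest
// On a successful trim the helper erases its interval, so the other end
// can still be tried against the shortened write.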
919
920 static bool removePartiallyOverlappedStores(AliasAnalysis *AA,
921 const DataLayout &DL,
922 InstOverlapIntervalsTy &IOL) {
923 bool Changed = false;
924 for (auto &OI : IOL) {
925 Instruction *EarlierWrite = OI.first;
926 MemoryLocation Loc = getLocForWrite(EarlierWrite, *AA);
927 assert(isRemovable(EarlierWrite) && "Expect only removable instruction");
928 assert(Loc.Size != MemoryLocation::UnknownSize && "Unexpected mem loc");
929
930 const Value *Ptr = Loc.Ptr->stripPointerCasts();
931 int64_t EarlierStart = 0;
932 int64_t EarlierSize = int64_t(Loc.Size);
933 GetPointerBaseWithConstantOffset(Ptr, EarlierStart, DL);
934 OverlapIntervalsTy &IntervalMap = OI.second;
935 Changed |=
936     tryToShortenEnd(EarlierWrite, IntervalMap, EarlierStart, EarlierSize);
937 if (IntervalMap.empty())
938 continue;
939 Changed |=
940 tryToShortenBegin(EarlierWrite, IntervalMap, EarlierStart, EarlierSize);
941 }
942 return Changed;
943 }
944
826945 static bool eliminateNoopStore(Instruction *Inst, BasicBlock::iterator &BBI,
827946 AliasAnalysis *AA, MemoryDependenceResults *MD,
828947 const DataLayout &DL,
829 const TargetLibraryInfo *TLI) {
948 const TargetLibraryInfo *TLI,
949 InstOverlapIntervalsTy &IOL) {
830950 // Must be a store instruction.
831951 StoreInst *SI = dyn_cast<StoreInst>(Inst);
832952 if (!SI)
841961 DEBUG(dbgs() << "DSE: Remove Store Of Load from same pointer:\n LOAD: "
842962 << *DepLoad << "\n STORE: " << *SI << '\n');
843963
844 deleteDeadInstruction(SI, &BBI, *MD, *TLI);
964 deleteDeadInstruction(SI, &BBI, *MD, *TLI, IOL);
845965 ++NumRedundantStores;
846966 return true;
847967 }
859979 dbgs() << "DSE: Remove null store to the calloc'ed object:\n DEAD: "
860980 << *Inst << "\n OBJECT: " << *UnderlyingPointer << '\n');
861981
862 deleteDeadInstruction(SI, &BBI, *MD, *TLI);
982 deleteDeadInstruction(SI, &BBI, *MD, *TLI, IOL);
863983 ++NumRedundantStores;
864984 return true;
865985 }
8801000 for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) {
8811001 // Handle 'free' calls specially.
8821002 if (CallInst *F = isFreeCall(&*BBI, TLI)) {
883 MadeChange |= handleFree(F, AA, MD, DT, TLI);
1003 MadeChange |= handleFree(F, AA, MD, DT, TLI, IOL);
8841004 // Increment BBI after handleFree has potentially deleted instructions.
8851005 // This ensures we maintain a valid iterator.
8861006 ++BBI;
8941014 continue;
8951015
8961016 // eliminateNoopStore will update in iterator, if necessary.
897 if (eliminateNoopStore(Inst, BBI, AA, MD, DL, TLI)) {
1017 if (eliminateNoopStore(Inst, BBI, AA, MD, DL, TLI, IOL)) {
8981018 MadeChange = true;
8991019 continue;
9001020 }
9421062 << *DepWrite << "\n KILLER: " << *Inst << '\n');
9431063
9441064 // Delete the store and now-dead instructions that feed it.
945 deleteDeadInstruction(DepWrite, &BBI, *MD, *TLI);
1065 deleteDeadInstruction(DepWrite, &BBI, *MD, *TLI, IOL);
9461066 ++NumFastStores;
9471067 MadeChange = true;
9481068
9521072 } else if ((OR == OverwriteEnd && isShortenableAtTheEnd(DepWrite)) ||
9531073 ((OR == OverwriteBegin &&
9541074 isShortenableAtTheBeginning(DepWrite)))) {
955 // TODO: base this on the target vector size so that if the earlier
956 // store was too small to get vector writes anyway then its likely
957 // a good idea to shorten it
958 // Power of 2 vector writes are probably always a bad idea to optimize
959 // as any store/memset/memcpy is likely using vector instructions so
960 // shortening it to not vector size is likely to be slower
961 MemIntrinsic *DepIntrinsic = cast<MemIntrinsic>(DepWrite);
962 unsigned DepWriteAlign = DepIntrinsic->getAlignment();
1075 assert(!EnablePartialOverwriteTracking && "Do not expect to perform "
1076 "when partial-overwrite "
1077 "tracking is enabled");
1078 int64_t EarlierSize = DepLoc.Size;
1079 int64_t LaterSize = Loc.Size;
9631080 bool IsOverwriteEnd = (OR == OverwriteEnd);
964 if (!IsOverwriteEnd)
965 InstWriteOffset = int64_t(InstWriteOffset + Loc.Size);
966
967 if ((llvm::isPowerOf2_64(InstWriteOffset) &&
968 DepWriteAlign <= InstWriteOffset) ||
969 ((DepWriteAlign != 0) && InstWriteOffset % DepWriteAlign == 0)) {
970
971 DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW "
972 << (IsOverwriteEnd ? "END" : "BEGIN") << ": "
973 << *DepWrite << "\n KILLER (offset "
974 << InstWriteOffset << ", " << DepLoc.Size << ")"
975 << *Inst << '\n');
976
977 int64_t NewLength =
978 IsOverwriteEnd
979 ? InstWriteOffset - DepWriteOffset
980 : DepLoc.Size - (InstWriteOffset - DepWriteOffset);
981
982 Value *DepWriteLength = DepIntrinsic->getLength();
983 Value *TrimmedLength =
984 ConstantInt::get(DepWriteLength->getType(), NewLength);
985 DepIntrinsic->setLength(TrimmedLength);
986
987 if (!IsOverwriteEnd) {
988 int64_t OffsetMoved = (InstWriteOffset - DepWriteOffset);
989 Value *Indices[1] = {
990 ConstantInt::get(DepWriteLength->getType(), OffsetMoved)};
991 GetElementPtrInst *NewDestGEP = GetElementPtrInst::CreateInBounds(
992 DepIntrinsic->getRawDest(), Indices, "", DepWrite);
993 DepIntrinsic->setDest(NewDestGEP);
994 }
995 MadeChange = true;
996 }
1081 MadeChange = tryToShorten(DepWrite, DepWriteOffset, EarlierSize,
1082 InstWriteOffset, LaterSize, IsOverwriteEnd);
9971083 }
9981084 }
9991085
10161102 }
10171103 }
10181104
1105 if (EnablePartialOverwriteTracking)
1106 MadeChange |= removePartiallyOverlappedStores(AA, DL, IOL);
1107
10191108 // If this block ends in a return, unwind, or unreachable, all allocas are
10201109 // dead at its end, which means stores to them are also dead.
10211110 if (BB.getTerminator()->getNumSuccessors() == 0)
1022 MadeChange |= handleEndBlock(BB, AA, MD, TLI);
1111 MadeChange |= handleEndBlock(BB, AA, MD, TLI, IOL);
10231112
10241113 return MadeChange;
10251114 }
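For reference, the -enable-dse-partial-overwrite-tracking flag exercised by the RUN line below is a cl::opt declared earlier in the file and not shown in this diff; its shape is roughly the following, though the exact description string is an assumption here:

static cl::opt<bool>
    EnablePartialOverwriteTracking("enable-dse-partial-overwrite-tracking",
                                   cl::init(true), cl::Hidden,
                                   cl::desc("Enable partial-overwrite tracking in DSE"));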
0 ; RUN: opt < %s -basicaa -dse -S -enable-dse-partial-overwrite-tracking | FileCheck %s
1 ; PR28588
2
3 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
4 target triple = "x86_64-unknown-linux-gnu"
5
6 ; Function Attrs: nounwind
7 define void @_UPT_destroy(i8* nocapture %ptr) local_unnamed_addr #0 {
8 entry:
9 %edi = getelementptr inbounds i8, i8* %ptr, i64 8
10
11 ; CHECK-NOT: tail call void @llvm.memset.p0i8.i64(i8* %edi, i8 0, i64 176, i32 8, i1 false)
12 ; CHECK-NOT: store i32 -1, i32* %addr
13
14 tail call void @llvm.memset.p0i8.i64(i8* %edi, i8 0, i64 176, i32 8, i1 false)
15 %format4.i = getelementptr inbounds i8, i8* %ptr, i64 144
16 %addr = bitcast i8* %format4.i to i32*
17 store i32 -1, i32* %addr, align 8
18
19 ; CHECK: tail call void @free
20 tail call void @free(i8* nonnull %ptr)
21 ret void
22 }
23
24 ; Function Attrs: nounwind
25 declare void @free(i8* nocapture) local_unnamed_addr #0
26
27 ; Function Attrs: argmemonly nounwind
28 declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1) #1
29
30 attributes #0 = { nounwind }
31 attributes #1 = { argmemonly nounwind }
8585 ret void
8686 }
8787
88 define void @write8To15AndThen0To7(i64* nocapture %P) {
89 entry:
90 ; CHECK-LABEL: @write8To15AndThen0To7(
91 ; CHECK: [[GEP:%[0-9]+]] = getelementptr inbounds i8, i8* %mybase0, i64 16
92 ; CHECK: tail call void @llvm.memset.p0i8.i64(i8* [[GEP]], i8 0, i64 16, i32 8, i1 false)
93
94 %base0 = bitcast i64* %P to i8*
95 %mybase0 = getelementptr inbounds i8, i8* %base0, i64 0
96 tail call void @llvm.memset.p0i8.i64(i8* %mybase0, i8 0, i64 32, i32 8, i1 false)
97
98 %base64_0 = getelementptr inbounds i64, i64* %P, i64 0
99 %base64_1 = getelementptr inbounds i64, i64* %P, i64 1
100
101 store i64 1, i64* %base64_1
102 store i64 2, i64* %base64_0
103 ret void
104 }
105
88106 declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
89107
9292 store i64 3, i64* %tf_trapno, align 8
9393 ret void
9494 }
95
96 define void @write16To23AndThen24To31(i64* nocapture %P, i64 %n64, i32 %n32, i16 %n16, i8 %n8) {
97 entry:
98 ; CHECK-LABEL: @write16To23AndThen24To31(
99 ; CHECK: tail call void @llvm.memset.p0i8.i64(i8* %mybase0, i8 0, i64 16, i32 8, i1 false)
100
101 %base0 = bitcast i64* %P to i8*
102 %mybase0 = getelementptr inbounds i8, i8* %base0, i64 0
103 tail call void @llvm.memset.p0i8.i64(i8* %mybase0, i8 0, i64 32, i32 8, i1 false)
104
105 %base64_2 = getelementptr inbounds i64, i64* %P, i64 2
106 %base64_3 = getelementptr inbounds i64, i64* %P, i64 3
107
108 store i64 3, i64* %base64_2
109 store i64 3, i64* %base64_3
110 ret void
111 }