llvm.org GIT mirror: llvm / commit 2ca934e

[DSE] Enhance shortening MemIntrinsic based on OverlapIntervals

Summary: This change uses the overlap interval map built from partial overwrite tracking to shorten MemIntrinsics. Add test cases which were missing opportunities before.

Reviewers: hfinkel, eeckstein, mcrosier
Subscribers: mcrosier, llvm-commits
Differential Revision: https://reviews.llvm.org/D21909

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@275571 91177308-0d34-0410-b5e6-96231b3b80d8
Jun Bum Lim, 3 years ago

3 changed files with 169 additions and 50 deletions.
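Before the diff itself, a rough sketch of the data structure the change builds on (an illustration for orientation, not code from the patch): partial overwrite tracking records, for each tracked store, the byte intervals that later stores have already overwritten, and the new helpers walk that map to trim the earlier memset/memcpy from either end. The encoding assumed here (key = one-past-the-end offset, value = start offset) is inferred from how tryToShortenEnd/tryToShortenBegin below read the map.

#include <cstdint>
#include <map>

// Same shape as the OverlapIntervalsTy typedef introduced below:
// key = one-past-the-end byte offset of an overwritten range,
// value = its start offset (an assumption based on how the helpers decode it).
typedef std::map<int64_t, int64_t> OverlapIntervalsTy;

int main() {
  OverlapIntervalsTy IM;
  IM[16] = 8;  // a later store overwrote bytes [8, 16) of the earlier write
  IM[32] = 24; // another later store overwrote bytes [24, 32)

  // --IM.end() yields the interval with the largest end offset: the candidate
  // for trimming the end of the earlier write (tryToShortenEnd).
  // IM.begin() yields the interval with the smallest end offset: the candidate
  // for trimming its beginning (tryToShortenBegin).
  return 0;
}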
@@ -289,8 +289,8 @@
 };
 }
 
-typedef DenseMap<Instruction *,
-                 std::map<int64_t, int64_t>> InstOverlapIntervalsTy;
+typedef std::map<int64_t, int64_t> OverlapIntervalsTy;
+typedef DenseMap<Instruction *, OverlapIntervalsTy> InstOverlapIntervalsTy;
 
 /// Return 'OverwriteComplete' if a store to the 'Later' location completely
 /// overwrites a store to the 'Earlier' location, 'OverwriteEnd' if the end of
@@ -437,9 +437,9 @@
   //
   // In this case we may want to trim the size of earlier to avoid generating
   // writes to addresses which will definitely be overwritten later
-  if (LaterOff > EarlierOff &&
-      LaterOff < int64_t(EarlierOff + Earlier.Size) &&
-      int64_t(LaterOff + Later.Size) >= int64_t(EarlierOff + Earlier.Size))
+  if (!EnablePartialOverwriteTracking &&
+      (LaterOff > EarlierOff && LaterOff < int64_t(EarlierOff + Earlier.Size) &&
+       int64_t(LaterOff + Later.Size) >= int64_t(EarlierOff + Earlier.Size)))
     return OverwriteEnd;
 
   // Finally, we also need to check if the later store overwrites the beginning
@@ -451,9 +451,11 @@
   // In this case we may want to move the destination address and trim the size
   // of earlier to avoid generating writes to addresses which will definitely
   // be overwritten later.
-  if (LaterOff <= EarlierOff && int64_t(LaterOff + Later.Size) > EarlierOff) {
-    assert (int64_t(LaterOff + Later.Size) < int64_t(EarlierOff + Earlier.Size)
-            && "Expect to be handled as OverwriteComplete" );
+  if (!EnablePartialOverwriteTracking &&
+      (LaterOff <= EarlierOff && int64_t(LaterOff + Later.Size) > EarlierOff)) {
+    assert(int64_t(LaterOff + Later.Size) <
+               int64_t(EarlierOff + Earlier.Size) &&
+           "Expect to be handled as OverwriteComplete");
     return OverwriteBegin;
   }
   // Otherwise, they don't completely overlap.
@@ -818,6 +820,119 @@
   return MadeChange;
 }
 
+static bool tryToShorten(Instruction *EarlierWrite, int64_t &EarlierOffset,
+                         int64_t &EarlierSize, int64_t LaterOffset,
+                         int64_t LaterSize, bool IsOverwriteEnd) {
+  // TODO: base this on the target vector size so that if the earlier
+  // store was too small to get vector writes anyway then its likely
+  // a good idea to shorten it
+  // Power of 2 vector writes are probably always a bad idea to optimize
+  // as any store/memset/memcpy is likely using vector instructions so
+  // shortening it to not vector size is likely to be slower
+  MemIntrinsic *EarlierIntrinsic = cast<MemIntrinsic>(EarlierWrite);
+  unsigned EarlierWriteAlign = EarlierIntrinsic->getAlignment();
+  if (!IsOverwriteEnd)
+    LaterOffset = int64_t(LaterOffset + LaterSize);
+
+  if (!(llvm::isPowerOf2_64(LaterOffset) && EarlierWriteAlign <= LaterOffset) &&
+      !((EarlierWriteAlign != 0) && LaterOffset % EarlierWriteAlign == 0))
+    return false;
+
+  DEBUG(dbgs() << "DSE: Remove Dead Store:\n  OW "
+               << (IsOverwriteEnd ? "END" : "BEGIN") << ": " << *EarlierWrite
+               << "\n  KILLER (offset " << LaterOffset << ", " << EarlierSize
+               << ")\n");
+
+  int64_t NewLength = IsOverwriteEnd
+                          ? LaterOffset - EarlierOffset
+                          : EarlierSize - (LaterOffset - EarlierOffset);
+
+  Value *EarlierWriteLength = EarlierIntrinsic->getLength();
+  Value *TrimmedLength =
+      ConstantInt::get(EarlierWriteLength->getType(), NewLength);
+  EarlierIntrinsic->setLength(TrimmedLength);
+
+  EarlierSize = NewLength;
+  if (!IsOverwriteEnd) {
+    int64_t OffsetMoved = (LaterOffset - EarlierOffset);
+    Value *Indices[1] = {
+        ConstantInt::get(EarlierWriteLength->getType(), OffsetMoved)};
+    GetElementPtrInst *NewDestGEP = GetElementPtrInst::CreateInBounds(
+        EarlierIntrinsic->getRawDest(), Indices, "", EarlierWrite);
+    EarlierIntrinsic->setDest(NewDestGEP);
+    EarlierOffset = EarlierOffset + OffsetMoved;
+  }
+  return true;
+}
+
+static bool tryToShortenEnd(Instruction *EarlierWrite,
+                            OverlapIntervalsTy &IntervalMap,
+                            int64_t &EarlierStart, int64_t &EarlierSize) {
+  if (IntervalMap.empty() || !isShortenableAtTheEnd(EarlierWrite))
+    return false;
+
+  OverlapIntervalsTy::iterator OII = --IntervalMap.end();
+  int64_t LaterStart = OII->second;
+  int64_t LaterSize = OII->first - LaterStart;
+
+  if (LaterStart > EarlierStart && LaterStart < EarlierStart + EarlierSize &&
+      LaterStart + LaterSize >= EarlierStart + EarlierSize) {
+    if (tryToShorten(EarlierWrite, EarlierStart, EarlierSize, LaterStart,
+                     LaterSize, true)) {
+      IntervalMap.erase(OII);
+      return true;
+    }
+  }
+  return false;
+}
+
+static bool tryToShortenBegin(Instruction *EarlierWrite,
+                              OverlapIntervalsTy &IntervalMap,
+                              int64_t &EarlierStart, int64_t &EarlierSize) {
+  if (IntervalMap.empty() || !isShortenableAtTheBeginning(EarlierWrite))
+    return false;
+
+  OverlapIntervalsTy::iterator OII = IntervalMap.begin();
+  int64_t LaterStart = OII->second;
+  int64_t LaterSize = OII->first - LaterStart;
+
+  if (LaterStart <= EarlierStart && LaterStart + LaterSize > EarlierStart) {
+    assert(LaterStart + LaterSize < EarlierStart + EarlierSize &&
+           "Should have been handled as OverwriteComplete");
+    if (tryToShorten(EarlierWrite, EarlierStart, EarlierSize, LaterStart,
+                     LaterSize, false)) {
+      IntervalMap.erase(OII);
+      return true;
+    }
+  }
+  return false;
+}
+
+static bool removePartiallyOverlappedStores(AliasAnalysis *AA,
+                                            const DataLayout &DL,
+                                            InstOverlapIntervalsTy &IOL) {
+  bool Changed = false;
+  for (auto OI : IOL) {
+    Instruction *EarlierWrite = OI.first;
+    MemoryLocation Loc = getLocForWrite(EarlierWrite, *AA);
+    assert(isRemovable(EarlierWrite) && "Expect only removable instruction");
+    assert(Loc.Size != MemoryLocation::UnknownSize && "Unexpected mem loc");
+
+    const Value *Ptr = Loc.Ptr->stripPointerCasts();
+    int64_t EarlierStart = 0;
+    int64_t EarlierSize = int64_t(Loc.Size);
+    GetPointerBaseWithConstantOffset(Ptr, EarlierStart, DL);
+    OverlapIntervalsTy &IntervalMap = OI.second;
+    Changed =
+        tryToShortenEnd(EarlierWrite, IntervalMap, EarlierStart, EarlierSize);
+    if (IntervalMap.empty())
+      continue;
+    Changed |=
+        tryToShortenBegin(EarlierWrite, IntervalMap, EarlierStart, EarlierSize);
+  }
+  return Changed;
+}
+
 static bool eliminateNoopStore(Instruction *Inst, BasicBlock::iterator &BBI,
                                AliasAnalysis *AA, MemoryDependenceResults *MD,
                                const DataLayout &DL,
@@ -935,7 +1050,7 @@
         if (OR == OverwriteComplete) {
           DEBUG(dbgs() << "DSE: Remove Dead Store:\n  DEAD: "
                        << *DepWrite << "\n  KILLER: " << *Inst << '\n');
-
+          IOL.erase(DepWrite);
           // Delete the store and now-dead instructions that feed it.
           deleteDeadInstruction(DepWrite, &BBI, *MD, *TLI);
           ++NumFastStores;
@@ -947,48 +1062,14 @@
         } else if ((OR == OverwriteEnd && isShortenableAtTheEnd(DepWrite)) ||
                    ((OR == OverwriteBegin &&
                      isShortenableAtTheBeginning(DepWrite)))) {
-          // TODO: base this on the target vector size so that if the earlier
-          // store was too small to get vector writes anyway then its likely
-          // a good idea to shorten it
-          // Power of 2 vector writes are probably always a bad idea to optimize
-          // as any store/memset/memcpy is likely using vector instructions so
-          // shortening it to not vector size is likely to be slower
-          MemIntrinsic *DepIntrinsic = cast<MemIntrinsic>(DepWrite);
-          unsigned DepWriteAlign = DepIntrinsic->getAlignment();
+          assert(!EnablePartialOverwriteTracking && "Do not expect to perform "
+                                                    "when partial-overwrite "
+                                                    "tracking is enabled");
+          int64_t EarlierSize = DepLoc.Size;
+          int64_t LaterSize = Loc.Size;
           bool IsOverwriteEnd = (OR == OverwriteEnd);
-          if (!IsOverwriteEnd)
-            InstWriteOffset = int64_t(InstWriteOffset + Loc.Size);
-
-          if ((llvm::isPowerOf2_64(InstWriteOffset) &&
-               DepWriteAlign <= InstWriteOffset) ||
-              ((DepWriteAlign != 0) && InstWriteOffset % DepWriteAlign == 0)) {
-
-            DEBUG(dbgs() << "DSE: Remove Dead Store:\n  OW "
-                         << (IsOverwriteEnd ? "END" : "BEGIN") << ": "
-                         << *DepWrite << "\n  KILLER (offset "
-                         << InstWriteOffset << ", " << DepLoc.Size << ")"
-                         << *Inst << '\n');
-
-            int64_t NewLength =
-                IsOverwriteEnd
-                    ? InstWriteOffset - DepWriteOffset
-                    : DepLoc.Size - (InstWriteOffset - DepWriteOffset);
-
-            Value *DepWriteLength = DepIntrinsic->getLength();
-            Value *TrimmedLength =
-                ConstantInt::get(DepWriteLength->getType(), NewLength);
-            DepIntrinsic->setLength(TrimmedLength);
-
-            if (!IsOverwriteEnd) {
-              int64_t OffsetMoved = (InstWriteOffset - DepWriteOffset);
-              Value *Indices[1] = {
-                  ConstantInt::get(DepWriteLength->getType(), OffsetMoved)};
-              GetElementPtrInst *NewDestGEP = GetElementPtrInst::CreateInBounds(
-                  DepIntrinsic->getRawDest(), Indices, "", DepWrite);
-              DepIntrinsic->setDest(NewDestGEP);
-            }
-            MadeChange = true;
-          }
+          MadeChange = tryToShorten(DepWrite, DepWriteOffset, EarlierSize,
+                                    InstWriteOffset, LaterSize, IsOverwriteEnd);
         }
       }
 
@@ -1010,6 +1091,9 @@
                                              DepWrite->getIterator(), &BB);
     }
   }
+
+  if (EnablePartialOverwriteTracking)
+    MadeChange |= removePartiallyOverlappedStores(AA, DL, IOL);
 
   // If this block ends in a return, unwind, or unreachable, all allocas are
   // dead at its end, which means stores to them are also dead.
@@ -85,5 +85,23 @@
   ret void
 }
 
+define void @write8To15AndThen0To7(i64* nocapture %P) {
+entry:
+; CHECK-LABEL: @write8To15AndThen0To7(
+; CHECK: [[GEP:%[0-9]+]] = getelementptr inbounds i8, i8* %mybase0, i64 16
+; CHECK: tail call void @llvm.memset.p0i8.i64(i8* [[GEP]], i8 0, i64 16, i32 8, i1 false)
+
+  %base0 = bitcast i64* %P to i8*
+  %mybase0 = getelementptr inbounds i8, i8* %base0, i64 0
+  tail call void @llvm.memset.p0i8.i64(i8* %mybase0, i8 0, i64 32, i32 8, i1 false)
+
+  %base64_0 = getelementptr inbounds i64, i64* %P, i64 0
+  %base64_1 = getelementptr inbounds i64, i64* %P, i64 1
+
+  store i64 1, i64* %base64_1
+  store i64 2, i64* %base64_0
+  ret void
+}
+
 declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
 
@@ -92,3 +92,20 @@
   store i64 3, i64* %tf_trapno, align 8
   ret void
 }
+
+define void @write16To23AndThen24To31(i64* nocapture %P, i64 %n64, i32 %n32, i16 %n16, i8 %n8) {
+entry:
+; CHECK-LABEL: @write16To23AndThen24To31(
+; CHECK: tail call void @llvm.memset.p0i8.i64(i8* %mybase0, i8 0, i64 16, i32 8, i1 false)
+
+  %base0 = bitcast i64* %P to i8*
+  %mybase0 = getelementptr inbounds i8, i8* %base0, i64 0
+  tail call void @llvm.memset.p0i8.i64(i8* %mybase0, i8 0, i64 32, i32 8, i1 false)
+
+  %base64_2 = getelementptr inbounds i64, i64* %P, i64 2
+  %base64_3 = getelementptr inbounds i64, i64* %P, i64 3
+
+  store i64 3, i64* %base64_2
+  store i64 3, i64* %base64_3
+  ret void
+}
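As a worked illustration of what the pass now does for these tests (a standalone sketch: the variable names mirror the helpers added above, but this is plain arithmetic, not pass code, and the end-offset to start-offset encoding of the interval map is an assumption inferred from those helpers): in write8To15AndThen0To7 the two i64 stores cover bytes [0, 16) of the 32-byte memset, so the memset is rebased to offset 16 with length 16; in write16To23AndThen24To31 the stores cover bytes [16, 32), so the memset keeps its destination and is cut to length 16.

#include <cassert>
#include <cstdint>
#include <map>

typedef std::map<int64_t, int64_t> OverlapIntervalsTy; // end -> start (assumed)

int main() {
  // write8To15AndThen0To7: memset covers [0, 32); later stores cover [0, 16).
  int64_t EarlierStart = 0, EarlierSize = 32;
  OverlapIntervalsTy IntervalMap;
  IntervalMap[16] = 0;

  // Mirrors tryToShortenBegin + tryToShorten with IsOverwriteEnd == false.
  OverlapIntervalsTy::iterator OII = IntervalMap.begin();
  int64_t LaterStart = OII->second;            // 0
  int64_t LaterSize = OII->first - LaterStart; // 16
  if (LaterStart <= EarlierStart && LaterStart + LaterSize > EarlierStart) {
    int64_t OffsetMoved = (LaterStart + LaterSize) - EarlierStart; // 16
    EarlierStart += OffsetMoved; // new destination offset: 16
    EarlierSize -= OffsetMoved;  // new length: 16
  }
  assert(EarlierStart == 16 && EarlierSize == 16);
  // Matches the CHECK lines above: the destination is advanced by a GEP of 16
  // and the memset length becomes 16.
  return 0;
}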