[DeadStoreElimination] Shorten beginning of memset overwritten by later stores

Summary: This change will shorten a memset if the beginning of the memset is
overwritten by later stores.

Reviewers: hfinkel, eeckstein, dberlin, mcrosier

Subscribers: mgrang, mcrosier, llvm-commits

Differential Revision: http://reviews.llvm.org/D18906

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@267197 91177308-0d34-0410-b5e6-96231b3b80d8

Author: Jun Bum Lim
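Note: as an illustration of the pattern this patch targets, here is a
source-level analogue of the @write0to3 test added below (a hypothetical C++
rendering, not code from the commit):

#include <cstdint>
#include <cstring>

void write0to3(int32_t *p) {
  std::memset(p, 0, 28); // writes bytes 0..27
  p[0] = 1;              // immediately rewrites bytes 0..3
}

// The first 4 bytes of the memset are dead, so after this change DSE can
// rewrite it as if the source had been:
//   std::memset(reinterpret_cast<char *>(p) + 4, 0, 24);
//   p[0] = 1;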
2 changed files with 164 additions and 29 deletions.
lib/Transforms/Scalar/DeadStoreElimination.cpp

 }


-/// isShortenable - Returns true if this instruction can be safely shortened in
+/// Returns true if the end of this instruction can be safely shortened in
 /// length.
-static bool isShortenable(Instruction *I) {
+static bool isShortenableAtTheEnd(Instruction *I) {
   // Don't shorten stores for now
   if (isa<StoreInst>(I))
     return false;
...
     case Intrinsic::memset:
     case Intrinsic::memcpy:
       // Do shorten memory intrinsics.
+      // FIXME: Add memmove if it's also safe to transform.
       return true;
     }
   }
...
   // Don't shorten libcalls for now.

   return false;
+}
+
+/// Returns true if the beginning of this instruction can be safely shortened
+/// in length.
+static bool isShortenableAtTheBeginning(Instruction *I) {
+  // FIXME: Handle only memset for now. Supporting memcpy/memmove should be
+  // easily done by offsetting the source address.
+  IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
+  return II && II->getIntrinsicID() == Intrinsic::memset;
 }

 /// getStoredPointerOperand - Return the pointer that is being written to.
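Note: the FIXME in isShortenableAtTheBeginning hints at how memcpy/memmove
could be supported later. A minimal sketch of that idea, assuming a later
store kills the first K bytes (hypothetical helper, not part of this commit):

#include <cstddef>
#include <cstring>

// Begin-shortening a memcpy must advance the source as well as the
// destination; memset only needs its destination moved, which is why it is
// handled first.
void memcpyShortenedAtBegin(char *Dst, const char *Src, size_t Len, size_t K) {
  std::memcpy(Dst + K, Src + K, Len - K);
}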
...
 }

 namespace {
-enum OverwriteResult
-{
-  OverwriteComplete,
-  OverwriteEnd,
-  OverwriteUnknown
-};
-}
-
-/// isOverwrite - Return 'OverwriteComplete' if a store to the 'Later' location
-/// completely overwrites a store to the 'Earlier' location.
-/// 'OverwriteEnd' if the end of the 'Earlier' location is completely
-/// overwritten by 'Later', or 'OverwriteUnknown' if nothing can be determined
+enum OverwriteResult {
+  OverwriteBegin,
+  OverwriteComplete,
+  OverwriteEnd,
+  OverwriteUnknown
+};
+}
+
+/// Return 'OverwriteComplete' if a store to the 'Later' location completely
+/// overwrites a store to the 'Earlier' location, 'OverwriteEnd' if the end of
+/// the 'Earlier' location is completely overwritten by 'Later',
+/// 'OverwriteBegin' if the beginning of the 'Earlier' location is overwritten
+/// by 'Later', or 'OverwriteUnknown' if nothing can be determined.
 static OverwriteResult isOverwrite(const MemoryLocation &Later,
                                    const MemoryLocation &Earlier,
                                    const DataLayout &DL,
...
       uint64_t(EarlierOff - LaterOff) + Earlier.Size <= Later.Size)
     return OverwriteComplete;

-  // The other interesting case is if the later store overwrites the end of
-  // the earlier store
+  // Another interesting case is if the later store overwrites the end of the
+  // earlier store.
   //
   //      |--earlier--|
   //      |--   later   --|
...
       int64_t(LaterOff + Later.Size) >= int64_t(EarlierOff + Earlier.Size))
     return OverwriteEnd;

+  // Finally, we also need to check if the later store overwrites the beginning
+  // of the earlier store.
+  //
+  //      |--earlier--|
+  //  |--   later   --|
+  //
+  // In this case we may want to move the destination address and trim the size
+  // of earlier to avoid generating writes to addresses which will definitely
+  // be overwritten later.
+  if (LaterOff <= EarlierOff && int64_t(LaterOff + Later.Size) > EarlierOff) {
+    assert(int64_t(LaterOff + Later.Size) < int64_t(EarlierOff + Earlier.Size) &&
+           "Expect to be handled as OverwriteComplete");
+    return OverwriteBegin;
+  }
   // Otherwise, they don't completely overlap.
   return OverwriteUnknown;
 }
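Note: tracing the new OverwriteBegin case with concrete numbers from the
@write4to7 test added below (a standalone recomputation, not pass code):

#include <cassert>
#include <cstdint>

int main() {
  // Earlier: memset(p+4, 0, 28) covers bytes [4, 32).
  // Later:   a 4-byte store at p+4 covers bytes [4, 8).
  int64_t EarlierOff = 4, EarlierSize = 28;
  int64_t LaterOff = 4, LaterSize = 4;

  assert(LaterOff <= EarlierOff);                          // starts at the front
  assert(LaterOff + LaterSize > EarlierOff);               // actually overlaps
  assert(LaterOff + LaterSize < EarlierOff + EarlierSize); // not complete
  // => isOverwrite returns OverwriteBegin; a complete overwrite would have
  // been caught by the OverwriteComplete check earlier in the function.
  return 0;
}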
...
       if (BBI != BB.begin())
         --BBI;
       break;
-    } else if (OR == OverwriteEnd && isShortenable(DepWrite)) {
+    } else if ((OR == OverwriteEnd && isShortenableAtTheEnd(DepWrite)) ||
+               ((OR == OverwriteBegin &&
+                 isShortenableAtTheBeginning(DepWrite)))) {
       // TODO: base this on the target vector size so that if the earlier
       // store was too small to get vector writes anyway then it's likely
       // a good idea to shorten it
       // Power of 2 vector writes are probably always a bad idea to optimize
       // as any store/memset/memcpy is likely using vector instructions so
       // shortening it to a non-vector size is likely to be slower
-      MemIntrinsic* DepIntrinsic = cast<MemIntrinsic>(DepWrite);
+      MemIntrinsic *DepIntrinsic = cast<MemIntrinsic>(DepWrite);
       unsigned DepWriteAlign = DepIntrinsic->getAlignment();
-      if (llvm::isPowerOf2_64(InstWriteOffset) ||
+      bool IsOverwriteEnd = (OR == OverwriteEnd);
+      if (!IsOverwriteEnd)
+        InstWriteOffset = int64_t(InstWriteOffset + Loc.Size);
+
+      if ((llvm::isPowerOf2_64(InstWriteOffset) &&
+           DepWriteAlign <= InstWriteOffset) ||
           ((DepWriteAlign != 0) && InstWriteOffset % DepWriteAlign == 0)) {

-        DEBUG(dbgs() << "DSE: Remove Dead Store:\n  OW END: "
-              << *DepWrite << "\n  KILLER (offset "
-              << InstWriteOffset << ", "
-              << DepLoc.Size << ")"
-              << *Inst << '\n');
-
-        Value* DepWriteLength = DepIntrinsic->getLength();
-        Value* TrimmedLength = ConstantInt::get(DepWriteLength->getType(),
-                                                InstWriteOffset -
-                                                DepWriteOffset);
+        DEBUG(dbgs() << "DSE: Remove Dead Store:\n  OW "
+                     << (IsOverwriteEnd ? "END" : "BEGIN") << ": "
+                     << *DepWrite << "\n  KILLER (offset "
+                     << InstWriteOffset << ", " << DepLoc.Size << ")"
+                     << *Inst << '\n');
+
+        int64_t NewLength =
+            IsOverwriteEnd
+                ? InstWriteOffset - DepWriteOffset
+                : DepLoc.Size - (InstWriteOffset - DepWriteOffset);
+
+        Value *DepWriteLength = DepIntrinsic->getLength();
+        Value *TrimmedLength =
+            ConstantInt::get(DepWriteLength->getType(), NewLength);
         DepIntrinsic->setLength(TrimmedLength);
+
+        if (!IsOverwriteEnd) {
+          int64_t OffsetMoved = (InstWriteOffset - DepWriteOffset);
+          Value *Indices[1] = {
+              ConstantInt::get(DepWriteLength->getType(), OffsetMoved)};
+          GetElementPtrInst *NewDestGEP = GetElementPtrInst::CreateInBounds(
+              DepIntrinsic->getRawDest(), Indices, "", DepWrite);
+          DepIntrinsic->setDest(NewDestGEP);
+        }
         MadeChange = true;
       }
     }
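Note: condensing the shortening arithmetic above into a standalone check,
again with the @write4to7 values (variable names follow the patch; the
harness itself is hypothetical):

#include <cassert>
#include <cstdint>

int main() {
  int64_t DepWriteOffset = 4;  // memset starts at byte 4
  int64_t DepSize = 28;        // DepLoc.Size
  int64_t InstWriteOffset = 4; // later store starts at byte 4
  int64_t LocSize = 4;         // later store writes 4 bytes

  // For the begin case the patch first moves InstWriteOffset to the end of
  // the later store:
  InstWriteOffset += LocSize;                             // now 8

  int64_t OffsetMoved = InstWriteOffset - DepWriteOffset; // trim 4 bytes
  int64_t NewLength = DepSize - OffsetMoved;              // 24 bytes remain

  // Matches the CHECK lines in the test: a GEP of 4 bytes off the old
  // destination and a memset of 24 bytes.
  assert(OffsetMoved == 4 && NewLength == 24);
  return 0;
}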
New test file:

; RUN: opt < %s -basicaa -dse -S | FileCheck %s

define void @write4to7(i32* nocapture %p) {
; CHECK-LABEL: @write4to7(
entry:
  %arrayidx0 = getelementptr inbounds i32, i32* %p, i64 1
  %p3 = bitcast i32* %arrayidx0 to i8*
; CHECK: [[GEP:%[0-9]+]] = getelementptr inbounds i8, i8* %p3, i64 4
; CHECK: call void @llvm.memset.p0i8.i64(i8* [[GEP]], i8 0, i64 24, i32 4, i1 false)
  call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 28, i32 4, i1 false)
  %arrayidx1 = getelementptr inbounds i32, i32* %p, i64 1
  store i32 1, i32* %arrayidx1, align 4
  ret void
}

define void @write0to3(i32* nocapture %p) {
; CHECK-LABEL: @write0to3(
entry:
  %p3 = bitcast i32* %p to i8*
; CHECK: [[GEP:%[0-9]+]] = getelementptr inbounds i8, i8* %p3, i64 4
; CHECK: call void @llvm.memset.p0i8.i64(i8* [[GEP]], i8 0, i64 24, i32 4, i1 false)
  call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 28, i32 4, i1 false)
  store i32 1, i32* %p, align 4
  ret void
}

define void @write0to7(i32* nocapture %p) {
; CHECK-LABEL: @write0to7(
entry:
  %p3 = bitcast i32* %p to i8*
; CHECK: [[GEP:%[0-9]+]] = getelementptr inbounds i8, i8* %p3, i64 8
; CHECK: call void @llvm.memset.p0i8.i64(i8* [[GEP]], i8 0, i64 24, i32 4, i1 false)
  call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 32, i32 4, i1 false)
  %p4 = bitcast i32* %p to i64*
  store i64 1, i64* %p4, align 8
  ret void
}

define void @write0to7_2(i32* nocapture %p) {
; CHECK-LABEL: @write0to7_2(
entry:
  %arrayidx0 = getelementptr inbounds i32, i32* %p, i64 1
  %p3 = bitcast i32* %arrayidx0 to i8*
; CHECK: [[GEP:%[0-9]+]] = getelementptr inbounds i8, i8* %p3, i64 4
; CHECK: call void @llvm.memset.p0i8.i64(i8* [[GEP]], i8 0, i64 24, i32 4, i1 false)
  call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 28, i32 4, i1 false)
  %p4 = bitcast i32* %p to i64*
  store i64 1, i64* %p4, align 8
  ret void
}

; We do not trim the beginning of the earlier write if the alignment of the
; start pointer is changed.
define void @dontwrite0to3_align8(i32* nocapture %p) {
; CHECK-LABEL: @dontwrite0to3_align8(
entry:
  %p3 = bitcast i32* %p to i8*
; CHECK: call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 32, i32 8, i1 false)
  call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 32, i32 8, i1 false)
  store i32 1, i32* %p, align 4
  ret void
}

define void @dontwrite0to1(i32* nocapture %p) {
; CHECK-LABEL: @dontwrite0to1(
entry:
  %p3 = bitcast i32* %p to i8*
; CHECK: call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 32, i32 4, i1 false)
  call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 32, i32 4, i1 false)
  %p4 = bitcast i32* %p to i16*
  store i16 1, i16* %p4, align 4
  ret void
}

define void @dontwrite2to9(i32* nocapture %p) {
; CHECK-LABEL: @dontwrite2to9(
entry:
  %arrayidx0 = getelementptr inbounds i32, i32* %p, i64 1
  %p3 = bitcast i32* %arrayidx0 to i8*
; CHECK: call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 32, i32 4, i1 false)
  call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 32, i32 4, i1 false)
  %p4 = bitcast i32* %p to i16*
  %arrayidx2 = getelementptr inbounds i16, i16* %p4, i64 1
  %p5 = bitcast i16* %arrayidx2 to i64*
  store i64 1, i64* %p5, align 8
  ret void
}

declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
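Note: the negative tests above fail the alignment guard rather than the
overlap check. Re-running the guard by hand (alignOk is a hypothetical
rendering of the condition in the patch, not pass code):

#include <cassert>
#include <cstdint>

// The new start offset must respect the memset's declared alignment.
static bool alignOk(int64_t InstWriteOffset, unsigned DepWriteAlign) {
  bool Pow2 = InstWriteOffset > 0 &&
              (InstWriteOffset & (InstWriteOffset - 1)) == 0;
  return (Pow2 && int64_t(DepWriteAlign) <= InstWriteOffset) ||
         (DepWriteAlign != 0 && InstWriteOffset % DepWriteAlign == 0);
}

int main() {
  assert(alignOk(4, 4));  // @write0to3: new start at byte 4 keeps align 4
  assert(!alignOk(4, 8)); // @dontwrite0to3_align8: byte 4 breaks align 8
  assert(!alignOk(2, 4)); // @dontwrite0to1: byte 2 breaks align 4
  return 0;
}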