llvm.org GIT mirror llvm / 9302e9a
[DSE] Don't DSE stores that subsequent memmove calls read from Summary: We used to remove the first memmove in cases like this: memmove(p, p+2, 8); memmove(p, p+2, 8); which is incorrect. Fix this by changing isPossibleSelfRead to what was most likely the intended behavior. Historical note: the buggy code was added in https://reviews.llvm.org/rL120974 to address PR8728. Reviewers: rsmith Subscribers: mcrosier, llvm-commits, jlebar Differential Revision: https://reviews.llvm.org/D43425 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@325641 91177308-0d34-0410-b5e6-96231b3b80d8 Sanjoy Das 1 year, 6 months ago
2 changed file(s) with 78 addition(s) and 16 deletion(s). Raw diff Collapse all Expand all
509509 /// memory region into an identical pointer) then it doesn't actually make its
510510 /// input dead in the traditional sense. Consider this case:
511511 ///
512 /// memcpy(A <- B)
513 /// memcpy(A <- A)
512 /// memmove(A <- B)
513 /// memmove(A <- A)
514514 ///
515515 /// In this case, the second store to A does not make the first store to A dead.
516516 /// The usual situation isn't an explicit A<-A store like this (which can be
526526 // Self reads can only happen for instructions that read memory. Get the
527527 // location read.
528528 MemoryLocation InstReadLoc = getLocForRead(Inst, TLI);
529 if (!InstReadLoc.Ptr) return false; // Not a reading instruction.
529 if (!InstReadLoc.Ptr)
530 return false; // Not a reading instruction.
530531
531532 // If the read and written loc obviously don't alias, it isn't a read.
532 if (AA.isNoAlias(InstReadLoc, InstStoreLoc)) return false;
533
534 // Okay, 'Inst' may copy over itself. However, we can still remove a the
535 // DepWrite instruction if we can prove that it reads from the same location
536 // as Inst. This handles useful cases like:
537 // memcpy(A <- B)
538 // memcpy(A <- B)
539 // Here we don't know if A/B may alias, but we do know that B/B are must
540 // aliases, so removing the first memcpy is safe (assuming it writes <= #
541 // bytes as the second one.
542 MemoryLocation DepReadLoc = getLocForRead(DepWrite, TLI);
543
544 if (DepReadLoc.Ptr && AA.isMustAlias(InstReadLoc.Ptr, DepReadLoc.Ptr))
533 if (AA.isNoAlias(InstReadLoc, InstStoreLoc))
545534 return false;
535
536 if (isa(Inst)) {
537 // LLVM's memcpy overlap semantics are not fully fleshed out (see PR11763)
538 // but in practice memcpy(A <- B) either means that A and B are disjoint or
539 // are equal (i.e. there are not partial overlaps). Given that, if we have:
540 //
541 // memcpy/memmove(A <- B) // DepWrite
542 // memcpy(A <- B) // Inst
543 //
544 // with Inst reading/writing a >= size than DepWrite, we can reason as
545 // follows:
546 //
547 // - If A == B then both the copies are no-ops, so the DepWrite can be
548 // removed.
549 // - If A != B then A and B are disjoint locations in Inst. Since
550 // Inst.size >= DepWrite.size A and B are disjoint in DepWrite too.
551 // Therefore DepWrite can be removed.
552 MemoryLocation DepReadLoc = getLocForRead(DepWrite, TLI);
553
554 if (DepReadLoc.Ptr && AA.isMustAlias(InstReadLoc.Ptr, DepReadLoc.Ptr))
555 return false;
556 }
546557
547558 // If DepWrite doesn't read memory or if we can't prove it is a must alias,
548559 // then it can't be considered dead.
251251 ; Do not delete instruction where possible situation is:
252252 ; A = B
253253 ; A = A
254 ;
255 ; NB! See PR11763 - currently LLVM allows memcpy's source and destination to be
256 ; equal (but not inequal and overlapping).
254257 define void @test18(i8* %P, i8* %Q, i8* %R) nounwind ssp {
255258 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
256259 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false)
520523 store i32 0, i32* %p
521524 ret void
522525 }
526
527 ; We cannot optimize away the first memmove since %P could overlap with %Q.
528 define void @test36(i8* %P, i8* %Q) {
529 ; CHECK-LABEL: @test36(
530 ; CHECK-NEXT: tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
531 ; CHECK-NEXT: tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
532 ; CHECK-NEXT: ret
533
534 tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
535 tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
536 ret void
537 }
538
539 define void @test37(i8* %P, i8* %Q, i8* %R) {
540 ; CHECK-LABEL: @test37(
541 ; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
542 ; CHECK-NEXT: tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false)
543 ; CHECK-NEXT: ret
544
545 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
546 tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false)
547 ret void
548 }
549
550 ; Same caveat about memcpy as in @test18 applies here.
551 define void @test38(i8* %P, i8* %Q, i8* %R) {
552 ; CHECK-LABEL: @test38(
553 ; CHECK-NEXT: tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
554 ; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false)
555 ; CHECK-NEXT: ret
556
557 tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
558 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false)
559 ret void
560 }
561
562 define void @test39(i8* %P, i8* %Q, i8* %R) {
563 ; CHECK-LABEL: @test39(
564 ; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
565 ; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 8, i1 false)
566 ; CHECK-NEXT: ret
567
568 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
569 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 8, i1 false)
570 ret void
571 }
572
573 declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1)