llvm.org GIT mirror llvm / 8081057
[MemCpyOpt] Turn memcpy from just-memset'd source into memset. There's no point in copying around constants, so, when all else fails, we can still transform memcpy of memset into two independent memsets. To quote the example, we can turn: memset(dst1, c, dst1_size); memcpy(dst2, dst1, dst2_size); into: memset(dst1, c, dst1_size); memset(dst2, c, dst2_size); When dst2_size <= dst1_size. Like r235232 for copy constructors, this can occur in move constructors. Differential Revision: http://reviews.llvm.org/D9682 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@237506 91177308-0d34-0410-b5e6-96231b3b80d8 Ahmed Bougacha 4 years ago
4 changed file(s) with 149 addition(s) and 3 deletion(s). Raw diff Collapse all Expand all
345345 uint64_t cpyLen, unsigned cpyAlign, CallInst *C);
346346 bool processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep);
347347 bool processMemSetMemCpyDependence(MemCpyInst *M, MemSetInst *MDep);
348 bool performMemCpyToMemSetOptzn(MemCpyInst *M, MemSetInst *MDep);
348349 bool processByValArgument(CallSite CS, unsigned ArgNo);
349350 Instruction *tryMergingIntoMemset(Instruction *I, Value *StartPtr,
350351 Value *ByteVal);
895896 return true;
896897 }
897898
899 /// Transform memcpy to memset when its source was just memset.
900 /// In other words, turn:
901 /// \code
902 /// memset(dst1, c, dst1_size);
903 /// memcpy(dst2, dst1, dst2_size);
904 /// \endcode
905 /// into:
906 /// \code
907 /// memset(dst1, c, dst1_size);
908 /// memset(dst2, c, dst2_size);
909 /// \endcode
910 /// When dst2_size <= dst1_size.
911 ///
912 /// The \p MemCpy must have a Constant length.
913 bool MemCpyOpt::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
914 MemSetInst *MemSet) {
915 // This only makes sense on memcpy(..., memset(...), ...).
916 if (MemSet->getRawDest() != MemCpy->getRawSource())
917 return false;
918
919 ConstantInt *CopySize = cast<ConstantInt>(MemCpy->getLength());
920 ConstantInt *MemSetSize = dyn_cast<ConstantInt>(MemSet->getLength());
921 // Make sure the memcpy doesn't read any more than what the memset wrote.
922 // Don't worry about sizes larger than i64.
923 if (!MemSetSize || CopySize->getZExtValue() > MemSetSize->getZExtValue())
924 return false;
925
926 IRBuilder<> Builder(MemCpy->getNextNode());
927 Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1),
928 CopySize, MemCpy->getAlignment());
929 return true;
930 }
931
898932 /// processMemCpy - perform simplification of memcpy's. If we have memcpy A
899933 /// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
900934 /// B to be a memcpy from X to Z (or potentially a memmove, depending on
937971 ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength());
938972 if (!CopySize) return false;
939973
940 // The are three possible optimizations we can do for memcpy:
974 // There are four possible optimizations we can do for memcpy:
941975 // a) memcpy-memcpy xform which exposes redundance for DSE.
942976 // b) call-memcpy xform for return slot optimization.
943977 // c) memcpy from freshly alloca'd space or space that has just started its
944978 // lifetime copies undefined data, and we can therefore eliminate the
945979 // memcpy in favor of the data that was already at the destination.
980 // d) memcpy from a just-memset'd source can be turned into memset.
946981 if (DepInfo.isClobber()) {
947982 if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) {
948983 if (performCallSlotOptzn(M, M->getDest(), M->getSource(),
9821017 return true;
9831018 }
9841019 }
1020
1021 if (SrcDepInfo.isClobber())
1022 if (MemSetInst *MDep = dyn_cast<MemSetInst>(SrcDepInfo.getInst()))
1023 if (performMemCpyToMemSetOptzn(M, MDep)) {
1024 MD->removeInstruction(M);
1025 M->eraseFromParent();
1026 ++NumCpyToSet;
1027 return true;
1028 }
9851029
9861030 return false;
9871031 }
1616 }
1717
1818 ; memset touch more bytes than those guaranteed to be dereferenceable
19 ; We can't remove the memcpy, but we can turn it into an independent memset.
1920 define void @must_not_remove_memcpy(i8* noalias nocapture dereferenceable(1024) %dst) {
2021 ; CHECK-LABEL: @must_not_remove_memcpy(
2122 ; CHECK: call void @llvm.memset.p0i8.i64
22 ; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
23 ; CHECK: call void @llvm.memset.p0i8.i64
2324 %src = alloca [4096 x i8], align 1
2425 %p = getelementptr inbounds [4096 x i8], [4096 x i8]* %src, i64 0, i64 0
2526 call void @llvm.memset.p0i8.i64(i8* %p, i8 0, i64 4096, i32 1, i1 false)
3939 ; CHECK: %[[a_cast:[^=]+]] = bitcast [8 x i64]* %[[a]] to i8*
4040 ; CHECK: call void @llvm.memset.p0i8.i64(i8* %[[a_cast]], i8 0, i64 64
4141 ; CHECK: %[[sret_cast:[^=]+]] = bitcast [8 x i64]* %sret to i8*
42 ; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %[[sret_cast]], i8* %[[a_cast]], i64 64
42 ; CHECK: call void @llvm.memset.p0i8.i64(i8* %[[sret_cast]], i8 0, i64 64
4343 ; CHECK: call void @llvm.memset.p0i8.i64(i8* %[[a_cast]], i8 42, i64 32
4444 ; CHECK: %[[out_cast:[^=]+]] = bitcast [8 x i64]* %out to i8*
4545 ; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %[[out_cast]], i8* %[[a_cast]], i64 64
0 ; RUN: opt -memcpyopt -S %s | FileCheck %s
1
2 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
3
4 ; CHECK-LABEL: define void @test(
5 ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 1, i1 false)
6 ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst2, i8 %c, i64 128, i32 8, i1 false)
7 ; CHECK-NEXT: ret void
8 define void @test(i8* %dst1, i8* %dst2, i8 %c) {
9 call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 1, i1 false)
10 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 128, i32 8, i1 false)
11 ret void
12 }
13
14 ; CHECK-LABEL: define void @test_smaller_memcpy(
15 ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 1, i1 false)
16 ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst2, i8 %c, i64 100, i32 1, i1 false)
17 ; CHECK-NEXT: ret void
18 define void @test_smaller_memcpy(i8* %dst1, i8* %dst2, i8 %c) {
19 call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 1, i1 false)
20 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 100, i32 1, i1 false)
21 ret void
22 }
23
24 ; CHECK-LABEL: define void @test_smaller_memset(
25 ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 100, i32 1, i1 false)
26 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 128, i32 1, i1 false)
27 ; CHECK-NEXT: ret void
28 define void @test_smaller_memset(i8* %dst1, i8* %dst2, i8 %c) {
29 call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 100, i32 1, i1 false)
30 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 128, i32 1, i1 false)
31 ret void
32 }
33
34 ; CHECK-LABEL: define void @test_align_memset(
35 ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 8, i1 false)
36 ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst2, i8 %c, i64 128, i32 1, i1 false)
37 ; CHECK-NEXT: ret void
38 define void @test_align_memset(i8* %dst1, i8* %dst2, i8 %c) {
39 call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 8, i1 false)
40 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 128, i32 1, i1 false)
41 ret void
42 }
43
44 ; CHECK-LABEL: define void @test_different_types(
45 ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 8, i1 false)
46 ; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* %dst2, i8 %c, i32 100, i32 1, i1 false)
47 ; CHECK-NEXT: ret void
48 define void @test_different_types(i8* %dst1, i8* %dst2, i8 %c) {
49 call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 8, i1 false)
50 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst2, i8* %dst1, i32 100, i32 1, i1 false)
51 ret void
52 }
53
54 ; CHECK-LABEL: define void @test_different_types_2(
55 ; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* %dst1, i8 %c, i32 128, i32 8, i1 false)
56 ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst2, i8 %c, i64 100, i32 1, i1 false)
57 ; CHECK-NEXT: ret void
58 define void @test_different_types_2(i8* %dst1, i8* %dst2, i8 %c) {
59 call void @llvm.memset.p0i8.i32(i8* %dst1, i8 %c, i32 128, i32 8, i1 false)
60 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 100, i32 1, i1 false)
61 ret void
62 }
63
64 ; CHECK-LABEL: define void @test_different_source_gep(
65 ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 1, i1 false)
66 ; CHECK-NEXT: %p = getelementptr i8, i8* %dst1, i64 64
67 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %p, i64 64, i32 1, i1 false)
68 ; CHECK-NEXT: ret void
69 define void @test_different_source_gep(i8* %dst1, i8* %dst2, i8 %c) {
70 call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 1, i1 false)
71 ; FIXME: We could optimize this as well.
72 %p = getelementptr i8, i8* %dst1, i64 64
73 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %p, i64 64, i32 1, i1 false)
74 ret void
75 }
76
77 ; CHECK-LABEL: define void @test_variable_size_1(
78 ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 %dst1_size, i32 1, i1 false)
79 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 128, i32 1, i1 false)
80 ; CHECK-NEXT: ret void
81 define void @test_variable_size_1(i8* %dst1, i64 %dst1_size, i8* %dst2, i8 %c) {
82 call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 %dst1_size, i32 1, i1 false)
83 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 128, i32 1, i1 false)
84 ret void
85 }
86
87 ; CHECK-LABEL: define void @test_variable_size_2(
88 ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 1, i1 false)
89 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 %dst2_size, i32 1, i1 false)
90 ; CHECK-NEXT: ret void
91 define void @test_variable_size_2(i8* %dst1, i8* %dst2, i64 %dst2_size, i8 %c) {
92 call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 1, i1 false)
93 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 %dst2_size, i32 1, i1 false)
94 ret void
95 }
96
97 declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
98 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1)
99 declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1)
100 declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1)