llvm.org GIT mirror llvm / f41eaac
enhance memcpy opt to turn memmoves into memcpy when the src/dest don't alias. Remove an old and poorly reduced testcase that fails with this transform for reasons unrelated to the original test. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@80693 91177308-0d34-0410-b5e6-96231b3b80d8 Chris Lattner 10 years ago
3 changed file(s) with 78 addition(s) and 113 deletion(s). Raw diff Collapse all Expand all
316316 // Helper functions
317317 bool processStore(StoreInst *SI, BasicBlock::iterator &BBI);
318318 bool processMemCpy(MemCpyInst *M);
319 bool processMemMove(MemMoveInst *M);
319320 bool performCallSlotOptzn(MemCpyInst *cpy, CallInst *C);
320321 bool iterateOnFunction(Function &F);
321322 };
430431 BasicBlock::iterator InsertPt = BI;
431432
432433 if (MemSetF == 0) {
433 const Type *Tys[] = {Type::getInt64Ty(SI->getContext())};
434 MemSetF = Intrinsic::getDeclaration(M, Intrinsic::memset,
435 Tys, 1);
434 const Type *Ty = Type::getInt64Ty(SI->getContext());
435 MemSetF = Intrinsic::getDeclaration(M, Intrinsic::memset, &Ty, 1);
436436 }
437437
438438 // Get the starting pointer of the block.
678678 return false;
679679
680680 // If all checks passed, then we can transform these memcpy's
681 const Type *Tys[1];
682 Tys[0] = M->getLength()->getType();
681 const Type *Ty = M->getLength()->getType();
683682 Function *MemCpyFun = Intrinsic::getDeclaration(
684683 M->getParent()->getParent()->getParent(),
685 M->getIntrinsicID(), Tys, 1);
684 M->getIntrinsicID(), &Ty, 1);
686685
687686 Value *Args[4] = {
688687 M->getRawDest(), MDep->getRawSource(), M->getLength(), M->getAlignmentCst()
706705 C->eraseFromParent();
707706 return false;
708707 }
708
709 /// processMemMove - Transforms memmove calls to memcpy calls when the src/dst
710 /// are guaranteed not to alias.
711 bool MemCpyOpt::processMemMove(MemMoveInst *M) {
712 AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
713
714 // If the memmove is a constant size, use it for the alias query; this allows
715 // us to optimize things like: memmove(P, P+64, 64);
716 uint64_t MemMoveSize = ~0ULL;
717 if (ConstantInt *Len = dyn_cast<ConstantInt>(M->getLength()))
718 MemMoveSize = Len->getZExtValue();
719
720 // See if the pointers alias.
721 if (AA.alias(M->getRawDest(), MemMoveSize, M->getRawSource(), MemMoveSize) !=
722 AliasAnalysis::NoAlias)
723 return false;
724
725 DEBUG(errs() << "MemCpyOpt: Optimizing memmove -> memcpy: " << *M << "\n");
726
727 // If not, then we know we can transform this.
728 Module *Mod = M->getParent()->getParent()->getParent();
729 const Type *Ty = M->getLength()->getType();
730 M->setOperand(0, Intrinsic::getDeclaration(Mod, Intrinsic::memcpy, &Ty, 1));
731
732 // MemDep may have overly conservative information about this instruction; just
733 // conservatively flush it from the cache.
734 getAnalysis<MemoryDependenceAnalysis>().removeInstruction(M);
735 return true;
736 }
737
709738
710739 // MemCpyOpt::iterateOnFunction - Executes one iteration of MemCpyOpt.
711740 bool MemCpyOpt::iterateOnFunction(Function &F) {
722751 MadeChange |= processStore(SI, BI);
723752 else if (MemCpyInst *M = dyn_cast<MemCpyInst>(I))
724753 MadeChange |= processMemCpy(M);
754 else if (MemMoveInst *M = dyn_cast<MemMoveInst>(I)) {
755 if (processMemMove(M)) {
756 --BI; // Reprocess the new memcpy.
757 MadeChange = true;
758 }
759 }
725760 }
726761 }
727762
+0
-107
test/Transforms/MemCpyOpt/2008-06-01-MemCpy-MemMove.ll less more
None ; RUN: llvm-as < %s | opt -memcpyopt | llvm-dis | grep {call.*memmove.*arg1.*}
1 ; PR2401
2
3 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
4 target triple = "i686-pc-linux-gnu"
5 %struct.Info = type <{ i32, i32, i8*, i8*, i8*, [32 x i8*], i32, [32 x i32], i32, i32, i32, [32 x i32] }>
6 %struct.S98 = type <{ [31 x double] }>
7 %struct._IO_FILE = type <{ i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i32, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i32, i32, [40 x i8] }>
8 %struct._IO_marker = type <{ %struct._IO_marker*, %struct._IO_FILE*, i32 }>
9 %struct.anon = type <{ }>
10 %union.anon = type { }
11 @info = common global %struct.Info zeroinitializer, align 4 ; <%struct.Info*> [#uses=13]
12 @fails = common global i32 0, align 4 ; [#uses=37]
13 @s98 = common global %struct.S98 zeroinitializer, align 4 ; <%struct.S98*> [#uses=2]
14 @a98 = common global [5 x %struct.S98] zeroinitializer, align 4 ; <[5 x %struct.S98]*> [#uses=5]
15 @stdout = external global %struct._IO_FILE* ; <%struct._IO_FILE**> [#uses=1]
16
17 declare void @llvm.memmove.i32(i8*, i8*, i32, i32) nounwind
18
19 define void @test98() nounwind {
20 entry:
21 %arg = alloca %struct.S98, align 8 ; <%struct.S98*> [#uses=2]
22 %tmp13 = alloca %struct.S98 ; <%struct.S98*> [#uses=2]
23 %tmp14 = alloca %struct.S98 ; <%struct.S98*> [#uses=2]
24 %tmp15 = alloca %struct.S98 ; <%struct.S98*> [#uses=2]
25 %tmp17 = alloca %struct.S98 ; <%struct.S98*> [#uses=2]
26 %tmp21 = alloca %struct.S98 ; <%struct.S98*> [#uses=0]
27 %tmp23 = alloca %struct.S98 ; <%struct.S98*> [#uses=0]
28 %tmp25 = alloca %struct.S98 ; <%struct.S98*> [#uses=0]
29 %tmp27 = alloca %struct.S98 ; <%struct.S98*> [#uses=0]
30 %tmp29 = alloca %struct.S98 ; <%struct.S98*> [#uses=0]
31 %tmp31 = alloca %struct.S98 ; <%struct.S98*> [#uses=0]
32 %tmp33 = alloca %struct.S98 ; <%struct.S98*> [#uses=0]
33 call void @llvm.memset.i32( i8* bitcast (%struct.S98* @s98 to i8*), i8 0, i32 248, i32 4 )
34 call void @llvm.memset.i32( i8* bitcast ([5 x %struct.S98]* @a98 to i8*), i8 0, i32 1240, i32 4 )
35 call void @llvm.memset.i32( i8* bitcast (%struct.Info* @info to i8*), i8 0, i32 420, i32 4 )
36 store i8* bitcast (%struct.S98* @s98 to i8*), i8** getelementptr (%struct.Info* @info, i32 0, i32 2)
37 store i8* bitcast ([5 x %struct.S98]* @a98 to i8*), i8** getelementptr (%struct.Info* @info, i32 0, i32 3)
38 store i8* bitcast (%struct.S98* getelementptr ([5 x %struct.S98]* @a98, i32 0, i32 3) to i8*), i8** getelementptr (%struct.Info* @info, i32 0, i32 4)
39 store i32 248, i32* getelementptr (%struct.Info* @info, i32 0, i32 6)
40 store i32 4, i32* getelementptr (%struct.Info* @info, i32 0, i32 8)
41 store i32 4, i32* getelementptr (%struct.Info* @info, i32 0, i32 9)
42 store i32 4, i32* getelementptr (%struct.Info* @info, i32 0, i32 10)
43 %tmp = load i32* getelementptr (%struct.Info* @info, i32 0, i32 8) ; [#uses=1]
44 %sub = add i32 %tmp, -1 ; [#uses=1]
45 %and = and i32 %sub, ptrtoint (%struct.S98* getelementptr ([5 x %struct.S98]* @a98, i32 0, i32 3) to i32) ; [#uses=1]
46 %tobool = icmp eq i32 %and, 0 ; [#uses=1]
47 br i1 %tobool, label %ifend, label %ifthen
48
49 ifthen: ; preds = %entry
50 %tmp3 = load i32* @fails ; [#uses=1]
51 %inc = add i32 %tmp3, 1 ; [#uses=1]
52 store i32 %inc, i32* @fails
53 br label %ifend
54
55 ifend: ; preds = %ifthen, %entry
56 store i8* bitcast (double* getelementptr (%struct.S98* @s98, i32 0, i32 0, i32 18) to i8*), i8** getelementptr (%struct.Info* @info, i32 0, i32 5, i32 0)
57 store i32 8, i32* getelementptr (%struct.Info* @info, i32 0, i32 7, i32 0)
58 store i32 4, i32* getelementptr (%struct.Info* @info, i32 0, i32 11, i32 0)
59 store double 0xC1075E4620000000, double* getelementptr (%struct.S98* @s98, i32 0, i32 0, i32 18)
60 store double 0x410CD219E0000000, double* getelementptr ([5 x %struct.S98]* @a98, i32 0, i32 2, i32 0, i32 18)
61 store i32 1, i32* getelementptr (%struct.Info* @info, i32 0, i32 0)
62 store i32 0, i32* getelementptr (%struct.Info* @info, i32 0, i32 1)
63 %tmp16 = bitcast %struct.S98* %tmp15 to i8* ; [#uses=1]
64 call void @llvm.memmove.i32( i8* %tmp16, i8* bitcast (%struct.S98* @s98 to i8*), i32 248, i32 4 )
65 %tmp18 = bitcast %struct.S98* %tmp17 to i8* ; [#uses=1]
66 call void @llvm.memmove.i32( i8* %tmp18, i8* bitcast (%struct.S98* getelementptr ([5 x %struct.S98]* @a98, i32 0, i32 2) to i8*), i32 248, i32 4 )
67 call void @check98( %struct.S98* sret %tmp14, %struct.S98* byval %tmp15, %struct.S98* getelementptr ([5 x %struct.S98]* @a98, i32 0, i32 1), %struct.S98* byval %tmp17 )
68 %tmp19 = bitcast %struct.S98* %tmp13 to i8* ; [#uses=1]
69 %tmp20 = bitcast %struct.S98* %tmp14 to i8* ; [#uses=1]
70 call void @llvm.memmove.i32( i8* %tmp19, i8* %tmp20, i32 248, i32 8 )
71 %tmp1 = bitcast %struct.S98* %arg to i8* ; [#uses=1]
72 %tmp2 = bitcast %struct.S98* %tmp13 to i8* ; [#uses=1]
73 call void @llvm.memcpy.i64( i8* %tmp1, i8* %tmp2, i64 248, i32 8 )
74 %arrayidx.i = getelementptr %struct.S98* %arg, i32 0, i32 0, i32 18 ; [#uses=1]
75 %tmp1.i = load double* %arrayidx.i, align 8 ; [#uses=1]
76 %tmp2.i = load double* getelementptr (%struct.S98* @s98, i32 0, i32 0, i32 18) ; [#uses=1]
77 %cmp.i = fcmp une double %tmp1.i, %tmp2.i ; [#uses=1]
78 br i1 %cmp.i, label %ifthen.i, label %checkx98.exit
79
80 ifthen.i: ; preds = %ifend
81 %tmp3.i = load i32* @fails ; [#uses=1]
82 %inc.i = add i32 %tmp3.i, 1 ; [#uses=1]
83 store i32 %inc.i, i32* @fails
84 br label %checkx98.exit
85
86 checkx98.exit: ; preds = %ifthen.i, %ifend
87 ret void
88 }
89
90 declare void @check98(%struct.S98* sret %agg.result, %struct.S98* byval %arg0, %struct.S98* %arg1, %struct.S98* byval %arg2) nounwind
91
92 declare void @llvm.va_start(i8*) nounwind
93
94 declare void @llvm.va_end(i8*) nounwind
95
96 declare i32 @main() noreturn
97
98 declare i32 @fflush(%struct._IO_FILE*)
99
100 declare void @abort() noreturn nounwind
101
102 declare void @exit(i32) noreturn nounwind
103
104 declare void @llvm.memset.i32(i8*, i8, i32, i32) nounwind
105
106 declare void @llvm.memcpy.i64(i8*, i8*, i64, i32) nounwind
0 ; RUN: llvm-as < %s | opt -memcpyopt | llvm-dis | FileCheck %s
1 ; These memmoves should get optimized to memcpys.
2
3 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
4 target triple = "x86_64-apple-darwin9.0"
5
6 declare void @llvm.memmove.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
7
8 define i8* @test1(i8* nocapture %src) nounwind {
9 entry:
10 ; CHECK: @test1
11 ; CHECK: call void @llvm.memcpy
12
13 %call3 = malloc [13 x i8] ; <[13 x i8]*> [#uses=1]
14 %call3.sub = getelementptr inbounds [13 x i8]* %call3, i64 0, i64 0 ; [#uses=2]
15 tail call void @llvm.memmove.i64(i8* %call3.sub, i8* %src, i64 13, i32 1)
16 ret i8* %call3.sub
17 }
18
19 define void @test2(i8* %P) nounwind {
20 entry:
21 ; CHECK: @test2
22 ; CHECK: call void @llvm.memcpy
23 %add.ptr = getelementptr i8* %P, i64 16 ; [#uses=1]
24 tail call void @llvm.memmove.i64(i8* %P, i8* %add.ptr, i64 16, i32 1)
25 ret void
26 }
27
28 ; This cannot be optimized because the src/dst really do overlap.
29 define void @test3(i8* %P) nounwind {
30 entry:
31 ; CHECK: @test3
32 ; CHECK: call void @llvm.memmove
33 %add.ptr = getelementptr i8* %P, i64 16 ; [#uses=1]
34 tail call void @llvm.memmove.i64(i8* %P, i8* %add.ptr, i64 17, i32 1)
35 ret void
36 }