llvm (git mirror), commit c0808a7 (trunk r47544), Owen Anderson

Fix an issue where GVN was performing the return slot optimization when it was not safe. This is fixed by more aggressively checking that the return slot is not used elsewhere in the function.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@47544 91177308-0d34-0410-b5e6-96231b3b80d8
2 changed files with 58 additions and 8 deletions.
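For context, the return slot optimization rewrites a call that fills a temporary aggregate, followed by a memcpy of that temporary into the real destination, into a call that writes straight into the destination. The sketch below is a hypothetical C analogue (the names initialize, before, and after are illustrative and not taken from the commit); it shows the shape of the rewrite and why the destination must have no other relevant uses for it to be safe.

#include <string.h>

struct pair { long double a, b; };

/* Stands in for a callee that returns its result through an sret-style
   output pointer; its definition is assumed to live elsewhere. */
void initialize(struct pair *out);

/* Shape of the code before the optimization: the callee fills a temporary,
   which is then copied into the caller's real destination. */
void before(struct pair *dest) {
  struct pair tmp;
  initialize(&tmp);
  memcpy(dest, &tmp, sizeof tmp);
}

/* Shape after the optimization: the temporary and the memcpy are gone and the
   callee writes directly into dest.  This is only valid if nothing else can
   observe dest (or the original return slot) between the call and the copy it
   replaces, which is the condition this commit checks more aggressively. */
void after(struct pair *dest) {
  initialize(dest);
}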
@@ -751,6 +751,8 @@
     bool iterateOnFunction(Function &F);
     Value* CollapsePhi(PHINode* p);
     bool isSafeReplacement(PHINode* p, Instruction* inst);
+    bool valueHasOnlyOneUseAfter(Value* val, MemCpyInst* use,
+                                 Instruction* cutoff);
   };

   char GVN::ID = 0;
@@ -1054,22 +1056,32 @@
   return deletedLoad;
 }

-/// isReturnSlotOptznProfitable - Determine if performing a return slot
-/// fusion with the slot dest is profitable
-static bool isReturnSlotOptznProfitable(Value* dest, MemCpyInst* cpy) {
-  // We currently consider it profitable if dest is otherwise dead.
-  SmallVector<User*, 8> useList(dest->use_begin(), dest->use_end());
+/// valueHasOnlyOneUseAfter - Returns true if the only use of the value after
+/// the cutoff, within the same block as the use, is the use parameter
+/// itself.
+bool GVN::valueHasOnlyOneUseAfter(Value* val, MemCpyInst* use,
+                                  Instruction* cutoff) {
+  DominatorTree& DT = getAnalysis<DominatorTree>();
+
+  SmallVector<User*, 8> useList(val->use_begin(), val->use_end());
   while (!useList.empty()) {
     User* UI = useList.back();
+

     if (isa<GetElementPtrInst>(UI) || isa<BitCastInst>(UI)) {
       useList.pop_back();
       for (User::use_iterator I = UI->use_begin(), E = UI->use_end();
            I != E; ++I)
         useList.push_back(*I);
-    } else if (UI == cpy)
+    } else if (UI == use) {
       useList.pop_back();
-    else
+    } else if (Instruction* inst = dyn_cast<Instruction>(UI)) {
+      if (inst->getParent() == use->getParent() &&
+          (inst == cutoff || !DT.dominates(cutoff, inst))) {
+        useList.pop_back();
+      } else
+        return false;
+    } else
       return false;
   }

@@ -1122,8 +1134,14 @@
   if (TD.getTypeStoreSize(PT->getElementType()) != cpyLength->getZExtValue())
     return false;

+  // For safety, we must ensure that the output parameter of the call only has
+  // a single use, the memcpy.  Otherwise this can introduce an invalid
+  // transformation.
+  if (!valueHasOnlyOneUseAfter(CS.getArgument(0), cpy, C))
+    return false;
+
   // We only perform the transformation if it will be profitable.
-  if (!isReturnSlotOptznProfitable(cpyDest, cpy))
+  if (!valueHasOnlyOneUseAfter(cpyDest, cpy, C))
     return false;

   // In addition to knowing that the call does not access the return slot
The commit also adds the following regression test:

; RUN: llvm-as < %s | opt -gvn -dse | llvm-dis | grep {call.*initialize} | grep memtmp | count 1
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
target triple = "i386-pc-linux-gnu"

define internal fastcc void @initialize({ x86_fp80, x86_fp80 }* noalias sret %agg.result) nounwind {
entry:
  %agg.result.03 = getelementptr { x86_fp80, x86_fp80 }* %agg.result, i32 0, i32 0   ; <x86_fp80*> [#uses=1]
  store x86_fp80 0xK00000000000000000000, x86_fp80* %agg.result.03
  %agg.result.15 = getelementptr { x86_fp80, x86_fp80 }* %agg.result, i32 0, i32 1   ; <x86_fp80*> [#uses=1]
  store x86_fp80 0xK00000000000000000000, x86_fp80* %agg.result.15
  ret void
}

declare fastcc x86_fp80 @passed_uninitialized({ x86_fp80, x86_fp80 }* %x) nounwind

define fastcc void @badly_optimized() nounwind {
entry:
  %z = alloca { x86_fp80, x86_fp80 }   ; <{ x86_fp80, x86_fp80 }*> [#uses=2]
  %tmp = alloca { x86_fp80, x86_fp80 }   ; <{ x86_fp80, x86_fp80 }*> [#uses=2]
  %memtmp = alloca { x86_fp80, x86_fp80 }, align 8   ; <{ x86_fp80, x86_fp80 }*> [#uses=2]
  call fastcc void @initialize( { x86_fp80, x86_fp80 }* noalias sret %memtmp )
  %tmp1 = bitcast { x86_fp80, x86_fp80 }* %tmp to i8*   ; <i8*> [#uses=1]
  %memtmp2 = bitcast { x86_fp80, x86_fp80 }* %memtmp to i8*   ; <i8*> [#uses=1]
  call void @llvm.memcpy.i32( i8* %tmp1, i8* %memtmp2, i32 24, i32 8 )
  %z3 = bitcast { x86_fp80, x86_fp80 }* %z to i8*   ; <i8*> [#uses=1]
  %tmp4 = bitcast { x86_fp80, x86_fp80 }* %tmp to i8*   ; <i8*> [#uses=1]
  call void @llvm.memcpy.i32( i8* %z3, i8* %tmp4, i32 24, i32 8 )
  %tmp5 = call fastcc x86_fp80 @passed_uninitialized( { x86_fp80, x86_fp80 }* %z )   ; <x86_fp80> [#uses=0]
  ret void
}

declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind
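Read at the source level, the test corresponds roughly to the hypothetical C rendering below (an assumption about how the IR was produced; the names mirror the .ll, but the snippet is illustrative and not part of the commit). The RUN line above verifies that, after -gvn -dse, the call to @initialize still targets the %memtmp slot, i.e. that the return slot optimization is no longer applied to this function.

/* Hypothetical C analogue of the IR test case above; illustrative only. */
typedef struct { long double a, b; } agg;

/* Corresponds to @initialize: returns an aggregate, which on i386 is passed
   back through an sret slot. */
static agg initialize(void) {
  agg r = { 0.0L, 0.0L };
  return r;
}

/* Corresponds to @passed_uninitialized; assumed to be defined elsewhere. */
extern long double passed_uninitialized(agg *x);

/* Corresponds to @badly_optimized: lowering the struct return introduces the
   %memtmp and %tmp temporaries and the two memcpy calls seen in the IR. */
void badly_optimized(void) {
  agg z = initialize();
  passed_uninitialized(&z);
}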