llvm.org GIT mirror llvm / bbbee1f
[CodeExtractor] Do not extract unsafe lifetime markers Lifetime markers which reference inputs to the extraction region are not safe to extract. Example ('rhs' will be extracted): ``` entry: +------------+ | x = alloca | | y = alloca | +------------+ / \ lhs: rhs: +-------------------+ +-------------------+ | lifetime_start(x) | | lifetime_start(x) | | use(x) | | lifetime_start(y) | | lifetime_end(x) | | use(x, y) | | lifetime_start(y) | | lifetime_end(y) | | use(y) | | lifetime_end(x) | | lifetime_end(y) | +-------------------+ +-------------------+ ``` Prior to extraction, the stack coloring pass sees that the slots for 'x' and 'y' are in-use at the same time. After extraction, the coloring pass infers that 'x' and 'y' are *not* in-use concurrently, because markers from 'rhs' are no longer available to help decide otherwise. This leads to a miscompile, because the stack slots actually are in-use concurrently in the extracted function. Fix this by moving lifetime start/end markers for memory regions defined in the calling function around the call to the extracted function. Fixes llvm.org/PR39671 (rdar://45939472). Differential Revision: https://reviews.llvm.org/D55967 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@350420 91177308-0d34-0410-b5e6-96231b3b80d8 Vedant Kumar 8 months ago
5 changed file(s) with 166 addition(s) and 16 deletion(s). Raw diff Collapse all Expand all
2626 class BlockFrequency;
2727 class BlockFrequencyInfo;
2828 class BranchProbabilityInfo;
29 class CallInst;
2930 class DominatorTree;
3031 class Function;
3132 class Instruction;
163164 DenseMap &ExitWeights,
164165 BranchProbabilityInfo *BPI);
165166
166 void emitCallAndSwitchStatement(Function *newFunction,
167 BasicBlock *newHeader,
168 ValueSet &inputs,
169 ValueSet &outputs);
167 CallInst *emitCallAndSwitchStatement(Function *newFunction,
168 BasicBlock *newHeader,
169 ValueSet &inputs, ValueSet &outputs);
170170 };
171171
172172 } // end namespace llvm
882882 /// emitCallAndSwitchStatement - This method sets up the caller side by adding
883883 /// the call instruction, splitting any PHI nodes in the header block as
884884 /// necessary.
885 void CodeExtractor::
886 emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
887 ValueSet &inputs, ValueSet &outputs) {
885 CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction,
886 BasicBlock *codeReplacer,
887 ValueSet &inputs,
888 ValueSet &outputs) {
888889 // Emit a call to the new function, passing in: *pointer to struct (if
889890 // aggregating parameters), or plan inputs and allocated memory for outputs
890891 std::vector params, StructValues, ReloadOutputs, Reloads;
892893 Module *M = newFunction->getParent();
893894 LLVMContext &Context = M->getContext();
894895 const DataLayout &DL = M->getDataLayout();
896 CallInst *call = nullptr;
895897
896898 // Add inputs as params, or to be filled into the struct
897899 for (Value *input : inputs)
942944 }
943945
944946 // Emit the call to the function
945 CallInst *call = CallInst::Create(newFunction, params,
946 NumExitBlocks > 1 ? "targetBlock" : "");
947 call = CallInst::Create(newFunction, params,
948 NumExitBlocks > 1 ? "targetBlock" : "");
947949 // Add debug location to the new call, if the original function has debug
948950 // info. In that case, the terminator of the entry block of the extracted
949951 // function contains the first debug location of the extracted function,
11151117 TheSwitch->removeCase(SwitchInst::CaseIt(TheSwitch, NumExitBlocks-1));
11161118 break;
11171119 }
1120
1121 return call;
11181122 }
11191123
11201124 void CodeExtractor::moveCodeToFunction(Function *newFunction) {
11741178 TI->setMetadata(
11751179 LLVMContext::MD_prof,
11761180 MDBuilder(TI->getContext()).createBranchWeights(BranchWeights));
1181 }
1182
1183 /// Scan the extraction region for lifetime markers which reference inputs.
1184 /// Erase these markers. Return the inputs which were referenced.
1185 ///
1186 /// The extraction region is defined by a set of blocks (\p Blocks), and a set
1187 /// of allocas which will be moved from the caller function into the extracted
1188 /// function (\p SunkAllocas).
1189 static SetVector
1190 eraseLifetimeMarkersOnInputs(const SetVector &Blocks,
1191 const SetVector &SunkAllocas) {
1192 SetVector InputObjectsWithLifetime;
1193 for (BasicBlock *BB : Blocks) {
1194 for (auto It = BB->begin(), End = BB->end(); It != End;) {
1195 auto *II = dyn_cast(&*It);
1196 ++It;
1197 if (!II || !II->isLifetimeStartOrEnd())
1198 continue;
1199
1200 // Get the memory operand of the lifetime marker. If the underlying
1201 // object is a sunk alloca, or is otherwise defined in the extraction
1202 // region, the lifetime marker must not be erased.
1203 Value *Mem = II->getOperand(1)->stripInBoundsOffsets();
1204 if (SunkAllocas.count(Mem) || definedInRegion(Blocks, Mem))
1205 continue;
1206
1207 InputObjectsWithLifetime.insert(Mem);
1208 II->eraseFromParent();
1209 }
1210 }
1211 return InputObjectsWithLifetime;
1212 }
1213
1214 /// Insert lifetime start/end markers surrounding the call to the new function
1215 /// for objects defined in the caller.
1216 static void insertLifetimeMarkersSurroundingCall(
1217 Module *M, const SetVector &InputObjectsWithLifetime,
1218 CallInst *TheCall) {
1219 if (InputObjectsWithLifetime.empty())
1220 return;
1221
1222 LLVMContext &Ctx = M->getContext();
1223 auto Int8PtrTy = Type::getInt8PtrTy(Ctx);
1224 auto NegativeOne = ConstantInt::getSigned(Type::getInt64Ty(Ctx), -1);
1225 auto LifetimeStartFn = llvm::Intrinsic::getDeclaration(
1226 M, llvm::Intrinsic::lifetime_start, Int8PtrTy);
1227 auto LifetimeEndFn = llvm::Intrinsic::getDeclaration(
1228 M, llvm::Intrinsic::lifetime_end, Int8PtrTy);
1229 for (Value *Mem : InputObjectsWithLifetime) {
1230 assert((!isa(Mem) ||
1231 cast(Mem)->getFunction() == TheCall->getFunction()) &&
1232 "Input memory not defined in original function");
1233 Value *MemAsI8Ptr = nullptr;
1234 if (Mem->getType() == Int8PtrTy)
1235 MemAsI8Ptr = Mem;
1236 else
1237 MemAsI8Ptr =
1238 CastInst::CreatePointerCast(Mem, Int8PtrTy, "lt.cast", TheCall);
1239
1240 auto StartMarker =
1241 CallInst::Create(LifetimeStartFn, {NegativeOne, MemAsI8Ptr});
1242 StartMarker->insertBefore(TheCall);
1243 auto EndMarker = CallInst::Create(LifetimeEndFn, {NegativeOne, MemAsI8Ptr});
1244 EndMarker->insertAfter(TheCall);
1245 }
11771246 }
11781247
11791248 Function *CodeExtractor::extractCodeRegion() {
12901359 cast(II)->moveBefore(TI);
12911360 }
12921361
1362 // Collect objects which are inputs to the extraction region and also
1363 // referenced by lifetime start/end markers within it. The effects of these
1364 // markers must be replicated in the calling function to prevent the stack
1365 // coloring pass from merging slots which store input objects.
1366 ValueSet InputObjectsWithLifetime =
1367 eraseLifetimeMarkersOnInputs(Blocks, SinkingCands);
1368
12931369 // Construct new function based on inputs/outputs & add allocas for all defs.
1294 Function *newFunction = constructFunction(inputs, outputs, header,
1295 newFuncRoot,
1296 codeReplacer, oldFunction,
1297 oldFunction->getParent());
1370 Function *newFunction =
1371 constructFunction(inputs, outputs, header, newFuncRoot, codeReplacer,
1372 oldFunction, oldFunction->getParent());
12981373
12991374 // Update the entry count of the function.
13001375 if (BFI) {
13051380 BFI->setBlockFreq(codeReplacer, EntryFreq.getFrequency());
13061381 }
13071382
1308 emitCallAndSwitchStatement(newFunction, codeReplacer, inputs, outputs);
1383 CallInst *TheCall =
1384 emitCallAndSwitchStatement(newFunction, codeReplacer, inputs, outputs);
13091385
13101386 moveCodeToFunction(newFunction);
1387
1388 // Replicate the effects of any lifetime start/end markers which referenced
1389 // input objects in the extraction region by placing markers around the call.
1390 insertLifetimeMarkersSurroundingCall(oldFunction->getParent(),
1391 InputObjectsWithLifetime, TheCall);
13111392
13121393 // Propagate personality info to the new function if there is one.
13131394 if (oldFunction->hasPersonalityFn())
55
66 @g = external local_unnamed_addr global i32, align 4
77
8 ; CHECK-LABEL: define{{.*}}@caller(
9 ; CHECK: call void @llvm.lifetime.start.p0i8(i64 -1, i8* %tmp.i)
10 ; CHECK-NEXT: call void @callee_unknown_use1.{{.*}}(i8* %tmp.i
11 ; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* %tmp.i)
12
813 define i32 @callee_unknown_use1(i32 %arg) local_unnamed_addr #0 {
914 ; CHECK-LABEL:define{{.*}}@callee_unknown_use1.{{[0-9]}}
1015 ; CHECK-NOT: alloca
11 ; CHECK: call void @llvm.lifetime
1216 bb:
1317 %tmp = alloca i8, align 4
1418 %tmp2 = load i32, i32* @g, align 4, !tbaa !2
88 define i32 @callee_unknown_use2(i32 %arg) local_unnamed_addr #0 {
99 ; CHECK-LABEL:define{{.*}}@callee_unknown_use2.{{[0-9]}}
1010 ; CHECK-NOT: alloca
11 ; CHECK: call void @llvm.lifetime
1211 bb:
1312 %tmp = alloca i32, align 4
1413 %tmp1 = bitcast i32* %tmp to i8*
0 ; RUN: opt -S -hotcoldsplit < %s 2>&1 | FileCheck %s
1
2 declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
3
4 declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
5
6 declare void @use(i8*)
7
8 declare void @cold_use2(i8*, i8*) cold
9
10 ; CHECK-LABEL: define {{.*}}@foo(
11 define void @foo() {
12 entry:
13 %local1 = alloca i256
14 %local2 = alloca i256
15 %local1_cast = bitcast i256* %local1 to i8*
16 %local2_cast = bitcast i256* %local2 to i8*
17 br i1 undef, label %normalPath, label %outlinedPath
18
19 normalPath:
20 ; These two uses of stack slots are non-overlapping. Based on this alone,
21 ; the stack slots could be merged.
22 call void @llvm.lifetime.start.p0i8(i64 1, i8* %local1_cast)
23 call void @use(i8* %local1_cast)
24 call void @llvm.lifetime.end.p0i8(i64 1, i8* %local1_cast)
25 call void @llvm.lifetime.start.p0i8(i64 1, i8* %local2_cast)
26 call void @use(i8* %local2_cast)
27 call void @llvm.lifetime.end.p0i8(i64 1, i8* %local2_cast)
28 ret void
29
30 ; CHECK-LABEL: codeRepl:
31 ; CHECK: [[local1_cast:%.*]] = bitcast i256* %local1 to i8*
32 ; CHECK: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[local1_cast]])
33 ; CHECK: [[local2_cast:%.*]] = bitcast i256* %local2 to i8*
34 ; CHECK: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[local2_cast]])
35 ; CHECK: call i1 @foo.cold.1(i8* %local1_cast, i8* %local2_cast)
36 ; CHECK: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[local2_cast]])
37 ; CHECK: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[local1_cast]])
38 ; CHECK: br i1
39
40 outlinedPath:
41 ; These two uses of stack slots are overlapping. This should prevent
42 ; merging of stack slots. CodeExtractor must replicate the effects of
43 ; these markers in the caller to inhibit stack coloring.
44 %gep1 = getelementptr inbounds i8, i8* %local1_cast, i64 1
45 call void @llvm.lifetime.start.p0i8(i64 1, i8* %gep1)
46 call void @llvm.lifetime.start.p0i8(i64 1, i8* %local2_cast)
47 call void @cold_use2(i8* %local1_cast, i8* %local2_cast)
48 call void @llvm.lifetime.end.p0i8(i64 1, i8* %gep1)
49 call void @llvm.lifetime.end.p0i8(i64 1, i8* %local2_cast)
50 br i1 undef, label %outlinedPath2, label %outlinedPathExit
51
52 outlinedPath2:
53 ; These extra lifetime markers are used to test that we emit only one
54 ; pair of guard markers in the caller per memory object.
55 call void @llvm.lifetime.start.p0i8(i64 1, i8* %local2_cast)
56 call void @use(i8* %local2_cast)
57 call void @llvm.lifetime.end.p0i8(i64 1, i8* %local2_cast)
58 ret void
59
60 outlinedPathExit:
61 ret void
62 }
63
64 ; CHECK-LABEL: define {{.*}}@foo.cold.1(
65 ; CHECK-NOT: @llvm.lifetime