llvm.org GIT mirror llvm / 8175d01
[CodeExtractor] Emit lifetime markers around reloads of outputs

CodeExtractor permits extracting a region of blocks from a function even when values defined within the region are used outside of it. This is typically done by creating an alloca in the original function and reloading the alloca after a call to the extracted function.

Wrap the reload in lifetime start/end markers to promote stack coloring.

Suggested by Sergei Kachkov!

Differential Revision: https://reviews.llvm.org/D56045

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@351621 91177308-0d34-0410-b5e6-96231b3b80d8

Vedant Kumar, 7 months ago
7 changed file(s) with 92 addition(s) and 77 deletion(s). Raw diff Collapse all Expand all
879879 return newFunction;
880880 }
881881
882 /// Scan the extraction region for lifetime markers which reference inputs.
883 /// Erase these markers. Return the inputs which were referenced.
884 ///
885 /// The extraction region is defined by a set of blocks (\p Blocks), and a set
886 /// of allocas which will be moved from the caller function into the extracted
887 /// function (\p SunkAllocas).
888 static SetVector<Value *>
889 eraseLifetimeMarkersOnInputs(const SetVector<BasicBlock *> &Blocks,
890 const SetVector<Value *> &SunkAllocas) {
891 SetVector<Value *> InputObjectsWithLifetime;
892 for (BasicBlock *BB : Blocks) {
893 for (auto It = BB->begin(), End = BB->end(); It != End;) {
894 auto *II = dyn_cast<IntrinsicInst>(&*It);
895 ++It;
896 if (!II || !II->isLifetimeStartOrEnd())
897 continue;
898
899 // Get the memory operand of the lifetime marker. If the underlying
900 // object is a sunk alloca, or is otherwise defined in the extraction
901 // region, the lifetime marker must not be erased.
902 Value *Mem = II->getOperand(1)->stripInBoundsOffsets();
903 if (SunkAllocas.count(Mem) || definedInRegion(Blocks, Mem))
904 continue;
905
906 InputObjectsWithLifetime.insert(Mem);
907 II->eraseFromParent();
908 }
909 }
910 return InputObjectsWithLifetime;
911 }
912
913 /// Insert lifetime start/end markers surrounding the call to the new function
914 /// for objects defined in the caller.
915 static void insertLifetimeMarkersSurroundingCall(Module *M,
916 ArrayRef<Value *> Objects,
917 CallInst *TheCall) {
918 if (Objects.empty())
919 return;
920
921 LLVMContext &Ctx = M->getContext();
922 auto Int8PtrTy = Type::getInt8PtrTy(Ctx);
923 auto NegativeOne = ConstantInt::getSigned(Type::getInt64Ty(Ctx), -1);
924 auto StartFn = llvm::Intrinsic::getDeclaration(
925 M, llvm::Intrinsic::lifetime_start, Int8PtrTy);
926 auto EndFn = llvm::Intrinsic::getDeclaration(M, llvm::Intrinsic::lifetime_end,
927 Int8PtrTy);
928 Instruction *Term = TheCall->getParent()->getTerminator();
929 for (Value *Mem : Objects) {
930 assert((!isa<Instruction>(Mem) ||
931 cast<Instruction>(Mem)->getFunction() == TheCall->getFunction()) &&
932 "Input memory not defined in original function");
933 Value *MemAsI8Ptr = nullptr;
934 if (Mem->getType() == Int8PtrTy)
935 MemAsI8Ptr = Mem;
936 else
937 MemAsI8Ptr =
938 CastInst::CreatePointerCast(Mem, Int8PtrTy, "lt.cast", TheCall);
939
940 auto StartMarker = CallInst::Create(StartFn, {NegativeOne, MemAsI8Ptr});
941 StartMarker->insertBefore(TheCall);
942 auto EndMarker = CallInst::Create(EndFn, {NegativeOne, MemAsI8Ptr});
943 EndMarker->insertBefore(Term);
944 }
945 }
946
882947 /// emitCallAndSwitchStatement - This method sets up the caller side by adding
883948 /// the call instruction, splitting any PHI nodes in the header block as
884949 /// necessary.
11181183 break;
11191184 }
11201185
1186 // Insert lifetime markers around the reloads of any output values. The
1187 // allocas output values are stored in are only in-use in the codeRepl block.
1188 insertLifetimeMarkersSurroundingCall(M, ReloadOutputs, call);
1189
11211190 return call;
11221191 }
11231192
11781247 TI->setMetadata(
11791248 LLVMContext::MD_prof,
11801249 MDBuilder(TI->getContext()).createBranchWeights(BranchWeights));
1181 }
1182
1183 /// Scan the extraction region for lifetime markers which reference inputs.
1184 /// Erase these markers. Return the inputs which were referenced.
1185 ///
1186 /// The extraction region is defined by a set of blocks (\p Blocks), and a set
1187 /// of allocas which will be moved from the caller function into the extracted
1188 /// function (\p SunkAllocas).
1189 static SetVector<Value *>
1190 eraseLifetimeMarkersOnInputs(const SetVector<BasicBlock *> &Blocks,
1191 const SetVector<Value *> &SunkAllocas) {
1192 SetVector<Value *> InputObjectsWithLifetime;
1193 for (BasicBlock *BB : Blocks) {
1194 for (auto It = BB->begin(), End = BB->end(); It != End;) {
1195 auto *II = dyn_cast<IntrinsicInst>(&*It);
1196 ++It;
1197 if (!II || !II->isLifetimeStartOrEnd())
1198 continue;
1199
1200 // Get the memory operand of the lifetime marker. If the underlying
1201 // object is a sunk alloca, or is otherwise defined in the extraction
1202 // region, the lifetime marker must not be erased.
1203 Value *Mem = II->getOperand(1)->stripInBoundsOffsets();
1204 if (SunkAllocas.count(Mem) || definedInRegion(Blocks, Mem))
1205 continue;
1206
1207 InputObjectsWithLifetime.insert(Mem);
1208 II->eraseFromParent();
1209 }
1210 }
1211 return InputObjectsWithLifetime;
1212 }
1213
1214 /// Insert lifetime start/end markers surrounding the call to the new function
1215 /// for objects defined in the caller.
1216 static void insertLifetimeMarkersSurroundingCall(
1217 Module *M, const SetVector<Value *> &InputObjectsWithLifetime,
1218 CallInst *TheCall) {
1219 if (InputObjectsWithLifetime.empty())
1220 return;
1221
1222 LLVMContext &Ctx = M->getContext();
1223 auto Int8PtrTy = Type::getInt8PtrTy(Ctx);
1224 auto NegativeOne = ConstantInt::getSigned(Type::getInt64Ty(Ctx), -1);
1225 auto LifetimeStartFn = llvm::Intrinsic::getDeclaration(
1226 M, llvm::Intrinsic::lifetime_start, Int8PtrTy);
1227 auto LifetimeEndFn = llvm::Intrinsic::getDeclaration(
1228 M, llvm::Intrinsic::lifetime_end, Int8PtrTy);
1229 for (Value *Mem : InputObjectsWithLifetime) {
1230 assert((!isa<Instruction>(Mem) ||
1231 cast<Instruction>(Mem)->getFunction() == TheCall->getFunction()) &&
1232 "Input memory not defined in original function");
1233 Value *MemAsI8Ptr = nullptr;
1234 if (Mem->getType() == Int8PtrTy)
1235 MemAsI8Ptr = Mem;
1236 else
1237 MemAsI8Ptr =
1238 CastInst::CreatePointerCast(Mem, Int8PtrTy, "lt.cast", TheCall);
1239
1240 auto StartMarker =
1241 CallInst::Create(LifetimeStartFn, {NegativeOne, MemAsI8Ptr});
1242 StartMarker->insertBefore(TheCall);
1243 auto EndMarker = CallInst::Create(LifetimeEndFn, {NegativeOne, MemAsI8Ptr});
1244 EndMarker->insertAfter(TheCall);
1245 }
12461250 }
12471251
12481252 Function *CodeExtractor::extractCodeRegion() {
13881392 // Replicate the effects of any lifetime start/end markers which referenced
13891393 // input objects in the extraction region by placing markers around the call.
13901394 insertLifetimeMarkersSurroundingCall(oldFunction->getParent(),
1391 InputObjectsWithLifetime, TheCall);
1395 InputObjectsWithLifetime.getArrayRef(),
1396 TheCall);
13921397
13931398 // Propagate personality info to the new function if there is one.
13941399 if (oldFunction->hasPersonalityFn())
2222
2323 ; CHECK-LABEL: @caller
2424 ; CHECK: codeRepl.i:
25 ; CHECK-NEXT: call void @callee.2.if.then(i32 %v, i32* %mul.loc.i), !dbg ![[DBG2:[0-9]+]]
25 ; CHECK-NOT: br label
26 ; CHECK: call void @callee.2.if.then(i32 %v, i32* %mul.loc.i), !dbg ![[DBG2:[0-9]+]]
2627 define i32 @caller(i32 %v) !dbg !8 {
2728 entry:
2829 %call = call i32 @callee(i32 %v), !dbg !14
5253
5354 ; CHECK-LABEL: @caller2
5455 ; CHECK: codeRepl.i:
55 ; CHECK-NEXT: call void @callee2.1.if.then(i32 %v, i32* %sub.loc.i), !dbg ![[DBG4:[0-9]+]]
56 ; CHECK-NOT: br label
57 ; CHECK: call void @callee2.1.if.then(i32 %v, i32* %sub.loc.i), !dbg ![[DBG4:[0-9]+]]
5658 define i32 @caller2(i32 %v) !dbg !21 {
5759 entry:
5860 %call = call i32 @callee2(i32 %v), !dbg !22
2525 ; CHECK-LABEL: bb:
2626 ; CHECK-NEXT: [[CALL26LOC:%.*]] = alloca i8*
2727 ; CHECK-LABEL: codeRepl.i:
28 ; CHECK-NEXT: %lt.cast.i = bitcast i8** [[CALL26LOC]] to i8*
29 ; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* %lt.cast.i)
2830 ; CHECK-NEXT: call void @bar.1.bb1(i8** [[CALL26LOC]])
31 ; CHECK-NEXT: %call26.reload.i = load i8*, i8** [[CALL26LOC]]
32 ; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* %lt.cast.i)
2933 define i8* @dummy_caller(i32 %arg) {
3034 bb:
3135 %tmp = tail call i8* @bar(i32 %arg)
1818
1919 ; CHECK-LABEL: @caller
2020 ; CHECK: codeRepl.i:
21 ; CHECK-NEXT: call void (i32, i32*, ...) @callee.1.if.then(i32 %v, i32* %mul.loc.i, i32 99), !dbg ![[DBG2:[0-9]+]]
21 ; CHECK-NOT: br label
22 ; CHECK: call void (i32, i32*, ...) @callee.1.if.then(i32 %v, i32* %mul.loc.i, i32 99), !dbg ![[DBG2:[0-9]+]]
2223 define i32 @caller(i32 %v) !dbg !8 {
2324 entry:
2425 %call = call i32 (i32, ...) @callee(i32 %v, i32 99), !dbg !14
1414 ; CHECK: call {{.*}}@sideeffect(
1515 ; CHECK: call {{.*}}@realloc(
1616 ; CHECK-LABEL: codeRepl:
17 ; CHECK-NEXT: call {{.*}}@realloc2.cold.1(i64 %size, i8* %ptr, i8** %retval.0.ce.loc)
17 ; CHECK: call {{.*}}@realloc2.cold.1(i64 %size, i8* %ptr, i8** %retval.0.ce.loc)
1818 ; CHECK-LABEL: cleanup:
1919 ; CHECK-NEXT: phi i8* [ null, %if.then ], [ %call, %if.end ], [ %retval.0.ce.reload, %codeRepl ]
2020 define i8* @realloc2(i8* %ptr, i64 %size) {
2929
3030 ; CHECK-LABEL: codeRepl:
3131 ; CHECK: [[local1_cast:%.*]] = bitcast i256* %local1 to i8*
32 ; CHECK: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[local1_cast]])
33 ; CHECK: [[local2_cast:%.*]] = bitcast i256* %local2 to i8*
34 ; CHECK: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[local2_cast]])
35 ; CHECK: call i1 @foo.cold.1(i8* %local1_cast, i8* %local2_cast)
36 ; CHECK: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[local2_cast]])
37 ; CHECK: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[local1_cast]])
38 ; CHECK: br i1
32 ; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[local1_cast]])
33 ; CHECK-NEXT: [[local2_cast:%.*]] = bitcast i256* %local2 to i8*
34 ; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[local2_cast]])
35 ; CHECK-NEXT: call i1 @foo.cold.1(i8* %local1_cast, i8* %local2_cast)
36 ; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[local1_cast]])
37 ; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[local2_cast]])
38 ; CHECK-NEXT: br i1
3939
4040 outlinedPath:
4141 ; These two uses of stack slots are overlapping. This should prevent
1111 ; CHECK-NEXT: ]
1212 ;
1313 ; CHECK: codeRepl:
14 ; CHECK-NEXT: bitcast
15 ; CHECK-NEXT: lifetime.start
1416 ; CHECK-NEXT: call void @pluto.cold.1(i1* %tmp8.ce.loc)
1517 ; CHECK-NEXT: %tmp8.ce.reload = load i1, i1* %tmp8.ce.loc
18 ; CHECK-NEXT: lifetime.end
1619 ; CHECK-NEXT: br label %bb7
1720 ;
1821 ; CHECK: bb7: