llvm commit 8fa8929
With this patch, the LowerGC transformation becomes the ShadowStackCollector, which also reduces overhead with no sacrifice in portability.

Considering a function @fun with 8 loop-local roots, ShadowStackCollector introduces the following overhead (x86):

; shadowstack prologue
movl L_llvm_gc_root_chain$non_lazy_ptr, %eax
movl (%eax), %ecx
movl $___gc_fun, 20(%esp)
movl $0, 24(%esp)
movl $0, 28(%esp)
movl $0, 32(%esp)
movl $0, 36(%esp)
movl $0, 40(%esp)
movl $0, 44(%esp)
movl $0, 48(%esp)
movl $0, 52(%esp)
movl %ecx, 16(%esp)
leal 16(%esp), %ecx
movl %ecx, (%eax)

; shadowstack loop overhead
(none)

; shadowstack epilogue
movl 48(%esp), %edx
movl %edx, (%ecx)

; shadowstack metadata
.align 3
___gc_fun:  # __gc_fun
.long 8
.space 4

In comparison to LowerGC:

; lowergc prologue
movl L_llvm_gc_root_chain$non_lazy_ptr, %eax
movl (%eax), %ecx
movl %ecx, 48(%esp)
movl $8, 52(%esp)
movl $0, 60(%esp)
movl $0, 56(%esp)
movl $0, 68(%esp)
movl $0, 64(%esp)
movl $0, 76(%esp)
movl $0, 72(%esp)
movl $0, 84(%esp)
movl $0, 80(%esp)
movl $0, 92(%esp)
movl $0, 88(%esp)
movl $0, 100(%esp)
movl $0, 96(%esp)
movl $0, 108(%esp)
movl $0, 104(%esp)
movl $0, 116(%esp)
movl $0, 112(%esp)

; lowergc loop overhead
leal 44(%esp), %eax
movl %eax, 56(%esp)
leal 40(%esp), %eax
movl %eax, 64(%esp)
leal 36(%esp), %eax
movl %eax, 72(%esp)
leal 32(%esp), %eax
movl %eax, 80(%esp)
leal 28(%esp), %eax
movl %eax, 88(%esp)
leal 24(%esp), %eax
movl %eax, 96(%esp)
leal 20(%esp), %eax
movl %eax, 104(%esp)
leal 16(%esp), %eax
movl %eax, 112(%esp)

; lowergc epilogue
movl 48(%esp), %edx
movl %edx, (%ecx)

; lowergc metadata
(none)

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@45670 91177308-0d34-0410-b5e6-96231b3b80d8

Author: Gordon Henriksen
7 changed file(s) with 477 addition(s) and 372 deletion(s).
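To make the prologue/epilogue above concrete: they are just a linked-list push and pop over a global chain of stack-allocated frames. Below is a minimal C sketch of the same operations, using the FrameMap/StackEntry layout this patch adds to runtime/GC/SemiSpace.cpp; the FunFrame type, the gc_fun_map constant, and fun itself are illustrative stand-ins for what the pass generates.

#include <stddef.h>
#include <stdint.h>

struct FrameMap {
  int32_t NumRoots;   /* number of roots in the stack frame */
  int32_t NumMeta;    /* number of metadata descriptors (may be < NumRoots) */
  void *Meta[];       /* absent for roots without metadata */
};

struct StackEntry {
  struct StackEntry *Next;    /* caller's stack entry */
  const struct FrameMap *Map; /* pointer to the constant FrameMap */
  void *Roots[];              /* stack roots, allocated in-place */
};

struct StackEntry *llvm_gc_root_chain;

/* Constant map for a function with 8 roots and no metadata; this is the
   C-level meaning of the '__gc_fun' metadata (.long 8, .space 4) above. */
static const struct FrameMap gc_fun_map = { 8, 0 };

/* Concrete frame type: a StackEntry header followed by the root slots,
   mirroring the per-function gc_stackentry type the pass builds. */
struct FunFrame {
  struct StackEntry *Next;
  const struct FrameMap *Map;
  void *Roots[8];
};

void fun(void) {
  struct FunFrame Frame;
  int i;

  /* Prologue: link the frame in and null-initialize the root slots
     (the chain load, the eight 'movl $0' stores, and the head update). */
  Frame.Next = llvm_gc_root_chain;
  Frame.Map = &gc_fun_map;
  for (i = 0; i < 8; ++i)
    Frame.Roots[i] = NULL;
  llvm_gc_root_chain = (struct StackEntry *)&Frame;

  /* ... function body: roots live at fixed offsets in Frame.Roots ... */

  /* Epilogue: pop this frame off the shadow stack. */
  llvm_gc_root_chain = Frame.Next;
}

Because the roots live in the frame record itself, their addresses are fixed at compile time, which is why the loop body needs no per-iteration bookkeeping.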
 
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/Collectors.h"
 
 namespace {
   struct ForceCodegenLinking {
...
 
       (void) llvm::createSimpleRegisterCoalescer();
 
+      (void) llvm::createShadowStackCollector();
+
       (void) llvm::createBURRListDAGScheduler(NULL, NULL, NULL);
       (void) llvm::createTDRRListDAGScheduler(NULL, NULL, NULL);
       (void) llvm::createTDListDAGScheduler(NULL, NULL, NULL);

       (void) llvm::createLoopRotatePass();
       (void) llvm::createLoopIndexSplitPass();
       (void) llvm::createLowerAllocationsPass();
-      (void) llvm::createLowerGCPass();
       (void) llvm::createLowerInvokePass();
       (void) llvm::createLowerPackedPass();
       (void) llvm::createLowerSelectPass();

 
 //===----------------------------------------------------------------------===//
 //
-// LowerGCPass - This function returns an instance of the "lowergc" pass, which
-// lowers garbage collection intrinsics to normal LLVM code.
-//
-FunctionPass *createLowerGCPass();
-
-//===----------------------------------------------------------------------===//
-//
 // BlockPlacement - This pass reorders basic blocks in order to increase the
 // number of fall-through conditional branches.
 //
New file (ShadowStackCollector.cpp):

//===-- ShadowStackCollector.cpp - GC support for uncooperative targets ---===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements lowering for the llvm.gc* intrinsics for targets that do
// not natively support them (which includes the C backend). Note that the code
// generated is not quite as efficient as collectors which generate stack maps
// to identify roots.
//
// This pass implements the code transformation described in this paper:
//   "Accurate Garbage Collection in an Uncooperative Environment"
//   Fergus Henderson, ISMM, 2002
//
// In runtime/GC/SemiSpace.cpp is a prototype runtime which is compatible with
// this collector.
//
// In order to support this particular transformation, all stack roots are
// co-allocated in the stack. This allows a fully target-independent stack map
// while introducing only minor runtime overhead.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "shadowstackgc"
#include "llvm/CodeGen/Collectors.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/Collector.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/LLVMBuilder.h"
#include "llvm/Analysis/Verifier.h"
#include <cassert>

using namespace llvm;

namespace {

  class VISIBILITY_HIDDEN ShadowStackCollector : public Collector {
    /// RootChain - This is the global linked-list that contains the chain of GC
    /// roots.
    GlobalVariable *Head;

    /// StackEntryTy - Abstract type of a link in the shadow stack.
    ///
    const StructType *StackEntryTy;

    /// Roots - GC roots in the current function. Each is a pair of the
    /// intrinsic call and its corresponding alloca.
    std::vector<std::pair<CallInst*,AllocaInst*> > Roots;

  public:
    ShadowStackCollector();

    bool initializeCustomLowering(Module &M);
    bool performCustomLowering(Function &F);

  private:
    bool IsNullValue(Value *V);
    Constant *GetFrameMap(Function &F);
    const Type* GetConcreteStackEntryType(Function &F);
    void CollectRoots(Function &F);
    static GetElementPtrInst *CreateGEP(LLVMBuilder &B, Value *BasePtr,
                                        int Idx1, const char *Name);
    static GetElementPtrInst *CreateGEP(LLVMBuilder &B, Value *BasePtr,
                                        int Idx1, int Idx2, const char *Name);
  };

  CollectorRegistry::Add<ShadowStackCollector>
  Y("shadow-stack",
    "Very portable collector for uncooperative code generators");
  /// EscapeEnumerator - This is a little algorithm to find all escape points
  /// from a function so that "finally"-style code can be inserted. In addition
  /// to finding the existing return and unwind instructions, it also (if
  /// necessary) transforms any call instructions into invokes and sends them to
  /// a landing pad.
  ///
  /// It's wrapped up in a state machine using the same transform C# uses for
  /// 'yield return' enumerators. This transform allows it to be non-allocating.
  class VISIBILITY_HIDDEN EscapeEnumerator {
    Function &F;
    const char *CleanupBBName;

    // State.
    int State;
    Function::iterator StateBB, StateE;
    LLVMBuilder Builder;

  public:
    EscapeEnumerator(Function &F, const char *N = "cleanup")
      : F(F), CleanupBBName(N), State(0) {}

    LLVMBuilder *Next() {
      switch (State) {
      default:
        return 0;

      case 0:
        StateBB = F.begin();
        StateE = F.end();
        State = 1;

      case 1:
        // Find all 'return' and 'unwind' instructions.
        while (StateBB != StateE) {
          BasicBlock *CurBB = StateBB++;

          // Branches and invokes do not escape, only unwind and return do.
          TerminatorInst *TI = CurBB->getTerminator();
          if (!isa<ReturnInst>(TI) && !isa<UnwindInst>(TI))
            continue;

          Builder.SetInsertPoint(TI->getParent(), TI);
          return &Builder;
        }

        State = 2;

        // Find all 'call' instructions.
        SmallVector<Instruction*,16> Calls;
        for (Function::iterator BB = F.begin(),
                                E = F.end(); BB != E; ++BB)
          for (BasicBlock::iterator II = BB->begin(),
                                    EE = BB->end(); II != EE; ++II)
            if (CallInst *CI = dyn_cast<CallInst>(II))
              if (!CI->getCalledFunction() ||
                  !CI->getCalledFunction()->getIntrinsicID())
                Calls.push_back(CI);

        if (Calls.empty())
          return 0;

        // Create a cleanup block.
        BasicBlock *CleanupBB = new BasicBlock(CleanupBBName, &F);
        UnwindInst *UI = new UnwindInst(CleanupBB);

        // Transform the 'call' instructions into 'invoke's branching to the
        // cleanup block. Go in reverse order to make prettier BB names.
        SmallVector<Value*,16> Args;
        for (unsigned I = Calls.size(); I != 0; ) {
          CallInst *CI = cast<CallInst>(Calls[--I]);

          // Split the basic block containing the function call.
          BasicBlock *CallBB = CI->getParent();
          BasicBlock *NewBB =
            CallBB->splitBasicBlock(CI, CallBB->getName() + ".cont");

          // Remove the unconditional branch inserted at the end of CallBB.
          CallBB->getInstList().pop_back();
          NewBB->getInstList().remove(CI);

          // Create a new invoke instruction.
          Args.clear();
          Args.append(CI->op_begin() + 1, CI->op_end());

          InvokeInst *II = new InvokeInst(CI->getOperand(0),
                                          NewBB, CleanupBB,
                                          Args.begin(), Args.end(),
                                          CI->getName(), CallBB);
          II->setCallingConv(CI->getCallingConv());
          II->setParamAttrs(CI->getParamAttrs());
          CI->replaceAllUsesWith(II);
          delete CI;
        }

        Builder.SetInsertPoint(UI->getParent(), UI);
        return &Builder;
      }
    }
  };

}

// -----------------------------------------------------------------------------

Collector *llvm::createShadowStackCollector() {
  return new ShadowStackCollector();
}

ShadowStackCollector::ShadowStackCollector() : Head(0), StackEntryTy(0) {
  InitRoots = true;
  CustomRoots = true;
}

Constant *ShadowStackCollector::GetFrameMap(Function &F) {
  // doInitialization creates the abstract type of this value.

  Type *VoidPtr = PointerType::getUnqual(Type::Int8Ty);

  // Truncate the ShadowStackDescriptor if some metadata is null.
  unsigned NumMeta = 0;
  SmallVector<Constant*,16> Metadata;
  for (unsigned I = 0; I != Roots.size(); ++I) {
    Constant *C = cast<Constant>(Roots[I].first->getOperand(2));
    if (!C->isNullValue())
      NumMeta = I + 1;
    Metadata.push_back(ConstantExpr::getBitCast(C, VoidPtr));
  }

  Constant *BaseElts[] = {
    ConstantInt::get(Type::Int32Ty, Roots.size(), false),
    ConstantInt::get(Type::Int32Ty, NumMeta, false),
  };

  Constant *DescriptorElts[] = {
    ConstantStruct::get(BaseElts, 2),
    ConstantArray::get(ArrayType::get(VoidPtr, NumMeta),
                       Metadata.begin(), NumMeta)
  };

  Constant *FrameMap = ConstantStruct::get(DescriptorElts, 2);

  std::string TypeName("gc_map.");
  TypeName += utostr(NumMeta);
  F.getParent()->addTypeName(TypeName, FrameMap->getType());

  // FIXME: Is this actually dangerous as WritingAnLLVMPass.html claims? Seems
  //        that, short of multithreaded LLVM, it should be safe; all that is
  //        necessary is that a simple Module::iterator loop not be invalidated.
  //        Appending to the GlobalVariable list is safe in that sense.
  //
  //        All of the output passes emit globals last. The ExecutionEngine
  //        explicitly supports adding globals to the module after
  //        initialization.
  //
  //        Still, if it isn't deemed acceptable, then this transformation needs
  //        to be a ModulePass (which means it cannot be in the 'llc' pipeline
  //        (which uses a FunctionPassManager (which segfaults (not asserts) if
  //        provided a ModulePass))).
  Constant *GV = new GlobalVariable(FrameMap->getType(), true,
                                    GlobalVariable::InternalLinkage,
                                    FrameMap, "__gc_" + F.getName(),
                                    F.getParent());

  Constant *GEPIndices[2] = { ConstantInt::get(Type::Int32Ty, 0),
                              ConstantInt::get(Type::Int32Ty, 0) };
  return ConstantExpr::getGetElementPtr(GV, GEPIndices, 2);
}

const Type* ShadowStackCollector::GetConcreteStackEntryType(Function &F) {
  // doInitialization creates the generic version of this type.
  std::vector<const Type*> EltTys;
  EltTys.push_back(StackEntryTy);
  for (size_t I = 0; I != Roots.size(); I++)
    EltTys.push_back(Roots[I].second->getAllocatedType());
  Type *Ty = StructType::get(EltTys);

  std::string TypeName("gc_stackentry.");
  TypeName += F.getName();
  F.getParent()->addTypeName(TypeName, Ty);

  return Ty;
}

/// doInitialization - If this module uses the GC intrinsics, find them now. If
/// not, exit fast.
bool ShadowStackCollector::initializeCustomLowering(Module &M) {
  // struct FrameMap {
  //   int32_t NumRoots; // Number of roots in stack frame.
  //   int32_t NumMeta;  // Number of metadata descriptors. May be < NumRoots.
  //   void *Meta[];     // May be absent for roots without metadata.
  // };
  std::vector<const Type*> EltTys;
  EltTys.push_back(Type::Int32Ty); // 32 bits is ok up to a 32GB stack frame. :)
  EltTys.push_back(Type::Int32Ty); // Specifies length of variable length array.
  StructType *FrameMapTy = StructType::get(EltTys);
  M.addTypeName("gc_map", FrameMapTy);
  PointerType *FrameMapPtrTy = PointerType::getUnqual(FrameMapTy);

  // struct StackEntry {
  //   ShadowStackEntry *Next; // Caller's stack entry.
  //   FrameMap *Map;          // Pointer to constant FrameMap.
  //   void *Roots[];          // Stack roots (in-place array, so we pretend).
  // };
  OpaqueType *RecursiveTy = OpaqueType::get();

  EltTys.clear();
  EltTys.push_back(PointerType::getUnqual(RecursiveTy));
  EltTys.push_back(FrameMapPtrTy);
  PATypeHolder LinkTyH = StructType::get(EltTys);

  RecursiveTy->refineAbstractTypeTo(LinkTyH.get());
  StackEntryTy = cast<StructType>(LinkTyH.get());
  const PointerType *StackEntryPtrTy = PointerType::getUnqual(StackEntryTy);
  M.addTypeName("gc_stackentry", LinkTyH.get()); // FIXME: Is this safe from
                                                 //        a FunctionPass?

  // Get the root chain if it already exists.
  Head = M.getGlobalVariable("llvm_gc_root_chain");
  if (!Head) {
    // If the root chain does not exist, insert a new one with linkonce
    // linkage!
    Head = new GlobalVariable(StackEntryPtrTy, false,
                              GlobalValue::LinkOnceLinkage,
                              Constant::getNullValue(StackEntryPtrTy),
                              "llvm_gc_root_chain", &M);
  } else if (Head->hasExternalLinkage() && Head->isDeclaration()) {
    Head->setInitializer(Constant::getNullValue(StackEntryPtrTy));
    Head->setLinkage(GlobalValue::LinkOnceLinkage);
  }

  return true;
}

bool ShadowStackCollector::IsNullValue(Value *V) {
  if (Constant *C = dyn_cast<Constant>(V))
    return C->isNullValue();
  return false;
}

void ShadowStackCollector::CollectRoots(Function &F) {
  // FIXME: Account for original alignment. Could fragment the root array.
  //   Approach 1: Null initialize empty slots at runtime. Yuck.
  //   Approach 2: Emit a map of the array instead of just a count.

  assert(Roots.empty() && "Not cleaned up?");

  SmallVector<std::pair<CallInst*,AllocaInst*>,16> MetaRoots;

  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
    for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;)
      if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++))
        if (Function *F = CI->getCalledFunction())
          if (F->getIntrinsicID() == Intrinsic::gcroot) {
            std::pair<CallInst*,AllocaInst*> Pair = std::make_pair(
              CI, cast<AllocaInst>(
                    IntrinsicInst::StripPointerCasts(CI->getOperand(1))));
            if (IsNullValue(CI->getOperand(2)))
              Roots.push_back(Pair);
            else
              MetaRoots.push_back(Pair);
          }

  // Number roots with metadata (usually empty) at the beginning, so that the
  // FrameMap::Meta array can be elided.
  Roots.insert(Roots.begin(), MetaRoots.begin(), MetaRoots.end());
}

GetElementPtrInst *
ShadowStackCollector::CreateGEP(LLVMBuilder &B, Value *BasePtr,
                                int Idx, int Idx2, const char *Name) {
  Value *Indices[] = { ConstantInt::get(Type::Int32Ty, 0),
                       ConstantInt::get(Type::Int32Ty, Idx),
                       ConstantInt::get(Type::Int32Ty, Idx2) };
  return B.CreateGEP(BasePtr, Indices, Indices + 3, Name);
}

GetElementPtrInst *
ShadowStackCollector::CreateGEP(LLVMBuilder &B, Value *BasePtr,
                                int Idx, const char *Name) {
  Value *Indices[] = { ConstantInt::get(Type::Int32Ty, 0),
                       ConstantInt::get(Type::Int32Ty, Idx) };
  return B.CreateGEP(BasePtr, Indices, Indices + 2, Name);
}

/// runOnFunction - Insert code to maintain the shadow stack.
bool ShadowStackCollector::performCustomLowering(Function &F) {
  // Find calls to llvm.gcroot.
  CollectRoots(F);

  // If there are no roots in this function, then there is no need to add a
  // stack map entry for it.
  if (Roots.empty())
    return false;

  // Build the constant map and figure the type of the shadow stack entry.
  Value *FrameMap = GetFrameMap(F);
  const Type *ConcreteStackEntryTy = GetConcreteStackEntryType(F);

  // Build the shadow stack entry at the very start of the function.
  BasicBlock::iterator IP = F.getEntryBlock().begin();
  LLVMBuilder AtEntry(IP->getParent(), IP);

  Instruction *StackEntry = AtEntry.CreateAlloca(ConcreteStackEntryTy, 0,
                                                 "gc_frame");

  while (isa<AllocaInst>(IP)) ++IP;
  AtEntry.SetInsertPoint(IP->getParent(), IP);

  // Initialize the map pointer and load the current head of the shadow stack.
  Instruction *CurrentHead = AtEntry.CreateLoad(Head, "gc_currhead");
  Instruction *EntryMapPtr = CreateGEP(AtEntry, StackEntry, 0, 1,
                                       "gc_frame.map");
  AtEntry.CreateStore(FrameMap, EntryMapPtr);

  // After all the allocas...
  for (unsigned I = 0, E = Roots.size(); I != E; ++I) {
    // For each root, find the corresponding slot in the aggregate...
    Value *SlotPtr = CreateGEP(AtEntry, StackEntry, 1 + I, "gc_root");

    // And use it in lieu of the alloca.
    AllocaInst *OriginalAlloca = Roots[I].second;
    SlotPtr->takeName(OriginalAlloca);
    OriginalAlloca->replaceAllUsesWith(SlotPtr);
  }

  // Move past the original stores inserted by Collector::InitRoots. This isn't
  // really necessary (the collector would never see the intermediate state),
  // but it's nicer not to push the half-initialized entry onto the stack.
  while (isa<StoreInst>(IP)) ++IP;
  AtEntry.SetInsertPoint(IP->getParent(), IP);

  // Push the entry onto the shadow stack.
  Instruction *EntryNextPtr = CreateGEP(AtEntry, StackEntry, 0, 0,
                                        "gc_frame.next");
  Instruction *NewHeadVal = CreateGEP(AtEntry, StackEntry, 0, "gc_newhead");
  AtEntry.CreateStore(CurrentHead, EntryNextPtr);
  AtEntry.CreateStore(NewHeadVal, Head);

  // For each instruction that escapes...
  EscapeEnumerator EE(F, "gc_cleanup");
  while (LLVMBuilder *AtExit = EE.Next()) {
    // Pop the entry from the shadow stack. Don't reuse CurrentHead from
    // AtEntry, since that would make the value live for the entire function.
    Instruction *EntryNextPtr2 = CreateGEP(*AtExit, StackEntry, 0, 0,
                                           "gc_frame.next");
    Value *SavedHead = AtExit->CreateLoad(EntryNextPtr2, "gc_savedhead");
    AtExit->CreateStore(SavedHead, Head);
  }

  // Delete the original allocas (which are no longer used) and the intrinsic
  // calls (which are no longer valid). Doing this last avoids invalidating
  // iterators.
  for (unsigned I = 0, E = Roots.size(); I != E; ++I) {
    Roots[I].first->eraseFromParent();
    Roots[I].second->eraseFromParent();
  }

  F.dump();

  Roots.clear();
  return true;
}
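A note on the frame-map constant this file emits: GetFrameMap builds, per function, an internal global named "__gc_" plus the function name, whose layout matches the runtime's FrameMap. As a hedged C sketch, here is what it would amount to for a hypothetical function f with two roots where only the first carries metadata (f_meta0 and the anonymous struct are illustrative):

#include <stdint.h>

/* Illustrative C rendering of the internal constant "__gc_f" emitted for a
   function with two gcroots, the first of which has a metadata operand.
   CollectRoots numbers roots with metadata first, so the Meta array can be
   truncated to NumMeta = 1 entries. */
extern char f_meta0; /* hypothetical metadata object supplied by a frontend */

static const struct {
  int32_t NumRoots;     /* 2 roots in this frame */
  int32_t NumMeta;      /* only 1 has metadata; trailing nulls are elided */
  const void *Meta[1];
} __gc_f = { 2, 1, { &f_meta0 } };

A function whose roots all lack metadata gets NumMeta = 0 and no array at all, which is what the "__gc_fun" metadata in the commit message shows: .long 8 for NumRoots followed by 4 zeroed bytes for NumMeta.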
Deleted file (LowerGC.cpp):

//===-- LowerGC.cpp - Provide GC support for targets that don't -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements lowering for the llvm.gc* intrinsics for targets that do
// not natively support them (which includes the C backend). Note that the code
// generated is not as efficient as it would be for targets that natively
// support the GC intrinsics, but it is useful for getting new targets
// up-and-running quickly.
//
// This pass implements the code transformation described in this paper:
//   "Accurate Garbage Collection in an Uncooperative Environment"
//   Fergus Henderson, ISMM, 2002
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "lowergc"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Instructions.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/Compiler.h"
#include "llvm/ADT/SmallVector.h"
using namespace llvm;

namespace {
  class VISIBILITY_HIDDEN LowerGC : public FunctionPass {
    /// GCRootInt, GCReadInt, GCWriteInt - The function prototypes for the
    /// llvm.gcread/llvm.gcwrite/llvm.gcroot intrinsics.
    Function *GCRootInt, *GCReadInt, *GCWriteInt;

    /// GCRead/GCWrite - These are the functions provided by the garbage
    /// collector for read/write barriers.
    Constant *GCRead, *GCWrite;

    /// RootChain - This is the global linked-list that contains the chain of GC
    /// roots.
    GlobalVariable *RootChain;

    /// MainRootRecordType - This is the type for a function root entry if it
    /// had zero roots.
    const Type *MainRootRecordType;
  public:
    static char ID; // Pass identification, replacement for typeid
    LowerGC() : FunctionPass((intptr_t)&ID),
                GCRootInt(0), GCReadInt(0), GCWriteInt(0),
                GCRead(0), GCWrite(0), RootChain(0), MainRootRecordType(0) {}
    virtual bool doInitialization(Module &M);
    virtual bool runOnFunction(Function &F);

  private:
    const StructType *getRootRecordType(unsigned NumRoots);
  };

  char LowerGC::ID = 0;
  RegisterPass<LowerGC>
  X("lowergc", "Lower GC intrinsics, for GCless code generators");
}

/// createLowerGCPass - This function returns an instance of the "lowergc"
/// pass, which lowers garbage collection intrinsics to normal LLVM code.
FunctionPass *llvm::createLowerGCPass() {
  return new LowerGC();
}

/// getRootRecordType - This function creates and returns the type for a root
/// record containing 'NumRoots' roots.
const StructType *LowerGC::getRootRecordType(unsigned NumRoots) {
  // Build a struct that is a type used for meta-data/root pairs.
  std::vector<const Type*> ST;
  ST.push_back(GCRootInt->getFunctionType()->getParamType(0));
  ST.push_back(GCRootInt->getFunctionType()->getParamType(1));
  StructType *PairTy = StructType::get(ST);

  // Build the array of pairs.
  ArrayType *PairArrTy = ArrayType::get(PairTy, NumRoots);

  // Now build the recursive list type.
  PATypeHolder RootListH =
    MainRootRecordType ? (Type*)MainRootRecordType : (Type*)OpaqueType::get();
  ST.clear();
  ST.push_back(PointerType::getUnqual(RootListH)); // Prev pointer
  ST.push_back(Type::Int32Ty);                     // NumElements in array
  ST.push_back(PairArrTy);                         // The pairs
  StructType *RootList = StructType::get(ST);
  if (MainRootRecordType)
    return RootList;

  assert(NumRoots == 0 && "The main struct type should have zero entries!");
  cast<OpaqueType>((Type*)RootListH.get())->refineAbstractTypeTo(RootList);
  MainRootRecordType = RootListH;
  return cast<StructType>(RootListH.get());
}

/// doInitialization - If this module uses the GC intrinsics, find them now. If
/// not, this pass does not do anything.
bool LowerGC::doInitialization(Module &M) {
  GCRootInt = M.getFunction("llvm.gcroot");
  GCReadInt = M.getFunction("llvm.gcread");
  GCWriteInt = M.getFunction("llvm.gcwrite");
  if (!GCRootInt && !GCReadInt && !GCWriteInt) return false;

  PointerType *VoidPtr = PointerType::getUnqual(Type::Int8Ty);
  PointerType *VoidPtrPtr = PointerType::getUnqual(VoidPtr);

  // If the program is using read/write barriers, find the implementations of
  // them from the GC runtime library.
  if (GCReadInt) // Make: sbyte* %llvm_gc_read(sbyte**)
    GCRead = M.getOrInsertFunction("llvm_gc_read", VoidPtr, VoidPtr, VoidPtrPtr,
                                   (Type *)0);
  if (GCWriteInt) // Make: void %llvm_gc_write(sbyte*, sbyte**)
    GCWrite = M.getOrInsertFunction("llvm_gc_write", Type::VoidTy,
                                    VoidPtr, VoidPtr, VoidPtrPtr, (Type *)0);

  // If the program has GC roots, get or create the global root list.
  if (GCRootInt) {
    const StructType *RootListTy = getRootRecordType(0);
    const Type *PRLTy = PointerType::getUnqual(RootListTy);
    M.addTypeName("llvm_gc_root_ty", RootListTy);

    // Get the root chain if it already exists.
    RootChain = M.getGlobalVariable("llvm_gc_root_chain", PRLTy);
    if (RootChain == 0) {
      // If the root chain does not exist, insert a new one with linkonce
      // linkage!
      RootChain = new GlobalVariable(PRLTy, false,
                                     GlobalValue::LinkOnceLinkage,
                                     Constant::getNullValue(PRLTy),
                                     "llvm_gc_root_chain", &M);
    } else if (RootChain->hasExternalLinkage() && RootChain->isDeclaration()) {
      RootChain->setInitializer(Constant::getNullValue(PRLTy));
      RootChain->setLinkage(GlobalValue::LinkOnceLinkage);
    }
  }
  return true;
}

/// Coerce - If the specified operand number of the specified instruction does
/// not have the specified type, insert a cast. Note that this only uses BitCast
/// because the types involved are all pointers.
static void Coerce(Instruction *I, unsigned OpNum, Type *Ty) {
  if (I->getOperand(OpNum)->getType() != Ty) {
    if (Constant *C = dyn_cast<Constant>(I->getOperand(OpNum)))
      I->setOperand(OpNum, ConstantExpr::getBitCast(C, Ty));
    else {
      CastInst *CI = new BitCastInst(I->getOperand(OpNum), Ty, "", I);
      I->setOperand(OpNum, CI);
    }
  }
}

/// runOnFunction - If the program is using GC intrinsics, replace any
/// read/write intrinsics with the appropriate read/write barrier calls, then
/// inline them. Finally, build the data structures for the function's GC
/// root records.
bool LowerGC::runOnFunction(Function &F) {
  // Quick exit for programs that are not using GC mechanisms.
  if (!GCRootInt && !GCReadInt && !GCWriteInt) return false;

  PointerType *VoidPtr = PointerType::getUnqual(Type::Int8Ty);
  PointerType *VoidPtrPtr = PointerType::getUnqual(VoidPtr);

  // If there are read/write barriers in the program, perform a quick pass over
  // the function eliminating them. While we are at it, remember where we see
  // calls to llvm.gcroot.
  std::vector<CallInst*> GCRoots;
  std::vector<CallInst*> NormalCalls;

  bool MadeChange = false;
  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
    for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;)
      if (CallInst *CI = dyn_cast<CallInst>(II++)) {
        if (!CI->getCalledFunction() ||
            !CI->getCalledFunction()->isIntrinsic())
          NormalCalls.push_back(CI); // Remember all normal function calls.

        if (Function *F = CI->getCalledFunction())
          if (F == GCRootInt)
            GCRoots.push_back(CI);
          else if (F == GCReadInt || F == GCWriteInt) {
            if (F == GCWriteInt) {
              // Change a llvm.gcwrite call to call llvm_gc_write instead.
              CI->setOperand(0, GCWrite);
              // Insert casts of the operands as needed.
              Coerce(CI, 1, VoidPtr);
              Coerce(CI, 2, VoidPtr);
              Coerce(CI, 3, VoidPtrPtr);
            } else {
              Coerce(CI, 1, VoidPtr);
              Coerce(CI, 2, VoidPtrPtr);
              if (CI->getType() == VoidPtr) {
                CI->setOperand(0, GCRead);
              } else {
                // Create a whole new call to replace the old one.

                // It sure would be nice to pass op_begin()+1,
                // op_begin()+2 but it runs into trouble with
                // CallInst::init's &*iterator, which requires a
                // conversion from Use* to Value*. The conversion
                // from Use to Value* is not useful because the
                // memory for Value* won't be contiguous.
                Value* Args[] = {
                  CI->getOperand(1),
                  CI->getOperand(2)
                };
                CallInst *NC = new CallInst(GCRead, Args, Args + 2,
                                            CI->getName(), CI);
                // These functions only deal with ptr type results so BitCast
                // is the correct kind of cast (no-op cast).
                Value *NV = new BitCastInst(NC, CI->getType(), "", CI);
                CI->replaceAllUsesWith(NV);
                BB->getInstList().erase(CI);
                CI = NC;
              }
            }

            MadeChange = true;
          }
      }

  // If there are no GC roots in this function, then there is no need to create
  // a GC list record for it.
  if (GCRoots.empty()) return MadeChange;

  // Okay, there are GC roots in this function. On entry to the function, add a
  // record to the llvm_gc_root_chain, and remove it on exit.

  // Create the alloca, and zero it out.
  const StructType *RootListTy = getRootRecordType(GCRoots.size());
  AllocaInst *AI = new AllocaInst(RootListTy, 0, "gcroots", F.begin()->begin());

  // Insert the memset call after all of the allocas in the function.
  BasicBlock::iterator IP = AI;
  while (isa<AllocaInst>(IP)) ++IP;

  Constant *Zero = ConstantInt::get(Type::Int32Ty, 0);
  Constant *One = ConstantInt::get(Type::Int32Ty, 1);

  Value *Idx[2] = { Zero, Zero };

  // Get a pointer to the prev pointer.
  Value *PrevPtrPtr = new GetElementPtrInst(AI, Idx, Idx + 2,
                                            "prevptrptr", IP);

  // Load the previous pointer.
  Value *PrevPtr = new LoadInst(RootChain, "prevptr", IP);
  // Store the previous pointer into the prevptrptr
  new StoreInst(PrevPtr, PrevPtrPtr, IP);

  // Set the number of elements in this record.
  Idx[1] = One;
  Value *NumEltsPtr = new GetElementPtrInst(AI, Idx, Idx + 2,
                                            "numeltsptr", IP);
  new StoreInst(ConstantInt::get(Type::Int32Ty, GCRoots.size()), NumEltsPtr, IP);

  Value* Par[4];
  Par[0] = Zero;
  Par[1] = ConstantInt::get(Type::Int32Ty, 2);

  const PointerType *PtrLocTy =
    cast<PointerType>(GCRootInt->getFunctionType()->getParamType(0));
  Constant *Null = ConstantPointerNull::get(PtrLocTy);

  // Initialize all of the gcroot records now.
  for (unsigned i = 0, e = GCRoots.size(); i != e; ++i) {
    // Initialize the meta-data pointer.
    Par[2] = ConstantInt::get(Type::Int32Ty, i);
    Par[3] = One;
    Value *MetaDataPtr = new GetElementPtrInst(AI, Par, Par + 4,
                                               "MetaDataPtr", IP);
    assert(isa<Constant>(GCRoots[i]->getOperand(2)) && "Must be a constant");
    new StoreInst(GCRoots[i]->getOperand(2), MetaDataPtr, IP);

    // Initialize the root pointer to null on entry to the function.
    Par[3] = Zero;
    Value *RootPtrPtr = new GetElementPtrInst(AI, Par, Par + 4,
                                              "RootEntPtr", IP);
    new StoreInst(Null, RootPtrPtr, IP);

    // Each occurrence of the llvm.gcroot intrinsic now turns into an
    // initialization of the slot with the address.
    new StoreInst(GCRoots[i]->getOperand(1), RootPtrPtr, GCRoots[i]);
  }

  // Now that the record is all initialized, store the pointer into the global
  // pointer.
  Value *C = new BitCastInst(AI, PointerType::getUnqual(MainRootRecordType),
                             "", IP);
  new StoreInst(C, RootChain, IP);

  // Eliminate all the gcroot records now.
  for (unsigned i = 0, e = GCRoots.size(); i != e; ++i)
    GCRoots[i]->getParent()->getInstList().erase(GCRoots[i]);

  // On exit from the function we have to remove the entry from the GC root
  // chain. Doing this is straight-forward for return and unwind instructions:
  // just insert the appropriate copy.
  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
    if (isa<ReturnInst>(BB->getTerminator()) ||
        isa<UnwindInst>(BB->getTerminator())) {
      // We could reuse the PrevPtr loaded on entry to the function, but this
      // would make the value live for the whole function, which is probably a
      // bad idea. Just reload the value out of our stack entry.
      PrevPtr = new LoadInst(PrevPtrPtr, "prevptr", BB->getTerminator());
      new StoreInst(PrevPtr, RootChain, BB->getTerminator());
    }

  // If an exception is thrown from a callee we have to make sure to
  // unconditionally take the record off the stack. For this reason, we turn
  // all call instructions into invokes whose cleanup pops the entry off the
  // stack. We only insert one cleanup block, which is shared by all invokes.
  if (!NormalCalls.empty()) {
    // Create the shared cleanup block.
    BasicBlock *Cleanup = new BasicBlock("gc_cleanup", &F);
    UnwindInst *UI = new UnwindInst(Cleanup);
    PrevPtr = new LoadInst(PrevPtrPtr, "prevptr", UI);
    new StoreInst(PrevPtr, RootChain, UI);

    // Loop over all of the function calls, turning them into invokes.
    while (!NormalCalls.empty()) {
      CallInst *CI = NormalCalls.back();
      BasicBlock *CBB = CI->getParent();
      NormalCalls.pop_back();

      // Split the basic block containing the function call.
      BasicBlock *NewBB = CBB->splitBasicBlock(CI, CBB->getName()+".cont");

      // Remove the unconditional branch inserted at the end of the CBB.
      CBB->getInstList().pop_back();
      NewBB->getInstList().remove(CI);

      // Create a new invoke instruction.
      std::vector<Value*> Args(CI->op_begin()+1, CI->op_end());

      Value *II = new InvokeInst(CI->getCalledValue(), NewBB, Cleanup,
                                 Args.begin(), Args.end(), CI->getName(), CBB);
      cast<InvokeInst>(II)->setCallingConv(CI->getCallingConv());
      cast<InvokeInst>(II)->setParamAttrs(CI->getParamAttrs());
      CI->replaceAllUsesWith(II);
      delete CI;
    }
  }

  return true;
}
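Before moving to the runtime diff, it is worth seeing the two layouts side by side, since this is where the deleted pass loses. A hedged sketch, assuming the old GCRoot/GCRoots types and the new FrameMap/StackEntry types from the runtime change just below; visit_old and visit_new are illustrative helpers, not part of the patch:

#include <stddef.h>
#include <stdint.h>

/* Old LowerGC record: holds the *address* of each root slot, so every
   slot address must be written into the record at runtime (the per-root
   leal/movl pairs in the commit message). */
typedef struct GCRoot { void **RootPtr; void *Meta; } GCRoot;
typedef struct GCRoots {
  struct GCRoots *Next;
  unsigned NumRoots;
  GCRoot RootRecords[];
} GCRoots;

/* New shadow-stack frame: root slots live in the record itself, so their
   addresses are fixed offsets known at compile time. */
struct FrameMap { int32_t NumRoots; int32_t NumMeta; void *Meta[]; };
struct StackEntry {
  struct StackEntry *Next;
  const struct FrameMap *Map;
  void *Roots[];
};

void visit_old(GCRoots *R, void (*FP)(void **Root, void *Meta)) {
  for (unsigned i = 0; i != R->NumRoots; ++i)
    FP(R->RootRecords[i].RootPtr, R->RootRecords[i].Meta);
}

void visit_new(struct StackEntry *R, void (*FP)(void **Root, void *Meta)) {
  for (int32_t i = 0; i != R->Map->NumRoots; ++i)
    FP(&R->Roots[i], i < R->Map->NumMeta ? R->Map->Meta[i] : NULL);
}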
  * FIXME: This should be in a code-generator specific library, but for now this
  * will work for all code generators.
  */
-typedef struct GCRoot {
-  void **RootPtr;
-  void *Meta;
-} GCRoot;
+struct FrameMap {
+  int32_t NumRoots; // Number of roots in stack frame.
+  int32_t NumMeta;  // Number of metadata descriptors. May be < NumRoots.
+  void *Meta[];     // May be absent for roots without metadata.
+};
 
-typedef struct GCRoots {
-  struct GCRoots *Next;
-  unsigned NumRoots;
-  GCRoot RootRecords[];
-} GCRoots;
-GCRoots *llvm_gc_root_chain;
+struct StackEntry {
+  struct StackEntry *Next; // Caller's stack entry.
+  const FrameMap *Map;     // Pointer to constant FrameMap.
+  void *Roots[];           // Stack roots (in-place array).
+};
+StackEntry *llvm_gc_root_chain;
 
 void llvm_cg_walk_gcroots(void (*FP)(void **Root, void *Meta)) {
-  GCRoots *R = llvm_gc_root_chain;
-  for (; R; R = R->Next) {
+  for (StackEntry *R = llvm_gc_root_chain; R; R = R->Next) {
     unsigned i, e;
-    for (i = 0, e = R->NumRoots; i != e; ++i)
-      FP(R->RootRecords[i].RootPtr, R->RootRecords[i].Meta);
+    for (i = 0, e = R->Map->NumMeta; i != e; ++i)
+      FP(&R->Roots[i], R->Map->Meta[i]);
+    for (e = R->Map->NumRoots; i != e; ++i)
+      FP(&R->Roots[i], NULL);
   }
 }
 /* END FIXME! */
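A usage sketch for the updated walker: a collector's mark phase (or a debugging aid, as here) passes a visitor to llvm_cg_walk_gcroots and receives each root slot together with its metadata, NULL for the metadata-less roots handled by the walker's second loop. PrintRoot is illustrative:

#include <stdio.h>

/* Visitor invoked once per root slot on the shadow stack; Meta is NULL
   for roots declared without metadata. */
static void PrintRoot(void **Root, void *Meta) {
  printf("root slot %p -> object %p (meta %p)\n",
         (void *)Root, *Root, Meta);
}

/* Provided by the runtime above. */
void llvm_cg_walk_gcroots(void (*FP)(void **Root, void *Meta));

void dump_all_roots(void) {
  llvm_cg_walk_gcroots(PrintRoot);
}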
New file (shadow-stack codegen test):

; RUN: llvm-as < %s | llc -march=x86 | \
; RUN:   ignore grep {movl..0} | count 0

%struct.obj = type { i8*, %struct.obj* }

declare void @g() gc "shadow-stack"

define void @f(i8* %o) gc "shadow-stack" {
entry:
	%root = alloca i8*
	call void @llvm.gcroot(i8** %root, i8* null)
	store i8* %o, i8** %root
	call void @g()
	ret void
}

declare void @llvm.gcroot(i8**, i8*)