llvm.org GIT mirror llvm / 2838831
Big Kaleidoscope tutorial update. This commit switches the underlying JIT for the Kaleidoscope tutorials from MCJIT to a custom ORC-based JIT, KaleidoscopeJIT. This fixes a lot of the bugs in Kaleidoscope that were introduced when we deleted the legacy JIT. The documentation for Chapter 4, which introduces the JIT APIs, is updated to reflect the change. Also included are a number of C++11 modernizations and general cleanup. Where appropriate, the docs have been updated to reflect these changes too. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@246002 91177308-0d34-0410-b5e6-96231b3b80d8 Lang Hames 4 years ago
15 changed file(s) with 1185 addition(s) and 1294 deletion(s). Raw diff Collapse all Expand all
8484 /// CallExprAST - Expression class for function calls.
8585 class CallExprAST : public ExprAST {
8686 std::string Callee;
87 std::vector<ExprAST*> Args;
87 std::vector<std::unique_ptr<ExprAST>> Args;
8888
8989 public:
9090 CallExprAST(const std::string &Callee,
1414 well as demonstrate how easy it is to use. It's much more work to build
1515 a lexer and parser than it is to generate LLVM IR code. :)
1616
17 **Please note**: the code in this chapter and later require LLVM 2.2 or
18 later. LLVM 2.1 and before will not work with it. Also note that you
17 **Please note**: the code in this chapter and later requires LLVM 3.7 or
18 later. LLVM 3.6 and before will not work with it. Also note that you
1919 need to use a version of this tutorial that matches your LLVM release:
2020 If you are using an official LLVM release, use the version of the
2121 documentation included with your release or on the `llvm.org releases
3434 class ExprAST {
3535 public:
3636 virtual ~ExprAST() {}
37 virtual Value *Codegen() = 0;
37 virtual Value *codegen() = 0;
3838 };
3939
4040 /// NumberExprAST - Expression class for numeric literals like "1.0".
4343
4444 public:
4545 NumberExprAST(double Val) : Val(Val) {}
46 virtual Value *Codegen();
46 virtual Value *codegen();
4747 };
4848 ...
4949
50 The Codegen() method says to emit IR for that AST node along with all
50 The codegen() method says to emit IR for that AST node along with all
5151 the things it depends on, and they all return an LLVM Value object.
5252 "Value" is the class used to represent a "`Static Single Assignment
5353 (SSA) `_
7272
7373 .. code-block:: c++
7474
75 static std::unique_ptr *TheModule;
76 static IRBuilder<> Builder(getGlobalContext());
77 static std::map NamedValues;
78
7579 Value *ErrorV(const char *Str) {
7680 Error(Str);
7781 return nullptr;
7882 }
7983
80 static Module *TheModule;
81 static IRBuilder<> Builder(getGlobalContext());
82 static std::map NamedValues;
83
8484 The static variables will be used during code generation. ``TheModule``
85 is the LLVM construct that contains all of the functions and global
86 variables in a chunk of code. In many ways, it is the top-level
87 structure that the LLVM IR uses to contain code.
85 is an LLVM construct that contains functions and global variables. In many
86 ways, it is the top-level structure that the LLVM IR uses to contain code.
87 It will own the memory for all of the IR that we generate, which is why
88 the codegen() method returns a raw Value\*, rather than a unique_ptr.
8889
8990 The ``Builder`` object is a helper object that makes it easy to generate
9091 LLVM instructions. Instances of the
113114
114115 .. code-block:: c++
115116
116 Value *NumberExprAST::Codegen() {
117 Value *NumberExprAST::codegen() {
117118 return ConstantFP::get(getGlobalContext(), APFloat(Val));
118119 }
119120
127128
128129 .. code-block:: c++
129130
130 Value *VariableExprAST::Codegen() {
131 Value *VariableExprAST::codegen() {
131132 // Look this variable up in the function.
132133 Value *V = NamedValues[Name];
133134 if (!V)
147148
148149 .. code-block:: c++
149150
150 Value *BinaryExprAST::Codegen() {
151 Value *L = LHS->Codegen();
152 Value *R = RHS->Codegen();
151 Value *BinaryExprAST::codegen() {
152 Value *L = LHS->codegen();
153 Value *R = RHS->codegen();
153154 if (!L || !R)
154155 return nullptr;
155156
208209
209210 .. code-block:: c++
210211
211 Value *CallExprAST::Codegen() {
212 Value *CallExprAST::codegen() {
212213 // Look up the name in the global module table.
213214 Function *CalleeF = TheModule->getFunction(Callee);
214215 if (!CalleeF)
220221
221222 std::vector ArgsV;
222223 for (unsigned i = 0, e = Args.size(); i != e; ++i) {
223 ArgsV.push_back(Args[i]->Codegen());
224 ArgsV.push_back(Args[i]->codegen());
224225 if (!ArgsV.back())
225226 return nullptr;
226227 }
228229 return Builder.CreateCall(CalleeF, ArgsV, "calltmp");
229230 }
230231
231 Code generation for function calls is quite straightforward with LLVM.
232 The code above initially does a function name lookup in the LLVM
233 Module's symbol table. Recall that the LLVM Module is the container that
234 holds all of the functions we are JIT'ing. By giving each function the
235 same name as what the user specifies, we can use the LLVM symbol table
236 to resolve function names for us.
232 Code generation for function calls is quite straightforward with LLVM. The code
233 above initially does a function name lookup in the LLVM Module's symbol table.
234 Recall that the LLVM Module is the container that holds the functions we are
235 JIT'ing. By giving each function the same name as what the user specifies, we
236 can use the LLVM symbol table to resolve function names for us.
237237
238238 Once we have the function to call, we recursively codegen each argument
239239 that is to be passed in, and create an LLVM `call
260260
261261 .. code-block:: c++
262262
263 Function *PrototypeAST::Codegen() {
263 Function *PrototypeAST::codegen() {
264264 // Make the function type: double(double,double) etc.
265265 std::vector Doubles(Args.size(),
266266 Type::getDoubleTy(getGlobalContext()));
285285 indicates this). Note that Types in LLVM are uniqued just like Constants
286286 are, so you don't "new" a type, you "get" it.
287287
288 The final line above actually creates the function that the prototype
289 will correspond to. This indicates the type, linkage and name to use, as
288 The final line above actually creates the IR Function corresponding to
289 the Prototype. This indicates the type, linkage and name to use, as
290290 well as which module to insert into. "`external
291291 linkage <../LangRef.html#linkage>`_" means that the function may be
292292 defined outside the current module and/or that it is callable by
293293 functions outside the module. The Name passed in is the name the user
294294 specified: since "``TheModule``" is specified, this name is registered
295 in "``TheModule``"s symbol table, which is used by the function call
296 code above.
297
298 .. code-block:: c++
299
300 // If F conflicted, there was already something named 'Name'. If it has a
301 // body, don't allow redefinition or reextern.
302 if (F->getName() != Name) {
303 // Delete the one we just made and get the existing one.
304 F->eraseFromParent();
305 F = TheModule->getFunction(Name);
306
307 The Module symbol table works just like the Function symbol table when
308 it comes to name conflicts: if a new function is created with a name
309 that was previously added to the symbol table, the new function will get
310 implicitly renamed when added to the Module. The code above exploits
311 this fact to determine if there was a previous definition of this
312 function.
313
314 In Kaleidoscope, I choose to allow redefinitions of functions in two
315 cases: first, we want to allow 'extern'ing a function more than once, as
316 long as the prototypes for the externs match (since all arguments have
317 the same type, we just have to check that the number of arguments
318 match). Second, we want to allow 'extern'ing a function and then
319 defining a body for it. This is useful when defining mutually recursive
320 functions.
321
322 In order to implement this, the code above first checks to see if there
323 is a collision on the name of the function. If so, it deletes the
324 function we just created (by calling ``eraseFromParent``) and then
325 calling ``getFunction`` to get the existing function with the specified
326 name. Note that many APIs in LLVM have "erase" forms and "remove" forms.
327 The "remove" form unlinks the object from its parent (e.g. a Function
328 from a Module) and returns it. The "erase" form unlinks the object and
329 then deletes it.
330
331 .. code-block:: c++
332
333 // If F already has a body, reject this.
334 if (!F->empty()) {
335 ErrorF("redefinition of function");
336 return nullptr;
337 }
338
339 // If F took a different number of args, reject.
340 if (F->arg_size() != Args.size()) {
341 ErrorF("redefinition of function with different # args");
342 return nullptr;
343 }
344 }
345
346 In order to verify the logic above, we first check to see if the
347 pre-existing function is "empty". In this case, empty means that it has
348 no basic blocks in it, which means it has no body. If it has no body, it
349 is a forward declaration. Since we don't allow anything after a full
350 definition of the function, the code rejects this case. If the previous
351 reference to a function was an 'extern', we simply verify that the
352 number of arguments for that definition and this one match up. If not,
353 we emit an error.
354
355 .. code-block:: c++
356
357 // Set names for all arguments.
358 unsigned Idx = 0;
359 for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size();
360 ++AI, ++Idx) {
361 AI->setName(Args[Idx]);
362
363 // Add arguments to variable symbol table.
364 NamedValues[Args[Idx]] = AI;
365 }
366
367 return F;
368 }
369
370 The last bit of code for prototypes loops over all of the arguments in
371 the function, setting the name of the LLVM Argument objects to match,
372 and registering the arguments in the ``NamedValues`` map for future use
373 by the ``VariableExprAST`` AST node. Once this is set up, it returns the
374 Function object to the caller. Note that we don't check for conflicting
375 argument names here (e.g. "extern foo(a b a)"). Doing so would be very
376 straight-forward with the mechanics we have already used above.
377
378 .. code-block:: c++
379
380 Function *FunctionAST::Codegen() {
381 NamedValues.clear();
382
383 Function *TheFunction = Proto->Codegen();
384 if (!TheFunction)
385 return nullptr;
386
387 Code generation for function definitions starts out simply enough: we
388 just codegen the prototype (Proto) and verify that it is ok. We then
389 clear out the ``NamedValues`` map to make sure that there isn't anything
390 in it from the last function we compiled. Code generation of the
391 prototype ensures that there is an LLVM Function object that is ready to
392 go for us.
393
394 .. code-block:: c++
395
396 // Create a new basic block to start insertion into.
397 BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
398 Builder.SetInsertPoint(BB);
399
400 if (Value *RetVal = Body->Codegen()) {
295 in "``TheModule``"s symbol table.
296
297 .. code-block:: c++
298
299 // Set names for all arguments.
300 unsigned Idx = 0;
301 for (auto &Arg : F->args())
302 Arg.setName(Args[Idx++]);
303
304 return F;
305
306 Finally, we set the name of each of the function's arguments according to the
307 names given in the Prototype. This step isn't strictly necessary, but keeping
308 the names consistent makes the IR more readable, and allows subsequent code to
309 refer directly to the arguments for their names, rather than having to look
310 them up in the Prototype AST.
311
312 At this point we have a function prototype with no body. This is how LLVM IR
313 represents function declarations. For extern statements in Kaleidoscope, this
314 is as far as we need to go. For function definitions however, we need to
315 codegen and attach a function body.
316
317 .. code-block:: c++
318
319 Function *FunctionAST::codegen() {
320 // First, check for an existing function from a previous 'extern' declaration.
321 Function *TheFunction = TheModule->getFunction(Proto->getName());
322
323 if (!TheFunction)
324 TheFunction = Proto->codegen();
325
326 if (!TheFunction)
327 return nullptr;
328
329 if (!TheFunction->empty())
330 return (Function*)ErrorV("Function cannot be redefined.");
331
332
333 For function definitions, we start by searching TheModule's symbol table for an
334 existing version of this function, in case one has already been created using an
335 'extern' statement. If Module::getFunction returns null then no previous version
336 exists, so we'll codegen one from the Prototype. In either case, we want to
337 assert that the function is empty (i.e. has no body yet) before we start.
338
339 .. code-block:: c++
340
341 // Create a new basic block to start insertion into.
342 BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
343 Builder.SetInsertPoint(BB);
344
345 // Record the function arguments in the NamedValues map.
346 NamedValues.clear();
347 for (auto &Arg : TheFunction->args())
348 NamedValues[Arg.getName()] = &Arg;
401349
402350 Now we get to the point where the ``Builder`` is set up. The first line
403351 creates a new `basic block `_
409357 don't have any control flow, our functions will only contain one block
410358 at this point. We'll fix this in `Chapter 5 `_ :).
411359
412 .. code-block:: c++
413
414 if (Value *RetVal = Body->Codegen()) {
360 Next we add the function arguments to the NamedValues map (after first clearing
361 it out) so that they're accessible to ``VariableExprAST`` nodes.
362
363 .. code-block:: c++
364
365 if (Value *RetVal = Body->codegen()) {
415366 // Finish off the function.
416367 Builder.CreateRet(RetVal);
417368
421372 return TheFunction;
422373 }
423374
424 Once the insertion point is set up, we call the ``CodeGen()`` method for
425 the root expression of the function. If no error happens, this emits
426 code to compute the expression into the entry block and returns the
427 value that was computed. Assuming no error, we then create an LLVM `ret
428 instruction <../LangRef.html#i_ret>`_, which completes the function.
375 Once the insertion point has been set up and the NamedValues map populated,
376 we call the ``codegen()`` method for the root expression of the function. If no
377 error happens, this emits code to compute the expression into the entry block
378 and returns the value that was computed. Assuming no error, we then create an
379 LLVM `ret instruction <../LangRef.html#i_ret>`_, which completes the function.
429380 Once the function is built, we call ``verifyFunction``, which is
430381 provided by LLVM. This function does a variety of consistency checks on
431382 the generated code, to determine if our compiler is doing everything
445396 that they incorrectly typed in before: if we didn't delete it, it would
446397 live in the symbol table, with a body, preventing future redefinition.
447398
448 This code does have a bug, though. Since the ``PrototypeAST::Codegen``
449 can return a previously defined forward declaration, our code can
450 actually delete a forward declaration. There are a number of ways to fix
451 this bug, see what you can come up with! Here is a testcase:
399 This code does have a bug, though: If the ``FunctionAST::codegen()`` method
400 finds an existing IR Function, it does not validate its signature against the
401 definition's own prototype. This means that an earlier 'extern' declaration will
402 take precedence over the function definition's signature, which can cause
403 codegen to fail, for instance if the function arguments are named differently.
404 There are a number of ways to fix this bug, see what you can come up with! Here
405 is a testcase:
452406
453407 ::
454408
455 extern foo(a b); # ok, defines foo.
456 def foo(a b) c; # error, 'c' is invalid.
457 def bar() foo(1, 2); # error, unknown function "foo"
409 extern foo(a); # ok, defines foo.
410 def foo(b) b; # Error: Unknown variable name. (decl using 'a' takes precedence).
458411
459412 Driver Changes and Closing Thoughts
460413 ===================================
461414
462415 For now, code generation to LLVM doesn't really get us much, except that
463416 we can look at the pretty IR calls. The sample code inserts calls to
464 Codegen into the "``HandleDefinition``", "``HandleExtern``" etc
417 codegen into the "``HandleDefinition``", "``HandleExtern``" etc
465418 functions, and then dumps out the LLVM IR. This gives a nice way to look
466419 at the LLVM IR for simple functions. For example:
467420
121121 In order to get per-function optimizations going, we need to set up a
122122 `FunctionPassManager <../WritingAnLLVMPass.html#passmanager>`_ to hold
123123 and organize the LLVM optimizations that we want to run. Once we have
124 that, we can add a set of optimizations to run. The code looks like
125 this:
124 that, we can add a set of optimizations to run. We'll need a new
125 FunctionPassManager for each module that we want to optimize, so we'll
126 write a function to create and initialize both the module and pass manager
127 for us:
126128
127129 .. code-block:: c++
128130
129 FunctionPassManager OurFPM(TheModule);
130
131 // Set up the optimizer pipeline. Start with registering info about how the
132 // target lays out data structures.
133 OurFPM.add(new DataLayout(*TheExecutionEngine->getDataLayout()));
131 void InitializeModuleAndPassManager(void) {
132 // Open a new module.
133 TheModule = llvm::make_unique("my cool jit", getGlobalContext());
134 TheModule->setDataLayout(TheJIT->getTargetMachine().createDataLayout());
135
136 // Create a new pass manager attached to it.
137 TheFPM = llvm::make_unique(TheModule.get());
138
134139 // Provide basic AliasAnalysis support for GVN.
135 OurFPM.add(createBasicAliasAnalysisPass());
140 TheFPM.add(createBasicAliasAnalysisPass());
136141 // Do simple "peephole" optimizations and bit-twiddling optzns.
137 OurFPM.add(createInstructionCombiningPass());
142 TheFPM.add(createInstructionCombiningPass());
138143 // Reassociate expressions.
139 OurFPM.add(createReassociatePass());
144 TheFPM.add(createReassociatePass());
140145 // Eliminate Common SubExpressions.
141 OurFPM.add(createGVNPass());
146 TheFPM.add(createGVNPass());
142147 // Simplify the control flow graph (deleting unreachable blocks, etc).
143 OurFPM.add(createCFGSimplificationPass());
144
145 OurFPM.doInitialization();
146
147 // Set the global so the code gen can use this.
148 TheFPM = &OurFPM;
149
150 // Run the main "interpreter loop" now.
151 MainLoop();
152
153 This code defines a ``FunctionPassManager``, "``OurFPM``". It requires a
154 pointer to the ``Module`` to construct itself. Once it is set up, we use
155 a series of "add" calls to add a bunch of LLVM passes. The first pass is
156 basically boilerplate, it adds a pass so that later optimizations know
157 how the data structures in the program are laid out. The
158 "``TheExecutionEngine``" variable is related to the JIT, which we will
159 get to in the next section.
160
161 In this case, we choose to add 4 optimization passes. The passes we
162 chose here are a pretty standard set of "cleanup" optimizations that are
163 useful for a wide variety of code. I won't delve into what they do but,
164 believe me, they are a good starting place :).
148 TheFPM.add(createCFGSimplificationPass());
149
150 TheFPM.doInitialization();
151 }
152
153 This code initializes the global module ``TheModule``, and the function pass
154 manager ``TheFPM``, which is attached to ``TheModule``. Once the pass manager is
155 set up, we use a series of "add" calls to add a bunch of LLVM passes.
156
157 In this case, we choose to add five passes: one analysis pass (alias analysis),
158 and four optimization passes. The passes we choose here are a pretty standard set
159 of "cleanup" optimizations that are useful for a wide variety of code. I won't
160 delve into what they do but, believe me, they are a good starting place :).
165161
166162 Once the PassManager is set up, we need to make use of it. We do this by
167163 running it after our newly created function is constructed (in
168 ``FunctionAST::Codegen``), but before it is returned to the client:
164 ``FunctionAST::codegen()``), but before it is returned to the client:
169165
170166 .. code-block:: c++
171167
172 if (Value *RetVal = Body->Codegen()) {
168 if (Value *RetVal = Body->codegen()) {
173169 // Finish off the function.
174170 Builder.CreateRet(RetVal);
175171
230226 be able to call it from the command line.
231227
232228 In order to do this, we first declare and initialize the JIT. This is
233 done by adding a global variable and a call in ``main``:
229 done by adding a global variable ``TheJIT``, and initializing it in
230 ``main``:
234231
235232 .. code-block:: c++
236233
237 static ExecutionEngine *TheExecutionEngine;
234 static std::unique_ptr TheJIT;
238235 ...
239236 int main() {
240237 ..
241 // Create the JIT. This takes ownership of the module.
242 TheExecutionEngine = EngineBuilder(TheModule).create();
243 ..
244 }
245
246 This creates an abstract "Execution Engine" which can be either a JIT
247 compiler or the LLVM interpreter. LLVM will automatically pick a JIT
248 compiler for you if one is available for your platform, otherwise it
249 will fall back to the interpreter.
250
251 Once the ``ExecutionEngine`` is created, the JIT is ready to be used.
252 There are a variety of APIs that are useful, but the simplest one is the
253 "``getPointerToFunction(F)``" method. This method JIT compiles the
254 specified LLVM Function and returns a function pointer to the generated
255 machine code. In our case, this means that we can change the code that
256 parses a top-level expression to look like this:
238 TheJIT = llvm::make_unique();
239
240 // Run the main "interpreter loop" now.
241 MainLoop();
242
243 return 0;
244 }
245
246 The KaleidoscopeJIT class is a simple JIT built specifically for these
247 tutorials. In later chapters we will look at how it works and extend it with
248 new features, but for now we will take it as given. Its API is very simple:
249 ``addModule`` adds an LLVM IR module to the JIT, making its functions
250 available for execution; ``removeModule`` removes a module, freeing any
251 memory associated with the code in that module; and ``findSymbol`` allows us
252 to look up pointers to the compiled code.
253
254 We can take this simple API and change our code that parses top-level expressions to
255 look like this:
257256
258257 .. code-block:: c++
259258
260259 static void HandleTopLevelExpression() {
261260 // Evaluate a top-level expression into an anonymous function.
262261 if (auto FnAST = ParseTopLevelExpr()) {
263 if (auto *FnIR = FnAST->Codegen()) {
264 FnIR->dump(); // Dump the function for exposition purposes.
265
266 // JIT the function, returning a function pointer.
267 void *FPtr = TheExecutionEngine->getPointerToFunction(FnIR);
268
269 // Cast it to the right type (takes no arguments, returns a double) so we
270 // can call it as a native function.
271 double (*FP)() = (double (*)())(intptr_t)FPtr;
262 if (FnAST->codegen()) {
263
264 // JIT the module containing the anonymous expression, keeping a handle so
265 // we can free it later.
266 auto H = TheJIT->addModule(std::move(TheModule));
267 InitializeModuleAndPassManager();
268
269 // Search the JIT for the __anon_expr symbol.
270 auto ExprSymbol = TheJIT->findSymbol("__anon_expr");
271 assert(ExprSymbol && "Function not found");
272
273 // Get the symbol's address and cast it to the right type (takes no
274 // arguments, returns a double) so we can call it as a native function.
275 double (*FP)() = (double (*)())(intptr_t)ExprSymbol.getAddress();
272276 fprintf(stderr, "Evaluated to %f\n", FP());
277
278 // Delete the anonymous expression module from the JIT.
279 TheJIT->removeModule(H);
273280 }
274281
275 Recall that we compile top-level expressions into a self-contained LLVM
276 function that takes no arguments and returns the computed double.
277 Because the LLVM JIT compiler matches the native platform ABI, this
278 means that you can just cast the result pointer to a function pointer of
279 that type and call it directly. This means, there is no difference
280 between JIT compiled code and native machine code that is statically
281 linked into your application.
282 If parsing and codegen succeed, the next step is to add the module containing
283 the top-level expression to the JIT. We do this by calling addModule, which
284 triggers code generation for all the functions in the module, and returns a
285 handle that can be used to remove the module from the JIT later. Once the module
286 has been added to the JIT it can no longer be modified, so we also open a new
287 module to hold subsequent code by calling ``InitializeModuleAndPassManager()``.
288
289 Once we've added the module to the JIT we need to get a pointer to the final
290 generated code. We do this by calling the JIT's findSymbol method, and passing
291 the name of the top-level expression function: ``__anon_expr``. Since we just
292 added this function, we assert that findSymbol returned a result.
293
294 Next, we get the in-memory address of the ``__anon_expr`` function by calling
295 ``getAddress()`` on the symbol. Recall that we compile top-level expressions
296 into a self-contained LLVM function that takes no arguments and returns the
297 computed double. Because the LLVM JIT compiler matches the native platform ABI,
298 this means that you can just cast the result pointer to a function pointer of
299 that type and call it directly. This means, there is no difference between JIT
300 compiled code and native machine code that is statically linked into your
301 application.
302
303 Finally, since we don't support re-evaluation of top-level expressions, we
304 remove the module from the JIT when we're done to free the associated memory.
305 Recall, however, that the module we created a few lines earlier (via
306 ``InitializeModuleAndPassManager``) is still open and waiting for new code to be
307 added.
282308
283309 With just these two changes, let's see how Kaleidoscope works now!
284310
319345
320346 Evaluated to 24.000000
321347
322 This illustrates that we can now call user code, but there is something
323 a bit subtle going on here. Note that we only invoke the JIT on the
324 anonymous functions that *call testfunc*, but we never invoked it on
325 *testfunc* itself. What actually happened here is that the JIT scanned
326 for all non-JIT'd functions transitively called from the anonymous
327 function and compiled all of them before returning from
328 ``getPointerToFunction()``.
329
330 The JIT provides a number of other more advanced interfaces for things
331 like freeing allocated machine code, rejit'ing functions to update them,
332 etc. However, even with this simple code, we get some surprisingly
333 powerful capabilities - check this out (I removed the dump of the
334 anonymous functions, you should get the idea by now :) :
348 ready> testfunc(5, 10);
349 ready> LLVM ERROR: Program used external function 'testfunc' which could not be resolved!
350
351
352 Function definitions and calls also work, but something went very wrong on that
353 last line. The call looks valid, so what happened? As you may have guessed from
354 the API, a Module is a unit of allocation for the JIT, and testfunc was part
355 of the same module that contained the anonymous expression. When we removed that
356 module from the JIT to free the memory for the anonymous expression, we deleted
357 the definition of ``testfunc`` along with it. Then, when we tried to call
358 testfunc a second time, the JIT could no longer find it.
359
360 The easiest way to fix this is to put the anonymous expression in a separate
361 module from the rest of the function definitions. The JIT will happily resolve
362 function calls across module boundaries, as long as each of the functions called
363 has a prototype, and is added to the JIT before it is called. By putting the
364 anonymous expression in a different module we can delete it without affecting
365 the rest of the functions.
366
367 In fact, we're going to go a step further and put every function in its own
368 module. Doing so allows us to exploit a useful property of the KaleidoscopeJIT
369 that will make our environment more REPL-like: Functions can be added to the
370 JIT more than once (unlike a module where every function must have a unique
371 definition). When you look up a symbol in KaleidoscopeJIT it will always return
372 the most recent definition:
373
374 ::
375
376 ready> def foo(x) x + 1;
377 Read function definition:
378 define double @foo(double %x) {
379 entry:
380 %addtmp = fadd double %x, 1.000000e+00
381 ret double %addtmp
382 }
383
384 ready> foo(2);
385 Evaluated to 3.000000
386
387 ready> def foo(x) x + 2;
388 define double @foo(double %x) {
389 entry:
390 %addtmp = fadd double %x, 2.000000e+00
391 ret double %addtmp
392 }
393
394 ready> foo(2);
395 Evaluated to 4.000000
396
397
398 To allow each function to live in its own module we'll need a way to
399 re-generate previous function declarations into each new module we open:
400
401 .. code-block:: c++
402
403 static std::unique_ptr TheJIT;
404
405 ...
406
407 Function *getFunction(std::string Name) {
408 // First, see if the function has already been added to the current module.
409 if (auto *F = TheModule->getFunction(Name))
410 return F;
411
412 // If not, check whether we can codegen the declaration from some existing
413 // prototype.
414 auto FI = FunctionProtos.find(Name);
415 if (FI != FunctionProtos.end())
416 return FI->second->codegen();
417
418 // If no existing prototype exists, return null.
419 return nullptr;
420 }
421
422 ...
423
424 Value *CallExprAST::codegen() {
425 // Look up the name in the global module table.
426 Function *CalleeF = getFunction(Callee);
427
428 ...
429
430 Function *FunctionAST::codegen() {
431 // Transfer ownership of the prototype to the FunctionProtos map, but keep a
432 // reference to it for use below.
433 auto &P = *Proto;
434 FunctionProtos[Proto->getName()] = std::move(Proto);
435 Function *TheFunction = getFunction(P.getName());
436 if (!TheFunction)
437 return nullptr;
438
439
440 To enable this, we'll start by adding a new global, ``FunctionProtos``, that
441 holds the most recent prototype for each function. We'll also add a convenience
442 method, ``getFunction()``, to replace calls to ``TheModule->getFunction()``.
443 Our convenience method searches ``TheModule`` for an existing function
444 declaration, falling back to generating a new declaration from FunctionProtos if
445 it doesn't find one. In ``CallExprAST::codegen()`` we just need to replace the
446 call to ``TheModule->getFunction()``. In ``FunctionAST::codegen()`` we need to
447 update the FunctionProtos map first, then call ``getFunction()``. With this
448 done, we can always obtain a function declaration in the current module for any
449 previously declared function.
450
451 We also need to update HandleDefinition and HandleExtern:
452
453 .. code-block:: c++
454
455 static void HandleDefinition() {
456 if (auto FnAST = ParseDefinition()) {
457 if (auto *FnIR = FnAST->codegen()) {
458 fprintf(stderr, "Read function definition:");
459 FnIR->dump();
460 TheJIT->addModule(std::move(TheModule));
461 InitializeModuleAndPassManager();
462 }
463 } else {
464 // Skip token for error recovery.
465 getNextToken();
466 }
467 }
468
469 static void HandleExtern() {
470 if (auto ProtoAST = ParseExtern()) {
471 if (auto *FnIR = ProtoAST->codegen()) {
472 fprintf(stderr, "Read extern: ");
473 FnIR->dump();
474 FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST);
475 }
476 } else {
477 // Skip token for error recovery.
478 getNextToken();
479 }
480 }
481
482 In HandleDefinition, we add two lines to transfer the newly defined function to
483 the JIT and open a new module. In HandleExtern, we just need to add one line to
484 add the prototype to FunctionProtos.
485
486 With these changes made, let's try our REPL again (I removed the dump of the
487 anonymous functions this time; you should get the idea by now :) :
488
489 ::
490
491 ready> def foo(x) x + 1;
492 ready> foo(2);
493 Evaluated to 3.000000
494
495 ready> def foo(x) x + 2;
496 ready> foo(2);
497 Evaluated to 4.000000
498
499 It works!
500
501 Even with this simple code, we get some surprisingly powerful capabilities -
502 check this out:
335503
336504 ::
337505
374542
375543 Evaluated to 1.000000
376544
377 Whoa, how does the JIT know about sin and cos? The answer is
378 surprisingly simple: in this example, the JIT started execution of a
379 function and got to a function call. It realized that the function was
380 not yet JIT compiled and invoked the standard set of routines to resolve
381 the function. In this case, there is no body defined for the function,
382 so the JIT ended up calling "``dlsym("sin")``" on the Kaleidoscope
383 process itself. Since "``sin``" is defined within the JIT's address
384 space, it simply patches up calls in the module to call the libm version
385 of ``sin`` directly.
386
387 The LLVM JIT provides a number of interfaces (look in the
388 ``ExecutionEngine.h`` file) for controlling how unknown functions get
389 resolved. It allows you to establish explicit mappings between IR
390 objects and addresses (useful for LLVM global variables that you want to
391 map to static tables, for example), allows you to dynamically decide on
392 the fly based on the function name, and even allows you to have the JIT
393 compile functions lazily the first time they're called.
394
395 One interesting application of this is that we can now extend the
396 language by writing arbitrary C++ code to implement operations. For
397 example, if we add:
545 Whoa, how does the JIT know about sin and cos? The answer is surprisingly
546 simple: The KaleidoscopeJIT has a straightforward symbol resolution rule that
547 it uses to find symbols that aren't available in any given module: First
548 it searches all the modules that have already been added to the JIT, from the
549 most recent to the oldest, to find the newest definition. If no definition is
550 found inside the JIT, it falls back to calling "``dlsym("sin")``" on the
551 Kaleidoscope process itself. Since "``sin``" is defined within the JIT's
552 address space, it simply patches up calls in the module to call the libm
553 version of ``sin`` directly.
554
555 In the future we'll see how tweaking this symbol resolution rule can be used to
556 enable all sorts of useful features, from security (restricting the set of
557 symbols available to JIT'd code), to dynamic code generation based on symbol
558 names, and even lazy compilation.
559
560 One immediate benefit of the symbol resolution rule is that we can now extend
561 the language by writing arbitrary C++ code to implement operations. For example,
562 if we add:
398563
399564 .. code-block:: c++
400565
102102 IfExprAST(std::unique_ptr Cond, std::unique_ptr Then,
103103 std::unique_ptr Else)
104104 : Cond(std::move(Cond)), Then(std::move(Then)), Else(std::move(Else)) {}
105 virtual Value *Codegen();
105 virtual Value *codegen();
106106 };
107107
108108 The AST node just has pointers to the various subexpressions.
279279 Code Generation for If/Then/Else
280280 --------------------------------
281281
282 In order to generate code for this, we implement the ``Codegen`` method
282 In order to generate code for this, we implement the ``codegen`` method
283283 for ``IfExprAST``:
284284
285285 .. code-block:: c++
286286
287 Value *IfExprAST::Codegen() {
288 Value *CondV = Cond->Codegen();
287 Value *IfExprAST::codegen() {
288 Value *CondV = Cond->codegen();
289289 if (!CondV)
290290 return nullptr;
291291
336336 // Emit then value.
337337 Builder.SetInsertPoint(ThenBB);
338338
339 Value *ThenV = Then->Codegen();
339 Value *ThenV = Then->codegen();
340340 if (!ThenV)
341341 return nullptr;
342342
368368 we just set it to ThenBB 5 lines above? The problem is that the "Then"
369369 expression may actually itself change the block that the Builder is
370370 emitting into if, for example, it contains a nested "if/then/else"
371 expression. Because calling Codegen recursively could arbitrarily change
371 expression. Because calling ``codegen()`` recursively could arbitrarily change
372372 the notion of the current block, we are required to get an up-to-date
373373 value for code that will set up the Phi node.
374374
378378 TheFunction->getBasicBlockList().push_back(ElseBB);
379379 Builder.SetInsertPoint(ElseBB);
380380
381 Value *ElseV = Else->Codegen();
381 Value *ElseV = Else->codegen();
382382 if (!ElseV)
383383 return nullptr;
384384
385385 Builder.CreateBr(MergeBB);
386 // Codegen of 'Else' can change the current block, update ElseBB for the PHI.
386 // codegen of 'Else' can change the current block, update ElseBB for the PHI.
387387 ElseBB = Builder.GetInsertBlock();
388388
389389 Code generation for the 'else' block is basically identical to codegen
499499 std::unique_ptr Body)
500500 : VarName(VarName), Start(std::move(Start)), End(std::move(End)),
501501 Step(std::move(Step)), Body(std::move(Body)) {}
502 virtual Value *Codegen();
502 virtual Value *codegen();
503503 };
504504
505505 Parser Extensions for the 'for' Loop
601601 Code Generation for the 'for' Loop
602602 ----------------------------------
603603
604 The first part of Codegen is very simple: we just output the start
604 The first part of codegen is very simple: we just output the start
605605 expression for the loop value:
606606
607607 .. code-block:: c++
608608
609 Value *ForExprAST::Codegen() {
609 Value *ForExprAST::codegen() {
610610 // Emit the start code first, without 'variable' in scope.
611 Value *StartVal = Start->Codegen();
611 Value *StartVal = Start->codegen();
612612 if (StartVal == 0) return 0;
613613
614614 With this out of the way, the next step is to set up the LLVM basic
662662 // Emit the body of the loop. This, like any other expr, can change the
663663 // current BB. Note that we ignore the value computed by the body, but don't
664664 // allow an error.
665 if (!Body->Codegen())
665 if (!Body->codegen())
666666 return nullptr;
667667
668668 Now the code starts to get more interesting. Our 'for' loop introduces a
687687 // Emit the step value.
688688 Value *StepVal = nullptr;
689689 if (Step) {
690 StepVal = Step->Codegen();
690 StepVal = Step->codegen();
691691 if (!StepVal)
692692 return nullptr;
693693 } else {
705705 .. code-block:: c++
706706
707707 // Compute the end condition.
708 Value *EndCond = End->Codegen();
708 Value *EndCond = End->codegen();
709709 if (!EndCond)
710710 return nullptr;
711711
758758 we remove the loop variable from the symbol table, so that it isn't in
759759 scope after the for loop. Finally, code generation of the for loop
760760 always returns 0.0, so that is what we return from
761 ``ForExprAST::Codegen``.
761 ``ForExprAST::codegen()``.
762762
763763 With this, we conclude the "adding control flow to Kaleidoscope" chapter
764764 of the tutorial. In this chapter we added two control flow constructs,
152152
153153 unsigned getBinaryPrecedence() const { return Precedence; }
154154
155 Function *Codegen();
155 Function *codegen();
156156 };
157157
158158 Basically, in addition to knowing a name for the prototype, we now keep
234234
235235 .. code-block:: c++
236236
237 Value *BinaryExprAST::Codegen() {
238 Value *L = LHS->Codegen();
239 Value *R = RHS->Codegen();
237 Value *BinaryExprAST::codegen() {
238 Value *L = LHS->codegen();
239 Value *R = RHS->codegen();
240240 if (!L || !R)
241241 return nullptr;
242242
275275
276276 .. code-block:: c++
277277
278 Function *FunctionAST::Codegen() {
278 Function *FunctionAST::codegen() {
279279 NamedValues.clear();
280280
281 Function *TheFunction = Proto->Codegen();
281 Function *TheFunction = Proto->codegen();
282282 if (!TheFunction)
283283 return nullptr;
284284
290290 BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
291291 Builder.SetInsertPoint(BB);
292292
293 if (Value *RetVal = Body->Codegen()) {
293 if (Value *RetVal = Body->codegen()) {
294294 ...
295295
296296 Basically, before codegening a function, if it is a user-defined
322322 public:
323323 UnaryExprAST(char Opcode, std::unique_ptr Operand)
324324 : Opcode(Opcode), Operand(std::move(Operand)) {}
325 virtual Value *Codegen();
325 virtual Value *codegen();
326326 };
327327
328328 This AST node is very simple and obvious by now. It directly mirrors the
427427
428428 .. code-block:: c++
429429
430 Value *UnaryExprAST::Codegen() {
431 Value *OperandV = Operand->Codegen();
430 Value *UnaryExprAST::codegen() {
431 Value *OperandV = Operand->codegen();
432432 if (!OperandV)
433433 return nullptr;
434434
354354
355355 .. code-block:: c++
356356
357 Value *VariableExprAST::Codegen() {
357 Value *VariableExprAST::codegen() {
358358 // Look this variable up in the function.
359359 Value *V = NamedValues[Name];
360360 if (!V)
366366
367367 As you can see, this is pretty straightforward. Now we need to update
368368 the things that define the variables to set up the alloca. We'll start
369 with ``ForExprAST::Codegen`` (see the `full code listing <#code>`_ for
369 with ``ForExprAST::codegen()`` (see the `full code listing <#code>`_ for
370370 the unabridged code):
371371
372372 .. code-block:: c++
377377 AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
378378
379379 // Emit the start code first, without 'variable' in scope.
380 Value *StartVal = Start->Codegen();
380 Value *StartVal = Start->codegen();
381381 if (!StartVal)
382382 return nullptr;
383383
386386 ...
387387
388388 // Compute the end condition.
389 Value *EndCond = End->Codegen();
389 Value *EndCond = End->codegen();
390390 if (!EndCond)
391391 return nullptr;
392392
425425
426426 For each argument, we make an alloca, store the input value to the
427427 function into the alloca, and register the alloca as the memory location
428 for the argument. This method gets invoked by ``FunctionAST::Codegen``
428 for the argument. This method gets invoked by ``FunctionAST::codegen()``
429429 right after it sets up the entry block for the function.
430430
431431 The final missing piece is adding the mem2reg pass, which allows us to
571571
572572 .. code-block:: c++
573573
574 Value *BinaryExprAST::Codegen() {
574 Value *BinaryExprAST::codegen() {
575575 // Special case '=' because we don't want to emit the LHS as an expression.
576576 if (Op == '=') {
577577 // Assignment requires the LHS to be an identifier.
589589 .. code-block:: c++
590590
591591 // Codegen the RHS.
592 Value *Val = RHS->Codegen();
592 Value *Val = RHS->codegen();
593593 if (!Val)
594594 return nullptr;
595595
679679 std::unique_ptr body)
680680 : VarNames(std::move(VarNames)), Body(std::move(Body)) {}
681681
682 virtual Value *Codegen();
682 virtual Value *codegen();
683683 };
684684
685685 var/in allows a list of names to be defined all at once, and each name
784784
785785 .. code-block:: c++
786786
787 Value *VarExprAST::Codegen() {
787 Value *VarExprAST::codegen() {
788788 std::vector OldBindings;
789789
790790 Function *TheFunction = Builder.GetInsertBlock()->getParent();
807807 // var a = a in ... # refers to outer 'a'.
808808 Value *InitVal;
809809 if (Init) {
810 InitVal = Init->Codegen();
810 InitVal = Init->codegen();
811811 if (!InitVal)
812812 return nullptr;
813813 } else { // If not specified, use 0.0.
833833 .. code-block:: c++
834834
835835 // Codegen the body, now that all vars are in scope.
836 Value *BodyVal = Body->Codegen();
836 Value *BodyVal = Body->codegen();
837837 if (!BodyVal)
838838 return nullptr;
839839
108108 static void HandleTopLevelExpression() {
109109 // Evaluate a top-level expression into an anonymous function.
110110 if (auto FnAST = ParseTopLevelExpr()) {
111 - if (auto *FnIR = FnAST->Codegen()) {
111 - if (auto *FnIR = FnAST->codegen()) {
112112 - // We're just doing this to make sure it executes.
113113 - TheExecutionEngine->finalizeObject();
114114 - // JIT the function, returning a function pointer.
119119 - double (*FP)() = (double (*)())(intptr_t)FPtr;
120120 - // Ignore the return value for this.
121121 - (void)FP;
122 + if (!F->Codegen()) {
122 + if (!F->codegen()) {
123123 + fprintf(stderr, "Error generating code for top level expr");
124124 }
125125 } else {
236236 =========
237237
238238 Now that we have our ``Compile Unit`` and our source locations, we can add
239 function definitions to the debug info. So in ``PrototypeAST::Codegen`` we
239 function definitions to the debug info. So in ``PrototypeAST::codegen()`` we
240240 add a few lines of code to describe a context for our subprogram, in this
241241 case the "File", and the actual definition of the function itself.
242242
308308 public:
309309 ExprAST(SourceLocation Loc = CurLoc) : Loc(Loc) {}
310310 virtual ~ExprAST() {}
311 virtual Value* Codegen() = 0;
311 virtual Value* codegen() = 0;
312312 int getLine() const { return Loc.Line; }
313313 int getCol() const { return Loc.Col; }
314314 virtual raw_ostream &dump(raw_ostream &out, int ind) {
347347 static std::unique_ptr ParseTopLevelExpr() {
348348 if (auto E = ParseExpression()) {
349349 // Make an anonymous proto.
350 auto Proto =
351 llvm::make_unique("", std::vector());
350 auto Proto = llvm::make_unique("__anon_expr",
351 std::vector());
352352 return llvm::make_unique(std::move(Proto), std::move(E));
353353 }
354354 return nullptr;
0 #include "llvm/ADT/STLExtras.h"
1 #include "llvm/IR/Verifier.h"
2 #include "llvm/IR/DerivedTypes.h"
31 #include "llvm/IR/IRBuilder.h"
42 #include "llvm/IR/LLVMContext.h"
53 #include "llvm/IR/Module.h"
4 #include "llvm/IR/Verifier.h"
65 #include
76 #include
87 #include
9089 class ExprAST {
9190 public:
9291 virtual ~ExprAST() {}
93 virtual Value *Codegen() = 0;
92 virtual Value *codegen() = 0;
9493 };
9594
9695 /// NumberExprAST - Expression class for numeric literals like "1.0".
9998
10099 public:
101100 NumberExprAST(double Val) : Val(Val) {}
102 Value *Codegen() override;
101 Value *codegen() override;
103102 };
104103
105104 /// VariableExprAST - Expression class for referencing a variable, like "a".
108107
109108 public:
110109 VariableExprAST(const std::string &Name) : Name(Name) {}
111 Value *Codegen() override;
110 Value *codegen() override;
112111 };
113112
114113 /// BinaryExprAST - Expression class for a binary operator.
120119 BinaryExprAST(char Op, std::unique_ptr LHS,
121120 std::unique_ptr RHS)
122121 : Op(Op), LHS(std::move(LHS)), RHS(std::move(RHS)) {}
123 Value *Codegen() override;
122 Value *codegen() override;
124123 };
125124
126125 /// CallExprAST - Expression class for function calls.
132131 CallExprAST(const std::string &Callee,
133132 std::vector> Args)
134133 : Callee(Callee), Args(std::move(Args)) {}
135 Value *Codegen() override;
134 Value *codegen() override;
136135 };
137136
138137 /// PrototypeAST - This class represents the "prototype" for a function,
145144 public:
146145 PrototypeAST(const std::string &Name, std::vector Args)
147146 : Name(Name), Args(std::move(Args)) {}
148 Function *Codegen();
147 Function *codegen();
148 const std::string &getName() const { return Name; }
149149 };
150150
151151 /// FunctionAST - This class represents a function definition itself.
157157 FunctionAST(std::unique_ptr Proto,
158158 std::unique_ptr Body)
159159 : Proto(std::move(Proto)), Body(std::move(Body)) {}
160 Function *Codegen();
160 Function *codegen();
161161 };
162162 } // end anonymous namespace
163163
193193 return nullptr;
194194 }
195195 std::unique_ptr ErrorP(const char *Str) {
196 Error(Str);
197 return nullptr;
198 }
199 std::unique_ptr ErrorF(const char *Str) {
200196 Error(Str);
201197 return nullptr;
202198 }
364360 static std::unique_ptr ParseTopLevelExpr() {
365361 if (auto E = ParseExpression()) {
366362 // Make an anonymous proto.
367 auto Proto =
368 llvm::make_unique("", std::vector());
363 auto Proto = llvm::make_unique("__anon_expr",
364 std::vector());
369365 return llvm::make_unique(std::move(Proto), std::move(E));
370366 }
371367 return nullptr;
381377 // Code Generation
382378 //===----------------------------------------------------------------------===//
383379
380 static std::unique_ptr TheModule;
381 static IRBuilder<> Builder(getGlobalContext());
382 static std::map NamedValues;
383
384384 Value *ErrorV(const char *Str) {
385385 Error(Str);
386386 return nullptr;
387387 }
388388
389 static Module *TheModule;
390 static IRBuilder<> Builder(getGlobalContext());
391 static std::map NamedValues;
392
393 Value *NumberExprAST::Codegen() {
389 Value *NumberExprAST::codegen() {
394390 return ConstantFP::get(getGlobalContext(), APFloat(Val));
395391 }
396392
397 Value *VariableExprAST::Codegen() {
393 Value *VariableExprAST::codegen() {
398394 // Look this variable up in the function.
399395 Value *V = NamedValues[Name];
400396 if (!V)
402398 return V;
403399 }
404400
405 Value *BinaryExprAST::Codegen() {
406 Value *L = LHS->Codegen();
407 Value *R = RHS->Codegen();
401 Value *BinaryExprAST::codegen() {
402 Value *L = LHS->codegen();
403 Value *R = RHS->codegen();
408404 if (!L || !R)
409405 return nullptr;
410406
425421 }
426422 }
427423
428 Value *CallExprAST::Codegen() {
424 Value *CallExprAST::codegen() {
429425 // Look up the name in the global module table.
430426 Function *CalleeF = TheModule->getFunction(Callee);
431427 if (!CalleeF)
437433
438434 std::vector ArgsV;
439435 for (unsigned i = 0, e = Args.size(); i != e; ++i) {
440 ArgsV.push_back(Args[i]->Codegen());
436 ArgsV.push_back(Args[i]->codegen());
441437 if (!ArgsV.back())
442438 return nullptr;
443439 }
445441 return Builder.CreateCall(CalleeF, ArgsV, "calltmp");
446442 }
447443
448 Function *PrototypeAST::Codegen() {
444 Function *PrototypeAST::codegen() {
449445 // Make the function type: double(double,double) etc.
450446 std::vector Doubles(Args.size(),
451447 Type::getDoubleTy(getGlobalContext()));
453449 FunctionType::get(Type::getDoubleTy(getGlobalContext()), Doubles, false);
454450
455451 Function *F =
456 Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
457
458 // If F conflicted, there was already something named 'Name'. If it has a
459 // body, don't allow redefinition or reextern.
460 if (F->getName() != Name) {
461 // Delete the one we just made and get the existing one.
462 F->eraseFromParent();
463 F = TheModule->getFunction(Name);
464
465 // If F already has a body, reject this.
466 if (!F->empty()) {
467 ErrorF("redefinition of function");
468 return nullptr;
469 }
470
471 // If F took a different number of args, reject.
472 if (F->arg_size() != Args.size()) {
473 ErrorF("redefinition of function with different # args");
474 return nullptr;
475 }
476 }
452 Function::Create(FT, Function::ExternalLinkage, Name, TheModule.get());
477453
478454 // Set names for all arguments.
479455 unsigned Idx = 0;
480 for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size();
481 ++AI, ++Idx) {
482 AI->setName(Args[Idx]);
483
484 // Add arguments to variable symbol table.
485 NamedValues[Args[Idx]] = AI;
486 }
456 for (auto &Arg : F->args())
457 Arg.setName(Args[Idx++]);
487458
488459 return F;
489460 }
490461
491 Function *FunctionAST::Codegen() {
492 NamedValues.clear();
493
494 Function *TheFunction = Proto->Codegen();
462 Function *FunctionAST::codegen() {
463 // First, check for an existing function from a previous 'extern' declaration.
464 Function *TheFunction = TheModule->getFunction(Proto->getName());
465
466 if (!TheFunction)
467 TheFunction = Proto->codegen();
468
495469 if (!TheFunction)
496470 return nullptr;
497471
499473 BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
500474 Builder.SetInsertPoint(BB);
501475
502 if (Value *RetVal = Body->Codegen()) {
476 // Record the function arguments in the NamedValues map.
477 NamedValues.clear();
478 for (auto &Arg : TheFunction->args())
479 NamedValues[Arg.getName()] = &Arg;
480
481 if (Value *RetVal = Body->codegen()) {
503482 // Finish off the function.
504483 Builder.CreateRet(RetVal);
505484
520499
521500 static void HandleDefinition() {
522501 if (auto FnAST = ParseDefinition()) {
523 if (auto *FnIR = FnAST->Codegen()) {
502 if (auto *FnIR = FnAST->codegen()) {
524503 fprintf(stderr, "Read function definition:");
525504 FnIR->dump();
526505 }
532511
533512 static void HandleExtern() {
534513 if (auto ProtoAST = ParseExtern()) {
535 if (auto *FnIR = ProtoAST->Codegen()) {
514 if (auto *FnIR = ProtoAST->codegen()) {
536515 fprintf(stderr, "Read extern: ");
537516 FnIR->dump();
538517 }
545524 static void HandleTopLevelExpression() {
546525 // Evaluate a top-level expression into an anonymous function.
547526 if (auto FnAST = ParseTopLevelExpr()) {
548 if (auto *FnIR = FnAST->Codegen()) {
527 if (auto *FnIR = FnAST->codegen()) {
549528 fprintf(stderr, "Read top-level expression:");
550529 FnIR->dump();
551530 }
583562 //===----------------------------------------------------------------------===//
584563
585564 int main() {
586 LLVMContext &Context = getGlobalContext();
587
588565 // Install standard binary operators.
589566 // 1 is lowest precedence.
590567 BinopPrecedence['<'] = 10;
597574 getNextToken();
598575
599576 // Make the module, which holds all the code.
600 std::unique_ptr Owner =
601 llvm::make_unique("my cool jit", Context);
602 TheModule = Owner.get();
577 TheModule = llvm::make_unique("my cool jit", getGlobalContext());
603578
604579 // Run the main "interpreter loop" now.
605580 MainLoop();
0 #include "llvm/ADT/STLExtras.h"
11 #include "llvm/Analysis/BasicAliasAnalysis.h"
22 #include "llvm/Analysis/Passes.h"
3 #include "llvm/ExecutionEngine/ExecutionEngine.h"
4 #include "llvm/ExecutionEngine/MCJIT.h"
5 #include "llvm/ExecutionEngine/SectionMemoryManager.h"
6 #include "llvm/IR/DataLayout.h"
7 #include "llvm/IR/DerivedTypes.h"
83 #include "llvm/IR/IRBuilder.h"
94 #include "llvm/IR/LLVMContext.h"
105 #include "llvm/IR/LegacyPassManager.h"
1712 #include
1813 #include
1914 #include
15 #include "../include/KaleidoscopeJIT.h"
16
2017 using namespace llvm;
18 using namespace llvm::orc;
2119
2220 //===----------------------------------------------------------------------===//
2321 // Lexer
9997 class ExprAST {
10098 public:
10199 virtual ~ExprAST() {}
102 virtual Value *Codegen() = 0;
100 virtual Value *codegen() = 0;
103101 };
104102
105103 /// NumberExprAST - Expression class for numeric literals like "1.0".
108106
109107 public:
110108 NumberExprAST(double Val) : Val(Val) {}
111 Value *Codegen() override;
109 Value *codegen() override;
112110 };
113111
114112 /// VariableExprAST - Expression class for referencing a variable, like "a".
117115
118116 public:
119117 VariableExprAST(const std::string &Name) : Name(Name) {}
120 Value *Codegen() override;
118 Value *codegen() override;
121119 };
122120
123121 /// BinaryExprAST - Expression class for a binary operator.
129127 BinaryExprAST(char Op, std::unique_ptr LHS,
130128 std::unique_ptr RHS)
131129 : Op(Op), LHS(std::move(LHS)), RHS(std::move(RHS)) {}
132 Value *Codegen() override;
130 Value *codegen() override;
133131 };
134132
135133 /// CallExprAST - Expression class for function calls.
141139 CallExprAST(const std::string &Callee,
142140 std::vector> Args)
143141 : Callee(Callee), Args(std::move(Args)) {}
144 Value *Codegen() override;
142 Value *codegen() override;
145143 };
146144
147145 /// PrototypeAST - This class represents the "prototype" for a function,
154152 public:
155153 PrototypeAST(const std::string &Name, std::vector Args)
156154 : Name(Name), Args(std::move(Args)) {}
157 Function *Codegen();
155 Function *codegen();
156 const std::string &getName() const { return Name; }
158157 };
159158
160159 /// FunctionAST - This class represents a function definition itself.
166165 FunctionAST(std::unique_ptr Proto,
167166 std::unique_ptr Body)
168167 : Proto(std::move(Proto)), Body(std::move(Body)) {}
169 Function *Codegen();
168 Function *codegen();
170169 };
171170 } // end anonymous namespace
172171
202201 return nullptr;
203202 }
204203 std::unique_ptr ErrorP(const char *Str) {
205 Error(Str);
206 return nullptr;
207 }
208 std::unique_ptr ErrorF(const char *Str) {
209204 Error(Str);
210205 return nullptr;
211206 }
373368 static std::unique_ptr ParseTopLevelExpr() {
374369 if (auto E = ParseExpression()) {
375370 // Make an anonymous proto.
376 auto Proto =
377 llvm::make_unique("", std::vector());
371 auto Proto = llvm::make_unique("__anon_expr",
372 std::vector());
378373 return llvm::make_unique(std::move(Proto), std::move(E));
379374 }
380375 return nullptr;
387382 }
388383
389384 //===----------------------------------------------------------------------===//
390 // Quick and dirty hack
391 //===----------------------------------------------------------------------===//
392
393 // FIXME: Obviously we can do better than this
394 std::string GenerateUniqueName(const char *root) {
395 static int i = 0;
396 char s[16];
397 sprintf(s, "%s%d", root, i++);
398 std::string S = s;
399 return S;
400 }
401
402 std::string MakeLegalFunctionName(std::string Name) {
403 std::string NewName;
404 if (!Name.length())
405 return GenerateUniqueName("anon_func_");
406
407 // Start with what we have
408 NewName = Name;
409
410 // Look for a numberic first character
411 if (NewName.find_first_of("0123456789") == 0) {
412 NewName.insert(0, 1, 'n');
413 }
414
415 // Replace illegal characters with their ASCII equivalent
416 std::string legal_elements =
417 "_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
418 size_t pos;
419 while ((pos = NewName.find_first_not_of(legal_elements)) !=
420 std::string::npos) {
421 char old_c = NewName.at(pos);
422 char new_str[16];
423 sprintf(new_str, "%d", (int)old_c);
424 NewName = NewName.replace(pos, 1, new_str);
425 }
426
427 return NewName;
428 }
429
430 //===----------------------------------------------------------------------===//
431 // MCJIT helper class
432 //===----------------------------------------------------------------------===//
433
434 class MCJITHelper {
435 public:
436 MCJITHelper(LLVMContext &C) : Context(C), OpenModule(NULL) {}
437 ~MCJITHelper();
438
439 Function *getFunction(const std::string FnName);
440 Module *getModuleForNewFunction();
441 void *getPointerToFunction(Function *F);
442 void *getSymbolAddress(const std::string &Name);
443 void dump();
444
445 private:
446 typedef std::vector ModuleVector;
447 typedef std::vector EngineVector;
448
449 LLVMContext &Context;
450 Module *OpenModule;
451 ModuleVector Modules;
452 EngineVector Engines;
453 };
454
455 class HelpingMemoryManager : public SectionMemoryManager {
456 HelpingMemoryManager(const HelpingMemoryManager &) = delete;
457 void operator=(const HelpingMemoryManager &) = delete;
458
459 public:
460 HelpingMemoryManager(MCJITHelper *Helper) : MasterHelper(Helper) {}
461 ~HelpingMemoryManager() override {}
462
463 /// This method returns the address of the specified symbol.
464 /// Our implementation will attempt to find symbols in other
465 /// modules associated with the MCJITHelper to cross link symbols
466 /// from one generated module to another.
467 uint64_t getSymbolAddress(const std::string &Name) override;
468
469 private:
470 MCJITHelper *MasterHelper;
471 };
472
473 uint64_t HelpingMemoryManager::getSymbolAddress(const std::string &Name) {
474 uint64_t FnAddr = SectionMemoryManager::getSymbolAddress(Name);
475 if (FnAddr)
476 return FnAddr;
477
478 uint64_t HelperFun = (uint64_t)MasterHelper->getSymbolAddress(Name);
479 if (!HelperFun)
480 report_fatal_error("Program used extern function '" + Name +
481 "' which could not be resolved!");
482
483 return HelperFun;
484 }
485
486 MCJITHelper::~MCJITHelper() {
487 if (OpenModule)
488 delete OpenModule;
489 EngineVector::iterator begin = Engines.begin();
490 EngineVector::iterator end = Engines.end();
491 EngineVector::iterator it;
492 for (it = begin; it != end; ++it)
493 delete *it;
494 }
495
496 Function *MCJITHelper::getFunction(const std::string FnName) {
497 ModuleVector::iterator begin = Modules.begin();
498 ModuleVector::iterator end = Modules.end();
499 ModuleVector::iterator it;
500 for (it = begin; it != end; ++it) {
501 Function *F = (*it)->getFunction(FnName);
502 if (F) {
503 if (*it == OpenModule)
504 return F;
505
506 assert(OpenModule != NULL);
507
508 // This function is in a module that has already been JITed.
509 // We need to generate a new prototype for external linkage.
510 Function *PF = OpenModule->getFunction(FnName);
511 if (PF && !PF->empty()) {
512 ErrorF("redefinition of function across modules");
513 return nullptr;
514 }
515
516 // If we don't have a prototype yet, create one.
517 if (!PF)
518 PF = Function::Create(F->getFunctionType(), Function::ExternalLinkage,
519 FnName, OpenModule);
520 return PF;
521 }
522 }
523 return NULL;
524 }
525
526 Module *MCJITHelper::getModuleForNewFunction() {
527 // If we have a Module that hasn't been JITed, use that.
528 if (OpenModule)
529 return OpenModule;
530
531 // Otherwise create a new Module.
532 std::string ModName = GenerateUniqueName("mcjit_module_");
533 Module *M = new Module(ModName, Context);
534 Modules.push_back(M);
535 OpenModule = M;
536 return M;
537 }
538
539 void *MCJITHelper::getPointerToFunction(Function *F) {
540 // See if an existing instance of MCJIT has this function.
541 EngineVector::iterator begin = Engines.begin();
542 EngineVector::iterator end = Engines.end();
543 EngineVector::iterator it;
544 for (it = begin; it != end; ++it) {
545 void *P = (*it)->getPointerToFunction(F);
546 if (P)
547 return P;
548 }
549
550 // If we didn't find the function, see if we can generate it.
551 if (OpenModule) {
552 std::string ErrStr;
553 ExecutionEngine *NewEngine =
554 EngineBuilder(std::unique_ptr(OpenModule))
555 .setErrorStr(&ErrStr)
556 .setMCJITMemoryManager(std::unique_ptr(
557 new HelpingMemoryManager(this)))
558 .create();
559 if (!NewEngine) {
560 fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str());
561 exit(1);
562 }
563
564 // Create a function pass manager for this engine
565 auto *FPM = new legacy::FunctionPassManager(OpenModule);
566
567 // Set up the optimizer pipeline. Start with registering info about how the
568 // target lays out data structures.
569 OpenModule->setDataLayout(NewEngine->getDataLayout());
570 // Provide basic AliasAnalysis support for GVN.
571 FPM->add(createBasicAliasAnalysisPass());
572 // Promote allocas to registers.
573 FPM->add(createPromoteMemoryToRegisterPass());
574 // Do simple "peephole" optimizations and bit-twiddling optzns.
575 FPM->add(createInstructionCombiningPass());
576 // Reassociate expressions.
577 FPM->add(createReassociatePass());
578 // Eliminate Common SubExpressions.
579 FPM->add(createGVNPass());
580 // Simplify the control flow graph (deleting unreachable blocks, etc).
581 FPM->add(createCFGSimplificationPass());
582 FPM->doInitialization();
583
584 // For each function in the module
585 Module::iterator it;
586 Module::iterator end = OpenModule->end();
587 for (it = OpenModule->begin(); it != end; ++it) {
588 // Run the FPM on this function
589 FPM->run(*it);
590 }
591
592 // We don't need this anymore
593 delete FPM;
594
595 OpenModule = NULL;
596 Engines.push_back(NewEngine);
597 NewEngine->finalizeObject();
598 return NewEngine->getPointerToFunction(F);
599 }
600 return NULL;
601 }
602
603 void *MCJITHelper::getSymbolAddress(const std::string &Name) {
604 // Look for the symbol in each of our execution engines.
605 EngineVector::iterator begin = Engines.begin();
606 EngineVector::iterator end = Engines.end();
607 EngineVector::iterator it;
608 for (it = begin; it != end; ++it) {
609 uint64_t FAddr = (*it)->getFunctionAddress(Name);
610 if (FAddr) {
611 return (void *)FAddr;
612 }
613 }
614 return NULL;
615 }
616
617 void MCJITHelper::dump() {
618 ModuleVector::iterator begin = Modules.begin();
619 ModuleVector::iterator end = Modules.end();
620 ModuleVector::iterator it;
621 for (it = begin; it != end; ++it)
622 (*it)->dump();
623 }
624 //===----------------------------------------------------------------------===//
625385 // Code Generation
626386 //===----------------------------------------------------------------------===//
627387
628 static MCJITHelper *JITHelper;
388 static std::unique_ptr TheModule;
629389 static IRBuilder<> Builder(getGlobalContext());
630390 static std::map NamedValues;
391 static std::unique_ptr TheFPM;
392 static std::unique_ptr TheJIT;
393 static std::map> FunctionProtos;
631394
632395 Value *ErrorV(const char *Str) {
633396 Error(Str);
634397 return nullptr;
635398 }
636399
637 Value *NumberExprAST::Codegen() {
400 Function *getFunction(std::string Name) {
401 // First, see if the function has already been added to the current module.
402 if (auto *F = TheModule->getFunction(Name))
403 return F;
404
405 // If not, check whether we can codegen the declaration from some existing
406 // prototype.
407 auto FI = FunctionProtos.find(Name);
408 if (FI != FunctionProtos.end())
409 return FI->second->codegen();
410
411 // If no existing prototype exists, return null.
412 return nullptr;
413 }
414
415 Value *NumberExprAST::codegen() {
638416 return ConstantFP::get(getGlobalContext(), APFloat(Val));
639417 }
640418
641 Value *VariableExprAST::Codegen() {
419 Value *VariableExprAST::codegen() {
642420 // Look this variable up in the function.
643421 Value *V = NamedValues[Name];
644422 if (!V)
646424 return V;
647425 }
648426
649 Value *BinaryExprAST::Codegen() {
650 Value *L = LHS->Codegen();
651 Value *R = RHS->Codegen();
427 Value *BinaryExprAST::codegen() {
428 Value *L = LHS->codegen();
429 Value *R = RHS->codegen();
652430 if (!L || !R)
653431 return nullptr;
654432
669447 }
670448 }
671449
672 Value *CallExprAST::Codegen() {
450 Value *CallExprAST::codegen() {
673451 // Look up the name in the global module table.
674 Function *CalleeF = JITHelper->getFunction(Callee);
452 Function *CalleeF = getFunction(Callee);
675453 if (!CalleeF)
676454 return ErrorV("Unknown function referenced");
677455
681459
682460 std::vector ArgsV;
683461 for (unsigned i = 0, e = Args.size(); i != e; ++i) {
684 ArgsV.push_back(Args[i]->Codegen());
462 ArgsV.push_back(Args[i]->codegen());
685463 if (!ArgsV.back())
686464 return nullptr;
687465 }
689467 return Builder.CreateCall(CalleeF, ArgsV, "calltmp");
690468 }
691469
692 Function *PrototypeAST::Codegen() {
470 Function *PrototypeAST::codegen() {
693471 // Make the function type: double(double,double) etc.
694472 std::vector Doubles(Args.size(),
695473 Type::getDoubleTy(getGlobalContext()));
696474 FunctionType *FT =
697475 FunctionType::get(Type::getDoubleTy(getGlobalContext()), Doubles, false);
698476
699 std::string FnName = MakeLegalFunctionName(Name);
700
701 Module *M = JITHelper->getModuleForNewFunction();
702
703 Function *F = Function::Create(FT, Function::ExternalLinkage, FnName, M);
704
705 // If F conflicted, there was already something named 'Name'. If it has a
706 // body, don't allow redefinition or reextern.
707 if (F->getName() != FnName) {
708 // Delete the one we just made and get the existing one.
709 F->eraseFromParent();
710 F = JITHelper->getFunction(Name);
711 // If F already has a body, reject this.
712 if (!F->empty()) {
713 ErrorF("redefinition of function");
714 return nullptr;
715 }
716
717 // If F took a different number of args, reject.
718 if (F->arg_size() != Args.size()) {
719 ErrorF("redefinition of function with different # args");
720 return nullptr;
721 }
722 }
477 Function *F =
478 Function::Create(FT, Function::ExternalLinkage, Name, TheModule.get());
723479
724480 // Set names for all arguments.
725481 unsigned Idx = 0;
726 for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size();
727 ++AI, ++Idx) {
728 AI->setName(Args[Idx]);
729
730 // Add arguments to variable symbol table.
731 NamedValues[Args[Idx]] = AI;
732 }
482 for (auto &Arg : F->args())
483 Arg.setName(Args[Idx++]);
733484
734485 return F;
735486 }
736487
737 Function *FunctionAST::Codegen() {
738 NamedValues.clear();
739
740 Function *TheFunction = Proto->Codegen();
488 Function *FunctionAST::codegen() {
489 // Transfer ownership of the prototype to the FunctionProtos map, but keep a
490 // reference to it for use below.
491 auto &P = *Proto;
492 FunctionProtos[Proto->getName()] = std::move(Proto);
493 Function *TheFunction = getFunction(P.getName());
741494 if (!TheFunction)
742495 return nullptr;
743496
745498 BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
746499 Builder.SetInsertPoint(BB);
747500
748 if (Value *RetVal = Body->Codegen()) {
501 // Record the function arguments in the NamedValues map.
502 NamedValues.clear();
503 for (auto &Arg : TheFunction->args())
504 NamedValues[Arg.getName()] = &Arg;
505
506 if (Value *RetVal = Body->codegen()) {
749507 // Finish off the function.
750508 Builder.CreateRet(RetVal);
751509
752510 // Validate the generated code, checking for consistency.
753511 verifyFunction(*TheFunction);
512
513 // Run the optimizer on the function.
514 TheFPM->run(*TheFunction);
754515
755516 return TheFunction;
756517 }
764525 // Top-Level parsing and JIT Driver
765526 //===----------------------------------------------------------------------===//
766527
528 static void InitializeModuleAndPassManager() {
529 // Open a new module.
530 TheModule = llvm::make_unique("my cool jit", getGlobalContext());
531 TheModule->setDataLayout(TheJIT->getTargetMachine().createDataLayout());
532
533 // Create a new pass manager attached to it.
534 TheFPM = llvm::make_unique(TheModule.get());
535
536 // Provide basic AliasAnalysis support for GVN.
537 TheFPM->add(createBasicAliasAnalysisPass());
538 // Do simple "peephole" optimizations and bit-twiddling optzns.
539 TheFPM->add(createInstructionCombiningPass());
540 // Reassociate expressions.
541 TheFPM->add(createReassociatePass());
542 // Eliminate Common SubExpressions.
543 TheFPM->add(createGVNPass());
544 // Simplify the control flow graph (deleting unreachable blocks, etc).
545 TheFPM->add(createCFGSimplificationPass());
546
547 TheFPM->doInitialization();
548 }
549
767550 static void HandleDefinition() {
768551 if (auto FnAST = ParseDefinition()) {
769 if (auto *FnIR = FnAST->Codegen()) {
552 if (auto *FnIR = FnAST->codegen()) {
770553 fprintf(stderr, "Read function definition:");
771554 FnIR->dump();
555 TheJIT->addModule(std::move(TheModule));
556 InitializeModuleAndPassManager();
772557 }
773558 } else {
774559 // Skip token for error recovery.
778563
779564 static void HandleExtern() {
780565 if (auto ProtoAST = ParseExtern()) {
781 if (auto *FnIR = ProtoAST->Codegen()) {
566 if (auto *FnIR = ProtoAST->codegen()) {
782567 fprintf(stderr, "Read extern: ");
783568 FnIR->dump();
569 FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST);
784570 }
785571 } else {
786572 // Skip token for error recovery.
791577 static void HandleTopLevelExpression() {
792578 // Evaluate a top-level expression into an anonymous function.
793579 if (auto FnAST = ParseTopLevelExpr()) {
794 if (auto *FnIR = FnAST->Codegen()) {
795 // JIT the function, returning a function pointer.
796 void *FPtr = JITHelper->getPointerToFunction(FnIR);
797
798 // Cast it to the right type (takes no arguments, returns a double) so we
799 // can call it as a native function.
800 double (*FP)() = (double (*)())(intptr_t)FPtr;
580 if (FnAST->codegen()) {
581
582 // JIT the module containing the anonymous expression, keeping a handle so
583 // we can free it later.
584 auto H = TheJIT->addModule(std::move(TheModule));
585 InitializeModuleAndPassManager();
586
587 // Search the JIT for the __anon_expr symbol.
588 auto ExprSymbol = TheJIT->findSymbol("__anon_expr");
589 assert(ExprSymbol && "Function not found");
590
591 // Get the symbol's address and cast it to the right type (takes no
592 // arguments, returns a double) so we can call it as a native function.
593 double (*FP)() = (double (*)())(intptr_t)ExprSymbol.getAddress();
801594 fprintf(stderr, "Evaluated to %f\n", FP());
595
596 // Delete the anonymous expression module from the JIT.
597 TheJIT->removeModule(H);
802598 }
803599 } else {
804600 // Skip token for error recovery.
853649 InitializeNativeTarget();
854650 InitializeNativeTargetAsmPrinter();
855651 InitializeNativeTargetAsmParser();
856 LLVMContext &Context = getGlobalContext();
857 JITHelper = new MCJITHelper(Context);
858652
859653 // Install standard binary operators.
860654 // 1 is lowest precedence.
867661 fprintf(stderr, "ready> ");
868662 getNextToken();
869663
664 TheJIT = llvm::make_unique();
665
666 InitializeModuleAndPassManager();
667
870668 // Run the main "interpreter loop" now.
871669 MainLoop();
872670
873 // Print out all of the generated code.
874 JITHelper->dump();
875
876671 return 0;
877672 }
0 #include "llvm/ADT/STLExtras.h"
11 #include "llvm/Analysis/BasicAliasAnalysis.h"
22 #include "llvm/Analysis/Passes.h"
3 #include "llvm/ExecutionEngine/ExecutionEngine.h"
4 #include "llvm/ExecutionEngine/MCJIT.h"
5 #include "llvm/ExecutionEngine/SectionMemoryManager.h"
6 #include "llvm/IR/DataLayout.h"
7 #include "llvm/IR/DerivedTypes.h"
83 #include "llvm/IR/IRBuilder.h"
94 #include "llvm/IR/LLVMContext.h"
105 #include "llvm/IR/LegacyPassManager.h"
1712 #include
1813 #include
1914 #include
15 #include "../include/KaleidoscopeJIT.h"
16
2017 using namespace llvm;
18 using namespace llvm::orc;
2119
2220 //===----------------------------------------------------------------------===//
2321 // Lexer
116114 class ExprAST {
117115 public:
118116 virtual ~ExprAST() {}
119 virtual Value *Codegen() = 0;
117 virtual Value *codegen() = 0;
120118 };
121119
122120 /// NumberExprAST - Expression class for numeric literals like "1.0".
125123
126124 public:
127125 NumberExprAST(double Val) : Val(Val) {}
128 Value *Codegen() override;
126 Value *codegen() override;
129127 };
130128
131129 /// VariableExprAST - Expression class for referencing a variable, like "a".
134132
135133 public:
136134 VariableExprAST(const std::string &Name) : Name(Name) {}
137 Value *Codegen() override;
135 Value *codegen() override;
138136 };
139137
140138 /// BinaryExprAST - Expression class for a binary operator.
146144 BinaryExprAST(char Op, std::unique_ptr LHS,
147145 std::unique_ptr RHS)
148146 : Op(Op), LHS(std::move(LHS)), RHS(std::move(RHS)) {}
149 Value *Codegen() override;
147 Value *codegen() override;
150148 };
151149
152150 /// CallExprAST - Expression class for function calls.
158156 CallExprAST(const std::string &Callee,
159157 std::vector> Args)
160158 : Callee(Callee), Args(std::move(Args)) {}
161 Value *Codegen() override;
159 Value *codegen() override;
162160 };
163161
164162 /// IfExprAST - Expression class for if/then/else.
169167 IfExprAST(std::unique_ptr Cond, std::unique_ptr Then,
170168 std::unique_ptr Else)
171169 : Cond(std::move(Cond)), Then(std::move(Then)), Else(std::move(Else)) {}
172 Value *Codegen() override;
170 Value *codegen() override;
173171 };
174172
175173 /// ForExprAST - Expression class for for/in.
183181 std::unique_ptr Body)
184182 : VarName(VarName), Start(std::move(Start)), End(std::move(End)),
185183 Step(std::move(Step)), Body(std::move(Body)) {}
186 Value *Codegen() override;
184 Value *codegen() override;
187185 };
188186
189187 /// PrototypeAST - This class represents the "prototype" for a function,
196194 public:
197195 PrototypeAST(const std::string &Name, std::vector Args)
198196 : Name(Name), Args(std::move(Args)) {}
199 Function *Codegen();
197 Function *codegen();
198 const std::string &getName() const { return Name; }
200199 };
201200
202201 /// FunctionAST - This class represents a function definition itself.
208207 FunctionAST(std::unique_ptr Proto,
209208 std::unique_ptr Body)
210209 : Proto(std::move(Proto)), Body(std::move(Body)) {}
211 Function *Codegen();
210 Function *codegen();
212211 };
213212 } // end anonymous namespace
214213
244243 return nullptr;
245244 }
246245 std::unique_ptr ErrorP(const char *Str) {
247 Error(Str);
248 return nullptr;
249 }
250 std::unique_ptr ErrorF(const char *Str) {
251246 Error(Str);
252247 return nullptr;
253248 }
497492 static std::unique_ptr ParseTopLevelExpr() {
498493 if (auto E = ParseExpression()) {
499494 // Make an anonymous proto.
500 auto Proto =
501 llvm::make_unique("", std::vector());
495 auto Proto = llvm::make_unique("__anon_expr",
496 std::vector());
502497 return llvm::make_unique(std::move(Proto), std::move(E));
503498 }
504499 return nullptr;
514509 // Code Generation
515510 //===----------------------------------------------------------------------===//
516511
517 static Module *TheModule;
512 static std::unique_ptr TheModule;
518513 static IRBuilder<> Builder(getGlobalContext());
519514 static std::map NamedValues;
520 static legacy::FunctionPassManager *TheFPM;
515 static std::unique_ptr TheFPM;
516 static std::unique_ptr TheJIT;
517 static std::map> FunctionProtos;
521518
522519 Value *ErrorV(const char *Str) {
523520 Error(Str);
524521 return nullptr;
525522 }
526523
527 Value *NumberExprAST::Codegen() {
524 Function *getFunction(std::string Name) {
525 // First, see if the function has already been added to the current module.
526 if (auto *F = TheModule->getFunction(Name))
527 return F;
528
529 // If not, check whether we can codegen the declaration from some existing
530 // prototype.
531 auto FI = FunctionProtos.find(Name);
532 if (FI != FunctionProtos.end())
533 return FI->second->codegen();
534
535 // If no existing prototype exists, return null.
536 return nullptr;
537 }
538
539 Value *NumberExprAST::codegen() {
528540 return ConstantFP::get(getGlobalContext(), APFloat(Val));
529541 }
530542
531 Value *VariableExprAST::Codegen() {
543 Value *VariableExprAST::codegen() {
532544 // Look this variable up in the function.
533545 Value *V = NamedValues[Name];
534546 if (!V)
536548 return V;
537549 }
538550
539 Value *BinaryExprAST::Codegen() {
540 Value *L = LHS->Codegen();
541 Value *R = RHS->Codegen();
551 Value *BinaryExprAST::codegen() {
552 Value *L = LHS->codegen();
553 Value *R = RHS->codegen();
542554 if (!L || !R)
543555 return nullptr;
544556
559571 }
560572 }
561573
562 Value *CallExprAST::Codegen() {
574 Value *CallExprAST::codegen() {
563575 // Look up the name in the global module table.
564 Function *CalleeF = TheModule->getFunction(Callee);
576 Function *CalleeF = getFunction(Callee);
565577 if (!CalleeF)
566578 return ErrorV("Unknown function referenced");
567579
571583
572584 std::vector ArgsV;
573585 for (unsigned i = 0, e = Args.size(); i != e; ++i) {
574 ArgsV.push_back(Args[i]->Codegen());
586 ArgsV.push_back(Args[i]->codegen());
575587 if (!ArgsV.back())
576588 return nullptr;
577589 }
579591 return Builder.CreateCall(CalleeF, ArgsV, "calltmp");
580592 }
581593
582 Value *IfExprAST::Codegen() {
583 Value *CondV = Cond->Codegen();
594 Value *IfExprAST::codegen() {
595 Value *CondV = Cond->codegen();
584596 if (!CondV)
585597 return nullptr;
586598
602614 // Emit then value.
603615 Builder.SetInsertPoint(ThenBB);
604616
605 Value *ThenV = Then->Codegen();
617 Value *ThenV = Then->codegen();
606618 if (!ThenV)
607619 return nullptr;
608620
614626 TheFunction->getBasicBlockList().push_back(ElseBB);
615627 Builder.SetInsertPoint(ElseBB);
616628
617 Value *ElseV = Else->Codegen();
629 Value *ElseV = Else->codegen();
618630 if (!ElseV)
619631 return nullptr;
620632
648660 // endcond = endexpr
649661 // br endcond, loop, endloop
650662 // outloop:
651 Value *ForExprAST::Codegen() {
663 Value *ForExprAST::codegen() {
652664 // Emit the start code first, without 'variable' in scope.
653 Value *StartVal = Start->Codegen();
665 Value *StartVal = Start->codegen();
654666 if (!StartVal)
655667 return nullptr;
656668
680692 // Emit the body of the loop. This, like any other expr, can change the
681693 // current BB. Note that we ignore the value computed by the body, but don't
682694 // allow an error.
683 if (!Body->Codegen())
695 if (!Body->codegen())
684696 return nullptr;
685697
686698 // Emit the step value.
687699 Value *StepVal = nullptr;
688700 if (Step) {
689 StepVal = Step->Codegen();
701 StepVal = Step->codegen();
690702 if (!StepVal)
691703 return nullptr;
692704 } else {
697709 Value *NextVar = Builder.CreateFAdd(Variable, StepVal, "nextvar");
698710
699711 // Compute the end condition.
700 Value *EndCond = End->Codegen();
712 Value *EndCond = End->codegen();
701713 if (!EndCond)
702714 return nullptr;
703715
729741 return Constant::getNullValue(Type::getDoubleTy(getGlobalContext()));
730742 }
731743
732 Function *PrototypeAST::Codegen() {
744 Function *PrototypeAST::codegen() {
733745 // Make the function type: double(double,double) etc.
734746 std::vector Doubles(Args.size(),
735747 Type::getDoubleTy(getGlobalContext()));
737749 FunctionType::get(Type::getDoubleTy(getGlobalContext()), Doubles, false);
738750
739751 Function *F =
740 Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
741
742 // If F conflicted, there was already something named 'Name'. If it has a
743 // body, don't allow redefinition or reextern.
744 if (F->getName() != Name) {
745 // Delete the one we just made and get the existing one.
746 F->eraseFromParent();
747 F = TheModule->getFunction(Name);
748
749 // If F already has a body, reject this.
750 if (!F->empty()) {
751 ErrorF("redefinition of function");
752 return nullptr;
753 }
754
755 // If F took a different number of args, reject.
756 if (F->arg_size() != Args.size()) {
757 ErrorF("redefinition of function with different # args");
758 return nullptr;
759 }
760 }
752 Function::Create(FT, Function::ExternalLinkage, Name, TheModule.get());
761753
762754 // Set names for all arguments.
763755 unsigned Idx = 0;
764 for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size();
765 ++AI, ++Idx) {
766 AI->setName(Args[Idx]);
767
768 // Add arguments to variable symbol table.
769 NamedValues[Args[Idx]] = AI;
770 }
756 for (auto &Arg : F->args())
757 Arg.setName(Args[Idx++]);
771758
772759 return F;
773760 }
774761
775 Function *FunctionAST::Codegen() {
776 NamedValues.clear();
777
778 Function *TheFunction = Proto->Codegen();
762 Function *FunctionAST::codegen() {
763 // Transfer ownership of the prototype to the FunctionProtos map, but keep a
764 // reference to it for use below.
765 auto &P = *Proto;
766 FunctionProtos[Proto->getName()] = std::move(Proto);
767 Function *TheFunction = getFunction(P.getName());
779768 if (!TheFunction)
780769 return nullptr;
781770
783772 BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
784773 Builder.SetInsertPoint(BB);
785774
786 if (Value *RetVal = Body->Codegen()) {
775 // Record the function arguments in the NamedValues map.
776 NamedValues.clear();
777 for (auto &Arg : TheFunction->args())
778 NamedValues[Arg.getName()] = &Arg;
779
780 if (Value *RetVal = Body->codegen()) {
787781 // Finish off the function.
788782 Builder.CreateRet(RetVal);
789783
790784 // Validate the generated code, checking for consistency.
791785 verifyFunction(*TheFunction);
792786
793 // Optimize the function.
787 // Run the optimizer on the function.
794788 TheFPM->run(*TheFunction);
795789
796790 return TheFunction;
805799 // Top-Level parsing and JIT Driver
806800 //===----------------------------------------------------------------------===//
807801
808 static ExecutionEngine *TheExecutionEngine;
802 static void InitializeModuleAndPassManager() {
803 // Open a new module.
804 TheModule = llvm::make_unique("my cool jit", getGlobalContext());
805 TheModule->setDataLayout(TheJIT->getTargetMachine().createDataLayout());
806
807 // Create a new pass manager attached to it.
808 TheFPM = llvm::make_unique(TheModule.get());
809
810 // Provide basic AliasAnalysis support for GVN.
811 TheFPM->add(createBasicAliasAnalysisPass());
812 // Do simple "peephole" optimizations and bit-twiddling optzns.
813 TheFPM->add(createInstructionCombiningPass());
814 // Reassociate expressions.
815 TheFPM->add(createReassociatePass());
816 // Eliminate Common SubExpressions.
817 TheFPM->add(createGVNPass());
818 // Simplify the control flow graph (deleting unreachable blocks, etc).
819 TheFPM->add(createCFGSimplificationPass());
820
821 TheFPM->doInitialization();
822 }
809823
810824 static void HandleDefinition() {
811825 if (auto FnAST = ParseDefinition()) {
812 if (auto *FnIR = FnAST->Codegen()) {
826 if (auto *FnIR = FnAST->codegen()) {
813827 fprintf(stderr, "Read function definition:");
814828 FnIR->dump();
829 TheJIT->addModule(std::move(TheModule));
830 InitializeModuleAndPassManager();
815831 }
816832 } else {
817833 // Skip token for error recovery.
821837
822838 static void HandleExtern() {
823839 if (auto ProtoAST = ParseExtern()) {
824 if (auto *FnIR = ProtoAST->Codegen()) {
840 if (auto *FnIR = ProtoAST->codegen()) {
825841 fprintf(stderr, "Read extern: ");
826842 FnIR->dump();
843 FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST);
827844 }
828845 } else {
829846 // Skip token for error recovery.
834851 static void HandleTopLevelExpression() {
835852 // Evaluate a top-level expression into an anonymous function.
836853 if (auto FnAST = ParseTopLevelExpr()) {
837 if (auto *FnIR = FnAST->Codegen()) {
838 TheExecutionEngine->finalizeObject();
839 // JIT the function, returning a function pointer.
840 void *FPtr = TheExecutionEngine->getPointerToFunction(FnIR);
841
842 // Cast it to the right type (takes no arguments, returns a double) so we
843 // can call it as a native function.
844 double (*FP)() = (double (*)())(intptr_t)FPtr;
854 if (FnAST->codegen()) {
855
856 // JIT the module containing the anonymous expression, keeping a handle so
857 // we can free it later.
858 auto H = TheJIT->addModule(std::move(TheModule));
859 InitializeModuleAndPassManager();
860
861 // Search the JIT for the __anon_expr symbol.
862 auto ExprSymbol = TheJIT->findSymbol("__anon_expr");
863 assert(ExprSymbol && "Function not found");
864
865 // Get the symbol's address and cast it to the right type (takes no
866 // arguments, returns a double) so we can call it as a native function.
867 double (*FP)() = (double (*)())(intptr_t)ExprSymbol.getAddress();
845868 fprintf(stderr, "Evaluated to %f\n", FP());
869
870 // Delete the anonymous expression module from the JIT.
871 TheJIT->removeModule(H);
846872 }
847873 } else {
848874 // Skip token for error recovery.
897923 InitializeNativeTarget();
898924 InitializeNativeTargetAsmPrinter();
899925 InitializeNativeTargetAsmParser();
900 LLVMContext &Context = getGlobalContext();
901926
902927 // Install standard binary operators.
903928 // 1 is lowest precedence.
910935 fprintf(stderr, "ready> ");
911936 getNextToken();
912937
913 // Make the module, which holds all the code.
914 std::unique_ptr Owner = make_unique("my cool jit", Context);
915 TheModule = Owner.get();
916
917 // Create the JIT. This takes ownership of the module.
918 std::string ErrStr;
919 TheExecutionEngine =
920 EngineBuilder(std::move(Owner))
921 .setErrorStr(&ErrStr)
922 .setMCJITMemoryManager(llvm::make_unique())
923 .create();
924 if (!TheExecutionEngine) {
925 fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str());
926 exit(1);
927 }
928
929 legacy::FunctionPassManager OurFPM(TheModule);
930
931 // Set up the optimizer pipeline. Start with registering info about how the
932 // target lays out data structures.
933 TheModule->setDataLayout(TheExecutionEngine->getDataLayout());
934 // Provide basic AliasAnalysis support for GVN.
935 OurFPM.add(createBasicAliasAnalysisPass());
936 // Do simple "peephole" optimizations and bit-twiddling optzns.
937 OurFPM.add(createInstructionCombiningPass());
938 // Reassociate expressions.
939 OurFPM.add(createReassociatePass());
940 // Eliminate Common SubExpressions.
941 OurFPM.add(createGVNPass());
942 // Simplify the control flow graph (deleting unreachable blocks, etc).
943 OurFPM.add(createCFGSimplificationPass());
944
945 OurFPM.doInitialization();
946
947 // Set the global so the code gen can use this.
948 TheFPM = &OurFPM;
938 TheJIT = llvm::make_unique();
939
940 InitializeModuleAndPassManager();
949941
950942 // Run the main "interpreter loop" now.
951943 MainLoop();
952944
953 TheFPM = 0;
954
955 // Print out all of the generated code.
956 TheModule->dump();
957
958945 return 0;
959946 }
0 #include "llvm/ADT/STLExtras.h"
11 #include "llvm/Analysis/BasicAliasAnalysis.h"
22 #include "llvm/Analysis/Passes.h"
3 #include "llvm/ExecutionEngine/ExecutionEngine.h"
4 #include "llvm/ExecutionEngine/MCJIT.h"
5 #include "llvm/ExecutionEngine/SectionMemoryManager.h"
6 #include "llvm/IR/DataLayout.h"
7 #include "llvm/IR/DerivedTypes.h"
83 #include "llvm/IR/IRBuilder.h"
94 #include "llvm/IR/LLVMContext.h"
105 #include "llvm/IR/LegacyPassManager.h"
1712 #include
1813 #include
1914 #include
15 #include "../include/KaleidoscopeJIT.h"
16
2017 using namespace llvm;
18 using namespace llvm::orc;
2119
2220 //===----------------------------------------------------------------------===//
2321 // Lexer
124122 class ExprAST {
125123 public:
126124 virtual ~ExprAST() {}
127 virtual Value *Codegen() = 0;
125 virtual Value *codegen() = 0;
128126 };
129127
130128 /// NumberExprAST - Expression class for numeric literals like "1.0".
133131
134132 public:
135133 NumberExprAST(double Val) : Val(Val) {}
136 Value *Codegen() override;
134 Value *codegen() override;
137135 };
138136
139137 /// VariableExprAST - Expression class for referencing a variable, like "a".
142140
143141 public:
144142 VariableExprAST(const std::string &Name) : Name(Name) {}
145 Value *Codegen() override;
143 Value *codegen() override;
146144 };
147145
148146 /// UnaryExprAST - Expression class for a unary operator.
153151 public:
154152 UnaryExprAST(char Opcode, std::unique_ptr Operand)
155153 : Opcode(Opcode), Operand(std::move(Operand)) {}
156 Value *Codegen() override;
154 Value *codegen() override;
157155 };
158156
159157 /// BinaryExprAST - Expression class for a binary operator.
165163 BinaryExprAST(char Op, std::unique_ptr LHS,
166164 std::unique_ptr RHS)
167165 : Op(Op), LHS(std::move(LHS)), RHS(std::move(RHS)) {}
168 Value *Codegen() override;
166 Value *codegen() override;
169167 };
170168
171169 /// CallExprAST - Expression class for function calls.
177175 CallExprAST(const std::string &Callee,
178176 std::vector> Args)
179177 : Callee(Callee), Args(std::move(Args)) {}
180 Value *Codegen() override;
178 Value *codegen() override;
181179 };
182180
183181 /// IfExprAST - Expression class for if/then/else.
188186 IfExprAST(std::unique_ptr Cond, std::unique_ptr Then,
189187 std::unique_ptr Else)
190188 : Cond(std::move(Cond)), Then(std::move(Then)), Else(std::move(Else)) {}
191 Value *Codegen() override;
189 Value *codegen() override;
192190 };
193191
194192 /// ForExprAST - Expression class for for/in.
202200 std::unique_ptr Body)
203201 : VarName(VarName), Start(std::move(Start)), End(std::move(End)),
204202 Step(std::move(Step)), Body(std::move(Body)) {}
205 Value *Codegen() override;
203 Value *codegen() override;
206204 };
207205
208206 /// PrototypeAST - This class represents the "prototype" for a function,
219217 bool IsOperator = false, unsigned Prec = 0)
220218 : Name(Name), Args(std::move(Args)), IsOperator(IsOperator),
221219 Precedence(Prec) {}
220 Function *codegen();
221 const std::string &getName() const { return Name; }
222222
223223 bool isUnaryOp() const { return IsOperator && Args.size() == 1; }
224224 bool isBinaryOp() const { return IsOperator && Args.size() == 2; }
229229 }
230230
231231 unsigned getBinaryPrecedence() const { return Precedence; }
232
233 Function *Codegen();
234232 };
235233
236234 /// FunctionAST - This class represents a function definition itself.
242240 FunctionAST(std::unique_ptr Proto,
243241 std::unique_ptr Body)
244242 : Proto(std::move(Proto)), Body(std::move(Body)) {}
245 Function *Codegen();
243 Function *codegen();
246244 };
247245 } // end anonymous namespace
248246
278276 return nullptr;
279277 }
280278 std::unique_ptr ErrorP(const char *Str) {
281 Error(Str);
282 return nullptr;
283 }
284 std::unique_ptr ErrorF(const char *Str) {
285279 Error(Str);
286280 return nullptr;
287281 }
589583 static std::unique_ptr ParseTopLevelExpr() {
590584 if (auto E = ParseExpression()) {
591585 // Make an anonymous proto.
592 auto Proto =
593 llvm::make_unique("", std::vector());
586 auto Proto = llvm::make_unique("__anon_expr",
587 std::vector());
594588 return llvm::make_unique(std::move(Proto), std::move(E));
595589 }
596590 return nullptr;
606600 // Code Generation
607601 //===----------------------------------------------------------------------===//
608602
609 static Module *TheModule;
603 static std::unique_ptr TheModule;
610604 static IRBuilder<> Builder(getGlobalContext());
611605 static std::map NamedValues;
612 static legacy::FunctionPassManager *TheFPM;
606 static std::unique_ptr TheFPM;
607 static std::unique_ptr TheJIT;
608 static std::map> FunctionProtos;
613609
614610 Value *ErrorV(const char *Str) {
615611 Error(Str);
616612 return nullptr;
617613 }
618614
619 Value *NumberExprAST::Codegen() {
615 Function *getFunction(std::string Name) {
616 // First, see if the function has already been added to the current module.
617 if (auto *F = TheModule->getFunction(Name))
618 return F;
619
620 // If not, check whether we can codegen the declaration from some existing
621 // prototype.
622 auto FI = FunctionProtos.find(Name);
623 if (FI != FunctionProtos.end())
624 return FI->second->codegen();
625
626 // If no existing prototype exists, return null.
627 return nullptr;
628 }
629
630 Value *NumberExprAST::codegen() {
620631 return ConstantFP::get(getGlobalContext(), APFloat(Val));
621632 }
622633
623 Value *VariableExprAST::Codegen() {
634 Value *VariableExprAST::codegen() {
624635 // Look this variable up in the function.
625636 Value *V = NamedValues[Name];
626637 if (!V)
628639 return V;
629640 }
630641
631 Value *UnaryExprAST::Codegen() {
632 Value *OperandV = Operand->Codegen();
642 Value *UnaryExprAST::codegen() {
643 Value *OperandV = Operand->codegen();
633644 if (!OperandV)
634645 return nullptr;
635646
636 Function *F = TheModule->getFunction(std::string("unary") + Opcode);
647 Function *F = getFunction(std::string("unary") + Opcode);
637648 if (!F)
638649 return ErrorV("Unknown unary operator");
639650
640651 return Builder.CreateCall(F, OperandV, "unop");
641652 }
642653
643 Value *BinaryExprAST::Codegen() {
644 Value *L = LHS->Codegen();
645 Value *R = RHS->Codegen();
654 Value *BinaryExprAST::codegen() {
655 Value *L = LHS->codegen();
656 Value *R = RHS->codegen();
646657 if (!L || !R)
647658 return nullptr;
648659
664675
665676 // If it wasn't a builtin binary operator, it must be a user defined one. Emit
666677 // a call to it.
667 Function *F = TheModule->getFunction(std::string("binary") + Op);
678 Function *F = getFunction(std::string("binary") + Op);
668679 assert(F && "binary operator not found!");
669680
670681 Value *Ops[] = {L, R};
671682 return Builder.CreateCall(F, Ops, "binop");
672683 }
673684
674 Value *CallExprAST::Codegen() {
685 Value *CallExprAST::codegen() {
675686 // Look up the name in the global module table.
676 Function *CalleeF = TheModule->getFunction(Callee);
687 Function *CalleeF = getFunction(Callee);
677688 if (!CalleeF)
678689 return ErrorV("Unknown function referenced");
679690
683694
684695 std::vector ArgsV;
685696 for (unsigned i = 0, e = Args.size(); i != e; ++i) {
686 ArgsV.push_back(Args[i]->Codegen());
697 ArgsV.push_back(Args[i]->codegen());
687698 if (!ArgsV.back())
688699 return nullptr;
689700 }
691702 return Builder.CreateCall(CalleeF, ArgsV, "calltmp");
692703 }
693704
694 Value *IfExprAST::Codegen() {
695 Value *CondV = Cond->Codegen();
705 Value *IfExprAST::codegen() {
706 Value *CondV = Cond->codegen();
696707 if (!CondV)
697708 return nullptr;
698709
714725 // Emit then value.
715726 Builder.SetInsertPoint(ThenBB);
716727
717 Value *ThenV = Then->Codegen();
728 Value *ThenV = Then->codegen();
718729 if (!ThenV)
719730 return nullptr;
720731
726737 TheFunction->getBasicBlockList().push_back(ElseBB);
727738 Builder.SetInsertPoint(ElseBB);
728739
729 Value *ElseV = Else->Codegen();
740 Value *ElseV = Else->codegen();
730741 if (!ElseV)
731742 return nullptr;
732743
760771 // endcond = endexpr
761772 // br endcond, loop, endloop
762773 // outloop:
763 Value *ForExprAST::Codegen() {
774 Value *ForExprAST::codegen() {
764775 // Emit the start code first, without 'variable' in scope.
765 Value *StartVal = Start->Codegen();
776 Value *StartVal = Start->codegen();
766777 if (!StartVal)
767778 return nullptr;
768779
792803 // Emit the body of the loop. This, like any other expr, can change the
793804 // current BB. Note that we ignore the value computed by the body, but don't
794805 // allow an error.
795 if (!Body->Codegen())
806 if (!Body->codegen())
796807 return nullptr;
797808
798809 // Emit the step value.
799810 Value *StepVal = nullptr;
800811 if (Step) {
801 StepVal = Step->Codegen();
812 StepVal = Step->codegen();
802813 if (!StepVal)
803814 return nullptr;
804815 } else {
809820 Value *NextVar = Builder.CreateFAdd(Variable, StepVal, "nextvar");
810821
811822 // Compute the end condition.
812 Value *EndCond = End->Codegen();
823 Value *EndCond = End->codegen();
813824 if (!EndCond)
814825 return nullptr;
815826
841852 return Constant::getNullValue(Type::getDoubleTy(getGlobalContext()));
842853 }
843854
844 Function *PrototypeAST::Codegen() {
855 Function *PrototypeAST::codegen() {
845856 // Make the function type: double(double,double) etc.
846857 std::vector Doubles(Args.size(),
847858 Type::getDoubleTy(getGlobalContext()));
849860 FunctionType::get(Type::getDoubleTy(getGlobalContext()), Doubles, false);
850861
851862 Function *F =
852 Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
853
854 // If F conflicted, there was already something named 'Name'. If it has a
855 // body, don't allow redefinition or reextern.
856 if (F->getName() != Name) {
857 // Delete the one we just made and get the existing one.
858 F->eraseFromParent();
859 F = TheModule->getFunction(Name);
860
861 // If F already has a body, reject this.
862 if (!F->empty()) {
863 ErrorF("redefinition of function");
864 return nullptr;
865 }
866
867 // If F took a different number of args, reject.
868 if (F->arg_size() != Args.size()) {
869 ErrorF("redefinition of function with different # args");
870 return nullptr;
871 }
872 }
863 Function::Create(FT, Function::ExternalLinkage, Name, TheModule.get());
873864
874865 // Set names for all arguments.
875866 unsigned Idx = 0;
876 for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size();
877 ++AI, ++Idx) {
878 AI->setName(Args[Idx]);
879
880 // Add arguments to variable symbol table.
881 NamedValues[Args[Idx]] = AI;
882 }
867 for (auto &Arg : F->args())
868 Arg.setName(Args[Idx++]);
883869
884870 return F;
885871 }
886872
887 Function *FunctionAST::Codegen() {
888 NamedValues.clear();
889
890 Function *TheFunction = Proto->Codegen();
873 Function *FunctionAST::codegen() {
874 // Transfer ownership of the prototype to the FunctionProtos map, but keep a
875 // reference to it for use below.
876 auto &P = *Proto;
877 FunctionProtos[Proto->getName()] = std::move(Proto);
878 Function *TheFunction = getFunction(P.getName());
891879 if (!TheFunction)
892880 return nullptr;
893881
894882 // If this is an operator, install it.
895 if (Proto->isBinaryOp())
896 BinopPrecedence[Proto->getOperatorName()] = Proto->getBinaryPrecedence();
883 if (P.isBinaryOp())
884 BinopPrecedence[P.getOperatorName()] = P.getBinaryPrecedence();
897885
898886 // Create a new basic block to start insertion into.
899887 BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
900888 Builder.SetInsertPoint(BB);
901889
902 if (Value *RetVal = Body->Codegen()) {
890 // Record the function arguments in the NamedValues map.
891 NamedValues.clear();
892 for (auto &Arg : TheFunction->args())
893 NamedValues[Arg.getName()] = &Arg;
894
895 if (Value *RetVal = Body->codegen()) {
903896 // Finish off the function.
904897 Builder.CreateRet(RetVal);
905898
906899 // Validate the generated code, checking for consistency.
907900 verifyFunction(*TheFunction);
908901
909 // Optimize the function.
902 // Run the optimizer on the function.
910903 TheFPM->run(*TheFunction);
911904
912905 return TheFunction;
915908 // Error reading body, remove function.
916909 TheFunction->eraseFromParent();
917910
918 if (Proto->isBinaryOp())
911 if (P.isBinaryOp())
919912 BinopPrecedence.erase(Proto->getOperatorName());
920913 return nullptr;
921914 }
924917 // Top-Level parsing and JIT Driver
925918 //===----------------------------------------------------------------------===//
926919
927 static ExecutionEngine *TheExecutionEngine;
920 static void InitializeModuleAndPassManager() {
921 // Open a new module.
922 TheModule = llvm::make_unique("my cool jit", getGlobalContext());
923 TheModule->setDataLayout(TheJIT->getTargetMachine().createDataLayout());
924
925 // Create a new pass manager attached to it.
926 TheFPM = llvm::make_unique(TheModule.get());
927
928 // Provide basic AliasAnalysis support for GVN.
929 TheFPM->add(createBasicAliasAnalysisPass());
930 // Do simple "peephole" optimizations and bit-twiddling optzns.
931 TheFPM->add(createInstructionCombiningPass());
932 // Reassociate expressions.
933 TheFPM->add(createReassociatePass());
934 // Eliminate Common SubExpressions.
935 TheFPM->add(createGVNPass());
936 // Simplify the control flow graph (deleting unreachable blocks, etc).
937 TheFPM->add(createCFGSimplificationPass());
938
939 TheFPM->doInitialization();
940 }
928941
929942 static void HandleDefinition() {
930943 if (auto FnAST = ParseDefinition()) {
931 if (auto *FnIR = FnAST->Codegen()) {
944 if (auto *FnIR = FnAST->codegen()) {
932945 fprintf(stderr, "Read function definition:");
933946 FnIR->dump();
947 TheJIT->addModule(std::move(TheModule));
948 InitializeModuleAndPassManager();
934949 }
935950 } else {
936951 // Skip token for error recovery.
940955
941956 static void HandleExtern() {
942957 if (auto ProtoAST = ParseExtern()) {
943 if (auto *FnIR = ProtoAST->Codegen()) {
958 if (auto *FnIR = ProtoAST->codegen()) {
944959 fprintf(stderr, "Read extern: ");
945960 FnIR->dump();
961 FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST);
946962 }
947963 } else {
948964 // Skip token for error recovery.
953969 static void HandleTopLevelExpression() {
954970 // Evaluate a top-level expression into an anonymous function.
955971 if (auto FnAST = ParseTopLevelExpr()) {
956 if (auto *FnIR = FnAST->Codegen()) {
957 TheExecutionEngine->finalizeObject();
958 // JIT the function, returning a function pointer.
959 void *FPtr = TheExecutionEngine->getPointerToFunction(FnIR);
960
961 // Cast it to the right type (takes no arguments, returns a double) so we
962 // can call it as a native function.
963 double (*FP)() = (double (*)())(intptr_t)FPtr;
972 if (FnAST->codegen()) {
973
974 // JIT the module containing the anonymous expression, keeping a handle so
975 // we can free it later.
976 auto H = TheJIT->addModule(std::move(TheModule));
977 InitializeModuleAndPassManager();
978
979 // Search the JIT for the __anon_expr symbol.
980 auto ExprSymbol = TheJIT->findSymbol("__anon_expr");
981 assert(ExprSymbol && "Function not found");
982
983 // Get the symbol's address and cast it to the right type (takes no
984 // arguments, returns a double) so we can call it as a native function.
985 double (*FP)() = (double (*)())(intptr_t)ExprSymbol.getAddress();
964986 fprintf(stderr, "Evaluated to %f\n", FP());
987
988 // Delete the anonymous expression module from the JIT.
989 TheJIT->removeModule(H);
965990 }
966991 } else {
967992 // Skip token for error recovery.
10161041 InitializeNativeTarget();
10171042 InitializeNativeTargetAsmPrinter();
10181043 InitializeNativeTargetAsmParser();
1019 LLVMContext &Context = getGlobalContext();
10201044
10211045 // Install standard binary operators.
10221046 // 1 is lowest precedence.
10291053 fprintf(stderr, "ready> ");
10301054 getNextToken();
10311055
1032 // Make the module, which holds all the code.
1033 std::unique_ptr Owner = make_unique("my cool jit", Context);
1034 TheModule = Owner.get();
1035
1036 // Create the JIT. This takes ownership of the module.
1037 std::string ErrStr;
1038 TheExecutionEngine =
1039 EngineBuilder(std::move(Owner))
1040 .setErrorStr(&ErrStr)
1041 .setMCJITMemoryManager(llvm::make_unique())
1042 .create();
1043 if (!TheExecutionEngine) {
1044 fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str());
1045 exit(1);
1046 }
1047
1048 legacy::FunctionPassManager OurFPM(TheModule);
1049
1050 // Set up the optimizer pipeline. Start with registering info about how the
1051 // target lays out data structures.
1052 TheModule->setDataLayout(TheExecutionEngine->getDataLayout());
1053 // Provide basic AliasAnalysis support for GVN.
1054 OurFPM.add(createBasicAliasAnalysisPass());
1055 // Do simple "peephole" optimizations and bit-twiddling optzns.
1056 OurFPM.add(createInstructionCombiningPass());
1057 // Reassociate expressions.
1058 OurFPM.add(createReassociatePass());
1059 // Eliminate Common SubExpressions.
1060 OurFPM.add(createGVNPass());
1061 // Simplify the control flow graph (deleting unreachable blocks, etc).
1062 OurFPM.add(createCFGSimplificationPass());
1063
1064 OurFPM.doInitialization();
1065
1066 // Set the global so the code gen can use this.
1067 TheFPM = &OurFPM;
1056 TheJIT = llvm::make_unique();
1057
1058 InitializeModuleAndPassManager();
10681059
10691060 // Run the main "interpreter loop" now.
10701061 MainLoop();
10711062
1072 TheFPM = 0;
1073
1074 // Print out all of the generated code.
1075 TheModule->dump();
1076
10771063 return 0;
10781064 }
0 #include "llvm/ADT/STLExtras.h"
11 #include "llvm/Analysis/BasicAliasAnalysis.h"
22 #include "llvm/Analysis/Passes.h"
3 #include "llvm/ExecutionEngine/ExecutionEngine.h"
4 #include "llvm/ExecutionEngine/MCJIT.h"
5 #include "llvm/ExecutionEngine/SectionMemoryManager.h"
6 #include "llvm/IR/DataLayout.h"
7 #include "llvm/IR/DerivedTypes.h"
83 #include "llvm/IR/IRBuilder.h"
94 #include "llvm/IR/LLVMContext.h"
105 #include "llvm/IR/LegacyPassManager.h"
1712 #include
1813 #include
1914 #include
15 #include "../include/KaleidoscopeJIT.h"
16
2017 using namespace llvm;
18 using namespace llvm::orc;
2119
2220 //===----------------------------------------------------------------------===//
2321 // Lexer
129127 class ExprAST {
130128 public:
131129 virtual ~ExprAST() {}
132 virtual Value *Codegen() = 0;
130 virtual Value *codegen() = 0;
133131 };
134132
135133 /// NumberExprAST - Expression class for numeric literals like "1.0".
138136
139137 public:
140138 NumberExprAST(double Val) : Val(Val) {}
141 Value *Codegen() override;
139 Value *codegen() override;
142140 };
143141
144142 /// VariableExprAST - Expression class for referencing a variable, like "a".
148146 public:
149147 VariableExprAST(const std::string &Name) : Name(Name) {}
150148 const std::string &getName() const { return Name; }
151 Value *Codegen() override;
149 Value *codegen() override;
152150 };
153151
154152 /// UnaryExprAST - Expression class for a unary operator.
159157 public:
160158 UnaryExprAST(char Opcode, std::unique_ptr Operand)
161159 : Opcode(Opcode), Operand(std::move(Operand)) {}
162 Value *Codegen() override;
160 Value *codegen() override;
163161 };
164162
165163 /// BinaryExprAST - Expression class for a binary operator.
171169 BinaryExprAST(char Op, std::unique_ptr LHS,
172170 std::unique_ptr RHS)
173171 : Op(Op), LHS(std::move(LHS)), RHS(std::move(RHS)) {}
174 Value *Codegen() override;
172 Value *codegen() override;
175173 };
176174
177175 /// CallExprAST - Expression class for function calls.
183181 CallExprAST(const std::string &Callee,
184182 std::vector> Args)
185183 : Callee(Callee), Args(std::move(Args)) {}
186 Value *Codegen() override;
184 Value *codegen() override;
187185 };
188186
189187 /// IfExprAST - Expression class for if/then/else.
194192 IfExprAST(std::unique_ptr Cond, std::unique_ptr Then,
195193 std::unique_ptr Else)
196194 : Cond(std::move(Cond)), Then(std::move(Then)), Else(std::move(Else)) {}
197 Value *Codegen() override;
195 Value *codegen() override;
198196 };
199197
200198 /// ForExprAST - Expression class for for/in.
208206 std::unique_ptr Body)
209207 : VarName(VarName), Start(std::move(Start)), End(std::move(End)),
210208 Step(std::move(Step)), Body(std::move(Body)) {}
211 Value *Codegen() override;
209 Value *codegen() override;
212210 };
213211
214212 /// VarExprAST - Expression class for var/in
221219 std::vector>> VarNames,
222220 std::unique_ptr Body)
223221 : VarNames(std::move(VarNames)), Body(std::move(Body)) {}
224 Value *Codegen() override;
222 Value *codegen() override;
225223 };
226224
227225 /// PrototypeAST - This class represents the "prototype" for a function,
238236 bool IsOperator = false, unsigned Prec = 0)
239237 : Name(Name), Args(std::move(Args)), IsOperator(IsOperator),
240238 Precedence(Prec) {}
239 Function *codegen();
240 const std::string &getName() const { return Name; }
241241
242242 bool isUnaryOp() const { return IsOperator && Args.size() == 1; }
243243 bool isBinaryOp() const { return IsOperator && Args.size() == 2; }
248248 }
249249
250250 unsigned getBinaryPrecedence() const { return Precedence; }
251
252 Function *Codegen();
253
254 void CreateArgumentAllocas(Function *F);
255251 };
256252
257253 /// FunctionAST - This class represents a function definition itself.
263259 FunctionAST(std::unique_ptr Proto,
264260 std::unique_ptr Body)
265261 : Proto(std::move(Proto)), Body(std::move(Body)) {}
266 Function *Codegen();
262 Function *codegen();
267263 };
268264 } // end anonymous namespace
269265
299295 return nullptr;
300296 }
301297 std::unique_ptr ErrorP(const char *Str) {
302 Error(Str);
303 return nullptr;
304 }
305 std::unique_ptr ErrorF(const char *Str) {
306298 Error(Str);
307299 return nullptr;
308300 }
661653 static std::unique_ptr ParseTopLevelExpr() {
662654 if (auto E = ParseExpression()) {
663655 // Make an anonymous proto.
664 auto Proto =
665 llvm::make_unique("", std::vector());
656 auto Proto = llvm::make_unique("__anon_expr",
657 std::vector());
666658 return llvm::make_unique(std::move(Proto), std::move(E));
667659 }
668660 return nullptr;
678670 // Code Generation
679671 //===----------------------------------------------------------------------===//
680672
681 static Module *TheModule;
673 static std::unique_ptr TheModule;
682674 static IRBuilder<> Builder(getGlobalContext());
683675 static std::map NamedValues;
684 static legacy::FunctionPassManager *TheFPM;
676 static std::unique_ptr TheFPM;
677 static std::unique_ptr TheJIT;
678 static std::map> FunctionProtos;
685679
686680 Value *ErrorV(const char *Str) {
687681 Error(Str);
682 return nullptr;
683 }
684
685 Function *getFunction(std::string Name) {
686 // First, see if the function has already been added to the current module.
687 if (auto *F = TheModule->getFunction(Name))
688 return F;
689
690 // If not, check whether we can codegen the declaration from some existing
691 // prototype.
692 auto FI = FunctionProtos.find(Name);
693 if (FI != FunctionProtos.end())
694 return FI->second->codegen();
695
696 // If no existing prototype exists, return null.
688697 return nullptr;
689698 }
690699
698707 VarName.c_str());
699708 }
700709
701 Value *NumberExprAST::Codegen() {
710 Value *NumberExprAST::codegen() {
702711 return ConstantFP::get(getGlobalContext(), APFloat(Val));
703712 }
704713
705 Value *VariableExprAST::Codegen() {
714 Value *VariableExprAST::codegen() {
706715 // Look this variable up in the function.
707716 Value *V = NamedValues[Name];
708717 if (!V)
712721 return Builder.CreateLoad(V, Name.c_str());
713722 }
714723
715 Value *UnaryExprAST::Codegen() {
716 Value *OperandV = Operand->Codegen();
724 Value *UnaryExprAST::codegen() {
725 Value *OperandV = Operand->codegen();
717726 if (!OperandV)
718727 return nullptr;
719728
720 Function *F = TheModule->getFunction(std::string("unary") + Opcode);
729 Function *F = getFunction(std::string("unary") + Opcode);
721730 if (!F)
722731 return ErrorV("Unknown unary operator");
723732
724733 return Builder.CreateCall(F, OperandV, "unop");
725734 }
726735
727 Value *BinaryExprAST::Codegen() {
736 Value *BinaryExprAST::codegen() {
728737 // Special case '=' because we don't want to emit the LHS as an expression.
729738 if (Op == '=') {
730739 // Assignment requires the LHS to be an identifier.
735744 if (!LHSE)
736745 return ErrorV("destination of '=' must be a variable");
737746 // Codegen the RHS.
738 Value *Val = RHS->Codegen();
747 Value *Val = RHS->codegen();
739748 if (!Val)
740749 return nullptr;
741750
748757 return Val;
749758 }
750759
751 Value *L = LHS->Codegen();
752 Value *R = RHS->Codegen();
760 Value *L = LHS->codegen();
761 Value *R = RHS->codegen();
753762 if (!L || !R)
754763 return nullptr;
755764
771780
772781 // If it wasn't a builtin binary operator, it must be a user defined one. Emit
773782 // a call to it.
774 Function *F = TheModule->getFunction(std::string("binary") + Op);
783 Function *F = getFunction(std::string("binary") + Op);
775784 assert(F && "binary operator not found!");
776785
777786 Value *Ops[] = {L, R};
778787 return Builder.CreateCall(F, Ops, "binop");
779788 }
780789
781 Value *CallExprAST::Codegen() {
790 Value *CallExprAST::codegen() {
782791 // Look up the name in the global module table.
783 Function *CalleeF = TheModule->getFunction(Callee);
792 Function *CalleeF = getFunction(Callee);
784793 if (!CalleeF)
785794 return ErrorV("Unknown function referenced");
786795
790799
791800 std::vector ArgsV;
792801 for (unsigned i = 0, e = Args.size(); i != e; ++i) {
793 ArgsV.push_back(Args[i]->Codegen());
802 ArgsV.push_back(Args[i]->codegen());
794803 if (!ArgsV.back())
795804 return nullptr;
796805 }
798807 return Builder.CreateCall(CalleeF, ArgsV, "calltmp");
799808 }
800809
801 Value *IfExprAST::Codegen() {
802 Value *CondV = Cond->Codegen();
810 Value *IfExprAST::codegen() {
811 Value *CondV = Cond->codegen();
803812 if (!CondV)
804813 return nullptr;
805814
821830 // Emit then value.
822831 Builder.SetInsertPoint(ThenBB);
823832
824 Value *ThenV = Then->Codegen();
833 Value *ThenV = Then->codegen();
825834 if (!ThenV)
826835 return nullptr;
827836
833842 TheFunction->getBasicBlockList().push_back(ElseBB);
834843 Builder.SetInsertPoint(ElseBB);
835844
836 Value *ElseV = Else->Codegen();
845 Value *ElseV = Else->codegen();
837846 if (!ElseV)
838847 return nullptr;
839848
871880 // store nextvar -> var
872881 // br endcond, loop, endloop
873882 // outloop:
874 Value *ForExprAST::Codegen() {
883 Value *ForExprAST::codegen() {
875884 Function *TheFunction = Builder.GetInsertBlock()->getParent();
876885
877886 // Create an alloca for the variable in the entry block.
878887 AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
879888
880889 // Emit the start code first, without 'variable' in scope.
881 Value *StartVal = Start->Codegen();
890 Value *StartVal = Start->codegen();
882891 if (!StartVal)
883892 return nullptr;
884893
904913 // Emit the body of the loop. This, like any other expr, can change the
905914 // current BB. Note that we ignore the value computed by the body, but don't
906915 // allow an error.
907 if (!Body->Codegen())
916 if (!Body->codegen())
908917 return nullptr;
909918
910919 // Emit the step value.
911920 Value *StepVal = nullptr;
912921 if (Step) {
913 StepVal = Step->Codegen();
922 StepVal = Step->codegen();
914923 if (!StepVal)
915924 return nullptr;
916925 } else {
919928 }
920929
921930 // Compute the end condition.
922 Value *EndCond = End->Codegen();
931 Value *EndCond = End->codegen();
923932 if (!EndCond)
924933 return nullptr;
925934
953962 return Constant::getNullValue(Type::getDoubleTy(getGlobalContext()));
954963 }
955964
956 Value *VarExprAST::Codegen() {
965 Value *VarExprAST::codegen() {
957966