Home | History | Annotate | Download | only in Chapter7
      1 #include "llvm/ADT/STLExtras.h"
      2 #include "llvm/Analysis/Passes.h"
      3 #include "llvm/IR/IRBuilder.h"
      4 #include "llvm/IR/LLVMContext.h"
      5 #include "llvm/IR/LegacyPassManager.h"
      6 #include "llvm/IR/Module.h"
      7 #include "llvm/IR/Verifier.h"
      8 #include "llvm/Support/TargetSelect.h"
      9 #include "llvm/Transforms/Scalar.h"
     10 #include <cctype>
     11 #include <cstdio>
     12 #include <map>
     13 #include <string>
     14 #include <vector>
     15 #include "../include/KaleidoscopeJIT.h"
     16 
     17 using namespace llvm;
     18 using namespace llvm::orc;
     19 
     20 //===----------------------------------------------------------------------===//
     21 // Lexer
     22 //===----------------------------------------------------------------------===//
     23 
     24 // The lexer returns tokens [0-255] if it is an unknown character, otherwise one
     25 // of these for known things.
     26 enum Token {
     27   tok_eof = -1,
     28 
     29   // commands
     30   tok_def = -2,
     31   tok_extern = -3,
     32 
     33   // primary
     34   tok_identifier = -4,
     35   tok_number = -5,
     36 
     37   // control
     38   tok_if = -6,
     39   tok_then = -7,
     40   tok_else = -8,
     41   tok_for = -9,
     42   tok_in = -10,
     43 
     44   // operators
     45   tok_binary = -11,
     46   tok_unary = -12,
     47 
     48   // var definition
     49   tok_var = -13
     50 };
     51 
     52 static std::string IdentifierStr; // Filled in if tok_identifier
     53 static double NumVal;             // Filled in if tok_number
     54 
     55 /// gettok - Return the next token from standard input.
     56 static int gettok() {
     57   static int LastChar = ' ';
     58 
     59   // Skip any whitespace.
     60   while (isspace(LastChar))
     61     LastChar = getchar();
     62 
     63   if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
     64     IdentifierStr = LastChar;
     65     while (isalnum((LastChar = getchar())))
     66       IdentifierStr += LastChar;
     67 
     68     if (IdentifierStr == "def")
     69       return tok_def;
     70     if (IdentifierStr == "extern")
     71       return tok_extern;
     72     if (IdentifierStr == "if")
     73       return tok_if;
     74     if (IdentifierStr == "then")
     75       return tok_then;
     76     if (IdentifierStr == "else")
     77       return tok_else;
     78     if (IdentifierStr == "for")
     79       return tok_for;
     80     if (IdentifierStr == "in")
     81       return tok_in;
     82     if (IdentifierStr == "binary")
     83       return tok_binary;
     84     if (IdentifierStr == "unary")
     85       return tok_unary;
     86     if (IdentifierStr == "var")
     87       return tok_var;
     88     return tok_identifier;
     89   }
     90 
     91   if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+
     92     std::string NumStr;
     93     do {
     94       NumStr += LastChar;
     95       LastChar = getchar();
     96     } while (isdigit(LastChar) || LastChar == '.');
     97 
     98     NumVal = strtod(NumStr.c_str(), nullptr);
     99     return tok_number;
    100   }
    101 
    102   if (LastChar == '#') {
    103     // Comment until end of line.
    104     do
    105       LastChar = getchar();
    106     while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
    107 
    108     if (LastChar != EOF)
    109       return gettok();
    110   }
    111 
    112   // Check for end of file.  Don't eat the EOF.
    113   if (LastChar == EOF)
    114     return tok_eof;
    115 
    116   // Otherwise, just return the character as its ascii value.
    117   int ThisChar = LastChar;
    118   LastChar = getchar();
    119   return ThisChar;
    120 }
    121 
    122 //===----------------------------------------------------------------------===//
    123 // Abstract Syntax Tree (aka Parse Tree)
    124 //===----------------------------------------------------------------------===//
    125 namespace {
    126 /// ExprAST - Base class for all expression nodes.
    127 class ExprAST {
    128 public:
    129   virtual ~ExprAST() {}
    130   virtual Value *codegen() = 0;
    131 };
    132 
    133 /// NumberExprAST - Expression class for numeric literals like "1.0".
    134 class NumberExprAST : public ExprAST {
    135   double Val;
    136 
    137 public:
    138   NumberExprAST(double Val) : Val(Val) {}
    139   Value *codegen() override;
    140 };
    141 
    142 /// VariableExprAST - Expression class for referencing a variable, like "a".
    143 class VariableExprAST : public ExprAST {
    144   std::string Name;
    145 
    146 public:
    147   VariableExprAST(const std::string &Name) : Name(Name) {}
    148   const std::string &getName() const { return Name; }
    149   Value *codegen() override;
    150 };
    151 
    152 /// UnaryExprAST - Expression class for a unary operator.
    153 class UnaryExprAST : public ExprAST {
    154   char Opcode;
    155   std::unique_ptr<ExprAST> Operand;
    156 
    157 public:
    158   UnaryExprAST(char Opcode, std::unique_ptr<ExprAST> Operand)
    159       : Opcode(Opcode), Operand(std::move(Operand)) {}
    160   Value *codegen() override;
    161 };
    162 
    163 /// BinaryExprAST - Expression class for a binary operator.
    164 class BinaryExprAST : public ExprAST {
    165   char Op;
    166   std::unique_ptr<ExprAST> LHS, RHS;
    167 
    168 public:
    169   BinaryExprAST(char Op, std::unique_ptr<ExprAST> LHS,
    170                 std::unique_ptr<ExprAST> RHS)
    171       : Op(Op), LHS(std::move(LHS)), RHS(std::move(RHS)) {}
    172   Value *codegen() override;
    173 };
    174 
    175 /// CallExprAST - Expression class for function calls.
    176 class CallExprAST : public ExprAST {
    177   std::string Callee;
    178   std::vector<std::unique_ptr<ExprAST>> Args;
    179 
    180 public:
    181   CallExprAST(const std::string &Callee,
    182               std::vector<std::unique_ptr<ExprAST>> Args)
    183       : Callee(Callee), Args(std::move(Args)) {}
    184   Value *codegen() override;
    185 };
    186 
    187 /// IfExprAST - Expression class for if/then/else.
    188 class IfExprAST : public ExprAST {
    189   std::unique_ptr<ExprAST> Cond, Then, Else;
    190 
    191 public:
    192   IfExprAST(std::unique_ptr<ExprAST> Cond, std::unique_ptr<ExprAST> Then,
    193             std::unique_ptr<ExprAST> Else)
    194       : Cond(std::move(Cond)), Then(std::move(Then)), Else(std::move(Else)) {}
    195   Value *codegen() override;
    196 };
    197 
    198 /// ForExprAST - Expression class for for/in.
    199 class ForExprAST : public ExprAST {
    200   std::string VarName;
    201   std::unique_ptr<ExprAST> Start, End, Step, Body;
    202 
    203 public:
    204   ForExprAST(const std::string &VarName, std::unique_ptr<ExprAST> Start,
    205              std::unique_ptr<ExprAST> End, std::unique_ptr<ExprAST> Step,
    206              std::unique_ptr<ExprAST> Body)
    207       : VarName(VarName), Start(std::move(Start)), End(std::move(End)),
    208         Step(std::move(Step)), Body(std::move(Body)) {}
    209   Value *codegen() override;
    210 };
    211 
    212 /// VarExprAST - Expression class for var/in
    213 class VarExprAST : public ExprAST {
    214   std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames;
    215   std::unique_ptr<ExprAST> Body;
    216 
    217 public:
    218   VarExprAST(
    219       std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames,
    220       std::unique_ptr<ExprAST> Body)
    221       : VarNames(std::move(VarNames)), Body(std::move(Body)) {}
    222   Value *codegen() override;
    223 };
    224 
    225 /// PrototypeAST - This class represents the "prototype" for a function,
    226 /// which captures its name, and its argument names (thus implicitly the number
    227 /// of arguments the function takes), as well as if it is an operator.
    228 class PrototypeAST {
    229   std::string Name;
    230   std::vector<std::string> Args;
    231   bool IsOperator;
    232   unsigned Precedence; // Precedence if a binary op.
    233 
    234 public:
    235   PrototypeAST(const std::string &Name, std::vector<std::string> Args,
    236                bool IsOperator = false, unsigned Prec = 0)
    237       : Name(Name), Args(std::move(Args)), IsOperator(IsOperator),
    238         Precedence(Prec) {}
    239   Function *codegen();
    240   const std::string &getName() const { return Name; }
    241 
    242   bool isUnaryOp() const { return IsOperator && Args.size() == 1; }
    243   bool isBinaryOp() const { return IsOperator && Args.size() == 2; }
    244 
    245   char getOperatorName() const {
    246     assert(isUnaryOp() || isBinaryOp());
    247     return Name[Name.size() - 1];
    248   }
    249 
    250   unsigned getBinaryPrecedence() const { return Precedence; }
    251 };
    252 
    253 /// FunctionAST - This class represents a function definition itself.
    254 class FunctionAST {
    255   std::unique_ptr<PrototypeAST> Proto;
    256   std::unique_ptr<ExprAST> Body;
    257 
    258 public:
    259   FunctionAST(std::unique_ptr<PrototypeAST> Proto,
    260               std::unique_ptr<ExprAST> Body)
    261       : Proto(std::move(Proto)), Body(std::move(Body)) {}
    262   Function *codegen();
    263 };
    264 } // end anonymous namespace
    265 
    266 //===----------------------------------------------------------------------===//
    267 // Parser
    268 //===----------------------------------------------------------------------===//
    269 
    270 /// CurTok/getNextToken - Provide a simple token buffer.  CurTok is the current
    271 /// token the parser is looking at.  getNextToken reads another token from the
    272 /// lexer and updates CurTok with its results.
    273 static int CurTok;
    274 static int getNextToken() { return CurTok = gettok(); }
    275 
    276 /// BinopPrecedence - This holds the precedence for each binary operator that is
    277 /// defined.
    278 static std::map<char, int> BinopPrecedence;
    279 
    280 /// GetTokPrecedence - Get the precedence of the pending binary operator token.
    281 static int GetTokPrecedence() {
    282   if (!isascii(CurTok))
    283     return -1;
    284 
    285   // Make sure it's a declared binop.
    286   int TokPrec = BinopPrecedence[CurTok];
    287   if (TokPrec <= 0)
    288     return -1;
    289   return TokPrec;
    290 }
    291 
    292 /// Error* - These are little helper functions for error handling.
    293 std::unique_ptr<ExprAST> Error(const char *Str) {
    294   fprintf(stderr, "Error: %s\n", Str);
    295   return nullptr;
    296 }
    297 
    298 std::unique_ptr<PrototypeAST> ErrorP(const char *Str) {
    299   Error(Str);
    300   return nullptr;
    301 }
    302 
    303 static std::unique_ptr<ExprAST> ParseExpression();
    304 
    305 /// numberexpr ::= number
    306 static std::unique_ptr<ExprAST> ParseNumberExpr() {
    307   auto Result = llvm::make_unique<NumberExprAST>(NumVal);
    308   getNextToken(); // consume the number
    309   return std::move(Result);
    310 }
    311 
    312 /// parenexpr ::= '(' expression ')'
    313 static std::unique_ptr<ExprAST> ParseParenExpr() {
    314   getNextToken(); // eat (.
    315   auto V = ParseExpression();
    316   if (!V)
    317     return nullptr;
    318 
    319   if (CurTok != ')')
    320     return Error("expected ')'");
    321   getNextToken(); // eat ).
    322   return V;
    323 }
    324 
    325 /// identifierexpr
    326 ///   ::= identifier
    327 ///   ::= identifier '(' expression* ')'
    328 static std::unique_ptr<ExprAST> ParseIdentifierExpr() {
    329   std::string IdName = IdentifierStr;
    330 
    331   getNextToken(); // eat identifier.
    332 
    333   if (CurTok != '(') // Simple variable ref.
    334     return llvm::make_unique<VariableExprAST>(IdName);
    335 
    336   // Call.
    337   getNextToken(); // eat (
    338   std::vector<std::unique_ptr<ExprAST>> Args;
    339   if (CurTok != ')') {
    340     while (1) {
    341       if (auto Arg = ParseExpression())
    342         Args.push_back(std::move(Arg));
    343       else
    344         return nullptr;
    345 
    346       if (CurTok == ')')
    347         break;
    348 
    349       if (CurTok != ',')
    350         return Error("Expected ')' or ',' in argument list");
    351       getNextToken();
    352     }
    353   }
    354 
    355   // Eat the ')'.
    356   getNextToken();
    357 
    358   return llvm::make_unique<CallExprAST>(IdName, std::move(Args));
    359 }
    360 
    361 /// ifexpr ::= 'if' expression 'then' expression 'else' expression
    362 static std::unique_ptr<ExprAST> ParseIfExpr() {
    363   getNextToken(); // eat the if.
    364 
    365   // condition.
    366   auto Cond = ParseExpression();
    367   if (!Cond)
    368     return nullptr;
    369 
    370   if (CurTok != tok_then)
    371     return Error("expected then");
    372   getNextToken(); // eat the then
    373 
    374   auto Then = ParseExpression();
    375   if (!Then)
    376     return nullptr;
    377 
    378   if (CurTok != tok_else)
    379     return Error("expected else");
    380 
    381   getNextToken();
    382 
    383   auto Else = ParseExpression();
    384   if (!Else)
    385     return nullptr;
    386 
    387   return llvm::make_unique<IfExprAST>(std::move(Cond), std::move(Then),
    388                                       std::move(Else));
    389 }
    390 
    391 /// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression
    392 static std::unique_ptr<ExprAST> ParseForExpr() {
    393   getNextToken(); // eat the for.
    394 
    395   if (CurTok != tok_identifier)
    396     return Error("expected identifier after for");
    397 
    398   std::string IdName = IdentifierStr;
    399   getNextToken(); // eat identifier.
    400 
    401   if (CurTok != '=')
    402     return Error("expected '=' after for");
    403   getNextToken(); // eat '='.
    404 
    405   auto Start = ParseExpression();
    406   if (!Start)
    407     return nullptr;
    408   if (CurTok != ',')
    409     return Error("expected ',' after for start value");
    410   getNextToken();
    411 
    412   auto End = ParseExpression();
    413   if (!End)
    414     return nullptr;
    415 
    416   // The step value is optional.
    417   std::unique_ptr<ExprAST> Step;
    418   if (CurTok == ',') {
    419     getNextToken();
    420     Step = ParseExpression();
    421     if (!Step)
    422       return nullptr;
    423   }
    424 
    425   if (CurTok != tok_in)
    426     return Error("expected 'in' after for");
    427   getNextToken(); // eat 'in'.
    428 
    429   auto Body = ParseExpression();
    430   if (!Body)
    431     return nullptr;
    432 
    433   return llvm::make_unique<ForExprAST>(IdName, std::move(Start), std::move(End),
    434                                        std::move(Step), std::move(Body));
    435 }
    436 
    437 /// varexpr ::= 'var' identifier ('=' expression)?
    438 //                    (',' identifier ('=' expression)?)* 'in' expression
    439 static std::unique_ptr<ExprAST> ParseVarExpr() {
    440   getNextToken(); // eat the var.
    441 
    442   std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames;
    443 
    444   // At least one variable name is required.
    445   if (CurTok != tok_identifier)
    446     return Error("expected identifier after var");
    447 
    448   while (1) {
    449     std::string Name = IdentifierStr;
    450     getNextToken(); // eat identifier.
    451 
    452     // Read the optional initializer.
    453     std::unique_ptr<ExprAST> Init = nullptr;
    454     if (CurTok == '=') {
    455       getNextToken(); // eat the '='.
    456 
    457       Init = ParseExpression();
    458       if (!Init)
    459         return nullptr;
    460     }
    461 
    462     VarNames.push_back(std::make_pair(Name, std::move(Init)));
    463 
    464     // End of var list, exit loop.
    465     if (CurTok != ',')
    466       break;
    467     getNextToken(); // eat the ','.
    468 
    469     if (CurTok != tok_identifier)
    470       return Error("expected identifier list after var");
    471   }
    472 
    473   // At this point, we have to have 'in'.
    474   if (CurTok != tok_in)
    475     return Error("expected 'in' keyword after 'var'");
    476   getNextToken(); // eat 'in'.
    477 
    478   auto Body = ParseExpression();
    479   if (!Body)
    480     return nullptr;
    481 
    482   return llvm::make_unique<VarExprAST>(std::move(VarNames), std::move(Body));
    483 }
    484 
    485 /// primary
    486 ///   ::= identifierexpr
    487 ///   ::= numberexpr
    488 ///   ::= parenexpr
    489 ///   ::= ifexpr
    490 ///   ::= forexpr
    491 ///   ::= varexpr
    492 static std::unique_ptr<ExprAST> ParsePrimary() {
    493   switch (CurTok) {
    494   default:
    495     return Error("unknown token when expecting an expression");
    496   case tok_identifier:
    497     return ParseIdentifierExpr();
    498   case tok_number:
    499     return ParseNumberExpr();
    500   case '(':
    501     return ParseParenExpr();
    502   case tok_if:
    503     return ParseIfExpr();
    504   case tok_for:
    505     return ParseForExpr();
    506   case tok_var:
    507     return ParseVarExpr();
    508   }
    509 }
    510 
    511 /// unary
    512 ///   ::= primary
    513 ///   ::= '!' unary
    514 static std::unique_ptr<ExprAST> ParseUnary() {
    515   // If the current token is not an operator, it must be a primary expr.
    516   if (!isascii(CurTok) || CurTok == '(' || CurTok == ',')
    517     return ParsePrimary();
    518 
    519   // If this is a unary operator, read it.
    520   int Opc = CurTok;
    521   getNextToken();
    522   if (auto Operand = ParseUnary())
    523     return llvm::make_unique<UnaryExprAST>(Opc, std::move(Operand));
    524   return nullptr;
    525 }
    526 
    527 /// binoprhs
    528 ///   ::= ('+' unary)*
    529 static std::unique_ptr<ExprAST> ParseBinOpRHS(int ExprPrec,
    530                                               std::unique_ptr<ExprAST> LHS) {
    531   // If this is a binop, find its precedence.
    532   while (1) {
    533     int TokPrec = GetTokPrecedence();
    534 
    535     // If this is a binop that binds at least as tightly as the current binop,
    536     // consume it, otherwise we are done.
    537     if (TokPrec < ExprPrec)
    538       return LHS;
    539 
    540     // Okay, we know this is a binop.
    541     int BinOp = CurTok;
    542     getNextToken(); // eat binop
    543 
    544     // Parse the unary expression after the binary operator.
    545     auto RHS = ParseUnary();
    546     if (!RHS)
    547       return nullptr;
    548 
    549     // If BinOp binds less tightly with RHS than the operator after RHS, let
    550     // the pending operator take RHS as its LHS.
    551     int NextPrec = GetTokPrecedence();
    552     if (TokPrec < NextPrec) {
    553       RHS = ParseBinOpRHS(TokPrec + 1, std::move(RHS));
    554       if (!RHS)
    555         return nullptr;
    556     }
    557 
    558     // Merge LHS/RHS.
    559     LHS =
    560         llvm::make_unique<BinaryExprAST>(BinOp, std::move(LHS), std::move(RHS));
    561   }
    562 }
    563 
    564 /// expression
    565 ///   ::= unary binoprhs
    566 ///
    567 static std::unique_ptr<ExprAST> ParseExpression() {
    568   auto LHS = ParseUnary();
    569   if (!LHS)
    570     return nullptr;
    571 
    572   return ParseBinOpRHS(0, std::move(LHS));
    573 }
    574 
    575 /// prototype
    576 ///   ::= id '(' id* ')'
    577 ///   ::= binary LETTER number? (id, id)
    578 ///   ::= unary LETTER (id)
    579 static std::unique_ptr<PrototypeAST> ParsePrototype() {
    580   std::string FnName;
    581 
    582   unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary.
    583   unsigned BinaryPrecedence = 30;
    584 
    585   switch (CurTok) {
    586   default:
    587     return ErrorP("Expected function name in prototype");
    588   case tok_identifier:
    589     FnName = IdentifierStr;
    590     Kind = 0;
    591     getNextToken();
    592     break;
    593   case tok_unary:
    594     getNextToken();
    595     if (!isascii(CurTok))
    596       return ErrorP("Expected unary operator");
    597     FnName = "unary";
    598     FnName += (char)CurTok;
    599     Kind = 1;
    600     getNextToken();
    601     break;
    602   case tok_binary:
    603     getNextToken();
    604     if (!isascii(CurTok))
    605       return ErrorP("Expected binary operator");
    606     FnName = "binary";
    607     FnName += (char)CurTok;
    608     Kind = 2;
    609     getNextToken();
    610 
    611     // Read the precedence if present.
    612     if (CurTok == tok_number) {
    613       if (NumVal < 1 || NumVal > 100)
    614         return ErrorP("Invalid precedecnce: must be 1..100");
    615       BinaryPrecedence = (unsigned)NumVal;
    616       getNextToken();
    617     }
    618     break;
    619   }
    620 
    621   if (CurTok != '(')
    622     return ErrorP("Expected '(' in prototype");
    623 
    624   std::vector<std::string> ArgNames;
    625   while (getNextToken() == tok_identifier)
    626     ArgNames.push_back(IdentifierStr);
    627   if (CurTok != ')')
    628     return ErrorP("Expected ')' in prototype");
    629 
    630   // success.
    631   getNextToken(); // eat ')'.
    632 
    633   // Verify right number of names for operator.
    634   if (Kind && ArgNames.size() != Kind)
    635     return ErrorP("Invalid number of operands for operator");
    636 
    637   return llvm::make_unique<PrototypeAST>(FnName, ArgNames, Kind != 0,
    638                                          BinaryPrecedence);
    639 }
    640 
    641 /// definition ::= 'def' prototype expression
    642 static std::unique_ptr<FunctionAST> ParseDefinition() {
    643   getNextToken(); // eat def.
    644   auto Proto = ParsePrototype();
    645   if (!Proto)
    646     return nullptr;
    647 
    648   if (auto E = ParseExpression())
    649     return llvm::make_unique<FunctionAST>(std::move(Proto), std::move(E));
    650   return nullptr;
    651 }
    652 
    653 /// toplevelexpr ::= expression
    654 static std::unique_ptr<FunctionAST> ParseTopLevelExpr() {
    655   if (auto E = ParseExpression()) {
    656     // Make an anonymous proto.
    657     auto Proto = llvm::make_unique<PrototypeAST>("__anon_expr",
    658                                                  std::vector<std::string>());
    659     return llvm::make_unique<FunctionAST>(std::move(Proto), std::move(E));
    660   }
    661   return nullptr;
    662 }
    663 
    664 /// external ::= 'extern' prototype
    665 static std::unique_ptr<PrototypeAST> ParseExtern() {
    666   getNextToken(); // eat extern.
    667   return ParsePrototype();
    668 }
    669 
    670 //===----------------------------------------------------------------------===//
    671 // Code Generation
    672 //===----------------------------------------------------------------------===//
    673 
    674 static std::unique_ptr<Module> TheModule;
    675 static IRBuilder<> Builder(getGlobalContext());
    676 static std::map<std::string, AllocaInst *> NamedValues;
    677 static std::unique_ptr<legacy::FunctionPassManager> TheFPM;
    678 static std::unique_ptr<KaleidoscopeJIT> TheJIT;
    679 static std::map<std::string, std::unique_ptr<PrototypeAST>> FunctionProtos;
    680 
    681 Value *ErrorV(const char *Str) {
    682   Error(Str);
    683   return nullptr;
    684 }
    685 
    686 Function *getFunction(std::string Name) {
    687   // First, see if the function has already been added to the current module.
    688   if (auto *F = TheModule->getFunction(Name))
    689     return F;
    690 
    691   // If not, check whether we can codegen the declaration from some existing
    692   // prototype.
    693   auto FI = FunctionProtos.find(Name);
    694   if (FI != FunctionProtos.end())
    695     return FI->second->codegen();
    696 
    697   // If no existing prototype exists, return null.
    698   return nullptr;
    699 }
    700 
    701 /// CreateEntryBlockAlloca - Create an alloca instruction in the entry block of
    702 /// the function.  This is used for mutable variables etc.
    703 static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction,
    704                                           const std::string &VarName) {
    705   IRBuilder<> TmpB(&TheFunction->getEntryBlock(),
    706                    TheFunction->getEntryBlock().begin());
    707   return TmpB.CreateAlloca(Type::getDoubleTy(getGlobalContext()), nullptr,
    708                            VarName.c_str());
    709 }
    710 
    711 Value *NumberExprAST::codegen() {
    712   return ConstantFP::get(getGlobalContext(), APFloat(Val));
    713 }
    714 
    715 Value *VariableExprAST::codegen() {
    716   // Look this variable up in the function.
    717   Value *V = NamedValues[Name];
    718   if (!V)
    719     return ErrorV("Unknown variable name");
    720 
    721   // Load the value.
    722   return Builder.CreateLoad(V, Name.c_str());
    723 }
    724 
    725 Value *UnaryExprAST::codegen() {
    726   Value *OperandV = Operand->codegen();
    727   if (!OperandV)
    728     return nullptr;
    729 
    730   Function *F = getFunction(std::string("unary") + Opcode);
    731   if (!F)
    732     return ErrorV("Unknown unary operator");
    733 
    734   return Builder.CreateCall(F, OperandV, "unop");
    735 }
    736 
    737 Value *BinaryExprAST::codegen() {
    738   // Special case '=' because we don't want to emit the LHS as an expression.
    739   if (Op == '=') {
    740     // Assignment requires the LHS to be an identifier.
    741     // This assume we're building without RTTI because LLVM builds that way by
    742     // default.  If you build LLVM with RTTI this can be changed to a
    743     // dynamic_cast for automatic error checking.
    744     VariableExprAST *LHSE = static_cast<VariableExprAST *>(LHS.get());
    745     if (!LHSE)
    746       return ErrorV("destination of '=' must be a variable");
    747     // Codegen the RHS.
    748     Value *Val = RHS->codegen();
    749     if (!Val)
    750       return nullptr;
    751 
    752     // Look up the name.
    753     Value *Variable = NamedValues[LHSE->getName()];
    754     if (!Variable)
    755       return ErrorV("Unknown variable name");
    756 
    757     Builder.CreateStore(Val, Variable);
    758     return Val;
    759   }
    760 
    761   Value *L = LHS->codegen();
    762   Value *R = RHS->codegen();
    763   if (!L || !R)
    764     return nullptr;
    765 
    766   switch (Op) {
    767   case '+':
    768     return Builder.CreateFAdd(L, R, "addtmp");
    769   case '-':
    770     return Builder.CreateFSub(L, R, "subtmp");
    771   case '*':
    772     return Builder.CreateFMul(L, R, "multmp");
    773   case '<':
    774     L = Builder.CreateFCmpULT(L, R, "cmptmp");
    775     // Convert bool 0/1 to double 0.0 or 1.0
    776     return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
    777                                 "booltmp");
    778   default:
    779     break;
    780   }
    781 
    782   // If it wasn't a builtin binary operator, it must be a user defined one. Emit
    783   // a call to it.
    784   Function *F = getFunction(std::string("binary") + Op);
    785   assert(F && "binary operator not found!");
    786 
    787   Value *Ops[] = {L, R};
    788   return Builder.CreateCall(F, Ops, "binop");
    789 }
    790 
    791 Value *CallExprAST::codegen() {
    792   // Look up the name in the global module table.
    793   Function *CalleeF = getFunction(Callee);
    794   if (!CalleeF)
    795     return ErrorV("Unknown function referenced");
    796 
    797   // If argument mismatch error.
    798   if (CalleeF->arg_size() != Args.size())
    799     return ErrorV("Incorrect # arguments passed");
    800 
    801   std::vector<Value *> ArgsV;
    802   for (unsigned i = 0, e = Args.size(); i != e; ++i) {
    803     ArgsV.push_back(Args[i]->codegen());
    804     if (!ArgsV.back())
    805       return nullptr;
    806   }
    807 
    808   return Builder.CreateCall(CalleeF, ArgsV, "calltmp");
    809 }
    810 
    811 Value *IfExprAST::codegen() {
    812   Value *CondV = Cond->codegen();
    813   if (!CondV)
    814     return nullptr;
    815 
    816   // Convert condition to a bool by comparing equal to 0.0.
    817   CondV = Builder.CreateFCmpONE(
    818       CondV, ConstantFP::get(getGlobalContext(), APFloat(0.0)), "ifcond");
    819 
    820   Function *TheFunction = Builder.GetInsertBlock()->getParent();
    821 
    822   // Create blocks for the then and else cases.  Insert the 'then' block at the
    823   // end of the function.
    824   BasicBlock *ThenBB =
    825       BasicBlock::Create(getGlobalContext(), "then", TheFunction);
    826   BasicBlock *ElseBB = BasicBlock::Create(getGlobalContext(), "else");
    827   BasicBlock *MergeBB = BasicBlock::Create(getGlobalContext(), "ifcont");
    828 
    829   Builder.CreateCondBr(CondV, ThenBB, ElseBB);
    830 
    831   // Emit then value.
    832   Builder.SetInsertPoint(ThenBB);
    833 
    834   Value *ThenV = Then->codegen();
    835   if (!ThenV)
    836     return nullptr;
    837 
    838   Builder.CreateBr(MergeBB);
    839   // Codegen of 'Then' can change the current block, update ThenBB for the PHI.
    840   ThenBB = Builder.GetInsertBlock();
    841 
    842   // Emit else block.
    843   TheFunction->getBasicBlockList().push_back(ElseBB);
    844   Builder.SetInsertPoint(ElseBB);
    845 
    846   Value *ElseV = Else->codegen();
    847   if (!ElseV)
    848     return nullptr;
    849 
    850   Builder.CreateBr(MergeBB);
    851   // Codegen of 'Else' can change the current block, update ElseBB for the PHI.
    852   ElseBB = Builder.GetInsertBlock();
    853 
    854   // Emit merge block.
    855   TheFunction->getBasicBlockList().push_back(MergeBB);
    856   Builder.SetInsertPoint(MergeBB);
    857   PHINode *PN =
    858       Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2, "iftmp");
    859 
    860   PN->addIncoming(ThenV, ThenBB);
    861   PN->addIncoming(ElseV, ElseBB);
    862   return PN;
    863 }
    864 
    865 // Output for-loop as:
    866 //   var = alloca double
    867 //   ...
    868 //   start = startexpr
    869 //   store start -> var
    870 //   goto loop
    871 // loop:
    872 //   ...
    873 //   bodyexpr
    874 //   ...
    875 // loopend:
    876 //   step = stepexpr
    877 //   endcond = endexpr
    878 //
    879 //   curvar = load var
    880 //   nextvar = curvar + step
    881 //   store nextvar -> var
    882 //   br endcond, loop, endloop
    883 // outloop:
    884 Value *ForExprAST::codegen() {
    885   Function *TheFunction = Builder.GetInsertBlock()->getParent();
    886 
    887   // Create an alloca for the variable in the entry block.
    888   AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
    889 
    890   // Emit the start code first, without 'variable' in scope.
    891   Value *StartVal = Start->codegen();
    892   if (!StartVal)
    893     return nullptr;
    894 
    895   // Store the value into the alloca.
    896   Builder.CreateStore(StartVal, Alloca);
    897 
    898   // Make the new basic block for the loop header, inserting after current
    899   // block.
    900   BasicBlock *LoopBB =
    901       BasicBlock::Create(getGlobalContext(), "loop", TheFunction);
    902 
    903   // Insert an explicit fall through from the current block to the LoopBB.
    904   Builder.CreateBr(LoopBB);
    905 
    906   // Start insertion in LoopBB.
    907   Builder.SetInsertPoint(LoopBB);
    908 
    909   // Within the loop, the variable is defined equal to the PHI node.  If it
    910   // shadows an existing variable, we have to restore it, so save it now.
    911   AllocaInst *OldVal = NamedValues[VarName];
    912   NamedValues[VarName] = Alloca;
    913 
    914   // Emit the body of the loop.  This, like any other expr, can change the
    915   // current BB.  Note that we ignore the value computed by the body, but don't
    916   // allow an error.
    917   if (!Body->codegen())
    918     return nullptr;
    919 
    920   // Emit the step value.
    921   Value *StepVal = nullptr;
    922   if (Step) {
    923     StepVal = Step->codegen();
    924     if (!StepVal)
    925       return nullptr;
    926   } else {
    927     // If not specified, use 1.0.
    928     StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0));
    929   }
    930 
    931   // Compute the end condition.
    932   Value *EndCond = End->codegen();
    933   if (!EndCond)
    934     return nullptr;
    935 
    936   // Reload, increment, and restore the alloca.  This handles the case where
    937   // the body of the loop mutates the variable.
    938   Value *CurVar = Builder.CreateLoad(Alloca, VarName.c_str());
    939   Value *NextVar = Builder.CreateFAdd(CurVar, StepVal, "nextvar");
    940   Builder.CreateStore(NextVar, Alloca);
    941 
    942   // Convert condition to a bool by comparing equal to 0.0.
    943   EndCond = Builder.CreateFCmpONE(
    944       EndCond, ConstantFP::get(getGlobalContext(), APFloat(0.0)), "loopcond");
    945 
    946   // Create the "after loop" block and insert it.
    947   BasicBlock *AfterBB =
    948       BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction);
    949 
    950   // Insert the conditional branch into the end of LoopEndBB.
    951   Builder.CreateCondBr(EndCond, LoopBB, AfterBB);
    952 
    953   // Any new code will be inserted in AfterBB.
    954   Builder.SetInsertPoint(AfterBB);
    955 
    956   // Restore the unshadowed variable.
    957   if (OldVal)
    958     NamedValues[VarName] = OldVal;
    959   else
    960     NamedValues.erase(VarName);
    961 
    962   // for expr always returns 0.0.
    963   return Constant::getNullValue(Type::getDoubleTy(getGlobalContext()));
    964 }
    965 
    966 Value *VarExprAST::codegen() {
    967   std::vector<AllocaInst *> OldBindings;
    968 
    969   Function *TheFunction = Builder.GetInsertBlock()->getParent();
    970 
    971   // Register all variables and emit their initializer.
    972   for (unsigned i = 0, e = VarNames.size(); i != e; ++i) {
    973     const std::string &VarName = VarNames[i].first;
    974     ExprAST *Init = VarNames[i].second.get();
    975 
    976     // Emit the initializer before adding the variable to scope, this prevents
    977     // the initializer from referencing the variable itself, and permits stuff
    978     // like this:
    979     //  var a = 1 in
    980     //    var a = a in ...   # refers to outer 'a'.
    981     Value *InitVal;
    982     if (Init) {
    983       InitVal = Init->codegen();
    984       if (!InitVal)
    985         return nullptr;
    986     } else { // If not specified, use 0.0.
    987       InitVal = ConstantFP::get(getGlobalContext(), APFloat(0.0));
    988     }
    989 
    990     AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
    991     Builder.CreateStore(InitVal, Alloca);
    992 
    993     // Remember the old variable binding so that we can restore the binding when
    994     // we unrecurse.
    995     OldBindings.push_back(NamedValues[VarName]);
    996 
    997     // Remember this binding.
    998     NamedValues[VarName] = Alloca;
    999   }
   1000 
   1001   // Codegen the body, now that all vars are in scope.
   1002   Value *BodyVal = Body->codegen();
   1003   if (!BodyVal)
   1004     return nullptr;
   1005 
   1006   // Pop all our variables from scope.
   1007   for (unsigned i = 0, e = VarNames.size(); i != e; ++i)
   1008     NamedValues[VarNames[i].first] = OldBindings[i];
   1009 
   1010   // Return the body computation.
   1011   return BodyVal;
   1012 }
   1013 
   1014 Function *PrototypeAST::codegen() {
   1015   // Make the function type:  double(double,double) etc.
   1016   std::vector<Type *> Doubles(Args.size(),
   1017                               Type::getDoubleTy(getGlobalContext()));
   1018   FunctionType *FT =
   1019       FunctionType::get(Type::getDoubleTy(getGlobalContext()), Doubles, false);
   1020 
   1021   Function *F =
   1022       Function::Create(FT, Function::ExternalLinkage, Name, TheModule.get());
   1023 
   1024   // Set names for all arguments.
   1025   unsigned Idx = 0;
   1026   for (auto &Arg : F->args())
   1027     Arg.setName(Args[Idx++]);
   1028 
   1029   return F;
   1030 }
   1031 
   1032 Function *FunctionAST::codegen() {
   1033   // Transfer ownership of the prototype to the FunctionProtos map, but keep a
   1034   // reference to it for use below.
   1035   auto &P = *Proto;
   1036   FunctionProtos[Proto->getName()] = std::move(Proto);
   1037   Function *TheFunction = getFunction(P.getName());
   1038   if (!TheFunction)
   1039     return nullptr;
   1040 
   1041   // If this is an operator, install it.
   1042   if (P.isBinaryOp())
   1043     BinopPrecedence[P.getOperatorName()] = P.getBinaryPrecedence();
   1044 
   1045   // Create a new basic block to start insertion into.
   1046   BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
   1047   Builder.SetInsertPoint(BB);
   1048 
   1049   // Record the function arguments in the NamedValues map.
   1050   NamedValues.clear();
   1051   for (auto &Arg : TheFunction->args()) {
   1052     // Create an alloca for this variable.
   1053     AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, Arg.getName());
   1054 
   1055     // Store the initial value into the alloca.
   1056     Builder.CreateStore(&Arg, Alloca);
   1057 
   1058     // Add arguments to variable symbol table.
   1059     NamedValues[Arg.getName()] = Alloca;
   1060   }
   1061 
   1062   if (Value *RetVal = Body->codegen()) {
   1063     // Finish off the function.
   1064     Builder.CreateRet(RetVal);
   1065 
   1066     // Validate the generated code, checking for consistency.
   1067     verifyFunction(*TheFunction);
   1068 
   1069     // Run the optimizer on the function.
   1070     TheFPM->run(*TheFunction);
   1071 
   1072     return TheFunction;
   1073   }
   1074 
   1075   // Error reading body, remove function.
   1076   TheFunction->eraseFromParent();
   1077 
   1078   if (P.isBinaryOp())
   1079     BinopPrecedence.erase(Proto->getOperatorName());
   1080   return nullptr;
   1081 }
   1082 
   1083 //===----------------------------------------------------------------------===//
   1084 // Top-Level parsing and JIT Driver
   1085 //===----------------------------------------------------------------------===//
   1086 
   1087 static void InitializeModuleAndPassManager() {
   1088   // Open a new module.
   1089   TheModule = llvm::make_unique<Module>("my cool jit", getGlobalContext());
   1090   TheModule->setDataLayout(TheJIT->getTargetMachine().createDataLayout());
   1091 
   1092   // Create a new pass manager attached to it.
   1093   TheFPM = llvm::make_unique<legacy::FunctionPassManager>(TheModule.get());
   1094 
   1095   // Do simple "peephole" optimizations and bit-twiddling optzns.
   1096   TheFPM->add(createInstructionCombiningPass());
   1097   // Reassociate expressions.
   1098   TheFPM->add(createReassociatePass());
   1099   // Eliminate Common SubExpressions.
   1100   TheFPM->add(createGVNPass());
   1101   // Simplify the control flow graph (deleting unreachable blocks, etc).
   1102   TheFPM->add(createCFGSimplificationPass());
   1103 
   1104   TheFPM->doInitialization();
   1105 }
   1106 
   1107 static void HandleDefinition() {
   1108   if (auto FnAST = ParseDefinition()) {
   1109     if (auto *FnIR = FnAST->codegen()) {
   1110       fprintf(stderr, "Read function definition:");
   1111       FnIR->dump();
   1112       TheJIT->addModule(std::move(TheModule));
   1113       InitializeModuleAndPassManager();
   1114     }
   1115   } else {
   1116     // Skip token for error recovery.
   1117     getNextToken();
   1118   }
   1119 }
   1120 
   1121 static void HandleExtern() {
   1122   if (auto ProtoAST = ParseExtern()) {
   1123     if (auto *FnIR = ProtoAST->codegen()) {
   1124       fprintf(stderr, "Read extern: ");
   1125       FnIR->dump();
   1126       FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST);
   1127     }
   1128   } else {
   1129     // Skip token for error recovery.
   1130     getNextToken();
   1131   }
   1132 }
   1133 
   1134 static void HandleTopLevelExpression() {
   1135   // Evaluate a top-level expression into an anonymous function.
   1136   if (auto FnAST = ParseTopLevelExpr()) {
   1137     if (FnAST->codegen()) {
   1138 
   1139       // JIT the module containing the anonymous expression, keeping a handle so
   1140       // we can free it later.
   1141       auto H = TheJIT->addModule(std::move(TheModule));
   1142       InitializeModuleAndPassManager();
   1143 
   1144       // Search the JIT for the __anon_expr symbol.
   1145       auto ExprSymbol = TheJIT->findSymbol("__anon_expr");
   1146       assert(ExprSymbol && "Function not found");
   1147 
   1148       // Get the symbol's address and cast it to the right type (takes no
   1149       // arguments, returns a double) so we can call it as a native function.
   1150       double (*FP)() = (double (*)())(intptr_t)ExprSymbol.getAddress();
   1151       fprintf(stderr, "Evaluated to %f\n", FP());
   1152 
   1153       // Delete the anonymous expression module from the JIT.
   1154       TheJIT->removeModule(H);
   1155     }
   1156   } else {
   1157     // Skip token for error recovery.
   1158     getNextToken();
   1159   }
   1160 }
   1161 
   1162 /// top ::= definition | external | expression | ';'
   1163 static void MainLoop() {
   1164   while (1) {
   1165     fprintf(stderr, "ready> ");
   1166     switch (CurTok) {
   1167     case tok_eof:
   1168       return;
   1169     case ';': // ignore top-level semicolons.
   1170       getNextToken();
   1171       break;
   1172     case tok_def:
   1173       HandleDefinition();
   1174       break;
   1175     case tok_extern:
   1176       HandleExtern();
   1177       break;
   1178     default:
   1179       HandleTopLevelExpression();
   1180       break;
   1181     }
   1182   }
   1183 }
   1184 
   1185 //===----------------------------------------------------------------------===//
   1186 // "Library" functions that can be "extern'd" from user code.
   1187 //===----------------------------------------------------------------------===//
   1188 
   1189 /// putchard - putchar that takes a double and returns 0.
   1190 extern "C" double putchard(double X) {
   1191   fputc((char)X, stderr);
   1192   return 0;
   1193 }
   1194 
   1195 /// printd - printf that takes a double prints it as "%f\n", returning 0.
   1196 extern "C" double printd(double X) {
   1197   fprintf(stderr, "%f\n", X);
   1198   return 0;
   1199 }
   1200 
   1201 //===----------------------------------------------------------------------===//
   1202 // Main driver code.
   1203 //===----------------------------------------------------------------------===//
   1204 
   1205 int main() {
   1206   InitializeNativeTarget();
   1207   InitializeNativeTargetAsmPrinter();
   1208   InitializeNativeTargetAsmParser();
   1209 
   1210   // Install standard binary operators.
   1211   // 1 is lowest precedence.
   1212   BinopPrecedence['='] = 2;
   1213   BinopPrecedence['<'] = 10;
   1214   BinopPrecedence['+'] = 20;
   1215   BinopPrecedence['-'] = 20;
   1216   BinopPrecedence['*'] = 40; // highest.
   1217 
   1218   // Prime the first token.
   1219   fprintf(stderr, "ready> ");
   1220   getNextToken();
   1221 
   1222   TheJIT = llvm::make_unique<KaleidoscopeJIT>();
   1223 
   1224   InitializeModuleAndPassManager();
   1225 
   1226   // Run the main "interpreter loop" now.
   1227   MainLoop();
   1228 
   1229   return 0;
   1230 }
   1231