Home | History | Annotate | Download | only in Chapter2
      1 #include <cctype>
      2 #include <cstdio>
      3 #include <cstdlib>
      4 #include <map>
      5 #include <string>
      6 #include <vector>
      7 
      8 //===----------------------------------------------------------------------===//
      9 // Lexer
     10 //===----------------------------------------------------------------------===//
     11 
     12 // The lexer returns tokens [0-255] if it is an unknown character, otherwise one
     13 // of these for known things.
     14 enum Token {
     15   tok_eof = -1,
     16 
     17   // commands
     18   tok_def = -2, tok_extern = -3,
     19 
     20   // primary
     21   tok_identifier = -4, tok_number = -5
     22 };
     23 
     24 static std::string IdentifierStr;  // Filled in if tok_identifier
     25 static double NumVal;              // Filled in if tok_number
     26 
     27 /// gettok - Return the next token from standard input.
     28 static int gettok() {
     29   static int LastChar = ' ';
     30 
     31   // Skip any whitespace.
     32   while (isspace(LastChar))
     33     LastChar = getchar();
     34 
     35   if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
     36     IdentifierStr = LastChar;
     37     while (isalnum((LastChar = getchar())))
     38       IdentifierStr += LastChar;
     39 
     40     if (IdentifierStr == "def") return tok_def;
     41     if (IdentifierStr == "extern") return tok_extern;
     42     return tok_identifier;
     43   }
     44 
     45   if (isdigit(LastChar) || LastChar == '.') {   // Number: [0-9.]+
     46     std::string NumStr;
     47     do {
     48       NumStr += LastChar;
     49       LastChar = getchar();
     50     } while (isdigit(LastChar) || LastChar == '.');
     51 
     52     NumVal = strtod(NumStr.c_str(), 0);
     53     return tok_number;
     54   }
     55 
     56   if (LastChar == '#') {
     57     // Comment until end of line.
     58     do LastChar = getchar();
     59     while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
     60 
     61     if (LastChar != EOF)
     62       return gettok();
     63   }
     64 
     65   // Check for end of file.  Don't eat the EOF.
     66   if (LastChar == EOF)
     67     return tok_eof;
     68 
     69   // Otherwise, just return the character as its ascii value.
     70   int ThisChar = LastChar;
     71   LastChar = getchar();
     72   return ThisChar;
     73 }
     74 
     75 //===----------------------------------------------------------------------===//
     76 // Abstract Syntax Tree (aka Parse Tree)
     77 //===----------------------------------------------------------------------===//
     78 namespace {
     79 /// ExprAST - Base class for all expression nodes.
     80 class ExprAST {
     81 public:
     82   virtual ~ExprAST() {}
     83 };
     84 
     85 /// NumberExprAST - Expression class for numeric literals like "1.0".
     86 class NumberExprAST : public ExprAST {
     87 public:
     88   NumberExprAST(double val) {}
     89 };
     90 
     91 /// VariableExprAST - Expression class for referencing a variable, like "a".
     92 class VariableExprAST : public ExprAST {
     93   std::string Name;
     94 public:
     95   VariableExprAST(const std::string &name) : Name(name) {}
     96 };
     97 
     98 /// BinaryExprAST - Expression class for a binary operator.
     99 class BinaryExprAST : public ExprAST {
    100 public:
    101   BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) {}
    102 };
    103 
    104 /// CallExprAST - Expression class for function calls.
    105 class CallExprAST : public ExprAST {
    106   std::string Callee;
    107   std::vector<ExprAST*> Args;
    108 public:
    109   CallExprAST(const std::string &callee, std::vector<ExprAST*> &args)
    110     : Callee(callee), Args(args) {}
    111 };
    112 
    113 /// PrototypeAST - This class represents the "prototype" for a function,
    114 /// which captures its name, and its argument names (thus implicitly the number
    115 /// of arguments the function takes).
    116 class PrototypeAST {
    117   std::string Name;
    118   std::vector<std::string> Args;
    119 public:
    120   PrototypeAST(const std::string &name, const std::vector<std::string> &args)
    121     : Name(name), Args(args) {}
    122 
    123 };
    124 
    125 /// FunctionAST - This class represents a function definition itself.
    126 class FunctionAST {
    127 public:
    128   FunctionAST(PrototypeAST *proto, ExprAST *body) {}
    129 };
    130 } // end anonymous namespace
    131 
    132 //===----------------------------------------------------------------------===//
    133 // Parser
    134 //===----------------------------------------------------------------------===//
    135 
    136 /// CurTok/getNextToken - Provide a simple token buffer.  CurTok is the current
    137 /// token the parser is looking at.  getNextToken reads another token from the
    138 /// lexer and updates CurTok with its results.
    139 static int CurTok;
    140 static int getNextToken() {
    141   return CurTok = gettok();
    142 }
    143 
    144 /// BinopPrecedence - This holds the precedence for each binary operator that is
    145 /// defined.
    146 static std::map<char, int> BinopPrecedence;
    147 
    148 /// GetTokPrecedence - Get the precedence of the pending binary operator token.
    149 static int GetTokPrecedence() {
    150   if (!isascii(CurTok))
    151     return -1;
    152 
    153   // Make sure it's a declared binop.
    154   int TokPrec = BinopPrecedence[CurTok];
    155   if (TokPrec <= 0) return -1;
    156   return TokPrec;
    157 }
    158 
    159 /// Error* - These are little helper functions for error handling.
    160 ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
    161 PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
    162 
    163 static ExprAST *ParseExpression();
    164 
    165 /// identifierexpr
    166 ///   ::= identifier
    167 ///   ::= identifier '(' expression* ')'
    168 static ExprAST *ParseIdentifierExpr() {
    169   std::string IdName = IdentifierStr;
    170 
    171   getNextToken();  // eat identifier.
    172 
    173   if (CurTok != '(') // Simple variable ref.
    174     return new VariableExprAST(IdName);
    175 
    176   // Call.
    177   getNextToken();  // eat (
    178   std::vector<ExprAST*> Args;
    179   if (CurTok != ')') {
    180     while (1) {
    181       ExprAST *Arg = ParseExpression();
    182       if (!Arg) return 0;
    183       Args.push_back(Arg);
    184 
    185       if (CurTok == ')') break;
    186 
    187       if (CurTok != ',')
    188         return Error("Expected ')' or ',' in argument list");
    189       getNextToken();
    190     }
    191   }
    192 
    193   // Eat the ')'.
    194   getNextToken();
    195 
    196   return new CallExprAST(IdName, Args);
    197 }
    198 
    199 /// numberexpr ::= number
    200 static ExprAST *ParseNumberExpr() {
    201   ExprAST *Result = new NumberExprAST(NumVal);
    202   getNextToken(); // consume the number
    203   return Result;
    204 }
    205 
    206 /// parenexpr ::= '(' expression ')'
    207 static ExprAST *ParseParenExpr() {
    208   getNextToken();  // eat (.
    209   ExprAST *V = ParseExpression();
    210   if (!V) return 0;
    211 
    212   if (CurTok != ')')
    213     return Error("expected ')'");
    214   getNextToken();  // eat ).
    215   return V;
    216 }
    217 
    218 /// primary
    219 ///   ::= identifierexpr
    220 ///   ::= numberexpr
    221 ///   ::= parenexpr
    222 static ExprAST *ParsePrimary() {
    223   switch (CurTok) {
    224   default: return Error("unknown token when expecting an expression");
    225   case tok_identifier: return ParseIdentifierExpr();
    226   case tok_number:     return ParseNumberExpr();
    227   case '(':            return ParseParenExpr();
    228   }
    229 }
    230 
    231 /// binoprhs
    232 ///   ::= ('+' primary)*
    233 static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
    234   // If this is a binop, find its precedence.
    235   while (1) {
    236     int TokPrec = GetTokPrecedence();
    237 
    238     // If this is a binop that binds at least as tightly as the current binop,
    239     // consume it, otherwise we are done.
    240     if (TokPrec < ExprPrec)
    241       return LHS;
    242 
    243     // Okay, we know this is a binop.
    244     int BinOp = CurTok;
    245     getNextToken();  // eat binop
    246 
    247     // Parse the primary expression after the binary operator.
    248     ExprAST *RHS = ParsePrimary();
    249     if (!RHS) return 0;
    250 
    251     // If BinOp binds less tightly with RHS than the operator after RHS, let
    252     // the pending operator take RHS as its LHS.
    253     int NextPrec = GetTokPrecedence();
    254     if (TokPrec < NextPrec) {
    255       RHS = ParseBinOpRHS(TokPrec+1, RHS);
    256       if (RHS == 0) return 0;
    257     }
    258 
    259     // Merge LHS/RHS.
    260     LHS = new BinaryExprAST(BinOp, LHS, RHS);
    261   }
    262 }
    263 
    264 /// expression
    265 ///   ::= primary binoprhs
    266 ///
    267 static ExprAST *ParseExpression() {
    268   ExprAST *LHS = ParsePrimary();
    269   if (!LHS) return 0;
    270 
    271   return ParseBinOpRHS(0, LHS);
    272 }
    273 
    274 /// prototype
    275 ///   ::= id '(' id* ')'
    276 static PrototypeAST *ParsePrototype() {
    277   if (CurTok != tok_identifier)
    278     return ErrorP("Expected function name in prototype");
    279 
    280   std::string FnName = IdentifierStr;
    281   getNextToken();
    282 
    283   if (CurTok != '(')
    284     return ErrorP("Expected '(' in prototype");
    285 
    286   std::vector<std::string> ArgNames;
    287   while (getNextToken() == tok_identifier)
    288     ArgNames.push_back(IdentifierStr);
    289   if (CurTok != ')')
    290     return ErrorP("Expected ')' in prototype");
    291 
    292   // success.
    293   getNextToken();  // eat ')'.
    294 
    295   return new PrototypeAST(FnName, ArgNames);
    296 }
    297 
    298 /// definition ::= 'def' prototype expression
    299 static FunctionAST *ParseDefinition() {
    300   getNextToken();  // eat def.
    301   PrototypeAST *Proto = ParsePrototype();
    302   if (Proto == 0) return 0;
    303 
    304   if (ExprAST *E = ParseExpression())
    305     return new FunctionAST(Proto, E);
    306   return 0;
    307 }
    308 
    309 /// toplevelexpr ::= expression
    310 static FunctionAST *ParseTopLevelExpr() {
    311   if (ExprAST *E = ParseExpression()) {
    312     // Make an anonymous proto.
    313     PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>());
    314     return new FunctionAST(Proto, E);
    315   }
    316   return 0;
    317 }
    318 
    319 /// external ::= 'extern' prototype
    320 static PrototypeAST *ParseExtern() {
    321   getNextToken();  // eat extern.
    322   return ParsePrototype();
    323 }
    324 
    325 //===----------------------------------------------------------------------===//
    326 // Top-Level parsing
    327 //===----------------------------------------------------------------------===//
    328 
    329 static void HandleDefinition() {
    330   if (ParseDefinition()) {
    331     fprintf(stderr, "Parsed a function definition.\n");
    332   } else {
    333     // Skip token for error recovery.
    334     getNextToken();
    335   }
    336 }
    337 
    338 static void HandleExtern() {
    339   if (ParseExtern()) {
    340     fprintf(stderr, "Parsed an extern\n");
    341   } else {
    342     // Skip token for error recovery.
    343     getNextToken();
    344   }
    345 }
    346 
    347 static void HandleTopLevelExpression() {
    348   // Evaluate a top-level expression into an anonymous function.
    349   if (ParseTopLevelExpr()) {
    350     fprintf(stderr, "Parsed a top-level expr\n");
    351   } else {
    352     // Skip token for error recovery.
    353     getNextToken();
    354   }
    355 }
    356 
    357 /// top ::= definition | external | expression | ';'
    358 static void MainLoop() {
    359   while (1) {
    360     fprintf(stderr, "ready> ");
    361     switch (CurTok) {
    362     case tok_eof:    return;
    363     case ';':        getNextToken(); break;  // ignore top-level semicolons.
    364     case tok_def:    HandleDefinition(); break;
    365     case tok_extern: HandleExtern(); break;
    366     default:         HandleTopLevelExpression(); break;
    367     }
    368   }
    369 }
    370 
    371 //===----------------------------------------------------------------------===//
    372 // Main driver code.
    373 //===----------------------------------------------------------------------===//
    374 
    375 int main() {
    376   // Install standard binary operators.
    377   // 1 is lowest precedence.
    378   BinopPrecedence['<'] = 10;
    379   BinopPrecedence['+'] = 20;
    380   BinopPrecedence['-'] = 20;
    381   BinopPrecedence['*'] = 40;  // highest.
    382 
    383   // Prime the first token.
    384   fprintf(stderr, "ready> ");
    385   getNextToken();
    386 
    387   // Run the main "interpreter loop" now.
    388   MainLoop();
    389 
    390   return 0;
    391 }
    392