Home | History | Annotate | Download | only in Chapter2
      1 #include <cstdio>
      2 #include <cstdlib>
      3 #include <map>
      4 #include <string>
      5 #include <vector>
      6 
      7 //===----------------------------------------------------------------------===//
      8 // Lexer
      9 //===----------------------------------------------------------------------===//
     10 
     11 // The lexer returns tokens [0-255] if it is an unknown character, otherwise one
     12 // of these for known things.
     13 enum Token {
     14   tok_eof = -1,
     15 
     16   // commands
     17   tok_def = -2, tok_extern = -3,
     18 
     19   // primary
     20   tok_identifier = -4, tok_number = -5
     21 };
     22 
     23 static std::string IdentifierStr;  // Filled in if tok_identifier
     24 static double NumVal;              // Filled in if tok_number
     25 
     26 /// gettok - Return the next token from standard input.
     27 static int gettok() {
     28   static int LastChar = ' ';
     29 
     30   // Skip any whitespace.
     31   while (isspace(LastChar))
     32     LastChar = getchar();
     33 
     34   if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
     35     IdentifierStr = LastChar;
     36     while (isalnum((LastChar = getchar())))
     37       IdentifierStr += LastChar;
     38 
     39     if (IdentifierStr == "def") return tok_def;
     40     if (IdentifierStr == "extern") return tok_extern;
     41     return tok_identifier;
     42   }
     43 
     44   if (isdigit(LastChar) || LastChar == '.') {   // Number: [0-9.]+
     45     std::string NumStr;
     46     do {
     47       NumStr += LastChar;
     48       LastChar = getchar();
     49     } while (isdigit(LastChar) || LastChar == '.');
     50 
     51     NumVal = strtod(NumStr.c_str(), 0);
     52     return tok_number;
     53   }
     54 
     55   if (LastChar == '#') {
     56     // Comment until end of line.
     57     do LastChar = getchar();
     58     while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
     59 
     60     if (LastChar != EOF)
     61       return gettok();
     62   }
     63 
     64   // Check for end of file.  Don't eat the EOF.
     65   if (LastChar == EOF)
     66     return tok_eof;
     67 
     68   // Otherwise, just return the character as its ascii value.
     69   int ThisChar = LastChar;
     70   LastChar = getchar();
     71   return ThisChar;
     72 }
     73 
     74 //===----------------------------------------------------------------------===//
     75 // Abstract Syntax Tree (aka Parse Tree)
     76 //===----------------------------------------------------------------------===//
     77 
     78 /// ExprAST - Base class for all expression nodes.
     79 class ExprAST {
     80 public:
     81   virtual ~ExprAST() {}
     82 };
     83 
     84 /// NumberExprAST - Expression class for numeric literals like "1.0".
     85 class NumberExprAST : public ExprAST {
     86 public:
     87   NumberExprAST(double val) {}
     88 };
     89 
     90 /// VariableExprAST - Expression class for referencing a variable, like "a".
     91 class VariableExprAST : public ExprAST {
     92   std::string Name;
     93 public:
     94   VariableExprAST(const std::string &name) : Name(name) {}
     95 };
     96 
     97 /// BinaryExprAST - Expression class for a binary operator.
     98 class BinaryExprAST : public ExprAST {
     99 public:
    100   BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) {}
    101 };
    102 
    103 /// CallExprAST - Expression class for function calls.
    104 class CallExprAST : public ExprAST {
    105   std::string Callee;
    106   std::vector<ExprAST*> Args;
    107 public:
    108   CallExprAST(const std::string &callee, std::vector<ExprAST*> &args)
    109     : Callee(callee), Args(args) {}
    110 };
    111 
    112 /// PrototypeAST - This class represents the "prototype" for a function,
    113 /// which captures its name, and its argument names (thus implicitly the number
    114 /// of arguments the function takes).
    115 class PrototypeAST {
    116   std::string Name;
    117   std::vector<std::string> Args;
    118 public:
    119   PrototypeAST(const std::string &name, const std::vector<std::string> &args)
    120     : Name(name), Args(args) {}
    121 
    122 };
    123 
    124 /// FunctionAST - This class represents a function definition itself.
    125 class FunctionAST {
    126 public:
    127   FunctionAST(PrototypeAST *proto, ExprAST *body) {}
    128 };
    129 
    130 //===----------------------------------------------------------------------===//
    131 // Parser
    132 //===----------------------------------------------------------------------===//
    133 
    134 /// CurTok/getNextToken - Provide a simple token buffer.  CurTok is the current
    135 /// token the parser is looking at.  getNextToken reads another token from the
    136 /// lexer and updates CurTok with its results.
    137 static int CurTok;
    138 static int getNextToken() {
    139   return CurTok = gettok();
    140 }
    141 
    142 /// BinopPrecedence - This holds the precedence for each binary operator that is
    143 /// defined.
    144 static std::map<char, int> BinopPrecedence;
    145 
    146 /// GetTokPrecedence - Get the precedence of the pending binary operator token.
    147 static int GetTokPrecedence() {
    148   if (!isascii(CurTok))
    149     return -1;
    150 
    151   // Make sure it's a declared binop.
    152   int TokPrec = BinopPrecedence[CurTok];
    153   if (TokPrec <= 0) return -1;
    154   return TokPrec;
    155 }
    156 
    157 /// Error* - These are little helper functions for error handling.
    158 ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
    159 PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
    160 FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; }
    161 
    162 static ExprAST *ParseExpression();
    163 
    164 /// identifierexpr
    165 ///   ::= identifier
    166 ///   ::= identifier '(' expression* ')'
    167 static ExprAST *ParseIdentifierExpr() {
    168   std::string IdName = IdentifierStr;
    169 
    170   getNextToken();  // eat identifier.
    171 
    172   if (CurTok != '(') // Simple variable ref.
    173     return new VariableExprAST(IdName);
    174 
    175   // Call.
    176   getNextToken();  // eat (
    177   std::vector<ExprAST*> Args;
    178   if (CurTok != ')') {
    179     while (1) {
    180       ExprAST *Arg = ParseExpression();
    181       if (!Arg) return 0;
    182       Args.push_back(Arg);
    183 
    184       if (CurTok == ')') break;
    185 
    186       if (CurTok != ',')
    187         return Error("Expected ')' or ',' in argument list");
    188       getNextToken();
    189     }
    190   }
    191 
    192   // Eat the ')'.
    193   getNextToken();
    194 
    195   return new CallExprAST(IdName, Args);
    196 }
    197 
    198 /// numberexpr ::= number
    199 static ExprAST *ParseNumberExpr() {
    200   ExprAST *Result = new NumberExprAST(NumVal);
    201   getNextToken(); // consume the number
    202   return Result;
    203 }
    204 
    205 /// parenexpr ::= '(' expression ')'
    206 static ExprAST *ParseParenExpr() {
    207   getNextToken();  // eat (.
    208   ExprAST *V = ParseExpression();
    209   if (!V) return 0;
    210 
    211   if (CurTok != ')')
    212     return Error("expected ')'");
    213   getNextToken();  // eat ).
    214   return V;
    215 }
    216 
    217 /// primary
    218 ///   ::= identifierexpr
    219 ///   ::= numberexpr
    220 ///   ::= parenexpr
    221 static ExprAST *ParsePrimary() {
    222   switch (CurTok) {
    223   default: return Error("unknown token when expecting an expression");
    224   case tok_identifier: return ParseIdentifierExpr();
    225   case tok_number:     return ParseNumberExpr();
    226   case '(':            return ParseParenExpr();
    227   }
    228 }
    229 
    230 /// binoprhs
    231 ///   ::= ('+' primary)*
    232 static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
    233   // If this is a binop, find its precedence.
    234   while (1) {
    235     int TokPrec = GetTokPrecedence();
    236 
    237     // If this is a binop that binds at least as tightly as the current binop,
    238     // consume it, otherwise we are done.
    239     if (TokPrec < ExprPrec)
    240       return LHS;
    241 
    242     // Okay, we know this is a binop.
    243     int BinOp = CurTok;
    244     getNextToken();  // eat binop
    245 
    246     // Parse the primary expression after the binary operator.
    247     ExprAST *RHS = ParsePrimary();
    248     if (!RHS) return 0;
    249 
    250     // If BinOp binds less tightly with RHS than the operator after RHS, let
    251     // the pending operator take RHS as its LHS.
    252     int NextPrec = GetTokPrecedence();
    253     if (TokPrec < NextPrec) {
    254       RHS = ParseBinOpRHS(TokPrec+1, RHS);
    255       if (RHS == 0) return 0;
    256     }
    257 
    258     // Merge LHS/RHS.
    259     LHS = new BinaryExprAST(BinOp, LHS, RHS);
    260   }
    261 }
    262 
    263 /// expression
    264 ///   ::= primary binoprhs
    265 ///
    266 static ExprAST *ParseExpression() {
    267   ExprAST *LHS = ParsePrimary();
    268   if (!LHS) return 0;
    269 
    270   return ParseBinOpRHS(0, LHS);
    271 }
    272 
    273 /// prototype
    274 ///   ::= id '(' id* ')'
    275 static PrototypeAST *ParsePrototype() {
    276   if (CurTok != tok_identifier)
    277     return ErrorP("Expected function name in prototype");
    278 
    279   std::string FnName = IdentifierStr;
    280   getNextToken();
    281 
    282   if (CurTok != '(')
    283     return ErrorP("Expected '(' in prototype");
    284 
    285   std::vector<std::string> ArgNames;
    286   while (getNextToken() == tok_identifier)
    287     ArgNames.push_back(IdentifierStr);
    288   if (CurTok != ')')
    289     return ErrorP("Expected ')' in prototype");
    290 
    291   // success.
    292   getNextToken();  // eat ')'.
    293 
    294   return new PrototypeAST(FnName, ArgNames);
    295 }
    296 
    297 /// definition ::= 'def' prototype expression
    298 static FunctionAST *ParseDefinition() {
    299   getNextToken();  // eat def.
    300   PrototypeAST *Proto = ParsePrototype();
    301   if (Proto == 0) return 0;
    302 
    303   if (ExprAST *E = ParseExpression())
    304     return new FunctionAST(Proto, E);
    305   return 0;
    306 }
    307 
    308 /// toplevelexpr ::= expression
    309 static FunctionAST *ParseTopLevelExpr() {
    310   if (ExprAST *E = ParseExpression()) {
    311     // Make an anonymous proto.
    312     PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>());
    313     return new FunctionAST(Proto, E);
    314   }
    315   return 0;
    316 }
    317 
    318 /// external ::= 'extern' prototype
    319 static PrototypeAST *ParseExtern() {
    320   getNextToken();  // eat extern.
    321   return ParsePrototype();
    322 }
    323 
    324 //===----------------------------------------------------------------------===//
    325 // Top-Level parsing
    326 //===----------------------------------------------------------------------===//
    327 
    328 static void HandleDefinition() {
    329   if (ParseDefinition()) {
    330     fprintf(stderr, "Parsed a function definition.\n");
    331   } else {
    332     // Skip token for error recovery.
    333     getNextToken();
    334   }
    335 }
    336 
    337 static void HandleExtern() {
    338   if (ParseExtern()) {
    339     fprintf(stderr, "Parsed an extern\n");
    340   } else {
    341     // Skip token for error recovery.
    342     getNextToken();
    343   }
    344 }
    345 
    346 static void HandleTopLevelExpression() {
    347   // Evaluate a top-level expression into an anonymous function.
    348   if (ParseTopLevelExpr()) {
    349     fprintf(stderr, "Parsed a top-level expr\n");
    350   } else {
    351     // Skip token for error recovery.
    352     getNextToken();
    353   }
    354 }
    355 
    356 /// top ::= definition | external | expression | ';'
    357 static void MainLoop() {
    358   while (1) {
    359     fprintf(stderr, "ready> ");
    360     switch (CurTok) {
    361     case tok_eof:    return;
    362     case ';':        getNextToken(); break;  // ignore top-level semicolons.
    363     case tok_def:    HandleDefinition(); break;
    364     case tok_extern: HandleExtern(); break;
    365     default:         HandleTopLevelExpression(); break;
    366     }
    367   }
    368 }
    369 
    370 //===----------------------------------------------------------------------===//
    371 // Main driver code.
    372 //===----------------------------------------------------------------------===//
    373 
    374 int main() {
    375   // Install standard binary operators.
    376   // 1 is lowest precedence.
    377   BinopPrecedence['<'] = 10;
    378   BinopPrecedence['+'] = 20;
    379   BinopPrecedence['-'] = 20;
    380   BinopPrecedence['*'] = 40;  // highest.
    381 
    382   // Prime the first token.
    383   fprintf(stderr, "ready> ");
    384   getNextToken();
    385 
    386   // Run the main "interpreter loop" now.
    387   MainLoop();
    388 
    389   return 0;
    390 }
    391