Home | History | Annotate | Download | only in Chapter2
      1 #include <cstdio>
      2 #include <cstdlib>
      3 #include <map>
      4 #include <string>
      5 #include <vector>
      6 
      7 //===----------------------------------------------------------------------===//
      8 // Lexer
      9 //===----------------------------------------------------------------------===//
     10 
     11 // The lexer returns tokens [0-255] if it is an unknown character, otherwise one
     12 // of these for known things.
     13 enum Token {
     14   tok_eof = -1,
     15 
     16   // commands
     17   tok_def = -2, tok_extern = -3,
     18 
     19   // primary
     20   tok_identifier = -4, tok_number = -5
     21 };
     22 
     23 static std::string IdentifierStr;  // Filled in if tok_identifier
     24 static double NumVal;              // Filled in if tok_number
     25 
     26 /// gettok - Return the next token from standard input.
     27 static int gettok() {
     28   static int LastChar = ' ';
     29 
     30   // Skip any whitespace.
     31   while (isspace(LastChar))
     32     LastChar = getchar();
     33 
     34   if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
     35     IdentifierStr = LastChar;
     36     while (isalnum((LastChar = getchar())))
     37       IdentifierStr += LastChar;
     38 
     39     if (IdentifierStr == "def") return tok_def;
     40     if (IdentifierStr == "extern") return tok_extern;
     41     return tok_identifier;
     42   }
     43 
     44   if (isdigit(LastChar) || LastChar == '.') {   // Number: [0-9.]+
     45     std::string NumStr;
     46     do {
     47       NumStr += LastChar;
     48       LastChar = getchar();
     49     } while (isdigit(LastChar) || LastChar == '.');
     50 
     51     NumVal = strtod(NumStr.c_str(), 0);
     52     return tok_number;
     53   }
     54 
     55   if (LastChar == '#') {
     56     // Comment until end of line.
     57     do LastChar = getchar();
     58     while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
     59 
     60     if (LastChar != EOF)
     61       return gettok();
     62   }
     63 
     64   // Check for end of file.  Don't eat the EOF.
     65   if (LastChar == EOF)
     66     return tok_eof;
     67 
     68   // Otherwise, just return the character as its ascii value.
     69   int ThisChar = LastChar;
     70   LastChar = getchar();
     71   return ThisChar;
     72 }
     73 
     74 //===----------------------------------------------------------------------===//
     75 // Abstract Syntax Tree (aka Parse Tree)
     76 //===----------------------------------------------------------------------===//
     77 
     78 /// ExprAST - Base class for all expression nodes.
     79 class ExprAST {
     80 public:
     81   virtual ~ExprAST() {}
     82 };
     83 
     84 /// NumberExprAST - Expression class for numeric literals like "1.0".
     85 class NumberExprAST : public ExprAST {
     86   double Val;
     87 public:
     88   NumberExprAST(double val) : Val(val) {}
     89 };
     90 
     91 /// VariableExprAST - Expression class for referencing a variable, like "a".
     92 class VariableExprAST : public ExprAST {
     93   std::string Name;
     94 public:
     95   VariableExprAST(const std::string &name) : Name(name) {}
     96 };
     97 
     98 /// BinaryExprAST - Expression class for a binary operator.
     99 class BinaryExprAST : public ExprAST {
    100   char Op;
    101   ExprAST *LHS, *RHS;
    102 public:
    103   BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs)
    104     : Op(op), LHS(lhs), RHS(rhs) {}
    105 };
    106 
    107 /// CallExprAST - Expression class for function calls.
    108 class CallExprAST : public ExprAST {
    109   std::string Callee;
    110   std::vector<ExprAST*> Args;
    111 public:
    112   CallExprAST(const std::string &callee, std::vector<ExprAST*> &args)
    113     : Callee(callee), Args(args) {}
    114 };
    115 
    116 /// PrototypeAST - This class represents the "prototype" for a function,
    117 /// which captures its name, and its argument names (thus implicitly the number
    118 /// of arguments the function takes).
    119 class PrototypeAST {
    120   std::string Name;
    121   std::vector<std::string> Args;
    122 public:
    123   PrototypeAST(const std::string &name, const std::vector<std::string> &args)
    124     : Name(name), Args(args) {}
    125 
    126 };
    127 
    128 /// FunctionAST - This class represents a function definition itself.
    129 class FunctionAST {
    130   PrototypeAST *Proto;
    131   ExprAST *Body;
    132 public:
    133   FunctionAST(PrototypeAST *proto, ExprAST *body)
    134     : Proto(proto), Body(body) {}
    135 
    136 };
    137 
    138 //===----------------------------------------------------------------------===//
    139 // Parser
    140 //===----------------------------------------------------------------------===//
    141 
    142 /// CurTok/getNextToken - Provide a simple token buffer.  CurTok is the current
    143 /// token the parser is looking at.  getNextToken reads another token from the
    144 /// lexer and updates CurTok with its results.
    145 static int CurTok;
    146 static int getNextToken() {
    147   return CurTok = gettok();
    148 }
    149 
    150 /// BinopPrecedence - This holds the precedence for each binary operator that is
    151 /// defined.
    152 static std::map<char, int> BinopPrecedence;
    153 
    154 /// GetTokPrecedence - Get the precedence of the pending binary operator token.
    155 static int GetTokPrecedence() {
    156   if (!isascii(CurTok))
    157     return -1;
    158 
    159   // Make sure it's a declared binop.
    160   int TokPrec = BinopPrecedence[CurTok];
    161   if (TokPrec <= 0) return -1;
    162   return TokPrec;
    163 }
    164 
    165 /// Error* - These are little helper functions for error handling.
    166 ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
    167 PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
    168 FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; }
    169 
    170 static ExprAST *ParseExpression();
    171 
    172 /// identifierexpr
    173 ///   ::= identifier
    174 ///   ::= identifier '(' expression* ')'
    175 static ExprAST *ParseIdentifierExpr() {
    176   std::string IdName = IdentifierStr;
    177 
    178   getNextToken();  // eat identifier.
    179 
    180   if (CurTok != '(') // Simple variable ref.
    181     return new VariableExprAST(IdName);
    182 
    183   // Call.
    184   getNextToken();  // eat (
    185   std::vector<ExprAST*> Args;
    186   if (CurTok != ')') {
    187     while (1) {
    188       ExprAST *Arg = ParseExpression();
    189       if (!Arg) return 0;
    190       Args.push_back(Arg);
    191 
    192       if (CurTok == ')') break;
    193 
    194       if (CurTok != ',')
    195         return Error("Expected ')' or ',' in argument list");
    196       getNextToken();
    197     }
    198   }
    199 
    200   // Eat the ')'.
    201   getNextToken();
    202 
    203   return new CallExprAST(IdName, Args);
    204 }
    205 
    206 /// numberexpr ::= number
    207 static ExprAST *ParseNumberExpr() {
    208   ExprAST *Result = new NumberExprAST(NumVal);
    209   getNextToken(); // consume the number
    210   return Result;
    211 }
    212 
    213 /// parenexpr ::= '(' expression ')'
    214 static ExprAST *ParseParenExpr() {
    215   getNextToken();  // eat (.
    216   ExprAST *V = ParseExpression();
    217   if (!V) return 0;
    218 
    219   if (CurTok != ')')
    220     return Error("expected ')'");
    221   getNextToken();  // eat ).
    222   return V;
    223 }
    224 
    225 /// primary
    226 ///   ::= identifierexpr
    227 ///   ::= numberexpr
    228 ///   ::= parenexpr
    229 static ExprAST *ParsePrimary() {
    230   switch (CurTok) {
    231   default: return Error("unknown token when expecting an expression");
    232   case tok_identifier: return ParseIdentifierExpr();
    233   case tok_number:     return ParseNumberExpr();
    234   case '(':            return ParseParenExpr();
    235   }
    236 }
    237 
    238 /// binoprhs
    239 ///   ::= ('+' primary)*
    240 static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
    241   // If this is a binop, find its precedence.
    242   while (1) {
    243     int TokPrec = GetTokPrecedence();
    244 
    245     // If this is a binop that binds at least as tightly as the current binop,
    246     // consume it, otherwise we are done.
    247     if (TokPrec < ExprPrec)
    248       return LHS;
    249 
    250     // Okay, we know this is a binop.
    251     int BinOp = CurTok;
    252     getNextToken();  // eat binop
    253 
    254     // Parse the primary expression after the binary operator.
    255     ExprAST *RHS = ParsePrimary();
    256     if (!RHS) return 0;
    257 
    258     // If BinOp binds less tightly with RHS than the operator after RHS, let
    259     // the pending operator take RHS as its LHS.
    260     int NextPrec = GetTokPrecedence();
    261     if (TokPrec < NextPrec) {
    262       RHS = ParseBinOpRHS(TokPrec+1, RHS);
    263       if (RHS == 0) return 0;
    264     }
    265 
    266     // Merge LHS/RHS.
    267     LHS = new BinaryExprAST(BinOp, LHS, RHS);
    268   }
    269 }
    270 
    271 /// expression
    272 ///   ::= primary binoprhs
    273 ///
    274 static ExprAST *ParseExpression() {
    275   ExprAST *LHS = ParsePrimary();
    276   if (!LHS) return 0;
    277 
    278   return ParseBinOpRHS(0, LHS);
    279 }
    280 
    281 /// prototype
    282 ///   ::= id '(' id* ')'
    283 static PrototypeAST *ParsePrototype() {
    284   if (CurTok != tok_identifier)
    285     return ErrorP("Expected function name in prototype");
    286 
    287   std::string FnName = IdentifierStr;
    288   getNextToken();
    289 
    290   if (CurTok != '(')
    291     return ErrorP("Expected '(' in prototype");
    292 
    293   std::vector<std::string> ArgNames;
    294   while (getNextToken() == tok_identifier)
    295     ArgNames.push_back(IdentifierStr);
    296   if (CurTok != ')')
    297     return ErrorP("Expected ')' in prototype");
    298 
    299   // success.
    300   getNextToken();  // eat ')'.
    301 
    302   return new PrototypeAST(FnName, ArgNames);
    303 }
    304 
    305 /// definition ::= 'def' prototype expression
    306 static FunctionAST *ParseDefinition() {
    307   getNextToken();  // eat def.
    308   PrototypeAST *Proto = ParsePrototype();
    309   if (Proto == 0) return 0;
    310 
    311   if (ExprAST *E = ParseExpression())
    312     return new FunctionAST(Proto, E);
    313   return 0;
    314 }
    315 
    316 /// toplevelexpr ::= expression
    317 static FunctionAST *ParseTopLevelExpr() {
    318   if (ExprAST *E = ParseExpression()) {
    319     // Make an anonymous proto.
    320     PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>());
    321     return new FunctionAST(Proto, E);
    322   }
    323   return 0;
    324 }
    325 
    326 /// external ::= 'extern' prototype
    327 static PrototypeAST *ParseExtern() {
    328   getNextToken();  // eat extern.
    329   return ParsePrototype();
    330 }
    331 
    332 //===----------------------------------------------------------------------===//
    333 // Top-Level parsing
    334 //===----------------------------------------------------------------------===//
    335 
    336 static void HandleDefinition() {
    337   if (ParseDefinition()) {
    338     fprintf(stderr, "Parsed a function definition.\n");
    339   } else {
    340     // Skip token for error recovery.
    341     getNextToken();
    342   }
    343 }
    344 
    345 static void HandleExtern() {
    346   if (ParseExtern()) {
    347     fprintf(stderr, "Parsed an extern\n");
    348   } else {
    349     // Skip token for error recovery.
    350     getNextToken();
    351   }
    352 }
    353 
    354 static void HandleTopLevelExpression() {
    355   // Evaluate a top-level expression into an anonymous function.
    356   if (ParseTopLevelExpr()) {
    357     fprintf(stderr, "Parsed a top-level expr\n");
    358   } else {
    359     // Skip token for error recovery.
    360     getNextToken();
    361   }
    362 }
    363 
    364 /// top ::= definition | external | expression | ';'
    365 static void MainLoop() {
    366   while (1) {
    367     fprintf(stderr, "ready> ");
    368     switch (CurTok) {
    369     case tok_eof:    return;
    370     case ';':        getNextToken(); break;  // ignore top-level semicolons.
    371     case tok_def:    HandleDefinition(); break;
    372     case tok_extern: HandleExtern(); break;
    373     default:         HandleTopLevelExpression(); break;
    374     }
    375   }
    376 }
    377 
    378 //===----------------------------------------------------------------------===//
    379 // Main driver code.
    380 //===----------------------------------------------------------------------===//
    381 
    382 int main() {
    383   // Install standard binary operators.
    384   // 1 is lowest precedence.
    385   BinopPrecedence['<'] = 10;
    386   BinopPrecedence['+'] = 20;
    387   BinopPrecedence['-'] = 20;
    388   BinopPrecedence['*'] = 40;  // highest.
    389 
    390   // Prime the first token.
    391   fprintf(stderr, "ready> ");
    392   getNextToken();
    393 
    394   // Run the main "interpreter loop" now.
    395   MainLoop();
    396 
    397   return 0;
    398 }
    399