1 #include <cstdio> 2 #include <cstdlib> 3 #include <string> 4 #include <map> 5 #include <vector> 6 7 //===----------------------------------------------------------------------===// 8 // Lexer 9 //===----------------------------------------------------------------------===// 10 11 // The lexer returns tokens [0-255] if it is an unknown character, otherwise one 12 // of these for known things. 13 enum Token { 14 tok_eof = -1, 15 16 // commands 17 tok_def = -2, tok_extern = -3, 18 19 // primary 20 tok_identifier = -4, tok_number = -5 21 }; 22 23 static std::string IdentifierStr; // Filled in if tok_identifier 24 static double NumVal; // Filled in if tok_number 25 26 /// gettok - Return the next token from standard input. 27 static int gettok() { 28 static int LastChar = ' '; 29 30 // Skip any whitespace. 31 while (isspace(LastChar)) 32 LastChar = getchar(); 33 34 if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]* 35 IdentifierStr = LastChar; 36 while (isalnum((LastChar = getchar()))) 37 IdentifierStr += LastChar; 38 39 if (IdentifierStr == "def") return tok_def; 40 if (IdentifierStr == "extern") return tok_extern; 41 return tok_identifier; 42 } 43 44 if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+ 45 std::string NumStr; 46 do { 47 NumStr += LastChar; 48 LastChar = getchar(); 49 } while (isdigit(LastChar) || LastChar == '.'); 50 51 NumVal = strtod(NumStr.c_str(), 0); 52 return tok_number; 53 } 54 55 if (LastChar == '#') { 56 // Comment until end of line. 57 do LastChar = getchar(); 58 while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); 59 60 if (LastChar != EOF) 61 return gettok(); 62 } 63 64 // Check for end of file. Don't eat the EOF. 65 if (LastChar == EOF) 66 return tok_eof; 67 68 // Otherwise, just return the character as its ascii value. 69 int ThisChar = LastChar; 70 LastChar = getchar(); 71 return ThisChar; 72 } 73 74 //===----------------------------------------------------------------------===// 75 // Abstract Syntax Tree (aka Parse Tree) 76 //===----------------------------------------------------------------------===// 77 78 /// ExprAST - Base class for all expression nodes. 79 class ExprAST { 80 public: 81 virtual ~ExprAST() {} 82 }; 83 84 /// NumberExprAST - Expression class for numeric literals like "1.0". 85 class NumberExprAST : public ExprAST { 86 double Val; 87 public: 88 NumberExprAST(double val) : Val(val) {} 89 }; 90 91 /// VariableExprAST - Expression class for referencing a variable, like "a". 92 class VariableExprAST : public ExprAST { 93 std::string Name; 94 public: 95 VariableExprAST(const std::string &name) : Name(name) {} 96 }; 97 98 /// BinaryExprAST - Expression class for a binary operator. 99 class BinaryExprAST : public ExprAST { 100 char Op; 101 ExprAST *LHS, *RHS; 102 public: 103 BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) 104 : Op(op), LHS(lhs), RHS(rhs) {} 105 }; 106 107 /// CallExprAST - Expression class for function calls. 108 class CallExprAST : public ExprAST { 109 std::string Callee; 110 std::vector<ExprAST*> Args; 111 public: 112 CallExprAST(const std::string &callee, std::vector<ExprAST*> &args) 113 : Callee(callee), Args(args) {} 114 }; 115 116 /// PrototypeAST - This class represents the "prototype" for a function, 117 /// which captures its name, and its argument names (thus implicitly the number 118 /// of arguments the function takes). 119 class PrototypeAST { 120 std::string Name; 121 std::vector<std::string> Args; 122 public: 123 PrototypeAST(const std::string &name, const std::vector<std::string> &args) 124 : Name(name), Args(args) {} 125 126 }; 127 128 /// FunctionAST - This class represents a function definition itself. 129 class FunctionAST { 130 PrototypeAST *Proto; 131 ExprAST *Body; 132 public: 133 FunctionAST(PrototypeAST *proto, ExprAST *body) 134 : Proto(proto), Body(body) {} 135 136 }; 137 138 //===----------------------------------------------------------------------===// 139 // Parser 140 //===----------------------------------------------------------------------===// 141 142 /// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current 143 /// token the parser is looking at. getNextToken reads another token from the 144 /// lexer and updates CurTok with its results. 145 static int CurTok; 146 static int getNextToken() { 147 return CurTok = gettok(); 148 } 149 150 /// BinopPrecedence - This holds the precedence for each binary operator that is 151 /// defined. 152 static std::map<char, int> BinopPrecedence; 153 154 /// GetTokPrecedence - Get the precedence of the pending binary operator token. 155 static int GetTokPrecedence() { 156 if (!isascii(CurTok)) 157 return -1; 158 159 // Make sure it's a declared binop. 160 int TokPrec = BinopPrecedence[CurTok]; 161 if (TokPrec <= 0) return -1; 162 return TokPrec; 163 } 164 165 /// Error* - These are little helper functions for error handling. 166 ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;} 167 PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; } 168 FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; } 169 170 static ExprAST *ParseExpression(); 171 172 /// identifierexpr 173 /// ::= identifier 174 /// ::= identifier '(' expression* ')' 175 static ExprAST *ParseIdentifierExpr() { 176 std::string IdName = IdentifierStr; 177 178 getNextToken(); // eat identifier. 179 180 if (CurTok != '(') // Simple variable ref. 181 return new VariableExprAST(IdName); 182 183 // Call. 184 getNextToken(); // eat ( 185 std::vector<ExprAST*> Args; 186 if (CurTok != ')') { 187 while (1) { 188 ExprAST *Arg = ParseExpression(); 189 if (!Arg) return 0; 190 Args.push_back(Arg); 191 192 if (CurTok == ')') break; 193 194 if (CurTok != ',') 195 return Error("Expected ')' or ',' in argument list"); 196 getNextToken(); 197 } 198 } 199 200 // Eat the ')'. 201 getNextToken(); 202 203 return new CallExprAST(IdName, Args); 204 } 205 206 /// numberexpr ::= number 207 static ExprAST *ParseNumberExpr() { 208 ExprAST *Result = new NumberExprAST(NumVal); 209 getNextToken(); // consume the number 210 return Result; 211 } 212 213 /// parenexpr ::= '(' expression ')' 214 static ExprAST *ParseParenExpr() { 215 getNextToken(); // eat (. 216 ExprAST *V = ParseExpression(); 217 if (!V) return 0; 218 219 if (CurTok != ')') 220 return Error("expected ')'"); 221 getNextToken(); // eat ). 222 return V; 223 } 224 225 /// primary 226 /// ::= identifierexpr 227 /// ::= numberexpr 228 /// ::= parenexpr 229 static ExprAST *ParsePrimary() { 230 switch (CurTok) { 231 default: return Error("unknown token when expecting an expression"); 232 case tok_identifier: return ParseIdentifierExpr(); 233 case tok_number: return ParseNumberExpr(); 234 case '(': return ParseParenExpr(); 235 } 236 } 237 238 /// binoprhs 239 /// ::= ('+' primary)* 240 static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) { 241 // If this is a binop, find its precedence. 242 while (1) { 243 int TokPrec = GetTokPrecedence(); 244 245 // If this is a binop that binds at least as tightly as the current binop, 246 // consume it, otherwise we are done. 247 if (TokPrec < ExprPrec) 248 return LHS; 249 250 // Okay, we know this is a binop. 251 int BinOp = CurTok; 252 getNextToken(); // eat binop 253 254 // Parse the primary expression after the binary operator. 255 ExprAST *RHS = ParsePrimary(); 256 if (!RHS) return 0; 257 258 // If BinOp binds less tightly with RHS than the operator after RHS, let 259 // the pending operator take RHS as its LHS. 260 int NextPrec = GetTokPrecedence(); 261 if (TokPrec < NextPrec) { 262 RHS = ParseBinOpRHS(TokPrec+1, RHS); 263 if (RHS == 0) return 0; 264 } 265 266 // Merge LHS/RHS. 267 LHS = new BinaryExprAST(BinOp, LHS, RHS); 268 } 269 } 270 271 /// expression 272 /// ::= primary binoprhs 273 /// 274 static ExprAST *ParseExpression() { 275 ExprAST *LHS = ParsePrimary(); 276 if (!LHS) return 0; 277 278 return ParseBinOpRHS(0, LHS); 279 } 280 281 /// prototype 282 /// ::= id '(' id* ')' 283 static PrototypeAST *ParsePrototype() { 284 if (CurTok != tok_identifier) 285 return ErrorP("Expected function name in prototype"); 286 287 std::string FnName = IdentifierStr; 288 getNextToken(); 289 290 if (CurTok != '(') 291 return ErrorP("Expected '(' in prototype"); 292 293 std::vector<std::string> ArgNames; 294 while (getNextToken() == tok_identifier) 295 ArgNames.push_back(IdentifierStr); 296 if (CurTok != ')') 297 return ErrorP("Expected ')' in prototype"); 298 299 // success. 300 getNextToken(); // eat ')'. 301 302 return new PrototypeAST(FnName, ArgNames); 303 } 304 305 /// definition ::= 'def' prototype expression 306 static FunctionAST *ParseDefinition() { 307 getNextToken(); // eat def. 308 PrototypeAST *Proto = ParsePrototype(); 309 if (Proto == 0) return 0; 310 311 if (ExprAST *E = ParseExpression()) 312 return new FunctionAST(Proto, E); 313 return 0; 314 } 315 316 /// toplevelexpr ::= expression 317 static FunctionAST *ParseTopLevelExpr() { 318 if (ExprAST *E = ParseExpression()) { 319 // Make an anonymous proto. 320 PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>()); 321 return new FunctionAST(Proto, E); 322 } 323 return 0; 324 } 325 326 /// external ::= 'extern' prototype 327 static PrototypeAST *ParseExtern() { 328 getNextToken(); // eat extern. 329 return ParsePrototype(); 330 } 331 332 //===----------------------------------------------------------------------===// 333 // Top-Level parsing 334 //===----------------------------------------------------------------------===// 335 336 static void HandleDefinition() { 337 if (ParseDefinition()) { 338 fprintf(stderr, "Parsed a function definition.\n"); 339 } else { 340 // Skip token for error recovery. 341 getNextToken(); 342 } 343 } 344 345 static void HandleExtern() { 346 if (ParseExtern()) { 347 fprintf(stderr, "Parsed an extern\n"); 348 } else { 349 // Skip token for error recovery. 350 getNextToken(); 351 } 352 } 353 354 static void HandleTopLevelExpression() { 355 // Evaluate a top-level expression into an anonymous function. 356 if (ParseTopLevelExpr()) { 357 fprintf(stderr, "Parsed a top-level expr\n"); 358 } else { 359 // Skip token for error recovery. 360 getNextToken(); 361 } 362 } 363 364 /// top ::= definition | external | expression | ';' 365 static void MainLoop() { 366 while (1) { 367 fprintf(stderr, "ready> "); 368 switch (CurTok) { 369 case tok_eof: return; 370 case ';': getNextToken(); break; // ignore top-level semicolons. 371 case tok_def: HandleDefinition(); break; 372 case tok_extern: HandleExtern(); break; 373 default: HandleTopLevelExpression(); break; 374 } 375 } 376 } 377 378 //===----------------------------------------------------------------------===// 379 // Main driver code. 380 //===----------------------------------------------------------------------===// 381 382 int main() { 383 // Install standard binary operators. 384 // 1 is lowest precedence. 385 BinopPrecedence['<'] = 10; 386 BinopPrecedence['+'] = 20; 387 BinopPrecedence['-'] = 20; 388 BinopPrecedence['*'] = 40; // highest. 389 390 // Prime the first token. 391 fprintf(stderr, "ready> "); 392 getNextToken(); 393 394 // Run the main "interpreter loop" now. 395 MainLoop(); 396 397 return 0; 398 } 399