1 #include <cstdio> 2 #include <cstdlib> 3 #include <map> 4 #include <string> 5 #include <vector> 6 7 //===----------------------------------------------------------------------===// 8 // Lexer 9 //===----------------------------------------------------------------------===// 10 11 // The lexer returns tokens [0-255] if it is an unknown character, otherwise one 12 // of these for known things. 13 enum Token { 14 tok_eof = -1, 15 16 // commands 17 tok_def = -2, tok_extern = -3, 18 19 // primary 20 tok_identifier = -4, tok_number = -5 21 }; 22 23 static std::string IdentifierStr; // Filled in if tok_identifier 24 static double NumVal; // Filled in if tok_number 25 26 /// gettok - Return the next token from standard input. 27 static int gettok() { 28 static int LastChar = ' '; 29 30 // Skip any whitespace. 31 while (isspace(LastChar)) 32 LastChar = getchar(); 33 34 if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]* 35 IdentifierStr = LastChar; 36 while (isalnum((LastChar = getchar()))) 37 IdentifierStr += LastChar; 38 39 if (IdentifierStr == "def") return tok_def; 40 if (IdentifierStr == "extern") return tok_extern; 41 return tok_identifier; 42 } 43 44 if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+ 45 std::string NumStr; 46 do { 47 NumStr += LastChar; 48 LastChar = getchar(); 49 } while (isdigit(LastChar) || LastChar == '.'); 50 51 NumVal = strtod(NumStr.c_str(), 0); 52 return tok_number; 53 } 54 55 if (LastChar == '#') { 56 // Comment until end of line. 57 do LastChar = getchar(); 58 while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); 59 60 if (LastChar != EOF) 61 return gettok(); 62 } 63 64 // Check for end of file. Don't eat the EOF. 65 if (LastChar == EOF) 66 return tok_eof; 67 68 // Otherwise, just return the character as its ascii value. 69 int ThisChar = LastChar; 70 LastChar = getchar(); 71 return ThisChar; 72 } 73 74 //===----------------------------------------------------------------------===// 75 // Abstract Syntax Tree (aka Parse Tree) 76 //===----------------------------------------------------------------------===// 77 78 /// ExprAST - Base class for all expression nodes. 79 class ExprAST { 80 public: 81 virtual ~ExprAST() {} 82 }; 83 84 /// NumberExprAST - Expression class for numeric literals like "1.0". 85 class NumberExprAST : public ExprAST { 86 public: 87 NumberExprAST(double val) {} 88 }; 89 90 /// VariableExprAST - Expression class for referencing a variable, like "a". 91 class VariableExprAST : public ExprAST { 92 std::string Name; 93 public: 94 VariableExprAST(const std::string &name) : Name(name) {} 95 }; 96 97 /// BinaryExprAST - Expression class for a binary operator. 98 class BinaryExprAST : public ExprAST { 99 public: 100 BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) {} 101 }; 102 103 /// CallExprAST - Expression class for function calls. 104 class CallExprAST : public ExprAST { 105 std::string Callee; 106 std::vector<ExprAST*> Args; 107 public: 108 CallExprAST(const std::string &callee, std::vector<ExprAST*> &args) 109 : Callee(callee), Args(args) {} 110 }; 111 112 /// PrototypeAST - This class represents the "prototype" for a function, 113 /// which captures its name, and its argument names (thus implicitly the number 114 /// of arguments the function takes). 115 class PrototypeAST { 116 std::string Name; 117 std::vector<std::string> Args; 118 public: 119 PrototypeAST(const std::string &name, const std::vector<std::string> &args) 120 : Name(name), Args(args) {} 121 122 }; 123 124 /// FunctionAST - This class represents a function definition itself. 125 class FunctionAST { 126 public: 127 FunctionAST(PrototypeAST *proto, ExprAST *body) {} 128 }; 129 130 //===----------------------------------------------------------------------===// 131 // Parser 132 //===----------------------------------------------------------------------===// 133 134 /// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current 135 /// token the parser is looking at. getNextToken reads another token from the 136 /// lexer and updates CurTok with its results. 137 static int CurTok; 138 static int getNextToken() { 139 return CurTok = gettok(); 140 } 141 142 /// BinopPrecedence - This holds the precedence for each binary operator that is 143 /// defined. 144 static std::map<char, int> BinopPrecedence; 145 146 /// GetTokPrecedence - Get the precedence of the pending binary operator token. 147 static int GetTokPrecedence() { 148 if (!isascii(CurTok)) 149 return -1; 150 151 // Make sure it's a declared binop. 152 int TokPrec = BinopPrecedence[CurTok]; 153 if (TokPrec <= 0) return -1; 154 return TokPrec; 155 } 156 157 /// Error* - These are little helper functions for error handling. 158 ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;} 159 PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; } 160 FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; } 161 162 static ExprAST *ParseExpression(); 163 164 /// identifierexpr 165 /// ::= identifier 166 /// ::= identifier '(' expression* ')' 167 static ExprAST *ParseIdentifierExpr() { 168 std::string IdName = IdentifierStr; 169 170 getNextToken(); // eat identifier. 171 172 if (CurTok != '(') // Simple variable ref. 173 return new VariableExprAST(IdName); 174 175 // Call. 176 getNextToken(); // eat ( 177 std::vector<ExprAST*> Args; 178 if (CurTok != ')') { 179 while (1) { 180 ExprAST *Arg = ParseExpression(); 181 if (!Arg) return 0; 182 Args.push_back(Arg); 183 184 if (CurTok == ')') break; 185 186 if (CurTok != ',') 187 return Error("Expected ')' or ',' in argument list"); 188 getNextToken(); 189 } 190 } 191 192 // Eat the ')'. 193 getNextToken(); 194 195 return new CallExprAST(IdName, Args); 196 } 197 198 /// numberexpr ::= number 199 static ExprAST *ParseNumberExpr() { 200 ExprAST *Result = new NumberExprAST(NumVal); 201 getNextToken(); // consume the number 202 return Result; 203 } 204 205 /// parenexpr ::= '(' expression ')' 206 static ExprAST *ParseParenExpr() { 207 getNextToken(); // eat (. 208 ExprAST *V = ParseExpression(); 209 if (!V) return 0; 210 211 if (CurTok != ')') 212 return Error("expected ')'"); 213 getNextToken(); // eat ). 214 return V; 215 } 216 217 /// primary 218 /// ::= identifierexpr 219 /// ::= numberexpr 220 /// ::= parenexpr 221 static ExprAST *ParsePrimary() { 222 switch (CurTok) { 223 default: return Error("unknown token when expecting an expression"); 224 case tok_identifier: return ParseIdentifierExpr(); 225 case tok_number: return ParseNumberExpr(); 226 case '(': return ParseParenExpr(); 227 } 228 } 229 230 /// binoprhs 231 /// ::= ('+' primary)* 232 static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) { 233 // If this is a binop, find its precedence. 234 while (1) { 235 int TokPrec = GetTokPrecedence(); 236 237 // If this is a binop that binds at least as tightly as the current binop, 238 // consume it, otherwise we are done. 239 if (TokPrec < ExprPrec) 240 return LHS; 241 242 // Okay, we know this is a binop. 243 int BinOp = CurTok; 244 getNextToken(); // eat binop 245 246 // Parse the primary expression after the binary operator. 247 ExprAST *RHS = ParsePrimary(); 248 if (!RHS) return 0; 249 250 // If BinOp binds less tightly with RHS than the operator after RHS, let 251 // the pending operator take RHS as its LHS. 252 int NextPrec = GetTokPrecedence(); 253 if (TokPrec < NextPrec) { 254 RHS = ParseBinOpRHS(TokPrec+1, RHS); 255 if (RHS == 0) return 0; 256 } 257 258 // Merge LHS/RHS. 259 LHS = new BinaryExprAST(BinOp, LHS, RHS); 260 } 261 } 262 263 /// expression 264 /// ::= primary binoprhs 265 /// 266 static ExprAST *ParseExpression() { 267 ExprAST *LHS = ParsePrimary(); 268 if (!LHS) return 0; 269 270 return ParseBinOpRHS(0, LHS); 271 } 272 273 /// prototype 274 /// ::= id '(' id* ')' 275 static PrototypeAST *ParsePrototype() { 276 if (CurTok != tok_identifier) 277 return ErrorP("Expected function name in prototype"); 278 279 std::string FnName = IdentifierStr; 280 getNextToken(); 281 282 if (CurTok != '(') 283 return ErrorP("Expected '(' in prototype"); 284 285 std::vector<std::string> ArgNames; 286 while (getNextToken() == tok_identifier) 287 ArgNames.push_back(IdentifierStr); 288 if (CurTok != ')') 289 return ErrorP("Expected ')' in prototype"); 290 291 // success. 292 getNextToken(); // eat ')'. 293 294 return new PrototypeAST(FnName, ArgNames); 295 } 296 297 /// definition ::= 'def' prototype expression 298 static FunctionAST *ParseDefinition() { 299 getNextToken(); // eat def. 300 PrototypeAST *Proto = ParsePrototype(); 301 if (Proto == 0) return 0; 302 303 if (ExprAST *E = ParseExpression()) 304 return new FunctionAST(Proto, E); 305 return 0; 306 } 307 308 /// toplevelexpr ::= expression 309 static FunctionAST *ParseTopLevelExpr() { 310 if (ExprAST *E = ParseExpression()) { 311 // Make an anonymous proto. 312 PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>()); 313 return new FunctionAST(Proto, E); 314 } 315 return 0; 316 } 317 318 /// external ::= 'extern' prototype 319 static PrototypeAST *ParseExtern() { 320 getNextToken(); // eat extern. 321 return ParsePrototype(); 322 } 323 324 //===----------------------------------------------------------------------===// 325 // Top-Level parsing 326 //===----------------------------------------------------------------------===// 327 328 static void HandleDefinition() { 329 if (ParseDefinition()) { 330 fprintf(stderr, "Parsed a function definition.\n"); 331 } else { 332 // Skip token for error recovery. 333 getNextToken(); 334 } 335 } 336 337 static void HandleExtern() { 338 if (ParseExtern()) { 339 fprintf(stderr, "Parsed an extern\n"); 340 } else { 341 // Skip token for error recovery. 342 getNextToken(); 343 } 344 } 345 346 static void HandleTopLevelExpression() { 347 // Evaluate a top-level expression into an anonymous function. 348 if (ParseTopLevelExpr()) { 349 fprintf(stderr, "Parsed a top-level expr\n"); 350 } else { 351 // Skip token for error recovery. 352 getNextToken(); 353 } 354 } 355 356 /// top ::= definition | external | expression | ';' 357 static void MainLoop() { 358 while (1) { 359 fprintf(stderr, "ready> "); 360 switch (CurTok) { 361 case tok_eof: return; 362 case ';': getNextToken(); break; // ignore top-level semicolons. 363 case tok_def: HandleDefinition(); break; 364 case tok_extern: HandleExtern(); break; 365 default: HandleTopLevelExpression(); break; 366 } 367 } 368 } 369 370 //===----------------------------------------------------------------------===// 371 // Main driver code. 372 //===----------------------------------------------------------------------===// 373 374 int main() { 375 // Install standard binary operators. 376 // 1 is lowest precedence. 377 BinopPrecedence['<'] = 10; 378 BinopPrecedence['+'] = 20; 379 BinopPrecedence['-'] = 20; 380 BinopPrecedence['*'] = 40; // highest. 381 382 // Prime the first token. 383 fprintf(stderr, "ready> "); 384 getNextToken(); 385 386 // Run the main "interpreter loop" now. 387 MainLoop(); 388 389 return 0; 390 } 391