1 #include <cctype> 2 #include <cstdio> 3 #include <cstdlib> 4 #include <map> 5 #include <string> 6 #include <vector> 7 8 //===----------------------------------------------------------------------===// 9 // Lexer 10 //===----------------------------------------------------------------------===// 11 12 // The lexer returns tokens [0-255] if it is an unknown character, otherwise one 13 // of these for known things. 14 enum Token { 15 tok_eof = -1, 16 17 // commands 18 tok_def = -2, tok_extern = -3, 19 20 // primary 21 tok_identifier = -4, tok_number = -5 22 }; 23 24 static std::string IdentifierStr; // Filled in if tok_identifier 25 static double NumVal; // Filled in if tok_number 26 27 /// gettok - Return the next token from standard input. 28 static int gettok() { 29 static int LastChar = ' '; 30 31 // Skip any whitespace. 32 while (isspace(LastChar)) 33 LastChar = getchar(); 34 35 if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]* 36 IdentifierStr = LastChar; 37 while (isalnum((LastChar = getchar()))) 38 IdentifierStr += LastChar; 39 40 if (IdentifierStr == "def") return tok_def; 41 if (IdentifierStr == "extern") return tok_extern; 42 return tok_identifier; 43 } 44 45 if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+ 46 std::string NumStr; 47 do { 48 NumStr += LastChar; 49 LastChar = getchar(); 50 } while (isdigit(LastChar) || LastChar == '.'); 51 52 NumVal = strtod(NumStr.c_str(), 0); 53 return tok_number; 54 } 55 56 if (LastChar == '#') { 57 // Comment until end of line. 58 do LastChar = getchar(); 59 while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); 60 61 if (LastChar != EOF) 62 return gettok(); 63 } 64 65 // Check for end of file. Don't eat the EOF. 66 if (LastChar == EOF) 67 return tok_eof; 68 69 // Otherwise, just return the character as its ascii value. 70 int ThisChar = LastChar; 71 LastChar = getchar(); 72 return ThisChar; 73 } 74 75 //===----------------------------------------------------------------------===// 76 // Abstract Syntax Tree (aka Parse Tree) 77 //===----------------------------------------------------------------------===// 78 namespace { 79 /// ExprAST - Base class for all expression nodes. 80 class ExprAST { 81 public: 82 virtual ~ExprAST() {} 83 }; 84 85 /// NumberExprAST - Expression class for numeric literals like "1.0". 86 class NumberExprAST : public ExprAST { 87 public: 88 NumberExprAST(double val) {} 89 }; 90 91 /// VariableExprAST - Expression class for referencing a variable, like "a". 92 class VariableExprAST : public ExprAST { 93 std::string Name; 94 public: 95 VariableExprAST(const std::string &name) : Name(name) {} 96 }; 97 98 /// BinaryExprAST - Expression class for a binary operator. 99 class BinaryExprAST : public ExprAST { 100 public: 101 BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) {} 102 }; 103 104 /// CallExprAST - Expression class for function calls. 105 class CallExprAST : public ExprAST { 106 std::string Callee; 107 std::vector<ExprAST*> Args; 108 public: 109 CallExprAST(const std::string &callee, std::vector<ExprAST*> &args) 110 : Callee(callee), Args(args) {} 111 }; 112 113 /// PrototypeAST - This class represents the "prototype" for a function, 114 /// which captures its name, and its argument names (thus implicitly the number 115 /// of arguments the function takes). 116 class PrototypeAST { 117 std::string Name; 118 std::vector<std::string> Args; 119 public: 120 PrototypeAST(const std::string &name, const std::vector<std::string> &args) 121 : Name(name), Args(args) {} 122 123 }; 124 125 /// FunctionAST - This class represents a function definition itself. 126 class FunctionAST { 127 public: 128 FunctionAST(PrototypeAST *proto, ExprAST *body) {} 129 }; 130 } // end anonymous namespace 131 132 //===----------------------------------------------------------------------===// 133 // Parser 134 //===----------------------------------------------------------------------===// 135 136 /// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current 137 /// token the parser is looking at. getNextToken reads another token from the 138 /// lexer and updates CurTok with its results. 139 static int CurTok; 140 static int getNextToken() { 141 return CurTok = gettok(); 142 } 143 144 /// BinopPrecedence - This holds the precedence for each binary operator that is 145 /// defined. 146 static std::map<char, int> BinopPrecedence; 147 148 /// GetTokPrecedence - Get the precedence of the pending binary operator token. 149 static int GetTokPrecedence() { 150 if (!isascii(CurTok)) 151 return -1; 152 153 // Make sure it's a declared binop. 154 int TokPrec = BinopPrecedence[CurTok]; 155 if (TokPrec <= 0) return -1; 156 return TokPrec; 157 } 158 159 /// Error* - These are little helper functions for error handling. 160 ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;} 161 PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; } 162 163 static ExprAST *ParseExpression(); 164 165 /// identifierexpr 166 /// ::= identifier 167 /// ::= identifier '(' expression* ')' 168 static ExprAST *ParseIdentifierExpr() { 169 std::string IdName = IdentifierStr; 170 171 getNextToken(); // eat identifier. 172 173 if (CurTok != '(') // Simple variable ref. 174 return new VariableExprAST(IdName); 175 176 // Call. 177 getNextToken(); // eat ( 178 std::vector<ExprAST*> Args; 179 if (CurTok != ')') { 180 while (1) { 181 ExprAST *Arg = ParseExpression(); 182 if (!Arg) return 0; 183 Args.push_back(Arg); 184 185 if (CurTok == ')') break; 186 187 if (CurTok != ',') 188 return Error("Expected ')' or ',' in argument list"); 189 getNextToken(); 190 } 191 } 192 193 // Eat the ')'. 194 getNextToken(); 195 196 return new CallExprAST(IdName, Args); 197 } 198 199 /// numberexpr ::= number 200 static ExprAST *ParseNumberExpr() { 201 ExprAST *Result = new NumberExprAST(NumVal); 202 getNextToken(); // consume the number 203 return Result; 204 } 205 206 /// parenexpr ::= '(' expression ')' 207 static ExprAST *ParseParenExpr() { 208 getNextToken(); // eat (. 209 ExprAST *V = ParseExpression(); 210 if (!V) return 0; 211 212 if (CurTok != ')') 213 return Error("expected ')'"); 214 getNextToken(); // eat ). 215 return V; 216 } 217 218 /// primary 219 /// ::= identifierexpr 220 /// ::= numberexpr 221 /// ::= parenexpr 222 static ExprAST *ParsePrimary() { 223 switch (CurTok) { 224 default: return Error("unknown token when expecting an expression"); 225 case tok_identifier: return ParseIdentifierExpr(); 226 case tok_number: return ParseNumberExpr(); 227 case '(': return ParseParenExpr(); 228 } 229 } 230 231 /// binoprhs 232 /// ::= ('+' primary)* 233 static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) { 234 // If this is a binop, find its precedence. 235 while (1) { 236 int TokPrec = GetTokPrecedence(); 237 238 // If this is a binop that binds at least as tightly as the current binop, 239 // consume it, otherwise we are done. 240 if (TokPrec < ExprPrec) 241 return LHS; 242 243 // Okay, we know this is a binop. 244 int BinOp = CurTok; 245 getNextToken(); // eat binop 246 247 // Parse the primary expression after the binary operator. 248 ExprAST *RHS = ParsePrimary(); 249 if (!RHS) return 0; 250 251 // If BinOp binds less tightly with RHS than the operator after RHS, let 252 // the pending operator take RHS as its LHS. 253 int NextPrec = GetTokPrecedence(); 254 if (TokPrec < NextPrec) { 255 RHS = ParseBinOpRHS(TokPrec+1, RHS); 256 if (RHS == 0) return 0; 257 } 258 259 // Merge LHS/RHS. 260 LHS = new BinaryExprAST(BinOp, LHS, RHS); 261 } 262 } 263 264 /// expression 265 /// ::= primary binoprhs 266 /// 267 static ExprAST *ParseExpression() { 268 ExprAST *LHS = ParsePrimary(); 269 if (!LHS) return 0; 270 271 return ParseBinOpRHS(0, LHS); 272 } 273 274 /// prototype 275 /// ::= id '(' id* ')' 276 static PrototypeAST *ParsePrototype() { 277 if (CurTok != tok_identifier) 278 return ErrorP("Expected function name in prototype"); 279 280 std::string FnName = IdentifierStr; 281 getNextToken(); 282 283 if (CurTok != '(') 284 return ErrorP("Expected '(' in prototype"); 285 286 std::vector<std::string> ArgNames; 287 while (getNextToken() == tok_identifier) 288 ArgNames.push_back(IdentifierStr); 289 if (CurTok != ')') 290 return ErrorP("Expected ')' in prototype"); 291 292 // success. 293 getNextToken(); // eat ')'. 294 295 return new PrototypeAST(FnName, ArgNames); 296 } 297 298 /// definition ::= 'def' prototype expression 299 static FunctionAST *ParseDefinition() { 300 getNextToken(); // eat def. 301 PrototypeAST *Proto = ParsePrototype(); 302 if (Proto == 0) return 0; 303 304 if (ExprAST *E = ParseExpression()) 305 return new FunctionAST(Proto, E); 306 return 0; 307 } 308 309 /// toplevelexpr ::= expression 310 static FunctionAST *ParseTopLevelExpr() { 311 if (ExprAST *E = ParseExpression()) { 312 // Make an anonymous proto. 313 PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>()); 314 return new FunctionAST(Proto, E); 315 } 316 return 0; 317 } 318 319 /// external ::= 'extern' prototype 320 static PrototypeAST *ParseExtern() { 321 getNextToken(); // eat extern. 322 return ParsePrototype(); 323 } 324 325 //===----------------------------------------------------------------------===// 326 // Top-Level parsing 327 //===----------------------------------------------------------------------===// 328 329 static void HandleDefinition() { 330 if (ParseDefinition()) { 331 fprintf(stderr, "Parsed a function definition.\n"); 332 } else { 333 // Skip token for error recovery. 334 getNextToken(); 335 } 336 } 337 338 static void HandleExtern() { 339 if (ParseExtern()) { 340 fprintf(stderr, "Parsed an extern\n"); 341 } else { 342 // Skip token for error recovery. 343 getNextToken(); 344 } 345 } 346 347 static void HandleTopLevelExpression() { 348 // Evaluate a top-level expression into an anonymous function. 349 if (ParseTopLevelExpr()) { 350 fprintf(stderr, "Parsed a top-level expr\n"); 351 } else { 352 // Skip token for error recovery. 353 getNextToken(); 354 } 355 } 356 357 /// top ::= definition | external | expression | ';' 358 static void MainLoop() { 359 while (1) { 360 fprintf(stderr, "ready> "); 361 switch (CurTok) { 362 case tok_eof: return; 363 case ';': getNextToken(); break; // ignore top-level semicolons. 364 case tok_def: HandleDefinition(); break; 365 case tok_extern: HandleExtern(); break; 366 default: HandleTopLevelExpression(); break; 367 } 368 } 369 } 370 371 //===----------------------------------------------------------------------===// 372 // Main driver code. 373 //===----------------------------------------------------------------------===// 374 375 int main() { 376 // Install standard binary operators. 377 // 1 is lowest precedence. 378 BinopPrecedence['<'] = 10; 379 BinopPrecedence['+'] = 20; 380 BinopPrecedence['-'] = 20; 381 BinopPrecedence['*'] = 40; // highest. 382 383 // Prime the first token. 384 fprintf(stderr, "ready> "); 385 getNextToken(); 386 387 // Run the main "interpreter loop" now. 388 MainLoop(); 389 390 return 0; 391 } 392