1 #include <cctype> 2 #include <cstdio> 3 #include <cstdlib> 4 #include <map> 5 #include <memory> 6 #include <string> 7 #include <vector> 8 9 namespace helper { 10 // Cloning make_unique here until it's standard in C++14. 11 // Using a namespace to avoid conflicting with MSVC's std::make_unique (which 12 // ADL can sometimes find in unqualified calls). 13 template <class T, class... Args> 14 static 15 typename std::enable_if<!std::is_array<T>::value, std::unique_ptr<T>>::type 16 make_unique(Args &&... args) { 17 return std::unique_ptr<T>(new T(std::forward<Args>(args)...)); 18 } 19 } // end namespace helper 20 21 //===----------------------------------------------------------------------===// 22 // Lexer 23 //===----------------------------------------------------------------------===// 24 25 // The lexer returns tokens [0-255] if it is an unknown character, otherwise one 26 // of these for known things. 27 enum Token { 28 tok_eof = -1, 29 30 // commands 31 tok_def = -2, 32 tok_extern = -3, 33 34 // primary 35 tok_identifier = -4, 36 tok_number = -5 37 }; 38 39 static std::string IdentifierStr; // Filled in if tok_identifier 40 static double NumVal; // Filled in if tok_number 41 42 /// gettok - Return the next token from standard input. 43 static int gettok() { 44 static int LastChar = ' '; 45 46 // Skip any whitespace. 47 while (isspace(LastChar)) 48 LastChar = getchar(); 49 50 if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]* 51 IdentifierStr = LastChar; 52 while (isalnum((LastChar = getchar()))) 53 IdentifierStr += LastChar; 54 55 if (IdentifierStr == "def") 56 return tok_def; 57 if (IdentifierStr == "extern") 58 return tok_extern; 59 return tok_identifier; 60 } 61 62 if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+ 63 std::string NumStr; 64 do { 65 NumStr += LastChar; 66 LastChar = getchar(); 67 } while (isdigit(LastChar) || LastChar == '.'); 68 69 NumVal = strtod(NumStr.c_str(), nullptr); 70 return tok_number; 71 } 72 73 if (LastChar == '#') { 74 // Comment until end of line. 75 do 76 LastChar = getchar(); 77 while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); 78 79 if (LastChar != EOF) 80 return gettok(); 81 } 82 83 // Check for end of file. Don't eat the EOF. 84 if (LastChar == EOF) 85 return tok_eof; 86 87 // Otherwise, just return the character as its ascii value. 88 int ThisChar = LastChar; 89 LastChar = getchar(); 90 return ThisChar; 91 } 92 93 //===----------------------------------------------------------------------===// 94 // Abstract Syntax Tree (aka Parse Tree) 95 //===----------------------------------------------------------------------===// 96 namespace { 97 /// ExprAST - Base class for all expression nodes. 98 class ExprAST { 99 public: 100 virtual ~ExprAST() {} 101 }; 102 103 /// NumberExprAST - Expression class for numeric literals like "1.0". 104 class NumberExprAST : public ExprAST { 105 double Val; 106 107 public: 108 NumberExprAST(double Val) : Val(Val) {} 109 }; 110 111 /// VariableExprAST - Expression class for referencing a variable, like "a". 112 class VariableExprAST : public ExprAST { 113 std::string Name; 114 115 public: 116 VariableExprAST(const std::string &Name) : Name(Name) {} 117 }; 118 119 /// BinaryExprAST - Expression class for a binary operator. 120 class BinaryExprAST : public ExprAST { 121 char Op; 122 std::unique_ptr<ExprAST> LHS, RHS; 123 124 public: 125 BinaryExprAST(char Op, std::unique_ptr<ExprAST> LHS, 126 std::unique_ptr<ExprAST> RHS) 127 : Op(Op), LHS(std::move(LHS)), RHS(std::move(RHS)) {} 128 }; 129 130 /// CallExprAST - Expression class for function calls. 131 class CallExprAST : public ExprAST { 132 std::string Callee; 133 std::vector<std::unique_ptr<ExprAST>> Args; 134 135 public: 136 CallExprAST(const std::string &Callee, 137 std::vector<std::unique_ptr<ExprAST>> Args) 138 : Callee(Callee), Args(std::move(Args)) {} 139 }; 140 141 /// PrototypeAST - This class represents the "prototype" for a function, 142 /// which captures its name, and its argument names (thus implicitly the number 143 /// of arguments the function takes). 144 class PrototypeAST { 145 std::string Name; 146 std::vector<std::string> Args; 147 148 public: 149 PrototypeAST(const std::string &Name, std::vector<std::string> Args) 150 : Name(Name), Args(std::move(Args)) {} 151 }; 152 153 /// FunctionAST - This class represents a function definition itself. 154 class FunctionAST { 155 std::unique_ptr<PrototypeAST> Proto; 156 std::unique_ptr<ExprAST> Body; 157 158 public: 159 FunctionAST(std::unique_ptr<PrototypeAST> Proto, 160 std::unique_ptr<ExprAST> Body) 161 : Proto(std::move(Proto)), Body(std::move(Body)) {} 162 }; 163 } // end anonymous namespace 164 165 //===----------------------------------------------------------------------===// 166 // Parser 167 //===----------------------------------------------------------------------===// 168 169 /// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current 170 /// token the parser is looking at. getNextToken reads another token from the 171 /// lexer and updates CurTok with its results. 172 static int CurTok; 173 static int getNextToken() { return CurTok = gettok(); } 174 175 /// BinopPrecedence - This holds the precedence for each binary operator that is 176 /// defined. 177 static std::map<char, int> BinopPrecedence; 178 179 /// GetTokPrecedence - Get the precedence of the pending binary operator token. 180 static int GetTokPrecedence() { 181 if (!isascii(CurTok)) 182 return -1; 183 184 // Make sure it's a declared binop. 185 int TokPrec = BinopPrecedence[CurTok]; 186 if (TokPrec <= 0) 187 return -1; 188 return TokPrec; 189 } 190 191 /// LogError* - These are little helper functions for error handling. 192 std::unique_ptr<ExprAST> LogError(const char *Str) { 193 fprintf(stderr, "Error: %s\n", Str); 194 return nullptr; 195 } 196 std::unique_ptr<PrototypeAST> LogErrorP(const char *Str) { 197 LogError(Str); 198 return nullptr; 199 } 200 201 static std::unique_ptr<ExprAST> ParseExpression(); 202 203 /// numberexpr ::= number 204 static std::unique_ptr<ExprAST> ParseNumberExpr() { 205 auto Result = helper::make_unique<NumberExprAST>(NumVal); 206 getNextToken(); // consume the number 207 return std::move(Result); 208 } 209 210 /// parenexpr ::= '(' expression ')' 211 static std::unique_ptr<ExprAST> ParseParenExpr() { 212 getNextToken(); // eat (. 213 auto V = ParseExpression(); 214 if (!V) 215 return nullptr; 216 217 if (CurTok != ')') 218 return LogError("expected ')'"); 219 getNextToken(); // eat ). 220 return V; 221 } 222 223 /// identifierexpr 224 /// ::= identifier 225 /// ::= identifier '(' expression* ')' 226 static std::unique_ptr<ExprAST> ParseIdentifierExpr() { 227 std::string IdName = IdentifierStr; 228 229 getNextToken(); // eat identifier. 230 231 if (CurTok != '(') // Simple variable ref. 232 return helper::make_unique<VariableExprAST>(IdName); 233 234 // Call. 235 getNextToken(); // eat ( 236 std::vector<std::unique_ptr<ExprAST>> Args; 237 if (CurTok != ')') { 238 while (true) { 239 if (auto Arg = ParseExpression()) 240 Args.push_back(std::move(Arg)); 241 else 242 return nullptr; 243 244 if (CurTok == ')') 245 break; 246 247 if (CurTok != ',') 248 return LogError("Expected ')' or ',' in argument list"); 249 getNextToken(); 250 } 251 } 252 253 // Eat the ')'. 254 getNextToken(); 255 256 return helper::make_unique<CallExprAST>(IdName, std::move(Args)); 257 } 258 259 /// primary 260 /// ::= identifierexpr 261 /// ::= numberexpr 262 /// ::= parenexpr 263 static std::unique_ptr<ExprAST> ParsePrimary() { 264 switch (CurTok) { 265 default: 266 return LogError("unknown token when expecting an expression"); 267 case tok_identifier: 268 return ParseIdentifierExpr(); 269 case tok_number: 270 return ParseNumberExpr(); 271 case '(': 272 return ParseParenExpr(); 273 } 274 } 275 276 /// binoprhs 277 /// ::= ('+' primary)* 278 static std::unique_ptr<ExprAST> ParseBinOpRHS(int ExprPrec, 279 std::unique_ptr<ExprAST> LHS) { 280 // If this is a binop, find its precedence. 281 while (true) { 282 int TokPrec = GetTokPrecedence(); 283 284 // If this is a binop that binds at least as tightly as the current binop, 285 // consume it, otherwise we are done. 286 if (TokPrec < ExprPrec) 287 return LHS; 288 289 // Okay, we know this is a binop. 290 int BinOp = CurTok; 291 getNextToken(); // eat binop 292 293 // Parse the primary expression after the binary operator. 294 auto RHS = ParsePrimary(); 295 if (!RHS) 296 return nullptr; 297 298 // If BinOp binds less tightly with RHS than the operator after RHS, let 299 // the pending operator take RHS as its LHS. 300 int NextPrec = GetTokPrecedence(); 301 if (TokPrec < NextPrec) { 302 RHS = ParseBinOpRHS(TokPrec + 1, std::move(RHS)); 303 if (!RHS) 304 return nullptr; 305 } 306 307 // Merge LHS/RHS. 308 LHS = helper::make_unique<BinaryExprAST>(BinOp, std::move(LHS), 309 std::move(RHS)); 310 } 311 } 312 313 /// expression 314 /// ::= primary binoprhs 315 /// 316 static std::unique_ptr<ExprAST> ParseExpression() { 317 auto LHS = ParsePrimary(); 318 if (!LHS) 319 return nullptr; 320 321 return ParseBinOpRHS(0, std::move(LHS)); 322 } 323 324 /// prototype 325 /// ::= id '(' id* ')' 326 static std::unique_ptr<PrototypeAST> ParsePrototype() { 327 if (CurTok != tok_identifier) 328 return LogErrorP("Expected function name in prototype"); 329 330 std::string FnName = IdentifierStr; 331 getNextToken(); 332 333 if (CurTok != '(') 334 return LogErrorP("Expected '(' in prototype"); 335 336 std::vector<std::string> ArgNames; 337 while (getNextToken() == tok_identifier) 338 ArgNames.push_back(IdentifierStr); 339 if (CurTok != ')') 340 return LogErrorP("Expected ')' in prototype"); 341 342 // success. 343 getNextToken(); // eat ')'. 344 345 return helper::make_unique<PrototypeAST>(FnName, std::move(ArgNames)); 346 } 347 348 /// definition ::= 'def' prototype expression 349 static std::unique_ptr<FunctionAST> ParseDefinition() { 350 getNextToken(); // eat def. 351 auto Proto = ParsePrototype(); 352 if (!Proto) 353 return nullptr; 354 355 if (auto E = ParseExpression()) 356 return helper::make_unique<FunctionAST>(std::move(Proto), std::move(E)); 357 return nullptr; 358 } 359 360 /// toplevelexpr ::= expression 361 static std::unique_ptr<FunctionAST> ParseTopLevelExpr() { 362 if (auto E = ParseExpression()) { 363 // Make an anonymous proto. 364 auto Proto = helper::make_unique<PrototypeAST>("__anon_expr", 365 std::vector<std::string>()); 366 return helper::make_unique<FunctionAST>(std::move(Proto), std::move(E)); 367 } 368 return nullptr; 369 } 370 371 /// external ::= 'extern' prototype 372 static std::unique_ptr<PrototypeAST> ParseExtern() { 373 getNextToken(); // eat extern. 374 return ParsePrototype(); 375 } 376 377 //===----------------------------------------------------------------------===// 378 // Top-Level parsing 379 //===----------------------------------------------------------------------===// 380 381 static void HandleDefinition() { 382 if (ParseDefinition()) { 383 fprintf(stderr, "Parsed a function definition.\n"); 384 } else { 385 // Skip token for error recovery. 386 getNextToken(); 387 } 388 } 389 390 static void HandleExtern() { 391 if (ParseExtern()) { 392 fprintf(stderr, "Parsed an extern\n"); 393 } else { 394 // Skip token for error recovery. 395 getNextToken(); 396 } 397 } 398 399 static void HandleTopLevelExpression() { 400 // Evaluate a top-level expression into an anonymous function. 401 if (ParseTopLevelExpr()) { 402 fprintf(stderr, "Parsed a top-level expr\n"); 403 } else { 404 // Skip token for error recovery. 405 getNextToken(); 406 } 407 } 408 409 /// top ::= definition | external | expression | ';' 410 static void MainLoop() { 411 while (true) { 412 fprintf(stderr, "ready> "); 413 switch (CurTok) { 414 case tok_eof: 415 return; 416 case ';': // ignore top-level semicolons. 417 getNextToken(); 418 break; 419 case tok_def: 420 HandleDefinition(); 421 break; 422 case tok_extern: 423 HandleExtern(); 424 break; 425 default: 426 HandleTopLevelExpression(); 427 break; 428 } 429 } 430 } 431 432 //===----------------------------------------------------------------------===// 433 // Main driver code. 434 //===----------------------------------------------------------------------===// 435 436 int main() { 437 // Install standard binary operators. 438 // 1 is lowest precedence. 439 BinopPrecedence['<'] = 10; 440 BinopPrecedence['+'] = 20; 441 BinopPrecedence['-'] = 20; 442 BinopPrecedence['*'] = 40; // highest. 443 444 // Prime the first token. 445 fprintf(stderr, "ready> "); 446 getNextToken(); 447 448 // Run the main "interpreter loop" now. 449 MainLoop(); 450 451 return 0; 452 } 453