1 #include "llvm/ADT/STLExtras.h" 2 #include "llvm/Analysis/Passes.h" 3 #include "llvm/IR/IRBuilder.h" 4 #include "llvm/IR/LLVMContext.h" 5 #include "llvm/IR/LegacyPassManager.h" 6 #include "llvm/IR/Module.h" 7 #include "llvm/IR/Verifier.h" 8 #include "llvm/Support/TargetSelect.h" 9 #include "llvm/Transforms/Scalar.h" 10 #include <cctype> 11 #include <cstdio> 12 #include <map> 13 #include <string> 14 #include <vector> 15 #include "../include/KaleidoscopeJIT.h" 16 17 using namespace llvm; 18 using namespace llvm::orc; 19 20 //===----------------------------------------------------------------------===// 21 // Lexer 22 //===----------------------------------------------------------------------===// 23 24 // The lexer returns tokens [0-255] if it is an unknown character, otherwise one 25 // of these for known things. 26 enum Token { 27 tok_eof = -1, 28 29 // commands 30 tok_def = -2, 31 tok_extern = -3, 32 33 // primary 34 tok_identifier = -4, 35 tok_number = -5, 36 37 // control 38 tok_if = -6, 39 tok_then = -7, 40 tok_else = -8, 41 tok_for = -9, 42 tok_in = -10, 43 44 // operators 45 tok_binary = -11, 46 tok_unary = -12, 47 48 // var definition 49 tok_var = -13 50 }; 51 52 static std::string IdentifierStr; // Filled in if tok_identifier 53 static double NumVal; // Filled in if tok_number 54 55 /// gettok - Return the next token from standard input. 56 static int gettok() { 57 static int LastChar = ' '; 58 59 // Skip any whitespace. 60 while (isspace(LastChar)) 61 LastChar = getchar(); 62 63 if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]* 64 IdentifierStr = LastChar; 65 while (isalnum((LastChar = getchar()))) 66 IdentifierStr += LastChar; 67 68 if (IdentifierStr == "def") 69 return tok_def; 70 if (IdentifierStr == "extern") 71 return tok_extern; 72 if (IdentifierStr == "if") 73 return tok_if; 74 if (IdentifierStr == "then") 75 return tok_then; 76 if (IdentifierStr == "else") 77 return tok_else; 78 if (IdentifierStr == "for") 79 return tok_for; 80 if (IdentifierStr == "in") 81 return tok_in; 82 if (IdentifierStr == "binary") 83 return tok_binary; 84 if (IdentifierStr == "unary") 85 return tok_unary; 86 if (IdentifierStr == "var") 87 return tok_var; 88 return tok_identifier; 89 } 90 91 if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+ 92 std::string NumStr; 93 do { 94 NumStr += LastChar; 95 LastChar = getchar(); 96 } while (isdigit(LastChar) || LastChar == '.'); 97 98 NumVal = strtod(NumStr.c_str(), nullptr); 99 return tok_number; 100 } 101 102 if (LastChar == '#') { 103 // Comment until end of line. 104 do 105 LastChar = getchar(); 106 while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); 107 108 if (LastChar != EOF) 109 return gettok(); 110 } 111 112 // Check for end of file. Don't eat the EOF. 113 if (LastChar == EOF) 114 return tok_eof; 115 116 // Otherwise, just return the character as its ascii value. 117 int ThisChar = LastChar; 118 LastChar = getchar(); 119 return ThisChar; 120 } 121 122 //===----------------------------------------------------------------------===// 123 // Abstract Syntax Tree (aka Parse Tree) 124 //===----------------------------------------------------------------------===// 125 namespace { 126 /// ExprAST - Base class for all expression nodes. 127 class ExprAST { 128 public: 129 virtual ~ExprAST() {} 130 virtual Value *codegen() = 0; 131 }; 132 133 /// NumberExprAST - Expression class for numeric literals like "1.0". 134 class NumberExprAST : public ExprAST { 135 double Val; 136 137 public: 138 NumberExprAST(double Val) : Val(Val) {} 139 Value *codegen() override; 140 }; 141 142 /// VariableExprAST - Expression class for referencing a variable, like "a". 143 class VariableExprAST : public ExprAST { 144 std::string Name; 145 146 public: 147 VariableExprAST(const std::string &Name) : Name(Name) {} 148 const std::string &getName() const { return Name; } 149 Value *codegen() override; 150 }; 151 152 /// UnaryExprAST - Expression class for a unary operator. 153 class UnaryExprAST : public ExprAST { 154 char Opcode; 155 std::unique_ptr<ExprAST> Operand; 156 157 public: 158 UnaryExprAST(char Opcode, std::unique_ptr<ExprAST> Operand) 159 : Opcode(Opcode), Operand(std::move(Operand)) {} 160 Value *codegen() override; 161 }; 162 163 /// BinaryExprAST - Expression class for a binary operator. 164 class BinaryExprAST : public ExprAST { 165 char Op; 166 std::unique_ptr<ExprAST> LHS, RHS; 167 168 public: 169 BinaryExprAST(char Op, std::unique_ptr<ExprAST> LHS, 170 std::unique_ptr<ExprAST> RHS) 171 : Op(Op), LHS(std::move(LHS)), RHS(std::move(RHS)) {} 172 Value *codegen() override; 173 }; 174 175 /// CallExprAST - Expression class for function calls. 176 class CallExprAST : public ExprAST { 177 std::string Callee; 178 std::vector<std::unique_ptr<ExprAST>> Args; 179 180 public: 181 CallExprAST(const std::string &Callee, 182 std::vector<std::unique_ptr<ExprAST>> Args) 183 : Callee(Callee), Args(std::move(Args)) {} 184 Value *codegen() override; 185 }; 186 187 /// IfExprAST - Expression class for if/then/else. 188 class IfExprAST : public ExprAST { 189 std::unique_ptr<ExprAST> Cond, Then, Else; 190 191 public: 192 IfExprAST(std::unique_ptr<ExprAST> Cond, std::unique_ptr<ExprAST> Then, 193 std::unique_ptr<ExprAST> Else) 194 : Cond(std::move(Cond)), Then(std::move(Then)), Else(std::move(Else)) {} 195 Value *codegen() override; 196 }; 197 198 /// ForExprAST - Expression class for for/in. 199 class ForExprAST : public ExprAST { 200 std::string VarName; 201 std::unique_ptr<ExprAST> Start, End, Step, Body; 202 203 public: 204 ForExprAST(const std::string &VarName, std::unique_ptr<ExprAST> Start, 205 std::unique_ptr<ExprAST> End, std::unique_ptr<ExprAST> Step, 206 std::unique_ptr<ExprAST> Body) 207 : VarName(VarName), Start(std::move(Start)), End(std::move(End)), 208 Step(std::move(Step)), Body(std::move(Body)) {} 209 Value *codegen() override; 210 }; 211 212 /// VarExprAST - Expression class for var/in 213 class VarExprAST : public ExprAST { 214 std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames; 215 std::unique_ptr<ExprAST> Body; 216 217 public: 218 VarExprAST( 219 std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames, 220 std::unique_ptr<ExprAST> Body) 221 : VarNames(std::move(VarNames)), Body(std::move(Body)) {} 222 Value *codegen() override; 223 }; 224 225 /// PrototypeAST - This class represents the "prototype" for a function, 226 /// which captures its name, and its argument names (thus implicitly the number 227 /// of arguments the function takes), as well as if it is an operator. 228 class PrototypeAST { 229 std::string Name; 230 std::vector<std::string> Args; 231 bool IsOperator; 232 unsigned Precedence; // Precedence if a binary op. 233 234 public: 235 PrototypeAST(const std::string &Name, std::vector<std::string> Args, 236 bool IsOperator = false, unsigned Prec = 0) 237 : Name(Name), Args(std::move(Args)), IsOperator(IsOperator), 238 Precedence(Prec) {} 239 Function *codegen(); 240 const std::string &getName() const { return Name; } 241 242 bool isUnaryOp() const { return IsOperator && Args.size() == 1; } 243 bool isBinaryOp() const { return IsOperator && Args.size() == 2; } 244 245 char getOperatorName() const { 246 assert(isUnaryOp() || isBinaryOp()); 247 return Name[Name.size() - 1]; 248 } 249 250 unsigned getBinaryPrecedence() const { return Precedence; } 251 }; 252 253 /// FunctionAST - This class represents a function definition itself. 254 class FunctionAST { 255 std::unique_ptr<PrototypeAST> Proto; 256 std::unique_ptr<ExprAST> Body; 257 258 public: 259 FunctionAST(std::unique_ptr<PrototypeAST> Proto, 260 std::unique_ptr<ExprAST> Body) 261 : Proto(std::move(Proto)), Body(std::move(Body)) {} 262 Function *codegen(); 263 }; 264 } // end anonymous namespace 265 266 //===----------------------------------------------------------------------===// 267 // Parser 268 //===----------------------------------------------------------------------===// 269 270 /// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current 271 /// token the parser is looking at. getNextToken reads another token from the 272 /// lexer and updates CurTok with its results. 273 static int CurTok; 274 static int getNextToken() { return CurTok = gettok(); } 275 276 /// BinopPrecedence - This holds the precedence for each binary operator that is 277 /// defined. 278 static std::map<char, int> BinopPrecedence; 279 280 /// GetTokPrecedence - Get the precedence of the pending binary operator token. 281 static int GetTokPrecedence() { 282 if (!isascii(CurTok)) 283 return -1; 284 285 // Make sure it's a declared binop. 286 int TokPrec = BinopPrecedence[CurTok]; 287 if (TokPrec <= 0) 288 return -1; 289 return TokPrec; 290 } 291 292 /// Error* - These are little helper functions for error handling. 293 std::unique_ptr<ExprAST> Error(const char *Str) { 294 fprintf(stderr, "Error: %s\n", Str); 295 return nullptr; 296 } 297 298 std::unique_ptr<PrototypeAST> ErrorP(const char *Str) { 299 Error(Str); 300 return nullptr; 301 } 302 303 static std::unique_ptr<ExprAST> ParseExpression(); 304 305 /// numberexpr ::= number 306 static std::unique_ptr<ExprAST> ParseNumberExpr() { 307 auto Result = llvm::make_unique<NumberExprAST>(NumVal); 308 getNextToken(); // consume the number 309 return std::move(Result); 310 } 311 312 /// parenexpr ::= '(' expression ')' 313 static std::unique_ptr<ExprAST> ParseParenExpr() { 314 getNextToken(); // eat (. 315 auto V = ParseExpression(); 316 if (!V) 317 return nullptr; 318 319 if (CurTok != ')') 320 return Error("expected ')'"); 321 getNextToken(); // eat ). 322 return V; 323 } 324 325 /// identifierexpr 326 /// ::= identifier 327 /// ::= identifier '(' expression* ')' 328 static std::unique_ptr<ExprAST> ParseIdentifierExpr() { 329 std::string IdName = IdentifierStr; 330 331 getNextToken(); // eat identifier. 332 333 if (CurTok != '(') // Simple variable ref. 334 return llvm::make_unique<VariableExprAST>(IdName); 335 336 // Call. 337 getNextToken(); // eat ( 338 std::vector<std::unique_ptr<ExprAST>> Args; 339 if (CurTok != ')') { 340 while (1) { 341 if (auto Arg = ParseExpression()) 342 Args.push_back(std::move(Arg)); 343 else 344 return nullptr; 345 346 if (CurTok == ')') 347 break; 348 349 if (CurTok != ',') 350 return Error("Expected ')' or ',' in argument list"); 351 getNextToken(); 352 } 353 } 354 355 // Eat the ')'. 356 getNextToken(); 357 358 return llvm::make_unique<CallExprAST>(IdName, std::move(Args)); 359 } 360 361 /// ifexpr ::= 'if' expression 'then' expression 'else' expression 362 static std::unique_ptr<ExprAST> ParseIfExpr() { 363 getNextToken(); // eat the if. 364 365 // condition. 366 auto Cond = ParseExpression(); 367 if (!Cond) 368 return nullptr; 369 370 if (CurTok != tok_then) 371 return Error("expected then"); 372 getNextToken(); // eat the then 373 374 auto Then = ParseExpression(); 375 if (!Then) 376 return nullptr; 377 378 if (CurTok != tok_else) 379 return Error("expected else"); 380 381 getNextToken(); 382 383 auto Else = ParseExpression(); 384 if (!Else) 385 return nullptr; 386 387 return llvm::make_unique<IfExprAST>(std::move(Cond), std::move(Then), 388 std::move(Else)); 389 } 390 391 /// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression 392 static std::unique_ptr<ExprAST> ParseForExpr() { 393 getNextToken(); // eat the for. 394 395 if (CurTok != tok_identifier) 396 return Error("expected identifier after for"); 397 398 std::string IdName = IdentifierStr; 399 getNextToken(); // eat identifier. 400 401 if (CurTok != '=') 402 return Error("expected '=' after for"); 403 getNextToken(); // eat '='. 404 405 auto Start = ParseExpression(); 406 if (!Start) 407 return nullptr; 408 if (CurTok != ',') 409 return Error("expected ',' after for start value"); 410 getNextToken(); 411 412 auto End = ParseExpression(); 413 if (!End) 414 return nullptr; 415 416 // The step value is optional. 417 std::unique_ptr<ExprAST> Step; 418 if (CurTok == ',') { 419 getNextToken(); 420 Step = ParseExpression(); 421 if (!Step) 422 return nullptr; 423 } 424 425 if (CurTok != tok_in) 426 return Error("expected 'in' after for"); 427 getNextToken(); // eat 'in'. 428 429 auto Body = ParseExpression(); 430 if (!Body) 431 return nullptr; 432 433 return llvm::make_unique<ForExprAST>(IdName, std::move(Start), std::move(End), 434 std::move(Step), std::move(Body)); 435 } 436 437 /// varexpr ::= 'var' identifier ('=' expression)? 438 // (',' identifier ('=' expression)?)* 'in' expression 439 static std::unique_ptr<ExprAST> ParseVarExpr() { 440 getNextToken(); // eat the var. 441 442 std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames; 443 444 // At least one variable name is required. 445 if (CurTok != tok_identifier) 446 return Error("expected identifier after var"); 447 448 while (1) { 449 std::string Name = IdentifierStr; 450 getNextToken(); // eat identifier. 451 452 // Read the optional initializer. 453 std::unique_ptr<ExprAST> Init = nullptr; 454 if (CurTok == '=') { 455 getNextToken(); // eat the '='. 456 457 Init = ParseExpression(); 458 if (!Init) 459 return nullptr; 460 } 461 462 VarNames.push_back(std::make_pair(Name, std::move(Init))); 463 464 // End of var list, exit loop. 465 if (CurTok != ',') 466 break; 467 getNextToken(); // eat the ','. 468 469 if (CurTok != tok_identifier) 470 return Error("expected identifier list after var"); 471 } 472 473 // At this point, we have to have 'in'. 474 if (CurTok != tok_in) 475 return Error("expected 'in' keyword after 'var'"); 476 getNextToken(); // eat 'in'. 477 478 auto Body = ParseExpression(); 479 if (!Body) 480 return nullptr; 481 482 return llvm::make_unique<VarExprAST>(std::move(VarNames), std::move(Body)); 483 } 484 485 /// primary 486 /// ::= identifierexpr 487 /// ::= numberexpr 488 /// ::= parenexpr 489 /// ::= ifexpr 490 /// ::= forexpr 491 /// ::= varexpr 492 static std::unique_ptr<ExprAST> ParsePrimary() { 493 switch (CurTok) { 494 default: 495 return Error("unknown token when expecting an expression"); 496 case tok_identifier: 497 return ParseIdentifierExpr(); 498 case tok_number: 499 return ParseNumberExpr(); 500 case '(': 501 return ParseParenExpr(); 502 case tok_if: 503 return ParseIfExpr(); 504 case tok_for: 505 return ParseForExpr(); 506 case tok_var: 507 return ParseVarExpr(); 508 } 509 } 510 511 /// unary 512 /// ::= primary 513 /// ::= '!' unary 514 static std::unique_ptr<ExprAST> ParseUnary() { 515 // If the current token is not an operator, it must be a primary expr. 516 if (!isascii(CurTok) || CurTok == '(' || CurTok == ',') 517 return ParsePrimary(); 518 519 // If this is a unary operator, read it. 520 int Opc = CurTok; 521 getNextToken(); 522 if (auto Operand = ParseUnary()) 523 return llvm::make_unique<UnaryExprAST>(Opc, std::move(Operand)); 524 return nullptr; 525 } 526 527 /// binoprhs 528 /// ::= ('+' unary)* 529 static std::unique_ptr<ExprAST> ParseBinOpRHS(int ExprPrec, 530 std::unique_ptr<ExprAST> LHS) { 531 // If this is a binop, find its precedence. 532 while (1) { 533 int TokPrec = GetTokPrecedence(); 534 535 // If this is a binop that binds at least as tightly as the current binop, 536 // consume it, otherwise we are done. 537 if (TokPrec < ExprPrec) 538 return LHS; 539 540 // Okay, we know this is a binop. 541 int BinOp = CurTok; 542 getNextToken(); // eat binop 543 544 // Parse the unary expression after the binary operator. 545 auto RHS = ParseUnary(); 546 if (!RHS) 547 return nullptr; 548 549 // If BinOp binds less tightly with RHS than the operator after RHS, let 550 // the pending operator take RHS as its LHS. 551 int NextPrec = GetTokPrecedence(); 552 if (TokPrec < NextPrec) { 553 RHS = ParseBinOpRHS(TokPrec + 1, std::move(RHS)); 554 if (!RHS) 555 return nullptr; 556 } 557 558 // Merge LHS/RHS. 559 LHS = 560 llvm::make_unique<BinaryExprAST>(BinOp, std::move(LHS), std::move(RHS)); 561 } 562 } 563 564 /// expression 565 /// ::= unary binoprhs 566 /// 567 static std::unique_ptr<ExprAST> ParseExpression() { 568 auto LHS = ParseUnary(); 569 if (!LHS) 570 return nullptr; 571 572 return ParseBinOpRHS(0, std::move(LHS)); 573 } 574 575 /// prototype 576 /// ::= id '(' id* ')' 577 /// ::= binary LETTER number? (id, id) 578 /// ::= unary LETTER (id) 579 static std::unique_ptr<PrototypeAST> ParsePrototype() { 580 std::string FnName; 581 582 unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary. 583 unsigned BinaryPrecedence = 30; 584 585 switch (CurTok) { 586 default: 587 return ErrorP("Expected function name in prototype"); 588 case tok_identifier: 589 FnName = IdentifierStr; 590 Kind = 0; 591 getNextToken(); 592 break; 593 case tok_unary: 594 getNextToken(); 595 if (!isascii(CurTok)) 596 return ErrorP("Expected unary operator"); 597 FnName = "unary"; 598 FnName += (char)CurTok; 599 Kind = 1; 600 getNextToken(); 601 break; 602 case tok_binary: 603 getNextToken(); 604 if (!isascii(CurTok)) 605 return ErrorP("Expected binary operator"); 606 FnName = "binary"; 607 FnName += (char)CurTok; 608 Kind = 2; 609 getNextToken(); 610 611 // Read the precedence if present. 612 if (CurTok == tok_number) { 613 if (NumVal < 1 || NumVal > 100) 614 return ErrorP("Invalid precedecnce: must be 1..100"); 615 BinaryPrecedence = (unsigned)NumVal; 616 getNextToken(); 617 } 618 break; 619 } 620 621 if (CurTok != '(') 622 return ErrorP("Expected '(' in prototype"); 623 624 std::vector<std::string> ArgNames; 625 while (getNextToken() == tok_identifier) 626 ArgNames.push_back(IdentifierStr); 627 if (CurTok != ')') 628 return ErrorP("Expected ')' in prototype"); 629 630 // success. 631 getNextToken(); // eat ')'. 632 633 // Verify right number of names for operator. 634 if (Kind && ArgNames.size() != Kind) 635 return ErrorP("Invalid number of operands for operator"); 636 637 return llvm::make_unique<PrototypeAST>(FnName, ArgNames, Kind != 0, 638 BinaryPrecedence); 639 } 640 641 /// definition ::= 'def' prototype expression 642 static std::unique_ptr<FunctionAST> ParseDefinition() { 643 getNextToken(); // eat def. 644 auto Proto = ParsePrototype(); 645 if (!Proto) 646 return nullptr; 647 648 if (auto E = ParseExpression()) 649 return llvm::make_unique<FunctionAST>(std::move(Proto), std::move(E)); 650 return nullptr; 651 } 652 653 /// toplevelexpr ::= expression 654 static std::unique_ptr<FunctionAST> ParseTopLevelExpr() { 655 if (auto E = ParseExpression()) { 656 // Make an anonymous proto. 657 auto Proto = llvm::make_unique<PrototypeAST>("__anon_expr", 658 std::vector<std::string>()); 659 return llvm::make_unique<FunctionAST>(std::move(Proto), std::move(E)); 660 } 661 return nullptr; 662 } 663 664 /// external ::= 'extern' prototype 665 static std::unique_ptr<PrototypeAST> ParseExtern() { 666 getNextToken(); // eat extern. 667 return ParsePrototype(); 668 } 669 670 //===----------------------------------------------------------------------===// 671 // Code Generation 672 //===----------------------------------------------------------------------===// 673 674 static std::unique_ptr<Module> TheModule; 675 static IRBuilder<> Builder(getGlobalContext()); 676 static std::map<std::string, AllocaInst *> NamedValues; 677 static std::unique_ptr<legacy::FunctionPassManager> TheFPM; 678 static std::unique_ptr<KaleidoscopeJIT> TheJIT; 679 static std::map<std::string, std::unique_ptr<PrototypeAST>> FunctionProtos; 680 681 Value *ErrorV(const char *Str) { 682 Error(Str); 683 return nullptr; 684 } 685 686 Function *getFunction(std::string Name) { 687 // First, see if the function has already been added to the current module. 688 if (auto *F = TheModule->getFunction(Name)) 689 return F; 690 691 // If not, check whether we can codegen the declaration from some existing 692 // prototype. 693 auto FI = FunctionProtos.find(Name); 694 if (FI != FunctionProtos.end()) 695 return FI->second->codegen(); 696 697 // If no existing prototype exists, return null. 698 return nullptr; 699 } 700 701 /// CreateEntryBlockAlloca - Create an alloca instruction in the entry block of 702 /// the function. This is used for mutable variables etc. 703 static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction, 704 const std::string &VarName) { 705 IRBuilder<> TmpB(&TheFunction->getEntryBlock(), 706 TheFunction->getEntryBlock().begin()); 707 return TmpB.CreateAlloca(Type::getDoubleTy(getGlobalContext()), nullptr, 708 VarName.c_str()); 709 } 710 711 Value *NumberExprAST::codegen() { 712 return ConstantFP::get(getGlobalContext(), APFloat(Val)); 713 } 714 715 Value *VariableExprAST::codegen() { 716 // Look this variable up in the function. 717 Value *V = NamedValues[Name]; 718 if (!V) 719 return ErrorV("Unknown variable name"); 720 721 // Load the value. 722 return Builder.CreateLoad(V, Name.c_str()); 723 } 724 725 Value *UnaryExprAST::codegen() { 726 Value *OperandV = Operand->codegen(); 727 if (!OperandV) 728 return nullptr; 729 730 Function *F = getFunction(std::string("unary") + Opcode); 731 if (!F) 732 return ErrorV("Unknown unary operator"); 733 734 return Builder.CreateCall(F, OperandV, "unop"); 735 } 736 737 Value *BinaryExprAST::codegen() { 738 // Special case '=' because we don't want to emit the LHS as an expression. 739 if (Op == '=') { 740 // Assignment requires the LHS to be an identifier. 741 // This assume we're building without RTTI because LLVM builds that way by 742 // default. If you build LLVM with RTTI this can be changed to a 743 // dynamic_cast for automatic error checking. 744 VariableExprAST *LHSE = static_cast<VariableExprAST *>(LHS.get()); 745 if (!LHSE) 746 return ErrorV("destination of '=' must be a variable"); 747 // Codegen the RHS. 748 Value *Val = RHS->codegen(); 749 if (!Val) 750 return nullptr; 751 752 // Look up the name. 753 Value *Variable = NamedValues[LHSE->getName()]; 754 if (!Variable) 755 return ErrorV("Unknown variable name"); 756 757 Builder.CreateStore(Val, Variable); 758 return Val; 759 } 760 761 Value *L = LHS->codegen(); 762 Value *R = RHS->codegen(); 763 if (!L || !R) 764 return nullptr; 765 766 switch (Op) { 767 case '+': 768 return Builder.CreateFAdd(L, R, "addtmp"); 769 case '-': 770 return Builder.CreateFSub(L, R, "subtmp"); 771 case '*': 772 return Builder.CreateFMul(L, R, "multmp"); 773 case '<': 774 L = Builder.CreateFCmpULT(L, R, "cmptmp"); 775 // Convert bool 0/1 to double 0.0 or 1.0 776 return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()), 777 "booltmp"); 778 default: 779 break; 780 } 781 782 // If it wasn't a builtin binary operator, it must be a user defined one. Emit 783 // a call to it. 784 Function *F = getFunction(std::string("binary") + Op); 785 assert(F && "binary operator not found!"); 786 787 Value *Ops[] = {L, R}; 788 return Builder.CreateCall(F, Ops, "binop"); 789 } 790 791 Value *CallExprAST::codegen() { 792 // Look up the name in the global module table. 793 Function *CalleeF = getFunction(Callee); 794 if (!CalleeF) 795 return ErrorV("Unknown function referenced"); 796 797 // If argument mismatch error. 798 if (CalleeF->arg_size() != Args.size()) 799 return ErrorV("Incorrect # arguments passed"); 800 801 std::vector<Value *> ArgsV; 802 for (unsigned i = 0, e = Args.size(); i != e; ++i) { 803 ArgsV.push_back(Args[i]->codegen()); 804 if (!ArgsV.back()) 805 return nullptr; 806 } 807 808 return Builder.CreateCall(CalleeF, ArgsV, "calltmp"); 809 } 810 811 Value *IfExprAST::codegen() { 812 Value *CondV = Cond->codegen(); 813 if (!CondV) 814 return nullptr; 815 816 // Convert condition to a bool by comparing equal to 0.0. 817 CondV = Builder.CreateFCmpONE( 818 CondV, ConstantFP::get(getGlobalContext(), APFloat(0.0)), "ifcond"); 819 820 Function *TheFunction = Builder.GetInsertBlock()->getParent(); 821 822 // Create blocks for the then and else cases. Insert the 'then' block at the 823 // end of the function. 824 BasicBlock *ThenBB = 825 BasicBlock::Create(getGlobalContext(), "then", TheFunction); 826 BasicBlock *ElseBB = BasicBlock::Create(getGlobalContext(), "else"); 827 BasicBlock *MergeBB = BasicBlock::Create(getGlobalContext(), "ifcont"); 828 829 Builder.CreateCondBr(CondV, ThenBB, ElseBB); 830 831 // Emit then value. 832 Builder.SetInsertPoint(ThenBB); 833 834 Value *ThenV = Then->codegen(); 835 if (!ThenV) 836 return nullptr; 837 838 Builder.CreateBr(MergeBB); 839 // Codegen of 'Then' can change the current block, update ThenBB for the PHI. 840 ThenBB = Builder.GetInsertBlock(); 841 842 // Emit else block. 843 TheFunction->getBasicBlockList().push_back(ElseBB); 844 Builder.SetInsertPoint(ElseBB); 845 846 Value *ElseV = Else->codegen(); 847 if (!ElseV) 848 return nullptr; 849 850 Builder.CreateBr(MergeBB); 851 // Codegen of 'Else' can change the current block, update ElseBB for the PHI. 852 ElseBB = Builder.GetInsertBlock(); 853 854 // Emit merge block. 855 TheFunction->getBasicBlockList().push_back(MergeBB); 856 Builder.SetInsertPoint(MergeBB); 857 PHINode *PN = 858 Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2, "iftmp"); 859 860 PN->addIncoming(ThenV, ThenBB); 861 PN->addIncoming(ElseV, ElseBB); 862 return PN; 863 } 864 865 // Output for-loop as: 866 // var = alloca double 867 // ... 868 // start = startexpr 869 // store start -> var 870 // goto loop 871 // loop: 872 // ... 873 // bodyexpr 874 // ... 875 // loopend: 876 // step = stepexpr 877 // endcond = endexpr 878 // 879 // curvar = load var 880 // nextvar = curvar + step 881 // store nextvar -> var 882 // br endcond, loop, endloop 883 // outloop: 884 Value *ForExprAST::codegen() { 885 Function *TheFunction = Builder.GetInsertBlock()->getParent(); 886 887 // Create an alloca for the variable in the entry block. 888 AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName); 889 890 // Emit the start code first, without 'variable' in scope. 891 Value *StartVal = Start->codegen(); 892 if (!StartVal) 893 return nullptr; 894 895 // Store the value into the alloca. 896 Builder.CreateStore(StartVal, Alloca); 897 898 // Make the new basic block for the loop header, inserting after current 899 // block. 900 BasicBlock *LoopBB = 901 BasicBlock::Create(getGlobalContext(), "loop", TheFunction); 902 903 // Insert an explicit fall through from the current block to the LoopBB. 904 Builder.CreateBr(LoopBB); 905 906 // Start insertion in LoopBB. 907 Builder.SetInsertPoint(LoopBB); 908 909 // Within the loop, the variable is defined equal to the PHI node. If it 910 // shadows an existing variable, we have to restore it, so save it now. 911 AllocaInst *OldVal = NamedValues[VarName]; 912 NamedValues[VarName] = Alloca; 913 914 // Emit the body of the loop. This, like any other expr, can change the 915 // current BB. Note that we ignore the value computed by the body, but don't 916 // allow an error. 917 if (!Body->codegen()) 918 return nullptr; 919 920 // Emit the step value. 921 Value *StepVal = nullptr; 922 if (Step) { 923 StepVal = Step->codegen(); 924 if (!StepVal) 925 return nullptr; 926 } else { 927 // If not specified, use 1.0. 928 StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0)); 929 } 930 931 // Compute the end condition. 932 Value *EndCond = End->codegen(); 933 if (!EndCond) 934 return nullptr; 935 936 // Reload, increment, and restore the alloca. This handles the case where 937 // the body of the loop mutates the variable. 938 Value *CurVar = Builder.CreateLoad(Alloca, VarName.c_str()); 939 Value *NextVar = Builder.CreateFAdd(CurVar, StepVal, "nextvar"); 940 Builder.CreateStore(NextVar, Alloca); 941 942 // Convert condition to a bool by comparing equal to 0.0. 943 EndCond = Builder.CreateFCmpONE( 944 EndCond, ConstantFP::get(getGlobalContext(), APFloat(0.0)), "loopcond"); 945 946 // Create the "after loop" block and insert it. 947 BasicBlock *AfterBB = 948 BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction); 949 950 // Insert the conditional branch into the end of LoopEndBB. 951 Builder.CreateCondBr(EndCond, LoopBB, AfterBB); 952 953 // Any new code will be inserted in AfterBB. 954 Builder.SetInsertPoint(AfterBB); 955 956 // Restore the unshadowed variable. 957 if (OldVal) 958 NamedValues[VarName] = OldVal; 959 else 960 NamedValues.erase(VarName); 961 962 // for expr always returns 0.0. 963 return Constant::getNullValue(Type::getDoubleTy(getGlobalContext())); 964 } 965 966 Value *VarExprAST::codegen() { 967 std::vector<AllocaInst *> OldBindings; 968 969 Function *TheFunction = Builder.GetInsertBlock()->getParent(); 970 971 // Register all variables and emit their initializer. 972 for (unsigned i = 0, e = VarNames.size(); i != e; ++i) { 973 const std::string &VarName = VarNames[i].first; 974 ExprAST *Init = VarNames[i].second.get(); 975 976 // Emit the initializer before adding the variable to scope, this prevents 977 // the initializer from referencing the variable itself, and permits stuff 978 // like this: 979 // var a = 1 in 980 // var a = a in ... # refers to outer 'a'. 981 Value *InitVal; 982 if (Init) { 983 InitVal = Init->codegen(); 984 if (!InitVal) 985 return nullptr; 986 } else { // If not specified, use 0.0. 987 InitVal = ConstantFP::get(getGlobalContext(), APFloat(0.0)); 988 } 989 990 AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName); 991 Builder.CreateStore(InitVal, Alloca); 992 993 // Remember the old variable binding so that we can restore the binding when 994 // we unrecurse. 995 OldBindings.push_back(NamedValues[VarName]); 996 997 // Remember this binding. 998 NamedValues[VarName] = Alloca; 999 } 1000 1001 // Codegen the body, now that all vars are in scope. 1002 Value *BodyVal = Body->codegen(); 1003 if (!BodyVal) 1004 return nullptr; 1005 1006 // Pop all our variables from scope. 1007 for (unsigned i = 0, e = VarNames.size(); i != e; ++i) 1008 NamedValues[VarNames[i].first] = OldBindings[i]; 1009 1010 // Return the body computation. 1011 return BodyVal; 1012 } 1013 1014 Function *PrototypeAST::codegen() { 1015 // Make the function type: double(double,double) etc. 1016 std::vector<Type *> Doubles(Args.size(), 1017 Type::getDoubleTy(getGlobalContext())); 1018 FunctionType *FT = 1019 FunctionType::get(Type::getDoubleTy(getGlobalContext()), Doubles, false); 1020 1021 Function *F = 1022 Function::Create(FT, Function::ExternalLinkage, Name, TheModule.get()); 1023 1024 // Set names for all arguments. 1025 unsigned Idx = 0; 1026 for (auto &Arg : F->args()) 1027 Arg.setName(Args[Idx++]); 1028 1029 return F; 1030 } 1031 1032 Function *FunctionAST::codegen() { 1033 // Transfer ownership of the prototype to the FunctionProtos map, but keep a 1034 // reference to it for use below. 1035 auto &P = *Proto; 1036 FunctionProtos[Proto->getName()] = std::move(Proto); 1037 Function *TheFunction = getFunction(P.getName()); 1038 if (!TheFunction) 1039 return nullptr; 1040 1041 // If this is an operator, install it. 1042 if (P.isBinaryOp()) 1043 BinopPrecedence[P.getOperatorName()] = P.getBinaryPrecedence(); 1044 1045 // Create a new basic block to start insertion into. 1046 BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction); 1047 Builder.SetInsertPoint(BB); 1048 1049 // Record the function arguments in the NamedValues map. 1050 NamedValues.clear(); 1051 for (auto &Arg : TheFunction->args()) { 1052 // Create an alloca for this variable. 1053 AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, Arg.getName()); 1054 1055 // Store the initial value into the alloca. 1056 Builder.CreateStore(&Arg, Alloca); 1057 1058 // Add arguments to variable symbol table. 1059 NamedValues[Arg.getName()] = Alloca; 1060 } 1061 1062 if (Value *RetVal = Body->codegen()) { 1063 // Finish off the function. 1064 Builder.CreateRet(RetVal); 1065 1066 // Validate the generated code, checking for consistency. 1067 verifyFunction(*TheFunction); 1068 1069 // Run the optimizer on the function. 1070 TheFPM->run(*TheFunction); 1071 1072 return TheFunction; 1073 } 1074 1075 // Error reading body, remove function. 1076 TheFunction->eraseFromParent(); 1077 1078 if (P.isBinaryOp()) 1079 BinopPrecedence.erase(Proto->getOperatorName()); 1080 return nullptr; 1081 } 1082 1083 //===----------------------------------------------------------------------===// 1084 // Top-Level parsing and JIT Driver 1085 //===----------------------------------------------------------------------===// 1086 1087 static void InitializeModuleAndPassManager() { 1088 // Open a new module. 1089 TheModule = llvm::make_unique<Module>("my cool jit", getGlobalContext()); 1090 TheModule->setDataLayout(TheJIT->getTargetMachine().createDataLayout()); 1091 1092 // Create a new pass manager attached to it. 1093 TheFPM = llvm::make_unique<legacy::FunctionPassManager>(TheModule.get()); 1094 1095 // Do simple "peephole" optimizations and bit-twiddling optzns. 1096 TheFPM->add(createInstructionCombiningPass()); 1097 // Reassociate expressions. 1098 TheFPM->add(createReassociatePass()); 1099 // Eliminate Common SubExpressions. 1100 TheFPM->add(createGVNPass()); 1101 // Simplify the control flow graph (deleting unreachable blocks, etc). 1102 TheFPM->add(createCFGSimplificationPass()); 1103 1104 TheFPM->doInitialization(); 1105 } 1106 1107 static void HandleDefinition() { 1108 if (auto FnAST = ParseDefinition()) { 1109 if (auto *FnIR = FnAST->codegen()) { 1110 fprintf(stderr, "Read function definition:"); 1111 FnIR->dump(); 1112 TheJIT->addModule(std::move(TheModule)); 1113 InitializeModuleAndPassManager(); 1114 } 1115 } else { 1116 // Skip token for error recovery. 1117 getNextToken(); 1118 } 1119 } 1120 1121 static void HandleExtern() { 1122 if (auto ProtoAST = ParseExtern()) { 1123 if (auto *FnIR = ProtoAST->codegen()) { 1124 fprintf(stderr, "Read extern: "); 1125 FnIR->dump(); 1126 FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST); 1127 } 1128 } else { 1129 // Skip token for error recovery. 1130 getNextToken(); 1131 } 1132 } 1133 1134 static void HandleTopLevelExpression() { 1135 // Evaluate a top-level expression into an anonymous function. 1136 if (auto FnAST = ParseTopLevelExpr()) { 1137 if (FnAST->codegen()) { 1138 1139 // JIT the module containing the anonymous expression, keeping a handle so 1140 // we can free it later. 1141 auto H = TheJIT->addModule(std::move(TheModule)); 1142 InitializeModuleAndPassManager(); 1143 1144 // Search the JIT for the __anon_expr symbol. 1145 auto ExprSymbol = TheJIT->findSymbol("__anon_expr"); 1146 assert(ExprSymbol && "Function not found"); 1147 1148 // Get the symbol's address and cast it to the right type (takes no 1149 // arguments, returns a double) so we can call it as a native function. 1150 double (*FP)() = (double (*)())(intptr_t)ExprSymbol.getAddress(); 1151 fprintf(stderr, "Evaluated to %f\n", FP()); 1152 1153 // Delete the anonymous expression module from the JIT. 1154 TheJIT->removeModule(H); 1155 } 1156 } else { 1157 // Skip token for error recovery. 1158 getNextToken(); 1159 } 1160 } 1161 1162 /// top ::= definition | external | expression | ';' 1163 static void MainLoop() { 1164 while (1) { 1165 fprintf(stderr, "ready> "); 1166 switch (CurTok) { 1167 case tok_eof: 1168 return; 1169 case ';': // ignore top-level semicolons. 1170 getNextToken(); 1171 break; 1172 case tok_def: 1173 HandleDefinition(); 1174 break; 1175 case tok_extern: 1176 HandleExtern(); 1177 break; 1178 default: 1179 HandleTopLevelExpression(); 1180 break; 1181 } 1182 } 1183 } 1184 1185 //===----------------------------------------------------------------------===// 1186 // "Library" functions that can be "extern'd" from user code. 1187 //===----------------------------------------------------------------------===// 1188 1189 /// putchard - putchar that takes a double and returns 0. 1190 extern "C" double putchard(double X) { 1191 fputc((char)X, stderr); 1192 return 0; 1193 } 1194 1195 /// printd - printf that takes a double prints it as "%f\n", returning 0. 1196 extern "C" double printd(double X) { 1197 fprintf(stderr, "%f\n", X); 1198 return 0; 1199 } 1200 1201 //===----------------------------------------------------------------------===// 1202 // Main driver code. 1203 //===----------------------------------------------------------------------===// 1204 1205 int main() { 1206 InitializeNativeTarget(); 1207 InitializeNativeTargetAsmPrinter(); 1208 InitializeNativeTargetAsmParser(); 1209 1210 // Install standard binary operators. 1211 // 1 is lowest precedence. 1212 BinopPrecedence['='] = 2; 1213 BinopPrecedence['<'] = 10; 1214 BinopPrecedence['+'] = 20; 1215 BinopPrecedence['-'] = 20; 1216 BinopPrecedence['*'] = 40; // highest. 1217 1218 // Prime the first token. 1219 fprintf(stderr, "ready> "); 1220 getNextToken(); 1221 1222 TheJIT = llvm::make_unique<KaleidoscopeJIT>(); 1223 1224 InitializeModuleAndPassManager(); 1225 1226 // Run the main "interpreter loop" now. 1227 MainLoop(); 1228 1229 return 0; 1230 } 1231