1 #include "llvm/ADT/APFloat.h" 2 #include "llvm/ADT/STLExtras.h" 3 #include "llvm/IR/BasicBlock.h" 4 #include "llvm/IR/Constants.h" 5 #include "llvm/IR/DerivedTypes.h" 6 #include "llvm/IR/Function.h" 7 #include "llvm/IR/Instructions.h" 8 #include "llvm/IR/IRBuilder.h" 9 #include "llvm/IR/LLVMContext.h" 10 #include "llvm/IR/LegacyPassManager.h" 11 #include "llvm/IR/Module.h" 12 #include "llvm/IR/Type.h" 13 #include "llvm/IR/Verifier.h" 14 #include "llvm/Support/TargetSelect.h" 15 #include "llvm/Target/TargetMachine.h" 16 #include "llvm/Transforms/Scalar.h" 17 #include "llvm/Transforms/Scalar/GVN.h" 18 #include "../include/KaleidoscopeJIT.h" 19 #include <cassert> 20 #include <cctype> 21 #include <cstdint> 22 #include <cstdio> 23 #include <cstdlib> 24 #include <map> 25 #include <memory> 26 #include <string> 27 #include <utility> 28 #include <vector> 29 30 using namespace llvm; 31 using namespace llvm::orc; 32 33 //===----------------------------------------------------------------------===// 34 // Lexer 35 //===----------------------------------------------------------------------===// 36 37 // The lexer returns tokens [0-255] if it is an unknown character, otherwise one 38 // of these for known things. 39 enum Token { 40 tok_eof = -1, 41 42 // commands 43 tok_def = -2, 44 tok_extern = -3, 45 46 // primary 47 tok_identifier = -4, 48 tok_number = -5, 49 50 // control 51 tok_if = -6, 52 tok_then = -7, 53 tok_else = -8, 54 tok_for = -9, 55 tok_in = -10, 56 57 // operators 58 tok_binary = -11, 59 tok_unary = -12, 60 61 // var definition 62 tok_var = -13 63 }; 64 65 static std::string IdentifierStr; // Filled in if tok_identifier 66 static double NumVal; // Filled in if tok_number 67 68 /// gettok - Return the next token from standard input. 69 static int gettok() { 70 static int LastChar = ' '; 71 72 // Skip any whitespace. 73 while (isspace(LastChar)) 74 LastChar = getchar(); 75 76 if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]* 77 IdentifierStr = LastChar; 78 while (isalnum((LastChar = getchar()))) 79 IdentifierStr += LastChar; 80 81 if (IdentifierStr == "def") 82 return tok_def; 83 if (IdentifierStr == "extern") 84 return tok_extern; 85 if (IdentifierStr == "if") 86 return tok_if; 87 if (IdentifierStr == "then") 88 return tok_then; 89 if (IdentifierStr == "else") 90 return tok_else; 91 if (IdentifierStr == "for") 92 return tok_for; 93 if (IdentifierStr == "in") 94 return tok_in; 95 if (IdentifierStr == "binary") 96 return tok_binary; 97 if (IdentifierStr == "unary") 98 return tok_unary; 99 if (IdentifierStr == "var") 100 return tok_var; 101 return tok_identifier; 102 } 103 104 if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+ 105 std::string NumStr; 106 do { 107 NumStr += LastChar; 108 LastChar = getchar(); 109 } while (isdigit(LastChar) || LastChar == '.'); 110 111 NumVal = strtod(NumStr.c_str(), nullptr); 112 return tok_number; 113 } 114 115 if (LastChar == '#') { 116 // Comment until end of line. 117 do 118 LastChar = getchar(); 119 while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); 120 121 if (LastChar != EOF) 122 return gettok(); 123 } 124 125 // Check for end of file. Don't eat the EOF. 126 if (LastChar == EOF) 127 return tok_eof; 128 129 // Otherwise, just return the character as its ascii value. 130 int ThisChar = LastChar; 131 LastChar = getchar(); 132 return ThisChar; 133 } 134 135 //===----------------------------------------------------------------------===// 136 // Abstract Syntax Tree (aka Parse Tree) 137 //===----------------------------------------------------------------------===// 138 namespace { 139 /// ExprAST - Base class for all expression nodes. 140 class ExprAST { 141 public: 142 virtual ~ExprAST() {} 143 virtual Value *codegen() = 0; 144 }; 145 146 /// NumberExprAST - Expression class for numeric literals like "1.0". 147 class NumberExprAST : public ExprAST { 148 double Val; 149 150 public: 151 NumberExprAST(double Val) : Val(Val) {} 152 Value *codegen() override; 153 }; 154 155 /// VariableExprAST - Expression class for referencing a variable, like "a". 156 class VariableExprAST : public ExprAST { 157 std::string Name; 158 159 public: 160 VariableExprAST(const std::string &Name) : Name(Name) {} 161 const std::string &getName() const { return Name; } 162 Value *codegen() override; 163 }; 164 165 /// UnaryExprAST - Expression class for a unary operator. 166 class UnaryExprAST : public ExprAST { 167 char Opcode; 168 std::unique_ptr<ExprAST> Operand; 169 170 public: 171 UnaryExprAST(char Opcode, std::unique_ptr<ExprAST> Operand) 172 : Opcode(Opcode), Operand(std::move(Operand)) {} 173 Value *codegen() override; 174 }; 175 176 /// BinaryExprAST - Expression class for a binary operator. 177 class BinaryExprAST : public ExprAST { 178 char Op; 179 std::unique_ptr<ExprAST> LHS, RHS; 180 181 public: 182 BinaryExprAST(char Op, std::unique_ptr<ExprAST> LHS, 183 std::unique_ptr<ExprAST> RHS) 184 : Op(Op), LHS(std::move(LHS)), RHS(std::move(RHS)) {} 185 Value *codegen() override; 186 }; 187 188 /// CallExprAST - Expression class for function calls. 189 class CallExprAST : public ExprAST { 190 std::string Callee; 191 std::vector<std::unique_ptr<ExprAST>> Args; 192 193 public: 194 CallExprAST(const std::string &Callee, 195 std::vector<std::unique_ptr<ExprAST>> Args) 196 : Callee(Callee), Args(std::move(Args)) {} 197 Value *codegen() override; 198 }; 199 200 /// IfExprAST - Expression class for if/then/else. 201 class IfExprAST : public ExprAST { 202 std::unique_ptr<ExprAST> Cond, Then, Else; 203 204 public: 205 IfExprAST(std::unique_ptr<ExprAST> Cond, std::unique_ptr<ExprAST> Then, 206 std::unique_ptr<ExprAST> Else) 207 : Cond(std::move(Cond)), Then(std::move(Then)), Else(std::move(Else)) {} 208 Value *codegen() override; 209 }; 210 211 /// ForExprAST - Expression class for for/in. 212 class ForExprAST : public ExprAST { 213 std::string VarName; 214 std::unique_ptr<ExprAST> Start, End, Step, Body; 215 216 public: 217 ForExprAST(const std::string &VarName, std::unique_ptr<ExprAST> Start, 218 std::unique_ptr<ExprAST> End, std::unique_ptr<ExprAST> Step, 219 std::unique_ptr<ExprAST> Body) 220 : VarName(VarName), Start(std::move(Start)), End(std::move(End)), 221 Step(std::move(Step)), Body(std::move(Body)) {} 222 Value *codegen() override; 223 }; 224 225 /// VarExprAST - Expression class for var/in 226 class VarExprAST : public ExprAST { 227 std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames; 228 std::unique_ptr<ExprAST> Body; 229 230 public: 231 VarExprAST( 232 std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames, 233 std::unique_ptr<ExprAST> Body) 234 : VarNames(std::move(VarNames)), Body(std::move(Body)) {} 235 Value *codegen() override; 236 }; 237 238 /// PrototypeAST - This class represents the "prototype" for a function, 239 /// which captures its name, and its argument names (thus implicitly the number 240 /// of arguments the function takes), as well as if it is an operator. 241 class PrototypeAST { 242 std::string Name; 243 std::vector<std::string> Args; 244 bool IsOperator; 245 unsigned Precedence; // Precedence if a binary op. 246 247 public: 248 PrototypeAST(const std::string &Name, std::vector<std::string> Args, 249 bool IsOperator = false, unsigned Prec = 0) 250 : Name(Name), Args(std::move(Args)), IsOperator(IsOperator), 251 Precedence(Prec) {} 252 Function *codegen(); 253 const std::string &getName() const { return Name; } 254 255 bool isUnaryOp() const { return IsOperator && Args.size() == 1; } 256 bool isBinaryOp() const { return IsOperator && Args.size() == 2; } 257 258 char getOperatorName() const { 259 assert(isUnaryOp() || isBinaryOp()); 260 return Name[Name.size() - 1]; 261 } 262 263 unsigned getBinaryPrecedence() const { return Precedence; } 264 }; 265 266 /// FunctionAST - This class represents a function definition itself. 267 class FunctionAST { 268 std::unique_ptr<PrototypeAST> Proto; 269 std::unique_ptr<ExprAST> Body; 270 271 public: 272 FunctionAST(std::unique_ptr<PrototypeAST> Proto, 273 std::unique_ptr<ExprAST> Body) 274 : Proto(std::move(Proto)), Body(std::move(Body)) {} 275 Function *codegen(); 276 }; 277 } // end anonymous namespace 278 279 //===----------------------------------------------------------------------===// 280 // Parser 281 //===----------------------------------------------------------------------===// 282 283 /// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current 284 /// token the parser is looking at. getNextToken reads another token from the 285 /// lexer and updates CurTok with its results. 286 static int CurTok; 287 static int getNextToken() { return CurTok = gettok(); } 288 289 /// BinopPrecedence - This holds the precedence for each binary operator that is 290 /// defined. 291 static std::map<char, int> BinopPrecedence; 292 293 /// GetTokPrecedence - Get the precedence of the pending binary operator token. 294 static int GetTokPrecedence() { 295 if (!isascii(CurTok)) 296 return -1; 297 298 // Make sure it's a declared binop. 299 int TokPrec = BinopPrecedence[CurTok]; 300 if (TokPrec <= 0) 301 return -1; 302 return TokPrec; 303 } 304 305 /// LogError* - These are little helper functions for error handling. 306 std::unique_ptr<ExprAST> LogError(const char *Str) { 307 fprintf(stderr, "Error: %s\n", Str); 308 return nullptr; 309 } 310 311 std::unique_ptr<PrototypeAST> LogErrorP(const char *Str) { 312 LogError(Str); 313 return nullptr; 314 } 315 316 static std::unique_ptr<ExprAST> ParseExpression(); 317 318 /// numberexpr ::= number 319 static std::unique_ptr<ExprAST> ParseNumberExpr() { 320 auto Result = llvm::make_unique<NumberExprAST>(NumVal); 321 getNextToken(); // consume the number 322 return std::move(Result); 323 } 324 325 /// parenexpr ::= '(' expression ')' 326 static std::unique_ptr<ExprAST> ParseParenExpr() { 327 getNextToken(); // eat (. 328 auto V = ParseExpression(); 329 if (!V) 330 return nullptr; 331 332 if (CurTok != ')') 333 return LogError("expected ')'"); 334 getNextToken(); // eat ). 335 return V; 336 } 337 338 /// identifierexpr 339 /// ::= identifier 340 /// ::= identifier '(' expression* ')' 341 static std::unique_ptr<ExprAST> ParseIdentifierExpr() { 342 std::string IdName = IdentifierStr; 343 344 getNextToken(); // eat identifier. 345 346 if (CurTok != '(') // Simple variable ref. 347 return llvm::make_unique<VariableExprAST>(IdName); 348 349 // Call. 350 getNextToken(); // eat ( 351 std::vector<std::unique_ptr<ExprAST>> Args; 352 if (CurTok != ')') { 353 while (true) { 354 if (auto Arg = ParseExpression()) 355 Args.push_back(std::move(Arg)); 356 else 357 return nullptr; 358 359 if (CurTok == ')') 360 break; 361 362 if (CurTok != ',') 363 return LogError("Expected ')' or ',' in argument list"); 364 getNextToken(); 365 } 366 } 367 368 // Eat the ')'. 369 getNextToken(); 370 371 return llvm::make_unique<CallExprAST>(IdName, std::move(Args)); 372 } 373 374 /// ifexpr ::= 'if' expression 'then' expression 'else' expression 375 static std::unique_ptr<ExprAST> ParseIfExpr() { 376 getNextToken(); // eat the if. 377 378 // condition. 379 auto Cond = ParseExpression(); 380 if (!Cond) 381 return nullptr; 382 383 if (CurTok != tok_then) 384 return LogError("expected then"); 385 getNextToken(); // eat the then 386 387 auto Then = ParseExpression(); 388 if (!Then) 389 return nullptr; 390 391 if (CurTok != tok_else) 392 return LogError("expected else"); 393 394 getNextToken(); 395 396 auto Else = ParseExpression(); 397 if (!Else) 398 return nullptr; 399 400 return llvm::make_unique<IfExprAST>(std::move(Cond), std::move(Then), 401 std::move(Else)); 402 } 403 404 /// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression 405 static std::unique_ptr<ExprAST> ParseForExpr() { 406 getNextToken(); // eat the for. 407 408 if (CurTok != tok_identifier) 409 return LogError("expected identifier after for"); 410 411 std::string IdName = IdentifierStr; 412 getNextToken(); // eat identifier. 413 414 if (CurTok != '=') 415 return LogError("expected '=' after for"); 416 getNextToken(); // eat '='. 417 418 auto Start = ParseExpression(); 419 if (!Start) 420 return nullptr; 421 if (CurTok != ',') 422 return LogError("expected ',' after for start value"); 423 getNextToken(); 424 425 auto End = ParseExpression(); 426 if (!End) 427 return nullptr; 428 429 // The step value is optional. 430 std::unique_ptr<ExprAST> Step; 431 if (CurTok == ',') { 432 getNextToken(); 433 Step = ParseExpression(); 434 if (!Step) 435 return nullptr; 436 } 437 438 if (CurTok != tok_in) 439 return LogError("expected 'in' after for"); 440 getNextToken(); // eat 'in'. 441 442 auto Body = ParseExpression(); 443 if (!Body) 444 return nullptr; 445 446 return llvm::make_unique<ForExprAST>(IdName, std::move(Start), std::move(End), 447 std::move(Step), std::move(Body)); 448 } 449 450 /// varexpr ::= 'var' identifier ('=' expression)? 451 // (',' identifier ('=' expression)?)* 'in' expression 452 static std::unique_ptr<ExprAST> ParseVarExpr() { 453 getNextToken(); // eat the var. 454 455 std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames; 456 457 // At least one variable name is required. 458 if (CurTok != tok_identifier) 459 return LogError("expected identifier after var"); 460 461 while (true) { 462 std::string Name = IdentifierStr; 463 getNextToken(); // eat identifier. 464 465 // Read the optional initializer. 466 std::unique_ptr<ExprAST> Init = nullptr; 467 if (CurTok == '=') { 468 getNextToken(); // eat the '='. 469 470 Init = ParseExpression(); 471 if (!Init) 472 return nullptr; 473 } 474 475 VarNames.push_back(std::make_pair(Name, std::move(Init))); 476 477 // End of var list, exit loop. 478 if (CurTok != ',') 479 break; 480 getNextToken(); // eat the ','. 481 482 if (CurTok != tok_identifier) 483 return LogError("expected identifier list after var"); 484 } 485 486 // At this point, we have to have 'in'. 487 if (CurTok != tok_in) 488 return LogError("expected 'in' keyword after 'var'"); 489 getNextToken(); // eat 'in'. 490 491 auto Body = ParseExpression(); 492 if (!Body) 493 return nullptr; 494 495 return llvm::make_unique<VarExprAST>(std::move(VarNames), std::move(Body)); 496 } 497 498 /// primary 499 /// ::= identifierexpr 500 /// ::= numberexpr 501 /// ::= parenexpr 502 /// ::= ifexpr 503 /// ::= forexpr 504 /// ::= varexpr 505 static std::unique_ptr<ExprAST> ParsePrimary() { 506 switch (CurTok) { 507 default: 508 return LogError("unknown token when expecting an expression"); 509 case tok_identifier: 510 return ParseIdentifierExpr(); 511 case tok_number: 512 return ParseNumberExpr(); 513 case '(': 514 return ParseParenExpr(); 515 case tok_if: 516 return ParseIfExpr(); 517 case tok_for: 518 return ParseForExpr(); 519 case tok_var: 520 return ParseVarExpr(); 521 } 522 } 523 524 /// unary 525 /// ::= primary 526 /// ::= '!' unary 527 static std::unique_ptr<ExprAST> ParseUnary() { 528 // If the current token is not an operator, it must be a primary expr. 529 if (!isascii(CurTok) || CurTok == '(' || CurTok == ',') 530 return ParsePrimary(); 531 532 // If this is a unary operator, read it. 533 int Opc = CurTok; 534 getNextToken(); 535 if (auto Operand = ParseUnary()) 536 return llvm::make_unique<UnaryExprAST>(Opc, std::move(Operand)); 537 return nullptr; 538 } 539 540 /// binoprhs 541 /// ::= ('+' unary)* 542 static std::unique_ptr<ExprAST> ParseBinOpRHS(int ExprPrec, 543 std::unique_ptr<ExprAST> LHS) { 544 // If this is a binop, find its precedence. 545 while (true) { 546 int TokPrec = GetTokPrecedence(); 547 548 // If this is a binop that binds at least as tightly as the current binop, 549 // consume it, otherwise we are done. 550 if (TokPrec < ExprPrec) 551 return LHS; 552 553 // Okay, we know this is a binop. 554 int BinOp = CurTok; 555 getNextToken(); // eat binop 556 557 // Parse the unary expression after the binary operator. 558 auto RHS = ParseUnary(); 559 if (!RHS) 560 return nullptr; 561 562 // If BinOp binds less tightly with RHS than the operator after RHS, let 563 // the pending operator take RHS as its LHS. 564 int NextPrec = GetTokPrecedence(); 565 if (TokPrec < NextPrec) { 566 RHS = ParseBinOpRHS(TokPrec + 1, std::move(RHS)); 567 if (!RHS) 568 return nullptr; 569 } 570 571 // Merge LHS/RHS. 572 LHS = 573 llvm::make_unique<BinaryExprAST>(BinOp, std::move(LHS), std::move(RHS)); 574 } 575 } 576 577 /// expression 578 /// ::= unary binoprhs 579 /// 580 static std::unique_ptr<ExprAST> ParseExpression() { 581 auto LHS = ParseUnary(); 582 if (!LHS) 583 return nullptr; 584 585 return ParseBinOpRHS(0, std::move(LHS)); 586 } 587 588 /// prototype 589 /// ::= id '(' id* ')' 590 /// ::= binary LETTER number? (id, id) 591 /// ::= unary LETTER (id) 592 static std::unique_ptr<PrototypeAST> ParsePrototype() { 593 std::string FnName; 594 595 unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary. 596 unsigned BinaryPrecedence = 30; 597 598 switch (CurTok) { 599 default: 600 return LogErrorP("Expected function name in prototype"); 601 case tok_identifier: 602 FnName = IdentifierStr; 603 Kind = 0; 604 getNextToken(); 605 break; 606 case tok_unary: 607 getNextToken(); 608 if (!isascii(CurTok)) 609 return LogErrorP("Expected unary operator"); 610 FnName = "unary"; 611 FnName += (char)CurTok; 612 Kind = 1; 613 getNextToken(); 614 break; 615 case tok_binary: 616 getNextToken(); 617 if (!isascii(CurTok)) 618 return LogErrorP("Expected binary operator"); 619 FnName = "binary"; 620 FnName += (char)CurTok; 621 Kind = 2; 622 getNextToken(); 623 624 // Read the precedence if present. 625 if (CurTok == tok_number) { 626 if (NumVal < 1 || NumVal > 100) 627 return LogErrorP("Invalid precedecnce: must be 1..100"); 628 BinaryPrecedence = (unsigned)NumVal; 629 getNextToken(); 630 } 631 break; 632 } 633 634 if (CurTok != '(') 635 return LogErrorP("Expected '(' in prototype"); 636 637 std::vector<std::string> ArgNames; 638 while (getNextToken() == tok_identifier) 639 ArgNames.push_back(IdentifierStr); 640 if (CurTok != ')') 641 return LogErrorP("Expected ')' in prototype"); 642 643 // success. 644 getNextToken(); // eat ')'. 645 646 // Verify right number of names for operator. 647 if (Kind && ArgNames.size() != Kind) 648 return LogErrorP("Invalid number of operands for operator"); 649 650 return llvm::make_unique<PrototypeAST>(FnName, ArgNames, Kind != 0, 651 BinaryPrecedence); 652 } 653 654 /// definition ::= 'def' prototype expression 655 static std::unique_ptr<FunctionAST> ParseDefinition() { 656 getNextToken(); // eat def. 657 auto Proto = ParsePrototype(); 658 if (!Proto) 659 return nullptr; 660 661 if (auto E = ParseExpression()) 662 return llvm::make_unique<FunctionAST>(std::move(Proto), std::move(E)); 663 return nullptr; 664 } 665 666 /// toplevelexpr ::= expression 667 static std::unique_ptr<FunctionAST> ParseTopLevelExpr() { 668 if (auto E = ParseExpression()) { 669 // Make an anonymous proto. 670 auto Proto = llvm::make_unique<PrototypeAST>("__anon_expr", 671 std::vector<std::string>()); 672 return llvm::make_unique<FunctionAST>(std::move(Proto), std::move(E)); 673 } 674 return nullptr; 675 } 676 677 /// external ::= 'extern' prototype 678 static std::unique_ptr<PrototypeAST> ParseExtern() { 679 getNextToken(); // eat extern. 680 return ParsePrototype(); 681 } 682 683 //===----------------------------------------------------------------------===// 684 // Code Generation 685 //===----------------------------------------------------------------------===// 686 687 static LLVMContext TheContext; 688 static IRBuilder<> Builder(TheContext); 689 static std::unique_ptr<Module> TheModule; 690 static std::map<std::string, AllocaInst *> NamedValues; 691 static std::unique_ptr<legacy::FunctionPassManager> TheFPM; 692 static std::unique_ptr<KaleidoscopeJIT> TheJIT; 693 static std::map<std::string, std::unique_ptr<PrototypeAST>> FunctionProtos; 694 695 Value *LogErrorV(const char *Str) { 696 LogError(Str); 697 return nullptr; 698 } 699 700 Function *getFunction(std::string Name) { 701 // First, see if the function has already been added to the current module. 702 if (auto *F = TheModule->getFunction(Name)) 703 return F; 704 705 // If not, check whether we can codegen the declaration from some existing 706 // prototype. 707 auto FI = FunctionProtos.find(Name); 708 if (FI != FunctionProtos.end()) 709 return FI->second->codegen(); 710 711 // If no existing prototype exists, return null. 712 return nullptr; 713 } 714 715 /// CreateEntryBlockAlloca - Create an alloca instruction in the entry block of 716 /// the function. This is used for mutable variables etc. 717 static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction, 718 const std::string &VarName) { 719 IRBuilder<> TmpB(&TheFunction->getEntryBlock(), 720 TheFunction->getEntryBlock().begin()); 721 return TmpB.CreateAlloca(Type::getDoubleTy(TheContext), nullptr, VarName); 722 } 723 724 Value *NumberExprAST::codegen() { 725 return ConstantFP::get(TheContext, APFloat(Val)); 726 } 727 728 Value *VariableExprAST::codegen() { 729 // Look this variable up in the function. 730 Value *V = NamedValues[Name]; 731 if (!V) 732 return LogErrorV("Unknown variable name"); 733 734 // Load the value. 735 return Builder.CreateLoad(V, Name.c_str()); 736 } 737 738 Value *UnaryExprAST::codegen() { 739 Value *OperandV = Operand->codegen(); 740 if (!OperandV) 741 return nullptr; 742 743 Function *F = getFunction(std::string("unary") + Opcode); 744 if (!F) 745 return LogErrorV("Unknown unary operator"); 746 747 return Builder.CreateCall(F, OperandV, "unop"); 748 } 749 750 Value *BinaryExprAST::codegen() { 751 // Special case '=' because we don't want to emit the LHS as an expression. 752 if (Op == '=') { 753 // Assignment requires the LHS to be an identifier. 754 // This assume we're building without RTTI because LLVM builds that way by 755 // default. If you build LLVM with RTTI this can be changed to a 756 // dynamic_cast for automatic error checking. 757 VariableExprAST *LHSE = static_cast<VariableExprAST *>(LHS.get()); 758 if (!LHSE) 759 return LogErrorV("destination of '=' must be a variable"); 760 // Codegen the RHS. 761 Value *Val = RHS->codegen(); 762 if (!Val) 763 return nullptr; 764 765 // Look up the name. 766 Value *Variable = NamedValues[LHSE->getName()]; 767 if (!Variable) 768 return LogErrorV("Unknown variable name"); 769 770 Builder.CreateStore(Val, Variable); 771 return Val; 772 } 773 774 Value *L = LHS->codegen(); 775 Value *R = RHS->codegen(); 776 if (!L || !R) 777 return nullptr; 778 779 switch (Op) { 780 case '+': 781 return Builder.CreateFAdd(L, R, "addtmp"); 782 case '-': 783 return Builder.CreateFSub(L, R, "subtmp"); 784 case '*': 785 return Builder.CreateFMul(L, R, "multmp"); 786 case '<': 787 L = Builder.CreateFCmpULT(L, R, "cmptmp"); 788 // Convert bool 0/1 to double 0.0 or 1.0 789 return Builder.CreateUIToFP(L, Type::getDoubleTy(TheContext), "booltmp"); 790 default: 791 break; 792 } 793 794 // If it wasn't a builtin binary operator, it must be a user defined one. Emit 795 // a call to it. 796 Function *F = getFunction(std::string("binary") + Op); 797 assert(F && "binary operator not found!"); 798 799 Value *Ops[] = {L, R}; 800 return Builder.CreateCall(F, Ops, "binop"); 801 } 802 803 Value *CallExprAST::codegen() { 804 // Look up the name in the global module table. 805 Function *CalleeF = getFunction(Callee); 806 if (!CalleeF) 807 return LogErrorV("Unknown function referenced"); 808 809 // If argument mismatch error. 810 if (CalleeF->arg_size() != Args.size()) 811 return LogErrorV("Incorrect # arguments passed"); 812 813 std::vector<Value *> ArgsV; 814 for (unsigned i = 0, e = Args.size(); i != e; ++i) { 815 ArgsV.push_back(Args[i]->codegen()); 816 if (!ArgsV.back()) 817 return nullptr; 818 } 819 820 return Builder.CreateCall(CalleeF, ArgsV, "calltmp"); 821 } 822 823 Value *IfExprAST::codegen() { 824 Value *CondV = Cond->codegen(); 825 if (!CondV) 826 return nullptr; 827 828 // Convert condition to a bool by comparing equal to 0.0. 829 CondV = Builder.CreateFCmpONE( 830 CondV, ConstantFP::get(TheContext, APFloat(0.0)), "ifcond"); 831 832 Function *TheFunction = Builder.GetInsertBlock()->getParent(); 833 834 // Create blocks for the then and else cases. Insert the 'then' block at the 835 // end of the function. 836 BasicBlock *ThenBB = BasicBlock::Create(TheContext, "then", TheFunction); 837 BasicBlock *ElseBB = BasicBlock::Create(TheContext, "else"); 838 BasicBlock *MergeBB = BasicBlock::Create(TheContext, "ifcont"); 839 840 Builder.CreateCondBr(CondV, ThenBB, ElseBB); 841 842 // Emit then value. 843 Builder.SetInsertPoint(ThenBB); 844 845 Value *ThenV = Then->codegen(); 846 if (!ThenV) 847 return nullptr; 848 849 Builder.CreateBr(MergeBB); 850 // Codegen of 'Then' can change the current block, update ThenBB for the PHI. 851 ThenBB = Builder.GetInsertBlock(); 852 853 // Emit else block. 854 TheFunction->getBasicBlockList().push_back(ElseBB); 855 Builder.SetInsertPoint(ElseBB); 856 857 Value *ElseV = Else->codegen(); 858 if (!ElseV) 859 return nullptr; 860 861 Builder.CreateBr(MergeBB); 862 // Codegen of 'Else' can change the current block, update ElseBB for the PHI. 863 ElseBB = Builder.GetInsertBlock(); 864 865 // Emit merge block. 866 TheFunction->getBasicBlockList().push_back(MergeBB); 867 Builder.SetInsertPoint(MergeBB); 868 PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(TheContext), 2, "iftmp"); 869 870 PN->addIncoming(ThenV, ThenBB); 871 PN->addIncoming(ElseV, ElseBB); 872 return PN; 873 } 874 875 // Output for-loop as: 876 // var = alloca double 877 // ... 878 // start = startexpr 879 // store start -> var 880 // goto loop 881 // loop: 882 // ... 883 // bodyexpr 884 // ... 885 // loopend: 886 // step = stepexpr 887 // endcond = endexpr 888 // 889 // curvar = load var 890 // nextvar = curvar + step 891 // store nextvar -> var 892 // br endcond, loop, endloop 893 // outloop: 894 Value *ForExprAST::codegen() { 895 Function *TheFunction = Builder.GetInsertBlock()->getParent(); 896 897 // Create an alloca for the variable in the entry block. 898 AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName); 899 900 // Emit the start code first, without 'variable' in scope. 901 Value *StartVal = Start->codegen(); 902 if (!StartVal) 903 return nullptr; 904 905 // Store the value into the alloca. 906 Builder.CreateStore(StartVal, Alloca); 907 908 // Make the new basic block for the loop header, inserting after current 909 // block. 910 BasicBlock *LoopBB = BasicBlock::Create(TheContext, "loop", TheFunction); 911 912 // Insert an explicit fall through from the current block to the LoopBB. 913 Builder.CreateBr(LoopBB); 914 915 // Start insertion in LoopBB. 916 Builder.SetInsertPoint(LoopBB); 917 918 // Within the loop, the variable is defined equal to the PHI node. If it 919 // shadows an existing variable, we have to restore it, so save it now. 920 AllocaInst *OldVal = NamedValues[VarName]; 921 NamedValues[VarName] = Alloca; 922 923 // Emit the body of the loop. This, like any other expr, can change the 924 // current BB. Note that we ignore the value computed by the body, but don't 925 // allow an error. 926 if (!Body->codegen()) 927 return nullptr; 928 929 // Emit the step value. 930 Value *StepVal = nullptr; 931 if (Step) { 932 StepVal = Step->codegen(); 933 if (!StepVal) 934 return nullptr; 935 } else { 936 // If not specified, use 1.0. 937 StepVal = ConstantFP::get(TheContext, APFloat(1.0)); 938 } 939 940 // Compute the end condition. 941 Value *EndCond = End->codegen(); 942 if (!EndCond) 943 return nullptr; 944 945 // Reload, increment, and restore the alloca. This handles the case where 946 // the body of the loop mutates the variable. 947 Value *CurVar = Builder.CreateLoad(Alloca, VarName.c_str()); 948 Value *NextVar = Builder.CreateFAdd(CurVar, StepVal, "nextvar"); 949 Builder.CreateStore(NextVar, Alloca); 950 951 // Convert condition to a bool by comparing equal to 0.0. 952 EndCond = Builder.CreateFCmpONE( 953 EndCond, ConstantFP::get(TheContext, APFloat(0.0)), "loopcond"); 954 955 // Create the "after loop" block and insert it. 956 BasicBlock *AfterBB = 957 BasicBlock::Create(TheContext, "afterloop", TheFunction); 958 959 // Insert the conditional branch into the end of LoopEndBB. 960 Builder.CreateCondBr(EndCond, LoopBB, AfterBB); 961 962 // Any new code will be inserted in AfterBB. 963 Builder.SetInsertPoint(AfterBB); 964 965 // Restore the unshadowed variable. 966 if (OldVal) 967 NamedValues[VarName] = OldVal; 968 else 969 NamedValues.erase(VarName); 970 971 // for expr always returns 0.0. 972 return Constant::getNullValue(Type::getDoubleTy(TheContext)); 973 } 974 975 Value *VarExprAST::codegen() { 976 std::vector<AllocaInst *> OldBindings; 977 978 Function *TheFunction = Builder.GetInsertBlock()->getParent(); 979 980 // Register all variables and emit their initializer. 981 for (unsigned i = 0, e = VarNames.size(); i != e; ++i) { 982 const std::string &VarName = VarNames[i].first; 983 ExprAST *Init = VarNames[i].second.get(); 984 985 // Emit the initializer before adding the variable to scope, this prevents 986 // the initializer from referencing the variable itself, and permits stuff 987 // like this: 988 // var a = 1 in 989 // var a = a in ... # refers to outer 'a'. 990 Value *InitVal; 991 if (Init) { 992 InitVal = Init->codegen(); 993 if (!InitVal) 994 return nullptr; 995 } else { // If not specified, use 0.0. 996 InitVal = ConstantFP::get(TheContext, APFloat(0.0)); 997 } 998 999 AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName); 1000 Builder.CreateStore(InitVal, Alloca); 1001 1002 // Remember the old variable binding so that we can restore the binding when 1003 // we unrecurse. 1004 OldBindings.push_back(NamedValues[VarName]); 1005 1006 // Remember this binding. 1007 NamedValues[VarName] = Alloca; 1008 } 1009 1010 // Codegen the body, now that all vars are in scope. 1011 Value *BodyVal = Body->codegen(); 1012 if (!BodyVal) 1013 return nullptr; 1014 1015 // Pop all our variables from scope. 1016 for (unsigned i = 0, e = VarNames.size(); i != e; ++i) 1017 NamedValues[VarNames[i].first] = OldBindings[i]; 1018 1019 // Return the body computation. 1020 return BodyVal; 1021 } 1022 1023 Function *PrototypeAST::codegen() { 1024 // Make the function type: double(double,double) etc. 1025 std::vector<Type *> Doubles(Args.size(), Type::getDoubleTy(TheContext)); 1026 FunctionType *FT = 1027 FunctionType::get(Type::getDoubleTy(TheContext), Doubles, false); 1028 1029 Function *F = 1030 Function::Create(FT, Function::ExternalLinkage, Name, TheModule.get()); 1031 1032 // Set names for all arguments. 1033 unsigned Idx = 0; 1034 for (auto &Arg : F->args()) 1035 Arg.setName(Args[Idx++]); 1036 1037 return F; 1038 } 1039 1040 Function *FunctionAST::codegen() { 1041 // Transfer ownership of the prototype to the FunctionProtos map, but keep a 1042 // reference to it for use below. 1043 auto &P = *Proto; 1044 FunctionProtos[Proto->getName()] = std::move(Proto); 1045 Function *TheFunction = getFunction(P.getName()); 1046 if (!TheFunction) 1047 return nullptr; 1048 1049 // If this is an operator, install it. 1050 if (P.isBinaryOp()) 1051 BinopPrecedence[P.getOperatorName()] = P.getBinaryPrecedence(); 1052 1053 // Create a new basic block to start insertion into. 1054 BasicBlock *BB = BasicBlock::Create(TheContext, "entry", TheFunction); 1055 Builder.SetInsertPoint(BB); 1056 1057 // Record the function arguments in the NamedValues map. 1058 NamedValues.clear(); 1059 for (auto &Arg : TheFunction->args()) { 1060 // Create an alloca for this variable. 1061 AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, Arg.getName()); 1062 1063 // Store the initial value into the alloca. 1064 Builder.CreateStore(&Arg, Alloca); 1065 1066 // Add arguments to variable symbol table. 1067 NamedValues[Arg.getName()] = Alloca; 1068 } 1069 1070 if (Value *RetVal = Body->codegen()) { 1071 // Finish off the function. 1072 Builder.CreateRet(RetVal); 1073 1074 // Validate the generated code, checking for consistency. 1075 verifyFunction(*TheFunction); 1076 1077 // Run the optimizer on the function. 1078 TheFPM->run(*TheFunction); 1079 1080 return TheFunction; 1081 } 1082 1083 // Error reading body, remove function. 1084 TheFunction->eraseFromParent(); 1085 1086 if (P.isBinaryOp()) 1087 BinopPrecedence.erase(Proto->getOperatorName()); 1088 return nullptr; 1089 } 1090 1091 //===----------------------------------------------------------------------===// 1092 // Top-Level parsing and JIT Driver 1093 //===----------------------------------------------------------------------===// 1094 1095 static void InitializeModuleAndPassManager() { 1096 // Open a new module. 1097 TheModule = llvm::make_unique<Module>("my cool jit", TheContext); 1098 TheModule->setDataLayout(TheJIT->getTargetMachine().createDataLayout()); 1099 1100 // Create a new pass manager attached to it. 1101 TheFPM = llvm::make_unique<legacy::FunctionPassManager>(TheModule.get()); 1102 1103 // Do simple "peephole" optimizations and bit-twiddling optzns. 1104 TheFPM->add(createInstructionCombiningPass()); 1105 // Reassociate expressions. 1106 TheFPM->add(createReassociatePass()); 1107 // Eliminate Common SubExpressions. 1108 TheFPM->add(createGVNPass()); 1109 // Simplify the control flow graph (deleting unreachable blocks, etc). 1110 TheFPM->add(createCFGSimplificationPass()); 1111 1112 TheFPM->doInitialization(); 1113 } 1114 1115 static void HandleDefinition() { 1116 if (auto FnAST = ParseDefinition()) { 1117 if (auto *FnIR = FnAST->codegen()) { 1118 fprintf(stderr, "Read function definition:"); 1119 FnIR->dump(); 1120 TheJIT->addModule(std::move(TheModule)); 1121 InitializeModuleAndPassManager(); 1122 } 1123 } else { 1124 // Skip token for error recovery. 1125 getNextToken(); 1126 } 1127 } 1128 1129 static void HandleExtern() { 1130 if (auto ProtoAST = ParseExtern()) { 1131 if (auto *FnIR = ProtoAST->codegen()) { 1132 fprintf(stderr, "Read extern: "); 1133 FnIR->dump(); 1134 FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST); 1135 } 1136 } else { 1137 // Skip token for error recovery. 1138 getNextToken(); 1139 } 1140 } 1141 1142 static void HandleTopLevelExpression() { 1143 // Evaluate a top-level expression into an anonymous function. 1144 if (auto FnAST = ParseTopLevelExpr()) { 1145 if (FnAST->codegen()) { 1146 // JIT the module containing the anonymous expression, keeping a handle so 1147 // we can free it later. 1148 auto H = TheJIT->addModule(std::move(TheModule)); 1149 InitializeModuleAndPassManager(); 1150 1151 // Search the JIT for the __anon_expr symbol. 1152 auto ExprSymbol = TheJIT->findSymbol("__anon_expr"); 1153 assert(ExprSymbol && "Function not found"); 1154 1155 // Get the symbol's address and cast it to the right type (takes no 1156 // arguments, returns a double) so we can call it as a native function. 1157 double (*FP)() = (double (*)())(intptr_t)ExprSymbol.getAddress(); 1158 fprintf(stderr, "Evaluated to %f\n", FP()); 1159 1160 // Delete the anonymous expression module from the JIT. 1161 TheJIT->removeModule(H); 1162 } 1163 } else { 1164 // Skip token for error recovery. 1165 getNextToken(); 1166 } 1167 } 1168 1169 /// top ::= definition | external | expression | ';' 1170 static void MainLoop() { 1171 while (true) { 1172 fprintf(stderr, "ready> "); 1173 switch (CurTok) { 1174 case tok_eof: 1175 return; 1176 case ';': // ignore top-level semicolons. 1177 getNextToken(); 1178 break; 1179 case tok_def: 1180 HandleDefinition(); 1181 break; 1182 case tok_extern: 1183 HandleExtern(); 1184 break; 1185 default: 1186 HandleTopLevelExpression(); 1187 break; 1188 } 1189 } 1190 } 1191 1192 //===----------------------------------------------------------------------===// 1193 // "Library" functions that can be "extern'd" from user code. 1194 //===----------------------------------------------------------------------===// 1195 1196 /// putchard - putchar that takes a double and returns 0. 1197 extern "C" double putchard(double X) { 1198 fputc((char)X, stderr); 1199 return 0; 1200 } 1201 1202 /// printd - printf that takes a double prints it as "%f\n", returning 0. 1203 extern "C" double printd(double X) { 1204 fprintf(stderr, "%f\n", X); 1205 return 0; 1206 } 1207 1208 //===----------------------------------------------------------------------===// 1209 // Main driver code. 1210 //===----------------------------------------------------------------------===// 1211 1212 int main() { 1213 InitializeNativeTarget(); 1214 InitializeNativeTargetAsmPrinter(); 1215 InitializeNativeTargetAsmParser(); 1216 1217 // Install standard binary operators. 1218 // 1 is lowest precedence. 1219 BinopPrecedence['='] = 2; 1220 BinopPrecedence['<'] = 10; 1221 BinopPrecedence['+'] = 20; 1222 BinopPrecedence['-'] = 20; 1223 BinopPrecedence['*'] = 40; // highest. 1224 1225 // Prime the first token. 1226 fprintf(stderr, "ready> "); 1227 getNextToken(); 1228 1229 TheJIT = llvm::make_unique<KaleidoscopeJIT>(); 1230 1231 InitializeModuleAndPassManager(); 1232 1233 // Run the main "interpreter loop" now. 1234 MainLoop(); 1235 1236 return 0; 1237 } 1238