1 #include "llvm/Analysis/Passes.h" 2 #include "llvm/ExecutionEngine/Orc/CompileUtils.h" 3 #include "llvm/ExecutionEngine/Orc/IRCompileLayer.h" 4 #include "llvm/ExecutionEngine/Orc/LambdaResolver.h" 5 #include "llvm/ExecutionEngine/Orc/LazyEmittingLayer.h" 6 #include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h" 7 #include "llvm/IR/DataLayout.h" 8 #include "llvm/IR/DerivedTypes.h" 9 #include "llvm/IR/IRBuilder.h" 10 #include "llvm/IR/LegacyPassManager.h" 11 #include "llvm/IR/LLVMContext.h" 12 #include "llvm/IR/Module.h" 13 #include "llvm/IR/Verifier.h" 14 #include "llvm/Support/TargetSelect.h" 15 #include "llvm/Transforms/Scalar.h" 16 #include <cctype> 17 #include <iomanip> 18 #include <iostream> 19 #include <map> 20 #include <sstream> 21 #include <string> 22 #include <vector> 23 24 using namespace llvm; 25 using namespace llvm::orc; 26 27 //===----------------------------------------------------------------------===// 28 // Lexer 29 //===----------------------------------------------------------------------===// 30 31 // The lexer returns tokens [0-255] if it is an unknown character, otherwise one 32 // of these for known things. 33 enum Token { 34 tok_eof = -1, 35 36 // commands 37 tok_def = -2, tok_extern = -3, 38 39 // primary 40 tok_identifier = -4, tok_number = -5, 41 42 // control 43 tok_if = -6, tok_then = -7, tok_else = -8, 44 tok_for = -9, tok_in = -10, 45 46 // operators 47 tok_binary = -11, tok_unary = -12, 48 49 // var definition 50 tok_var = -13 51 }; 52 53 static std::string IdentifierStr; // Filled in if tok_identifier 54 static double NumVal; // Filled in if tok_number 55 56 /// gettok - Return the next token from standard input. 57 static int gettok() { 58 static int LastChar = ' '; 59 60 // Skip any whitespace. 61 while (isspace(LastChar)) 62 LastChar = getchar(); 63 64 if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]* 65 IdentifierStr = LastChar; 66 while (isalnum((LastChar = getchar()))) 67 IdentifierStr += LastChar; 68 69 if (IdentifierStr == "def") return tok_def; 70 if (IdentifierStr == "extern") return tok_extern; 71 if (IdentifierStr == "if") return tok_if; 72 if (IdentifierStr == "then") return tok_then; 73 if (IdentifierStr == "else") return tok_else; 74 if (IdentifierStr == "for") return tok_for; 75 if (IdentifierStr == "in") return tok_in; 76 if (IdentifierStr == "binary") return tok_binary; 77 if (IdentifierStr == "unary") return tok_unary; 78 if (IdentifierStr == "var") return tok_var; 79 return tok_identifier; 80 } 81 82 if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+ 83 std::string NumStr; 84 do { 85 NumStr += LastChar; 86 LastChar = getchar(); 87 } while (isdigit(LastChar) || LastChar == '.'); 88 89 NumVal = strtod(NumStr.c_str(), nullptr); 90 return tok_number; 91 } 92 93 if (LastChar == '#') { 94 // Comment until end of line. 95 do LastChar = getchar(); 96 while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); 97 98 if (LastChar != EOF) 99 return gettok(); 100 } 101 102 // Check for end of file. Don't eat the EOF. 103 if (LastChar == EOF) 104 return tok_eof; 105 106 // Otherwise, just return the character as its ascii value. 107 int ThisChar = LastChar; 108 LastChar = getchar(); 109 return ThisChar; 110 } 111 112 //===----------------------------------------------------------------------===// 113 // Abstract Syntax Tree (aka Parse Tree) 114 //===----------------------------------------------------------------------===// 115 116 class IRGenContext; 117 118 /// ExprAST - Base class for all expression nodes. 119 struct ExprAST { 120 virtual ~ExprAST() {} 121 virtual Value *IRGen(IRGenContext &C) const = 0; 122 }; 123 124 /// NumberExprAST - Expression class for numeric literals like "1.0". 125 struct NumberExprAST : public ExprAST { 126 NumberExprAST(double Val) : Val(Val) {} 127 Value *IRGen(IRGenContext &C) const override; 128 129 double Val; 130 }; 131 132 /// VariableExprAST - Expression class for referencing a variable, like "a". 133 struct VariableExprAST : public ExprAST { 134 VariableExprAST(std::string Name) : Name(std::move(Name)) {} 135 Value *IRGen(IRGenContext &C) const override; 136 137 std::string Name; 138 }; 139 140 /// UnaryExprAST - Expression class for a unary operator. 141 struct UnaryExprAST : public ExprAST { 142 UnaryExprAST(char Opcode, std::unique_ptr<ExprAST> Operand) 143 : Opcode(std::move(Opcode)), Operand(std::move(Operand)) {} 144 145 Value *IRGen(IRGenContext &C) const override; 146 147 char Opcode; 148 std::unique_ptr<ExprAST> Operand; 149 }; 150 151 /// BinaryExprAST - Expression class for a binary operator. 152 struct BinaryExprAST : public ExprAST { 153 BinaryExprAST(char Op, std::unique_ptr<ExprAST> LHS, 154 std::unique_ptr<ExprAST> RHS) 155 : Op(Op), LHS(std::move(LHS)), RHS(std::move(RHS)) {} 156 157 Value *IRGen(IRGenContext &C) const override; 158 159 char Op; 160 std::unique_ptr<ExprAST> LHS, RHS; 161 }; 162 163 /// CallExprAST - Expression class for function calls. 164 struct CallExprAST : public ExprAST { 165 CallExprAST(std::string CalleeName, 166 std::vector<std::unique_ptr<ExprAST>> Args) 167 : CalleeName(std::move(CalleeName)), Args(std::move(Args)) {} 168 169 Value *IRGen(IRGenContext &C) const override; 170 171 std::string CalleeName; 172 std::vector<std::unique_ptr<ExprAST>> Args; 173 }; 174 175 /// IfExprAST - Expression class for if/then/else. 176 struct IfExprAST : public ExprAST { 177 IfExprAST(std::unique_ptr<ExprAST> Cond, std::unique_ptr<ExprAST> Then, 178 std::unique_ptr<ExprAST> Else) 179 : Cond(std::move(Cond)), Then(std::move(Then)), Else(std::move(Else)) {} 180 Value *IRGen(IRGenContext &C) const override; 181 182 std::unique_ptr<ExprAST> Cond, Then, Else; 183 }; 184 185 /// ForExprAST - Expression class for for/in. 186 struct ForExprAST : public ExprAST { 187 ForExprAST(std::string VarName, std::unique_ptr<ExprAST> Start, 188 std::unique_ptr<ExprAST> End, std::unique_ptr<ExprAST> Step, 189 std::unique_ptr<ExprAST> Body) 190 : VarName(std::move(VarName)), Start(std::move(Start)), End(std::move(End)), 191 Step(std::move(Step)), Body(std::move(Body)) {} 192 193 Value *IRGen(IRGenContext &C) const override; 194 195 std::string VarName; 196 std::unique_ptr<ExprAST> Start, End, Step, Body; 197 }; 198 199 /// VarExprAST - Expression class for var/in 200 struct VarExprAST : public ExprAST { 201 typedef std::pair<std::string, std::unique_ptr<ExprAST>> Binding; 202 typedef std::vector<Binding> BindingList; 203 204 VarExprAST(BindingList VarBindings, std::unique_ptr<ExprAST> Body) 205 : VarBindings(std::move(VarBindings)), Body(std::move(Body)) {} 206 207 Value *IRGen(IRGenContext &C) const override; 208 209 BindingList VarBindings; 210 std::unique_ptr<ExprAST> Body; 211 }; 212 213 /// PrototypeAST - This class represents the "prototype" for a function, 214 /// which captures its argument names as well as if it is an operator. 215 struct PrototypeAST { 216 PrototypeAST(std::string Name, std::vector<std::string> Args, 217 bool IsOperator = false, unsigned Precedence = 0) 218 : Name(std::move(Name)), Args(std::move(Args)), IsOperator(IsOperator), 219 Precedence(Precedence) {} 220 221 Function *IRGen(IRGenContext &C) const; 222 void CreateArgumentAllocas(Function *F, IRGenContext &C); 223 224 bool isUnaryOp() const { return IsOperator && Args.size() == 1; } 225 bool isBinaryOp() const { return IsOperator && Args.size() == 2; } 226 227 char getOperatorName() const { 228 assert(isUnaryOp() || isBinaryOp()); 229 return Name[Name.size()-1]; 230 } 231 232 std::string Name; 233 std::vector<std::string> Args; 234 bool IsOperator; 235 unsigned Precedence; // Precedence if a binary op. 236 }; 237 238 /// FunctionAST - This class represents a function definition itself. 239 struct FunctionAST { 240 FunctionAST(std::unique_ptr<PrototypeAST> Proto, 241 std::unique_ptr<ExprAST> Body) 242 : Proto(std::move(Proto)), Body(std::move(Body)) {} 243 244 Function *IRGen(IRGenContext &C) const; 245 246 std::unique_ptr<PrototypeAST> Proto; 247 std::unique_ptr<ExprAST> Body; 248 }; 249 250 //===----------------------------------------------------------------------===// 251 // Parser 252 //===----------------------------------------------------------------------===// 253 254 /// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current 255 /// token the parser is looking at. getNextToken reads another token from the 256 /// lexer and updates CurTok with its results. 257 static int CurTok; 258 static int getNextToken() { 259 return CurTok = gettok(); 260 } 261 262 /// BinopPrecedence - This holds the precedence for each binary operator that is 263 /// defined. 264 static std::map<char, int> BinopPrecedence; 265 266 /// GetTokPrecedence - Get the precedence of the pending binary operator token. 267 static int GetTokPrecedence() { 268 if (!isascii(CurTok)) 269 return -1; 270 271 // Make sure it's a declared binop. 272 int TokPrec = BinopPrecedence[CurTok]; 273 if (TokPrec <= 0) return -1; 274 return TokPrec; 275 } 276 277 template <typename T> 278 std::unique_ptr<T> ErrorU(const std::string &Str) { 279 std::cerr << "Error: " << Str << "\n"; 280 return nullptr; 281 } 282 283 template <typename T> 284 T* ErrorP(const std::string &Str) { 285 std::cerr << "Error: " << Str << "\n"; 286 return nullptr; 287 } 288 289 static std::unique_ptr<ExprAST> ParseExpression(); 290 291 /// identifierexpr 292 /// ::= identifier 293 /// ::= identifier '(' expression* ')' 294 static std::unique_ptr<ExprAST> ParseIdentifierExpr() { 295 std::string IdName = IdentifierStr; 296 297 getNextToken(); // eat identifier. 298 299 if (CurTok != '(') // Simple variable ref. 300 return llvm::make_unique<VariableExprAST>(IdName); 301 302 // Call. 303 getNextToken(); // eat ( 304 std::vector<std::unique_ptr<ExprAST>> Args; 305 if (CurTok != ')') { 306 while (1) { 307 auto Arg = ParseExpression(); 308 if (!Arg) return nullptr; 309 Args.push_back(std::move(Arg)); 310 311 if (CurTok == ')') break; 312 313 if (CurTok != ',') 314 return ErrorU<CallExprAST>("Expected ')' or ',' in argument list"); 315 getNextToken(); 316 } 317 } 318 319 // Eat the ')'. 320 getNextToken(); 321 322 return llvm::make_unique<CallExprAST>(IdName, std::move(Args)); 323 } 324 325 /// numberexpr ::= number 326 static std::unique_ptr<NumberExprAST> ParseNumberExpr() { 327 auto Result = llvm::make_unique<NumberExprAST>(NumVal); 328 getNextToken(); // consume the number 329 return Result; 330 } 331 332 /// parenexpr ::= '(' expression ')' 333 static std::unique_ptr<ExprAST> ParseParenExpr() { 334 getNextToken(); // eat (. 335 auto V = ParseExpression(); 336 if (!V) 337 return nullptr; 338 339 if (CurTok != ')') 340 return ErrorU<ExprAST>("expected ')'"); 341 getNextToken(); // eat ). 342 return V; 343 } 344 345 /// ifexpr ::= 'if' expression 'then' expression 'else' expression 346 static std::unique_ptr<ExprAST> ParseIfExpr() { 347 getNextToken(); // eat the if. 348 349 // condition. 350 auto Cond = ParseExpression(); 351 if (!Cond) 352 return nullptr; 353 354 if (CurTok != tok_then) 355 return ErrorU<ExprAST>("expected then"); 356 getNextToken(); // eat the then 357 358 auto Then = ParseExpression(); 359 if (!Then) 360 return nullptr; 361 362 if (CurTok != tok_else) 363 return ErrorU<ExprAST>("expected else"); 364 365 getNextToken(); 366 367 auto Else = ParseExpression(); 368 if (!Else) 369 return nullptr; 370 371 return llvm::make_unique<IfExprAST>(std::move(Cond), std::move(Then), 372 std::move(Else)); 373 } 374 375 /// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression 376 static std::unique_ptr<ForExprAST> ParseForExpr() { 377 getNextToken(); // eat the for. 378 379 if (CurTok != tok_identifier) 380 return ErrorU<ForExprAST>("expected identifier after for"); 381 382 std::string IdName = IdentifierStr; 383 getNextToken(); // eat identifier. 384 385 if (CurTok != '=') 386 return ErrorU<ForExprAST>("expected '=' after for"); 387 getNextToken(); // eat '='. 388 389 auto Start = ParseExpression(); 390 if (!Start) 391 return nullptr; 392 if (CurTok != ',') 393 return ErrorU<ForExprAST>("expected ',' after for start value"); 394 getNextToken(); 395 396 auto End = ParseExpression(); 397 if (!End) 398 return nullptr; 399 400 // The step value is optional. 401 std::unique_ptr<ExprAST> Step; 402 if (CurTok == ',') { 403 getNextToken(); 404 Step = ParseExpression(); 405 if (!Step) 406 return nullptr; 407 } 408 409 if (CurTok != tok_in) 410 return ErrorU<ForExprAST>("expected 'in' after for"); 411 getNextToken(); // eat 'in'. 412 413 auto Body = ParseExpression(); 414 if (Body) 415 return nullptr; 416 417 return llvm::make_unique<ForExprAST>(IdName, std::move(Start), std::move(End), 418 std::move(Step), std::move(Body)); 419 } 420 421 /// varexpr ::= 'var' identifier ('=' expression)? 422 // (',' identifier ('=' expression)?)* 'in' expression 423 static std::unique_ptr<VarExprAST> ParseVarExpr() { 424 getNextToken(); // eat the var. 425 426 VarExprAST::BindingList VarBindings; 427 428 // At least one variable name is required. 429 if (CurTok != tok_identifier) 430 return ErrorU<VarExprAST>("expected identifier after var"); 431 432 while (1) { 433 std::string Name = IdentifierStr; 434 getNextToken(); // eat identifier. 435 436 // Read the optional initializer. 437 std::unique_ptr<ExprAST> Init; 438 if (CurTok == '=') { 439 getNextToken(); // eat the '='. 440 441 Init = ParseExpression(); 442 if (!Init) 443 return nullptr; 444 } 445 446 VarBindings.push_back(VarExprAST::Binding(Name, std::move(Init))); 447 448 // End of var list, exit loop. 449 if (CurTok != ',') break; 450 getNextToken(); // eat the ','. 451 452 if (CurTok != tok_identifier) 453 return ErrorU<VarExprAST>("expected identifier list after var"); 454 } 455 456 // At this point, we have to have 'in'. 457 if (CurTok != tok_in) 458 return ErrorU<VarExprAST>("expected 'in' keyword after 'var'"); 459 getNextToken(); // eat 'in'. 460 461 auto Body = ParseExpression(); 462 if (!Body) 463 return nullptr; 464 465 return llvm::make_unique<VarExprAST>(std::move(VarBindings), std::move(Body)); 466 } 467 468 /// primary 469 /// ::= identifierexpr 470 /// ::= numberexpr 471 /// ::= parenexpr 472 /// ::= ifexpr 473 /// ::= forexpr 474 /// ::= varexpr 475 static std::unique_ptr<ExprAST> ParsePrimary() { 476 switch (CurTok) { 477 default: return ErrorU<ExprAST>("unknown token when expecting an expression"); 478 case tok_identifier: return ParseIdentifierExpr(); 479 case tok_number: return ParseNumberExpr(); 480 case '(': return ParseParenExpr(); 481 case tok_if: return ParseIfExpr(); 482 case tok_for: return ParseForExpr(); 483 case tok_var: return ParseVarExpr(); 484 } 485 } 486 487 /// unary 488 /// ::= primary 489 /// ::= '!' unary 490 static std::unique_ptr<ExprAST> ParseUnary() { 491 // If the current token is not an operator, it must be a primary expr. 492 if (!isascii(CurTok) || CurTok == '(' || CurTok == ',') 493 return ParsePrimary(); 494 495 // If this is a unary operator, read it. 496 int Opc = CurTok; 497 getNextToken(); 498 if (auto Operand = ParseUnary()) 499 return llvm::make_unique<UnaryExprAST>(Opc, std::move(Operand)); 500 return nullptr; 501 } 502 503 /// binoprhs 504 /// ::= ('+' unary)* 505 static std::unique_ptr<ExprAST> ParseBinOpRHS(int ExprPrec, 506 std::unique_ptr<ExprAST> LHS) { 507 // If this is a binop, find its precedence. 508 while (1) { 509 int TokPrec = GetTokPrecedence(); 510 511 // If this is a binop that binds at least as tightly as the current binop, 512 // consume it, otherwise we are done. 513 if (TokPrec < ExprPrec) 514 return LHS; 515 516 // Okay, we know this is a binop. 517 int BinOp = CurTok; 518 getNextToken(); // eat binop 519 520 // Parse the unary expression after the binary operator. 521 auto RHS = ParseUnary(); 522 if (!RHS) 523 return nullptr; 524 525 // If BinOp binds less tightly with RHS than the operator after RHS, let 526 // the pending operator take RHS as its LHS. 527 int NextPrec = GetTokPrecedence(); 528 if (TokPrec < NextPrec) { 529 RHS = ParseBinOpRHS(TokPrec+1, std::move(RHS)); 530 if (!RHS) 531 return nullptr; 532 } 533 534 // Merge LHS/RHS. 535 LHS = llvm::make_unique<BinaryExprAST>(BinOp, std::move(LHS), std::move(RHS)); 536 } 537 } 538 539 /// expression 540 /// ::= unary binoprhs 541 /// 542 static std::unique_ptr<ExprAST> ParseExpression() { 543 auto LHS = ParseUnary(); 544 if (!LHS) 545 return nullptr; 546 547 return ParseBinOpRHS(0, std::move(LHS)); 548 } 549 550 /// prototype 551 /// ::= id '(' id* ')' 552 /// ::= binary LETTER number? (id, id) 553 /// ::= unary LETTER (id) 554 static std::unique_ptr<PrototypeAST> ParsePrototype() { 555 std::string FnName; 556 557 unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary. 558 unsigned BinaryPrecedence = 30; 559 560 switch (CurTok) { 561 default: 562 return ErrorU<PrototypeAST>("Expected function name in prototype"); 563 case tok_identifier: 564 FnName = IdentifierStr; 565 Kind = 0; 566 getNextToken(); 567 break; 568 case tok_unary: 569 getNextToken(); 570 if (!isascii(CurTok)) 571 return ErrorU<PrototypeAST>("Expected unary operator"); 572 FnName = "unary"; 573 FnName += (char)CurTok; 574 Kind = 1; 575 getNextToken(); 576 break; 577 case tok_binary: 578 getNextToken(); 579 if (!isascii(CurTok)) 580 return ErrorU<PrototypeAST>("Expected binary operator"); 581 FnName = "binary"; 582 FnName += (char)CurTok; 583 Kind = 2; 584 getNextToken(); 585 586 // Read the precedence if present. 587 if (CurTok == tok_number) { 588 if (NumVal < 1 || NumVal > 100) 589 return ErrorU<PrototypeAST>("Invalid precedecnce: must be 1..100"); 590 BinaryPrecedence = (unsigned)NumVal; 591 getNextToken(); 592 } 593 break; 594 } 595 596 if (CurTok != '(') 597 return ErrorU<PrototypeAST>("Expected '(' in prototype"); 598 599 std::vector<std::string> ArgNames; 600 while (getNextToken() == tok_identifier) 601 ArgNames.push_back(IdentifierStr); 602 if (CurTok != ')') 603 return ErrorU<PrototypeAST>("Expected ')' in prototype"); 604 605 // success. 606 getNextToken(); // eat ')'. 607 608 // Verify right number of names for operator. 609 if (Kind && ArgNames.size() != Kind) 610 return ErrorU<PrototypeAST>("Invalid number of operands for operator"); 611 612 return llvm::make_unique<PrototypeAST>(FnName, std::move(ArgNames), Kind != 0, 613 BinaryPrecedence); 614 } 615 616 /// definition ::= 'def' prototype expression 617 static std::unique_ptr<FunctionAST> ParseDefinition() { 618 getNextToken(); // eat def. 619 auto Proto = ParsePrototype(); 620 if (!Proto) 621 return nullptr; 622 623 if (auto Body = ParseExpression()) 624 return llvm::make_unique<FunctionAST>(std::move(Proto), std::move(Body)); 625 return nullptr; 626 } 627 628 /// toplevelexpr ::= expression 629 static std::unique_ptr<FunctionAST> ParseTopLevelExpr() { 630 if (auto E = ParseExpression()) { 631 // Make an anonymous proto. 632 auto Proto = 633 llvm::make_unique<PrototypeAST>("__anon_expr", std::vector<std::string>()); 634 return llvm::make_unique<FunctionAST>(std::move(Proto), std::move(E)); 635 } 636 return nullptr; 637 } 638 639 /// external ::= 'extern' prototype 640 static std::unique_ptr<PrototypeAST> ParseExtern() { 641 getNextToken(); // eat extern. 642 return ParsePrototype(); 643 } 644 645 //===----------------------------------------------------------------------===// 646 // Code Generation 647 //===----------------------------------------------------------------------===// 648 649 // FIXME: Obviously we can do better than this 650 std::string GenerateUniqueName(const std::string &Root) { 651 static int i = 0; 652 std::ostringstream NameStream; 653 NameStream << Root << ++i; 654 return NameStream.str(); 655 } 656 657 std::string MakeLegalFunctionName(std::string Name) 658 { 659 std::string NewName; 660 assert(!Name.empty() && "Base name must not be empty"); 661 662 // Start with what we have 663 NewName = Name; 664 665 // Look for a numberic first character 666 if (NewName.find_first_of("0123456789") == 0) { 667 NewName.insert(0, 1, 'n'); 668 } 669 670 // Replace illegal characters with their ASCII equivalent 671 std::string legal_elements = "_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; 672 size_t pos; 673 while ((pos = NewName.find_first_not_of(legal_elements)) != std::string::npos) { 674 std::ostringstream NumStream; 675 NumStream << (int)NewName.at(pos); 676 NewName = NewName.replace(pos, 1, NumStream.str()); 677 } 678 679 return NewName; 680 } 681 682 class SessionContext { 683 public: 684 SessionContext(LLVMContext &C) 685 : Context(C), TM(EngineBuilder().selectTarget()) {} 686 LLVMContext& getLLVMContext() const { return Context; } 687 TargetMachine& getTarget() { return *TM; } 688 void addPrototypeAST(std::unique_ptr<PrototypeAST> P); 689 PrototypeAST* getPrototypeAST(const std::string &Name); 690 private: 691 typedef std::map<std::string, std::unique_ptr<PrototypeAST>> PrototypeMap; 692 693 LLVMContext &Context; 694 std::unique_ptr<TargetMachine> TM; 695 696 PrototypeMap Prototypes; 697 }; 698 699 void SessionContext::addPrototypeAST(std::unique_ptr<PrototypeAST> P) { 700 Prototypes[P->Name] = std::move(P); 701 } 702 703 PrototypeAST* SessionContext::getPrototypeAST(const std::string &Name) { 704 PrototypeMap::iterator I = Prototypes.find(Name); 705 if (I != Prototypes.end()) 706 return I->second.get(); 707 return nullptr; 708 } 709 710 class IRGenContext { 711 public: 712 713 IRGenContext(SessionContext &S) 714 : Session(S), 715 M(new Module(GenerateUniqueName("jit_module_"), 716 Session.getLLVMContext())), 717 Builder(Session.getLLVMContext()) { 718 M->setDataLayout(Session.getTarget().createDataLayout()); 719 } 720 721 SessionContext& getSession() { return Session; } 722 Module& getM() const { return *M; } 723 std::unique_ptr<Module> takeM() { return std::move(M); } 724 IRBuilder<>& getBuilder() { return Builder; } 725 LLVMContext& getLLVMContext() { return Session.getLLVMContext(); } 726 Function* getPrototype(const std::string &Name); 727 728 std::map<std::string, AllocaInst*> NamedValues; 729 private: 730 SessionContext &Session; 731 std::unique_ptr<Module> M; 732 IRBuilder<> Builder; 733 }; 734 735 Function* IRGenContext::getPrototype(const std::string &Name) { 736 if (Function *ExistingProto = M->getFunction(Name)) 737 return ExistingProto; 738 if (PrototypeAST *ProtoAST = Session.getPrototypeAST(Name)) 739 return ProtoAST->IRGen(*this); 740 return nullptr; 741 } 742 743 /// CreateEntryBlockAlloca - Create an alloca instruction in the entry block of 744 /// the function. This is used for mutable variables etc. 745 static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction, 746 const std::string &VarName) { 747 IRBuilder<> TmpB(&TheFunction->getEntryBlock(), 748 TheFunction->getEntryBlock().begin()); 749 return TmpB.CreateAlloca(Type::getDoubleTy(getGlobalContext()), nullptr, 750 VarName.c_str()); 751 } 752 753 Value *NumberExprAST::IRGen(IRGenContext &C) const { 754 return ConstantFP::get(C.getLLVMContext(), APFloat(Val)); 755 } 756 757 Value *VariableExprAST::IRGen(IRGenContext &C) const { 758 // Look this variable up in the function. 759 Value *V = C.NamedValues[Name]; 760 761 if (!V) 762 return ErrorP<Value>("Unknown variable name '" + Name + "'"); 763 764 // Load the value. 765 return C.getBuilder().CreateLoad(V, Name.c_str()); 766 } 767 768 Value *UnaryExprAST::IRGen(IRGenContext &C) const { 769 if (Value *OperandV = Operand->IRGen(C)) { 770 std::string FnName = MakeLegalFunctionName(std::string("unary")+Opcode); 771 if (Function *F = C.getPrototype(FnName)) 772 return C.getBuilder().CreateCall(F, OperandV, "unop"); 773 return ErrorP<Value>("Unknown unary operator"); 774 } 775 776 // Could not codegen operand - return null. 777 return nullptr; 778 } 779 780 Value *BinaryExprAST::IRGen(IRGenContext &C) const { 781 // Special case '=' because we don't want to emit the LHS as an expression. 782 if (Op == '=') { 783 // Assignment requires the LHS to be an identifier. 784 auto &LHSVar = static_cast<VariableExprAST &>(*LHS); 785 // Codegen the RHS. 786 Value *Val = RHS->IRGen(C); 787 if (!Val) return nullptr; 788 789 // Look up the name. 790 if (auto Variable = C.NamedValues[LHSVar.Name]) { 791 C.getBuilder().CreateStore(Val, Variable); 792 return Val; 793 } 794 return ErrorP<Value>("Unknown variable name"); 795 } 796 797 Value *L = LHS->IRGen(C); 798 Value *R = RHS->IRGen(C); 799 if (!L || !R) return nullptr; 800 801 switch (Op) { 802 case '+': return C.getBuilder().CreateFAdd(L, R, "addtmp"); 803 case '-': return C.getBuilder().CreateFSub(L, R, "subtmp"); 804 case '*': return C.getBuilder().CreateFMul(L, R, "multmp"); 805 case '/': return C.getBuilder().CreateFDiv(L, R, "divtmp"); 806 case '<': 807 L = C.getBuilder().CreateFCmpULT(L, R, "cmptmp"); 808 // Convert bool 0/1 to double 0.0 or 1.0 809 return C.getBuilder().CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()), 810 "booltmp"); 811 default: break; 812 } 813 814 // If it wasn't a builtin binary operator, it must be a user defined one. Emit 815 // a call to it. 816 std::string FnName = MakeLegalFunctionName(std::string("binary")+Op); 817 if (Function *F = C.getPrototype(FnName)) { 818 Value *Ops[] = { L, R }; 819 return C.getBuilder().CreateCall(F, Ops, "binop"); 820 } 821 822 return ErrorP<Value>("Unknown binary operator"); 823 } 824 825 Value *CallExprAST::IRGen(IRGenContext &C) const { 826 // Look up the name in the global module table. 827 if (auto CalleeF = C.getPrototype(CalleeName)) { 828 // If argument mismatch error. 829 if (CalleeF->arg_size() != Args.size()) 830 return ErrorP<Value>("Incorrect # arguments passed"); 831 832 std::vector<Value*> ArgsV; 833 for (unsigned i = 0, e = Args.size(); i != e; ++i) { 834 ArgsV.push_back(Args[i]->IRGen(C)); 835 if (!ArgsV.back()) return nullptr; 836 } 837 838 return C.getBuilder().CreateCall(CalleeF, ArgsV, "calltmp"); 839 } 840 841 return ErrorP<Value>("Unknown function referenced"); 842 } 843 844 Value *IfExprAST::IRGen(IRGenContext &C) const { 845 Value *CondV = Cond->IRGen(C); 846 if (!CondV) return nullptr; 847 848 // Convert condition to a bool by comparing equal to 0.0. 849 ConstantFP *FPZero = 850 ConstantFP::get(C.getLLVMContext(), APFloat(0.0)); 851 CondV = C.getBuilder().CreateFCmpONE(CondV, FPZero, "ifcond"); 852 853 Function *TheFunction = C.getBuilder().GetInsertBlock()->getParent(); 854 855 // Create blocks for the then and else cases. Insert the 'then' block at the 856 // end of the function. 857 BasicBlock *ThenBB = BasicBlock::Create(C.getLLVMContext(), "then", TheFunction); 858 BasicBlock *ElseBB = BasicBlock::Create(C.getLLVMContext(), "else"); 859 BasicBlock *MergeBB = BasicBlock::Create(C.getLLVMContext(), "ifcont"); 860 861 C.getBuilder().CreateCondBr(CondV, ThenBB, ElseBB); 862 863 // Emit then value. 864 C.getBuilder().SetInsertPoint(ThenBB); 865 866 Value *ThenV = Then->IRGen(C); 867 if (!ThenV) return nullptr; 868 869 C.getBuilder().CreateBr(MergeBB); 870 // Codegen of 'Then' can change the current block, update ThenBB for the PHI. 871 ThenBB = C.getBuilder().GetInsertBlock(); 872 873 // Emit else block. 874 TheFunction->getBasicBlockList().push_back(ElseBB); 875 C.getBuilder().SetInsertPoint(ElseBB); 876 877 Value *ElseV = Else->IRGen(C); 878 if (!ElseV) return nullptr; 879 880 C.getBuilder().CreateBr(MergeBB); 881 // Codegen of 'Else' can change the current block, update ElseBB for the PHI. 882 ElseBB = C.getBuilder().GetInsertBlock(); 883 884 // Emit merge block. 885 TheFunction->getBasicBlockList().push_back(MergeBB); 886 C.getBuilder().SetInsertPoint(MergeBB); 887 PHINode *PN = C.getBuilder().CreatePHI(Type::getDoubleTy(getGlobalContext()), 2, 888 "iftmp"); 889 890 PN->addIncoming(ThenV, ThenBB); 891 PN->addIncoming(ElseV, ElseBB); 892 return PN; 893 } 894 895 Value *ForExprAST::IRGen(IRGenContext &C) const { 896 // Output this as: 897 // var = alloca double 898 // ... 899 // start = startexpr 900 // store start -> var 901 // goto loop 902 // loop: 903 // ... 904 // bodyexpr 905 // ... 906 // loopend: 907 // step = stepexpr 908 // endcond = endexpr 909 // 910 // curvar = load var 911 // nextvar = curvar + step 912 // store nextvar -> var 913 // br endcond, loop, endloop 914 // outloop: 915 916 Function *TheFunction = C.getBuilder().GetInsertBlock()->getParent(); 917 918 // Create an alloca for the variable in the entry block. 919 AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName); 920 921 // Emit the start code first, without 'variable' in scope. 922 Value *StartVal = Start->IRGen(C); 923 if (!StartVal) return nullptr; 924 925 // Store the value into the alloca. 926 C.getBuilder().CreateStore(StartVal, Alloca); 927 928 // Make the new basic block for the loop header, inserting after current 929 // block. 930 BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction); 931 932 // Insert an explicit fall through from the current block to the LoopBB. 933 C.getBuilder().CreateBr(LoopBB); 934 935 // Start insertion in LoopBB. 936 C.getBuilder().SetInsertPoint(LoopBB); 937 938 // Within the loop, the variable is defined equal to the PHI node. If it 939 // shadows an existing variable, we have to restore it, so save it now. 940 AllocaInst *OldVal = C.NamedValues[VarName]; 941 C.NamedValues[VarName] = Alloca; 942 943 // Emit the body of the loop. This, like any other expr, can change the 944 // current BB. Note that we ignore the value computed by the body, but don't 945 // allow an error. 946 if (!Body->IRGen(C)) 947 return nullptr; 948 949 // Emit the step value. 950 Value *StepVal; 951 if (Step) { 952 StepVal = Step->IRGen(C); 953 if (!StepVal) return nullptr; 954 } else { 955 // If not specified, use 1.0. 956 StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0)); 957 } 958 959 // Compute the end condition. 960 Value *EndCond = End->IRGen(C); 961 if (!EndCond) return nullptr; 962 963 // Reload, increment, and restore the alloca. This handles the case where 964 // the body of the loop mutates the variable. 965 Value *CurVar = C.getBuilder().CreateLoad(Alloca, VarName.c_str()); 966 Value *NextVar = C.getBuilder().CreateFAdd(CurVar, StepVal, "nextvar"); 967 C.getBuilder().CreateStore(NextVar, Alloca); 968 969 // Convert condition to a bool by comparing equal to 0.0. 970 EndCond = C.getBuilder().CreateFCmpONE(EndCond, 971 ConstantFP::get(getGlobalContext(), APFloat(0.0)), 972 "loopcond"); 973 974 // Create the "after loop" block and insert it. 975 BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction); 976 977 // Insert the conditional branch into the end of LoopEndBB. 978 C.getBuilder().CreateCondBr(EndCond, LoopBB, AfterBB); 979 980 // Any new code will be inserted in AfterBB. 981 C.getBuilder().SetInsertPoint(AfterBB); 982 983 // Restore the unshadowed variable. 984 if (OldVal) 985 C.NamedValues[VarName] = OldVal; 986 else 987 C.NamedValues.erase(VarName); 988 989 // for expr always returns 0.0. 990 return Constant::getNullValue(Type::getDoubleTy(getGlobalContext())); 991 } 992 993 Value *VarExprAST::IRGen(IRGenContext &C) const { 994 std::vector<AllocaInst *> OldBindings; 995 996 Function *TheFunction = C.getBuilder().GetInsertBlock()->getParent(); 997 998 // Register all variables and emit their initializer. 999 for (unsigned i = 0, e = VarBindings.size(); i != e; ++i) { 1000 auto &VarName = VarBindings[i].first; 1001 auto &Init = VarBindings[i].second; 1002 1003 // Emit the initializer before adding the variable to scope, this prevents 1004 // the initializer from referencing the variable itself, and permits stuff 1005 // like this: 1006 // var a = 1 in 1007 // var a = a in ... # refers to outer 'a'. 1008 Value *InitVal; 1009 if (Init) { 1010 InitVal = Init->IRGen(C); 1011 if (!InitVal) return nullptr; 1012 } else // If not specified, use 0.0. 1013 InitVal = ConstantFP::get(getGlobalContext(), APFloat(0.0)); 1014 1015 AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName); 1016 C.getBuilder().CreateStore(InitVal, Alloca); 1017 1018 // Remember the old variable binding so that we can restore the binding when 1019 // we unrecurse. 1020 OldBindings.push_back(C.NamedValues[VarName]); 1021 1022 // Remember this binding. 1023 C.NamedValues[VarName] = Alloca; 1024 } 1025 1026 // Codegen the body, now that all vars are in scope. 1027 Value *BodyVal = Body->IRGen(C); 1028 if (!BodyVal) return nullptr; 1029 1030 // Pop all our variables from scope. 1031 for (unsigned i = 0, e = VarBindings.size(); i != e; ++i) 1032 C.NamedValues[VarBindings[i].first] = OldBindings[i]; 1033 1034 // Return the body computation. 1035 return BodyVal; 1036 } 1037 1038 Function *PrototypeAST::IRGen(IRGenContext &C) const { 1039 std::string FnName = MakeLegalFunctionName(Name); 1040 1041 // Make the function type: double(double,double) etc. 1042 std::vector<Type*> Doubles(Args.size(), 1043 Type::getDoubleTy(getGlobalContext())); 1044 FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()), 1045 Doubles, false); 1046 Function *F = Function::Create(FT, Function::ExternalLinkage, FnName, 1047 &C.getM()); 1048 1049 // If F conflicted, there was already something named 'FnName'. If it has a 1050 // body, don't allow redefinition or reextern. 1051 if (F->getName() != FnName) { 1052 // Delete the one we just made and get the existing one. 1053 F->eraseFromParent(); 1054 F = C.getM().getFunction(Name); 1055 1056 // If F already has a body, reject this. 1057 if (!F->empty()) { 1058 ErrorP<Function>("redefinition of function"); 1059 return nullptr; 1060 } 1061 1062 // If F took a different number of args, reject. 1063 if (F->arg_size() != Args.size()) { 1064 ErrorP<Function>("redefinition of function with different # args"); 1065 return nullptr; 1066 } 1067 } 1068 1069 // Set names for all arguments. 1070 unsigned Idx = 0; 1071 for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size(); 1072 ++AI, ++Idx) 1073 AI->setName(Args[Idx]); 1074 1075 return F; 1076 } 1077 1078 /// CreateArgumentAllocas - Create an alloca for each argument and register the 1079 /// argument in the symbol table so that references to it will succeed. 1080 void PrototypeAST::CreateArgumentAllocas(Function *F, IRGenContext &C) { 1081 Function::arg_iterator AI = F->arg_begin(); 1082 for (unsigned Idx = 0, e = Args.size(); Idx != e; ++Idx, ++AI) { 1083 // Create an alloca for this variable. 1084 AllocaInst *Alloca = CreateEntryBlockAlloca(F, Args[Idx]); 1085 1086 // Store the initial value into the alloca. 1087 C.getBuilder().CreateStore(&*AI, Alloca); 1088 1089 // Add arguments to variable symbol table. 1090 C.NamedValues[Args[Idx]] = Alloca; 1091 } 1092 } 1093 1094 Function *FunctionAST::IRGen(IRGenContext &C) const { 1095 C.NamedValues.clear(); 1096 1097 Function *TheFunction = Proto->IRGen(C); 1098 if (!TheFunction) 1099 return nullptr; 1100 1101 // If this is an operator, install it. 1102 if (Proto->isBinaryOp()) 1103 BinopPrecedence[Proto->getOperatorName()] = Proto->Precedence; 1104 1105 // Create a new basic block to start insertion into. 1106 BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction); 1107 C.getBuilder().SetInsertPoint(BB); 1108 1109 // Add all arguments to the symbol table and create their allocas. 1110 Proto->CreateArgumentAllocas(TheFunction, C); 1111 1112 if (Value *RetVal = Body->IRGen(C)) { 1113 // Finish off the function. 1114 C.getBuilder().CreateRet(RetVal); 1115 1116 // Validate the generated code, checking for consistency. 1117 verifyFunction(*TheFunction); 1118 1119 return TheFunction; 1120 } 1121 1122 // Error reading body, remove function. 1123 TheFunction->eraseFromParent(); 1124 1125 if (Proto->isBinaryOp()) 1126 BinopPrecedence.erase(Proto->getOperatorName()); 1127 return nullptr; 1128 } 1129 1130 //===----------------------------------------------------------------------===// 1131 // Top-Level parsing and JIT Driver 1132 //===----------------------------------------------------------------------===// 1133 1134 static std::unique_ptr<llvm::Module> IRGen(SessionContext &S, 1135 const FunctionAST &F) { 1136 IRGenContext C(S); 1137 auto LF = F.IRGen(C); 1138 if (!LF) 1139 return nullptr; 1140 #ifndef MINIMAL_STDERR_OUTPUT 1141 fprintf(stderr, "Read function definition:"); 1142 LF->dump(); 1143 #endif 1144 return C.takeM(); 1145 } 1146 1147 template <typename T> 1148 static std::vector<T> singletonSet(T t) { 1149 std::vector<T> Vec; 1150 Vec.push_back(std::move(t)); 1151 return Vec; 1152 } 1153 1154 class KaleidoscopeJIT { 1155 public: 1156 typedef ObjectLinkingLayer<> ObjLayerT; 1157 typedef IRCompileLayer<ObjLayerT> CompileLayerT; 1158 typedef LazyEmittingLayer<CompileLayerT> LazyEmitLayerT; 1159 1160 typedef LazyEmitLayerT::ModuleSetHandleT ModuleHandleT; 1161 1162 KaleidoscopeJIT(SessionContext &Session) 1163 : DL(Session.getTarget().createDataLayout()), 1164 CompileLayer(ObjectLayer, SimpleCompiler(Session.getTarget())), 1165 LazyEmitLayer(CompileLayer) {} 1166 1167 std::string mangle(const std::string &Name) { 1168 std::string MangledName; 1169 { 1170 raw_string_ostream MangledNameStream(MangledName); 1171 Mangler::getNameWithPrefix(MangledNameStream, Name, DL); 1172 } 1173 return MangledName; 1174 } 1175 1176 ModuleHandleT addModule(std::unique_ptr<Module> M) { 1177 // We need a memory manager to allocate memory and resolve symbols for this 1178 // new module. Create one that resolves symbols by looking back into the 1179 // JIT. 1180 auto Resolver = createLambdaResolver( 1181 [&](const std::string &Name) { 1182 if (auto Sym = findSymbol(Name)) 1183 return RuntimeDyld::SymbolInfo(Sym.getAddress(), 1184 Sym.getFlags()); 1185 return RuntimeDyld::SymbolInfo(nullptr); 1186 }, 1187 [](const std::string &S) { return nullptr; } ); 1188 1189 return LazyEmitLayer.addModuleSet(singletonSet(std::move(M)), 1190 make_unique<SectionMemoryManager>(), 1191 std::move(Resolver)); 1192 } 1193 1194 void removeModule(ModuleHandleT H) { LazyEmitLayer.removeModuleSet(H); } 1195 1196 JITSymbol findSymbol(const std::string &Name) { 1197 return LazyEmitLayer.findSymbol(Name, true); 1198 } 1199 1200 JITSymbol findUnmangledSymbol(const std::string Name) { 1201 return findSymbol(mangle(Name)); 1202 } 1203 1204 private: 1205 const DataLayout DL; 1206 ObjLayerT ObjectLayer; 1207 CompileLayerT CompileLayer; 1208 LazyEmitLayerT LazyEmitLayer; 1209 }; 1210 1211 static void HandleDefinition(SessionContext &S, KaleidoscopeJIT &J) { 1212 if (auto F = ParseDefinition()) { 1213 if (auto M = IRGen(S, *F)) { 1214 S.addPrototypeAST(llvm::make_unique<PrototypeAST>(*F->Proto)); 1215 J.addModule(std::move(M)); 1216 } 1217 } else { 1218 // Skip token for error recovery. 1219 getNextToken(); 1220 } 1221 } 1222 1223 static void HandleExtern(SessionContext &S) { 1224 if (auto P = ParseExtern()) 1225 S.addPrototypeAST(std::move(P)); 1226 else { 1227 // Skip token for error recovery. 1228 getNextToken(); 1229 } 1230 } 1231 1232 static void HandleTopLevelExpression(SessionContext &S, KaleidoscopeJIT &J) { 1233 // Evaluate a top-level expression into an anonymous function. 1234 if (auto F = ParseTopLevelExpr()) { 1235 IRGenContext C(S); 1236 if (auto ExprFunc = F->IRGen(C)) { 1237 #ifndef MINIMAL_STDERR_OUTPUT 1238 std::cerr << "Expression function:\n"; 1239 ExprFunc->dump(); 1240 #endif 1241 // Add the CodeGen'd module to the JIT. Keep a handle to it: We can remove 1242 // this module as soon as we've executed Function ExprFunc. 1243 auto H = J.addModule(C.takeM()); 1244 1245 // Get the address of the JIT'd function in memory. 1246 auto ExprSymbol = J.findUnmangledSymbol("__anon_expr"); 1247 1248 // Cast it to the right type (takes no arguments, returns a double) so we 1249 // can call it as a native function. 1250 double (*FP)() = (double (*)())(intptr_t)ExprSymbol.getAddress(); 1251 #ifdef MINIMAL_STDERR_OUTPUT 1252 FP(); 1253 #else 1254 std::cerr << "Evaluated to " << FP() << "\n"; 1255 #endif 1256 1257 // Remove the function. 1258 J.removeModule(H); 1259 } 1260 } else { 1261 // Skip token for error recovery. 1262 getNextToken(); 1263 } 1264 } 1265 1266 /// top ::= definition | external | expression | ';' 1267 static void MainLoop() { 1268 SessionContext S(getGlobalContext()); 1269 KaleidoscopeJIT J(S); 1270 1271 while (1) { 1272 switch (CurTok) { 1273 case tok_eof: return; 1274 case ';': getNextToken(); continue; // ignore top-level semicolons. 1275 case tok_def: HandleDefinition(S, J); break; 1276 case tok_extern: HandleExtern(S); break; 1277 default: HandleTopLevelExpression(S, J); break; 1278 } 1279 #ifndef MINIMAL_STDERR_OUTPUT 1280 std::cerr << "ready> "; 1281 #endif 1282 } 1283 } 1284 1285 //===----------------------------------------------------------------------===// 1286 // "Library" functions that can be "extern'd" from user code. 1287 //===----------------------------------------------------------------------===// 1288 1289 /// putchard - putchar that takes a double and returns 0. 1290 extern "C" 1291 double putchard(double X) { 1292 putchar((char)X); 1293 return 0; 1294 } 1295 1296 /// printd - printf that takes a double prints it as "%f\n", returning 0. 1297 extern "C" 1298 double printd(double X) { 1299 printf("%f", X); 1300 return 0; 1301 } 1302 1303 extern "C" 1304 double printlf() { 1305 printf("\n"); 1306 return 0; 1307 } 1308 1309 //===----------------------------------------------------------------------===// 1310 // Main driver code. 1311 //===----------------------------------------------------------------------===// 1312 1313 int main() { 1314 InitializeNativeTarget(); 1315 InitializeNativeTargetAsmPrinter(); 1316 InitializeNativeTargetAsmParser(); 1317 1318 // Install standard binary operators. 1319 // 1 is lowest precedence. 1320 BinopPrecedence['='] = 2; 1321 BinopPrecedence['<'] = 10; 1322 BinopPrecedence['+'] = 20; 1323 BinopPrecedence['-'] = 20; 1324 BinopPrecedence['/'] = 40; 1325 BinopPrecedence['*'] = 40; // highest. 1326 1327 // Prime the first token. 1328 #ifndef MINIMAL_STDERR_OUTPUT 1329 std::cerr << "ready> "; 1330 #endif 1331 getNextToken(); 1332 1333 std::cerr << std::fixed; 1334 1335 // Run the main "interpreter loop" now. 1336 MainLoop(); 1337 1338 return 0; 1339 } 1340