1 #define MINIMAL_STDERR_OUTPUT 2 3 #include "llvm/Analysis/Passes.h" 4 #include "llvm/Analysis/Verifier.h" 5 #include "llvm/ExecutionEngine/ExecutionEngine.h" 6 #include "llvm/ExecutionEngine/JIT.h" 7 #include "llvm/IR/DataLayout.h" 8 #include "llvm/IR/DerivedTypes.h" 9 #include "llvm/IR/IRBuilder.h" 10 #include "llvm/IR/LLVMContext.h" 11 #include "llvm/IR/Module.h" 12 #include "llvm/IRReader/IRReader.h" 13 #include "llvm/PassManager.h" 14 #include "llvm/Support/CommandLine.h" 15 #include "llvm/Support/raw_ostream.h" 16 #include "llvm/Support/SourceMgr.h" 17 #include "llvm/Support/TargetSelect.h" 18 #include "llvm/Transforms/Scalar.h" 19 #include <cstdio> 20 #include <map> 21 #include <string> 22 #include <vector> 23 24 using namespace llvm; 25 26 //===----------------------------------------------------------------------===// 27 // Command-line options 28 //===----------------------------------------------------------------------===// 29 30 namespace { 31 cl::opt<std::string> 32 InputIR("input-IR", 33 cl::desc("Specify the name of an IR file to load for function definitions"), 34 cl::value_desc("input IR file name")); 35 } // namespace 36 37 //===----------------------------------------------------------------------===// 38 // Lexer 39 //===----------------------------------------------------------------------===// 40 41 // The lexer returns tokens [0-255] if it is an unknown character, otherwise one 42 // of these for known things. 43 enum Token { 44 tok_eof = -1, 45 46 // commands 47 tok_def = -2, tok_extern = -3, 48 49 // primary 50 tok_identifier = -4, tok_number = -5, 51 52 // control 53 tok_if = -6, tok_then = -7, tok_else = -8, 54 tok_for = -9, tok_in = -10, 55 56 // operators 57 tok_binary = -11, tok_unary = -12, 58 59 // var definition 60 tok_var = -13 61 }; 62 63 static std::string IdentifierStr; // Filled in if tok_identifier 64 static double NumVal; // Filled in if tok_number 65 66 /// gettok - Return the next token from standard input. 67 static int gettok() { 68 static int LastChar = ' '; 69 70 // Skip any whitespace. 71 while (isspace(LastChar)) 72 LastChar = getchar(); 73 74 if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]* 75 IdentifierStr = LastChar; 76 while (isalnum((LastChar = getchar()))) 77 IdentifierStr += LastChar; 78 79 if (IdentifierStr == "def") return tok_def; 80 if (IdentifierStr == "extern") return tok_extern; 81 if (IdentifierStr == "if") return tok_if; 82 if (IdentifierStr == "then") return tok_then; 83 if (IdentifierStr == "else") return tok_else; 84 if (IdentifierStr == "for") return tok_for; 85 if (IdentifierStr == "in") return tok_in; 86 if (IdentifierStr == "binary") return tok_binary; 87 if (IdentifierStr == "unary") return tok_unary; 88 if (IdentifierStr == "var") return tok_var; 89 return tok_identifier; 90 } 91 92 if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+ 93 std::string NumStr; 94 do { 95 NumStr += LastChar; 96 LastChar = getchar(); 97 } while (isdigit(LastChar) || LastChar == '.'); 98 99 NumVal = strtod(NumStr.c_str(), 0); 100 return tok_number; 101 } 102 103 if (LastChar == '#') { 104 // Comment until end of line. 105 do LastChar = getchar(); 106 while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); 107 108 if (LastChar != EOF) 109 return gettok(); 110 } 111 112 // Check for end of file. Don't eat the EOF. 113 if (LastChar == EOF) 114 return tok_eof; 115 116 // Otherwise, just return the character as its ascii value. 117 int ThisChar = LastChar; 118 LastChar = getchar(); 119 return ThisChar; 120 } 121 122 //===----------------------------------------------------------------------===// 123 // Abstract Syntax Tree (aka Parse Tree) 124 //===----------------------------------------------------------------------===// 125 126 /// ExprAST - Base class for all expression nodes. 127 class ExprAST { 128 public: 129 virtual ~ExprAST() {} 130 virtual Value *Codegen() = 0; 131 }; 132 133 /// NumberExprAST - Expression class for numeric literals like "1.0". 134 class NumberExprAST : public ExprAST { 135 double Val; 136 public: 137 NumberExprAST(double val) : Val(val) {} 138 virtual Value *Codegen(); 139 }; 140 141 /// VariableExprAST - Expression class for referencing a variable, like "a". 142 class VariableExprAST : public ExprAST { 143 std::string Name; 144 public: 145 VariableExprAST(const std::string &name) : Name(name) {} 146 const std::string &getName() const { return Name; } 147 virtual Value *Codegen(); 148 }; 149 150 /// UnaryExprAST - Expression class for a unary operator. 151 class UnaryExprAST : public ExprAST { 152 char Opcode; 153 ExprAST *Operand; 154 public: 155 UnaryExprAST(char opcode, ExprAST *operand) 156 : Opcode(opcode), Operand(operand) {} 157 virtual Value *Codegen(); 158 }; 159 160 /// BinaryExprAST - Expression class for a binary operator. 161 class BinaryExprAST : public ExprAST { 162 char Op; 163 ExprAST *LHS, *RHS; 164 public: 165 BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) 166 : Op(op), LHS(lhs), RHS(rhs) {} 167 virtual Value *Codegen(); 168 }; 169 170 /// CallExprAST - Expression class for function calls. 171 class CallExprAST : public ExprAST { 172 std::string Callee; 173 std::vector<ExprAST*> Args; 174 public: 175 CallExprAST(const std::string &callee, std::vector<ExprAST*> &args) 176 : Callee(callee), Args(args) {} 177 virtual Value *Codegen(); 178 }; 179 180 /// IfExprAST - Expression class for if/then/else. 181 class IfExprAST : public ExprAST { 182 ExprAST *Cond, *Then, *Else; 183 public: 184 IfExprAST(ExprAST *cond, ExprAST *then, ExprAST *_else) 185 : Cond(cond), Then(then), Else(_else) {} 186 virtual Value *Codegen(); 187 }; 188 189 /// ForExprAST - Expression class for for/in. 190 class ForExprAST : public ExprAST { 191 std::string VarName; 192 ExprAST *Start, *End, *Step, *Body; 193 public: 194 ForExprAST(const std::string &varname, ExprAST *start, ExprAST *end, 195 ExprAST *step, ExprAST *body) 196 : VarName(varname), Start(start), End(end), Step(step), Body(body) {} 197 virtual Value *Codegen(); 198 }; 199 200 /// VarExprAST - Expression class for var/in 201 class VarExprAST : public ExprAST { 202 std::vector<std::pair<std::string, ExprAST*> > VarNames; 203 ExprAST *Body; 204 public: 205 VarExprAST(const std::vector<std::pair<std::string, ExprAST*> > &varnames, 206 ExprAST *body) 207 : VarNames(varnames), Body(body) {} 208 209 virtual Value *Codegen(); 210 }; 211 212 /// PrototypeAST - This class represents the "prototype" for a function, 213 /// which captures its argument names as well as if it is an operator. 214 class PrototypeAST { 215 std::string Name; 216 std::vector<std::string> Args; 217 bool isOperator; 218 unsigned Precedence; // Precedence if a binary op. 219 public: 220 PrototypeAST(const std::string &name, const std::vector<std::string> &args, 221 bool isoperator = false, unsigned prec = 0) 222 : Name(name), Args(args), isOperator(isoperator), Precedence(prec) {} 223 224 bool isUnaryOp() const { return isOperator && Args.size() == 1; } 225 bool isBinaryOp() const { return isOperator && Args.size() == 2; } 226 227 char getOperatorName() const { 228 assert(isUnaryOp() || isBinaryOp()); 229 return Name[Name.size()-1]; 230 } 231 232 unsigned getBinaryPrecedence() const { return Precedence; } 233 234 Function *Codegen(); 235 236 void CreateArgumentAllocas(Function *F); 237 }; 238 239 /// FunctionAST - This class represents a function definition itself. 240 class FunctionAST { 241 PrototypeAST *Proto; 242 ExprAST *Body; 243 public: 244 FunctionAST(PrototypeAST *proto, ExprAST *body) 245 : Proto(proto), Body(body) {} 246 247 Function *Codegen(); 248 }; 249 250 //===----------------------------------------------------------------------===// 251 // Parser 252 //===----------------------------------------------------------------------===// 253 254 /// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current 255 /// token the parser is looking at. getNextToken reads another token from the 256 /// lexer and updates CurTok with its results. 257 static int CurTok; 258 static int getNextToken() { 259 return CurTok = gettok(); 260 } 261 262 /// BinopPrecedence - This holds the precedence for each binary operator that is 263 /// defined. 264 static std::map<char, int> BinopPrecedence; 265 266 /// GetTokPrecedence - Get the precedence of the pending binary operator token. 267 static int GetTokPrecedence() { 268 if (!isascii(CurTok)) 269 return -1; 270 271 // Make sure it's a declared binop. 272 int TokPrec = BinopPrecedence[CurTok]; 273 if (TokPrec <= 0) return -1; 274 return TokPrec; 275 } 276 277 /// Error* - These are little helper functions for error handling. 278 ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;} 279 PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; } 280 FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; } 281 282 static ExprAST *ParseExpression(); 283 284 /// identifierexpr 285 /// ::= identifier 286 /// ::= identifier '(' expression* ')' 287 static ExprAST *ParseIdentifierExpr() { 288 std::string IdName = IdentifierStr; 289 290 getNextToken(); // eat identifier. 291 292 if (CurTok != '(') // Simple variable ref. 293 return new VariableExprAST(IdName); 294 295 // Call. 296 getNextToken(); // eat ( 297 std::vector<ExprAST*> Args; 298 if (CurTok != ')') { 299 while (1) { 300 ExprAST *Arg = ParseExpression(); 301 if (!Arg) return 0; 302 Args.push_back(Arg); 303 304 if (CurTok == ')') break; 305 306 if (CurTok != ',') 307 return Error("Expected ')' or ',' in argument list"); 308 getNextToken(); 309 } 310 } 311 312 // Eat the ')'. 313 getNextToken(); 314 315 return new CallExprAST(IdName, Args); 316 } 317 318 /// numberexpr ::= number 319 static ExprAST *ParseNumberExpr() { 320 ExprAST *Result = new NumberExprAST(NumVal); 321 getNextToken(); // consume the number 322 return Result; 323 } 324 325 /// parenexpr ::= '(' expression ')' 326 static ExprAST *ParseParenExpr() { 327 getNextToken(); // eat (. 328 ExprAST *V = ParseExpression(); 329 if (!V) return 0; 330 331 if (CurTok != ')') 332 return Error("expected ')'"); 333 getNextToken(); // eat ). 334 return V; 335 } 336 337 /// ifexpr ::= 'if' expression 'then' expression 'else' expression 338 static ExprAST *ParseIfExpr() { 339 getNextToken(); // eat the if. 340 341 // condition. 342 ExprAST *Cond = ParseExpression(); 343 if (!Cond) return 0; 344 345 if (CurTok != tok_then) 346 return Error("expected then"); 347 getNextToken(); // eat the then 348 349 ExprAST *Then = ParseExpression(); 350 if (Then == 0) return 0; 351 352 if (CurTok != tok_else) 353 return Error("expected else"); 354 355 getNextToken(); 356 357 ExprAST *Else = ParseExpression(); 358 if (!Else) return 0; 359 360 return new IfExprAST(Cond, Then, Else); 361 } 362 363 /// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression 364 static ExprAST *ParseForExpr() { 365 getNextToken(); // eat the for. 366 367 if (CurTok != tok_identifier) 368 return Error("expected identifier after for"); 369 370 std::string IdName = IdentifierStr; 371 getNextToken(); // eat identifier. 372 373 if (CurTok != '=') 374 return Error("expected '=' after for"); 375 getNextToken(); // eat '='. 376 377 378 ExprAST *Start = ParseExpression(); 379 if (Start == 0) return 0; 380 if (CurTok != ',') 381 return Error("expected ',' after for start value"); 382 getNextToken(); 383 384 ExprAST *End = ParseExpression(); 385 if (End == 0) return 0; 386 387 // The step value is optional. 388 ExprAST *Step = 0; 389 if (CurTok == ',') { 390 getNextToken(); 391 Step = ParseExpression(); 392 if (Step == 0) return 0; 393 } 394 395 if (CurTok != tok_in) 396 return Error("expected 'in' after for"); 397 getNextToken(); // eat 'in'. 398 399 ExprAST *Body = ParseExpression(); 400 if (Body == 0) return 0; 401 402 return new ForExprAST(IdName, Start, End, Step, Body); 403 } 404 405 /// varexpr ::= 'var' identifier ('=' expression)? 406 // (',' identifier ('=' expression)?)* 'in' expression 407 static ExprAST *ParseVarExpr() { 408 getNextToken(); // eat the var. 409 410 std::vector<std::pair<std::string, ExprAST*> > VarNames; 411 412 // At least one variable name is required. 413 if (CurTok != tok_identifier) 414 return Error("expected identifier after var"); 415 416 while (1) { 417 std::string Name = IdentifierStr; 418 getNextToken(); // eat identifier. 419 420 // Read the optional initializer. 421 ExprAST *Init = 0; 422 if (CurTok == '=') { 423 getNextToken(); // eat the '='. 424 425 Init = ParseExpression(); 426 if (Init == 0) return 0; 427 } 428 429 VarNames.push_back(std::make_pair(Name, Init)); 430 431 // End of var list, exit loop. 432 if (CurTok != ',') break; 433 getNextToken(); // eat the ','. 434 435 if (CurTok != tok_identifier) 436 return Error("expected identifier list after var"); 437 } 438 439 // At this point, we have to have 'in'. 440 if (CurTok != tok_in) 441 return Error("expected 'in' keyword after 'var'"); 442 getNextToken(); // eat 'in'. 443 444 ExprAST *Body = ParseExpression(); 445 if (Body == 0) return 0; 446 447 return new VarExprAST(VarNames, Body); 448 } 449 450 /// primary 451 /// ::= identifierexpr 452 /// ::= numberexpr 453 /// ::= parenexpr 454 /// ::= ifexpr 455 /// ::= forexpr 456 /// ::= varexpr 457 static ExprAST *ParsePrimary() { 458 switch (CurTok) { 459 default: return Error("unknown token when expecting an expression"); 460 case tok_identifier: return ParseIdentifierExpr(); 461 case tok_number: return ParseNumberExpr(); 462 case '(': return ParseParenExpr(); 463 case tok_if: return ParseIfExpr(); 464 case tok_for: return ParseForExpr(); 465 case tok_var: return ParseVarExpr(); 466 } 467 } 468 469 /// unary 470 /// ::= primary 471 /// ::= '!' unary 472 static ExprAST *ParseUnary() { 473 // If the current token is not an operator, it must be a primary expr. 474 if (!isascii(CurTok) || CurTok == '(' || CurTok == ',') 475 return ParsePrimary(); 476 477 // If this is a unary operator, read it. 478 int Opc = CurTok; 479 getNextToken(); 480 if (ExprAST *Operand = ParseUnary()) 481 return new UnaryExprAST(Opc, Operand); 482 return 0; 483 } 484 485 /// binoprhs 486 /// ::= ('+' unary)* 487 static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) { 488 // If this is a binop, find its precedence. 489 while (1) { 490 int TokPrec = GetTokPrecedence(); 491 492 // If this is a binop that binds at least as tightly as the current binop, 493 // consume it, otherwise we are done. 494 if (TokPrec < ExprPrec) 495 return LHS; 496 497 // Okay, we know this is a binop. 498 int BinOp = CurTok; 499 getNextToken(); // eat binop 500 501 // Parse the unary expression after the binary operator. 502 ExprAST *RHS = ParseUnary(); 503 if (!RHS) return 0; 504 505 // If BinOp binds less tightly with RHS than the operator after RHS, let 506 // the pending operator take RHS as its LHS. 507 int NextPrec = GetTokPrecedence(); 508 if (TokPrec < NextPrec) { 509 RHS = ParseBinOpRHS(TokPrec+1, RHS); 510 if (RHS == 0) return 0; 511 } 512 513 // Merge LHS/RHS. 514 LHS = new BinaryExprAST(BinOp, LHS, RHS); 515 } 516 } 517 518 /// expression 519 /// ::= unary binoprhs 520 /// 521 static ExprAST *ParseExpression() { 522 ExprAST *LHS = ParseUnary(); 523 if (!LHS) return 0; 524 525 return ParseBinOpRHS(0, LHS); 526 } 527 528 /// prototype 529 /// ::= id '(' id* ')' 530 /// ::= binary LETTER number? (id, id) 531 /// ::= unary LETTER (id) 532 static PrototypeAST *ParsePrototype() { 533 std::string FnName; 534 535 unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary. 536 unsigned BinaryPrecedence = 30; 537 538 switch (CurTok) { 539 default: 540 return ErrorP("Expected function name in prototype"); 541 case tok_identifier: 542 FnName = IdentifierStr; 543 Kind = 0; 544 getNextToken(); 545 break; 546 case tok_unary: 547 getNextToken(); 548 if (!isascii(CurTok)) 549 return ErrorP("Expected unary operator"); 550 FnName = "unary"; 551 FnName += (char)CurTok; 552 Kind = 1; 553 getNextToken(); 554 break; 555 case tok_binary: 556 getNextToken(); 557 if (!isascii(CurTok)) 558 return ErrorP("Expected binary operator"); 559 FnName = "binary"; 560 FnName += (char)CurTok; 561 Kind = 2; 562 getNextToken(); 563 564 // Read the precedence if present. 565 if (CurTok == tok_number) { 566 if (NumVal < 1 || NumVal > 100) 567 return ErrorP("Invalid precedecnce: must be 1..100"); 568 BinaryPrecedence = (unsigned)NumVal; 569 getNextToken(); 570 } 571 break; 572 } 573 574 if (CurTok != '(') 575 return ErrorP("Expected '(' in prototype"); 576 577 std::vector<std::string> ArgNames; 578 while (getNextToken() == tok_identifier) 579 ArgNames.push_back(IdentifierStr); 580 if (CurTok != ')') 581 return ErrorP("Expected ')' in prototype"); 582 583 // success. 584 getNextToken(); // eat ')'. 585 586 // Verify right number of names for operator. 587 if (Kind && ArgNames.size() != Kind) 588 return ErrorP("Invalid number of operands for operator"); 589 590 return new PrototypeAST(FnName, ArgNames, Kind != 0, BinaryPrecedence); 591 } 592 593 /// definition ::= 'def' prototype expression 594 static FunctionAST *ParseDefinition() { 595 getNextToken(); // eat def. 596 PrototypeAST *Proto = ParsePrototype(); 597 if (Proto == 0) return 0; 598 599 if (ExprAST *E = ParseExpression()) 600 return new FunctionAST(Proto, E); 601 return 0; 602 } 603 604 /// toplevelexpr ::= expression 605 static FunctionAST *ParseTopLevelExpr() { 606 if (ExprAST *E = ParseExpression()) { 607 // Make an anonymous proto. 608 PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>()); 609 return new FunctionAST(Proto, E); 610 } 611 return 0; 612 } 613 614 /// external ::= 'extern' prototype 615 static PrototypeAST *ParseExtern() { 616 getNextToken(); // eat extern. 617 return ParsePrototype(); 618 } 619 620 //===----------------------------------------------------------------------===// 621 // Code Generation 622 //===----------------------------------------------------------------------===// 623 624 static Module *TheModule; 625 static FunctionPassManager *TheFPM; 626 static IRBuilder<> Builder(getGlobalContext()); 627 static std::map<std::string, AllocaInst*> NamedValues; 628 629 Value *ErrorV(const char *Str) { Error(Str); return 0; } 630 631 /// CreateEntryBlockAlloca - Create an alloca instruction in the entry block of 632 /// the function. This is used for mutable variables etc. 633 static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction, 634 const std::string &VarName) { 635 IRBuilder<> TmpB(&TheFunction->getEntryBlock(), 636 TheFunction->getEntryBlock().begin()); 637 return TmpB.CreateAlloca(Type::getDoubleTy(getGlobalContext()), 0, 638 VarName.c_str()); 639 } 640 641 Value *NumberExprAST::Codegen() { 642 return ConstantFP::get(getGlobalContext(), APFloat(Val)); 643 } 644 645 Value *VariableExprAST::Codegen() { 646 // Look this variable up in the function. 647 Value *V = NamedValues[Name]; 648 if (V == 0) return ErrorV("Unknown variable name"); 649 650 // Load the value. 651 return Builder.CreateLoad(V, Name.c_str()); 652 } 653 654 Value *UnaryExprAST::Codegen() { 655 Value *OperandV = Operand->Codegen(); 656 if (OperandV == 0) return 0; 657 #ifdef USE_MCJIT 658 Function *F = TheHelper->getFunction(MakeLegalFunctionName(std::string("unary")+Opcode)); 659 #else 660 Function *F = TheModule->getFunction(std::string("unary")+Opcode); 661 #endif 662 if (F == 0) 663 return ErrorV("Unknown unary operator"); 664 665 return Builder.CreateCall(F, OperandV, "unop"); 666 } 667 668 Value *BinaryExprAST::Codegen() { 669 // Special case '=' because we don't want to emit the LHS as an expression. 670 if (Op == '=') { 671 // Assignment requires the LHS to be an identifier. 672 // For now, I'm building without RTTI because LLVM builds that way by 673 // default and so we need to build that way to use the command line supprt. 674 // If you build LLVM with RTTI this can be changed back to a dynamic_cast. 675 VariableExprAST *LHSE = reinterpret_cast<VariableExprAST*>(LHS); 676 if (!LHSE) 677 return ErrorV("destination of '=' must be a variable"); 678 // Codegen the RHS. 679 Value *Val = RHS->Codegen(); 680 if (Val == 0) return 0; 681 682 // Look up the name. 683 Value *Variable = NamedValues[LHSE->getName()]; 684 if (Variable == 0) return ErrorV("Unknown variable name"); 685 686 Builder.CreateStore(Val, Variable); 687 return Val; 688 } 689 690 Value *L = LHS->Codegen(); 691 Value *R = RHS->Codegen(); 692 if (L == 0 || R == 0) return 0; 693 694 switch (Op) { 695 case '+': return Builder.CreateFAdd(L, R, "addtmp"); 696 case '-': return Builder.CreateFSub(L, R, "subtmp"); 697 case '*': return Builder.CreateFMul(L, R, "multmp"); 698 case '/': return Builder.CreateFDiv(L, R, "divtmp"); 699 case '<': 700 L = Builder.CreateFCmpULT(L, R, "cmptmp"); 701 // Convert bool 0/1 to double 0.0 or 1.0 702 return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()), 703 "booltmp"); 704 default: break; 705 } 706 707 // If it wasn't a builtin binary operator, it must be a user defined one. Emit 708 // a call to it. 709 Function *F = TheModule->getFunction(std::string("binary")+Op); 710 assert(F && "binary operator not found!"); 711 712 Value *Ops[] = { L, R }; 713 return Builder.CreateCall(F, Ops, "binop"); 714 } 715 716 Value *CallExprAST::Codegen() { 717 // Look up the name in the global module table. 718 Function *CalleeF = TheModule->getFunction(Callee); 719 if (CalleeF == 0) { 720 char error_str[64]; 721 sprintf(error_str, "Unknown function referenced %s", Callee.c_str()); 722 return ErrorV(error_str); 723 } 724 725 // If argument mismatch error. 726 if (CalleeF->arg_size() != Args.size()) 727 return ErrorV("Incorrect # arguments passed"); 728 729 std::vector<Value*> ArgsV; 730 for (unsigned i = 0, e = Args.size(); i != e; ++i) { 731 ArgsV.push_back(Args[i]->Codegen()); 732 if (ArgsV.back() == 0) return 0; 733 } 734 735 return Builder.CreateCall(CalleeF, ArgsV, "calltmp"); 736 } 737 738 Value *IfExprAST::Codegen() { 739 Value *CondV = Cond->Codegen(); 740 if (CondV == 0) return 0; 741 742 // Convert condition to a bool by comparing equal to 0.0. 743 CondV = Builder.CreateFCmpONE(CondV, 744 ConstantFP::get(getGlobalContext(), APFloat(0.0)), 745 "ifcond"); 746 747 Function *TheFunction = Builder.GetInsertBlock()->getParent(); 748 749 // Create blocks for the then and else cases. Insert the 'then' block at the 750 // end of the function. 751 BasicBlock *ThenBB = BasicBlock::Create(getGlobalContext(), "then", TheFunction); 752 BasicBlock *ElseBB = BasicBlock::Create(getGlobalContext(), "else"); 753 BasicBlock *MergeBB = BasicBlock::Create(getGlobalContext(), "ifcont"); 754 755 Builder.CreateCondBr(CondV, ThenBB, ElseBB); 756 757 // Emit then value. 758 Builder.SetInsertPoint(ThenBB); 759 760 Value *ThenV = Then->Codegen(); 761 if (ThenV == 0) return 0; 762 763 Builder.CreateBr(MergeBB); 764 // Codegen of 'Then' can change the current block, update ThenBB for the PHI. 765 ThenBB = Builder.GetInsertBlock(); 766 767 // Emit else block. 768 TheFunction->getBasicBlockList().push_back(ElseBB); 769 Builder.SetInsertPoint(ElseBB); 770 771 Value *ElseV = Else->Codegen(); 772 if (ElseV == 0) return 0; 773 774 Builder.CreateBr(MergeBB); 775 // Codegen of 'Else' can change the current block, update ElseBB for the PHI. 776 ElseBB = Builder.GetInsertBlock(); 777 778 // Emit merge block. 779 TheFunction->getBasicBlockList().push_back(MergeBB); 780 Builder.SetInsertPoint(MergeBB); 781 PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2, 782 "iftmp"); 783 784 PN->addIncoming(ThenV, ThenBB); 785 PN->addIncoming(ElseV, ElseBB); 786 return PN; 787 } 788 789 Value *ForExprAST::Codegen() { 790 // Output this as: 791 // var = alloca double 792 // ... 793 // start = startexpr 794 // store start -> var 795 // goto loop 796 // loop: 797 // ... 798 // bodyexpr 799 // ... 800 // loopend: 801 // step = stepexpr 802 // endcond = endexpr 803 // 804 // curvar = load var 805 // nextvar = curvar + step 806 // store nextvar -> var 807 // br endcond, loop, endloop 808 // outloop: 809 810 Function *TheFunction = Builder.GetInsertBlock()->getParent(); 811 812 // Create an alloca for the variable in the entry block. 813 AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName); 814 815 // Emit the start code first, without 'variable' in scope. 816 Value *StartVal = Start->Codegen(); 817 if (StartVal == 0) return 0; 818 819 // Store the value into the alloca. 820 Builder.CreateStore(StartVal, Alloca); 821 822 // Make the new basic block for the loop header, inserting after current 823 // block. 824 BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction); 825 826 // Insert an explicit fall through from the current block to the LoopBB. 827 Builder.CreateBr(LoopBB); 828 829 // Start insertion in LoopBB. 830 Builder.SetInsertPoint(LoopBB); 831 832 // Within the loop, the variable is defined equal to the PHI node. If it 833 // shadows an existing variable, we have to restore it, so save it now. 834 AllocaInst *OldVal = NamedValues[VarName]; 835 NamedValues[VarName] = Alloca; 836 837 // Emit the body of the loop. This, like any other expr, can change the 838 // current BB. Note that we ignore the value computed by the body, but don't 839 // allow an error. 840 if (Body->Codegen() == 0) 841 return 0; 842 843 // Emit the step value. 844 Value *StepVal; 845 if (Step) { 846 StepVal = Step->Codegen(); 847 if (StepVal == 0) return 0; 848 } else { 849 // If not specified, use 1.0. 850 StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0)); 851 } 852 853 // Compute the end condition. 854 Value *EndCond = End->Codegen(); 855 if (EndCond == 0) return EndCond; 856 857 // Reload, increment, and restore the alloca. This handles the case where 858 // the body of the loop mutates the variable. 859 Value *CurVar = Builder.CreateLoad(Alloca, VarName.c_str()); 860 Value *NextVar = Builder.CreateFAdd(CurVar, StepVal, "nextvar"); 861 Builder.CreateStore(NextVar, Alloca); 862 863 // Convert condition to a bool by comparing equal to 0.0. 864 EndCond = Builder.CreateFCmpONE(EndCond, 865 ConstantFP::get(getGlobalContext(), APFloat(0.0)), 866 "loopcond"); 867 868 // Create the "after loop" block and insert it. 869 BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction); 870 871 // Insert the conditional branch into the end of LoopEndBB. 872 Builder.CreateCondBr(EndCond, LoopBB, AfterBB); 873 874 // Any new code will be inserted in AfterBB. 875 Builder.SetInsertPoint(AfterBB); 876 877 // Restore the unshadowed variable. 878 if (OldVal) 879 NamedValues[VarName] = OldVal; 880 else 881 NamedValues.erase(VarName); 882 883 884 // for expr always returns 0.0. 885 return Constant::getNullValue(Type::getDoubleTy(getGlobalContext())); 886 } 887 888 Value *VarExprAST::Codegen() { 889 std::vector<AllocaInst *> OldBindings; 890 891 Function *TheFunction = Builder.GetInsertBlock()->getParent(); 892 893 // Register all variables and emit their initializer. 894 for (unsigned i = 0, e = VarNames.size(); i != e; ++i) { 895 const std::string &VarName = VarNames[i].first; 896 ExprAST *Init = VarNames[i].second; 897 898 // Emit the initializer before adding the variable to scope, this prevents 899 // the initializer from referencing the variable itself, and permits stuff 900 // like this: 901 // var a = 1 in 902 // var a = a in ... # refers to outer 'a'. 903 Value *InitVal; 904 if (Init) { 905 InitVal = Init->Codegen(); 906 if (InitVal == 0) return 0; 907 } else { // If not specified, use 0.0. 908 InitVal = ConstantFP::get(getGlobalContext(), APFloat(0.0)); 909 } 910 911 AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName); 912 Builder.CreateStore(InitVal, Alloca); 913 914 // Remember the old variable binding so that we can restore the binding when 915 // we unrecurse. 916 OldBindings.push_back(NamedValues[VarName]); 917 918 // Remember this binding. 919 NamedValues[VarName] = Alloca; 920 } 921 922 // Codegen the body, now that all vars are in scope. 923 Value *BodyVal = Body->Codegen(); 924 if (BodyVal == 0) return 0; 925 926 // Pop all our variables from scope. 927 for (unsigned i = 0, e = VarNames.size(); i != e; ++i) 928 NamedValues[VarNames[i].first] = OldBindings[i]; 929 930 // Return the body computation. 931 return BodyVal; 932 } 933 934 Function *PrototypeAST::Codegen() { 935 // Make the function type: double(double,double) etc. 936 std::vector<Type*> Doubles(Args.size(), 937 Type::getDoubleTy(getGlobalContext())); 938 FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()), 939 Doubles, false); 940 941 Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule); 942 // If F conflicted, there was already something named 'Name'. If it has a 943 // body, don't allow redefinition or reextern. 944 if (F->getName() != Name) { 945 // Delete the one we just made and get the existing one. 946 F->eraseFromParent(); 947 F = TheModule->getFunction(Name); 948 // If F already has a body, reject this. 949 if (!F->empty()) { 950 ErrorF("redefinition of function"); 951 return 0; 952 } 953 // If F took a different number of args, reject. 954 if (F->arg_size() != Args.size()) { 955 ErrorF("redefinition of function with different # args"); 956 return 0; 957 } 958 } 959 960 // Set names for all arguments. 961 unsigned Idx = 0; 962 for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size(); 963 ++AI, ++Idx) 964 AI->setName(Args[Idx]); 965 966 return F; 967 } 968 969 /// CreateArgumentAllocas - Create an alloca for each argument and register the 970 /// argument in the symbol table so that references to it will succeed. 971 void PrototypeAST::CreateArgumentAllocas(Function *F) { 972 Function::arg_iterator AI = F->arg_begin(); 973 for (unsigned Idx = 0, e = Args.size(); Idx != e; ++Idx, ++AI) { 974 // Create an alloca for this variable. 975 AllocaInst *Alloca = CreateEntryBlockAlloca(F, Args[Idx]); 976 977 // Store the initial value into the alloca. 978 Builder.CreateStore(AI, Alloca); 979 980 // Add arguments to variable symbol table. 981 NamedValues[Args[Idx]] = Alloca; 982 } 983 } 984 985 Function *FunctionAST::Codegen() { 986 NamedValues.clear(); 987 988 Function *TheFunction = Proto->Codegen(); 989 if (TheFunction == 0) 990 return 0; 991 992 // If this is an operator, install it. 993 if (Proto->isBinaryOp()) 994 BinopPrecedence[Proto->getOperatorName()] = Proto->getBinaryPrecedence(); 995 996 // Create a new basic block to start insertion into. 997 BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction); 998 Builder.SetInsertPoint(BB); 999 1000 // Add all arguments to the symbol table and create their allocas. 1001 Proto->CreateArgumentAllocas(TheFunction); 1002 1003 if (Value *RetVal = Body->Codegen()) { 1004 // Finish off the function. 1005 Builder.CreateRet(RetVal); 1006 1007 // Validate the generated code, checking for consistency. 1008 verifyFunction(*TheFunction); 1009 1010 // Optimize the function. 1011 TheFPM->run(*TheFunction); 1012 1013 return TheFunction; 1014 } 1015 1016 // Error reading body, remove function. 1017 TheFunction->eraseFromParent(); 1018 1019 if (Proto->isBinaryOp()) 1020 BinopPrecedence.erase(Proto->getOperatorName()); 1021 return 0; 1022 } 1023 1024 //===----------------------------------------------------------------------===// 1025 // Top-Level parsing and JIT Driver 1026 //===----------------------------------------------------------------------===// 1027 1028 static ExecutionEngine *TheExecutionEngine; 1029 1030 static void HandleDefinition() { 1031 if (FunctionAST *F = ParseDefinition()) { 1032 if (Function *LF = F->Codegen()) { 1033 #ifndef MINIMAL_STDERR_OUTPUT 1034 fprintf(stderr, "Read function definition:"); 1035 LF->dump(); 1036 #endif 1037 } 1038 } else { 1039 // Skip token for error recovery. 1040 getNextToken(); 1041 } 1042 } 1043 1044 static void HandleExtern() { 1045 if (PrototypeAST *P = ParseExtern()) { 1046 if (Function *F = P->Codegen()) { 1047 #ifndef MINIMAL_STDERR_OUTPUT 1048 fprintf(stderr, "Read extern: "); 1049 F->dump(); 1050 #endif 1051 } 1052 } else { 1053 // Skip token for error recovery. 1054 getNextToken(); 1055 } 1056 } 1057 1058 static void HandleTopLevelExpression() { 1059 // Evaluate a top-level expression into an anonymous function. 1060 if (FunctionAST *F = ParseTopLevelExpr()) { 1061 if (Function *LF = F->Codegen()) { 1062 // JIT the function, returning a function pointer. 1063 void *FPtr = TheExecutionEngine->getPointerToFunction(LF); 1064 // Cast it to the right type (takes no arguments, returns a double) so we 1065 // can call it as a native function. 1066 double (*FP)() = (double (*)())(intptr_t)FPtr; 1067 #ifdef MINIMAL_STDERR_OUTPUT 1068 FP(); 1069 #else 1070 fprintf(stderr, "Evaluated to %f\n", FP()); 1071 #endif 1072 } 1073 } else { 1074 // Skip token for error recovery. 1075 getNextToken(); 1076 } 1077 } 1078 1079 /// top ::= definition | external | expression | ';' 1080 static void MainLoop() { 1081 while (1) { 1082 #ifndef MINIMAL_STDERR_OUTPUT 1083 fprintf(stderr, "ready> "); 1084 #endif 1085 switch (CurTok) { 1086 case tok_eof: return; 1087 case ';': getNextToken(); break; // ignore top-level semicolons. 1088 case tok_def: HandleDefinition(); break; 1089 case tok_extern: HandleExtern(); break; 1090 default: HandleTopLevelExpression(); break; 1091 } 1092 } 1093 } 1094 1095 //===----------------------------------------------------------------------===// 1096 // "Library" functions that can be "extern'd" from user code. 1097 //===----------------------------------------------------------------------===// 1098 1099 /// putchard - putchar that takes a double and returns 0. 1100 extern "C" 1101 double putchard(double X) { 1102 putchar((char)X); 1103 return 0; 1104 } 1105 1106 /// printd - printf that takes a double prints it as "%f\n", returning 0. 1107 extern "C" 1108 double printd(double X) { 1109 printf("%f", X); 1110 return 0; 1111 } 1112 1113 extern "C" 1114 double printlf() { 1115 printf("\n"); 1116 return 0; 1117 } 1118 1119 //===----------------------------------------------------------------------===// 1120 // Command line input file handlers 1121 //===----------------------------------------------------------------------===// 1122 1123 Module* parseInputIR(std::string InputFile) { 1124 SMDiagnostic Err; 1125 Module *M = ParseIRFile(InputFile, Err, getGlobalContext()); 1126 if (!M) { 1127 Err.print("IR parsing failed: ", errs()); 1128 return NULL; 1129 } 1130 1131 return M; 1132 } 1133 1134 //===----------------------------------------------------------------------===// 1135 // Main driver code. 1136 //===----------------------------------------------------------------------===// 1137 1138 int main(int argc, char **argv) { 1139 InitializeNativeTarget(); 1140 LLVMContext &Context = getGlobalContext(); 1141 1142 cl::ParseCommandLineOptions(argc, argv, 1143 "Kaleidoscope example program\n"); 1144 1145 // Install standard binary operators. 1146 // 1 is lowest precedence. 1147 BinopPrecedence['='] = 2; 1148 BinopPrecedence['<'] = 10; 1149 BinopPrecedence['+'] = 20; 1150 BinopPrecedence['-'] = 20; 1151 BinopPrecedence['/'] = 40; 1152 BinopPrecedence['*'] = 40; // highest. 1153 1154 // Make the module, which holds all the code. 1155 if (!InputIR.empty()) { 1156 TheModule = parseInputIR(InputIR); 1157 } else { 1158 TheModule = new Module("my cool jit", Context); 1159 } 1160 1161 // Create the JIT. This takes ownership of the module. 1162 std::string ErrStr; 1163 TheExecutionEngine = EngineBuilder(TheModule).setErrorStr(&ErrStr).create(); 1164 if (!TheExecutionEngine) { 1165 fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str()); 1166 exit(1); 1167 } 1168 1169 FunctionPassManager OurFPM(TheModule); 1170 1171 // Set up the optimizer pipeline. Start with registering info about how the 1172 // target lays out data structures. 1173 OurFPM.add(new DataLayout(*TheExecutionEngine->getDataLayout())); 1174 // Provide basic AliasAnalysis support for GVN. 1175 OurFPM.add(createBasicAliasAnalysisPass()); 1176 // Promote allocas to registers. 1177 OurFPM.add(createPromoteMemoryToRegisterPass()); 1178 // Do simple "peephole" optimizations and bit-twiddling optzns. 1179 OurFPM.add(createInstructionCombiningPass()); 1180 // Reassociate expressions. 1181 OurFPM.add(createReassociatePass()); 1182 // Eliminate Common SubExpressions. 1183 OurFPM.add(createGVNPass()); 1184 // Simplify the control flow graph (deleting unreachable blocks, etc). 1185 OurFPM.add(createCFGSimplificationPass()); 1186 1187 OurFPM.doInitialization(); 1188 1189 // Set the global so the code gen can use this. 1190 TheFPM = &OurFPM; 1191 1192 // Prime the first token. 1193 #ifndef MINIMAL_STDERR_OUTPUT 1194 fprintf(stderr, "ready> "); 1195 #endif 1196 getNextToken(); 1197 1198 // Run the main "interpreter loop" now. 1199 MainLoop(); 1200 1201 // Print out all of the generated code. 1202 TheFPM = 0; 1203 #if !defined(MINIMAL_STDERR_OUTPUT) || defined(DUMP_FINAL_MODULE) 1204 TheModule->dump(); 1205 #endif 1206 return 0; 1207 } 1208