1 #define MINIMAL_STDERR_OUTPUT 2 3 #include "llvm/Analysis/Passes.h" 4 #include "llvm/ExecutionEngine/ExecutionEngine.h" 5 #include "llvm/IR/DataLayout.h" 6 #include "llvm/IR/DerivedTypes.h" 7 #include "llvm/IR/IRBuilder.h" 8 #include "llvm/IR/LLVMContext.h" 9 #include "llvm/IR/LegacyPassManager.h" 10 #include "llvm/IR/Module.h" 11 #include "llvm/IR/Verifier.h" 12 #include "llvm/IRReader/IRReader.h" 13 #include "llvm/Support/CommandLine.h" 14 #include "llvm/Support/SourceMgr.h" 15 #include "llvm/Support/TargetSelect.h" 16 #include "llvm/Support/raw_ostream.h" 17 #include "llvm/Transforms/Scalar.h" 18 #include <cctype> 19 #include <cstdio> 20 #include <map> 21 #include <string> 22 #include <vector> 23 24 using namespace llvm; 25 26 //===----------------------------------------------------------------------===// 27 // Command-line options 28 //===----------------------------------------------------------------------===// 29 30 namespace { 31 cl::opt<std::string> 32 InputIR("input-IR", 33 cl::desc("Specify the name of an IR file to load for function definitions"), 34 cl::value_desc("input IR file name")); 35 } // namespace 36 37 //===----------------------------------------------------------------------===// 38 // Lexer 39 //===----------------------------------------------------------------------===// 40 41 // The lexer returns tokens [0-255] if it is an unknown character, otherwise one 42 // of these for known things. 43 enum Token { 44 tok_eof = -1, 45 46 // commands 47 tok_def = -2, tok_extern = -3, 48 49 // primary 50 tok_identifier = -4, tok_number = -5, 51 52 // control 53 tok_if = -6, tok_then = -7, tok_else = -8, 54 tok_for = -9, tok_in = -10, 55 56 // operators 57 tok_binary = -11, tok_unary = -12, 58 59 // var definition 60 tok_var = -13 61 }; 62 63 static std::string IdentifierStr; // Filled in if tok_identifier 64 static double NumVal; // Filled in if tok_number 65 66 /// gettok - Return the next token from standard input. 67 static int gettok() { 68 static int LastChar = ' '; 69 70 // Skip any whitespace. 71 while (isspace(LastChar)) 72 LastChar = getchar(); 73 74 if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]* 75 IdentifierStr = LastChar; 76 while (isalnum((LastChar = getchar()))) 77 IdentifierStr += LastChar; 78 79 if (IdentifierStr == "def") return tok_def; 80 if (IdentifierStr == "extern") return tok_extern; 81 if (IdentifierStr == "if") return tok_if; 82 if (IdentifierStr == "then") return tok_then; 83 if (IdentifierStr == "else") return tok_else; 84 if (IdentifierStr == "for") return tok_for; 85 if (IdentifierStr == "in") return tok_in; 86 if (IdentifierStr == "binary") return tok_binary; 87 if (IdentifierStr == "unary") return tok_unary; 88 if (IdentifierStr == "var") return tok_var; 89 return tok_identifier; 90 } 91 92 if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+ 93 std::string NumStr; 94 do { 95 NumStr += LastChar; 96 LastChar = getchar(); 97 } while (isdigit(LastChar) || LastChar == '.'); 98 99 NumVal = strtod(NumStr.c_str(), 0); 100 return tok_number; 101 } 102 103 if (LastChar == '#') { 104 // Comment until end of line. 105 do LastChar = getchar(); 106 while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); 107 108 if (LastChar != EOF) 109 return gettok(); 110 } 111 112 // Check for end of file. Don't eat the EOF. 113 if (LastChar == EOF) 114 return tok_eof; 115 116 // Otherwise, just return the character as its ascii value. 117 int ThisChar = LastChar; 118 LastChar = getchar(); 119 return ThisChar; 120 } 121 122 //===----------------------------------------------------------------------===// 123 // Abstract Syntax Tree (aka Parse Tree) 124 //===----------------------------------------------------------------------===// 125 126 /// ExprAST - Base class for all expression nodes. 127 class ExprAST { 128 public: 129 virtual ~ExprAST() {} 130 virtual Value *Codegen() = 0; 131 }; 132 133 /// NumberExprAST - Expression class for numeric literals like "1.0". 134 class NumberExprAST : public ExprAST { 135 double Val; 136 public: 137 NumberExprAST(double val) : Val(val) {} 138 virtual Value *Codegen(); 139 }; 140 141 /// VariableExprAST - Expression class for referencing a variable, like "a". 142 class VariableExprAST : public ExprAST { 143 std::string Name; 144 public: 145 VariableExprAST(const std::string &name) : Name(name) {} 146 const std::string &getName() const { return Name; } 147 virtual Value *Codegen(); 148 }; 149 150 /// UnaryExprAST - Expression class for a unary operator. 151 class UnaryExprAST : public ExprAST { 152 char Opcode; 153 ExprAST *Operand; 154 public: 155 UnaryExprAST(char opcode, ExprAST *operand) 156 : Opcode(opcode), Operand(operand) {} 157 virtual Value *Codegen(); 158 }; 159 160 /// BinaryExprAST - Expression class for a binary operator. 161 class BinaryExprAST : public ExprAST { 162 char Op; 163 ExprAST *LHS, *RHS; 164 public: 165 BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) 166 : Op(op), LHS(lhs), RHS(rhs) {} 167 virtual Value *Codegen(); 168 }; 169 170 /// CallExprAST - Expression class for function calls. 171 class CallExprAST : public ExprAST { 172 std::string Callee; 173 std::vector<ExprAST*> Args; 174 public: 175 CallExprAST(const std::string &callee, std::vector<ExprAST*> &args) 176 : Callee(callee), Args(args) {} 177 virtual Value *Codegen(); 178 }; 179 180 /// IfExprAST - Expression class for if/then/else. 181 class IfExprAST : public ExprAST { 182 ExprAST *Cond, *Then, *Else; 183 public: 184 IfExprAST(ExprAST *cond, ExprAST *then, ExprAST *_else) 185 : Cond(cond), Then(then), Else(_else) {} 186 virtual Value *Codegen(); 187 }; 188 189 /// ForExprAST - Expression class for for/in. 190 class ForExprAST : public ExprAST { 191 std::string VarName; 192 ExprAST *Start, *End, *Step, *Body; 193 public: 194 ForExprAST(const std::string &varname, ExprAST *start, ExprAST *end, 195 ExprAST *step, ExprAST *body) 196 : VarName(varname), Start(start), End(end), Step(step), Body(body) {} 197 virtual Value *Codegen(); 198 }; 199 200 /// VarExprAST - Expression class for var/in 201 class VarExprAST : public ExprAST { 202 std::vector<std::pair<std::string, ExprAST*> > VarNames; 203 ExprAST *Body; 204 public: 205 VarExprAST(const std::vector<std::pair<std::string, ExprAST*> > &varnames, 206 ExprAST *body) 207 : VarNames(varnames), Body(body) {} 208 209 virtual Value *Codegen(); 210 }; 211 212 /// PrototypeAST - This class represents the "prototype" for a function, 213 /// which captures its argument names as well as if it is an operator. 214 class PrototypeAST { 215 std::string Name; 216 std::vector<std::string> Args; 217 bool isOperator; 218 unsigned Precedence; // Precedence if a binary op. 219 public: 220 PrototypeAST(const std::string &name, const std::vector<std::string> &args, 221 bool isoperator = false, unsigned prec = 0) 222 : Name(name), Args(args), isOperator(isoperator), Precedence(prec) {} 223 224 bool isUnaryOp() const { return isOperator && Args.size() == 1; } 225 bool isBinaryOp() const { return isOperator && Args.size() == 2; } 226 227 char getOperatorName() const { 228 assert(isUnaryOp() || isBinaryOp()); 229 return Name[Name.size()-1]; 230 } 231 232 unsigned getBinaryPrecedence() const { return Precedence; } 233 234 Function *Codegen(); 235 236 void CreateArgumentAllocas(Function *F); 237 }; 238 239 /// FunctionAST - This class represents a function definition itself. 240 class FunctionAST { 241 PrototypeAST *Proto; 242 ExprAST *Body; 243 public: 244 FunctionAST(PrototypeAST *proto, ExprAST *body) 245 : Proto(proto), Body(body) {} 246 247 Function *Codegen(); 248 }; 249 250 //===----------------------------------------------------------------------===// 251 // Parser 252 //===----------------------------------------------------------------------===// 253 254 /// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current 255 /// token the parser is looking at. getNextToken reads another token from the 256 /// lexer and updates CurTok with its results. 257 static int CurTok; 258 static int getNextToken() { 259 return CurTok = gettok(); 260 } 261 262 /// BinopPrecedence - This holds the precedence for each binary operator that is 263 /// defined. 264 static std::map<char, int> BinopPrecedence; 265 266 /// GetTokPrecedence - Get the precedence of the pending binary operator token. 267 static int GetTokPrecedence() { 268 if (!isascii(CurTok)) 269 return -1; 270 271 // Make sure it's a declared binop. 272 int TokPrec = BinopPrecedence[CurTok]; 273 if (TokPrec <= 0) return -1; 274 return TokPrec; 275 } 276 277 /// Error* - These are little helper functions for error handling. 278 ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;} 279 PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; } 280 FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; } 281 282 static ExprAST *ParseExpression(); 283 284 /// identifierexpr 285 /// ::= identifier 286 /// ::= identifier '(' expression* ')' 287 static ExprAST *ParseIdentifierExpr() { 288 std::string IdName = IdentifierStr; 289 290 getNextToken(); // eat identifier. 291 292 if (CurTok != '(') // Simple variable ref. 293 return new VariableExprAST(IdName); 294 295 // Call. 296 getNextToken(); // eat ( 297 std::vector<ExprAST*> Args; 298 if (CurTok != ')') { 299 while (1) { 300 ExprAST *Arg = ParseExpression(); 301 if (!Arg) return 0; 302 Args.push_back(Arg); 303 304 if (CurTok == ')') break; 305 306 if (CurTok != ',') 307 return Error("Expected ')' or ',' in argument list"); 308 getNextToken(); 309 } 310 } 311 312 // Eat the ')'. 313 getNextToken(); 314 315 return new CallExprAST(IdName, Args); 316 } 317 318 /// numberexpr ::= number 319 static ExprAST *ParseNumberExpr() { 320 ExprAST *Result = new NumberExprAST(NumVal); 321 getNextToken(); // consume the number 322 return Result; 323 } 324 325 /// parenexpr ::= '(' expression ')' 326 static ExprAST *ParseParenExpr() { 327 getNextToken(); // eat (. 328 ExprAST *V = ParseExpression(); 329 if (!V) return 0; 330 331 if (CurTok != ')') 332 return Error("expected ')'"); 333 getNextToken(); // eat ). 334 return V; 335 } 336 337 /// ifexpr ::= 'if' expression 'then' expression 'else' expression 338 static ExprAST *ParseIfExpr() { 339 getNextToken(); // eat the if. 340 341 // condition. 342 ExprAST *Cond = ParseExpression(); 343 if (!Cond) return 0; 344 345 if (CurTok != tok_then) 346 return Error("expected then"); 347 getNextToken(); // eat the then 348 349 ExprAST *Then = ParseExpression(); 350 if (Then == 0) return 0; 351 352 if (CurTok != tok_else) 353 return Error("expected else"); 354 355 getNextToken(); 356 357 ExprAST *Else = ParseExpression(); 358 if (!Else) return 0; 359 360 return new IfExprAST(Cond, Then, Else); 361 } 362 363 /// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression 364 static ExprAST *ParseForExpr() { 365 getNextToken(); // eat the for. 366 367 if (CurTok != tok_identifier) 368 return Error("expected identifier after for"); 369 370 std::string IdName = IdentifierStr; 371 getNextToken(); // eat identifier. 372 373 if (CurTok != '=') 374 return Error("expected '=' after for"); 375 getNextToken(); // eat '='. 376 377 378 ExprAST *Start = ParseExpression(); 379 if (Start == 0) return 0; 380 if (CurTok != ',') 381 return Error("expected ',' after for start value"); 382 getNextToken(); 383 384 ExprAST *End = ParseExpression(); 385 if (End == 0) return 0; 386 387 // The step value is optional. 388 ExprAST *Step = 0; 389 if (CurTok == ',') { 390 getNextToken(); 391 Step = ParseExpression(); 392 if (Step == 0) return 0; 393 } 394 395 if (CurTok != tok_in) 396 return Error("expected 'in' after for"); 397 getNextToken(); // eat 'in'. 398 399 ExprAST *Body = ParseExpression(); 400 if (Body == 0) return 0; 401 402 return new ForExprAST(IdName, Start, End, Step, Body); 403 } 404 405 /// varexpr ::= 'var' identifier ('=' expression)? 406 // (',' identifier ('=' expression)?)* 'in' expression 407 static ExprAST *ParseVarExpr() { 408 getNextToken(); // eat the var. 409 410 std::vector<std::pair<std::string, ExprAST*> > VarNames; 411 412 // At least one variable name is required. 413 if (CurTok != tok_identifier) 414 return Error("expected identifier after var"); 415 416 while (1) { 417 std::string Name = IdentifierStr; 418 getNextToken(); // eat identifier. 419 420 // Read the optional initializer. 421 ExprAST *Init = 0; 422 if (CurTok == '=') { 423 getNextToken(); // eat the '='. 424 425 Init = ParseExpression(); 426 if (Init == 0) return 0; 427 } 428 429 VarNames.push_back(std::make_pair(Name, Init)); 430 431 // End of var list, exit loop. 432 if (CurTok != ',') break; 433 getNextToken(); // eat the ','. 434 435 if (CurTok != tok_identifier) 436 return Error("expected identifier list after var"); 437 } 438 439 // At this point, we have to have 'in'. 440 if (CurTok != tok_in) 441 return Error("expected 'in' keyword after 'var'"); 442 getNextToken(); // eat 'in'. 443 444 ExprAST *Body = ParseExpression(); 445 if (Body == 0) return 0; 446 447 return new VarExprAST(VarNames, Body); 448 } 449 450 /// primary 451 /// ::= identifierexpr 452 /// ::= numberexpr 453 /// ::= parenexpr 454 /// ::= ifexpr 455 /// ::= forexpr 456 /// ::= varexpr 457 static ExprAST *ParsePrimary() { 458 switch (CurTok) { 459 default: return Error("unknown token when expecting an expression"); 460 case tok_identifier: return ParseIdentifierExpr(); 461 case tok_number: return ParseNumberExpr(); 462 case '(': return ParseParenExpr(); 463 case tok_if: return ParseIfExpr(); 464 case tok_for: return ParseForExpr(); 465 case tok_var: return ParseVarExpr(); 466 } 467 } 468 469 /// unary 470 /// ::= primary 471 /// ::= '!' unary 472 static ExprAST *ParseUnary() { 473 // If the current token is not an operator, it must be a primary expr. 474 if (!isascii(CurTok) || CurTok == '(' || CurTok == ',') 475 return ParsePrimary(); 476 477 // If this is a unary operator, read it. 478 int Opc = CurTok; 479 getNextToken(); 480 if (ExprAST *Operand = ParseUnary()) 481 return new UnaryExprAST(Opc, Operand); 482 return 0; 483 } 484 485 /// binoprhs 486 /// ::= ('+' unary)* 487 static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) { 488 // If this is a binop, find its precedence. 489 while (1) { 490 int TokPrec = GetTokPrecedence(); 491 492 // If this is a binop that binds at least as tightly as the current binop, 493 // consume it, otherwise we are done. 494 if (TokPrec < ExprPrec) 495 return LHS; 496 497 // Okay, we know this is a binop. 498 int BinOp = CurTok; 499 getNextToken(); // eat binop 500 501 // Parse the unary expression after the binary operator. 502 ExprAST *RHS = ParseUnary(); 503 if (!RHS) return 0; 504 505 // If BinOp binds less tightly with RHS than the operator after RHS, let 506 // the pending operator take RHS as its LHS. 507 int NextPrec = GetTokPrecedence(); 508 if (TokPrec < NextPrec) { 509 RHS = ParseBinOpRHS(TokPrec+1, RHS); 510 if (RHS == 0) return 0; 511 } 512 513 // Merge LHS/RHS. 514 LHS = new BinaryExprAST(BinOp, LHS, RHS); 515 } 516 } 517 518 /// expression 519 /// ::= unary binoprhs 520 /// 521 static ExprAST *ParseExpression() { 522 ExprAST *LHS = ParseUnary(); 523 if (!LHS) return 0; 524 525 return ParseBinOpRHS(0, LHS); 526 } 527 528 /// prototype 529 /// ::= id '(' id* ')' 530 /// ::= binary LETTER number? (id, id) 531 /// ::= unary LETTER (id) 532 static PrototypeAST *ParsePrototype() { 533 std::string FnName; 534 535 unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary. 536 unsigned BinaryPrecedence = 30; 537 538 switch (CurTok) { 539 default: 540 return ErrorP("Expected function name in prototype"); 541 case tok_identifier: 542 FnName = IdentifierStr; 543 Kind = 0; 544 getNextToken(); 545 break; 546 case tok_unary: 547 getNextToken(); 548 if (!isascii(CurTok)) 549 return ErrorP("Expected unary operator"); 550 FnName = "unary"; 551 FnName += (char)CurTok; 552 Kind = 1; 553 getNextToken(); 554 break; 555 case tok_binary: 556 getNextToken(); 557 if (!isascii(CurTok)) 558 return ErrorP("Expected binary operator"); 559 FnName = "binary"; 560 FnName += (char)CurTok; 561 Kind = 2; 562 getNextToken(); 563 564 // Read the precedence if present. 565 if (CurTok == tok_number) { 566 if (NumVal < 1 || NumVal > 100) 567 return ErrorP("Invalid precedecnce: must be 1..100"); 568 BinaryPrecedence = (unsigned)NumVal; 569 getNextToken(); 570 } 571 break; 572 } 573 574 if (CurTok != '(') 575 return ErrorP("Expected '(' in prototype"); 576 577 std::vector<std::string> ArgNames; 578 while (getNextToken() == tok_identifier) 579 ArgNames.push_back(IdentifierStr); 580 if (CurTok != ')') 581 return ErrorP("Expected ')' in prototype"); 582 583 // success. 584 getNextToken(); // eat ')'. 585 586 // Verify right number of names for operator. 587 if (Kind && ArgNames.size() != Kind) 588 return ErrorP("Invalid number of operands for operator"); 589 590 return new PrototypeAST(FnName, ArgNames, Kind != 0, BinaryPrecedence); 591 } 592 593 /// definition ::= 'def' prototype expression 594 static FunctionAST *ParseDefinition() { 595 getNextToken(); // eat def. 596 PrototypeAST *Proto = ParsePrototype(); 597 if (Proto == 0) return 0; 598 599 if (ExprAST *E = ParseExpression()) 600 return new FunctionAST(Proto, E); 601 return 0; 602 } 603 604 /// toplevelexpr ::= expression 605 static FunctionAST *ParseTopLevelExpr() { 606 if (ExprAST *E = ParseExpression()) { 607 // Make an anonymous proto. 608 PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>()); 609 return new FunctionAST(Proto, E); 610 } 611 return 0; 612 } 613 614 /// external ::= 'extern' prototype 615 static PrototypeAST *ParseExtern() { 616 getNextToken(); // eat extern. 617 return ParsePrototype(); 618 } 619 620 //===----------------------------------------------------------------------===// 621 // Code Generation 622 //===----------------------------------------------------------------------===// 623 624 static Module *TheModule; 625 static FunctionPassManager *TheFPM; 626 static LLVMContext TheContext; 627 static IRBuilder<> Builder(TheContext); 628 static std::map<std::string, AllocaInst*> NamedValues; 629 630 Value *ErrorV(const char *Str) { Error(Str); return 0; } 631 632 /// CreateEntryBlockAlloca - Create an alloca instruction in the entry block of 633 /// the function. This is used for mutable variables etc. 634 static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction, 635 const std::string &VarName) { 636 IRBuilder<> TmpB(&TheFunction->getEntryBlock(), 637 TheFunction->getEntryBlock().begin()); 638 return TmpB.CreateAlloca(Type::getDoubleTy(TheContext), 0, VarName.c_str()); 639 } 640 641 Value *NumberExprAST::Codegen() { 642 return ConstantFP::get(TheContext, APFloat(Val)); 643 } 644 645 Value *VariableExprAST::Codegen() { 646 // Look this variable up in the function. 647 Value *V = NamedValues[Name]; 648 if (V == 0) return ErrorV("Unknown variable name"); 649 650 // Load the value. 651 return Builder.CreateLoad(V, Name.c_str()); 652 } 653 654 Value *UnaryExprAST::Codegen() { 655 Value *OperandV = Operand->Codegen(); 656 if (OperandV == 0) return 0; 657 #ifdef USE_MCJIT 658 Function *F = TheHelper->getFunction(MakeLegalFunctionName(std::string("unary")+Opcode)); 659 #else 660 Function *F = TheModule->getFunction(std::string("unary")+Opcode); 661 #endif 662 if (F == 0) 663 return ErrorV("Unknown unary operator"); 664 665 return Builder.CreateCall(F, OperandV, "unop"); 666 } 667 668 Value *BinaryExprAST::Codegen() { 669 // Special case '=' because we don't want to emit the LHS as an expression. 670 if (Op == '=') { 671 // Assignment requires the LHS to be an identifier. 672 // For now, I'm building without RTTI because LLVM builds that way by 673 // default and so we need to build that way to use the command line supprt. 674 // If you build LLVM with RTTI this can be changed back to a dynamic_cast. 675 VariableExprAST *LHSE = static_cast<VariableExprAST*>(LHS); 676 if (!LHSE) 677 return ErrorV("destination of '=' must be a variable"); 678 // Codegen the RHS. 679 Value *Val = RHS->Codegen(); 680 if (Val == 0) return 0; 681 682 // Look up the name. 683 Value *Variable = NamedValues[LHSE->getName()]; 684 if (Variable == 0) return ErrorV("Unknown variable name"); 685 686 Builder.CreateStore(Val, Variable); 687 return Val; 688 } 689 690 Value *L = LHS->Codegen(); 691 Value *R = RHS->Codegen(); 692 if (L == 0 || R == 0) return 0; 693 694 switch (Op) { 695 case '+': return Builder.CreateFAdd(L, R, "addtmp"); 696 case '-': return Builder.CreateFSub(L, R, "subtmp"); 697 case '*': return Builder.CreateFMul(L, R, "multmp"); 698 case '/': return Builder.CreateFDiv(L, R, "divtmp"); 699 case '<': 700 L = Builder.CreateFCmpULT(L, R, "cmptmp"); 701 // Convert bool 0/1 to double 0.0 or 1.0 702 return Builder.CreateUIToFP(L, Type::getDoubleTy(TheContext), "booltmp"); 703 default: break; 704 } 705 706 // If it wasn't a builtin binary operator, it must be a user defined one. Emit 707 // a call to it. 708 Function *F = TheModule->getFunction(std::string("binary")+Op); 709 assert(F && "binary operator not found!"); 710 711 Value *Ops[] = { L, R }; 712 return Builder.CreateCall(F, Ops, "binop"); 713 } 714 715 Value *CallExprAST::Codegen() { 716 // Look up the name in the global module table. 717 Function *CalleeF = TheModule->getFunction(Callee); 718 if (CalleeF == 0) { 719 char error_str[64]; 720 sprintf(error_str, "Unknown function referenced %s", Callee.c_str()); 721 return ErrorV(error_str); 722 } 723 724 // If argument mismatch error. 725 if (CalleeF->arg_size() != Args.size()) 726 return ErrorV("Incorrect # arguments passed"); 727 728 std::vector<Value*> ArgsV; 729 for (unsigned i = 0, e = Args.size(); i != e; ++i) { 730 ArgsV.push_back(Args[i]->Codegen()); 731 if (ArgsV.back() == 0) return 0; 732 } 733 734 return Builder.CreateCall(CalleeF, ArgsV, "calltmp"); 735 } 736 737 Value *IfExprAST::Codegen() { 738 Value *CondV = Cond->Codegen(); 739 if (CondV == 0) return 0; 740 741 // Convert condition to a bool by comparing equal to 0.0. 742 CondV = Builder.CreateFCmpONE( 743 CondV, ConstantFP::get(TheContext, APFloat(0.0)), "ifcond"); 744 745 Function *TheFunction = Builder.GetInsertBlock()->getParent(); 746 747 // Create blocks for the then and else cases. Insert the 'then' block at the 748 // end of the function. 749 BasicBlock *ThenBB = BasicBlock::Create(TheContext, "then", TheFunction); 750 BasicBlock *ElseBB = BasicBlock::Create(TheContext, "else"); 751 BasicBlock *MergeBB = BasicBlock::Create(TheContext, "ifcont"); 752 753 Builder.CreateCondBr(CondV, ThenBB, ElseBB); 754 755 // Emit then value. 756 Builder.SetInsertPoint(ThenBB); 757 758 Value *ThenV = Then->Codegen(); 759 if (ThenV == 0) return 0; 760 761 Builder.CreateBr(MergeBB); 762 // Codegen of 'Then' can change the current block, update ThenBB for the PHI. 763 ThenBB = Builder.GetInsertBlock(); 764 765 // Emit else block. 766 TheFunction->getBasicBlockList().push_back(ElseBB); 767 Builder.SetInsertPoint(ElseBB); 768 769 Value *ElseV = Else->Codegen(); 770 if (ElseV == 0) return 0; 771 772 Builder.CreateBr(MergeBB); 773 // Codegen of 'Else' can change the current block, update ElseBB for the PHI. 774 ElseBB = Builder.GetInsertBlock(); 775 776 // Emit merge block. 777 TheFunction->getBasicBlockList().push_back(MergeBB); 778 Builder.SetInsertPoint(MergeBB); 779 PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(TheContext), 2, "iftmp"); 780 781 PN->addIncoming(ThenV, ThenBB); 782 PN->addIncoming(ElseV, ElseBB); 783 return PN; 784 } 785 786 Value *ForExprAST::Codegen() { 787 // Output this as: 788 // var = alloca double 789 // ... 790 // start = startexpr 791 // store start -> var 792 // goto loop 793 // loop: 794 // ... 795 // bodyexpr 796 // ... 797 // loopend: 798 // step = stepexpr 799 // endcond = endexpr 800 // 801 // curvar = load var 802 // nextvar = curvar + step 803 // store nextvar -> var 804 // br endcond, loop, endloop 805 // outloop: 806 807 Function *TheFunction = Builder.GetInsertBlock()->getParent(); 808 809 // Create an alloca for the variable in the entry block. 810 AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName); 811 812 // Emit the start code first, without 'variable' in scope. 813 Value *StartVal = Start->Codegen(); 814 if (StartVal == 0) return 0; 815 816 // Store the value into the alloca. 817 Builder.CreateStore(StartVal, Alloca); 818 819 // Make the new basic block for the loop header, inserting after current 820 // block. 821 BasicBlock *LoopBB = BasicBlock::Create(TheContext, "loop", TheFunction); 822 823 // Insert an explicit fall through from the current block to the LoopBB. 824 Builder.CreateBr(LoopBB); 825 826 // Start insertion in LoopBB. 827 Builder.SetInsertPoint(LoopBB); 828 829 // Within the loop, the variable is defined equal to the PHI node. If it 830 // shadows an existing variable, we have to restore it, so save it now. 831 AllocaInst *OldVal = NamedValues[VarName]; 832 NamedValues[VarName] = Alloca; 833 834 // Emit the body of the loop. This, like any other expr, can change the 835 // current BB. Note that we ignore the value computed by the body, but don't 836 // allow an error. 837 if (Body->Codegen() == 0) 838 return 0; 839 840 // Emit the step value. 841 Value *StepVal; 842 if (Step) { 843 StepVal = Step->Codegen(); 844 if (StepVal == 0) return 0; 845 } else { 846 // If not specified, use 1.0. 847 StepVal = ConstantFP::get(TheContext, APFloat(1.0)); 848 } 849 850 // Compute the end condition. 851 Value *EndCond = End->Codegen(); 852 if (EndCond == 0) return EndCond; 853 854 // Reload, increment, and restore the alloca. This handles the case where 855 // the body of the loop mutates the variable. 856 Value *CurVar = Builder.CreateLoad(Alloca, VarName.c_str()); 857 Value *NextVar = Builder.CreateFAdd(CurVar, StepVal, "nextvar"); 858 Builder.CreateStore(NextVar, Alloca); 859 860 // Convert condition to a bool by comparing equal to 0.0. 861 EndCond = Builder.CreateFCmpONE( 862 EndCond, ConstantFP::get(TheContext, APFloat(0.0)), "loopcond"); 863 864 // Create the "after loop" block and insert it. 865 BasicBlock *AfterBB = 866 BasicBlock::Create(TheContext, "afterloop", TheFunction); 867 868 // Insert the conditional branch into the end of LoopEndBB. 869 Builder.CreateCondBr(EndCond, LoopBB, AfterBB); 870 871 // Any new code will be inserted in AfterBB. 872 Builder.SetInsertPoint(AfterBB); 873 874 // Restore the unshadowed variable. 875 if (OldVal) 876 NamedValues[VarName] = OldVal; 877 else 878 NamedValues.erase(VarName); 879 880 881 // for expr always returns 0.0. 882 return Constant::getNullValue(Type::getDoubleTy(TheContext)); 883 } 884 885 Value *VarExprAST::Codegen() { 886 std::vector<AllocaInst *> OldBindings; 887 888 Function *TheFunction = Builder.GetInsertBlock()->getParent(); 889 890 // Register all variables and emit their initializer. 891 for (unsigned i = 0, e = VarNames.size(); i != e; ++i) { 892 const std::string &VarName = VarNames[i].first; 893 ExprAST *Init = VarNames[i].second; 894 895 // Emit the initializer before adding the variable to scope, this prevents 896 // the initializer from referencing the variable itself, and permits stuff 897 // like this: 898 // var a = 1 in 899 // var a = a in ... # refers to outer 'a'. 900 Value *InitVal; 901 if (Init) { 902 InitVal = Init->Codegen(); 903 if (InitVal == 0) return 0; 904 } else { // If not specified, use 0.0. 905 InitVal = ConstantFP::get(TheContext, APFloat(0.0)); 906 } 907 908 AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName); 909 Builder.CreateStore(InitVal, Alloca); 910 911 // Remember the old variable binding so that we can restore the binding when 912 // we unrecurse. 913 OldBindings.push_back(NamedValues[VarName]); 914 915 // Remember this binding. 916 NamedValues[VarName] = Alloca; 917 } 918 919 // Codegen the body, now that all vars are in scope. 920 Value *BodyVal = Body->Codegen(); 921 if (BodyVal == 0) return 0; 922 923 // Pop all our variables from scope. 924 for (unsigned i = 0, e = VarNames.size(); i != e; ++i) 925 NamedValues[VarNames[i].first] = OldBindings[i]; 926 927 // Return the body computation. 928 return BodyVal; 929 } 930 931 Function *PrototypeAST::Codegen() { 932 // Make the function type: double(double,double) etc. 933 std::vector<Type *> Doubles(Args.size(), Type::getDoubleTy(TheContext)); 934 FunctionType *FT = 935 FunctionType::get(Type::getDoubleTy(TheContext), Doubles, false); 936 937 Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule); 938 // If F conflicted, there was already something named 'Name'. If it has a 939 // body, don't allow redefinition or reextern. 940 if (F->getName() != Name) { 941 // Delete the one we just made and get the existing one. 942 F->eraseFromParent(); 943 F = TheModule->getFunction(Name); 944 // If F already has a body, reject this. 945 if (!F->empty()) { 946 ErrorF("redefinition of function"); 947 return 0; 948 } 949 // If F took a different number of args, reject. 950 if (F->arg_size() != Args.size()) { 951 ErrorF("redefinition of function with different # args"); 952 return 0; 953 } 954 } 955 956 // Set names for all arguments. 957 unsigned Idx = 0; 958 for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size(); 959 ++AI, ++Idx) 960 AI->setName(Args[Idx]); 961 962 return F; 963 } 964 965 /// CreateArgumentAllocas - Create an alloca for each argument and register the 966 /// argument in the symbol table so that references to it will succeed. 967 void PrototypeAST::CreateArgumentAllocas(Function *F) { 968 Function::arg_iterator AI = F->arg_begin(); 969 for (unsigned Idx = 0, e = Args.size(); Idx != e; ++Idx, ++AI) { 970 // Create an alloca for this variable. 971 AllocaInst *Alloca = CreateEntryBlockAlloca(F, Args[Idx]); 972 973 // Store the initial value into the alloca. 974 Builder.CreateStore(AI, Alloca); 975 976 // Add arguments to variable symbol table. 977 NamedValues[Args[Idx]] = Alloca; 978 } 979 } 980 981 Function *FunctionAST::Codegen() { 982 NamedValues.clear(); 983 984 Function *TheFunction = Proto->Codegen(); 985 if (TheFunction == 0) 986 return 0; 987 988 // If this is an operator, install it. 989 if (Proto->isBinaryOp()) 990 BinopPrecedence[Proto->getOperatorName()] = Proto->getBinaryPrecedence(); 991 992 // Create a new basic block to start insertion into. 993 BasicBlock *BB = BasicBlock::Create(TheContext, "entry", TheFunction); 994 Builder.SetInsertPoint(BB); 995 996 // Add all arguments to the symbol table and create their allocas. 997 Proto->CreateArgumentAllocas(TheFunction); 998 999 if (Value *RetVal = Body->Codegen()) { 1000 // Finish off the function. 1001 Builder.CreateRet(RetVal); 1002 1003 // Validate the generated code, checking for consistency. 1004 verifyFunction(*TheFunction); 1005 1006 // Optimize the function. 1007 TheFPM->run(*TheFunction); 1008 1009 return TheFunction; 1010 } 1011 1012 // Error reading body, remove function. 1013 TheFunction->eraseFromParent(); 1014 1015 if (Proto->isBinaryOp()) 1016 BinopPrecedence.erase(Proto->getOperatorName()); 1017 return 0; 1018 } 1019 1020 //===----------------------------------------------------------------------===// 1021 // Top-Level parsing and JIT Driver 1022 //===----------------------------------------------------------------------===// 1023 1024 static ExecutionEngine *TheExecutionEngine; 1025 1026 static void HandleDefinition() { 1027 if (FunctionAST *F = ParseDefinition()) { 1028 if (Function *LF = F->Codegen()) { 1029 #ifndef MINIMAL_STDERR_OUTPUT 1030 fprintf(stderr, "Read function definition:"); 1031 LF->dump(); 1032 #endif 1033 } 1034 } else { 1035 // Skip token for error recovery. 1036 getNextToken(); 1037 } 1038 } 1039 1040 static void HandleExtern() { 1041 if (PrototypeAST *P = ParseExtern()) { 1042 if (Function *F = P->Codegen()) { 1043 #ifndef MINIMAL_STDERR_OUTPUT 1044 fprintf(stderr, "Read extern: "); 1045 F->dump(); 1046 #endif 1047 } 1048 } else { 1049 // Skip token for error recovery. 1050 getNextToken(); 1051 } 1052 } 1053 1054 static void HandleTopLevelExpression() { 1055 // Evaluate a top-level expression into an anonymous function. 1056 if (FunctionAST *F = ParseTopLevelExpr()) { 1057 if (Function *LF = F->Codegen()) { 1058 // JIT the function, returning a function pointer. 1059 void *FPtr = TheExecutionEngine->getPointerToFunction(LF); 1060 // Cast it to the right type (takes no arguments, returns a double) so we 1061 // can call it as a native function. 1062 double (*FP)() = (double (*)())(intptr_t)FPtr; 1063 #ifdef MINIMAL_STDERR_OUTPUT 1064 FP(); 1065 #else 1066 fprintf(stderr, "Evaluated to %f\n", FP()); 1067 #endif 1068 } 1069 } else { 1070 // Skip token for error recovery. 1071 getNextToken(); 1072 } 1073 } 1074 1075 /// top ::= definition | external | expression | ';' 1076 static void MainLoop() { 1077 while (1) { 1078 #ifndef MINIMAL_STDERR_OUTPUT 1079 fprintf(stderr, "ready> "); 1080 #endif 1081 switch (CurTok) { 1082 case tok_eof: return; 1083 case ';': getNextToken(); break; // ignore top-level semicolons. 1084 case tok_def: HandleDefinition(); break; 1085 case tok_extern: HandleExtern(); break; 1086 default: HandleTopLevelExpression(); break; 1087 } 1088 } 1089 } 1090 1091 //===----------------------------------------------------------------------===// 1092 // "Library" functions that can be "extern'd" from user code. 1093 //===----------------------------------------------------------------------===// 1094 1095 /// putchard - putchar that takes a double and returns 0. 1096 extern "C" 1097 double putchard(double X) { 1098 putchar((char)X); 1099 return 0; 1100 } 1101 1102 /// printd - printf that takes a double prints it as "%f\n", returning 0. 1103 extern "C" 1104 double printd(double X) { 1105 printf("%f", X); 1106 return 0; 1107 } 1108 1109 extern "C" 1110 double printlf() { 1111 printf("\n"); 1112 return 0; 1113 } 1114 1115 //===----------------------------------------------------------------------===// 1116 // Command line input file handlers 1117 //===----------------------------------------------------------------------===// 1118 1119 Module* parseInputIR(std::string InputFile) { 1120 SMDiagnostic Err; 1121 Module *M = ParseIRFile(InputFile, Err, TheContext); 1122 if (!M) { 1123 Err.print("IR parsing failed: ", errs()); 1124 return NULL; 1125 } 1126 1127 return M; 1128 } 1129 1130 //===----------------------------------------------------------------------===// 1131 // Main driver code. 1132 //===----------------------------------------------------------------------===// 1133 1134 int main(int argc, char **argv) { 1135 InitializeNativeTarget(); 1136 LLVMContext &Context = TheContext; 1137 1138 cl::ParseCommandLineOptions(argc, argv, 1139 "Kaleidoscope example program\n"); 1140 1141 // Install standard binary operators. 1142 // 1 is lowest precedence. 1143 BinopPrecedence['='] = 2; 1144 BinopPrecedence['<'] = 10; 1145 BinopPrecedence['+'] = 20; 1146 BinopPrecedence['-'] = 20; 1147 BinopPrecedence['/'] = 40; 1148 BinopPrecedence['*'] = 40; // highest. 1149 1150 // Make the module, which holds all the code. 1151 if (!InputIR.empty()) { 1152 TheModule = parseInputIR(InputIR); 1153 } else { 1154 TheModule = new Module("my cool jit", Context); 1155 } 1156 1157 // Create the JIT. This takes ownership of the module. 1158 std::string ErrStr; 1159 TheExecutionEngine = EngineBuilder(TheModule).setErrorStr(&ErrStr).create(); 1160 if (!TheExecutionEngine) { 1161 fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str()); 1162 exit(1); 1163 } 1164 1165 FunctionPassManager OurFPM(TheModule); 1166 1167 // Set up the optimizer pipeline. Start with registering info about how the 1168 // target lays out data structures. 1169 OurFPM.add(new DataLayout(*TheExecutionEngine->getDataLayout())); 1170 // Provide basic AliasAnalysis support for GVN. 1171 OurFPM.add(createBasicAliasAnalysisPass()); 1172 // Promote allocas to registers. 1173 OurFPM.add(createPromoteMemoryToRegisterPass()); 1174 // Do simple "peephole" optimizations and bit-twiddling optzns. 1175 OurFPM.add(createInstructionCombiningPass()); 1176 // Reassociate expressions. 1177 OurFPM.add(createReassociatePass()); 1178 // Eliminate Common SubExpressions. 1179 OurFPM.add(createGVNPass()); 1180 // Simplify the control flow graph (deleting unreachable blocks, etc). 1181 OurFPM.add(createCFGSimplificationPass()); 1182 1183 OurFPM.doInitialization(); 1184 1185 // Set the global so the code gen can use this. 1186 TheFPM = &OurFPM; 1187 1188 // Prime the first token. 1189 #ifndef MINIMAL_STDERR_OUTPUT 1190 fprintf(stderr, "ready> "); 1191 #endif 1192 getNextToken(); 1193 1194 // Run the main "interpreter loop" now. 1195 MainLoop(); 1196 1197 // Print out all of the generated code. 1198 TheFPM = 0; 1199 #if !defined(MINIMAL_STDERR_OUTPUT) || defined(DUMP_FINAL_MODULE) 1200 TheModule->dump(); 1201 #endif 1202 return 0; 1203 } 1204