/***********************************************************************

  A JavaScript tokenizer / parser / beautifier / compressor.

  This version is suitable for Node.js.  With minimal changes (the
  exports stuff) it should work on any JS platform.

  This file contains the tokenizer/parser.  It is a port to JavaScript
  of parse-js [1], a JavaScript parser library written in Common Lisp
  by Marijn Haverbeke.  Thank you Marijn!

  [1] http://marijn.haverbeke.nl/parse-js/

  Exported functions:

    - tokenizer(code) -- returns a function.  Call the returned
      function to fetch the next token.

    - parse(code) -- returns an AST of the given JavaScript code.

  -------------------------------- (C) ---------------------------------

                           Author: Mihai Bazon
                         <mihai.bazon (at) gmail.com>
                       http://mihai.bazon.net/blog

  Distributed under the BSD license:

    Copyright 2010 (c) Mihai Bazon <mihai.bazon (at) gmail.com>
    Based on parse-js (http://marijn.haverbeke.nl/parse-js/).

    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions
    are met:

        * Redistributions of source code must retain the above
          copyright notice, this list of conditions and the following
          disclaimer.

        * Redistributions in binary form must reproduce the above
          copyright notice, this list of conditions and the following
          disclaimer in the documentation and/or other materials
          provided with the distribution.

    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AS IS AND ANY
    EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE
    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
    OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
    TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
    THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
    SUCH DAMAGE.

 ***********************************************************************/

/* -----[ Tokenizer (constants) ]----- */

// Words that read_word() turns into "keyword" (or "operator") tokens.
// "debugger" must be listed here, otherwise the parser's `debugger`
// statement branch can never be reached.
var KEYWORDS = array_to_hash([
    "break",
    "case",
    "catch",
    "const",
    "continue",
    "debugger",
    "default",
    "delete",
    "do",
    "else",
    "finally",
    "for",
    "function",
    "if",
    "in",
    "instanceof",
    "new",
    "return",
    "switch",
    "throw",
    "try",
    "typeof",
    "var",
    "void",
    "while",
    "with"
]);

// Exported for consumers; not consulted by the tokenizer/parser in this file.
var RESERVED_WORDS = array_to_hash([
    "abstract",
    "boolean",
    "byte",
    "char",
    "class",
    "debugger",
    "double",
    "enum",
    "export",
    "extends",
    "final",
    "float",
    "goto",
    "implements",
    "import",
    "int",
    "interface",
    "long",
    "native",
    "package",
    "private",
    "protected",
    "public",
    "short",
    "static",
    "super",
    "synchronized",
    "throws",
    "transient",
    "volatile"
]);

// After one of these keywords a "/" starts a regexp, not a division.
var KEYWORDS_BEFORE_EXPRESSION = array_to_hash([
    "return",
    "new",
    "delete",
    "throw",
    "else",
    "case"
]);

// NOTE: read_word() only consults this table for words that are also in
// KEYWORDS; none of these are, so they are currently emitted as "name".
var KEYWORDS_ATOM = array_to_hash([
    "false",
    "null",
    "true",
    "undefined"
]);

var OPERATOR_CHARS = array_to_hash(characters("+-*&%=<>!?|~^"));

var RE_HEX_NUMBER = /^0x[0-9a-f]+$/i;
var RE_OCT_NUMBER = /^0[0-7]+$/;
var RE_DEC_NUMBER = /^\d*\.?\d*(?:e[+-]?\d*(?:\d\.?|\.?\d)\d*)?$/i;

var OPERATORS = array_to_hash([
    "in",
    "instanceof",
    "typeof",
    "new",
    "void",
    "delete",
    "++",
    "--",
    "+",
    "-",
    "!",
    "~",
    "&",
    "|",
    "^",
    "*",
    "/",
    "%",
    ">>",
    "<<",
    ">>>",
    "<",
    ">",
    "<=",
    ">=",
    "==",
    "===",
    "!=",
    "!==",
    "?",
    "=",
    "+=",
    "-=",
    "/=",
    "*=",
    "%=",
    ">>=",
    "<<=",
    ">>>=",
    "|=",
    "^=",
    "&=",
    "&&",
    "||"
]);

// Space, no-break space, line feed, carriage return, tab, form feed and
// vertical tab.  (Other line terminators are normalized to "\n" by
// tokenizer() before scanning starts.)
var WHITESPACE_CHARS = array_to_hash(characters(" \u00a0\n\r\t\f\u000b"));

// After one of these punctuation characters a "/" starts a regexp.
var PUNC_BEFORE_EXPRESSION = array_to_hash(characters("[{}(,.;:"));

var PUNC_CHARS = array_to_hash(characters("[]{}(),;:"));

var REGEXP_MODIFIERS = array_to_hash(characters("gmsiy"));

/* -----[ Tokenizer ]----- */

// True for ASCII digits and ASCII letters (checked on the first char code).
function is_alphanumeric_char(ch) {
    ch = ch.charCodeAt(0);
    return (ch >= 48 && ch <= 57) ||
        (ch >= 65 && ch <= 90) ||
        (ch >= 97 && ch <= 122);
};

// True for characters accepted inside a word: ASCII alphanumerics, "$", "_".
function is_identifier_char(ch) {
    return is_alphanumeric_char(ch) || ch == "$" || ch == "_";
};

// True for ASCII digits.
function is_digit(ch) {
    ch = ch.charCodeAt(0);
    return ch >= 48 && ch <= 57;
};

// Converts the text of a numeric literal to a number.  Returns undefined
// when the text matches none of the hex / octal / decimal patterns.
function parse_js_number(num) {
    if (RE_HEX_NUMBER.test(num)) {
        return parseInt(num.substr(2), 16);
    } else if (RE_OCT_NUMBER.test(num)) {
        return parseInt(num.substr(1), 8);
    } else if (RE_DEC_NUMBER.test(num)) {
        return parseFloat(num);
    }
};

// Error object thrown for every tokenizer/parser failure.  Carries the
// message plus the line / column / absolute position it refers to.
function JS_Parse_Error(message, line, col, pos) {
    this.message = message;
    this.line = line;
    this.col = col;
    this.pos = pos;
    try {
        // deliberately provoke an exception just to capture a stack trace
        ({})();
    } catch(ex) {
        this.stack = ex.stack;
    };
};

JS_Parse_Error.prototype.toString = function() {
    return this.message + " (line: " + this.line + ", col: " + this.col + ", pos: " + this.pos + ")" + "\n\n" + this.stack;
};

// Throws a JS_Parse_Error built from the given arguments.
function js_error(message, line, col, pos) {
    throw new JS_Parse_Error(message, line, col, pos);
};

// True when `token` has the given type and, if `val` is given, that value.
function is_token(token, type, val) {
    return token.type == type && (val == null || token.value == val);
};

// Sentinel thrown by next(true) / find(..., true) when the input runs out.
var EX_EOF = {};

// Returns a function that yields the next token of $TEXT on each call.
// Tokens are plain objects { type, value, line, col, pos, nlb } (nlb:
// a newline was seen before the token); non-comment tokens also carry
// `comments_before`.  Passing a truthy argument to the returned function
// forces the upcoming input to be read as a regexp body.
function tokenizer($TEXT) {

    var S = {
        // normalize all line terminators to "\n" and drop a leading BOM
        text            : $TEXT.replace(/\r\n?|[\n\u2028\u2029]/g, "\n").replace(/^\uFEFF/, ''),
        pos             : 0,
        tokpos          : 0,
        line            : 0,
        tokline         : 0,
        col             : 0,
        tokcol          : 0,
        newline_before  : false,
        regex_allowed   : false,
        comments_before : []
    };

    // Current character without consuming it ("" at end of input).
    function peek() { return S.text.charAt(S.pos); };

    // Consumes and returns one character, maintaining line/col and the
    // newline flag.  Throws EX_EOF at end of input when signal_eof is set.
    function next(signal_eof) {
        var ch = S.text.charAt(S.pos++);
        if (signal_eof && !ch)
            throw EX_EOF;
        if (ch == "\n") {
            S.newline_before = true;
            ++S.line;
            S.col = 0;
        } else {
            ++S.col;
        }
        return ch;
    };

    // True at end of input.
    function eof() {
        return !peek();
    };

    // Index of `what` at or after the current position (-1 if absent);
    // throws EX_EOF instead of returning -1 when signal_eof is set.
    function find(what, signal_eof) {
        var pos = S.text.indexOf(what, S.pos);
        if (signal_eof && pos == -1) throw EX_EOF;
        return pos;
    };

    // Remembers where the token that is about to be read begins.
    function start_token() {
        S.tokline = S.line;
        S.tokcol = S.col;
        S.tokpos = S.pos;
    };

    // Builds a token object and updates the "is a regexp allowed next"
    // flag.  Pending comments are attached to non-comment tokens only.
    function token(type, value, is_comment) {
        S.regex_allowed = ((type == "operator" && !HOP(UNARY_POSTFIX, value)) ||
                           (type == "keyword" && HOP(KEYWORDS_BEFORE_EXPRESSION, value)) ||
                           (type == "punc" && HOP(PUNC_BEFORE_EXPRESSION, value)));
        var ret = {
            type  : type,
            value : value,
            line  : S.tokline,
            col   : S.tokcol,
            pos   : S.tokpos,
            nlb   : S.newline_before
        };
        if (!is_comment) {
            ret.comments_before = S.comments_before;
            S.comments_before = [];
        }
        S.newline_before = false;
        return ret;
    };

    function skip_whitespace() {
        while (HOP(WHITESPACE_CHARS, peek()))
            next();
    };

    // Consumes characters while pred(ch, index) is truthy; returns them.
    function read_while(pred) {
        var ret = "", ch = peek(), i = 0;
        while (ch && pred(ch, i++)) {
            ret += next();
            ch = peek();
        }
        return ret;
    };

    // Throws a JS_Parse_Error located at the start of the current token.
    function parse_error(err) {
        js_error(err, S.tokline, S.tokcol, S.tokpos);
    };

    // Reads a numeric literal.  `prefix` is "." when the caller already
    // consumed a leading dot.
    function read_num(prefix) {
        var has_e = false, after_e = false, has_x = false, has_dot = prefix == ".";
        var num = read_while(function(ch, i){
            if (ch == "x" || ch == "X") {
                if (has_x) return false;
                return has_x = true;
            }
            if (!has_x && (ch == "E" || ch == "e")) {
                if (has_e) return false;
                return has_e = after_e = true;
            }
            if (ch == "-") {
                // a sign is only accepted right after the exponent marker
                // (or as the very first character when there is no prefix)
                if (after_e || (i == 0 && !prefix)) return true;
                return false;
            }
            if (ch == "+") return after_e;
            after_e = false;
            if (ch == ".") {
                if (!has_dot)
                    return has_dot = true;
                return false;
            }
            return is_alphanumeric_char(ch);
        });
        if (prefix)
            num = prefix + num;
        var valid = parse_js_number(num);
        if (!isNaN(valid)) {
            return token("num", valid);
        } else {
            parse_error("Invalid syntax: " + num);
        }
    };

    // Decodes the escape sequence following a backslash in a string.
    function read_escaped_char() {
        var ch = next(true);
        switch (ch) {
          case "n" : return "\n";
          case "r" : return "\r";
          case "t" : return "\t";
          case "b" : return "\b";
          case "v" : return "\v";
          case "f" : return "\f";
          case "0" : return "\0";
          case "x" : return String.fromCharCode(hex_bytes(2));
          case "u" : return String.fromCharCode(hex_bytes(4));
          default  : return ch;
        }
    };

    // Reads exactly n hex digits and returns their numeric value.
    function hex_bytes(n) {
        var num = 0;
        for (; n > 0; --n) {
            var digit = parseInt(next(true), 16);
            if (isNaN(digit))
                parse_error("Invalid hex-character pattern in string");
            num = (num << 4) | digit;
        }
        return num;
    };

    // Reads a quoted string; the opening quote is the current character.
    function read_string() {
        return with_eof_error("Unterminated string constant", function(){
            var quote = next(), ret = "";
            for (;;) {
                var ch = next(true);
                if (ch == "\\") ch = read_escaped_char();
                else if (ch == quote) break;
                ret += ch;
            }
            return token("string", ret);
        });
    };

    // Reads a "//" comment; the first "/" has already been consumed.
    // The terminating newline (if any) is left in the input.
    function read_line_comment() {
        next();
        var i = find("\n"), ret, had_newline = S.newline_before;
        if (i == -1) {
            ret = S.text.substr(S.pos);
            S.pos = S.text.length;
        } else {
            ret = S.text.substring(S.pos, i);
            S.pos = i;
        }
        // S.pos was moved directly, so advance the column by hand
        S.col += ret.length;
        var tok = token("comment1", ret, true);
        // token() clears the flag; a newline seen before the comment still
        // precedes whatever real token comes next
        S.newline_before = had_newline;
        return tok;
    };

    // Reads a "/* ... */" comment; the first "/" has already been consumed.
    function read_multiline_comment() {
        next();
        return with_eof_error("Unterminated multiline comment", function(){
            var i = find("*/", true),
                text = S.text.substring(S.pos, i),
                last_nl = text.lastIndexOf("\n"),
                had_newline = S.newline_before,
                tok = token("comment2", text, true);
            S.pos = i + 2;
            S.line += text.split("\n").length - 1;
            // S.pos was moved directly, so fix up the column: count from
            // the last newline inside the comment, or just advance past
            // the comment text and the closing "*/"
            if (last_nl >= 0)
                S.col = text.length - last_nl - 1 + 2;
            else
                S.col += text.length + 2;
            // the next real token is preceded by a newline if there was one
            // before the comment or inside it
            S.newline_before = had_newline || last_nl >= 0;
            return tok;
        });
    };

    // Reads a regexp literal body and its modifiers; the opening "/" has
    // already been consumed.  Token value is [ source, modifiers ].
    function read_regexp() {
        return with_eof_error("Unterminated regular expression", function(){
            var prev_backslash = false, regexp = "", ch, in_class = false;
            while ((ch = next(true))) if (prev_backslash) {
                regexp += "\\" + ch;
                prev_backslash = false;
            } else if (ch == "[") {
                in_class = true;
                regexp += ch;
            } else if (ch == "]" && in_class) {
                in_class = false;
                regexp += ch;
            } else if (ch == "/" && !in_class) {
                break;
            } else if (ch == "\\") {
                prev_backslash = true;
            } else {
                regexp += ch;
            }
            var mods = read_while(function(ch){
                return HOP(REGEXP_MODIFIERS, ch);
            });
            return token("regexp", [ regexp, mods ]);
        });
    };

    // Reads the longest operator starting at the current position (or
    // starting with the already-consumed `prefix`).
    function read_operator(prefix) {
        function grow(op) {
            if (!peek()) return op;
            var bigger = op + peek();
            if (HOP(OPERATORS, bigger)) {
                next();
                return grow(bigger);
            } else {
                return op;
            }
        };
        return token("operator", grow(prefix || next()));
    };

    // "/" may start a comment, a regexp or an operator.
    function handle_slash() {
        next();
        var regex_allowed = S.regex_allowed;
        switch (peek()) {
          case "/":
            S.comments_before.push(read_line_comment());
            // comments must not influence regexp detection
            S.regex_allowed = regex_allowed;
            return next_token();
          case "*":
            S.comments_before.push(read_multiline_comment());
            S.regex_allowed = regex_allowed;
            return next_token();
        }
        return S.regex_allowed ? read_regexp() : read_operator("/");
    };

    // "." is either the start of a number (".5") or punctuation.
    function handle_dot() {
        next();
        return is_digit(peek())
            ? read_num(".")
            : token("punc", ".");
    };

    // Reads an identifier-like word and classifies it.
    function read_word() {
        var word = read_while(is_identifier_char);
        return !HOP(KEYWORDS, word)
            ? token("name", word)
            : HOP(OPERATORS, word)
            ? token("operator", word)
            : HOP(KEYWORDS_ATOM, word)
            ? token("atom", word)
            : token("keyword", word);
    };

    // Runs cont(), converting an EX_EOF into a parse error with the given
    // message; any other exception is rethrown untouched.
    function with_eof_error(eof_error, cont) {
        try {
            return cont();
        } catch(ex) {
            if (ex === EX_EOF) parse_error(eof_error);
            else throw ex;
        }
    };

    function next_token(force_regexp) {
        if (force_regexp)
            return read_regexp();
        skip_whitespace();
        start_token();
        var ch = peek();
        if (!ch) return token("eof");
        if (is_digit(ch)) return read_num();
        if (ch == '"' || ch == "'") return read_string();
        if (HOP(PUNC_CHARS, ch)) return token("punc", next());
        if (ch == ".") return handle_dot();
        if (ch == "/") return handle_slash();
        if (HOP(OPERATOR_CHARS, ch)) return read_operator();
        if (is_identifier_char(ch)) return read_word();
        parse_error("Unexpected character '" + ch + "'");
    };

    // Gets (and, when `nc` is given, replaces) the tokenizer state.
    next_token.context = function(nc) {
        if (nc) S = nc;
        return S;
    };

    return next_token;

};

/* -----[ Parser (constants) ]----- */

var UNARY_PREFIX = array_to_hash([
    "typeof",
    "void",
    "delete",
    "--",
    "++",
    "!",
    "~",
    "-",
    "+"
]);

var UNARY_POSTFIX = array_to_hash([ "--", "++" ]);

// Maps each assignment operator to the binary operator it implies
// ("+=" -> "+"); plain "=" maps to true.
var ASSIGNMENT = (function(a, ret, i){
    while (i < a.length) {
        ret[a[i]] = a[i].substr(0, a[i].length - 1);
        i++;
    }
    return ret;
})(
    ["+=", "-=", "/=", "*=", "%=", ">>=", "<<=", ">>>=", "|=", "^=", "&="],
    { "=": true },
    0
);

// Maps each binary operator to its precedence level (1 = loosest).
var PRECEDENCE = (function(a, ret){
    for (var i = 0, n = 1; i < a.length; ++i, ++n) {
        var b = a[i];
        for (var j = 0; j < b.length; ++j) {
            ret[b[j]] = n;
        }
    }
    return ret;
})(
    [
        ["||"],
        ["&&"],
        ["|"],
        ["^"],
        ["&"],
        ["==", "===", "!=", "!=="],
        ["<", ">", "<=", ">=", "in", "instanceof"],
        [">>", "<<", ">>>"],
        ["+", "-"],
        ["*", "/", "%"]
    ],
    {}
);

// Statement kinds that may carry a label when parsing in strict mode.
var STATEMENTS_WITH_LABELS = array_to_hash([ "for", "do", "while", "switch" ]);

var ATOMIC_START_TOKEN = array_to_hash([ "atom", "num", "string", "regexp", "name" ]);

/* -----[ Parser ]----- */

// Statement tag that also records its first and last token; it stringifies
// to the plain tag name.
function NodeWithToken(str, start, end) {
    this.name = str;
    this.start = start;
    this.end = end;
};

NodeWithToken.prototype.toString = function() { return this.name; };

// Parses JavaScript into a nested-array AST rooted at ["toplevel", [...]].
//   $TEXT        -- source string, or an already created tokenizer function
//   strict_mode  -- when truthy: no automatic semicolon insertion, no
//                   trailing commas in array/object literals, and labels
//                   only on for/do/while/switch
//   embed_tokens -- when truthy, statement tags become NodeWithToken objects
// Throws JS_Parse_Error on invalid input.
function parse($TEXT, strict_mode, embed_tokens) {

    var S = {
        input       : typeof $TEXT == "string" ? tokenizer($TEXT) : $TEXT,
        token       : null,
        prev        : null,
        peeked      : null,
        in_function : 0,
        in_loop     : 0,
        labels      : []
    };

    S.token = next();

    // Tests the current token.
    function is(type, value) {
        return is_token(S.token, type, value);
    };

    // One-token lookahead; the peeked token is cached until next().
    function peek() { return S.peeked || (S.peeked = S.input()); };

    // Advances to the next token and returns it.
    function next() {
        S.prev = S.token;
        if (S.peeked) {
            S.token = S.peeked;
            S.peeked = null;
        } else {
            S.token = S.input();
        }
        return S.token;
    };

    function prev() {
        return S.prev;
    };

    // Throws a parse error; any position argument left out defaults to
    // the start of the tokenizer's current token.
    function croak(msg, line, col, pos) {
        var ctx = S.input.context();
        js_error(msg,
                 line != null ? line : ctx.tokline,
                 col != null ? col : ctx.tokcol,
                 pos != null ? pos : ctx.tokpos);
    };

    // Throws a parse error located at the given token.
    function token_error(token, msg) {
        croak(msg, token.line, token.col, token.pos);
    };

    function unexpected(token) {
        if (token == null)
            token = S.token;
        token_error(token, "Unexpected token: " + token.type + " (" + token.value + ")");
    };

    // Consumes the current token if it matches, otherwise throws.
    function expect_token(type, val) {
        if (is(type, val)) {
            return next();
        }
        token_error(S.token, "Unexpected token " + S.token.type + ", expected " + type);
    };

    function expect(punc) { return expect_token("punc", punc); };

    // Automatic semicolon insertion: allowed (outside strict mode) before
    // a newline, at end of input, or before "}".
    function can_insert_semicolon() {
        return !strict_mode && (
            S.token.nlb || is("eof") || is("punc", "}")
        );
    };

    function semicolon() {
        if (is("punc", ";")) next();
        else if (!can_insert_semicolon()) unexpected();
    };

    // Builds an AST node: an array of the given arguments.
    function as() {
        return slice(arguments);
    };

    function parenthesised() {
        expect("(");
        var ex = expression();
        expect(")");
        return ex;
    };

    function add_tokens(str, start, end) {
        return new NodeWithToken(str, start, end);
    };

    var statement = embed_tokens ? function() {
        var start = S.token;
        var stmt = $statement();
        stmt[0] = add_tokens(stmt[0], start, prev());
        return stmt;
    } : $statement;

    function $statement() {
        if (is("operator", "/")) {
            // a "/" at the start of a statement can only begin a regexp
            S.peeked = null;
            S.token = S.input(true); // force regexp
        }
        switch (S.token.type) {
          case "num":
          case "string":
          case "regexp":
          case "operator":
          case "atom":
            return simple_statement();

          case "name":
            return is_token(peek(), "punc", ":")
                ? labeled_statement(prog1(S.token.value, next, next))
                : simple_statement();

          case "punc":
            switch (S.token.value) {
              case "{":
                return as("block", block_());
              case "[":
              case "(":
                return simple_statement();
              case ";":
                next();
                return as("block");
              default:
                unexpected();
            }

          case "keyword":
            switch (prog1(S.token.value, next)) {
              case "break":
                return break_cont("break");

              case "continue":
                return break_cont("continue");

              case "debugger":
                semicolon();
                return as("debugger");

              case "do":
                return (function(body){
                    expect_token("keyword", "while");
                    return as("do", prog1(parenthesised, semicolon), body);
                })(in_loop(statement));

              case "for":
                return for_();

              case "function":
                return function_(true);

              case "if":
                return if_();

              case "return":
                if (S.in_function == 0)
                    croak("'return' outside of function");
                return as("return",
                          is("punc", ";")
                          ? (next(), null)
                          : can_insert_semicolon()
                          ? null
                          : prog1(expression, semicolon));

              case "switch":
                return as("switch", parenthesised(), switch_block_());

              case "throw":
                return as("throw", prog1(expression, semicolon));

              case "try":
                return try_();

              case "var":
                return prog1(var_, semicolon);

              case "const":
                return prog1(const_, semicolon);

              case "while":
                return as("while", parenthesised(), in_loop(statement));

              case "with":
                return as("with", parenthesised(), statement());

              default:
                unexpected();
            }
        }
    };

    function labeled_statement(label) {
        S.labels.push(label);
        var start = S.token, stat = statement();
        if (strict_mode && !HOP(STATEMENTS_WITH_LABELS, stat[0]))
            unexpected(start);
        S.labels.pop();
        return as("label", label, stat);
    };

    function simple_statement() {
        return as("stat", prog1(expression, semicolon));
    };

    // Parses the rest of a `break` / `continue` statement.
    function break_cont(type) {
        var name = is("name") ? S.token.value : null;
        if (name != null) {
            next();
            if (!member(name, S.labels))
                croak("Label " + name + " without matching loop or statement");
        }
        else if (S.in_loop == 0)
            croak(type + " not inside a loop or switch");
        semicolon();
        return as(type, name);
    };

    function for_() {
        expect("(");
        var has_var = is("keyword", "var");
        if (has_var)
            next();
        if (is("name") && is_token(peek(), "operator", "in")) {
            // for (i in foo)
            var name = S.token.value;
            next(); next();
            var obj = expression();
            expect(")");
            return as("for-in", has_var, name, obj, in_loop(statement));
        } else {
            // classic for
            var init = is("punc", ";") ? null : has_var ? var_() : expression();
            expect(";");
            var test = is("punc", ";") ? null : expression();
            expect(";");
            var step = is("punc", ")") ? null : expression();
            expect(")");
            return as("for", init, test, step, in_loop(statement));
        }
    };

    // Parses a function after the `function` keyword.  Statements yield a
    // "defun" node (name required), expressions a "function" node.
    function function_(in_statement) {
        var name = is("name") ? prog1(S.token.value, next) : null;
        if (in_statement && !name)
            unexpected();
        expect("(");
        return as(in_statement ? "defun" : "function",
                  name,
                  // arguments
                  (function(first, a){
                      while (!is("punc", ")")) {
                          if (first) first = false; else expect(",");
                          if (!is("name")) unexpected();
                          a.push(S.token.value);
                          next();
                      }
                      next();
                      return a;
                  })(true, []),
                  // body
                  (function(){
                      ++S.in_function;
                      // a function body starts outside of any loop
                      var loop = S.in_loop;
                      S.in_loop = 0;
                      var a = block_();
                      --S.in_function;
                      S.in_loop = loop;
                      return a;
                  })());
    };

    function if_() {
        var cond = parenthesised(), body = statement(), belse;
        if (is("keyword", "else")) {
            next();
            belse = statement();
        }
        return as("if", cond, body, belse);
    };

    // Parses "{ statements }" and returns the array of statements.
    function block_() {
        expect("{");
        var a = [];
        while (!is("punc", "}")) {
            if (is("eof")) unexpected();
            a.push(statement());
        }
        next();
        return a;
    };

    // Parses a switch body into [ [caseExpr | null, statements], ... ]
    // (null marks `default`).  Runs inside in_loop so `break` is accepted.
    var switch_block_ = curry(in_loop, function(){
        expect("{");
        var a = [], cur = null;
        while (!is("punc", "}")) {
            if (is("eof")) unexpected();
            if (is("keyword", "case")) {
                next();
                cur = [];
                a.push([ expression(), cur ]);
                expect(":");
            }
            else if (is("keyword", "default")) {
                next();
                expect(":");
                cur = [];
                a.push([ null, cur ]);
            }
            else {
                if (!cur) unexpected();
                cur.push(statement());
            }
        }
        next();
        return a;
    });

    function try_() {
        var body = block_(), bcatch, bfinally;
        if (is("keyword", "catch")) {
            next();
            expect("(");
            if (!is("name"))
                croak("Name expected");
            var name = S.token.value;
            next();
            expect(")");
            bcatch = [ name, block_() ];
        }
        if (is("keyword", "finally")) {
            next();
            bfinally = block_();
        }
        if (!bcatch && !bfinally)
            croak("Missing catch/finally blocks");
        return as("try", body, bcatch, bfinally);
    };

    // Parses "a = 1, b, ..." into [ [name, init], [name], ... ].
    function vardefs() {
        var a = [];
        for (;;) {
            if (!is("name"))
                unexpected();
            var name = S.token.value;
            next();
            if (is("operator", "=")) {
                next();
                a.push([ name, expression(false) ]);
            } else {
                a.push([ name ]);
            }
            if (!is("punc", ","))
                break;
            next();
        }
        return a;
    };

    function var_() {
        return as("var", vardefs());
    };

    function const_() {
        return as("const", vardefs());
    };

    function new_() {
        var newexp = expr_atom(false), args;
        if (is("punc", "(")) {
            next();
            args = expr_list(")");
        } else {
            args = [];
        }
        return subscripts(as("new", newexp, args), true);
    };

    // Parses an atomic expression plus its subscripts.  Call syntax is
    // only consumed when allow_calls is truthy (it is false for the
    // constructor part of `new`).
    function expr_atom(allow_calls) {
        if (is("operator", "new")) {
            next();
            return new_();
        }
        if (is("operator") && HOP(UNARY_PREFIX, S.token.value)) {
            return make_unary("unary-prefix",
                              prog1(S.token.value, next),
                              expr_atom(allow_calls));
        }
        if (is("punc")) {
            switch (S.token.value) {
              case "(":
                next();
                return subscripts(prog1(expression, curry(expect, ")")), allow_calls);
              case "[":
                next();
                return subscripts(array_(), allow_calls);
              case "{":
                next();
                return subscripts(object_(), allow_calls);
            }
            unexpected();
        }
        if (is("keyword", "function")) {
            next();
            return subscripts(function_(false), allow_calls);
        }
        if (HOP(ATOMIC_START_TOKEN, S.token.type)) {
            var atom = S.token.type == "regexp"
                ? as("regexp", S.token.value[0], S.token.value[1])
                : as(S.token.type, S.token.value);
            return subscripts(prog1(atom, next), allow_calls);
        }
        unexpected();
    };

    // Parses comma separated expressions up to the `closing` punctuation.
    // With allow_empty, holes ("[1,,2]") become ["atom", "undefined"].
    function expr_list(closing, allow_trailing_comma, allow_empty) {
        var first = true, a = [];
        while (!is("punc", closing)) {
            if (first) first = false; else expect(",");
            if (allow_trailing_comma && is("punc", closing)) break;
            if (is("punc", ",") && allow_empty) {
                a.push([ "atom", "undefined" ]);
            } else {
                a.push(expression(false));
            }
        }
        next();
        return a;
    };

    function array_() {
        return as("array", expr_list("]", !strict_mode, true));
    };

    // Parses an object literal body.  Properties are [name, value];
    // getters/setters are [name, function, "get" | "set"].
    function object_() {
        var first = true, a = [];
        while (!is("punc", "}")) {
            if (first) first = false; else expect(",");
            if (!strict_mode && is("punc", "}"))
                // allow trailing comma
                break;
            var type = S.token.type;
            var name = as_property_name();
            if (type == "name" && (name == "get" || name == "set") && !is("punc", ":")) {
                a.push([ as_name(), function_(false), name ]);
            } else {
                expect(":");
                a.push([ name, expression(false) ]);
            }
        }
        next();
        return as("object", a);
    };

    function as_property_name() {
        switch (S.token.type) {
          case "num":
          case "string":
            return prog1(S.token.value, next);
        }
        return as_name();
    };

    // Consumes a name-like token (names, operators, keywords and atoms are
    // all accepted) and returns its text.
    function as_name() {
        switch (S.token.type) {
          case "name":
          case "operator":
          case "keyword":
          case "atom":
            return prog1(S.token.value, next);
          default:
            unexpected();
        }
    };

    // Parses any ".name", "[expr]", "(args)" and postfix ++/-- that follow
    // an expression.
    function subscripts(expr, allow_calls) {
        if (is("punc", ".")) {
            next();
            return subscripts(as("dot", expr, as_name()), allow_calls);
        }
        if (is("punc", "[")) {
            next();
            return subscripts(as("sub", expr, prog1(expression, curry(expect, "]"))), allow_calls);
        }
        if (allow_calls && is("punc", "(")) {
            next();
            return subscripts(as("call", expr, expr_list(")")), true);
        }
        if (allow_calls && is("operator") && HOP(UNARY_POSTFIX, S.token.value)) {
            return prog1(curry(make_unary, "unary-postfix", S.token.value, expr),
                         next);
        }
        return expr;
    };

    function make_unary(tag, op, expr) {
        if ((op == "++" || op == "--") && !is_assignable(expr))
            croak("Invalid use of " + op + " operator");
        return as(tag, op, expr);
    };

    // Precedence climbing over binary operators.
    function expr_op(left, min_prec) {
        var op = is("operator") ? S.token.value : null;
        var prec = op != null ? PRECEDENCE[op] : null;
        if (prec != null && prec > min_prec) {
            next();
            var right = expr_op(expr_atom(true), prec);
            return expr_op(as("binary", op, left, right), min_prec);
        }
        return left;
    };

    function expr_ops() {
        return expr_op(expr_atom(true), 0);
    };

    function maybe_conditional() {
        var expr = expr_ops();
        if (is("operator", "?")) {
            next();
            var yes = expression(false);
            expect(":");
            return as("conditional", expr, yes, expression(false));
        }
        return expr;
    };

    // True for "dot" / "sub" nodes and for names other than `this`.
    function is_assignable(expr) {
        switch (expr[0]) {
          case "dot":
          case "sub":
            return true;
          case "name":
            return expr[1] != "this";
        }
    };

    function maybe_assign() {
        var left = maybe_conditional(), val = S.token.value;
        if (is("operator") && HOP(ASSIGNMENT, val)) {
            if (is_assignable(left)) {
                next();
                return as("assign", ASSIGNMENT[val], left, maybe_assign());
            }
            croak("Invalid assignment");
        }
        return left;
    };

    // Parses an expression; comma sequences are allowed unless `commas` is
    // passed as false (it defaults to true when omitted).
    function expression(commas) {
        if (arguments.length == 0)
            commas = true;
        var expr = maybe_assign();
        if (commas && is("punc", ",")) {
            next();
            return as("seq", expr, expression());
        }
        return expr;
    };

    // Runs cont() with the loop depth raised by one.
    function in_loop(cont) {
        try {
            ++S.in_loop;
            return cont();
        } finally {
            --S.in_loop;
        }
    };

    return as("toplevel", (function(a){
        while (!is("eof"))
            a.push(statement());
        return a;
    })([]));

};

/* -----[ Utilities ]----- */

// Partial application: curry(f, a, b)(c) calls f(a, b, c).
function curry(f) {
    var args = slice(arguments, 1);
    return function() { return f.apply(this, args.concat(slice(arguments))); };
};

// Evaluates the first argument (calling it when it is a function), then
// calls every remaining argument for its side effects; returns the first
// value.
function prog1(ret) {
    if (ret instanceof Function)
        ret = ret();
    for (var i = 1, n = arguments.length; --n > 0; ++i)
        arguments[i]();
    return ret;
};

// Turns an array of strings into an object usable as a set with HOP().
function array_to_hash(a) {
    var ret = {};
    for (var i = 0; i < a.length; ++i)
        ret[a[i]] = true;
    return ret;
};

// Array.prototype.slice for array-likes; `start` defaults to 0.
function slice(a, start) {
    return Array.prototype.slice.call(a, start == null ? 0 : start);
};

function characters(str) {
    return str.split("");
};

// True when `name` is strictly equal to an element of `array`.
function member(name, array) {
    for (var i = array.length; --i >= 0;)
        if (array[i] === name)
            return true;
    return false;
};

// Safe hasOwnProperty.
function HOP(obj, prop) {
    return Object.prototype.hasOwnProperty.call(obj, prop);
};

/* -----[ Exports ]----- */

// Guarded so that the file can also be loaded where no CommonJS `exports`
// object exists.
if (typeof exports != "undefined") {
    exports.tokenizer = tokenizer;
    exports.parse = parse;
    exports.slice = slice;
    exports.curry = curry;
    exports.member = member;
    exports.array_to_hash = array_to_hash;
    exports.PRECEDENCE = PRECEDENCE;
    exports.KEYWORDS_ATOM = KEYWORDS_ATOM;
    exports.RESERVED_WORDS = RESERVED_WORDS;
    exports.KEYWORDS = KEYWORDS;
    exports.ATOMIC_START_TOKEN = ATOMIC_START_TOKEN;
    exports.OPERATORS = OPERATORS;
    exports.is_alphanumeric_char = is_alphanumeric_char;
    exports.is_identifier_char = is_identifier_char;
}