#!/usr/bin/env python
#
# Copyright 2012 the V8 project authors. All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
#       notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
#       copyright notice, this list of conditions and the following
#       disclaimer in the documentation and/or other materials provided
#       with the distribution.
#     * Neither the name of Google Inc. nor the names of its
#       contributors may be used to endorse or promote products derived
#       from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# This is a utility for converting JavaScript source code into C-style
# char arrays. It is used for embedded JavaScript code in the V8
# library.

import optparse
import os
import re
import textwrap
from functools import reduce  # builtin on Python 2, functools-only on 3

import jsmin


class Error(Exception):
  """Raised when a source or macro file cannot be processed."""

  def __init__(self, msg):
    Exception.__init__(self, msg)


def ToCArray(byte_sequence):
  """Render a byte sequence as the body of a C array initializer.

  Args:
    byte_sequence: A string of characters, or a bytes/bytearray object.

  Returns:
    The decimal value of every element, joined by ", " and wrapped to
    80 columns.
  """
  # Iterating bytes (Python 3) or a bytearray yields ints; iterating a
  # string yields one-character strings that need ord().
  codes = [str(c) if isinstance(c, int) else str(ord(c))
           for c in byte_sequence]
  return textwrap.fill(", ".join(codes), 80)


def RemoveCommentsAndTrailingWhitespace(lines):
  """Strip // and /* */ comments and trailing whitespace from a source.

  Args:
    lines: The complete source text as a single string.

  Returns:
    The source text with comments and trailing whitespace removed.
  """
  lines = re.sub(r'//.*\n', '\n', lines)  # end-of-line comments
  lines = re.sub(re.compile(r'/\*.*?\*/', re.DOTALL), '', lines)  # comments.
  lines = re.sub(r'\s+\n+', '\n', lines)  # trailing whitespace
  return lines


def ReadFile(filename):
  """Return the entire contents of filename, read in text mode."""
  with open(filename, "rt") as source_file:
    return source_file.read()


EVAL_PATTERN = re.compile(r'\beval\s*\(')
WITH_PATTERN = re.compile(r'\bwith\s*\(')
INVALID_ERROR_MESSAGE_PATTERN = re.compile(
    r'Make(?!Generic)\w*Error\(([kA-Z]\w+)')
NEW_ERROR_PATTERN = re.compile(r'new \$\w*Error\((?!\))')


def Validate(lines):
  """Reject constructs that are not allowed in natives files.

  Args:
    lines: The complete source text as a single string.

  Returns:
    lines, unchanged.

  Raises:
    Error: If the source uses eval or with, passes an identifier starting
        with 'k' or an upper-case letter as first argument to a
        Make...Error call (other than MakeGeneric...), or constructs a
        $...Error with arguments.
  """
  # Because of simplified context setup, eval and with is not
  # allowed in the natives files.
  if EVAL_PATTERN.search(lines):
    raise Error("Eval disallowed in natives.")
  if WITH_PATTERN.search(lines):
    raise Error("With statements disallowed in natives.")
  # NOTE(review): presumably a name surviving here means it was not replaced
  # by the message template expansion that runs earlier in the filter chain.
  invalid_error = INVALID_ERROR_MESSAGE_PATTERN.search(lines)
  if invalid_error:
    raise Error("Unknown error message template '%s'" % invalid_error.group(1))
  if NEW_ERROR_PATTERN.search(lines):
    raise Error("Error constructed without message template.")
  # Pass lines through unchanged.
  return lines


def ExpandConstants(lines, constants):
  """Replace every constant occurrence in lines with its value.

  Args:
    lines: The complete source text as a single string.
    constants: Sequence of (compiled name pattern, value) pairs.

  Returns:
    The source text with all constants substituted.
  """
  for key, value in constants:
    lines = key.sub(str(value), lines)
  return lines


def ExpandMacroDefinition(lines, pos, name_pattern, macro, expander):
  """Expand all invocations of one macro found at or after pos.

  Args:
    lines: The complete source text as a single string.
    pos: Index in lines at which the search for invocations starts.
    name_pattern: Compiled pattern matching the macro name followed by '('.
    macro: Object providing an 'args' list and an 'expand(mapping)' method.
    expander: Function applied to each (stripped) argument text before it
        is bound to the corresponding macro parameter.

  Returns:
    The source text with the macro invocations replaced.

  Raises:
    Error: If an invocation passes more arguments than the macro declares.
  """
  pattern_match = name_pattern.search(lines, pos)
  while pattern_match is not None:
    # Scan over the arguments
    height = 1
    start = pattern_match.start()
    end = pattern_match.end()
    assert lines[end - 1] == '('
    last_match = end
    arg_index = [0]  # Wrap state into array, to work around Python "scoping"
    mapping = {}

    def add_arg(arg_text):
      # Remember to expand recursively in the arguments
      if arg_index[0] >= len(macro.args):
        # The text was read in text mode, so lines are separated by '\n'
        # on every platform (os.linesep would miscount on Windows).
        lineno = lines.count('\n', 0, start) + 1
        raise Error('line %s: Too many arguments for macro "%s"' %
                    (lineno, name_pattern.pattern))
      replacement = expander(arg_text.strip())
      mapping[macro.args[arg_index[0]]] = replacement
      arg_index[0] += 1

    while end < len(lines) and height > 0:
      # We don't count commas at higher nesting levels.
      if lines[end] == ',' and height == 1:
        add_arg(lines[last_match:end])
        last_match = end + 1
      elif lines[end] in ['(', '{', '[']:
        height = height + 1
      elif lines[end] in [')', '}', ']']:
        height = height - 1
      end = end + 1
    # Remember to add the last match.
    add_arg(lines[last_match:end-1])
    result = macro.expand(mapping)
    # Replace the occurrence of the macro with the expansion
    lines = lines[:start] + result + lines[end:]
    # Resume after the inserted text so the expansion is not rescanned.
    pattern_match = name_pattern.search(lines, start + len(result))
  return lines


def ExpandMacros(lines, macros):
  """Expand all macros in lines, including inside macro arguments.

  Args:
    lines: The complete source text as a single string.
    macros: Sequence of (compiled name pattern, macro object) pairs in
        declaration order.

  Returns:
    The source text with all macro invocations expanded.
  """
  def expander(s):
    return ExpandMacros(s, macros)

  # We allow macros to depend on the previously declared macros, but
  # we don't allow self-dependencies or recursion.
  for name_pattern, macro in reversed(macros):
    lines = ExpandMacroDefinition(lines, 0, name_pattern, macro, expander)
  return lines


class TextMacro:
  """Macro whose body is text with parameter names substituted."""

  def __init__(self, args, body):
    self.args = args
    self.body = body

  def expand(self, mapping):
    """Return the body with each mapping key replaced by its value."""
    result = self.body
    for key, value in mapping.items():
      result = result.replace(key, value)
    return result


class PythonMacro:
  """Macro whose expansion is computed by a Python function."""

  def __init__(self, args, fun):
    self.args = args
    self.fun = fun

  def expand(self, mapping):
    """Call the function with the mapped arguments; return str(result)."""
    args = [mapping[arg] for arg in self.args]
    return str(self.fun(*args))


CONST_PATTERN = re.compile(r'^define\s+([a-zA-Z0-9_]+)\s*=\s*([^;]*);$')
MACRO_PATTERN = re.compile(
    r'^macro\s+([a-zA-Z0-9_]+)\s*\(([^)]*)\)\s*=\s*([^;]*);$')
PYTHON_MACRO_PATTERN = re.compile(
    r'^python\s+macro\s+([a-zA-Z0-9_]+)\s*\(([^)]*)\)\s*=\s*([^;]*);$')


def ReadMacros(lines):
  """Parse the contents of a macro file.

  Everything from a '#' to the end of the line is ignored, as are blank
  lines. Each remaining line must be a 'define', 'macro' or 'python macro'
  declaration.

  Args:
    lines: The complete text of the macro file.

  Returns:
    A (constants, macros) tuple. constants is a list of
    (compiled name pattern, value string) pairs; macros is a list of
    (compiled name pattern, TextMacro or PythonMacro) pairs.

  Raises:
    Error: If a line matches none of the declaration patterns.
  """
  constants = []
  macros = []
  for line in lines.split('\n'):
    comment_start = line.find('#')
    if comment_start != -1:
      line = line[:comment_start]
    line = line.strip()
    if not line:
      continue

    const_match = CONST_PATTERN.match(line)
    if const_match:
      name = const_match.group(1)
      value = const_match.group(2).strip()
      constants.append((re.compile("\\b%s\\b" % name), value))
      continue

    macro_match = MACRO_PATTERN.match(line)
    if macro_match:
      name = macro_match.group(1)
      args = [match.strip() for match in macro_match.group(2).split(',')]
      body = macro_match.group(3).strip()
      macros.append((re.compile("\\b%s\\(" % name), TextMacro(args, body)))
      continue

    python_match = PYTHON_MACRO_PATTERN.match(line)
    if python_match:
      name = python_match.group(1)
      args = [match.strip() for match in python_match.group(2).split(',')]
      body = python_match.group(3).strip()
      # NOTE: eval executes code taken from the macro file; the macro file
      # must therefore be a trusted build input.
      fun = eval("lambda " + ",".join(args) + ': ' + body)
      macros.append((re.compile("\\b%s\\(" % name), PythonMacro(args, fun)))
      continue

    raise Error("Illegal line: " + line)
  return (constants, macros)


TEMPLATE_PATTERN = re.compile(r'^\s+T\(([A-Z][a-zA-Z0-9]*),')


def ReadMessageTemplates(lines):
  """Collect message template names and number them in order.

  Args:
    lines: The complete text of the message template file.

  Returns:
    A list of (compiled pattern for 'k<Name>', index) pairs, where index
    counts the matching T(<Name>, ...) lines starting from zero.
  """
  templates = []
  index = 0
  for line in lines.split('\n'):
    template_match = TEMPLATE_PATTERN.match(line)
    if template_match:
      name = "k%s" % template_match.group(1)
      value = index
      index = index + 1
      templates.append((re.compile("\\b%s\\b" % name), value))
  return templates


INLINE_MACRO_PATTERN = re.compile(
    r'macro\s+([a-zA-Z0-9_]+)\s*\(([^)]*)\)\s*\n')
INLINE_MACRO_END_PATTERN = re.compile(r'endmacro\s*\n')


def ExpandInlineMacros(lines):
  """Expand 'macro NAME(args) ... endmacro' definitions found in a source.

  Each definition is removed from the text and its invocations following
  the definition are expanded textually (arguments are not expanded
  recursively).

  Args:
    lines: The complete source text as a single string.

  Returns:
    The source text without inline macro definitions.

  Raises:
    Error: If a macro definition has no matching 'endmacro'.
  """
  def non_expander(s):
    return s

  pos = 0
  while True:
    macro_match = INLINE_MACRO_PATTERN.search(lines, pos)
    if macro_match is None:
      # no more macros
      return lines
    name = macro_match.group(1)
    args = [match.strip() for match in macro_match.group(2).split(',')]
    end_macro_match = INLINE_MACRO_END_PATTERN.search(lines, macro_match.end())
    if end_macro_match is None:
      raise Error("Macro %s unclosed" % name)
    body = lines[macro_match.end():end_macro_match.start()]

    # remove macro definition
    lines = lines[:macro_match.start()] + lines[end_macro_match.end():]
    name_pattern = re.compile("\\b%s\\(" % name)
    macro = TextMacro(args, body)

    # advance position to where the macro definition was
    pos = macro_match.start()

    lines = ExpandMacroDefinition(lines, pos, name_pattern, macro, non_expander)


INLINE_CONSTANT_PATTERN = re.compile(
    r'define\s+([a-zA-Z0-9_]+)\s*=\s*([^;\n]+);\n')


def ExpandInlineConstants(lines):
  """Expand 'define NAME = value;' definitions found in a source.

  Each definition is removed from the text and the occurrences of NAME
  following the definition are replaced by the value.

  Args:
    lines: The complete source text as a single string.

  Returns:
    The source text without inline constant definitions.
  """
  pos = 0
  while True:
    const_match = INLINE_CONSTANT_PATTERN.search(lines, pos)
    if const_match is None:
      # no more constants
      return lines
    name = const_match.group(1)
    replacement = const_match.group(2)
    name_pattern = re.compile("\\b%s\\b" % name)

    # remove constant definition and replace
    lines = (lines[:const_match.start()] +
             re.sub(name_pattern, replacement, lines[const_match.end():]))

    # advance position to where the constant definition was
    pos = const_match.start()


HEADER_TEMPLATE = """\
// Copyright 2011 Google Inc. All Rights Reserved.

// This file was generated from .js source files by GYP. If you
// want to make changes to this file you should either change the
// javascript source files or the GYP script.

#include "src/v8.h"
#include "src/snapshot/natives.h"
#include "src/utils.h"

namespace v8 {
namespace internal {

%(sources_declaration)s\

  template <>
  int NativesCollection<%(type)s>::GetBuiltinsCount() {
    return %(builtin_count)i;
  }

  template <>
  int NativesCollection<%(type)s>::GetDebuggerCount() {
    return %(debugger_count)i;
  }

  template <>
  int NativesCollection<%(type)s>::GetIndex(const char* name) {
%(get_index_cases)s\
    return -1;
  }

  template <>
  Vector<const char> NativesCollection<%(type)s>::GetScriptSource(int index) {
%(get_script_source_cases)s\
    return Vector<const char>("", 0);
  }

  template <>
  Vector<const char> NativesCollection<%(type)s>::GetScriptName(int index) {
%(get_script_name_cases)s\
    return Vector<const char>("", 0);
  }

  template <>
  Vector<const char> NativesCollection<%(type)s>::GetScriptsSource() {
    return Vector<const char>(sources, %(total_length)i);
  }
} // internal
} // v8
"""

SOURCES_DECLARATION = """\
  static const char sources[] = { %s };
"""


GET_INDEX_CASE = """\
    if (strcmp(name, "%(id)s") == 0) return %(i)i;
"""


GET_SCRIPT_SOURCE_CASE = """\
    if (index == %(i)i) return Vector<const char>(sources + %(offset)i, %(source_length)i);
"""


GET_SCRIPT_NAME_CASE = """\
    if (index == %(i)i) return Vector<const char>("%(name)s", %(length)i);
"""


def BuildFilterChain(macro_filename, message_template_file):
  """Build the chain of filter functions to be applied to the sources.

  Args:
    macro_filename: Name of the macro file, if any.
    message_template_file: Name of the message template file, if any.

  Returns:
    A function (string -> string) that processes a source file.
  """
  filter_chain = []

  if macro_filename:
    (consts, macros) = ReadMacros(ReadFile(macro_filename))
    filter_chain.append(lambda l: ExpandMacros(l, macros))
    filter_chain.append(lambda l: ExpandConstants(l, consts))

  if message_template_file:
    message_templates = ReadMessageTemplates(ReadFile(message_template_file))
    filter_chain.append(lambda l: ExpandConstants(l, message_templates))

  filter_chain.extend([
    RemoveCommentsAndTrailingWhitespace,
    ExpandInlineMacros,
    ExpandInlineConstants,
    Validate,
    jsmin.JavaScriptMinifier().JSMinify
  ])

  def chain(f1, f2):
    return lambda x: f2(f1(x))

  return reduce(chain, filter_chain)


def BuildExtraFilterChain():
  """Return the filter used for extras: validate, then strip comments."""
  return lambda x: RemoveCommentsAndTrailingWhitespace(Validate(x))


class Sources:
  """Parallel lists describing the prepared source modules."""

  def __init__(self):
    self.names = []           # module name for each source
    self.modules = []         # processed source text for each source
    self.is_debugger_id = []  # IsDebuggerFile() result for each source


def IsDebuggerFile(filename):
  """Return True if filename contains the substring "debug"."""
  return "debug" in filename


def IsMacroFile(filename):
  """Return True if filename ends with "macros.py"."""
  return filename.endswith("macros.py")


def IsMessageTemplateFile(filename):
  """Return True if filename ends with "messages.h"."""
  return filename.endswith("messages.h")


def PrepareSources(source_files, native_type, emit_js):
  """Read, prepare and assemble the list of source files.

  Args:
    source_files: List of JavaScript-ish source files. A file named macros.py
        will be treated as a list of macros. The macro file and the message
        template file, if present, are removed from this list in place.
    native_type: String corresponding to a NativeType enum value, allowing us
        to treat different types of sources differently.
    emit_js: True if we should skip the byte conversion and just leave the
        sources as JS strings. (Not consulted by this function.)

  Returns:
    An instance of Sources.

  Raises:
    Error: If a filter rejects a source; the message names the file.
  """
  macro_file = None
  macro_files = [f for f in source_files if IsMacroFile(f)]
  assert len(macro_files) in [0, 1]
  if macro_files:
    source_files.remove(macro_files[0])
    macro_file = macro_files[0]

  message_template_file = None
  message_template_files = [f for f in source_files
                            if IsMessageTemplateFile(f)]
  assert len(message_template_files) in [0, 1]
  if message_template_files:
    source_files.remove(message_template_files[0])
    message_template_file = message_template_files[0]

  filters = None
  if native_type in ("EXTRAS", "EXPERIMENTAL_EXTRAS"):
    filters = BuildExtraFilterChain()
  else:
    filters = BuildFilterChain(macro_file, message_template_file)

  # Sort 'debugger' sources first. The sort is stable and False orders
  # before True, so the relative order within each group is preserved.
  source_files = sorted(source_files, key=lambda f: not IsDebuggerFile(f))

  source_files_and_contents = [(f, ReadFile(f)) for f in source_files]

  # Have a single not-quite-empty source file if there are none present;
  # otherwise you get errors trying to compile an empty C++ array.
  # It cannot be empty (or whitespace, which gets trimmed to empty), as
  # the deserialization code assumes each file is nonempty.
  if not source_files_and_contents:
    source_files_and_contents = [("dummy.js", "(function() {})")]

  result = Sources()

  for (source, contents) in source_files_and_contents:
    try:
      lines = filters(contents)
    except Error as e:
      raise Error("In file %s:\n%s" % (source, str(e)))

    result.modules.append(lines)

    is_debugger = IsDebuggerFile(source)
    result.is_debugger_id.append(is_debugger)

    # Drop the last three characters of the base name (the ".js" suffix).
    name = os.path.basename(source)[:-3]
    result.names.append(name)

  return result


def BuildMetadata(sources, source_bytes, native_type):
  """Build the meta data required to generate a libraries file.

  Args:
    sources: A Sources instance with the prepared sources.
    source_bytes: A list of source bytes.
        (The concatenation of all sources; might be compressed.)
    native_type: The parameter for the NativesCollection template.

  Returns:
    A dictionary for use with HEADER_TEMPLATE.
  """
  total_length = len(source_bytes)
  raw_sources = "".join(sources.modules)

  # The sources are expected to be ASCII-only.
  assert all(ord(value) < 128 for value in raw_sources)

  # Loop over modules and build up indices into the source blob:
  get_index_cases = []
  get_script_name_cases = []
  get_script_source_cases = []
  offset = 0
  for i, module in enumerate(sources.modules):
    native_name = "native %s.js" % sources.names[i]
    d = {
        "i": i,
        "id": sources.names[i],
        "name": native_name,
        "length": len(native_name),
        "offset": offset,
        "source_length": len(module),
    }
    get_index_cases.append(GET_INDEX_CASE % d)
    get_script_name_cases.append(GET_SCRIPT_NAME_CASE % d)
    get_script_source_cases.append(GET_SCRIPT_SOURCE_CASE % d)
    offset += len(module)
  assert offset == len(raw_sources)

  metadata = {
    "builtin_count": len(sources.modules),
    "debugger_count": sum(sources.is_debugger_id),
    "sources_declaration": SOURCES_DECLARATION % ToCArray(source_bytes),
    "total_length": total_length,
    "get_index_cases": "".join(get_index_cases),
    "get_script_source_cases": "".join(get_script_source_cases),
    "get_script_name_cases": "".join(get_script_name_cases),
    "type": native_type,
  }
  return metadata


def PutInt(blob_file, value):
  """Write value to blob_file as a 1-4 byte little-endian integer.

  The two lowest bits of the first byte written hold the number of bytes
  minus one; the remaining bits hold the value.

  Args:
    blob_file: File object opened for binary writing.
    value: Integer in the range [0, 1 << 28).
  """
  assert(value >= 0 and value < (1 << 28))
  if (value < 1 << 6):
    size = 1
  elif (value < 1 << 14):
    size = 2
  elif (value < 1 << 22):
    size = 3
  else:
    size = 4
  value_with_length = (value << 2) | (size - 1)

  byte_sequence = bytearray()
  for _ in range(size):
    byte_sequence.append(value_with_length & 255)
    value_with_length >>= 8
  blob_file.write(byte_sequence)


def PutStr(blob_file, value):
  """Write value to blob_file, preceded by its length (see PutInt).

  Args:
    blob_file: File object opened for binary writing.
    value: The string to write.
  """
  # A binary file only accepts bytes on Python 3. On Python 2, str is
  # bytes and is written unchanged.
  if not isinstance(value, bytes):
    value = value.encode("utf-8")
  PutInt(blob_file, len(value))
  blob_file.write(value)


def WriteStartupBlob(sources, startup_blob):
  """Write a startup blob, as expected by V8 Initialize ...
    TODO(vogelheim): Add proper method name.

  The blob holds the debugger sources followed by the remaining sources;
  each group is a count followed by (name, module) string pairs. This
  relies on the debugger sources coming first in 'sources'.

  Args:
    sources: A Sources instance with the prepared sources.
    startup_blob: Name of file to write the blob to.
  """
  with open(startup_blob, "wb") as output:
    debug_sources = sum(sources.is_debugger_id)
    PutInt(output, debug_sources)
    for i in range(debug_sources):
      PutStr(output, sources.names[i])
      PutStr(output, sources.modules[i])

    PutInt(output, len(sources.names) - debug_sources)
    for i in range(debug_sources, len(sources.names)):
      PutStr(output, sources.names[i])
      PutStr(output, sources.modules[i])


def JS2C(sources, target, native_type, raw_file, startup_blob, emit_js):
  """Process the source files and write all requested outputs.

  Args:
    sources: List of source file names (see PrepareSources).
    target: Name of the output file to generate.
    native_type: The parameter for the NativesCollection template.
    raw_file: If set, name of a file receiving the processed sources.
    startup_blob: If set, name of a file receiving the startup blob.
    emit_js: If true, target receives the processed JS sources instead of
        the generated C++ code.
  """
  prepared_sources = PrepareSources(sources, native_type, emit_js)
  sources_output = "".join(prepared_sources.modules)
  metadata = BuildMetadata(prepared_sources, sources_output, native_type)

  # Optionally emit raw file.
  if raw_file:
    with open(raw_file, "w") as output:
      output.write(sources_output)

  if startup_blob:
    WriteStartupBlob(prepared_sources, startup_blob)

  # Emit resulting source file.
  with open(target, "w") as output:
    if emit_js:
      output.write(sources_output)
    else:
      output.write(HEADER_TEMPLATE % metadata)


def main():
  """Parse the command line and run JS2C."""
  parser = optparse.OptionParser()
  parser.add_option("--raw",
                    help="file to write the processed sources array to.")
  parser.add_option("--startup_blob",
                    help="file to write the startup blob to.")
  parser.add_option("--js",
                    help="writes a JS file output instead of a C file",
                    action="store_true", default=False, dest='js')
  parser.add_option("--nojs", action="store_false", default=False, dest='js')
  parser.set_usage("""js2c out.cc type sources.js ...
        out.cc: C code to be generated.
        type: type parameter for NativesCollection template.
        sources.js: JS internal sources or macros.py.""")
  (options, args) = parser.parse_args()
  if len(args) < 2:
    # Report a usage error instead of failing with an IndexError below.
    parser.error("expected at least the output file and the type")
  JS2C(args[2:],
       args[0],
       args[1],
       options.raw,
       options.startup_blob,
       options.js)


if __name__ == "__main__":
  main()