Home | History | Annotate | Download | only in tools
      1 #!/usr/bin/env python
      2 #
      3 # Copyright 2012 the V8 project authors. All rights reserved.
      4 # Redistribution and use in source and binary forms, with or without
      5 # modification, are permitted provided that the following conditions are
      6 # met:
      7 #
      8 #     * Redistributions of source code must retain the above copyright
      9 #       notice, this list of conditions and the following disclaimer.
     10 #     * Redistributions in binary form must reproduce the above
     11 #       copyright notice, this list of conditions and the following
     12 #       disclaimer in the documentation and/or other materials provided
     13 #       with the distribution.
     14 #     * Neither the name of Google Inc. nor the names of its
     15 #       contributors may be used to endorse or promote products derived
     16 #       from this software without specific prior written permission.
     17 #
     18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     19 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     20 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     21 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     22 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     23 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     24 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     25 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     26 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     28 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29 
     30 # This is a utility for converting JavaScript source code into C-style
     31 # char arrays. It is used for embedded JavaScript code in the V8
     32 # library.
     33 
     34 import os, re
     35 import optparse
     36 import jsmin
     37 import textwrap
     38 
     39 
     40 class Error(Exception):
     41   def __init__(self, msg):
     42     Exception.__init__(self, msg)
     43 
     44 
     45 def ToCArray(byte_sequence):
     46   result = []
     47   for chr in byte_sequence:
     48     result.append(str(ord(chr)))
     49   joined = ", ".join(result)
     50   return textwrap.fill(joined, 80)
     51 
     52 
     53 def RemoveCommentsAndTrailingWhitespace(lines):
     54   lines = re.sub(r'//.*\n', '\n', lines) # end-of-line comments
     55   lines = re.sub(re.compile(r'/\*.*?\*/', re.DOTALL), '', lines) # comments.
     56   lines = re.sub(r'\s+\n+', '\n', lines) # trailing whitespace
     57   return lines
     58 
     59 
     60 def ReadFile(filename):
     61   file = open(filename, "rt")
     62   try:
     63     lines = file.read()
     64   finally:
     65     file.close()
     66   return lines
     67 
     68 
     69 EVAL_PATTERN = re.compile(r'\beval\s*\(')
     70 WITH_PATTERN = re.compile(r'\bwith\s*\(')
     71 INVALID_ERROR_MESSAGE_PATTERN = re.compile(
     72     r'Make(?!Generic)\w*Error\(([kA-Z]\w+)')
     73 NEW_ERROR_PATTERN = re.compile(r'new \$\w*Error\((?!\))')
     74 
     75 def Validate(lines):
     76   # Because of simplified context setup, eval and with is not
     77   # allowed in the natives files.
     78   if EVAL_PATTERN.search(lines):
     79     raise Error("Eval disallowed in natives.")
     80   if WITH_PATTERN.search(lines):
     81     raise Error("With statements disallowed in natives.")
     82   invalid_error = INVALID_ERROR_MESSAGE_PATTERN.search(lines)
     83   if invalid_error:
     84     raise Error("Unknown error message template '%s'" % invalid_error.group(1))
     85   if NEW_ERROR_PATTERN.search(lines):
     86     raise Error("Error constructed without message template.")
     87   # Pass lines through unchanged.
     88   return lines
     89 
     90 
     91 def ExpandConstants(lines, constants):
     92   for key, value in constants:
     93     lines = key.sub(str(value), lines)
     94   return lines
     95 
     96 
     97 def ExpandMacroDefinition(lines, pos, name_pattern, macro, expander):
     98   pattern_match = name_pattern.search(lines, pos)
     99   while pattern_match is not None:
    100     # Scan over the arguments
    101     height = 1
    102     start = pattern_match.start()
    103     end = pattern_match.end()
    104     assert lines[end - 1] == '('
    105     last_match = end
    106     arg_index = [0]  # Wrap state into array, to work around Python "scoping"
    107     mapping = { }
    108     def add_arg(str):
    109       # Remember to expand recursively in the arguments
    110       if arg_index[0] >= len(macro.args):
    111         lineno = lines.count(os.linesep, 0, start) + 1
    112         raise Error('line %s: Too many arguments for macro "%s"' % (lineno, name_pattern.pattern))
    113       replacement = expander(str.strip())
    114       mapping[macro.args[arg_index[0]]] = replacement
    115       arg_index[0] += 1
    116     while end < len(lines) and height > 0:
    117       # We don't count commas at higher nesting levels.
    118       if lines[end] == ',' and height == 1:
    119         add_arg(lines[last_match:end])
    120         last_match = end + 1
    121       elif lines[end] in ['(', '{', '[']:
    122         height = height + 1
    123       elif lines[end] in [')', '}', ']']:
    124         height = height - 1
    125       end = end + 1
    126     # Remember to add the last match.
    127     add_arg(lines[last_match:end-1])
    128     result = macro.expand(mapping)
    129     # Replace the occurrence of the macro with the expansion
    130     lines = lines[:start] + result + lines[end:]
    131     pattern_match = name_pattern.search(lines, start + len(result))
    132   return lines
    133 
    134 def ExpandMacros(lines, macros):
    135   # We allow macros to depend on the previously declared macros, but
    136   # we don't allow self-dependecies or recursion.
    137   for name_pattern, macro in reversed(macros):
    138     def expander(s):
    139       return ExpandMacros(s, macros)
    140     lines = ExpandMacroDefinition(lines, 0, name_pattern, macro, expander)
    141   return lines
    142 
    143 class TextMacro:
    144   def __init__(self, args, body):
    145     self.args = args
    146     self.body = body
    147   def expand(self, mapping):
    148     # Keys could be substrings of earlier values. To avoid unintended
    149     # clobbering, apply all replacements simultaneously.
    150     any_key_pattern = "|".join(re.escape(k) for k in mapping.iterkeys())
    151     def replace(match):
    152       return mapping[match.group(0)]
    153     return re.sub(any_key_pattern, replace, self.body)
    154 
    155 class PythonMacro:
    156   def __init__(self, args, fun):
    157     self.args = args
    158     self.fun = fun
    159   def expand(self, mapping):
    160     args = []
    161     for arg in self.args:
    162       args.append(mapping[arg])
    163     return str(self.fun(*args))
    164 
    165 CONST_PATTERN = re.compile(r'^define\s+([a-zA-Z0-9_]+)\s*=\s*([^;]*);$')
    166 MACRO_PATTERN = re.compile(r'^macro\s+([a-zA-Z0-9_]+)\s*\(([^)]*)\)\s*=\s*([^;]*);$')
    167 PYTHON_MACRO_PATTERN = re.compile(r'^python\s+macro\s+([a-zA-Z0-9_]+)\s*\(([^)]*)\)\s*=\s*([^;]*);$')
    168 
    169 
    170 def ReadMacros(lines):
    171   constants = []
    172   macros = []
    173   for line in lines.split('\n'):
    174     hash = line.find('#')
    175     if hash != -1: line = line[:hash]
    176     line = line.strip()
    177     if len(line) is 0: continue
    178     const_match = CONST_PATTERN.match(line)
    179     if const_match:
    180       name = const_match.group(1)
    181       value = const_match.group(2).strip()
    182       constants.append((re.compile("\\b%s\\b" % name), value))
    183     else:
    184       macro_match = MACRO_PATTERN.match(line)
    185       if macro_match:
    186         name = macro_match.group(1)
    187         args = [match.strip() for match in macro_match.group(2).split(',')]
    188         body = macro_match.group(3).strip()
    189         macros.append((re.compile("\\b%s\\(" % name), TextMacro(args, body)))
    190       else:
    191         python_match = PYTHON_MACRO_PATTERN.match(line)
    192         if python_match:
    193           name = python_match.group(1)
    194           args = [match.strip() for match in python_match.group(2).split(',')]
    195           body = python_match.group(3).strip()
    196           fun = eval("lambda " + ",".join(args) + ': ' + body)
    197           macros.append((re.compile("\\b%s\\(" % name), PythonMacro(args, fun)))
    198         else:
    199           raise Error("Illegal line: " + line)
    200   return (constants, macros)
    201 
    202 
    203 TEMPLATE_PATTERN = re.compile(r'^\s+T\(([A-Z][a-zA-Z0-9]*),')
    204 
    205 def ReadMessageTemplates(lines):
    206   templates = []
    207   index = 0
    208   for line in lines.split('\n'):
    209     template_match = TEMPLATE_PATTERN.match(line)
    210     if template_match:
    211       name = "k%s" % template_match.group(1)
    212       value = index
    213       index = index + 1
    214       templates.append((re.compile("\\b%s\\b" % name), value))
    215   return templates
    216 
    217 INLINE_MACRO_PATTERN = re.compile(r'macro\s+([a-zA-Z0-9_]+)\s*\(([^)]*)\)\s*\n')
    218 INLINE_MACRO_END_PATTERN = re.compile(r'endmacro\s*\n')
    219 
    220 def ExpandInlineMacros(lines):
    221   pos = 0
    222   while True:
    223     macro_match = INLINE_MACRO_PATTERN.search(lines, pos)
    224     if macro_match is None:
    225       # no more macros
    226       return lines
    227     name = macro_match.group(1)
    228     args = [match.strip() for match in macro_match.group(2).split(',')]
    229     end_macro_match = INLINE_MACRO_END_PATTERN.search(lines, macro_match.end());
    230     if end_macro_match is None:
    231       raise Error("Macro %s unclosed" % name)
    232     body = lines[macro_match.end():end_macro_match.start()]
    233 
    234     # remove macro definition
    235     lines = lines[:macro_match.start()] + lines[end_macro_match.end():]
    236     name_pattern = re.compile("\\b%s\\(" % name)
    237     macro = TextMacro(args, body)
    238 
    239     # advance position to where the macro defintion was
    240     pos = macro_match.start()
    241 
    242     def non_expander(s):
    243       return s
    244     lines = ExpandMacroDefinition(lines, pos, name_pattern, macro, non_expander)
    245 
    246 
    247 INLINE_CONSTANT_PATTERN = re.compile(r'define\s+([a-zA-Z0-9_]+)\s*=\s*([^;\n]+);\n')
    248 
    249 def ExpandInlineConstants(lines):
    250   pos = 0
    251   while True:
    252     const_match = INLINE_CONSTANT_PATTERN.search(lines, pos)
    253     if const_match is None:
    254       # no more constants
    255       return lines
    256     name = const_match.group(1)
    257     replacement = const_match.group(2)
    258     name_pattern = re.compile("\\b%s\\b" % name)
    259 
    260     # remove constant definition and replace
    261     lines = (lines[:const_match.start()] +
    262              re.sub(name_pattern, replacement, lines[const_match.end():]))
    263 
    264     # advance position to where the constant defintion was
    265     pos = const_match.start()
    266 
    267 
# Skeleton of the generated C++ file. JS2C() fills the %(...) placeholders
# from the dictionary returned by BuildMetadata().
HEADER_TEMPLATE = """\
// Copyright 2011 Google Inc. All Rights Reserved.

// This file was generated from .js source files by GYP.  If you
// want to make changes to this file you should either change the
// javascript source files or the GYP script.

#include "src/v8.h"
#include "src/snapshot/natives.h"
#include "src/utils.h"

namespace v8 {
namespace internal {

%(sources_declaration)s\

  template <>
  int NativesCollection<%(type)s>::GetBuiltinsCount() {
    return %(builtin_count)i;
  }

  template <>
  int NativesCollection<%(type)s>::GetDebuggerCount() {
    return %(debugger_count)i;
  }

  template <>
  int NativesCollection<%(type)s>::GetIndex(const char* name) {
%(get_index_cases)s\
    return -1;
  }

  template <>
  Vector<const char> NativesCollection<%(type)s>::GetScriptSource(int index) {
%(get_script_source_cases)s\
    return Vector<const char>("", 0);
  }

  template <>
  Vector<const char> NativesCollection<%(type)s>::GetScriptName(int index) {
%(get_script_name_cases)s\
    return Vector<const char>("", 0);
  }

  template <>
  Vector<const char> NativesCollection<%(type)s>::GetScriptsSource() {
    return Vector<const char>(sources, %(total_length)i);
  }
}  // internal
}  // v8
"""

# Declaration of the 'sources' array; BuildMetadata() substitutes the output
# of ToCArray() for %s.
SOURCES_DECLARATION = """\
  static const char sources[] = { %s };
"""


# One line of GetIndex() per module: maps the module's name to its index.
GET_INDEX_CASE = """\
    if (strcmp(name, "%(id)s") == 0) return %(i)i;
"""


# One line of GetScriptSource() per module: the module's slice of 'sources',
# given by its offset and length.
GET_SCRIPT_SOURCE_CASE = """\
    if (index == %(i)i) return Vector<const char>(sources + %(offset)i, %(source_length)i);
"""


# One line of GetScriptName() per module: the module's "native <name>.js"
# string and its length.
GET_SCRIPT_NAME_CASE = """\
    if (index == %(i)i) return Vector<const char>("%(name)s", %(length)i);
"""
    338 
    339 
    340 def BuildFilterChain(macro_filename, message_template_file):
    341   """Build the chain of filter functions to be applied to the sources.
    342 
    343   Args:
    344     macro_filename: Name of the macro file, if any.
    345 
    346   Returns:
    347     A function (string -> string) that processes a source file.
    348   """
    349   filter_chain = []
    350 
    351   if macro_filename:
    352     (consts, macros) = ReadMacros(ReadFile(macro_filename))
    353     filter_chain.append(lambda l: ExpandMacros(l, macros))
    354     filter_chain.append(lambda l: ExpandConstants(l, consts))
    355 
    356   if message_template_file:
    357     message_templates = ReadMessageTemplates(ReadFile(message_template_file))
    358     filter_chain.append(lambda l: ExpandConstants(l, message_templates))
    359 
    360   filter_chain.extend([
    361     RemoveCommentsAndTrailingWhitespace,
    362     ExpandInlineMacros,
    363     ExpandInlineConstants,
    364     Validate,
    365     jsmin.JavaScriptMinifier().JSMinify
    366   ])
    367 
    368   def chain(f1, f2):
    369     return lambda x: f2(f1(x))
    370 
    371   return reduce(chain, filter_chain)
    372 
    373 def BuildExtraFilterChain():
    374   return lambda x: RemoveCommentsAndTrailingWhitespace(Validate(x))
    375 
    376 class Sources:
    377   def __init__(self):
    378     self.names = []
    379     self.modules = []
    380     self.is_debugger_id = []
    381 
    382 
    383 def IsDebuggerFile(filename):
    384   return "debug" in filename
    385 
    386 def IsMacroFile(filename):
    387   return filename.endswith("macros.py")
    388 
    389 def IsMessageTemplateFile(filename):
    390   return filename.endswith("messages.h")
    391 
    392 
    393 def PrepareSources(source_files, native_type, emit_js):
    394   """Read, prepare and assemble the list of source files.
    395 
    396   Args:
    397     source_files: List of JavaScript-ish source files. A file named macros.py
    398         will be treated as a list of macros.
    399     native_type: String corresponding to a NativeType enum value, allowing us
    400         to treat different types of sources differently.
    401     emit_js: True if we should skip the byte conversion and just leave the
    402         sources as JS strings.
    403 
    404   Returns:
    405     An instance of Sources.
    406   """
    407   macro_file = None
    408   macro_files = filter(IsMacroFile, source_files)
    409   assert len(macro_files) in [0, 1]
    410   if macro_files:
    411     source_files.remove(macro_files[0])
    412     macro_file = macro_files[0]
    413 
    414   message_template_file = None
    415   message_template_files = filter(IsMessageTemplateFile, source_files)
    416   assert len(message_template_files) in [0, 1]
    417   if message_template_files:
    418     source_files.remove(message_template_files[0])
    419     message_template_file = message_template_files[0]
    420 
    421   filters = None
    422   if native_type in ("EXTRAS", "EXPERIMENTAL_EXTRAS"):
    423     filters = BuildExtraFilterChain()
    424   else:
    425     filters = BuildFilterChain(macro_file, message_template_file)
    426 
    427   # Sort 'debugger' sources first.
    428   source_files = sorted(source_files,
    429                         lambda l,r: IsDebuggerFile(r) - IsDebuggerFile(l))
    430 
    431   source_files_and_contents = [(f, ReadFile(f)) for f in source_files]
    432 
    433   # Have a single not-quite-empty source file if there are none present;
    434   # otherwise you get errors trying to compile an empty C++ array.
    435   # It cannot be empty (or whitespace, which gets trimmed to empty), as
    436   # the deserialization code assumes each file is nonempty.
    437   if not source_files_and_contents:
    438     source_files_and_contents = [("dummy.js", "(function() {})")]
    439 
    440   result = Sources()
    441 
    442   for (source, contents) in source_files_and_contents:
    443     try:
    444       lines = filters(contents)
    445     except Error as e:
    446       raise Error("In file %s:\n%s" % (source, str(e)))
    447 
    448     result.modules.append(lines)
    449 
    450     is_debugger = IsDebuggerFile(source)
    451     result.is_debugger_id.append(is_debugger)
    452 
    453     name = os.path.basename(source)[:-3]
    454     result.names.append(name)
    455 
    456   return result
    457 
    458 
    459 def BuildMetadata(sources, source_bytes, native_type):
    460   """Build the meta data required to generate a libaries file.
    461 
    462   Args:
    463     sources: A Sources instance with the prepared sources.
    464     source_bytes: A list of source bytes.
    465         (The concatenation of all sources; might be compressed.)
    466     native_type: The parameter for the NativesCollection template.
    467 
    468   Returns:
    469     A dictionary for use with HEADER_TEMPLATE.
    470   """
    471   total_length = len(source_bytes)
    472   raw_sources = "".join(sources.modules)
    473 
    474   # The sources are expected to be ASCII-only.
    475   assert not filter(lambda value: ord(value) >= 128, raw_sources)
    476 
    477   # Loop over modules and build up indices into the source blob:
    478   get_index_cases = []
    479   get_script_name_cases = []
    480   get_script_source_cases = []
    481   offset = 0
    482   for i in xrange(len(sources.modules)):
    483     native_name = "native %s.js" % sources.names[i]
    484     d = {
    485         "i": i,
    486         "id": sources.names[i],
    487         "name": native_name,
    488         "length": len(native_name),
    489         "offset": offset,
    490         "source_length": len(sources.modules[i]),
    491     }
    492     get_index_cases.append(GET_INDEX_CASE % d)
    493     get_script_name_cases.append(GET_SCRIPT_NAME_CASE % d)
    494     get_script_source_cases.append(GET_SCRIPT_SOURCE_CASE % d)
    495     offset += len(sources.modules[i])
    496   assert offset == len(raw_sources)
    497 
    498   metadata = {
    499     "builtin_count": len(sources.modules),
    500     "debugger_count": sum(sources.is_debugger_id),
    501     "sources_declaration": SOURCES_DECLARATION % ToCArray(source_bytes),
    502     "total_length": total_length,
    503     "get_index_cases": "".join(get_index_cases),
    504     "get_script_source_cases": "".join(get_script_source_cases),
    505     "get_script_name_cases": "".join(get_script_name_cases),
    506     "type": native_type,
    507   }
    508   return metadata
    509 
    510 
    511 def PutInt(blob_file, value):
    512   assert(value >= 0 and value < (1 << 28))
    513   if (value < 1 << 6):
    514     size = 1
    515   elif (value < 1 << 14):
    516     size = 2
    517   elif (value < 1 << 22):
    518     size = 3
    519   else:
    520     size = 4
    521   value_with_length = (value << 2) | (size - 1)
    522 
    523   byte_sequence = bytearray()
    524   for i in xrange(size):
    525     byte_sequence.append(value_with_length & 255)
    526     value_with_length >>= 8;
    527   blob_file.write(byte_sequence)
    528 
    529 
    530 def PutStr(blob_file, value):
    531   PutInt(blob_file, len(value));
    532   blob_file.write(value);
    533 
    534 
    535 def WriteStartupBlob(sources, startup_blob):
    536   """Write a startup blob, as expected by V8 Initialize ...
    537     TODO(vogelheim): Add proper method name.
    538 
    539   Args:
    540     sources: A Sources instance with the prepared sources.
    541     startup_blob_file: Name of file to write the blob to.
    542   """
    543   output = open(startup_blob, "wb")
    544 
    545   debug_sources = sum(sources.is_debugger_id);
    546   PutInt(output, debug_sources)
    547   for i in xrange(debug_sources):
    548     PutStr(output, sources.names[i]);
    549     PutStr(output, sources.modules[i]);
    550 
    551   PutInt(output, len(sources.names) - debug_sources)
    552   for i in xrange(debug_sources, len(sources.names)):
    553     PutStr(output, sources.names[i]);
    554     PutStr(output, sources.modules[i]);
    555 
    556   output.close()
    557 
    558 
    559 def JS2C(sources, target, native_type, raw_file, startup_blob, emit_js):
    560   prepared_sources = PrepareSources(sources, native_type, emit_js)
    561   sources_output = "".join(prepared_sources.modules)
    562   metadata = BuildMetadata(prepared_sources, sources_output, native_type)
    563 
    564   # Optionally emit raw file.
    565   if raw_file:
    566     output = open(raw_file, "w")
    567     output.write(sources_output)
    568     output.close()
    569 
    570   if startup_blob:
    571     WriteStartupBlob(prepared_sources, startup_blob)
    572 
    573   # Emit resulting source file.
    574   output = open(target, "w")
    575   if emit_js:
    576     output.write(sources_output)
    577   else:
    578     output.write(HEADER_TEMPLATE % metadata)
    579   output.close()
    580 
    581 
    582 def main():
    583   parser = optparse.OptionParser()
    584   parser.add_option("--raw",
    585                     help="file to write the processed sources array to.")
    586   parser.add_option("--startup_blob",
    587                     help="file to write the startup blob to.")
    588   parser.add_option("--js",
    589                     help="writes a JS file output instead of a C file",
    590                     action="store_true", default=False, dest='js')
    591   parser.add_option("--nojs", action="store_false", default=False, dest='js')
    592   parser.set_usage("""js2c out.cc type sources.js ...
    593         out.cc: C code to be generated.
    594         type: type parameter for NativesCollection template.
    595         sources.js: JS internal sources or macros.py.""")
    596   (options, args) = parser.parse_args()
    597   JS2C(args[2:],
    598        args[0],
    599        args[1],
    600        options.raw,
    601        options.startup_blob,
    602        options.js)
    603 
    604 
    605 if __name__ == "__main__":
    606   main()
    607