Home | History | Annotate | Download | only in tools
      1 #!/usr/bin/env python
      2 #
      3 # Copyright 2012 the V8 project authors. All rights reserved.
      4 # Redistribution and use in source and binary forms, with or without
      5 # modification, are permitted provided that the following conditions are
      6 # met:
      7 #
      8 #     * Redistributions of source code must retain the above copyright
      9 #       notice, this list of conditions and the following disclaimer.
     10 #     * Redistributions in binary form must reproduce the above
     11 #       copyright notice, this list of conditions and the following
     12 #       disclaimer in the documentation and/or other materials provided
     13 #       with the distribution.
     14 #     * Neither the name of Google Inc. nor the names of its
     15 #       contributors may be used to endorse or promote products derived
     16 #       from this software without specific prior written permission.
     17 #
     18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     19 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     20 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     21 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     22 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     23 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     24 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     25 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     26 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     28 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29 
     30 # This is a utility for converting JavaScript source code into C-style
     31 # char arrays. It is used for embedded JavaScript code in the V8
     32 # library.
     33 
     34 import os, re, sys, string
     35 import optparse
     36 import jsmin
     37 import bz2
     38 import textwrap
     39 
     40 
     41 class Error(Exception):
     42   def __init__(self, msg):
     43     Exception.__init__(self, msg)
     44 
     45 
     46 def ToCArray(byte_sequence):
     47   result = []
     48   for chr in byte_sequence:
     49     result.append(str(ord(chr)))
     50   joined = ", ".join(result)
     51   return textwrap.fill(joined, 80)
     52 
     53 
     54 def RemoveCommentsAndTrailingWhitespace(lines):
     55   lines = re.sub(r'//.*\n', '\n', lines) # end-of-line comments
     56   lines = re.sub(re.compile(r'/\*.*?\*/', re.DOTALL), '', lines) # comments.
     57   lines = re.sub(r'\s+\n+', '\n', lines) # trailing whitespace
     58   return lines
     59 
     60 
     61 def ReadFile(filename):
     62   file = open(filename, "rt")
     63   try:
     64     lines = file.read()
     65   finally:
     66     file.close()
     67   return lines
     68 
     69 
     70 EVAL_PATTERN = re.compile(r'\beval\s*\(')
     71 WITH_PATTERN = re.compile(r'\bwith\s*\(')
     72 
     73 def Validate(lines):
     74   # Because of simplified context setup, eval and with is not
     75   # allowed in the natives files.
     76   if EVAL_PATTERN.search(lines):
     77     raise Error("Eval disallowed in natives.")
     78   if WITH_PATTERN.search(lines):
     79     raise Error("With statements disallowed in natives.")
     80 
     81   # Pass lines through unchanged.
     82   return lines
     83 
     84 
     85 def ExpandConstants(lines, constants):
     86   for key, value in constants:
     87     lines = key.sub(str(value), lines)
     88   return lines
     89 
     90 
     91 def ExpandMacroDefinition(lines, pos, name_pattern, macro, expander):
     92   pattern_match = name_pattern.search(lines, pos)
     93   while pattern_match is not None:
     94     # Scan over the arguments
     95     height = 1
     96     start = pattern_match.start()
     97     end = pattern_match.end()
     98     assert lines[end - 1] == '('
     99     last_match = end
    100     arg_index = [0]  # Wrap state into array, to work around Python "scoping"
    101     mapping = { }
    102     def add_arg(str):
    103       # Remember to expand recursively in the arguments
    104       replacement = expander(str.strip())
    105       mapping[macro.args[arg_index[0]]] = replacement
    106       arg_index[0] += 1
    107     while end < len(lines) and height > 0:
    108       # We don't count commas at higher nesting levels.
    109       if lines[end] == ',' and height == 1:
    110         add_arg(lines[last_match:end])
    111         last_match = end + 1
    112       elif lines[end] in ['(', '{', '[']:
    113         height = height + 1
    114       elif lines[end] in [')', '}', ']']:
    115         height = height - 1
    116       end = end + 1
    117     # Remember to add the last match.
    118     add_arg(lines[last_match:end-1])
    119     result = macro.expand(mapping)
    120     # Replace the occurrence of the macro with the expansion
    121     lines = lines[:start] + result + lines[end:]
    122     pattern_match = name_pattern.search(lines, start + len(result))
    123   return lines
    124 
    125 def ExpandMacros(lines, macros):
    126   # We allow macros to depend on the previously declared macros, but
    127   # we don't allow self-dependecies or recursion.
    128   for name_pattern, macro in reversed(macros):
    129     def expander(s):
    130       return ExpandMacros(s, macros)
    131     lines = ExpandMacroDefinition(lines, 0, name_pattern, macro, expander)
    132   return lines
    133 
    134 class TextMacro:
    135   def __init__(self, args, body):
    136     self.args = args
    137     self.body = body
    138   def expand(self, mapping):
    139     result = self.body
    140     for key, value in mapping.items():
    141         result = result.replace(key, value)
    142     return result
    143 
    144 class PythonMacro:
    145   def __init__(self, args, fun):
    146     self.args = args
    147     self.fun = fun
    148   def expand(self, mapping):
    149     args = []
    150     for arg in self.args:
    151       args.append(mapping[arg])
    152     return str(self.fun(*args))
    153 
    154 CONST_PATTERN = re.compile(r'^const\s+([a-zA-Z0-9_]+)\s*=\s*([^;]*);$')
    155 MACRO_PATTERN = re.compile(r'^macro\s+([a-zA-Z0-9_]+)\s*\(([^)]*)\)\s*=\s*([^;]*);$')
    156 PYTHON_MACRO_PATTERN = re.compile(r'^python\s+macro\s+([a-zA-Z0-9_]+)\s*\(([^)]*)\)\s*=\s*([^;]*);$')
    157 
    158 
    159 def ReadMacros(lines):
    160   constants = []
    161   macros = []
    162   for line in lines.split('\n'):
    163     hash = line.find('#')
    164     if hash != -1: line = line[:hash]
    165     line = line.strip()
    166     if len(line) is 0: continue
    167     const_match = CONST_PATTERN.match(line)
    168     if const_match:
    169       name = const_match.group(1)
    170       value = const_match.group(2).strip()
    171       constants.append((re.compile("\\b%s\\b" % name), value))
    172     else:
    173       macro_match = MACRO_PATTERN.match(line)
    174       if macro_match:
    175         name = macro_match.group(1)
    176         args = [match.strip() for match in macro_match.group(2).split(',')]
    177         body = macro_match.group(3).strip()
    178         macros.append((re.compile("\\b%s\\(" % name), TextMacro(args, body)))
    179       else:
    180         python_match = PYTHON_MACRO_PATTERN.match(line)
    181         if python_match:
    182           name = python_match.group(1)
    183           args = [match.strip() for match in python_match.group(2).split(',')]
    184           body = python_match.group(3).strip()
    185           fun = eval("lambda " + ",".join(args) + ': ' + body)
    186           macros.append((re.compile("\\b%s\\(" % name), PythonMacro(args, fun)))
    187         else:
    188           raise Error("Illegal line: " + line)
    189   return (constants, macros)
    190 
    191 INLINE_MACRO_PATTERN = re.compile(r'macro\s+([a-zA-Z0-9_]+)\s*\(([^)]*)\)\s*\n')
    192 INLINE_MACRO_END_PATTERN = re.compile(r'endmacro\s*\n')
    193 
    194 def ExpandInlineMacros(lines):
    195   pos = 0
    196   while True:
    197     macro_match = INLINE_MACRO_PATTERN.search(lines, pos)
    198     if macro_match is None:
    199       # no more macros
    200       return lines
    201     name = macro_match.group(1)
    202     args = [match.strip() for match in macro_match.group(2).split(',')]
    203     end_macro_match = INLINE_MACRO_END_PATTERN.search(lines, macro_match.end());
    204     if end_macro_match is None:
    205       raise Error("Macro %s unclosed" % name)
    206     body = lines[macro_match.end():end_macro_match.start()]
    207 
    208     # remove macro definition
    209     lines = lines[:macro_match.start()] + lines[end_macro_match.end():]
    210     name_pattern = re.compile("\\b%s\\(" % name)
    211     macro = TextMacro(args, body)
    212 
    213     # advance position to where the macro defintion was
    214     pos = macro_match.start()
    215 
    216     def non_expander(s):
    217       return s
    218     lines = ExpandMacroDefinition(lines, pos, name_pattern, macro, non_expander)
    219 
    220 
    221 INLINE_CONSTANT_PATTERN = re.compile(r'const\s+([a-zA-Z0-9_]+)\s*=\s*([^;\n]+)[;\n]')
    222 
    223 def ExpandInlineConstants(lines):
    224   pos = 0
    225   while True:
    226     const_match = INLINE_CONSTANT_PATTERN.search(lines, pos)
    227     if const_match is None:
    228       # no more constants
    229       return lines
    230     name = const_match.group(1)
    231     replacement = const_match.group(2)
    232     name_pattern = re.compile("\\b%s\\b" % name)
    233 
    234     # remove constant definition and replace
    235     lines = (lines[:const_match.start()] +
    236              re.sub(name_pattern, replacement, lines[const_match.end():]))
    237 
    238     # advance position to where the constant defintion was
    239     pos = const_match.start()
    240 
    241 
# Skeleton of the generated C++ source file. JS2C fills it in with the '%'
# operator, using the dictionary returned by BuildMetadata; every %(key)s
# and %(key)i placeholder below must therefore be a key of that dictionary.
# A trailing backslash after a placeholder suppresses the newline, since
# the substituted snippets already end with one.
HEADER_TEMPLATE = """\
// Copyright 2011 Google Inc. All Rights Reserved.

// This file was generated from .js source files by GYP.  If you
// want to make changes to this file you should either change the
// javascript source files or the GYP script.

#include "src/v8.h"
#include "src/natives.h"
#include "src/utils.h"

namespace v8 {
namespace internal {

%(sources_declaration)s\

%(raw_sources_declaration)s\

  template <>
  int NativesCollection<%(type)s>::GetBuiltinsCount() {
    return %(builtin_count)i;
  }

  template <>
  int NativesCollection<%(type)s>::GetDebuggerCount() {
    return %(debugger_count)i;
  }

  template <>
  int NativesCollection<%(type)s>::GetIndex(const char* name) {
%(get_index_cases)s\
    return -1;
  }

  template <>
  int NativesCollection<%(type)s>::GetRawScriptsSize() {
    return %(raw_total_length)i;
  }

  template <>
  Vector<const char> NativesCollection<%(type)s>::GetRawScriptSource(int index) {
%(get_raw_script_source_cases)s\
    return Vector<const char>("", 0);
  }

  template <>
  Vector<const char> NativesCollection<%(type)s>::GetScriptName(int index) {
%(get_script_name_cases)s\
    return Vector<const char>("", 0);
  }

  template <>
  Vector<const byte> NativesCollection<%(type)s>::GetScriptsSource() {
    return Vector<const byte>(sources, %(total_length)i);
  }

  template <>
  void NativesCollection<%(type)s>::SetRawScriptsSource(Vector<const char> raw_source) {
    DCHECK(%(raw_total_length)i == raw_source.length());
    raw_sources = raw_source.start();
  }

}  // internal
}  // v8
"""
    307 
# Declaration of the byte array holding the (possibly compressed) sources.
# BuildMetadata formats it with the output of ToCArray.
SOURCES_DECLARATION = """\
  static const byte sources[] = { %s };
"""


# Declaration of raw_sources used by BuildMetadata when the emitted bytes
# differ from the raw sources (i.e. they are compressed); the pointer is
# set later through SetRawScriptsSource in the generated code.
RAW_SOURCES_COMPRESSION_DECLARATION = """\
  static const char* raw_sources = NULL;
"""


# Declaration of raw_sources used by BuildMetadata when the emitted bytes
# are the raw sources themselves.
RAW_SOURCES_DECLARATION = """\
  static const char* raw_sources = reinterpret_cast<const char*>(sources);
"""


# One lookup line per module for GetIndex; formatted in BuildMetadata with
# the keys "id" (module name) and "i" (module index).
GET_INDEX_CASE = """\
    if (strcmp(name, "%(id)s") == 0) return %(i)i;
"""


# One lookup line per module for GetRawScriptSource; formatted with the
# keys "i", "offset" (start within the raw sources) and "raw_length".
GET_RAW_SCRIPT_SOURCE_CASE = """\
    if (index == %(i)i) return Vector<const char>(raw_sources + %(offset)i, %(raw_length)i);
"""


# One lookup line per module for GetScriptName; formatted with the keys
# "i", "name" (the "native <name>.js" string) and "length" (its length).
GET_SCRIPT_NAME_CASE = """\
    if (index == %(i)i) return Vector<const char>("%(name)s", %(length)i);
"""
    335 """
    336 
    337 
    338 def BuildFilterChain(macro_filename):
    339   """Build the chain of filter functions to be applied to the sources.
    340 
    341   Args:
    342     macro_filename: Name of the macro file, if any.
    343 
    344   Returns:
    345     A function (string -> string) that reads a source file and processes it.
    346   """
    347   filter_chain = [ReadFile]
    348 
    349   if macro_filename:
    350     (consts, macros) = ReadMacros(ReadFile(macro_filename))
    351     filter_chain.append(lambda l: ExpandConstants(l, consts))
    352     filter_chain.append(lambda l: ExpandMacros(l, macros))
    353 
    354   filter_chain.extend([
    355     RemoveCommentsAndTrailingWhitespace,
    356     ExpandInlineMacros,
    357     ExpandInlineConstants,
    358     Validate,
    359     jsmin.JavaScriptMinifier().JSMinify
    360   ])
    361 
    362   def chain(f1, f2):
    363     return lambda x: f2(f1(x))
    364 
    365   return reduce(chain, filter_chain)
    366 
    367 
    368 class Sources:
    369   def __init__(self):
    370     self.names = []
    371     self.modules = []
    372     self.is_debugger_id = []
    373 
    374 
    375 def IsDebuggerFile(filename):
    376   return filename.endswith("-debugger.js")
    377 
    378 def IsMacroFile(filename):
    379   return filename.endswith("macros.py")
    380 
    381 
    382 def PrepareSources(source_files):
    383   """Read, prepare and assemble the list of source files.
    384 
    385   Args:
    386     sources: List of Javascript-ish source files. A file named macros.py
    387         will be treated as a list of macros.
    388 
    389   Returns:
    390     An instance of Sources.
    391   """
    392   macro_file = None
    393   macro_files = filter(IsMacroFile, source_files)
    394   assert len(macro_files) in [0, 1]
    395   if macro_files:
    396     source_files.remove(macro_files[0])
    397     macro_file = macro_files[0]
    398 
    399   filters = BuildFilterChain(macro_file)
    400 
    401   # Sort 'debugger' sources first.
    402   source_files = sorted(source_files,
    403                         lambda l,r: IsDebuggerFile(r) - IsDebuggerFile(l))
    404 
    405   result = Sources()
    406   for source in source_files:
    407     try:
    408       lines = filters(source)
    409     except Error as e:
    410       raise Error("In file %s:\n%s" % (source, str(e)))
    411 
    412     result.modules.append(lines);
    413 
    414     is_debugger = IsDebuggerFile(source)
    415     result.is_debugger_id.append(is_debugger);
    416 
    417     name = os.path.basename(source)[:-3]
    418     result.names.append(name if not is_debugger else name[:-9]);
    419   return result
    420 
    421 
    422 def BuildMetadata(sources, source_bytes, native_type):
    423   """Build the meta data required to generate a libaries file.
    424 
    425   Args:
    426     sources: A Sources instance with the prepared sources.
    427     source_bytes: A list of source bytes.
    428         (The concatenation of all sources; might be compressed.)
    429     native_type: The parameter for the NativesCollection template.
    430 
    431   Returns:
    432     A dictionary for use with HEADER_TEMPLATE.
    433   """
    434   total_length = len(source_bytes)
    435   raw_sources = "".join(sources.modules)
    436 
    437   # The sources are expected to be ASCII-only.
    438   assert not filter(lambda value: ord(value) >= 128, raw_sources)
    439 
    440   # Loop over modules and build up indices into the source blob:
    441   get_index_cases = []
    442   get_script_name_cases = []
    443   get_raw_script_source_cases = []
    444   offset = 0
    445   for i in xrange(len(sources.modules)):
    446     native_name = "native %s.js" % sources.names[i]
    447     d = {
    448         "i": i,
    449         "id": sources.names[i],
    450         "name": native_name,
    451         "length": len(native_name),
    452         "offset": offset,
    453         "raw_length": len(sources.modules[i]),
    454     }
    455     get_index_cases.append(GET_INDEX_CASE % d)
    456     get_script_name_cases.append(GET_SCRIPT_NAME_CASE % d)
    457     get_raw_script_source_cases.append(GET_RAW_SCRIPT_SOURCE_CASE % d)
    458     offset += len(sources.modules[i])
    459   assert offset == len(raw_sources)
    460 
    461   # If we have the raw sources we can declare them accordingly.
    462   have_raw_sources = source_bytes == raw_sources
    463   raw_sources_declaration = (RAW_SOURCES_DECLARATION
    464       if have_raw_sources else RAW_SOURCES_COMPRESSION_DECLARATION)
    465 
    466   metadata = {
    467     "builtin_count": len(sources.modules),
    468     "debugger_count": sum(sources.is_debugger_id),
    469     "sources_declaration": SOURCES_DECLARATION % ToCArray(source_bytes),
    470     "raw_sources_declaration": raw_sources_declaration,
    471     "raw_total_length": sum(map(len, sources.modules)),
    472     "total_length": total_length,
    473     "get_index_cases": "".join(get_index_cases),
    474     "get_raw_script_source_cases": "".join(get_raw_script_source_cases),
    475     "get_script_name_cases": "".join(get_script_name_cases),
    476     "type": native_type,
    477   }
    478   return metadata
    479 
    480 
    481 def CompressMaybe(sources, compression_type):
    482   """Take the prepared sources and generate a sequence of bytes.
    483 
    484   Args:
    485     sources: A Sources instance with the prepared sourced.
    486     compression_type: string, describing the desired compression.
    487 
    488   Returns:
    489     A sequence of bytes.
    490   """
    491   sources_bytes = "".join(sources.modules)
    492   if compression_type == "off":
    493     return sources_bytes
    494   elif compression_type == "bz2":
    495     return bz2.compress(sources_bytes)
    496   else:
    497     raise Error("Unknown compression type %s." % compression_type)
    498 
    499 
    500 def PutInt(blob_file, value):
    501   assert(value >= 0 and value < (1 << 20))
    502   size = 1 if (value < 1 << 6) else (2 if (value < 1 << 14) else 3)
    503   value_with_length = (value << 2) | size
    504 
    505   byte_sequence = bytearray()
    506   for i in xrange(size):
    507     byte_sequence.append(value_with_length & 255)
    508     value_with_length >>= 8;
    509   blob_file.write(byte_sequence)
    510 
    511 
    512 def PutStr(blob_file, value):
    513   PutInt(blob_file, len(value));
    514   blob_file.write(value);
    515 
    516 
    517 def WriteStartupBlob(sources, startup_blob):
    518   """Write a startup blob, as expected by V8 Initialize ...
    519     TODO(vogelheim): Add proper method name.
    520 
    521   Args:
    522     sources: A Sources instance with the prepared sources.
    523     startup_blob_file: Name of file to write the blob to.
    524   """
    525   output = open(startup_blob, "wb")
    526 
    527   debug_sources = sum(sources.is_debugger_id);
    528   PutInt(output, debug_sources)
    529   for i in xrange(debug_sources):
    530     PutStr(output, sources.names[i]);
    531     PutStr(output, sources.modules[i]);
    532 
    533   PutInt(output, len(sources.names) - debug_sources)
    534   for i in xrange(debug_sources, len(sources.names)):
    535     PutStr(output, sources.names[i]);
    536     PutStr(output, sources.modules[i]);
    537 
    538   output.close()
    539 
    540 
    541 def JS2C(source, target, native_type, compression_type, raw_file, startup_blob):
    542   sources = PrepareSources(source)
    543   sources_bytes = CompressMaybe(sources, compression_type)
    544   metadata = BuildMetadata(sources, sources_bytes, native_type)
    545 
    546   # Optionally emit raw file.
    547   if raw_file:
    548     output = open(raw_file, "w")
    549     output.write(sources_bytes)
    550     output.close()
    551 
    552   if startup_blob:
    553     WriteStartupBlob(sources, startup_blob);
    554 
    555   # Emit resulting source file.
    556   output = open(target, "w")
    557   output.write(HEADER_TEMPLATE % metadata)
    558   output.close()
    559 
    560 
    561 def main():
    562   parser = optparse.OptionParser()
    563   parser.add_option("--raw", action="store",
    564                     help="file to write the processed sources array to.")
    565   parser.add_option("--startup_blob", action="store",
    566                     help="file to write the startup blob to.")
    567   parser.set_usage("""js2c out.cc type compression sources.js ...
    568       out.cc: C code to be generated.
    569       type: type parameter for NativesCollection template.
    570       compression: type of compression used. [off|bz2]
    571       sources.js: JS internal sources or macros.py.""")
    572   (options, args) = parser.parse_args()
    573 
    574   JS2C(args[3:], args[0], args[1], args[2], options.raw, options.startup_blob)
    575 
    576 
    577 if __name__ == "__main__":
    578   main()
    579