Home | History | Annotate | Download | only in tools
      1 #!/usr/bin/env python
      2 
      3 #
      4 # Copyright 2012 the V8 project authors. All rights reserved.
      5 # Redistribution and use in source and binary forms, with or without
      6 # modification, are permitted provided that the following conditions are
      7 # met:
      8 #
      9 #     * Redistributions of source code must retain the above copyright
     10 #       notice, this list of conditions and the following disclaimer.
     11 #     * Redistributions in binary form must reproduce the above
     12 #       copyright notice, this list of conditions and the following
     13 #       disclaimer in the documentation and/or other materials provided
     14 #       with the distribution.
     15 #     * Neither the name of Google Inc. nor the names of its
     16 #       contributors may be used to endorse or promote products derived
     17 #       from this software without specific prior written permission.
     18 #
     19 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     20 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     21 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     22 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     23 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     24 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     25 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     26 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     27 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     28 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     29 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     30 #
     31 
     32 #
     33 # Emits a C++ file to be compiled and linked into libv8 to support postmortem
     34 # debugging tools.  Most importantly, this tool emits constants describing V8
     35 # internals:
     36 #
     37 #    v8dbg_type_CLASS__TYPE = VALUE             Describes class type values
     38 #    v8dbg_class_CLASS__FIELD__TYPE = OFFSET    Describes class fields
     39 #    v8dbg_parent_CLASS__PARENT                 Describes class hierarchy
     40 #    v8dbg_frametype_NAME = VALUE               Describes stack frame values
     41 #    v8dbg_off_fp_NAME = OFFSET                 Frame pointer offsets
     42 #    v8dbg_prop_NAME = OFFSET                   Object property offsets
     43 #    v8dbg_NAME = VALUE                         Miscellaneous values
     44 #
     45 # These constants are declared as global integers so that they'll be present in
     46 # the generated libv8 binary.
     47 #
     48 
     49 import re
     50 import sys
     51 
     52 #
     53 # Miscellaneous constants, tags, and masks used for object identification.
     54 #
     55 consts_misc = [
     56     { 'name': 'FirstNonstringType',     'value': 'FIRST_NONSTRING_TYPE' },
     57 
     58     { 'name': 'IsNotStringMask',        'value': 'kIsNotStringMask' },
     59     { 'name': 'StringTag',              'value': 'kStringTag' },
     60     { 'name': 'NotStringTag',           'value': 'kNotStringTag' },
     61 
     62     { 'name': 'StringEncodingMask',     'value': 'kStringEncodingMask' },
     63     { 'name': 'TwoByteStringTag',       'value': 'kTwoByteStringTag' },
     64     { 'name': 'AsciiStringTag',         'value': 'kOneByteStringTag' },
     65 
     66     { 'name': 'StringRepresentationMask',
     67         'value': 'kStringRepresentationMask' },
     68     { 'name': 'SeqStringTag',           'value': 'kSeqStringTag' },
     69     { 'name': 'ConsStringTag',          'value': 'kConsStringTag' },
     70     { 'name': 'ExternalStringTag',      'value': 'kExternalStringTag' },
     71     { 'name': 'SlicedStringTag',        'value': 'kSlicedStringTag' },
     72 
     73     { 'name': 'FailureTag',             'value': 'kFailureTag' },
     74     { 'name': 'FailureTagMask',         'value': 'kFailureTagMask' },
     75     { 'name': 'HeapObjectTag',          'value': 'kHeapObjectTag' },
     76     { 'name': 'HeapObjectTagMask',      'value': 'kHeapObjectTagMask' },
     77     { 'name': 'SmiTag',                 'value': 'kSmiTag' },
     78     { 'name': 'SmiTagMask',             'value': 'kSmiTagMask' },
     79     { 'name': 'SmiValueShift',          'value': 'kSmiTagSize' },
     80     { 'name': 'SmiShiftSize',           'value': 'kSmiShiftSize' },
     81     { 'name': 'PointerSizeLog2',        'value': 'kPointerSizeLog2' },
     82 
     83     { 'name': 'prop_idx_first',
     84         'value': 'DescriptorArray::kFirstIndex' },
     85     { 'name': 'prop_type_field',
     86         'value': 'FIELD' },
     87     { 'name': 'prop_type_first_phantom',
     88         'value': 'TRANSITION' },
     89     { 'name': 'prop_type_mask',
     90         'value': 'PropertyDetails::TypeField::kMask' },
     91 
     92     { 'name': 'prop_desc_key',
     93         'value': 'DescriptorArray::kDescriptorKey' },
     94     { 'name': 'prop_desc_details',
     95         'value': 'DescriptorArray::kDescriptorDetails' },
     96     { 'name': 'prop_desc_value',
     97         'value': 'DescriptorArray::kDescriptorValue' },
     98     { 'name': 'prop_desc_size',
     99         'value': 'DescriptorArray::kDescriptorSize' },
    100 
    101     { 'name': 'off_fp_context',
    102         'value': 'StandardFrameConstants::kContextOffset' },
    103     { 'name': 'off_fp_constant_pool',
    104         'value': 'StandardFrameConstants::kConstantPoolOffset' },
    105     { 'name': 'off_fp_marker',
    106         'value': 'StandardFrameConstants::kMarkerOffset' },
    107     { 'name': 'off_fp_function',
    108         'value': 'JavaScriptFrameConstants::kFunctionOffset' },
    109     { 'name': 'off_fp_args',
    110         'value': 'JavaScriptFrameConstants::kLastParameterOffset' },
    111 ];
    112 
    113 #
    114 # The following useful fields are missing accessors, so we define fake ones.
    115 #
    116 extras_accessors = [
    117     'HeapObject, map, Map, kMapOffset',
    118     'JSObject, elements, Object, kElementsOffset',
    119     'FixedArray, data, uintptr_t, kHeaderSize',
    120     'Map, instance_attributes, int, kInstanceAttributesOffset',
    121     'Map, inobject_properties, int, kInObjectPropertiesOffset',
    122     'Map, instance_size, int, kInstanceSizeOffset',
    123     'HeapNumber, value, double, kValueOffset',
    124     'ConsString, first, String, kFirstOffset',
    125     'ConsString, second, String, kSecondOffset',
    126     'ExternalString, resource, Object, kResourceOffset',
    127     'SeqOneByteString, chars, char, kHeaderSize',
    128     'SeqTwoByteString, chars, char, kHeaderSize',
    129     'SharedFunctionInfo, code, Code, kCodeOffset',
    130     'SlicedString, parent, String, kParentOffset',
    131     'Code, instruction_start, uintptr_t, kHeaderSize',
    132     'Code, instruction_size, int, kInstructionSizeOffset',
    133 ];
    134 
    135 #
    136 # The following is a whitelist of classes we expect to find when scanning the
    137 # source code. This list is not exhaustive, but it's still useful to identify
    138 # when this script gets out of sync with the source. See load_objects().
    139 #
    140 expected_classes = [
    141     'ConsString', 'FixedArray', 'HeapNumber', 'JSArray', 'JSFunction',
    142     'JSObject', 'JSRegExp', 'JSValue', 'Map', 'Oddball', 'Script',
    143     'SeqOneByteString', 'SharedFunctionInfo'
    144 ];
    145 
    146 
    147 #
    148 # The following structures store high-level representations of the structures
    149 # for which we're going to emit descriptive constants.
    150 #
    151 types = {};             # set of all type names
    152 typeclasses = {};       # maps type names to corresponding class names
    153 klasses = {};           # known classes, including parents
    154 fields = [];            # field declarations
    155 
    156 header = '''
    157 /*
    158  * This file is generated by %s.  Do not edit directly.
    159  */
    160 
    161 #include "v8.h"
    162 #include "frames.h"
    163 #include "frames-inl.h" /* for architecture-specific frame constants */
    164 
    165 using namespace v8::internal;
    166 
    167 extern "C" {
    168 
    169 /* stack frame constants */
    170 #define FRAME_CONST(value, klass)       \
    171     int v8dbg_frametype_##klass = StackFrame::value;
    172 
    173 STACK_FRAME_TYPE_LIST(FRAME_CONST)
    174 
    175 #undef FRAME_CONST
    176 
    177 ''' % sys.argv[0];
    178 
    179 footer = '''
    180 }
    181 '''
    182 
    183 #
    184 # Loads class hierarchy and type information from "objects.h".
    185 #
    186 def load_objects():
    187         objfilename = sys.argv[2];
    188         objfile = open(objfilename, 'r');
    189         in_insttype = False;
    190 
    191         typestr = '';
    192 
    193         #
    194         # Construct a dictionary for the classes we're sure should be present.
    195         #
    196         checktypes = {};
    197         for klass in expected_classes:
    198                 checktypes[klass] = True;
    199 
    200         #
    201         # Iterate objects.h line-by-line to collect type and class information.
    202         # For types, we accumulate a string representing the entire InstanceType
    203         # enum definition and parse it later because it's easier to do so
    204         # without the embedded newlines.
    205         #
    206         for line in objfile:
    207                 if (line.startswith('enum InstanceType {')):
    208                         in_insttype = True;
    209                         continue;
    210 
    211                 if (in_insttype and line.startswith('};')):
    212                         in_insttype = False;
    213                         continue;
    214 
    215                 line = re.sub('//.*', '', line.rstrip().lstrip());
    216 
    217                 if (in_insttype):
    218                         typestr += line;
    219                         continue;
    220 
    221                 match = re.match('class (\w[^\s:]*)(: public (\w[^\s{]*))?\s*{',
    222                     line);
    223 
    224                 if (match):
    225                         klass = match.group(1);
    226                         pklass = match.group(3);
    227                         klasses[klass] = { 'parent': pklass };
    228 
    229         #
    230         # Process the instance type declaration.
    231         #
    232         entries = typestr.split(',');
    233         for entry in entries:
    234                 types[re.sub('\s*=.*', '', entry).lstrip()] = True;
    235 
    236         #
    237         # Infer class names for each type based on a systematic transformation.
    238         # For example, "JS_FUNCTION_TYPE" becomes "JSFunction".  We find the
    239         # class for each type rather than the other way around because there are
    240         # fewer cases where one type maps to more than one class than the other
    241         # way around.
    242         #
    243         for type in types:
    244                 #
    245                 # Symbols and Strings are implemented using the same classes.
    246                 #
    247                 usetype = re.sub('SYMBOL_', 'STRING_', type);
    248 
    249                 #
    250                 # REGEXP behaves like REG_EXP, as in JS_REGEXP_TYPE => JSRegExp.
    251                 #
    252                 usetype = re.sub('_REGEXP_', '_REG_EXP_', usetype);
    253 
    254                 #
    255                 # Remove the "_TYPE" suffix and then convert to camel case,
    256                 # except that a "JS" prefix remains uppercase (as in
    257                 # "JS_FUNCTION_TYPE" => "JSFunction").
    258                 #
    259                 if (not usetype.endswith('_TYPE')):
    260                         continue;
    261 
    262                 usetype = usetype[0:len(usetype) - len('_TYPE')];
    263                 parts = usetype.split('_');
    264                 cctype = '';
    265 
    266                 if (parts[0] == 'JS'):
    267                         cctype = 'JS';
    268                         start = 1;
    269                 else:
    270                         cctype = '';
    271                         start = 0;
    272 
    273                 for ii in range(start, len(parts)):
    274                         part = parts[ii];
    275                         cctype += part[0].upper() + part[1:].lower();
    276 
    277                 #
    278                 # Mapping string types is more complicated.  Both types and
    279                 # class names for Strings specify a representation (e.g., Seq,
    280                 # Cons, External, or Sliced) and an encoding (TwoByte or Ascii),
    281                 # In the simplest case, both of these are explicit in both
    282                 # names, as in:
    283                 #
    284                 #       EXTERNAL_ASCII_STRING_TYPE => ExternalAsciiString
    285                 #
    286                 # However, either the representation or encoding can be omitted
    287                 # from the type name, in which case "Seq" and "TwoByte" are
    288                 # assumed, as in:
    289                 #
    290                 #       STRING_TYPE => SeqTwoByteString
    291                 #
    292                 # Additionally, sometimes the type name has more information
    293                 # than the class, as in:
    294                 #
    295                 #       CONS_ASCII_STRING_TYPE => ConsString
    296                 #
    297                 # To figure this out dynamically, we first check for a
    298                 # representation and encoding and add them if they're not
    299                 # present.  If that doesn't yield a valid class name, then we
    300                 # strip out the representation.
    301                 #
    302                 if (cctype.endswith('String')):
    303                         if (cctype.find('Cons') == -1 and
    304                             cctype.find('External') == -1 and
    305                             cctype.find('Sliced') == -1):
    306                                 if (cctype.find('Ascii') != -1):
    307                                         cctype = re.sub('AsciiString$',
    308                                             'SeqOneByteString', cctype);
    309                                 else:
    310                                         cctype = re.sub('String$',
    311                                             'SeqString', cctype);
    312 
    313                         if (cctype.find('Ascii') == -1):
    314                                 cctype = re.sub('String$', 'TwoByteString',
    315                                     cctype);
    316 
    317                         if (not (cctype in klasses)):
    318                                 cctype = re.sub('Ascii', '', cctype);
    319                                 cctype = re.sub('TwoByte', '', cctype);
    320 
    321                 #
    322                 # Despite all that, some types have no corresponding class.
    323                 #
    324                 if (cctype in klasses):
    325                         typeclasses[type] = cctype;
    326                         if (cctype in checktypes):
    327                                 del checktypes[cctype];
    328 
    329         if (len(checktypes) > 0):
    330                 for klass in checktypes:
    331                         print('error: expected class \"%s\" not found' % klass);
    332 
    333                 sys.exit(1);
    334 
    335 
    336 #
    337 # For a given macro call, pick apart the arguments and return an object
    338 # describing the corresponding output constant.  See load_fields().
    339 #
    340 def parse_field(call):
    341         # Replace newlines with spaces.
    342         for ii in range(0, len(call)):
    343                 if (call[ii] == '\n'):
    344                         call[ii] == ' ';
    345 
    346         idx = call.find('(');
    347         kind = call[0:idx];
    348         rest = call[idx + 1: len(call) - 1];
    349         args = re.split('\s*,\s*', rest);
    350 
    351         consts = [];
    352 
    353         if (kind == 'ACCESSORS' or kind == 'ACCESSORS_GCSAFE'):
    354                 klass = args[0];
    355                 field = args[1];
    356                 dtype = args[2];
    357                 offset = args[3];
    358 
    359                 return ({
    360                     'name': 'class_%s__%s__%s' % (klass, field, dtype),
    361                     'value': '%s::%s' % (klass, offset)
    362                 });
    363 
    364         assert(kind == 'SMI_ACCESSORS');
    365         klass = args[0];
    366         field = args[1];
    367         offset = args[2];
    368 
    369         return ({
    370             'name': 'class_%s__%s__%s' % (klass, field, 'SMI'),
    371             'value': '%s::%s' % (klass, offset)
    372         });
    373 
    374 #
    375 # Load field offset information from objects-inl.h.
    376 #
    377 def load_fields():
    378         inlfilename = sys.argv[3];
    379         inlfile = open(inlfilename, 'r');
    380 
    381         #
    382         # Each class's fields and the corresponding offsets are described in the
    383         # source by calls to macros like "ACCESSORS" (and friends).  All we do
    384         # here is extract these macro invocations, taking into account that they
    385         # may span multiple lines and may contain nested parentheses.  We also
    386         # call parse_field() to pick apart the invocation.
    387         #
    388         prefixes = [ 'ACCESSORS', 'ACCESSORS_GCSAFE', 'SMI_ACCESSORS' ];
    389         current = '';
    390         opens = 0;
    391 
    392         for line in inlfile:
    393                 if (opens > 0):
    394                         # Continuation line
    395                         for ii in range(0, len(line)):
    396                                 if (line[ii] == '('):
    397                                         opens += 1;
    398                                 elif (line[ii] == ')'):
    399                                         opens -= 1;
    400 
    401                                 if (opens == 0):
    402                                         break;
    403 
    404                         current += line[0:ii + 1];
    405                         continue;
    406 
    407                 for prefix in prefixes:
    408                         if (not line.startswith(prefix + '(')):
    409                                 continue;
    410 
    411                         if (len(current) > 0):
    412                                 fields.append(parse_field(current));
    413                                 current = '';
    414 
    415                         for ii in range(len(prefix), len(line)):
    416                                 if (line[ii] == '('):
    417                                         opens += 1;
    418                                 elif (line[ii] == ')'):
    419                                         opens -= 1;
    420 
    421                                 if (opens == 0):
    422                                         break;
    423 
    424                         current += line[0:ii + 1];
    425 
    426         if (len(current) > 0):
    427                 fields.append(parse_field(current));
    428                 current = '';
    429 
    430         for body in extras_accessors:
    431                 fields.append(parse_field('ACCESSORS(%s)' % body));
    432 
    433 #
    434 # Emit a block of constants.
    435 #
    436 def emit_set(out, consts):
    437         # Fix up overzealous parses.  This could be done inside the
    438         # parsers but as there are several, it's easiest to do it here.
    439         ws = re.compile('\s+')
    440         for const in consts:
    441                 name = ws.sub('', const['name'])
    442                 value = ws.sub('', str(const['value']))  # Can be a number.
    443                 out.write('int v8dbg_%s = %s;\n' % (name, value))
    444         out.write('\n');
    445 
    446 #
    447 # Emit the whole output file.
    448 #
    449 def emit_config():
    450         out = file(sys.argv[1], 'w');
    451 
    452         out.write(header);
    453 
    454         out.write('/* miscellaneous constants */\n');
    455         emit_set(out, consts_misc);
    456 
    457         out.write('/* class type information */\n');
    458         consts = [];
    459         keys = typeclasses.keys();
    460         keys.sort();
    461         for typename in keys:
    462                 klass = typeclasses[typename];
    463                 consts.append({
    464                     'name': 'type_%s__%s' % (klass, typename),
    465                     'value': typename
    466                 });
    467 
    468         emit_set(out, consts);
    469 
    470         out.write('/* class hierarchy information */\n');
    471         consts = [];
    472         keys = klasses.keys();
    473         keys.sort();
    474         for klassname in keys:
    475                 pklass = klasses[klassname]['parent'];
    476                 if (pklass == None):
    477                         continue;
    478 
    479                 consts.append({
    480                     'name': 'parent_%s__%s' % (klassname, pklass),
    481                     'value': 0
    482                 });
    483 
    484         emit_set(out, consts);
    485 
    486         out.write('/* field information */\n');
    487         emit_set(out, fields);
    488 
    489         out.write(footer);
    490 
    491 if (len(sys.argv) < 4):
    492         print('usage: %s output.cc objects.h objects-inl.h' % sys.argv[0]);
    493         sys.exit(2);
    494 
    495 load_objects();
    496 load_fields();
    497 emit_config();
    498