Home | History | Annotate | Download | only in tools
      1 #!/usr/bin/env python
      2 
      3 #
      4 # Copyright 2012 the V8 project authors. All rights reserved.
      5 # Redistribution and use in source and binary forms, with or without
      6 # modification, are permitted provided that the following conditions are
      7 # met:
      8 #
      9 #     * Redistributions of source code must retain the above copyright
     10 #       notice, this list of conditions and the following disclaimer.
     11 #     * Redistributions in binary form must reproduce the above
     12 #       copyright notice, this list of conditions and the following
     13 #       disclaimer in the documentation and/or other materials provided
     14 #       with the distribution.
     15 #     * Neither the name of Google Inc. nor the names of its
     16 #       contributors may be used to endorse or promote products derived
     17 #       from this software without specific prior written permission.
     18 #
     19 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     20 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     21 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     22 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     23 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     24 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     25 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     26 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     27 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     28 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     29 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     30 #
     31 
     32 #
     33 # Emits a C++ file to be compiled and linked into libv8 to support postmortem
     34 # debugging tools.  Most importantly, this tool emits constants describing V8
     35 # internals:
     36 #
     37 #    v8dbg_type_CLASS__TYPE = VALUE             Describes class type values
     38 #    v8dbg_class_CLASS__FIELD__TYPE = OFFSET    Describes class fields
     39 #    v8dbg_parent_CLASS__PARENT                 Describes class hierarchy
     40 #    v8dbg_frametype_NAME = VALUE               Describes stack frame values
     41 #    v8dbg_off_fp_NAME = OFFSET                 Frame pointer offsets
     42 #    v8dbg_prop_NAME = OFFSET                   Object property offsets
     43 #    v8dbg_NAME = VALUE                         Miscellaneous values
     44 #
     45 # These constants are declared as global integers so that they'll be present in
     46 # the generated libv8 binary.
     47 #
     48 
     49 import re
     50 import sys
     51 
     52 #
     53 # Miscellaneous constants, tags, and masks used for object identification.
     54 #
     55 consts_misc = [
     56     { 'name': 'FirstNonstringType',     'value': 'FIRST_NONSTRING_TYPE' },
     57 
     58     { 'name': 'IsNotStringMask',        'value': 'kIsNotStringMask' },
     59     { 'name': 'StringTag',              'value': 'kStringTag' },
     60     { 'name': 'NotStringTag',           'value': 'kNotStringTag' },
     61 
     62     { 'name': 'StringEncodingMask',     'value': 'kStringEncodingMask' },
     63     { 'name': 'TwoByteStringTag',       'value': 'kTwoByteStringTag' },
     64     { 'name': 'OneByteStringTag',       'value': 'kOneByteStringTag' },
     65 
     66     { 'name': 'StringRepresentationMask',
     67         'value': 'kStringRepresentationMask' },
     68     { 'name': 'SeqStringTag',           'value': 'kSeqStringTag' },
     69     { 'name': 'ConsStringTag',          'value': 'kConsStringTag' },
     70     { 'name': 'ExternalStringTag',      'value': 'kExternalStringTag' },
     71     { 'name': 'SlicedStringTag',        'value': 'kSlicedStringTag' },
     72 
     73     { 'name': 'FailureTag',             'value': 'kFailureTag' },
     74     { 'name': 'FailureTagMask',         'value': 'kFailureTagMask' },
     75     { 'name': 'HeapObjectTag',          'value': 'kHeapObjectTag' },
     76     { 'name': 'HeapObjectTagMask',      'value': 'kHeapObjectTagMask' },
     77     { 'name': 'SmiTag',                 'value': 'kSmiTag' },
     78     { 'name': 'SmiTagMask',             'value': 'kSmiTagMask' },
     79     { 'name': 'SmiValueShift',          'value': 'kSmiTagSize' },
     80     { 'name': 'SmiShiftSize',           'value': 'kSmiShiftSize' },
     81     { 'name': 'PointerSizeLog2',        'value': 'kPointerSizeLog2' },
     82 
     83     { 'name': 'OddballFalse',           'value': 'Oddball::kFalse' },
     84     { 'name': 'OddballTrue',            'value': 'Oddball::kTrue' },
     85     { 'name': 'OddballTheHole',         'value': 'Oddball::kTheHole' },
     86     { 'name': 'OddballNull',            'value': 'Oddball::kNull' },
     87     { 'name': 'OddballArgumentMarker',  'value': 'Oddball::kArgumentMarker' },
     88     { 'name': 'OddballUndefined',       'value': 'Oddball::kUndefined' },
     89     { 'name': 'OddballUninitialized',   'value': 'Oddball::kUninitialized' },
     90     { 'name': 'OddballOther',           'value': 'Oddball::kOther' },
     91     { 'name': 'OddballException',       'value': 'Oddball::kException' },
     92 
     93     { 'name': 'prop_idx_first',
     94         'value': 'DescriptorArray::kFirstIndex' },
     95     { 'name': 'prop_type_field',
     96         'value': 'FIELD' },
     97     { 'name': 'prop_type_first_phantom',
     98         'value': 'TRANSITION' },
     99     { 'name': 'prop_type_mask',
    100         'value': 'PropertyDetails::TypeField::kMask' },
    101     { 'name': 'prop_index_mask',
    102         'value': 'PropertyDetails::FieldIndexField::kMask' },
    103     { 'name': 'prop_index_shift',
    104         'value': 'PropertyDetails::FieldIndexField::kShift' },
    105 
    106     { 'name': 'prop_desc_key',
    107         'value': 'DescriptorArray::kDescriptorKey' },
    108     { 'name': 'prop_desc_details',
    109         'value': 'DescriptorArray::kDescriptorDetails' },
    110     { 'name': 'prop_desc_value',
    111         'value': 'DescriptorArray::kDescriptorValue' },
    112     { 'name': 'prop_desc_size',
    113         'value': 'DescriptorArray::kDescriptorSize' },
    114 
    115     { 'name': 'elements_fast_holey_elements',
    116         'value': 'FAST_HOLEY_ELEMENTS' },
    117     { 'name': 'elements_fast_elements',
    118         'value': 'FAST_ELEMENTS' },
    119     { 'name': 'elements_dictionary_elements',
    120         'value': 'DICTIONARY_ELEMENTS' },
    121 
    122     { 'name': 'bit_field2_elements_kind_mask',
    123        'value': 'Map::kElementsKindMask' },
    124     { 'name': 'bit_field2_elements_kind_shift',
    125        'value': 'Map::kElementsKindShift' },
    126     { 'name': 'bit_field3_dictionary_map_shift',
    127         'value': 'Map::DictionaryMap::kShift' },
    128 
    129     { 'name': 'off_fp_context',
    130         'value': 'StandardFrameConstants::kContextOffset' },
    131     { 'name': 'off_fp_constant_pool',
    132         'value': 'StandardFrameConstants::kConstantPoolOffset' },
    133     { 'name': 'off_fp_marker',
    134         'value': 'StandardFrameConstants::kMarkerOffset' },
    135     { 'name': 'off_fp_function',
    136         'value': 'JavaScriptFrameConstants::kFunctionOffset' },
    137     { 'name': 'off_fp_args',
    138         'value': 'JavaScriptFrameConstants::kLastParameterOffset' },
    139 ];
    140 
    141 #
    142 # The following useful fields are missing accessors, so we define fake ones.
    143 #
    144 extras_accessors = [
    145     'HeapObject, map, Map, kMapOffset',
    146     'JSObject, elements, Object, kElementsOffset',
    147     'FixedArray, data, uintptr_t, kHeaderSize',
    148     'Map, instance_attributes, int, kInstanceAttributesOffset',
    149     'Map, inobject_properties, int, kInObjectPropertiesOffset',
    150     'Map, instance_size, int, kInstanceSizeOffset',
    151     'Map, bit_field, char, kBitFieldOffset',
    152     'Map, bit_field2, char, kBitField2Offset',
    153     'Map, bit_field3, SMI, kBitField3Offset',
    154     'Map, prototype, Object, kPrototypeOffset',
    155     'NameDictionaryShape, prefix_size, int, kPrefixSize',
    156     'NameDictionaryShape, entry_size, int, kEntrySize',
    157     'SeededNumberDictionaryShape, prefix_size, int, kPrefixSize',
    158     'UnseededNumberDictionaryShape, prefix_size, int, kPrefixSize',
    159     'NumberDictionaryShape, entry_size, int, kEntrySize',
    160     'Oddball, kind_offset, int, kKindOffset',
    161     'HeapNumber, value, double, kValueOffset',
    162     'ConsString, first, String, kFirstOffset',
    163     'ConsString, second, String, kSecondOffset',
    164     'ExternalString, resource, Object, kResourceOffset',
    165     'SeqOneByteString, chars, char, kHeaderSize',
    166     'SeqTwoByteString, chars, char, kHeaderSize',
    167     'SharedFunctionInfo, code, Code, kCodeOffset',
    168     'SlicedString, parent, String, kParentOffset',
    169     'Code, instruction_start, uintptr_t, kHeaderSize',
    170     'Code, instruction_size, int, kInstructionSizeOffset',
    171 ];
    172 
    173 #
    174 # The following is a whitelist of classes we expect to find when scanning the
    175 # source code. This list is not exhaustive, but it's still useful to identify
    176 # when this script gets out of sync with the source. See load_objects().
    177 #
    178 expected_classes = [
    179     'ConsString', 'FixedArray', 'HeapNumber', 'JSArray', 'JSFunction',
    180     'JSObject', 'JSRegExp', 'JSValue', 'Map', 'Oddball', 'Script',
    181     'SeqOneByteString', 'SharedFunctionInfo'
    182 ];
    183 
    184 
    185 #
    186 # The following structures store high-level representations of the structures
    187 # for which we're going to emit descriptive constants.
    188 #
    189 types = {};             # set of all type names
    190 typeclasses = {};       # maps type names to corresponding class names
    191 klasses = {};           # known classes, including parents
    192 fields = [];            # field declarations
    193 
#
# Text written at the very top of the generated C++ file (see emit_config()).
# The single %s placeholder is filled in with sys.argv[0] when this module is
# loaded.
#
# NOTE(review): this is not a raw string, so the backslash that ends the
# "#define FRAME_CONST" line is consumed by Python as a line continuation.
# The macro is therefore emitted on a single line instead of as a two-line
# macro.  Confirm that this is intended before touching the literal.
#
header = '''
/*
 * This file is generated by %s.  Do not edit directly.
 */

#include "v8.h"
#include "frames.h"
#include "frames-inl.h" /* for architecture-specific frame constants */

using namespace v8::internal;

extern "C" {

/* stack frame constants */
#define FRAME_CONST(value, klass)       \
    int v8dbg_frametype_##klass = StackFrame::value;

STACK_FRAME_TYPE_LIST(FRAME_CONST)

#undef FRAME_CONST

''' % sys.argv[0];

#
# Text written at the very end of the generated file; the brace closes the
# extern "C" block that the header opens.
#
footer = '''
}
'''
    220 
    221 #
    222 # Loads class hierarchy and type information from "objects.h".
    223 #
    224 def load_objects():
    225         objfilename = sys.argv[2];
    226         objfile = open(objfilename, 'r');
    227         in_insttype = False;
    228 
    229         typestr = '';
    230 
    231         #
    232         # Construct a dictionary for the classes we're sure should be present.
    233         #
    234         checktypes = {};
    235         for klass in expected_classes:
    236                 checktypes[klass] = True;
    237 
    238         #
    239         # Iterate objects.h line-by-line to collect type and class information.
    240         # For types, we accumulate a string representing the entire InstanceType
    241         # enum definition and parse it later because it's easier to do so
    242         # without the embedded newlines.
    243         #
    244         for line in objfile:
    245                 if (line.startswith('enum InstanceType {')):
    246                         in_insttype = True;
    247                         continue;
    248 
    249                 if (in_insttype and line.startswith('};')):
    250                         in_insttype = False;
    251                         continue;
    252 
    253                 line = re.sub('//.*', '', line.rstrip().lstrip());
    254 
    255                 if (in_insttype):
    256                         typestr += line;
    257                         continue;
    258 
    259                 match = re.match('class (\w[^\s:]*)(: public (\w[^\s{]*))?\s*{',
    260                     line);
    261 
    262                 if (match):
    263                         klass = match.group(1);
    264                         pklass = match.group(3);
    265                         klasses[klass] = { 'parent': pklass };
    266 
    267         #
    268         # Process the instance type declaration.
    269         #
    270         entries = typestr.split(',');
    271         for entry in entries:
    272                 types[re.sub('\s*=.*', '', entry).lstrip()] = True;
    273 
    274         #
    275         # Infer class names for each type based on a systematic transformation.
    276         # For example, "JS_FUNCTION_TYPE" becomes "JSFunction".  We find the
    277         # class for each type rather than the other way around because there are
    278         # fewer cases where one type maps to more than one class than the other
    279         # way around.
    280         #
    281         for type in types:
    282                 #
    283                 # Symbols and Strings are implemented using the same classes.
    284                 #
    285                 usetype = re.sub('SYMBOL_', 'STRING_', type);
    286 
    287                 #
    288                 # REGEXP behaves like REG_EXP, as in JS_REGEXP_TYPE => JSRegExp.
    289                 #
    290                 usetype = re.sub('_REGEXP_', '_REG_EXP_', usetype);
    291 
    292                 #
    293                 # Remove the "_TYPE" suffix and then convert to camel case,
    294                 # except that a "JS" prefix remains uppercase (as in
    295                 # "JS_FUNCTION_TYPE" => "JSFunction").
    296                 #
    297                 if (not usetype.endswith('_TYPE')):
    298                         continue;
    299 
    300                 usetype = usetype[0:len(usetype) - len('_TYPE')];
    301                 parts = usetype.split('_');
    302                 cctype = '';
    303 
    304                 if (parts[0] == 'JS'):
    305                         cctype = 'JS';
    306                         start = 1;
    307                 else:
    308                         cctype = '';
    309                         start = 0;
    310 
    311                 for ii in range(start, len(parts)):
    312                         part = parts[ii];
    313                         cctype += part[0].upper() + part[1:].lower();
    314 
    315                 #
    316                 # Mapping string types is more complicated.  Both types and
    317                 # class names for Strings specify a representation (e.g., Seq,
    318                 # Cons, External, or Sliced) and an encoding (TwoByte/OneByte),
    319                 # In the simplest case, both of these are explicit in both
    320                 # names, as in:
    321                 #
    322                 #       EXTERNAL_ONE_BYTE_STRING_TYPE => ExternalOneByteString
    323                 #
    324                 # However, either the representation or encoding can be omitted
    325                 # from the type name, in which case "Seq" and "TwoByte" are
    326                 # assumed, as in:
    327                 #
    328                 #       STRING_TYPE => SeqTwoByteString
    329                 #
    330                 # Additionally, sometimes the type name has more information
    331                 # than the class, as in:
    332                 #
    333                 #       CONS_ONE_BYTE_STRING_TYPE => ConsString
    334                 #
    335                 # To figure this out dynamically, we first check for a
    336                 # representation and encoding and add them if they're not
    337                 # present.  If that doesn't yield a valid class name, then we
    338                 # strip out the representation.
    339                 #
    340                 if (cctype.endswith('String')):
    341                         if (cctype.find('Cons') == -1 and
    342                             cctype.find('External') == -1 and
    343                             cctype.find('Sliced') == -1):
    344                                 if (cctype.find('OneByte') != -1):
    345                                         cctype = re.sub('OneByteString$',
    346                                             'SeqOneByteString', cctype);
    347                                 else:
    348                                         cctype = re.sub('String$',
    349                                             'SeqString', cctype);
    350 
    351                         if (cctype.find('OneByte') == -1):
    352                                 cctype = re.sub('String$', 'TwoByteString',
    353                                     cctype);
    354 
    355                         if (not (cctype in klasses)):
    356                                 cctype = re.sub('OneByte', '', cctype);
    357                                 cctype = re.sub('TwoByte', '', cctype);
    358 
    359                 #
    360                 # Despite all that, some types have no corresponding class.
    361                 #
    362                 if (cctype in klasses):
    363                         typeclasses[type] = cctype;
    364                         if (cctype in checktypes):
    365                                 del checktypes[cctype];
    366 
    367         if (len(checktypes) > 0):
    368                 for klass in checktypes:
    369                         print('error: expected class \"%s\" not found' % klass);
    370 
    371                 sys.exit(1);
    372 
    373 
    374 #
    375 # For a given macro call, pick apart the arguments and return an object
    376 # describing the corresponding output constant.  See load_fields().
    377 #
    378 def parse_field(call):
    379         # Replace newlines with spaces.
    380         for ii in range(0, len(call)):
    381                 if (call[ii] == '\n'):
    382                         call[ii] == ' ';
    383 
    384         idx = call.find('(');
    385         kind = call[0:idx];
    386         rest = call[idx + 1: len(call) - 1];
    387         args = re.split('\s*,\s*', rest);
    388 
    389         consts = [];
    390 
    391         if (kind == 'ACCESSORS' or kind == 'ACCESSORS_GCSAFE'):
    392                 klass = args[0];
    393                 field = args[1];
    394                 dtype = args[2];
    395                 offset = args[3];
    396 
    397                 return ({
    398                     'name': 'class_%s__%s__%s' % (klass, field, dtype),
    399                     'value': '%s::%s' % (klass, offset)
    400                 });
    401 
    402         assert(kind == 'SMI_ACCESSORS' or kind == 'ACCESSORS_TO_SMI');
    403         klass = args[0];
    404         field = args[1];
    405         offset = args[2];
    406 
    407         return ({
    408             'name': 'class_%s__%s__%s' % (klass, field, 'SMI'),
    409             'value': '%s::%s' % (klass, offset)
    410         });
    411 
    412 #
    413 # Load field offset information from objects-inl.h.
    414 #
    415 def load_fields():
    416         inlfilename = sys.argv[3];
    417         inlfile = open(inlfilename, 'r');
    418 
    419         #
    420         # Each class's fields and the corresponding offsets are described in the
    421         # source by calls to macros like "ACCESSORS" (and friends).  All we do
    422         # here is extract these macro invocations, taking into account that they
    423         # may span multiple lines and may contain nested parentheses.  We also
    424         # call parse_field() to pick apart the invocation.
    425         #
    426         prefixes = [ 'ACCESSORS', 'ACCESSORS_GCSAFE',
    427                      'SMI_ACCESSORS', 'ACCESSORS_TO_SMI' ];
    428         current = '';
    429         opens = 0;
    430 
    431         for line in inlfile:
    432                 if (opens > 0):
    433                         # Continuation line
    434                         for ii in range(0, len(line)):
    435                                 if (line[ii] == '('):
    436                                         opens += 1;
    437                                 elif (line[ii] == ')'):
    438                                         opens -= 1;
    439 
    440                                 if (opens == 0):
    441                                         break;
    442 
    443                         current += line[0:ii + 1];
    444                         continue;
    445 
    446                 for prefix in prefixes:
    447                         if (not line.startswith(prefix + '(')):
    448                                 continue;
    449 
    450                         if (len(current) > 0):
    451                                 fields.append(parse_field(current));
    452                                 current = '';
    453 
    454                         for ii in range(len(prefix), len(line)):
    455                                 if (line[ii] == '('):
    456                                         opens += 1;
    457                                 elif (line[ii] == ')'):
    458                                         opens -= 1;
    459 
    460                                 if (opens == 0):
    461                                         break;
    462 
    463                         current += line[0:ii + 1];
    464 
    465         if (len(current) > 0):
    466                 fields.append(parse_field(current));
    467                 current = '';
    468 
    469         for body in extras_accessors:
    470                 fields.append(parse_field('ACCESSORS(%s)' % body));
    471 
    472 #
    473 # Emit a block of constants.
    474 #
    475 def emit_set(out, consts):
    476         # Fix up overzealous parses.  This could be done inside the
    477         # parsers but as there are several, it's easiest to do it here.
    478         ws = re.compile('\s+')
    479         for const in consts:
    480                 name = ws.sub('', const['name'])
    481                 value = ws.sub('', str(const['value']))  # Can be a number.
    482                 out.write('int v8dbg_%s = %s;\n' % (name, value))
    483         out.write('\n');
    484 
    485 #
    486 # Emit the whole output file.
    487 #
    488 def emit_config():
    489         out = file(sys.argv[1], 'w');
    490 
    491         out.write(header);
    492 
    493         out.write('/* miscellaneous constants */\n');
    494         emit_set(out, consts_misc);
    495 
    496         out.write('/* class type information */\n');
    497         consts = [];
    498         keys = typeclasses.keys();
    499         keys.sort();
    500         for typename in keys:
    501                 klass = typeclasses[typename];
    502                 consts.append({
    503                     'name': 'type_%s__%s' % (klass, typename),
    504                     'value': typename
    505                 });
    506 
    507         emit_set(out, consts);
    508 
    509         out.write('/* class hierarchy information */\n');
    510         consts = [];
    511         keys = klasses.keys();
    512         keys.sort();
    513         for klassname in keys:
    514                 pklass = klasses[klassname]['parent'];
    515                 if (pklass == None):
    516                         continue;
    517 
    518                 consts.append({
    519                     'name': 'parent_%s__%s' % (klassname, pklass),
    520                     'value': 0
    521                 });
    522 
    523         emit_set(out, consts);
    524 
    525         out.write('/* field information */\n');
    526         emit_set(out, fields);
    527 
    528         out.write(footer);
    529 
    530 if (len(sys.argv) < 4):
    531         print('usage: %s output.cc objects.h objects-inl.h' % sys.argv[0]);
    532         sys.exit(2);
    533 
    534 load_objects();
    535 load_fields();
    536 emit_config();
    537