Home | History | Annotate | Download | only in tools
      1 #!/usr/bin/env python
      2 
      3 #
      4 # Copyright 2012 the V8 project authors. All rights reserved.
      5 # Redistribution and use in source and binary forms, with or without
      6 # modification, are permitted provided that the following conditions are
      7 # met:
      8 #
      9 #     * Redistributions of source code must retain the above copyright
     10 #       notice, this list of conditions and the following disclaimer.
     11 #     * Redistributions in binary form must reproduce the above
     12 #       copyright notice, this list of conditions and the following
     13 #       disclaimer in the documentation and/or other materials provided
     14 #       with the distribution.
     15 #     * Neither the name of Google Inc. nor the names of its
     16 #       contributors may be used to endorse or promote products derived
     17 #       from this software without specific prior written permission.
     18 #
     19 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     20 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     21 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     22 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     23 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     24 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     25 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     26 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     27 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     28 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     29 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     30 #
     31 
     32 #
     33 # Emits a C++ file to be compiled and linked into libv8 to support postmortem
     34 # debugging tools.  Most importantly, this tool emits constants describing V8
     35 # internals:
     36 #
     37 #    v8dbg_type_CLASS__TYPE = VALUE             Describes class type values
     38 #    v8dbg_class_CLASS__FIELD__TYPE = OFFSET    Describes class fields
     39 #    v8dbg_parent_CLASS__PARENT                 Describes class hierarchy
     40 #    v8dbg_frametype_NAME = VALUE               Describes stack frame values
     41 #    v8dbg_off_fp_NAME = OFFSET                 Frame pointer offsets
     42 #    v8dbg_prop_NAME = OFFSET                   Object property offsets
     43 #    v8dbg_NAME = VALUE                         Miscellaneous values
     44 #
     45 # These constants are declared as global integers so that they'll be present in
     46 # the generated libv8 binary.
     47 #
     48 
     49 import re
     50 import sys
     51 
     52 #
     53 # Miscellaneous constants, tags, and masks used for object identification.
     54 #
     55 consts_misc = [
     56     { 'name': 'FirstNonstringType',     'value': 'FIRST_NONSTRING_TYPE' },
     57 
     58     { 'name': 'IsNotStringMask',        'value': 'kIsNotStringMask' },
     59     { 'name': 'StringTag',              'value': 'kStringTag' },
     60     { 'name': 'NotStringTag',           'value': 'kNotStringTag' },
     61 
     62     { 'name': 'StringEncodingMask',     'value': 'kStringEncodingMask' },
     63     { 'name': 'TwoByteStringTag',       'value': 'kTwoByteStringTag' },
     64     { 'name': 'AsciiStringTag',         'value': 'kOneByteStringTag' },
     65 
     66     { 'name': 'StringRepresentationMask',
     67         'value': 'kStringRepresentationMask' },
     68     { 'name': 'SeqStringTag',           'value': 'kSeqStringTag' },
     69     { 'name': 'ConsStringTag',          'value': 'kConsStringTag' },
     70     { 'name': 'ExternalStringTag',      'value': 'kExternalStringTag' },
     71     { 'name': 'SlicedStringTag',        'value': 'kSlicedStringTag' },
     72 
     73     { 'name': 'FailureTag',             'value': 'kFailureTag' },
     74     { 'name': 'FailureTagMask',         'value': 'kFailureTagMask' },
     75     { 'name': 'HeapObjectTag',          'value': 'kHeapObjectTag' },
     76     { 'name': 'HeapObjectTagMask',      'value': 'kHeapObjectTagMask' },
     77     { 'name': 'SmiTag',                 'value': 'kSmiTag' },
     78     { 'name': 'SmiTagMask',             'value': 'kSmiTagMask' },
     79     { 'name': 'SmiValueShift',          'value': 'kSmiTagSize' },
     80     { 'name': 'SmiShiftSize',           'value': 'kSmiShiftSize' },
     81     { 'name': 'PointerSizeLog2',        'value': 'kPointerSizeLog2' },
     82 
     83     { 'name': 'prop_idx_first',
     84         'value': 'DescriptorArray::kFirstIndex' },
     85     { 'name': 'prop_type_field',
     86         'value': 'FIELD' },
     87     { 'name': 'prop_type_first_phantom',
     88         'value': 'TRANSITION' },
     89     { 'name': 'prop_type_mask',
     90         'value': 'PropertyDetails::TypeField::kMask' },
     91 
     92     { 'name': 'prop_desc_key',
     93         'value': 'DescriptorArray::kDescriptorKey' },
     94     { 'name': 'prop_desc_details',
     95         'value': 'DescriptorArray::kDescriptorDetails' },
     96     { 'name': 'prop_desc_value',
     97         'value': 'DescriptorArray::kDescriptorValue' },
     98     { 'name': 'prop_desc_size',
     99         'value': 'DescriptorArray::kDescriptorSize' },
    100 
    101     { 'name': 'off_fp_context',
    102         'value': 'StandardFrameConstants::kContextOffset' },
    103     { 'name': 'off_fp_marker',
    104         'value': 'StandardFrameConstants::kMarkerOffset' },
    105     { 'name': 'off_fp_function',
    106         'value': 'JavaScriptFrameConstants::kFunctionOffset' },
    107     { 'name': 'off_fp_args',
    108         'value': 'JavaScriptFrameConstants::kLastParameterOffset' },
    109 ];
    110 
    111 #
    112 # The following useful fields are missing accessors, so we define fake ones.
    113 #
    114 extras_accessors = [
    115     'HeapObject, map, Map, kMapOffset',
    116     'JSObject, elements, Object, kElementsOffset',
    117     'FixedArray, data, uintptr_t, kHeaderSize',
    118     'Map, instance_attributes, int, kInstanceAttributesOffset',
    119     'Map, inobject_properties, int, kInObjectPropertiesOffset',
    120     'Map, instance_size, int, kInstanceSizeOffset',
    121     'HeapNumber, value, double, kValueOffset',
    122     'ConsString, first, String, kFirstOffset',
    123     'ConsString, second, String, kSecondOffset',
    124     'ExternalString, resource, Object, kResourceOffset',
    125     'SeqOneByteString, chars, char, kHeaderSize',
    126     'SeqTwoByteString, chars, char, kHeaderSize',
    127     'SharedFunctionInfo, code, Code, kCodeOffset',
    128     'SlicedString, parent, String, kParentOffset',
    129     'Code, instruction_start, uintptr_t, kHeaderSize',
    130     'Code, instruction_size, int, kInstructionSizeOffset',
    131 ];
    132 
    133 #
    134 # The following is a whitelist of classes we expect to find when scanning the
    135 # source code. This list is not exhaustive, but it's still useful to identify
    136 # when this script gets out of sync with the source. See load_objects().
    137 #
    138 expected_classes = [
    139     'ConsString', 'FixedArray', 'HeapNumber', 'JSArray', 'JSFunction',
    140     'JSObject', 'JSRegExp', 'JSValue', 'Map', 'Oddball', 'Script',
    141     'SeqOneByteString', 'SharedFunctionInfo'
    142 ];
    143 
    144 
    145 #
    146 # The following structures store high-level representations of the structures
    147 # for which we're going to emit descriptive constants.
    148 #
    149 types = {};             # set of all type names
    150 typeclasses = {};       # maps type names to corresponding class names
    151 klasses = {};           # known classes, including parents
    152 fields = [];            # field declarations
    153 
#
# Text written at the top of the generated C++ file by emit_config().  The
# "%s" placeholder is filled in with sys.argv[0], i.e. the path this script
# was invoked as.
#
# NOTE: this is a non-raw string literal, so Python consumes the
# backslash-newline pair inside the FRAME_CONST definition; the macro is
# therefore emitted on a single line rather than as a continued line.
#
header = '''
/*
 * This file is generated by %s.  Do not edit directly.
 */

#include "v8.h"
#include "frames.h"
#include "frames-inl.h" /* for architecture-specific frame constants */

using namespace v8::internal;

extern "C" {

/* stack frame constants */
#define FRAME_CONST(value, klass)       \
    int v8dbg_frametype_##klass = StackFrame::value;

STACK_FRAME_TYPE_LIST(FRAME_CONST)

#undef FRAME_CONST

''' % sys.argv[0];

#
# Text written at the end of the generated file by emit_config(); it closes
# the extern "C" block opened in the header.
#
footer = '''
}
'''
    180 
    181 #
    182 # Loads class hierarchy and type information from "objects.h".
    183 #
    184 def load_objects():
    185         objfilename = sys.argv[2];
    186         objfile = open(objfilename, 'r');
    187         in_insttype = False;
    188 
    189         typestr = '';
    190 
    191         #
    192         # Construct a dictionary for the classes we're sure should be present.
    193         #
    194         checktypes = {};
    195         for klass in expected_classes:
    196                 checktypes[klass] = True;
    197 
    198         #
    199         # Iterate objects.h line-by-line to collect type and class information.
    200         # For types, we accumulate a string representing the entire InstanceType
    201         # enum definition and parse it later because it's easier to do so
    202         # without the embedded newlines.
    203         #
    204         for line in objfile:
    205                 if (line.startswith('enum InstanceType {')):
    206                         in_insttype = True;
    207                         continue;
    208 
    209                 if (in_insttype and line.startswith('};')):
    210                         in_insttype = False;
    211                         continue;
    212 
    213                 line = re.sub('//.*', '', line.rstrip().lstrip());
    214 
    215                 if (in_insttype):
    216                         typestr += line;
    217                         continue;
    218 
    219                 match = re.match('class (\w[^\s:]*)(: public (\w[^\s{]*))?\s*{',
    220                     line);
    221 
    222                 if (match):
    223                         klass = match.group(1);
    224                         pklass = match.group(3);
    225                         klasses[klass] = { 'parent': pklass };
    226 
    227         #
    228         # Process the instance type declaration.
    229         #
    230         entries = typestr.split(',');
    231         for entry in entries:
    232                 types[re.sub('\s*=.*', '', entry).lstrip()] = True;
    233 
    234         #
    235         # Infer class names for each type based on a systematic transformation.
    236         # For example, "JS_FUNCTION_TYPE" becomes "JSFunction".  We find the
    237         # class for each type rather than the other way around because there are
    238         # fewer cases where one type maps to more than one class than the other
    239         # way around.
    240         #
    241         for type in types:
    242                 #
    243                 # Symbols and Strings are implemented using the same classes.
    244                 #
    245                 usetype = re.sub('SYMBOL_', 'STRING_', type);
    246 
    247                 #
    248                 # REGEXP behaves like REG_EXP, as in JS_REGEXP_TYPE => JSRegExp.
    249                 #
    250                 usetype = re.sub('_REGEXP_', '_REG_EXP_', usetype);
    251 
    252                 #
    253                 # Remove the "_TYPE" suffix and then convert to camel case,
    254                 # except that a "JS" prefix remains uppercase (as in
    255                 # "JS_FUNCTION_TYPE" => "JSFunction").
    256                 #
    257                 if (not usetype.endswith('_TYPE')):
    258                         continue;
    259 
    260                 usetype = usetype[0:len(usetype) - len('_TYPE')];
    261                 parts = usetype.split('_');
    262                 cctype = '';
    263 
    264                 if (parts[0] == 'JS'):
    265                         cctype = 'JS';
    266                         start = 1;
    267                 else:
    268                         cctype = '';
    269                         start = 0;
    270 
    271                 for ii in range(start, len(parts)):
    272                         part = parts[ii];
    273                         cctype += part[0].upper() + part[1:].lower();
    274 
    275                 #
    276                 # Mapping string types is more complicated.  Both types and
    277                 # class names for Strings specify a representation (e.g., Seq,
    278                 # Cons, External, or Sliced) and an encoding (TwoByte or Ascii),
    279                 # In the simplest case, both of these are explicit in both
    280                 # names, as in:
    281                 #
    282                 #       EXTERNAL_ASCII_STRING_TYPE => ExternalAsciiString
    283                 #
    284                 # However, either the representation or encoding can be omitted
    285                 # from the type name, in which case "Seq" and "TwoByte" are
    286                 # assumed, as in:
    287                 #
    288                 #       STRING_TYPE => SeqTwoByteString
    289                 #
    290                 # Additionally, sometimes the type name has more information
    291                 # than the class, as in:
    292                 #
    293                 #       CONS_ASCII_STRING_TYPE => ConsString
    294                 #
    295                 # To figure this out dynamically, we first check for a
    296                 # representation and encoding and add them if they're not
    297                 # present.  If that doesn't yield a valid class name, then we
    298                 # strip out the representation.
    299                 #
    300                 if (cctype.endswith('String')):
    301                         if (cctype.find('Cons') == -1 and
    302                             cctype.find('External') == -1 and
    303                             cctype.find('Sliced') == -1):
    304                                 if (cctype.find('Ascii') != -1):
    305                                         cctype = re.sub('AsciiString$',
    306                                             'SeqOneByteString', cctype);
    307                                 else:
    308                                         cctype = re.sub('String$',
    309                                             'SeqString', cctype);
    310 
    311                         if (cctype.find('Ascii') == -1):
    312                                 cctype = re.sub('String$', 'TwoByteString',
    313                                     cctype);
    314 
    315                         if (not (cctype in klasses)):
    316                                 cctype = re.sub('Ascii', '', cctype);
    317                                 cctype = re.sub('TwoByte', '', cctype);
    318 
    319                 #
    320                 # Despite all that, some types have no corresponding class.
    321                 #
    322                 if (cctype in klasses):
    323                         typeclasses[type] = cctype;
    324                         if (cctype in checktypes):
    325                                 del checktypes[cctype];
    326 
    327         if (len(checktypes) > 0):
    328                 for klass in checktypes:
    329                         print('error: expected class \"%s\" not found' % klass);
    330 
    331                 sys.exit(1);
    332 
    333 
    334 #
    335 # For a given macro call, pick apart the arguments and return an object
    336 # describing the corresponding output constant.  See load_fields().
    337 #
    338 def parse_field(call):
    339         # Replace newlines with spaces.
    340         for ii in range(0, len(call)):
    341                 if (call[ii] == '\n'):
    342                         call[ii] == ' ';
    343 
    344         idx = call.find('(');
    345         kind = call[0:idx];
    346         rest = call[idx + 1: len(call) - 1];
    347         args = re.split('\s*,\s*', rest);
    348 
    349         consts = [];
    350 
    351         if (kind == 'ACCESSORS' or kind == 'ACCESSORS_GCSAFE'):
    352                 klass = args[0];
    353                 field = args[1];
    354                 dtype = args[2];
    355                 offset = args[3];
    356 
    357                 return ({
    358                     'name': 'class_%s__%s__%s' % (klass, field, dtype),
    359                     'value': '%s::%s' % (klass, offset)
    360                 });
    361 
    362         assert(kind == 'SMI_ACCESSORS');
    363         klass = args[0];
    364         field = args[1];
    365         offset = args[2];
    366 
    367         return ({
    368             'name': 'class_%s__%s__%s' % (klass, field, 'SMI'),
    369             'value': '%s::%s' % (klass, offset)
    370         });
    371 
    372 #
    373 # Load field offset information from objects-inl.h.
    374 #
    375 def load_fields():
    376         inlfilename = sys.argv[3];
    377         inlfile = open(inlfilename, 'r');
    378 
    379         #
    380         # Each class's fields and the corresponding offsets are described in the
    381         # source by calls to macros like "ACCESSORS" (and friends).  All we do
    382         # here is extract these macro invocations, taking into account that they
    383         # may span multiple lines and may contain nested parentheses.  We also
    384         # call parse_field() to pick apart the invocation.
    385         #
    386         prefixes = [ 'ACCESSORS', 'ACCESSORS_GCSAFE', 'SMI_ACCESSORS' ];
    387         current = '';
    388         opens = 0;
    389 
    390         for line in inlfile:
    391                 if (opens > 0):
    392                         # Continuation line
    393                         for ii in range(0, len(line)):
    394                                 if (line[ii] == '('):
    395                                         opens += 1;
    396                                 elif (line[ii] == ')'):
    397                                         opens -= 1;
    398 
    399                                 if (opens == 0):
    400                                         break;
    401 
    402                         current += line[0:ii + 1];
    403                         continue;
    404 
    405                 for prefix in prefixes:
    406                         if (not line.startswith(prefix + '(')):
    407                                 continue;
    408 
    409                         if (len(current) > 0):
    410                                 fields.append(parse_field(current));
    411                                 current = '';
    412 
    413                         for ii in range(len(prefix), len(line)):
    414                                 if (line[ii] == '('):
    415                                         opens += 1;
    416                                 elif (line[ii] == ')'):
    417                                         opens -= 1;
    418 
    419                                 if (opens == 0):
    420                                         break;
    421 
    422                         current += line[0:ii + 1];
    423 
    424         if (len(current) > 0):
    425                 fields.append(parse_field(current));
    426                 current = '';
    427 
    428         for body in extras_accessors:
    429                 fields.append(parse_field('ACCESSORS(%s)' % body));
    430 
    431 #
    432 # Emit a block of constants.
    433 #
    434 def emit_set(out, consts):
    435         for ii in range(0, len(consts)):
    436                 out.write('int v8dbg_%s = %s;\n' %
    437                     (consts[ii]['name'], consts[ii]['value']));
    438         out.write('\n');
    439 
    440 #
    441 # Emit the whole output file.
    442 #
    443 def emit_config():
    444         out = file(sys.argv[1], 'w');
    445 
    446         out.write(header);
    447 
    448         out.write('/* miscellaneous constants */\n');
    449         emit_set(out, consts_misc);
    450 
    451         out.write('/* class type information */\n');
    452         consts = [];
    453         keys = typeclasses.keys();
    454         keys.sort();
    455         for typename in keys:
    456                 klass = typeclasses[typename];
    457                 consts.append({
    458                     'name': 'type_%s__%s' % (klass, typename),
    459                     'value': typename
    460                 });
    461 
    462         emit_set(out, consts);
    463 
    464         out.write('/* class hierarchy information */\n');
    465         consts = [];
    466         keys = klasses.keys();
    467         keys.sort();
    468         for klassname in keys:
    469                 pklass = klasses[klassname]['parent'];
    470                 if (pklass == None):
    471                         continue;
    472 
    473                 consts.append({
    474                     'name': 'parent_%s__%s' % (klassname, pklass),
    475                     'value': 0
    476                 });
    477 
    478         emit_set(out, consts);
    479 
    480         out.write('/* field information */\n');
    481         emit_set(out, fields);
    482 
    483         out.write(footer);
    484 
    485 if (len(sys.argv) < 4):
    486         print('usage: %s output.cc objects.h objects-inl.h' % sys.argv[0]);
    487         sys.exit(2);
    488 
    489 load_objects();
    490 load_fields();
    491 emit_config();
    492