Home | History | Annotate | Download | only in tools
      1 #!/usr/bin/env python
      2 
      3 #
      4 # Copyright 2012 the V8 project authors. All rights reserved.
      5 # Redistribution and use in source and binary forms, with or without
      6 # modification, are permitted provided that the following conditions are
      7 # met:
      8 #
      9 #     * Redistributions of source code must retain the above copyright
     10 #       notice, this list of conditions and the following disclaimer.
     11 #     * Redistributions in binary form must reproduce the above
     12 #       copyright notice, this list of conditions and the following
     13 #       disclaimer in the documentation and/or other materials provided
     14 #       with the distribution.
     15 #     * Neither the name of Google Inc. nor the names of its
     16 #       contributors may be used to endorse or promote products derived
     17 #       from this software without specific prior written permission.
     18 #
     19 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     20 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     21 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     22 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     23 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     24 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     25 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     26 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     27 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     28 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     29 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     30 #
     31 
     32 #
     33 # Emits a C++ file to be compiled and linked into libv8 to support postmortem
     34 # debugging tools.  Most importantly, this tool emits constants describing V8
     35 # internals:
     36 #
     37 #    v8dbg_type_CLASS__TYPE = VALUE             Describes class type values
     38 #    v8dbg_class_CLASS__FIELD__TYPE = OFFSET    Describes class fields
     39 #    v8dbg_parent_CLASS__PARENT                 Describes class hierarchy
     40 #    v8dbg_frametype_NAME = VALUE               Describes stack frame values
     41 #    v8dbg_off_fp_NAME = OFFSET                 Frame pointer offsets
     42 #    v8dbg_prop_NAME = OFFSET                   Object property offsets
     43 #    v8dbg_NAME = VALUE                         Miscellaneous values
     44 #
     45 # These constants are declared as global integers so that they'll be present in
     46 # the generated libv8 binary.
     47 #
     48 
     49 import re
     50 import sys
     51 
     52 #
     53 # Miscellaneous constants, tags, and masks used for object identification.
     54 #
     55 consts_misc = [
     56     { 'name': 'FirstNonstringType',     'value': 'FIRST_NONSTRING_TYPE' },
     57 
     58     { 'name': 'IsNotStringMask',        'value': 'kIsNotStringMask' },
     59     { 'name': 'StringTag',              'value': 'kStringTag' },
     60     { 'name': 'NotStringTag',           'value': 'kNotStringTag' },
     61 
     62     { 'name': 'StringEncodingMask',     'value': 'kStringEncodingMask' },
     63     { 'name': 'TwoByteStringTag',       'value': 'kTwoByteStringTag' },
     64     { 'name': 'AsciiStringTag',         'value': 'kAsciiStringTag' },
     65 
     66     { 'name': 'StringRepresentationMask',
     67         'value': 'kStringRepresentationMask' },
     68     { 'name': 'SeqStringTag',           'value': 'kSeqStringTag' },
     69     { 'name': 'ConsStringTag',          'value': 'kConsStringTag' },
     70     { 'name': 'ExternalStringTag',      'value': 'kExternalStringTag' },
     71 
     72     { 'name': 'FailureTag',             'value': 'kFailureTag' },
     73     { 'name': 'FailureTagMask',         'value': 'kFailureTagMask' },
     74     { 'name': 'HeapObjectTag',          'value': 'kHeapObjectTag' },
     75     { 'name': 'HeapObjectTagMask',      'value': 'kHeapObjectTagMask' },
     76     { 'name': 'SmiTag',                 'value': 'kSmiTag' },
     77     { 'name': 'SmiTagMask',             'value': 'kSmiTagMask' },
     78     { 'name': 'SmiValueShift',          'value': 'kSmiTagSize' },
     79     { 'name': 'PointerSizeLog2',        'value': 'kPointerSizeLog2' },
     80 
     81     { 'name': 'prop_idx_content',
     82         'value': 'DescriptorArray::kContentArrayIndex' },
     83     { 'name': 'prop_idx_first',
     84         'value': 'DescriptorArray::kFirstIndex' },
     85     { 'name': 'prop_type_field',
     86         'value': 'FIELD' },
     87     { 'name': 'prop_type_first_phantom',
     88         'value': 'MAP_TRANSITION' },
     89     { 'name': 'prop_type_mask',
     90         'value': 'PropertyDetails::TypeField::kMask' },
     91 
     92     { 'name': 'off_fp_context',
     93         'value': 'StandardFrameConstants::kContextOffset' },
     94     { 'name': 'off_fp_marker',
     95         'value': 'StandardFrameConstants::kMarkerOffset' },
     96     { 'name': 'off_fp_function',
     97         'value': 'JavaScriptFrameConstants::kFunctionOffset' },
     98     { 'name': 'off_fp_args',
     99         'value': 'JavaScriptFrameConstants::kLastParameterOffset' },
    100 ];
    101 
    102 #
    103 # The following useful fields are missing accessors, so we define fake ones.
    104 #
    105 extras_accessors = [
    106     'HeapObject, map, Map, kMapOffset',
    107     'JSObject, elements, Object, kElementsOffset',
    108     'FixedArray, data, uintptr_t, kHeaderSize',
    109     'Map, instance_attributes, int, kInstanceAttributesOffset',
    110     'Map, instance_descriptors, int, kInstanceDescriptorsOrBitField3Offset',
    111     'Map, inobject_properties, int, kInObjectPropertiesOffset',
    112     'Map, instance_size, int, kInstanceSizeOffset',
    113     'HeapNumber, value, double, kValueOffset',
    114     'ConsString, first, String, kFirstOffset',
    115     'ConsString, second, String, kSecondOffset',
    116     'ExternalString, resource, Object, kResourceOffset',
    117     'SeqAsciiString, chars, char, kHeaderSize',
    118     'SharedFunctionInfo, code, Code, kCodeOffset',
    119     'Code, instruction_start, uintptr_t, kHeaderSize',
    120     'Code, instruction_size, int, kInstructionSizeOffset',
    121 ];
    122 
    123 #
    124 # The following is a whitelist of classes we expect to find when scanning the
    125 # source code. This list is not exhaustive, but it's still useful to identify
    126 # when this script gets out of sync with the source. See load_objects().
    127 #
    128 expected_classes = [
    129     'ConsString', 'FixedArray', 'HeapNumber', 'JSArray', 'JSFunction',
    130     'JSObject', 'JSRegExp', 'JSValue', 'Map', 'Oddball', 'Script',
    131     'SeqAsciiString', 'SharedFunctionInfo'
    132 ];
    133 
    134 
    135 #
    136 # The following structures store high-level representations of the structures
    137 # for which we're going to emit descriptive constants.
    138 #
    139 types = {};             # set of all type names
    140 typeclasses = {};       # maps type names to corresponding class names
    141 klasses = {};           # known classes, including parents
    142 fields = [];            # field declarations
    143 
    144 header = '''
    145 /*
    146  * This file is generated by %s.  Do not edit directly.
    147  */
    148 
    149 #include "v8.h"
    150 #include "frames.h"
    151 #include "frames-inl.h" /* for architecture-specific frame constants */
    152 
    153 using namespace v8::internal;
    154 
    155 extern "C" {
    156 
    157 /* stack frame constants */
    158 #define FRAME_CONST(value, klass)       \
    159     int v8dbg_frametype_##klass = StackFrame::value;
    160 
    161 STACK_FRAME_TYPE_LIST(FRAME_CONST)
    162 
    163 #undef FRAME_CONST
    164 
    165 ''' % sys.argv[0];
    166 
    167 footer = '''
    168 }
    169 '''
    170 
    171 #
    172 # Loads class hierarchy and type information from "objects.h".
    173 #
    174 def load_objects():
    175         objfilename = sys.argv[2];
    176         objfile = open(objfilename, 'r');
    177         in_insttype = False;
    178 
    179         typestr = '';
    180 
    181         #
    182         # Construct a dictionary for the classes we're sure should be present.
    183         #
    184         checktypes = {};
    185         for klass in expected_classes:
    186                 checktypes[klass] = True;
    187 
    188         #
    189         # Iterate objects.h line-by-line to collect type and class information.
    190         # For types, we accumulate a string representing the entire InstanceType
    191         # enum definition and parse it later because it's easier to do so
    192         # without the embedded newlines.
    193         #
    194         for line in objfile:
    195                 if (line.startswith('enum InstanceType {')):
    196                         in_insttype = True;
    197                         continue;
    198 
    199                 if (in_insttype and line.startswith('};')):
    200                         in_insttype = False;
    201                         continue;
    202 
    203                 line = re.sub('//.*', '', line.rstrip().lstrip());
    204 
    205                 if (in_insttype):
    206                         typestr += line;
    207                         continue;
    208 
    209                 match = re.match('class (\w[^\s:]*)(: public (\w[^\s{]*))?\s*{',
    210                     line);
    211 
    212                 if (match):
    213                         klass = match.group(1);
    214                         pklass = match.group(3);
    215                         klasses[klass] = { 'parent': pklass };
    216 
    217         #
    218         # Process the instance type declaration.
    219         #
    220         entries = typestr.split(',');
    221         for entry in entries:
    222                 types[re.sub('\s*=.*', '', entry).lstrip()] = True;
    223 
    224         #
    225         # Infer class names for each type based on a systematic transformation.
    226         # For example, "JS_FUNCTION_TYPE" becomes "JSFunction".  We find the
    227         # class for each type rather than the other way around because there are
    228         # fewer cases where one type maps to more than one class than the other
    229         # way around.
    230         #
    231         for type in types:
    232                 #
    233                 # Symbols and Strings are implemented using the same classes.
    234                 #
    235                 usetype = re.sub('SYMBOL_', 'STRING_', type);
    236 
    237                 #
    238                 # REGEXP behaves like REG_EXP, as in JS_REGEXP_TYPE => JSRegExp.
    239                 #
    240                 usetype = re.sub('_REGEXP_', '_REG_EXP_', usetype);
    241 
    242                 #
    243                 # Remove the "_TYPE" suffix and then convert to camel case,
    244                 # except that a "JS" prefix remains uppercase (as in
    245                 # "JS_FUNCTION_TYPE" => "JSFunction").
    246                 #
    247                 if (not usetype.endswith('_TYPE')):
    248                         continue;
    249 
    250                 usetype = usetype[0:len(usetype) - len('_TYPE')];
    251                 parts = usetype.split('_');
    252                 cctype = '';
    253 
    254                 if (parts[0] == 'JS'):
    255                         cctype = 'JS';
    256                         start = 1;
    257                 else:
    258                         cctype = '';
    259                         start = 0;
    260 
    261                 for ii in range(start, len(parts)):
    262                         part = parts[ii];
    263                         cctype += part[0].upper() + part[1:].lower();
    264 
    265                 #
    266                 # Mapping string types is more complicated.  Both types and
    267                 # class names for Strings specify a representation (e.g., Seq,
    268                 # Cons, External, or Sliced) and an encoding (TwoByte or Ascii),
    269                 # In the simplest case, both of these are explicit in both
    270                 # names, as in:
    271                 #
    272                 #       EXTERNAL_ASCII_STRING_TYPE => ExternalAsciiString
    273                 #
    274                 # However, either the representation or encoding can be omitted
    275                 # from the type name, in which case "Seq" and "TwoByte" are
    276                 # assumed, as in:
    277                 #
    278                 #       STRING_TYPE => SeqTwoByteString
    279                 #
    280                 # Additionally, sometimes the type name has more information
    281                 # than the class, as in:
    282                 #
    283                 #       CONS_ASCII_STRING_TYPE => ConsString
    284                 #
    285                 # To figure this out dynamically, we first check for a
    286                 # representation and encoding and add them if they're not
    287                 # present.  If that doesn't yield a valid class name, then we
    288                 # strip out the representation.
    289                 #
    290                 if (cctype.endswith('String')):
    291                         if (cctype.find('Cons') == -1 and
    292                             cctype.find('External') == -1 and
    293                             cctype.find('Sliced') == -1):
    294                                 if (cctype.find('Ascii') != -1):
    295                                         cctype = re.sub('AsciiString$',
    296                                             'SeqAsciiString', cctype);
    297                                 else:
    298                                         cctype = re.sub('String$',
    299                                             'SeqString', cctype);
    300 
    301                         if (cctype.find('Ascii') == -1):
    302                                 cctype = re.sub('String$', 'TwoByteString',
    303                                     cctype);
    304 
    305                         if (not (cctype in klasses)):
    306                                 cctype = re.sub('Ascii', '', cctype);
    307                                 cctype = re.sub('TwoByte', '', cctype);
    308 
    309                 #
    310                 # Despite all that, some types have no corresponding class.
    311                 #
    312                 if (cctype in klasses):
    313                         typeclasses[type] = cctype;
    314                         if (cctype in checktypes):
    315                                 del checktypes[cctype];
    316 
    317         if (len(checktypes) > 0):
    318                 for klass in checktypes:
    319                         print('error: expected class \"%s\" not found' % klass);
    320 
    321                 sys.exit(1);
    322 
    323 
    324 #
    325 # For a given macro call, pick apart the arguments and return an object
    326 # describing the corresponding output constant.  See load_fields().
    327 #
    328 def parse_field(call):
    329         # Replace newlines with spaces.
    330         for ii in range(0, len(call)):
    331                 if (call[ii] == '\n'):
    332                         call[ii] == ' ';
    333 
    334         idx = call.find('(');
    335         kind = call[0:idx];
    336         rest = call[idx + 1: len(call) - 1];
    337         args = re.split('\s*,\s*', rest);
    338 
    339         consts = [];
    340 
    341         if (kind == 'ACCESSORS' or kind == 'ACCESSORS_GCSAFE'):
    342                 klass = args[0];
    343                 field = args[1];
    344                 dtype = args[2];
    345                 offset = args[3];
    346 
    347                 return ({
    348                     'name': 'class_%s__%s__%s' % (klass, field, dtype),
    349                     'value': '%s::%s' % (klass, offset)
    350                 });
    351 
    352         assert(kind == 'SMI_ACCESSORS');
    353         klass = args[0];
    354         field = args[1];
    355         offset = args[2];
    356 
    357         return ({
    358             'name': 'class_%s__%s__%s' % (klass, field, 'SMI'),
    359             'value': '%s::%s' % (klass, offset)
    360         });
    361 
    362 #
    363 # Load field offset information from objects-inl.h.
    364 #
    365 def load_fields():
    366         inlfilename = sys.argv[3];
    367         inlfile = open(inlfilename, 'r');
    368 
    369         #
    370         # Each class's fields and the corresponding offsets are described in the
    371         # source by calls to macros like "ACCESSORS" (and friends).  All we do
    372         # here is extract these macro invocations, taking into account that they
    373         # may span multiple lines and may contain nested parentheses.  We also
    374         # call parse_field() to pick apart the invocation.
    375         #
    376         prefixes = [ 'ACCESSORS', 'ACCESSORS_GCSAFE', 'SMI_ACCESSORS' ];
    377         current = '';
    378         opens = 0;
    379 
    380         for line in inlfile:
    381                 if (opens > 0):
    382                         # Continuation line
    383                         for ii in range(0, len(line)):
    384                                 if (line[ii] == '('):
    385                                         opens += 1;
    386                                 elif (line[ii] == ')'):
    387                                         opens -= 1;
    388 
    389                                 if (opens == 0):
    390                                         break;
    391 
    392                         current += line[0:ii + 1];
    393                         continue;
    394 
    395                 for prefix in prefixes:
    396                         if (not line.startswith(prefix + '(')):
    397                                 continue;
    398 
    399                         if (len(current) > 0):
    400                                 fields.append(parse_field(current));
    401                                 current = '';
    402 
    403                         for ii in range(len(prefix), len(line)):
    404                                 if (line[ii] == '('):
    405                                         opens += 1;
    406                                 elif (line[ii] == ')'):
    407                                         opens -= 1;
    408 
    409                                 if (opens == 0):
    410                                         break;
    411 
    412                         current += line[0:ii + 1];
    413 
    414         if (len(current) > 0):
    415                 fields.append(parse_field(current));
    416                 current = '';
    417 
    418         for body in extras_accessors:
    419                 fields.append(parse_field('ACCESSORS(%s)' % body));
    420 
    421 #
    422 # Emit a block of constants.
    423 #
    424 def emit_set(out, consts):
    425         for ii in range(0, len(consts)):
    426                 out.write('int v8dbg_%s = %s;\n' %
    427                     (consts[ii]['name'], consts[ii]['value']));
    428         out.write('\n');
    429 
    430 #
    431 # Emit the whole output file.
    432 #
    433 def emit_config():
    434         out = file(sys.argv[1], 'w');
    435 
    436         out.write(header);
    437 
    438         out.write('/* miscellaneous constants */\n');
    439         emit_set(out, consts_misc);
    440 
    441         out.write('/* class type information */\n');
    442         consts = [];
    443         keys = typeclasses.keys();
    444         keys.sort();
    445         for typename in keys:
    446                 klass = typeclasses[typename];
    447                 consts.append({
    448                     'name': 'type_%s__%s' % (klass, typename),
    449                     'value': typename
    450                 });
    451 
    452         emit_set(out, consts);
    453 
    454         out.write('/* class hierarchy information */\n');
    455         consts = [];
    456         keys = klasses.keys();
    457         keys.sort();
    458         for klassname in keys:
    459                 pklass = klasses[klassname]['parent'];
    460                 if (pklass == None):
    461                         continue;
    462 
    463                 consts.append({
    464                     'name': 'parent_%s__%s' % (klassname, pklass),
    465                     'value': 0
    466                 });
    467 
    468         emit_set(out, consts);
    469 
    470         out.write('/* field information */\n');
    471         emit_set(out, fields);
    472 
    473         out.write(footer);
    474 
    475 if (len(sys.argv) < 4):
    476         print('usage: %s output.cc objects.h objects-inl.h' % sys.argv[0]);
    477         sys.exit(2);
    478 
    479 load_objects();
    480 load_fields();
    481 emit_config();
    482