Home | History | Annotate | Download | only in tools
      1 #!/usr/bin/env python
      2 
      3 #
      4 # Copyright 2012 the V8 project authors. All rights reserved.
      5 # Redistribution and use in source and binary forms, with or without
      6 # modification, are permitted provided that the following conditions are
      7 # met:
      8 #
      9 #     * Redistributions of source code must retain the above copyright
     10 #       notice, this list of conditions and the following disclaimer.
     11 #     * Redistributions in binary form must reproduce the above
     12 #       copyright notice, this list of conditions and the following
     13 #       disclaimer in the documentation and/or other materials provided
     14 #       with the distribution.
     15 #     * Neither the name of Google Inc. nor the names of its
     16 #       contributors may be used to endorse or promote products derived
     17 #       from this software without specific prior written permission.
     18 #
     19 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     20 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     21 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     22 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     23 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     24 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     25 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     26 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     27 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     28 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     29 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     30 #
     31 
     32 #
     33 # Emits a C++ file to be compiled and linked into libv8 to support postmortem
     34 # debugging tools.  Most importantly, this tool emits constants describing V8
     35 # internals:
     36 #
     37 #    v8dbg_type_CLASS__TYPE = VALUE             Describes class type values
     38 #    v8dbg_class_CLASS__FIELD__TYPE = OFFSET    Describes class fields
     39 #    v8dbg_parent_CLASS__PARENT                 Describes class hierarchy
     40 #    v8dbg_frametype_NAME = VALUE               Describes stack frame values
     41 #    v8dbg_off_fp_NAME = OFFSET                 Frame pointer offsets
     42 #    v8dbg_prop_NAME = OFFSET                   Object property offsets
     43 #    v8dbg_NAME = VALUE                         Miscellaneous values
     44 #
     45 # These constants are declared as global integers so that they'll be present in
     46 # the generated libv8 binary.
     47 #
     48 
     49 import re
     50 import sys
     51 
     52 #
     53 # Miscellaneous constants, tags, and masks used for object identification.
     54 #
     55 consts_misc = [
     56     { 'name': 'FirstNonstringType',     'value': 'FIRST_NONSTRING_TYPE' },
     57 
     58     { 'name': 'IsNotStringMask',        'value': 'kIsNotStringMask' },
     59     { 'name': 'StringTag',              'value': 'kStringTag' },
     60     { 'name': 'NotStringTag',           'value': 'kNotStringTag' },
     61 
     62     { 'name': 'StringEncodingMask',     'value': 'kStringEncodingMask' },
     63     { 'name': 'TwoByteStringTag',       'value': 'kTwoByteStringTag' },
     64     { 'name': 'AsciiStringTag',         'value': 'kOneByteStringTag' },
     65 
     66     { 'name': 'StringRepresentationMask',
     67         'value': 'kStringRepresentationMask' },
     68     { 'name': 'SeqStringTag',           'value': 'kSeqStringTag' },
     69     { 'name': 'ConsStringTag',          'value': 'kConsStringTag' },
     70     { 'name': 'ExternalStringTag',      'value': 'kExternalStringTag' },
     71 
     72     { 'name': 'FailureTag',             'value': 'kFailureTag' },
     73     { 'name': 'FailureTagMask',         'value': 'kFailureTagMask' },
     74     { 'name': 'HeapObjectTag',          'value': 'kHeapObjectTag' },
     75     { 'name': 'HeapObjectTagMask',      'value': 'kHeapObjectTagMask' },
     76     { 'name': 'SmiTag',                 'value': 'kSmiTag' },
     77     { 'name': 'SmiTagMask',             'value': 'kSmiTagMask' },
     78     { 'name': 'SmiValueShift',          'value': 'kSmiTagSize' },
     79     { 'name': 'SmiShiftSize',           'value': 'kSmiShiftSize' },
     80     { 'name': 'PointerSizeLog2',        'value': 'kPointerSizeLog2' },
     81 
     82     { 'name': 'prop_idx_first',
     83         'value': 'DescriptorArray::kFirstIndex' },
     84     { 'name': 'prop_type_field',
     85         'value': 'FIELD' },
     86     { 'name': 'prop_type_first_phantom',
     87         'value': 'TRANSITION' },
     88     { 'name': 'prop_type_mask',
     89         'value': 'PropertyDetails::TypeField::kMask' },
     90 
     91     { 'name': 'off_fp_context',
     92         'value': 'StandardFrameConstants::kContextOffset' },
     93     { 'name': 'off_fp_marker',
     94         'value': 'StandardFrameConstants::kMarkerOffset' },
     95     { 'name': 'off_fp_function',
     96         'value': 'JavaScriptFrameConstants::kFunctionOffset' },
     97     { 'name': 'off_fp_args',
     98         'value': 'JavaScriptFrameConstants::kLastParameterOffset' },
     99 ];
    100 
    101 #
    102 # The following useful fields are missing accessors, so we define fake ones.
    103 #
    104 extras_accessors = [
    105     'HeapObject, map, Map, kMapOffset',
    106     'JSObject, elements, Object, kElementsOffset',
    107     'FixedArray, data, uintptr_t, kHeaderSize',
    108     'Map, instance_attributes, int, kInstanceAttributesOffset',
    109     'Map, inobject_properties, int, kInObjectPropertiesOffset',
    110     'Map, instance_size, int, kInstanceSizeOffset',
    111     'HeapNumber, value, double, kValueOffset',
    112     'ConsString, first, String, kFirstOffset',
    113     'ConsString, second, String, kSecondOffset',
    114     'ExternalString, resource, Object, kResourceOffset',
    115     'SeqOneByteString, chars, char, kHeaderSize',
    116     'SharedFunctionInfo, code, Code, kCodeOffset',
    117     'Code, instruction_start, uintptr_t, kHeaderSize',
    118     'Code, instruction_size, int, kInstructionSizeOffset',
    119 ];
    120 
    121 #
    122 # The following is a whitelist of classes we expect to find when scanning the
    123 # source code. This list is not exhaustive, but it's still useful to identify
    124 # when this script gets out of sync with the source. See load_objects().
    125 #
    126 expected_classes = [
    127     'ConsString', 'FixedArray', 'HeapNumber', 'JSArray', 'JSFunction',
    128     'JSObject', 'JSRegExp', 'JSValue', 'Map', 'Oddball', 'Script',
    129     'SeqOneByteString', 'SharedFunctionInfo'
    130 ];
    131 
    132 
    133 #
    134 # The following structures store high-level representations of the structures
    135 # for which we're going to emit descriptive constants.
    136 #
    137 types = {};             # set of all type names
    138 typeclasses = {};       # maps type names to corresponding class names
    139 klasses = {};           # known classes, including parents
    140 fields = [];            # field declarations
    141 
# Prologue of the generated C++ file.  The single "%s" placeholder is filled
# with sys.argv[0] (the path this script was invoked as) when the module is
# loaded.  The prologue opens an extern "C" block and expands
# STACK_FRAME_TYPE_LIST into one v8dbg_frametype_* integer per entry.
#
# NOTE: this is an ordinary (non-raw) string literal, so the backslash that
# ends the "#define FRAME_CONST" line, together with the newline after it, is
# consumed by Python.  The macro is therefore emitted on a single line.
header = '''
/*
 * This file is generated by %s.  Do not edit directly.
 */

#include "v8.h"
#include "frames.h"
#include "frames-inl.h" /* for architecture-specific frame constants */

using namespace v8::internal;

extern "C" {

/* stack frame constants */
#define FRAME_CONST(value, klass)       \
    int v8dbg_frametype_##klass = StackFrame::value;

STACK_FRAME_TYPE_LIST(FRAME_CONST)

#undef FRAME_CONST

''' % sys.argv[0];

# Epilogue of the generated C++ file: closes the extern "C" block opened in
# the header.
footer = '''
}
'''
    168 
    169 #
    170 # Loads class hierarchy and type information from "objects.h".
    171 #
    172 def load_objects():
    173         objfilename = sys.argv[2];
    174         objfile = open(objfilename, 'r');
    175         in_insttype = False;
    176 
    177         typestr = '';
    178 
    179         #
    180         # Construct a dictionary for the classes we're sure should be present.
    181         #
    182         checktypes = {};
    183         for klass in expected_classes:
    184                 checktypes[klass] = True;
    185 
    186         #
    187         # Iterate objects.h line-by-line to collect type and class information.
    188         # For types, we accumulate a string representing the entire InstanceType
    189         # enum definition and parse it later because it's easier to do so
    190         # without the embedded newlines.
    191         #
    192         for line in objfile:
    193                 if (line.startswith('enum InstanceType {')):
    194                         in_insttype = True;
    195                         continue;
    196 
    197                 if (in_insttype and line.startswith('};')):
    198                         in_insttype = False;
    199                         continue;
    200 
    201                 line = re.sub('//.*', '', line.rstrip().lstrip());
    202 
    203                 if (in_insttype):
    204                         typestr += line;
    205                         continue;
    206 
    207                 match = re.match('class (\w[^\s:]*)(: public (\w[^\s{]*))?\s*{',
    208                     line);
    209 
    210                 if (match):
    211                         klass = match.group(1);
    212                         pklass = match.group(3);
    213                         klasses[klass] = { 'parent': pklass };
    214 
    215         #
    216         # Process the instance type declaration.
    217         #
    218         entries = typestr.split(',');
    219         for entry in entries:
    220                 types[re.sub('\s*=.*', '', entry).lstrip()] = True;
    221 
    222         #
    223         # Infer class names for each type based on a systematic transformation.
    224         # For example, "JS_FUNCTION_TYPE" becomes "JSFunction".  We find the
    225         # class for each type rather than the other way around because there are
    226         # fewer cases where one type maps to more than one class than the other
    227         # way around.
    228         #
    229         for type in types:
    230                 #
    231                 # Symbols and Strings are implemented using the same classes.
    232                 #
    233                 usetype = re.sub('SYMBOL_', 'STRING_', type);
    234 
    235                 #
    236                 # REGEXP behaves like REG_EXP, as in JS_REGEXP_TYPE => JSRegExp.
    237                 #
    238                 usetype = re.sub('_REGEXP_', '_REG_EXP_', usetype);
    239 
    240                 #
    241                 # Remove the "_TYPE" suffix and then convert to camel case,
    242                 # except that a "JS" prefix remains uppercase (as in
    243                 # "JS_FUNCTION_TYPE" => "JSFunction").
    244                 #
    245                 if (not usetype.endswith('_TYPE')):
    246                         continue;
    247 
    248                 usetype = usetype[0:len(usetype) - len('_TYPE')];
    249                 parts = usetype.split('_');
    250                 cctype = '';
    251 
    252                 if (parts[0] == 'JS'):
    253                         cctype = 'JS';
    254                         start = 1;
    255                 else:
    256                         cctype = '';
    257                         start = 0;
    258 
    259                 for ii in range(start, len(parts)):
    260                         part = parts[ii];
    261                         cctype += part[0].upper() + part[1:].lower();
    262 
    263                 #
    264                 # Mapping string types is more complicated.  Both types and
    265                 # class names for Strings specify a representation (e.g., Seq,
    266                 # Cons, External, or Sliced) and an encoding (TwoByte or Ascii),
    267                 # In the simplest case, both of these are explicit in both
    268                 # names, as in:
    269                 #
    270                 #       EXTERNAL_ASCII_STRING_TYPE => ExternalAsciiString
    271                 #
    272                 # However, either the representation or encoding can be omitted
    273                 # from the type name, in which case "Seq" and "TwoByte" are
    274                 # assumed, as in:
    275                 #
    276                 #       STRING_TYPE => SeqTwoByteString
    277                 #
    278                 # Additionally, sometimes the type name has more information
    279                 # than the class, as in:
    280                 #
    281                 #       CONS_ASCII_STRING_TYPE => ConsString
    282                 #
    283                 # To figure this out dynamically, we first check for a
    284                 # representation and encoding and add them if they're not
    285                 # present.  If that doesn't yield a valid class name, then we
    286                 # strip out the representation.
    287                 #
    288                 if (cctype.endswith('String')):
    289                         if (cctype.find('Cons') == -1 and
    290                             cctype.find('External') == -1 and
    291                             cctype.find('Sliced') == -1):
    292                                 if (cctype.find('Ascii') != -1):
    293                                         cctype = re.sub('AsciiString$',
    294                                             'SeqOneByteString', cctype);
    295                                 else:
    296                                         cctype = re.sub('String$',
    297                                             'SeqString', cctype);
    298 
    299                         if (cctype.find('Ascii') == -1):
    300                                 cctype = re.sub('String$', 'TwoByteString',
    301                                     cctype);
    302 
    303                         if (not (cctype in klasses)):
    304                                 cctype = re.sub('Ascii', '', cctype);
    305                                 cctype = re.sub('TwoByte', '', cctype);
    306 
    307                 #
    308                 # Despite all that, some types have no corresponding class.
    309                 #
    310                 if (cctype in klasses):
    311                         typeclasses[type] = cctype;
    312                         if (cctype in checktypes):
    313                                 del checktypes[cctype];
    314 
    315         if (len(checktypes) > 0):
    316                 for klass in checktypes:
    317                         print('error: expected class \"%s\" not found' % klass);
    318 
    319                 sys.exit(1);
    320 
    321 
    322 #
    323 # For a given macro call, pick apart the arguments and return an object
    324 # describing the corresponding output constant.  See load_fields().
    325 #
    326 def parse_field(call):
    327         # Replace newlines with spaces.
    328         for ii in range(0, len(call)):
    329                 if (call[ii] == '\n'):
    330                         call[ii] == ' ';
    331 
    332         idx = call.find('(');
    333         kind = call[0:idx];
    334         rest = call[idx + 1: len(call) - 1];
    335         args = re.split('\s*,\s*', rest);
    336 
    337         consts = [];
    338 
    339         if (kind == 'ACCESSORS' or kind == 'ACCESSORS_GCSAFE'):
    340                 klass = args[0];
    341                 field = args[1];
    342                 dtype = args[2];
    343                 offset = args[3];
    344 
    345                 return ({
    346                     'name': 'class_%s__%s__%s' % (klass, field, dtype),
    347                     'value': '%s::%s' % (klass, offset)
    348                 });
    349 
    350         assert(kind == 'SMI_ACCESSORS');
    351         klass = args[0];
    352         field = args[1];
    353         offset = args[2];
    354 
    355         return ({
    356             'name': 'class_%s__%s__%s' % (klass, field, 'SMI'),
    357             'value': '%s::%s' % (klass, offset)
    358         });
    359 
    360 #
    361 # Load field offset information from objects-inl.h.
    362 #
    363 def load_fields():
    364         inlfilename = sys.argv[3];
    365         inlfile = open(inlfilename, 'r');
    366 
    367         #
    368         # Each class's fields and the corresponding offsets are described in the
    369         # source by calls to macros like "ACCESSORS" (and friends).  All we do
    370         # here is extract these macro invocations, taking into account that they
    371         # may span multiple lines and may contain nested parentheses.  We also
    372         # call parse_field() to pick apart the invocation.
    373         #
    374         prefixes = [ 'ACCESSORS', 'ACCESSORS_GCSAFE', 'SMI_ACCESSORS' ];
    375         current = '';
    376         opens = 0;
    377 
    378         for line in inlfile:
    379                 if (opens > 0):
    380                         # Continuation line
    381                         for ii in range(0, len(line)):
    382                                 if (line[ii] == '('):
    383                                         opens += 1;
    384                                 elif (line[ii] == ')'):
    385                                         opens -= 1;
    386 
    387                                 if (opens == 0):
    388                                         break;
    389 
    390                         current += line[0:ii + 1];
    391                         continue;
    392 
    393                 for prefix in prefixes:
    394                         if (not line.startswith(prefix + '(')):
    395                                 continue;
    396 
    397                         if (len(current) > 0):
    398                                 fields.append(parse_field(current));
    399                                 current = '';
    400 
    401                         for ii in range(len(prefix), len(line)):
    402                                 if (line[ii] == '('):
    403                                         opens += 1;
    404                                 elif (line[ii] == ')'):
    405                                         opens -= 1;
    406 
    407                                 if (opens == 0):
    408                                         break;
    409 
    410                         current += line[0:ii + 1];
    411 
    412         if (len(current) > 0):
    413                 fields.append(parse_field(current));
    414                 current = '';
    415 
    416         for body in extras_accessors:
    417                 fields.append(parse_field('ACCESSORS(%s)' % body));
    418 
    419 #
    420 # Emit a block of constants.
    421 #
    422 def emit_set(out, consts):
    423         for ii in range(0, len(consts)):
    424                 out.write('int v8dbg_%s = %s;\n' %
    425                     (consts[ii]['name'], consts[ii]['value']));
    426         out.write('\n');
    427 
    428 #
    429 # Emit the whole output file.
    430 #
    431 def emit_config():
    432         out = file(sys.argv[1], 'w');
    433 
    434         out.write(header);
    435 
    436         out.write('/* miscellaneous constants */\n');
    437         emit_set(out, consts_misc);
    438 
    439         out.write('/* class type information */\n');
    440         consts = [];
    441         keys = typeclasses.keys();
    442         keys.sort();
    443         for typename in keys:
    444                 klass = typeclasses[typename];
    445                 consts.append({
    446                     'name': 'type_%s__%s' % (klass, typename),
    447                     'value': typename
    448                 });
    449 
    450         emit_set(out, consts);
    451 
    452         out.write('/* class hierarchy information */\n');
    453         consts = [];
    454         keys = klasses.keys();
    455         keys.sort();
    456         for klassname in keys:
    457                 pklass = klasses[klassname]['parent'];
    458                 if (pklass == None):
    459                         continue;
    460 
    461                 consts.append({
    462                     'name': 'parent_%s__%s' % (klassname, pklass),
    463                     'value': 0
    464                 });
    465 
    466         emit_set(out, consts);
    467 
    468         out.write('/* field information */\n');
    469         emit_set(out, fields);
    470 
    471         out.write(footer);
    472 
    473 if (len(sys.argv) < 4):
    474         print('usage: %s output.cc objects.h objects-inl.h' % sys.argv[0]);
    475         sys.exit(2);
    476 
    477 load_objects();
    478 load_fields();
    479 emit_config();
    480