#!/usr/bin/env python

#
# Copyright 2012 the V8 project authors. All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
#       notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
#       copyright notice, this list of conditions and the following
#       disclaimer in the documentation and/or other materials provided
#       with the distribution.
#     * Neither the name of Google Inc. nor the names of its
#       contributors may be used to endorse or promote products derived
#       from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#

#
# Emits a C++ file to be compiled and linked into libv8 to support postmortem
# debugging tools. Most importantly, this tool emits constants describing V8
# internals:
#
#     v8dbg_type_CLASS__TYPE = VALUE          Describes class type values
#     v8dbg_class_CLASS__FIELD__TYPE = OFFSET Describes class fields
#     v8dbg_parent_CLASS__PARENT              Describes class hierarchy
#     v8dbg_frametype_NAME = VALUE            Describes stack frame values
#     v8dbg_off_fp_NAME = OFFSET              Frame pointer offsets
#     v8dbg_prop_NAME = OFFSET                Object property offsets
#     v8dbg_NAME = VALUE                      Miscellaneous values
#
# These constants are declared as global integers so that they'll be present in
# the generated libv8 binary.
#
# Usage: <this script> output.cc objects.h objects-inl.h
#

import re
import sys

#
# Miscellaneous constants, tags, and masks used for object identification.
#
consts_misc = [
    { 'name': 'FirstNonstringType', 'value': 'FIRST_NONSTRING_TYPE' },

    { 'name': 'IsNotStringMask', 'value': 'kIsNotStringMask' },
    { 'name': 'StringTag', 'value': 'kStringTag' },
    { 'name': 'NotStringTag', 'value': 'kNotStringTag' },

    { 'name': 'StringEncodingMask', 'value': 'kStringEncodingMask' },
    { 'name': 'TwoByteStringTag', 'value': 'kTwoByteStringTag' },
    { 'name': 'AsciiStringTag', 'value': 'kAsciiStringTag' },

    { 'name': 'StringRepresentationMask',
        'value': 'kStringRepresentationMask' },
    { 'name': 'SeqStringTag', 'value': 'kSeqStringTag' },
    { 'name': 'ConsStringTag', 'value': 'kConsStringTag' },
    { 'name': 'ExternalStringTag', 'value': 'kExternalStringTag' },

    { 'name': 'FailureTag', 'value': 'kFailureTag' },
    { 'name': 'FailureTagMask', 'value': 'kFailureTagMask' },
    { 'name': 'HeapObjectTag', 'value': 'kHeapObjectTag' },
    { 'name': 'HeapObjectTagMask', 'value': 'kHeapObjectTagMask' },
    { 'name': 'SmiTag', 'value': 'kSmiTag' },
    { 'name': 'SmiTagMask', 'value': 'kSmiTagMask' },
    { 'name': 'SmiValueShift', 'value': 'kSmiTagSize' },
    { 'name': 'PointerSizeLog2', 'value': 'kPointerSizeLog2' },

    { 'name': 'prop_idx_content',
        'value': 'DescriptorArray::kContentArrayIndex' },
    { 'name': 'prop_idx_first',
        'value': 'DescriptorArray::kFirstIndex' },
    { 'name': 'prop_type_field',
        'value': 'FIELD' },
    { 'name': 'prop_type_first_phantom',
        'value': 'MAP_TRANSITION' },
    { 'name': 'prop_type_mask',
        'value': 'PropertyDetails::TypeField::kMask' },

    { 'name': 'off_fp_context',
        'value': 'StandardFrameConstants::kContextOffset' },
    { 'name': 'off_fp_marker',
        'value': 'StandardFrameConstants::kMarkerOffset' },
    { 'name': 'off_fp_function',
        'value': 'JavaScriptFrameConstants::kFunctionOffset' },
    { 'name': 'off_fp_args',
        'value': 'JavaScriptFrameConstants::kLastParameterOffset' },
]

#
# The following useful fields are missing accessors, so we define fake ones.
# Each entry is the argument list of an ACCESSORS() invocation:
# "class, field, type, offset constant".
#
extras_accessors = [
    'HeapObject, map, Map, kMapOffset',
    'JSObject, elements, Object, kElementsOffset',
    'FixedArray, data, uintptr_t, kHeaderSize',
    'Map, instance_attributes, int, kInstanceAttributesOffset',
    'Map, instance_descriptors, int, kInstanceDescriptorsOrBitField3Offset',
    'Map, inobject_properties, int, kInObjectPropertiesOffset',
    'Map, instance_size, int, kInstanceSizeOffset',
    'HeapNumber, value, double, kValueOffset',
    'ConsString, first, String, kFirstOffset',
    'ConsString, second, String, kSecondOffset',
    'ExternalString, resource, Object, kResourceOffset',
    'SeqAsciiString, chars, char, kHeaderSize',
    'SharedFunctionInfo, code, Code, kCodeOffset',
    'Code, instruction_start, uintptr_t, kHeaderSize',
    'Code, instruction_size, int, kInstructionSizeOffset',
]

#
# The following is a whitelist of classes we expect to find when scanning the
# source code. This list is not exhaustive, but it's still useful to identify
# when this script gets out of sync with the source. See load_objects().
#
expected_classes = [
    'ConsString', 'FixedArray', 'HeapNumber', 'JSArray', 'JSFunction',
    'JSObject', 'JSRegExp', 'JSValue', 'Map', 'Oddball', 'Script',
    'SeqAsciiString', 'SharedFunctionInfo'
]


#
# The following structures store high-level representations of the structures
# for which we're going to emit descriptive constants.
#
types = {}          # set of all type names
typeclasses = {}    # maps type names to corresponding class names
klasses = {}        # known classes, including parents
fields = []         # field declarations

#
# NOTE: the backslash-newline inside this (non-raw) string literal is consumed
# by Python, so the FRAME_CONST macro is emitted on a single line.
#
header = '''
/*
 * This file is generated by %s. Do not edit directly.
 */

#include "v8.h"
#include "frames.h"
#include "frames-inl.h" /* for architecture-specific frame constants */

using namespace v8::internal;

extern "C" {

/* stack frame constants */
#define FRAME_CONST(value, klass) \
    int v8dbg_frametype_##klass = StackFrame::value;

STACK_FRAME_TYPE_LIST(FRAME_CONST)

#undef FRAME_CONST

''' % sys.argv[0]

footer = '''
}
'''


#
# Infers the class name for a single instance type name. See load_objects().
#
def _type_to_class(typename):
    """Return the candidate class name for an InstanceType enumerator.

    The name is derived by a systematic transformation, e.g.
    "JS_FUNCTION_TYPE" becomes "JSFunction". Returns None when `typename`
    does not end in "_TYPE". The returned name is only a candidate: the
    caller must still check that it is present in `klasses`.
    """
    # Symbols and Strings are implemented using the same classes.
    usetype = typename.replace('SYMBOL_', 'STRING_')

    # REGEXP behaves like REG_EXP, as in JS_REGEXP_TYPE => JSRegExp.
    usetype = usetype.replace('_REGEXP_', '_REG_EXP_')

    #
    # Remove the "_TYPE" suffix and then convert to camel case, except that
    # a "JS" prefix remains uppercase (as in "JS_FUNCTION_TYPE" =>
    # "JSFunction").
    #
    if not usetype.endswith('_TYPE'):
        return None

    parts = usetype[:-len('_TYPE')].split('_')
    cctype = ''
    if parts[0] == 'JS':
        cctype = 'JS'
        parts = parts[1:]

    # part[:1] (rather than part[0]) tolerates empty components such as
    # those produced by a doubled underscore.
    cctype += ''.join(part[:1].upper() + part[1:].lower() for part in parts)

    #
    # Mapping string types is more complicated. Both types and class names
    # for Strings specify a representation (e.g., Seq, Cons, External, or
    # Sliced) and an encoding (TwoByte or Ascii). In the simplest case, both
    # of these are explicit in both names, as in:
    #
    #     EXTERNAL_ASCII_STRING_TYPE => ExternalAsciiString
    #
    # However, either the representation or encoding can be omitted from
    # the type name, in which case "Seq" and "TwoByte" are assumed, as in:
    #
    #     STRING_TYPE => SeqTwoByteString
    #
    # Additionally, sometimes the type name has more information than the
    # class, as in:
    #
    #     CONS_ASCII_STRING_TYPE => ConsString
    #
    # To figure this out dynamically, we first check for a representation
    # and encoding and add them if they're not present. If that doesn't
    # yield a known class name, then we strip out the encoding.
    #
    if cctype.endswith('String'):
        if ('Cons' not in cctype and
                'External' not in cctype and
                'Sliced' not in cctype):
            if 'Ascii' in cctype:
                cctype = re.sub(r'AsciiString$', 'SeqAsciiString', cctype)
            else:
                cctype = re.sub(r'String$', 'SeqString', cctype)

        if 'Ascii' not in cctype:
            cctype = re.sub(r'String$', 'TwoByteString', cctype)

        if cctype not in klasses:
            cctype = cctype.replace('Ascii', '')
            cctype = cctype.replace('TwoByte', '')

    return cctype


#
# Loads class hierarchy and type information from "objects.h".
#
def load_objects():
    """Populate `klasses`, `types` and `typeclasses` from sys.argv[2].

    Prints an error for every entry of `expected_classes` that no instance
    type maps to and, if there is any, exits the process with status 1.
    """
    in_insttype = False
    typestr = ''

    #
    # Iterate objects.h line-by-line to collect type and class information.
    # For types, we accumulate a string representing the entire InstanceType
    # enum definition and parse it later because it's easier to do so
    # without the embedded newlines.
    #
    with open(sys.argv[2], 'r') as objfile:
        for line in objfile:
            if line.startswith('enum InstanceType {'):
                in_insttype = True
                continue

            if in_insttype and line.startswith('};'):
                in_insttype = False
                continue

            # Trim the line and drop any trailing "//" comment.
            line = re.sub(r'//.*', '', line.strip())

            if in_insttype:
                typestr += line
                continue

            match = re.match(
                r'class (\w[^\s:]*)(: public (\w[^\s{]*))?\s*{', line)
            if match:
                klasses[match.group(1)] = { 'parent': match.group(3) }

    #
    # Process the instance type declaration: one enumerator per
    # comma-separated entry, with any "= value" initializer removed.
    #
    for entry in typestr.split(','):
        types[re.sub(r'\s*=.*', '', entry).lstrip()] = True

    #
    # Infer class names for each type based on a systematic transformation.
    # We find the class for each type rather than the other way around
    # because there are fewer cases where one type maps to more than one
    # class than the other way around.
    #
    unseen = set(expected_classes)
    for typename in types:
        cctype = _type_to_class(typename)

        # Despite all that, some types have no corresponding class.
        if cctype is not None and cctype in klasses:
            typeclasses[typename] = cctype
            unseen.discard(cctype)

    if unseen:
        # Report in the order of expected_classes so output is deterministic.
        for klass in expected_classes:
            if klass in unseen:
                print('error: expected class "%s" not found' % klass)

        sys.exit(1)


#
# For a given macro call, pick apart the arguments and return an object
# describing the corresponding output constant. See load_fields().
#
def parse_field(call):
    """Convert one accessor macro invocation into an output constant.

    `call` is the full text of the invocation, e.g.
    "ACCESSORS(Map, prototype, Object, kPrototypeOffset)", and may contain
    embedded newlines. Returns a dict with 'name' and 'value' keys suitable
    for emit_set(). Raises ValueError for an unrecognised macro name.
    """
    # Replace newlines with spaces so a multi-line call parses as one line.
    call = call.replace('\n', ' ')

    idx = call.find('(')
    kind = call[0:idx]
    rest = call[idx + 1:len(call) - 1]
    args = re.split(r'\s*,\s*', rest.strip())

    if kind == 'ACCESSORS' or kind == 'ACCESSORS_GCSAFE':
        klass, field, dtype, offset = args[0], args[1], args[2], args[3]
    elif kind == 'SMI_ACCESSORS':
        # SMI accessors carry no explicit type argument.
        klass, field, offset = args[0], args[1], args[2]
        dtype = 'SMI'
    else:
        raise ValueError('unexpected accessor macro: %r' % kind)

    return {
        'name': 'class_%s__%s__%s' % (klass, field, dtype),
        'value': '%s::%s' % (klass, offset)
    }


#
# Scans part of a line while tracking parenthesis nesting. See load_fields().
#
def _scan_parens(line, start, opens):
    """Scan line[start:] and return (text, opens).

    `opens` is the nesting depth on entry. Scanning stops right after the
    character at which the depth is zero; `text` is `line` from its
    beginning up to and including that character, or the whole line if
    the depth never reaches zero. The updated depth is returned.
    """
    end = len(line)
    for pos in range(start, len(line)):
        if line[pos] == '(':
            opens += 1
        elif line[pos] == ')':
            opens -= 1

        if opens == 0:
            end = pos + 1
            break

    return line[:end], opens


#
# Load field offset information from objects-inl.h.
#
def load_fields():
    """Append one constant per accessor macro in sys.argv[3] to `fields`.

    The entries of `extras_accessors` are appended afterwards.
    """
    #
    # Each class's fields and the corresponding offsets are described in the
    # source by calls to macros like "ACCESSORS" (and friends). All we do
    # here is extract these macro invocations, taking into account that they
    # may span multiple lines and may contain nested parentheses. We also
    # call parse_field() to pick apart the invocation.
    #
    prefixes = [ 'ACCESSORS', 'ACCESSORS_GCSAFE', 'SMI_ACCESSORS' ]
    current = ''
    opens = 0

    with open(sys.argv[3], 'r') as inlfile:
        for line in inlfile:
            if opens > 0:
                # Continuation line of an invocation that is still open.
                text, opens = _scan_parens(line, 0, opens)
                current += text
                continue

            for prefix in prefixes:
                if not line.startswith(prefix + '('):
                    continue

                # A new invocation starts: flush the previous one.
                if current:
                    fields.append(parse_field(current))
                    current = ''

                text, opens = _scan_parens(line, len(prefix), opens)
                current += text

    if current:
        fields.append(parse_field(current))

    for body in extras_accessors:
        fields.append(parse_field('ACCESSORS(%s)' % body))


#
# Emit a block of constants.
#
def emit_set(out, consts):
    """Write "int v8dbg_NAME = VALUE;" for each constant, then a blank line.

    `out` is any object with a write() method; `consts` is a sequence of
    dicts with 'name' and 'value' keys.
    """
    for const in consts:
        out.write('int v8dbg_%s = %s;\n' % (const['name'], const['value']))
    out.write('\n')


#
# Emit the whole output file.
#
def emit_config():
    """Write the generated C++ source to the path given in sys.argv[1]."""
    # open() and sorted() are used instead of file() and keys().sort() so
    # the script runs under both Python 2 and Python 3.
    with open(sys.argv[1], 'w') as out:
        out.write(header)

        out.write('/* miscellaneous constants */\n')
        emit_set(out, consts_misc)

        out.write('/* class type information */\n')
        consts = []
        for typename in sorted(typeclasses):
            klass = typeclasses[typename]
            consts.append({
                'name': 'type_%s__%s' % (klass, typename),
                'value': typename
            })

        emit_set(out, consts)

        out.write('/* class hierarchy information */\n')
        consts = []
        for klassname in sorted(klasses):
            pklass = klasses[klassname]['parent']
            if pklass is None:
                continue

            # Only the symbol name carries information; the value is unused.
            consts.append({
                'name': 'parent_%s__%s' % (klassname, pklass),
                'value': 0
            })

        emit_set(out, consts)

        out.write('/* field information */\n')
        emit_set(out, fields)

        out.write(footer)


#
# Script entry point.
#
def main():
    """Check the command line, then load the inputs and emit the output."""
    if len(sys.argv) < 4:
        print('usage: %s output.cc objects.h objects-inl.h' % sys.argv[0])
        sys.exit(2)

    load_objects()
    load_fields()
    emit_config()


if __name__ == '__main__':
    main()