#!/usr/bin/env python

#
# Copyright 2012 the V8 project authors. All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
#       notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
#       copyright notice, this list of conditions and the following
#       disclaimer in the documentation and/or other materials provided
#       with the distribution.
#     * Neither the name of Google Inc. nor the names of its
#       contributors may be used to endorse or promote products derived
#       from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#

#
# Emits a C++ file to be compiled and linked into libv8 to support postmortem
# debugging tools. Most importantly, this tool emits constants describing V8
# internals:
#
#    v8dbg_type_CLASS__TYPE = VALUE             Describes class type values
#    v8dbg_class_CLASS__FIELD__TYPE = OFFSET    Describes class fields
#    v8dbg_parent_CLASS__PARENT                 Describes class hierarchy
#    v8dbg_frametype_NAME = VALUE               Describes stack frame values
#    v8dbg_off_fp_NAME = OFFSET                 Frame pointer offsets
#    v8dbg_prop_NAME = OFFSET                   Object property offsets
#    v8dbg_NAME = VALUE                         Miscellaneous values
#
# These constants are declared as global integers so that they'll be present in
# the generated libv8 binary.
#

import re
import sys

#
# Miscellaneous constants, tags, and masks used for object identification.
#
# Each entry is written out as "int v8dbg_<name> = <value>;", so 'name' is the
# suffix of the generated global and 'value' is a C++ expression that is
# copied verbatim into the generated file.
#
consts_misc = [
    { 'name': 'FirstNonstringType',     'value': 'FIRST_NONSTRING_TYPE' },

    { 'name': 'IsNotStringMask',        'value': 'kIsNotStringMask' },
    { 'name': 'StringTag',              'value': 'kStringTag' },
    { 'name': 'NotStringTag',           'value': 'kNotStringTag' },

    { 'name': 'StringEncodingMask',     'value': 'kStringEncodingMask' },
    { 'name': 'TwoByteStringTag',       'value': 'kTwoByteStringTag' },
    { 'name': 'AsciiStringTag',         'value': 'kOneByteStringTag' },

    { 'name': 'StringRepresentationMask',
        'value': 'kStringRepresentationMask' },
    { 'name': 'SeqStringTag',           'value': 'kSeqStringTag' },
    { 'name': 'ConsStringTag',          'value': 'kConsStringTag' },
    { 'name': 'ExternalStringTag',      'value': 'kExternalStringTag' },
    { 'name': 'SlicedStringTag',        'value': 'kSlicedStringTag' },

    { 'name': 'FailureTag',             'value': 'kFailureTag' },
    { 'name': 'FailureTagMask',         'value': 'kFailureTagMask' },
    { 'name': 'HeapObjectTag',          'value': 'kHeapObjectTag' },
    { 'name': 'HeapObjectTagMask',      'value': 'kHeapObjectTagMask' },
    { 'name': 'SmiTag',                 'value': 'kSmiTag' },
    { 'name': 'SmiTagMask',             'value': 'kSmiTagMask' },
    { 'name': 'SmiValueShift',          'value': 'kSmiTagSize' },
    { 'name': 'SmiShiftSize',           'value': 'kSmiShiftSize' },
    { 'name': 'PointerSizeLog2',        'value': 'kPointerSizeLog2' },

    { 'name': 'prop_idx_first',
        'value': 'DescriptorArray::kFirstIndex' },
    { 'name': 'prop_type_field',
        'value': 'FIELD' },
    { 'name': 'prop_type_first_phantom',
        'value': 'TRANSITION' },
    { 'name': 'prop_type_mask',
        'value': 'PropertyDetails::TypeField::kMask' },

    { 'name': 'prop_desc_key',
        'value': 'DescriptorArray::kDescriptorKey' },
    { 'name': 'prop_desc_details',
        'value': 'DescriptorArray::kDescriptorDetails' },
    { 'name': 'prop_desc_value',
        'value': 'DescriptorArray::kDescriptorValue' },
    { 'name': 'prop_desc_size',
        'value': 'DescriptorArray::kDescriptorSize' },

    { 'name': 'off_fp_context',
        'value': 'StandardFrameConstants::kContextOffset' },
    { 'name': 'off_fp_constant_pool',
        'value': 'StandardFrameConstants::kConstantPoolOffset' },
    { 'name': 'off_fp_marker',
        'value': 'StandardFrameConstants::kMarkerOffset' },
    { 'name': 'off_fp_function',
        'value': 'JavaScriptFrameConstants::kFunctionOffset' },
    { 'name': 'off_fp_args',
        'value': 'JavaScriptFrameConstants::kLastParameterOffset' },
]

#
# The following useful fields are missing accessors, so we define fake ones.
#
# Each entry has the form 'Class, field, Type, kOffsetConstant', i.e. the
# argument list of an ACCESSORS(...) invocation; load_fields() wraps each one
# in "ACCESSORS(...)" and hands it to parse_field().
#
extras_accessors = [
    'HeapObject, map, Map, kMapOffset',
    'JSObject, elements, Object, kElementsOffset',
    'FixedArray, data, uintptr_t, kHeaderSize',
    'Map, instance_attributes, int, kInstanceAttributesOffset',
    'Map, inobject_properties, int, kInObjectPropertiesOffset',
    'Map, instance_size, int, kInstanceSizeOffset',
    'HeapNumber, value, double, kValueOffset',
    'ConsString, first, String, kFirstOffset',
    'ConsString, second, String, kSecondOffset',
    'ExternalString, resource, Object, kResourceOffset',
    'SeqOneByteString, chars, char, kHeaderSize',
    'SeqTwoByteString, chars, char, kHeaderSize',
    'SharedFunctionInfo, code, Code, kCodeOffset',
    'SlicedString, parent, String, kParentOffset',
    'Code, instruction_start, uintptr_t, kHeaderSize',
    'Code, instruction_size, int, kInstructionSizeOffset',
]

#
# The following is a whitelist of classes we expect to find when scanning the
# source code. This list is not exhaustive, but it's still useful to identify
# when this script gets out of sync with the source. See load_objects().
#
expected_classes = [
    'ConsString', 'FixedArray', 'HeapNumber', 'JSArray', 'JSFunction',
    'JSObject', 'JSRegExp', 'JSValue', 'Map', 'Oddball', 'Script',
    'SeqOneByteString', 'SharedFunctionInfo'
]


#
# The following structures store high-level representations of the structures
# for which we're going to emit descriptive constants.
#
types = {}          # set of all type names
typeclasses = {}    # maps type names to corresponding class names
klasses = {}        # known classes, including parents
fields = []         # field declarations

#
# Note: this is not a raw string, so the backslash that ends the "#define"
# line is a Python line continuation and the macro body is joined onto the
# same line in the generated file.
#
header = '''
/*
 * This file is generated by %s. Do not edit directly.
 */

#include "v8.h"
#include "frames.h"
#include "frames-inl.h" /* for architecture-specific frame constants */

using namespace v8::internal;

extern "C" {

/* stack frame constants */
#define FRAME_CONST(value, klass)       \
    int v8dbg_frametype_##klass = StackFrame::value;

STACK_FRAME_TYPE_LIST(FRAME_CONST)

#undef FRAME_CONST

''' % sys.argv[0]

footer = '''
}
'''

#
# Matches a class declaration such as "class Foo: public Bar {". Group 1 is
# the class name and group 3 is the parent class (None when there is none).
#
_CLASS_DECL_RE = re.compile(r'class (\w[^\s:]*)(: public (\w[^\s{]*))?\s*{')


#
# Infer the class name for a single instance type name.
#
def _infer_class_name(typename):
    """Return the candidate class name for an InstanceType enumerator.

    The name is derived by a systematic transformation, for example
    "JS_FUNCTION_TYPE" becomes "JSFunction". Returns None when the name
    does not end in "_TYPE". The returned name is only a candidate: the
    caller must still check that it is a key of `klasses`.
    """
    #
    # Symbols and Strings are implemented using the same classes.
    #
    usetype = typename.replace('SYMBOL_', 'STRING_')

    #
    # REGEXP behaves like REG_EXP, as in JS_REGEXP_TYPE => JSRegExp.
    #
    usetype = usetype.replace('_REGEXP_', '_REG_EXP_')

    #
    # Remove the "_TYPE" suffix and then convert to camel case,
    # except that a "JS" prefix remains uppercase (as in
    # "JS_FUNCTION_TYPE" => "JSFunction").
    #
    if not usetype.endswith('_TYPE'):
        return None

    parts = usetype[:-len('_TYPE')].split('_')
    if parts[0] == 'JS':
        cctype = 'JS'
        words = parts[1:]
    else:
        cctype = ''
        words = parts

    # capitalize() also copes with empty components (e.g. a doubled
    # underscore), which indexing the first character would not.
    cctype += ''.join(word.capitalize() for word in words)

    #
    # Mapping string types is more complicated. Both types and
    # class names for Strings specify a representation (e.g., Seq,
    # Cons, External, or Sliced) and an encoding (TwoByte or Ascii),
    # In the simplest case, both of these are explicit in both
    # names, as in:
    #
    #       EXTERNAL_ASCII_STRING_TYPE => ExternalAsciiString
    #
    # However, either the representation or encoding can be omitted
    # from the type name, in which case "Seq" and "TwoByte" are
    # assumed, as in:
    #
    #       STRING_TYPE => SeqTwoByteString
    #
    # Additionally, sometimes the type name has more information
    # than the class, as in:
    #
    #       CONS_ASCII_STRING_TYPE => ConsString
    #
    # To figure this out dynamically, we first check for a
    # representation and encoding and add them if they're not
    # present. If that doesn't yield a valid class name, then we
    # strip out the representation.
    #
    if cctype.endswith('String'):
        if ('Cons' not in cctype and
            'External' not in cctype and
            'Sliced' not in cctype):
            if 'Ascii' in cctype:
                cctype = re.sub(r'AsciiString$', 'SeqOneByteString', cctype)
            else:
                cctype = re.sub(r'String$', 'SeqString', cctype)

        if 'Ascii' not in cctype:
            cctype = re.sub(r'String$', 'TwoByteString', cctype)

        if cctype not in klasses:
            cctype = cctype.replace('Ascii', '')
            cctype = cctype.replace('TwoByte', '')

    return cctype


#
# Loads class hierarchy and type information from "objects.h".
#
def load_objects():
    """Populate `types`, `klasses` and `typeclasses` from objects.h.

    The path of objects.h is taken from sys.argv[2]. Every class
    declaration found is recorded in `klasses` together with its parent,
    every enumerator of "enum InstanceType" is recorded in `types`, and
    every type whose inferred class name is a known class is recorded in
    `typeclasses`.

    If any class listed in `expected_classes` ends up without a matching
    type, an error is printed for each such class and the process exits
    with status 1.
    """
    objfilename = sys.argv[2]
    in_insttype = False
    type_chunks = []

    #
    # Iterate objects.h line-by-line to collect type and class information.
    # For types, we accumulate a string representing the entire InstanceType
    # enum definition and parse it later because it's easier to do so
    # without the embedded newlines.
    #
    with open(objfilename, 'r') as objfile:
        for line in objfile:
            if line.startswith('enum InstanceType {'):
                in_insttype = True
                continue

            if in_insttype and line.startswith('};'):
                in_insttype = False
                continue

            line = re.sub(r'//.*', '', line.strip())

            if in_insttype:
                type_chunks.append(line)
                continue

            match = _CLASS_DECL_RE.match(line)
            if match:
                klasses[match.group(1)] = { 'parent': match.group(3) }

    #
    # Process the instance type declaration. Anything from the "=" onwards
    # is the enumerator's value and is dropped; empty entries (such as the
    # one after a trailing comma) are not type names and are skipped.
    #
    for entry in ''.join(type_chunks).split(','):
        typename = re.sub(r'\s*=.*', '', entry).strip()
        if typename:
            types[typename] = True

    #
    # Infer class names for each type based on a systematic transformation.
    # For example, "JS_FUNCTION_TYPE" becomes "JSFunction". We find the
    # class for each type rather than the other way around because there are
    # fewer cases where one type maps to more than one class than the other
    # way around.
    #
    missing = set(expected_classes)
    for typename in types:
        cctype = _infer_class_name(typename)

        #
        # Despite all that, some types have no corresponding class.
        #
        if cctype is not None and cctype in klasses:
            typeclasses[typename] = cctype
            missing.discard(cctype)

    if missing:
        # Report in the order of expected_classes so output is deterministic.
        for klass in expected_classes:
            if klass in missing:
                print('error: expected class "%s" not found' % klass)

        sys.exit(1)


#
# For a given macro call, pick apart the arguments and return an object
# describing the corresponding output constant. See load_fields().
#
def parse_field(call):
    """Turn one accessor macro invocation into a constant description.

    `call` is the full text of the invocation, e.g.
    "ACCESSORS(Klass, field, Type, kOffset)" or
    "SMI_ACCESSORS(Klass, field, kOffset)"; it may span several lines and
    is expected to end with the closing parenthesis.

    Returns a dict with a 'name' of the form "class_KLASS__FIELD__TYPE"
    (TYPE is "SMI" for SMI_ACCESSORS) and a 'value' of the form
    "KLASS::OFFSET". Raises AssertionError for any other macro name.
    """
    # Replace newlines with spaces. Strings are immutable, so this has to
    # build a new string rather than patch characters in place.
    call = call.replace('\n', ' ')

    idx = call.find('(')
    kind = call[0:idx]
    rest = call[idx + 1:len(call) - 1]     # drop the trailing ')'
    args = re.split(r'\s*,\s*', rest.strip())

    if kind == 'ACCESSORS' or kind == 'ACCESSORS_GCSAFE':
        klass = args[0]
        field = args[1]
        dtype = args[2]
        offset = args[3]

        return {
            'name': 'class_%s__%s__%s' % (klass, field, dtype),
            'value': '%s::%s' % (klass, offset)
        }

    # Raised explicitly (rather than via "assert") so that the check is
    # still performed when Python runs with -O.
    if kind != 'SMI_ACCESSORS':
        raise AssertionError('unexpected accessor macro: %r' % kind)

    klass = args[0]
    field = args[1]
    offset = args[2]

    return {
        'name': 'class_%s__%s__%s' % (klass, field, 'SMI'),
        'value': '%s::%s' % (klass, offset)
    }

#
# Load field offset information from objects-inl.h.
#
def _scan_parens(text, start, depth):
    """Track parenthesis nesting in `text`, beginning at index `start`.

    `depth` is the nesting depth on entry. Scanning stops right after the
    first character at which the depth is zero. Returns a tuple
    (end, depth): `end` is the index one past the last character scanned
    (len(text) if the depth never reached zero) and `depth` is the nesting
    depth at that point.
    """
    for idx in range(start, len(text)):
        char = text[idx]
        if char == '(':
            depth += 1
        elif char == ')':
            depth -= 1

        if depth == 0:
            return idx + 1, depth

    return len(text), depth


def load_fields():
    """Append a constant description to `fields` for every accessor found.

    The path of objects-inl.h is taken from sys.argv[3]. After the file
    has been scanned, one extra entry is appended for each item of
    `extras_accessors`.
    """
    inlfilename = sys.argv[3]

    #
    # Each class's fields and the corresponding offsets are described in the
    # source by calls to macros like "ACCESSORS" (and friends). All we do
    # here is extract these macro invocations, taking into account that they
    # may span multiple lines and may contain nested parentheses. We also
    # call parse_field() to pick apart the invocation.
    #
    prefixes = [ 'ACCESSORS', 'ACCESSORS_GCSAFE', 'SMI_ACCESSORS' ]
    current = ''    # text of the invocation collected so far
    opens = 0       # number of currently unclosed parentheses

    with open(inlfilename, 'r') as inlfile:
        for line in inlfile:
            if opens > 0:
                # Continuation line
                end, opens = _scan_parens(line, 0, opens)
                current += line[0:end]
                continue

            for prefix in prefixes:
                if not line.startswith(prefix + '('):
                    continue

                # A new invocation starts here, so the previous one (if
                # any) is complete and can be parsed.
                if current:
                    fields.append(parse_field(current))
                    current = ''

                end, opens = _scan_parens(line, len(prefix), opens)
                current += line[0:end]

                # A line can only start with one of the prefixes.
                break

    if current:
        fields.append(parse_field(current))

    for body in extras_accessors:
        fields.append(parse_field('ACCESSORS(%s)' % body))

#
# Compiled once so that emit_set() does not rebuild it on every call.
#
_WHITESPACE_RE = re.compile(r'\s+')

#
# Emit a block of constants.
#
def emit_set(out, consts):
    """Write one "int v8dbg_NAME = VALUE;" line per constant.

    `out` is any object with a write() method. `consts` is an iterable of
    dicts with 'name' and 'value' keys; the value may be a string or a
    number. A blank line is written after the block.
    """
    # Fix up overzealous parses. This could be done inside the
    # parsers but as there are several, it's easiest to do it here.
    for const in consts:
        name = _WHITESPACE_RE.sub('', const['name'])
        value = _WHITESPACE_RE.sub('', str(const['value']))  # Can be a number.
        out.write('int v8dbg_%s = %s;\n' % (name, value))
    out.write('\n')

#
# Emit the whole output file.
#
def emit_config():
    """Write the generated C++ file to the path given in sys.argv[1].

    The output consists of `header`, the miscellaneous constants, the
    class type constants (from `typeclasses`), the class hierarchy
    constants (from `klasses`), the field constants (from `fields`) and
    finally `footer`. Types and classes are emitted in sorted order.
    """
    # open() works on both Python 2 and 3 (the file() builtin does not
    # exist on Python 3); the with-block makes sure the output is flushed
    # and closed.
    with open(sys.argv[1], 'w') as out:
        out.write(header)

        out.write('/* miscellaneous constants */\n')
        emit_set(out, consts_misc)

        out.write('/* class type information */\n')
        consts = []
        for typename in sorted(typeclasses):
            klass = typeclasses[typename]
            consts.append({
                'name': 'type_%s__%s' % (klass, typename),
                'value': typename
            })

        emit_set(out, consts)

        out.write('/* class hierarchy information */\n')
        consts = []
        for klassname in sorted(klasses):
            pklass = klasses[klassname]['parent']
            if pklass is None:
                # Classes without a parent contribute no constant.
                continue

            consts.append({
                'name': 'parent_%s__%s' % (klassname, pklass),
                'value': 0
            })

        emit_set(out, consts)

        out.write('/* field information */\n')
        emit_set(out, fields)

        out.write(footer)


# Only run the generator when executed as a script, so that importing this
# module has no side effects.
if __name__ == '__main__':
    if len(sys.argv) < 4:
        print('usage: %s output.cc objects.h objects-inl.h' % sys.argv[0])
        sys.exit(2)

    load_objects()
    load_fields()
    emit_config()