1 #!/usr/bin/env python 2 3 # 4 # Copyright 2012 the V8 project authors. All rights reserved. 5 # Redistribution and use in source and binary forms, with or without 6 # modification, are permitted provided that the following conditions are 7 # met: 8 # 9 # * Redistributions of source code must retain the above copyright 10 # notice, this list of conditions and the following disclaimer. 11 # * Redistributions in binary form must reproduce the above 12 # copyright notice, this list of conditions and the following 13 # disclaimer in the documentation and/or other materials provided 14 # with the distribution. 15 # * Neither the name of Google Inc. nor the names of its 16 # contributors may be used to endorse or promote products derived 17 # from this software without specific prior written permission. 18 # 19 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 # 31 32 # 33 # Emits a C++ file to be compiled and linked into libv8 to support postmortem 34 # debugging tools. 
# Most importantly, this tool emits constants describing V8
# internals:
#
#    v8dbg_type_CLASS__TYPE = VALUE             Describes class type values
#    v8dbg_class_CLASS__FIELD__TYPE = OFFSET    Describes class fields
#    v8dbg_parent_CLASS__PARENT                 Describes class hierarchy
#    v8dbg_frametype_NAME = VALUE               Describes stack frame values
#    v8dbg_off_fp_NAME = OFFSET                 Frame pointer offsets
#    v8dbg_prop_NAME = OFFSET                   Object property offsets
#    v8dbg_NAME = VALUE                         Miscellaneous values
#
# These constants are declared as global integers so that they'll be present in
# the generated libv8 binary.
#

import re
import sys

#
# Miscellaneous constants, tags, and masks used for object identification.
# Every entry is a dictionary with a 'name' (the suffix of the emitted
# "v8dbg_" symbol) and a 'value' (the C++ expression assigned to it).
#
consts_misc = [
    dict(name='FirstNonstringType', value='FIRST_NONSTRING_TYPE'),

    dict(name='IsNotStringMask', value='kIsNotStringMask'),
    dict(name='StringTag', value='kStringTag'),
    dict(name='NotStringTag', value='kNotStringTag'),

    dict(name='StringEncodingMask', value='kStringEncodingMask'),
    dict(name='TwoByteStringTag', value='kTwoByteStringTag'),
    dict(name='AsciiStringTag', value='kOneByteStringTag'),

    dict(name='StringRepresentationMask', value='kStringRepresentationMask'),
    dict(name='SeqStringTag', value='kSeqStringTag'),
    dict(name='ConsStringTag', value='kConsStringTag'),
    dict(name='ExternalStringTag', value='kExternalStringTag'),
    dict(name='SlicedStringTag', value='kSlicedStringTag'),

    dict(name='FailureTag', value='kFailureTag'),
    dict(name='FailureTagMask', value='kFailureTagMask'),
    dict(name='HeapObjectTag', value='kHeapObjectTag'),
    dict(name='HeapObjectTagMask', value='kHeapObjectTagMask'),
    dict(name='SmiTag', value='kSmiTag'),
    dict(name='SmiTagMask', value='kSmiTagMask'),
    dict(name='SmiValueShift', value='kSmiTagSize'),
    dict(name='SmiShiftSize', value='kSmiShiftSize'),
    dict(name='PointerSizeLog2', value='kPointerSizeLog2'),

    dict(name='prop_idx_first', value='DescriptorArray::kFirstIndex'),
    dict(name='prop_type_field', value='FIELD'),
    dict(name='prop_type_first_phantom', value='TRANSITION'),
    dict(name='prop_type_mask', value='PropertyDetails::TypeField::kMask'),

    dict(name='prop_desc_key', value='DescriptorArray::kDescriptorKey'),
    dict(name='prop_desc_details',
         value='DescriptorArray::kDescriptorDetails'),
    dict(name='prop_desc_value', value='DescriptorArray::kDescriptorValue'),
    dict(name='prop_desc_size', value='DescriptorArray::kDescriptorSize'),

    dict(name='off_fp_context',
         value='StandardFrameConstants::kContextOffset'),
    dict(name='off_fp_marker',
         value='StandardFrameConstants::kMarkerOffset'),
    dict(name='off_fp_function',
         value='JavaScriptFrameConstants::kFunctionOffset'),
    dict(name='off_fp_args',
         value='JavaScriptFrameConstants::kLastParameterOffset'),
]

#
# The following useful fields are missing accessors, so we define fake ones.
#
extras_accessors = [
    'HeapObject, map, Map, kMapOffset',
    'JSObject, elements, Object, kElementsOffset',
    'FixedArray, data, uintptr_t, kHeaderSize',
    'Map, instance_attributes, int, kInstanceAttributesOffset',
    'Map, inobject_properties, int, kInObjectPropertiesOffset',
    'Map, instance_size, int, kInstanceSizeOffset',
    'HeapNumber, value, double, kValueOffset',
    'ConsString, first, String, kFirstOffset',
    'ConsString, second, String, kSecondOffset',
    'ExternalString, resource, Object, kResourceOffset',
    'SeqOneByteString, chars, char, kHeaderSize',
    'SeqTwoByteString, chars, char, kHeaderSize',
    'SharedFunctionInfo, code, Code, kCodeOffset',
    'SlicedString, parent, String, kParentOffset',
    'Code, instruction_start, uintptr_t, kHeaderSize',
    'Code, instruction_size, int, kInstructionSizeOffset',
]

#
# The following is a whitelist of classes we expect to find when scanning the
# source code. This list is not exhaustive, but it's still useful to identify
# when this script gets out of sync with the source. See load_objects().
#
expected_classes = [
    'ConsString', 'FixedArray', 'HeapNumber', 'JSArray', 'JSFunction',
    'JSObject', 'JSRegExp', 'JSValue', 'Map', 'Oddball', 'Script',
    'SeqOneByteString', 'SharedFunctionInfo'
]


#
# The following structures store high-level representations of the structures
# for which we're going to emit descriptive constants.
#
types = {}          # set of all type names
typeclasses = {}    # maps type names to corresponding class names
klasses = {}        # known classes, including parents
fields = []         # field declarations

#
# Prologue of the generated C++ file. The "%s" is filled in with the name
# this script was invoked as. Note that the backslash at the end of the
# "#define" line is consumed by Python (backslash-newline inside a non-raw
# string literal), so the macro is emitted on a single line.
#
header = '''
/*
 * This file is generated by %s. Do not edit directly.
 */

#include "v8.h"
#include "frames.h"
#include "frames-inl.h" /* for architecture-specific frame constants */

using namespace v8::internal;

extern "C" {

/* stack frame constants */
#define FRAME_CONST(value, klass) \
    int v8dbg_frametype_##klass = StackFrame::value;

STACK_FRAME_TYPE_LIST(FRAME_CONST)

#undef FRAME_CONST

''' % sys.argv[0]

#
# Epilogue of the generated C++ file: closes the extern "C" block.
#
footer = '''
}
'''

#
# Matches a class declaration such as "class Map: public HeapObject {" and
# captures the class name (group 1) and, when present, its parent (group 3).
#
_CLASS_DECL_RE = re.compile(r'class (\w[^\s:]*)(: public (\w[^\s{]*))?\s*{')


#
# Converts an instance type name into a candidate class name by a systematic
# transformation: the "_TYPE" suffix is removed and the remainder is converted
# to camel case, except that a "JS" prefix remains uppercase (as in
# "JS_FUNCTION_TYPE" => "JSFunction"). Returns None for names that do not end
# in "_TYPE".
#
def _camel_case_type(typename):
    # Symbols and Strings are implemented using the same classes.
    usetype = re.sub('SYMBOL_', 'STRING_', typename)

    # REGEXP behaves like REG_EXP, as in JS_REGEXP_TYPE => JSRegExp.
    usetype = re.sub('_REGEXP_', '_REG_EXP_', usetype)

    if not usetype.endswith('_TYPE'):
        return None

    parts = usetype[:-len('_TYPE')].split('_')
    prefix = ''
    if parts[0] == 'JS':
        prefix = 'JS'
        parts = parts[1:]

    # capitalize() upper-cases the first character and lower-cases the rest,
    # and (unlike indexing part[0]) tolerates an empty segment.
    return prefix + ''.join(part.capitalize() for part in parts)


#
# Mapping string types is more complicated. Both types and class names for
# Strings specify a representation (e.g., Seq, Cons, External, or Sliced) and
# an encoding (TwoByte or Ascii). In the simplest case, both of these are
# explicit in both names, as in:
#
#       EXTERNAL_ASCII_STRING_TYPE => ExternalAsciiString
#
# However, either the representation or encoding can be omitted from the type
# name, in which case "Seq" and "TwoByte" are assumed, as in:
#
#       STRING_TYPE => SeqTwoByteString
#
# Additionally, sometimes the type name has more information than the class,
# as in:
#
#       CONS_ASCII_STRING_TYPE => ConsString
#
# To figure this out dynamically, we first check for a representation and
# encoding and add them if they're not present. If that doesn't yield a valid
# class name (one recorded in klasses), then we strip out the encoding.
#
def _resolve_string_class(cctype):
    if ('Cons' not in cctype and 'External' not in cctype and
            'Sliced' not in cctype):
        if 'Ascii' in cctype:
            cctype = re.sub('AsciiString$', 'SeqOneByteString', cctype)
        else:
            cctype = re.sub('String$', 'SeqString', cctype)

    # NOTE: for "AsciiString" this step produces the intermediate name
    # "SeqOneByteTwoByteString"; the fallback below removes "TwoByte" again,
    # yielding "SeqOneByteString".
    if 'Ascii' not in cctype:
        cctype = re.sub('String$', 'TwoByteString', cctype)

    if cctype not in klasses:
        cctype = re.sub('Ascii', '', cctype)
        cctype = re.sub('TwoByte', '', cctype)

    return cctype


#
# Loads class hierarchy and type information from "objects.h" (the path is
# taken from sys.argv[2]). Fills in the module-level "klasses", "types" and
# "typeclasses" structures. If any class listed in expected_classes cannot be
# associated with an instance type, an error is printed for each one and the
# script exits with status 1.
#
def load_objects():
    objfilename = sys.argv[2]
    in_insttype = False
    typestr = ''

    #
    # Construct a dictionary for the classes we're sure should be present.
    # Entries are removed as the corresponding classes are matched below.
    #
    checktypes = dict.fromkeys(expected_classes, True)

    #
    # Iterate objects.h line-by-line to collect type and class information.
    # For types, we accumulate a string representing the entire InstanceType
    # enum definition and parse it later because it's easier to do so
    # without the embedded newlines.
    #
    with open(objfilename, 'r') as objfile:
        for line in objfile:
            if line.startswith('enum InstanceType {'):
                in_insttype = True
                continue

            if in_insttype and line.startswith('};'):
                in_insttype = False
                continue

            # Drop surrounding whitespace and any trailing "//" comment.
            line = re.sub('//.*', '', line.strip())

            if in_insttype:
                typestr += line
                continue

            match = _CLASS_DECL_RE.match(line)
            if match:
                klasses[match.group(1)] = {'parent': match.group(3)}

    #
    # Process the instance type declaration. Anything from "=" onwards is an
    # explicit enumerator value and is discarded; empty entries (e.g. after a
    # trailing comma) are ignored.
    #
    for entry in typestr.split(','):
        name = re.sub(r'\s*=.*', '', entry).strip()
        if name:
            types[name] = True

    #
    # Infer class names for each type based on a systematic transformation.
    # For example, "JS_FUNCTION_TYPE" becomes "JSFunction". We find the
    # class for each type rather than the other way around because there are
    # fewer cases where one type maps to more than one class than the other
    # way around.
    #
    for typename in types:
        cctype = _camel_case_type(typename)
        if cctype is None:
            continue

        if cctype.endswith('String'):
            cctype = _resolve_string_class(cctype)

        #
        # Despite all that, some types have no corresponding class.
        #
        if cctype in klasses:
            typeclasses[typename] = cctype
            checktypes.pop(cctype, None)

    if checktypes:
        for klass in checktypes:
            print('error: expected class \"%s\" not found' % klass)

        sys.exit(1)


#
# For a given macro call, pick apart the arguments and return an object
# describing the corresponding output constant. See load_fields().
#
# "call" is the text of one ACCESSORS, ACCESSORS_GCSAFE or SMI_ACCESSORS
# invocation, from the macro name through the closing parenthesis. The result
# is a dictionary with a 'name' of the form "class_CLASS__FIELD__TYPE" (TYPE
# is "SMI" for SMI_ACCESSORS) and a 'value' of the form "CLASS::OFFSET".
# Raises ValueError for any other macro name.
#
def parse_field(call):
    #
    # Invocations may span several lines: turn newlines into spaces and trim
    # every argument so that stray whitespace next to the parentheses or the
    # commas never ends up in the emitted identifiers.
    #
    call = call.replace('\n', ' ').strip()

    idx = call.find('(')
    kind = call[0:idx]
    rest = call[idx + 1: len(call) - 1]
    args = [arg.strip() for arg in rest.split(',')]

    if kind in ('ACCESSORS', 'ACCESSORS_GCSAFE'):
        klass = args[0]
        field = args[1]
        dtype = args[2]
        offset = args[3]

        return {
            'name': 'class_%s__%s__%s' % (klass, field, dtype),
            'value': '%s::%s' % (klass, offset)
        }

    if kind != 'SMI_ACCESSORS':
        raise ValueError('unexpected accessor macro "%s"' % kind)

    klass = args[0]
    field = args[1]
    offset = args[2]

    return {
        'name': 'class_%s__%s__%s' % (klass, field, 'SMI'),
        'value': '%s::%s' % (klass, offset)
    }

#
# Load field offset information from objects-inl.h.
#
def load_fields():
    inlfilename = sys.argv[3]

    #
    # Each class's fields and the corresponding offsets are described in the
    # source by calls to macros like "ACCESSORS" (and friends). All we do
    # here is extract these macro invocations, taking into account that they
    # may span multiple lines and may contain nested parentheses. We also
    # call parse_field() to pick apart the invocation.
    #
    prefixes = ['ACCESSORS', 'ACCESSORS_GCSAFE', 'SMI_ACCESSORS']
    current = ''
    opens = 0

    with open(inlfilename, 'r') as inlfile:
        for line in inlfile:
            if opens > 0:
                # Continuation line of an invocation that is still open.
                opens, end = _scan_parens(line, 0, opens)
                current += line[0:end + 1]
                continue

            for prefix in prefixes:
                if not line.startswith(prefix + '('):
                    continue

                # A new invocation starts here, so the previous one (if any)
                # is complete and can be parsed.
                if len(current) > 0:
                    fields.append(parse_field(current))
                    current = ''

                opens, end = _scan_parens(line, len(prefix), opens)
                current += line[0:end + 1]

    # Flush the last invocation collected from the file.
    if len(current) > 0:
        fields.append(parse_field(current))
        current = ''

    for body in extras_accessors:
        fields.append(parse_field('ACCESSORS(%s)' % body))


#
# Walks "line" from index "start", adding one to "opens" for every "(" and
# subtracting one for every ")", and stops as soon as the count reaches zero.
# Returns the updated count and the index of the last character examined.
#
def _scan_parens(line, start, opens):
    end = start - 1
    for end in range(start, len(line)):
        if line[end] == '(':
            opens += 1
        elif line[end] == ')':
            opens -= 1

        if opens == 0:
            break

    return opens, end

#
# Emit a block of constants. "out" is a writable file-like object and "consts"
# is a sequence of dictionaries with 'name' and 'value' entries; each one is
# written as a global "int v8dbg_NAME = VALUE;" definition, and the block is
# terminated by an empty line.
#
def emit_set(out, consts):
    for const in consts:
        out.write('int v8dbg_%s = %s;\n' % (const['name'], const['value']))
    out.write('\n')

#
# Emit the whole output file.
#
# The output path is taken from sys.argv[1]. The file consists of the header,
# the miscellaneous constants, one "type_CLASS__TYPE" constant per entry of
# typeclasses, one "parent_CLASS__PARENT" constant per class that has a
# parent, the field constants, and the footer.
#
def emit_config():
    # open() and sorted() work on both Python 2 and Python 3, and the "with"
    # block guarantees the output is flushed and closed.
    with open(sys.argv[1], 'w') as out:
        out.write(header)

        out.write('/* miscellaneous constants */\n')
        emit_set(out, consts_misc)

        out.write('/* class type information */\n')
        consts = []
        for typename in sorted(typeclasses):
            klass = typeclasses[typename]
            consts.append({
                'name': 'type_%s__%s' % (klass, typename),
                'value': typename
            })

        emit_set(out, consts)

        out.write('/* class hierarchy information */\n')
        consts = []
        for klassname in sorted(klasses):
            pklass = klasses[klassname]['parent']
            if pklass is None:
                # Root classes have no parent to describe.
                continue

            # Only the symbol name carries information here; the value is
            # always 0.
            consts.append({
                'name': 'parent_%s__%s' % (klassname, pklass),
                'value': 0
            })

        emit_set(out, consts)

        out.write('/* field information */\n')
        emit_set(out, fields)

        out.write(footer)


#
# Script entry point: requires the output file plus the two input headers on
# the command line. Guarded so that importing this file has no side effects.
#
if __name__ == '__main__':
    if len(sys.argv) < 4:
        print('usage: %s output.cc objects.h objects-inl.h' % sys.argv[0])
        sys.exit(2)

    load_objects()
    load_fields()
    emit_config()