1 #!/usr/bin/env python 2 3 # 4 # Copyright 2012 the V8 project authors. All rights reserved. 5 # Redistribution and use in source and binary forms, with or without 6 # modification, are permitted provided that the following conditions are 7 # met: 8 # 9 # * Redistributions of source code must retain the above copyright 10 # notice, this list of conditions and the following disclaimer. 11 # * Redistributions in binary form must reproduce the above 12 # copyright notice, this list of conditions and the following 13 # disclaimer in the documentation and/or other materials provided 14 # with the distribution. 15 # * Neither the name of Google Inc. nor the names of its 16 # contributors may be used to endorse or promote products derived 17 # from this software without specific prior written permission. 18 # 19 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 # 31 32 # 33 # Emits a C++ file to be compiled and linked into libv8 to support postmortem 34 # debugging tools. 
# Most importantly, this tool emits constants describing V8
# internals:
#
#    v8dbg_type_CLASS__TYPE = VALUE             Describes class type values
#    v8dbg_class_CLASS__FIELD__TYPE = OFFSET    Describes class fields
#    v8dbg_parent_CLASS__PARENT                 Describes class hierarchy
#    v8dbg_frametype_NAME = VALUE               Describes stack frame values
#    v8dbg_off_fp_NAME = OFFSET                 Frame pointer offsets
#    v8dbg_prop_NAME = OFFSET                   Object property offsets
#    v8dbg_NAME = VALUE                         Miscellaneous values
#
# These constants are declared as global integers so that they'll be present in
# the generated libv8 binary.
#

import re
import sys

#
# Miscellaneous constants, tags, and masks used for object identification.
# Every entry pairs the suffix of the emitted "v8dbg_" symbol ('name') with
# the C++ expression that initializes it ('value'); see emit_set().
#
consts_misc = [
    dict(name='FirstNonstringType', value='FIRST_NONSTRING_TYPE'),

    dict(name='IsNotStringMask', value='kIsNotStringMask'),
    dict(name='StringTag', value='kStringTag'),
    dict(name='NotStringTag', value='kNotStringTag'),

    dict(name='StringEncodingMask', value='kStringEncodingMask'),
    dict(name='TwoByteStringTag', value='kTwoByteStringTag'),
    dict(name='AsciiStringTag', value='kOneByteStringTag'),

    dict(name='StringRepresentationMask',
         value='kStringRepresentationMask'),
    dict(name='SeqStringTag', value='kSeqStringTag'),
    dict(name='ConsStringTag', value='kConsStringTag'),
    dict(name='ExternalStringTag', value='kExternalStringTag'),

    dict(name='FailureTag', value='kFailureTag'),
    dict(name='FailureTagMask', value='kFailureTagMask'),
    dict(name='HeapObjectTag', value='kHeapObjectTag'),
    dict(name='HeapObjectTagMask', value='kHeapObjectTagMask'),
    dict(name='SmiTag', value='kSmiTag'),
    dict(name='SmiTagMask', value='kSmiTagMask'),
    dict(name='SmiValueShift', value='kSmiTagSize'),
    dict(name='SmiShiftSize', value='kSmiShiftSize'),
    dict(name='PointerSizeLog2', value='kPointerSizeLog2'),

    dict(name='prop_idx_first',
         value='DescriptorArray::kFirstIndex'),
    dict(name='prop_type_field',
         value='FIELD'),
    dict(name='prop_type_first_phantom',
         value='TRANSITION'),
    dict(name='prop_type_mask',
         value='PropertyDetails::TypeField::kMask'),

    dict(name='off_fp_context',
         value='StandardFrameConstants::kContextOffset'),
    dict(name='off_fp_marker',
         value='StandardFrameConstants::kMarkerOffset'),
    dict(name='off_fp_function',
         value='JavaScriptFrameConstants::kFunctionOffset'),
    dict(name='off_fp_args',
         value='JavaScriptFrameConstants::kLastParameterOffset'),
]

#
# The following useful fields are missing accessors, so we define fake ones.
# Each string is the argument list of a synthetic ACCESSORS(...) invocation:
# "Class, field, type, offset-constant"; see load_fields().
#
extras_accessors = [
    'HeapObject, map, Map, kMapOffset',
    'JSObject, elements, Object, kElementsOffset',
    'FixedArray, data, uintptr_t, kHeaderSize',
    'Map, instance_attributes, int, kInstanceAttributesOffset',
    'Map, inobject_properties, int, kInObjectPropertiesOffset',
    'Map, instance_size, int, kInstanceSizeOffset',
    'HeapNumber, value, double, kValueOffset',
    'ConsString, first, String, kFirstOffset',
    'ConsString, second, String, kSecondOffset',
    'ExternalString, resource, Object, kResourceOffset',
    'SeqOneByteString, chars, char, kHeaderSize',
    'SharedFunctionInfo, code, Code, kCodeOffset',
    'Code, instruction_start, uintptr_t, kHeaderSize',
    'Code, instruction_size, int, kInstructionSizeOffset',
]

#
# The following is a whitelist of classes we expect to find when scanning the
# source code. This list is not exhaustive, but it's still useful to identify
# when this script gets out of sync with the source. See load_objects().
#
expected_classes = [
    'ConsString', 'FixedArray', 'HeapNumber', 'JSArray', 'JSFunction',
    'JSObject', 'JSRegExp', 'JSValue', 'Map', 'Oddball', 'Script',
    'SeqOneByteString', 'SharedFunctionInfo'
]


#
# The following structures store high-level representations of the structures
# for which we're going to emit descriptive constants.
#
types = {}          # set of all type names
typeclasses = {}    # maps type names to corresponding class names
klasses = {}        # known classes, including parents
fields = []         # field declarations

# NOTE: the backslash-newline inside this (non-raw) string literal is consumed
# by Python, so the FRAME_CONST macro is emitted on a single line.
header = '''
/*
 * This file is generated by %s. Do not edit directly.
 */

#include "v8.h"
#include "frames.h"
#include "frames-inl.h" /* for architecture-specific frame constants */

using namespace v8::internal;

extern "C" {

/* stack frame constants */
#define FRAME_CONST(value, klass) \
    int v8dbg_frametype_##klass = StackFrame::value;

STACK_FRAME_TYPE_LIST(FRAME_CONST)

#undef FRAME_CONST

''' % sys.argv[0]

footer = '''
}
'''


def _class_name_for_type(typename, known_classes):
    """Infer the candidate class name for an InstanceType enumerator.

    The name is derived by a systematic transformation, for example
    "JS_FUNCTION_TYPE" becomes "JSFunction".

    Args:
        typename: enumerator name as it appears in the InstanceType enum.
        known_classes: container of class names found in the source; it is
            consulted to decide whether a derived string class name must be
            simplified.

    Returns:
        The derived class name (which may or may not be a known class), or
        None when the enumerator does not end in "_TYPE".
    """
    # Symbols and Strings are implemented using the same classes.
    usetype = typename.replace('SYMBOL_', 'STRING_')

    # REGEXP behaves like REG_EXP, as in JS_REGEXP_TYPE => JSRegExp.
    usetype = usetype.replace('_REGEXP_', '_REG_EXP_')

    #
    # Remove the "_TYPE" suffix and then convert to camel case, except that
    # a "JS" prefix remains uppercase (as in "JS_FUNCTION_TYPE" =>
    # "JSFunction").
    #
    if not usetype.endswith('_TYPE'):
        return None

    parts = usetype[:-len('_TYPE')].split('_')
    if parts[0] == 'JS':
        cctype = 'JS'
        parts = parts[1:]
    else:
        cctype = ''

    # Empty components (from doubled or leading underscores) are skipped;
    # indexing part[0] on them would raise IndexError.
    cctype += ''.join(part[0].upper() + part[1:].lower()
                      for part in parts if part)

    #
    # Mapping string types is more complicated. Both types and class names
    # for Strings specify a representation (e.g., Seq, Cons, External, or
    # Sliced) and an encoding (TwoByte or Ascii). In the simplest case, both
    # of these are explicit in both names, as in:
    #
    #     EXTERNAL_ASCII_STRING_TYPE => ExternalAsciiString
    #
    # However, either the representation or encoding can be omitted from the
    # type name, in which case "Seq" and "TwoByte" are assumed, as in:
    #
    #     STRING_TYPE => SeqTwoByteString
    #
    # Additionally, sometimes the type name has more information than the
    # class, as in:
    #
    #     CONS_ASCII_STRING_TYPE => ConsString
    #
    # To figure this out dynamically, we first check for a representation
    # and encoding and add them if they're not present. If that doesn't
    # yield a valid class name, then we strip out the encoding.
    #
    if cctype.endswith('String'):
        if ('Cons' not in cctype and
                'External' not in cctype and
                'Sliced' not in cctype):
            if 'Ascii' in cctype:
                cctype = re.sub(r'AsciiString$', 'SeqOneByteString', cctype)
            else:
                cctype = re.sub(r'String$', 'SeqString', cctype)

        if 'Ascii' not in cctype:
            cctype = re.sub(r'String$', 'TwoByteString', cctype)

        if cctype not in known_classes:
            cctype = cctype.replace('Ascii', '').replace('TwoByte', '')

    return cctype


#
# Loads class hierarchy and type information from "objects.h".
#
def load_objects():
    """Populate `types`, `klasses` and `typeclasses` from the objects header.

    The header path is taken from sys.argv[2]. Every enumerator of
    "enum InstanceType" is recorded in `types`; every line matching
    "class Name: public Parent {" (parent optional) is recorded in `klasses`
    as {'parent': Parent-or-None}; and each type whose inferred class name is
    a known class is recorded in `typeclasses`.

    If any class listed in `expected_classes` is not matched by some type, an
    error is printed for each missing class and the process exits with
    status 1.
    """
    objfilename = sys.argv[2]
    in_insttype = False

    # Pieces of the InstanceType enum body, joined and parsed after the scan
    # because that is easier without the embedded newlines.
    type_chunks = []

    # Classes we're sure should be present; names are removed as they are
    # matched, so whatever remains at the end is missing.
    missing = set(expected_classes)

    #
    # Iterate objects.h line-by-line to collect type and class information.
    #
    with open(objfilename, 'r') as objfile:
        for line in objfile:
            if line.startswith('enum InstanceType {'):
                in_insttype = True
                continue

            if in_insttype and line.startswith('};'):
                in_insttype = False
                continue

            # Trim surrounding whitespace, then drop any trailing // comment.
            line = re.sub(r'//.*', '', line.strip())

            if in_insttype:
                type_chunks.append(line)
                continue

            match = re.match(
                r'class (\w[^\s:]*)(: public (\w[^\s{]*))?\s*{', line)
            if match:
                klasses[match.group(1)] = {'parent': match.group(3)}

    #
    # Process the instance type declaration: strip any "= value" initializer
    # and surrounding whitespace, and ignore empty entries (such as the one
    # after a trailing comma).
    #
    for entry in ''.join(type_chunks).split(','):
        typename = re.sub(r'\s*=.*', '', entry).strip()
        if typename:
            types[typename] = True

    #
    # Infer class names for each type. We find the class for each type
    # rather than the other way around because there are fewer cases where
    # one type maps to more than one class than the other way around.
    # Despite all that, some types have no corresponding class.
    #
    for typename in types:
        cctype = _class_name_for_type(typename, klasses)
        if cctype in klasses:
            typeclasses[typename] = cctype
            missing.discard(cctype)

    if missing:
        for klass in sorted(missing):
            print('error: expected class \"%s\" not found' % klass)

        sys.exit(1)


#
# For a given macro call, pick apart the arguments and return an object
# describing the corresponding output constant. See load_fields().
#
def parse_field(call):
    """Convert one accessor macro invocation into an output constant.

    Args:
        call: full text of the invocation, e.g.
            "ACCESSORS(Class, field, Type, kOffset)" or
            "SMI_ACCESSORS(Class, field, kOffset)". It may span several
            lines.

    Returns:
        A dict with 'name' ("class_<Class>__<field>__<Type>", where the type
        is "SMI" for SMI_ACCESSORS) and 'value' ("<Class>::<offset>").

    Raises:
        AssertionError: if the macro is not ACCESSORS, ACCESSORS_GCSAFE or
            SMI_ACCESSORS.
    """
    # Replace newlines with spaces. (Strings are immutable, so this has to
    # build a new string rather than patch characters in place.)
    call = call.replace('\n', ' ')

    idx = call.find('(')
    kind = call[0:idx]
    rest = call[idx + 1:len(call) - 1]

    # Strip each argument so whitespace next to the parentheses cannot leak
    # into the generated symbol names.
    args = [arg.strip() for arg in re.split(r'\s*,\s*', rest)]

    if kind in ('ACCESSORS', 'ACCESSORS_GCSAFE'):
        klass, field, dtype, offset = args[0], args[1], args[2], args[3]
    elif kind == 'SMI_ACCESSORS':
        klass, field, offset = args[0], args[1], args[2]
        dtype = 'SMI'
    else:
        # Raised explicitly (instead of via "assert") so the check is not
        # stripped when running under "python -O".
        raise AssertionError('unsupported accessor macro: %r' % kind)

    return {
        'name': 'class_%s__%s__%s' % (klass, field, dtype),
        'value': '%s::%s' % (klass, offset)
    }


def _scan_parens(line, start, opens):
    """Track parenthesis depth over line[start:].

    Scanning stops right after the character that brings the depth back to
    zero. Returns (end, opens): `end` is one past the last character
    consumed and `opens` is the resulting depth.
    """
    for idx in range(start, len(line)):
        if line[idx] == '(':
            opens += 1
        elif line[idx] == ')':
            opens -= 1

        if opens == 0:
            return idx + 1, opens

    return len(line), opens


#
# Load field offset information from objects-inl.h.
#
def load_fields():
    """Append a constant to `fields` for every accessor macro invocation.

    The file named by sys.argv[3] is scanned for lines starting with
    ACCESSORS(, ACCESSORS_GCSAFE( or SMI_ACCESSORS(. Afterwards one synthetic
    ACCESSORS(...) entry is added for each item of `extras_accessors`.
    """
    inlfilename = sys.argv[3]

    #
    # Each class's fields and the corresponding offsets are described in the
    # source by calls to macros like "ACCESSORS" (and friends). All we do
    # here is extract these macro invocations, taking into account that they
    # may span multiple lines and may contain nested parentheses. We also
    # call parse_field() to pick apart the invocation.
    #
    prefixes = ['ACCESSORS', 'ACCESSORS_GCSAFE', 'SMI_ACCESSORS']
    current = ''    # text of the invocation being accumulated
    opens = 0       # depth of currently unclosed parentheses

    with open(inlfilename, 'r') as inlfile:
        for line in inlfile:
            if opens > 0:
                # Continuation line
                end, opens = _scan_parens(line, 0, opens)
                current += line[0:end]
                continue

            for prefix in prefixes:
                # Including '(' keeps "ACCESSORS" from matching
                # "ACCESSORS_GCSAFE(" lines.
                if not line.startswith(prefix + '('):
                    continue

                # Flush the previously completed invocation, if any.
                if current:
                    fields.append(parse_field(current))
                    current = ''

                end, opens = _scan_parens(line, len(prefix), opens)
                current += line[0:end]

    if current:
        fields.append(parse_field(current))

    for body in extras_accessors:
        fields.append(parse_field('ACCESSORS(%s)' % body))


#
# Emit a block of constants.
#
def emit_set(out, consts):
    """Write one "int v8dbg_<name> = <value>;" line per constant.

    Args:
        out: object with a write() method receiving the generated text.
        consts: sequence of dicts with 'name' and 'value' keys.

    A blank line is written after the block.
    """
    for const in consts:
        out.write('int v8dbg_%s = %s;\n' % (const['name'], const['value']))
    out.write('\n')


#
# Emit the whole output file.
#
def emit_config():
    """Write the generated C++ source to the path given in sys.argv[1].

    Output order: the file header, the miscellaneous constants, one
    "type_<Class>__<TYPE>" constant per entry of `typeclasses`, one
    "parent_<Class>__<Parent>" constant (value 0) per class with a known
    parent, the field constants, and the footer. Types and classes are
    emitted in sorted order.
    """
    # open() and sorted() are used instead of file() and keys().sort() so the
    # script runs on both Python 2 and Python 3; the "with" block guarantees
    # the output is flushed and closed.
    with open(sys.argv[1], 'w') as out:
        out.write(header)

        out.write('/* miscellaneous constants */\n')
        emit_set(out, consts_misc)

        out.write('/* class type information */\n')
        consts = []
        for typename in sorted(typeclasses):
            klass = typeclasses[typename]
            consts.append({
                'name': 'type_%s__%s' % (klass, typename),
                'value': typename
            })

        emit_set(out, consts)

        out.write('/* class hierarchy information */\n')
        consts = []
        for klassname in sorted(klasses):
            pklass = klasses[klassname]['parent']
            if pklass is None:
                # Classes without a recorded parent get no constant.
                continue

            consts.append({
                'name': 'parent_%s__%s' % (klassname, pklass),
                'value': 0
            })

        emit_set(out, consts)

        out.write('/* field information */\n')
        emit_set(out, fields)

        out.write(footer)


if (len(sys.argv) < 4):
    print('usage: %s output.cc objects.h objects-inl.h' % sys.argv[0])
    sys.exit(2)

load_objects()
load_fields()
emit_config()