1 #!/usr/bin/python 2 3 # NOTE: this file is taken from the Python source distribution 4 # It can be found under Tools/gdb/libpython.py. It is shipped with Cython 5 # because it's not installed as a python module, and because changes are only 6 # merged into new python versions (v3.2+). 7 8 ''' 9 From gdb 7 onwards, gdb's build can be configured --with-python, allowing gdb 10 to be extended with Python code e.g. for library-specific data visualizations, 11 such as for the C++ STL types. Documentation on this API can be seen at: 12 http://sourceware.org/gdb/current/onlinedocs/gdb/Python-API.html 13 14 15 This python module deals with the case when the process being debugged (the 16 "inferior process" in gdb parlance) is itself python, or more specifically, 17 linked against libpython. In this situation, almost every item of data is a 18 (PyObject*), and having the debugger merely print their addresses is not very 19 enlightening. 20 21 This module embeds knowledge about the implementation details of libpython so 22 that we can emit useful visualizations e.g. a string, a list, a dict, a frame 23 giving file/line information and the state of local variables 24 25 In particular, given a gdb.Value corresponding to a PyObject* in the inferior 26 process, we can generate a "proxy value" within the gdb process. For example, 27 given a PyObject* in the inferior process that is in fact a PyListObject* 28 holding three PyObject* that turn out to be PyStringObject* instances, we can 29 generate a proxy value within the gdb process that is a list of strings: 30 ["foo", "bar", "baz"] 31 32 Doing so can be expensive for complicated graphs of objects, and could take 33 some time, so we also have a "write_repr" method that writes a representation 34 of the data to a file-like object. This allows us to stop the traversal by 35 having the file-like object raise an exception if it gets too much data. 36 37 With both "proxyval" and "write_repr" we keep track of the set of all addresses 38 visited so far in the traversal, to avoid infinite recursion due to cycles in 39 the graph of object references. 40 41 We try to defer gdb.lookup_type() invocations for python types until as late as 42 possible: for a dynamically linked python binary, when the process starts in 43 the debugger, the libpython.so hasn't been dynamically loaded yet, so none of 44 the type names are known to the debugger 45 46 The module also extends gdb with some python-specific commands. 47 ''' 48 from __future__ import with_statement 49 50 import os 51 import re 52 import sys 53 import struct 54 import locale 55 import atexit 56 import warnings 57 import tempfile 58 import textwrap 59 import itertools 60 61 import gdb 62 63 if sys.version_info[0] < 3: 64 # I think this is the only way to fix this bug :'( 65 # http://sourceware.org/bugzilla/show_bug.cgi?id=12285 66 out, err = sys.stdout, sys.stderr 67 reload(sys).setdefaultencoding('UTF-8') 68 sys.stdout = out 69 sys.stderr = err 70 71 # Look up the gdb.Type for some standard types: 72 _type_char_ptr = gdb.lookup_type('char').pointer() # char* 73 _type_unsigned_char_ptr = gdb.lookup_type('unsigned char').pointer() 74 _type_void_ptr = gdb.lookup_type('void').pointer() # void* 75 76 SIZEOF_VOID_P = _type_void_ptr.sizeof 77 78 Py_TPFLAGS_HEAPTYPE = (1L << 9) 79 80 Py_TPFLAGS_INT_SUBCLASS = (1L << 23) 81 Py_TPFLAGS_LONG_SUBCLASS = (1L << 24) 82 Py_TPFLAGS_LIST_SUBCLASS = (1L << 25) 83 Py_TPFLAGS_TUPLE_SUBCLASS = (1L << 26) 84 Py_TPFLAGS_STRING_SUBCLASS = (1L << 27) 85 Py_TPFLAGS_BYTES_SUBCLASS = (1L << 27) 86 Py_TPFLAGS_UNICODE_SUBCLASS = (1L << 28) 87 Py_TPFLAGS_DICT_SUBCLASS = (1L << 29) 88 Py_TPFLAGS_BASE_EXC_SUBCLASS = (1L << 30) 89 Py_TPFLAGS_TYPE_SUBCLASS = (1L << 31) 90 91 MAX_OUTPUT_LEN = 1024 92 93 hexdigits = "0123456789abcdef" 94 95 ENCODING = locale.getpreferredencoding() 96 97 class NullPyObjectPtr(RuntimeError): 98 pass 99 100 101 def safety_limit(val): 102 # Given a integer value from the process being debugged, limit it to some 103 # safety threshold so that arbitrary breakage within said process doesn't 104 # break the gdb process too much (e.g. sizes of iterations, sizes of lists) 105 return min(val, 1000) 106 107 108 def safe_range(val): 109 # As per range, but don't trust the value too much: cap it to a safety 110 # threshold in case the data was corrupted 111 return xrange(safety_limit(val)) 112 113 def write_unicode(file, text): 114 # Write a byte or unicode string to file. Unicode strings are encoded to 115 # ENCODING encoding with 'backslashreplace' error handler to avoid 116 # UnicodeEncodeError. 117 if isinstance(text, unicode): 118 text = text.encode(ENCODING, 'backslashreplace') 119 file.write(text) 120 121 def os_fsencode(filename): 122 if not isinstance(filename, unicode): 123 return filename 124 encoding = sys.getfilesystemencoding() 125 if encoding == 'mbcs': 126 # mbcs doesn't support surrogateescape 127 return filename.encode(encoding) 128 encoded = [] 129 for char in filename: 130 # surrogateescape error handler 131 if 0xDC80 <= ord(char) <= 0xDCFF: 132 byte = chr(ord(char) - 0xDC00) 133 else: 134 byte = char.encode(encoding) 135 encoded.append(byte) 136 return ''.join(encoded) 137 138 class StringTruncated(RuntimeError): 139 pass 140 141 class TruncatedStringIO(object): 142 '''Similar to cStringIO, but can truncate the output by raising a 143 StringTruncated exception''' 144 def __init__(self, maxlen=None): 145 self._val = '' 146 self.maxlen = maxlen 147 148 def write(self, data): 149 if self.maxlen: 150 if len(data) + len(self._val) > self.maxlen: 151 # Truncation: 152 self._val += data[0:self.maxlen - len(self._val)] 153 raise StringTruncated() 154 155 self._val += data 156 157 def getvalue(self): 158 return self._val 159 160 161 # pretty printer lookup 162 all_pretty_typenames = set() 163 164 class PrettyPrinterTrackerMeta(type): 165 166 def __init__(self, name, bases, dict): 167 super(PrettyPrinterTrackerMeta, self).__init__(name, bases, dict) 168 all_pretty_typenames.add(self._typename) 169 170 171 class PyObjectPtr(object): 172 """ 173 Class wrapping a gdb.Value that's a either a (PyObject*) within the 174 inferior process, or some subclass pointer e.g. (PyStringObject*) 175 176 There will be a subclass for every refined PyObject type that we care 177 about. 178 179 Note that at every stage the underlying pointer could be NULL, point 180 to corrupt data, etc; this is the debugger, after all. 181 """ 182 183 __metaclass__ = PrettyPrinterTrackerMeta 184 185 _typename = 'PyObject' 186 187 def __init__(self, gdbval, cast_to=None): 188 if cast_to: 189 self._gdbval = gdbval.cast(cast_to) 190 else: 191 self._gdbval = gdbval 192 193 def field(self, name): 194 ''' 195 Get the gdb.Value for the given field within the PyObject, coping with 196 some python 2 versus python 3 differences. 197 198 Various libpython types are defined using the "PyObject_HEAD" and 199 "PyObject_VAR_HEAD" macros. 200 201 In Python 2, this these are defined so that "ob_type" and (for a var 202 object) "ob_size" are fields of the type in question. 203 204 In Python 3, this is defined as an embedded PyVarObject type thus: 205 PyVarObject ob_base; 206 so that the "ob_size" field is located insize the "ob_base" field, and 207 the "ob_type" is most easily accessed by casting back to a (PyObject*). 208 ''' 209 if self.is_null(): 210 raise NullPyObjectPtr(self) 211 212 if name == 'ob_type': 213 pyo_ptr = self._gdbval.cast(PyObjectPtr.get_gdb_type()) 214 return pyo_ptr.dereference()[name] 215 216 if name == 'ob_size': 217 pyo_ptr = self._gdbval.cast(PyVarObjectPtr.get_gdb_type()) 218 return pyo_ptr.dereference()[name] 219 220 # General case: look it up inside the object: 221 return self._gdbval.dereference()[name] 222 223 def pyop_field(self, name): 224 ''' 225 Get a PyObjectPtr for the given PyObject* field within this PyObject, 226 coping with some python 2 versus python 3 differences. 227 ''' 228 return PyObjectPtr.from_pyobject_ptr(self.field(name)) 229 230 def write_field_repr(self, name, out, visited): 231 ''' 232 Extract the PyObject* field named "name", and write its representation 233 to file-like object "out" 234 ''' 235 field_obj = self.pyop_field(name) 236 field_obj.write_repr(out, visited) 237 238 def get_truncated_repr(self, maxlen): 239 ''' 240 Get a repr-like string for the data, but truncate it at "maxlen" bytes 241 (ending the object graph traversal as soon as you do) 242 ''' 243 out = TruncatedStringIO(maxlen) 244 try: 245 self.write_repr(out, set()) 246 except StringTruncated: 247 # Truncation occurred: 248 return out.getvalue() + '...(truncated)' 249 250 # No truncation occurred: 251 return out.getvalue() 252 253 def type(self): 254 return PyTypeObjectPtr(self.field('ob_type')) 255 256 def is_null(self): 257 return 0 == long(self._gdbval) 258 259 def is_optimized_out(self): 260 ''' 261 Is the value of the underlying PyObject* visible to the debugger? 262 263 This can vary with the precise version of the compiler used to build 264 Python, and the precise version of gdb. 265 266 See e.g. https://bugzilla.redhat.com/show_bug.cgi?id=556975 with 267 PyEval_EvalFrameEx's "f" 268 ''' 269 return self._gdbval.is_optimized_out 270 271 def safe_tp_name(self): 272 try: 273 return self.type().field('tp_name').string() 274 except NullPyObjectPtr: 275 # NULL tp_name? 276 return 'unknown' 277 except RuntimeError: 278 # Can't even read the object at all? 279 return 'unknown' 280 281 def proxyval(self, visited): 282 ''' 283 Scrape a value from the inferior process, and try to represent it 284 within the gdb process, whilst (hopefully) avoiding crashes when 285 the remote data is corrupt. 286 287 Derived classes will override this. 288 289 For example, a PyIntObject* with ob_ival 42 in the inferior process 290 should result in an int(42) in this process. 291 292 visited: a set of all gdb.Value pyobject pointers already visited 293 whilst generating this value (to guard against infinite recursion when 294 visiting object graphs with loops). Analogous to Py_ReprEnter and 295 Py_ReprLeave 296 ''' 297 298 class FakeRepr(object): 299 """ 300 Class representing a non-descript PyObject* value in the inferior 301 process for when we don't have a custom scraper, intended to have 302 a sane repr(). 303 """ 304 305 def __init__(self, tp_name, address): 306 self.tp_name = tp_name 307 self.address = address 308 309 def __repr__(self): 310 # For the NULL pointer, we have no way of knowing a type, so 311 # special-case it as per 312 # http://bugs.python.org/issue8032#msg100882 313 if self.address == 0: 314 return '0x0' 315 return '<%s at remote 0x%x>' % (self.tp_name, self.address) 316 317 return FakeRepr(self.safe_tp_name(), 318 long(self._gdbval)) 319 320 def write_repr(self, out, visited): 321 ''' 322 Write a string representation of the value scraped from the inferior 323 process to "out", a file-like object. 324 ''' 325 # Default implementation: generate a proxy value and write its repr 326 # However, this could involve a lot of work for complicated objects, 327 # so for derived classes we specialize this 328 return out.write(repr(self.proxyval(visited))) 329 330 @classmethod 331 def subclass_from_type(cls, t): 332 ''' 333 Given a PyTypeObjectPtr instance wrapping a gdb.Value that's a 334 (PyTypeObject*), determine the corresponding subclass of PyObjectPtr 335 to use 336 337 Ideally, we would look up the symbols for the global types, but that 338 isn't working yet: 339 (gdb) python print gdb.lookup_symbol('PyList_Type')[0].value 340 Traceback (most recent call last): 341 File "<string>", line 1, in <module> 342 NotImplementedError: Symbol type not yet supported in Python scripts. 343 Error while executing Python code. 344 345 For now, we use tp_flags, after doing some string comparisons on the 346 tp_name for some special-cases that don't seem to be visible through 347 flags 348 ''' 349 try: 350 tp_name = t.field('tp_name').string() 351 tp_flags = int(t.field('tp_flags')) 352 except RuntimeError: 353 # Handle any kind of error e.g. NULL ptrs by simply using the base 354 # class 355 return cls 356 357 #print 'tp_flags = 0x%08x' % tp_flags 358 #print 'tp_name = %r' % tp_name 359 360 name_map = {'bool': PyBoolObjectPtr, 361 'classobj': PyClassObjectPtr, 362 'instance': PyInstanceObjectPtr, 363 'NoneType': PyNoneStructPtr, 364 'frame': PyFrameObjectPtr, 365 'set' : PySetObjectPtr, 366 'frozenset' : PySetObjectPtr, 367 'builtin_function_or_method' : PyCFunctionObjectPtr, 368 } 369 if tp_name in name_map: 370 return name_map[tp_name] 371 372 if tp_flags & (Py_TPFLAGS_HEAPTYPE|Py_TPFLAGS_TYPE_SUBCLASS): 373 return PyTypeObjectPtr 374 375 if tp_flags & Py_TPFLAGS_INT_SUBCLASS: 376 return PyIntObjectPtr 377 if tp_flags & Py_TPFLAGS_LONG_SUBCLASS: 378 return PyLongObjectPtr 379 if tp_flags & Py_TPFLAGS_LIST_SUBCLASS: 380 return PyListObjectPtr 381 if tp_flags & Py_TPFLAGS_TUPLE_SUBCLASS: 382 return PyTupleObjectPtr 383 if tp_flags & Py_TPFLAGS_STRING_SUBCLASS: 384 try: 385 gdb.lookup_type('PyBytesObject') 386 return PyBytesObjectPtr 387 except RuntimeError: 388 return PyStringObjectPtr 389 if tp_flags & Py_TPFLAGS_UNICODE_SUBCLASS: 390 return PyUnicodeObjectPtr 391 if tp_flags & Py_TPFLAGS_DICT_SUBCLASS: 392 return PyDictObjectPtr 393 if tp_flags & Py_TPFLAGS_BASE_EXC_SUBCLASS: 394 return PyBaseExceptionObjectPtr 395 396 # Use the base class: 397 return cls 398 399 @classmethod 400 def from_pyobject_ptr(cls, gdbval): 401 ''' 402 Try to locate the appropriate derived class dynamically, and cast 403 the pointer accordingly. 404 ''' 405 try: 406 p = PyObjectPtr(gdbval) 407 cls = cls.subclass_from_type(p.type()) 408 return cls(gdbval, cast_to=cls.get_gdb_type()) 409 except RuntimeError, exc: 410 # Handle any kind of error e.g. NULL ptrs by simply using the base 411 # class 412 pass 413 return cls(gdbval) 414 415 @classmethod 416 def get_gdb_type(cls): 417 return gdb.lookup_type(cls._typename).pointer() 418 419 def as_address(self): 420 return long(self._gdbval) 421 422 423 class PyVarObjectPtr(PyObjectPtr): 424 _typename = 'PyVarObject' 425 426 class ProxyAlreadyVisited(object): 427 ''' 428 Placeholder proxy to use when protecting against infinite recursion due to 429 loops in the object graph. 430 431 Analogous to the values emitted by the users of Py_ReprEnter and Py_ReprLeave 432 ''' 433 def __init__(self, rep): 434 self._rep = rep 435 436 def __repr__(self): 437 return self._rep 438 439 440 def _write_instance_repr(out, visited, name, pyop_attrdict, address): 441 '''Shared code for use by old-style and new-style classes: 442 write a representation to file-like object "out"''' 443 out.write('<') 444 out.write(name) 445 446 # Write dictionary of instance attributes: 447 if isinstance(pyop_attrdict, PyDictObjectPtr): 448 out.write('(') 449 first = True 450 for pyop_arg, pyop_val in pyop_attrdict.iteritems(): 451 if not first: 452 out.write(', ') 453 first = False 454 out.write(pyop_arg.proxyval(visited)) 455 out.write('=') 456 pyop_val.write_repr(out, visited) 457 out.write(')') 458 out.write(' at remote 0x%x>' % address) 459 460 461 class InstanceProxy(object): 462 463 def __init__(self, cl_name, attrdict, address): 464 self.cl_name = cl_name 465 self.attrdict = attrdict 466 self.address = address 467 468 def __repr__(self): 469 if isinstance(self.attrdict, dict): 470 kwargs = ', '.join(["%s=%r" % (arg, val) 471 for arg, val in self.attrdict.iteritems()]) 472 return '<%s(%s) at remote 0x%x>' % (self.cl_name, 473 kwargs, self.address) 474 else: 475 return '<%s at remote 0x%x>' % (self.cl_name, 476 self.address) 477 478 def _PyObject_VAR_SIZE(typeobj, nitems): 479 return ( ( typeobj.field('tp_basicsize') + 480 nitems * typeobj.field('tp_itemsize') + 481 (SIZEOF_VOID_P - 1) 482 ) & ~(SIZEOF_VOID_P - 1) 483 ).cast(gdb.lookup_type('size_t')) 484 485 class PyTypeObjectPtr(PyObjectPtr): 486 _typename = 'PyTypeObject' 487 488 def get_attr_dict(self): 489 ''' 490 Get the PyDictObject ptr representing the attribute dictionary 491 (or None if there's a problem) 492 ''' 493 try: 494 typeobj = self.type() 495 dictoffset = int_from_int(typeobj.field('tp_dictoffset')) 496 if dictoffset != 0: 497 if dictoffset < 0: 498 type_PyVarObject_ptr = gdb.lookup_type('PyVarObject').pointer() 499 tsize = int_from_int(self._gdbval.cast(type_PyVarObject_ptr)['ob_size']) 500 if tsize < 0: 501 tsize = -tsize 502 size = _PyObject_VAR_SIZE(typeobj, tsize) 503 dictoffset += size 504 assert dictoffset > 0 505 assert dictoffset % SIZEOF_VOID_P == 0 506 507 dictptr = self._gdbval.cast(_type_char_ptr) + dictoffset 508 PyObjectPtrPtr = PyObjectPtr.get_gdb_type().pointer() 509 dictptr = dictptr.cast(PyObjectPtrPtr) 510 return PyObjectPtr.from_pyobject_ptr(dictptr.dereference()) 511 except RuntimeError: 512 # Corrupt data somewhere; fail safe 513 pass 514 515 # Not found, or some kind of error: 516 return None 517 518 def proxyval(self, visited): 519 ''' 520 Support for new-style classes. 521 522 Currently we just locate the dictionary using a transliteration to 523 python of _PyObject_GetDictPtr, ignoring descriptors 524 ''' 525 # Guard against infinite loops: 526 if self.as_address() in visited: 527 return ProxyAlreadyVisited('<...>') 528 visited.add(self.as_address()) 529 530 pyop_attr_dict = self.get_attr_dict() 531 if pyop_attr_dict: 532 attr_dict = pyop_attr_dict.proxyval(visited) 533 else: 534 attr_dict = {} 535 tp_name = self.safe_tp_name() 536 537 # New-style class: 538 return InstanceProxy(tp_name, attr_dict, long(self._gdbval)) 539 540 def write_repr(self, out, visited): 541 # Guard against infinite loops: 542 if self.as_address() in visited: 543 out.write('<...>') 544 return 545 visited.add(self.as_address()) 546 547 try: 548 tp_name = self.field('tp_name').string() 549 except RuntimeError: 550 tp_name = 'unknown' 551 552 out.write('<type %s at remote 0x%x>' % (tp_name, 553 self.as_address())) 554 # pyop_attrdict = self.get_attr_dict() 555 # _write_instance_repr(out, visited, 556 # self.safe_tp_name(), pyop_attrdict, self.as_address()) 557 558 class ProxyException(Exception): 559 def __init__(self, tp_name, args): 560 self.tp_name = tp_name 561 self.args = args 562 563 def __repr__(self): 564 return '%s%r' % (self.tp_name, self.args) 565 566 class PyBaseExceptionObjectPtr(PyObjectPtr): 567 """ 568 Class wrapping a gdb.Value that's a PyBaseExceptionObject* i.e. an exception 569 within the process being debugged. 570 """ 571 _typename = 'PyBaseExceptionObject' 572 573 def proxyval(self, visited): 574 # Guard against infinite loops: 575 if self.as_address() in visited: 576 return ProxyAlreadyVisited('(...)') 577 visited.add(self.as_address()) 578 arg_proxy = self.pyop_field('args').proxyval(visited) 579 return ProxyException(self.safe_tp_name(), 580 arg_proxy) 581 582 def write_repr(self, out, visited): 583 # Guard against infinite loops: 584 if self.as_address() in visited: 585 out.write('(...)') 586 return 587 visited.add(self.as_address()) 588 589 out.write(self.safe_tp_name()) 590 self.write_field_repr('args', out, visited) 591 592 593 class PyClassObjectPtr(PyObjectPtr): 594 """ 595 Class wrapping a gdb.Value that's a PyClassObject* i.e. a <classobj> 596 instance within the process being debugged. 597 """ 598 _typename = 'PyClassObject' 599 600 601 class BuiltInFunctionProxy(object): 602 def __init__(self, ml_name): 603 self.ml_name = ml_name 604 605 def __repr__(self): 606 return "<built-in function %s>" % self.ml_name 607 608 class BuiltInMethodProxy(object): 609 def __init__(self, ml_name, pyop_m_self): 610 self.ml_name = ml_name 611 self.pyop_m_self = pyop_m_self 612 613 def __repr__(self): 614 return ('<built-in method %s of %s object at remote 0x%x>' 615 % (self.ml_name, 616 self.pyop_m_self.safe_tp_name(), 617 self.pyop_m_self.as_address()) 618 ) 619 620 class PyCFunctionObjectPtr(PyObjectPtr): 621 """ 622 Class wrapping a gdb.Value that's a PyCFunctionObject* 623 (see Include/methodobject.h and Objects/methodobject.c) 624 """ 625 _typename = 'PyCFunctionObject' 626 627 def proxyval(self, visited): 628 m_ml = self.field('m_ml') # m_ml is a (PyMethodDef*) 629 ml_name = m_ml['ml_name'].string() 630 631 pyop_m_self = self.pyop_field('m_self') 632 if pyop_m_self.is_null(): 633 return BuiltInFunctionProxy(ml_name) 634 else: 635 return BuiltInMethodProxy(ml_name, pyop_m_self) 636 637 638 class PyCodeObjectPtr(PyObjectPtr): 639 """ 640 Class wrapping a gdb.Value that's a PyCodeObject* i.e. a <code> instance 641 within the process being debugged. 642 """ 643 _typename = 'PyCodeObject' 644 645 def addr2line(self, addrq): 646 ''' 647 Get the line number for a given bytecode offset 648 649 Analogous to PyCode_Addr2Line; translated from pseudocode in 650 Objects/lnotab_notes.txt 651 ''' 652 co_lnotab = self.pyop_field('co_lnotab').proxyval(set()) 653 654 # Initialize lineno to co_firstlineno as per PyCode_Addr2Line 655 # not 0, as lnotab_notes.txt has it: 656 lineno = int_from_int(self.field('co_firstlineno')) 657 658 addr = 0 659 for addr_incr, line_incr in zip(co_lnotab[::2], co_lnotab[1::2]): 660 addr += ord(addr_incr) 661 if addr > addrq: 662 return lineno 663 lineno += ord(line_incr) 664 return lineno 665 666 667 class PyDictObjectPtr(PyObjectPtr): 668 """ 669 Class wrapping a gdb.Value that's a PyDictObject* i.e. a dict instance 670 within the process being debugged. 671 """ 672 _typename = 'PyDictObject' 673 674 def iteritems(self): 675 ''' 676 Yields a sequence of (PyObjectPtr key, PyObjectPtr value) pairs, 677 analagous to dict.iteritems() 678 ''' 679 for i in safe_range(self.field('ma_mask') + 1): 680 ep = self.field('ma_table') + i 681 pyop_value = PyObjectPtr.from_pyobject_ptr(ep['me_value']) 682 if not pyop_value.is_null(): 683 pyop_key = PyObjectPtr.from_pyobject_ptr(ep['me_key']) 684 yield (pyop_key, pyop_value) 685 686 def proxyval(self, visited): 687 # Guard against infinite loops: 688 if self.as_address() in visited: 689 return ProxyAlreadyVisited('{...}') 690 visited.add(self.as_address()) 691 692 result = {} 693 for pyop_key, pyop_value in self.iteritems(): 694 proxy_key = pyop_key.proxyval(visited) 695 proxy_value = pyop_value.proxyval(visited) 696 result[proxy_key] = proxy_value 697 return result 698 699 def write_repr(self, out, visited): 700 # Guard against infinite loops: 701 if self.as_address() in visited: 702 out.write('{...}') 703 return 704 visited.add(self.as_address()) 705 706 out.write('{') 707 first = True 708 for pyop_key, pyop_value in self.iteritems(): 709 if not first: 710 out.write(', ') 711 first = False 712 pyop_key.write_repr(out, visited) 713 out.write(': ') 714 pyop_value.write_repr(out, visited) 715 out.write('}') 716 717 class PyInstanceObjectPtr(PyObjectPtr): 718 _typename = 'PyInstanceObject' 719 720 def proxyval(self, visited): 721 # Guard against infinite loops: 722 if self.as_address() in visited: 723 return ProxyAlreadyVisited('<...>') 724 visited.add(self.as_address()) 725 726 # Get name of class: 727 in_class = self.pyop_field('in_class') 728 cl_name = in_class.pyop_field('cl_name').proxyval(visited) 729 730 # Get dictionary of instance attributes: 731 in_dict = self.pyop_field('in_dict').proxyval(visited) 732 733 # Old-style class: 734 return InstanceProxy(cl_name, in_dict, long(self._gdbval)) 735 736 def write_repr(self, out, visited): 737 # Guard against infinite loops: 738 if self.as_address() in visited: 739 out.write('<...>') 740 return 741 visited.add(self.as_address()) 742 743 # Old-style class: 744 745 # Get name of class: 746 in_class = self.pyop_field('in_class') 747 cl_name = in_class.pyop_field('cl_name').proxyval(visited) 748 749 # Get dictionary of instance attributes: 750 pyop_in_dict = self.pyop_field('in_dict') 751 752 _write_instance_repr(out, visited, 753 cl_name, pyop_in_dict, self.as_address()) 754 755 class PyIntObjectPtr(PyObjectPtr): 756 _typename = 'PyIntObject' 757 758 def proxyval(self, visited): 759 result = int_from_int(self.field('ob_ival')) 760 return result 761 762 class PyListObjectPtr(PyObjectPtr): 763 _typename = 'PyListObject' 764 765 def __getitem__(self, i): 766 # Get the gdb.Value for the (PyObject*) with the given index: 767 field_ob_item = self.field('ob_item') 768 return field_ob_item[i] 769 770 def proxyval(self, visited): 771 # Guard against infinite loops: 772 if self.as_address() in visited: 773 return ProxyAlreadyVisited('[...]') 774 visited.add(self.as_address()) 775 776 result = [PyObjectPtr.from_pyobject_ptr(self[i]).proxyval(visited) 777 for i in safe_range(int_from_int(self.field('ob_size')))] 778 return result 779 780 def write_repr(self, out, visited): 781 # Guard against infinite loops: 782 if self.as_address() in visited: 783 out.write('[...]') 784 return 785 visited.add(self.as_address()) 786 787 out.write('[') 788 for i in safe_range(int_from_int(self.field('ob_size'))): 789 if i > 0: 790 out.write(', ') 791 element = PyObjectPtr.from_pyobject_ptr(self[i]) 792 element.write_repr(out, visited) 793 out.write(']') 794 795 class PyLongObjectPtr(PyObjectPtr): 796 _typename = 'PyLongObject' 797 798 def proxyval(self, visited): 799 ''' 800 Python's Include/longobjrep.h has this declaration: 801 struct _longobject { 802 PyObject_VAR_HEAD 803 digit ob_digit[1]; 804 }; 805 806 with this description: 807 The absolute value of a number is equal to 808 SUM(for i=0 through abs(ob_size)-1) ob_digit[i] * 2**(SHIFT*i) 809 Negative numbers are represented with ob_size < 0; 810 zero is represented by ob_size == 0. 811 812 where SHIFT can be either: 813 #define PyLong_SHIFT 30 814 #define PyLong_SHIFT 15 815 ''' 816 ob_size = long(self.field('ob_size')) 817 if ob_size == 0: 818 return 0L 819 820 ob_digit = self.field('ob_digit') 821 822 if gdb.lookup_type('digit').sizeof == 2: 823 SHIFT = 15L 824 else: 825 SHIFT = 30L 826 827 digits = [long(ob_digit[i]) * 2**(SHIFT*i) 828 for i in safe_range(abs(ob_size))] 829 result = sum(digits) 830 if ob_size < 0: 831 result = -result 832 return result 833 834 def write_repr(self, out, visited): 835 # Write this out as a Python 3 int literal, i.e. without the "L" suffix 836 proxy = self.proxyval(visited) 837 out.write("%s" % proxy) 838 839 840 class PyBoolObjectPtr(PyLongObjectPtr): 841 """ 842 Class wrapping a gdb.Value that's a PyBoolObject* i.e. one of the two 843 <bool> instances (Py_True/Py_False) within the process being debugged. 844 """ 845 _typename = 'PyBoolObject' 846 847 def proxyval(self, visited): 848 castto = gdb.lookup_type('PyLongObject').pointer() 849 self._gdbval = self._gdbval.cast(castto) 850 return bool(PyLongObjectPtr(self._gdbval).proxyval(visited)) 851 852 853 class PyNoneStructPtr(PyObjectPtr): 854 """ 855 Class wrapping a gdb.Value that's a PyObject* pointing to the 856 singleton (we hope) _Py_NoneStruct with ob_type PyNone_Type 857 """ 858 _typename = 'PyObject' 859 860 def proxyval(self, visited): 861 return None 862 863 864 class PyFrameObjectPtr(PyObjectPtr): 865 _typename = 'PyFrameObject' 866 867 def __init__(self, gdbval, cast_to=None): 868 PyObjectPtr.__init__(self, gdbval, cast_to) 869 870 if not self.is_optimized_out(): 871 self.co = PyCodeObjectPtr.from_pyobject_ptr(self.field('f_code')) 872 self.co_name = self.co.pyop_field('co_name') 873 self.co_filename = self.co.pyop_field('co_filename') 874 875 self.f_lineno = int_from_int(self.field('f_lineno')) 876 self.f_lasti = int_from_int(self.field('f_lasti')) 877 self.co_nlocals = int_from_int(self.co.field('co_nlocals')) 878 self.co_varnames = PyTupleObjectPtr.from_pyobject_ptr(self.co.field('co_varnames')) 879 880 def iter_locals(self): 881 ''' 882 Yield a sequence of (name,value) pairs of PyObjectPtr instances, for 883 the local variables of this frame 884 ''' 885 if self.is_optimized_out(): 886 return 887 888 f_localsplus = self.field('f_localsplus') 889 for i in safe_range(self.co_nlocals): 890 pyop_value = PyObjectPtr.from_pyobject_ptr(f_localsplus[i]) 891 if not pyop_value.is_null(): 892 pyop_name = PyObjectPtr.from_pyobject_ptr(self.co_varnames[i]) 893 yield (pyop_name, pyop_value) 894 895 def iter_globals(self): 896 ''' 897 Yield a sequence of (name,value) pairs of PyObjectPtr instances, for 898 the global variables of this frame 899 ''' 900 if self.is_optimized_out(): 901 return 902 903 pyop_globals = self.pyop_field('f_globals') 904 return pyop_globals.iteritems() 905 906 def iter_builtins(self): 907 ''' 908 Yield a sequence of (name,value) pairs of PyObjectPtr instances, for 909 the builtin variables 910 ''' 911 if self.is_optimized_out(): 912 return 913 914 pyop_builtins = self.pyop_field('f_builtins') 915 return pyop_builtins.iteritems() 916 917 def get_var_by_name(self, name): 918 ''' 919 Look for the named local variable, returning a (PyObjectPtr, scope) pair 920 where scope is a string 'local', 'global', 'builtin' 921 922 If not found, return (None, None) 923 ''' 924 for pyop_name, pyop_value in self.iter_locals(): 925 if name == pyop_name.proxyval(set()): 926 return pyop_value, 'local' 927 for pyop_name, pyop_value in self.iter_globals(): 928 if name == pyop_name.proxyval(set()): 929 return pyop_value, 'global' 930 for pyop_name, pyop_value in self.iter_builtins(): 931 if name == pyop_name.proxyval(set()): 932 return pyop_value, 'builtin' 933 return None, None 934 935 def filename(self): 936 '''Get the path of the current Python source file, as a string''' 937 if self.is_optimized_out(): 938 return '(frame information optimized out)' 939 return self.co_filename.proxyval(set()) 940 941 def current_line_num(self): 942 '''Get current line number as an integer (1-based) 943 944 Translated from PyFrame_GetLineNumber and PyCode_Addr2Line 945 946 See Objects/lnotab_notes.txt 947 ''' 948 if self.is_optimized_out(): 949 return None 950 f_trace = self.field('f_trace') 951 if long(f_trace) != 0: 952 # we have a non-NULL f_trace: 953 return self.f_lineno 954 else: 955 #try: 956 return self.co.addr2line(self.f_lasti) 957 #except ValueError: 958 # return self.f_lineno 959 960 def current_line(self): 961 '''Get the text of the current source line as a string, with a trailing 962 newline character''' 963 if self.is_optimized_out(): 964 return '(frame information optimized out)' 965 filename = self.filename() 966 with open(os_fsencode(filename), 'r') as f: 967 all_lines = f.readlines() 968 # Convert from 1-based current_line_num to 0-based list offset: 969 return all_lines[self.current_line_num()-1] 970 971 def write_repr(self, out, visited): 972 if self.is_optimized_out(): 973 out.write('(frame information optimized out)') 974 return 975 out.write('Frame 0x%x, for file %s, line %i, in %s (' 976 % (self.as_address(), 977 self.co_filename.proxyval(visited), 978 self.current_line_num(), 979 self.co_name.proxyval(visited))) 980 first = True 981 for pyop_name, pyop_value in self.iter_locals(): 982 if not first: 983 out.write(', ') 984 first = False 985 986 out.write(pyop_name.proxyval(visited)) 987 out.write('=') 988 pyop_value.write_repr(out, visited) 989 990 out.write(')') 991 992 class PySetObjectPtr(PyObjectPtr): 993 _typename = 'PySetObject' 994 995 def proxyval(self, visited): 996 # Guard against infinite loops: 997 if self.as_address() in visited: 998 return ProxyAlreadyVisited('%s(...)' % self.safe_tp_name()) 999 visited.add(self.as_address()) 1000 1001 members = [] 1002 table = self.field('table') 1003 for i in safe_range(self.field('mask')+1): 1004 setentry = table[i] 1005 key = setentry['key'] 1006 if key != 0: 1007 key_proxy = PyObjectPtr.from_pyobject_ptr(key).proxyval(visited) 1008 if key_proxy != '<dummy key>': 1009 members.append(key_proxy) 1010 if self.safe_tp_name() == 'frozenset': 1011 return frozenset(members) 1012 else: 1013 return set(members) 1014 1015 def write_repr(self, out, visited): 1016 # Emulate Python 3's set_repr 1017 tp_name = self.safe_tp_name() 1018 1019 # Guard against infinite loops: 1020 if self.as_address() in visited: 1021 out.write('(...)') 1022 return 1023 visited.add(self.as_address()) 1024 1025 # Python 3's set_repr special-cases the empty set: 1026 if not self.field('used'): 1027 out.write(tp_name) 1028 out.write('()') 1029 return 1030 1031 # Python 3 uses {} for set literals: 1032 if tp_name != 'set': 1033 out.write(tp_name) 1034 out.write('(') 1035 1036 out.write('{') 1037 first = True 1038 table = self.field('table') 1039 for i in safe_range(self.field('mask')+1): 1040 setentry = table[i] 1041 key = setentry['key'] 1042 if key != 0: 1043 pyop_key = PyObjectPtr.from_pyobject_ptr(key) 1044 key_proxy = pyop_key.proxyval(visited) # FIXME! 1045 if key_proxy != '<dummy key>': 1046 if not first: 1047 out.write(', ') 1048 first = False 1049 pyop_key.write_repr(out, visited) 1050 out.write('}') 1051 1052 if tp_name != 'set': 1053 out.write(')') 1054 1055 1056 class PyBytesObjectPtr(PyObjectPtr): 1057 _typename = 'PyBytesObject' 1058 1059 def __str__(self): 1060 field_ob_size = self.field('ob_size') 1061 field_ob_sval = self.field('ob_sval') 1062 return ''.join(struct.pack('b', field_ob_sval[i]) 1063 for i in safe_range(field_ob_size)) 1064 1065 def proxyval(self, visited): 1066 return str(self) 1067 1068 def write_repr(self, out, visited, py3=True): 1069 # Write this out as a Python 3 bytes literal, i.e. with a "b" prefix 1070 1071 # Get a PyStringObject* within the Python 2 gdb process: 1072 proxy = self.proxyval(visited) 1073 1074 # Transliteration of Python 3's Objects/bytesobject.c:PyBytes_Repr 1075 # to Python 2 code: 1076 quote = "'" 1077 if "'" in proxy and not '"' in proxy: 1078 quote = '"' 1079 1080 if py3: 1081 out.write('b') 1082 1083 out.write(quote) 1084 for byte in proxy: 1085 if byte == quote or byte == '\\': 1086 out.write('\\') 1087 out.write(byte) 1088 elif byte == '\t': 1089 out.write('\\t') 1090 elif byte == '\n': 1091 out.write('\\n') 1092 elif byte == '\r': 1093 out.write('\\r') 1094 elif byte < ' ' or ord(byte) >= 0x7f: 1095 out.write('\\x') 1096 out.write(hexdigits[(ord(byte) & 0xf0) >> 4]) 1097 out.write(hexdigits[ord(byte) & 0xf]) 1098 else: 1099 out.write(byte) 1100 out.write(quote) 1101 1102 class PyStringObjectPtr(PyBytesObjectPtr): 1103 _typename = 'PyStringObject' 1104 1105 def write_repr(self, out, visited): 1106 return super(PyStringObjectPtr, self).write_repr(out, visited, py3=False) 1107 1108 class PyTupleObjectPtr(PyObjectPtr): 1109 _typename = 'PyTupleObject' 1110 1111 def __getitem__(self, i): 1112 # Get the gdb.Value for the (PyObject*) with the given index: 1113 field_ob_item = self.field('ob_item') 1114 return field_ob_item[i] 1115 1116 def proxyval(self, visited): 1117 # Guard against infinite loops: 1118 if self.as_address() in visited: 1119 return ProxyAlreadyVisited('(...)') 1120 visited.add(self.as_address()) 1121 1122 result = tuple([PyObjectPtr.from_pyobject_ptr(self[i]).proxyval(visited) 1123 for i in safe_range(int_from_int(self.field('ob_size')))]) 1124 return result 1125 1126 def write_repr(self, out, visited): 1127 # Guard against infinite loops: 1128 if self.as_address() in visited: 1129 out.write('(...)') 1130 return 1131 visited.add(self.as_address()) 1132 1133 out.write('(') 1134 for i in safe_range(int_from_int(self.field('ob_size'))): 1135 if i > 0: 1136 out.write(', ') 1137 element = PyObjectPtr.from_pyobject_ptr(self[i]) 1138 element.write_repr(out, visited) 1139 if self.field('ob_size') == 1: 1140 out.write(',)') 1141 else: 1142 out.write(')') 1143 1144 1145 def _unichr_is_printable(char): 1146 # Logic adapted from Python 3's Tools/unicode/makeunicodedata.py 1147 if char == u" ": 1148 return True 1149 import unicodedata 1150 return unicodedata.category(char) not in ("C", "Z") 1151 1152 if sys.maxunicode >= 0x10000: 1153 _unichr = unichr 1154 else: 1155 # Needed for proper surrogate support if sizeof(Py_UNICODE) is 2 in gdb 1156 def _unichr(x): 1157 if x < 0x10000: 1158 return unichr(x) 1159 x -= 0x10000 1160 ch1 = 0xD800 | (x >> 10) 1161 ch2 = 0xDC00 | (x & 0x3FF) 1162 return unichr(ch1) + unichr(ch2) 1163 1164 class PyUnicodeObjectPtr(PyObjectPtr): 1165 _typename = 'PyUnicodeObject' 1166 1167 def char_width(self): 1168 _type_Py_UNICODE = gdb.lookup_type('Py_UNICODE') 1169 return _type_Py_UNICODE.sizeof 1170 1171 def proxyval(self, visited): 1172 # From unicodeobject.h: 1173 # Py_ssize_t length; /* Length of raw Unicode data in buffer */ 1174 # Py_UNICODE *str; /* Raw Unicode buffer */ 1175 field_length = long(self.field('length')) 1176 field_str = self.field('str') 1177 1178 # Gather a list of ints from the Py_UNICODE array; these are either 1179 # UCS-2 or UCS-4 code points: 1180 if self.char_width() > 2: 1181 Py_UNICODEs = [int(field_str[i]) for i in safe_range(field_length)] 1182 else: 1183 # A more elaborate routine if sizeof(Py_UNICODE) is 2 in the 1184 # inferior process: we must join surrogate pairs. 1185 Py_UNICODEs = [] 1186 i = 0 1187 limit = safety_limit(field_length) 1188 while i < limit: 1189 ucs = int(field_str[i]) 1190 i += 1 1191 if ucs < 0xD800 or ucs >= 0xDC00 or i == field_length: 1192 Py_UNICODEs.append(ucs) 1193 continue 1194 # This could be a surrogate pair. 1195 ucs2 = int(field_str[i]) 1196 if ucs2 < 0xDC00 or ucs2 > 0xDFFF: 1197 continue 1198 code = (ucs & 0x03FF) << 10 1199 code |= ucs2 & 0x03FF 1200 code += 0x00010000 1201 Py_UNICODEs.append(code) 1202 i += 1 1203 1204 # Convert the int code points to unicode characters, and generate a 1205 # local unicode instance. 1206 # This splits surrogate pairs if sizeof(Py_UNICODE) is 2 here (in gdb). 1207 result = u''.join([_unichr(ucs) for ucs in Py_UNICODEs]) 1208 return result 1209 1210 def write_repr(self, out, visited): 1211 # Get a PyUnicodeObject* within the Python 2 gdb process: 1212 proxy = self.proxyval(visited) 1213 1214 # Transliteration of Python 3's Object/unicodeobject.c:unicode_repr 1215 # to Python 2: 1216 try: 1217 gdb.parse_and_eval('PyString_Type') 1218 except RuntimeError: 1219 # Python 3, don't write 'u' as prefix 1220 pass 1221 else: 1222 # Python 2, write the 'u' 1223 out.write('u') 1224 1225 if "'" in proxy and '"' not in proxy: 1226 quote = '"' 1227 else: 1228 quote = "'" 1229 out.write(quote) 1230 1231 i = 0 1232 while i < len(proxy): 1233 ch = proxy[i] 1234 i += 1 1235 1236 # Escape quotes and backslashes 1237 if ch == quote or ch == '\\': 1238 out.write('\\') 1239 out.write(ch) 1240 1241 # Map special whitespace to '\t', \n', '\r' 1242 elif ch == '\t': 1243 out.write('\\t') 1244 elif ch == '\n': 1245 out.write('\\n') 1246 elif ch == '\r': 1247 out.write('\\r') 1248 1249 # Map non-printable US ASCII to '\xhh' */ 1250 elif ch < ' ' or ch == 0x7F: 1251 out.write('\\x') 1252 out.write(hexdigits[(ord(ch) >> 4) & 0x000F]) 1253 out.write(hexdigits[ord(ch) & 0x000F]) 1254 1255 # Copy ASCII characters as-is 1256 elif ord(ch) < 0x7F: 1257 out.write(ch) 1258 1259 # Non-ASCII characters 1260 else: 1261 ucs = ch 1262 ch2 = None 1263 if sys.maxunicode < 0x10000: 1264 # If sizeof(Py_UNICODE) is 2 here (in gdb), join 1265 # surrogate pairs before calling _unichr_is_printable. 1266 if (i < len(proxy) 1267 and 0xD800 <= ord(ch) < 0xDC00 \ 1268 and 0xDC00 <= ord(proxy[i]) <= 0xDFFF): 1269 ch2 = proxy[i] 1270 ucs = ch + ch2 1271 i += 1 1272 1273 # Unfortuately, Python 2's unicode type doesn't seem 1274 # to expose the "isprintable" method 1275 printable = _unichr_is_printable(ucs) 1276 if printable: 1277 try: 1278 ucs.encode(ENCODING) 1279 except UnicodeEncodeError: 1280 printable = False 1281 1282 # Map Unicode whitespace and control characters 1283 # (categories Z* and C* except ASCII space) 1284 if not printable: 1285 if ch2 is not None: 1286 # Match Python 3's representation of non-printable 1287 # wide characters. 1288 code = (ord(ch) & 0x03FF) << 10 1289 code |= ord(ch2) & 0x03FF 1290 code += 0x00010000 1291 else: 1292 code = ord(ucs) 1293 1294 # Map 8-bit characters to '\\xhh' 1295 if code <= 0xff: 1296 out.write('\\x') 1297 out.write(hexdigits[(code >> 4) & 0x000F]) 1298 out.write(hexdigits[code & 0x000F]) 1299 # Map 21-bit characters to '\U00xxxxxx' 1300 elif code >= 0x10000: 1301 out.write('\\U') 1302 out.write(hexdigits[(code >> 28) & 0x0000000F]) 1303 out.write(hexdigits[(code >> 24) & 0x0000000F]) 1304 out.write(hexdigits[(code >> 20) & 0x0000000F]) 1305 out.write(hexdigits[(code >> 16) & 0x0000000F]) 1306 out.write(hexdigits[(code >> 12) & 0x0000000F]) 1307 out.write(hexdigits[(code >> 8) & 0x0000000F]) 1308 out.write(hexdigits[(code >> 4) & 0x0000000F]) 1309 out.write(hexdigits[code & 0x0000000F]) 1310 # Map 16-bit characters to '\uxxxx' 1311 else: 1312 out.write('\\u') 1313 out.write(hexdigits[(code >> 12) & 0x000F]) 1314 out.write(hexdigits[(code >> 8) & 0x000F]) 1315 out.write(hexdigits[(code >> 4) & 0x000F]) 1316 out.write(hexdigits[code & 0x000F]) 1317 else: 1318 # Copy characters as-is 1319 out.write(ch) 1320 if ch2 is not None: 1321 out.write(ch2) 1322 1323 out.write(quote) 1324 1325 def __unicode__(self): 1326 return self.proxyval(set()) 1327 1328 def __str__(self): 1329 # In Python 3, everything is unicode (including attributes of e.g. 1330 # code objects, such as function names). The Python 2 debugger code 1331 # uses PyUnicodePtr objects to format strings etc, whereas with a 1332 # Python 2 debuggee we'd get PyStringObjectPtr instances with __str__. 1333 # Be compatible with that. 1334 return unicode(self).encode('UTF-8') 1335 1336 def int_from_int(gdbval): 1337 return int(str(gdbval)) 1338 1339 1340 def stringify(val): 1341 # TODO: repr() puts everything on one line; pformat can be nicer, but 1342 # can lead to v.long results; this function isolates the choice 1343 if True: 1344 return repr(val) 1345 else: 1346 from pprint import pformat 1347 return pformat(val) 1348 1349 1350 class PyObjectPtrPrinter: 1351 "Prints a (PyObject*)" 1352 1353 def __init__ (self, gdbval): 1354 self.gdbval = gdbval 1355 1356 def to_string (self): 1357 pyop = PyObjectPtr.from_pyobject_ptr(self.gdbval) 1358 if True: 1359 return pyop.get_truncated_repr(MAX_OUTPUT_LEN) 1360 else: 1361 # Generate full proxy value then stringify it. 1362 # Doing so could be expensive 1363 proxyval = pyop.proxyval(set()) 1364 return stringify(proxyval) 1365 1366 def pretty_printer_lookup(gdbval): 1367 type = gdbval.type.unqualified() 1368 if type.code == gdb.TYPE_CODE_PTR: 1369 type = type.target().unqualified() 1370 if str(type) in all_pretty_typenames: 1371 return PyObjectPtrPrinter(gdbval) 1372 1373 """ 1374 During development, I've been manually invoking the code in this way: 1375 (gdb) python 1376 1377 import sys 1378 sys.path.append('/home/david/coding/python-gdb') 1379 import libpython 1380 end 1381 1382 then reloading it after each edit like this: 1383 (gdb) python reload(libpython) 1384 1385 The following code should ensure that the prettyprinter is registered 1386 if the code is autoloaded by gdb when visiting libpython.so, provided 1387 that this python file is installed to the same path as the library (or its 1388 .debug file) plus a "-gdb.py" suffix, e.g: 1389 /usr/lib/libpython2.6.so.1.0-gdb.py 1390 /usr/lib/debug/usr/lib/libpython2.6.so.1.0.debug-gdb.py 1391 """ 1392 def register (obj): 1393 if obj == None: 1394 obj = gdb 1395 1396 # Wire up the pretty-printer 1397 obj.pretty_printers.append(pretty_printer_lookup) 1398 1399 register (gdb.current_objfile ()) 1400 1401 # Unfortunately, the exact API exposed by the gdb module varies somewhat 1402 # from build to build 1403 # See http://bugs.python.org/issue8279?#msg102276 1404 1405 class Frame(object): 1406 ''' 1407 Wrapper for gdb.Frame, adding various methods 1408 ''' 1409 def __init__(self, gdbframe): 1410 self._gdbframe = gdbframe 1411 1412 def older(self): 1413 older = self._gdbframe.older() 1414 if older: 1415 return Frame(older) 1416 else: 1417 return None 1418 1419 def newer(self): 1420 newer = self._gdbframe.newer() 1421 if newer: 1422 return Frame(newer) 1423 else: 1424 return None 1425 1426 def select(self): 1427 '''If supported, select this frame and return True; return False if unsupported 1428 1429 Not all builds have a gdb.Frame.select method; seems to be present on Fedora 12 1430 onwards, but absent on Ubuntu buildbot''' 1431 if not hasattr(self._gdbframe, 'select'): 1432 print ('Unable to select frame: ' 1433 'this build of gdb does not expose a gdb.Frame.select method') 1434 return False 1435 self._gdbframe.select() 1436 return True 1437 1438 def get_index(self): 1439 '''Calculate index of frame, starting at 0 for the newest frame within 1440 this thread''' 1441 index = 0 1442 # Go down until you reach the newest frame: 1443 iter_frame = self 1444 while iter_frame.newer(): 1445 index += 1 1446 iter_frame = iter_frame.newer() 1447 return index 1448 1449 def is_evalframeex(self): 1450 '''Is this a PyEval_EvalFrameEx frame?''' 1451 if self._gdbframe.name() == 'PyEval_EvalFrameEx': 1452 ''' 1453 I believe we also need to filter on the inline 1454 struct frame_id.inline_depth, only regarding frames with 1455 an inline depth of 0 as actually being this function 1456 1457 So we reject those with type gdb.INLINE_FRAME 1458 ''' 1459 if self._gdbframe.type() == gdb.NORMAL_FRAME: 1460 # We have a PyEval_EvalFrameEx frame: 1461 return True 1462 1463 return False 1464 1465 def read_var(self, varname): 1466 """ 1467 read_var with respect to code blocks (gdbframe.read_var works with 1468 respect to the most recent block) 1469 1470 Apparently this function doesn't work, though, as it seems to read 1471 variables in other frames also sometimes. 1472 """ 1473 block = self._gdbframe.block() 1474 var = None 1475 1476 while block and var is None: 1477 try: 1478 var = self._gdbframe.read_var(varname, block) 1479 except ValueError: 1480 pass 1481 1482 block = block.superblock 1483 1484 return var 1485 1486 def get_pyop(self): 1487 try: 1488 # self.read_var does not always work properly, so select our frame 1489 # and restore the previously selected frame 1490 selected_frame = gdb.selected_frame() 1491 self._gdbframe.select() 1492 f = gdb.parse_and_eval('f') 1493 selected_frame.select() 1494 except RuntimeError: 1495 return None 1496 else: 1497 return PyFrameObjectPtr.from_pyobject_ptr(f) 1498 1499 @classmethod 1500 def get_selected_frame(cls): 1501 _gdbframe = gdb.selected_frame() 1502 if _gdbframe: 1503 return Frame(_gdbframe) 1504 return None 1505 1506 @classmethod 1507 def get_selected_python_frame(cls): 1508 '''Try to obtain the Frame for the python code in the selected frame, 1509 or None''' 1510 frame = cls.get_selected_frame() 1511 1512 while frame: 1513 if frame.is_evalframeex(): 1514 return frame 1515 frame = frame.older() 1516 1517 # Not found: 1518 return None 1519 1520 def print_summary(self): 1521 if self.is_evalframeex(): 1522 pyop = self.get_pyop() 1523 if pyop: 1524 line = pyop.get_truncated_repr(MAX_OUTPUT_LEN) 1525 write_unicode(sys.stdout, '#%i %s\n' % (self.get_index(), line)) 1526 sys.stdout.write(pyop.current_line()) 1527 else: 1528 sys.stdout.write('#%i (unable to read python frame information)\n' % self.get_index()) 1529 else: 1530 sys.stdout.write('#%i\n' % self.get_index()) 1531 1532 class PyList(gdb.Command): 1533 '''List the current Python source code, if any 1534 1535 Use 1536 py-list START 1537 to list at a different line number within the python source. 1538 1539 Use 1540 py-list START, END 1541 to list a specific range of lines within the python source. 1542 ''' 1543 1544 def __init__(self): 1545 gdb.Command.__init__ (self, 1546 "py-list", 1547 gdb.COMMAND_FILES, 1548 gdb.COMPLETE_NONE) 1549 1550 1551 def invoke(self, args, from_tty): 1552 import re 1553 1554 start = None 1555 end = None 1556 1557 m = re.match(r'\s*(\d+)\s*', args) 1558 if m: 1559 start = int(m.group(0)) 1560 end = start + 10 1561 1562 m = re.match(r'\s*(\d+)\s*,\s*(\d+)\s*', args) 1563 if m: 1564 start, end = map(int, m.groups()) 1565 1566 frame = Frame.get_selected_python_frame() 1567 if not frame: 1568 print 'Unable to locate python frame' 1569 return 1570 1571 pyop = frame.get_pyop() 1572 if not pyop: 1573 print 'Unable to read information on python frame' 1574 return 1575 1576 filename = pyop.filename() 1577 lineno = pyop.current_line_num() 1578 1579 if start is None: 1580 start = lineno - 5 1581 end = lineno + 5 1582 1583 if start<1: 1584 start = 1 1585 1586 with open(os_fsencode(filename), 'r') as f: 1587 all_lines = f.readlines() 1588 # start and end are 1-based, all_lines is 0-based; 1589 # so [start-1:end] as a python slice gives us [start, end] as a 1590 # closed interval 1591 for i, line in enumerate(all_lines[start-1:end]): 1592 linestr = str(i+start) 1593 # Highlight current line: 1594 if i + start == lineno: 1595 linestr = '>' + linestr 1596 sys.stdout.write('%4s %s' % (linestr, line)) 1597 1598 1599 # ...and register the command: 1600 PyList() 1601 1602 def move_in_stack(move_up): 1603 '''Move up or down the stack (for the py-up/py-down command)''' 1604 frame = Frame.get_selected_python_frame() 1605 while frame: 1606 if move_up: 1607 iter_frame = frame.older() 1608 else: 1609 iter_frame = frame.newer() 1610 1611 if not iter_frame: 1612 break 1613 1614 if iter_frame.is_evalframeex(): 1615 # Result: 1616 if iter_frame.select(): 1617 iter_frame.print_summary() 1618 return 1619 1620 frame = iter_frame 1621 1622 if move_up: 1623 print 'Unable to find an older python frame' 1624 else: 1625 print 'Unable to find a newer python frame' 1626 1627 class PyUp(gdb.Command): 1628 'Select and print the python stack frame that called this one (if any)' 1629 def __init__(self): 1630 gdb.Command.__init__ (self, 1631 "py-up", 1632 gdb.COMMAND_STACK, 1633 gdb.COMPLETE_NONE) 1634 1635 1636 def invoke(self, args, from_tty): 1637 move_in_stack(move_up=True) 1638 1639 class PyDown(gdb.Command): 1640 'Select and print the python stack frame called by this one (if any)' 1641 def __init__(self): 1642 gdb.Command.__init__ (self, 1643 "py-down", 1644 gdb.COMMAND_STACK, 1645 gdb.COMPLETE_NONE) 1646 1647 1648 def invoke(self, args, from_tty): 1649 move_in_stack(move_up=False) 1650 1651 # Not all builds of gdb have gdb.Frame.select 1652 if hasattr(gdb.Frame, 'select'): 1653 PyUp() 1654 PyDown() 1655 1656 class PyBacktrace(gdb.Command): 1657 'Display the current python frame and all the frames within its call stack (if any)' 1658 def __init__(self): 1659 gdb.Command.__init__ (self, 1660 "py-bt", 1661 gdb.COMMAND_STACK, 1662 gdb.COMPLETE_NONE) 1663 1664 1665 def invoke(self, args, from_tty): 1666 frame = Frame.get_selected_python_frame() 1667 while frame: 1668 if frame.is_evalframeex(): 1669 frame.print_summary() 1670 frame = frame.older() 1671 1672 PyBacktrace() 1673 1674 class PyPrint(gdb.Command): 1675 'Look up the given python variable name, and print it' 1676 def __init__(self): 1677 gdb.Command.__init__ (self, 1678 "py-print", 1679 gdb.COMMAND_DATA, 1680 gdb.COMPLETE_NONE) 1681 1682 1683 def invoke(self, args, from_tty): 1684 name = str(args) 1685 1686 frame = Frame.get_selected_python_frame() 1687 if not frame: 1688 print 'Unable to locate python frame' 1689 return 1690 1691 pyop_frame = frame.get_pyop() 1692 if not pyop_frame: 1693 print 'Unable to read information on python frame' 1694 return 1695 1696 pyop_var, scope = pyop_frame.get_var_by_name(name) 1697 1698 if pyop_var: 1699 print ('%s %r = %s' 1700 % (scope, 1701 name, 1702 pyop_var.get_truncated_repr(MAX_OUTPUT_LEN))) 1703 else: 1704 print '%r not found' % name 1705 1706 PyPrint() 1707 1708 class PyLocals(gdb.Command): 1709 'Look up the given python variable name, and print it' 1710 1711 def invoke(self, args, from_tty): 1712 name = str(args) 1713 1714 frame = Frame.get_selected_python_frame() 1715 if not frame: 1716 print 'Unable to locate python frame' 1717 return 1718 1719 pyop_frame = frame.get_pyop() 1720 if not pyop_frame: 1721 print 'Unable to read information on python frame' 1722 return 1723 1724 namespace = self.get_namespace(pyop_frame) 1725 namespace = [(name.proxyval(set()), val) for name, val in namespace] 1726 1727 if namespace: 1728 name, val = max(namespace, key=lambda (name, val): len(name)) 1729 max_name_length = len(name) 1730 1731 for name, pyop_value in namespace: 1732 value = pyop_value.get_truncated_repr(MAX_OUTPUT_LEN) 1733 print ('%-*s = %s' % (max_name_length, name, value)) 1734 1735 def get_namespace(self, pyop_frame): 1736 return pyop_frame.iter_locals() 1737 1738 1739 class PyGlobals(PyLocals): 1740 'List all the globals in the currently select Python frame' 1741 1742 def get_namespace(self, pyop_frame): 1743 return pyop_frame.iter_globals() 1744 1745 1746 PyLocals("py-locals", gdb.COMMAND_DATA, gdb.COMPLETE_NONE) 1747 PyGlobals("py-globals", gdb.COMMAND_DATA, gdb.COMPLETE_NONE) 1748 1749 1750 class PyNameEquals(gdb.Function): 1751 1752 def _get_pycurframe_attr(self, attr): 1753 frame = Frame(gdb.selected_frame()) 1754 if frame.is_evalframeex(): 1755 pyframe = frame.get_pyop() 1756 if pyframe is None: 1757 warnings.warn("Use a Python debug build, Python breakpoints " 1758 "won't work otherwise.") 1759 return None 1760 1761 return getattr(pyframe, attr).proxyval(set()) 1762 1763 return None 1764 1765 def invoke(self, funcname): 1766 attr = self._get_pycurframe_attr('co_name') 1767 return attr is not None and attr == funcname.string() 1768 1769 PyNameEquals("pyname_equals") 1770 1771 1772 class PyModEquals(PyNameEquals): 1773 1774 def invoke(self, modname): 1775 attr = self._get_pycurframe_attr('co_filename') 1776 if attr is not None: 1777 filename, ext = os.path.splitext(os.path.basename(attr)) 1778 return filename == modname.string() 1779 return False 1780 1781 PyModEquals("pymod_equals") 1782 1783 1784 class PyBreak(gdb.Command): 1785 """ 1786 Set a Python breakpoint. Examples: 1787 1788 Break on any function or method named 'func' in module 'modname' 1789 1790 py-break modname.func 1791 1792 Break on any function or method named 'func' 1793 1794 py-break func 1795 """ 1796 1797 def invoke(self, funcname, from_tty): 1798 if '.' in funcname: 1799 modname, dot, funcname = funcname.rpartition('.') 1800 cond = '$pyname_equals("%s") && $pymod_equals("%s")' % (funcname, 1801 modname) 1802 else: 1803 cond = '$pyname_equals("%s")' % funcname 1804 1805 gdb.execute('break PyEval_EvalFrameEx if ' + cond) 1806 1807 PyBreak("py-break", gdb.COMMAND_RUNNING, gdb.COMPLETE_NONE) 1808 1809 1810 class _LoggingState(object): 1811 """ 1812 State that helps to provide a reentrant gdb.execute() function. 1813 """ 1814 1815 def __init__(self): 1816 self.fd, self.filename = tempfile.mkstemp() 1817 self.file = os.fdopen(self.fd, 'r+') 1818 _execute("set logging file %s" % self.filename) 1819 self.file_position_stack = [] 1820 1821 atexit.register(os.close, self.fd) 1822 atexit.register(os.remove, self.filename) 1823 1824 def __enter__(self): 1825 if not self.file_position_stack: 1826 _execute("set logging redirect on") 1827 _execute("set logging on") 1828 _execute("set pagination off") 1829 1830 self.file_position_stack.append(os.fstat(self.fd).st_size) 1831 return self 1832 1833 def getoutput(self): 1834 gdb.flush() 1835 self.file.seek(self.file_position_stack[-1]) 1836 result = self.file.read() 1837 return result 1838 1839 def __exit__(self, exc_type, exc_val, tb): 1840 startpos = self.file_position_stack.pop() 1841 self.file.seek(startpos) 1842 self.file.truncate() 1843 if not self.file_position_stack: 1844 _execute("set logging off") 1845 _execute("set logging redirect off") 1846 _execute("set pagination on") 1847 1848 1849 def execute(command, from_tty=False, to_string=False): 1850 """ 1851 Replace gdb.execute() with this function and have it accept a 'to_string' 1852 argument (new in 7.2). Have it properly capture stderr also. Ensure 1853 reentrancy. 1854 """ 1855 if to_string: 1856 with _logging_state as state: 1857 _execute(command, from_tty) 1858 return state.getoutput() 1859 else: 1860 _execute(command, from_tty) 1861 1862 1863 _execute = gdb.execute 1864 gdb.execute = execute 1865 _logging_state = _LoggingState() 1866 1867 1868 def get_selected_inferior(): 1869 """ 1870 Return the selected inferior in gdb. 1871 """ 1872 # Woooh, another bug in gdb! Is there an end in sight? 1873 # http://sourceware.org/bugzilla/show_bug.cgi?id=12212 1874 return gdb.inferiors()[0] 1875 1876 selected_thread = gdb.selected_thread() 1877 1878 for inferior in gdb.inferiors(): 1879 for thread in inferior.threads(): 1880 if thread == selected_thread: 1881 return inferior 1882 1883 def source_gdb_script(script_contents, to_string=False): 1884 """ 1885 Source a gdb script with script_contents passed as a string. This is useful 1886 to provide defines for py-step and py-next to make them repeatable (this is 1887 not possible with gdb.execute()). See 1888 http://sourceware.org/bugzilla/show_bug.cgi?id=12216 1889 """ 1890 fd, filename = tempfile.mkstemp() 1891 f = os.fdopen(fd, 'w') 1892 f.write(script_contents) 1893 f.close() 1894 gdb.execute("source %s" % filename, to_string=to_string) 1895 os.remove(filename) 1896 1897 def register_defines(): 1898 source_gdb_script(textwrap.dedent("""\ 1899 define py-step 1900 -py-step 1901 end 1902 1903 define py-next 1904 -py-next 1905 end 1906 1907 document py-step 1908 %s 1909 end 1910 1911 document py-next 1912 %s 1913 end 1914 """) % (PyStep.__doc__, PyNext.__doc__)) 1915 1916 1917 def stackdepth(frame): 1918 "Tells the stackdepth of a gdb frame." 1919 depth = 0 1920 while frame: 1921 frame = frame.older() 1922 depth += 1 1923 1924 return depth 1925 1926 class ExecutionControlCommandBase(gdb.Command): 1927 """ 1928 Superclass for language specific execution control. Language specific 1929 features should be implemented by lang_info using the LanguageInfo 1930 interface. 'name' is the name of the command. 1931 """ 1932 1933 def __init__(self, name, lang_info): 1934 super(ExecutionControlCommandBase, self).__init__( 1935 name, gdb.COMMAND_RUNNING, gdb.COMPLETE_NONE) 1936 self.lang_info = lang_info 1937 1938 def install_breakpoints(self): 1939 all_locations = itertools.chain( 1940 self.lang_info.static_break_functions(), 1941 self.lang_info.runtime_break_functions()) 1942 1943 for location in all_locations: 1944 result = gdb.execute('break %s' % location, to_string=True) 1945 yield re.search(r'Breakpoint (\d+)', result).group(1) 1946 1947 def delete_breakpoints(self, breakpoint_list): 1948 for bp in breakpoint_list: 1949 gdb.execute("delete %s" % bp) 1950 1951 def filter_output(self, result): 1952 reflags = re.MULTILINE 1953 1954 output_on_halt = [ 1955 (r'^Program received signal .*', reflags|re.DOTALL), 1956 (r'.*[Ww]arning.*', 0), 1957 (r'^Program exited .*', reflags), 1958 ] 1959 1960 output_always = [ 1961 # output when halting on a watchpoint 1962 (r'^(Old|New) value = .*', reflags), 1963 # output from the 'display' command 1964 (r'^\d+: \w+ = .*', reflags), 1965 ] 1966 1967 def filter_output(regexes): 1968 output = [] 1969 for regex, flags in regexes: 1970 for match in re.finditer(regex, result, flags): 1971 output.append(match.group(0)) 1972 1973 return '\n'.join(output) 1974 1975 # Filter the return value output of the 'finish' command 1976 match_finish = re.search(r'^Value returned is \$\d+ = (.*)', result, 1977 re.MULTILINE) 1978 if match_finish: 1979 finish_output = 'Value returned: %s\n' % match_finish.group(1) 1980 else: 1981 finish_output = '' 1982 1983 return (filter_output(output_on_halt), 1984 finish_output + filter_output(output_always)) 1985 1986 1987 def stopped(self): 1988 return get_selected_inferior().pid == 0 1989 1990 def finish_executing(self, result): 1991 """ 1992 After doing some kind of code running in the inferior, print the line 1993 of source code or the result of the last executed gdb command (passed 1994 in as the `result` argument). 1995 """ 1996 output_on_halt, output_always = self.filter_output(result) 1997 1998 if self.stopped(): 1999 print output_always 2000 print output_on_halt 2001 else: 2002 frame = gdb.selected_frame() 2003 source_line = self.lang_info.get_source_line(frame) 2004 if self.lang_info.is_relevant_function(frame): 2005 raised_exception = self.lang_info.exc_info(frame) 2006 if raised_exception: 2007 print raised_exception 2008 2009 if source_line: 2010 if output_always.rstrip(): 2011 print output_always.rstrip() 2012 print source_line 2013 else: 2014 print result 2015 2016 def _finish(self): 2017 """ 2018 Execute until the function returns (or until something else makes it 2019 stop) 2020 """ 2021 if gdb.selected_frame().older() is not None: 2022 return gdb.execute('finish', to_string=True) 2023 else: 2024 # outermost frame, continue 2025 return gdb.execute('cont', to_string=True) 2026 2027 def _finish_frame(self): 2028 """ 2029 Execute until the function returns to a relevant caller. 2030 """ 2031 while True: 2032 result = self._finish() 2033 2034 try: 2035 frame = gdb.selected_frame() 2036 except RuntimeError: 2037 break 2038 2039 hitbp = re.search(r'Breakpoint (\d+)', result) 2040 is_relevant = self.lang_info.is_relevant_function(frame) 2041 if hitbp or is_relevant or self.stopped(): 2042 break 2043 2044 return result 2045 2046 def finish(self, *args): 2047 "Implements the finish command." 2048 result = self._finish_frame() 2049 self.finish_executing(result) 2050 2051 def step(self, stepinto, stepover_command='next'): 2052 """ 2053 Do a single step or step-over. Returns the result of the last gdb 2054 command that made execution stop. 2055 2056 This implementation, for stepping, sets (conditional) breakpoints for 2057 all functions that are deemed relevant. It then does a step over until 2058 either something halts execution, or until the next line is reached. 2059 2060 If, however, stepover_command is given, it should be a string gdb 2061 command that continues execution in some way. The idea is that the 2062 caller has set a (conditional) breakpoint or watchpoint that can work 2063 more efficiently than the step-over loop. For Python this means setting 2064 a watchpoint for f->f_lasti, which means we can then subsequently 2065 "finish" frames. 2066 We want f->f_lasti instead of f->f_lineno, because the latter only 2067 works properly with local trace functions, see 2068 PyFrameObjectPtr.current_line_num and PyFrameObjectPtr.addr2line. 2069 """ 2070 if stepinto: 2071 breakpoint_list = list(self.install_breakpoints()) 2072 2073 beginframe = gdb.selected_frame() 2074 2075 if self.lang_info.is_relevant_function(beginframe): 2076 # If we start in a relevant frame, initialize stuff properly. If 2077 # we don't start in a relevant frame, the loop will halt 2078 # immediately. So don't call self.lang_info.lineno() as it may 2079 # raise for irrelevant frames. 2080 beginline = self.lang_info.lineno(beginframe) 2081 2082 if not stepinto: 2083 depth = stackdepth(beginframe) 2084 2085 newframe = beginframe 2086 2087 while True: 2088 if self.lang_info.is_relevant_function(newframe): 2089 result = gdb.execute(stepover_command, to_string=True) 2090 else: 2091 result = self._finish_frame() 2092 2093 if self.stopped(): 2094 break 2095 2096 newframe = gdb.selected_frame() 2097 is_relevant_function = self.lang_info.is_relevant_function(newframe) 2098 try: 2099 framename = newframe.name() 2100 except RuntimeError: 2101 framename = None 2102 2103 m = re.search(r'Breakpoint (\d+)', result) 2104 if m: 2105 if is_relevant_function and m.group(1) in breakpoint_list: 2106 # although we hit a breakpoint, we still need to check 2107 # that the function, in case hit by a runtime breakpoint, 2108 # is in the right context 2109 break 2110 2111 if newframe != beginframe: 2112 # new function 2113 2114 if not stepinto: 2115 # see if we returned to the caller 2116 newdepth = stackdepth(newframe) 2117 is_relevant_function = (newdepth < depth and 2118 is_relevant_function) 2119 2120 if is_relevant_function: 2121 break 2122 else: 2123 # newframe equals beginframe, check for a difference in the 2124 # line number 2125 lineno = self.lang_info.lineno(newframe) 2126 if lineno and lineno != beginline: 2127 break 2128 2129 if stepinto: 2130 self.delete_breakpoints(breakpoint_list) 2131 2132 self.finish_executing(result) 2133 2134 def run(self, args, from_tty): 2135 self.finish_executing(gdb.execute('run ' + args, to_string=True)) 2136 2137 def cont(self, *args): 2138 self.finish_executing(gdb.execute('cont', to_string=True)) 2139 2140 2141 class LanguageInfo(object): 2142 """ 2143 This class defines the interface that ExecutionControlCommandBase needs to 2144 provide language-specific execution control. 2145 2146 Classes that implement this interface should implement: 2147 2148 lineno(frame) 2149 Tells the current line number (only called for a relevant frame). 2150 If lineno is a false value it is not checked for a difference. 2151 2152 is_relevant_function(frame) 2153 tells whether we care about frame 'frame' 2154 2155 get_source_line(frame) 2156 get the line of source code for the current line (only called for a 2157 relevant frame). If the source code cannot be retrieved this 2158 function should return None 2159 2160 exc_info(frame) -- optional 2161 tells whether an exception was raised, if so, it should return a 2162 string representation of the exception value, None otherwise. 2163 2164 static_break_functions() 2165 returns an iterable of function names that are considered relevant 2166 and should halt step-into execution. This is needed to provide a 2167 performing step-into 2168 2169 runtime_break_functions() -- optional 2170 list of functions that we should break into depending on the 2171 context 2172 """ 2173 2174 def exc_info(self, frame): 2175 "See this class' docstring." 2176 2177 def runtime_break_functions(self): 2178 """ 2179 Implement this if the list of step-into functions depends on the 2180 context. 2181 """ 2182 return () 2183 2184 class PythonInfo(LanguageInfo): 2185 2186 def pyframe(self, frame): 2187 pyframe = Frame(frame).get_pyop() 2188 if pyframe: 2189 return pyframe 2190 else: 2191 raise gdb.RuntimeError( 2192 "Unable to find the Python frame, run your code with a debug " 2193 "build (configure with --with-pydebug or compile with -g).") 2194 2195 def lineno(self, frame): 2196 return self.pyframe(frame).current_line_num() 2197 2198 def is_relevant_function(self, frame): 2199 return Frame(frame).is_evalframeex() 2200 2201 def get_source_line(self, frame): 2202 try: 2203 pyframe = self.pyframe(frame) 2204 return '%4d %s' % (pyframe.current_line_num(), 2205 pyframe.current_line().rstrip()) 2206 except IOError, e: 2207 return None 2208 2209 def exc_info(self, frame): 2210 try: 2211 tstate = frame.read_var('tstate').dereference() 2212 if gdb.parse_and_eval('tstate->frame == f'): 2213 # tstate local variable initialized, check for an exception 2214 inf_type = tstate['curexc_type'] 2215 inf_value = tstate['curexc_value'] 2216 2217 if inf_type: 2218 return 'An exception was raised: %s' % (inf_value,) 2219 except (ValueError, RuntimeError), e: 2220 # Could not read the variable tstate or it's memory, it's ok 2221 pass 2222 2223 def static_break_functions(self): 2224 yield 'PyEval_EvalFrameEx' 2225 2226 2227 class PythonStepperMixin(object): 2228 """ 2229 Make this a mixin so CyStep can also inherit from this and use a 2230 CythonCodeStepper at the same time. 2231 """ 2232 2233 def python_step(self, stepinto): 2234 """ 2235 Set a watchpoint on the Python bytecode instruction pointer and try 2236 to finish the frame 2237 """ 2238 output = gdb.execute('watch f->f_lasti', to_string=True) 2239 watchpoint = int(re.search(r'[Ww]atchpoint (\d+):', output).group(1)) 2240 self.step(stepinto=stepinto, stepover_command='finish') 2241 gdb.execute('delete %s' % watchpoint) 2242 2243 2244 class PyStep(ExecutionControlCommandBase, PythonStepperMixin): 2245 "Step through Python code." 2246 2247 stepinto = True 2248 2249 def invoke(self, args, from_tty): 2250 self.python_step(stepinto=self.stepinto) 2251 2252 class PyNext(PyStep): 2253 "Step-over Python code." 2254 2255 stepinto = False 2256 2257 class PyFinish(ExecutionControlCommandBase): 2258 "Execute until function returns to a caller." 2259 2260 invoke = ExecutionControlCommandBase.finish 2261 2262 class PyRun(ExecutionControlCommandBase): 2263 "Run the program." 2264 2265 invoke = ExecutionControlCommandBase.run 2266 2267 class PyCont(ExecutionControlCommandBase): 2268 2269 invoke = ExecutionControlCommandBase.cont 2270 2271 2272 def _pointervalue(gdbval): 2273 """ 2274 Return the value of the pionter as a Python int. 2275 2276 gdbval.type must be a pointer type 2277 """ 2278 # don't convert with int() as it will raise a RuntimeError 2279 if gdbval.address is not None: 2280 return long(gdbval.address) 2281 else: 2282 # the address attribute is None sometimes, in which case we can 2283 # still convert the pointer to an int 2284 return long(gdbval) 2285 2286 def pointervalue(gdbval): 2287 pointer = _pointervalue(gdbval) 2288 try: 2289 if pointer < 0: 2290 raise gdb.GdbError("Negative pointer value, presumably a bug " 2291 "in gdb, aborting.") 2292 except RuntimeError: 2293 # work around yet another bug in gdb where you get random behaviour 2294 # and tracebacks 2295 pass 2296 2297 return pointer 2298 2299 def get_inferior_unicode_postfix(): 2300 try: 2301 gdb.parse_and_eval('PyUnicode_FromEncodedObject') 2302 except RuntimeError: 2303 try: 2304 gdb.parse_and_eval('PyUnicodeUCS2_FromEncodedObject') 2305 except RuntimeError: 2306 return 'UCS4' 2307 else: 2308 return 'UCS2' 2309 else: 2310 return '' 2311 2312 class PythonCodeExecutor(object): 2313 2314 Py_single_input = 256 2315 Py_file_input = 257 2316 Py_eval_input = 258 2317 2318 def malloc(self, size): 2319 chunk = (gdb.parse_and_eval("(void *) malloc((size_t) %d)" % size)) 2320 2321 pointer = pointervalue(chunk) 2322 if pointer == 0: 2323 raise gdb.GdbError("No memory could be allocated in the inferior.") 2324 2325 return pointer 2326 2327 def alloc_string(self, string): 2328 pointer = self.malloc(len(string)) 2329 get_selected_inferior().write_memory(pointer, string) 2330 2331 return pointer 2332 2333 def alloc_pystring(self, string): 2334 stringp = self.alloc_string(string) 2335 PyString_FromStringAndSize = 'PyString_FromStringAndSize' 2336 2337 try: 2338 gdb.parse_and_eval(PyString_FromStringAndSize) 2339 except RuntimeError: 2340 # Python 3 2341 PyString_FromStringAndSize = ('PyUnicode%s_FromStringAndSize' % 2342 (get_inferior_unicode_postfix(),)) 2343 2344 try: 2345 result = gdb.parse_and_eval( 2346 '(PyObject *) %s((char *) %d, (size_t) %d)' % ( 2347 PyString_FromStringAndSize, stringp, len(string))) 2348 finally: 2349 self.free(stringp) 2350 2351 pointer = pointervalue(result) 2352 if pointer == 0: 2353 raise gdb.GdbError("Unable to allocate Python string in " 2354 "the inferior.") 2355 2356 return pointer 2357 2358 def free(self, pointer): 2359 gdb.parse_and_eval("free((void *) %d)" % pointer) 2360 2361 def incref(self, pointer): 2362 "Increment the reference count of a Python object in the inferior." 2363 gdb.parse_and_eval('Py_IncRef((PyObject *) %d)' % pointer) 2364 2365 def xdecref(self, pointer): 2366 "Decrement the reference count of a Python object in the inferior." 2367 # Py_DecRef is like Py_XDECREF, but a function. So we don't have 2368 # to check for NULL. This should also decref all our allocated 2369 # Python strings. 2370 gdb.parse_and_eval('Py_DecRef((PyObject *) %d)' % pointer) 2371 2372 def evalcode(self, code, input_type, global_dict=None, local_dict=None): 2373 """ 2374 Evaluate python code `code` given as a string in the inferior and 2375 return the result as a gdb.Value. Returns a new reference in the 2376 inferior. 2377 2378 Of course, executing any code in the inferior may be dangerous and may 2379 leave the debuggee in an unsafe state or terminate it alltogether. 2380 """ 2381 if '\0' in code: 2382 raise gdb.GdbError("String contains NUL byte.") 2383 2384 code += '\0' 2385 2386 pointer = self.alloc_string(code) 2387 2388 globalsp = pointervalue(global_dict) 2389 localsp = pointervalue(local_dict) 2390 2391 if globalsp == 0 or localsp == 0: 2392 raise gdb.GdbError("Unable to obtain or create locals or globals.") 2393 2394 code = """ 2395 PyRun_String( 2396 (char *) %(code)d, 2397 (int) %(start)d, 2398 (PyObject *) %(globals)s, 2399 (PyObject *) %(locals)d) 2400 """ % dict(code=pointer, start=input_type, 2401 globals=globalsp, locals=localsp) 2402 2403 with FetchAndRestoreError(): 2404 try: 2405 pyobject_return_value = gdb.parse_and_eval(code) 2406 finally: 2407 self.free(pointer) 2408 2409 return pyobject_return_value 2410 2411 class FetchAndRestoreError(PythonCodeExecutor): 2412 """ 2413 Context manager that fetches the error indicator in the inferior and 2414 restores it on exit. 2415 """ 2416 2417 def __init__(self): 2418 self.sizeof_PyObjectPtr = gdb.lookup_type('PyObject').pointer().sizeof 2419 self.pointer = self.malloc(self.sizeof_PyObjectPtr * 3) 2420 2421 type = self.pointer 2422 value = self.pointer + self.sizeof_PyObjectPtr 2423 traceback = self.pointer + self.sizeof_PyObjectPtr * 2 2424 2425 self.errstate = type, value, traceback 2426 2427 def __enter__(self): 2428 gdb.parse_and_eval("PyErr_Fetch(%d, %d, %d)" % self.errstate) 2429 2430 def __exit__(self, *args): 2431 if gdb.parse_and_eval("(int) PyErr_Occurred()"): 2432 gdb.parse_and_eval("PyErr_Print()") 2433 2434 pyerr_restore = ("PyErr_Restore(" 2435 "(PyObject *) *%d," 2436 "(PyObject *) *%d," 2437 "(PyObject *) *%d)") 2438 2439 try: 2440 gdb.parse_and_eval(pyerr_restore % self.errstate) 2441 finally: 2442 self.free(self.pointer) 2443 2444 2445 class FixGdbCommand(gdb.Command): 2446 2447 def __init__(self, command, actual_command): 2448 super(FixGdbCommand, self).__init__(command, gdb.COMMAND_DATA, 2449 gdb.COMPLETE_NONE) 2450 self.actual_command = actual_command 2451 2452 def fix_gdb(self): 2453 """ 2454 It seems that invoking either 'cy exec' and 'py-exec' work perfectly 2455 fine, but after this gdb's python API is entirely broken. 2456 Maybe some uncleared exception value is still set? 2457 sys.exc_clear() didn't help. A demonstration: 2458 2459 (gdb) cy exec 'hello' 2460 'hello' 2461 (gdb) python gdb.execute('cont') 2462 RuntimeError: Cannot convert value to int. 2463 Error while executing Python code. 2464 (gdb) python gdb.execute('cont') 2465 [15148 refs] 2466 2467 Program exited normally. 2468 """ 2469 warnings.filterwarnings('ignore', r'.*', RuntimeWarning, 2470 re.escape(__name__)) 2471 try: 2472 long(gdb.parse_and_eval("(void *) 0")) == 0 2473 except RuntimeError: 2474 pass 2475 # warnings.resetwarnings() 2476 2477 def invoke(self, args, from_tty): 2478 self.fix_gdb() 2479 try: 2480 gdb.execute('%s %s' % (self.actual_command, args)) 2481 except RuntimeError, e: 2482 raise gdb.GdbError(str(e)) 2483 self.fix_gdb() 2484 2485 2486 def _evalcode_python(executor, code, input_type): 2487 """ 2488 Execute Python code in the most recent stack frame. 2489 """ 2490 global_dict = gdb.parse_and_eval('PyEval_GetGlobals()') 2491 local_dict = gdb.parse_and_eval('PyEval_GetLocals()') 2492 2493 if (pointervalue(global_dict) == 0 or pointervalue(local_dict) == 0): 2494 raise gdb.GdbError("Unable to find the locals or globals of the " 2495 "most recent Python function (relative to the " 2496 "selected frame).") 2497 2498 return executor.evalcode(code, input_type, global_dict, local_dict) 2499 2500 class PyExec(gdb.Command): 2501 2502 def readcode(self, expr): 2503 if expr: 2504 return expr, PythonCodeExecutor.Py_single_input 2505 else: 2506 lines = [] 2507 while True: 2508 try: 2509 line = raw_input('>') 2510 except EOFError: 2511 break 2512 else: 2513 if line.rstrip() == 'end': 2514 break 2515 2516 lines.append(line) 2517 2518 return '\n'.join(lines), PythonCodeExecutor.Py_file_input 2519 2520 def invoke(self, expr, from_tty): 2521 expr, input_type = self.readcode(expr) 2522 executor = PythonCodeExecutor() 2523 executor.xdecref(_evalcode_python(executor, input_type, global_dict, 2524 local_dict)) 2525 2526 2527 gdb.execute('set breakpoint pending on') 2528 2529 if hasattr(gdb, 'GdbError'): 2530 # Wrap py-step and py-next in gdb defines to make them repeatable. 2531 py_step = PyStep('-py-step', PythonInfo()) 2532 py_next = PyNext('-py-next', PythonInfo()) 2533 register_defines() 2534 py_finish = PyFinish('py-finish', PythonInfo()) 2535 py_run = PyRun('py-run', PythonInfo()) 2536 py_cont = PyCont('py-cont', PythonInfo()) 2537 2538 py_exec = FixGdbCommand('py-exec', '-py-exec') 2539 _py_exec = PyExec("-py-exec", gdb.COMMAND_DATA, gdb.COMPLETE_NONE) 2540 else: 2541 warnings.warn("Use gdb 7.2 or higher to use the py-exec command.") 2542