Home | History | Annotate | Download | only in gdb
      1 #!/usr/bin/python
      2 '''
      3 From gdb 7 onwards, gdb's build can be configured --with-python, allowing gdb
      4 to be extended with Python code e.g. for library-specific data visualizations,
      5 such as for the C++ STL types.  Documentation on this API can be seen at:
      6 http://sourceware.org/gdb/current/onlinedocs/gdb/Python-API.html
      7 
      8 
      9 This python module deals with the case when the process being debugged (the
     10 "inferior process" in gdb parlance) is itself python, or more specifically,
     11 linked against libpython.  In this situation, almost every item of data is a
     12 (PyObject*), and having the debugger merely print their addresses is not very
     13 enlightening.
     14 
     15 This module embeds knowledge about the implementation details of libpython so
     16 that we can emit useful visualizations e.g. a string, a list, a dict, a frame
     17 giving file/line information and the state of local variables
     18 
     19 In particular, given a gdb.Value corresponding to a PyObject* in the inferior
     20 process, we can generate a "proxy value" within the gdb process.  For example,
     21 given a PyObject* in the inferior process that is in fact a PyListObject*
     22 holding three PyObject* that turn out to be PyBytesObject* instances, we can
     23 generate a proxy value within the gdb process that is a list of bytes
     24 instances:
     25   [b"foo", b"bar", b"baz"]
     26 
     27 Doing so can be expensive for complicated graphs of objects, and could take
     28 some time, so we also have a "write_repr" method that writes a representation
     29 of the data to a file-like object.  This allows us to stop the traversal by
     30 having the file-like object raise an exception if it gets too much data.
     31 
     32 With both "proxyval" and "write_repr" we keep track of the set of all addresses
     33 visited so far in the traversal, to avoid infinite recursion due to cycles in
     34 the graph of object references.
     35 
     36 We try to defer gdb.lookup_type() invocations for python types until as late as
     37 possible: for a dynamically linked python binary, when the process starts in
     38 the debugger, the libpython.so hasn't been dynamically loaded yet, so none of
     39 the type names are known to the debugger
     40 
     41 The module also extends gdb with some python-specific commands.
     42 '''
     43 
     44 # NOTE: some gdbs are linked with Python 3, so this file should be dual-syntax
     45 # compatible (2.6+ and 3.0+).  See #19308.
     46 
     47 from __future__ import print_function
     48 import gdb
     49 import os
     50 import locale
     51 import sys
     52 
     53 if sys.version_info[0] >= 3:
     54     unichr = chr
     55     xrange = range
     56     long = int
     57 
     58 # Look up the gdb.Type for some standard types:
     59 # Those need to be refreshed as types (pointer sizes) may change when
     60 # gdb loads different executables
     61 
     62 def _type_char_ptr():
     63     return gdb.lookup_type('char').pointer()  # char*
     64 
     65 
     66 def _type_unsigned_char_ptr():
     67     return gdb.lookup_type('unsigned char').pointer()  # unsigned char*
     68 
     69 
     70 def _type_unsigned_short_ptr():
     71     return gdb.lookup_type('unsigned short').pointer()
     72 
     73 
     74 def _type_unsigned_int_ptr():
     75     return gdb.lookup_type('unsigned int').pointer()
     76 
     77 
     78 def _sizeof_void_p():
     79     return gdb.lookup_type('void').pointer().sizeof
     80 
     81 
     82 # value computed later, see PyUnicodeObjectPtr.proxy()
     83 _is_pep393 = None
     84 
     85 Py_TPFLAGS_HEAPTYPE = (1 << 9)
     86 Py_TPFLAGS_LONG_SUBCLASS     = (1 << 24)
     87 Py_TPFLAGS_LIST_SUBCLASS     = (1 << 25)
     88 Py_TPFLAGS_TUPLE_SUBCLASS    = (1 << 26)
     89 Py_TPFLAGS_BYTES_SUBCLASS    = (1 << 27)
     90 Py_TPFLAGS_UNICODE_SUBCLASS  = (1 << 28)
     91 Py_TPFLAGS_DICT_SUBCLASS     = (1 << 29)
     92 Py_TPFLAGS_BASE_EXC_SUBCLASS = (1 << 30)
     93 Py_TPFLAGS_TYPE_SUBCLASS     = (1 << 31)
     94 
     95 
     96 MAX_OUTPUT_LEN=1024
     97 
     98 hexdigits = "0123456789abcdef"
     99 
    100 ENCODING = locale.getpreferredencoding()
    101 
    102 class NullPyObjectPtr(RuntimeError):
    103     pass
    104 
    105 
    106 def safety_limit(val):
    107     # Given an integer value from the process being debugged, limit it to some
    108     # safety threshold so that arbitrary breakage within said process doesn't
    109     # break the gdb process too much (e.g. sizes of iterations, sizes of lists)
    110     return min(val, 1000)
    111 
    112 
    113 def safe_range(val):
    114     # As per range, but don't trust the value too much: cap it to a safety
    115     # threshold in case the data was corrupted
    116     return xrange(safety_limit(int(val)))
    117 
    118 if sys.version_info[0] >= 3:
    119     def write_unicode(file, text):
    120         file.write(text)
    121 else:
    122     def write_unicode(file, text):
    123         # Write a byte or unicode string to file. Unicode strings are encoded to
    124         # ENCODING encoding with 'backslashreplace' error handler to avoid
    125         # UnicodeEncodeError.
    126         if isinstance(text, unicode):
    127             text = text.encode(ENCODING, 'backslashreplace')
    128         file.write(text)
    129 
    130 try:
    131     os_fsencode = os.fsencode
    132 except AttributeError:
    133     def os_fsencode(filename):
    134         if not isinstance(filename, unicode):
    135             return filename
    136         encoding = sys.getfilesystemencoding()
    137         if encoding == 'mbcs':
    138             # mbcs doesn't support surrogateescape
    139             return filename.encode(encoding)
    140         encoded = []
    141         for char in filename:
    142             # surrogateescape error handler
    143             if 0xDC80 <= ord(char) <= 0xDCFF:
    144                 byte = chr(ord(char) - 0xDC00)
    145             else:
    146                 byte = char.encode(encoding)
    147             encoded.append(byte)
    148         return ''.join(encoded)
    149 
    150 class StringTruncated(RuntimeError):
    151     pass
    152 
    153 class TruncatedStringIO(object):
    154     '''Similar to io.StringIO, but can truncate the output by raising a
    155     StringTruncated exception'''
    156     def __init__(self, maxlen=None):
    157         self._val = ''
    158         self.maxlen = maxlen
    159 
    160     def write(self, data):
    161         if self.maxlen:
    162             if len(data) + len(self._val) > self.maxlen:
    163                 # Truncation:
    164                 self._val += data[0:self.maxlen - len(self._val)]
    165                 raise StringTruncated()
    166 
    167         self._val += data
    168 
    169     def getvalue(self):
    170         return self._val
    171 
    172 class PyObjectPtr(object):
    173     """
    174     Class wrapping a gdb.Value that's either a (PyObject*) within the
    175     inferior process, or some subclass pointer e.g. (PyBytesObject*)
    176 
    177     There will be a subclass for every refined PyObject type that we care
    178     about.
    179 
    180     Note that at every stage the underlying pointer could be NULL, point
    181     to corrupt data, etc; this is the debugger, after all.
    182     """
    183     _typename = 'PyObject'
    184 
    185     def __init__(self, gdbval, cast_to=None):
    186         if cast_to:
    187             self._gdbval = gdbval.cast(cast_to)
    188         else:
    189             self._gdbval = gdbval
    190 
    191     def field(self, name):
    192         '''
    193         Get the gdb.Value for the given field within the PyObject, coping with
    194         some python 2 versus python 3 differences.
    195 
    196         Various libpython types are defined using the "PyObject_HEAD" and
    197         "PyObject_VAR_HEAD" macros.
    198 
    199         In Python 2, this these are defined so that "ob_type" and (for a var
    200         object) "ob_size" are fields of the type in question.
    201 
    202         In Python 3, this is defined as an embedded PyVarObject type thus:
    203            PyVarObject ob_base;
    204         so that the "ob_size" field is located insize the "ob_base" field, and
    205         the "ob_type" is most easily accessed by casting back to a (PyObject*).
    206         '''
    207         if self.is_null():
    208             raise NullPyObjectPtr(self)
    209 
    210         if name == 'ob_type':
    211             pyo_ptr = self._gdbval.cast(PyObjectPtr.get_gdb_type())
    212             return pyo_ptr.dereference()[name]
    213 
    214         if name == 'ob_size':
    215             pyo_ptr = self._gdbval.cast(PyVarObjectPtr.get_gdb_type())
    216             return pyo_ptr.dereference()[name]
    217 
    218         # General case: look it up inside the object:
    219         return self._gdbval.dereference()[name]
    220 
    221     def pyop_field(self, name):
    222         '''
    223         Get a PyObjectPtr for the given PyObject* field within this PyObject,
    224         coping with some python 2 versus python 3 differences.
    225         '''
    226         return PyObjectPtr.from_pyobject_ptr(self.field(name))
    227 
    228     def write_field_repr(self, name, out, visited):
    229         '''
    230         Extract the PyObject* field named "name", and write its representation
    231         to file-like object "out"
    232         '''
    233         field_obj = self.pyop_field(name)
    234         field_obj.write_repr(out, visited)
    235 
    236     def get_truncated_repr(self, maxlen):
    237         '''
    238         Get a repr-like string for the data, but truncate it at "maxlen" bytes
    239         (ending the object graph traversal as soon as you do)
    240         '''
    241         out = TruncatedStringIO(maxlen)
    242         try:
    243             self.write_repr(out, set())
    244         except StringTruncated:
    245             # Truncation occurred:
    246             return out.getvalue() + '...(truncated)'
    247 
    248         # No truncation occurred:
    249         return out.getvalue()
    250 
    251     def type(self):
    252         return PyTypeObjectPtr(self.field('ob_type'))
    253 
    254     def is_null(self):
    255         return 0 == long(self._gdbval)
    256 
    257     def is_optimized_out(self):
    258         '''
    259         Is the value of the underlying PyObject* visible to the debugger?
    260 
    261         This can vary with the precise version of the compiler used to build
    262         Python, and the precise version of gdb.
    263 
    264         See e.g. https://bugzilla.redhat.com/show_bug.cgi?id=556975 with
    265         PyEval_EvalFrameEx's "f"
    266         '''
    267         return self._gdbval.is_optimized_out
    268 
    269     def safe_tp_name(self):
    270         try:
    271             return self.type().field('tp_name').string()
    272         except NullPyObjectPtr:
    273             # NULL tp_name?
    274             return 'unknown'
    275         except RuntimeError:
    276             # Can't even read the object at all?
    277             return 'unknown'
    278 
    279     def proxyval(self, visited):
    280         '''
    281         Scrape a value from the inferior process, and try to represent it
    282         within the gdb process, whilst (hopefully) avoiding crashes when
    283         the remote data is corrupt.
    284 
    285         Derived classes will override this.
    286 
    287         For example, a PyIntObject* with ob_ival 42 in the inferior process
    288         should result in an int(42) in this process.
    289 
    290         visited: a set of all gdb.Value pyobject pointers already visited
    291         whilst generating this value (to guard against infinite recursion when
    292         visiting object graphs with loops).  Analogous to Py_ReprEnter and
    293         Py_ReprLeave
    294         '''
    295 
    296         class FakeRepr(object):
    297             """
    298             Class representing a non-descript PyObject* value in the inferior
    299             process for when we don't have a custom scraper, intended to have
    300             a sane repr().
    301             """
    302 
    303             def __init__(self, tp_name, address):
    304                 self.tp_name = tp_name
    305                 self.address = address
    306 
    307             def __repr__(self):
    308                 # For the NULL pointer, we have no way of knowing a type, so
    309                 # special-case it as per
    310                 # http://bugs.python.org/issue8032#msg100882
    311                 if self.address == 0:
    312                     return '0x0'
    313                 return '<%s at remote 0x%x>' % (self.tp_name, self.address)
    314 
    315         return FakeRepr(self.safe_tp_name(),
    316                         long(self._gdbval))
    317 
    318     def write_repr(self, out, visited):
    319         '''
    320         Write a string representation of the value scraped from the inferior
    321         process to "out", a file-like object.
    322         '''
    323         # Default implementation: generate a proxy value and write its repr
    324         # However, this could involve a lot of work for complicated objects,
    325         # so for derived classes we specialize this
    326         return out.write(repr(self.proxyval(visited)))
    327 
    328     @classmethod
    329     def subclass_from_type(cls, t):
    330         '''
    331         Given a PyTypeObjectPtr instance wrapping a gdb.Value that's a
    332         (PyTypeObject*), determine the corresponding subclass of PyObjectPtr
    333         to use
    334 
    335         Ideally, we would look up the symbols for the global types, but that
    336         isn't working yet:
    337           (gdb) python print gdb.lookup_symbol('PyList_Type')[0].value
    338           Traceback (most recent call last):
    339             File "<string>", line 1, in <module>
    340           NotImplementedError: Symbol type not yet supported in Python scripts.
    341           Error while executing Python code.
    342 
    343         For now, we use tp_flags, after doing some string comparisons on the
    344         tp_name for some special-cases that don't seem to be visible through
    345         flags
    346         '''
    347         try:
    348             tp_name = t.field('tp_name').string()
    349             tp_flags = int(t.field('tp_flags'))
    350         except RuntimeError:
    351             # Handle any kind of error e.g. NULL ptrs by simply using the base
    352             # class
    353             return cls
    354 
    355         #print('tp_flags = 0x%08x' % tp_flags)
    356         #print('tp_name = %r' % tp_name)
    357 
    358         name_map = {'bool': PyBoolObjectPtr,
    359                     'classobj': PyClassObjectPtr,
    360                     'NoneType': PyNoneStructPtr,
    361                     'frame': PyFrameObjectPtr,
    362                     'set' : PySetObjectPtr,
    363                     'frozenset' : PySetObjectPtr,
    364                     'builtin_function_or_method' : PyCFunctionObjectPtr,
    365                     }
    366         if tp_name in name_map:
    367             return name_map[tp_name]
    368 
    369         if tp_flags & Py_TPFLAGS_HEAPTYPE:
    370             return HeapTypeObjectPtr
    371 
    372         if tp_flags & Py_TPFLAGS_LONG_SUBCLASS:
    373             return PyLongObjectPtr
    374         if tp_flags & Py_TPFLAGS_LIST_SUBCLASS:
    375             return PyListObjectPtr
    376         if tp_flags & Py_TPFLAGS_TUPLE_SUBCLASS:
    377             return PyTupleObjectPtr
    378         if tp_flags & Py_TPFLAGS_BYTES_SUBCLASS:
    379             return PyBytesObjectPtr
    380         if tp_flags & Py_TPFLAGS_UNICODE_SUBCLASS:
    381             return PyUnicodeObjectPtr
    382         if tp_flags & Py_TPFLAGS_DICT_SUBCLASS:
    383             return PyDictObjectPtr
    384         if tp_flags & Py_TPFLAGS_BASE_EXC_SUBCLASS:
    385             return PyBaseExceptionObjectPtr
    386         #if tp_flags & Py_TPFLAGS_TYPE_SUBCLASS:
    387         #    return PyTypeObjectPtr
    388 
    389         # Use the base class:
    390         return cls
    391 
    392     @classmethod
    393     def from_pyobject_ptr(cls, gdbval):
    394         '''
    395         Try to locate the appropriate derived class dynamically, and cast
    396         the pointer accordingly.
    397         '''
    398         try:
    399             p = PyObjectPtr(gdbval)
    400             cls = cls.subclass_from_type(p.type())
    401             return cls(gdbval, cast_to=cls.get_gdb_type())
    402         except RuntimeError:
    403             # Handle any kind of error e.g. NULL ptrs by simply using the base
    404             # class
    405             pass
    406         return cls(gdbval)
    407 
    408     @classmethod
    409     def get_gdb_type(cls):
    410         return gdb.lookup_type(cls._typename).pointer()
    411 
    412     def as_address(self):
    413         return long(self._gdbval)
    414 
    415 class PyVarObjectPtr(PyObjectPtr):
    416     _typename = 'PyVarObject'
    417 
    418 class ProxyAlreadyVisited(object):
    419     '''
    420     Placeholder proxy to use when protecting against infinite recursion due to
    421     loops in the object graph.
    422 
    423     Analogous to the values emitted by the users of Py_ReprEnter and Py_ReprLeave
    424     '''
    425     def __init__(self, rep):
    426         self._rep = rep
    427 
    428     def __repr__(self):
    429         return self._rep
    430 
    431 
    432 def _write_instance_repr(out, visited, name, pyop_attrdict, address):
    433     '''Shared code for use by all classes:
    434     write a representation to file-like object "out"'''
    435     out.write('<')
    436     out.write(name)
    437 
    438     # Write dictionary of instance attributes:
    439     if isinstance(pyop_attrdict, PyDictObjectPtr):
    440         out.write('(')
    441         first = True
    442         for pyop_arg, pyop_val in pyop_attrdict.iteritems():
    443             if not first:
    444                 out.write(', ')
    445             first = False
    446             out.write(pyop_arg.proxyval(visited))
    447             out.write('=')
    448             pyop_val.write_repr(out, visited)
    449         out.write(')')
    450     out.write(' at remote 0x%x>' % address)
    451 
    452 
    453 class InstanceProxy(object):
    454 
    455     def __init__(self, cl_name, attrdict, address):
    456         self.cl_name = cl_name
    457         self.attrdict = attrdict
    458         self.address = address
    459 
    460     def __repr__(self):
    461         if isinstance(self.attrdict, dict):
    462             kwargs = ', '.join(["%s=%r" % (arg, val)
    463                                 for arg, val in self.attrdict.iteritems()])
    464             return '<%s(%s) at remote 0x%x>' % (self.cl_name,
    465                                                 kwargs, self.address)
    466         else:
    467             return '<%s at remote 0x%x>' % (self.cl_name,
    468                                             self.address)
    469 
    470 def _PyObject_VAR_SIZE(typeobj, nitems):
    471     if _PyObject_VAR_SIZE._type_size_t is None:
    472         _PyObject_VAR_SIZE._type_size_t = gdb.lookup_type('size_t')
    473 
    474     return ( ( typeobj.field('tp_basicsize') +
    475                nitems * typeobj.field('tp_itemsize') +
    476                (_sizeof_void_p() - 1)
    477              ) & ~(_sizeof_void_p() - 1)
    478            ).cast(_PyObject_VAR_SIZE._type_size_t)
    479 _PyObject_VAR_SIZE._type_size_t = None
    480 
    481 class HeapTypeObjectPtr(PyObjectPtr):
    482     _typename = 'PyObject'
    483 
    484     def get_attr_dict(self):
    485         '''
    486         Get the PyDictObject ptr representing the attribute dictionary
    487         (or None if there's a problem)
    488         '''
    489         try:
    490             typeobj = self.type()
    491             dictoffset = int_from_int(typeobj.field('tp_dictoffset'))
    492             if dictoffset != 0:
    493                 if dictoffset < 0:
    494                     type_PyVarObject_ptr = gdb.lookup_type('PyVarObject').pointer()
    495                     tsize = int_from_int(self._gdbval.cast(type_PyVarObject_ptr)['ob_size'])
    496                     if tsize < 0:
    497                         tsize = -tsize
    498                     size = _PyObject_VAR_SIZE(typeobj, tsize)
    499                     dictoffset += size
    500                     assert dictoffset > 0
    501                     assert dictoffset % _sizeof_void_p() == 0
    502 
    503                 dictptr = self._gdbval.cast(_type_char_ptr()) + dictoffset
    504                 PyObjectPtrPtr = PyObjectPtr.get_gdb_type().pointer()
    505                 dictptr = dictptr.cast(PyObjectPtrPtr)
    506                 return PyObjectPtr.from_pyobject_ptr(dictptr.dereference())
    507         except RuntimeError:
    508             # Corrupt data somewhere; fail safe
    509             pass
    510 
    511         # Not found, or some kind of error:
    512         return None
    513 
    514     def proxyval(self, visited):
    515         '''
    516         Support for classes.
    517 
    518         Currently we just locate the dictionary using a transliteration to
    519         python of _PyObject_GetDictPtr, ignoring descriptors
    520         '''
    521         # Guard against infinite loops:
    522         if self.as_address() in visited:
    523             return ProxyAlreadyVisited('<...>')
    524         visited.add(self.as_address())
    525 
    526         pyop_attr_dict = self.get_attr_dict()
    527         if pyop_attr_dict:
    528             attr_dict = pyop_attr_dict.proxyval(visited)
    529         else:
    530             attr_dict = {}
    531         tp_name = self.safe_tp_name()
    532 
    533         # Class:
    534         return InstanceProxy(tp_name, attr_dict, long(self._gdbval))
    535 
    536     def write_repr(self, out, visited):
    537         # Guard against infinite loops:
    538         if self.as_address() in visited:
    539             out.write('<...>')
    540             return
    541         visited.add(self.as_address())
    542 
    543         pyop_attrdict = self.get_attr_dict()
    544         _write_instance_repr(out, visited,
    545                              self.safe_tp_name(), pyop_attrdict, self.as_address())
    546 
    547 class ProxyException(Exception):
    548     def __init__(self, tp_name, args):
    549         self.tp_name = tp_name
    550         self.args = args
    551 
    552     def __repr__(self):
    553         return '%s%r' % (self.tp_name, self.args)
    554 
    555 class PyBaseExceptionObjectPtr(PyObjectPtr):
    556     """
    557     Class wrapping a gdb.Value that's a PyBaseExceptionObject* i.e. an exception
    558     within the process being debugged.
    559     """
    560     _typename = 'PyBaseExceptionObject'
    561 
    562     def proxyval(self, visited):
    563         # Guard against infinite loops:
    564         if self.as_address() in visited:
    565             return ProxyAlreadyVisited('(...)')
    566         visited.add(self.as_address())
    567         arg_proxy = self.pyop_field('args').proxyval(visited)
    568         return ProxyException(self.safe_tp_name(),
    569                               arg_proxy)
    570 
    571     def write_repr(self, out, visited):
    572         # Guard against infinite loops:
    573         if self.as_address() in visited:
    574             out.write('(...)')
    575             return
    576         visited.add(self.as_address())
    577 
    578         out.write(self.safe_tp_name())
    579         self.write_field_repr('args', out, visited)
    580 
    581 class PyClassObjectPtr(PyObjectPtr):
    582     """
    583     Class wrapping a gdb.Value that's a PyClassObject* i.e. a <classobj>
    584     instance within the process being debugged.
    585     """
    586     _typename = 'PyClassObject'
    587 
    588 
    589 class BuiltInFunctionProxy(object):
    590     def __init__(self, ml_name):
    591         self.ml_name = ml_name
    592 
    593     def __repr__(self):
    594         return "<built-in function %s>" % self.ml_name
    595 
    596 class BuiltInMethodProxy(object):
    597     def __init__(self, ml_name, pyop_m_self):
    598         self.ml_name = ml_name
    599         self.pyop_m_self = pyop_m_self
    600 
    601     def __repr__(self):
    602         return ('<built-in method %s of %s object at remote 0x%x>'
    603                 % (self.ml_name,
    604                    self.pyop_m_self.safe_tp_name(),
    605                    self.pyop_m_self.as_address())
    606                 )
    607 
    608 class PyCFunctionObjectPtr(PyObjectPtr):
    609     """
    610     Class wrapping a gdb.Value that's a PyCFunctionObject*
    611     (see Include/methodobject.h and Objects/methodobject.c)
    612     """
    613     _typename = 'PyCFunctionObject'
    614 
    615     def proxyval(self, visited):
    616         m_ml = self.field('m_ml') # m_ml is a (PyMethodDef*)
    617         ml_name = m_ml['ml_name'].string()
    618 
    619         pyop_m_self = self.pyop_field('m_self')
    620         if pyop_m_self.is_null():
    621             return BuiltInFunctionProxy(ml_name)
    622         else:
    623             return BuiltInMethodProxy(ml_name, pyop_m_self)
    624 
    625 
    626 class PyCodeObjectPtr(PyObjectPtr):
    627     """
    628     Class wrapping a gdb.Value that's a PyCodeObject* i.e. a <code> instance
    629     within the process being debugged.
    630     """
    631     _typename = 'PyCodeObject'
    632 
    633     def addr2line(self, addrq):
    634         '''
    635         Get the line number for a given bytecode offset
    636 
    637         Analogous to PyCode_Addr2Line; translated from pseudocode in
    638         Objects/lnotab_notes.txt
    639         '''
    640         co_lnotab = self.pyop_field('co_lnotab').proxyval(set())
    641 
    642         # Initialize lineno to co_firstlineno as per PyCode_Addr2Line
    643         # not 0, as lnotab_notes.txt has it:
    644         lineno = int_from_int(self.field('co_firstlineno'))
    645 
    646         addr = 0
    647         for addr_incr, line_incr in zip(co_lnotab[::2], co_lnotab[1::2]):
    648             addr += ord(addr_incr)
    649             if addr > addrq:
    650                 return lineno
    651             lineno += ord(line_incr)
    652         return lineno
    653 
    654 
    655 class PyDictObjectPtr(PyObjectPtr):
    656     """
    657     Class wrapping a gdb.Value that's a PyDictObject* i.e. a dict instance
    658     within the process being debugged.
    659     """
    660     _typename = 'PyDictObject'
    661 
    662     def iteritems(self):
    663         '''
    664         Yields a sequence of (PyObjectPtr key, PyObjectPtr value) pairs,
    665         analogous to dict.iteritems()
    666         '''
    667         keys = self.field('ma_keys')
    668         values = self.field('ma_values')
    669         entries, nentries = self._get_entries(keys)
    670         for i in safe_range(nentries):
    671             ep = entries[i]
    672             if long(values):
    673                 pyop_value = PyObjectPtr.from_pyobject_ptr(values[i])
    674             else:
    675                 pyop_value = PyObjectPtr.from_pyobject_ptr(ep['me_value'])
    676             if not pyop_value.is_null():
    677                 pyop_key = PyObjectPtr.from_pyobject_ptr(ep['me_key'])
    678                 yield (pyop_key, pyop_value)
    679 
    680     def proxyval(self, visited):
    681         # Guard against infinite loops:
    682         if self.as_address() in visited:
    683             return ProxyAlreadyVisited('{...}')
    684         visited.add(self.as_address())
    685 
    686         result = {}
    687         for pyop_key, pyop_value in self.iteritems():
    688             proxy_key = pyop_key.proxyval(visited)
    689             proxy_value = pyop_value.proxyval(visited)
    690             result[proxy_key] = proxy_value
    691         return result
    692 
    693     def write_repr(self, out, visited):
    694         # Guard against infinite loops:
    695         if self.as_address() in visited:
    696             out.write('{...}')
    697             return
    698         visited.add(self.as_address())
    699 
    700         out.write('{')
    701         first = True
    702         for pyop_key, pyop_value in self.iteritems():
    703             if not first:
    704                 out.write(', ')
    705             first = False
    706             pyop_key.write_repr(out, visited)
    707             out.write(': ')
    708             pyop_value.write_repr(out, visited)
    709         out.write('}')
    710 
    711     def _get_entries(self, keys):
    712         dk_nentries = int(keys['dk_nentries'])
    713         dk_size = int(keys['dk_size'])
    714         try:
    715             # <= Python 3.5
    716             return keys['dk_entries'], dk_size
    717         except gdb.error:
    718             # >= Python 3.6
    719             pass
    720 
    721         if dk_size <= 0xFF:
    722             offset = dk_size
    723         elif dk_size <= 0xFFFF:
    724             offset = 2 * dk_size
    725         elif dk_size <= 0xFFFFFFFF:
    726             offset = 4 * dk_size
    727         else:
    728             offset = 8 * dk_size
    729 
    730         ent_addr = keys['dk_indices']['as_1'].address
    731         ent_addr = ent_addr.cast(_type_unsigned_char_ptr()) + offset
    732         ent_ptr_t = gdb.lookup_type('PyDictKeyEntry').pointer()
    733         ent_addr = ent_addr.cast(ent_ptr_t)
    734 
    735         return ent_addr, dk_nentries
    736 
    737 
    738 class PyListObjectPtr(PyObjectPtr):
    739     _typename = 'PyListObject'
    740 
    741     def __getitem__(self, i):
    742         # Get the gdb.Value for the (PyObject*) with the given index:
    743         field_ob_item = self.field('ob_item')
    744         return field_ob_item[i]
    745 
    746     def proxyval(self, visited):
    747         # Guard against infinite loops:
    748         if self.as_address() in visited:
    749             return ProxyAlreadyVisited('[...]')
    750         visited.add(self.as_address())
    751 
    752         result = [PyObjectPtr.from_pyobject_ptr(self[i]).proxyval(visited)
    753                   for i in safe_range(int_from_int(self.field('ob_size')))]
    754         return result
    755 
    756     def write_repr(self, out, visited):
    757         # Guard against infinite loops:
    758         if self.as_address() in visited:
    759             out.write('[...]')
    760             return
    761         visited.add(self.as_address())
    762 
    763         out.write('[')
    764         for i in safe_range(int_from_int(self.field('ob_size'))):
    765             if i > 0:
    766                 out.write(', ')
    767             element = PyObjectPtr.from_pyobject_ptr(self[i])
    768             element.write_repr(out, visited)
    769         out.write(']')
    770 
    771 class PyLongObjectPtr(PyObjectPtr):
    772     _typename = 'PyLongObject'
    773 
    774     def proxyval(self, visited):
    775         '''
    776         Python's Include/longobjrep.h has this declaration:
    777            struct _longobject {
    778                PyObject_VAR_HEAD
    779                digit ob_digit[1];
    780            };
    781 
    782         with this description:
    783             The absolute value of a number is equal to
    784                  SUM(for i=0 through abs(ob_size)-1) ob_digit[i] * 2**(SHIFT*i)
    785             Negative numbers are represented with ob_size < 0;
    786             zero is represented by ob_size == 0.
    787 
    788         where SHIFT can be either:
    789             #define PyLong_SHIFT        30
    790             #define PyLong_SHIFT        15
    791         '''
    792         ob_size = long(self.field('ob_size'))
    793         if ob_size == 0:
    794             return 0
    795 
    796         ob_digit = self.field('ob_digit')
    797 
    798         if gdb.lookup_type('digit').sizeof == 2:
    799             SHIFT = 15
    800         else:
    801             SHIFT = 30
    802 
    803         digits = [long(ob_digit[i]) * 2**(SHIFT*i)
    804                   for i in safe_range(abs(ob_size))]
    805         result = sum(digits)
    806         if ob_size < 0:
    807             result = -result
    808         return result
    809 
    810     def write_repr(self, out, visited):
    811         # Write this out as a Python 3 int literal, i.e. without the "L" suffix
    812         proxy = self.proxyval(visited)
    813         out.write("%s" % proxy)
    814 
    815 
    816 class PyBoolObjectPtr(PyLongObjectPtr):
    817     """
    818     Class wrapping a gdb.Value that's a PyBoolObject* i.e. one of the two
    819     <bool> instances (Py_True/Py_False) within the process being debugged.
    820     """
    821     def proxyval(self, visited):
    822         if PyLongObjectPtr.proxyval(self, visited):
    823             return True
    824         else:
    825             return False
    826 
    827 class PyNoneStructPtr(PyObjectPtr):
    828     """
    829     Class wrapping a gdb.Value that's a PyObject* pointing to the
    830     singleton (we hope) _Py_NoneStruct with ob_type PyNone_Type
    831     """
    832     _typename = 'PyObject'
    833 
    834     def proxyval(self, visited):
    835         return None
    836 
    837 
    838 class PyFrameObjectPtr(PyObjectPtr):
    839     _typename = 'PyFrameObject'
    840 
    841     def __init__(self, gdbval, cast_to=None):
    842         PyObjectPtr.__init__(self, gdbval, cast_to)
    843 
    844         if not self.is_optimized_out():
    845             self.co = PyCodeObjectPtr.from_pyobject_ptr(self.field('f_code'))
    846             self.co_name = self.co.pyop_field('co_name')
    847             self.co_filename = self.co.pyop_field('co_filename')
    848 
    849             self.f_lineno = int_from_int(self.field('f_lineno'))
    850             self.f_lasti = int_from_int(self.field('f_lasti'))
    851             self.co_nlocals = int_from_int(self.co.field('co_nlocals'))
    852             self.co_varnames = PyTupleObjectPtr.from_pyobject_ptr(self.co.field('co_varnames'))
    853 
    854     def iter_locals(self):
    855         '''
    856         Yield a sequence of (name,value) pairs of PyObjectPtr instances, for
    857         the local variables of this frame
    858         '''
    859         if self.is_optimized_out():
    860             return
    861 
    862         f_localsplus = self.field('f_localsplus')
    863         for i in safe_range(self.co_nlocals):
    864             pyop_value = PyObjectPtr.from_pyobject_ptr(f_localsplus[i])
    865             if not pyop_value.is_null():
    866                 pyop_name = PyObjectPtr.from_pyobject_ptr(self.co_varnames[i])
    867                 yield (pyop_name, pyop_value)
    868 
    869     def iter_globals(self):
    870         '''
    871         Yield a sequence of (name,value) pairs of PyObjectPtr instances, for
    872         the global variables of this frame
    873         '''
    874         if self.is_optimized_out():
    875             return ()
    876 
    877         pyop_globals = self.pyop_field('f_globals')
    878         return pyop_globals.iteritems()
    879 
    880     def iter_builtins(self):
    881         '''
    882         Yield a sequence of (name,value) pairs of PyObjectPtr instances, for
    883         the builtin variables
    884         '''
    885         if self.is_optimized_out():
    886             return ()
    887 
    888         pyop_builtins = self.pyop_field('f_builtins')
    889         return pyop_builtins.iteritems()
    890 
    891     def get_var_by_name(self, name):
    892         '''
    893         Look for the named local variable, returning a (PyObjectPtr, scope) pair
    894         where scope is a string 'local', 'global', 'builtin'
    895 
    896         If not found, return (None, None)
    897         '''
    898         for pyop_name, pyop_value in self.iter_locals():
    899             if name == pyop_name.proxyval(set()):
    900                 return pyop_value, 'local'
    901         for pyop_name, pyop_value in self.iter_globals():
    902             if name == pyop_name.proxyval(set()):
    903                 return pyop_value, 'global'
    904         for pyop_name, pyop_value in self.iter_builtins():
    905             if name == pyop_name.proxyval(set()):
    906                 return pyop_value, 'builtin'
    907         return None, None
    908 
    909     def filename(self):
    910         '''Get the path of the current Python source file, as a string'''
    911         if self.is_optimized_out():
    912             return '(frame information optimized out)'
    913         return self.co_filename.proxyval(set())
    914 
    915     def current_line_num(self):
    916         '''Get current line number as an integer (1-based)
    917 
    918         Translated from PyFrame_GetLineNumber and PyCode_Addr2Line
    919 
    920         See Objects/lnotab_notes.txt
    921         '''
    922         if self.is_optimized_out():
    923             return None
    924         f_trace = self.field('f_trace')
    925         if long(f_trace) != 0:
    926             # we have a non-NULL f_trace:
    927             return self.f_lineno
    928         else:
    929             #try:
    930             return self.co.addr2line(self.f_lasti)
    931             #except ValueError:
    932             #    return self.f_lineno
    933 
    934     def current_line(self):
    935         '''Get the text of the current source line as a string, with a trailing
    936         newline character'''
    937         if self.is_optimized_out():
    938             return '(frame information optimized out)'
    939         filename = self.filename()
    940         try:
    941             f = open(os_fsencode(filename), 'r')
    942         except IOError:
    943             return None
    944         with f:
    945             all_lines = f.readlines()
    946             # Convert from 1-based current_line_num to 0-based list offset:
    947             return all_lines[self.current_line_num()-1]
    948 
    949     def write_repr(self, out, visited):
    950         if self.is_optimized_out():
    951             out.write('(frame information optimized out)')
    952             return
    953         out.write('Frame 0x%x, for file %s, line %i, in %s ('
    954                   % (self.as_address(),
    955                      self.co_filename.proxyval(visited),
    956                      self.current_line_num(),
    957                      self.co_name.proxyval(visited)))
    958         first = True
    959         for pyop_name, pyop_value in self.iter_locals():
    960             if not first:
    961                 out.write(', ')
    962             first = False
    963 
    964             out.write(pyop_name.proxyval(visited))
    965             out.write('=')
    966             pyop_value.write_repr(out, visited)
    967 
    968         out.write(')')
    969 
    970     def print_traceback(self):
    971         if self.is_optimized_out():
    972             sys.stdout.write('  (frame information optimized out)\n')
    973             return
    974         visited = set()
    975         sys.stdout.write('  File "%s", line %i, in %s\n'
    976                   % (self.co_filename.proxyval(visited),
    977                      self.current_line_num(),
    978                      self.co_name.proxyval(visited)))
    979 
    980 class PySetObjectPtr(PyObjectPtr):
    981     _typename = 'PySetObject'
    982 
    983     @classmethod
    984     def _dummy_key(self):
    985         return gdb.lookup_global_symbol('_PySet_Dummy').value()
    986 
    987     def __iter__(self):
    988         dummy_ptr = self._dummy_key()
    989         table = self.field('table')
    990         for i in safe_range(self.field('mask') + 1):
    991             setentry = table[i]
    992             key = setentry['key']
    993             if key != 0 and key != dummy_ptr:
    994                 yield PyObjectPtr.from_pyobject_ptr(key)
    995 
    996     def proxyval(self, visited):
    997         # Guard against infinite loops:
    998         if self.as_address() in visited:
    999             return ProxyAlreadyVisited('%s(...)' % self.safe_tp_name())
   1000         visited.add(self.as_address())
   1001 
   1002         members = (key.proxyval(visited) for key in self)
   1003         if self.safe_tp_name() == 'frozenset':
   1004             return frozenset(members)
   1005         else:
   1006             return set(members)
   1007 
   1008     def write_repr(self, out, visited):
   1009         # Emulate Python 3's set_repr
   1010         tp_name = self.safe_tp_name()
   1011 
   1012         # Guard against infinite loops:
   1013         if self.as_address() in visited:
   1014             out.write('(...)')
   1015             return
   1016         visited.add(self.as_address())
   1017 
   1018         # Python 3's set_repr special-cases the empty set:
   1019         if not self.field('used'):
   1020             out.write(tp_name)
   1021             out.write('()')
   1022             return
   1023 
   1024         # Python 3 uses {} for set literals:
   1025         if tp_name != 'set':
   1026             out.write(tp_name)
   1027             out.write('(')
   1028 
   1029         out.write('{')
   1030         first = True
   1031         for key in self:
   1032             if not first:
   1033                 out.write(', ')
   1034             first = False
   1035             key.write_repr(out, visited)
   1036         out.write('}')
   1037 
   1038         if tp_name != 'set':
   1039             out.write(')')
   1040 
   1041 
   1042 class PyBytesObjectPtr(PyObjectPtr):
   1043     _typename = 'PyBytesObject'
   1044 
   1045     def __str__(self):
   1046         field_ob_size = self.field('ob_size')
   1047         field_ob_sval = self.field('ob_sval')
   1048         char_ptr = field_ob_sval.address.cast(_type_unsigned_char_ptr())
   1049         return ''.join([chr(char_ptr[i]) for i in safe_range(field_ob_size)])
   1050 
   1051     def proxyval(self, visited):
   1052         return str(self)
   1053 
   1054     def write_repr(self, out, visited):
   1055         # Write this out as a Python 3 bytes literal, i.e. with a "b" prefix
   1056 
   1057         # Get a PyStringObject* within the Python 2 gdb process:
   1058         proxy = self.proxyval(visited)
   1059 
   1060         # Transliteration of Python 3's Objects/bytesobject.c:PyBytes_Repr
   1061         # to Python 2 code:
   1062         quote = "'"
   1063         if "'" in proxy and not '"' in proxy:
   1064             quote = '"'
   1065         out.write('b')
   1066         out.write(quote)
   1067         for byte in proxy:
   1068             if byte == quote or byte == '\\':
   1069                 out.write('\\')
   1070                 out.write(byte)
   1071             elif byte == '\t':
   1072                 out.write('\\t')
   1073             elif byte == '\n':
   1074                 out.write('\\n')
   1075             elif byte == '\r':
   1076                 out.write('\\r')
   1077             elif byte < ' ' or ord(byte) >= 0x7f:
   1078                 out.write('\\x')
   1079                 out.write(hexdigits[(ord(byte) & 0xf0) >> 4])
   1080                 out.write(hexdigits[ord(byte) & 0xf])
   1081             else:
   1082                 out.write(byte)
   1083         out.write(quote)
   1084 
   1085 class PyTupleObjectPtr(PyObjectPtr):
   1086     _typename = 'PyTupleObject'
   1087 
   1088     def __getitem__(self, i):
   1089         # Get the gdb.Value for the (PyObject*) with the given index:
   1090         field_ob_item = self.field('ob_item')
   1091         return field_ob_item[i]
   1092 
   1093     def proxyval(self, visited):
   1094         # Guard against infinite loops:
   1095         if self.as_address() in visited:
   1096             return ProxyAlreadyVisited('(...)')
   1097         visited.add(self.as_address())
   1098 
   1099         result = tuple([PyObjectPtr.from_pyobject_ptr(self[i]).proxyval(visited)
   1100                         for i in safe_range(int_from_int(self.field('ob_size')))])
   1101         return result
   1102 
   1103     def write_repr(self, out, visited):
   1104         # Guard against infinite loops:
   1105         if self.as_address() in visited:
   1106             out.write('(...)')
   1107             return
   1108         visited.add(self.as_address())
   1109 
   1110         out.write('(')
   1111         for i in safe_range(int_from_int(self.field('ob_size'))):
   1112             if i > 0:
   1113                 out.write(', ')
   1114             element = PyObjectPtr.from_pyobject_ptr(self[i])
   1115             element.write_repr(out, visited)
   1116         if self.field('ob_size') == 1:
   1117             out.write(',)')
   1118         else:
   1119             out.write(')')
   1120 
   1121 class PyTypeObjectPtr(PyObjectPtr):
   1122     _typename = 'PyTypeObject'
   1123 
   1124 
   1125 def _unichr_is_printable(char):
   1126     # Logic adapted from Python 3's Tools/unicode/makeunicodedata.py
   1127     if char == u" ":
   1128         return True
   1129     import unicodedata
   1130     return unicodedata.category(char) not in ("C", "Z")
   1131 
   1132 if sys.maxunicode >= 0x10000:
   1133     _unichr = unichr
   1134 else:
   1135     # Needed for proper surrogate support if sizeof(Py_UNICODE) is 2 in gdb
   1136     def _unichr(x):
   1137         if x < 0x10000:
   1138             return unichr(x)
   1139         x -= 0x10000
   1140         ch1 = 0xD800 | (x >> 10)
   1141         ch2 = 0xDC00 | (x & 0x3FF)
   1142         return unichr(ch1) + unichr(ch2)
   1143 
   1144 
   1145 class PyUnicodeObjectPtr(PyObjectPtr):
   1146     _typename = 'PyUnicodeObject'
   1147 
   1148     def char_width(self):
   1149         _type_Py_UNICODE = gdb.lookup_type('Py_UNICODE')
   1150         return _type_Py_UNICODE.sizeof
   1151 
   1152     def proxyval(self, visited):
   1153         global _is_pep393
   1154         if _is_pep393 is None:
   1155             fields = gdb.lookup_type('PyUnicodeObject').target().fields()
   1156             _is_pep393 = 'data' in [f.name for f in fields]
   1157         if _is_pep393:
   1158             # Python 3.3 and newer
   1159             may_have_surrogates = False
   1160             compact = self.field('_base')
   1161             ascii = compact['_base']
   1162             state = ascii['state']
   1163             is_compact_ascii = (int(state['ascii']) and int(state['compact']))
   1164             if not int(state['ready']):
   1165                 # string is not ready
   1166                 field_length = long(compact['wstr_length'])
   1167                 may_have_surrogates = True
   1168                 field_str = ascii['wstr']
   1169             else:
   1170                 field_length = long(ascii['length'])
   1171                 if is_compact_ascii:
   1172                     field_str = ascii.address + 1
   1173                 elif int(state['compact']):
   1174                     field_str = compact.address + 1
   1175                 else:
   1176                     field_str = self.field('data')['any']
   1177                 repr_kind = int(state['kind'])
   1178                 if repr_kind == 1:
   1179                     field_str = field_str.cast(_type_unsigned_char_ptr())
   1180                 elif repr_kind == 2:
   1181                     field_str = field_str.cast(_type_unsigned_short_ptr())
   1182                 elif repr_kind == 4:
   1183                     field_str = field_str.cast(_type_unsigned_int_ptr())
   1184         else:
   1185             # Python 3.2 and earlier
   1186             field_length = long(self.field('length'))
   1187             field_str = self.field('str')
   1188             may_have_surrogates = self.char_width() == 2
   1189 
   1190         # Gather a list of ints from the Py_UNICODE array; these are either
   1191         # UCS-1, UCS-2 or UCS-4 code points:
   1192         if not may_have_surrogates:
   1193             Py_UNICODEs = [int(field_str[i]) for i in safe_range(field_length)]
   1194         else:
   1195             # A more elaborate routine if sizeof(Py_UNICODE) is 2 in the
   1196             # inferior process: we must join surrogate pairs.
   1197             Py_UNICODEs = []
   1198             i = 0
   1199             limit = safety_limit(field_length)
   1200             while i < limit:
   1201                 ucs = int(field_str[i])
   1202                 i += 1
   1203                 if ucs < 0xD800 or ucs >= 0xDC00 or i == field_length:
   1204                     Py_UNICODEs.append(ucs)
   1205                     continue
   1206                 # This could be a surrogate pair.
   1207                 ucs2 = int(field_str[i])
   1208                 if ucs2 < 0xDC00 or ucs2 > 0xDFFF:
   1209                     continue
   1210                 code = (ucs & 0x03FF) << 10
   1211                 code |= ucs2 & 0x03FF
   1212                 code += 0x00010000
   1213                 Py_UNICODEs.append(code)
   1214                 i += 1
   1215 
   1216         # Convert the int code points to unicode characters, and generate a
   1217         # local unicode instance.
   1218         # This splits surrogate pairs if sizeof(Py_UNICODE) is 2 here (in gdb).
   1219         result = u''.join([
   1220             (_unichr(ucs) if ucs <= 0x10ffff else '\ufffd')
   1221             for ucs in Py_UNICODEs])
   1222         return result
   1223 
   1224     def write_repr(self, out, visited):
   1225         # Write this out as a Python 3 str literal, i.e. without a "u" prefix
   1226 
   1227         # Get a PyUnicodeObject* within the Python 2 gdb process:
   1228         proxy = self.proxyval(visited)
   1229 
   1230         # Transliteration of Python 3's Object/unicodeobject.c:unicode_repr
   1231         # to Python 2:
   1232         if "'" in proxy and '"' not in proxy:
   1233             quote = '"'
   1234         else:
   1235             quote = "'"
   1236         out.write(quote)
   1237 
   1238         i = 0
   1239         while i < len(proxy):
   1240             ch = proxy[i]
   1241             i += 1
   1242 
   1243             # Escape quotes and backslashes
   1244             if ch == quote or ch == '\\':
   1245                 out.write('\\')
   1246                 out.write(ch)
   1247 
   1248             #  Map special whitespace to '\t', \n', '\r'
   1249             elif ch == '\t':
   1250                 out.write('\\t')
   1251             elif ch == '\n':
   1252                 out.write('\\n')
   1253             elif ch == '\r':
   1254                 out.write('\\r')
   1255 
   1256             # Map non-printable US ASCII to '\xhh' */
   1257             elif ch < ' ' or ch == 0x7F:
   1258                 out.write('\\x')
   1259                 out.write(hexdigits[(ord(ch) >> 4) & 0x000F])
   1260                 out.write(hexdigits[ord(ch) & 0x000F])
   1261 
   1262             # Copy ASCII characters as-is
   1263             elif ord(ch) < 0x7F:
   1264                 out.write(ch)
   1265 
   1266             # Non-ASCII characters
   1267             else:
   1268                 ucs = ch
   1269                 ch2 = None
   1270                 if sys.maxunicode < 0x10000:
   1271                     # If sizeof(Py_UNICODE) is 2 here (in gdb), join
   1272                     # surrogate pairs before calling _unichr_is_printable.
   1273                     if (i < len(proxy)
   1274                     and 0xD800 <= ord(ch) < 0xDC00 \
   1275                     and 0xDC00 <= ord(proxy[i]) <= 0xDFFF):
   1276                         ch2 = proxy[i]
   1277                         ucs = ch + ch2
   1278                         i += 1
   1279 
   1280                 # Unfortuately, Python 2's unicode type doesn't seem
   1281                 # to expose the "isprintable" method
   1282                 printable = _unichr_is_printable(ucs)
   1283                 if printable:
   1284                     try:
   1285                         ucs.encode(ENCODING)
   1286                     except UnicodeEncodeError:
   1287                         printable = False
   1288 
   1289                 # Map Unicode whitespace and control characters
   1290                 # (categories Z* and C* except ASCII space)
   1291                 if not printable:
   1292                     if ch2 is not None:
   1293                         # Match Python 3's representation of non-printable
   1294                         # wide characters.
   1295                         code = (ord(ch) & 0x03FF) << 10
   1296                         code |= ord(ch2) & 0x03FF
   1297                         code += 0x00010000
   1298                     else:
   1299                         code = ord(ucs)
   1300 
   1301                     # Map 8-bit characters to '\\xhh'
   1302                     if code <= 0xff:
   1303                         out.write('\\x')
   1304                         out.write(hexdigits[(code >> 4) & 0x000F])
   1305                         out.write(hexdigits[code & 0x000F])
   1306                     # Map 21-bit characters to '\U00xxxxxx'
   1307                     elif code >= 0x10000:
   1308                         out.write('\\U')
   1309                         out.write(hexdigits[(code >> 28) & 0x0000000F])
   1310                         out.write(hexdigits[(code >> 24) & 0x0000000F])
   1311                         out.write(hexdigits[(code >> 20) & 0x0000000F])
   1312                         out.write(hexdigits[(code >> 16) & 0x0000000F])
   1313                         out.write(hexdigits[(code >> 12) & 0x0000000F])
   1314                         out.write(hexdigits[(code >> 8) & 0x0000000F])
   1315                         out.write(hexdigits[(code >> 4) & 0x0000000F])
   1316                         out.write(hexdigits[code & 0x0000000F])
   1317                     # Map 16-bit characters to '\uxxxx'
   1318                     else:
   1319                         out.write('\\u')
   1320                         out.write(hexdigits[(code >> 12) & 0x000F])
   1321                         out.write(hexdigits[(code >> 8) & 0x000F])
   1322                         out.write(hexdigits[(code >> 4) & 0x000F])
   1323                         out.write(hexdigits[code & 0x000F])
   1324                 else:
   1325                     # Copy characters as-is
   1326                     out.write(ch)
   1327                     if ch2 is not None:
   1328                         out.write(ch2)
   1329 
   1330         out.write(quote)
   1331 
   1332 
   1333 
   1334 
   1335 def int_from_int(gdbval):
   1336     return int(str(gdbval))
   1337 
   1338 
   1339 def stringify(val):
   1340     # TODO: repr() puts everything on one line; pformat can be nicer, but
   1341     # can lead to v.long results; this function isolates the choice
   1342     if True:
   1343         return repr(val)
   1344     else:
   1345         from pprint import pformat
   1346         return pformat(val)
   1347 
   1348 
   1349 class PyObjectPtrPrinter:
   1350     "Prints a (PyObject*)"
   1351 
   1352     def __init__ (self, gdbval):
   1353         self.gdbval = gdbval
   1354 
   1355     def to_string (self):
   1356         pyop = PyObjectPtr.from_pyobject_ptr(self.gdbval)
   1357         if True:
   1358             return pyop.get_truncated_repr(MAX_OUTPUT_LEN)
   1359         else:
   1360             # Generate full proxy value then stringify it.
   1361             # Doing so could be expensive
   1362             proxyval = pyop.proxyval(set())
   1363             return stringify(proxyval)
   1364 
   1365 def pretty_printer_lookup(gdbval):
   1366     type = gdbval.type.unqualified()
   1367     if type.code == gdb.TYPE_CODE_PTR:
   1368         type = type.target().unqualified()
   1369         t = str(type)
   1370         if t in ("PyObject", "PyFrameObject", "PyUnicodeObject"):
   1371             return PyObjectPtrPrinter(gdbval)
   1372 
   1373 """
   1374 During development, I've been manually invoking the code in this way:
   1375 (gdb) python
   1376 
   1377 import sys
   1378 sys.path.append('/home/david/coding/python-gdb')
   1379 import libpython
   1380 end
   1381 
   1382 then reloading it after each edit like this:
   1383 (gdb) python reload(libpython)
   1384 
   1385 The following code should ensure that the prettyprinter is registered
   1386 if the code is autoloaded by gdb when visiting libpython.so, provided
   1387 that this python file is installed to the same path as the library (or its
   1388 .debug file) plus a "-gdb.py" suffix, e.g:
   1389   /usr/lib/libpython2.6.so.1.0-gdb.py
   1390   /usr/lib/debug/usr/lib/libpython2.6.so.1.0.debug-gdb.py
   1391 """
   1392 def register (obj):
   1393     if obj is None:
   1394         obj = gdb
   1395 
   1396     # Wire up the pretty-printer
   1397     obj.pretty_printers.append(pretty_printer_lookup)
   1398 
   1399 register (gdb.current_objfile ())
   1400 
   1401 
   1402 
   1403 # Unfortunately, the exact API exposed by the gdb module varies somewhat
   1404 # from build to build
   1405 # See http://bugs.python.org/issue8279?#msg102276
   1406 
   1407 class Frame(object):
   1408     '''
   1409     Wrapper for gdb.Frame, adding various methods
   1410     '''
   1411     def __init__(self, gdbframe):
   1412         self._gdbframe = gdbframe
   1413 
   1414     def older(self):
   1415         older = self._gdbframe.older()
   1416         if older:
   1417             return Frame(older)
   1418         else:
   1419             return None
   1420 
   1421     def newer(self):
   1422         newer = self._gdbframe.newer()
   1423         if newer:
   1424             return Frame(newer)
   1425         else:
   1426             return None
   1427 
   1428     def select(self):
   1429         '''If supported, select this frame and return True; return False if unsupported
   1430 
   1431         Not all builds have a gdb.Frame.select method; seems to be present on Fedora 12
   1432         onwards, but absent on Ubuntu buildbot'''
   1433         if not hasattr(self._gdbframe, 'select'):
   1434             print ('Unable to select frame: '
   1435                    'this build of gdb does not expose a gdb.Frame.select method')
   1436             return False
   1437         self._gdbframe.select()
   1438         return True
   1439 
   1440     def get_index(self):
   1441         '''Calculate index of frame, starting at 0 for the newest frame within
   1442         this thread'''
   1443         index = 0
   1444         # Go down until you reach the newest frame:
   1445         iter_frame = self
   1446         while iter_frame.newer():
   1447             index += 1
   1448             iter_frame = iter_frame.newer()
   1449         return index
   1450 
   1451     # We divide frames into:
   1452     #   - "python frames":
   1453     #       - "bytecode frames" i.e. PyEval_EvalFrameEx
   1454     #       - "other python frames": things that are of interest from a python
   1455     #         POV, but aren't bytecode (e.g. GC, GIL)
   1456     #   - everything else
   1457 
   1458     def is_python_frame(self):
   1459         '''Is this a PyEval_EvalFrameEx frame, or some other important
   1460         frame? (see is_other_python_frame for what "important" means in this
   1461         context)'''
   1462         if self.is_evalframeex():
   1463             return True
   1464         if self.is_other_python_frame():
   1465             return True
   1466         return False
   1467 
   1468     def is_evalframeex(self):
   1469         '''Is this a PyEval_EvalFrameEx frame?'''
   1470         if self._gdbframe.name() == 'PyEval_EvalFrameEx':
   1471             '''
   1472             I believe we also need to filter on the inline
   1473             struct frame_id.inline_depth, only regarding frames with
   1474             an inline depth of 0 as actually being this function
   1475 
   1476             So we reject those with type gdb.INLINE_FRAME
   1477             '''
   1478             if self._gdbframe.type() == gdb.NORMAL_FRAME:
   1479                 # We have a PyEval_EvalFrameEx frame:
   1480                 return True
   1481 
   1482         return False
   1483 
   1484     def is_other_python_frame(self):
   1485         '''Is this frame worth displaying in python backtraces?
   1486         Examples:
   1487           - waiting on the GIL
   1488           - garbage-collecting
   1489           - within a CFunction
   1490          If it is, return a descriptive string
   1491          For other frames, return False
   1492          '''
   1493         if self.is_waiting_for_gil():
   1494             return 'Waiting for the GIL'
   1495 
   1496         if self.is_gc_collect():
   1497             return 'Garbage-collecting'
   1498 
   1499         # Detect invocations of PyCFunction instances:
   1500         older = self.older()
   1501         if not older:
   1502             return False
   1503 
   1504         caller = older._gdbframe.name()
   1505         if not caller:
   1506             return False
   1507 
   1508         if caller == 'PyCFunction_Call':
   1509             # Within that frame:
   1510             #   "func" is the local containing the PyObject* of the
   1511             # PyCFunctionObject instance
   1512             #   "f" is the same value, but cast to (PyCFunctionObject*)
   1513             #   "self" is the (PyObject*) of the 'self'
   1514             try:
   1515                 # Use the prettyprinter for the func:
   1516                 func = older._gdbframe.read_var('func')
   1517                 return str(func)
   1518             except RuntimeError:
   1519                 return 'PyCFunction invocation (unable to read "func")'
   1520 
   1521         elif caller == '_PyCFunction_FastCallDict':
   1522             try:
   1523                 func = older._gdbframe.read_var('func_obj')
   1524                 return str(func)
   1525             except RuntimeError:
   1526                 return 'PyCFunction invocation (unable to read "func_obj")'
   1527 
   1528         # This frame isn't worth reporting:
   1529         return False
   1530 
   1531     def is_waiting_for_gil(self):
   1532         '''Is this frame waiting on the GIL?'''
   1533         # This assumes the _POSIX_THREADS version of Python/ceval_gil.h:
   1534         name = self._gdbframe.name()
   1535         if name:
   1536             return 'pthread_cond_timedwait' in name
   1537 
   1538     def is_gc_collect(self):
   1539         '''Is this frame "collect" within the garbage-collector?'''
   1540         return self._gdbframe.name() == 'collect'
   1541 
   1542     def get_pyop(self):
   1543         try:
   1544             f = self._gdbframe.read_var('f')
   1545             frame = PyFrameObjectPtr.from_pyobject_ptr(f)
   1546             if not frame.is_optimized_out():
   1547                 return frame
   1548             # gdb is unable to get the "f" argument of PyEval_EvalFrameEx()
   1549             # because it was "optimized out". Try to get "f" from the frame
   1550             # of the caller, PyEval_EvalCodeEx().
   1551             orig_frame = frame
   1552             caller = self._gdbframe.older()
   1553             if caller:
   1554                 f = caller.read_var('f')
   1555                 frame = PyFrameObjectPtr.from_pyobject_ptr(f)
   1556                 if not frame.is_optimized_out():
   1557                     return frame
   1558             return orig_frame
   1559         except ValueError:
   1560             return None
   1561 
   1562     @classmethod
   1563     def get_selected_frame(cls):
   1564         _gdbframe = gdb.selected_frame()
   1565         if _gdbframe:
   1566             return Frame(_gdbframe)
   1567         return None
   1568 
   1569     @classmethod
   1570     def get_selected_python_frame(cls):
   1571         '''Try to obtain the Frame for the python-related code in the selected
   1572         frame, or None'''
   1573         try:
   1574             frame = cls.get_selected_frame()
   1575         except gdb.error:
   1576             # No frame: Python didn't start yet
   1577             return None
   1578 
   1579         while frame:
   1580             if frame.is_python_frame():
   1581                 return frame
   1582             frame = frame.older()
   1583 
   1584         # Not found:
   1585         return None
   1586 
   1587     @classmethod
   1588     def get_selected_bytecode_frame(cls):
   1589         '''Try to obtain the Frame for the python bytecode interpreter in the
   1590         selected GDB frame, or None'''
   1591         frame = cls.get_selected_frame()
   1592 
   1593         while frame:
   1594             if frame.is_evalframeex():
   1595                 return frame
   1596             frame = frame.older()
   1597 
   1598         # Not found:
   1599         return None
   1600 
   1601     def print_summary(self):
   1602         if self.is_evalframeex():
   1603             pyop = self.get_pyop()
   1604             if pyop:
   1605                 line = pyop.get_truncated_repr(MAX_OUTPUT_LEN)
   1606                 write_unicode(sys.stdout, '#%i %s\n' % (self.get_index(), line))
   1607                 if not pyop.is_optimized_out():
   1608                     line = pyop.current_line()
   1609                     if line is not None:
   1610                         sys.stdout.write('    %s\n' % line.strip())
   1611             else:
   1612                 sys.stdout.write('#%i (unable to read python frame information)\n' % self.get_index())
   1613         else:
   1614             info = self.is_other_python_frame()
   1615             if info:
   1616                 sys.stdout.write('#%i %s\n' % (self.get_index(), info))
   1617             else:
   1618                 sys.stdout.write('#%i\n' % self.get_index())
   1619 
   1620     def print_traceback(self):
   1621         if self.is_evalframeex():
   1622             pyop = self.get_pyop()
   1623             if pyop:
   1624                 pyop.print_traceback()
   1625                 if not pyop.is_optimized_out():
   1626                     line = pyop.current_line()
   1627                     if line is not None:
   1628                         sys.stdout.write('    %s\n' % line.strip())
   1629             else:
   1630                 sys.stdout.write('  (unable to read python frame information)\n')
   1631         else:
   1632             info = self.is_other_python_frame()
   1633             if info:
   1634                 sys.stdout.write('  %s\n' % info)
   1635             else:
   1636                 sys.stdout.write('  (not a python frame)\n')
   1637 
   1638 class PyList(gdb.Command):
   1639     '''List the current Python source code, if any
   1640 
   1641     Use
   1642        py-list START
   1643     to list at a different line number within the python source.
   1644 
   1645     Use
   1646        py-list START, END
   1647     to list a specific range of lines within the python source.
   1648     '''
   1649 
   1650     def __init__(self):
   1651         gdb.Command.__init__ (self,
   1652                               "py-list",
   1653                               gdb.COMMAND_FILES,
   1654                               gdb.COMPLETE_NONE)
   1655 
   1656 
   1657     def invoke(self, args, from_tty):
   1658         import re
   1659 
   1660         start = None
   1661         end = None
   1662 
   1663         m = re.match(r'\s*(\d+)\s*', args)
   1664         if m:
   1665             start = int(m.group(0))
   1666             end = start + 10
   1667 
   1668         m = re.match(r'\s*(\d+)\s*,\s*(\d+)\s*', args)
   1669         if m:
   1670             start, end = map(int, m.groups())
   1671 
   1672         # py-list requires an actual PyEval_EvalFrameEx frame:
   1673         frame = Frame.get_selected_bytecode_frame()
   1674         if not frame:
   1675             print('Unable to locate gdb frame for python bytecode interpreter')
   1676             return
   1677 
   1678         pyop = frame.get_pyop()
   1679         if not pyop or pyop.is_optimized_out():
   1680             print('Unable to read information on python frame')
   1681             return
   1682 
   1683         filename = pyop.filename()
   1684         lineno = pyop.current_line_num()
   1685 
   1686         if start is None:
   1687             start = lineno - 5
   1688             end = lineno + 5
   1689 
   1690         if start<1:
   1691             start = 1
   1692 
   1693         try:
   1694             f = open(os_fsencode(filename), 'r')
   1695         except IOError as err:
   1696             sys.stdout.write('Unable to open %s: %s\n'
   1697                              % (filename, err))
   1698             return
   1699         with f:
   1700             all_lines = f.readlines()
   1701             # start and end are 1-based, all_lines is 0-based;
   1702             # so [start-1:end] as a python slice gives us [start, end] as a
   1703             # closed interval
   1704             for i, line in enumerate(all_lines[start-1:end]):
   1705                 linestr = str(i+start)
   1706                 # Highlight current line:
   1707                 if i + start == lineno:
   1708                     linestr = '>' + linestr
   1709                 sys.stdout.write('%4s    %s' % (linestr, line))
   1710 
   1711 
# ...and register the command: creating an instance runs
# gdb.Command.__init__ with the name "py-list".
PyList()
   1714 
   1715 def move_in_stack(move_up):
   1716     '''Move up or down the stack (for the py-up/py-down command)'''
   1717     frame = Frame.get_selected_python_frame()
   1718     if not frame:
   1719         print('Unable to locate python frame')
   1720         return
   1721 
   1722     while frame:
   1723         if move_up:
   1724             iter_frame = frame.older()
   1725         else:
   1726             iter_frame = frame.newer()
   1727 
   1728         if not iter_frame:
   1729             break
   1730 
   1731         if iter_frame.is_python_frame():
   1732             # Result:
   1733             if iter_frame.select():
   1734                 iter_frame.print_summary()
   1735             return
   1736 
   1737         frame = iter_frame
   1738 
   1739     if move_up:
   1740         print('Unable to find an older python frame')
   1741     else:
   1742         print('Unable to find a newer python frame')
   1743 
   1744 class PyUp(gdb.Command):
   1745     'Select and print the python stack frame that called this one (if any)'
   1746     def __init__(self):
   1747         gdb.Command.__init__ (self,
   1748                               "py-up",
   1749                               gdb.COMMAND_STACK,
   1750                               gdb.COMPLETE_NONE)
   1751 
   1752 
   1753     def invoke(self, args, from_tty):
   1754         move_in_stack(move_up=True)
   1755 
   1756 class PyDown(gdb.Command):
   1757     'Select and print the python stack frame called by this one (if any)'
   1758     def __init__(self):
   1759         gdb.Command.__init__ (self,
   1760                               "py-down",
   1761                               gdb.COMMAND_STACK,
   1762                               gdb.COMPLETE_NONE)
   1763 
   1764 
   1765     def invoke(self, args, from_tty):
   1766         move_in_stack(move_up=False)
   1767 
# Not all builds of gdb have gdb.Frame.select, so py-up and py-down are only
# registered when it is present.  (move_in_stack() calls select() on the
# frame it moves to; presumably that wraps gdb.Frame.select.)
if hasattr(gdb.Frame, 'select'):
    PyUp()
    PyDown()
   1772 
   1773 class PyBacktraceFull(gdb.Command):
   1774     'Display the current python frame and all the frames within its call stack (if any)'
   1775     def __init__(self):
   1776         gdb.Command.__init__ (self,
   1777                               "py-bt-full",
   1778                               gdb.COMMAND_STACK,
   1779                               gdb.COMPLETE_NONE)
   1780 
   1781 
   1782     def invoke(self, args, from_tty):
   1783         frame = Frame.get_selected_python_frame()
   1784         if not frame:
   1785             print('Unable to locate python frame')
   1786             return
   1787 
   1788         while frame:
   1789             if frame.is_python_frame():
   1790                 frame.print_summary()
   1791             frame = frame.older()
   1792 
# Register the command: creating an instance runs gdb.Command.__init__ with
# the name "py-bt-full".
PyBacktraceFull()
   1794 
   1795 class PyBacktrace(gdb.Command):
   1796     'Display the current python frame and all the frames within its call stack (if any)'
   1797     def __init__(self):
   1798         gdb.Command.__init__ (self,
   1799                               "py-bt",
   1800                               gdb.COMMAND_STACK,
   1801                               gdb.COMPLETE_NONE)
   1802 
   1803 
   1804     def invoke(self, args, from_tty):
   1805         frame = Frame.get_selected_python_frame()
   1806         if not frame:
   1807             print('Unable to locate python frame')
   1808             return
   1809 
   1810         sys.stdout.write('Traceback (most recent call first):\n')
   1811         while frame:
   1812             if frame.is_python_frame():
   1813                 frame.print_traceback()
   1814             frame = frame.older()
   1815 
# Register the command: creating an instance runs gdb.Command.__init__ with
# the name "py-bt".
PyBacktrace()
   1817 
   1818 class PyPrint(gdb.Command):
   1819     'Look up the given python variable name, and print it'
   1820     def __init__(self):
   1821         gdb.Command.__init__ (self,
   1822                               "py-print",
   1823                               gdb.COMMAND_DATA,
   1824                               gdb.COMPLETE_NONE)
   1825 
   1826 
   1827     def invoke(self, args, from_tty):
   1828         name = str(args)
   1829 
   1830         frame = Frame.get_selected_python_frame()
   1831         if not frame:
   1832             print('Unable to locate python frame')
   1833             return
   1834 
   1835         pyop_frame = frame.get_pyop()
   1836         if not pyop_frame:
   1837             print('Unable to read information on python frame')
   1838             return
   1839 
   1840         pyop_var, scope = pyop_frame.get_var_by_name(name)
   1841 
   1842         if pyop_var:
   1843             print('%s %r = %s'
   1844                    % (scope,
   1845                       name,
   1846                       pyop_var.get_truncated_repr(MAX_OUTPUT_LEN)))
   1847         else:
   1848             print('%r not found' % name)
   1849 
# Register the command: creating an instance runs gdb.Command.__init__ with
# the name "py-print".
PyPrint()
   1851 
   1852 class PyLocals(gdb.Command):
   1853     'Look up the given python variable name, and print it'
   1854     def __init__(self):
   1855         gdb.Command.__init__ (self,
   1856                               "py-locals",
   1857                               gdb.COMMAND_DATA,
   1858                               gdb.COMPLETE_NONE)
   1859 
   1860 
   1861     def invoke(self, args, from_tty):
   1862         name = str(args)
   1863 
   1864         frame = Frame.get_selected_python_frame()
   1865         if not frame:
   1866             print('Unable to locate python frame')
   1867             return
   1868 
   1869         pyop_frame = frame.get_pyop()
   1870         if not pyop_frame:
   1871             print('Unable to read information on python frame')
   1872             return
   1873 
   1874         for pyop_name, pyop_value in pyop_frame.iter_locals():
   1875             print('%s = %s'
   1876                    % (pyop_name.proxyval(set()),
   1877                       pyop_value.get_truncated_repr(MAX_OUTPUT_LEN)))
   1878 
# Register the command: creating an instance runs gdb.Command.__init__ with
# the name "py-locals".
PyLocals()
   1880