Home | History | Annotate | Download | only in Debugger
      1 #!/usr/bin/python
      2 
      3 # NOTE: this file is taken from the Python source distribution
      4 # It can be found under Tools/gdb/libpython.py. It is shipped with Cython
      5 # because it's not installed as a python module, and because changes are only
      6 # merged into new python versions (v3.2+).
      7 
      8 '''
      9 From gdb 7 onwards, gdb's build can be configured --with-python, allowing gdb
     10 to be extended with Python code e.g. for library-specific data visualizations,
     11 such as for the C++ STL types.  Documentation on this API can be seen at:
     12 http://sourceware.org/gdb/current/onlinedocs/gdb/Python-API.html
     13 
     14 
     15 This python module deals with the case when the process being debugged (the
     16 "inferior process" in gdb parlance) is itself python, or more specifically,
     17 linked against libpython.  In this situation, almost every item of data is a
     18 (PyObject*), and having the debugger merely print their addresses is not very
     19 enlightening.
     20 
     21 This module embeds knowledge about the implementation details of libpython so
     22 that we can emit useful visualizations e.g. a string, a list, a dict, a frame
     23 giving file/line information and the state of local variables
     24 
     25 In particular, given a gdb.Value corresponding to a PyObject* in the inferior
     26 process, we can generate a "proxy value" within the gdb process.  For example,
     27 given a PyObject* in the inferior process that is in fact a PyListObject*
     28 holding three PyObject* that turn out to be PyStringObject* instances, we can
     29 generate a proxy value within the gdb process that is a list of strings:
     30   ["foo", "bar", "baz"]
     31 
     32 Doing so can be expensive for complicated graphs of objects, and could take
     33 some time, so we also have a "write_repr" method that writes a representation
     34 of the data to a file-like object.  This allows us to stop the traversal by
     35 having the file-like object raise an exception if it gets too much data.
     36 
     37 With both "proxyval" and "write_repr" we keep track of the set of all addresses
     38 visited so far in the traversal, to avoid infinite recursion due to cycles in
     39 the graph of object references.
     40 
     41 We try to defer gdb.lookup_type() invocations for python types until as late as
     42 possible: for a dynamically linked python binary, when the process starts in
     43 the debugger, the libpython.so hasn't been dynamically loaded yet, so none of
     44 the type names are known to the debugger
     45 
     46 The module also extends gdb with some python-specific commands.
     47 '''
     48 from __future__ import with_statement
     49 
     50 import os
     51 import re
     52 import sys
     53 import struct
     54 import locale
     55 import atexit
     56 import warnings
     57 import tempfile
     58 import textwrap
     59 import itertools
     60 
     61 import gdb
     62 
if sys.version_info[0] < 3:
    # Python 2 only: switch the interpreter-wide default encoding to UTF-8.
    # I think this is the only way to fix this bug :'(
    # http://sourceware.org/bugzilla/show_bug.cgi?id=12285
    #
    # The current sys.stdout / sys.stderr objects are saved before
    # reload(sys) and put back afterwards, so whatever streams were installed
    # before the reload remain in effect (presumably gdb's own stream
    # objects -- confirm).
    out, err = sys.stdout, sys.stderr
    reload(sys).setdefaultencoding('UTF-8')
    sys.stdout = out
    sys.stderr = err
     70 
# Look up the gdb.Type for some standard types:
# (these lookups happen once, when the module is loaded)
_type_char_ptr = gdb.lookup_type('char').pointer() # char*
_type_unsigned_char_ptr = gdb.lookup_type('unsigned char').pointer() # unsigned char*
_type_void_ptr = gdb.lookup_type('void').pointer() # void*

# Size of a void* according to gdb's type information; used for pointer
# alignment arithmetic (see _PyObject_VAR_SIZE)
SIZEOF_VOID_P = _type_void_ptr.sizeof
     77 
# Bit masks tested against a type object's tp_flags field in
# PyObjectPtr.subclass_from_type().  Presumably these mirror the Py_TPFLAGS_*
# macros of the CPython being debugged -- confirm against its headers.
Py_TPFLAGS_HEAPTYPE = (1L << 9)

Py_TPFLAGS_INT_SUBCLASS      = (1L << 23)
Py_TPFLAGS_LONG_SUBCLASS     = (1L << 24)
Py_TPFLAGS_LIST_SUBCLASS     = (1L << 25)
Py_TPFLAGS_TUPLE_SUBCLASS    = (1L << 26)
# NOTE: STRING_SUBCLASS and BYTES_SUBCLASS share the same bit (27)
Py_TPFLAGS_STRING_SUBCLASS   = (1L << 27)
Py_TPFLAGS_BYTES_SUBCLASS    = (1L << 27)
Py_TPFLAGS_UNICODE_SUBCLASS  = (1L << 28)
Py_TPFLAGS_DICT_SUBCLASS     = (1L << 29)
Py_TPFLAGS_BASE_EXC_SUBCLASS = (1L << 30)
Py_TPFLAGS_TYPE_SUBCLASS     = (1L << 31)

# Output length cap; not referenced in the part of the file shown here
# (presumably passed to PyObjectPtr.get_truncated_repr() -- confirm)
MAX_OUTPUT_LEN = 1024

# Lower-case hexadecimal digit characters
hexdigits = "0123456789abcdef"

# Encoding used by write_unicode() when it has to encode unicode text
ENCODING = locale.getpreferredencoding()
     96 
class NullPyObjectPtr(RuntimeError):
    """
    Raised by PyObjectPtr.field() when the wrapped pointer is NULL.

    Being a RuntimeError subclass, it is also caught by the many
    "except RuntimeError" handlers in this module.
    """
    pass
     99 
    100 
    101 def safety_limit(val):
    102     # Given a integer value from the process being debugged, limit it to some
    103     # safety threshold so that arbitrary breakage within said process doesn't
    104     # break the gdb process too much (e.g. sizes of iterations, sizes of lists)
    105     return min(val, 1000)
    106 
    107 
    108 def safe_range(val):
    109     # As per range, but don't trust the value too much: cap it to a safety
    110     # threshold in case the data was corrupted
    111     return xrange(safety_limit(val))
    112 
    113 def write_unicode(file, text):
    114     # Write a byte or unicode string to file. Unicode strings are encoded to
    115     # ENCODING encoding with 'backslashreplace' error handler to avoid
    116     # UnicodeEncodeError.
    117     if isinstance(text, unicode):
    118         text = text.encode(ENCODING, 'backslashreplace')
    119     file.write(text)
    120 
    121 def os_fsencode(filename):
    122     if not isinstance(filename, unicode):
    123         return filename
    124     encoding = sys.getfilesystemencoding()
    125     if encoding == 'mbcs':
    126         # mbcs doesn't support surrogateescape
    127         return filename.encode(encoding)
    128     encoded = []
    129     for char in filename:
    130         # surrogateescape error handler
    131         if 0xDC80 <= ord(char) <= 0xDCFF:
    132             byte = chr(ord(char) - 0xDC00)
    133         else:
    134             byte = char.encode(encoding)
    135         encoded.append(byte)
    136     return ''.join(encoded)
    137 
class StringTruncated(RuntimeError):
    """
    Raised by TruncatedStringIO.write() when a write would exceed the
    configured maximum length.
    """
    pass
    140 
    141 class TruncatedStringIO(object):
    142     '''Similar to cStringIO, but can truncate the output by raising a
    143     StringTruncated exception'''
    144     def __init__(self, maxlen=None):
    145         self._val = ''
    146         self.maxlen = maxlen
    147 
    148     def write(self, data):
    149         if self.maxlen:
    150             if len(data) + len(self._val) > self.maxlen:
    151                 # Truncation:
    152                 self._val += data[0:self.maxlen - len(self._val)]
    153                 raise StringTruncated()
    154 
    155         self._val += data
    156 
    157     def getvalue(self):
    158         return self._val
    159 
    160 
# pretty printer lookup
# Set of the "_typename" values (type names as passed to gdb.lookup_type) of
# every class created through PrettyPrinterTrackerMeta.
all_pretty_typenames = set()

class PrettyPrinterTrackerMeta(type):
    """
    Metaclass that records the "_typename" attribute of each class it
    creates in all_pretty_typenames.
    """

    def __init__(self, name, bases, dict):
        super(PrettyPrinterTrackerMeta, self).__init__(name, bases, dict)
        # "self" is the newly created class here, not an instance of it
        all_pretty_typenames.add(self._typename)
    169 
    170 
class PyObjectPtr(object):
    """
    Class wrapping a gdb.Value that's either a (PyObject*) within the
    inferior process, or some subclass pointer e.g. (PyStringObject*)

    There will be a subclass for every refined PyObject type that we care
    about.

    Note that at every stage the underlying pointer could be NULL, point
    to corrupt data, etc; this is the debugger, after all.
    """

    # Python 2 metaclass hook: records each class's _typename in
    # all_pretty_typenames
    __metaclass__ = PrettyPrinterTrackerMeta

    # Name of the C type; get_gdb_type() looks it up with gdb.lookup_type()
    _typename = 'PyObject'

    def __init__(self, gdbval, cast_to=None):
        '''
        gdbval: the gdb.Value to wrap
        cast_to: optional gdb.Type; when given, gdbval is cast to it before
        being stored
        '''
        if cast_to:
            self._gdbval = gdbval.cast(cast_to)
        else:
            self._gdbval = gdbval

    def field(self, name):
        '''
        Get the gdb.Value for the given field within the PyObject, coping with
        some python 2 versus python 3 differences.

        Various libpython types are defined using the "PyObject_HEAD" and
        "PyObject_VAR_HEAD" macros.

        In Python 2, these are defined so that "ob_type" and (for a var
        object) "ob_size" are fields of the type in question.

        In Python 3, this is defined as an embedded PyVarObject type thus:
           PyVarObject ob_base;
        so that the "ob_size" field is located inside the "ob_base" field, and
        the "ob_type" is most easily accessed by casting back to a (PyObject*).

        Raises NullPyObjectPtr if the wrapped pointer is NULL.
        '''
        if self.is_null():
            raise NullPyObjectPtr(self)

        if name == 'ob_type':
            # Always read ob_type through a (PyObject*) view of the pointer
            pyo_ptr = self._gdbval.cast(PyObjectPtr.get_gdb_type())
            return pyo_ptr.dereference()[name]

        if name == 'ob_size':
            # Always read ob_size through a (PyVarObject*) view of the pointer
            pyo_ptr = self._gdbval.cast(PyVarObjectPtr.get_gdb_type())
            return pyo_ptr.dereference()[name]

        # General case: look it up inside the object:
        return self._gdbval.dereference()[name]

    def pyop_field(self, name):
        '''
        Get a PyObjectPtr for the given PyObject* field within this PyObject,
        coping with some python 2 versus python 3 differences.
        '''
        return PyObjectPtr.from_pyobject_ptr(self.field(name))

    def write_field_repr(self, name, out, visited):
        '''
        Extract the PyObject* field named "name", and write its representation
        to file-like object "out"
        '''
        field_obj = self.pyop_field(name)
        field_obj.write_repr(out, visited)

    def get_truncated_repr(self, maxlen):
        '''
        Get a repr-like string for the data, but truncate it at "maxlen" bytes
        (ending the object graph traversal as soon as you do)
        '''
        out = TruncatedStringIO(maxlen)
        try:
            # Start the traversal with an empty "visited" set
            self.write_repr(out, set())
        except StringTruncated:
            # Truncation occurred:
            return out.getvalue() + '...(truncated)'

        # No truncation occurred:
        return out.getvalue()

    def type(self):
        '''
        Get a PyTypeObjectPtr wrapping this object's ob_type field.
        '''
        return PyTypeObjectPtr(self.field('ob_type'))

    def is_null(self):
        '''
        Is the wrapped pointer NULL (i.e. does it convert to the integer 0)?
        '''
        return 0 == long(self._gdbval)

    def is_optimized_out(self):
        '''
        Is the value of the underlying PyObject* visible to the debugger?

        This can vary with the precise version of the compiler used to build
        Python, and the precise version of gdb.

        See e.g. https://bugzilla.redhat.com/show_bug.cgi?id=556975 with
        PyEval_EvalFrameEx's "f"
        '''
        return self._gdbval.is_optimized_out

    def safe_tp_name(self):
        '''
        Get the tp_name of this object's type as a string, or 'unknown' if
        it cannot be read.
        '''
        try:
            return self.type().field('tp_name').string()
        except NullPyObjectPtr:
            # NULL tp_name?
            return 'unknown'
        except RuntimeError:
            # Can't even read the object at all?
            return 'unknown'

    def proxyval(self, visited):
        '''
        Scrape a value from the inferior process, and try to represent it
        within the gdb process, whilst (hopefully) avoiding crashes when
        the remote data is corrupt.

        Derived classes will override this.

        For example, a PyIntObject* with ob_ival 42 in the inferior process
        should result in an int(42) in this process.

        visited: a set of all gdb.Value pyobject pointers already visited
        whilst generating this value (to guard against infinite recursion when
        visiting object graphs with loops).  Analogous to Py_ReprEnter and
        Py_ReprLeave
        '''

        class FakeRepr(object):
            """
            Class representing a non-descript PyObject* value in the inferior
            process for when we don't have a custom scraper, intended to have
            a sane repr().
            """

            def __init__(self, tp_name, address):
                self.tp_name = tp_name
                self.address = address

            def __repr__(self):
                # For the NULL pointer, we have no way of knowing a type, so
                # special-case it as per
                # http://bugs.python.org/issue8032#msg100882
                if self.address == 0:
                    return '0x0'
                return '<%s at remote 0x%x>' % (self.tp_name, self.address)

        return FakeRepr(self.safe_tp_name(),
                        long(self._gdbval))

    def write_repr(self, out, visited):
        '''
        Write a string representation of the value scraped from the inferior
        process to "out", a file-like object.
        '''
        # Default implementation: generate a proxy value and write its repr
        # However, this could involve a lot of work for complicated objects,
        # so for derived classes we specialize this
        return out.write(repr(self.proxyval(visited)))

    @classmethod
    def subclass_from_type(cls, t):
        '''
        Given a PyTypeObjectPtr instance wrapping a gdb.Value that's a
        (PyTypeObject*), determine the corresponding subclass of PyObjectPtr
        to use

        Ideally, we would look up the symbols for the global types, but that
        isn't working yet:
          (gdb) python print gdb.lookup_symbol('PyList_Type')[0].value
          Traceback (most recent call last):
            File "<string>", line 1, in <module>
          NotImplementedError: Symbol type not yet supported in Python scripts.
          Error while executing Python code.

        For now, we use tp_flags, after doing some string comparisons on the
        tp_name for some special-cases that don't seem to be visible through
        flags
        '''
        try:
            tp_name = t.field('tp_name').string()
            tp_flags = int(t.field('tp_flags'))
        except RuntimeError:
            # Handle any kind of error e.g. NULL ptrs by simply using the base
            # class
            return cls

        #print 'tp_flags = 0x%08x' % tp_flags
        #print 'tp_name = %r' % tp_name

        # Types recognised by name; these take precedence over the flag tests
        name_map = {'bool': PyBoolObjectPtr,
                    'classobj': PyClassObjectPtr,
                    'instance': PyInstanceObjectPtr,
                    'NoneType': PyNoneStructPtr,
                    'frame': PyFrameObjectPtr,
                    'set' : PySetObjectPtr,
                    'frozenset' : PySetObjectPtr,
                    'builtin_function_or_method' : PyCFunctionObjectPtr,
                    }
        if tp_name in name_map:
            return name_map[tp_name]

        # Tested before the *_SUBCLASS bits below, so a type carrying either
        # of these two flags never reaches them
        if tp_flags & (Py_TPFLAGS_HEAPTYPE|Py_TPFLAGS_TYPE_SUBCLASS):
            return PyTypeObjectPtr

        if tp_flags & Py_TPFLAGS_INT_SUBCLASS:
            return PyIntObjectPtr
        if tp_flags & Py_TPFLAGS_LONG_SUBCLASS:
            return PyLongObjectPtr
        if tp_flags & Py_TPFLAGS_LIST_SUBCLASS:
            return PyListObjectPtr
        if tp_flags & Py_TPFLAGS_TUPLE_SUBCLASS:
            return PyTupleObjectPtr
        if tp_flags & Py_TPFLAGS_STRING_SUBCLASS:
            # Probe for the PyBytesObject type: if gdb knows it, use the
            # bytes wrapper, otherwise fall back to the string wrapper
            try:
                gdb.lookup_type('PyBytesObject')
                return PyBytesObjectPtr
            except RuntimeError:
                return PyStringObjectPtr
        if tp_flags & Py_TPFLAGS_UNICODE_SUBCLASS:
            return PyUnicodeObjectPtr
        if tp_flags & Py_TPFLAGS_DICT_SUBCLASS:
            return PyDictObjectPtr
        if tp_flags & Py_TPFLAGS_BASE_EXC_SUBCLASS:
            return PyBaseExceptionObjectPtr

        # Use the base class:
        return cls

    @classmethod
    def from_pyobject_ptr(cls, gdbval):
        '''
        Try to locate the appropriate derived class dynamically, and cast
        the pointer accordingly.
        '''
        try:
            p = PyObjectPtr(gdbval)
            # NOTE: "cls" is rebound here, so if a later step in this "try"
            # raises, the fallback below wraps gdbval (uncast) in the
            # refined class rather than in the original one
            cls = cls.subclass_from_type(p.type())
            return cls(gdbval, cast_to=cls.get_gdb_type())
        except RuntimeError, exc:
            # Handle any kind of error e.g. NULL ptrs by simply using the base
            # class
            pass
        return cls(gdbval)

    @classmethod
    def get_gdb_type(cls):
        '''
        Get the gdb.Type for a pointer to this class's "_typename" type.
        '''
        return gdb.lookup_type(cls._typename).pointer()

    def as_address(self):
        '''
        Get the wrapped pointer as an integer address.
        '''
        return long(self._gdbval)
    421 
    422 
class PyVarObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyVarObject*; PyObjectPtr.field()
    uses its gdb type to read the "ob_size" field.
    """
    _typename = 'PyVarObject'
    425 
    426 class ProxyAlreadyVisited(object):
    427     '''
    428     Placeholder proxy to use when protecting against infinite recursion due to
    429     loops in the object graph.
    430 
    431     Analogous to the values emitted by the users of Py_ReprEnter and Py_ReprLeave
    432     '''
    433     def __init__(self, rep):
    434         self._rep = rep
    435 
    436     def __repr__(self):
    437         return self._rep
    438 
    439 
    440 def _write_instance_repr(out, visited, name, pyop_attrdict, address):
    441     '''Shared code for use by old-style and new-style classes:
    442     write a representation to file-like object "out"'''
    443     out.write('<')
    444     out.write(name)
    445 
    446     # Write dictionary of instance attributes:
    447     if isinstance(pyop_attrdict, PyDictObjectPtr):
    448         out.write('(')
    449         first = True
    450         for pyop_arg, pyop_val in pyop_attrdict.iteritems():
    451             if not first:
    452                 out.write(', ')
    453             first = False
    454             out.write(pyop_arg.proxyval(visited))
    455             out.write('=')
    456             pyop_val.write_repr(out, visited)
    457         out.write(')')
    458     out.write(' at remote 0x%x>' % address)
    459 
    460 
    461 class InstanceProxy(object):
    462 
    463     def __init__(self, cl_name, attrdict, address):
    464         self.cl_name = cl_name
    465         self.attrdict = attrdict
    466         self.address = address
    467 
    468     def __repr__(self):
    469         if isinstance(self.attrdict, dict):
    470             kwargs = ', '.join(["%s=%r" % (arg, val)
    471                                 for arg, val in self.attrdict.iteritems()])
    472             return '<%s(%s) at remote 0x%x>' % (self.cl_name,
    473                                                 kwargs, self.address)
    474         else:
    475             return '<%s at remote 0x%x>' % (self.cl_name,
    476                                             self.address)
    477 
    478 def _PyObject_VAR_SIZE(typeobj, nitems):
    479     return ( ( typeobj.field('tp_basicsize') +
    480                nitems * typeobj.field('tp_itemsize') +
    481                (SIZEOF_VOID_P - 1)
    482              ) & ~(SIZEOF_VOID_P - 1)
    483            ).cast(gdb.lookup_type('size_t'))
    484 
    485 class PyTypeObjectPtr(PyObjectPtr):
    486     _typename = 'PyTypeObject'
    487 
    488     def get_attr_dict(self):
    489         '''
    490         Get the PyDictObject ptr representing the attribute dictionary
    491         (or None if there's a problem)
    492         '''
    493         try:
    494             typeobj = self.type()
    495             dictoffset = int_from_int(typeobj.field('tp_dictoffset'))
    496             if dictoffset != 0:
    497                 if dictoffset < 0:
    498                     type_PyVarObject_ptr = gdb.lookup_type('PyVarObject').pointer()
    499                     tsize = int_from_int(self._gdbval.cast(type_PyVarObject_ptr)['ob_size'])
    500                     if tsize < 0:
    501                         tsize = -tsize
    502                     size = _PyObject_VAR_SIZE(typeobj, tsize)
    503                     dictoffset += size
    504                     assert dictoffset > 0
    505                     assert dictoffset % SIZEOF_VOID_P == 0
    506 
    507                 dictptr = self._gdbval.cast(_type_char_ptr) + dictoffset
    508                 PyObjectPtrPtr = PyObjectPtr.get_gdb_type().pointer()
    509                 dictptr = dictptr.cast(PyObjectPtrPtr)
    510                 return PyObjectPtr.from_pyobject_ptr(dictptr.dereference())
    511         except RuntimeError:
    512             # Corrupt data somewhere; fail safe
    513             pass
    514 
    515         # Not found, or some kind of error:
    516         return None
    517 
    518     def proxyval(self, visited):
    519         '''
    520         Support for new-style classes.
    521 
    522         Currently we just locate the dictionary using a transliteration to
    523         python of _PyObject_GetDictPtr, ignoring descriptors
    524         '''
    525         # Guard against infinite loops:
    526         if self.as_address() in visited:
    527             return ProxyAlreadyVisited('<...>')
    528         visited.add(self.as_address())
    529 
    530         pyop_attr_dict = self.get_attr_dict()
    531         if pyop_attr_dict:
    532             attr_dict = pyop_attr_dict.proxyval(visited)
    533         else:
    534             attr_dict = {}
    535         tp_name = self.safe_tp_name()
    536 
    537         # New-style class:
    538         return InstanceProxy(tp_name, attr_dict, long(self._gdbval))
    539 
    540     def write_repr(self, out, visited):
    541         # Guard against infinite loops:
    542         if self.as_address() in visited:
    543             out.write('<...>')
    544             return
    545         visited.add(self.as_address())
    546 
    547         try:
    548             tp_name = self.field('tp_name').string()
    549         except RuntimeError:
    550             tp_name = 'unknown'
    551 
    552         out.write('<type %s at remote 0x%x>' % (tp_name,
    553                                                 self.as_address()))
    554         # pyop_attrdict = self.get_attr_dict()
    555         # _write_instance_repr(out, visited,
    556                              # self.safe_tp_name(), pyop_attrdict, self.as_address())
    557 
    558 class ProxyException(Exception):
    559     def __init__(self, tp_name, args):
    560         self.tp_name = tp_name
    561         self.args = args
    562 
    563     def __repr__(self):
    564         return '%s%r' % (self.tp_name, self.args)
    565 
    566 class PyBaseExceptionObjectPtr(PyObjectPtr):
    567     """
    568     Class wrapping a gdb.Value that's a PyBaseExceptionObject* i.e. an exception
    569     within the process being debugged.
    570     """
    571     _typename = 'PyBaseExceptionObject'
    572 
    573     def proxyval(self, visited):
    574         # Guard against infinite loops:
    575         if self.as_address() in visited:
    576             return ProxyAlreadyVisited('(...)')
    577         visited.add(self.as_address())
    578         arg_proxy = self.pyop_field('args').proxyval(visited)
    579         return ProxyException(self.safe_tp_name(),
    580                               arg_proxy)
    581 
    582     def write_repr(self, out, visited):
    583         # Guard against infinite loops:
    584         if self.as_address() in visited:
    585             out.write('(...)')
    586             return
    587         visited.add(self.as_address())
    588 
    589         out.write(self.safe_tp_name())
    590         self.write_field_repr('args', out, visited)
    591 
    592 
class PyClassObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyClassObject* i.e. a <classobj>
    instance within the process being debugged.

    It only supplies the type name; proxyval() and write_repr() are the
    PyObjectPtr defaults.
    """
    _typename = 'PyClassObject'
    599 
    600 
    601 class BuiltInFunctionProxy(object):
    602     def __init__(self, ml_name):
    603         self.ml_name = ml_name
    604 
    605     def __repr__(self):
    606         return "<built-in function %s>" % self.ml_name
    607 
    608 class BuiltInMethodProxy(object):
    609     def __init__(self, ml_name, pyop_m_self):
    610         self.ml_name = ml_name
    611         self.pyop_m_self = pyop_m_self
    612 
    613     def __repr__(self):
    614         return ('<built-in method %s of %s object at remote 0x%x>'
    615                 % (self.ml_name,
    616                    self.pyop_m_self.safe_tp_name(),
    617                    self.pyop_m_self.as_address())
    618                 )
    619 
    620 class PyCFunctionObjectPtr(PyObjectPtr):
    621     """
    622     Class wrapping a gdb.Value that's a PyCFunctionObject*
    623     (see Include/methodobject.h and Objects/methodobject.c)
    624     """
    625     _typename = 'PyCFunctionObject'
    626 
    627     def proxyval(self, visited):
    628         m_ml = self.field('m_ml') # m_ml is a (PyMethodDef*)
    629         ml_name = m_ml['ml_name'].string()
    630 
    631         pyop_m_self = self.pyop_field('m_self')
    632         if pyop_m_self.is_null():
    633             return BuiltInFunctionProxy(ml_name)
    634         else:
    635             return BuiltInMethodProxy(ml_name, pyop_m_self)
    636 
    637 
    638 class PyCodeObjectPtr(PyObjectPtr):
    639     """
    640     Class wrapping a gdb.Value that's a PyCodeObject* i.e. a <code> instance
    641     within the process being debugged.
    642     """
    643     _typename = 'PyCodeObject'
    644 
    645     def addr2line(self, addrq):
    646         '''
    647         Get the line number for a given bytecode offset
    648 
    649         Analogous to PyCode_Addr2Line; translated from pseudocode in
    650         Objects/lnotab_notes.txt
    651         '''
    652         co_lnotab = self.pyop_field('co_lnotab').proxyval(set())
    653 
    654         # Initialize lineno to co_firstlineno as per PyCode_Addr2Line
    655         # not 0, as lnotab_notes.txt has it:
    656         lineno = int_from_int(self.field('co_firstlineno'))
    657 
    658         addr = 0
    659         for addr_incr, line_incr in zip(co_lnotab[::2], co_lnotab[1::2]):
    660             addr += ord(addr_incr)
    661             if addr > addrq:
    662                 return lineno
    663             lineno += ord(line_incr)
    664         return lineno
    665 
    666 
    667 class PyDictObjectPtr(PyObjectPtr):
    668     """
    669     Class wrapping a gdb.Value that's a PyDictObject* i.e. a dict instance
    670     within the process being debugged.
    671     """
    672     _typename = 'PyDictObject'
    673 
    674     def iteritems(self):
    675         '''
    676         Yields a sequence of (PyObjectPtr key, PyObjectPtr value) pairs,
    677         analagous to dict.iteritems()
    678         '''
    679         for i in safe_range(self.field('ma_mask') + 1):
    680             ep = self.field('ma_table') + i
    681             pyop_value = PyObjectPtr.from_pyobject_ptr(ep['me_value'])
    682             if not pyop_value.is_null():
    683                 pyop_key = PyObjectPtr.from_pyobject_ptr(ep['me_key'])
    684                 yield (pyop_key, pyop_value)
    685 
    686     def proxyval(self, visited):
    687         # Guard against infinite loops:
    688         if self.as_address() in visited:
    689             return ProxyAlreadyVisited('{...}')
    690         visited.add(self.as_address())
    691 
    692         result = {}
    693         for pyop_key, pyop_value in self.iteritems():
    694             proxy_key = pyop_key.proxyval(visited)
    695             proxy_value = pyop_value.proxyval(visited)
    696             result[proxy_key] = proxy_value
    697         return result
    698 
    699     def write_repr(self, out, visited):
    700         # Guard against infinite loops:
    701         if self.as_address() in visited:
    702             out.write('{...}')
    703             return
    704         visited.add(self.as_address())
    705 
    706         out.write('{')
    707         first = True
    708         for pyop_key, pyop_value in self.iteritems():
    709             if not first:
    710                 out.write(', ')
    711             first = False
    712             pyop_key.write_repr(out, visited)
    713             out.write(': ')
    714             pyop_value.write_repr(out, visited)
    715         out.write('}')
    716 
    717 class PyInstanceObjectPtr(PyObjectPtr):
    718     _typename = 'PyInstanceObject'
    719 
    720     def proxyval(self, visited):
    721         # Guard against infinite loops:
    722         if self.as_address() in visited:
    723             return ProxyAlreadyVisited('<...>')
    724         visited.add(self.as_address())
    725 
    726         # Get name of class:
    727         in_class = self.pyop_field('in_class')
    728         cl_name = in_class.pyop_field('cl_name').proxyval(visited)
    729 
    730         # Get dictionary of instance attributes:
    731         in_dict = self.pyop_field('in_dict').proxyval(visited)
    732 
    733         # Old-style class:
    734         return InstanceProxy(cl_name, in_dict, long(self._gdbval))
    735 
    736     def write_repr(self, out, visited):
    737         # Guard against infinite loops:
    738         if self.as_address() in visited:
    739             out.write('<...>')
    740             return
    741         visited.add(self.as_address())
    742 
    743         # Old-style class:
    744 
    745         # Get name of class:
    746         in_class = self.pyop_field('in_class')
    747         cl_name = in_class.pyop_field('cl_name').proxyval(visited)
    748 
    749         # Get dictionary of instance attributes:
    750         pyop_in_dict = self.pyop_field('in_dict')
    751 
    752         _write_instance_repr(out, visited,
    753                              cl_name, pyop_in_dict, self.as_address())
    754 
    755 class PyIntObjectPtr(PyObjectPtr):
    756     _typename = 'PyIntObject'
    757 
    758     def proxyval(self, visited):
    759         result = int_from_int(self.field('ob_ival'))
    760         return result
    761 
    762 class PyListObjectPtr(PyObjectPtr):
    763     _typename = 'PyListObject'
    764 
    765     def __getitem__(self, i):
    766         # Get the gdb.Value for the (PyObject*) with the given index:
    767         field_ob_item = self.field('ob_item')
    768         return field_ob_item[i]
    769 
    770     def proxyval(self, visited):
    771         # Guard against infinite loops:
    772         if self.as_address() in visited:
    773             return ProxyAlreadyVisited('[...]')
    774         visited.add(self.as_address())
    775 
    776         result = [PyObjectPtr.from_pyobject_ptr(self[i]).proxyval(visited)
    777                   for i in safe_range(int_from_int(self.field('ob_size')))]
    778         return result
    779 
    780     def write_repr(self, out, visited):
    781         # Guard against infinite loops:
    782         if self.as_address() in visited:
    783             out.write('[...]')
    784             return
    785         visited.add(self.as_address())
    786 
    787         out.write('[')
    788         for i in safe_range(int_from_int(self.field('ob_size'))):
    789             if i > 0:
    790                 out.write(', ')
    791             element = PyObjectPtr.from_pyobject_ptr(self[i])
    792             element.write_repr(out, visited)
    793         out.write(']')
    794 
    795 class PyLongObjectPtr(PyObjectPtr):
    796     _typename = 'PyLongObject'
    797 
    798     def proxyval(self, visited):
    799         '''
    800         Python's Include/longobjrep.h has this declaration:
    801            struct _longobject {
    802                PyObject_VAR_HEAD
    803                digit ob_digit[1];
    804            };
    805 
    806         with this description:
    807             The absolute value of a number is equal to
    808                  SUM(for i=0 through abs(ob_size)-1) ob_digit[i] * 2**(SHIFT*i)
    809             Negative numbers are represented with ob_size < 0;
    810             zero is represented by ob_size == 0.
    811 
    812         where SHIFT can be either:
    813             #define PyLong_SHIFT        30
    814             #define PyLong_SHIFT        15
    815         '''
    816         ob_size = long(self.field('ob_size'))
    817         if ob_size == 0:
    818             return 0L
    819 
    820         ob_digit = self.field('ob_digit')
    821 
    822         if gdb.lookup_type('digit').sizeof == 2:
    823             SHIFT = 15L
    824         else:
    825             SHIFT = 30L
    826 
    827         digits = [long(ob_digit[i]) * 2**(SHIFT*i)
    828                   for i in safe_range(abs(ob_size))]
    829         result = sum(digits)
    830         if ob_size < 0:
    831             result = -result
    832         return result
    833 
    834     def write_repr(self, out, visited):
    835         # Write this out as a Python 3 int literal, i.e. without the "L" suffix
    836         proxy = self.proxyval(visited)
    837         out.write("%s" % proxy)
    838 
    839 
    840 class PyBoolObjectPtr(PyLongObjectPtr):
    841     """
    842     Class wrapping a gdb.Value that's a PyBoolObject* i.e. one of the two
    843     <bool> instances (Py_True/Py_False) within the process being debugged.
    844     """
    845     _typename = 'PyBoolObject'
    846 
    847     def proxyval(self, visited):
    848         castto = gdb.lookup_type('PyLongObject').pointer()
    849         self._gdbval = self._gdbval.cast(castto)
    850         return bool(PyLongObjectPtr(self._gdbval).proxyval(visited))
    851 
    852 
    853 class PyNoneStructPtr(PyObjectPtr):
    854     """
    855     Class wrapping a gdb.Value that's a PyObject* pointing to the
    856     singleton (we hope) _Py_NoneStruct with ob_type PyNone_Type
    857     """
    858     _typename = 'PyObject'
    859 
    860     def proxyval(self, visited):
    861         return None
    862 
    863 
    864 class PyFrameObjectPtr(PyObjectPtr):
    865     _typename = 'PyFrameObject'
    866 
    867     def __init__(self, gdbval, cast_to=None):
    868         PyObjectPtr.__init__(self, gdbval, cast_to)
    869 
    870         if not self.is_optimized_out():
    871             self.co = PyCodeObjectPtr.from_pyobject_ptr(self.field('f_code'))
    872             self.co_name = self.co.pyop_field('co_name')
    873             self.co_filename = self.co.pyop_field('co_filename')
    874 
    875             self.f_lineno = int_from_int(self.field('f_lineno'))
    876             self.f_lasti = int_from_int(self.field('f_lasti'))
    877             self.co_nlocals = int_from_int(self.co.field('co_nlocals'))
    878             self.co_varnames = PyTupleObjectPtr.from_pyobject_ptr(self.co.field('co_varnames'))
    879 
    880     def iter_locals(self):
    881         '''
    882         Yield a sequence of (name,value) pairs of PyObjectPtr instances, for
    883         the local variables of this frame
    884         '''
    885         if self.is_optimized_out():
    886             return
    887 
    888         f_localsplus = self.field('f_localsplus')
    889         for i in safe_range(self.co_nlocals):
    890             pyop_value = PyObjectPtr.from_pyobject_ptr(f_localsplus[i])
    891             if not pyop_value.is_null():
    892                 pyop_name = PyObjectPtr.from_pyobject_ptr(self.co_varnames[i])
    893                 yield (pyop_name, pyop_value)
    894 
    895     def iter_globals(self):
    896         '''
    897         Yield a sequence of (name,value) pairs of PyObjectPtr instances, for
    898         the global variables of this frame
    899         '''
    900         if self.is_optimized_out():
    901             return
    902 
    903         pyop_globals = self.pyop_field('f_globals')
    904         return pyop_globals.iteritems()
    905 
    906     def iter_builtins(self):
    907         '''
    908         Yield a sequence of (name,value) pairs of PyObjectPtr instances, for
    909         the builtin variables
    910         '''
    911         if self.is_optimized_out():
    912             return
    913 
    914         pyop_builtins = self.pyop_field('f_builtins')
    915         return pyop_builtins.iteritems()
    916 
    917     def get_var_by_name(self, name):
    918         '''
    919         Look for the named local variable, returning a (PyObjectPtr, scope) pair
    920         where scope is a string 'local', 'global', 'builtin'
    921 
    922         If not found, return (None, None)
    923         '''
    924         for pyop_name, pyop_value in self.iter_locals():
    925             if name == pyop_name.proxyval(set()):
    926                 return pyop_value, 'local'
    927         for pyop_name, pyop_value in self.iter_globals():
    928             if name == pyop_name.proxyval(set()):
    929                 return pyop_value, 'global'
    930         for pyop_name, pyop_value in self.iter_builtins():
    931             if name == pyop_name.proxyval(set()):
    932                 return pyop_value, 'builtin'
    933         return None, None
    934 
    935     def filename(self):
    936         '''Get the path of the current Python source file, as a string'''
    937         if self.is_optimized_out():
    938             return '(frame information optimized out)'
    939         return self.co_filename.proxyval(set())
    940 
    941     def current_line_num(self):
    942         '''Get current line number as an integer (1-based)
    943 
    944         Translated from PyFrame_GetLineNumber and PyCode_Addr2Line
    945 
    946         See Objects/lnotab_notes.txt
    947         '''
    948         if self.is_optimized_out():
    949             return None
    950         f_trace = self.field('f_trace')
    951         if long(f_trace) != 0:
    952             # we have a non-NULL f_trace:
    953             return self.f_lineno
    954         else:
    955             #try:
    956             return self.co.addr2line(self.f_lasti)
    957             #except ValueError:
    958             #    return self.f_lineno
    959 
    960     def current_line(self):
    961         '''Get the text of the current source line as a string, with a trailing
    962         newline character'''
    963         if self.is_optimized_out():
    964             return '(frame information optimized out)'
    965         filename = self.filename()
    966         with open(os_fsencode(filename), 'r') as f:
    967             all_lines = f.readlines()
    968             # Convert from 1-based current_line_num to 0-based list offset:
    969             return all_lines[self.current_line_num()-1]
    970 
    971     def write_repr(self, out, visited):
    972         if self.is_optimized_out():
    973             out.write('(frame information optimized out)')
    974             return
    975         out.write('Frame 0x%x, for file %s, line %i, in %s ('
    976                   % (self.as_address(),
    977                      self.co_filename.proxyval(visited),
    978                      self.current_line_num(),
    979                      self.co_name.proxyval(visited)))
    980         first = True
    981         for pyop_name, pyop_value in self.iter_locals():
    982             if not first:
    983                 out.write(', ')
    984             first = False
    985 
    986             out.write(pyop_name.proxyval(visited))
    987             out.write('=')
    988             pyop_value.write_repr(out, visited)
    989 
    990         out.write(')')
    991 
    992 class PySetObjectPtr(PyObjectPtr):
    993     _typename = 'PySetObject'
    994 
    995     def proxyval(self, visited):
    996         # Guard against infinite loops:
    997         if self.as_address() in visited:
    998             return ProxyAlreadyVisited('%s(...)' % self.safe_tp_name())
    999         visited.add(self.as_address())
   1000 
   1001         members = []
   1002         table = self.field('table')
   1003         for i in safe_range(self.field('mask')+1):
   1004             setentry = table[i]
   1005             key = setentry['key']
   1006             if key != 0:
   1007                 key_proxy = PyObjectPtr.from_pyobject_ptr(key).proxyval(visited)
   1008                 if key_proxy != '<dummy key>':
   1009                     members.append(key_proxy)
   1010         if self.safe_tp_name() == 'frozenset':
   1011             return frozenset(members)
   1012         else:
   1013             return set(members)
   1014 
   1015     def write_repr(self, out, visited):
   1016         # Emulate Python 3's set_repr
   1017         tp_name = self.safe_tp_name()
   1018 
   1019         # Guard against infinite loops:
   1020         if self.as_address() in visited:
   1021             out.write('(...)')
   1022             return
   1023         visited.add(self.as_address())
   1024 
   1025         # Python 3's set_repr special-cases the empty set:
   1026         if not self.field('used'):
   1027             out.write(tp_name)
   1028             out.write('()')
   1029             return
   1030 
   1031         # Python 3 uses {} for set literals:
   1032         if tp_name != 'set':
   1033             out.write(tp_name)
   1034             out.write('(')
   1035 
   1036         out.write('{')
   1037         first = True
   1038         table = self.field('table')
   1039         for i in safe_range(self.field('mask')+1):
   1040             setentry = table[i]
   1041             key = setentry['key']
   1042             if key != 0:
   1043                 pyop_key = PyObjectPtr.from_pyobject_ptr(key)
   1044                 key_proxy = pyop_key.proxyval(visited) # FIXME!
   1045                 if key_proxy != '<dummy key>':
   1046                     if not first:
   1047                         out.write(', ')
   1048                     first = False
   1049                     pyop_key.write_repr(out, visited)
   1050         out.write('}')
   1051 
   1052         if tp_name != 'set':
   1053             out.write(')')
   1054 
   1055 
   1056 class PyBytesObjectPtr(PyObjectPtr):
   1057     _typename = 'PyBytesObject'
   1058 
   1059     def __str__(self):
   1060         field_ob_size = self.field('ob_size')
   1061         field_ob_sval = self.field('ob_sval')
   1062         return ''.join(struct.pack('b', field_ob_sval[i])
   1063                            for i in safe_range(field_ob_size))
   1064 
   1065     def proxyval(self, visited):
   1066         return str(self)
   1067 
   1068     def write_repr(self, out, visited, py3=True):
   1069         # Write this out as a Python 3 bytes literal, i.e. with a "b" prefix
   1070 
   1071         # Get a PyStringObject* within the Python 2 gdb process:
   1072         proxy = self.proxyval(visited)
   1073 
   1074         # Transliteration of Python 3's Objects/bytesobject.c:PyBytes_Repr
   1075         # to Python 2 code:
   1076         quote = "'"
   1077         if "'" in proxy and not '"' in proxy:
   1078             quote = '"'
   1079 
   1080         if py3:
   1081             out.write('b')
   1082 
   1083         out.write(quote)
   1084         for byte in proxy:
   1085             if byte == quote or byte == '\\':
   1086                 out.write('\\')
   1087                 out.write(byte)
   1088             elif byte == '\t':
   1089                 out.write('\\t')
   1090             elif byte == '\n':
   1091                 out.write('\\n')
   1092             elif byte == '\r':
   1093                 out.write('\\r')
   1094             elif byte < ' ' or ord(byte) >= 0x7f:
   1095                 out.write('\\x')
   1096                 out.write(hexdigits[(ord(byte) & 0xf0) >> 4])
   1097                 out.write(hexdigits[ord(byte) & 0xf])
   1098             else:
   1099                 out.write(byte)
   1100         out.write(quote)
   1101 
   1102 class PyStringObjectPtr(PyBytesObjectPtr):
   1103     _typename = 'PyStringObject'
   1104 
   1105     def write_repr(self, out, visited):
   1106         return super(PyStringObjectPtr, self).write_repr(out, visited, py3=False)
   1107 
   1108 class PyTupleObjectPtr(PyObjectPtr):
   1109     _typename = 'PyTupleObject'
   1110 
   1111     def __getitem__(self, i):
   1112         # Get the gdb.Value for the (PyObject*) with the given index:
   1113         field_ob_item = self.field('ob_item')
   1114         return field_ob_item[i]
   1115 
   1116     def proxyval(self, visited):
   1117         # Guard against infinite loops:
   1118         if self.as_address() in visited:
   1119             return ProxyAlreadyVisited('(...)')
   1120         visited.add(self.as_address())
   1121 
   1122         result = tuple([PyObjectPtr.from_pyobject_ptr(self[i]).proxyval(visited)
   1123                         for i in safe_range(int_from_int(self.field('ob_size')))])
   1124         return result
   1125 
   1126     def write_repr(self, out, visited):
   1127         # Guard against infinite loops:
   1128         if self.as_address() in visited:
   1129             out.write('(...)')
   1130             return
   1131         visited.add(self.as_address())
   1132 
   1133         out.write('(')
   1134         for i in safe_range(int_from_int(self.field('ob_size'))):
   1135             if i > 0:
   1136                 out.write(', ')
   1137             element = PyObjectPtr.from_pyobject_ptr(self[i])
   1138             element.write_repr(out, visited)
   1139         if self.field('ob_size') == 1:
   1140             out.write(',)')
   1141         else:
   1142             out.write(')')
   1143 
   1144 
   1145 def _unichr_is_printable(char):
   1146     # Logic adapted from Python 3's Tools/unicode/makeunicodedata.py
   1147     if char == u" ":
   1148         return True
   1149     import unicodedata
   1150     return unicodedata.category(char) not in ("C", "Z")
   1151 
   1152 if sys.maxunicode >= 0x10000:
   1153     _unichr = unichr
   1154 else:
   1155     # Needed for proper surrogate support if sizeof(Py_UNICODE) is 2 in gdb
   1156     def _unichr(x):
   1157         if x < 0x10000:
   1158             return unichr(x)
   1159         x -= 0x10000
   1160         ch1 = 0xD800 | (x >> 10)
   1161         ch2 = 0xDC00 | (x & 0x3FF)
   1162         return unichr(ch1) + unichr(ch2)
   1163 
   1164 class PyUnicodeObjectPtr(PyObjectPtr):
   1165     _typename = 'PyUnicodeObject'
   1166 
   1167     def char_width(self):
   1168         _type_Py_UNICODE = gdb.lookup_type('Py_UNICODE')
   1169         return _type_Py_UNICODE.sizeof
   1170 
   1171     def proxyval(self, visited):
   1172         # From unicodeobject.h:
   1173         #     Py_ssize_t length;  /* Length of raw Unicode data in buffer */
   1174         #     Py_UNICODE *str;    /* Raw Unicode buffer */
   1175         field_length = long(self.field('length'))
   1176         field_str = self.field('str')
   1177 
   1178         # Gather a list of ints from the Py_UNICODE array; these are either
   1179         # UCS-2 or UCS-4 code points:
   1180         if self.char_width() > 2:
   1181             Py_UNICODEs = [int(field_str[i]) for i in safe_range(field_length)]
   1182         else:
   1183             # A more elaborate routine if sizeof(Py_UNICODE) is 2 in the
   1184             # inferior process: we must join surrogate pairs.
   1185             Py_UNICODEs = []
   1186             i = 0
   1187             limit = safety_limit(field_length)
   1188             while i < limit:
   1189                 ucs = int(field_str[i])
   1190                 i += 1
   1191                 if ucs < 0xD800 or ucs >= 0xDC00 or i == field_length:
   1192                     Py_UNICODEs.append(ucs)
   1193                     continue
   1194                 # This could be a surrogate pair.
   1195                 ucs2 = int(field_str[i])
   1196                 if ucs2 < 0xDC00 or ucs2 > 0xDFFF:
   1197                     continue
   1198                 code = (ucs & 0x03FF) << 10
   1199                 code |= ucs2 & 0x03FF
   1200                 code += 0x00010000
   1201                 Py_UNICODEs.append(code)
   1202                 i += 1
   1203 
   1204         # Convert the int code points to unicode characters, and generate a
   1205         # local unicode instance.
   1206         # This splits surrogate pairs if sizeof(Py_UNICODE) is 2 here (in gdb).
   1207         result = u''.join([_unichr(ucs) for ucs in Py_UNICODEs])
   1208         return result
   1209 
   1210     def write_repr(self, out, visited):
   1211         # Get a PyUnicodeObject* within the Python 2 gdb process:
   1212         proxy = self.proxyval(visited)
   1213 
   1214         # Transliteration of Python 3's Object/unicodeobject.c:unicode_repr
   1215         # to Python 2:
   1216         try:
   1217             gdb.parse_and_eval('PyString_Type')
   1218         except RuntimeError:
   1219             # Python 3, don't write 'u' as prefix
   1220             pass
   1221         else:
   1222             # Python 2, write the 'u'
   1223             out.write('u')
   1224 
   1225         if "'" in proxy and '"' not in proxy:
   1226             quote = '"'
   1227         else:
   1228             quote = "'"
   1229         out.write(quote)
   1230 
   1231         i = 0
   1232         while i < len(proxy):
   1233             ch = proxy[i]
   1234             i += 1
   1235 
   1236             # Escape quotes and backslashes
   1237             if ch == quote or ch == '\\':
   1238                 out.write('\\')
   1239                 out.write(ch)
   1240 
   1241             #  Map special whitespace to '\t', \n', '\r'
   1242             elif ch == '\t':
   1243                 out.write('\\t')
   1244             elif ch == '\n':
   1245                 out.write('\\n')
   1246             elif ch == '\r':
   1247                 out.write('\\r')
   1248 
   1249             # Map non-printable US ASCII to '\xhh' */
   1250             elif ch < ' ' or ch == 0x7F:
   1251                 out.write('\\x')
   1252                 out.write(hexdigits[(ord(ch) >> 4) & 0x000F])
   1253                 out.write(hexdigits[ord(ch) & 0x000F])
   1254 
   1255             # Copy ASCII characters as-is
   1256             elif ord(ch) < 0x7F:
   1257                 out.write(ch)
   1258 
   1259             # Non-ASCII characters
   1260             else:
   1261                 ucs = ch
   1262                 ch2 = None
   1263                 if sys.maxunicode < 0x10000:
   1264                     # If sizeof(Py_UNICODE) is 2 here (in gdb), join
   1265                     # surrogate pairs before calling _unichr_is_printable.
   1266                     if (i < len(proxy)
   1267                     and 0xD800 <= ord(ch) < 0xDC00 \
   1268                     and 0xDC00 <= ord(proxy[i]) <= 0xDFFF):
   1269                         ch2 = proxy[i]
   1270                         ucs = ch + ch2
   1271                         i += 1
   1272 
   1273                 # Unfortuately, Python 2's unicode type doesn't seem
   1274                 # to expose the "isprintable" method
   1275                 printable = _unichr_is_printable(ucs)
   1276                 if printable:
   1277                     try:
   1278                         ucs.encode(ENCODING)
   1279                     except UnicodeEncodeError:
   1280                         printable = False
   1281 
   1282                 # Map Unicode whitespace and control characters
   1283                 # (categories Z* and C* except ASCII space)
   1284                 if not printable:
   1285                     if ch2 is not None:
   1286                         # Match Python 3's representation of non-printable
   1287                         # wide characters.
   1288                         code = (ord(ch) & 0x03FF) << 10
   1289                         code |= ord(ch2) & 0x03FF
   1290                         code += 0x00010000
   1291                     else:
   1292                         code = ord(ucs)
   1293 
   1294                     # Map 8-bit characters to '\\xhh'
   1295                     if code <= 0xff:
   1296                         out.write('\\x')
   1297                         out.write(hexdigits[(code >> 4) & 0x000F])
   1298                         out.write(hexdigits[code & 0x000F])
   1299                     # Map 21-bit characters to '\U00xxxxxx'
   1300                     elif code >= 0x10000:
   1301                         out.write('\\U')
   1302                         out.write(hexdigits[(code >> 28) & 0x0000000F])
   1303                         out.write(hexdigits[(code >> 24) & 0x0000000F])
   1304                         out.write(hexdigits[(code >> 20) & 0x0000000F])
   1305                         out.write(hexdigits[(code >> 16) & 0x0000000F])
   1306                         out.write(hexdigits[(code >> 12) & 0x0000000F])
   1307                         out.write(hexdigits[(code >> 8) & 0x0000000F])
   1308                         out.write(hexdigits[(code >> 4) & 0x0000000F])
   1309                         out.write(hexdigits[code & 0x0000000F])
   1310                     # Map 16-bit characters to '\uxxxx'
   1311                     else:
   1312                         out.write('\\u')
   1313                         out.write(hexdigits[(code >> 12) & 0x000F])
   1314                         out.write(hexdigits[(code >> 8) & 0x000F])
   1315                         out.write(hexdigits[(code >> 4) & 0x000F])
   1316                         out.write(hexdigits[code & 0x000F])
   1317                 else:
   1318                     # Copy characters as-is
   1319                     out.write(ch)
   1320                     if ch2 is not None:
   1321                         out.write(ch2)
   1322 
   1323         out.write(quote)
   1324 
   1325     def __unicode__(self):
   1326         return self.proxyval(set())
   1327 
   1328     def __str__(self):
   1329         # In Python 3, everything is unicode (including attributes of e.g.
   1330         # code objects, such as function names). The Python 2 debugger code
   1331         # uses PyUnicodePtr objects to format strings etc, whereas with a
   1332         # Python 2 debuggee we'd get PyStringObjectPtr instances with __str__.
   1333         # Be compatible with that.
   1334         return unicode(self).encode('UTF-8')
   1335 
   1336 def int_from_int(gdbval):
   1337     return int(str(gdbval))
   1338 
   1339 
   1340 def stringify(val):
   1341     # TODO: repr() puts everything on one line; pformat can be nicer, but
   1342     # can lead to v.long results; this function isolates the choice
   1343     if True:
   1344         return repr(val)
   1345     else:
   1346         from pprint import pformat
   1347         return pformat(val)
   1348 
   1349 
   1350 class PyObjectPtrPrinter:
   1351     "Prints a (PyObject*)"
   1352 
   1353     def __init__ (self, gdbval):
   1354         self.gdbval = gdbval
   1355 
   1356     def to_string (self):
   1357         pyop = PyObjectPtr.from_pyobject_ptr(self.gdbval)
   1358         if True:
   1359             return pyop.get_truncated_repr(MAX_OUTPUT_LEN)
   1360         else:
   1361             # Generate full proxy value then stringify it.
   1362             # Doing so could be expensive
   1363             proxyval = pyop.proxyval(set())
   1364             return stringify(proxyval)
   1365 
   1366 def pretty_printer_lookup(gdbval):
   1367     type = gdbval.type.unqualified()
   1368     if type.code == gdb.TYPE_CODE_PTR:
   1369         type = type.target().unqualified()
   1370         if str(type) in all_pretty_typenames:
   1371             return PyObjectPtrPrinter(gdbval)
   1372 
   1373 """
   1374 During development, I've been manually invoking the code in this way:
   1375 (gdb) python
   1376 
   1377 import sys
   1378 sys.path.append('/home/david/coding/python-gdb')
   1379 import libpython
   1380 end
   1381 
   1382 then reloading it after each edit like this:
   1383 (gdb) python reload(libpython)
   1384 
   1385 The following code should ensure that the prettyprinter is registered
   1386 if the code is autoloaded by gdb when visiting libpython.so, provided
   1387 that this python file is installed to the same path as the library (or its
   1388 .debug file) plus a "-gdb.py" suffix, e.g:
   1389   /usr/lib/libpython2.6.so.1.0-gdb.py
   1390   /usr/lib/debug/usr/lib/libpython2.6.so.1.0.debug-gdb.py
   1391 """
   1392 def register (obj):
   1393     if obj == None:
   1394         obj = gdb
   1395 
   1396     # Wire up the pretty-printer
   1397     obj.pretty_printers.append(pretty_printer_lookup)
   1398 
   1399 register (gdb.current_objfile ())
   1400 
   1401 # Unfortunately, the exact API exposed by the gdb module varies somewhat
   1402 # from build to build
   1403 # See http://bugs.python.org/issue8279?#msg102276
   1404 
   1405 class Frame(object):
   1406     '''
   1407     Wrapper for gdb.Frame, adding various methods
   1408     '''
   1409     def __init__(self, gdbframe):
   1410         self._gdbframe = gdbframe
   1411 
   1412     def older(self):
   1413         older = self._gdbframe.older()
   1414         if older:
   1415             return Frame(older)
   1416         else:
   1417             return None
   1418 
   1419     def newer(self):
   1420         newer = self._gdbframe.newer()
   1421         if newer:
   1422             return Frame(newer)
   1423         else:
   1424             return None
   1425 
   1426     def select(self):
   1427         '''If supported, select this frame and return True; return False if unsupported
   1428 
   1429         Not all builds have a gdb.Frame.select method; seems to be present on Fedora 12
   1430         onwards, but absent on Ubuntu buildbot'''
   1431         if not hasattr(self._gdbframe, 'select'):
   1432             print ('Unable to select frame: '
   1433                    'this build of gdb does not expose a gdb.Frame.select method')
   1434             return False
   1435         self._gdbframe.select()
   1436         return True
   1437 
   1438     def get_index(self):
   1439         '''Calculate index of frame, starting at 0 for the newest frame within
   1440         this thread'''
   1441         index = 0
   1442         # Go down until you reach the newest frame:
   1443         iter_frame = self
   1444         while iter_frame.newer():
   1445             index += 1
   1446             iter_frame = iter_frame.newer()
   1447         return index
   1448 
   1449     def is_evalframeex(self):
   1450         '''Is this a PyEval_EvalFrameEx frame?'''
   1451         if self._gdbframe.name() == 'PyEval_EvalFrameEx':
   1452             '''
   1453             I believe we also need to filter on the inline
   1454             struct frame_id.inline_depth, only regarding frames with
   1455             an inline depth of 0 as actually being this function
   1456 
   1457             So we reject those with type gdb.INLINE_FRAME
   1458             '''
   1459             if self._gdbframe.type() == gdb.NORMAL_FRAME:
   1460                 # We have a PyEval_EvalFrameEx frame:
   1461                 return True
   1462 
   1463         return False
   1464 
   1465     def read_var(self, varname):
   1466         """
   1467         read_var with respect to code blocks (gdbframe.read_var works with
   1468         respect to the most recent block)
   1469 
   1470         Apparently this function doesn't work, though, as it seems to read
   1471         variables in other frames also sometimes.
   1472         """
   1473         block = self._gdbframe.block()
   1474         var = None
   1475 
   1476         while block and var is None:
   1477             try:
   1478                 var = self._gdbframe.read_var(varname, block)
   1479             except ValueError:
   1480                 pass
   1481 
   1482             block = block.superblock
   1483 
   1484         return var
   1485 
   1486     def get_pyop(self):
   1487         try:
   1488             # self.read_var does not always work properly, so select our frame
   1489             # and restore the previously selected frame
   1490             selected_frame = gdb.selected_frame()
   1491             self._gdbframe.select()
   1492             f = gdb.parse_and_eval('f')
   1493             selected_frame.select()
   1494         except RuntimeError:
   1495             return None
   1496         else:
   1497             return PyFrameObjectPtr.from_pyobject_ptr(f)
   1498 
   1499     @classmethod
   1500     def get_selected_frame(cls):
   1501         _gdbframe = gdb.selected_frame()
   1502         if _gdbframe:
   1503             return Frame(_gdbframe)
   1504         return None
   1505 
   1506     @classmethod
   1507     def get_selected_python_frame(cls):
   1508         '''Try to obtain the Frame for the python code in the selected frame,
   1509         or None'''
   1510         frame = cls.get_selected_frame()
   1511 
   1512         while frame:
   1513             if frame.is_evalframeex():
   1514                 return frame
   1515             frame = frame.older()
   1516 
   1517         # Not found:
   1518         return None
   1519 
   1520     def print_summary(self):
   1521         if self.is_evalframeex():
   1522             pyop = self.get_pyop()
   1523             if pyop:
   1524                 line = pyop.get_truncated_repr(MAX_OUTPUT_LEN)
   1525                 write_unicode(sys.stdout, '#%i %s\n' % (self.get_index(), line))
   1526                 sys.stdout.write(pyop.current_line())
   1527             else:
   1528                 sys.stdout.write('#%i (unable to read python frame information)\n' % self.get_index())
   1529         else:
   1530             sys.stdout.write('#%i\n' % self.get_index())
   1531 
   1532 class PyList(gdb.Command):
   1533     '''List the current Python source code, if any
   1534 
   1535     Use
   1536        py-list START
   1537     to list at a different line number within the python source.
   1538 
   1539     Use
   1540        py-list START, END
   1541     to list a specific range of lines within the python source.
   1542     '''
   1543 
   1544     def __init__(self):
   1545         gdb.Command.__init__ (self,
   1546                               "py-list",
   1547                               gdb.COMMAND_FILES,
   1548                               gdb.COMPLETE_NONE)
   1549 
   1550 
   1551     def invoke(self, args, from_tty):
   1552         import re
   1553 
   1554         start = None
   1555         end = None
   1556 
   1557         m = re.match(r'\s*(\d+)\s*', args)
   1558         if m:
   1559             start = int(m.group(0))
   1560             end = start + 10
   1561 
   1562         m = re.match(r'\s*(\d+)\s*,\s*(\d+)\s*', args)
   1563         if m:
   1564             start, end = map(int, m.groups())
   1565 
   1566         frame = Frame.get_selected_python_frame()
   1567         if not frame:
   1568             print 'Unable to locate python frame'
   1569             return
   1570 
   1571         pyop = frame.get_pyop()
   1572         if not pyop:
   1573             print 'Unable to read information on python frame'
   1574             return
   1575 
   1576         filename = pyop.filename()
   1577         lineno = pyop.current_line_num()
   1578 
   1579         if start is None:
   1580             start = lineno - 5
   1581             end = lineno + 5
   1582 
   1583         if start<1:
   1584             start = 1
   1585 
   1586         with open(os_fsencode(filename), 'r') as f:
   1587             all_lines = f.readlines()
   1588             # start and end are 1-based, all_lines is 0-based;
   1589             # so [start-1:end] as a python slice gives us [start, end] as a
   1590             # closed interval
   1591             for i, line in enumerate(all_lines[start-1:end]):
   1592                 linestr = str(i+start)
   1593                 # Highlight current line:
   1594                 if i + start == lineno:
   1595                     linestr = '>' + linestr
   1596                 sys.stdout.write('%4s    %s' % (linestr, line))
   1597 
   1598 
   1599 # ...and register the command:
   1600 PyList()
   1601 
   1602 def move_in_stack(move_up):
   1603     '''Move up or down the stack (for the py-up/py-down command)'''
   1604     frame = Frame.get_selected_python_frame()
   1605     while frame:
   1606         if move_up:
   1607             iter_frame = frame.older()
   1608         else:
   1609             iter_frame = frame.newer()
   1610 
   1611         if not iter_frame:
   1612             break
   1613 
   1614         if iter_frame.is_evalframeex():
   1615             # Result:
   1616             if iter_frame.select():
   1617                 iter_frame.print_summary()
   1618             return
   1619 
   1620         frame = iter_frame
   1621 
   1622     if move_up:
   1623         print 'Unable to find an older python frame'
   1624     else:
   1625         print 'Unable to find a newer python frame'
   1626 
   1627 class PyUp(gdb.Command):
   1628     'Select and print the python stack frame that called this one (if any)'
   1629     def __init__(self):
   1630         gdb.Command.__init__ (self,
   1631                               "py-up",
   1632                               gdb.COMMAND_STACK,
   1633                               gdb.COMPLETE_NONE)
   1634 
   1635 
   1636     def invoke(self, args, from_tty):
   1637         move_in_stack(move_up=True)
   1638 
   1639 class PyDown(gdb.Command):
   1640     'Select and print the python stack frame called by this one (if any)'
   1641     def __init__(self):
   1642         gdb.Command.__init__ (self,
   1643                               "py-down",
   1644                               gdb.COMMAND_STACK,
   1645                               gdb.COMPLETE_NONE)
   1646 
   1647 
   1648     def invoke(self, args, from_tty):
   1649         move_in_stack(move_up=False)
   1650 
# Not all builds of gdb have gdb.Frame.select
# move_in_stack() calls select() on the frame it finds, so py-up and py-down
# are only registered when frame selection is available.
if hasattr(gdb.Frame, 'select'):
    PyUp()
    PyDown()
   1655 
   1656 class PyBacktrace(gdb.Command):
   1657     'Display the current python frame and all the frames within its call stack (if any)'
   1658     def __init__(self):
   1659         gdb.Command.__init__ (self,
   1660                               "py-bt",
   1661                               gdb.COMMAND_STACK,
   1662                               gdb.COMPLETE_NONE)
   1663 
   1664 
   1665     def invoke(self, args, from_tty):
   1666         frame = Frame.get_selected_python_frame()
   1667         while frame:
   1668             if frame.is_evalframeex():
   1669                 frame.print_summary()
   1670             frame = frame.older()
   1671 
   1672 PyBacktrace()
   1673 
   1674 class PyPrint(gdb.Command):
   1675     'Look up the given python variable name, and print it'
   1676     def __init__(self):
   1677         gdb.Command.__init__ (self,
   1678                               "py-print",
   1679                               gdb.COMMAND_DATA,
   1680                               gdb.COMPLETE_NONE)
   1681 
   1682 
   1683     def invoke(self, args, from_tty):
   1684         name = str(args)
   1685 
   1686         frame = Frame.get_selected_python_frame()
   1687         if not frame:
   1688             print 'Unable to locate python frame'
   1689             return
   1690 
   1691         pyop_frame = frame.get_pyop()
   1692         if not pyop_frame:
   1693             print 'Unable to read information on python frame'
   1694             return
   1695 
   1696         pyop_var, scope = pyop_frame.get_var_by_name(name)
   1697 
   1698         if pyop_var:
   1699             print ('%s %r = %s'
   1700                    % (scope,
   1701                       name,
   1702                       pyop_var.get_truncated_repr(MAX_OUTPUT_LEN)))
   1703         else:
   1704             print '%r not found' % name
   1705 
   1706 PyPrint()
   1707 
   1708 class PyLocals(gdb.Command):
   1709     'Look up the given python variable name, and print it'
   1710 
   1711     def invoke(self, args, from_tty):
   1712         name = str(args)
   1713 
   1714         frame = Frame.get_selected_python_frame()
   1715         if not frame:
   1716             print 'Unable to locate python frame'
   1717             return
   1718 
   1719         pyop_frame = frame.get_pyop()
   1720         if not pyop_frame:
   1721             print 'Unable to read information on python frame'
   1722             return
   1723 
   1724         namespace = self.get_namespace(pyop_frame)
   1725         namespace = [(name.proxyval(set()), val) for name, val in namespace]
   1726 
   1727         if namespace:
   1728             name, val = max(namespace, key=lambda (name, val): len(name))
   1729             max_name_length = len(name)
   1730 
   1731             for name, pyop_value in namespace:
   1732                 value = pyop_value.get_truncated_repr(MAX_OUTPUT_LEN)
   1733                 print ('%-*s = %s' % (max_name_length, name, value))
   1734 
   1735     def get_namespace(self, pyop_frame):
   1736         return pyop_frame.iter_locals()
   1737 
   1738 
   1739 class PyGlobals(PyLocals):
   1740     'List all the globals in the currently select Python frame'
   1741 
   1742     def get_namespace(self, pyop_frame):
   1743         return pyop_frame.iter_globals()
   1744 
   1745 
   1746 PyLocals("py-locals", gdb.COMMAND_DATA, gdb.COMPLETE_NONE)
   1747 PyGlobals("py-globals", gdb.COMMAND_DATA, gdb.COMPLETE_NONE)
   1748 
   1749 
   1750 class PyNameEquals(gdb.Function):
   1751 
   1752     def _get_pycurframe_attr(self, attr):
   1753         frame = Frame(gdb.selected_frame())
   1754         if frame.is_evalframeex():
   1755             pyframe = frame.get_pyop()
   1756             if pyframe is None:
   1757                 warnings.warn("Use a Python debug build, Python breakpoints "
   1758                               "won't work otherwise.")
   1759                 return None
   1760 
   1761             return getattr(pyframe, attr).proxyval(set())
   1762 
   1763         return None
   1764 
   1765     def invoke(self, funcname):
   1766         attr = self._get_pycurframe_attr('co_name')
   1767         return attr is not None and attr == funcname.string()
   1768 
   1769 PyNameEquals("pyname_equals")
   1770 
   1771 
   1772 class PyModEquals(PyNameEquals):
   1773 
   1774     def invoke(self, modname):
   1775         attr = self._get_pycurframe_attr('co_filename')
   1776         if attr is not None:
   1777             filename, ext = os.path.splitext(os.path.basename(attr))
   1778             return filename == modname.string()
   1779         return False
   1780 
   1781 PyModEquals("pymod_equals")
   1782 
   1783 
   1784 class PyBreak(gdb.Command):
   1785     """
   1786     Set a Python breakpoint. Examples:
   1787 
   1788     Break on any function or method named 'func' in module 'modname'
   1789 
   1790         py-break modname.func
   1791 
   1792     Break on any function or method named 'func'
   1793 
   1794         py-break func
   1795     """
   1796 
   1797     def invoke(self, funcname, from_tty):
   1798         if '.' in funcname:
   1799             modname, dot, funcname = funcname.rpartition('.')
   1800             cond = '$pyname_equals("%s") && $pymod_equals("%s")' % (funcname,
   1801                                                                     modname)
   1802         else:
   1803             cond = '$pyname_equals("%s")' % funcname
   1804 
   1805         gdb.execute('break PyEval_EvalFrameEx if ' + cond)
   1806 
   1807 PyBreak("py-break", gdb.COMMAND_RUNNING, gdb.COMPLETE_NONE)
   1808 
   1809 
class _LoggingState(object):
    """
    State that helps to provide a reentrant gdb.execute() function.

    gdb's logging is pointed at a temporary file. While at least one context
    is active, logging is switched on and redirected. Every (nested) context
    remembers the size the log file had on entry, so that getoutput() returns
    only what was written since the innermost context was entered.
    """

    def __init__(self):
        # Temporary log file; opened read/write so that captured output can
        # be read back and truncated through self.file.
        self.fd, self.filename = tempfile.mkstemp()
        self.file = os.fdopen(self.fd, 'r+')
        _execute("set logging file %s" % self.filename)
        # Start offsets into the log file, one per active context
        self.file_position_stack = []

        # Close and remove the temporary file when the interpreter exits
        atexit.register(os.close, self.fd)
        atexit.register(os.remove, self.filename)

    def __enter__(self):
        # Only the outermost context switches gdb's logging on
        if not self.file_position_stack:
            _execute("set logging redirect on")
            _execute("set logging on")
            _execute("set pagination off")

        # Remember where this context's output starts in the log file
        self.file_position_stack.append(os.fstat(self.fd).st_size)
        return self

    def getoutput(self):
        """
        Return everything written to the log file since the innermost active
        context was entered.
        """
        gdb.flush()
        self.file.seek(self.file_position_stack[-1])
        result = self.file.read()
        return result

    def __exit__(self, exc_type, exc_val, tb):
        # Drop this context's output from the log file again
        startpos = self.file_position_stack.pop()
        self.file.seek(startpos)
        self.file.truncate()
        # Only the outermost context switches gdb's logging off again.
        # NOTE: pagination is switched on unconditionally here, whatever its
        # setting was before __enter__.
        if not self.file_position_stack:
            _execute("set logging off")
            _execute("set logging redirect off")
            _execute("set pagination on")
   1847 
   1848 
   1849 def execute(command, from_tty=False, to_string=False):
   1850     """
   1851     Replace gdb.execute() with this function and have it accept a 'to_string'
   1852     argument (new in 7.2). Have it properly capture stderr also. Ensure
   1853     reentrancy.
   1854     """
   1855     if to_string:
   1856         with _logging_state as state:
   1857             _execute(command, from_tty)
   1858             return state.getoutput()
   1859     else:
   1860         _execute(command, from_tty)
   1861 
   1862 
# Keep a reference to the original gdb.execute before replacing it with the
# wrapper above. _LoggingState() itself calls _execute in its constructor, so
# _execute has to be bound before the shared state object is created.
_execute = gdb.execute
gdb.execute = execute
_logging_state = _LoggingState()
   1866 
   1867 
def get_selected_inferior():
    """
    Return the selected inferior in gdb.

    NOTE: as a workaround for the gdb bug referenced below, this currently
    always returns the first entry of gdb.inferiors().
    """
    # Woooh, another bug in gdb! Is there an end in sight?
    # http://sourceware.org/bugzilla/show_bug.cgi?id=12212
    return gdb.inferiors()[0]

    # Unreachable because of the early return above: this is the lookup via
    # the selected thread that the workaround replaces.
    selected_thread = gdb.selected_thread()

    for inferior in gdb.inferiors():
        for thread in inferior.threads():
            if thread == selected_thread:
                return inferior
   1882 
   1883 def source_gdb_script(script_contents, to_string=False):
   1884     """
   1885     Source a gdb script with script_contents passed as a string. This is useful
   1886     to provide defines for py-step and py-next to make them repeatable (this is
   1887     not possible with gdb.execute()). See
   1888     http://sourceware.org/bugzilla/show_bug.cgi?id=12216
   1889     """
   1890     fd, filename = tempfile.mkstemp()
   1891     f = os.fdopen(fd, 'w')
   1892     f.write(script_contents)
   1893     f.close()
   1894     gdb.execute("source %s" % filename, to_string=to_string)
   1895     os.remove(filename)
   1896 
   1897 def register_defines():
   1898     source_gdb_script(textwrap.dedent("""\
   1899         define py-step
   1900         -py-step
   1901         end
   1902 
   1903         define py-next
   1904         -py-next
   1905         end
   1906 
   1907         document py-step
   1908         %s
   1909         end
   1910 
   1911         document py-next
   1912         %s
   1913         end
   1914     """) % (PyStep.__doc__, PyNext.__doc__))
   1915 
   1916 
   1917 def stackdepth(frame):
   1918     "Tells the stackdepth of a gdb frame."
   1919     depth = 0
   1920     while frame:
   1921         frame = frame.older()
   1922         depth += 1
   1923 
   1924     return depth
   1925 
   1926 class ExecutionControlCommandBase(gdb.Command):
   1927     """
   1928     Superclass for language specific execution control. Language specific
   1929     features should be implemented by lang_info using the LanguageInfo
   1930     interface. 'name' is the name of the command.
   1931     """
   1932 
   1933     def __init__(self, name, lang_info):
   1934         super(ExecutionControlCommandBase, self).__init__(
   1935                                 name, gdb.COMMAND_RUNNING, gdb.COMPLETE_NONE)
   1936         self.lang_info = lang_info
   1937 
   1938     def install_breakpoints(self):
   1939         all_locations = itertools.chain(
   1940             self.lang_info.static_break_functions(),
   1941             self.lang_info.runtime_break_functions())
   1942 
   1943         for location in all_locations:
   1944             result = gdb.execute('break %s' % location, to_string=True)
   1945             yield re.search(r'Breakpoint (\d+)', result).group(1)
   1946 
   1947     def delete_breakpoints(self, breakpoint_list):
   1948         for bp in breakpoint_list:
   1949             gdb.execute("delete %s" % bp)
   1950 
   1951     def filter_output(self, result):
   1952         reflags = re.MULTILINE
   1953 
   1954         output_on_halt = [
   1955             (r'^Program received signal .*', reflags|re.DOTALL),
   1956             (r'.*[Ww]arning.*', 0),
   1957             (r'^Program exited .*', reflags),
   1958         ]
   1959 
   1960         output_always = [
   1961             # output when halting on a watchpoint
   1962             (r'^(Old|New) value = .*', reflags),
   1963             # output from the 'display' command
   1964             (r'^\d+: \w+ = .*', reflags),
   1965         ]
   1966 
   1967         def filter_output(regexes):
   1968             output = []
   1969             for regex, flags in regexes:
   1970                 for match in re.finditer(regex, result, flags):
   1971                     output.append(match.group(0))
   1972 
   1973             return '\n'.join(output)
   1974 
   1975         # Filter the return value output of the 'finish' command
   1976         match_finish = re.search(r'^Value returned is \$\d+ = (.*)', result,
   1977                                  re.MULTILINE)
   1978         if match_finish:
   1979             finish_output = 'Value returned: %s\n' % match_finish.group(1)
   1980         else:
   1981             finish_output = ''
   1982 
   1983         return (filter_output(output_on_halt),
   1984                 finish_output + filter_output(output_always))
   1985 
   1986 
   1987     def stopped(self):
   1988         return get_selected_inferior().pid == 0
   1989 
   1990     def finish_executing(self, result):
   1991         """
   1992         After doing some kind of code running in the inferior, print the line
   1993         of source code or the result of the last executed gdb command (passed
   1994         in as the `result` argument).
   1995         """
   1996         output_on_halt, output_always = self.filter_output(result)
   1997 
   1998         if self.stopped():
   1999             print output_always
   2000             print output_on_halt
   2001         else:
   2002             frame = gdb.selected_frame()
   2003             source_line = self.lang_info.get_source_line(frame)
   2004             if self.lang_info.is_relevant_function(frame):
   2005                 raised_exception = self.lang_info.exc_info(frame)
   2006                 if raised_exception:
   2007                     print raised_exception
   2008 
   2009             if source_line:
   2010                 if output_always.rstrip():
   2011                     print output_always.rstrip()
   2012                 print source_line
   2013             else:
   2014                 print result
   2015 
   2016     def _finish(self):
   2017         """
   2018         Execute until the function returns (or until something else makes it
   2019         stop)
   2020         """
   2021         if gdb.selected_frame().older() is not None:
   2022             return gdb.execute('finish', to_string=True)
   2023         else:
   2024             # outermost frame, continue
   2025             return gdb.execute('cont', to_string=True)
   2026 
   2027     def _finish_frame(self):
   2028         """
   2029         Execute until the function returns to a relevant caller.
   2030         """
   2031         while True:
   2032             result = self._finish()
   2033 
   2034             try:
   2035                 frame = gdb.selected_frame()
   2036             except RuntimeError:
   2037                 break
   2038 
   2039             hitbp = re.search(r'Breakpoint (\d+)', result)
   2040             is_relevant = self.lang_info.is_relevant_function(frame)
   2041             if hitbp or is_relevant or self.stopped():
   2042                 break
   2043 
   2044         return result
   2045 
   2046     def finish(self, *args):
   2047         "Implements the finish command."
   2048         result = self._finish_frame()
   2049         self.finish_executing(result)
   2050 
   2051     def step(self, stepinto, stepover_command='next'):
   2052         """
   2053         Do a single step or step-over. Returns the result of the last gdb
   2054         command that made execution stop.
   2055 
   2056         This implementation, for stepping, sets (conditional) breakpoints for
   2057         all functions that are deemed relevant. It then does a step over until
   2058         either something halts execution, or until the next line is reached.
   2059 
   2060         If, however, stepover_command is given, it should be a string gdb
   2061         command that continues execution in some way. The idea is that the
   2062         caller has set a (conditional) breakpoint or watchpoint that can work
   2063         more efficiently than the step-over loop. For Python this means setting
   2064         a watchpoint for f->f_lasti, which means we can then subsequently
   2065         "finish" frames.
   2066         We want f->f_lasti instead of f->f_lineno, because the latter only
   2067         works properly with local trace functions, see
   2068         PyFrameObjectPtr.current_line_num and PyFrameObjectPtr.addr2line.
   2069         """
   2070         if stepinto:
   2071             breakpoint_list = list(self.install_breakpoints())
   2072 
   2073         beginframe = gdb.selected_frame()
   2074 
   2075         if self.lang_info.is_relevant_function(beginframe):
   2076             # If we start in a relevant frame, initialize stuff properly. If
   2077             # we don't start in a relevant frame, the loop will halt
   2078             # immediately. So don't call self.lang_info.lineno() as it may
   2079             # raise for irrelevant frames.
   2080             beginline = self.lang_info.lineno(beginframe)
   2081 
   2082             if not stepinto:
   2083                 depth = stackdepth(beginframe)
   2084 
   2085         newframe = beginframe
   2086 
   2087         while True:
   2088             if self.lang_info.is_relevant_function(newframe):
   2089                 result = gdb.execute(stepover_command, to_string=True)
   2090             else:
   2091                 result = self._finish_frame()
   2092 
   2093             if self.stopped():
   2094                 break
   2095 
   2096             newframe = gdb.selected_frame()
   2097             is_relevant_function = self.lang_info.is_relevant_function(newframe)
   2098             try:
   2099                 framename = newframe.name()
   2100             except RuntimeError:
   2101                 framename = None
   2102 
   2103             m = re.search(r'Breakpoint (\d+)', result)
   2104             if m:
   2105                 if is_relevant_function and m.group(1) in breakpoint_list:
   2106                     # although we hit a breakpoint, we still need to check
   2107                     # that the function, in case hit by a runtime breakpoint,
   2108                     # is in the right context
   2109                     break
   2110 
   2111             if newframe != beginframe:
   2112                 # new function
   2113 
   2114                 if not stepinto:
   2115                     # see if we returned to the caller
   2116                     newdepth = stackdepth(newframe)
   2117                     is_relevant_function = (newdepth < depth and
   2118                                             is_relevant_function)
   2119 
   2120                 if is_relevant_function:
   2121                     break
   2122             else:
   2123                 # newframe equals beginframe, check for a difference in the
   2124                 # line number
   2125                 lineno = self.lang_info.lineno(newframe)
   2126                 if lineno and lineno != beginline:
   2127                     break
   2128 
   2129         if stepinto:
   2130             self.delete_breakpoints(breakpoint_list)
   2131 
   2132         self.finish_executing(result)
   2133 
   2134     def run(self, args, from_tty):
   2135         self.finish_executing(gdb.execute('run ' + args, to_string=True))
   2136 
   2137     def cont(self, *args):
   2138         self.finish_executing(gdb.execute('cont', to_string=True))
   2139 
   2140 
   2141 class LanguageInfo(object):
   2142     """
   2143     This class defines the interface that ExecutionControlCommandBase needs to
   2144     provide language-specific execution control.
   2145 
   2146     Classes that implement this interface should implement:
   2147 
   2148         lineno(frame)
   2149             Tells the current line number (only called for a relevant frame).
   2150             If lineno is a false value it is not checked for a difference.
   2151 
   2152         is_relevant_function(frame)
   2153             tells whether we care about frame 'frame'
   2154 
   2155         get_source_line(frame)
   2156             get the line of source code for the current line (only called for a
   2157             relevant frame). If the source code cannot be retrieved this
   2158             function should return None
   2159 
   2160         exc_info(frame) -- optional
   2161             tells whether an exception was raised, if so, it should return a
   2162             string representation of the exception value, None otherwise.
   2163 
   2164         static_break_functions()
   2165             returns an iterable of function names that are considered relevant
   2166             and should halt step-into execution. This is needed to provide a
   2167             performing step-into
   2168 
   2169         runtime_break_functions() -- optional
   2170             list of functions that we should break into depending on the
   2171             context
   2172     """
   2173 
   2174     def exc_info(self, frame):
   2175         "See this class' docstring."
   2176 
   2177     def runtime_break_functions(self):
   2178         """
   2179         Implement this if the list of step-into functions depends on the
   2180         context.
   2181         """
   2182         return ()
   2183 
   2184 class PythonInfo(LanguageInfo):
   2185 
   2186     def pyframe(self, frame):
   2187         pyframe = Frame(frame).get_pyop()
   2188         if pyframe:
   2189             return pyframe
   2190         else:
   2191             raise gdb.RuntimeError(
   2192                 "Unable to find the Python frame, run your code with a debug "
   2193                 "build (configure with --with-pydebug or compile with -g).")
   2194 
   2195     def lineno(self, frame):
   2196         return self.pyframe(frame).current_line_num()
   2197 
   2198     def is_relevant_function(self, frame):
   2199         return Frame(frame).is_evalframeex()
   2200 
   2201     def get_source_line(self, frame):
   2202         try:
   2203             pyframe = self.pyframe(frame)
   2204             return '%4d    %s' % (pyframe.current_line_num(),
   2205                                   pyframe.current_line().rstrip())
   2206         except IOError, e:
   2207             return None
   2208 
   2209     def exc_info(self, frame):
   2210         try:
   2211             tstate = frame.read_var('tstate').dereference()
   2212             if gdb.parse_and_eval('tstate->frame == f'):
   2213                 # tstate local variable initialized, check for an exception
   2214                 inf_type = tstate['curexc_type']
   2215                 inf_value = tstate['curexc_value']
   2216 
   2217                 if inf_type:
   2218                     return 'An exception was raised: %s' % (inf_value,)
   2219         except (ValueError, RuntimeError), e:
   2220             # Could not read the variable tstate or it's memory, it's ok
   2221             pass
   2222 
   2223     def static_break_functions(self):
   2224         yield 'PyEval_EvalFrameEx'
   2225 
   2226 
   2227 class PythonStepperMixin(object):
   2228     """
   2229     Make this a mixin so CyStep can also inherit from this and use a
   2230     CythonCodeStepper at the same time.
   2231     """
   2232 
   2233     def python_step(self, stepinto):
   2234         """
   2235         Set a watchpoint on the Python bytecode instruction pointer and try
   2236         to finish the frame
   2237         """
   2238         output = gdb.execute('watch f->f_lasti', to_string=True)
   2239         watchpoint = int(re.search(r'[Ww]atchpoint (\d+):', output).group(1))
   2240         self.step(stepinto=stepinto, stepover_command='finish')
   2241         gdb.execute('delete %s' % watchpoint)
   2242 
   2243 
   2244 class PyStep(ExecutionControlCommandBase, PythonStepperMixin):
   2245     "Step through Python code."
   2246 
   2247     stepinto = True
   2248 
   2249     def invoke(self, args, from_tty):
   2250         self.python_step(stepinto=self.stepinto)
   2251 
class PyNext(PyStep):
    "Step-over Python code."

    # Reuses PyStep.invoke, which passes this value on to python_step()
    stepinto = False
   2256 
class PyFinish(ExecutionControlCommandBase):
    "Execute until function returns to a caller."

    # finish() takes *args, so it accepts (and ignores) the arguments that
    # are passed to invoke
    invoke = ExecutionControlCommandBase.finish
   2261 
class PyRun(ExecutionControlCommandBase):
    "Run the program."

    # run() has the (args, from_tty) signature of invoke and appends args to
    # gdb's 'run' command
    invoke = ExecutionControlCommandBase.run
   2266 
   2267 class PyCont(ExecutionControlCommandBase):
   2268 
   2269     invoke = ExecutionControlCommandBase.cont
   2270 
   2271 
   2272 def _pointervalue(gdbval):
   2273     """
   2274     Return the value of the pionter as a Python int.
   2275 
   2276     gdbval.type must be a pointer type
   2277     """
   2278     # don't convert with int() as it will raise a RuntimeError
   2279     if gdbval.address is not None:
   2280         return long(gdbval.address)
   2281     else:
   2282         # the address attribute is None sometimes, in which case we can
   2283         # still convert the pointer to an int
   2284         return long(gdbval)
   2285 
def pointervalue(gdbval):
    """
    Return the value of the pointer `gdbval` as computed by _pointervalue().

    Raises gdb.GdbError when that value is negative. A RuntimeError raised
    inside the check is swallowed and the value is returned as it is.
    """
    pointer = _pointervalue(gdbval)
    try:
        if pointer < 0:
            raise gdb.GdbError("Negative pointer value, presumably a bug "
                               "in gdb, aborting.")
    except RuntimeError:
        # work around yet another bug in gdb where you get random behaviour
        # and tracebacks
        pass

    return pointer
   2298 
   2299 def get_inferior_unicode_postfix():
   2300     try:
   2301         gdb.parse_and_eval('PyUnicode_FromEncodedObject')
   2302     except RuntimeError:
   2303         try:
   2304             gdb.parse_and_eval('PyUnicodeUCS2_FromEncodedObject')
   2305         except RuntimeError:
   2306             return 'UCS4'
   2307         else:
   2308             return 'UCS2'
   2309     else:
   2310         return ''
   2311 
   2312 class PythonCodeExecutor(object):
   2313 
   2314     Py_single_input = 256
   2315     Py_file_input = 257
   2316     Py_eval_input = 258
   2317 
   2318     def malloc(self, size):
   2319         chunk = (gdb.parse_and_eval("(void *) malloc((size_t) %d)" % size))
   2320 
   2321         pointer = pointervalue(chunk)
   2322         if pointer == 0:
   2323             raise gdb.GdbError("No memory could be allocated in the inferior.")
   2324 
   2325         return pointer
   2326 
   2327     def alloc_string(self, string):
   2328         pointer = self.malloc(len(string))
   2329         get_selected_inferior().write_memory(pointer, string)
   2330 
   2331         return pointer
   2332 
   2333     def alloc_pystring(self, string):
   2334         stringp = self.alloc_string(string)
   2335         PyString_FromStringAndSize = 'PyString_FromStringAndSize'
   2336 
   2337         try:
   2338             gdb.parse_and_eval(PyString_FromStringAndSize)
   2339         except RuntimeError:
   2340             # Python 3
   2341             PyString_FromStringAndSize = ('PyUnicode%s_FromStringAndSize' %
   2342                                                (get_inferior_unicode_postfix(),))
   2343 
   2344         try:
   2345             result = gdb.parse_and_eval(
   2346                 '(PyObject *) %s((char *) %d, (size_t) %d)' % (
   2347                             PyString_FromStringAndSize, stringp, len(string)))
   2348         finally:
   2349             self.free(stringp)
   2350 
   2351         pointer = pointervalue(result)
   2352         if pointer == 0:
   2353             raise gdb.GdbError("Unable to allocate Python string in "
   2354                                "the inferior.")
   2355 
   2356         return pointer
   2357 
   2358     def free(self, pointer):
   2359         gdb.parse_and_eval("free((void *) %d)" % pointer)
   2360 
   2361     def incref(self, pointer):
   2362         "Increment the reference count of a Python object in the inferior."
   2363         gdb.parse_and_eval('Py_IncRef((PyObject *) %d)' % pointer)
   2364 
   2365     def xdecref(self, pointer):
   2366         "Decrement the reference count of a Python object in the inferior."
   2367         # Py_DecRef is like Py_XDECREF, but a function. So we don't have
   2368         # to check for NULL. This should also decref all our allocated
   2369         # Python strings.
   2370         gdb.parse_and_eval('Py_DecRef((PyObject *) %d)' % pointer)
   2371 
   2372     def evalcode(self, code, input_type, global_dict=None, local_dict=None):
   2373         """
   2374         Evaluate python code `code` given as a string in the inferior and
   2375         return the result as a gdb.Value. Returns a new reference in the
   2376         inferior.
   2377 
   2378         Of course, executing any code in the inferior may be dangerous and may
   2379         leave the debuggee in an unsafe state or terminate it alltogether.
   2380         """
   2381         if '\0' in code:
   2382             raise gdb.GdbError("String contains NUL byte.")
   2383 
   2384         code += '\0'
   2385 
   2386         pointer = self.alloc_string(code)
   2387 
   2388         globalsp = pointervalue(global_dict)
   2389         localsp = pointervalue(local_dict)
   2390 
   2391         if globalsp == 0 or localsp == 0:
   2392             raise gdb.GdbError("Unable to obtain or create locals or globals.")
   2393 
   2394         code = """
   2395             PyRun_String(
   2396                 (char *) %(code)d,
   2397                 (int) %(start)d,
   2398                 (PyObject *) %(globals)s,
   2399                 (PyObject *) %(locals)d)
   2400         """ % dict(code=pointer, start=input_type,
   2401                    globals=globalsp, locals=localsp)
   2402 
   2403         with FetchAndRestoreError():
   2404             try:
   2405                 pyobject_return_value = gdb.parse_and_eval(code)
   2406             finally:
   2407                 self.free(pointer)
   2408 
   2409         return pyobject_return_value
   2410 
   2411 class FetchAndRestoreError(PythonCodeExecutor):
   2412     """
   2413     Context manager that fetches the error indicator in the inferior and
   2414     restores it on exit.
   2415     """
   2416 
   2417     def __init__(self):
   2418         self.sizeof_PyObjectPtr = gdb.lookup_type('PyObject').pointer().sizeof
   2419         self.pointer = self.malloc(self.sizeof_PyObjectPtr * 3)
   2420 
   2421         type = self.pointer
   2422         value = self.pointer + self.sizeof_PyObjectPtr
   2423         traceback = self.pointer + self.sizeof_PyObjectPtr * 2
   2424 
   2425         self.errstate = type, value, traceback
   2426 
   2427     def __enter__(self):
   2428         gdb.parse_and_eval("PyErr_Fetch(%d, %d, %d)" % self.errstate)
   2429 
   2430     def __exit__(self, *args):
   2431         if gdb.parse_and_eval("(int) PyErr_Occurred()"):
   2432             gdb.parse_and_eval("PyErr_Print()")
   2433 
   2434         pyerr_restore = ("PyErr_Restore("
   2435                             "(PyObject *) *%d,"
   2436                             "(PyObject *) *%d,"
   2437                             "(PyObject *) *%d)")
   2438 
   2439         try:
   2440             gdb.parse_and_eval(pyerr_restore % self.errstate)
   2441         finally:
   2442             self.free(self.pointer)
   2443 
   2444 
   2445 class FixGdbCommand(gdb.Command):
   2446 
   2447     def __init__(self, command, actual_command):
   2448         super(FixGdbCommand, self).__init__(command, gdb.COMMAND_DATA,
   2449                                             gdb.COMPLETE_NONE)
   2450         self.actual_command = actual_command
   2451 
   2452     def fix_gdb(self):
   2453         """
   2454         It seems that invoking either 'cy exec' and 'py-exec' work perfectly 
   2455         fine, but after this gdb's python API is entirely broken. 
   2456         Maybe some uncleared exception value is still set?
   2457         sys.exc_clear() didn't help. A demonstration:
   2458 
   2459         (gdb) cy exec 'hello'
   2460         'hello'
   2461         (gdb) python gdb.execute('cont')
   2462         RuntimeError: Cannot convert value to int.
   2463         Error while executing Python code.
   2464         (gdb) python gdb.execute('cont')
   2465         [15148 refs]
   2466 
   2467         Program exited normally.
   2468         """
   2469         warnings.filterwarnings('ignore', r'.*', RuntimeWarning,
   2470                                 re.escape(__name__))
   2471         try:
   2472             long(gdb.parse_and_eval("(void *) 0")) == 0
   2473         except RuntimeError:
   2474             pass
   2475         # warnings.resetwarnings()
   2476 
   2477     def invoke(self, args, from_tty):
   2478         self.fix_gdb()
   2479         try:
   2480             gdb.execute('%s %s' % (self.actual_command, args))
   2481         except RuntimeError, e:
   2482             raise gdb.GdbError(str(e))
   2483         self.fix_gdb()
   2484 
   2485 
   2486 def _evalcode_python(executor, code, input_type):
   2487     """
   2488     Execute Python code in the most recent stack frame.
   2489     """
   2490     global_dict = gdb.parse_and_eval('PyEval_GetGlobals()')
   2491     local_dict = gdb.parse_and_eval('PyEval_GetLocals()')
   2492 
   2493     if (pointervalue(global_dict) == 0 or pointervalue(local_dict) == 0):
   2494         raise gdb.GdbError("Unable to find the locals or globals of the "
   2495                            "most recent Python function (relative to the "
   2496                            "selected frame).")
   2497 
   2498     return executor.evalcode(code, input_type, global_dict, local_dict)
   2499 
   2500 class PyExec(gdb.Command):
   2501 
   2502     def readcode(self, expr):
   2503         if expr:
   2504             return expr, PythonCodeExecutor.Py_single_input
   2505         else:
   2506             lines = []
   2507             while True:
   2508                 try:
   2509                     line = raw_input('>')
   2510                 except EOFError:
   2511                     break
   2512                 else:
   2513                     if line.rstrip() == 'end':
   2514                         break
   2515 
   2516                     lines.append(line)
   2517 
   2518             return '\n'.join(lines), PythonCodeExecutor.Py_file_input
   2519 
   2520     def invoke(self, expr, from_tty):
   2521         expr, input_type = self.readcode(expr)
   2522         executor = PythonCodeExecutor()
   2523         executor.xdecref(_evalcode_python(executor, input_type, global_dict,
   2524                                           local_dict))
   2525 
   2526 
   2527 gdb.execute('set breakpoint pending on')
   2528 
   2529 if hasattr(gdb, 'GdbError'):
   2530      # Wrap py-step and py-next in gdb defines to make them repeatable.
   2531     py_step = PyStep('-py-step', PythonInfo())
   2532     py_next = PyNext('-py-next', PythonInfo())
   2533     register_defines()
   2534     py_finish = PyFinish('py-finish', PythonInfo())
   2535     py_run = PyRun('py-run', PythonInfo())
   2536     py_cont = PyCont('py-cont', PythonInfo())
   2537 
   2538     py_exec = FixGdbCommand('py-exec', '-py-exec')
   2539     _py_exec = PyExec("-py-exec", gdb.COMMAND_DATA, gdb.COMPLETE_NONE)
   2540 else:
   2541     warnings.warn("Use gdb 7.2 or higher to use the py-exec command.")
   2542