Home | History | Annotate | Download | only in Lib
      1 """Disassembler of Python byte code into mnemonics."""
      2 
      3 import sys
      4 import types
      5 import collections
      6 import io
      7 
      8 from opcode import *
      9 from opcode import __all__ as _opcodes_all
     10 
# Public API: this module's own entry points followed by every public name
# of the opcode module (which the star import above already pulled in).
__all__ = ["code_info", "dis", "disassemble", "distb", "disco",
           "findlinestarts", "findlabels", "show_code",
           "get_instructions", "Instruction", "Bytecode"] + _opcodes_all
# The alias was only needed to build __all__; drop it from the namespace.
del _opcodes_all

# Types of class/module attributes that dis() descends into when it is
# handed an object with a __dict__.
_have_code = (types.MethodType, types.FunctionType, types.CodeType,
              classmethod, staticmethod, type)

# Opcode number of FORMAT_VALUE; its argument is decoded specially in
# _get_instructions_bytes().
FORMAT_VALUE = opmap['FORMAT_VALUE']
     20 
     21 def _try_compile(source, name):
     22     """Attempts to compile the given source, first as an expression and
     23        then as a statement if the first approach fails.
     24 
     25        Utility function to accept strings in functions that otherwise
     26        expect code objects
     27     """
     28     try:
     29         c = compile(source, name, 'eval')
     30     except SyntaxError:
     31         c = compile(source, name, 'exec')
     32     return c
     33 
     34 def dis(x=None, *, file=None, depth=None):
     35     """Disassemble classes, methods, functions, and other compiled objects.
     36 
     37     With no argument, disassemble the last traceback.
     38 
     39     Compiled objects currently include generator objects, async generator
     40     objects, and coroutine objects, all of which store their code object
     41     in a special attribute.
     42     """
     43     if x is None:
     44         distb(file=file)
     45         return
     46     # Extract functions from methods.
     47     if hasattr(x, '__func__'):
     48         x = x.__func__
     49     # Extract compiled code objects from...
     50     if hasattr(x, '__code__'):  # ...a function, or
     51         x = x.__code__
     52     elif hasattr(x, 'gi_code'):  #...a generator object, or
     53         x = x.gi_code
     54     elif hasattr(x, 'ag_code'):  #...an asynchronous generator object, or
     55         x = x.ag_code
     56     elif hasattr(x, 'cr_code'):  #...a coroutine.
     57         x = x.cr_code
     58     # Perform the disassembly.
     59     if hasattr(x, '__dict__'):  # Class or module
     60         items = sorted(x.__dict__.items())
     61         for name, x1 in items:
     62             if isinstance(x1, _have_code):
     63                 print("Disassembly of %s:" % name, file=file)
     64                 try:
     65                     dis(x1, file=file, depth=depth)
     66                 except TypeError as msg:
     67                     print("Sorry:", msg, file=file)
     68                 print(file=file)
     69     elif hasattr(x, 'co_code'): # Code object
     70         _disassemble_recursive(x, file=file, depth=depth)
     71     elif isinstance(x, (bytes, bytearray)): # Raw bytecode
     72         _disassemble_bytes(x, file=file)
     73     elif isinstance(x, str):    # Source code
     74         _disassemble_str(x, file=file, depth=depth)
     75     else:
     76         raise TypeError("don't know how to disassemble %s objects" %
     77                         type(x).__name__)
     78 
     79 def distb(tb=None, *, file=None):
     80     """Disassemble a traceback (default: last traceback)."""
     81     if tb is None:
     82         try:
     83             tb = sys.last_traceback
     84         except AttributeError:
     85             raise RuntimeError("no last traceback to disassemble") from None
     86         while tb.tb_next: tb = tb.tb_next
     87     disassemble(tb.tb_frame.f_code, tb.tb_lasti, file=file)
     88 
     89 # The inspect module interrogates this dictionary to build its
     90 # list of CO_* constants. It is also used by pretty_flags to
     91 # turn the co_flags field into a human readable list.
     92 COMPILER_FLAG_NAMES = {
     93      1: "OPTIMIZED",
     94      2: "NEWLOCALS",
     95      4: "VARARGS",
     96      8: "VARKEYWORDS",
     97     16: "NESTED",
     98     32: "GENERATOR",
     99     64: "NOFREE",
    100    128: "COROUTINE",
    101    256: "ITERABLE_COROUTINE",
    102    512: "ASYNC_GENERATOR",
    103 }
    104 
    105 def pretty_flags(flags):
    106     """Return pretty representation of code flags."""
    107     names = []
    108     for i in range(32):
    109         flag = 1<<i
    110         if flags & flag:
    111             names.append(COMPILER_FLAG_NAMES.get(flag, hex(flag)))
    112             flags ^= flag
    113             if not flags:
    114                 break
    115     else:
    116         names.append(hex(flags))
    117     return ", ".join(names)
    118 
    119 def _get_code_object(x):
    120     """Helper to handle methods, compiled or raw code objects, and strings."""
    121     # Extract functions from methods.
    122     if hasattr(x, '__func__'):
    123         x = x.__func__
    124     # Extract compiled code objects from...
    125     if hasattr(x, '__code__'):  # ...a function, or
    126         x = x.__code__
    127     elif hasattr(x, 'gi_code'):  #...a generator object, or
    128         x = x.gi_code
    129     elif hasattr(x, 'ag_code'):  #...an asynchronous generator object, or
    130         x = x.ag_code
    131     elif hasattr(x, 'cr_code'):  #...a coroutine.
    132         x = x.cr_code
    133     # Handle source code.
    134     if isinstance(x, str):
    135         x = _try_compile(x, "<disassembly>")
    136     # By now, if we don't have a code object, we can't disassemble x.
    137     if hasattr(x, 'co_code'):
    138         return x
    139     raise TypeError("don't know how to disassemble %s objects" %
    140                     type(x).__name__)
    141 
    142 def code_info(x):
    143     """Formatted details of methods, functions, or code."""
    144     return _format_code_info(_get_code_object(x))
    145 
    146 def _format_code_info(co):
    147     lines = []
    148     lines.append("Name:              %s" % co.co_name)
    149     lines.append("Filename:          %s" % co.co_filename)
    150     lines.append("Argument count:    %s" % co.co_argcount)
    151     lines.append("Kw-only arguments: %s" % co.co_kwonlyargcount)
    152     lines.append("Number of locals:  %s" % co.co_nlocals)
    153     lines.append("Stack size:        %s" % co.co_stacksize)
    154     lines.append("Flags:             %s" % pretty_flags(co.co_flags))
    155     if co.co_consts:
    156         lines.append("Constants:")
    157         for i_c in enumerate(co.co_consts):
    158             lines.append("%4d: %r" % i_c)
    159     if co.co_names:
    160         lines.append("Names:")
    161         for i_n in enumerate(co.co_names):
    162             lines.append("%4d: %s" % i_n)
    163     if co.co_varnames:
    164         lines.append("Variable names:")
    165         for i_n in enumerate(co.co_varnames):
    166             lines.append("%4d: %s" % i_n)
    167     if co.co_freevars:
    168         lines.append("Free variables:")
    169         for i_n in enumerate(co.co_freevars):
    170             lines.append("%4d: %s" % i_n)
    171     if co.co_cellvars:
    172         lines.append("Cell variables:")
    173         for i_n in enumerate(co.co_cellvars):
    174             lines.append("%4d: %s" % i_n)
    175     return "\n".join(lines)
    176 
    177 def show_code(co, *, file=None):
    178     """Print details of methods, functions, or code to *file*.
    179 
    180     If *file* is not provided, the output is printed on stdout.
    181     """
    182     print(code_info(co), file=file)
    183 
    184 _Instruction = collections.namedtuple("_Instruction",
    185      "opname opcode arg argval argrepr offset starts_line is_jump_target")
    186 
    187 _Instruction.opname.__doc__ = "Human readable name for operation"
    188 _Instruction.opcode.__doc__ = "Numeric code for operation"
    189 _Instruction.arg.__doc__ = "Numeric argument to operation (if any), otherwise None"
    190 _Instruction.argval.__doc__ = "Resolved arg value (if known), otherwise same as arg"
    191 _Instruction.argrepr.__doc__ = "Human readable description of operation argument"
    192 _Instruction.offset.__doc__ = "Start index of operation within bytecode sequence"
    193 _Instruction.starts_line.__doc__ = "Line started by this opcode (if any), otherwise None"
    194 _Instruction.is_jump_target.__doc__ = "True if other code jumps to here, otherwise False"
    195 
    196 _OPNAME_WIDTH = 20
    197 _OPARG_WIDTH = 5
    198 
    199 class Instruction(_Instruction):
    200     """Details for a bytecode operation
    201 
    202        Defined fields:
    203          opname - human readable name for operation
    204          opcode - numeric code for operation
    205          arg - numeric argument to operation (if any), otherwise None
    206          argval - resolved arg value (if known), otherwise same as arg
    207          argrepr - human readable description of operation argument
    208          offset - start index of operation within bytecode sequence
    209          starts_line - line started by this opcode (if any), otherwise None
    210          is_jump_target - True if other code jumps to here, otherwise False
    211     """
    212 
    213     def _disassemble(self, lineno_width=3, mark_as_current=False, offset_width=4):
    214         """Format instruction details for inclusion in disassembly output
    215 
    216         *lineno_width* sets the width of the line number field (0 omits it)
    217         *mark_as_current* inserts a '-->' marker arrow as part of the line
    218         *offset_width* sets the width of the instruction offset field
    219         """
    220         fields = []
    221         # Column: Source code line number
    222         if lineno_width:
    223             if self.starts_line is not None:
    224                 lineno_fmt = "%%%dd" % lineno_width
    225                 fields.append(lineno_fmt % self.starts_line)
    226             else:
    227                 fields.append(' ' * lineno_width)
    228         # Column: Current instruction indicator
    229         if mark_as_current:
    230             fields.append('-->')
    231         else:
    232             fields.append('   ')
    233         # Column: Jump target marker
    234         if self.is_jump_target:
    235             fields.append('>>')
    236         else:
    237             fields.append('  ')
    238         # Column: Instruction offset from start of code sequence
    239         fields.append(repr(self.offset).rjust(offset_width))
    240         # Column: Opcode name
    241         fields.append(self.opname.ljust(_OPNAME_WIDTH))
    242         # Column: Opcode argument
    243         if self.arg is not None:
    244             fields.append(repr(self.arg).rjust(_OPARG_WIDTH))
    245             # Column: Opcode argument details
    246             if self.argrepr:
    247                 fields.append('(' + self.argrepr + ')')
    248         return ' '.join(fields).rstrip()
    249 
    250 
    251 def get_instructions(x, *, first_line=None):
    252     """Iterator for the opcodes in methods, functions or code
    253 
    254     Generates a series of Instruction named tuples giving the details of
    255     each operations in the supplied code.
    256 
    257     If *first_line* is not None, it indicates the line number that should
    258     be reported for the first source line in the disassembled code.
    259     Otherwise, the source line information (if any) is taken directly from
    260     the disassembled code object.
    261     """
    262     co = _get_code_object(x)
    263     cell_names = co.co_cellvars + co.co_freevars
    264     linestarts = dict(findlinestarts(co))
    265     if first_line is not None:
    266         line_offset = first_line - co.co_firstlineno
    267     else:
    268         line_offset = 0
    269     return _get_instructions_bytes(co.co_code, co.co_varnames, co.co_names,
    270                                    co.co_consts, cell_names, linestarts,
    271                                    line_offset)
    272 
    273 def _get_const_info(const_index, const_list):
    274     """Helper to get optional details about const references
    275 
    276        Returns the dereferenced constant and its repr if the constant
    277        list is defined.
    278        Otherwise returns the constant index and its repr().
    279     """
    280     argval = const_index
    281     if const_list is not None:
    282         argval = const_list[const_index]
    283     return argval, repr(argval)
    284 
    285 def _get_name_info(name_index, name_list):
    286     """Helper to get optional details about named references
    287 
    288        Returns the dereferenced name as both value and repr if the name
    289        list is defined.
    290        Otherwise returns the name index and its repr().
    291     """
    292     argval = name_index
    293     if name_list is not None:
    294         argval = name_list[name_index]
    295         argrepr = argval
    296     else:
    297         argrepr = repr(argval)
    298     return argval, argrepr
    299 
    300 
    301 def _get_instructions_bytes(code, varnames=None, names=None, constants=None,
    302                       cells=None, linestarts=None, line_offset=0):
    303     """Iterate over the instructions in a bytecode string.
    304 
    305     Generates a sequence of Instruction namedtuples giving the details of each
    306     opcode.  Additional information about the code's runtime environment
    307     (e.g. variable names, constants) can be specified using optional
    308     arguments.
    309 
    310     """
    311     labels = findlabels(code)
    312     starts_line = None
    313     for offset, op, arg in _unpack_opargs(code):
    314         if linestarts is not None:
    315             starts_line = linestarts.get(offset, None)
    316             if starts_line is not None:
    317                 starts_line += line_offset
    318         is_jump_target = offset in labels
    319         argval = None
    320         argrepr = ''
    321         if arg is not None:
    322             #  Set argval to the dereferenced value of the argument when
    323             #  available, and argrepr to the string representation of argval.
    324             #    _disassemble_bytes needs the string repr of the
    325             #    raw name index for LOAD_GLOBAL, LOAD_CONST, etc.
    326             argval = arg
    327             if op in hasconst:
    328                 argval, argrepr = _get_const_info(arg, constants)
    329             elif op in hasname:
    330                 argval, argrepr = _get_name_info(arg, names)
    331             elif op in hasjrel:
    332                 argval = offset + 2 + arg
    333                 argrepr = "to " + repr(argval)
    334             elif op in haslocal:
    335                 argval, argrepr = _get_name_info(arg, varnames)
    336             elif op in hascompare:
    337                 argval = cmp_op[arg]
    338                 argrepr = argval
    339             elif op in hasfree:
    340                 argval, argrepr = _get_name_info(arg, cells)
    341             elif op == FORMAT_VALUE:
    342                 argval = ((None, str, repr, ascii)[arg & 0x3], bool(arg & 0x4))
    343                 argrepr = ('', 'str', 'repr', 'ascii')[arg & 0x3]
    344                 if argval[1]:
    345                     if argrepr:
    346                         argrepr += ', '
    347                     argrepr += 'with format'
    348         yield Instruction(opname[op], op,
    349                           arg, argval, argrepr,
    350                           offset, starts_line, is_jump_target)
    351 
    352 def disassemble(co, lasti=-1, *, file=None):
    353     """Disassemble a code object."""
    354     cell_names = co.co_cellvars + co.co_freevars
    355     linestarts = dict(findlinestarts(co))
    356     _disassemble_bytes(co.co_code, lasti, co.co_varnames, co.co_names,
    357                        co.co_consts, cell_names, linestarts, file=file)
    358 
    359 def _disassemble_recursive(co, *, file=None, depth=None):
    360     disassemble(co, file=file)
    361     if depth is None or depth > 0:
    362         if depth is not None:
    363             depth = depth - 1
    364         for x in co.co_consts:
    365             if hasattr(x, 'co_code'):
    366                 print(file=file)
    367                 print("Disassembly of %r:" % (x,), file=file)
    368                 _disassemble_recursive(x, file=file, depth=depth)
    369 
    370 def _disassemble_bytes(code, lasti=-1, varnames=None, names=None,
    371                        constants=None, cells=None, linestarts=None,
    372                        *, file=None, line_offset=0):
    373     # Omit the line number column entirely if we have no line number info
    374     show_lineno = linestarts is not None
    375     if show_lineno:
    376         maxlineno = max(linestarts.values()) + line_offset
    377         if maxlineno >= 1000:
    378             lineno_width = len(str(maxlineno))
    379         else:
    380             lineno_width = 3
    381     else:
    382         lineno_width = 0
    383     maxoffset = len(code) - 2
    384     if maxoffset >= 10000:
    385         offset_width = len(str(maxoffset))
    386     else:
    387         offset_width = 4
    388     for instr in _get_instructions_bytes(code, varnames, names,
    389                                          constants, cells, linestarts,
    390                                          line_offset=line_offset):
    391         new_source_line = (show_lineno and
    392                            instr.starts_line is not None and
    393                            instr.offset > 0)
    394         if new_source_line:
    395             print(file=file)
    396         is_current_instr = instr.offset == lasti
    397         print(instr._disassemble(lineno_width, is_current_instr, offset_width),
    398               file=file)
    399 
    400 def _disassemble_str(source, **kwargs):
    401     """Compile the source string, then disassemble the code object."""
    402     _disassemble_recursive(_try_compile(source, '<dis>'), **kwargs)
    403 
# Legacy alias for disassemble(), kept for backwards compatibility.
disco = disassemble
    405 
    406 def _unpack_opargs(code):
    407     extended_arg = 0
    408     for i in range(0, len(code), 2):
    409         op = code[i]
    410         if op >= HAVE_ARGUMENT:
    411             arg = code[i+1] | extended_arg
    412             extended_arg = (arg << 8) if op == EXTENDED_ARG else 0
    413         else:
    414             arg = None
    415         yield (i, op, arg)
    416 
    417 def findlabels(code):
    418     """Detect all offsets in a byte code which are jump targets.
    419 
    420     Return the list of offsets.
    421 
    422     """
    423     labels = []
    424     for offset, op, arg in _unpack_opargs(code):
    425         if arg is not None:
    426             if op in hasjrel:
    427                 label = offset + 2 + arg
    428             elif op in hasjabs:
    429                 label = arg
    430             else:
    431                 continue
    432             if label not in labels:
    433                 labels.append(label)
    434     return labels
    435 
    436 def findlinestarts(code):
    437     """Find the offsets in a byte code which are start of lines in the source.
    438 
    439     Generate pairs (offset, lineno) as described in Python/compile.c.
    440 
    441     """
    442     byte_increments = code.co_lnotab[0::2]
    443     line_increments = code.co_lnotab[1::2]
    444 
    445     lastlineno = None
    446     lineno = code.co_firstlineno
    447     addr = 0
    448     for byte_incr, line_incr in zip(byte_increments, line_increments):
    449         if byte_incr:
    450             if lineno != lastlineno:
    451                 yield (addr, lineno)
    452                 lastlineno = lineno
    453             addr += byte_incr
    454         if line_incr >= 0x80:
    455             # line_increments is an array of 8-bit signed integers
    456             line_incr -= 0x100
    457         lineno += line_incr
    458     if lineno != lastlineno:
    459         yield (addr, lineno)
    460 
    461 class Bytecode:
    462     """The bytecode operations of a piece of code
    463 
    464     Instantiate this with a function, method, other compiled object, string of
    465     code, or a code object (as returned by compile()).
    466 
    467     Iterating over this yields the bytecode operations as Instruction instances.
    468     """
    469     def __init__(self, x, *, first_line=None, current_offset=None):
    470         self.codeobj = co = _get_code_object(x)
    471         if first_line is None:
    472             self.first_line = co.co_firstlineno
    473             self._line_offset = 0
    474         else:
    475             self.first_line = first_line
    476             self._line_offset = first_line - co.co_firstlineno
    477         self._cell_names = co.co_cellvars + co.co_freevars
    478         self._linestarts = dict(findlinestarts(co))
    479         self._original_object = x
    480         self.current_offset = current_offset
    481 
    482     def __iter__(self):
    483         co = self.codeobj
    484         return _get_instructions_bytes(co.co_code, co.co_varnames, co.co_names,
    485                                        co.co_consts, self._cell_names,
    486                                        self._linestarts,
    487                                        line_offset=self._line_offset)
    488 
    489     def __repr__(self):
    490         return "{}({!r})".format(self.__class__.__name__,
    491                                  self._original_object)
    492 
    493     @classmethod
    494     def from_traceback(cls, tb):
    495         """ Construct a Bytecode from the given traceback """
    496         while tb.tb_next:
    497             tb = tb.tb_next
    498         return cls(tb.tb_frame.f_code, current_offset=tb.tb_lasti)
    499 
    500     def info(self):
    501         """Return formatted information about the code object."""
    502         return _format_code_info(self.codeobj)
    503 
    504     def dis(self):
    505         """Return a formatted view of the bytecode operations."""
    506         co = self.codeobj
    507         if self.current_offset is not None:
    508             offset = self.current_offset
    509         else:
    510             offset = -1
    511         with io.StringIO() as output:
    512             _disassemble_bytes(co.co_code, varnames=co.co_varnames,
    513                                names=co.co_names, constants=co.co_consts,
    514                                cells=self._cell_names,
    515                                linestarts=self._linestarts,
    516                                line_offset=self._line_offset,
    517                                file=output,
    518                                lasti=offset)
    519             return output.getvalue()
    520 
    521 
    522 def _test():
    523     """Simple test program to disassemble a file."""
    524     import argparse
    525 
    526     parser = argparse.ArgumentParser()
    527     parser.add_argument('infile', type=argparse.FileType(), nargs='?', default='-')
    528     args = parser.parse_args()
    529     with args.infile as infile:
    530         source = infile.read()
    531     code = compile(source, args.infile.name, "exec")
    532     dis(code)
    533 
    534 if __name__ == "__main__":
    535     _test()
    536