Home | History | Annotate | Download | only in llvm
      1 #===- disassembler.py - Python LLVM Bindings -----------------*- python -*--===#
      2 #
      3 #                     The LLVM Compiler Infrastructure
      4 #
      5 # This file is distributed under the University of Illinois Open Source
      6 # License. See LICENSE.TXT for details.
      7 #
      8 #===------------------------------------------------------------------------===#
      9 
     10 from ctypes import CFUNCTYPE
     11 from ctypes import POINTER
     12 from ctypes import addressof
     13 from ctypes import c_byte
     14 from ctypes import c_char_p
     15 from ctypes import c_int
     16 from ctypes import c_size_t
     17 from ctypes import c_ubyte
     18 from ctypes import c_uint64
     19 from ctypes import c_void_p
     20 from ctypes import cast
     21 
     22 from .common import LLVMObject
     23 from .common import c_object_p
     24 from .common import get_library
     25 
     26 __all__ = [
     27     'Disassembler',
     28 ]
     29 
     30 lib = get_library()
     31 callbacks = {}
     32 
     33 # Constants for set_options
     34 Option_UseMarkup = 1
     35 
     36 
     37 
     38 _initialized = False
     39 _targets = ['AArch64', 'ARM', 'Hexagon', 'MSP430', 'Mips', 'NVPTX', 'PowerPC', 'R600', 'Sparc', 'SystemZ', 'X86', 'XCore']
     40 def _ensure_initialized():
     41     global _initialized
     42     if not _initialized:
     43         # Here one would want to call the functions
     44         # LLVMInitializeAll{TargetInfo,TargetMC,Disassembler}s, but
     45         # unfortunately they are only defined as static inline
     46         # functions in the header files of llvm-c, so they don't exist
     47         # as symbols in the shared library.
     48         # So until that is fixed use this hack to initialize them all
     49         for tgt in _targets:
     50             for initializer in ("TargetInfo", "TargetMC", "Disassembler"):
     51                 try:
     52                     f = getattr(lib, "LLVMInitialize" + tgt + initializer)
     53                 except AttributeError:
     54                     continue
     55                 f()
     56         _initialized = True
     57 
     58 
     59 class Disassembler(LLVMObject):
     60     """Represents a disassembler instance.
     61 
     62     Disassembler instances are tied to specific "triple," which must be defined
     63     at creation time.
     64 
     65     Disassembler instances can disassemble instructions from multiple sources.
     66     """
     67     def __init__(self, triple):
     68         """Create a new disassembler instance.
     69 
     70         The triple argument is the triple to create the disassembler for. This
     71         is something like 'i386-apple-darwin9'.
     72         """
     73 
     74         _ensure_initialized()
     75 
     76         ptr = lib.LLVMCreateDisasm(c_char_p(triple), c_void_p(None), c_int(0),
     77                 callbacks['op_info'](0), callbacks['symbol_lookup'](0))
     78         if not ptr:
     79             raise Exception('Could not obtain disassembler for triple: %s' %
     80                             triple)
     81 
     82         LLVMObject.__init__(self, ptr, disposer=lib.LLVMDisasmDispose)
     83 
     84     def get_instruction(self, source, pc=0):
     85         """Obtain the next instruction from an input source.
     86 
     87         The input source should be a str or bytearray or something that
     88         represents a sequence of bytes.
     89 
     90         This function will start reading bytes from the beginning of the
     91         source.
     92 
     93         The pc argument specifies the address that the first byte is at.
     94 
     95         This returns a 2-tuple of:
     96 
     97           long number of bytes read. 0 if no instruction was read.
     98           str representation of instruction. This will be the assembly that
     99             represents the instruction.
    100         """
    101         buf = cast(c_char_p(source), POINTER(c_ubyte))
    102         out_str = cast((c_byte * 255)(), c_char_p)
    103 
    104         result = lib.LLVMDisasmInstruction(self, buf, c_uint64(len(source)),
    105                                            c_uint64(pc), out_str, 255)
    106 
    107         return (result, out_str.value)
    108 
    109     def get_instructions(self, source, pc=0):
    110         """Obtain multiple instructions from an input source.
    111 
    112         This is like get_instruction() except it is a generator for all
    113         instructions within the source. It starts at the beginning of the
    114         source and reads instructions until no more can be read.
    115 
    116         This generator returns 3-tuple of:
    117 
    118           long address of instruction.
    119           long size of instruction, in bytes.
    120           str representation of instruction.
    121         """
    122         source_bytes = c_char_p(source)
    123         out_str = cast((c_byte * 255)(), c_char_p)
    124 
    125         # This could probably be written cleaner. But, it does work.
    126         buf = cast(source_bytes, POINTER(c_ubyte * len(source))).contents
    127         offset = 0
    128         address = pc
    129         end_address = pc + len(source)
    130         while address < end_address:
    131             b = cast(addressof(buf) + offset, POINTER(c_ubyte))
    132             result = lib.LLVMDisasmInstruction(self, b,
    133                     c_uint64(len(source) - offset), c_uint64(address),
    134                     out_str, 255)
    135 
    136             if result == 0:
    137                 break
    138 
    139             yield (address, result, out_str.value)
    140 
    141             address += result
    142             offset += result
    143 
    144     def set_options(self, options):
    145         if not lib.LLVMSetDisasmOptions(self, options):
    146             raise Exception('Unable to set all disassembler options in %i' % options)
    147 
    148 
    149 def register_library(library):
    150     library.LLVMCreateDisasm.argtypes = [c_char_p, c_void_p, c_int,
    151         callbacks['op_info'], callbacks['symbol_lookup']]
    152     library.LLVMCreateDisasm.restype = c_object_p
    153 
    154     library.LLVMDisasmDispose.argtypes = [Disassembler]
    155 
    156     library.LLVMDisasmInstruction.argtypes = [Disassembler, POINTER(c_ubyte),
    157             c_uint64, c_uint64, c_char_p, c_size_t]
    158     library.LLVMDisasmInstruction.restype = c_size_t
    159 
    160     library.LLVMSetDisasmOptions.argtypes = [Disassembler, c_uint64]
    161     library.LLVMSetDisasmOptions.restype = c_int
    162 
    163 
    164 callbacks['op_info'] = CFUNCTYPE(c_int, c_void_p, c_uint64, c_uint64, c_uint64,
    165                                  c_int, c_void_p)
    166 callbacks['symbol_lookup'] = CFUNCTYPE(c_char_p, c_void_p, c_uint64,
    167                                        POINTER(c_uint64), c_uint64,
    168                                        POINTER(c_char_p))
    169 
    170 register_library(lib)
    171