#===- disassembler.py - Python LLVM Bindings -----------------*- python -*--===#
#
#                     The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
#===------------------------------------------------------------------------===#

from ctypes import CFUNCTYPE
from ctypes import POINTER
from ctypes import addressof
from ctypes import c_byte
from ctypes import c_char_p
from ctypes import c_int
from ctypes import c_size_t
from ctypes import c_ubyte
from ctypes import c_uint64
from ctypes import c_void_p
from ctypes import cast

from .common import LLVMObject
from .common import c_object_p
from .common import get_library

__all__ = [
    'Disassembler',
]

lib = get_library()

# ctypes callback prototypes, keyed by name. Filled in near the bottom of the
# module, before register_library() is invoked.
callbacks = {}

# Constants for set_options
Option_UseMarkup = 1

# Size, in bytes, of the buffer handed to LLVMDisasmInstruction to receive the
# textual form of one instruction.
_OUT_STR_SIZE = 255

# The text string type of the running interpreter: `unicode` on Python 2,
# `str` on Python 3.
_text_type = type(u'')

_initialized = False
_targets = ['AArch64', 'ARM', 'Hexagon', 'MSP430', 'Mips', 'NVPTX', 'PowerPC',
            'R600', 'Sparc', 'SystemZ', 'X86', 'XCore']


def _ensure_initialized():
    """Run the per-target LLVM initializers once per process.

    For every name in _targets the TargetInfo, TargetMC and Disassembler
    initializers are looked up on the loaded library and called. Initializers
    that the library does not export are skipped silently.
    """
    global _initialized
    if _initialized:
        return

    # Here one would want to call the functions
    # LLVMInitializeAll{TargetInfo,TargetMC,Disassembler}s, but
    # unfortunately they are only defined as static inline
    # functions in the header files of llvm-c, so they don't exist
    # as symbols in the shared library.
    # So until that is fixed use this hack to initialize them all
    for tgt in _targets:
        for initializer in ("TargetInfo", "TargetMC", "Disassembler"):
            try:
                f = getattr(lib, "LLVMInitialize" + tgt + initializer)
            except AttributeError:
                # This target was not built into the library.
                continue
            f()
    _initialized = True


def _to_byte_string(source):
    """Return source in a form that c_char_p() accepts.

    c_char_p() rejects mutable buffers, so bytearray and memoryview inputs are
    copied into an immutable byte string. Every other value is returned
    unchanged.
    """
    if isinstance(source, bytearray):
        return bytes(source)
    if isinstance(source, memoryview):
        # bytes(memoryview) does not yield the contents on Python 2.
        return source.tobytes()
    return source


class Disassembler(LLVMObject):
    """Represents a disassembler instance.

    Disassembler instances are tied to a specific "triple," which must be
    defined at creation time.

    Disassembler instances can disassemble instructions from multiple sources.
    """
    def __init__(self, triple):
        """Create a new disassembler instance.

        The triple argument is the triple to create the disassembler for. This
        is something like 'i386-apple-darwin9'. Both text and byte strings are
        accepted; text is encoded as UTF-8 before being handed to LLVM.

        Raises Exception if LLVM does not return a disassembler for the
        triple.
        """

        _ensure_initialized()

        # c_char_p() only takes byte strings on Python 3, so encode text.
        if isinstance(triple, _text_type):
            c_triple = triple.encode('utf-8')
        else:
            c_triple = triple

        # Both callbacks are instantiated from address 0, i.e. no callback.
        ptr = lib.LLVMCreateDisasm(c_char_p(c_triple), c_void_p(None),
                                   c_int(0), callbacks['op_info'](0),
                                   callbacks['symbol_lookup'](0))
        if not ptr:
            raise Exception('Could not obtain disassembler for triple: %s' %
                            triple)

        LLVMObject.__init__(self, ptr, disposer=lib.LLVMDisasmDispose)

    def get_instruction(self, source, pc=0):
        """Obtain the next instruction from an input source.

        The input source should be a byte string, bytearray or memoryview
        holding the machine code to decode.

        This function will start reading bytes from the beginning of the
        source.

        The pc argument specifies the address that the first byte is at.

        This returns a 2-tuple of:

          long number of bytes read. 0 if no instruction was read.
          str representation of instruction. This will be the assembly that
            represents the instruction. It is returned exactly as ctypes
            yields it from the output buffer (a byte string).
        """
        data = _to_byte_string(source)
        buf = cast(c_char_p(data), POINTER(c_ubyte))
        out_str = cast((c_byte * _OUT_STR_SIZE)(), c_char_p)

        result = lib.LLVMDisasmInstruction(self, buf, c_uint64(len(data)),
                                           c_uint64(pc), out_str,
                                           _OUT_STR_SIZE)

        return (result, out_str.value)

    def get_instructions(self, source, pc=0):
        """Obtain multiple instructions from an input source.

        This is like get_instruction() except it is a generator for all
        instructions within the source. It starts at the beginning of the
        source and reads instructions until no more can be read.

        This generator returns 3-tuple of:

          long address of instruction.
          long size of instruction, in bytes.
          str representation of instruction.
        """
        data = _to_byte_string(source)
        total = len(data)

        # source_bytes must stay referenced for as long as the generator runs:
        # the pointers computed below point into its memory.
        source_bytes = c_char_p(data)
        out_str = cast((c_byte * _OUT_STR_SIZE)(), c_char_p)

        # View the input as a fixed-size array so that addressof() gives the
        # address of its first byte.
        buf = cast(source_bytes, POINTER(c_ubyte * total)).contents
        base = addressof(buf)

        offset = 0
        while offset < total:
            address = pc + offset
            b = cast(base + offset, POINTER(c_ubyte))
            result = lib.LLVMDisasmInstruction(self, b,
                                               c_uint64(total - offset),
                                               c_uint64(address), out_str,
                                               _OUT_STR_SIZE)

            # A size of 0 means nothing could be decoded at this offset.
            if result == 0:
                break

            yield (address, result, out_str.value)

            offset += result

    def set_options(self, options):
        """Set options on this disassembler.

        The options argument is an integer option value; see the Option_*
        constants in this module.

        Raises Exception if LLVMSetDisasmOptions reports failure (returns 0).
        """
        if not lib.LLVMSetDisasmOptions(self, options):
            raise Exception('Unable to set all disassembler options in %i' % options)


def register_library(library):
    """Declare the ctypes signatures of the disassembler functions.

    Sets argtypes/restype on the LLVMCreateDisasm, LLVMDisasmDispose,
    LLVMDisasmInstruction and LLVMSetDisasmOptions attributes of library.
    The callback prototypes in `callbacks` must be defined before this is
    called.
    """
    library.LLVMCreateDisasm.argtypes = [c_char_p, c_void_p, c_int,
        callbacks['op_info'], callbacks['symbol_lookup']]
    library.LLVMCreateDisasm.restype = c_object_p

    library.LLVMDisasmDispose.argtypes = [Disassembler]
    # The C function returns void; without this ctypes assumes an int result.
    library.LLVMDisasmDispose.restype = None

    library.LLVMDisasmInstruction.argtypes = [Disassembler, POINTER(c_ubyte),
        c_uint64, c_uint64, c_char_p, c_size_t]
    library.LLVMDisasmInstruction.restype = c_size_t

    library.LLVMSetDisasmOptions.argtypes = [Disassembler, c_uint64]
    library.LLVMSetDisasmOptions.restype = c_int


# Prototypes for the two callback parameters of LLVMCreateDisasm.
callbacks['op_info'] = CFUNCTYPE(c_int, c_void_p, c_uint64, c_uint64, c_uint64,
                                 c_int, c_void_p)
callbacks['symbol_lookup'] = CFUNCTYPE(c_char_p, c_void_p, c_uint64,
                                       POINTER(c_uint64), c_uint64,
                                       POINTER(c_char_p))

register_library(lib)