Home | History | Annotate | Download | only in llvm
      1 #===- object.py - Python Object Bindings --------------------*- python -*--===#
      2 #
      3 #                     The LLVM Compiler Infrastructure
      4 #
      5 # This file is distributed under the University of Illinois Open Source
      6 # License. See LICENSE.TXT for details.
      7 #
      8 #===------------------------------------------------------------------------===#
      9 
     10 r"""
     11 Object File Interface
     12 =====================
     13 
     14 This module provides an interface for reading information from object files
     15 (e.g. binary executables and libraries).
     16 
     17 Using this module, you can obtain information about an object file's sections,
     18 symbols, and relocations. These are represented by the classes ObjectFile,
     19 Section, Symbol, and Relocation, respectively.
     20 
     21 Usage
     22 -----
     23 
     24 The only way to use this module is to start by creating an ObjectFile. You can
     25 create an ObjectFile by loading a file (specified by its path) or by creating an
     26 llvm.core.MemoryBuffer and loading that.
     27 
     28 Once you have an object file, you can inspect its sections and symbols directly
     29 by calling get_sections() and get_symbols() respectively. To inspect
     30 relocations, call get_relocations() on a Section instance.
     31 
     32 Iterator Interface
     33 ------------------
     34 
     35 The LLVM bindings expose iteration over sections, symbols, and relocations in a
     36 way that only allows one instance to be operated on at a single time. This is
     37 slightly annoying from a Python perspective, as it isn't very Pythonic to have
     38 objects that "expire" but are still active from a dynamic language.
     39 
     40 To aid working around this limitation, each Section, Symbol, and Relocation
     41 instance caches its properties after first access. So, if the underlying
     42 iterator is advanced, the properties can still be obtained provided they have
     43 already been retrieved.
     44 
     45 In addition, we also provide a "cache" method on each class to cache all
     46 available data. You can call this on each obtained instance. Or, you can pass
     47 cache=True to the appropriate get_XXX() method to have this done for you.
     48 
     49 Here are some examples on how to perform iteration:
     50 
     51     obj = ObjectFile(filename='/bin/ls')
     52 
     53     # This is OK. Each Section is only accessed inside its own iteration slot.
     54     section_names = []
     55     for section in obj.get_sections():
     56         section_names.append(section.name)
     57 
     58     # This is NOT OK. You perform a lookup after the object has expired.
     59     symbols = list(obj.get_symbols())
     60     for symbol in symbols:
     61         print symbol.name # This raises because the object has expired.
     62 
     63     # In this example, we mix a working and failing scenario.
     64     symbols = []
     65     for symbol in obj.get_symbols():
     66         symbols.append(symbol)
     67         print symbol.name
     68 
     69     for symbol in symbols:
     70         print symbol.name # OK
     71         print symbol.address # NOT OK. We didn't look up this property before.
     72 
     73     # Cache everything up front.
     74     symbols = list(obj.get_symbols(cache=True))
     75     for symbol in symbols:
     76         print symbol.name # OK
     77 
     78 """
     79 
     80 from ctypes import c_char_p
     81 from ctypes import c_char
     82 from ctypes import POINTER
     83 from ctypes import c_uint64
     84 from ctypes import string_at
     85 
     86 from .common import CachedProperty
     87 from .common import LLVMObject
     88 from .common import c_object_p
     89 from .common import get_library
     90 from .core import MemoryBuffer
     91 
# Public names of this module; this is what a star-import of the module exposes.
__all__ = [
    "lib",
    "ObjectFile",
    "Relocation",
    "Section",
    "Symbol",
]
     99 
    100 class ObjectFile(LLVMObject):
    101     """Represents an object/binary file."""
    102 
    103     def __init__(self, filename=None, contents=None):
    104         """Construct an instance from a filename or binary data.
    105 
    106         filename must be a path to a file that can be opened with open().
    107         contents can be either a native Python buffer type (like str) or a
    108         llvm.core.MemoryBuffer instance.
    109         """
    110         if contents:
    111             assert isinstance(contents, MemoryBuffer)
    112 
    113         if filename is not None:
    114             contents = MemoryBuffer(filename=filename)
    115 
    116         if contents is None:
    117             raise Exception('No input found.')
    118 
    119         ptr = lib.LLVMCreateObjectFile(contents)
    120         LLVMObject.__init__(self, ptr, disposer=lib.LLVMDisposeObjectFile)
    121         self.take_ownership(contents)
    122 
    123     def get_sections(self, cache=False):
    124         """Obtain the sections in this object file.
    125 
    126         This is a generator for llvm.object.Section instances.
    127 
    128         Sections are exposed as limited-use objects. See the module's
    129         documentation on iterators for more.
    130         """
    131         sections = lib.LLVMGetSections(self)
    132         last = None
    133         while True:
    134             if lib.LLVMIsSectionIteratorAtEnd(self, sections):
    135                 break
    136 
    137             last = Section(sections)
    138             if cache:
    139                 last.cache()
    140 
    141             yield last
    142 
    143             lib.LLVMMoveToNextSection(sections)
    144             last.expire()
    145 
    146         if last is not None:
    147             last.expire()
    148 
    149         lib.LLVMDisposeSectionIterator(sections)
    150 
    151     def get_symbols(self, cache=False):
    152         """Obtain the symbols in this object file.
    153 
    154         This is a generator for llvm.object.Symbol instances.
    155 
    156         Each Symbol instance is a limited-use object. See this module's
    157         documentation on iterators for more.
    158         """
    159         symbols = lib.LLVMGetSymbols(self)
    160         last = None
    161         while True:
    162             if lib.LLVMIsSymbolIteratorAtEnd(self, symbols):
    163                 break
    164 
    165             last = Symbol(symbols, self)
    166             if cache:
    167                 last.cache()
    168 
    169             yield last
    170 
    171             lib.LLVMMoveToNextSymbol(symbols)
    172             last.expire()
    173 
    174         if last is not None:
    175             last.expire()
    176 
    177         lib.LLVMDisposeSymbolIterator(symbols)
    178 
    179 class Section(LLVMObject):
    180     """Represents a section in an object file."""
    181 
    182     def __init__(self, ptr):
    183         """Construct a new section instance.
    184 
    185         Section instances can currently only be created from an ObjectFile
    186         instance. Therefore, this constructor should not be used outside of
    187         this module.
    188         """
    189         LLVMObject.__init__(self, ptr)
    190 
    191         self.expired = False
    192 
    193     @CachedProperty
    194     def name(self):
    195         """Obtain the string name of the section.
    196 
    197         This is typically something like '.dynsym' or '.rodata'.
    198         """
    199         if self.expired:
    200             raise Exception('Section instance has expired.')
    201 
    202         return lib.LLVMGetSectionName(self)
    203 
    204     @CachedProperty
    205     def size(self):
    206         """The size of the section, in long bytes."""
    207         if self.expired:
    208             raise Exception('Section instance has expired.')
    209 
    210         return lib.LLVMGetSectionSize(self)
    211 
    212     @CachedProperty
    213     def contents(self):
    214         if self.expired:
    215             raise Exception('Section instance has expired.')
    216 
    217         siz = self.size
    218 
    219         r = lib.LLVMGetSectionContents(self)
    220         if r:
    221             return string_at(r, siz)
    222         return None
    223 
    224     @CachedProperty
    225     def address(self):
    226         """The address of this section, in long bytes."""
    227         if self.expired:
    228             raise Exception('Section instance has expired.')
    229 
    230         return lib.LLVMGetSectionAddress(self)
    231 
    232     def has_symbol(self, symbol):
    233         """Returns whether a Symbol instance is present in this Section."""
    234         if self.expired:
    235             raise Exception('Section instance has expired.')
    236 
    237         assert isinstance(symbol, Symbol)
    238         return lib.LLVMGetSectionContainsSymbol(self, symbol)
    239 
    240     def get_relocations(self, cache=False):
    241         """Obtain the relocations in this Section.
    242 
    243         This is a generator for llvm.object.Relocation instances.
    244 
    245         Each instance is a limited used object. See this module's documentation
    246         on iterators for more.
    247         """
    248         if self.expired:
    249             raise Exception('Section instance has expired.')
    250 
    251         relocations = lib.LLVMGetRelocations(self)
    252         last = None
    253         while True:
    254             if lib.LLVMIsRelocationIteratorAtEnd(self, relocations):
    255                 break
    256 
    257             last = Relocation(relocations)
    258             if cache:
    259                 last.cache()
    260 
    261             yield last
    262 
    263             lib.LLVMMoveToNextRelocation(relocations)
    264             last.expire()
    265 
    266         if last is not None:
    267             last.expire()
    268 
    269         lib.LLVMDisposeRelocationIterator(relocations)
    270 
    271     def cache(self):
    272         """Cache properties of this Section.
    273 
    274         This can be called as a workaround to the single active Section
    275         limitation. When called, the properties of the Section are fetched so
    276         they are still available after the Section has been marked inactive.
    277         """
    278         getattr(self, 'name')
    279         getattr(self, 'size')
    280         getattr(self, 'contents')
    281         getattr(self, 'address')
    282 
    283     def expire(self):
    284         """Expire the section.
    285 
    286         This is called internally by the section iterator.
    287         """
    288         self.expired = True
    289 
    290 class Symbol(LLVMObject):
    291     """Represents a symbol in an object file."""
    292     def __init__(self, ptr, object_file):
    293         assert isinstance(ptr, c_object_p)
    294         assert isinstance(object_file, ObjectFile)
    295 
    296         LLVMObject.__init__(self, ptr)
    297 
    298         self.expired = False
    299         self._object_file = object_file
    300 
    301     @CachedProperty
    302     def name(self):
    303         """The str name of the symbol.
    304 
    305         This is often a function or variable name. Keep in mind that name
    306         mangling could be in effect.
    307         """
    308         if self.expired:
    309             raise Exception('Symbol instance has expired.')
    310 
    311         return lib.LLVMGetSymbolName(self)
    312 
    313     @CachedProperty
    314     def address(self):
    315         """The address of this symbol, in long bytes."""
    316         if self.expired:
    317             raise Exception('Symbol instance has expired.')
    318 
    319         return lib.LLVMGetSymbolAddress(self)
    320 
    321     @CachedProperty
    322     def size(self):
    323         """The size of the symbol, in long bytes."""
    324         if self.expired:
    325             raise Exception('Symbol instance has expired.')
    326 
    327         return lib.LLVMGetSymbolSize(self)
    328 
    329     @CachedProperty
    330     def section(self):
    331         """The Section to which this Symbol belongs.
    332 
    333         The returned Section instance does not expire, unlike Sections that are
    334         commonly obtained through iteration.
    335 
    336         Because this obtains a new section iterator each time it is accessed,
    337         calling this on a number of Symbol instances could be expensive.
    338         """
    339         sections = lib.LLVMGetSections(self._object_file)
    340         lib.LLVMMoveToContainingSection(sections, self)
    341 
    342         return Section(sections)
    343 
    344     def cache(self):
    345         """Cache all cacheable properties."""
    346         getattr(self, 'name')
    347         getattr(self, 'address')
    348         getattr(self, 'size')
    349 
    350     def expire(self):
    351         """Mark the object as expired to prevent future API accesses.
    352 
    353         This is called internally by this module and it is unlikely that
    354         external callers have a legitimate reason for using it.
    355         """
    356         self.expired = True
    357 
    358 class Relocation(LLVMObject):
    359     """Represents a relocation definition."""
    360     def __init__(self, ptr):
    361         """Create a new relocation instance.
    362 
    363         Relocations are created from objects derived from Section instances.
    364         Therefore, this constructor should not be called outside of this
    365         module. See Section.get_relocations() for the proper method to obtain
    366         a Relocation instance.
    367         """
    368         assert isinstance(ptr, c_object_p)
    369 
    370         LLVMObject.__init__(self, ptr)
    371 
    372         self.expired = False
    373 
    374     @CachedProperty
    375     def offset(self):
    376         """The offset of this relocation, in long bytes."""
    377         if self.expired:
    378             raise Exception('Relocation instance has expired.')
    379 
    380         return lib.LLVMGetRelocationOffset(self)
    381 
    382     @CachedProperty
    383     def symbol(self):
    384         """The Symbol corresponding to this Relocation."""
    385         if self.expired:
    386             raise Exception('Relocation instance has expired.')
    387 
    388         ptr = lib.LLVMGetRelocationSymbol(self)
    389         return Symbol(ptr)
    390 
    391     @CachedProperty
    392     def type_number(self):
    393         """The relocation type, as a long."""
    394         if self.expired:
    395             raise Exception('Relocation instance has expired.')
    396 
    397         return lib.LLVMGetRelocationType(self)
    398 
    399     @CachedProperty
    400     def type_name(self):
    401         """The relocation type's name, as a str."""
    402         if self.expired:
    403             raise Exception('Relocation instance has expired.')
    404 
    405         return lib.LLVMGetRelocationTypeName(self)
    406 
    407     @CachedProperty
    408     def value_string(self):
    409         if self.expired:
    410             raise Exception('Relocation instance has expired.')
    411 
    412         return lib.LLVMGetRelocationValueString(self)
    413 
    414     def expire(self):
    415         """Expire this instance, making future API accesses fail."""
    416         self.expired = True
    417 
    418     def cache(self):
    419         """Cache all cacheable properties on this instance."""
    420         getattr(self, 'address')
    421         getattr(self, 'offset')
    422         getattr(self, 'symbol')
    423         getattr(self, 'type')
    424         getattr(self, 'type_name')
    425         getattr(self, 'value_string')
    426 
    427 def register_library(library):
    428     """Register function prototypes with LLVM library instance."""
    429 
    430     # Object.h functions
    431     library.LLVMCreateObjectFile.argtypes = [MemoryBuffer]
    432     library.LLVMCreateObjectFile.restype = c_object_p
    433 
    434     library.LLVMDisposeObjectFile.argtypes = [ObjectFile]
    435 
    436     library.LLVMGetSections.argtypes = [ObjectFile]
    437     library.LLVMGetSections.restype = c_object_p
    438 
    439     library.LLVMDisposeSectionIterator.argtypes = [c_object_p]
    440 
    441     library.LLVMIsSectionIteratorAtEnd.argtypes = [ObjectFile, c_object_p]
    442     library.LLVMIsSectionIteratorAtEnd.restype = bool
    443 
    444     library.LLVMMoveToNextSection.argtypes = [c_object_p]
    445 
    446     library.LLVMMoveToContainingSection.argtypes = [c_object_p, c_object_p]
    447 
    448     library.LLVMGetSymbols.argtypes = [ObjectFile]
    449     library.LLVMGetSymbols.restype = c_object_p
    450 
    451     library.LLVMDisposeSymbolIterator.argtypes = [c_object_p]
    452 
    453     library.LLVMIsSymbolIteratorAtEnd.argtypes = [ObjectFile, c_object_p]
    454     library.LLVMIsSymbolIteratorAtEnd.restype = bool
    455 
    456     library.LLVMMoveToNextSymbol.argtypes = [c_object_p]
    457 
    458     library.LLVMGetSectionName.argtypes = [c_object_p]
    459     library.LLVMGetSectionName.restype = c_char_p
    460 
    461     library.LLVMGetSectionSize.argtypes = [c_object_p]
    462     library.LLVMGetSectionSize.restype = c_uint64
    463 
    464     library.LLVMGetSectionContents.argtypes = [c_object_p]
    465     # Can't use c_char_p here as it isn't a NUL-terminated string.
    466     library.LLVMGetSectionContents.restype = POINTER(c_char)
    467 
    468     library.LLVMGetSectionAddress.argtypes = [c_object_p]
    469     library.LLVMGetSectionAddress.restype = c_uint64
    470 
    471     library.LLVMGetSectionContainsSymbol.argtypes = [c_object_p, c_object_p]
    472     library.LLVMGetSectionContainsSymbol.restype = bool
    473 
    474     library.LLVMGetRelocations.argtypes = [c_object_p]
    475     library.LLVMGetRelocations.restype = c_object_p
    476 
    477     library.LLVMDisposeRelocationIterator.argtypes = [c_object_p]
    478 
    479     library.LLVMIsRelocationIteratorAtEnd.argtypes = [c_object_p, c_object_p]
    480     library.LLVMIsRelocationIteratorAtEnd.restype = bool
    481 
    482     library.LLVMMoveToNextRelocation.argtypes = [c_object_p]
    483 
    484     library.LLVMGetSymbolName.argtypes = [Symbol]
    485     library.LLVMGetSymbolName.restype = c_char_p
    486 
    487     library.LLVMGetSymbolAddress.argtypes = [Symbol]
    488     library.LLVMGetSymbolAddress.restype = c_uint64
    489 
    490     library.LLVMGetSymbolSize.argtypes = [Symbol]
    491     library.LLVMGetSymbolSize.restype = c_uint64
    492 
    493     library.LLVMGetRelocationOffset.argtypes = [c_object_p]
    494     library.LLVMGetRelocationOffset.restype = c_uint64
    495 
    496     library.LLVMGetRelocationSymbol.argtypes = [c_object_p]
    497     library.LLVMGetRelocationSymbol.restype = c_object_p
    498 
    499     library.LLVMGetRelocationType.argtypes = [c_object_p]
    500     library.LLVMGetRelocationType.restype = c_uint64
    501 
    502     library.LLVMGetRelocationTypeName.argtypes = [c_object_p]
    503     library.LLVMGetRelocationTypeName.restype = c_char_p
    504 
    505     library.LLVMGetRelocationValueString.argtypes = [c_object_p]
    506     library.LLVMGetRelocationValueString.restype = c_char_p
    507 
# Runs at import time: obtain the library instance from get_library() and
# register the prototypes of the functions this module calls on it.
lib = get_library()
register_library(lib)
    510