#===- object.py - Python Object Bindings --------------------*- python -*--===#
#
#                     The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
#===------------------------------------------------------------------------===#

r"""
Object File Interface
=====================

This module provides an interface for reading information from object files
(e.g. binary executables and libraries).

Using this module, you can obtain information about an object file's sections,
symbols, and relocations. These are represented by the classes ObjectFile,
Section, Symbol, and Relocation, respectively.

Usage
-----

The only way to use this module is to start by creating an ObjectFile. You can
create an ObjectFile by loading a file (specified by its path) or by creating a
llvm.core.MemoryBuffer and loading that.

Once you have an object file, you can inspect its sections and symbols directly
by calling get_sections() and get_symbols() respectively. To inspect
relocations, call get_relocations() on a Section instance.

Iterator Interface
------------------

The LLVM bindings expose iteration over sections, symbols, and relocations in a
way that only allows one instance to be operated on at a single time. This is
slightly annoying from a Python perspective, as it isn't very Pythonic to have
objects that "expire" but are still active from a dynamic language.

To aid working around this limitation, each Section, Symbol, and Relocation
instance caches its properties after first access. So, if the underlying
iterator is advanced, the properties can still be obtained provided they have
already been retrieved.

In addition, we also provide a "cache" method on each class to cache all
available data. You can call this on each obtained instance. Or, you can pass
cache=True to the appropriate get_XXX() method to have this done for you.

Here are some examples on how to perform iteration:

    obj = ObjectFile(filename='/bin/ls')

    # This is OK. Each Section is only accessed inside its own iteration slot.
    section_names = []
    for section in obj.get_sections():
        section_names.append(section.name)

    # This is NOT OK. You perform a lookup after the object has expired.
    symbols = list(obj.get_symbols())
    for symbol in symbols:
        print(symbol.name) # This raises because the object has expired.

    # In this example, we mix a working and failing scenario.
    symbols = []
    for symbol in obj.get_symbols():
        symbols.append(symbol)
        print(symbol.name)

    for symbol in symbols:
        print(symbol.name) # OK
        print(symbol.address) # NOT OK. We didn't look up this property before.

    # Cache everything up front.
    symbols = list(obj.get_symbols(cache=True))
    for symbol in symbols:
        print(symbol.name) # OK

"""

from ctypes import c_char_p
from ctypes import c_char
from ctypes import POINTER
from ctypes import c_uint64
from ctypes import string_at

from .common import CachedProperty
from .common import LLVMObject
from .common import c_object_p
from .common import get_library
from .core import MemoryBuffer

__all__ = [
    "lib",
    "ObjectFile",
    "Relocation",
    "Section",
    "Symbol",
]

class ObjectFile(LLVMObject):
    """Represents an object/binary file."""

    def __init__(self, filename=None, contents=None):
        """Construct an instance from a filename or a MemoryBuffer.

        filename must be a path to a file; it is loaded through
        llvm.core.MemoryBuffer(filename=...).

        contents, if given, must be a llvm.core.MemoryBuffer instance.

        If both arguments are supplied, filename takes precedence and
        contents is ignored. If neither is supplied, an Exception is raised.
        """
        if contents:
            assert isinstance(contents, MemoryBuffer)

        if filename is not None:
            contents = MemoryBuffer(filename=filename)

        if contents is None:
            raise Exception('No input found.')

        # NOTE(review): the returned pointer is not checked here; confirm
        # whether LLVMCreateObjectFile can return NULL for unsupported input.
        ptr = lib.LLVMCreateObjectFile(contents)
        LLVMObject.__init__(self, ptr, disposer=lib.LLVMDisposeObjectFile)
        self.take_ownership(contents)

    def get_sections(self, cache=False):
        """Obtain the sections in this object file.

        This is a generator for llvm.object.Section instances. If cache is
        true, cache() is called on every Section before it is yielded.

        Sections are exposed as limited-use objects. See the module's
        documentation on iterators for more.
        """
        sections = lib.LLVMGetSections(self)
        last = None
        try:
            while not lib.LLVMIsSectionIteratorAtEnd(self, sections):
                last = Section(sections)
                if cache:
                    last.cache()

                yield last

                lib.LLVMMoveToNextSection(sections)
                last.expire()
        finally:
            # Runs on exhaustion, on an exception, and when the generator is
            # closed early, so the native iterator is always released and the
            # last yielded Section can no longer touch it.
            if last is not None:
                last.expire()

            lib.LLVMDisposeSectionIterator(sections)

    def get_symbols(self, cache=False):
        """Obtain the symbols in this object file.

        This is a generator for llvm.object.Symbol instances. If cache is
        true, cache() is called on every Symbol before it is yielded.

        Each Symbol instance is a limited-use object. See this module's
        documentation on iterators for more.
        """
        symbols = lib.LLVMGetSymbols(self)
        last = None
        try:
            while not lib.LLVMIsSymbolIteratorAtEnd(self, symbols):
                last = Symbol(symbols, self)
                if cache:
                    last.cache()

                yield last

                lib.LLVMMoveToNextSymbol(symbols)
                last.expire()
        finally:
            # Always release the native iterator, even if iteration is
            # abandoned part-way through.
            if last is not None:
                last.expire()

            lib.LLVMDisposeSymbolIterator(symbols)

class Section(LLVMObject):
    """Represents a section in an object file."""

    def __init__(self, ptr):
        """Construct a new section instance.

        Section instances can currently only be created from an ObjectFile
        instance. Therefore, this constructor should not be used outside of
        this module.
        """
        LLVMObject.__init__(self, ptr)

        self.expired = False

    @CachedProperty
    def name(self):
        """Obtain the string name of the section.

        This is typically something like '.dynsym' or '.rodata'.
        """
        if self.expired:
            raise Exception('Section instance has expired.')

        return lib.LLVMGetSectionName(self)

    @CachedProperty
    def size(self):
        """The size of the section, in bytes."""
        if self.expired:
            raise Exception('Section instance has expired.')

        return lib.LLVMGetSectionSize(self)

    @CachedProperty
    def contents(self):
        """The raw contents of the section.

        Returns `size` bytes read from the section's data pointer, or None
        if the library returns a NULL pointer.
        """
        if self.expired:
            raise Exception('Section instance has expired.')

        siz = self.size

        r = lib.LLVMGetSectionContents(self)
        if r:
            # The data is not NUL-terminated, so read an explicit length.
            return string_at(r, siz)
        return None

    @CachedProperty
    def address(self):
        """The address of this section."""
        if self.expired:
            raise Exception('Section instance has expired.')

        return lib.LLVMGetSectionAddress(self)

    def has_symbol(self, symbol):
        """Returns whether a Symbol instance is present in this Section."""
        if self.expired:
            raise Exception('Section instance has expired.')

        assert isinstance(symbol, Symbol)
        return lib.LLVMGetSectionContainsSymbol(self, symbol)

    def get_relocations(self, cache=False):
        """Obtain the relocations in this Section.

        This is a generator for llvm.object.Relocation instances. If cache is
        true, cache() is called on every Relocation before it is yielded.

        Each instance is a limited-use object. See this module's documentation
        on iterators for more.
        """
        if self.expired:
            raise Exception('Section instance has expired.')

        relocations = lib.LLVMGetRelocations(self)
        last = None
        try:
            while not lib.LLVMIsRelocationIteratorAtEnd(self, relocations):
                last = Relocation(relocations)
                if cache:
                    last.cache()

                yield last

                lib.LLVMMoveToNextRelocation(relocations)
                last.expire()
        finally:
            # Always release the native iterator, even if iteration is
            # abandoned part-way through.
            if last is not None:
                last.expire()

            lib.LLVMDisposeRelocationIterator(relocations)

    def cache(self):
        """Cache properties of this Section.

        This can be called as a workaround to the single active Section
        limitation. When called, the properties of the Section are fetched so
        they are still available after the Section has been marked inactive.
        """
        getattr(self, 'name')
        getattr(self, 'size')
        getattr(self, 'contents')
        getattr(self, 'address')

    def expire(self):
        """Expire the section.

        This is called internally by the section iterator.
        """
        self.expired = True

class Symbol(LLVMObject):
    """Represents a symbol in an object file."""

    def __init__(self, ptr, object_file=None):
        """Construct a new symbol instance.

        ptr is the underlying symbol iterator reference.

        object_file is the ObjectFile the symbol belongs to. It is needed
        only by the `section` property and may be omitted (None) when the
        owning ObjectFile is not known, as is the case for symbols obtained
        through Relocation.symbol.
        """
        assert isinstance(ptr, c_object_p)
        assert object_file is None or isinstance(object_file, ObjectFile)

        LLVMObject.__init__(self, ptr)

        self.expired = False
        self._object_file = object_file

    @CachedProperty
    def name(self):
        """The str name of the symbol.

        This is often a function or variable name. Keep in mind that name
        mangling could be in effect.
        """
        if self.expired:
            raise Exception('Symbol instance has expired.')

        return lib.LLVMGetSymbolName(self)

    @CachedProperty
    def address(self):
        """The address of this symbol."""
        if self.expired:
            raise Exception('Symbol instance has expired.')

        return lib.LLVMGetSymbolAddress(self)

    @CachedProperty
    def size(self):
        """The size of the symbol, in bytes."""
        if self.expired:
            raise Exception('Symbol instance has expired.')

        return lib.LLVMGetSymbolSize(self)

    @CachedProperty
    def section(self):
        """The Section to which this Symbol belongs.

        The returned Section instance does not expire, unlike Sections that are
        commonly obtained through iteration.

        Because this obtains a new section iterator each time it is accessed,
        calling this on a number of Symbol instances could be expensive.

        Raises an Exception if the Symbol has expired or was constructed
        without an ObjectFile.
        """
        if self.expired:
            raise Exception('Symbol instance has expired.')

        if self._object_file is None:
            raise Exception('Symbol is not associated with an ObjectFile.')

        sections = lib.LLVMGetSections(self._object_file)
        lib.LLVMMoveToContainingSection(sections, self)

        return Section(sections)

    def cache(self):
        """Cache all cacheable properties.

        This fetches name, address, and size. The section property is not
        fetched here.
        """
        getattr(self, 'name')
        getattr(self, 'address')
        getattr(self, 'size')

    def expire(self):
        """Mark the object as expired to prevent future API accesses.

        This is called internally by this module and it is unlikely that
        external callers have a legitimate reason for using it.
        """
        self.expired = True

class Relocation(LLVMObject):
    """Represents a relocation definition."""

    def __init__(self, ptr):
        """Create a new relocation instance.

        Relocations are created from objects derived from Section instances.
        Therefore, this constructor should not be called outside of this
        module. See Section.get_relocations() for the proper method to obtain
        a Relocation instance.
        """
        assert isinstance(ptr, c_object_p)

        LLVMObject.__init__(self, ptr)

        self.expired = False

    @CachedProperty
    def address(self):
        """The address of this relocation."""
        if self.expired:
            raise Exception('Relocation instance has expired.')

        return lib.LLVMGetRelocationAddress(self)

    @CachedProperty
    def offset(self):
        """The offset of this relocation."""
        if self.expired:
            raise Exception('Relocation instance has expired.')

        return lib.LLVMGetRelocationOffset(self)

    @CachedProperty
    def symbol(self):
        """The Symbol corresponding to this Relocation.

        The returned Symbol is not bound to an ObjectFile, so its `section`
        property is unavailable.
        """
        if self.expired:
            raise Exception('Relocation instance has expired.')

        ptr = lib.LLVMGetRelocationSymbol(self)
        return Symbol(ptr)

    @CachedProperty
    def type_number(self):
        """The relocation type, as a number."""
        if self.expired:
            raise Exception('Relocation instance has expired.')

        return lib.LLVMGetRelocationType(self)

    @CachedProperty
    def type_name(self):
        """The relocation type's name, as a str."""
        if self.expired:
            raise Exception('Relocation instance has expired.')

        return lib.LLVMGetRelocationTypeName(self)

    @CachedProperty
    def value_string(self):
        """The value string the library reports for this relocation."""
        if self.expired:
            raise Exception('Relocation instance has expired.')

        return lib.LLVMGetRelocationValueString(self)

    def expire(self):
        """Expire this instance, making future API accesses fail."""
        self.expired = True

    def cache(self):
        """Cache all cacheable properties on this instance."""
        getattr(self, 'address')
        getattr(self, 'offset')
        getattr(self, 'symbol')
        getattr(self, 'type_number')
        getattr(self, 'type_name')
        getattr(self, 'value_string')

def register_library(library):
    """Register function prototypes with LLVM library instance.

    Sets the ctypes argtypes/restype of every Object.h function used by this
    module on the given library handle.
    """

    # Object.h functions
    library.LLVMCreateObjectFile.argtypes = [MemoryBuffer]
    library.LLVMCreateObjectFile.restype = c_object_p

    library.LLVMDisposeObjectFile.argtypes = [ObjectFile]

    library.LLVMGetSections.argtypes = [ObjectFile]
    library.LLVMGetSections.restype = c_object_p

    library.LLVMDisposeSectionIterator.argtypes = [c_object_p]

    library.LLVMIsSectionIteratorAtEnd.argtypes = [ObjectFile, c_object_p]
    library.LLVMIsSectionIteratorAtEnd.restype = bool

    library.LLVMMoveToNextSection.argtypes = [c_object_p]

    library.LLVMMoveToContainingSection.argtypes = [c_object_p, c_object_p]

    library.LLVMGetSymbols.argtypes = [ObjectFile]
    library.LLVMGetSymbols.restype = c_object_p

    library.LLVMDisposeSymbolIterator.argtypes = [c_object_p]

    library.LLVMIsSymbolIteratorAtEnd.argtypes = [ObjectFile, c_object_p]
    library.LLVMIsSymbolIteratorAtEnd.restype = bool

    library.LLVMMoveToNextSymbol.argtypes = [c_object_p]

    library.LLVMGetSectionName.argtypes = [c_object_p]
    library.LLVMGetSectionName.restype = c_char_p

    library.LLVMGetSectionSize.argtypes = [c_object_p]
    library.LLVMGetSectionSize.restype = c_uint64

    library.LLVMGetSectionContents.argtypes = [c_object_p]
    # Can't use c_char_p here as it isn't a NUL-terminated string.
    library.LLVMGetSectionContents.restype = POINTER(c_char)

    library.LLVMGetSectionAddress.argtypes = [c_object_p]
    library.LLVMGetSectionAddress.restype = c_uint64

    library.LLVMGetSectionContainsSymbol.argtypes = [c_object_p, c_object_p]
    library.LLVMGetSectionContainsSymbol.restype = bool

    library.LLVMGetRelocations.argtypes = [c_object_p]
    library.LLVMGetRelocations.restype = c_object_p

    library.LLVMDisposeRelocationIterator.argtypes = [c_object_p]

    library.LLVMIsRelocationIteratorAtEnd.argtypes = [c_object_p, c_object_p]
    library.LLVMIsRelocationIteratorAtEnd.restype = bool

    library.LLVMMoveToNextRelocation.argtypes = [c_object_p]

    library.LLVMGetSymbolName.argtypes = [Symbol]
    library.LLVMGetSymbolName.restype = c_char_p

    library.LLVMGetSymbolAddress.argtypes = [Symbol]
    library.LLVMGetSymbolAddress.restype = c_uint64

    library.LLVMGetSymbolSize.argtypes = [Symbol]
    library.LLVMGetSymbolSize.restype = c_uint64

    library.LLVMGetRelocationAddress.argtypes = [c_object_p]
    library.LLVMGetRelocationAddress.restype = c_uint64

    library.LLVMGetRelocationOffset.argtypes = [c_object_p]
    library.LLVMGetRelocationOffset.restype = c_uint64

    library.LLVMGetRelocationSymbol.argtypes = [c_object_p]
    library.LLVMGetRelocationSymbol.restype = c_object_p

    library.LLVMGetRelocationType.argtypes = [c_object_p]
    library.LLVMGetRelocationType.restype = c_uint64

    library.LLVMGetRelocationTypeName.argtypes = [c_object_p]
    library.LLVMGetRelocationTypeName.restype = c_char_p

    library.LLVMGetRelocationValueString.argtypes = [c_object_p]
    library.LLVMGetRelocationValueString.restype = c_char_p

lib = get_library()
register_library(lib)