1 # -*- coding: Latin-1 -*- 2 """pefile, Portable Executable reader module 3 4 5 All the PE file basic structures are available with their default names 6 as attributes of the instance returned. 7 8 Processed elements such as the import table are made available with lowercase 9 names, to differentiate them from the upper case basic structure names. 10 11 pefile has been tested against the limits of valid PE headers, that is, malware. 12 Lots of packed malware attempt to abuse the format way beyond its standard use. 13 To the best of my knowledge most of the abuses are handled gracefully. 14 15 Copyright (c) 2005, 2006, 2007, 2008 Ero Carrera <ero (at] dkbza.org> 16 17 All rights reserved. 18 19 For detailed copyright information see the file COPYING in 20 the root of the distribution archive. 21 """ 22 23 __author__ = 'Ero Carrera' 24 __version__ = '1.2.9.1' 25 __contact__ = 'ero (at] dkbza.org' 26 27 28 import os 29 import struct 30 import time 31 import math 32 import re 33 import exceptions 34 import string 35 import array 36 37 sha1, sha256, sha512, md5 = None, None, None, None 38 39 try: 40 import hashlib 41 sha1 = hashlib.sha1 42 sha256 = hashlib.sha256 43 sha512 = hashlib.sha512 44 md5 = hashlib.md5 45 except ImportError: 46 try: 47 import sha 48 sha1 = sha.new 49 except ImportError: 50 pass 51 try: 52 import md5 53 md5 = md5.new 54 except ImportError: 55 pass 56 57 58 fast_load = False 59 60 IMAGE_DOS_SIGNATURE = 0x5A4D 61 IMAGE_OS2_SIGNATURE = 0x454E 62 IMAGE_OS2_SIGNATURE_LE = 0x454C 63 IMAGE_VXD_SIGNATURE = 0x454C 64 IMAGE_NT_SIGNATURE = 0x00004550 65 IMAGE_NUMBEROF_DIRECTORY_ENTRIES= 16 66 IMAGE_ORDINAL_FLAG = 0x80000000L 67 IMAGE_ORDINAL_FLAG64 = 0x8000000000000000L 68 OPTIONAL_HEADER_MAGIC_PE = 0x10b 69 OPTIONAL_HEADER_MAGIC_PE_PLUS = 0x20b 70 71 72 directory_entry_types = [ 73 ('IMAGE_DIRECTORY_ENTRY_EXPORT', 0), 74 ('IMAGE_DIRECTORY_ENTRY_IMPORT', 1), 75 ('IMAGE_DIRECTORY_ENTRY_RESOURCE', 2), 76 ('IMAGE_DIRECTORY_ENTRY_EXCEPTION', 3), 77 
('IMAGE_DIRECTORY_ENTRY_SECURITY', 4), 78 ('IMAGE_DIRECTORY_ENTRY_BASERELOC', 5), 79 ('IMAGE_DIRECTORY_ENTRY_DEBUG', 6), 80 ('IMAGE_DIRECTORY_ENTRY_COPYRIGHT', 7), 81 ('IMAGE_DIRECTORY_ENTRY_GLOBALPTR', 8), 82 ('IMAGE_DIRECTORY_ENTRY_TLS', 9), 83 ('IMAGE_DIRECTORY_ENTRY_LOAD_CONFIG', 10), 84 ('IMAGE_DIRECTORY_ENTRY_BOUND_IMPORT', 11), 85 ('IMAGE_DIRECTORY_ENTRY_IAT', 12), 86 ('IMAGE_DIRECTORY_ENTRY_DELAY_IMPORT', 13), 87 ('IMAGE_DIRECTORY_ENTRY_COM_DESCRIPTOR',14), 88 ('IMAGE_DIRECTORY_ENTRY_RESERVED', 15) ] 89 90 DIRECTORY_ENTRY = dict([(e[1], e[0]) for e in directory_entry_types]+directory_entry_types) 91 92 93 image_characteristics = [ 94 ('IMAGE_FILE_RELOCS_STRIPPED', 0x0001), 95 ('IMAGE_FILE_EXECUTABLE_IMAGE', 0x0002), 96 ('IMAGE_FILE_LINE_NUMS_STRIPPED', 0x0004), 97 ('IMAGE_FILE_LOCAL_SYMS_STRIPPED', 0x0008), 98 ('IMAGE_FILE_AGGRESIVE_WS_TRIM', 0x0010), 99 ('IMAGE_FILE_LARGE_ADDRESS_AWARE', 0x0020), 100 ('IMAGE_FILE_16BIT_MACHINE', 0x0040), 101 ('IMAGE_FILE_BYTES_REVERSED_LO', 0x0080), 102 ('IMAGE_FILE_32BIT_MACHINE', 0x0100), 103 ('IMAGE_FILE_DEBUG_STRIPPED', 0x0200), 104 ('IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP', 0x0400), 105 ('IMAGE_FILE_NET_RUN_FROM_SWAP', 0x0800), 106 ('IMAGE_FILE_SYSTEM', 0x1000), 107 ('IMAGE_FILE_DLL', 0x2000), 108 ('IMAGE_FILE_UP_SYSTEM_ONLY', 0x4000), 109 ('IMAGE_FILE_BYTES_REVERSED_HI', 0x8000) ] 110 111 IMAGE_CHARACTERISTICS = dict([(e[1], e[0]) for e in 112 image_characteristics]+image_characteristics) 113 114 115 section_characteristics = [ 116 ('IMAGE_SCN_CNT_CODE', 0x00000020), 117 ('IMAGE_SCN_CNT_INITIALIZED_DATA', 0x00000040), 118 ('IMAGE_SCN_CNT_UNINITIALIZED_DATA', 0x00000080), 119 ('IMAGE_SCN_LNK_OTHER', 0x00000100), 120 ('IMAGE_SCN_LNK_INFO', 0x00000200), 121 ('IMAGE_SCN_LNK_REMOVE', 0x00000800), 122 ('IMAGE_SCN_LNK_COMDAT', 0x00001000), 123 ('IMAGE_SCN_MEM_FARDATA', 0x00008000), 124 ('IMAGE_SCN_MEM_PURGEABLE', 0x00020000), 125 ('IMAGE_SCN_MEM_16BIT', 0x00020000), 126 ('IMAGE_SCN_MEM_LOCKED', 0x00040000), 127 
('IMAGE_SCN_MEM_PRELOAD', 0x00080000), 128 ('IMAGE_SCN_ALIGN_1BYTES', 0x00100000), 129 ('IMAGE_SCN_ALIGN_2BYTES', 0x00200000), 130 ('IMAGE_SCN_ALIGN_4BYTES', 0x00300000), 131 ('IMAGE_SCN_ALIGN_8BYTES', 0x00400000), 132 ('IMAGE_SCN_ALIGN_16BYTES', 0x00500000), 133 ('IMAGE_SCN_ALIGN_32BYTES', 0x00600000), 134 ('IMAGE_SCN_ALIGN_64BYTES', 0x00700000), 135 ('IMAGE_SCN_ALIGN_128BYTES', 0x00800000), 136 ('IMAGE_SCN_ALIGN_256BYTES', 0x00900000), 137 ('IMAGE_SCN_ALIGN_512BYTES', 0x00A00000), 138 ('IMAGE_SCN_ALIGN_1024BYTES', 0x00B00000), 139 ('IMAGE_SCN_ALIGN_2048BYTES', 0x00C00000), 140 ('IMAGE_SCN_ALIGN_4096BYTES', 0x00D00000), 141 ('IMAGE_SCN_ALIGN_8192BYTES', 0x00E00000), 142 ('IMAGE_SCN_ALIGN_MASK', 0x00F00000), 143 ('IMAGE_SCN_LNK_NRELOC_OVFL', 0x01000000), 144 ('IMAGE_SCN_MEM_DISCARDABLE', 0x02000000), 145 ('IMAGE_SCN_MEM_NOT_CACHED', 0x04000000), 146 ('IMAGE_SCN_MEM_NOT_PAGED', 0x08000000), 147 ('IMAGE_SCN_MEM_SHARED', 0x10000000), 148 ('IMAGE_SCN_MEM_EXECUTE', 0x20000000), 149 ('IMAGE_SCN_MEM_READ', 0x40000000), 150 ('IMAGE_SCN_MEM_WRITE', 0x80000000L) ] 151 152 SECTION_CHARACTERISTICS = dict([(e[1], e[0]) for e in 153 section_characteristics]+section_characteristics) 154 155 156 debug_types = [ 157 ('IMAGE_DEBUG_TYPE_UNKNOWN', 0), 158 ('IMAGE_DEBUG_TYPE_COFF', 1), 159 ('IMAGE_DEBUG_TYPE_CODEVIEW', 2), 160 ('IMAGE_DEBUG_TYPE_FPO', 3), 161 ('IMAGE_DEBUG_TYPE_MISC', 4), 162 ('IMAGE_DEBUG_TYPE_EXCEPTION', 5), 163 ('IMAGE_DEBUG_TYPE_FIXUP', 6), 164 ('IMAGE_DEBUG_TYPE_OMAP_TO_SRC', 7), 165 ('IMAGE_DEBUG_TYPE_OMAP_FROM_SRC', 8), 166 ('IMAGE_DEBUG_TYPE_BORLAND', 9), 167 ('IMAGE_DEBUG_TYPE_RESERVED10', 10) ] 168 169 DEBUG_TYPE = dict([(e[1], e[0]) for e in debug_types]+debug_types) 170 171 172 subsystem_types = [ 173 ('IMAGE_SUBSYSTEM_UNKNOWN', 0), 174 ('IMAGE_SUBSYSTEM_NATIVE', 1), 175 ('IMAGE_SUBSYSTEM_WINDOWS_GUI', 2), 176 ('IMAGE_SUBSYSTEM_WINDOWS_CUI', 3), 177 ('IMAGE_SUBSYSTEM_OS2_CUI', 5), 178 ('IMAGE_SUBSYSTEM_POSIX_CUI', 7), 179 ('IMAGE_SUBSYSTEM_WINDOWS_CE_GUI', 
9), 180 ('IMAGE_SUBSYSTEM_EFI_APPLICATION', 10), 181 ('IMAGE_SUBSYSTEM_EFI_BOOT_SERVICE_DRIVER', 11), 182 ('IMAGE_SUBSYSTEM_EFI_RUNTIME_DRIVER', 12), 183 ('IMAGE_SUBSYSTEM_EFI_ROM', 13), 184 ('IMAGE_SUBSYSTEM_XBOX', 14)] 185 186 SUBSYSTEM_TYPE = dict([(e[1], e[0]) for e in subsystem_types]+subsystem_types) 187 188 189 machine_types = [ 190 ('IMAGE_FILE_MACHINE_UNKNOWN', 0), 191 ('IMAGE_FILE_MACHINE_AM33', 0x1d3), 192 ('IMAGE_FILE_MACHINE_AMD64', 0x8664), 193 ('IMAGE_FILE_MACHINE_ARM', 0x1c0), 194 ('IMAGE_FILE_MACHINE_EBC', 0xebc), 195 ('IMAGE_FILE_MACHINE_I386', 0x14c), 196 ('IMAGE_FILE_MACHINE_IA64', 0x200), 197 ('IMAGE_FILE_MACHINE_MR32', 0x9041), 198 ('IMAGE_FILE_MACHINE_MIPS16', 0x266), 199 ('IMAGE_FILE_MACHINE_MIPSFPU', 0x366), 200 ('IMAGE_FILE_MACHINE_MIPSFPU16',0x466), 201 ('IMAGE_FILE_MACHINE_POWERPC', 0x1f0), 202 ('IMAGE_FILE_MACHINE_POWERPCFP',0x1f1), 203 ('IMAGE_FILE_MACHINE_R4000', 0x166), 204 ('IMAGE_FILE_MACHINE_SH3', 0x1a2), 205 ('IMAGE_FILE_MACHINE_SH3DSP', 0x1a3), 206 ('IMAGE_FILE_MACHINE_SH4', 0x1a6), 207 ('IMAGE_FILE_MACHINE_SH5', 0x1a8), 208 ('IMAGE_FILE_MACHINE_THUMB', 0x1c2), 209 ('IMAGE_FILE_MACHINE_WCEMIPSV2',0x169), 210 ] 211 212 MACHINE_TYPE = dict([(e[1], e[0]) for e in machine_types]+machine_types) 213 214 215 relocation_types = [ 216 ('IMAGE_REL_BASED_ABSOLUTE', 0), 217 ('IMAGE_REL_BASED_HIGH', 1), 218 ('IMAGE_REL_BASED_LOW', 2), 219 ('IMAGE_REL_BASED_HIGHLOW', 3), 220 ('IMAGE_REL_BASED_HIGHADJ', 4), 221 ('IMAGE_REL_BASED_MIPS_JMPADDR', 5), 222 ('IMAGE_REL_BASED_SECTION', 6), 223 ('IMAGE_REL_BASED_REL', 7), 224 ('IMAGE_REL_BASED_MIPS_JMPADDR16', 9), 225 ('IMAGE_REL_BASED_IA64_IMM64', 9), 226 ('IMAGE_REL_BASED_DIR64', 10), 227 ('IMAGE_REL_BASED_HIGH3ADJ', 11) ] 228 229 RELOCATION_TYPE = dict([(e[1], e[0]) for e in relocation_types]+relocation_types) 230 231 232 dll_characteristics = [ 233 ('IMAGE_DLL_CHARACTERISTICS_RESERVED_0x0001', 0x0001), 234 ('IMAGE_DLL_CHARACTERISTICS_RESERVED_0x0002', 0x0002), 235 
('IMAGE_DLL_CHARACTERISTICS_RESERVED_0x0004', 0x0004), 236 ('IMAGE_DLL_CHARACTERISTICS_RESERVED_0x0008', 0x0008), 237 ('IMAGE_DLL_CHARACTERISTICS_DYNAMIC_BASE', 0x0040), 238 ('IMAGE_DLL_CHARACTERISTICS_FORCE_INTEGRITY', 0x0080), 239 ('IMAGE_DLL_CHARACTERISTICS_NX_COMPAT', 0x0100), 240 ('IMAGE_DLL_CHARACTERISTICS_NO_ISOLATION', 0x0200), 241 ('IMAGE_DLL_CHARACTERISTICS_NO_SEH', 0x0400), 242 ('IMAGE_DLL_CHARACTERISTICS_NO_BIND', 0x0800), 243 ('IMAGE_DLL_CHARACTERISTICS_RESERVED_0x1000', 0x1000), 244 ('IMAGE_DLL_CHARACTERISTICS_WDM_DRIVER', 0x2000), 245 ('IMAGE_DLL_CHARACTERISTICS_TERMINAL_SERVER_AWARE', 0x8000) ] 246 247 DLL_CHARACTERISTICS = dict([(e[1], e[0]) for e in dll_characteristics]+dll_characteristics) 248 249 250 # Resource types 251 resource_type = [ 252 ('RT_CURSOR', 1), 253 ('RT_BITMAP', 2), 254 ('RT_ICON', 3), 255 ('RT_MENU', 4), 256 ('RT_DIALOG', 5), 257 ('RT_STRING', 6), 258 ('RT_FONTDIR', 7), 259 ('RT_FONT', 8), 260 ('RT_ACCELERATOR', 9), 261 ('RT_RCDATA', 10), 262 ('RT_MESSAGETABLE', 11), 263 ('RT_GROUP_CURSOR', 12), 264 ('RT_GROUP_ICON', 14), 265 ('RT_VERSION', 16), 266 ('RT_DLGINCLUDE', 17), 267 ('RT_PLUGPLAY', 19), 268 ('RT_VXD', 20), 269 ('RT_ANICURSOR', 21), 270 ('RT_ANIICON', 22), 271 ('RT_HTML', 23), 272 ('RT_MANIFEST', 24) ] 273 274 RESOURCE_TYPE = dict([(e[1], e[0]) for e in resource_type]+resource_type) 275 276 277 # Language definitions 278 lang = [ 279 ('LANG_NEUTRAL', 0x00), 280 ('LANG_INVARIANT', 0x7f), 281 ('LANG_AFRIKAANS', 0x36), 282 ('LANG_ALBANIAN', 0x1c), 283 ('LANG_ARABIC', 0x01), 284 ('LANG_ARMENIAN', 0x2b), 285 ('LANG_ASSAMESE', 0x4d), 286 ('LANG_AZERI', 0x2c), 287 ('LANG_BASQUE', 0x2d), 288 ('LANG_BELARUSIAN', 0x23), 289 ('LANG_BENGALI', 0x45), 290 ('LANG_BULGARIAN', 0x02), 291 ('LANG_CATALAN', 0x03), 292 ('LANG_CHINESE', 0x04), 293 ('LANG_CROATIAN', 0x1a), 294 ('LANG_CZECH', 0x05), 295 ('LANG_DANISH', 0x06), 296 ('LANG_DIVEHI', 0x65), 297 ('LANG_DUTCH', 0x13), 298 ('LANG_ENGLISH', 0x09), 299 ('LANG_ESTONIAN', 0x25), 300 
('LANG_FAEROESE', 0x38), 301 ('LANG_FARSI', 0x29), 302 ('LANG_FINNISH', 0x0b), 303 ('LANG_FRENCH', 0x0c), 304 ('LANG_GALICIAN', 0x56), 305 ('LANG_GEORGIAN', 0x37), 306 ('LANG_GERMAN', 0x07), 307 ('LANG_GREEK', 0x08), 308 ('LANG_GUJARATI', 0x47), 309 ('LANG_HEBREW', 0x0d), 310 ('LANG_HINDI', 0x39), 311 ('LANG_HUNGARIAN', 0x0e), 312 ('LANG_ICELANDIC', 0x0f), 313 ('LANG_INDONESIAN', 0x21), 314 ('LANG_ITALIAN', 0x10), 315 ('LANG_JAPANESE', 0x11), 316 ('LANG_KANNADA', 0x4b), 317 ('LANG_KASHMIRI', 0x60), 318 ('LANG_KAZAK', 0x3f), 319 ('LANG_KONKANI', 0x57), 320 ('LANG_KOREAN', 0x12), 321 ('LANG_KYRGYZ', 0x40), 322 ('LANG_LATVIAN', 0x26), 323 ('LANG_LITHUANIAN', 0x27), 324 ('LANG_MACEDONIAN', 0x2f), 325 ('LANG_MALAY', 0x3e), 326 ('LANG_MALAYALAM', 0x4c), 327 ('LANG_MANIPURI', 0x58), 328 ('LANG_MARATHI', 0x4e), 329 ('LANG_MONGOLIAN', 0x50), 330 ('LANG_NEPALI', 0x61), 331 ('LANG_NORWEGIAN', 0x14), 332 ('LANG_ORIYA', 0x48), 333 ('LANG_POLISH', 0x15), 334 ('LANG_PORTUGUESE', 0x16), 335 ('LANG_PUNJABI', 0x46), 336 ('LANG_ROMANIAN', 0x18), 337 ('LANG_RUSSIAN', 0x19), 338 ('LANG_SANSKRIT', 0x4f), 339 ('LANG_SERBIAN', 0x1a), 340 ('LANG_SINDHI', 0x59), 341 ('LANG_SLOVAK', 0x1b), 342 ('LANG_SLOVENIAN', 0x24), 343 ('LANG_SPANISH', 0x0a), 344 ('LANG_SWAHILI', 0x41), 345 ('LANG_SWEDISH', 0x1d), 346 ('LANG_SYRIAC', 0x5a), 347 ('LANG_TAMIL', 0x49), 348 ('LANG_TATAR', 0x44), 349 ('LANG_TELUGU', 0x4a), 350 ('LANG_THAI', 0x1e), 351 ('LANG_TURKISH', 0x1f), 352 ('LANG_UKRAINIAN', 0x22), 353 ('LANG_URDU', 0x20), 354 ('LANG_UZBEK', 0x43), 355 ('LANG_VIETNAMESE', 0x2a), 356 ('LANG_GAELIC', 0x3c), 357 ('LANG_MALTESE', 0x3a), 358 ('LANG_MAORI', 0x28), 359 ('LANG_RHAETO_ROMANCE',0x17), 360 ('LANG_SAAMI', 0x3b), 361 ('LANG_SORBIAN', 0x2e), 362 ('LANG_SUTU', 0x30), 363 ('LANG_TSONGA', 0x31), 364 ('LANG_TSWANA', 0x32), 365 ('LANG_VENDA', 0x33), 366 ('LANG_XHOSA', 0x34), 367 ('LANG_ZULU', 0x35), 368 ('LANG_ESPERANTO', 0x8f), 369 ('LANG_WALON', 0x90), 370 ('LANG_CORNISH', 0x91), 371 ('LANG_WELSH', 
0x92), 372 ('LANG_BRETON', 0x93) ] 373 374 LANG = dict(lang+[(e[1], e[0]) for e in lang]) 375 376 377 # Sublanguage definitions 378 sublang = [ 379 ('SUBLANG_NEUTRAL', 0x00), 380 ('SUBLANG_DEFAULT', 0x01), 381 ('SUBLANG_SYS_DEFAULT', 0x02), 382 ('SUBLANG_ARABIC_SAUDI_ARABIA', 0x01), 383 ('SUBLANG_ARABIC_IRAQ', 0x02), 384 ('SUBLANG_ARABIC_EGYPT', 0x03), 385 ('SUBLANG_ARABIC_LIBYA', 0x04), 386 ('SUBLANG_ARABIC_ALGERIA', 0x05), 387 ('SUBLANG_ARABIC_MOROCCO', 0x06), 388 ('SUBLANG_ARABIC_TUNISIA', 0x07), 389 ('SUBLANG_ARABIC_OMAN', 0x08), 390 ('SUBLANG_ARABIC_YEMEN', 0x09), 391 ('SUBLANG_ARABIC_SYRIA', 0x0a), 392 ('SUBLANG_ARABIC_JORDAN', 0x0b), 393 ('SUBLANG_ARABIC_LEBANON', 0x0c), 394 ('SUBLANG_ARABIC_KUWAIT', 0x0d), 395 ('SUBLANG_ARABIC_UAE', 0x0e), 396 ('SUBLANG_ARABIC_BAHRAIN', 0x0f), 397 ('SUBLANG_ARABIC_QATAR', 0x10), 398 ('SUBLANG_AZERI_LATIN', 0x01), 399 ('SUBLANG_AZERI_CYRILLIC', 0x02), 400 ('SUBLANG_CHINESE_TRADITIONAL', 0x01), 401 ('SUBLANG_CHINESE_SIMPLIFIED', 0x02), 402 ('SUBLANG_CHINESE_HONGKONG', 0x03), 403 ('SUBLANG_CHINESE_SINGAPORE', 0x04), 404 ('SUBLANG_CHINESE_MACAU', 0x05), 405 ('SUBLANG_DUTCH', 0x01), 406 ('SUBLANG_DUTCH_BELGIAN', 0x02), 407 ('SUBLANG_ENGLISH_US', 0x01), 408 ('SUBLANG_ENGLISH_UK', 0x02), 409 ('SUBLANG_ENGLISH_AUS', 0x03), 410 ('SUBLANG_ENGLISH_CAN', 0x04), 411 ('SUBLANG_ENGLISH_NZ', 0x05), 412 ('SUBLANG_ENGLISH_EIRE', 0x06), 413 ('SUBLANG_ENGLISH_SOUTH_AFRICA', 0x07), 414 ('SUBLANG_ENGLISH_JAMAICA', 0x08), 415 ('SUBLANG_ENGLISH_CARIBBEAN', 0x09), 416 ('SUBLANG_ENGLISH_BELIZE', 0x0a), 417 ('SUBLANG_ENGLISH_TRINIDAD', 0x0b), 418 ('SUBLANG_ENGLISH_ZIMBABWE', 0x0c), 419 ('SUBLANG_ENGLISH_PHILIPPINES', 0x0d), 420 ('SUBLANG_FRENCH', 0x01), 421 ('SUBLANG_FRENCH_BELGIAN', 0x02), 422 ('SUBLANG_FRENCH_CANADIAN', 0x03), 423 ('SUBLANG_FRENCH_SWISS', 0x04), 424 ('SUBLANG_FRENCH_LUXEMBOURG', 0x05), 425 ('SUBLANG_FRENCH_MONACO', 0x06), 426 ('SUBLANG_GERMAN', 0x01), 427 ('SUBLANG_GERMAN_SWISS', 0x02), 428 ('SUBLANG_GERMAN_AUSTRIAN', 0x03), 429 
('SUBLANG_GERMAN_LUXEMBOURG', 0x04), 430 ('SUBLANG_GERMAN_LIECHTENSTEIN', 0x05), 431 ('SUBLANG_ITALIAN', 0x01), 432 ('SUBLANG_ITALIAN_SWISS', 0x02), 433 ('SUBLANG_KASHMIRI_SASIA', 0x02), 434 ('SUBLANG_KASHMIRI_INDIA', 0x02), 435 ('SUBLANG_KOREAN', 0x01), 436 ('SUBLANG_LITHUANIAN', 0x01), 437 ('SUBLANG_MALAY_MALAYSIA', 0x01), 438 ('SUBLANG_MALAY_BRUNEI_DARUSSALAM', 0x02), 439 ('SUBLANG_NEPALI_INDIA', 0x02), 440 ('SUBLANG_NORWEGIAN_BOKMAL', 0x01), 441 ('SUBLANG_NORWEGIAN_NYNORSK', 0x02), 442 ('SUBLANG_PORTUGUESE', 0x02), 443 ('SUBLANG_PORTUGUESE_BRAZILIAN', 0x01), 444 ('SUBLANG_SERBIAN_LATIN', 0x02), 445 ('SUBLANG_SERBIAN_CYRILLIC', 0x03), 446 ('SUBLANG_SPANISH', 0x01), 447 ('SUBLANG_SPANISH_MEXICAN', 0x02), 448 ('SUBLANG_SPANISH_MODERN', 0x03), 449 ('SUBLANG_SPANISH_GUATEMALA', 0x04), 450 ('SUBLANG_SPANISH_COSTA_RICA', 0x05), 451 ('SUBLANG_SPANISH_PANAMA', 0x06), 452 ('SUBLANG_SPANISH_DOMINICAN_REPUBLIC', 0x07), 453 ('SUBLANG_SPANISH_VENEZUELA', 0x08), 454 ('SUBLANG_SPANISH_COLOMBIA', 0x09), 455 ('SUBLANG_SPANISH_PERU', 0x0a), 456 ('SUBLANG_SPANISH_ARGENTINA', 0x0b), 457 ('SUBLANG_SPANISH_ECUADOR', 0x0c), 458 ('SUBLANG_SPANISH_CHILE', 0x0d), 459 ('SUBLANG_SPANISH_URUGUAY', 0x0e), 460 ('SUBLANG_SPANISH_PARAGUAY', 0x0f), 461 ('SUBLANG_SPANISH_BOLIVIA', 0x10), 462 ('SUBLANG_SPANISH_EL_SALVADOR', 0x11), 463 ('SUBLANG_SPANISH_HONDURAS', 0x12), 464 ('SUBLANG_SPANISH_NICARAGUA', 0x13), 465 ('SUBLANG_SPANISH_PUERTO_RICO', 0x14), 466 ('SUBLANG_SWEDISH', 0x01), 467 ('SUBLANG_SWEDISH_FINLAND', 0x02), 468 ('SUBLANG_URDU_PAKISTAN', 0x01), 469 ('SUBLANG_URDU_INDIA', 0x02), 470 ('SUBLANG_UZBEK_LATIN', 0x01), 471 ('SUBLANG_UZBEK_CYRILLIC', 0x02), 472 ('SUBLANG_DUTCH_SURINAM', 0x03), 473 ('SUBLANG_ROMANIAN', 0x01), 474 ('SUBLANG_ROMANIAN_MOLDAVIA', 0x02), 475 ('SUBLANG_RUSSIAN', 0x01), 476 ('SUBLANG_RUSSIAN_MOLDAVIA', 0x02), 477 ('SUBLANG_CROATIAN', 0x01), 478 ('SUBLANG_LITHUANIAN_CLASSIC', 0x02), 479 ('SUBLANG_GAELIC', 0x01), 480 ('SUBLANG_GAELIC_SCOTTISH', 0x02), 481 
class UnicodeStringWrapperPostProcessor:
    """Helper for telling plain Unicode strings from Pascal strings.

    A list of strings is wrapped in instances of this class in the hope
    that the way consecutive strings overlap helps decide their type.

    pe:      object used to read the data; this class calls its
             get_data(rva, length) and get_string_u_at_rva() methods
    rva_ptr: RVA at which the string (or its length word) starts
    """

    def __init__(self, pe, rva_ptr):
        self.pe = pe
        self.rva_ptr = rva_ptr
        # Set by render_pascal_16()/render_unicode_16()
        self.string = None


    def get_rva(self):
        """Get the RVA of the string."""

        return self.rva_ptr


    def __str__(self):
        """Return the escaped ASCII representation of the string.

        Characters not in string.printable are rendered as \\xNN escapes.
        An empty string is returned if nothing has been rendered yet.
        """

        def convert_char(char):
            if char in string.printable:
                return char
            return r'\x%02x' % ord(char)

        if self.string:
            return ''.join([convert_char(c) for c in self.string])

        return ''


    def invalidate(self):
        """Kept for API compatibility; this method has no effect.

        The previous body only rebound the local name 'self' to None,
        which changes neither the instance nor the caller's reference.
        The misleading assignment has been removed; behavior is unchanged.
        """

        return None


    def render_pascal_16(self):
        """Render the string as a Pascal string with a 16-bit length word.

        The text is read starting right after the 2-byte length word and
        limited to the length stored in that word.
        """

        self.string = self.pe.get_string_u_at_rva(
            self.rva_ptr+2,
            max_length=self.__get_pascal_16_length())


    def ask_pascal_16(self, next_rva_ptr):
        """The next RVA is taken to be the one immediately following this one.

        Such RVA could indicate the natural end of the string and will be checked
        with the possible length contained in the first word.

        Returns True (and stores the length in self.length) if the length
        word matches the number of 16-bit characters up to next_rva_ptr,
        False otherwise or when the length word cannot be read.
        """

        length = self.__get_pascal_16_length()

        # A failed read yields False, which would otherwise compare equal
        # to a computed length of 0 and be reported as a match.
        if length is False:
            return False

        # Floor division keeps the integer semantics the code relied on.
        if length == (next_rva_ptr - (self.rva_ptr+2)) // 2:
            self.length = length
            return True

        return False


    def __get_pascal_16_length(self):
        """Return the 16-bit word at the start of the string (False on failure)."""

        return self.__get_word_value_at_rva(self.rva_ptr)


    def __get_word_value_at_rva(self, rva):
        """Return the little-endian 16-bit word stored at 'rva'.

        Returns False if the data cannot be read or is shorter than
        two bytes.
        """

        try:
            # Read at the requested RVA (the previous code always read at
            # self.rva_ptr, ignoring the argument).
            data = self.pe.get_data(rva, 2)
        except PEFormatError:
            return False

        if len(data)<2:
            return False

        return struct.unpack('<H', data)[0]


    #def render_pascal_8(self):
    #    """"""


    def ask_unicode_16(self, next_rva_ptr):
        """The next RVA is taken to be the one immediately following this one.

        Such RVA could indicate the natural end of the string and will be checked
        to see if there's a Unicode NULL character there.

        Returns True (and stores the byte length in self.length) if the
        word right before next_rva_ptr is zero, False otherwise or when
        that word cannot be read.
        """

        word = self.__get_word_value_at_rva(next_rva_ptr-2)

        # 'False == 0' is true in Python, so a failed read must be told
        # apart from a genuine NULL word.
        if word is not False and word == 0:
            self.length = next_rva_ptr - self.rva_ptr
            return True

        return False


    def render_unicode_16(self):
        """Render the string as a plain 16-bit Unicode string."""

        self.string = self.pe.get_string_u_at_rva(self.rva_ptr)


class PEFormatError(Exception):
    """Generic PE format error exception."""

    def __init__(self, value):
        self.value = value

    def __str__(self):
        return repr(self.value)


class Dump:
    """Convenience class for dumping the PE information.

    Text is accumulated in the 'text' attribute.
    """

    def __init__(self):
        self.text = ''


    def add_lines(self, txt, indent=0):
        """Adds a list of lines.

        The list can be indented with the optional argument 'indent'.
        """
        for line in txt:
            self.add_line(line, indent)


    def add_line(self, txt, indent=0):
        """Adds a line.

        The line can be indented with the optional argument 'indent'.
        """

        self.add(txt+'\n', indent)


    def add(self, txt, indent=0):
        """Adds some text, no newline will be appended.

        The text can be indented with the optional argument 'indent'.
        Unicode text is converted character by character with str();
        characters that cannot be converted are replaced by their repr().
        """

        if isinstance(txt, unicode):
            converted = []
            for c in txt:
                try:
                    converted.append(str(c))
                except UnicodeEncodeError:
                    converted.append(repr(c))

            txt = ''.join(converted)

        self.text += ' '*indent+txt


    def add_header(self, txt):
        """Adds a header element (the text framed by dashes, then a blank line)."""

        self.add_line('-'*10+txt+'-'*10+'\n')


    def add_newline(self):
        """Adds a newline."""

        self.text += '\n'


    def get_text(self):
        """Get the text in its current state."""

        return self.text
class Structure:
    """Prepare structure object to extract members from data.

    Format is a two-element sequence: the structure name followed by a
    list containing definitions for the elements of the structure. Each
    definition is a string '<struct format>,<name>[,<alias>...]'; several
    names after the type describe a union, i.e. alternative names for
    the same piece of data.
    """


    def __init__(self, format, name=None, file_offset=None):
        # Format is forced little endian, for big endian non Intel platforms
        self.__format__ = '<'
        self.__keys__ = []
        self.__format_length__ = 0
        self.__set_format__(format[1])
        self._all_zeroes = False
        self.__unpacked_data_elms__ = None
        self.__file_offset__ = file_offset
        if name:
            self.name = name
        else:
            self.name = format[0]


    def __get_format__(self):
        """Return the struct format string built from the definitions."""
        return self.__format__


    def get_file_offset(self):
        """Return the file offset associated with this structure."""
        return self.__file_offset__

    def set_file_offset(self, offset):
        """Set the file offset associated with this structure."""
        self.__file_offset__ = offset

    def all_zeroes(self):
        """Returns true if the last unpacked data was all zeroes."""

        return self._all_zeroes


    def __set_format__(self, format):
        """Build the struct format string and the key lists from 'format'.

        Elements without a comma are ignored.
        """

        for elm in format:
            if ',' in elm:
                elm_type, elm_name = elm.split(',', 1)
                self.__format__ += elm_type

                elm_names = elm_name.split(',')
                names = []
                for elm_name in elm_names:
                    # NOTE(review): __keys__ holds lists of names, so this
                    # membership test on a plain string looks like it can
                    # never succeed; left untouched to keep attribute
                    # names exactly as before.
                    if elm_name in self.__keys__:
                        search_list = [x[:len(elm_name)] for x in self.__keys__]
                        occ_count = search_list.count(elm_name)
                        elm_name = elm_name+'_'+str(occ_count)
                    names.append(elm_name)
                # Some PE header structures have unions on them, so a certain
                # value might have different names, so each key has a list of
                # all the possible members referring to the data.
                self.__keys__.append(names)

        self.__format_length__ = struct.calcsize(self.__format__)


    def sizeof(self):
        """Return size of the structure."""

        return self.__format_length__


    def __unpack__(self, data):
        """Unpack 'data' and expose every member as an attribute.

        Extra trailing data is ignored. Raises PEFormatError if 'data'
        is shorter than the structure.
        """

        if len(data)>self.__format_length__:
            data = data[:self.__format_length__]

        # OC Patch:
        # Some malware have incorrect header lengths.
        # Fail gracefully if this occurs
        # Buggy malware: a29b0118af8b7408444df81701ad5a7f
        #
        elif len(data)<self.__format_length__:
            raise PEFormatError('Data length less than expected header length.')

        # struct.pack yields a NUL of the same string type struct.unpack
        # accepts. The flag is recomputed on every call so it cannot stay
        # set from a previous all-zero unpack.
        nul = struct.pack('B', 0)
        self._all_zeroes = (data.count(nul) == len(data))

        self.__unpacked_data_elms__ = struct.unpack(self.__format__, data)
        for i, value in enumerate(self.__unpacked_data_elms__):
            for key in self.__keys__[i]:
                setattr(self, key, value)


    def __pack__(self):
        """Pack the current attribute values back into a string."""

        new_values = []

        for i, old_val in enumerate(self.__unpacked_data_elms__):

            for key in self.__keys__[i]:
                new_val = getattr(self, key)

                # In the case of Unions, when the first changed value
                # is picked the loop is exited
                if new_val != old_val:
                    break

            new_values.append(new_val)

        return struct.pack(self.__format__, *new_values)


    def __str__(self):
        return '\n'.join( self.dump() )

    def __repr__(self):
        return '<Structure: %s>' % (' '.join( [' '.join(s.split()) for s in self.dump()] ))


    def dump(self, indentation=0):
        """Returns a list of lines describing the structure.

        The first line is the structure name in brackets, followed by
        one 'name: value' line per member name. 'indentation' is
        accepted but not used.
        """

        # 'long' only exists on Python 2; fall back to int alone elsewhere.
        try:
            integer_types = (int, long)
        except NameError:
            integer_types = (int,)

        dump = []

        dump.append('[%s]' % self.name)

        # Refer to the __set_format__ method for an explanation
        # of the following construct.
        for keys in self.__keys__:
            for key in keys:

                val = getattr(self, key)
                if isinstance(val, integer_types):
                    val_str = '0x%-8X' % (val)
                    if key == 'TimeDateStamp' or key == 'dwTimeStamp':
                        try:
                            val_str += ' [%s UTC]' % time.asctime(time.gmtime(val))
                        except ValueError:
                            val_str += ' [INVALID TIME]'
                else:
                    # Drop NUL padding from string members
                    val_str = str(val).replace('\0', '')

                dump.append('%-30s %s' % (key+':', val_str))

        return dump



class SectionStructure(Structure):
    """Convenience section handling class.

    The methods read the section header members VirtualAddress,
    PointerToRawData, SizeOfRawData and Misc_VirtualSize, and the
    'data' attribute assigned through set_data().
    """

    def get_data(self, start, length=None):
        """Get data chunk from a section.

        Allows to query data from the section by passing the
        addresses where the PE file would be loaded by default.
        It is then possible to retrieve code and data by its real
        addresses as it would be if loaded.

        If 'length' is omitted (or zero) everything up to the end of
        the section's data is returned.
        """

        offset = start - self.VirtualAddress

        if length:
            end = offset+length
        else:
            end = len(self.data)

        return self.data[offset:end]


    def get_rva_from_offset(self, offset):
        """Translate a file offset into an RVA using this section's header."""
        return offset - self.PointerToRawData + self.VirtualAddress


    def get_offset_from_rva(self, rva):
        """Translate an RVA into a file offset using this section's header."""
        return (rva - self.VirtualAddress) + self.PointerToRawData


    def contains_offset(self, offset):
        """Check whether the section contains the file offset provided."""

        if not self.PointerToRawData:
            # bss and other sections containing only uninitialized data must have 0
            # and do not take space in the file
            return False

        # Both bounds are file offsets: the raw data starts at
        # PointerToRawData and is SizeOfRawData bytes long (the previous
        # code mixed in VirtualAddress for the upper bound).
        start = self.PointerToRawData
        return start <= offset < start + self.SizeOfRawData


    def contains_rva(self, rva):
        """Check whether the section contains the address provided."""

        # PECOFF documentation v8 says:
        # The total size of the section when loaded into memory.
        # If this value is greater than SizeOfRawData, the section is zero-padded.
        # This field is valid only for executable images and should be set to zero
        # for object files.

        if len(self.data) < self.SizeOfRawData:
            size = self.Misc_VirtualSize
        else:
            size = max(self.SizeOfRawData, self.Misc_VirtualSize)

        return self.VirtualAddress <= rva < self.VirtualAddress + size

    def contains(self, rva):
        """Deprecated alias of contains_rva()."""
        #print "DEPRECATION WARNING: you should use contains_rva() instead of contains()"
        return self.contains_rva(rva)


    def set_data(self, data):
        """Set the data belonging to the section."""

        self.data = data


    def get_entropy(self):
        """Calculate and return the entropy for the section."""

        return self.entropy_H( self.data )


    def get_hash_sha1(self):
        """Get the SHA-1 hex-digest of the section's data.

        Returns None if no SHA-1 implementation is available.
        """

        if sha1 is not None:
            return sha1( self.data ).hexdigest()


    def get_hash_sha256(self):
        """Get the SHA-256 hex-digest of the section's data.

        Returns None if no SHA-256 implementation is available.
        """

        if sha256 is not None:
            return sha256( self.data ).hexdigest()


    def get_hash_sha512(self):
        """Get the SHA-512 hex-digest of the section's data.

        Returns None if no SHA-512 implementation is available.
        """

        if sha512 is not None:
            return sha512( self.data ).hexdigest()


    def get_hash_md5(self):
        """Get the MD5 hex-digest of the section's data.

        Returns None if no MD5 implementation is available.
        """

        if md5 is not None:
            return md5( self.data ).hexdigest()


    def entropy_H(self, data):
        """Calculate the entropy of a chunk of data.

        The result is in bits per character: 0.0 for empty data, up to
        8.0 when all 256 values are equally frequent.
        """

        if len(data) == 0:
            return 0.0

        # One counter per possible byte value
        occurences = array.array('L', [0]*256)

        for x in data:
            occurences[ord(x)] += 1

        entropy = 0
        for x in occurences:
            if x:
                p_x = float(x) / len(data)
                entropy -= p_x*math.log(p_x, 2)

        return entropy



class DataContainer:
    """Generic data container.

    Every keyword argument passed to the constructor becomes an
    attribute of the instance.
    """

    def __init__(self, **args):
        for key, value in args.items():
            setattr(self, key, value)
class ImportDescData(DataContainer):
    """Holds import descriptor information.

    dll:        name of the imported DLL
    imports:    list of imported symbols (ImportData instances)
    struct:     IMAGE_IMPORT_DESCRIPTOR structure
    """

class ImportData(DataContainer):
    """Holds imported symbol's information.

    ordinal:    Ordinal of the symbol
    name:       Name of the symbol
    bound:      If the symbol is bound, this contains
                the address.
    """

class ExportDirData(DataContainer):
    """Holds export directory information.

    struct:     IMAGE_EXPORT_DIRECTORY structure
    symbols:    list of exported symbols (ExportData instances)
    """

class ExportData(DataContainer):
    """Holds exported symbols' information.

    ordinal:    ordinal of the symbol
    address:    address of the symbol
    name:       name of the symbol (None if the symbol is
                exported by ordinal only)
    forwarder:  if the symbol is forwarded it will
                contain the name of the target symbol,
                None otherwise.
    """


class ResourceDirData(DataContainer):
    """Holds resource directory information.

    struct:     IMAGE_RESOURCE_DIRECTORY structure
    entries:    list of entries (ResourceDirEntryData instances)
    """

class ResourceDirEntryData(DataContainer):
    """Holds resource directory entry data.

    struct:     IMAGE_RESOURCE_DIRECTORY_ENTRY structure
    name:       If the resource is identified by name this
                attribute will contain the name string. None
                otherwise. If identified by id, the id is
                available at 'struct.Id'
    id:         the id, also in struct.Id
    directory:  If this entry has a lower level directory
                this attribute will point to the
                ResourceDirData instance representing it.
    data:       If this entry has no further lower directories
                and points to the actual resource data, this
                attribute will reference the corresponding
                ResourceDataEntryData instance.
    (Either of the 'directory' or 'data' attribute will exist,
    but not both.)
    """

class ResourceDataEntryData(DataContainer):
    """Holds resource data entry information.

    struct:     IMAGE_RESOURCE_DATA_ENTRY structure
    lang:       Primary language ID
    sublang:    Sublanguage ID
    """

class DebugData(DataContainer):
    """Holds debug information.

    struct:     IMAGE_DEBUG_DIRECTORY structure
    """

class BaseRelocationData(DataContainer):
    """Holds base relocation information.

    struct:     IMAGE_BASE_RELOCATION structure
    entries:    list of relocation data (RelocationData instances)
    """

class RelocationData(DataContainer):
    """Holds relocation information.

    type:       Type of relocation
                The type string can be obtained by
                RELOCATION_TYPE[type]
    rva:        RVA of the relocation
    """

class TlsData(DataContainer):
    """Holds TLS information.

    struct:     IMAGE_TLS_DIRECTORY structure
    """

class BoundImportDescData(DataContainer):
    """Holds bound import descriptor data.

    This directory entry provides information on the
    DLLs this PE file has been bound to (if bound at all).
    The structure will contain the name and timestamp of the
    DLL at the time of binding so that the loader can know
    whether it differs from the one currently present in the
    system and must, therefore, re-bind the PE's imports.

    struct:     IMAGE_BOUND_IMPORT_DESCRIPTOR structure
    name:       DLL name
    entries:    list of entries (BoundImportRefData instances)
                the entries will exist if this DLL has forwarded
                symbols. If so, the destination DLL will have an
                entry in this list.
    """

class BoundImportRefData(DataContainer):
    """Holds bound import forwarder reference data.

    Contains the same information as the bound descriptor but
    for forwarded DLLs, if any.

    struct:     IMAGE_BOUND_FORWARDER_REF structure
    name:       dll name
    """
1066 1067 struct: IMAGE_BOUND_FORWARDER_REF structure 1068 name: dll name 1069 """ 1070 1071 1072 class PE: 1073 """A Portable Executable representation. 1074 1075 This class provides access to most of the information in a PE file. 1076 1077 It expects to be supplied the name of the file to load or PE data 1078 to process and an optional argument 'fast_load' (False by default) 1079 which controls whether to load all the directories information, 1080 which can be quite time consuming. 1081 1082 pe = pefile.PE('module.dll') 1083 pe = pefile.PE(name='module.dll') 1084 1085 would load 'module.dll' and process it. If the data would be already 1086 available in a buffer the same could be achieved with: 1087 1088 pe = pefile.PE(data=module_dll_data) 1089 1090 The "fast_load" can be set to a default by setting its value in the 1091 module itself by means,for instance, of a "pefile.fast_load = True". 1092 That will make all the subsequent instances not to load the 1093 whole PE structure. The "full_load" method can be used to parse 1094 the missing data at a later stage. 1095 1096 Basic headers information will be available in the attributes: 1097 1098 DOS_HEADER 1099 NT_HEADERS 1100 FILE_HEADER 1101 OPTIONAL_HEADER 1102 1103 All of them will contain among their attrbitues the members of the 1104 corresponding structures as defined in WINNT.H 1105 1106 The raw data corresponding to the header (from the beginning of the 1107 file up to the start of the first section) will be avaiable in the 1108 instance's attribute 'header' as a string. 1109 1110 The sections will be available as a list in the 'sections' attribute. 1111 Each entry will contain as attributes all the structure's members. 

    Directory entries will be available as attributes (if they exist):
    (no other entries are processed at this point)

    DIRECTORY_ENTRY_IMPORT (list of ImportDescData instances)
    DIRECTORY_ENTRY_EXPORT (ExportDirData instance)
    DIRECTORY_ENTRY_RESOURCE (ResourceDirData instance)
    DIRECTORY_ENTRY_DEBUG (list of DebugData instances)
    DIRECTORY_ENTRY_BASERELOC (list of BaseRelocationData instances)
    DIRECTORY_ENTRY_TLS (TlsData instance)
    DIRECTORY_ENTRY_DELAY_IMPORT
    DIRECTORY_ENTRY_BOUND_IMPORT (list of BoundImportDescData instances)

    The following dictionary attributes provide ways of mapping different
    constants. They accept the numeric value and return the string
    representation and, the other way around, accept the string and
    return the numeric constant:

    DIRECTORY_ENTRY
    IMAGE_CHARACTERISTICS
    SECTION_CHARACTERISTICS
    DEBUG_TYPE
    SUBSYSTEM_TYPE
    MACHINE_TYPE
    RELOCATION_TYPE
    RESOURCE_TYPE
    LANG
    SUBLANG
    """

    #
    # Format specifications for PE structures.
1143 # 1144 1145 __IMAGE_DOS_HEADER_format__ = ('IMAGE_DOS_HEADER', 1146 ('H,e_magic', 'H,e_cblp', 'H,e_cp', 1147 'H,e_crlc', 'H,e_cparhdr', 'H,e_minalloc', 1148 'H,e_maxalloc', 'H,e_ss', 'H,e_sp', 'H,e_csum', 1149 'H,e_ip', 'H,e_cs', 'H,e_lfarlc', 'H,e_ovno', '8s,e_res', 1150 'H,e_oemid', 'H,e_oeminfo', '20s,e_res2', 1151 'L,e_lfanew')) 1152 1153 __IMAGE_FILE_HEADER_format__ = ('IMAGE_FILE_HEADER', 1154 ('H,Machine', 'H,NumberOfSections', 1155 'L,TimeDateStamp', 'L,PointerToSymbolTable', 1156 'L,NumberOfSymbols', 'H,SizeOfOptionalHeader', 1157 'H,Characteristics')) 1158 1159 __IMAGE_DATA_DIRECTORY_format__ = ('IMAGE_DATA_DIRECTORY', 1160 ('L,VirtualAddress', 'L,Size')) 1161 1162 1163 __IMAGE_OPTIONAL_HEADER_format__ = ('IMAGE_OPTIONAL_HEADER', 1164 ('H,Magic', 'B,MajorLinkerVersion', 1165 'B,MinorLinkerVersion', 'L,SizeOfCode', 1166 'L,SizeOfInitializedData', 'L,SizeOfUninitializedData', 1167 'L,AddressOfEntryPoint', 'L,BaseOfCode', 'L,BaseOfData', 1168 'L,ImageBase', 'L,SectionAlignment', 'L,FileAlignment', 1169 'H,MajorOperatingSystemVersion', 'H,MinorOperatingSystemVersion', 1170 'H,MajorImageVersion', 'H,MinorImageVersion', 1171 'H,MajorSubsystemVersion', 'H,MinorSubsystemVersion', 1172 'L,Reserved1', 'L,SizeOfImage', 'L,SizeOfHeaders', 1173 'L,CheckSum', 'H,Subsystem', 'H,DllCharacteristics', 1174 'L,SizeOfStackReserve', 'L,SizeOfStackCommit', 1175 'L,SizeOfHeapReserve', 'L,SizeOfHeapCommit', 1176 'L,LoaderFlags', 'L,NumberOfRvaAndSizes' )) 1177 1178 1179 __IMAGE_OPTIONAL_HEADER64_format__ = ('IMAGE_OPTIONAL_HEADER64', 1180 ('H,Magic', 'B,MajorLinkerVersion', 1181 'B,MinorLinkerVersion', 'L,SizeOfCode', 1182 'L,SizeOfInitializedData', 'L,SizeOfUninitializedData', 1183 'L,AddressOfEntryPoint', 'L,BaseOfCode', 1184 'Q,ImageBase', 'L,SectionAlignment', 'L,FileAlignment', 1185 'H,MajorOperatingSystemVersion', 'H,MinorOperatingSystemVersion', 1186 'H,MajorImageVersion', 'H,MinorImageVersion', 1187 'H,MajorSubsystemVersion', 'H,MinorSubsystemVersion', 1188 
'L,Reserved1', 'L,SizeOfImage', 'L,SizeOfHeaders', 1189 'L,CheckSum', 'H,Subsystem', 'H,DllCharacteristics', 1190 'Q,SizeOfStackReserve', 'Q,SizeOfStackCommit', 1191 'Q,SizeOfHeapReserve', 'Q,SizeOfHeapCommit', 1192 'L,LoaderFlags', 'L,NumberOfRvaAndSizes' )) 1193 1194 1195 __IMAGE_NT_HEADERS_format__ = ('IMAGE_NT_HEADERS', ('L,Signature',)) 1196 1197 __IMAGE_SECTION_HEADER_format__ = ('IMAGE_SECTION_HEADER', 1198 ('8s,Name', 'L,Misc,Misc_PhysicalAddress,Misc_VirtualSize', 1199 'L,VirtualAddress', 'L,SizeOfRawData', 'L,PointerToRawData', 1200 'L,PointerToRelocations', 'L,PointerToLinenumbers', 1201 'H,NumberOfRelocations', 'H,NumberOfLinenumbers', 1202 'L,Characteristics')) 1203 1204 __IMAGE_DELAY_IMPORT_DESCRIPTOR_format__ = ('IMAGE_DELAY_IMPORT_DESCRIPTOR', 1205 ('L,grAttrs', 'L,szName', 'L,phmod', 'L,pIAT', 'L,pINT', 1206 'L,pBoundIAT', 'L,pUnloadIAT', 'L,dwTimeStamp')) 1207 1208 __IMAGE_IMPORT_DESCRIPTOR_format__ = ('IMAGE_IMPORT_DESCRIPTOR', 1209 ('L,OriginalFirstThunk,Characteristics', 1210 'L,TimeDateStamp', 'L,ForwarderChain', 'L,Name', 'L,FirstThunk')) 1211 1212 __IMAGE_EXPORT_DIRECTORY_format__ = ('IMAGE_EXPORT_DIRECTORY', 1213 ('L,Characteristics', 1214 'L,TimeDateStamp', 'H,MajorVersion', 'H,MinorVersion', 'L,Name', 1215 'L,Base', 'L,NumberOfFunctions', 'L,NumberOfNames', 1216 'L,AddressOfFunctions', 'L,AddressOfNames', 'L,AddressOfNameOrdinals')) 1217 1218 __IMAGE_RESOURCE_DIRECTORY_format__ = ('IMAGE_RESOURCE_DIRECTORY', 1219 ('L,Characteristics', 1220 'L,TimeDateStamp', 'H,MajorVersion', 'H,MinorVersion', 1221 'H,NumberOfNamedEntries', 'H,NumberOfIdEntries')) 1222 1223 __IMAGE_RESOURCE_DIRECTORY_ENTRY_format__ = ('IMAGE_RESOURCE_DIRECTORY_ENTRY', 1224 ('L,Name', 1225 'L,OffsetToData')) 1226 1227 __IMAGE_RESOURCE_DATA_ENTRY_format__ = ('IMAGE_RESOURCE_DATA_ENTRY', 1228 ('L,OffsetToData', 'L,Size', 'L,CodePage', 'L,Reserved')) 1229 1230 __VS_VERSIONINFO_format__ = ( 'VS_VERSIONINFO', 1231 ('H,Length', 'H,ValueLength', 'H,Type' )) 1232 1233 
__VS_FIXEDFILEINFO_format__ = ( 'VS_FIXEDFILEINFO', 1234 ('L,Signature', 'L,StrucVersion', 'L,FileVersionMS', 'L,FileVersionLS', 1235 'L,ProductVersionMS', 'L,ProductVersionLS', 'L,FileFlagsMask', 'L,FileFlags', 1236 'L,FileOS', 'L,FileType', 'L,FileSubtype', 'L,FileDateMS', 'L,FileDateLS')) 1237 1238 __StringFileInfo_format__ = ( 'StringFileInfo', 1239 ('H,Length', 'H,ValueLength', 'H,Type' )) 1240 1241 __StringTable_format__ = ( 'StringTable', 1242 ('H,Length', 'H,ValueLength', 'H,Type' )) 1243 1244 __String_format__ = ( 'String', 1245 ('H,Length', 'H,ValueLength', 'H,Type' )) 1246 1247 __Var_format__ = ( 'Var', ('H,Length', 'H,ValueLength', 'H,Type' )) 1248 1249 __IMAGE_THUNK_DATA_format__ = ('IMAGE_THUNK_DATA', 1250 ('L,ForwarderString,Function,Ordinal,AddressOfData',)) 1251 1252 __IMAGE_THUNK_DATA64_format__ = ('IMAGE_THUNK_DATA', 1253 ('Q,ForwarderString,Function,Ordinal,AddressOfData',)) 1254 1255 __IMAGE_DEBUG_DIRECTORY_format__ = ('IMAGE_DEBUG_DIRECTORY', 1256 ('L,Characteristics', 'L,TimeDateStamp', 'H,MajorVersion', 1257 'H,MinorVersion', 'L,Type', 'L,SizeOfData', 'L,AddressOfRawData', 1258 'L,PointerToRawData')) 1259 1260 __IMAGE_BASE_RELOCATION_format__ = ('IMAGE_BASE_RELOCATION', 1261 ('L,VirtualAddress', 'L,SizeOfBlock') ) 1262 1263 __IMAGE_TLS_DIRECTORY_format__ = ('IMAGE_TLS_DIRECTORY', 1264 ('L,StartAddressOfRawData', 'L,EndAddressOfRawData', 1265 'L,AddressOfIndex', 'L,AddressOfCallBacks', 1266 'L,SizeOfZeroFill', 'L,Characteristics' ) ) 1267 1268 __IMAGE_TLS_DIRECTORY64_format__ = ('IMAGE_TLS_DIRECTORY', 1269 ('Q,StartAddressOfRawData', 'Q,EndAddressOfRawData', 1270 'Q,AddressOfIndex', 'Q,AddressOfCallBacks', 1271 'L,SizeOfZeroFill', 'L,Characteristics' ) ) 1272 1273 __IMAGE_BOUND_IMPORT_DESCRIPTOR_format__ = ('IMAGE_BOUND_IMPORT_DESCRIPTOR', 1274 ('L,TimeDateStamp', 'H,OffsetModuleName', 'H,NumberOfModuleForwarderRefs')) 1275 1276 __IMAGE_BOUND_FORWARDER_REF_format__ = ('IMAGE_BOUND_FORWARDER_REF', 1277 ('L,TimeDateStamp', 'H,OffsetModuleName', 
'H,Reserved') ) 1278 1279 1280 def __init__(self, name=None, data=None, fast_load=None): 1281 1282 self.sections = [] 1283 1284 self.__warnings = [] 1285 1286 self.PE_TYPE = None 1287 1288 if not name and not data: 1289 return 1290 1291 # This list will keep track of all the structures created. 1292 # That will allow for an easy iteration through the list 1293 # in order to save the modifications made 1294 self.__structures__ = [] 1295 1296 if not fast_load: 1297 fast_load = globals()['fast_load'] 1298 self.__parse__(name, data, fast_load) 1299 1300 1301 1302 def __unpack_data__(self, format, data, file_offset): 1303 """Apply structure format to raw data. 1304 1305 Returns and unpacked structure object if successful, None otherwise. 1306 """ 1307 1308 structure = Structure(format, file_offset=file_offset) 1309 #if len(data) < structure.sizeof(): 1310 # return None 1311 1312 try: 1313 structure.__unpack__(data) 1314 except PEFormatError, err: 1315 self.__warnings.append( 1316 'Corrupt header "%s" at file offset %d. Exception: %s' % ( 1317 format[0], file_offset, str(err)) ) 1318 return None 1319 1320 self.__structures__.append(structure) 1321 1322 return structure 1323 1324 1325 1326 def __parse__(self, fname, data, fast_load): 1327 """Parse a Portable Executable file. 1328 1329 Loads a PE file, parsing all its structures and making them available 1330 through the instance's attributes. 
1331 """ 1332 1333 if fname: 1334 fd = file(fname, 'rb') 1335 self.__data__ = fd.read() 1336 fd.close() 1337 elif data: 1338 self.__data__ = data 1339 1340 1341 self.DOS_HEADER = self.__unpack_data__( 1342 self.__IMAGE_DOS_HEADER_format__, 1343 self.__data__, file_offset=0) 1344 1345 if not self.DOS_HEADER or self.DOS_HEADER.e_magic != IMAGE_DOS_SIGNATURE: 1346 raise PEFormatError('DOS Header magic not found.') 1347 1348 # OC Patch: 1349 # Check for sane value in e_lfanew 1350 # 1351 if self.DOS_HEADER.e_lfanew > len(self.__data__): 1352 raise PEFormatError('Invalid e_lfanew value, probably not a PE file') 1353 1354 nt_headers_offset = self.DOS_HEADER.e_lfanew 1355 1356 self.NT_HEADERS = self.__unpack_data__( 1357 self.__IMAGE_NT_HEADERS_format__, 1358 self.__data__[nt_headers_offset:], 1359 file_offset = nt_headers_offset) 1360 1361 # We better check the signature right here, before the file screws 1362 # around with sections: 1363 # OC Patch: 1364 # Some malware will cause the Signature value to not exist at all 1365 if not self.NT_HEADERS or not self.NT_HEADERS.Signature: 1366 raise PEFormatError('NT Headers not found.') 1367 1368 if self.NT_HEADERS.Signature != IMAGE_NT_SIGNATURE: 1369 raise PEFormatError('Invalid NT Headers signature.') 1370 1371 self.FILE_HEADER = self.__unpack_data__( 1372 self.__IMAGE_FILE_HEADER_format__, 1373 self.__data__[nt_headers_offset+4:], 1374 file_offset = nt_headers_offset+4) 1375 image_flags = self.retrieve_flags(IMAGE_CHARACTERISTICS, 'IMAGE_FILE_') 1376 1377 if not self.FILE_HEADER: 1378 raise PEFormatError('File Header missing') 1379 1380 # Set the image's flags according the the Characteristics member 1381 self.set_flags(self.FILE_HEADER, self.FILE_HEADER.Characteristics, image_flags) 1382 1383 optional_header_offset = \ 1384 nt_headers_offset+4+self.FILE_HEADER.sizeof() 1385 1386 # Note: location of sections can be controlled from PE header: 1387 sections_offset = optional_header_offset + 
self.FILE_HEADER.SizeOfOptionalHeader 1388 1389 self.OPTIONAL_HEADER = self.__unpack_data__( 1390 self.__IMAGE_OPTIONAL_HEADER_format__, 1391 self.__data__[optional_header_offset:], 1392 file_offset = optional_header_offset) 1393 1394 # According to solardesigner's findings for his 1395 # Tiny PE project, the optional header does not 1396 # need fields beyond "Subsystem" in order to be 1397 # loadable by the Windows loader (given that zeroes 1398 # are acceptable values and the header is loaded 1399 # in a zeroed memory page) 1400 # If trying to parse a full Optional Header fails 1401 # we try to parse it again with some 0 padding 1402 # 1403 MINIMUM_VALID_OPTIONAL_HEADER_RAW_SIZE = 69 1404 1405 if ( self.OPTIONAL_HEADER is None and 1406 len(self.__data__[optional_header_offset:]) 1407 >= MINIMUM_VALID_OPTIONAL_HEADER_RAW_SIZE ): 1408 1409 # Add enough zeroes to make up for the unused fields 1410 # 1411 padding_length = 128 1412 1413 # Create padding 1414 # 1415 padded_data = self.__data__[optional_header_offset:] + ( 1416 '\0' * padding_length) 1417 1418 self.OPTIONAL_HEADER = self.__unpack_data__( 1419 self.__IMAGE_OPTIONAL_HEADER_format__, 1420 padded_data, 1421 file_offset = optional_header_offset) 1422 1423 1424 # Check the Magic in the OPTIONAL_HEADER and set the PE file 1425 # type accordingly 1426 # 1427 if self.OPTIONAL_HEADER is not None: 1428 1429 if self.OPTIONAL_HEADER.Magic == OPTIONAL_HEADER_MAGIC_PE: 1430 1431 self.PE_TYPE = OPTIONAL_HEADER_MAGIC_PE 1432 1433 elif self.OPTIONAL_HEADER.Magic == OPTIONAL_HEADER_MAGIC_PE_PLUS: 1434 1435 self.PE_TYPE = OPTIONAL_HEADER_MAGIC_PE_PLUS 1436 1437 self.OPTIONAL_HEADER = self.__unpack_data__( 1438 self.__IMAGE_OPTIONAL_HEADER64_format__, 1439 self.__data__[optional_header_offset:], 1440 file_offset = optional_header_offset) 1441 1442 # Again, as explained above, we try to parse 1443 # a reduced form of the Optional Header which 1444 # is still valid despite not including all 1445 # structure members 1446 # 
1447 MINIMUM_VALID_OPTIONAL_HEADER_RAW_SIZE = 69+4 1448 1449 if ( self.OPTIONAL_HEADER is None and 1450 len(self.__data__[optional_header_offset:]) 1451 >= MINIMUM_VALID_OPTIONAL_HEADER_RAW_SIZE ): 1452 1453 padding_length = 128 1454 padded_data = self.__data__[optional_header_offset:] + ( 1455 '\0' * padding_length) 1456 self.OPTIONAL_HEADER = self.__unpack_data__( 1457 self.__IMAGE_OPTIONAL_HEADER64_format__, 1458 padded_data, 1459 file_offset = optional_header_offset) 1460 1461 1462 if not self.FILE_HEADER: 1463 raise PEFormatError('File Header missing') 1464 1465 1466 # OC Patch: 1467 # Die gracefully if there is no OPTIONAL_HEADER field 1468 # 975440f5ad5e2e4a92c4d9a5f22f75c1 1469 if self.PE_TYPE is None or self.OPTIONAL_HEADER is None: 1470 raise PEFormatError("No Optional Header found, invalid PE32 or PE32+ file") 1471 1472 dll_characteristics_flags = self.retrieve_flags(DLL_CHARACTERISTICS, 'IMAGE_DLL_CHARACTERISTICS_') 1473 1474 # Set the Dll Characteristics flags according the the DllCharacteristics member 1475 self.set_flags( 1476 self.OPTIONAL_HEADER, 1477 self.OPTIONAL_HEADER.DllCharacteristics, 1478 dll_characteristics_flags) 1479 1480 1481 self.OPTIONAL_HEADER.DATA_DIRECTORY = [] 1482 #offset = (optional_header_offset + self.FILE_HEADER.SizeOfOptionalHeader) 1483 offset = (optional_header_offset + self.OPTIONAL_HEADER.sizeof()) 1484 1485 1486 self.NT_HEADERS.FILE_HEADER = self.FILE_HEADER 1487 self.NT_HEADERS.OPTIONAL_HEADER = self.OPTIONAL_HEADER 1488 1489 1490 # The NumberOfRvaAndSizes is sanitized to stay within 1491 # reasonable limits so can be casted to an int 1492 # 1493 if self.OPTIONAL_HEADER.NumberOfRvaAndSizes > 0x10: 1494 self.__warnings.append( 1495 'Suspicious NumberOfRvaAndSizes in the Optional Header. 
' + 1496 'Normal values are never larger than 0x10, the value is: 0x%x' % 1497 self.OPTIONAL_HEADER.NumberOfRvaAndSizes ) 1498 1499 for i in xrange(int(0x7fffffffL & self.OPTIONAL_HEADER.NumberOfRvaAndSizes)): 1500 1501 if len(self.__data__[offset:]) == 0: 1502 break 1503 1504 if len(self.__data__[offset:]) < 8: 1505 data = self.__data__[offset:]+'\0'*8 1506 else: 1507 data = self.__data__[offset:] 1508 1509 dir_entry = self.__unpack_data__( 1510 self.__IMAGE_DATA_DIRECTORY_format__, 1511 data, 1512 file_offset = offset) 1513 1514 if dir_entry is None: 1515 break 1516 1517 # Would fail if missing an entry 1518 # 1d4937b2fa4d84ad1bce0309857e70ca offending sample 1519 try: 1520 dir_entry.name = DIRECTORY_ENTRY[i] 1521 except (KeyError, AttributeError): 1522 break 1523 1524 offset += dir_entry.sizeof() 1525 1526 self.OPTIONAL_HEADER.DATA_DIRECTORY.append(dir_entry) 1527 1528 # If the offset goes outside the optional header, 1529 # the loop is broken, regardless of how many directories 1530 # NumberOfRvaAndSizes says there are 1531 # 1532 # We assume a normally sized optional header, hence that we do 1533 # a sizeof() instead of reading SizeOfOptionalHeader. 1534 # Then we add a default number of drectories times their size, 1535 # if we go beyond that, we assume the number of directories 1536 # is wrong and stop processing 1537 if offset >= (optional_header_offset + 1538 self.OPTIONAL_HEADER.sizeof() + 8*16) : 1539 1540 break 1541 1542 1543 offset = self.parse_sections(sections_offset) 1544 1545 # OC Patch: 1546 # There could be a problem if there are no raw data sections 1547 # greater than 0 1548 # fc91013eb72529da005110a3403541b6 example 1549 # Should this throw an exception in the minimum header offset 1550 # can't be found? 
1551 # 1552 rawDataPointers = [ 1553 s.PointerToRawData for s in self.sections if s.PointerToRawData>0] 1554 1555 if len(rawDataPointers) > 0: 1556 lowest_section_offset = min(rawDataPointers) 1557 else: 1558 lowest_section_offset = None 1559 1560 if not lowest_section_offset or lowest_section_offset<offset: 1561 self.header = self.__data__[:offset] 1562 else: 1563 self.header = self.__data__[:lowest_section_offset] 1564 1565 1566 # Check whether the entry point lies within a section 1567 # 1568 if self.get_section_by_rva(self.OPTIONAL_HEADER.AddressOfEntryPoint) is not None: 1569 1570 # Check whether the entry point lies within the file 1571 # 1572 ep_offset = self.get_offset_from_rva(self.OPTIONAL_HEADER.AddressOfEntryPoint) 1573 if ep_offset > len(self.__data__): 1574 1575 self.__warnings.append( 1576 'Possibly corrupt file. AddressOfEntryPoint lies outside the file. ' + 1577 'AddressOfEntryPoint: 0x%x' % 1578 self.OPTIONAL_HEADER.AddressOfEntryPoint ) 1579 1580 else: 1581 1582 self.__warnings.append( 1583 'AddressOfEntryPoint lies outside the sections\' boundaries. ' + 1584 'AddressOfEntryPoint: 0x%x' % 1585 self.OPTIONAL_HEADER.AddressOfEntryPoint ) 1586 1587 1588 if not fast_load: 1589 self.parse_data_directories() 1590 1591 1592 def get_warnings(self): 1593 """Return the list of warnings. 1594 1595 Non-critical problems found when parsing the PE file are 1596 appended to a list of warnings. This method returns the 1597 full list. 1598 """ 1599 1600 return self.__warnings 1601 1602 1603 def show_warnings(self): 1604 """Print the list of warnings. 1605 1606 Non-critical problems found when parsing the PE file are 1607 appended to a list of warnings. This method prints the 1608 full list to standard output. 1609 """ 1610 1611 for warning in self.__warnings: 1612 print '>', warning 1613 1614 1615 def full_load(self): 1616 """Process the data directories. 
1617 1618 This mathod will load the data directories which might not have 1619 been loaded if the "fast_load" option was used. 1620 """ 1621 1622 self.parse_data_directories() 1623 1624 1625 def write(self, filename=None): 1626 """Write the PE file. 1627 1628 This function will process all headers and components 1629 of the PE file and include all changes made (by just 1630 assigning to attributes in the PE objects) and write 1631 the changes back to a file whose name is provided as 1632 an argument. The filename is optional. 1633 The data to be written to the file will be returned 1634 as a 'str' object. 1635 """ 1636 1637 file_data = list(self.__data__) 1638 for struct in self.__structures__: 1639 1640 struct_data = list(struct.__pack__()) 1641 offset = struct.get_file_offset() 1642 1643 file_data[offset:offset+len(struct_data)] = struct_data 1644 1645 if hasattr(self, 'VS_VERSIONINFO'): 1646 if hasattr(self, 'FileInfo'): 1647 for entry in self.FileInfo: 1648 if hasattr(entry, 'StringTable'): 1649 for st_entry in entry.StringTable: 1650 for key, entry in st_entry.entries.items(): 1651 1652 offsets = st_entry.entries_offsets[key] 1653 lengths = st_entry.entries_lengths[key] 1654 1655 if len( entry ) > lengths[1]: 1656 1657 uc = zip( 1658 list(entry[:lengths[1]]), ['\0'] * lengths[1] ) 1659 l = list() 1660 map(l.extend, uc) 1661 1662 file_data[ 1663 offsets[1] : offsets[1] + lengths[1]*2 ] = l 1664 1665 else: 1666 1667 uc = zip( 1668 list(entry), ['\0'] * len(entry) ) 1669 l = list() 1670 map(l.extend, uc) 1671 1672 file_data[ 1673 offsets[1] : offsets[1] + len(entry)*2 ] = l 1674 1675 remainder = lengths[1] - len(entry) 1676 file_data[ 1677 offsets[1] + len(entry)*2 : 1678 offsets[1] + lengths[1]*2 ] = [ 1679 u'\0' ] * remainder*2 1680 1681 new_file_data = ''.join( [ chr(ord(c)) for c in file_data ] ) 1682 1683 if filename: 1684 f = file(filename, 'wb+') 1685 f.write(new_file_data) 1686 f.close() 1687 1688 return new_file_data 1689 1690 1691 1692 def 
parse_sections(self, offset): 1693 """Fetch the PE file sections. 1694 1695 The sections will be readily available in the "sections" attribute. 1696 Its attributes will contain all the section information plus "data" 1697 a buffer containing the section's data. 1698 1699 The "Characteristics" member will be processed and attributes 1700 representing the section characteristics (with the 'IMAGE_SCN_' 1701 string trimmed from the constant's names) will be added to the 1702 section instance. 1703 1704 Refer to the SectionStructure class for additional info. 1705 """ 1706 1707 self.sections = [] 1708 1709 for i in xrange(self.FILE_HEADER.NumberOfSections): 1710 section = SectionStructure(self.__IMAGE_SECTION_HEADER_format__) 1711 if not section: 1712 break 1713 section_offset = offset + section.sizeof() * i 1714 section.set_file_offset(section_offset) 1715 section.__unpack__(self.__data__[section_offset:]) 1716 self.__structures__.append(section) 1717 1718 if section.SizeOfRawData > len(self.__data__): 1719 self.__warnings.append( 1720 ('Error parsing section %d. ' % i) + 1721 'SizeOfRawData is larger than file.') 1722 1723 if section.PointerToRawData > len(self.__data__): 1724 self.__warnings.append( 1725 ('Error parsing section %d. ' % i) + 1726 'PointerToRawData points beyond the end of the file.') 1727 1728 if section.Misc_VirtualSize > 0x10000000: 1729 self.__warnings.append( 1730 ('Suspicious value found parsing section %d. ' % i) + 1731 'VirtualSize is extremely large > 256MiB.') 1732 1733 if section.VirtualAddress > 0x10000000: 1734 self.__warnings.append( 1735 ('Suspicious value found parsing section %d. ' % i) + 1736 'VirtualAddress is beyond 0x10000000.') 1737 1738 # 1739 # Some packer used a non-aligned PointerToRawData in the sections, 1740 # which causes several common tools not to load the section data 1741 # properly as they blindly read from the indicated offset. 
1742 # It seems that Windows will round the offset down to the largest 1743 # offset multiple of FileAlignment which is smaller than 1744 # PointerToRawData. The following code will do the same. 1745 # 1746 1747 #alignment = self.OPTIONAL_HEADER.FileAlignment 1748 section_data_start = section.PointerToRawData 1749 1750 if ( self.OPTIONAL_HEADER.FileAlignment != 0 and 1751 (section.PointerToRawData % self.OPTIONAL_HEADER.FileAlignment) != 0): 1752 self.__warnings.append( 1753 ('Error parsing section %d. ' % i) + 1754 'Suspicious value for FileAlignment in the Optional Header. ' + 1755 'Normally the PointerToRawData entry of the sections\' structures ' + 1756 'is a multiple of FileAlignment, this might imply the file ' + 1757 'is trying to confuse tools which parse this incorrectly') 1758 1759 section_data_end = section_data_start+section.SizeOfRawData 1760 section.set_data(self.__data__[section_data_start:section_data_end]) 1761 1762 section_flags = self.retrieve_flags(SECTION_CHARACTERISTICS, 'IMAGE_SCN_') 1763 1764 # Set the section's flags according the the Characteristics member 1765 self.set_flags(section, section.Characteristics, section_flags) 1766 1767 if ( section.__dict__.get('IMAGE_SCN_MEM_WRITE', False) and 1768 section.__dict__.get('IMAGE_SCN_MEM_EXECUTE', False) ): 1769 1770 self.__warnings.append( 1771 ('Suspicious flags set for section %d. ' % i) + 1772 'Both IMAGE_SCN_MEM_WRITE and IMAGE_SCN_MEM_EXECUTE are set.' + 1773 'This might indicate a packed executable.') 1774 1775 self.sections.append(section) 1776 1777 if self.FILE_HEADER.NumberOfSections > 0 and self.sections: 1778 return offset + self.sections[0].sizeof()*self.FILE_HEADER.NumberOfSections 1779 else: 1780 return offset 1781 1782 1783 def retrieve_flags(self, flag_dict, flag_filter): 1784 """Read the flags from a dictionary and return them in a usable form. 1785 1786 Will return a list of (flag, value) for all flags in "flag_dict" 1787 matching the filter "flag_filter". 
1788 """ 1789 1790 return [(f[0], f[1]) for f in flag_dict.items() if 1791 isinstance(f[0], str) and f[0].startswith(flag_filter)] 1792 1793 1794 def set_flags(self, obj, flag_field, flags): 1795 """Will process the flags and set attributes in the object accordingly. 1796 1797 The object "obj" will gain attritutes named after the flags provided in 1798 "flags" and valued True/False, matching the results of applyin each 1799 flag value from "flags" to flag_field. 1800 """ 1801 1802 for flag in flags: 1803 if flag[1] & flag_field: 1804 setattr(obj, flag[0], True) 1805 else: 1806 setattr(obj, flag[0], False) 1807 1808 1809 1810 def parse_data_directories(self): 1811 """Parse and process the PE file's data directories.""" 1812 1813 directory_parsing = ( 1814 ('IMAGE_DIRECTORY_ENTRY_IMPORT', self.parse_import_directory), 1815 ('IMAGE_DIRECTORY_ENTRY_EXPORT', self.parse_export_directory), 1816 ('IMAGE_DIRECTORY_ENTRY_RESOURCE', self.parse_resources_directory), 1817 ('IMAGE_DIRECTORY_ENTRY_DEBUG', self.parse_debug_directory), 1818 ('IMAGE_DIRECTORY_ENTRY_BASERELOC', self.parse_relocations_directory), 1819 ('IMAGE_DIRECTORY_ENTRY_TLS', self.parse_directory_tls), 1820 ('IMAGE_DIRECTORY_ENTRY_DELAY_IMPORT', self.parse_delay_import_directory), 1821 ('IMAGE_DIRECTORY_ENTRY_BOUND_IMPORT', self.parse_directory_bound_imports) ) 1822 1823 for entry in directory_parsing: 1824 # OC Patch: 1825 # 1826 try: 1827 dir_entry = self.OPTIONAL_HEADER.DATA_DIRECTORY[ 1828 DIRECTORY_ENTRY[entry[0]]] 1829 except IndexError: 1830 break 1831 if dir_entry.VirtualAddress: 1832 value = entry[1](dir_entry.VirtualAddress, dir_entry.Size) 1833 if value: 1834 setattr(self, entry[0][6:], value) 1835 1836 1837 def parse_directory_bound_imports(self, rva, size): 1838 """""" 1839 1840 bnd_descr = Structure(self.__IMAGE_BOUND_IMPORT_DESCRIPTOR_format__) 1841 bnd_descr_size = bnd_descr.sizeof() 1842 start = rva 1843 1844 bound_imports = [] 1845 while True: 1846 1847 bnd_descr = self.__unpack_data__( 1848 
self.__IMAGE_BOUND_IMPORT_DESCRIPTOR_format__, 1849 self.__data__[rva:rva+bnd_descr_size], 1850 file_offset = rva) 1851 if bnd_descr is None: 1852 # If can't parse directory then silently return. 1853 # This directory does not necesarily have to be valid to 1854 # still have a valid PE file 1855 1856 self.__warnings.append( 1857 'The Bound Imports directory exists but can\'t be parsed.') 1858 1859 return 1860 1861 if bnd_descr.all_zeroes(): 1862 break 1863 1864 rva += bnd_descr.sizeof() 1865 1866 forwarder_refs = [] 1867 for idx in xrange(bnd_descr.NumberOfModuleForwarderRefs): 1868 # Both structures IMAGE_BOUND_IMPORT_DESCRIPTOR and 1869 # IMAGE_BOUND_FORWARDER_REF have the same size. 1870 bnd_frwd_ref = self.__unpack_data__( 1871 self.__IMAGE_BOUND_FORWARDER_REF_format__, 1872 self.__data__[rva:rva+bnd_descr_size], 1873 file_offset = rva) 1874 # OC Patch: 1875 if not bnd_frwd_ref: 1876 raise PEFormatError( 1877 "IMAGE_BOUND_FORWARDER_REF cannot be read") 1878 rva += bnd_frwd_ref.sizeof() 1879 1880 name_str = self.get_string_from_data( 1881 start+bnd_frwd_ref.OffsetModuleName, self.__data__) 1882 1883 if not name_str: 1884 break 1885 forwarder_refs.append(BoundImportRefData( 1886 struct = bnd_frwd_ref, 1887 name = name_str)) 1888 1889 name_str = self.get_string_from_data( 1890 start+bnd_descr.OffsetModuleName, self.__data__) 1891 1892 if not name_str: 1893 break 1894 bound_imports.append( 1895 BoundImportDescData( 1896 struct = bnd_descr, 1897 name = name_str, 1898 entries = forwarder_refs)) 1899 1900 return bound_imports 1901 1902 1903 def parse_directory_tls(self, rva, size): 1904 """""" 1905 1906 if self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE: 1907 format = self.__IMAGE_TLS_DIRECTORY_format__ 1908 1909 elif self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE_PLUS: 1910 format = self.__IMAGE_TLS_DIRECTORY64_format__ 1911 1912 tls_struct = self.__unpack_data__( 1913 format, 1914 self.get_data(rva), 1915 file_offset = self.get_offset_from_rva(rva)) 1916 1917 if not tls_struct: 
1918 return None 1919 1920 return TlsData( struct = tls_struct ) 1921 1922 1923 def parse_relocations_directory(self, rva, size): 1924 """""" 1925 1926 rlc = Structure(self.__IMAGE_BASE_RELOCATION_format__) 1927 rlc_size = rlc.sizeof() 1928 end = rva+size 1929 1930 relocations = [] 1931 while rva<end: 1932 1933 # OC Patch: 1934 # Malware that has bad rva entries will cause an error. 1935 # Just continue on after an exception 1936 # 1937 try: 1938 rlc = self.__unpack_data__( 1939 self.__IMAGE_BASE_RELOCATION_format__, 1940 self.get_data(rva, rlc_size), 1941 file_offset = self.get_offset_from_rva(rva) ) 1942 except PEFormatError: 1943 self.__warnings.append( 1944 'Invalid relocation information. Can\'t read ' + 1945 'data at RVA: 0x%x' % rva) 1946 rlc = None 1947 1948 if not rlc: 1949 break 1950 1951 reloc_entries = self.parse_relocations( 1952 rva+rlc_size, rlc.VirtualAddress, rlc.SizeOfBlock-rlc_size) 1953 1954 relocations.append( 1955 BaseRelocationData( 1956 struct = rlc, 1957 entries = reloc_entries)) 1958 1959 if not rlc.SizeOfBlock: 1960 break 1961 rva += rlc.SizeOfBlock 1962 1963 return relocations 1964 1965 1966 def parse_relocations(self, data_rva, rva, size): 1967 """""" 1968 1969 data = self.get_data(data_rva, size) 1970 1971 entries = [] 1972 for idx in xrange(len(data)/2): 1973 word = struct.unpack('<H', data[idx*2:(idx+1)*2])[0] 1974 reloc_type = (word>>12) 1975 reloc_offset = (word&0x0fff) 1976 entries.append( 1977 RelocationData( 1978 type = reloc_type, 1979 rva = reloc_offset+rva)) 1980 1981 return entries 1982 1983 1984 def parse_debug_directory(self, rva, size): 1985 """""" 1986 1987 dbg = Structure(self.__IMAGE_DEBUG_DIRECTORY_format__) 1988 dbg_size = dbg.sizeof() 1989 1990 debug = [] 1991 for idx in xrange(size/dbg_size): 1992 try: 1993 data = self.get_data(rva+dbg_size*idx, dbg_size) 1994 except PEFormatError, e: 1995 self.__warnings.append( 1996 'Invalid debug information. 
def parse_resources_directory(self, rva, size=0, base_rva = None, level = 0, dirs=None):
    """Parse the resources directory.

    Given the rva of a resource directory table, parse its header
    (IMAGE_RESOURCE_DIRECTORY) and each of the entries that follow it
    (IMAGE_RESOURCE_DIRECTORY_ENTRY), recursing into sub-directories.

    rva      -- RVA of the directory table to parse.
    size     -- accepted for interface compatibility; not used here.
    base_rva -- RVA of the root of the resource tree. Every NameOffset
                and OffsetToDirectory read from an entry is added to it.
                Defaults to 'rva', i.e. this call parses the root.
    level    -- nesting depth of this table, 0 being the root.
    dirs     -- RVAs of the directory tables on the path from the root
                down to (and including) this one. Used to refuse entries
                that point back to one of their own ancestors. Callers
                normally leave it as None.

    Returns a ResourceDirData whose 'struct' is the table header and
    whose 'entries' is a list of ResourceDirEntryData. Each entry has
    'struct', 'name' (set for named entries), 'id' (set for Id entries)
    and either 'directory' (another ResourceDirData) or 'data' (a
    ResourceDataEntryData with 'struct', 'lang' and 'sublang').

    Returns None when the table cannot be read or unpacked; a message
    is appended to the warnings list in that case.

    When a level 0 entry has the RT_VERSION Id, the first data entry
    found two levels below it is handed to parse_version_information().
    """

    if base_rva is None:
        base_rva = rva

    # The chain of tables that led here. It always contains the table
    # being parsed, so a direct self-reference is caught as well as a
    # longer loop (A -> B -> A).
    if dirs is None:
        dirs = [rva]

    try:
        # If the RVA is invalid all would blow up. Some EXEs seem to be
        # specially nasty and have an invalid RVA.
        data = self.get_data(rva)
    except PEFormatError:
        self.__warnings.append(
            'Invalid resources directory. Can\'t read ' +
            'directory data at RVA: 0x%x' % rva)
        return None

    # Get the resource directory structure, that is, the header
    # of the table preceding the actual entries
    #
    resource_dir = self.__unpack_data__(
        self.__IMAGE_RESOURCE_DIRECTORY_format__, data,
        file_offset = self.get_offset_from_rva(rva) )
    if resource_dir is None:
        # If can't parse resources directory then silently return.
        # This directory does not necessarily have to be valid to
        # still have a valid PE file
        self.__warnings.append(
            'Invalid resources directory. Can\'t parse ' +
            'directory data at RVA: 0x%x' % rva)
        return None

    dir_entries = []

    # Advance the rva to the position immediately following the directory
    # table header and pointing to the first entry in the table
    #
    rva += resource_dir.sizeof()

    number_of_entries = (
        resource_dir.NumberOfNamedEntries +
        resource_dir.NumberOfIdEntries )

    strings_to_postprocess = list()

    for idx in xrange(number_of_entries):

        # parse_resource_entry() fetches its data without guarding the
        # read, so an entry lying at an unreadable RVA raises instead
        # of returning None. Treat both outcomes as an invalid entry.
        try:
            res = self.parse_resource_entry(rva)
        except PEFormatError:
            res = None

        if res is None:
            self.__warnings.append(
                'Error parsing the resources directory, ' +
                'Entry %d is invalid, RVA = 0x%x. ' %
                (idx, rva) )
            break

        entry_name = None
        entry_id = None

        # If all named entries have been processed, only Id ones
        # remain

        if idx >= resource_dir.NumberOfNamedEntries:
            entry_id = res.Name
        else:
            ustr_offset = base_rva+res.NameOffset
            try:
                entry_name = UnicodeStringWrapperPostProcessor(self, ustr_offset)
                strings_to_postprocess.append(entry_name)

            except PEFormatError:
                self.__warnings.append(
                    'Error parsing the resources directory, ' +
                    'attempting to read entry name. ' +
                    'Can\'t read unicode string at offset 0x%x' %
                    (ustr_offset) )

        if res.DataIsDirectory:
            next_rva = base_rva + res.OffsetToDirectory

            # One trick malware can do is to make a directory entry
            # reference a table that is already being parsed, either
            # the current one or one further up the tree. Following it
            # would recurse without end.
            # Instead of raising a PEFormatError, which would discard
            # the reasonable data gathered so far, we just break.
            #
            # 9ee4d0a0caf095314fd7041a3e4404dc is the offending sample
            if next_rva in dirs:
                break

            entry_directory = self.parse_resources_directory(
                next_rva,
                base_rva=base_rva, level = level+1,
                dirs = dirs + [next_rva])

            if not entry_directory:
                break

            dir_entries.append(
                ResourceDirEntryData(
                    struct = res,
                    name = entry_name,
                    id = entry_id,
                    directory = entry_directory))

        else:
            struct = self.parse_resource_data_entry(
                base_rva + res.OffsetToDirectory)

            if not struct:
                break

            entry_data = ResourceDataEntryData(
                struct = struct,
                lang = res.Name & 0xff,
                sublang = (res.Name>>8) & 0xff)

            dir_entries.append(
                ResourceDirEntryData(
                    struct = res,
                    name = entry_name,
                    id = entry_id,
                    data = entry_data))

        # Check if this entry contains version information
        #
        if level == 0 and res.Id == RESOURCE_TYPE['RT_VERSION']:
            rt_version_struct = None
            if dir_entries:
                last_entry = dir_entries[-1]
                try:
                    rt_version_struct = last_entry.directory.entries[0].directory.entries[0].data.struct
                except Exception:
                    # Maybe a malformed directory structure...?
                    # Lets ignore it
                    pass

            if rt_version_struct is not None:
                self.parse_version_information(rt_version_struct)

        rva += res.sizeof()

    # The entry names were only wrapped while walking the table; render
    # them now that every entry of this table has been visited.
    for name_string in strings_to_postprocess:
        name_string.render_pascal_16()

    return ResourceDirData(
        struct = resource_dir,
        entries = dir_entries)
2246 2247 The date will be made available in three attributes of the PE object. 2248 2249 VS_VERSIONINFO will contain the first three fields of the main structure: 2250 'Length', 'ValueLength', and 'Type' 2251 2252 VS_FIXEDFILEINFO will hold the rest of the fields, accessible as sub-attributes: 2253 'Signature', 'StrucVersion', 'FileVersionMS', 'FileVersionLS', 2254 'ProductVersionMS', 'ProductVersionLS', 'FileFlagsMask', 'FileFlags', 2255 'FileOS', 'FileType', 'FileSubtype', 'FileDateMS', 'FileDateLS' 2256 2257 FileInfo is a list of all StringFileInfo and VarFileInfo structures. 2258 2259 StringFileInfo structures will have a list as an attribute named 'StringTable' 2260 containing all the StringTable structures. Each of those structures contains a 2261 dictionary 'entries' with all the key/value version information string pairs. 2262 2263 VarFileInfo structures will have a list as an attribute named 'Var' containing 2264 all Var structures. Each Var structure will have a dictionary as an attribute 2265 named 'entry' which will contain the name and value of the Var. 2266 """ 2267 2268 2269 # Retrieve the data for the version info resource 2270 # 2271 start_offset = self.get_offset_from_rva( version_struct.OffsetToData ) 2272 raw_data = self.__data__[ start_offset : start_offset+version_struct.Size ] 2273 2274 2275 # Map the main structure and the subsequent string 2276 # 2277 versioninfo_struct = self.__unpack_data__( 2278 self.__VS_VERSIONINFO_format__, raw_data, 2279 file_offset = start_offset ) 2280 2281 if versioninfo_struct is None: 2282 return 2283 2284 ustr_offset = version_struct.OffsetToData + versioninfo_struct.sizeof() 2285 try: 2286 versioninfo_string = self.get_string_u_at_rva( ustr_offset ) 2287 except PEFormatError, excp: 2288 self.__warnings.append( 2289 'Error parsing the version information, ' + 2290 'attempting to read VS_VERSION_INFO string. 
Can\'t ' + 2291 'read unicode string at offset 0x%x' % ( 2292 ustr_offset ) ) 2293 2294 versioninfo_string = None 2295 2296 # If the structure does not contain the expected name, it's assumed to be invalid 2297 # 2298 if versioninfo_string != u'VS_VERSION_INFO': 2299 2300 self.__warnings.append('Invalid VS_VERSION_INFO block') 2301 return 2302 2303 2304 # Set the PE object's VS_VERSIONINFO to this one 2305 # 2306 self.VS_VERSIONINFO = versioninfo_struct 2307 2308 # The the Key attribute to point to the unicode string identifying the structure 2309 # 2310 self.VS_VERSIONINFO.Key = versioninfo_string 2311 2312 2313 # Process the fixed version information, get the offset and structure 2314 # 2315 fixedfileinfo_offset = self.dword_align( 2316 versioninfo_struct.sizeof() + 2 * (len(versioninfo_string) + 1), 2317 version_struct.OffsetToData) 2318 fixedfileinfo_struct = self.__unpack_data__( 2319 self.__VS_FIXEDFILEINFO_format__, 2320 raw_data[fixedfileinfo_offset:], 2321 file_offset = start_offset+fixedfileinfo_offset ) 2322 2323 if not fixedfileinfo_struct: 2324 return 2325 2326 2327 # Set the PE object's VS_FIXEDFILEINFO to this one 2328 # 2329 self.VS_FIXEDFILEINFO = fixedfileinfo_struct 2330 2331 2332 # Start parsing all the StringFileInfo and VarFileInfo structures 2333 # 2334 2335 # Get the first one 2336 # 2337 stringfileinfo_offset = self.dword_align( 2338 fixedfileinfo_offset + fixedfileinfo_struct.sizeof(), 2339 version_struct.OffsetToData) 2340 original_stringfileinfo_offset = stringfileinfo_offset 2341 2342 2343 # Set the PE object's attribute that will contain them all. 
2344 # 2345 self.FileInfo = list() 2346 2347 2348 while True: 2349 2350 # Process the StringFileInfo/VarFileInfo struct 2351 # 2352 stringfileinfo_struct = self.__unpack_data__( 2353 self.__StringFileInfo_format__, 2354 raw_data[stringfileinfo_offset:], 2355 file_offset = start_offset+stringfileinfo_offset ) 2356 2357 if stringfileinfo_struct is None: 2358 self.__warnings.append( 2359 'Error parsing StringFileInfo/VarFileInfo struct' ) 2360 return None 2361 2362 # Get the subsequent string defining the structure. 2363 # 2364 ustr_offset = ( version_struct.OffsetToData + 2365 stringfileinfo_offset + versioninfo_struct.sizeof() ) 2366 try: 2367 stringfileinfo_string = self.get_string_u_at_rva( ustr_offset ) 2368 except PEFormatError, excp: 2369 self.__warnings.append( 2370 'Error parsing the version information, ' + 2371 'attempting to read StringFileInfo string. Can\'t ' + 2372 'read unicode string at offset 0x%x' % ( ustr_offset ) ) 2373 break 2374 2375 # Set such string as the Key attribute 2376 # 2377 stringfileinfo_struct.Key = stringfileinfo_string 2378 2379 2380 # Append the structure to the PE object's list 2381 # 2382 self.FileInfo.append(stringfileinfo_struct) 2383 2384 2385 # Parse a StringFileInfo entry 2386 # 2387 if stringfileinfo_string == u'StringFileInfo': 2388 2389 if stringfileinfo_struct.Type == 1 and stringfileinfo_struct.ValueLength == 0: 2390 2391 stringtable_offset = self.dword_align( 2392 stringfileinfo_offset + stringfileinfo_struct.sizeof() + 2393 2*(len(stringfileinfo_string)+1), 2394 version_struct.OffsetToData) 2395 2396 stringfileinfo_struct.StringTable = list() 2397 2398 # Process the String Table entries 2399 # 2400 while True: 2401 stringtable_struct = self.__unpack_data__( 2402 self.__StringTable_format__, 2403 raw_data[stringtable_offset:], 2404 file_offset = start_offset+stringtable_offset ) 2405 2406 if not stringtable_struct: 2407 break 2408 2409 ustr_offset = ( version_struct.OffsetToData + stringtable_offset + 2410 
stringtable_struct.sizeof() ) 2411 try: 2412 stringtable_string = self.get_string_u_at_rva( ustr_offset ) 2413 except PEFormatError, excp: 2414 self.__warnings.append( 2415 'Error parsing the version information, ' + 2416 'attempting to read StringTable string. Can\'t ' + 2417 'read unicode string at offset 0x%x' % ( ustr_offset ) ) 2418 break 2419 2420 stringtable_struct.LangID = stringtable_string 2421 stringtable_struct.entries = dict() 2422 stringtable_struct.entries_offsets = dict() 2423 stringtable_struct.entries_lengths = dict() 2424 stringfileinfo_struct.StringTable.append(stringtable_struct) 2425 2426 entry_offset = self.dword_align( 2427 stringtable_offset + stringtable_struct.sizeof() + 2428 2*(len(stringtable_string)+1), 2429 version_struct.OffsetToData) 2430 2431 # Process all entries in the string table 2432 # 2433 2434 while entry_offset < stringtable_offset + stringtable_struct.Length: 2435 2436 string_struct = self.__unpack_data__( 2437 self.__String_format__, raw_data[entry_offset:], 2438 file_offset = start_offset+entry_offset ) 2439 2440 if not string_struct: 2441 break 2442 2443 ustr_offset = ( version_struct.OffsetToData + entry_offset + 2444 string_struct.sizeof() ) 2445 try: 2446 key = self.get_string_u_at_rva( ustr_offset ) 2447 key_offset = self.get_offset_from_rva( ustr_offset ) 2448 except PEFormatError, excp: 2449 self.__warnings.append( 2450 'Error parsing the version information, ' + 2451 'attempting to read StringTable Key string. 
Can\'t ' + 2452 'read unicode string at offset 0x%x' % ( ustr_offset ) ) 2453 break 2454 2455 value_offset = self.dword_align( 2456 2*(len(key)+1) + entry_offset + string_struct.sizeof(), 2457 version_struct.OffsetToData) 2458 2459 ustr_offset = version_struct.OffsetToData + value_offset 2460 try: 2461 value = self.get_string_u_at_rva( ustr_offset, 2462 max_length = string_struct.ValueLength ) 2463 value_offset = self.get_offset_from_rva( ustr_offset ) 2464 except PEFormatError, excp: 2465 self.__warnings.append( 2466 'Error parsing the version information, ' + 2467 'attempting to read StringTable Value string. ' + 2468 'Can\'t read unicode string at offset 0x%x' % ( 2469 ustr_offset ) ) 2470 break 2471 2472 if string_struct.Length == 0: 2473 entry_offset = stringtable_offset + stringtable_struct.Length 2474 else: 2475 entry_offset = self.dword_align( 2476 string_struct.Length+entry_offset, version_struct.OffsetToData) 2477 2478 key_as_char = [] 2479 for c in key: 2480 if ord(c)>128: 2481 key_as_char.append('\\x%02x' %ord(c)) 2482 else: 2483 key_as_char.append(c) 2484 2485 key_as_char = ''.join(key_as_char) 2486 2487 setattr(stringtable_struct, key_as_char, value) 2488 stringtable_struct.entries[key] = value 2489 stringtable_struct.entries_offsets[key] = (key_offset, value_offset) 2490 stringtable_struct.entries_lengths[key] = (len(key), len(value)) 2491 2492 2493 stringtable_offset = self.dword_align( 2494 stringtable_struct.Length + stringtable_offset, 2495 version_struct.OffsetToData) 2496 if stringtable_offset >= stringfileinfo_struct.Length: 2497 break 2498 2499 # Parse a VarFileInfo entry 2500 # 2501 elif stringfileinfo_string == u'VarFileInfo': 2502 2503 varfileinfo_struct = stringfileinfo_struct 2504 varfileinfo_struct.name = 'VarFileInfo' 2505 2506 if varfileinfo_struct.Type == 1 and varfileinfo_struct.ValueLength == 0: 2507 2508 var_offset = self.dword_align( 2509 stringfileinfo_offset + varfileinfo_struct.sizeof() + 2510 2*(len(stringfileinfo_string)+1), 
def parse_export_directory(self, rva, size):
    """Parse the export directory.

    rva  -- RVA of the IMAGE_EXPORT_DIRECTORY structure.
    size -- size of the export directory. A symbol whose address lies
            in [rva, rva+size) is taken to be a forwarder, and the
            string at that address is stored as its 'forwarder'.

    Returns an ExportDirData whose 'struct' is the unpacked export
    directory and whose 'symbols' is a list of ExportData, each with:

        ordinal   -- export_dir.Base plus the index in the function table
        address   -- value read from the function table
        name      -- symbol name, or None for exports without a name
        forwarder -- forwarder string, or None

    Named exports come first, in name-table order, followed by the
    function-table slots no name refers to.

    Returns None when the directory or one of its tables cannot be
    read (a warning is appended), when the directory does not unpack,
    or when a name ordinal points outside the function table.
    """

    try:
        export_dir = self.__unpack_data__(
            self.__IMAGE_EXPORT_DIRECTORY_format__, self.get_data(rva),
            file_offset = self.get_offset_from_rva(rva) )
    except PEFormatError:
        self.__warnings.append(
            'Error parsing export directory at RVA: 0x%x' % ( rva ) )
        return

    if not export_dir:
        return

    try:
        address_of_names = self.get_data(
            export_dir.AddressOfNames, export_dir.NumberOfNames*4)
        address_of_name_ordinals = self.get_data(
            export_dir.AddressOfNameOrdinals, export_dir.NumberOfNames*4)
        address_of_functions = self.get_data(
            export_dir.AddressOfFunctions, export_dir.NumberOfFunctions*4)
    except PEFormatError:
        self.__warnings.append(
            'Error parsing export directory at RVA: 0x%x' % ( rva ) )
        return

    def forwarder_for(address):
        # If the function's rva points within the export directory
        # it will point to a string with the forwarded symbol's string
        # instead of pointing to the function start address.
        if address>=rva and address<rva+size:
            return self.get_string_at_rva(address)
        return None

    exports = []

    for i in xrange(export_dir.NumberOfNames):

        symbol_name = self.get_string_at_rva(
            self.get_dword_from_data(address_of_names, i))

        symbol_ordinal = self.get_word_from_data(
            address_of_name_ordinals, i)

        if symbol_ordinal*4<len(address_of_functions):
            symbol_address = self.get_dword_from_data(
                address_of_functions, symbol_ordinal)
        else:
            # Corrupt? a bad pointer... we assume it's all
            # useless, no exports
            return None

        exports.append(
            ExportData(
                ordinal = export_dir.Base+symbol_ordinal,
                address = symbol_address,
                name = symbol_name,
                forwarder = forwarder_for(symbol_address)))

    # Ordinals already reported through a name. A set keeps the check
    # below constant-time; with a list the loop is quadratic in the
    # number of exports.
    named_ordinals = set([exp.ordinal for exp in exports])

    for idx in xrange(export_dir.NumberOfFunctions):

        if idx+export_dir.Base in named_ordinals:
            continue

        symbol_address = self.get_dword_from_data(
            address_of_functions,
            idx)

        exports.append(
            ExportData(
                ordinal = export_dir.Base+idx,
                address = symbol_address,
                name = None,
                forwarder = forwarder_for(symbol_address)))

    return ExportDirData(
        struct = export_dir,
        symbols = exports)
2699 data = self.get_data(rva) 2700 except PEFormatError, e: 2701 self.__warnings.append( 2702 'Error parsing the Delay import directory at RVA: 0x%x' % ( rva ) ) 2703 break 2704 2705 import_desc = self.__unpack_data__( 2706 self.__IMAGE_DELAY_IMPORT_DESCRIPTOR_format__, 2707 data, file_offset = self.get_offset_from_rva(rva) ) 2708 2709 2710 # If the structure is all zeores, we reached the end of the list 2711 if not import_desc or import_desc.all_zeroes(): 2712 break 2713 2714 2715 rva += import_desc.sizeof() 2716 2717 try: 2718 import_data = self.parse_imports( 2719 import_desc.pINT, 2720 import_desc.pIAT, 2721 None) 2722 except PEFormatError, e: 2723 self.__warnings.append( 2724 'Error parsing the Delay import directory. ' + 2725 'Invalid import data at RVA: 0x%x' % ( rva ) ) 2726 break 2727 2728 if not import_data: 2729 continue 2730 2731 2732 dll = self.get_string_at_rva(import_desc.szName) 2733 if dll: 2734 import_descs.append( 2735 ImportDescData( 2736 struct = import_desc, 2737 imports = import_data, 2738 dll = dll)) 2739 2740 return import_descs 2741 2742 2743 2744 def parse_import_directory(self, rva, size): 2745 """Walk and parse the import directory.""" 2746 2747 import_descs = [] 2748 while True: 2749 try: 2750 # If the RVA is invalid all would blow up. Some EXEs seem to be 2751 # specially nasty and have an invalid RVA. 
def parse_imports(self, original_first_thunk, first_thunk, forwarder_chain):
    """Parse the symbols imported through one import descriptor.

    original_first_thunk -- RVA of the Import Lookup Table (ILT).
    first_thunk          -- RVA of the Import Address Table (IAT).
    forwarder_chain      -- accepted for interface compatibility; not
                            used here.

    Returns a list of ImportData, one per usable thunk, each with
    'import_by_ordinal', 'ordinal', 'hint', 'name', 'bound', 'address'
    and 'hint_name_table_rva'. Thunks for which neither an ordinal nor
    a non-empty name could be obtained are left out.

    Returns None when both tables are present but differ in length.

    Raises PEFormatError when 'first_thunk' does not belong to any
    section, or when neither table yields any entry.
    """

    imported_symbols = []
    imports_section = self.get_section_by_rva(first_thunk)
    if not imports_section:
        raise PEFormatError('Invalid/corrupt imports.')

    # Import Lookup Table. Contains ordinals or pointers to strings.
    ilt = self.get_import_table(original_first_thunk)
    # Import Address Table. May have identical content to ILT if
    # PE file is not bound. Will contain the address of the
    # imported symbols once the binary is loaded or if it is already
    # bound.
    iat = self.get_import_table(first_thunk)

    # Would crash if iat or ilt had None type
    if not iat and not ilt:
        raise PEFormatError(
            'Invalid Import Table information. ' +
            'Both ILT and IAT appear to be broken.')

    # From here on at least one table is a non-empty list. Use the one
    # that exists; when both exist they must describe the same number
    # of symbols, and the ILT is preferred.
    if not iat:
        table = ilt
    elif not ilt:
        table = iat
    elif len(ilt) == len(iat):
        table = ilt
    else:
        return None

    if self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE:
        ordinal_flag = IMAGE_ORDINAL_FLAG
    elif self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE_PLUS:
        ordinal_flag = IMAGE_ORDINAL_FLAG64

    for idx, thunk in enumerate(table):

        imp_ord = None
        imp_hint = None
        imp_name = None
        hint_name_table_rva = None
        import_by_ordinal = False

        if thunk.AddressOfData:

            # If imported by ordinal, we will append the ordinal number
            #
            if thunk.AddressOfData & ordinal_flag:
                import_by_ordinal = True
                imp_ord = thunk.AddressOfData & 0xffff
            else:
                try:
                    hint_name_table_rva = thunk.AddressOfData & 0x7fffffff
                    data = self.get_data(hint_name_table_rva, 2)
                    # Get the Hint
                    imp_hint = self.get_word_from_data(data, 0)
                    imp_name = self.get_string_at_rva(thunk.AddressOfData+2)
                except PEFormatError:
                    # Best effort: an unreadable hint/name leaves the
                    # name unset and the thunk is skipped below.
                    pass

        # Address of this symbol's slot in the IAT. The slot width is
        # the size of one thunk, which differs between PE32 and PE32+,
        # so it is taken from the thunk instead of being fixed at 4.
        imp_address = (
            first_thunk + self.OPTIONAL_HEADER.ImageBase +
            idx*thunk.sizeof() )

        if iat and ilt and ilt[idx].AddressOfData != iat[idx].AddressOfData:
            imp_bound = iat[idx].AddressOfData
        else:
            imp_bound = None

        if imp_name != '' and (imp_ord or imp_name):
            imported_symbols.append(
                ImportData(
                    import_by_ordinal = import_by_ordinal,
                    ordinal = imp_ord,
                    hint = imp_hint,
                    name = imp_name,
                    bound = imp_bound,
                    address = imp_address,
                    hint_name_table_rva = hint_name_table_rva))

    return imported_symbols
def get_data(self, rva, length=None):
    """Get data regardless of the section where it lies on.

    rva    -- relative virtual address of the first byte wanted.
    length -- number of bytes wanted. When omitted (or zero) and the
              data comes from the header, everything up to the end of
              the header is returned.

    If a section contains 'rva', the request is handed to that
    section's get_data(). Otherwise, if 'rva' falls within the header,
    the bytes are sliced out of the header.

    Raises PEFormatError when 'rva' is in neither.
    """

    section = self.get_section_by_rva(rva)
    if section:
        return section.get_data(rva, length)

    # No section claims the address, the header is the only other
    # place the data can come from.
    if rva<len(self.header):
        stop = None
        if length:
            stop = rva+length
        return self.header[rva:stop]

    raise PEFormatError('data at RVA can\'t be fetched. Corrupt header?')
def get_string_from_data(self, offset, data):
    """Get an ASCII string from within the data.

    Collects the characters of 'data' starting at index 'offset' and
    stops at the first NUL character or at the end of 'data',
    whichever comes first. The terminator is not included.

    Returns '' when 'offset' is not a valid index into 'data'.
    """

    collected = []

    try:
        current = data[offset]
        while ord(current):
            collected.append(current)
            offset += 1
            current = data[offset]
    except IndexError:
        # Ran off the end of the data, keep what was gathered so far
        pass

    return ''.join(collected)
def get_section_by_rva(self, rva):
    """Get the section containing the given address.

    Walks self.sections in order and returns the first one whose
    contains_rva(rva) is true, or None when no section contains it.
    The walk stops at the first match; later sections are not asked.
    """

    for section in self.sections:
        if section.contains_rva(rva):
            return section

    return None
dump.add_line(', '.join(flags)) 3140 dump.add_newline() 3141 3142 if hasattr(self, 'OPTIONAL_HEADER') and self.OPTIONAL_HEADER is not None: 3143 dump.add_header('OPTIONAL_HEADER') 3144 dump.add_lines(self.OPTIONAL_HEADER.dump()) 3145 3146 dll_characteristics_flags = self.retrieve_flags(DLL_CHARACTERISTICS, 'IMAGE_DLL_CHARACTERISTICS_') 3147 3148 dump.add('DllCharacteristics: ') 3149 flags = [] 3150 for flag in dll_characteristics_flags: 3151 if getattr(self.OPTIONAL_HEADER, flag[0]): 3152 flags.append(flag[0]) 3153 dump.add_line(', '.join(flags)) 3154 dump.add_newline() 3155 3156 3157 dump.add_header('PE Sections') 3158 3159 section_flags = self.retrieve_flags(SECTION_CHARACTERISTICS, 'IMAGE_SCN_') 3160 3161 for section in self.sections: 3162 dump.add_lines(section.dump()) 3163 dump.add('Flags: ') 3164 flags = [] 3165 for flag in section_flags: 3166 if getattr(section, flag[0]): 3167 flags.append(flag[0]) 3168 dump.add_line(', '.join(flags)) 3169 dump.add_line('Entropy: %f (Min=0.0, Max=8.0)' % section.get_entropy() ) 3170 if md5 is not None: 3171 dump.add_line('MD5 hash: %s' % section.get_hash_md5() ) 3172 if sha1 is not None: 3173 dump.add_line('SHA-1 hash: %s' % section.get_hash_sha1() ) 3174 if sha256 is not None: 3175 dump.add_line('SHA-256 hash: %s' % section.get_hash_sha256() ) 3176 if sha512 is not None: 3177 dump.add_line('SHA-512 hash: %s' % section.get_hash_sha512() ) 3178 dump.add_newline() 3179 3180 3181 3182 if (hasattr(self, 'OPTIONAL_HEADER') and 3183 hasattr(self.OPTIONAL_HEADER, 'DATA_DIRECTORY') ): 3184 3185 dump.add_header('Directories') 3186 for idx in xrange(len(self.OPTIONAL_HEADER.DATA_DIRECTORY)): 3187 directory = self.OPTIONAL_HEADER.DATA_DIRECTORY[idx] 3188 dump.add_lines(directory.dump()) 3189 dump.add_newline() 3190 3191 3192 if hasattr(self, 'VS_VERSIONINFO'): 3193 dump.add_header('Version Information') 3194 dump.add_lines(self.VS_VERSIONINFO.dump()) 3195 dump.add_newline() 3196 3197 if hasattr(self, 'VS_FIXEDFILEINFO'): 3198 
dump.add_lines(self.VS_FIXEDFILEINFO.dump()) 3199 dump.add_newline() 3200 3201 if hasattr(self, 'FileInfo'): 3202 for entry in self.FileInfo: 3203 dump.add_lines(entry.dump()) 3204 dump.add_newline() 3205 3206 if hasattr(entry, 'StringTable'): 3207 for st_entry in entry.StringTable: 3208 [dump.add_line(' '+line) for line in st_entry.dump()] 3209 dump.add_line(' LangID: '+st_entry.LangID) 3210 dump.add_newline() 3211 for str_entry in st_entry.entries.items(): 3212 dump.add_line(' '+str_entry[0]+': '+str_entry[1]) 3213 dump.add_newline() 3214 3215 elif hasattr(entry, 'Var'): 3216 for var_entry in entry.Var: 3217 if hasattr(var_entry, 'entry'): 3218 [dump.add_line(' '+line) for line in var_entry.dump()] 3219 dump.add_line( 3220 ' ' + var_entry.entry.keys()[0] + 3221 ': ' + var_entry.entry.values()[0]) 3222 3223 dump.add_newline() 3224 3225 3226 3227 if hasattr(self, 'DIRECTORY_ENTRY_EXPORT'): 3228 dump.add_header('Exported symbols') 3229 dump.add_lines(self.DIRECTORY_ENTRY_EXPORT.struct.dump()) 3230 dump.add_newline() 3231 dump.add_line('%-10s %-10s %s' % ('Ordinal', 'RVA', 'Name')) 3232 for export in self.DIRECTORY_ENTRY_EXPORT.symbols: 3233 dump.add('%-10d 0x%08Xh %s' % ( 3234 export.ordinal, export.address, export.name)) 3235 if export.forwarder: 3236 dump.add_line(' forwarder: %s' % export.forwarder) 3237 else: 3238 dump.add_newline() 3239 3240 dump.add_newline() 3241 3242 if hasattr(self, 'DIRECTORY_ENTRY_IMPORT'): 3243 dump.add_header('Imported symbols') 3244 for module in self.DIRECTORY_ENTRY_IMPORT: 3245 dump.add_lines(module.struct.dump()) 3246 dump.add_newline() 3247 for symbol in module.imports: 3248 3249 if symbol.import_by_ordinal is True: 3250 dump.add('%s Ordinal[%s] (Imported by Ordinal)' % ( 3251 module.dll, str(symbol.ordinal))) 3252 else: 3253 dump.add('%s.%s Hint[%s]' % ( 3254 module.dll, symbol.name, str(symbol.hint))) 3255 3256 if symbol.bound: 3257 dump.add_line(' Bound: 0x%08X' % (symbol.bound)) 3258 else: 3259 dump.add_newline() 3260 
dump.add_newline() 3261 3262 3263 if hasattr(self, 'DIRECTORY_ENTRY_BOUND_IMPORT'): 3264 dump.add_header('Bound imports') 3265 for bound_imp_desc in self.DIRECTORY_ENTRY_BOUND_IMPORT: 3266 3267 dump.add_lines(bound_imp_desc.struct.dump()) 3268 dump.add_line('DLL: %s' % bound_imp_desc.name) 3269 dump.add_newline() 3270 3271 for bound_imp_ref in bound_imp_desc.entries: 3272 dump.add_lines(bound_imp_ref.struct.dump(), 4) 3273 dump.add_line('DLL: %s' % bound_imp_ref.name, 4) 3274 dump.add_newline() 3275 3276 3277 if hasattr(self, 'DIRECTORY_ENTRY_DELAY_IMPORT'): 3278 dump.add_header('Delay Imported symbols') 3279 for module in self.DIRECTORY_ENTRY_DELAY_IMPORT: 3280 3281 dump.add_lines(module.struct.dump()) 3282 dump.add_newline() 3283 3284 for symbol in module.imports: 3285 if symbol.import_by_ordinal is True: 3286 dump.add('%s Ordinal[%s] (Imported by Ordinal)' % ( 3287 module.dll, str(symbol.ordinal))) 3288 else: 3289 dump.add('%s.%s Hint[%s]' % ( 3290 module.dll, symbol.name, str(symbol.hint))) 3291 3292 if symbol.bound: 3293 dump.add_line(' Bound: 0x%08X' % (symbol.bound)) 3294 else: 3295 dump.add_newline() 3296 dump.add_newline() 3297 3298 3299 if hasattr(self, 'DIRECTORY_ENTRY_RESOURCE'): 3300 dump.add_header('Resource directory') 3301 3302 dump.add_lines(self.DIRECTORY_ENTRY_RESOURCE.struct.dump()) 3303 3304 for resource_type in self.DIRECTORY_ENTRY_RESOURCE.entries: 3305 3306 if resource_type.name is not None: 3307 dump.add_line('Name: [%s]' % resource_type.name, 2) 3308 else: 3309 dump.add_line('Id: [0x%X] (%s)' % ( 3310 resource_type.struct.Id, RESOURCE_TYPE.get( 3311 resource_type.struct.Id, '-')), 3312 2) 3313 3314 dump.add_lines(resource_type.struct.dump(), 2) 3315 3316 if hasattr(resource_type, 'directory'): 3317 3318 dump.add_lines(resource_type.directory.struct.dump(), 4) 3319 3320 for resource_id in resource_type.directory.entries: 3321 3322 if resource_id.name is not None: 3323 dump.add_line('Name: [%s]' % resource_id.name, 6) 3324 else: 3325 
dump.add_line('Id: [0x%X]' % resource_id.struct.Id, 6) 3326 3327 dump.add_lines(resource_id.struct.dump(), 6) 3328 3329 if hasattr(resource_id, 'directory'): 3330 dump.add_lines(resource_id.directory.struct.dump(), 8) 3331 3332 for resource_lang in resource_id.directory.entries: 3333 # dump.add_line('\\--- LANG [%d,%d][%s]' % ( 3334 # resource_lang.data.lang, 3335 # resource_lang.data.sublang, 3336 # LANG[resource_lang.data.lang]), 8) 3337 dump.add_lines(resource_lang.struct.dump(), 10) 3338 dump.add_lines(resource_lang.data.struct.dump(), 12) 3339 dump.add_newline() 3340 3341 dump.add_newline() 3342 3343 3344 if ( hasattr(self, 'DIRECTORY_ENTRY_TLS') and 3345 self.DIRECTORY_ENTRY_TLS and 3346 self.DIRECTORY_ENTRY_TLS.struct ): 3347 3348 dump.add_header('TLS') 3349 dump.add_lines(self.DIRECTORY_ENTRY_TLS.struct.dump()) 3350 dump.add_newline() 3351 3352 3353 if hasattr(self, 'DIRECTORY_ENTRY_DEBUG'): 3354 dump.add_header('Debug information') 3355 for dbg in self.DIRECTORY_ENTRY_DEBUG: 3356 dump.add_lines(dbg.struct.dump()) 3357 try: 3358 dump.add_line('Type: '+DEBUG_TYPE[dbg.struct.Type]) 3359 except KeyError: 3360 dump.add_line('Type: 0x%x(Unknown)' % dbg.struct.Type) 3361 dump.add_newline() 3362 3363 3364 if hasattr(self, 'DIRECTORY_ENTRY_BASERELOC'): 3365 dump.add_header('Base relocations') 3366 for base_reloc in self.DIRECTORY_ENTRY_BASERELOC: 3367 dump.add_lines(base_reloc.struct.dump()) 3368 for reloc in base_reloc.entries: 3369 try: 3370 dump.add_line('%08Xh %s' % ( 3371 reloc.rva, RELOCATION_TYPE[reloc.type][16:]), 4) 3372 except KeyError: 3373 dump.add_line('0x%08X 0x%x(Unknown)' % ( 3374 reloc.rva, reloc.type), 4) 3375 dump.add_newline() 3376 3377 3378 return dump.get_text() 3379 3380 # OC Patch 3381 def get_physical_by_rva(self, rva): 3382 """Gets the physical address in the PE file from an RVA value.""" 3383 try: 3384 return self.get_offset_from_rva(rva) 3385 except Exception: 3386 return None 3387 3388 3389 ## 3390 # Double-Word get/set 3391 ## 3392 
def get_data_from_dword(self, dword):
    """Return a four byte string representing the double word value (little endian)."""
    return struct.pack('<L', dword)


def get_dword_from_data(self, data, offset):
    """Convert four bytes of data to a double word (little endian).

    'offset' is assumed to index into a dword array. So setting it to
    N will return a dword out of the data starting at offset N*4.

    Returns None if the data can't be turned into a double word.
    """

    if (offset+1)*4 > len(data):
        return None

    return struct.unpack('<L', data[offset*4:(offset+1)*4])[0]


def get_dword_at_rva(self, rva):
    """Return the double word value at the given RVA.

    Returns None if the value can't be read, i.e. the RVA can't be mapped
    to a file offset, or fewer than four bytes are available there.
    """

    try:
        # Ask only for the bytes needed instead of fetching everything
        # up to the end of the section and slicing afterwards.
        return self.get_dword_from_data(self.get_data(rva, 4), 0)
    except PEFormatError:
        return None


def get_dword_from_offset(self, offset):
    """Return the double word value at the given file offset (little endian).

    Returns None if fewer than four bytes are available at 'offset'.
    """

    if offset+4 > len(self.__data__):
        return None

    return self.get_dword_from_data(self.__data__[offset:offset+4], 0)


def set_dword_at_rva(self, rva, dword):
    """Set the double word value at the file offset corresponding to the given RVA.

    Returns whatever set_bytes_at_rva() returns (True on success, False otherwise).
    """
    return self.set_bytes_at_rva(rva, self.get_data_from_dword(dword))


def set_dword_at_offset(self, offset, dword):
    """Set the double word value at the given file offset.

    Returns whatever set_bytes_at_offset() returns (True on success, False otherwise).
    """
    return self.set_bytes_at_offset(offset, self.get_data_from_dword(dword))



##
# Word get/set
##

def get_data_from_word(self, word):
    """Return a two byte string representing the word value (little endian)."""
    return struct.pack('<H', word)


def get_word_from_data(self, data, offset):
    """Convert two bytes of data to a word (little endian).

    'offset' is assumed to index into a word array. So setting it to
    N will return a word out of the data starting at offset N*2.

    Returns None if the data can't be turned into a word.
    """

    if (offset+1)*2 > len(data):
        return None

    return struct.unpack('<H', data[offset*2:(offset+1)*2])[0]


def get_word_at_rva(self, rva):
    """Return the word value at the given RVA.

    Returns None if the value can't be read, i.e. the RVA can't be mapped
    to a file offset, or fewer than two bytes are available there.
    """

    try:
        return self.get_word_from_data(self.get_data(rva, 2), 0)
    except PEFormatError:
        return None


def get_word_from_offset(self, offset):
    """Return the word value at the given file offset (little endian).

    Returns None if fewer than two bytes are available at 'offset'.
    """

    if offset+2 > len(self.__data__):
        return None

    return self.get_word_from_data(self.__data__[offset:offset+2], 0)


def set_word_at_rva(self, rva, word):
    """Set the word value at the file offset corresponding to the given RVA.

    Returns whatever set_bytes_at_rva() returns (True on success, False otherwise).
    """
    return self.set_bytes_at_rva(rva, self.get_data_from_word(word))


def set_word_at_offset(self, offset, word):
    """Set the word value at the given file offset.

    Returns whatever set_bytes_at_offset() returns (True on success, False otherwise).
    """
    return self.set_bytes_at_offset(offset, self.get_data_from_word(word))


##
# Quad-Word get/set
##

def get_data_from_qword(self, word):
    """Return an eight byte string representing the quad-word value (little endian)."""
    return struct.pack('<Q', word)


def get_qword_from_data(self, data, offset):
    """Convert eight bytes of data to a quad-word (little endian).

    'offset' is assumed to index into a quad-word array. So setting it to
    N will return a quad-word out of the data starting at offset N*8.

    Returns None if the data can't be turned into a quad word.
    """

    if (offset+1)*8 > len(data):
        return None

    return struct.unpack('<Q', data[offset*8:(offset+1)*8])[0]


def get_qword_at_rva(self, rva):
    """Return the quad-word value at the given RVA.

    Returns None if the value can't be read, i.e. the RVA can't be mapped
    to a file offset, or fewer than eight bytes are available there.
    """

    try:
        return self.get_qword_from_data(self.get_data(rva, 8), 0)
    except PEFormatError:
        return None


def get_qword_from_offset(self, offset):
    """Return the quad-word value at the given file offset (little endian).

    Returns None if fewer than eight bytes are available at 'offset'.
    """

    if offset+8 > len(self.__data__):
        return None

    return self.get_qword_from_data(self.__data__[offset:offset+8], 0)


def set_qword_at_rva(self, rva, qword):
    """Set the quad-word value at the file offset corresponding to the given RVA.

    Returns whatever set_bytes_at_rva() returns (True on success, False otherwise).
    """
    return self.set_bytes_at_rva(rva, self.get_data_from_qword(qword))


def set_qword_at_offset(self, offset, qword):
    """Set the quad-word value at the given file offset.

    Returns whatever set_bytes_at_offset() returns (True on success, False otherwise).
    """
    return self.set_bytes_at_offset(offset, self.get_data_from_qword(qword))



##
# Set bytes
##


def set_bytes_at_rva(self, rva, data):
    """Overwrite, with the given string, the bytes at the file offset corresponding to the given RVA.

    Return True if successful, False otherwise. It can fail if the
    RVA can't be mapped to a file offset or if the offset is outside
    the file's boundaries.
    """

    offset = self.get_physical_by_rva(rva)

    # get_physical_by_rva() signals failure with None. Compare against
    # None explicitly so that a legitimate file offset of 0 is accepted.
    if offset is None:
        return False

    return self.set_bytes_at_offset(offset, data)


def set_bytes_at_offset(self, offset, data):
    """Overwrite the bytes at the given file offset with the given string.

    Return True if successful, False otherwise. It can fail if the
    offset is outside the file's boundaries.

    Raises TypeError if 'data' is not a str.
    """

    if not isinstance(data, str):
        raise TypeError('data should be of type: str')

    if offset < 0 or offset >= len(self.__data__):
        return False

    self.__data__ = ( self.__data__[:offset] +
        data +
        self.__data__[offset+len(data):] )

    # Refresh the section's data with the modified information
    #
    for section in self.sections:
        section_data_start = section.PointerToRawData
        section_data_end = section_data_start+section.SizeOfRawData
        section.data = self.__data__[section_data_start:section_data_end]

    return True



def relocate_image(self, new_ImageBase):
    """Apply the relocation information to the image using the provided new image base.

    This method will apply the relocation information to the image. Given the new base,
    all the relocations will be processed and both the raw data and the section's data
    will be fixed accordingly.
    The resulting image can be retrieved as well through the method:

        get_memory_mapped_image()

    In order to get something that would more closely match what could be found in memory
    once the Windows loader finished its work.
    """

    relocation_difference = new_ImageBase - self.OPTIONAL_HEADER.ImageBase


    for reloc in self.DIRECTORY_ENTRY_BASERELOC:

        # We iterate with an index because if the relocation is of type
        # IMAGE_REL_BASED_HIGHADJ we need to also process the next entry
        # at once and skip it for the next iteration
        #
        entry_idx = 0
        while entry_idx < len(reloc.entries):

            entry = reloc.entries[entry_idx]
            entry_idx += 1

            if entry.type == RELOCATION_TYPE['IMAGE_REL_BASED_ABSOLUTE']:
                # Nothing to do for this type of relocation
                pass

            elif entry.type == RELOCATION_TYPE['IMAGE_REL_BASED_HIGH']:
                # Fix the high 16bits of a relocation
                #
                # Add high 16bits of relocation_difference to the
                # 16bit value at RVA=entry.rva
                #
                # The shift is parenthesised explicitly: '+' binds
                # tighter than '>>', so without the parentheses the
                # whole sum would be shifted instead of the difference.

                self.set_word_at_rva(
                    entry.rva,
                    ( self.get_word_at_rva(entry.rva) +
                        (relocation_difference >> 16) ) & 0xffff )

            elif entry.type == RELOCATION_TYPE['IMAGE_REL_BASED_LOW']:
                # Fix the low 16bits of a relocation
                #
                # Add low 16 bits of relocation_difference to the 16bit value
                # at RVA=entry.rva

                self.set_word_at_rva(
                    entry.rva,
                    ( self.get_word_at_rva(entry.rva) +
                        relocation_difference ) & 0xffff )

            elif entry.type == RELOCATION_TYPE['IMAGE_REL_BASED_HIGHLOW']:
                # Handle all high and low parts of a 32bit relocation
                #
                # Add relocation_difference to the value at RVA=entry.rva,
                # wrapping to 32 bits so that a negative difference or an
                # overflowing sum can still be packed as an unsigned dword.

                self.set_dword_at_rva(
                    entry.rva,
                    ( self.get_dword_at_rva(entry.rva) +
                        relocation_difference ) & 0xffffffff )

            elif entry.type == RELOCATION_TYPE['IMAGE_REL_BASED_HIGHADJ']:
                # Fix the high 16bits of a relocation and adjust
                #
                # Add high 16bits of relocation_difference to the 32bit value
                # composed from the (16bit value at RVA=entry.rva)<<16 plus
                # the 16bit value at the next relocation entry.
                #

                # If the next entry is beyond the array's limits,
                # abort... the table is corrupt
                #
                if entry_idx == len(reloc.entries):
                    break

                next_entry = reloc.entries[entry_idx]
                entry_idx += 1

                # NOTE(review): the low half is taken from next_entry.rva
                # as in the original code; confirm that this is the value
                # intended as the low 16 bits for this relocation type.
                adjusted = ( (self.get_word_at_rva(entry.rva) << 16) +
                    next_entry.rva + relocation_difference )
                self.set_word_at_rva(
                    entry.rva, (adjusted & 0xffff0000) >> 16 )

            elif entry.type == RELOCATION_TYPE['IMAGE_REL_BASED_DIR64']:
                # Apply the difference to the 64bit value at the offset
                # RVA=entry.rva, wrapping to 64 bits.

                self.set_qword_at_rva(
                    entry.rva,
                    ( self.get_qword_at_rva(entry.rva) +
                        relocation_difference ) & 0xffffffffffffffff )


def verify_checksum(self):
    """Return True if the CheckSum stored in the optional header matches generate_checksum()."""

    return self.OPTIONAL_HEADER.CheckSum == self.generate_checksum()


def generate_checksum(self):
    """Compute the checksum of the file data and return it as an integer.

    The data is summed as little endian double words with the carries
    folded back in, skipping the double word that holds the CheckSum
    field of the optional header. The sum is then folded to 16 bits and
    the length of the data is added to it.
    """

    # Get the offset to the CheckSum field in the OptionalHeader
    #
    checksum_offset = self.OPTIONAL_HEADER.__file_offset__ + 0x40 # 64
    checksum_index = checksum_offset // 4

    data = self.__data__
    data_length = len(data)

    checksum = 0

    # Round up so that a trailing partial double word is processed too
    #
    for i in range( (data_length + 3) // 4 ):

        # Skip the checksum field
        #
        if i == checksum_index:
            continue

        chunk = data[ i*4 : i*4+4 ]
        if len(chunk) < 4:
            # The '4s' format pads short input with null bytes
            chunk = struct.pack('4s', chunk)

        # '<L' forces a standard four byte little endian value. A plain
        # 'L' uses the native size, which is eight bytes on some platforms.
        dword = struct.unpack('<L', chunk)[0]
        checksum = (checksum & 0xffffffff) + dword + (checksum >> 32)
        if checksum > 2**32:
            checksum = (checksum & 0xffffffff) + (checksum >> 32)

    checksum = (checksum & 0xffff) + (checksum >> 16)
    checksum = (checksum) + (checksum >> 16)
    checksum = checksum & 0xffff

    return checksum + data_length