Home | History | Annotate | Download | only in symsrc
      1 # -*- coding: Latin-1 -*-
      2 """pefile, Portable Executable reader module
      3 
      4 
      5 All the PE file basic structures are available with their default names
      6 as attributes of the instance returned.
      7 
      8 Processed elements such as the import table are made available with lowercase
      9 names, to differentiate them from the upper case basic structure names.
     10 
     11 pefile has been tested against the limits of valid PE headers, that is, malware.
     12 Lots of packed malware attempt to abuse the format way beyond its standard use.
     13 To the best of my knowledge most of the abuses are handled gracefully.
     14 
     15 Copyright (c) 2005, 2006, 2007, 2008 Ero Carrera <ero (at] dkbza.org>
     16 
     17 All rights reserved.
     18 
     19 For detailed copyright information see the file COPYING in
     20 the root of the distribution archive.
     21 """
     22 
     23 __author__ = 'Ero Carrera'
     24 __version__ = '1.2.9.1'
     25 __contact__ = 'ero (at] dkbza.org'
     26 
     27 
     28 import os
     29 import struct
     30 import time
     31 import math
     32 import re
     33 import exceptions
     34 import string
     35 import array
     36 
     37 sha1, sha256, sha512, md5 = None, None, None, None
     38 
     39 try:
     40     import hashlib
     41     sha1 = hashlib.sha1
     42     sha256 = hashlib.sha256
     43     sha512 = hashlib.sha512
     44     md5 = hashlib.md5
     45 except ImportError:    
     46     try:
     47         import sha
     48         sha1 = sha.new
     49     except ImportError:
     50         pass
     51     try:
     52         import md5
     53         md5 = md5.new
     54     except ImportError:
     55         pass
     56 
     57 
     58 fast_load = False
     59 
     60 IMAGE_DOS_SIGNATURE             = 0x5A4D
     61 IMAGE_OS2_SIGNATURE             = 0x454E
     62 IMAGE_OS2_SIGNATURE_LE          = 0x454C
     63 IMAGE_VXD_SIGNATURE             = 0x454C
     64 IMAGE_NT_SIGNATURE              = 0x00004550
     65 IMAGE_NUMBEROF_DIRECTORY_ENTRIES= 16
     66 IMAGE_ORDINAL_FLAG              = 0x80000000L
     67 IMAGE_ORDINAL_FLAG64            = 0x8000000000000000L
     68 OPTIONAL_HEADER_MAGIC_PE        = 0x10b
     69 OPTIONAL_HEADER_MAGIC_PE_PLUS   = 0x20b
     70 
     71 
     72 directory_entry_types = [
     73     ('IMAGE_DIRECTORY_ENTRY_EXPORT',        0),
     74     ('IMAGE_DIRECTORY_ENTRY_IMPORT',        1),
     75     ('IMAGE_DIRECTORY_ENTRY_RESOURCE',      2),
     76     ('IMAGE_DIRECTORY_ENTRY_EXCEPTION',     3),
     77     ('IMAGE_DIRECTORY_ENTRY_SECURITY',      4),
     78     ('IMAGE_DIRECTORY_ENTRY_BASERELOC',     5),
     79     ('IMAGE_DIRECTORY_ENTRY_DEBUG',         6),
     80     ('IMAGE_DIRECTORY_ENTRY_COPYRIGHT',     7),
     81     ('IMAGE_DIRECTORY_ENTRY_GLOBALPTR',     8),
     82     ('IMAGE_DIRECTORY_ENTRY_TLS',           9),
     83     ('IMAGE_DIRECTORY_ENTRY_LOAD_CONFIG',   10),
     84     ('IMAGE_DIRECTORY_ENTRY_BOUND_IMPORT',  11),
     85     ('IMAGE_DIRECTORY_ENTRY_IAT',           12),
     86     ('IMAGE_DIRECTORY_ENTRY_DELAY_IMPORT',  13),
     87     ('IMAGE_DIRECTORY_ENTRY_COM_DESCRIPTOR',14),
     88     ('IMAGE_DIRECTORY_ENTRY_RESERVED',      15) ]
     89 
     90 DIRECTORY_ENTRY = dict([(e[1], e[0]) for e in directory_entry_types]+directory_entry_types)
     91  
     92 
     93 image_characteristics = [
     94     ('IMAGE_FILE_RELOCS_STRIPPED',          0x0001),
     95     ('IMAGE_FILE_EXECUTABLE_IMAGE',         0x0002),
     96     ('IMAGE_FILE_LINE_NUMS_STRIPPED',       0x0004),
     97     ('IMAGE_FILE_LOCAL_SYMS_STRIPPED',      0x0008),
     98     ('IMAGE_FILE_AGGRESIVE_WS_TRIM',        0x0010),
     99     ('IMAGE_FILE_LARGE_ADDRESS_AWARE',      0x0020),
    100     ('IMAGE_FILE_16BIT_MACHINE',            0x0040),
    101     ('IMAGE_FILE_BYTES_REVERSED_LO',        0x0080),
    102     ('IMAGE_FILE_32BIT_MACHINE',            0x0100),
    103     ('IMAGE_FILE_DEBUG_STRIPPED',           0x0200),
    104     ('IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP',  0x0400),
    105     ('IMAGE_FILE_NET_RUN_FROM_SWAP',        0x0800),
    106     ('IMAGE_FILE_SYSTEM',                   0x1000),
    107     ('IMAGE_FILE_DLL',                      0x2000),
    108     ('IMAGE_FILE_UP_SYSTEM_ONLY',           0x4000),
    109     ('IMAGE_FILE_BYTES_REVERSED_HI',        0x8000) ]
    110 
    111 IMAGE_CHARACTERISTICS = dict([(e[1], e[0]) for e in
    112     image_characteristics]+image_characteristics)
    113 
    114     
    115 section_characteristics = [
    116     ('IMAGE_SCN_CNT_CODE',                  0x00000020),
    117     ('IMAGE_SCN_CNT_INITIALIZED_DATA',      0x00000040),
    118     ('IMAGE_SCN_CNT_UNINITIALIZED_DATA',    0x00000080),
    119     ('IMAGE_SCN_LNK_OTHER',                 0x00000100),
    120     ('IMAGE_SCN_LNK_INFO',                  0x00000200),
    121     ('IMAGE_SCN_LNK_REMOVE',                0x00000800),
    122     ('IMAGE_SCN_LNK_COMDAT',                0x00001000),
    123     ('IMAGE_SCN_MEM_FARDATA',               0x00008000),
    124     ('IMAGE_SCN_MEM_PURGEABLE',             0x00020000),
    125     ('IMAGE_SCN_MEM_16BIT',                 0x00020000),
    126     ('IMAGE_SCN_MEM_LOCKED',                0x00040000),
    127     ('IMAGE_SCN_MEM_PRELOAD',               0x00080000),
    128     ('IMAGE_SCN_ALIGN_1BYTES',              0x00100000),
    129     ('IMAGE_SCN_ALIGN_2BYTES',              0x00200000),
    130     ('IMAGE_SCN_ALIGN_4BYTES',              0x00300000),
    131     ('IMAGE_SCN_ALIGN_8BYTES',              0x00400000),
    132     ('IMAGE_SCN_ALIGN_16BYTES',             0x00500000),
    133     ('IMAGE_SCN_ALIGN_32BYTES',             0x00600000),
    134     ('IMAGE_SCN_ALIGN_64BYTES',             0x00700000),
    135     ('IMAGE_SCN_ALIGN_128BYTES',            0x00800000),
    136     ('IMAGE_SCN_ALIGN_256BYTES',            0x00900000),
    137     ('IMAGE_SCN_ALIGN_512BYTES',            0x00A00000),
    138     ('IMAGE_SCN_ALIGN_1024BYTES',           0x00B00000),
    139     ('IMAGE_SCN_ALIGN_2048BYTES',           0x00C00000),
    140     ('IMAGE_SCN_ALIGN_4096BYTES',           0x00D00000),
    141     ('IMAGE_SCN_ALIGN_8192BYTES',           0x00E00000),
    142     ('IMAGE_SCN_ALIGN_MASK',                0x00F00000),
    143     ('IMAGE_SCN_LNK_NRELOC_OVFL',           0x01000000),
    144     ('IMAGE_SCN_MEM_DISCARDABLE',           0x02000000),
    145     ('IMAGE_SCN_MEM_NOT_CACHED',            0x04000000),
    146     ('IMAGE_SCN_MEM_NOT_PAGED',             0x08000000),
    147     ('IMAGE_SCN_MEM_SHARED',                0x10000000),
    148     ('IMAGE_SCN_MEM_EXECUTE',               0x20000000),
    149     ('IMAGE_SCN_MEM_READ',                  0x40000000),
    150     ('IMAGE_SCN_MEM_WRITE',                 0x80000000L) ]
    151  
    152 SECTION_CHARACTERISTICS = dict([(e[1], e[0]) for e in
    153     section_characteristics]+section_characteristics)
    154 
    155 
    156 debug_types = [
    157     ('IMAGE_DEBUG_TYPE_UNKNOWN',        0),
    158     ('IMAGE_DEBUG_TYPE_COFF',           1),
    159     ('IMAGE_DEBUG_TYPE_CODEVIEW',       2),
    160     ('IMAGE_DEBUG_TYPE_FPO',            3),
    161     ('IMAGE_DEBUG_TYPE_MISC',           4),
    162     ('IMAGE_DEBUG_TYPE_EXCEPTION',      5),
    163     ('IMAGE_DEBUG_TYPE_FIXUP',          6),
    164     ('IMAGE_DEBUG_TYPE_OMAP_TO_SRC',    7),
    165     ('IMAGE_DEBUG_TYPE_OMAP_FROM_SRC',  8),
    166     ('IMAGE_DEBUG_TYPE_BORLAND',        9),
    167     ('IMAGE_DEBUG_TYPE_RESERVED10',     10) ]
    168 
    169 DEBUG_TYPE = dict([(e[1], e[0]) for e in debug_types]+debug_types)
    170 
    171 
    172 subsystem_types = [
    173     ('IMAGE_SUBSYSTEM_UNKNOWN',     0),
    174     ('IMAGE_SUBSYSTEM_NATIVE',      1),
    175     ('IMAGE_SUBSYSTEM_WINDOWS_GUI', 2),
    176     ('IMAGE_SUBSYSTEM_WINDOWS_CUI', 3),
    177     ('IMAGE_SUBSYSTEM_OS2_CUI',     5),
    178     ('IMAGE_SUBSYSTEM_POSIX_CUI',   7),
    179     ('IMAGE_SUBSYSTEM_WINDOWS_CE_GUI',  9),
    180     ('IMAGE_SUBSYSTEM_EFI_APPLICATION', 10),
    181     ('IMAGE_SUBSYSTEM_EFI_BOOT_SERVICE_DRIVER', 11),
    182     ('IMAGE_SUBSYSTEM_EFI_RUNTIME_DRIVER',      12),
    183     ('IMAGE_SUBSYSTEM_EFI_ROM',     13),
    184     ('IMAGE_SUBSYSTEM_XBOX',        14)]
    185 
    186 SUBSYSTEM_TYPE = dict([(e[1], e[0]) for e in subsystem_types]+subsystem_types)
    187 
    188 
    189 machine_types = [
    190     ('IMAGE_FILE_MACHINE_UNKNOWN',  0),
    191     ('IMAGE_FILE_MACHINE_AM33',     0x1d3),
    192     ('IMAGE_FILE_MACHINE_AMD64',    0x8664),
    193     ('IMAGE_FILE_MACHINE_ARM',      0x1c0),
    194     ('IMAGE_FILE_MACHINE_EBC',      0xebc),
    195     ('IMAGE_FILE_MACHINE_I386',     0x14c),
    196     ('IMAGE_FILE_MACHINE_IA64',     0x200),
    197     ('IMAGE_FILE_MACHINE_MR32',     0x9041),
    198     ('IMAGE_FILE_MACHINE_MIPS16',   0x266),
    199     ('IMAGE_FILE_MACHINE_MIPSFPU',  0x366),
    200     ('IMAGE_FILE_MACHINE_MIPSFPU16',0x466),
    201     ('IMAGE_FILE_MACHINE_POWERPC',  0x1f0),
    202     ('IMAGE_FILE_MACHINE_POWERPCFP',0x1f1),
    203     ('IMAGE_FILE_MACHINE_R4000',    0x166),
    204     ('IMAGE_FILE_MACHINE_SH3',      0x1a2),
    205     ('IMAGE_FILE_MACHINE_SH3DSP',   0x1a3),
    206     ('IMAGE_FILE_MACHINE_SH4',      0x1a6),
    207     ('IMAGE_FILE_MACHINE_SH5',      0x1a8),
    208     ('IMAGE_FILE_MACHINE_THUMB',    0x1c2),
    209     ('IMAGE_FILE_MACHINE_WCEMIPSV2',0x169),
    210  ]
    211 
    212 MACHINE_TYPE = dict([(e[1], e[0]) for e in machine_types]+machine_types)
    213 
    214 
    215 relocation_types = [
    216     ('IMAGE_REL_BASED_ABSOLUTE',        0),
    217     ('IMAGE_REL_BASED_HIGH',            1),
    218     ('IMAGE_REL_BASED_LOW',             2),
    219     ('IMAGE_REL_BASED_HIGHLOW',         3),
    220     ('IMAGE_REL_BASED_HIGHADJ',         4),
    221     ('IMAGE_REL_BASED_MIPS_JMPADDR',    5),
    222     ('IMAGE_REL_BASED_SECTION',         6),
    223     ('IMAGE_REL_BASED_REL',             7),
    224     ('IMAGE_REL_BASED_MIPS_JMPADDR16',  9),
    225     ('IMAGE_REL_BASED_IA64_IMM64',      9),
    226     ('IMAGE_REL_BASED_DIR64',           10),
    227     ('IMAGE_REL_BASED_HIGH3ADJ',        11) ]
    228 
    229 RELOCATION_TYPE = dict([(e[1], e[0]) for e in relocation_types]+relocation_types)
    230 
    231 
    232 dll_characteristics = [
    233     ('IMAGE_DLL_CHARACTERISTICS_RESERVED_0x0001', 0x0001),
    234     ('IMAGE_DLL_CHARACTERISTICS_RESERVED_0x0002', 0x0002),
    235     ('IMAGE_DLL_CHARACTERISTICS_RESERVED_0x0004', 0x0004),
    236     ('IMAGE_DLL_CHARACTERISTICS_RESERVED_0x0008', 0x0008),
    237     ('IMAGE_DLL_CHARACTERISTICS_DYNAMIC_BASE',      0x0040),
    238     ('IMAGE_DLL_CHARACTERISTICS_FORCE_INTEGRITY',   0x0080),
    239     ('IMAGE_DLL_CHARACTERISTICS_NX_COMPAT',         0x0100),
    240     ('IMAGE_DLL_CHARACTERISTICS_NO_ISOLATION',      0x0200),
    241     ('IMAGE_DLL_CHARACTERISTICS_NO_SEH',    0x0400),
    242     ('IMAGE_DLL_CHARACTERISTICS_NO_BIND',   0x0800),
    243     ('IMAGE_DLL_CHARACTERISTICS_RESERVED_0x1000', 0x1000),
    244     ('IMAGE_DLL_CHARACTERISTICS_WDM_DRIVER',    0x2000),
    245     ('IMAGE_DLL_CHARACTERISTICS_TERMINAL_SERVER_AWARE', 0x8000) ]
    246 
    247 DLL_CHARACTERISTICS = dict([(e[1], e[0]) for e in dll_characteristics]+dll_characteristics)
    248 
    249 
    250 # Resource types
    251 resource_type = [
    252     ('RT_CURSOR',          1),
    253     ('RT_BITMAP',          2),
    254     ('RT_ICON',            3),
    255     ('RT_MENU',            4),
    256     ('RT_DIALOG',          5),
    257     ('RT_STRING',          6),
    258     ('RT_FONTDIR',         7),
    259     ('RT_FONT',            8),
    260     ('RT_ACCELERATOR',     9),
    261     ('RT_RCDATA',          10),
    262     ('RT_MESSAGETABLE',    11),
    263     ('RT_GROUP_CURSOR',    12),
    264     ('RT_GROUP_ICON',      14),
    265     ('RT_VERSION',         16),
    266     ('RT_DLGINCLUDE',      17),
    267     ('RT_PLUGPLAY',        19),
    268     ('RT_VXD',             20),
    269     ('RT_ANICURSOR',       21),
    270     ('RT_ANIICON',         22),
    271     ('RT_HTML',            23),
    272     ('RT_MANIFEST',        24) ]
    273 
    274 RESOURCE_TYPE = dict([(e[1], e[0]) for e in resource_type]+resource_type)
    275 
    276     
    277 # Language definitions
    278 lang = [
    279  ('LANG_NEUTRAL',       0x00),
    280  ('LANG_INVARIANT',     0x7f),
    281  ('LANG_AFRIKAANS',     0x36),
    282  ('LANG_ALBANIAN',      0x1c),
    283  ('LANG_ARABIC',        0x01),
    284  ('LANG_ARMENIAN',      0x2b),
    285  ('LANG_ASSAMESE',      0x4d),
    286  ('LANG_AZERI',         0x2c),
    287  ('LANG_BASQUE',        0x2d),
    288  ('LANG_BELARUSIAN',    0x23),
    289  ('LANG_BENGALI',       0x45),
    290  ('LANG_BULGARIAN',     0x02),
    291  ('LANG_CATALAN',       0x03),
    292  ('LANG_CHINESE',       0x04),
    293  ('LANG_CROATIAN',      0x1a),
    294  ('LANG_CZECH',         0x05),
    295  ('LANG_DANISH',        0x06),
    296  ('LANG_DIVEHI',        0x65),
    297  ('LANG_DUTCH',         0x13),
    298  ('LANG_ENGLISH',       0x09),
    299  ('LANG_ESTONIAN',      0x25),
    300  ('LANG_FAEROESE',      0x38),
    301  ('LANG_FARSI',         0x29),
    302  ('LANG_FINNISH',       0x0b),
    303  ('LANG_FRENCH',        0x0c),
    304  ('LANG_GALICIAN',      0x56),
    305  ('LANG_GEORGIAN',      0x37),
    306  ('LANG_GERMAN',        0x07),
    307  ('LANG_GREEK',         0x08),
    308  ('LANG_GUJARATI',      0x47),
    309  ('LANG_HEBREW',        0x0d),
    310  ('LANG_HINDI',         0x39),
    311  ('LANG_HUNGARIAN',     0x0e),
    312  ('LANG_ICELANDIC',     0x0f),
    313  ('LANG_INDONESIAN',    0x21),
    314  ('LANG_ITALIAN',       0x10),
    315  ('LANG_JAPANESE',      0x11),
    316  ('LANG_KANNADA',       0x4b),
    317  ('LANG_KASHMIRI',      0x60),
    318  ('LANG_KAZAK',         0x3f),
    319  ('LANG_KONKANI',       0x57),
    320  ('LANG_KOREAN',        0x12),
    321  ('LANG_KYRGYZ',        0x40),
    322  ('LANG_LATVIAN',       0x26),
    323  ('LANG_LITHUANIAN',    0x27),
    324  ('LANG_MACEDONIAN',    0x2f),
    325  ('LANG_MALAY',         0x3e),
    326  ('LANG_MALAYALAM',     0x4c),
    327  ('LANG_MANIPURI',      0x58),
    328  ('LANG_MARATHI',       0x4e),
    329  ('LANG_MONGOLIAN',     0x50),
    330  ('LANG_NEPALI',        0x61),
    331  ('LANG_NORWEGIAN',     0x14),
    332  ('LANG_ORIYA',         0x48),
    333  ('LANG_POLISH',        0x15),
    334  ('LANG_PORTUGUESE',    0x16),
    335  ('LANG_PUNJABI',       0x46),
    336  ('LANG_ROMANIAN',      0x18),
    337  ('LANG_RUSSIAN',       0x19),
    338  ('LANG_SANSKRIT',      0x4f),
    339  ('LANG_SERBIAN',       0x1a),
    340  ('LANG_SINDHI',        0x59),
    341  ('LANG_SLOVAK',        0x1b),
    342  ('LANG_SLOVENIAN',     0x24),
    343  ('LANG_SPANISH',       0x0a),
    344  ('LANG_SWAHILI',       0x41),
    345  ('LANG_SWEDISH',       0x1d),
    346  ('LANG_SYRIAC',        0x5a),
    347  ('LANG_TAMIL',         0x49),
    348  ('LANG_TATAR',         0x44),
    349  ('LANG_TELUGU',        0x4a),
    350  ('LANG_THAI',          0x1e),
    351  ('LANG_TURKISH',       0x1f),
    352  ('LANG_UKRAINIAN',     0x22),
    353  ('LANG_URDU',          0x20),
    354  ('LANG_UZBEK',         0x43),
    355  ('LANG_VIETNAMESE',    0x2a),
    356  ('LANG_GAELIC',        0x3c),
    357  ('LANG_MALTESE',       0x3a),
    358  ('LANG_MAORI',         0x28),
    359  ('LANG_RHAETO_ROMANCE',0x17),
    360  ('LANG_SAAMI',         0x3b),
    361  ('LANG_SORBIAN',       0x2e),
    362  ('LANG_SUTU',          0x30),
    363  ('LANG_TSONGA',        0x31),
    364  ('LANG_TSWANA',        0x32),
    365  ('LANG_VENDA',         0x33),
    366  ('LANG_XHOSA',         0x34),
    367  ('LANG_ZULU',          0x35),
    368  ('LANG_ESPERANTO',     0x8f),
    369  ('LANG_WALON',         0x90),
    370  ('LANG_CORNISH',       0x91),
    371  ('LANG_WELSH',         0x92),
    372  ('LANG_BRETON',        0x93) ]
    373 
    374 LANG = dict(lang+[(e[1], e[0]) for e in lang])
    375 
    376 
    377 # Sublanguage definitions
    378 sublang =  [
    379  ('SUBLANG_NEUTRAL',                        0x00),
    380  ('SUBLANG_DEFAULT',                        0x01),
    381  ('SUBLANG_SYS_DEFAULT',                    0x02),
    382  ('SUBLANG_ARABIC_SAUDI_ARABIA',            0x01),
    383  ('SUBLANG_ARABIC_IRAQ',                    0x02),
    384  ('SUBLANG_ARABIC_EGYPT',                   0x03),
    385  ('SUBLANG_ARABIC_LIBYA',                   0x04),
    386  ('SUBLANG_ARABIC_ALGERIA',                 0x05),
    387  ('SUBLANG_ARABIC_MOROCCO',                 0x06),
    388  ('SUBLANG_ARABIC_TUNISIA',                 0x07),
    389  ('SUBLANG_ARABIC_OMAN',                    0x08),
    390  ('SUBLANG_ARABIC_YEMEN',                   0x09),
    391  ('SUBLANG_ARABIC_SYRIA',                   0x0a),
    392  ('SUBLANG_ARABIC_JORDAN',                  0x0b),
    393  ('SUBLANG_ARABIC_LEBANON',                 0x0c),
    394  ('SUBLANG_ARABIC_KUWAIT',                  0x0d),
    395  ('SUBLANG_ARABIC_UAE',                     0x0e),
    396  ('SUBLANG_ARABIC_BAHRAIN',                 0x0f),
    397  ('SUBLANG_ARABIC_QATAR',                   0x10),
    398  ('SUBLANG_AZERI_LATIN',                    0x01),
    399  ('SUBLANG_AZERI_CYRILLIC',                 0x02),
    400  ('SUBLANG_CHINESE_TRADITIONAL',            0x01),
    401  ('SUBLANG_CHINESE_SIMPLIFIED',             0x02),
    402  ('SUBLANG_CHINESE_HONGKONG',               0x03),
    403  ('SUBLANG_CHINESE_SINGAPORE',              0x04),
    404  ('SUBLANG_CHINESE_MACAU',                  0x05),
    405  ('SUBLANG_DUTCH',                          0x01),
    406  ('SUBLANG_DUTCH_BELGIAN',                  0x02),
    407  ('SUBLANG_ENGLISH_US',                     0x01),
    408  ('SUBLANG_ENGLISH_UK',                     0x02),
    409  ('SUBLANG_ENGLISH_AUS',                    0x03),
    410  ('SUBLANG_ENGLISH_CAN',                    0x04),
    411  ('SUBLANG_ENGLISH_NZ',                     0x05),
    412  ('SUBLANG_ENGLISH_EIRE',                   0x06),
    413  ('SUBLANG_ENGLISH_SOUTH_AFRICA',           0x07),
    414  ('SUBLANG_ENGLISH_JAMAICA',                0x08),
    415  ('SUBLANG_ENGLISH_CARIBBEAN',              0x09),
    416  ('SUBLANG_ENGLISH_BELIZE',                 0x0a),
    417  ('SUBLANG_ENGLISH_TRINIDAD',               0x0b),
    418  ('SUBLANG_ENGLISH_ZIMBABWE',               0x0c),
    419  ('SUBLANG_ENGLISH_PHILIPPINES',            0x0d),
    420  ('SUBLANG_FRENCH',                         0x01),
    421  ('SUBLANG_FRENCH_BELGIAN',                 0x02),
    422  ('SUBLANG_FRENCH_CANADIAN',                0x03),
    423  ('SUBLANG_FRENCH_SWISS',                   0x04),
    424  ('SUBLANG_FRENCH_LUXEMBOURG',              0x05),
    425  ('SUBLANG_FRENCH_MONACO',                  0x06),
    426  ('SUBLANG_GERMAN',                         0x01),
    427  ('SUBLANG_GERMAN_SWISS',                   0x02),
    428  ('SUBLANG_GERMAN_AUSTRIAN',                0x03),
    429  ('SUBLANG_GERMAN_LUXEMBOURG',              0x04),
    430  ('SUBLANG_GERMAN_LIECHTENSTEIN',           0x05),
    431  ('SUBLANG_ITALIAN',                        0x01),
    432  ('SUBLANG_ITALIAN_SWISS',                  0x02),
    433  ('SUBLANG_KASHMIRI_SASIA',                 0x02),
    434  ('SUBLANG_KASHMIRI_INDIA',                 0x02),
    435  ('SUBLANG_KOREAN',                         0x01),
    436  ('SUBLANG_LITHUANIAN',                     0x01),
    437  ('SUBLANG_MALAY_MALAYSIA',                 0x01),
    438  ('SUBLANG_MALAY_BRUNEI_DARUSSALAM',        0x02),
    439  ('SUBLANG_NEPALI_INDIA',                   0x02),
    440  ('SUBLANG_NORWEGIAN_BOKMAL',               0x01),
    441  ('SUBLANG_NORWEGIAN_NYNORSK',              0x02),
    442  ('SUBLANG_PORTUGUESE',                     0x02),
    443  ('SUBLANG_PORTUGUESE_BRAZILIAN',           0x01),
    444  ('SUBLANG_SERBIAN_LATIN',                  0x02),
    445  ('SUBLANG_SERBIAN_CYRILLIC',               0x03),
    446  ('SUBLANG_SPANISH',                        0x01),
    447  ('SUBLANG_SPANISH_MEXICAN',                0x02),
    448  ('SUBLANG_SPANISH_MODERN',                 0x03),
    449  ('SUBLANG_SPANISH_GUATEMALA',              0x04),
    450  ('SUBLANG_SPANISH_COSTA_RICA',             0x05),
    451  ('SUBLANG_SPANISH_PANAMA',                 0x06),
    452  ('SUBLANG_SPANISH_DOMINICAN_REPUBLIC',     0x07),
    453  ('SUBLANG_SPANISH_VENEZUELA',              0x08),
    454  ('SUBLANG_SPANISH_COLOMBIA',               0x09),
    455  ('SUBLANG_SPANISH_PERU',                   0x0a),
    456  ('SUBLANG_SPANISH_ARGENTINA',              0x0b),
    457  ('SUBLANG_SPANISH_ECUADOR',                0x0c),
    458  ('SUBLANG_SPANISH_CHILE',                  0x0d),
    459  ('SUBLANG_SPANISH_URUGUAY',                0x0e),
    460  ('SUBLANG_SPANISH_PARAGUAY',               0x0f),
    461  ('SUBLANG_SPANISH_BOLIVIA',                0x10),
    462  ('SUBLANG_SPANISH_EL_SALVADOR',            0x11),
    463  ('SUBLANG_SPANISH_HONDURAS',               0x12),
    464  ('SUBLANG_SPANISH_NICARAGUA',              0x13),
    465  ('SUBLANG_SPANISH_PUERTO_RICO',            0x14),
    466  ('SUBLANG_SWEDISH',                        0x01),
    467  ('SUBLANG_SWEDISH_FINLAND',                0x02),
    468  ('SUBLANG_URDU_PAKISTAN',                  0x01),
    469  ('SUBLANG_URDU_INDIA',                     0x02),
    470  ('SUBLANG_UZBEK_LATIN',                    0x01),
    471  ('SUBLANG_UZBEK_CYRILLIC',                 0x02),
    472  ('SUBLANG_DUTCH_SURINAM',                  0x03),
    473  ('SUBLANG_ROMANIAN',                       0x01),
    474  ('SUBLANG_ROMANIAN_MOLDAVIA',              0x02),
    475  ('SUBLANG_RUSSIAN',                        0x01),
    476  ('SUBLANG_RUSSIAN_MOLDAVIA',               0x02),
    477  ('SUBLANG_CROATIAN',                       0x01),
    478  ('SUBLANG_LITHUANIAN_CLASSIC',             0x02),
    479  ('SUBLANG_GAELIC',                         0x01),
    480  ('SUBLANG_GAELIC_SCOTTISH',                0x02),
    481  ('SUBLANG_GAELIC_MANX',                    0x03) ]
    482 
    483 SUBLANG = dict(sublang+[(e[1], e[0]) for e in sublang])
    484 
    485 
    486 class UnicodeStringWrapperPostProcessor:
    487     """This class attemps to help the process of identifying strings
    488     that might be plain Unicode or Pascal. A list of strings will be
    489     wrapped on it with the hope the overlappings will help make the 
    490     decission about their type."""
    491     
    492     def __init__(self, pe, rva_ptr):
    493         self.pe = pe
    494         self.rva_ptr = rva_ptr
    495         self.string = None
    496         
    497         
    498     def get_rva(self):
    499         """Get the RVA of the string."""
    500         
    501         return self.rva_ptr
    502         
    503         
    504     def __str__(self):
    505         """Return the escaped ASCII representation of the string."""
    506     
    507         def convert_char(char):
    508             if char in string.printable:
    509                 return char
    510             else:
    511                 return r'\x%02x' % ord(char)
    512                 
    513         if self.string:
    514             return ''.join([convert_char(c) for c in self.string])
    515             
    516         return ''
    517         
    518     
    519     def invalidate(self):
    520         """Make this instance None, to express it's no known string type."""
    521         
    522         self = None
    523     
    524         
    525     def render_pascal_16(self):
    526     
    527         self.string = self.pe.get_string_u_at_rva(
    528             self.rva_ptr+2, 
    529             max_length=self.__get_pascal_16_length())
    530 
    531 
    532     def ask_pascal_16(self, next_rva_ptr):
    533         """The next RVA is taken to be the one immediately following this one.
    534         
    535         Such RVA could indicate the natural end of the string and will be checked
    536         with the possible length contained in the first word.
    537         """
    538         
    539         length = self.__get_pascal_16_length()
    540         
    541         if length == (next_rva_ptr - (self.rva_ptr+2)) / 2:
    542             self.length = length
    543             return True
    544             
    545         return False
    546         
    547         
    548     def __get_pascal_16_length(self):
    549     
    550         return self.__get_word_value_at_rva(self.rva_ptr)
    551         
    552     
    553     def __get_word_value_at_rva(self, rva):
    554 
    555         try:
    556             data = self.pe.get_data(self.rva_ptr, 2)
    557         except PEFormatError, e:
    558             return False
    559 
    560         if len(data)<2:
    561             return False
    562 
    563         return struct.unpack('<H', data)[0]
    564 
    565     
    566     #def render_pascal_8(self):
    567     #    """"""
    568         
    569         
    570     def ask_unicode_16(self, next_rva_ptr):
    571         """The next RVA is taken to be the one immediately following this one.
    572         
    573         Such RVA could indicate the natural end of the string and will be checked
    574         to see if there's a Unicode NULL character there.
    575         """
    576         
    577         if self.__get_word_value_at_rva(next_rva_ptr-2) == 0:
    578             self.length = next_rva_ptr - self.rva_ptr
    579             return True
    580             
    581         return False
    582         
    583 
    584     def render_unicode_16(self):
    585         """"""
    586 
    587         self.string = self.pe.get_string_u_at_rva(self.rva_ptr)
    588             
    589 
    590 class PEFormatError(Exception):
    591     """Generic PE format error exception."""
    592     
    593     def __init__(self, value):
    594         self.value = value
    595 
    596     def __str__(self):
    597         return repr(self.value)
    598 
    599 
    600 class Dump:
    601     """Convenience class for dumping the PE information."""
    602     
    603     def __init__(self):
    604         self.text = ''
    605     
    606         
    607     def add_lines(self, txt, indent=0):
    608         """Adds a list of lines.
    609         
    610         The list can be indented with the optional argument 'indent'.
    611         """
    612         for line in txt:
    613             self.add_line(line, indent)
    614         
    615             
    616     def add_line(self, txt, indent=0):
    617         """Adds a line.
    618         
    619         The line can be indented with the optional argument 'indent'.
    620         """
    621         
    622         self.add(txt+'\n', indent)
    623     
    624         
    625     def add(self, txt, indent=0):
    626         """Adds some text, no newline will be appended.
    627         
    628         The text can be indented with the optional argument 'indent'.
    629         """
    630         
    631         if isinstance(txt, unicode):
    632             s = []
    633             for c in txt:
    634                 try:
    635                     s.append(str(c))
    636                 except UnicodeEncodeError, e:
    637                     s.append(repr(c))
    638                         
    639             txt = ''.join(s)
    640         
    641         self.text += ' '*indent+txt
    642     
    643         
    644     def add_header(self, txt):
    645         """Adds a header element."""
    646         
    647         self.add_line('-'*10+txt+'-'*10+'\n')
    648         
    649         
    650     def add_newline(self):
    651         """Adds a newline."""
    652         
    653         self.text += '\n'
    654         
    655         
    656     def get_text(self):
    657         """Get the text in its current state."""
    658     
    659         return self.text
    660 
    661 
    662 
    663 class Structure:
    664     """Prepare structure object to extract members from data.
    665     
    666     Format is a list containing definitions for the elements
    667     of the structure.
    668     """
    669     
    670     
    671     def __init__(self, format, name=None, file_offset=None):
    672         # Format is forced little endian, for big endian non Intel platforms
    673         self.__format__ = '<'
    674         self.__keys__ = []
    675 #        self.values = {}
    676         self.__format_length__ = 0
    677         self.__set_format__(format[1])
    678         self._all_zeroes = False
    679         self.__unpacked_data_elms__ = None
    680         self.__file_offset__ = file_offset
    681         if name:
    682             self.name = name
    683         else:
    684             self.name = format[0]
    685                 
    686             
    687     def __get_format__(self):
    688         return self.__format__
    689         
    690         
    691     def get_file_offset(self):
    692         return self.__file_offset__
    693 
    694     def set_file_offset(self, offset):
    695         self.__file_offset__ = offset
    696         
    697     def all_zeroes(self):
    698         """Returns true is the unpacked data is all zeroes."""
    699         
    700         return self._all_zeroes
    701 
    702                 
    703     def __set_format__(self, format):
    704     
    705         for elm in format:
    706             if ',' in elm:
    707                 elm_type, elm_name = elm.split(',', 1)
    708                 self.__format__ += elm_type
    709                 
    710                 elm_names = elm_name.split(',')
    711                 names = []
    712                 for elm_name in elm_names:
    713                     if elm_name in self.__keys__:
    714                         search_list = [x[:len(elm_name)] for x in self.__keys__]
    715                         occ_count = search_list.count(elm_name)
    716                         elm_name = elm_name+'_'+str(occ_count)
    717                     names.append(elm_name)
    718                 # Some PE header structures have unions on them, so a certain
    719                 # value might have different names, so each key has a list of
    720                 # all the possible members referring to the data.
    721                 self.__keys__.append(names)
    722                     
    723         self.__format_length__ = struct.calcsize(self.__format__)
    724         
    725         
    726     def sizeof(self):
    727         """Return size of the structure."""
    728     
    729         return self.__format_length__
    730         
    731         
    def __unpack__(self, data):
        """Unpack 'data' and store every value under its member name(s).

        Data beyond the size of the structure is ignored. The all-zeroes
        flag is raised when every byte used is a zero byte.

        Raises PEFormatError if 'data' is shorter than the structure.
        """

        expected = self.__format_length__
        available = len(data)

        # OC Patch:
        # Some malware have incorrect header lengths.
        # Fail gracefully if this occurs
        # Buggy malware: a29b0118af8b7408444df81701ad5a7f
        #
        if available < expected:
            raise PEFormatError('Data length less than expected header length.')

        if available > expected:
            data = data[:expected]

        if data.count(chr(0)) == len(data):
            self._all_zeroes = True

        self.__unpacked_data_elms__ = struct.unpack(self.__format__, data)

        # Every unpacked element may be known under several names
        # (unions), each of which becomes an attribute.
        for position, value in enumerate(self.__unpacked_data_elms__):
            for key in self.__keys__[position]:
                setattr(self, key, value)
    754 
    755 
    756     def __pack__(self):
    757     
    758         new_values = []
    759         
    760         for i in xrange(len(self.__unpacked_data_elms__)):
    761         
    762             for key in self.__keys__[i]:
    763                 new_val = getattr(self, key)
    764                 old_val = self.__unpacked_data_elms__[i]
    765                 
    766                 # In the case of Unions, when the first changed value
    767                 # is picked the loop is exited
    768                 if new_val != old_val:
    769                     break
    770                 
    771             new_values.append(new_val)
    772             
    773         return struct.pack(self.__format__, *new_values)
    774         
    775                 
    776     def __str__(self):
    777         return '\n'.join( self.dump() )
    778 
    779     def __repr__(self):
    780         return '<Structure: %s>' % (' '.join( [' '.join(s.split()) for s in self.dump()] ))
    781         
    782         
    783     def dump(self, indentation=0):
    784         """Returns a string representation of the structure."""
    785     
    786         dump = []
    787         
    788         dump.append('[%s]' % self.name)
    789 
    790         # Refer to the __set_format__ method for an explanation
    791         # of the following construct.
    792         for keys in self.__keys__:
    793             for key in keys:
    794 
    795                 val = getattr(self, key)
    796                 if isinstance(val, int) or isinstance(val, long):
    797                     val_str = '0x%-8X' % (val)
    798                     if key == 'TimeDateStamp' or key == 'dwTimeStamp':
    799                         try:
    800                             val_str += ' [%s UTC]' % time.asctime(time.gmtime(val))
    801                         except exceptions.ValueError, e:
    802                             val_str += ' [INVALID TIME]'
    803                 else:
    804                     val_str = ''.join(filter(lambda c:c != '\0', str(val)))
    805 
    806                 dump.append('%-30s %s' % (key+':', val_str))
    807 
    808         return dump
    809 
    810 
    811 
    812 class SectionStructure(Structure):
    813     """Convenience section handling class."""
    814 
    815     def get_data(self, start, length=None):
    816         """Get data chunk from a section.
    817         
    818         Allows to query data from the section by passing the
    819         addresses where the PE file would be loaded by default.
    820         It is then possible to retrieve code and data by its real
    821         addresses as it would be if loaded.
    822         """
    823 
    824         offset = start - self.VirtualAddress
    825 
    826         if length:
    827             end = offset+length
    828         else:
    829             end = len(self.data)
    830             
    831         return self.data[offset:end]
    832 
    833 
    834     def get_rva_from_offset(self, offset):
    835         return offset - self.PointerToRawData + self.VirtualAddress
    836 
    837 
    838     def get_offset_from_rva(self, rva):
    839         return (rva - self.VirtualAddress) + self.PointerToRawData
    840 
    841 
    842     def contains_offset(self, offset):
    843         """Check whether the section contains the file offset provided."""
    844 
    845         if not self.PointerToRawData:
    846            # bss and other sections containing only uninitialized data must have 0
    847            # and do not take space in the file
    848            return False
    849         return self.PointerToRawData <= offset < self.VirtualAddress + self.SizeOfRawData
    850 
    851 
    852     def contains_rva(self, rva):
    853         """Check whether the section contains the address provided."""
    854 
    855         # PECOFF documentation v8 says:
    856         # The total size of the section when loaded into memory.
    857         # If this value is greater than SizeOfRawData, the section is zero-padded.
    858         # This field is valid only for executable images and should be set to zero
    859         # for object files.
    860 
    861         if len(self.data) < self.SizeOfRawData:
    862             size = self.Misc_VirtualSize
    863         else:
    864             size = max(self.SizeOfRawData, self.Misc_VirtualSize)
    865             
    866         return self.VirtualAddress <= rva < self.VirtualAddress + size
    867 
    868     def contains(self, rva):
    869         #print "DEPRECATION WARNING: you should use contains_rva() instead of contains()"
    870         return self.contains_rva(rva)
    871 
    872 
    873     def set_data(self, data):
    874         """Set the data belonging to the section."""
    875         
    876         self.data = data
    877         
    878         
    879     def get_entropy(self):
    880         """Calculate and return the entropy for the section."""
    881         
    882         return self.entropy_H( self.data )
    883         
    884 
    885     def get_hash_sha1(self):
    886         """Get the SHA-1 hex-digest of the section's data."""
    887         
    888         if sha1 is not None:
    889             return sha1( self.data ).hexdigest()
    890     
    891 
    892     def get_hash_sha256(self):
    893         """Get the SHA-256 hex-digest of the section's data."""
    894         
    895         if sha256 is not None:
    896             return sha256( self.data ).hexdigest()
    897     
    898 
    899     def get_hash_sha512(self):
    900         """Get the SHA-512 hex-digest of the section's data."""
    901         
    902         if sha512 is not None:
    903             return sha512( self.data ).hexdigest()
    904     
    905 
    906     def get_hash_md5(self):
    907         """Get the MD5 hex-digest of the section's data."""
    908         
    909         if md5 is not None:
    910             return md5( self.data ).hexdigest()
    911     
    912 
    913     def entropy_H(self, data):
    914         """Calculate the entropy of a chunk of data."""
    915 
    916         if len(data) == 0:
    917             return 0.0
    918     
    919         occurences = array.array('L', [0]*256)
    920     
    921         for x in data:
    922             occurences[ord(x)] += 1
    923     
    924         entropy = 0
    925         for x in occurences:
    926             if x:
    927                 p_x = float(x) / len(data)
    928                 entropy -= p_x*math.log(p_x, 2)
    929     
    930         return entropy
    931 
    932 
    933 
    934 class DataContainer:
    935     """Generic data container."""
    936 	
    937     def __init__(self, **args):
    938         for key, value in args.items():
    939             setattr(self, key, value)
    940 
    941 
    942 
class ImportDescData(DataContainer):
    """Holds import descriptor information.

    Attributes are supplied as keyword arguments to the constructor.

    dll:        name of the imported DLL
    imports:    list of imported symbols (ImportData instances)
    struct:     IMAGE_IMPORT_DESCRIPTOR structure
    """
    950 
class ImportData(DataContainer):
    """Holds imported symbol's information.

    Attributes are supplied as keyword arguments to the constructor.

    ordinal:    Ordinal of the symbol
    name:       Name of the symbol
    bound:      If the symbol is bound, this contains
                the address.
    """
    959     
class ExportDirData(DataContainer):
    """Holds export directory information.

    Attributes are supplied as keyword arguments to the constructor.

    struct:     IMAGE_EXPORT_DIRECTORY structure
    symbols:    list of exported symbols (ExportData instances)
    """
    966     
class ExportData(DataContainer):
    """Holds exported symbols' information.

    Attributes are supplied as keyword arguments to the constructor.

    ordinal:    ordinal of the symbol
    address:    address of the symbol
    name:       name of the symbol (None if the symbol is
                exported by ordinal only)
    forwarder:  if the symbol is forwarded it will
                contain the name of the target symbol,
                None otherwise.
    """
    978               
    979 
class ResourceDirData(DataContainer):
    """Holds resource directory information.

    Attributes are supplied as keyword arguments to the constructor.

    struct:     IMAGE_RESOURCE_DIRECTORY structure
    entries:    list of entries (ResourceDirEntryData instances)
    """
    986     
class ResourceDirEntryData(DataContainer):
    """Holds resource directory entry data.

    Attributes are supplied as keyword arguments to the constructor.

    struct:     IMAGE_RESOURCE_DIRECTORY_ENTRY structure
    name:       If the resource is identified by name this
                attribute will contain the name string. None
                otherwise. If identified by id, the id is
                available at 'struct.Id'
    id:         the id, also in struct.Id
    directory:  If this entry has a lower level directory
                this attribute will point to the
                ResourceDirData instance representing it.
    data:       If this entry has no further lower directories
                and points to the actual resource data, this
                attribute will reference the corresponding
                ResourceDataEntryData instance.
    (Either of the 'directory' or 'data' attribute will exist,
    but not both.)
    """
   1006 
class ResourceDataEntryData(DataContainer):
    """Holds resource data entry information.

    Attributes are supplied as keyword arguments to the constructor.

    struct:     IMAGE_RESOURCE_DATA_ENTRY structure
    lang:       Primary language ID
    sublang:    Sublanguage ID
    """
   1014 
class DebugData(DataContainer):
    """Holds debug information.

    Attributes are supplied as keyword arguments to the constructor.

    struct:     IMAGE_DEBUG_DIRECTORY structure
    """
   1020 
class BaseRelocationData(DataContainer):
    """Holds base relocation information.

    Attributes are supplied as keyword arguments to the constructor.

    struct:     IMAGE_BASE_RELOCATION structure
    entries:    list of relocation data (RelocationData instances)
    """
   1027     
class RelocationData(DataContainer):
    """Holds relocation information.

    Attributes are supplied as keyword arguments to the constructor.

    type:       Type of relocation
                The type string can be obtained with
                RELOCATION_TYPE[type]
    rva:        RVA of the relocation
    """
   1036 
class TlsData(DataContainer):
    """Holds TLS information.

    Attributes are supplied as keyword arguments to the constructor.

    struct:     IMAGE_TLS_DIRECTORY structure
    """
   1042 
class BoundImportDescData(DataContainer):
    """Holds bound import descriptor data.

    This directory entry will provide information on the
    DLLs this PE file has been bound to (if bound at all).
    The structure will contain the name and timestamp of the
    DLL at the time of binding so that the loader can know
    whether it differs from the one currently present in the
    system and must, therefore, re-bind the PE's imports.

    Attributes are supplied as keyword arguments to the constructor.

    struct:     IMAGE_BOUND_IMPORT_DESCRIPTOR structure
    name:       DLL name
    entries:    list of entries (BoundImportRefData instances)
                the entries will exist if this DLL has forwarded
                symbols. If so, the destination DLL will have an
                entry in this list.
    """
   1060 
class BoundImportRefData(DataContainer):
    """Holds bound import forwarder reference data.

    Contains the same information as the bound descriptor but
    for forwarded DLLs, if any.

    Attributes are supplied as keyword arguments to the constructor.

    struct:     IMAGE_BOUND_FORWARDER_REF structure
    name:       dll name
    """
   1070 
   1071 
   1072 class PE:
   1073     """A Portable Executable representation.
   1074     
   1075     This class provides access to most of the information in a PE file.
   1076     
   1077     It expects to be supplied the name of the file to load or PE data
   1078     to process and an optional argument 'fast_load' (False by default)
   1079     which controls whether to load all the directories information,
   1080     which can be quite time consuming.
   1081     
   1082     pe = pefile.PE('module.dll')
   1083     pe = pefile.PE(name='module.dll')
   1084     
   1085     would load 'module.dll' and process it. If the data would be already
   1086     available in a buffer the same could be achieved with:
   1087     
   1088     pe = pefile.PE(data=module_dll_data)
   1089     
    The default for "fast_load" can be changed by setting its value in the
    module itself, for instance with "pefile.fast_load = True".
    That will make all subsequent instances skip loading the
    whole PE structure. The "full_load" method can be used to parse
    the missing data at a later stage.
   1095     
   1096     Basic headers information will be available in the attributes:
   1097     
   1098     DOS_HEADER
   1099     NT_HEADERS
   1100     FILE_HEADER
   1101     OPTIONAL_HEADER
   1102     
    All of them will contain among their attributes the members of the
    corresponding structures as defined in WINNT.H
   1105     
    The raw data corresponding to the header (from the beginning of the
    file up to the start of the first section) will be available in the
    instance's attribute 'header' as a string.
   1109     
   1110     The sections will be available as a list in the 'sections' attribute.
   1111     Each entry will contain as attributes all the structure's members.
   1112     
   1113     Directory entries will be available as attributes (if they exist):
   1114     (no other entries are processed at this point)
   1115     
   1116     DIRECTORY_ENTRY_IMPORT (list of ImportDescData instances)
   1117     DIRECTORY_ENTRY_EXPORT (ExportDirData instance)
   1118     DIRECTORY_ENTRY_RESOURCE (ResourceDirData instance)
   1119     DIRECTORY_ENTRY_DEBUG (list of DebugData instances)
   1120     DIRECTORY_ENTRY_BASERELOC (list of BaseRelocationData instances)
   1121     DIRECTORY_ENTRY_TLS 
    DIRECTORY_ENTRY_BOUND_IMPORT (list of BoundImportDescData instances)
   1123     
   1124     The following dictionary attributes provide ways of mapping different
   1125     constants. They will accept the numeric value and return the string
   1126     representation and the opposite, feed in the string and get the
   1127     numeric constant:
   1128     
   1129     DIRECTORY_ENTRY
   1130     IMAGE_CHARACTERISTICS
   1131     SECTION_CHARACTERISTICS
   1132     DEBUG_TYPE
   1133     SUBSYSTEM_TYPE
   1134     MACHINE_TYPE
   1135     RELOCATION_TYPE
   1136     RESOURCE_TYPE
   1137     LANG
   1138     SUBLANG
   1139     """
   1140 
    #
    # Format specifications for PE structures.
    #
    # Each specification is a pair: (structure name, tuple of fields).
    # A field is a string of the form 'T,name[,alias,...]' where T is a
    # struct-module format code ('B', 'H', 'L', 'Q', 'Ns') and the
    # comma-separated names that follow all refer to that one value.
    # Several names are given when the underlying C member is a union.
    # The specifications are handed to Structure by __unpack_data__().
    #

    # DOS (MZ) header; e_lfanew holds the file offset of the NT headers.
    __IMAGE_DOS_HEADER_format__ = ('IMAGE_DOS_HEADER',
        ('H,e_magic', 'H,e_cblp', 'H,e_cp',
        'H,e_crlc', 'H,e_cparhdr', 'H,e_minalloc',
        'H,e_maxalloc', 'H,e_ss', 'H,e_sp', 'H,e_csum',
        'H,e_ip', 'H,e_cs', 'H,e_lfarlc', 'H,e_ovno', '8s,e_res',
        'H,e_oemid', 'H,e_oeminfo', '20s,e_res2',
        'L,e_lfanew'))

    # File header, read right after the 4-byte NT signature.
    __IMAGE_FILE_HEADER_format__ = ('IMAGE_FILE_HEADER',
        ('H,Machine', 'H,NumberOfSections',
        'L,TimeDateStamp', 'L,PointerToSymbolTable',
        'L,NumberOfSymbols', 'H,SizeOfOptionalHeader',
        'H,Characteristics'))

    # One data directory entry (address and size pair).
    __IMAGE_DATA_DIRECTORY_format__ = ('IMAGE_DATA_DIRECTORY',
        ('L,VirtualAddress', 'L,Size'))


    # Optional header, 32-bit layout.
    __IMAGE_OPTIONAL_HEADER_format__ = ('IMAGE_OPTIONAL_HEADER',
        ('H,Magic', 'B,MajorLinkerVersion',
        'B,MinorLinkerVersion', 'L,SizeOfCode',
        'L,SizeOfInitializedData', 'L,SizeOfUninitializedData',
        'L,AddressOfEntryPoint', 'L,BaseOfCode', 'L,BaseOfData',
        'L,ImageBase', 'L,SectionAlignment', 'L,FileAlignment',
        'H,MajorOperatingSystemVersion', 'H,MinorOperatingSystemVersion',
        'H,MajorImageVersion', 'H,MinorImageVersion',
        'H,MajorSubsystemVersion', 'H,MinorSubsystemVersion',
        'L,Reserved1', 'L,SizeOfImage', 'L,SizeOfHeaders',
        'L,CheckSum', 'H,Subsystem', 'H,DllCharacteristics',
        'L,SizeOfStackReserve', 'L,SizeOfStackCommit',
        'L,SizeOfHeapReserve', 'L,SizeOfHeapCommit',
        'L,LoaderFlags', 'L,NumberOfRvaAndSizes' ))


    # Optional header, 64-bit layout: compared with the layout above
    # there is no BaseOfData member and ImageBase and the stack/heap
    # sizes are 8 bytes wide ('Q').
    __IMAGE_OPTIONAL_HEADER64_format__ = ('IMAGE_OPTIONAL_HEADER64',
        ('H,Magic', 'B,MajorLinkerVersion',
        'B,MinorLinkerVersion', 'L,SizeOfCode',
        'L,SizeOfInitializedData', 'L,SizeOfUninitializedData',
        'L,AddressOfEntryPoint', 'L,BaseOfCode',
        'Q,ImageBase', 'L,SectionAlignment', 'L,FileAlignment',
        'H,MajorOperatingSystemVersion', 'H,MinorOperatingSystemVersion',
        'H,MajorImageVersion', 'H,MinorImageVersion',
        'H,MajorSubsystemVersion', 'H,MinorSubsystemVersion',
        'L,Reserved1', 'L,SizeOfImage', 'L,SizeOfHeaders',
        'L,CheckSum', 'H,Subsystem', 'H,DllCharacteristics',
        'Q,SizeOfStackReserve', 'Q,SizeOfStackCommit',
        'Q,SizeOfHeapReserve', 'Q,SizeOfHeapCommit',
        'L,LoaderFlags', 'L,NumberOfRvaAndSizes' ))


    # Only the signature is described here; the file and optional
    # headers are unpacked separately.
    __IMAGE_NT_HEADERS_format__ = ('IMAGE_NT_HEADERS', ('L,Signature',))

    # Section header; 'Misc' is a union, also reachable under the names
    # Misc_PhysicalAddress and Misc_VirtualSize.
    __IMAGE_SECTION_HEADER_format__ = ('IMAGE_SECTION_HEADER',
        ('8s,Name', 'L,Misc,Misc_PhysicalAddress,Misc_VirtualSize',
        'L,VirtualAddress', 'L,SizeOfRawData', 'L,PointerToRawData',
        'L,PointerToRelocations', 'L,PointerToLinenumbers',
        'H,NumberOfRelocations', 'H,NumberOfLinenumbers',
        'L,Characteristics'))

    __IMAGE_DELAY_IMPORT_DESCRIPTOR_format__ = ('IMAGE_DELAY_IMPORT_DESCRIPTOR',
        ('L,grAttrs', 'L,szName', 'L,phmod', 'L,pIAT', 'L,pINT',
        'L,pBoundIAT', 'L,pUnloadIAT', 'L,dwTimeStamp'))

    # The first member is a union: OriginalFirstThunk / Characteristics.
    __IMAGE_IMPORT_DESCRIPTOR_format__ =  ('IMAGE_IMPORT_DESCRIPTOR',
        ('L,OriginalFirstThunk,Characteristics',
        'L,TimeDateStamp', 'L,ForwarderChain', 'L,Name', 'L,FirstThunk'))

    __IMAGE_EXPORT_DIRECTORY_format__ =  ('IMAGE_EXPORT_DIRECTORY',
        ('L,Characteristics',
        'L,TimeDateStamp', 'H,MajorVersion', 'H,MinorVersion', 'L,Name',
        'L,Base', 'L,NumberOfFunctions', 'L,NumberOfNames',
        'L,AddressOfFunctions', 'L,AddressOfNames', 'L,AddressOfNameOrdinals'))

    __IMAGE_RESOURCE_DIRECTORY_format__ = ('IMAGE_RESOURCE_DIRECTORY',
        ('L,Characteristics',
        'L,TimeDateStamp', 'H,MajorVersion', 'H,MinorVersion',
        'H,NumberOfNamedEntries', 'H,NumberOfIdEntries'))

    __IMAGE_RESOURCE_DIRECTORY_ENTRY_format__ = ('IMAGE_RESOURCE_DIRECTORY_ENTRY',
        ('L,Name',
        'L,OffsetToData'))

    __IMAGE_RESOURCE_DATA_ENTRY_format__ = ('IMAGE_RESOURCE_DATA_ENTRY',
        ('L,OffsetToData', 'L,Size', 'L,CodePage', 'L,Reserved'))

    # The version information structures below (VS_VERSIONINFO,
    # StringFileInfo, StringTable, String and Var) are all described
    # with the same three 16-bit members.
    __VS_VERSIONINFO_format__ = ( 'VS_VERSIONINFO',
        ('H,Length', 'H,ValueLength', 'H,Type' ))

    __VS_FIXEDFILEINFO_format__ = ( 'VS_FIXEDFILEINFO',
        ('L,Signature', 'L,StrucVersion', 'L,FileVersionMS', 'L,FileVersionLS',
         'L,ProductVersionMS', 'L,ProductVersionLS', 'L,FileFlagsMask', 'L,FileFlags',
         'L,FileOS', 'L,FileType', 'L,FileSubtype', 'L,FileDateMS', 'L,FileDateLS'))

    __StringFileInfo_format__ = ( 'StringFileInfo',
        ('H,Length', 'H,ValueLength', 'H,Type' ))

    __StringTable_format__ = ( 'StringTable',
        ('H,Length', 'H,ValueLength', 'H,Type' ))

    __String_format__ = ( 'String',
        ('H,Length', 'H,ValueLength', 'H,Type' ))

    __Var_format__ = ( 'Var', ('H,Length', 'H,ValueLength', 'H,Type' ))

    # Thunk data is a single value known under four union names.
    __IMAGE_THUNK_DATA_format__ = ('IMAGE_THUNK_DATA',
        ('L,ForwarderString,Function,Ordinal,AddressOfData',))

    # 8-byte ('Q') variant; note that it keeps the name 'IMAGE_THUNK_DATA'.
    __IMAGE_THUNK_DATA64_format__ = ('IMAGE_THUNK_DATA',
        ('Q,ForwarderString,Function,Ordinal,AddressOfData',))

    __IMAGE_DEBUG_DIRECTORY_format__ = ('IMAGE_DEBUG_DIRECTORY',
        ('L,Characteristics', 'L,TimeDateStamp', 'H,MajorVersion',
        'H,MinorVersion', 'L,Type', 'L,SizeOfData', 'L,AddressOfRawData',
        'L,PointerToRawData'))

    __IMAGE_BASE_RELOCATION_format__ = ('IMAGE_BASE_RELOCATION',
        ('L,VirtualAddress', 'L,SizeOfBlock') )

    __IMAGE_TLS_DIRECTORY_format__ = ('IMAGE_TLS_DIRECTORY',
        ('L,StartAddressOfRawData', 'L,EndAddressOfRawData',
        'L,AddressOfIndex', 'L,AddressOfCallBacks',
        'L,SizeOfZeroFill', 'L,Characteristics' ) )

    # Variant with 8-byte ('Q') address members; note that it keeps the
    # name 'IMAGE_TLS_DIRECTORY'.
    __IMAGE_TLS_DIRECTORY64_format__ = ('IMAGE_TLS_DIRECTORY',
        ('Q,StartAddressOfRawData', 'Q,EndAddressOfRawData',
        'Q,AddressOfIndex', 'Q,AddressOfCallBacks',
        'L,SizeOfZeroFill', 'L,Characteristics' ) )

    __IMAGE_BOUND_IMPORT_DESCRIPTOR_format__ = ('IMAGE_BOUND_IMPORT_DESCRIPTOR',
        ('L,TimeDateStamp', 'H,OffsetModuleName', 'H,NumberOfModuleForwarderRefs'))

    __IMAGE_BOUND_FORWARDER_REF_format__ = ('IMAGE_BOUND_FORWARDER_REF',
        ('L,TimeDateStamp', 'H,OffsetModuleName', 'H,Reserved') )
   1278 
   1279 
   1280     def __init__(self, name=None, data=None, fast_load=None):
   1281     
   1282         self.sections = []
   1283         
   1284         self.__warnings = []
   1285         
   1286         self.PE_TYPE = None
   1287         
   1288         if  not name and not data:
   1289             return
   1290             
   1291         # This list will keep track of all the structures created.
   1292         # That will allow for an easy iteration through the list
   1293         # in order to save the modifications made
   1294         self.__structures__ = []
   1295 
   1296         if not fast_load:
   1297             fast_load = globals()['fast_load']
   1298         self.__parse__(name, data, fast_load)
   1299                     
   1300         
   1301     
   1302     def __unpack_data__(self, format, data, file_offset):
   1303         """Apply structure format to raw data.
   1304         
   1305         Returns and unpacked structure object if successful, None otherwise.
   1306         """
   1307     
   1308         structure = Structure(format, file_offset=file_offset)
   1309         #if len(data) < structure.sizeof():
   1310         #    return None
   1311     
   1312         try:
   1313             structure.__unpack__(data)
   1314         except PEFormatError, err:
   1315             self.__warnings.append(
   1316                 'Corrupt header "%s" at file offset %d. Exception: %s' % (
   1317                     format[0], file_offset, str(err))  )
   1318             return None
   1319 
   1320         self.__structures__.append(structure)
   1321     
   1322         return structure
   1323         
   1324 
   1325         
   1326     def __parse__(self, fname, data, fast_load):
   1327         """Parse a Portable Executable file.
   1328         
   1329         Loads a PE file, parsing all its structures and making them available
   1330         through the instance's attributes.
   1331         """
   1332         
   1333         if fname:
   1334             fd = file(fname, 'rb')
   1335             self.__data__ = fd.read()
   1336             fd.close()
   1337         elif data:
   1338             self.__data__ = data
   1339         
   1340 
   1341         self.DOS_HEADER = self.__unpack_data__(
   1342             self.__IMAGE_DOS_HEADER_format__,
   1343             self.__data__, file_offset=0)
   1344             
   1345         if not self.DOS_HEADER or self.DOS_HEADER.e_magic != IMAGE_DOS_SIGNATURE:
   1346             raise PEFormatError('DOS Header magic not found.')
   1347 
   1348         # OC Patch:
   1349         # Check for sane value in e_lfanew
   1350         #                
   1351         if self.DOS_HEADER.e_lfanew > len(self.__data__):
   1352             raise PEFormatError('Invalid e_lfanew value, probably not a PE file')
   1353 
   1354         nt_headers_offset = self.DOS_HEADER.e_lfanew
   1355 
   1356         self.NT_HEADERS = self.__unpack_data__(
   1357             self.__IMAGE_NT_HEADERS_format__,
   1358             self.__data__[nt_headers_offset:],
   1359             file_offset = nt_headers_offset)
   1360 
   1361         # We better check the signature right here, before the file screws
   1362         # around with sections:
   1363         # OC Patch:
   1364         # Some malware will cause the Signature value to not exist at all
   1365         if not self.NT_HEADERS or not self.NT_HEADERS.Signature:
   1366             raise PEFormatError('NT Headers not found.')
   1367 
   1368         if self.NT_HEADERS.Signature != IMAGE_NT_SIGNATURE:
   1369             raise PEFormatError('Invalid NT Headers signature.')
   1370                 
   1371         self.FILE_HEADER = self.__unpack_data__(
   1372             self.__IMAGE_FILE_HEADER_format__,
   1373             self.__data__[nt_headers_offset+4:],
   1374             file_offset = nt_headers_offset+4)
   1375         image_flags = self.retrieve_flags(IMAGE_CHARACTERISTICS, 'IMAGE_FILE_')
   1376         
   1377         if not self.FILE_HEADER:
   1378             raise PEFormatError('File Header missing')
   1379 
   1380         # Set the image's flags according the the Characteristics member
   1381         self.set_flags(self.FILE_HEADER, self.FILE_HEADER.Characteristics, image_flags)
   1382         
   1383         optional_header_offset =    \
   1384             nt_headers_offset+4+self.FILE_HEADER.sizeof()
   1385 
   1386         # Note: location of sections can be controlled from PE header:
   1387         sections_offset = optional_header_offset + self.FILE_HEADER.SizeOfOptionalHeader
   1388 
   1389         self.OPTIONAL_HEADER = self.__unpack_data__(
   1390             self.__IMAGE_OPTIONAL_HEADER_format__,
   1391             self.__data__[optional_header_offset:],
   1392             file_offset = optional_header_offset)
   1393 
   1394         # According to solardesigner's findings for his
   1395         # Tiny PE project, the optional header does not
   1396         # need fields beyond "Subsystem" in order to be
   1397         # loadable by the Windows loader (given that zeroes
   1398         # are acceptable values and the header is loaded
   1399         # in a zeroed memory page)
   1400         # If trying to parse a full Optional Header fails
   1401         # we try to parse it again with some 0 padding
   1402         #
   1403         MINIMUM_VALID_OPTIONAL_HEADER_RAW_SIZE = 69
   1404         
   1405         if ( self.OPTIONAL_HEADER is None and 
   1406             len(self.__data__[optional_header_offset:])
   1407                 >= MINIMUM_VALID_OPTIONAL_HEADER_RAW_SIZE ):
   1408         
   1409             # Add enough zeroes to make up for the unused fields
   1410             #
   1411             padding_length = 128
   1412             
   1413             # Create padding
   1414             #
   1415             padded_data = self.__data__[optional_header_offset:] + (
   1416                 '\0' * padding_length)
   1417             
   1418             self.OPTIONAL_HEADER = self.__unpack_data__(
   1419                 self.__IMAGE_OPTIONAL_HEADER_format__,
   1420                 padded_data,
   1421                 file_offset = optional_header_offset)
   1422          
   1423             
   1424         # Check the Magic in the OPTIONAL_HEADER and set the PE file
   1425         # type accordingly
   1426         #
   1427         if self.OPTIONAL_HEADER is not None:
   1428         
   1429             if self.OPTIONAL_HEADER.Magic == OPTIONAL_HEADER_MAGIC_PE:
   1430             
   1431                 self.PE_TYPE = OPTIONAL_HEADER_MAGIC_PE
   1432                 
   1433             elif self.OPTIONAL_HEADER.Magic == OPTIONAL_HEADER_MAGIC_PE_PLUS:
   1434     
   1435                 self.PE_TYPE = OPTIONAL_HEADER_MAGIC_PE_PLUS
   1436             
   1437                 self.OPTIONAL_HEADER = self.__unpack_data__(
   1438                     self.__IMAGE_OPTIONAL_HEADER64_format__,
   1439                     self.__data__[optional_header_offset:],
   1440                     file_offset = optional_header_offset)
   1441 
   1442                 # Again, as explained above, we try to parse
   1443                 # a reduced form of the Optional Header which
   1444                 # is still valid despite not including all
   1445                 # structure members
   1446                 #
   1447                 MINIMUM_VALID_OPTIONAL_HEADER_RAW_SIZE = 69+4
   1448 
   1449                 if ( self.OPTIONAL_HEADER is None and 
   1450                     len(self.__data__[optional_header_offset:])
   1451                         >= MINIMUM_VALID_OPTIONAL_HEADER_RAW_SIZE ):
   1452                 
   1453                     padding_length = 128
   1454                     padded_data = self.__data__[optional_header_offset:] + (
   1455                         '\0' * padding_length)
   1456                     self.OPTIONAL_HEADER = self.__unpack_data__(
   1457                         self.__IMAGE_OPTIONAL_HEADER64_format__,
   1458                         padded_data,
   1459                         file_offset = optional_header_offset)
   1460         
   1461         
   1462         if not self.FILE_HEADER:
   1463             raise PEFormatError('File Header missing')
   1464 
   1465 
   1466         # OC Patch:
   1467         # Die gracefully if there is no OPTIONAL_HEADER field
   1468         # 975440f5ad5e2e4a92c4d9a5f22f75c1
   1469         if self.PE_TYPE is None or self.OPTIONAL_HEADER is None:
   1470             raise PEFormatError("No Optional Header found, invalid PE32 or PE32+ file")
   1471             
   1472         dll_characteristics_flags = self.retrieve_flags(DLL_CHARACTERISTICS, 'IMAGE_DLL_CHARACTERISTICS_')
   1473 
   1474         # Set the Dll Characteristics flags according the the DllCharacteristics member
   1475         self.set_flags(
   1476             self.OPTIONAL_HEADER,
   1477             self.OPTIONAL_HEADER.DllCharacteristics,
   1478             dll_characteristics_flags)
   1479 
   1480 
   1481         self.OPTIONAL_HEADER.DATA_DIRECTORY = []
   1482         #offset = (optional_header_offset + self.FILE_HEADER.SizeOfOptionalHeader)
   1483         offset = (optional_header_offset + self.OPTIONAL_HEADER.sizeof())
   1484             
   1485         
   1486         self.NT_HEADERS.FILE_HEADER = self.FILE_HEADER
   1487         self.NT_HEADERS.OPTIONAL_HEADER = self.OPTIONAL_HEADER
   1488             
   1489 
   1490         # The NumberOfRvaAndSizes is sanitized to stay within
   1491         # reasonable limits so that it can be cast to an int
   1492         #
   1493         if self.OPTIONAL_HEADER.NumberOfRvaAndSizes > 0x10:
   1494             self.__warnings.append(
   1495                 'Suspicious NumberOfRvaAndSizes in the Optional Header. ' +
   1496                 'Normal values are never larger than 0x10, the value is: 0x%x' %
   1497                 self.OPTIONAL_HEADER.NumberOfRvaAndSizes )
   1498                 
   1499         for i in xrange(int(0x7fffffffL & self.OPTIONAL_HEADER.NumberOfRvaAndSizes)):
   1500 
   1501             if len(self.__data__[offset:]) == 0:
   1502                 break
   1503                         
   1504             if len(self.__data__[offset:]) < 8:
   1505                 data = self.__data__[offset:]+'\0'*8
   1506             else:
   1507                 data = self.__data__[offset:]
   1508 
   1509             dir_entry = self.__unpack_data__(
   1510                 self.__IMAGE_DATA_DIRECTORY_format__,
   1511                 data,
   1512                 file_offset = offset)
   1513                 
   1514             if dir_entry is None:
   1515                 break
   1516 
   1517             # Would fail if missing an entry
   1518             # 1d4937b2fa4d84ad1bce0309857e70ca offending sample
   1519             try:
   1520                 dir_entry.name = DIRECTORY_ENTRY[i]
   1521             except (KeyError, AttributeError):
   1522                 break
   1523 
   1524             offset += dir_entry.sizeof()
   1525             
   1526             self.OPTIONAL_HEADER.DATA_DIRECTORY.append(dir_entry)
   1527 
   1528             # If the offset goes outside the optional header,
   1529             # the loop is broken, regardless of how many directories
   1530             # NumberOfRvaAndSizes says there are
   1531             #
   1532             # We assume a normally sized optional header, hence that we do
   1533             # a sizeof() instead of reading SizeOfOptionalHeader.
   1534             # Then we add a default number of directories times their size,
   1535             # if we go beyond that, we assume the number of directories
   1536             # is wrong and stop processing
   1537             if offset >= (optional_header_offset + 
   1538                 self.OPTIONAL_HEADER.sizeof() + 8*16) :
   1539                 
   1540                 break
   1541                 
   1542                         
   1543         offset = self.parse_sections(sections_offset)
   1544         
   1545         # OC Patch:
   1546         # There could be a problem if there are no raw data sections
   1547         # greater than 0
   1548         # fc91013eb72529da005110a3403541b6 example
   1549         # Should this throw an exception in the minimum header offset
   1550         # can't be found?
   1551         #
   1552         rawDataPointers = [
   1553             s.PointerToRawData for s in self.sections if s.PointerToRawData>0]
   1554             
   1555         if len(rawDataPointers) > 0:
   1556             lowest_section_offset = min(rawDataPointers)
   1557         else:
   1558             lowest_section_offset = None
   1559 
   1560         if not lowest_section_offset or lowest_section_offset<offset:
   1561             self.header = self.__data__[:offset]
   1562         else:
   1563             self.header = self.__data__[:lowest_section_offset]
   1564         
   1565 
   1566         # Check whether the entry point lies within a section
   1567         #
   1568         if self.get_section_by_rva(self.OPTIONAL_HEADER.AddressOfEntryPoint) is not None:
   1569         
   1570             # Check whether the entry point lies within the file
   1571             #
   1572             ep_offset = self.get_offset_from_rva(self.OPTIONAL_HEADER.AddressOfEntryPoint)
   1573             if ep_offset > len(self.__data__):
   1574             
   1575                 self.__warnings.append(
   1576                     'Possibly corrupt file. AddressOfEntryPoint lies outside the file. ' +
   1577                     'AddressOfEntryPoint: 0x%x' %
   1578                     self.OPTIONAL_HEADER.AddressOfEntryPoint )
   1579             
   1580         else:
   1581 
   1582             self.__warnings.append(
   1583                 'AddressOfEntryPoint lies outside the sections\' boundaries. ' +
   1584                 'AddressOfEntryPoint: 0x%x' %
   1585                 self.OPTIONAL_HEADER.AddressOfEntryPoint )
   1586                 
   1587         
   1588         if not fast_load:
   1589             self.parse_data_directories()
   1590 
   1591 
   1592     def get_warnings(self):
   1593         """Return the list of warnings.
   1594         
   1595         Non-critical problems found when parsing the PE file are
   1596         appended to a list of warnings. This method returns the
   1597         full list.
   1598         """
   1599     
   1600         return self.__warnings
   1601         
   1602         
   1603     def show_warnings(self):
   1604         """Print the list of warnings.
   1605         
   1606         Non-critical problems found when parsing the PE file are
   1607         appended to a list of warnings. This method prints the
   1608         full list to standard output.
   1609         """
   1610     
   1611         for warning in self.__warnings:
   1612             print '>', warning
   1613 
   1614 
   1615     def full_load(self):
   1616         """Process the data directories.
   1617         
   1618         This mathod will load the data directories which might not have
   1619         been loaded if the "fast_load" option was used.
   1620         """
   1621         
   1622         self.parse_data_directories()
   1623         
   1624         
   1625     def write(self, filename=None):
   1626         """Write the PE file.
   1627         
   1628         This function will process all headers and components
   1629         of the PE file and include all changes made (by just
   1630         assigning to attributes in the PE objects) and write
   1631         the changes back to a file whose name is provided as
   1632         an argument. The filename is optional.
   1633         The data to be written to the file will be returned
   1634         as a 'str' object.
   1635         """
   1636     
   1637         file_data = list(self.__data__)
   1638         for struct in self.__structures__:
   1639         
   1640             struct_data = list(struct.__pack__())
   1641             offset = struct.get_file_offset()
   1642             
   1643             file_data[offset:offset+len(struct_data)] = struct_data
   1644             
   1645         if hasattr(self, 'VS_VERSIONINFO'):
   1646             if hasattr(self, 'FileInfo'):
   1647                 for entry in self.FileInfo:
   1648                     if hasattr(entry, 'StringTable'):
   1649                         for st_entry in entry.StringTable:
   1650                             for key, entry in st_entry.entries.items():
   1651                             
   1652                                 offsets = st_entry.entries_offsets[key]
   1653                                 lengths = st_entry.entries_lengths[key]
   1654                                 
   1655                                 if len( entry ) > lengths[1]:
   1656 
   1657                                     uc = zip(
   1658                                             list(entry[:lengths[1]]), ['\0'] * lengths[1] )
   1659                                     l = list()
   1660                                     map(l.extend, uc)
   1661 
   1662                                     file_data[ 
   1663                                         offsets[1] : offsets[1] + lengths[1]*2 ] = l
   1664                                         
   1665                                 else:
   1666                                     
   1667                                     uc = zip(
   1668                                             list(entry), ['\0'] * len(entry) )
   1669                                     l = list()
   1670                                     map(l.extend, uc)
   1671 
   1672                                     file_data[ 
   1673                                         offsets[1] : offsets[1] + len(entry)*2 ] = l
   1674 
   1675                                     remainder = lengths[1] - len(entry)
   1676                                     file_data[ 
   1677                                         offsets[1] + len(entry)*2 : 
   1678                                         offsets[1] + lengths[1]*2 ] = [
   1679                                             u'\0' ] * remainder*2
   1680 
   1681         new_file_data = ''.join( [ chr(ord(c)) for c in file_data ] )
   1682 
   1683         if filename:
   1684             f = file(filename, 'wb+')
   1685             f.write(new_file_data)
   1686             f.close()
   1687 
   1688         return new_file_data
   1689         
   1690 
   1691                 
   1692     def parse_sections(self, offset):
   1693         """Fetch the PE file sections.
   1694         
   1695         The sections will be readily available in the "sections" attribute.
   1696         Its attributes will contain all the section information plus "data"
   1697         a buffer containing the section's data.
   1698         
   1699         The "Characteristics" member will be processed and attributes 
   1700         representing the section characteristics (with the 'IMAGE_SCN_'
   1701         string trimmed from the constant's names) will be added to the
   1702         section instance.
   1703         
   1704         Refer to the SectionStructure class for additional info.
   1705         """
   1706         
   1707         self.sections = []
   1708         
   1709         for i in xrange(self.FILE_HEADER.NumberOfSections):
   1710             section = SectionStructure(self.__IMAGE_SECTION_HEADER_format__)
   1711             if not section:
   1712                 break
   1713             section_offset = offset + section.sizeof() * i
   1714             section.set_file_offset(section_offset)
   1715             section.__unpack__(self.__data__[section_offset:])
   1716             self.__structures__.append(section)
   1717                         
   1718             if section.SizeOfRawData > len(self.__data__):
   1719                 self.__warnings.append(
   1720                     ('Error parsing section %d. ' % i) +
   1721                     'SizeOfRawData is larger than file.')
   1722 
   1723             if section.PointerToRawData > len(self.__data__):
   1724                 self.__warnings.append(
   1725                     ('Error parsing section %d. ' % i) +
   1726                     'PointerToRawData points beyond the end of the file.')
   1727 
   1728             if section.Misc_VirtualSize > 0x10000000:
   1729                 self.__warnings.append(
   1730                     ('Suspicious value found parsing section %d. ' % i) +
   1731                     'VirtualSize is extremely large > 256MiB.')
   1732 
   1733             if section.VirtualAddress > 0x10000000:
   1734                 self.__warnings.append(
   1735                     ('Suspicious value found parsing section %d. ' % i) +
   1736                     'VirtualAddress is beyond 0x10000000.')
   1737 
   1738             #
   1739             # Some packer used a non-aligned PointerToRawData in the sections,
   1740             # which causes several common tools not to load the section data
   1741             # properly as they blindly read from the indicated offset.
   1742             # It seems that Windows will round the offset down to the largest
   1743             # offset multiple of FileAlignment which is smaller than
   1744             # PointerToRawData. The following code will do the same.
   1745             #
   1746             
   1747             #alignment = self.OPTIONAL_HEADER.FileAlignment
   1748             section_data_start = section.PointerToRawData
   1749             
   1750             if ( self.OPTIONAL_HEADER.FileAlignment != 0 and 
   1751                 (section.PointerToRawData % self.OPTIONAL_HEADER.FileAlignment) != 0):
   1752                 self.__warnings.append(
   1753                     ('Error parsing section %d. ' % i) +
   1754                     'Suspicious value for FileAlignment in the Optional Header. ' +
   1755                     'Normally the PointerToRawData entry of the sections\' structures ' +
   1756                     'is a multiple of FileAlignment, this might imply the file ' +
   1757                     'is trying to confuse tools which parse this incorrectly')
   1758             
   1759             section_data_end = section_data_start+section.SizeOfRawData
   1760             section.set_data(self.__data__[section_data_start:section_data_end])
   1761             
   1762             section_flags = self.retrieve_flags(SECTION_CHARACTERISTICS, 'IMAGE_SCN_')
   1763             
   1764             # Set the section's flags according the the Characteristics member
   1765             self.set_flags(section, section.Characteristics, section_flags)
   1766 
   1767             if ( section.__dict__.get('IMAGE_SCN_MEM_WRITE', False)  and 
   1768                 section.__dict__.get('IMAGE_SCN_MEM_EXECUTE', False) ):
   1769                 
   1770                 self.__warnings.append(
   1771                     ('Suspicious flags set for section %d. ' % i) +
   1772                     'Both IMAGE_SCN_MEM_WRITE and IMAGE_SCN_MEM_EXECUTE are set.' +
   1773                     'This might indicate a packed executable.')
   1774 
   1775             self.sections.append(section)
   1776             
   1777         if self.FILE_HEADER.NumberOfSections > 0 and self.sections:
   1778             return offset + self.sections[0].sizeof()*self.FILE_HEADER.NumberOfSections
   1779         else:
   1780             return offset
   1781 
   1782         
   1783     def retrieve_flags(self, flag_dict, flag_filter):
   1784         """Read the flags from a dictionary and return them in a usable form.
   1785         
   1786         Will return a list of (flag, value) for all flags in "flag_dict"
   1787         matching the filter "flag_filter".
   1788         """
   1789         
   1790         return [(f[0], f[1]) for f in flag_dict.items() if
   1791                 isinstance(f[0], str) and f[0].startswith(flag_filter)]
   1792 
   1793                 
   1794     def set_flags(self, obj, flag_field, flags):
   1795         """Will process the flags and set attributes in the object accordingly.
   1796         
   1797         The object "obj" will gain attritutes named after the flags provided in
   1798         "flags" and valued True/False, matching the results of applyin each
   1799         flag value from "flags" to flag_field.
   1800         """
   1801     
   1802         for flag in flags:
   1803             if flag[1] & flag_field:
   1804                 setattr(obj, flag[0], True)
   1805             else:
   1806                 setattr(obj, flag[0], False)
   1807     
   1808     
   1809             
   1810     def parse_data_directories(self):
   1811         """Parse and process the PE file's data directories."""
   1812         
   1813         directory_parsing = (
   1814             ('IMAGE_DIRECTORY_ENTRY_IMPORT', self.parse_import_directory),
   1815             ('IMAGE_DIRECTORY_ENTRY_EXPORT', self.parse_export_directory),
   1816             ('IMAGE_DIRECTORY_ENTRY_RESOURCE', self.parse_resources_directory),
   1817             ('IMAGE_DIRECTORY_ENTRY_DEBUG', self.parse_debug_directory),
   1818             ('IMAGE_DIRECTORY_ENTRY_BASERELOC', self.parse_relocations_directory),
   1819             ('IMAGE_DIRECTORY_ENTRY_TLS', self.parse_directory_tls),
   1820             ('IMAGE_DIRECTORY_ENTRY_DELAY_IMPORT', self.parse_delay_import_directory),
   1821             ('IMAGE_DIRECTORY_ENTRY_BOUND_IMPORT', self.parse_directory_bound_imports) )
   1822             
   1823         for entry in directory_parsing:
   1824             # OC Patch:
   1825             #
   1826             try:
   1827                 dir_entry = self.OPTIONAL_HEADER.DATA_DIRECTORY[
   1828                     DIRECTORY_ENTRY[entry[0]]]
   1829             except IndexError:
   1830                 break
   1831             if dir_entry.VirtualAddress:
   1832                 value = entry[1](dir_entry.VirtualAddress, dir_entry.Size)
   1833                 if value:
   1834                     setattr(self, entry[0][6:], value)
   1835         
   1836         
   1837     def parse_directory_bound_imports(self, rva, size):
   1838         """"""
   1839         
   1840         bnd_descr = Structure(self.__IMAGE_BOUND_IMPORT_DESCRIPTOR_format__)
   1841         bnd_descr_size = bnd_descr.sizeof()
   1842         start = rva
   1843         
   1844         bound_imports = []
   1845         while True:
   1846 
   1847             bnd_descr = self.__unpack_data__(
   1848                 self.__IMAGE_BOUND_IMPORT_DESCRIPTOR_format__,
   1849                    self.__data__[rva:rva+bnd_descr_size],
   1850                    file_offset = rva)
   1851             if bnd_descr is None:
   1852                 # If can't parse directory then silently return.
   1853                 # This directory does not necesarily have to be valid to
   1854                 # still have a valid PE file
   1855 
   1856                 self.__warnings.append(
   1857                     'The Bound Imports directory exists but can\'t be parsed.')
   1858 
   1859                 return
   1860                    
   1861             if bnd_descr.all_zeroes():
   1862                 break
   1863                 
   1864             rva += bnd_descr.sizeof()
   1865             
   1866             forwarder_refs = []
   1867             for idx in xrange(bnd_descr.NumberOfModuleForwarderRefs):
   1868                 # Both structures IMAGE_BOUND_IMPORT_DESCRIPTOR and
   1869                 # IMAGE_BOUND_FORWARDER_REF have the same size.
   1870                 bnd_frwd_ref = self.__unpack_data__(
   1871                     self.__IMAGE_BOUND_FORWARDER_REF_format__,
   1872                     self.__data__[rva:rva+bnd_descr_size],
   1873                     file_offset = rva)
   1874                 # OC Patch:
   1875                 if not bnd_frwd_ref:
   1876                     raise PEFormatError(
   1877                         "IMAGE_BOUND_FORWARDER_REF cannot be read")
   1878                 rva += bnd_frwd_ref.sizeof()
   1879                 
   1880                 name_str =  self.get_string_from_data(
   1881                     start+bnd_frwd_ref.OffsetModuleName, self.__data__)
   1882                     
   1883                 if not name_str:
   1884                     break
   1885                 forwarder_refs.append(BoundImportRefData(
   1886                     struct = bnd_frwd_ref,
   1887                     name = name_str))
   1888                 
   1889             name_str = self.get_string_from_data(
   1890                 start+bnd_descr.OffsetModuleName, self.__data__)
   1891                 
   1892             if not name_str:
   1893                 break
   1894             bound_imports.append(
   1895                 BoundImportDescData(
   1896                     struct = bnd_descr,
   1897                     name = name_str,
   1898                     entries = forwarder_refs))
   1899                     
   1900         return bound_imports
   1901 
   1902         
   1903     def parse_directory_tls(self, rva, size):
   1904         """"""
   1905             
   1906         if self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE:
   1907             format = self.__IMAGE_TLS_DIRECTORY_format__
   1908             
   1909         elif self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE_PLUS:
   1910             format = self.__IMAGE_TLS_DIRECTORY64_format__
   1911 
   1912         tls_struct = self.__unpack_data__(
   1913             format,
   1914             self.get_data(rva),
   1915             file_offset = self.get_offset_from_rva(rva))
   1916             
   1917         if not tls_struct:
   1918             return None
   1919                 
   1920         return TlsData( struct = tls_struct )
   1921     
   1922     
   1923     def parse_relocations_directory(self, rva, size):
   1924         """"""
   1925         
   1926         rlc = Structure(self.__IMAGE_BASE_RELOCATION_format__)
   1927         rlc_size = rlc.sizeof()
   1928         end = rva+size
   1929         
   1930         relocations = []
   1931         while rva<end:
   1932             
   1933             # OC Patch:
   1934             # Malware that has bad rva entries will cause an error.
   1935             # Just continue on after an exception
   1936             #
   1937             try:
   1938                 rlc = self.__unpack_data__(
   1939                     self.__IMAGE_BASE_RELOCATION_format__,
   1940                     self.get_data(rva, rlc_size),
   1941                     file_offset = self.get_offset_from_rva(rva) )
   1942             except PEFormatError:
   1943                 self.__warnings.append(
   1944                     'Invalid relocation information. Can\'t read ' +
   1945                     'data at RVA: 0x%x' % rva)
   1946                 rlc = None
   1947             
   1948             if not rlc:
   1949                 break
   1950                 
   1951             reloc_entries = self.parse_relocations(
   1952                 rva+rlc_size, rlc.VirtualAddress, rlc.SizeOfBlock-rlc_size)
   1953                 
   1954             relocations.append(
   1955                 BaseRelocationData(
   1956                     struct = rlc,
   1957                     entries = reloc_entries))
   1958             
   1959             if not rlc.SizeOfBlock:
   1960                 break
   1961             rva += rlc.SizeOfBlock
   1962             
   1963         return relocations
   1964     
   1965         
   1966     def parse_relocations(self, data_rva, rva, size):
   1967         """"""
   1968         
   1969         data = self.get_data(data_rva, size)
   1970         
   1971         entries = []
   1972         for idx in xrange(len(data)/2):
   1973             word = struct.unpack('<H', data[idx*2:(idx+1)*2])[0]
   1974             reloc_type = (word>>12)
   1975             reloc_offset = (word&0x0fff)
   1976             entries.append(
   1977                 RelocationData(
   1978                     type = reloc_type,
   1979                     rva = reloc_offset+rva))
   1980             
   1981         return entries
   1982 
   1983         
   1984     def parse_debug_directory(self, rva, size):
   1985         """"""
   1986             
   1987         dbg = Structure(self.__IMAGE_DEBUG_DIRECTORY_format__)
   1988         dbg_size = dbg.sizeof()
   1989         
   1990         debug = []
   1991         for idx in xrange(size/dbg_size):
   1992             try:
   1993                 data = self.get_data(rva+dbg_size*idx, dbg_size)
   1994             except PEFormatError, e:
   1995                 self.__warnings.append(
   1996                     'Invalid debug information. Can\'t read ' +
   1997                     'data at RVA: 0x%x' % rva)
   1998                 return None
   1999                 
   2000             dbg = self.__unpack_data__(
   2001                 self.__IMAGE_DEBUG_DIRECTORY_format__,
   2002                 data, file_offset = self.get_offset_from_rva(rva+dbg_size*idx))
   2003                 
   2004             if not dbg:
   2005                 return None
   2006                 
   2007             debug.append(
   2008                 DebugData(
   2009                     struct = dbg))
   2010             
   2011         return debug
   2012 
   2013                         
    def parse_resources_directory(self, rva, size=0, base_rva = None, level = 0):
        """Parse the resources directory.

        Given the rva of the resources directory, it will process all
        its entries, calling itself recursively for every entry that
        points to a further directory.

        The root will have the corresponding member of its structure,
        IMAGE_RESOURCE_DIRECTORY plus 'entries', a list of all the
        entries in the directory.

        Those entries will have, correspondingly, all the structure's
        members (IMAGE_RESOURCE_DIRECTORY_ENTRY) and an additional one,
        "directory", pointing to the IMAGE_RESOURCE_DIRECTORY structure
        representing upper layers of the tree. This one will also have
        an 'entries' attribute, pointing to the 3rd, and last, level.
        Another directory with more entries. Those last entries will
        have a new attribute (both 'leaf' or 'data_entry' can be used to
        access it). This structure finally points to the resource data.
        All the members of this structure, IMAGE_RESOURCE_DATA_ENTRY,
        are available as its attributes.

        rva      -- RVA of the directory table to parse.
        size     -- not referenced in the body of this method.
        base_rva -- RVA the offsets stored in the entries are added to.
                    Defaults to "rva", i.e. the directory this method
                    was first called on.
        level    -- nesting depth, increased by one on each recursive
                    call. Version information is only looked for at
                    level 0.

        Returns a ResourceDirData instance, or None (after recording a
        warning) if the directory table can't be read or unpacked.
        """
        
        # OC Patch:
        # Remember the RVA this call was given; it is compared below
        # against the sub-directories' RVAs to detect self-references.
        original_rva = rva
        
        if base_rva is None:
            base_rva = rva
        
        # NOTE(review): resources_section is not used anywhere else in
        # this method.
        resources_section = self.get_section_by_rva(rva)
        
        try:
            # If the RVA is invalid all would blow up. Some EXEs seem to be
            # specially nasty and have an invalid RVA.
            data = self.get_data(rva)
        except PEFormatError, e:
            self.__warnings.append(
                'Invalid resources directory. Can\'t read ' +
                'directory data at RVA: 0x%x' % rva)
            return None
            
        # Get the resource directory structure, that is, the header
        # of the table preceding the actual entries
        #
        resource_dir = self.__unpack_data__(
            self.__IMAGE_RESOURCE_DIRECTORY_format__, data,
            file_offset = self.get_offset_from_rva(rva) )
        if resource_dir is None:
            # If the resources directory can't be parsed, record a
            # warning and return. This directory does not necessarily
            # have to be valid for the PE file to be valid
            self.__warnings.append(
                'Invalid resources directory. Can\'t parse ' +
                'directory data at RVA: 0x%x' % rva)
            return None
        
        dir_entries = []
        
        # Advance the rva to the position immediately following the
        # directory table header and pointing to the first entry in
        # the table
        #
        rva += resource_dir.sizeof()
        
        number_of_entries = (
            resource_dir.NumberOfNamedEntries +
            resource_dir.NumberOfIdEntries )
            
        # Entry names are wrapped at first and only rendered once all
        # the entries have been walked (see the end of the method)
        strings_to_postprocess = list()
        
        for idx in xrange(number_of_entries):
            
            res = self.parse_resource_entry(rva)
            if res is None:
                self.__warnings.append(
                    'Error parsing the resources directory, ' +
                    'Entry %d is invalid, RVA = 0x%x. ' % 
                    (idx, rva) )
                break
            

            entry_name = None
            entry_id = None
            
            # The first NumberOfNamedEntries entries are treated as
            # named ones; once those have been processed only Id ones
            # remain
            
            if idx >= resource_dir.NumberOfNamedEntries:
                entry_id = res.Name
            else:
                ustr_offset = base_rva+res.NameOffset
                try:
                    #entry_name = self.get_string_u_at_rva(ustr_offset, max_length=16)
                    entry_name = UnicodeStringWrapperPostProcessor(self, ustr_offset)
                    strings_to_postprocess.append(entry_name)
                    
                except PEFormatError, excp:
                    # The entry is kept, just without a name
                    self.__warnings.append(
                        'Error parsing the resources directory, ' +
                        'attempting to read entry name. ' +
                        'Can\'t read unicode string at offset 0x%x' % 
                        (ustr_offset) )
                
                
            if res.DataIsDirectory:
                # OC Patch:
                #
                # One trick malware can do is to recursively reference
                # the next directory. This causes hilarity to ensue when
                # trying to parse everything correctly.
                # If the original RVA given to this function is equal to
                # the next one to parse, we assume that it's a trick.
                # Raising a PEFormatError would discard the reasonable
                # data already parsed, so we just break.
                #
                # 9ee4d0a0caf095314fd7041a3e4404dc is the offending sample
                if original_rva == (base_rva + res.OffsetToDirectory):
                    
                    break
                    
                else:
                    entry_directory = self.parse_resources_directory(
                        base_rva+res.OffsetToDirectory,
                        base_rva=base_rva, level = level+1)

                # A sub-directory that could not be parsed ends the
                # processing of this directory
                if not entry_directory:
                    break
                dir_entries.append(
                    ResourceDirEntryData(
                        struct = res,
                        name = entry_name,
                        id = entry_id,
                        directory = entry_directory))

            else:
                # Leaf entry: it points to a data entry structure
                struct = self.parse_resource_data_entry(
                    base_rva + res.OffsetToDirectory)

                if struct:
                    # The language is taken from the low byte of the
                    # entry's Name member and the sublanguage from the
                    # next byte
                    entry_data = ResourceDataEntryData(
                        struct = struct,
                        lang = res.Name & 0xff,
                        sublang = (res.Name>>8) & 0xff)
                        
                    dir_entries.append(
                        ResourceDirEntryData(
                            struct = res,
                            name = entry_name,
                            id = entry_id,
                            data = entry_data))
                    
                else:
                    break


                
            # Check if this entry contains version information
            #
            if level == 0 and res.Id == RESOURCE_TYPE['RT_VERSION']:
                # NOTE(review): if dir_entries is empty, last_entry is
                # left unassigned (or holds the value from an earlier
                # iteration); the bare except below is what absorbs the
                # resulting error.
                if len(dir_entries)>0:
                    last_entry = dir_entries[-1]
                    
                rt_version_struct = None
                try:
                    # Follow the first entry of each of the two nested
                    # directories down to the data entry structure
                    rt_version_struct = last_entry.directory.entries[0].directory.entries[0].data.struct
                except:
                    # Maybe a malformed directory structure...?
                    # Lets ignore it
                    pass
                    
                if rt_version_struct is not None:
                    self.parse_version_information(rt_version_struct)
    
            # Move on to the next entry of the table
            rva += res.sizeof()
            
                    
        # NOTE(review): string_rvas is built and sorted but never read
        # afterwards.
        string_rvas = [s.get_rva() for s in strings_to_postprocess]
        string_rvas.sort()
        
        # Render the entry names collected while walking the table
        for idx, s in enumerate(strings_to_postprocess):
            s.render_pascal_16()
            
            
        resource_directory_data = ResourceDirData(
            struct = resource_dir,
            entries = dir_entries)
                        
        return resource_directory_data
   2200         
   2201             
   2202     def parse_resource_data_entry(self, rva):
   2203         """Parse a data entry from the resources directory."""
   2204     
   2205         try:
   2206             # If the RVA is invalid all would blow up. Some EXEs seem to be
   2207             # specially nasty and have an invalid RVA.
   2208             data = self.get_data(rva)
   2209         except PEFormatError, excp:
   2210             self.__warnings.append(
   2211                 'Error parsing a resource directory data entry, ' +
   2212                 'the RVA is invalid: 0x%x' % ( rva ) )
   2213             return None
   2214             
   2215         data_entry = self.__unpack_data__(
   2216             self.__IMAGE_RESOURCE_DATA_ENTRY_format__, data,
   2217             file_offset = self.get_offset_from_rva(rva) )
   2218             
   2219         return data_entry
   2220 
   2221         
   2222     def parse_resource_entry(self, rva):
   2223         """Parse a directory entry from the resources directory."""
   2224 
   2225         resource = self.__unpack_data__(
   2226             self.__IMAGE_RESOURCE_DIRECTORY_ENTRY_format__, self.get_data(rva),
   2227             file_offset = self.get_offset_from_rva(rva) )
   2228             
   2229         if resource is None:
   2230             return None
   2231             
   2232         #resource.NameIsString = (resource.Name & 0x80000000L) >> 31
   2233         resource.NameOffset = resource.Name & 0x7FFFFFFFL
   2234         
   2235         resource.__pad = resource.Name & 0xFFFF0000L
   2236         resource.Id = resource.Name & 0x0000FFFFL
   2237         
   2238         resource.DataIsDirectory = (resource.OffsetToData & 0x80000000L) >> 31
   2239         resource.OffsetToDirectory = resource.OffsetToData & 0x7FFFFFFFL
   2240         
   2241         return resource
   2242             
   2243             
   2244     def parse_version_information(self, version_struct):
   2245         """Parse version information structure.
   2246         
   2247         The date will be made available in three attributes of the PE object.
   2248         
   2249         VS_VERSIONINFO     will contain the first three fields of the main structure:
   2250             'Length', 'ValueLength', and 'Type'
   2251             
   2252         VS_FIXEDFILEINFO    will hold the rest of the fields, accessible as sub-attributes:
   2253             'Signature', 'StrucVersion', 'FileVersionMS', 'FileVersionLS',
   2254             'ProductVersionMS', 'ProductVersionLS', 'FileFlagsMask', 'FileFlags',
   2255             'FileOS', 'FileType', 'FileSubtype', 'FileDateMS', 'FileDateLS'
   2256             
   2257         FileInfo    is a list of all StringFileInfo and VarFileInfo structures.
   2258         
   2259         StringFileInfo structures will have a list as an attribute named 'StringTable'
   2260         containing all the StringTable structures. Each of those structures contains a 
   2261         dictionary 'entries' with all the key/value version information string pairs.
   2262         
   2263         VarFileInfo structures will have a list as an attribute named 'Var' containing
   2264         all Var structures. Each Var structure will have a dictionary as an attribute
   2265         named 'entry' which will contain the name and value of the Var.
   2266         """
   2267     
   2268     
   2269         # Retrieve the data for the version info resource
   2270         #
   2271         start_offset = self.get_offset_from_rva( version_struct.OffsetToData )
   2272         raw_data = self.__data__[ start_offset : start_offset+version_struct.Size ]
   2273         
   2274         
   2275         # Map the main structure and the subsequent string
   2276         #    
   2277         versioninfo_struct = self.__unpack_data__(
   2278             self.__VS_VERSIONINFO_format__, raw_data, 
   2279             file_offset = start_offset )
   2280             
   2281         if versioninfo_struct is None:
   2282             return 
   2283             
   2284         ustr_offset = version_struct.OffsetToData + versioninfo_struct.sizeof()
   2285         try:
   2286             versioninfo_string = self.get_string_u_at_rva( ustr_offset )
   2287         except PEFormatError, excp:
   2288             self.__warnings.append(
   2289                 'Error parsing the version information, ' +
   2290                 'attempting to read VS_VERSION_INFO string. Can\'t ' +
   2291                 'read unicode string at offset 0x%x' % (
   2292                 ustr_offset ) )
   2293                 
   2294             versioninfo_string = None
   2295          
   2296         # If the structure does not contain the expected name, it's assumed to be invalid
   2297         #            
   2298         if versioninfo_string != u'VS_VERSION_INFO':
   2299 
   2300             self.__warnings.append('Invalid VS_VERSION_INFO block')
   2301             return
   2302 
   2303 
   2304         # Set the PE object's VS_VERSIONINFO to this one
   2305         #
   2306         self.VS_VERSIONINFO = versioninfo_struct
   2307 
   2308         # The the Key attribute to point to the unicode string identifying the structure
   2309         #        
   2310         self.VS_VERSIONINFO.Key = versioninfo_string
   2311 
   2312 
   2313         # Process the fixed version information, get the offset and structure
   2314         #
   2315         fixedfileinfo_offset = self.dword_align(
   2316             versioninfo_struct.sizeof() + 2 * (len(versioninfo_string) + 1),
   2317             version_struct.OffsetToData)
   2318         fixedfileinfo_struct = self.__unpack_data__(
   2319             self.__VS_FIXEDFILEINFO_format__,
   2320             raw_data[fixedfileinfo_offset:], 
   2321             file_offset = start_offset+fixedfileinfo_offset )
   2322 
   2323         if not fixedfileinfo_struct:
   2324             return
   2325 
   2326 
   2327         # Set the PE object's VS_FIXEDFILEINFO to this one
   2328         #
   2329         self.VS_FIXEDFILEINFO = fixedfileinfo_struct
   2330         
   2331         
   2332         # Start parsing all the StringFileInfo and VarFileInfo structures
   2333         #
   2334         
   2335         # Get the first one
   2336         #
   2337         stringfileinfo_offset = self.dword_align(
   2338             fixedfileinfo_offset + fixedfileinfo_struct.sizeof(),
   2339             version_struct.OffsetToData)
   2340         original_stringfileinfo_offset = stringfileinfo_offset
   2341         
   2342         
   2343         # Set the PE object's attribute that will contain them all.
   2344         #
   2345         self.FileInfo = list()
   2346 
   2347 
   2348         while True:
   2349         
   2350             # Process the StringFileInfo/VarFileInfo struct
   2351             #
   2352             stringfileinfo_struct = self.__unpack_data__(
   2353                 self.__StringFileInfo_format__, 
   2354                 raw_data[stringfileinfo_offset:], 
   2355                 file_offset = start_offset+stringfileinfo_offset )
   2356                 
   2357             if stringfileinfo_struct is None:
   2358                 self.__warnings.append(
   2359                     'Error parsing StringFileInfo/VarFileInfo struct' )
   2360                 return None
   2361             
   2362             # Get the subsequent string defining the structure.
   2363             #
   2364             ustr_offset = ( version_struct.OffsetToData + 
   2365                 stringfileinfo_offset + versioninfo_struct.sizeof() )
   2366             try:
   2367                 stringfileinfo_string = self.get_string_u_at_rva( ustr_offset )
   2368             except PEFormatError, excp:
   2369                 self.__warnings.append(
   2370                     'Error parsing the version information, ' +
   2371                     'attempting to read StringFileInfo string. Can\'t ' +
   2372                     'read unicode string at offset 0x%x' %  ( ustr_offset ) )
   2373                 break
   2374         
   2375             # Set such string as the Key attribute
   2376             #
   2377             stringfileinfo_struct.Key = stringfileinfo_string
   2378             
   2379             
   2380             # Append the structure to the PE object's list
   2381             #
   2382             self.FileInfo.append(stringfileinfo_struct)
   2383         
   2384         
   2385             # Parse a StringFileInfo entry
   2386             #
   2387             if stringfileinfo_string == u'StringFileInfo':
   2388                 
   2389                 if stringfileinfo_struct.Type == 1 and stringfileinfo_struct.ValueLength == 0:
   2390             
   2391                     stringtable_offset = self.dword_align(
   2392                         stringfileinfo_offset + stringfileinfo_struct.sizeof() + 
   2393                             2*(len(stringfileinfo_string)+1),
   2394                         version_struct.OffsetToData)
   2395                   
   2396                     stringfileinfo_struct.StringTable = list()
   2397 
   2398                     # Process the String Table entries
   2399                     #
   2400                     while True:
   2401                         stringtable_struct = self.__unpack_data__(
   2402                             self.__StringTable_format__,
   2403                             raw_data[stringtable_offset:], 
   2404                             file_offset = start_offset+stringtable_offset )
   2405                             
   2406                         if not stringtable_struct:
   2407                             break
   2408                             
   2409                         ustr_offset = ( version_struct.OffsetToData + stringtable_offset + 
   2410                             stringtable_struct.sizeof() )
   2411                         try:
   2412                             stringtable_string = self.get_string_u_at_rva( ustr_offset )
   2413                         except PEFormatError, excp:
   2414                             self.__warnings.append(
   2415                                 'Error parsing the version information, ' +
   2416                                 'attempting to read StringTable string. Can\'t ' +
   2417                                 'read unicode string at offset 0x%x' % ( ustr_offset ) )
   2418                             break
   2419                         
   2420                         stringtable_struct.LangID = stringtable_string
   2421                         stringtable_struct.entries = dict()
   2422                         stringtable_struct.entries_offsets = dict()
   2423                         stringtable_struct.entries_lengths = dict()
   2424                         stringfileinfo_struct.StringTable.append(stringtable_struct)
   2425             
   2426                         entry_offset = self.dword_align(
   2427                             stringtable_offset + stringtable_struct.sizeof() +
   2428                                 2*(len(stringtable_string)+1),
   2429                             version_struct.OffsetToData)
   2430             
   2431                         # Process all entries in the string table
   2432                         #
   2433             
   2434                         while entry_offset < stringtable_offset + stringtable_struct.Length:
   2435                     
   2436                             string_struct = self.__unpack_data__(
   2437                                 self.__String_format__, raw_data[entry_offset:], 
   2438                                 file_offset = start_offset+entry_offset )
   2439                                 
   2440                             if not string_struct:
   2441                                 break
   2442                                 
   2443                             ustr_offset = ( version_struct.OffsetToData + entry_offset +
   2444                                 string_struct.sizeof() )
   2445                             try:
   2446                                 key = self.get_string_u_at_rva( ustr_offset )
   2447                                 key_offset = self.get_offset_from_rva( ustr_offset )
   2448                             except PEFormatError, excp:
   2449                                 self.__warnings.append(
   2450                                     'Error parsing the version information, ' +
   2451                                     'attempting to read StringTable Key string. Can\'t ' +
   2452                                     'read unicode string at offset 0x%x' % ( ustr_offset ) )
   2453                                 break
   2454                                 
   2455                             value_offset = self.dword_align(
   2456                                 2*(len(key)+1) + entry_offset + string_struct.sizeof(),
   2457                                 version_struct.OffsetToData)
   2458             
   2459                             ustr_offset = version_struct.OffsetToData + value_offset
   2460                             try:
   2461                                 value = self.get_string_u_at_rva( ustr_offset,
   2462                                     max_length = string_struct.ValueLength )
   2463                                 value_offset = self.get_offset_from_rva( ustr_offset )
   2464                             except PEFormatError, excp:
   2465                                 self.__warnings.append(
   2466                                     'Error parsing the version information, ' +
   2467                                     'attempting to read StringTable Value string. ' +
   2468                                     'Can\'t read unicode string at offset 0x%x' % (
   2469                                     ustr_offset ) )
   2470                                 break
   2471                                 
   2472                             if string_struct.Length == 0:
   2473                                 entry_offset = stringtable_offset + stringtable_struct.Length
   2474                             else:
   2475                                 entry_offset = self.dword_align(
   2476                                     string_struct.Length+entry_offset, version_struct.OffsetToData)
   2477                                 
   2478                             key_as_char = []
   2479                             for c in key:
   2480                                 if ord(c)>128:
   2481                                     key_as_char.append('\\x%02x' %ord(c))
   2482                                 else:
   2483                                     key_as_char.append(c)
   2484                             
   2485                             key_as_char = ''.join(key_as_char)
   2486 
   2487                             setattr(stringtable_struct, key_as_char, value)
   2488                             stringtable_struct.entries[key] = value
   2489                             stringtable_struct.entries_offsets[key] = (key_offset, value_offset)
   2490                             stringtable_struct.entries_lengths[key] = (len(key), len(value))
   2491                             
   2492                     
   2493                         stringtable_offset = self.dword_align(
   2494                             stringtable_struct.Length + stringtable_offset,
   2495                             version_struct.OffsetToData)
   2496                         if stringtable_offset >= stringfileinfo_struct.Length:
   2497                             break
   2498                     
   2499             # Parse a VarFileInfo entry
   2500             #
   2501             elif stringfileinfo_string == u'VarFileInfo':
   2502             
   2503                 varfileinfo_struct = stringfileinfo_struct
   2504                 varfileinfo_struct.name = 'VarFileInfo'
   2505             
   2506                 if varfileinfo_struct.Type == 1 and varfileinfo_struct.ValueLength == 0:
   2507               
   2508                     var_offset = self.dword_align(
   2509                         stringfileinfo_offset + varfileinfo_struct.sizeof() +
   2510                             2*(len(stringfileinfo_string)+1),
   2511                         version_struct.OffsetToData)
   2512                         
   2513                     varfileinfo_struct.Var = list()
   2514               
   2515                     # Process all entries
   2516                     #
   2517 
   2518                     while True:
   2519                         var_struct = self.__unpack_data__(
   2520                             self.__Var_format__,
   2521                             raw_data[var_offset:], 
   2522                             file_offset = start_offset+var_offset )
   2523                             
   2524                         if not var_struct:
   2525                             break
   2526                             
   2527                         ustr_offset = ( version_struct.OffsetToData + var_offset + 
   2528                             var_struct.sizeof() )
   2529                         try:
   2530                             var_string = self.get_string_u_at_rva( ustr_offset )
   2531                         except PEFormatError, excp:
   2532                             self.__warnings.append(
   2533                                 'Error parsing the version information, ' +
   2534                                 'attempting to read VarFileInfo Var string. ' +
   2535                                 'Can\'t read unicode string at offset 0x%x' % (ustr_offset))
   2536                             break
   2537 
   2538                         
   2539                         varfileinfo_struct.Var.append(var_struct)
   2540                 
   2541                         varword_offset = self.dword_align(
   2542                             2*(len(var_string)+1) + var_offset + var_struct.sizeof(),
   2543                             version_struct.OffsetToData)
   2544                         orig_varword_offset = varword_offset
   2545                             
   2546                         while varword_offset < orig_varword_offset + var_struct.ValueLength:
   2547                             word1 = self.get_word_from_data(
   2548                                 raw_data[varword_offset:varword_offset+2], 0)
   2549                             word2 = self.get_word_from_data(
   2550                                 raw_data[varword_offset+2:varword_offset+4], 0)
   2551                             varword_offset += 4
   2552         
   2553                             var_struct.entry = {var_string: '0x%04x 0x%04x' % (word1, word2)}
   2554 
   2555                         var_offset = self.dword_align(
   2556                             var_offset+var_struct.Length, version_struct.OffsetToData)
   2557                             
   2558                         if var_offset <= var_offset+var_struct.Length:
   2559                             break
   2560               
   2561               
   2562               
   2563             # Increment and align the offset
   2564             #
   2565             stringfileinfo_offset = self.dword_align(
   2566                 stringfileinfo_struct.Length+stringfileinfo_offset,
   2567                 version_struct.OffsetToData)
   2568           
   2569             # Check if all the StringFileInfo and VarFileInfo items have been processed
   2570             #
   2571             if stringfileinfo_struct.Length == 0 or stringfileinfo_offset >= versioninfo_struct.Length:
   2572                 break
   2573             
   2574             
   2575                             
   2576     def parse_export_directory(self, rva, size):
   2577         """Parse the export directory.
   2578         
   2579         Given the rva of the export directory, it will process all
   2580         its entries.
   2581         
   2582         The exports will be made available through a list "exports"
   2583         containing a tuple with the following elements:
   2584         
   2585             (ordinal, symbol_address, symbol_name)
   2586             
   2587         And also through a dicionary "exports_by_ordinal" whose keys
   2588         will be the ordinals and the values tuples of the from:
   2589         
   2590             (symbol_address, symbol_name)
   2591             
   2592         The symbol addresses are relative, not absolute.
   2593         """
   2594     
   2595         try:
   2596             export_dir =  self.__unpack_data__(
   2597                 self.__IMAGE_EXPORT_DIRECTORY_format__, self.get_data(rva),
   2598                 file_offset = self.get_offset_from_rva(rva) )
   2599         except PEFormatError:
   2600             self.__warnings.append(
   2601                 'Error parsing export directory at RVA: 0x%x' % ( rva ) )
   2602             return
   2603         
   2604         if not export_dir:
   2605             return
   2606         
   2607         try:
   2608             address_of_names = self.get_data(
   2609                 export_dir.AddressOfNames, export_dir.NumberOfNames*4)
   2610             address_of_name_ordinals = self.get_data(
   2611                 export_dir.AddressOfNameOrdinals, export_dir.NumberOfNames*4)
   2612             address_of_functions = self.get_data(
   2613                 export_dir.AddressOfFunctions, export_dir.NumberOfFunctions*4)
   2614         except PEFormatError:
   2615             self.__warnings.append(
   2616                 'Error parsing export directory at RVA: 0x%x' % ( rva ) )
   2617             return
   2618             
   2619         exports = []
   2620         
   2621         for i in xrange(export_dir.NumberOfNames):
   2622         
   2623                 
   2624             symbol_name = self.get_string_at_rva(
   2625                 self.get_dword_from_data(address_of_names, i))
   2626             
   2627             symbol_ordinal = self.get_word_from_data(
   2628                 address_of_name_ordinals, i)
   2629                 
   2630             
   2631             if symbol_ordinal*4<len(address_of_functions):
   2632                 symbol_address = self.get_dword_from_data(
   2633                     address_of_functions, symbol_ordinal)
   2634             else:
   2635                 # Corrupt? a bad pointer... we assume it's all
   2636                 # useless, no exports
   2637                 return None
   2638             
   2639             # If the funcion's rva points within the export directory
   2640             # it will point to a string with the forwarded symbol's string
   2641             # instead of pointing the the function start address.
   2642             
   2643             if symbol_address>=rva and symbol_address<rva+size:
   2644                 forwarder_str = self.get_string_at_rva(symbol_address)
   2645             else:
   2646                 forwarder_str = None
   2647         
   2648             
   2649             exports.append(
   2650                 ExportData(
   2651                     ordinal = export_dir.Base+symbol_ordinal,
   2652                     address = symbol_address,
   2653                     name = symbol_name,
   2654                     forwarder = forwarder_str))
   2655                     
   2656         ordinals = [exp.ordinal for exp in exports]
   2657         
   2658         for idx in xrange(export_dir.NumberOfFunctions):
   2659 
   2660             if not idx+export_dir.Base in ordinals:
   2661                 symbol_address = self.get_dword_from_data(
   2662                     address_of_functions, 
   2663                     idx)
   2664                 
   2665                 #
   2666                 # Checking for forwarder again.
   2667                 #
   2668                 if symbol_address>=rva and symbol_address<rva+size:
   2669                     forwarder_str = self.get_string_at_rva(symbol_address)
   2670                 else:
   2671                     forwarder_str = None
   2672                     
   2673                 exports.append(
   2674                     ExportData(
   2675                         ordinal = export_dir.Base+idx,
   2676                         address = symbol_address,
   2677                         name = None,
   2678                         forwarder = forwarder_str))
   2679                       
   2680         return ExportDirData(
   2681                 struct = export_dir,
   2682                 symbols = exports)
   2683         
   2684                     
   2685     def dword_align(self, offset, base):
   2686         offset += base
   2687         return (offset+3) - ((offset+3)%4) - base
   2688 
   2689 
   2690 
   2691     def parse_delay_import_directory(self, rva, size):
   2692         """Walk and parse the delay import directory."""
   2693         
   2694         import_descs =  []
   2695         while True:
   2696             try:
   2697                 # If the RVA is invalid all would blow up. Some PEs seem to be
   2698                 # specially nasty and have an invalid RVA.
   2699                 data = self.get_data(rva)
   2700             except PEFormatError, e:
   2701                 self.__warnings.append(
   2702                     'Error parsing the Delay import directory at RVA: 0x%x' % ( rva ) )
   2703                 break
   2704                 
   2705             import_desc =  self.__unpack_data__(
   2706                 self.__IMAGE_DELAY_IMPORT_DESCRIPTOR_format__,
   2707                 data, file_offset = self.get_offset_from_rva(rva) )
   2708             
   2709             
   2710             # If the structure is all zeores, we reached the end of the list
   2711             if not import_desc or import_desc.all_zeroes():
   2712                 break
   2713             
   2714             
   2715             rva += import_desc.sizeof()
   2716             
   2717             try:
   2718                 import_data =  self.parse_imports(
   2719                     import_desc.pINT,
   2720                     import_desc.pIAT,
   2721                     None)
   2722             except PEFormatError, e:
   2723                 self.__warnings.append(
   2724                     'Error parsing the Delay import directory. ' +
   2725                     'Invalid import data at RVA: 0x%x' % ( rva ) )
   2726                 break
   2727             
   2728             if not import_data:
   2729                 continue
   2730             
   2731             
   2732             dll = self.get_string_at_rva(import_desc.szName)
   2733             if dll:
   2734                 import_descs.append(
   2735                     ImportDescData(
   2736                         struct = import_desc,
   2737                         imports = import_data,
   2738                         dll = dll))
   2739         
   2740         return import_descs
   2741 
   2742                     
   2743 
   2744     def parse_import_directory(self, rva, size):
   2745         """Walk and parse the import directory."""
   2746 
   2747         import_descs =  []
   2748         while True:
   2749             try:
   2750                 # If the RVA is invalid all would blow up. Some EXEs seem to be
   2751                 # specially nasty and have an invalid RVA.
   2752                 data = self.get_data(rva)
   2753             except PEFormatError, e:
   2754                 self.__warnings.append(
   2755                     'Error parsing the Import directory at RVA: 0x%x' % ( rva ) )
   2756                 break
   2757                 
   2758             import_desc =  self.__unpack_data__(
   2759                 self.__IMAGE_IMPORT_DESCRIPTOR_format__,
   2760                 data, file_offset = self.get_offset_from_rva(rva) )
   2761                 
   2762             # If the structure is all zeores, we reached the end of the list
   2763             if not import_desc or import_desc.all_zeroes():
   2764                 break
   2765                 
   2766             rva += import_desc.sizeof()
   2767                         
   2768             try:
   2769                 import_data =  self.parse_imports(
   2770                     import_desc.OriginalFirstThunk,
   2771                     import_desc.FirstThunk,
   2772                     import_desc.ForwarderChain)
   2773             except PEFormatError, excp:
   2774                 self.__warnings.append(
   2775                     'Error parsing the Import directory. ' +
   2776                     'Invalid Import data at RVA: 0x%x' % ( rva ) )
   2777                 break
   2778                 #raise excp
   2779                 
   2780             if not import_data:
   2781                 continue
   2782                 
   2783             dll = self.get_string_at_rva(import_desc.Name)
   2784             if dll:
   2785                 import_descs.append(
   2786                     ImportDescData(
   2787                         struct = import_desc,
   2788                         imports = import_data,
   2789                         dll = dll))
   2790 
   2791         return import_descs
   2792             
   2793 
   2794     
   2795     def parse_imports(self, original_first_thunk, first_thunk, forwarder_chain):
   2796         """Parse the imported symbols.
   2797         
   2798         It will fill a list, which will be avalable as the dictionary
   2799         attribute "imports". Its keys will be the DLL names and the values
   2800         all the symbols imported from that object.
   2801         """
   2802         
   2803         imported_symbols = []
   2804         imports_section = self.get_section_by_rva(first_thunk)
   2805         if not imports_section:
   2806             raise PEFormatError, 'Invalid/corrupt imports.'
   2807             
   2808         
   2809         # Import Lookup Table. Contains ordinals or pointers to strings.
   2810         ilt = self.get_import_table(original_first_thunk)
   2811         # Import Address Table. May have identical content to ILT if
   2812         # PE file is not bounded, Will contain the address of the
   2813         # imported symbols once the binary is loaded or if it is already
   2814         # bound.
   2815         iat = self.get_import_table(first_thunk)
   2816 
   2817         # OC Patch:
   2818         # Would crash if iat or ilt had None type 
   2819         if not iat and not ilt:
   2820             raise PEFormatError( 
   2821                 'Invalid Import Table information. ' +
   2822                 'Both ILT and IAT appear to be broken.')
   2823             
   2824         if not iat and ilt:
   2825             table = ilt
   2826         elif iat and not ilt:
   2827             table = iat
   2828         elif ilt and ((len(ilt) and len(iat)==0) or (len(ilt) == len(iat))):
   2829             table = ilt
   2830         elif (ilt and len(ilt))==0 and (iat and len(iat)):
   2831             table = iat
   2832         else:
   2833             return None
   2834             
   2835         for idx in xrange(len(table)):
   2836 
   2837             imp_ord = None
   2838             imp_hint = None
   2839             imp_name = None
   2840             hint_name_table_rva = None
   2841                         
   2842             if table[idx].AddressOfData:
   2843             
   2844                 if self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE:
   2845                     ordinal_flag = IMAGE_ORDINAL_FLAG
   2846                 elif self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE_PLUS:
   2847                     ordinal_flag = IMAGE_ORDINAL_FLAG64
   2848             
   2849                 # If imported by ordinal, we will append the ordinal number
   2850                 #
   2851                 if table[idx].AddressOfData & ordinal_flag:
   2852                     import_by_ordinal = True
   2853                     imp_ord = table[idx].AddressOfData & 0xffff
   2854                     imp_name = None
   2855                 else:
   2856                     import_by_ordinal = False
   2857                     try:
   2858                         hint_name_table_rva = table[idx].AddressOfData & 0x7fffffff
   2859                         data = self.get_data(hint_name_table_rva, 2)
   2860                         # Get the Hint
   2861                         imp_hint = self.get_word_from_data(data, 0)
   2862                         imp_name = self.get_string_at_rva(table[idx].AddressOfData+2)
   2863                     except PEFormatError, e:
   2864                         pass
   2865 
   2866             imp_address = first_thunk+self.OPTIONAL_HEADER.ImageBase+idx*4
   2867                 
   2868             if iat and ilt and ilt[idx].AddressOfData != iat[idx].AddressOfData:
   2869                 imp_bound = iat[idx].AddressOfData
   2870             else:
   2871                 imp_bound = None
   2872                 
   2873             if imp_name != '' and (imp_ord or imp_name):
   2874                 imported_symbols.append(
   2875                     ImportData(
   2876                         import_by_ordinal = import_by_ordinal,
   2877                         ordinal = imp_ord,
   2878                         hint = imp_hint,
   2879                         name = imp_name,
   2880                         bound = imp_bound,
   2881                         address = imp_address,
   2882                         hint_name_table_rva = hint_name_table_rva))
   2883             
   2884         return imported_symbols
   2885                 
   2886 
   2887 
   2888     def get_import_table(self, rva):
   2889     
   2890         table = []
   2891         
   2892         while True and rva:
   2893             try:
   2894                 data = self.get_data(rva)
   2895             except PEFormatError, e:
   2896                 self.__warnings.append(
   2897                     'Error parsing the import table. ' +
   2898                     'Invalid data at RVA: 0x%x' % ( rva ) )
   2899                 return None
   2900                 
   2901             if self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE:
   2902                 format = self.__IMAGE_THUNK_DATA_format__
   2903             elif self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE_PLUS:
   2904                 format = self.__IMAGE_THUNK_DATA64_format__
   2905                 
   2906             thunk_data = self.__unpack_data__(
   2907                 format, data, file_offset=self.get_offset_from_rva(rva) )
   2908                     
   2909             if not thunk_data or thunk_data.all_zeroes():
   2910                 break
   2911                 
   2912             rva += thunk_data.sizeof()
   2913             
   2914             table.append(thunk_data)
   2915             
   2916         return table
   2917     
   2918     
   2919     def get_memory_mapped_image(self, max_virtual_address=0x10000000, ImageBase=None):
   2920         """Returns the data corresponding to the memory layout of the PE file.
   2921         
   2922         The data includes the PE header and the sections loaded at offsets
   2923         corresponding to their relative virtual addresses. (the VirtualAddress
   2924         section header member).
   2925         Any offset in this data corresponds to the absolute memory address
   2926         ImageBase+offset.
   2927         
   2928         The optional argument 'max_virtual_address' provides with means of limiting
   2929         which section are processed.
   2930         Any section with their VirtualAddress beyond this value will be skipped.
   2931         Normally, sections with values beyond this range are just there to confuse
   2932         tools. It's a common trick to see in packed executables.
   2933         
   2934         If the 'ImageBase' optional argument is supplied, the file's relocations
   2935         will be applied to the image by calling the 'relocate_image()' method.
   2936         """
   2937         
   2938         # Collect all sections in one code block    
   2939         data = self.header
   2940         for section in self.sections:
   2941 
   2942             # Miscellanous integrity tests.
   2943             # Some packer will set these to bogus values to
   2944             # make tools go nuts.
   2945             #
   2946             if section.Misc_VirtualSize == 0 or section.SizeOfRawData == 0:
   2947                 continue
   2948             
   2949             if section.SizeOfRawData > len(self.__data__):
   2950                 continue
   2951                 
   2952             if section.PointerToRawData > len(self.__data__):
   2953                 continue
   2954         
   2955             if section.VirtualAddress >= max_virtual_address:
   2956                 continue
   2957                 
   2958             padding_length = section.VirtualAddress - len(data)
   2959             
   2960             if padding_length>0:
   2961                 data += '\0'*padding_length
   2962             elif padding_length<0:
   2963                 data = data[:padding_length]
   2964                 
   2965             data += section.data
   2966             
   2967         return data
   2968 
   2969             
   2970     def get_data(self, rva, length=None):
   2971         """Get data regardless of the section where it lies on.
   2972         
   2973         Given a rva and the size of the chunk to retrieve, this method
   2974         will find the section where the data lies and return the data.
   2975         """
   2976         
   2977         s = self.get_section_by_rva(rva)
   2978 
   2979         if not s:
   2980             if rva<len(self.header):
   2981                 if length:
   2982                     end = rva+length
   2983                 else:
   2984                     end = None
   2985                 return self.header[rva:end]
   2986                 
   2987             raise PEFormatError, 'data at RVA can\'t be fetched. Corrupt header?'
   2988 
   2989         return s.get_data(rva, length)
   2990 
   2991 
   2992     def get_rva_from_offset(self, offset):
   2993         """Get the rva corresponding to this file offset. """
   2994 
   2995         s = self.get_section_by_offset(offset)
   2996         if not s:
   2997             raise PEFormatError("specified offset (0x%x) doesn't belong to any section." % offset)
   2998         return s.get_rva_from_offset(offset)
   2999 
   3000     def get_offset_from_rva(self, rva):
   3001         """Get the file offset corresponding to this rva.
   3002         
   3003         Given a rva , this method will find the section where the
   3004         data lies and return the offset within the file.
   3005         """
   3006         
   3007         s = self.get_section_by_rva(rva)
   3008         if not s:
   3009                 
   3010             raise PEFormatError, 'data at RVA can\'t be fetched. Corrupt header?'
   3011             
   3012         return s.get_offset_from_rva(rva)
   3013         
   3014             
   3015     def get_string_at_rva(self, rva):
   3016         """Get an ASCII string located at the given address."""
   3017 
   3018         s = self.get_section_by_rva(rva)
   3019         if not s:
   3020             if rva<len(self.header):
   3021                 return self.get_string_from_data(rva, self.header)
   3022             return None
   3023                 
   3024         return self.get_string_from_data(rva-s.VirtualAddress, s.data)
   3025         
   3026         
   3027     def get_string_from_data(self, offset, data):
   3028         """Get an ASCII string from within the data."""
   3029 
   3030         # OC Patch
   3031         b = None
   3032         
   3033         try:
   3034             b = data[offset]
   3035         except IndexError:
   3036             return ''
   3037         
   3038         s = ''
   3039         while ord(b):
   3040             s += b
   3041             offset += 1
   3042             try:
   3043                 b = data[offset]
   3044             except IndexError:
   3045                 break
   3046           
   3047         return s
   3048 
   3049                 
   3050     def get_string_u_at_rva(self, rva, max_length = 2**16):
   3051         """Get an Unicode string located at the given address."""
   3052         
   3053         try:
   3054             # If the RVA is invalid all would blow up. Some EXEs seem to be
   3055             # specially nasty and have an invalid RVA.
   3056             data = self.get_data(rva, 2)
   3057         except PEFormatError, e:
   3058             return None
   3059 
   3060         #length = struct.unpack('<H', data)[0]
   3061                 
   3062         s = u''
   3063         for idx in xrange(max_length):
   3064             try:
   3065                 uchr = struct.unpack('<H', self.get_data(rva+2*idx, 2))[0]
   3066             except struct.error:
   3067                 break
   3068                 
   3069             if unichr(uchr) == u'\0':
   3070                 break
   3071             s += unichr(uchr)
   3072             
   3073         return s
   3074 
   3075 
   3076     def get_section_by_offset(self, offset):
   3077         """Get the section containing the given file offset."""
   3078     
   3079         sections = [s for s in self.sections if s.contains_offset(offset)]
   3080         
   3081         if sections:
   3082             return sections[0]
   3083                     
   3084         return None
   3085 
   3086 
   3087     def get_section_by_rva(self, rva):
   3088         """Get the section containing the given address."""
   3089     
   3090         sections = [s for s in self.sections if s.contains_rva(rva)]
   3091         
   3092         if sections:
   3093             return sections[0]
   3094                     
   3095         return None
   3096     
   3097     def __str__(self):
   3098         return self.dump_info()
   3099     
   3100             
   3101     def print_info(self):
   3102         """Print all the PE header information in a human readable from."""
   3103         print self.dump_info()
   3104         
   3105         
   3106     def dump_info(self, dump=None):
   3107         """Dump all the PE header information into human readable string."""
   3108         
   3109         
   3110         if dump is None:
   3111             dump = Dump()
   3112         
   3113         warnings = self.get_warnings()
   3114         if warnings:
   3115             dump.add_header('Parsing Warnings')
   3116             for warning in warnings:
   3117                 dump.add_line(warning)
   3118                 dump.add_newline()
   3119                 
   3120         
   3121         dump.add_header('DOS_HEADER')
   3122         dump.add_lines(self.DOS_HEADER.dump())
   3123         dump.add_newline()
   3124         
   3125         dump.add_header('NT_HEADERS')
   3126         dump.add_lines(self.NT_HEADERS.dump())
   3127         dump.add_newline()
   3128         
   3129         dump.add_header('FILE_HEADER')
   3130         dump.add_lines(self.FILE_HEADER.dump())
   3131         
   3132         image_flags = self.retrieve_flags(IMAGE_CHARACTERISTICS, 'IMAGE_FILE_')
   3133             
   3134         dump.add('Flags: ')
   3135         flags = []
   3136         for flag in image_flags:
   3137             if getattr(self.FILE_HEADER, flag[0]):
   3138                 flags.append(flag[0])
   3139         dump.add_line(', '.join(flags))
   3140         dump.add_newline()
   3141         
   3142         if hasattr(self, 'OPTIONAL_HEADER') and self.OPTIONAL_HEADER is not None:
   3143             dump.add_header('OPTIONAL_HEADER')
   3144             dump.add_lines(self.OPTIONAL_HEADER.dump())
   3145 
   3146         dll_characteristics_flags = self.retrieve_flags(DLL_CHARACTERISTICS, 'IMAGE_DLL_CHARACTERISTICS_')
   3147             
   3148         dump.add('DllCharacteristics: ')
   3149         flags = []
   3150         for flag in dll_characteristics_flags:
   3151             if getattr(self.OPTIONAL_HEADER, flag[0]):
   3152                 flags.append(flag[0])
   3153         dump.add_line(', '.join(flags))
   3154         dump.add_newline()
   3155         
   3156         
   3157         dump.add_header('PE Sections')
   3158         
   3159         section_flags = self.retrieve_flags(SECTION_CHARACTERISTICS, 'IMAGE_SCN_')
   3160         
   3161         for section in self.sections:
   3162             dump.add_lines(section.dump())
   3163             dump.add('Flags: ')
   3164             flags = []
   3165             for flag in section_flags:
   3166                 if getattr(section, flag[0]):
   3167                     flags.append(flag[0])
   3168             dump.add_line(', '.join(flags))
   3169             dump.add_line('Entropy: %f (Min=0.0, Max=8.0)' % section.get_entropy() )
   3170             if md5 is not None:
   3171                 dump.add_line('MD5     hash: %s' % section.get_hash_md5() )
   3172             if sha1 is not None:
   3173                 dump.add_line('SHA-1   hash: %s' % section.get_hash_sha1() )
   3174             if sha256 is not None:
   3175                 dump.add_line('SHA-256 hash: %s' % section.get_hash_sha256() )
   3176             if sha512 is not None:
   3177                 dump.add_line('SHA-512 hash: %s' % section.get_hash_sha512() )
   3178             dump.add_newline()
   3179             
   3180             
   3181         
   3182         if (hasattr(self, 'OPTIONAL_HEADER') and 
   3183             hasattr(self.OPTIONAL_HEADER, 'DATA_DIRECTORY') ):
   3184             
   3185             dump.add_header('Directories')
   3186             for idx in xrange(len(self.OPTIONAL_HEADER.DATA_DIRECTORY)):
   3187                 directory = self.OPTIONAL_HEADER.DATA_DIRECTORY[idx]
   3188                 dump.add_lines(directory.dump())
   3189             dump.add_newline()
   3190 
   3191 
   3192         if hasattr(self, 'VS_VERSIONINFO'):
   3193             dump.add_header('Version Information')
   3194             dump.add_lines(self.VS_VERSIONINFO.dump())
   3195             dump.add_newline()
   3196 
   3197             if hasattr(self, 'VS_FIXEDFILEINFO'):
   3198                 dump.add_lines(self.VS_FIXEDFILEINFO.dump())
   3199                 dump.add_newline()
   3200 
   3201             if hasattr(self, 'FileInfo'):
   3202                 for entry in self.FileInfo:
   3203                     dump.add_lines(entry.dump())
   3204                     dump.add_newline()
   3205                     
   3206                     if hasattr(entry, 'StringTable'):
   3207                         for st_entry in entry.StringTable:
   3208                             [dump.add_line('  '+line) for line in st_entry.dump()]
   3209                             dump.add_line('  LangID: '+st_entry.LangID)
   3210                             dump.add_newline()
   3211                             for str_entry in st_entry.entries.items():
   3212                                 dump.add_line('    '+str_entry[0]+': '+str_entry[1])
   3213                         dump.add_newline()
   3214                                 
   3215                     elif hasattr(entry, 'Var'):
   3216                         for var_entry in entry.Var:
   3217                             if hasattr(var_entry, 'entry'):
   3218                                 [dump.add_line('  '+line) for line in var_entry.dump()]
   3219                                 dump.add_line(
   3220                                     '    ' + var_entry.entry.keys()[0] + 
   3221                                     ': ' + var_entry.entry.values()[0])
   3222                                     
   3223                         dump.add_newline()
   3224 
   3225 
   3226             
   3227         if hasattr(self, 'DIRECTORY_ENTRY_EXPORT'):
   3228             dump.add_header('Exported symbols')
   3229             dump.add_lines(self.DIRECTORY_ENTRY_EXPORT.struct.dump())
   3230             dump.add_newline()
   3231             dump.add_line('%-10s   %-10s  %s' % ('Ordinal', 'RVA', 'Name'))
   3232             for export in self.DIRECTORY_ENTRY_EXPORT.symbols:
   3233                 dump.add('%-10d 0x%08Xh    %s' % (
   3234                     export.ordinal, export.address, export.name))
   3235                 if export.forwarder:
   3236                     dump.add_line(' forwarder: %s' % export.forwarder)
   3237                 else:
   3238                     dump.add_newline()
   3239                 
   3240             dump.add_newline()
   3241         
   3242         if hasattr(self, 'DIRECTORY_ENTRY_IMPORT'):
   3243             dump.add_header('Imported symbols')
   3244             for module in self.DIRECTORY_ENTRY_IMPORT:
   3245                 dump.add_lines(module.struct.dump())
   3246                 dump.add_newline()
   3247                 for symbol in module.imports:
   3248                 
   3249                     if symbol.import_by_ordinal is True:
   3250                         dump.add('%s Ordinal[%s] (Imported by Ordinal)' % (
   3251                             module.dll, str(symbol.ordinal)))
   3252                     else:
   3253                         dump.add('%s.%s Hint[%s]' % (
   3254                             module.dll, symbol.name, str(symbol.hint)))
   3255 
   3256                     if symbol.bound:
   3257                         dump.add_line(' Bound: 0x%08X' % (symbol.bound))
   3258                     else:
   3259                         dump.add_newline()
   3260                 dump.add_newline()
   3261                 
   3262         
   3263         if hasattr(self, 'DIRECTORY_ENTRY_BOUND_IMPORT'):
   3264             dump.add_header('Bound imports')
   3265             for bound_imp_desc in self.DIRECTORY_ENTRY_BOUND_IMPORT:
   3266             
   3267                 dump.add_lines(bound_imp_desc.struct.dump())
   3268                 dump.add_line('DLL: %s' % bound_imp_desc.name)
   3269                 dump.add_newline()
   3270                 
   3271                 for bound_imp_ref in bound_imp_desc.entries:
   3272                     dump.add_lines(bound_imp_ref.struct.dump(), 4)
   3273                     dump.add_line('DLL: %s' % bound_imp_ref.name, 4)
   3274                     dump.add_newline()
   3275 
   3276 
   3277         if hasattr(self, 'DIRECTORY_ENTRY_DELAY_IMPORT'):
   3278             dump.add_header('Delay Imported symbols')
   3279             for module in self.DIRECTORY_ENTRY_DELAY_IMPORT:
   3280             
   3281                 dump.add_lines(module.struct.dump())
   3282                 dump.add_newline()
   3283                 
   3284                 for symbol in module.imports:
   3285                     if symbol.import_by_ordinal is True:
   3286                         dump.add('%s Ordinal[%s] (Imported by Ordinal)' % (
   3287                             module.dll, str(symbol.ordinal)))
   3288                     else:
   3289                         dump.add('%s.%s Hint[%s]' % (
   3290                             module.dll, symbol.name, str(symbol.hint)))
   3291                     
   3292                     if symbol.bound:
   3293                         dump.add_line(' Bound: 0x%08X' % (symbol.bound))
   3294                     else:
   3295                         dump.add_newline()
   3296                 dump.add_newline()
   3297             
   3298         
   3299         if hasattr(self, 'DIRECTORY_ENTRY_RESOURCE'):
   3300             dump.add_header('Resource directory')
   3301             
   3302             dump.add_lines(self.DIRECTORY_ENTRY_RESOURCE.struct.dump())
   3303             
   3304             for resource_type in self.DIRECTORY_ENTRY_RESOURCE.entries:
   3305             
   3306                 if resource_type.name is not None:
   3307                     dump.add_line('Name: [%s]' % resource_type.name, 2)
   3308                 else:
   3309                     dump.add_line('Id: [0x%X] (%s)' % (
   3310                         resource_type.struct.Id, RESOURCE_TYPE.get(
   3311                             resource_type.struct.Id, '-')),
   3312                         2)
   3313                         
   3314                 dump.add_lines(resource_type.struct.dump(), 2)
   3315 
   3316                 if hasattr(resource_type, 'directory'):
   3317                 
   3318                     dump.add_lines(resource_type.directory.struct.dump(), 4)
   3319                         
   3320                     for resource_id in resource_type.directory.entries:
   3321                     
   3322                         if resource_id.name is not None:
   3323                             dump.add_line('Name: [%s]' % resource_id.name, 6)
   3324                         else:
   3325                             dump.add_line('Id: [0x%X]' % resource_id.struct.Id, 6)
   3326                             
   3327                         dump.add_lines(resource_id.struct.dump(), 6)
   3328     
   3329                         if hasattr(resource_id, 'directory'):
   3330                             dump.add_lines(resource_id.directory.struct.dump(), 8)
   3331                             
   3332                             for resource_lang in resource_id.directory.entries:
   3333                             #    dump.add_line('\\--- LANG [%d,%d][%s]' % (
   3334                             #        resource_lang.data.lang,
   3335                             #        resource_lang.data.sublang,
   3336                             #        LANG[resource_lang.data.lang]), 8)
   3337                                 dump.add_lines(resource_lang.struct.dump(), 10)
   3338                                 dump.add_lines(resource_lang.data.struct.dump(), 12)
   3339                 dump.add_newline()
   3340     
   3341             dump.add_newline()
   3342         
   3343         
   3344         if ( hasattr(self, 'DIRECTORY_ENTRY_TLS') and 
   3345              self.DIRECTORY_ENTRY_TLS and 
   3346              self.DIRECTORY_ENTRY_TLS.struct ):
   3347              
   3348             dump.add_header('TLS')
   3349             dump.add_lines(self.DIRECTORY_ENTRY_TLS.struct.dump())
   3350             dump.add_newline()
   3351 
   3352         
   3353         if hasattr(self, 'DIRECTORY_ENTRY_DEBUG'):
   3354             dump.add_header('Debug information')
   3355             for dbg in self.DIRECTORY_ENTRY_DEBUG:
   3356                 dump.add_lines(dbg.struct.dump())
   3357                 try:
   3358                     dump.add_line('Type: '+DEBUG_TYPE[dbg.struct.Type])
   3359                 except KeyError:
   3360                     dump.add_line('Type: 0x%x(Unknown)' % dbg.struct.Type)
   3361                 dump.add_newline()
   3362         
   3363         
   3364         if hasattr(self, 'DIRECTORY_ENTRY_BASERELOC'):
   3365             dump.add_header('Base relocations')
   3366             for base_reloc in self.DIRECTORY_ENTRY_BASERELOC:
   3367                 dump.add_lines(base_reloc.struct.dump())
   3368                 for reloc in base_reloc.entries:
   3369                     try:
   3370                         dump.add_line('%08Xh %s' % (
   3371                             reloc.rva, RELOCATION_TYPE[reloc.type][16:]), 4)
   3372                     except KeyError:
   3373                         dump.add_line('0x%08X 0x%x(Unknown)' % (
   3374                             reloc.rva, reloc.type), 4)
   3375                 dump.add_newline()
   3376         
   3377 
   3378         return dump.get_text()
   3379 
   3380     # OC Patch
   3381     def get_physical_by_rva(self, rva):
   3382         """Gets the physical address in the PE file from an RVA value."""
   3383         try:
   3384             return self.get_offset_from_rva(rva)
   3385         except Exception:
   3386             return None
   3387 
   3388 
   3389     ##
   3390     # Double-Word get/set
   3391     ##
   3392 
   3393     def get_data_from_dword(self, dword):
   3394         """Return a four byte string representing the double word value. (little endian)."""
   3395         return struct.pack('<L', dword)
   3396 
   3397 
   3398     def get_dword_from_data(self, data, offset):
   3399         """Convert four bytes of data to a double word (little endian)
   3400         
   3401         'offset' is assumed to index into a dword array. So setting it to
   3402         N will return a dword out of the data sarting at offset N*4.
   3403         
   3404         Returns None if the data can't be turned into a double word.
   3405         """
   3406         
   3407         if (offset+1)*4 > len(data):
   3408             return None
   3409         
   3410         return struct.unpack('<L', data[offset*4:(offset+1)*4])[0]
   3411         
   3412         
   3413     def get_dword_at_rva(self, rva):
   3414         """Return the double word value at the given RVA.
   3415         
   3416         Returns None if the value can't be read, i.e. the RVA can't be mapped
   3417         to a file offset.
   3418         """
   3419         
   3420         try:
   3421             return self.get_dword_from_data(self.get_data(rva)[:4], 0)
   3422         except PEFormatError:
   3423             return None
   3424 
   3425 
   3426     def get_dword_from_offset(self, offset):
   3427         """Return the double word value at the given file offset. (little endian)"""
   3428         
   3429         if offset+4 > len(self.__data__):
   3430             return None
   3431             
   3432         return self.get_dword_from_data(self.__data__[offset:offset+4], 0)
   3433 
   3434 
   3435     def set_dword_at_rva(self, rva, dword):
   3436         """Set the double word value at the file offset corresponding to the given RVA."""
   3437         return self.set_bytes_at_rva(rva, self.get_data_from_dword(dword))
   3438 
   3439 
   3440     def set_dword_at_offset(self, offset, dword):
   3441         """Set the double word value at the given file offset."""
   3442         return self.set_bytes_at_offset(offset, self.get_data_from_dword(dword))
   3443 
   3444 
   3445 
   3446     ##
   3447     # Word get/set
   3448     ##
   3449 
   3450     def get_data_from_word(self, word):
   3451         """Return a two byte string representing the word value. (little endian)."""
   3452         return struct.pack('<H', word)
   3453 
   3454 
   3455     def get_word_from_data(self, data, offset):
   3456         """Convert two bytes of data to a word (little endian)
   3457         
   3458         'offset' is assumed to index into a word array. So setting it to
   3459         N will return a dword out of the data sarting at offset N*2.
   3460 
   3461         Returns None if the data can't be turned into a word.
   3462         """
   3463 
   3464         if (offset+1)*2 > len(data):
   3465             return None
   3466 
   3467         return struct.unpack('<H', data[offset*2:(offset+1)*2])[0]
   3468 
   3469 
   3470     def get_word_at_rva(self, rva):
   3471         """Return the word value at the given RVA.
   3472         
   3473         Returns None if the value can't be read, i.e. the RVA can't be mapped
   3474         to a file offset.
   3475         """
   3476         
   3477         try:
   3478             return self.get_word_from_data(self.get_data(rva)[:2], 0)
   3479         except PEFormatError:
   3480             return None
   3481             
   3482 
   3483     def get_word_from_offset(self, offset):
   3484         """Return the word value at the given file offset. (little endian)"""
   3485         
   3486         if offset+2 > len(self.__data__):
   3487             return None
   3488             
   3489         return self.get_word_from_data(self.__data__[offset:offset+2], 0)
   3490 
   3491 
   3492     def set_word_at_rva(self, rva, word):
   3493         """Set the word value at the file offset corresponding to the given RVA."""
   3494         return self.set_bytes_at_rva(rva, self.get_data_from_word(word))
   3495 
   3496 
   3497     def set_word_at_offset(self, offset, word):
   3498         """Set the word value at the given file offset."""
   3499         return self.set_bytes_at_offset(offset, self.get_data_from_word(word))
   3500 
   3501 
   3502     ##
   3503     # Quad-Word get/set
   3504     ##
   3505 
   3506     def get_data_from_qword(self, word):
   3507         """Return a eight byte string representing the quad-word value. (little endian)."""
   3508         return struct.pack('<Q', word)
   3509 
   3510 
   3511     def get_qword_from_data(self, data, offset):
   3512         """Convert eight bytes of data to a word (little endian)
   3513         
   3514         'offset' is assumed to index into a word array. So setting it to
   3515         N will return a dword out of the data sarting at offset N*8.
   3516 
   3517         Returns None if the data can't be turned into a quad word.
   3518         """
   3519 
   3520         if (offset+1)*8 > len(data):
   3521             return None
   3522 
   3523         return struct.unpack('<Q', data[offset*8:(offset+1)*8])[0]
   3524 
   3525 
   3526     def get_qword_at_rva(self, rva):
   3527         """Return the quad-word value at the given RVA.
   3528         
   3529         Returns None if the value can't be read, i.e. the RVA can't be mapped
   3530         to a file offset.
   3531         """
   3532         
   3533         try:
   3534             return self.get_qword_from_data(self.get_data(rva)[:8], 0)
   3535         except PEFormatError:
   3536             return None
   3537             
   3538 
   3539     def get_qword_from_offset(self, offset):
   3540         """Return the quad-word value at the given file offset. (little endian)"""
   3541         
   3542         if offset+8 > len(self.__data__):
   3543             return None
   3544             
   3545         return self.get_qword_from_data(self.__data__[offset:offset+8], 0)
   3546 
   3547 
   3548     def set_qword_at_rva(self, rva, qword):
   3549         """Set the quad-word value at the file offset corresponding to the given RVA."""
   3550         return self.set_bytes_at_rva(rva, self.get_data_from_qword(qword))
   3551 
   3552 
   3553     def set_qword_at_offset(self, offset, qword):
   3554         """Set the quad-word value at the given file offset."""
   3555         return self.set_bytes_at_offset(offset, self.get_data_from_qword(qword))
   3556 
   3557 
   3558 
   3559     ##
   3560     # Set bytes
   3561     ##
   3562 
   3563 
   3564     def set_bytes_at_rva(self, rva, data):
   3565         """Overwrite, with the given string, the bytes at the file offset corresponding to the given RVA.
   3566         
   3567         Return True if successful, False otherwise. It can fail if the
   3568         offset is outside the file's boundaries.
   3569         """
   3570 
   3571         offset = self.get_physical_by_rva(rva)
   3572         if not offset:
   3573             raise False
   3574             
   3575         return self.set_bytes_at_offset(offset, data)
   3576 
   3577         
   3578     def set_bytes_at_offset(self, offset, data):
   3579         """Overwrite the bytes at the given file offset with the given string.
   3580         
   3581         Return True if successful, False otherwise. It can fail if the
   3582         offset is outside the file's boundaries.
   3583         """
   3584     
   3585         if not isinstance(data, str):
   3586             raise TypeError('data should be of type: str')
   3587         
   3588         if offset >= 0 and offset < len(self.__data__):
   3589             self.__data__ = ( self.__data__[:offset] + 
   3590                 data +
   3591                 self.__data__[offset+len(data):] )
   3592         else:
   3593             return False
   3594             
   3595         # Refresh the section's data with the modified information
   3596         #
   3597         for section in self.sections:
   3598             section_data_start = section.PointerToRawData
   3599             section_data_end = section_data_start+section.SizeOfRawData
   3600             section.data = self.__data__[section_data_start:section_data_end]
   3601 
   3602         return True
   3603         
   3604 
   3605 
   3606     def relocate_image(self, new_ImageBase):
   3607         """Apply the relocation information to the image using the provided new image base.
   3608         
   3609         This method will apply the relocation information to the image. Given the new base,
   3610         all the relocations will be processed and both the raw data and the section's data
   3611         will be fixed accordingly.
   3612         The resulting image can be retrieved as well through the method:
   3613         
   3614             get_memory_mapped_image()
   3615             
   3616         In order to get something that would more closely match what could be found in memory
   3617         once the Windows loader finished its work.
   3618         """
   3619         
   3620         relocation_difference = new_ImageBase - self.OPTIONAL_HEADER.ImageBase
   3621         
   3622         
   3623         for reloc in self.DIRECTORY_ENTRY_BASERELOC:
   3624 
   3625             virtual_address = reloc.struct.VirtualAddress
   3626             size_of_block = reloc.struct.SizeOfBlock
   3627             
   3628             # We iterate with an index because if the relocation is of type
   3629             # IMAGE_REL_BASED_HIGHADJ we need to also process the next entry
   3630             # at once and skip it for the next interation
   3631             # 
   3632             entry_idx = 0
   3633             while entry_idx<len(reloc.entries):
   3634             
   3635                 entry = reloc.entries[entry_idx]
   3636                 entry_idx += 1
   3637                                 
   3638                 if entry.type == RELOCATION_TYPE['IMAGE_REL_BASED_ABSOLUTE']:
   3639                     # Nothing to do for this type of relocation
   3640                     pass
   3641                     
   3642                 elif entry.type == RELOCATION_TYPE['IMAGE_REL_BASED_HIGH']:
   3643                     # Fix the high 16bits of a relocation
   3644                     #
   3645                     # Add high 16bits of relocation_difference to the
   3646                     # 16bit value at RVA=entry.rva
   3647                     
   3648                     self.set_word_at_rva(
   3649                         entry.rva,
   3650                         ( self.get_word_at_rva(entry.rva) + relocation_difference>>16)&0xffff )
   3651                   
   3652                 elif entry.type == RELOCATION_TYPE['IMAGE_REL_BASED_LOW']:
   3653                     # Fix the low 16bits of a relocation
   3654                     #
   3655                     # Add low 16 bits of relocation_difference to the 16bit value
   3656                     # at RVA=entry.rva
   3657                     
   3658                     self.set_word_at_rva(
   3659                         entry.rva,
   3660                         ( self.get_word_at_rva(entry.rva) + relocation_difference)&0xffff)
   3661                     
   3662                 elif entry.type == RELOCATION_TYPE['IMAGE_REL_BASED_HIGHLOW']:
   3663                     # Handle all high and low parts of a 32bit relocation
   3664                     #
   3665                     # Add relocation_difference to the value at RVA=entry.rva
   3666                     
   3667                     self.set_dword_at_rva(
   3668                         entry.rva,
   3669                         self.get_dword_at_rva(entry.rva)+relocation_difference)
   3670                   
   3671                 elif entry.type == RELOCATION_TYPE['IMAGE_REL_BASED_HIGHADJ']:
   3672                     # Fix the high 16bits of a relocation and adjust
   3673                     #
   3674                     # Add high 16bits of relocation_difference to the 32bit value
   3675                     # composed from the (16bit value at RVA=entry.rva)<<16 plus 
   3676                     # the 16bit value at the next relocation entry.
   3677                     #
   3678                     
   3679                     # If the next entry is beyond the array's limits,
   3680                     # abort... the table is corrupt
   3681                     #
   3682                     if entry_idx == len(reloc.entries):
   3683                         break
   3684                     
   3685                     next_entry = reloc.entries[entry_idx]
   3686                     entry_idx += 1
   3687                     self.set_word_at_rva( entry.rva, 
   3688                         ((self.get_word_at_rva(entry.rva)<<16) + next_entry.rva +
   3689                         relocation_difference & 0xffff0000) >> 16 )
   3690                 
   3691                 elif entry.type == RELOCATION_TYPE['IMAGE_REL_BASED_DIR64']:
   3692                     # Apply the difference to the 64bit value at the offset
   3693                     # RVA=entry.rva
   3694                     
   3695                     self.set_qword_at_rva(
   3696                         entry.rva,
   3697                         self.get_qword_at_rva(entry.rva) + relocation_difference)
   3698 
   3699 
   3700     def verify_checksum(self):
   3701     
   3702         return self.OPTIONAL_HEADER.CheckSum == self.generate_checksum()
   3703         
   3704     
   3705     def generate_checksum(self):
   3706     
   3707         # Get the offset to the CheckSum field in the OptionalHeader
   3708         #
   3709         checksum_offset = self.OPTIONAL_HEADER.__file_offset__ + 0x40 # 64
   3710         
   3711         checksum = 0
   3712         
   3713         for i in range( len(self.__data__) / 4 ):
   3714         
   3715             # Skip the checksum field
   3716             #
   3717             if i == checksum_offset / 4:
   3718                 continue
   3719 
   3720             dword = struct.unpack('L', self.__data__[ i*4 : i*4+4 ])[0]
   3721             checksum = (checksum & 0xffffffff) + dword + (checksum>>32)
   3722             if checksum > 2**32:
   3723                 checksum = (checksum & 0xffffffff) + (checksum >> 32)
   3724         
   3725         checksum = (checksum & 0xffff) + (checksum >> 16)
   3726         checksum = (checksum) + (checksum >> 16)
   3727         checksum = checksum & 0xffff
   3728         
   3729         return checksum + len(self.__data__)
   3730