Home | History | Annotate | Download | only in courgette
      1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "courgette/disassembler_win32_x86.h"
      6 
      7 #include <algorithm>
      8 #include <string>
      9 #include <vector>
     10 
     11 #include "base/basictypes.h"
     12 #include "base/logging.h"
     13 
     14 #include "courgette/assembly_program.h"
     15 #include "courgette/courgette.h"
     16 #include "courgette/encoded_program.h"
     17 
     18 // COURGETTE_HISTOGRAM_TARGETS prints out a histogram of how frequently
     19 // different target addresses are referenced.  Purely for debugging.
     20 #define COURGETTE_HISTOGRAM_TARGETS 0
     21 
     22 namespace courgette {
     23 
// Constructs a disassembler over |length| bytes beginning at |start|; both
// arguments are forwarded unchanged to Disassembler(start, length).
//
// Every header-derived member starts out as 0 / NULL / false.  ParseHeader()
// assigns the PE header fields (machine type, section table, sizes, image
// base, data directory count, ...).  |incomplete_disassembly_| is not written
// by ParseHeader(); it only starts out false here.
DisassemblerWin32X86::DisassemblerWin32X86(const void* start, size_t length)
  : Disassembler(start, length),
    incomplete_disassembly_(false),
    is_PE32_plus_(false),
    optional_header_(NULL),
    size_of_optional_header_(0),
    offset_of_data_directories_(0),
    machine_type_(0),
    number_of_sections_(0),
    sections_(NULL),
    has_text_section_(false),
    size_of_code_(0),
    size_of_initialized_data_(0),
    size_of_uninitialized_data_(0),
    base_of_code_(0),
    base_of_data_(0),
    image_base_(0),
    size_of_image_(0),
    number_of_data_directories_(0) {
}
     44 
     45 // ParseHeader attempts to match up the buffer with the Windows data
     46 // structures that exist within a Windows 'Portable Executable' format file.
     47 // Returns 'true' if the buffer matches, and 'false' if the data looks
     48 // suspicious.  Rather than try to 'map' the buffer to the numerous windows
     49 // structures, we extract the information we need into the courgette::PEInfo
     50 // structure.
     51 //
     52 bool DisassemblerWin32X86::ParseHeader() {
     53   if (length() < kOffsetOfFileAddressOfNewExeHeader + 4 /*size*/)
     54     return Bad("Too small");
     55 
     56   // Have 'MZ' magic for a DOS header?
     57   if (start()[0] != 'M' || start()[1] != 'Z')
     58     return Bad("Not MZ");
     59 
     60   // offset from DOS header to PE header is stored in DOS header.
     61   uint32 offset = ReadU32(start(),
     62                           kOffsetOfFileAddressOfNewExeHeader);
     63 
     64   if (offset >= length())
     65     return Bad("Bad offset to PE header");
     66 
     67   const uint8* const pe_header = OffsetToPointer(offset);
     68   const size_t kMinPEHeaderSize = 4 /*signature*/ + kSizeOfCoffHeader;
     69   if (pe_header <= start() ||
     70       pe_header >= end() - kMinPEHeaderSize)
     71     return Bad("Bad offset to PE header");
     72 
     73   if (offset % 8 != 0)
     74     return Bad("Misaligned PE header");
     75 
     76   // The 'PE' header is an IMAGE_NT_HEADERS structure as defined in WINNT.H.
     77   // See http://msdn.microsoft.com/en-us/library/ms680336(VS.85).aspx
     78   //
     79   // The first field of the IMAGE_NT_HEADERS is the signature.
     80   if (!(pe_header[0] == 'P' &&
     81         pe_header[1] == 'E' &&
     82         pe_header[2] == 0 &&
     83         pe_header[3] == 0))
     84     return Bad("no PE signature");
     85 
     86   // The second field of the IMAGE_NT_HEADERS is the COFF header.
     87   // The COFF header is also called an IMAGE_FILE_HEADER
     88   //   http://msdn.microsoft.com/en-us/library/ms680313(VS.85).aspx
     89   const uint8* const coff_header = pe_header + 4;
     90   machine_type_       = ReadU16(coff_header, 0);
     91   number_of_sections_ = ReadU16(coff_header, 2);
     92   size_of_optional_header_ = ReadU16(coff_header, 16);
     93 
     94   // The rest of the IMAGE_NT_HEADERS is the IMAGE_OPTIONAL_HEADER(32|64)
     95   const uint8* const optional_header = coff_header + kSizeOfCoffHeader;
     96   optional_header_ = optional_header;
     97 
     98   if (optional_header + size_of_optional_header_ >= end())
     99     return Bad("optional header past end of file");
    100 
    101   // Check we can read the magic.
    102   if (size_of_optional_header_ < 2)
    103     return Bad("optional header no magic");
    104 
    105   uint16 magic = ReadU16(optional_header, 0);
    106 
    107   if (magic == kImageNtOptionalHdr32Magic) {
    108     is_PE32_plus_ = false;
    109     offset_of_data_directories_ =
    110       kOffsetOfDataDirectoryFromImageOptionalHeader32;
    111   } else if (magic == kImageNtOptionalHdr64Magic) {
    112     is_PE32_plus_ = true;
    113     offset_of_data_directories_ =
    114       kOffsetOfDataDirectoryFromImageOptionalHeader64;
    115   } else {
    116     return Bad("unrecognized magic");
    117   }
    118 
    119   // Check that we can read the rest of the the fixed fields.  Data directories
    120   // directly follow the fixed fields of the IMAGE_OPTIONAL_HEADER.
    121   if (size_of_optional_header_ < offset_of_data_directories_)
    122     return Bad("optional header too short");
    123 
    124   // The optional header is either an IMAGE_OPTIONAL_HEADER32 or
    125   // IMAGE_OPTIONAL_HEADER64
    126   // http://msdn.microsoft.com/en-us/library/ms680339(VS.85).aspx
    127   //
    128   // Copy the fields we care about.
    129   size_of_code_               = ReadU32(optional_header, 4);
    130   size_of_initialized_data_   = ReadU32(optional_header, 8);
    131   size_of_uninitialized_data_ = ReadU32(optional_header, 12);
    132   base_of_code_               = ReadU32(optional_header, 20);
    133   if (is_PE32_plus_) {
    134     base_of_data_ = 0;
    135     image_base_  = ReadU64(optional_header, 24);
    136   } else {
    137     base_of_data_ = ReadU32(optional_header, 24);
    138     image_base_   = ReadU32(optional_header, 28);
    139   }
    140   size_of_image_ = ReadU32(optional_header, 56);
    141   number_of_data_directories_ =
    142     ReadU32(optional_header, (is_PE32_plus_ ? 108 : 92));
    143 
    144   if (size_of_code_ >= length() ||
    145       size_of_initialized_data_ >= length() ||
    146       size_of_code_ + size_of_initialized_data_ >= length()) {
    147     // This validation fires on some perfectly fine executables.
    148     //  return Bad("code or initialized data too big");
    149   }
    150 
    151   // TODO(sra): we can probably get rid of most of the data directories.
    152   bool b = true;
    153   // 'b &= ...' could be short circuit 'b = b && ...' but it is not necessary
    154   // for correctness and it compiles smaller this way.
    155   b &= ReadDataDirectory(0, &export_table_);
    156   b &= ReadDataDirectory(1, &import_table_);
    157   b &= ReadDataDirectory(2, &resource_table_);
    158   b &= ReadDataDirectory(3, &exception_table_);
    159   b &= ReadDataDirectory(5, &base_relocation_table_);
    160   b &= ReadDataDirectory(11, &bound_import_table_);
    161   b &= ReadDataDirectory(12, &import_address_table_);
    162   b &= ReadDataDirectory(13, &delay_import_descriptor_);
    163   b &= ReadDataDirectory(14, &clr_runtime_header_);
    164   if (!b) {
    165     return Bad("malformed data directory");
    166   }
    167 
    168   // Sections follow the optional header.
    169   sections_ =
    170       reinterpret_cast<const Section*>(optional_header +
    171                                        size_of_optional_header_);
    172   size_t detected_length = 0;
    173 
    174   for (int i = 0;  i < number_of_sections_;  ++i) {
    175     const Section* section = &sections_[i];
    176 
    177     // TODO(sra): consider using the 'characteristics' field of the section
    178     // header to see if the section contains instructions.
    179     if (memcmp(section->name, ".text", 6) == 0)
    180       has_text_section_ = true;
    181 
    182     uint32 section_end =
    183         section->file_offset_of_raw_data + section->size_of_raw_data;
    184     if (section_end > detected_length)
    185       detected_length = section_end;
    186   }
    187 
    188   // Pretend our in-memory copy is only as long as our detected length.
    189   ReduceLength(detected_length);
    190 
    191   if (!is_32bit()) {
    192     return Bad("64 bit executables are not yet supported");
    193   }
    194 
    195   if (!has_text_section()) {
    196     return Bad("Resource-only executables are not yet supported");
    197   }
    198 
    199   return Good();
    200 }
    201 
    202 bool DisassemblerWin32X86::Disassemble(AssemblyProgram* target) {
    203   if (!ok())
    204     return false;
    205 
    206   target->set_image_base(image_base());
    207 
    208   if (!ParseAbs32Relocs())
    209     return false;
    210 
    211   ParseRel32RelocsFromSections();
    212 
    213   if (!ParseFile(target))
    214     return false;
    215 
    216   target->DefaultAssignIndexes();
    217 
    218   return true;
    219 }
    220 
    221 ////////////////////////////////////////////////////////////////////////////////
    222 
    223 bool DisassemblerWin32X86::ParseRelocs(std::vector<RVA> *relocs) {
    224   relocs->clear();
    225 
    226   size_t relocs_size = base_relocation_table_.size_;
    227   if (relocs_size == 0)
    228     return true;
    229 
    230   // The format of the base relocation table is a sequence of variable sized
    231   // IMAGE_BASE_RELOCATION blocks.  Search for
    232   //   "The format of the base relocation data is somewhat quirky"
    233   // at http://msdn.microsoft.com/en-us/library/ms809762.aspx
    234 
    235   const uint8* relocs_start = RVAToPointer(base_relocation_table_.address_);
    236   const uint8* relocs_end = relocs_start + relocs_size;
    237 
    238   // Make sure entire base relocation table is within the buffer.
    239   if (relocs_start < start() ||
    240       relocs_start >= end() ||
    241       relocs_end <= start() ||
    242       relocs_end > end()) {
    243     return Bad(".relocs outside image");
    244   }
    245 
    246   const uint8* block = relocs_start;
    247 
    248   // Walk the variable sized blocks.
    249   while (block + 8 < relocs_end) {
    250     RVA page_rva = ReadU32(block, 0);
    251     uint32 size = ReadU32(block, 4);
    252     if (size < 8 ||        // Size includes header ...
    253         size % 4  !=  0)   // ... and is word aligned.
    254       return Bad("unreasonable relocs block");
    255 
    256     const uint8* end_entries = block + size;
    257 
    258     if (end_entries <= block ||
    259         end_entries <= start() ||
    260         end_entries > end())
    261       return Bad(".relocs block outside image");
    262 
    263     // Walk through the two-byte entries.
    264     for (const uint8* p = block + 8;  p < end_entries;  p += 2) {
    265       uint16 entry = ReadU16(p, 0);
    266       int type = entry >> 12;
    267       int offset = entry & 0xFFF;
    268 
    269       RVA rva = page_rva + offset;
    270       if (type == 3) {         // IMAGE_REL_BASED_HIGHLOW
    271         relocs->push_back(rva);
    272       } else if (type == 0) {  // IMAGE_REL_BASED_ABSOLUTE
    273         // Ignore, used as padding.
    274       } else {
    275         // Does not occur in Windows x86 executables.
    276         return Bad("unknown type of reloc");
    277       }
    278     }
    279 
    280     block += size;
    281   }
    282 
    283   std::sort(relocs->begin(), relocs->end());
    284 
    285   return true;
    286 }
    287 
    288 const Section* DisassemblerWin32X86::RVAToSection(RVA rva) const {
    289   for (int i = 0; i < number_of_sections_; i++) {
    290     const Section* section = &sections_[i];
    291     uint32 offset = rva - section->virtual_address;
    292     if (offset < section->virtual_size) {
    293       return section;
    294     }
    295   }
    296   return NULL;
    297 }
    298 
    299 int DisassemblerWin32X86::RVAToFileOffset(RVA rva) const {
    300   const Section* section = RVAToSection(rva);
    301   if (section) {
    302     uint32 offset = rva - section->virtual_address;
    303     if (offset < section->size_of_raw_data) {
    304       return section->file_offset_of_raw_data + offset;
    305     } else {
    306       return kNoOffset;  // In section but not in file (e.g. uninit data).
    307     }
    308   }
    309 
    310   // Small RVA values point into the file header in the loaded image.
    311   // RVA 0 is the module load address which Windows uses as the module handle.
    312   // RVA 2 sometimes occurs, I'm not sure what it is, but it would map into the
    313   // DOS header.
    314   if (rva == 0 || rva == 2)
    315     return rva;
    316 
    317   NOTREACHED();
    318   return kNoOffset;
    319 }
    320 
    321 const uint8* DisassemblerWin32X86::RVAToPointer(RVA rva) const {
    322   int file_offset = RVAToFileOffset(rva);
    323   if (file_offset == kNoOffset)
    324     return NULL;
    325   else
    326     return OffsetToPointer(file_offset);
    327 }
    328 
    329 std::string DisassemblerWin32X86::SectionName(const Section* section) {
    330   if (section == NULL)
    331     return "<none>";
    332   char name[9];
    333   memcpy(name, section->name, 8);
    334   name[8] = '\0';  // Ensure termination.
    335   return name;
    336 }
    337 
    338 CheckBool DisassemblerWin32X86::ParseFile(AssemblyProgram* program) {
    339   // Walk all the bytes in the file, whether or not in a section.
    340   uint32 file_offset = 0;
    341   while (file_offset < length()) {
    342     const Section* section = FindNextSection(file_offset);
    343     if (section == NULL) {
    344       // No more sections.  There should not be extra stuff following last
    345       // section.
    346       //   ParseNonSectionFileRegion(file_offset, pe_info().length(), program);
    347       break;
    348     }
    349     if (file_offset < section->file_offset_of_raw_data) {
    350       uint32 section_start_offset = section->file_offset_of_raw_data;
    351       if(!ParseNonSectionFileRegion(file_offset, section_start_offset,
    352                                     program))
    353         return false;
    354 
    355       file_offset = section_start_offset;
    356     }
    357     uint32 end = file_offset + section->size_of_raw_data;
    358     if (!ParseFileRegion(section, file_offset, end, program))
    359       return false;
    360     file_offset = end;
    361   }
    362 
    363 #if COURGETTE_HISTOGRAM_TARGETS
    364   HistogramTargets("abs32 relocs", abs32_target_rvas_);
    365   HistogramTargets("rel32 relocs", rel32_target_rvas_);
    366 #endif
    367 
    368   return true;
    369 }
    370 
    371 bool DisassemblerWin32X86::ParseAbs32Relocs() {
    372   abs32_locations_.clear();
    373   if (!ParseRelocs(&abs32_locations_))
    374     return false;
    375 
    376   std::sort(abs32_locations_.begin(), abs32_locations_.end());
    377 
    378 #if COURGETTE_HISTOGRAM_TARGETS
    379   for (size_t i = 0;  i < abs32_locations_.size(); ++i) {
    380     RVA rva = abs32_locations_[i];
    381     // The 4 bytes at the relocation are a reference to some address.
    382     uint32 target_address = Read32LittleEndian(RVAToPointer(rva));
    383     ++abs32_target_rvas_[target_address - image_base()];
    384   }
    385 #endif
    386   return true;
    387 }
    388 
    389 void DisassemblerWin32X86::ParseRel32RelocsFromSections() {
    390   uint32 file_offset = 0;
    391   while (file_offset < length()) {
    392     const Section* section = FindNextSection(file_offset);
    393     if (section == NULL)
    394       break;
    395     if (file_offset < section->file_offset_of_raw_data)
    396       file_offset = section->file_offset_of_raw_data;
    397     ParseRel32RelocsFromSection(section);
    398     file_offset += section->size_of_raw_data;
    399   }
    400   std::sort(rel32_locations_.begin(), rel32_locations_.end());
    401 
    402 #if COURGETTE_HISTOGRAM_TARGETS
    403   VLOG(1) << "abs32_locations_ " << abs32_locations_.size()
    404           << "\nrel32_locations_ " << rel32_locations_.size()
    405           << "\nabs32_target_rvas_ " << abs32_target_rvas_.size()
    406           << "\nrel32_target_rvas_ " << rel32_target_rvas_.size();
    407 
    408   int common = 0;
    409   std::map<RVA, int>::iterator abs32_iter = abs32_target_rvas_.begin();
    410   std::map<RVA, int>::iterator rel32_iter = rel32_target_rvas_.begin();
    411   while (abs32_iter != abs32_target_rvas_.end() &&
    412          rel32_iter != rel32_target_rvas_.end()) {
    413     if (abs32_iter->first < rel32_iter->first)
    414       ++abs32_iter;
    415     else if (rel32_iter->first < abs32_iter->first)
    416       ++rel32_iter;
    417     else {
    418       ++common;
    419       ++abs32_iter;
    420       ++rel32_iter;
    421     }
    422   }
    423   VLOG(1) << "common " << common;
    424 #endif
    425 }
    426 
    427 void DisassemblerWin32X86::ParseRel32RelocsFromSection(const Section* section) {
    428   // TODO(sra): use characteristic.
    429   bool isCode = strcmp(section->name, ".text") == 0;
    430   if (!isCode)
    431     return;
    432 
    433   uint32 start_file_offset = section->file_offset_of_raw_data;
    434   uint32 end_file_offset = start_file_offset + section->size_of_raw_data;
    435   RVA relocs_start_rva = base_relocation_table().address_;
    436 
    437   const uint8* start_pointer = OffsetToPointer(start_file_offset);
    438   const uint8* end_pointer = OffsetToPointer(end_file_offset);
    439 
    440   RVA start_rva = FileOffsetToRVA(start_file_offset);
    441   RVA end_rva = start_rva + section->virtual_size;
    442 
    443   // Quick way to convert from Pointer to RVA within a single Section is to
    444   // subtract 'pointer_to_rva'.
    445   const uint8* const adjust_pointer_to_rva = start_pointer - start_rva;
    446 
    447   std::vector<RVA>::iterator abs32_pos = abs32_locations_.begin();
    448 
    449   // Find the rel32 relocations.
    450   const uint8* p = start_pointer;
    451   while (p < end_pointer) {
    452     RVA current_rva = static_cast<RVA>(p - adjust_pointer_to_rva);
    453     if (current_rva == relocs_start_rva) {
    454       uint32 relocs_size = base_relocation_table().size_;
    455       if (relocs_size) {
    456         p += relocs_size;
    457         continue;
    458       }
    459     }
    460 
    461     //while (abs32_pos != abs32_locations_.end() && *abs32_pos < current_rva)
    462     //  ++abs32_pos;
    463 
    464     // Heuristic discovery of rel32 locations in instruction stream: are the
    465     // next few bytes the start of an instruction containing a rel32
    466     // addressing mode?
    467     const uint8* rel32 = NULL;
    468 
    469     if (p + 5 <= end_pointer) {
    470       if (*p == 0xE8 || *p == 0xE9) {  // jmp rel32 and call rel32
    471         rel32 = p + 1;
    472       }
    473     }
    474     if (p + 6 <= end_pointer) {
    475       if (*p == 0x0F  &&  (*(p+1) & 0xF0) == 0x80) {  // Jcc long form
    476         if (p[1] != 0x8A && p[1] != 0x8B)  // JPE/JPO unlikely
    477           rel32 = p + 2;
    478       }
    479     }
    480     if (rel32) {
    481       RVA rel32_rva = static_cast<RVA>(rel32 - adjust_pointer_to_rva);
    482 
    483       // Is there an abs32 reloc overlapping the candidate?
    484       while (abs32_pos != abs32_locations_.end() && *abs32_pos < rel32_rva - 3)
    485         ++abs32_pos;
    486       // Now: (*abs32_pos > rel32_rva - 4) i.e. the lowest addressed 4-byte
    487       // region that could overlap rel32_rva.
    488       if (abs32_pos != abs32_locations_.end()) {
    489         if (*abs32_pos < rel32_rva + 4) {
    490           // Beginning of abs32 reloc is before end of rel32 reloc so they
    491           // overlap.  Skip four bytes past the abs32 reloc.
    492           p += (*abs32_pos + 4) - current_rva;
    493           continue;
    494         }
    495       }
    496 
    497       RVA target_rva = rel32_rva + 4 + Read32LittleEndian(rel32);
    498       // To be valid, rel32 target must be within image, and within this
    499       // section.
    500       if (IsValidRVA(target_rva) &&
    501           start_rva <= target_rva && target_rva < end_rva) {
    502         rel32_locations_.push_back(rel32_rva);
    503 #if COURGETTE_HISTOGRAM_TARGETS
    504         ++rel32_target_rvas_[target_rva];
    505 #endif
    506         p = rel32 + 4;
    507         continue;
    508       }
    509     }
    510     p += 1;
    511   }
    512 }
    513 
    514 CheckBool DisassemblerWin32X86::ParseNonSectionFileRegion(
    515     uint32 start_file_offset,
    516     uint32 end_file_offset,
    517     AssemblyProgram* program) {
    518   if (incomplete_disassembly_)
    519     return true;
    520 
    521   const uint8* start = OffsetToPointer(start_file_offset);
    522   const uint8* end = OffsetToPointer(end_file_offset);
    523 
    524   const uint8* p = start;
    525 
    526   while (p < end) {
    527     if (!program->EmitByteInstruction(*p))
    528       return false;
    529     ++p;
    530   }
    531 
    532   return true;
    533 }
    534 
    535 CheckBool DisassemblerWin32X86::ParseFileRegion(
    536     const Section* section,
    537     uint32 start_file_offset, uint32 end_file_offset,
    538     AssemblyProgram* program) {
    539   RVA relocs_start_rva = base_relocation_table().address_;
    540 
    541   const uint8* start_pointer = OffsetToPointer(start_file_offset);
    542   const uint8* end_pointer = OffsetToPointer(end_file_offset);
    543 
    544   RVA start_rva = FileOffsetToRVA(start_file_offset);
    545   RVA end_rva = start_rva + section->virtual_size;
    546 
    547   // Quick way to convert from Pointer to RVA within a single Section is to
    548   // subtract 'pointer_to_rva'.
    549   const uint8* const adjust_pointer_to_rva = start_pointer - start_rva;
    550 
    551   std::vector<RVA>::iterator rel32_pos = rel32_locations_.begin();
    552   std::vector<RVA>::iterator abs32_pos = abs32_locations_.begin();
    553 
    554   if (!program->EmitOriginInstruction(start_rva))
    555     return false;
    556 
    557   const uint8* p = start_pointer;
    558 
    559   while (p < end_pointer) {
    560     RVA current_rva = static_cast<RVA>(p - adjust_pointer_to_rva);
    561 
    562     // The base relocation table is usually in the .relocs section, but it could
    563     // actually be anywhere.  Make sure we skip it because we will regenerate it
    564     // during assembly.
    565     if (current_rva == relocs_start_rva) {
    566       if (!program->EmitPeRelocsInstruction())
    567         return false;
    568       uint32 relocs_size = base_relocation_table().size_;
    569       if (relocs_size) {
    570         p += relocs_size;
    571         continue;
    572       }
    573     }
    574 
    575     while (abs32_pos != abs32_locations_.end() && *abs32_pos < current_rva)
    576       ++abs32_pos;
    577 
    578     if (abs32_pos != abs32_locations_.end() && *abs32_pos == current_rva) {
    579       uint32 target_address = Read32LittleEndian(p);
    580       RVA target_rva = target_address - image_base();
    581       // TODO(sra): target could be Label+offset.  It is not clear how to guess
    582       // which it might be.  We assume offset==0.
    583       if (!program->EmitAbs32(program->FindOrMakeAbs32Label(target_rva)))
    584         return false;
    585       p += 4;
    586       continue;
    587     }
    588 
    589     while (rel32_pos != rel32_locations_.end() && *rel32_pos < current_rva)
    590       ++rel32_pos;
    591 
    592     if (rel32_pos != rel32_locations_.end() && *rel32_pos == current_rva) {
    593       RVA target_rva = current_rva + 4 + Read32LittleEndian(p);
    594       if (!program->EmitRel32(program->FindOrMakeRel32Label(target_rva)))
    595         return false;
    596       p += 4;
    597       continue;
    598     }
    599 
    600     if (incomplete_disassembly_) {
    601       if ((abs32_pos == abs32_locations_.end() || end_rva <= *abs32_pos) &&
    602           (rel32_pos == rel32_locations_.end() || end_rva <= *rel32_pos) &&
    603           (end_rva <= relocs_start_rva || current_rva >= relocs_start_rva)) {
    604         // No more relocs in this section, don't bother encoding bytes.
    605         break;
    606       }
    607     }
    608 
    609     if (!program->EmitByteInstruction(*p))
    610       return false;
    611     p += 1;
    612   }
    613 
    614   return true;
    615 }
    616 
    617 #if COURGETTE_HISTOGRAM_TARGETS
    618 // Histogram is printed to std::cout.  It is purely for debugging the algorithm
    619 // and is only enabled manually in 'exploration' builds.  I don't want to add
    620 // command-line configuration for this feature because this code has to be
    621 // small, which means compiled-out.
    622 void DisassemblerWin32X86::HistogramTargets(const char* kind,
    623                                             const std::map<RVA, int>& map) {
    624   int total = 0;
    625   std::map<int, std::vector<RVA> > h;
    626   for (std::map<RVA, int>::const_iterator p = map.begin();
    627        p != map.end();
    628        ++p) {
    629     h[p->second].push_back(p->first);
    630     total += p->second;
    631   }
    632 
    633   std::cout << total << " " << kind << " to "
    634             << map.size() << " unique targets" << std::endl;
    635 
    636   std::cout << "indegree: #targets-with-indegree (example)" << std::endl;
    637   const int kFirstN = 15;
    638   bool someSkipped = false;
    639   int index = 0;
    640   for (std::map<int, std::vector<RVA> >::reverse_iterator p = h.rbegin();
    641        p != h.rend();
    642        ++p) {
    643     ++index;
    644     if (index <= kFirstN || p->first <= 3) {
    645       if (someSkipped) {
    646         std::cout << "..." << std::endl;
    647       }
    648       size_t count = p->second.size();
    649       std::cout << std::dec << p->first << ": " << count;
    650       if (count <= 2) {
    651         for (size_t i = 0;  i < count;  ++i)
    652           std::cout << "  " << DescribeRVA(p->second[i]);
    653       }
    654       std::cout << std::endl;
    655       someSkipped = false;
    656     } else {
    657       someSkipped = true;
    658     }
    659   }
    660 }
    661 #endif  // COURGETTE_HISTOGRAM_TARGETS
    662 
    663 
    664 // DescribeRVA is for debugging only.  I would put it under #ifdef DEBUG except
    665 // that during development I'm finding I need to call it when compiled in
    666 // Release mode.  Hence:
    667 // TODO(sra): make this compile only for debug mode.
    668 std::string DisassemblerWin32X86::DescribeRVA(RVA rva) const {
    669   const Section* section = RVAToSection(rva);
    670   std::ostringstream s;
    671   s << std::hex << rva;
    672   if (section) {
    673     s << " (";
    674     s << SectionName(section) << "+"
    675       << std::hex << (rva - section->virtual_address)
    676       << ")";
    677   }
    678   return s.str();
    679 }
    680 
    681 const Section* DisassemblerWin32X86::FindNextSection(uint32 fileOffset) const {
    682   const Section* best = 0;
    683   for (int i = 0; i < number_of_sections_; i++) {
    684     const Section* section = &sections_[i];
    685     if (section->size_of_raw_data > 0) {  // i.e. has data in file.
    686       if (fileOffset <= section->file_offset_of_raw_data) {
    687         if (best == 0 ||
    688             section->file_offset_of_raw_data < best->file_offset_of_raw_data) {
    689           best = section;
    690         }
    691       }
    692     }
    693   }
    694   return best;
    695 }
    696 
    697 RVA DisassemblerWin32X86::FileOffsetToRVA(uint32 file_offset) const {
    698   for (int i = 0; i < number_of_sections_; i++) {
    699     const Section* section = &sections_[i];
    700     uint32 offset = file_offset - section->file_offset_of_raw_data;
    701     if (offset < section->size_of_raw_data) {
    702       return section->virtual_address + offset;
    703     }
    704   }
    705   return 0;
    706 }
    707 
    708 bool DisassemblerWin32X86::ReadDataDirectory(
    709     int index,
    710     ImageDataDirectory* directory) {
    711 
    712   if (index < number_of_data_directories_) {
    713     size_t offset = index * 8 + offset_of_data_directories_;
    714     if (offset >= size_of_optional_header_)
    715       return Bad("number of data directories inconsistent");
    716     const uint8* data_directory = optional_header_ + offset;
    717     if (data_directory < start() ||
    718         data_directory + 8 >= end())
    719       return Bad("data directory outside image");
    720     RVA rva = ReadU32(data_directory, 0);
    721     size_t size  = ReadU32(data_directory, 4);
    722     if (size > size_of_image_)
    723       return Bad("data directory size too big");
    724 
    725     // TODO(sra): validate RVA.
    726     directory->address_ = rva;
    727     directory->size_ = static_cast<uint32>(size);
    728     return true;
    729   } else {
    730     directory->address_ = 0;
    731     directory->size_ = 0;
    732     return true;
    733   }
    734 }
    735 
    736 }  // namespace courgette
    737