Home | History | Annotate | Download | only in courgette
      1 // Copyright 2013 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "courgette/disassembler_elf_32.h"
      6 
      7 #include <algorithm>
      8 #include <string>
      9 #include <vector>
     10 
     11 #include "base/basictypes.h"
     12 #include "base/logging.h"
     13 #include "base/memory/scoped_vector.h"
     14 
     15 #include "courgette/assembly_program.h"
     16 #include "courgette/courgette.h"
     17 #include "courgette/encoded_program.h"
     18 
     19 namespace courgette {
     20 
     21 DisassemblerElf32::DisassemblerElf32(const void* start, size_t length)
     22   : Disassembler(start, length),
     23     header_(NULL),
     24     section_header_table_(NULL),
     25     section_header_table_size_(0),
     26     program_header_table_(NULL),
     27     program_header_table_size_(0),
     28     default_string_section_(NULL) {
     29 }
     30 
     31 bool DisassemblerElf32::ParseHeader() {
     32   if (length() < sizeof(Elf32_Ehdr))
     33     return Bad("Too small");
     34 
     35   header_ = (Elf32_Ehdr *)start();
     36 
     37   // Have magic for elf header?
     38   if (header_->e_ident[0] != 0x7f ||
     39       header_->e_ident[1] != 'E' ||
     40       header_->e_ident[2] != 'L' ||
     41       header_->e_ident[3] != 'F')
     42     return Bad("No Magic Number");
     43 
     44   if (header_->e_type != ET_EXEC &&
     45       header_->e_type != ET_DYN)
     46     return Bad("Not an executable file or shared library");
     47 
     48   if (header_->e_machine != ElfEM())
     49     return Bad("Not a supported architecture");
     50 
     51   if (header_->e_version != 1)
     52     return Bad("Unknown file version");
     53 
     54   if (header_->e_shentsize != sizeof(Elf32_Shdr))
     55     return Bad("Unexpected section header size");
     56 
     57   if (header_->e_shoff >= length())
     58     return Bad("Out of bounds section header table offset");
     59 
     60   section_header_table_ = (Elf32_Shdr *)OffsetToPointer(header_->e_shoff);
     61   section_header_table_size_ = header_->e_shnum;
     62 
     63   if ((header_->e_shoff + header_->e_shnum ) >= length())
     64     return Bad("Out of bounds section header table");
     65 
     66   if (header_->e_phoff >= length())
     67     return Bad("Out of bounds program header table offset");
     68 
     69   program_header_table_ = (Elf32_Phdr *)OffsetToPointer(header_->e_phoff);
     70   program_header_table_size_ = header_->e_phnum;
     71 
     72   if ((header_->e_phoff + header_->e_phnum) >= length())
     73     return Bad("Out of bounds program header table");
     74 
     75   default_string_section_ = (const char *)SectionBody((int)header_->e_shstrndx);
     76 
     77   ReduceLength(DiscoverLength());
     78 
     79   return Good();
     80 }
     81 
     82 bool DisassemblerElf32::Disassemble(AssemblyProgram* target) {
     83   if (!ok())
     84     return false;
     85 
     86   // The Image Base is always 0 for ELF Executables
     87   target->set_image_base(0);
     88 
     89   if (!ParseAbs32Relocs())
     90     return false;
     91 
     92   if (!ParseRel32RelocsFromSections())
     93     return false;
     94 
     95   if (!ParseFile(target))
     96     return false;
     97 
     98   target->DefaultAssignIndexes();
     99 
    100   return true;
    101 }
    102 
    103 uint32 DisassemblerElf32::DiscoverLength() {
    104   uint32 result = 0;
    105 
    106   // Find the end of the last section
    107   for (int section_id = 0; section_id < SectionHeaderCount(); section_id++) {
    108     const Elf32_Shdr *section_header = SectionHeader(section_id);
    109 
    110     if (section_header->sh_type == SHT_NOBITS)
    111       continue;
    112 
    113     uint32 section_end = section_header->sh_offset + section_header->sh_size;
    114 
    115     if (section_end > result)
    116       result = section_end;
    117   }
    118 
    119   // Find the end of the last segment
    120   for (int i = 0; i < ProgramSegmentHeaderCount(); i++) {
    121     const Elf32_Phdr *segment_header = ProgramSegmentHeader(i);
    122 
    123     uint32 segment_end = segment_header->p_offset + segment_header->p_filesz;
    124 
    125     if (segment_end > result)
    126       result = segment_end;
    127   }
    128 
    129   uint32 section_table_end = header_->e_shoff +
    130                              (header_->e_shnum * sizeof(Elf32_Shdr));
    131   if (section_table_end > result)
    132     result = section_table_end;
    133 
    134   uint32 segment_table_end = header_->e_phoff +
    135                              (header_->e_phnum * sizeof(Elf32_Phdr));
    136   if (segment_table_end > result)
    137     result = segment_table_end;
    138 
    139   return result;
    140 }
    141 
    142 CheckBool DisassemblerElf32::IsValidRVA(RVA rva) const {
    143 
    144   // It's valid if it's contained in any program segment
    145   for (int i = 0; i < ProgramSegmentHeaderCount(); i++) {
    146     const Elf32_Phdr *segment_header = ProgramSegmentHeader(i);
    147 
    148     if (segment_header->p_type != PT_LOAD)
    149       continue;
    150 
    151     Elf32_Addr begin = segment_header->p_vaddr;
    152     Elf32_Addr end = segment_header->p_vaddr + segment_header->p_memsz;
    153 
    154     if (rva >= begin && rva < end)
    155       return true;
    156   }
    157 
    158   return false;
    159 }
    160 
    161 // Returns RVA for an in memory address, or NULL.
    162 CheckBool DisassemblerElf32::RVAToFileOffset(Elf32_Addr addr,
    163                                                 size_t* result) const {
    164 
    165   for (int i = 0; i < ProgramSegmentHeaderCount(); i++) {
    166     Elf32_Addr begin = ProgramSegmentMemoryBegin(i);
    167     Elf32_Addr end = begin + ProgramSegmentMemorySize(i);
    168 
    169     if (addr >= begin  && addr < end) {
    170       Elf32_Addr offset = addr - begin;
    171 
    172       if (offset < ProgramSegmentFileSize(i)) {
    173         *result = ProgramSegmentFileOffset(i) + offset;
    174         return true;
    175       }
    176     }
    177   }
    178 
    179   return false;
    180 }
    181 
    182 RVA DisassemblerElf32::FileOffsetToRVA(size_t offset) const {
    183   // File offsets can be 64 bit values, but we are dealing with 32
    184   // bit executables and so only need to support 32bit file sizes.
    185   uint32 offset32 = (uint32)offset;
    186 
    187   for (int i = 0; i < SectionHeaderCount(); i++) {
    188 
    189     const Elf32_Shdr *section_header = SectionHeader(i);
    190 
    191     // These can appear to have a size in the file, but don't.
    192     if (section_header->sh_type == SHT_NOBITS)
    193       continue;
    194 
    195     Elf32_Off section_begin = section_header->sh_offset;
    196     Elf32_Off section_end = section_begin + section_header->sh_size;
    197 
    198     if (offset32 >= section_begin && offset32 < section_end) {
    199       return section_header->sh_addr + (offset32 - section_begin);
    200     }
    201   }
    202 
    203   return 0;
    204 }
    205 
    206 CheckBool DisassemblerElf32::RVAsToOffsets(std::vector<RVA>* rvas,
    207                                            std::vector<size_t>* offsets) {
    208   offsets->clear();
    209 
    210   for (std::vector<RVA>::iterator rva = rvas->begin();
    211        rva != rvas->end();
    212        rva++) {
    213 
    214     size_t offset;
    215 
    216     if (!RVAToFileOffset(*rva, &offset))
    217       return false;
    218 
    219     offsets->push_back(offset);
    220   }
    221 
    222   return true;
    223 }
    224 
    225 CheckBool DisassemblerElf32::RVAsToOffsets(ScopedVector<TypedRVA>* rvas) {
    226   for (ScopedVector<TypedRVA>::iterator rva = rvas->begin();
    227        rva != rvas->end();
    228        rva++) {
    229 
    230     size_t offset;
    231 
    232     if (!RVAToFileOffset((*rva)->rva(), &offset))
    233       return false;
    234 
    235     (*rva)->set_offset(offset);
    236   }
    237 
    238   return true;
    239 }
    240 
    241 CheckBool DisassemblerElf32::ParseFile(AssemblyProgram* program) {
    242   // Walk all the bytes in the file, whether or not in a section.
    243   uint32 file_offset = 0;
    244 
    245   std::vector<size_t> abs_offsets;
    246 
    247   if (!RVAsToOffsets(&abs32_locations_, &abs_offsets))
    248     return false;
    249 
    250   if (!RVAsToOffsets(&rel32_locations_))
    251     return false;
    252 
    253   std::vector<size_t>::iterator current_abs_offset = abs_offsets.begin();
    254   ScopedVector<TypedRVA>::iterator current_rel = rel32_locations_.begin();
    255 
    256   std::vector<size_t>::iterator end_abs_offset = abs_offsets.end();
    257   ScopedVector<TypedRVA>::iterator end_rel = rel32_locations_.end();
    258 
    259   for (int section_id = 0;
    260        section_id < SectionHeaderCount();
    261        section_id++) {
    262 
    263     const Elf32_Shdr *section_header = SectionHeader(section_id);
    264 
    265     if (!ParseSimpleRegion(file_offset,
    266                            section_header->sh_offset,
    267                            program))
    268       return false;
    269     file_offset = section_header->sh_offset;
    270 
    271     switch (section_header->sh_type) {
    272       case SHT_REL:
    273         if (!ParseRelocationSection(section_header, program))
    274           return false;
    275         file_offset = section_header->sh_offset + section_header->sh_size;
    276         break;
    277       case SHT_PROGBITS:
    278         if (!ParseProgbitsSection(section_header,
    279                                   &current_abs_offset, end_abs_offset,
    280                                   &current_rel, end_rel,
    281                                   program))
    282           return false;
    283         file_offset = section_header->sh_offset + section_header->sh_size;
    284         break;
    285       case SHT_NOBITS:
    286         // Fall through
    287       case SHT_INIT_ARRAY:
    288         // Fall through
    289       case SHT_FINI_ARRAY:
    290         while (current_abs_offset != end_abs_offset &&
    291                *current_abs_offset >= section_header->sh_offset &&
    292                *current_abs_offset <
    293                (section_header->sh_offset + section_header->sh_size)) {
    294           // Skip any abs_offsets appear in the unsupported INIT_ARRAY section
    295           VLOG(1) << "Skipping relocation entry for unsupported section: " <<
    296             section_header->sh_type;
    297           current_abs_offset++;
    298         }
    299         break;
    300       default:
    301         if (current_abs_offset != end_abs_offset &&
    302                *current_abs_offset >= section_header->sh_offset &&
    303                *current_abs_offset <
    304                (section_header->sh_offset + section_header->sh_size))
    305           VLOG(1) << "Relocation address in unrecognized ELF section: " << \
    306             section_header->sh_type;
    307       break;
    308     }
    309   }
    310 
    311   // Rest of the file past the last section
    312   if (!ParseSimpleRegion(file_offset,
    313                          length(),
    314                          program))
    315     return false;
    316 
    317   // Make certain we consume all of the relocations as expected
    318   return (current_abs_offset == end_abs_offset);
    319 }
    320 
    321 CheckBool DisassemblerElf32::ParseProgbitsSection(
    322     const Elf32_Shdr *section_header,
    323     std::vector<size_t>::iterator* current_abs_offset,
    324     std::vector<size_t>::iterator end_abs_offset,
    325     ScopedVector<TypedRVA>::iterator* current_rel,
    326     ScopedVector<TypedRVA>::iterator end_rel,
    327     AssemblyProgram* program) {
    328 
    329   // Walk all the bytes in the file, whether or not in a section.
    330   size_t file_offset = section_header->sh_offset;
    331   size_t section_end = section_header->sh_offset + section_header->sh_size;
    332 
    333   Elf32_Addr origin = section_header->sh_addr;
    334   size_t origin_offset = section_header->sh_offset;
    335   if (!program->EmitOriginInstruction(origin))
    336     return false;
    337 
    338   while (file_offset < section_end) {
    339 
    340     if (*current_abs_offset != end_abs_offset &&
    341         file_offset > **current_abs_offset)
    342       return false;
    343 
    344     while (*current_rel != end_rel &&
    345            file_offset > (**current_rel)->get_offset()) {
    346       (*current_rel)++;
    347     }
    348 
    349     size_t next_relocation = section_end;
    350 
    351     if (*current_abs_offset != end_abs_offset &&
    352         next_relocation > **current_abs_offset)
    353       next_relocation = **current_abs_offset;
    354 
    355     // Rel offsets are heuristically derived, and might (incorrectly) overlap
    356     // an Abs value, or the end of the section, so +3 to make sure there is
    357     // room for the full 4 byte value.
    358     if (*current_rel != end_rel &&
    359         next_relocation > ((**current_rel)->get_offset() + 3))
    360       next_relocation = (**current_rel)->get_offset();
    361 
    362     if (next_relocation > file_offset) {
    363       if (!ParseSimpleRegion(file_offset, next_relocation, program))
    364         return false;
    365 
    366       file_offset = next_relocation;
    367       continue;
    368     }
    369 
    370     if (*current_abs_offset != end_abs_offset &&
    371         file_offset == **current_abs_offset) {
    372 
    373       const uint8* p = OffsetToPointer(file_offset);
    374       RVA target_rva = Read32LittleEndian(p);
    375 
    376       if (!program->EmitAbs32(program->FindOrMakeAbs32Label(target_rva)))
    377         return false;
    378       file_offset += sizeof(RVA);
    379       (*current_abs_offset)++;
    380       continue;
    381     }
    382 
    383     if (*current_rel != end_rel &&
    384         file_offset == (**current_rel)->get_offset()) {
    385 
    386       uint32 relative_target = (**current_rel)->relative_target();
    387       // This cast is for 64 bit systems, and is only safe because we
    388       // are working on 32 bit executables.
    389       RVA target_rva = (RVA)(origin + (file_offset - origin_offset) +
    390                              relative_target);
    391 
    392       if (! (**current_rel)->EmitInstruction(program, target_rva))
    393         return false;
    394       file_offset += (**current_rel)->op_size();
    395       (*current_rel)++;
    396       continue;
    397     }
    398   }
    399 
    400   // Rest of the section (if any)
    401   return ParseSimpleRegion(file_offset, section_end, program);
    402 }
    403 
    404 CheckBool DisassemblerElf32::ParseSimpleRegion(
    405     size_t start_file_offset,
    406     size_t end_file_offset,
    407     AssemblyProgram* program) {
    408 
    409   const uint8* start = OffsetToPointer(start_file_offset);
    410   const uint8* end = OffsetToPointer(end_file_offset);
    411 
    412   // Callers don't guarantee start < end
    413   if (start >= end) return true;
    414 
    415   const ptrdiff_t len = end - start;  // Works because vars are byte pointers
    416 
    417   if (!program->EmitBytesInstruction(start, len))
    418     return false;
    419 
    420   return true;
    421 }
    422 
    423 CheckBool DisassemblerElf32::ParseAbs32Relocs() {
    424   abs32_locations_.clear();
    425 
    426   // Loop through sections for relocation sections
    427   for (int section_id = 0; section_id < SectionHeaderCount(); section_id++) {
    428     const Elf32_Shdr *section_header = SectionHeader(section_id);
    429 
    430     if (section_header->sh_type == SHT_REL) {
    431 
    432       Elf32_Rel *relocs_table = (Elf32_Rel *)SectionBody(section_id);
    433 
    434       int relocs_table_count = section_header->sh_size /
    435                                section_header->sh_entsize;
    436 
    437       // Elf32_Word relocation_section_id = section_header->sh_info;
    438 
    439       // Loop through relocation objects in the relocation section
    440       for (int rel_id = 0; rel_id < relocs_table_count; rel_id++) {
    441         RVA rva;
    442 
    443         // Quite a few of these conversions fail, and we simply skip
    444         // them, that's okay.
    445         if (RelToRVA(relocs_table[rel_id], &rva) && CheckSection(rva))
    446           abs32_locations_.push_back(rva);
    447       }
    448     }
    449   }
    450 
    451   std::sort(abs32_locations_.begin(), abs32_locations_.end());
    452   return true;
    453 }
    454 
    455 CheckBool DisassemblerElf32::CheckSection(RVA rva) {
    456   size_t offset;
    457 
    458   if (!RVAToFileOffset(rva, &offset)) {
    459     return false;
    460   }
    461 
    462   for (int section_id = 0;
    463        section_id < SectionHeaderCount();
    464        section_id++) {
    465 
    466     const Elf32_Shdr *section_header = SectionHeader(section_id);
    467 
    468     if (offset >= section_header->sh_offset &&
    469         offset < (section_header->sh_offset + section_header->sh_size)) {
    470       switch (section_header->sh_type) {
    471         case SHT_REL:
    472           // Fall-through
    473         case SHT_PROGBITS:
    474           return true;
    475       }
    476     }
    477   }
    478 
    479   return false;
    480 }
    481 
    482 CheckBool DisassemblerElf32::ParseRel32RelocsFromSections() {
    483 
    484   rel32_locations_.clear();
    485 
    486   // Loop through sections for relocation sections
    487   for (int section_id = 0;
    488        section_id < SectionHeaderCount();
    489        section_id++) {
    490 
    491     const Elf32_Shdr *section_header = SectionHeader(section_id);
    492 
    493     if (section_header->sh_type != SHT_PROGBITS)
    494       continue;
    495 
    496     if (!ParseRel32RelocsFromSection(section_header))
    497       return false;
    498   }
    499 
    500   std::sort(rel32_locations_.begin(),
    501             rel32_locations_.end(),
    502             TypedRVA::IsLessThan);
    503   return true;
    504 }
    505 
    506 }  // namespace courgette
    507