Home | History | Annotate | Download | only in mac
      1 // Copyright (c) 2010, Google Inc.
      2 // All rights reserved.
      3 //
      4 // Redistribution and use in source and binary forms, with or without
      5 // modification, are permitted provided that the following conditions are
      6 // met:
      7 //
      8 //     * Redistributions of source code must retain the above copyright
      9 // notice, this list of conditions and the following disclaimer.
     10 //     * Redistributions in binary form must reproduce the above
     11 // copyright notice, this list of conditions and the following disclaimer
     12 // in the documentation and/or other materials provided with the
     13 // distribution.
     14 //     * Neither the name of Google Inc. nor the names of its
     15 // contributors may be used to endorse or promote products derived from
     16 // this software without specific prior written permission.
     17 //
     18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29 
     30 // Original author: Jim Blandy <jimb (at) mozilla.com> <jimb (at) red-bean.com>
     31 
     32 // macho_reader.cc: Implementation of google_breakpad::Mach_O::FatReader and
     33 // google_breakpad::Mach_O::Reader. See macho_reader.h for details.
     34 
     35 #include "common/mac/macho_reader.h"
     36 
     37 #include <assert.h>
     38 #include <stdio.h>
     39 #include <stdlib.h>
     40 
     41 // Unfortunately, CPU_TYPE_ARM is not define for 10.4.
     42 #if !defined(CPU_TYPE_ARM)
     43 #define CPU_TYPE_ARM 12
     44 #endif
     45 
     46 #if !defined(CPU_TYPE_ARM_64)
     47 #define CPU_TYPE_ARM_64 16777228
     48 #endif
     49 
     50 namespace google_breakpad {
     51 namespace mach_o {
     52 
     53 // If NDEBUG is #defined, then the 'assert' macro doesn't evaluate its
     54 // arguments, so you can't place expressions that do necessary work in
     55 // the argument of an assert. Nor can you assign the result of the
     56 // expression to a variable and assert that the variable's value is
     57 // true: you'll get unused variable warnings when NDEBUG is #defined.
     58 //
     59 // ASSERT_ALWAYS_EVAL always evaluates its argument, and asserts that
     60 // the result is true if NDEBUG is not #defined.
     61 #if defined(NDEBUG)
     62 #define ASSERT_ALWAYS_EVAL(x) (x)
     63 #else
     64 #define ASSERT_ALWAYS_EVAL(x) assert(x)
     65 #endif
     66 
     67 void FatReader::Reporter::BadHeader() {
     68   fprintf(stderr, "%s: file is neither a fat binary file"
     69           " nor a Mach-O object file\n", filename_.c_str());
     70 }
     71 
     72 void FatReader::Reporter::TooShort() {
     73   fprintf(stderr, "%s: file too short for the data it claims to contain\n",
     74           filename_.c_str());
     75 }
     76 
     77 void FatReader::Reporter::MisplacedObjectFile() {
     78   fprintf(stderr, "%s: file too short for the object files it claims"
     79           " to contain\n", filename_.c_str());
     80 }
     81 
     82 bool FatReader::Read(const uint8_t *buffer, size_t size) {
     83   buffer_.start = buffer;
     84   buffer_.end = buffer + size;
     85   ByteCursor cursor(&buffer_);
     86 
     87   // Fat binaries always use big-endian, so read the magic number in
     88   // that endianness. To recognize Mach-O magic numbers, which can use
     89   // either endianness, check for both the proper and reversed forms
     90   // of the magic numbers.
     91   cursor.set_big_endian(true);
     92   if (cursor >> magic_) {
     93     if (magic_ == FAT_MAGIC) {
     94       // How many object files does this fat binary contain?
     95       uint32_t object_files_count;
     96       if (!(cursor >> object_files_count)) {  // nfat_arch
     97         reporter_->TooShort();
     98         return false;
     99       }
    100 
    101       // Read the list of object files.
    102       object_files_.resize(object_files_count);
    103       for (size_t i = 0; i < object_files_count; i++) {
    104         struct fat_arch *objfile = &object_files_[i];
    105 
    106         // Read this object file entry, byte-swapping as appropriate.
    107         cursor >> objfile->cputype
    108                >> objfile->cpusubtype
    109                >> objfile->offset
    110                >> objfile->size
    111                >> objfile->align;
    112         if (!cursor) {
    113           reporter_->TooShort();
    114           return false;
    115         }
    116         // Does the file actually have the bytes this entry refers to?
    117         size_t fat_size = buffer_.Size();
    118         if (objfile->offset > fat_size ||
    119             objfile->size > fat_size - objfile->offset) {
    120           reporter_->MisplacedObjectFile();
    121           return false;
    122         }
    123       }
    124 
    125       return true;
    126     } else if (magic_ == MH_MAGIC || magic_ == MH_MAGIC_64 ||
    127                magic_ == MH_CIGAM || magic_ == MH_CIGAM_64) {
    128       // If this is a little-endian Mach-O file, fix the cursor's endianness.
    129       if (magic_ == MH_CIGAM || magic_ == MH_CIGAM_64)
    130         cursor.set_big_endian(false);
    131       // Record the entire file as a single entry in the object file list.
    132       object_files_.resize(1);
    133 
    134       // Get the cpu type and subtype from the Mach-O header.
    135       if (!(cursor >> object_files_[0].cputype
    136                    >> object_files_[0].cpusubtype)) {
    137         reporter_->TooShort();
    138         return false;
    139       }
    140 
    141       object_files_[0].offset = 0;
    142       object_files_[0].size = static_cast<uint32_t>(buffer_.Size());
    143       // This alignment is correct for 32 and 64-bit x86 and ppc.
    144       // See get_align in the lipo source for other architectures:
    145       // http://www.opensource.apple.com/source/cctools/cctools-773/misc/lipo.c
    146       object_files_[0].align = 12;  // 2^12 == 4096
    147 
    148       return true;
    149     }
    150   }
    151 
    152   reporter_->BadHeader();
    153   return false;
    154 }
    155 
    156 void Reader::Reporter::BadHeader() {
    157   fprintf(stderr, "%s: file is not a Mach-O object file\n", filename_.c_str());
    158 }
    159 
    160 void Reader::Reporter::CPUTypeMismatch(cpu_type_t cpu_type,
    161                                        cpu_subtype_t cpu_subtype,
    162                                        cpu_type_t expected_cpu_type,
    163                                        cpu_subtype_t expected_cpu_subtype) {
    164   fprintf(stderr, "%s: CPU type %d, subtype %d does not match expected"
    165           " type %d, subtype %d\n",
    166           filename_.c_str(), cpu_type, cpu_subtype,
    167           expected_cpu_type, expected_cpu_subtype);
    168 }
    169 
    170 void Reader::Reporter::HeaderTruncated() {
    171   fprintf(stderr, "%s: file does not contain a complete Mach-O header\n",
    172           filename_.c_str());
    173 }
    174 
    175 void Reader::Reporter::LoadCommandRegionTruncated() {
    176   fprintf(stderr, "%s: file too short to hold load command region"
    177           " given in Mach-O header\n", filename_.c_str());
    178 }
    179 
    180 void Reader::Reporter::LoadCommandsOverrun(size_t claimed, size_t i,
    181                                            LoadCommandType type) {
    182   fprintf(stderr, "%s: file's header claims there are %ld"
    183           " load commands, but load command #%ld",
    184           filename_.c_str(), claimed, i);
    185   if (type) fprintf(stderr, ", of type %d,", type);
    186   fprintf(stderr, " extends beyond the end of the load command region\n");
    187 }
    188 
    189 void Reader::Reporter::LoadCommandTooShort(size_t i, LoadCommandType type) {
    190   fprintf(stderr, "%s: the contents of load command #%ld, of type %d,"
    191           " extend beyond the size given in the load command's header\n",
    192           filename_.c_str(), i, type);
    193 }
    194 
    195 void Reader::Reporter::SectionsMissing(const string &name) {
    196   fprintf(stderr, "%s: the load command for segment '%s'"
    197           " is too short to hold the section headers it claims to have\n",
    198           filename_.c_str(), name.c_str());
    199 }
    200 
    201 void Reader::Reporter::MisplacedSegmentData(const string &name) {
    202   fprintf(stderr, "%s: the segment '%s' claims its contents lie beyond"
    203           " the end of the file\n", filename_.c_str(), name.c_str());
    204 }
    205 
    206 void Reader::Reporter::MisplacedSectionData(const string &section,
    207                                             const string &segment) {
    208   fprintf(stderr, "%s: the section '%s' in segment '%s'"
    209           " claims its contents lie outside the segment's contents\n",
    210           filename_.c_str(), section.c_str(), segment.c_str());
    211 }
    212 
    213 void Reader::Reporter::MisplacedSymbolTable() {
    214   fprintf(stderr, "%s: the LC_SYMTAB load command claims that the symbol"
    215           " table's contents are located beyond the end of the file\n",
    216           filename_.c_str());
    217 }
    218 
    219 void Reader::Reporter::UnsupportedCPUType(cpu_type_t cpu_type) {
    220   fprintf(stderr, "%s: CPU type %d is not supported\n",
    221           filename_.c_str(), cpu_type);
    222 }
    223 
    224 bool Reader::Read(const uint8_t *buffer,
    225                   size_t size,
    226                   cpu_type_t expected_cpu_type,
    227                   cpu_subtype_t expected_cpu_subtype) {
    228   assert(!buffer_.start);
    229   buffer_.start = buffer;
    230   buffer_.end = buffer + size;
    231   ByteCursor cursor(&buffer_, true);
    232   uint32_t magic;
    233   if (!(cursor >> magic)) {
    234     reporter_->HeaderTruncated();
    235     return false;
    236   }
    237 
    238   if (expected_cpu_type != CPU_TYPE_ANY) {
    239     uint32_t expected_magic;
    240     // validate that magic matches the expected cpu type
    241     switch (expected_cpu_type) {
    242       case CPU_TYPE_ARM:
    243       case CPU_TYPE_I386:
    244         expected_magic = MH_CIGAM;
    245         break;
    246       case CPU_TYPE_POWERPC:
    247         expected_magic = MH_MAGIC;
    248         break;
    249       case CPU_TYPE_ARM_64:
    250       case CPU_TYPE_X86_64:
    251         expected_magic = MH_CIGAM_64;
    252         break;
    253       case CPU_TYPE_POWERPC64:
    254         expected_magic = MH_MAGIC_64;
    255         break;
    256       default:
    257         reporter_->UnsupportedCPUType(expected_cpu_type);
    258         return false;
    259     }
    260 
    261     if (expected_magic != magic) {
    262       reporter_->BadHeader();
    263       return false;
    264     }
    265   }
    266 
    267   // Since the byte cursor is in big-endian mode, a reversed magic number
    268   // always indicates a little-endian file, regardless of our own endianness.
    269   switch (magic) {
    270     case MH_MAGIC:    big_endian_ = true;  bits_64_ = false; break;
    271     case MH_CIGAM:    big_endian_ = false; bits_64_ = false; break;
    272     case MH_MAGIC_64: big_endian_ = true;  bits_64_ = true;  break;
    273     case MH_CIGAM_64: big_endian_ = false; bits_64_ = true;  break;
    274     default:
    275       reporter_->BadHeader();
    276       return false;
    277   }
    278   cursor.set_big_endian(big_endian_);
    279   uint32_t commands_size, reserved;
    280   cursor >> cpu_type_ >> cpu_subtype_ >> file_type_ >> load_command_count_
    281          >> commands_size >> flags_;
    282   if (bits_64_)
    283     cursor >> reserved;
    284   if (!cursor) {
    285     reporter_->HeaderTruncated();
    286     return false;
    287   }
    288 
    289   if (expected_cpu_type != CPU_TYPE_ANY &&
    290       (expected_cpu_type != cpu_type_ ||
    291        expected_cpu_subtype != cpu_subtype_)) {
    292     reporter_->CPUTypeMismatch(cpu_type_, cpu_subtype_,
    293                               expected_cpu_type, expected_cpu_subtype);
    294     return false;
    295   }
    296 
    297   cursor
    298       .PointTo(&load_commands_.start, commands_size)
    299       .PointTo(&load_commands_.end, 0);
    300   if (!cursor) {
    301     reporter_->LoadCommandRegionTruncated();
    302     return false;
    303   }
    304 
    305   return true;
    306 }
    307 
    308 bool Reader::WalkLoadCommands(Reader::LoadCommandHandler *handler) const {
    309   ByteCursor list_cursor(&load_commands_, big_endian_);
    310 
    311   for (size_t index = 0; index < load_command_count_; ++index) {
    312     // command refers to this load command alone, so that cursor will
    313     // refuse to read past the load command's end. But since we haven't
    314     // read the size yet, let command initially refer to the entire
    315     // remainder of the load command series.
    316     ByteBuffer command(list_cursor.here(), list_cursor.Available());
    317     ByteCursor cursor(&command, big_endian_);
    318 
    319     // Read the command type and size --- fields common to all commands.
    320     uint32_t type, size;
    321     if (!(cursor >> type)) {
    322       reporter_->LoadCommandsOverrun(load_command_count_, index, 0);
    323       return false;
    324     }
    325     if (!(cursor >> size) || size > command.Size()) {
    326       reporter_->LoadCommandsOverrun(load_command_count_, index, type);
    327       return false;
    328     }
    329 
    330     // Now that we've read the length, restrict command's range to this
    331     // load command only.
    332     command.end = command.start + size;
    333 
    334     switch (type) {
    335       case LC_SEGMENT:
    336       case LC_SEGMENT_64: {
    337         Segment segment;
    338         segment.bits_64 = (type == LC_SEGMENT_64);
    339         size_t word_size = segment.bits_64 ? 8 : 4;
    340         cursor.CString(&segment.name, 16);
    341         size_t file_offset, file_size;
    342         cursor
    343             .Read(word_size, false, &segment.vmaddr)
    344             .Read(word_size, false, &segment.vmsize)
    345             .Read(word_size, false, &file_offset)
    346             .Read(word_size, false, &file_size);
    347         cursor >> segment.maxprot
    348                >> segment.initprot
    349                >> segment.nsects
    350                >> segment.flags;
    351         if (!cursor) {
    352           reporter_->LoadCommandTooShort(index, type);
    353           return false;
    354         }
    355         if (file_offset > buffer_.Size() ||
    356             file_size > buffer_.Size() - file_offset) {
    357           reporter_->MisplacedSegmentData(segment.name);
    358           return false;
    359         }
    360         // Mach-O files in .dSYM bundles have the contents of the loaded
    361         // segments removed, and their file offsets and file sizes zeroed
    362         // out. To help us handle this special case properly, give such
    363         // segments' contents NULL starting and ending pointers.
    364         if (file_offset == 0 && file_size == 0) {
    365           segment.contents.start = segment.contents.end = NULL;
    366         } else {
    367           segment.contents.start = buffer_.start + file_offset;
    368           segment.contents.end = segment.contents.start + file_size;
    369         }
    370         // The section list occupies the remainder of this load command's space.
    371         segment.section_list.start = cursor.here();
    372         segment.section_list.end = command.end;
    373 
    374         if (!handler->SegmentCommand(segment))
    375           return false;
    376         break;
    377       }
    378 
    379       case LC_SYMTAB: {
    380         uint32_t symoff, nsyms, stroff, strsize;
    381         cursor >> symoff >> nsyms >> stroff >> strsize;
    382         if (!cursor) {
    383           reporter_->LoadCommandTooShort(index, type);
    384           return false;
    385         }
    386         // How big are the entries in the symbol table?
    387         // sizeof(struct nlist_64) : sizeof(struct nlist),
    388         // but be paranoid about alignment vs. target architecture.
    389         size_t symbol_size = bits_64_ ? 16 : 12;
    390         // How big is the entire symbol array?
    391         size_t symbols_size = nsyms * symbol_size;
    392         if (symoff > buffer_.Size() || symbols_size > buffer_.Size() - symoff ||
    393             stroff > buffer_.Size() || strsize > buffer_.Size() - stroff) {
    394           reporter_->MisplacedSymbolTable();
    395           return false;
    396         }
    397         ByteBuffer entries(buffer_.start + symoff, symbols_size);
    398         ByteBuffer names(buffer_.start + stroff, strsize);
    399         if (!handler->SymtabCommand(entries, names))
    400           return false;
    401         break;
    402       }
    403 
    404       default: {
    405         if (!handler->UnknownCommand(type, command))
    406           return false;
    407         break;
    408       }
    409     }
    410 
    411     list_cursor.set_here(command.end);
    412   }
    413 
    414   return true;
    415 }
    416 
    417 // A load command handler that looks for a segment of a given name.
    418 class Reader::SegmentFinder : public LoadCommandHandler {
    419  public:
    420   // Create a load command handler that looks for a segment named NAME,
    421   // and sets SEGMENT to describe it if found.
    422   SegmentFinder(const string &name, Segment *segment)
    423       : name_(name), segment_(segment), found_() { }
    424 
    425   // Return true if the traversal found the segment, false otherwise.
    426   bool found() const { return found_; }
    427 
    428   bool SegmentCommand(const Segment &segment) {
    429     if (segment.name == name_) {
    430       *segment_ = segment;
    431       found_ = true;
    432       return false;
    433     }
    434     return true;
    435   }
    436 
    437  private:
    438   // The name of the segment our creator is looking for.
    439   const string &name_;
    440 
    441   // Where we should store the segment if found. (WEAK)
    442   Segment *segment_;
    443 
    444   // True if we found the segment.
    445   bool found_;
    446 };
    447 
    448 bool Reader::FindSegment(const string &name, Segment *segment) const {
    449   SegmentFinder finder(name, segment);
    450   WalkLoadCommands(&finder);
    451   return finder.found();
    452 }
    453 
    454 bool Reader::WalkSegmentSections(const Segment &segment,
    455                                  SectionHandler *handler) const {
    456   size_t word_size = segment.bits_64 ? 8 : 4;
    457   ByteCursor cursor(&segment.section_list, big_endian_);
    458 
    459   for (size_t i = 0; i < segment.nsects; i++) {
    460     Section section;
    461     section.bits_64 = segment.bits_64;
    462     uint64_t size;
    463     uint32_t offset, dummy32;
    464     cursor
    465         .CString(&section.section_name, 16)
    466         .CString(&section.segment_name, 16)
    467         .Read(word_size, false, &section.address)
    468         .Read(word_size, false, &size)
    469         >> offset
    470         >> section.align
    471         >> dummy32
    472         >> dummy32
    473         >> section.flags
    474         >> dummy32
    475         >> dummy32;
    476     if (section.bits_64)
    477       cursor >> dummy32;
    478     if (!cursor) {
    479       reporter_->SectionsMissing(segment.name);
    480       return false;
    481     }
    482     if ((section.flags & SECTION_TYPE) == S_ZEROFILL) {
    483       // Zero-fill sections have a size, but no contents.
    484       section.contents.start = section.contents.end = NULL;
    485     } else if (segment.contents.start == NULL &&
    486                segment.contents.end == NULL) {
    487       // Mach-O files in .dSYM bundles have the contents of the loaded
    488       // segments removed, and their file offsets and file sizes zeroed
    489       // out.  However, the sections within those segments still have
    490       // non-zero sizes.  There's no reason to call MisplacedSectionData in
    491       // this case; the caller may just need the section's load
    492       // address. But do set the contents' limits to NULL, for safety.
    493       section.contents.start = section.contents.end = NULL;
    494     } else {
    495       if (offset < size_t(segment.contents.start - buffer_.start) ||
    496           offset > size_t(segment.contents.end - buffer_.start) ||
    497           size > size_t(segment.contents.end - buffer_.start - offset)) {
    498         reporter_->MisplacedSectionData(section.section_name,
    499                                         section.segment_name);
    500         return false;
    501       }
    502       section.contents.start = buffer_.start + offset;
    503       section.contents.end = section.contents.start + size;
    504     }
    505     if (!handler->HandleSection(section))
    506       return false;
    507   }
    508   return true;
    509 }
    510 
    511 // A SectionHandler that builds a SectionMap for the sections within a
    512 // given segment.
    513 class Reader::SectionMapper: public SectionHandler {
    514  public:
    515   // Create a SectionHandler that populates MAP with an entry for
    516   // each section it is given.
    517   SectionMapper(SectionMap *map) : map_(map) { }
    518   bool HandleSection(const Section &section) {
    519     (*map_)[section.section_name] = section;
    520     return true;
    521   }
    522  private:
    523   // The map under construction. (WEAK)
    524   SectionMap *map_;
    525 };
    526 
    527 bool Reader::MapSegmentSections(const Segment &segment,
    528                                 SectionMap *section_map) const {
    529   section_map->clear();
    530   SectionMapper mapper(section_map);
    531   return WalkSegmentSections(segment, &mapper);
    532 }
    533 
    534 }  // namespace mach_o
    535 }  // namespace google_breakpad
    536