Home | History | Annotate | Download | only in mac
      1 // -*- mode: C++ -*-
      2 
      3 // Copyright (c) 2010, Google Inc.
      4 // All rights reserved.
      5 //
      6 // Redistribution and use in source and binary forms, with or without
      7 // modification, are permitted provided that the following conditions are
      8 // met:
      9 //
     10 //     * Redistributions of source code must retain the above copyright
     11 // notice, this list of conditions and the following disclaimer.
     12 //     * Redistributions in binary form must reproduce the above
     13 // copyright notice, this list of conditions and the following disclaimer
     14 // in the documentation and/or other materials provided with the
     15 // distribution.
     16 //     * Neither the name of Google Inc. nor the names of its
     17 // contributors may be used to endorse or promote products derived from
     18 // this software without specific prior written permission.
     19 //
     20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     31 
     32 // Original author: Jim Blandy <jimb (at) mozilla.com> <jimb (at) red-bean.com>
     33 
     34 // macho_reader.h: A class for parsing Mach-O files.
     35 
     36 #ifndef BREAKPAD_COMMON_MAC_MACHO_READER_H_
     37 #define BREAKPAD_COMMON_MAC_MACHO_READER_H_
     38 
     39 #include <mach-o/loader.h>
     40 #include <mach-o/fat.h>
     41 #include <stdint.h>
     42 #include <stdlib.h>
     43 #include <unistd.h>
     44 
     45 #include <map>
     46 #include <string>
     47 #include <vector>
     48 
     49 #include "common/byte_cursor.h"
     50 
     51 namespace google_breakpad {
     52 namespace mach_o {
     53 
     54 using std::map;
     55 using std::string;
     56 using std::vector;
     57 
     58 // The Mac headers don't specify particular types for these groups of
     59 // constants, but defining them here provides some documentation
     60 // value.  We also give them the same width as the fields in which
     61 // they appear, which makes them a bit easier to use with ByteCursors.
     62 typedef uint32_t Magic;
     63 typedef uint32_t FileType;
     64 typedef uint32_t FileFlags;
     65 typedef uint32_t LoadCommandType;
     66 typedef uint32_t SegmentFlags;
     67 typedef uint32_t SectionFlags;
     68 
     69 // A parser for fat binary files, used to store universal binaries.
     70 // When applied to a (non-fat) Mach-O file, this behaves as if the
     71 // file were a fat file containing a single object file.
     72 class FatReader {
     73  public:
     74 
     75   // A class for reporting errors found while parsing fat binary files. The
     76   // default definitions of these methods print messages to stderr.
     77   class Reporter {
     78    public:
     79     // Create a reporter that attributes problems to |filename|.
     80     explicit Reporter(const string &filename) : filename_(filename) { }
     81 
     82     virtual ~Reporter() { }
     83 
     84     // The data does not begin with a fat binary or Mach-O magic number.
     85     // This is a fatal error.
     86     virtual void BadHeader();
     87 
     88     // The Mach-O fat binary file ends abruptly, without enough space
     89     // to contain an object file it claims is present.
     90     virtual void MisplacedObjectFile();
     91 
     92     // The file ends abruptly: either it is not large enough to hold a
     93     // complete header, or the header implies that contents are present
     94     // beyond the actual end of the file.
     95     virtual void TooShort();
     96 
     97    private:
     98     // The filename to which the reader should attribute problems.
     99     string filename_;
    100   };
    101 
    102   // Create a fat binary file reader that uses |reporter| to report problems.
    103   explicit FatReader(Reporter *reporter) : reporter_(reporter) { }
    104 
    105   // Read the |size| bytes at |buffer| as a fat binary file. On success,
    106   // return true; on failure, report the problem to reporter_ and return
    107   // false.
    108   //
    109   // If the data is a plain Mach-O file, rather than a fat binary file,
    110   // then the reader behaves as if it had found a fat binary file whose
    111   // single object file is the Mach-O file.
    112   bool Read(const uint8_t *buffer, size_t size);
    113 
    114   // Return an array of 'struct fat_arch' structures describing the
    115   // object files present in this fat binary file. Set |size| to the
    116   // number of elements in the array.
    117   //
    118   // Assuming Read returned true, the entries are validated: it is
    119   // safe to assume that the offsets and sizes in each 'struct
    120   // fat_arch' refer to subranges of the bytes passed to Read.
    121   //
    122   // If there are no object files in this fat binary, then this
    123   // function can return NULL.
    124   //
    125   // The array is owned by this FatReader instance; it will be freed when
    126   // this FatReader is destroyed.
    127   //
    128   // This function returns a C-style array instead of a vector to make it
    129   // possible to use the result with OS X functions like NXFindBestFatArch,
    130   // so that the symbol dumper will behave consistently with other OS X
    131   // utilities that work with fat binaries.
    132   const struct fat_arch *object_files(size_t *count) const {
    133     *count = object_files_.size();
    134     if (object_files_.size() > 0)
    135       return &object_files_[0];
    136     return NULL;
    137   }
    138 
    139  private:
    140   // We use this to report problems parsing the file's contents. (WEAK)
    141   Reporter *reporter_;
    142 
    143   // The contents of the fat binary or Mach-O file we're parsing. We do not
    144   // own the storage it refers to.
    145   ByteBuffer buffer_;
    146 
    147   // The magic number of this binary, in host byte order.
    148   Magic magic_;
    149 
    150   // The list of object files in this binary.
    151   // object_files_.size() == fat_header.nfat_arch
    152   vector<struct fat_arch> object_files_;
    153 };
    154 
    155 // A segment in a Mach-O file. All these fields have been byte-swapped as
    156 // appropriate for use by the executing architecture.
    157 struct Segment {
    158   // The ByteBuffers below point into the bytes passed to the Reader that
    159   // created this Segment.
    160 
    161   ByteBuffer section_list;    // This segment's section list.
    162   ByteBuffer contents;        // This segment's contents.
    163 
    164   // This segment's name.
    165   string name;
    166 
    167   // The address at which this segment should be loaded in memory. If
    168   // bits_64 is false, only the bottom 32 bits of this value are valid.
    169   uint64_t vmaddr;
    170 
    171   // The size of this segment when loaded into memory. This may be larger
    172   // than contents.Size(), in which case the extra area will be
    173   // initialized with zeros. If bits_64 is false, only the bottom 32 bits
    174   // of this value are valid.
    175   uint64_t vmsize;
    176 
    177   // The maximum and initial VM protection of this segment's contents.
    178   uint32_t maxprot;
    179   uint32_t initprot;
    180 
    181   // The number of sections in section_list.
    182   uint32_t nsects;
    183 
    184   // Flags describing this segment, from SegmentFlags.
    185   uint32_t flags;
    186 
    187   // True if this is a 64-bit section; false if it is a 32-bit section.
    188   bool bits_64;
    189 };
    190 
    191 // A section in a Mach-O file. All these fields have been byte-swapped as
    192 // appropriate for use by the executing architecture.
    193 struct Section {
    194   // This section's contents. This points into the bytes passed to the
    195   // Reader that created this Section.
    196   ByteBuffer contents;
    197 
    198   // This section's name.
    199   string section_name;  // section[_64].sectname
    200   // The name of the segment this section belongs to.
    201   string segment_name;  // section[_64].segname
    202 
    203   // The address at which this section's contents should be loaded in
    204   // memory. If bits_64 is false, only the bottom 32 bits of this value
    205   // are valid.
    206   uint64_t address;
    207 
    208   // The contents of this section should be loaded into memory at an
    209   // address which is a multiple of (two raised to this power).
    210   uint32_t align;
    211 
    212   // Flags from SectionFlags describing the section's contents.
    213   uint32_t flags;
    214 
    215   // We don't support reading relocations yet.
    216 
    217   // True if this is a 64-bit section; false if it is a 32-bit section.
    218   bool bits_64;
    219 };
    220 
    221 // A map from section names to Sections.
    222 typedef map<string, Section> SectionMap;
    223 
    224 // A reader for a Mach-O file.
    225 //
    226 // This does not handle fat binaries; see FatReader above. FatReader
    227 // provides a friendly interface for parsing data that could be either a
    228 // fat binary or a Mach-O file.
    229 class Reader {
    230  public:
    231 
    232   // A class for reporting errors found while parsing Mach-O files. The
    233   // default definitions of these member functions print messages to
    234   // stderr.
    235   class Reporter {
    236    public:
    237     // Create a reporter that attributes problems to |filename|.
    238     explicit Reporter(const string &filename) : filename_(filename) { }
    239     virtual ~Reporter() { }
    240 
    241     // Reporter functions for fatal errors return void; the reader will
    242     // definitely return an error to its caller after calling them
    243 
    244     // The data does not begin with a Mach-O magic number, or the magic
    245     // number does not match the expected value for the cpu architecture.
    246     // This is a fatal error.
    247     virtual void BadHeader();
    248 
    249     // The data contained in a Mach-O fat binary (|cpu_type|, |cpu_subtype|)
    250     // does not match the expected CPU architecture
    251     // (|expected_cpu_type|, |expected_cpu_subtype|).
    252     virtual void CPUTypeMismatch(cpu_type_t cpu_type,
    253                                  cpu_subtype_t cpu_subtype,
    254                                  cpu_type_t expected_cpu_type,
    255                                  cpu_subtype_t expected_cpu_subtype);
    256 
    257     // The file ends abruptly: either it is not large enough to hold a
    258     // complete header, or the header implies that contents are present
    259     // beyond the actual end of the file.
    260     virtual void HeaderTruncated();
    261 
    262     // The file's load command region, as given in the Mach-O header, is
    263     // too large for the file.
    264     virtual void LoadCommandRegionTruncated();
    265 
    266     // The file's Mach-O header claims the file contains |claimed| load
    267     // commands, but the I'th load command, of type |type|, extends beyond
    268     // the end of the load command region, as given by the Mach-O header.
    269     // If |type| is zero, the command's type was unreadable.
    270     virtual void LoadCommandsOverrun(size_t claimed, size_t i,
    271                                      LoadCommandType type);
    272 
    273     // The contents of the |i|'th load command, of type |type|, extend beyond
    274     // the size given in the load command's header.
    275     virtual void LoadCommandTooShort(size_t i, LoadCommandType type);
    276 
    277     // The LC_SEGMENT or LC_SEGMENT_64 load command for the segment named
    278     // |name| is too short to hold the sections that its header says it does.
    279     // (This more specific than LoadCommandTooShort.)
    280     virtual void SectionsMissing(const string &name);
    281 
    282     // The segment named |name| claims that its contents lie beyond the end
    283     // of the file.
    284     virtual void MisplacedSegmentData(const string &name);
    285 
    286     // The section named |section| in the segment named |segment| claims that
    287     // its contents do not lie entirely within the segment.
    288     virtual void MisplacedSectionData(const string &section,
    289                                       const string &segment);
    290 
    291     // The LC_SYMTAB command claims that symbol table contents are located
    292     // beyond the end of the file.
    293     virtual void MisplacedSymbolTable();
    294 
    295     // An attempt was made to read a Mach-O file of the unsupported
    296     // CPU architecture |cpu_type|.
    297     virtual void UnsupportedCPUType(cpu_type_t cpu_type);
    298 
    299    private:
    300     string filename_;
    301   };
    302 
    303   // A handler for sections parsed from a segment. The WalkSegmentSections
    304   // member function accepts an instance of this class, and applies it to
    305   // each section defined in a given segment.
    306   class SectionHandler {
    307    public:
    308     virtual ~SectionHandler() { }
    309 
    310     // Called to report that the segment's section list contains |section|.
    311     // This should return true if the iteration should continue, or false
    312     // if it should stop.
    313     virtual bool HandleSection(const Section &section) = 0;
    314   };
    315 
    316   // A handler for the load commands in a Mach-O file.
    317   class LoadCommandHandler {
    318    public:
    319     LoadCommandHandler() { }
    320     virtual ~LoadCommandHandler() { }
    321 
    322     // When called from WalkLoadCommands, the following handler functions
    323     // should return true if they wish to continue iterating over the load
    324     // command list, or false if they wish to stop iterating.
    325     //
    326     // When called from LoadCommandIterator::Handle or Reader::Handle,
    327     // these functions' return values are simply passed through to Handle's
    328     // caller.
    329     //
    330     // The definitions provided by this base class simply return true; the
    331     // default is to silently ignore sections whose member functions the
    332     // subclass doesn't override.
    333 
    334     // COMMAND is load command we don't recognize. We provide only the
    335     // command type and a ByteBuffer enclosing the command's data (If we
    336     // cannot parse the command type or its size, we call
    337     // reporter_->IncompleteLoadCommand instead.)
    338     virtual bool UnknownCommand(LoadCommandType type,
    339                                 const ByteBuffer &contents) {
    340       return true;
    341     }
    342 
    343     // The load command is LC_SEGMENT or LC_SEGMENT_64, defining a segment
    344     // with the properties given in |segment|.
    345     virtual bool SegmentCommand(const Segment &segment) {
    346       return true;
    347     }
    348 
    349     // The load command is LC_SYMTAB. |entries| holds the array of nlist
    350     // entries, and |names| holds the strings the entries refer to.
    351     virtual bool SymtabCommand(const ByteBuffer &entries,
    352                                const ByteBuffer &names) {
    353       return true;
    354     }
    355 
    356     // Add handler functions for more load commands here as needed.
    357   };
    358 
    359   // Create a Mach-O file reader that reports problems to |reporter|.
    360   explicit Reader(Reporter *reporter)
    361       : reporter_(reporter) { }
    362 
    363   // Read the given data as a Mach-O file. The reader retains pointers
    364   // into the data passed, so the data should live as long as the reader
    365   // does. On success, return true; on failure, return false.
    366   //
    367   // At most one of these functions should be invoked once on each Reader
    368   // instance.
    369   bool Read(const uint8_t *buffer,
    370             size_t size,
    371             cpu_type_t expected_cpu_type,
    372             cpu_subtype_t expected_cpu_subtype);
    373   bool Read(const ByteBuffer &buffer,
    374             cpu_type_t expected_cpu_type,
    375             cpu_subtype_t expected_cpu_subtype) {
    376     return Read(buffer.start,
    377                 buffer.Size(),
    378                 expected_cpu_type,
    379                 expected_cpu_subtype);
    380   }
    381 
    382   // Return this file's characteristics, as found in the Mach-O header.
    383   cpu_type_t    cpu_type()    const { return cpu_type_; }
    384   cpu_subtype_t cpu_subtype() const { return cpu_subtype_; }
    385   FileType      file_type()   const { return file_type_; }
    386   FileFlags     flags()       const { return flags_; }
    387 
    388   // Return true if this is a 64-bit Mach-O file, false if it is a 32-bit
    389   // Mach-O file.
    390   bool bits_64() const { return bits_64_; }
    391 
    392   // Return true if this is a big-endian Mach-O file, false if it is
    393   // little-endian.
    394   bool big_endian() const { return big_endian_; }
    395 
    396   // Apply |handler| to each load command in this Mach-O file, stopping when
    397   // a handler function returns false. If we encounter a malformed load
    398   // command, report it via reporter_ and return false. Return true if all
    399   // load commands were parseable and all handlers returned true.
    400   bool WalkLoadCommands(LoadCommandHandler *handler) const;
    401 
    402   // Set |segment| to describe the segment named |name|, if present. If
    403   // found, |segment|'s byte buffers refer to a subregion of the bytes
    404   // passed to Read. If we find the section, return true; otherwise,
    405   // return false.
    406   bool FindSegment(const string &name, Segment *segment) const;
    407 
    408   // Apply |handler| to each section defined in |segment|. If |handler| returns
    409   // false, stop iterating and return false. If all calls to |handler| return
    410   // true and we reach the end of the section list, return true.
    411   bool WalkSegmentSections(const Segment &segment, SectionHandler *handler)
    412     const;
    413 
    414   // Clear |section_map| and then populate it with a map of the sections
    415   // in |segment|, from section names to Section structures.
    416   // Each Section's contents refer to bytes in |segment|'s contents.
    417   // On success, return true; if a problem occurs, report it and return false.
    418   bool MapSegmentSections(const Segment &segment, SectionMap *section_map)
    419     const;
    420 
    421  private:
    422   // Used internally.
    423   class SegmentFinder;
    424   class SectionMapper;
    425 
    426   // We use this to report problems parsing the file's contents. (WEAK)
    427   Reporter *reporter_;
    428 
    429   // The contents of the Mach-O file we're parsing. We do not own the
    430   // storage it refers to.
    431   ByteBuffer buffer_;
    432 
    433   // True if this file is big-endian.
    434   bool big_endian_;
    435 
    436   // True if this file is a 64-bit Mach-O file.
    437   bool bits_64_;
    438 
    439   // This file's cpu type and subtype.
    440   cpu_type_t cpu_type_;        // mach_header[_64].cputype
    441   cpu_subtype_t cpu_subtype_;  // mach_header[_64].cpusubtype
    442 
    443   // This file's type.
    444   FileType file_type_;         // mach_header[_64].filetype
    445 
    446   // The region of buffer_ occupied by load commands.
    447   ByteBuffer load_commands_;
    448 
    449   // The number of load commands in load_commands_.
    450   uint32_t load_command_count_;  // mach_header[_64].ncmds
    451 
    452   // This file's header flags.
    453   FileFlags flags_;
    454 };
    455 
    456 }  // namespace mach_o
    457 }  // namespace google_breakpad
    458 
    459 #endif  // BREAKPAD_COMMON_MAC_MACHO_READER_H_
    460