Home | History | Annotate | Download | only in mac
      1 // Copyright (c) 2006, Google Inc.
      2 // All rights reserved.
      3 //
      4 // Redistribution and use in source and binary forms, with or without
      5 // modification, are permitted provided that the following conditions are
      6 // met:
      7 //
      8 //     * Redistributions of source code must retain the above copyright
      9 // notice, this list of conditions and the following disclaimer.
     10 //     * Redistributions in binary form must reproduce the above
     11 // copyright notice, this list of conditions and the following disclaimer
     12 // in the documentation and/or other materials provided with the
     13 // distribution.
     14 //     * Neither the name of Google Inc. nor the names of its
     15 // contributors may be used to endorse or promote products derived from
     16 // this software without specific prior written permission.
     17 //
     18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29 
     30 // macho_id.cc: Functions to gather identifying information from a macho file
     31 //
     32 // See macho_id.h for documentation
     33 //
     34 // Author: Dan Waylonis
     35 
     36 extern "C" {  // necessary for Leopard
     37   #include <fcntl.h>
     38   #include <mach-o/loader.h>
     39   #include <mach-o/swap.h>
     40   #include <stdio.h>
     41   #include <stdlib.h>
     42   #include <string.h>
     43   #include <sys/time.h>
     44   #include <sys/types.h>
     45   #include <unistd.h>
     46 }
     47 
     48 #include "common/mac/macho_id.h"
     49 #include "common/mac/macho_walker.h"
     50 #include "common/mac/macho_utilities.h"
     51 
     52 namespace MacFileUtilities {
     53 
     54 using google_breakpad::MD5Init;
     55 using google_breakpad::MD5Update;
     56 using google_breakpad::MD5Final;
     57 
     58 MachoID::MachoID(const char *path)
     59    : memory_(0),
     60      memory_size_(0),
     61      crc_(0),
     62      md5_context_(),
     63      update_function_(NULL) {
     64   strlcpy(path_, path, sizeof(path_));
     65 }
     66 
     67 MachoID::MachoID(const char *path, void *memory, size_t size)
     68    : memory_(memory),
     69      memory_size_(size),
     70      crc_(0),
     71      md5_context_(),
     72      update_function_(NULL) {
     73   strlcpy(path_, path, sizeof(path_));
     74 }
     75 
     76 MachoID::~MachoID() {
     77 }
     78 
     79 // The CRC info is from http://en.wikipedia.org/wiki/Adler-32
     80 // With optimizations from http://www.zlib.net/
     81 
     82 // The largest prime smaller than 65536
     83 #define MOD_ADLER 65521
// MAX_BLOCK is the largest n such that 255n(n+1)/2 + (n+1)(MOD_ADLER-1) <= 2^32-1
     85 #define MAX_BLOCK 5552
     86 
     87 void MachoID::UpdateCRC(unsigned char *bytes, size_t size) {
     88 // Unrolled loops for summing
     89 #define DO1(buf,i)  {sum1 += (buf)[i]; sum2 += sum1;}
     90 #define DO2(buf,i)  DO1(buf,i); DO1(buf,i+1);
     91 #define DO4(buf,i)  DO2(buf,i); DO2(buf,i+2);
     92 #define DO8(buf,i)  DO4(buf,i); DO4(buf,i+4);
     93 #define DO16(buf)   DO8(buf,0); DO8(buf,8);
     94   // Split up the crc
     95   uint32_t sum1 = crc_ & 0xFFFF;
     96   uint32_t sum2 = (crc_ >> 16) & 0xFFFF;
     97 
     98   // Do large blocks
     99   while (size >= MAX_BLOCK) {
    100     size -= MAX_BLOCK;
    101     int block_count = MAX_BLOCK / 16;
    102     do {
    103       DO16(bytes);
    104       bytes += 16;
    105     } while (--block_count);
    106     sum1 %= MOD_ADLER;
    107     sum2 %= MOD_ADLER;
    108   }
    109 
    110   // Do remaining bytes
    111   if (size) {
    112     while (size >= 16) {
    113       size -= 16;
    114       DO16(bytes);
    115       bytes += 16;
    116     }
    117     while (size--) {
    118       sum1 += *bytes++;
    119       sum2 += sum1;
    120     }
    121     sum1 %= MOD_ADLER;
    122     sum2 %= MOD_ADLER;
    123     crc_ = (sum2 << 16) | sum1;
    124   }
    125 }
    126 
    127 void MachoID::UpdateMD5(unsigned char *bytes, size_t size) {
    128   MD5Update(&md5_context_, bytes, static_cast<unsigned>(size));
    129 }
    130 
    131 void MachoID::Update(MachoWalker *walker, off_t offset, size_t size) {
    132   if (!update_function_ || !size)
    133     return;
    134 
    135   // Read up to 4k bytes at a time
    136   unsigned char buffer[4096];
    137   size_t buffer_size;
    138   off_t file_offset = offset;
    139   while (size > 0) {
    140     if (size > sizeof(buffer)) {
    141       buffer_size = sizeof(buffer);
    142       size -= buffer_size;
    143     } else {
    144       buffer_size = size;
    145       size = 0;
    146     }
    147 
    148     if (!walker->ReadBytes(buffer, buffer_size, file_offset))
    149       return;
    150 
    151     (this->*update_function_)(buffer, buffer_size);
    152     file_offset += buffer_size;
    153   }
    154 }
    155 
    156 bool MachoID::UUIDCommand(cpu_type_t cpu_type,
    157                           cpu_subtype_t cpu_subtype,
    158                           unsigned char bytes[16]) {
    159   struct breakpad_uuid_command uuid_cmd;
    160   uuid_cmd.cmd = 0;
    161   if (!WalkHeader(cpu_type, cpu_subtype, UUIDWalkerCB, &uuid_cmd))
    162     return false;
    163 
    164   // If we found the command, we'll have initialized the uuid_command
    165   // structure
    166   if (uuid_cmd.cmd == LC_UUID) {
    167     memcpy(bytes, uuid_cmd.uuid, sizeof(uuid_cmd.uuid));
    168     return true;
    169   }
    170 
    171   return false;
    172 }
    173 
    174 bool MachoID::IDCommand(cpu_type_t cpu_type,
    175                         cpu_subtype_t cpu_subtype,
    176                         unsigned char identifier[16]) {
    177   struct dylib_command dylib_cmd;
    178   dylib_cmd.cmd = 0;
    179   if (!WalkHeader(cpu_type, cpu_subtype, IDWalkerCB, &dylib_cmd))
    180     return false;
    181 
    182   // If we found the command, we'll have initialized the dylib_command
    183   // structure
    184   if (dylib_cmd.cmd == LC_ID_DYLIB) {
    185     // Take the hashed filename, version, and compatability version bytes
    186     // to form the first 12 bytes, pad the rest with zeros
    187 
    188     // create a crude hash of the filename to generate the first 4 bytes
    189     identifier[0] = 0;
    190     identifier[1] = 0;
    191     identifier[2] = 0;
    192     identifier[3] = 0;
    193 
    194     for (int j = 0, i = (int)strlen(path_)-1; i>=0 && path_[i]!='/'; ++j, --i) {
    195       identifier[j%4] += path_[i];
    196     }
    197 
    198     identifier[4] = (dylib_cmd.dylib.current_version >> 24) & 0xFF;
    199     identifier[5] = (dylib_cmd.dylib.current_version >> 16) & 0xFF;
    200     identifier[6] = (dylib_cmd.dylib.current_version >> 8) & 0xFF;
    201     identifier[7] = dylib_cmd.dylib.current_version & 0xFF;
    202     identifier[8] = (dylib_cmd.dylib.compatibility_version >> 24) & 0xFF;
    203     identifier[9] = (dylib_cmd.dylib.compatibility_version >> 16) & 0xFF;
    204     identifier[10] = (dylib_cmd.dylib.compatibility_version >> 8) & 0xFF;
    205     identifier[11] = dylib_cmd.dylib.compatibility_version & 0xFF;
    206     identifier[12] = (cpu_type >> 24) & 0xFF;
    207     identifier[13] = (cpu_type >> 16) & 0xFF;
    208     identifier[14] = (cpu_type >> 8) & 0xFF;
    209     identifier[15] = cpu_type & 0xFF;
    210 
    211     return true;
    212   }
    213 
    214   return false;
    215 }
    216 
    217 uint32_t MachoID::Adler32(cpu_type_t cpu_type, cpu_subtype_t cpu_subtype) {
    218   update_function_ = &MachoID::UpdateCRC;
    219   crc_ = 0;
    220 
    221   if (!WalkHeader(cpu_type, cpu_subtype, WalkerCB, this))
    222     return 0;
    223 
    224   return crc_;
    225 }
    226 
    227 bool MachoID::MD5(cpu_type_t cpu_type, cpu_subtype_t cpu_subtype, unsigned char identifier[16]) {
    228   update_function_ = &MachoID::UpdateMD5;
    229 
    230   MD5Init(&md5_context_);
    231 
    232   if (!WalkHeader(cpu_type, cpu_subtype, WalkerCB, this))
    233     return false;
    234 
    235   MD5Final(identifier, &md5_context_);
    236   return true;
    237 }
    238 
    239 bool MachoID::WalkHeader(cpu_type_t cpu_type,
    240                          cpu_subtype_t cpu_subtype,
    241                          MachoWalker::LoadCommandCallback callback,
    242                          void *context) {
    243   if (memory_) {
    244     MachoWalker walker(memory_, memory_size_, callback, context);
    245     return walker.WalkHeader(cpu_type, cpu_subtype);
    246   } else {
    247     MachoWalker walker(path_, callback, context);
    248     return walker.WalkHeader(cpu_type, cpu_subtype);
    249   }
    250 }
    251 
    252 // static
    253 bool MachoID::WalkerCB(MachoWalker *walker, load_command *cmd, off_t offset,
    254                        bool swap, void *context) {
    255   MachoID *macho_id = (MachoID *)context;
    256 
    257   if (cmd->cmd == LC_SEGMENT) {
    258     struct segment_command seg;
    259 
    260     if (!walker->ReadBytes(&seg, sizeof(seg), offset))
    261       return false;
    262 
    263     if (swap)
    264       swap_segment_command(&seg, NXHostByteOrder());
    265 
    266     struct mach_header_64 header;
    267     off_t header_offset;
    268 
    269     if (!walker->CurrentHeader(&header, &header_offset))
    270       return false;
    271 
    272     // Process segments that have sections:
    273     // (e.g., __TEXT, __DATA, __IMPORT, __OBJC)
    274     offset += sizeof(struct segment_command);
    275     struct section sec;
    276     for (unsigned long i = 0; i < seg.nsects; ++i) {
    277       if (!walker->ReadBytes(&sec, sizeof(sec), offset))
    278         return false;
    279 
    280       if (swap)
    281         swap_section(&sec, 1, NXHostByteOrder());
    282 
    283       // sections of type S_ZEROFILL are "virtual" and contain no data
    284       // in the file itself
    285       if ((sec.flags & SECTION_TYPE) != S_ZEROFILL && sec.offset != 0)
    286         macho_id->Update(walker, header_offset + sec.offset, sec.size);
    287 
    288       offset += sizeof(struct section);
    289     }
    290   } else if (cmd->cmd == LC_SEGMENT_64) {
    291     struct segment_command_64 seg64;
    292 
    293     if (!walker->ReadBytes(&seg64, sizeof(seg64), offset))
    294       return false;
    295 
    296     if (swap)
    297       breakpad_swap_segment_command_64(&seg64, NXHostByteOrder());
    298 
    299     struct mach_header_64 header;
    300     off_t header_offset;
    301 
    302     if (!walker->CurrentHeader(&header, &header_offset))
    303       return false;
    304 
    305     // Process segments that have sections:
    306     // (e.g., __TEXT, __DATA, __IMPORT, __OBJC)
    307     offset += sizeof(struct segment_command_64);
    308     struct section_64 sec64;
    309     for (unsigned long i = 0; i < seg64.nsects; ++i) {
    310       if (!walker->ReadBytes(&sec64, sizeof(sec64), offset))
    311         return false;
    312 
    313       if (swap)
    314         breakpad_swap_section_64(&sec64, 1, NXHostByteOrder());
    315 
    316       // sections of type S_ZEROFILL are "virtual" and contain no data
    317       // in the file itself
    318       if ((sec64.flags & SECTION_TYPE) != S_ZEROFILL && sec64.offset != 0)
    319         macho_id->Update(walker,
    320                          header_offset + sec64.offset,
    321                          (size_t)sec64.size);
    322 
    323       offset += sizeof(struct section_64);
    324     }
    325   }
    326 
    327   // Continue processing
    328   return true;
    329 }
    330 
    331 // static
    332 bool MachoID::UUIDWalkerCB(MachoWalker *walker, load_command *cmd, off_t offset,
    333                            bool swap, void *context) {
    334   if (cmd->cmd == LC_UUID) {
    335     struct breakpad_uuid_command *uuid_cmd =
    336       (struct breakpad_uuid_command *)context;
    337 
    338     if (!walker->ReadBytes(uuid_cmd, sizeof(struct breakpad_uuid_command),
    339                            offset))
    340       return false;
    341 
    342     if (swap)
    343       breakpad_swap_uuid_command(uuid_cmd, NXHostByteOrder());
    344 
    345     return false;
    346   }
    347 
    348   // Continue processing
    349   return true;
    350 }
    351 
    352 // static
    353 bool MachoID::IDWalkerCB(MachoWalker *walker, load_command *cmd, off_t offset,
    354                          bool swap, void *context) {
    355   if (cmd->cmd == LC_ID_DYLIB) {
    356     struct dylib_command *dylib_cmd = (struct dylib_command *)context;
    357 
    358     if (!walker->ReadBytes(dylib_cmd, sizeof(struct dylib_command), offset))
    359       return false;
    360 
    361     if (swap)
    362       swap_dylib_command(dylib_cmd, NXHostByteOrder());
    363 
    364     return false;
    365   }
    366 
    367   // Continue processing
    368   return true;
    369 }
    370 
    371 }  // namespace MacFileUtilities
    372