Home | History | Annotate | Download | only in handler
      1 // Copyright (c) 2007, Google Inc.
      2 // All rights reserved.
      3 //
      4 // Redistribution and use in source and binary forms, with or without
      5 // modification, are permitted provided that the following conditions are
      6 // met:
      7 //
      8 //     * Redistributions of source code must retain the above copyright
      9 // notice, this list of conditions and the following disclaimer.
     10 //     * Redistributions in binary form must reproduce the above
     11 // copyright notice, this list of conditions and the following disclaimer
     12 // in the documentation and/or other materials provided with the
     13 // distribution.
     14 //     * Neither the name of Google Inc. nor the names of its
     15 // contributors may be used to endorse or promote products derived from
     16 // this software without specific prior written permission.
     17 //
     18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29 
     30 #include "client/mac/handler/dynamic_images.h"
     31 
     32 extern "C" { // needed to compile on Leopard
     33   #include <mach-o/nlist.h>
     34   #include <stdlib.h>
     35   #include <stdio.h>
     36 }
     37 
     38 #include <assert.h>
     39 #include <AvailabilityMacros.h>
     40 #include <dlfcn.h>
     41 #include <mach/task_info.h>
     42 #include <sys/sysctl.h>
     43 #include <TargetConditionals.h>
     44 #include <unistd.h>
     45 
     46 #include <algorithm>
     47 #include <string>
     48 #include <vector>
     49 
     50 #include "breakpad_nlist_64.h"
     51 
     52 #if !TARGET_OS_IPHONE
     53 #include <CoreServices/CoreServices.h>
     54 
     55 #ifndef MAC_OS_X_VERSION_10_6
     56 #define MAC_OS_X_VERSION_10_6 1060
     57 #endif
     58 
     59 #if MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_6
     60 
     61 // Fallback declarations for TASK_DYLD_INFO and friends, introduced in
     62 // <mach/task_info.h> in the Mac OS X 10.6 SDK.
     63 #define TASK_DYLD_INFO 17
     64 struct task_dyld_info {
     65   mach_vm_address_t all_image_info_addr;
     66   mach_vm_size_t all_image_info_size;
     67 };
     68 typedef struct task_dyld_info task_dyld_info_data_t;
     69 typedef struct task_dyld_info *task_dyld_info_t;
     70 #define TASK_DYLD_INFO_COUNT (sizeof(task_dyld_info_data_t) / sizeof(natural_t))
     71 
     72 #endif
     73 
     74 #endif  // !TARGET_OS_IPHONE
     75 
     76 namespace google_breakpad {
     77 
     78 using std::string;
     79 using std::vector;
     80 
     81 //==============================================================================
     82 // Returns the size of the memory region containing |address| and the
     83 // number of bytes from |address| to the end of the region.
     84 // We potentially, will extend the size of the original
     85 // region by the size of the following region if it's contiguous with the
     86 // first in order to handle cases when we're reading strings and they
     87 // straddle two vm regions.
     88 //
     89 static mach_vm_size_t GetMemoryRegionSize(task_port_t target_task,
     90                                           const uint64_t address,
     91                                           mach_vm_size_t *size_to_end) {
     92   mach_vm_address_t region_base = (mach_vm_address_t)address;
     93   mach_vm_size_t region_size;
     94   natural_t nesting_level = 0;
     95   vm_region_submap_info_64 submap_info;
     96   mach_msg_type_number_t info_count = VM_REGION_SUBMAP_INFO_COUNT_64;
     97 
     98   // Get information about the vm region containing |address|
     99   vm_region_recurse_info_t region_info;
    100   region_info = reinterpret_cast<vm_region_recurse_info_t>(&submap_info);
    101 
    102   kern_return_t result =
    103     mach_vm_region_recurse(target_task,
    104                            &region_base,
    105                            &region_size,
    106                            &nesting_level,
    107                            region_info,
    108                            &info_count);
    109 
    110   if (result == KERN_SUCCESS) {
    111     // Get distance from |address| to the end of this region
    112     *size_to_end = region_base + region_size -(mach_vm_address_t)address;
    113 
    114     // If we want to handle strings as long as 4096 characters we may need
    115     // to check if there's a vm region immediately following the first one.
    116     // If so, we need to extend |*size_to_end| to go all the way to the end
    117     // of the second region.
    118     if (*size_to_end < 4096) {
    119       // Second region starts where the first one ends
    120       mach_vm_address_t region_base2 =
    121         (mach_vm_address_t)(region_base + region_size);
    122       mach_vm_size_t region_size2;
    123 
    124       // Get information about the following vm region
    125       result =
    126         mach_vm_region_recurse(target_task,
    127                                &region_base2,
    128                                &region_size2,
    129                                &nesting_level,
    130                                region_info,
    131                                &info_count);
    132 
    133       // Extend region_size to go all the way to the end of the 2nd region
    134       if (result == KERN_SUCCESS
    135           && region_base2 == region_base + region_size) {
    136         region_size += region_size2;
    137       }
    138     }
    139 
    140     *size_to_end = region_base + region_size -(mach_vm_address_t)address;
    141   } else {
    142     region_size = 0;
    143     *size_to_end = 0;
    144   }
    145 
    146   return region_size;
    147 }
    148 
    149 #define kMaxStringLength 8192
    150 //==============================================================================
    151 // Reads a NULL-terminated string from another task.
    152 //
    153 // Warning!  This will not read any strings longer than kMaxStringLength-1
    154 //
    155 static string ReadTaskString(task_port_t target_task,
    156                              const uint64_t address) {
    157   // The problem is we don't know how much to read until we know how long
    158   // the string is. And we don't know how long the string is, until we've read
    159   // the memory!  So, we'll try to read kMaxStringLength bytes
    160   // (or as many bytes as we can until we reach the end of the vm region).
    161   mach_vm_size_t size_to_end;
    162   GetMemoryRegionSize(target_task, address, &size_to_end);
    163 
    164   if (size_to_end > 0) {
    165     mach_vm_size_t size_to_read =
    166       size_to_end > kMaxStringLength ? kMaxStringLength : size_to_end;
    167 
    168     vector<uint8_t> bytes;
    169     if (ReadTaskMemory(target_task, address, (size_t)size_to_read, bytes) !=
    170         KERN_SUCCESS)
    171       return string();
    172 
    173     return string(reinterpret_cast<const char*>(&bytes[0]));
    174   }
    175 
    176   return string();
    177 }
    178 
    179 //==============================================================================
    180 // Reads an address range from another task. The bytes read will be returned
    181 // in bytes, which will be resized as necessary.
    182 kern_return_t ReadTaskMemory(task_port_t target_task,
    183                              const uint64_t address,
    184                              size_t length,
    185                              vector<uint8_t> &bytes) {
    186   int systemPageSize = getpagesize();
    187 
    188   // use the negative of the page size for the mask to find the page address
    189   mach_vm_address_t page_address = address & (-systemPageSize);
    190 
    191   mach_vm_address_t last_page_address =
    192       (address + length + (systemPageSize - 1)) & (-systemPageSize);
    193 
    194   mach_vm_size_t page_size = last_page_address - page_address;
    195   uint8_t* local_start;
    196   uint32_t local_length;
    197 
    198   kern_return_t r = mach_vm_read(target_task,
    199                                  page_address,
    200                                  page_size,
    201                                  reinterpret_cast<vm_offset_t*>(&local_start),
    202                                  &local_length);
    203 
    204   if (r != KERN_SUCCESS)
    205     return r;
    206 
    207   bytes.resize(length);
    208   memcpy(&bytes[0],
    209          &local_start[(mach_vm_address_t)address - page_address],
    210          length);
    211   mach_vm_deallocate(mach_task_self(), (uintptr_t)local_start, local_length);
    212   return KERN_SUCCESS;
    213 }
    214 
    215 #pragma mark -
    216 
    217 //==============================================================================
    218 // Traits structs for specializing function templates to handle
    219 // 32-bit/64-bit Mach-O files.
    220 struct MachO32 {
    221   typedef mach_header mach_header_type;
    222   typedef segment_command mach_segment_command_type;
    223   typedef dyld_image_info32 dyld_image_info;
    224   typedef dyld_all_image_infos32 dyld_all_image_infos;
    225   typedef struct nlist nlist_type;
    226   static const uint32_t magic = MH_MAGIC;
    227   static const uint32_t segment_load_command = LC_SEGMENT;
    228 };
    229 
    230 struct MachO64 {
    231   typedef mach_header_64 mach_header_type;
    232   typedef segment_command_64 mach_segment_command_type;
    233   typedef dyld_image_info64 dyld_image_info;
    234   typedef dyld_all_image_infos64 dyld_all_image_infos;
    235   typedef struct nlist_64 nlist_type;
    236   static const uint32_t magic = MH_MAGIC_64;
    237   static const uint32_t segment_load_command = LC_SEGMENT_64;
    238 };
    239 
    240 template<typename MachBits>
    241 bool FindTextSection(DynamicImage& image) {
    242   typedef typename MachBits::mach_header_type mach_header_type;
    243   typedef typename MachBits::mach_segment_command_type
    244       mach_segment_command_type;
    245 
    246   const mach_header_type* header =
    247       reinterpret_cast<const mach_header_type*>(&image.header_[0]);
    248 
    249   if(header->magic != MachBits::magic) {
    250     return false;
    251   }
    252 
    253   const struct load_command *cmd =
    254       reinterpret_cast<const struct load_command *>(header + 1);
    255 
    256   bool found_text_section = false;
    257   bool found_dylib_id_command = false;
    258   for (unsigned int i = 0; cmd && (i < header->ncmds); ++i) {
    259     if (!found_text_section) {
    260       if (cmd->cmd == MachBits::segment_load_command) {
    261         const mach_segment_command_type *seg =
    262             reinterpret_cast<const mach_segment_command_type *>(cmd);
    263 
    264         if (!strcmp(seg->segname, "__TEXT")) {
    265           image.vmaddr_ = static_cast<mach_vm_address_t>(seg->vmaddr);
    266           image.vmsize_ = static_cast<mach_vm_size_t>(seg->vmsize);
    267           image.slide_ = 0;
    268 
    269           if (seg->fileoff == 0 && seg->filesize != 0) {
    270             image.slide_ =
    271                 (uintptr_t)image.GetLoadAddress() - (uintptr_t)seg->vmaddr;
    272           }
    273           found_text_section = true;
    274         }
    275       }
    276     }
    277 
    278     if (!found_dylib_id_command) {
    279       if (cmd->cmd == LC_ID_DYLIB) {
    280         const struct dylib_command *dc =
    281             reinterpret_cast<const struct dylib_command *>(cmd);
    282 
    283         image.version_ = dc->dylib.current_version;
    284         found_dylib_id_command = true;
    285       }
    286     }
    287 
    288     if (found_dylib_id_command && found_text_section) {
    289       return true;
    290     }
    291 
    292     cmd = reinterpret_cast<const struct load_command *>
    293         (reinterpret_cast<const char *>(cmd) + cmd->cmdsize);
    294   }
    295 
    296   return false;
    297 }
    298 
    299 //==============================================================================
    300 // Initializes vmaddr_, vmsize_, and slide_
    301 void DynamicImage::CalculateMemoryAndVersionInfo() {
    302   // unless we can process the header, ensure that calls to
    303   // IsValid() will return false
    304   vmaddr_ = 0;
    305   vmsize_ = 0;
    306   slide_ = 0;
    307   version_ = 0;
    308 
    309   // The function template above does all the real work.
    310   if (Is64Bit())
    311     FindTextSection<MachO64>(*this);
    312   else
    313     FindTextSection<MachO32>(*this);
    314 }
    315 
    316 //==============================================================================
    317 // The helper function template abstracts the 32/64-bit differences.
    318 template<typename MachBits>
    319 uint32_t GetFileTypeFromHeader(DynamicImage& image) {
    320   typedef typename MachBits::mach_header_type mach_header_type;
    321 
    322   const mach_header_type* header =
    323       reinterpret_cast<const mach_header_type*>(&image.header_[0]);
    324   return header->filetype;
    325 }
    326 
    327 uint32_t DynamicImage::GetFileType() {
    328   if (Is64Bit())
    329     return GetFileTypeFromHeader<MachO64>(*this);
    330 
    331   return GetFileTypeFromHeader<MachO32>(*this);
    332 }
    333 
    334 #pragma mark -
    335 
    336 //==============================================================================
    337 // Loads information about dynamically loaded code in the given task.
    338 DynamicImages::DynamicImages(mach_port_t task)
    339     : task_(task),
    340       cpu_type_(DetermineTaskCPUType(task)),
    341       image_list_() {
    342   ReadImageInfoForTask();
    343 }
    344 
    345 template<typename MachBits>
    346 static uint64_t LookupSymbol(const char* symbol_name,
    347                              const char* filename,
    348                              cpu_type_t cpu_type) {
    349   typedef typename MachBits::nlist_type nlist_type;
    350 
    351   nlist_type symbol_info[8] = {};
    352   const char *symbolNames[2] = { symbol_name, "\0" };
    353   nlist_type &list = symbol_info[0];
    354   int invalidEntriesCount = breakpad_nlist(filename,
    355                                            &list,
    356                                            symbolNames,
    357                                            cpu_type);
    358 
    359   if(invalidEntriesCount != 0) {
    360     return 0;
    361   }
    362 
    363   assert(list.n_value);
    364   return list.n_value;
    365 }
    366 
    367 #if TARGET_OS_IPHONE
    368 static bool HasTaskDyldInfo() {
    369   return true;
    370 }
    371 #else
    372 static SInt32 GetOSVersionInternal() {
    373   SInt32 os_version = 0;
    374   Gestalt(gestaltSystemVersion, &os_version);
    375   return os_version;
    376 }
    377 
    378 static SInt32 GetOSVersion() {
    379   static SInt32 os_version = GetOSVersionInternal();
    380   return os_version;
    381 }
    382 
    383 static bool HasTaskDyldInfo() {
    384 #if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_6
    385   return true;
    386 #else
    387   return GetOSVersion() >= 0x1060;
    388 #endif
    389 }
    390 #endif  // TARGET_OS_IPHONE
    391 
    392 uint64_t DynamicImages::GetDyldAllImageInfosPointer() {
    393   if (HasTaskDyldInfo()) {
    394     task_dyld_info_data_t task_dyld_info;
    395     mach_msg_type_number_t count = TASK_DYLD_INFO_COUNT;
    396     if (task_info(task_, TASK_DYLD_INFO, (task_info_t)&task_dyld_info,
    397                   &count) != KERN_SUCCESS) {
    398       return 0;
    399     }
    400 
    401     return (uint64_t)task_dyld_info.all_image_info_addr;
    402   } else {
    403     const char *imageSymbolName = "_dyld_all_image_infos";
    404     const char *dyldPath = "/usr/lib/dyld";
    405 
    406     if (Is64Bit())
    407       return LookupSymbol<MachO64>(imageSymbolName, dyldPath, cpu_type_);
    408     return LookupSymbol<MachO32>(imageSymbolName, dyldPath, cpu_type_);
    409   }
    410 }
    411 
    412 //==============================================================================
    413 // This code was written using dyld_debug.c (from Darwin) as a guide.
    414 
    415 template<typename MachBits>
    416 void ReadImageInfo(DynamicImages& images,
    417                    uint64_t image_list_address) {
    418   typedef typename MachBits::dyld_image_info dyld_image_info;
    419   typedef typename MachBits::dyld_all_image_infos dyld_all_image_infos;
    420   typedef typename MachBits::mach_header_type mach_header_type;
    421 
    422   // Read the structure inside of dyld that contains information about
    423   // loaded images.  We're reading from the desired task's address space.
    424 
    425   // Here we make the assumption that dyld loaded at the same address in
    426   // the crashed process vs. this one.  This is an assumption made in
    427   // "dyld_debug.c" and is said to be nearly always valid.
    428   vector<uint8_t> dyld_all_info_bytes;
    429   if (ReadTaskMemory(images.task_,
    430                      image_list_address,
    431                      sizeof(dyld_all_image_infos),
    432                      dyld_all_info_bytes) != KERN_SUCCESS)
    433     return;
    434 
    435   dyld_all_image_infos *dyldInfo =
    436     reinterpret_cast<dyld_all_image_infos*>(&dyld_all_info_bytes[0]);
    437 
    438   // number of loaded images
    439   int count = dyldInfo->infoArrayCount;
    440 
    441   // Read an array of dyld_image_info structures each containing
    442   // information about a loaded image.
    443   vector<uint8_t> dyld_info_array_bytes;
    444     if (ReadTaskMemory(images.task_,
    445                        dyldInfo->infoArray,
    446                        count * sizeof(dyld_image_info),
    447                        dyld_info_array_bytes) != KERN_SUCCESS)
    448       return;
    449 
    450     dyld_image_info *infoArray =
    451         reinterpret_cast<dyld_image_info*>(&dyld_info_array_bytes[0]);
    452     images.image_list_.reserve(count);
    453 
    454     for (int i = 0; i < count; ++i) {
    455       dyld_image_info &info = infoArray[i];
    456 
    457       // First read just the mach_header from the image in the task.
    458       vector<uint8_t> mach_header_bytes;
    459       if (ReadTaskMemory(images.task_,
    460                          info.load_address_,
    461                          sizeof(mach_header_type),
    462                          mach_header_bytes) != KERN_SUCCESS)
    463         continue;  // bail on this dynamic image
    464 
    465       mach_header_type *header =
    466           reinterpret_cast<mach_header_type*>(&mach_header_bytes[0]);
    467 
    468       // Now determine the total amount necessary to read the header
    469       // plus all of the load commands.
    470       size_t header_size =
    471           sizeof(mach_header_type) + header->sizeofcmds;
    472 
    473       if (ReadTaskMemory(images.task_,
    474                          info.load_address_,
    475                          header_size,
    476                          mach_header_bytes) != KERN_SUCCESS)
    477         continue;
    478 
    479       // Read the file name from the task's memory space.
    480       string file_path;
    481       if (info.file_path_) {
    482         // Although we're reading kMaxStringLength bytes, it's copied in the
    483         // the DynamicImage constructor below with the correct string length,
    484         // so it's not really wasting memory.
    485         file_path = ReadTaskString(images.task_, info.file_path_);
    486       }
    487 
    488       // Create an object representing this image and add it to our list.
    489       DynamicImage *new_image;
    490       new_image = new DynamicImage(&mach_header_bytes[0],
    491                                    header_size,
    492                                    info.load_address_,
    493                                    file_path,
    494                                    static_cast<uintptr_t>(info.file_mod_date_),
    495                                    images.task_,
    496                                    images.cpu_type_);
    497 
    498       if (new_image->IsValid()) {
    499         images.image_list_.push_back(DynamicImageRef(new_image));
    500       } else {
    501         delete new_image;
    502       }
    503     }
    504 
    505     // sorts based on loading address
    506     sort(images.image_list_.begin(), images.image_list_.end());
    507     // remove duplicates - this happens in certain strange cases
    508     // You can see it in DashboardClient when Google Gadgets plugin
    509     // is installed.  Apple's crash reporter log and gdb "info shared"
    510     // both show the same library multiple times at the same address
    511 
    512     vector<DynamicImageRef>::iterator it = unique(images.image_list_.begin(),
    513                                                   images.image_list_.end());
    514     images.image_list_.erase(it, images.image_list_.end());
    515 }
    516 
    517 void DynamicImages::ReadImageInfoForTask() {
    518   uint64_t imageList = GetDyldAllImageInfosPointer();
    519 
    520   if (imageList) {
    521     if (Is64Bit())
    522       ReadImageInfo<MachO64>(*this, imageList);
    523     else
    524       ReadImageInfo<MachO32>(*this, imageList);
    525   }
    526 }
    527 
    528 //==============================================================================
    529 DynamicImage  *DynamicImages::GetExecutableImage() {
    530   int executable_index = GetExecutableImageIndex();
    531 
    532   if (executable_index >= 0) {
    533     return GetImage(executable_index);
    534   }
    535 
    536   return NULL;
    537 }
    538 
    539 //==============================================================================
    540 // returns -1 if failure to find executable
    541 int DynamicImages::GetExecutableImageIndex() {
    542   int image_count = GetImageCount();
    543 
    544   for (int i = 0; i < image_count; ++i) {
    545     DynamicImage  *image = GetImage(i);
    546     if (image->GetFileType() == MH_EXECUTE) {
    547       return i;
    548     }
    549   }
    550 
    551   return -1;
    552 }
    553 
    554 //==============================================================================
    555 // static
    556 cpu_type_t DynamicImages::DetermineTaskCPUType(task_t task) {
    557   if (task == mach_task_self())
    558     return GetNativeCPUType();
    559 
    560   int mib[CTL_MAXNAME];
    561   size_t mibLen = CTL_MAXNAME;
    562   int err = sysctlnametomib("sysctl.proc_cputype", mib, &mibLen);
    563   if (err == 0) {
    564     assert(mibLen < CTL_MAXNAME);
    565     pid_for_task(task, &mib[mibLen]);
    566     mibLen += 1;
    567 
    568     cpu_type_t cpu_type;
    569     size_t cpuTypeSize = sizeof(cpu_type);
    570     sysctl(mib, static_cast<u_int>(mibLen), &cpu_type, &cpuTypeSize, 0, 0);
    571     return cpu_type;
    572   }
    573 
    574   return GetNativeCPUType();
    575 }
    576 
    577 }  // namespace google_breakpad
    578