1 // Copyright (c) 2007, Google Inc. 2 // All rights reserved. 3 // 4 // Redistribution and use in source and binary forms, with or without 5 // modification, are permitted provided that the following conditions are 6 // met: 7 // 8 // * Redistributions of source code must retain the above copyright 9 // notice, this list of conditions and the following disclaimer. 10 // * Redistributions in binary form must reproduce the above 11 // copyright notice, this list of conditions and the following disclaimer 12 // in the documentation and/or other materials provided with the 13 // distribution. 14 // * Neither the name of Google Inc. nor the names of its 15 // contributors may be used to endorse or promote products derived from 16 // this software without specific prior written permission. 17 // 18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 30 #include "client/mac/handler/dynamic_images.h" 31 32 extern "C" { // needed to compile on Leopard 33 #include <mach-o/nlist.h> 34 #include <stdlib.h> 35 #include <stdio.h> 36 } 37 38 #include <assert.h> 39 #include <AvailabilityMacros.h> 40 #include <dlfcn.h> 41 #include <mach/task_info.h> 42 #include <sys/sysctl.h> 43 #include <TargetConditionals.h> 44 #include <unistd.h> 45 46 #include <algorithm> 47 #include <string> 48 #include <vector> 49 50 #include "breakpad_nlist_64.h" 51 52 #if !TARGET_OS_IPHONE 53 #include <CoreServices/CoreServices.h> 54 55 #ifndef MAC_OS_X_VERSION_10_6 56 #define MAC_OS_X_VERSION_10_6 1060 57 #endif 58 59 #if MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_6 60 61 // Fallback declarations for TASK_DYLD_INFO and friends, introduced in 62 // <mach/task_info.h> in the Mac OS X 10.6 SDK. 63 #define TASK_DYLD_INFO 17 64 struct task_dyld_info { 65 mach_vm_address_t all_image_info_addr; 66 mach_vm_size_t all_image_info_size; 67 }; 68 typedef struct task_dyld_info task_dyld_info_data_t; 69 typedef struct task_dyld_info *task_dyld_info_t; 70 #define TASK_DYLD_INFO_COUNT (sizeof(task_dyld_info_data_t) / sizeof(natural_t)) 71 72 #endif 73 74 #endif // !TARGET_OS_IPHONE 75 76 namespace google_breakpad { 77 78 using std::string; 79 using std::vector; 80 81 //============================================================================== 82 // Returns the size of the memory region containing |address| and the 83 // number of bytes from |address| to the end of the region. 84 // We potentially, will extend the size of the original 85 // region by the size of the following region if it's contiguous with the 86 // first in order to handle cases when we're reading strings and they 87 // straddle two vm regions. 88 // 89 static mach_vm_size_t GetMemoryRegionSize(task_port_t target_task, 90 const uint64_t address, 91 mach_vm_size_t *size_to_end) { 92 mach_vm_address_t region_base = (mach_vm_address_t)address; 93 mach_vm_size_t region_size; 94 natural_t nesting_level = 0; 95 vm_region_submap_info_64 submap_info; 96 mach_msg_type_number_t info_count = VM_REGION_SUBMAP_INFO_COUNT_64; 97 98 // Get information about the vm region containing |address| 99 vm_region_recurse_info_t region_info; 100 region_info = reinterpret_cast<vm_region_recurse_info_t>(&submap_info); 101 102 kern_return_t result = 103 mach_vm_region_recurse(target_task, 104 ®ion_base, 105 ®ion_size, 106 &nesting_level, 107 region_info, 108 &info_count); 109 110 if (result == KERN_SUCCESS) { 111 // Get distance from |address| to the end of this region 112 *size_to_end = region_base + region_size -(mach_vm_address_t)address; 113 114 // If we want to handle strings as long as 4096 characters we may need 115 // to check if there's a vm region immediately following the first one. 116 // If so, we need to extend |*size_to_end| to go all the way to the end 117 // of the second region. 118 if (*size_to_end < 4096) { 119 // Second region starts where the first one ends 120 mach_vm_address_t region_base2 = 121 (mach_vm_address_t)(region_base + region_size); 122 mach_vm_size_t region_size2; 123 124 // Get information about the following vm region 125 result = 126 mach_vm_region_recurse(target_task, 127 ®ion_base2, 128 ®ion_size2, 129 &nesting_level, 130 region_info, 131 &info_count); 132 133 // Extend region_size to go all the way to the end of the 2nd region 134 if (result == KERN_SUCCESS 135 && region_base2 == region_base + region_size) { 136 region_size += region_size2; 137 } 138 } 139 140 *size_to_end = region_base + region_size -(mach_vm_address_t)address; 141 } else { 142 region_size = 0; 143 *size_to_end = 0; 144 } 145 146 return region_size; 147 } 148 149 #define kMaxStringLength 8192 150 //============================================================================== 151 // Reads a NULL-terminated string from another task. 152 // 153 // Warning! This will not read any strings longer than kMaxStringLength-1 154 // 155 static string ReadTaskString(task_port_t target_task, 156 const uint64_t address) { 157 // The problem is we don't know how much to read until we know how long 158 // the string is. And we don't know how long the string is, until we've read 159 // the memory! So, we'll try to read kMaxStringLength bytes 160 // (or as many bytes as we can until we reach the end of the vm region). 161 mach_vm_size_t size_to_end; 162 GetMemoryRegionSize(target_task, address, &size_to_end); 163 164 if (size_to_end > 0) { 165 mach_vm_size_t size_to_read = 166 size_to_end > kMaxStringLength ? kMaxStringLength : size_to_end; 167 168 vector<uint8_t> bytes; 169 if (ReadTaskMemory(target_task, address, (size_t)size_to_read, bytes) != 170 KERN_SUCCESS) 171 return string(); 172 173 return string(reinterpret_cast<const char*>(&bytes[0])); 174 } 175 176 return string(); 177 } 178 179 //============================================================================== 180 // Reads an address range from another task. The bytes read will be returned 181 // in bytes, which will be resized as necessary. 182 kern_return_t ReadTaskMemory(task_port_t target_task, 183 const uint64_t address, 184 size_t length, 185 vector<uint8_t> &bytes) { 186 int systemPageSize = getpagesize(); 187 188 // use the negative of the page size for the mask to find the page address 189 mach_vm_address_t page_address = address & (-systemPageSize); 190 191 mach_vm_address_t last_page_address = 192 (address + length + (systemPageSize - 1)) & (-systemPageSize); 193 194 mach_vm_size_t page_size = last_page_address - page_address; 195 uint8_t* local_start; 196 uint32_t local_length; 197 198 kern_return_t r = mach_vm_read(target_task, 199 page_address, 200 page_size, 201 reinterpret_cast<vm_offset_t*>(&local_start), 202 &local_length); 203 204 if (r != KERN_SUCCESS) 205 return r; 206 207 bytes.resize(length); 208 memcpy(&bytes[0], 209 &local_start[(mach_vm_address_t)address - page_address], 210 length); 211 mach_vm_deallocate(mach_task_self(), (uintptr_t)local_start, local_length); 212 return KERN_SUCCESS; 213 } 214 215 #pragma mark - 216 217 //============================================================================== 218 // Traits structs for specializing function templates to handle 219 // 32-bit/64-bit Mach-O files. 220 struct MachO32 { 221 typedef mach_header mach_header_type; 222 typedef segment_command mach_segment_command_type; 223 typedef dyld_image_info32 dyld_image_info; 224 typedef dyld_all_image_infos32 dyld_all_image_infos; 225 typedef struct nlist nlist_type; 226 static const uint32_t magic = MH_MAGIC; 227 static const uint32_t segment_load_command = LC_SEGMENT; 228 }; 229 230 struct MachO64 { 231 typedef mach_header_64 mach_header_type; 232 typedef segment_command_64 mach_segment_command_type; 233 typedef dyld_image_info64 dyld_image_info; 234 typedef dyld_all_image_infos64 dyld_all_image_infos; 235 typedef struct nlist_64 nlist_type; 236 static const uint32_t magic = MH_MAGIC_64; 237 static const uint32_t segment_load_command = LC_SEGMENT_64; 238 }; 239 240 template<typename MachBits> 241 bool FindTextSection(DynamicImage& image) { 242 typedef typename MachBits::mach_header_type mach_header_type; 243 typedef typename MachBits::mach_segment_command_type 244 mach_segment_command_type; 245 246 const mach_header_type* header = 247 reinterpret_cast<const mach_header_type*>(&image.header_[0]); 248 249 if(header->magic != MachBits::magic) { 250 return false; 251 } 252 253 const struct load_command *cmd = 254 reinterpret_cast<const struct load_command *>(header + 1); 255 256 bool found_text_section = false; 257 bool found_dylib_id_command = false; 258 for (unsigned int i = 0; cmd && (i < header->ncmds); ++i) { 259 if (!found_text_section) { 260 if (cmd->cmd == MachBits::segment_load_command) { 261 const mach_segment_command_type *seg = 262 reinterpret_cast<const mach_segment_command_type *>(cmd); 263 264 if (!strcmp(seg->segname, "__TEXT")) { 265 image.vmaddr_ = static_cast<mach_vm_address_t>(seg->vmaddr); 266 image.vmsize_ = static_cast<mach_vm_size_t>(seg->vmsize); 267 image.slide_ = 0; 268 269 if (seg->fileoff == 0 && seg->filesize != 0) { 270 image.slide_ = 271 (uintptr_t)image.GetLoadAddress() - (uintptr_t)seg->vmaddr; 272 } 273 found_text_section = true; 274 } 275 } 276 } 277 278 if (!found_dylib_id_command) { 279 if (cmd->cmd == LC_ID_DYLIB) { 280 const struct dylib_command *dc = 281 reinterpret_cast<const struct dylib_command *>(cmd); 282 283 image.version_ = dc->dylib.current_version; 284 found_dylib_id_command = true; 285 } 286 } 287 288 if (found_dylib_id_command && found_text_section) { 289 return true; 290 } 291 292 cmd = reinterpret_cast<const struct load_command *> 293 (reinterpret_cast<const char *>(cmd) + cmd->cmdsize); 294 } 295 296 return false; 297 } 298 299 //============================================================================== 300 // Initializes vmaddr_, vmsize_, and slide_ 301 void DynamicImage::CalculateMemoryAndVersionInfo() { 302 // unless we can process the header, ensure that calls to 303 // IsValid() will return false 304 vmaddr_ = 0; 305 vmsize_ = 0; 306 slide_ = 0; 307 version_ = 0; 308 309 // The function template above does all the real work. 310 if (Is64Bit()) 311 FindTextSection<MachO64>(*this); 312 else 313 FindTextSection<MachO32>(*this); 314 } 315 316 //============================================================================== 317 // The helper function template abstracts the 32/64-bit differences. 318 template<typename MachBits> 319 uint32_t GetFileTypeFromHeader(DynamicImage& image) { 320 typedef typename MachBits::mach_header_type mach_header_type; 321 322 const mach_header_type* header = 323 reinterpret_cast<const mach_header_type*>(&image.header_[0]); 324 return header->filetype; 325 } 326 327 uint32_t DynamicImage::GetFileType() { 328 if (Is64Bit()) 329 return GetFileTypeFromHeader<MachO64>(*this); 330 331 return GetFileTypeFromHeader<MachO32>(*this); 332 } 333 334 #pragma mark - 335 336 //============================================================================== 337 // Loads information about dynamically loaded code in the given task. 338 DynamicImages::DynamicImages(mach_port_t task) 339 : task_(task), 340 cpu_type_(DetermineTaskCPUType(task)), 341 image_list_() { 342 ReadImageInfoForTask(); 343 } 344 345 template<typename MachBits> 346 static uint64_t LookupSymbol(const char* symbol_name, 347 const char* filename, 348 cpu_type_t cpu_type) { 349 typedef typename MachBits::nlist_type nlist_type; 350 351 nlist_type symbol_info[8] = {}; 352 const char *symbolNames[2] = { symbol_name, "\0" }; 353 nlist_type &list = symbol_info[0]; 354 int invalidEntriesCount = breakpad_nlist(filename, 355 &list, 356 symbolNames, 357 cpu_type); 358 359 if(invalidEntriesCount != 0) { 360 return 0; 361 } 362 363 assert(list.n_value); 364 return list.n_value; 365 } 366 367 #if TARGET_OS_IPHONE 368 static bool HasTaskDyldInfo() { 369 return true; 370 } 371 #else 372 static SInt32 GetOSVersionInternal() { 373 SInt32 os_version = 0; 374 Gestalt(gestaltSystemVersion, &os_version); 375 return os_version; 376 } 377 378 static SInt32 GetOSVersion() { 379 static SInt32 os_version = GetOSVersionInternal(); 380 return os_version; 381 } 382 383 static bool HasTaskDyldInfo() { 384 #if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_6 385 return true; 386 #else 387 return GetOSVersion() >= 0x1060; 388 #endif 389 } 390 #endif // TARGET_OS_IPHONE 391 392 uint64_t DynamicImages::GetDyldAllImageInfosPointer() { 393 if (HasTaskDyldInfo()) { 394 task_dyld_info_data_t task_dyld_info; 395 mach_msg_type_number_t count = TASK_DYLD_INFO_COUNT; 396 if (task_info(task_, TASK_DYLD_INFO, (task_info_t)&task_dyld_info, 397 &count) != KERN_SUCCESS) { 398 return 0; 399 } 400 401 return (uint64_t)task_dyld_info.all_image_info_addr; 402 } else { 403 const char *imageSymbolName = "_dyld_all_image_infos"; 404 const char *dyldPath = "/usr/lib/dyld"; 405 406 if (Is64Bit()) 407 return LookupSymbol<MachO64>(imageSymbolName, dyldPath, cpu_type_); 408 return LookupSymbol<MachO32>(imageSymbolName, dyldPath, cpu_type_); 409 } 410 } 411 412 //============================================================================== 413 // This code was written using dyld_debug.c (from Darwin) as a guide. 414 415 template<typename MachBits> 416 void ReadImageInfo(DynamicImages& images, 417 uint64_t image_list_address) { 418 typedef typename MachBits::dyld_image_info dyld_image_info; 419 typedef typename MachBits::dyld_all_image_infos dyld_all_image_infos; 420 typedef typename MachBits::mach_header_type mach_header_type; 421 422 // Read the structure inside of dyld that contains information about 423 // loaded images. We're reading from the desired task's address space. 424 425 // Here we make the assumption that dyld loaded at the same address in 426 // the crashed process vs. this one. This is an assumption made in 427 // "dyld_debug.c" and is said to be nearly always valid. 428 vector<uint8_t> dyld_all_info_bytes; 429 if (ReadTaskMemory(images.task_, 430 image_list_address, 431 sizeof(dyld_all_image_infos), 432 dyld_all_info_bytes) != KERN_SUCCESS) 433 return; 434 435 dyld_all_image_infos *dyldInfo = 436 reinterpret_cast<dyld_all_image_infos*>(&dyld_all_info_bytes[0]); 437 438 // number of loaded images 439 int count = dyldInfo->infoArrayCount; 440 441 // Read an array of dyld_image_info structures each containing 442 // information about a loaded image. 443 vector<uint8_t> dyld_info_array_bytes; 444 if (ReadTaskMemory(images.task_, 445 dyldInfo->infoArray, 446 count * sizeof(dyld_image_info), 447 dyld_info_array_bytes) != KERN_SUCCESS) 448 return; 449 450 dyld_image_info *infoArray = 451 reinterpret_cast<dyld_image_info*>(&dyld_info_array_bytes[0]); 452 images.image_list_.reserve(count); 453 454 for (int i = 0; i < count; ++i) { 455 dyld_image_info &info = infoArray[i]; 456 457 // First read just the mach_header from the image in the task. 458 vector<uint8_t> mach_header_bytes; 459 if (ReadTaskMemory(images.task_, 460 info.load_address_, 461 sizeof(mach_header_type), 462 mach_header_bytes) != KERN_SUCCESS) 463 continue; // bail on this dynamic image 464 465 mach_header_type *header = 466 reinterpret_cast<mach_header_type*>(&mach_header_bytes[0]); 467 468 // Now determine the total amount necessary to read the header 469 // plus all of the load commands. 470 size_t header_size = 471 sizeof(mach_header_type) + header->sizeofcmds; 472 473 if (ReadTaskMemory(images.task_, 474 info.load_address_, 475 header_size, 476 mach_header_bytes) != KERN_SUCCESS) 477 continue; 478 479 // Read the file name from the task's memory space. 480 string file_path; 481 if (info.file_path_) { 482 // Although we're reading kMaxStringLength bytes, it's copied in the 483 // the DynamicImage constructor below with the correct string length, 484 // so it's not really wasting memory. 485 file_path = ReadTaskString(images.task_, info.file_path_); 486 } 487 488 // Create an object representing this image and add it to our list. 489 DynamicImage *new_image; 490 new_image = new DynamicImage(&mach_header_bytes[0], 491 header_size, 492 info.load_address_, 493 file_path, 494 static_cast<uintptr_t>(info.file_mod_date_), 495 images.task_, 496 images.cpu_type_); 497 498 if (new_image->IsValid()) { 499 images.image_list_.push_back(DynamicImageRef(new_image)); 500 } else { 501 delete new_image; 502 } 503 } 504 505 // sorts based on loading address 506 sort(images.image_list_.begin(), images.image_list_.end()); 507 // remove duplicates - this happens in certain strange cases 508 // You can see it in DashboardClient when Google Gadgets plugin 509 // is installed. Apple's crash reporter log and gdb "info shared" 510 // both show the same library multiple times at the same address 511 512 vector<DynamicImageRef>::iterator it = unique(images.image_list_.begin(), 513 images.image_list_.end()); 514 images.image_list_.erase(it, images.image_list_.end()); 515 } 516 517 void DynamicImages::ReadImageInfoForTask() { 518 uint64_t imageList = GetDyldAllImageInfosPointer(); 519 520 if (imageList) { 521 if (Is64Bit()) 522 ReadImageInfo<MachO64>(*this, imageList); 523 else 524 ReadImageInfo<MachO32>(*this, imageList); 525 } 526 } 527 528 //============================================================================== 529 DynamicImage *DynamicImages::GetExecutableImage() { 530 int executable_index = GetExecutableImageIndex(); 531 532 if (executable_index >= 0) { 533 return GetImage(executable_index); 534 } 535 536 return NULL; 537 } 538 539 //============================================================================== 540 // returns -1 if failure to find executable 541 int DynamicImages::GetExecutableImageIndex() { 542 int image_count = GetImageCount(); 543 544 for (int i = 0; i < image_count; ++i) { 545 DynamicImage *image = GetImage(i); 546 if (image->GetFileType() == MH_EXECUTE) { 547 return i; 548 } 549 } 550 551 return -1; 552 } 553 554 //============================================================================== 555 // static 556 cpu_type_t DynamicImages::DetermineTaskCPUType(task_t task) { 557 if (task == mach_task_self()) 558 return GetNativeCPUType(); 559 560 int mib[CTL_MAXNAME]; 561 size_t mibLen = CTL_MAXNAME; 562 int err = sysctlnametomib("sysctl.proc_cputype", mib, &mibLen); 563 if (err == 0) { 564 assert(mibLen < CTL_MAXNAME); 565 pid_for_task(task, &mib[mibLen]); 566 mibLen += 1; 567 568 cpu_type_t cpu_type; 569 size_t cpuTypeSize = sizeof(cpu_type); 570 sysctl(mib, static_cast<u_int>(mibLen), &cpu_type, &cpuTypeSize, 0, 0); 571 return cpu_type; 572 } 573 574 return GetNativeCPUType(); 575 } 576 577 } // namespace google_breakpad 578