/*--------------------------------------------------------------------*/
/*--- Reading of syms & debug info from Mach-O files.              ---*/
/*---                                                  readmacho.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2005-2010 Apple Inc.
      Greg Parker  gparker (at) apple.com

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#if defined(VGO_darwin)

#include "pub_core_basics.h"
#include "pub_core_vki.h"
#include "pub_core_libcbase.h"
#include "pub_core_libcprint.h"
#include "pub_core_libcassert.h"
#include "pub_core_libcfile.h"
#include "pub_core_libcproc.h"
#include "pub_core_aspacemgr.h"    /* for mmaping debuginfo files */
#include "pub_core_machine.h"      /* VG_ELF_CLASS */
#include "pub_core_options.h"
#include "pub_core_oset.h"
#include "pub_core_tooliface.h"    /* VG_(needs) */
#include "pub_core_xarray.h"
#include "pub_core_clientstate.h"
#include "pub_core_debuginfo.h"

#include "priv_d3basics.h"
#include "priv_misc.h"
#include "priv_tytypes.h"
#include "priv_storage.h"
#include "priv_readmacho.h"
#include "priv_readdwarf.h"
#include "priv_readdwarf3.h"
#include "priv_readstabs.h"

/* --- !!! --- EXTERNAL HEADERS start --- !!! --- */
#include <mach-o/loader.h>
#include <mach-o/nlist.h>
#include <mach-o/fat.h>
/* --- !!! --- EXTERNAL HEADERS end --- !!! --- */

#if VG_WORDSIZE == 4
# define MAGIC MH_MAGIC
# define MACH_HEADER mach_header
# define LC_SEGMENT_CMD LC_SEGMENT
# define SEGMENT_COMMAND segment_command
# define SECTION section
# define NLIST nlist
#else
# define MAGIC MH_MAGIC_64
# define MACH_HEADER mach_header_64
# define LC_SEGMENT_CMD LC_SEGMENT_64
# define SEGMENT_COMMAND segment_command_64
# define SECTION section_64
# define NLIST nlist_64
#endif


/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- Mach-O file mapping/unmapping helpers                ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

typedef
   struct {
      /* These two describe the entire mapped-in ("primary") image,
         fat headers, kitchen sink, whatnot: the entire file.  The
         image is mapped into img[0 .. img_szB-1]. */
      UChar* img;
      SizeT  img_szB;
      /* These two describe the Mach-O object of interest, which is
         presumably somewhere inside the primary image.
         map_image_aboard() below, which generates this info, will
         carefully check that the macho_ fields denote a section of
         memory that falls entirely inside img[0 .. img_szB-1]. */
      UChar* macho_img;
      SizeT  macho_img_szB;
   }
   ImageInfo;


/* Quick plausibility test on the first szB bytes of a file (buf):
   returns True if they begin with either a (big-endian) fat header
   or a thin Mach-O header for this word size, False otherwise. */
Bool ML_(is_macho_object_file)( const void* buf, SizeT szB )
{
   /* (JRS: the Mach-O headers might not be in this mapped data,
      because we only mapped a page for this initial check,
      or at least not very much, and what's at the start of the file
      is in general a so-called fat header.  The Mach-O object we're
      interested in could be arbitrarily far along the image, and so
      we can't assume its header will fall within this page.) */

   /* But we can say that either it's a fat object, in which case it
      begins with a fat header, or it's unadorned Mach-O, in which
      case it starts with a normal header.  At least do what checks we
      can to establish whether or not we're looking at something
      sane. */

   const struct fat_header*  fh_be = buf;
   const struct MACH_HEADER* mh    = buf;

   vg_assert(buf);
   if (szB < sizeof(struct fat_header))
      return False;
   if (VG_(ntohl)(fh_be->magic) == FAT_MAGIC)
      return True;

   if (szB < sizeof(struct MACH_HEADER))
      return False;
   if (mh->magic == MAGIC)
      return True;

   return False;
}


/* Unmap an image mapped in by map_image_aboard. */
static void unmap_image ( /*MOD*/ImageInfo* ii )
{
   SysRes sres;
   vg_assert(ii->img);
   vg_assert(ii->img_szB > 0);
   sres = VG_(am_munmap_valgrind)( (Addr)ii->img, ii->img_szB );
   /* Do we care if this fails?  I suppose so; it would indicate
      some fairly serious snafu with the mapping of the file. */
   vg_assert( !sr_isError(sres) );
   VG_(memset)(ii, 0, sizeof(*ii));
}


/* Map a given fat or thin object aboard, find the thin part if
   necessary, do some checks, and write details of both the fat and
   thin parts into *ii.  Returns False (and leaves the file unmapped)
   on failure.  Guarantees to return pointers to a valid(ish) Mach-O
   image if it succeeds. */
static Bool map_image_aboard ( DebugInfo* di, /* only for err msgs */
                               /*OUT*/ImageInfo* ii, UChar* filename )
{
   VG_(memset)(ii, 0, sizeof(*ii));

   /* First off, try to map the thing in. */
   { SizeT  size;
     SysRes fd, sres;
     struct vg_stat stat_buf;

     fd = VG_(stat)(filename, &stat_buf);
     if (sr_isError(fd)) {
        ML_(symerr)(di, True, "Can't stat image (to determine its size)?!");
        return False;
     }
     size = stat_buf.size;

     fd = VG_(open)(filename, VKI_O_RDONLY, 0);
     if (sr_isError(fd)) {
        ML_(symerr)(di, True, "Can't open image to read symbols?!");
        return False;
     }

     sres = VG_(am_mmap_file_float_valgrind)
               ( size, VKI_PROT_READ, sr_Res(fd), 0 );

     /* The descriptor is not needed once the mmap attempt has been
        made.  Close it before looking at the result, so that it is
        not leaked when the mapping failed. */
     VG_(close)(sr_Res(fd));

     if (sr_isError(sres)) {
        ML_(symerr)(di, True, "Can't mmap image to read symbols?!");
        return False;
     }

     ii->img     = (UChar*)sr_Res(sres);
     ii->img_szB = size;
   }

   /* Now it's mapped in and we have .img and .img_szB set.  Look for
      the embedded Mach-O object.  If not findable, unmap and fail. */
   { struct fat_header*  fh_be;
     struct fat_header   fh;
     struct MACH_HEADER* mh;

     // Assume initially that we have a thin image, and update
     // these if it turns out to be fat.
     ii->macho_img     = ii->img;
     ii->macho_img_szB = ii->img_szB;

     // Check for fat header.
     if (ii->img_szB < sizeof(struct fat_header)) {
        ML_(symerr)(di, True, "Invalid Mach-O file (0 too small).");
        goto unmap_and_fail;
     }

     // Fat header is always BIG-ENDIAN
     fh_be = (struct fat_header *)ii->img;
     fh.magic = VG_(ntohl)(fh_be->magic);
     fh.nfat_arch = VG_(ntohl)(fh_be->nfat_arch);
     if (fh.magic == FAT_MAGIC) {
        // Look for a good architecture.
        struct fat_arch *arch_be;
        struct fat_arch arch;
        Int f;
        /* Do the size arithmetic in 64 bits, so that a bogus
           nfat_arch cannot wrap the sum on a 32-bit build. */
        if ((ULong)ii->img_szB
            < (ULong)sizeof(struct fat_header)
              + (ULong)fh.nfat_arch * (ULong)sizeof(struct fat_arch)) {
           ML_(symerr)(di, True, "Invalid Mach-O file (1 too small).");
           goto unmap_and_fail;
        }
        for (f = 0, arch_be = (struct fat_arch *)(fh_be+1);
             f < fh.nfat_arch;
             f++, arch_be++) {
           Int cputype;
#          if defined(VGA_ppc)
           cputype = CPU_TYPE_POWERPC;
#          elif defined(VGA_ppc64)
           cputype = CPU_TYPE_POWERPC64;
#          elif defined(VGA_x86)
           cputype = CPU_TYPE_X86;
#          elif defined(VGA_amd64)
           cputype = CPU_TYPE_X86_64;
#          else
#            error "unknown architecture"
#          endif
           arch.cputype    = VG_(ntohl)(arch_be->cputype);
           arch.cpusubtype = VG_(ntohl)(arch_be->cpusubtype);
           arch.offset     = VG_(ntohl)(arch_be->offset);
           arch.size       = VG_(ntohl)(arch_be->size);
           if (arch.cputype == cputype) {
              /* offset and size are both 32-bit values taken from
                 the file; widen them so the sum cannot wrap. */
              if ((ULong)ii->img_szB
                  < (ULong)arch.offset + (ULong)arch.size) {
                 ML_(symerr)(di, True, "Invalid Mach-O file (2 too small).");
                 goto unmap_and_fail;
              }
              ii->macho_img     = ii->img + arch.offset;
              ii->macho_img_szB = arch.size;
              break;
           }
        }
        if (f == fh.nfat_arch) {
           ML_(symerr)(di, True,
                       "No acceptable architecture found in fat file.");
           goto unmap_and_fail;
        }
     }

     /* Sanity check what we found. */

     /* assured by logic above */
     vg_assert(ii->img_szB >= sizeof(struct fat_header));

     if (ii->macho_img_szB < sizeof(struct MACH_HEADER)) {
        ML_(symerr)(di, True, "Invalid Mach-O file (3 too small).");
        goto unmap_and_fail;
     }

     if (ii->macho_img_szB > ii->img_szB) {
        ML_(symerr)(di, True, "Invalid Mach-O file (thin bigger than fat).");
        goto unmap_and_fail;
     }

     if (ii->macho_img >= ii->img
         && ii->macho_img + ii->macho_img_szB <= ii->img + ii->img_szB) {
        /* thin entirely within fat, as expected */
     } else {
        ML_(symerr)(di, True, "Invalid Mach-O file (thin not inside fat).");
        goto unmap_and_fail;
     }

     mh = (struct MACH_HEADER *)ii->macho_img;
     if (mh->magic != MAGIC) {
        ML_(symerr)(di, True, "Invalid Mach-O file (bad magic).");
        goto unmap_and_fail;
     }

     /* Widened for the same reason as the checks above. */
     if ((ULong)ii->macho_img_szB
         < (ULong)sizeof(struct MACH_HEADER) + (ULong)mh->sizeofcmds) {
        ML_(symerr)(di, True, "Invalid Mach-O file (4 too small).");
        goto unmap_and_fail;
     }
   }

   vg_assert(ii->img);
   vg_assert(ii->macho_img);
   vg_assert(ii->img_szB > 0);
   vg_assert(ii->macho_img_szB > 0);
   vg_assert(ii->macho_img >= ii->img);
   vg_assert(ii->macho_img + ii->macho_img_szB <= ii->img + ii->img_szB);
   return True; /* success */
   /*NOTREACHED*/

  unmap_and_fail:
   unmap_image(ii);
   return False; /* bah! */
}


/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- Mach-O symbol table reading                          ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

/* Read a symbol table (nlist).  Add the resulting candidate symbols
   to 'syms'; the caller will post-process them and hand them off to
   ML_(addSym) itself. */
static
void read_symtab( /*OUT*/XArray* /* DiSym */ syms,
                  struct _DebugInfo* di,
                  struct NLIST* o_symtab, UInt o_symtab_count,
                  UChar* o_strtab, UInt o_strtab_sz )
{
   Int    i;
   Addr   sym_addr;
   DiSym  risym;
   UChar* name;

   static UChar* s_a_t_v = NULL; /* do not make non-static */

   for (i = 0; i < o_symtab_count; i++) {
      struct NLIST *nl = o_symtab+i;
      if ((nl->n_type & N_TYPE) == N_SECT) {
         sym_addr = di->text_bias + nl->n_value;
      /*} else if ((nl->n_type & N_TYPE) == N_ABS) {
         GrP fixme don't ignore absolute symbols?
         sym_addr = nl->n_value; */
      } else {
         continue;
      }

      /* skip names which point outside the string table;
         following these risks segfaulting Valgrind.  This is done
         before the trace print below, since that print reads the
         name too. */
      name = o_strtab + nl->n_un.n_strx;
      if (name < o_strtab || name >= o_strtab + o_strtab_sz)
         continue;

      if (di->trace_symtab)
         VG_(printf)("nlist raw: avma %010lx %s\n",
                     sym_addr, name );

      /* If no part of the symbol falls within the mapped range,
         ignore it. */
      if (sym_addr <= di->text_avma
          || sym_addr >= di->text_avma+di->text_size) {
         continue;
      }

      /* skip nameless symbols; these appear to be common, but
         useless */
      if (*name == 0)
         continue;

      risym.tocptr = 0;
      risym.addr = sym_addr;
      risym.size = // let canonicalize fix it
                   di->text_avma+di->text_size - sym_addr;
      risym.name = ML_(addStr)(di, name, -1);
      risym.isText = True;
      risym.isIFunc = False;
      // Lots of user function names get prepended with an underscore.  Eg. the
      // function 'f' becomes the symbol '_f'.  And the "below main"
      // function is called "start".  So we skip the leading underscore, and
      // if we see 'start' and --show-below-main=no, we rename it as
      // "start_according_to_valgrind", which makes it easy to spot later
      // and display as "(below main)".
      if (risym.name[0] == '_') {
         risym.name++;
      } else if (!VG_(clo_show_below_main) && VG_STREQ(risym.name, "start")) {
         if (s_a_t_v == NULL)
            s_a_t_v = ML_(addStr)(di, "start_according_to_valgrind", -1);
         vg_assert(s_a_t_v);
         risym.name = s_a_t_v;
      }

      vg_assert(risym.name);
      VG_(addToXA)( syms, &risym );
   }
}


/* Compare DiSyms by their start address, and for equal addresses, use
   the name as a secondary sort key. */
static Int cmp_DiSym_by_start_then_name ( void* v1, void* v2 )
{
   DiSym* s1 = (DiSym*)v1;
   DiSym* s2 = (DiSym*)v2;
   if (s1->addr < s2->addr) return -1;
   if (s1->addr > s2->addr) return 1;
   return VG_(strcmp)(s1->name, s2->name);
}

/* 'cand' is a bunch of candidate symbols obtained by reading
   nlist-style symbol table entries.  Their ends may overlap, so sort
   them and truncate them accordingly.  The code in this routine is
   copied almost verbatim from read_symbol_table() in readxcoff.c. */
static void tidy_up_cand_syms ( /*MOD*/XArray* /* of DiSym */ syms,
                                Bool trace_symtab )
{
   Word nsyms, i, j, k, m;

   nsyms = VG_(sizeXA)(syms);

   VG_(setCmpFnXA)(syms, cmp_DiSym_by_start_then_name);
   VG_(sortXA)(syms);

   /* We only know for sure the start addresses (actual VMAs) of
      symbols, and an overestimation of their end addresses.  So sort
      by start address, then clip each symbol so that its end address
      does not overlap with the next one along.

      There is a small refinement: if a group of symbols have the same
      address, treat them as a group: find the next symbol along that
      has a higher start address, and clip all of the group
      accordingly.  This clips the group as a whole so as not to
      overlap following symbols.  This leaves prefersym() in
      storage.c, which is not nlist-specific, to later decide which of
      the symbols in the group to keep.

      Another refinement is that we need to get rid of symbols which,
      after clipping, have identical starts, ends, and names.  So the
      sorting uses the name as a secondary key.
   */

   for (i = 0; i < nsyms; i++) {
      for (k = i+1;
           k < nsyms
             && ((DiSym*)VG_(indexXA)(syms,i))->addr
                 == ((DiSym*)VG_(indexXA)(syms,k))->addr;
           k++)
         ;
      /* So now [i .. k-1] is a group all with the same start address.
         Clip their ending addresses so they don't overlap [k].  In
         the normal case (no overlaps), k == i+1. */
      if (k < nsyms) {
         DiSym* next = (DiSym*)VG_(indexXA)(syms,k);
         for (m = i; m < k; m++) {
            DiSym* here = (DiSym*)VG_(indexXA)(syms,m);
            vg_assert(here->addr < next->addr);
            if (here->addr + here->size > next->addr)
               here->size = next->addr - here->addr;
         }
      }
      i = k-1;
      vg_assert(i <= nsyms);
   }

   /* Compact the array in place: j is the write cursor, i the read
      cursor; an entry is kept unless it duplicates the last kept
      entry in address, size and name. */
   j = 0;
   if (nsyms > 0) {
      j = 1;
      for (i = 1; i < nsyms; i++) {
         DiSym *s_j1, *s_j, *s_i;
         vg_assert(j <= i);
         s_j1 = (DiSym*)VG_(indexXA)(syms, j-1);
         s_j  = (DiSym*)VG_(indexXA)(syms, j);
         s_i  = (DiSym*)VG_(indexXA)(syms, i);
         if (s_i->addr != s_j1->addr
             || s_i->size != s_j1->size
             || 0 != VG_(strcmp)(s_i->name, s_j1->name)) {
            *s_j = *s_i;
            j++;
         } else {
            if (trace_symtab)
               VG_(printf)("nlist cleanup: dump duplicate avma %010lx %s\n",
                           s_i->addr, s_i->name );
         }
      }
   }
   vg_assert(j >= 0 && j <= nsyms);
   VG_(dropTailXA)(syms, nsyms - j);
}


/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- Mach-O top-level processing                          ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

#if !defined(APPLE_DSYM_EXT_AND_SUBDIRECTORY)
#define APPLE_DSYM_EXT_AND_SUBDIRECTORY ".dSYM/Contents/Resources/DWARF/"
#endif


/* Returns True iff VG_(stat) on 'path' succeeds. */
static Bool file_exists_p(const Char *path)
{
   struct vg_stat sbuf;
   SysRes res = VG_(stat)(path, &sbuf);
   return sr_isError(res) ? False : True;
}


/* Search for an existing dSYM file as a possible separate debug file.
   Adapted from gdb.  On success the returned path was allocated with
   ML_(dinfo_zalloc) and the caller must release it with
   ML_(dinfo_free).  Returns NULL if nothing suitable was found. */
static Char *
find_separate_debug_file (const Char *executable_name)
{
   Char *basename_str;
   Char *dot_ptr;
   Char *slash_ptr;
   Char *dsymfile;

   /* Make sure the object file name itself doesn't contain ".dSYM" in it or we
      will end up with an infinite loop where after we add a dSYM symbol file,
      it will then enter this function asking if there is a debug file for the
      dSYM file itself.  */
   if (VG_(strcasestr) (executable_name, ".dSYM") == NULL)
   {
      /* Check for the existence of a .dSYM file for a given executable.  */
      basename_str = VG_(basename) (executable_name);
      dsymfile = ML_(dinfo_zalloc)("di.readmacho.dsymfile",
                    VG_(strlen) (executable_name)
                    + VG_(strlen) (APPLE_DSYM_EXT_AND_SUBDIRECTORY)
                    + VG_(strlen) (basename_str)
                    + 1
                 );

      /* First try for the dSYM in the same directory as the original file.  */
      VG_(strcpy) (dsymfile, executable_name);
      VG_(strcat) (dsymfile, APPLE_DSYM_EXT_AND_SUBDIRECTORY);
      VG_(strcat) (dsymfile, basename_str);

      if (file_exists_p (dsymfile))
         return dsymfile;

      /* Now search for any parent directory that has a '.' in it so we can find
         Mac OS X applications, bundles, plugins, and any other kinds of files.
         Mac OS X application bundles will have their program in
         "/some/path/MyApp.app/Contents/MacOS/MyApp" (or replace ".app" with
         ".bundle" or ".plugin" for other types of bundles).  So we look for any
         prior '.' character and try appending the apple dSYM extension and
         subdirectory and see if we find an existing dSYM file (in the above
         MyApp example the dSYM would be at either:
         "/some/path/MyApp.app.dSYM/Contents/Resources/DWARF/MyApp" or
         "/some/path/MyApp.dSYM/Contents/Resources/DWARF/MyApp".  */
      VG_(strcpy) (dsymfile, VG_(dirname) (executable_name));
      while ((dot_ptr = VG_(strrchr) (dsymfile, '.')))
      {
         /* Find the directory delimiter that follows the '.' character since
            we now look for a .dSYM that follows any bundle extension.  */
         slash_ptr = VG_(strchr) (dot_ptr, '/');
         if (slash_ptr)
         {
             /* NULL terminate the string at the '/' character and append
                the path down to the dSYM file.  */
            *slash_ptr = '\0';
            VG_(strcat) (slash_ptr, APPLE_DSYM_EXT_AND_SUBDIRECTORY);
            VG_(strcat) (slash_ptr, basename_str);
            if (file_exists_p (dsymfile))
               return dsymfile;
         }

         /* NULL terminate the string at the '.' character and append
            the path down to the dSYM file.  */
         *dot_ptr = '\0';
         VG_(strcat) (dot_ptr, APPLE_DSYM_EXT_AND_SUBDIRECTORY);
         VG_(strcat) (dot_ptr, basename_str);
         if (file_exists_p (dsymfile))
            return dsymfile;

         /* NULL terminate the string at the '.' located by the strrchr()
            function again.  */
         *dot_ptr = '\0';

         /* We found a previous extension '.' character and did not find a
            dSYM file so now find previous directory delimiter so we don't
            try multiple times on a file name that may have a version number
            in it such as "/some/path/MyApp.6.0.4.app".  */
         slash_ptr = VG_(strrchr) (dsymfile, '/');
         if (!slash_ptr)
            break;
         /* NULL terminate the string at the previous directory character
            and search again.  */
         *slash_ptr = '\0';
      }

      /* No candidate exists; release the scratch buffer rather than
         leaking it. */
      ML_(dinfo_free)(dsymfile);
   }

   return NULL;
}


/* Look up section 'sectname' of segment 'segname' in the thin Mach-O
   image base[0 .. size-1].  On success returns a pointer to the
   section's data inside the image and, if sect_size is non-NULL,
   stores the section's size there.  Returns NULL (and a size of
   zero) if the section is absent or does not lie wholly within the
   image. */
static UChar *getsectdata(UChar* base, SizeT size,
                          Char *segname, Char *sectname,
                          /*OUT*/Word *sect_size)
{
   struct MACH_HEADER *mh = (struct MACH_HEADER *)base;
   struct load_command *cmd;
   Int c;

   for (c = 0, cmd = (struct load_command *)(mh+1);
        c < mh->ncmds;
        c++, cmd = (struct load_command *)(cmd->cmdsize + (Addr)cmd))
   {
      if (cmd->cmd == LC_SEGMENT_CMD) {
         struct SEGMENT_COMMAND *seg = (struct SEGMENT_COMMAND *)cmd;
         if (0 == VG_(strncmp(seg->segname, segname, sizeof(seg->segname)))) {
            struct SECTION *sects = (struct SECTION *)(seg+1);
            Int s;
            for (s = 0; s < seg->nsects; s++) {
               if (0 == VG_(strncmp(sects[s].sectname, sectname,
                                    sizeof(sects[s].sectname))))
               {
                  /* Don't hand back a pointer/size pair that reaches
                     outside the mapped image. */
                  if ((ULong)sects[s].offset > (ULong)size
                      || (ULong)sects[s].size
                         > (ULong)size - (ULong)sects[s].offset) {
                     if (sect_size) *sect_size = 0;
                     return NULL;
                  }
                  if (sect_size) *sect_size = sects[s].size;
                  return (UChar *)(base + sects[s].offset);
               }
            }
         }
      }
   }

   if (sect_size) *sect_size = 0;
   return 0;
}


/* Brute force just simply search for uuid[0..15] in img[0..n_img-1] */
static Bool check_uuid_matches ( Addr imgA, Word n_img, UChar* uuid )
{
   Word i;
   UChar* img = (UChar*)imgA;
   UChar first = uuid[0];
   if (n_img < 16)
      return False;
   /* The last position at which a 16-byte window still fits is
      n_img-16, so the bound is inclusive. */
   for (i = 0; i <= n_img-16; i++) {
      if (img[i] != first)
         continue;
      if (0 == VG_(memcmp)( &img[i], &uuid[0], 16 ))
         return True;
   }
   return False;
}


/* Heuristic kludge: return True if this looks like an installed
   standard library; hence we shouldn't consider automagically running
   dsymutil on it. */
static Bool is_systemish_library_name ( UChar* name )
{
   vg_assert(name);
   if (0 == VG_(strncasecmp)(name, "/usr/", 5)
       || 0 == VG_(strncasecmp)(name, "/bin/", 5)
       || 0 == VG_(strncasecmp)(name, "/sbin/", 6)
       || 0 == VG_(strncasecmp)(name, "/opt/", 5)
       || 0 == VG_(strncasecmp)(name, "/sw/", 4)
       || 0 == VG_(strncasecmp)(name, "/System/", 8)
       || 0 == VG_(strncasecmp)(name, "/Library/", 9)
       || 0 == VG_(strncasecmp)(name, "/Applications/", 14)) {
      return True;
   } else {
      return False;
   }
}


/* Top-level Mach-O reader for the object described by 'di'.  Maps
   di->filename, walks its load commands to set di->soname and the
   text/data svma/avma/size/bias fields, reads nlist symbols, and --
   if the object carries a UUID -- tries to locate (or regenerate
   with dsymutil) a matching .dSYM and read DWARF from it.  Returns
   True on success and False on failure; every image mapped here is
   unmapped again before returning. */
Bool ML_(read_macho_debug_info)( struct _DebugInfo* di )
{
   struct symtab_command *symcmd = NULL;
   struct dysymtab_command *dysymcmd = NULL;
   HChar* dsymfilename = NULL;
   Bool have_uuid = False;
   UChar uuid[16];
   ImageInfo ii;  /* main file */
   ImageInfo iid; /* auxiliary .dSYM file */
   Bool ok;

   /* mmap the object file to look for di->soname and di->text_bias
      and uuid and nlist and STABS */

   if (VG_(clo_verbosity) > 1)
      VG_(message)(Vg_DebugMsg,
                   "%s (%#lx)\n", di->filename, di->rx_map_avma );
   if (VG_(clo_xml))
      VG_(printf_xml)("<load_obj><obj>%s</obj><ip>%#lx</ip></load_obj>\n", di->filename, di->rx_map_avma);

   /* This should be ensured by our caller. */
   vg_assert(di->have_rx_map);
   vg_assert(di->have_rw_map);

   VG_(memset)(&ii,   0, sizeof(ii));
   VG_(memset)(&iid,  0, sizeof(iid));
   VG_(memset)(&uuid, 0, sizeof(uuid));

   ok = map_image_aboard( di, &ii, di->filename );
   if (!ok) goto fail;

   vg_assert(ii.macho_img != NULL && ii.macho_img_szB > 0);

   /* Poke around in the Mach-O header, to find some important
      stuff. */
   // Find LC_SYMTAB and LC_DYSYMTAB, if present.
   // Read di->soname from LC_ID_DYLIB if present,
   //    or from LC_ID_DYLINKER if present,
   //    or use "NONE".
   // Get di->text_bias (aka slide) based on the corresponding LC_SEGMENT
   // Get uuid for later dsym search

   di->text_bias = 0;

   { struct MACH_HEADER *mh = (struct MACH_HEADER *)ii.macho_img;
      struct load_command *cmd;
      Int c;

      for (c = 0, cmd = (struct load_command *)(mh+1);
           c < mh->ncmds;
           c++, cmd = (struct load_command *)(cmd->cmdsize
                                              + (unsigned long)cmd)) {
         if (cmd->cmd == LC_SYMTAB) {
            symcmd = (struct symtab_command *)cmd;
         }
         else if (cmd->cmd == LC_DYSYMTAB) {
            dysymcmd = (struct dysymtab_command *)cmd;
         }
         else if (cmd->cmd == LC_ID_DYLIB && mh->filetype == MH_DYLIB) {
            // GrP fixme bundle?
            struct dylib_command *dcmd = (struct dylib_command *)cmd;
            UChar *dylibname = dcmd->dylib.name.offset + (UChar *)dcmd;
            UChar *soname = VG_(strrchr)(dylibname, '/');
            if (!soname) soname = dylibname;
            else soname++;
            di->soname = ML_(dinfo_strdup)("di.readmacho.dylibname",
                                           soname);
         }
         else if (cmd->cmd==LC_ID_DYLINKER && mh->filetype==MH_DYLINKER) {
            struct dylinker_command *dcmd = (struct dylinker_command *)cmd;
            UChar *dylinkername = dcmd->name.offset + (UChar *)dcmd;
            UChar *soname = VG_(strrchr)(dylinkername, '/');
            if (!soname) soname = dylinkername;
            else soname++;
            di->soname = ML_(dinfo_strdup)("di.readmacho.dylinkername",
                                           soname);
         }

         // A comment from Julian about why varinfo[35] fail:
         //
         // My impression is, from comparing the output of otool -l for these
         // executables with the logic in ML_(read_macho_debug_info),
         // specifically the part that begins "else if (cmd->cmd ==
         // LC_SEGMENT_CMD) {", that it's a complete hack which just happens
         // to work ok for text symbols.  In particular, it appears to assume
         // that in a "struct load_command" of type LC_SEGMENT_CMD, the first
         // "struct SEGMENT_COMMAND" inside it is going to contain the info we
         // need.  However, otool -l shows, and also the Apple docs state,
         // that a struct load_command may contain an arbitrary number of
         // struct SEGMENT_COMMANDs, so I'm not sure why it's OK to merely
         // snarf the first.  But I'm not sure about this.
         //
         // The "Try for __DATA" block below simply adds acquisition of data
         // svma/bias values using the same assumption.  It also needs
         // (probably) to deal with bss sections, but I don't understand how
         // this all ties together really, so it requires further study.
         //
         // If you can get your head around the relationship between MachO
         // segments, sections and load commands, this might be relatively
         // easy to fix properly.
         //
         // Basically we need to come up with plausible numbers for di->
         // {text,data,bss}_{avma,svma}, from which the _bias numbers are
         // then trivially derived.  Then I think the debuginfo reader should
         // work pretty well.
         else if (cmd->cmd == LC_SEGMENT_CMD) {
            struct SEGMENT_COMMAND *seg = (struct SEGMENT_COMMAND *)cmd;
            /* Try for __TEXT */
            if (!di->text_present
                && 0 == VG_(strcmp)(seg->segname, "__TEXT")
                /* DDD: is the next line a kludge? -- JRS */
                && seg->fileoff == 0 && seg->filesize != 0) {
               di->text_present = True;
               di->text_svma = (Addr)seg->vmaddr;
               di->text_avma = di->rx_map_avma;
               di->text_size = seg->vmsize;
               di->text_bias = di->text_avma - di->text_svma;
               /* Make the _debug_ values be the same as the
                  svma/bias for the primary object, since there is
                  no secondary (debuginfo) object, but nevertheless
                  downstream biasing of Dwarf3 relies on the
                  _debug_ values. */
               di->text_debug_svma = di->text_svma;
               di->text_debug_bias = di->text_bias;
            }
            /* Try for __DATA */
            if (!di->data_present
                && 0 == VG_(strcmp)(seg->segname, "__DATA")
                /* && DDD:seg->fileoff == 0 */ && seg->filesize != 0) {
               di->data_present = True;
               di->data_svma = (Addr)seg->vmaddr;
               di->data_avma = di->rw_map_avma;
               di->data_size = seg->vmsize;
               di->data_bias = di->data_avma - di->data_svma;
               di->data_debug_svma = di->data_svma;
               di->data_debug_bias = di->data_bias;
            }
         }
         else if (cmd->cmd == LC_UUID) {
             struct uuid_command *uuid_cmd = (struct uuid_command *)cmd;
             VG_(memcpy)(uuid, uuid_cmd->uuid, sizeof(uuid));
             have_uuid = True;
         }
      }
   }

   if (!di->soname) {
      di->soname = ML_(dinfo_strdup)("di.readmacho.noname", "NONE");
   }

   /* Now we have the base object to hand.  Read symbols from it. */

   if (ii.macho_img && ii.macho_img_szB > 0 && symcmd && dysymcmd) {

      /* Read nlist symbol table */
      struct NLIST *syms;
      UChar *strs;
      XArray* /* DiSym */ candSyms = NULL;
      Word i, nCandSyms;

      /* All the operands below are 32-bit fields read from the file;
         do the sums in 64 bits so that bogus values cannot wrap
         around and sneak past the checks. */
      if ((ULong)ii.macho_img_szB
             < (ULong)symcmd->stroff + (ULong)symcmd->strsize
          || (ULong)ii.macho_img_szB
             < (ULong)symcmd->symoff
               + (ULong)symcmd->nsyms * (ULong)sizeof(struct NLIST)) {
         ML_(symerr)(di, False, "Invalid Mach-O file (5 too small).");
         goto fail;
      }
      if ((ULong)dysymcmd->ilocalsym + (ULong)dysymcmd->nlocalsym
             > (ULong)symcmd->nsyms
          || (ULong)dysymcmd->iextdefsym + (ULong)dysymcmd->nextdefsym
             > (ULong)symcmd->nsyms) {
         ML_(symerr)(di, False, "Invalid Mach-O file (bad symbol table).");
         goto fail;
      }

      syms = (struct NLIST *)(ii.macho_img + symcmd->symoff);
      strs = (UChar *)(ii.macho_img + symcmd->stroff);

      if (VG_(clo_verbosity) > 1)
         VG_(message)(Vg_DebugMsg,
            " reading syms from primary file (%d %d)\n",
            dysymcmd->nextdefsym, dysymcmd->nlocalsym );

      /* Read candidate symbols into 'candSyms', so we can truncate
         overlapping ends and generally tidy up, before presenting
         them to ML_(addSym). */
      candSyms = VG_(newXA)(
                    ML_(dinfo_zalloc), "di.readmacho.candsyms.1",
                    ML_(dinfo_free), sizeof(DiSym)
                 );
      vg_assert(candSyms);

      // extern symbols
      read_symtab(candSyms,
                  di,
                  syms + dysymcmd->iextdefsym, dysymcmd->nextdefsym,
                  strs, symcmd->strsize);
      // static and private_extern symbols
      read_symtab(candSyms,
                  di,
                  syms + dysymcmd->ilocalsym, dysymcmd->nlocalsym,
                  strs, symcmd->strsize);

      /* tidy up the cand syms -- trim overlapping ends.  May resize
         candSyms. */
      tidy_up_cand_syms( candSyms, di->trace_symtab );

      /* and finally present them to ML_(addSym) */
      nCandSyms = VG_(sizeXA)( candSyms );
      for (i = 0; i < nCandSyms; i++) {
         DiSym* cand = (DiSym*) VG_(indexXA)( candSyms, i );
         if (di->trace_symtab)
            VG_(printf)("nlist final: acquire avma %010lx-%010lx %s\n",
                        cand->addr, cand->addr + cand->size - 1, cand->name );
         ML_(addSym)( di, cand );
      }
      VG_(deleteXA)( candSyms );
   }

   /* If there's no UUID in the primary, don't even bother to try and
      read any DWARF, since we won't be able to verify it matches.
      Our policy is not to load debug info unless we can verify that
      it matches the primary.  Just declare success at this point.
      And don't complain to the user, since that would cause us to
      complain on objects compiled without -g.  (Some versions of
      XCode are observed to omit a UUID entry for object linked(?)
      without -g.  Others don't appear to omit it.) */
   if (!have_uuid)
      goto success;

   /* mmap the dSYM file to look for DWARF debug info.  If successful,
      use the .macho_img and .macho_img_szB in iid. */

   dsymfilename = find_separate_debug_file( di->filename );

   /* Try to load it. */
   if (dsymfilename) {
      Bool valid;

      if (VG_(clo_verbosity) > 1)
         VG_(message)(Vg_DebugMsg, " dSYM= %s\n", dsymfilename);

      ok = map_image_aboard( di, &iid, dsymfilename );
      if (!ok) goto fail;

      /* check it has the right uuid. */
      vg_assert(have_uuid);
      valid = iid.macho_img && iid.macho_img_szB > 0
              && check_uuid_matches( (Addr)iid.macho_img,
                                     iid.macho_img_szB, uuid );
      if (valid)
         goto read_the_dwarf;

      if (VG_(clo_verbosity) > 1)
         VG_(message)(Vg_DebugMsg, " dSYM does not have "
                                   "correct UUID (out of date?)\n");

      /* Drop the mismatching image now.  Otherwise a later call to
         map_image_aboard would zero 'iid' and leak this mapping, or
         -- if no dSYM is found after running dsymutil -- we would
         fall through and read DWARF from an image known not to
         match.  dsymfilename is deliberately kept: it is still
         needed for the "has wrong UUID" message below. */
      unmap_image( &iid );
   }

   /* There was no dsym file, or it doesn't match.  We'll have to try
      regenerating it, unless --dsymutil=no, in which case just complain
      instead. */

   /* If this looks like a lib that we shouldn't run dsymutil on, just
      give up.  (possible reasons: is system lib, or in /usr etc, or
      the dsym dir would not be writable by the user, or we're running
      as root) */
   vg_assert(di->filename);
   if (is_systemish_library_name(di->filename))
      goto success;

   if (!VG_(clo_dsymutil)) {
      if (VG_(clo_verbosity) == 1) {
         VG_(message)(Vg_DebugMsg, "%s:\n", di->filename);
      }
      if (VG_(clo_verbosity) > 0)
         VG_(message)(Vg_DebugMsg, "%sdSYM directory %s; consider using "
                      "--dsymutil=yes\n",
                      VG_(clo_verbosity) > 1 ? " " : "",
                      dsymfilename ? "has wrong UUID" : "is missing");
      goto success;
   }

   /* Run dsymutil */

   { Int r;
     HChar* dsymutil = "/usr/bin/dsymutil ";
     HChar* cmd = ML_(dinfo_zalloc)( "di.readmacho.tmp1",
                                     VG_(strlen)(dsymutil)
                                     + VG_(strlen)(di->filename)
                                     + 32 /* misc */ );
     /* NOTE(review): the file name is only wrapped in double quotes
        before being handed to VG_(system); a name containing '"' or
        other shell metacharacters is not escaped. */
     VG_(strcpy)(cmd, dsymutil);
     if (0) VG_(strcat)(cmd, "--verbose ");
     VG_(strcat)(cmd, "\"");
     VG_(strcat)(cmd, di->filename);
     VG_(strcat)(cmd, "\"");
     VG_(message)(Vg_DebugMsg, "run: %s\n", cmd);
     r = VG_(system)( cmd );
     if (r)
        VG_(message)(Vg_DebugMsg, "run: %s FAILED\n", dsymutil);
     ML_(dinfo_free)(cmd);
     /* Release the stale path (if any) before looking again, so it
        is not leaked when overwritten. */
     if (dsymfilename) {
        ML_(dinfo_free)(dsymfilename);
        dsymfilename = NULL;
     }
     dsymfilename = find_separate_debug_file(di->filename);
   }

   /* Try again to load it. */
   if (dsymfilename) {
      Bool valid;

      if (VG_(clo_verbosity) > 1)
         VG_(message)(Vg_DebugMsg, " dsyms= %s\n", dsymfilename);

      ok = map_image_aboard( di, &iid, dsymfilename );
      if (!ok) goto fail;

      /* check it has the right uuid. */
      vg_assert(have_uuid);
      valid = iid.macho_img && iid.macho_img_szB > 0
              && check_uuid_matches( (Addr)iid.macho_img,
                                     iid.macho_img_szB, uuid );
      if (!valid) {
         if (VG_(clo_verbosity) > 0) {
            VG_(message)(Vg_DebugMsg,
               "WARNING: did not find expected UUID %02X%02X%02X%02X"
               "-%02X%02X-%02X%02X-%02X%02X-%02X%02X%02X%02X%02X%02X"
               " in dSYM dir\n",
               (UInt)uuid[0], (UInt)uuid[1], (UInt)uuid[2], (UInt)uuid[3],
               (UInt)uuid[4], (UInt)uuid[5], (UInt)uuid[6], (UInt)uuid[7],
               (UInt)uuid[8], (UInt)uuid[9], (UInt)uuid[10],
               (UInt)uuid[11], (UInt)uuid[12], (UInt)uuid[13],
               (UInt)uuid[14], (UInt)uuid[15] );
            VG_(message)(Vg_DebugMsg,
                         "WARNING: for %s\n", di->filename);
         }
         unmap_image( &iid );
         /* unmap_image zeroes the fields, so the following test makes
            sense. */
         goto fail;
      }
   }

   /* Right.  Finally we have our best try at the dwarf image, so go
      on to reading stuff out of it. */

  read_the_dwarf:
   if (iid.macho_img && iid.macho_img_szB > 0) {
      UChar* debug_info_img = NULL;
      Word   debug_info_sz;
      UChar* debug_abbv_img;
      Word   debug_abbv_sz;
      UChar* debug_line_img;
      Word   debug_line_sz;
      UChar* debug_str_img;
      Word   debug_str_sz;
      UChar* debug_ranges_img;
      Word   debug_ranges_sz;
      UChar* debug_loc_img;
      Word   debug_loc_sz;
      UChar* debug_name_img;
      Word   debug_name_sz;

      debug_info_img =
          getsectdata(iid.macho_img, iid.macho_img_szB,
                      "__DWARF", "__debug_info", &debug_info_sz);
      debug_abbv_img =
          getsectdata(iid.macho_img, iid.macho_img_szB,
                      "__DWARF", "__debug_abbrev", &debug_abbv_sz);
      debug_line_img =
          getsectdata(iid.macho_img, iid.macho_img_szB,
                      "__DWARF", "__debug_line", &debug_line_sz);
      debug_str_img =
          getsectdata(iid.macho_img, iid.macho_img_szB,
                      "__DWARF", "__debug_str", &debug_str_sz);
      debug_ranges_img =
          getsectdata(iid.macho_img, iid.macho_img_szB,
                      "__DWARF", "__debug_ranges", &debug_ranges_sz);
      debug_loc_img =
          getsectdata(iid.macho_img, iid.macho_img_szB,
                      "__DWARF", "__debug_loc", &debug_loc_sz);
      debug_name_img =
          getsectdata(iid.macho_img, iid.macho_img_szB,
                      "__DWARF", "__debug_pubnames", &debug_name_sz);

      if (debug_info_img) {
         if (VG_(clo_verbosity) > 1) {
            if (0)
            VG_(message)(Vg_DebugMsg,
                         "Reading dwarf3 for %s (%#lx) from %s"
                         " (%ld %ld %ld %ld %ld %ld)\n",
                         di->filename, di->text_avma, dsymfilename,
                         debug_info_sz, debug_abbv_sz, debug_line_sz,
                         debug_str_sz, debug_ranges_sz, debug_loc_sz
                         );
            VG_(message)(Vg_DebugMsg,
               " reading dwarf3 from dsyms file\n");
         }
         /* The old reader: line numbers and unwind info only */
         ML_(read_debuginfo_dwarf3) ( di,
                                      debug_info_img, debug_info_sz,
                                      debug_abbv_img, debug_abbv_sz,
                                      debug_line_img, debug_line_sz,
                                      debug_str_img,  debug_str_sz );

         /* The new reader: read the DIEs in .debug_info to acquire
            information on variable types and locations.  But only if
            the tool asks for it, or the user requests it on the
            command line. */
         if (VG_(needs).var_info /* the tool requires it */
             || VG_(clo_read_var_info) /* the user asked for it */) {
            ML_(new_dwarf3_reader)(
               di, debug_info_img,   debug_info_sz,
                   debug_abbv_img,   debug_abbv_sz,
                   debug_line_img,   debug_line_sz,
                   debug_str_img,    debug_str_sz,
                   debug_ranges_img, debug_ranges_sz,
                   debug_loc_img,    debug_loc_sz
            );
         }
      }
   }

  success:
   /* The dSYM path is released here rather than before the label,
      so that the early 'goto success' exits do not leak it. */
   if (dsymfilename) ML_(dinfo_free)(dsymfilename);
   if (ii.img)
      unmap_image(&ii);
   if (iid.img)
      unmap_image(&iid);
   return True;

   /* NOTREACHED */

  fail:
   ML_(symerr)(di, True, "Error reading Mach-O object.");
   if (dsymfilename) ML_(dinfo_free)(dsymfilename);
   if (ii.img)
      unmap_image(&ii);
   if (iid.img)
      unmap_image(&iid);
   return False;
}

#endif // defined(VGO_darwin)

/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/