1 2 /*--------------------------------------------------------------------*/ 3 /*--- Reading of syms & debug info from Mach-O files. ---*/ 4 /*--- readmacho.c ---*/ 5 /*--------------------------------------------------------------------*/ 6 7 /* 8 This file is part of Valgrind, a dynamic binary instrumentation 9 framework. 10 11 Copyright (C) 2005-2011 Apple Inc. 12 Greg Parker gparker (at) apple.com 13 14 This program is free software; you can redistribute it and/or 15 modify it under the terms of the GNU General Public License as 16 published by the Free Software Foundation; either version 2 of the 17 License, or (at your option) any later version. 18 19 This program is distributed in the hope that it will be useful, but 20 WITHOUT ANY WARRANTY; without even the implied warranty of 21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 22 General Public License for more details. 23 24 You should have received a copy of the GNU General Public License 25 along with this program; if not, write to the Free Software 26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 27 02111-1307, USA. 28 29 The GNU General Public License is contained in the file COPYING. 30 */ 31 32 #if defined(VGO_darwin) 33 34 #include "pub_core_basics.h" 35 #include "pub_core_vki.h" 36 #include "pub_core_libcbase.h" 37 #include "pub_core_libcprint.h" 38 #include "pub_core_libcassert.h" 39 #include "pub_core_libcfile.h" 40 #include "pub_core_libcproc.h" 41 #include "pub_core_aspacemgr.h" /* for mmaping debuginfo files */ 42 #include "pub_core_machine.h" /* VG_ELF_CLASS */ 43 #include "pub_core_options.h" 44 #include "pub_core_oset.h" 45 #include "pub_core_tooliface.h" /* VG_(needs) */ 46 #include "pub_core_xarray.h" 47 #include "pub_core_clientstate.h" 48 #include "pub_core_debuginfo.h" 49 50 #include "priv_d3basics.h" 51 #include "priv_misc.h" 52 #include "priv_tytypes.h" 53 #include "priv_storage.h" 54 #include "priv_readmacho.h" 55 #include "priv_readdwarf.h" 56 #include "priv_readdwarf3.h" 57 #include "priv_readstabs.h" 58 59 /* --- !!! --- EXTERNAL HEADERS start --- !!! --- */ 60 #include <mach-o/loader.h> 61 #include <mach-o/nlist.h> 62 #include <mach-o/fat.h> 63 /* --- !!! --- EXTERNAL HEADERS end --- !!! --- */ 64 65 #if VG_WORDSIZE == 4 66 # define MAGIC MH_MAGIC 67 # define MACH_HEADER mach_header 68 # define LC_SEGMENT_CMD LC_SEGMENT 69 # define SEGMENT_COMMAND segment_command 70 # define SECTION section 71 # define NLIST nlist 72 #else 73 # define MAGIC MH_MAGIC_64 74 # define MACH_HEADER mach_header_64 75 # define LC_SEGMENT_CMD LC_SEGMENT_64 76 # define SEGMENT_COMMAND segment_command_64 77 # define SECTION section_64 78 # define NLIST nlist_64 79 #endif 80 81 82 /*------------------------------------------------------------*/ 83 /*--- ---*/ 84 /*--- Mach-O file mapping/unmapping helpers ---*/ 85 /*--- ---*/ 86 /*------------------------------------------------------------*/ 87 88 typedef 89 struct { 90 /* These two describe the entire mapped-in ("primary") image, 91 fat headers, kitchen sink, whatnot: the entire file. The 92 image is mapped into img[0 .. img_szB-1]. */ 93 UChar* img; 94 SizeT img_szB; 95 /* These two describe the Mach-O object of interest, which is 96 presumably somewhere inside the primary image. 97 map_image_aboard() below, which generates this info, will 98 carefully check that the macho_ fields denote a section of 99 memory that falls entirely inside img[0 .. img_szB-1]. */ 100 UChar* macho_img; 101 SizeT macho_img_szB; 102 } 103 ImageInfo; 104 105 106 Bool ML_(is_macho_object_file)( const void* buf, SizeT szB ) 107 { 108 /* (JRS: the Mach-O headers might not be in this mapped data, 109 because we only mapped a page for this initial check, 110 or at least not very much, and what's at the start of the file 111 is in general a so-called fat header. The Mach-O object we're 112 interested in could be arbitrarily far along the image, and so 113 we can't assume its header will fall within this page.) */ 114 115 /* But we can say that either it's a fat object, in which case it 116 begins with a fat header, or it's unadorned Mach-O, in which 117 case it starts with a normal header. At least do what checks we 118 can to establish whether or not we're looking at something 119 sane. */ 120 121 const struct fat_header* fh_be = buf; 122 const struct MACH_HEADER* mh = buf; 123 124 vg_assert(buf); 125 if (szB < sizeof(struct fat_header)) 126 return False; 127 if (VG_(ntohl)(fh_be->magic) == FAT_MAGIC) 128 return True; 129 130 if (szB < sizeof(struct MACH_HEADER)) 131 return False; 132 if (mh->magic == MAGIC) 133 return True; 134 135 return False; 136 } 137 138 139 /* Unmap an image mapped in by map_image_aboard. */ 140 static void unmap_image ( /*MOD*/ImageInfo* ii ) 141 { 142 SysRes sres; 143 vg_assert(ii->img); 144 vg_assert(ii->img_szB > 0); 145 sres = VG_(am_munmap_valgrind)( (Addr)ii->img, ii->img_szB ); 146 /* Do we care if this fails? I suppose so; it would indicate 147 some fairly serious snafu with the mapping of the file. */ 148 vg_assert( !sr_isError(sres) ); 149 VG_(memset)(ii, 0, sizeof(*ii)); 150 } 151 152 153 /* Map a given fat or thin object aboard, find the thin part if 154 necessary, do some checks, and write details of both the fat and 155 thin parts into *ii. Returns False (and leaves the file unmapped) 156 on failure. Guarantees to return pointers to a valid(ish) Mach-O 157 image if it succeeds. */ 158 static Bool map_image_aboard ( DebugInfo* di, /* only for err msgs */ 159 /*OUT*/ImageInfo* ii, UChar* filename ) 160 { 161 VG_(memset)(ii, 0, sizeof(*ii)); 162 163 /* First off, try to map the thing in. */ 164 { SizeT size; 165 SysRes fd, sres; 166 struct vg_stat stat_buf; 167 168 fd = VG_(stat)(filename, &stat_buf); 169 if (sr_isError(fd)) { 170 ML_(symerr)(di, True, "Can't stat image (to determine its size)?!"); 171 return False; 172 } 173 size = stat_buf.size; 174 175 fd = VG_(open)(filename, VKI_O_RDONLY, 0); 176 if (sr_isError(fd)) { 177 ML_(symerr)(di, True, "Can't open image to read symbols?!"); 178 return False; 179 } 180 181 sres = VG_(am_mmap_file_float_valgrind) 182 ( size, VKI_PROT_READ, sr_Res(fd), 0 ); 183 if (sr_isError(sres)) { 184 ML_(symerr)(di, True, "Can't mmap image to read symbols?!"); 185 return False; 186 } 187 188 VG_(close)(sr_Res(fd)); 189 190 ii->img = (UChar*)sr_Res(sres); 191 ii->img_szB = size; 192 } 193 194 /* Now it's mapped in and we have .img and .img_szB set. Look for 195 the embedded Mach-O object. If not findable, unmap and fail. */ 196 { struct fat_header* fh_be; 197 struct fat_header fh; 198 struct MACH_HEADER* mh; 199 200 // Assume initially that we have a thin image, and update 201 // these if it turns out to be fat. 202 ii->macho_img = ii->img; 203 ii->macho_img_szB = ii->img_szB; 204 205 // Check for fat header. 206 if (ii->img_szB < sizeof(struct fat_header)) { 207 ML_(symerr)(di, True, "Invalid Mach-O file (0 too small)."); 208 goto unmap_and_fail; 209 } 210 211 // Fat header is always BIG-ENDIAN 212 fh_be = (struct fat_header *)ii->img; 213 fh.magic = VG_(ntohl)(fh_be->magic); 214 fh.nfat_arch = VG_(ntohl)(fh_be->nfat_arch); 215 if (fh.magic == FAT_MAGIC) { 216 // Look for a good architecture. 217 struct fat_arch *arch_be; 218 struct fat_arch arch; 219 Int f; 220 if (ii->img_szB < sizeof(struct fat_header) 221 + fh.nfat_arch * sizeof(struct fat_arch)) { 222 ML_(symerr)(di, True, "Invalid Mach-O file (1 too small)."); 223 goto unmap_and_fail; 224 } 225 for (f = 0, arch_be = (struct fat_arch *)(fh_be+1); 226 f < fh.nfat_arch; 227 f++, arch_be++) { 228 Int cputype; 229 # if defined(VGA_ppc) 230 cputype = CPU_TYPE_POWERPC; 231 # elif defined(VGA_ppc64) 232 cputype = CPU_TYPE_POWERPC64; 233 # elif defined(VGA_x86) 234 cputype = CPU_TYPE_X86; 235 # elif defined(VGA_amd64) 236 cputype = CPU_TYPE_X86_64; 237 # else 238 # error "unknown architecture" 239 # endif 240 arch.cputype = VG_(ntohl)(arch_be->cputype); 241 arch.cpusubtype = VG_(ntohl)(arch_be->cpusubtype); 242 arch.offset = VG_(ntohl)(arch_be->offset); 243 arch.size = VG_(ntohl)(arch_be->size); 244 if (arch.cputype == cputype) { 245 if (ii->img_szB < arch.offset + arch.size) { 246 ML_(symerr)(di, True, "Invalid Mach-O file (2 too small)."); 247 goto unmap_and_fail; 248 } 249 ii->macho_img = ii->img + arch.offset; 250 ii->macho_img_szB = arch.size; 251 break; 252 } 253 } 254 if (f == fh.nfat_arch) { 255 ML_(symerr)(di, True, 256 "No acceptable architecture found in fat file."); 257 goto unmap_and_fail; 258 } 259 } 260 261 /* Sanity check what we found. */ 262 263 /* assured by logic above */ 264 vg_assert(ii->img_szB >= sizeof(struct fat_header)); 265 266 if (ii->macho_img_szB < sizeof(struct MACH_HEADER)) { 267 ML_(symerr)(di, True, "Invalid Mach-O file (3 too small)."); 268 goto unmap_and_fail; 269 } 270 271 if (ii->macho_img_szB > ii->img_szB) { 272 ML_(symerr)(di, True, "Invalid Mach-O file (thin bigger than fat)."); 273 goto unmap_and_fail; 274 } 275 276 if (ii->macho_img >= ii->img 277 && ii->macho_img + ii->macho_img_szB <= ii->img + ii->img_szB) { 278 /* thin entirely within fat, as expected */ 279 } else { 280 ML_(symerr)(di, True, "Invalid Mach-O file (thin not inside fat)."); 281 goto unmap_and_fail; 282 } 283 284 mh = (struct MACH_HEADER *)ii->macho_img; 285 if (mh->magic != MAGIC) { 286 ML_(symerr)(di, True, "Invalid Mach-O file (bad magic)."); 287 goto unmap_and_fail; 288 } 289 290 if (ii->macho_img_szB < sizeof(struct MACH_HEADER) + mh->sizeofcmds) { 291 ML_(symerr)(di, True, "Invalid Mach-O file (4 too small)."); 292 goto unmap_and_fail; 293 } 294 } 295 296 vg_assert(ii->img); 297 vg_assert(ii->macho_img); 298 vg_assert(ii->img_szB > 0); 299 vg_assert(ii->macho_img_szB > 0); 300 vg_assert(ii->macho_img >= ii->img); 301 vg_assert(ii->macho_img + ii->macho_img_szB <= ii->img + ii->img_szB); 302 return True; /* success */ 303 /*NOTREACHED*/ 304 305 unmap_and_fail: 306 unmap_image(ii); 307 return False; /* bah! */ 308 } 309 310 311 /*------------------------------------------------------------*/ 312 /*--- ---*/ 313 /*--- Mach-O symbol table reading ---*/ 314 /*--- ---*/ 315 /*------------------------------------------------------------*/ 316 317 /* Read a symbol table (nlist). Add the resulting candidate symbols 318 to 'syms'; the caller will post-process them and hand them off to 319 ML_(addSym) itself. */ 320 static 321 void read_symtab( /*OUT*/XArray* /* DiSym */ syms, 322 struct _DebugInfo* di, 323 struct NLIST* o_symtab, UInt o_symtab_count, 324 UChar* o_strtab, UInt o_strtab_sz ) 325 { 326 Int i; 327 Addr sym_addr; 328 DiSym disym; 329 UChar* name; 330 331 static UChar* s_a_t_v = NULL; /* do not make non-static */ 332 333 for (i = 0; i < o_symtab_count; i++) { 334 struct NLIST *nl = o_symtab+i; 335 if ((nl->n_type & N_TYPE) == N_SECT) { 336 sym_addr = di->text_bias + nl->n_value; 337 /*} else if ((nl->n_type & N_TYPE) == N_ABS) { 338 GrP fixme don't ignore absolute symbols? 339 sym_addr = nl->n_value; */ 340 } else { 341 continue; 342 } 343 344 if (di->trace_symtab) 345 VG_(printf)("nlist raw: avma %010lx %s\n", 346 sym_addr, o_strtab + nl->n_un.n_strx ); 347 348 /* If no part of the symbol falls within the mapped range, 349 ignore it. */ 350 if (sym_addr <= di->text_avma 351 || sym_addr >= di->text_avma+di->text_size) { 352 continue; 353 } 354 355 /* skip names which point outside the string table; 356 following these risks segfaulting Valgrind */ 357 name = o_strtab + nl->n_un.n_strx; 358 if (name < o_strtab || name >= o_strtab + o_strtab_sz) 359 continue; 360 361 /* skip nameless symbols; these appear to be common, but 362 useless */ 363 if (*name == 0) 364 continue; 365 366 disym.addr = sym_addr; 367 disym.tocptr = 0; 368 disym.pri_name = ML_(addStr)(di, name, -1); 369 disym.sec_names = NULL; 370 disym.size = // let canonicalize fix it 371 di->text_avma+di->text_size - sym_addr; 372 disym.isText = True; 373 disym.isIFunc = False; 374 // Lots of user function names get prepended with an underscore. Eg. the 375 // function 'f' becomes the symbol '_f'. And the "below main" 376 // function is called "start". So we skip the leading underscore, and 377 // if we see 'start' and --show-below-main=no, we rename it as 378 // "start_according_to_valgrind", which makes it easy to spot later 379 // and display as "(below main)". 380 if (disym.pri_name[0] == '_') { 381 disym.pri_name++; 382 } 383 else if (!VG_(clo_show_below_main) && VG_STREQ(disym.pri_name, "start")) { 384 if (s_a_t_v == NULL) 385 s_a_t_v = ML_(addStr)(di, "start_according_to_valgrind", -1); 386 vg_assert(s_a_t_v); 387 disym.pri_name = s_a_t_v; 388 } 389 390 vg_assert(disym.pri_name); 391 VG_(addToXA)( syms, &disym ); 392 } 393 } 394 395 396 /* Compare DiSyms by their start address, and for equal addresses, use 397 the primary name as a secondary sort key. */ 398 static Int cmp_DiSym_by_start_then_name ( void* v1, void* v2 ) 399 { 400 DiSym* s1 = (DiSym*)v1; 401 DiSym* s2 = (DiSym*)v2; 402 if (s1->addr < s2->addr) return -1; 403 if (s1->addr > s2->addr) return 1; 404 return VG_(strcmp)(s1->pri_name, s2->pri_name); 405 } 406 407 /* 'cand' is a bunch of candidate symbols obtained by reading 408 nlist-style symbol table entries. Their ends may overlap, so sort 409 them and truncate them accordingly. The code in this routine is 410 copied almost verbatim from read_symbol_table() in readxcoff.c. */ 411 static void tidy_up_cand_syms ( /*MOD*/XArray* /* of DiSym */ syms, 412 Bool trace_symtab ) 413 { 414 Word nsyms, i, j, k, m; 415 416 nsyms = VG_(sizeXA)(syms); 417 418 VG_(setCmpFnXA)(syms, cmp_DiSym_by_start_then_name); 419 VG_(sortXA)(syms); 420 421 /* We only know for sure the start addresses (actual VMAs) of 422 symbols, and an overestimation of their end addresses. So sort 423 by start address, then clip each symbol so that its end address 424 does not overlap with the next one along. 425 426 There is a small refinement: if a group of symbols have the same 427 address, treat them as a group: find the next symbol along that 428 has a higher start address, and clip all of the group 429 accordingly. This clips the group as a whole so as not to 430 overlap following symbols. This leaves prefersym() in 431 storage.c, which is not nlist-specific, to later decide which of 432 the symbols in the group to keep. 433 434 Another refinement is that we need to get rid of symbols which, 435 after clipping, have identical starts, ends, and names. So the 436 sorting uses the name as a secondary key. 437 */ 438 439 for (i = 0; i < nsyms; i++) { 440 for (k = i+1; 441 k < nsyms 442 && ((DiSym*)VG_(indexXA)(syms,i))->addr 443 == ((DiSym*)VG_(indexXA)(syms,k))->addr; 444 k++) 445 ; 446 /* So now [i .. k-1] is a group all with the same start address. 447 Clip their ending addresses so they don't overlap [k]. In 448 the normal case (no overlaps), k == i+1. */ 449 if (k < nsyms) { 450 DiSym* next = (DiSym*)VG_(indexXA)(syms,k); 451 for (m = i; m < k; m++) { 452 DiSym* here = (DiSym*)VG_(indexXA)(syms,m); 453 vg_assert(here->addr < next->addr); 454 if (here->addr + here->size > next->addr) 455 here->size = next->addr - here->addr; 456 } 457 } 458 i = k-1; 459 vg_assert(i <= nsyms); 460 } 461 462 j = 0; 463 if (nsyms > 0) { 464 j = 1; 465 for (i = 1; i < nsyms; i++) { 466 DiSym *s_j1, *s_j, *s_i; 467 vg_assert(j <= i); 468 s_j1 = (DiSym*)VG_(indexXA)(syms, j-1); 469 s_j = (DiSym*)VG_(indexXA)(syms, j); 470 s_i = (DiSym*)VG_(indexXA)(syms, i); 471 if (s_i->addr != s_j1->addr 472 || s_i->size != s_j1->size 473 || 0 != VG_(strcmp)(s_i->pri_name, s_j1->pri_name)) { 474 *s_j = *s_i; 475 j++; 476 } else { 477 if (trace_symtab) 478 VG_(printf)("nlist cleanup: dump duplicate avma %010lx %s\n", 479 s_i->addr, s_i->pri_name ); 480 } 481 } 482 } 483 vg_assert(j >= 0 && j <= nsyms); 484 VG_(dropTailXA)(syms, nsyms - j); 485 } 486 487 488 /*------------------------------------------------------------*/ 489 /*--- ---*/ 490 /*--- Mach-O top-level processing ---*/ 491 /*--- ---*/ 492 /*------------------------------------------------------------*/ 493 494 #if !defined(APPLE_DSYM_EXT_AND_SUBDIRECTORY) 495 #define APPLE_DSYM_EXT_AND_SUBDIRECTORY ".dSYM/Contents/Resources/DWARF/" 496 #endif 497 498 499 static Bool file_exists_p(const Char *path) 500 { 501 struct vg_stat sbuf; 502 SysRes res = VG_(stat)(path, &sbuf); 503 return sr_isError(res) ? False : True; 504 } 505 506 507 /* Search for an existing dSYM file as a possible separate debug file. 508 Adapted from gdb. */ 509 static Char * 510 find_separate_debug_file (const Char *executable_name) 511 { 512 Char *basename_str; 513 Char *dot_ptr; 514 Char *slash_ptr; 515 Char *dsymfile; 516 517 /* Make sure the object file name itself doesn't contain ".dSYM" in it or we 518 will end up with an infinite loop where after we add a dSYM symbol file, 519 it will then enter this function asking if there is a debug file for the 520 dSYM file itself. */ 521 if (VG_(strcasestr) (executable_name, ".dSYM") == NULL) 522 { 523 /* Check for the existence of a .dSYM file for a given executable. */ 524 basename_str = VG_(basename) (executable_name); 525 dsymfile = ML_(dinfo_zalloc)("di.readmacho.dsymfile", 526 VG_(strlen) (executable_name) 527 + VG_(strlen) (APPLE_DSYM_EXT_AND_SUBDIRECTORY) 528 + VG_(strlen) (basename_str) 529 + 1 530 ); 531 532 /* First try for the dSYM in the same directory as the original file. */ 533 VG_(strcpy) (dsymfile, executable_name); 534 VG_(strcat) (dsymfile, APPLE_DSYM_EXT_AND_SUBDIRECTORY); 535 VG_(strcat) (dsymfile, basename_str); 536 537 if (file_exists_p (dsymfile)) 538 return dsymfile; 539 540 /* Now search for any parent directory that has a '.' in it so we can find 541 Mac OS X applications, bundles, plugins, and any other kinds of files. 542 Mac OS X application bundles wil have their program in 543 "/some/path/MyApp.app/Contents/MacOS/MyApp" (or replace ".app" with 544 ".bundle" or ".plugin" for other types of bundles). So we look for any 545 prior '.' character and try appending the apple dSYM extension and 546 subdirectory and see if we find an existing dSYM file (in the above 547 MyApp example the dSYM would be at either: 548 "/some/path/MyApp.app.dSYM/Contents/Resources/DWARF/MyApp" or 549 "/some/path/MyApp.dSYM/Contents/Resources/DWARF/MyApp". */ 550 VG_(strcpy) (dsymfile, VG_(dirname) (executable_name)); 551 while ((dot_ptr = VG_(strrchr) (dsymfile, '.'))) 552 { 553 /* Find the directory delimiter that follows the '.' character since 554 we now look for a .dSYM that follows any bundle extension. */ 555 slash_ptr = VG_(strchr) (dot_ptr, '/'); 556 if (slash_ptr) 557 { 558 /* NULL terminate the string at the '/' character and append 559 the path down to the dSYM file. */ 560 *slash_ptr = '\0'; 561 VG_(strcat) (slash_ptr, APPLE_DSYM_EXT_AND_SUBDIRECTORY); 562 VG_(strcat) (slash_ptr, basename_str); 563 if (file_exists_p (dsymfile)) 564 return dsymfile; 565 } 566 567 /* NULL terminate the string at the '.' character and append 568 the path down to the dSYM file. */ 569 *dot_ptr = '\0'; 570 VG_(strcat) (dot_ptr, APPLE_DSYM_EXT_AND_SUBDIRECTORY); 571 VG_(strcat) (dot_ptr, basename_str); 572 if (file_exists_p (dsymfile)) 573 return dsymfile; 574 575 /* NULL terminate the string at the '.' locatated by the strrchr() 576 function again. */ 577 *dot_ptr = '\0'; 578 579 /* We found a previous extension '.' character and did not find a 580 dSYM file so now find previous directory delimiter so we don't 581 try multiple times on a file name that may have a version number 582 in it such as "/some/path/MyApp.6.0.4.app". */ 583 slash_ptr = VG_(strrchr) (dsymfile, '/'); 584 if (!slash_ptr) 585 break; 586 /* NULL terminate the string at the previous directory character 587 and search again. */ 588 *slash_ptr = '\0'; 589 } 590 } 591 592 return NULL; 593 } 594 595 596 static UChar *getsectdata(UChar* base, SizeT size, 597 Char *segname, Char *sectname, 598 /*OUT*/Word *sect_size) 599 { 600 struct MACH_HEADER *mh = (struct MACH_HEADER *)base; 601 struct load_command *cmd; 602 Int c; 603 604 for (c = 0, cmd = (struct load_command *)(mh+1); 605 c < mh->ncmds; 606 c++, cmd = (struct load_command *)(cmd->cmdsize + (Addr)cmd)) 607 { 608 if (cmd->cmd == LC_SEGMENT_CMD) { 609 struct SEGMENT_COMMAND *seg = (struct SEGMENT_COMMAND *)cmd; 610 if (0 == VG_(strncmp(seg->segname, segname, sizeof(seg->segname)))) { 611 struct SECTION *sects = (struct SECTION *)(seg+1); 612 Int s; 613 for (s = 0; s < seg->nsects; s++) { 614 if (0 == VG_(strncmp(sects[s].sectname, sectname, 615 sizeof(sects[s].sectname)))) 616 { 617 if (sect_size) *sect_size = sects[s].size; 618 return (UChar *)(base + sects[s].offset); 619 } 620 } 621 } 622 } 623 } 624 625 if (sect_size) *sect_size = 0; 626 return 0; 627 } 628 629 630 /* Brute force just simply search for uuid[0..15] in img[0..n_img-1] */ 631 static Bool check_uuid_matches ( Addr imgA, Word n_img, UChar* uuid ) 632 { 633 Word i; 634 UChar* img = (UChar*)imgA; 635 UChar first = uuid[0]; 636 if (n_img < 16) 637 return False; 638 for (i = 0; i < n_img-16; i++) { 639 if (img[i] != first) 640 continue; 641 if (0 == VG_(memcmp)( &img[i], &uuid[0], 16 )) 642 return True; 643 } 644 return False; 645 } 646 647 648 /* Heuristic kludge: return True if this looks like an installed 649 standard library; hence we shouldn't consider automagically running 650 dsymutil on it. */ 651 static Bool is_systemish_library_name ( UChar* name ) 652 { 653 vg_assert(name); 654 if (0 == VG_(strncasecmp)(name, "/usr/", 5) 655 || 0 == VG_(strncasecmp)(name, "/bin/", 5) 656 || 0 == VG_(strncasecmp)(name, "/sbin/", 6) 657 || 0 == VG_(strncasecmp)(name, "/opt/", 5) 658 || 0 == VG_(strncasecmp)(name, "/sw/", 4) 659 || 0 == VG_(strncasecmp)(name, "/System/", 8) 660 || 0 == VG_(strncasecmp)(name, "/Library/", 9) 661 || 0 == VG_(strncasecmp)(name, "/Applications/", 14)) { 662 return True; 663 } else { 664 return False; 665 } 666 } 667 668 669 Bool ML_(read_macho_debug_info)( struct _DebugInfo* di ) 670 { 671 struct symtab_command *symcmd = NULL; 672 struct dysymtab_command *dysymcmd = NULL; 673 HChar* dsymfilename = NULL; 674 Bool have_uuid = False; 675 UChar uuid[16]; 676 ImageInfo ii; /* main file */ 677 ImageInfo iid; /* auxiliary .dSYM file */ 678 Bool ok; 679 680 /* mmap the object file to look for di->soname and di->text_bias 681 and uuid and nlist and STABS */ 682 683 if (VG_(clo_verbosity) > 1) 684 VG_(message)(Vg_DebugMsg, 685 "%s (%#lx)\n", di->fsm.filename, di->fsm.rx_map_avma ); 686 687 /* This should be ensured by our caller (that we're in the accept 688 state). */ 689 vg_assert(di->fsm.have_rx_map); 690 vg_assert(di->fsm.have_rw_map); 691 692 VG_(memset)(&ii, 0, sizeof(ii)); 693 VG_(memset)(&iid, 0, sizeof(iid)); 694 VG_(memset)(&uuid, 0, sizeof(uuid)); 695 696 ok = map_image_aboard( di, &ii, di->fsm.filename ); 697 if (!ok) goto fail; 698 699 vg_assert(ii.macho_img != NULL && ii.macho_img_szB > 0); 700 701 /* Poke around in the Mach-O header, to find some important 702 stuff. */ 703 // Find LC_SYMTAB and LC_DYSYMTAB, if present. 704 // Read di->soname from LC_ID_DYLIB if present, 705 // or from LC_ID_DYLINKER if present, 706 // or use "NONE". 707 // Get di->text_bias (aka slide) based on the corresponding LC_SEGMENT 708 // Get uuid for later dsym search 709 710 di->text_bias = 0; 711 712 { struct MACH_HEADER *mh = (struct MACH_HEADER *)ii.macho_img; 713 struct load_command *cmd; 714 Int c; 715 716 for (c = 0, cmd = (struct load_command *)(mh+1); 717 c < mh->ncmds; 718 c++, cmd = (struct load_command *)(cmd->cmdsize 719 + (unsigned long)cmd)) { 720 if (cmd->cmd == LC_SYMTAB) { 721 symcmd = (struct symtab_command *)cmd; 722 } 723 else if (cmd->cmd == LC_DYSYMTAB) { 724 dysymcmd = (struct dysymtab_command *)cmd; 725 } 726 else if (cmd->cmd == LC_ID_DYLIB && mh->filetype == MH_DYLIB) { 727 // GrP fixme bundle? 728 struct dylib_command *dcmd = (struct dylib_command *)cmd; 729 UChar *dylibname = dcmd->dylib.name.offset + (UChar *)dcmd; 730 UChar *soname = VG_(strrchr)(dylibname, '/'); 731 if (!soname) soname = dylibname; 732 else soname++; 733 di->soname = ML_(dinfo_strdup)("di.readmacho.dylibname", 734 soname); 735 } 736 else if (cmd->cmd==LC_ID_DYLINKER && mh->filetype==MH_DYLINKER) { 737 struct dylinker_command *dcmd = (struct dylinker_command *)cmd; 738 UChar *dylinkername = dcmd->name.offset + (UChar *)dcmd; 739 UChar *soname = VG_(strrchr)(dylinkername, '/'); 740 if (!soname) soname = dylinkername; 741 else soname++; 742 di->soname = ML_(dinfo_strdup)("di.readmacho.dylinkername", 743 soname); 744 } 745 746 // A comment from Julian about why varinfo[35] fail: 747 // 748 // My impression is, from comparing the output of otool -l for these 749 // executables with the logic in ML_(read_macho_debug_info), 750 // specifically the part that begins "else if (cmd->cmd == 751 // LC_SEGMENT_CMD) {", that it's a complete hack which just happens 752 // to work ok for text symbols. In particular, it appears to assume 753 // that in a "struct load_command" of type LC_SEGMENT_CMD, the first 754 // "struct SEGMENT_COMMAND" inside it is going to contain the info we 755 // need. However, otool -l shows, and also the Apple docs state, 756 // that a struct load_command may contain an arbitrary number of 757 // struct SEGMENT_COMMANDs, so I'm not sure why it's OK to merely 758 // snarf the first. But I'm not sure about this. 759 // 760 // The "Try for __DATA" block below simply adds acquisition of data 761 // svma/bias values using the same assumption. It also needs 762 // (probably) to deal with bss sections, but I don't understand how 763 // this all ties together really, so it requires further study. 764 // 765 // If you can get your head around the relationship between MachO 766 // segments, sections and load commands, this might be relatively 767 // easy to fix properly. 768 // 769 // Basically we need to come up with plausible numbers for di-> 770 // {text,data,bss}_{avma,svma}, from which the _bias numbers are 771 // then trivially derived. Then I think the debuginfo reader should 772 // work pretty well. 773 else if (cmd->cmd == LC_SEGMENT_CMD) { 774 struct SEGMENT_COMMAND *seg = (struct SEGMENT_COMMAND *)cmd; 775 /* Try for __TEXT */ 776 if (!di->text_present 777 && 0 == VG_(strcmp)(seg->segname, "__TEXT") 778 /* DDD: is the next line a kludge? -- JRS */ 779 && seg->fileoff == 0 && seg->filesize != 0) { 780 di->text_present = True; 781 di->text_svma = (Addr)seg->vmaddr; 782 di->text_avma = di->fsm.rx_map_avma; 783 di->text_size = seg->vmsize; 784 di->text_bias = di->text_avma - di->text_svma; 785 /* Make the _debug_ values be the same as the 786 svma/bias for the primary object, since there is 787 no secondary (debuginfo) object, but nevertheless 788 downstream biasing of Dwarf3 relies on the 789 _debug_ values. */ 790 di->text_debug_svma = di->text_svma; 791 di->text_debug_bias = di->text_bias; 792 } 793 /* Try for __DATA */ 794 if (!di->data_present 795 && 0 == VG_(strcmp)(seg->segname, "__DATA") 796 /* && DDD:seg->fileoff == 0 */ && seg->filesize != 0) { 797 di->data_present = True; 798 di->data_svma = (Addr)seg->vmaddr; 799 di->data_avma = di->fsm.rw_map_avma; 800 di->data_size = seg->vmsize; 801 di->data_bias = di->data_avma - di->data_svma; 802 di->data_debug_svma = di->data_svma; 803 di->data_debug_bias = di->data_bias; 804 } 805 } 806 else if (cmd->cmd == LC_UUID) { 807 struct uuid_command *uuid_cmd = (struct uuid_command *)cmd; 808 VG_(memcpy)(uuid, uuid_cmd->uuid, sizeof(uuid)); 809 have_uuid = True; 810 } 811 } 812 } 813 814 if (!di->soname) { 815 di->soname = ML_(dinfo_strdup)("di.readmacho.noname", "NONE"); 816 } 817 818 if (di->trace_symtab) { 819 VG_(printf)("\n"); 820 VG_(printf)("SONAME = %s\n", di->soname); 821 VG_(printf)("\n"); 822 } 823 824 /* Now we have the base object to hand. Read symbols from it. */ 825 826 if (ii.macho_img && ii.macho_img_szB > 0 && symcmd && dysymcmd) { 827 828 /* Read nlist symbol table */ 829 struct NLIST *syms; 830 UChar *strs; 831 XArray* /* DiSym */ candSyms = NULL; 832 Word i, nCandSyms; 833 834 if (ii.macho_img_szB < symcmd->stroff + symcmd->strsize 835 || ii.macho_img_szB < symcmd->symoff + symcmd->nsyms 836 * sizeof(struct NLIST)) { 837 ML_(symerr)(di, False, "Invalid Mach-O file (5 too small)."); 838 goto fail; 839 } 840 if (dysymcmd->ilocalsym + dysymcmd->nlocalsym > symcmd->nsyms 841 || dysymcmd->iextdefsym + dysymcmd->nextdefsym > symcmd->nsyms) { 842 ML_(symerr)(di, False, "Invalid Mach-O file (bad symbol table)."); 843 goto fail; 844 } 845 846 syms = (struct NLIST *)(ii.macho_img + symcmd->symoff); 847 strs = (UChar *)(ii.macho_img + symcmd->stroff); 848 849 if (VG_(clo_verbosity) > 1) 850 VG_(message)(Vg_DebugMsg, 851 " reading syms from primary file (%d %d)\n", 852 dysymcmd->nextdefsym, dysymcmd->nlocalsym ); 853 854 /* Read candidate symbols into 'candSyms', so we can truncate 855 overlapping ends and generally tidy up, before presenting 856 them to ML_(addSym). */ 857 candSyms = VG_(newXA)( 858 ML_(dinfo_zalloc), "di.readmacho.candsyms.1", 859 ML_(dinfo_free), sizeof(DiSym) 860 ); 861 vg_assert(candSyms); 862 863 // extern symbols 864 read_symtab(candSyms, 865 di, 866 syms + dysymcmd->iextdefsym, dysymcmd->nextdefsym, 867 strs, symcmd->strsize); 868 // static and private_extern symbols 869 read_symtab(candSyms, 870 di, 871 syms + dysymcmd->ilocalsym, dysymcmd->nlocalsym, 872 strs, symcmd->strsize); 873 874 /* tidy up the cand syms -- trim overlapping ends. May resize 875 candSyms. */ 876 tidy_up_cand_syms( candSyms, di->trace_symtab ); 877 878 /* and finally present them to ML_(addSym) */ 879 nCandSyms = VG_(sizeXA)( candSyms ); 880 for (i = 0; i < nCandSyms; i++) { 881 DiSym* cand = (DiSym*) VG_(indexXA)( candSyms, i ); 882 vg_assert(cand->pri_name != NULL); 883 vg_assert(cand->sec_names == NULL); 884 if (di->trace_symtab) 885 VG_(printf)("nlist final: acquire avma %010lx-%010lx %s\n", 886 cand->addr, cand->addr + cand->size - 1, 887 cand->pri_name ); 888 ML_(addSym)( di, cand ); 889 } 890 VG_(deleteXA)( candSyms ); 891 } 892 893 /* If there's no UUID in the primary, don't even bother to try and 894 read any DWARF, since we won't be able to verify it matches. 895 Our policy is not to load debug info unless we can verify that 896 it matches the primary. Just declare success at this point. 897 And don't complain to the user, since that would cause us to 898 complain on objects compiled without -g. (Some versions of 899 XCode are observed to omit a UUID entry for object linked(?) 900 without -g. Others don't appear to omit it.) */ 901 if (!have_uuid) 902 goto success; 903 904 /* mmap the dSYM file to look for DWARF debug info. If successful, 905 use the .macho_img and .macho_img_szB in iid. */ 906 907 dsymfilename = find_separate_debug_file( di->fsm.filename ); 908 909 /* Try to load it. */ 910 if (dsymfilename) { 911 Bool valid; 912 913 if (VG_(clo_verbosity) > 1) 914 VG_(message)(Vg_DebugMsg, " dSYM= %s\n", dsymfilename); 915 916 ok = map_image_aboard( di, &iid, dsymfilename ); 917 if (!ok) goto fail; 918 919 /* check it has the right uuid. */ 920 vg_assert(have_uuid); 921 valid = iid.macho_img && iid.macho_img_szB > 0 922 && check_uuid_matches( (Addr)iid.macho_img, 923 iid.macho_img_szB, uuid ); 924 if (valid) 925 goto read_the_dwarf; 926 927 if (VG_(clo_verbosity) > 1) 928 VG_(message)(Vg_DebugMsg, " dSYM does not have " 929 "correct UUID (out of date?)\n"); 930 } 931 932 /* There was no dsym file, or it doesn't match. We'll have to try 933 regenerating it, unless --dsymutil=no, in which case just complain 934 instead. */ 935 936 /* If this looks like a lib that we shouldn't run dsymutil on, just 937 give up. (possible reasons: is system lib, or in /usr etc, or 938 the dsym dir would not be writable by the user, or we're running 939 as root) */ 940 vg_assert(di->fsm.filename); 941 if (is_systemish_library_name(di->fsm.filename)) 942 goto success; 943 944 if (!VG_(clo_dsymutil)) { 945 if (VG_(clo_verbosity) == 1) { 946 VG_(message)(Vg_DebugMsg, "%s:\n", di->fsm.filename); 947 } 948 if (VG_(clo_verbosity) > 0) 949 VG_(message)(Vg_DebugMsg, "%sdSYM directory %s; consider using " 950 "--dsymutil=yes\n", 951 VG_(clo_verbosity) > 1 ? " " : "", 952 dsymfilename ? "has wrong UUID" : "is missing"); 953 goto success; 954 } 955 956 /* Run dsymutil */ 957 958 { Int r; 959 HChar* dsymutil = "/usr/bin/dsymutil "; 960 HChar* cmd = ML_(dinfo_zalloc)( "di.readmacho.tmp1", 961 VG_(strlen)(dsymutil) 962 + VG_(strlen)(di->fsm.filename) 963 + 32 /* misc */ ); 964 VG_(strcpy)(cmd, dsymutil); 965 if (0) VG_(strcat)(cmd, "--verbose "); 966 VG_(strcat)(cmd, "\""); 967 VG_(strcat)(cmd, di->fsm.filename); 968 VG_(strcat)(cmd, "\""); 969 VG_(message)(Vg_DebugMsg, "run: %s\n", cmd); 970 r = VG_(system)( cmd ); 971 if (r) 972 VG_(message)(Vg_DebugMsg, "run: %s FAILED\n", dsymutil); 973 ML_(dinfo_free)(cmd); 974 dsymfilename = find_separate_debug_file(di->fsm.filename); 975 } 976 977 /* Try again to load it. */ 978 if (dsymfilename) { 979 Bool valid; 980 981 if (VG_(clo_verbosity) > 1) 982 VG_(message)(Vg_DebugMsg, " dsyms= %s\n", dsymfilename); 983 984 ok = map_image_aboard( di, &iid, dsymfilename ); 985 if (!ok) goto fail; 986 987 /* check it has the right uuid. */ 988 vg_assert(have_uuid); 989 valid = iid.macho_img && iid.macho_img_szB > 0 990 && check_uuid_matches( (Addr)iid.macho_img, 991 iid.macho_img_szB, uuid ); 992 if (!valid) { 993 if (VG_(clo_verbosity) > 0) { 994 VG_(message)(Vg_DebugMsg, 995 "WARNING: did not find expected UUID %02X%02X%02X%02X" 996 "-%02X%02X-%02X%02X-%02X%02X-%02X%02X%02X%02X%02X%02X" 997 " in dSYM dir\n", 998 (UInt)uuid[0], (UInt)uuid[1], (UInt)uuid[2], (UInt)uuid[3], 999 (UInt)uuid[4], (UInt)uuid[5], (UInt)uuid[6], (UInt)uuid[7], 1000 (UInt)uuid[8], (UInt)uuid[9], (UInt)uuid[10], 1001 (UInt)uuid[11], (UInt)uuid[12], (UInt)uuid[13], 1002 (UInt)uuid[14], (UInt)uuid[15] ); 1003 VG_(message)(Vg_DebugMsg, 1004 "WARNING: for %s\n", di->fsm.filename); 1005 } 1006 unmap_image( &iid ); 1007 /* unmap_image zeroes the fields, so the following test makes 1008 sense. */ 1009 goto fail; 1010 } 1011 } 1012 1013 /* Right. Finally we have our best try at the dwarf image, so go 1014 on to reading stuff out of it. */ 1015 1016 read_the_dwarf: 1017 if (iid.macho_img && iid.macho_img_szB > 0) { 1018 UChar* debug_info_img = NULL; 1019 Word debug_info_sz; 1020 UChar* debug_abbv_img; 1021 Word debug_abbv_sz; 1022 UChar* debug_line_img; 1023 Word debug_line_sz; 1024 UChar* debug_str_img; 1025 Word debug_str_sz; 1026 UChar* debug_ranges_img; 1027 Word debug_ranges_sz; 1028 UChar* debug_loc_img; 1029 Word debug_loc_sz; 1030 UChar* debug_name_img; 1031 Word debug_name_sz; 1032 1033 debug_info_img = 1034 getsectdata(iid.macho_img, iid.macho_img_szB, 1035 "__DWARF", "__debug_info", &debug_info_sz); 1036 debug_abbv_img = 1037 getsectdata(iid.macho_img, iid.macho_img_szB, 1038 "__DWARF", "__debug_abbrev", &debug_abbv_sz); 1039 debug_line_img = 1040 getsectdata(iid.macho_img, iid.macho_img_szB, 1041 "__DWARF", "__debug_line", &debug_line_sz); 1042 debug_str_img = 1043 getsectdata(iid.macho_img, iid.macho_img_szB, 1044 "__DWARF", "__debug_str", &debug_str_sz); 1045 debug_ranges_img = 1046 getsectdata(iid.macho_img, iid.macho_img_szB, 1047 "__DWARF", "__debug_ranges", &debug_ranges_sz); 1048 debug_loc_img = 1049 getsectdata(iid.macho_img, iid.macho_img_szB, 1050 "__DWARF", "__debug_loc", &debug_loc_sz); 1051 debug_name_img = 1052 getsectdata(iid.macho_img, iid.macho_img_szB, 1053 "__DWARF", "__debug_pubnames", &debug_name_sz); 1054 1055 if (debug_info_img) { 1056 if (VG_(clo_verbosity) > 1) { 1057 if (0) 1058 VG_(message)(Vg_DebugMsg, 1059 "Reading dwarf3 for %s (%#lx) from %s" 1060 " (%ld %ld %ld %ld %ld %ld)\n", 1061 di->fsm.filename, di->text_avma, dsymfilename, 1062 debug_info_sz, debug_abbv_sz, debug_line_sz, 1063 debug_str_sz, debug_ranges_sz, debug_loc_sz 1064 ); 1065 VG_(message)(Vg_DebugMsg, 1066 " reading dwarf3 from dsyms file\n"); 1067 } 1068 /* The old reader: line numbers and unwind info only */ 1069 ML_(read_debuginfo_dwarf3) ( di, 1070 debug_info_img, debug_info_sz, 1071 debug_abbv_img, debug_abbv_sz, 1072 debug_line_img, debug_line_sz, 1073 debug_str_img, debug_str_sz ); 1074 1075 /* The new reader: read the DIEs in .debug_info to acquire 1076 information on variable types and locations. But only if 1077 the tool asks for it, or the user requests it on the 1078 command line. */ 1079 if (VG_(needs).var_info /* the tool requires it */ 1080 || VG_(clo_read_var_info) /* the user asked for it */) { 1081 ML_(new_dwarf3_reader)( 1082 di, debug_info_img, debug_info_sz, 1083 debug_abbv_img, debug_abbv_sz, 1084 debug_line_img, debug_line_sz, 1085 debug_str_img, debug_str_sz, 1086 debug_ranges_img, debug_ranges_sz, 1087 debug_loc_img, debug_loc_sz 1088 ); 1089 } 1090 } 1091 } 1092 1093 if (dsymfilename) ML_(dinfo_free)(dsymfilename); 1094 1095 success: 1096 if (ii.img) 1097 unmap_image(&ii); 1098 if (iid.img) 1099 unmap_image(&iid); 1100 return True; 1101 1102 /* NOTREACHED */ 1103 1104 fail: 1105 ML_(symerr)(di, True, "Error reading Mach-O object."); 1106 if (ii.img) 1107 unmap_image(&ii); 1108 if (iid.img) 1109 unmap_image(&iid); 1110 return False; 1111 } 1112 1113 #endif // defined(VGO_darwin) 1114 1115 /*--------------------------------------------------------------------*/ 1116 /*--- end ---*/ 1117 /*--------------------------------------------------------------------*/ 1118