1 2 /*--------------------------------------------------------------------*/ 3 /*--- Reading of syms & debug info from Mach-O files. ---*/ 4 /*--- readmacho.c ---*/ 5 /*--------------------------------------------------------------------*/ 6 7 /* 8 This file is part of Valgrind, a dynamic binary instrumentation 9 framework. 10 11 Copyright (C) 2005-2013 Apple Inc. 12 Greg Parker gparker (at) apple.com 13 14 This program is free software; you can redistribute it and/or 15 modify it under the terms of the GNU General Public License as 16 published by the Free Software Foundation; either version 2 of the 17 License, or (at your option) any later version. 18 19 This program is distributed in the hope that it will be useful, but 20 WITHOUT ANY WARRANTY; without even the implied warranty of 21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 22 General Public License for more details. 23 24 You should have received a copy of the GNU General Public License 25 along with this program; if not, write to the Free Software 26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 27 02111-1307, USA. 28 29 The GNU General Public License is contained in the file COPYING. 30 */ 31 32 #if defined(VGO_darwin) 33 34 #include "pub_core_basics.h" 35 #include "pub_core_vki.h" 36 #include "pub_core_libcbase.h" 37 #include "pub_core_libcprint.h" 38 #include "pub_core_libcassert.h" 39 #include "pub_core_libcfile.h" 40 #include "pub_core_libcproc.h" 41 #include "pub_core_aspacemgr.h" /* for mmaping debuginfo files */ 42 #include "pub_core_machine.h" /* VG_ELF_CLASS */ 43 #include "pub_core_options.h" 44 #include "pub_core_oset.h" 45 #include "pub_core_tooliface.h" /* VG_(needs) */ 46 #include "pub_core_xarray.h" 47 #include "pub_core_clientstate.h" 48 #include "pub_core_debuginfo.h" 49 50 #include "priv_misc.h" 51 #include "priv_image.h" 52 #include "priv_d3basics.h" 53 #include "priv_tytypes.h" 54 #include "priv_storage.h" 55 #include "priv_readmacho.h" 56 #include "priv_readdwarf.h" 57 #include "priv_readdwarf3.h" 58 #include "priv_readstabs.h" 59 60 /* --- !!! --- EXTERNAL HEADERS start --- !!! --- */ 61 #include <mach-o/loader.h> 62 #include <mach-o/nlist.h> 63 #include <mach-o/fat.h> 64 /* --- !!! --- EXTERNAL HEADERS end --- !!! --- */ 65 66 #if VG_WORDSIZE == 4 67 # define MAGIC MH_MAGIC 68 # define MACH_HEADER mach_header 69 # define LC_SEGMENT_CMD LC_SEGMENT 70 # define SEGMENT_COMMAND segment_command 71 # define SECTION section 72 # define NLIST nlist 73 #else 74 # define MAGIC MH_MAGIC_64 75 # define MACH_HEADER mach_header_64 76 # define LC_SEGMENT_CMD LC_SEGMENT_64 77 # define SEGMENT_COMMAND segment_command_64 78 # define SECTION section_64 79 # define NLIST nlist_64 80 #endif 81 82 83 /*------------------------------------------------------------*/ 84 /*--- ---*/ 85 /*--- Mach-O file mapping/unmapping helpers ---*/ 86 /*--- ---*/ 87 /*------------------------------------------------------------*/ 88 89 /* A DiSlice is used to handle the thin/fat distinction for MachO images. 90 (1) the entire mapped-in ("primary") image, fat headers, kitchen sink, 91 whatnot: the entire file. This is the DiImage* that is the backing 92 for the DiSlice. 93 (2) the Mach-O object of interest, which is presumably somewhere inside 94 the primary image. map_image_aboard() below, which generates this 95 info, will carefully check that the macho_ fields denote a section of 96 memory that falls entirely inside the primary image. 97 */ 98 99 Bool ML_(is_macho_object_file)( const void* buf, SizeT szB ) 100 { 101 /* (JRS: the Mach-O headers might not be in this mapped data, 102 because we only mapped a page for this initial check, 103 or at least not very much, and what's at the start of the file 104 is in general a so-called fat header. The Mach-O object we're 105 interested in could be arbitrarily far along the image, and so 106 we can't assume its header will fall within this page.) */ 107 108 /* But we can say that either it's a fat object, in which case it 109 begins with a fat header, or it's unadorned Mach-O, in which 110 case it starts with a normal header. At least do what checks we 111 can to establish whether or not we're looking at something 112 sane. */ 113 114 const struct fat_header* fh_be = buf; 115 const struct MACH_HEADER* mh = buf; 116 117 vg_assert(buf); 118 if (szB < sizeof(struct fat_header)) 119 return False; 120 if (VG_(ntohl)(fh_be->magic) == FAT_MAGIC) 121 return True; 122 123 if (szB < sizeof(struct MACH_HEADER)) 124 return False; 125 if (mh->magic == MAGIC) 126 return True; 127 128 return False; 129 } 130 131 132 /* Unmap an image mapped in by map_image_aboard. */ 133 static void unmap_image ( /*MOD*/DiSlice* sli ) 134 { 135 vg_assert(sli); 136 if (ML_(sli_is_valid)(*sli)) { 137 ML_(img_done)(sli->img); 138 *sli = DiSlice_INVALID; 139 } 140 } 141 142 143 /* Open the given file, find the thin part if necessary, do some 144 checks, and return a DiSlice containing details of both the thin 145 part and (implicitly, via the contained DiImage*) the fat part. 146 returns DiSlice_INVALID if it fails. If it succeeds, the returned 147 slice is guaranteed to refer to a valid(ish) Mach-O image. */ 148 static DiSlice map_image_aboard ( DebugInfo* di, /* only for err msgs */ 149 const HChar* filename ) 150 { 151 DiSlice sli = DiSlice_INVALID; 152 153 /* First off, try to map the thing in. */ 154 DiImage* mimg = ML_(img_from_local_file)(filename); 155 if (mimg == NULL) { 156 VG_(message)(Vg_UserMsg, "warning: connection to image %s failed\n", 157 filename ); 158 VG_(message)(Vg_UserMsg, " no symbols or debug info loaded\n" ); 159 return DiSlice_INVALID; 160 } 161 162 /* Now we have a viable DiImage* for it. Look for the embedded 163 Mach-O object. If not findable, close the image and fail. */ 164 DiOffT fh_be_ioff = 0; 165 struct fat_header fh_be; 166 struct fat_header fh; 167 168 // Assume initially that we have a thin image, and narrow 169 // the bounds if it turns out to be fat. This stores |mimg| as 170 // |sli.img|, so NULL out |mimg| after this point, for the sake of 171 // clarity. 172 sli = ML_(sli_from_img)(mimg); 173 mimg = NULL; 174 175 // Check for fat header. 176 if (ML_(img_size)(sli.img) < sizeof(struct fat_header)) { 177 ML_(symerr)(di, True, "Invalid Mach-O file (0 too small)."); 178 goto close_and_fail; 179 } 180 181 // Fat header is always BIG-ENDIAN 182 ML_(img_get)(&fh_be, sli.img, fh_be_ioff, sizeof(fh_be)); 183 VG_(memset)(&fh, 0, sizeof(fh)); 184 fh.magic = VG_(ntohl)(fh_be.magic); 185 fh.nfat_arch = VG_(ntohl)(fh_be.nfat_arch); 186 if (fh.magic == FAT_MAGIC) { 187 // Look for a good architecture. 188 if (ML_(img_size)(sli.img) < sizeof(struct fat_header) 189 + fh.nfat_arch * sizeof(struct fat_arch)) { 190 ML_(symerr)(di, True, "Invalid Mach-O file (1 too small)."); 191 goto close_and_fail; 192 } 193 DiOffT arch_be_ioff; 194 Int f; 195 for (f = 0, arch_be_ioff = sizeof(struct fat_header); 196 f < fh.nfat_arch; 197 f++, arch_be_ioff += sizeof(struct fat_arch)) { 198 # if defined(VGA_ppc) 199 Int cputype = CPU_TYPE_POWERPC; 200 # elif defined(VGA_ppc64) 201 Int cputype = CPU_TYPE_POWERPC64; 202 # elif defined(VGA_x86) 203 Int cputype = CPU_TYPE_X86; 204 # elif defined(VGA_amd64) 205 Int cputype = CPU_TYPE_X86_64; 206 # else 207 # error "unknown architecture" 208 # endif 209 struct fat_arch arch_be; 210 struct fat_arch arch; 211 ML_(img_get)(&arch_be, sli.img, arch_be_ioff, sizeof(arch_be)); 212 VG_(memset)(&arch, 0, sizeof(arch)); 213 arch.cputype = VG_(ntohl)(arch_be.cputype); 214 arch.cpusubtype = VG_(ntohl)(arch_be.cpusubtype); 215 arch.offset = VG_(ntohl)(arch_be.offset); 216 arch.size = VG_(ntohl)(arch_be.size); 217 if (arch.cputype == cputype) { 218 if (ML_(img_size)(sli.img) < arch.offset + arch.size) { 219 ML_(symerr)(di, True, "Invalid Mach-O file (2 too small)."); 220 goto close_and_fail; 221 } 222 /* Found a suitable arch. Narrow down the slice accordingly. */ 223 sli.ioff = arch.offset; 224 sli.szB = arch.size; 225 break; 226 } 227 } 228 if (f == fh.nfat_arch) { 229 ML_(symerr)(di, True, 230 "No acceptable architecture found in fat file."); 231 goto close_and_fail; 232 } 233 } 234 235 /* Sanity check what we found. */ 236 237 /* assured by logic above */ 238 vg_assert(ML_(img_size)(sli.img) >= sizeof(struct fat_header)); 239 240 if (sli.szB < sizeof(struct MACH_HEADER)) { 241 ML_(symerr)(di, True, "Invalid Mach-O file (3 too small)."); 242 goto close_and_fail; 243 } 244 245 if (sli.szB > ML_(img_size)(sli.img)) { 246 ML_(symerr)(di, True, "Invalid Mach-O file (thin bigger than fat)."); 247 goto close_and_fail; 248 } 249 250 if (sli.ioff >= 0 && sli.ioff + sli.szB <= ML_(img_size)(sli.img)) { 251 /* thin entirely within fat, as expected */ 252 } else { 253 ML_(symerr)(di, True, "Invalid Mach-O file (thin not inside fat)."); 254 goto close_and_fail; 255 } 256 257 /* Peer at the Mach header for the thin object, starting at the 258 beginning of the slice, to check it's at least marginally 259 sane. */ 260 struct MACH_HEADER mh; 261 ML_(cur_read_get)(&mh, ML_(cur_from_sli)(sli), sizeof(mh)); 262 if (mh.magic != MAGIC) { 263 ML_(symerr)(di, True, "Invalid Mach-O file (bad magic)."); 264 goto close_and_fail; 265 } 266 267 if (sli.szB < sizeof(struct MACH_HEADER) + mh.sizeofcmds) { 268 ML_(symerr)(di, True, "Invalid Mach-O file (4 too small)."); 269 goto close_and_fail; 270 } 271 272 /* "main image is plausible" */ 273 vg_assert(sli.img); 274 vg_assert(ML_(img_size)(sli.img) > 0); 275 /* "thin image exists and is a sub-part (or all) of main image" */ 276 vg_assert(sli.ioff >= 0); 277 vg_assert(sli.szB > 0); 278 vg_assert(sli.ioff + sli.szB <= ML_(img_size)(sli.img)); 279 return sli; /* success */ 280 /*NOTREACHED*/ 281 282 close_and_fail: 283 unmap_image(&sli); 284 return DiSlice_INVALID; /* bah! */ 285 } 286 287 288 /*------------------------------------------------------------*/ 289 /*--- ---*/ 290 /*--- Mach-O symbol table reading ---*/ 291 /*--- ---*/ 292 /*------------------------------------------------------------*/ 293 294 /* Read a symbol table (nlist). Add the resulting candidate symbols 295 to 'syms'; the caller will post-process them and hand them off to 296 ML_(addSym) itself. */ 297 static 298 void read_symtab( /*OUT*/XArray* /* DiSym */ syms, 299 struct _DebugInfo* di, 300 DiCursor symtab_cur, UInt symtab_count, 301 DiCursor strtab_cur, UInt strtab_sz ) 302 { 303 Int i; 304 DiSym disym; 305 306 // "start_according_to_valgrind" 307 static HChar* s_a_t_v = NULL; /* do not make non-static */ 308 309 for (i = 0; i < symtab_count; i++) { 310 struct NLIST nl; 311 ML_(cur_read_get)(&nl, 312 ML_(cur_plus)(symtab_cur, i * sizeof(struct NLIST)), 313 sizeof(nl)); 314 315 Addr sym_addr = 0; 316 if ((nl.n_type & N_TYPE) == N_SECT) { 317 sym_addr = di->text_bias + nl.n_value; 318 /*} else if ((nl.n_type & N_TYPE) == N_ABS) { 319 GrP fixme don't ignore absolute symbols? 320 sym_addr = nl.n_value; */ 321 } else { 322 continue; 323 } 324 325 if (di->trace_symtab) { 326 HChar* str = ML_(cur_read_strdup)( 327 ML_(cur_plus)(strtab_cur, nl.n_un.n_strx), 328 "di.read_symtab.1"); 329 VG_(printf)("nlist raw: avma %010lx %s\n", sym_addr, str ); 330 ML_(dinfo_free)(str); 331 } 332 333 /* If no part of the symbol falls within the mapped range, 334 ignore it. */ 335 if (sym_addr <= di->text_avma 336 || sym_addr >= di->text_avma+di->text_size) { 337 continue; 338 } 339 340 /* skip names which point outside the string table; 341 following these risks segfaulting Valgrind */ 342 if (nl.n_un.n_strx < 0 || nl.n_un.n_strx >= strtab_sz) { 343 continue; 344 } 345 346 HChar* name 347 = ML_(cur_read_strdup)( ML_(cur_plus)(strtab_cur, nl.n_un.n_strx), 348 "di.read_symtab.2"); 349 350 /* skip nameless symbols; these appear to be common, but 351 useless */ 352 if (*name == 0) { 353 ML_(dinfo_free)(name); 354 continue; 355 } 356 357 disym.addr = sym_addr; 358 disym.tocptr = 0; 359 disym.pri_name = ML_(addStr)(di, name, -1); 360 disym.sec_names = NULL; 361 disym.size = // let canonicalize fix it 362 di->text_avma+di->text_size - sym_addr; 363 disym.isText = True; 364 disym.isIFunc = False; 365 // Lots of user function names get prepended with an underscore. Eg. the 366 // function 'f' becomes the symbol '_f'. And the "below main" 367 // function is called "start". So we skip the leading underscore, and 368 // if we see 'start' and --show-below-main=no, we rename it as 369 // "start_according_to_valgrind", which makes it easy to spot later 370 // and display as "(below main)". 371 if (disym.pri_name[0] == '_') { 372 disym.pri_name++; 373 } 374 else if (!VG_(clo_show_below_main) && VG_STREQ(disym.pri_name, "start")) { 375 if (s_a_t_v == NULL) 376 s_a_t_v = ML_(addStr)(di, "start_according_to_valgrind", -1); 377 vg_assert(s_a_t_v); 378 disym.pri_name = s_a_t_v; 379 } 380 381 vg_assert(disym.pri_name); 382 VG_(addToXA)( syms, &disym ); 383 ML_(dinfo_free)(name); 384 } 385 } 386 387 388 /* Compare DiSyms by their start address, and for equal addresses, use 389 the primary name as a secondary sort key. */ 390 static Int cmp_DiSym_by_start_then_name ( const void* v1, const void* v2 ) 391 { 392 const DiSym* s1 = (DiSym*)v1; 393 const DiSym* s2 = (DiSym*)v2; 394 if (s1->addr < s2->addr) return -1; 395 if (s1->addr > s2->addr) return 1; 396 return VG_(strcmp)(s1->pri_name, s2->pri_name); 397 } 398 399 /* 'cand' is a bunch of candidate symbols obtained by reading 400 nlist-style symbol table entries. Their ends may overlap, so sort 401 them and truncate them accordingly. The code in this routine is 402 copied almost verbatim from read_symbol_table() in readxcoff.c. */ 403 static void tidy_up_cand_syms ( /*MOD*/XArray* /* of DiSym */ syms, 404 Bool trace_symtab ) 405 { 406 Word nsyms, i, j, k, m; 407 408 nsyms = VG_(sizeXA)(syms); 409 410 VG_(setCmpFnXA)(syms, cmp_DiSym_by_start_then_name); 411 VG_(sortXA)(syms); 412 413 /* We only know for sure the start addresses (actual VMAs) of 414 symbols, and an overestimation of their end addresses. So sort 415 by start address, then clip each symbol so that its end address 416 does not overlap with the next one along. 417 418 There is a small refinement: if a group of symbols have the same 419 address, treat them as a group: find the next symbol along that 420 has a higher start address, and clip all of the group 421 accordingly. This clips the group as a whole so as not to 422 overlap following symbols. This leaves prefersym() in 423 storage.c, which is not nlist-specific, to later decide which of 424 the symbols in the group to keep. 425 426 Another refinement is that we need to get rid of symbols which, 427 after clipping, have identical starts, ends, and names. So the 428 sorting uses the name as a secondary key. 429 */ 430 431 for (i = 0; i < nsyms; i++) { 432 for (k = i+1; 433 k < nsyms 434 && ((DiSym*)VG_(indexXA)(syms,i))->addr 435 == ((DiSym*)VG_(indexXA)(syms,k))->addr; 436 k++) 437 ; 438 /* So now [i .. k-1] is a group all with the same start address. 439 Clip their ending addresses so they don't overlap [k]. In 440 the normal case (no overlaps), k == i+1. */ 441 if (k < nsyms) { 442 DiSym* next = (DiSym*)VG_(indexXA)(syms,k); 443 for (m = i; m < k; m++) { 444 DiSym* here = (DiSym*)VG_(indexXA)(syms,m); 445 vg_assert(here->addr < next->addr); 446 if (here->addr + here->size > next->addr) 447 here->size = next->addr - here->addr; 448 } 449 } 450 i = k-1; 451 vg_assert(i <= nsyms); 452 } 453 454 j = 0; 455 if (nsyms > 0) { 456 j = 1; 457 for (i = 1; i < nsyms; i++) { 458 DiSym *s_j1, *s_j, *s_i; 459 vg_assert(j <= i); 460 s_j1 = (DiSym*)VG_(indexXA)(syms, j-1); 461 s_j = (DiSym*)VG_(indexXA)(syms, j); 462 s_i = (DiSym*)VG_(indexXA)(syms, i); 463 if (s_i->addr != s_j1->addr 464 || s_i->size != s_j1->size 465 || 0 != VG_(strcmp)(s_i->pri_name, s_j1->pri_name)) { 466 *s_j = *s_i; 467 j++; 468 } else { 469 if (trace_symtab) 470 VG_(printf)("nlist cleanup: dump duplicate avma %010lx %s\n", 471 s_i->addr, s_i->pri_name ); 472 } 473 } 474 } 475 vg_assert(j >= 0 && j <= nsyms); 476 VG_(dropTailXA)(syms, nsyms - j); 477 } 478 479 480 /*------------------------------------------------------------*/ 481 /*--- ---*/ 482 /*--- Mach-O top-level processing ---*/ 483 /*--- ---*/ 484 /*------------------------------------------------------------*/ 485 486 #if !defined(APPLE_DSYM_EXT_AND_SUBDIRECTORY) 487 #define APPLE_DSYM_EXT_AND_SUBDIRECTORY ".dSYM/Contents/Resources/DWARF/" 488 #endif 489 490 491 static Bool file_exists_p(const HChar *path) 492 { 493 struct vg_stat sbuf; 494 SysRes res = VG_(stat)(path, &sbuf); 495 return sr_isError(res) ? False : True; 496 } 497 498 499 /* Search for an existing dSYM file as a possible separate debug file. 500 Adapted from gdb. */ 501 static HChar * 502 find_separate_debug_file (const HChar *executable_name) 503 { 504 const HChar *basename_str; 505 HChar *dot_ptr; 506 HChar *slash_ptr; 507 HChar *dsymfile; 508 509 /* Make sure the object file name itself doesn't contain ".dSYM" in it or we 510 will end up with an infinite loop where after we add a dSYM symbol file, 511 it will then enter this function asking if there is a debug file for the 512 dSYM file itself. */ 513 if (VG_(strcasestr) (executable_name, ".dSYM") == NULL) 514 { 515 /* Check for the existence of a .dSYM file for a given executable. */ 516 basename_str = VG_(basename) (executable_name); 517 dsymfile = ML_(dinfo_zalloc)("di.readmacho.dsymfile", 518 VG_(strlen) (executable_name) 519 + VG_(strlen) (APPLE_DSYM_EXT_AND_SUBDIRECTORY) 520 + VG_(strlen) (basename_str) 521 + 1 522 ); 523 524 /* First try for the dSYM in the same directory as the original file. */ 525 VG_(strcpy) (dsymfile, executable_name); 526 VG_(strcat) (dsymfile, APPLE_DSYM_EXT_AND_SUBDIRECTORY); 527 VG_(strcat) (dsymfile, basename_str); 528 529 if (file_exists_p (dsymfile)) 530 return dsymfile; 531 532 /* Now search for any parent directory that has a '.' in it so we can find 533 Mac OS X applications, bundles, plugins, and any other kinds of files. 534 Mac OS X application bundles wil have their program in 535 "/some/path/MyApp.app/Contents/MacOS/MyApp" (or replace ".app" with 536 ".bundle" or ".plugin" for other types of bundles). So we look for any 537 prior '.' character and try appending the apple dSYM extension and 538 subdirectory and see if we find an existing dSYM file (in the above 539 MyApp example the dSYM would be at either: 540 "/some/path/MyApp.app.dSYM/Contents/Resources/DWARF/MyApp" or 541 "/some/path/MyApp.dSYM/Contents/Resources/DWARF/MyApp". */ 542 VG_(strcpy) (dsymfile, VG_(dirname) (executable_name)); 543 while ((dot_ptr = VG_(strrchr) (dsymfile, '.'))) 544 { 545 /* Find the directory delimiter that follows the '.' character since 546 we now look for a .dSYM that follows any bundle extension. */ 547 slash_ptr = VG_(strchr) (dot_ptr, '/'); 548 if (slash_ptr) 549 { 550 /* NULL terminate the string at the '/' character and append 551 the path down to the dSYM file. */ 552 *slash_ptr = '\0'; 553 VG_(strcat) (slash_ptr, APPLE_DSYM_EXT_AND_SUBDIRECTORY); 554 VG_(strcat) (slash_ptr, basename_str); 555 if (file_exists_p (dsymfile)) 556 return dsymfile; 557 } 558 559 /* NULL terminate the string at the '.' character and append 560 the path down to the dSYM file. */ 561 *dot_ptr = '\0'; 562 VG_(strcat) (dot_ptr, APPLE_DSYM_EXT_AND_SUBDIRECTORY); 563 VG_(strcat) (dot_ptr, basename_str); 564 if (file_exists_p (dsymfile)) 565 return dsymfile; 566 567 /* NULL terminate the string at the '.' locatated by the strrchr() 568 function again. */ 569 *dot_ptr = '\0'; 570 571 /* We found a previous extension '.' character and did not find a 572 dSYM file so now find previous directory delimiter so we don't 573 try multiple times on a file name that may have a version number 574 in it such as "/some/path/MyApp.6.0.4.app". */ 575 slash_ptr = VG_(strrchr) (dsymfile, '/'); 576 if (!slash_ptr) 577 break; 578 /* NULL terminate the string at the previous directory character 579 and search again. */ 580 *slash_ptr = '\0'; 581 } 582 } 583 584 return NULL; 585 } 586 587 588 /* Given a DiSlice covering the entire Mach-O thin image, find the 589 DiSlice for the specified (segname, sectname) pairing, if 590 possible. */ 591 static DiSlice getsectdata ( DiSlice img, 592 const HChar *segname, const HChar *sectname ) 593 { 594 DiCursor cur = ML_(cur_from_sli)(img); 595 596 struct MACH_HEADER mh; 597 ML_(cur_step_get)(&mh, &cur, sizeof(mh)); 598 599 Int c; 600 for (c = 0; c < mh.ncmds; c++) { 601 struct load_command cmd; 602 ML_(cur_read_get)(&cmd, cur, sizeof(cmd)); 603 if (cmd.cmd == LC_SEGMENT_CMD) { 604 struct SEGMENT_COMMAND seg; 605 ML_(cur_read_get)(&seg, cur, sizeof(seg)); 606 if (0 == VG_(strncmp(&seg.segname[0], 607 segname, sizeof(seg.segname)))) { 608 DiCursor sects_cur = ML_(cur_plus)(cur, sizeof(seg)); 609 Int s; 610 for (s = 0; s < seg.nsects; s++) { 611 struct SECTION sect; 612 ML_(cur_step_get)(§, §s_cur, sizeof(sect)); 613 if (0 == VG_(strncmp(sect.sectname, sectname, 614 sizeof(sect.sectname)))) { 615 DiSlice res = img; 616 res.ioff = sect.offset; 617 res.szB = sect.size; 618 return res; 619 } 620 } 621 622 } 623 } 624 cur = ML_(cur_plus)(cur, cmd.cmdsize); 625 } 626 627 return DiSlice_INVALID; 628 } 629 630 631 /* Brute force just simply search for uuid[0..15] in |sli| */ 632 static Bool check_uuid_matches ( DiSlice sli, UChar* uuid ) 633 { 634 if (sli.szB < 16) 635 return False; 636 637 /* Work through the slice in 1 KB chunks. */ 638 UChar first = uuid[0]; 639 DiOffT min_off = sli.ioff; 640 DiOffT max1_off = sli.ioff + sli.szB; 641 DiOffT curr_off = min_off; 642 vg_assert(min_off < max1_off); 643 while (1) { 644 vg_assert(curr_off >= min_off && curr_off <= max1_off); 645 if (curr_off == max1_off) break; 646 DiOffT avail = max1_off - curr_off; 647 vg_assert(avail > 0 && avail <= max1_off); 648 if (avail > 1024) avail = 1024; 649 UChar buf[1024]; 650 SizeT nGot = ML_(img_get_some)(buf, sli.img, curr_off, avail); 651 vg_assert(nGot >= 1 && nGot <= avail); 652 UInt i; 653 /* Scan through the 1K chunk we got, looking for the start char. */ 654 for (i = 0; i < (UInt)nGot; i++) { 655 if (LIKELY(buf[i] != first)) 656 continue; 657 /* first char matches. See if we can get 16 bytes at this 658 offset, and compare. */ 659 if (curr_off + i < max1_off && max1_off - (curr_off + i) >= 16) { 660 UChar buff16[16]; 661 ML_(img_get)(&buff16[0], sli.img, curr_off + i, 16); 662 if (0 == VG_(memcmp)(&buff16[0], &uuid[0], 16)) 663 return True; 664 } 665 } 666 curr_off += nGot; 667 } 668 return False; 669 } 670 671 672 /* Heuristic kludge: return True if this looks like an installed 673 standard library; hence we shouldn't consider automagically running 674 dsymutil on it. */ 675 static Bool is_systemish_library_name ( HChar* name ) 676 { 677 vg_assert(name); 678 if (0 == VG_(strncasecmp)(name, "/usr/", 5) 679 || 0 == VG_(strncasecmp)(name, "/bin/", 5) 680 || 0 == VG_(strncasecmp)(name, "/sbin/", 6) 681 || 0 == VG_(strncasecmp)(name, "/opt/", 5) 682 || 0 == VG_(strncasecmp)(name, "/sw/", 4) 683 || 0 == VG_(strncasecmp)(name, "/System/", 8) 684 || 0 == VG_(strncasecmp)(name, "/Library/", 9) 685 || 0 == VG_(strncasecmp)(name, "/Applications/", 14)) { 686 return True; 687 } else { 688 return False; 689 } 690 } 691 692 693 Bool ML_(read_macho_debug_info)( struct _DebugInfo* di ) 694 { 695 DiSlice msli = DiSlice_INVALID; // the main image 696 DiSlice dsli = DiSlice_INVALID; // the debuginfo image 697 DiCursor sym_cur = DiCursor_INVALID; 698 DiCursor dysym_cur = DiCursor_INVALID; 699 HChar* dsymfilename = NULL; 700 Bool have_uuid = False; 701 UChar uuid[16]; 702 Word i; 703 struct _DebugInfoMapping* rx_map = NULL; 704 struct _DebugInfoMapping* rw_map = NULL; 705 706 /* mmap the object file to look for di->soname and di->text_bias 707 and uuid and nlist and STABS */ 708 709 /* This should be ensured by our caller (that we're in the accept 710 state). */ 711 vg_assert(di->fsm.have_rx_map); 712 vg_assert(di->fsm.have_rw_map); 713 714 for (i = 0; i < VG_(sizeXA)(di->fsm.maps); i++) { 715 struct _DebugInfoMapping* map = VG_(indexXA)(di->fsm.maps, i); 716 if (map->rx && !rx_map) 717 rx_map = map; 718 if (map->rw && !rw_map) 719 rw_map = map; 720 if (rx_map && rw_map) 721 break; 722 } 723 vg_assert(rx_map); 724 vg_assert(rw_map); 725 726 if (VG_(clo_verbosity) > 1) 727 VG_(message)(Vg_DebugMsg, 728 "%s (rx at %#lx, rw at %#lx)\n", di->fsm.filename, 729 rx_map->avma, rw_map->avma ); 730 731 VG_(memset)(&uuid, 0, sizeof(uuid)); 732 733 msli = map_image_aboard( di, di->fsm.filename ); 734 if (!ML_(sli_is_valid)(msli)) { 735 ML_(symerr)(di, False, "Connect to main image failed."); 736 goto fail; 737 } 738 739 vg_assert(msli.img != NULL && msli.szB > 0); 740 741 /* Poke around in the Mach-O header, to find some important 742 stuff. */ 743 // Find LC_SYMTAB and LC_DYSYMTAB, if present. 744 // Read di->soname from LC_ID_DYLIB if present, 745 // or from LC_ID_DYLINKER if present, 746 // or use "NONE". 747 // Get di->text_bias (aka slide) based on the corresponding LC_SEGMENT 748 // Get uuid for later dsym search 749 750 di->text_bias = 0; 751 752 { 753 DiCursor cmd_cur = ML_(cur_from_sli)(msli); 754 755 struct MACH_HEADER mh; 756 ML_(cur_step_get)(&mh, &cmd_cur, sizeof(mh)); 757 758 /* Now cur_cmd points just after the Mach header, right at the 759 start of the load commands, which is where we need it to start 760 the following loop. */ 761 762 Int c; 763 for (c = 0; c < mh.ncmds; c++) { 764 struct load_command cmd; 765 ML_(cur_read_get)(&cmd, cmd_cur, sizeof(cmd)); 766 767 if (cmd.cmd == LC_SYMTAB) { 768 sym_cur = cmd_cur; 769 } 770 else if (cmd.cmd == LC_DYSYMTAB) { 771 dysym_cur = cmd_cur; 772 } 773 else if (cmd.cmd == LC_ID_DYLIB && mh.filetype == MH_DYLIB) { 774 // GrP fixme bundle? 775 struct dylib_command dcmd; 776 ML_(cur_read_get)(&dcmd, cmd_cur, sizeof(dcmd)); 777 DiCursor dylibname_cur 778 = ML_(cur_plus)(cmd_cur, dcmd.dylib.name.offset); 779 HChar* dylibname 780 = ML_(cur_read_strdup)(dylibname_cur, "di.rmdi.1"); 781 HChar* soname = VG_(strrchr)(dylibname, '/'); 782 if (!soname) soname = dylibname; 783 else soname++; 784 di->soname = ML_(dinfo_strdup)("di.readmacho.dylibname", 785 soname); 786 ML_(dinfo_free)(dylibname); 787 } 788 else if (cmd.cmd==LC_ID_DYLINKER && mh.filetype==MH_DYLINKER) { 789 struct dylinker_command dcmd; 790 ML_(cur_read_get)(&dcmd, cmd_cur, sizeof(dcmd)); 791 DiCursor dylinkername_cur 792 = ML_(cur_plus)(cmd_cur, dcmd.name.offset); 793 HChar* dylinkername 794 = ML_(cur_read_strdup)(dylinkername_cur, "di.rmdi.2"); 795 HChar* soname = VG_(strrchr)(dylinkername, '/'); 796 if (!soname) soname = dylinkername; 797 else soname++; 798 di->soname = ML_(dinfo_strdup)("di.readmacho.dylinkername", 799 soname); 800 ML_(dinfo_free)(dylinkername); 801 } 802 803 // A comment from Julian about why varinfo[35] fail: 804 // 805 // My impression is, from comparing the output of otool -l for these 806 // executables with the logic in ML_(read_macho_debug_info), 807 // specifically the part that begins "else if (cmd->cmd == 808 // LC_SEGMENT_CMD) {", that it's a complete hack which just happens 809 // to work ok for text symbols. In particular, it appears to assume 810 // that in a "struct load_command" of type LC_SEGMENT_CMD, the first 811 // "struct SEGMENT_COMMAND" inside it is going to contain the info we 812 // need. However, otool -l shows, and also the Apple docs state, 813 // that a struct load_command may contain an arbitrary number of 814 // struct SEGMENT_COMMANDs, so I'm not sure why it's OK to merely 815 // snarf the first. But I'm not sure about this. 816 // 817 // The "Try for __DATA" block below simply adds acquisition of data 818 // svma/bias values using the same assumption. It also needs 819 // (probably) to deal with bss sections, but I don't understand how 820 // this all ties together really, so it requires further study. 821 // 822 // If you can get your head around the relationship between MachO 823 // segments, sections and load commands, this might be relatively 824 // easy to fix properly. 825 // 826 // Basically we need to come up with plausible numbers for di-> 827 // {text,data,bss}_{avma,svma}, from which the _bias numbers are 828 // then trivially derived. Then I think the debuginfo reader should 829 // work pretty well. 830 else if (cmd.cmd == LC_SEGMENT_CMD) { 831 struct SEGMENT_COMMAND seg; 832 ML_(cur_read_get)(&seg, cmd_cur, sizeof(seg)); 833 /* Try for __TEXT */ 834 if (!di->text_present 835 && 0 == VG_(strcmp)(&seg.segname[0], "__TEXT") 836 /* DDD: is the next line a kludge? -- JRS */ 837 && seg.fileoff == 0 && seg.filesize != 0) { 838 di->text_present = True; 839 di->text_svma = (Addr)seg.vmaddr; 840 di->text_avma = rx_map->avma; 841 di->text_size = seg.vmsize; 842 di->text_bias = di->text_avma - di->text_svma; 843 /* Make the _debug_ values be the same as the 844 svma/bias for the primary object, since there is 845 no secondary (debuginfo) object, but nevertheless 846 downstream biasing of Dwarf3 relies on the 847 _debug_ values. */ 848 di->text_debug_svma = di->text_svma; 849 di->text_debug_bias = di->text_bias; 850 } 851 /* Try for __DATA */ 852 if (!di->data_present 853 && 0 == VG_(strcmp)(&seg.segname[0], "__DATA") 854 /* && DDD:seg->fileoff == 0 */ && seg.filesize != 0) { 855 di->data_present = True; 856 di->data_svma = (Addr)seg.vmaddr; 857 di->data_avma = rw_map->avma; 858 di->data_size = seg.vmsize; 859 di->data_bias = di->data_avma - di->data_svma; 860 di->data_debug_svma = di->data_svma; 861 di->data_debug_bias = di->data_bias; 862 } 863 } 864 else if (cmd.cmd == LC_UUID) { 865 ML_(cur_read_get)(&uuid, cmd_cur, sizeof(uuid)); 866 have_uuid = True; 867 } 868 // Move the cursor along 869 cmd_cur = ML_(cur_plus)(cmd_cur, cmd.cmdsize); 870 } 871 } 872 873 if (!di->soname) { 874 di->soname = ML_(dinfo_strdup)("di.readmacho.noname", "NONE"); 875 } 876 877 if (di->trace_symtab) { 878 VG_(printf)("\n"); 879 VG_(printf)("SONAME = %s\n", di->soname); 880 VG_(printf)("\n"); 881 } 882 883 /* Now we have the base object to hand. Read symbols from it. */ 884 885 // We already asserted that .. 886 vg_assert(msli.img != NULL && msli.szB > 0); 887 888 if (ML_(cur_is_valid)(sym_cur) && ML_(cur_is_valid)(dysym_cur)) { 889 890 struct symtab_command symcmd; 891 struct dysymtab_command dysymcmd; 892 893 ML_(cur_read_get)(&symcmd, sym_cur, sizeof(symcmd)); 894 ML_(cur_read_get)(&dysymcmd, dysym_cur, sizeof(dysymcmd)); 895 896 /* Read nlist symbol table */ 897 DiCursor syms = DiCursor_INVALID; 898 DiCursor strs = DiCursor_INVALID; 899 XArray* /* DiSym */ candSyms = NULL; 900 Word nCandSyms; 901 902 if (msli.szB < symcmd.stroff + symcmd.strsize 903 || msli.szB < symcmd.symoff + symcmd.nsyms 904 * sizeof(struct NLIST)) { 905 ML_(symerr)(di, False, "Invalid Mach-O file (5 too small)."); 906 goto fail; 907 } 908 if (dysymcmd.ilocalsym + dysymcmd.nlocalsym > symcmd.nsyms 909 || dysymcmd.iextdefsym + dysymcmd.nextdefsym > symcmd.nsyms) { 910 ML_(symerr)(di, False, "Invalid Mach-O file (bad symbol table)."); 911 goto fail; 912 } 913 914 syms = ML_(cur_plus)(ML_(cur_from_sli)(msli), symcmd.symoff); 915 strs = ML_(cur_plus)(ML_(cur_from_sli)(msli), symcmd.stroff); 916 917 if (VG_(clo_verbosity) > 1) 918 VG_(message)(Vg_DebugMsg, 919 " reading syms from primary file (%d %d)\n", 920 dysymcmd.nextdefsym, dysymcmd.nlocalsym ); 921 922 /* Read candidate symbols into 'candSyms', so we can truncate 923 overlapping ends and generally tidy up, before presenting 924 them to ML_(addSym). */ 925 candSyms = VG_(newXA)( 926 ML_(dinfo_zalloc), "di.readmacho.candsyms.1", 927 ML_(dinfo_free), sizeof(DiSym) 928 ); 929 vg_assert(candSyms); 930 931 // extern symbols 932 read_symtab(candSyms, 933 di, 934 ML_(cur_plus)(syms, 935 dysymcmd.iextdefsym * sizeof(struct NLIST)), 936 dysymcmd.nextdefsym, strs, symcmd.strsize); 937 // static and private_extern symbols 938 read_symtab(candSyms, 939 di, 940 ML_(cur_plus)(syms, 941 dysymcmd.ilocalsym * sizeof(struct NLIST)), 942 dysymcmd.nlocalsym, strs, symcmd.strsize); 943 944 /* tidy up the cand syms -- trim overlapping ends. May resize 945 candSyms. */ 946 tidy_up_cand_syms( candSyms, di->trace_symtab ); 947 948 /* and finally present them to ML_(addSym) */ 949 nCandSyms = VG_(sizeXA)( candSyms ); 950 for (i = 0; i < nCandSyms; i++) { 951 DiSym* cand = (DiSym*) VG_(indexXA)( candSyms, i ); 952 vg_assert(cand->pri_name != NULL); 953 vg_assert(cand->sec_names == NULL); 954 if (di->trace_symtab) 955 VG_(printf)("nlist final: acquire avma %010lx-%010lx %s\n", 956 cand->addr, cand->addr + cand->size - 1, 957 cand->pri_name ); 958 ML_(addSym)( di, cand ); 959 } 960 VG_(deleteXA)( candSyms ); 961 } 962 963 /* If there's no UUID in the primary, don't even bother to try and 964 read any DWARF, since we won't be able to verify it matches. 965 Our policy is not to load debug info unless we can verify that 966 it matches the primary. Just declare success at this point. 967 And don't complain to the user, since that would cause us to 968 complain on objects compiled without -g. (Some versions of 969 XCode are observed to omit a UUID entry for object linked(?) 970 without -g. Others don't appear to omit it.) */ 971 if (!have_uuid) 972 goto success; 973 974 /* mmap the dSYM file to look for DWARF debug info. If successful, 975 use the .macho_img and .macho_img_szB in dsli. */ 976 977 dsymfilename = find_separate_debug_file( di->fsm.filename ); 978 979 /* Try to load it. */ 980 if (dsymfilename) { 981 Bool valid; 982 983 if (VG_(clo_verbosity) > 1) 984 VG_(message)(Vg_DebugMsg, " dSYM= %s\n", dsymfilename); 985 986 dsli = map_image_aboard( di, dsymfilename ); 987 if (!ML_(sli_is_valid)(dsli)) { 988 ML_(symerr)(di, False, "Connect to debuginfo image failed " 989 "(first attempt)."); 990 goto fail; 991 } 992 993 /* check it has the right uuid. */ 994 vg_assert(have_uuid); 995 valid = dsli.img && dsli.szB > 0 && check_uuid_matches( dsli, uuid ); 996 if (valid) 997 goto read_the_dwarf; 998 999 if (VG_(clo_verbosity) > 1) 1000 VG_(message)(Vg_DebugMsg, " dSYM does not have " 1001 "correct UUID (out of date?)\n"); 1002 } 1003 1004 /* There was no dsym file, or it doesn't match. We'll have to try 1005 regenerating it, unless --dsymutil=no, in which case just complain 1006 instead. */ 1007 1008 /* If this looks like a lib that we shouldn't run dsymutil on, just 1009 give up. (possible reasons: is system lib, or in /usr etc, or 1010 the dsym dir would not be writable by the user, or we're running 1011 as root) */ 1012 vg_assert(di->fsm.filename); 1013 if (is_systemish_library_name(di->fsm.filename)) 1014 goto success; 1015 1016 if (!VG_(clo_dsymutil)) { 1017 if (VG_(clo_verbosity) == 1) { 1018 VG_(message)(Vg_DebugMsg, "%s:\n", di->fsm.filename); 1019 } 1020 if (VG_(clo_verbosity) > 0) 1021 VG_(message)(Vg_DebugMsg, "%sdSYM directory %s; consider using " 1022 "--dsymutil=yes\n", 1023 VG_(clo_verbosity) > 1 ? " " : "", 1024 dsymfilename ? "has wrong UUID" : "is missing"); 1025 goto success; 1026 } 1027 1028 /* Run dsymutil */ 1029 1030 { Int r; 1031 const HChar* dsymutil = "/usr/bin/dsymutil "; 1032 HChar* cmd = ML_(dinfo_zalloc)( "di.readmacho.tmp1", 1033 VG_(strlen)(dsymutil) 1034 + VG_(strlen)(di->fsm.filename) 1035 + 32 /* misc */ ); 1036 VG_(strcpy)(cmd, dsymutil); 1037 if (0) VG_(strcat)(cmd, "--verbose "); 1038 VG_(strcat)(cmd, "\""); 1039 VG_(strcat)(cmd, di->fsm.filename); 1040 VG_(strcat)(cmd, "\""); 1041 VG_(message)(Vg_DebugMsg, "run: %s\n", cmd); 1042 r = VG_(system)( cmd ); 1043 if (r) 1044 VG_(message)(Vg_DebugMsg, "run: %s FAILED\n", dsymutil); 1045 ML_(dinfo_free)(cmd); 1046 dsymfilename = find_separate_debug_file(di->fsm.filename); 1047 } 1048 1049 /* Try again to load it. */ 1050 if (dsymfilename) { 1051 Bool valid; 1052 1053 if (VG_(clo_verbosity) > 1) 1054 VG_(message)(Vg_DebugMsg, " dsyms= %s\n", dsymfilename); 1055 1056 dsli = map_image_aboard( di, dsymfilename ); 1057 if (!ML_(sli_is_valid)(dsli)) { 1058 ML_(symerr)(di, False, "Connect to debuginfo image failed " 1059 "(second attempt)."); 1060 goto fail; 1061 } 1062 1063 /* check it has the right uuid. */ 1064 vg_assert(have_uuid); 1065 vg_assert(have_uuid); 1066 valid = dsli.img && dsli.szB > 0 && check_uuid_matches( dsli, uuid ); 1067 if (!valid) { 1068 if (VG_(clo_verbosity) > 0) { 1069 VG_(message)(Vg_DebugMsg, 1070 "WARNING: did not find expected UUID %02X%02X%02X%02X" 1071 "-%02X%02X-%02X%02X-%02X%02X-%02X%02X%02X%02X%02X%02X" 1072 " in dSYM dir\n", 1073 (UInt)uuid[0], (UInt)uuid[1], (UInt)uuid[2], (UInt)uuid[3], 1074 (UInt)uuid[4], (UInt)uuid[5], (UInt)uuid[6], (UInt)uuid[7], 1075 (UInt)uuid[8], (UInt)uuid[9], (UInt)uuid[10], 1076 (UInt)uuid[11], (UInt)uuid[12], (UInt)uuid[13], 1077 (UInt)uuid[14], (UInt)uuid[15] ); 1078 VG_(message)(Vg_DebugMsg, 1079 "WARNING: for %s\n", di->fsm.filename); 1080 } 1081 unmap_image( &dsli ); 1082 /* unmap_image zeroes out dsli, so it's safe for "fail:" to 1083 re-try unmap_image. */ 1084 goto fail; 1085 } 1086 } 1087 1088 /* Right. Finally we have our best try at the dwarf image, so go 1089 on to reading stuff out of it. */ 1090 1091 read_the_dwarf: 1092 if (ML_(sli_is_valid)(msli) && msli.szB > 0) { 1093 // "_mscn" is "mach-o section" 1094 DiSlice debug_info_mscn 1095 = getsectdata(dsli, "__DWARF", "__debug_info"); 1096 DiSlice debug_abbv_mscn 1097 = getsectdata(dsli, "__DWARF", "__debug_abbrev"); 1098 DiSlice debug_line_mscn 1099 = getsectdata(dsli, "__DWARF", "__debug_line"); 1100 DiSlice debug_str_mscn 1101 = getsectdata(dsli, "__DWARF", "__debug_str"); 1102 DiSlice debug_ranges_mscn 1103 = getsectdata(dsli, "__DWARF", "__debug_ranges"); 1104 DiSlice debug_loc_mscn 1105 = getsectdata(dsli, "__DWARF", "__debug_loc"); 1106 1107 if (ML_(sli_is_valid)(debug_info_mscn)) { 1108 if (VG_(clo_verbosity) > 1) { 1109 if (0) 1110 VG_(message)(Vg_DebugMsg, 1111 "Reading dwarf3 for %s (%#lx) from %s" 1112 " (%lld %lld %lld %lld %lld %lld)\n", 1113 di->fsm.filename, di->text_avma, dsymfilename, 1114 debug_info_mscn.szB, debug_abbv_mscn.szB, 1115 debug_line_mscn.szB, debug_str_mscn.szB, 1116 debug_ranges_mscn.szB, debug_loc_mscn.szB 1117 ); 1118 VG_(message)(Vg_DebugMsg, 1119 " reading dwarf3 from dsyms file\n"); 1120 } 1121 /* The old reader: line numbers and unwind info only */ 1122 ML_(read_debuginfo_dwarf3) ( di, 1123 debug_info_mscn, 1124 DiSlice_INVALID, /* .debug_types */ 1125 debug_abbv_mscn, 1126 debug_line_mscn, 1127 debug_str_mscn, 1128 DiSlice_INVALID /* ALT .debug_str */ ); 1129 1130 /* The new reader: read the DIEs in .debug_info to acquire 1131 information on variable types and locations. But only if 1132 the tool asks for it, or the user requests it on the 1133 command line. */ 1134 if (VG_(needs).var_info /* the tool requires it */ 1135 || VG_(clo_read_var_info) /* the user asked for it */) { 1136 ML_(new_dwarf3_reader)( 1137 di, debug_info_mscn, 1138 DiSlice_INVALID, /* .debug_types */ 1139 debug_abbv_mscn, 1140 debug_line_mscn, 1141 debug_str_mscn, 1142 debug_ranges_mscn, 1143 debug_loc_mscn, 1144 DiSlice_INVALID, /* ALT .debug_info */ 1145 DiSlice_INVALID, /* ALT .debug_abbv */ 1146 DiSlice_INVALID, /* ALT .debug_line */ 1147 DiSlice_INVALID /* ALT .debug_str */ 1148 ); 1149 } 1150 } 1151 } 1152 1153 if (dsymfilename) ML_(dinfo_free)(dsymfilename); 1154 1155 success: 1156 unmap_image(&msli); 1157 unmap_image(&dsli); 1158 return True; 1159 1160 /* NOTREACHED */ 1161 1162 fail: 1163 ML_(symerr)(di, True, "Error reading Mach-O object."); 1164 unmap_image(&msli); 1165 unmap_image(&dsli); 1166 return False; 1167 } 1168 1169 #endif // defined(VGO_darwin) 1170 1171 /*--------------------------------------------------------------------*/ 1172 /*--- end ---*/ 1173 /*--------------------------------------------------------------------*/ 1174