Home | History | Annotate | Download | only in m_debuginfo
      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- Reading of syms & debug info from Mach-O files.              ---*/
      4 /*---                                                  readmacho.c ---*/
      5 /*--------------------------------------------------------------------*/
      6 
      7 /*
      8    This file is part of Valgrind, a dynamic binary instrumentation
      9    framework.
     10 
     11    Copyright (C) 2005-2010 Apple Inc.
     12       Greg Parker gparker (at) apple.com
     13 
     14    This program is free software; you can redistribute it and/or
     15    modify it under the terms of the GNU General Public License as
     16    published by the Free Software Foundation; either version 2 of the
     17    License, or (at your option) any later version.
     18 
     19    This program is distributed in the hope that it will be useful, but
     20    WITHOUT ANY WARRANTY; without even the implied warranty of
     21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     22    General Public License for more details.
     23 
     24    You should have received a copy of the GNU General Public License
     25    along with this program; if not, write to the Free Software
     26    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     27    02111-1307, USA.
     28 
     29    The GNU General Public License is contained in the file COPYING.
     30 */
     31 
     32 #if defined(VGO_darwin)
     33 
     34 #include "pub_core_basics.h"
     35 #include "pub_core_vki.h"
     36 #include "pub_core_libcbase.h"
     37 #include "pub_core_libcprint.h"
     38 #include "pub_core_libcassert.h"
     39 #include "pub_core_libcfile.h"
     40 #include "pub_core_libcproc.h"
     41 #include "pub_core_aspacemgr.h"    /* for mmaping debuginfo files */
     42 #include "pub_core_machine.h"      /* VG_ELF_CLASS */
     43 #include "pub_core_options.h"
     44 #include "pub_core_oset.h"
     45 #include "pub_core_tooliface.h"    /* VG_(needs) */
     46 #include "pub_core_xarray.h"
     47 #include "pub_core_clientstate.h"
     48 #include "pub_core_debuginfo.h"
     49 
     50 #include "priv_d3basics.h"
     51 #include "priv_misc.h"
     52 #include "priv_tytypes.h"
     53 #include "priv_storage.h"
     54 #include "priv_readmacho.h"
     55 #include "priv_readdwarf.h"
     56 #include "priv_readdwarf3.h"
     57 #include "priv_readstabs.h"
     58 
     59 /* --- !!! --- EXTERNAL HEADERS start --- !!! --- */
     60 #include <mach-o/loader.h>
     61 #include <mach-o/nlist.h>
     62 #include <mach-o/fat.h>
     63 /* --- !!! --- EXTERNAL HEADERS end --- !!! --- */
     64 
     65 #if VG_WORDSIZE == 4   /* word size 4: map the generic names onto the non-_64 Mach-O names */
     66 # define MAGIC MH_MAGIC
     67 # define MACH_HEADER mach_header
     68 # define LC_SEGMENT_CMD LC_SEGMENT
     69 # define SEGMENT_COMMAND segment_command
     70 # define SECTION section
     71 # define NLIST nlist
     72 #else                  /* any other word size: map them onto the _64 Mach-O names */
     73 # define MAGIC MH_MAGIC_64
     74 # define MACH_HEADER mach_header_64
     75 # define LC_SEGMENT_CMD LC_SEGMENT_64
     76 # define SEGMENT_COMMAND segment_command_64
     77 # define SECTION section_64
     78 # define NLIST nlist_64
     79 #endif                 /* VG_WORDSIZE == 4 */
     80 
     81 
     82 /*------------------------------------------------------------*/
     83 /*---                                                      ---*/
     84 /*--- Mach-O file mapping/unmapping helpers                ---*/
     85 /*---                                                      ---*/
     86 /*------------------------------------------------------------*/
     87 
     88 typedef   /* Describes one mapped file plus the Mach-O object located inside it. */
     89    struct {
     90       /* These two describe the entire mapped-in ("primary") image,
     91          fat headers, kitchen sink, whatnot: the entire file.  The
     92          image is mapped into img[0 .. img_szB-1]. */
     93       UChar* img;
     94       SizeT  img_szB;
     95       /* These two describe the Mach-O object of interest, which is
     96          presumably somewhere inside the primary image.
     97          map_image_aboard() below, which generates this info, will
     98          carefully check that the macho_ fields denote a section of
     99          memory that falls entirely inside img[0 .. img_szB-1]. */
    100       UChar* macho_img;
    101       SizeT  macho_img_szB;
    102    }
    103    ImageInfo;
    104 
    105 
    106 Bool ML_(is_macho_object_file)( const void* buf, SizeT szB )
    107 {
    108    /* (JRS: the Mach-O headers might not be in this mapped data,
    109       because we only mapped a page for this initial check,
    110       or at least not very much, and what's at the start of the file
    111       is in general a so-called fat header.  The Mach-O object we're
    112       interested in could be arbitrarily far along the image, and so
    113       we can't assume its header will fall within this page.) */
    114 
    115    /* But we can say that either it's a fat object, in which case it
    116       begins with a fat header, or it's unadorned Mach-O, in which
    117       case it starts with a normal header.  At least do what checks we
    118       can to establish whether or not we're looking at something
    119       sane. */
    120 
    121    const struct fat_header*  fh_be = buf;
    122    const struct MACH_HEADER* mh    = buf;
    123 
    124    vg_assert(buf);
    125    if (szB < sizeof(struct fat_header))
    126       return False;
    127    if (VG_(ntohl)(fh_be->magic) == FAT_MAGIC)
    128       return True;
    129 
    130    if (szB < sizeof(struct MACH_HEADER))
    131       return False;
    132    if (mh->magic == MAGIC)
    133       return True;
    134 
    135    return False;
    136 }
    137 
    138 
    139 /* Unmap an image mapped in by map_image_aboard. */
    140 static void unmap_image ( /*MOD*/ImageInfo* ii )
    141 {
    142    SysRes sres;
    143    vg_assert(ii->img);
    144    vg_assert(ii->img_szB > 0);
    145    sres = VG_(am_munmap_valgrind)( (Addr)ii->img, ii->img_szB );
    146    /* Do we care if this fails?  I suppose so; it would indicate
    147       some fairly serious snafu with the mapping of the file. */
    148    vg_assert( !sr_isError(sres) );
    149    VG_(memset)(ii, 0, sizeof(*ii));
    150 }
    151 
    152 
    153 /* Map a given fat or thin object aboard, find the thin part if
    154    necessary, do some checks, and write details of both the fat and
    155    thin parts into *ii.  Returns False (and leaves the file unmapped)
    156    on failure.  Guarantees to return pointers to a valid(ish) Mach-O
    157    image if it succeeds. */
    158 static Bool map_image_aboard ( DebugInfo* di, /* only for err msgs */
    159                                /*OUT*/ImageInfo* ii, UChar* filename )
    160 {
    161    VG_(memset)(ii, 0, sizeof(*ii));
    162 
    163    /* First off, try to map the thing in. */
    164    { SizeT  size;
    165      SysRes fd, sres;
    166      struct vg_stat stat_buf;
    167 
    168      fd = VG_(stat)(filename, &stat_buf);
    169      if (sr_isError(fd)) {
    170         ML_(symerr)(di, True, "Can't stat image (to determine its size)?!");
    171         return False;
    172      }
    173      size = stat_buf.size;
    174 
    175      fd = VG_(open)(filename, VKI_O_RDONLY, 0);
    176      if (sr_isError(fd)) {
    177        ML_(symerr)(di, True, "Can't open image to read symbols?!");
    178         return False;
    179      }
    180 
    181      sres = VG_(am_mmap_file_float_valgrind)
    182                ( size, VKI_PROT_READ, sr_Res(fd), 0 );
    183      if (sr_isError(sres)) {
    184         ML_(symerr)(di, True, "Can't mmap image to read symbols?!");
    185         return False;
    186      }
    187 
    188      VG_(close)(sr_Res(fd));
    189 
    190      ii->img     = (UChar*)sr_Res(sres);
    191      ii->img_szB = size;
    192    }
    193 
    194    /* Now it's mapped in and we have .img and .img_szB set.  Look for
    195       the embedded Mach-O object.  If not findable, unmap and fail. */
    196    { struct fat_header*  fh_be;
    197      struct fat_header   fh;
    198      struct MACH_HEADER* mh;
    199 
    200      // Assume initially that we have a thin image, and update
    201      // these if it turns out to be fat.
    202      ii->macho_img     = ii->img;
    203      ii->macho_img_szB = ii->img_szB;
    204 
    205      // Check for fat header.
    206      if (ii->img_szB < sizeof(struct fat_header)) {
    207         ML_(symerr)(di, True, "Invalid Mach-O file (0 too small).");
    208         goto unmap_and_fail;
    209      }
    210 
    211      // Fat header is always BIG-ENDIAN
    212      fh_be = (struct fat_header *)ii->img;
    213      fh.magic = VG_(ntohl)(fh_be->magic);
    214      fh.nfat_arch = VG_(ntohl)(fh_be->nfat_arch);
    215      if (fh.magic == FAT_MAGIC) {
    216         // Look for a good architecture.
    217         struct fat_arch *arch_be;
    218         struct fat_arch arch;
    219         Int f;
    220         if (ii->img_szB < sizeof(struct fat_header)
    221                           + fh.nfat_arch * sizeof(struct fat_arch)) {
    222            ML_(symerr)(di, True, "Invalid Mach-O file (1 too small).");
    223            goto unmap_and_fail;
    224         }
    225         for (f = 0, arch_be = (struct fat_arch *)(fh_be+1);
    226              f < fh.nfat_arch;
    227              f++, arch_be++) {
    228            Int cputype;
    229 #          if defined(VGA_ppc)
    230            cputype = CPU_TYPE_POWERPC;
    231 #          elif defined(VGA_ppc64)
    232            cputype = CPU_TYPE_POWERPC64;
    233 #          elif defined(VGA_x86)
    234            cputype = CPU_TYPE_X86;
    235 #          elif defined(VGA_amd64)
    236            cputype = CPU_TYPE_X86_64;
    237 #          else
    238 #            error "unknown architecture"
    239 #          endif
    240            arch.cputype    = VG_(ntohl)(arch_be->cputype);
    241            arch.cpusubtype = VG_(ntohl)(arch_be->cpusubtype);
    242            arch.offset     = VG_(ntohl)(arch_be->offset);
    243            arch.size       = VG_(ntohl)(arch_be->size);
    244            if (arch.cputype == cputype) {
    245               if (ii->img_szB < arch.offset + arch.size) {
    246                  ML_(symerr)(di, True, "Invalid Mach-O file (2 too small).");
    247                  goto unmap_and_fail;
    248               }
    249               ii->macho_img     = ii->img + arch.offset;
    250               ii->macho_img_szB = arch.size;
    251               break;
    252            }
    253         }
    254         if (f == fh.nfat_arch) {
    255            ML_(symerr)(di, True,
    256                        "No acceptable architecture found in fat file.");
    257            goto unmap_and_fail;
    258         }
    259      }
    260 
    261      /* Sanity check what we found. */
    262 
    263      /* assured by logic above */
    264      vg_assert(ii->img_szB >= sizeof(struct fat_header));
    265 
    266      if (ii->macho_img_szB < sizeof(struct MACH_HEADER)) {
    267         ML_(symerr)(di, True, "Invalid Mach-O file (3 too small).");
    268         goto unmap_and_fail;
    269      }
    270 
    271      if (ii->macho_img_szB > ii->img_szB) {
    272         ML_(symerr)(di, True, "Invalid Mach-O file (thin bigger than fat).");
    273         goto unmap_and_fail;
    274      }
    275 
    276      if (ii->macho_img >= ii->img
    277          && ii->macho_img + ii->macho_img_szB <= ii->img + ii->img_szB) {
    278         /* thin entirely within fat, as expected */
    279      } else {
    280         ML_(symerr)(di, True, "Invalid Mach-O file (thin not inside fat).");
    281         goto unmap_and_fail;
    282      }
    283 
    284      mh = (struct MACH_HEADER *)ii->macho_img;
    285      if (mh->magic != MAGIC) {
    286         ML_(symerr)(di, True, "Invalid Mach-O file (bad magic).");
    287         goto unmap_and_fail;
    288      }
    289 
    290      if (ii->macho_img_szB < sizeof(struct MACH_HEADER) + mh->sizeofcmds) {
    291         ML_(symerr)(di, True, "Invalid Mach-O file (4 too small).");
    292         goto unmap_and_fail;
    293      }
    294    }
    295 
    296    vg_assert(ii->img);
    297    vg_assert(ii->macho_img);
    298    vg_assert(ii->img_szB > 0);
    299    vg_assert(ii->macho_img_szB > 0);
    300    vg_assert(ii->macho_img >= ii->img);
    301    vg_assert(ii->macho_img + ii->macho_img_szB <= ii->img + ii->img_szB);
    302    return True;  /* success */
    303    /*NOTREACHED*/
    304 
    305   unmap_and_fail:
    306    unmap_image(ii);
    307    return False; /* bah! */
    308 }
    309 
    310 
    311 /*------------------------------------------------------------*/
    312 /*---                                                      ---*/
    313 /*--- Mach-O symbol table reading                          ---*/
    314 /*---                                                      ---*/
    315 /*------------------------------------------------------------*/
    316 
    317 /* Read a symbol table (nlist).  Add the resulting candidate symbols
    318    to 'syms'; the caller will post-process them and hand them off to
    319    ML_(addSym) itself. */
    320 static
    321 void read_symtab( /*OUT*/XArray* /* DiSym */ syms,
    322                   struct _DebugInfo* di,
    323                   struct NLIST* o_symtab, UInt o_symtab_count,
    324                   UChar*     o_strtab, UInt o_strtab_sz )
    325 {
    326    Int    i;
    327    Addr   sym_addr;
    328    DiSym  risym;
    329    UChar* name;
    330 
    331    static UChar* s_a_t_v = NULL; /* do not make non-static */
    332 
    333    for (i = 0; i < o_symtab_count; i++) {
    334       struct NLIST *nl = o_symtab+i;
    335       if ((nl->n_type & N_TYPE) == N_SECT) {
    336          sym_addr = di->text_bias + nl->n_value;
    337     /*} else if ((nl->n_type & N_TYPE) == N_ABS) {
    338          GrP fixme don't ignore absolute symbols?
    339          sym_addr = nl->n_value; */
    340       } else {
    341          continue;
    342       }
    343 
    344       if (di->trace_symtab)
    345          VG_(printf)("nlist raw: avma %010lx  %s\n",
    346                      sym_addr, o_strtab + nl->n_un.n_strx );
    347 
    348       /* If no part of the symbol falls within the mapped range,
    349          ignore it. */
    350       if (sym_addr <= di->text_avma
    351           || sym_addr >= di->text_avma+di->text_size) {
    352          continue;
    353       }
    354 
    355       /* skip names which point outside the string table;
    356          following these risks segfaulting Valgrind */
    357       name = o_strtab + nl->n_un.n_strx;
    358       if (name < o_strtab || name >= o_strtab + o_strtab_sz)
    359          continue;
    360 
    361       /* skip nameless symbols; these appear to be common, but
    362          useless */
    363       if (*name == 0)
    364          continue;
    365 
    366       risym.tocptr = 0;
    367       risym.addr = sym_addr;
    368       risym.size = // let canonicalize fix it
    369                    di->text_avma+di->text_size - sym_addr;
    370       risym.name = ML_(addStr)(di, name, -1);
    371       risym.isText = True;
    372       risym.isIFunc = False;
    373       // Lots of user function names get prepended with an underscore.  Eg. the
    374       // function 'f' becomes the symbol '_f'.  And the "below main"
    375       // function is called "start".  So we skip the leading underscore, and
    376       // if we see 'start' and --show-below-main=no, we rename it as
    377       // "start_according_to_valgrind", which makes it easy to spot later
    378       // and display as "(below main)".
    379       if (risym.name[0] == '_') {
    380          risym.name++;
    381       } else if (!VG_(clo_show_below_main) && VG_STREQ(risym.name, "start")) {
    382          if (s_a_t_v == NULL)
    383             s_a_t_v = ML_(addStr)(di, "start_according_to_valgrind", -1);
    384          vg_assert(s_a_t_v);
    385          risym.name = s_a_t_v;
    386       }
    387 
    388       vg_assert(risym.name);
    389       VG_(addToXA)( syms, &risym );
    390    }
    391 }
    392 
    393 
    394 /* Compare DiSyms by their start address, and for equal addresses, use
    395    the name as a secondary sort key. */
    396 static Int cmp_DiSym_by_start_then_name ( void* v1, void* v2 )
    397 {
    398    DiSym* s1 = (DiSym*)v1;
    399    DiSym* s2 = (DiSym*)v2;
    400    if (s1->addr < s2->addr) return -1;
    401    if (s1->addr > s2->addr) return 1;
    402    return VG_(strcmp)(s1->name, s2->name);
    403 }
    404 
    405 /* 'cand' is a bunch of candidate symbols obtained by reading
    406    nlist-style symbol table entries.  Their ends may overlap, so sort
    407    them and truncate them accordingly.  The code in this routine is
    408    copied almost verbatim from read_symbol_table() in readxcoff.c. */
    409 static void tidy_up_cand_syms ( /*MOD*/XArray* /* of DiSym */ syms,
    410                                 Bool trace_symtab )
    411 {
    412    Word nsyms, i, j, k, m;
    413 
    414    nsyms = VG_(sizeXA)(syms);
    415 
    416    VG_(setCmpFnXA)(syms, cmp_DiSym_by_start_then_name);
    417    VG_(sortXA)(syms);
    418 
    419    /* We only know for sure the start addresses (actual VMAs) of
    420       symbols, and an overestimation of their end addresses.  So sort
    421       by start address, then clip each symbol so that its end address
    422       does not overlap with the next one along.
    423 
    424       There is a small refinement: if a group of symbols have the same
    425       address, treat them as a group: find the next symbol along that
    426       has a higher start address, and clip all of the group
    427       accordingly.  This clips the group as a whole so as not to
    428       overlap following symbols.  This leaves prefersym() in
    429       storage.c, which is not nlist-specific, to later decide which of
    430       the symbols in the group to keep.
    431 
    432       Another refinement is that we need to get rid of symbols which,
    433       after clipping, have identical starts, ends, and names.  So the
    434       sorting uses the name as a secondary key.
    435    */
    436 
    437    for (i = 0; i < nsyms; i++) {
    438       for (k = i+1;
    439            k < nsyms
    440              && ((DiSym*)VG_(indexXA)(syms,i))->addr
    441                  == ((DiSym*)VG_(indexXA)(syms,k))->addr;
    442            k++)
    443          ;
    444       /* So now [i .. k-1] is a group all with the same start address.
    445          Clip their ending addresses so they don't overlap [k].  In
    446          the normal case (no overlaps), k == i+1. */
    447       if (k < nsyms) {
    448          DiSym* next = (DiSym*)VG_(indexXA)(syms,k);
    449          for (m = i; m < k; m++) {
    450             DiSym* here = (DiSym*)VG_(indexXA)(syms,m);
    451             vg_assert(here->addr < next->addr);
    452             if (here->addr + here->size > next->addr)
    453                here->size = next->addr - here->addr;
    454          }
    455       }
    456       i = k-1;
    457       vg_assert(i <= nsyms);
    458    }
    459 
    460    j = 0;
    461    if (nsyms > 0) {
    462       j = 1;
    463       for (i = 1; i < nsyms; i++) {
    464          DiSym *s_j1, *s_j, *s_i;
    465          vg_assert(j <= i);
    466          s_j1 = (DiSym*)VG_(indexXA)(syms, j-1);
    467          s_j  = (DiSym*)VG_(indexXA)(syms, j);
    468          s_i  = (DiSym*)VG_(indexXA)(syms, i);
    469          if (s_i->addr != s_j1->addr
    470              || s_i->size != s_j1->size
    471              || 0 != VG_(strcmp)(s_i->name, s_j1->name)) {
    472             *s_j = *s_i;
    473             j++;
    474          } else {
    475             if (trace_symtab)
    476                VG_(printf)("nlist cleanup: dump duplicate avma %010lx  %s\n",
    477                            s_i->addr, s_i->name );
    478          }
    479       }
    480    }
    481    vg_assert(j >= 0 && j <= nsyms);
    482    VG_(dropTailXA)(syms, nsyms - j);
    483 }
    484 
    485 
    486 /*------------------------------------------------------------*/
    487 /*---                                                      ---*/
    488 /*--- Mach-O top-level processing                          ---*/
    489 /*---                                                      ---*/
    490 /*------------------------------------------------------------*/
    491 
    492 #if !defined(APPLE_DSYM_EXT_AND_SUBDIRECTORY)
    493 #define APPLE_DSYM_EXT_AND_SUBDIRECTORY ".dSYM/Contents/Resources/DWARF/"
    494 #endif
    495 
    496 
    497 static Bool file_exists_p(const Char *path)
    498 {
    499    struct vg_stat sbuf;
    500    SysRes res = VG_(stat)(path, &sbuf);
    501    return sr_isError(res) ? False : True;
    502 }
    503 
    504 
    505 /* Search for an existing dSYM file as a possible separate debug file.
    506    Adapted from gdb. */
    507 static Char *
    508 find_separate_debug_file (const Char *executable_name)
    509 {
    510    Char *basename_str;
    511    Char *dot_ptr;
    512    Char *slash_ptr;
    513    Char *dsymfile;
    514 
    515    /* Make sure the object file name itself doesn't contain ".dSYM" in it or we
    516       will end up with an infinite loop where after we add a dSYM symbol file,
    517       it will then enter this function asking if there is a debug file for the
    518       dSYM file itself.  */
    519    if (VG_(strcasestr) (executable_name, ".dSYM") == NULL)
    520    {
    521       /* Check for the existence of a .dSYM file for a given executable.  */
    522       basename_str = VG_(basename) (executable_name);
    523       dsymfile = ML_(dinfo_zalloc)("di.readmacho.dsymfile",
    524                     VG_(strlen) (executable_name)
    525                     + VG_(strlen) (APPLE_DSYM_EXT_AND_SUBDIRECTORY)
    526                     + VG_(strlen) (basename_str)
    527                     + 1
    528                  );
    529 
    530       /* First try for the dSYM in the same directory as the original file.  */
    531       VG_(strcpy) (dsymfile, executable_name);
    532       VG_(strcat) (dsymfile, APPLE_DSYM_EXT_AND_SUBDIRECTORY);
    533       VG_(strcat) (dsymfile, basename_str);
    534 
    535       if (file_exists_p (dsymfile))
    536          return dsymfile;
    537 
    538       /* Now search for any parent directory that has a '.' in it so we can find
    539          Mac OS X applications, bundles, plugins, and any other kinds of files.
    540          Mac OS X application bundles wil have their program in
    541          "/some/path/MyApp.app/Contents/MacOS/MyApp" (or replace ".app" with
    542          ".bundle" or ".plugin" for other types of bundles).  So we look for any
    543          prior '.' character and try appending the apple dSYM extension and
    544          subdirectory and see if we find an existing dSYM file (in the above
    545          MyApp example the dSYM would be at either:
    546          "/some/path/MyApp.app.dSYM/Contents/Resources/DWARF/MyApp" or
    547          "/some/path/MyApp.dSYM/Contents/Resources/DWARF/MyApp".  */
    548       VG_(strcpy) (dsymfile, VG_(dirname) (executable_name));
    549       while ((dot_ptr = VG_(strrchr) (dsymfile, '.')))
    550       {
    551          /* Find the directory delimiter that follows the '.' character since
    552             we now look for a .dSYM that follows any bundle extension.  */
    553          slash_ptr = VG_(strchr) (dot_ptr, '/');
    554          if (slash_ptr)
    555          {
    556              /* NULL terminate the string at the '/' character and append
    557                 the path down to the dSYM file.  */
    558             *slash_ptr = '\0';
    559             VG_(strcat) (slash_ptr, APPLE_DSYM_EXT_AND_SUBDIRECTORY);
    560             VG_(strcat) (slash_ptr, basename_str);
    561             if (file_exists_p (dsymfile))
    562                return dsymfile;
    563          }
    564 
    565          /* NULL terminate the string at the '.' character and append
    566             the path down to the dSYM file.  */
    567          *dot_ptr = '\0';
    568          VG_(strcat) (dot_ptr, APPLE_DSYM_EXT_AND_SUBDIRECTORY);
    569          VG_(strcat) (dot_ptr, basename_str);
    570          if (file_exists_p (dsymfile))
    571             return dsymfile;
    572 
    573          /* NULL terminate the string at the '.' locatated by the strrchr()
    574             function again.  */
    575          *dot_ptr = '\0';
    576 
    577          /* We found a previous extension '.' character and did not find a
    578             dSYM file so now find previous directory delimiter so we don't
    579             try multiple times on a file name that may have a version number
    580             in it such as "/some/path/MyApp.6.0.4.app".  */
    581          slash_ptr = VG_(strrchr) (dsymfile, '/');
    582          if (!slash_ptr)
    583             break;
    584          /* NULL terminate the string at the previous directory character
    585             and search again.  */
    586          *slash_ptr = '\0';
    587       }
    588    }
    589 
    590    return NULL;
    591 }
    592 
    593 
    594 static UChar *getsectdata(UChar* base, SizeT size,
    595                           Char *segname, Char *sectname,
    596                           /*OUT*/Word *sect_size)
    597 {
    598    struct MACH_HEADER *mh = (struct MACH_HEADER *)base;
    599    struct load_command *cmd;
    600    Int c;
    601 
    602    for (c = 0, cmd = (struct load_command *)(mh+1);
    603         c < mh->ncmds;
    604         c++, cmd = (struct load_command *)(cmd->cmdsize + (Addr)cmd))
    605    {
    606       if (cmd->cmd == LC_SEGMENT_CMD) {
    607          struct SEGMENT_COMMAND *seg = (struct SEGMENT_COMMAND *)cmd;
    608          if (0 == VG_(strncmp(seg->segname, segname, sizeof(seg->segname)))) {
    609             struct SECTION *sects = (struct SECTION *)(seg+1);
    610             Int s;
    611             for (s = 0; s < seg->nsects; s++) {
    612                if (0 == VG_(strncmp(sects[s].sectname, sectname,
    613                                     sizeof(sects[s].sectname))))
    614                {
    615                   if (sect_size) *sect_size = sects[s].size;
    616                   return (UChar *)(base + sects[s].offset);
    617                }
    618             }
    619          }
    620       }
    621    }
    622 
    623    if (sect_size) *sect_size = 0;
    624    return 0;
    625 }
    626 
    627 
    628 /* Brute force just simply search for uuid[0..15] in img[0..n_img-1] */
    629 static Bool check_uuid_matches ( Addr imgA, Word n_img, UChar* uuid )
    630 {
    631    Word   i;
    632    UChar* img = (UChar*)imgA;
    633    UChar  first = uuid[0];
    634    if (n_img < 16)
    635       return False;
    636    for (i = 0; i < n_img-16; i++) {
    637       if (img[i] != first)
    638          continue;
    639       if (0 == VG_(memcmp)( &img[i], &uuid[0], 16 ))
    640          return True;
    641    }
    642    return False;
    643 }
    644 
    645 
    646 /* Heuristic kludge: return True if this looks like an installed
    647    standard library; hence we shouldn't consider automagically running
    648    dsymutil on it. */
    649 static Bool is_systemish_library_name ( UChar* name )
    650 {
    651    vg_assert(name);
    652    if (0 == VG_(strncasecmp)(name, "/usr/", 5)
    653        || 0 == VG_(strncasecmp)(name, "/bin/", 5)
    654        || 0 == VG_(strncasecmp)(name, "/sbin/", 6)
    655        || 0 == VG_(strncasecmp)(name, "/opt/", 5)
    656        || 0 == VG_(strncasecmp)(name, "/sw/", 4)
    657        || 0 == VG_(strncasecmp)(name, "/System/", 8)
    658        || 0 == VG_(strncasecmp)(name, "/Library/", 9)
    659        || 0 == VG_(strncasecmp)(name, "/Applications/", 14)) {
    660       return True;
    661    } else {
    662       return False;
    663    }
    664 }
    665 
    666 
    667 Bool ML_(read_macho_debug_info)( struct _DebugInfo* di )
    668 {
    669    struct symtab_command *symcmd = NULL;
    670    struct dysymtab_command *dysymcmd = NULL;
    671    HChar* dsymfilename = NULL;
    672    Bool have_uuid = False;
    673    UChar uuid[16];
    674    ImageInfo ii;  /* main file */
    675    ImageInfo iid; /* auxiliary .dSYM file */
    676    Bool ok;
    677 
    678    /* mmap the object file to look for di->soname and di->text_bias
    679       and uuid and nlist and STABS */
    680 
    681    if (VG_(clo_verbosity) > 1)
    682       VG_(message)(Vg_DebugMsg,
    683                    "%s (%#lx)\n", di->filename, di->rx_map_avma );
    684    if (VG_(clo_xml))
    685       VG_(printf_xml)("<load_obj><obj>%s</obj><ip>%#lx</ip></load_obj>\n",  di->filename, di->rx_map_avma);
    686 
    687    /* This should be ensured by our caller. */
    688    vg_assert(di->have_rx_map);
    689    vg_assert(di->have_rw_map);
    690 
    691    VG_(memset)(&ii,   0, sizeof(ii));
    692    VG_(memset)(&iid,  0, sizeof(iid));
    693    VG_(memset)(&uuid, 0, sizeof(uuid));
    694 
    695    ok = map_image_aboard( di, &ii, di->filename );
    696    if (!ok) goto fail;
    697 
    698    vg_assert(ii.macho_img != NULL && ii.macho_img_szB > 0);
    699 
    700    /* Poke around in the Mach-O header, to find some important
    701       stuff. */
    702    // Find LC_SYMTAB and LC_DYSYMTAB, if present.
    703    // Read di->soname from LC_ID_DYLIB if present,
    704    //    or from LC_ID_DYLINKER if present,
    705    //    or use "NONE".
    706    // Get di->text_bias (aka slide) based on the corresponding LC_SEGMENT
    707    // Get uuid for later dsym search
    708 
    709    di->text_bias = 0;
    710 
    711    { struct MACH_HEADER *mh = (struct MACH_HEADER *)ii.macho_img;
    712       struct load_command *cmd;
    713       Int c;
    714 
    715       for (c = 0, cmd = (struct load_command *)(mh+1);
    716            c < mh->ncmds;
    717            c++, cmd = (struct load_command *)(cmd->cmdsize
    718                                               + (unsigned long)cmd)) {
    719          if (cmd->cmd == LC_SYMTAB) {
    720             symcmd = (struct symtab_command *)cmd;
    721          }
    722          else if (cmd->cmd == LC_DYSYMTAB) {
    723             dysymcmd = (struct dysymtab_command *)cmd;
    724          }
    725          else if (cmd->cmd == LC_ID_DYLIB && mh->filetype == MH_DYLIB) {
    726             // GrP fixme bundle?
    727             struct dylib_command *dcmd = (struct dylib_command *)cmd;
    728             UChar *dylibname = dcmd->dylib.name.offset + (UChar *)dcmd;
    729             UChar *soname = VG_(strrchr)(dylibname, '/');
    730             if (!soname) soname = dylibname;
    731             else soname++;
    732             di->soname = ML_(dinfo_strdup)("di.readmacho.dylibname",
    733                                            soname);
    734          }
    735          else if (cmd->cmd==LC_ID_DYLINKER  &&  mh->filetype==MH_DYLINKER) {
    736             struct dylinker_command *dcmd = (struct dylinker_command *)cmd;
    737             UChar *dylinkername = dcmd->name.offset + (UChar *)dcmd;
    738             UChar *soname = VG_(strrchr)(dylinkername, '/');
    739             if (!soname) soname = dylinkername;
    740             else soname++;
    741             di->soname = ML_(dinfo_strdup)("di.readmacho.dylinkername",
    742                                            soname);
    743          }
    744 
    745          // A comment from Julian about why varinfo[35] fail:
    746          //
    747          // My impression is, from comparing the output of otool -l for these
    748          // executables with the logic in ML_(read_macho_debug_info),
    749          // specifically the part that begins "else if (cmd->cmd ==
    750          // LC_SEGMENT_CMD) {", that it's a complete hack which just happens
    751          // to work ok for text symbols.  In particular, it appears to assume
    752          // that in a "struct load_command" of type LC_SEGMENT_CMD, the first
    753          // "struct SEGMENT_COMMAND" inside it is going to contain the info we
    754          // need.  However, otool -l shows, and also the Apple docs state,
    755          // that a struct load_command may contain an arbitrary number of
    756          // struct SEGMENT_COMMANDs, so I'm not sure why it's OK to merely
    757          // snarf the first.  But I'm not sure about this.
    758          //
    759          // The "Try for __DATA" block below simply adds acquisition of data
    760          // svma/bias values using the same assumption.  It also needs
    761          // (probably) to deal with bss sections, but I don't understand how
    762          // this all ties together really, so it requires further study.
    763          //
    764          // If you can get your head around the relationship between MachO
    765          // segments, sections and load commands, this might be relatively
    766          // easy to fix properly.
    767          //
    768          // Basically we need to come up with plausible numbers for di->
    769          // {text,data,bss}_{avma,svma}, from which the _bias numbers are
    770          // then trivially derived.  Then I think the debuginfo reader should
    771          // work pretty well.
    772          else if (cmd->cmd == LC_SEGMENT_CMD) {
    773             struct SEGMENT_COMMAND *seg = (struct SEGMENT_COMMAND *)cmd;
    774             /* Try for __TEXT */
    775             if (!di->text_present
    776                 && 0 == VG_(strcmp)(seg->segname, "__TEXT")
    777                 /* DDD: is the  next line a kludge? -- JRS */
    778                 && seg->fileoff == 0 && seg->filesize != 0) {
    779                di->text_present = True;
    780                di->text_svma = (Addr)seg->vmaddr;
    781                di->text_avma = di->rx_map_avma;
    782                di->text_size = seg->vmsize;
    783                di->text_bias = di->text_avma - di->text_svma;
    784                /* Make the _debug_ values be the same as the
    785                   svma/bias for the primary object, since there is
    786                   no secondary (debuginfo) object, but nevertheless
    787                   downstream biasing of Dwarf3 relies on the
    788                   _debug_ values. */
    789                di->text_debug_svma = di->text_svma;
    790                di->text_debug_bias = di->text_bias;
    791             }
    792             /* Try for __DATA */
    793             if (!di->data_present
    794                 && 0 == VG_(strcmp)(seg->segname, "__DATA")
    795                 /* && DDD:seg->fileoff == 0 */ && seg->filesize != 0) {
    796                di->data_present = True;
    797                di->data_svma = (Addr)seg->vmaddr;
    798                di->data_avma = di->rw_map_avma;
    799                di->data_size = seg->vmsize;
    800                di->data_bias = di->data_avma - di->data_svma;
    801                di->data_debug_svma = di->data_svma;
    802                di->data_debug_bias = di->data_bias;
    803             }
    804          }
    805          else if (cmd->cmd == LC_UUID) {
    806              struct uuid_command *uuid_cmd = (struct uuid_command *)cmd;
    807              VG_(memcpy)(uuid, uuid_cmd->uuid, sizeof(uuid));
    808              have_uuid = True;
    809          }
    810       }
    811    }
    812 
    813    if (!di->soname) {
    814       di->soname = ML_(dinfo_strdup)("di.readmacho.noname", "NONE");
    815    }
    816 
    817    /* Now we have the base object to hand.  Read symbols from it. */
    818 
    819    if (ii.macho_img && ii.macho_img_szB > 0 && symcmd && dysymcmd) {
    820 
    821       /* Read nlist symbol table */
    822       struct NLIST *syms;
    823       UChar *strs;
    824       XArray* /* DiSym */ candSyms = NULL;
    825       Word i, nCandSyms;
    826 
    827       if (ii.macho_img_szB < symcmd->stroff + symcmd->strsize
    828           || ii.macho_img_szB < symcmd->symoff + symcmd->nsyms
    829                                                  * sizeof(struct NLIST)) {
    830          ML_(symerr)(di, False, "Invalid Mach-O file (5 too small).");
    831          goto fail;
    832       }
    833       if (dysymcmd->ilocalsym + dysymcmd->nlocalsym > symcmd->nsyms
    834           || dysymcmd->iextdefsym + dysymcmd->nextdefsym > symcmd->nsyms) {
    835          ML_(symerr)(di, False, "Invalid Mach-O file (bad symbol table).");
    836          goto fail;
    837       }
    838 
    839       syms = (struct NLIST *)(ii.macho_img + symcmd->symoff);
    840       strs = (UChar *)(ii.macho_img + symcmd->stroff);
    841 
    842       if (VG_(clo_verbosity) > 1)
    843          VG_(message)(Vg_DebugMsg,
    844             "   reading syms   from primary file (%d %d)\n",
    845             dysymcmd->nextdefsym, dysymcmd->nlocalsym );
    846 
    847       /* Read candidate symbols into 'candSyms', so we can truncate
    848          overlapping ends and generally tidy up, before presenting
    849          them to ML_(addSym). */
    850       candSyms = VG_(newXA)(
    851                     ML_(dinfo_zalloc), "di.readmacho.candsyms.1",
    852                     ML_(dinfo_free), sizeof(DiSym)
    853                  );
    854       vg_assert(candSyms);
    855 
    856       // extern symbols
    857       read_symtab(candSyms,
    858                   di,
    859                   syms + dysymcmd->iextdefsym, dysymcmd->nextdefsym,
    860                   strs, symcmd->strsize);
    861       // static and private_extern symbols
    862       read_symtab(candSyms,
    863                   di,
    864                   syms + dysymcmd->ilocalsym, dysymcmd->nlocalsym,
    865                   strs, symcmd->strsize);
    866 
    867       /* tidy up the cand syms -- trim overlapping ends.  May resize
    868          candSyms. */
    869       tidy_up_cand_syms( candSyms, di->trace_symtab );
    870 
    871       /* and finally present them to ML_(addSym) */
    872       nCandSyms = VG_(sizeXA)( candSyms );
    873       for (i = 0; i < nCandSyms; i++) {
    874          DiSym* cand = (DiSym*) VG_(indexXA)( candSyms, i );
    875          if (di->trace_symtab)
    876             VG_(printf)("nlist final: acquire  avma %010lx-%010lx  %s\n",
    877                         cand->addr, cand->addr + cand->size - 1, cand->name );
    878          ML_(addSym)( di, cand );
    879       }
    880       VG_(deleteXA)( candSyms );
    881    }
    882 
    883    /* If there's no UUID in the primary, don't even bother to try and
    884       read any DWARF, since we won't be able to verify it matches.
    885       Our policy is not to load debug info unless we can verify that
    886       it matches the primary.  Just declare success at this point.
    887       And don't complain to the user, since that would cause us to
    888       complain on objects compiled without -g.  (Some versions of
    889       XCode are observed to omit a UUID entry for object linked(?)
    890       without -g.  Others don't appear to omit it.) */
    891    if (!have_uuid)
    892       goto success;
    893 
    894    /* mmap the dSYM file to look for DWARF debug info.  If successful,
    895       use the .macho_img and .macho_img_szB in iid. */
    896 
    897    dsymfilename = find_separate_debug_file( di->filename );
    898 
    899    /* Try to load it. */
    900    if (dsymfilename) {
    901       Bool valid;
    902 
    903       if (VG_(clo_verbosity) > 1)
    904          VG_(message)(Vg_DebugMsg, "   dSYM= %s\n", dsymfilename);
    905 
    906       ok = map_image_aboard( di, &iid, dsymfilename );
    907       if (!ok) goto fail;
    908 
    909       /* check it has the right uuid. */
    910       vg_assert(have_uuid);
    911       valid = iid.macho_img && iid.macho_img_szB > 0
    912               && check_uuid_matches( (Addr)iid.macho_img,
    913                                      iid.macho_img_szB, uuid );
    914       if (valid)
    915          goto read_the_dwarf;
    916 
    917       if (VG_(clo_verbosity) > 1)
    918          VG_(message)(Vg_DebugMsg, "   dSYM does not have "
    919                                    "correct UUID (out of date?)\n");
    920    }
    921 
    922    /* There was no dsym file, or it doesn't match.  We'll have to try
    923       regenerating it, unless --dsymutil=no, in which case just complain
    924       instead. */
    925 
    926    /* If this looks like a lib that we shouldn't run dsymutil on, just
    927       give up.  (possible reasons: is system lib, or in /usr etc, or
    928       the dsym dir would not be writable by the user, or we're running
    929       as root) */
    930    vg_assert(di->filename);
    931    if (is_systemish_library_name(di->filename))
    932       goto success;
    933 
    934    if (!VG_(clo_dsymutil)) {
    935       if (VG_(clo_verbosity) == 1) {
    936          VG_(message)(Vg_DebugMsg, "%s:\n", di->filename);
    937       }
    938       if (VG_(clo_verbosity) > 0)
    939          VG_(message)(Vg_DebugMsg, "%sdSYM directory %s; consider using "
    940                       "--dsymutil=yes\n",
    941                       VG_(clo_verbosity) > 1 ? "   " : "",
    942                       dsymfilename ? "has wrong UUID" : "is missing");
    943       goto success;
    944    }
    945 
    946    /* Run dsymutil */
    947 
    948    { Int r;
    949      HChar* dsymutil = "/usr/bin/dsymutil ";
    950      HChar* cmd = ML_(dinfo_zalloc)( "di.readmacho.tmp1",
    951                                      VG_(strlen)(dsymutil)
    952                                      + VG_(strlen)(di->filename)
    953                                      + 32 /* misc */ );
    954      VG_(strcpy)(cmd, dsymutil);
    955      if (0) VG_(strcat)(cmd, "--verbose ");
    956      VG_(strcat)(cmd, "\"");
    957      VG_(strcat)(cmd, di->filename);
    958      VG_(strcat)(cmd, "\"");
    959      VG_(message)(Vg_DebugMsg, "run: %s\n", cmd);
    960      r = VG_(system)( cmd );
    961      if (r)
    962         VG_(message)(Vg_DebugMsg, "run: %s FAILED\n", dsymutil);
    963      ML_(dinfo_free)(cmd);
    964      dsymfilename = find_separate_debug_file(di->filename);
    965    }
    966 
    967    /* Try again to load it. */
    968    if (dsymfilename) {
    969       Bool valid;
    970 
    971       if (VG_(clo_verbosity) > 1)
    972          VG_(message)(Vg_DebugMsg, "   dsyms= %s\n", dsymfilename);
    973 
    974       ok = map_image_aboard( di, &iid, dsymfilename );
    975       if (!ok) goto fail;
    976 
    977       /* check it has the right uuid. */
    978       vg_assert(have_uuid);
    979       valid = iid.macho_img && iid.macho_img_szB > 0
    980               && check_uuid_matches( (Addr)iid.macho_img,
    981                                      iid.macho_img_szB, uuid );
    982       if (!valid) {
    983          if (VG_(clo_verbosity) > 0) {
    984             VG_(message)(Vg_DebugMsg,
    985                "WARNING: did not find expected UUID %02X%02X%02X%02X"
    986                "-%02X%02X-%02X%02X-%02X%02X-%02X%02X%02X%02X%02X%02X"
    987                " in dSYM dir\n",
    988                (UInt)uuid[0], (UInt)uuid[1], (UInt)uuid[2], (UInt)uuid[3],
    989                (UInt)uuid[4], (UInt)uuid[5], (UInt)uuid[6], (UInt)uuid[7],
    990                (UInt)uuid[8], (UInt)uuid[9], (UInt)uuid[10],
    991                (UInt)uuid[11], (UInt)uuid[12], (UInt)uuid[13],
    992                (UInt)uuid[14], (UInt)uuid[15] );
    993             VG_(message)(Vg_DebugMsg,
    994                          "WARNING: for %s\n", di->filename);
    995          }
    996          unmap_image( &iid );
    997          /* unmap_image zeroes the fields, so the following test makes
    998             sense. */
    999          goto fail;
   1000       }
   1001    }
   1002 
   1003    /* Right.  Finally we have our best try at the dwarf image, so go
   1004       on to reading stuff out of it. */
   1005 
   1006   read_the_dwarf:
   1007    if (iid.macho_img && iid.macho_img_szB > 0) {
   1008       UChar* debug_info_img = NULL;
   1009       Word   debug_info_sz;
   1010       UChar* debug_abbv_img;
   1011       Word   debug_abbv_sz;
   1012       UChar* debug_line_img;
   1013       Word   debug_line_sz;
   1014       UChar* debug_str_img;
   1015       Word   debug_str_sz;
   1016       UChar* debug_ranges_img;
   1017       Word   debug_ranges_sz;
   1018       UChar* debug_loc_img;
   1019       Word   debug_loc_sz;
   1020       UChar* debug_name_img;
   1021       Word   debug_name_sz;
   1022 
   1023       debug_info_img =
   1024           getsectdata(iid.macho_img, iid.macho_img_szB,
   1025                       "__DWARF", "__debug_info", &debug_info_sz);
   1026       debug_abbv_img =
   1027           getsectdata(iid.macho_img, iid.macho_img_szB,
   1028                       "__DWARF", "__debug_abbrev", &debug_abbv_sz);
   1029       debug_line_img =
   1030           getsectdata(iid.macho_img, iid.macho_img_szB,
   1031                       "__DWARF", "__debug_line", &debug_line_sz);
   1032       debug_str_img =
   1033           getsectdata(iid.macho_img, iid.macho_img_szB,
   1034                       "__DWARF", "__debug_str", &debug_str_sz);
   1035       debug_ranges_img =
   1036           getsectdata(iid.macho_img, iid.macho_img_szB,
   1037                       "__DWARF", "__debug_ranges", &debug_ranges_sz);
   1038       debug_loc_img =
   1039           getsectdata(iid.macho_img, iid.macho_img_szB,
   1040                       "__DWARF", "__debug_loc", &debug_loc_sz);
   1041       debug_name_img =
   1042           getsectdata(iid.macho_img, iid.macho_img_szB,
   1043                       "__DWARF", "__debug_pubnames", &debug_name_sz);
   1044 
   1045       if (debug_info_img) {
   1046          if (VG_(clo_verbosity) > 1) {
   1047             if (0)
   1048             VG_(message)(Vg_DebugMsg,
   1049                          "Reading dwarf3 for %s (%#lx) from %s"
   1050                          " (%ld %ld %ld %ld %ld %ld)\n",
   1051                          di->filename, di->text_avma, dsymfilename,
   1052                          debug_info_sz, debug_abbv_sz, debug_line_sz,
   1053                          debug_str_sz, debug_ranges_sz, debug_loc_sz
   1054                          );
   1055             VG_(message)(Vg_DebugMsg,
   1056                "   reading dwarf3 from dsyms file\n");
   1057          }
   1058          /* The old reader: line numbers and unwind info only */
   1059          ML_(read_debuginfo_dwarf3) ( di,
   1060                                       debug_info_img, debug_info_sz,
   1061                                       debug_abbv_img, debug_abbv_sz,
   1062                                       debug_line_img, debug_line_sz,
   1063                                       debug_str_img,  debug_str_sz );
   1064 
   1065          /* The new reader: read the DIEs in .debug_info to acquire
   1066             information on variable types and locations.  But only if
   1067             the tool asks for it, or the user requests it on the
   1068             command line. */
   1069          if (VG_(needs).var_info /* the tool requires it */
   1070              || VG_(clo_read_var_info) /* the user asked for it */) {
   1071             ML_(new_dwarf3_reader)(
   1072                di, debug_info_img,   debug_info_sz,
   1073                    debug_abbv_img,   debug_abbv_sz,
   1074                    debug_line_img,   debug_line_sz,
   1075                    debug_str_img,    debug_str_sz,
   1076                    debug_ranges_img, debug_ranges_sz,
   1077                    debug_loc_img,    debug_loc_sz
   1078             );
   1079          }
   1080       }
   1081    }
   1082 
   1083    if (dsymfilename) ML_(dinfo_free)(dsymfilename);
   1084 
   1085   success:
   1086    if (ii.img)
   1087       unmap_image(&ii);
   1088    if (iid.img)
   1089       unmap_image(&iid);
   1090    return True;
   1091 
   1092    /* NOTREACHED */
   1093 
   1094   fail:
   1095    ML_(symerr)(di, True, "Error reading Mach-O object.");
   1096    if (ii.img)
   1097       unmap_image(&ii);
   1098    if (iid.img)
   1099       unmap_image(&iid);
   1100    return False;
   1101 }
   1102 
   1103 #endif // defined(VGO_darwin)
   1104 
   1105 /*--------------------------------------------------------------------*/
   1106 /*--- end                                                          ---*/
   1107 /*--------------------------------------------------------------------*/
   1108