Home | History | Annotate | Download | only in m_debuginfo
      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- Reading of syms & debug info from Mach-O files.              ---*/
      4 /*---                                                  readmacho.c ---*/
      5 /*--------------------------------------------------------------------*/
      6 
      7 /*
      8    This file is part of Valgrind, a dynamic binary instrumentation
      9    framework.
     10 
     11    Copyright (C) 2005-2011 Apple Inc.
     12       Greg Parker gparker (at) apple.com
     13 
     14    This program is free software; you can redistribute it and/or
     15    modify it under the terms of the GNU General Public License as
     16    published by the Free Software Foundation; either version 2 of the
     17    License, or (at your option) any later version.
     18 
     19    This program is distributed in the hope that it will be useful, but
     20    WITHOUT ANY WARRANTY; without even the implied warranty of
     21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     22    General Public License for more details.
     23 
     24    You should have received a copy of the GNU General Public License
     25    along with this program; if not, write to the Free Software
     26    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     27    02111-1307, USA.
     28 
     29    The GNU General Public License is contained in the file COPYING.
     30 */
     31 
     32 #if defined(VGO_darwin)
     33 
     34 #include "pub_core_basics.h"
     35 #include "pub_core_vki.h"
     36 #include "pub_core_libcbase.h"
     37 #include "pub_core_libcprint.h"
     38 #include "pub_core_libcassert.h"
     39 #include "pub_core_libcfile.h"
     40 #include "pub_core_libcproc.h"
     41 #include "pub_core_aspacemgr.h"    /* for mmaping debuginfo files */
     42 #include "pub_core_machine.h"      /* VG_ELF_CLASS */
     43 #include "pub_core_options.h"
     44 #include "pub_core_oset.h"
     45 #include "pub_core_tooliface.h"    /* VG_(needs) */
     46 #include "pub_core_xarray.h"
     47 #include "pub_core_clientstate.h"
     48 #include "pub_core_debuginfo.h"
     49 
     50 #include "priv_d3basics.h"
     51 #include "priv_misc.h"
     52 #include "priv_tytypes.h"
     53 #include "priv_storage.h"
     54 #include "priv_readmacho.h"
     55 #include "priv_readdwarf.h"
     56 #include "priv_readdwarf3.h"
     57 #include "priv_readstabs.h"
     58 
     59 /* --- !!! --- EXTERNAL HEADERS start --- !!! --- */
     60 #include <mach-o/loader.h>
     61 #include <mach-o/nlist.h>
     62 #include <mach-o/fat.h>
     63 /* --- !!! --- EXTERNAL HEADERS end --- !!! --- */
     64 
     65 #if VG_WORDSIZE == 4
     66 # define MAGIC MH_MAGIC
     67 # define MACH_HEADER mach_header
     68 # define LC_SEGMENT_CMD LC_SEGMENT
     69 # define SEGMENT_COMMAND segment_command
     70 # define SECTION section
     71 # define NLIST nlist
     72 #else
     73 # define MAGIC MH_MAGIC_64
     74 # define MACH_HEADER mach_header_64
     75 # define LC_SEGMENT_CMD LC_SEGMENT_64
     76 # define SEGMENT_COMMAND segment_command_64
     77 # define SECTION section_64
     78 # define NLIST nlist_64
     79 #endif
     80 
     81 
     82 /*------------------------------------------------------------*/
     83 /*---                                                      ---*/
     84 /*--- Mach-O file mapping/unmapping helpers                ---*/
     85 /*---                                                      ---*/
     86 /*------------------------------------------------------------*/
     87 
     88 typedef
     89    struct {
     90       /* These two describe the entire mapped-in ("primary") image,
     91          fat headers, kitchen sink, whatnot: the entire file.  The
     92          image is mapped into img[0 .. img_szB-1]. */
     93       UChar* img;
     94       SizeT  img_szB;
     95       /* These two describe the Mach-O object of interest, which is
     96          presumably somewhere inside the primary image.
     97          map_image_aboard() below, which generates this info, will
     98          carefully check that the macho_ fields denote a section of
     99          memory that falls entirely inside img[0 .. img_szB-1]. */
    100       UChar* macho_img;
    101       SizeT  macho_img_szB;
    102    }
    103    ImageInfo;
    104 
    105 
    106 Bool ML_(is_macho_object_file)( const void* buf, SizeT szB )
    107 {
    108    /* (JRS: the Mach-O headers might not be in this mapped data,
    109       because we only mapped a page for this initial check,
    110       or at least not very much, and what's at the start of the file
    111       is in general a so-called fat header.  The Mach-O object we're
    112       interested in could be arbitrarily far along the image, and so
    113       we can't assume its header will fall within this page.) */
    114 
    115    /* But we can say that either it's a fat object, in which case it
    116       begins with a fat header, or it's unadorned Mach-O, in which
    117       case it starts with a normal header.  At least do what checks we
    118       can to establish whether or not we're looking at something
    119       sane. */
    120 
    121    const struct fat_header*  fh_be = buf;
    122    const struct MACH_HEADER* mh    = buf;
    123 
    124    vg_assert(buf);
    125    if (szB < sizeof(struct fat_header))
    126       return False;
    127    if (VG_(ntohl)(fh_be->magic) == FAT_MAGIC)
    128       return True;
    129 
    130    if (szB < sizeof(struct MACH_HEADER))
    131       return False;
    132    if (mh->magic == MAGIC)
    133       return True;
    134 
    135    return False;
    136 }
    137 
    138 
    139 /* Unmap an image mapped in by map_image_aboard. */
    140 static void unmap_image ( /*MOD*/ImageInfo* ii )
    141 {
    142    SysRes sres;
    143    vg_assert(ii->img);
    144    vg_assert(ii->img_szB > 0);
    145    sres = VG_(am_munmap_valgrind)( (Addr)ii->img, ii->img_szB );
    146    /* Do we care if this fails?  I suppose so; it would indicate
    147       some fairly serious snafu with the mapping of the file. */
    148    vg_assert( !sr_isError(sres) );
    149    VG_(memset)(ii, 0, sizeof(*ii));
    150 }
    151 
    152 
    153 /* Map a given fat or thin object aboard, find the thin part if
    154    necessary, do some checks, and write details of both the fat and
    155    thin parts into *ii.  Returns False (and leaves the file unmapped)
    156    on failure.  Guarantees to return pointers to a valid(ish) Mach-O
    157    image if it succeeds. */
    158 static Bool map_image_aboard ( DebugInfo* di, /* only for err msgs */
    159                                /*OUT*/ImageInfo* ii, UChar* filename )
    160 {
    161    VG_(memset)(ii, 0, sizeof(*ii));
    162 
    163    /* First off, try to map the thing in. */
    164    { SizeT  size;
    165      SysRes fd, sres;
    166      struct vg_stat stat_buf;
    167 
    168      fd = VG_(stat)(filename, &stat_buf);
    169      if (sr_isError(fd)) {
    170         ML_(symerr)(di, True, "Can't stat image (to determine its size)?!");
    171         return False;
    172      }
    173      size = stat_buf.size;
    174 
    175      fd = VG_(open)(filename, VKI_O_RDONLY, 0);
    176      if (sr_isError(fd)) {
    177        ML_(symerr)(di, True, "Can't open image to read symbols?!");
    178         return False;
    179      }
    180 
    181      sres = VG_(am_mmap_file_float_valgrind)
    182                ( size, VKI_PROT_READ, sr_Res(fd), 0 );
    183      if (sr_isError(sres)) {
    184         ML_(symerr)(di, True, "Can't mmap image to read symbols?!");
    185         return False;
    186      }
    187 
    188      VG_(close)(sr_Res(fd));
    189 
    190      ii->img     = (UChar*)sr_Res(sres);
    191      ii->img_szB = size;
    192    }
    193 
    194    /* Now it's mapped in and we have .img and .img_szB set.  Look for
    195       the embedded Mach-O object.  If not findable, unmap and fail. */
    196    { struct fat_header*  fh_be;
    197      struct fat_header   fh;
    198      struct MACH_HEADER* mh;
    199 
    200      // Assume initially that we have a thin image, and update
    201      // these if it turns out to be fat.
    202      ii->macho_img     = ii->img;
    203      ii->macho_img_szB = ii->img_szB;
    204 
    205      // Check for fat header.
    206      if (ii->img_szB < sizeof(struct fat_header)) {
    207         ML_(symerr)(di, True, "Invalid Mach-O file (0 too small).");
    208         goto unmap_and_fail;
    209      }
    210 
    211      // Fat header is always BIG-ENDIAN
    212      fh_be = (struct fat_header *)ii->img;
    213      fh.magic = VG_(ntohl)(fh_be->magic);
    214      fh.nfat_arch = VG_(ntohl)(fh_be->nfat_arch);
    215      if (fh.magic == FAT_MAGIC) {
    216         // Look for a good architecture.
    217         struct fat_arch *arch_be;
    218         struct fat_arch arch;
    219         Int f;
    220         if (ii->img_szB < sizeof(struct fat_header)
    221                           + fh.nfat_arch * sizeof(struct fat_arch)) {
    222            ML_(symerr)(di, True, "Invalid Mach-O file (1 too small).");
    223            goto unmap_and_fail;
    224         }
    225         for (f = 0, arch_be = (struct fat_arch *)(fh_be+1);
    226              f < fh.nfat_arch;
    227              f++, arch_be++) {
    228            Int cputype;
    229 #          if defined(VGA_ppc)
    230            cputype = CPU_TYPE_POWERPC;
    231 #          elif defined(VGA_ppc64)
    232            cputype = CPU_TYPE_POWERPC64;
    233 #          elif defined(VGA_x86)
    234            cputype = CPU_TYPE_X86;
    235 #          elif defined(VGA_amd64)
    236            cputype = CPU_TYPE_X86_64;
    237 #          else
    238 #            error "unknown architecture"
    239 #          endif
    240            arch.cputype    = VG_(ntohl)(arch_be->cputype);
    241            arch.cpusubtype = VG_(ntohl)(arch_be->cpusubtype);
    242            arch.offset     = VG_(ntohl)(arch_be->offset);
    243            arch.size       = VG_(ntohl)(arch_be->size);
    244            if (arch.cputype == cputype) {
    245               if (ii->img_szB < arch.offset + arch.size) {
    246                  ML_(symerr)(di, True, "Invalid Mach-O file (2 too small).");
    247                  goto unmap_and_fail;
    248               }
    249               ii->macho_img     = ii->img + arch.offset;
    250               ii->macho_img_szB = arch.size;
    251               break;
    252            }
    253         }
    254         if (f == fh.nfat_arch) {
    255            ML_(symerr)(di, True,
    256                        "No acceptable architecture found in fat file.");
    257            goto unmap_and_fail;
    258         }
    259      }
    260 
    261      /* Sanity check what we found. */
    262 
    263      /* assured by logic above */
    264      vg_assert(ii->img_szB >= sizeof(struct fat_header));
    265 
    266      if (ii->macho_img_szB < sizeof(struct MACH_HEADER)) {
    267         ML_(symerr)(di, True, "Invalid Mach-O file (3 too small).");
    268         goto unmap_and_fail;
    269      }
    270 
    271      if (ii->macho_img_szB > ii->img_szB) {
    272         ML_(symerr)(di, True, "Invalid Mach-O file (thin bigger than fat).");
    273         goto unmap_and_fail;
    274      }
    275 
    276      if (ii->macho_img >= ii->img
    277          && ii->macho_img + ii->macho_img_szB <= ii->img + ii->img_szB) {
    278         /* thin entirely within fat, as expected */
    279      } else {
    280         ML_(symerr)(di, True, "Invalid Mach-O file (thin not inside fat).");
    281         goto unmap_and_fail;
    282      }
    283 
    284      mh = (struct MACH_HEADER *)ii->macho_img;
    285      if (mh->magic != MAGIC) {
    286         ML_(symerr)(di, True, "Invalid Mach-O file (bad magic).");
    287         goto unmap_and_fail;
    288      }
    289 
    290      if (ii->macho_img_szB < sizeof(struct MACH_HEADER) + mh->sizeofcmds) {
    291         ML_(symerr)(di, True, "Invalid Mach-O file (4 too small).");
    292         goto unmap_and_fail;
    293      }
    294    }
    295 
    296    vg_assert(ii->img);
    297    vg_assert(ii->macho_img);
    298    vg_assert(ii->img_szB > 0);
    299    vg_assert(ii->macho_img_szB > 0);
    300    vg_assert(ii->macho_img >= ii->img);
    301    vg_assert(ii->macho_img + ii->macho_img_szB <= ii->img + ii->img_szB);
    302    return True;  /* success */
    303    /*NOTREACHED*/
    304 
    305   unmap_and_fail:
    306    unmap_image(ii);
    307    return False; /* bah! */
    308 }
    309 
    310 
    311 /*------------------------------------------------------------*/
    312 /*---                                                      ---*/
    313 /*--- Mach-O symbol table reading                          ---*/
    314 /*---                                                      ---*/
    315 /*------------------------------------------------------------*/
    316 
    317 /* Read a symbol table (nlist).  Add the resulting candidate symbols
    318    to 'syms'; the caller will post-process them and hand them off to
    319    ML_(addSym) itself. */
    320 static
    321 void read_symtab( /*OUT*/XArray* /* DiSym */ syms,
    322                   struct _DebugInfo* di,
    323                   struct NLIST* o_symtab, UInt o_symtab_count,
    324                   UChar*     o_strtab, UInt o_strtab_sz )
    325 {
    326    Int    i;
    327    Addr   sym_addr;
    328    DiSym  disym;
    329    UChar* name;
    330 
    331    static UChar* s_a_t_v = NULL; /* do not make non-static */
    332 
    333    for (i = 0; i < o_symtab_count; i++) {
    334       struct NLIST *nl = o_symtab+i;
    335       if ((nl->n_type & N_TYPE) == N_SECT) {
    336          sym_addr = di->text_bias + nl->n_value;
    337     /*} else if ((nl->n_type & N_TYPE) == N_ABS) {
    338          GrP fixme don't ignore absolute symbols?
    339          sym_addr = nl->n_value; */
    340       } else {
    341          continue;
    342       }
    343 
    344       if (di->trace_symtab)
    345          VG_(printf)("nlist raw: avma %010lx  %s\n",
    346                      sym_addr, o_strtab + nl->n_un.n_strx );
    347 
    348       /* If no part of the symbol falls within the mapped range,
    349          ignore it. */
    350       if (sym_addr <= di->text_avma
    351           || sym_addr >= di->text_avma+di->text_size) {
    352          continue;
    353       }
    354 
    355       /* skip names which point outside the string table;
    356          following these risks segfaulting Valgrind */
    357       name = o_strtab + nl->n_un.n_strx;
    358       if (name < o_strtab || name >= o_strtab + o_strtab_sz)
    359          continue;
    360 
    361       /* skip nameless symbols; these appear to be common, but
    362          useless */
    363       if (*name == 0)
    364          continue;
    365 
    366       disym.addr      = sym_addr;
    367       disym.tocptr    = 0;
    368       disym.pri_name  = ML_(addStr)(di, name, -1);
    369       disym.sec_names = NULL;
    370       disym.size      = // let canonicalize fix it
    371                         di->text_avma+di->text_size - sym_addr;
    372       disym.isText    = True;
    373       disym.isIFunc   = False;
    374       // Lots of user function names get prepended with an underscore.  Eg. the
    375       // function 'f' becomes the symbol '_f'.  And the "below main"
    376       // function is called "start".  So we skip the leading underscore, and
    377       // if we see 'start' and --show-below-main=no, we rename it as
    378       // "start_according_to_valgrind", which makes it easy to spot later
    379       // and display as "(below main)".
    380       if (disym.pri_name[0] == '_') {
    381          disym.pri_name++;
    382       }
    383       else if (!VG_(clo_show_below_main) && VG_STREQ(disym.pri_name, "start")) {
    384          if (s_a_t_v == NULL)
    385             s_a_t_v = ML_(addStr)(di, "start_according_to_valgrind", -1);
    386          vg_assert(s_a_t_v);
    387          disym.pri_name = s_a_t_v;
    388       }
    389 
    390       vg_assert(disym.pri_name);
    391       VG_(addToXA)( syms, &disym );
    392    }
    393 }
    394 
    395 
    396 /* Compare DiSyms by their start address, and for equal addresses, use
    397    the primary name as a secondary sort key. */
    398 static Int cmp_DiSym_by_start_then_name ( void* v1, void* v2 )
    399 {
    400    DiSym* s1 = (DiSym*)v1;
    401    DiSym* s2 = (DiSym*)v2;
    402    if (s1->addr < s2->addr) return -1;
    403    if (s1->addr > s2->addr) return 1;
    404    return VG_(strcmp)(s1->pri_name, s2->pri_name);
    405 }
    406 
    407 /* 'cand' is a bunch of candidate symbols obtained by reading
    408    nlist-style symbol table entries.  Their ends may overlap, so sort
    409    them and truncate them accordingly.  The code in this routine is
    410    copied almost verbatim from read_symbol_table() in readxcoff.c. */
    411 static void tidy_up_cand_syms ( /*MOD*/XArray* /* of DiSym */ syms,
    412                                 Bool trace_symtab )
    413 {
    414    Word nsyms, i, j, k, m;
    415 
    416    nsyms = VG_(sizeXA)(syms);
    417 
    418    VG_(setCmpFnXA)(syms, cmp_DiSym_by_start_then_name);
    419    VG_(sortXA)(syms);
    420 
    421    /* We only know for sure the start addresses (actual VMAs) of
    422       symbols, and an overestimation of their end addresses.  So sort
    423       by start address, then clip each symbol so that its end address
    424       does not overlap with the next one along.
    425 
    426       There is a small refinement: if a group of symbols have the same
    427       address, treat them as a group: find the next symbol along that
    428       has a higher start address, and clip all of the group
    429       accordingly.  This clips the group as a whole so as not to
    430       overlap following symbols.  This leaves prefersym() in
    431       storage.c, which is not nlist-specific, to later decide which of
    432       the symbols in the group to keep.
    433 
    434       Another refinement is that we need to get rid of symbols which,
    435       after clipping, have identical starts, ends, and names.  So the
    436       sorting uses the name as a secondary key.
    437    */
    438 
    439    for (i = 0; i < nsyms; i++) {
    440       for (k = i+1;
    441            k < nsyms
    442              && ((DiSym*)VG_(indexXA)(syms,i))->addr
    443                  == ((DiSym*)VG_(indexXA)(syms,k))->addr;
    444            k++)
    445          ;
    446       /* So now [i .. k-1] is a group all with the same start address.
    447          Clip their ending addresses so they don't overlap [k].  In
    448          the normal case (no overlaps), k == i+1. */
    449       if (k < nsyms) {
    450          DiSym* next = (DiSym*)VG_(indexXA)(syms,k);
    451          for (m = i; m < k; m++) {
    452             DiSym* here = (DiSym*)VG_(indexXA)(syms,m);
    453             vg_assert(here->addr < next->addr);
    454             if (here->addr + here->size > next->addr)
    455                here->size = next->addr - here->addr;
    456          }
    457       }
    458       i = k-1;
    459       vg_assert(i <= nsyms);
    460    }
    461 
    462    j = 0;
    463    if (nsyms > 0) {
    464       j = 1;
    465       for (i = 1; i < nsyms; i++) {
    466          DiSym *s_j1, *s_j, *s_i;
    467          vg_assert(j <= i);
    468          s_j1 = (DiSym*)VG_(indexXA)(syms, j-1);
    469          s_j  = (DiSym*)VG_(indexXA)(syms, j);
    470          s_i  = (DiSym*)VG_(indexXA)(syms, i);
    471          if (s_i->addr != s_j1->addr
    472              || s_i->size != s_j1->size
    473              || 0 != VG_(strcmp)(s_i->pri_name, s_j1->pri_name)) {
    474             *s_j = *s_i;
    475             j++;
    476          } else {
    477             if (trace_symtab)
    478                VG_(printf)("nlist cleanup: dump duplicate avma %010lx  %s\n",
    479                            s_i->addr, s_i->pri_name );
    480          }
    481       }
    482    }
    483    vg_assert(j >= 0 && j <= nsyms);
    484    VG_(dropTailXA)(syms, nsyms - j);
    485 }
    486 
    487 
    488 /*------------------------------------------------------------*/
    489 /*---                                                      ---*/
    490 /*--- Mach-O top-level processing                          ---*/
    491 /*---                                                      ---*/
    492 /*------------------------------------------------------------*/
    493 
    494 #if !defined(APPLE_DSYM_EXT_AND_SUBDIRECTORY)
    495 #define APPLE_DSYM_EXT_AND_SUBDIRECTORY ".dSYM/Contents/Resources/DWARF/"
    496 #endif
    497 
    498 
    499 static Bool file_exists_p(const Char *path)
    500 {
    501    struct vg_stat sbuf;
    502    SysRes res = VG_(stat)(path, &sbuf);
    503    return sr_isError(res) ? False : True;
    504 }
    505 
    506 
    507 /* Search for an existing dSYM file as a possible separate debug file.
    508    Adapted from gdb. */
    509 static Char *
    510 find_separate_debug_file (const Char *executable_name)
    511 {
    512    Char *basename_str;
    513    Char *dot_ptr;
    514    Char *slash_ptr;
    515    Char *dsymfile;
    516 
    517    /* Make sure the object file name itself doesn't contain ".dSYM" in it or we
    518       will end up with an infinite loop where after we add a dSYM symbol file,
    519       it will then enter this function asking if there is a debug file for the
    520       dSYM file itself.  */
    521    if (VG_(strcasestr) (executable_name, ".dSYM") == NULL)
    522    {
    523       /* Check for the existence of a .dSYM file for a given executable.  */
    524       basename_str = VG_(basename) (executable_name);
    525       dsymfile = ML_(dinfo_zalloc)("di.readmacho.dsymfile",
    526                     VG_(strlen) (executable_name)
    527                     + VG_(strlen) (APPLE_DSYM_EXT_AND_SUBDIRECTORY)
    528                     + VG_(strlen) (basename_str)
    529                     + 1
    530                  );
    531 
    532       /* First try for the dSYM in the same directory as the original file.  */
    533       VG_(strcpy) (dsymfile, executable_name);
    534       VG_(strcat) (dsymfile, APPLE_DSYM_EXT_AND_SUBDIRECTORY);
    535       VG_(strcat) (dsymfile, basename_str);
    536 
    537       if (file_exists_p (dsymfile))
    538          return dsymfile;
    539 
    540       /* Now search for any parent directory that has a '.' in it so we can find
    541          Mac OS X applications, bundles, plugins, and any other kinds of files.
    542          Mac OS X application bundles wil have their program in
    543          "/some/path/MyApp.app/Contents/MacOS/MyApp" (or replace ".app" with
    544          ".bundle" or ".plugin" for other types of bundles).  So we look for any
    545          prior '.' character and try appending the apple dSYM extension and
    546          subdirectory and see if we find an existing dSYM file (in the above
    547          MyApp example the dSYM would be at either:
    548          "/some/path/MyApp.app.dSYM/Contents/Resources/DWARF/MyApp" or
    549          "/some/path/MyApp.dSYM/Contents/Resources/DWARF/MyApp".  */
    550       VG_(strcpy) (dsymfile, VG_(dirname) (executable_name));
    551       while ((dot_ptr = VG_(strrchr) (dsymfile, '.')))
    552       {
    553          /* Find the directory delimiter that follows the '.' character since
    554             we now look for a .dSYM that follows any bundle extension.  */
    555          slash_ptr = VG_(strchr) (dot_ptr, '/');
    556          if (slash_ptr)
    557          {
    558              /* NULL terminate the string at the '/' character and append
    559                 the path down to the dSYM file.  */
    560             *slash_ptr = '\0';
    561             VG_(strcat) (slash_ptr, APPLE_DSYM_EXT_AND_SUBDIRECTORY);
    562             VG_(strcat) (slash_ptr, basename_str);
    563             if (file_exists_p (dsymfile))
    564                return dsymfile;
    565          }
    566 
    567          /* NULL terminate the string at the '.' character and append
    568             the path down to the dSYM file.  */
    569          *dot_ptr = '\0';
    570          VG_(strcat) (dot_ptr, APPLE_DSYM_EXT_AND_SUBDIRECTORY);
    571          VG_(strcat) (dot_ptr, basename_str);
    572          if (file_exists_p (dsymfile))
    573             return dsymfile;
    574 
    575          /* NULL terminate the string at the '.' locatated by the strrchr()
    576             function again.  */
    577          *dot_ptr = '\0';
    578 
    579          /* We found a previous extension '.' character and did not find a
    580             dSYM file so now find previous directory delimiter so we don't
    581             try multiple times on a file name that may have a version number
    582             in it such as "/some/path/MyApp.6.0.4.app".  */
    583          slash_ptr = VG_(strrchr) (dsymfile, '/');
    584          if (!slash_ptr)
    585             break;
    586          /* NULL terminate the string at the previous directory character
    587             and search again.  */
    588          *slash_ptr = '\0';
    589       }
    590    }
    591 
    592    return NULL;
    593 }
    594 
    595 
    596 static UChar *getsectdata(UChar* base, SizeT size,
    597                           Char *segname, Char *sectname,
    598                           /*OUT*/Word *sect_size)
    599 {
    600    struct MACH_HEADER *mh = (struct MACH_HEADER *)base;
    601    struct load_command *cmd;
    602    Int c;
    603 
    604    for (c = 0, cmd = (struct load_command *)(mh+1);
    605         c < mh->ncmds;
    606         c++, cmd = (struct load_command *)(cmd->cmdsize + (Addr)cmd))
    607    {
    608       if (cmd->cmd == LC_SEGMENT_CMD) {
    609          struct SEGMENT_COMMAND *seg = (struct SEGMENT_COMMAND *)cmd;
    610          if (0 == VG_(strncmp(seg->segname, segname, sizeof(seg->segname)))) {
    611             struct SECTION *sects = (struct SECTION *)(seg+1);
    612             Int s;
    613             for (s = 0; s < seg->nsects; s++) {
    614                if (0 == VG_(strncmp(sects[s].sectname, sectname,
    615                                     sizeof(sects[s].sectname))))
    616                {
    617                   if (sect_size) *sect_size = sects[s].size;
    618                   return (UChar *)(base + sects[s].offset);
    619                }
    620             }
    621          }
    622       }
    623    }
    624 
    625    if (sect_size) *sect_size = 0;
    626    return 0;
    627 }
    628 
    629 
    630 /* Brute force just simply search for uuid[0..15] in img[0..n_img-1] */
    631 static Bool check_uuid_matches ( Addr imgA, Word n_img, UChar* uuid )
    632 {
    633    Word   i;
    634    UChar* img = (UChar*)imgA;
    635    UChar  first = uuid[0];
    636    if (n_img < 16)
    637       return False;
    638    for (i = 0; i < n_img-16; i++) {
    639       if (img[i] != first)
    640          continue;
    641       if (0 == VG_(memcmp)( &img[i], &uuid[0], 16 ))
    642          return True;
    643    }
    644    return False;
    645 }
    646 
    647 
    648 /* Heuristic kludge: return True if this looks like an installed
    649    standard library; hence we shouldn't consider automagically running
    650    dsymutil on it. */
    651 static Bool is_systemish_library_name ( UChar* name )
    652 {
    653    vg_assert(name);
    654    if (0 == VG_(strncasecmp)(name, "/usr/", 5)
    655        || 0 == VG_(strncasecmp)(name, "/bin/", 5)
    656        || 0 == VG_(strncasecmp)(name, "/sbin/", 6)
    657        || 0 == VG_(strncasecmp)(name, "/opt/", 5)
    658        || 0 == VG_(strncasecmp)(name, "/sw/", 4)
    659        || 0 == VG_(strncasecmp)(name, "/System/", 8)
    660        || 0 == VG_(strncasecmp)(name, "/Library/", 9)
    661        || 0 == VG_(strncasecmp)(name, "/Applications/", 14)) {
    662       return True;
    663    } else {
    664       return False;
    665    }
    666 }
    667 
    668 
    669 Bool ML_(read_macho_debug_info)( struct _DebugInfo* di )
    670 {
    671    struct symtab_command *symcmd = NULL;
    672    struct dysymtab_command *dysymcmd = NULL;
    673    HChar* dsymfilename = NULL;
    674    Bool have_uuid = False;
    675    UChar uuid[16];
    676    ImageInfo ii;  /* main file */
    677    ImageInfo iid; /* auxiliary .dSYM file */
    678    Bool ok;
    679 
    680    /* mmap the object file to look for di->soname and di->text_bias
    681       and uuid and nlist and STABS */
    682 
    683    if (VG_(clo_verbosity) > 1)
    684       VG_(message)(Vg_DebugMsg,
    685                    "%s (%#lx)\n", di->fsm.filename, di->fsm.rx_map_avma );
    686 
    687    /* This should be ensured by our caller (that we're in the accept
    688       state). */
    689    vg_assert(di->fsm.have_rx_map);
    690    vg_assert(di->fsm.have_rw_map);
    691 
    692    VG_(memset)(&ii,   0, sizeof(ii));
    693    VG_(memset)(&iid,  0, sizeof(iid));
    694    VG_(memset)(&uuid, 0, sizeof(uuid));
    695 
    696    ok = map_image_aboard( di, &ii, di->fsm.filename );
    697    if (!ok) goto fail;
    698 
    699    vg_assert(ii.macho_img != NULL && ii.macho_img_szB > 0);
    700 
    701    /* Poke around in the Mach-O header, to find some important
    702       stuff. */
    703    // Find LC_SYMTAB and LC_DYSYMTAB, if present.
    704    // Read di->soname from LC_ID_DYLIB if present,
    705    //    or from LC_ID_DYLINKER if present,
    706    //    or use "NONE".
    707    // Get di->text_bias (aka slide) based on the corresponding LC_SEGMENT
    708    // Get uuid for later dsym search
    709 
    710    di->text_bias = 0;
    711 
    712    { struct MACH_HEADER *mh = (struct MACH_HEADER *)ii.macho_img;
    713       struct load_command *cmd;
    714       Int c;
    715 
    716       for (c = 0, cmd = (struct load_command *)(mh+1);
    717            c < mh->ncmds;
    718            c++, cmd = (struct load_command *)(cmd->cmdsize
    719                                               + (unsigned long)cmd)) {
    720          if (cmd->cmd == LC_SYMTAB) {
    721             symcmd = (struct symtab_command *)cmd;
    722          }
    723          else if (cmd->cmd == LC_DYSYMTAB) {
    724             dysymcmd = (struct dysymtab_command *)cmd;
    725          }
    726          else if (cmd->cmd == LC_ID_DYLIB && mh->filetype == MH_DYLIB) {
    727             // GrP fixme bundle?
    728             struct dylib_command *dcmd = (struct dylib_command *)cmd;
    729             UChar *dylibname = dcmd->dylib.name.offset + (UChar *)dcmd;
    730             UChar *soname = VG_(strrchr)(dylibname, '/');
    731             if (!soname) soname = dylibname;
    732             else soname++;
    733             di->soname = ML_(dinfo_strdup)("di.readmacho.dylibname",
    734                                            soname);
    735          }
    736          else if (cmd->cmd==LC_ID_DYLINKER  &&  mh->filetype==MH_DYLINKER) {
    737             struct dylinker_command *dcmd = (struct dylinker_command *)cmd;
    738             UChar *dylinkername = dcmd->name.offset + (UChar *)dcmd;
    739             UChar *soname = VG_(strrchr)(dylinkername, '/');
    740             if (!soname) soname = dylinkername;
    741             else soname++;
    742             di->soname = ML_(dinfo_strdup)("di.readmacho.dylinkername",
    743                                            soname);
    744          }
    745 
    746          // A comment from Julian about why varinfo[35] fail:
    747          //
    748          // My impression is, from comparing the output of otool -l for these
    749          // executables with the logic in ML_(read_macho_debug_info),
    750          // specifically the part that begins "else if (cmd->cmd ==
    751          // LC_SEGMENT_CMD) {", that it's a complete hack which just happens
    752          // to work ok for text symbols.  In particular, it appears to assume
    753          // that in a "struct load_command" of type LC_SEGMENT_CMD, the first
    754          // "struct SEGMENT_COMMAND" inside it is going to contain the info we
    755          // need.  However, otool -l shows, and also the Apple docs state,
    756          // that a struct load_command may contain an arbitrary number of
    757          // struct SEGMENT_COMMANDs, so I'm not sure why it's OK to merely
    758          // snarf the first.  But I'm not sure about this.
    759          //
    760          // The "Try for __DATA" block below simply adds acquisition of data
    761          // svma/bias values using the same assumption.  It also needs
    762          // (probably) to deal with bss sections, but I don't understand how
    763          // this all ties together really, so it requires further study.
    764          //
    765          // If you can get your head around the relationship between MachO
    766          // segments, sections and load commands, this might be relatively
    767          // easy to fix properly.
    768          //
    769          // Basically we need to come up with plausible numbers for di->
    770          // {text,data,bss}_{avma,svma}, from which the _bias numbers are
    771          // then trivially derived.  Then I think the debuginfo reader should
    772          // work pretty well.
    773          else if (cmd->cmd == LC_SEGMENT_CMD) {
    774             struct SEGMENT_COMMAND *seg = (struct SEGMENT_COMMAND *)cmd;
    775             /* Try for __TEXT */
    776             if (!di->text_present
    777                 && 0 == VG_(strcmp)(seg->segname, "__TEXT")
    778                 /* DDD: is the  next line a kludge? -- JRS */
    779                 && seg->fileoff == 0 && seg->filesize != 0) {
    780                di->text_present = True;
    781                di->text_svma = (Addr)seg->vmaddr;
    782                di->text_avma = di->fsm.rx_map_avma;
    783                di->text_size = seg->vmsize;
    784                di->text_bias = di->text_avma - di->text_svma;
    785                /* Make the _debug_ values be the same as the
    786                   svma/bias for the primary object, since there is
    787                   no secondary (debuginfo) object, but nevertheless
    788                   downstream biasing of Dwarf3 relies on the
    789                   _debug_ values. */
    790                di->text_debug_svma = di->text_svma;
    791                di->text_debug_bias = di->text_bias;
    792             }
    793             /* Try for __DATA */
    794             if (!di->data_present
    795                 && 0 == VG_(strcmp)(seg->segname, "__DATA")
    796                 /* && DDD:seg->fileoff == 0 */ && seg->filesize != 0) {
    797                di->data_present = True;
    798                di->data_svma = (Addr)seg->vmaddr;
    799                di->data_avma = di->fsm.rw_map_avma;
    800                di->data_size = seg->vmsize;
    801                di->data_bias = di->data_avma - di->data_svma;
    802                di->data_debug_svma = di->data_svma;
    803                di->data_debug_bias = di->data_bias;
    804             }
    805          }
    806          else if (cmd->cmd == LC_UUID) {
    807              struct uuid_command *uuid_cmd = (struct uuid_command *)cmd;
    808              VG_(memcpy)(uuid, uuid_cmd->uuid, sizeof(uuid));
    809              have_uuid = True;
    810          }
    811       }
    812    }
    813 
    814    if (!di->soname) {
    815       di->soname = ML_(dinfo_strdup)("di.readmacho.noname", "NONE");
    816    }
    817 
    818    if (di->trace_symtab) {
    819       VG_(printf)("\n");
    820       VG_(printf)("SONAME = %s\n", di->soname);
    821       VG_(printf)("\n");
    822    }
    823 
    824    /* Now we have the base object to hand.  Read symbols from it. */
    825 
    826    if (ii.macho_img && ii.macho_img_szB > 0 && symcmd && dysymcmd) {
    827 
    828       /* Read nlist symbol table */
    829       struct NLIST *syms;
    830       UChar *strs;
    831       XArray* /* DiSym */ candSyms = NULL;
    832       Word i, nCandSyms;
    833 
    834       if (ii.macho_img_szB < symcmd->stroff + symcmd->strsize
    835           || ii.macho_img_szB < symcmd->symoff + symcmd->nsyms
    836                                                  * sizeof(struct NLIST)) {
    837          ML_(symerr)(di, False, "Invalid Mach-O file (5 too small).");
    838          goto fail;
    839       }
    840       if (dysymcmd->ilocalsym + dysymcmd->nlocalsym > symcmd->nsyms
    841           || dysymcmd->iextdefsym + dysymcmd->nextdefsym > symcmd->nsyms) {
    842          ML_(symerr)(di, False, "Invalid Mach-O file (bad symbol table).");
    843          goto fail;
    844       }
    845 
    846       syms = (struct NLIST *)(ii.macho_img + symcmd->symoff);
    847       strs = (UChar *)(ii.macho_img + symcmd->stroff);
    848 
    849       if (VG_(clo_verbosity) > 1)
    850          VG_(message)(Vg_DebugMsg,
    851             "   reading syms   from primary file (%d %d)\n",
    852             dysymcmd->nextdefsym, dysymcmd->nlocalsym );
    853 
    854       /* Read candidate symbols into 'candSyms', so we can truncate
    855          overlapping ends and generally tidy up, before presenting
    856          them to ML_(addSym). */
    857       candSyms = VG_(newXA)(
    858                     ML_(dinfo_zalloc), "di.readmacho.candsyms.1",
    859                     ML_(dinfo_free), sizeof(DiSym)
    860                  );
    861       vg_assert(candSyms);
    862 
    863       // extern symbols
    864       read_symtab(candSyms,
    865                   di,
    866                   syms + dysymcmd->iextdefsym, dysymcmd->nextdefsym,
    867                   strs, symcmd->strsize);
    868       // static and private_extern symbols
    869       read_symtab(candSyms,
    870                   di,
    871                   syms + dysymcmd->ilocalsym, dysymcmd->nlocalsym,
    872                   strs, symcmd->strsize);
    873 
    874       /* tidy up the cand syms -- trim overlapping ends.  May resize
    875          candSyms. */
    876       tidy_up_cand_syms( candSyms, di->trace_symtab );
    877 
    878       /* and finally present them to ML_(addSym) */
    879       nCandSyms = VG_(sizeXA)( candSyms );
    880       for (i = 0; i < nCandSyms; i++) {
    881          DiSym* cand = (DiSym*) VG_(indexXA)( candSyms, i );
    882          vg_assert(cand->pri_name != NULL);
    883          vg_assert(cand->sec_names == NULL);
    884          if (di->trace_symtab)
    885             VG_(printf)("nlist final: acquire  avma %010lx-%010lx  %s\n",
    886                         cand->addr, cand->addr + cand->size - 1,
    887                         cand->pri_name );
    888          ML_(addSym)( di, cand );
    889       }
    890       VG_(deleteXA)( candSyms );
    891    }
    892 
    893    /* If there's no UUID in the primary, don't even bother to try and
    894       read any DWARF, since we won't be able to verify it matches.
    895       Our policy is not to load debug info unless we can verify that
    896       it matches the primary.  Just declare success at this point.
    897       And don't complain to the user, since that would cause us to
    898       complain on objects compiled without -g.  (Some versions of
    899       Xcode are observed to omit a UUID entry for objects linked(?)
    900       without -g.  Others don't appear to omit it.) */
    901    if (!have_uuid)
    902       goto success;
    903 
    904    /* mmap the dSYM file to look for DWARF debug info.  If successful,
    905       use the .macho_img and .macho_img_szB in iid. */
    906 
    907    dsymfilename = find_separate_debug_file( di->fsm.filename );
    908 
    909    /* Try to load it. */
    910    if (dsymfilename) {
    911       Bool valid;
    912 
    913       if (VG_(clo_verbosity) > 1)
    914          VG_(message)(Vg_DebugMsg, "   dSYM= %s\n", dsymfilename);
    915 
    916       ok = map_image_aboard( di, &iid, dsymfilename );
    917       if (!ok) goto fail;
    918 
    919       /* check it has the right uuid. */
    920       vg_assert(have_uuid);
    921       valid = iid.macho_img && iid.macho_img_szB > 0
    922               && check_uuid_matches( (Addr)iid.macho_img,
    923                                      iid.macho_img_szB, uuid );
    924       if (valid)
    925          goto read_the_dwarf;
    926 
    927       if (VG_(clo_verbosity) > 1)
    928          VG_(message)(Vg_DebugMsg, "   dSYM does not have "
    929                                    "correct UUID (out of date?)\n");
    930    }
    931 
    932    /* There was no dsym file, or it doesn't match.  We'll have to try
    933       regenerating it, unless --dsymutil=no, in which case just complain
    934       instead. */
    935 
    936    /* If this looks like a lib that we shouldn't run dsymutil on, just
    937       give up.  (possible reasons: is system lib, or in /usr etc, or
    938       the dsym dir would not be writable by the user, or we're running
    939       as root) */
    940    vg_assert(di->fsm.filename);
    941    if (is_systemish_library_name(di->fsm.filename))
    942       goto success;
    943 
    944    if (!VG_(clo_dsymutil)) {
    945       if (VG_(clo_verbosity) == 1) {
    946          VG_(message)(Vg_DebugMsg, "%s:\n", di->fsm.filename);
    947       }
    948       if (VG_(clo_verbosity) > 0)
    949          VG_(message)(Vg_DebugMsg, "%sdSYM directory %s; consider using "
    950                       "--dsymutil=yes\n",
    951                       VG_(clo_verbosity) > 1 ? "   " : "",
    952                       dsymfilename ? "has wrong UUID" : "is missing");
    953       goto success;
    954    }
    955 
    956    /* Run dsymutil */
    957 
    958    { Int r;
    959      HChar* dsymutil = "/usr/bin/dsymutil ";
    960      HChar* cmd = ML_(dinfo_zalloc)( "di.readmacho.tmp1",
    961                                      VG_(strlen)(dsymutil)
    962                                      + VG_(strlen)(di->fsm.filename)
    963                                      + 32 /* misc */ );
    964      VG_(strcpy)(cmd, dsymutil);
    965      if (0) VG_(strcat)(cmd, "--verbose ");
    966      VG_(strcat)(cmd, "\"");
    967      VG_(strcat)(cmd, di->fsm.filename);
    968      VG_(strcat)(cmd, "\"");
    969      VG_(message)(Vg_DebugMsg, "run: %s\n", cmd);
    970      r = VG_(system)( cmd );
    971      if (r)
    972         VG_(message)(Vg_DebugMsg, "run: %s FAILED\n", dsymutil);
    973      ML_(dinfo_free)(cmd);
    974      dsymfilename = find_separate_debug_file(di->fsm.filename);
    975    }
    976 
    977    /* Try again to load it. */
    978    if (dsymfilename) {
    979       Bool valid;
    980 
    981       if (VG_(clo_verbosity) > 1)
    982          VG_(message)(Vg_DebugMsg, "   dsyms= %s\n", dsymfilename);
    983 
    984       ok = map_image_aboard( di, &iid, dsymfilename );
    985       if (!ok) goto fail;
    986 
    987       /* check it has the right uuid. */
    988       vg_assert(have_uuid);
    989       valid = iid.macho_img && iid.macho_img_szB > 0
    990               && check_uuid_matches( (Addr)iid.macho_img,
    991                                      iid.macho_img_szB, uuid );
    992       if (!valid) {
    993          if (VG_(clo_verbosity) > 0) {
    994             VG_(message)(Vg_DebugMsg,
    995                "WARNING: did not find expected UUID %02X%02X%02X%02X"
    996                "-%02X%02X-%02X%02X-%02X%02X-%02X%02X%02X%02X%02X%02X"
    997                " in dSYM dir\n",
    998                (UInt)uuid[0], (UInt)uuid[1], (UInt)uuid[2], (UInt)uuid[3],
    999                (UInt)uuid[4], (UInt)uuid[5], (UInt)uuid[6], (UInt)uuid[7],
   1000                (UInt)uuid[8], (UInt)uuid[9], (UInt)uuid[10],
   1001                (UInt)uuid[11], (UInt)uuid[12], (UInt)uuid[13],
   1002                (UInt)uuid[14], (UInt)uuid[15] );
   1003             VG_(message)(Vg_DebugMsg,
   1004                          "WARNING: for %s\n", di->fsm.filename);
   1005          }
   1006          unmap_image( &iid );
   1007          /* unmap_image zeroes the fields, so the "if (iid.img)" test
   1008             at the 'fail' label will not unmap iid a second time. */
   1009          goto fail;
   1010       }
   1011    }
   1012 
   1013    /* Right.  Finally we have our best try at the dwarf image, so go
   1014       on to reading stuff out of it. */
   1015 
   1016   read_the_dwarf:
   1017    if (iid.macho_img && iid.macho_img_szB > 0) {
   1018       UChar* debug_info_img = NULL;
   1019       Word   debug_info_sz;
   1020       UChar* debug_abbv_img;
   1021       Word   debug_abbv_sz;
   1022       UChar* debug_line_img;
   1023       Word   debug_line_sz;
   1024       UChar* debug_str_img;
   1025       Word   debug_str_sz;
   1026       UChar* debug_ranges_img;
   1027       Word   debug_ranges_sz;
   1028       UChar* debug_loc_img;
   1029       Word   debug_loc_sz;
   1030       UChar* debug_name_img;
   1031       Word   debug_name_sz;
   1032 
   1033       debug_info_img =
   1034           getsectdata(iid.macho_img, iid.macho_img_szB,
   1035                       "__DWARF", "__debug_info", &debug_info_sz);
   1036       debug_abbv_img =
   1037           getsectdata(iid.macho_img, iid.macho_img_szB,
   1038                       "__DWARF", "__debug_abbrev", &debug_abbv_sz);
   1039       debug_line_img =
   1040           getsectdata(iid.macho_img, iid.macho_img_szB,
   1041                       "__DWARF", "__debug_line", &debug_line_sz);
   1042       debug_str_img =
   1043           getsectdata(iid.macho_img, iid.macho_img_szB,
   1044                       "__DWARF", "__debug_str", &debug_str_sz);
   1045       debug_ranges_img =
   1046           getsectdata(iid.macho_img, iid.macho_img_szB,
   1047                       "__DWARF", "__debug_ranges", &debug_ranges_sz);
   1048       debug_loc_img =
   1049           getsectdata(iid.macho_img, iid.macho_img_szB,
   1050                       "__DWARF", "__debug_loc", &debug_loc_sz);
   1051       debug_name_img =
   1052           getsectdata(iid.macho_img, iid.macho_img_szB,
   1053                       "__DWARF", "__debug_pubnames", &debug_name_sz);
   1054 
   1055       if (debug_info_img) {
   1056          if (VG_(clo_verbosity) > 1) {
   1057             if (0)
   1058             VG_(message)(Vg_DebugMsg,
   1059                          "Reading dwarf3 for %s (%#lx) from %s"
   1060                          " (%ld %ld %ld %ld %ld %ld)\n",
   1061                          di->fsm.filename, di->text_avma, dsymfilename,
   1062                          debug_info_sz, debug_abbv_sz, debug_line_sz,
   1063                          debug_str_sz, debug_ranges_sz, debug_loc_sz
   1064                          );
   1065             VG_(message)(Vg_DebugMsg,
   1066                "   reading dwarf3 from dsyms file\n");
   1067          }
   1068          /* The old reader: line numbers and unwind info only */
   1069          ML_(read_debuginfo_dwarf3) ( di,
   1070                                       debug_info_img, debug_info_sz,
   1071                                       debug_abbv_img, debug_abbv_sz,
   1072                                       debug_line_img, debug_line_sz,
   1073                                       debug_str_img,  debug_str_sz );
   1074 
   1075          /* The new reader: read the DIEs in .debug_info to acquire
   1076             information on variable types and locations.  But only if
   1077             the tool asks for it, or the user requests it on the
   1078             command line. */
   1079          if (VG_(needs).var_info /* the tool requires it */
   1080              || VG_(clo_read_var_info) /* the user asked for it */) {
   1081             ML_(new_dwarf3_reader)(
   1082                di, debug_info_img,   debug_info_sz,
   1083                    debug_abbv_img,   debug_abbv_sz,
   1084                    debug_line_img,   debug_line_sz,
   1085                    debug_str_img,    debug_str_sz,
   1086                    debug_ranges_img, debug_ranges_sz,
   1087                    debug_loc_img,    debug_loc_sz
   1088             );
   1089          }
   1090       }
   1091    }
   1092 
   1093    if (dsymfilename) ML_(dinfo_free)(dsymfilename);
   1094 
   1095   success:
   1096    if (ii.img)
   1097       unmap_image(&ii);
   1098    if (iid.img)
   1099       unmap_image(&iid);
   1100    return True;
   1101 
   1102    /* NOTREACHED */
   1103 
   1104   fail:
   1105    ML_(symerr)(di, True, "Error reading Mach-O object.");
   1106    if (ii.img)
   1107       unmap_image(&ii);
   1108    if (iid.img)
   1109       unmap_image(&iid);
   1110    return False;
   1111 }
   1112 
   1113 #endif // defined(VGO_darwin)
   1114 
   1115 /*--------------------------------------------------------------------*/
   1116 /*--- end                                                          ---*/
   1117 /*--------------------------------------------------------------------*/
   1118