Home | History | Annotate | Download | only in m_debuginfo
      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- Format-neutral storage of and querying of info acquired from ---*/
      4 /*--- ELF/XCOFF stabs/dwarf1/dwarf2/dwarf3 debug info.             ---*/
      5 /*---                                                    storage.c ---*/
      6 /*--------------------------------------------------------------------*/
      7 
      8 /*
      9    This file is part of Valgrind, a dynamic binary instrumentation
     10    framework.
     11 
     12    Copyright (C) 2000-2010 Julian Seward
     13       jseward (at) acm.org
     14 
     15    This program is free software; you can redistribute it and/or
     16    modify it under the terms of the GNU General Public License as
     17    published by the Free Software Foundation; either version 2 of the
     18    License, or (at your option) any later version.
     19 
     20    This program is distributed in the hope that it will be useful, but
     21    WITHOUT ANY WARRANTY; without even the implied warranty of
     22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     23    General Public License for more details.
     24 
     25    You should have received a copy of the GNU General Public License
     26    along with this program; if not, write to the Free Software
     27    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     28    02111-1307, USA.
     29 
     30    The GNU General Public License is contained in the file COPYING.
     31 */
     32 
     33 /* This file manages the data structures built by the debuginfo
     34    system.  These are: the top level SegInfo list.  For each SegInfo,
     35    there are tables for for address-to-symbol mappings,
     36    address-to-src-file/line mappings, and address-to-CFI-info
     37    mappings.
     38 */
     39 
     40 #include "pub_core_basics.h"
     41 #include "pub_core_options.h"      /* VG_(clo_verbosity) */
     42 #include "pub_core_debuginfo.h"
     43 #include "pub_core_libcassert.h"
     44 #include "pub_core_libcbase.h"
     45 #include "pub_core_libcprint.h"
     46 #include "pub_core_xarray.h"
     47 #include "pub_core_oset.h"
     48 
     49 #include "priv_misc.h"         /* dinfo_zalloc/free/strdup */
     50 #include "priv_d3basics.h"     /* ML_(pp_GX) */
     51 #include "priv_tytypes.h"
     52 #include "priv_storage.h"      /* self */
     53 
     54 
     55 /*------------------------------------------------------------*/
     56 /*--- Misc (printing, errors)                              ---*/
     57 /*------------------------------------------------------------*/
     58 
     59 /* Show a non-fatal debug info reading error.  Use vg_panic if
     60    terminal.  'serious' errors are shown regardless of the
     61    verbosity setting. */
     62 void ML_(symerr) ( struct _DebugInfo* di, Bool serious, HChar* msg )
     63 {
     64    /* XML mode hides everything :-( */
     65    if (VG_(clo_xml))
     66       return;
     67 
     68    if (serious) {
     69 
     70       VG_(message)(Vg_DebugMsg, "WARNING: Serious error when "
     71                                 "reading debug info\n");
     72       if (True || VG_(clo_verbosity) < 2) {
     73          /* Need to show what the file name is, at verbosity levels 2
     74             or below, since that won't already have been shown */
     75          VG_(message)(Vg_DebugMsg,
     76                       "When reading debug info from %s:\n",
     77                       (di && di->filename) ? di->filename : (UChar*)"???");
     78       }
     79       VG_(message)(Vg_DebugMsg, "%s\n", msg);
     80 
     81    } else { /* !serious */
     82 
     83       if (VG_(clo_verbosity) >= 2)
     84          VG_(message)(Vg_DebugMsg, "%s\n", msg);
     85 
     86    }
     87 }
     88 
     89 
     90 /* Print a symbol. */
     91 void ML_(ppSym) ( Int idx, DiSym* sym )
     92 {
     93    VG_(printf)( "%5d:  %#8lx .. %#8lx (%d)      %s\n",
     94                 idx,
     95                 sym->addr,
     96                 sym->addr + sym->size - 1, sym->size,
     97 	        sym->name );
     98 }
     99 
    100 /* Print a call-frame-info summary. */
    101 void ML_(ppDiCfSI) ( XArray* /* of CfiExpr */ exprs, DiCfSI* si )
    102 {
    103 #  define SHOW_HOW(_how, _off)                   \
    104       do {                                       \
    105          if (_how == CFIR_UNKNOWN) {             \
    106             VG_(printf)("Unknown");              \
    107          } else                                  \
    108          if (_how == CFIR_SAME) {                \
    109             VG_(printf)("Same");                 \
    110          } else                                  \
    111          if (_how == CFIR_CFAREL) {              \
    112             VG_(printf)("cfa+%d", _off);         \
    113          } else                                  \
    114          if (_how == CFIR_MEMCFAREL) {           \
    115             VG_(printf)("*(cfa+%d)", _off);      \
    116          } else                                  \
    117          if (_how == CFIR_EXPR) {                \
    118             VG_(printf)("{");                    \
    119             ML_(ppCfiExpr)(exprs, _off);         \
    120             VG_(printf)("}");                    \
    121          } else {                                \
    122             vg_assert(0+0);                      \
    123          }                                       \
    124       } while (0)
    125 
    126    VG_(printf)("[%#lx .. %#lx]: ", si->base,
    127                                si->base + (UWord)si->len - 1);
    128    switch (si->cfa_how) {
    129       case CFIC_IA_SPREL:
    130          VG_(printf)("let cfa=oldSP+%d", si->cfa_off);
    131          break;
    132       case CFIC_IA_BPREL:
    133          VG_(printf)("let cfa=oldBP+%d", si->cfa_off);
    134          break;
    135       case CFIC_ARM_R13REL:
    136          VG_(printf)("let cfa=oldR13+%d", si->cfa_off);
    137          break;
    138       case CFIC_ARM_R12REL:
    139          VG_(printf)("let cfa=oldR12+%d", si->cfa_off);
    140          break;
    141       case CFIC_ARM_R11REL:
    142          VG_(printf)("let cfa=oldR11+%d", si->cfa_off);
    143          break;
    144       case CFIC_ARM_R7REL:
    145          VG_(printf)("let cfa=oldR7+%d", si->cfa_off);
    146          break;
    147       case CFIC_EXPR:
    148          VG_(printf)("let cfa={");
    149          ML_(ppCfiExpr)(exprs, si->cfa_off);
    150          VG_(printf)("}");
    151          break;
    152       default:
    153          vg_assert(0);
    154    }
    155 
    156    VG_(printf)(" in RA=");
    157    SHOW_HOW(si->ra_how, si->ra_off);
    158 #  if defined(VGA_x86) || defined(VGA_amd64)
    159    VG_(printf)(" SP=");
    160    SHOW_HOW(si->sp_how, si->sp_off);
    161    VG_(printf)(" BP=");
    162    SHOW_HOW(si->bp_how, si->bp_off);
    163 #  elif defined(VGA_arm)
    164    VG_(printf)(" R14=");
    165    SHOW_HOW(si->r14_how, si->r14_off);
    166    VG_(printf)(" R13=");
    167    SHOW_HOW(si->r13_how, si->r13_off);
    168    VG_(printf)(" R12=");
    169    SHOW_HOW(si->r12_how, si->r12_off);
    170    VG_(printf)(" R11=");
    171    SHOW_HOW(si->r11_how, si->r11_off);
    172    VG_(printf)(" R7=");
    173    SHOW_HOW(si->r7_how, si->r7_off);
    174 #  elif defined(VGA_ppc32) || defined(VGA_ppc64)
    175 #  else
    176 #    error "Unknown arch"
    177 #  endif
    178    VG_(printf)("\n");
    179 #  undef SHOW_HOW
    180 }
    181 
    182 
    183 /*------------------------------------------------------------*/
    184 /*--- Adding stuff                                         ---*/
    185 /*------------------------------------------------------------*/
    186 
    187 /* Add a str to the string table, including terminating zero, and
    188    return pointer to the string in vg_strtab.  Unless it's been seen
    189    recently, in which case we find the old pointer and return that.
    190    This avoids the most egregious duplications.
    191 
    192    JSGF: changed from returning an index to a pointer, and changed to
    193    a chunking memory allocator rather than reallocating, so the
    194    pointers are stable.
    195 */
    196 UChar* ML_(addStr) ( struct _DebugInfo* di, UChar* str, Int len )
    197 {
    198    struct strchunk *chunk;
    199    Int    space_needed;
    200    UChar* p;
    201 
    202    if (len == -1) {
    203       len = VG_(strlen)(str);
    204    } else {
    205       vg_assert(len >= 0);
    206    }
    207 
    208    space_needed = 1 + len;
    209 
    210    // Allocate a new strtab chunk if necessary
    211    if (di->strchunks == NULL ||
    212        (di->strchunks->strtab_used
    213         + space_needed) > SEGINFO_STRCHUNKSIZE) {
    214       chunk = ML_(dinfo_zalloc)("di.storage.addStr.1", sizeof(*chunk));
    215       chunk->strtab_used = 0;
    216       chunk->next = di->strchunks;
    217       di->strchunks = chunk;
    218    }
    219    chunk = di->strchunks;
    220 
    221    p = &chunk->strtab[chunk->strtab_used];
    222    VG_(memcpy)(p, str, len);
    223    chunk->strtab[chunk->strtab_used+len] = '\0';
    224    chunk->strtab_used += space_needed;
    225 
    226    return p;
    227 }
    228 
    229 
    230 /* Add a symbol to the symbol table.
    231 */
    232 void ML_(addSym) ( struct _DebugInfo* di, DiSym* sym )
    233 {
    234    UInt   new_sz, i;
    235    DiSym* new_tab;
    236 
    237    /* Ignore zero-sized syms. */
    238    if (sym->size == 0) return;
    239 
    240    if (di->symtab_used == di->symtab_size) {
    241       new_sz = 2 * di->symtab_size;
    242       if (new_sz == 0) new_sz = 500;
    243       new_tab = ML_(dinfo_zalloc)( "di.storage.addSym.1",
    244                                    new_sz * sizeof(DiSym) );
    245       if (di->symtab != NULL) {
    246          for (i = 0; i < di->symtab_used; i++)
    247             new_tab[i] = di->symtab[i];
    248          ML_(dinfo_free)(di->symtab);
    249       }
    250       di->symtab = new_tab;
    251       di->symtab_size = new_sz;
    252    }
    253 
    254    di->symtab[di->symtab_used] = *sym;
    255    di->symtab_used++;
    256    vg_assert(di->symtab_used <= di->symtab_size);
    257 }
    258 
    259 
    260 /* Resize the symbol table to save memory.
    261 */
    262 void ML_(shrinkSym)( struct _DebugInfo* di )
    263 {
    264    DiSym* new_tab;
    265    UInt new_sz = di->symtab_used;
    266    if (new_sz == di->symtab_size) return;
    267 
    268    new_tab = ML_(dinfo_zalloc)( "di.storage.shrinkSym",
    269                                 new_sz * sizeof(DiSym) );
    270    VG_(memcpy)(new_tab, di->symtab, new_sz * sizeof(DiSym));
    271 
    272    ML_(dinfo_free)(di->symtab);
    273    di->symtab = new_tab;
    274    di->symtab_size = new_sz;
    275 }
    276 
    277 
    278 /* Add a location to the location table.
    279 */
    280 static void addLoc ( struct _DebugInfo* di, DiLoc* loc )
    281 {
    282    UInt   new_sz, i;
    283    DiLoc* new_tab;
    284 
    285    /* Zero-sized locs should have been ignored earlier */
    286    vg_assert(loc->size > 0);
    287 
    288    if (di->loctab_used == di->loctab_size) {
    289       new_sz = 2 * di->loctab_size;
    290       if (new_sz == 0) new_sz = 500;
    291       new_tab = ML_(dinfo_zalloc)( "di.storage.addLoc.1",
    292                                    new_sz * sizeof(DiLoc) );
    293       if (di->loctab != NULL) {
    294          for (i = 0; i < di->loctab_used; i++)
    295             new_tab[i] = di->loctab[i];
    296          ML_(dinfo_free)(di->loctab);
    297       }
    298       di->loctab = new_tab;
    299       di->loctab_size = new_sz;
    300    }
    301 
    302    di->loctab[di->loctab_used] = *loc;
    303    di->loctab_used++;
    304    vg_assert(di->loctab_used <= di->loctab_size);
    305 }
    306 
    307 
    308 /* Resize the lineinfo table to save memory.
    309 */
    310 void ML_(shrinkLineInfo)( struct _DebugInfo* di )
    311 {
    312    DiLoc* new_tab;
    313    UInt new_sz = di->loctab_used;
    314    if (new_sz == di->loctab_size) return;
    315 
    316    new_tab = ML_(dinfo_zalloc)( "di.storage.shrinkLineInfo",
    317                                 new_sz * sizeof(DiLoc) );
    318    VG_(memcpy)(new_tab, di->loctab, new_sz * sizeof(DiLoc));
    319 
    320    ML_(dinfo_free)(di->loctab);
    321    di->loctab = new_tab;
    322    di->loctab_size = new_sz;
    323 }
    324 
    325 
    326 /* Top-level place to call to add a source-location mapping entry.
    327 */
    328 void ML_(addLineInfo) ( struct _DebugInfo* di,
    329                         UChar*   filename,
    330                         UChar*   dirname, /* NULL == directory is unknown */
    331                         Addr     this,
    332                         Addr     next,
    333                         Int      lineno,
    334                         Int      entry /* only needed for debug printing */
    335      )
    336 {
    337    static const Bool debug = False;
    338    DiLoc loc;
    339    Int size = next - this;
    340 
    341    /* Ignore zero-sized locs */
    342    if (this == next) return;
    343 
    344    if (debug)
    345       VG_(printf)( "  src %s %s line %d %#lx-%#lx\n",
    346                    dirname ? dirname : (UChar*)"(unknown)",
    347                    filename, lineno, this, next );
    348 
    349    /* Maximum sanity checking.  Some versions of GNU as do a shabby
    350     * job with stabs entries; if anything looks suspicious, revert to
    351     * a size of 1.  This should catch the instruction of interest
    352     * (since if using asm-level debug info, one instruction will
    353     * correspond to one line, unlike with C-level debug info where
    354     * multiple instructions can map to the one line), but avoid
    355     * catching any other instructions bogusly. */
    356    if (this > next) {
    357        if (VG_(clo_verbosity) > 2) {
    358            VG_(message)(Vg_DebugMsg,
    359                         "warning: line info addresses out of order "
    360                         "at entry %d: 0x%lx 0x%lx\n", entry, this, next);
    361        }
    362        size = 1;
    363    }
    364 
    365    if (size > MAX_LOC_SIZE) {
    366        if (0)
    367        VG_(message)(Vg_DebugMsg,
    368                     "warning: line info address range too large "
    369                     "at entry %d: %d\n", entry, size);
    370        size = 1;
    371    }
    372 
    373    /* Rule out ones which are completely outside the r-x mapped area.
    374       See "Comment_Regarding_Text_Range_Checks" elsewhere in this file
    375       for background and rationale. */
    376    vg_assert(di->have_rx_map && di->have_rw_map);
    377    if (next-1 < di->rx_map_avma
    378        || this >= di->rx_map_avma + di->rx_map_size ) {
    379        if (0)
    380           VG_(message)(Vg_DebugMsg,
    381                        "warning: ignoring line info entry falling "
    382                        "outside current DebugInfo: %#lx %#lx %#lx %#lx\n",
    383                        di->text_avma,
    384                        di->text_avma + di->text_size,
    385                        this, next-1);
    386        return;
    387    }
    388 
    389    vg_assert(lineno >= 0);
    390    if (lineno > MAX_LINENO) {
    391       static Bool complained = False;
    392       if (!complained) {
    393          complained = True;
    394          VG_(message)(Vg_UserMsg,
    395                       "warning: ignoring line info entry with "
    396                       "huge line number (%d)\n", lineno);
    397          VG_(message)(Vg_UserMsg,
    398                       "         Can't handle line numbers "
    399                       "greater than %d, sorry\n", MAX_LINENO);
    400          VG_(message)(Vg_UserMsg,
    401                       "(Nb: this message is only shown once)\n");
    402       }
    403       return;
    404    }
    405 
    406    loc.addr      = this;
    407    loc.size      = (UShort)size;
    408    loc.lineno    = lineno;
    409    loc.filename  = filename;
    410    loc.dirname   = dirname;
    411 
    412    if (0) VG_(message)(Vg_DebugMsg,
    413 		       "addLoc: addr %#lx, size %d, line %d, file %s\n",
    414 		       this,size,lineno,filename);
    415 
    416    addLoc ( di, &loc );
    417 }
    418 
    419 
    420 /* Top-level place to call to add a CFI summary record.  The supplied
    421    DiCfSI is copied. */
    422 void ML_(addDiCfSI) ( struct _DebugInfo* di, DiCfSI* cfsi_orig )
    423 {
    424    static const Bool debug = False;
    425    UInt    new_sz, i;
    426    DiCfSI* new_tab;
    427    SSizeT  delta;
    428 
    429    /* copy the original, so we can mess with it */
    430    DiCfSI cfsi = *cfsi_orig;
    431 
    432    if (debug) {
    433       VG_(printf)("adding DiCfSI: ");
    434       ML_(ppDiCfSI)(di->cfsi_exprs, &cfsi);
    435    }
    436 
    437    /* sanity */
    438    vg_assert(cfsi.len > 0);
    439    /* If this fails, the implication is you have a single procedure
    440       with more than 5 million bytes of code.  Which is pretty
    441       unlikely.  Either that, or the debuginfo reader is somehow
    442       broken.  5 million is of course arbitrary; but it's big enough
    443       to be bigger than the size of any plausible piece of code that
    444       would fall within a single procedure. */
    445    vg_assert(cfsi.len < 5000000);
    446 
    447    vg_assert(di->have_rx_map && di->have_rw_map);
    448    /* If we have an empty r-x mapping (is that possible?) then the
    449       DiCfSI can't possibly fall inside it.  In which case skip. */
    450    if (di->rx_map_size == 0)
    451       return;
    452 
    453    /* Rule out ones which are completely outside the r-x mapped area.
    454       See "Comment_Regarding_Text_Range_Checks" elsewhere in this file
    455       for background and rationale. */
    456    if (cfsi.base + cfsi.len - 1 < di->rx_map_avma
    457        || cfsi.base >= di->rx_map_avma + di->rx_map_size) {
    458       static Int complaints = 10;
    459       if (VG_(clo_trace_cfi) || complaints > 0) {
    460          complaints--;
    461          if (VG_(clo_verbosity) > 1) {
    462             VG_(message)(
    463                Vg_DebugMsg,
    464                "warning: DiCfSI %#lx .. %#lx outside segment %#lx .. %#lx\n",
    465                cfsi.base,
    466                cfsi.base + cfsi.len - 1,
    467                di->text_avma,
    468                di->text_avma + di->text_size - 1
    469             );
    470          }
    471          if (VG_(clo_trace_cfi))
    472             ML_(ppDiCfSI)(di->cfsi_exprs, &cfsi);
    473       }
    474       return;
    475    }
    476 
    477    /* Now we know the range is at least partially inside the r-x
    478       mapped area.  That implies that at least one of the ends of the
    479       range falls inside the area.  If necessary, clip it so it is
    480       completely within the area.  If we don't do this,
    481       check_CFSI_related_invariants() in debuginfo.c (invariant #2)
    482       will fail.  See
    483       "Comment_on_IMPORTANT_CFSI_REPRESENTATIONAL_INVARIANTS" in
    484       priv_storage.h for background. */
    485    if (cfsi.base < di->rx_map_avma) {
    486       /* Lower end is outside the mapped area.  Hence upper end must
    487          be inside it. */
    488       if (0) VG_(printf)("XXX truncate lower\n");
    489       vg_assert(cfsi.base + cfsi.len - 1 >= di->rx_map_avma);
    490       delta = (SSizeT)(di->rx_map_avma - cfsi.base);
    491       vg_assert(delta > 0);
    492       vg_assert(delta < (SSizeT)cfsi.len);
    493       cfsi.base += delta;
    494       cfsi.len -= delta;
    495    }
    496    else
    497    if (cfsi.base + cfsi.len - 1 > di->rx_map_avma + di->rx_map_size - 1) {
    498       /* Upper end is outside the mapped area.  Hence lower end must be
    499          inside it. */
    500       if (0) VG_(printf)("XXX truncate upper\n");
    501       vg_assert(cfsi.base <= di->rx_map_avma + di->rx_map_size - 1);
    502       delta = (SSizeT)( (cfsi.base + cfsi.len - 1)
    503                         - (di->rx_map_avma + di->rx_map_size - 1) );
    504       vg_assert(delta > 0); vg_assert(delta < (SSizeT)cfsi.len);
    505       cfsi.len -= delta;
    506    }
    507 
    508    /* Final checks */
    509 
    510    /* Because: either cfsi was entirely inside the range, in which
    511       case we asserted that len > 0 at the start, OR it fell partially
    512       inside the range, in which case we reduced it by some size
    513       (delta) which is < its original size. */
    514    vg_assert(cfsi.len > 0);
    515 
    516    /* Similar logic applies for the next two assertions. */
    517    vg_assert(cfsi.base >= di->rx_map_avma);
    518    vg_assert(cfsi.base + cfsi.len - 1
    519              <= di->rx_map_avma + di->rx_map_size - 1);
    520 
    521    if (di->cfsi_used == di->cfsi_size) {
    522       new_sz = 2 * di->cfsi_size;
    523       if (new_sz == 0) new_sz = 20;
    524       new_tab = ML_(dinfo_zalloc)( "di.storage.addDiCfSI.1",
    525                                    new_sz * sizeof(DiCfSI) );
    526       if (di->cfsi != NULL) {
    527          for (i = 0; i < di->cfsi_used; i++)
    528             new_tab[i] = di->cfsi[i];
    529          ML_(dinfo_free)(di->cfsi);
    530       }
    531       di->cfsi = new_tab;
    532       di->cfsi_size = new_sz;
    533    }
    534 
    535    di->cfsi[di->cfsi_used] = cfsi;
    536    di->cfsi_used++;
    537    vg_assert(di->cfsi_used <= di->cfsi_size);
    538 }
    539 
    540 
    541 Int ML_(CfiExpr_Undef)( XArray* dst )
    542 {
    543    CfiExpr e;
    544    VG_(memset)( &e, 0, sizeof(e) );
    545    e.tag = Cex_Undef;
    546    return (Int)VG_(addToXA)( dst, &e );
    547 }
    548 Int ML_(CfiExpr_Deref)( XArray* dst, Int ixAddr )
    549 {
    550    CfiExpr e;
    551    VG_(memset)( &e, 0, sizeof(e) );
    552    e.tag = Cex_Deref;
    553    e.Cex.Deref.ixAddr = ixAddr;
    554    return (Int)VG_(addToXA)( dst, &e );
    555 }
    556 Int ML_(CfiExpr_Const)( XArray* dst, UWord con )
    557 {
    558    CfiExpr e;
    559    VG_(memset)( &e, 0, sizeof(e) );
    560    e.tag = Cex_Const;
    561    e.Cex.Const.con = con;
    562    return (Int)VG_(addToXA)( dst, &e );
    563 }
    564 Int ML_(CfiExpr_Binop)( XArray* dst, CfiOp op, Int ixL, Int ixR )
    565 {
    566    CfiExpr e;
    567    VG_(memset)( &e, 0, sizeof(e) );
    568    e.tag = Cex_Binop;
    569    e.Cex.Binop.op  = op;
    570    e.Cex.Binop.ixL = ixL;
    571    e.Cex.Binop.ixR = ixR;
    572    return (Int)VG_(addToXA)( dst, &e );
    573 }
    574 Int ML_(CfiExpr_CfiReg)( XArray* dst, CfiReg reg )
    575 {
    576    CfiExpr e;
    577    VG_(memset)( &e, 0, sizeof(e) );
    578    e.tag = Cex_CfiReg;
    579    e.Cex.CfiReg.reg = reg;
    580    return (Int)VG_(addToXA)( dst, &e );
    581 }
    582 Int ML_(CfiExpr_DwReg)( XArray* dst, Int reg )
    583 {
    584    CfiExpr e;
    585    VG_(memset)( &e, 0, sizeof(e) );
    586    e.tag = Cex_DwReg;
    587    e.Cex.DwReg.reg = reg;
    588    return (Int)VG_(addToXA)( dst, &e );
    589 }
    590 
    591 static void ppCfiOp ( CfiOp op )
    592 {
    593    switch (op) {
    594       case Cop_Add: VG_(printf)("+"); break;
    595       case Cop_Sub: VG_(printf)("-"); break;
    596       case Cop_And: VG_(printf)("&"); break;
    597       case Cop_Mul: VG_(printf)("*"); break;
    598       default:      vg_assert(0);
    599    }
    600 }
    601 
    602 static void ppCfiReg ( CfiReg reg )
    603 {
    604    switch (reg) {
    605       case Creg_IA_SP:   VG_(printf)("xSP"); break;
    606       case Creg_IA_BP:   VG_(printf)("xBP"); break;
    607       case Creg_IA_IP:   VG_(printf)("xIP"); break;
    608       case Creg_ARM_R13: VG_(printf)("R13"); break;
    609       case Creg_ARM_R12: VG_(printf)("R12"); break;
    610       case Creg_ARM_R15: VG_(printf)("R15"); break;
    611       case Creg_ARM_R14: VG_(printf)("R14"); break;
    612       default: vg_assert(0);
    613    }
    614 }
    615 
    616 void ML_(ppCfiExpr)( XArray* src, Int ix )
    617 {
    618    /* VG_(indexXA) checks for invalid src/ix values, so we can
    619       use it indiscriminately. */
    620    CfiExpr* e = (CfiExpr*) VG_(indexXA)( src, ix );
    621    switch (e->tag) {
    622       case Cex_Undef:
    623          VG_(printf)("Undef");
    624          break;
    625       case Cex_Deref:
    626          VG_(printf)("*(");
    627          ML_(ppCfiExpr)(src, e->Cex.Deref.ixAddr);
    628          VG_(printf)(")");
    629          break;
    630       case Cex_Const:
    631          VG_(printf)("0x%lx", e->Cex.Const.con);
    632          break;
    633       case Cex_Binop:
    634          VG_(printf)("(");
    635          ML_(ppCfiExpr)(src, e->Cex.Binop.ixL);
    636          VG_(printf)(")");
    637          ppCfiOp(e->Cex.Binop.op);
    638          VG_(printf)("(");
    639          ML_(ppCfiExpr)(src, e->Cex.Binop.ixR);
    640          VG_(printf)(")");
    641          break;
    642       case Cex_CfiReg:
    643          ppCfiReg(e->Cex.CfiReg.reg);
    644          break;
    645       case Cex_DwReg:
    646          VG_(printf)("dwr%d", e->Cex.DwReg.reg);
    647          break;
    648       default:
    649          VG_(core_panic)("ML_(ppCfiExpr)");
    650          /*NOTREACHED*/
    651          break;
    652    }
    653 }
    654 
    655 
    656 Word ML_(cmp_for_DiAddrRange_range) ( const void* keyV,
    657                                       const void* elemV ) {
    658    const Addr* key = (const Addr*)keyV;
    659    const DiAddrRange* elem = (const DiAddrRange*)elemV;
    660    if (0)
    661       VG_(printf)("cmp_for_DiAddrRange_range: %#lx vs %#lx\n",
    662                   *key, elem->aMin);
    663    if ((*key) < elem->aMin) return -1;
    664    if ((*key) > elem->aMax) return 1;
    665    return 0;
    666 }
    667 
    668 static
    669 void show_scope ( OSet* /* of DiAddrRange */ scope, HChar* who )
    670 {
    671    DiAddrRange* range;
    672    VG_(printf)("Scope \"%s\" = {\n", who);
    673    VG_(OSetGen_ResetIter)( scope );
    674    while (True) {
    675       range = VG_(OSetGen_Next)( scope );
    676       if (!range) break;
    677       VG_(printf)("   %#lx .. %#lx: %lu vars\n", range->aMin, range->aMax,
    678                   range->vars ? VG_(sizeXA)(range->vars) : 0);
    679    }
    680    VG_(printf)("}\n");
    681 }
    682 
    683 /* Add the variable 'var' to 'scope' for the address range [aMin,aMax]
    684    (inclusive of aMin and aMax).  Split existing ranges as required if
    685    aMin or aMax or both don't match existing range boundaries, and add
    686    'var' to all required ranges.  Take great care to preserve the
    687    invariant that the ranges in 'scope' cover the entire address range
    688    exactly once, with no overlaps and no holes. */
    689 static void add_var_to_arange (
    690                /*MOD*/OSet* /* of DiAddrRange */ scope,
    691                Addr aMin,
    692                Addr aMax,
    693                DiVariable* var
    694             )
    695 {
    696    DiAddrRange *first, *last, *range;
    697    /* These xx variables are for assertion checking only; they don't
    698       contribute anything to the actual work of this function. */
    699    DiAddrRange *xxRangep, *xxFirst, *xxLast;
    700    UWord       xxIters;
    701 
    702    vg_assert(aMin <= aMax);
    703 
    704    if (0) VG_(printf)("add_var_to_arange: %#lx .. %#lx\n", aMin, aMax);
    705    if (0) show_scope( scope, "add_var_to_arange(1)" );
    706 
    707    /* See if the lower end of the range (aMin) falls exactly on an
    708       existing range boundary.  If not, find the range it does fall
    709       into, and split it (copying the variables in the process), so
    710       that aMin does exactly fall on a range boundary. */
    711    first = VG_(OSetGen_Lookup)( scope, &aMin );
    712    /* It must be present, since the presented OSet must cover
    713       the entire address range. */
    714    vg_assert(first);
    715    vg_assert(first->aMin <= first->aMax);
    716    vg_assert(first->aMin <= aMin && aMin <= first->aMax);
    717 
    718    /* Fast track common case, which is that the range specified for
    719       the variable exactly coincides with one already-existing
    720       range. */
    721    if (first->aMin == aMin && first->aMax == aMax) {
    722       vg_assert(first->vars);
    723       VG_(addToXA)( first->vars, var );
    724       return;
    725    }
    726 
    727    /* We have to get into splitting ranges, which is complex
    728       and slow. */
    729    if (first->aMin < aMin) {
    730       DiAddrRange* nyu;
    731       /* Ok.  We'll have to split 'first'. */
    732       /* truncate the upper end of 'first' */
    733       Addr tmp = first->aMax;
    734       first->aMax = aMin-1;
    735       vg_assert(first->aMin <= first->aMax);
    736       /* create a new range */
    737       nyu = VG_(OSetGen_AllocNode)( scope, sizeof(DiAddrRange) );
    738       vg_assert(nyu);
    739       nyu->aMin = aMin;
    740       nyu->aMax = tmp;
    741       vg_assert(nyu->aMin <= nyu->aMax);
    742       /* copy vars into it */
    743       vg_assert(first->vars);
    744       nyu->vars = VG_(cloneXA)( "di.storage.avta.1", first->vars );
    745       vg_assert(nyu->vars);
    746       VG_(OSetGen_Insert)( scope, nyu );
    747       first = nyu;
    748    }
    749 
    750    vg_assert(first->aMin == aMin);
    751 
    752    /* Now do exactly the same for the upper end (aMax): if it doesn't
    753       fall on a boundary, cause it to do so by splitting the range it
    754       does currently fall into. */
    755    last = VG_(OSetGen_Lookup)( scope, &aMax );
    756    vg_assert(last->aMin <= last->aMax);
    757    vg_assert(last->aMin <= aMax && aMax <= last->aMax);
    758 
    759    if (aMax < last->aMax) {
    760       DiAddrRange* nyu;
    761       /* We have to split 'last'. */
    762       /* truncate the lower end of 'last' */
    763       Addr tmp = last->aMin;
    764       last->aMin = aMax+1;
    765       vg_assert(last->aMin <= last->aMax);
    766       /* create a new range */
    767       nyu = VG_(OSetGen_AllocNode)( scope, sizeof(DiAddrRange) );
    768       vg_assert(nyu);
    769       nyu->aMin = tmp;
    770       nyu->aMax = aMax;
    771       vg_assert(nyu->aMin <= nyu->aMax);
    772       /* copy vars into it */
    773       vg_assert(last->vars);
    774       nyu->vars = VG_(cloneXA)( "di.storage.avta.2", last->vars );
    775       vg_assert(nyu->vars);
    776       VG_(OSetGen_Insert)( scope, nyu );
    777       last = nyu;
    778    }
    779 
    780    vg_assert(aMax == last->aMax);
    781 
    782    xxFirst = (DiAddrRange*)VG_(OSetGen_Lookup)(scope, &aMin);
    783    xxLast  = (DiAddrRange*)VG_(OSetGen_Lookup)(scope, &aMax);
    784    vg_assert(xxFirst);
    785    vg_assert(xxLast);
    786    vg_assert(xxFirst->aMin == aMin);
    787    vg_assert(xxLast->aMax == aMax);
    788    if (xxFirst != xxLast)
    789       vg_assert(xxFirst->aMax < xxLast->aMin);
    790 
    791    /* Great.  Now we merely need to iterate over the segments from
    792       'first' to 'last' inclusive, and add 'var' to the variable set
    793       of each of them. */
    794    if (0) {
    795       static UWord ctr = 0;
    796       ctr++;
    797       VG_(printf)("ctr = %lu\n", ctr);
    798       if (ctr >= 33263) show_scope( scope, "add_var_to_arange(2)" );
    799    }
    800 
    801    xxIters = 0;
    802    range = xxRangep = NULL;
    803    VG_(OSetGen_ResetIterAt)( scope, &aMin );
    804    while (True) {
    805       xxRangep = range;
    806       range    = VG_(OSetGen_Next)( scope );
    807       if (!range) break;
    808       if (range->aMin > aMax) break;
    809       xxIters++;
    810       if (0) VG_(printf)("have range %#lx %#lx\n",
    811                          range->aMin, range->aMax);
    812 
    813       /* Sanity checks */
    814       if (!xxRangep) {
    815          /* This is the first in the range */
    816          vg_assert(range->aMin == aMin);
    817       } else {
    818          vg_assert(xxRangep->aMax + 1 == range->aMin);
    819       }
    820 
    821       vg_assert(range->vars);
    822       VG_(addToXA)( range->vars, var );
    823    }
    824    /* Done.  We should have seen at least one range. */
    825    vg_assert(xxIters >= 1);
    826    if (xxIters == 1) vg_assert(xxFirst == xxLast);
    827    if (xxFirst == xxLast) vg_assert(xxIters == 1);
    828    vg_assert(xxRangep);
    829    vg_assert(xxRangep->aMax == aMax);
    830    vg_assert(xxRangep == xxLast);
    831 }
    832 
    833 
    834 /* Top-level place to call to add a variable description (as extracted
    835    from a DWARF3 .debug_info section. */
    836 void ML_(addVar)( struct _DebugInfo* di,
    837                   Int    level,
    838                   Addr   aMin,
    839                   Addr   aMax,
    840                   UChar* name, /* in di's .strchunks */
    841                   UWord  typeR, /* a cuOff */
    842                   GExpr* gexpr,
    843                   GExpr* fbGX,
    844                   UChar* fileName, /* where decl'd - may be NULL.
    845                                       in di's .strchunks */
    846                   Int    lineNo, /* where decl'd - may be zero */
    847                   Bool   show )
    848 {
    849    OSet* /* of DiAddrRange */ scope;
    850    DiVariable var;
    851    Bool       all;
    852    TyEnt*     ent;
    853    MaybeULong mul;
    854    HChar*     badness;
    855 
    856    tl_assert(di && di->admin_tyents);
    857 
    858    if (0) {
    859       VG_(printf)("  ML_(addVar): level %d  %#lx-%#lx  %s :: ",
    860                   level, aMin, aMax, name );
    861       ML_(pp_TyEnt_C_ishly)( di->admin_tyents, typeR );
    862       VG_(printf)("\n  Var=");
    863       ML_(pp_GX)(gexpr);
    864       VG_(printf)("\n");
    865       if (fbGX) {
    866          VG_(printf)("  FrB=");
    867          ML_(pp_GX)( fbGX );
    868          VG_(printf)("\n");
    869       } else {
    870          VG_(printf)("  FrB=none\n");
    871       }
    872       VG_(printf)("\n");
    873    }
    874 
    875    vg_assert(level >= 0);
    876    vg_assert(aMin <= aMax);
    877    vg_assert(name);
    878    vg_assert(gexpr);
    879 
    880    ent = ML_(TyEnts__index_by_cuOff)( di->admin_tyents, NULL, typeR);
    881    tl_assert(ent);
    882    vg_assert(ML_(TyEnt__is_type)(ent));
    883 
    884    /* "Comment_Regarding_Text_Range_Checks" (is referred to elsewhere)
    885       ----------------------------------------------------------------
    886       Ignore any variables whose aMin .. aMax (that is, range of text
    887       addresses for which they actually exist) falls outside the text
    888       segment.  Is this indicative of a bug in the reader?  Maybe.
    889       (LATER): instead of restricting strictly to the .text segment,
    890       be a bit more relaxed, and accept any variable whose text range
    891       falls inside the r-x mapped area.  This is useful because .text
    892       is not always the only instruction-carrying segment: others are:
    893       .init .plt __libc_freeres_fn and .fini.  This implicitly assumes
    894       that those extra sections have the same bias as .text, but that
    895       seems a reasonable assumption to me. */
    896    /* This is assured us by top level steering logic in debuginfo.c,
    897       and it is re-checked at the start of
    898       ML_(read_elf_debug_info). */
    899    vg_assert(di->have_rx_map && di->have_rw_map);
    900    if (level > 0
    901        && (aMax < di->rx_map_avma
    902            || aMin >= di->rx_map_avma + di->rx_map_size)) {
    903       if (VG_(clo_verbosity) >= 0) {
    904          VG_(message)(Vg_DebugMsg,
    905             "warning: addVar: in range %#lx .. %#lx outside "
    906             "segment %#lx .. %#lx (%s)\n",
    907             aMin, aMax,
    908             di->text_avma, di->text_avma + di->text_size -1,
    909             name
    910          );
    911       }
    912       return;
    913    }
    914 
    915    /* If the type's size is zero (which can mean unknown size), ignore
    916       it.  We will never be able to actually relate a data address to
    917       a data object with zero size, so there's no point in storing
    918       info on it.  On 32-bit platforms, also reject types whose size
    919       is 2^32 bytes or large.  (It's amazing what junk shows up ..) */
    920    mul = ML_(sizeOfType)(di->admin_tyents, typeR);
    921 
    922    badness = NULL;
    923    if (mul.b != True)
    924       badness = "unknown size";
    925    else if (mul.ul == 0)
    926       badness = "zero size   ";
    927    else if (sizeof(void*) == 4 && mul.ul >= (1ULL<<32))
    928       badness = "implausibly large";
    929 
    930    if (badness) {
    931       static Int complaints = 10;
    932       if (VG_(clo_verbosity) >= 2 && complaints > 0) {
    933          VG_(message)(Vg_DebugMsg, "warning: addVar: %s (%s)\n",
    934                                    badness, name );
    935          complaints--;
    936       }
    937       return;
    938    }
    939 
    940    if (!di->varinfo) {
    941       di->varinfo = VG_(newXA)( ML_(dinfo_zalloc),
    942                                 "di.storage.addVar.1",
    943                                 ML_(dinfo_free),
    944                                 sizeof(OSet*) );
    945    }
    946 
    947    vg_assert(level < 256); /* arbitrary; stay sane */
    948    /* Expand the top level array enough to map this level */
    949    while ( VG_(sizeXA)(di->varinfo) <= level ) {
    950       DiAddrRange* nyu;
    951       scope = VG_(OSetGen_Create)( offsetof(DiAddrRange,aMin),
    952                                    ML_(cmp_for_DiAddrRange_range),
    953                                    ML_(dinfo_zalloc), "di.storage.addVar.2",
    954                                    ML_(dinfo_free) );
    955       vg_assert(scope);
    956       if (0) VG_(printf)("create: scope = %p, adding at %ld\n",
    957                          scope, VG_(sizeXA)(di->varinfo));
    958       VG_(addToXA)( di->varinfo, &scope );
    959       /* Add a single range covering the entire address space.  At
    960          level 0 we require this doesn't get split.  At levels above 0
    961          we require that any additions to it cause it to get split.
    962          All of these invariants get checked both add_var_to_arange
    963          and after reading is complete, in canonicaliseVarInfo. */
    964       nyu = VG_(OSetGen_AllocNode)( scope, sizeof(DiAddrRange) );
    965       vg_assert(nyu);
    966       nyu->aMin = (Addr)0;
    967       nyu->aMax = ~(Addr)0;
    968       nyu->vars = VG_(newXA)( ML_(dinfo_zalloc), "di.storage.addVar.3",
    969                               ML_(dinfo_free),
    970                               sizeof(DiVariable) );
    971       vg_assert(nyu->vars);
    972       VG_(OSetGen_Insert)( scope, nyu );
    973    }
    974 
    975    vg_assert( VG_(sizeXA)(di->varinfo) > level );
    976    scope = *(OSet**)VG_(indexXA)( di->varinfo, level );
    977    vg_assert(scope);
    978 
    979    var.name     = name;
    980    var.typeR    = typeR;
    981    var.gexpr    = gexpr;
    982    var.fbGX     = fbGX;
    983    var.fileName = fileName;
    984    var.lineNo   = lineNo;
    985 
    986    all = aMin == (Addr)0 && aMax == ~(Addr)0;
    987    vg_assert(level == 0 ? all : !all);
    988 
    989    add_var_to_arange( /*MOD*/scope, aMin, aMax, &var );
    990 }
    991 
    992 
    993 /* This really just checks the constructed data structure, as there is
    994    no canonicalisation to do. */
    995 static void canonicaliseVarInfo ( struct _DebugInfo* di )
    996 {
    997    Word i, nInThisScope;
    998 
    999    if (!di->varinfo)
   1000       return;
   1001 
   1002    for (i = 0; i < VG_(sizeXA)(di->varinfo); i++) {
   1003 
   1004       DiAddrRange *range, *rangep;
   1005       OSet* scope = *(OSet**)VG_(indexXA)(di->varinfo, i);
   1006       if (!scope) continue;
   1007 
   1008       /* Deal with the global-scope case. */
   1009       if (i == 0) {
   1010          Addr zero = 0;
   1011          vg_assert(VG_(OSetGen_Size)( scope ) == 1);
   1012          range = VG_(OSetGen_Lookup)( scope, &zero );
   1013          vg_assert(range);
   1014          vg_assert(range->aMin == (Addr)0);
   1015          vg_assert(range->aMax == ~(Addr)0);
   1016          continue;
   1017       }
   1018 
   1019       /* All the rest of this is for the local-scope case. */
   1020       /* iterate over all entries in 'scope' */
   1021       nInThisScope = 0;
   1022       rangep = NULL;
   1023       VG_(OSetGen_ResetIter)(scope);
   1024       while (True) {
   1025          range = VG_(OSetGen_Next)(scope);
   1026          if (!range) {
   1027            /* We just saw the last one.  There must have been at
   1028               least one entry in the range. */
   1029            vg_assert(rangep);
   1030            vg_assert(rangep->aMax == ~(Addr)0);
   1031            break;
   1032          }
   1033 
   1034          vg_assert(range->aMin <= range->aMax);
   1035          vg_assert(range->vars);
   1036 
   1037          if (!rangep) {
   1038            /* This is the first entry in the range. */
   1039            vg_assert(range->aMin == 0);
   1040          } else {
   1041            vg_assert(rangep->aMax + 1 == range->aMin);
   1042          }
   1043 
   1044          rangep = range;
   1045          nInThisScope++;
   1046       } /* iterating over ranges in a given scope */
   1047 
   1048       /* If there's only one entry in this (local) scope, it must
   1049          cover the entire address space (obviously), but it must not
   1050          contain any vars. */
   1051 
   1052       vg_assert(nInThisScope > 0);
   1053       if (nInThisScope == 1) {
   1054          Addr zero = 0;
   1055          vg_assert(VG_(OSetGen_Size)( scope ) == 1);
   1056          range = VG_(OSetGen_Lookup)( scope, &zero );
   1057          vg_assert(range);
   1058          vg_assert(range->aMin == (Addr)0);
   1059          vg_assert(range->aMax == ~(Addr)0);
   1060          vg_assert(range->vars);
   1061          vg_assert(VG_(sizeXA)(range->vars) == 0);
   1062       }
   1063 
   1064    } /* iterate over scopes */
   1065 }
   1066 
   1067 
   1068 /*------------------------------------------------------------*/
   1069 /*--- Canonicalisers                                       ---*/
   1070 /*------------------------------------------------------------*/
   1071 
   1072 /* Sort the symtab by starting address, and emit warnings if any
   1073    symbols have overlapping address ranges.  We use that old chestnut,
   1074    shellsort.  Mash the table around so as to establish the property
   1075    that addresses are in order and the ranges to not overlap.  This
   1076    facilitates using binary search to map addresses to symbols when we
   1077    come to query the table.
   1078 */
   1079 static Int compare_DiSym ( void* va, void* vb )
   1080 {
   1081    DiSym* a = (DiSym*)va;
   1082    DiSym* b = (DiSym*)vb;
   1083    if (a->addr < b->addr) return -1;
   1084    if (a->addr > b->addr) return  1;
   1085    return 0;
   1086 }
   1087 
   1088 
   1089 /* Two symbols have the same address.  Which name do we prefer?  In order:
   1090 
   1091    - Prefer "PMPI_<foo>" over "MPI_<foo>".
   1092 
   1093    - Else, prefer a non-NULL name over a NULL one.
   1094 
   1095    - Else, prefer a non-whitespace name over an all-whitespace name.
   1096 
   1097    - Else, prefer the shorter symbol name.  If the symbol contains a
   1098      version symbol ('@' on Linux, other platforms may differ), which means it
   1099      is versioned, then the length up to the version symbol is used for length
   1100      comparison purposes (so "foo (at) GLIBC_2.4.2" is considered shorter than
   1101      "foobar").
   1102 
   1103    - Else, if two symbols have the same length, prefer a versioned symbol over
   1104      a non-versioned symbol.
   1105 
   1106    - Else, use alphabetical ordering.
   1107 
   1108    - Otherwise, they must be the same;  use the symbol with the lower address.
   1109 
   1110    Very occasionally this goes wrong (eg. 'memcmp' and 'bcmp' are
   1111    aliases in glibc, we choose the 'bcmp' symbol because it's shorter,
   1112    so we can misdescribe memcmp() as bcmp()).  This is hard to avoid.
   1113    It's mentioned in the FAQ file.
   1114  */
   1115 static DiSym* prefersym ( struct _DebugInfo* di, DiSym* a, DiSym* b )
   1116 {
   1117    Word cmp;
   1118    Word vlena, vlenb;		/* length without version */
   1119    const UChar *vpa, *vpb;
   1120 
   1121    Bool preferA = False;
   1122    Bool preferB = False;
   1123 
   1124    vg_assert(a->addr == b->addr);
   1125 
   1126    vlena = VG_(strlen)(a->name);
   1127    vlenb = VG_(strlen)(b->name);
   1128 
   1129 #if defined(VGO_linux) || defined(VGO_aix5)
   1130 #  define VERSION_CHAR '@'
   1131 #elif defined(VGO_darwin)
   1132 #  define VERSION_CHAR '$'
   1133 #else
   1134 #  error Unknown OS
   1135 #endif
   1136 
   1137    vpa = VG_(strchr)(a->name, VERSION_CHAR);
   1138    vpb = VG_(strchr)(b->name, VERSION_CHAR);
   1139 
   1140    if (vpa)
   1141       vlena = vpa - a->name;
   1142    if (vpb)
   1143       vlenb = vpb - b->name;
   1144 
   1145    /* MPI hack: prefer PMPI_Foo over MPI_Foo */
   1146    if (0==VG_(strncmp)(a->name, "MPI_", 4)
   1147        && 0==VG_(strncmp)(b->name, "PMPI_", 5)
   1148        && 0==VG_(strcmp)(a->name, 1+b->name)) {
   1149       preferB = True; goto out;
   1150    }
   1151    if (0==VG_(strncmp)(b->name, "MPI_", 4)
   1152        && 0==VG_(strncmp)(a->name, "PMPI_", 5)
   1153        && 0==VG_(strcmp)(b->name, 1+a->name)) {
   1154       preferA = True; goto out;
   1155    }
   1156 
   1157    /* Prefer non-empty name. */
   1158    if (vlena  &&  !vlenb) {
   1159       preferA = True; goto out;
   1160    }
   1161    if (vlenb  &&  !vlena) {
   1162       preferB = True; goto out;
   1163    }
   1164 
   1165    /* Prefer non-whitespace name. */
   1166    {
   1167       Bool blankA = True;
   1168       Bool blankB = True;
   1169       Char *s;
   1170       s = a->name;
   1171       while (*s) {
   1172          if (!VG_(isspace)(*s++)) {
   1173             blankA = False;
   1174             break;
   1175          }
   1176       }
   1177       s = b->name;
   1178       while (*s) {
   1179          if (!VG_(isspace)(*s++)) {
   1180             blankB = False;
   1181             break;
   1182          }
   1183       }
   1184 
   1185       if (!blankA  &&  blankB) {
   1186          preferA = True; goto out;
   1187       }
   1188       if (!blankB  &&  blankA) {
   1189          preferB = True; goto out;
   1190       }
   1191    }
   1192 
   1193    /* Select the shortest unversioned name */
   1194    if (vlena < vlenb) {
   1195       preferA = True; goto out;
   1196    }
   1197    if (vlenb < vlena) {
   1198       preferB = True; goto out;
   1199    }
   1200 
   1201    /* Equal lengths; select the versioned name */
   1202    if (vpa && !vpb) {
   1203       preferA = True; goto out;
   1204    }
   1205    if (vpb && !vpa) {
   1206       preferB = True; goto out;
   1207    }
   1208 
   1209    /* Either both versioned or neither is versioned; select them
   1210       alphabetically */
   1211    cmp = VG_(strcmp)(a->name, b->name);
   1212    if (cmp < 0) {
   1213       preferA = True; goto out;
   1214    }
   1215    if (cmp > 0) {
   1216       preferB = True; goto out;
   1217    }
   1218 
   1219    /* If we get here, they are the same name. */
   1220 
   1221    /* In this case we could choose either (arbitrarily), but might as
   1222       well choose the one with the lowest DiSym* address, so as to try
   1223       and make the comparison mechanism more stable (a la sorting
   1224       parlance).  Also, skip the diagnostic printing in this case. */
   1225    return a <= b  ? a  : b;
   1226 
   1227    /*NOTREACHED*/
   1228    vg_assert(0);
   1229   out:
   1230    if (preferA && !preferB) {
   1231       TRACE_SYMTAB("sym at %#lx: prefer '%s' to '%s'\n",
   1232                    a->addr, a->name, b->name );
   1233       return a;
   1234    }
   1235    if (preferB && !preferA) {
   1236       TRACE_SYMTAB("sym at %#lx: prefer '%s' to '%s'\n",
   1237                    b->addr, b->name, a->name );
   1238       return b;
   1239    }
   1240    /*NOTREACHED*/
   1241    vg_assert(0);
   1242 }
   1243 
   1244 static void canonicaliseSymtab ( struct _DebugInfo* di )
   1245 {
   1246    Word  i, j, n_merged, n_truncated;
   1247    Addr  s1, s2, e1, e2, p1, p2;
   1248    UChar *n1, *n2;
   1249    Bool t1, t2, f1, f2;
   1250 
   1251 #  define SWAP(ty,aa,bb) \
   1252       do { ty tt = (aa); (aa) = (bb); (bb) = tt; } while (0)
   1253 
   1254    if (di->symtab_used == 0)
   1255       return;
   1256 
   1257    VG_(ssort)(di->symtab, di->symtab_used,
   1258                           sizeof(*di->symtab), compare_DiSym);
   1259 
   1260   cleanup_more:
   1261 
   1262    /* If two symbols have identical address ranges, we pick one
   1263       using prefersym() (see it for details). */
   1264    do {
   1265       n_merged = 0;
   1266       j = di->symtab_used;
   1267       di->symtab_used = 0;
   1268       for (i = 0; i < j; i++) {
   1269          if (i < j-1
   1270              && di->symtab[i].addr   == di->symtab[i+1].addr
   1271              && di->symtab[i].size   == di->symtab[i+1].size
   1272              ) {
   1273             n_merged++;
   1274             /* merge the two into one */
   1275 	    di->symtab[di->symtab_used++]
   1276                = *prefersym(di, &di->symtab[i], &di->symtab[i+1]);
   1277             i++;
   1278          } else {
   1279             di->symtab[di->symtab_used++] = di->symtab[i];
   1280          }
   1281       }
   1282       TRACE_SYMTAB( "canonicaliseSymtab: %ld symbols merged\n", n_merged);
   1283    }
   1284    while (n_merged > 0);
   1285 
   1286    /* Detect and "fix" overlapping address ranges. */
   1287    n_truncated = 0;
   1288 
   1289    for (i = 0; i < ((Word)di->symtab_used) -1; i++) {
   1290 
   1291       vg_assert(di->symtab[i].addr <= di->symtab[i+1].addr);
   1292 
   1293       /* Check for common (no overlap) case. */
   1294       if (di->symtab[i].addr + di->symtab[i].size
   1295           <= di->symtab[i+1].addr)
   1296          continue;
   1297 
   1298       /* There's an overlap.  Truncate one or the other. */
   1299       if (di->trace_symtab) {
   1300          VG_(printf)("overlapping address ranges in symbol table\n\t");
   1301          ML_(ppSym)( i, &di->symtab[i] );
   1302          VG_(printf)("\t");
   1303          ML_(ppSym)( i+1, &di->symtab[i+1] );
   1304          VG_(printf)("\n");
   1305       }
   1306 
   1307       /* Truncate one or the other. */
   1308       s1 = di->symtab[i].addr;
   1309       e1 = s1 + di->symtab[i].size - 1;
   1310       p1 = di->symtab[i].tocptr;
   1311       n1 = di->symtab[i].name;
   1312       t1 = di->symtab[i].isText;
   1313       f1 = di->symtab[i].isIFunc;
   1314       s2 = di->symtab[i+1].addr;
   1315       e2 = s2 + di->symtab[i+1].size - 1;
   1316       p2 = di->symtab[i+1].tocptr;
   1317       n2 = di->symtab[i+1].name;
   1318       t2 = di->symtab[i+1].isText;
   1319       f2 = di->symtab[i+1].isIFunc;
   1320       if (s1 < s2) {
   1321          e1 = s2-1;
   1322       } else {
   1323          vg_assert(s1 == s2);
   1324          if (e1 > e2) {
   1325             s1 = e2+1; SWAP(Addr,s1,s2); SWAP(Addr,e1,e2); SWAP(Addr,p1,p2);
   1326                        SWAP(UChar *,n1,n2); SWAP(Bool,t1,t2);
   1327          } else
   1328          if (e1 < e2) {
   1329             s2 = e1+1;
   1330          } else {
   1331 	   /* e1 == e2.  Identical addr ranges.  We'll eventually wind
   1332               up back at cleanup_more, which will take care of it. */
   1333 	 }
   1334       }
   1335       di->symtab[i].addr    = s1;
   1336       di->symtab[i].size    = e1 - s1 + 1;
   1337       di->symtab[i].tocptr  = p1;
   1338       di->symtab[i].name    = n1;
   1339       di->symtab[i].isText  = t1;
   1340       di->symtab[i].isIFunc = f1;
   1341       di->symtab[i+1].addr    = s2;
   1342       di->symtab[i+1].size    = e2 - s2 + 1;
   1343       di->symtab[i+1].tocptr  = p2;
   1344       di->symtab[i+1].name    = n2;
   1345       di->symtab[i+1].isText  = t2;
   1346       di->symtab[i+1].isIFunc = f2;
   1347       vg_assert(s1 <= s2);
   1348       vg_assert(di->symtab[i].size > 0);
   1349       vg_assert(di->symtab[i+1].size > 0);
   1350       /* It may be that the i+1 entry now needs to be moved further
   1351          along to maintain the address order requirement. */
   1352       j = i+1;
   1353       while (j < ((Word)di->symtab_used)-1
   1354              && di->symtab[j].addr > di->symtab[j+1].addr) {
   1355          SWAP(DiSym,di->symtab[j],di->symtab[j+1]);
   1356          j++;
   1357       }
   1358       n_truncated++;
   1359    }
   1360 
   1361    if (n_truncated > 0) goto cleanup_more;
   1362 
   1363    /* Ensure relevant postconditions hold. */
   1364    for (i = 0; i < ((Word)di->symtab_used)-1; i++) {
   1365       /* No zero-sized symbols. */
   1366       vg_assert(di->symtab[i].size > 0);
   1367       /* In order. */
   1368       vg_assert(di->symtab[i].addr < di->symtab[i+1].addr);
   1369       /* No overlaps. */
   1370       vg_assert(di->symtab[i].addr + di->symtab[i].size - 1
   1371                 < di->symtab[i+1].addr);
   1372    }
   1373 #  undef SWAP
   1374 }
   1375 
   1376 
   1377 /* Sort the location table by starting address.  Mash the table around
   1378    so as to establish the property that addresses are in order and the
   1379    ranges do not overlap.  This facilitates using binary search to map
   1380    addresses to locations when we come to query the table.
   1381 */
   1382 static Int compare_DiLoc ( void* va, void* vb )
   1383 {
   1384    DiLoc* a = (DiLoc*)va;
   1385    DiLoc* b = (DiLoc*)vb;
   1386    if (a->addr < b->addr) return -1;
   1387    if (a->addr > b->addr) return  1;
   1388    return 0;
   1389 }
   1390 
   1391 static void canonicaliseLoctab ( struct _DebugInfo* di )
   1392 {
   1393    Word i, j;
   1394 
   1395 #  define SWAP(ty,aa,bb) \
   1396       do { ty tt = (aa); (aa) = (bb); (bb) = tt; } while (0);
   1397 
   1398    if (di->loctab_used == 0)
   1399       return;
   1400 
   1401    /* Sort by start address. */
   1402    VG_(ssort)(di->loctab, di->loctab_used,
   1403                           sizeof(*di->loctab), compare_DiLoc);
   1404 
   1405    /* If two adjacent entries overlap, truncate the first. */
   1406    for (i = 0; i < ((Word)di->loctab_used)-1; i++) {
   1407       vg_assert(di->loctab[i].size < 10000);
   1408       if (di->loctab[i].addr + di->loctab[i].size > di->loctab[i+1].addr) {
   1409          /* Do this in signed int32 because the actual .size fields
   1410             are only 12 bits. */
   1411          Int new_size = di->loctab[i+1].addr - di->loctab[i].addr;
   1412          if (new_size < 0) {
   1413             di->loctab[i].size = 0;
   1414          } else
   1415          if (new_size > MAX_LOC_SIZE) {
   1416            di->loctab[i].size = MAX_LOC_SIZE;
   1417          } else {
   1418            di->loctab[i].size = (UShort)new_size;
   1419          }
   1420       }
   1421    }
   1422 
   1423    /* Zap any zero-sized entries resulting from the truncation
   1424       process. */
   1425    j = 0;
   1426    for (i = 0; i < (Word)di->loctab_used; i++) {
   1427       if (di->loctab[i].size > 0) {
   1428          if (j != i)
   1429             di->loctab[j] = di->loctab[i];
   1430          j++;
   1431       }
   1432    }
   1433    di->loctab_used = j;
   1434 
   1435    /* Ensure relevant postconditions hold. */
   1436    for (i = 0; i < ((Word)di->loctab_used)-1; i++) {
   1437       /*
   1438       VG_(printf)("%d   (%d) %d 0x%x\n",
   1439                    i, di->loctab[i+1].confident,
   1440                    di->loctab[i+1].size, di->loctab[i+1].addr );
   1441       */
   1442       /* No zero-sized symbols. */
   1443       vg_assert(di->loctab[i].size > 0);
   1444       /* In order. */
   1445       vg_assert(di->loctab[i].addr < di->loctab[i+1].addr);
   1446       /* No overlaps. */
   1447       vg_assert(di->loctab[i].addr + di->loctab[i].size - 1
   1448                 < di->loctab[i+1].addr);
   1449    }
   1450 #  undef SWAP
   1451 }
   1452 
   1453 
   1454 /* Sort the call-frame-info table by starting address.  Mash the table
   1455    around so as to establish the property that addresses are in order
   1456    and the ranges do not overlap.  This facilitates using binary
   1457    search to map addresses to locations when we come to query the
   1458    table.
   1459 
   1460    Also, set cfisi_minaddr and cfisi_maxaddr to be the min and max of
   1461    any of the address ranges contained in cfisi[0 .. cfisi_used-1], so
   1462    as to facilitate rapidly skipping this SegInfo when looking for an
   1463    address which falls outside that range.
   1464 */
   1465 static Int compare_DiCfSI ( void* va, void* vb )
   1466 {
   1467    DiCfSI* a = (DiCfSI*)va;
   1468    DiCfSI* b = (DiCfSI*)vb;
   1469    if (a->base < b->base) return -1;
   1470    if (a->base > b->base) return  1;
   1471    return 0;
   1472 }
   1473 
   1474 void ML_(canonicaliseCFI) ( struct _DebugInfo* di )
   1475 {
   1476    Word  i, j;
   1477    const Addr minAvma = 0;
   1478    const Addr maxAvma = ~minAvma;
   1479 
   1480    /* Note: take care in here.  di->cfsi can be NULL, in which
   1481       case _used and _size fields will be zero. */
   1482    if (di->cfsi == NULL) {
   1483       vg_assert(di->cfsi_used == 0);
   1484       vg_assert(di->cfsi_size == 0);
   1485    }
   1486 
   1487    /* Set cfsi_minavma and cfsi_maxavma to summarise the entire
   1488       address range contained in cfsi[0 .. cfsi_used-1]. */
   1489    di->cfsi_minavma = maxAvma;
   1490    di->cfsi_maxavma = minAvma;
   1491    for (i = 0; i < (Word)di->cfsi_used; i++) {
   1492       Addr here_min = di->cfsi[i].base;
   1493       Addr here_max = di->cfsi[i].base + di->cfsi[i].len - 1;
   1494       if (here_min < di->cfsi_minavma)
   1495          di->cfsi_minavma = here_min;
   1496       if (here_max > di->cfsi_maxavma)
   1497          di->cfsi_maxavma = here_max;
   1498    }
   1499 
   1500    if (di->trace_cfi)
   1501       VG_(printf)("canonicaliseCfiSI: %ld entries, %#lx .. %#lx\n",
   1502                   di->cfsi_used,
   1503 	          di->cfsi_minavma, di->cfsi_maxavma);
   1504 
   1505    /* Sort the cfsi array by base address. */
   1506    VG_(ssort)(di->cfsi, di->cfsi_used, sizeof(*di->cfsi), compare_DiCfSI);
   1507 
   1508    /* If two adjacent entries overlap, truncate the first. */
   1509    for (i = 0; i < (Word)di->cfsi_used-1; i++) {
   1510       if (di->cfsi[i].base + di->cfsi[i].len > di->cfsi[i+1].base) {
   1511          Word new_len = di->cfsi[i+1].base - di->cfsi[i].base;
   1512          /* how could it be otherwise?  The entries are sorted by the
   1513             .base field. */
   1514          vg_assert(new_len >= 0);
   1515 	 vg_assert(new_len <= di->cfsi[i].len);
   1516          di->cfsi[i].len = new_len;
   1517       }
   1518    }
   1519 
   1520    /* Zap any zero-sized entries resulting from the truncation
   1521       process. */
   1522    j = 0;
   1523    for (i = 0; i < (Word)di->cfsi_used; i++) {
   1524       if (di->cfsi[i].len > 0) {
   1525          if (j != i)
   1526             di->cfsi[j] = di->cfsi[i];
   1527          j++;
   1528       }
   1529    }
   1530    /* VG_(printf)("XXXXXXXXXXXXX %d %d\n", di->cfsi_used, j); */
   1531    di->cfsi_used = j;
   1532 
   1533    /* Ensure relevant postconditions hold. */
   1534    for (i = 0; i < (Word)di->cfsi_used; i++) {
   1535       /* No zero-length ranges. */
   1536       vg_assert(di->cfsi[i].len > 0);
   1537       /* Makes sense w.r.t. summary address range */
   1538       vg_assert(di->cfsi[i].base >= di->cfsi_minavma);
   1539       vg_assert(di->cfsi[i].base + di->cfsi[i].len - 1
   1540                 <= di->cfsi_maxavma);
   1541 
   1542       if (i < di->cfsi_used - 1) {
   1543          /*
   1544          if (!(di->cfsi[i].base < di->cfsi[i+1].base)) {
   1545             VG_(printf)("\nOOO cfsis:\n");
   1546             ML_(ppCfiSI)(&di->cfsi[i]);
   1547             ML_(ppCfiSI)(&di->cfsi[i+1]);
   1548          }
   1549          */
   1550          /* In order. */
   1551          vg_assert(di->cfsi[i].base < di->cfsi[i+1].base);
   1552          /* No overlaps. */
   1553          vg_assert(di->cfsi[i].base + di->cfsi[i].len - 1
   1554                    < di->cfsi[i+1].base);
   1555       }
   1556    }
   1557 
   1558 }
   1559 
   1560 
   1561 /* Canonicalise the tables held by 'di', in preparation for use.  Call
   1562    this after finishing adding entries to these tables. */
   1563 void ML_(canonicaliseTables) ( struct _DebugInfo* di )
   1564 {
   1565    canonicaliseSymtab ( di );
   1566    canonicaliseLoctab ( di );
   1567    ML_(canonicaliseCFI) ( di );
   1568    canonicaliseVarInfo ( di );
   1569 }
   1570 
   1571 
   1572 /*------------------------------------------------------------*/
   1573 /*--- Searching the tables                                 ---*/
   1574 /*------------------------------------------------------------*/
   1575 
   1576 /* Find a symbol-table index containing the specified pointer, or -1
   1577    if not found.  Binary search.  */
   1578 
   1579 Word ML_(search_one_symtab) ( struct _DebugInfo* di, Addr ptr,
   1580                               Bool match_anywhere_in_sym,
   1581                               Bool findText )
   1582 {
   1583    Addr a_mid_lo, a_mid_hi;
   1584    Word mid, size,
   1585         lo = 0,
   1586         hi = di->symtab_used-1;
   1587    while (True) {
   1588       /* current unsearched space is from lo to hi, inclusive. */
   1589       if (lo > hi) return -1; /* not found */
   1590       mid      = (lo + hi) / 2;
   1591       a_mid_lo = di->symtab[mid].addr;
   1592       size = ( match_anywhere_in_sym
   1593              ? di->symtab[mid].size
   1594              : 1);
   1595       a_mid_hi = ((Addr)di->symtab[mid].addr) + size - 1;
   1596 
   1597       if (ptr < a_mid_lo) { hi = mid-1; continue; }
   1598       if (ptr > a_mid_hi) { lo = mid+1; continue; }
   1599       vg_assert(ptr >= a_mid_lo && ptr <= a_mid_hi);
   1600       /* Found a symbol with the correct address range.  But is it
   1601          of the right kind (text vs data) ? */
   1602       if (  findText   &&   di->symtab[mid].isText  ) return mid;
   1603       if ( (!findText) && (!di->symtab[mid].isText) ) return mid;
   1604       return -1;
   1605    }
   1606 }
   1607 
   1608 
   1609 /* Find a location-table index containing the specified pointer, or -1
   1610    if not found.  Binary search.  */
   1611 
   1612 Word ML_(search_one_loctab) ( struct _DebugInfo* di, Addr ptr )
   1613 {
   1614    Addr a_mid_lo, a_mid_hi;
   1615    Word mid,
   1616         lo = 0,
   1617         hi = di->loctab_used-1;
   1618    while (True) {
   1619       /* current unsearched space is from lo to hi, inclusive. */
   1620       if (lo > hi) return -1; /* not found */
   1621       mid      = (lo + hi) / 2;
   1622       a_mid_lo = di->loctab[mid].addr;
   1623       a_mid_hi = ((Addr)di->loctab[mid].addr) + di->loctab[mid].size - 1;
   1624 
   1625       if (ptr < a_mid_lo) { hi = mid-1; continue; }
   1626       if (ptr > a_mid_hi) { lo = mid+1; continue; }
   1627       vg_assert(ptr >= a_mid_lo && ptr <= a_mid_hi);
   1628       return mid;
   1629    }
   1630 }
   1631 
   1632 
   1633 /* Find a CFI-table index containing the specified pointer, or -1
   1634    if not found.  Binary search.  */
   1635 
   1636 Word ML_(search_one_cfitab) ( struct _DebugInfo* di, Addr ptr )
   1637 {
   1638    Addr a_mid_lo, a_mid_hi;
   1639    Word mid, size,
   1640         lo = 0,
   1641         hi = di->cfsi_used-1;
   1642    while (True) {
   1643       /* current unsearched space is from lo to hi, inclusive. */
   1644       if (lo > hi) return -1; /* not found */
   1645       mid      = (lo + hi) / 2;
   1646       a_mid_lo = di->cfsi[mid].base;
   1647       size     = di->cfsi[mid].len;
   1648       a_mid_hi = a_mid_lo + size - 1;
   1649       vg_assert(a_mid_hi >= a_mid_lo);
   1650       if (ptr < a_mid_lo) { hi = mid-1; continue; }
   1651       if (ptr > a_mid_hi) { lo = mid+1; continue; }
   1652       vg_assert(ptr >= a_mid_lo && ptr <= a_mid_hi);
   1653       return mid;
   1654    }
   1655 }
   1656 
   1657 
   1658 /* Find a FPO-table index containing the specified pointer, or -1
   1659    if not found.  Binary search.  */
   1660 
   1661 Word ML_(search_one_fpotab) ( struct _DebugInfo* di, Addr ptr )
   1662 {
   1663    Addr const addr = ptr - di->rx_map_avma;
   1664    Addr a_mid_lo, a_mid_hi;
   1665    Word mid, size,
   1666         lo = 0,
   1667         hi = di->fpo_size-1;
   1668    while (True) {
   1669       /* current unsearched space is from lo to hi, inclusive. */
   1670       if (lo > hi) return -1; /* not found */
   1671       mid      = (lo + hi) / 2;
   1672       a_mid_lo = di->fpo[mid].ulOffStart;
   1673       size     = di->fpo[mid].cbProcSize;
   1674       a_mid_hi = a_mid_lo + size - 1;
   1675       vg_assert(a_mid_hi >= a_mid_lo);
   1676       if (addr < a_mid_lo) { hi = mid-1; continue; }
   1677       if (addr > a_mid_hi) { lo = mid+1; continue; }
   1678       vg_assert(addr >= a_mid_lo && addr <= a_mid_hi);
   1679       return mid;
   1680    }
   1681 }
   1682 
   1683 /*--------------------------------------------------------------------*/
   1684 /*--- end                                                          ---*/
   1685 /*--------------------------------------------------------------------*/
   1686