Home | History | Annotate | Download | only in m_debuginfo
      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- Read DWARF3/4 ".debug_info" sections (DIE trees).            ---*/
      4 /*---                                                 readdwarf3.c ---*/
      5 /*--------------------------------------------------------------------*/
      6 
      7 /*
      8    This file is part of Valgrind, a dynamic binary instrumentation
      9    framework.
     10 
     11    Copyright (C) 2008-2010 OpenWorks LLP
     12       info (at) open-works.co.uk
     13 
     14    This program is free software; you can redistribute it and/or
     15    modify it under the terms of the GNU General Public License as
     16    published by the Free Software Foundation; either version 2 of the
     17    License, or (at your option) any later version.
     18 
     19    This program is distributed in the hope that it will be useful, but
     20    WITHOUT ANY WARRANTY; without even the implied warranty of
     21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     22    General Public License for more details.
     23 
     24    You should have received a copy of the GNU General Public License
     25    along with this program; if not, write to the Free Software
     26    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     27    02111-1307, USA.
     28 
     29    The GNU General Public License is contained in the file COPYING.
     30 
     31    Neither the names of the U.S. Department of Energy nor the
     32    University of California nor the names of its contributors may be
     33    used to endorse or promote products derived from this software
     34    without prior written permission.
     35 */
     36 
     37 #if defined(VGO_linux) || defined(VGO_darwin)
     38 
     39 /* REFERENCE (without which this code will not make much sense):
     40 
     41    DWARF Debugging Information Format, Version 3,
     42    dated 20 December 2005 (the "D3 spec").
     43 
     44    Available at http://www.dwarfstd.org/Dwarf3.pdf.  There's also a
     45    .doc (MS Word) version, but for some reason the section numbers
     46    between the Word and PDF versions differ by 1 in the first digit.
     47    All section references in this code are to the PDF version.
     48 
     49    CURRENT HACKS:
     50 
     51    DW_TAG_{const,volatile}_type no DW_AT_type is allowed; it is
     52       assumed to mean "const void" or "volatile void" respectively.
     53       GDB appears to interpret them like this, anyway.
     54 
   In many cases it is important to know the svma of a CU (the "base
   address of the CU", as the D3 spec calls it).  There are some
   situations in which the spec implies this value is unknown, but the
   Dwarf3 produced by gcc-4.1 seems to assume it is not unknown but
   merely zero when not explicitly stated.  So we too have to make
   that assumption.
     61 
     62    POTENTIAL BUG?  Spotted 6 Sept 08.  Why doesn't
     63    unitary_range_list() bias the resulting range list in the same way
     64    that its more general cousin, get_range_list(), does?  I don't
     65    know.
     66 
     67    TODO, 2008 Feb 17:
     68 
     69    get rid of cu_svma_known and document the assumed-zero svma hack.
     70 
     71    ML_(sizeOfType): differentiate between zero sized types and types
     72    for which the size is unknown.  Is this important?  I don't know.
     73 
   DW_AT_array_types: deal with explicit sizes (currently we compute
   the size from the bounds and the element size, although that's
   fragile if the bounds are incompletely specified or completely
   absent)
     78 
     79    Document reason for difference (by 1) of stack preening depth in
     80    parse_var_DIE vs parse_type_DIE.
     81 
     82    Don't hand to ML_(addVars), vars whose locations are entirely in
     83    registers (DW_OP_reg*).  This is merely a space-saving
     84    optimisation, as ML_(evaluate_Dwarf3_Expr) should handle these
     85    expressions correctly, by failing to evaluate them and hence
     86    effectively ignoring the variable with which they are associated.
     87 
     88    Deal with DW_AT_array_types which have element size != stride
     89 
     90    In some cases, the info for a variable is split between two
     91    different DIEs (generally a declarer and a definer).  We punt on
     92    these.  Could do better here.
     93 
     94    The 'data_bias' argument passed to the expression evaluator
     95    (ML_(evaluate_Dwarf3_Expr)) should really be changed to a
     96    MaybeUWord, to make it clear when we do vs don't know what it is
     97    for the evaluation of an expression.  At the moment zero is passed
     98    for this parameter in the don't know case.  That's a bit fragile
     99    and obscure; using a MaybeUWord would be clearer.
    100 
    101    POTENTIAL PERFORMANCE IMPROVEMENTS:
    102 
    103    Currently, duplicate removal and all other queries for the type
    104    entities array is done using cuOffset-based pointing, which
    105    involves a binary search (VG_(lookupXA)) for each access.  This is
    106    wildly inefficient, although simple.  It would be better to
    107    translate all the cuOffset-based references (iow, all the "R" and
    108    "Rs" fields in the TyEnts in 'tyents') to direct index numbers in
    109    'tyents' right at the start of dedup_types(), and use direct
    110    indexing (VG_(indexXA)) wherever possible after that.
    111 
    112    cmp__XArrays_of_AddrRange is also a performance bottleneck.  Move
    113    VG_(indexXA) into pub_tool_xarray.h so it can be inlined at all use
    114    points, and possibly also make an _UNCHECKED version which skips
    115    the range checks in performance-critical situations such as this.
    116 
    117    Handle interaction between read_DIE and parse_{var,type}_DIE
    118    better.  Currently read_DIE reads the entire DIE just to find where
    119    the end is (and for debug printing), so that it can later reliably
    120    move the cursor to the end regardless of what parse_{var,type}_DIE
    121    do.  This means many DIEs (most, even?) are read twice.  It would
    122    be smarter to make parse_{var,type}_DIE return a Bool indicating
    123    whether or not they advanced the DIE cursor, and only if they
    124    didn't should read_DIE itself read through the DIE.
    125 
    126    ML_(addVar) and add_var_to_arange: quite a lot of DiAddrRanges have
    127    zero variables in their .vars XArray.  Rather than have an XArray
    128    with zero elements (which uses 2 malloc'd blocks), allow the .vars
    129    pointer to be NULL in this case.
    130 
    131    More generally, reduce the amount of memory allocated and freed
    132    while reading Dwarf3 type/variable information.  Even modest (20MB)
    133    objects cause this module to allocate and free hundreds of
    134    thousands of small blocks, and ML_(arena_malloc) and its various
    135    groupies always show up at the top of performance profiles. */
    136 
    137 #include "pub_core_basics.h"
    138 #include "pub_core_debuginfo.h"
    139 #include "pub_core_libcbase.h"
    140 #include "pub_core_libcassert.h"
    141 #include "pub_core_libcprint.h"
    142 #include "pub_core_options.h"
    143 #include "pub_core_tooliface.h"    /* VG_(needs) */
    144 #include "pub_core_xarray.h"
    145 #include "pub_core_wordfm.h"
    146 #include "priv_misc.h"             /* dinfo_zalloc/free */
    147 #include "priv_tytypes.h"
    148 #include "priv_d3basics.h"
    149 #include "priv_storage.h"
    150 #include "priv_readdwarf3.h"       /* self */
    151 
    152 
    153 /*------------------------------------------------------------*/
    154 /*---                                                      ---*/
    155 /*--- Basic machinery for parsing DIEs.                    ---*/
    156 /*---                                                      ---*/
    157 /*------------------------------------------------------------*/
    158 
    159 #define TRACE_D3(format, args...) \
    160    if (td3) { VG_(printf)(format, ## args); }
    161 
    162 #define D3_INVALID_CUOFF  ((UWord)(-1UL))
    163 #define D3_FAKEVOID_CUOFF ((UWord)(-2UL))
    164 
    165 typedef
    166    struct {
    167       UChar* region_start_img;
    168       UWord  region_szB;
    169       UWord  region_next;
    170       void (*barf)( HChar* ) __attribute__((noreturn));
    171       HChar* barfstr;
    172    }
    173    Cursor;
    174 
    175 static inline Bool is_sane_Cursor ( Cursor* c ) {
    176    if (!c)                return False;
    177    if (!c->barf)          return False;
    178    if (!c->barfstr)       return False;
    179    return True;
    180 }
    181 
    182 static void init_Cursor ( Cursor* c,
    183                           UChar*  region_start_img,
    184                           UWord   region_szB,
    185                           UWord   region_next,
    186                           __attribute__((noreturn)) void (*barf)( HChar* ),
    187                           HChar*  barfstr )
    188 {
    189    vg_assert(c);
    190    VG_(memset)(c, 0, sizeof(*c));
    191    c->region_start_img = region_start_img;
    192    c->region_szB       = region_szB;
    193    c->region_next      = region_next;
    194    c->barf             = barf;
    195    c->barfstr          = barfstr;
    196    vg_assert(is_sane_Cursor(c));
    197 }
    198 
    199 static Bool is_at_end_Cursor ( Cursor* c ) {
    200    vg_assert(is_sane_Cursor(c));
    201    return c->region_next >= c->region_szB;
    202 }
    203 
    204 static inline UWord get_position_of_Cursor ( Cursor* c ) {
    205    vg_assert(is_sane_Cursor(c));
    206    return c->region_next;
    207 }
    208 static inline void set_position_of_Cursor ( Cursor* c, UWord pos ) {
    209    c->region_next = pos;
    210    vg_assert(is_sane_Cursor(c));
    211 }
    212 
    213 static /*signed*/Word get_remaining_length_Cursor ( Cursor* c ) {
    214    vg_assert(is_sane_Cursor(c));
    215    return c->region_szB - c->region_next;
    216 }
    217 
    218 static UChar* get_address_of_Cursor ( Cursor* c ) {
    219    vg_assert(is_sane_Cursor(c));
    220    return &c->region_start_img[ c->region_next ];
    221 }
    222 
    223 /* FIXME: document assumptions on endianness for
    224    get_UShort/UInt/ULong. */
    225 static inline UChar get_UChar ( Cursor* c ) {
    226    UChar r;
    227    /* vg_assert(is_sane_Cursor(c)); */
    228    if (c->region_next + sizeof(UChar) > c->region_szB) {
    229       c->barf(c->barfstr);
    230       /*NOTREACHED*/
    231       vg_assert(0);
    232    }
    233    r = * (UChar*) &c->region_start_img[ c->region_next ];
    234    c->region_next += sizeof(UChar);
    235    return r;
    236 }
    237 static UShort get_UShort ( Cursor* c ) {
    238    UShort r;
    239    vg_assert(is_sane_Cursor(c));
    240    if (c->region_next + sizeof(UShort) > c->region_szB) {
    241       c->barf(c->barfstr);
    242       /*NOTREACHED*/
    243       vg_assert(0);
    244    }
    245    r = * (UShort*) &c->region_start_img[ c->region_next ];
    246    c->region_next += sizeof(UShort);
    247    return r;
    248 }
    249 static UInt get_UInt ( Cursor* c ) {
    250    UInt r;
    251    vg_assert(is_sane_Cursor(c));
    252    if (c->region_next + sizeof(UInt) > c->region_szB) {
    253       c->barf(c->barfstr);
    254       /*NOTREACHED*/
    255       vg_assert(0);
    256    }
    257    r = * (UInt*) &c->region_start_img[ c->region_next ];
    258    c->region_next += sizeof(UInt);
    259    return r;
    260 }
    261 static ULong get_ULong ( Cursor* c ) {
    262    ULong r;
    263    vg_assert(is_sane_Cursor(c));
    264    if (c->region_next + sizeof(ULong) > c->region_szB) {
    265       c->barf(c->barfstr);
    266       /*NOTREACHED*/
    267       vg_assert(0);
    268    }
    269    r = * (ULong*) &c->region_start_img[ c->region_next ];
    270    c->region_next += sizeof(ULong);
    271    return r;
    272 }
    273 static inline ULong get_ULEB128 ( Cursor* c ) {
    274    ULong result;
    275    Int   shift;
    276    UChar byte;
    277    /* unroll first iteration */
    278    byte = get_UChar( c );
    279    result = (ULong)(byte & 0x7f);
    280    if (LIKELY(!(byte & 0x80))) return result;
    281    shift = 7;
    282    /* end unroll first iteration */
    283    do {
    284       byte = get_UChar( c );
    285       result |= ((ULong)(byte & 0x7f)) << shift;
    286       shift += 7;
    287    } while (byte & 0x80);
    288    return result;
    289 }
    290 static Long get_SLEB128 ( Cursor* c ) {
    291    ULong  result = 0;
    292    Int    shift = 0;
    293    UChar  byte;
    294    do {
    295       byte = get_UChar(c);
    296       result |= ((ULong)(byte & 0x7f)) << shift;
    297       shift += 7;
    298    } while (byte & 0x80);
    299    if (shift < 64 && (byte & 0x40))
    300       result |= -(1ULL << shift);
    301    return result;
    302 }
    303 
    304 /* Assume 'c' points to the start of a string.  Return the absolute
    305    address of whatever it points at, and advance it past the
    306    terminating zero.  This makes it safe for the caller to then copy
    307    the string with ML_(addStr), since (w.r.t. image overruns) the
    308    process of advancing past the terminating zero will already have
    309    "vetted" the string. */
    310 static UChar* get_AsciiZ ( Cursor* c ) {
    311    UChar  uc;
    312    UChar* res = get_address_of_Cursor(c);
    313    do { uc = get_UChar(c); } while (uc != 0);
    314    return res;
    315 }
    316 
    317 static ULong peek_ULEB128 ( Cursor* c ) {
    318    Word here = c->region_next;
    319    ULong r = get_ULEB128( c );
    320    c->region_next = here;
    321    return r;
    322 }
    323 static UChar peek_UChar ( Cursor* c ) {
    324    Word here = c->region_next;
    325    UChar r = get_UChar( c );
    326    c->region_next = here;
    327    return r;
    328 }
    329 
    330 static ULong get_Dwarfish_UWord ( Cursor* c, Bool is_dw64 ) {
    331    return is_dw64 ? get_ULong(c) : (ULong) get_UInt(c);
    332 }
    333 
    334 static UWord get_UWord ( Cursor* c ) {
    335    vg_assert(sizeof(UWord) == sizeof(void*));
    336    if (sizeof(UWord) == 4) return get_UInt(c);
    337    if (sizeof(UWord) == 8) return get_ULong(c);
    338    vg_assert(0);
    339 }
    340 
    341 /* Read a DWARF3 'Initial Length' field */
    342 static ULong get_Initial_Length ( /*OUT*/Bool* is64,
    343                                   Cursor* c,
    344                                   HChar* barfMsg )
    345 {
    346    ULong w64;
    347    UInt  w32;
    348    *is64 = False;
    349    w32 = get_UInt( c );
    350    if (w32 >= 0xFFFFFFF0 && w32 < 0xFFFFFFFF) {
    351       c->barf( barfMsg );
    352    }
    353    else if (w32 == 0xFFFFFFFF) {
    354       *is64 = True;
    355       w64   = get_ULong( c );
    356    } else {
    357       *is64 = False;
    358       w64 = (ULong)w32;
    359    }
    360    return w64;
    361 }
    362 
    363 
    364 /*------------------------------------------------------------*/
    365 /*---                                                      ---*/
    366 /*--- "CUConst" structure                                  ---*/
    367 /*---                                                      ---*/
    368 /*------------------------------------------------------------*/
    369 
    370 #define N_ABBV_CACHE 32
    371 
    372 /* Holds information that is constant through the parsing of a
    373    Compilation Unit.  This is basically plumbed through to
    374    everywhere. */
    375 typedef
    376    struct {
    377       /* Call here if anything goes wrong */
    378       void (*barf)( HChar* ) __attribute__((noreturn));
    379       /* Is this 64-bit DWARF ? */
    380       Bool   is_dw64;
    381       /* Which DWARF version ?  (2, 3 or 4) */
    382       UShort version;
    383       /* Length of this Compilation Unit, as stated in the
    384          .unit_length :: InitialLength field of the CU Header.
    385          However, this size (as specified by the D3 spec) does not
    386          include the size of the .unit_length field itself, which is
    387          either 4 or 12 bytes (32-bit or 64-bit Dwarf3).  That value
    388          can be obtained through the expression ".is_dw64 ? 12 : 4". */
    389       ULong  unit_length;
    390       /* Offset of start of this unit in .debug_info */
    391       UWord  cu_start_offset;
    392       /* SVMA for this CU.  In the D3 spec, is known as the "base
    393          address of the compilation unit (last para sec 3.1.1).
    394          Needed for (amongst things) interpretation of location-list
    395          values. */
    396       Addr   cu_svma;
    397       Bool   cu_svma_known;
    398       /* The debug_abbreviations table to be used for this Unit */
    399       UChar* debug_abbv;
    400       /* Upper bound on size thereof (an overestimate, in general) */
    401       UWord  debug_abbv_maxszB;
    402       /* Where is .debug_str ? */
    403       UChar* debug_str_img;
    404       UWord  debug_str_sz;
    405       /* Where is .debug_ranges ? */
    406       UChar* debug_ranges_img;
    407       UWord  debug_ranges_sz;
    408       /* Where is .debug_loc ? */
    409       UChar* debug_loc_img;
    410       UWord  debug_loc_sz;
    411       /* Where is .debug_line? */
    412       UChar* debug_line_img;
    413       UWord  debug_line_sz;
    414       /* Where is .debug_info? */
    415       UChar* debug_info_img;
    416       UWord  debug_info_sz;
    417       /* --- Needed so we can add stuff to the string table. --- */
    418       struct _DebugInfo* di;
    419       /* --- a cache for set_abbv_Cursor --- */
    420       /* abbv_code == (ULong)-1 for an unused entry. */
    421       struct { ULong abbv_code; UWord posn; } saC_cache[N_ABBV_CACHE];
    422       UWord saC_cache_queries;
    423       UWord saC_cache_misses;
    424    }
    425    CUConst;
    426 
    427 
    428 /*------------------------------------------------------------*/
    429 /*---                                                      ---*/
    430 /*--- Helper functions for Guarded Expressions             ---*/
    431 /*---                                                      ---*/
    432 /*------------------------------------------------------------*/
    433 
    434 /* Parse the location list starting at img-offset 'debug_loc_offset'
    435    in .debug_loc.  Results are biased with 'svma_of_referencing_CU'
    436    and so I believe are correct SVMAs for the object as a whole.  This
    437    function allocates the UChar*, and the caller must deallocate it.
    438    The resulting block is in so-called Guarded-Expression format.
    439 
    440    Guarded-Expression format is similar but not identical to the DWARF3
    441    location-list format.  The format of each returned block is:
    442 
    443       UChar biasMe;
    444       UChar isEnd;
    445       followed by zero or more of
    446 
    447       (Addr aMin;  Addr aMax;  UShort nbytes;  ..bytes..;  UChar isEnd)
    448 
   '..bytes..' is a standard DWARF3 location expression which is
   valid when aMin <= pc <= aMax (possibly after suitable biasing).
    451 
    452    The number of bytes in '..bytes..' is nbytes.
    453 
    454    The end of the sequence is marked by an isEnd == 1 value.  All
    455    previous isEnd values must be zero.
    456 
   biasMe is 1 if the aMin/aMax fields need this DebugInfo's
   text_bias added before use, and 0 if this is not necessary (the
   GX is ready to go).
    460 
    461    Hence the block can be quickly parsed and is self-describing.  Note
    462    that aMax is 1 less than the corresponding value in a DWARF3
    463    location list.  Zero length ranges, with aMax == aMin-1, are not
    464    allowed.
    465 */
    466 /* 2008-sept-12: moved ML_(pp_GX) from here to d3basics.c, where
    467    it more logically belongs. */
    468 
    469 
    470 /* Apply a text bias to a GX. */
    471 static void bias_GX ( /*MOD*/GExpr* gx, struct _DebugInfo* di )
    472 {
    473    UShort nbytes;
    474    Addr*  pA;
    475    UChar* p = &gx->payload[0];
    476    UChar  uc;
    477    uc = *p++; /*biasMe*/
    478    if (uc == 0)
    479       return;
    480    vg_assert(uc == 1);
    481    p[-1] = 0; /* mark it as done */
    482    while (True) {
    483       uc = *p++;
    484       if (uc == 1)
    485          break; /*isEnd*/
    486       vg_assert(uc == 0);
    487       /* t-bias aMin */
    488       pA = (Addr*)p;
    489       *pA += di->text_debug_bias;
    490       p += sizeof(Addr);
    491       /* t-bias aMax */
    492       pA = (Addr*)p;
    493       *pA += di->text_debug_bias;
    494       p += sizeof(Addr);
    495       /* nbytes, and actual expression */
    496       nbytes = * (UShort*)p; p += sizeof(UShort);
    497       p += nbytes;
    498    }
    499 }
    500 
    501 __attribute__((noinline))
    502 static GExpr* make_singleton_GX ( UChar* block, UWord nbytes )
    503 {
    504    SizeT  bytesReqd;
    505    GExpr* gx;
    506    UChar *p, *pstart;
    507 
    508    vg_assert(sizeof(UWord) == sizeof(Addr));
    509    vg_assert(nbytes <= 0xFFFF); /* else we overflow the nbytes field */
    510    bytesReqd
    511       =   sizeof(UChar)  /*biasMe*/    + sizeof(UChar) /*!isEnd*/
    512         + sizeof(UWord)  /*aMin*/      + sizeof(UWord) /*aMax*/
    513         + sizeof(UShort) /*nbytes*/    + nbytes
    514         + sizeof(UChar); /*isEnd*/
    515 
    516    gx = ML_(dinfo_zalloc)( "di.readdwarf3.msGX.1",
    517                            sizeof(GExpr) + bytesReqd );
    518    vg_assert(gx);
    519 
    520    p = pstart = &gx->payload[0];
    521 
    522    * ((UChar*)p)  = 0;          /*biasMe*/ p += sizeof(UChar);
    523    * ((UChar*)p)  = 0;          /*!isEnd*/ p += sizeof(UChar);
    524    * ((Addr*)p)   = 0;          /*aMin*/   p += sizeof(Addr);
    525    * ((Addr*)p)   = ~((Addr)0); /*aMax */  p += sizeof(Addr);
    526    * ((UShort*)p) = (UShort)nbytes; /*nbytes*/ p += sizeof(UShort);
    527    VG_(memcpy)(p, block, nbytes); p += nbytes;
    528    * ((UChar*)p)  = 1;          /*isEnd*/  p += sizeof(UChar);
    529 
    530    vg_assert( (SizeT)(p - pstart) == bytesReqd);
    531    vg_assert( &gx->payload[bytesReqd]
    532               == ((UChar*)gx) + sizeof(GExpr) + bytesReqd );
    533 
    534    return gx;
    535 }
    536 
    537 __attribute__((noinline))
    538 static GExpr* make_general_GX ( CUConst* cc,
    539                                 Bool     td3,
    540                                 UWord    debug_loc_offset,
    541                                 Addr     svma_of_referencing_CU )
    542 {
    543    Addr      base;
    544    Cursor    loc;
    545    XArray*   xa; /* XArray of UChar */
    546    GExpr*    gx;
    547    Word      nbytes;
    548 
    549    vg_assert(sizeof(UWord) == sizeof(Addr));
    550    if (cc->debug_loc_sz == 0)
    551       cc->barf("make_general_GX: .debug_loc is empty/missing");
    552 
    553    init_Cursor( &loc, cc->debug_loc_img,
    554                 cc->debug_loc_sz, 0, cc->barf,
    555                 "Overrun whilst reading .debug_loc section(2)" );
    556    set_position_of_Cursor( &loc, debug_loc_offset );
    557 
    558    TRACE_D3("make_general_GX (.debug_loc_offset = %lu, img = %p) {\n",
    559             debug_loc_offset, get_address_of_Cursor( &loc ) );
    560 
    561    /* Who frees this xa?  It is freed before this fn exits. */
    562    xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.mgGX.1",
    563                     ML_(dinfo_free),
    564                     sizeof(UChar) );
    565 
    566    { UChar c = 1; /*biasMe*/ VG_(addBytesToXA)( xa, &c, sizeof(c) ); }
    567 
    568    base = 0;
    569    while (True) {
    570       Bool  acquire;
    571       UWord len;
    572       /* Read a (host-)word pair.  This is something of a hack since
    573          the word size to read is really dictated by the ELF file;
    574          however, we assume we're reading a file with the same
    575          word-sizeness as the host.  Reasonably enough. */
    576       UWord w1 = get_UWord( &loc );
    577       UWord w2 = get_UWord( &loc );
    578 
    579       TRACE_D3("   %08lx %08lx\n", w1, w2);
    580       if (w1 == 0 && w2 == 0)
    581          break; /* end of list */
    582 
    583       if (w1 == -1UL) {
    584          /* new value for 'base' */
    585          base = w2;
    586          continue;
    587       }
    588 
    589       /* else a location expression follows */
    590       /* else enumerate [w1+base, w2+base) */
    591       /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc"
    592          (sec 2.17.2) */
    593       if (w1 > w2) {
    594          TRACE_D3("negative range is for .debug_loc expr at "
    595                   "file offset %lu\n",
    596                   debug_loc_offset);
    597          cc->barf( "negative range in .debug_loc section" );
    598       }
    599 
    600       /* ignore zero length ranges */
    601       acquire = w1 < w2;
    602       len     = (UWord)get_UShort( &loc );
    603 
    604       if (acquire) {
    605          UWord  w;
    606          UShort s;
    607          UChar  c;
    608          c = 0; /* !isEnd*/
    609          VG_(addBytesToXA)( xa, &c, sizeof(c) );
    610          w = w1    + base + svma_of_referencing_CU;
    611          VG_(addBytesToXA)( xa, &w, sizeof(w) );
    612          w = w2 -1 + base + svma_of_referencing_CU;
    613          VG_(addBytesToXA)( xa, &w, sizeof(w) );
    614          s = (UShort)len;
    615          VG_(addBytesToXA)( xa, &s, sizeof(s) );
    616       }
    617 
    618       while (len > 0) {
    619          UChar byte = get_UChar( &loc );
    620          TRACE_D3("%02x", (UInt)byte);
    621          if (acquire)
    622             VG_(addBytesToXA)( xa, &byte, 1 );
    623          len--;
    624       }
    625       TRACE_D3("\n");
    626    }
    627 
    628    { UChar c = 1; /*isEnd*/ VG_(addBytesToXA)( xa, &c, sizeof(c) ); }
    629 
    630    nbytes = VG_(sizeXA)( xa );
    631    vg_assert(nbytes >= 1);
    632 
    633    gx = ML_(dinfo_zalloc)( "di.readdwarf3.mgGX.2", sizeof(GExpr) + nbytes );
    634    vg_assert(gx);
    635    VG_(memcpy)( &gx->payload[0], (UChar*)VG_(indexXA)(xa,0), nbytes );
    636    vg_assert( &gx->payload[nbytes]
    637               == ((UChar*)gx) + sizeof(GExpr) + nbytes );
    638 
    639    VG_(deleteXA)( xa );
    640 
    641    TRACE_D3("}\n");
    642 
    643    return gx;
    644 }
    645 
    646 
    647 /*------------------------------------------------------------*/
    648 /*---                                                      ---*/
    649 /*--- Helper functions for range lists and CU headers      ---*/
    650 /*---                                                      ---*/
    651 /*------------------------------------------------------------*/
    652 
    653 /* Denotes an address range.  Both aMin and aMax are included in the
    654    range; hence a complete range is (0, ~0) and an empty range is any
    655    (X, X-1) for X > 0.*/
    656 typedef
    657    struct { Addr aMin; Addr aMax; }
    658    AddrRange;
    659 
    660 
    661 /* Generate an arbitrary structural total ordering on
    662    XArray* of AddrRange. */
    663 static Word cmp__XArrays_of_AddrRange ( XArray* rngs1, XArray* rngs2 )
    664 {
    665    Word n1, n2, i;
    666    tl_assert(rngs1 && rngs2);
    667    n1 = VG_(sizeXA)( rngs1 );
    668    n2 = VG_(sizeXA)( rngs2 );
    669    if (n1 < n2) return -1;
    670    if (n1 > n2) return 1;
    671    for (i = 0; i < n1; i++) {
    672       AddrRange* rng1 = (AddrRange*)VG_(indexXA)( rngs1, i );
    673       AddrRange* rng2 = (AddrRange*)VG_(indexXA)( rngs2, i );
    674       if (rng1->aMin < rng2->aMin) return -1;
    675       if (rng1->aMin > rng2->aMin) return 1;
    676       if (rng1->aMax < rng2->aMax) return -1;
    677       if (rng1->aMax > rng2->aMax) return 1;
    678    }
    679    return 0;
    680 }
    681 
    682 
    683 __attribute__((noinline))
    684 static XArray* /* of AddrRange */ empty_range_list ( void )
    685 {
    686    XArray* xa; /* XArray of AddrRange */
    687    /* Who frees this xa?  varstack_preen() does. */
    688    xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.erl.1",
    689                     ML_(dinfo_free),
    690                     sizeof(AddrRange) );
    691    return xa;
    692 }
    693 
    694 
    695 __attribute__((noinline))
    696 static XArray* unitary_range_list ( Addr aMin, Addr aMax )
    697 {
    698    XArray*   xa;
    699    AddrRange pair;
    700    vg_assert(aMin <= aMax);
    701    /* Who frees this xa?  varstack_preen() does. */
    702    xa = VG_(newXA)( ML_(dinfo_zalloc),  "di.readdwarf3.url.1",
    703                     ML_(dinfo_free),
    704                     sizeof(AddrRange) );
    705    pair.aMin = aMin;
    706    pair.aMax = aMax;
    707    VG_(addToXA)( xa, &pair );
    708    return xa;
    709 }
    710 
    711 
    712 /* Enumerate the address ranges starting at img-offset
    713    'debug_ranges_offset' in .debug_ranges.  Results are biased with
    714    'svma_of_referencing_CU' and so I believe are correct SVMAs for the
    715    object as a whole.  This function allocates the XArray, and the
    716    caller must deallocate it. */
    717 __attribute__((noinline))
    718 static XArray* /* of AddrRange */
    719        get_range_list ( CUConst* cc,
    720                         Bool     td3,
    721                         UWord    debug_ranges_offset,
    722                         Addr     svma_of_referencing_CU )
    723 {
    724    Addr      base;
    725    Cursor    ranges;
    726    XArray*   xa; /* XArray of AddrRange */
    727    AddrRange pair;
    728 
    729    if (cc->debug_ranges_sz == 0)
    730       cc->barf("get_range_list: .debug_ranges is empty/missing");
    731 
    732    init_Cursor( &ranges, cc->debug_ranges_img,
    733                 cc->debug_ranges_sz, 0, cc->barf,
    734                 "Overrun whilst reading .debug_ranges section(2)" );
    735    set_position_of_Cursor( &ranges, debug_ranges_offset );
    736 
    737    /* Who frees this xa?  varstack_preen() does. */
    738    xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.grl.1", ML_(dinfo_free),
    739                     sizeof(AddrRange) );
    740    base = 0;
    741    while (True) {
    742       /* Read a (host-)word pair.  This is something of a hack since
    743          the word size to read is really dictated by the ELF file;
    744          however, we assume we're reading a file with the same
    745          word-sizeness as the host.  Reasonably enough. */
    746       UWord w1 = get_UWord( &ranges );
    747       UWord w2 = get_UWord( &ranges );
    748 
    749       if (w1 == 0 && w2 == 0)
    750          break; /* end of list. */
    751 
    752       if (w1 == -1UL) {
    753          /* new value for 'base' */
    754          base = w2;
    755          continue;
    756       }
    757 
    758       /* else enumerate [w1+base, w2+base) */
    759       /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc"
    760          (sec 2.17.2) */
    761       if (w1 > w2)
    762          cc->barf( "negative range in .debug_ranges section" );
    763       if (w1 < w2) {
    764          pair.aMin = w1     + base + svma_of_referencing_CU;
    765          pair.aMax = w2 - 1 + base + svma_of_referencing_CU;
    766          vg_assert(pair.aMin <= pair.aMax);
    767          VG_(addToXA)( xa, &pair );
    768       }
    769    }
    770    return xa;
    771 }
    772 
    773 
    774 /* Parse the Compilation Unit header indicated at 'c' and
    775    initialise 'cc' accordingly. */
    776 static __attribute__((noinline))
    777 void parse_CU_Header ( /*OUT*/CUConst* cc,
    778                        Bool td3,
    779                        Cursor* c,
    780                        UChar* debug_abbv_img, UWord debug_abbv_sz )
    781 {
    782    UChar  address_size;
    783    UWord  debug_abbrev_offset;
    784    Int    i;
    785 
    786    VG_(memset)(cc, 0, sizeof(*cc));
    787    vg_assert(c && c->barf);
    788    cc->barf = c->barf;
    789 
    790    /* initial_length field */
    791    cc->unit_length
    792       = get_Initial_Length( &cc->is_dw64, c,
    793            "parse_CU_Header: invalid initial-length field" );
    794 
    795    TRACE_D3("   Length:        %lld\n", cc->unit_length );
    796 
    797    /* version */
    798    cc->version = get_UShort( c );
    799    if (cc->version != 2 && cc->version != 3 && cc->version != 4)
    800       cc->barf( "parse_CU_Header: is neither DWARF2 nor DWARF3 nor DWARF4" );
    801    TRACE_D3("   Version:       %d\n", (Int)cc->version );
    802 
    803    /* debug_abbrev_offset */
    804    debug_abbrev_offset = get_Dwarfish_UWord( c, cc->is_dw64 );
    805    if (debug_abbrev_offset >= debug_abbv_sz)
    806       cc->barf( "parse_CU_Header: invalid debug_abbrev_offset" );
    807    TRACE_D3("   Abbrev Offset: %ld\n", debug_abbrev_offset );
    808 
    809    /* address size.  If this isn't equal to the host word size, just
    810       give up.  This makes it safe to assume elsewhere that
    811       DW_FORM_addr and DW_FORM_ref_addr can be treated as a host
    812       word. */
    813    address_size = get_UChar( c );
    814    if (address_size != sizeof(void*))
    815       cc->barf( "parse_CU_Header: invalid address_size" );
    816    TRACE_D3("   Pointer Size:  %d\n", (Int)address_size );
    817 
    818    /* Set up so that cc->debug_abbv points to the relevant table for
    819       this CU.  Set the szB so that at least we can't read off the end
    820       of the debug_abbrev section -- potentially (and quite likely)
    821       too big, if this isn't the last table in the section, but at
    822       least it's safe. */
    823    cc->debug_abbv        = debug_abbv_img + debug_abbrev_offset;
    824    cc->debug_abbv_maxszB = debug_abbv_sz  - debug_abbrev_offset;
    825    /* and empty out the set_abbv_Cursor cache */
    826    if (0) VG_(printf)("XXXXXX initialise set_abbv_Cursor cache\n");
    827    for (i = 0; i < N_ABBV_CACHE; i++) {
    828       cc->saC_cache[i].abbv_code = (ULong)-1; /* unused */
    829       cc->saC_cache[i].posn = 0;
    830    }
    831    cc->saC_cache_queries = 0;
    832    cc->saC_cache_misses = 0;
    833 }
    834 
    835 
    836 /* Set up 'c' so it is ready to parse the abbv table entry code
    837    'abbv_code' for this compilation unit.  */
    838 static __attribute__((noinline))
    839 void set_abbv_Cursor ( /*OUT*/Cursor* c, Bool td3,
    840                        CUConst* cc, ULong abbv_code )
    841 {
    842    Int   i;
    843    ULong acode;
    844 
    845    if (abbv_code == 0)
    846       cc->barf("set_abbv_Cursor: abbv_code == 0" );
    847 
    848    /* (ULong)-1 is used to represent an empty cache slot.  So we can't
    849       allow it.  In any case no valid DWARF3 should make a reference
    850       to a negative abbreviation code.  [at least, they always seem to
    851       be numbered upwards from zero as far as I have seen] */
    852    vg_assert(abbv_code != (ULong)-1);
    853 
    854    /* First search the cache. */
    855    if (0) VG_(printf)("XXXXXX search set_abbv_Cursor cache\n");
    856    cc->saC_cache_queries++;
    857    for (i = 0; i < N_ABBV_CACHE; i++) {
    858       /* No need to test the cached abbv_codes for -1 (empty), since
    859          we just asserted that abbv_code is not -1. */
    860      if (cc->saC_cache[i].abbv_code == abbv_code) {
    861         /* Found it.  Cool.  Set up the parser using the cached
    862            position, and move this cache entry 1 step closer to the
    863            front. */
    864         if (0) VG_(printf)("XXXXXX found in set_abbv_Cursor cache\n");
    865         init_Cursor( c, cc->debug_abbv,
    866                      cc->debug_abbv_maxszB, cc->saC_cache[i].posn,
    867                      cc->barf,
    868                      "Overrun whilst parsing .debug_abbrev section(1)" );
    869         if (i > 0) {
    870            ULong t_abbv_code = cc->saC_cache[i].abbv_code;
    871            UWord t_posn = cc->saC_cache[i].posn;
    872            while (i > 0) {
    873               cc->saC_cache[i] = cc->saC_cache[i-1];
    874               cc->saC_cache[0].abbv_code = t_abbv_code;
    875               cc->saC_cache[0].posn = t_posn;
    876               i--;
    877            }
    878         }
    879         return;
    880      }
    881    }
    882 
    883    /* No.  It's not in the cache.  We have to search through
    884       .debug_abbrev, of course taking care to update the cache
    885       when done. */
    886 
    887    cc->saC_cache_misses++;
    888    init_Cursor( c, cc->debug_abbv, cc->debug_abbv_maxszB, 0, cc->barf,
    889                "Overrun whilst parsing .debug_abbrev section(2)" );
    890 
    891    /* Now iterate though the table until we find the requested
    892       entry. */
    893    while (True) {
    894       //ULong atag;
    895       //UInt  has_children;
    896       acode = get_ULEB128( c );
    897       if (acode == 0) break; /* end of the table */
    898       if (acode == abbv_code) break; /* found it */
    899       /*atag         = */ get_ULEB128( c );
    900       /*has_children = */ get_UChar( c );
    901       //TRACE_D3("   %llu      %s    [%s]\n",
    902       //         acode, pp_DW_TAG(atag), pp_DW_children(has_children));
    903       while (True) {
    904          ULong at_name = get_ULEB128( c );
    905          ULong at_form = get_ULEB128( c );
    906          if (at_name == 0 && at_form == 0) break;
    907          //TRACE_D3("    %18s %s\n",
    908          //         pp_DW_AT(at_name), pp_DW_FORM(at_form));
    909       }
    910    }
    911 
    912    if (acode == 0) {
    913       /* Not found.  This is fatal. */
    914       cc->barf("set_abbv_Cursor: abbv_code not found");
    915    }
    916 
    917    /* Otherwise, 'c' is now set correctly to parse the relevant entry,
    918       starting from the abbreviation entry's tag.  So just cache
    919       the result, and return. */
    920    for (i = N_ABBV_CACHE-1; i > N_ABBV_CACHE/2; i--) {
    921       cc->saC_cache[i] = cc->saC_cache[i-1];
    922    }
    923    if (0) VG_(printf)("XXXXXX update set_abbv_Cursor cache\n");
    924    cc->saC_cache[N_ABBV_CACHE/2].abbv_code = abbv_code;
    925    cc->saC_cache[N_ABBV_CACHE/2].posn = get_position_of_Cursor(c);
    926 }
    927 
    928 
    929 /* From 'c', get the Form data into the lowest 1/2/4/8 bytes of *cts.
    930 
    931    If *cts itself contains the entire result, then *ctsSzB is set to
    932    1,2,4 or 8 accordingly and *ctsMemSzB is set to zero.
    933 
    934    Alternatively, the result can be a block of data (in the
    935    transiently mapped-in object, so-called "image" space).  If so then
    936    the lowest sizeof(void*)/8 bytes of *cts hold a pointer to said
    937    image, *ctsSzB is zero, and *ctsMemSzB is the size of the block.
    938 
    939    Unfortunately this means it is impossible to represent a zero-size
    940    image block since that would have *ctsSzB == 0 and *ctsMemSzB == 0
    941    and so is ambiguous (which case it is?)
    942 
    943    Invariant on successful return:
    944       (*ctsSzB > 0 && *ctsMemSzB == 0)
    945       || (*ctsSzB == 0 && *ctsMemSzB > 0)
    946 */
    947 static
    948 void get_Form_contents ( /*OUT*/ULong* cts,
    949                          /*OUT*/Int*   ctsSzB,
    950                          /*OUT*/UWord* ctsMemSzB,
    951                          CUConst* cc, Cursor* c,
    952                          Bool td3, DW_FORM form )
    953 {
    954    *cts       = 0;
    955    *ctsSzB    = 0;
    956    *ctsMemSzB = 0;
    957    switch (form) {
    958       case DW_FORM_data1:
    959          *cts = (ULong)(UChar)get_UChar(c);
    960          *ctsSzB = 1;
    961          TRACE_D3("%u", (UInt)*cts);
    962          break;
    963       case DW_FORM_data2:
    964          *cts = (ULong)(UShort)get_UShort(c);
    965          *ctsSzB = 2;
    966          TRACE_D3("%u", (UInt)*cts);
    967          break;
    968       case DW_FORM_data4:
    969          *cts = (ULong)(UInt)get_UInt(c);
    970          *ctsSzB = 4;
    971          TRACE_D3("%u", (UInt)*cts);
    972          break;
    973       case DW_FORM_data8:
    974          *cts = get_ULong(c);
    975          *ctsSzB = 8;
    976          TRACE_D3("%llu", *cts);
    977          break;
    978       case DW_FORM_sec_offset:
    979          *cts = (ULong)get_Dwarfish_UWord( c, cc->is_dw64 );
    980          *ctsSzB = cc->is_dw64 ? 8 : 4;
    981          TRACE_D3("%llu", *cts);
    982          break;
    983       case DW_FORM_sdata:
    984          *cts = (ULong)(Long)get_SLEB128(c);
    985          *ctsSzB = 8;
    986          TRACE_D3("%lld", (Long)*cts);
    987          break;
    988       case DW_FORM_udata:
    989          *cts = (ULong)(Long)get_ULEB128(c);
    990          *ctsSzB = 8;
    991          TRACE_D3("%llu", (Long)*cts);
    992          break;
    993       case DW_FORM_addr:
    994          /* note, this is a hack.  DW_FORM_addr is defined as getting
    995             a word the size of the target machine as defined by the
    996             address_size field in the CU Header.  However,
    997             parse_CU_Header() rejects all inputs except those for
    998             which address_size == sizeof(Word), hence we can just
    999             treat it as a (host) Word.  */
   1000          *cts = (ULong)(UWord)get_UWord(c);
   1001          *ctsSzB = sizeof(UWord);
   1002          TRACE_D3("0x%lx", (UWord)*cts);
   1003          break;
   1004 
   1005       case DW_FORM_ref_addr:
   1006          /* We make the same word-size assumption as DW_FORM_addr. */
   1007          /* What does this really mean?  From D3 Sec 7.5.4,
   1008             description of "reference", it would appear to reference
   1009             some other DIE, by specifying the offset from the
   1010             beginning of a .debug_info section.  The D3 spec mentions
   1011             that this might be in some other shared object and
   1012             executable.  But I don't see how the name of the other
   1013             object/exe is specified.
   1014 
   1015             At least for the DW_FORM_ref_addrs created by icc11, the
   1016             references seem to be within the same object/executable.
   1017             So for the moment we merely range-check, to see that they
   1018             actually do specify a plausible offset within this
   1019             object's .debug_info, and return the value unchanged.
   1020          */
   1021          *cts = (ULong)(UWord)get_UWord(c);
   1022          *ctsSzB = sizeof(UWord);
   1023          TRACE_D3("0x%lx", (UWord)*cts);
   1024          if (0) VG_(printf)("DW_FORM_ref_addr 0x%lx\n", (UWord)*cts);
   1025          if (/* the following 2 are surely impossible, but ... */
   1026              cc->debug_info_img == NULL || cc->debug_info_sz == 0
   1027              || *cts >= (ULong)cc->debug_info_sz) {
   1028             /* Hmm.  Offset is nonsensical for this object's .debug_info
   1029                section.  Be safe and reject it. */
   1030             cc->barf("get_Form_contents: DW_FORM_ref_addr points "
   1031                      "outside .debug_info");
   1032          }
   1033          break;
   1034 
   1035       case DW_FORM_strp: {
   1036          /* this is an offset into .debug_str */
   1037          UChar* str;
   1038          UWord uw = (UWord)get_Dwarfish_UWord( c, cc->is_dw64 );
   1039          if (cc->debug_str_img == NULL || uw >= cc->debug_str_sz)
   1040             cc->barf("get_Form_contents: DW_FORM_strp "
   1041                      "points outside .debug_str");
   1042          /* FIXME: check the entire string lies inside debug_str,
   1043             not just the first byte of it. */
   1044          str = (UChar*)cc->debug_str_img + uw;
   1045          TRACE_D3("(indirect string, offset: 0x%lx): %s", uw, str);
   1046          *cts = (ULong)(UWord)str;
   1047          *ctsMemSzB = 1 + (ULong)VG_(strlen)(str);
   1048          break;
   1049       }
   1050       case DW_FORM_string: {
   1051          UChar* str = get_AsciiZ(c);
   1052          TRACE_D3("%s", str);
   1053          *cts = (ULong)(UWord)str;
   1054          /* strlen is safe because get_AsciiZ already 'vetted' the
   1055             entire string */
   1056          *ctsMemSzB = 1 + (ULong)VG_(strlen)(str);
   1057          break;
   1058       }
   1059       case DW_FORM_ref1: {
   1060          UChar  u8 = get_UChar(c);
   1061          UWord res = cc->cu_start_offset + (UWord)u8;
   1062          *cts = (ULong)res;
   1063          *ctsSzB = sizeof(UWord);
   1064          TRACE_D3("<%lx>", res);
   1065          break;
   1066       }
   1067       case DW_FORM_ref2: {
   1068          UShort  u16 = get_UShort(c);
   1069          UWord res = cc->cu_start_offset + (UWord)u16;
   1070          *cts = (ULong)res;
   1071          *ctsSzB = sizeof(UWord);
   1072          TRACE_D3("<%lx>", res);
   1073          break;
   1074       }
   1075       case DW_FORM_ref4: {
   1076          UInt  u32 = get_UInt(c);
   1077          UWord res = cc->cu_start_offset + (UWord)u32;
   1078          *cts = (ULong)res;
   1079          *ctsSzB = sizeof(UWord);
   1080          TRACE_D3("<%lx>", res);
   1081          break;
   1082       }
   1083       case DW_FORM_ref8: {
   1084          ULong  u64 = get_ULong(c);
   1085          UWord res = cc->cu_start_offset + (UWord)u64;
   1086          *cts = (ULong)res;
   1087          *ctsSzB = sizeof(UWord);
   1088          TRACE_D3("<%lx>", res);
   1089          break;
   1090       }
   1091       case DW_FORM_ref_udata: {
   1092          ULong  u64 = get_ULEB128(c);
   1093          UWord res = cc->cu_start_offset + (UWord)u64;
   1094          *cts = (ULong)res;
   1095          *ctsSzB = sizeof(UWord);
   1096          TRACE_D3("<%lx>", res);
   1097          break;
   1098       }
   1099       case DW_FORM_flag: {
   1100          UChar u8 = get_UChar(c);
   1101          TRACE_D3("%u", (UInt)u8);
   1102          *cts = (ULong)u8;
   1103          *ctsSzB = 1;
   1104          break;
   1105       }
   1106       case DW_FORM_flag_present:
   1107          TRACE_D3("1");
   1108          *cts = 1;
   1109          *ctsSzB = 1;
   1110          break;
   1111       case DW_FORM_block1: {
   1112          ULong  u64b;
   1113          ULong  u64 = (ULong)get_UChar(c);
   1114          UChar* block = get_address_of_Cursor(c);
   1115          TRACE_D3("%llu byte block: ", u64);
   1116          for (u64b = u64; u64b > 0; u64b--) {
   1117             UChar u8 = get_UChar(c);
   1118             TRACE_D3("%x ", (UInt)u8);
   1119          }
   1120          *cts = (ULong)(UWord)block;
   1121          *ctsMemSzB = (UWord)u64;
   1122          break;
   1123       }
   1124       case DW_FORM_block2: {
   1125          ULong  u64b;
   1126          ULong  u64 = (ULong)get_UShort(c);
   1127          UChar* block = get_address_of_Cursor(c);
   1128          TRACE_D3("%llu byte block: ", u64);
   1129          for (u64b = u64; u64b > 0; u64b--) {
   1130             UChar u8 = get_UChar(c);
   1131             TRACE_D3("%x ", (UInt)u8);
   1132          }
   1133          *cts = (ULong)(UWord)block;
   1134          *ctsMemSzB = (UWord)u64;
   1135          break;
   1136       }
   1137       case DW_FORM_block4: {
   1138          ULong  u64b;
   1139          ULong  u64 = (ULong)get_UInt(c);
   1140          UChar* block = get_address_of_Cursor(c);
   1141          TRACE_D3("%llu byte block: ", u64);
   1142          for (u64b = u64; u64b > 0; u64b--) {
   1143             UChar u8 = get_UChar(c);
   1144             TRACE_D3("%x ", (UInt)u8);
   1145          }
   1146          *cts = (ULong)(UWord)block;
   1147          *ctsMemSzB = (UWord)u64;
   1148          break;
   1149       }
   1150       case DW_FORM_exprloc:
   1151       case DW_FORM_block: {
   1152          ULong  u64b;
   1153          ULong  u64 = (ULong)get_ULEB128(c);
   1154          UChar* block = get_address_of_Cursor(c);
   1155          TRACE_D3("%llu byte block: ", u64);
   1156          for (u64b = u64; u64b > 0; u64b--) {
   1157             UChar u8 = get_UChar(c);
   1158             TRACE_D3("%x ", (UInt)u8);
   1159          }
   1160          *cts = (ULong)(UWord)block;
   1161          *ctsMemSzB = (UWord)u64;
   1162          break;
   1163       }
   1164       case DW_FORM_ref_sig8: {
   1165          ULong  u64b;
   1166          UChar* block = get_address_of_Cursor(c);
   1167          TRACE_D3("8 byte signature: ");
   1168          for (u64b = 8; u64b > 0; u64b--) {
   1169             UChar u8 = get_UChar(c);
   1170             TRACE_D3("%x ", (UInt)u8);
   1171          }
   1172          *cts = (ULong)(UWord)block;
   1173          *ctsMemSzB = 8;
   1174          break;
   1175       }
   1176       case DW_FORM_indirect:
   1177          get_Form_contents (cts, ctsSzB, ctsMemSzB, cc, c, td3,
   1178                             (DW_FORM)get_ULEB128(c));
   1179          return;
   1180 
   1181       default:
   1182          VG_(printf)(
   1183             "get_Form_contents: unhandled %d (%s) at <%lx>\n",
   1184             form, ML_(pp_DW_FORM)(form), get_position_of_Cursor(c));
   1185          c->barf("get_Form_contents: unhandled DW_FORM");
   1186    }
   1187 }
   1188 
   1189 
   1190 /*------------------------------------------------------------*/
   1191 /*---                                                      ---*/
   1192 /*--- Parsing of variable-related DIEs                     ---*/
   1193 /*---                                                      ---*/
   1194 /*------------------------------------------------------------*/
   1195 
   1196 typedef
   1197    struct _TempVar {
   1198       UChar*  name; /* in DebugInfo's .strchunks */
   1199       /* Represent ranges economically.  nRanges is the number of
   1200          ranges.  Cases:
   1201          0: .rngOneMin .rngOneMax .manyRanges are all zero
   1202          1: .rngOneMin .rngOneMax hold the range; .rngMany is NULL
   1203          2: .rngOneMin .rngOneMax are zero; .rngMany holds the ranges.
   1204          This is merely an optimisation to avoid having to allocate
   1205          and free the XArray in the common (98%) of cases where there
   1206          is zero or one address ranges. */
   1207       UWord   nRanges;
   1208       Addr    rngOneMin;
   1209       Addr    rngOneMax;
   1210       XArray* rngMany; /* of AddrRange.  NON-UNIQUE PTR in AR_DINFO. */
   1211       /* Do not free .rngMany, since many TempVars will have the same
   1212          value.  Instead the associated storage is to be freed by
   1213          deleting 'rangetree', which stores a single copy of each
   1214          range. */
   1215       /* --- */
   1216       Int     level;
   1217       UWord   typeR; /* a cuOff */
   1218       GExpr*  gexpr; /* for this variable */
   1219       GExpr*  fbGX;  /* to find the frame base of the enclosing fn, if
   1220                         any */
   1221       UChar*  fName; /* declaring file name, or NULL */
   1222       Int     fLine; /* declaring file line number, or zero */
   1223       /* offset in .debug_info, so that abstract instances can be
   1224          found to satisfy references from concrete instances. */
   1225       UWord   dioff;
   1226       UWord   absOri; /* so the absOri fields refer to dioff fields
   1227                          in some other, related TempVar. */
   1228    }
   1229    TempVar;
   1230 
   1231 #define N_D3_VAR_STACK 48
   1232 
   1233 typedef
   1234    struct {
   1235       /* Contains the range stack: a stack of address ranges, one
   1236          stack entry for each nested scope.
   1237 
   1238          Some scope entries are created by function definitions
   1239          (DW_AT_subprogram), and for those, we also note the GExpr
   1240          derived from its DW_AT_frame_base attribute, if any.
   1241          Consequently it should be possible to find, for any
   1242          variable's DIE, the GExpr for the the containing function's
   1243          DW_AT_frame_base by scanning back through the stack to find
   1244          the nearest entry associated with a function.  This somewhat
   1245          elaborate scheme is provided so as to make it possible to
   1246          obtain the correct DW_AT_frame_base expression even in the
   1247          presence of nested functions (or to be more precise, in the
   1248          presence of nested DW_AT_subprogram DIEs).
   1249       */
   1250       Int     sp; /* [sp] is innermost active entry; sp==-1 for empty
   1251                      stack */
   1252       XArray* ranges[N_D3_VAR_STACK]; /* XArray of AddrRange */
   1253       Int     level[N_D3_VAR_STACK];  /* D3 DIE levels */
   1254       Bool    isFunc[N_D3_VAR_STACK]; /* from DW_AT_subprogram? */
   1255       GExpr*  fbGX[N_D3_VAR_STACK];   /* if isFunc, contains the FB
   1256                                          expr, else NULL */
   1257       /* The file name table.  Is a mapping from integer index to the
   1258          (permanent) copy of the string, iow a non-img area. */
   1259       XArray* /* of UChar* */ filenameTable;
   1260    }
   1261    D3VarParser;
   1262 
   1263 static void varstack_show ( D3VarParser* parser, HChar* str ) {
   1264    Word i, j;
   1265    VG_(printf)("  varstack (%s) {\n", str);
   1266    for (i = 0; i <= parser->sp; i++) {
   1267       XArray* xa = parser->ranges[i];
   1268       vg_assert(xa);
   1269       VG_(printf)("    [%ld] (level %d)", i, parser->level[i]);
   1270       if (parser->isFunc[i]) {
   1271          VG_(printf)(" (fbGX=%p)", parser->fbGX[i]);
   1272       } else {
   1273          vg_assert(parser->fbGX[i] == NULL);
   1274       }
   1275       VG_(printf)(": ");
   1276       if (VG_(sizeXA)( xa ) == 0) {
   1277          VG_(printf)("** empty PC range array **");
   1278       } else {
   1279          for (j = 0; j < VG_(sizeXA)( xa ); j++) {
   1280             AddrRange* range = (AddrRange*) VG_(indexXA)( xa, j );
   1281             vg_assert(range);
   1282             VG_(printf)("[%#lx,%#lx] ", range->aMin, range->aMax);
   1283          }
   1284       }
   1285       VG_(printf)("\n");
   1286    }
   1287    VG_(printf)("  }\n");
   1288 }
   1289 
   1290 /* Remove from the stack, all entries with .level > 'level' */
   1291 static
   1292 void varstack_preen ( D3VarParser* parser, Bool td3, Int level )
   1293 {
   1294    Bool changed = False;
   1295    vg_assert(parser->sp < N_D3_VAR_STACK);
   1296    while (True) {
   1297       vg_assert(parser->sp >= -1);
   1298       if (parser->sp == -1) break;
   1299       if (parser->level[parser->sp] <= level) break;
   1300       if (0)
   1301          TRACE_D3("BBBBAAAA varstack_pop [newsp=%d]\n", parser->sp-1);
   1302       vg_assert(parser->ranges[parser->sp]);
   1303       /* Who allocated this xa?  get_range_list() or
   1304          unitary_range_list(). */
   1305       VG_(deleteXA)( parser->ranges[parser->sp] );
   1306       parser->ranges[parser->sp] = NULL;
   1307       parser->level[parser->sp]  = 0;
   1308       parser->isFunc[parser->sp] = False;
   1309       parser->fbGX[parser->sp]   = NULL;
   1310       parser->sp--;
   1311       changed = True;
   1312    }
   1313    if (changed && td3)
   1314       varstack_show( parser, "after preen" );
   1315 }
   1316 
   1317 static void varstack_push ( CUConst* cc,
   1318                             D3VarParser* parser,
   1319                             Bool td3,
   1320                             XArray* ranges, Int level,
   1321                             Bool    isFunc, GExpr* fbGX ) {
   1322    if (0)
   1323    TRACE_D3("BBBBAAAA varstack_push[newsp=%d]: %d  %p\n",
   1324             parser->sp+1, level, ranges);
   1325 
   1326    /* First we need to zap everything >= 'level', as we are about to
   1327       replace any previous entry at 'level', so .. */
   1328    varstack_preen(parser, /*td3*/False, level-1);
   1329 
   1330    vg_assert(parser->sp >= -1);
   1331    vg_assert(parser->sp < N_D3_VAR_STACK);
   1332    if (parser->sp == N_D3_VAR_STACK-1)
   1333       cc->barf("varstack_push: N_D3_VAR_STACK is too low; "
   1334                "increase and recompile");
   1335    if (parser->sp >= 0)
   1336       vg_assert(parser->level[parser->sp] < level);
   1337    parser->sp++;
   1338    vg_assert(parser->ranges[parser->sp] == NULL);
   1339    vg_assert(parser->level[parser->sp]  == 0);
   1340    vg_assert(parser->isFunc[parser->sp] == False);
   1341    vg_assert(parser->fbGX[parser->sp]   == NULL);
   1342    vg_assert(ranges != NULL);
   1343    if (!isFunc) vg_assert(fbGX == NULL);
   1344    parser->ranges[parser->sp] = ranges;
   1345    parser->level[parser->sp]  = level;
   1346    parser->isFunc[parser->sp] = isFunc;
   1347    parser->fbGX[parser->sp]   = fbGX;
   1348    if (td3)
   1349       varstack_show( parser, "after push" );
   1350 }
   1351 
   1352 
   1353 /* cts, ctsSzB, ctsMemSzB are derived from a DW_AT_location and so
   1354    refer either to a location expression or to a location list.
   1355    Figure out which, and in both cases bundle the expression or
   1356    location list into a so-called GExpr (guarded expression). */
   1357 __attribute__((noinline))
   1358 static GExpr* get_GX ( CUConst* cc, Bool td3,
   1359                        ULong cts, Int ctsSzB, UWord ctsMemSzB )
   1360 {
   1361    GExpr* gexpr = NULL;
   1362    if (ctsMemSzB > 0 && ctsSzB == 0) {
   1363       /* represents an in-line location expression, and cts points
   1364          right at it */
   1365       gexpr = make_singleton_GX( (UChar*)(UWord)cts, ctsMemSzB );
   1366    }
   1367    else
   1368    if (ctsMemSzB == 0 && ctsSzB > 0) {
   1369       /* represents location list.  cts is the offset of it in
   1370          .debug_loc. */
   1371       if (!cc->cu_svma_known)
   1372          cc->barf("get_GX: location list, but CU svma is unknown");
   1373       gexpr = make_general_GX( cc, td3, (UWord)cts, cc->cu_svma );
   1374    }
   1375    else {
   1376       vg_assert(0); /* else caller is bogus */
   1377    }
   1378    return gexpr;
   1379 }
   1380 
   1381 
   1382 static
   1383 void read_filename_table( /*MOD*/D3VarParser* parser,
   1384                           CUConst* cc, UWord debug_line_offset,
   1385                           Bool td3 )
   1386 {
   1387    Bool   is_dw64;
   1388    Cursor c;
   1389    Word   i;
   1390    UShort version;
   1391    UChar  opcode_base;
   1392    UChar* str;
   1393 
   1394    vg_assert(parser && cc && cc->barf);
   1395    if ((!cc->debug_line_img)
   1396        || cc->debug_line_sz <= debug_line_offset)
   1397       cc->barf("read_filename_table: .debug_line is missing?");
   1398 
   1399    init_Cursor( &c, cc->debug_line_img,
   1400                 cc->debug_line_sz, debug_line_offset, cc->barf,
   1401                 "Overrun whilst reading .debug_line section(1)" );
   1402 
   1403    /* unit_length = */
   1404       get_Initial_Length( &is_dw64, &c,
   1405            "read_filename_table: invalid initial-length field" );
   1406    version = get_UShort( &c );
   1407    if (version != 2 && version != 3 && version != 4)
   1408      cc->barf("read_filename_table: Only DWARF version 2, 3 and 4 line info "
   1409               "is currently supported.");
   1410    /*header_length              = (ULong)*/ get_Dwarfish_UWord( &c, is_dw64 );
   1411    /*minimum_instruction_length = */ get_UChar( &c );
   1412    if (version >= 4)
   1413       /*maximum_operations_per_insn = */ get_UChar( &c );
   1414    /*default_is_stmt            = */ get_UChar( &c );
   1415    /*line_base                  = (Char)*/ get_UChar( &c );
   1416    /*line_range                 = */ get_UChar( &c );
   1417    opcode_base                = get_UChar( &c );
   1418    /* skip over "standard_opcode_lengths" */
   1419    for (i = 1; i < (Word)opcode_base; i++)
   1420      (void)get_UChar( &c );
   1421 
   1422    /* skip over the directory names table */
   1423    while (peek_UChar(&c) != 0) {
   1424      (void)get_AsciiZ(&c);
   1425    }
   1426    (void)get_UChar(&c); /* skip terminating zero */
   1427 
   1428    /* Read and record the file names table */
   1429    vg_assert(parser->filenameTable);
   1430    vg_assert( VG_(sizeXA)( parser->filenameTable ) == 0 );
   1431    /* Add a dummy index-zero entry.  DWARF3 numbers its files
   1432       from 1, for some reason. */
   1433    str = ML_(addStr)( cc->di, "<unknown_file>", -1 );
   1434    VG_(addToXA)( parser->filenameTable, &str );
   1435    while (peek_UChar(&c) != 0) {
   1436       str = get_AsciiZ(&c);
   1437       TRACE_D3("  read_filename_table: %ld %s\n",
   1438                VG_(sizeXA)(parser->filenameTable), str);
   1439       str = ML_(addStr)( cc->di, str, -1 );
   1440       VG_(addToXA)( parser->filenameTable, &str );
   1441       (void)get_ULEB128( &c ); /* skip directory index # */
   1442       (void)get_ULEB128( &c ); /* skip last mod time */
   1443       (void)get_ULEB128( &c ); /* file size */
   1444    }
   1445    /* We're done!  The rest of it is not interesting. */
   1446 }
   1447 
   1448 
   1449 __attribute__((noinline))
   1450 static void parse_var_DIE (
   1451    /*MOD*/WordFM* /* of (XArray* of AddrRange, void) */ rangestree,
   1452    /*MOD*/XArray* /* of TempVar* */ tempvars,
   1453    /*MOD*/XArray* /* of GExpr* */ gexprs,
   1454    /*MOD*/D3VarParser* parser,
   1455    DW_TAG dtag,
   1456    UWord posn,
   1457    Int level,
   1458    Cursor* c_die,
   1459    Cursor* c_abbv,
   1460    CUConst* cc,
   1461    Bool td3
   1462 )
   1463 {
   1464    ULong       cts;
   1465    Int         ctsSzB;
   1466    UWord       ctsMemSzB;
   1467 
   1468    UWord saved_die_c_offset  = get_position_of_Cursor( c_die );
   1469    UWord saved_abbv_c_offset = get_position_of_Cursor( c_abbv );
   1470 
   1471    varstack_preen( parser, td3, level-1 );
   1472 
   1473    if (dtag == DW_TAG_compile_unit) {
   1474       Bool have_lo    = False;
   1475       Bool have_hi1   = False;
   1476       Bool have_range = False;
   1477       Addr ip_lo    = 0;
   1478       Addr ip_hi1   = 0;
   1479       Addr rangeoff = 0;
   1480       while (True) {
   1481          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
   1482          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
   1483          if (attr == 0 && form == 0) break;
   1484          get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
   1485                             cc, c_die, False/*td3*/, form );
   1486          if (attr == DW_AT_low_pc && ctsSzB > 0) {
   1487             ip_lo   = cts;
   1488             have_lo = True;
   1489          }
   1490          if (attr == DW_AT_high_pc && ctsSzB > 0) {
   1491             ip_hi1   = cts;
   1492             have_hi1 = True;
   1493          }
   1494          if (attr == DW_AT_ranges && ctsSzB > 0) {
   1495             rangeoff = cts;
   1496             have_range = True;
   1497          }
   1498          if (attr == DW_AT_stmt_list && ctsSzB > 0) {
   1499             read_filename_table( parser, cc, (UWord)cts, td3 );
   1500          }
   1501       }
   1502       /* Now, does this give us an opportunity to find this
   1503          CU's svma? */
   1504 #if 0
   1505       if (level == 0 && have_lo) {
   1506          vg_assert(!cc->cu_svma_known); /* if this fails, it must be
   1507          because we've already seen a DW_TAG_compile_unit DIE at level
   1508          0.  But that can't happen, because DWARF3 only allows exactly
   1509          one top level DIE per CU. */
   1510          cc->cu_svma_known = True;
   1511          cc->cu_svma = ip_lo;
   1512          if (1)
   1513             TRACE_D3("BBBBAAAA acquire CU_SVMA of %p\n", cc->cu_svma);
   1514          /* Now, it may be that this DIE doesn't tell us the CU's
   1515             SVMA, by way of not having a DW_AT_low_pc.  That's OK --
   1516             the CU doesn't *have* to have its SVMA specified.
   1517 
   1518             But as per last para D3 spec sec 3.1.1 ("Normal and
   1519             Partial Compilation Unit Entries", "If the base address
   1520             (viz, the SVMA) is undefined, then any DWARF entry of
   1521             structure defined interms of the base address of that
   1522             compilation unit is not valid.".  So that means, if whilst
   1523             processing the children of this top level DIE (or their
   1524             children, etc) we see a DW_AT_range, and cu_svma_known is
   1525             False, then the DIE that contains it is (per the spec)
   1526             invalid, and we can legitimately stop and complain. */
   1527       }
   1528 #else
   1529       /* .. whereas The Reality is, simply assume the SVMA is zero
   1530          if it isn't specified. */
   1531       if (level == 0) {
   1532          vg_assert(!cc->cu_svma_known);
   1533          cc->cu_svma_known = True;
   1534          if (have_lo)
   1535             cc->cu_svma = ip_lo;
   1536          else
   1537             cc->cu_svma = 0;
   1538       }
   1539 #endif
   1540       /* Do we have something that looks sane? */
   1541       if (have_lo && have_hi1 && (!have_range)) {
   1542          if (ip_lo < ip_hi1)
   1543             varstack_push( cc, parser, td3,
   1544                            unitary_range_list(ip_lo, ip_hi1 - 1),
   1545                            level,
   1546                            False/*isFunc*/, NULL/*fbGX*/ );
   1547       } else
   1548       if ((!have_lo) && (!have_hi1) && have_range) {
   1549          varstack_push( cc, parser, td3,
   1550                         get_range_list( cc, td3,
   1551                                         rangeoff, cc->cu_svma ),
   1552                         level,
   1553                         False/*isFunc*/, NULL/*fbGX*/ );
   1554       } else
   1555       if ((!have_lo) && (!have_hi1) && (!have_range)) {
   1556          /* CU has no code, presumably? */
   1557          varstack_push( cc, parser, td3,
   1558                         empty_range_list(),
   1559                         level,
   1560                         False/*isFunc*/, NULL/*fbGX*/ );
   1561       } else
   1562       if (have_lo && (!have_hi1) && have_range && ip_lo == 0) {
   1563          /* broken DIE created by gcc-4.3.X ?  Ignore the
   1564             apparently-redundant DW_AT_low_pc and use the DW_AT_ranges
   1565             instead. */
   1566          varstack_push( cc, parser, td3,
   1567                         get_range_list( cc, td3,
   1568                                         rangeoff, cc->cu_svma ),
   1569                         level,
   1570                         False/*isFunc*/, NULL/*fbGX*/ );
   1571       } else {
   1572          if (0) VG_(printf)("I got hlo %d hhi1 %d hrange %d\n",
   1573                             (Int)have_lo, (Int)have_hi1, (Int)have_range);
   1574          goto bad_DIE;
   1575       }
   1576    }
   1577 
   1578    if (dtag == DW_TAG_lexical_block || dtag == DW_TAG_subprogram) {
   1579       Bool   have_lo    = False;
   1580       Bool   have_hi1   = False;
   1581       Bool   have_range = False;
   1582       Addr   ip_lo      = 0;
   1583       Addr   ip_hi1     = 0;
   1584       Addr   rangeoff   = 0;
   1585       Bool   isFunc     = dtag == DW_TAG_subprogram;
   1586       GExpr* fbGX       = NULL;
   1587       while (True) {
   1588          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
   1589          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
   1590          if (attr == 0 && form == 0) break;
   1591          get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
   1592                             cc, c_die, False/*td3*/, form );
   1593          if (attr == DW_AT_low_pc && ctsSzB > 0) {
   1594             ip_lo   = cts;
   1595             have_lo = True;
   1596          }
   1597          if (attr == DW_AT_high_pc && ctsSzB > 0) {
   1598             ip_hi1   = cts;
   1599             have_hi1 = True;
   1600          }
   1601          if (attr == DW_AT_ranges && ctsSzB > 0) {
   1602             rangeoff = cts;
   1603             have_range = True;
   1604          }
   1605          if (isFunc
   1606              && attr == DW_AT_frame_base
   1607              && ((ctsMemSzB > 0 && ctsSzB == 0)
   1608                  || (ctsMemSzB == 0 && ctsSzB > 0))) {
   1609             fbGX = get_GX( cc, False/*td3*/, cts, ctsSzB, ctsMemSzB );
   1610             vg_assert(fbGX);
   1611             VG_(addToXA)(gexprs, &fbGX);
   1612          }
   1613       }
   1614       /* Do we have something that looks sane? */
   1615       if (dtag == DW_TAG_subprogram
   1616           && (!have_lo) && (!have_hi1) && (!have_range)) {
   1617          /* This is legit - ignore it. Sec 3.3.3: "A subroutine entry
   1618             representing a subroutine declaration that is not also a
   1619             definition does not have code address or range
   1620             attributes." */
   1621       } else
   1622       if (dtag == DW_TAG_lexical_block
   1623           && (!have_lo) && (!have_hi1) && (!have_range)) {
   1624          /* I believe this is legit, and means the lexical block
   1625             contains no insns (whatever that might mean).  Ignore. */
   1626       } else
   1627       if (have_lo && have_hi1 && (!have_range)) {
   1628          /* This scope supplies just a single address range. */
   1629          if (ip_lo < ip_hi1)
   1630             varstack_push( cc, parser, td3,
   1631                            unitary_range_list(ip_lo, ip_hi1 - 1),
   1632                            level, isFunc, fbGX );
   1633       } else
   1634       if ((!have_lo) && (!have_hi1) && have_range) {
   1635          /* This scope supplies multiple address ranges via the use of
   1636             a range list. */
   1637          varstack_push( cc, parser, td3,
   1638                         get_range_list( cc, td3,
   1639                                         rangeoff, cc->cu_svma ),
   1640                         level, isFunc, fbGX );
   1641       } else
   1642       if (have_lo && (!have_hi1) && (!have_range)) {
   1643          /* This scope is bogus.  The D3 spec sec 3.4 (Lexical Block
   1644             Entries) says fairly clearly that a scope must have either
   1645             _range or (_low_pc and _high_pc). */
   1646          /* The spec is a bit ambiguous though.  Perhaps a single byte
   1647             range is intended?  See sec 2.17 (Code Addresses And Ranges) */
   1648          /* This case is here because icc9 produced this:
   1649          <2><13bd>: DW_TAG_lexical_block
   1650             DW_AT_decl_line   : 5229
   1651             DW_AT_decl_column : 37
   1652             DW_AT_decl_file   : 1
   1653             DW_AT_low_pc      : 0x401b03
   1654          */
   1655          /* Ignore (seems safe than pushing a single byte range) */
   1656       } else
   1657          goto bad_DIE;
   1658    }
   1659 
   1660    if (dtag == DW_TAG_variable || dtag == DW_TAG_formal_parameter) {
   1661       UChar* name        = NULL;
   1662       UWord  typeR       = D3_INVALID_CUOFF;
   1663       Bool   external    = False;
   1664       GExpr* gexpr       = NULL;
   1665       Int    n_attrs     = 0;
   1666       UWord  abs_ori     = (UWord)D3_INVALID_CUOFF;
   1667       Int    lineNo      = 0;
   1668       UChar* fileName    = NULL;
   1669       while (True) {
   1670          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
   1671          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
   1672          if (attr == 0 && form == 0) break;
   1673          get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
   1674                             cc, c_die, False/*td3*/, form );
   1675          n_attrs++;
   1676          if (attr == DW_AT_name && ctsMemSzB > 0) {
   1677             name = ML_(addStr)( cc->di, (UChar*)(UWord)cts, -1 );
   1678          }
   1679          if (attr == DW_AT_location
   1680              && ((ctsMemSzB > 0 && ctsSzB == 0)
   1681                  || (ctsMemSzB == 0 && ctsSzB > 0))) {
   1682             gexpr = get_GX( cc, False/*td3*/, cts, ctsSzB, ctsMemSzB );
   1683             vg_assert(gexpr);
   1684             VG_(addToXA)(gexprs, &gexpr);
   1685          }
   1686          if (attr == DW_AT_type && ctsSzB > 0) {
   1687             typeR = (UWord)cts;
   1688          }
   1689          if (attr == DW_AT_external && ctsSzB > 0 && cts > 0) {
   1690             external = True;
   1691          }
   1692          if (attr == DW_AT_abstract_origin && ctsSzB > 0) {
   1693             abs_ori = (UWord)cts;
   1694          }
   1695          if (attr == DW_AT_declaration && ctsSzB > 0 && cts > 0) {
   1696             /*declaration = True;*/
   1697          }
   1698          if (attr == DW_AT_decl_line && ctsSzB > 0) {
   1699             lineNo = (Int)cts;
   1700          }
   1701          if (attr == DW_AT_decl_file && ctsSzB > 0) {
   1702             Int ftabIx = (Int)cts;
   1703             if (ftabIx >= 1
   1704                 && ftabIx < VG_(sizeXA)( parser->filenameTable )) {
   1705                fileName = *(UChar**)
   1706                           VG_(indexXA)( parser->filenameTable, ftabIx );
   1707                vg_assert(fileName);
   1708             }
   1709             if (0) VG_(printf)("XXX filename = %s\n", fileName);
   1710          }
   1711       }
   1712       /* We'll collect it under if one of the following three
   1713          conditions holds:
   1714          (1) has location and type    -> completed
   1715          (2) has type only            -> is an abstract instance
   1716          (3) has location and abs_ori -> is a concrete instance
   1717          Name, filename and line number are all optional frills.
   1718       */
   1719       if ( /* 1 */ (gexpr && typeR != D3_INVALID_CUOFF)
   1720            /* 2 */ || (typeR != D3_INVALID_CUOFF)
   1721            /* 3 */ || (gexpr && abs_ori != (UWord)D3_INVALID_CUOFF) ) {
   1722 
   1723          /* Add this variable to the list of interesting looking
   1724             variables.  Crucially, note along with it the address
   1725             range(s) associated with the variable, which for locals
   1726             will be the address ranges at the top of the varparser's
   1727             stack. */
   1728          GExpr*   fbGX = NULL;
   1729          Word     i, nRanges;
   1730          XArray*  /* of AddrRange */ xa;
   1731          TempVar* tv;
   1732          /* Stack can't be empty; we put a dummy entry on it for the
   1733             entire address range before starting with the DIEs for
   1734             this CU. */
   1735          vg_assert(parser->sp >= 0);
   1736 
   1737          /* If this is a local variable (non-external), try to find
   1738             the GExpr for the DW_AT_frame_base of the containing
   1739             function.  It should have been pushed on the stack at the
   1740             time we encountered its DW_TAG_subprogram DIE, so the way
   1741             to find it is to scan back down the stack looking for it.
   1742             If there isn't an enclosing stack entry marked 'isFunc'
   1743             then we must be seeing variable or formal param DIEs
   1744             outside of a function, so we deem the Dwarf to be
   1745             malformed if that happens.  Note that the fbGX may be NULL
   1746             if the containing DT_TAG_subprogram didn't supply a
   1747             DW_AT_frame_base -- that's OK, but there must actually be
   1748             a containing DW_TAG_subprogram. */
   1749          if (!external) {
   1750             Bool found = False;
   1751             for (i = parser->sp; i >= 0; i--) {
   1752                if (parser->isFunc[i]) {
   1753                   fbGX = parser->fbGX[i];
   1754                   found = True;
   1755                   break;
   1756                }
   1757             }
   1758             if (!found) {
   1759                if (0 && VG_(clo_verbosity) >= 0) {
   1760                   VG_(message)(Vg_DebugMsg,
   1761                      "warning: parse_var_DIE: non-external variable "
   1762                      "outside DW_TAG_subprogram\n");
   1763                }
   1764                /* goto bad_DIE; */
   1765                /* This seems to happen a lot.  Just ignore it -- if,
   1766                   when we come to evaluation of the location (guarded)
   1767                   expression, it requires a frame base value, and
   1768                   there's no expression for that, then evaluation as a
   1769                   whole will fail.  Harmless - a bit of a waste of
   1770                   cycles but nothing more. */
   1771             }
   1772          }
   1773 
   1774          /* re "external ? 0 : parser->sp" (twice), if the var is
   1775             marked 'external' then we must put it at the global scope,
   1776             as only the global scope (level 0) covers the entire PC
   1777             address space.  It is asserted elsewhere that level 0
   1778             always covers the entire address space. */
   1779          xa = parser->ranges[external ? 0 : parser->sp];
   1780          nRanges = VG_(sizeXA)(xa);
   1781          vg_assert(nRanges >= 0);
   1782 
   1783          tv = ML_(dinfo_zalloc)( "di.readdwarf3.pvD.1", sizeof(TempVar) );
   1784          tv->name   = name;
   1785          tv->level  = external ? 0 : parser->sp;
   1786          tv->typeR  = typeR;
   1787          tv->gexpr  = gexpr;
   1788          tv->fbGX   = fbGX;
   1789          tv->fName  = fileName;
   1790          tv->fLine  = lineNo;
   1791          tv->dioff  = posn;
   1792          tv->absOri = abs_ori;
   1793 
   1794          /* See explanation on definition of type TempVar for the
   1795             reason for this elaboration. */
   1796          tv->nRanges = nRanges;
   1797          tv->rngOneMin = 0;
   1798          tv->rngOneMax = 0;
   1799          tv->rngMany = NULL;
   1800          if (nRanges == 1) {
   1801             AddrRange* range = VG_(indexXA)(xa, 0);
   1802             tv->rngOneMin = range->aMin;
   1803             tv->rngOneMax = range->aMax;
   1804          }
   1805          else if (nRanges > 1) {
   1806             /* See if we already have a range list which is
   1807                structurally identical.  If so, use that; if not, clone
   1808                this one, and add it to our collection. */
   1809             UWord keyW, valW;
   1810             if (VG_(lookupFM)( rangestree, &keyW, &valW, (UWord)xa )) {
   1811                XArray* old = (XArray*)keyW;
   1812                tl_assert(valW == 0);
   1813                tl_assert(old != xa);
   1814                tv->rngMany = old;
   1815             } else {
   1816                XArray* cloned = VG_(cloneXA)( "di.readdwarf3.pvD.2", xa );
   1817                tv->rngMany = cloned;
   1818                VG_(addToFM)( rangestree, (UWord)cloned, 0 );
   1819             }
   1820          }
   1821 
   1822          VG_(addToXA)( tempvars, &tv );
   1823 
   1824          TRACE_D3("  Recording this variable, with %ld PC range(s)\n",
   1825                   VG_(sizeXA)(xa) );
   1826          /* collect stats on how effective the ->ranges special
   1827             casing is */
   1828          if (0) {
   1829             static Int ntot=0, ngt=0;
   1830             ntot++;
   1831             if (tv->rngMany) ngt++;
   1832             if (0 == (ntot % 100000))
   1833                VG_(printf)("XXXX %d tot, %d cloned\n", ntot, ngt);
   1834          }
   1835 
   1836       }
   1837 
   1838       /* Here are some other weird cases seen in the wild:
   1839 
   1840             We have a variable with a name and a type, but no
   1841             location.  I guess that's a sign that it has been
   1842             optimised away.  Ignore it.  Here's an example:
   1843 
   1844             static Int lc_compar(void* n1, void* n2) {
   1845                MC_Chunk* mc1 = *(MC_Chunk**)n1;
   1846                MC_Chunk* mc2 = *(MC_Chunk**)n2;
   1847                return (mc1->data < mc2->data ? -1 : 1);
   1848             }
   1849 
   1850             Both mc1 and mc2 are like this
   1851             <2><5bc>: Abbrev Number: 21 (DW_TAG_variable)
   1852                 DW_AT_name        : mc1
   1853                 DW_AT_decl_file   : 1
   1854                 DW_AT_decl_line   : 216
   1855                 DW_AT_type        : <5d3>
   1856 
   1857             whereas n1 and n2 do have locations specified.
   1858 
   1859             ---------------------------------------------
   1860 
   1861             We see a DW_TAG_formal_parameter with a type, but
   1862             no name and no location.  It's probably part of a function type
   1863             construction, thusly, hence ignore it:
   1864          <1><2b4>: Abbrev Number: 12 (DW_TAG_subroutine_type)
   1865              DW_AT_sibling     : <2c9>
   1866              DW_AT_prototyped  : 1
   1867              DW_AT_type        : <114>
   1868          <2><2be>: Abbrev Number: 13 (DW_TAG_formal_parameter)
   1869              DW_AT_type        : <13e>
   1870          <2><2c3>: Abbrev Number: 13 (DW_TAG_formal_parameter)
   1871              DW_AT_type        : <133>
   1872 
   1873             ---------------------------------------------
   1874 
   1875             Is very minimal, like this:
   1876             <4><81d>: Abbrev Number: 44 (DW_TAG_variable)
   1877                 DW_AT_abstract_origin: <7ba>
   1878             What that signifies I have no idea.  Ignore.
   1879 
   1880             ----------------------------------------------
   1881 
   1882             Is very minimal, like this:
   1883             <200f>: DW_TAG_formal_parameter
   1884                 DW_AT_abstract_ori: <1f4c>
   1885                 DW_AT_location    : 13440
   1886             What that signifies I have no idea.  Ignore.
   1887             It might be significant, though: the variable at least
   1888             has a location and so might exist somewhere.
   1889             Maybe we should handle this.
   1890 
   1891             ---------------------------------------------
   1892 
   1893             <22407>: DW_TAG_variable
   1894               DW_AT_name        : (indirect string, offset: 0x6579):
   1895                                   vgPlain_trampoline_stuff_start
   1896               DW_AT_decl_file   : 29
   1897               DW_AT_decl_line   : 56
   1898               DW_AT_external    : 1
   1899               DW_AT_declaration : 1
   1900 
   1901             Nameless and typeless variable that has a location?  Who
   1902             knows.  Not me.
   1903             <2><3d178>: Abbrev Number: 22 (DW_TAG_variable)
   1904                  DW_AT_location    : 9 byte block: 3 c0 c7 13 38 0 0 0 0
   1905                                      (DW_OP_addr: 3813c7c0)
   1906 
   1907             No, really.  Check it out.  gcc is quite simply borked.
   1908             <3><168cc>: Abbrev Number: 141 (DW_TAG_variable)
   1909             // followed by no attributes, and the next DIE is a sibling,
   1910             // not a child
   1911             */
   1912    }
   1913    return;
   1914 
   1915   bad_DIE:
   1916    set_position_of_Cursor( c_die,  saved_die_c_offset );
   1917    set_position_of_Cursor( c_abbv, saved_abbv_c_offset );
   1918    VG_(printf)("\nparse_var_DIE: confused by:\n");
   1919    VG_(printf)(" <%d><%lx>: %s\n", level, posn, ML_(pp_DW_TAG)( dtag ) );
   1920    while (True) {
   1921       DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
   1922       DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
   1923       if (attr == 0 && form == 0) break;
   1924       VG_(printf)("     %18s: ", ML_(pp_DW_AT)(attr));
   1925       /* Get the form contents, so as to print them */
   1926       get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
   1927                          cc, c_die, True, form );
   1928       VG_(printf)("\t\n");
   1929    }
   1930    VG_(printf)("\n");
   1931    cc->barf("parse_var_DIE: confused by the above DIE");
   1932    /*NOTREACHED*/
   1933 }
   1934 
   1935 
   1936 /*------------------------------------------------------------*/
   1937 /*---                                                      ---*/
   1938 /*--- Parsing of type-related DIEs                         ---*/
   1939 /*---                                                      ---*/
   1940 /*------------------------------------------------------------*/
   1941 
   1942 #define N_D3_TYPE_STACK 16
   1943 
   1944 typedef
   1945    struct {
   1946       /* What source language?  'A'=Ada83/95,
   1947                                 'C'=C/C++,
   1948                                 'F'=Fortran,
   1949                                 '?'=other
   1950          Established once per compilation unit. */
   1951       UChar language;
   1952       /* A stack of types which are currently under construction */
   1953       Int   sp; /* [sp] is innermost active entry; sp==-1 for empty
   1954                    stack */
   1955       /* Note that the TyEnts in qparentE are temporary copies of the
   1956          ones accumulating in the main tyent array.  So it is not safe
   1957          to free up anything on them when popping them off the stack
   1958          (iow, it isn't safe to use TyEnt__make_EMPTY on them).  Just
   1959          memset them to zero when done. */
   1960       TyEnt qparentE[N_D3_TYPE_STACK]; /* parent TyEnts */
   1961       Int   qlevel[N_D3_TYPE_STACK];
   1962 
   1963    }
   1964    D3TypeParser;
   1965 
   1966 static void typestack_show ( D3TypeParser* parser, HChar* str ) {
   1967    Word i;
   1968    VG_(printf)("  typestack (%s) {\n", str);
   1969    for (i = 0; i <= parser->sp; i++) {
   1970       VG_(printf)("    [%ld] (level %d): ", i, parser->qlevel[i]);
   1971       ML_(pp_TyEnt)( &parser->qparentE[i] );
   1972       VG_(printf)("\n");
   1973    }
   1974    VG_(printf)("  }\n");
   1975 }
   1976 
   1977 /* Remove from the stack, all entries with .level > 'level' */
   1978 static
   1979 void typestack_preen ( D3TypeParser* parser, Bool td3, Int level )
   1980 {
   1981    Bool changed = False;
   1982    vg_assert(parser->sp < N_D3_TYPE_STACK);
   1983    while (True) {
   1984       vg_assert(parser->sp >= -1);
   1985       if (parser->sp == -1) break;
   1986       if (parser->qlevel[parser->sp] <= level) break;
   1987       if (0)
   1988          TRACE_D3("BBBBAAAA typestack_pop [newsp=%d]\n", parser->sp-1);
   1989       vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
   1990       VG_(memset)(&parser->qparentE[parser->sp], 0, sizeof(TyEnt));
   1991       parser->qparentE[parser->sp].cuOff = D3_INVALID_CUOFF;
   1992       parser->qparentE[parser->sp].tag = Te_EMPTY;
   1993       parser->qlevel[parser->sp] = 0;
   1994       parser->sp--;
   1995       changed = True;
   1996    }
   1997    if (changed && td3)
   1998       typestack_show( parser, "after preen" );
   1999 }
   2000 
   2001 static Bool typestack_is_empty ( D3TypeParser* parser ) {
   2002    vg_assert(parser->sp >= -1 && parser->sp < N_D3_TYPE_STACK);
   2003    return parser->sp == -1;
   2004 }
   2005 
   2006 static void typestack_push ( CUConst* cc,
   2007                              D3TypeParser* parser,
   2008                              Bool td3,
   2009                              TyEnt* parentE, Int level ) {
   2010    if (0)
   2011    TRACE_D3("BBBBAAAA typestack_push[newsp=%d]: %d  %05lx\n",
   2012             parser->sp+1, level, parentE->cuOff);
   2013 
   2014    /* First we need to zap everything >= 'level', as we are about to
   2015       replace any previous entry at 'level', so .. */
   2016    typestack_preen(parser, /*td3*/False, level-1);
   2017 
   2018    vg_assert(parser->sp >= -1);
   2019    vg_assert(parser->sp < N_D3_TYPE_STACK);
   2020    if (parser->sp == N_D3_TYPE_STACK-1)
   2021       cc->barf("typestack_push: N_D3_TYPE_STACK is too low; "
   2022                "increase and recompile");
   2023    if (parser->sp >= 0)
   2024       vg_assert(parser->qlevel[parser->sp] < level);
   2025    parser->sp++;
   2026    vg_assert(parser->qparentE[parser->sp].tag == Te_EMPTY);
   2027    vg_assert(parser->qlevel[parser->sp]  == 0);
   2028    vg_assert(parentE);
   2029    vg_assert(ML_(TyEnt__is_type)(parentE));
   2030    vg_assert(parentE->cuOff != D3_INVALID_CUOFF);
   2031    parser->qparentE[parser->sp] = *parentE;
   2032    parser->qlevel[parser->sp]  = level;
   2033    if (td3)
   2034       typestack_show( parser, "after push" );
   2035 }
   2036 
   2037 /* True if the subrange type being parsed gives the bounds of an array. */
   2038 static Bool subrange_type_denotes_array_bounds ( D3TypeParser* parser,
   2039                                                  DW_TAG dtag ) {
   2040    vg_assert(dtag == DW_TAG_subrange_type);
   2041    /* For most languages, a subrange_type dtag always gives the
   2042       bounds of an array.
   2043       For Ada, there are additional conditions as a subrange_type
   2044       is also used for other purposes. */
   2045    if (parser->language != 'A')
   2046       /* not Ada, so it definitely denotes an array bound. */
   2047       return True;
   2048    else
   2049       /* Extra constraints for Ada: it only denotes an array bound if .. */
   2050       return (! typestack_is_empty(parser)
   2051               && parser->qparentE[parser->sp].tag == Te_TyArray);
   2052 }
   2053 
   2054 /* Parse a type-related DIE.  'parser' holds the current parser state.
   2055    'admin' is where the completed types are dumped.  'dtag' is the tag
   2056    for this DIE.  'c_die' points to the start of the data fields (FORM
   2057    stuff) for the DIE.  c_abbv points to the start of the (name,form)
   2058    pairs which describe the DIE.
   2059 
   2060    We may find the DIE uninteresting, in which case we should ignore
   2061    it.
   2062 
   2063    What happens: the DIE is examined.  If uninteresting, it is ignored.
   2064    Otherwise, the DIE gives rise to two things:
   2065 
   2066    (1) the offset of this DIE in the CU -- the cuOffset, a UWord
   2067    (2) a TyAdmin structure, which holds the type, or related stuff
   2068 
   2069    (2) is added at the end of 'tyadmins', at some index, say 'i'.
   2070 
   2071    A pair (cuOffset, i) is added to 'tydict'.
   2072 
   2073    Hence 'tyadmins' holds the actual type entities, and 'tydict' holds
   2074    a mapping from cuOffset to the index of the corresponding entry in
   2075    'tyadmin'.
   2076 
   2077    When resolving a cuOffset to a TyAdmin, first look up the cuOffset
   2078    in the tydict (by binary search).  This gives an index into
   2079    tyadmins, and the required entity lives in tyadmins at that index.
   2080 */
   2081 __attribute__((noinline))
   2082 static void parse_type_DIE ( /*MOD*/XArray* /* of TyEnt */ tyents,
   2083                              /*MOD*/D3TypeParser* parser,
   2084                              DW_TAG dtag,
   2085                              UWord posn,
   2086                              Int level,
   2087                              Cursor* c_die,
   2088                              Cursor* c_abbv,
   2089                              CUConst* cc,
   2090                              Bool td3 )
   2091 {
   2092    ULong cts;
   2093    Int   ctsSzB;
   2094    UWord ctsMemSzB;
   2095    TyEnt typeE;
   2096    TyEnt atomE;
   2097    TyEnt fieldE;
   2098    TyEnt boundE;
   2099 
   2100    UWord saved_die_c_offset  = get_position_of_Cursor( c_die );
   2101    UWord saved_abbv_c_offset = get_position_of_Cursor( c_abbv );
   2102 
   2103    VG_(memset)( &typeE,  0xAA, sizeof(typeE) );
   2104    VG_(memset)( &atomE,  0xAA, sizeof(atomE) );
   2105    VG_(memset)( &fieldE, 0xAA, sizeof(fieldE) );
   2106    VG_(memset)( &boundE, 0xAA, sizeof(boundE) );
   2107 
   2108    /* If we've returned to a level at or above any previously noted
   2109       parent, un-note it, so we don't believe we're still collecting
   2110       its children. */
   2111    typestack_preen( parser, td3, level-1 );
   2112 
   2113    if (dtag == DW_TAG_compile_unit) {
   2114       /* See if we can find DW_AT_language, since it is important for
   2115          establishing array bounds (see DW_TAG_subrange_type below in
   2116          this fn) */
   2117       while (True) {
   2118          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
   2119          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
   2120          if (attr == 0 && form == 0) break;
   2121          get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
   2122                             cc, c_die, False/*td3*/, form );
   2123          if (attr != DW_AT_language)
   2124             continue;
   2125          if (ctsSzB == 0)
   2126            goto bad_DIE;
   2127          switch (cts) {
   2128             case DW_LANG_C89: case DW_LANG_C:
   2129             case DW_LANG_C_plus_plus: case DW_LANG_ObjC:
   2130             case DW_LANG_ObjC_plus_plus: case DW_LANG_UPC:
   2131             case DW_LANG_Upc: case DW_LANG_C99:
   2132                parser->language = 'C'; break;
   2133             case DW_LANG_Fortran77: case DW_LANG_Fortran90:
   2134             case DW_LANG_Fortran95:
   2135                parser->language = 'F'; break;
   2136             case DW_LANG_Ada83: case DW_LANG_Ada95:
   2137                parser->language = 'A'; break;
   2138             case DW_LANG_Cobol74:
   2139             case DW_LANG_Cobol85: case DW_LANG_Pascal83:
   2140             case DW_LANG_Modula2: case DW_LANG_Java:
   2141             case DW_LANG_PLI:
   2142             case DW_LANG_D: case DW_LANG_Python:
   2143             case DW_LANG_Mips_Assembler:
   2144                parser->language = '?'; break;
   2145             default:
   2146                goto bad_DIE;
   2147          }
   2148       }
   2149    }
   2150 
   2151    if (dtag == DW_TAG_base_type) {
   2152       /* We can pick up a new base type any time. */
   2153       VG_(memset)(&typeE, 0, sizeof(typeE));
   2154       typeE.cuOff = D3_INVALID_CUOFF;
   2155       typeE.tag   = Te_TyBase;
   2156       while (True) {
   2157          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
   2158          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
   2159          if (attr == 0 && form == 0) break;
   2160          get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
   2161                             cc, c_die, False/*td3*/, form );
   2162          if (attr == DW_AT_name && ctsMemSzB > 0) {
   2163             typeE.Te.TyBase.name
   2164                = ML_(dinfo_strdup)( "di.readdwarf3.ptD.base_type.1",
   2165                                     (UChar*)(UWord)cts );
   2166          }
   2167          if (attr == DW_AT_byte_size && ctsSzB > 0) {
   2168             typeE.Te.TyBase.szB = cts;
   2169          }
   2170          if (attr == DW_AT_encoding && ctsSzB > 0) {
   2171             switch (cts) {
   2172                case DW_ATE_unsigned: case DW_ATE_unsigned_char:
   2173                case DW_ATE_UTF: /* since DWARF4, e.g. char16_t from C++ */
   2174                case DW_ATE_boolean:/* FIXME - is this correct? */
   2175                   typeE.Te.TyBase.enc = 'U'; break;
   2176                case DW_ATE_signed: case DW_ATE_signed_char:
   2177                   typeE.Te.TyBase.enc = 'S'; break;
   2178                case DW_ATE_float:
   2179                   typeE.Te.TyBase.enc = 'F'; break;
   2180                case DW_ATE_complex_float:
   2181                   typeE.Te.TyBase.enc = 'C'; break;
   2182                default:
   2183                   goto bad_DIE;
   2184             }
   2185          }
   2186       }
   2187 
   2188       /* Invent a name if it doesn't have one.  gcc-4.3
   2189          -ftree-vectorize is observed to emit nameless base types. */
   2190       if (!typeE.Te.TyBase.name)
   2191          typeE.Te.TyBase.name
   2192             = ML_(dinfo_strdup)( "di.readdwarf3.ptD.base_type.2",
   2193                                  "<anon_base_type>" );
   2194 
   2195       /* Do we have something that looks sane? */
   2196       if (/* must have a name */
   2197           typeE.Te.TyBase.name == NULL
   2198           /* and a plausible size.  Yes, really 32: "complex long
   2199              double" apparently has size=32 */
   2200           || typeE.Te.TyBase.szB < 0 || typeE.Te.TyBase.szB > 32
   2201           /* and a plausible encoding */
   2202           || (typeE.Te.TyBase.enc != 'U'
   2203               && typeE.Te.TyBase.enc != 'S'
   2204               && typeE.Te.TyBase.enc != 'F'
   2205               && typeE.Te.TyBase.enc != 'C'))
   2206          goto bad_DIE;
   2207       /* Last minute hack: if we see this
   2208          <1><515>: DW_TAG_base_type
   2209              DW_AT_byte_size   : 0
   2210              DW_AT_encoding    : 5
   2211              DW_AT_name        : void
   2212          convert it into a real Void type. */
   2213       if (typeE.Te.TyBase.szB == 0
   2214           && 0 == VG_(strcmp)("void", typeE.Te.TyBase.name)) {
   2215          ML_(TyEnt__make_EMPTY)(&typeE);
   2216          typeE.tag = Te_TyVoid;
   2217          typeE.Te.TyVoid.isFake = False; /* it's a real one! */
   2218       }
   2219 
   2220       goto acquire_Type;
   2221    }
   2222 
   2223    if (dtag == DW_TAG_pointer_type || dtag == DW_TAG_reference_type
   2224        || dtag == DW_TAG_ptr_to_member_type) {
   2225       /* This seems legit for _pointer_type and _reference_type.  I
   2226          don't know if rolling _ptr_to_member_type in here really is
   2227          legit, but it's better than not handling it at all. */
   2228       VG_(memset)(&typeE, 0, sizeof(typeE));
   2229       typeE.cuOff = D3_INVALID_CUOFF;
   2230       typeE.tag   = Te_TyPorR;
   2231       /* target type defaults to void */
   2232       typeE.Te.TyPorR.typeR = D3_FAKEVOID_CUOFF;
   2233       typeE.Te.TyPorR.isPtr = dtag == DW_TAG_pointer_type
   2234                               || dtag == DW_TAG_ptr_to_member_type;
   2235       /* These three type kinds don't *have* to specify their size, in
   2236          which case we assume it's a machine word.  But if they do
   2237          specify it, it must be a machine word :-)  This probably
   2238          assumes that the word size of the Dwarf3 we're reading is the
   2239          same size as that on the machine.  gcc appears to give a size
   2240          whereas icc9 doesn't. */
   2241       typeE.Te.TyPorR.szB = sizeof(UWord);
   2242       while (True) {
   2243          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
   2244          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
   2245          if (attr == 0 && form == 0) break;
   2246          get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
   2247                             cc, c_die, False/*td3*/, form );
   2248          if (attr == DW_AT_byte_size && ctsSzB > 0) {
   2249             typeE.Te.TyPorR.szB = cts;
   2250          }
   2251          if (attr == DW_AT_type && ctsSzB > 0) {
   2252             typeE.Te.TyPorR.typeR = (UWord)cts;
   2253          }
   2254       }
   2255       /* Do we have something that looks sane? */
   2256       if (typeE.Te.TyPorR.szB != sizeof(UWord))
   2257          goto bad_DIE;
   2258       else
   2259          goto acquire_Type;
   2260    }
   2261 
   2262    if (dtag == DW_TAG_enumeration_type) {
   2263       /* Create a new Type to hold the results. */
   2264       VG_(memset)(&typeE, 0, sizeof(typeE));
   2265       typeE.cuOff = posn;
   2266       typeE.tag   = Te_TyEnum;
   2267       typeE.Te.TyEnum.atomRs
   2268          = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ptD.enum_type.1",
   2269                        ML_(dinfo_free),
   2270                        sizeof(UWord) );
   2271       while (True) {
   2272          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
   2273          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
   2274          if (attr == 0 && form == 0) break;
   2275          get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
   2276                             cc, c_die, False/*td3*/, form );
   2277          if (attr == DW_AT_name && ctsMemSzB > 0) {
   2278             typeE.Te.TyEnum.name
   2279               = ML_(dinfo_strdup)( "di.readdwarf3.pTD.enum_type.2",
   2280                                    (UChar*)(UWord)cts );
   2281          }
   2282          if (attr == DW_AT_byte_size && ctsSzB > 0) {
   2283             typeE.Te.TyEnum.szB = cts;
   2284          }
   2285       }
   2286 
   2287       if (!typeE.Te.TyEnum.name)
   2288          typeE.Te.TyEnum.name
   2289             = ML_(dinfo_strdup)( "di.readdwarf3.pTD.enum_type.3",
   2290                                  "<anon_enum_type>" );
   2291 
   2292       /* Do we have something that looks sane? */
   2293       if (typeE.Te.TyEnum.szB == 0
   2294           /* we must know the size */
   2295           /* but not for Ada, which uses such dummy
   2296              enumerations as helper for gdb ada mode. */
   2297           && parser->language != 'A')
   2298          goto bad_DIE;
   2299       /* On't stack! */
   2300       typestack_push( cc, parser, td3, &typeE, level );
   2301       goto acquire_Type;
   2302    }
   2303 
   2304    /* gcc (GCC) 4.4.0 20081017 (experimental) occasionally produces
   2305       DW_TAG_enumerator with only a DW_AT_name but no
   2306       DW_AT_const_value.  This is in violation of the Dwarf3 standard,
   2307       and appears to be a new "feature" of gcc - versions 4.3.x and
   2308       earlier do not appear to do this.  So accept DW_TAG_enumerator
   2309       which only have a name but no value.  An example:
   2310 
   2311       <1><180>: Abbrev Number: 6 (DW_TAG_enumeration_type)
   2312          <181>   DW_AT_name        : (indirect string, offset: 0xda70):
   2313                                      QtMsgType
   2314          <185>   DW_AT_byte_size   : 4
   2315          <186>   DW_AT_decl_file   : 14
   2316          <187>   DW_AT_decl_line   : 1480
   2317          <189>   DW_AT_sibling     : <0x1a7>
   2318       <2><18d>: Abbrev Number: 7 (DW_TAG_enumerator)
   2319          <18e>   DW_AT_name        : (indirect string, offset: 0x9e18):
   2320                                      QtDebugMsg
   2321       <2><192>: Abbrev Number: 7 (DW_TAG_enumerator)
   2322          <193>   DW_AT_name        : (indirect string, offset: 0x1505f):
   2323                                      QtWarningMsg
   2324       <2><197>: Abbrev Number: 7 (DW_TAG_enumerator)
   2325          <198>   DW_AT_name        : (indirect string, offset: 0x16f4a):
   2326                                      QtCriticalMsg
   2327       <2><19c>: Abbrev Number: 7 (DW_TAG_enumerator)
   2328          <19d>   DW_AT_name        : (indirect string, offset: 0x156dd):
   2329                                      QtFatalMsg
   2330       <2><1a1>: Abbrev Number: 7 (DW_TAG_enumerator)
   2331          <1a2>   DW_AT_name        : (indirect string, offset: 0x13660):
   2332                                      QtSystemMsg
   2333    */
   2334    if (dtag == DW_TAG_enumerator) {
   2335       VG_(memset)( &atomE, 0, sizeof(atomE) );
   2336       atomE.cuOff = posn;
   2337       atomE.tag   = Te_Atom;
   2338       while (True) {
   2339          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
   2340          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
   2341          if (attr == 0 && form == 0) break;
   2342          get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
   2343                             cc, c_die, False/*td3*/, form );
   2344          if (attr == DW_AT_name && ctsMemSzB > 0) {
   2345             atomE.Te.Atom.name
   2346               = ML_(dinfo_strdup)( "di.readdwarf3.pTD.enumerator.1",
   2347                                    (UChar*)(UWord)cts );
   2348          }
   2349          if (attr == DW_AT_const_value && ctsSzB > 0) {
   2350             atomE.Te.Atom.value = cts;
   2351             atomE.Te.Atom.valueKnown = True;
   2352          }
   2353       }
   2354       /* Do we have something that looks sane? */
   2355       if (atomE.Te.Atom.name == NULL)
   2356          goto bad_DIE;
   2357       /* Do we have a plausible parent? */
   2358       if (typestack_is_empty(parser)) goto bad_DIE;
   2359       vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
   2360       vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
   2361       if (level != parser->qlevel[parser->sp]+1) goto bad_DIE;
   2362       if (parser->qparentE[parser->sp].tag != Te_TyEnum) goto bad_DIE;
   2363       /* Record this child in the parent */
   2364       vg_assert(parser->qparentE[parser->sp].Te.TyEnum.atomRs);
   2365       VG_(addToXA)( parser->qparentE[parser->sp].Te.TyEnum.atomRs,
   2366                     &atomE );
   2367       /* And record the child itself */
   2368       goto acquire_Atom;
   2369    }
   2370 
   2371    /* Treat DW_TAG_class_type as if it was a DW_TAG_structure_type.  I
   2372       don't know if this is correct, but it at least makes this reader
   2373       usable for gcc-4.3 produced Dwarf3. */
   2374    if (dtag == DW_TAG_structure_type || dtag == DW_TAG_class_type
   2375        || dtag == DW_TAG_union_type) {
   2376       Bool have_szB = False;
   2377       Bool is_decl  = False;
   2378       Bool is_spec  = False;
   2379       /* Create a new Type to hold the results. */
   2380       VG_(memset)(&typeE, 0, sizeof(typeE));
   2381       typeE.cuOff = posn;
   2382       typeE.tag   = Te_TyStOrUn;
   2383       typeE.Te.TyStOrUn.name = NULL;
   2384       typeE.Te.TyStOrUn.fieldRs
   2385          = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.pTD.struct_type.1",
   2386                        ML_(dinfo_free),
   2387                        sizeof(UWord) );
   2388       typeE.Te.TyStOrUn.complete = True;
   2389       typeE.Te.TyStOrUn.isStruct = dtag == DW_TAG_structure_type
   2390                                    || dtag == DW_TAG_class_type;
   2391       while (True) {
   2392          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
   2393          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
   2394          if (attr == 0 && form == 0) break;
   2395          get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
   2396                             cc, c_die, False/*td3*/, form );
   2397          if (attr == DW_AT_name && ctsMemSzB > 0) {
   2398             typeE.Te.TyStOrUn.name
   2399                = ML_(dinfo_strdup)( "di.readdwarf3.ptD.struct_type.2",
   2400                                     (UChar*)(UWord)cts );
   2401          }
   2402          if (attr == DW_AT_byte_size && ctsSzB >= 0) {
   2403             typeE.Te.TyStOrUn.szB = cts;
   2404             have_szB = True;
   2405          }
   2406          if (attr == DW_AT_declaration && ctsSzB > 0 && cts > 0) {
   2407             is_decl = True;
   2408          }
   2409          if (attr == DW_AT_specification && ctsSzB > 0 && cts > 0) {
   2410             is_spec = True;
   2411          }
   2412       }
   2413       /* Do we have something that looks sane? */
   2414       if (is_decl && (!is_spec)) {
   2415          /* It's a DW_AT_declaration.  We require the name but
   2416             nothing else. */
   2417          if (typeE.Te.TyStOrUn.name == NULL)
   2418             goto bad_DIE;
   2419          typeE.Te.TyStOrUn.complete = False;
   2420          /* JRS 2009 Aug 10: <possible kludge>? */
   2421          /* Push this tyent on the stack, even though it's incomplete.
   2422             It appears that gcc-4.4 on Fedora 11 will sometimes create
   2423             DW_TAG_member entries for it, and so we need to have a
   2424             plausible parent present in order for that to work.  See
   2425             #200029 comments 8 and 9. */
   2426          typestack_push( cc, parser, td3, &typeE, level );
   2427          /* </possible kludge> */
   2428          goto acquire_Type;
   2429       }
   2430       if ((!is_decl) /* && (!is_spec) */) {
   2431          /* this is the common, ordinary case */
   2432          if ((!have_szB) /* we must know the size */
   2433              /* But the name can be present, or not */)
   2434             goto bad_DIE;
   2435          /* On't stack! */
   2436          typestack_push( cc, parser, td3, &typeE, level );
   2437          goto acquire_Type;
   2438       }
   2439       else {
   2440          /* don't know how to handle any other variants just now */
   2441          goto bad_DIE;
   2442       }
   2443    }
   2444 
   2445    if (dtag == DW_TAG_member) {
   2446       /* Acquire member entries for both DW_TAG_structure_type and
   2447          DW_TAG_union_type.  They differ minorly, in that struct
   2448          members must have a DW_AT_data_member_location expression
   2449          whereas union members must not. */
   2450       Bool parent_is_struct;
   2451       VG_(memset)( &fieldE, 0, sizeof(fieldE) );
   2452       fieldE.cuOff = posn;
   2453       fieldE.tag   = Te_Field;
   2454       fieldE.Te.Field.typeR = D3_INVALID_CUOFF;
   2455       while (True) {
   2456          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
   2457          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
   2458          if (attr == 0 && form == 0) break;
   2459          get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
   2460                             cc, c_die, False/*td3*/, form );
   2461          if (attr == DW_AT_name && ctsMemSzB > 0) {
   2462             fieldE.Te.Field.name
   2463                = ML_(dinfo_strdup)( "di.readdwarf3.ptD.member.1",
   2464                                     (UChar*)(UWord)cts );
   2465          }
   2466          if (attr == DW_AT_type && ctsSzB > 0) {
   2467             fieldE.Te.Field.typeR = (UWord)cts;
   2468          }
   2469          /* There are 2 different cases for DW_AT_data_member_location.
   2470             If it is a constant class attribute, it contains byte offset
   2471             from the beginning of the containing entity.
   2472             Otherwise it is a location expression.  */
   2473          if (attr == DW_AT_data_member_location && ctsSzB > 0) {
   2474             fieldE.Te.Field.nLoc = -1;
   2475             fieldE.Te.Field.pos.offset = cts;
   2476          } else if (attr == DW_AT_data_member_location && ctsMemSzB > 0) {
   2477             fieldE.Te.Field.nLoc = (UWord)ctsMemSzB;
   2478             fieldE.Te.Field.pos.loc
   2479                = ML_(dinfo_memdup)( "di.readdwarf3.ptD.member.2",
   2480                                     (UChar*)(UWord)cts,
   2481                                     (SizeT)fieldE.Te.Field.nLoc );
   2482          }
   2483       }
   2484       /* Do we have a plausible parent? */
   2485       if (typestack_is_empty(parser)) goto bad_DIE;
   2486       vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
   2487       vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
   2488       if (level != parser->qlevel[parser->sp]+1) goto bad_DIE;
   2489       if (parser->qparentE[parser->sp].tag != Te_TyStOrUn) goto bad_DIE;
   2490       /* Do we have something that looks sane?  If this is a member of a
   2491          struct, we must have a location expression; but if a member
   2492          of a union that is irrelevant (D3 spec sec 5.6.6).  We ought
   2493          to reject in the latter case, but some compilers have been
   2494          observed to emit constant-zero expressions.  So just ignore
   2495          them. */
   2496       parent_is_struct
   2497          = parser->qparentE[parser->sp].Te.TyStOrUn.isStruct;
   2498       if (!fieldE.Te.Field.name)
   2499          fieldE.Te.Field.name
   2500             = ML_(dinfo_strdup)( "di.readdwarf3.ptD.member.3",
   2501                                  "<anon_field>" );
   2502       vg_assert(fieldE.Te.Field.name);
   2503       if (fieldE.Te.Field.typeR == D3_INVALID_CUOFF)
   2504          goto bad_DIE;
   2505       if (fieldE.Te.Field.nLoc) {
   2506          if (!parent_is_struct) {
   2507             /* If this is a union type, pretend we haven't seen the data
   2508                member location expression, as it is by definition
   2509                redundant (it must be zero). */
   2510             if (fieldE.Te.Field.nLoc > 0)
   2511                ML_(dinfo_free)(fieldE.Te.Field.pos.loc);
   2512             fieldE.Te.Field.pos.loc = NULL;
   2513             fieldE.Te.Field.nLoc = 0;
   2514          }
   2515          /* Record this child in the parent */
   2516          fieldE.Te.Field.isStruct = parent_is_struct;
   2517          vg_assert(parser->qparentE[parser->sp].Te.TyStOrUn.fieldRs);
   2518          VG_(addToXA)( parser->qparentE[parser->sp].Te.TyStOrUn.fieldRs,
   2519                        &posn );
   2520          /* And record the child itself */
   2521          goto acquire_Field;
   2522       } else {
   2523          /* Member with no location - this can happen with static
   2524             const members in C++ code which are compile time constants
   2525             that do not exist in the class. They're not of any interest
   2526             to us so we ignore them. */
   2527       }
   2528    }
   2529 
   2530    if (dtag == DW_TAG_array_type) {
   2531       VG_(memset)(&typeE, 0, sizeof(typeE));
   2532       typeE.cuOff = posn;
   2533       typeE.tag   = Te_TyArray;
   2534       typeE.Te.TyArray.typeR = D3_INVALID_CUOFF;
   2535       typeE.Te.TyArray.boundRs
   2536          = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ptD.array_type.1",
   2537                        ML_(dinfo_free),
   2538                        sizeof(UWord) );
   2539       while (True) {
   2540          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
   2541          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
   2542          if (attr == 0 && form == 0) break;
   2543          get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
   2544                             cc, c_die, False/*td3*/, form );
   2545          if (attr == DW_AT_type && ctsSzB > 0) {
   2546             typeE.Te.TyArray.typeR = (UWord)cts;
   2547          }
   2548       }
   2549       if (typeE.Te.TyArray.typeR == D3_INVALID_CUOFF)
   2550          goto bad_DIE;
   2551       /* On't stack! */
   2552       typestack_push( cc, parser, td3, &typeE, level );
   2553       goto acquire_Type;
   2554    }
   2555 
   2556    /* this is a subrange type defining the bounds of an array. */
   2557    if (dtag == DW_TAG_subrange_type
   2558        && subrange_type_denotes_array_bounds(parser, dtag)) {
   2559       Bool have_lower = False;
   2560       Bool have_upper = False;
   2561       Bool have_count = False;
   2562       Long lower = 0;
   2563       Long upper = 0;
   2564 
   2565       switch (parser->language) {
   2566          case 'C': have_lower = True;  lower = 0; break;
   2567          case 'F': have_lower = True;  lower = 1; break;
   2568          case '?': have_lower = False; break;
   2569          case 'A': have_lower = False; break;
   2570          default:  vg_assert(0); /* assured us by handling of
   2571                                     DW_TAG_compile_unit in this fn */
   2572       }
   2573 
   2574       VG_(memset)( &boundE, 0, sizeof(boundE) );
   2575       boundE.cuOff = D3_INVALID_CUOFF;
   2576       boundE.tag   = Te_Bound;
   2577       while (True) {
   2578          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
   2579          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
   2580          if (attr == 0 && form == 0) break;
   2581          get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
   2582                             cc, c_die, False/*td3*/, form );
   2583          if (attr == DW_AT_lower_bound && ctsSzB > 0) {
   2584             lower      = (Long)cts;
   2585             have_lower = True;
   2586          }
   2587          if (attr == DW_AT_upper_bound && ctsSzB > 0) {
   2588             upper      = (Long)cts;
   2589             have_upper = True;
   2590          }
   2591          if (attr == DW_AT_count && ctsSzB > 0) {
   2592             /*count    = (Long)cts;*/
   2593             have_count = True;
   2594          }
   2595       }
   2596       /* FIXME: potentially skip the rest if no parent present, since
   2597          it could be the case that this subrange type is free-standing
   2598          (not being used to describe the bounds of a containing array
   2599          type) */
   2600       /* Do we have a plausible parent? */
   2601       if (typestack_is_empty(parser)) goto bad_DIE;
   2602       vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
   2603       vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
   2604       if (level != parser->qlevel[parser->sp]+1) goto bad_DIE;
   2605       if (parser->qparentE[parser->sp].tag != Te_TyArray) goto bad_DIE;
   2606 
   2607       /* Figure out if we have a definite range or not */
   2608       if (have_lower && have_upper && (!have_count)) {
   2609          boundE.Te.Bound.knownL = True;
   2610          boundE.Te.Bound.knownU = True;
   2611          boundE.Te.Bound.boundL = lower;
   2612          boundE.Te.Bound.boundU = upper;
   2613       }
   2614       else if (have_lower && (!have_upper) && (!have_count)) {
   2615          boundE.Te.Bound.knownL = True;
   2616          boundE.Te.Bound.knownU = False;
   2617          boundE.Te.Bound.boundL = lower;
   2618          boundE.Te.Bound.boundU = 0;
   2619       }
   2620       else if ((!have_lower) && have_upper && (!have_count)) {
   2621          boundE.Te.Bound.knownL = False;
   2622          boundE.Te.Bound.knownU = True;
   2623          boundE.Te.Bound.boundL = 0;
   2624          boundE.Te.Bound.boundU = upper;
   2625       }
   2626       else if ((!have_lower) && (!have_upper) && (!have_count)) {
   2627          boundE.Te.Bound.knownL = False;
   2628          boundE.Te.Bound.knownU = False;
   2629          boundE.Te.Bound.boundL = 0;
   2630          boundE.Te.Bound.boundU = 0;
   2631       } else {
   2632          /* FIXME: handle more cases */
   2633          goto bad_DIE;
   2634       }
   2635 
   2636       /* Record this bound in the parent */
   2637       boundE.cuOff = posn;
   2638       vg_assert(parser->qparentE[parser->sp].Te.TyArray.boundRs);
   2639       VG_(addToXA)( parser->qparentE[parser->sp].Te.TyArray.boundRs,
   2640                     &boundE );
   2641       /* And record the child itself */
   2642       goto acquire_Bound;
   2643    }
   2644 
   2645    /* typedef or subrange_type other than array bounds. */
   2646    if (dtag == DW_TAG_typedef
   2647        || (dtag == DW_TAG_subrange_type
   2648            && !subrange_type_denotes_array_bounds(parser, dtag))) {
   2649       /* subrange_type other than array bound is only for Ada. */
   2650       vg_assert (dtag == DW_TAG_typedef || parser->language == 'A');
   2651       /* We can pick up a new typedef/subrange_type any time. */
   2652       VG_(memset)(&typeE, 0, sizeof(typeE));
   2653       typeE.cuOff = D3_INVALID_CUOFF;
   2654       typeE.tag   = Te_TyTyDef;
   2655       typeE.Te.TyTyDef.name = NULL;
   2656       typeE.Te.TyTyDef.typeR = D3_INVALID_CUOFF;
   2657       while (True) {
   2658          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
   2659          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
   2660          if (attr == 0 && form == 0) break;
   2661          get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
   2662                             cc, c_die, False/*td3*/, form );
   2663          if (attr == DW_AT_name && ctsMemSzB > 0) {
   2664             typeE.Te.TyTyDef.name
   2665                = ML_(dinfo_strdup)( "di.readdwarf3.ptD.typedef.1",
   2666                                     (UChar*)(UWord)cts );
   2667          }
   2668          if (attr == DW_AT_type && ctsSzB > 0) {
   2669             typeE.Te.TyTyDef.typeR = (UWord)cts;
   2670          }
   2671       }
   2672       /* Do we have something that looks sane? */
   2673       if (/* must have a name */
   2674           typeE.Te.TyTyDef.name == NULL
   2675           /* However gcc gnat Ada generates minimal typedef
   2676              such as the below => accept no name for Ada.
   2677              <6><91cc>: DW_TAG_typedef
   2678                 DW_AT_abstract_ori: <9066>
   2679           */
   2680           && parser->language != 'A'
   2681           /* but the referred-to type can be absent */)
   2682          goto bad_DIE;
   2683       else
   2684          goto acquire_Type;
   2685    }
   2686 
   2687    if (dtag == DW_TAG_subroutine_type) {
   2688       /* function type? just record that one fact and ask no
   2689          further questions. */
   2690       VG_(memset)(&typeE, 0, sizeof(typeE));
   2691       typeE.cuOff = D3_INVALID_CUOFF;
   2692       typeE.tag   = Te_TyFn;
   2693       goto acquire_Type;
   2694    }
   2695 
   2696    if (dtag == DW_TAG_volatile_type || dtag == DW_TAG_const_type) {
   2697       Int have_ty = 0;
   2698       VG_(memset)(&typeE, 0, sizeof(typeE));
   2699       typeE.cuOff = D3_INVALID_CUOFF;
   2700       typeE.tag   = Te_TyQual;
   2701       typeE.Te.TyQual.qual
   2702          = dtag == DW_TAG_volatile_type ? 'V' : 'C';
   2703       /* target type defaults to 'void' */
   2704       typeE.Te.TyQual.typeR = D3_FAKEVOID_CUOFF;
   2705       while (True) {
   2706          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
   2707          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
   2708          if (attr == 0 && form == 0) break;
   2709          get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
   2710                             cc, c_die, False/*td3*/, form );
   2711          if (attr == DW_AT_type && ctsSzB > 0) {
   2712             typeE.Te.TyQual.typeR = (UWord)cts;
   2713             have_ty++;
   2714          }
   2715       }
   2716       /* gcc sometimes generates DW_TAG_const/volatile_type without
   2717          DW_AT_type and GDB appears to interpret the type as 'const
   2718          void' (resp. 'volatile void').  So just allow it .. */
   2719       if (have_ty == 1 || have_ty == 0)
   2720          goto acquire_Type;
   2721       else
   2722          goto bad_DIE;
   2723    }
   2724 
   2725    /* else ignore this DIE */
   2726    return;
   2727    /*NOTREACHED*/
   2728 
   2729   acquire_Type:
   2730    if (0) VG_(printf)("YYYY Acquire Type\n");
   2731    vg_assert(ML_(TyEnt__is_type)( &typeE ));
   2732    vg_assert(typeE.cuOff == D3_INVALID_CUOFF || typeE.cuOff == posn);
   2733    typeE.cuOff = posn;
   2734    VG_(addToXA)( tyents, &typeE );
   2735    return;
   2736    /*NOTREACHED*/
   2737 
   2738   acquire_Atom:
   2739    if (0) VG_(printf)("YYYY Acquire Atom\n");
   2740    vg_assert(atomE.tag == Te_Atom);
   2741    vg_assert(atomE.cuOff == D3_INVALID_CUOFF || atomE.cuOff == posn);
   2742    atomE.cuOff = posn;
   2743    VG_(addToXA)( tyents, &atomE );
   2744    return;
   2745    /*NOTREACHED*/
   2746 
   2747   acquire_Field:
   2748    /* For union members, Expr should be absent */
   2749    if (0) VG_(printf)("YYYY Acquire Field\n");
   2750    vg_assert(fieldE.tag == Te_Field);
   2751    vg_assert(fieldE.Te.Field.nLoc <= 0 || fieldE.Te.Field.pos.loc != NULL);
   2752    vg_assert(fieldE.Te.Field.nLoc != 0 || fieldE.Te.Field.pos.loc == NULL);
   2753    if (fieldE.Te.Field.isStruct) {
   2754       vg_assert(fieldE.Te.Field.nLoc != 0);
   2755    } else {
   2756       vg_assert(fieldE.Te.Field.nLoc == 0);
   2757    }
   2758    vg_assert(fieldE.cuOff == D3_INVALID_CUOFF || fieldE.cuOff == posn);
   2759    fieldE.cuOff = posn;
   2760    VG_(addToXA)( tyents, &fieldE );
   2761    return;
   2762    /*NOTREACHED*/
   2763 
   2764   acquire_Bound:
   2765    if (0) VG_(printf)("YYYY Acquire Bound\n");
   2766    vg_assert(boundE.tag == Te_Bound);
   2767    vg_assert(boundE.cuOff == D3_INVALID_CUOFF || boundE.cuOff == posn);
   2768    boundE.cuOff = posn;
   2769    VG_(addToXA)( tyents, &boundE );
   2770    return;
   2771    /*NOTREACHED*/
   2772 
   2773   bad_DIE:
   2774    set_position_of_Cursor( c_die,  saved_die_c_offset );
   2775    set_position_of_Cursor( c_abbv, saved_abbv_c_offset );
   2776    VG_(printf)("\nparse_type_DIE: confused by:\n");
   2777    VG_(printf)(" <%d><%lx>: %s\n", level, posn, ML_(pp_DW_TAG)( dtag ) );
   2778    while (True) {
   2779       DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
   2780       DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
   2781       if (attr == 0 && form == 0) break;
   2782       VG_(printf)("     %18s: ", ML_(pp_DW_AT)(attr));
   2783       /* Get the form contents, so as to print them */
   2784       get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
   2785                          cc, c_die, True, form );
   2786       VG_(printf)("\t\n");
   2787    }
   2788    VG_(printf)("\n");
   2789    cc->barf("parse_type_DIE: confused by the above DIE");
   2790    /*NOTREACHED*/
   2791 }
   2792 
   2793 
   2794 /*------------------------------------------------------------*/
   2795 /*---                                                      ---*/
   2796 /*--- Compression of type DIE information                  ---*/
   2797 /*---                                                      ---*/
   2798 /*------------------------------------------------------------*/
   2799 
   2800 static UWord chase_cuOff ( Bool* changed,
   2801                            XArray* /* of TyEnt */ ents,
   2802                            TyEntIndexCache* ents_cache,
   2803                            UWord cuOff )
   2804 {
   2805    /* Look up the entry at 'cuOff'.  If it is a Te_INDIR, return the
   2806       offset it redirects to and set *changed; in every other case
   2807       (including "no such entry") return 'cuOff' and clear *changed. */
   2808    TyEnt* hit      = ML_(TyEnts__index_by_cuOff)( ents, ents_cache, cuOff );
   2809    UWord  resolved = cuOff;
   2810    Bool   followed = False;
   2811    if (hit) {
   2812       vg_assert(hit->tag != Te_EMPTY);
   2813       if (hit->tag == Te_INDIR) {
   2814          vg_assert(hit->Te.INDIR.indR < cuOff);
   2815          resolved = hit->Te.INDIR.indR;
   2816          followed = True;
   2817       }
   2818    } else {
   2819       VG_(printf)("chase_cuOff: no entry for 0x%05lx\n", cuOff);
   2820    }
   2821    *changed = followed;
   2822    return resolved;
   2823 }
   2824 
   2825 static
   2826 void chase_cuOffs_in_XArray ( Bool* changed,
   2827                               XArray* /* of TyEnt */ ents,
   2828                               TyEntIndexCache* ents_cache,
   2829                               /*MOD*/XArray* /* of UWord */ cuOffs )
   2830 {
   2831    Bool b2 = False;
   2832    Word i, n = VG_(sizeXA)( cuOffs );
   2833    for (i = 0; i < n; i++) {
   2834       Bool   b = False;
   2835       UWord* p = VG_(indexXA)( cuOffs, i );
   2836       *p = chase_cuOff( &b, ents, ents_cache, *p );
   2837       if (b)
   2838          b2 = True;
   2839    }
   2840    *changed = b2;
   2841 }
   2842 
   2843 static Bool TyEnt__subst_R_fields ( XArray* /* of TyEnt */ ents,
   2844                                     TyEntIndexCache* ents_cache,
   2845                                     /*MOD*/TyEnt* te )
   2846 {
   2847    Bool b, changed = False;
   2848    switch (te->tag) {
   2849       case Te_EMPTY:
   2850          break;
   2851       case Te_INDIR:
   2852          te->Te.INDIR.indR
   2853             = chase_cuOff( &b, ents, ents_cache, te->Te.INDIR.indR );
   2854          if (b) changed = True;
   2855          break;
   2856       case Te_UNKNOWN:
   2857          break;
   2858       case Te_Atom:
   2859          break;
   2860       case Te_Field:
   2861          te->Te.Field.typeR
   2862             = chase_cuOff( &b, ents, ents_cache, te->Te.Field.typeR );
   2863          if (b) changed = True;
   2864          break;
   2865       case Te_Bound:
   2866          break;
   2867       case Te_TyBase:
   2868          break;
   2869       case Te_TyPorR:
   2870          te->Te.TyPorR.typeR
   2871             = chase_cuOff( &b, ents, ents_cache, te->Te.TyPorR.typeR );
   2872          if (b) changed = True;
   2873          break;
   2874       case Te_TyTyDef:
   2875          te->Te.TyTyDef.typeR
   2876             = chase_cuOff( &b, ents, ents_cache, te->Te.TyTyDef.typeR );
   2877          if (b) changed = True;
   2878          break;
   2879       case Te_TyStOrUn:
   2880          chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyStOrUn.fieldRs );
   2881          if (b) changed = True;
   2882          break;
   2883       case Te_TyEnum:
   2884          chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyEnum.atomRs );
   2885          if (b) changed = True;
   2886          break;
   2887       case Te_TyArray:
   2888          te->Te.TyArray.typeR
   2889             = chase_cuOff( &b, ents, ents_cache, te->Te.TyArray.typeR );
   2890          if (b) changed = True;
   2891          chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyArray.boundRs );
   2892          if (b) changed = True;
   2893          break;
   2894       case Te_TyFn:
   2895          break;
   2896       case Te_TyQual:
   2897          te->Te.TyQual.typeR
   2898             = chase_cuOff( &b, ents, ents_cache, te->Te.TyQual.typeR );
   2899          if (b) changed = True;
   2900          break;
   2901       case Te_TyVoid:
   2902          break;
   2903       default:
   2904          ML_(pp_TyEnt)(te);
   2905          vg_assert(0);
   2906    }
   2907    return changed;
   2908 }
   2909 
   2910 /* Make a pass over 'ents'.  For each tyent, inspect the target of any
   2911    'R' or 'Rs' fields (those which refer to other tyents), and replace
   2912    any which point to INDIR nodes with the target of the indirection
   2913    (which should not itself be an indirection).  In summary, this
   2914    routine shorts out all references to indirection nodes. */
   2915 static
   2916 Word dedup_types_substitution_pass ( /*MOD*/XArray* /* of TyEnt */ ents,
   2917                                      TyEntIndexCache* ents_cache )
   2918 {
   2919    Word i, n, nChanged = 0;
   2920    Bool b;
   2921    n = VG_(sizeXA)( ents );
   2922    for (i = 0; i < n; i++) {
   2923       TyEnt* ent = VG_(indexXA)( ents, i );
   2924       vg_assert(ent->tag != Te_EMPTY);
   2925       /* We have to substitute everything, even indirections, so as to
   2926          ensure that chains of indirections don't build up. */
   2927       b = TyEnt__subst_R_fields( ents, ents_cache, ent );
   2928       if (b)
   2929          nChanged++;
   2930    }
   2931 
   2932    return nChanged;
   2933 }
   2934 
   2935 
   2936 /* Make a pass over 'ents', building a dictionary of TyEnts as we go.
   2937    Look up each new tyent in the dictionary in turn.  If it is already
   2938    in the dictionary, replace this tyent with an indirection to the
   2939    existing one, and delete any malloc'd stuff hanging off this one.
   2940    In summary, this routine commons up all tyents that are identical
   2941    as defined by TyEnt__cmp_by_all_except_cuOff. */
   2942 static
   2943 Word dedup_types_commoning_pass ( /*MOD*/XArray* /* of TyEnt */ ents )
   2944 {
   2945    Word    n, i, nDeleted;
   2946    WordFM* dict; /* TyEnt* -> void */
   2947    TyEnt*  ent;
   2948    UWord   keyW, valW;
   2949 
   2950    dict = VG_(newFM)(
   2951              ML_(dinfo_zalloc), "di.readdwarf3.dtcp.1",
   2952              ML_(dinfo_free),
   2953              (Word(*)(UWord,UWord)) ML_(TyEnt__cmp_by_all_except_cuOff)
   2954           );
   2955 
   2956    nDeleted = 0;
   2957    n = VG_(sizeXA)( ents );
   2958    for (i = 0; i < n; i++) {
   2959       ent = VG_(indexXA)( ents, i );
   2960       vg_assert(ent->tag != Te_EMPTY);
   2961 
   2962       /* Ignore indirections, although check that they are
   2963          not forming a cycle. */
   2964       if (ent->tag == Te_INDIR) {
   2965          vg_assert(ent->Te.INDIR.indR < ent->cuOff);
   2966          continue;
   2967       }
   2968 
   2969       keyW = valW = 0;
   2970       if (VG_(lookupFM)( dict, &keyW, &valW, (UWord)ent )) {
   2971          /* it's already in the dictionary. */
   2972          TyEnt* old = (TyEnt*)keyW;
   2973          vg_assert(valW == 0);
   2974          vg_assert(old != ent);
   2975          vg_assert(old->tag != Te_INDIR);
   2976          /* since we are traversing the array in increasing order of
   2977             cuOff: */
   2978          vg_assert(old->cuOff < ent->cuOff);
   2979          /* So anyway, dump this entry and replace it with an
   2980             indirection to the one in the dictionary.  Note that the
   2981             assertion above guarantees that we cannot create cycles of
   2982             indirections, since we are always creating an indirection
   2983             to a tyent with a cuOff lower than this one. */
   2984          ML_(TyEnt__make_EMPTY)( ent );
   2985          ent->tag = Te_INDIR;
   2986          ent->Te.INDIR.indR = old->cuOff;
   2987          nDeleted++;
   2988       } else {
   2989          /* not in dictionary; add it and keep going. */
   2990          VG_(addToFM)( dict, (UWord)ent, 0 );
   2991       }
   2992    }
   2993 
   2994    VG_(deleteFM)( dict, NULL, NULL );
   2995 
   2996    return nDeleted;
   2997 }
   2998 
   2999 
   3000 static
   3001 void dedup_types ( Bool td3,
   3002                    /*MOD*/XArray* /* of TyEnt */ ents,
   3003                    TyEntIndexCache* ents_cache )
   3004 {
   3005    Word m, n, i, nDel, nSubst, nThresh;
   3006    if (0) td3 = True;
   3007 
   3008    n = VG_(sizeXA)( ents );
   3009 
   3010    /* If a commoning pass and a substitution pass both make fewer than
   3011       this many changes, just stop.  It's pointless to burn up CPU
   3012       time trying to compress the last 1% or so out of the array. */
   3013    nThresh = n / 200;
   3014 
   3015    /* First we must sort .ents by its .cuOff fields, so we
   3016       can index into it. */
   3017    VG_(setCmpFnXA)(
   3018       ents,
   3019       (Int(*)(void*,void*)) ML_(TyEnt__cmp_by_cuOff_only)
   3020    );
   3021    VG_(sortXA)( ents );
   3022 
   3023    /* Now repeatedly do commoning and substitution passes over
   3024       the array, until there are no more changes. */
   3025    do {
   3026       nDel   = dedup_types_commoning_pass ( ents );
   3027       nSubst = dedup_types_substitution_pass ( ents, ents_cache );
   3028       vg_assert(nDel >= 0 && nSubst >= 0);
   3029       TRACE_D3("   %ld deletions, %ld substitutions\n", nDel, nSubst);
   3030    } while (nDel > nThresh || nSubst > nThresh);
   3031 
   3032    /* Sanity check: all INDIR nodes should point at a non-INDIR thing.
   3033       In fact this should be true at the end of every loop iteration
   3034       above (a commoning pass followed by a substitution pass), but
   3035       checking it on every iteration is excessively expensive.  Note,
   3036       this loop also computes 'm' for the stats printing below it. */
   3037    m = 0;
   3038    n = VG_(sizeXA)( ents );
   3039    for (i = 0; i < n; i++) {
   3040       TyEnt *ent, *ind;
   3041       ent = VG_(indexXA)( ents, i );
   3042       if (ent->tag != Te_INDIR) continue;
   3043       m++;
   3044       ind = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
   3045                                          ent->Te.INDIR.indR );
   3046       vg_assert(ind);
   3047       vg_assert(ind->tag != Te_INDIR);
   3048    }
   3049 
   3050    TRACE_D3("Overall: %ld before, %ld after\n", n, n-m);
   3051 }
   3052 
   3053 
   3054 /*------------------------------------------------------------*/
   3055 /*---                                                      ---*/
   3056 /*--- Resolution of references to type DIEs                ---*/
   3057 /*---                                                      ---*/
   3058 /*------------------------------------------------------------*/
   3059 
   3060 /* Make a pass through the (temporary) variables array.  Examine the
   3061    type of each variable, check is it found, and chase any Te_INDIRs.
   3062    Postcondition is: each variable has a typeR field that refers to a
   3063    valid type in tyents, or a Te_UNKNOWN, and is certainly guaranteed
   3064    not to refer to a Te_INDIR.  (This is so that we can throw all the
   3065    Te_INDIRs away later). */
   3066 
   3067 __attribute__((noinline))
   3068 static void resolve_variable_types (
   3069                void (*barf)( HChar* ) __attribute__((noreturn)),
   3070                /*R-O*/XArray* /* of TyEnt */ ents,
   3071                /*MOD*/TyEntIndexCache* ents_cache,
   3072                /*MOD*/XArray* /* of TempVar* */ vars
   3073             )
   3074 {
   3075    Word i, n;
   3076    n = VG_(sizeXA)( vars );
   3077    for (i = 0; i < n; i++) {
   3078       TempVar* var = *(TempVar**)VG_(indexXA)( vars, i );
   3079       /* This is the stated type of the variable.  But it might be
   3080          an indirection, so be careful. */
   3081       TyEnt* ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
   3082                                                 var->typeR );
   3083       if (ent && ent->tag == Te_INDIR) {
   3084          ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
   3085                                             ent->Te.INDIR.indR );
   3086          vg_assert(ent);
   3087          vg_assert(ent->tag != Te_INDIR);
   3088       }
   3089 
   3090       /* Deal first with "normal" cases */
   3091       if (ent && ML_(TyEnt__is_type)(ent)) {
   3092          var->typeR = ent->cuOff;
   3093          continue;
   3094       }
   3095 
   3096       /* If there's no ent, it probably we did not manage to read a
   3097          type at the cuOffset which is stated as being this variable's
   3098          type.  Maybe a deficiency in parse_type_DIE.  Complain. */
   3099       if (ent == NULL) {
   3100          VG_(printf)("\n: Invalid cuOff = 0x%05lx\n", var->typeR );
   3101          barf("resolve_variable_types: "
   3102               "cuOff does not refer to a known type");
   3103       }
   3104       vg_assert(ent);
   3105       /* If ent has any other tag, something bad happened, along the
   3106          lines of var->typeR not referring to a type at all. */
   3107       vg_assert(ent->tag == Te_UNKNOWN);
   3108       /* Just accept it; the type will be useless, but at least keep
   3109          going. */
   3110       var->typeR = ent->cuOff;
   3111    }
   3112 }
   3113 
   3114 
   3115 /*------------------------------------------------------------*/
   3116 /*---                                                      ---*/
   3117 /*--- Parsing of Compilation Units                         ---*/
   3118 /*---                                                      ---*/
   3119 /*------------------------------------------------------------*/
   3120 
   3121 static Int cmp_TempVar_by_dioff ( void* v1, void* v2 ) {
   3122    TempVar* t1 = *(TempVar**)v1;
   3123    TempVar* t2 = *(TempVar**)v2;
   3124    if (t1->dioff < t2->dioff) return -1;
   3125    if (t1->dioff > t2->dioff) return 1;
   3126    return 0;
   3127 }
   3128 
   3129 static void read_DIE (
   3130    /*MOD*/WordFM* /* of (XArray* of AddrRange, void) */ rangestree,
   3131    /*MOD*/XArray* /* of TyEnt */ tyents,
   3132    /*MOD*/XArray* /* of TempVar* */ tempvars,
   3133    /*MOD*/XArray* /* of GExpr* */ gexprs,
   3134    /*MOD*/D3TypeParser* typarser,
   3135    /*MOD*/D3VarParser* varparser,
   3136    Cursor* c, Bool td3, CUConst* cc, Int level
   3137 )
   3138 {
   3139    Cursor abbv;
   3140    ULong  atag, abbv_code;
   3141    UWord  posn;
   3142    UInt   has_children;
   3143    UWord  start_die_c_offset, start_abbv_c_offset;
   3144    UWord  after_die_c_offset, after_abbv_c_offset;
   3145 
   3146    /* --- Deal with this DIE --- */
   3147    posn      = get_position_of_Cursor( c );
   3148    abbv_code = get_ULEB128( c );
   3149    set_abbv_Cursor( &abbv, td3, cc, abbv_code );
   3150    atag      = get_ULEB128( &abbv );
   3151    TRACE_D3("\n");
   3152    TRACE_D3(" <%d><%lx>: Abbrev Number: %llu (%s)\n",
   3153             level, posn, abbv_code, ML_(pp_DW_TAG)( atag ) );
   3154 
   3155    if (atag == 0)
   3156       cc->barf("read_DIE: invalid zero tag on DIE");
   3157 
   3158    has_children = get_UChar( &abbv );
   3159    if (has_children != DW_children_no && has_children != DW_children_yes)
   3160       cc->barf("read_DIE: invalid has_children value");
   3161 
   3162    /* We're set up to look at the fields of this DIE.  Hand it off to
   3163       any parser(s) that want to see it.  Since they will in general
   3164       advance both the DIE and abbrev cursors, remember their current
   3165       settings so that we can then back up and do one final pass over
   3166       the DIE, to print out its contents. */
   3167 
   3168    start_die_c_offset  = get_position_of_Cursor( c );
   3169    start_abbv_c_offset = get_position_of_Cursor( &abbv );
   3170 
   3171    while (True) {
   3172       ULong cts;
   3173       Int   ctsSzB;
   3174       UWord ctsMemSzB;
   3175       ULong at_name = get_ULEB128( &abbv );
   3176       ULong at_form = get_ULEB128( &abbv );
   3177       if (at_name == 0 && at_form == 0) break;
   3178       TRACE_D3("     %18s: ", ML_(pp_DW_AT)(at_name));
   3179       /* Get the form contents, but ignore them; the only purpose is
   3180          to print them, if td3 is True */
   3181       get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
   3182                          cc, c, td3, (DW_FORM)at_form );
   3183       TRACE_D3("\t");
   3184       TRACE_D3("\n");
   3185    }
   3186 
   3187    after_die_c_offset  = get_position_of_Cursor( c );
   3188    after_abbv_c_offset = get_position_of_Cursor( &abbv );
   3189 
   3190    set_position_of_Cursor( c,     start_die_c_offset );
   3191    set_position_of_Cursor( &abbv, start_abbv_c_offset );
   3192 
   3193    parse_type_DIE( tyents,
   3194                    typarser,
   3195                    (DW_TAG)atag,
   3196                    posn,
   3197                    level,
   3198                    c,     /* DIE cursor */
   3199                    &abbv, /* abbrev cursor */
   3200                    cc,
   3201                    td3 );
   3202 
   3203    set_position_of_Cursor( c,     start_die_c_offset );
   3204    set_position_of_Cursor( &abbv, start_abbv_c_offset );
   3205 
   3206    parse_var_DIE( rangestree,
   3207                   tempvars,
   3208                   gexprs,
   3209                   varparser,
   3210                   (DW_TAG)atag,
   3211                   posn,
   3212                   level,
   3213                   c,     /* DIE cursor */
   3214                   &abbv, /* abbrev cursor */
   3215                   cc,
   3216                   td3 );
   3217 
   3218    set_position_of_Cursor( c,     after_die_c_offset );
   3219    set_position_of_Cursor( &abbv, after_abbv_c_offset );
   3220 
   3221    /* --- Now recurse into its children, if any --- */
   3222    if (has_children == DW_children_yes) {
   3223       if (0) TRACE_D3("BEGIN children of level %d\n", level);
   3224       while (True) {
   3225          atag = peek_ULEB128( c );
   3226          if (atag == 0) break;
   3227          read_DIE( rangestree, tyents, tempvars, gexprs,
   3228                    typarser, varparser,
   3229                    c, td3, cc, level+1 );
   3230       }
   3231       /* Now we need to eat the terminating zero */
   3232       atag = get_ULEB128( c );
   3233       vg_assert(atag == 0);
   3234       if (0) TRACE_D3("END children of level %d\n", level);
   3235    }
   3236 
   3237 }
   3238 
   3239 
   3240 static
   3241 void new_dwarf3_reader_wrk (
   3242    struct _DebugInfo* di,
   3243    __attribute__((noreturn)) void (*barf)( HChar* ),
   3244    UChar* debug_info_img,   SizeT debug_info_sz,
   3245    UChar* debug_abbv_img,   SizeT debug_abbv_sz,
   3246    UChar* debug_line_img,   SizeT debug_line_sz,
   3247    UChar* debug_str_img,    SizeT debug_str_sz,
   3248    UChar* debug_ranges_img, SizeT debug_ranges_sz,
   3249    UChar* debug_loc_img,    SizeT debug_loc_sz
   3250 )
   3251 {
   3252    XArray* /* of TyEnt */     tyents;
   3253    XArray* /* of TyEnt */     tyents_to_keep;
   3254    XArray* /* of GExpr* */    gexprs;
   3255    XArray* /* of TempVar* */  tempvars;
   3256    WordFM* /* of (XArray* of AddrRange, void) */ rangestree;
   3257    TyEntIndexCache* tyents_cache = NULL;
   3258    TyEntIndexCache* tyents_to_keep_cache = NULL;
   3259    TempVar *varp, *varp2;
   3260    GExpr* gexpr;
   3261    Cursor abbv; /* for showing .debug_abbrev */
   3262    Cursor info; /* primary cursor for parsing .debug_info */
   3263    Cursor ranges; /* for showing .debug_ranges */
   3264    D3TypeParser typarser;
   3265    D3VarParser varparser;
   3266    Addr  dr_base;
   3267    UWord dr_offset;
   3268    Word  i, j, n;
   3269    Bool td3 = di->trace_symtab;
   3270    XArray* /* of TempVar* */ dioff_lookup_tab;
   3271 #if 0
   3272    /* This doesn't work properly because it assumes all entries are
   3273       packed end to end, with no holes.  But that doesn't always
   3274       appear to be the case, so it loses sync.  And the D3 spec
   3275       doesn't appear to require a no-hole situation either. */
   3276    /* Display .debug_loc */
   3277    Addr  dl_base;
   3278    UWord dl_offset;
   3279    Cursor loc; /* for showing .debug_loc */
   3280    TRACE_SYMTAB("\n");
   3281    TRACE_SYMTAB("\n------ The contents of .debug_loc ------\n");
   3282    TRACE_SYMTAB("    Offset   Begin    End      Expression\n");
   3283    init_Cursor( &loc, debug_loc_img,
   3284                 debug_loc_sz, 0, barf,
   3285                 "Overrun whilst reading .debug_loc section(1)" );
   3286    dl_base = 0;
   3287    dl_offset = 0;
   3288    while (True) {
   3289       UWord  w1, w2;
   3290       UWord  len;
   3291       if (is_at_end_Cursor( &loc ))
   3292          break;
   3293 
   3294       /* Read a (host-)word pair.  This is something of a hack since
   3295          the word size to read is really dictated by the ELF file;
   3296          however, we assume we're reading a file with the same
   3297          word-sizeness as the host.  Reasonably enough. */
   3298       w1 = get_UWord( &loc );
   3299       w2 = get_UWord( &loc );
   3300 
   3301       if (w1 == 0 && w2 == 0) {
   3302          /* end of list.  reset 'base' */
   3303          TRACE_D3("    %08lx <End of list>\n", dl_offset);
   3304          dl_base = 0;
   3305          dl_offset = get_position_of_Cursor( &loc );
   3306          continue;
   3307       }
   3308 
   3309       if (w1 == -1UL) {
   3310          /* new value for 'base' */
   3311          TRACE_D3("    %08lx %16lx %08lx (base address)\n",
   3312                   dl_offset, w1, w2);
   3313          dl_base = w2;
   3314          continue;
   3315       }
   3316 
   3317       /* else a location expression follows */
   3318       TRACE_D3("    %08lx %08lx %08lx ",
   3319                dl_offset, w1 + dl_base, w2 + dl_base);
   3320       len = (UWord)get_UShort( &loc );
   3321       while (len > 0) {
   3322          UChar byte = get_UChar( &loc );
   3323          TRACE_D3("%02x", (UInt)byte);
   3324          len--;
   3325       }
   3326       TRACE_SYMTAB("\n");
   3327    }
   3328 #endif
   3329 
   3330    /* Display .debug_ranges */
   3331    TRACE_SYMTAB("\n");
   3332    TRACE_SYMTAB("\n------ The contents of .debug_ranges ------\n");
   3333    TRACE_SYMTAB("    Offset   Begin    End\n");
   3334    init_Cursor( &ranges, debug_ranges_img,
   3335                 debug_ranges_sz, 0, barf,
   3336                 "Overrun whilst reading .debug_ranges section(1)" );
   3337    dr_base = 0;
   3338    dr_offset = 0;
   3339    while (True) {
   3340       UWord  w1, w2;
   3341 
   3342       if (is_at_end_Cursor( &ranges ))
   3343          break;
   3344 
   3345       /* Read a (host-)word pair.  This is something of a hack since
   3346          the word size to read is really dictated by the ELF file;
   3347          however, we assume we're reading a file with the same
   3348          word-sizeness as the host.  Reasonably enough. */
   3349       w1 = get_UWord( &ranges );
   3350       w2 = get_UWord( &ranges );
   3351 
   3352       if (w1 == 0 && w2 == 0) {
   3353          /* end of list.  reset 'base' */
   3354          TRACE_D3("    %08lx <End of list>\n", dr_offset);
   3355          dr_base = 0;
   3356          dr_offset = get_position_of_Cursor( &ranges );
   3357          continue;
   3358       }
   3359 
   3360       if (w1 == -1UL) {
   3361          /* new value for 'base' */
   3362          TRACE_D3("    %08lx %16lx %08lx (base address)\n",
   3363                   dr_offset, w1, w2);
   3364          dr_base = w2;
   3365          continue;
   3366       }
   3367 
   3368       /* else a range [w1+base, w2+base) is denoted */
   3369       TRACE_D3("    %08lx %08lx %08lx\n",
   3370                dr_offset, w1 + dr_base, w2 + dr_base);
   3371    }
   3372 
   3373    /* Display .debug_abbrev */
   3374    init_Cursor( &abbv, debug_abbv_img, debug_abbv_sz, 0, barf,
   3375                 "Overrun whilst reading .debug_abbrev section" );
   3376    TRACE_SYMTAB("\n");
   3377    TRACE_SYMTAB("\n------ The contents of .debug_abbrev ------\n");
   3378    while (True) {
   3379       if (is_at_end_Cursor( &abbv ))
   3380          break;
   3381       /* Read one abbreviation table */
   3382       TRACE_D3("  Number TAG\n");
   3383       while (True) {
   3384          ULong atag;
   3385          UInt  has_children;
   3386          ULong acode = get_ULEB128( &abbv );
   3387          if (acode == 0) break; /* end of the table */
   3388          atag = get_ULEB128( &abbv );
   3389          has_children = get_UChar( &abbv );
   3390          TRACE_D3("   %llu      %s    [%s]\n",
   3391                   acode, ML_(pp_DW_TAG)(atag),
   3392                          ML_(pp_DW_children)(has_children));
   3393          while (True) {
   3394             ULong at_name = get_ULEB128( &abbv );
   3395             ULong at_form = get_ULEB128( &abbv );
   3396             if (at_name == 0 && at_form == 0) break;
   3397             TRACE_D3("    %18s %s\n",
   3398                      ML_(pp_DW_AT)(at_name), ML_(pp_DW_FORM)(at_form));
   3399          }
   3400       }
   3401    }
   3402    TRACE_SYMTAB("\n");
   3403 
   3404    /* Now loop over the Compilation Units listed in the .debug_info
   3405       section (see D3SPEC sec 7.5) paras 1 and 2.  Each compilation
   3406       unit contains a Compilation Unit Header followed by precisely
   3407       one DW_TAG_compile_unit or DW_TAG_partial_unit DIE. */
   3408    init_Cursor( &info, debug_info_img, debug_info_sz, 0, barf,
   3409                 "Overrun whilst reading .debug_info section" );
   3410 
   3411    /* We'll park the harvested type information in here.  Also create
   3412       a fake "void" entry with offset D3_FAKEVOID_CUOFF, so we always
   3413       have at least one type entry to refer to.  D3_FAKEVOID_CUOFF is
   3414       huge and presumably will not occur in any valid DWARF3 file --
   3415       it would need to have a .debug_info section 4GB long for that to
   3416       happen.  These type entries end up in the DebugInfo. */
   3417    tyents = VG_(newXA)( ML_(dinfo_zalloc),
   3418                         "di.readdwarf3.ndrw.1 (TyEnt temp array)",
   3419                         ML_(dinfo_free), sizeof(TyEnt) );
   3420    { TyEnt tyent;
   3421      VG_(memset)(&tyent, 0, sizeof(tyent));
   3422      tyent.tag   = Te_TyVoid;
   3423      tyent.cuOff = D3_FAKEVOID_CUOFF;
   3424      tyent.Te.TyVoid.isFake = True;
   3425      VG_(addToXA)( tyents, &tyent );
   3426    }
   3427    { TyEnt tyent;
   3428      VG_(memset)(&tyent, 0, sizeof(tyent));
   3429      tyent.tag   = Te_UNKNOWN;
   3430      tyent.cuOff = D3_INVALID_CUOFF;
   3431      VG_(addToXA)( tyents, &tyent );
   3432    }
   3433 
   3434    /* This is a tree used to unique-ify the range lists that are
   3435       manufactured by parse_var_DIE.  References to the keys in the
   3436       tree wind up in .rngMany fields in TempVars.  We'll need to
   3437       delete this tree, and the XArrays attached to it, at the end of
   3438       this function. */
   3439    rangestree = VG_(newFM)( ML_(dinfo_zalloc),
   3440                             "di.readdwarf3.ndrw.2 (rangestree)",
   3441                             ML_(dinfo_free),
   3442                             (Word(*)(UWord,UWord))cmp__XArrays_of_AddrRange );
   3443 
   3444    /* List of variables we're accumulating.  These don't end up in the
   3445       DebugInfo; instead their contents are handed to ML_(addVar) and
   3446       the list elements are then deleted. */
   3447    tempvars = VG_(newXA)( ML_(dinfo_zalloc),
   3448                           "di.readdwarf3.ndrw.3 (TempVar*s array)",
   3449                           ML_(dinfo_free),
   3450                           sizeof(TempVar*) );
   3451 
   3452    /* List of GExprs we're accumulating.  These wind up in the
   3453       DebugInfo. */
   3454    gexprs = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.4",
   3455                         ML_(dinfo_free), sizeof(GExpr*) );
   3456 
   3457    /* We need a D3TypeParser to keep track of partially constructed
   3458       types.  It'll be discarded as soon as we've completed the CU,
   3459       since the resulting information is tipped in to 'tyents' as it
   3460       is generated. */
   3461    VG_(memset)( &typarser, 0, sizeof(typarser) );
   3462    typarser.sp = -1;
   3463    typarser.language = '?';
   3464    for (i = 0; i < N_D3_TYPE_STACK; i++) {
   3465       typarser.qparentE[i].tag   = Te_EMPTY;
   3466       typarser.qparentE[i].cuOff = D3_INVALID_CUOFF;
   3467    }
   3468 
   3469    VG_(memset)( &varparser, 0, sizeof(varparser) );
   3470    varparser.sp = -1;
   3471 
   3472    TRACE_D3("\n------ Parsing .debug_info section ------\n");
   3473    while (True) {
   3474       UWord   cu_start_offset, cu_offset_now;
   3475       CUConst cc;
   3476       /* It may be that the stated size of this CU is larger than the
   3477          amount of stuff actually in it.  icc9 seems to generate CUs
   3478          thusly.  We use these variables to figure out if this is
   3479          indeed the case, and if so how many bytes we need to skip to
   3480          get to the start of the next CU.  Not skipping those bytes
   3481          causes us to misidentify the start of the next CU, and it all
   3482          goes badly wrong after that (not surprisingly). */
   3483       UWord cu_size_including_IniLen, cu_amount_used;
   3484 
   3485       /* It seems icc9 finishes the DIE info before debug_info_sz
   3486          bytes have been used up.  So be flexible, and declare the
   3487          sequence complete if there is not enough remaining bytes to
   3488          hold even the smallest conceivable CU header.  (11 bytes I
   3489          reckon). */
   3490       /* JRS 23Jan09: I suspect this is no longer necessary now that
   3491          the code below contains a 'while (cu_amount_used <
   3492          cu_size_including_IniLen ...'  style loop, which skips over
   3493          any leftover bytes at the end of a CU in the case where the
   3494          CU's stated size is larger than its actual size (as
   3495          determined by reading all its DIEs).  However, for prudence,
   3496          I'll leave the following test in place.  I can't see that a
   3497          CU header can be smaller than 11 bytes, so I don't think
   3498          there's any harm possible through the test -- it just adds
   3499          robustness. */
   3500       Word avail = get_remaining_length_Cursor( &info );
   3501       if (avail < 11) {
   3502          if (avail > 0)
   3503             TRACE_D3("new_dwarf3_reader_wrk: warning: "
   3504                      "%ld unused bytes after end of DIEs\n", avail);
   3505          break;
   3506       }
   3507 
   3508       /* Check the varparser's stack is in a sane state. */
   3509       vg_assert(varparser.sp == -1);
   3510       for (i = 0; i < N_D3_VAR_STACK; i++) {
   3511          vg_assert(varparser.ranges[i] == NULL);
   3512          vg_assert(varparser.level[i] == 0);
   3513       }
   3514       for (i = 0; i < N_D3_TYPE_STACK; i++) {
   3515          vg_assert(typarser.qparentE[i].cuOff == D3_INVALID_CUOFF);
   3516          vg_assert(typarser.qparentE[i].tag   == Te_EMPTY);
   3517          vg_assert(typarser.qlevel[i] == 0);
   3518       }
   3519 
   3520       cu_start_offset = get_position_of_Cursor( &info );
   3521       TRACE_D3("\n");
   3522       TRACE_D3("  Compilation Unit @ offset 0x%lx:\n", cu_start_offset);
   3523       /* parse_CU_header initialises the CU's set_abbv_Cursor cache
   3524          (saC_cache) */
   3525       parse_CU_Header( &cc, td3, &info,
   3526                        (UChar*)debug_abbv_img, debug_abbv_sz );
   3527       cc.debug_str_img    = debug_str_img;
   3528       cc.debug_str_sz     = debug_str_sz;
   3529       cc.debug_ranges_img = debug_ranges_img;
   3530       cc.debug_ranges_sz  = debug_ranges_sz;
   3531       cc.debug_loc_img    = debug_loc_img;
   3532       cc.debug_loc_sz     = debug_loc_sz;
   3533       cc.debug_line_img   = debug_line_img;
   3534       cc.debug_line_sz    = debug_line_sz;
   3535       cc.debug_info_img   = debug_info_img;
   3536       cc.debug_info_sz    = debug_info_sz;
   3537       cc.cu_start_offset  = cu_start_offset;
   3538       cc.di = di;
   3539       /* The CU's svma can be deduced by looking at the AT_low_pc
   3540          value in the top level TAG_compile_unit, which is the topmost
   3541          DIE.  We'll leave it for the 'varparser' to acquire that info
   3542          and fill it in -- since it is the only party to want to know
   3543          it. */
   3544       cc.cu_svma_known = False;
   3545       cc.cu_svma       = 0;
   3546 
   3547       /* Create a fake outermost-level range covering the entire
   3548          address range.  So we always have *something* to catch all
   3549          variable declarations. */
   3550       varstack_push( &cc, &varparser, td3,
   3551                      unitary_range_list(0UL, ~0UL),
   3552                      -1, False/*isFunc*/, NULL/*fbGX*/ );
   3553 
   3554       /* And set up the file name table.  When we come across the top
   3555          level DIE for this CU (which is what the next call to
   3556          read_DIE should process) we will copy all the file names out
   3557          of the .debug_line img area and use this table to look up the
   3558          copies when we later see filename numbers in DW_TAG_variables
   3559          etc. */
   3560       vg_assert(!varparser.filenameTable );
   3561       varparser.filenameTable
   3562          = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.5",
   3563                        ML_(dinfo_free),
   3564                        sizeof(UChar*) );
   3565       vg_assert(varparser.filenameTable);
   3566 
   3567       /* Now read the one-and-only top-level DIE for this CU. */
   3568       vg_assert(varparser.sp == 0);
   3569       read_DIE( rangestree,
   3570                 tyents, tempvars, gexprs,
   3571                 &typarser, &varparser,
   3572                 &info, td3, &cc, 0 );
   3573 
   3574       cu_offset_now = get_position_of_Cursor( &info );
   3575 
   3576       if (0) VG_(printf)("Travelled: %lu  size %llu\n",
   3577                          cu_offset_now - cc.cu_start_offset,
   3578                          cc.unit_length + (cc.is_dw64 ? 12 : 4));
   3579 
   3580       /* How big the CU claims it is .. */
   3581       cu_size_including_IniLen = cc.unit_length + (cc.is_dw64 ? 12 : 4);
   3582       /* .. vs how big we have found it to be */
   3583       cu_amount_used = cu_offset_now - cc.cu_start_offset;
   3584 
   3585       if (1) TRACE_D3("offset now %ld, d-i-size %ld\n",
   3586                       cu_offset_now, debug_info_sz);
   3587       if (cu_offset_now > debug_info_sz)
   3588          barf("toplevel DIEs beyond end of CU");
   3589 
   3590       /* If the CU is bigger than it claims to be, we've got a serious
   3591          problem. */
   3592       if (cu_amount_used > cu_size_including_IniLen)
   3593          barf("CU's actual size appears to be larger than it claims it is");
   3594 
   3595       /* If the CU is smaller than it claims to be, we need to skip some
   3596          bytes.  Loop updates cu_offset_now and cu_amount_used. */
   3597       while (cu_amount_used < cu_size_including_IniLen
   3598              && get_remaining_length_Cursor( &info ) > 0) {
   3599          if (0) VG_(printf)("SKIP\n");
   3600          (void)get_UChar( &info );
   3601          cu_offset_now = get_position_of_Cursor( &info );
   3602          cu_amount_used = cu_offset_now - cc.cu_start_offset;
   3603       }
   3604 
   3605       if (cu_offset_now == debug_info_sz)
   3606          break;
   3607 
   3608       /* Preen to level -2.  DIEs have level >= 0 so -2 cannot occur
   3609          anywhere else at all.  Our fake the-entire-address-space
   3610          range is at level -1, so preening to -2 should completely
   3611          empty the stack out. */
   3612       TRACE_D3("\n");
   3613       varstack_preen( &varparser, td3, -2 );
   3614       /* Similarly, empty the type stack out. */
   3615       typestack_preen( &typarser, td3, -2 );
   3616       /* else keep going */
   3617 
   3618       TRACE_D3("set_abbv_Cursor cache: %lu queries, %lu misses\n",
   3619                cc.saC_cache_queries, cc.saC_cache_misses);
   3620 
   3621       vg_assert(varparser.filenameTable );
   3622       VG_(deleteXA)( varparser.filenameTable );
   3623       varparser.filenameTable = NULL;
   3624    }
   3625 
   3626    /* From here on we're post-processing the stuff we got
   3627       out of the .debug_info section. */
   3628    if (td3) {
   3629       TRACE_D3("\n");
   3630       ML_(pp_TyEnts)(tyents, "Initial type entity (TyEnt) array");
   3631       TRACE_D3("\n");
   3632       TRACE_D3("------ Compressing type entries ------\n");
   3633    }
   3634 
   3635    tyents_cache = ML_(dinfo_zalloc)( "di.readdwarf3.ndrw.6",
   3636                                      sizeof(TyEntIndexCache) );
   3637    ML_(TyEntIndexCache__invalidate)( tyents_cache );
   3638    dedup_types( td3, tyents, tyents_cache );
   3639    if (td3) {
   3640       TRACE_D3("\n");
   3641       ML_(pp_TyEnts)(tyents, "After type entity (TyEnt) compression");
   3642    }
   3643 
   3644    TRACE_D3("\n");
   3645    TRACE_D3("------ Resolving the types of variables ------\n" );
   3646    resolve_variable_types( barf, tyents, tyents_cache, tempvars );
   3647 
   3648    /* Copy all the non-INDIR tyents into a new table.  For large
   3649       .so's, about 90% of the tyents will by now have been resolved to
   3650       INDIRs, and we no longer need them, and so don't need to store
   3651       them. */
   3652    tyents_to_keep
   3653       = VG_(newXA)( ML_(dinfo_zalloc),
   3654                     "di.readdwarf3.ndrw.7 (TyEnt to-keep array)",
   3655                     ML_(dinfo_free), sizeof(TyEnt) );
   3656    n = VG_(sizeXA)( tyents );
   3657    for (i = 0; i < n; i++) {
   3658       TyEnt* ent = VG_(indexXA)( tyents, i );
   3659       if (ent->tag != Te_INDIR)
   3660          VG_(addToXA)( tyents_to_keep, ent );
   3661    }
   3662 
   3663    VG_(deleteXA)( tyents );
   3664    tyents = NULL;
   3665    ML_(dinfo_free)( tyents_cache );
   3666    tyents_cache = NULL;
   3667 
   3668    /* Sort tyents_to_keep so we can lookup in it.  A complete (if
   3669       minor) waste of time, since tyents itself is sorted, but
   3670       necessary since VG_(lookupXA) refuses to cooperate if we
   3671       don't. */
   3672    VG_(setCmpFnXA)(
   3673       tyents_to_keep,
   3674       (Int(*)(void*,void*)) ML_(TyEnt__cmp_by_cuOff_only)
   3675    );
   3676    VG_(sortXA)( tyents_to_keep );
   3677 
   3678    /* Enable cacheing on tyents_to_keep */
   3679    tyents_to_keep_cache
   3680       = ML_(dinfo_zalloc)( "di.readdwarf3.ndrw.8",
   3681                            sizeof(TyEntIndexCache) );
   3682    ML_(TyEntIndexCache__invalidate)( tyents_to_keep_cache );
   3683 
   3684    /* And record the tyents in the DebugInfo.  We do this before
   3685       starting to hand variables to ML_(addVar), since if ML_(addVar)
   3686       wants to do debug printing (of the types of said vars) then it
   3687       will need the tyents.*/
   3688    vg_assert(!di->admin_tyents);
   3689    di->admin_tyents = tyents_to_keep;
   3690 
   3691    /* Bias all the location expressions. */
   3692    TRACE_D3("\n");
   3693    TRACE_D3("------ Biasing the location expressions ------\n" );
   3694 
   3695    n = VG_(sizeXA)( gexprs );
   3696    for (i = 0; i < n; i++) {
   3697       gexpr = *(GExpr**)VG_(indexXA)( gexprs, i );
   3698       bias_GX( gexpr, di );
   3699    }
   3700 
   3701    TRACE_D3("\n");
   3702    TRACE_D3("------ Acquired the following variables: ------\n\n");
   3703 
   3704    /* Park (pointers to) all the vars in an XArray, so we can look up
   3705       abstract origins quickly.  The array is sorted (hence, looked-up
   3706       by) the .dioff fields.  Since the .dioffs should be in strictly
   3707       ascending order, there is no need to sort the array after
   3708       construction.  The ascendingness is however asserted for. */
   3709    dioff_lookup_tab
   3710       = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.9",
   3711                     ML_(dinfo_free),
   3712                     sizeof(TempVar*) );
   3713    vg_assert(dioff_lookup_tab);
   3714 
   3715    n = VG_(sizeXA)( tempvars );
   3716    for (i = 0; i < n; i++) {
   3717       varp = *(TempVar**)VG_(indexXA)( tempvars, i );
   3718       if (i > 0) {
   3719          varp2 = *(TempVar**)VG_(indexXA)( tempvars, i-1 );
   3720          /* why should this hold?  Only, I think, because we've
   3721             constructed the array by reading .debug_info sequentially,
   3722             and so the array .dioff fields should reflect that, and be
   3723             strictly ascending. */
   3724          vg_assert(varp2->dioff < varp->dioff);
   3725       }
   3726       VG_(addToXA)( dioff_lookup_tab, &varp );
   3727    }
   3728    VG_(setCmpFnXA)( dioff_lookup_tab, cmp_TempVar_by_dioff );
   3729    VG_(sortXA)( dioff_lookup_tab ); /* POINTLESS; FIXME: rm */
   3730 
   3731    /* Now visit each var.  Collect up as much info as possible for
   3732       each var and hand it to ML_(addVar). */
   3733    n = VG_(sizeXA)( tempvars );
   3734    for (j = 0; j < n; j++) {
   3735       TyEnt* ent;
   3736       varp = *(TempVar**)VG_(indexXA)( tempvars, j );
   3737 
   3738       /* Possibly show .. */
   3739       if (td3) {
   3740          VG_(printf)("<%lx> addVar: level %d: %s :: ",
   3741                      varp->dioff,
   3742                      varp->level,
   3743                      varp->name ? varp->name : (UChar*)"<anon_var>" );
   3744          if (varp->typeR) {
   3745             ML_(pp_TyEnt_C_ishly)( tyents_to_keep, varp->typeR );
   3746          } else {
   3747             VG_(printf)("NULL");
   3748          }
   3749          VG_(printf)("\n  Loc=");
   3750          if (varp->gexpr) {
   3751             ML_(pp_GX)(varp->gexpr);
   3752          } else {
   3753             VG_(printf)("NULL");
   3754          }
   3755          VG_(printf)("\n");
   3756          if (varp->fbGX) {
   3757             VG_(printf)("  FrB=");
   3758             ML_(pp_GX)( varp->fbGX );
   3759             VG_(printf)("\n");
   3760          } else {
   3761             VG_(printf)("  FrB=none\n");
   3762          }
   3763          VG_(printf)("  declared at: %s:%d\n",
   3764                      varp->fName ? varp->fName : (UChar*)"NULL",
   3765                      varp->fLine );
   3766          if (varp->absOri != (UWord)D3_INVALID_CUOFF)
   3767             VG_(printf)("  abstract origin: <%lx>\n", varp->absOri);
   3768       }
   3769 
   3770       /* Skip variables which have no location.  These must be
   3771          abstract instances; they are useless as-is since with no
   3772          location they have no specified memory location.  They will
   3773          presumably be referred to via the absOri fields of other
   3774          variables. */
   3775       if (!varp->gexpr) {
   3776          TRACE_D3("  SKIP (no location)\n\n");
   3777          continue;
   3778       }
   3779 
   3780       /* So it has a location, at least.  If it refers to some other
   3781          entry through its absOri field, pull in further info through
   3782          that. */
   3783       if (varp->absOri != (UWord)D3_INVALID_CUOFF) {
   3784          Bool found;
   3785          Word ixFirst, ixLast;
   3786          TempVar key;
   3787          TempVar* keyp = &key;
   3788          TempVar *varAI;
   3789          VG_(memset)(&key, 0, sizeof(key)); /* not necessary */
   3790          key.dioff = varp->absOri; /* this is what we want to find */
   3791          found = VG_(lookupXA)( dioff_lookup_tab, &keyp,
   3792                                 &ixFirst, &ixLast );
   3793          if (!found) {
   3794             /* barf("DW_AT_abstract_origin can't be resolved"); */
   3795             TRACE_D3("  SKIP (DW_AT_abstract_origin can't be resolved)\n\n");
   3796             continue;
   3797          }
   3798          /* If the following fails, there is more than one entry with
   3799             the same dioff.  Which can't happen. */
   3800          vg_assert(ixFirst == ixLast);
   3801          varAI = *(TempVar**)VG_(indexXA)( dioff_lookup_tab, ixFirst );
   3802          /* stay sane */
   3803          vg_assert(varAI);
   3804          vg_assert(varAI->dioff == varp->absOri);
   3805 
   3806          /* Copy what useful info we can. */
   3807          if (varAI->typeR && !varp->typeR)
   3808             varp->typeR = varAI->typeR;
   3809          if (varAI->name && !varp->name)
   3810             varp->name = varAI->name;
   3811          if (varAI->fName && !varp->fName)
   3812             varp->fName = varAI->fName;
   3813          if (varAI->fLine > 0 && varp->fLine == 0)
   3814             varp->fLine = varAI->fLine;
   3815       }
   3816 
   3817       /* Give it a name if it doesn't have one. */
   3818       if (!varp->name)
   3819          varp->name = ML_(addStr)( di, "<anon_var>", -1 );
   3820 
   3821       /* So now does it have enough info to be useful? */
   3822       /* NOTE: re typeR: this is a hack.  If typeR is Te_UNKNOWN then
   3823          the type didn't get resolved.  Really, in that case
   3824          something's broken earlier on, and should be fixed, rather
   3825          than just skipping the variable. */
   3826       ent = ML_(TyEnts__index_by_cuOff)( tyents_to_keep,
   3827                                          tyents_to_keep_cache,
   3828                                          varp->typeR );
   3829       /* The next two assertions should be guaranteed by
   3830          our previous call to resolve_variable_types. */
   3831       vg_assert(ent);
   3832       vg_assert(ML_(TyEnt__is_type)(ent) || ent->tag == Te_UNKNOWN);
   3833 
   3834       if (ent->tag == Te_UNKNOWN) continue;
   3835 
   3836       vg_assert(varp->gexpr);
   3837       vg_assert(varp->name);
   3838       vg_assert(varp->typeR);
   3839       vg_assert(varp->level >= 0);
   3840 
   3841       /* Ok.  So we're going to keep it.  Call ML_(addVar) once for
   3842          each address range in which the variable exists. */
   3843       TRACE_D3("  ACQUIRE for range(s) ");
   3844       { AddrRange  oneRange;
   3845         AddrRange* varPcRanges;
   3846         Word       nVarPcRanges;
   3847         /* Set up to iterate over address ranges, however
   3848            represented. */
   3849         if (varp->nRanges == 0 || varp->nRanges == 1) {
   3850            vg_assert(!varp->rngMany);
   3851            if (varp->nRanges == 0) {
   3852               vg_assert(varp->rngOneMin == 0);
   3853               vg_assert(varp->rngOneMax == 0);
   3854            }
   3855            nVarPcRanges = varp->nRanges;
   3856            oneRange.aMin = varp->rngOneMin;
   3857            oneRange.aMax = varp->rngOneMax;
   3858            varPcRanges = &oneRange;
   3859         } else {
   3860            vg_assert(varp->rngMany);
   3861            vg_assert(varp->rngOneMin == 0);
   3862            vg_assert(varp->rngOneMax == 0);
   3863            nVarPcRanges = VG_(sizeXA)(varp->rngMany);
   3864            vg_assert(nVarPcRanges >= 2);
   3865            vg_assert(nVarPcRanges == (Word)varp->nRanges);
   3866            varPcRanges = VG_(indexXA)(varp->rngMany, 0);
   3867         }
   3868         if (varp->level == 0)
   3869            vg_assert( nVarPcRanges == 1 );
   3870         /* and iterate */
   3871         for (i = 0; i < nVarPcRanges; i++) {
   3872            Addr pcMin = varPcRanges[i].aMin;
   3873            Addr pcMax = varPcRanges[i].aMax;
   3874            vg_assert(pcMin <= pcMax);
   3875            /* Level 0 is the global address range.  So at level 0 we
   3876               don't want to bias pcMin/pcMax; but at all other levels
   3877               we do since those are derived from svmas in the Dwarf
   3878               we're reading.  Be paranoid ... */
   3879            if (varp->level == 0) {
   3880               vg_assert(pcMin == (Addr)0);
   3881               vg_assert(pcMax == ~(Addr)0);
   3882            } else {
   3883               /* vg_assert(pcMin > (Addr)0);
   3884                  No .. we can legitimately expect to see ranges like
   3885                  0x0-0x11D (pre-biasing, of course). */
   3886               vg_assert(pcMax < ~(Addr)0);
   3887            }
   3888 
   3889            /* Apply text biasing, for non-global variables. */
   3890            if (varp->level > 0) {
   3891               pcMin += di->text_debug_bias;
   3892               pcMax += di->text_debug_bias;
   3893            }
   3894 
   3895            if (i > 0 && (i%2) == 0)
   3896               TRACE_D3("\n                       ");
   3897            TRACE_D3("[%#lx,%#lx] ", pcMin, pcMax );
   3898 
   3899            ML_(addVar)(
   3900               di, varp->level,
   3901                   pcMin, pcMax,
   3902                   varp->name,  varp->typeR,
   3903                   varp->gexpr, varp->fbGX,
   3904                   varp->fName, varp->fLine, td3
   3905            );
   3906         }
   3907       }
   3908 
   3909       TRACE_D3("\n\n");
   3910       /* and move on to the next var */
   3911    }
   3912 
   3913    /* Now free all the TempVars */
   3914    n = VG_(sizeXA)( tempvars );
   3915    for (i = 0; i < n; i++) {
   3916       varp = *(TempVar**)VG_(indexXA)( tempvars, i );
   3917       ML_(dinfo_free)(varp);
   3918    }
   3919    VG_(deleteXA)( tempvars );
   3920    tempvars = NULL;
   3921 
   3922    /* and the temp lookup table */
   3923    VG_(deleteXA)( dioff_lookup_tab );
   3924 
   3925    /* and the ranges tree.  Note that we need to also free the XArrays
   3926       which constitute the keys, hence pass VG_(deleteXA) as a
   3927       key-finalizer. */
   3928    VG_(deleteFM)( rangestree, (void(*)(UWord))VG_(deleteXA), NULL );
   3929 
   3930    /* and the tyents_to_keep cache */
   3931    ML_(dinfo_free)( tyents_to_keep_cache );
   3932    tyents_to_keep_cache = NULL;
   3933 
   3934    /* and the file name table (just the array, not the entries
   3935       themselves).  (Apparently, 2008-Oct-23, varparser.filenameTable
   3936       can be NULL here, for icc9 generated Dwarf3.  Not sure what that
   3937       signifies (a deeper problem with the reader?)) */
   3938    if (varparser.filenameTable) {
   3939       VG_(deleteXA)( varparser.filenameTable );
   3940       varparser.filenameTable = NULL;
   3941    }
   3942 
   3943    /* record the GExprs in di so they can be freed later */
   3944    vg_assert(!di->admin_gexprs);
   3945    di->admin_gexprs = gexprs;
   3946 }
   3947 
   3948 
   3949 /*------------------------------------------------------------*/
   3950 /*---                                                      ---*/
   3951 /*--- The "new" DWARF3 reader -- top level control logic   ---*/
   3952 /*---                                                      ---*/
   3953 /*------------------------------------------------------------*/
   3954 
   3955 /* --- !!! --- EXTERNAL HEADERS start --- !!! --- */
   3956 #include <setjmp.h>   /* For jmp_buf */
   3957 /* --- !!! --- EXTERNAL HEADERS end --- !!! --- */
   3958 
   /* State for the reader's non-local error exit.  barf() stores a
      reason and longjmps to d3rd_jmpbuf; ML_(new_dwarf3_reader) sets
      up the buffer beforehand and reports the reason afterwards. */

   /* Set to True just before the landing site is established and back
      to False once the read has finished or failed.  barf() asserts
      that it is True before jumping. */
   3959 static Bool    d3rd_jmpbuf_valid  = False;
   /* The message handed to barf().  Asserted to be NULL on entry to
      ML_(new_dwarf3_reader), asserted non-NULL after a jump, and reset
      to NULL before ML_(new_dwarf3_reader) returns. */
   3960 static HChar*  d3rd_jmpbuf_reason = NULL;
   /* Jump buffer filled in by __builtin_setjmp in
      ML_(new_dwarf3_reader) and consumed by __builtin_longjmp in
      barf(). */
   3961 static jmp_buf d3rd_jmpbuf;
   3962 
   3963 static __attribute__((noreturn)) void barf ( HChar* reason ) {
   3964    vg_assert(d3rd_jmpbuf_valid);
   3965    d3rd_jmpbuf_reason = reason;
   3966    __builtin_longjmp(&d3rd_jmpbuf, 1);
   3967    /*NOTREACHED*/
   3968    vg_assert(0);
   3969 }
   3970 
   3971 
   3972 void
   3973 ML_(new_dwarf3_reader) (
   3974    struct _DebugInfo* di,
   3975    UChar* debug_info_img,   SizeT debug_info_sz,
   3976    UChar* debug_abbv_img,   SizeT debug_abbv_sz,
   3977    UChar* debug_line_img,   SizeT debug_line_sz,
   3978    UChar* debug_str_img,    SizeT debug_str_sz,
   3979    UChar* debug_ranges_img, SizeT debug_ranges_sz,
   3980    UChar* debug_loc_img,    SizeT debug_loc_sz
   3981 )
   3982 {
   3983    volatile Int  jumped;
   3984    volatile Bool td3 = di->trace_symtab;
   3985 
   3986    /* Run the _wrk function to read the dwarf3.  If it succeeds, it
   3987       just returns normally.  If there is any failure, it longjmp's
   3988       back here, having first set d3rd_jmpbuf_reason to something
   3989       useful. */
   3990    vg_assert(d3rd_jmpbuf_valid  == False);
   3991    vg_assert(d3rd_jmpbuf_reason == NULL);
   3992 
   3993    d3rd_jmpbuf_valid = True;
   3994    jumped = __builtin_setjmp(&d3rd_jmpbuf);
   3995    if (jumped == 0) {
   3996       /* try this ... */
   3997       new_dwarf3_reader_wrk( di, barf,
   3998                              debug_info_img,   debug_info_sz,
   3999                              debug_abbv_img,   debug_abbv_sz,
   4000                              debug_line_img,   debug_line_sz,
   4001                              debug_str_img,    debug_str_sz,
   4002                              debug_ranges_img, debug_ranges_sz,
   4003                              debug_loc_img,    debug_loc_sz );
   4004       d3rd_jmpbuf_valid = False;
   4005       TRACE_D3("\n------ .debug_info reading was successful ------\n");
   4006    } else {
   4007       /* It longjmp'd. */
   4008       d3rd_jmpbuf_valid = False;
   4009       /* Can't longjump without giving some sort of reason. */
   4010       vg_assert(d3rd_jmpbuf_reason != NULL);
   4011 
   4012       TRACE_D3("\n------ .debug_info reading failed ------\n");
   4013 
   4014       ML_(symerr)(di, True, d3rd_jmpbuf_reason);
   4015    }
   4016 
   4017    d3rd_jmpbuf_valid  = False;
   4018    d3rd_jmpbuf_reason = NULL;
   4019 }
   4020 
   4021 
   4022 
   4023 /* --- Unused code fragments which might be useful one day. --- */
   4024 
   4025 #if 0
   4026    /* Disabled fragment: walk the .debug_aranges section and trace
              its contents.  The names aranges, debug_aranges_img and
              debug_aranges_sz are not declared within this fragment. */
   4027    TRACE_SYMTAB("\n");
   4028    TRACE_SYMTAB("\n------ The contents of .debug_arange ------\n");
           /* Position a cursor at offset 0 of the section image; barf is
              passed as the failure handler together with the message
              to report on overrun. */
   4029    init_Cursor( &aranges, debug_aranges_img,
   4030                 debug_aranges_sz, 0, barf,
   4031                 "Overrun whilst reading .debug_aranges section" );
           /* One iteration per table in the section. */
   4032    while (True) {
   4033       ULong  len, d_i_offset;
   4034       Bool   is64;
   4035       UShort version;
   4036       UChar  asize, segsize;
   4037 
              /* Stop once the whole section has been consumed. */
   4038       if (is_at_end_Cursor( &aranges ))
   4039          break;
   4040       /* Read one table header: initial length, version, offset
                 into .debug_info, address size, segment size. */
   4041       /* The initial-length read also sets is64, which then selects
                 the width used for the .debug_info offset. */
   4042       len = get_Initial_Length( &is64, &aranges,
   4043                "in .debug_aranges: invalid initial-length field" );
   4044       version    = get_UShort( &aranges );
   4045       d_i_offset = get_Dwarfish_UWord( &aranges, is64 );
   4046       asize      = get_UChar( &aranges );
   4047       segsize    = get_UChar( &aranges );
   4048       TRACE_D3("  Length:                   %llu\n", len);
   4049       TRACE_D3("  Version:                  %d\n", (Int)version);
   4050       TRACE_D3("  Offset into .debug_info:  %llx\n", d_i_offset);
   4051       TRACE_D3("  Pointer Size:             %d\n", (Int)asize);
   4052       TRACE_D3("  Segment Size:             %d\n", (Int)segsize);
   4053       TRACE_D3("\n");
   4054       TRACE_D3("    Address            Length\n");
   4055 
              /* Discard bytes until the cursor position is a multiple of
                 twice the address size.
                 NOTE(review): asize comes straight from the input; a
                 value of 0 would make the modulus divide by zero --
                 confirm before enabling this code. */
   4056       while ((get_position_of_Cursor( &aranges ) % (2 * asize)) > 0) {
   4057          (void)get_UChar( & aranges );
   4058       }
              /* Read (address, length) pairs until an all-zero pair.  The
                 second argument to get_Dwarfish_UWord is true only when
                 asize is 8. */
   4059       while (True) {
   4060          ULong address = get_Dwarfish_UWord( &aranges, asize==8 );
   4061          ULong length = get_Dwarfish_UWord( &aranges, asize==8 );
   4062          TRACE_D3("    0x%016llx 0x%llx\n", address, length);
   4063          if (address == 0 && length == 0) break;
   4064       }
   4065    }
   4066    TRACE_SYMTAB("\n");
   4067 #endif
   4068 
   4069 #endif // defined(VGO_linux) || defined(VGO_darwin)
   4070 
   4071 /*--------------------------------------------------------------------*/
   4072 /*--- end                                                          ---*/
   4073 /*--------------------------------------------------------------------*/
   4074