Home | History | Annotate | Download | only in m_debuginfo
      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- Read DWARF3/4 ".debug_info" sections (DIE trees).            ---*/
      4 /*---                                                 readdwarf3.c ---*/
      5 /*--------------------------------------------------------------------*/
      6 
      7 /*
      8    This file is part of Valgrind, a dynamic binary instrumentation
      9    framework.
     10 
     11    Copyright (C) 2008-2011 OpenWorks LLP
     12       info (at) open-works.co.uk
     13 
     14    This program is free software; you can redistribute it and/or
     15    modify it under the terms of the GNU General Public License as
     16    published by the Free Software Foundation; either version 2 of the
     17    License, or (at your option) any later version.
     18 
     19    This program is distributed in the hope that it will be useful, but
     20    WITHOUT ANY WARRANTY; without even the implied warranty of
     21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     22    General Public License for more details.
     23 
     24    You should have received a copy of the GNU General Public License
     25    along with this program; if not, write to the Free Software
     26    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     27    02111-1307, USA.
     28 
     29    The GNU General Public License is contained in the file COPYING.
     30 
     31    Neither the names of the U.S. Department of Energy nor the
     32    University of California nor the names of its contributors may be
     33    used to endorse or promote products derived from this software
     34    without prior written permission.
     35 */
     36 
     37 #if defined(VGO_linux) || defined(VGO_darwin)
     38 
     39 /* REFERENCE (without which this code will not make much sense):
     40 
     41    DWARF Debugging Information Format, Version 3,
     42    dated 20 December 2005 (the "D3 spec").
     43 
     44    Available at http://www.dwarfstd.org/Dwarf3.pdf.  There's also a
     45    .doc (MS Word) version, but for some reason the section numbers
     46    between the Word and PDF versions differ by 1 in the first digit.
     47    All section references in this code are to the PDF version.
     48 
     49    CURRENT HACKS:
     50 
   DW_TAG_{const,volatile}_type: a missing DW_AT_type is allowed; it
      is assumed to mean "const void" or "volatile void" respectively.
      GDB appears to interpret them like this, anyway.
     54 
     55    In many cases it is important to know the svma of a CU (the "base
     56    address of the CU", as the D3 spec calls it).  There are some
     57    situations in which the spec implies this value is unknown, but the
   Dwarf3 produced by gcc-4.1 seems to assume it is not unknown but
     59    merely zero when not explicitly stated.  So we too have to make
     60    that assumption.
     61 
     62    POTENTIAL BUG?  Spotted 6 Sept 08.  Why doesn't
     63    unitary_range_list() bias the resulting range list in the same way
     64    that its more general cousin, get_range_list(), does?  I don't
     65    know.
     66 
     67    TODO, 2008 Feb 17:
     68 
     69    get rid of cu_svma_known and document the assumed-zero svma hack.
     70 
     71    ML_(sizeOfType): differentiate between zero sized types and types
     72    for which the size is unknown.  Is this important?  I don't know.
     73 
     74    DW_AT_array_types: deal with explicit sizes (currently we compute
     75    the size from the bounds and the element size, although that's
   fragile, if the bounds are incompletely specified, or completely
     77    absent)
     78 
     79    Document reason for difference (by 1) of stack preening depth in
     80    parse_var_DIE vs parse_type_DIE.
     81 
     82    Don't hand to ML_(addVars), vars whose locations are entirely in
     83    registers (DW_OP_reg*).  This is merely a space-saving
     84    optimisation, as ML_(evaluate_Dwarf3_Expr) should handle these
     85    expressions correctly, by failing to evaluate them and hence
     86    effectively ignoring the variable with which they are associated.
     87 
     88    Deal with DW_AT_array_types which have element size != stride
     89 
     90    In some cases, the info for a variable is split between two
     91    different DIEs (generally a declarer and a definer).  We punt on
     92    these.  Could do better here.
     93 
     94    The 'data_bias' argument passed to the expression evaluator
     95    (ML_(evaluate_Dwarf3_Expr)) should really be changed to a
     96    MaybeUWord, to make it clear when we do vs don't know what it is
     97    for the evaluation of an expression.  At the moment zero is passed
     98    for this parameter in the don't know case.  That's a bit fragile
     99    and obscure; using a MaybeUWord would be clearer.
    100 
    101    POTENTIAL PERFORMANCE IMPROVEMENTS:
    102 
    103    Currently, duplicate removal and all other queries for the type
    104    entities array is done using cuOffset-based pointing, which
    105    involves a binary search (VG_(lookupXA)) for each access.  This is
    106    wildly inefficient, although simple.  It would be better to
    107    translate all the cuOffset-based references (iow, all the "R" and
    108    "Rs" fields in the TyEnts in 'tyents') to direct index numbers in
    109    'tyents' right at the start of dedup_types(), and use direct
    110    indexing (VG_(indexXA)) wherever possible after that.
    111 
    112    cmp__XArrays_of_AddrRange is also a performance bottleneck.  Move
    113    VG_(indexXA) into pub_tool_xarray.h so it can be inlined at all use
    114    points, and possibly also make an _UNCHECKED version which skips
    115    the range checks in performance-critical situations such as this.
    116 
    117    Handle interaction between read_DIE and parse_{var,type}_DIE
    118    better.  Currently read_DIE reads the entire DIE just to find where
    119    the end is (and for debug printing), so that it can later reliably
    120    move the cursor to the end regardless of what parse_{var,type}_DIE
    121    do.  This means many DIEs (most, even?) are read twice.  It would
    122    be smarter to make parse_{var,type}_DIE return a Bool indicating
    123    whether or not they advanced the DIE cursor, and only if they
    124    didn't should read_DIE itself read through the DIE.
    125 
    126    ML_(addVar) and add_var_to_arange: quite a lot of DiAddrRanges have
    127    zero variables in their .vars XArray.  Rather than have an XArray
    128    with zero elements (which uses 2 malloc'd blocks), allow the .vars
    129    pointer to be NULL in this case.
    130 
    131    More generally, reduce the amount of memory allocated and freed
    132    while reading Dwarf3 type/variable information.  Even modest (20MB)
    133    objects cause this module to allocate and free hundreds of
    134    thousands of small blocks, and ML_(arena_malloc) and its various
    135    groupies always show up at the top of performance profiles. */
    136 
    137 #include "pub_core_basics.h"
    138 #include "pub_core_debuginfo.h"
    139 #include "pub_core_libcbase.h"
    140 #include "pub_core_libcassert.h"
    141 #include "pub_core_libcprint.h"
    142 #include "pub_core_libcsetjmp.h"   // setjmp facilities
    143 #include "pub_core_options.h"
    144 #include "pub_core_tooliface.h"    /* VG_(needs) */
    145 #include "pub_core_xarray.h"
    146 #include "pub_core_wordfm.h"
    147 #include "priv_misc.h"             /* dinfo_zalloc/free */
    148 #include "priv_tytypes.h"
    149 #include "priv_d3basics.h"
    150 #include "priv_storage.h"
    151 #include "priv_readdwarf3.h"       /* self */
    152 
    153 
    154 /*------------------------------------------------------------*/
    155 /*---                                                      ---*/
    156 /*--- Basic machinery for parsing DIEs.                    ---*/
    157 /*---                                                      ---*/
    158 /*------------------------------------------------------------*/
    159 
/* Debug tracing: forwards its arguments to VG_(printf), but only
   when a variable named 'td3' in the enclosing scope is true.
   NOTE: this expands to a bare 'if' statement, so be careful when
   using it as the body of an unbraced if/else. */
#define TRACE_D3(format, args...) \
   if (td3) { VG_(printf)(format, ## args); }

/* Reserved UWord sentinel values.  NOTE(review): judging by the
   names these mean "no valid cuOffset" and "the fake void type"
   respectively -- confirm against their uses. */
#define D3_INVALID_CUOFF  ((UWord)(-1UL))
#define D3_FAKEVOID_CUOFF ((UWord)(-2UL))
    165 
    166 typedef
    167    struct {
    168       UChar* region_start_img;
    169       UWord  region_szB;
    170       UWord  region_next;
    171       void (*barf)( HChar* ) __attribute__((noreturn));
    172       HChar* barfstr;
    173    }
    174    Cursor;
    175 
    176 static inline Bool is_sane_Cursor ( Cursor* c ) {
    177    if (!c)                return False;
    178    if (!c->barf)          return False;
    179    if (!c->barfstr)       return False;
    180    return True;
    181 }
    182 
    183 static void init_Cursor ( Cursor* c,
    184                           UChar*  region_start_img,
    185                           UWord   region_szB,
    186                           UWord   region_next,
    187                           __attribute__((noreturn)) void (*barf)( HChar* ),
    188                           HChar*  barfstr )
    189 {
    190    vg_assert(c);
    191    VG_(memset)(c, 0, sizeof(*c));
    192    c->region_start_img = region_start_img;
    193    c->region_szB       = region_szB;
    194    c->region_next      = region_next;
    195    c->barf             = barf;
    196    c->barfstr          = barfstr;
    197    vg_assert(is_sane_Cursor(c));
    198 }
    199 
    200 static Bool is_at_end_Cursor ( Cursor* c ) {
    201    vg_assert(is_sane_Cursor(c));
    202    return c->region_next >= c->region_szB;
    203 }
    204 
    205 static inline UWord get_position_of_Cursor ( Cursor* c ) {
    206    vg_assert(is_sane_Cursor(c));
    207    return c->region_next;
    208 }
/* Move the cursor's read offset to 'pos'.  No range check is made
   here against region_szB; the get_* readers check bounds when a
   read is attempted. */
static inline void set_position_of_Cursor ( Cursor* c, UWord pos ) {
   c->region_next = pos;
   vg_assert(is_sane_Cursor(c));
}
    213 
    214 static /*signed*/Word get_remaining_length_Cursor ( Cursor* c ) {
    215    vg_assert(is_sane_Cursor(c));
    216    return c->region_szB - c->region_next;
    217 }
    218 
    219 static UChar* get_address_of_Cursor ( Cursor* c ) {
    220    vg_assert(is_sane_Cursor(c));
    221    return &c->region_start_img[ c->region_next ];
    222 }
    223 
    224 /* FIXME: document assumptions on endianness for
    225    get_UShort/UInt/ULong. */
    226 static inline UChar get_UChar ( Cursor* c ) {
    227    UChar r;
    228    /* vg_assert(is_sane_Cursor(c)); */
    229    if (c->region_next + sizeof(UChar) > c->region_szB) {
    230       c->barf(c->barfstr);
    231       /*NOTREACHED*/
    232       vg_assert(0);
    233    }
    234    r = * (UChar*) &c->region_start_img[ c->region_next ];
    235    c->region_next += sizeof(UChar);
    236    return r;
    237 }
    238 static UShort get_UShort ( Cursor* c ) {
    239    UShort r;
    240    vg_assert(is_sane_Cursor(c));
    241    if (c->region_next + sizeof(UShort) > c->region_szB) {
    242       c->barf(c->barfstr);
    243       /*NOTREACHED*/
    244       vg_assert(0);
    245    }
    246    r = ML_(read_UShort)(&c->region_start_img[ c->region_next ]);
    247    c->region_next += sizeof(UShort);
    248    return r;
    249 }
    250 static UInt get_UInt ( Cursor* c ) {
    251    UInt r;
    252    vg_assert(is_sane_Cursor(c));
    253    if (c->region_next + sizeof(UInt) > c->region_szB) {
    254       c->barf(c->barfstr);
    255       /*NOTREACHED*/
    256       vg_assert(0);
    257    }
    258    r = ML_(read_UInt)(&c->region_start_img[ c->region_next ]);
    259    c->region_next += sizeof(UInt);
    260    return r;
    261 }
    262 static ULong get_ULong ( Cursor* c ) {
    263    ULong r;
    264    vg_assert(is_sane_Cursor(c));
    265    if (c->region_next + sizeof(ULong) > c->region_szB) {
    266       c->barf(c->barfstr);
    267       /*NOTREACHED*/
    268       vg_assert(0);
    269    }
    270    r = ML_(read_ULong)(&c->region_start_img[ c->region_next ]);
    271    c->region_next += sizeof(ULong);
    272    return r;
    273 }
    274 static inline ULong get_ULEB128 ( Cursor* c ) {
    275    ULong result;
    276    Int   shift;
    277    UChar byte;
    278    /* unroll first iteration */
    279    byte = get_UChar( c );
    280    result = (ULong)(byte & 0x7f);
    281    if (LIKELY(!(byte & 0x80))) return result;
    282    shift = 7;
    283    /* end unroll first iteration */
    284    do {
    285       byte = get_UChar( c );
    286       result |= ((ULong)(byte & 0x7f)) << shift;
    287       shift += 7;
    288    } while (byte & 0x80);
    289    return result;
    290 }
    291 static Long get_SLEB128 ( Cursor* c ) {
    292    ULong  result = 0;
    293    Int    shift = 0;
    294    UChar  byte;
    295    do {
    296       byte = get_UChar(c);
    297       result |= ((ULong)(byte & 0x7f)) << shift;
    298       shift += 7;
    299    } while (byte & 0x80);
    300    if (shift < 64 && (byte & 0x40))
    301       result |= -(1ULL << shift);
    302    return result;
    303 }
    304 
    305 /* Assume 'c' points to the start of a string.  Return the absolute
    306    address of whatever it points at, and advance it past the
    307    terminating zero.  This makes it safe for the caller to then copy
    308    the string with ML_(addStr), since (w.r.t. image overruns) the
    309    process of advancing past the terminating zero will already have
    310    "vetted" the string. */
    311 static UChar* get_AsciiZ ( Cursor* c ) {
    312    UChar  uc;
    313    UChar* res = get_address_of_Cursor(c);
    314    do { uc = get_UChar(c); } while (uc != 0);
    315    return res;
    316 }
    317 
    318 static ULong peek_ULEB128 ( Cursor* c ) {
    319    Word here = c->region_next;
    320    ULong r = get_ULEB128( c );
    321    c->region_next = here;
    322    return r;
    323 }
    324 static UChar peek_UChar ( Cursor* c ) {
    325    Word here = c->region_next;
    326    UChar r = get_UChar( c );
    327    c->region_next = here;
    328    return r;
    329 }
    330 
    331 static ULong get_Dwarfish_UWord ( Cursor* c, Bool is_dw64 ) {
    332    return is_dw64 ? get_ULong(c) : (ULong) get_UInt(c);
    333 }
    334 
    335 static UWord get_UWord ( Cursor* c ) {
    336    vg_assert(sizeof(UWord) == sizeof(void*));
    337    if (sizeof(UWord) == 4) return get_UInt(c);
    338    if (sizeof(UWord) == 8) return get_ULong(c);
    339    vg_assert(0);
    340 }
    341 
    342 /* Read a DWARF3 'Initial Length' field */
    343 static ULong get_Initial_Length ( /*OUT*/Bool* is64,
    344                                   Cursor* c,
    345                                   HChar* barfMsg )
    346 {
    347    ULong w64;
    348    UInt  w32;
    349    *is64 = False;
    350    w32 = get_UInt( c );
    351    if (w32 >= 0xFFFFFFF0 && w32 < 0xFFFFFFFF) {
    352       c->barf( barfMsg );
    353    }
    354    else if (w32 == 0xFFFFFFFF) {
    355       *is64 = True;
    356       w64   = get_ULong( c );
    357    } else {
    358       *is64 = False;
    359       w64 = (ULong)w32;
    360    }
    361    return w64;
    362 }
    363 
    364 
    365 /*------------------------------------------------------------*/
    366 /*---                                                      ---*/
    367 /*--- "CUConst" structure                                  ---*/
    368 /*---                                                      ---*/
    369 /*------------------------------------------------------------*/
    370 
/* Number of entries in the CUConst.saC_cache array. */
#define N_ABBV_CACHE 32
    372 
    373 /* Holds information that is constant through the parsing of a
    374    Compilation Unit.  This is basically plumbed through to
    375    everywhere. */
    376 typedef
    377    struct {
    378       /* Call here if anything goes wrong */
    379       void (*barf)( HChar* ) __attribute__((noreturn));
    380       /* Is this 64-bit DWARF ? */
    381       Bool   is_dw64;
    382       /* Which DWARF version ?  (2, 3 or 4) */
    383       UShort version;
    384       /* Length of this Compilation Unit, as stated in the
    385          .unit_length :: InitialLength field of the CU Header.
    386          However, this size (as specified by the D3 spec) does not
    387          include the size of the .unit_length field itself, which is
    388          either 4 or 12 bytes (32-bit or 64-bit Dwarf3).  That value
    389          can be obtained through the expression ".is_dw64 ? 12 : 4". */
    390       ULong  unit_length;
    391       /* Offset of start of this unit in .debug_info */
    392       UWord  cu_start_offset;
    393       /* SVMA for this CU.  In the D3 spec, is known as the "base
    394          address of the compilation unit (last para sec 3.1.1).
    395          Needed for (amongst things) interpretation of location-list
    396          values. */
    397       Addr   cu_svma;
    398       Bool   cu_svma_known;
    399       /* The debug_abbreviations table to be used for this Unit */
    400       UChar* debug_abbv;
    401       /* Upper bound on size thereof (an overestimate, in general) */
    402       UWord  debug_abbv_maxszB;
    403       /* Where is .debug_str ? */
    404       UChar* debug_str_img;
    405       UWord  debug_str_sz;
    406       /* Where is .debug_ranges ? */
    407       UChar* debug_ranges_img;
    408       UWord  debug_ranges_sz;
    409       /* Where is .debug_loc ? */
    410       UChar* debug_loc_img;
    411       UWord  debug_loc_sz;
    412       /* Where is .debug_line? */
    413       UChar* debug_line_img;
    414       UWord  debug_line_sz;
    415       /* Where is .debug_info? */
    416       UChar* debug_info_img;
    417       UWord  debug_info_sz;
    418       /* --- Needed so we can add stuff to the string table. --- */
    419       struct _DebugInfo* di;
    420       /* --- a cache for set_abbv_Cursor --- */
    421       /* abbv_code == (ULong)-1 for an unused entry. */
    422       struct { ULong abbv_code; UWord posn; } saC_cache[N_ABBV_CACHE];
    423       UWord saC_cache_queries;
    424       UWord saC_cache_misses;
    425    }
    426    CUConst;
    427 
    428 
    429 /*------------------------------------------------------------*/
    430 /*---                                                      ---*/
    431 /*--- Helper functions for Guarded Expressions             ---*/
    432 /*---                                                      ---*/
    433 /*------------------------------------------------------------*/
    434 
    435 /* Parse the location list starting at img-offset 'debug_loc_offset'
    436    in .debug_loc.  Results are biased with 'svma_of_referencing_CU'
    437    and so I believe are correct SVMAs for the object as a whole.  This
    438    function allocates the UChar*, and the caller must deallocate it.
    439    The resulting block is in so-called Guarded-Expression format.
    440 
    441    Guarded-Expression format is similar but not identical to the DWARF3
    442    location-list format.  The format of each returned block is:
    443 
    444       UChar biasMe;
    445       UChar isEnd;
    446       followed by zero or more of
    447 
    448       (Addr aMin;  Addr aMax;  UShort nbytes;  ..bytes..;  UChar isEnd)
    449 
   '..bytes..' is a standard DWARF3 location expression which is
    451    valid when aMin <= pc <= aMax (possibly after suitable biasing).
    452 
    453    The number of bytes in '..bytes..' is nbytes.
    454 
    455    The end of the sequence is marked by an isEnd == 1 value.  All
    456    previous isEnd values must be zero.
    457 
   biasMe is 1 if the aMin/aMax fields need this DebugInfo's
   text_bias added before use, and 0 if this is not necessary
   (the GX is ready to go).
    461 
    462    Hence the block can be quickly parsed and is self-describing.  Note
    463    that aMax is 1 less than the corresponding value in a DWARF3
    464    location list.  Zero length ranges, with aMax == aMin-1, are not
    465    allowed.
    466 */
    467 /* 2008-sept-12: moved ML_(pp_GX) from here to d3basics.c, where
    468    it more logically belongs. */
    469 
    470 
    471 /* Apply a text bias to a GX. */
    472 static void bias_GX ( /*MOD*/GExpr* gx, struct _DebugInfo* di )
    473 {
    474    UShort nbytes;
    475    UChar* p = &gx->payload[0];
    476    UChar* pA;
    477    UChar  uc;
    478    uc = *p++; /*biasMe*/
    479    if (uc == 0)
    480       return;
    481    vg_assert(uc == 1);
    482    p[-1] = 0; /* mark it as done */
    483    while (True) {
    484       uc = *p++;
    485       if (uc == 1)
    486          break; /*isEnd*/
    487       vg_assert(uc == 0);
    488       /* t-bias aMin */
    489       pA = (UChar*)p;
    490       ML_(write_Addr)(pA, ML_(read_Addr)(pA) + di->text_debug_bias);
    491       p += sizeof(Addr);
    492       /* t-bias aMax */
    493       pA = (UChar*)p;
    494       ML_(write_Addr)(pA, ML_(read_Addr)(pA) + di->text_debug_bias);
    495       p += sizeof(Addr);
    496       /* nbytes, and actual expression */
    497       nbytes = ML_(read_UShort)(p); p += sizeof(UShort);
    498       p += nbytes;
    499    }
    500 }
    501 
    502 __attribute__((noinline))
    503 static GExpr* make_singleton_GX ( UChar* block, UWord nbytes )
    504 {
    505    SizeT  bytesReqd;
    506    GExpr* gx;
    507    UChar *p, *pstart;
    508 
    509    vg_assert(sizeof(UWord) == sizeof(Addr));
    510    vg_assert(nbytes <= 0xFFFF); /* else we overflow the nbytes field */
    511    bytesReqd
    512       =   sizeof(UChar)  /*biasMe*/    + sizeof(UChar) /*!isEnd*/
    513         + sizeof(UWord)  /*aMin*/      + sizeof(UWord) /*aMax*/
    514         + sizeof(UShort) /*nbytes*/    + nbytes
    515         + sizeof(UChar); /*isEnd*/
    516 
    517    gx = ML_(dinfo_zalloc)( "di.readdwarf3.msGX.1",
    518                            sizeof(GExpr) + bytesReqd );
    519    vg_assert(gx);
    520 
    521    p = pstart = &gx->payload[0];
    522 
    523    p = ML_(write_UChar)(p, 0);        /*biasMe*/
    524    p = ML_(write_UChar)(p, 0);        /*!isEnd*/
    525    p = ML_(write_Addr)(p, 0);         /*aMin*/
    526    p = ML_(write_Addr)(p, ~0);        /*aMax*/
    527    p = ML_(write_UShort)(p, nbytes);  /*nbytes*/
    528    VG_(memcpy)(p, block, nbytes); p += nbytes;
    529    p = ML_(write_UChar)(p, 1);        /*isEnd*/
    530 
    531    vg_assert( (SizeT)(p - pstart) == bytesReqd);
    532    vg_assert( &gx->payload[bytesReqd]
    533               == ((UChar*)gx) + sizeof(GExpr) + bytesReqd );
    534 
    535    return gx;
    536 }
    537 
    538 __attribute__((noinline))
    539 static GExpr* make_general_GX ( CUConst* cc,
    540                                 Bool     td3,
    541                                 UWord    debug_loc_offset,
    542                                 Addr     svma_of_referencing_CU )
    543 {
    544    Addr      base;
    545    Cursor    loc;
    546    XArray*   xa; /* XArray of UChar */
    547    GExpr*    gx;
    548    Word      nbytes;
    549 
    550    vg_assert(sizeof(UWord) == sizeof(Addr));
    551    if (cc->debug_loc_sz == 0)
    552       cc->barf("make_general_GX: .debug_loc is empty/missing");
    553 
    554    init_Cursor( &loc, cc->debug_loc_img,
    555                 cc->debug_loc_sz, 0, cc->barf,
    556                 "Overrun whilst reading .debug_loc section(2)" );
    557    set_position_of_Cursor( &loc, debug_loc_offset );
    558 
    559    TRACE_D3("make_general_GX (.debug_loc_offset = %lu, img = %p) {\n",
    560             debug_loc_offset, get_address_of_Cursor( &loc ) );
    561 
    562    /* Who frees this xa?  It is freed before this fn exits. */
    563    xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.mgGX.1",
    564                     ML_(dinfo_free),
    565                     sizeof(UChar) );
    566 
    567    { UChar c = 1; /*biasMe*/ VG_(addBytesToXA)( xa, &c, sizeof(c) ); }
    568 
    569    base = 0;
    570    while (True) {
    571       Bool  acquire;
    572       UWord len;
    573       /* Read a (host-)word pair.  This is something of a hack since
    574          the word size to read is really dictated by the ELF file;
    575          however, we assume we're reading a file with the same
    576          word-sizeness as the host.  Reasonably enough. */
    577       UWord w1 = get_UWord( &loc );
    578       UWord w2 = get_UWord( &loc );
    579 
    580       TRACE_D3("   %08lx %08lx\n", w1, w2);
    581       if (w1 == 0 && w2 == 0)
    582          break; /* end of list */
    583 
    584       if (w1 == -1UL) {
    585          /* new value for 'base' */
    586          base = w2;
    587          continue;
    588       }
    589 
    590       /* else a location expression follows */
    591       /* else enumerate [w1+base, w2+base) */
    592       /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc"
    593          (sec 2.17.2) */
    594       if (w1 > w2) {
    595          TRACE_D3("negative range is for .debug_loc expr at "
    596                   "file offset %lu\n",
    597                   debug_loc_offset);
    598          cc->barf( "negative range in .debug_loc section" );
    599       }
    600 
    601       /* ignore zero length ranges */
    602       acquire = w1 < w2;
    603       len     = (UWord)get_UShort( &loc );
    604 
    605       if (acquire) {
    606          UWord  w;
    607          UShort s;
    608          UChar  c;
    609          c = 0; /* !isEnd*/
    610          VG_(addBytesToXA)( xa, &c, sizeof(c) );
    611          w = w1    + base + svma_of_referencing_CU;
    612          VG_(addBytesToXA)( xa, &w, sizeof(w) );
    613          w = w2 -1 + base + svma_of_referencing_CU;
    614          VG_(addBytesToXA)( xa, &w, sizeof(w) );
    615          s = (UShort)len;
    616          VG_(addBytesToXA)( xa, &s, sizeof(s) );
    617       }
    618 
    619       while (len > 0) {
    620          UChar byte = get_UChar( &loc );
    621          TRACE_D3("%02x", (UInt)byte);
    622          if (acquire)
    623             VG_(addBytesToXA)( xa, &byte, 1 );
    624          len--;
    625       }
    626       TRACE_D3("\n");
    627    }
    628 
    629    { UChar c = 1; /*isEnd*/ VG_(addBytesToXA)( xa, &c, sizeof(c) ); }
    630 
    631    nbytes = VG_(sizeXA)( xa );
    632    vg_assert(nbytes >= 1);
    633 
    634    gx = ML_(dinfo_zalloc)( "di.readdwarf3.mgGX.2", sizeof(GExpr) + nbytes );
    635    vg_assert(gx);
    636    VG_(memcpy)( &gx->payload[0], (UChar*)VG_(indexXA)(xa,0), nbytes );
    637    vg_assert( &gx->payload[nbytes]
    638               == ((UChar*)gx) + sizeof(GExpr) + nbytes );
    639 
    640    VG_(deleteXA)( xa );
    641 
    642    TRACE_D3("}\n");
    643 
    644    return gx;
    645 }
    646 
    647 
    648 /*------------------------------------------------------------*/
    649 /*---                                                      ---*/
    650 /*--- Helper functions for range lists and CU headers      ---*/
    651 /*---                                                      ---*/
    652 /*------------------------------------------------------------*/
    653 
    654 /* Denotes an address range.  Both aMin and aMax are included in the
    655    range; hence a complete range is (0, ~0) and an empty range is any
    656    (X, X-1) for X > 0.*/
    657 typedef
    658    struct { Addr aMin; Addr aMax; }
    659    AddrRange;
    660 
    661 
    662 /* Generate an arbitrary structural total ordering on
    663    XArray* of AddrRange. */
    664 static Word cmp__XArrays_of_AddrRange ( XArray* rngs1, XArray* rngs2 )
    665 {
    666    Word n1, n2, i;
    667    tl_assert(rngs1 && rngs2);
    668    n1 = VG_(sizeXA)( rngs1 );
    669    n2 = VG_(sizeXA)( rngs2 );
    670    if (n1 < n2) return -1;
    671    if (n1 > n2) return 1;
    672    for (i = 0; i < n1; i++) {
    673       AddrRange* rng1 = (AddrRange*)VG_(indexXA)( rngs1, i );
    674       AddrRange* rng2 = (AddrRange*)VG_(indexXA)( rngs2, i );
    675       if (rng1->aMin < rng2->aMin) return -1;
    676       if (rng1->aMin > rng2->aMin) return 1;
    677       if (rng1->aMax < rng2->aMax) return -1;
    678       if (rng1->aMax > rng2->aMax) return 1;
    679    }
    680    return 0;
    681 }
    682 
    683 
    684 __attribute__((noinline))
    685 static XArray* /* of AddrRange */ empty_range_list ( void )
    686 {
    687    XArray* xa; /* XArray of AddrRange */
    688    /* Who frees this xa?  varstack_preen() does. */
    689    xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.erl.1",
    690                     ML_(dinfo_free),
    691                     sizeof(AddrRange) );
    692    return xa;
    693 }
    694 
    695 
    696 __attribute__((noinline))
    697 static XArray* unitary_range_list ( Addr aMin, Addr aMax )
    698 {
    699    XArray*   xa;
    700    AddrRange pair;
    701    vg_assert(aMin <= aMax);
    702    /* Who frees this xa?  varstack_preen() does. */
    703    xa = VG_(newXA)( ML_(dinfo_zalloc),  "di.readdwarf3.url.1",
    704                     ML_(dinfo_free),
    705                     sizeof(AddrRange) );
    706    pair.aMin = aMin;
    707    pair.aMax = aMax;
    708    VG_(addToXA)( xa, &pair );
    709    return xa;
    710 }
    711 
    712 
    713 /* Enumerate the address ranges starting at img-offset
    714    'debug_ranges_offset' in .debug_ranges.  Results are biased with
    715    'svma_of_referencing_CU' and so I believe are correct SVMAs for the
    716    object as a whole.  This function allocates the XArray, and the
    717    caller must deallocate it. */
    718 __attribute__((noinline))
    719 static XArray* /* of AddrRange */
    720        get_range_list ( CUConst* cc,
    721                         Bool     td3,
    722                         UWord    debug_ranges_offset,
    723                         Addr     svma_of_referencing_CU )
    724 {
    725    Addr      base;
    726    Cursor    ranges;
    727    XArray*   xa; /* XArray of AddrRange */
    728    AddrRange pair;
    729 
    730    if (cc->debug_ranges_sz == 0)
    731       cc->barf("get_range_list: .debug_ranges is empty/missing");
    732 
    733    init_Cursor( &ranges, cc->debug_ranges_img,
    734                 cc->debug_ranges_sz, 0, cc->barf,
    735                 "Overrun whilst reading .debug_ranges section(2)" );
    736    set_position_of_Cursor( &ranges, debug_ranges_offset );
    737 
    738    /* Who frees this xa?  varstack_preen() does. */
    739    xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.grl.1", ML_(dinfo_free),
    740                     sizeof(AddrRange) );
    741    base = 0;
    742    while (True) {
    743       /* Read a (host-)word pair.  This is something of a hack since
    744          the word size to read is really dictated by the ELF file;
    745          however, we assume we're reading a file with the same
    746          word-sizeness as the host.  Reasonably enough. */
    747       UWord w1 = get_UWord( &ranges );
    748       UWord w2 = get_UWord( &ranges );
    749 
    750       if (w1 == 0 && w2 == 0)
    751          break; /* end of list. */
    752 
    753       if (w1 == -1UL) {
    754          /* new value for 'base' */
    755          base = w2;
    756          continue;
    757       }
    758 
    759       /* else enumerate [w1+base, w2+base) */
    760       /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc"
    761          (sec 2.17.2) */
    762       if (w1 > w2)
    763          cc->barf( "negative range in .debug_ranges section" );
    764       if (w1 < w2) {
    765          pair.aMin = w1     + base + svma_of_referencing_CU;
    766          pair.aMax = w2 - 1 + base + svma_of_referencing_CU;
    767          vg_assert(pair.aMin <= pair.aMax);
    768          VG_(addToXA)( xa, &pair );
    769       }
    770    }
    771    return xa;
    772 }
    773 
    774 
    775 /* Parse the Compilation Unit header indicated at 'c' and
    776    initialise 'cc' accordingly. */
    777 static __attribute__((noinline))
    778 void parse_CU_Header ( /*OUT*/CUConst* cc,
    779                        Bool td3,
    780                        Cursor* c,
    781                        UChar* debug_abbv_img, UWord debug_abbv_sz )
    782 {
    783    UChar  address_size;
    784    UWord  debug_abbrev_offset;
    785    Int    i;
    786 
    787    VG_(memset)(cc, 0, sizeof(*cc));
    788    vg_assert(c && c->barf);
    789    cc->barf = c->barf;
    790 
    791    /* initial_length field */
    792    cc->unit_length
    793       = get_Initial_Length( &cc->is_dw64, c,
    794            "parse_CU_Header: invalid initial-length field" );
    795 
    796    TRACE_D3("   Length:        %lld\n", cc->unit_length );
    797 
    798    /* version */
    799    cc->version = get_UShort( c );
    800    if (cc->version != 2 && cc->version != 3 && cc->version != 4)
    801       cc->barf( "parse_CU_Header: is neither DWARF2 nor DWARF3 nor DWARF4" );
    802    TRACE_D3("   Version:       %d\n", (Int)cc->version );
    803 
    804    /* debug_abbrev_offset */
    805    debug_abbrev_offset = get_Dwarfish_UWord( c, cc->is_dw64 );
    806    if (debug_abbrev_offset >= debug_abbv_sz)
    807       cc->barf( "parse_CU_Header: invalid debug_abbrev_offset" );
    808    TRACE_D3("   Abbrev Offset: %ld\n", debug_abbrev_offset );
    809 
    810    /* address size.  If this isn't equal to the host word size, just
    811       give up.  This makes it safe to assume elsewhere that
    812       DW_FORM_addr and DW_FORM_ref_addr can be treated as a host
    813       word. */
    814    address_size = get_UChar( c );
    815    if (address_size != sizeof(void*))
    816       cc->barf( "parse_CU_Header: invalid address_size" );
    817    TRACE_D3("   Pointer Size:  %d\n", (Int)address_size );
    818 
    819    /* Set up so that cc->debug_abbv points to the relevant table for
    820       this CU.  Set the szB so that at least we can't read off the end
    821       of the debug_abbrev section -- potentially (and quite likely)
    822       too big, if this isn't the last table in the section, but at
    823       least it's safe. */
    824    cc->debug_abbv        = debug_abbv_img + debug_abbrev_offset;
    825    cc->debug_abbv_maxszB = debug_abbv_sz  - debug_abbrev_offset;
    826    /* and empty out the set_abbv_Cursor cache */
    827    if (0) VG_(printf)("XXXXXX initialise set_abbv_Cursor cache\n");
    828    for (i = 0; i < N_ABBV_CACHE; i++) {
    829       cc->saC_cache[i].abbv_code = (ULong)-1; /* unused */
    830       cc->saC_cache[i].posn = 0;
    831    }
    832    cc->saC_cache_queries = 0;
    833    cc->saC_cache_misses = 0;
    834 }
    835 
    836 
    837 /* Set up 'c' so it is ready to parse the abbv table entry code
    838    'abbv_code' for this compilation unit.  */
    839 static __attribute__((noinline))
    840 void set_abbv_Cursor ( /*OUT*/Cursor* c, Bool td3,
    841                        CUConst* cc, ULong abbv_code )
    842 {
    843    Int   i;
    844    ULong acode;
    845 
    846    if (abbv_code == 0)
    847       cc->barf("set_abbv_Cursor: abbv_code == 0" );
    848 
    849    /* (ULong)-1 is used to represent an empty cache slot.  So we can't
    850       allow it.  In any case no valid DWARF3 should make a reference
    851       to a negative abbreviation code.  [at least, they always seem to
    852       be numbered upwards from zero as far as I have seen] */
    853    vg_assert(abbv_code != (ULong)-1);
    854 
    855    /* First search the cache. */
    856    if (0) VG_(printf)("XXXXXX search set_abbv_Cursor cache\n");
    857    cc->saC_cache_queries++;
    858    for (i = 0; i < N_ABBV_CACHE; i++) {
    859       /* No need to test the cached abbv_codes for -1 (empty), since
    860          we just asserted that abbv_code is not -1. */
    861      if (cc->saC_cache[i].abbv_code == abbv_code) {
    862         /* Found it.  Cool.  Set up the parser using the cached
    863            position, and move this cache entry 1 step closer to the
    864            front. */
    865         if (0) VG_(printf)("XXXXXX found in set_abbv_Cursor cache\n");
    866         init_Cursor( c, cc->debug_abbv,
    867                      cc->debug_abbv_maxszB, cc->saC_cache[i].posn,
    868                      cc->barf,
    869                      "Overrun whilst parsing .debug_abbrev section(1)" );
    870         if (i > 0) {
    871            ULong t_abbv_code = cc->saC_cache[i].abbv_code;
    872            UWord t_posn = cc->saC_cache[i].posn;
    873            while (i > 0) {
    874               cc->saC_cache[i] = cc->saC_cache[i-1];
    875               cc->saC_cache[0].abbv_code = t_abbv_code;
    876               cc->saC_cache[0].posn = t_posn;
    877               i--;
    878            }
    879         }
    880         return;
    881      }
    882    }
    883 
    884    /* No.  It's not in the cache.  We have to search through
    885       .debug_abbrev, of course taking care to update the cache
    886       when done. */
    887 
    888    cc->saC_cache_misses++;
    889    init_Cursor( c, cc->debug_abbv, cc->debug_abbv_maxszB, 0, cc->barf,
    890                "Overrun whilst parsing .debug_abbrev section(2)" );
    891 
    892    /* Now iterate though the table until we find the requested
    893       entry. */
    894    while (True) {
    895       //ULong atag;
    896       //UInt  has_children;
    897       acode = get_ULEB128( c );
    898       if (acode == 0) break; /* end of the table */
    899       if (acode == abbv_code) break; /* found it */
    900       /*atag         = */ get_ULEB128( c );
    901       /*has_children = */ get_UChar( c );
    902       //TRACE_D3("   %llu      %s    [%s]\n",
    903       //         acode, pp_DW_TAG(atag), pp_DW_children(has_children));
    904       while (True) {
    905          ULong at_name = get_ULEB128( c );
    906          ULong at_form = get_ULEB128( c );
    907          if (at_name == 0 && at_form == 0) break;
    908          //TRACE_D3("    %18s %s\n",
    909          //         pp_DW_AT(at_name), pp_DW_FORM(at_form));
    910       }
    911    }
    912 
    913    if (acode == 0) {
    914       /* Not found.  This is fatal. */
    915       cc->barf("set_abbv_Cursor: abbv_code not found");
    916    }
    917 
    918    /* Otherwise, 'c' is now set correctly to parse the relevant entry,
    919       starting from the abbreviation entry's tag.  So just cache
    920       the result, and return. */
    921    for (i = N_ABBV_CACHE-1; i > N_ABBV_CACHE/2; i--) {
    922       cc->saC_cache[i] = cc->saC_cache[i-1];
    923    }
    924    if (0) VG_(printf)("XXXXXX update set_abbv_Cursor cache\n");
    925    cc->saC_cache[N_ABBV_CACHE/2].abbv_code = abbv_code;
    926    cc->saC_cache[N_ABBV_CACHE/2].posn = get_position_of_Cursor(c);
    927 }
    928 
    929 
    930 /* From 'c', get the Form data into the lowest 1/2/4/8 bytes of *cts.
    931 
    932    If *cts itself contains the entire result, then *ctsSzB is set to
    933    1,2,4 or 8 accordingly and *ctsMemSzB is set to zero.
    934 
    935    Alternatively, the result can be a block of data (in the
    936    transiently mapped-in object, so-called "image" space).  If so then
    937    the lowest sizeof(void*)/8 bytes of *cts hold a pointer to said
    938    image, *ctsSzB is zero, and *ctsMemSzB is the size of the block.
    939 
    940    Unfortunately this means it is impossible to represent a zero-size
    941    image block since that would have *ctsSzB == 0 and *ctsMemSzB == 0
    942    and so is ambiguous (which case it is?)
    943 
    944    Invariant on successful return:
    945       (*ctsSzB > 0 && *ctsMemSzB == 0)
    946       || (*ctsSzB == 0 && *ctsMemSzB > 0)
    947 */
    948 static
    949 void get_Form_contents ( /*OUT*/ULong* cts,
    950                          /*OUT*/Int*   ctsSzB,
    951                          /*OUT*/UWord* ctsMemSzB,
    952                          CUConst* cc, Cursor* c,
    953                          Bool td3, DW_FORM form )
    954 {
    955    *cts       = 0;
    956    *ctsSzB    = 0;
    957    *ctsMemSzB = 0;
    958    switch (form) {
    959       case DW_FORM_data1:
    960          *cts = (ULong)(UChar)get_UChar(c);
    961          *ctsSzB = 1;
    962          TRACE_D3("%u", (UInt)*cts);
    963          break;
    964       case DW_FORM_data2:
    965          *cts = (ULong)(UShort)get_UShort(c);
    966          *ctsSzB = 2;
    967          TRACE_D3("%u", (UInt)*cts);
    968          break;
    969       case DW_FORM_data4:
    970          *cts = (ULong)(UInt)get_UInt(c);
    971          *ctsSzB = 4;
    972          TRACE_D3("%u", (UInt)*cts);
    973          break;
    974       case DW_FORM_data8:
    975          *cts = get_ULong(c);
    976          *ctsSzB = 8;
    977          TRACE_D3("%llu", *cts);
    978          break;
    979       case DW_FORM_sec_offset:
    980          *cts = (ULong)get_Dwarfish_UWord( c, cc->is_dw64 );
    981          *ctsSzB = cc->is_dw64 ? 8 : 4;
    982          TRACE_D3("%llu", *cts);
    983          break;
    984       case DW_FORM_sdata:
    985          *cts = (ULong)(Long)get_SLEB128(c);
    986          *ctsSzB = 8;
    987          TRACE_D3("%lld", (Long)*cts);
    988          break;
    989       case DW_FORM_udata:
    990          *cts = (ULong)(Long)get_ULEB128(c);
    991          *ctsSzB = 8;
    992          TRACE_D3("%llu", (Long)*cts);
    993          break;
    994       case DW_FORM_addr:
    995          /* note, this is a hack.  DW_FORM_addr is defined as getting
    996             a word the size of the target machine as defined by the
    997             address_size field in the CU Header.  However,
    998             parse_CU_Header() rejects all inputs except those for
    999             which address_size == sizeof(Word), hence we can just
   1000             treat it as a (host) Word.  */
   1001          *cts = (ULong)(UWord)get_UWord(c);
   1002          *ctsSzB = sizeof(UWord);
   1003          TRACE_D3("0x%lx", (UWord)*cts);
   1004          break;
   1005 
   1006       case DW_FORM_ref_addr:
   1007          /* We make the same word-size assumption as DW_FORM_addr. */
   1008          /* What does this really mean?  From D3 Sec 7.5.4,
   1009             description of "reference", it would appear to reference
   1010             some other DIE, by specifying the offset from the
   1011             beginning of a .debug_info section.  The D3 spec mentions
   1012             that this might be in some other shared object and
   1013             executable.  But I don't see how the name of the other
   1014             object/exe is specified.
   1015 
   1016             At least for the DW_FORM_ref_addrs created by icc11, the
   1017             references seem to be within the same object/executable.
   1018             So for the moment we merely range-check, to see that they
   1019             actually do specify a plausible offset within this
   1020             object's .debug_info, and return the value unchanged.
   1021          */
   1022          *cts = (ULong)(UWord)get_UWord(c);
   1023          *ctsSzB = sizeof(UWord);
   1024          TRACE_D3("0x%lx", (UWord)*cts);
   1025          if (0) VG_(printf)("DW_FORM_ref_addr 0x%lx\n", (UWord)*cts);
   1026          if (/* the following 2 are surely impossible, but ... */
   1027              cc->debug_info_img == NULL || cc->debug_info_sz == 0
   1028              || *cts >= (ULong)cc->debug_info_sz) {
   1029             /* Hmm.  Offset is nonsensical for this object's .debug_info
   1030                section.  Be safe and reject it. */
   1031             cc->barf("get_Form_contents: DW_FORM_ref_addr points "
   1032                      "outside .debug_info");
   1033          }
   1034          break;
   1035 
   1036       case DW_FORM_strp: {
   1037          /* this is an offset into .debug_str */
   1038          UChar* str;
   1039          UWord uw = (UWord)get_Dwarfish_UWord( c, cc->is_dw64 );
   1040          if (cc->debug_str_img == NULL || uw >= cc->debug_str_sz)
   1041             cc->barf("get_Form_contents: DW_FORM_strp "
   1042                      "points outside .debug_str");
   1043          /* FIXME: check the entire string lies inside debug_str,
   1044             not just the first byte of it. */
   1045          str = (UChar*)cc->debug_str_img + uw;
   1046          TRACE_D3("(indirect string, offset: 0x%lx): %s", uw, str);
   1047          *cts = (ULong)(UWord)str;
   1048          *ctsMemSzB = 1 + (ULong)VG_(strlen)(str);
   1049          break;
   1050       }
   1051       case DW_FORM_string: {
   1052          UChar* str = get_AsciiZ(c);
   1053          TRACE_D3("%s", str);
   1054          *cts = (ULong)(UWord)str;
   1055          /* strlen is safe because get_AsciiZ already 'vetted' the
   1056             entire string */
   1057          *ctsMemSzB = 1 + (ULong)VG_(strlen)(str);
   1058          break;
   1059       }
   1060       case DW_FORM_ref1: {
   1061          UChar  u8 = get_UChar(c);
   1062          UWord res = cc->cu_start_offset + (UWord)u8;
   1063          *cts = (ULong)res;
   1064          *ctsSzB = sizeof(UWord);
   1065          TRACE_D3("<%lx>", res);
   1066          break;
   1067       }
   1068       case DW_FORM_ref2: {
   1069          UShort  u16 = get_UShort(c);
   1070          UWord res = cc->cu_start_offset + (UWord)u16;
   1071          *cts = (ULong)res;
   1072          *ctsSzB = sizeof(UWord);
   1073          TRACE_D3("<%lx>", res);
   1074          break;
   1075       }
   1076       case DW_FORM_ref4: {
   1077          UInt  u32 = get_UInt(c);
   1078          UWord res = cc->cu_start_offset + (UWord)u32;
   1079          *cts = (ULong)res;
   1080          *ctsSzB = sizeof(UWord);
   1081          TRACE_D3("<%lx>", res);
   1082          break;
   1083       }
   1084       case DW_FORM_ref8: {
   1085          ULong  u64 = get_ULong(c);
   1086          UWord res = cc->cu_start_offset + (UWord)u64;
   1087          *cts = (ULong)res;
   1088          *ctsSzB = sizeof(UWord);
   1089          TRACE_D3("<%lx>", res);
   1090          break;
   1091       }
   1092       case DW_FORM_ref_udata: {
   1093          ULong  u64 = get_ULEB128(c);
   1094          UWord res = cc->cu_start_offset + (UWord)u64;
   1095          *cts = (ULong)res;
   1096          *ctsSzB = sizeof(UWord);
   1097          TRACE_D3("<%lx>", res);
   1098          break;
   1099       }
   1100       case DW_FORM_flag: {
   1101          UChar u8 = get_UChar(c);
   1102          TRACE_D3("%u", (UInt)u8);
   1103          *cts = (ULong)u8;
   1104          *ctsSzB = 1;
   1105          break;
   1106       }
   1107       case DW_FORM_flag_present:
   1108          TRACE_D3("1");
   1109          *cts = 1;
   1110          *ctsSzB = 1;
   1111          break;
   1112       case DW_FORM_block1: {
   1113          ULong  u64b;
   1114          ULong  u64 = (ULong)get_UChar(c);
   1115          UChar* block = get_address_of_Cursor(c);
   1116          TRACE_D3("%llu byte block: ", u64);
   1117          for (u64b = u64; u64b > 0; u64b--) {
   1118             UChar u8 = get_UChar(c);
   1119             TRACE_D3("%x ", (UInt)u8);
   1120          }
   1121          *cts = (ULong)(UWord)block;
   1122          *ctsMemSzB = (UWord)u64;
   1123          break;
   1124       }
   1125       case DW_FORM_block2: {
   1126          ULong  u64b;
   1127          ULong  u64 = (ULong)get_UShort(c);
   1128          UChar* block = get_address_of_Cursor(c);
   1129          TRACE_D3("%llu byte block: ", u64);
   1130          for (u64b = u64; u64b > 0; u64b--) {
   1131             UChar u8 = get_UChar(c);
   1132             TRACE_D3("%x ", (UInt)u8);
   1133          }
   1134          *cts = (ULong)(UWord)block;
   1135          *ctsMemSzB = (UWord)u64;
   1136          break;
   1137       }
   1138       case DW_FORM_block4: {
   1139          ULong  u64b;
   1140          ULong  u64 = (ULong)get_UInt(c);
   1141          UChar* block = get_address_of_Cursor(c);
   1142          TRACE_D3("%llu byte block: ", u64);
   1143          for (u64b = u64; u64b > 0; u64b--) {
   1144             UChar u8 = get_UChar(c);
   1145             TRACE_D3("%x ", (UInt)u8);
   1146          }
   1147          *cts = (ULong)(UWord)block;
   1148          *ctsMemSzB = (UWord)u64;
   1149          break;
   1150       }
   1151       case DW_FORM_exprloc:
   1152       case DW_FORM_block: {
   1153          ULong  u64b;
   1154          ULong  u64 = (ULong)get_ULEB128(c);
   1155          UChar* block = get_address_of_Cursor(c);
   1156          TRACE_D3("%llu byte block: ", u64);
   1157          for (u64b = u64; u64b > 0; u64b--) {
   1158             UChar u8 = get_UChar(c);
   1159             TRACE_D3("%x ", (UInt)u8);
   1160          }
   1161          *cts = (ULong)(UWord)block;
   1162          *ctsMemSzB = (UWord)u64;
   1163          break;
   1164       }
   1165       case DW_FORM_ref_sig8: {
   1166          ULong  u64b;
   1167          UChar* block = get_address_of_Cursor(c);
   1168          TRACE_D3("8 byte signature: ");
   1169          for (u64b = 8; u64b > 0; u64b--) {
   1170             UChar u8 = get_UChar(c);
   1171             TRACE_D3("%x ", (UInt)u8);
   1172          }
   1173          *cts = (ULong)(UWord)block;
   1174          *ctsMemSzB = 8;
   1175          break;
   1176       }
   1177       case DW_FORM_indirect:
   1178          get_Form_contents (cts, ctsSzB, ctsMemSzB, cc, c, td3,
   1179                             (DW_FORM)get_ULEB128(c));
   1180          return;
   1181 
   1182       default:
   1183          VG_(printf)(
   1184             "get_Form_contents: unhandled %d (%s) at <%lx>\n",
   1185             form, ML_(pp_DW_FORM)(form), get_position_of_Cursor(c));
   1186          c->barf("get_Form_contents: unhandled DW_FORM");
   1187    }
   1188 }
   1189 
   1190 
   1191 /*------------------------------------------------------------*/
   1192 /*---                                                      ---*/
   1193 /*--- Parsing of variable-related DIEs                     ---*/
   1194 /*---                                                      ---*/
   1195 /*------------------------------------------------------------*/
   1196 
   1197 typedef
   1198    struct _TempVar {
   1199       UChar*  name; /* in DebugInfo's .strchunks */
   1200       /* Represent ranges economically.  nRanges is the number of
   1201          ranges.  Cases:
   1202          0: .rngOneMin .rngOneMax .manyRanges are all zero
   1203          1: .rngOneMin .rngOneMax hold the range; .rngMany is NULL
   1204          2: .rngOneMin .rngOneMax are zero; .rngMany holds the ranges.
   1205          This is merely an optimisation to avoid having to allocate
   1206          and free the XArray in the common (98%) of cases where there
   1207          is zero or one address ranges. */
   1208       UWord   nRanges;
   1209       Addr    rngOneMin;
   1210       Addr    rngOneMax;
   1211       XArray* rngMany; /* of AddrRange.  NON-UNIQUE PTR in AR_DINFO. */
   1212       /* Do not free .rngMany, since many TempVars will have the same
   1213          value.  Instead the associated storage is to be freed by
   1214          deleting 'rangetree', which stores a single copy of each
   1215          range. */
   1216       /* --- */
   1217       Int     level;
   1218       UWord   typeR; /* a cuOff */
   1219       GExpr*  gexpr; /* for this variable */
   1220       GExpr*  fbGX;  /* to find the frame base of the enclosing fn, if
   1221                         any */
   1222       UChar*  fName; /* declaring file name, or NULL */
   1223       Int     fLine; /* declaring file line number, or zero */
   1224       /* offset in .debug_info, so that abstract instances can be
   1225          found to satisfy references from concrete instances. */
   1226       UWord   dioff;
   1227       UWord   absOri; /* so the absOri fields refer to dioff fields
   1228                          in some other, related TempVar. */
   1229    }
   1230    TempVar;
   1231 
   1232 #define N_D3_VAR_STACK 48
   1233 
   1234 typedef
   1235    struct {
   1236       /* Contains the range stack: a stack of address ranges, one
   1237          stack entry for each nested scope.
   1238 
   1239          Some scope entries are created by function definitions
   1240          (DW_AT_subprogram), and for those, we also note the GExpr
   1241          derived from its DW_AT_frame_base attribute, if any.
   1242          Consequently it should be possible to find, for any
   1243          variable's DIE, the GExpr for the the containing function's
   1244          DW_AT_frame_base by scanning back through the stack to find
   1245          the nearest entry associated with a function.  This somewhat
   1246          elaborate scheme is provided so as to make it possible to
   1247          obtain the correct DW_AT_frame_base expression even in the
   1248          presence of nested functions (or to be more precise, in the
   1249          presence of nested DW_AT_subprogram DIEs).
   1250       */
   1251       Int     sp; /* [sp] is innermost active entry; sp==-1 for empty
   1252                      stack */
   1253       XArray* ranges[N_D3_VAR_STACK]; /* XArray of AddrRange */
   1254       Int     level[N_D3_VAR_STACK];  /* D3 DIE levels */
   1255       Bool    isFunc[N_D3_VAR_STACK]; /* from DW_AT_subprogram? */
   1256       GExpr*  fbGX[N_D3_VAR_STACK];   /* if isFunc, contains the FB
   1257                                          expr, else NULL */
   1258       /* The file name table.  Is a mapping from integer index to the
   1259          (permanent) copy of the string, iow a non-img area. */
   1260       XArray* /* of UChar* */ filenameTable;
   1261    }
   1262    D3VarParser;
   1263 
   1264 static void varstack_show ( D3VarParser* parser, HChar* str ) {
   1265    Word i, j;
   1266    VG_(printf)("  varstack (%s) {\n", str);
   1267    for (i = 0; i <= parser->sp; i++) {
   1268       XArray* xa = parser->ranges[i];
   1269       vg_assert(xa);
   1270       VG_(printf)("    [%ld] (level %d)", i, parser->level[i]);
   1271       if (parser->isFunc[i]) {
   1272          VG_(printf)(" (fbGX=%p)", parser->fbGX[i]);
   1273       } else {
   1274          vg_assert(parser->fbGX[i] == NULL);
   1275       }
   1276       VG_(printf)(": ");
   1277       if (VG_(sizeXA)( xa ) == 0) {
   1278          VG_(printf)("** empty PC range array **");
   1279       } else {
   1280          for (j = 0; j < VG_(sizeXA)( xa ); j++) {
   1281             AddrRange* range = (AddrRange*) VG_(indexXA)( xa, j );
   1282             vg_assert(range);
   1283             VG_(printf)("[%#lx,%#lx] ", range->aMin, range->aMax);
   1284          }
   1285       }
   1286       VG_(printf)("\n");
   1287    }
   1288    VG_(printf)("  }\n");
   1289 }
   1290 
   1291 /* Remove from the stack, all entries with .level > 'level' */
   1292 static
   1293 void varstack_preen ( D3VarParser* parser, Bool td3, Int level )
   1294 {
   1295    Bool changed = False;
   1296    vg_assert(parser->sp < N_D3_VAR_STACK);
   1297    while (True) {
   1298       vg_assert(parser->sp >= -1);
   1299       if (parser->sp == -1) break;
   1300       if (parser->level[parser->sp] <= level) break;
   1301       if (0)
   1302          TRACE_D3("BBBBAAAA varstack_pop [newsp=%d]\n", parser->sp-1);
   1303       vg_assert(parser->ranges[parser->sp]);
   1304       /* Who allocated this xa?  get_range_list() or
   1305          unitary_range_list(). */
   1306       VG_(deleteXA)( parser->ranges[parser->sp] );
   1307       parser->ranges[parser->sp] = NULL;
   1308       parser->level[parser->sp]  = 0;
   1309       parser->isFunc[parser->sp] = False;
   1310       parser->fbGX[parser->sp]   = NULL;
   1311       parser->sp--;
   1312       changed = True;
   1313    }
   1314    if (changed && td3)
   1315       varstack_show( parser, "after preen" );
   1316 }
   1317 
   1318 static void varstack_push ( CUConst* cc,
   1319                             D3VarParser* parser,
   1320                             Bool td3,
   1321                             XArray* ranges, Int level,
   1322                             Bool    isFunc, GExpr* fbGX ) {
   1323    if (0)
   1324    TRACE_D3("BBBBAAAA varstack_push[newsp=%d]: %d  %p\n",
   1325             parser->sp+1, level, ranges);
   1326 
   1327    /* First we need to zap everything >= 'level', as we are about to
   1328       replace any previous entry at 'level', so .. */
   1329    varstack_preen(parser, /*td3*/False, level-1);
   1330 
   1331    vg_assert(parser->sp >= -1);
   1332    vg_assert(parser->sp < N_D3_VAR_STACK);
   1333    if (parser->sp == N_D3_VAR_STACK-1)
   1334       cc->barf("varstack_push: N_D3_VAR_STACK is too low; "
   1335                "increase and recompile");
   1336    if (parser->sp >= 0)
   1337       vg_assert(parser->level[parser->sp] < level);
   1338    parser->sp++;
   1339    vg_assert(parser->ranges[parser->sp] == NULL);
   1340    vg_assert(parser->level[parser->sp]  == 0);
   1341    vg_assert(parser->isFunc[parser->sp] == False);
   1342    vg_assert(parser->fbGX[parser->sp]   == NULL);
   1343    vg_assert(ranges != NULL);
   1344    if (!isFunc) vg_assert(fbGX == NULL);
   1345    parser->ranges[parser->sp] = ranges;
   1346    parser->level[parser->sp]  = level;
   1347    parser->isFunc[parser->sp] = isFunc;
   1348    parser->fbGX[parser->sp]   = fbGX;
   1349    if (td3)
   1350       varstack_show( parser, "after push" );
   1351 }
   1352 
   1353 
   1354 /* cts, ctsSzB, ctsMemSzB are derived from a DW_AT_location and so
   1355    refer either to a location expression or to a location list.
   1356    Figure out which, and in both cases bundle the expression or
   1357    location list into a so-called GExpr (guarded expression). */
   1358 __attribute__((noinline))
   1359 static GExpr* get_GX ( CUConst* cc, Bool td3,
   1360                        ULong cts, Int ctsSzB, UWord ctsMemSzB )
   1361 {
   1362    GExpr* gexpr = NULL;
   1363    if (ctsMemSzB > 0 && ctsSzB == 0) {
   1364       /* represents an in-line location expression, and cts points
   1365          right at it */
   1366       gexpr = make_singleton_GX( (UChar*)(UWord)cts, ctsMemSzB );
   1367    }
   1368    else
   1369    if (ctsMemSzB == 0 && ctsSzB > 0) {
   1370       /* represents location list.  cts is the offset of it in
   1371          .debug_loc. */
   1372       if (!cc->cu_svma_known)
   1373          cc->barf("get_GX: location list, but CU svma is unknown");
   1374       gexpr = make_general_GX( cc, td3, (UWord)cts, cc->cu_svma );
   1375    }
   1376    else {
   1377       vg_assert(0); /* else caller is bogus */
   1378    }
   1379    return gexpr;
   1380 }
   1381 
   1382 
   1383 static
   1384 void read_filename_table( /*MOD*/D3VarParser* parser,
   1385                           CUConst* cc, UWord debug_line_offset,
   1386                           Bool td3 )
   1387 {
   1388    Bool   is_dw64;
   1389    Cursor c;
   1390    Word   i;
   1391    UShort version;
   1392    UChar  opcode_base;
   1393    UChar* str;
   1394 
   1395    vg_assert(parser && cc && cc->barf);
   1396    if ((!cc->debug_line_img)
   1397        || cc->debug_line_sz <= debug_line_offset)
   1398       cc->barf("read_filename_table: .debug_line is missing?");
   1399 
   1400    init_Cursor( &c, cc->debug_line_img,
   1401                 cc->debug_line_sz, debug_line_offset, cc->barf,
   1402                 "Overrun whilst reading .debug_line section(1)" );
   1403 
   1404    /* unit_length = */
   1405       get_Initial_Length( &is_dw64, &c,
   1406            "read_filename_table: invalid initial-length field" );
   1407    version = get_UShort( &c );
   1408    if (version != 2 && version != 3 && version != 4)
   1409      cc->barf("read_filename_table: Only DWARF version 2, 3 and 4 line info "
   1410               "is currently supported.");
   1411    /*header_length              = (ULong)*/ get_Dwarfish_UWord( &c, is_dw64 );
   1412    /*minimum_instruction_length = */ get_UChar( &c );
   1413    if (version >= 4)
   1414       /*maximum_operations_per_insn = */ get_UChar( &c );
   1415    /*default_is_stmt            = */ get_UChar( &c );
   1416    /*line_base                  = (Char)*/ get_UChar( &c );
   1417    /*line_range                 = */ get_UChar( &c );
   1418    opcode_base                = get_UChar( &c );
   1419    /* skip over "standard_opcode_lengths" */
   1420    for (i = 1; i < (Word)opcode_base; i++)
   1421      (void)get_UChar( &c );
   1422 
   1423    /* skip over the directory names table */
   1424    while (peek_UChar(&c) != 0) {
   1425      (void)get_AsciiZ(&c);
   1426    }
   1427    (void)get_UChar(&c); /* skip terminating zero */
   1428 
   1429    /* Read and record the file names table */
   1430    vg_assert(parser->filenameTable);
   1431    vg_assert( VG_(sizeXA)( parser->filenameTable ) == 0 );
   1432    /* Add a dummy index-zero entry.  DWARF3 numbers its files
   1433       from 1, for some reason. */
   1434    str = ML_(addStr)( cc->di, "<unknown_file>", -1 );
   1435    VG_(addToXA)( parser->filenameTable, &str );
   1436    while (peek_UChar(&c) != 0) {
   1437       str = get_AsciiZ(&c);
   1438       TRACE_D3("  read_filename_table: %ld %s\n",
   1439                VG_(sizeXA)(parser->filenameTable), str);
   1440       str = ML_(addStr)( cc->di, str, -1 );
   1441       VG_(addToXA)( parser->filenameTable, &str );
   1442       (void)get_ULEB128( &c ); /* skip directory index # */
   1443       (void)get_ULEB128( &c ); /* skip last mod time */
   1444       (void)get_ULEB128( &c ); /* file size */
   1445    }
   1446    /* We're done!  The rest of it is not interesting. */
   1447 }
   1448 
   1449 
   /*   Parse one variable-related DIE.  'c_abbv' is positioned at the
        DIE's (attribute, form) pairs, which end with a (0,0) pair;
        'c_die' is positioned at the matching attribute values.

        What is done depends on 'dtag':

        * DW_TAG_compile_unit: collects low_pc/high_pc/ranges, reads the
          filename table when DW_AT_stmt_list is present, records the
          CU's SVMA in 'cc' (level 0 only) and pushes the CU's address
          range(s) onto the parser's scope stack.

        * DW_TAG_lexical_block, DW_TAG_subprogram: pushes the scope's
          address range(s); for a subprogram the DW_AT_frame_base
          expression, if any, is also recorded and added to 'gexprs'.

        * DW_TAG_variable, DW_TAG_formal_parameter: if the DIE has a
          type, or a location plus an abstract origin, a TempVar
          describing it is allocated and appended to 'tempvars'.  Range
          lists with more than one entry are shared via 'rangestree'.

        For any other tag only the initial varstack_preen call happens.

        Attribute combinations that are not understood jump to bad_DIE,
        which rewinds both cursors, prints the DIE and calls cc->barf.

        'posn' is stored as the TempVar's 'dioff' and printed in the
        bad_DIE dump; 'level' is this DIE's nesting level; 'td3' is
        passed on to the varstack and range-list helpers. */
   1450 __attribute__((noinline))
   1451 static void parse_var_DIE (
   1452    /*MOD*/WordFM* /* of (XArray* of AddrRange, void) */ rangestree,
   1453    /*MOD*/XArray* /* of TempVar* */ tempvars,
   1454    /*MOD*/XArray* /* of GExpr* */ gexprs,
   1455    /*MOD*/D3VarParser* parser,
   1456    DW_TAG dtag,
   1457    UWord posn,
   1458    Int level,
   1459    Cursor* c_die,
   1460    Cursor* c_abbv,
   1461    CUConst* cc,
   1462    Bool td3
   1463 )
   1464 {
   1465    ULong       cts;
   1466    Int         ctsSzB;
   1467    UWord       ctsMemSzB;
   1468 
           /* Remember where this DIE starts in both cursors, so that
              bad_DIE can rewind and re-read it for the diagnostic dump. */
   1469    UWord saved_die_c_offset  = get_position_of_Cursor( c_die );
   1470    UWord saved_abbv_c_offset = get_position_of_Cursor( c_abbv );
   1471 
   1472    varstack_preen( parser, td3, level-1 );
   1473 
   1474    if (dtag == DW_TAG_compile_unit) {
   1475       Bool have_lo    = False;
   1476       Bool have_hi1   = False;
   1477       Bool have_range = False;
   1478       Addr ip_lo    = 0;
   1479       Addr ip_hi1   = 0;
   1480       Addr rangeoff = 0;
              /* Walk the (attr, form) pairs; a (0,0) pair ends the list. */
   1481       while (True) {
   1482          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
   1483          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
   1484          if (attr == 0 && form == 0) break;
   1485          get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
   1486                             cc, c_die, False/*td3*/, form );
   1487          if (attr == DW_AT_low_pc && ctsSzB > 0) {
   1488             ip_lo   = cts;
   1489             have_lo = True;
   1490          }
   1491          if (attr == DW_AT_high_pc && ctsSzB > 0) {
   1492             ip_hi1   = cts;
   1493             have_hi1 = True;
   1494          }
   1495          if (attr == DW_AT_ranges && ctsSzB > 0) {
   1496             rangeoff = cts;
   1497             have_range = True;
   1498          }
   1499          if (attr == DW_AT_stmt_list && ctsSzB > 0) {
   1500             read_filename_table( parser, cc, (UWord)cts, td3 );
   1501          }
   1502       }
   1503       /* Now, does this give us an opportunity to find this
   1504          CU's svma? */
   1505 #if 0
   1506       if (level == 0 && have_lo) {
   1507          vg_assert(!cc->cu_svma_known); /* if this fails, it must be
   1508          because we've already seen a DW_TAG_compile_unit DIE at level
   1509          0.  But that can't happen, because DWARF3 only allows exactly
   1510          one top level DIE per CU. */
   1511          cc->cu_svma_known = True;
   1512          cc->cu_svma = ip_lo;
   1513          if (1)
   1514             TRACE_D3("BBBBAAAA acquire CU_SVMA of %p\n", cc->cu_svma);
   1515          /* Now, it may be that this DIE doesn't tell us the CU's
   1516             SVMA, by way of not having a DW_AT_low_pc.  That's OK --
   1517             the CU doesn't *have* to have its SVMA specified.
   1518 
   1519             But as per last para D3 spec sec 3.1.1 ("Normal and
   1520             Partial Compilation Unit Entries", "If the base address
   1521             (viz, the SVMA) is undefined, then any DWARF entry of
   1522             structure defined in terms of the base address of that
   1523             compilation unit is not valid.".  So that means, if whilst
   1524             processing the children of this top level DIE (or their
   1525             children, etc) we see a DW_AT_range, and cu_svma_known is
   1526             False, then the DIE that contains it is (per the spec)
   1527             invalid, and we can legitimately stop and complain. */
   1528       }
   1529 #else
   1530       /* .. whereas The Reality is, simply assume the SVMA is zero
   1531          if it isn't specified. */
   1532       if (level == 0) {
   1533          vg_assert(!cc->cu_svma_known);
   1534          cc->cu_svma_known = True;
   1535          if (have_lo)
   1536             cc->cu_svma = ip_lo;
   1537          else
   1538             cc->cu_svma = 0;
   1539       }
   1540 #endif
   1541       /* Do we have something that looks sane? */
   1542       if (have_lo && have_hi1 && (!have_range)) {
                 /* Nothing is pushed when ip_lo >= ip_hi1.  The "- 1"
                    suggests ip_hi1 is one past the last address of the
                    range -- TODO confirm against unitary_range_list. */
   1543          if (ip_lo < ip_hi1)
   1544             varstack_push( cc, parser, td3,
   1545                            unitary_range_list(ip_lo, ip_hi1 - 1),
   1546                            level,
   1547                            False/*isFunc*/, NULL/*fbGX*/ );
   1548       } else
   1549       if ((!have_lo) && (!have_hi1) && have_range) {
   1550          varstack_push( cc, parser, td3,
   1551                         get_range_list( cc, td3,
   1552                                         rangeoff, cc->cu_svma ),
   1553                         level,
   1554                         False/*isFunc*/, NULL/*fbGX*/ );
   1555       } else
   1556       if ((!have_lo) && (!have_hi1) && (!have_range)) {
   1557          /* CU has no code, presumably? */
   1558          varstack_push( cc, parser, td3,
   1559                         empty_range_list(),
   1560                         level,
   1561                         False/*isFunc*/, NULL/*fbGX*/ );
   1562       } else
   1563       if (have_lo && (!have_hi1) && have_range && ip_lo == 0) {
   1564          /* broken DIE created by gcc-4.3.X ?  Ignore the
   1565             apparently-redundant DW_AT_low_pc and use the DW_AT_ranges
   1566             instead. */
   1567          varstack_push( cc, parser, td3,
   1568                         get_range_list( cc, td3,
   1569                                         rangeoff, cc->cu_svma ),
   1570                         level,
   1571                         False/*isFunc*/, NULL/*fbGX*/ );
   1572       } else {
   1573          if (0) VG_(printf)("I got hlo %d hhi1 %d hrange %d\n",
   1574                             (Int)have_lo, (Int)have_hi1, (Int)have_range);
   1575          goto bad_DIE;
   1576       }
   1577    }
   1578 
   1579    if (dtag == DW_TAG_lexical_block || dtag == DW_TAG_subprogram) {
   1580       Bool   have_lo    = False;
   1581       Bool   have_hi1   = False;
   1582       Bool   have_range = False;
   1583       Addr   ip_lo      = 0;
   1584       Addr   ip_hi1     = 0;
   1585       Addr   rangeoff   = 0;
   1586       Bool   isFunc     = dtag == DW_TAG_subprogram;
   1587       GExpr* fbGX       = NULL;
   1588       while (True) {
   1589          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
   1590          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
   1591          if (attr == 0 && form == 0) break;
   1592          get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
   1593                             cc, c_die, False/*td3*/, form );
   1594          if (attr == DW_AT_low_pc && ctsSzB > 0) {
   1595             ip_lo   = cts;
   1596             have_lo = True;
   1597          }
   1598          if (attr == DW_AT_high_pc && ctsSzB > 0) {
   1599             ip_hi1   = cts;
   1600             have_hi1 = True;
   1601          }
   1602          if (attr == DW_AT_ranges && ctsSzB > 0) {
   1603             rangeoff = cts;
   1604             have_range = True;
   1605          }
                 /* Only subprograms record a frame base.  The size test
                    accepts exactly one of ctsMemSzB / ctsSzB being
                    nonzero. */
   1606          if (isFunc
   1607              && attr == DW_AT_frame_base
   1608              && ((ctsMemSzB > 0 && ctsSzB == 0)
   1609                  || (ctsMemSzB == 0 && ctsSzB > 0))) {
   1610             fbGX = get_GX( cc, False/*td3*/, cts, ctsSzB, ctsMemSzB );
   1611             vg_assert(fbGX);
   1612             VG_(addToXA)(gexprs, &fbGX);
   1613          }
   1614       }
   1615       /* Do we have something that looks sane? */
   1616       if (dtag == DW_TAG_subprogram
   1617           && (!have_lo) && (!have_hi1) && (!have_range)) {
   1618          /* This is legit - ignore it. Sec 3.3.3: "A subroutine entry
   1619             representing a subroutine declaration that is not also a
   1620             definition does not have code address or range
   1621             attributes." */
   1622       } else
   1623       if (dtag == DW_TAG_lexical_block
   1624           && (!have_lo) && (!have_hi1) && (!have_range)) {
   1625          /* I believe this is legit, and means the lexical block
   1626             contains no insns (whatever that might mean).  Ignore. */
   1627       } else
   1628       if (have_lo && have_hi1 && (!have_range)) {
   1629          /* This scope supplies just a single address range. */
   1630          if (ip_lo < ip_hi1)
   1631             varstack_push( cc, parser, td3,
   1632                            unitary_range_list(ip_lo, ip_hi1 - 1),
   1633                            level, isFunc, fbGX );
   1634       } else
   1635       if ((!have_lo) && (!have_hi1) && have_range) {
   1636          /* This scope supplies multiple address ranges via the use of
   1637             a range list. */
   1638          varstack_push( cc, parser, td3,
   1639                         get_range_list( cc, td3,
   1640                                         rangeoff, cc->cu_svma ),
   1641                         level, isFunc, fbGX );
   1642       } else
   1643       if (have_lo && (!have_hi1) && (!have_range)) {
   1644          /* This scope is bogus.  The D3 spec sec 3.4 (Lexical Block
   1645             Entries) says fairly clearly that a scope must have either
   1646             _range or (_low_pc and _high_pc). */
   1647          /* The spec is a bit ambiguous though.  Perhaps a single byte
   1648             range is intended?  See sec 2.17 (Code Addresses And Ranges) */
   1649          /* This case is here because icc9 produced this:
   1650          <2><13bd>: DW_TAG_lexical_block
   1651             DW_AT_decl_line   : 5229
   1652             DW_AT_decl_column : 37
   1653             DW_AT_decl_file   : 1
   1654             DW_AT_low_pc      : 0x401b03
   1655          */
   1656          /* Ignore (seems safer than pushing a single byte range) */
   1657       } else
   1658          goto bad_DIE;
   1659    }
   1660 
   1661    if (dtag == DW_TAG_variable || dtag == DW_TAG_formal_parameter) {
   1662       UChar* name        = NULL;
   1663       UWord  typeR       = D3_INVALID_CUOFF;
   1664       Bool   external    = False;
   1665       GExpr* gexpr       = NULL;
   1667       UWord  abs_ori     = (UWord)D3_INVALID_CUOFF;
   1668       Int    lineNo      = 0;
   1669       UChar* fileName    = NULL;
   1670       while (True) {
   1671          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
   1672          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
   1673          if (attr == 0 && form == 0) break;
   1674          get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
   1675                             cc, c_die, False/*td3*/, form );
   1677          if (attr == DW_AT_name && ctsMemSzB > 0) {
   1678             name = ML_(addStr)( cc->di, (UChar*)(UWord)cts, -1 );
   1679          }
   1680          if (attr == DW_AT_location
   1681              && ((ctsMemSzB > 0 && ctsSzB == 0)
   1682                  || (ctsMemSzB == 0 && ctsSzB > 0))) {
   1683             gexpr = get_GX( cc, False/*td3*/, cts, ctsSzB, ctsMemSzB );
   1684             vg_assert(gexpr);
   1685             VG_(addToXA)(gexprs, &gexpr);
   1686          }
   1687          if (attr == DW_AT_type && ctsSzB > 0) {
   1688             typeR = (UWord)cts;
   1689          }
   1690          if (attr == DW_AT_external && ctsSzB > 0 && cts > 0) {
   1691             external = True;
   1692          }
   1693          if (attr == DW_AT_abstract_origin && ctsSzB > 0) {
   1694             abs_ori = (UWord)cts;
   1695          }
   1696          if (attr == DW_AT_declaration && ctsSzB > 0 && cts > 0) {
   1697             /*declaration = True;*/
   1698          }
   1699          if (attr == DW_AT_decl_line && ctsSzB > 0) {
   1700             lineNo = (Int)cts;
   1701          }
   1702          if (attr == DW_AT_decl_file && ctsSzB > 0) {
                    /* Index 0 is never used for lookup here; out-of-range
                       indices leave fileName as NULL. */
   1703             Int ftabIx = (Int)cts;
   1704             if (ftabIx >= 1
   1705                 && ftabIx < VG_(sizeXA)( parser->filenameTable )) {
   1706                fileName = *(UChar**)
   1707                           VG_(indexXA)( parser->filenameTable, ftabIx );
   1708                vg_assert(fileName);
   1709             }
   1710             if (0) VG_(printf)("XXX filename = %s\n", fileName);
   1711          }
   1712       }
   1713       /* We'll collect it if one of the following three
   1714          conditions holds:
   1715          (1) has location and type    -> completed
   1716          (2) has type only            -> is an abstract instance
   1717          (3) has location and abs_ori -> is a concrete instance
   1718          Name, filename and line number are all optional frills.
   1719       */
              /* Note that test (1) is implied by test (2); it is kept
                 so the code mirrors the list above. */
   1720       if ( /* 1 */ (gexpr && typeR != D3_INVALID_CUOFF)
   1721            /* 2 */ || (typeR != D3_INVALID_CUOFF)
   1722            /* 3 */ || (gexpr && abs_ori != (UWord)D3_INVALID_CUOFF) ) {
   1723 
   1724          /* Add this variable to the list of interesting looking
   1725             variables.  Crucially, note along with it the address
   1726             range(s) associated with the variable, which for locals
   1727             will be the address ranges at the top of the varparser's
   1728             stack. */
   1729          GExpr*   fbGX = NULL;
   1730          Word     i, nRanges;
   1731          XArray*  /* of AddrRange */ xa;
   1732          TempVar* tv;
   1733          /* Stack can't be empty; we put a dummy entry on it for the
   1734             entire address range before starting with the DIEs for
   1735             this CU. */
   1736          vg_assert(parser->sp >= 0);
   1737 
   1738          /* If this is a local variable (non-external), try to find
   1739             the GExpr for the DW_AT_frame_base of the containing
   1740             function.  It should have been pushed on the stack at the
   1741             time we encountered its DW_TAG_subprogram DIE, so the way
   1742             to find it is to scan back down the stack looking for it.
   1743             If there isn't an enclosing stack entry marked 'isFunc'
   1744             then we must be seeing variable or formal param DIEs
   1745             outside of a function, so we deem the Dwarf to be
   1746             malformed if that happens.  Note that the fbGX may be NULL
   1747             if the containing DW_TAG_subprogram didn't supply a
   1748             DW_AT_frame_base -- that's OK, but there must actually be
   1749             a containing DW_TAG_subprogram. */
   1750          if (!external) {
   1751             Bool found = False;
   1752             for (i = parser->sp; i >= 0; i--) {
   1753                if (parser->isFunc[i]) {
   1754                   fbGX = parser->fbGX[i];
   1755                   found = True;
   1756                   break;
   1757                }
   1758             }
   1759             if (!found) {
   1760                if (0 && VG_(clo_verbosity) >= 0) {
   1761                   VG_(message)(Vg_DebugMsg,
   1762                      "warning: parse_var_DIE: non-external variable "
   1763                      "outside DW_TAG_subprogram\n");
   1764                }
   1765                /* goto bad_DIE; */
   1766                /* This seems to happen a lot.  Just ignore it -- if,
   1767                   when we come to evaluation of the location (guarded)
   1768                   expression, it requires a frame base value, and
   1769                   there's no expression for that, then evaluation as a
   1770                   whole will fail.  Harmless - a bit of a waste of
   1771                   cycles but nothing more. */
   1772             }
   1773          }
   1774 
   1775          /* re "external ? 0 : parser->sp" (twice), if the var is
   1776             marked 'external' then we must put it at the global scope,
   1777             as only the global scope (level 0) covers the entire PC
   1778             address space.  It is asserted elsewhere that level 0
   1779             always covers the entire address space. */
   1780          xa = parser->ranges[external ? 0 : parser->sp];
   1781          nRanges = VG_(sizeXA)(xa);
   1782          vg_assert(nRanges >= 0);
   1783 
   1784          tv = ML_(dinfo_zalloc)( "di.readdwarf3.pvD.1", sizeof(TempVar) );
   1785          tv->name   = name;
   1786          tv->level  = external ? 0 : parser->sp;
   1787          tv->typeR  = typeR;
   1788          tv->gexpr  = gexpr;
   1789          tv->fbGX   = fbGX;
   1790          tv->fName  = fileName;
   1791          tv->fLine  = lineNo;
   1792          tv->dioff  = posn;
   1793          tv->absOri = abs_ori;
   1794 
   1795          /* See explanation on definition of type TempVar for the
   1796             reason for this elaboration. */
   1797          tv->nRanges = nRanges;
   1798          tv->rngOneMin = 0;
   1799          tv->rngOneMax = 0;
   1800          tv->rngMany = NULL;
   1801          if (nRanges == 1) {
   1802             AddrRange* range = VG_(indexXA)(xa, 0);
   1803             tv->rngOneMin = range->aMin;
   1804             tv->rngOneMax = range->aMax;
   1805          }
   1806          else if (nRanges > 1) {
   1807             /* See if we already have a range list which is
   1808                structurally identical.  If so, use that; if not, clone
   1809                this one, and add it to our collection. */
   1810             UWord keyW, valW;
   1811             if (VG_(lookupFM)( rangestree, &keyW, &valW, (UWord)xa )) {
   1812                XArray* old = (XArray*)keyW;
                       /* vg_assert rather than tl_assert, matching every
                          other assertion in this function. */
   1813                vg_assert(valW == 0);
   1814                vg_assert(old != xa);
   1815                tv->rngMany = old;
   1816             } else {
   1817                XArray* cloned = VG_(cloneXA)( "di.readdwarf3.pvD.2", xa );
   1818                tv->rngMany = cloned;
   1819                VG_(addToFM)( rangestree, (UWord)cloned, 0 );
   1820             }
   1821          }
   1822 
   1823          VG_(addToXA)( tempvars, &tv );
   1824 
   1825          TRACE_D3("  Recording this variable, with %ld PC range(s)\n",
   1826                   VG_(sizeXA)(xa) );
   1827          /* collect stats on how effective the ->ranges special
   1828             casing is */
   1829          if (0) {
   1830             static Int ntot=0, ngt=0;
   1831             ntot++;
   1832             if (tv->rngMany) ngt++;
   1833             if (0 == (ntot % 100000))
   1834                VG_(printf)("XXXX %d tot, %d cloned\n", ntot, ngt);
   1835          }
   1836 
   1837       }
   1838 
   1839       /* Here are some other weird cases seen in the wild:
   1840 
   1841             We have a variable with a name and a type, but no
   1842             location.  I guess that's a sign that it has been
   1843             optimised away.  Ignore it.  Here's an example:
   1844 
   1845             static Int lc_compar(void* n1, void* n2) {
   1846                MC_Chunk* mc1 = *(MC_Chunk**)n1;
   1847                MC_Chunk* mc2 = *(MC_Chunk**)n2;
   1848                return (mc1->data < mc2->data ? -1 : 1);
   1849             }
   1850 
   1851             Both mc1 and mc2 are like this
   1852             <2><5bc>: Abbrev Number: 21 (DW_TAG_variable)
   1853                 DW_AT_name        : mc1
   1854                 DW_AT_decl_file   : 1
   1855                 DW_AT_decl_line   : 216
   1856                 DW_AT_type        : <5d3>
   1857 
   1858             whereas n1 and n2 do have locations specified.
   1859 
   1860             ---------------------------------------------
   1861 
   1862             We see a DW_TAG_formal_parameter with a type, but
   1863             no name and no location.  It's probably part of a function type
   1864             construction, thusly, hence ignore it:
   1865          <1><2b4>: Abbrev Number: 12 (DW_TAG_subroutine_type)
   1866              DW_AT_sibling     : <2c9>
   1867              DW_AT_prototyped  : 1
   1868              DW_AT_type        : <114>
   1869          <2><2be>: Abbrev Number: 13 (DW_TAG_formal_parameter)
   1870              DW_AT_type        : <13e>
   1871          <2><2c3>: Abbrev Number: 13 (DW_TAG_formal_parameter)
   1872              DW_AT_type        : <133>
   1873 
   1874             ---------------------------------------------
   1875 
   1876             Is very minimal, like this:
   1877             <4><81d>: Abbrev Number: 44 (DW_TAG_variable)
   1878                 DW_AT_abstract_origin: <7ba>
   1879             What that signifies I have no idea.  Ignore.
   1880 
   1881             ----------------------------------------------
   1882 
   1883             Is very minimal, like this:
   1884             <200f>: DW_TAG_formal_parameter
   1885                 DW_AT_abstract_ori: <1f4c>
   1886                 DW_AT_location    : 13440
   1887             What that signifies I have no idea.  Ignore.
   1888             It might be significant, though: the variable at least
   1889             has a location and so might exist somewhere.
   1890             Maybe we should handle this.
   1891 
   1892             ---------------------------------------------
   1893 
   1894             <22407>: DW_TAG_variable
   1895               DW_AT_name        : (indirect string, offset: 0x6579):
   1896                                   vgPlain_trampoline_stuff_start
   1897               DW_AT_decl_file   : 29
   1898               DW_AT_decl_line   : 56
   1899               DW_AT_external    : 1
   1900               DW_AT_declaration : 1
   1901 
   1902             Nameless and typeless variable that has a location?  Who
   1903             knows.  Not me.
   1904             <2><3d178>: Abbrev Number: 22 (DW_TAG_variable)
   1905                  DW_AT_location    : 9 byte block: 3 c0 c7 13 38 0 0 0 0
   1906                                      (DW_OP_addr: 3813c7c0)
   1907 
   1908             No, really.  Check it out.  gcc is quite simply borked.
   1909             <3><168cc>: Abbrev Number: 141 (DW_TAG_variable)
   1910             // followed by no attributes, and the next DIE is a sibling,
   1911             // not a child
   1912             */
   1913    }
   1914    return;
   1915 
   1916   bad_DIE:
           /* Rewind both cursors to the start of this DIE so it can be
              re-read and dumped before giving up. */
   1917    set_position_of_Cursor( c_die,  saved_die_c_offset );
   1918    set_position_of_Cursor( c_abbv, saved_abbv_c_offset );
   1919    VG_(printf)("\nparse_var_DIE: confused by:\n");
   1920    VG_(printf)(" <%d><%lx>: %s\n", level, posn, ML_(pp_DW_TAG)( dtag ) );
   1921    while (True) {
   1922       DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
   1923       DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
   1924       if (attr == 0 && form == 0) break;
   1925       VG_(printf)("     %18s: ", ML_(pp_DW_AT)(attr));
   1926       /* Get the form contents, so as to print them */
   1927       get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
   1928                          cc, c_die, True, form );
   1929       VG_(printf)("\t\n");
   1930    }
   1931    VG_(printf)("\n");
   1932    cc->barf("parse_var_DIE: confused by the above DIE");
   1933    /*NOTREACHED*/
   1934 }
   1935 
   1936 
   1937 /*------------------------------------------------------------*/
   1938 /*---                                                      ---*/
   1939 /*--- Parsing of type-related DIEs                         ---*/
   1940 /*---                                                      ---*/
   1941 /*------------------------------------------------------------*/
   1942 
   1943 /* Capacity of the stack of types under construction; it sizes the
   1944    two arrays in D3TypeParser. */
   1945 #define N_D3_TYPE_STACK 16
   1946 
   1947 /* State of the type parser. */
   1948 typedef struct {
   1949    /* Source language, established once per compilation unit:
   1950          'A' = Ada83/95
   1951          'C' = C/C++
   1952          'F' = Fortran
   1953          '?' = other */
   1954    UChar language;
   1955    /* The stack of types currently under construction.  [sp] is the
   1956       innermost active entry; sp == -1 means the stack is empty. */
   1957    Int   sp;
   1958    /* Parent TyEnts.  These are temporary copies of the ones
   1959       accumulating in the main tyent array, so it is not safe to
   1960       free anything on them when popping (iow, do not use
   1961       TyEnt__make_EMPTY on them) -- just memset them to zero. */
   1962    TyEnt qparentE[N_D3_TYPE_STACK];
   1963    /* Level recorded alongside each qparentE entry. */
   1964    Int   qlevel[N_D3_TYPE_STACK];
   1965 } D3TypeParser;
   1966 
   /*   Print every entry of the type stack, from index 0 up to and
        including [sp], under the label 'str'.  Each line shows the
        entry's index, its recorded level and the TyEnt itself. */
   1967 static void typestack_show ( D3TypeParser* parser, HChar* str ) {
   1968    Word slot = 0;
   1969    VG_(printf)("  typestack (%s) {\n", str);
   1970    for ( ; slot <= parser->sp; slot++) {
   1971       VG_(printf)("    [%ld] (level %d): ", slot, parser->qlevel[slot]);
   1972       ML_(pp_TyEnt)( &parser->qparentE[slot] );
   1973       VG_(printf)("\n");
   1974    }
   1975    VG_(printf)("  }\n");
   1976 }
   1977 
   /*   Pop every entry whose recorded level is greater than 'level'.
        Each vacated slot is zeroed and then marked empty (tag Te_EMPTY,
        cuOff D3_INVALID_CUOFF, level 0).  If at least one entry was
        popped and 'td3' is set, the resulting stack is printed. */
   1978 static void typestack_preen ( D3TypeParser* parser, Bool td3, Int level )
   1979 {
   1980    Bool popped_any = False;
   1981    vg_assert(parser->sp < N_D3_TYPE_STACK);
   1982    for (;;) {
   1983       TyEnt* top;
   1984       vg_assert(parser->sp >= -1);
   1985       if (parser->sp == -1 || parser->qlevel[parser->sp] <= level)
   1986          break;
   1987       if (0)
   1988          TRACE_D3("BBBBAAAA typestack_pop [newsp=%d]\n", parser->sp-1);
   1989       top = &parser->qparentE[parser->sp];
   1990       vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
   1991       VG_(memset)(top, 0, sizeof(TyEnt));
   1992       top->cuOff = D3_INVALID_CUOFF;
   1993       top->tag   = Te_EMPTY;
   1994       parser->qlevel[parser->sp] = 0;
   1995       parser->sp--;
   1996       popped_any = True;
   1997    }
   1998    if (td3 && popped_any)
   1999       typestack_show( parser, "after preen" );
   2000 }
   2001 
   /*   True iff the type stack holds no entries (sp == -1).  Asserts
        that sp lies within [-1, N_D3_TYPE_STACK). */
   2002 static Bool typestack_is_empty ( D3TypeParser* parser ) {
   2003    vg_assert(parser->sp >= -1 && parser->sp < N_D3_TYPE_STACK);
   2004    return (-1 == parser->sp) ? True : False;
   2005 }
   2006 
   /*   Push a copy of '*parentE' as the new innermost entry, recorded
        at 'level'.  Entries at 'level' or deeper are removed first.
        Calls cc->barf if the stack is already full.  '*parentE' must
        be a type with a valid cuOff.  If 'td3' is set the stack is
        printed afterwards. */
   2007 static void typestack_push ( CUConst* cc, D3TypeParser* parser,
   2008                              Bool td3, TyEnt* parentE, Int level )
   2009 {
   2010    if (0) {
   2011       TRACE_D3("BBBBAAAA typestack_push[newsp=%d]: %d  %05lx\n",
   2012                parser->sp+1, level, parentE->cuOff);
   2013    }
   2014    /* Whatever is recorded at 'level' or deeper is about to be
   2015       superseded by the new entry, so clear it out first. */
   2016    typestack_preen(parser, /*td3*/False, level-1);
   2017 
   2018    vg_assert(parser->sp >= -1);
   2019    vg_assert(parser->sp < N_D3_TYPE_STACK);
   2020    if (parser->sp == N_D3_TYPE_STACK-1) {
   2021       cc->barf("typestack_push: N_D3_TYPE_STACK is too low; "
   2022                "increase and recompile");
   2023    }
   2024    if (parser->sp >= 0) {
   2025       vg_assert(parser->qlevel[parser->sp] < level);
   2026    }
   2027    parser->sp++;
   2028    vg_assert(parser->qparentE[parser->sp].tag == Te_EMPTY);
   2029    vg_assert(parser->qlevel[parser->sp]  == 0);
   2030    vg_assert(parentE);
   2031    vg_assert(ML_(TyEnt__is_type)(parentE));
   2032    vg_assert(parentE->cuOff != D3_INVALID_CUOFF);
   2033    parser->qlevel[parser->sp]   = level;
   2034    parser->qparentE[parser->sp] = *parentE;
   2035    if (td3) typestack_show( parser, "after push" );
   2036 }
   2037 
   2038 /* Decide whether the DW_TAG_subrange_type DIE being parsed gives
   2039    the bounds of an array. */
   2040 static Bool subrange_type_denotes_array_bounds ( D3TypeParser* parser,
   2041                                                  DW_TAG dtag )
   2042 {
   2043    vg_assert(dtag == DW_TAG_subrange_type);
   2044    /* Outside Ada, a subrange type always gives array bounds. */
   2045    if (parser->language != 'A')
   2046       return True;
   2047    /* Ada also uses subrange types for other purposes, so there it
   2048       counts only when the innermost type under construction is an
   2049       array. */
   2050    if (typestack_is_empty(parser))
   2051       return False;
   2052    return parser->qparentE[parser->sp].tag == Te_TyArray;
   2053 }
   2054 
   2055 /* Parse a type-related DIE.  'parser' holds the current parser state.
   2056    'admin' is where the completed types are dumped.  'dtag' is the tag
   2057    for this DIE.  'c_die' points to the start of the data fields (FORM
   2058    stuff) for the DIE.  c_abbv points to the start of the (name,form)
   2059    pairs which describe the DIE.
   2060 
   2061    We may find the DIE uninteresting, in which case we should ignore
   2062    it.
   2063 
   2064    What happens: the DIE is examined.  If uninteresting, it is ignored.
   2065    Otherwise, the DIE gives rise to two things:
   2066 
   2067    (1) the offset of this DIE in the CU -- the cuOffset, a UWord
   2068    (2) a TyAdmin structure, which holds the type, or related stuff
   2069 
   2070    (2) is added at the end of 'tyadmins', at some index, say 'i'.
   2071 
   2072    A pair (cuOffset, i) is added to 'tydict'.
   2073 
   2074    Hence 'tyadmins' holds the actual type entities, and 'tydict' holds
   2075    a mapping from cuOffset to the index of the corresponding entry in
   2076    'tyadmin'.
   2077 
   2078    When resolving a cuOffset to a TyAdmin, first look up the cuOffset
   2079    in the tydict (by binary search).  This gives an index into
   2080    tyadmins, and the required entity lives in tyadmins at that index.
   2081 */
   2082 __attribute__((noinline))
   2083 static void parse_type_DIE ( /*MOD*/XArray* /* of TyEnt */ tyents,
   2084                              /*MOD*/D3TypeParser* parser,
   2085                              DW_TAG dtag,
   2086                              UWord posn,
   2087                              Int level,
   2088                              Cursor* c_die,
   2089                              Cursor* c_abbv,
   2090                              CUConst* cc,
   2091                              Bool td3 )
   2092 {
   2093    ULong cts;
   2094    Int   ctsSzB;
   2095    UWord ctsMemSzB;
   2096    TyEnt typeE;
   2097    TyEnt atomE;
   2098    TyEnt fieldE;
   2099    TyEnt boundE;
   2100 
   2101    UWord saved_die_c_offset  = get_position_of_Cursor( c_die );
   2102    UWord saved_abbv_c_offset = get_position_of_Cursor( c_abbv );
   2103 
   2104    VG_(memset)( &typeE,  0xAA, sizeof(typeE) );
   2105    VG_(memset)( &atomE,  0xAA, sizeof(atomE) );
   2106    VG_(memset)( &fieldE, 0xAA, sizeof(fieldE) );
   2107    VG_(memset)( &boundE, 0xAA, sizeof(boundE) );
   2108 
   2109    /* If we've returned to a level at or above any previously noted
   2110       parent, un-note it, so we don't believe we're still collecting
   2111       its children. */
   2112    typestack_preen( parser, td3, level-1 );
   2113 
   2114    if (dtag == DW_TAG_compile_unit) {
   2115       /* See if we can find DW_AT_language, since it is important for
   2116          establishing array bounds (see DW_TAG_subrange_type below in
   2117          this fn) */
   2118       while (True) {
   2119          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
   2120          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
   2121          if (attr == 0 && form == 0) break;
   2122          get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
   2123                             cc, c_die, False/*td3*/, form );
   2124          if (attr != DW_AT_language)
   2125             continue;
   2126          if (ctsSzB == 0)
   2127            goto bad_DIE;
   2128          switch (cts) {
   2129             case DW_LANG_C89: case DW_LANG_C:
   2130             case DW_LANG_C_plus_plus: case DW_LANG_ObjC:
   2131             case DW_LANG_ObjC_plus_plus: case DW_LANG_UPC:
   2132             case DW_LANG_Upc: case DW_LANG_C99:
   2133                parser->language = 'C'; break;
   2134             case DW_LANG_Fortran77: case DW_LANG_Fortran90:
   2135             case DW_LANG_Fortran95:
   2136                parser->language = 'F'; break;
   2137             case DW_LANG_Ada83: case DW_LANG_Ada95:
   2138                parser->language = 'A'; break;
   2139             case DW_LANG_Cobol74:
   2140             case DW_LANG_Cobol85: case DW_LANG_Pascal83:
   2141             case DW_LANG_Modula2: case DW_LANG_Java:
   2142             case DW_LANG_PLI:
   2143             case DW_LANG_D: case DW_LANG_Python:
   2144             case DW_LANG_Mips_Assembler:
   2145                parser->language = '?'; break;
   2146             default:
   2147                goto bad_DIE;
   2148          }
   2149       }
   2150    }
   2151 
   2152    if (dtag == DW_TAG_base_type) {
   2153       /* We can pick up a new base type any time. */
   2154       VG_(memset)(&typeE, 0, sizeof(typeE));
   2155       typeE.cuOff = D3_INVALID_CUOFF;
   2156       typeE.tag   = Te_TyBase;
   2157       while (True) {
   2158          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
   2159          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
   2160          if (attr == 0 && form == 0) break;
   2161          get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
   2162                             cc, c_die, False/*td3*/, form );
   2163          if (attr == DW_AT_name && ctsMemSzB > 0) {
   2164             typeE.Te.TyBase.name
   2165                = ML_(dinfo_strdup)( "di.readdwarf3.ptD.base_type.1",
   2166                                     (UChar*)(UWord)cts );
   2167          }
   2168          if (attr == DW_AT_byte_size && ctsSzB > 0) {
   2169             typeE.Te.TyBase.szB = cts;
   2170          }
   2171          if (attr == DW_AT_encoding && ctsSzB > 0) {
   2172             switch (cts) {
   2173                case DW_ATE_unsigned: case DW_ATE_unsigned_char:
   2174                case DW_ATE_UTF: /* since DWARF4, e.g. char16_t from C++ */
   2175                case DW_ATE_boolean:/* FIXME - is this correct? */
   2176                   typeE.Te.TyBase.enc = 'U'; break;
   2177                case DW_ATE_signed: case DW_ATE_signed_char:
   2178                   typeE.Te.TyBase.enc = 'S'; break;
   2179                case DW_ATE_float:
   2180                   typeE.Te.TyBase.enc = 'F'; break;
   2181                case DW_ATE_complex_float:
   2182                   typeE.Te.TyBase.enc = 'C'; break;
   2183                default:
   2184                   goto bad_DIE;
   2185             }
   2186          }
   2187       }
   2188 
   2189       /* Invent a name if it doesn't have one.  gcc-4.3
   2190          -ftree-vectorize is observed to emit nameless base types. */
   2191       if (!typeE.Te.TyBase.name)
   2192          typeE.Te.TyBase.name
   2193             = ML_(dinfo_strdup)( "di.readdwarf3.ptD.base_type.2",
   2194                                  "<anon_base_type>" );
   2195 
   2196       /* Do we have something that looks sane? */
   2197       if (/* must have a name */
   2198           typeE.Te.TyBase.name == NULL
   2199           /* and a plausible size.  Yes, really 32: "complex long
   2200              double" apparently has size=32 */
   2201           || typeE.Te.TyBase.szB < 0 || typeE.Te.TyBase.szB > 32
   2202           /* and a plausible encoding */
   2203           || (typeE.Te.TyBase.enc != 'U'
   2204               && typeE.Te.TyBase.enc != 'S'
   2205               && typeE.Te.TyBase.enc != 'F'
   2206               && typeE.Te.TyBase.enc != 'C'))
   2207          goto bad_DIE;
   2208       /* Last minute hack: if we see this
   2209          <1><515>: DW_TAG_base_type
   2210              DW_AT_byte_size   : 0
   2211              DW_AT_encoding    : 5
   2212              DW_AT_name        : void
   2213          convert it into a real Void type. */
   2214       if (typeE.Te.TyBase.szB == 0
   2215           && 0 == VG_(strcmp)("void", typeE.Te.TyBase.name)) {
   2216          ML_(TyEnt__make_EMPTY)(&typeE);
   2217          typeE.tag = Te_TyVoid;
   2218          typeE.Te.TyVoid.isFake = False; /* it's a real one! */
   2219       }
   2220 
   2221       goto acquire_Type;
   2222    }
   2223 
   2224    if (dtag == DW_TAG_pointer_type || dtag == DW_TAG_reference_type
   2225        || dtag == DW_TAG_ptr_to_member_type) {
   2226       /* This seems legit for _pointer_type and _reference_type.  I
   2227          don't know if rolling _ptr_to_member_type in here really is
   2228          legit, but it's better than not handling it at all. */
   2229       VG_(memset)(&typeE, 0, sizeof(typeE));
   2230       typeE.cuOff = D3_INVALID_CUOFF;
   2231       typeE.tag   = Te_TyPorR;
   2232       /* target type defaults to void */
   2233       typeE.Te.TyPorR.typeR = D3_FAKEVOID_CUOFF;
   2234       typeE.Te.TyPorR.isPtr = dtag == DW_TAG_pointer_type
   2235                               || dtag == DW_TAG_ptr_to_member_type;
   2236       /* These three type kinds don't *have* to specify their size, in
   2237          which case we assume it's a machine word.  But if they do
   2238          specify it, it must be a machine word :-)  This probably
   2239          assumes that the word size of the Dwarf3 we're reading is the
   2240          same size as that on the machine.  gcc appears to give a size
   2241          whereas icc9 doesn't. */
   2242       typeE.Te.TyPorR.szB = sizeof(UWord);
   2243       while (True) {
   2244          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
   2245          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
   2246          if (attr == 0 && form == 0) break;
   2247          get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
   2248                             cc, c_die, False/*td3*/, form );
   2249          if (attr == DW_AT_byte_size && ctsSzB > 0) {
   2250             typeE.Te.TyPorR.szB = cts;
   2251          }
   2252          if (attr == DW_AT_type && ctsSzB > 0) {
   2253             typeE.Te.TyPorR.typeR = (UWord)cts;
   2254          }
   2255       }
   2256       /* Do we have something that looks sane? */
   2257       if (typeE.Te.TyPorR.szB != sizeof(UWord))
   2258          goto bad_DIE;
   2259       else
   2260          goto acquire_Type;
   2261    }
   2262 
   2263    if (dtag == DW_TAG_enumeration_type) {
   2264       /* Create a new Type to hold the results. */
   2265       VG_(memset)(&typeE, 0, sizeof(typeE));
   2266       typeE.cuOff = posn;
   2267       typeE.tag   = Te_TyEnum;
   2268       typeE.Te.TyEnum.atomRs
   2269          = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ptD.enum_type.1",
   2270                        ML_(dinfo_free),
   2271                        sizeof(UWord) );
   2272       while (True) {
   2273          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
   2274          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
   2275          if (attr == 0 && form == 0) break;
   2276          get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
   2277                             cc, c_die, False/*td3*/, form );
   2278          if (attr == DW_AT_name && ctsMemSzB > 0) {
   2279             typeE.Te.TyEnum.name
   2280               = ML_(dinfo_strdup)( "di.readdwarf3.pTD.enum_type.2",
   2281                                    (UChar*)(UWord)cts );
   2282          }
   2283          if (attr == DW_AT_byte_size && ctsSzB > 0) {
   2284             typeE.Te.TyEnum.szB = cts;
   2285          }
   2286       }
   2287 
   2288       if (!typeE.Te.TyEnum.name)
   2289          typeE.Te.TyEnum.name
   2290             = ML_(dinfo_strdup)( "di.readdwarf3.pTD.enum_type.3",
   2291                                  "<anon_enum_type>" );
   2292 
   2293       /* Do we have something that looks sane? */
   2294       if (typeE.Te.TyEnum.szB == 0
   2295           /* we must know the size */
   2296           /* but not for Ada, which uses such dummy
   2297              enumerations as helper for gdb ada mode. */
   2298           && parser->language != 'A')
   2299          goto bad_DIE;
   2300       /* On't stack! */
   2301       typestack_push( cc, parser, td3, &typeE, level );
   2302       goto acquire_Type;
   2303    }
   2304 
   2305    /* gcc (GCC) 4.4.0 20081017 (experimental) occasionally produces
   2306       DW_TAG_enumerator with only a DW_AT_name but no
   2307       DW_AT_const_value.  This is in violation of the Dwarf3 standard,
   2308       and appears to be a new "feature" of gcc - versions 4.3.x and
   2309       earlier do not appear to do this.  So accept DW_TAG_enumerator
   2310       which only have a name but no value.  An example:
   2311 
   2312       <1><180>: Abbrev Number: 6 (DW_TAG_enumeration_type)
   2313          <181>   DW_AT_name        : (indirect string, offset: 0xda70):
   2314                                      QtMsgType
   2315          <185>   DW_AT_byte_size   : 4
   2316          <186>   DW_AT_decl_file   : 14
   2317          <187>   DW_AT_decl_line   : 1480
   2318          <189>   DW_AT_sibling     : <0x1a7>
   2319       <2><18d>: Abbrev Number: 7 (DW_TAG_enumerator)
   2320          <18e>   DW_AT_name        : (indirect string, offset: 0x9e18):
   2321                                      QtDebugMsg
   2322       <2><192>: Abbrev Number: 7 (DW_TAG_enumerator)
   2323          <193>   DW_AT_name        : (indirect string, offset: 0x1505f):
   2324                                      QtWarningMsg
   2325       <2><197>: Abbrev Number: 7 (DW_TAG_enumerator)
   2326          <198>   DW_AT_name        : (indirect string, offset: 0x16f4a):
   2327                                      QtCriticalMsg
   2328       <2><19c>: Abbrev Number: 7 (DW_TAG_enumerator)
   2329          <19d>   DW_AT_name        : (indirect string, offset: 0x156dd):
   2330                                      QtFatalMsg
   2331       <2><1a1>: Abbrev Number: 7 (DW_TAG_enumerator)
   2332          <1a2>   DW_AT_name        : (indirect string, offset: 0x13660):
   2333                                      QtSystemMsg
   2334    */
   2335    if (dtag == DW_TAG_enumerator) {
   2336       VG_(memset)( &atomE, 0, sizeof(atomE) );
   2337       atomE.cuOff = posn;
   2338       atomE.tag   = Te_Atom;
   2339       while (True) {
   2340          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
   2341          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
   2342          if (attr == 0 && form == 0) break;
   2343          get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
   2344                             cc, c_die, False/*td3*/, form );
   2345          if (attr == DW_AT_name && ctsMemSzB > 0) {
   2346             atomE.Te.Atom.name
   2347               = ML_(dinfo_strdup)( "di.readdwarf3.pTD.enumerator.1",
   2348                                    (UChar*)(UWord)cts );
   2349          }
   2350          if (attr == DW_AT_const_value && ctsSzB > 0) {
   2351             atomE.Te.Atom.value = cts;
   2352             atomE.Te.Atom.valueKnown = True;
   2353          }
   2354       }
   2355       /* Do we have something that looks sane? */
   2356       if (atomE.Te.Atom.name == NULL)
   2357          goto bad_DIE;
   2358       /* Do we have a plausible parent? */
   2359       if (typestack_is_empty(parser)) goto bad_DIE;
   2360       vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
   2361       vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
   2362       if (level != parser->qlevel[parser->sp]+1) goto bad_DIE;
   2363       if (parser->qparentE[parser->sp].tag != Te_TyEnum) goto bad_DIE;
   2364       /* Record this child in the parent */
   2365       vg_assert(parser->qparentE[parser->sp].Te.TyEnum.atomRs);
   2366       VG_(addToXA)( parser->qparentE[parser->sp].Te.TyEnum.atomRs,
   2367                     &atomE );
   2368       /* And record the child itself */
   2369       goto acquire_Atom;
   2370    }
   2371 
   2372    /* Treat DW_TAG_class_type as if it was a DW_TAG_structure_type.  I
   2373       don't know if this is correct, but it at least makes this reader
   2374       usable for gcc-4.3 produced Dwarf3. */
   2375    if (dtag == DW_TAG_structure_type || dtag == DW_TAG_class_type
   2376        || dtag == DW_TAG_union_type) {
   2377       Bool have_szB = False;
   2378       Bool is_decl  = False;
   2379       Bool is_spec  = False;
   2380       /* Create a new Type to hold the results. */
   2381       VG_(memset)(&typeE, 0, sizeof(typeE));
   2382       typeE.cuOff = posn;
   2383       typeE.tag   = Te_TyStOrUn;
   2384       typeE.Te.TyStOrUn.name = NULL;
   2385       typeE.Te.TyStOrUn.fieldRs
   2386          = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.pTD.struct_type.1",
   2387                        ML_(dinfo_free),
   2388                        sizeof(UWord) );
   2389       typeE.Te.TyStOrUn.complete = True;
   2390       typeE.Te.TyStOrUn.isStruct = dtag == DW_TAG_structure_type
   2391                                    || dtag == DW_TAG_class_type;
   2392       while (True) {
   2393          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
   2394          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
   2395          if (attr == 0 && form == 0) break;
   2396          get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
   2397                             cc, c_die, False/*td3*/, form );
   2398          if (attr == DW_AT_name && ctsMemSzB > 0) {
   2399             typeE.Te.TyStOrUn.name
   2400                = ML_(dinfo_strdup)( "di.readdwarf3.ptD.struct_type.2",
   2401                                     (UChar*)(UWord)cts );
   2402          }
   2403          if (attr == DW_AT_byte_size && ctsSzB >= 0) {
   2404             typeE.Te.TyStOrUn.szB = cts;
   2405             have_szB = True;
   2406          }
   2407          if (attr == DW_AT_declaration && ctsSzB > 0 && cts > 0) {
   2408             is_decl = True;
   2409          }
   2410          if (attr == DW_AT_specification && ctsSzB > 0 && cts > 0) {
   2411             is_spec = True;
   2412          }
   2413       }
   2414       /* Do we have something that looks sane? */
   2415       if (is_decl && (!is_spec)) {
   2416          /* It's a DW_AT_declaration.  We require the name but
   2417             nothing else. */
   2418          if (typeE.Te.TyStOrUn.name == NULL)
   2419             goto bad_DIE;
   2420          typeE.Te.TyStOrUn.complete = False;
   2421          /* JRS 2009 Aug 10: <possible kludge>? */
   2422          /* Push this tyent on the stack, even though it's incomplete.
   2423             It appears that gcc-4.4 on Fedora 11 will sometimes create
   2424             DW_TAG_member entries for it, and so we need to have a
   2425             plausible parent present in order for that to work.  See
   2426             #200029 comments 8 and 9. */
   2427          typestack_push( cc, parser, td3, &typeE, level );
   2428          /* </possible kludge> */
   2429          goto acquire_Type;
   2430       }
   2431       if ((!is_decl) /* && (!is_spec) */) {
   2432          /* this is the common, ordinary case */
   2433          if ((!have_szB) /* we must know the size */
   2434              /* But the name can be present, or not */)
   2435             goto bad_DIE;
   2436          /* On't stack! */
   2437          typestack_push( cc, parser, td3, &typeE, level );
   2438          goto acquire_Type;
   2439       }
   2440       else {
   2441          /* don't know how to handle any other variants just now */
   2442          goto bad_DIE;
   2443       }
   2444    }
   2445 
   2446    if (dtag == DW_TAG_member) {
   2447       /* Acquire member entries for both DW_TAG_structure_type and
   2448          DW_TAG_union_type.  They differ minorly, in that struct
   2449          members must have a DW_AT_data_member_location expression
   2450          whereas union members must not. */
   2451       Bool parent_is_struct;
   2452       VG_(memset)( &fieldE, 0, sizeof(fieldE) );
   2453       fieldE.cuOff = posn;
   2454       fieldE.tag   = Te_Field;
   2455       fieldE.Te.Field.typeR = D3_INVALID_CUOFF;
   2456       while (True) {
   2457          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
   2458          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
   2459          if (attr == 0 && form == 0) break;
   2460          get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
   2461                             cc, c_die, False/*td3*/, form );
   2462          if (attr == DW_AT_name && ctsMemSzB > 0) {
   2463             fieldE.Te.Field.name
   2464                = ML_(dinfo_strdup)( "di.readdwarf3.ptD.member.1",
   2465                                     (UChar*)(UWord)cts );
   2466          }
   2467          if (attr == DW_AT_type && ctsSzB > 0) {
   2468             fieldE.Te.Field.typeR = (UWord)cts;
   2469          }
   2470          /* There are 2 different cases for DW_AT_data_member_location.
   2471             If it is a constant class attribute, it contains byte offset
   2472             from the beginning of the containing entity.
   2473             Otherwise it is a location expression.  */
   2474          if (attr == DW_AT_data_member_location && ctsSzB > 0) {
   2475             fieldE.Te.Field.nLoc = -1;
   2476             fieldE.Te.Field.pos.offset = cts;
   2477          } else if (attr == DW_AT_data_member_location && ctsMemSzB > 0) {
   2478             fieldE.Te.Field.nLoc = (UWord)ctsMemSzB;
   2479             fieldE.Te.Field.pos.loc
   2480                = ML_(dinfo_memdup)( "di.readdwarf3.ptD.member.2",
   2481                                     (UChar*)(UWord)cts,
   2482                                     (SizeT)fieldE.Te.Field.nLoc );
   2483          }
   2484       }
   2485       /* Do we have a plausible parent? */
   2486       if (typestack_is_empty(parser)) goto bad_DIE;
   2487       vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
   2488       vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
   2489       if (level != parser->qlevel[parser->sp]+1) goto bad_DIE;
   2490       if (parser->qparentE[parser->sp].tag != Te_TyStOrUn) goto bad_DIE;
   2491       /* Do we have something that looks sane?  If this is a member of a
   2492          struct, we must have a location expression; but if a member
   2493          of a union that is irrelevant (D3 spec sec 5.6.6).  We ought
   2494          to reject in the latter case, but some compilers have been
   2495          observed to emit constant-zero expressions.  So just ignore
   2496          them. */
   2497       parent_is_struct
   2498          = parser->qparentE[parser->sp].Te.TyStOrUn.isStruct;
   2499       if (!fieldE.Te.Field.name)
   2500          fieldE.Te.Field.name
   2501             = ML_(dinfo_strdup)( "di.readdwarf3.ptD.member.3",
   2502                                  "<anon_field>" );
   2503       vg_assert(fieldE.Te.Field.name);
   2504       if (fieldE.Te.Field.typeR == D3_INVALID_CUOFF)
   2505          goto bad_DIE;
   2506       if (fieldE.Te.Field.nLoc) {
   2507          if (!parent_is_struct) {
   2508             /* If this is a union type, pretend we haven't seen the data
   2509                member location expression, as it is by definition
   2510                redundant (it must be zero). */
   2511             if (fieldE.Te.Field.nLoc > 0)
   2512                ML_(dinfo_free)(fieldE.Te.Field.pos.loc);
   2513             fieldE.Te.Field.pos.loc = NULL;
   2514             fieldE.Te.Field.nLoc = 0;
   2515          }
   2516          /* Record this child in the parent */
   2517          fieldE.Te.Field.isStruct = parent_is_struct;
   2518          vg_assert(parser->qparentE[parser->sp].Te.TyStOrUn.fieldRs);
   2519          VG_(addToXA)( parser->qparentE[parser->sp].Te.TyStOrUn.fieldRs,
   2520                        &posn );
   2521          /* And record the child itself */
   2522          goto acquire_Field;
   2523       } else {
   2524          /* Member with no location - this can happen with static
   2525             const members in C++ code which are compile time constants
   2526             that do not exist in the class. They're not of any interest
   2527             to us so we ignore them. */
   2528       }
   2529    }
   2530 
   2531    if (dtag == DW_TAG_array_type) {
   2532       VG_(memset)(&typeE, 0, sizeof(typeE));
   2533       typeE.cuOff = posn;
   2534       typeE.tag   = Te_TyArray;
   2535       typeE.Te.TyArray.typeR = D3_INVALID_CUOFF;
   2536       typeE.Te.TyArray.boundRs
   2537          = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ptD.array_type.1",
   2538                        ML_(dinfo_free),
   2539                        sizeof(UWord) );
   2540       while (True) {
   2541          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
   2542          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
   2543          if (attr == 0 && form == 0) break;
   2544          get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
   2545                             cc, c_die, False/*td3*/, form );
   2546          if (attr == DW_AT_type && ctsSzB > 0) {
   2547             typeE.Te.TyArray.typeR = (UWord)cts;
   2548          }
   2549       }
   2550       if (typeE.Te.TyArray.typeR == D3_INVALID_CUOFF)
   2551          goto bad_DIE;
   2552       /* On't stack! */
   2553       typestack_push( cc, parser, td3, &typeE, level );
   2554       goto acquire_Type;
   2555    }
   2556 
   2557    /* this is a subrange type defining the bounds of an array. */
   2558    if (dtag == DW_TAG_subrange_type
   2559        && subrange_type_denotes_array_bounds(parser, dtag)) {
   2560       Bool have_lower = False;
   2561       Bool have_upper = False;
   2562       Bool have_count = False;
   2563       Long lower = 0;
   2564       Long upper = 0;
   2565 
   2566       switch (parser->language) {
   2567          case 'C': have_lower = True;  lower = 0; break;
   2568          case 'F': have_lower = True;  lower = 1; break;
   2569          case '?': have_lower = False; break;
   2570          case 'A': have_lower = False; break;
   2571          default:  vg_assert(0); /* assured us by handling of
   2572                                     DW_TAG_compile_unit in this fn */
   2573       }
   2574 
   2575       VG_(memset)( &boundE, 0, sizeof(boundE) );
   2576       boundE.cuOff = D3_INVALID_CUOFF;
   2577       boundE.tag   = Te_Bound;
   2578       while (True) {
   2579          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
   2580          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
   2581          if (attr == 0 && form == 0) break;
   2582          get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
   2583                             cc, c_die, False/*td3*/, form );
   2584          if (attr == DW_AT_lower_bound && ctsSzB > 0) {
   2585             lower      = (Long)cts;
   2586             have_lower = True;
   2587          }
   2588          if (attr == DW_AT_upper_bound && ctsSzB > 0) {
   2589             upper      = (Long)cts;
   2590             have_upper = True;
   2591          }
   2592          if (attr == DW_AT_count && ctsSzB > 0) {
   2593             /*count    = (Long)cts;*/
   2594             have_count = True;
   2595          }
   2596       }
   2597       /* FIXME: potentially skip the rest if no parent present, since
   2598          it could be the case that this subrange type is free-standing
   2599          (not being used to describe the bounds of a containing array
   2600          type) */
   2601       /* Do we have a plausible parent? */
   2602       if (typestack_is_empty(parser)) goto bad_DIE;
   2603       vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
   2604       vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
   2605       if (level != parser->qlevel[parser->sp]+1) goto bad_DIE;
   2606       if (parser->qparentE[parser->sp].tag != Te_TyArray) goto bad_DIE;
   2607 
   2608       /* Figure out if we have a definite range or not */
   2609       if (have_lower && have_upper && (!have_count)) {
   2610          boundE.Te.Bound.knownL = True;
   2611          boundE.Te.Bound.knownU = True;
   2612          boundE.Te.Bound.boundL = lower;
   2613          boundE.Te.Bound.boundU = upper;
   2614       }
   2615       else if (have_lower && (!have_upper) && (!have_count)) {
   2616          boundE.Te.Bound.knownL = True;
   2617          boundE.Te.Bound.knownU = False;
   2618          boundE.Te.Bound.boundL = lower;
   2619          boundE.Te.Bound.boundU = 0;
   2620       }
   2621       else if ((!have_lower) && have_upper && (!have_count)) {
   2622          boundE.Te.Bound.knownL = False;
   2623          boundE.Te.Bound.knownU = True;
   2624          boundE.Te.Bound.boundL = 0;
   2625          boundE.Te.Bound.boundU = upper;
   2626       }
   2627       else if ((!have_lower) && (!have_upper) && (!have_count)) {
   2628          boundE.Te.Bound.knownL = False;
   2629          boundE.Te.Bound.knownU = False;
   2630          boundE.Te.Bound.boundL = 0;
   2631          boundE.Te.Bound.boundU = 0;
   2632       } else {
   2633          /* FIXME: handle more cases */
   2634          goto bad_DIE;
   2635       }
   2636 
   2637       /* Record this bound in the parent */
   2638       boundE.cuOff = posn;
   2639       vg_assert(parser->qparentE[parser->sp].Te.TyArray.boundRs);
   2640       VG_(addToXA)( parser->qparentE[parser->sp].Te.TyArray.boundRs,
   2641                     &boundE );
   2642       /* And record the child itself */
   2643       goto acquire_Bound;
   2644    }
   2645 
   2646    /* typedef or subrange_type other than array bounds. */
   2647    if (dtag == DW_TAG_typedef
   2648        || (dtag == DW_TAG_subrange_type
   2649            && !subrange_type_denotes_array_bounds(parser, dtag))) {
   2650       /* subrange_type other than array bound is only for Ada. */
   2651       vg_assert (dtag == DW_TAG_typedef || parser->language == 'A');
   2652       /* We can pick up a new typedef/subrange_type any time. */
   2653       VG_(memset)(&typeE, 0, sizeof(typeE));
   2654       typeE.cuOff = D3_INVALID_CUOFF;
   2655       typeE.tag   = Te_TyTyDef;
   2656       typeE.Te.TyTyDef.name = NULL;
   2657       typeE.Te.TyTyDef.typeR = D3_INVALID_CUOFF;
   2658       while (True) {
   2659          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
   2660          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
   2661          if (attr == 0 && form == 0) break;
   2662          get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
   2663                             cc, c_die, False/*td3*/, form );
   2664          if (attr == DW_AT_name && ctsMemSzB > 0) {
   2665             typeE.Te.TyTyDef.name
   2666                = ML_(dinfo_strdup)( "di.readdwarf3.ptD.typedef.1",
   2667                                     (UChar*)(UWord)cts );
   2668          }
   2669          if (attr == DW_AT_type && ctsSzB > 0) {
   2670             typeE.Te.TyTyDef.typeR = (UWord)cts;
   2671          }
   2672       }
   2673       /* Do we have something that looks sane? */
   2674       if (/* must have a name */
   2675           typeE.Te.TyTyDef.name == NULL
   2676           /* However gcc gnat Ada generates minimal typedef
   2677              such as the below => accept no name for Ada.
   2678              <6><91cc>: DW_TAG_typedef
   2679                 DW_AT_abstract_ori: <9066>
   2680           */
   2681           && parser->language != 'A'
   2682           /* but the referred-to type can be absent */)
   2683          goto bad_DIE;
   2684       else
   2685          goto acquire_Type;
   2686    }
   2687 
   2688    if (dtag == DW_TAG_subroutine_type) {
   2689       /* function type? just record that one fact and ask no
   2690          further questions. */
   2691       VG_(memset)(&typeE, 0, sizeof(typeE));
   2692       typeE.cuOff = D3_INVALID_CUOFF;
   2693       typeE.tag   = Te_TyFn;
   2694       goto acquire_Type;
   2695    }
   2696 
   2697    if (dtag == DW_TAG_volatile_type || dtag == DW_TAG_const_type) {
   2698       Int have_ty = 0;
   2699       VG_(memset)(&typeE, 0, sizeof(typeE));
   2700       typeE.cuOff = D3_INVALID_CUOFF;
   2701       typeE.tag   = Te_TyQual;
   2702       typeE.Te.TyQual.qual
   2703          = dtag == DW_TAG_volatile_type ? 'V' : 'C';
   2704       /* target type defaults to 'void' */
   2705       typeE.Te.TyQual.typeR = D3_FAKEVOID_CUOFF;
   2706       while (True) {
   2707          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
   2708          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
   2709          if (attr == 0 && form == 0) break;
   2710          get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
   2711                             cc, c_die, False/*td3*/, form );
   2712          if (attr == DW_AT_type && ctsSzB > 0) {
   2713             typeE.Te.TyQual.typeR = (UWord)cts;
   2714             have_ty++;
   2715          }
   2716       }
   2717       /* gcc sometimes generates DW_TAG_const/volatile_type without
   2718          DW_AT_type and GDB appears to interpret the type as 'const
   2719          void' (resp. 'volatile void').  So just allow it .. */
   2720       if (have_ty == 1 || have_ty == 0)
   2721          goto acquire_Type;
   2722       else
   2723          goto bad_DIE;
   2724    }
   2725 
   2726    /* else ignore this DIE */
   2727    return;
   2728    /*NOTREACHED*/
   2729 
   2730   acquire_Type:
   2731    if (0) VG_(printf)("YYYY Acquire Type\n");
   2732    vg_assert(ML_(TyEnt__is_type)( &typeE ));
   2733    vg_assert(typeE.cuOff == D3_INVALID_CUOFF || typeE.cuOff == posn);
   2734    typeE.cuOff = posn;
   2735    VG_(addToXA)( tyents, &typeE );
   2736    return;
   2737    /*NOTREACHED*/
   2738 
   2739   acquire_Atom:
   2740    if (0) VG_(printf)("YYYY Acquire Atom\n");
   2741    vg_assert(atomE.tag == Te_Atom);
   2742    vg_assert(atomE.cuOff == D3_INVALID_CUOFF || atomE.cuOff == posn);
   2743    atomE.cuOff = posn;
   2744    VG_(addToXA)( tyents, &atomE );
   2745    return;
   2746    /*NOTREACHED*/
   2747 
   2748   acquire_Field:
   2749    /* For union members, Expr should be absent */
   2750    if (0) VG_(printf)("YYYY Acquire Field\n");
   2751    vg_assert(fieldE.tag == Te_Field);
   2752    vg_assert(fieldE.Te.Field.nLoc <= 0 || fieldE.Te.Field.pos.loc != NULL);
   2753    vg_assert(fieldE.Te.Field.nLoc != 0 || fieldE.Te.Field.pos.loc == NULL);
   2754    if (fieldE.Te.Field.isStruct) {
   2755       vg_assert(fieldE.Te.Field.nLoc != 0);
   2756    } else {
   2757       vg_assert(fieldE.Te.Field.nLoc == 0);
   2758    }
   2759    vg_assert(fieldE.cuOff == D3_INVALID_CUOFF || fieldE.cuOff == posn);
   2760    fieldE.cuOff = posn;
   2761    VG_(addToXA)( tyents, &fieldE );
   2762    return;
   2763    /*NOTREACHED*/
   2764 
   2765   acquire_Bound:
   2766    if (0) VG_(printf)("YYYY Acquire Bound\n");
   2767    vg_assert(boundE.tag == Te_Bound);
   2768    vg_assert(boundE.cuOff == D3_INVALID_CUOFF || boundE.cuOff == posn);
   2769    boundE.cuOff = posn;
   2770    VG_(addToXA)( tyents, &boundE );
   2771    return;
   2772    /*NOTREACHED*/
   2773 
   2774   bad_DIE:
   2775    set_position_of_Cursor( c_die,  saved_die_c_offset );
   2776    set_position_of_Cursor( c_abbv, saved_abbv_c_offset );
   2777    VG_(printf)("\nparse_type_DIE: confused by:\n");
   2778    VG_(printf)(" <%d><%lx>: %s\n", level, posn, ML_(pp_DW_TAG)( dtag ) );
   2779    while (True) {
   2780       DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
   2781       DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
   2782       if (attr == 0 && form == 0) break;
   2783       VG_(printf)("     %18s: ", ML_(pp_DW_AT)(attr));
   2784       /* Get the form contents, so as to print them */
   2785       get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
   2786                          cc, c_die, True, form );
   2787       VG_(printf)("\t\n");
   2788    }
   2789    VG_(printf)("\n");
   2790    cc->barf("parse_type_DIE: confused by the above DIE");
   2791    /*NOTREACHED*/
   2792 }
   2793 
   2794 
   2795 /*------------------------------------------------------------*/
   2796 /*---                                                      ---*/
   2797 /*--- Compression of type DIE information                  ---*/
   2798 /*---                                                      ---*/
   2799 /*------------------------------------------------------------*/
   2800 
   2801 static UWord chase_cuOff ( Bool* changed,
   2802                            XArray* /* of TyEnt */ ents,
   2803                            TyEntIndexCache* ents_cache,
   2804                            UWord cuOff )
   2805 {
   2806    TyEnt* ent;
   2807    ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache, cuOff );
   2808 
   2809    if (!ent) {
   2810       VG_(printf)("chase_cuOff: no entry for 0x%05lx\n", cuOff);
   2811       *changed = False;
   2812       return cuOff;
   2813    }
   2814 
   2815    vg_assert(ent->tag != Te_EMPTY);
   2816    if (ent->tag != Te_INDIR) {
   2817       *changed = False;
   2818       return cuOff;
   2819    } else {
   2820       vg_assert(ent->Te.INDIR.indR < cuOff);
   2821       *changed = True;
   2822       return ent->Te.INDIR.indR;
   2823    }
   2824 }
   2825 
   2826 static
   2827 void chase_cuOffs_in_XArray ( Bool* changed,
   2828                               XArray* /* of TyEnt */ ents,
   2829                               TyEntIndexCache* ents_cache,
   2830                               /*MOD*/XArray* /* of UWord */ cuOffs )
   2831 {
   2832    Bool b2 = False;
   2833    Word i, n = VG_(sizeXA)( cuOffs );
   2834    for (i = 0; i < n; i++) {
   2835       Bool   b = False;
   2836       UWord* p = VG_(indexXA)( cuOffs, i );
   2837       *p = chase_cuOff( &b, ents, ents_cache, *p );
   2838       if (b)
   2839          b2 = True;
   2840    }
   2841    *changed = b2;
   2842 }
   2843 
   2844 static Bool TyEnt__subst_R_fields ( XArray* /* of TyEnt */ ents,
   2845                                     TyEntIndexCache* ents_cache,
   2846                                     /*MOD*/TyEnt* te )
   2847 {
   2848    Bool b, changed = False;
   2849    switch (te->tag) {
   2850       case Te_EMPTY:
   2851          break;
   2852       case Te_INDIR:
   2853          te->Te.INDIR.indR
   2854             = chase_cuOff( &b, ents, ents_cache, te->Te.INDIR.indR );
   2855          if (b) changed = True;
   2856          break;
   2857       case Te_UNKNOWN:
   2858          break;
   2859       case Te_Atom:
   2860          break;
   2861       case Te_Field:
   2862          te->Te.Field.typeR
   2863             = chase_cuOff( &b, ents, ents_cache, te->Te.Field.typeR );
   2864          if (b) changed = True;
   2865          break;
   2866       case Te_Bound:
   2867          break;
   2868       case Te_TyBase:
   2869          break;
   2870       case Te_TyPorR:
   2871          te->Te.TyPorR.typeR
   2872             = chase_cuOff( &b, ents, ents_cache, te->Te.TyPorR.typeR );
   2873          if (b) changed = True;
   2874          break;
   2875       case Te_TyTyDef:
   2876          te->Te.TyTyDef.typeR
   2877             = chase_cuOff( &b, ents, ents_cache, te->Te.TyTyDef.typeR );
   2878          if (b) changed = True;
   2879          break;
   2880       case Te_TyStOrUn:
   2881          chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyStOrUn.fieldRs );
   2882          if (b) changed = True;
   2883          break;
   2884       case Te_TyEnum:
   2885          chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyEnum.atomRs );
   2886          if (b) changed = True;
   2887          break;
   2888       case Te_TyArray:
   2889          te->Te.TyArray.typeR
   2890             = chase_cuOff( &b, ents, ents_cache, te->Te.TyArray.typeR );
   2891          if (b) changed = True;
   2892          chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyArray.boundRs );
   2893          if (b) changed = True;
   2894          break;
   2895       case Te_TyFn:
   2896          break;
   2897       case Te_TyQual:
   2898          te->Te.TyQual.typeR
   2899             = chase_cuOff( &b, ents, ents_cache, te->Te.TyQual.typeR );
   2900          if (b) changed = True;
   2901          break;
   2902       case Te_TyVoid:
   2903          break;
   2904       default:
   2905          ML_(pp_TyEnt)(te);
   2906          vg_assert(0);
   2907    }
   2908    return changed;
   2909 }
   2910 
   2911 /* Make a pass over 'ents'.  For each tyent, inspect the target of any
   2912    'R' or 'Rs' fields (those which refer to other tyents), and replace
   2913    any which point to INDIR nodes with the target of the indirection
   2914    (which should not itself be an indirection).  In summary, this
   2915    routine shorts out all references to indirection nodes. */
   2916 static
   2917 Word dedup_types_substitution_pass ( /*MOD*/XArray* /* of TyEnt */ ents,
   2918                                      TyEntIndexCache* ents_cache )
   2919 {
   2920    Word i, n, nChanged = 0;
   2921    Bool b;
   2922    n = VG_(sizeXA)( ents );
   2923    for (i = 0; i < n; i++) {
   2924       TyEnt* ent = VG_(indexXA)( ents, i );
   2925       vg_assert(ent->tag != Te_EMPTY);
   2926       /* We have to substitute everything, even indirections, so as to
   2927          ensure that chains of indirections don't build up. */
   2928       b = TyEnt__subst_R_fields( ents, ents_cache, ent );
   2929       if (b)
   2930          nChanged++;
   2931    }
   2932 
   2933    return nChanged;
   2934 }
   2935 
   2936 
   2937 /* Make a pass over 'ents', building a dictionary of TyEnts as we go.
   2938    Look up each new tyent in the dictionary in turn.  If it is already
   2939    in the dictionary, replace this tyent with an indirection to the
   2940    existing one, and delete any malloc'd stuff hanging off this one.
   2941    In summary, this routine commons up all tyents that are identical
   2942    as defined by TyEnt__cmp_by_all_except_cuOff. */
   2943 static
   2944 Word dedup_types_commoning_pass ( /*MOD*/XArray* /* of TyEnt */ ents )
   2945 {
   2946    Word    n, i, nDeleted;
   2947    WordFM* dict; /* TyEnt* -> void */
   2948    TyEnt*  ent;
   2949    UWord   keyW, valW;
   2950 
   2951    dict = VG_(newFM)(
   2952              ML_(dinfo_zalloc), "di.readdwarf3.dtcp.1",
   2953              ML_(dinfo_free),
   2954              (Word(*)(UWord,UWord)) ML_(TyEnt__cmp_by_all_except_cuOff)
   2955           );
   2956 
   2957    nDeleted = 0;
   2958    n = VG_(sizeXA)( ents );
   2959    for (i = 0; i < n; i++) {
   2960       ent = VG_(indexXA)( ents, i );
   2961       vg_assert(ent->tag != Te_EMPTY);
   2962 
   2963       /* Ignore indirections, although check that they are
   2964          not forming a cycle. */
   2965       if (ent->tag == Te_INDIR) {
   2966          vg_assert(ent->Te.INDIR.indR < ent->cuOff);
   2967          continue;
   2968       }
   2969 
   2970       keyW = valW = 0;
   2971       if (VG_(lookupFM)( dict, &keyW, &valW, (UWord)ent )) {
   2972          /* it's already in the dictionary. */
   2973          TyEnt* old = (TyEnt*)keyW;
   2974          vg_assert(valW == 0);
   2975          vg_assert(old != ent);
   2976          vg_assert(old->tag != Te_INDIR);
   2977          /* since we are traversing the array in increasing order of
   2978             cuOff: */
   2979          vg_assert(old->cuOff < ent->cuOff);
   2980          /* So anyway, dump this entry and replace it with an
   2981             indirection to the one in the dictionary.  Note that the
   2982             assertion above guarantees that we cannot create cycles of
   2983             indirections, since we are always creating an indirection
   2984             to a tyent with a cuOff lower than this one. */
   2985          ML_(TyEnt__make_EMPTY)( ent );
   2986          ent->tag = Te_INDIR;
   2987          ent->Te.INDIR.indR = old->cuOff;
   2988          nDeleted++;
   2989       } else {
   2990          /* not in dictionary; add it and keep going. */
   2991          VG_(addToFM)( dict, (UWord)ent, 0 );
   2992       }
   2993    }
   2994 
   2995    VG_(deleteFM)( dict, NULL, NULL );
   2996 
   2997    return nDeleted;
   2998 }
   2999 
   3000 
   3001 static
   3002 void dedup_types ( Bool td3,
   3003                    /*MOD*/XArray* /* of TyEnt */ ents,
   3004                    TyEntIndexCache* ents_cache )
   3005 {
   3006    Word m, n, i, nDel, nSubst, nThresh;
   3007    if (0) td3 = True;
   3008 
   3009    n = VG_(sizeXA)( ents );
   3010 
   3011    /* If a commoning pass and a substitution pass both make fewer than
   3012       this many changes, just stop.  It's pointless to burn up CPU
   3013       time trying to compress the last 1% or so out of the array. */
   3014    nThresh = n / 200;
   3015 
   3016    /* First we must sort .ents by its .cuOff fields, so we
   3017       can index into it. */
   3018    VG_(setCmpFnXA)(
   3019       ents,
   3020       (Int(*)(void*,void*)) ML_(TyEnt__cmp_by_cuOff_only)
   3021    );
   3022    VG_(sortXA)( ents );
   3023 
   3024    /* Now repeatedly do commoning and substitution passes over
   3025       the array, until there are no more changes. */
   3026    do {
   3027       nDel   = dedup_types_commoning_pass ( ents );
   3028       nSubst = dedup_types_substitution_pass ( ents, ents_cache );
   3029       vg_assert(nDel >= 0 && nSubst >= 0);
   3030       TRACE_D3("   %ld deletions, %ld substitutions\n", nDel, nSubst);
   3031    } while (nDel > nThresh || nSubst > nThresh);
   3032 
   3033    /* Sanity check: all INDIR nodes should point at a non-INDIR thing.
   3034       In fact this should be true at the end of every loop iteration
   3035       above (a commoning pass followed by a substitution pass), but
   3036       checking it on every iteration is excessively expensive.  Note,
   3037       this loop also computes 'm' for the stats printing below it. */
   3038    m = 0;
   3039    n = VG_(sizeXA)( ents );
   3040    for (i = 0; i < n; i++) {
   3041       TyEnt *ent, *ind;
   3042       ent = VG_(indexXA)( ents, i );
   3043       if (ent->tag != Te_INDIR) continue;
   3044       m++;
   3045       ind = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
   3046                                          ent->Te.INDIR.indR );
   3047       vg_assert(ind);
   3048       vg_assert(ind->tag != Te_INDIR);
   3049    }
   3050 
   3051    TRACE_D3("Overall: %ld before, %ld after\n", n, n-m);
   3052 }
   3053 
   3054 
   3055 /*------------------------------------------------------------*/
   3056 /*---                                                      ---*/
   3057 /*--- Resolution of references to type DIEs                ---*/
   3058 /*---                                                      ---*/
   3059 /*------------------------------------------------------------*/
   3060 
   3061 /* Make a pass through the (temporary) variables array.  Examine the
   3062    type of each variable, check is it found, and chase any Te_INDIRs.
   3063    Postcondition is: each variable has a typeR field that refers to a
   3064    valid type in tyents, or a Te_UNKNOWN, and is certainly guaranteed
   3065    not to refer to a Te_INDIR.  (This is so that we can throw all the
   3066    Te_INDIRs away later). */
   3067 
   3068 __attribute__((noinline))
   3069 static void resolve_variable_types (
   3070                void (*barf)( HChar* ) __attribute__((noreturn)),
   3071                /*R-O*/XArray* /* of TyEnt */ ents,
   3072                /*MOD*/TyEntIndexCache* ents_cache,
   3073                /*MOD*/XArray* /* of TempVar* */ vars
   3074             )
   3075 {
   3076    Word i, n;
   3077    n = VG_(sizeXA)( vars );
   3078    for (i = 0; i < n; i++) {
   3079       TempVar* var = *(TempVar**)VG_(indexXA)( vars, i );
   3080       /* This is the stated type of the variable.  But it might be
   3081          an indirection, so be careful. */
   3082       TyEnt* ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
   3083                                                 var->typeR );
   3084       if (ent && ent->tag == Te_INDIR) {
   3085          ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
   3086                                             ent->Te.INDIR.indR );
   3087          vg_assert(ent);
   3088          vg_assert(ent->tag != Te_INDIR);
   3089       }
   3090 
   3091       /* Deal first with "normal" cases */
   3092       if (ent && ML_(TyEnt__is_type)(ent)) {
   3093          var->typeR = ent->cuOff;
   3094          continue;
   3095       }
   3096 
   3097       /* If there's no ent, it probably we did not manage to read a
   3098          type at the cuOffset which is stated as being this variable's
   3099          type.  Maybe a deficiency in parse_type_DIE.  Complain. */
   3100       if (ent == NULL) {
   3101          VG_(printf)("\n: Invalid cuOff = 0x%05lx\n", var->typeR );
   3102          barf("resolve_variable_types: "
   3103               "cuOff does not refer to a known type");
   3104       }
   3105       vg_assert(ent);
   3106       /* If ent has any other tag, something bad happened, along the
   3107          lines of var->typeR not referring to a type at all. */
   3108       vg_assert(ent->tag == Te_UNKNOWN);
   3109       /* Just accept it; the type will be useless, but at least keep
   3110          going. */
   3111       var->typeR = ent->cuOff;
   3112    }
   3113 }
   3114 
   3115 
   3116 /*------------------------------------------------------------*/
   3117 /*---                                                      ---*/
   3118 /*--- Parsing of Compilation Units                         ---*/
   3119 /*---                                                      ---*/
   3120 /*------------------------------------------------------------*/
   3121 
   3122 static Int cmp_TempVar_by_dioff ( void* v1, void* v2 ) {
   3123    TempVar* t1 = *(TempVar**)v1;
   3124    TempVar* t2 = *(TempVar**)v2;
   3125    if (t1->dioff < t2->dioff) return -1;
   3126    if (t1->dioff > t2->dioff) return 1;
   3127    return 0;
   3128 }
   3129 
   3130 static void read_DIE (
   3131    /*MOD*/WordFM* /* of (XArray* of AddrRange, void) */ rangestree,
   3132    /*MOD*/XArray* /* of TyEnt */ tyents,
   3133    /*MOD*/XArray* /* of TempVar* */ tempvars,
   3134    /*MOD*/XArray* /* of GExpr* */ gexprs,
   3135    /*MOD*/D3TypeParser* typarser,
   3136    /*MOD*/D3VarParser* varparser,
   3137    Cursor* c, Bool td3, CUConst* cc, Int level
   3138 )
   3139 {
   3140    Cursor abbv;
   3141    ULong  atag, abbv_code;
   3142    UWord  posn;
   3143    UInt   has_children;
   3144    UWord  start_die_c_offset, start_abbv_c_offset;
   3145    UWord  after_die_c_offset, after_abbv_c_offset;
   3146 
   3147    /* --- Deal with this DIE --- */
   3148    posn      = get_position_of_Cursor( c );
   3149    abbv_code = get_ULEB128( c );
   3150    set_abbv_Cursor( &abbv, td3, cc, abbv_code );
   3151    atag      = get_ULEB128( &abbv );
   3152    TRACE_D3("\n");
   3153    TRACE_D3(" <%d><%lx>: Abbrev Number: %llu (%s)\n",
   3154             level, posn, abbv_code, ML_(pp_DW_TAG)( atag ) );
   3155 
   3156    if (atag == 0)
   3157       cc->barf("read_DIE: invalid zero tag on DIE");
   3158 
   3159    has_children = get_UChar( &abbv );
   3160    if (has_children != DW_children_no && has_children != DW_children_yes)
   3161       cc->barf("read_DIE: invalid has_children value");
   3162 
   3163    /* We're set up to look at the fields of this DIE.  Hand it off to
   3164       any parser(s) that want to see it.  Since they will in general
   3165       advance both the DIE and abbrev cursors, remember their current
   3166       settings so that we can then back up and do one final pass over
   3167       the DIE, to print out its contents. */
   3168 
   3169    start_die_c_offset  = get_position_of_Cursor( c );
   3170    start_abbv_c_offset = get_position_of_Cursor( &abbv );
   3171 
   3172    while (True) {
   3173       ULong cts;
   3174       Int   ctsSzB;
   3175       UWord ctsMemSzB;
   3176       ULong at_name = get_ULEB128( &abbv );
   3177       ULong at_form = get_ULEB128( &abbv );
   3178       if (at_name == 0 && at_form == 0) break;
   3179       TRACE_D3("     %18s: ", ML_(pp_DW_AT)(at_name));
   3180       /* Get the form contents, but ignore them; the only purpose is
   3181          to print them, if td3 is True */
   3182       get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
   3183                          cc, c, td3, (DW_FORM)at_form );
   3184       TRACE_D3("\t");
   3185       TRACE_D3("\n");
   3186    }
   3187 
   3188    after_die_c_offset  = get_position_of_Cursor( c );
   3189    after_abbv_c_offset = get_position_of_Cursor( &abbv );
   3190 
   3191    set_position_of_Cursor( c,     start_die_c_offset );
   3192    set_position_of_Cursor( &abbv, start_abbv_c_offset );
   3193 
   3194    parse_type_DIE( tyents,
   3195                    typarser,
   3196                    (DW_TAG)atag,
   3197                    posn,
   3198                    level,
   3199                    c,     /* DIE cursor */
   3200                    &abbv, /* abbrev cursor */
   3201                    cc,
   3202                    td3 );
   3203 
   3204    set_position_of_Cursor( c,     start_die_c_offset );
   3205    set_position_of_Cursor( &abbv, start_abbv_c_offset );
   3206 
   3207    parse_var_DIE( rangestree,
   3208                   tempvars,
   3209                   gexprs,
   3210                   varparser,
   3211                   (DW_TAG)atag,
   3212                   posn,
   3213                   level,
   3214                   c,     /* DIE cursor */
   3215                   &abbv, /* abbrev cursor */
   3216                   cc,
   3217                   td3 );
   3218 
   3219    set_position_of_Cursor( c,     after_die_c_offset );
   3220    set_position_of_Cursor( &abbv, after_abbv_c_offset );
   3221 
   3222    /* --- Now recurse into its children, if any --- */
   3223    if (has_children == DW_children_yes) {
   3224       if (0) TRACE_D3("BEGIN children of level %d\n", level);
   3225       while (True) {
   3226          atag = peek_ULEB128( c );
   3227          if (atag == 0) break;
   3228          read_DIE( rangestree, tyents, tempvars, gexprs,
   3229                    typarser, varparser,
   3230                    c, td3, cc, level+1 );
   3231       }
   3232       /* Now we need to eat the terminating zero */
   3233       atag = get_ULEB128( c );
   3234       vg_assert(atag == 0);
   3235       if (0) TRACE_D3("END children of level %d\n", level);
   3236    }
   3237 
   3238 }
   3239 
   3240 
   3241 static
   3242 void new_dwarf3_reader_wrk (
   3243    struct _DebugInfo* di,
   3244    __attribute__((noreturn)) void (*barf)( HChar* ),
   3245    UChar* debug_info_img,   SizeT debug_info_sz,
   3246    UChar* debug_abbv_img,   SizeT debug_abbv_sz,
   3247    UChar* debug_line_img,   SizeT debug_line_sz,
   3248    UChar* debug_str_img,    SizeT debug_str_sz,
   3249    UChar* debug_ranges_img, SizeT debug_ranges_sz,
   3250    UChar* debug_loc_img,    SizeT debug_loc_sz
   3251 )
   3252 {
   3253    XArray* /* of TyEnt */     tyents;
   3254    XArray* /* of TyEnt */     tyents_to_keep;
   3255    XArray* /* of GExpr* */    gexprs;
   3256    XArray* /* of TempVar* */  tempvars;
   3257    WordFM* /* of (XArray* of AddrRange, void) */ rangestree;
   3258    TyEntIndexCache* tyents_cache = NULL;
   3259    TyEntIndexCache* tyents_to_keep_cache = NULL;
   3260    TempVar *varp, *varp2;
   3261    GExpr* gexpr;
   3262    Cursor abbv; /* for showing .debug_abbrev */
   3263    Cursor info; /* primary cursor for parsing .debug_info */
   3264    Cursor ranges; /* for showing .debug_ranges */
   3265    D3TypeParser typarser;
   3266    D3VarParser varparser;
   3267    Addr  dr_base;
   3268    UWord dr_offset;
   3269    Word  i, j, n;
   3270    Bool td3 = di->trace_symtab;
   3271    XArray* /* of TempVar* */ dioff_lookup_tab;
   3272 #if 0
   3273    /* This doesn't work properly because it assumes all entries are
   3274       packed end to end, with no holes.  But that doesn't always
   3275       appear to be the case, so it loses sync.  And the D3 spec
   3276       doesn't appear to require a no-hole situation either. */
   3277    /* Display .debug_loc */
   3278    Addr  dl_base;
   3279    UWord dl_offset;
   3280    Cursor loc; /* for showing .debug_loc */
   3281    TRACE_SYMTAB("\n");
   3282    TRACE_SYMTAB("\n------ The contents of .debug_loc ------\n");
   3283    TRACE_SYMTAB("    Offset   Begin    End      Expression\n");
   3284    init_Cursor( &loc, debug_loc_img,
   3285                 debug_loc_sz, 0, barf,
   3286                 "Overrun whilst reading .debug_loc section(1)" );
   3287    dl_base = 0;
   3288    dl_offset = 0;
   3289    while (True) {
   3290       UWord  w1, w2;
   3291       UWord  len;
   3292       if (is_at_end_Cursor( &loc ))
   3293          break;
   3294 
   3295       /* Read a (host-)word pair.  This is something of a hack since
   3296          the word size to read is really dictated by the ELF file;
   3297          however, we assume we're reading a file with the same
   3298          word-sizeness as the host.  Reasonably enough. */
   3299       w1 = get_UWord( &loc );
   3300       w2 = get_UWord( &loc );
   3301 
   3302       if (w1 == 0 && w2 == 0) {
   3303          /* end of list.  reset 'base' */
   3304          TRACE_D3("    %08lx <End of list>\n", dl_offset);
   3305          dl_base = 0;
   3306          dl_offset = get_position_of_Cursor( &loc );
   3307          continue;
   3308       }
   3309 
   3310       if (w1 == -1UL) {
   3311          /* new value for 'base' */
   3312          TRACE_D3("    %08lx %16lx %08lx (base address)\n",
   3313                   dl_offset, w1, w2);
   3314          dl_base = w2;
   3315          continue;
   3316       }
   3317 
   3318       /* else a location expression follows */
   3319       TRACE_D3("    %08lx %08lx %08lx ",
   3320                dl_offset, w1 + dl_base, w2 + dl_base);
   3321       len = (UWord)get_UShort( &loc );
   3322       while (len > 0) {
   3323          UChar byte = get_UChar( &loc );
   3324          TRACE_D3("%02x", (UInt)byte);
   3325          len--;
   3326       }
   3327       TRACE_SYMTAB("\n");
   3328    }
   3329 #endif
   3330 
   3331    /* Display .debug_ranges */
   3332    TRACE_SYMTAB("\n");
   3333    TRACE_SYMTAB("\n------ The contents of .debug_ranges ------\n");
   3334    TRACE_SYMTAB("    Offset   Begin    End\n");
   3335    init_Cursor( &ranges, debug_ranges_img,
   3336                 debug_ranges_sz, 0, barf,
   3337                 "Overrun whilst reading .debug_ranges section(1)" );
   3338    dr_base = 0;
   3339    dr_offset = 0;
   3340    while (True) {
   3341       UWord  w1, w2;
   3342 
   3343       if (is_at_end_Cursor( &ranges ))
   3344          break;
   3345 
   3346       /* Read a (host-)word pair.  This is something of a hack since
   3347          the word size to read is really dictated by the ELF file;
   3348          however, we assume we're reading a file with the same
   3349          word-sizeness as the host.  Reasonably enough. */
   3350       w1 = get_UWord( &ranges );
   3351       w2 = get_UWord( &ranges );
   3352 
   3353       if (w1 == 0 && w2 == 0) {
   3354          /* end of list.  reset 'base' */
   3355          TRACE_D3("    %08lx <End of list>\n", dr_offset);
   3356          dr_base = 0;
   3357          dr_offset = get_position_of_Cursor( &ranges );
   3358          continue;
   3359       }
   3360 
   3361       if (w1 == -1UL) {
   3362          /* new value for 'base' */
   3363          TRACE_D3("    %08lx %16lx %08lx (base address)\n",
   3364                   dr_offset, w1, w2);
   3365          dr_base = w2;
   3366          continue;
   3367       }
   3368 
   3369       /* else a range [w1+base, w2+base) is denoted */
   3370       TRACE_D3("    %08lx %08lx %08lx\n",
   3371                dr_offset, w1 + dr_base, w2 + dr_base);
   3372    }
   3373 
   3374    /* Display .debug_abbrev */
   3375    init_Cursor( &abbv, debug_abbv_img, debug_abbv_sz, 0, barf,
   3376                 "Overrun whilst reading .debug_abbrev section" );
   3377    TRACE_SYMTAB("\n");
   3378    TRACE_SYMTAB("\n------ The contents of .debug_abbrev ------\n");
   3379    while (True) {
   3380       if (is_at_end_Cursor( &abbv ))
   3381          break;
   3382       /* Read one abbreviation table */
   3383       TRACE_D3("  Number TAG\n");
   3384       while (True) {
   3385          ULong atag;
   3386          UInt  has_children;
   3387          ULong acode = get_ULEB128( &abbv );
   3388          if (acode == 0) break; /* end of the table */
   3389          atag = get_ULEB128( &abbv );
   3390          has_children = get_UChar( &abbv );
   3391          TRACE_D3("   %llu      %s    [%s]\n",
   3392                   acode, ML_(pp_DW_TAG)(atag),
   3393                          ML_(pp_DW_children)(has_children));
   3394          while (True) {
   3395             ULong at_name = get_ULEB128( &abbv );
   3396             ULong at_form = get_ULEB128( &abbv );
   3397             if (at_name == 0 && at_form == 0) break;
   3398             TRACE_D3("    %18s %s\n",
   3399                      ML_(pp_DW_AT)(at_name), ML_(pp_DW_FORM)(at_form));
   3400          }
   3401       }
   3402    }
   3403    TRACE_SYMTAB("\n");
   3404 
   3405    /* Now loop over the Compilation Units listed in the .debug_info
   3406       section (see D3SPEC sec 7.5) paras 1 and 2.  Each compilation
   3407       unit contains a Compilation Unit Header followed by precisely
   3408       one DW_TAG_compile_unit or DW_TAG_partial_unit DIE. */
   3409    init_Cursor( &info, debug_info_img, debug_info_sz, 0, barf,
   3410                 "Overrun whilst reading .debug_info section" );
   3411 
   3412    /* We'll park the harvested type information in here.  Also create
   3413       a fake "void" entry with offset D3_FAKEVOID_CUOFF, so we always
   3414       have at least one type entry to refer to.  D3_FAKEVOID_CUOFF is
   3415       huge and presumably will not occur in any valid DWARF3 file --
   3416       it would need to have a .debug_info section 4GB long for that to
   3417       happen.  These type entries end up in the DebugInfo. */
   3418    tyents = VG_(newXA)( ML_(dinfo_zalloc),
   3419                         "di.readdwarf3.ndrw.1 (TyEnt temp array)",
   3420                         ML_(dinfo_free), sizeof(TyEnt) );
   3421    { TyEnt tyent;
   3422      VG_(memset)(&tyent, 0, sizeof(tyent));
   3423      tyent.tag   = Te_TyVoid;
   3424      tyent.cuOff = D3_FAKEVOID_CUOFF;
   3425      tyent.Te.TyVoid.isFake = True;
   3426      VG_(addToXA)( tyents, &tyent );
   3427    }
   3428    { TyEnt tyent;
   3429      VG_(memset)(&tyent, 0, sizeof(tyent));
   3430      tyent.tag   = Te_UNKNOWN;
   3431      tyent.cuOff = D3_INVALID_CUOFF;
   3432      VG_(addToXA)( tyents, &tyent );
   3433    }
   3434 
   3435    /* This is a tree used to unique-ify the range lists that are
   3436       manufactured by parse_var_DIE.  References to the keys in the
   3437       tree wind up in .rngMany fields in TempVars.  We'll need to
   3438       delete this tree, and the XArrays attached to it, at the end of
   3439       this function. */
   3440    rangestree = VG_(newFM)( ML_(dinfo_zalloc),
   3441                             "di.readdwarf3.ndrw.2 (rangestree)",
   3442                             ML_(dinfo_free),
   3443                             (Word(*)(UWord,UWord))cmp__XArrays_of_AddrRange );
   3444 
   3445    /* List of variables we're accumulating.  These don't end up in the
   3446       DebugInfo; instead their contents are handed to ML_(addVar) and
   3447       the list elements are then deleted. */
   3448    tempvars = VG_(newXA)( ML_(dinfo_zalloc),
   3449                           "di.readdwarf3.ndrw.3 (TempVar*s array)",
   3450                           ML_(dinfo_free),
   3451                           sizeof(TempVar*) );
   3452 
   3453    /* List of GExprs we're accumulating.  These wind up in the
   3454       DebugInfo. */
   3455    gexprs = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.4",
   3456                         ML_(dinfo_free), sizeof(GExpr*) );
   3457 
   3458    /* We need a D3TypeParser to keep track of partially constructed
   3459       types.  It'll be discarded as soon as we've completed the CU,
   3460       since the resulting information is tipped in to 'tyents' as it
   3461       is generated. */
   3462    VG_(memset)( &typarser, 0, sizeof(typarser) );
   3463    typarser.sp = -1;
   3464    typarser.language = '?';
   3465    for (i = 0; i < N_D3_TYPE_STACK; i++) {
   3466       typarser.qparentE[i].tag   = Te_EMPTY;
   3467       typarser.qparentE[i].cuOff = D3_INVALID_CUOFF;
   3468    }
   3469 
   3470    VG_(memset)( &varparser, 0, sizeof(varparser) );
   3471    varparser.sp = -1;
   3472 
   3473    TRACE_D3("\n------ Parsing .debug_info section ------\n");
   3474    while (True) {
   3475       UWord   cu_start_offset, cu_offset_now;
   3476       CUConst cc;
   3477       /* It may be that the stated size of this CU is larger than the
   3478          amount of stuff actually in it.  icc9 seems to generate CUs
   3479          thusly.  We use these variables to figure out if this is
   3480          indeed the case, and if so how many bytes we need to skip to
   3481          get to the start of the next CU.  Not skipping those bytes
   3482          causes us to misidentify the start of the next CU, and it all
   3483          goes badly wrong after that (not surprisingly). */
   3484       UWord cu_size_including_IniLen, cu_amount_used;
   3485 
   3486       /* It seems icc9 finishes the DIE info before debug_info_sz
   3487          bytes have been used up.  So be flexible, and declare the
   3488          sequence complete if there is not enough remaining bytes to
   3489          hold even the smallest conceivable CU header.  (11 bytes I
   3490          reckon). */
   3491       /* JRS 23Jan09: I suspect this is no longer necessary now that
   3492          the code below contains a 'while (cu_amount_used <
   3493          cu_size_including_IniLen ...'  style loop, which skips over
   3494          any leftover bytes at the end of a CU in the case where the
   3495          CU's stated size is larger than its actual size (as
   3496          determined by reading all its DIEs).  However, for prudence,
   3497          I'll leave the following test in place.  I can't see that a
   3498          CU header can be smaller than 11 bytes, so I don't think
   3499          there's any harm possible through the test -- it just adds
   3500          robustness. */
   3501       Word avail = get_remaining_length_Cursor( &info );
   3502       if (avail < 11) {
   3503          if (avail > 0)
   3504             TRACE_D3("new_dwarf3_reader_wrk: warning: "
   3505                      "%ld unused bytes after end of DIEs\n", avail);
   3506          break;
   3507       }
   3508 
   3509       /* Check the varparser's stack is in a sane state. */
   3510       vg_assert(varparser.sp == -1);
   3511       for (i = 0; i < N_D3_VAR_STACK; i++) {
   3512          vg_assert(varparser.ranges[i] == NULL);
   3513          vg_assert(varparser.level[i] == 0);
   3514       }
   3515       for (i = 0; i < N_D3_TYPE_STACK; i++) {
   3516          vg_assert(typarser.qparentE[i].cuOff == D3_INVALID_CUOFF);
   3517          vg_assert(typarser.qparentE[i].tag   == Te_EMPTY);
   3518          vg_assert(typarser.qlevel[i] == 0);
   3519       }
   3520 
   3521       cu_start_offset = get_position_of_Cursor( &info );
   3522       TRACE_D3("\n");
   3523       TRACE_D3("  Compilation Unit @ offset 0x%lx:\n", cu_start_offset);
   3524       /* parse_CU_header initialises the CU's set_abbv_Cursor cache
   3525          (saC_cache) */
   3526       parse_CU_Header( &cc, td3, &info,
   3527                        (UChar*)debug_abbv_img, debug_abbv_sz );
   3528       cc.debug_str_img    = debug_str_img;
   3529       cc.debug_str_sz     = debug_str_sz;
   3530       cc.debug_ranges_img = debug_ranges_img;
   3531       cc.debug_ranges_sz  = debug_ranges_sz;
   3532       cc.debug_loc_img    = debug_loc_img;
   3533       cc.debug_loc_sz     = debug_loc_sz;
   3534       cc.debug_line_img   = debug_line_img;
   3535       cc.debug_line_sz    = debug_line_sz;
   3536       cc.debug_info_img   = debug_info_img;
   3537       cc.debug_info_sz    = debug_info_sz;
   3538       cc.cu_start_offset  = cu_start_offset;
   3539       cc.di = di;
   3540       /* The CU's svma can be deduced by looking at the AT_low_pc
   3541          value in the top level TAG_compile_unit, which is the topmost
   3542          DIE.  We'll leave it for the 'varparser' to acquire that info
   3543          and fill it in -- since it is the only party to want to know
   3544          it. */
   3545       cc.cu_svma_known = False;
   3546       cc.cu_svma       = 0;
   3547 
   3548       /* Create a fake outermost-level range covering the entire
   3549          address range.  So we always have *something* to catch all
   3550          variable declarations. */
   3551       varstack_push( &cc, &varparser, td3,
   3552                      unitary_range_list(0UL, ~0UL),
   3553                      -1, False/*isFunc*/, NULL/*fbGX*/ );
   3554 
   3555       /* And set up the file name table.  When we come across the top
   3556          level DIE for this CU (which is what the next call to
   3557          read_DIE should process) we will copy all the file names out
   3558          of the .debug_line img area and use this table to look up the
   3559          copies when we later see filename numbers in DW_TAG_variables
   3560          etc. */
   3561       vg_assert(!varparser.filenameTable );
   3562       varparser.filenameTable
   3563          = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.5",
   3564                        ML_(dinfo_free),
   3565                        sizeof(UChar*) );
   3566       vg_assert(varparser.filenameTable);
   3567 
   3568       /* Now read the one-and-only top-level DIE for this CU. */
   3569       vg_assert(varparser.sp == 0);
   3570       read_DIE( rangestree,
   3571                 tyents, tempvars, gexprs,
   3572                 &typarser, &varparser,
   3573                 &info, td3, &cc, 0 );
   3574 
   3575       cu_offset_now = get_position_of_Cursor( &info );
   3576 
   3577       if (0) VG_(printf)("Travelled: %lu  size %llu\n",
   3578                          cu_offset_now - cc.cu_start_offset,
   3579                          cc.unit_length + (cc.is_dw64 ? 12 : 4));
   3580 
   3581       /* How big the CU claims it is .. */
   3582       cu_size_including_IniLen = cc.unit_length + (cc.is_dw64 ? 12 : 4);
   3583       /* .. vs how big we have found it to be */
   3584       cu_amount_used = cu_offset_now - cc.cu_start_offset;
   3585 
   3586       if (1) TRACE_D3("offset now %ld, d-i-size %ld\n",
   3587                       cu_offset_now, debug_info_sz);
   3588       if (cu_offset_now > debug_info_sz)
   3589          barf("toplevel DIEs beyond end of CU");
   3590 
   3591       /* If the CU is bigger than it claims to be, we've got a serious
   3592          problem. */
   3593       if (cu_amount_used > cu_size_including_IniLen)
   3594          barf("CU's actual size appears to be larger than it claims it is");
   3595 
   3596       /* If the CU is smaller than it claims to be, we need to skip some
   3597          bytes.  Loop updates cu_offset_now and cu_amount_used. */
   3598       while (cu_amount_used < cu_size_including_IniLen
   3599              && get_remaining_length_Cursor( &info ) > 0) {
   3600          if (0) VG_(printf)("SKIP\n");
   3601          (void)get_UChar( &info );
   3602          cu_offset_now = get_position_of_Cursor( &info );
   3603          cu_amount_used = cu_offset_now - cc.cu_start_offset;
   3604       }
   3605 
   3606       if (cu_offset_now == debug_info_sz)
   3607          break;
   3608 
   3609       /* Preen to level -2.  DIEs have level >= 0 so -2 cannot occur
   3610          anywhere else at all.  Our fake the-entire-address-space
   3611          range is at level -1, so preening to -2 should completely
   3612          empty the stack out. */
   3613       TRACE_D3("\n");
   3614       varstack_preen( &varparser, td3, -2 );
   3615       /* Similarly, empty the type stack out. */
   3616       typestack_preen( &typarser, td3, -2 );
   3617       /* else keep going */
   3618 
   3619       TRACE_D3("set_abbv_Cursor cache: %lu queries, %lu misses\n",
   3620                cc.saC_cache_queries, cc.saC_cache_misses);
   3621 
   3622       vg_assert(varparser.filenameTable );
   3623       VG_(deleteXA)( varparser.filenameTable );
   3624       varparser.filenameTable = NULL;
   3625    }
   3626 
   3627    /* From here on we're post-processing the stuff we got
   3628       out of the .debug_info section. */
   3629    if (td3) {
   3630       TRACE_D3("\n");
   3631       ML_(pp_TyEnts)(tyents, "Initial type entity (TyEnt) array");
   3632       TRACE_D3("\n");
   3633       TRACE_D3("------ Compressing type entries ------\n");
   3634    }
   3635 
   3636    tyents_cache = ML_(dinfo_zalloc)( "di.readdwarf3.ndrw.6",
   3637                                      sizeof(TyEntIndexCache) );
   3638    ML_(TyEntIndexCache__invalidate)( tyents_cache );
   3639    dedup_types( td3, tyents, tyents_cache );
   3640    if (td3) {
   3641       TRACE_D3("\n");
   3642       ML_(pp_TyEnts)(tyents, "After type entity (TyEnt) compression");
   3643    }
   3644 
   3645    TRACE_D3("\n");
   3646    TRACE_D3("------ Resolving the types of variables ------\n" );
   3647    resolve_variable_types( barf, tyents, tyents_cache, tempvars );
   3648 
   3649    /* Copy all the non-INDIR tyents into a new table.  For large
   3650       .so's, about 90% of the tyents will by now have been resolved to
   3651       INDIRs, and we no longer need them, and so don't need to store
   3652       them. */
   3653    tyents_to_keep
   3654       = VG_(newXA)( ML_(dinfo_zalloc),
   3655                     "di.readdwarf3.ndrw.7 (TyEnt to-keep array)",
   3656                     ML_(dinfo_free), sizeof(TyEnt) );
   3657    n = VG_(sizeXA)( tyents );
   3658    for (i = 0; i < n; i++) {
   3659       TyEnt* ent = VG_(indexXA)( tyents, i );
   3660       if (ent->tag != Te_INDIR)
   3661          VG_(addToXA)( tyents_to_keep, ent );
   3662    }
   3663 
   3664    VG_(deleteXA)( tyents );
   3665    tyents = NULL;
   3666    ML_(dinfo_free)( tyents_cache );
   3667    tyents_cache = NULL;
   3668 
   3669    /* Sort tyents_to_keep so we can lookup in it.  A complete (if
   3670       minor) waste of time, since tyents itself is sorted, but
   3671       necessary since VG_(lookupXA) refuses to cooperate if we
   3672       don't. */
   3673    VG_(setCmpFnXA)(
   3674       tyents_to_keep,
   3675       (Int(*)(void*,void*)) ML_(TyEnt__cmp_by_cuOff_only)
   3676    );
   3677    VG_(sortXA)( tyents_to_keep );
   3678 
   3679    /* Enable cacheing on tyents_to_keep */
   3680    tyents_to_keep_cache
   3681       = ML_(dinfo_zalloc)( "di.readdwarf3.ndrw.8",
   3682                            sizeof(TyEntIndexCache) );
   3683    ML_(TyEntIndexCache__invalidate)( tyents_to_keep_cache );
   3684 
   3685    /* And record the tyents in the DebugInfo.  We do this before
   3686       starting to hand variables to ML_(addVar), since if ML_(addVar)
   3687       wants to do debug printing (of the types of said vars) then it
   3688       will need the tyents.*/
   3689    vg_assert(!di->admin_tyents);
   3690    di->admin_tyents = tyents_to_keep;
   3691 
   3692    /* Bias all the location expressions. */
   3693    TRACE_D3("\n");
   3694    TRACE_D3("------ Biasing the location expressions ------\n" );
   3695 
   3696    n = VG_(sizeXA)( gexprs );
   3697    for (i = 0; i < n; i++) {
   3698       gexpr = *(GExpr**)VG_(indexXA)( gexprs, i );
   3699       bias_GX( gexpr, di );
   3700    }
   3701 
   3702    TRACE_D3("\n");
   3703    TRACE_D3("------ Acquired the following variables: ------\n\n");
   3704 
   3705    /* Park (pointers to) all the vars in an XArray, so we can look up
   3706       abstract origins quickly.  The array is sorted (hence, looked-up
   3707       by) the .dioff fields.  Since the .dioffs should be in strictly
   3708       ascending order, there is no need to sort the array after
   3709       construction.  The ascendingness is however asserted for. */
   3710    dioff_lookup_tab
   3711       = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.9",
   3712                     ML_(dinfo_free),
   3713                     sizeof(TempVar*) );
   3714    vg_assert(dioff_lookup_tab);
   3715 
   3716    n = VG_(sizeXA)( tempvars );
   3717    for (i = 0; i < n; i++) {
   3718       varp = *(TempVar**)VG_(indexXA)( tempvars, i );
   3719       if (i > 0) {
   3720          varp2 = *(TempVar**)VG_(indexXA)( tempvars, i-1 );
   3721          /* why should this hold?  Only, I think, because we've
   3722             constructed the array by reading .debug_info sequentially,
   3723             and so the array .dioff fields should reflect that, and be
   3724             strictly ascending. */
   3725          vg_assert(varp2->dioff < varp->dioff);
   3726       }
   3727       VG_(addToXA)( dioff_lookup_tab, &varp );
   3728    }
   3729    VG_(setCmpFnXA)( dioff_lookup_tab, cmp_TempVar_by_dioff );
   3730    VG_(sortXA)( dioff_lookup_tab ); /* POINTLESS; FIXME: rm */
   3731 
   3732    /* Now visit each var.  Collect up as much info as possible for
   3733       each var and hand it to ML_(addVar). */
   3734    n = VG_(sizeXA)( tempvars );
   3735    for (j = 0; j < n; j++) {
   3736       TyEnt* ent;
   3737       varp = *(TempVar**)VG_(indexXA)( tempvars, j );
   3738 
   3739       /* Possibly show .. */
   3740       if (td3) {
   3741          VG_(printf)("<%lx> addVar: level %d: %s :: ",
   3742                      varp->dioff,
   3743                      varp->level,
   3744                      varp->name ? varp->name : (UChar*)"<anon_var>" );
   3745          if (varp->typeR) {
   3746             ML_(pp_TyEnt_C_ishly)( tyents_to_keep, varp->typeR );
   3747          } else {
   3748             VG_(printf)("NULL");
   3749          }
   3750          VG_(printf)("\n  Loc=");
   3751          if (varp->gexpr) {
   3752             ML_(pp_GX)(varp->gexpr);
   3753          } else {
   3754             VG_(printf)("NULL");
   3755          }
   3756          VG_(printf)("\n");
   3757          if (varp->fbGX) {
   3758             VG_(printf)("  FrB=");
   3759             ML_(pp_GX)( varp->fbGX );
   3760             VG_(printf)("\n");
   3761          } else {
   3762             VG_(printf)("  FrB=none\n");
   3763          }
   3764          VG_(printf)("  declared at: %s:%d\n",
   3765                      varp->fName ? varp->fName : (UChar*)"NULL",
   3766                      varp->fLine );
   3767          if (varp->absOri != (UWord)D3_INVALID_CUOFF)
   3768             VG_(printf)("  abstract origin: <%lx>\n", varp->absOri);
   3769       }
   3770 
   3771       /* Skip variables which have no location.  These must be
   3772          abstract instances; they are useless as-is since with no
   3773          location they have no specified memory location.  They will
   3774          presumably be referred to via the absOri fields of other
   3775          variables. */
   3776       if (!varp->gexpr) {
   3777          TRACE_D3("  SKIP (no location)\n\n");
   3778          continue;
   3779       }
   3780 
   3781       /* So it has a location, at least.  If it refers to some other
   3782          entry through its absOri field, pull in further info through
   3783          that. */
   3784       if (varp->absOri != (UWord)D3_INVALID_CUOFF) {
   3785          Bool found;
   3786          Word ixFirst, ixLast;
   3787          TempVar key;
   3788          TempVar* keyp = &key;
   3789          TempVar *varAI;
   3790          VG_(memset)(&key, 0, sizeof(key)); /* not necessary */
   3791          key.dioff = varp->absOri; /* this is what we want to find */
   3792          found = VG_(lookupXA)( dioff_lookup_tab, &keyp,
   3793                                 &ixFirst, &ixLast );
   3794          if (!found) {
   3795             /* barf("DW_AT_abstract_origin can't be resolved"); */
   3796             TRACE_D3("  SKIP (DW_AT_abstract_origin can't be resolved)\n\n");
   3797             continue;
   3798          }
   3799          /* If the following fails, there is more than one entry with
   3800             the same dioff.  Which can't happen. */
   3801          vg_assert(ixFirst == ixLast);
   3802          varAI = *(TempVar**)VG_(indexXA)( dioff_lookup_tab, ixFirst );
   3803          /* stay sane */
   3804          vg_assert(varAI);
   3805          vg_assert(varAI->dioff == varp->absOri);
   3806 
   3807          /* Copy what useful info we can. */
   3808          if (varAI->typeR && !varp->typeR)
   3809             varp->typeR = varAI->typeR;
   3810          if (varAI->name && !varp->name)
   3811             varp->name = varAI->name;
   3812          if (varAI->fName && !varp->fName)
   3813             varp->fName = varAI->fName;
   3814          if (varAI->fLine > 0 && varp->fLine == 0)
   3815             varp->fLine = varAI->fLine;
   3816       }
   3817 
   3818       /* Give it a name if it doesn't have one. */
   3819       if (!varp->name)
   3820          varp->name = ML_(addStr)( di, "<anon_var>", -1 );
   3821 
   3822       /* So now does it have enough info to be useful? */
   3823       /* NOTE: re typeR: this is a hack.  If typeR is Te_UNKNOWN then
   3824          the type didn't get resolved.  Really, in that case
   3825          something's broken earlier on, and should be fixed, rather
   3826          than just skipping the variable. */
   3827       ent = ML_(TyEnts__index_by_cuOff)( tyents_to_keep,
   3828                                          tyents_to_keep_cache,
   3829                                          varp->typeR );
   3830       /* The next two assertions should be guaranteed by
   3831          our previous call to resolve_variable_types. */
   3832       vg_assert(ent);
   3833       vg_assert(ML_(TyEnt__is_type)(ent) || ent->tag == Te_UNKNOWN);
   3834 
   3835       if (ent->tag == Te_UNKNOWN) continue;
   3836 
   3837       vg_assert(varp->gexpr);
   3838       vg_assert(varp->name);
   3839       vg_assert(varp->typeR);
   3840       vg_assert(varp->level >= 0);
   3841 
   3842       /* Ok.  So we're going to keep it.  Call ML_(addVar) once for
   3843          each address range in which the variable exists. */
   3844       TRACE_D3("  ACQUIRE for range(s) ");
   3845       { AddrRange  oneRange;
   3846         AddrRange* varPcRanges;
   3847         Word       nVarPcRanges;
   3848         /* Set up to iterate over address ranges, however
   3849            represented. */
   3850         if (varp->nRanges == 0 || varp->nRanges == 1) {
   3851            vg_assert(!varp->rngMany);
   3852            if (varp->nRanges == 0) {
   3853               vg_assert(varp->rngOneMin == 0);
   3854               vg_assert(varp->rngOneMax == 0);
   3855            }
   3856            nVarPcRanges = varp->nRanges;
   3857            oneRange.aMin = varp->rngOneMin;
   3858            oneRange.aMax = varp->rngOneMax;
   3859            varPcRanges = &oneRange;
   3860         } else {
   3861            vg_assert(varp->rngMany);
   3862            vg_assert(varp->rngOneMin == 0);
   3863            vg_assert(varp->rngOneMax == 0);
   3864            nVarPcRanges = VG_(sizeXA)(varp->rngMany);
   3865            vg_assert(nVarPcRanges >= 2);
   3866            vg_assert(nVarPcRanges == (Word)varp->nRanges);
   3867            varPcRanges = VG_(indexXA)(varp->rngMany, 0);
   3868         }
   3869         if (varp->level == 0)
   3870            vg_assert( nVarPcRanges == 1 );
   3871         /* and iterate */
   3872         for (i = 0; i < nVarPcRanges; i++) {
   3873            Addr pcMin = varPcRanges[i].aMin;
   3874            Addr pcMax = varPcRanges[i].aMax;
   3875            vg_assert(pcMin <= pcMax);
   3876            /* Level 0 is the global address range.  So at level 0 we
   3877               don't want to bias pcMin/pcMax; but at all other levels
   3878               we do since those are derived from svmas in the Dwarf
   3879               we're reading.  Be paranoid ... */
   3880            if (varp->level == 0) {
   3881               vg_assert(pcMin == (Addr)0);
   3882               vg_assert(pcMax == ~(Addr)0);
   3883            } else {
   3884               /* vg_assert(pcMin > (Addr)0);
   3885                  No .. we can legitimately expect to see ranges like
   3886                  0x0-0x11D (pre-biasing, of course). */
   3887               vg_assert(pcMax < ~(Addr)0);
   3888            }
   3889 
   3890            /* Apply text biasing, for non-global variables. */
   3891            if (varp->level > 0) {
   3892               pcMin += di->text_debug_bias;
   3893               pcMax += di->text_debug_bias;
   3894            }
   3895 
   3896            if (i > 0 && (i%2) == 0)
   3897               TRACE_D3("\n                       ");
   3898            TRACE_D3("[%#lx,%#lx] ", pcMin, pcMax );
   3899 
   3900            ML_(addVar)(
   3901               di, varp->level,
   3902                   pcMin, pcMax,
   3903                   varp->name,  varp->typeR,
   3904                   varp->gexpr, varp->fbGX,
   3905                   varp->fName, varp->fLine, td3
   3906            );
   3907         }
   3908       }
   3909 
   3910       TRACE_D3("\n\n");
   3911       /* and move on to the next var */
   3912    }
   3913 
   3914    /* Now free all the TempVars */
   3915    n = VG_(sizeXA)( tempvars );
   3916    for (i = 0; i < n; i++) {
   3917       varp = *(TempVar**)VG_(indexXA)( tempvars, i );
   3918       ML_(dinfo_free)(varp);
   3919    }
   3920    VG_(deleteXA)( tempvars );
   3921    tempvars = NULL;
   3922 
   3923    /* and the temp lookup table */
   3924    VG_(deleteXA)( dioff_lookup_tab );
   3925 
   3926    /* and the ranges tree.  Note that we need to also free the XArrays
   3927       which constitute the keys, hence pass VG_(deleteXA) as a
   3928       key-finalizer. */
   3929    VG_(deleteFM)( rangestree, (void(*)(UWord))VG_(deleteXA), NULL );
   3930 
   3931    /* and the tyents_to_keep cache */
   3932    ML_(dinfo_free)( tyents_to_keep_cache );
   3933    tyents_to_keep_cache = NULL;
   3934 
   3935    /* and the file name table (just the array, not the entries
   3936       themselves).  (Apparently, 2008-Oct-23, varparser.filenameTable
   3937       can be NULL here, for icc9 generated Dwarf3.  Not sure what that
   3938       signifies (a deeper problem with the reader?)) */
   3939    if (varparser.filenameTable) {
   3940       VG_(deleteXA)( varparser.filenameTable );
   3941       varparser.filenameTable = NULL;
   3942    }
   3943 
   3944    /* record the GExprs in di so they can be freed later */
   3945    vg_assert(!di->admin_gexprs);
   3946    di->admin_gexprs = gexprs;
   3947 }
   3948 
   3949 
   3950 /*------------------------------------------------------------*/
   3951 /*---                                                      ---*/
   3952 /*--- The "new" DWARF3 reader -- top level control logic   ---*/
   3953 /*---                                                      ---*/
   3954 /*------------------------------------------------------------*/
   3955 
   3956 static Bool               d3rd_jmpbuf_valid  = False;
   3957 static HChar*             d3rd_jmpbuf_reason = NULL;
   3958 static VG_MINIMAL_JMP_BUF(d3rd_jmpbuf);
   3959 
   3960 static __attribute__((noreturn)) void barf ( HChar* reason ) {
   3961    vg_assert(d3rd_jmpbuf_valid);
   3962    d3rd_jmpbuf_reason = reason;
   3963    VG_MINIMAL_LONGJMP(d3rd_jmpbuf);
   3964    /*NOTREACHED*/
   3965    vg_assert(0);
   3966 }
   3967 
   3968 
   3969 void
   3970 ML_(new_dwarf3_reader) (
   3971    struct _DebugInfo* di,
   3972    UChar* debug_info_img,   SizeT debug_info_sz,
   3973    UChar* debug_abbv_img,   SizeT debug_abbv_sz,
   3974    UChar* debug_line_img,   SizeT debug_line_sz,
   3975    UChar* debug_str_img,    SizeT debug_str_sz,
   3976    UChar* debug_ranges_img, SizeT debug_ranges_sz,
   3977    UChar* debug_loc_img,    SizeT debug_loc_sz
   3978 )
   3979 {
   3980    volatile Int  jumped;
   3981    volatile Bool td3 = di->trace_symtab;
   3982 
   3983    /* Run the _wrk function to read the dwarf3.  If it succeeds, it
   3984       just returns normally.  If there is any failure, it longjmp's
   3985       back here, having first set d3rd_jmpbuf_reason to something
   3986       useful. */
   3987    vg_assert(d3rd_jmpbuf_valid  == False);
   3988    vg_assert(d3rd_jmpbuf_reason == NULL);
   3989 
   3990    d3rd_jmpbuf_valid = True;
   3991    jumped = VG_MINIMAL_SETJMP(d3rd_jmpbuf);
   3992    if (jumped == 0) {
   3993       /* try this ... */
   3994       new_dwarf3_reader_wrk( di, barf,
   3995                              debug_info_img,   debug_info_sz,
   3996                              debug_abbv_img,   debug_abbv_sz,
   3997                              debug_line_img,   debug_line_sz,
   3998                              debug_str_img,    debug_str_sz,
   3999                              debug_ranges_img, debug_ranges_sz,
   4000                              debug_loc_img,    debug_loc_sz );
   4001       d3rd_jmpbuf_valid = False;
   4002       TRACE_D3("\n------ .debug_info reading was successful ------\n");
   4003    } else {
   4004       /* It longjmp'd. */
   4005       d3rd_jmpbuf_valid = False;
   4006       /* Can't longjump without giving some sort of reason. */
   4007       vg_assert(d3rd_jmpbuf_reason != NULL);
   4008 
   4009       TRACE_D3("\n------ .debug_info reading failed ------\n");
   4010 
   4011       ML_(symerr)(di, True, d3rd_jmpbuf_reason);
   4012    }
   4013 
   4014    d3rd_jmpbuf_valid  = False;
   4015    d3rd_jmpbuf_reason = NULL;
   4016 }
   4017 
   4018 
   4019 
   4020 /* --- Unused code fragments which might be useful one day. --- */
   4021 
   #if 0
      /* Disabled fragment: walk the .debug_aranges section and trace each
         table it contains.  It is not a complete function; it relies on
         'aranges', 'debug_aranges_img', 'debug_aranges_sz', 'barf' and the
         tracing macros being available in the enclosing scope. */
      TRACE_SYMTAB("\n");
      TRACE_SYMTAB("\n------ The contents of .debug_arange ------\n");
      init_Cursor( &aranges, debug_aranges_img,
                   debug_aranges_sz, 0, barf,
                   "Overrun whilst reading .debug_aranges section" );
      while (True) {
         ULong  len, d_i_offset;
         Bool   is64;
         UShort version;
         UChar  asize, segsize;

         if (is_at_end_Cursor( &aranges ))
            break;

         /* Read one table header: initial length, version, offset into
            .debug_info, address size and segment size, in that order. */
         len = get_Initial_Length( &is64, &aranges,
                  "in .debug_aranges: invalid initial-length field" );
         version    = get_UShort( &aranges );
         d_i_offset = get_Dwarfish_UWord( &aranges, is64 );
         asize      = get_UChar( &aranges );
         segsize    = get_UChar( &aranges );
         TRACE_D3("  Length:                   %llu\n", len);
         TRACE_D3("  Version:                  %d\n", (Int)version);
         TRACE_D3("  Offset into .debug_info:  %llx\n", d_i_offset);
         TRACE_D3("  Pointer Size:             %d\n", (Int)asize);
         TRACE_D3("  Segment Size:             %d\n", (Int)segsize);
         TRACE_D3("\n");
         TRACE_D3("    Address            Length\n");

         /* The address size comes straight from the section.  A value of
            zero would make the alignment test below compute a remainder
            by zero, so give up on such a table instead. */
         if (asize == 0)
            barf("in .debug_aranges: address size is zero");

         /* Discard bytes until the cursor position is a multiple of
            twice the address size. */
         while ((get_position_of_Cursor( &aranges ) % (2 * asize)) > 0) {
            (void)get_UChar( & aranges );
         }

         /* Trace (address, length) pairs up to and including the pair in
            which both values are zero. */
         while (True) {
            ULong address = get_Dwarfish_UWord( &aranges, asize==8 );
            ULong length = get_Dwarfish_UWord( &aranges, asize==8 );
            TRACE_D3("    0x%016llx 0x%llx\n", address, length);
            if (address == 0 && length == 0) break;
         }
      }
      TRACE_SYMTAB("\n");
   #endif
   4065 
   4066 #endif // defined(VGO_linux) || defined(VGO_darwin)
   4067 
   4068 /*--------------------------------------------------------------------*/
   4069 /*--- end                                                          ---*/
   4070 /*--------------------------------------------------------------------*/
   4071