Home | History | Annotate | Download | only in m_debuginfo
      1 /* -*- mode: C; c-basic-offset: 3; -*- */
      2 
      3 /*--------------------------------------------------------------------*/
      4 /*--- Read DWARF3/4 ".debug_info" sections (DIE trees).            ---*/
      5 /*---                                                 readdwarf3.c ---*/
      6 /*--------------------------------------------------------------------*/
      7 
      8 /*
      9    This file is part of Valgrind, a dynamic binary instrumentation
     10    framework.
     11 
     12    Copyright (C) 2008-2013 OpenWorks LLP
     13       info (at) open-works.co.uk
     14 
     15    This program is free software; you can redistribute it and/or
     16    modify it under the terms of the GNU General Public License as
     17    published by the Free Software Foundation; either version 2 of the
     18    License, or (at your option) any later version.
     19 
     20    This program is distributed in the hope that it will be useful, but
     21    WITHOUT ANY WARRANTY; without even the implied warranty of
     22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     23    General Public License for more details.
     24 
     25    You should have received a copy of the GNU General Public License
     26    along with this program; if not, write to the Free Software
     27    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     28    02111-1307, USA.
     29 
     30    The GNU General Public License is contained in the file COPYING.
     31 
     32    Neither the names of the U.S. Department of Energy nor the
     33    University of California nor the names of its contributors may be
     34    used to endorse or promote products derived from this software
     35    without prior written permission.
     36 */
     37 
     38 #if defined(VGO_linux) || defined(VGO_darwin)
     39 
     40 /* REFERENCE (without which this code will not make much sense):
     41 
     42    DWARF Debugging Information Format, Version 3,
     43    dated 20 December 2005 (the "D3 spec").
     44 
     45    Available at http://www.dwarfstd.org/Dwarf3.pdf.  There's also a
     46    .doc (MS Word) version, but for some reason the section numbers
     47    between the Word and PDF versions differ by 1 in the first digit.
     48    All section references in this code are to the PDF version.
     49 
     50    CURRENT HACKS:
     51 
     52    DW_TAG_{const,volatile}_type no DW_AT_type is allowed; it is
     53       assumed to mean "const void" or "volatile void" respectively.
     54       GDB appears to interpret them like this, anyway.
     55 
     56    In many cases it is important to know the svma of a CU (the "base
     57    address of the CU", as the D3 spec calls it).  There are some
     58    situations in which the spec implies this value is unknown, but the
     59    Dwarf3 produced by gcc-4.1 seems to assume is not unknown but
     60    merely zero when not explicitly stated.  So we too have to make
     61    that assumption.
     62 
     63    POTENTIAL BUG?  Spotted 6 Sept 08.  Why doesn't
     64    unitary_range_list() bias the resulting range list in the same way
     65    that its more general cousin, get_range_list(), does?  I don't
     66    know.
     67 
     68    TODO, 2008 Feb 17:
     69 
     70    get rid of cu_svma_known and document the assumed-zero svma hack.
     71 
     72    ML_(sizeOfType): differentiate between zero sized types and types
     73    for which the size is unknown.  Is this important?  I don't know.
     74 
     75    DW_TAG_array_types: deal with explicit sizes (currently we compute
     76    the size from the bounds and the element size, although that's
     77    fragile, if the bounds incompletely specified, or completely
     78    absent)
     79 
     80    Document reason for difference (by 1) of stack preening depth in
     81    parse_var_DIE vs parse_type_DIE.
     82 
     83    Don't hand to ML_(addVars), vars whose locations are entirely in
     84    registers (DW_OP_reg*).  This is merely a space-saving
     85    optimisation, as ML_(evaluate_Dwarf3_Expr) should handle these
     86    expressions correctly, by failing to evaluate them and hence
     87    effectively ignoring the variable with which they are associated.
     88 
     89    Deal with DW_TAG_array_types which have element size != stride
     90 
     91    In some cases, the info for a variable is split between two
     92    different DIEs (generally a declarer and a definer).  We punt on
     93    these.  Could do better here.
     94 
     95    The 'data_bias' argument passed to the expression evaluator
     96    (ML_(evaluate_Dwarf3_Expr)) should really be changed to a
     97    MaybeUWord, to make it clear when we do vs don't know what it is
     98    for the evaluation of an expression.  At the moment zero is passed
     99    for this parameter in the don't know case.  That's a bit fragile
    100    and obscure; using a MaybeUWord would be clearer.
    101 
    102    POTENTIAL PERFORMANCE IMPROVEMENTS:
    103 
    104    Currently, duplicate removal and all other queries for the type
    105    entities array is done using cuOffset-based pointing, which
    106    involves a binary search (VG_(lookupXA)) for each access.  This is
    107    wildly inefficient, although simple.  It would be better to
    108    translate all the cuOffset-based references (iow, all the "R" and
    109    "Rs" fields in the TyEnts in 'tyents') to direct index numbers in
    110    'tyents' right at the start of dedup_types(), and use direct
    111    indexing (VG_(indexXA)) wherever possible after that.
    112 
    113    cmp__XArrays_of_AddrRange is also a performance bottleneck.  Move
    114    VG_(indexXA) into pub_tool_xarray.h so it can be inlined at all use
    115    points, and possibly also make an _UNCHECKED version which skips
    116    the range checks in performance-critical situations such as this.
    117 
    118    Handle interaction between read_DIE and parse_{var,type}_DIE
    119    better.  Currently read_DIE reads the entire DIE just to find where
    120    the end is (and for debug printing), so that it can later reliably
    121    move the cursor to the end regardless of what parse_{var,type}_DIE
    122    do.  This means many DIEs (most, even?) are read twice.  It would
    123    be smarter to make parse_{var,type}_DIE return a Bool indicating
    124    whether or not they advanced the DIE cursor, and only if they
    125    didn't should read_DIE itself read through the DIE.
    126 
    127    ML_(addVar) and add_var_to_arange: quite a lot of DiAddrRanges have
    128    zero variables in their .vars XArray.  Rather than have an XArray
    129    with zero elements (which uses 2 malloc'd blocks), allow the .vars
    130    pointer to be NULL in this case.
    131 
    132    More generally, reduce the amount of memory allocated and freed
    133    while reading Dwarf3 type/variable information.  Even modest (20MB)
    134    objects cause this module to allocate and free hundreds of
    135    thousands of small blocks, and ML_(arena_malloc) and its various
    136    groupies always show up at the top of performance profiles. */
    137 
    138 #include "pub_core_basics.h"
    139 #include "pub_core_debuginfo.h"
    140 #include "pub_core_libcbase.h"
    141 #include "pub_core_libcassert.h"
    142 #include "pub_core_libcprint.h"
    143 #include "pub_core_libcsetjmp.h"   // setjmp facilities
    144 #include "pub_core_hashtable.h"
    145 #include "pub_core_options.h"
    146 #include "pub_core_tooliface.h"    /* VG_(needs) */
    147 #include "pub_core_xarray.h"
    148 #include "pub_core_wordfm.h"
    149 #include "priv_misc.h"             /* dinfo_zalloc/free */
    150 #include "priv_image.h"
    151 #include "priv_tytypes.h"
    152 #include "priv_d3basics.h"
    153 #include "priv_storage.h"
    154 #include "priv_readdwarf3.h"       /* self */
    155 
    156 
    157 /*------------------------------------------------------------*/
    158 /*---                                                      ---*/
    159 /*--- Basic machinery for parsing DIEs.                    ---*/
    160 /*---                                                      ---*/
    161 /*------------------------------------------------------------*/
    162 
/* Emit a trace message via VG_(printf), but only when the variable
   'td3' visible at the point of use is true.  Note that this expands
   to a bare 'if' statement, so it must not be used where a following
   'else' could bind to it. */
#define TRACE_D3(format, args...) \
   if (td3) { VG_(printf)(format, ## args); }

/* Two reserved UWord values, (UWord)-1 and (UWord)-2.  Presumably
   used as special cuOffset markers ("invalid" and "fake void"); their
   uses are not visible in this part of the file. */
#define D3_INVALID_CUOFF  ((UWord)(-1UL))
#define D3_FAKEVOID_CUOFF ((UWord)(-2UL))
    168 
/* A read cursor over a DiSlice.  The get_* readers below advance
   'sli_next' and call 'barf' with 'barfstr' when a read would run
   past the end of the slice. */
typedef
   struct {
      DiSlice sli;      // to which this cursor applies
      DiOffT  sli_next; // offset in underlying DiImage; must be >= sli.ioff
      void (*barf)( const HChar* ) __attribute__((noreturn)); // never returns
      const HChar* barfstr; // message handed to barf on overrun
   }
   Cursor;
    177 
    178 static inline Bool is_sane_Cursor ( Cursor* c ) {
    179    if (!c)                return False;
    180    if (!c->barf)          return False;
    181    if (!c->barfstr)       return False;
    182    if (!ML_(sli_is_valid)(c->sli))    return False;
    183    if (c->sli.ioff == DiOffT_INVALID) return False;
    184    if (c->sli_next < c->sli.ioff)     return False;
    185    return True;
    186 }
    187 
    188 // Initialise a cursor from a DiSlice (ELF section, really) so as to
    189 // start reading at offset |sli_initial_offset| from the start of the
    190 // slice.
    191 static void init_Cursor ( /*OUT*/Cursor* c,
    192                           DiSlice sli,
    193                           ULong   sli_initial_offset,
    194                           __attribute__((noreturn)) void (*barf)(const HChar*),
    195                           const HChar* barfstr )
    196 {
    197    vg_assert(c);
    198    VG_(bzero_inline)(c, sizeof(*c));
    199    c->sli              = sli;
    200    c->sli_next         = c->sli.ioff + sli_initial_offset;
    201    c->barf             = barf;
    202    c->barfstr          = barfstr;
    203    vg_assert(is_sane_Cursor(c));
    204 }
    205 
    206 static Bool is_at_end_Cursor ( Cursor* c ) {
    207    vg_assert(is_sane_Cursor(c));
    208    return c->sli_next >= c->sli.ioff + c->sli.szB;
    209 }
    210 
    211 static inline ULong get_position_of_Cursor ( Cursor* c ) {
    212    vg_assert(is_sane_Cursor(c));
    213    return c->sli_next - c->sli.ioff;
    214 }
    215 static inline void set_position_of_Cursor ( Cursor* c, ULong pos ) {
    216    c->sli_next = c->sli.ioff + pos;
    217    vg_assert(is_sane_Cursor(c));
    218 }
    219 
    220 static /*signed*/Long get_remaining_length_Cursor ( Cursor* c ) {
    221    vg_assert(is_sane_Cursor(c));
    222    return c->sli.ioff + c->sli.szB - c->sli_next;
    223 }
    224 
    225 //static void* get_address_of_Cursor ( Cursor* c ) {
    226 //   vg_assert(is_sane_Cursor(c));
    227 //   return &c->region_start_img[ c->region_next ];
    228 //}
    229 
    230 static DiCursor get_DiCursor_from_Cursor ( Cursor* c ) {
    231    return mk_DiCursor(c->sli.img, c->sli_next);
    232 }
    233 
    234 /* FIXME: document assumptions on endianness for
    235    get_UShort/UInt/ULong. */
    236 static inline UChar get_UChar ( Cursor* c ) {
    237    UChar r;
    238    vg_assert(is_sane_Cursor(c));
    239    if (c->sli_next + sizeof(UChar) > c->sli.ioff + c->sli.szB) {
    240       c->barf(c->barfstr);
    241       /*NOTREACHED*/
    242       vg_assert(0);
    243    }
    244    r = ML_(img_get_UChar)(c->sli.img, c->sli_next);
    245    c->sli_next += sizeof(UChar);
    246    return r;
    247 }
    248 static UShort get_UShort ( Cursor* c ) {
    249    UShort r;
    250    vg_assert(is_sane_Cursor(c));
    251    if (c->sli_next + sizeof(UShort) > c->sli.ioff + c->sli.szB) {
    252       c->barf(c->barfstr);
    253       /*NOTREACHED*/
    254       vg_assert(0);
    255    }
    256    r = ML_(img_get_UShort)(c->sli.img, c->sli_next);
    257    c->sli_next += sizeof(UShort);
    258    return r;
    259 }
    260 static UInt get_UInt ( Cursor* c ) {
    261    UInt r;
    262    vg_assert(is_sane_Cursor(c));
    263    if (c->sli_next + sizeof(UInt) > c->sli.ioff + c->sli.szB) {
    264       c->barf(c->barfstr);
    265       /*NOTREACHED*/
    266       vg_assert(0);
    267    }
    268    r = ML_(img_get_UInt)(c->sli.img, c->sli_next);
    269    c->sli_next += sizeof(UInt);
    270    return r;
    271 }
    272 static ULong get_ULong ( Cursor* c ) {
    273    ULong r;
    274    vg_assert(is_sane_Cursor(c));
    275    if (c->sli_next + sizeof(ULong) > c->sli.ioff + c->sli.szB) {
    276       c->barf(c->barfstr);
    277       /*NOTREACHED*/
    278       vg_assert(0);
    279    }
    280    r = ML_(img_get_ULong)(c->sli.img, c->sli_next);
    281    c->sli_next += sizeof(ULong);
    282    return r;
    283 }
    284 static ULong get_ULEB128 ( Cursor* c ) {
    285    ULong result;
    286    Int   shift;
    287    UChar byte;
    288    /* unroll first iteration */
    289    byte = get_UChar( c );
    290    result = (ULong)(byte & 0x7f);
    291    if (LIKELY(!(byte & 0x80))) return result;
    292    shift = 7;
    293    /* end unroll first iteration */
    294    do {
    295       byte = get_UChar( c );
    296       result |= ((ULong)(byte & 0x7f)) << shift;
    297       shift += 7;
    298    } while (byte & 0x80);
    299    return result;
    300 }
    301 static Long get_SLEB128 ( Cursor* c ) {
    302    ULong  result = 0;
    303    Int    shift = 0;
    304    UChar  byte;
    305    do {
    306       byte = get_UChar(c);
    307       result |= ((ULong)(byte & 0x7f)) << shift;
    308       shift += 7;
    309    } while (byte & 0x80);
    310    if (shift < 64 && (byte & 0x40))
    311       result |= -(1ULL << shift);
    312    return result;
    313 }
    314 
    315 /* Assume 'c' points to the start of a string.  Return a DiCursor of
    316    whatever it points at, and advance it past the terminating zero.
    317    This makes it safe for the caller to then copy the string with
    318    ML_(addStr), since (w.r.t. image overruns) the process of advancing
    319    past the terminating zero will already have "vetted" the string. */
    320 static DiCursor get_AsciiZ ( Cursor* c ) {
    321    UChar uc;
    322    DiCursor res = get_DiCursor_from_Cursor(c);
    323    do { uc = get_UChar(c); } while (uc != 0);
    324    return res;
    325 }
    326 
    327 static ULong peek_ULEB128 ( Cursor* c ) {
    328    DiOffT here = c->sli_next;
    329    ULong  r    = get_ULEB128( c );
    330    c->sli_next = here;
    331    return r;
    332 }
    333 static UChar peek_UChar ( Cursor* c ) {
    334    DiOffT here = c->sli_next;
    335    UChar  r    = get_UChar( c );
    336    c->sli_next = here;
    337    return r;
    338 }
    339 
    340 static ULong get_Dwarfish_UWord ( Cursor* c, Bool is_dw64 ) {
    341    return is_dw64 ? get_ULong(c) : (ULong) get_UInt(c);
    342 }
    343 
    344 static UWord get_UWord ( Cursor* c ) {
    345    vg_assert(sizeof(UWord) == sizeof(void*));
    346    if (sizeof(UWord) == 4) return get_UInt(c);
    347    if (sizeof(UWord) == 8) return get_ULong(c);
    348    vg_assert(0);
    349 }
    350 
    351 /* Read a DWARF3 'Initial Length' field */
    352 static ULong get_Initial_Length ( /*OUT*/Bool* is64,
    353                                   Cursor* c,
    354                                   const HChar* barfMsg )
    355 {
    356    ULong w64;
    357    UInt  w32;
    358    *is64 = False;
    359    w32 = get_UInt( c );
    360    if (w32 >= 0xFFFFFFF0 && w32 < 0xFFFFFFFF) {
    361       c->barf( barfMsg );
    362    }
    363    else if (w32 == 0xFFFFFFFF) {
    364       *is64 = True;
    365       w64   = get_ULong( c );
    366    } else {
    367       *is64 = False;
    368       w64 = (ULong)w32;
    369    }
    370    return w64;
    371 }
    372 
    373 
    374 /*------------------------------------------------------------*/
    375 /*---                                                      ---*/
    376 /*--- "CUConst" structure                                  ---*/
    377 /*---                                                      ---*/
    378 /*------------------------------------------------------------*/
    379 
/* Number of entries in the per-CU saC_cache array below. */
#define N_ABBV_CACHE 32

/* Holds information that is constant through the parsing of a
   Compilation Unit.  This is basically plumbed through to
   everywhere. */
typedef
   struct {
      /* Call here if anything goes wrong.  Does not return. */
      void (*barf)( const HChar* ) __attribute__((noreturn));
      /* Is this 64-bit DWARF ? */
      Bool   is_dw64;
      /* Which DWARF version ?  (2, 3 or 4) */
      UShort version;
      /* Length of this Compilation Unit, as stated in the
         .unit_length :: InitialLength field of the CU Header.
         However, this size (as specified by the D3 spec) does not
         include the size of the .unit_length field itself, which is
         either 4 or 12 bytes (32-bit or 64-bit Dwarf3).  That value
         can be obtained through the expression ".is_dw64 ? 12 : 4". */
      ULong  unit_length;
      /* Offset of start of this unit in .debug_info */
      UWord  cu_start_offset;
      /* SVMA for this CU.  In the D3 spec, this is known as the "base
         address of the compilation unit" (last para sec 3.1.1).
         Needed for (amongst things) interpretation of location-list
         values. */
      Addr   cu_svma;
      Bool   cu_svma_known;

      /* The debug_abbreviations table to be used for this Unit */
      //UChar* debug_abbv;
      /* Upper bound on size thereof (an overestimate, in general) */
      //UWord  debug_abbv_maxszB;
      /* A bounded area of the image, to be used as the
         debug_abbreviations table to be used for this Unit. */
      DiSlice debug_abbv;

      /* Image information for various sections. */
      DiSlice escn_debug_str;
      DiSlice escn_debug_ranges;
      DiSlice escn_debug_loc;
      DiSlice escn_debug_line;
      DiSlice escn_debug_info;
      DiSlice escn_debug_types;
      DiSlice escn_debug_info_alt;
      DiSlice escn_debug_str_alt;
      /* How much to add to .debug_types resp. alternate .debug_info offsets
         in cook_die*.  */
      UWord  types_cuOff_bias;
      UWord  alt_cuOff_bias;
      /* --- Needed so we can add stuff to the string table. --- */
      struct _DebugInfo* di;
      /* --- a cache for set_abbv_Cursor --- */
      /* abbv_code == (ULong)-1 for an unused entry. */
      struct { ULong abbv_code; UWord posn; } saC_cache[N_ABBV_CACHE];
      UWord saC_cache_queries;
      UWord saC_cache_misses;

      /* True if this came from .debug_types; otherwise it came from
         .debug_info.  */
      Bool is_type_unit;
      /* For a unit coming from .debug_types, these hold the TU's type
         signature and the uncooked DIE offset of the TU's signatured
         type.  For a unit coming from .debug_info, these are unused.  */
      ULong type_signature;
      ULong type_offset;

      /* Signatured type hash; computed once and then shared by all
         CUs.  */
      VgHashTable signature_types;

      /* True if this came from alternate .debug_info; otherwise
         it came from normal .debug_info or .debug_types.  */
      Bool is_alt_info;
   }
   CUConst;
    456 
    457 
    458 /* Return the cooked value of DIE depending on whether CC represents a
    459    .debug_types unit.  To cook a DIE, we pretend that the .debug_info,
    460    .debug_types and optional alternate .debug_info sections form
    461    a contiguous whole, so that DIEs coming from .debug_types are numbered
    462    starting at the end of .debug_info and DIEs coming from alternate
    463    .debug_info are numbered starting at the end of .debug_types.  */
    464 static UWord cook_die( CUConst* cc, UWord die )
    465 {
    466    if (cc->is_type_unit)
    467       die += cc->types_cuOff_bias;
    468    else if (cc->is_alt_info)
    469       die += cc->alt_cuOff_bias;
    470    return die;
    471 }
    472 
    473 /* Like cook_die, but understand that DIEs coming from a
    474    DW_FORM_ref_sig8 reference are already cooked.  Also, handle
    475    DW_FORM_GNU_ref_alt from within primary .debug_info or .debug_types
    476    as reference to alternate .debug_info.  */
    477 static UWord cook_die_using_form( CUConst *cc, UWord die, DW_FORM form)
    478 {
    479    if (form == DW_FORM_ref_sig8)
    480       return die;
    481    if (form == DW_FORM_GNU_ref_alt)
    482       return die + cc->alt_cuOff_bias;
    483    return cook_die( cc, die );
    484 }
    485 
    486 /* Return the uncooked offset of DIE and set *TYPE_FLAG to true if the DIE
    487    came from the .debug_types section and *ALT_FLAG to true if the DIE
    488    came from alternate .debug_info section.  */
    489 static UWord uncook_die( CUConst *cc, UWord die, /*OUT*/Bool *type_flag,
    490                          Bool *alt_flag )
    491 {
    492    *alt_flag = False;
    493    *type_flag = False;
    494    /* The use of escn_debug_{info,types}.szB seems safe to me even if
    495       escn_debug_{info,types} are DiSlice_INVALID (meaning the
    496       sections were not found), because DiSlice_INVALID.szB is always
    497       zero.  That said, it seems unlikely we'd ever get here if
    498       .debug_info or .debug_types were missing. */
    499    if (die >= cc->escn_debug_info.szB) {
    500       if (die >= cc->escn_debug_info.szB + cc->escn_debug_types.szB) {
    501          *alt_flag = True;
    502          die -= cc->escn_debug_info.szB + cc->escn_debug_types.szB;
    503       } else {
    504          *type_flag = True;
    505          die -= cc->escn_debug_info.szB;
    506       }
    507    }
    508    return die;
    509 }
    510 
    511 /*------------------------------------------------------------*/
    512 /*---                                                      ---*/
    513 /*--- Helper functions for Guarded Expressions             ---*/
    514 /*---                                                      ---*/
    515 /*------------------------------------------------------------*/
    516 
    517 /* Parse the location list starting at img-offset 'debug_loc_offset'
    518    in .debug_loc.  Results are biased with 'svma_of_referencing_CU'
    519    and so I believe are correct SVMAs for the object as a whole.  This
    520    function allocates the UChar*, and the caller must deallocate it.
    521    The resulting block is in so-called Guarded-Expression format.
    522 
    523    Guarded-Expression format is similar but not identical to the DWARF3
    524    location-list format.  The format of each returned block is:
    525 
    526       UChar biasMe;
    527       UChar isEnd;
    528       followed by zero or more of
    529 
    530       (Addr aMin;  Addr aMax;  UShort nbytes;  ..bytes..;  UChar isEnd)
    531 
    532    '..bytes..' is a standard DWARF3 location expression which is
    533    valid when aMin <= pc <= aMax (possibly after suitable biasing).
    534 
    535    The number of bytes in '..bytes..' is nbytes.
    536 
    537    The end of the sequence is marked by an isEnd == 1 value.  All
    538    previous isEnd values must be zero.
    539 
    540    biasMe is 1 if the aMin/aMax fields need this DebugInfo's
    541    text_bias added before use, and 0 if this is not necessary
    542    (the GX is ready to go).
    543 
    544    Hence the block can be quickly parsed and is self-describing.  Note
    545    that aMax is 1 less than the corresponding value in a DWARF3
    546    location list.  Zero length ranges, with aMax == aMin-1, are not
    547    allowed.
    548 */
    549 /* 2008-sept-12: moved ML_(pp_GX) from here to d3basics.c, where
    550    it more logically belongs. */
    551 
    552 
    553 /* Apply a text bias to a GX. */
    554 static void bias_GX ( /*MOD*/GExpr* gx, struct _DebugInfo* di )
    555 {
    556    UShort nbytes;
    557    UChar* p = &gx->payload[0];
    558    UChar* pA;
    559    UChar  uc;
    560    uc = *p++; /*biasMe*/
    561    if (uc == 0)
    562       return;
    563    vg_assert(uc == 1);
    564    p[-1] = 0; /* mark it as done */
    565    while (True) {
    566       uc = *p++;
    567       if (uc == 1)
    568          break; /*isEnd*/
    569       vg_assert(uc == 0);
    570       /* t-bias aMin */
    571       pA = (UChar*)p;
    572       ML_(write_Addr)(pA, ML_(read_Addr)(pA) + di->text_debug_bias);
    573       p += sizeof(Addr);
    574       /* t-bias aMax */
    575       pA = (UChar*)p;
    576       ML_(write_Addr)(pA, ML_(read_Addr)(pA) + di->text_debug_bias);
    577       p += sizeof(Addr);
    578       /* nbytes, and actual expression */
    579       nbytes = ML_(read_UShort)(p); p += sizeof(UShort);
    580       p += nbytes;
    581    }
    582 }
    583 
    584 __attribute__((noinline))
    585 static GExpr* make_singleton_GX ( DiCursor block, ULong nbytes )
    586 {
    587    SizeT  bytesReqd;
    588    GExpr* gx;
    589    UChar *p, *pstart;
    590 
    591    vg_assert(sizeof(UWord) == sizeof(Addr));
    592    vg_assert(nbytes <= 0xFFFF); /* else we overflow the nbytes field */
    593    bytesReqd
    594       =   sizeof(UChar)  /*biasMe*/    + sizeof(UChar) /*!isEnd*/
    595         + sizeof(UWord)  /*aMin*/      + sizeof(UWord) /*aMax*/
    596         + sizeof(UShort) /*nbytes*/    + (SizeT)nbytes
    597         + sizeof(UChar); /*isEnd*/
    598 
    599    gx = ML_(dinfo_zalloc)( "di.readdwarf3.msGX.1",
    600                            sizeof(GExpr) + bytesReqd );
    601    vg_assert(gx);
    602 
    603    p = pstart = &gx->payload[0];
    604 
    605    p = ML_(write_UChar)(p, 0);        /*biasMe*/
    606    p = ML_(write_UChar)(p, 0);        /*!isEnd*/
    607    p = ML_(write_Addr)(p, 0);         /*aMin*/
    608    p = ML_(write_Addr)(p, ~0);        /*aMax*/
    609    p = ML_(write_UShort)(p, nbytes);  /*nbytes*/
    610    ML_(cur_read_get)(p, block, nbytes); p += nbytes;
    611    p = ML_(write_UChar)(p, 1);        /*isEnd*/
    612 
    613    vg_assert( (SizeT)(p - pstart) == bytesReqd);
    614    vg_assert( &gx->payload[bytesReqd]
    615               == ((UChar*)gx) + sizeof(GExpr) + bytesReqd );
    616 
    617    return gx;
    618 }
    619 
    620 __attribute__((noinline))
    621 static GExpr* make_general_GX ( CUConst* cc,
    622                                 Bool     td3,
    623                                 ULong    debug_loc_offset,
    624                                 Addr     svma_of_referencing_CU )
    625 {
    626    Addr      base;
    627    Cursor    loc;
    628    XArray*   xa; /* XArray of UChar */
    629    GExpr*    gx;
    630    Word      nbytes;
    631 
    632    vg_assert(sizeof(UWord) == sizeof(Addr));
    633    if (!ML_(sli_is_valid)(cc->escn_debug_loc) || cc->escn_debug_loc.szB == 0)
    634       cc->barf("make_general_GX: .debug_loc is empty/missing");
    635 
    636    init_Cursor( &loc, cc->escn_debug_loc, 0, cc->barf,
    637                 "Overrun whilst reading .debug_loc section(2)" );
    638    set_position_of_Cursor( &loc, debug_loc_offset );
    639 
    640    TRACE_D3("make_general_GX (.debug_loc_offset = %llu, ioff = %llu) {\n",
    641             debug_loc_offset, (ULong)get_DiCursor_from_Cursor(&loc).ioff );
    642 
    643    /* Who frees this xa?  It is freed before this fn exits. */
    644    xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.mgGX.1",
    645                     ML_(dinfo_free),
    646                     sizeof(UChar) );
    647 
    648    { UChar c = 1; /*biasMe*/ VG_(addBytesToXA)( xa, &c, sizeof(c) ); }
    649 
    650    base = 0;
    651    while (True) {
    652       Bool  acquire;
    653       UWord len;
    654       /* Read a (host-)word pair.  This is something of a hack since
    655          the word size to read is really dictated by the ELF file;
    656          however, we assume we're reading a file with the same
    657          word-sizeness as the host.  Reasonably enough. */
    658       UWord w1 = get_UWord( &loc );
    659       UWord w2 = get_UWord( &loc );
    660 
    661       TRACE_D3("   %08lx %08lx\n", w1, w2);
    662       if (w1 == 0 && w2 == 0)
    663          break; /* end of list */
    664 
    665       if (w1 == -1UL) {
    666          /* new value for 'base' */
    667          base = w2;
    668          continue;
    669       }
    670 
    671       /* else a location expression follows */
    672       /* else enumerate [w1+base, w2+base) */
    673       /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc"
    674          (sec 2.17.2) */
    675       if (w1 > w2) {
    676          TRACE_D3("negative range is for .debug_loc expr at "
    677                   "file offset %llu\n",
    678                   debug_loc_offset);
    679          cc->barf( "negative range in .debug_loc section" );
    680       }
    681 
    682       /* ignore zero length ranges */
    683       acquire = w1 < w2;
    684       len     = (UWord)get_UShort( &loc );
    685 
    686       if (acquire) {
    687          UWord  w;
    688          UShort s;
    689          UChar  c;
    690          c = 0; /* !isEnd*/
    691          VG_(addBytesToXA)( xa, &c, sizeof(c) );
    692          w = w1    + base + svma_of_referencing_CU;
    693          VG_(addBytesToXA)( xa, &w, sizeof(w) );
    694          w = w2 -1 + base + svma_of_referencing_CU;
    695          VG_(addBytesToXA)( xa, &w, sizeof(w) );
    696          s = (UShort)len;
    697          VG_(addBytesToXA)( xa, &s, sizeof(s) );
    698       }
    699 
    700       while (len > 0) {
    701          UChar byte = get_UChar( &loc );
    702          TRACE_D3("%02x", (UInt)byte);
    703          if (acquire)
    704             VG_(addBytesToXA)( xa, &byte, 1 );
    705          len--;
    706       }
    707       TRACE_D3("\n");
    708    }
    709 
    710    { UChar c = 1; /*isEnd*/ VG_(addBytesToXA)( xa, &c, sizeof(c) ); }
    711 
    712    nbytes = VG_(sizeXA)( xa );
    713    vg_assert(nbytes >= 1);
    714 
    715    gx = ML_(dinfo_zalloc)( "di.readdwarf3.mgGX.2", sizeof(GExpr) + nbytes );
    716    vg_assert(gx);
    717    VG_(memcpy)( &gx->payload[0], (UChar*)VG_(indexXA)(xa,0), nbytes );
    718    vg_assert( &gx->payload[nbytes]
    719               == ((UChar*)gx) + sizeof(GExpr) + nbytes );
    720 
    721    VG_(deleteXA)( xa );
    722 
    723    TRACE_D3("}\n");
    724 
    725    return gx;
    726 }
    727 
    728 
    729 /*------------------------------------------------------------*/
    730 /*---                                                      ---*/
    731 /*--- Helper functions for range lists and CU headers      ---*/
    732 /*---                                                      ---*/
    733 /*------------------------------------------------------------*/
    734 
/* Denotes an address range.  Both aMin and aMax are included in the
   range; hence a complete range is (0, ~0) and an empty range is any
   (X, X-1) for X > 0. */
typedef
   struct { Addr aMin; Addr aMax; }
   AddrRange;
    741 
    742 
    743 /* Generate an arbitrary structural total ordering on
    744    XArray* of AddrRange. */
    745 static Word cmp__XArrays_of_AddrRange ( XArray* rngs1, XArray* rngs2 )
    746 {
    747    Word n1, n2, i;
    748    tl_assert(rngs1 && rngs2);
    749    n1 = VG_(sizeXA)( rngs1 );
    750    n2 = VG_(sizeXA)( rngs2 );
    751    if (n1 < n2) return -1;
    752    if (n1 > n2) return 1;
    753    for (i = 0; i < n1; i++) {
    754       AddrRange* rng1 = (AddrRange*)VG_(indexXA)( rngs1, i );
    755       AddrRange* rng2 = (AddrRange*)VG_(indexXA)( rngs2, i );
    756       if (rng1->aMin < rng2->aMin) return -1;
    757       if (rng1->aMin > rng2->aMin) return 1;
    758       if (rng1->aMax < rng2->aMax) return -1;
    759       if (rng1->aMax > rng2->aMax) return 1;
    760    }
    761    return 0;
    762 }
    763 
    764 
    765 __attribute__((noinline))
    766 static XArray* /* of AddrRange */ empty_range_list ( void )
    767 {
    768    XArray* xa; /* XArray of AddrRange */
    769    /* Who frees this xa?  varstack_preen() does. */
    770    xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.erl.1",
    771                     ML_(dinfo_free),
    772                     sizeof(AddrRange) );
    773    return xa;
    774 }
    775 
    776 
    777 __attribute__((noinline))
    778 static XArray* unitary_range_list ( Addr aMin, Addr aMax )
    779 {
    780    XArray*   xa;
    781    AddrRange pair;
    782    vg_assert(aMin <= aMax);
    783    /* Who frees this xa?  varstack_preen() does. */
    784    xa = VG_(newXA)( ML_(dinfo_zalloc),  "di.readdwarf3.url.1",
    785                     ML_(dinfo_free),
    786                     sizeof(AddrRange) );
    787    pair.aMin = aMin;
    788    pair.aMax = aMax;
    789    VG_(addToXA)( xa, &pair );
    790    return xa;
    791 }
    792 
    793 
    794 /* Enumerate the address ranges starting at img-offset
    795    'debug_ranges_offset' in .debug_ranges.  Results are biased with
    796    'svma_of_referencing_CU' and so I believe are correct SVMAs for the
    797    object as a whole.  This function allocates the XArray, and the
    798    caller must deallocate it. */
    799 __attribute__((noinline))
    800 static XArray* /* of AddrRange */
    801        get_range_list ( CUConst* cc,
    802                         Bool     td3,
    803                         UWord    debug_ranges_offset,
    804                         Addr     svma_of_referencing_CU )
    805 {
    806    Addr      base;
    807    Cursor    ranges;
    808    XArray*   xa; /* XArray of AddrRange */
    809    AddrRange pair;
    810 
    811    if (!ML_(sli_is_valid)(cc->escn_debug_ranges)
    812        || cc->escn_debug_ranges.szB == 0)
    813       cc->barf("get_range_list: .debug_ranges is empty/missing");
    814 
    815    init_Cursor( &ranges, cc->escn_debug_ranges, 0, cc->barf,
    816                 "Overrun whilst reading .debug_ranges section(2)" );
    817    set_position_of_Cursor( &ranges, debug_ranges_offset );
    818 
    819    /* Who frees this xa?  varstack_preen() does. */
    820    xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.grl.1", ML_(dinfo_free),
    821                     sizeof(AddrRange) );
    822    base = 0;
    823    while (True) {
    824       /* Read a (host-)word pair.  This is something of a hack since
    825          the word size to read is really dictated by the ELF file;
    826          however, we assume we're reading a file with the same
    827          word-sizeness as the host.  Reasonably enough. */
    828       UWord w1 = get_UWord( &ranges );
    829       UWord w2 = get_UWord( &ranges );
    830 
    831       if (w1 == 0 && w2 == 0)
    832          break; /* end of list. */
    833 
    834       if (w1 == -1UL) {
    835          /* new value for 'base' */
    836          base = w2;
    837          continue;
    838       }
    839 
    840       /* else enumerate [w1+base, w2+base) */
    841       /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc"
    842          (sec 2.17.2) */
    843       if (w1 > w2)
    844          cc->barf( "negative range in .debug_ranges section" );
    845       if (w1 < w2) {
    846          pair.aMin = w1     + base + svma_of_referencing_CU;
    847          pair.aMax = w2 - 1 + base + svma_of_referencing_CU;
    848          vg_assert(pair.aMin <= pair.aMax);
    849          VG_(addToXA)( xa, &pair );
    850       }
    851    }
    852    return xa;
    853 }
    854 
    855 
    856 /* Parse the Compilation Unit header indicated at 'c' and
    857    initialise 'cc' accordingly. */
    858 static __attribute__((noinline))
    859 void parse_CU_Header ( /*OUT*/CUConst* cc,
    860                        Bool td3,
    861                        Cursor* c,
    862                        DiSlice escn_debug_abbv,
    863 		       Bool type_unit,
    864                        Bool alt_info )
    865 {
    866    UChar  address_size;
    867    ULong  debug_abbrev_offset;
    868    Int    i;
    869 
    870    VG_(memset)(cc, 0, sizeof(*cc));
    871    vg_assert(c && c->barf);
    872    cc->barf = c->barf;
    873 
    874    /* initial_length field */
    875    cc->unit_length
    876       = get_Initial_Length( &cc->is_dw64, c,
    877            "parse_CU_Header: invalid initial-length field" );
    878 
    879    TRACE_D3("   Length:        %lld\n", cc->unit_length );
    880 
    881    /* version */
    882    cc->version = get_UShort( c );
    883    if (cc->version != 2 && cc->version != 3 && cc->version != 4)
    884       cc->barf( "parse_CU_Header: is neither DWARF2 nor DWARF3 nor DWARF4" );
    885    TRACE_D3("   Version:       %d\n", (Int)cc->version );
    886 
    887    /* debug_abbrev_offset */
    888    debug_abbrev_offset = get_Dwarfish_UWord( c, cc->is_dw64 );
    889    if (debug_abbrev_offset >= escn_debug_abbv.szB)
    890       cc->barf( "parse_CU_Header: invalid debug_abbrev_offset" );
    891    TRACE_D3("   Abbrev Offset: %lld\n", debug_abbrev_offset );
    892 
    893    /* address size.  If this isn't equal to the host word size, just
    894       give up.  This makes it safe to assume elsewhere that
    895       DW_FORM_addr and DW_FORM_ref_addr can be treated as a host
    896       word. */
    897    address_size = get_UChar( c );
    898    if (address_size != sizeof(void*))
    899       cc->barf( "parse_CU_Header: invalid address_size" );
    900    TRACE_D3("   Pointer Size:  %d\n", (Int)address_size );
    901 
    902    cc->is_type_unit = type_unit;
    903    cc->is_alt_info = alt_info;
    904 
    905    if (type_unit) {
    906       cc->type_signature = get_ULong( c );
    907       cc->type_offset = get_Dwarfish_UWord( c, cc->is_dw64 );
    908    }
    909 
    910    /* Set up cc->debug_abbv to point to the relevant table for this
    911       CU.  Set its .szB so that at least we can't read off the end of
    912       the debug_abbrev section -- potentially (and quite likely) too
    913       big, if this isn't the last table in the section, but at least
    914       it's safe.
    915 
    916       This amounts to taking debug_abbv_escn and moving the start
    917       position along by debug_abbrev_offset bytes, hence forming a
    918       smaller DiSlice which has the same end point.  Since we checked
    919       just above that debug_abbrev_offset is less than the size of
    920       debug_abbv_escn, this should leave us with a nonempty slice. */
    921    vg_assert(debug_abbrev_offset < escn_debug_abbv.szB);
    922    cc->debug_abbv      = escn_debug_abbv;
    923    cc->debug_abbv.ioff += debug_abbrev_offset;
    924    cc->debug_abbv.szB  -= debug_abbrev_offset;
    925 
    926    /* and empty out the set_abbv_Cursor cache */
    927    if (0) VG_(printf)("XXXXXX initialise set_abbv_Cursor cache\n");
    928    for (i = 0; i < N_ABBV_CACHE; i++) {
    929       cc->saC_cache[i].abbv_code = (ULong)-1; /* unused */
    930       cc->saC_cache[i].posn = 0;
    931    }
    932    cc->saC_cache_queries = 0;
    933    cc->saC_cache_misses = 0;
    934 }
    935 
    936 
    937 /* Set up 'c' so it is ready to parse the abbv table entry code
    938    'abbv_code' for this compilation unit.  */
    939 static __attribute__((noinline))
    940 void set_abbv_Cursor ( /*OUT*/Cursor* c, Bool td3,
    941                        CUConst* cc, ULong abbv_code )
    942 {
    943    Int   i;
    944    ULong acode;
    945 
    946    if (abbv_code == 0)
    947       cc->barf("set_abbv_Cursor: abbv_code == 0" );
    948 
    949    /* (ULong)-1 is used to represent an empty cache slot.  So we can't
    950       allow it.  In any case no valid DWARF3 should make a reference
    951       to a negative abbreviation code.  [at least, they always seem to
    952       be numbered upwards from zero as far as I have seen] */
    953    vg_assert(abbv_code != (ULong)-1);
    954 
    955    /* First search the cache. */
    956    if (0) VG_(printf)("XXXXXX search set_abbv_Cursor cache\n");
    957    cc->saC_cache_queries++;
    958    for (i = 0; i < N_ABBV_CACHE; i++) {
    959       /* No need to test the cached abbv_codes for -1 (empty), since
    960          we just asserted that abbv_code is not -1. */
    961       if (LIKELY(cc->saC_cache[i].abbv_code == abbv_code)) {
    962          /* Found it.  Set up the parser using the cached position,
    963             and move this cache entry to the front. */
    964          if (0) VG_(printf)("XXXXXX found in set_abbv_Cursor cache\n");
    965          init_Cursor( c, cc->debug_abbv, cc->saC_cache[i].posn,
    966                       cc->barf,
    967                       "Overrun whilst parsing .debug_abbrev section(1)" );
    968          if (i > 0) {
    969             ULong t_abbv_code = cc->saC_cache[i].abbv_code;
    970             UWord t_posn      = cc->saC_cache[i].posn;
    971             while (i > 0) {
    972                cc->saC_cache[i] = cc->saC_cache[i-1];
    973                i--;
    974             }
    975             cc->saC_cache[0].abbv_code = t_abbv_code;
    976             cc->saC_cache[0].posn      = t_posn;
    977          }
    978          return;
    979       }
    980    }
    981 
    982    /* No.  It's not in the cache.  We have to search through
    983       .debug_abbrev, of course taking care to update the cache
    984       when done. */
    985 
    986    cc->saC_cache_misses++;
    987    init_Cursor( c, cc->debug_abbv, 0, cc->barf,
    988                "Overrun whilst parsing .debug_abbrev section(2)" );
    989 
    990    /* Now iterate though the table until we find the requested
    991       entry. */
    992    while (True) {
    993       //ULong atag;
    994       //UInt  has_children;
    995       acode = get_ULEB128( c );
    996       if (acode == 0) break; /* end of the table */
    997       if (acode == abbv_code) break; /* found it */
    998       /*atag         = */ get_ULEB128( c );
    999       /*has_children = */ get_UChar( c );
   1000       //TRACE_D3("   %llu      %s    [%s]\n",
   1001       //         acode, pp_DW_TAG(atag), pp_DW_children(has_children));
   1002       while (True) {
   1003          ULong at_name = get_ULEB128( c );
   1004          ULong at_form = get_ULEB128( c );
   1005          if (at_name == 0 && at_form == 0) break;
   1006          //TRACE_D3("    %18s %s\n",
   1007          //         pp_DW_AT(at_name), pp_DW_FORM(at_form));
   1008       }
   1009    }
   1010 
   1011    if (acode == 0) {
   1012       /* Not found.  This is fatal. */
   1013       cc->barf("set_abbv_Cursor: abbv_code not found");
   1014    }
   1015 
   1016    /* Otherwise, 'c' is now set correctly to parse the relevant entry,
   1017       starting from the abbreviation entry's tag.  So just cache
   1018       the result, and return. */
   1019    for (i = N_ABBV_CACHE-1; i > N_ABBV_CACHE/2; i--) {
   1020       cc->saC_cache[i] = cc->saC_cache[i-1];
   1021    }
   1022    if (0) VG_(printf)("XXXXXX update set_abbv_Cursor cache\n");
   1023    cc->saC_cache[N_ABBV_CACHE/2].abbv_code = abbv_code;
   1024    cc->saC_cache[N_ABBV_CACHE/2].posn = get_position_of_Cursor(c);
   1025 }
   1026 
   1027 /* This represents a single signatured type.  It maps a type signature
   1028    (a ULong) to a cooked DIE offset.  Objects of this type are stored
   1029    in the type signature hash table.  */
   1030 typedef
   1031    struct D3SignatureType {
   1032       struct D3SignatureType *next;
   1033       UWord data;
   1034       ULong type_signature;
   1035       UWord die;
   1036    }
   1037    D3SignatureType;
   1038 
   1039 /* Record a signatured type in the hash table.  */
   1040 static void record_signatured_type ( VgHashTable tab,
   1041                                      ULong type_signature,
   1042                                      UWord die )
   1043 {
   1044    D3SignatureType *dstype = ML_(dinfo_zalloc) ( "di.readdwarf3.sigtype",
   1045                                                  sizeof(D3SignatureType) );
   1046    dstype->data = (UWord) type_signature;
   1047    dstype->type_signature = type_signature;
   1048    dstype->die = die;
   1049    VG_(HT_add_node) ( tab, dstype );
   1050 }
   1051 
   1052 /* Given a type signature hash table and a type signature, return the
   1053    cooked DIE offset of the type.  If the type cannot be found, call
   1054    BARF.  */
   1055 static UWord lookup_signatured_type ( VgHashTable tab,
   1056                                       ULong type_signature,
   1057                                       void (*barf)( const HChar* ) __attribute__((noreturn)) )
   1058 {
   1059    D3SignatureType *dstype = VG_(HT_lookup) ( tab, (UWord) type_signature );
   1060    /* This may be unwarranted chumminess with the hash table
   1061       implementation.  */
   1062    while ( dstype != NULL && dstype->type_signature != type_signature)
   1063       dstype = dstype->next;
   1064    if (dstype == NULL) {
   1065       barf("lookup_signatured_type: could not find signatured type");
   1066       /*NOTREACHED*/
   1067       vg_assert(0);
   1068    }
   1069    return dstype->die;
   1070 }
   1071 
   1072 
   1073 /* Represents Form data.  If szB is 1/2/4/8 then the result is in the
   1074    lowest 1/2/4/8 bytes of u.val.  If szB is zero or negative then the
   1075    result is an image section beginning at u.cur and with size -szB.
   1076    No other szB values are allowed. */
   1077 typedef
   1078    struct {
   1079       Long szB; // 1, 2, 4, 8 or non-positive values only.
   1080       union { ULong val; DiCursor cur; } u;
   1081    }
   1082    FormContents;
   1083 
   1084 /* From 'c', get the Form data into 'cts'.  Either it gets a 1/2/4/8
   1085    byte scalar value, or (a reference to) zero or more bytes starting
   1086    at a DiCursor.*/
   1087 static
   1088 void get_Form_contents ( /*OUT*/FormContents* cts,
   1089                          CUConst* cc, Cursor* c,
   1090                          Bool td3, DW_FORM form )
   1091 {
   1092    VG_(bzero_inline)(cts, sizeof(*cts));
   1093    switch (form) {
   1094       case DW_FORM_data1:
   1095          cts->u.val = (ULong)(UChar)get_UChar(c);
   1096          cts->szB   = 1;
   1097          TRACE_D3("%u", (UInt)cts->u.val);
   1098          break;
   1099       case DW_FORM_data2:
   1100          cts->u.val = (ULong)(UShort)get_UShort(c);
   1101          cts->szB   = 2;
   1102          TRACE_D3("%u", (UInt)cts->u.val);
   1103          break;
   1104       case DW_FORM_data4:
   1105          cts->u.val = (ULong)(UInt)get_UInt(c);
   1106          cts->szB   = 4;
   1107          TRACE_D3("%u", (UInt)cts->u.val);
   1108          break;
   1109       case DW_FORM_data8:
   1110          cts->u.val = get_ULong(c);
   1111          cts->szB   = 8;
   1112          TRACE_D3("%llu", cts->u.val);
   1113          break;
   1114       case DW_FORM_sec_offset:
   1115          cts->u.val = (ULong)get_Dwarfish_UWord( c, cc->is_dw64 );
   1116          cts->szB   = cc->is_dw64 ? 8 : 4;
   1117          TRACE_D3("%llu", cts->u.val);
   1118          break;
   1119       case DW_FORM_sdata:
   1120          cts->u.val = (ULong)(Long)get_SLEB128(c);
   1121          cts->szB   = 8;
   1122          TRACE_D3("%lld", (Long)cts->u.val);
   1123          break;
   1124       case DW_FORM_udata:
   1125          cts->u.val = (ULong)(Long)get_ULEB128(c);
   1126          cts->szB   = 8;
   1127          TRACE_D3("%llu", (Long)cts->u.val);
   1128          break;
   1129       case DW_FORM_addr:
   1130          /* note, this is a hack.  DW_FORM_addr is defined as getting
   1131             a word the size of the target machine as defined by the
   1132             address_size field in the CU Header.  However,
   1133             parse_CU_Header() rejects all inputs except those for
   1134             which address_size == sizeof(Word), hence we can just
   1135             treat it as a (host) Word.  */
   1136          cts->u.val = (ULong)(UWord)get_UWord(c);
   1137          cts->szB   = sizeof(UWord);
   1138          TRACE_D3("0x%lx", (UWord)cts->u.val);
   1139          break;
   1140 
   1141       case DW_FORM_ref_addr:
   1142          /* We make the same word-size assumption as DW_FORM_addr. */
   1143          /* What does this really mean?  From D3 Sec 7.5.4,
   1144             description of "reference", it would appear to reference
   1145             some other DIE, by specifying the offset from the
   1146             beginning of a .debug_info section.  The D3 spec mentions
   1147             that this might be in some other shared object and
   1148             executable.  But I don't see how the name of the other
   1149             object/exe is specified.
   1150 
   1151             At least for the DW_FORM_ref_addrs created by icc11, the
   1152             references seem to be within the same object/executable.
   1153             So for the moment we merely range-check, to see that they
   1154             actually do specify a plausible offset within this
   1155             object's .debug_info, and return the value unchanged.
   1156 
   1157             In DWARF 2, DW_FORM_ref_addr is address-sized, but in
   1158             DWARF 3 and later, it is offset-sized.
   1159          */
   1160          if (cc->version == 2) {
   1161             cts->u.val = (ULong)(UWord)get_UWord(c);
   1162             cts->szB   = sizeof(UWord);
   1163          } else {
   1164             cts->u.val = get_Dwarfish_UWord(c, cc->is_dw64);
   1165             cts->szB   = cc->is_dw64 ? sizeof(ULong) : sizeof(UInt);
   1166          }
   1167          TRACE_D3("0x%lx", (UWord)cts->u.val);
   1168          if (0) VG_(printf)("DW_FORM_ref_addr 0x%lx\n", (UWord)cts->u.val);
   1169          if (/* the following is surely impossible, but ... */
   1170              !ML_(sli_is_valid)(cc->escn_debug_info)
   1171              || cts->u.val >= (ULong)cc->escn_debug_info.szB) {
   1172             /* Hmm.  Offset is nonsensical for this object's .debug_info
   1173                section.  Be safe and reject it. */
   1174             cc->barf("get_Form_contents: DW_FORM_ref_addr points "
   1175                      "outside .debug_info");
   1176          }
   1177          break;
   1178 
   1179       case DW_FORM_strp: {
   1180          /* this is an offset into .debug_str */
   1181          UWord uw = (UWord)get_Dwarfish_UWord( c, cc->is_dw64 );
   1182          if (!ML_(sli_is_valid)(cc->escn_debug_str)
   1183              || uw >= cc->escn_debug_str.szB)
   1184             cc->barf("get_Form_contents: DW_FORM_strp "
   1185                      "points outside .debug_str");
   1186          /* FIXME: check the entire string lies inside debug_str,
   1187             not just the first byte of it. */
   1188          DiCursor str
   1189             = ML_(cur_plus)( ML_(cur_from_sli)(cc->escn_debug_str), uw );
   1190          if (td3) {
   1191             HChar* tmp = ML_(cur_read_strdup)(str, "di.getFC.1");
   1192             TRACE_D3("(indirect string, offset: 0x%lx): %s", uw, tmp);
   1193             ML_(dinfo_free)(tmp);
   1194          }
   1195          cts->u.cur = str;
   1196          cts->szB   = - (Long)(1 + (ULong)ML_(cur_strlen)(str));
   1197          break;
   1198       }
   1199       case DW_FORM_string: {
   1200          DiCursor str = get_AsciiZ(c);
   1201          if (td3) {
   1202             HChar* tmp = ML_(cur_read_strdup)(str, "di.getFC.2");
   1203             TRACE_D3("%s", tmp);
   1204             ML_(dinfo_free)(tmp);
   1205          }
   1206          cts->u.cur = str;
   1207          /* strlen is safe because get_AsciiZ already 'vetted' the
   1208             entire string */
   1209          cts->szB   = - (Long)(1 + (ULong)ML_(cur_strlen)(str));
   1210          break;
   1211       }
   1212       case DW_FORM_ref1: {
   1213          UChar u8   = get_UChar(c);
   1214          UWord res  = cc->cu_start_offset + (UWord)u8;
   1215          cts->u.val = (ULong)res;
   1216          cts->szB   = sizeof(UWord);
   1217          TRACE_D3("<%lx>", res);
   1218          break;
   1219       }
   1220       case DW_FORM_ref2: {
   1221          UShort u16 = get_UShort(c);
   1222          UWord  res = cc->cu_start_offset + (UWord)u16;
   1223          cts->u.val = (ULong)res;
   1224          cts->szB   = sizeof(UWord);
   1225          TRACE_D3("<%lx>", res);
   1226          break;
   1227       }
   1228       case DW_FORM_ref4: {
   1229          UInt  u32  = get_UInt(c);
   1230          UWord res  = cc->cu_start_offset + (UWord)u32;
   1231          cts->u.val = (ULong)res;
   1232          cts->szB   = sizeof(UWord);
   1233          TRACE_D3("<%lx>", res);
   1234          break;
   1235       }
   1236       case DW_FORM_ref8: {
   1237          ULong u64  = get_ULong(c);
   1238          UWord res  = cc->cu_start_offset + (UWord)u64;
   1239          cts->u.val = (ULong)res;
   1240          cts->szB   = sizeof(UWord);
   1241          TRACE_D3("<%lx>", res);
   1242          break;
   1243       }
   1244       case DW_FORM_ref_udata: {
   1245          ULong u64  = get_ULEB128(c);
   1246          UWord res  = cc->cu_start_offset + (UWord)u64;
   1247          cts->u.val = (ULong)res;
   1248          cts->szB   = sizeof(UWord);
   1249          TRACE_D3("<%lx>", res);
   1250          break;
   1251       }
   1252       case DW_FORM_flag: {
   1253          UChar u8 = get_UChar(c);
   1254          TRACE_D3("%u", (UInt)u8);
   1255          cts->u.val = (ULong)u8;
   1256          cts->szB   = 1;
   1257          break;
   1258       }
   1259       case DW_FORM_flag_present:
   1260          TRACE_D3("1");
   1261          cts->u.val = 1;
   1262          cts->szB   = 1;
   1263          break;
   1264       case DW_FORM_block1: {
   1265          ULong    u64b;
   1266          ULong    u64   = (ULong)get_UChar(c);
   1267          DiCursor block = get_DiCursor_from_Cursor(c);
   1268          TRACE_D3("%llu byte block: ", u64);
   1269          for (u64b = u64; u64b > 0; u64b--) {
   1270             UChar u8 = get_UChar(c);
   1271             TRACE_D3("%x ", (UInt)u8);
   1272          }
   1273          cts->u.cur = block;
   1274          cts->szB   = - (Long)u64;
   1275          break;
   1276       }
   1277       case DW_FORM_block2: {
   1278          ULong    u64b;
   1279          ULong    u64   = (ULong)get_UShort(c);
   1280          DiCursor block = get_DiCursor_from_Cursor(c);
   1281          TRACE_D3("%llu byte block: ", u64);
   1282          for (u64b = u64; u64b > 0; u64b--) {
   1283             UChar u8 = get_UChar(c);
   1284             TRACE_D3("%x ", (UInt)u8);
   1285          }
   1286          cts->u.cur = block;
   1287          cts->szB   = - (Long)u64;
   1288          break;
   1289       }
   1290       case DW_FORM_block4: {
   1291          ULong    u64b;
   1292          ULong    u64   = (ULong)get_UInt(c);
   1293          DiCursor block = get_DiCursor_from_Cursor(c);
   1294          TRACE_D3("%llu byte block: ", u64);
   1295          for (u64b = u64; u64b > 0; u64b--) {
   1296             UChar u8 = get_UChar(c);
   1297             TRACE_D3("%x ", (UInt)u8);
   1298          }
   1299          cts->u.cur = block;
   1300          cts->szB   = - (Long)u64;
   1301          break;
   1302       }
   1303       case DW_FORM_exprloc:
   1304       case DW_FORM_block: {
   1305          ULong    u64b;
   1306          ULong    u64   = (ULong)get_ULEB128(c);
   1307          DiCursor block = get_DiCursor_from_Cursor(c);
   1308          TRACE_D3("%llu byte block: ", u64);
   1309          for (u64b = u64; u64b > 0; u64b--) {
   1310             UChar u8 = get_UChar(c);
   1311             TRACE_D3("%x ", (UInt)u8);
   1312          }
   1313          cts->u.cur = block;
   1314          cts->szB   = - (Long)u64;
   1315          break;
   1316       }
   1317       case DW_FORM_ref_sig8: {
   1318          ULong  u64b;
   1319          ULong  signature = get_ULong (c);
   1320          ULong  work = signature;
   1321          TRACE_D3("8 byte signature: ");
   1322          for (u64b = 8; u64b > 0; u64b--) {
   1323             UChar u8 = work & 0xff;
   1324             TRACE_D3("%x ", (UInt)u8);
   1325             work >>= 8;
   1326          }
   1327          /* Due to the way that the hash table is constructed, the
   1328             resulting DIE offset here is already "cooked".  See
   1329             cook_die_using_form.  */
   1330          cts->u.val = lookup_signatured_type (cc->signature_types, signature,
   1331                                               c->barf);
   1332          cts->szB   = sizeof(UWord);
   1333          break;
   1334       }
   1335       case DW_FORM_indirect:
   1336          get_Form_contents (cts, cc, c, td3, (DW_FORM)get_ULEB128(c));
   1337          return;
   1338 
   1339       case DW_FORM_GNU_ref_alt:
   1340          cts->u.val = get_Dwarfish_UWord(c, cc->is_dw64);
   1341          cts->szB   = cc->is_dw64 ? sizeof(ULong) : sizeof(UInt);
   1342          TRACE_D3("0x%lx", (UWord)cts->u.val);
   1343          if (0) VG_(printf)("DW_FORM_GNU_ref_alt 0x%lx\n", (UWord)cts->u.val);
   1344          if (/* the following is surely impossible, but ... */
   1345              !ML_(sli_is_valid)(cc->escn_debug_info_alt)
   1346              || cts->u.val >= (ULong)cc->escn_debug_info_alt.szB) {
   1347             /* Hmm.  Offset is nonsensical for this object's .debug_info
   1348                section.  Be safe and reject it. */
   1349             cc->barf("get_Form_contents: DW_FORM_ref_addr points "
   1350                      "outside alternate .debug_info");
   1351          }
   1352          break;
   1353 
   1354       case DW_FORM_GNU_strp_alt: {
   1355          /* this is an offset into alternate .debug_str */
   1356          SizeT uw = (UWord)get_Dwarfish_UWord( c, cc->is_dw64 );
   1357          if (!ML_(sli_is_valid)(cc->escn_debug_str_alt)
   1358              || uw >= cc->escn_debug_str_alt.szB)
   1359             cc->barf("get_Form_contents: DW_FORM_GNU_strp_alt "
   1360                      "points outside alternate .debug_str");
   1361          /* FIXME: check the entire string lies inside debug_str,
   1362             not just the first byte of it. */
   1363          DiCursor str
   1364             = ML_(cur_plus)( ML_(cur_from_sli)(cc->escn_debug_str_alt), uw);
   1365          if (td3) {
   1366             HChar* tmp = ML_(cur_read_strdup)(str, "di.getFC.3");
   1367             TRACE_D3("(indirect alt string, offset: 0x%lx): %s", uw, tmp);
   1368             ML_(dinfo_free)(tmp);
   1369          }
   1370          cts->u.cur = str;
   1371          cts->szB   = - (Long)(1 + (ULong)ML_(cur_strlen)(str));
   1372          break;
   1373       }
   1374 
   1375       default:
   1376          VG_(printf)(
   1377             "get_Form_contents: unhandled %d (%s) at <%llx>\n",
   1378             form, ML_(pp_DW_FORM)(form), get_position_of_Cursor(c));
   1379          c->barf("get_Form_contents: unhandled DW_FORM");
   1380    }
   1381 }
   1382 
   1383 
   1384 /*------------------------------------------------------------*/
   1385 /*---                                                      ---*/
   1386 /*--- Parsing of variable-related DIEs                     ---*/
   1387 /*---                                                      ---*/
   1388 /*------------------------------------------------------------*/
   1389 
   1390 typedef
   1391    struct _TempVar {
   1392       HChar*  name; /* in DebugInfo's .strchunks */
   1393       /* Represent ranges economically.  nRanges is the number of
   1394          ranges.  Cases:
   1395          0: .rngOneMin .rngOneMax .manyRanges are all zero
   1396          1: .rngOneMin .rngOneMax hold the range; .rngMany is NULL
   1397          2: .rngOneMin .rngOneMax are zero; .rngMany holds the ranges.
   1398          This is merely an optimisation to avoid having to allocate
   1399          and free the XArray in the common (98%) of cases where there
   1400          is zero or one address ranges. */
   1401       UWord   nRanges;
   1402       Addr    rngOneMin;
   1403       Addr    rngOneMax;
   1404       XArray* rngMany; /* of AddrRange.  NON-UNIQUE PTR in AR_DINFO. */
   1405       /* Do not free .rngMany, since many TempVars will have the same
   1406          value.  Instead the associated storage is to be freed by
   1407          deleting 'rangetree', which stores a single copy of each
   1408          range. */
   1409       /* --- */
   1410       Int     level;
   1411       UWord   typeR; /* a cuOff */
   1412       GExpr*  gexpr; /* for this variable */
   1413       GExpr*  fbGX;  /* to find the frame base of the enclosing fn, if
   1414                         any */
   1415       HChar*  fName; /* declaring file name, or NULL */
   1416       Int     fLine; /* declaring file line number, or zero */
   1417       /* offset in .debug_info, so that abstract instances can be
   1418          found to satisfy references from concrete instances. */
   1419       UWord   dioff;
   1420       UWord   absOri; /* so the absOri fields refer to dioff fields
   1421                          in some other, related TempVar. */
   1422    }
   1423    TempVar;
   1424 
   1425 #define N_D3_VAR_STACK 48
   1426 
   1427 typedef
   1428    struct {
   1429       /* Contains the range stack: a stack of address ranges, one
   1430          stack entry for each nested scope.
   1431 
   1432          Some scope entries are created by function definitions
   1433          (DW_AT_subprogram), and for those, we also note the GExpr
   1434          derived from its DW_AT_frame_base attribute, if any.
   1435          Consequently it should be possible to find, for any
   1436          variable's DIE, the GExpr for the the containing function's
   1437          DW_AT_frame_base by scanning back through the stack to find
   1438          the nearest entry associated with a function.  This somewhat
   1439          elaborate scheme is provided so as to make it possible to
   1440          obtain the correct DW_AT_frame_base expression even in the
   1441          presence of nested functions (or to be more precise, in the
   1442          presence of nested DW_AT_subprogram DIEs).
   1443       */
   1444       Int     sp; /* [sp] is innermost active entry; sp==-1 for empty
   1445                      stack */
   1446       XArray* ranges[N_D3_VAR_STACK]; /* XArray of AddrRange */
   1447       Int     level[N_D3_VAR_STACK];  /* D3 DIE levels */
   1448       Bool    isFunc[N_D3_VAR_STACK]; /* from DW_AT_subprogram? */
   1449       GExpr*  fbGX[N_D3_VAR_STACK];   /* if isFunc, contains the FB
   1450                                          expr, else NULL */
   1451       /* The file name table.  Is a mapping from integer index to the
   1452          (permanent) copy of the string in in DebugInfo's .strchunks. */
   1453       XArray* /* of UChar* */ filenameTable;
   1454    }
   1455    D3VarParser;
   1456 
   1457 static void varstack_show ( D3VarParser* parser, const HChar* str ) {
   1458    Word i, j;
   1459    VG_(printf)("  varstack (%s) {\n", str);
   1460    for (i = 0; i <= parser->sp; i++) {
   1461       XArray* xa = parser->ranges[i];
   1462       vg_assert(xa);
   1463       VG_(printf)("    [%ld] (level %d)", i, parser->level[i]);
   1464       if (parser->isFunc[i]) {
   1465          VG_(printf)(" (fbGX=%p)", parser->fbGX[i]);
   1466       } else {
   1467          vg_assert(parser->fbGX[i] == NULL);
   1468       }
   1469       VG_(printf)(": ");
   1470       if (VG_(sizeXA)( xa ) == 0) {
   1471          VG_(printf)("** empty PC range array **");
   1472       } else {
   1473          for (j = 0; j < VG_(sizeXA)( xa ); j++) {
   1474             AddrRange* range = (AddrRange*) VG_(indexXA)( xa, j );
   1475             vg_assert(range);
   1476             VG_(printf)("[%#lx,%#lx] ", range->aMin, range->aMax);
   1477          }
   1478       }
   1479       VG_(printf)("\n");
   1480    }
   1481    VG_(printf)("  }\n");
   1482 }
   1483 
   1484 /* Remove from the stack, all entries with .level > 'level' */
   1485 static
   1486 void varstack_preen ( D3VarParser* parser, Bool td3, Int level )
   1487 {
   1488    Bool changed = False;
   1489    vg_assert(parser->sp < N_D3_VAR_STACK);
   1490    while (True) {
   1491       vg_assert(parser->sp >= -1);
   1492       if (parser->sp == -1) break;
   1493       if (parser->level[parser->sp] <= level) break;
   1494       if (0)
   1495          TRACE_D3("BBBBAAAA varstack_pop [newsp=%d]\n", parser->sp-1);
   1496       vg_assert(parser->ranges[parser->sp]);
   1497       /* Who allocated this xa?  get_range_list() or
   1498          unitary_range_list(). */
   1499       VG_(deleteXA)( parser->ranges[parser->sp] );
   1500       parser->ranges[parser->sp] = NULL;
   1501       parser->level[parser->sp]  = 0;
   1502       parser->isFunc[parser->sp] = False;
   1503       parser->fbGX[parser->sp]   = NULL;
   1504       parser->sp--;
   1505       changed = True;
   1506    }
   1507    if (changed && td3)
   1508       varstack_show( parser, "after preen" );
   1509 }
   1510 
   1511 static void varstack_push ( CUConst* cc,
   1512                             D3VarParser* parser,
   1513                             Bool td3,
   1514                             XArray* ranges, Int level,
   1515                             Bool    isFunc, GExpr* fbGX ) {
   1516    if (0)
   1517    TRACE_D3("BBBBAAAA varstack_push[newsp=%d]: %d  %p\n",
   1518             parser->sp+1, level, ranges);
   1519 
   1520    /* First we need to zap everything >= 'level', as we are about to
   1521       replace any previous entry at 'level', so .. */
   1522    varstack_preen(parser, /*td3*/False, level-1);
   1523 
   1524    vg_assert(parser->sp >= -1);
   1525    vg_assert(parser->sp < N_D3_VAR_STACK);
   1526    if (parser->sp == N_D3_VAR_STACK-1)
   1527       cc->barf("varstack_push: N_D3_VAR_STACK is too low; "
   1528                "increase and recompile");
   1529    if (parser->sp >= 0)
   1530       vg_assert(parser->level[parser->sp] < level);
   1531    parser->sp++;
   1532    vg_assert(parser->ranges[parser->sp] == NULL);
   1533    vg_assert(parser->level[parser->sp]  == 0);
   1534    vg_assert(parser->isFunc[parser->sp] == False);
   1535    vg_assert(parser->fbGX[parser->sp]   == NULL);
   1536    vg_assert(ranges != NULL);
   1537    if (!isFunc) vg_assert(fbGX == NULL);
   1538    parser->ranges[parser->sp] = ranges;
   1539    parser->level[parser->sp]  = level;
   1540    parser->isFunc[parser->sp] = isFunc;
   1541    parser->fbGX[parser->sp]   = fbGX;
   1542    if (td3)
   1543       varstack_show( parser, "after push" );
   1544 }
   1545 
   1546 
   1547 /* cts is derived from a DW_AT_location and so refers either to a
   1548    location expression or to a location list.  Figure out which, and
   1549    in both cases bundle the expression or location list into a
   1550    so-called GExpr (guarded expression). */
   1551 __attribute__((noinline))
   1552 static GExpr* get_GX ( CUConst* cc, Bool td3, const FormContents* cts )
   1553 {
   1554    GExpr* gexpr = NULL;
   1555    if (cts->szB < 0) {
   1556       /* represents a non-empty in-line location expression, and
   1557          cts->u.cur points at the image bytes */
   1558       gexpr = make_singleton_GX( cts->u.cur, (ULong)(- cts->szB) );
   1559    }
   1560    else
   1561    if (cts->szB > 0) {
   1562       /* represents a location list.  cts->u.val is the offset of it
   1563          in .debug_loc. */
   1564       if (!cc->cu_svma_known)
   1565          cc->barf("get_GX: location list, but CU svma is unknown");
   1566       gexpr = make_general_GX( cc, td3, cts->u.val, cc->cu_svma );
   1567    }
   1568    else {
   1569       vg_assert(0); /* else caller is bogus */
   1570    }
   1571    return gexpr;
   1572 }
   1573 
   1574 
   1575 static
   1576 void read_filename_table( /*MOD*/D3VarParser* parser,
   1577                           CUConst* cc, ULong debug_line_offset,
   1578                           Bool td3 )
   1579 {
   1580    Bool   is_dw64;
   1581    Cursor c;
   1582    Word   i;
   1583    UShort version;
   1584    UChar  opcode_base;
   1585    HChar* str;
   1586 
   1587    vg_assert(parser && cc && cc->barf);
   1588    if (!ML_(sli_is_valid)(cc->escn_debug_line)
   1589        || cc->escn_debug_line.szB <= debug_line_offset) {
   1590       cc->barf("read_filename_table: .debug_line is missing?");
   1591    }
   1592 
   1593    init_Cursor( &c, cc->escn_debug_line, debug_line_offset, cc->barf,
   1594                 "Overrun whilst reading .debug_line section(1)" );
   1595 
   1596    /* unit_length = */
   1597       get_Initial_Length( &is_dw64, &c,
   1598            "read_filename_table: invalid initial-length field" );
   1599    version = get_UShort( &c );
   1600    if (version != 2 && version != 3 && version != 4)
   1601      cc->barf("read_filename_table: Only DWARF version 2, 3 and 4 line info "
   1602               "is currently supported.");
   1603    /*header_length              = (ULong)*/ get_Dwarfish_UWord( &c, is_dw64 );
   1604    /*minimum_instruction_length = */ get_UChar( &c );
   1605    if (version >= 4)
   1606       /*maximum_operations_per_insn = */ get_UChar( &c );
   1607    /*default_is_stmt            = */ get_UChar( &c );
   1608    /*line_base                  = (Char)*/ get_UChar( &c );
   1609    /*line_range                 = */ get_UChar( &c );
   1610    opcode_base                = get_UChar( &c );
   1611    /* skip over "standard_opcode_lengths" */
   1612    for (i = 1; i < (Word)opcode_base; i++)
   1613      (void)get_UChar( &c );
   1614 
   1615    /* skip over the directory names table */
   1616    while (peek_UChar(&c) != 0) {
   1617      (void)get_AsciiZ(&c);
   1618    }
   1619    (void)get_UChar(&c); /* skip terminating zero */
   1620 
   1621    /* Read and record the file names table */
   1622    vg_assert(parser->filenameTable);
   1623    vg_assert( VG_(sizeXA)( parser->filenameTable ) == 0 );
   1624    /* Add a dummy index-zero entry.  DWARF3 numbers its files
   1625       from 1, for some reason. */
   1626    str = ML_(addStr)( cc->di, "<unknown_file>", -1 );
   1627    VG_(addToXA)( parser->filenameTable, &str );
   1628    while (peek_UChar(&c) != 0) {
   1629       DiCursor cur = get_AsciiZ(&c);
   1630       str = ML_(addStrFromCursor)( cc->di, cur );
   1631       TRACE_D3("  read_filename_table: %ld %s\n",
   1632                VG_(sizeXA)(parser->filenameTable), str);
   1633       VG_(addToXA)( parser->filenameTable, &str );
   1634       (void)get_ULEB128( &c ); /* skip directory index # */
   1635       (void)get_ULEB128( &c ); /* skip last mod time */
   1636       (void)get_ULEB128( &c ); /* file size */
   1637    }
   1638    /* We're done!  The rest of it is not interesting. */
   1639 }
   1640 
   1641 __attribute__((noinline))
   1642 static void bad_DIE_confusion(int linenr)
   1643 {
   1644    VG_(printf)("\nparse_var_DIE(%d): confused by:\n", linenr);
   1645 }
   1646 #define goto_bad_DIE do {bad_DIE_confusion(__LINE__); goto bad_DIE;} while (0)
   1647 
   1648 __attribute__((noinline))
   1649 static void parse_var_DIE (
   1650    /*MOD*/WordFM* /* of (XArray* of AddrRange, void) */ rangestree,
   1651    /*MOD*/XArray* /* of TempVar* */ tempvars,
   1652    /*MOD*/XArray* /* of GExpr* */ gexprs,
   1653    /*MOD*/D3VarParser* parser,
   1654    DW_TAG dtag,
   1655    UWord posn,
   1656    Int level,
   1657    Cursor* c_die,
   1658    Cursor* c_abbv,
   1659    CUConst* cc,
   1660    Bool td3
   1661 )
   1662 {
   1663    FormContents cts;
   1664 
   1665    UWord saved_die_c_offset  = get_position_of_Cursor( c_die );
   1666    UWord saved_abbv_c_offset = get_position_of_Cursor( c_abbv );
   1667    Bool  debug_types_flag;
   1668    Bool  alt_flag;
   1669 
   1670    varstack_preen( parser, td3, level-1 );
   1671 
   1672    if (dtag == DW_TAG_compile_unit
   1673        || dtag == DW_TAG_type_unit
   1674        || dtag == DW_TAG_partial_unit) {
   1675       Bool have_lo    = False;
   1676       Bool have_hi1   = False;
   1677       Bool hiIsRelative = False;
   1678       Bool have_range = False;
   1679       Addr ip_lo    = 0;
   1680       Addr ip_hi1   = 0;
   1681       Addr rangeoff = 0;
   1682       while (True) {
   1683          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
   1684          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
   1685          if (attr == 0 && form == 0) break;
   1686          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
   1687          if (attr == DW_AT_low_pc && cts.szB > 0) {
   1688             ip_lo   = cts.u.val;
   1689             have_lo = True;
   1690          }
   1691          if (attr == DW_AT_high_pc && cts.szB > 0) {
   1692             ip_hi1   = cts.u.val;
   1693             have_hi1 = True;
   1694             if (form != DW_FORM_addr)
   1695                hiIsRelative = True;
   1696          }
   1697          if (attr == DW_AT_ranges && cts.szB > 0) {
   1698             rangeoff   = cts.u.val;
   1699             have_range = True;
   1700          }
   1701          if (attr == DW_AT_stmt_list && cts.szB > 0) {
   1702             read_filename_table( parser, cc, cts.u.val, td3 );
   1703          }
   1704       }
   1705       if (have_lo && have_hi1 && hiIsRelative)
   1706          ip_hi1 += ip_lo;
   1707       /* Now, does this give us an opportunity to find this
   1708          CU's svma? */
   1709 #if 0
   1710       if (level == 0 && have_lo) {
   1711          vg_assert(!cc->cu_svma_known); /* if this fails, it must be
   1712          because we've already seen a DW_TAG_compile_unit DIE at level
   1713          0.  But that can't happen, because DWARF3 only allows exactly
   1714          one top level DIE per CU. */
   1715          cc->cu_svma_known = True;
   1716          cc->cu_svma = ip_lo;
   1717          if (1)
   1718             TRACE_D3("BBBBAAAA acquire CU_SVMA of %p\n", cc->cu_svma);
   1719          /* Now, it may be that this DIE doesn't tell us the CU's
   1720             SVMA, by way of not having a DW_AT_low_pc.  That's OK --
   1721             the CU doesn't *have* to have its SVMA specified.
   1722 
   1723             But as per last para D3 spec sec 3.1.1 ("Normal and
   1724             Partial Compilation Unit Entries", "If the base address
   1725             (viz, the SVMA) is undefined, then any DWARF entry of
   1726             structure defined interms of the base address of that
   1727             compilation unit is not valid.".  So that means, if whilst
   1728             processing the children of this top level DIE (or their
   1729             children, etc) we see a DW_AT_range, and cu_svma_known is
   1730             False, then the DIE that contains it is (per the spec)
   1731             invalid, and we can legitimately stop and complain. */
   1732       }
   1733 #else
   1734       /* .. whereas The Reality is, simply assume the SVMA is zero
   1735          if it isn't specified. */
   1736       if (level == 0) {
   1737          vg_assert(!cc->cu_svma_known);
   1738          cc->cu_svma_known = True;
   1739          if (have_lo)
   1740             cc->cu_svma = ip_lo;
   1741          else
   1742             cc->cu_svma = 0;
   1743       }
   1744 #endif
   1745       /* Do we have something that looks sane? */
   1746       if (have_lo && have_hi1 && (!have_range)) {
   1747          if (ip_lo < ip_hi1)
   1748             varstack_push( cc, parser, td3,
   1749                            unitary_range_list(ip_lo, ip_hi1 - 1),
   1750                            level,
   1751                            False/*isFunc*/, NULL/*fbGX*/ );
   1752          else if (ip_lo == 0 && ip_hi1 == 0)
   1753             /* CU has no code, presumably?
   1754                Such situations have been encountered for code
   1755                compiled with -ffunction-sections -fdata-sections
   1756                and linked with --gc-sections. Completely
   1757                eliminated CU gives such 0 lo/hi pc. Similarly
   1758                to a CU which has no lo/hi/range pc, we push
   1759                an empty range list. */
   1760             varstack_push( cc, parser, td3,
   1761                            empty_range_list(),
   1762                            level,
   1763                            False/*isFunc*/, NULL/*fbGX*/ );
   1764       } else
   1765       if ((!have_lo) && (!have_hi1) && have_range) {
   1766          varstack_push( cc, parser, td3,
   1767                         get_range_list( cc, td3,
   1768                                         rangeoff, cc->cu_svma ),
   1769                         level,
   1770                         False/*isFunc*/, NULL/*fbGX*/ );
   1771       } else
   1772       if ((!have_lo) && (!have_hi1) && (!have_range)) {
   1773          /* CU has no code, presumably? */
   1774          varstack_push( cc, parser, td3,
   1775                         empty_range_list(),
   1776                         level,
   1777                         False/*isFunc*/, NULL/*fbGX*/ );
   1778       } else
   1779       if (have_lo && (!have_hi1) && have_range && ip_lo == 0) {
   1780          /* broken DIE created by gcc-4.3.X ?  Ignore the
   1781             apparently-redundant DW_AT_low_pc and use the DW_AT_ranges
   1782             instead. */
   1783          varstack_push( cc, parser, td3,
   1784                         get_range_list( cc, td3,
   1785                                         rangeoff, cc->cu_svma ),
   1786                         level,
   1787                         False/*isFunc*/, NULL/*fbGX*/ );
   1788       } else {
   1789          if (0) VG_(printf)("I got hlo %d hhi1 %d hrange %d\n",
   1790                             (Int)have_lo, (Int)have_hi1, (Int)have_range);
   1791          goto_bad_DIE;
   1792       }
   1793    }
   1794 
   1795    if (dtag == DW_TAG_lexical_block || dtag == DW_TAG_subprogram) {
   1796       Bool   have_lo    = False;
   1797       Bool   have_hi1   = False;
   1798       Bool   have_range = False;
   1799       Bool   hiIsRelative = False;
   1800       Addr   ip_lo      = 0;
   1801       Addr   ip_hi1     = 0;
   1802       Addr   rangeoff   = 0;
   1803       Bool   isFunc     = dtag == DW_TAG_subprogram;
   1804       GExpr* fbGX       = NULL;
   1805       while (True) {
   1806          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
   1807          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
   1808          if (attr == 0 && form == 0) break;
   1809          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
   1810          if (attr == DW_AT_low_pc && cts.szB > 0) {
   1811             ip_lo   = cts.u.val;
   1812             have_lo = True;
   1813          }
   1814          if (attr == DW_AT_high_pc && cts.szB > 0) {
   1815             ip_hi1   = cts.u.val;
   1816             have_hi1 = True;
   1817             if (form != DW_FORM_addr)
   1818                hiIsRelative = True;
   1819          }
   1820          if (attr == DW_AT_ranges && cts.szB > 0) {
   1821             rangeoff   = cts.u.val;
   1822             have_range = True;
   1823          }
   1824          if (isFunc
   1825              && attr == DW_AT_frame_base
   1826              && cts.szB != 0 /* either scalar or nonempty block */) {
   1827             fbGX = get_GX( cc, False/*td3*/, &cts );
   1828             vg_assert(fbGX);
   1829             VG_(addToXA)(gexprs, &fbGX);
   1830          }
   1831       }
   1832       if (have_lo && have_hi1 && hiIsRelative)
   1833          ip_hi1 += ip_lo;
   1834       /* Do we have something that looks sane? */
   1835       if (dtag == DW_TAG_subprogram
   1836           && (!have_lo) && (!have_hi1) && (!have_range)) {
   1837          /* This is legit - ignore it. Sec 3.3.3: "A subroutine entry
   1838             representing a subroutine declaration that is not also a
   1839             definition does not have code address or range
   1840             attributes." */
   1841       } else
   1842       if (dtag == DW_TAG_lexical_block
   1843           && (!have_lo) && (!have_hi1) && (!have_range)) {
   1844          /* I believe this is legit, and means the lexical block
   1845             contains no insns (whatever that might mean).  Ignore. */
   1846       } else
   1847       if (have_lo && have_hi1 && (!have_range)) {
   1848          /* This scope supplies just a single address range. */
   1849          if (ip_lo < ip_hi1)
   1850             varstack_push( cc, parser, td3,
   1851                            unitary_range_list(ip_lo, ip_hi1 - 1),
   1852                            level, isFunc, fbGX );
   1853       } else
   1854       if ((!have_lo) && (!have_hi1) && have_range) {
   1855          /* This scope supplies multiple address ranges via the use of
   1856             a range list. */
   1857          varstack_push( cc, parser, td3,
   1858                         get_range_list( cc, td3,
   1859                                         rangeoff, cc->cu_svma ),
   1860                         level, isFunc, fbGX );
   1861       } else
   1862       if (have_lo && (!have_hi1) && (!have_range)) {
   1863          /* This scope is bogus.  The D3 spec sec 3.4 (Lexical Block
   1864             Entries) says fairly clearly that a scope must have either
   1865             _range or (_low_pc and _high_pc). */
   1866          /* The spec is a bit ambiguous though.  Perhaps a single byte
   1867             range is intended?  See sec 2.17 (Code Addresses And Ranges) */
   1868          /* This case is here because icc9 produced this:
   1869          <2><13bd>: DW_TAG_lexical_block
   1870             DW_AT_decl_line   : 5229
   1871             DW_AT_decl_column : 37
   1872             DW_AT_decl_file   : 1
   1873             DW_AT_low_pc      : 0x401b03
   1874          */
   1875          /* Ignore (seems safe than pushing a single byte range) */
   1876       } else
   1877          goto_bad_DIE;
   1878    }
   1879 
   1880    if (dtag == DW_TAG_variable || dtag == DW_TAG_formal_parameter) {
   1881       HChar* name        = NULL;
   1882       UWord  typeR       = D3_INVALID_CUOFF;
   1883       Bool   global      = False;
   1884       GExpr* gexpr       = NULL;
   1885       Int    n_attrs     = 0;
   1886       UWord  abs_ori     = (UWord)D3_INVALID_CUOFF;
   1887       Int    lineNo      = 0;
   1888       HChar* fileName    = NULL;
   1889       while (True) {
   1890          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
   1891          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
   1892          if (attr == 0 && form == 0) break;
   1893          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
   1894          n_attrs++;
   1895          if (attr == DW_AT_name && cts.szB < 0) {
   1896             name = ML_(addStrFromCursor)( cc->di, cts.u.cur );
   1897          }
   1898          if (attr == DW_AT_location
   1899              && cts.szB != 0 /* either scalar or nonempty block */) {
   1900             gexpr = get_GX( cc, False/*td3*/, &cts );
   1901             vg_assert(gexpr);
   1902             VG_(addToXA)(gexprs, &gexpr);
   1903          }
   1904          if (attr == DW_AT_type && cts.szB > 0) {
   1905             typeR = cook_die_using_form( cc, cts.u.val, form );
   1906          }
   1907          if (attr == DW_AT_external && cts.szB > 0 && cts.u.val > 0) {
   1908             global = True;
   1909          }
   1910          if (attr == DW_AT_abstract_origin && cts.szB > 0) {
   1911             abs_ori = (UWord)cts.u.val;
   1912          }
   1913          if (attr == DW_AT_declaration && cts.szB > 0 && cts.u.val > 0) {
   1914             /*declaration = True;*/
   1915          }
   1916          if (attr == DW_AT_decl_line && cts.szB > 0) {
   1917             lineNo = (Int)cts.u.val;
   1918          }
   1919          if (attr == DW_AT_decl_file && cts.szB > 0) {
   1920             Int ftabIx = (Int)cts.u.val;
   1921             if (ftabIx >= 1
   1922                 && ftabIx < VG_(sizeXA)( parser->filenameTable )) {
   1923                fileName = *(HChar**)
   1924                           VG_(indexXA)( parser->filenameTable, ftabIx );
   1925                vg_assert(fileName);
   1926             }
   1927             if (0) VG_(printf)("XXX filename = %s\n", fileName);
   1928          }
   1929       }
   1930       if (!global && dtag == DW_TAG_variable && level == 1) {
   1931          /* Case of a static variable. It is better to declare
   1932             it global as the variable is not really related to
   1933             a PC range, as its address can be used by program
   1934             counters outside of the ranges where it is visible . */
   1935          global = True;
   1936       }
   1937 
   1938       /* We'll collect it under if one of the following three
   1939          conditions holds:
   1940          (1) has location and type    -> completed
   1941          (2) has type only            -> is an abstract instance
   1942          (3) has location and abs_ori -> is a concrete instance
   1943          Name, filename and line number are all optional frills.
   1944       */
   1945       if ( /* 1 */ (gexpr && typeR != D3_INVALID_CUOFF)
   1946            /* 2 */ || (typeR != D3_INVALID_CUOFF)
   1947            /* 3 */ || (gexpr && abs_ori != (UWord)D3_INVALID_CUOFF) ) {
   1948 
   1949          /* Add this variable to the list of interesting looking
   1950             variables.  Crucially, note along with it the address
   1951             range(s) associated with the variable, which for locals
   1952             will be the address ranges at the top of the varparser's
   1953             stack. */
   1954          GExpr*   fbGX = NULL;
   1955          Word     i, nRanges;
   1956          XArray*  /* of AddrRange */ xa;
   1957          TempVar* tv;
   1958          /* Stack can't be empty; we put a dummy entry on it for the
   1959             entire address range before starting with the DIEs for
   1960             this CU. */
   1961          vg_assert(parser->sp >= 0);
   1962 
   1963          /* If this is a local variable (non-global), try to find
   1964             the GExpr for the DW_AT_frame_base of the containing
   1965             function.  It should have been pushed on the stack at the
   1966             time we encountered its DW_TAG_subprogram DIE, so the way
   1967             to find it is to scan back down the stack looking for it.
   1968             If there isn't an enclosing stack entry marked 'isFunc'
   1969             then we must be seeing variable or formal param DIEs
   1970             outside of a function, so we deem the Dwarf to be
   1971             malformed if that happens.  Note that the fbGX may be NULL
   1972             if the containing DT_TAG_subprogram didn't supply a
   1973             DW_AT_frame_base -- that's OK, but there must actually be
   1974             a containing DW_TAG_subprogram. */
   1975          if (!global) {
   1976             Bool found = False;
   1977             for (i = parser->sp; i >= 0; i--) {
   1978                if (parser->isFunc[i]) {
   1979                   fbGX = parser->fbGX[i];
   1980                   found = True;
   1981                   break;
   1982                }
   1983             }
   1984             if (!found) {
   1985                if (0 && VG_(clo_verbosity) >= 0) {
   1986                   VG_(message)(Vg_DebugMsg,
   1987                      "warning: parse_var_DIE: non-global variable "
   1988                      "outside DW_TAG_subprogram\n");
   1989                }
   1990                /* goto_bad_DIE; */
   1991                /* This seems to happen a lot.  Just ignore it -- if,
   1992                   when we come to evaluation of the location (guarded)
   1993                   expression, it requires a frame base value, and
   1994                   there's no expression for that, then evaluation as a
   1995                   whole will fail.  Harmless - a bit of a waste of
   1996                   cycles but nothing more. */
   1997             }
   1998          }
   1999 
   2000          /* re "global ? 0 : parser->sp" (twice), if the var is
   2001             marked 'global' then we must put it at the global scope,
   2002             as only the global scope (level 0) covers the entire PC
   2003             address space.  It is asserted elsewhere that level 0
   2004             always covers the entire address space. */
   2005          xa = parser->ranges[global ? 0 : parser->sp];
   2006          nRanges = VG_(sizeXA)(xa);
   2007          vg_assert(nRanges >= 0);
   2008 
   2009          tv = ML_(dinfo_zalloc)( "di.readdwarf3.pvD.1", sizeof(TempVar) );
   2010          tv->name   = name;
   2011          tv->level  = global ? 0 : parser->sp;
   2012          tv->typeR  = typeR;
   2013          tv->gexpr  = gexpr;
   2014          tv->fbGX   = fbGX;
   2015          tv->fName  = fileName;
   2016          tv->fLine  = lineNo;
   2017          tv->dioff  = posn;
   2018          tv->absOri = abs_ori;
   2019 
   2020          /* See explanation on definition of type TempVar for the
   2021             reason for this elaboration. */
   2022          tv->nRanges = nRanges;
   2023          tv->rngOneMin = 0;
   2024          tv->rngOneMax = 0;
   2025          tv->rngMany = NULL;
   2026          if (nRanges == 1) {
   2027             AddrRange* range = VG_(indexXA)(xa, 0);
   2028             tv->rngOneMin = range->aMin;
   2029             tv->rngOneMax = range->aMax;
   2030          }
   2031          else if (nRanges > 1) {
   2032             /* See if we already have a range list which is
   2033                structurally identical.  If so, use that; if not, clone
   2034                this one, and add it to our collection. */
   2035             UWord keyW, valW;
   2036             if (VG_(lookupFM)( rangestree, &keyW, &valW, (UWord)xa )) {
   2037                XArray* old = (XArray*)keyW;
   2038                tl_assert(valW == 0);
   2039                tl_assert(old != xa);
   2040                tv->rngMany = old;
   2041             } else {
   2042                XArray* cloned = VG_(cloneXA)( "di.readdwarf3.pvD.2", xa );
   2043                tv->rngMany = cloned;
   2044                VG_(addToFM)( rangestree, (UWord)cloned, 0 );
   2045             }
   2046          }
   2047 
   2048          VG_(addToXA)( tempvars, &tv );
   2049 
   2050          TRACE_D3("  Recording this variable, with %ld PC range(s)\n",
   2051                   VG_(sizeXA)(xa) );
   2052          /* collect stats on how effective the ->ranges special
   2053             casing is */
   2054          if (0) {
   2055             static Int ntot=0, ngt=0;
   2056             ntot++;
   2057             if (tv->rngMany) ngt++;
   2058             if (0 == (ntot % 100000))
   2059                VG_(printf)("XXXX %d tot, %d cloned\n", ntot, ngt);
   2060          }
   2061 
   2062       }
   2063 
   2064       /* Here are some other weird cases seen in the wild:
   2065 
   2066             We have a variable with a name and a type, but no
   2067             location.  I guess that's a sign that it has been
   2068             optimised away.  Ignore it.  Here's an example:
   2069 
   2070             static Int lc_compar(void* n1, void* n2) {
   2071                MC_Chunk* mc1 = *(MC_Chunk**)n1;
   2072                MC_Chunk* mc2 = *(MC_Chunk**)n2;
   2073                return (mc1->data < mc2->data ? -1 : 1);
   2074             }
   2075 
   2076             Both mc1 and mc2 are like this
   2077             <2><5bc>: Abbrev Number: 21 (DW_TAG_variable)
   2078                 DW_AT_name        : mc1
   2079                 DW_AT_decl_file   : 1
   2080                 DW_AT_decl_line   : 216
   2081                 DW_AT_type        : <5d3>
   2082 
   2083             whereas n1 and n2 do have locations specified.
   2084 
   2085             ---------------------------------------------
   2086 
   2087             We see a DW_TAG_formal_parameter with a type, but
   2088             no name and no location.  It's probably part of a function type
   2089             construction, thusly, hence ignore it:
   2090          <1><2b4>: Abbrev Number: 12 (DW_TAG_subroutine_type)
   2091              DW_AT_sibling     : <2c9>
   2092              DW_AT_prototyped  : 1
   2093              DW_AT_type        : <114>
   2094          <2><2be>: Abbrev Number: 13 (DW_TAG_formal_parameter)
   2095              DW_AT_type        : <13e>
   2096          <2><2c3>: Abbrev Number: 13 (DW_TAG_formal_parameter)
   2097              DW_AT_type        : <133>
   2098 
   2099             ---------------------------------------------
   2100 
   2101             Is very minimal, like this:
   2102             <4><81d>: Abbrev Number: 44 (DW_TAG_variable)
   2103                 DW_AT_abstract_origin: <7ba>
   2104             What that signifies I have no idea.  Ignore.
   2105 
   2106             ----------------------------------------------
   2107 
   2108             Is very minimal, like this:
   2109             <200f>: DW_TAG_formal_parameter
   2110                 DW_AT_abstract_ori: <1f4c>
   2111                 DW_AT_location    : 13440
   2112             What that signifies I have no idea.  Ignore.
   2113             It might be significant, though: the variable at least
   2114             has a location and so might exist somewhere.
   2115             Maybe we should handle this.
   2116 
   2117             ---------------------------------------------
   2118 
   2119             <22407>: DW_TAG_variable
   2120               DW_AT_name        : (indirect string, offset: 0x6579):
   2121                                   vgPlain_trampoline_stuff_start
   2122               DW_AT_decl_file   : 29
   2123               DW_AT_decl_line   : 56
   2124               DW_AT_external    : 1
   2125               DW_AT_declaration : 1
   2126 
   2127             Nameless and typeless variable that has a location?  Who
   2128             knows.  Not me.
   2129             <2><3d178>: Abbrev Number: 22 (DW_TAG_variable)
   2130                  DW_AT_location    : 9 byte block: 3 c0 c7 13 38 0 0 0 0
   2131                                      (DW_OP_addr: 3813c7c0)
   2132 
   2133             No, really.  Check it out.  gcc is quite simply borked.
   2134             <3><168cc>: Abbrev Number: 141 (DW_TAG_variable)
   2135             // followed by no attributes, and the next DIE is a sibling,
   2136             // not a child
   2137             */
   2138    }
   2139    return;
   2140 
   2141   bad_DIE:
   2142    set_position_of_Cursor( c_die,  saved_die_c_offset );
   2143    set_position_of_Cursor( c_abbv, saved_abbv_c_offset );
   2144    posn = uncook_die( cc, posn, &debug_types_flag, &alt_flag );
   2145    VG_(printf)(" <%d><%lx>: %s", level, posn, ML_(pp_DW_TAG)( dtag ) );
   2146    if (debug_types_flag) {
   2147       VG_(printf)(" (in .debug_types)");
   2148    }
   2149    else if (alt_flag) {
   2150       VG_(printf)(" (in alternate .debug_info)");
   2151    }
   2152    VG_(printf)("\n");
   2153    while (True) {
   2154       DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
   2155       DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
   2156       if (attr == 0 && form == 0) break;
   2157       VG_(printf)("     %18s: ", ML_(pp_DW_AT)(attr));
   2158       /* Get the form contents, so as to print them */
   2159       get_Form_contents( &cts, cc, c_die, True, form );
   2160       VG_(printf)("\t\n");
   2161    }
   2162    VG_(printf)("\n");
   2163    cc->barf("parse_var_DIE: confused by the above DIE");
   2164    /*NOTREACHED*/
   2165 }
   2166 
   2167 
   2168 /*------------------------------------------------------------*/
   2169 /*---                                                      ---*/
   2170 /*--- Parsing of type-related DIEs                         ---*/
   2171 /*---                                                      ---*/
   2172 /*------------------------------------------------------------*/
   2173 
   2174 #define N_D3_TYPE_STACK 16
   2175 
   2176 typedef
   2177    struct {
   2178       /* What source language?  'A'=Ada83/95,
   2179                                 'C'=C/C++,
   2180                                 'F'=Fortran,
   2181                                 '?'=other
   2182          Established once per compilation unit. */
   2183       UChar language;
   2184       /* A stack of types which are currently under construction */
   2185       Int   sp; /* [sp] is innermost active entry; sp==-1 for empty
   2186                    stack */
   2187       /* Note that the TyEnts in qparentE are temporary copies of the
   2188          ones accumulating in the main tyent array.  So it is not safe
   2189          to free up anything on them when popping them off the stack
   2190          (iow, it isn't safe to use TyEnt__make_EMPTY on them).  Just
   2191          memset them to zero when done. */
   2192       TyEnt qparentE[N_D3_TYPE_STACK]; /* parent TyEnts */
   2193       Int   qlevel[N_D3_TYPE_STACK];
   2194 
   2195    }
   2196    D3TypeParser;
   2197 
   2198 static void typestack_show ( D3TypeParser* parser, const HChar* str ) {
   2199    Word i;
   2200    VG_(printf)("  typestack (%s) {\n", str);
   2201    for (i = 0; i <= parser->sp; i++) {
   2202       VG_(printf)("    [%ld] (level %d): ", i, parser->qlevel[i]);
   2203       ML_(pp_TyEnt)( &parser->qparentE[i] );
   2204       VG_(printf)("\n");
   2205    }
   2206    VG_(printf)("  }\n");
   2207 }
   2208 
   2209 /* Remove from the stack, all entries with .level > 'level' */
   2210 static
   2211 void typestack_preen ( D3TypeParser* parser, Bool td3, Int level )
   2212 {
   2213    Bool changed = False;
   2214    vg_assert(parser->sp < N_D3_TYPE_STACK);
   2215    while (True) {
   2216       vg_assert(parser->sp >= -1);
   2217       if (parser->sp == -1) break;
   2218       if (parser->qlevel[parser->sp] <= level) break;
   2219       if (0)
   2220          TRACE_D3("BBBBAAAA typestack_pop [newsp=%d]\n", parser->sp-1);
   2221       vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
   2222       VG_(memset)(&parser->qparentE[parser->sp], 0, sizeof(TyEnt));
   2223       parser->qparentE[parser->sp].cuOff = D3_INVALID_CUOFF;
   2224       parser->qparentE[parser->sp].tag = Te_EMPTY;
   2225       parser->qlevel[parser->sp] = 0;
   2226       parser->sp--;
   2227       changed = True;
   2228    }
   2229    if (changed && td3)
   2230       typestack_show( parser, "after preen" );
   2231 }
   2232 
   2233 static Bool typestack_is_empty ( D3TypeParser* parser ) {
   2234    vg_assert(parser->sp >= -1 && parser->sp < N_D3_TYPE_STACK);
   2235    return parser->sp == -1;
   2236 }
   2237 
   2238 static void typestack_push ( CUConst* cc,
   2239                              D3TypeParser* parser,
   2240                              Bool td3,
   2241                              TyEnt* parentE, Int level ) {
   2242    if (0)
   2243    TRACE_D3("BBBBAAAA typestack_push[newsp=%d]: %d  %05lx\n",
   2244             parser->sp+1, level, parentE->cuOff);
   2245 
   2246    /* First we need to zap everything >= 'level', as we are about to
   2247       replace any previous entry at 'level', so .. */
   2248    typestack_preen(parser, /*td3*/False, level-1);
   2249 
   2250    vg_assert(parser->sp >= -1);
   2251    vg_assert(parser->sp < N_D3_TYPE_STACK);
   2252    if (parser->sp == N_D3_TYPE_STACK-1)
   2253       cc->barf("typestack_push: N_D3_TYPE_STACK is too low; "
   2254                "increase and recompile");
   2255    if (parser->sp >= 0)
   2256       vg_assert(parser->qlevel[parser->sp] < level);
   2257    parser->sp++;
   2258    vg_assert(parser->qparentE[parser->sp].tag == Te_EMPTY);
   2259    vg_assert(parser->qlevel[parser->sp]  == 0);
   2260    vg_assert(parentE);
   2261    vg_assert(ML_(TyEnt__is_type)(parentE));
   2262    vg_assert(parentE->cuOff != D3_INVALID_CUOFF);
   2263    parser->qparentE[parser->sp] = *parentE;
   2264    parser->qlevel[parser->sp]  = level;
   2265    if (td3)
   2266       typestack_show( parser, "after push" );
   2267 }
   2268 
   2269 /* True if the subrange type being parsed gives the bounds of an array. */
   2270 static Bool subrange_type_denotes_array_bounds ( D3TypeParser* parser,
   2271                                                  DW_TAG dtag ) {
   2272    vg_assert(dtag == DW_TAG_subrange_type);
   2273    /* For most languages, a subrange_type dtag always gives the
   2274       bounds of an array.
   2275       For Ada, there are additional conditions as a subrange_type
   2276       is also used for other purposes. */
   2277    if (parser->language != 'A')
   2278       /* not Ada, so it definitely denotes an array bound. */
   2279       return True;
   2280    else
   2281       /* Extra constraints for Ada: it only denotes an array bound if .. */
   2282       return (! typestack_is_empty(parser)
   2283               && parser->qparentE[parser->sp].tag == Te_TyArray);
   2284 }
   2285 
   2286 /* Parse a type-related DIE.  'parser' holds the current parser state.
   2287    'admin' is where the completed types are dumped.  'dtag' is the tag
   2288    for this DIE.  'c_die' points to the start of the data fields (FORM
   2289    stuff) for the DIE.  c_abbv points to the start of the (name,form)
   2290    pairs which describe the DIE.
   2291 
   2292    We may find the DIE uninteresting, in which case we should ignore
   2293    it.
   2294 
   2295    What happens: the DIE is examined.  If uninteresting, it is ignored.
   2296    Otherwise, the DIE gives rise to two things:
   2297 
   2298    (1) the offset of this DIE in the CU -- the cuOffset, a UWord
   2299    (2) a TyAdmin structure, which holds the type, or related stuff
   2300 
   2301    (2) is added at the end of 'tyadmins', at some index, say 'i'.
   2302 
   2303    A pair (cuOffset, i) is added to 'tydict'.
   2304 
   2305    Hence 'tyadmins' holds the actual type entities, and 'tydict' holds
   2306    a mapping from cuOffset to the index of the corresponding entry in
   2307    'tyadmins'.
   2308 
   2309    When resolving a cuOffset to a TyAdmin, first look up the cuOffset
   2310    in the tydict (by binary search).  This gives an index into
   2311    tyadmins, and the required entity lives in tyadmins at that index.
   2312 */
   2313 __attribute__((noinline))
   2314 static void parse_type_DIE ( /*MOD*/XArray* /* of TyEnt */ tyents,
   2315                              /*MOD*/D3TypeParser* parser,
   2316                              DW_TAG dtag,
   2317                              UWord posn,
   2318                              Int level,
   2319                              Cursor* c_die,
   2320                              Cursor* c_abbv,
   2321                              CUConst* cc,
   2322                              Bool td3 )
   2323 {
   2324    FormContents cts;
   2325    TyEnt typeE;
   2326    TyEnt atomE;
   2327    TyEnt fieldE;
   2328    TyEnt boundE;
   2329    Bool  debug_types_flag;
   2330    Bool  alt_flag;
   2331 
   2332    UWord saved_die_c_offset  = get_position_of_Cursor( c_die );
   2333    UWord saved_abbv_c_offset = get_position_of_Cursor( c_abbv );
   2334 
   2335    VG_(memset)( &typeE,  0xAA, sizeof(typeE) );
   2336    VG_(memset)( &atomE,  0xAA, sizeof(atomE) );
   2337    VG_(memset)( &fieldE, 0xAA, sizeof(fieldE) );
   2338    VG_(memset)( &boundE, 0xAA, sizeof(boundE) );
   2339 
   2340    /* If we've returned to a level at or above any previously noted
   2341       parent, un-note it, so we don't believe we're still collecting
   2342       its children. */
   2343    typestack_preen( parser, td3, level-1 );
   2344 
   2345    if (dtag == DW_TAG_compile_unit
   2346        || dtag == DW_TAG_type_unit
   2347        || dtag == DW_TAG_partial_unit) {
   2348       /* See if we can find DW_AT_language, since it is important for
   2349          establishing array bounds (see DW_TAG_subrange_type below in
   2350          this fn) */
   2351       while (True) {
   2352          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
   2353          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
   2354          if (attr == 0 && form == 0) break;
   2355          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
   2356          if (attr != DW_AT_language)
   2357             continue;
   2358          if (cts.szB <= 0)
   2359            goto_bad_DIE;
   2360          switch (cts.u.val) {
   2361             case DW_LANG_C89: case DW_LANG_C:
   2362             case DW_LANG_C_plus_plus: case DW_LANG_ObjC:
   2363             case DW_LANG_ObjC_plus_plus: case DW_LANG_UPC:
   2364             case DW_LANG_Upc: case DW_LANG_C99:
   2365                parser->language = 'C'; break;
   2366             case DW_LANG_Fortran77: case DW_LANG_Fortran90:
   2367             case DW_LANG_Fortran95:
   2368                parser->language = 'F'; break;
   2369             case DW_LANG_Ada83: case DW_LANG_Ada95:
   2370                parser->language = 'A'; break;
   2371             case DW_LANG_Cobol74:
   2372             case DW_LANG_Cobol85: case DW_LANG_Pascal83:
   2373             case DW_LANG_Modula2: case DW_LANG_Java:
   2374             case DW_LANG_PLI:
   2375             case DW_LANG_D: case DW_LANG_Python:
   2376             case DW_LANG_Mips_Assembler:
   2377                parser->language = '?'; break;
   2378             default:
   2379                goto_bad_DIE;
   2380          }
   2381       }
   2382    }
   2383 
   2384    if (dtag == DW_TAG_base_type) {
   2385       /* We can pick up a new base type any time. */
   2386       VG_(memset)(&typeE, 0, sizeof(typeE));
   2387       typeE.cuOff = D3_INVALID_CUOFF;
   2388       typeE.tag   = Te_TyBase;
   2389       while (True) {
   2390          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
   2391          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
   2392          if (attr == 0 && form == 0) break;
   2393          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
   2394          if (attr == DW_AT_name && cts.szB < 0) {
   2395             typeE.Te.TyBase.name
   2396                = ML_(cur_read_strdup)( cts.u.cur,
   2397                                        "di.readdwarf3.ptD.base_type.1" );
   2398          }
   2399          if (attr == DW_AT_byte_size && cts.szB > 0) {
   2400             typeE.Te.TyBase.szB = cts.u.val;
   2401          }
   2402          if (attr == DW_AT_encoding && cts.szB > 0) {
   2403             switch (cts.u.val) {
   2404                case DW_ATE_unsigned: case DW_ATE_unsigned_char:
   2405                case DW_ATE_UTF: /* since DWARF4, e.g. char16_t from C++ */
   2406                case DW_ATE_boolean:/* FIXME - is this correct? */
   2407                case DW_ATE_unsigned_fixed:
   2408                   typeE.Te.TyBase.enc = 'U'; break;
   2409                case DW_ATE_signed: case DW_ATE_signed_char:
   2410                case DW_ATE_signed_fixed:
   2411                   typeE.Te.TyBase.enc = 'S'; break;
   2412                case DW_ATE_float:
   2413                   typeE.Te.TyBase.enc = 'F'; break;
   2414                case DW_ATE_complex_float:
   2415                   typeE.Te.TyBase.enc = 'C'; break;
   2416                default:
   2417                   goto_bad_DIE;
   2418             }
   2419          }
   2420       }
   2421 
   2422       /* Invent a name if it doesn't have one.  gcc-4.3
   2423          -ftree-vectorize is observed to emit nameless base types. */
   2424       if (!typeE.Te.TyBase.name)
   2425          typeE.Te.TyBase.name
   2426             = ML_(dinfo_strdup)( "di.readdwarf3.ptD.base_type.2",
   2427                                  "<anon_base_type>" );
   2428 
   2429       /* Do we have something that looks sane? */
   2430       if (/* must have a name */
   2431           typeE.Te.TyBase.name == NULL
   2432           /* and a plausible size.  Yes, really 32: "complex long
   2433              double" apparently has size=32 */
   2434           || typeE.Te.TyBase.szB < 0 || typeE.Te.TyBase.szB > 32
   2435           /* and a plausible encoding */
   2436           || (typeE.Te.TyBase.enc != 'U'
   2437               && typeE.Te.TyBase.enc != 'S'
   2438               && typeE.Te.TyBase.enc != 'F'
   2439               && typeE.Te.TyBase.enc != 'C'))
   2440          goto_bad_DIE;
   2441       /* Last minute hack: if we see this
   2442          <1><515>: DW_TAG_base_type
   2443              DW_AT_byte_size   : 0
   2444              DW_AT_encoding    : 5
   2445              DW_AT_name        : void
   2446          convert it into a real Void type. */
   2447       if (typeE.Te.TyBase.szB == 0
   2448           && 0 == VG_(strcmp)("void", typeE.Te.TyBase.name)) {
   2449          ML_(TyEnt__make_EMPTY)(&typeE);
   2450          typeE.tag = Te_TyVoid;
   2451          typeE.Te.TyVoid.isFake = False; /* it's a real one! */
   2452       }
   2453 
   2454       goto acquire_Type;
   2455    }
   2456 
   2457    /*
   2458     * An example of DW_TAG_rvalue_reference_type:
   2459     *
   2460     * $ readelf --debug-dump /usr/lib/debug/usr/lib/libstdc++.so.6.0.16.debug
   2461     *  <1><1014>: Abbrev Number: 55 (DW_TAG_rvalue_reference_type)
   2462     *     <1015>   DW_AT_byte_size   : 4
   2463     *     <1016>   DW_AT_type        : <0xe52>
   2464     */
   2465    if (dtag == DW_TAG_pointer_type || dtag == DW_TAG_reference_type
   2466        || dtag == DW_TAG_ptr_to_member_type
   2467        || dtag == DW_TAG_rvalue_reference_type) {
   2468       /* This seems legit for _pointer_type and _reference_type.  I
   2469          don't know if rolling _ptr_to_member_type in here really is
   2470          legit, but it's better than not handling it at all. */
   2471       VG_(memset)(&typeE, 0, sizeof(typeE));
   2472       typeE.cuOff = D3_INVALID_CUOFF;
   2473       switch (dtag) {
   2474       case DW_TAG_pointer_type:
   2475          typeE.tag = Te_TyPtr;
   2476          break;
   2477       case DW_TAG_reference_type:
   2478          typeE.tag = Te_TyRef;
   2479          break;
   2480       case DW_TAG_ptr_to_member_type:
   2481          typeE.tag = Te_TyPtrMbr;
   2482          break;
   2483       case DW_TAG_rvalue_reference_type:
   2484          typeE.tag = Te_TyRvalRef;
   2485          break;
   2486       default:
   2487          vg_assert(False);
   2488       }
   2489       /* target type defaults to void */
   2490       typeE.Te.TyPorR.typeR = D3_FAKEVOID_CUOFF;
   2491       /* These four type kinds don't *have* to specify their size, in
   2492          which case we assume it's a machine word.  But if they do
   2493          specify it, it must be a machine word :-)  This probably
   2494          assumes that the word size of the Dwarf3 we're reading is the
   2495          same size as that on the machine.  gcc appears to give a size
   2496          whereas icc9 doesn't. */
   2497       typeE.Te.TyPorR.szB = sizeof(UWord);
   2498       while (True) {
   2499          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
   2500          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
   2501          if (attr == 0 && form == 0) break;
   2502          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
   2503          if (attr == DW_AT_byte_size && cts.szB > 0) {
   2504             typeE.Te.TyPorR.szB = cts.u.val;
   2505          }
   2506          if (attr == DW_AT_type && cts.szB > 0) {
   2507             typeE.Te.TyPorR.typeR
   2508                = cook_die_using_form( cc, (UWord)cts.u.val, form );
   2509          }
   2510       }
   2511       /* Do we have something that looks sane? */
   2512       if (typeE.Te.TyPorR.szB != sizeof(UWord))
   2513          goto_bad_DIE;
   2514       else
   2515          goto acquire_Type;
   2516    }
   2517 
   2518    if (dtag == DW_TAG_enumeration_type) {
   2519       /* Create a new Type to hold the results. */
   2520       VG_(memset)(&typeE, 0, sizeof(typeE));
   2521       typeE.cuOff = posn;
   2522       typeE.tag   = Te_TyEnum;
   2523       Bool is_decl = False;
   2524       typeE.Te.TyEnum.atomRs
   2525          = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ptD.enum_type.1",
   2526                        ML_(dinfo_free),
   2527                        sizeof(UWord) );
   2528       while (True) {
   2529          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
   2530          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
   2531          if (attr == 0 && form == 0) break;
   2532          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
   2533          if (attr == DW_AT_name && cts.szB < 0) {
   2534             typeE.Te.TyEnum.name
   2535                = ML_(cur_read_strdup)( cts.u.cur,
   2536                                        "di.readdwarf3.pTD.enum_type.2" );
   2537          }
   2538          if (attr == DW_AT_byte_size && cts.szB > 0) {
   2539             typeE.Te.TyEnum.szB = cts.u.val;
   2540          }
   2541          if (attr == DW_AT_declaration) {
   2542             is_decl = True;
   2543          }
   2544       }
   2545 
   2546       if (!typeE.Te.TyEnum.name)
   2547          typeE.Te.TyEnum.name
   2548             = ML_(dinfo_strdup)( "di.readdwarf3.pTD.enum_type.3",
   2549                                  "<anon_enum_type>" );
   2550 
   2551       /* Do we have something that looks sane? */
   2552       if (typeE.Te.TyEnum.szB == 0
   2553           /* we must know the size */
   2554           /* but not for Ada, which uses such dummy
   2555              enumerations as helper for gdb ada mode.
   2556              Also GCC allows incomplete enums as GNU extension.
   2557              http://gcc.gnu.org/onlinedocs/gcc/Incomplete-Enums.html
   2558              These are marked as DW_AT_declaration and won't have
   2559              a size. They can only be used in declaration or as
   2560              pointer types.  You can't allocate variables or storage
   2561              using such an enum type. (Also GCC seems to have a bug
   2562              that will put such an enumeration_type into a .debug_types
   2563              unit which should only contain complete types.) */
   2564           && (parser->language != 'A' && !is_decl)) {
   2565          goto_bad_DIE;
   2566       }
   2567 
   2568       /* On't stack! */
   2569       typestack_push( cc, parser, td3, &typeE, level );
   2570       goto acquire_Type;
   2571    }
   2572 
   2573    /* gcc (GCC) 4.4.0 20081017 (experimental) occasionally produces
   2574       DW_TAG_enumerator with only a DW_AT_name but no
   2575       DW_AT_const_value.  This is in violation of the Dwarf3 standard,
   2576       and appears to be a new "feature" of gcc - versions 4.3.x and
   2577       earlier do not appear to do this.  So accept DW_TAG_enumerator
   2578       which only have a name but no value.  An example:
   2579 
   2580       <1><180>: Abbrev Number: 6 (DW_TAG_enumeration_type)
   2581          <181>   DW_AT_name        : (indirect string, offset: 0xda70):
   2582                                      QtMsgType
   2583          <185>   DW_AT_byte_size   : 4
   2584          <186>   DW_AT_decl_file   : 14
   2585          <187>   DW_AT_decl_line   : 1480
   2586          <189>   DW_AT_sibling     : <0x1a7>
   2587       <2><18d>: Abbrev Number: 7 (DW_TAG_enumerator)
   2588          <18e>   DW_AT_name        : (indirect string, offset: 0x9e18):
   2589                                      QtDebugMsg
   2590       <2><192>: Abbrev Number: 7 (DW_TAG_enumerator)
   2591          <193>   DW_AT_name        : (indirect string, offset: 0x1505f):
   2592                                      QtWarningMsg
   2593       <2><197>: Abbrev Number: 7 (DW_TAG_enumerator)
   2594          <198>   DW_AT_name        : (indirect string, offset: 0x16f4a):
   2595                                      QtCriticalMsg
   2596       <2><19c>: Abbrev Number: 7 (DW_TAG_enumerator)
   2597          <19d>   DW_AT_name        : (indirect string, offset: 0x156dd):
   2598                                      QtFatalMsg
   2599       <2><1a1>: Abbrev Number: 7 (DW_TAG_enumerator)
   2600          <1a2>   DW_AT_name        : (indirect string, offset: 0x13660):
   2601                                      QtSystemMsg
   2602    */
   2603    if (dtag == DW_TAG_enumerator) {
   2604       VG_(memset)( &atomE, 0, sizeof(atomE) );
   2605       atomE.cuOff = posn;
   2606       atomE.tag   = Te_Atom;
   2607       while (True) {
   2608          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
   2609          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
   2610          if (attr == 0 && form == 0) break;
   2611          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
   2612          if (attr == DW_AT_name && cts.szB < 0) {
   2613             atomE.Te.Atom.name
   2614               = ML_(cur_read_strdup)( cts.u.cur,
   2615                                       "di.readdwarf3.pTD.enumerator.1" );
   2616          }
   2617          if (attr == DW_AT_const_value && cts.szB > 0) {
   2618             atomE.Te.Atom.value      = cts.u.val;
   2619             atomE.Te.Atom.valueKnown = True;
   2620          }
   2621       }
   2622       /* Do we have something that looks sane? */
   2623       if (atomE.Te.Atom.name == NULL)
   2624          goto_bad_DIE;
   2625       /* Do we have a plausible parent? */
   2626       if (typestack_is_empty(parser)) goto_bad_DIE;
   2627       vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
   2628       vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
   2629       if (level != parser->qlevel[parser->sp]+1) goto_bad_DIE;
   2630       if (parser->qparentE[parser->sp].tag != Te_TyEnum) goto_bad_DIE;
   2631       /* Record this child in the parent */
   2632       vg_assert(parser->qparentE[parser->sp].Te.TyEnum.atomRs);
   2633       VG_(addToXA)( parser->qparentE[parser->sp].Te.TyEnum.atomRs,
   2634                     &atomE );
   2635       /* And record the child itself */
   2636       goto acquire_Atom;
   2637    }
   2638 
   2639    /* Treat DW_TAG_class_type as if it was a DW_TAG_structure_type.  I
   2640       don't know if this is correct, but it at least makes this reader
   2641       usable for gcc-4.3 produced Dwarf3. */
   2642    if (dtag == DW_TAG_structure_type || dtag == DW_TAG_class_type
   2643        || dtag == DW_TAG_union_type) {
   2644       Bool have_szB = False;
   2645       Bool is_decl  = False;
   2646       Bool is_spec  = False;
   2647       /* Create a new Type to hold the results. */
   2648       VG_(memset)(&typeE, 0, sizeof(typeE));
   2649       typeE.cuOff = posn;
   2650       typeE.tag   = Te_TyStOrUn;
   2651       typeE.Te.TyStOrUn.name = NULL;
   2652       typeE.Te.TyStOrUn.typeR = D3_INVALID_CUOFF;
   2653       typeE.Te.TyStOrUn.fieldRs
   2654          = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.pTD.struct_type.1",
   2655                        ML_(dinfo_free),
   2656                        sizeof(UWord) );
   2657       typeE.Te.TyStOrUn.complete = True;
   2658       typeE.Te.TyStOrUn.isStruct = dtag == DW_TAG_structure_type
   2659                                    || dtag == DW_TAG_class_type;
   2660       while (True) {
   2661          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
   2662          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
   2663          if (attr == 0 && form == 0) break;
   2664          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
   2665          if (attr == DW_AT_name && cts.szB < 0) {
   2666             typeE.Te.TyStOrUn.name
   2667                = ML_(cur_read_strdup)( cts.u.cur,
   2668                                        "di.readdwarf3.ptD.struct_type.2" );
   2669          }
   2670          if (attr == DW_AT_byte_size && cts.szB >= 0) {
   2671             typeE.Te.TyStOrUn.szB = cts.u.val;
   2672             have_szB = True;
   2673          }
   2674          if (attr == DW_AT_declaration && cts.szB > 0 && cts.u.val > 0) {
   2675             is_decl = True;
   2676          }
   2677          if (attr == DW_AT_specification && cts.szB > 0 && cts.u.val > 0) {
   2678             is_spec = True;
   2679          }
   2680          if (attr == DW_AT_signature && form == DW_FORM_ref_sig8
   2681              && cts.szB > 0) {
   2682             have_szB = True;
   2683             typeE.Te.TyStOrUn.szB = 8;
   2684             typeE.Te.TyStOrUn.typeR
   2685                = cook_die_using_form( cc, (UWord)cts.u.val, form );
   2686          }
   2687       }
   2688       /* Do we have something that looks sane? */
   2689       if (is_decl && (!is_spec)) {
   2690          /* It's a DW_AT_declaration.  We require the name but
   2691             nothing else. */
   2692          /* JRS 2012-06-28: following discussion w/ tromey, if the
   2693             type doesn't have a name, just make one up, and accept it.
   2694             It might be referred to by other DIEs, so ignoring it
   2695             doesn't seem like a safe option. */
   2696          if (typeE.Te.TyStOrUn.name == NULL)
   2697             typeE.Te.TyStOrUn.name
   2698                = ML_(dinfo_strdup)( "di.readdwarf3.ptD.struct_type.3",
   2699                                     "<anon_struct_type>" );
   2700          typeE.Te.TyStOrUn.complete = False;
   2701          /* JRS 2009 Aug 10: <possible kludge>? */
   2702          /* Push this tyent on the stack, even though it's incomplete.
   2703             It appears that gcc-4.4 on Fedora 11 will sometimes create
   2704             DW_TAG_member entries for it, and so we need to have a
   2705             plausible parent present in order for that to work.  See
   2706             #200029 comments 8 and 9. */
   2707          typestack_push( cc, parser, td3, &typeE, level );
   2708          /* </possible kludge> */
   2709          goto acquire_Type;
   2710       }
   2711       if ((!is_decl) /* && (!is_spec) */) {
   2712          /* this is the common, ordinary case */
   2713          /* The name can be present, or not */
   2714          if (!have_szB) {
   2715             /* We must know the size.
   2716                But in Ada, record with discriminants might have no size.
   2717                But in C, VLA in the middle of a struct (gcc extension)
   2718                might have no size.
   2719                Instead, some GNAT dwarf extensions and/or dwarf entries
   2720                allow to calculate the struct size at runtime.
   2721                We cannot do that (yet?) so, the temporary kludge is to use
   2722                a small size. */
   2723             typeE.Te.TyStOrUn.szB = 1;
   2724          }
   2725          /* On't stack! */
   2726          typestack_push( cc, parser, td3, &typeE, level );
   2727          goto acquire_Type;
   2728       }
   2729       else {
   2730          /* don't know how to handle any other variants just now */
   2731          goto_bad_DIE;
   2732       }
   2733    }
   2734 
   2735    if (dtag == DW_TAG_member) {
   2736       /* Acquire member entries for both DW_TAG_structure_type and
   2737          DW_TAG_union_type.  They differ minorly, in that struct
   2738          members must have a DW_AT_data_member_location expression
   2739          whereas union members must not. */
   2740       Bool parent_is_struct;
   2741       VG_(memset)( &fieldE, 0, sizeof(fieldE) );
   2742       fieldE.cuOff = posn;
   2743       fieldE.tag   = Te_Field;
   2744       fieldE.Te.Field.typeR = D3_INVALID_CUOFF;
   2745       while (True) {
   2746          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
   2747          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
   2748          if (attr == 0 && form == 0) break;
   2749          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
   2750          if (attr == DW_AT_name && cts.szB < 0) {
   2751             fieldE.Te.Field.name
   2752                = ML_(cur_read_strdup)( cts.u.cur,
   2753                                        "di.readdwarf3.ptD.member.1" );
   2754          }
   2755          if (attr == DW_AT_type && cts.szB > 0) {
   2756             fieldE.Te.Field.typeR
   2757                = cook_die_using_form( cc, (UWord)cts.u.val, form );
   2758          }
   2759          /* There are 2 different cases for DW_AT_data_member_location.
   2760             If it is a constant class attribute, it contains byte offset
   2761             from the beginning of the containing entity.
   2762             Otherwise it is a location expression.  */
   2763          if (attr == DW_AT_data_member_location && cts.szB > 0) {
   2764             fieldE.Te.Field.nLoc = -1;
   2765             fieldE.Te.Field.pos.offset = cts.u.val;
   2766          }
   2767          if (attr == DW_AT_data_member_location && cts.szB <= 0) {
   2768             fieldE.Te.Field.nLoc = (UWord)(-cts.szB);
   2769             fieldE.Te.Field.pos.loc
   2770                = ML_(cur_read_memdup)( cts.u.cur,
   2771                                        (SizeT)fieldE.Te.Field.nLoc,
   2772                                        "di.readdwarf3.ptD.member.2" );
   2773          }
   2774       }
   2775       /* Do we have a plausible parent? */
   2776       if (typestack_is_empty(parser)) goto_bad_DIE;
   2777       vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
   2778       vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
   2779       if (level != parser->qlevel[parser->sp]+1) goto_bad_DIE;
   2780       if (parser->qparentE[parser->sp].tag != Te_TyStOrUn) goto_bad_DIE;
   2781       /* Do we have something that looks sane?  If this is a member of a
   2782          struct, we must have a location expression; but if a member
   2783          of a union that is irrelevant (D3 spec sec 5.6.6).  We ought
   2784          to reject in the latter case, but some compilers have been
   2785          observed to emit constant-zero expressions.  So just ignore
   2786          them. */
   2787       parent_is_struct
   2788          = parser->qparentE[parser->sp].Te.TyStOrUn.isStruct;
   2789       if (!fieldE.Te.Field.name)
   2790          fieldE.Te.Field.name
   2791             = ML_(dinfo_strdup)( "di.readdwarf3.ptD.member.3",
   2792                                  "<anon_field>" );
   2793       vg_assert(fieldE.Te.Field.name);
   2794       if (fieldE.Te.Field.typeR == D3_INVALID_CUOFF)
   2795          goto_bad_DIE;
   2796       if (fieldE.Te.Field.nLoc) {
   2797          if (!parent_is_struct) {
   2798             /* If this is a union type, pretend we haven't seen the data
   2799                member location expression, as it is by definition
   2800                redundant (it must be zero). */
   2801             if (fieldE.Te.Field.nLoc > 0)
   2802                ML_(dinfo_free)(fieldE.Te.Field.pos.loc);
   2803             fieldE.Te.Field.pos.loc = NULL;
   2804             fieldE.Te.Field.nLoc = 0;
   2805          }
   2806          /* Record this child in the parent */
   2807          fieldE.Te.Field.isStruct = parent_is_struct;
   2808          vg_assert(parser->qparentE[parser->sp].Te.TyStOrUn.fieldRs);
   2809          VG_(addToXA)( parser->qparentE[parser->sp].Te.TyStOrUn.fieldRs,
   2810                        &posn );
   2811          /* And record the child itself */
   2812          goto acquire_Field;
   2813       } else {
   2814          /* Member with no location - this can happen with static
   2815             const members in C++ code which are compile time constants
   2816             that do no exist in the class. They're not of any interest
   2817             to us so we ignore them. */
   2818          ML_(TyEnt__make_EMPTY)(&fieldE);
   2819       }
   2820    }
   2821 
   2822    if (dtag == DW_TAG_array_type) {
   2823       VG_(memset)(&typeE, 0, sizeof(typeE));
   2824       typeE.cuOff = posn;
   2825       typeE.tag   = Te_TyArray;
   2826       typeE.Te.TyArray.typeR = D3_INVALID_CUOFF;
   2827       typeE.Te.TyArray.boundRs
   2828          = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ptD.array_type.1",
   2829                        ML_(dinfo_free),
   2830                        sizeof(UWord) );
   2831       while (True) {
   2832          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
   2833          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
   2834          if (attr == 0 && form == 0) break;
   2835          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
   2836          if (attr == DW_AT_type && cts.szB > 0) {
   2837             typeE.Te.TyArray.typeR
   2838                = cook_die_using_form( cc, (UWord)cts.u.val, form );
   2839          }
   2840       }
   2841       if (typeE.Te.TyArray.typeR == D3_INVALID_CUOFF)
   2842          goto_bad_DIE;
   2843       /* On't stack! */
   2844       typestack_push( cc, parser, td3, &typeE, level );
   2845       goto acquire_Type;
   2846    }
   2847 
   2848    /* this is a subrange type defining the bounds of an array. */
   2849    if (dtag == DW_TAG_subrange_type
   2850        && subrange_type_denotes_array_bounds(parser, dtag)) {
   2851       Bool have_lower = False;
   2852       Bool have_upper = False;
   2853       Bool have_count = False;
   2854       Long lower = 0;
   2855       Long upper = 0;
   2856 
   2857       switch (parser->language) {
   2858          case 'C': have_lower = True;  lower = 0; break;
   2859          case 'F': have_lower = True;  lower = 1; break;
   2860          case '?': have_lower = False; break;
   2861          case 'A': have_lower = False; break;
   2862          default:  vg_assert(0); /* assured us by handling of
   2863                                     DW_TAG_compile_unit in this fn */
   2864       }
   2865 
   2866       VG_(memset)( &boundE, 0, sizeof(boundE) );
   2867       boundE.cuOff = D3_INVALID_CUOFF;
   2868       boundE.tag   = Te_Bound;
   2869       while (True) {
   2870          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
   2871          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
   2872          if (attr == 0 && form == 0) break;
   2873          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
   2874          if (attr == DW_AT_lower_bound && cts.szB > 0) {
   2875             lower      = (Long)cts.u.val;
   2876             have_lower = True;
   2877          }
   2878          if (attr == DW_AT_upper_bound && cts.szB > 0) {
   2879             upper      = (Long)cts.u.val;
   2880             have_upper = True;
   2881          }
   2882          if (attr == DW_AT_count && cts.szB > 0) {
   2883             /*count    = (Long)cts.u.val;*/
   2884             have_count = True;
   2885          }
   2886       }
   2887       /* FIXME: potentially skip the rest if no parent present, since
   2888          it could be the case that this subrange type is free-standing
   2889          (not being used to describe the bounds of a containing array
   2890          type) */
   2891       /* Do we have a plausible parent? */
   2892       if (typestack_is_empty(parser)) goto_bad_DIE;
   2893       vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
   2894       vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
   2895       if (level != parser->qlevel[parser->sp]+1) goto_bad_DIE;
   2896       if (parser->qparentE[parser->sp].tag != Te_TyArray) goto_bad_DIE;
   2897 
   2898       /* Figure out if we have a definite range or not */
   2899       if (have_lower && have_upper && (!have_count)) {
   2900          boundE.Te.Bound.knownL = True;
   2901          boundE.Te.Bound.knownU = True;
   2902          boundE.Te.Bound.boundL = lower;
   2903          boundE.Te.Bound.boundU = upper;
   2904       }
   2905       else if (have_lower && (!have_upper) && (!have_count)) {
   2906          boundE.Te.Bound.knownL = True;
   2907          boundE.Te.Bound.knownU = False;
   2908          boundE.Te.Bound.boundL = lower;
   2909          boundE.Te.Bound.boundU = 0;
   2910       }
   2911       else if ((!have_lower) && have_upper && (!have_count)) {
   2912          boundE.Te.Bound.knownL = False;
   2913          boundE.Te.Bound.knownU = True;
   2914          boundE.Te.Bound.boundL = 0;
   2915          boundE.Te.Bound.boundU = upper;
   2916       }
   2917       else if ((!have_lower) && (!have_upper) && (!have_count)) {
   2918          boundE.Te.Bound.knownL = False;
   2919          boundE.Te.Bound.knownU = False;
   2920          boundE.Te.Bound.boundL = 0;
   2921          boundE.Te.Bound.boundU = 0;
   2922       } else {
   2923          /* FIXME: handle more cases */
   2924          goto_bad_DIE;
   2925       }
   2926 
   2927       /* Record this bound in the parent */
   2928       boundE.cuOff = posn;
   2929       vg_assert(parser->qparentE[parser->sp].Te.TyArray.boundRs);
   2930       VG_(addToXA)( parser->qparentE[parser->sp].Te.TyArray.boundRs,
   2931                     &boundE.cuOff );
   2932       /* And record the child itself */
   2933       goto acquire_Bound;
   2934    }
   2935 
   2936    /* typedef or subrange_type other than array bounds. */
   2937    if (dtag == DW_TAG_typedef
   2938        || (dtag == DW_TAG_subrange_type
   2939            && !subrange_type_denotes_array_bounds(parser, dtag))) {
   2940       /* subrange_type other than array bound is only for Ada. */
   2941       vg_assert (dtag == DW_TAG_typedef || parser->language == 'A');
   2942       /* We can pick up a new typedef/subrange_type any time. */
   2943       VG_(memset)(&typeE, 0, sizeof(typeE));
   2944       typeE.cuOff = D3_INVALID_CUOFF;
   2945       typeE.tag   = Te_TyTyDef;
   2946       typeE.Te.TyTyDef.name = NULL;
   2947       typeE.Te.TyTyDef.typeR = D3_INVALID_CUOFF;
   2948       while (True) {
   2949          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
   2950          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
   2951          if (attr == 0 && form == 0) break;
   2952          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
   2953          if (attr == DW_AT_name && cts.szB < 0) {
   2954             typeE.Te.TyTyDef.name
   2955                = ML_(cur_read_strdup)( cts.u.cur,
   2956                                        "di.readdwarf3.ptD.typedef.1" );
   2957          }
   2958          if (attr == DW_AT_type && cts.szB > 0) {
   2959             typeE.Te.TyTyDef.typeR
   2960                = cook_die_using_form( cc, (UWord)cts.u.val, form );
   2961          }
   2962       }
   2963       /* Do we have something that looks sane?
   2964          gcc gnat Ada generates minimal typedef
   2965          such as the below
   2966          <6><91cc>: DW_TAG_typedef
   2967             DW_AT_abstract_ori: <9066>
   2968          g++ for OMP can generate artificial functions that have
   2969          parameters that refer to pointers to unnamed typedefs.
   2970          See https://bugs.kde.org/show_bug.cgi?id=273475
   2971          So we cannot require a name for a DW_TAG_typedef.
   2972       */
   2973       goto acquire_Type;
   2974    }
   2975 
   2976    if (dtag == DW_TAG_subroutine_type) {
   2977       /* function type? just record that one fact and ask no
   2978          further questions. */
   2979       VG_(memset)(&typeE, 0, sizeof(typeE));
   2980       typeE.cuOff = D3_INVALID_CUOFF;
   2981       typeE.tag   = Te_TyFn;
   2982       goto acquire_Type;
   2983    }
   2984 
   2985    if (dtag == DW_TAG_volatile_type || dtag == DW_TAG_const_type) {
   2986       Int have_ty = 0;
   2987       VG_(memset)(&typeE, 0, sizeof(typeE));
   2988       typeE.cuOff = D3_INVALID_CUOFF;
   2989       typeE.tag   = Te_TyQual;
   2990       typeE.Te.TyQual.qual
   2991          = dtag == DW_TAG_volatile_type ? 'V' : 'C';
   2992       /* target type defaults to 'void' */
   2993       typeE.Te.TyQual.typeR = D3_FAKEVOID_CUOFF;
   2994       while (True) {
   2995          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
   2996          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
   2997          if (attr == 0 && form == 0) break;
   2998          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
   2999          if (attr == DW_AT_type && cts.szB > 0) {
   3000             typeE.Te.TyQual.typeR
   3001                = cook_die_using_form( cc, (UWord)cts.u.val, form );
   3002             have_ty++;
   3003          }
   3004       }
   3005       /* gcc sometimes generates DW_TAG_const/volatile_type without
   3006          DW_AT_type and GDB appears to interpret the type as 'const
   3007          void' (resp. 'volatile void').  So just allow it .. */
   3008       if (have_ty == 1 || have_ty == 0)
   3009          goto acquire_Type;
   3010       else
   3011          goto_bad_DIE;
   3012    }
   3013 
   3014    /*
   3015     * Treat DW_TAG_unspecified_type as type void. An example of DW_TAG_unspecified_type:
   3016     *
   3017     * $ readelf --debug-dump /usr/lib/debug/usr/lib/libstdc++.so.6.0.16.debug
   3018     *  <1><10d4>: Abbrev Number: 53 (DW_TAG_unspecified_type)
   3019     *     <10d5>   DW_AT_name        : (indirect string, offset: 0xdb7): decltype(nullptr)
   3020     */
   3021    if (dtag == DW_TAG_unspecified_type) {
   3022       VG_(memset)(&typeE, 0, sizeof(typeE));
   3023       typeE.cuOff           = D3_INVALID_CUOFF;
   3024       typeE.tag             = Te_TyQual;
   3025       typeE.Te.TyQual.typeR = D3_FAKEVOID_CUOFF;
   3026       goto acquire_Type;
   3027    }
   3028 
   3029    /* else ignore this DIE */
   3030    return;
   3031    /*NOTREACHED*/
   3032 
   3033   acquire_Type:
   3034    if (0) VG_(printf)("YYYY Acquire Type\n");
   3035    vg_assert(ML_(TyEnt__is_type)( &typeE ));
   3036    vg_assert(typeE.cuOff == D3_INVALID_CUOFF || typeE.cuOff == posn);
   3037    typeE.cuOff = posn;
   3038    VG_(addToXA)( tyents, &typeE );
   3039    return;
   3040    /*NOTREACHED*/
   3041 
   3042   acquire_Atom:
   3043    if (0) VG_(printf)("YYYY Acquire Atom\n");
   3044    vg_assert(atomE.tag == Te_Atom);
   3045    vg_assert(atomE.cuOff == D3_INVALID_CUOFF || atomE.cuOff == posn);
   3046    atomE.cuOff = posn;
   3047    VG_(addToXA)( tyents, &atomE );
   3048    return;
   3049    /*NOTREACHED*/
   3050 
   3051   acquire_Field:
   3052    /* For union members, Expr should be absent */
   3053    if (0) VG_(printf)("YYYY Acquire Field\n");
   3054    vg_assert(fieldE.tag == Te_Field);
   3055    vg_assert(fieldE.Te.Field.nLoc <= 0 || fieldE.Te.Field.pos.loc != NULL);
   3056    vg_assert(fieldE.Te.Field.nLoc != 0 || fieldE.Te.Field.pos.loc == NULL);
   3057    if (fieldE.Te.Field.isStruct) {
   3058       vg_assert(fieldE.Te.Field.nLoc != 0);
   3059    } else {
   3060       vg_assert(fieldE.Te.Field.nLoc == 0);
   3061    }
   3062    vg_assert(fieldE.cuOff == D3_INVALID_CUOFF || fieldE.cuOff == posn);
   3063    fieldE.cuOff = posn;
   3064    VG_(addToXA)( tyents, &fieldE );
   3065    return;
   3066    /*NOTREACHED*/
   3067 
   3068   acquire_Bound:
   3069    if (0) VG_(printf)("YYYY Acquire Bound\n");
   3070    vg_assert(boundE.tag == Te_Bound);
   3071    vg_assert(boundE.cuOff == D3_INVALID_CUOFF || boundE.cuOff == posn);
   3072    boundE.cuOff = posn;
   3073    VG_(addToXA)( tyents, &boundE );
   3074    return;
   3075    /*NOTREACHED*/
   3076 
   3077   bad_DIE:
   3078    set_position_of_Cursor( c_die,  saved_die_c_offset );
   3079    set_position_of_Cursor( c_abbv, saved_abbv_c_offset );
   3080    posn = uncook_die( cc, posn, &debug_types_flag, &alt_flag );
   3081    VG_(printf)(" <%d><%lx>: %s", level, posn, ML_(pp_DW_TAG)( dtag ) );
   3082    if (debug_types_flag) {
   3083       VG_(printf)(" (in .debug_types)");
   3084    } else if (alt_flag) {
   3085       VG_(printf)(" (in alternate .debug_info)");
   3086    }
   3087    VG_(printf)("\n");
   3088    while (True) {
   3089       DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
   3090       DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
   3091       if (attr == 0 && form == 0) break;
   3092       VG_(printf)("     %18s: ", ML_(pp_DW_AT)(attr));
   3093       /* Get the form contents, so as to print them */
   3094       get_Form_contents( &cts, cc, c_die, True, form );
   3095       VG_(printf)("\t\n");
   3096    }
   3097    VG_(printf)("\n");
   3098    cc->barf("parse_type_DIE: confused by the above DIE");
   3099    /*NOTREACHED*/
   3100 }
   3101 
   3102 
   3103 /*------------------------------------------------------------*/
   3104 /*---                                                      ---*/
   3105 /*--- Compression of type DIE information                  ---*/
   3106 /*---                                                      ---*/
   3107 /*------------------------------------------------------------*/
   3108 
   3109 static UWord chase_cuOff ( Bool* changed,
   3110                            XArray* /* of TyEnt */ ents,
   3111                            TyEntIndexCache* ents_cache,
   3112                            UWord cuOff )
   3113 {
   3114    TyEnt* ent;
   3115    ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache, cuOff );
   3116 
   3117    if (!ent) {
   3118       VG_(printf)("chase_cuOff: no entry for 0x%05lx\n", cuOff);
   3119       *changed = False;
   3120       return cuOff;
   3121    }
   3122 
   3123    vg_assert(ent->tag != Te_EMPTY);
   3124    if (ent->tag != Te_INDIR) {
   3125       *changed = False;
   3126       return cuOff;
   3127    } else {
   3128       vg_assert(ent->Te.INDIR.indR < cuOff);
   3129       *changed = True;
   3130       return ent->Te.INDIR.indR;
   3131    }
   3132 }
   3133 
   3134 static
   3135 void chase_cuOffs_in_XArray ( Bool* changed,
   3136                               XArray* /* of TyEnt */ ents,
   3137                               TyEntIndexCache* ents_cache,
   3138                               /*MOD*/XArray* /* of UWord */ cuOffs )
   3139 {
   3140    Bool b2 = False;
   3141    Word i, n = VG_(sizeXA)( cuOffs );
   3142    for (i = 0; i < n; i++) {
   3143       Bool   b = False;
   3144       UWord* p = VG_(indexXA)( cuOffs, i );
   3145       *p = chase_cuOff( &b, ents, ents_cache, *p );
   3146       if (b)
   3147          b2 = True;
   3148    }
   3149    *changed = b2;
   3150 }
   3151 
   3152 static Bool TyEnt__subst_R_fields ( XArray* /* of TyEnt */ ents,
   3153                                     TyEntIndexCache* ents_cache,
   3154                                     /*MOD*/TyEnt* te )
   3155 {
   3156    Bool b, changed = False;
   3157    switch (te->tag) {
   3158       case Te_EMPTY:
   3159          break;
   3160       case Te_INDIR:
   3161          te->Te.INDIR.indR
   3162             = chase_cuOff( &b, ents, ents_cache, te->Te.INDIR.indR );
   3163          if (b) changed = True;
   3164          break;
   3165       case Te_UNKNOWN:
   3166          break;
   3167       case Te_Atom:
   3168          break;
   3169       case Te_Field:
   3170          te->Te.Field.typeR
   3171             = chase_cuOff( &b, ents, ents_cache, te->Te.Field.typeR );
   3172          if (b) changed = True;
   3173          break;
   3174       case Te_Bound:
   3175          break;
   3176       case Te_TyBase:
   3177          break;
   3178       case Te_TyPtr:
   3179       case Te_TyRef:
   3180       case Te_TyPtrMbr:
   3181       case Te_TyRvalRef:
   3182          te->Te.TyPorR.typeR
   3183             = chase_cuOff( &b, ents, ents_cache, te->Te.TyPorR.typeR );
   3184          if (b) changed = True;
   3185          break;
   3186       case Te_TyTyDef:
   3187          te->Te.TyTyDef.typeR
   3188             = chase_cuOff( &b, ents, ents_cache, te->Te.TyTyDef.typeR );
   3189          if (b) changed = True;
   3190          break;
   3191       case Te_TyStOrUn:
   3192          chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyStOrUn.fieldRs );
   3193          if (b) changed = True;
   3194          break;
   3195       case Te_TyEnum:
   3196          chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyEnum.atomRs );
   3197          if (b) changed = True;
   3198          break;
   3199       case Te_TyArray:
   3200          te->Te.TyArray.typeR
   3201             = chase_cuOff( &b, ents, ents_cache, te->Te.TyArray.typeR );
   3202          if (b) changed = True;
   3203          chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyArray.boundRs );
   3204          if (b) changed = True;
   3205          break;
   3206       case Te_TyFn:
   3207          break;
   3208       case Te_TyQual:
   3209          te->Te.TyQual.typeR
   3210             = chase_cuOff( &b, ents, ents_cache, te->Te.TyQual.typeR );
   3211          if (b) changed = True;
   3212          break;
   3213       case Te_TyVoid:
   3214          break;
   3215       default:
   3216          ML_(pp_TyEnt)(te);
   3217          vg_assert(0);
   3218    }
   3219    return changed;
   3220 }
   3221 
   3222 /* Make a pass over 'ents'.  For each tyent, inspect the target of any
   3223    'R' or 'Rs' fields (those which refer to other tyents), and replace
   3224    any which point to INDIR nodes with the target of the indirection
   3225    (which should not itself be an indirection).  In summary, this
   3226    routine shorts out all references to indirection nodes. */
   3227 static
   3228 Word dedup_types_substitution_pass ( /*MOD*/XArray* /* of TyEnt */ ents,
   3229                                      TyEntIndexCache* ents_cache )
   3230 {
   3231    Word i, n, nChanged = 0;
   3232    Bool b;
   3233    n = VG_(sizeXA)( ents );
   3234    for (i = 0; i < n; i++) {
   3235       TyEnt* ent = VG_(indexXA)( ents, i );
   3236       vg_assert(ent->tag != Te_EMPTY);
   3237       /* We have to substitute everything, even indirections, so as to
   3238          ensure that chains of indirections don't build up. */
   3239       b = TyEnt__subst_R_fields( ents, ents_cache, ent );
   3240       if (b)
   3241          nChanged++;
   3242    }
   3243 
   3244    return nChanged;
   3245 }
   3246 
   3247 
   3248 /* Make a pass over 'ents', building a dictionary of TyEnts as we go.
   3249    Look up each new tyent in the dictionary in turn.  If it is already
   3250    in the dictionary, replace this tyent with an indirection to the
   3251    existing one, and delete any malloc'd stuff hanging off this one.
   3252    In summary, this routine commons up all tyents that are identical
   3253    as defined by TyEnt__cmp_by_all_except_cuOff. */
   3254 static
   3255 Word dedup_types_commoning_pass ( /*MOD*/XArray* /* of TyEnt */ ents )
   3256 {
   3257    Word    n, i, nDeleted;
   3258    WordFM* dict; /* TyEnt* -> void */
   3259    TyEnt*  ent;
   3260    UWord   keyW, valW;
   3261 
   3262    dict = VG_(newFM)(
   3263              ML_(dinfo_zalloc), "di.readdwarf3.dtcp.1",
   3264              ML_(dinfo_free),
   3265              (Word(*)(UWord,UWord)) ML_(TyEnt__cmp_by_all_except_cuOff)
   3266           );
   3267 
   3268    nDeleted = 0;
   3269    n = VG_(sizeXA)( ents );
   3270    for (i = 0; i < n; i++) {
   3271       ent = VG_(indexXA)( ents, i );
   3272       vg_assert(ent->tag != Te_EMPTY);
   3273 
   3274       /* Ignore indirections, although check that they are
   3275          not forming a cycle. */
   3276       if (ent->tag == Te_INDIR) {
   3277          vg_assert(ent->Te.INDIR.indR < ent->cuOff);
   3278          continue;
   3279       }
   3280 
   3281       keyW = valW = 0;
   3282       if (VG_(lookupFM)( dict, &keyW, &valW, (UWord)ent )) {
   3283          /* it's already in the dictionary. */
   3284          TyEnt* old = (TyEnt*)keyW;
   3285          vg_assert(valW == 0);
   3286          vg_assert(old != ent);
   3287          vg_assert(old->tag != Te_INDIR);
   3288          /* since we are traversing the array in increasing order of
   3289             cuOff: */
   3290          vg_assert(old->cuOff < ent->cuOff);
   3291          /* So anyway, dump this entry and replace it with an
   3292             indirection to the one in the dictionary.  Note that the
   3293             assertion above guarantees that we cannot create cycles of
   3294             indirections, since we are always creating an indirection
   3295             to a tyent with a cuOff lower than this one. */
   3296          ML_(TyEnt__make_EMPTY)( ent );
   3297          ent->tag = Te_INDIR;
   3298          ent->Te.INDIR.indR = old->cuOff;
   3299          nDeleted++;
   3300       } else {
   3301          /* not in dictionary; add it and keep going. */
   3302          VG_(addToFM)( dict, (UWord)ent, 0 );
   3303       }
   3304    }
   3305 
   3306    VG_(deleteFM)( dict, NULL, NULL );
   3307 
   3308    return nDeleted;
   3309 }
   3310 
   3311 
   3312 static
   3313 void dedup_types ( Bool td3,
   3314                    /*MOD*/XArray* /* of TyEnt */ ents,
   3315                    TyEntIndexCache* ents_cache )
   3316 {
   3317    Word m, n, i, nDel, nSubst, nThresh;
   3318    if (0) td3 = True;
   3319 
   3320    n = VG_(sizeXA)( ents );
   3321 
   3322    /* If a commoning pass and a substitution pass both make fewer than
   3323       this many changes, just stop.  It's pointless to burn up CPU
   3324       time trying to compress the last 1% or so out of the array. */
   3325    nThresh = n / 200;
   3326 
   3327    /* First we must sort .ents by its .cuOff fields, so we
   3328       can index into it. */
   3329    VG_(setCmpFnXA)( ents, (XACmpFn_t) ML_(TyEnt__cmp_by_cuOff_only) );
   3330    VG_(sortXA)( ents );
   3331 
   3332    /* Now repeatedly do commoning and substitution passes over
   3333       the array, until there are no more changes. */
   3334    do {
   3335       nDel   = dedup_types_commoning_pass ( ents );
   3336       nSubst = dedup_types_substitution_pass ( ents, ents_cache );
   3337       vg_assert(nDel >= 0 && nSubst >= 0);
   3338       TRACE_D3("   %ld deletions, %ld substitutions\n", nDel, nSubst);
   3339    } while (nDel > nThresh || nSubst > nThresh);
   3340 
   3341    /* Sanity check: all INDIR nodes should point at a non-INDIR thing.
   3342       In fact this should be true at the end of every loop iteration
   3343       above (a commoning pass followed by a substitution pass), but
   3344       checking it on every iteration is excessively expensive.  Note,
   3345       this loop also computes 'm' for the stats printing below it. */
   3346    m = 0;
   3347    n = VG_(sizeXA)( ents );
   3348    for (i = 0; i < n; i++) {
   3349       TyEnt *ent, *ind;
   3350       ent = VG_(indexXA)( ents, i );
   3351       if (ent->tag != Te_INDIR) continue;
   3352       m++;
   3353       ind = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
   3354                                          ent->Te.INDIR.indR );
   3355       vg_assert(ind);
   3356       vg_assert(ind->tag != Te_INDIR);
   3357    }
   3358 
   3359    TRACE_D3("Overall: %ld before, %ld after\n", n, n-m);
   3360 }
   3361 
   3362 
   3363 /*------------------------------------------------------------*/
   3364 /*---                                                      ---*/
   3365 /*--- Resolution of references to type DIEs                ---*/
   3366 /*---                                                      ---*/
   3367 /*------------------------------------------------------------*/
   3368 
   3369 /* Make a pass through the (temporary) variables array.  Examine the
   3370    type of each variable, check is it found, and chase any Te_INDIRs.
   3371    Postcondition is: each variable has a typeR field that refers to a
   3372    valid type in tyents, or a Te_UNKNOWN, and is certainly guaranteed
   3373    not to refer to a Te_INDIR.  (This is so that we can throw all the
   3374    Te_INDIRs away later). */
   3375 
   3376 __attribute__((noinline))
   3377 static void resolve_variable_types (
   3378                void (*barf)( const HChar* ) __attribute__((noreturn)),
   3379                /*R-O*/XArray* /* of TyEnt */ ents,
   3380                /*MOD*/TyEntIndexCache* ents_cache,
   3381                /*MOD*/XArray* /* of TempVar* */ vars
   3382             )
   3383 {
   3384    Word i, n;
   3385    n = VG_(sizeXA)( vars );
   3386    for (i = 0; i < n; i++) {
   3387       TempVar* var = *(TempVar**)VG_(indexXA)( vars, i );
   3388       /* This is the stated type of the variable.  But it might be
   3389          an indirection, so be careful. */
   3390       TyEnt* ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
   3391                                                 var->typeR );
   3392       if (ent && ent->tag == Te_INDIR) {
   3393          ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
   3394                                             ent->Te.INDIR.indR );
   3395          vg_assert(ent);
   3396          vg_assert(ent->tag != Te_INDIR);
   3397       }
   3398 
   3399       /* Deal first with "normal" cases */
   3400       if (ent && ML_(TyEnt__is_type)(ent)) {
   3401          var->typeR = ent->cuOff;
   3402          continue;
   3403       }
   3404 
   3405       /* If there's no ent, it probably we did not manage to read a
   3406          type at the cuOffset which is stated as being this variable's
   3407          type.  Maybe a deficiency in parse_type_DIE.  Complain. */
   3408       if (ent == NULL) {
   3409          VG_(printf)("\n: Invalid cuOff = 0x%05lx\n", var->typeR );
   3410          barf("resolve_variable_types: "
   3411               "cuOff does not refer to a known type");
   3412       }
   3413       vg_assert(ent);
   3414       /* If ent has any other tag, something bad happened, along the
   3415          lines of var->typeR not referring to a type at all. */
   3416       vg_assert(ent->tag == Te_UNKNOWN);
   3417       /* Just accept it; the type will be useless, but at least keep
   3418          going. */
   3419       var->typeR = ent->cuOff;
   3420    }
   3421 }
   3422 
   3423 
   3424 /*------------------------------------------------------------*/
   3425 /*---                                                      ---*/
   3426 /*--- Parsing of Compilation Units                         ---*/
   3427 /*---                                                      ---*/
   3428 /*------------------------------------------------------------*/
   3429 
   3430 static Int cmp_TempVar_by_dioff ( const void* v1, const void* v2 ) {
   3431    const TempVar* t1 = *(const TempVar *const *)v1;
   3432    const TempVar* t2 = *(const TempVar *const *)v2;
   3433    if (t1->dioff < t2->dioff) return -1;
   3434    if (t1->dioff > t2->dioff) return 1;
   3435    return 0;
   3436 }
   3437 
   3438 static void read_DIE (
   3439    /*MOD*/WordFM* /* of (XArray* of AddrRange, void) */ rangestree,
   3440    /*MOD*/XArray* /* of TyEnt */ tyents,
   3441    /*MOD*/XArray* /* of TempVar* */ tempvars,
   3442    /*MOD*/XArray* /* of GExpr* */ gexprs,
   3443    /*MOD*/D3TypeParser* typarser,
   3444    /*MOD*/D3VarParser* varparser,
   3445    Cursor* c, Bool td3, CUConst* cc, Int level
   3446 )
   3447 {
   3448    Cursor abbv;
   3449    ULong  atag, abbv_code;
   3450    UWord  posn;
   3451    UInt   has_children;
   3452    UWord  start_die_c_offset, start_abbv_c_offset;
   3453    UWord  after_die_c_offset, after_abbv_c_offset;
   3454 
   3455    /* --- Deal with this DIE --- */
   3456    posn      = cook_die( cc, get_position_of_Cursor( c ) );
   3457    abbv_code = get_ULEB128( c );
   3458    set_abbv_Cursor( &abbv, td3, cc, abbv_code );
   3459    atag      = get_ULEB128( &abbv );
   3460    TRACE_D3("\n");
   3461    TRACE_D3(" <%d><%lx>: Abbrev Number: %llu (%s)\n",
   3462             level, posn, abbv_code, ML_(pp_DW_TAG)( atag ) );
   3463 
   3464    if (atag == 0)
   3465       cc->barf("read_DIE: invalid zero tag on DIE");
   3466 
   3467    has_children = get_UChar( &abbv );
   3468    if (has_children != DW_children_no && has_children != DW_children_yes)
   3469       cc->barf("read_DIE: invalid has_children value");
   3470 
   3471    /* We're set up to look at the fields of this DIE.  Hand it off to
   3472       any parser(s) that want to see it.  Since they will in general
   3473       advance both the DIE and abbrev cursors, remember their current
   3474       settings so that we can then back up and do one final pass over
   3475       the DIE, to print out its contents. */
   3476 
   3477    start_die_c_offset  = get_position_of_Cursor( c );
   3478    start_abbv_c_offset = get_position_of_Cursor( &abbv );
   3479 
   3480    while (True) {
   3481       FormContents cts;
   3482       ULong at_name = get_ULEB128( &abbv );
   3483       ULong at_form = get_ULEB128( &abbv );
   3484       if (at_name == 0 && at_form == 0) break;
   3485       TRACE_D3("     %18s: ", ML_(pp_DW_AT)(at_name));
   3486       /* Get the form contents, but ignore them; the only purpose is
   3487          to print them, if td3 is True */
   3488       get_Form_contents( &cts, cc, c, td3, (DW_FORM)at_form );
   3489       TRACE_D3("\t");
   3490       TRACE_D3("\n");
   3491    }
   3492 
   3493    after_die_c_offset  = get_position_of_Cursor( c );
   3494    after_abbv_c_offset = get_position_of_Cursor( &abbv );
   3495 
   3496    set_position_of_Cursor( c,     start_die_c_offset );
   3497    set_position_of_Cursor( &abbv, start_abbv_c_offset );
   3498 
   3499    parse_type_DIE( tyents,
   3500                    typarser,
   3501                    (DW_TAG)atag,
   3502                    posn,
   3503                    level,
   3504                    c,     /* DIE cursor */
   3505                    &abbv, /* abbrev cursor */
   3506                    cc,
   3507                    td3 );
   3508 
   3509    set_position_of_Cursor( c,     start_die_c_offset );
   3510    set_position_of_Cursor( &abbv, start_abbv_c_offset );
   3511 
   3512    parse_var_DIE( rangestree,
   3513                   tempvars,
   3514                   gexprs,
   3515                   varparser,
   3516                   (DW_TAG)atag,
   3517                   posn,
   3518                   level,
   3519                   c,     /* DIE cursor */
   3520                   &abbv, /* abbrev cursor */
   3521                   cc,
   3522                   td3 );
   3523 
   3524    set_position_of_Cursor( c,     after_die_c_offset );
   3525    set_position_of_Cursor( &abbv, after_abbv_c_offset );
   3526 
   3527    /* --- Now recurse into its children, if any --- */
   3528    if (has_children == DW_children_yes) {
   3529       if (0) TRACE_D3("BEGIN children of level %d\n", level);
   3530       while (True) {
   3531          atag = peek_ULEB128( c );
   3532          if (atag == 0) break;
   3533          read_DIE( rangestree, tyents, tempvars, gexprs,
   3534                    typarser, varparser,
   3535                    c, td3, cc, level+1 );
   3536       }
   3537       /* Now we need to eat the terminating zero */
   3538       atag = get_ULEB128( c );
   3539       vg_assert(atag == 0);
   3540       if (0) TRACE_D3("END children of level %d\n", level);
   3541    }
   3542 
   3543 }
   3544 
   3545 
   3546 static
   3547 void new_dwarf3_reader_wrk (
   3548    struct _DebugInfo* di,
   3549    __attribute__((noreturn)) void (*barf)( const HChar* ),
   3550    DiSlice escn_debug_info,      DiSlice escn_debug_types,
   3551    DiSlice escn_debug_abbv,      DiSlice escn_debug_line,
   3552    DiSlice escn_debug_str,       DiSlice escn_debug_ranges,
   3553    DiSlice escn_debug_loc,       DiSlice escn_debug_info_alt,
   3554    DiSlice escn_debug_abbv_alt,  DiSlice escn_debug_line_alt,
   3555    DiSlice escn_debug_str_alt
   3556 )
   3557 {
   3558    XArray* /* of TyEnt */     tyents;
   3559    XArray* /* of TyEnt */     tyents_to_keep;
   3560    XArray* /* of GExpr* */    gexprs;
   3561    XArray* /* of TempVar* */  tempvars;
   3562    WordFM* /* of (XArray* of AddrRange, void) */ rangestree;
   3563    TyEntIndexCache* tyents_cache = NULL;
   3564    TyEntIndexCache* tyents_to_keep_cache = NULL;
   3565    TempVar *varp, *varp2;
   3566    GExpr* gexpr;
   3567    Cursor abbv; /* for showing .debug_abbrev */
   3568    Cursor info; /* primary cursor for parsing .debug_info */
   3569    Cursor ranges; /* for showing .debug_ranges */
   3570    D3TypeParser typarser;
   3571    D3VarParser varparser;
   3572    Addr  dr_base;
   3573    UWord dr_offset;
   3574    Word  i, j, n;
   3575    Bool td3 = di->trace_symtab;
   3576    XArray* /* of TempVar* */ dioff_lookup_tab;
   3577    Int pass;
   3578    VgHashTable signature_types;
   3579 #if 0
   3580    /* This doesn't work properly because it assumes all entries are
   3581       packed end to end, with no holes.  But that doesn't always
   3582       appear to be the case, so it loses sync.  And the D3 spec
   3583       doesn't appear to require a no-hole situation either. */
   3584    /* Display .debug_loc */
   3585    Addr  dl_base;
   3586    UWord dl_offset;
   3587    Cursor loc; /* for showing .debug_loc */
   3588    TRACE_SYMTAB("\n");
   3589    TRACE_SYMTAB("\n------ The contents of .debug_loc ------\n");
   3590    TRACE_SYMTAB("    Offset   Begin    End      Expression\n");
   3591    init_Cursor( &loc, debug_loc_img,
   3592                 debug_loc_sz, 0, barf,
   3593                 "Overrun whilst reading .debug_loc section(1)" );
   3594    dl_base = 0;
   3595    dl_offset = 0;
   3596    while (True) {
   3597       UWord  w1, w2;
   3598       UWord  len;
   3599       if (is_at_end_Cursor( &loc ))
   3600          break;
   3601 
   3602       /* Read a (host-)word pair.  This is something of a hack since
   3603          the word size to read is really dictated by the ELF file;
   3604          however, we assume we're reading a file with the same
   3605          word-sizeness as the host.  Reasonably enough. */
   3606       w1 = get_UWord( &loc );
   3607       w2 = get_UWord( &loc );
   3608 
   3609       if (w1 == 0 && w2 == 0) {
   3610          /* end of list.  reset 'base' */
   3611          TRACE_D3("    %08lx <End of list>\n", dl_offset);
   3612          dl_base = 0;
   3613          dl_offset = get_position_of_Cursor( &loc );
   3614          continue;
   3615       }
   3616 
   3617       if (w1 == -1UL) {
   3618          /* new value for 'base' */
   3619          TRACE_D3("    %08lx %16lx %08lx (base address)\n",
   3620                   dl_offset, w1, w2);
   3621          dl_base = w2;
   3622          continue;
   3623       }
   3624 
   3625       /* else a location expression follows */
   3626       TRACE_D3("    %08lx %08lx %08lx ",
   3627                dl_offset, w1 + dl_base, w2 + dl_base);
   3628       len = (UWord)get_UShort( &loc );
   3629       while (len > 0) {
   3630          UChar byte = get_UChar( &loc );
   3631          TRACE_D3("%02x", (UInt)byte);
   3632          len--;
   3633       }
   3634       TRACE_SYMTAB("\n");
   3635    }
   3636 #endif
   3637 
   3638    /* Display .debug_ranges */
   3639    TRACE_SYMTAB("\n");
   3640    TRACE_SYMTAB("\n------ The contents of .debug_ranges ------\n");
   3641    TRACE_SYMTAB("    Offset   Begin    End\n");
   3642    if (ML_(sli_is_valid)(escn_debug_ranges)) {
   3643       init_Cursor( &ranges, escn_debug_ranges, 0, barf,
   3644                    "Overrun whilst reading .debug_ranges section(1)" );
   3645       dr_base = 0;
   3646       dr_offset = 0;
   3647       while (True) {
   3648          UWord  w1, w2;
   3649 
   3650          if (is_at_end_Cursor( &ranges ))
   3651             break;
   3652 
   3653          /* Read a (host-)word pair.  This is something of a hack since
   3654             the word size to read is really dictated by the ELF file;
   3655             however, we assume we're reading a file with the same
   3656             word-sizeness as the host.  Reasonably enough. */
   3657          w1 = get_UWord( &ranges );
   3658          w2 = get_UWord( &ranges );
   3659 
   3660          if (w1 == 0 && w2 == 0) {
   3661             /* end of list.  reset 'base' */
   3662             TRACE_D3("    %08lx <End of list>\n", dr_offset);
   3663             dr_base = 0;
   3664             dr_offset = get_position_of_Cursor( &ranges );
   3665             continue;
   3666          }
   3667 
   3668          if (w1 == -1UL) {
   3669             /* new value for 'base' */
   3670             TRACE_D3("    %08lx %16lx %08lx (base address)\n",
   3671                      dr_offset, w1, w2);
   3672             dr_base = w2;
   3673             continue;
   3674          }
   3675 
   3676          /* else a range [w1+base, w2+base) is denoted */
   3677          TRACE_D3("    %08lx %08lx %08lx\n",
   3678                   dr_offset, w1 + dr_base, w2 + dr_base);
   3679       }
   3680    }
   3681 
   3682    /* Display .debug_abbrev */
   3683    TRACE_SYMTAB("\n");
   3684    TRACE_SYMTAB("\n------ The contents of .debug_abbrev ------\n");
   3685    if (ML_(sli_is_valid)(escn_debug_abbv)) {
   3686       init_Cursor( &abbv, escn_debug_abbv, 0, barf,
   3687                    "Overrun whilst reading .debug_abbrev section" );
   3688       while (True) {
   3689          if (is_at_end_Cursor( &abbv ))
   3690             break;
   3691          /* Read one abbreviation table */
   3692          TRACE_D3("  Number TAG\n");
   3693          while (True) {
   3694             ULong atag;
   3695             UInt  has_children;
   3696             ULong acode = get_ULEB128( &abbv );
   3697             if (acode == 0) break; /* end of the table */
   3698             atag = get_ULEB128( &abbv );
   3699             has_children = get_UChar( &abbv );
   3700             TRACE_D3("   %llu      %s    [%s]\n",
   3701                      acode, ML_(pp_DW_TAG)(atag),
   3702                             ML_(pp_DW_children)(has_children));
   3703             while (True) {
   3704                ULong at_name = get_ULEB128( &abbv );
   3705                ULong at_form = get_ULEB128( &abbv );
   3706                if (at_name == 0 && at_form == 0) break;
   3707                TRACE_D3("    %18s %s\n",
   3708                         ML_(pp_DW_AT)(at_name), ML_(pp_DW_FORM)(at_form));
   3709             }
   3710          }
   3711       }
   3712    }
   3713    TRACE_SYMTAB("\n");
   3714 
   3715    /* We'll park the harvested type information in here.  Also create
   3716       a fake "void" entry with offset D3_FAKEVOID_CUOFF, so we always
   3717       have at least one type entry to refer to.  D3_FAKEVOID_CUOFF is
   3718       huge and presumably will not occur in any valid DWARF3 file --
   3719       it would need to have a .debug_info section 4GB long for that to
   3720       happen.  These type entries end up in the DebugInfo. */
   3721    tyents = VG_(newXA)( ML_(dinfo_zalloc),
   3722                         "di.readdwarf3.ndrw.1 (TyEnt temp array)",
   3723                         ML_(dinfo_free), sizeof(TyEnt) );
   3724    { TyEnt tyent;
   3725      VG_(memset)(&tyent, 0, sizeof(tyent));
   3726      tyent.tag   = Te_TyVoid;
   3727      tyent.cuOff = D3_FAKEVOID_CUOFF;
   3728      tyent.Te.TyVoid.isFake = True;
   3729      VG_(addToXA)( tyents, &tyent );
   3730    }
   3731    { TyEnt tyent;
   3732      VG_(memset)(&tyent, 0, sizeof(tyent));
   3733      tyent.tag   = Te_UNKNOWN;
   3734      tyent.cuOff = D3_INVALID_CUOFF;
   3735      VG_(addToXA)( tyents, &tyent );
   3736    }
   3737 
   3738    /* This is a tree used to unique-ify the range lists that are
   3739       manufactured by parse_var_DIE.  References to the keys in the
   3740       tree wind up in .rngMany fields in TempVars.  We'll need to
   3741       delete this tree, and the XArrays attached to it, at the end of
   3742       this function. */
   3743    rangestree = VG_(newFM)( ML_(dinfo_zalloc),
   3744                             "di.readdwarf3.ndrw.2 (rangestree)",
   3745                             ML_(dinfo_free),
   3746                             (Word(*)(UWord,UWord))cmp__XArrays_of_AddrRange );
   3747 
   3748    /* List of variables we're accumulating.  These don't end up in the
   3749       DebugInfo; instead their contents are handed to ML_(addVar) and
   3750       the list elements are then deleted. */
   3751    tempvars = VG_(newXA)( ML_(dinfo_zalloc),
   3752                           "di.readdwarf3.ndrw.3 (TempVar*s array)",
   3753                           ML_(dinfo_free),
   3754                           sizeof(TempVar*) );
   3755 
   3756    /* List of GExprs we're accumulating.  These wind up in the
   3757       DebugInfo. */
   3758    gexprs = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.4",
   3759                         ML_(dinfo_free), sizeof(GExpr*) );
   3760 
   3761    /* We need a D3TypeParser to keep track of partially constructed
   3762       types.  It'll be discarded as soon as we've completed the CU,
   3763       since the resulting information is tipped in to 'tyents' as it
   3764       is generated. */
   3765    VG_(memset)( &typarser, 0, sizeof(typarser) );
   3766    typarser.sp = -1;
   3767    typarser.language = '?';
   3768    for (i = 0; i < N_D3_TYPE_STACK; i++) {
   3769       typarser.qparentE[i].tag   = Te_EMPTY;
   3770       typarser.qparentE[i].cuOff = D3_INVALID_CUOFF;
   3771    }
   3772 
   3773    VG_(memset)( &varparser, 0, sizeof(varparser) );
   3774    varparser.sp = -1;
   3775 
   3776    signature_types = VG_(HT_construct) ("signature_types");
   3777 
   3778    /* Do an initial pass to scan the .debug_types section, if any, and
   3779       fill in the signatured types hash table.  This lets us handle
   3780       mapping from a type signature to a (cooked) DIE offset directly
   3781       in get_Form_contents.  */
   3782    if (ML_(sli_is_valid)(escn_debug_types)) {
   3783       init_Cursor( &info, escn_debug_types, 0, barf,
   3784                    "Overrun whilst reading .debug_types section" );
   3785       TRACE_D3("\n------ Collecting signatures from "
   3786                ".debug_types section ------\n");
   3787 
   3788       while (True) {
   3789          UWord   cu_start_offset, cu_offset_now;
   3790          CUConst cc;
   3791 
   3792          cu_start_offset = get_position_of_Cursor( &info );
   3793          TRACE_D3("\n");
   3794          TRACE_D3("  Compilation Unit @ offset 0x%lx:\n", cu_start_offset);
   3795          /* parse_CU_Header initialises the CU's set_abbv_Cursor cache
   3796             (saC_cache) */
   3797          parse_CU_Header( &cc, td3, &info, escn_debug_abbv, True, False );
   3798 
   3799          /* Needed by cook_die.  */
   3800          cc.types_cuOff_bias = escn_debug_info.szB;
   3801 
   3802          record_signatured_type( signature_types, cc.type_signature,
   3803                                  cook_die( &cc, cc.type_offset ));
   3804 
   3805          /* Until proven otherwise we assume we don't need the icc9
   3806             workaround in this case; see the DIE-reading loop below
   3807             for details.  */
   3808          cu_offset_now = (cu_start_offset + cc.unit_length
   3809                           + (cc.is_dw64 ? 12 : 4));
   3810 
   3811          if (cu_offset_now >= escn_debug_types.szB)
   3812             break;
   3813 
   3814          set_position_of_Cursor ( &info, cu_offset_now );
   3815       }
   3816    }
   3817 
   3818    /* Perform three DIE-reading passes.  The first pass reads DIEs from
   3819       alternate .debug_info (if any), the second pass reads DIEs from
   3820       .debug_info, and the third pass reads DIEs from .debug_types.
   3821       Moving the body of this loop into a separate function would
   3822       require a large number of arguments to be passed in, so it is
   3823       kept inline instead.  */
   3824    for (pass = 0; pass < 3; ++pass) {
   3825       ULong section_size;
   3826 
   3827       if (pass == 0) {
   3828          if (!ML_(sli_is_valid)(escn_debug_info_alt))
   3829 	    continue;
   3830          /* Now loop over the Compilation Units listed in the alternate
   3831             .debug_info section (see D3SPEC sec 7.5, paras 1 and 2).
   3832             Each compilation unit contains a Compilation Unit Header
   3833             followed by precisely one DW_TAG_compile_unit or
   3834             DW_TAG_partial_unit DIE. */
   3835          init_Cursor( &info, escn_debug_info_alt, 0, barf,
   3836                       "Overrun whilst reading alternate .debug_info section" );
   3837          section_size = escn_debug_info_alt.szB;
   3838 
   3839          TRACE_D3("\n------ Parsing alternate .debug_info section ------\n");
   3840       } else if (pass == 1) {
   3841          /* Now loop over the Compilation Units listed in the .debug_info
   3842          section (see D3SPEC sec 7.5, paras 1 and 2).  Each compilation
   3843             unit contains a Compilation Unit Header followed by precisely
   3844             one DW_TAG_compile_unit or DW_TAG_partial_unit DIE. */
   3845          init_Cursor( &info, escn_debug_info, 0, barf,
   3846                       "Overrun whilst reading .debug_info section" );
   3847          section_size = escn_debug_info.szB;
   3848 
   3849          TRACE_D3("\n------ Parsing .debug_info section ------\n");
   3850       } else {
   3851          if (!ML_(sli_is_valid)(escn_debug_types))
   3852             continue;
   3853          init_Cursor( &info, escn_debug_types, 0, barf,
   3854                       "Overrun whilst reading .debug_types section" );
   3855          section_size = escn_debug_types.szB;
   3856 
   3857          TRACE_D3("\n------ Parsing .debug_types section ------\n");
   3858       }
   3859 
   3860       while (True) {
   3861          ULong   cu_start_offset, cu_offset_now;
   3862          CUConst cc;
   3863          /* It may be that the stated size of this CU is larger than the
   3864             amount of stuff actually in it.  icc9 seems to generate CUs
   3865             thusly.  We use these variables to figure out if this is
   3866             indeed the case, and if so how many bytes we need to skip to
   3867             get to the start of the next CU.  Not skipping those bytes
   3868             causes us to misidentify the start of the next CU, and it all
   3869             goes badly wrong after that (not surprisingly). */
   3870          UWord cu_size_including_IniLen, cu_amount_used;
   3871 
   3872          /* It seems icc9 finishes the DIE info before debug_info_sz
   3873             bytes have been used up.  So be flexible, and declare the
   3874             sequence complete if there is not enough remaining bytes to
   3875             hold even the smallest conceivable CU header.  (11 bytes I
   3876             reckon). */
   3877          /* JRS 23Jan09: I suspect this is no longer necessary now that
   3878             the code below contains a 'while (cu_amount_used <
   3879             cu_size_including_IniLen ...'  style loop, which skips over
   3880             any leftover bytes at the end of a CU in the case where the
   3881             CU's stated size is larger than its actual size (as
   3882             determined by reading all its DIEs).  However, for prudence,
   3883             I'll leave the following test in place.  I can't see that a
   3884             CU header can be smaller than 11 bytes, so I don't think
   3885             there's any harm possible through the test -- it just adds
   3886             robustness. */
   3887          Word avail = get_remaining_length_Cursor( &info );
   3888          if (avail < 11) {
   3889             if (avail > 0)
   3890                TRACE_D3("new_dwarf3_reader_wrk: warning: "
   3891                         "%ld unused bytes after end of DIEs\n", avail);
   3892             break;
   3893          }
   3894 
   3895          /* Check the varparser's and typarser's stacks are in a sane state. */
   3896          vg_assert(varparser.sp == -1);
   3897          for (i = 0; i < N_D3_VAR_STACK; i++) {
   3898             vg_assert(varparser.ranges[i] == NULL);
   3899             vg_assert(varparser.level[i] == 0);
   3900          }
   3901          for (i = 0; i < N_D3_TYPE_STACK; i++) {
   3902             vg_assert(typarser.qparentE[i].cuOff == D3_INVALID_CUOFF);
   3903             vg_assert(typarser.qparentE[i].tag   == Te_EMPTY);
   3904             vg_assert(typarser.qlevel[i] == 0);
   3905          }
   3906 
   3907          cu_start_offset = get_position_of_Cursor( &info );
   3908          TRACE_D3("\n");
   3909          TRACE_D3("  Compilation Unit @ offset 0x%llx:\n", cu_start_offset);
   3910          /* parse_CU_Header initialises the CU's set_abbv_Cursor cache
   3911             (saC_cache) */
   3912          if (pass == 0) {
   3913             parse_CU_Header( &cc, td3, &info, escn_debug_abbv_alt,
   3914                              False, True );
   3915          } else {
   3916             parse_CU_Header( &cc, td3, &info, escn_debug_abbv,
   3917                              pass == 2, False );
   3918          }
   3919          cc.escn_debug_str      = pass == 0 ? escn_debug_str_alt
   3920                                             : escn_debug_str;
   3921          cc.escn_debug_ranges   = escn_debug_ranges;
   3922          cc.escn_debug_loc      = escn_debug_loc;
   3923          cc.escn_debug_line     = pass == 0 ? escn_debug_line_alt
   3924                                             : escn_debug_line;
   3925          cc.escn_debug_info     = pass == 0 ? escn_debug_info_alt
   3926                                             : escn_debug_info;
   3927          cc.escn_debug_types    = escn_debug_types;
   3928          cc.escn_debug_info_alt = escn_debug_info_alt;
   3929          cc.escn_debug_str_alt  = escn_debug_str_alt;
   3930          cc.types_cuOff_bias    = escn_debug_info.szB;
   3931          cc.alt_cuOff_bias      = escn_debug_info.szB + escn_debug_types.szB;
   3932          cc.cu_start_offset     = cu_start_offset;
   3933          cc.di = di;
   3934          /* The CU's svma can be deduced by looking at the AT_low_pc
   3935             value in the top level TAG_compile_unit, which is the topmost
   3936             DIE.  We'll leave it for the 'varparser' to acquire that info
   3937             and fill it in -- since it is the only party to want to know
   3938             it. */
   3939          cc.cu_svma_known = False;
   3940          cc.cu_svma       = 0;
   3941 
   3942          cc.signature_types = signature_types;
   3943 
   3944          /* Create a fake outermost-level range covering the entire
   3945             address range.  So we always have *something* to catch all
   3946             variable declarations. */
   3947          varstack_push( &cc, &varparser, td3,
   3948                         unitary_range_list(0UL, ~0UL),
   3949                         -1, False/*isFunc*/, NULL/*fbGX*/ );
   3950 
   3951          /* And set up the file name table.  When we come across the top
   3952             level DIE for this CU (which is what the next call to
   3953             read_DIE should process) we will copy all the file names out
   3954             of the .debug_line img area and use this table to look up the
   3955             copies when we later see filename numbers in DW_TAG_variables
   3956             etc. */
   3957          vg_assert(!varparser.filenameTable );
   3958          varparser.filenameTable
   3959             = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.5",
   3960                           ML_(dinfo_free),
   3961                           sizeof(UChar*) );
   3962          vg_assert(varparser.filenameTable);
   3963 
   3964          /* Now read the one-and-only top-level DIE for this CU. */
   3965          vg_assert(varparser.sp == 0);
   3966          read_DIE( rangestree,
   3967                    tyents, tempvars, gexprs,
   3968                    &typarser, &varparser,
   3969                    &info, td3, &cc, 0 );
   3970 
   3971          cu_offset_now = get_position_of_Cursor( &info );
   3972 
   3973          if (0) VG_(printf)("Travelled: %llu  size %llu\n",
   3974                             cu_offset_now - cc.cu_start_offset,
   3975                             cc.unit_length + (cc.is_dw64 ? 12 : 4));
   3976 
   3977          /* How big the CU claims it is .. */
   3978          cu_size_including_IniLen = cc.unit_length + (cc.is_dw64 ? 12 : 4);
   3979          /* .. vs how big we have found it to be */
   3980          cu_amount_used = cu_offset_now - cc.cu_start_offset;
   3981 
   3982          if (1) TRACE_D3("offset now %lld, d-i-size %lld\n",
   3983                          cu_offset_now, section_size);
   3984          if (cu_offset_now > section_size)
   3985             barf("toplevel DIEs beyond end of CU");
   3986 
   3987          /* If the CU is bigger than it claims to be, we've got a serious
   3988             problem. */
   3989          if (cu_amount_used > cu_size_including_IniLen)
   3990             barf("CU's actual size appears to be larger than it claims it is");
   3991 
   3992          /* If the CU is smaller than it claims to be, we need to skip some
   3993             bytes.  Loop updates cu_offset_now and cu_amount_used. */
   3994          while (cu_amount_used < cu_size_including_IniLen
   3995                 && get_remaining_length_Cursor( &info ) > 0) {
   3996             if (0) VG_(printf)("SKIP\n");
   3997             (void)get_UChar( &info );
   3998             cu_offset_now = get_position_of_Cursor( &info );
   3999             cu_amount_used = cu_offset_now - cc.cu_start_offset;
   4000          }
   4001 
   4002          /* Preen to level -2.  DIEs have level >= 0 so -2 cannot occur
   4003             anywhere else at all.  Our fake the-entire-address-space
   4004             range is at level -1, so preening to -2 should completely
   4005             empty the stack out. */
   4006          TRACE_D3("\n");
   4007          varstack_preen( &varparser, td3, -2 );
   4008          /* Similarly, empty the type stack out. */
   4009          typestack_preen( &typarser, td3, -2 );
   4010 
   4011          TRACE_D3("set_abbv_Cursor cache: %lu queries, %lu misses\n",
   4012                   cc.saC_cache_queries, cc.saC_cache_misses);
   4013 
   4014          vg_assert(varparser.filenameTable );
   4015          VG_(deleteXA)( varparser.filenameTable );
   4016          varparser.filenameTable = NULL;
   4017 
   4018          if (cu_offset_now == section_size)
   4019             break;
   4020          /* else keep going */
   4021       }
   4022    }
   4023 
   4024    /* From here on we're post-processing the stuff we got
   4025       out of the .debug_info section. */
   4026    if (td3) {
   4027       TRACE_D3("\n");
   4028       ML_(pp_TyEnts)(tyents, "Initial type entity (TyEnt) array");
   4029       TRACE_D3("\n");
   4030       TRACE_D3("------ Compressing type entries ------\n");
   4031    }
   4032 
   4033    tyents_cache = ML_(dinfo_zalloc)( "di.readdwarf3.ndrw.6",
   4034                                      sizeof(TyEntIndexCache) );
   4035    ML_(TyEntIndexCache__invalidate)( tyents_cache );
   4036    dedup_types( td3, tyents, tyents_cache );
   4037    if (td3) {
   4038       TRACE_D3("\n");
   4039       ML_(pp_TyEnts)(tyents, "After type entity (TyEnt) compression");
   4040    }
   4041 
   4042    TRACE_D3("\n");
   4043    TRACE_D3("------ Resolving the types of variables ------\n" );
   4044    resolve_variable_types( barf, tyents, tyents_cache, tempvars );
   4045 
   4046    /* Copy all the non-INDIR tyents into a new table.  For large
   4047       .so's, about 90% of the tyents will by now have been resolved to
   4048       INDIRs, and we no longer need them, and so don't need to store
   4049       them. */
   4050    tyents_to_keep
   4051       = VG_(newXA)( ML_(dinfo_zalloc),
   4052                     "di.readdwarf3.ndrw.7 (TyEnt to-keep array)",
   4053                     ML_(dinfo_free), sizeof(TyEnt) );
   4054    n = VG_(sizeXA)( tyents );
   4055    for (i = 0; i < n; i++) {
   4056       TyEnt* ent = VG_(indexXA)( tyents, i );
   4057       if (ent->tag != Te_INDIR)
   4058          VG_(addToXA)( tyents_to_keep, ent );
   4059    }
   4060 
   4061    VG_(deleteXA)( tyents );
   4062    tyents = NULL;
   4063    ML_(dinfo_free)( tyents_cache );
   4064    tyents_cache = NULL;
   4065 
   4066    /* Sort tyents_to_keep so we can lookup in it.  A complete (if
   4067       minor) waste of time, since tyents itself is sorted, but
   4068       necessary since VG_(lookupXA) refuses to cooperate if we
   4069       don't. */
   4070    VG_(setCmpFnXA)( tyents_to_keep, (XACmpFn_t) ML_(TyEnt__cmp_by_cuOff_only) );
   4071    VG_(sortXA)( tyents_to_keep );
   4072 
   4073    /* Enable caching on tyents_to_keep */
   4074    tyents_to_keep_cache
   4075       = ML_(dinfo_zalloc)( "di.readdwarf3.ndrw.8",
   4076                            sizeof(TyEntIndexCache) );
   4077    ML_(TyEntIndexCache__invalidate)( tyents_to_keep_cache );
   4078 
   4079    /* And record the tyents in the DebugInfo.  We do this before
   4080       starting to hand variables to ML_(addVar), since if ML_(addVar)
   4081       wants to do debug printing (of the types of said vars) then it
   4082       will need the tyents.*/
   4083    vg_assert(!di->admin_tyents);
   4084    di->admin_tyents = tyents_to_keep;
   4085 
   4086    /* Bias all the location expressions. */
   4087    TRACE_D3("\n");
   4088    TRACE_D3("------ Biasing the location expressions ------\n" );
   4089 
   4090    n = VG_(sizeXA)( gexprs );
   4091    for (i = 0; i < n; i++) {
   4092       gexpr = *(GExpr**)VG_(indexXA)( gexprs, i );
   4093       bias_GX( gexpr, di );
   4094    }
   4095 
   4096    TRACE_D3("\n");
   4097    TRACE_D3("------ Acquired the following variables: ------\n\n");
   4098 
   4099    /* Park (pointers to) all the vars in an XArray, so we can look up
   4100       abstract origins quickly.  The array is sorted by (hence, looked-up
   4101       by) the .dioff fields.  Since the .dioffs should be in strictly
   4102       ascending order, there is no need to sort the array after
   4103       construction.  The ascendingness is however asserted for. */
   4104    dioff_lookup_tab
   4105       = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.9",
   4106                     ML_(dinfo_free),
   4107                     sizeof(TempVar*) );
   4108    vg_assert(dioff_lookup_tab);
   4109 
   4110    n = VG_(sizeXA)( tempvars );
   4111    Word first_primary_var = 0;
   4112    for (first_primary_var = 0;
   4113         escn_debug_info_alt.szB/*really?*/ && first_primary_var < n;
   4114         first_primary_var++) {
   4115       varp = *(TempVar**)VG_(indexXA)( tempvars, first_primary_var );
   4116       if (varp->dioff < escn_debug_info.szB + escn_debug_types.szB)
   4117          break;
   4118    }
   4119    for (i = 0; i < n; i++) {
   4120       varp = *(TempVar**)VG_(indexXA)( tempvars, (i + first_primary_var) % n );
   4121       if (i > first_primary_var) {
   4122          varp2 = *(TempVar**)VG_(indexXA)( tempvars,
   4123                                            (i + first_primary_var - 1) % n );
   4124          /* why should this hold?  Only, I think, because we've
   4125             constructed the array by reading .debug_info sequentially,
   4126             and so the array .dioff fields should reflect that, and be
   4127             strictly ascending. */
   4128          vg_assert(varp2->dioff < varp->dioff);
   4129       }
   4130       VG_(addToXA)( dioff_lookup_tab, &varp );
   4131    }
   4132    VG_(setCmpFnXA)( dioff_lookup_tab, cmp_TempVar_by_dioff );
   4133    VG_(sortXA)( dioff_lookup_tab ); /* POINTLESS; FIXME: rm */
   4134 
   4135    /* Now visit each var.  Collect up as much info as possible for
   4136       each var and hand it to ML_(addVar). */
   4137    n = VG_(sizeXA)( tempvars );
   4138    for (j = 0; j < n; j++) {
   4139       TyEnt* ent;
   4140       varp = *(TempVar**)VG_(indexXA)( tempvars, j );
   4141 
   4142       /* Possibly show .. */
   4143       if (td3) {
   4144          VG_(printf)("<%lx> addVar: level %d: %s :: ",
   4145                      varp->dioff,
   4146                      varp->level,
   4147                      varp->name ? varp->name : "<anon_var>" );
   4148          if (varp->typeR) {
   4149             ML_(pp_TyEnt_C_ishly)( tyents_to_keep, varp->typeR );
   4150          } else {
   4151             VG_(printf)("NULL");
   4152          }
   4153          VG_(printf)("\n  Loc=");
   4154          if (varp->gexpr) {
   4155             ML_(pp_GX)(varp->gexpr);
   4156          } else {
   4157             VG_(printf)("NULL");
   4158          }
   4159          VG_(printf)("\n");
   4160          if (varp->fbGX) {
   4161             VG_(printf)("  FrB=");
   4162             ML_(pp_GX)( varp->fbGX );
   4163             VG_(printf)("\n");
   4164          } else {
   4165             VG_(printf)("  FrB=none\n");
   4166          }
   4167          VG_(printf)("  declared at: %s:%d\n",
   4168                      varp->fName ? varp->fName : "NULL",
   4169                      varp->fLine );
   4170          if (varp->absOri != (UWord)D3_INVALID_CUOFF)
   4171             VG_(printf)("  abstract origin: <%lx>\n", varp->absOri);
   4172       }
   4173 
   4174       /* Skip variables which have no location.  These must be
   4175          abstract instances; they are useless as-is since with no
   4176          location they have no specified memory location.  They will
   4177          presumably be referred to via the absOri fields of other
   4178          variables. */
   4179       if (!varp->gexpr) {
   4180          TRACE_D3("  SKIP (no location)\n\n");
   4181          continue;
   4182       }
   4183 
   4184       /* So it has a location, at least.  If it refers to some other
   4185          entry through its absOri field, pull in further info through
   4186          that. */
   4187       if (varp->absOri != (UWord)D3_INVALID_CUOFF) {
   4188          Bool found;
   4189          Word ixFirst, ixLast;
   4190          TempVar key;
   4191          TempVar* keyp = &key;
   4192          TempVar *varAI;
   4193          VG_(memset)(&key, 0, sizeof(key)); /* not necessary */
   4194          key.dioff = varp->absOri; /* this is what we want to find */
   4195          found = VG_(lookupXA)( dioff_lookup_tab, &keyp,
   4196                                 &ixFirst, &ixLast );
   4197          if (!found) {
   4198             /* barf("DW_AT_abstract_origin can't be resolved"); */
   4199             TRACE_D3("  SKIP (DW_AT_abstract_origin can't be resolved)\n\n");
   4200             continue;
   4201          }
   4202          /* If the following fails, there is more than one entry with
   4203             the same dioff.  Which can't happen. */
   4204          vg_assert(ixFirst == ixLast);
   4205          varAI = *(TempVar**)VG_(indexXA)( dioff_lookup_tab, ixFirst );
   4206          /* stay sane */
   4207          vg_assert(varAI);
   4208          vg_assert(varAI->dioff == varp->absOri);
   4209 
   4210          /* Copy what useful info we can. */
   4211          if (varAI->typeR && !varp->typeR)
   4212             varp->typeR = varAI->typeR;
   4213          if (varAI->name && !varp->name)
   4214             varp->name = varAI->name;
   4215          if (varAI->fName && !varp->fName)
   4216             varp->fName = varAI->fName;
   4217          if (varAI->fLine > 0 && varp->fLine == 0)
   4218             varp->fLine = varAI->fLine;
   4219       }
   4220 
   4221       /* Give it a name if it doesn't have one. */
   4222       if (!varp->name)
   4223          varp->name = ML_(addStr)( di, "<anon_var>", -1 );
   4224 
   4225       /* So now does it have enough info to be useful? */
   4226       /* NOTE: re typeR: this is a hack.  If typeR is Te_UNKNOWN then
   4227          the type didn't get resolved.  Really, in that case
   4228          something's broken earlier on, and should be fixed, rather
   4229          than just skipping the variable. */
   4230       ent = ML_(TyEnts__index_by_cuOff)( tyents_to_keep,
   4231                                          tyents_to_keep_cache,
   4232                                          varp->typeR );
   4233       /* The next two assertions should be guaranteed by
   4234          our previous call to resolve_variable_types. */
   4235       vg_assert(ent);
   4236       vg_assert(ML_(TyEnt__is_type)(ent) || ent->tag == Te_UNKNOWN);
   4237 
   4238       if (ent->tag == Te_UNKNOWN) continue;
   4239 
   4240       vg_assert(varp->gexpr);
   4241       vg_assert(varp->name);
   4242       vg_assert(varp->typeR);
   4243       vg_assert(varp->level >= 0);
   4244 
   4245       /* Ok.  So we're going to keep it.  Call ML_(addVar) once for
   4246          each address range in which the variable exists. */
   4247       TRACE_D3("  ACQUIRE for range(s) ");
   4248       { AddrRange  oneRange;
   4249         AddrRange* varPcRanges;
   4250         Word       nVarPcRanges;
   4251         /* Set up to iterate over address ranges, however
   4252            represented. */
   4253         if (varp->nRanges == 0 || varp->nRanges == 1) {
   4254            vg_assert(!varp->rngMany);
   4255            if (varp->nRanges == 0) {
   4256               vg_assert(varp->rngOneMin == 0);
   4257               vg_assert(varp->rngOneMax == 0);
   4258            }
   4259            nVarPcRanges = varp->nRanges;
   4260            oneRange.aMin = varp->rngOneMin;
   4261            oneRange.aMax = varp->rngOneMax;
   4262            varPcRanges = &oneRange;
   4263         } else {
   4264            vg_assert(varp->rngMany);
   4265            vg_assert(varp->rngOneMin == 0);
   4266            vg_assert(varp->rngOneMax == 0);
   4267            nVarPcRanges = VG_(sizeXA)(varp->rngMany);
   4268            vg_assert(nVarPcRanges >= 2);
   4269            vg_assert(nVarPcRanges == (Word)varp->nRanges);
   4270            varPcRanges = VG_(indexXA)(varp->rngMany, 0);
   4271         }
   4272         if (varp->level == 0)
   4273            vg_assert( nVarPcRanges == 1 );
   4274         /* and iterate */
   4275         for (i = 0; i < nVarPcRanges; i++) {
   4276            Addr pcMin = varPcRanges[i].aMin;
   4277            Addr pcMax = varPcRanges[i].aMax;
   4278            vg_assert(pcMin <= pcMax);
   4279            /* Level 0 is the global address range.  So at level 0 we
   4280               don't want to bias pcMin/pcMax; but at all other levels
   4281               we do since those are derived from svmas in the Dwarf
   4282               we're reading.  Be paranoid ... */
   4283            if (varp->level == 0) {
   4284               vg_assert(pcMin == (Addr)0);
   4285               vg_assert(pcMax == ~(Addr)0);
   4286            } else {
   4287               /* vg_assert(pcMin > (Addr)0);
   4288                  No .. we can legitimately expect to see ranges like
   4289                  0x0-0x11D (pre-biasing, of course). */
   4290               vg_assert(pcMax < ~(Addr)0);
   4291            }
   4292 
   4293            /* Apply text biasing, for non-global variables. */
   4294            if (varp->level > 0) {
   4295               pcMin += di->text_debug_bias;
   4296               pcMax += di->text_debug_bias;
   4297            }
   4298 
   4299            if (i > 0 && (i%2) == 0)
   4300               TRACE_D3("\n                       ");
   4301            TRACE_D3("[%#lx,%#lx] ", pcMin, pcMax );
   4302 
   4303            ML_(addVar)(
   4304               di, varp->level,
   4305                   pcMin, pcMax,
   4306                   varp->name,  varp->typeR,
   4307                   varp->gexpr, varp->fbGX,
   4308                   varp->fName, varp->fLine, td3
   4309            );
   4310         }
   4311       }
   4312 
   4313       TRACE_D3("\n\n");
   4314       /* and move on to the next var */
   4315    }
   4316 
   4317    /* Now free all the TempVars */
   4318    n = VG_(sizeXA)( tempvars );
   4319    for (i = 0; i < n; i++) {
   4320       varp = *(TempVar**)VG_(indexXA)( tempvars, i );
   4321       ML_(dinfo_free)(varp);
   4322    }
   4323    VG_(deleteXA)( tempvars );
   4324    tempvars = NULL;
   4325 
   4326    /* and the temp lookup table */
   4327    VG_(deleteXA)( dioff_lookup_tab );
   4328 
   4329    /* and the ranges tree.  Note that we need to also free the XArrays
   4330       which constitute the keys, hence pass VG_(deleteXA) as a
   4331       key-finalizer. */
   4332    VG_(deleteFM)( rangestree, (void(*)(UWord))VG_(deleteXA), NULL );
   4333 
   4334    /* and the tyents_to_keep cache */
   4335    ML_(dinfo_free)( tyents_to_keep_cache );
   4336    tyents_to_keep_cache = NULL;
   4337 
   4338    vg_assert( varparser.filenameTable == NULL );
   4339 
   4340    /* And the signatured type hash.  */
   4341    VG_(HT_destruct) ( signature_types, ML_(dinfo_free) );
   4342 
   4343    /* record the GExprs in di so they can be freed later */
   4344    vg_assert(!di->admin_gexprs);
   4345    di->admin_gexprs = gexprs;
   4346 }
   4347 
   4348 
   4349 /*------------------------------------------------------------*/
   4350 /*---                                                      ---*/
   4351 /*--- The "new" DWARF3 reader -- top level control logic   ---*/
   4352 /*---                                                      ---*/
   4353 /*------------------------------------------------------------*/
   4354 
   /* State for the non-local error exit used by ML_(new_dwarf3_reader):
        d3rd_jmpbuf_valid   True only while ML_(new_dwarf3_reader) has
                            armed d3rd_jmpbuf; barf() asserts it before
                            jumping.
        d3rd_jmpbuf_reason  Message pointer stored by barf() just before
                            it longjmps; ML_(new_dwarf3_reader) hands it
                            to ML_(symerr) and then resets it to NULL.
        d3rd_jmpbuf         Jump buffer filled in by VG_MINIMAL_SETJMP in
                            ML_(new_dwarf3_reader) and targeted by
                            VG_MINIMAL_LONGJMP in barf().
      NOTE(review): as file-scope statics these presumably make the
      reader non-reentrant; the entry assertions in
      ML_(new_dwarf3_reader) would fire on a nested call -- confirm that
      callers never nest. */
   4355 static Bool               d3rd_jmpbuf_valid  = False;
   4356 static const HChar*       d3rd_jmpbuf_reason = NULL;
   4357 static VG_MINIMAL_JMP_BUF(d3rd_jmpbuf);
   4358 
   /* Abandon the DWARF3 read currently in progress.  Records 'reason'
      in d3rd_jmpbuf_reason and longjmps to d3rd_jmpbuf; never returns
      to its caller.

      reason: human-readable failure message.  Only the pointer is
              stored (no copy is made), so the string must stay valid
              until the setjmp site has consumed it.

      Precondition (asserted): d3rd_jmpbuf_valid is True, i.e. the jump
      buffer has been armed. */
   4359 static __attribute__((noreturn)) void barf ( const HChar* reason ) {
   4360    vg_assert(d3rd_jmpbuf_valid);
   4361    d3rd_jmpbuf_reason = reason;
   4362    VG_MINIMAL_LONGJMP(d3rd_jmpbuf);
   4363    /*NOTREACHED*/
           /* Backstop: trips if the longjmp above ever falls through. */
   4364    vg_assert(0);
   4365 }
   4366 
   4367 
   /* Top-level entry point for the .debug_info reader.  Arms
      d3rd_jmpbuf, then calls new_dwarf3_reader_wrk with 'di', the
      section slices, and 'barf' as the failure callback.  If the worker
      returns normally the read succeeded.  If the worker calls barf,
      control resumes here through the longjmp and the recorded reason
      is passed to ML_(symerr).  On both paths d3rd_jmpbuf_valid and
      d3rd_jmpbuf_reason are reset before returning.  Returns nothing;
      failure is not reported to the caller through a return value.

      di:      the DebugInfo being read into; di->trace_symtab
               initialises td3.
      escn_*:  slices for the individual debug sections (main and
               "_alt" variants), forwarded unchanged to the worker. */
   4368 void
   4369 ML_(new_dwarf3_reader) (
   4370    struct _DebugInfo* di,
   4371    DiSlice escn_debug_info,      DiSlice escn_debug_types,
   4372    DiSlice escn_debug_abbv,      DiSlice escn_debug_line,
   4373    DiSlice escn_debug_str,       DiSlice escn_debug_ranges,
   4374    DiSlice escn_debug_loc,       DiSlice escn_debug_info_alt,
   4375    DiSlice escn_debug_abbv_alt,  DiSlice escn_debug_line_alt,
   4376    DiSlice escn_debug_str_alt
   4377 )
   4378 {
           /* NOTE(review): both locals are volatile, presumably so that
              their values are well-defined after the longjmp back into
              this frame -- confirm.  td3 is never named below;
              presumably the TRACE_D3 macro reads it -- confirm against
              the macro definition. */
   4379    volatile Int  jumped;
   4380    volatile Bool td3 = di->trace_symtab;
   4381 
   4382    /* Run the _wrk function to read the dwarf3.  If it succeeds, it
   4383       just returns normally.  If there is any failure, it longjmp's
   4384       back here, having first set d3rd_jmpbuf_reason to something
   4385       useful. */
   4386    vg_assert(d3rd_jmpbuf_valid  == False);
   4387    vg_assert(d3rd_jmpbuf_reason == NULL);
   4388 
           /* Arm the jump buffer.  The flag is raised before the setjmp
              so that barf's assertion holds for any call made by the
              worker. */
   4389    d3rd_jmpbuf_valid = True;
   4390    jumped = VG_MINIMAL_SETJMP(d3rd_jmpbuf);
   4391    if (jumped == 0) {
   4392       /* try this ... */
   4393       new_dwarf3_reader_wrk( di, barf,
   4394                              escn_debug_info,     escn_debug_types,
   4395                              escn_debug_abbv,     escn_debug_line,
   4396                              escn_debug_str,      escn_debug_ranges,
   4397                              escn_debug_loc,      escn_debug_info_alt,
   4398                              escn_debug_abbv_alt, escn_debug_line_alt,
   4399                              escn_debug_str_alt );
   4400       d3rd_jmpbuf_valid = False;
   4401       TRACE_D3("\n------ .debug_info reading was successful ------\n");
   4402    } else {
   4403       /* It longjmp'd. */
   4404       d3rd_jmpbuf_valid = False;
   4405       /* Can't longjump without giving some sort of reason. */
   4406       vg_assert(d3rd_jmpbuf_reason != NULL);
   4407 
   4408       TRACE_D3("\n------ .debug_info reading failed ------\n");
   4409 
   4410       ML_(symerr)(di, True, d3rd_jmpbuf_reason);
   4411    }
   4412 
           /* Restore the statics to the state the entry assertions
              expect.  The flag was already cleared on both branches
              above, so only the reason reset has any effect here. */
   4413    d3rd_jmpbuf_valid  = False;
   4414    d3rd_jmpbuf_reason = NULL;
   4415 }
   4416 
   4417 
   4418 
   4419 /* --- Unused code fragments which might be useful one day. --- */
   4420 
   /* Disabled fragment (compiled out by #if 0): walks .debug_aranges
      set by set.  For each set it reads the header (initial length,
      version, .debug_info offset, address size, segment size), traces
      those fields, skips bytes until the cursor position is a multiple
      of 2*asize, and then traces (address, length) pairs up to and
      including the terminating (0, 0) pair.
      NOTE(review): the fragment uses 'aranges', 'debug_aranges_img' and
      'debug_aranges_sz', none of which is declared inside it;
      presumably they belonged to the function it was lifted from, so it
      needs a host function before it can be re-enabled. */
   4421 #if 0
   4422    /* Read the arange tables */
   4423    TRACE_SYMTAB("\n");
   4424    TRACE_SYMTAB("\n------ The contents of .debug_arange ------\n");
   4425    init_Cursor( &aranges, debug_aranges_img,
   4426                 debug_aranges_sz, 0, barf,
   4427                 "Overrun whilst reading .debug_aranges section" );
   4428    while (True) {
   4429       ULong  len, d_i_offset;
   4430       Bool   is64;
   4431       UShort version;
   4432       UChar  asize, segsize;
   4433 
   4434       if (is_at_end_Cursor( &aranges ))
   4435          break;
   4436       /* Read one arange thingy */
   4437       /* initial_length field */
   4438       len = get_Initial_Length( &is64, &aranges,
   4439                "in .debug_aranges: invalid initial-length field" );
   4440       version    = get_UShort( &aranges );
   4441       d_i_offset = get_Dwarfish_UWord( &aranges, is64 );
   4442       asize      = get_UChar( &aranges );
   4443       segsize    = get_UChar( &aranges );
   4444       TRACE_D3("  Length:                   %llu\n", len);
   4445       TRACE_D3("  Version:                  %d\n", (Int)version);
   4446       TRACE_D3("  Offset into .debug_info:  %llx\n", d_i_offset);
   4447       TRACE_D3("  Pointer Size:             %d\n", (Int)asize);
   4448       TRACE_D3("  Segment Size:             %d\n", (Int)segsize);
   4449       TRACE_D3("\n");
   4450       TRACE_D3("    Address            Length\n");
   4451 
              /* Skip padding so the first tuple starts at a multiple of
                 the tuple size (2 * asize).
                 NOTE(review): asize is read straight from the section
                 and is not checked; a value of 0 would make the modulus
                 below divide by zero.  Validate it before reviving this
                 code. */
   4452       while ((get_position_of_Cursor( &aranges ) % (2 * asize)) > 0) {
   4453          (void)get_UChar( & aranges );
   4454       }
              /* Tuples are read as 64-bit values only when asize == 8;
                 the (0, 0) pair ends the set. */
   4455       while (True) {
   4456          ULong address = get_Dwarfish_UWord( &aranges, asize==8 );
   4457          ULong length = get_Dwarfish_UWord( &aranges, asize==8 );
   4458          TRACE_D3("    0x%016llx 0x%llx\n", address, length);
   4459          if (address == 0 && length == 0) break;
   4460       }
   4461    }
   4462    TRACE_SYMTAB("\n");
   4463 #endif
   4464 
   4465 #endif // defined(VGO_linux) || defined(VGO_darwin)
   4466 
   4467 /*--------------------------------------------------------------------*/
   4468 /*--- end                                                          ---*/
   4469 /*--------------------------------------------------------------------*/
   4470