Home | History | Annotate | Download | only in m_debuginfo
      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- Read DWARF3/4 ".debug_info" sections (DIE trees).            ---*/
      4 /*---                                                 readdwarf3.c ---*/
      5 /*--------------------------------------------------------------------*/
      6 
      7 /*
      8    This file is part of Valgrind, a dynamic binary instrumentation
      9    framework.
     10 
     11    Copyright (C) 2008-2012 OpenWorks LLP
     12       info (at) open-works.co.uk
     13 
     14    This program is free software; you can redistribute it and/or
     15    modify it under the terms of the GNU General Public License as
     16    published by the Free Software Foundation; either version 2 of the
     17    License, or (at your option) any later version.
     18 
     19    This program is distributed in the hope that it will be useful, but
     20    WITHOUT ANY WARRANTY; without even the implied warranty of
     21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     22    General Public License for more details.
     23 
     24    You should have received a copy of the GNU General Public License
     25    along with this program; if not, write to the Free Software
     26    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     27    02111-1307, USA.
     28 
     29    The GNU General Public License is contained in the file COPYING.
     30 
     31    Neither the names of the U.S. Department of Energy nor the
     32    University of California nor the names of its contributors may be
     33    used to endorse or promote products derived from this software
     34    without prior written permission.
     35 */
     36 
     37 #if defined(VGO_linux) || defined(VGO_darwin)
     38 
     39 /* REFERENCE (without which this code will not make much sense):
     40 
     41    DWARF Debugging Information Format, Version 3,
     42    dated 20 December 2005 (the "D3 spec").
     43 
     44    Available at http://www.dwarfstd.org/Dwarf3.pdf.  There's also a
     45    .doc (MS Word) version, but for some reason the section numbers
     46    between the Word and PDF versions differ by 1 in the first digit.
     47    All section references in this code are to the PDF version.
     48 
     49    CURRENT HACKS:
     50 
     51    DW_TAG_{const,volatile}_type with no DW_AT_type is allowed; it is
     52       assumed to mean "const void" or "volatile void" respectively.
     53       GDB appears to interpret them like this, anyway.
     54 
     55    In many cases it is important to know the svma of a CU (the "base
     56    address of the CU", as the D3 spec calls it).  There are some
     57    situations in which the spec implies this value is unknown, but the
     58    Dwarf3 produced by gcc-4.1 seems to assume it is not unknown but
     59    merely zero when not explicitly stated.  So we too have to make
     60    that assumption.
     61 
     62    POTENTIAL BUG?  Spotted 6 Sept 08.  Why doesn't
     63    unitary_range_list() bias the resulting range list in the same way
     64    that its more general cousin, get_range_list(), does?  I don't
     65    know.
     66 
     67    TODO, 2008 Feb 17:
     68 
     69    get rid of cu_svma_known and document the assumed-zero svma hack.
     70 
     71    ML_(sizeOfType): differentiate between zero sized types and types
     72    for which the size is unknown.  Is this important?  I don't know.
     73 
     74    DW_TAG_array_types: deal with explicit sizes (currently we compute
     75    the size from the bounds and the element size, although that's
     76    fragile, if the bounds are incompletely specified, or completely
     77    absent)
     78 
     79    Document reason for difference (by 1) of stack preening depth in
     80    parse_var_DIE vs parse_type_DIE.
     81 
     82    Don't hand to ML_(addVars), vars whose locations are entirely in
     83    registers (DW_OP_reg*).  This is merely a space-saving
     84    optimisation, as ML_(evaluate_Dwarf3_Expr) should handle these
     85    expressions correctly, by failing to evaluate them and hence
     86    effectively ignoring the variable with which they are associated.
     87 
     88    Deal with DW_TAG_array_types which have element size != stride
     89 
     90    In some cases, the info for a variable is split between two
     91    different DIEs (generally a declarer and a definer).  We punt on
     92    these.  Could do better here.
     93 
     94    The 'data_bias' argument passed to the expression evaluator
     95    (ML_(evaluate_Dwarf3_Expr)) should really be changed to a
     96    MaybeUWord, to make it clear when we do vs don't know what it is
     97    for the evaluation of an expression.  At the moment zero is passed
     98    for this parameter in the don't know case.  That's a bit fragile
     99    and obscure; using a MaybeUWord would be clearer.
    100 
    101    POTENTIAL PERFORMANCE IMPROVEMENTS:
    102 
    103    Currently, duplicate removal and all other queries for the type
    104    entities array are done using cuOffset-based pointing, which
    105    involves a binary search (VG_(lookupXA)) for each access.  This is
    106    wildly inefficient, although simple.  It would be better to
    107    translate all the cuOffset-based references (iow, all the "R" and
    108    "Rs" fields in the TyEnts in 'tyents') to direct index numbers in
    109    'tyents' right at the start of dedup_types(), and use direct
    110    indexing (VG_(indexXA)) wherever possible after that.
    111 
    112    cmp__XArrays_of_AddrRange is also a performance bottleneck.  Move
    113    VG_(indexXA) into pub_tool_xarray.h so it can be inlined at all use
    114    points, and possibly also make an _UNCHECKED version which skips
    115    the range checks in performance-critical situations such as this.
    116 
    117    Handle interaction between read_DIE and parse_{var,type}_DIE
    118    better.  Currently read_DIE reads the entire DIE just to find where
    119    the end is (and for debug printing), so that it can later reliably
    120    move the cursor to the end regardless of what parse_{var,type}_DIE
    121    do.  This means many DIEs (most, even?) are read twice.  It would
    122    be smarter to make parse_{var,type}_DIE return a Bool indicating
    123    whether or not they advanced the DIE cursor, and only if they
    124    didn't should read_DIE itself read through the DIE.
    125 
    126    ML_(addVar) and add_var_to_arange: quite a lot of DiAddrRanges have
    127    zero variables in their .vars XArray.  Rather than have an XArray
    128    with zero elements (which uses 2 malloc'd blocks), allow the .vars
    129    pointer to be NULL in this case.
    130 
    131    More generally, reduce the amount of memory allocated and freed
    132    while reading Dwarf3 type/variable information.  Even modest (20MB)
    133    objects cause this module to allocate and free hundreds of
    134    thousands of small blocks, and ML_(arena_malloc) and its various
    135    groupies always show up at the top of performance profiles. */
    136 
    137 #include "pub_core_basics.h"
    138 #include "pub_core_debuginfo.h"
    139 #include "pub_core_libcbase.h"
    140 #include "pub_core_libcassert.h"
    141 #include "pub_core_libcprint.h"
    142 #include "pub_core_libcsetjmp.h"   // setjmp facilities
    143 #include "pub_core_hashtable.h"
    144 #include "pub_core_options.h"
    145 #include "pub_core_tooliface.h"    /* VG_(needs) */
    146 #include "pub_core_xarray.h"
    147 #include "pub_core_wordfm.h"
    148 #include "priv_misc.h"             /* dinfo_zalloc/free */
    149 #include "priv_tytypes.h"
    150 #include "priv_d3basics.h"
    151 #include "priv_storage.h"
    152 #include "priv_readdwarf3.h"       /* self */
    153 
    154 
    155 /*------------------------------------------------------------*/
    156 /*---                                                      ---*/
    157 /*--- Basic machinery for parsing DIEs.                    ---*/
    158 /*---                                                      ---*/
    159 /*------------------------------------------------------------*/
    160 
    161 #define TRACE_D3(format, args...) \
    162    if (td3) { VG_(printf)(format, ## args); }
    163 
    164 #define D3_INVALID_CUOFF  ((UWord)(-1UL))
    165 #define D3_FAKEVOID_CUOFF ((UWord)(-2UL))
    166 
/* A read cursor over a region of an in-memory image.  The get_*
   functions read at offset 'region_next' and advance it; a read that
   would run past 'region_szB' is reported by calling 'barf' with
   'barfstr'. */
typedef
   struct {
      /* Address of the first byte of the region. */
      UChar* region_start_img;
      /* Size of the region, in bytes. */
      UWord  region_szB;
      /* Offset, relative to region_start_img, of the next byte to be
         read. */
      UWord  region_next;
      /* Error handler.  It does not return. */
      void (*barf)( HChar* ) __attribute__((noreturn));
      /* Message handed to barf when a read overruns the region. */
      HChar* barfstr;
   }
   Cursor;
    176 
    177 static inline Bool is_sane_Cursor ( Cursor* c ) {
    178    if (!c)                return False;
    179    if (!c->barf)          return False;
    180    if (!c->barfstr)       return False;
    181    return True;
    182 }
    183 
    184 static void init_Cursor ( Cursor* c,
    185                           UChar*  region_start_img,
    186                           UWord   region_szB,
    187                           UWord   region_next,
    188                           __attribute__((noreturn)) void (*barf)( HChar* ),
    189                           HChar*  barfstr )
    190 {
    191    vg_assert(c);
    192    VG_(memset)(c, 0, sizeof(*c));
    193    c->region_start_img = region_start_img;
    194    c->region_szB       = region_szB;
    195    c->region_next      = region_next;
    196    c->barf             = barf;
    197    c->barfstr          = barfstr;
    198    vg_assert(is_sane_Cursor(c));
    199 }
    200 
    201 static Bool is_at_end_Cursor ( Cursor* c ) {
    202    vg_assert(is_sane_Cursor(c));
    203    return c->region_next >= c->region_szB;
    204 }
    205 
    206 static inline UWord get_position_of_Cursor ( Cursor* c ) {
    207    vg_assert(is_sane_Cursor(c));
    208    return c->region_next;
    209 }
/* Move the read position to offset 'pos' (relative to the start of
   the region).  'pos' is not range-checked here; the get_* functions
   check against the region size when they next read. */
static inline void set_position_of_Cursor ( Cursor* c, UWord pos ) {
   c->region_next = pos;
   vg_assert(is_sane_Cursor(c));
}
    214 
    215 static /*signed*/Word get_remaining_length_Cursor ( Cursor* c ) {
    216    vg_assert(is_sane_Cursor(c));
    217    return c->region_szB - c->region_next;
    218 }
    219 
    220 static UChar* get_address_of_Cursor ( Cursor* c ) {
    221    vg_assert(is_sane_Cursor(c));
    222    return &c->region_start_img[ c->region_next ];
    223 }
    224 
    225 /* FIXME: document assumptions on endianness for
    226    get_UShort/UInt/ULong. */
    227 static inline UChar get_UChar ( Cursor* c ) {
    228    UChar r;
    229    /* vg_assert(is_sane_Cursor(c)); */
    230    if (c->region_next + sizeof(UChar) > c->region_szB) {
    231       c->barf(c->barfstr);
    232       /*NOTREACHED*/
    233       vg_assert(0);
    234    }
    235    r = * (UChar*) &c->region_start_img[ c->region_next ];
    236    c->region_next += sizeof(UChar);
    237    return r;
    238 }
    239 static UShort get_UShort ( Cursor* c ) {
    240    UShort r;
    241    vg_assert(is_sane_Cursor(c));
    242    if (c->region_next + sizeof(UShort) > c->region_szB) {
    243       c->barf(c->barfstr);
    244       /*NOTREACHED*/
    245       vg_assert(0);
    246    }
    247    r = ML_(read_UShort)(&c->region_start_img[ c->region_next ]);
    248    c->region_next += sizeof(UShort);
    249    return r;
    250 }
    251 static UInt get_UInt ( Cursor* c ) {
    252    UInt r;
    253    vg_assert(is_sane_Cursor(c));
    254    if (c->region_next + sizeof(UInt) > c->region_szB) {
    255       c->barf(c->barfstr);
    256       /*NOTREACHED*/
    257       vg_assert(0);
    258    }
    259    r = ML_(read_UInt)(&c->region_start_img[ c->region_next ]);
    260    c->region_next += sizeof(UInt);
    261    return r;
    262 }
    263 static ULong get_ULong ( Cursor* c ) {
    264    ULong r;
    265    vg_assert(is_sane_Cursor(c));
    266    if (c->region_next + sizeof(ULong) > c->region_szB) {
    267       c->barf(c->barfstr);
    268       /*NOTREACHED*/
    269       vg_assert(0);
    270    }
    271    r = ML_(read_ULong)(&c->region_start_img[ c->region_next ]);
    272    c->region_next += sizeof(ULong);
    273    return r;
    274 }
    275 static inline ULong get_ULEB128 ( Cursor* c ) {
    276    ULong result;
    277    Int   shift;
    278    UChar byte;
    279    /* unroll first iteration */
    280    byte = get_UChar( c );
    281    result = (ULong)(byte & 0x7f);
    282    if (LIKELY(!(byte & 0x80))) return result;
    283    shift = 7;
    284    /* end unroll first iteration */
    285    do {
    286       byte = get_UChar( c );
    287       result |= ((ULong)(byte & 0x7f)) << shift;
    288       shift += 7;
    289    } while (byte & 0x80);
    290    return result;
    291 }
    292 static Long get_SLEB128 ( Cursor* c ) {
    293    ULong  result = 0;
    294    Int    shift = 0;
    295    UChar  byte;
    296    do {
    297       byte = get_UChar(c);
    298       result |= ((ULong)(byte & 0x7f)) << shift;
    299       shift += 7;
    300    } while (byte & 0x80);
    301    if (shift < 64 && (byte & 0x40))
    302       result |= -(1ULL << shift);
    303    return result;
    304 }
    305 
    306 /* Assume 'c' points to the start of a string.  Return the absolute
    307    address of whatever it points at, and advance it past the
    308    terminating zero.  This makes it safe for the caller to then copy
    309    the string with ML_(addStr), since (w.r.t. image overruns) the
    310    process of advancing past the terminating zero will already have
    311    "vetted" the string. */
    312 static UChar* get_AsciiZ ( Cursor* c ) {
    313    UChar  uc;
    314    UChar* res = get_address_of_Cursor(c);
    315    do { uc = get_UChar(c); } while (uc != 0);
    316    return res;
    317 }
    318 
    319 static ULong peek_ULEB128 ( Cursor* c ) {
    320    Word here = c->region_next;
    321    ULong r = get_ULEB128( c );
    322    c->region_next = here;
    323    return r;
    324 }
    325 static UChar peek_UChar ( Cursor* c ) {
    326    Word here = c->region_next;
    327    UChar r = get_UChar( c );
    328    c->region_next = here;
    329    return r;
    330 }
    331 
    332 static ULong get_Dwarfish_UWord ( Cursor* c, Bool is_dw64 ) {
    333    return is_dw64 ? get_ULong(c) : (ULong) get_UInt(c);
    334 }
    335 
    336 static UWord get_UWord ( Cursor* c ) {
    337    vg_assert(sizeof(UWord) == sizeof(void*));
    338    if (sizeof(UWord) == 4) return get_UInt(c);
    339    if (sizeof(UWord) == 8) return get_ULong(c);
    340    vg_assert(0);
    341 }
    342 
    343 /* Read a DWARF3 'Initial Length' field */
    344 static ULong get_Initial_Length ( /*OUT*/Bool* is64,
    345                                   Cursor* c,
    346                                   HChar* barfMsg )
    347 {
    348    ULong w64;
    349    UInt  w32;
    350    *is64 = False;
    351    w32 = get_UInt( c );
    352    if (w32 >= 0xFFFFFFF0 && w32 < 0xFFFFFFFF) {
    353       c->barf( barfMsg );
    354    }
    355    else if (w32 == 0xFFFFFFFF) {
    356       *is64 = True;
    357       w64   = get_ULong( c );
    358    } else {
    359       *is64 = False;
    360       w64 = (ULong)w32;
    361    }
    362    return w64;
    363 }
    364 
    365 
    366 /*------------------------------------------------------------*/
    367 /*---                                                      ---*/
    368 /*--- "CUConst" structure                                  ---*/
    369 /*---                                                      ---*/
    370 /*------------------------------------------------------------*/
    371 
/* Number of entries in the set_abbv_Cursor cache (the saC_cache
   array in CUConst). */
#define N_ABBV_CACHE 32

/* Holds information that is constant through the parsing of a
   Compilation Unit.  This is basically plumbed through to
   everywhere. */
typedef
   struct {
      /* Call here if anything goes wrong.  Does not return. */
      void (*barf)( HChar* ) __attribute__((noreturn));
      /* Is this 64-bit DWARF ? */
      Bool   is_dw64;
      /* Which DWARF version ?  (2, 3 or 4) */
      UShort version;
      /* Length of this Compilation Unit, as stated in the
         .unit_length :: InitialLength field of the CU Header.
         However, this size (as specified by the D3 spec) does not
         include the size of the .unit_length field itself, which is
         either 4 or 12 bytes (32-bit or 64-bit Dwarf3).  That value
         can be obtained through the expression ".is_dw64 ? 12 : 4". */
      ULong  unit_length;
      /* Offset of start of this unit in .debug_info */
      UWord  cu_start_offset;
      /* SVMA for this CU.  In the D3 spec, this is known as the "base
         address of the compilation unit" (last para sec 3.1.1).
         Needed for (amongst things) interpretation of location-list
         values.  cu_svma_known says whether cu_svma has been
         established. */
      Addr   cu_svma;
      Bool   cu_svma_known;
      /* The debug_abbreviations table to be used for this Unit */
      UChar* debug_abbv;
      /* Upper bound on size thereof (an overestimate, in general) */
      UWord  debug_abbv_maxszB;
      /* Where is .debug_str ?  (image address and size) */
      UChar* debug_str_img;
      UWord  debug_str_sz;
      /* Where is .debug_ranges ? */
      UChar* debug_ranges_img;
      UWord  debug_ranges_sz;
      /* Where is .debug_loc ? */
      UChar* debug_loc_img;
      UWord  debug_loc_sz;
      /* Where is .debug_line? */
      UChar* debug_line_img;
      UWord  debug_line_sz;
      /* Where is .debug_info? */
      UChar* debug_info_img;
      UWord  debug_info_sz;
      /* Where is .debug_types? */
      UChar* debug_types_img;
      UWord  debug_types_sz;
      /* Where is alternate .debug_info? */
      UChar* debug_info_alt_img;
      UWord  debug_info_alt_sz;
      /* Where is alternate .debug_str ? */
      UChar* debug_str_alt_img;
      UWord  debug_str_alt_sz;
      /* How much to add to .debug_types resp. alternate .debug_info
         offsets in cook_die*.  */
      UWord  types_cuOff_bias;
      UWord  alt_cuOff_bias;
      /* --- Needed so we can add stuff to the string table. --- */
      struct _DebugInfo* di;
      /* --- a cache for set_abbv_Cursor --- */
      /* abbv_code == (ULong)-1 for an unused entry. */
      struct { ULong abbv_code; UWord posn; } saC_cache[N_ABBV_CACHE];
      /* Query and miss counters for the cache above. */
      UWord saC_cache_queries;
      UWord saC_cache_misses;

      /* True if this came from .debug_types; otherwise it came from
         .debug_info.  */
      Bool is_type_unit;
      /* For a unit coming from .debug_types, these hold the TU's type
         signature and the uncooked DIE offset of the TU's signatured
         type.  For a unit coming from .debug_info, these are unused.  */
      ULong type_signature;
      ULong type_offset;

      /* Signatured type hash; computed once and then shared by all
         CUs.  */
      VgHashTable signature_types;

      /* True if this came from alternate .debug_info; otherwise
         it came from normal .debug_info or .debug_types.  */
      Bool is_alt_info;
   }
   CUConst;
    458 
    459 
    460 /* Return the cooked value of DIE depending on whether CC represents a
    461    .debug_types unit.  To cook a DIE, we pretend that the .debug_info,
    462    .debug_types and optional alternate .debug_info sections form
    463    a contiguous whole, so that DIEs coming from .debug_types are numbered
    464    starting at the end of .debug_info and DIEs coming from alternate
    465    .debug_info are numbered starting at the end of .debug_types.  */
    466 static UWord cook_die( CUConst* cc, UWord die )
    467 {
    468    if (cc->is_type_unit)
    469       die += cc->types_cuOff_bias;
    470    else if (cc->is_alt_info)
    471       die += cc->alt_cuOff_bias;
    472    return die;
    473 }
    474 
    475 /* Like cook_die, but understand that DIEs coming from a
    476    DW_FORM_ref_sig8 reference are already cooked.  Also, handle
    477    DW_FORM_GNU_ref_alt from within primary .debug_info or .debug_types
    478    as reference to alternate .debug_info.  */
    479 static UWord cook_die_using_form( CUConst *cc, UWord die, DW_FORM form)
    480 {
    481    if (form == DW_FORM_ref_sig8)
    482       return die;
    483    if (form == DW_FORM_GNU_ref_alt)
    484       return die + cc->alt_cuOff_bias;
    485    return cook_die( cc, die );
    486 }
    487 
    488 /* Return the uncooked offset of DIE and set *TYPE_FLAG to true if the DIE
    489    came from the .debug_types section and *ALT_FLAG to true if the DIE
    490    came from alternate .debug_info section.  */
    491 static UWord uncook_die( CUConst *cc, UWord die, /*OUT*/Bool *type_flag,
    492                          Bool *alt_flag )
    493 {
    494    *alt_flag = False;
    495    *type_flag = False;
    496    if (die >= cc->debug_info_sz) {
    497       if (die >= cc->debug_info_sz + cc->debug_types_sz) {
    498          *alt_flag = True;
    499          die -= cc->debug_info_sz + cc->debug_types_sz;
    500       } else {
    501          *type_flag = True;
    502          die -= cc->debug_info_sz;
    503       }
    504    }
    505    return die;
    506 }
    507 
    508 /*------------------------------------------------------------*/
    509 /*---                                                      ---*/
    510 /*--- Helper functions for Guarded Expressions             ---*/
    511 /*---                                                      ---*/
    512 /*------------------------------------------------------------*/
    513 
    514 /* Parse the location list starting at img-offset 'debug_loc_offset'
    515    in .debug_loc.  Results are biased with 'svma_of_referencing_CU'
    516    and so I believe are correct SVMAs for the object as a whole.  This
    517    function allocates the UChar*, and the caller must deallocate it.
    518    The resulting block is in so-called Guarded-Expression format.
    519 
    520    Guarded-Expression format is similar but not identical to the DWARF3
    521    location-list format.  The format of each returned block is:
    522 
    523       UChar biasMe;
    524       UChar isEnd;
    525       followed by zero or more of
    526 
    527       (Addr aMin;  Addr aMax;  UShort nbytes;  ..bytes..;  UChar isEnd)
    528 
    529    '..bytes..' is a standard DWARF3 location expression which is
    530    valid when aMin <= pc <= aMax (possibly after suitable biasing).
    531 
    532    The number of bytes in '..bytes..' is nbytes.
    533 
    534    The end of the sequence is marked by an isEnd == 1 value.  All
    535    previous isEnd values must be zero.
    536 
    537    biasMe is 1 if the aMin/aMax fields need this DebugInfo's
    538    text_bias added before use, and 0 if this is not necessary
    539    (the GX is ready to go).
    540 
    541    Hence the block can be quickly parsed and is self-describing.  Note
    542    that aMax is 1 less than the corresponding value in a DWARF3
    543    location list.  Zero length ranges, with aMax == aMin-1, are not
    544    allowed.
    545 */
    546 /* 2008-sept-12: moved ML_(pp_GX) from here to d3basics.c, where
    547    it more logically belongs. */
    548 
    549 
    550 /* Apply a text bias to a GX. */
    551 static void bias_GX ( /*MOD*/GExpr* gx, struct _DebugInfo* di )
    552 {
    553    UShort nbytes;
    554    UChar* p = &gx->payload[0];
    555    UChar* pA;
    556    UChar  uc;
    557    uc = *p++; /*biasMe*/
    558    if (uc == 0)
    559       return;
    560    vg_assert(uc == 1);
    561    p[-1] = 0; /* mark it as done */
    562    while (True) {
    563       uc = *p++;
    564       if (uc == 1)
    565          break; /*isEnd*/
    566       vg_assert(uc == 0);
    567       /* t-bias aMin */
    568       pA = (UChar*)p;
    569       ML_(write_Addr)(pA, ML_(read_Addr)(pA) + di->text_debug_bias);
    570       p += sizeof(Addr);
    571       /* t-bias aMax */
    572       pA = (UChar*)p;
    573       ML_(write_Addr)(pA, ML_(read_Addr)(pA) + di->text_debug_bias);
    574       p += sizeof(Addr);
    575       /* nbytes, and actual expression */
    576       nbytes = ML_(read_UShort)(p); p += sizeof(UShort);
    577       p += nbytes;
    578    }
    579 }
    580 
    581 __attribute__((noinline))
    582 static GExpr* make_singleton_GX ( UChar* block, UWord nbytes )
    583 {
    584    SizeT  bytesReqd;
    585    GExpr* gx;
    586    UChar *p, *pstart;
    587 
    588    vg_assert(sizeof(UWord) == sizeof(Addr));
    589    vg_assert(nbytes <= 0xFFFF); /* else we overflow the nbytes field */
    590    bytesReqd
    591       =   sizeof(UChar)  /*biasMe*/    + sizeof(UChar) /*!isEnd*/
    592         + sizeof(UWord)  /*aMin*/      + sizeof(UWord) /*aMax*/
    593         + sizeof(UShort) /*nbytes*/    + nbytes
    594         + sizeof(UChar); /*isEnd*/
    595 
    596    gx = ML_(dinfo_zalloc)( "di.readdwarf3.msGX.1",
    597                            sizeof(GExpr) + bytesReqd );
    598    vg_assert(gx);
    599 
    600    p = pstart = &gx->payload[0];
    601 
    602    p = ML_(write_UChar)(p, 0);        /*biasMe*/
    603    p = ML_(write_UChar)(p, 0);        /*!isEnd*/
    604    p = ML_(write_Addr)(p, 0);         /*aMin*/
    605    p = ML_(write_Addr)(p, ~0);        /*aMax*/
    606    p = ML_(write_UShort)(p, nbytes);  /*nbytes*/
    607    VG_(memcpy)(p, block, nbytes); p += nbytes;
    608    p = ML_(write_UChar)(p, 1);        /*isEnd*/
    609 
    610    vg_assert( (SizeT)(p - pstart) == bytesReqd);
    611    vg_assert( &gx->payload[bytesReqd]
    612               == ((UChar*)gx) + sizeof(GExpr) + bytesReqd );
    613 
    614    return gx;
    615 }
    616 
    617 __attribute__((noinline))
    618 static GExpr* make_general_GX ( CUConst* cc,
    619                                 Bool     td3,
    620                                 UWord    debug_loc_offset,
    621                                 Addr     svma_of_referencing_CU )
    622 {
    623    Addr      base;
    624    Cursor    loc;
    625    XArray*   xa; /* XArray of UChar */
    626    GExpr*    gx;
    627    Word      nbytes;
    628 
    629    vg_assert(sizeof(UWord) == sizeof(Addr));
    630    if (cc->debug_loc_sz == 0)
    631       cc->barf("make_general_GX: .debug_loc is empty/missing");
    632 
    633    init_Cursor( &loc, cc->debug_loc_img,
    634                 cc->debug_loc_sz, 0, cc->barf,
    635                 "Overrun whilst reading .debug_loc section(2)" );
    636    set_position_of_Cursor( &loc, debug_loc_offset );
    637 
    638    TRACE_D3("make_general_GX (.debug_loc_offset = %lu, img = %p) {\n",
    639             debug_loc_offset, get_address_of_Cursor( &loc ) );
    640 
    641    /* Who frees this xa?  It is freed before this fn exits. */
    642    xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.mgGX.1",
    643                     ML_(dinfo_free),
    644                     sizeof(UChar) );
    645 
    646    { UChar c = 1; /*biasMe*/ VG_(addBytesToXA)( xa, &c, sizeof(c) ); }
    647 
    648    base = 0;
    649    while (True) {
    650       Bool  acquire;
    651       UWord len;
    652       /* Read a (host-)word pair.  This is something of a hack since
    653          the word size to read is really dictated by the ELF file;
    654          however, we assume we're reading a file with the same
    655          word-sizeness as the host.  Reasonably enough. */
    656       UWord w1 = get_UWord( &loc );
    657       UWord w2 = get_UWord( &loc );
    658 
    659       TRACE_D3("   %08lx %08lx\n", w1, w2);
    660       if (w1 == 0 && w2 == 0)
    661          break; /* end of list */
    662 
    663       if (w1 == -1UL) {
    664          /* new value for 'base' */
    665          base = w2;
    666          continue;
    667       }
    668 
    669       /* else a location expression follows */
    670       /* else enumerate [w1+base, w2+base) */
    671       /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc"
    672          (sec 2.17.2) */
    673       if (w1 > w2) {
    674          TRACE_D3("negative range is for .debug_loc expr at "
    675                   "file offset %lu\n",
    676                   debug_loc_offset);
    677          cc->barf( "negative range in .debug_loc section" );
    678       }
    679 
    680       /* ignore zero length ranges */
    681       acquire = w1 < w2;
    682       len     = (UWord)get_UShort( &loc );
    683 
    684       if (acquire) {
    685          UWord  w;
    686          UShort s;
    687          UChar  c;
    688          c = 0; /* !isEnd*/
    689          VG_(addBytesToXA)( xa, &c, sizeof(c) );
    690          w = w1    + base + svma_of_referencing_CU;
    691          VG_(addBytesToXA)( xa, &w, sizeof(w) );
    692          w = w2 -1 + base + svma_of_referencing_CU;
    693          VG_(addBytesToXA)( xa, &w, sizeof(w) );
    694          s = (UShort)len;
    695          VG_(addBytesToXA)( xa, &s, sizeof(s) );
    696       }
    697 
    698       while (len > 0) {
    699          UChar byte = get_UChar( &loc );
    700          TRACE_D3("%02x", (UInt)byte);
    701          if (acquire)
    702             VG_(addBytesToXA)( xa, &byte, 1 );
    703          len--;
    704       }
    705       TRACE_D3("\n");
    706    }
    707 
    708    { UChar c = 1; /*isEnd*/ VG_(addBytesToXA)( xa, &c, sizeof(c) ); }
    709 
    710    nbytes = VG_(sizeXA)( xa );
    711    vg_assert(nbytes >= 1);
    712 
    713    gx = ML_(dinfo_zalloc)( "di.readdwarf3.mgGX.2", sizeof(GExpr) + nbytes );
    714    vg_assert(gx);
    715    VG_(memcpy)( &gx->payload[0], (UChar*)VG_(indexXA)(xa,0), nbytes );
    716    vg_assert( &gx->payload[nbytes]
    717               == ((UChar*)gx) + sizeof(GExpr) + nbytes );
    718 
    719    VG_(deleteXA)( xa );
    720 
    721    TRACE_D3("}\n");
    722 
    723    return gx;
    724 }
    725 
    726 
    727 /*------------------------------------------------------------*/
    728 /*---                                                      ---*/
    729 /*--- Helper functions for range lists and CU headers      ---*/
    730 /*---                                                      ---*/
    731 /*------------------------------------------------------------*/
    732 
/* Denotes an address range.  Both aMin and aMax are included in the
   range; hence a complete range is (0, ~0) and an empty range is any
   (X, X-1) for X > 0. */
typedef
   struct { Addr aMin; /* lowest address in the range (inclusive) */
            Addr aMax; /* highest address in the range (inclusive) */ }
   AddrRange;
    739 
    740 
    741 /* Generate an arbitrary structural total ordering on
    742    XArray* of AddrRange. */
    743 static Word cmp__XArrays_of_AddrRange ( XArray* rngs1, XArray* rngs2 )
    744 {
    745    Word n1, n2, i;
    746    tl_assert(rngs1 && rngs2);
    747    n1 = VG_(sizeXA)( rngs1 );
    748    n2 = VG_(sizeXA)( rngs2 );
    749    if (n1 < n2) return -1;
    750    if (n1 > n2) return 1;
    751    for (i = 0; i < n1; i++) {
    752       AddrRange* rng1 = (AddrRange*)VG_(indexXA)( rngs1, i );
    753       AddrRange* rng2 = (AddrRange*)VG_(indexXA)( rngs2, i );
    754       if (rng1->aMin < rng2->aMin) return -1;
    755       if (rng1->aMin > rng2->aMin) return 1;
    756       if (rng1->aMax < rng2->aMax) return -1;
    757       if (rng1->aMax > rng2->aMax) return 1;
    758    }
    759    return 0;
    760 }
    761 
    762 
    763 __attribute__((noinline))
    764 static XArray* /* of AddrRange */ empty_range_list ( void )
    765 {
    766    XArray* xa; /* XArray of AddrRange */
    767    /* Who frees this xa?  varstack_preen() does. */
    768    xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.erl.1",
    769                     ML_(dinfo_free),
    770                     sizeof(AddrRange) );
    771    return xa;
    772 }
    773 
    774 
    775 __attribute__((noinline))
    776 static XArray* unitary_range_list ( Addr aMin, Addr aMax )
    777 {
    778    XArray*   xa;
    779    AddrRange pair;
    780    vg_assert(aMin <= aMax);
    781    /* Who frees this xa?  varstack_preen() does. */
    782    xa = VG_(newXA)( ML_(dinfo_zalloc),  "di.readdwarf3.url.1",
    783                     ML_(dinfo_free),
    784                     sizeof(AddrRange) );
    785    pair.aMin = aMin;
    786    pair.aMax = aMax;
    787    VG_(addToXA)( xa, &pair );
    788    return xa;
    789 }
    790 
    791 
    792 /* Enumerate the address ranges starting at img-offset
    793    'debug_ranges_offset' in .debug_ranges.  Results are biased with
    794    'svma_of_referencing_CU' and so I believe are correct SVMAs for the
    795    object as a whole.  This function allocates the XArray, and the
    796    caller must deallocate it. */
    797 __attribute__((noinline))
    798 static XArray* /* of AddrRange */
    799        get_range_list ( CUConst* cc,
    800                         Bool     td3,
    801                         UWord    debug_ranges_offset,
    802                         Addr     svma_of_referencing_CU )
    803 {
    804    Addr      base;
    805    Cursor    ranges;
    806    XArray*   xa; /* XArray of AddrRange */
    807    AddrRange pair;
    808 
    809    if (cc->debug_ranges_sz == 0)
    810       cc->barf("get_range_list: .debug_ranges is empty/missing");
    811 
    812    init_Cursor( &ranges, cc->debug_ranges_img,
    813                 cc->debug_ranges_sz, 0, cc->barf,
    814                 "Overrun whilst reading .debug_ranges section(2)" );
    815    set_position_of_Cursor( &ranges, debug_ranges_offset );
    816 
    817    /* Who frees this xa?  varstack_preen() does. */
    818    xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.grl.1", ML_(dinfo_free),
    819                     sizeof(AddrRange) );
    820    base = 0;
    821    while (True) {
    822       /* Read a (host-)word pair.  This is something of a hack since
    823          the word size to read is really dictated by the ELF file;
    824          however, we assume we're reading a file with the same
    825          word-sizeness as the host.  Reasonably enough. */
    826       UWord w1 = get_UWord( &ranges );
    827       UWord w2 = get_UWord( &ranges );
    828 
    829       if (w1 == 0 && w2 == 0)
    830          break; /* end of list. */
    831 
    832       if (w1 == -1UL) {
    833          /* new value for 'base' */
    834          base = w2;
    835          continue;
    836       }
    837 
    838       /* else enumerate [w1+base, w2+base) */
    839       /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc"
    840          (sec 2.17.2) */
    841       if (w1 > w2)
    842          cc->barf( "negative range in .debug_ranges section" );
    843       if (w1 < w2) {
    844          pair.aMin = w1     + base + svma_of_referencing_CU;
    845          pair.aMax = w2 - 1 + base + svma_of_referencing_CU;
    846          vg_assert(pair.aMin <= pair.aMax);
    847          VG_(addToXA)( xa, &pair );
    848       }
    849    }
    850    return xa;
    851 }
    852 
    853 
    854 /* Parse the Compilation Unit header indicated at 'c' and
    855    initialise 'cc' accordingly. */
    856 static __attribute__((noinline))
    857 void parse_CU_Header ( /*OUT*/CUConst* cc,
    858                        Bool td3,
    859                        Cursor* c,
    860                        UChar* debug_abbv_img, UWord debug_abbv_sz,
    861 		       Bool type_unit,
    862                        Bool alt_info )
    863 {
    864    UChar  address_size;
    865    UWord  debug_abbrev_offset;
    866    Int    i;
    867 
    868    VG_(memset)(cc, 0, sizeof(*cc));
    869    vg_assert(c && c->barf);
    870    cc->barf = c->barf;
    871 
    872    /* initial_length field */
    873    cc->unit_length
    874       = get_Initial_Length( &cc->is_dw64, c,
    875            "parse_CU_Header: invalid initial-length field" );
    876 
    877    TRACE_D3("   Length:        %lld\n", cc->unit_length );
    878 
    879    /* version */
    880    cc->version = get_UShort( c );
    881    if (cc->version != 2 && cc->version != 3 && cc->version != 4)
    882       cc->barf( "parse_CU_Header: is neither DWARF2 nor DWARF3 nor DWARF4" );
    883    TRACE_D3("   Version:       %d\n", (Int)cc->version );
    884 
    885    /* debug_abbrev_offset */
    886    debug_abbrev_offset = get_Dwarfish_UWord( c, cc->is_dw64 );
    887    if (debug_abbrev_offset >= debug_abbv_sz)
    888       cc->barf( "parse_CU_Header: invalid debug_abbrev_offset" );
    889    TRACE_D3("   Abbrev Offset: %ld\n", debug_abbrev_offset );
    890 
    891    /* address size.  If this isn't equal to the host word size, just
    892       give up.  This makes it safe to assume elsewhere that
    893       DW_FORM_addr and DW_FORM_ref_addr can be treated as a host
    894       word. */
    895    address_size = get_UChar( c );
    896    if (address_size != sizeof(void*))
    897       cc->barf( "parse_CU_Header: invalid address_size" );
    898    TRACE_D3("   Pointer Size:  %d\n", (Int)address_size );
    899 
    900    cc->is_type_unit = type_unit;
    901    cc->is_alt_info = alt_info;
    902 
    903    if (type_unit) {
    904       cc->type_signature = get_ULong( c );
    905       cc->type_offset = get_Dwarfish_UWord( c, cc->is_dw64 );
    906    }
    907 
    908    /* Set up so that cc->debug_abbv points to the relevant table for
    909       this CU.  Set the szB so that at least we can't read off the end
    910       of the debug_abbrev section -- potentially (and quite likely)
    911       too big, if this isn't the last table in the section, but at
    912       least it's safe. */
    913    cc->debug_abbv        = debug_abbv_img + debug_abbrev_offset;
    914    cc->debug_abbv_maxszB = debug_abbv_sz  - debug_abbrev_offset;
    915    /* and empty out the set_abbv_Cursor cache */
    916    if (0) VG_(printf)("XXXXXX initialise set_abbv_Cursor cache\n");
    917    for (i = 0; i < N_ABBV_CACHE; i++) {
    918       cc->saC_cache[i].abbv_code = (ULong)-1; /* unused */
    919       cc->saC_cache[i].posn = 0;
    920    }
    921    cc->saC_cache_queries = 0;
    922    cc->saC_cache_misses = 0;
    923 }
    924 
    925 
    926 /* Set up 'c' so it is ready to parse the abbv table entry code
    927    'abbv_code' for this compilation unit.  */
    928 static __attribute__((noinline))
    929 void set_abbv_Cursor ( /*OUT*/Cursor* c, Bool td3,
    930                        CUConst* cc, ULong abbv_code )
    931 {
    932    Int   i;
    933    ULong acode;
    934 
    935    if (abbv_code == 0)
    936       cc->barf("set_abbv_Cursor: abbv_code == 0" );
    937 
    938    /* (ULong)-1 is used to represent an empty cache slot.  So we can't
    939       allow it.  In any case no valid DWARF3 should make a reference
    940       to a negative abbreviation code.  [at least, they always seem to
    941       be numbered upwards from zero as far as I have seen] */
    942    vg_assert(abbv_code != (ULong)-1);
    943 
    944    /* First search the cache. */
    945    if (0) VG_(printf)("XXXXXX search set_abbv_Cursor cache\n");
    946    cc->saC_cache_queries++;
    947    for (i = 0; i < N_ABBV_CACHE; i++) {
    948       /* No need to test the cached abbv_codes for -1 (empty), since
    949          we just asserted that abbv_code is not -1. */
    950      if (cc->saC_cache[i].abbv_code == abbv_code) {
    951         /* Found it.  Cool.  Set up the parser using the cached
    952            position, and move this cache entry 1 step closer to the
    953            front. */
    954         if (0) VG_(printf)("XXXXXX found in set_abbv_Cursor cache\n");
    955         init_Cursor( c, cc->debug_abbv,
    956                      cc->debug_abbv_maxszB, cc->saC_cache[i].posn,
    957                      cc->barf,
    958                      "Overrun whilst parsing .debug_abbrev section(1)" );
    959         if (i > 0) {
    960            ULong t_abbv_code = cc->saC_cache[i].abbv_code;
    961            UWord t_posn = cc->saC_cache[i].posn;
    962            while (i > 0) {
    963               cc->saC_cache[i] = cc->saC_cache[i-1];
    964               cc->saC_cache[0].abbv_code = t_abbv_code;
    965               cc->saC_cache[0].posn = t_posn;
    966               i--;
    967            }
    968         }
    969         return;
    970      }
    971    }
    972 
    973    /* No.  It's not in the cache.  We have to search through
    974       .debug_abbrev, of course taking care to update the cache
    975       when done. */
    976 
    977    cc->saC_cache_misses++;
    978    init_Cursor( c, cc->debug_abbv, cc->debug_abbv_maxszB, 0, cc->barf,
    979                "Overrun whilst parsing .debug_abbrev section(2)" );
    980 
    981    /* Now iterate though the table until we find the requested
    982       entry. */
    983    while (True) {
    984       //ULong atag;
    985       //UInt  has_children;
    986       acode = get_ULEB128( c );
    987       if (acode == 0) break; /* end of the table */
    988       if (acode == abbv_code) break; /* found it */
    989       /*atag         = */ get_ULEB128( c );
    990       /*has_children = */ get_UChar( c );
    991       //TRACE_D3("   %llu      %s    [%s]\n",
    992       //         acode, pp_DW_TAG(atag), pp_DW_children(has_children));
    993       while (True) {
    994          ULong at_name = get_ULEB128( c );
    995          ULong at_form = get_ULEB128( c );
    996          if (at_name == 0 && at_form == 0) break;
    997          //TRACE_D3("    %18s %s\n",
    998          //         pp_DW_AT(at_name), pp_DW_FORM(at_form));
    999       }
   1000    }
   1001 
   1002    if (acode == 0) {
   1003       /* Not found.  This is fatal. */
   1004       cc->barf("set_abbv_Cursor: abbv_code not found");
   1005    }
   1006 
   1007    /* Otherwise, 'c' is now set correctly to parse the relevant entry,
   1008       starting from the abbreviation entry's tag.  So just cache
   1009       the result, and return. */
   1010    for (i = N_ABBV_CACHE-1; i > N_ABBV_CACHE/2; i--) {
   1011       cc->saC_cache[i] = cc->saC_cache[i-1];
   1012    }
   1013    if (0) VG_(printf)("XXXXXX update set_abbv_Cursor cache\n");
   1014    cc->saC_cache[N_ABBV_CACHE/2].abbv_code = abbv_code;
   1015    cc->saC_cache[N_ABBV_CACHE/2].posn = get_position_of_Cursor(c);
   1016 }
   1017 
   1018 /* This represents a single signatured type.  It maps a type signature
   1019    (a ULong) to a cooked DIE offset.  Objects of this type are stored
   1020    in the type signature hash table.  */
   1021 typedef
   1022    struct D3SignatureType {
   1023       struct D3SignatureType *next;
   1024       UWord data;
   1025       ULong type_signature;
   1026       UWord die;
   1027    }
   1028    D3SignatureType;
   1029 
   1030 /* Record a signatured type in the hash table.  */
   1031 static void record_signatured_type ( VgHashTable tab,
   1032                                      ULong type_signature,
   1033                                      UWord die )
   1034 {
   1035    D3SignatureType *dstype = ML_(dinfo_zalloc) ( "di.readdwarf3.sigtype",
   1036                                                  sizeof(D3SignatureType) );
   1037    dstype->data = (UWord) type_signature;
   1038    dstype->type_signature = type_signature;
   1039    dstype->die = die;
   1040    VG_(HT_add_node) ( tab, dstype );
   1041 }
   1042 
   1043 /* Given a type signature hash table and a type signature, return the
   1044    cooked DIE offset of the type.  If the type cannot be found, call
   1045    BARF.  */
   1046 static UWord lookup_signatured_type ( VgHashTable tab,
   1047                                       ULong type_signature,
   1048                                       void (*barf)( HChar* ) __attribute__((noreturn)) )
   1049 {
   1050    D3SignatureType *dstype = VG_(HT_lookup) ( tab, (UWord) type_signature );
   1051    /* This may be unwarranted chumminess with the hash table
   1052       implementation.  */
   1053    while ( dstype != NULL && dstype->type_signature != type_signature)
   1054       dstype = dstype->next;
   1055    if (dstype == NULL) {
   1056       barf("lookup_signatured_type: could not find signatured type");
   1057       /*NOTREACHED*/
   1058       vg_assert(0);
   1059    }
   1060    return dstype->die;
   1061 }
   1062 
   1063 /* From 'c', get the Form data into the lowest 1/2/4/8 bytes of *cts.
   1064 
   1065    If *cts itself contains the entire result, then *ctsSzB is set to
   1066    1,2,4 or 8 accordingly and *ctsMemSzB is set to zero.
   1067 
   1068    Alternatively, the result can be a block of data (in the
   1069    transiently mapped-in object, so-called "image" space).  If so then
   1070    the lowest sizeof(void*)/8 bytes of *cts hold a pointer to said
   1071    image, *ctsSzB is zero, and *ctsMemSzB is the size of the block.
   1072 
   1073    Unfortunately this means it is impossible to represent a zero-size
   1074    image block since that would have *ctsSzB == 0 and *ctsMemSzB == 0
   1075    and so is ambiguous (which case it is?)
   1076 
   1077    Invariant on successful return:
   1078       (*ctsSzB > 0 && *ctsMemSzB == 0)
   1079       || (*ctsSzB == 0 && *ctsMemSzB > 0)
   1080 */
   1081 static
   1082 void get_Form_contents ( /*OUT*/ULong* cts,
   1083                          /*OUT*/Int*   ctsSzB,
   1084                          /*OUT*/UWord* ctsMemSzB,
   1085                          CUConst* cc, Cursor* c,
   1086                          Bool td3, DW_FORM form )
   1087 {
   1088    *cts       = 0;
   1089    *ctsSzB    = 0;
   1090    *ctsMemSzB = 0;
   1091    switch (form) {
   1092       case DW_FORM_data1:
   1093          *cts = (ULong)(UChar)get_UChar(c);
   1094          *ctsSzB = 1;
   1095          TRACE_D3("%u", (UInt)*cts);
   1096          break;
   1097       case DW_FORM_data2:
   1098          *cts = (ULong)(UShort)get_UShort(c);
   1099          *ctsSzB = 2;
   1100          TRACE_D3("%u", (UInt)*cts);
   1101          break;
   1102       case DW_FORM_data4:
   1103          *cts = (ULong)(UInt)get_UInt(c);
   1104          *ctsSzB = 4;
   1105          TRACE_D3("%u", (UInt)*cts);
   1106          break;
   1107       case DW_FORM_data8:
   1108          *cts = get_ULong(c);
   1109          *ctsSzB = 8;
   1110          TRACE_D3("%llu", *cts);
   1111          break;
   1112       case DW_FORM_sec_offset:
   1113          *cts = (ULong)get_Dwarfish_UWord( c, cc->is_dw64 );
   1114          *ctsSzB = cc->is_dw64 ? 8 : 4;
   1115          TRACE_D3("%llu", *cts);
   1116          break;
   1117       case DW_FORM_sdata:
   1118          *cts = (ULong)(Long)get_SLEB128(c);
   1119          *ctsSzB = 8;
   1120          TRACE_D3("%lld", (Long)*cts);
   1121          break;
   1122       case DW_FORM_udata:
   1123          *cts = (ULong)(Long)get_ULEB128(c);
   1124          *ctsSzB = 8;
   1125          TRACE_D3("%llu", (Long)*cts);
   1126          break;
   1127       case DW_FORM_addr:
   1128          /* note, this is a hack.  DW_FORM_addr is defined as getting
   1129             a word the size of the target machine as defined by the
   1130             address_size field in the CU Header.  However,
   1131             parse_CU_Header() rejects all inputs except those for
   1132             which address_size == sizeof(Word), hence we can just
   1133             treat it as a (host) Word.  */
   1134          *cts = (ULong)(UWord)get_UWord(c);
   1135          *ctsSzB = sizeof(UWord);
   1136          TRACE_D3("0x%lx", (UWord)*cts);
   1137          break;
   1138 
   1139       case DW_FORM_ref_addr:
   1140          /* We make the same word-size assumption as DW_FORM_addr. */
   1141          /* What does this really mean?  From D3 Sec 7.5.4,
   1142             description of "reference", it would appear to reference
   1143             some other DIE, by specifying the offset from the
   1144             beginning of a .debug_info section.  The D3 spec mentions
   1145             that this might be in some other shared object and
   1146             executable.  But I don't see how the name of the other
   1147             object/exe is specified.
   1148 
   1149             At least for the DW_FORM_ref_addrs created by icc11, the
   1150             references seem to be within the same object/executable.
   1151             So for the moment we merely range-check, to see that they
   1152             actually do specify a plausible offset within this
   1153             object's .debug_info, and return the value unchanged.
   1154 
   1155             In DWARF 2, DW_FORM_ref_addr is address-sized, but in
   1156             DWARF 3 and later, it is offset-sized.
   1157          */
   1158          if (cc->version == 2) {
   1159             *cts = (ULong)(UWord)get_UWord(c);
   1160             *ctsSzB = sizeof(UWord);
   1161          } else {
   1162             *cts = get_Dwarfish_UWord(c, cc->is_dw64);
   1163             *ctsSzB = cc->is_dw64 ? sizeof(ULong) : sizeof(UInt);
   1164          }
   1165          TRACE_D3("0x%lx", (UWord)*cts);
   1166          if (0) VG_(printf)("DW_FORM_ref_addr 0x%lx\n", (UWord)*cts);
   1167          if (/* the following 2 are surely impossible, but ... */
   1168              cc->debug_info_img == NULL || cc->debug_info_sz == 0
   1169              || *cts >= (ULong)cc->debug_info_sz) {
   1170             /* Hmm.  Offset is nonsensical for this object's .debug_info
   1171                section.  Be safe and reject it. */
   1172             cc->barf("get_Form_contents: DW_FORM_ref_addr points "
   1173                      "outside .debug_info");
   1174          }
   1175          break;
   1176 
   1177       case DW_FORM_strp: {
   1178          /* this is an offset into .debug_str */
   1179          UChar* str;
   1180          UWord uw = (UWord)get_Dwarfish_UWord( c, cc->is_dw64 );
   1181          if (cc->debug_str_img == NULL || uw >= cc->debug_str_sz)
   1182             cc->barf("get_Form_contents: DW_FORM_strp "
   1183                      "points outside .debug_str");
   1184          /* FIXME: check the entire string lies inside debug_str,
   1185             not just the first byte of it. */
   1186          str = (UChar*)cc->debug_str_img + uw;
   1187          TRACE_D3("(indirect string, offset: 0x%lx): %s", uw, str);
   1188          *cts = (ULong)(UWord)str;
   1189          *ctsMemSzB = 1 + (ULong)VG_(strlen)(str);
   1190          break;
   1191       }
   1192       case DW_FORM_string: {
   1193          UChar* str = get_AsciiZ(c);
   1194          TRACE_D3("%s", str);
   1195          *cts = (ULong)(UWord)str;
   1196          /* strlen is safe because get_AsciiZ already 'vetted' the
   1197             entire string */
   1198          *ctsMemSzB = 1 + (ULong)VG_(strlen)(str);
   1199          break;
   1200       }
   1201       case DW_FORM_ref1: {
   1202          UChar  u8 = get_UChar(c);
   1203          UWord res = cc->cu_start_offset + (UWord)u8;
   1204          *cts = (ULong)res;
   1205          *ctsSzB = sizeof(UWord);
   1206          TRACE_D3("<%lx>", res);
   1207          break;
   1208       }
   1209       case DW_FORM_ref2: {
   1210          UShort  u16 = get_UShort(c);
   1211          UWord res = cc->cu_start_offset + (UWord)u16;
   1212          *cts = (ULong)res;
   1213          *ctsSzB = sizeof(UWord);
   1214          TRACE_D3("<%lx>", res);
   1215          break;
   1216       }
   1217       case DW_FORM_ref4: {
   1218          UInt  u32 = get_UInt(c);
   1219          UWord res = cc->cu_start_offset + (UWord)u32;
   1220          *cts = (ULong)res;
   1221          *ctsSzB = sizeof(UWord);
   1222          TRACE_D3("<%lx>", res);
   1223          break;
   1224       }
   1225       case DW_FORM_ref8: {
   1226          ULong  u64 = get_ULong(c);
   1227          UWord res = cc->cu_start_offset + (UWord)u64;
   1228          *cts = (ULong)res;
   1229          *ctsSzB = sizeof(UWord);
   1230          TRACE_D3("<%lx>", res);
   1231          break;
   1232       }
   1233       case DW_FORM_ref_udata: {
   1234          ULong  u64 = get_ULEB128(c);
   1235          UWord res = cc->cu_start_offset + (UWord)u64;
   1236          *cts = (ULong)res;
   1237          *ctsSzB = sizeof(UWord);
   1238          TRACE_D3("<%lx>", res);
   1239          break;
   1240       }
   1241       case DW_FORM_flag: {
   1242          UChar u8 = get_UChar(c);
   1243          TRACE_D3("%u", (UInt)u8);
   1244          *cts = (ULong)u8;
   1245          *ctsSzB = 1;
   1246          break;
   1247       }
   1248       case DW_FORM_flag_present:
   1249          TRACE_D3("1");
   1250          *cts = 1;
   1251          *ctsSzB = 1;
   1252          break;
   1253       case DW_FORM_block1: {
   1254          ULong  u64b;
   1255          ULong  u64 = (ULong)get_UChar(c);
   1256          UChar* block = get_address_of_Cursor(c);
   1257          TRACE_D3("%llu byte block: ", u64);
   1258          for (u64b = u64; u64b > 0; u64b--) {
   1259             UChar u8 = get_UChar(c);
   1260             TRACE_D3("%x ", (UInt)u8);
   1261          }
   1262          *cts = (ULong)(UWord)block;
   1263          *ctsMemSzB = (UWord)u64;
   1264          break;
   1265       }
   1266       case DW_FORM_block2: {
   1267          ULong  u64b;
   1268          ULong  u64 = (ULong)get_UShort(c);
   1269          UChar* block = get_address_of_Cursor(c);
   1270          TRACE_D3("%llu byte block: ", u64);
   1271          for (u64b = u64; u64b > 0; u64b--) {
   1272             UChar u8 = get_UChar(c);
   1273             TRACE_D3("%x ", (UInt)u8);
   1274          }
   1275          *cts = (ULong)(UWord)block;
   1276          *ctsMemSzB = (UWord)u64;
   1277          break;
   1278       }
   1279       case DW_FORM_block4: {
   1280          ULong  u64b;
   1281          ULong  u64 = (ULong)get_UInt(c);
   1282          UChar* block = get_address_of_Cursor(c);
   1283          TRACE_D3("%llu byte block: ", u64);
   1284          for (u64b = u64; u64b > 0; u64b--) {
   1285             UChar u8 = get_UChar(c);
   1286             TRACE_D3("%x ", (UInt)u8);
   1287          }
   1288          *cts = (ULong)(UWord)block;
   1289          *ctsMemSzB = (UWord)u64;
   1290          break;
   1291       }
   1292       case DW_FORM_exprloc:
   1293       case DW_FORM_block: {
   1294          ULong  u64b;
   1295          ULong  u64 = (ULong)get_ULEB128(c);
   1296          UChar* block = get_address_of_Cursor(c);
   1297          TRACE_D3("%llu byte block: ", u64);
   1298          for (u64b = u64; u64b > 0; u64b--) {
   1299             UChar u8 = get_UChar(c);
   1300             TRACE_D3("%x ", (UInt)u8);
   1301          }
   1302          *cts = (ULong)(UWord)block;
   1303          *ctsMemSzB = (UWord)u64;
   1304          break;
   1305       }
   1306       case DW_FORM_ref_sig8: {
   1307          ULong  u64b;
   1308          ULong  signature = get_ULong (c);
   1309          ULong  work = signature;
   1310          TRACE_D3("8 byte signature: ");
   1311          for (u64b = 8; u64b > 0; u64b--) {
   1312             UChar u8 = work & 0xff;
   1313             TRACE_D3("%x ", (UInt)u8);
   1314             work >>= 8;
   1315          }
   1316          /* Due to the way that the hash table is constructed, the
   1317             resulting DIE offset here is already "cooked".  See
   1318             cook_die_using_form.  */
   1319          *cts = lookup_signatured_type (cc->signature_types, signature,
   1320                                         c->barf);
   1321          *ctsSzB = sizeof(UWord);
   1322          break;
   1323       }
   1324       case DW_FORM_indirect:
   1325          get_Form_contents (cts, ctsSzB, ctsMemSzB, cc, c, td3,
   1326                             (DW_FORM)get_ULEB128(c));
   1327          return;
   1328 
   1329       case DW_FORM_GNU_ref_alt:
   1330          *cts = get_Dwarfish_UWord(c, cc->is_dw64);
   1331          *ctsSzB = cc->is_dw64 ? sizeof(ULong) : sizeof(UInt);
   1332          TRACE_D3("0x%lx", (UWord)*cts);
   1333          if (0) VG_(printf)("DW_FORM_GNU_ref_alt 0x%lx\n", (UWord)*cts);
   1334          if (/* the following 2 are surely impossible, but ... */
   1335              cc->debug_info_alt_img == NULL || cc->debug_info_alt_sz == 0
   1336              || *cts >= (ULong)cc->debug_info_alt_sz) {
   1337             /* Hmm.  Offset is nonsensical for this object's .debug_info
   1338                section.  Be safe and reject it. */
   1339             cc->barf("get_Form_contents: DW_FORM_ref_addr points "
   1340                      "outside alternate .debug_info");
   1341          }
   1342          break;
   1343 
   1344       case DW_FORM_GNU_strp_alt: {
   1345          /* this is an offset into alternate .debug_str */
   1346          UChar* str;
   1347          UWord uw = (UWord)get_Dwarfish_UWord( c, cc->is_dw64 );
   1348          if (cc->debug_str_alt_img == NULL || uw >= cc->debug_str_alt_sz)
   1349             cc->barf("get_Form_contents: DW_FORM_GNU_strp_alt "
   1350                      "points outside alternate .debug_str");
   1351          /* FIXME: check the entire string lies inside debug_str,
   1352             not just the first byte of it. */
   1353          str = (UChar*)cc->debug_str_alt_img + uw;
   1354          TRACE_D3("(indirect alt string, offset: 0x%lx): %s", uw, str);
   1355          *cts = (ULong)(UWord)str;
   1356          *ctsMemSzB = 1 + (ULong)VG_(strlen)(str);
   1357          break;
   1358       }
   1359 
   1360       default:
   1361          VG_(printf)(
   1362             "get_Form_contents: unhandled %d (%s) at <%lx>\n",
   1363             form, ML_(pp_DW_FORM)(form), get_position_of_Cursor(c));
   1364          c->barf("get_Form_contents: unhandled DW_FORM");
   1365    }
   1366 }
   1367 
   1368 
   1369 /*------------------------------------------------------------*/
   1370 /*---                                                      ---*/
   1371 /*--- Parsing of variable-related DIEs                     ---*/
   1372 /*---                                                      ---*/
   1373 /*------------------------------------------------------------*/
   1374 
   1375 typedef
   1376    struct _TempVar {
   1377       UChar*  name; /* in DebugInfo's .strchunks */
   1378       /* Represent ranges economically.  nRanges is the number of
   1379          ranges.  Cases:
   1380          0: .rngOneMin .rngOneMax .manyRanges are all zero
   1381          1: .rngOneMin .rngOneMax hold the range; .rngMany is NULL
   1382          2: .rngOneMin .rngOneMax are zero; .rngMany holds the ranges.
   1383          This is merely an optimisation to avoid having to allocate
   1384          and free the XArray in the common (98%) of cases where there
   1385          is zero or one address ranges. */
   1386       UWord   nRanges;
   1387       Addr    rngOneMin;
   1388       Addr    rngOneMax;
   1389       XArray* rngMany; /* of AddrRange.  NON-UNIQUE PTR in AR_DINFO. */
   1390       /* Do not free .rngMany, since many TempVars will have the same
   1391          value.  Instead the associated storage is to be freed by
   1392          deleting 'rangetree', which stores a single copy of each
   1393          range. */
   1394       /* --- */
   1395       Int     level;
   1396       UWord   typeR; /* a cuOff */
   1397       GExpr*  gexpr; /* for this variable */
   1398       GExpr*  fbGX;  /* to find the frame base of the enclosing fn, if
   1399                         any */
   1400       UChar*  fName; /* declaring file name, or NULL */
   1401       Int     fLine; /* declaring file line number, or zero */
   1402       /* offset in .debug_info, so that abstract instances can be
   1403          found to satisfy references from concrete instances. */
   1404       UWord   dioff;
   1405       UWord   absOri; /* so the absOri fields refer to dioff fields
   1406                          in some other, related TempVar. */
   1407    }
   1408    TempVar;
   1409 
   1410 #define N_D3_VAR_STACK 48
   1411 
   1412 typedef
   1413    struct {
   1414       /* Contains the range stack: a stack of address ranges, one
   1415          stack entry for each nested scope.
   1416 
   1417          Some scope entries are created by function definitions
   1418          (DW_AT_subprogram), and for those, we also note the GExpr
   1419          derived from its DW_AT_frame_base attribute, if any.
   1420          Consequently it should be possible to find, for any
   1421          variable's DIE, the GExpr for the the containing function's
   1422          DW_AT_frame_base by scanning back through the stack to find
   1423          the nearest entry associated with a function.  This somewhat
   1424          elaborate scheme is provided so as to make it possible to
   1425          obtain the correct DW_AT_frame_base expression even in the
   1426          presence of nested functions (or to be more precise, in the
   1427          presence of nested DW_AT_subprogram DIEs).
   1428       */
   1429       Int     sp; /* [sp] is innermost active entry; sp==-1 for empty
   1430                      stack */
   1431       XArray* ranges[N_D3_VAR_STACK]; /* XArray of AddrRange */
   1432       Int     level[N_D3_VAR_STACK];  /* D3 DIE levels */
   1433       Bool    isFunc[N_D3_VAR_STACK]; /* from DW_AT_subprogram? */
   1434       GExpr*  fbGX[N_D3_VAR_STACK];   /* if isFunc, contains the FB
   1435                                          expr, else NULL */
   1436       /* The file name table.  Is a mapping from integer index to the
   1437          (permanent) copy of the string, iow a non-img area. */
   1438       XArray* /* of UChar* */ filenameTable;
   1439    }
   1440    D3VarParser;
   1441 
   1442 static void varstack_show ( D3VarParser* parser, HChar* str ) {
   1443    Word i, j;
   1444    VG_(printf)("  varstack (%s) {\n", str);
   1445    for (i = 0; i <= parser->sp; i++) {
   1446       XArray* xa = parser->ranges[i];
   1447       vg_assert(xa);
   1448       VG_(printf)("    [%ld] (level %d)", i, parser->level[i]);
   1449       if (parser->isFunc[i]) {
   1450          VG_(printf)(" (fbGX=%p)", parser->fbGX[i]);
   1451       } else {
   1452          vg_assert(parser->fbGX[i] == NULL);
   1453       }
   1454       VG_(printf)(": ");
   1455       if (VG_(sizeXA)( xa ) == 0) {
   1456          VG_(printf)("** empty PC range array **");
   1457       } else {
   1458          for (j = 0; j < VG_(sizeXA)( xa ); j++) {
   1459             AddrRange* range = (AddrRange*) VG_(indexXA)( xa, j );
   1460             vg_assert(range);
   1461             VG_(printf)("[%#lx,%#lx] ", range->aMin, range->aMax);
   1462          }
   1463       }
   1464       VG_(printf)("\n");
   1465    }
   1466    VG_(printf)("  }\n");
   1467 }
   1468 
   1469 /* Remove from the stack, all entries with .level > 'level' */
   1470 static
   1471 void varstack_preen ( D3VarParser* parser, Bool td3, Int level )
   1472 {
   1473    Bool changed = False;
   1474    vg_assert(parser->sp < N_D3_VAR_STACK);
   1475    while (True) {
   1476       vg_assert(parser->sp >= -1);
   1477       if (parser->sp == -1) break;
   1478       if (parser->level[parser->sp] <= level) break;
   1479       if (0)
   1480          TRACE_D3("BBBBAAAA varstack_pop [newsp=%d]\n", parser->sp-1);
   1481       vg_assert(parser->ranges[parser->sp]);
   1482       /* Who allocated this xa?  get_range_list() or
   1483          unitary_range_list(). */
   1484       VG_(deleteXA)( parser->ranges[parser->sp] );
   1485       parser->ranges[parser->sp] = NULL;
   1486       parser->level[parser->sp]  = 0;
   1487       parser->isFunc[parser->sp] = False;
   1488       parser->fbGX[parser->sp]   = NULL;
   1489       parser->sp--;
   1490       changed = True;
   1491    }
   1492    if (changed && td3)
   1493       varstack_show( parser, "after preen" );
   1494 }
   1495 
   1496 static void varstack_push ( CUConst* cc,
   1497                             D3VarParser* parser,
   1498                             Bool td3,
   1499                             XArray* ranges, Int level,
   1500                             Bool    isFunc, GExpr* fbGX ) {
   1501    if (0)
   1502    TRACE_D3("BBBBAAAA varstack_push[newsp=%d]: %d  %p\n",
   1503             parser->sp+1, level, ranges);
   1504 
   1505    /* First we need to zap everything >= 'level', as we are about to
   1506       replace any previous entry at 'level', so .. */
   1507    varstack_preen(parser, /*td3*/False, level-1);
   1508 
   1509    vg_assert(parser->sp >= -1);
   1510    vg_assert(parser->sp < N_D3_VAR_STACK);
   1511    if (parser->sp == N_D3_VAR_STACK-1)
   1512       cc->barf("varstack_push: N_D3_VAR_STACK is too low; "
   1513                "increase and recompile");
   1514    if (parser->sp >= 0)
   1515       vg_assert(parser->level[parser->sp] < level);
   1516    parser->sp++;
   1517    vg_assert(parser->ranges[parser->sp] == NULL);
   1518    vg_assert(parser->level[parser->sp]  == 0);
   1519    vg_assert(parser->isFunc[parser->sp] == False);
   1520    vg_assert(parser->fbGX[parser->sp]   == NULL);
   1521    vg_assert(ranges != NULL);
   1522    if (!isFunc) vg_assert(fbGX == NULL);
   1523    parser->ranges[parser->sp] = ranges;
   1524    parser->level[parser->sp]  = level;
   1525    parser->isFunc[parser->sp] = isFunc;
   1526    parser->fbGX[parser->sp]   = fbGX;
   1527    if (td3)
   1528       varstack_show( parser, "after push" );
   1529 }
   1530 
   1531 
   1532 /* cts, ctsSzB, ctsMemSzB are derived from a DW_AT_location and so
   1533    refer either to a location expression or to a location list.
   1534    Figure out which, and in both cases bundle the expression or
   1535    location list into a so-called GExpr (guarded expression). */
   1536 __attribute__((noinline))
   1537 static GExpr* get_GX ( CUConst* cc, Bool td3,
   1538                        ULong cts, Int ctsSzB, UWord ctsMemSzB )
   1539 {
   1540    GExpr* gexpr = NULL;
   1541    if (ctsMemSzB > 0 && ctsSzB == 0) {
   1542       /* represents an in-line location expression, and cts points
   1543          right at it */
   1544       gexpr = make_singleton_GX( (UChar*)(UWord)cts, ctsMemSzB );
   1545    }
   1546    else
   1547    if (ctsMemSzB == 0 && ctsSzB > 0) {
   1548       /* represents location list.  cts is the offset of it in
   1549          .debug_loc. */
   1550       if (!cc->cu_svma_known)
   1551          cc->barf("get_GX: location list, but CU svma is unknown");
   1552       gexpr = make_general_GX( cc, td3, (UWord)cts, cc->cu_svma );
   1553    }
   1554    else {
   1555       vg_assert(0); /* else caller is bogus */
   1556    }
   1557    return gexpr;
   1558 }
   1559 
   1560 
   1561 static
   1562 void read_filename_table( /*MOD*/D3VarParser* parser,
   1563                           CUConst* cc, UWord debug_line_offset,
   1564                           Bool td3 )
   1565 {
   1566    Bool   is_dw64;
   1567    Cursor c;
   1568    Word   i;
   1569    UShort version;
   1570    UChar  opcode_base;
   1571    UChar* str;
   1572 
   1573    vg_assert(parser && cc && cc->barf);
   1574    if ((!cc->debug_line_img)
   1575        || cc->debug_line_sz <= debug_line_offset)
   1576       cc->barf("read_filename_table: .debug_line is missing?");
   1577 
   1578    init_Cursor( &c, cc->debug_line_img,
   1579                 cc->debug_line_sz, debug_line_offset, cc->barf,
   1580                 "Overrun whilst reading .debug_line section(1)" );
   1581 
   1582    /* unit_length = */
   1583       get_Initial_Length( &is_dw64, &c,
   1584            "read_filename_table: invalid initial-length field" );
   1585    version = get_UShort( &c );
   1586    if (version != 2 && version != 3 && version != 4)
   1587      cc->barf("read_filename_table: Only DWARF version 2, 3 and 4 line info "
   1588               "is currently supported.");
   1589    /*header_length              = (ULong)*/ get_Dwarfish_UWord( &c, is_dw64 );
   1590    /*minimum_instruction_length = */ get_UChar( &c );
   1591    if (version >= 4)
   1592       /*maximum_operations_per_insn = */ get_UChar( &c );
   1593    /*default_is_stmt            = */ get_UChar( &c );
   1594    /*line_base                  = (Char)*/ get_UChar( &c );
   1595    /*line_range                 = */ get_UChar( &c );
   1596    opcode_base                = get_UChar( &c );
   1597    /* skip over "standard_opcode_lengths" */
   1598    for (i = 1; i < (Word)opcode_base; i++)
   1599      (void)get_UChar( &c );
   1600 
   1601    /* skip over the directory names table */
   1602    while (peek_UChar(&c) != 0) {
   1603      (void)get_AsciiZ(&c);
   1604    }
   1605    (void)get_UChar(&c); /* skip terminating zero */
   1606 
   1607    /* Read and record the file names table */
   1608    vg_assert(parser->filenameTable);
   1609    vg_assert( VG_(sizeXA)( parser->filenameTable ) == 0 );
   1610    /* Add a dummy index-zero entry.  DWARF3 numbers its files
   1611       from 1, for some reason. */
   1612    str = ML_(addStr)( cc->di, "<unknown_file>", -1 );
   1613    VG_(addToXA)( parser->filenameTable, &str );
   1614    while (peek_UChar(&c) != 0) {
   1615       str = get_AsciiZ(&c);
   1616       TRACE_D3("  read_filename_table: %ld %s\n",
   1617                VG_(sizeXA)(parser->filenameTable), str);
   1618       str = ML_(addStr)( cc->di, str, -1 );
   1619       VG_(addToXA)( parser->filenameTable, &str );
   1620       (void)get_ULEB128( &c ); /* skip directory index # */
   1621       (void)get_ULEB128( &c ); /* skip last mod time */
   1622       (void)get_ULEB128( &c ); /* file size */
   1623    }
   1624    /* We're done!  The rest of it is not interesting. */
   1625 }
   1626 
   1627 
   1628 __attribute__((noinline))
   1629 static void parse_var_DIE (
   1630    /*MOD*/WordFM* /* of (XArray* of AddrRange, void) */ rangestree,
   1631    /*MOD*/XArray* /* of TempVar* */ tempvars,
   1632    /*MOD*/XArray* /* of GExpr* */ gexprs,
   1633    /*MOD*/D3VarParser* parser,
   1634    DW_TAG dtag,
   1635    UWord posn,
   1636    Int level,
   1637    Cursor* c_die,
   1638    Cursor* c_abbv,
   1639    CUConst* cc,
   1640    Bool td3
   1641 )
   1642 {
   1643    ULong       cts;
   1644    Int         ctsSzB;
   1645    UWord       ctsMemSzB;
   1646 
   1647    UWord saved_die_c_offset  = get_position_of_Cursor( c_die );
   1648    UWord saved_abbv_c_offset = get_position_of_Cursor( c_abbv );
   1649    Bool  debug_types_flag;
   1650    Bool  alt_flag;
   1651 
   1652    varstack_preen( parser, td3, level-1 );
   1653 
   1654    if (dtag == DW_TAG_compile_unit
   1655        || dtag == DW_TAG_type_unit
   1656        || dtag == DW_TAG_partial_unit) {
   1657       Bool have_lo    = False;
   1658       Bool have_hi1   = False;
   1659       Bool hiIsRelative = False;
   1660       Bool have_range = False;
   1661       Addr ip_lo    = 0;
   1662       Addr ip_hi1   = 0;
   1663       Addr rangeoff = 0;
   1664       while (True) {
   1665          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
   1666          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
   1667          if (attr == 0 && form == 0) break;
   1668          get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
   1669                             cc, c_die, False/*td3*/, form );
   1670          if (attr == DW_AT_low_pc && ctsSzB > 0) {
   1671             ip_lo   = cts;
   1672             have_lo = True;
   1673          }
   1674          if (attr == DW_AT_high_pc && ctsSzB > 0) {
   1675             ip_hi1   = cts;
   1676             have_hi1 = True;
   1677             if (form != DW_FORM_addr)
   1678                hiIsRelative = True;
   1679          }
   1680          if (attr == DW_AT_ranges && ctsSzB > 0) {
   1681             rangeoff = cts;
   1682             have_range = True;
   1683          }
   1684          if (attr == DW_AT_stmt_list && ctsSzB > 0) {
   1685             read_filename_table( parser, cc, (UWord)cts, td3 );
   1686          }
   1687       }
   1688       if (have_lo && have_hi1 && hiIsRelative)
   1689          ip_hi1 += ip_lo;
   1690       /* Now, does this give us an opportunity to find this
   1691          CU's svma? */
   1692 #if 0
   1693       if (level == 0 && have_lo) {
   1694          vg_assert(!cc->cu_svma_known); /* if this fails, it must be
   1695          because we've already seen a DW_TAG_compile_unit DIE at level
   1696          0.  But that can't happen, because DWARF3 only allows exactly
   1697          one top level DIE per CU. */
   1698          cc->cu_svma_known = True;
   1699          cc->cu_svma = ip_lo;
   1700          if (1)
   1701             TRACE_D3("BBBBAAAA acquire CU_SVMA of %p\n", cc->cu_svma);
   1702          /* Now, it may be that this DIE doesn't tell us the CU's
   1703             SVMA, by way of not having a DW_AT_low_pc.  That's OK --
   1704             the CU doesn't *have* to have its SVMA specified.
   1705 
   1706             But as per last para D3 spec sec 3.1.1 ("Normal and
   1707             Partial Compilation Unit Entries", "If the base address
   1708             (viz, the SVMA) is undefined, then any DWARF entry of
   1709             structure defined interms of the base address of that
   1710             compilation unit is not valid.".  So that means, if whilst
   1711             processing the children of this top level DIE (or their
   1712             children, etc) we see a DW_AT_range, and cu_svma_known is
   1713             False, then the DIE that contains it is (per the spec)
   1714             invalid, and we can legitimately stop and complain. */
   1715       }
   1716 #else
   1717       /* .. whereas The Reality is, simply assume the SVMA is zero
   1718          if it isn't specified. */
   1719       if (level == 0) {
   1720          vg_assert(!cc->cu_svma_known);
   1721          cc->cu_svma_known = True;
   1722          if (have_lo)
   1723             cc->cu_svma = ip_lo;
   1724          else
   1725             cc->cu_svma = 0;
   1726       }
   1727 #endif
   1728       /* Do we have something that looks sane? */
   1729       if (have_lo && have_hi1 && (!have_range)) {
   1730          if (ip_lo < ip_hi1)
   1731             varstack_push( cc, parser, td3,
   1732                            unitary_range_list(ip_lo, ip_hi1 - 1),
   1733                            level,
   1734                            False/*isFunc*/, NULL/*fbGX*/ );
   1735       } else
   1736       if ((!have_lo) && (!have_hi1) && have_range) {
   1737          varstack_push( cc, parser, td3,
   1738                         get_range_list( cc, td3,
   1739                                         rangeoff, cc->cu_svma ),
   1740                         level,
   1741                         False/*isFunc*/, NULL/*fbGX*/ );
   1742       } else
   1743       if ((!have_lo) && (!have_hi1) && (!have_range)) {
   1744          /* CU has no code, presumably? */
   1745          varstack_push( cc, parser, td3,
   1746                         empty_range_list(),
   1747                         level,
   1748                         False/*isFunc*/, NULL/*fbGX*/ );
   1749       } else
   1750       if (have_lo && (!have_hi1) && have_range && ip_lo == 0) {
   1751          /* broken DIE created by gcc-4.3.X ?  Ignore the
   1752             apparently-redundant DW_AT_low_pc and use the DW_AT_ranges
   1753             instead. */
   1754          varstack_push( cc, parser, td3,
   1755                         get_range_list( cc, td3,
   1756                                         rangeoff, cc->cu_svma ),
   1757                         level,
   1758                         False/*isFunc*/, NULL/*fbGX*/ );
   1759       } else {
   1760          if (0) VG_(printf)("I got hlo %d hhi1 %d hrange %d\n",
   1761                             (Int)have_lo, (Int)have_hi1, (Int)have_range);
   1762          goto bad_DIE;
   1763       }
   1764    }
   1765 
   1766    if (dtag == DW_TAG_lexical_block || dtag == DW_TAG_subprogram) {
   1767       Bool   have_lo    = False;
   1768       Bool   have_hi1   = False;
   1769       Bool   have_range = False;
   1770       Bool   hiIsRelative = False;
   1771       Addr   ip_lo      = 0;
   1772       Addr   ip_hi1     = 0;
   1773       Addr   rangeoff   = 0;
   1774       Bool   isFunc     = dtag == DW_TAG_subprogram;
   1775       GExpr* fbGX       = NULL;
   1776       while (True) {
   1777          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
   1778          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
   1779          if (attr == 0 && form == 0) break;
   1780          get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
   1781                             cc, c_die, False/*td3*/, form );
   1782          if (attr == DW_AT_low_pc && ctsSzB > 0) {
   1783             ip_lo   = cts;
   1784             have_lo = True;
   1785          }
   1786          if (attr == DW_AT_high_pc && ctsSzB > 0) {
   1787             ip_hi1   = cts;
   1788             have_hi1 = True;
   1789             if (form != DW_FORM_addr)
   1790                hiIsRelative = True;
   1791          }
   1792          if (attr == DW_AT_ranges && ctsSzB > 0) {
   1793             rangeoff = cts;
   1794             have_range = True;
   1795          }
   1796          if (isFunc
   1797              && attr == DW_AT_frame_base
   1798              && ((ctsMemSzB > 0 && ctsSzB == 0)
   1799                  || (ctsMemSzB == 0 && ctsSzB > 0))) {
   1800             fbGX = get_GX( cc, False/*td3*/, cts, ctsSzB, ctsMemSzB );
   1801             vg_assert(fbGX);
   1802             VG_(addToXA)(gexprs, &fbGX);
   1803          }
   1804       }
   1805       if (have_lo && have_hi1 && hiIsRelative)
   1806          ip_hi1 += ip_lo;
   1807       /* Do we have something that looks sane? */
   1808       if (dtag == DW_TAG_subprogram
   1809           && (!have_lo) && (!have_hi1) && (!have_range)) {
   1810          /* This is legit - ignore it. Sec 3.3.3: "A subroutine entry
   1811             representing a subroutine declaration that is not also a
   1812             definition does not have code address or range
   1813             attributes." */
   1814       } else
   1815       if (dtag == DW_TAG_lexical_block
   1816           && (!have_lo) && (!have_hi1) && (!have_range)) {
   1817          /* I believe this is legit, and means the lexical block
   1818             contains no insns (whatever that might mean).  Ignore. */
   1819       } else
   1820       if (have_lo && have_hi1 && (!have_range)) {
   1821          /* This scope supplies just a single address range. */
   1822          if (ip_lo < ip_hi1)
   1823             varstack_push( cc, parser, td3,
   1824                            unitary_range_list(ip_lo, ip_hi1 - 1),
   1825                            level, isFunc, fbGX );
   1826       } else
   1827       if ((!have_lo) && (!have_hi1) && have_range) {
   1828          /* This scope supplies multiple address ranges via the use of
   1829             a range list. */
   1830          varstack_push( cc, parser, td3,
   1831                         get_range_list( cc, td3,
   1832                                         rangeoff, cc->cu_svma ),
   1833                         level, isFunc, fbGX );
   1834       } else
   1835       if (have_lo && (!have_hi1) && (!have_range)) {
   1836          /* This scope is bogus.  The D3 spec sec 3.4 (Lexical Block
   1837             Entries) says fairly clearly that a scope must have either
   1838             _range or (_low_pc and _high_pc). */
   1839          /* The spec is a bit ambiguous though.  Perhaps a single byte
   1840             range is intended?  See sec 2.17 (Code Addresses And Ranges) */
   1841          /* This case is here because icc9 produced this:
   1842          <2><13bd>: DW_TAG_lexical_block
   1843             DW_AT_decl_line   : 5229
   1844             DW_AT_decl_column : 37
   1845             DW_AT_decl_file   : 1
   1846             DW_AT_low_pc      : 0x401b03
   1847          */
   1848          /* Ignore (seems safe than pushing a single byte range) */
   1849       } else
   1850          goto bad_DIE;
   1851    }
   1852 
   1853    if (dtag == DW_TAG_variable || dtag == DW_TAG_formal_parameter) {
   1854       UChar* name        = NULL;
   1855       UWord  typeR       = D3_INVALID_CUOFF;
   1856       Bool   external    = False;
   1857       GExpr* gexpr       = NULL;
   1858       Int    n_attrs     = 0;
   1859       UWord  abs_ori     = (UWord)D3_INVALID_CUOFF;
   1860       Int    lineNo      = 0;
   1861       UChar* fileName    = NULL;
   1862       while (True) {
   1863          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
   1864          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
   1865          if (attr == 0 && form == 0) break;
   1866          get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
   1867                             cc, c_die, False/*td3*/, form );
   1868          n_attrs++;
   1869          if (attr == DW_AT_name && ctsMemSzB > 0) {
   1870             name = ML_(addStr)( cc->di, (UChar*)(UWord)cts, -1 );
   1871          }
   1872          if (attr == DW_AT_location
   1873              && ((ctsMemSzB > 0 && ctsSzB == 0)
   1874                  || (ctsMemSzB == 0 && ctsSzB > 0))) {
   1875             gexpr = get_GX( cc, False/*td3*/, cts, ctsSzB, ctsMemSzB );
   1876             vg_assert(gexpr);
   1877             VG_(addToXA)(gexprs, &gexpr);
   1878          }
   1879          if (attr == DW_AT_type && ctsSzB > 0) {
   1880             typeR = cook_die_using_form( cc, (UWord)cts, form );
   1881          }
   1882          if (attr == DW_AT_external && ctsSzB > 0 && cts > 0) {
   1883             external = True;
   1884          }
   1885          if (attr == DW_AT_abstract_origin && ctsSzB > 0) {
   1886             abs_ori = (UWord)cts;
   1887          }
   1888          if (attr == DW_AT_declaration && ctsSzB > 0 && cts > 0) {
   1889             /*declaration = True;*/
   1890          }
   1891          if (attr == DW_AT_decl_line && ctsSzB > 0) {
   1892             lineNo = (Int)cts;
   1893          }
   1894          if (attr == DW_AT_decl_file && ctsSzB > 0) {
   1895             Int ftabIx = (Int)cts;
   1896             if (ftabIx >= 1
   1897                 && ftabIx < VG_(sizeXA)( parser->filenameTable )) {
   1898                fileName = *(UChar**)
   1899                           VG_(indexXA)( parser->filenameTable, ftabIx );
   1900                vg_assert(fileName);
   1901             }
   1902             if (0) VG_(printf)("XXX filename = %s\n", fileName);
   1903          }
   1904       }
   1905       /* We'll collect it under if one of the following three
   1906          conditions holds:
   1907          (1) has location and type    -> completed
   1908          (2) has type only            -> is an abstract instance
   1909          (3) has location and abs_ori -> is a concrete instance
   1910          Name, filename and line number are all optional frills.
   1911       */
   1912       if ( /* 1 */ (gexpr && typeR != D3_INVALID_CUOFF)
   1913            /* 2 */ || (typeR != D3_INVALID_CUOFF)
   1914            /* 3 */ || (gexpr && abs_ori != (UWord)D3_INVALID_CUOFF) ) {
   1915 
   1916          /* Add this variable to the list of interesting looking
   1917             variables.  Crucially, note along with it the address
   1918             range(s) associated with the variable, which for locals
   1919             will be the address ranges at the top of the varparser's
   1920             stack. */
   1921          GExpr*   fbGX = NULL;
   1922          Word     i, nRanges;
   1923          XArray*  /* of AddrRange */ xa;
   1924          TempVar* tv;
   1925          /* Stack can't be empty; we put a dummy entry on it for the
   1926             entire address range before starting with the DIEs for
   1927             this CU. */
   1928          vg_assert(parser->sp >= 0);
   1929 
   1930          /* If this is a local variable (non-external), try to find
   1931             the GExpr for the DW_AT_frame_base of the containing
   1932             function.  It should have been pushed on the stack at the
   1933             time we encountered its DW_TAG_subprogram DIE, so the way
   1934             to find it is to scan back down the stack looking for it.
   1935             If there isn't an enclosing stack entry marked 'isFunc'
   1936             then we must be seeing variable or formal param DIEs
   1937             outside of a function, so we deem the Dwarf to be
   1938             malformed if that happens.  Note that the fbGX may be NULL
   1939             if the containing DT_TAG_subprogram didn't supply a
   1940             DW_AT_frame_base -- that's OK, but there must actually be
   1941             a containing DW_TAG_subprogram. */
   1942          if (!external) {
   1943             Bool found = False;
   1944             for (i = parser->sp; i >= 0; i--) {
   1945                if (parser->isFunc[i]) {
   1946                   fbGX = parser->fbGX[i];
   1947                   found = True;
   1948                   break;
   1949                }
   1950             }
   1951             if (!found) {
   1952                if (0 && VG_(clo_verbosity) >= 0) {
   1953                   VG_(message)(Vg_DebugMsg,
   1954                      "warning: parse_var_DIE: non-external variable "
   1955                      "outside DW_TAG_subprogram\n");
   1956                }
   1957                /* goto bad_DIE; */
   1958                /* This seems to happen a lot.  Just ignore it -- if,
   1959                   when we come to evaluation of the location (guarded)
   1960                   expression, it requires a frame base value, and
   1961                   there's no expression for that, then evaluation as a
   1962                   whole will fail.  Harmless - a bit of a waste of
   1963                   cycles but nothing more. */
   1964             }
   1965          }
   1966 
   1967          /* re "external ? 0 : parser->sp" (twice), if the var is
   1968             marked 'external' then we must put it at the global scope,
   1969             as only the global scope (level 0) covers the entire PC
   1970             address space.  It is asserted elsewhere that level 0
   1971             always covers the entire address space. */
   1972          xa = parser->ranges[external ? 0 : parser->sp];
   1973          nRanges = VG_(sizeXA)(xa);
   1974          vg_assert(nRanges >= 0);
   1975 
   1976          tv = ML_(dinfo_zalloc)( "di.readdwarf3.pvD.1", sizeof(TempVar) );
   1977          tv->name   = name;
   1978          tv->level  = external ? 0 : parser->sp;
   1979          tv->typeR  = typeR;
   1980          tv->gexpr  = gexpr;
   1981          tv->fbGX   = fbGX;
   1982          tv->fName  = fileName;
   1983          tv->fLine  = lineNo;
   1984          tv->dioff  = posn;
   1985          tv->absOri = abs_ori;
   1986 
   1987          /* See explanation on definition of type TempVar for the
   1988             reason for this elaboration. */
   1989          tv->nRanges = nRanges;
   1990          tv->rngOneMin = 0;
   1991          tv->rngOneMax = 0;
   1992          tv->rngMany = NULL;
   1993          if (nRanges == 1) {
   1994             AddrRange* range = VG_(indexXA)(xa, 0);
   1995             tv->rngOneMin = range->aMin;
   1996             tv->rngOneMax = range->aMax;
   1997          }
   1998          else if (nRanges > 1) {
   1999             /* See if we already have a range list which is
   2000                structurally identical.  If so, use that; if not, clone
   2001                this one, and add it to our collection. */
   2002             UWord keyW, valW;
   2003             if (VG_(lookupFM)( rangestree, &keyW, &valW, (UWord)xa )) {
   2004                XArray* old = (XArray*)keyW;
   2005                tl_assert(valW == 0);
   2006                tl_assert(old != xa);
   2007                tv->rngMany = old;
   2008             } else {
   2009                XArray* cloned = VG_(cloneXA)( "di.readdwarf3.pvD.2", xa );
   2010                tv->rngMany = cloned;
   2011                VG_(addToFM)( rangestree, (UWord)cloned, 0 );
   2012             }
   2013          }
   2014 
   2015          VG_(addToXA)( tempvars, &tv );
   2016 
   2017          TRACE_D3("  Recording this variable, with %ld PC range(s)\n",
   2018                   VG_(sizeXA)(xa) );
   2019          /* collect stats on how effective the ->ranges special
   2020             casing is */
   2021          if (0) {
   2022             static Int ntot=0, ngt=0;
   2023             ntot++;
   2024             if (tv->rngMany) ngt++;
   2025             if (0 == (ntot % 100000))
   2026                VG_(printf)("XXXX %d tot, %d cloned\n", ntot, ngt);
   2027          }
   2028 
   2029       }
   2030 
   2031       /* Here are some other weird cases seen in the wild:
   2032 
   2033             We have a variable with a name and a type, but no
   2034             location.  I guess that's a sign that it has been
   2035             optimised away.  Ignore it.  Here's an example:
   2036 
   2037             static Int lc_compar(void* n1, void* n2) {
   2038                MC_Chunk* mc1 = *(MC_Chunk**)n1;
   2039                MC_Chunk* mc2 = *(MC_Chunk**)n2;
   2040                return (mc1->data < mc2->data ? -1 : 1);
   2041             }
   2042 
   2043             Both mc1 and mc2 are like this
   2044             <2><5bc>: Abbrev Number: 21 (DW_TAG_variable)
   2045                 DW_AT_name        : mc1
   2046                 DW_AT_decl_file   : 1
   2047                 DW_AT_decl_line   : 216
   2048                 DW_AT_type        : <5d3>
   2049 
   2050             whereas n1 and n2 do have locations specified.
   2051 
   2052             ---------------------------------------------
   2053 
   2054             We see a DW_TAG_formal_parameter with a type, but
   2055             no name and no location.  It's probably part of a function type
   2056             construction, thusly, hence ignore it:
   2057          <1><2b4>: Abbrev Number: 12 (DW_TAG_subroutine_type)
   2058              DW_AT_sibling     : <2c9>
   2059              DW_AT_prototyped  : 1
   2060              DW_AT_type        : <114>
   2061          <2><2be>: Abbrev Number: 13 (DW_TAG_formal_parameter)
   2062              DW_AT_type        : <13e>
   2063          <2><2c3>: Abbrev Number: 13 (DW_TAG_formal_parameter)
   2064              DW_AT_type        : <133>
   2065 
   2066             ---------------------------------------------
   2067 
   2068             Is very minimal, like this:
   2069             <4><81d>: Abbrev Number: 44 (DW_TAG_variable)
   2070                 DW_AT_abstract_origin: <7ba>
   2071             What that signifies I have no idea.  Ignore.
   2072 
   2073             ----------------------------------------------
   2074 
   2075             Is very minimal, like this:
   2076             <200f>: DW_TAG_formal_parameter
   2077                 DW_AT_abstract_ori: <1f4c>
   2078                 DW_AT_location    : 13440
   2079             What that signifies I have no idea.  Ignore.
   2080             It might be significant, though: the variable at least
   2081             has a location and so might exist somewhere.
   2082             Maybe we should handle this.
   2083 
   2084             ---------------------------------------------
   2085 
   2086             <22407>: DW_TAG_variable
   2087               DW_AT_name        : (indirect string, offset: 0x6579):
   2088                                   vgPlain_trampoline_stuff_start
   2089               DW_AT_decl_file   : 29
   2090               DW_AT_decl_line   : 56
   2091               DW_AT_external    : 1
   2092               DW_AT_declaration : 1
   2093 
   2094             Nameless and typeless variable that has a location?  Who
   2095             knows.  Not me.
   2096             <2><3d178>: Abbrev Number: 22 (DW_TAG_variable)
   2097                  DW_AT_location    : 9 byte block: 3 c0 c7 13 38 0 0 0 0
   2098                                      (DW_OP_addr: 3813c7c0)
   2099 
   2100             No, really.  Check it out.  gcc is quite simply borked.
   2101             <3><168cc>: Abbrev Number: 141 (DW_TAG_variable)
   2102             // followed by no attributes, and the next DIE is a sibling,
   2103             // not a child
   2104             */
   2105    }
   2106    return;
   2107 
   2108   bad_DIE:
   2109    set_position_of_Cursor( c_die,  saved_die_c_offset );
   2110    set_position_of_Cursor( c_abbv, saved_abbv_c_offset );
   2111    VG_(printf)("\nparse_var_DIE: confused by:\n");
   2112    posn = uncook_die( cc, posn, &debug_types_flag, &alt_flag );
   2113    VG_(printf)(" <%d><%lx>: %s", level, posn, ML_(pp_DW_TAG)( dtag ) );
   2114    if (debug_types_flag) {
   2115       VG_(printf)(" (in .debug_types)");
   2116    }
   2117    else if (alt_flag) {
   2118       VG_(printf)(" (in alternate .debug_info)");
   2119    }
   2120    VG_(printf)("\n");
   2121    while (True) {
   2122       DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
   2123       DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
   2124       if (attr == 0 && form == 0) break;
   2125       VG_(printf)("     %18s: ", ML_(pp_DW_AT)(attr));
   2126       /* Get the form contents, so as to print them */
   2127       get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
   2128                          cc, c_die, True, form );
   2129       VG_(printf)("\t\n");
   2130    }
   2131    VG_(printf)("\n");
   2132    cc->barf("parse_var_DIE: confused by the above DIE");
   2133    /*NOTREACHED*/
   2134 }
   2135 
   2136 
   2137 /*------------------------------------------------------------*/
   2138 /*---                                                      ---*/
   2139 /*--- Parsing of type-related DIEs                         ---*/
   2140 /*---                                                      ---*/
   2141 /*------------------------------------------------------------*/
   2142 
   2143 #define N_D3_TYPE_STACK 16
   2144 
   2145 typedef
   2146    struct {
   2147       /* What source language?  'A'=Ada83/95,
   2148                                 'C'=C/C++,
   2149                                 'F'=Fortran,
   2150                                 '?'=other
   2151          Established once per compilation unit. */
   2152       UChar language;
   2153       /* A stack of types which are currently under construction */
   2154       Int   sp; /* [sp] is innermost active entry; sp==-1 for empty
   2155                    stack */
   2156       /* Note that the TyEnts in qparentE are temporary copies of the
   2157          ones accumulating in the main tyent array.  So it is not safe
   2158          to free up anything on them when popping them off the stack
   2159          (iow, it isn't safe to use TyEnt__make_EMPTY on them).  Just
   2160          memset them to zero when done. */
   2161       TyEnt qparentE[N_D3_TYPE_STACK]; /* parent TyEnts */
   2162       Int   qlevel[N_D3_TYPE_STACK];
   2163 
   2164    }
   2165    D3TypeParser;
   2166 
   2167 static void typestack_show ( D3TypeParser* parser, HChar* str ) {
   2168    Word i;
   2169    VG_(printf)("  typestack (%s) {\n", str);
   2170    for (i = 0; i <= parser->sp; i++) {
   2171       VG_(printf)("    [%ld] (level %d): ", i, parser->qlevel[i]);
   2172       ML_(pp_TyEnt)( &parser->qparentE[i] );
   2173       VG_(printf)("\n");
   2174    }
   2175    VG_(printf)("  }\n");
   2176 }
   2177 
   2178 /* Remove from the stack, all entries with .level > 'level' */
   2179 static
   2180 void typestack_preen ( D3TypeParser* parser, Bool td3, Int level )
   2181 {
   2182    Bool changed = False;
   2183    vg_assert(parser->sp < N_D3_TYPE_STACK);
   2184    while (True) {
   2185       vg_assert(parser->sp >= -1);
   2186       if (parser->sp == -1) break;
   2187       if (parser->qlevel[parser->sp] <= level) break;
   2188       if (0)
   2189          TRACE_D3("BBBBAAAA typestack_pop [newsp=%d]\n", parser->sp-1);
   2190       vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
   2191       VG_(memset)(&parser->qparentE[parser->sp], 0, sizeof(TyEnt));
   2192       parser->qparentE[parser->sp].cuOff = D3_INVALID_CUOFF;
   2193       parser->qparentE[parser->sp].tag = Te_EMPTY;
   2194       parser->qlevel[parser->sp] = 0;
   2195       parser->sp--;
   2196       changed = True;
   2197    }
   2198    if (changed && td3)
   2199       typestack_show( parser, "after preen" );
   2200 }
   2201 
   2202 static Bool typestack_is_empty ( D3TypeParser* parser ) {
   2203    vg_assert(parser->sp >= -1 && parser->sp < N_D3_TYPE_STACK);
   2204    return parser->sp == -1;
   2205 }
   2206 
   2207 static void typestack_push ( CUConst* cc,
   2208                              D3TypeParser* parser,
   2209                              Bool td3,
   2210                              TyEnt* parentE, Int level ) {
   2211    if (0)
   2212    TRACE_D3("BBBBAAAA typestack_push[newsp=%d]: %d  %05lx\n",
   2213             parser->sp+1, level, parentE->cuOff);
   2214 
   2215    /* First we need to zap everything >= 'level', as we are about to
   2216       replace any previous entry at 'level', so .. */
   2217    typestack_preen(parser, /*td3*/False, level-1);
   2218 
   2219    vg_assert(parser->sp >= -1);
   2220    vg_assert(parser->sp < N_D3_TYPE_STACK);
   2221    if (parser->sp == N_D3_TYPE_STACK-1)
   2222       cc->barf("typestack_push: N_D3_TYPE_STACK is too low; "
   2223                "increase and recompile");
   2224    if (parser->sp >= 0)
   2225       vg_assert(parser->qlevel[parser->sp] < level);
   2226    parser->sp++;
   2227    vg_assert(parser->qparentE[parser->sp].tag == Te_EMPTY);
   2228    vg_assert(parser->qlevel[parser->sp]  == 0);
   2229    vg_assert(parentE);
   2230    vg_assert(ML_(TyEnt__is_type)(parentE));
   2231    vg_assert(parentE->cuOff != D3_INVALID_CUOFF);
   2232    parser->qparentE[parser->sp] = *parentE;
   2233    parser->qlevel[parser->sp]  = level;
   2234    if (td3)
   2235       typestack_show( parser, "after push" );
   2236 }
   2237 
   2238 /* True if the subrange type being parsed gives the bounds of an array. */
   2239 static Bool subrange_type_denotes_array_bounds ( D3TypeParser* parser,
   2240                                                  DW_TAG dtag ) {
   2241    vg_assert(dtag == DW_TAG_subrange_type);
   2242    /* For most languages, a subrange_type dtag always gives the
   2243       bounds of an array.
   2244       For Ada, there are additional conditions as a subrange_type
   2245       is also used for other purposes. */
   2246    if (parser->language != 'A')
   2247       /* not Ada, so it definitely denotes an array bound. */
   2248       return True;
   2249    else
   2250       /* Extra constraints for Ada: it only denotes an array bound if .. */
   2251       return (! typestack_is_empty(parser)
   2252               && parser->qparentE[parser->sp].tag == Te_TyArray);
   2253 }
   2254 
   2255 /* Parse a type-related DIE.  'parser' holds the current parser state.
   2256    'admin' is where the completed types are dumped.  'dtag' is the tag
   2257    for this DIE.  'c_die' points to the start of the data fields (FORM
   2258    stuff) for the DIE.  c_abbv points to the start of the (name,form)
   2259    pairs which describe the DIE.
   2260 
   2261    We may find the DIE uninteresting, in which case we should ignore
   2262    it.
   2263 
   2264    What happens: the DIE is examined.  If uninteresting, it is ignored.
   2265    Otherwise, the DIE gives rise to two things:
   2266 
   2267    (1) the offset of this DIE in the CU -- the cuOffset, a UWord
   2268    (2) a TyAdmin structure, which holds the type, or related stuff
   2269 
   2270    (2) is added at the end of 'tyadmins', at some index, say 'i'.
   2271 
   2272    A pair (cuOffset, i) is added to 'tydict'.
   2273 
   2274    Hence 'tyadmins' holds the actual type entities, and 'tydict' holds
   2275    a mapping from cuOffset to the index of the corresponding entry in
   2276    'tyadmin'.
   2277 
   2278    When resolving a cuOffset to a TyAdmin, first look up the cuOffset
   2279    in the tydict (by binary search).  This gives an index into
   2280    tyadmins, and the required entity lives in tyadmins at that index.
   2281 */
   2282 __attribute__((noinline))
   2283 static void parse_type_DIE ( /*MOD*/XArray* /* of TyEnt */ tyents,
   2284                              /*MOD*/D3TypeParser* parser,
   2285                              DW_TAG dtag,
   2286                              UWord posn,
   2287                              Int level,
   2288                              Cursor* c_die,
   2289                              Cursor* c_abbv,
   2290                              CUConst* cc,
   2291                              Bool td3 )
   2292 {
   2293    ULong cts;
   2294    Int   ctsSzB;
   2295    UWord ctsMemSzB;
   2296    TyEnt typeE;
   2297    TyEnt atomE;
   2298    TyEnt fieldE;
   2299    TyEnt boundE;
   2300    Bool  debug_types_flag;
   2301    Bool  alt_flag;
   2302 
   2303    UWord saved_die_c_offset  = get_position_of_Cursor( c_die );
   2304    UWord saved_abbv_c_offset = get_position_of_Cursor( c_abbv );
   2305 
   2306    VG_(memset)( &typeE,  0xAA, sizeof(typeE) );
   2307    VG_(memset)( &atomE,  0xAA, sizeof(atomE) );
   2308    VG_(memset)( &fieldE, 0xAA, sizeof(fieldE) );
   2309    VG_(memset)( &boundE, 0xAA, sizeof(boundE) );
   2310 
   2311    /* If we've returned to a level at or above any previously noted
   2312       parent, un-note it, so we don't believe we're still collecting
   2313       its children. */
   2314    typestack_preen( parser, td3, level-1 );
   2315 
   2316    if (dtag == DW_TAG_compile_unit
   2317        || dtag == DW_TAG_type_unit
   2318        || dtag == DW_TAG_partial_unit) {
   2319       /* See if we can find DW_AT_language, since it is important for
   2320          establishing array bounds (see DW_TAG_subrange_type below in
   2321          this fn) */
   2322       while (True) {
   2323          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
   2324          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
   2325          if (attr == 0 && form == 0) break;
   2326          get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
   2327                             cc, c_die, False/*td3*/, form );
   2328          if (attr != DW_AT_language)
   2329             continue;
   2330          if (ctsSzB == 0)
   2331            goto bad_DIE;
   2332          switch (cts) {
   2333             case DW_LANG_C89: case DW_LANG_C:
   2334             case DW_LANG_C_plus_plus: case DW_LANG_ObjC:
   2335             case DW_LANG_ObjC_plus_plus: case DW_LANG_UPC:
   2336             case DW_LANG_Upc: case DW_LANG_C99:
   2337                parser->language = 'C'; break;
   2338             case DW_LANG_Fortran77: case DW_LANG_Fortran90:
   2339             case DW_LANG_Fortran95:
   2340                parser->language = 'F'; break;
   2341             case DW_LANG_Ada83: case DW_LANG_Ada95:
   2342                parser->language = 'A'; break;
   2343             case DW_LANG_Cobol74:
   2344             case DW_LANG_Cobol85: case DW_LANG_Pascal83:
   2345             case DW_LANG_Modula2: case DW_LANG_Java:
   2346             case DW_LANG_PLI:
   2347             case DW_LANG_D: case DW_LANG_Python:
   2348             case DW_LANG_Mips_Assembler:
   2349                parser->language = '?'; break;
   2350             default:
   2351                goto bad_DIE;
   2352          }
   2353       }
   2354    }
   2355 
   2356    if (dtag == DW_TAG_base_type) {
   2357       /* We can pick up a new base type any time. */
   2358       VG_(memset)(&typeE, 0, sizeof(typeE));
   2359       typeE.cuOff = D3_INVALID_CUOFF;
   2360       typeE.tag   = Te_TyBase;
   2361       while (True) {
   2362          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
   2363          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
   2364          if (attr == 0 && form == 0) break;
   2365          get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
   2366                             cc, c_die, False/*td3*/, form );
   2367          if (attr == DW_AT_name && ctsMemSzB > 0) {
   2368             typeE.Te.TyBase.name
   2369                = ML_(dinfo_strdup)( "di.readdwarf3.ptD.base_type.1",
   2370                                     (UChar*)(UWord)cts );
   2371          }
   2372          if (attr == DW_AT_byte_size && ctsSzB > 0) {
   2373             typeE.Te.TyBase.szB = cts;
   2374          }
   2375          if (attr == DW_AT_encoding && ctsSzB > 0) {
   2376             switch (cts) {
   2377                case DW_ATE_unsigned: case DW_ATE_unsigned_char:
   2378                case DW_ATE_UTF: /* since DWARF4, e.g. char16_t from C++ */
   2379                case DW_ATE_boolean:/* FIXME - is this correct? */
   2380                case DW_ATE_unsigned_fixed:
   2381                   typeE.Te.TyBase.enc = 'U'; break;
   2382                case DW_ATE_signed: case DW_ATE_signed_char:
   2383                case DW_ATE_signed_fixed:
   2384                   typeE.Te.TyBase.enc = 'S'; break;
   2385                case DW_ATE_float:
   2386                   typeE.Te.TyBase.enc = 'F'; break;
   2387                case DW_ATE_complex_float:
   2388                   typeE.Te.TyBase.enc = 'C'; break;
   2389                default:
   2390                   goto bad_DIE;
   2391             }
   2392          }
   2393       }
   2394 
   2395       /* Invent a name if it doesn't have one.  gcc-4.3
   2396          -ftree-vectorize is observed to emit nameless base types. */
   2397       if (!typeE.Te.TyBase.name)
   2398          typeE.Te.TyBase.name
   2399             = ML_(dinfo_strdup)( "di.readdwarf3.ptD.base_type.2",
   2400                                  "<anon_base_type>" );
   2401 
   2402       /* Do we have something that looks sane? */
   2403       if (/* must have a name */
   2404           typeE.Te.TyBase.name == NULL
   2405           /* and a plausible size.  Yes, really 32: "complex long
   2406              double" apparently has size=32 */
   2407           || typeE.Te.TyBase.szB < 0 || typeE.Te.TyBase.szB > 32
   2408           /* and a plausible encoding */
   2409           || (typeE.Te.TyBase.enc != 'U'
   2410               && typeE.Te.TyBase.enc != 'S'
   2411               && typeE.Te.TyBase.enc != 'F'
   2412               && typeE.Te.TyBase.enc != 'C'))
   2413          goto bad_DIE;
   2414       /* Last minute hack: if we see this
   2415          <1><515>: DW_TAG_base_type
   2416              DW_AT_byte_size   : 0
   2417              DW_AT_encoding    : 5
   2418              DW_AT_name        : void
   2419          convert it into a real Void type. */
   2420       if (typeE.Te.TyBase.szB == 0
   2421           && 0 == VG_(strcmp)("void", typeE.Te.TyBase.name)) {
   2422          ML_(TyEnt__make_EMPTY)(&typeE);
   2423          typeE.tag = Te_TyVoid;
   2424          typeE.Te.TyVoid.isFake = False; /* it's a real one! */
   2425       }
   2426 
   2427       goto acquire_Type;
   2428    }
   2429 
   2430    /*
   2431     * An example of DW_TAG_rvalue_reference_type:
   2432     *
   2433     * $ readelf --debug-dump /usr/lib/debug/usr/lib/libstdc++.so.6.0.16.debug
   2434     *  <1><1014>: Abbrev Number: 55 (DW_TAG_rvalue_reference_type)
   2435     *     <1015>   DW_AT_byte_size   : 4
   2436     *     <1016>   DW_AT_type        : <0xe52>
   2437     */
   2438    if (dtag == DW_TAG_pointer_type || dtag == DW_TAG_reference_type
   2439        || dtag == DW_TAG_ptr_to_member_type
   2440        || dtag == DW_TAG_rvalue_reference_type) {
   2441       /* This seems legit for _pointer_type and _reference_type.  I
   2442          don't know if rolling _ptr_to_member_type in here really is
   2443          legit, but it's better than not handling it at all. */
   2444       VG_(memset)(&typeE, 0, sizeof(typeE));
   2445       typeE.cuOff = D3_INVALID_CUOFF;
   2446       switch (dtag) {
   2447       case DW_TAG_pointer_type:
   2448          typeE.tag = Te_TyPtr;
   2449          break;
   2450       case DW_TAG_reference_type:
   2451          typeE.tag = Te_TyRef;
   2452          break;
   2453       case DW_TAG_ptr_to_member_type:
   2454          typeE.tag = Te_TyPtrMbr;
   2455          break;
   2456       case DW_TAG_rvalue_reference_type:
   2457          typeE.tag = Te_TyRvalRef;
   2458          break;
   2459       default:
   2460          vg_assert(False);
   2461       }
   2462       /* target type defaults to void */
   2463       typeE.Te.TyPorR.typeR = D3_FAKEVOID_CUOFF;
   2464       /* These four type kinds don't *have* to specify their size, in
   2465          which case we assume it's a machine word.  But if they do
   2466          specify it, it must be a machine word :-)  This probably
   2467          assumes that the word size of the Dwarf3 we're reading is the
   2468          same size as that on the machine.  gcc appears to give a size
   2469          whereas icc9 doesn't. */
   2470       typeE.Te.TyPorR.szB = sizeof(UWord);
   2471       while (True) {
   2472          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
   2473          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
   2474          if (attr == 0 && form == 0) break;
   2475          get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
   2476                             cc, c_die, False/*td3*/, form );
   2477          if (attr == DW_AT_byte_size && ctsSzB > 0) {
   2478             typeE.Te.TyPorR.szB = cts;
   2479          }
   2480          if (attr == DW_AT_type && ctsSzB > 0) {
   2481             typeE.Te.TyPorR.typeR = cook_die_using_form( cc, (UWord)cts, form );
   2482          }
   2483       }
   2484       /* Do we have something that looks sane? */
   2485       if (typeE.Te.TyPorR.szB != sizeof(UWord))
   2486          goto bad_DIE;
   2487       else
   2488          goto acquire_Type;
   2489    }
   2490 
   2491    if (dtag == DW_TAG_enumeration_type) {
   2492       /* Create a new Type to hold the results. */
   2493       VG_(memset)(&typeE, 0, sizeof(typeE));
   2494       typeE.cuOff = posn;
   2495       typeE.tag   = Te_TyEnum;
   2496       typeE.Te.TyEnum.atomRs
   2497          = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ptD.enum_type.1",
   2498                        ML_(dinfo_free),
   2499                        sizeof(UWord) );
   2500       while (True) {
   2501          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
   2502          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
   2503          if (attr == 0 && form == 0) break;
   2504          get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
   2505                             cc, c_die, False/*td3*/, form );
   2506          if (attr == DW_AT_name && ctsMemSzB > 0) {
   2507             typeE.Te.TyEnum.name
   2508               = ML_(dinfo_strdup)( "di.readdwarf3.pTD.enum_type.2",
   2509                                    (UChar*)(UWord)cts );
   2510          }
   2511          if (attr == DW_AT_byte_size && ctsSzB > 0) {
   2512             typeE.Te.TyEnum.szB = cts;
   2513          }
   2514       }
   2515 
   2516       if (!typeE.Te.TyEnum.name)
   2517          typeE.Te.TyEnum.name
   2518             = ML_(dinfo_strdup)( "di.readdwarf3.pTD.enum_type.3",
   2519                                  "<anon_enum_type>" );
   2520 
   2521       /* Do we have something that looks sane? */
   2522       if (typeE.Te.TyEnum.szB == 0
   2523           /* we must know the size */
   2524           /* but not for Ada, which uses such dummy
   2525              enumerations as helper for gdb ada mode. */
   2526           && parser->language != 'A') {
   2527          /* GCC has been seen to put an odd DIE like this into
   2528             .debug_types:
   2529 
   2530             <1><cb72>: DW_TAG_enumeration_type (in .debug_types)
   2531             DW_AT_name        : (indirect string, offset: 0x3374a): exec_direction_kind
   2532             DW_AT_declaration : 1
   2533 
   2534             It isn't clear what this means, but we accept it and
   2535             assume that the enum is int-sized.  */
   2536          if (cc->is_type_unit) {
   2537             typeE.Te.TyEnum.szB = sizeof(int);
   2538          } else {
   2539             goto bad_DIE;
   2540          }
   2541       }
   2542 
   2543       /* On't stack! */
   2544       typestack_push( cc, parser, td3, &typeE, level );
   2545       goto acquire_Type;
   2546    }
   2547 
   2548    /* gcc (GCC) 4.4.0 20081017 (experimental) occasionally produces
   2549       DW_TAG_enumerator with only a DW_AT_name but no
   2550       DW_AT_const_value.  This is in violation of the Dwarf3 standard,
   2551       and appears to be a new "feature" of gcc - versions 4.3.x and
   2552       earlier do not appear to do this.  So accept DW_TAG_enumerator
   2553       which only have a name but no value.  An example:
   2554 
   2555       <1><180>: Abbrev Number: 6 (DW_TAG_enumeration_type)
   2556          <181>   DW_AT_name        : (indirect string, offset: 0xda70):
   2557                                      QtMsgType
   2558          <185>   DW_AT_byte_size   : 4
   2559          <186>   DW_AT_decl_file   : 14
   2560          <187>   DW_AT_decl_line   : 1480
   2561          <189>   DW_AT_sibling     : <0x1a7>
   2562       <2><18d>: Abbrev Number: 7 (DW_TAG_enumerator)
   2563          <18e>   DW_AT_name        : (indirect string, offset: 0x9e18):
   2564                                      QtDebugMsg
   2565       <2><192>: Abbrev Number: 7 (DW_TAG_enumerator)
   2566          <193>   DW_AT_name        : (indirect string, offset: 0x1505f):
   2567                                      QtWarningMsg
   2568       <2><197>: Abbrev Number: 7 (DW_TAG_enumerator)
   2569          <198>   DW_AT_name        : (indirect string, offset: 0x16f4a):
   2570                                      QtCriticalMsg
   2571       <2><19c>: Abbrev Number: 7 (DW_TAG_enumerator)
   2572          <19d>   DW_AT_name        : (indirect string, offset: 0x156dd):
   2573                                      QtFatalMsg
   2574       <2><1a1>: Abbrev Number: 7 (DW_TAG_enumerator)
   2575          <1a2>   DW_AT_name        : (indirect string, offset: 0x13660):
   2576                                      QtSystemMsg
   2577    */
   2578    if (dtag == DW_TAG_enumerator) {
   2579       VG_(memset)( &atomE, 0, sizeof(atomE) );
   2580       atomE.cuOff = posn;
   2581       atomE.tag   = Te_Atom;
   2582       while (True) {
   2583          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
   2584          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
   2585          if (attr == 0 && form == 0) break;
   2586          get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
   2587                             cc, c_die, False/*td3*/, form );
   2588          if (attr == DW_AT_name && ctsMemSzB > 0) {
   2589             atomE.Te.Atom.name
   2590               = ML_(dinfo_strdup)( "di.readdwarf3.pTD.enumerator.1",
   2591                                    (UChar*)(UWord)cts );
   2592          }
   2593          if (attr == DW_AT_const_value && ctsSzB > 0) {
   2594             atomE.Te.Atom.value = cts;
   2595             atomE.Te.Atom.valueKnown = True;
   2596          }
   2597       }
   2598       /* Do we have something that looks sane? */
   2599       if (atomE.Te.Atom.name == NULL)
   2600          goto bad_DIE;
   2601       /* Do we have a plausible parent? */
   2602       if (typestack_is_empty(parser)) goto bad_DIE;
   2603       vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
   2604       vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
   2605       if (level != parser->qlevel[parser->sp]+1) goto bad_DIE;
   2606       if (parser->qparentE[parser->sp].tag != Te_TyEnum) goto bad_DIE;
   2607       /* Record this child in the parent */
   2608       vg_assert(parser->qparentE[parser->sp].Te.TyEnum.atomRs);
   2609       VG_(addToXA)( parser->qparentE[parser->sp].Te.TyEnum.atomRs,
   2610                     &atomE );
   2611       /* And record the child itself */
   2612       goto acquire_Atom;
   2613    }
   2614 
   2615    /* Treat DW_TAG_class_type as if it was a DW_TAG_structure_type.  I
   2616       don't know if this is correct, but it at least makes this reader
   2617       usable for gcc-4.3 produced Dwarf3. */
   2618    if (dtag == DW_TAG_structure_type || dtag == DW_TAG_class_type
   2619        || dtag == DW_TAG_union_type) {
   2620       Bool have_szB = False;
   2621       Bool is_decl  = False;
   2622       Bool is_spec  = False;
   2623       /* Create a new Type to hold the results. */
   2624       VG_(memset)(&typeE, 0, sizeof(typeE));
   2625       typeE.cuOff = posn;
   2626       typeE.tag   = Te_TyStOrUn;
   2627       typeE.Te.TyStOrUn.name = NULL;
   2628       typeE.Te.TyStOrUn.fieldRs
   2629          = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.pTD.struct_type.1",
   2630                        ML_(dinfo_free),
   2631                        sizeof(UWord) );
   2632       typeE.Te.TyStOrUn.complete = True;
   2633       typeE.Te.TyStOrUn.isStruct = dtag == DW_TAG_structure_type
   2634                                    || dtag == DW_TAG_class_type;
   2635       while (True) {
   2636          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
   2637          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
   2638          if (attr == 0 && form == 0) break;
   2639          get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
   2640                             cc, c_die, False/*td3*/, form );
   2641          if (attr == DW_AT_name && ctsMemSzB > 0) {
   2642             typeE.Te.TyStOrUn.name
   2643                = ML_(dinfo_strdup)( "di.readdwarf3.ptD.struct_type.2",
   2644                                     (UChar*)(UWord)cts );
   2645          }
   2646          if (attr == DW_AT_byte_size && ctsSzB >= 0) {
   2647             typeE.Te.TyStOrUn.szB = cts;
   2648             have_szB = True;
   2649          }
   2650          if (attr == DW_AT_declaration && ctsSzB > 0 && cts > 0) {
   2651             is_decl = True;
   2652          }
   2653          if (attr == DW_AT_specification && ctsSzB > 0 && cts > 0) {
   2654             is_spec = True;
   2655          }
   2656       }
   2657       /* Do we have something that looks sane? */
   2658       if (is_decl && (!is_spec)) {
   2659          /* It's a DW_AT_declaration.  We require the name but
   2660             nothing else. */
   2661          /* JRS 2012-06-28: following discussion w/ tromey, if the the
   2662             type doesn't have name, just make one up, and accept it.
   2663             It might be referred to by other DIEs, so ignoring it
   2664             doesn't seem like a safe option. */
   2665          if (typeE.Te.TyStOrUn.name == NULL)
   2666             typeE.Te.TyStOrUn.name
   2667                = ML_(dinfo_strdup)( "di.readdwarf3.ptD.struct_type.3",
   2668                                     "<anon_struct_type>" );
   2669          typeE.Te.TyStOrUn.complete = False;
   2670          /* JRS 2009 Aug 10: <possible kludge>? */
   2671          /* Push this tyent on the stack, even though it's incomplete.
   2672             It appears that gcc-4.4 on Fedora 11 will sometimes create
   2673             DW_TAG_member entries for it, and so we need to have a
   2674             plausible parent present in order for that to work.  See
   2675             #200029 comments 8 and 9. */
   2676          typestack_push( cc, parser, td3, &typeE, level );
   2677          /* </possible kludge> */
   2678          goto acquire_Type;
   2679       }
   2680       if ((!is_decl) /* && (!is_spec) */) {
   2681          /* this is the common, ordinary case */
   2682          if ((!have_szB) /* we must know the size */
   2683              /* But the name can be present, or not */)
   2684             goto bad_DIE;
   2685          /* On't stack! */
   2686          typestack_push( cc, parser, td3, &typeE, level );
   2687          goto acquire_Type;
   2688       }
   2689       else {
   2690          /* don't know how to handle any other variants just now */
   2691          goto bad_DIE;
   2692       }
   2693    }
   2694 
   2695    if (dtag == DW_TAG_member) {
   2696       /* Acquire member entries for both DW_TAG_structure_type and
   2697          DW_TAG_union_type.  They differ minorly, in that struct
   2698          members must have a DW_AT_data_member_location expression
   2699          whereas union members must not. */
   2700       Bool parent_is_struct;
   2701       VG_(memset)( &fieldE, 0, sizeof(fieldE) );
   2702       fieldE.cuOff = posn;
   2703       fieldE.tag   = Te_Field;
   2704       fieldE.Te.Field.typeR = D3_INVALID_CUOFF;
   2705       while (True) {
   2706          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
   2707          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
   2708          if (attr == 0 && form == 0) break;
   2709          get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
   2710                             cc, c_die, False/*td3*/, form );
   2711          if (attr == DW_AT_name && ctsMemSzB > 0) {
   2712             fieldE.Te.Field.name
   2713                = ML_(dinfo_strdup)( "di.readdwarf3.ptD.member.1",
   2714                                     (UChar*)(UWord)cts );
   2715          }
   2716          if (attr == DW_AT_type && ctsSzB > 0) {
   2717             fieldE.Te.Field.typeR = cook_die_using_form( cc, (UWord)cts, form );
   2718          }
   2719          /* There are 2 different cases for DW_AT_data_member_location.
   2720             If it is a constant class attribute, it contains byte offset
   2721             from the beginning of the containing entity.
   2722             Otherwise it is a location expression.  */
   2723          if (attr == DW_AT_data_member_location && ctsSzB > 0) {
   2724             fieldE.Te.Field.nLoc = -1;
   2725             fieldE.Te.Field.pos.offset = cts;
   2726          } else if (attr == DW_AT_data_member_location && ctsMemSzB > 0) {
   2727             fieldE.Te.Field.nLoc = (UWord)ctsMemSzB;
   2728             fieldE.Te.Field.pos.loc
   2729                = ML_(dinfo_memdup)( "di.readdwarf3.ptD.member.2",
   2730                                     (UChar*)(UWord)cts,
   2731                                     (SizeT)fieldE.Te.Field.nLoc );
   2732          }
   2733       }
   2734       /* Do we have a plausible parent? */
   2735       if (typestack_is_empty(parser)) goto bad_DIE;
   2736       vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
   2737       vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
   2738       if (level != parser->qlevel[parser->sp]+1) goto bad_DIE;
   2739       if (parser->qparentE[parser->sp].tag != Te_TyStOrUn) goto bad_DIE;
   2740       /* Do we have something that looks sane?  If this a member of a
   2741          struct, we must have a location expression; but if a member
   2742          of a union that is irrelevant (D3 spec sec 5.6.6).  We ought
   2743          to reject in the latter case, but some compilers have been
   2744          observed to emit constant-zero expressions.  So just ignore
   2745          them. */
   2746       parent_is_struct
   2747          = parser->qparentE[parser->sp].Te.TyStOrUn.isStruct;
   2748       if (!fieldE.Te.Field.name)
   2749          fieldE.Te.Field.name
   2750             = ML_(dinfo_strdup)( "di.readdwarf3.ptD.member.3",
   2751                                  "<anon_field>" );
   2752       vg_assert(fieldE.Te.Field.name);
   2753       if (fieldE.Te.Field.typeR == D3_INVALID_CUOFF)
   2754          goto bad_DIE;
   2755       if (fieldE.Te.Field.nLoc) {
   2756          if (!parent_is_struct) {
   2757             /* If this is a union type, pretend we haven't seen the data
   2758                member location expression, as it is by definition
   2759                redundant (it must be zero). */
   2760             if (fieldE.Te.Field.nLoc > 0)
   2761                ML_(dinfo_free)(fieldE.Te.Field.pos.loc);
   2762             fieldE.Te.Field.pos.loc = NULL;
   2763             fieldE.Te.Field.nLoc = 0;
   2764          }
   2765          /* Record this child in the parent */
   2766          fieldE.Te.Field.isStruct = parent_is_struct;
   2767          vg_assert(parser->qparentE[parser->sp].Te.TyStOrUn.fieldRs);
   2768          VG_(addToXA)( parser->qparentE[parser->sp].Te.TyStOrUn.fieldRs,
   2769                        &posn );
   2770          /* And record the child itself */
   2771          goto acquire_Field;
   2772       } else {
   2773          /* Member with no location - this can happen with static
   2774             const members in C++ code which are compile time constants
   2775             that do no exist in the class. They're not of any interest
   2776             to us so we ignore them. */
   2777          ML_(TyEnt__make_EMPTY)(&fieldE);
   2778       }
   2779    }
   2780 
   2781    if (dtag == DW_TAG_array_type) {
   2782       VG_(memset)(&typeE, 0, sizeof(typeE));
   2783       typeE.cuOff = posn;
   2784       typeE.tag   = Te_TyArray;
   2785       typeE.Te.TyArray.typeR = D3_INVALID_CUOFF;
   2786       typeE.Te.TyArray.boundRs
   2787          = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ptD.array_type.1",
   2788                        ML_(dinfo_free),
   2789                        sizeof(UWord) );
   2790       while (True) {
   2791          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
   2792          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
   2793          if (attr == 0 && form == 0) break;
   2794          get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
   2795                             cc, c_die, False/*td3*/, form );
   2796          if (attr == DW_AT_type && ctsSzB > 0) {
   2797             typeE.Te.TyArray.typeR = cook_die_using_form( cc, (UWord)cts,
   2798                                                           form );
   2799          }
   2800       }
   2801       if (typeE.Te.TyArray.typeR == D3_INVALID_CUOFF)
   2802          goto bad_DIE;
   2803       /* On't stack! */
   2804       typestack_push( cc, parser, td3, &typeE, level );
   2805       goto acquire_Type;
   2806    }
   2807 
   2808    /* this is a subrange type defining the bounds of an array. */
   2809    if (dtag == DW_TAG_subrange_type
   2810        && subrange_type_denotes_array_bounds(parser, dtag)) {
   2811       Bool have_lower = False;
   2812       Bool have_upper = False;
   2813       Bool have_count = False;
   2814       Long lower = 0;
   2815       Long upper = 0;
   2816 
   2817       switch (parser->language) {
   2818          case 'C': have_lower = True;  lower = 0; break;
   2819          case 'F': have_lower = True;  lower = 1; break;
   2820          case '?': have_lower = False; break;
   2821          case 'A': have_lower = False; break;
   2822          default:  vg_assert(0); /* assured us by handling of
   2823                                     DW_TAG_compile_unit in this fn */
   2824       }
   2825 
   2826       VG_(memset)( &boundE, 0, sizeof(boundE) );
   2827       boundE.cuOff = D3_INVALID_CUOFF;
   2828       boundE.tag   = Te_Bound;
   2829       while (True) {
   2830          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
   2831          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
   2832          if (attr == 0 && form == 0) break;
   2833          get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
   2834                             cc, c_die, False/*td3*/, form );
   2835          if (attr == DW_AT_lower_bound && ctsSzB > 0) {
   2836             lower      = (Long)cts;
   2837             have_lower = True;
   2838          }
   2839          if (attr == DW_AT_upper_bound && ctsSzB > 0) {
   2840             upper      = (Long)cts;
   2841             have_upper = True;
   2842          }
   2843          if (attr == DW_AT_count && ctsSzB > 0) {
   2844             /*count    = (Long)cts;*/
   2845             have_count = True;
   2846          }
   2847       }
   2848       /* FIXME: potentially skip the rest if no parent present, since
   2849          it could be the case that this subrange type is free-standing
   2850          (not being used to describe the bounds of a containing array
   2851          type) */
   2852       /* Do we have a plausible parent? */
   2853       if (typestack_is_empty(parser)) goto bad_DIE;
   2854       vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
   2855       vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
   2856       if (level != parser->qlevel[parser->sp]+1) goto bad_DIE;
   2857       if (parser->qparentE[parser->sp].tag != Te_TyArray) goto bad_DIE;
   2858 
   2859       /* Figure out if we have a definite range or not */
   2860       if (have_lower && have_upper && (!have_count)) {
   2861          boundE.Te.Bound.knownL = True;
   2862          boundE.Te.Bound.knownU = True;
   2863          boundE.Te.Bound.boundL = lower;
   2864          boundE.Te.Bound.boundU = upper;
   2865       }
   2866       else if (have_lower && (!have_upper) && (!have_count)) {
   2867          boundE.Te.Bound.knownL = True;
   2868          boundE.Te.Bound.knownU = False;
   2869          boundE.Te.Bound.boundL = lower;
   2870          boundE.Te.Bound.boundU = 0;
   2871       }
   2872       else if ((!have_lower) && have_upper && (!have_count)) {
   2873          boundE.Te.Bound.knownL = False;
   2874          boundE.Te.Bound.knownU = True;
   2875          boundE.Te.Bound.boundL = 0;
   2876          boundE.Te.Bound.boundU = upper;
   2877       }
   2878       else if ((!have_lower) && (!have_upper) && (!have_count)) {
   2879          boundE.Te.Bound.knownL = False;
   2880          boundE.Te.Bound.knownU = False;
   2881          boundE.Te.Bound.boundL = 0;
   2882          boundE.Te.Bound.boundU = 0;
   2883       } else {
   2884          /* FIXME: handle more cases */
   2885          goto bad_DIE;
   2886       }
   2887 
   2888       /* Record this bound in the parent */
   2889       boundE.cuOff = posn;
   2890       vg_assert(parser->qparentE[parser->sp].Te.TyArray.boundRs);
   2891       VG_(addToXA)( parser->qparentE[parser->sp].Te.TyArray.boundRs,
   2892                     &boundE.cuOff );
   2893       /* And record the child itself */
   2894       goto acquire_Bound;
   2895    }
   2896 
   2897    /* typedef or subrange_type other than array bounds. */
   2898    if (dtag == DW_TAG_typedef
   2899        || (dtag == DW_TAG_subrange_type
   2900            && !subrange_type_denotes_array_bounds(parser, dtag))) {
   2901       /* subrange_type other than array bound is only for Ada. */
   2902       vg_assert (dtag == DW_TAG_typedef || parser->language == 'A');
   2903       /* We can pick up a new typedef/subrange_type any time. */
   2904       VG_(memset)(&typeE, 0, sizeof(typeE));
   2905       typeE.cuOff = D3_INVALID_CUOFF;
   2906       typeE.tag   = Te_TyTyDef;
   2907       typeE.Te.TyTyDef.name = NULL;
   2908       typeE.Te.TyTyDef.typeR = D3_INVALID_CUOFF;
   2909       while (True) {
   2910          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
   2911          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
   2912          if (attr == 0 && form == 0) break;
   2913          get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
   2914                             cc, c_die, False/*td3*/, form );
   2915          if (attr == DW_AT_name && ctsMemSzB > 0) {
   2916             typeE.Te.TyTyDef.name
   2917                = ML_(dinfo_strdup)( "di.readdwarf3.ptD.typedef.1",
   2918                                     (UChar*)(UWord)cts );
   2919          }
   2920          if (attr == DW_AT_type && ctsSzB > 0) {
   2921             typeE.Te.TyTyDef.typeR = cook_die_using_form( cc, (UWord)cts,
   2922                                                           form );
   2923          }
   2924       }
   2925       /* Do we have something that looks sane? */
   2926       if (/* must have a name */
   2927           typeE.Te.TyTyDef.name == NULL
   2928           /* However gcc gnat Ada generates minimal typedef
   2929              such as the below => accept no name for Ada.
   2930              <6><91cc>: DW_TAG_typedef
   2931                 DW_AT_abstract_ori: <9066>
   2932           */
   2933           && parser->language != 'A'
   2934           /* but the referred-to type can be absent */)
   2935          goto bad_DIE;
   2936       else
   2937          goto acquire_Type;
   2938    }
   2939 
   2940    if (dtag == DW_TAG_subroutine_type) {
   2941       /* function type? just record that one fact and ask no
   2942          further questions. */
   2943       VG_(memset)(&typeE, 0, sizeof(typeE));
   2944       typeE.cuOff = D3_INVALID_CUOFF;
   2945       typeE.tag   = Te_TyFn;
   2946       goto acquire_Type;
   2947    }
   2948 
   2949    if (dtag == DW_TAG_volatile_type || dtag == DW_TAG_const_type) {
   2950       Int have_ty = 0;
   2951       VG_(memset)(&typeE, 0, sizeof(typeE));
   2952       typeE.cuOff = D3_INVALID_CUOFF;
   2953       typeE.tag   = Te_TyQual;
   2954       typeE.Te.TyQual.qual
   2955          = dtag == DW_TAG_volatile_type ? 'V' : 'C';
   2956       /* target type defaults to 'void' */
   2957       typeE.Te.TyQual.typeR = D3_FAKEVOID_CUOFF;
   2958       while (True) {
   2959          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
   2960          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
   2961          if (attr == 0 && form == 0) break;
   2962          get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
   2963                             cc, c_die, False/*td3*/, form );
   2964          if (attr == DW_AT_type && ctsSzB > 0) {
   2965             typeE.Te.TyQual.typeR = cook_die_using_form( cc, (UWord)cts, form );
   2966             have_ty++;
   2967          }
   2968       }
   2969       /* gcc sometimes generates DW_TAG_const/volatile_type without
   2970          DW_AT_type and GDB appears to interpret the type as 'const
   2971          void' (resp. 'volatile void').  So just allow it .. */
   2972       if (have_ty == 1 || have_ty == 0)
   2973          goto acquire_Type;
   2974       else
   2975          goto bad_DIE;
   2976    }
   2977 
   2978    /*
   2979     * Treat DW_TAG_unspecified_type as type void. An example of DW_TAG_unspecified_type:
   2980     *
   2981     * $ readelf --debug-dump /usr/lib/debug/usr/lib/libstdc++.so.6.0.16.debug
   2982     *  <1><10d4>: Abbrev Number: 53 (DW_TAG_unspecified_type)
   2983     *     <10d5>   DW_AT_name        : (indirect string, offset: 0xdb7): decltype(nullptr)
   2984     */
   2985    if (dtag == DW_TAG_unspecified_type) {
   2986       VG_(memset)(&typeE, 0, sizeof(typeE));
   2987       typeE.cuOff           = D3_INVALID_CUOFF;
   2988       typeE.tag             = Te_TyQual;
   2989       typeE.Te.TyQual.typeR = D3_FAKEVOID_CUOFF;
   2990       goto acquire_Type;
   2991    }
   2992 
   2993    /* else ignore this DIE */
   2994    return;
   2995    /*NOTREACHED*/
   2996 
   2997   acquire_Type:
   2998    if (0) VG_(printf)("YYYY Acquire Type\n");
   2999    vg_assert(ML_(TyEnt__is_type)( &typeE ));
   3000    vg_assert(typeE.cuOff == D3_INVALID_CUOFF || typeE.cuOff == posn);
   3001    typeE.cuOff = posn;
   3002    VG_(addToXA)( tyents, &typeE );
   3003    return;
   3004    /*NOTREACHED*/
   3005 
   3006   acquire_Atom:
   3007    if (0) VG_(printf)("YYYY Acquire Atom\n");
   3008    vg_assert(atomE.tag == Te_Atom);
   3009    vg_assert(atomE.cuOff == D3_INVALID_CUOFF || atomE.cuOff == posn);
   3010    atomE.cuOff = posn;
   3011    VG_(addToXA)( tyents, &atomE );
   3012    return;
   3013    /*NOTREACHED*/
   3014 
   3015   acquire_Field:
   3016    /* For union members, Expr should be absent */
   3017    if (0) VG_(printf)("YYYY Acquire Field\n");
   3018    vg_assert(fieldE.tag == Te_Field);
   3019    vg_assert(fieldE.Te.Field.nLoc <= 0 || fieldE.Te.Field.pos.loc != NULL);
   3020    vg_assert(fieldE.Te.Field.nLoc != 0 || fieldE.Te.Field.pos.loc == NULL);
   3021    if (fieldE.Te.Field.isStruct) {
   3022       vg_assert(fieldE.Te.Field.nLoc != 0);
   3023    } else {
   3024       vg_assert(fieldE.Te.Field.nLoc == 0);
   3025    }
   3026    vg_assert(fieldE.cuOff == D3_INVALID_CUOFF || fieldE.cuOff == posn);
   3027    fieldE.cuOff = posn;
   3028    VG_(addToXA)( tyents, &fieldE );
   3029    return;
   3030    /*NOTREACHED*/
   3031 
   3032   acquire_Bound:
   3033    if (0) VG_(printf)("YYYY Acquire Bound\n");
   3034    vg_assert(boundE.tag == Te_Bound);
   3035    vg_assert(boundE.cuOff == D3_INVALID_CUOFF || boundE.cuOff == posn);
   3036    boundE.cuOff = posn;
   3037    VG_(addToXA)( tyents, &boundE );
   3038    return;
   3039    /*NOTREACHED*/
   3040 
   3041   bad_DIE:
   3042    set_position_of_Cursor( c_die,  saved_die_c_offset );
   3043    set_position_of_Cursor( c_abbv, saved_abbv_c_offset );
   3044    VG_(printf)("\nparse_type_DIE: confused by:\n");
   3045    posn = uncook_die( cc, posn, &debug_types_flag, &alt_flag );
   3046    VG_(printf)(" <%d><%lx>: %s", level, posn, ML_(pp_DW_TAG)( dtag ) );
   3047    if (debug_types_flag) {
   3048       VG_(printf)(" (in .debug_types)");
   3049    } else if (alt_flag) {
   3050       VG_(printf)(" (in alternate .debug_info)");
   3051    }
   3052    VG_(printf)("\n");
   3053    while (True) {
   3054       DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
   3055       DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
   3056       if (attr == 0 && form == 0) break;
   3057       VG_(printf)("     %18s: ", ML_(pp_DW_AT)(attr));
   3058       /* Get the form contents, so as to print them */
   3059       get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
   3060                          cc, c_die, True, form );
   3061       VG_(printf)("\t\n");
   3062    }
   3063    VG_(printf)("\n");
   3064    cc->barf("parse_type_DIE: confused by the above DIE");
   3065    /*NOTREACHED*/
   3066 }
   3067 
   3068 
   3069 /*------------------------------------------------------------*/
   3070 /*---                                                      ---*/
   3071 /*--- Compression of type DIE information                  ---*/
   3072 /*---                                                      ---*/
   3073 /*------------------------------------------------------------*/
   3074 
   3075 static UWord chase_cuOff ( Bool* changed,
   3076                            XArray* /* of TyEnt */ ents,
   3077                            TyEntIndexCache* ents_cache,
   3078                            UWord cuOff )
   3079 {
   3080    TyEnt* ent;
   3081    ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache, cuOff );
   3082 
   3083    if (!ent) {
   3084       VG_(printf)("chase_cuOff: no entry for 0x%05lx\n", cuOff);
   3085       *changed = False;
   3086       return cuOff;
   3087    }
   3088 
   3089    vg_assert(ent->tag != Te_EMPTY);
   3090    if (ent->tag != Te_INDIR) {
   3091       *changed = False;
   3092       return cuOff;
   3093    } else {
   3094       vg_assert(ent->Te.INDIR.indR < cuOff);
   3095       *changed = True;
   3096       return ent->Te.INDIR.indR;
   3097    }
   3098 }
   3099 
   3100 static
   3101 void chase_cuOffs_in_XArray ( Bool* changed,
   3102                               XArray* /* of TyEnt */ ents,
   3103                               TyEntIndexCache* ents_cache,
   3104                               /*MOD*/XArray* /* of UWord */ cuOffs )
   3105 {
   3106    Bool b2 = False;
   3107    Word i, n = VG_(sizeXA)( cuOffs );
   3108    for (i = 0; i < n; i++) {
   3109       Bool   b = False;
   3110       UWord* p = VG_(indexXA)( cuOffs, i );
   3111       *p = chase_cuOff( &b, ents, ents_cache, *p );
   3112       if (b)
   3113          b2 = True;
   3114    }
   3115    *changed = b2;
   3116 }
   3117 
   3118 static Bool TyEnt__subst_R_fields ( XArray* /* of TyEnt */ ents,
   3119                                     TyEntIndexCache* ents_cache,
   3120                                     /*MOD*/TyEnt* te )
   3121 {
   3122    Bool b, changed = False;
   3123    switch (te->tag) {
   3124       case Te_EMPTY:
   3125          break;
   3126       case Te_INDIR:
   3127          te->Te.INDIR.indR
   3128             = chase_cuOff( &b, ents, ents_cache, te->Te.INDIR.indR );
   3129          if (b) changed = True;
   3130          break;
   3131       case Te_UNKNOWN:
   3132          break;
   3133       case Te_Atom:
   3134          break;
   3135       case Te_Field:
   3136          te->Te.Field.typeR
   3137             = chase_cuOff( &b, ents, ents_cache, te->Te.Field.typeR );
   3138          if (b) changed = True;
   3139          break;
   3140       case Te_Bound:
   3141          break;
   3142       case Te_TyBase:
   3143          break;
   3144       case Te_TyPtr:
   3145       case Te_TyRef:
   3146       case Te_TyPtrMbr:
   3147       case Te_TyRvalRef:
   3148          te->Te.TyPorR.typeR
   3149             = chase_cuOff( &b, ents, ents_cache, te->Te.TyPorR.typeR );
   3150          if (b) changed = True;
   3151          break;
   3152       case Te_TyTyDef:
   3153          te->Te.TyTyDef.typeR
   3154             = chase_cuOff( &b, ents, ents_cache, te->Te.TyTyDef.typeR );
   3155          if (b) changed = True;
   3156          break;
   3157       case Te_TyStOrUn:
   3158          chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyStOrUn.fieldRs );
   3159          if (b) changed = True;
   3160          break;
   3161       case Te_TyEnum:
   3162          chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyEnum.atomRs );
   3163          if (b) changed = True;
   3164          break;
   3165       case Te_TyArray:
   3166          te->Te.TyArray.typeR
   3167             = chase_cuOff( &b, ents, ents_cache, te->Te.TyArray.typeR );
   3168          if (b) changed = True;
   3169          chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyArray.boundRs );
   3170          if (b) changed = True;
   3171          break;
   3172       case Te_TyFn:
   3173          break;
   3174       case Te_TyQual:
   3175          te->Te.TyQual.typeR
   3176             = chase_cuOff( &b, ents, ents_cache, te->Te.TyQual.typeR );
   3177          if (b) changed = True;
   3178          break;
   3179       case Te_TyVoid:
   3180          break;
   3181       default:
   3182          ML_(pp_TyEnt)(te);
   3183          vg_assert(0);
   3184    }
   3185    return changed;
   3186 }
   3187 
   3188 /* Make a pass over 'ents'.  For each tyent, inspect the target of any
   3189    'R' or 'Rs' fields (those which refer to other tyents), and replace
   3190    any which point to INDIR nodes with the target of the indirection
   3191    (which should not itself be an indirection).  In summary, this
   3192    routine shorts out all references to indirection nodes. */
   3193 static
   3194 Word dedup_types_substitution_pass ( /*MOD*/XArray* /* of TyEnt */ ents,
   3195                                      TyEntIndexCache* ents_cache )
   3196 {
   3197    Word i, n, nChanged = 0;
   3198    Bool b;
   3199    n = VG_(sizeXA)( ents );
   3200    for (i = 0; i < n; i++) {
   3201       TyEnt* ent = VG_(indexXA)( ents, i );
   3202       vg_assert(ent->tag != Te_EMPTY);
   3203       /* We have to substitute everything, even indirections, so as to
   3204          ensure that chains of indirections don't build up. */
   3205       b = TyEnt__subst_R_fields( ents, ents_cache, ent );
   3206       if (b)
   3207          nChanged++;
   3208    }
   3209 
   3210    return nChanged;
   3211 }
   3212 
   3213 
   3214 /* Make a pass over 'ents', building a dictionary of TyEnts as we go.
   3215    Look up each new tyent in the dictionary in turn.  If it is already
   3216    in the dictionary, replace this tyent with an indirection to the
   3217    existing one, and delete any malloc'd stuff hanging off this one.
   3218    In summary, this routine commons up all tyents that are identical
   3219    as defined by TyEnt__cmp_by_all_except_cuOff. */
   3220 static
   3221 Word dedup_types_commoning_pass ( /*MOD*/XArray* /* of TyEnt */ ents )
   3222 {
   3223    Word    n, i, nDeleted;
   3224    WordFM* dict; /* TyEnt* -> void */
   3225    TyEnt*  ent;
   3226    UWord   keyW, valW;
   3227 
   3228    dict = VG_(newFM)(
   3229              ML_(dinfo_zalloc), "di.readdwarf3.dtcp.1",
   3230              ML_(dinfo_free),
   3231              (Word(*)(UWord,UWord)) ML_(TyEnt__cmp_by_all_except_cuOff)
   3232           );
   3233 
   3234    nDeleted = 0;
   3235    n = VG_(sizeXA)( ents );
   3236    for (i = 0; i < n; i++) {
   3237       ent = VG_(indexXA)( ents, i );
   3238       vg_assert(ent->tag != Te_EMPTY);
   3239 
   3240       /* Ignore indirections, although check that they are
   3241          not forming a cycle. */
   3242       if (ent->tag == Te_INDIR) {
   3243          vg_assert(ent->Te.INDIR.indR < ent->cuOff);
   3244          continue;
   3245       }
   3246 
   3247       keyW = valW = 0;
   3248       if (VG_(lookupFM)( dict, &keyW, &valW, (UWord)ent )) {
   3249          /* it's already in the dictionary. */
   3250          TyEnt* old = (TyEnt*)keyW;
   3251          vg_assert(valW == 0);
   3252          vg_assert(old != ent);
   3253          vg_assert(old->tag != Te_INDIR);
   3254          /* since we are traversing the array in increasing order of
   3255             cuOff: */
   3256          vg_assert(old->cuOff < ent->cuOff);
   3257          /* So anyway, dump this entry and replace it with an
   3258             indirection to the one in the dictionary.  Note that the
   3259             assertion above guarantees that we cannot create cycles of
   3260             indirections, since we are always creating an indirection
   3261             to a tyent with a cuOff lower than this one. */
   3262          ML_(TyEnt__make_EMPTY)( ent );
   3263          ent->tag = Te_INDIR;
   3264          ent->Te.INDIR.indR = old->cuOff;
   3265          nDeleted++;
   3266       } else {
   3267          /* not in dictionary; add it and keep going. */
   3268          VG_(addToFM)( dict, (UWord)ent, 0 );
   3269       }
   3270    }
   3271 
   3272    VG_(deleteFM)( dict, NULL, NULL );
   3273 
   3274    return nDeleted;
   3275 }
   3276 
   3277 
   3278 static
   3279 void dedup_types ( Bool td3,
   3280                    /*MOD*/XArray* /* of TyEnt */ ents,
   3281                    TyEntIndexCache* ents_cache )
   3282 {
   3283    Word m, n, i, nDel, nSubst, nThresh;
   3284    if (0) td3 = True;
   3285 
   3286    n = VG_(sizeXA)( ents );
   3287 
   3288    /* If a commoning pass and a substitution pass both make fewer than
   3289       this many changes, just stop.  It's pointless to burn up CPU
   3290       time trying to compress the last 1% or so out of the array. */
   3291    nThresh = n / 200;
   3292 
   3293    /* First we must sort .ents by its .cuOff fields, so we
   3294       can index into it. */
   3295    VG_(setCmpFnXA)(
   3296       ents,
   3297       (Int(*)(void*,void*)) ML_(TyEnt__cmp_by_cuOff_only)
   3298    );
   3299    VG_(sortXA)( ents );
   3300 
   3301    /* Now repeatedly do commoning and substitution passes over
   3302       the array, until there are no more changes. */
   3303    do {
   3304       nDel   = dedup_types_commoning_pass ( ents );
   3305       nSubst = dedup_types_substitution_pass ( ents, ents_cache );
   3306       vg_assert(nDel >= 0 && nSubst >= 0);
   3307       TRACE_D3("   %ld deletions, %ld substitutions\n", nDel, nSubst);
   3308    } while (nDel > nThresh || nSubst > nThresh);
   3309 
   3310    /* Sanity check: all INDIR nodes should point at a non-INDIR thing.
   3311       In fact this should be true at the end of every loop iteration
   3312       above (a commoning pass followed by a substitution pass), but
   3313       checking it on every iteration is excessively expensive.  Note,
   3314       this loop also computes 'm' for the stats printing below it. */
   3315    m = 0;
   3316    n = VG_(sizeXA)( ents );
   3317    for (i = 0; i < n; i++) {
   3318       TyEnt *ent, *ind;
   3319       ent = VG_(indexXA)( ents, i );
   3320       if (ent->tag != Te_INDIR) continue;
   3321       m++;
   3322       ind = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
   3323                                          ent->Te.INDIR.indR );
   3324       vg_assert(ind);
   3325       vg_assert(ind->tag != Te_INDIR);
   3326    }
   3327 
   3328    TRACE_D3("Overall: %ld before, %ld after\n", n, n-m);
   3329 }
   3330 
   3331 
   3332 /*------------------------------------------------------------*/
   3333 /*---                                                      ---*/
   3334 /*--- Resolution of references to type DIEs                ---*/
   3335 /*---                                                      ---*/
   3336 /*------------------------------------------------------------*/
   3337 
   3338 /* Make a pass through the (temporary) variables array.  Examine the
   3339    type of each variable, check is it found, and chase any Te_INDIRs.
   3340    Postcondition is: each variable has a typeR field that refers to a
   3341    valid type in tyents, or a Te_UNKNOWN, and is certainly guaranteed
   3342    not to refer to a Te_INDIR.  (This is so that we can throw all the
   3343    Te_INDIRs away later). */
   3344 
   3345 __attribute__((noinline))
   3346 static void resolve_variable_types (
   3347                void (*barf)( HChar* ) __attribute__((noreturn)),
   3348                /*R-O*/XArray* /* of TyEnt */ ents,
   3349                /*MOD*/TyEntIndexCache* ents_cache,
   3350                /*MOD*/XArray* /* of TempVar* */ vars
   3351             )
   3352 {
   3353    Word i, n;
   3354    n = VG_(sizeXA)( vars );
   3355    for (i = 0; i < n; i++) {
   3356       TempVar* var = *(TempVar**)VG_(indexXA)( vars, i );
   3357       /* This is the stated type of the variable.  But it might be
   3358          an indirection, so be careful. */
   3359       TyEnt* ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
   3360                                                 var->typeR );
   3361       if (ent && ent->tag == Te_INDIR) {
   3362          ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
   3363                                             ent->Te.INDIR.indR );
   3364          vg_assert(ent);
   3365          vg_assert(ent->tag != Te_INDIR);
   3366       }
   3367 
   3368       /* Deal first with "normal" cases */
   3369       if (ent && ML_(TyEnt__is_type)(ent)) {
   3370          var->typeR = ent->cuOff;
   3371          continue;
   3372       }
   3373 
   3374       /* If there's no ent, it probably we did not manage to read a
   3375          type at the cuOffset which is stated as being this variable's
   3376          type.  Maybe a deficiency in parse_type_DIE.  Complain. */
   3377       if (ent == NULL) {
   3378          VG_(printf)("\n: Invalid cuOff = 0x%05lx\n", var->typeR );
   3379          barf("resolve_variable_types: "
   3380               "cuOff does not refer to a known type");
   3381       }
   3382       vg_assert(ent);
   3383       /* If ent has any other tag, something bad happened, along the
   3384          lines of var->typeR not referring to a type at all. */
   3385       vg_assert(ent->tag == Te_UNKNOWN);
   3386       /* Just accept it; the type will be useless, but at least keep
   3387          going. */
   3388       var->typeR = ent->cuOff;
   3389    }
   3390 }
   3391 
   3392 
   3393 /*------------------------------------------------------------*/
   3394 /*---                                                      ---*/
   3395 /*--- Parsing of Compilation Units                         ---*/
   3396 /*---                                                      ---*/
   3397 /*------------------------------------------------------------*/
   3398 
   3399 static Int cmp_TempVar_by_dioff ( void* v1, void* v2 ) {
   3400    TempVar* t1 = *(TempVar**)v1;
   3401    TempVar* t2 = *(TempVar**)v2;
   3402    if (t1->dioff < t2->dioff) return -1;
   3403    if (t1->dioff > t2->dioff) return 1;
   3404    return 0;
   3405 }
   3406 
   3407 static void read_DIE (
   3408    /*MOD*/WordFM* /* of (XArray* of AddrRange, void) */ rangestree,
   3409    /*MOD*/XArray* /* of TyEnt */ tyents,
   3410    /*MOD*/XArray* /* of TempVar* */ tempvars,
   3411    /*MOD*/XArray* /* of GExpr* */ gexprs,
   3412    /*MOD*/D3TypeParser* typarser,
   3413    /*MOD*/D3VarParser* varparser,
   3414    Cursor* c, Bool td3, CUConst* cc, Int level
   3415 )
   3416 {
   3417    Cursor abbv;
   3418    ULong  atag, abbv_code;
   3419    UWord  posn;
   3420    UInt   has_children;
   3421    UWord  start_die_c_offset, start_abbv_c_offset;
   3422    UWord  after_die_c_offset, after_abbv_c_offset;
   3423 
   3424    /* --- Deal with this DIE --- */
   3425    posn      = cook_die( cc, get_position_of_Cursor( c ) );
   3426    abbv_code = get_ULEB128( c );
   3427    set_abbv_Cursor( &abbv, td3, cc, abbv_code );
   3428    atag      = get_ULEB128( &abbv );
   3429    TRACE_D3("\n");
   3430    TRACE_D3(" <%d><%lx>: Abbrev Number: %llu (%s)\n",
   3431             level, posn, abbv_code, ML_(pp_DW_TAG)( atag ) );
   3432 
   3433    if (atag == 0)
   3434       cc->barf("read_DIE: invalid zero tag on DIE");
   3435 
   3436    has_children = get_UChar( &abbv );
   3437    if (has_children != DW_children_no && has_children != DW_children_yes)
   3438       cc->barf("read_DIE: invalid has_children value");
   3439 
   3440    /* We're set up to look at the fields of this DIE.  Hand it off to
   3441       any parser(s) that want to see it.  Since they will in general
   3442       advance both the DIE and abbrev cursors, remember their current
   3443       settings so that we can then back up and do one final pass over
   3444       the DIE, to print out its contents. */
   3445 
   3446    start_die_c_offset  = get_position_of_Cursor( c );
   3447    start_abbv_c_offset = get_position_of_Cursor( &abbv );
   3448 
   3449    while (True) {
   3450       ULong cts;
   3451       Int   ctsSzB;
   3452       UWord ctsMemSzB;
   3453       ULong at_name = get_ULEB128( &abbv );
   3454       ULong at_form = get_ULEB128( &abbv );
   3455       if (at_name == 0 && at_form == 0) break;
   3456       TRACE_D3("     %18s: ", ML_(pp_DW_AT)(at_name));
   3457       /* Get the form contents, but ignore them; the only purpose is
   3458          to print them, if td3 is True */
   3459       get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
   3460                          cc, c, td3, (DW_FORM)at_form );
   3461       TRACE_D3("\t");
   3462       TRACE_D3("\n");
   3463    }
   3464 
   3465    after_die_c_offset  = get_position_of_Cursor( c );
   3466    after_abbv_c_offset = get_position_of_Cursor( &abbv );
   3467 
   3468    set_position_of_Cursor( c,     start_die_c_offset );
   3469    set_position_of_Cursor( &abbv, start_abbv_c_offset );
   3470 
   3471    parse_type_DIE( tyents,
   3472                    typarser,
   3473                    (DW_TAG)atag,
   3474                    posn,
   3475                    level,
   3476                    c,     /* DIE cursor */
   3477                    &abbv, /* abbrev cursor */
   3478                    cc,
   3479                    td3 );
   3480 
   3481    set_position_of_Cursor( c,     start_die_c_offset );
   3482    set_position_of_Cursor( &abbv, start_abbv_c_offset );
   3483 
   3484    parse_var_DIE( rangestree,
   3485                   tempvars,
   3486                   gexprs,
   3487                   varparser,
   3488                   (DW_TAG)atag,
   3489                   posn,
   3490                   level,
   3491                   c,     /* DIE cursor */
   3492                   &abbv, /* abbrev cursor */
   3493                   cc,
   3494                   td3 );
   3495 
   3496    set_position_of_Cursor( c,     after_die_c_offset );
   3497    set_position_of_Cursor( &abbv, after_abbv_c_offset );
   3498 
   3499    /* --- Now recurse into its children, if any --- */
   3500    if (has_children == DW_children_yes) {
   3501       if (0) TRACE_D3("BEGIN children of level %d\n", level);
   3502       while (True) {
   3503          atag = peek_ULEB128( c );
   3504          if (atag == 0) break;
   3505          read_DIE( rangestree, tyents, tempvars, gexprs,
   3506                    typarser, varparser,
   3507                    c, td3, cc, level+1 );
   3508       }
   3509       /* Now we need to eat the terminating zero */
   3510       atag = get_ULEB128( c );
   3511       vg_assert(atag == 0);
   3512       if (0) TRACE_D3("END children of level %d\n", level);
   3513    }
   3514 
   3515 }
   3516 
   3517 
   3518 static
   3519 void new_dwarf3_reader_wrk (
   3520    struct _DebugInfo* di,
   3521    __attribute__((noreturn)) void (*barf)( HChar* ),
   3522    UChar* debug_info_img,   SizeT debug_info_sz,
   3523    UChar* debug_types_img,  SizeT debug_types_sz,
   3524    UChar* debug_abbv_img,   SizeT debug_abbv_sz,
   3525    UChar* debug_line_img,   SizeT debug_line_sz,
   3526    UChar* debug_str_img,    SizeT debug_str_sz,
   3527    UChar* debug_ranges_img, SizeT debug_ranges_sz,
   3528    UChar* debug_loc_img,    SizeT debug_loc_sz,
   3529    UChar* debug_info_alt_img, SizeT debug_info_alt_sz,
   3530    UChar* debug_abbv_alt_img, SizeT debug_abbv_alt_sz,
   3531    UChar* debug_line_alt_img, SizeT debug_line_alt_sz,
   3532    UChar* debug_str_alt_img,  SizeT debug_str_alt_sz
   3533 )
   3534 {
   3535    XArray* /* of TyEnt */     tyents;
   3536    XArray* /* of TyEnt */     tyents_to_keep;
   3537    XArray* /* of GExpr* */    gexprs;
   3538    XArray* /* of TempVar* */  tempvars;
   3539    WordFM* /* of (XArray* of AddrRange, void) */ rangestree;
   3540    TyEntIndexCache* tyents_cache = NULL;
   3541    TyEntIndexCache* tyents_to_keep_cache = NULL;
   3542    TempVar *varp, *varp2;
   3543    GExpr* gexpr;
   3544    Cursor abbv; /* for showing .debug_abbrev */
   3545    Cursor info; /* primary cursor for parsing .debug_info */
   3546    Cursor ranges; /* for showing .debug_ranges */
   3547    D3TypeParser typarser;
   3548    D3VarParser varparser;
   3549    Addr  dr_base;
   3550    UWord dr_offset;
   3551    Word  i, j, n;
   3552    Bool td3 = di->trace_symtab;
   3553    XArray* /* of TempVar* */ dioff_lookup_tab;
   3554    Int pass;
   3555    VgHashTable signature_types;
   3556 #if 0
   3557    /* This doesn't work properly because it assumes all entries are
   3558       packed end to end, with no holes.  But that doesn't always
   3559       appear to be the case, so it loses sync.  And the D3 spec
   3560       doesn't appear to require a no-hole situation either. */
   3561    /* Display .debug_loc */
   3562    Addr  dl_base;
   3563    UWord dl_offset;
   3564    Cursor loc; /* for showing .debug_loc */
   3565    TRACE_SYMTAB("\n");
   3566    TRACE_SYMTAB("\n------ The contents of .debug_loc ------\n");
   3567    TRACE_SYMTAB("    Offset   Begin    End      Expression\n");
   3568    init_Cursor( &loc, debug_loc_img,
   3569                 debug_loc_sz, 0, barf,
   3570                 "Overrun whilst reading .debug_loc section(1)" );
   3571    dl_base = 0;
   3572    dl_offset = 0;
   3573    while (True) {
   3574       UWord  w1, w2;
   3575       UWord  len;
   3576       if (is_at_end_Cursor( &loc ))
   3577          break;
   3578 
   3579       /* Read a (host-)word pair.  This is something of a hack since
   3580          the word size to read is really dictated by the ELF file;
   3581          however, we assume we're reading a file with the same
   3582          word-sizeness as the host.  Reasonably enough. */
   3583       w1 = get_UWord( &loc );
   3584       w2 = get_UWord( &loc );
   3585 
   3586       if (w1 == 0 && w2 == 0) {
   3587          /* end of list.  reset 'base' */
   3588          TRACE_D3("    %08lx <End of list>\n", dl_offset);
   3589          dl_base = 0;
   3590          dl_offset = get_position_of_Cursor( &loc );
   3591          continue;
   3592       }
   3593 
   3594       if (w1 == -1UL) {
   3595          /* new value for 'base' */
   3596          TRACE_D3("    %08lx %16lx %08lx (base address)\n",
   3597                   dl_offset, w1, w2);
   3598          dl_base = w2;
   3599          continue;
   3600       }
   3601 
   3602       /* else a location expression follows */
   3603       TRACE_D3("    %08lx %08lx %08lx ",
   3604                dl_offset, w1 + dl_base, w2 + dl_base);
   3605       len = (UWord)get_UShort( &loc );
   3606       while (len > 0) {
   3607          UChar byte = get_UChar( &loc );
   3608          TRACE_D3("%02x", (UInt)byte);
   3609          len--;
   3610       }
   3611       TRACE_SYMTAB("\n");
   3612    }
   3613 #endif
   3614 
   3615    /* Display .debug_ranges */
   3616    TRACE_SYMTAB("\n");
   3617    TRACE_SYMTAB("\n------ The contents of .debug_ranges ------\n");
   3618    TRACE_SYMTAB("    Offset   Begin    End\n");
   3619    init_Cursor( &ranges, debug_ranges_img,
   3620                 debug_ranges_sz, 0, barf,
   3621                 "Overrun whilst reading .debug_ranges section(1)" );
   3622    dr_base = 0;
   3623    dr_offset = 0;
   3624    while (True) {
   3625       UWord  w1, w2;
   3626 
   3627       if (is_at_end_Cursor( &ranges ))
   3628          break;
   3629 
   3630       /* Read a (host-)word pair.  This is something of a hack since
   3631          the word size to read is really dictated by the ELF file;
   3632          however, we assume we're reading a file with the same
   3633          word-sizeness as the host.  Reasonably enough. */
   3634       w1 = get_UWord( &ranges );
   3635       w2 = get_UWord( &ranges );
   3636 
   3637       if (w1 == 0 && w2 == 0) {
   3638          /* end of list.  reset 'base' */
   3639          TRACE_D3("    %08lx <End of list>\n", dr_offset);
   3640          dr_base = 0;
   3641          dr_offset = get_position_of_Cursor( &ranges );
   3642          continue;
   3643       }
   3644 
   3645       if (w1 == -1UL) {
   3646          /* new value for 'base' */
   3647          TRACE_D3("    %08lx %16lx %08lx (base address)\n",
   3648                   dr_offset, w1, w2);
   3649          dr_base = w2;
   3650          continue;
   3651       }
   3652 
   3653       /* else a range [w1+base, w2+base) is denoted */
   3654       TRACE_D3("    %08lx %08lx %08lx\n",
   3655                dr_offset, w1 + dr_base, w2 + dr_base);
   3656    }
   3657 
   3658    /* Display .debug_abbrev */
   3659    init_Cursor( &abbv, debug_abbv_img, debug_abbv_sz, 0, barf,
   3660                 "Overrun whilst reading .debug_abbrev section" );
   3661    TRACE_SYMTAB("\n");
   3662    TRACE_SYMTAB("\n------ The contents of .debug_abbrev ------\n");
   3663    while (True) {
   3664       if (is_at_end_Cursor( &abbv ))
   3665          break;
   3666       /* Read one abbreviation table */
   3667       TRACE_D3("  Number TAG\n");
   3668       while (True) {
   3669          ULong atag;
   3670          UInt  has_children;
   3671          ULong acode = get_ULEB128( &abbv );
   3672          if (acode == 0) break; /* end of the table */
   3673          atag = get_ULEB128( &abbv );
   3674          has_children = get_UChar( &abbv );
   3675          TRACE_D3("   %llu      %s    [%s]\n",
   3676                   acode, ML_(pp_DW_TAG)(atag),
   3677                          ML_(pp_DW_children)(has_children));
   3678          while (True) {
   3679             ULong at_name = get_ULEB128( &abbv );
   3680             ULong at_form = get_ULEB128( &abbv );
   3681             if (at_name == 0 && at_form == 0) break;
   3682             TRACE_D3("    %18s %s\n",
   3683                      ML_(pp_DW_AT)(at_name), ML_(pp_DW_FORM)(at_form));
   3684          }
   3685       }
   3686    }
   3687    TRACE_SYMTAB("\n");
   3688 
   3689    /* We'll park the harvested type information in here.  Also create
   3690       a fake "void" entry with offset D3_FAKEVOID_CUOFF, so we always
   3691       have at least one type entry to refer to.  D3_FAKEVOID_CUOFF is
   3692       huge and presumably will not occur in any valid DWARF3 file --
   3693       it would need to have a .debug_info section 4GB long for that to
   3694       happen.  These type entries end up in the DebugInfo. */
   3695    tyents = VG_(newXA)( ML_(dinfo_zalloc),
   3696                         "di.readdwarf3.ndrw.1 (TyEnt temp array)",
   3697                         ML_(dinfo_free), sizeof(TyEnt) );
   3698    { TyEnt tyent;
   3699      VG_(memset)(&tyent, 0, sizeof(tyent));
   3700      tyent.tag   = Te_TyVoid;
   3701      tyent.cuOff = D3_FAKEVOID_CUOFF;
   3702      tyent.Te.TyVoid.isFake = True;
   3703      VG_(addToXA)( tyents, &tyent );
   3704    }
   3705    { TyEnt tyent;
   3706      VG_(memset)(&tyent, 0, sizeof(tyent));
   3707      tyent.tag   = Te_UNKNOWN;
   3708      tyent.cuOff = D3_INVALID_CUOFF;
   3709      VG_(addToXA)( tyents, &tyent );
   3710    }
   3711 
   3712    /* This is a tree used to unique-ify the range lists that are
   3713       manufactured by parse_var_DIE.  References to the keys in the
   3714       tree wind up in .rngMany fields in TempVars.  We'll need to
   3715       delete this tree, and the XArrays attached to it, at the end of
   3716       this function. */
   3717    rangestree = VG_(newFM)( ML_(dinfo_zalloc),
   3718                             "di.readdwarf3.ndrw.2 (rangestree)",
   3719                             ML_(dinfo_free),
   3720                             (Word(*)(UWord,UWord))cmp__XArrays_of_AddrRange );
   3721 
   3722    /* List of variables we're accumulating.  These don't end up in the
   3723       DebugInfo; instead their contents are handed to ML_(addVar) and
   3724       the list elements are then deleted. */
   3725    tempvars = VG_(newXA)( ML_(dinfo_zalloc),
   3726                           "di.readdwarf3.ndrw.3 (TempVar*s array)",
   3727                           ML_(dinfo_free),
   3728                           sizeof(TempVar*) );
   3729 
   3730    /* List of GExprs we're accumulating.  These wind up in the
   3731       DebugInfo. */
   3732    gexprs = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.4",
   3733                         ML_(dinfo_free), sizeof(GExpr*) );
   3734 
   3735    /* We need a D3TypeParser to keep track of partially constructed
   3736       types.  It'll be discarded as soon as we've completed the CU,
   3737       since the resulting information is tipped in to 'tyents' as it
   3738       is generated. */
   3739    VG_(memset)( &typarser, 0, sizeof(typarser) );
   3740    typarser.sp = -1;
   3741    typarser.language = '?';
   3742    for (i = 0; i < N_D3_TYPE_STACK; i++) {
   3743       typarser.qparentE[i].tag   = Te_EMPTY;
   3744       typarser.qparentE[i].cuOff = D3_INVALID_CUOFF;
   3745    }
   3746 
   3747    VG_(memset)( &varparser, 0, sizeof(varparser) );
   3748    varparser.sp = -1;
   3749 
   3750    signature_types = VG_(HT_construct) ("signature_types");
   3751 
   3752    /* Do an initial pass to scan the .debug_types section, if any, and
   3753       fill in the signatured types hash table.  This lets us handle
   3754       mapping from a type signature to a (cooked) DIE offset directly
   3755       in get_Form_contents.  */
   3756    if (debug_types_img != NULL) {
   3757       init_Cursor( &info, debug_types_img, debug_types_sz, 0, barf,
   3758                    "Overrun whilst reading .debug_types section" );
   3759       TRACE_D3("\n------ Collecting signatures from .debug_types section ------\n");
   3760 
   3761       while (True) {
   3762          UWord   cu_start_offset, cu_offset_now;
   3763          CUConst cc;
   3764 
   3765          cu_start_offset = get_position_of_Cursor( &info );
   3766          TRACE_D3("\n");
   3767          TRACE_D3("  Compilation Unit @ offset 0x%lx:\n", cu_start_offset);
   3768          /* parse_CU_Header initialises the CU's set_abbv_Cursor cache
   3769             (saC_cache) */
   3770          parse_CU_Header( &cc, td3, &info,
   3771                           (UChar*)debug_abbv_img, debug_abbv_sz,
   3772                           True, False );
   3773 
   3774          /* Needed by cook_die.  */
   3775          cc.types_cuOff_bias = debug_info_sz;
   3776 
   3777          record_signatured_type( signature_types, cc.type_signature,
   3778                                  cook_die( &cc, cc.type_offset ));
   3779 
   3780          /* Until proven otherwise we assume we don't need the icc9
   3781             workaround in this case; see the DIE-reading loop below
   3782             for details.  */
   3783          cu_offset_now = (cu_start_offset + cc.unit_length
   3784                           + (cc.is_dw64 ? 12 : 4));
   3785 
   3786          if (cu_offset_now == debug_types_sz)
   3787             break;
   3788 
   3789          set_position_of_Cursor ( &info, cu_offset_now );
   3790       }
   3791    }
   3792 
   3793    /* Perform three DIE-reading passes.  The first pass reads DIEs from
   3794       alternate .debug_info (if any), the second pass reads DIEs from
   3795       .debug_info, and the third pass reads DIEs from .debug_types.
   3796       Moving the body of this loop into a separate function would
   3797       require a large number of arguments to be passed in, so it is
   3798       kept inline instead.  */
   3799    for (pass = 0; pass < 3; ++pass) {
   3800       UWord section_size;
   3801 
   3802       if (pass == 0) {
   3803          if (debug_info_alt_img == NULL)
   3804 	    continue;
   3805          /* Now loop over the Compilation Units listed in the alternate
   3806             .debug_info section (see D3SPEC sec 7.5) paras 1 and 2.
   3807             Each compilation unit contains a Compilation Unit Header
   3808             followed by precisely one DW_TAG_compile_unit or
   3809             DW_TAG_partial_unit DIE. */
   3810          init_Cursor( &info, debug_info_alt_img, debug_info_alt_sz, 0, barf,
   3811                       "Overrun whilst reading alternate .debug_info section" );
   3812          section_size = debug_info_alt_sz;
   3813 
   3814          TRACE_D3("\n------ Parsing alternate .debug_info section ------\n");
   3815       } else if (pass == 1) {
   3816          /* Now loop over the Compilation Units listed in the .debug_info
   3817             section (see D3SPEC sec 7.5) paras 1 and 2.  Each compilation
   3818             unit contains a Compilation Unit Header followed by precisely
   3819             one DW_TAG_compile_unit or DW_TAG_partial_unit DIE. */
   3820          init_Cursor( &info, debug_info_img, debug_info_sz, 0, barf,
   3821                       "Overrun whilst reading .debug_info section" );
   3822          section_size = debug_info_sz;
   3823 
   3824          TRACE_D3("\n------ Parsing .debug_info section ------\n");
   3825       } else {
   3826          if (debug_types_img == NULL)
   3827             continue;
   3828          init_Cursor( &info, debug_types_img, debug_types_sz, 0, barf,
   3829                       "Overrun whilst reading .debug_types section" );
   3830          section_size = debug_types_sz;
   3831 
   3832          TRACE_D3("\n------ Parsing .debug_types section ------\n");
   3833       }
   3834 
   3835       while (True) {
   3836          UWord   cu_start_offset, cu_offset_now;
   3837          CUConst cc;
   3838          /* It may be that the stated size of this CU is larger than the
   3839             amount of stuff actually in it.  icc9 seems to generate CUs
   3840             thusly.  We use these variables to figure out if this is
   3841             indeed the case, and if so how many bytes we need to skip to
   3842             get to the start of the next CU.  Not skipping those bytes
   3843             causes us to misidentify the start of the next CU, and it all
   3844             goes badly wrong after that (not surprisingly). */
   3845          UWord cu_size_including_IniLen, cu_amount_used;
   3846 
   3847          /* It seems icc9 finishes the DIE info before debug_info_sz
   3848             bytes have been used up.  So be flexible, and declare the
   3849             sequence complete if there are not enough remaining bytes to
   3850             hold even the smallest conceivable CU header.  (11 bytes I
   3851             reckon). */
   3852          /* JRS 23Jan09: I suspect this is no longer necessary now that
   3853             the code below contains a 'while (cu_amount_used <
   3854             cu_size_including_IniLen ...'  style loop, which skips over
   3855             any leftover bytes at the end of a CU in the case where the
   3856             CU's stated size is larger than its actual size (as
   3857             determined by reading all its DIEs).  However, for prudence,
   3858             I'll leave the following test in place.  I can't see that a
   3859             CU header can be smaller than 11 bytes, so I don't think
   3860             there's any harm possible through the test -- it just adds
   3861             robustness. */
   3862          Word avail = get_remaining_length_Cursor( &info );
   3863          if (avail < 11) {
   3864             if (avail > 0)
   3865                TRACE_D3("new_dwarf3_reader_wrk: warning: "
   3866                         "%ld unused bytes after end of DIEs\n", avail);
   3867             break;
   3868          }
   3869 
   3870          /* Check the varparser's stack is in a sane state. */
   3871          vg_assert(varparser.sp == -1);
   3872          for (i = 0; i < N_D3_VAR_STACK; i++) {
   3873             vg_assert(varparser.ranges[i] == NULL);
   3874             vg_assert(varparser.level[i] == 0);
   3875          }
   3876          for (i = 0; i < N_D3_TYPE_STACK; i++) {
   3877             vg_assert(typarser.qparentE[i].cuOff == D3_INVALID_CUOFF);
   3878             vg_assert(typarser.qparentE[i].tag   == Te_EMPTY);
   3879             vg_assert(typarser.qlevel[i] == 0);
   3880          }
   3881 
   3882          cu_start_offset = get_position_of_Cursor( &info );
   3883          TRACE_D3("\n");
   3884          TRACE_D3("  Compilation Unit @ offset 0x%lx:\n", cu_start_offset);
   3885          /* parse_CU_Header initialises the CU's set_abbv_Cursor cache
   3886             (saC_cache) */
   3887          if (pass == 0)
   3888             parse_CU_Header( &cc, td3, &info,
   3889                              (UChar*)debug_abbv_alt_img, debug_abbv_alt_sz,
   3890                              False, True );
   3891          else
   3892             parse_CU_Header( &cc, td3, &info,
   3893                              (UChar*)debug_abbv_img, debug_abbv_sz,
   3894                              pass == 2, False );
   3895          cc.debug_str_img    = pass == 0 ? debug_str_alt_img : debug_str_img;
   3896          cc.debug_str_sz     = pass == 0 ? debug_str_alt_sz : debug_str_sz;
   3897          cc.debug_ranges_img = debug_ranges_img;
   3898          cc.debug_ranges_sz  = debug_ranges_sz;
   3899          cc.debug_loc_img    = debug_loc_img;
   3900          cc.debug_loc_sz     = debug_loc_sz;
   3901          cc.debug_line_img   = pass == 0 ? debug_line_alt_img : debug_line_img;
   3902          cc.debug_line_sz    = pass == 0 ? debug_line_alt_sz : debug_line_sz;
   3903          cc.debug_info_img   = pass == 0 ? debug_info_alt_img : debug_info_img;
   3904          cc.debug_info_sz    = pass == 0 ? debug_info_alt_sz : debug_info_sz;
   3905          cc.debug_types_img  = debug_types_img;
   3906          cc.debug_types_sz   = debug_types_sz;
   3907          cc.debug_info_alt_img = debug_info_alt_img;
   3908          cc.debug_info_alt_sz = debug_info_alt_sz;
   3909          cc.debug_str_alt_img = debug_str_alt_img;
   3910          cc.debug_str_alt_sz = debug_str_alt_sz;
   3911          cc.types_cuOff_bias = debug_info_sz;
   3912          cc.alt_cuOff_bias   = debug_info_sz + debug_types_sz;
   3913          cc.cu_start_offset  = cu_start_offset;
   3914          cc.di = di;
   3915          /* The CU's svma can be deduced by looking at the AT_low_pc
   3916             value in the top level TAG_compile_unit, which is the topmost
   3917             DIE.  We'll leave it for the 'varparser' to acquire that info
   3918             and fill it in -- since it is the only party to want to know
   3919             it. */
   3920          cc.cu_svma_known = False;
   3921          cc.cu_svma       = 0;
   3922 
   3923          cc.signature_types = signature_types;
   3924 
   3925          /* Create a fake outermost-level range covering the entire
   3926             address range.  So we always have *something* to catch all
   3927             variable declarations. */
   3928          varstack_push( &cc, &varparser, td3,
   3929                         unitary_range_list(0UL, ~0UL),
   3930                         -1, False/*isFunc*/, NULL/*fbGX*/ );
   3931 
   3932          /* And set up the file name table.  When we come across the top
   3933             level DIE for this CU (which is what the next call to
   3934             read_DIE should process) we will copy all the file names out
   3935             of the .debug_line img area and use this table to look up the
   3936             copies when we later see filename numbers in DW_TAG_variables
   3937             etc. */
   3938          vg_assert(!varparser.filenameTable );
   3939          varparser.filenameTable
   3940             = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.5",
   3941                           ML_(dinfo_free),
   3942                           sizeof(UChar*) );
   3943          vg_assert(varparser.filenameTable);
   3944 
   3945          /* Now read the one-and-only top-level DIE for this CU. */
   3946          vg_assert(varparser.sp == 0);
   3947          read_DIE( rangestree,
   3948                    tyents, tempvars, gexprs,
   3949                    &typarser, &varparser,
   3950                    &info, td3, &cc, 0 );
   3951 
   3952          cu_offset_now = get_position_of_Cursor( &info );
   3953 
   3954          if (0) VG_(printf)("Travelled: %lu  size %llu\n",
   3955                             cu_offset_now - cc.cu_start_offset,
   3956                             cc.unit_length + (cc.is_dw64 ? 12 : 4));
   3957 
   3958          /* How big the CU claims it is .. */
   3959          cu_size_including_IniLen = cc.unit_length + (cc.is_dw64 ? 12 : 4);
   3960          /* .. vs how big we have found it to be */
   3961          cu_amount_used = cu_offset_now - cc.cu_start_offset;
   3962 
   3963          if (1) TRACE_D3("offset now %ld, d-i-size %ld\n",
   3964                          cu_offset_now, section_size);
   3965          if (cu_offset_now > section_size)
   3966             barf("toplevel DIEs beyond end of CU");
   3967 
   3968          /* If the CU is bigger than it claims to be, we've got a serious
   3969             problem. */
   3970          if (cu_amount_used > cu_size_including_IniLen)
   3971             barf("CU's actual size appears to be larger than it claims it is");
   3972 
   3973          /* If the CU is smaller than it claims to be, we need to skip some
   3974             bytes.  Loop updates cu_offset_now and cu_amount_used. */
   3975          while (cu_amount_used < cu_size_including_IniLen
   3976                 && get_remaining_length_Cursor( &info ) > 0) {
   3977             if (0) VG_(printf)("SKIP\n");
   3978             (void)get_UChar( &info );
   3979             cu_offset_now = get_position_of_Cursor( &info );
   3980             cu_amount_used = cu_offset_now - cc.cu_start_offset;
   3981          }
   3982 
   3983          /* Preen to level -2.  DIEs have level >= 0 so -2 cannot occur
   3984             anywhere else at all.  Our fake the-entire-address-space
   3985             range is at level -1, so preening to -2 should completely
   3986             empty the stack out. */
   3987          TRACE_D3("\n");
   3988          varstack_preen( &varparser, td3, -2 );
   3989          /* Similarly, empty the type stack out. */
   3990          typestack_preen( &typarser, td3, -2 );
   3991 
   3992          TRACE_D3("set_abbv_Cursor cache: %lu queries, %lu misses\n",
   3993                   cc.saC_cache_queries, cc.saC_cache_misses);
   3994 
   3995          vg_assert(varparser.filenameTable );
   3996          VG_(deleteXA)( varparser.filenameTable );
   3997          varparser.filenameTable = NULL;
   3998 
   3999          if (cu_offset_now == section_size)
   4000             break;
   4001          /* else keep going */
   4002       }
   4003    }
   4004 
   4005    /* From here on we're post-processing the stuff we got
   4006       out of the .debug_info section. */
   4007    if (td3) {
   4008       TRACE_D3("\n");
   4009       ML_(pp_TyEnts)(tyents, "Initial type entity (TyEnt) array");
   4010       TRACE_D3("\n");
   4011       TRACE_D3("------ Compressing type entries ------\n");
   4012    }
   4013 
   4014    tyents_cache = ML_(dinfo_zalloc)( "di.readdwarf3.ndrw.6",
   4015                                      sizeof(TyEntIndexCache) );
   4016    ML_(TyEntIndexCache__invalidate)( tyents_cache );
   4017    dedup_types( td3, tyents, tyents_cache );
   4018    if (td3) {
   4019       TRACE_D3("\n");
   4020       ML_(pp_TyEnts)(tyents, "After type entity (TyEnt) compression");
   4021    }
   4022 
   4023    TRACE_D3("\n");
   4024    TRACE_D3("------ Resolving the types of variables ------\n" );
   4025    resolve_variable_types( barf, tyents, tyents_cache, tempvars );
   4026 
   4027    /* Copy all the non-INDIR tyents into a new table.  For large
   4028       .so's, about 90% of the tyents will by now have been resolved to
   4029       INDIRs, and we no longer need them, and so don't need to store
   4030       them. */
   4031    tyents_to_keep
   4032       = VG_(newXA)( ML_(dinfo_zalloc),
   4033                     "di.readdwarf3.ndrw.7 (TyEnt to-keep array)",
   4034                     ML_(dinfo_free), sizeof(TyEnt) );
   4035    n = VG_(sizeXA)( tyents );
   4036    for (i = 0; i < n; i++) {
   4037       TyEnt* ent = VG_(indexXA)( tyents, i );
   4038       if (ent->tag != Te_INDIR)
   4039          VG_(addToXA)( tyents_to_keep, ent );
   4040    }
   4041 
   4042    VG_(deleteXA)( tyents );
   4043    tyents = NULL;
   4044    ML_(dinfo_free)( tyents_cache );
   4045    tyents_cache = NULL;
   4046 
   4047    /* Sort tyents_to_keep so we can lookup in it.  A complete (if
   4048       minor) waste of time, since tyents itself is sorted, but
   4049       necessary since VG_(lookupXA) refuses to cooperate if we
   4050       don't. */
   4051    VG_(setCmpFnXA)(
   4052       tyents_to_keep,
   4053       (Int(*)(void*,void*)) ML_(TyEnt__cmp_by_cuOff_only)
   4054    );
   4055    VG_(sortXA)( tyents_to_keep );
   4056 
   4057    /* Enable caching on tyents_to_keep */
   4058    tyents_to_keep_cache
   4059       = ML_(dinfo_zalloc)( "di.readdwarf3.ndrw.8",
   4060                            sizeof(TyEntIndexCache) );
   4061    ML_(TyEntIndexCache__invalidate)( tyents_to_keep_cache );
   4062 
   4063    /* And record the tyents in the DebugInfo.  We do this before
   4064       starting to hand variables to ML_(addVar), since if ML_(addVar)
   4065       wants to do debug printing (of the types of said vars) then it
   4066       will need the tyents.*/
   4067    vg_assert(!di->admin_tyents);
   4068    di->admin_tyents = tyents_to_keep;
   4069 
   4070    /* Bias all the location expressions. */
   4071    TRACE_D3("\n");
   4072    TRACE_D3("------ Biasing the location expressions ------\n" );
   4073 
   4074    n = VG_(sizeXA)( gexprs );
   4075    for (i = 0; i < n; i++) {
   4076       gexpr = *(GExpr**)VG_(indexXA)( gexprs, i );
   4077       bias_GX( gexpr, di );
   4078    }
   4079 
   4080    TRACE_D3("\n");
   4081    TRACE_D3("------ Acquired the following variables: ------\n\n");
   4082 
   4083    /* Park (pointers to) all the vars in an XArray, so we can look up
   4084       abstract origins quickly.  The array is sorted (hence, looked-up
   4085       by) the .dioff fields.  Since the .dioffs should be in strictly
   4086       ascending order, there is no need to sort the array after
   4087       construction.  The ascendingness is however asserted for. */
   4088    dioff_lookup_tab
   4089       = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.9",
   4090                     ML_(dinfo_free),
   4091                     sizeof(TempVar*) );
   4092    vg_assert(dioff_lookup_tab);
   4093 
   4094    n = VG_(sizeXA)( tempvars );
   4095    Word first_primary_var;
   4096    for (first_primary_var = 0;
   4097         debug_info_alt_sz && first_primary_var < n;
   4098         first_primary_var++) {
   4099       varp = *(TempVar**)VG_(indexXA)( tempvars, first_primary_var );
   4100       if (varp->dioff < debug_info_sz + debug_types_sz)
   4101          break;
   4102    }
   4103    for (i = 0; i < n; i++) {
   4104       varp = *(TempVar**)VG_(indexXA)( tempvars, (i + first_primary_var) % n );
   4105       if (i > first_primary_var) {
   4106          varp2 = *(TempVar**)VG_(indexXA)( tempvars,
   4107                                            (i + first_primary_var - 1) % n );
   4108          /* why should this hold?  Only, I think, because we've
   4109             constructed the array by reading .debug_info sequentially,
   4110             and so the array .dioff fields should reflect that, and be
   4111             strictly ascending. */
   4112          vg_assert(varp2->dioff < varp->dioff);
   4113       }
   4114       VG_(addToXA)( dioff_lookup_tab, &varp );
   4115    }
   4116    VG_(setCmpFnXA)( dioff_lookup_tab, cmp_TempVar_by_dioff );
   4117    VG_(sortXA)( dioff_lookup_tab ); /* POINTLESS; FIXME: rm */
   4118 
   4119    /* Now visit each var.  Collect up as much info as possible for
   4120       each var and hand it to ML_(addVar). */
   4121    n = VG_(sizeXA)( tempvars );
   4122    for (j = 0; j < n; j++) {
   4123       TyEnt* ent;
   4124       varp = *(TempVar**)VG_(indexXA)( tempvars, j );
   4125 
   4126       /* Possibly show .. */
   4127       if (td3) {
   4128          VG_(printf)("<%lx> addVar: level %d: %s :: ",
   4129                      varp->dioff,
   4130                      varp->level,
   4131                      varp->name ? varp->name : (UChar*)"<anon_var>" );
   4132          if (varp->typeR) {
   4133             ML_(pp_TyEnt_C_ishly)( tyents_to_keep, varp->typeR );
   4134          } else {
   4135             VG_(printf)("NULL");
   4136          }
   4137          VG_(printf)("\n  Loc=");
   4138          if (varp->gexpr) {
   4139             ML_(pp_GX)(varp->gexpr);
   4140          } else {
   4141             VG_(printf)("NULL");
   4142          }
   4143          VG_(printf)("\n");
   4144          if (varp->fbGX) {
   4145             VG_(printf)("  FrB=");
   4146             ML_(pp_GX)( varp->fbGX );
   4147             VG_(printf)("\n");
   4148          } else {
   4149             VG_(printf)("  FrB=none\n");
   4150          }
   4151          VG_(printf)("  declared at: %s:%d\n",
   4152                      varp->fName ? varp->fName : (UChar*)"NULL",
   4153                      varp->fLine );
   4154          if (varp->absOri != (UWord)D3_INVALID_CUOFF)
   4155             VG_(printf)("  abstract origin: <%lx>\n", varp->absOri);
   4156       }
   4157 
   4158       /* Skip variables which have no location.  These must be
   4159          abstract instances; they are useless as-is since with no
   4160          location they have no specified memory location.  They will
   4161          presumably be referred to via the absOri fields of other
   4162          variables. */
   4163       if (!varp->gexpr) {
   4164          TRACE_D3("  SKIP (no location)\n\n");
   4165          continue;
   4166       }
   4167 
   4168       /* So it has a location, at least.  If it refers to some other
   4169          entry through its absOri field, pull in further info through
   4170          that. */
   4171       if (varp->absOri != (UWord)D3_INVALID_CUOFF) {
   4172          Bool found;
   4173          Word ixFirst, ixLast;
   4174          TempVar key;
   4175          TempVar* keyp = &key;
   4176          TempVar *varAI;
   4177          VG_(memset)(&key, 0, sizeof(key)); /* not necessary */
   4178          key.dioff = varp->absOri; /* this is what we want to find */
   4179          found = VG_(lookupXA)( dioff_lookup_tab, &keyp,
   4180                                 &ixFirst, &ixLast );
   4181          if (!found) {
   4182             /* barf("DW_AT_abstract_origin can't be resolved"); */
   4183             TRACE_D3("  SKIP (DW_AT_abstract_origin can't be resolved)\n\n");
   4184             continue;
   4185          }
   4186          /* If the following fails, there is more than one entry with
   4187             the same dioff.  Which can't happen. */
   4188          vg_assert(ixFirst == ixLast);
   4189          varAI = *(TempVar**)VG_(indexXA)( dioff_lookup_tab, ixFirst );
   4190          /* stay sane */
   4191          vg_assert(varAI);
   4192          vg_assert(varAI->dioff == varp->absOri);
   4193 
   4194          /* Copy what useful info we can. */
   4195          if (varAI->typeR && !varp->typeR)
   4196             varp->typeR = varAI->typeR;
   4197          if (varAI->name && !varp->name)
   4198             varp->name = varAI->name;
   4199          if (varAI->fName && !varp->fName)
   4200             varp->fName = varAI->fName;
   4201          if (varAI->fLine > 0 && varp->fLine == 0)
   4202             varp->fLine = varAI->fLine;
   4203       }
   4204 
   4205       /* Give it a name if it doesn't have one. */
   4206       if (!varp->name)
   4207          varp->name = ML_(addStr)( di, "<anon_var>", -1 );
   4208 
   4209       /* So now does it have enough info to be useful? */
   4210       /* NOTE: re typeR: this is a hack.  If typeR is Te_UNKNOWN then
   4211          the type didn't get resolved.  Really, in that case
   4212          something's broken earlier on, and should be fixed, rather
   4213          than just skipping the variable. */
   4214       ent = ML_(TyEnts__index_by_cuOff)( tyents_to_keep,
   4215                                          tyents_to_keep_cache,
   4216                                          varp->typeR );
   4217       /* The next two assertions should be guaranteed by
   4218          our previous call to resolve_variable_types. */
   4219       vg_assert(ent);
   4220       vg_assert(ML_(TyEnt__is_type)(ent) || ent->tag == Te_UNKNOWN);
   4221 
   4222       if (ent->tag == Te_UNKNOWN) continue;
   4223 
   4224       vg_assert(varp->gexpr);
   4225       vg_assert(varp->name);
   4226       vg_assert(varp->typeR);
   4227       vg_assert(varp->level >= 0);
   4228 
   4229       /* Ok.  So we're going to keep it.  Call ML_(addVar) once for
   4230          each address range in which the variable exists. */
   4231       TRACE_D3("  ACQUIRE for range(s) ");
   4232       { AddrRange  oneRange;
   4233         AddrRange* varPcRanges;
   4234         Word       nVarPcRanges;
   4235         /* Set up to iterate over address ranges, however
   4236            represented. */
   4237         if (varp->nRanges == 0 || varp->nRanges == 1) {
   4238            vg_assert(!varp->rngMany);
   4239            if (varp->nRanges == 0) {
   4240               vg_assert(varp->rngOneMin == 0);
   4241               vg_assert(varp->rngOneMax == 0);
   4242            }
   4243            nVarPcRanges = varp->nRanges;
   4244            oneRange.aMin = varp->rngOneMin;
   4245            oneRange.aMax = varp->rngOneMax;
   4246            varPcRanges = &oneRange;
   4247         } else {
   4248            vg_assert(varp->rngMany);
   4249            vg_assert(varp->rngOneMin == 0);
   4250            vg_assert(varp->rngOneMax == 0);
   4251            nVarPcRanges = VG_(sizeXA)(varp->rngMany);
   4252            vg_assert(nVarPcRanges >= 2);
   4253            vg_assert(nVarPcRanges == (Word)varp->nRanges);
   4254            varPcRanges = VG_(indexXA)(varp->rngMany, 0);
   4255         }
   4256         if (varp->level == 0)
   4257            vg_assert( nVarPcRanges == 1 );
   4258         /* and iterate */
   4259         for (i = 0; i < nVarPcRanges; i++) {
   4260            Addr pcMin = varPcRanges[i].aMin;
   4261            Addr pcMax = varPcRanges[i].aMax;
   4262            vg_assert(pcMin <= pcMax);
   4263            /* Level 0 is the global address range.  So at level 0 we
   4264               don't want to bias pcMin/pcMax; but at all other levels
   4265               we do since those are derived from svmas in the Dwarf
   4266               we're reading.  Be paranoid ... */
   4267            if (varp->level == 0) {
   4268               vg_assert(pcMin == (Addr)0);
   4269               vg_assert(pcMax == ~(Addr)0);
   4270            } else {
   4271               /* vg_assert(pcMin > (Addr)0);
   4272                  No .. we can legitimately expect to see ranges like
   4273                  0x0-0x11D (pre-biasing, of course). */
   4274               vg_assert(pcMax < ~(Addr)0);
   4275            }
   4276 
   4277            /* Apply text biasing, for non-global variables. */
   4278            if (varp->level > 0) {
   4279               pcMin += di->text_debug_bias;
   4280               pcMax += di->text_debug_bias;
   4281            }
   4282 
   4283            if (i > 0 && (i%2) == 0)
   4284               TRACE_D3("\n                       ");
   4285            TRACE_D3("[%#lx,%#lx] ", pcMin, pcMax );
   4286 
   4287            ML_(addVar)(
   4288               di, varp->level,
   4289                   pcMin, pcMax,
   4290                   varp->name,  varp->typeR,
   4291                   varp->gexpr, varp->fbGX,
   4292                   varp->fName, varp->fLine, td3
   4293            );
   4294         }
   4295       }
   4296 
   4297       TRACE_D3("\n\n");
   4298       /* and move on to the next var */
   4299    }
   4300 
   4301    /* Now free all the TempVars */
   4302    n = VG_(sizeXA)( tempvars );
   4303    for (i = 0; i < n; i++) {
   4304       varp = *(TempVar**)VG_(indexXA)( tempvars, i );
   4305       ML_(dinfo_free)(varp);
   4306    }
   4307    VG_(deleteXA)( tempvars );
   4308    tempvars = NULL;
   4309 
   4310    /* and the temp lookup table */
   4311    VG_(deleteXA)( dioff_lookup_tab );
   4312 
   4313    /* and the ranges tree.  Note that we need to also free the XArrays
   4314       which constitute the keys, hence pass VG_(deleteXA) as a
   4315       key-finalizer. */
   4316    VG_(deleteFM)( rangestree, (void(*)(UWord))VG_(deleteXA), NULL );
   4317 
   4318    /* and the tyents_to_keep cache */
   4319    ML_(dinfo_free)( tyents_to_keep_cache );
   4320    tyents_to_keep_cache = NULL;
   4321 
   4322    vg_assert( varparser.filenameTable == NULL );
   4323 
   4324    /* And the signatured type hash.  */
   4325    VG_(HT_destruct) ( signature_types, ML_(dinfo_free) );
   4326 
   4327    /* record the GExprs in di so they can be freed later */
   4328    vg_assert(!di->admin_gexprs);
   4329    di->admin_gexprs = gexprs;
   4330 }
   4331 
   4332 
   4333 /*------------------------------------------------------------*/
   4334 /*---                                                      ---*/
   4335 /*--- The "new" DWARF3 reader -- top level control logic   ---*/
   4336 /*---                                                      ---*/
   4337 /*------------------------------------------------------------*/
   4338 
   4339 static Bool               d3rd_jmpbuf_valid  = False;
           /* True only while ML_(new_dwarf3_reader) has d3rd_jmpbuf armed
              and is running the worker.  barf() asserts this before it
              jumps. */
   4340 static HChar*             d3rd_jmpbuf_reason = NULL;
           /* Failure message recorded by barf() just before it jumps.
              ML_(new_dwarf3_reader) asserts it is NULL on entry and
              resets it to NULL before returning. */
   4341 static VG_MINIMAL_JMP_BUF(d3rd_jmpbuf);
           /* The jump buffer itself: filled in by VG_MINIMAL_SETJMP in
              ML_(new_dwarf3_reader), used by VG_MINIMAL_LONGJMP in
              barf(). */
   4342 
   4343 static __attribute__((noreturn)) void barf ( HChar* reason ) {
           /* Abandon the DWARF3 read currently in progress: record
              'reason' in d3rd_jmpbuf_reason and longjmp to d3rd_jmpbuf.
              Never returns to its caller.  Only the pointer is stored;
              no copy of the string is made. */
           /* There must be an armed jump buffer to jump to.
              ML_(new_dwarf3_reader) sets this flag immediately before
              its VG_MINIMAL_SETJMP. */
   4344    vg_assert(d3rd_jmpbuf_valid);
   4345    d3rd_jmpbuf_reason = reason;
   4346    VG_MINIMAL_LONGJMP(d3rd_jmpbuf);
   4347    /*NOTREACHED*/
           /* Should the longjmp ever come back, fail here rather than
              fall off the end of a function marked noreturn. */
   4348    vg_assert(0);
   4349 }
   4350 
   4351 
   4352 void
   4353 ML_(new_dwarf3_reader) (
   4354    struct _DebugInfo* di,
   4355    UChar* debug_info_img,   SizeT debug_info_sz,
   4356    UChar* debug_types_img,  SizeT debug_types_sz,
   4357    UChar* debug_abbv_img,   SizeT debug_abbv_sz,
   4358    UChar* debug_line_img,   SizeT debug_line_sz,
   4359    UChar* debug_str_img,    SizeT debug_str_sz,
   4360    UChar* debug_ranges_img, SizeT debug_ranges_sz,
   4361    UChar* debug_loc_img,    SizeT debug_loc_sz,
   4362    UChar* debug_info_alt_img, SizeT debug_info_alt_sz,
   4363    UChar* debug_abbv_alt_img, SizeT debug_abbv_alt_sz,
   4364    UChar* debug_line_alt_img, SizeT debug_line_alt_sz,
   4365    UChar* debug_str_alt_img,  SizeT debug_str_alt_sz
   4366 )
   4367 {
           /* Top-level driver for the DWARF3 reader.  Arms d3rd_jmpbuf,
              then calls new_dwarf3_reader_wrk with 'di', barf as the
              failure routine, and every image/size argument forwarded
              unchanged.  If the worker returns normally the read
              succeeded; if barf() longjmps back here, the recorded
              reason is passed to ML_(symerr).  Nothing is returned to
              the caller in either case.

              Each xxx_img / xxx_sz pair is presumably the start and
              byte size of the named section, with the *_alt_* pairs
              describing an alternate debug file -- TODO confirm against
              new_dwarf3_reader_wrk. */
           /* NOTE(review): both locals are volatile, presumably so their
              values remain trustworthy after the longjmp; td3 is not
              referenced directly below, so it is presumably read by the
              TRACE_D3 macro -- confirm. */
   4368    volatile Int  jumped;
   4369    volatile Bool td3 = di->trace_symtab;
   4370 
   4371    /* Run the _wrk function to read the dwarf3.  If it succeeds, it
   4372       just returns normally.  If there is any failure, it longjmp's
   4373       back here, having first set d3rd_jmpbuf_reason to something
   4374       useful. */
           /* The jump state lives in file-scope statics, so a second
              call made while one is still in progress would trip these
              assertions. */
   4375    vg_assert(d3rd_jmpbuf_valid  == False);
   4376    vg_assert(d3rd_jmpbuf_reason == NULL);
   4377 
           /* Mark the buffer as armed before setting the jump point, so
              that barf()'s own assertion holds for the whole worker
              run. */
   4378    d3rd_jmpbuf_valid = True;
   4379    jumped = VG_MINIMAL_SETJMP(d3rd_jmpbuf);
   4380    if (jumped == 0) {
   4381       /* try this ... */
   4382       new_dwarf3_reader_wrk( di, barf,
   4383                              debug_info_img,   debug_info_sz,
   4384                              debug_types_img,  debug_types_sz,
   4385                              debug_abbv_img,   debug_abbv_sz,
   4386                              debug_line_img,   debug_line_sz,
   4387                              debug_str_img,    debug_str_sz,
   4388                              debug_ranges_img, debug_ranges_sz,
   4389                              debug_loc_img,    debug_loc_sz,
   4390                              debug_info_alt_img, debug_info_alt_sz,
   4391                              debug_abbv_alt_img, debug_abbv_alt_sz,
   4392                              debug_line_alt_img, debug_line_alt_sz,
   4393                              debug_str_alt_img,  debug_str_alt_sz);
              /* Worker returned normally: disarm the jump buffer. */
   4394       d3rd_jmpbuf_valid = False;
   4395       TRACE_D3("\n------ .debug_info reading was successful ------\n");
   4396    } else {
   4397       /* It longjmp'd. */
   4398       d3rd_jmpbuf_valid = False;
   4399       /* barf() always records a reason before it jumps, so one must
                 be present here. */
   4400       vg_assert(d3rd_jmpbuf_reason != NULL);
   4401 
   4402       TRACE_D3("\n------ .debug_info reading failed ------\n");
   4403 
              /* Report the failure reason recorded by barf(). */
   4404       ML_(symerr)(di, True, d3rd_jmpbuf_reason);
   4405    }
   4406 
           /* Restore the entry invariant for the next call.  The flag is
              already False on both paths above, so clearing it again is
              merely defensive. */
   4407    d3rd_jmpbuf_valid  = False;
   4408    d3rd_jmpbuf_reason = NULL;
   4409 }
   4410 
   4411 
   4412 
   4413 /* --- Unused code fragments which might be useful one day. --- */
   4414 
   4415 #if 0
           /* Disabled fragment: walks .debug_aranges and prints each
              set's header and its (address, length) pairs via TRACE_D3.
              It uses names that are not declared within the fragment
              (aranges, debug_aranges_img, debug_aranges_sz), so it can
              only be revived inside a function that provides them. */
   4416    /* Read the arange tables */
   4417    TRACE_SYMTAB("\n");
   4418    TRACE_SYMTAB("\n------ The contents of .debug_arange ------\n");
   4419    init_Cursor( &aranges, debug_aranges_img,
   4420                 debug_aranges_sz, 0, barf,
   4421                 "Overrun whilst reading .debug_aranges section" );
           /* One iteration per set, until the cursor reaches the end of
              the section. */
   4422    while (True) {
   4423       ULong  len, d_i_offset;
   4424       Bool   is64;
   4425       UShort version;
   4426       UChar  asize, segsize;
   4427 
   4428       if (is_at_end_Cursor( &aranges ))
   4429          break;
   4430       /* Read one arange thingy */
   4431       /* initial_length field */
   4432       len = get_Initial_Length( &is64, &aranges,
   4433                "in .debug_aranges: invalid initial-length field" );
              /* Remaining header fields, read in this order: version,
                 offset into .debug_info, address size, segment size. */
   4434       version    = get_UShort( &aranges );
   4435       d_i_offset = get_Dwarfish_UWord( &aranges, is64 );
   4436       asize      = get_UChar( &aranges );
   4437       segsize    = get_UChar( &aranges );
   4438       TRACE_D3("  Length:                   %llu\n", len);
   4439       TRACE_D3("  Version:                  %d\n", (Int)version);
   4440       TRACE_D3("  Offset into .debug_info:  %llx\n", d_i_offset);
   4441       TRACE_D3("  Pointer Size:             %d\n", (Int)asize);
   4442       TRACE_D3("  Segment Size:             %d\n", (Int)segsize);
   4443       TRACE_D3("\n");
   4444       TRACE_D3("    Address            Length\n");
   4445 
              /* Discard bytes until the cursor position is a multiple of
                 2*asize.
                 NOTE(review): asize is taken straight from the section
                 data; a value of 0 would make this a modulo by zero. */
   4446       while ((get_position_of_Cursor( &aranges ) % (2 * asize)) > 0) {
   4447          (void)get_UChar( & aranges );
   4448       }
              /* Read (address, length) pairs up to and including the
                 all-zero pair, which is printed before the loop exits.
                 Values are read in the 64-bit form only when asize is 8;
                 presumably every other asize is treated as 4 bytes --
                 verify against get_Dwarfish_UWord. */
   4449       while (True) {
   4450          ULong address = get_Dwarfish_UWord( &aranges, asize==8 );
   4451          ULong length = get_Dwarfish_UWord( &aranges, asize==8 );
   4452          TRACE_D3("    0x%016llx 0x%llx\n", address, length);
   4453          if (address == 0 && length == 0) break;
   4454       }
   4455    }
   4456    TRACE_SYMTAB("\n");
   4457 #endif
   4458 
   4459 #endif // defined(VGO_linux) || defined(VGO_darwin)
   4460 
   4461 /*--------------------------------------------------------------------*/
   4462 /*--- end                                                          ---*/
   4463 /*--------------------------------------------------------------------*/
   4464