Home | History | Annotate | Download | only in m_debuginfo
      1 /* -*- mode: C; c-basic-offset: 3; -*- */
      2 
      3 /*--------------------------------------------------------------------*/
      4 /*--- Read DWARF3/4 ".debug_info" sections (DIE trees).            ---*/
      5 /*---                                                 readdwarf3.c ---*/
      6 /*--------------------------------------------------------------------*/
      7 
      8 /*
      9    This file is part of Valgrind, a dynamic binary instrumentation
     10    framework.
     11 
     12    Copyright (C) 2008-2017 OpenWorks LLP
     13       info (at) open-works.co.uk
     14 
     15    This program is free software; you can redistribute it and/or
     16    modify it under the terms of the GNU General Public License as
     17    published by the Free Software Foundation; either version 2 of the
     18    License, or (at your option) any later version.
     19 
     20    This program is distributed in the hope that it will be useful, but
     21    WITHOUT ANY WARRANTY; without even the implied warranty of
     22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     23    General Public License for more details.
     24 
     25    You should have received a copy of the GNU General Public License
     26    along with this program; if not, write to the Free Software
     27    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     28    02111-1307, USA.
     29 
     30    The GNU General Public License is contained in the file COPYING.
     31 
     32    Neither the names of the U.S. Department of Energy nor the
     33    University of California nor the names of its contributors may be
     34    used to endorse or promote products derived from this software
     35    without prior written permission.
     36 */
     37 
     38 #if defined(VGO_linux) || defined(VGO_darwin) || defined(VGO_solaris)
     39 
     40 /* REFERENCE (without which this code will not make much sense):
     41 
     42    DWARF Debugging Information Format, Version 3,
     43    dated 20 December 2005 (the "D3 spec").
     44 
     45    Available at http://www.dwarfstd.org/Dwarf3.pdf.  There's also a
     46    .doc (MS Word) version, but for some reason the section numbers
     47    between the Word and PDF versions differ by 1 in the first digit.
     48    All section references in this code are to the PDF version.
     49 
     50    CURRENT HACKS:
     51 
     52    DW_TAG_{const,volatile}_type no DW_AT_type is allowed; it is
     53       assumed to mean "const void" or "volatile void" respectively.
     54       GDB appears to interpret them like this, anyway.
     55 
     56    In many cases it is important to know the svma of a CU (the "base
     57    address of the CU", as the D3 spec calls it).  There are some
     58    situations in which the spec implies this value is unknown, but the
     59    Dwarf3 produced by gcc-4.1 seems to assume it is not unknown but
     60    merely zero when not explicitly stated.  So we too have to make
     61    that assumption.
     62 
     63    POTENTIAL BUG?  Spotted 6 Sept 08.  Why doesn't
     64    unitary_range_list() bias the resulting range list in the same way
     65    that its more general cousin, get_range_list(), does?  I don't
     66    know.
     67 
     68    TODO, 2008 Feb 17:
     69 
     70    get rid of cu_svma_known and document the assumed-zero svma hack.
     71 
     72    ML_(sizeOfType): differentiate between zero sized types and types
     73    for which the size is unknown.  Is this important?  I don't know.
     74 
     75    DW_TAG_array_types: deal with explicit sizes (currently we compute
     76    the size from the bounds and the element size, although that's
     77    fragile, if the bounds are incompletely specified, or completely
     78    absent)
     79 
     80    Document reason for difference (by 1) of stack preening depth in
     81    parse_var_DIE vs parse_type_DIE.
     82 
     83    Don't hand to ML_(addVars), vars whose locations are entirely in
     84    registers (DW_OP_reg*).  This is merely a space-saving
     85    optimisation, as ML_(evaluate_Dwarf3_Expr) should handle these
     86    expressions correctly, by failing to evaluate them and hence
     87    effectively ignoring the variable with which they are associated.
     88 
     89    Deal with DW_TAG_array_types which have element size != stride
     90 
     91    In some cases, the info for a variable is split between two
     92    different DIEs (generally a declarer and a definer).  We punt on
     93    these.  Could do better here.
     94 
     95    The 'data_bias' argument passed to the expression evaluator
     96    (ML_(evaluate_Dwarf3_Expr)) should really be changed to a
     97    MaybeUWord, to make it clear when we do vs don't know what it is
     98    for the evaluation of an expression.  At the moment zero is passed
     99    for this parameter in the don't know case.  That's a bit fragile
    100    and obscure; using a MaybeUWord would be clearer.
    101 
    102    POTENTIAL PERFORMANCE IMPROVEMENTS:
    103 
    104    Currently, duplicate removal and all other queries for the type
    105    entities array is done using cuOffset-based pointing, which
    106    involves a binary search (VG_(lookupXA)) for each access.  This is
    107    wildly inefficient, although simple.  It would be better to
    108    translate all the cuOffset-based references (iow, all the "R" and
    109    "Rs" fields in the TyEnts in 'tyents') to direct index numbers in
    110    'tyents' right at the start of dedup_types(), and use direct
    111    indexing (VG_(indexXA)) wherever possible after that.
    112 
    113    cmp__XArrays_of_AddrRange is also a performance bottleneck.  Move
    114    VG_(indexXA) into pub_tool_xarray.h so it can be inlined at all use
    115    points, and possibly also make an _UNCHECKED version which skips
    116    the range checks in performance-critical situations such as this.
    117 
    118    Handle interaction between read_DIE and parse_{var,type}_DIE
    119    better.  Currently read_DIE reads the entire DIE just to find where
    120    the end is (and for debug printing), so that it can later reliably
    121    move the cursor to the end regardless of what parse_{var,type}_DIE
    122    do.  This means many DIEs (most, even?) are read twice.  It would
    123    be smarter to make parse_{var,type}_DIE return a Bool indicating
    124    whether or not they advanced the DIE cursor, and only if they
    125    didn't should read_DIE itself read through the DIE.
    126 
    127    ML_(addVar) and add_var_to_arange: quite a lot of DiAddrRanges have
    128    zero variables in their .vars XArray.  Rather than have an XArray
    129    with zero elements (which uses 2 malloc'd blocks), allow the .vars
    130    pointer to be NULL in this case.
    131 
    132    More generally, reduce the amount of memory allocated and freed
    133    while reading Dwarf3 type/variable information.  Even modest (20MB)
    134    objects cause this module to allocate and free hundreds of
    135    thousands of small blocks, and ML_(arena_malloc) and its various
    136    groupies always show up at the top of performance profiles. */
    137 
    138 #include "pub_core_basics.h"
    139 #include "pub_core_debuginfo.h"
    140 #include "pub_core_libcbase.h"
    141 #include "pub_core_libcassert.h"
    142 #include "pub_core_libcprint.h"
    143 #include "pub_core_libcsetjmp.h"   // setjmp facilities
    144 #include "pub_core_hashtable.h"
    145 #include "pub_core_options.h"
    146 #include "pub_core_tooliface.h"    /* VG_(needs) */
    147 #include "pub_core_xarray.h"
    148 #include "pub_core_wordfm.h"
    149 #include "priv_misc.h"             /* dinfo_zalloc/free */
    150 #include "priv_image.h"
    151 #include "priv_tytypes.h"
    152 #include "priv_d3basics.h"
    153 #include "priv_storage.h"
    154 #include "priv_readdwarf3.h"       /* self */
    155 
    156 
    157 /*------------------------------------------------------------*/
    158 /*---                                                      ---*/
    159 /*--- Basic machinery for parsing DIEs.                    ---*/
    160 /*---                                                      ---*/
    161 /*------------------------------------------------------------*/
    162 
    163 #define TRACE_D3(format, args...) \
    164    if (UNLIKELY(td3)) { VG_(printf)(format, ## args); }
    165 #define TD3 (UNLIKELY(td3))
    166 
    167 #define D3_INVALID_CUOFF  ((UWord)(-1UL))
    168 #define D3_FAKEVOID_CUOFF ((UWord)(-2UL))
    169 
    170 typedef
    171    struct {
    172       DiSlice sli;      // to which this cursor applies
    173       DiOffT  sli_next; // offset in underlying DiImage; must be >= sli.ioff
    174       void (*barf)( const HChar* ) __attribute__((noreturn));
    175       const HChar* barfstr;
    176    }
    177    Cursor;
    178 
    179 static inline Bool is_sane_Cursor ( const Cursor* c ) {
    180    if (!c)                return False;
    181    if (!c->barf)          return False;
    182    if (!c->barfstr)       return False;
    183    if (!ML_(sli_is_valid)(c->sli))    return False;
    184    if (c->sli.ioff == DiOffT_INVALID) return False;
    185    if (c->sli_next < c->sli.ioff)     return False;
    186    return True;
    187 }
    188 
    189 // Initialise a cursor from a DiSlice (ELF section, really) so as to
    190 // start reading at offset |sli_initial_offset| from the start of the
    191 // slice.
    192 static void init_Cursor ( /*OUT*/Cursor* c,
    193                           DiSlice sli,
    194                           ULong   sli_initial_offset,
    195                           __attribute__((noreturn)) void (*barf)(const HChar*),
    196                           const HChar* barfstr )
    197 {
    198    vg_assert(c);
    199    VG_(bzero_inline)(c, sizeof(*c));
    200    c->sli              = sli;
    201    c->sli_next         = c->sli.ioff + sli_initial_offset;
    202    c->barf             = barf;
    203    c->barfstr          = barfstr;
    204    vg_assert(is_sane_Cursor(c));
    205 }
    206 
    207 static Bool is_at_end_Cursor ( const Cursor* c ) {
    208    vg_assert(is_sane_Cursor(c));
    209    return c->sli_next >= c->sli.ioff + c->sli.szB;
    210 }
    211 
    212 static inline ULong get_position_of_Cursor ( const Cursor* c ) {
    213    vg_assert(is_sane_Cursor(c));
    214    return c->sli_next - c->sli.ioff;
    215 }
    216 static inline void set_position_of_Cursor ( Cursor* c, ULong pos ) {
    217    c->sli_next = c->sli.ioff + pos;
    218    vg_assert(is_sane_Cursor(c));
    219 }
    220 static inline void advance_position_of_Cursor ( Cursor* c, ULong delta ) {
    221    c->sli_next += delta;
    222    vg_assert(is_sane_Cursor(c));
    223 }
    224 
    225 static /*signed*/Long get_remaining_length_Cursor ( const Cursor* c ) {
    226    vg_assert(is_sane_Cursor(c));
    227    return c->sli.ioff + c->sli.szB - c->sli_next;
    228 }
    229 
    230 //static void* get_address_of_Cursor ( Cursor* c ) {
    231 //   vg_assert(is_sane_Cursor(c));
    232 //   return &c->region_start_img[ c->region_next ];
    233 //}
    234 
    235 static DiCursor get_DiCursor_from_Cursor ( const Cursor* c ) {
    236    return mk_DiCursor(c->sli.img, c->sli_next);
    237 }
    238 
    239 /* FIXME: document assumptions on endianness for
    240    get_UShort/UInt/ULong. */
    241 static inline UChar get_UChar ( Cursor* c ) {
    242    UChar r;
    243    vg_assert(is_sane_Cursor(c));
    244    if (c->sli_next + sizeof(UChar) > c->sli.ioff + c->sli.szB) {
    245       c->barf(c->barfstr);
    246       /*NOTREACHED*/
    247       vg_assert(0);
    248    }
    249    r = ML_(img_get_UChar)(c->sli.img, c->sli_next);
    250    c->sli_next += sizeof(UChar);
    251    return r;
    252 }
    253 static UShort get_UShort ( Cursor* c ) {
    254    UShort r;
    255    vg_assert(is_sane_Cursor(c));
    256    if (c->sli_next + sizeof(UShort) > c->sli.ioff + c->sli.szB) {
    257       c->barf(c->barfstr);
    258       /*NOTREACHED*/
    259       vg_assert(0);
    260    }
    261    r = ML_(img_get_UShort)(c->sli.img, c->sli_next);
    262    c->sli_next += sizeof(UShort);
    263    return r;
    264 }
    265 static UInt get_UInt ( Cursor* c ) {
    266    UInt r;
    267    vg_assert(is_sane_Cursor(c));
    268    if (c->sli_next + sizeof(UInt) > c->sli.ioff + c->sli.szB) {
    269       c->barf(c->barfstr);
    270       /*NOTREACHED*/
    271       vg_assert(0);
    272    }
    273    r = ML_(img_get_UInt)(c->sli.img, c->sli_next);
    274    c->sli_next += sizeof(UInt);
    275    return r;
    276 }
    277 static ULong get_ULong ( Cursor* c ) {
    278    ULong r;
    279    vg_assert(is_sane_Cursor(c));
    280    if (c->sli_next + sizeof(ULong) > c->sli.ioff + c->sli.szB) {
    281       c->barf(c->barfstr);
    282       /*NOTREACHED*/
    283       vg_assert(0);
    284    }
    285    r = ML_(img_get_ULong)(c->sli.img, c->sli_next);
    286    c->sli_next += sizeof(ULong);
    287    return r;
    288 }
    289 static ULong get_ULEB128 ( Cursor* c ) {
    290    ULong result;
    291    Int   shift;
    292    UChar byte;
    293    /* unroll first iteration */
    294    byte = get_UChar( c );
    295    result = (ULong)(byte & 0x7f);
    296    if (LIKELY(!(byte & 0x80))) return result;
    297    shift = 7;
    298    /* end unroll first iteration */
    299    do {
    300       byte = get_UChar( c );
    301       result |= ((ULong)(byte & 0x7f)) << shift;
    302       shift += 7;
    303    } while (byte & 0x80);
    304    return result;
    305 }
    306 static Long get_SLEB128 ( Cursor* c ) {
    307    ULong  result = 0;
    308    Int    shift = 0;
    309    UChar  byte;
    310    do {
    311       byte = get_UChar(c);
    312       result |= ((ULong)(byte & 0x7f)) << shift;
    313       shift += 7;
    314    } while (byte & 0x80);
    315    if (shift < 64 && (byte & 0x40))
    316       result |= -(1ULL << shift);
    317    return result;
    318 }
    319 
    320 /* Assume 'c' points to the start of a string.  Return a DiCursor of
    321    whatever it points at, and advance it past the terminating zero.
    322    This makes it safe for the caller to then copy the string with
    323    ML_(addStr), since (w.r.t. image overruns) the process of advancing
    324    past the terminating zero will already have "vetted" the string. */
    325 static DiCursor get_AsciiZ ( Cursor* c ) {
    326    UChar uc;
    327    DiCursor res = get_DiCursor_from_Cursor(c);
    328    do { uc = get_UChar(c); } while (uc != 0);
    329    return res;
    330 }
    331 
    332 static ULong peek_ULEB128 ( Cursor* c ) {
    333    DiOffT here = c->sli_next;
    334    ULong  r    = get_ULEB128( c );
    335    c->sli_next = here;
    336    return r;
    337 }
    338 static UChar peek_UChar ( Cursor* c ) {
    339    DiOffT here = c->sli_next;
    340    UChar  r    = get_UChar( c );
    341    c->sli_next = here;
    342    return r;
    343 }
    344 
    345 static ULong get_Dwarfish_UWord ( Cursor* c, Bool is_dw64 ) {
    346    return is_dw64 ? get_ULong(c) : (ULong) get_UInt(c);
    347 }
    348 
    349 static UWord get_UWord ( Cursor* c ) {
    350    vg_assert(sizeof(UWord) == sizeof(void*));
    351    if (sizeof(UWord) == 4) return get_UInt(c);
    352    if (sizeof(UWord) == 8) return get_ULong(c);
    353    vg_assert(0);
    354 }
    355 
    356 /* Read a DWARF3 'Initial Length' field */
    357 static ULong get_Initial_Length ( /*OUT*/Bool* is64,
    358                                   Cursor* c,
    359                                   const HChar* barfMsg )
    360 {
    361    ULong w64;
    362    UInt  w32;
    363    *is64 = False;
    364    w32 = get_UInt( c );
    365    if (w32 >= 0xFFFFFFF0 && w32 < 0xFFFFFFFF) {
    366       c->barf( barfMsg );
    367    }
    368    else if (w32 == 0xFFFFFFFF) {
    369       *is64 = True;
    370       w64   = get_ULong( c );
    371    } else {
    372       *is64 = False;
    373       w64 = (ULong)w32;
    374    }
    375    return w64;
    376 }
    377 
    378 
    379 /*------------------------------------------------------------*/
    380 /*---                                                      ---*/
    381 /*--- "CUConst" structure                                  ---*/
    382 /*---                                                      ---*/
    383 /*------------------------------------------------------------*/
    384 
    385 typedef
    386    struct _name_form {
    387       ULong at_name;  // Dwarf Attribute name
    388       ULong at_form;  // Dwarf Attribute form
    389       UInt  skip_szB; // Nr of bytes skippable from here ...
    390       UInt  next_nf;  // ... to reach this attr/form index in the g_abbv.nf
    391    } name_form;
    392 /* skip_szB and next_nf are used to optimise the skipping of uninteresting DIEs.
    393    Each name_form maintains how many (fixed) nr of bytes can be skipped from
    394    the beginning of this form till the next attr/form to look at.
    395    The next form to look can be:
    396        an 'interesting' attr/form to read while skipping a DIE
    397           (currently, this is only DW_AT_sibling)
    398    or
    399        a variable length form which must be read to be skipped.
    400    For a variable length form, the skip_szB will be equal to VARSZ_FORM.
    401 
    402    Note: this technique could also be used to speed up the parsing
    403    of DIEs : for each parser kind, we could have the nr of bytes
    404    to skip to directly reach the interesting form(s) for the parser. */
    405 
    406 typedef
    407    struct _g_abbv {
    408       struct _g_abbv *next; // read/write by hash table.
    409       UWord  abbv_code;     // key, read by hash table
    410       ULong  atag;
    411       ULong  has_children;
    412       name_form nf[0];
    413       /* Variable-length array of name/form pairs, terminated
    414          by a 0/0 pair.
    415          The skip_szB/next_nf allows to skip efficiently a DIE
    416          described by this g_abbv; */
    417     } g_abbv;
    418 
    419 /* Holds information that is constant through the parsing of a
    420    Compilation Unit.  This is basically plumbed through to
    421    everywhere. */
    422 typedef
    423    struct {
    424       /* Call here if anything goes wrong */
    425       void (*barf)( const HChar* ) __attribute__((noreturn));
    426       /* Is this 64-bit DWARF ? */
    427       Bool   is_dw64;
    428       /* Which DWARF version ?  (2, 3 or 4) */
    429       UShort version;
    430       /* Length of this Compilation Unit, as stated in the
    431          .unit_length :: InitialLength field of the CU Header.
    432          However, this size (as specified by the D3 spec) does not
    433          include the size of the .unit_length field itself, which is
    434          either 4 or 12 bytes (32-bit or 64-bit Dwarf3).  That value
    435          can be obtained through the expression ".is_dw64 ? 12 : 4". */
    436       ULong  unit_length;
    437       /* Offset of start of this unit in .debug_info */
    438       UWord  cu_start_offset;
    439       /* SVMA for this CU.  In the D3 spec, is known as the "base
    440          address of the compilation unit (last para sec 3.1.1).
    441          Needed for (amongst things) interpretation of location-list
    442          values. */
    443       Addr   cu_svma;
    444       Bool   cu_svma_known;
    445 
    446       /* The debug_abbreviations table to be used for this Unit */
    447       //UChar* debug_abbv;
    448       /* Upper bound on size thereof (an overestimate, in general) */
    449       //UWord  debug_abbv_maxszB;
    450       /* A bounded area of the image, to be used as the
    451          debug_abbreviations table tobe used for this Unit. */
    452       DiSlice debug_abbv;
    453 
    454       /* Image information for various sections. */
    455       DiSlice escn_debug_str;
    456       DiSlice escn_debug_ranges;
    457       DiSlice escn_debug_loc;
    458       DiSlice escn_debug_line;
    459       DiSlice escn_debug_info;
    460       DiSlice escn_debug_types;
    461       DiSlice escn_debug_info_alt;
    462       DiSlice escn_debug_str_alt;
    463       /* How much to add to .debug_types resp. alternate .debug_info offsets
    464          in cook_die*.  */
    465       UWord  types_cuOff_bias;
    466       UWord  alt_cuOff_bias;
    467       /* --- Needed so we can add stuff to the string table. --- */
    468       struct _DebugInfo* di;
    469       /* --- a hash table of g_abbv (i.e. parsed abbreviations) --- */
    470       VgHashTable *ht_abbvs;
    471 
    472       /* True if this came from .debug_types; otherwise it came from
    473          .debug_info.  */
    474       Bool is_type_unit;
    475       /* For a unit coming from .debug_types, these hold the TU's type
    476          signature and the uncooked DIE offset of the TU's signatured
    477          type.  For a unit coming from .debug_info, these are unused.  */
    478       ULong type_signature;
    479       ULong type_offset;
    480 
    481       /* Signatured type hash; computed once and then shared by all
    482          CUs.  */
    483       VgHashTable *signature_types;
    484 
    485       /* True if this came from alternate .debug_info; otherwise
    486          it came from normal .debug_info or .debug_types.  */
    487       Bool is_alt_info;
    488    }
    489    CUConst;
    490 
    491 
    492 /* Return the cooked value of DIE depending on whether CC represents a
    493    .debug_types unit.  To cook a DIE, we pretend that the .debug_info,
    494    .debug_types and optional alternate .debug_info sections form
    495    a contiguous whole, so that DIEs coming from .debug_types are numbered
    496    starting at the end of .debug_info and DIEs coming from alternate
    497    .debug_info are numbered starting at the end of .debug_types.  */
    498 static UWord cook_die( const CUConst* cc, UWord die )
    499 {
    500    if (cc->is_type_unit)
    501       die += cc->types_cuOff_bias;
    502    else if (cc->is_alt_info)
    503       die += cc->alt_cuOff_bias;
    504    return die;
    505 }
    506 
    507 /* Like cook_die, but understand that DIEs coming from a
    508    DW_FORM_ref_sig8 reference are already cooked.  Also, handle
    509    DW_FORM_GNU_ref_alt from within primary .debug_info or .debug_types
    510    as reference to alternate .debug_info.  */
    511 static UWord cook_die_using_form( const CUConst *cc, UWord die, DW_FORM form)
    512 {
    513    if (form == DW_FORM_ref_sig8)
    514       return die;
    515    if (form == DW_FORM_GNU_ref_alt)
    516       return die + cc->alt_cuOff_bias;
    517    return cook_die( cc, die );
    518 }
    519 
    520 /* Return the uncooked offset of DIE and set *TYPE_FLAG to true if the DIE
    521    came from the .debug_types section and *ALT_FLAG to true if the DIE
    522    came from alternate .debug_info section.  */
    523 static UWord uncook_die( const CUConst *cc, UWord die, /*OUT*/Bool *type_flag,
    524                          Bool *alt_flag )
    525 {
    526    *alt_flag = False;
    527    *type_flag = False;
    528    /* The use of escn_debug_{info,types}.szB seems safe to me even if
    529       escn_debug_{info,types} are DiSlice_INVALID (meaning the
    530       sections were not found), because DiSlice_INVALID.szB is always
    531       zero.  That said, it seems unlikely we'd ever get here if
    532       .debug_info or .debug_types were missing. */
    533    if (die >= cc->escn_debug_info.szB) {
    534       if (die >= cc->escn_debug_info.szB + cc->escn_debug_types.szB) {
    535          *alt_flag = True;
    536          die -= cc->escn_debug_info.szB + cc->escn_debug_types.szB;
    537       } else {
    538          *type_flag = True;
    539          die -= cc->escn_debug_info.szB;
    540       }
    541    }
    542    return die;
    543 }
    544 
    545 /*------------------------------------------------------------*/
    546 /*---                                                      ---*/
    547 /*--- Helper functions for Guarded Expressions             ---*/
    548 /*---                                                      ---*/
    549 /*------------------------------------------------------------*/
    550 
    551 /* Parse the location list starting at img-offset 'debug_loc_offset'
    552    in .debug_loc.  Results are biased with 'svma_of_referencing_CU'
    553    and so I believe are correct SVMAs for the object as a whole.  This
    554    function allocates the UChar*, and the caller must deallocate it.
    555    The resulting block is in so-called Guarded-Expression format.
    556 
    557    Guarded-Expression format is similar but not identical to the DWARF3
    558    location-list format.  The format of each returned block is:
    559 
    560       UChar biasMe;
    561       UChar isEnd;
    562       followed by zero or more of
    563 
    564       (Addr aMin;  Addr aMax;  UShort nbytes;  ..bytes..;  UChar isEnd)
    565 
    566    '..bytes..' is a standard DWARF3 location expression which is
    567    valid when aMin <= pc <= aMax (possibly after suitable biasing).
    568 
    569    The number of bytes in '..bytes..' is nbytes.
    570 
    571    The end of the sequence is marked by an isEnd == 1 value.  All
    572    previous isEnd values must be zero.
    573 
    574    biasMe is 1 if the aMin/aMax fields need this DebugInfo's
    575    text_bias added before use, and 0 if this is not necessary
    576    (the GX is ready to go).
    577 
    578    Hence the block can be quickly parsed and is self-describing.  Note
    579    that aMax is 1 less than the corresponding value in a DWARF3
    580    location list.  Zero length ranges, with aMax == aMin-1, are not
    581    allowed.
    582 */
    583 /* 2008-sept-12: moved ML_(pp_GX) from here to d3basics.c, where
    584    it more logically belongs. */
    585 
    586 
    587 /* Apply a text bias to a GX. */
    588 static void bias_GX ( /*MOD*/GExpr* gx, const DebugInfo* di )
    589 {
    590    UShort nbytes;
    591    UChar* p = &gx->payload[0];
    592    UChar* pA;
    593    UChar  uc;
    594    uc = *p++; /*biasMe*/
    595    if (uc == 0)
    596       return;
    597    vg_assert(uc == 1);
    598    p[-1] = 0; /* mark it as done */
    599    while (True) {
    600       uc = *p++;
    601       if (uc == 1)
    602          break; /*isEnd*/
    603       vg_assert(uc == 0);
    604       /* t-bias aMin */
    605       pA = (UChar*)p;
    606       ML_(write_Addr)(pA, ML_(read_Addr)(pA) + di->text_debug_bias);
    607       p += sizeof(Addr);
    608       /* t-bias aMax */
    609       pA = (UChar*)p;
    610       ML_(write_Addr)(pA, ML_(read_Addr)(pA) + di->text_debug_bias);
    611       p += sizeof(Addr);
    612       /* nbytes, and actual expression */
    613       nbytes = ML_(read_UShort)(p); p += sizeof(UShort);
    614       p += nbytes;
    615    }
    616 }
    617 
    618 __attribute__((noinline))
    619 static GExpr* make_singleton_GX ( DiCursor block, ULong nbytes )
    620 {
    621    SizeT  bytesReqd;
    622    GExpr* gx;
    623    UChar *p, *pstart;
    624 
    625    vg_assert(sizeof(UWord) == sizeof(Addr));
    626    vg_assert(nbytes <= 0xFFFF); /* else we overflow the nbytes field */
    627    bytesReqd
    628       =   sizeof(UChar)  /*biasMe*/    + sizeof(UChar) /*!isEnd*/
    629         + sizeof(UWord)  /*aMin*/      + sizeof(UWord) /*aMax*/
    630         + sizeof(UShort) /*nbytes*/    + (SizeT)nbytes
    631         + sizeof(UChar); /*isEnd*/
    632 
    633    gx = ML_(dinfo_zalloc)( "di.readdwarf3.msGX.1",
    634                            sizeof(GExpr) + bytesReqd );
    635 
    636    p = pstart = &gx->payload[0];
    637 
    638    p = ML_(write_UChar)(p, 0);        /*biasMe*/
    639    p = ML_(write_UChar)(p, 0);        /*!isEnd*/
    640    p = ML_(write_Addr)(p, 0);         /*aMin*/
    641    p = ML_(write_Addr)(p, ~0);        /*aMax*/
    642    p = ML_(write_UShort)(p, nbytes);  /*nbytes*/
    643    ML_(cur_read_get)(p, block, nbytes); p += nbytes;
    644    p = ML_(write_UChar)(p, 1);        /*isEnd*/
    645 
    646    vg_assert( (SizeT)(p - pstart) == bytesReqd);
    647    vg_assert( &gx->payload[bytesReqd]
    648               == ((UChar*)gx) + sizeof(GExpr) + bytesReqd );
    649 
    650    return gx;
    651 }
    652 
    653 __attribute__((noinline))
    654 static GExpr* make_general_GX ( const CUConst* cc,
    655                                 Bool     td3,
    656                                 ULong    debug_loc_offset,
    657                                 Addr     svma_of_referencing_CU )
    658 {
    659    Addr      base;
    660    Cursor    loc;
    661    XArray*   xa; /* XArray of UChar */
    662    GExpr*    gx;
    663    Word      nbytes;
    664 
    665    vg_assert(sizeof(UWord) == sizeof(Addr));
    666    if (!ML_(sli_is_valid)(cc->escn_debug_loc) || cc->escn_debug_loc.szB == 0)
    667       cc->barf("make_general_GX: .debug_loc is empty/missing");
    668 
    669    init_Cursor( &loc, cc->escn_debug_loc, 0, cc->barf,
    670                 "Overrun whilst reading .debug_loc section(2)" );
    671    set_position_of_Cursor( &loc, debug_loc_offset );
    672 
    673    TRACE_D3("make_general_GX (.debug_loc_offset = %llu, ioff = %llu) {\n",
    674             debug_loc_offset, get_DiCursor_from_Cursor(&loc).ioff );
    675 
    676    /* Who frees this xa?  It is freed before this fn exits. */
    677    xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.mgGX.1",
    678                     ML_(dinfo_free),
    679                     sizeof(UChar) );
    680 
    681    { UChar c = 1; /*biasMe*/ VG_(addBytesToXA)( xa, &c, sizeof(c) ); }
    682 
    683    base = 0;
    684    while (True) {
    685       Bool  acquire;
    686       UWord len;
    687       /* Read a (host-)word pair.  This is something of a hack since
    688          the word size to read is really dictated by the ELF file;
    689          however, we assume we're reading a file with the same
    690          word-sizeness as the host.  Reasonably enough. */
    691       UWord w1 = get_UWord( &loc );
    692       UWord w2 = get_UWord( &loc );
    693 
    694       TRACE_D3("   %08lx %08lx\n", w1, w2);
    695       if (w1 == 0 && w2 == 0)
    696          break; /* end of list */
    697 
    698       if (w1 == -1UL) {
    699          /* new value for 'base' */
    700          base = w2;
    701          continue;
    702       }
    703 
    704       /* else a location expression follows */
    705       /* else enumerate [w1+base, w2+base) */
    706       /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc"
    707          (sec 2.17.2) */
    708       if (w1 > w2) {
    709          TRACE_D3("negative range is for .debug_loc expr at "
    710                   "file offset %llu\n",
    711                   debug_loc_offset);
    712          cc->barf( "negative range in .debug_loc section" );
    713       }
    714 
    715       /* ignore zero length ranges */
    716       acquire = w1 < w2;
    717       len     = (UWord)get_UShort( &loc );
    718 
    719       if (acquire) {
    720          UWord  w;
    721          UShort s;
    722          UChar  c;
    723          c = 0; /* !isEnd*/
    724          VG_(addBytesToXA)( xa, &c, sizeof(c) );
    725          w = w1    + base + svma_of_referencing_CU;
    726          VG_(addBytesToXA)( xa, &w, sizeof(w) );
    727          w = w2 -1 + base + svma_of_referencing_CU;
    728          VG_(addBytesToXA)( xa, &w, sizeof(w) );
    729          s = (UShort)len;
    730          VG_(addBytesToXA)( xa, &s, sizeof(s) );
    731       }
    732 
    733       while (len > 0) {
    734          UChar byte = get_UChar( &loc );
    735          TRACE_D3("%02x", (UInt)byte);
    736          if (acquire)
    737             VG_(addBytesToXA)( xa, &byte, 1 );
    738          len--;
    739       }
    740       TRACE_D3("\n");
    741    }
    742 
    743    { UChar c = 1; /*isEnd*/ VG_(addBytesToXA)( xa, &c, sizeof(c) ); }
    744 
    745    nbytes = VG_(sizeXA)( xa );
    746    vg_assert(nbytes >= 1);
    747 
    748    gx = ML_(dinfo_zalloc)( "di.readdwarf3.mgGX.2", sizeof(GExpr) + nbytes );
    749    VG_(memcpy)( &gx->payload[0], (UChar*)VG_(indexXA)(xa,0), nbytes );
    750    vg_assert( &gx->payload[nbytes]
    751               == ((UChar*)gx) + sizeof(GExpr) + nbytes );
    752 
    753    VG_(deleteXA)( xa );
    754 
    755    TRACE_D3("}\n");
    756 
    757    return gx;
    758 }
    759 
    760 
    761 /*------------------------------------------------------------*/
    762 /*---                                                      ---*/
    763 /*--- Helper functions for range lists and CU headers      ---*/
    764 /*---                                                      ---*/
    765 /*------------------------------------------------------------*/
    766 
    767 /* Denotes an address range.  Both aMin and aMax are included in the
    768    range; hence a complete range is (0, ~0) and an empty range is any
    769    (X, X-1) for X > 0.*/
    770 typedef
    771    struct { Addr aMin; Addr aMax; }
    772    AddrRange;
    773 
    774 
    775 /* Generate an arbitrary structural total ordering on
    776    XArray* of AddrRange. */
    777 static Word cmp__XArrays_of_AddrRange ( const XArray* rngs1,
    778                                         const XArray* rngs2 )
    779 {
    780    Word n1, n2, i;
    781    vg_assert(rngs1 && rngs2);
    782    n1 = VG_(sizeXA)( rngs1 );
    783    n2 = VG_(sizeXA)( rngs2 );
    784    if (n1 < n2) return -1;
    785    if (n1 > n2) return 1;
    786    for (i = 0; i < n1; i++) {
    787       AddrRange* rng1 = (AddrRange*)VG_(indexXA)( rngs1, i );
    788       AddrRange* rng2 = (AddrRange*)VG_(indexXA)( rngs2, i );
    789       if (rng1->aMin < rng2->aMin) return -1;
    790       if (rng1->aMin > rng2->aMin) return 1;
    791       if (rng1->aMax < rng2->aMax) return -1;
    792       if (rng1->aMax > rng2->aMax) return 1;
    793    }
    794    return 0;
    795 }
    796 
    797 
    798 __attribute__((noinline))
    799 static XArray* /* of AddrRange */ empty_range_list ( void )
    800 {
    801    XArray* xa; /* XArray of AddrRange */
    802    /* Who frees this xa?  varstack_preen() does. */
    803    xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.erl.1",
    804                     ML_(dinfo_free),
    805                     sizeof(AddrRange) );
    806    return xa;
    807 }
    808 
    809 
    810 __attribute__((noinline))
    811 static XArray* unitary_range_list ( Addr aMin, Addr aMax )
    812 {
    813    XArray*   xa;
    814    AddrRange pair;
    815    vg_assert(aMin <= aMax);
    816    /* Who frees this xa?  varstack_preen() does. */
    817    xa = VG_(newXA)( ML_(dinfo_zalloc),  "di.readdwarf3.url.1",
    818                     ML_(dinfo_free),
    819                     sizeof(AddrRange) );
    820    pair.aMin = aMin;
    821    pair.aMax = aMax;
    822    VG_(addToXA)( xa, &pair );
    823    return xa;
    824 }
    825 
    826 
    827 /* Enumerate the address ranges starting at img-offset
    828    'debug_ranges_offset' in .debug_ranges.  Results are biased with
    829    'svma_of_referencing_CU' and so I believe are correct SVMAs for the
    830    object as a whole.  This function allocates the XArray, and the
    831    caller must deallocate it. */
    832 __attribute__((noinline))
    833 static XArray* /* of AddrRange */
    834 get_range_list ( const CUConst* cc,
    835                  Bool     td3,
    836                  UWord    debug_ranges_offset,
    837                  Addr     svma_of_referencing_CU )
    838 {
    839    Addr      base;
    840    Cursor    ranges;
    841    XArray*   xa; /* XArray of AddrRange */
    842    AddrRange pair;
    843 
    844    if (!ML_(sli_is_valid)(cc->escn_debug_ranges)
    845        || cc->escn_debug_ranges.szB == 0)
    846       cc->barf("get_range_list: .debug_ranges is empty/missing");
    847 
    848    init_Cursor( &ranges, cc->escn_debug_ranges, 0, cc->barf,
    849                 "Overrun whilst reading .debug_ranges section(2)" );
    850    set_position_of_Cursor( &ranges, debug_ranges_offset );
    851 
    852    /* Who frees this xa?  varstack_preen() does. */
    853    xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.grl.1", ML_(dinfo_free),
    854                     sizeof(AddrRange) );
    855    base = 0;
    856    while (True) {
    857       /* Read a (host-)word pair.  This is something of a hack since
    858          the word size to read is really dictated by the ELF file;
    859          however, we assume we're reading a file with the same
    860          word-sizeness as the host.  Reasonably enough. */
    861       UWord w1 = get_UWord( &ranges );
    862       UWord w2 = get_UWord( &ranges );
    863 
    864       if (w1 == 0 && w2 == 0)
    865          break; /* end of list. */
    866 
    867       if (w1 == -1UL) {
    868          /* new value for 'base' */
    869          base = w2;
    870          continue;
    871       }
    872 
    873       /* else enumerate [w1+base, w2+base) */
    874       /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc"
    875          (sec 2.17.2) */
    876       if (w1 > w2)
    877          cc->barf( "negative range in .debug_ranges section" );
    878       if (w1 < w2) {
    879          pair.aMin = w1     + base + svma_of_referencing_CU;
    880          pair.aMax = w2 - 1 + base + svma_of_referencing_CU;
    881          vg_assert(pair.aMin <= pair.aMax);
    882          VG_(addToXA)( xa, &pair );
    883       }
    884    }
    885    return xa;
    886 }
    887 
/* Marker value used in place of a byte count for a form whose size is
   not fixed; init_ht_abbvs below compares form sizes against it. */
#define VARSZ_FORM 0xffffffff
/* Forward declaration: init_ht_abbvs calls this, and the definition
   appears further down the file. */
static UInt get_Form_szB (const CUConst* cc, DW_FORM form );
    890 
    891 /* Initialises the hash table of abbreviations.
    892    We do a single scan of the abbv slice to parse and
    893    build all abbreviations, for the following reasons:
    894      * all or most abbreviations will be needed in any case
    895        (at least for var-info reading).
    896      * re-reading each time an abbreviation causes a lot of calls
    897        to get_ULEB128.
    898      * a CU should not have many abbreviations. */
    899 static void init_ht_abbvs (CUConst* cc,
    900                            Bool td3)
    901 {
    902    Cursor c;
    903    g_abbv *ta; // temporary abbreviation, reallocated if needed.
    904    UInt ta_nf_maxE; // max nr of pairs in ta.nf[], doubled when reallocated.
    905    UInt ta_nf_n;    // nr of pairs in ta->nf that are initialised.
    906    g_abbv *ht_ta; // abbv to insert in hash table.
    907    Int i;
    908 
    909    #define SZ_G_ABBV(_nf_szE) (sizeof(g_abbv) + _nf_szE * sizeof(name_form))
    910 
    911    ta_nf_maxE = 10; // starting with enough for 9 pairs+terminating pair.
    912    ta = ML_(dinfo_zalloc) ("di.readdwarf3.ht_ta_nf", SZ_G_ABBV(ta_nf_maxE));
    913    cc->ht_abbvs = VG_(HT_construct) ("di.readdwarf3.ht_abbvs");
    914 
    915    init_Cursor( &c, cc->debug_abbv, 0, cc->barf,
    916                "Overrun whilst parsing .debug_abbrev section(2)" );
    917    while (True) {
    918       ta->abbv_code = get_ULEB128( &c );
    919       if (ta->abbv_code == 0) break; /* end of the table */
    920 
    921       ta->atag = get_ULEB128( &c );
    922       ta->has_children = get_UChar( &c );
    923       ta_nf_n = 0;
    924       while (True) {
    925          if (ta_nf_n >= ta_nf_maxE) {
    926             g_abbv *old_ta = ta;
    927             ta = ML_(dinfo_zalloc) ("di.readdwarf3.ht_ta_nf",
    928                                     SZ_G_ABBV(2 * ta_nf_maxE));
    929             ta_nf_maxE = 2 * ta_nf_maxE;
    930             VG_(memcpy) (ta, old_ta, SZ_G_ABBV(ta_nf_n));
    931             ML_(dinfo_free) (old_ta);
    932          }
    933          ta->nf[ta_nf_n].at_name = get_ULEB128( &c );
    934          ta->nf[ta_nf_n].at_form = get_ULEB128( &c );
    935          if (ta->nf[ta_nf_n].at_name == 0 && ta->nf[ta_nf_n].at_form == 0) {
    936             ta_nf_n++;
    937             break;
    938          }
    939         ta_nf_n++;
    940       }
    941 
    942       // Initialises the skip_szB/next_nf elements : an element at position
    943       // i must contain the sum of its own size + the sizes of all elements
    944       // following i till either the next variable size element, the next
    945       // sibling element or the end of the DIE.
    946       ta->nf[ta_nf_n - 1].skip_szB = 0;
    947       ta->nf[ta_nf_n - 1].next_nf = 0;
    948       for (i = ta_nf_n - 2; i >= 0; i--) {
    949          const UInt form_szB = get_Form_szB (cc, (DW_FORM)ta->nf[i].at_form);
    950 
    951          if (ta->nf[i+1].at_name == DW_AT_sibling
    952              || ta->nf[i+1].skip_szB == VARSZ_FORM) {
    953             ta->nf[i].skip_szB = form_szB;
    954             ta->nf[i].next_nf  = i+1;
    955          } else if (form_szB == VARSZ_FORM) {
    956             ta->nf[i].skip_szB = form_szB;
    957             ta->nf[i].next_nf  = i+1;
    958          } else {
    959             ta->nf[i].skip_szB = ta->nf[i+1].skip_szB + form_szB;
    960             ta->nf[i].next_nf  = ta->nf[i+1].next_nf;
    961          }
    962       }
    963 
    964       ht_ta = ML_(dinfo_zalloc) ("di.readdwarf3.ht_ta", SZ_G_ABBV(ta_nf_n));
    965       VG_(memcpy) (ht_ta, ta, SZ_G_ABBV(ta_nf_n));
    966       VG_(HT_add_node) ( cc->ht_abbvs, ht_ta );
    967       if (TD3) {
    968          TRACE_D3("  Adding abbv_code %lu TAG  %s [%s] nf %u ",
    969                   ht_ta->abbv_code, ML_(pp_DW_TAG)(ht_ta->atag),
    970                   ML_(pp_DW_children)(ht_ta->has_children),
    971                   ta_nf_n);
    972          TRACE_D3("  ");
    973          for (i = 0; i < ta_nf_n; i++)
    974             TRACE_D3("[%u,%u] ", ta->nf[i].skip_szB, ta->nf[i].next_nf);
    975          TRACE_D3("\n");
    976       }
    977    }
    978 
    979    ML_(dinfo_free) (ta);
    980    #undef SZ_G_ABBV
    981 }
    982 
    983 static g_abbv* get_abbv (const CUConst* cc, ULong abbv_code)
    984 {
    985    g_abbv *abbv;
    986 
    987    abbv = VG_(HT_lookup) (cc->ht_abbvs, abbv_code);
    988    if (!abbv)
    989       cc->barf ("abbv_code not found in ht_abbvs table");
    990    return abbv;
    991 }
    992 
    993 /* Free the memory allocated in CUConst. */
    994 static void clear_CUConst (CUConst* cc)
    995 {
    996    VG_(HT_destruct) ( cc->ht_abbvs, ML_(dinfo_free));
    997    cc->ht_abbvs = NULL;
    998 }
    999 
   1000 /* Parse the Compilation Unit header indicated at 'c' and
   1001    initialise 'cc' accordingly. */
   1002 static __attribute__((noinline))
   1003 void parse_CU_Header ( /*OUT*/CUConst* cc,
   1004                        Bool td3,
   1005                        Cursor* c,
   1006                        DiSlice escn_debug_abbv,
   1007 		       Bool type_unit,
   1008                        Bool alt_info )
   1009 {
   1010    UChar  address_size;
   1011    ULong  debug_abbrev_offset;
   1012 
   1013    VG_(memset)(cc, 0, sizeof(*cc));
   1014    vg_assert(c && c->barf);
   1015    cc->barf = c->barf;
   1016 
   1017    /* initial_length field */
   1018    cc->unit_length
   1019       = get_Initial_Length( &cc->is_dw64, c,
   1020            "parse_CU_Header: invalid initial-length field" );
   1021 
   1022    TRACE_D3("   Length:        %llu\n", cc->unit_length );
   1023 
   1024    /* version */
   1025    cc->version = get_UShort( c );
   1026    if (cc->version != 2 && cc->version != 3 && cc->version != 4)
   1027       cc->barf( "parse_CU_Header: is neither DWARF2 nor DWARF3 nor DWARF4" );
   1028    TRACE_D3("   Version:       %d\n", (Int)cc->version );
   1029 
   1030    /* debug_abbrev_offset */
   1031    debug_abbrev_offset = get_Dwarfish_UWord( c, cc->is_dw64 );
   1032    if (debug_abbrev_offset >= escn_debug_abbv.szB)
   1033       cc->barf( "parse_CU_Header: invalid debug_abbrev_offset" );
   1034    TRACE_D3("   Abbrev Offset: %llu\n", debug_abbrev_offset );
   1035 
   1036    /* address size.  If this isn't equal to the host word size, just
   1037       give up.  This makes it safe to assume elsewhere that
   1038       DW_FORM_addr and DW_FORM_ref_addr can be treated as a host
   1039       word. */
   1040    address_size = get_UChar( c );
   1041    if (address_size != sizeof(void*))
   1042       cc->barf( "parse_CU_Header: invalid address_size" );
   1043    TRACE_D3("   Pointer Size:  %d\n", (Int)address_size );
   1044 
   1045    cc->is_type_unit = type_unit;
   1046    cc->is_alt_info = alt_info;
   1047 
   1048    if (type_unit) {
   1049       cc->type_signature = get_ULong( c );
   1050       cc->type_offset = get_Dwarfish_UWord( c, cc->is_dw64 );
   1051    }
   1052 
   1053    /* Set up cc->debug_abbv to point to the relevant table for this
   1054       CU.  Set its .szB so that at least we can't read off the end of
   1055       the debug_abbrev section -- potentially (and quite likely) too
   1056       big, if this isn't the last table in the section, but at least
   1057       it's safe.
   1058 
   1059       This amounts to taking debug_abbv_escn and moving the start
   1060       position along by debug_abbrev_offset bytes, hence forming a
   1061       smaller DiSlice which has the same end point.  Since we checked
   1062       just above that debug_abbrev_offset is less than the size of
   1063       debug_abbv_escn, this should leave us with a nonempty slice. */
   1064    vg_assert(debug_abbrev_offset < escn_debug_abbv.szB);
   1065    cc->debug_abbv      = escn_debug_abbv;
   1066    cc->debug_abbv.ioff += debug_abbrev_offset;
   1067    cc->debug_abbv.szB  -= debug_abbrev_offset;
   1068 
   1069    init_ht_abbvs(cc, td3);
   1070 }
   1071 
   1072 /* This represents a single signatured type.  It maps a type signature
   1073    (a ULong) to a cooked DIE offset.  Objects of this type are stored
   1074    in the type signature hash table.  */
   1075 typedef
   1076    struct D3SignatureType {
   1077       struct D3SignatureType *next;
   1078       UWord data;
   1079       ULong type_signature;
   1080       UWord die;
   1081    }
   1082    D3SignatureType;
   1083 
   1084 /* Record a signatured type in the hash table.  */
   1085 static void record_signatured_type ( VgHashTable *tab,
   1086                                      ULong type_signature,
   1087                                      UWord die )
   1088 {
   1089    D3SignatureType *dstype = ML_(dinfo_zalloc) ( "di.readdwarf3.sigtype",
   1090                                                  sizeof(D3SignatureType) );
   1091    dstype->data = (UWord) type_signature;
   1092    dstype->type_signature = type_signature;
   1093    dstype->die = die;
   1094    VG_(HT_add_node) ( tab, dstype );
   1095 }
   1096 
   1097 /* Given a type signature hash table and a type signature, return the
   1098    cooked DIE offset of the type.  If the type cannot be found, call
   1099    BARF.  */
   1100 static UWord lookup_signatured_type ( const VgHashTable *tab,
   1101                                       ULong type_signature,
   1102                                       void (*barf)( const HChar* ) __attribute__((noreturn)) )
   1103 {
   1104    D3SignatureType *dstype = VG_(HT_lookup) ( tab, (UWord) type_signature );
   1105    /* This may be unwarranted chumminess with the hash table
   1106       implementation.  */
   1107    while ( dstype != NULL && dstype->type_signature != type_signature)
   1108       dstype = dstype->next;
   1109    if (dstype == NULL) {
   1110       barf("lookup_signatured_type: could not find signatured type");
   1111       /*NOTREACHED*/
   1112       vg_assert(0);
   1113    }
   1114    return dstype->die;
   1115 }
   1116 
   1117 
   1118 /* Represents Form data.  If szB is 1/2/4/8 then the result is in the
   1119    lowest 1/2/4/8 bytes of u.val.  If szB is zero or negative then the
   1120    result is an image section beginning at u.cur and with size -szB.
   1121    No other szB values are allowed. */
   1122 typedef
   1123    struct {
   1124       Long szB; // 1, 2, 4, 8 or non-positive values only.
   1125       union { ULong val; DiCursor cur; } u;
   1126    }
   1127    FormContents;
   1128 
   1129 /* From 'c', get the Form data into 'cts'.  Either it gets a 1/2/4/8
   1130    byte scalar value, or (a reference to) zero or more bytes starting
   1131    at a DiCursor.*/
   1132 static
   1133 void get_Form_contents ( /*OUT*/FormContents* cts,
   1134                          const CUConst* cc, Cursor* c,
   1135                          Bool td3, DW_FORM form )
   1136 {
   1137    VG_(bzero_inline)(cts, sizeof(*cts));
   1138    // !!! keep switch in sync with get_Form_szB. The nr of characters read below
   1139    // must be computed similarly in get_Form_szB.
   1140    // The consistency is verified in trace_DIE.
   1141    switch (form) {
   1142       case DW_FORM_data1:
   1143          cts->u.val = (ULong)(UChar)get_UChar(c);
   1144          cts->szB   = 1;
   1145          TRACE_D3("%u", (UInt)cts->u.val);
   1146          break;
   1147       case DW_FORM_data2:
   1148          cts->u.val = (ULong)(UShort)get_UShort(c);
   1149          cts->szB   = 2;
   1150          TRACE_D3("%u", (UInt)cts->u.val);
   1151          break;
   1152       case DW_FORM_data4:
   1153          cts->u.val = (ULong)(UInt)get_UInt(c);
   1154          cts->szB   = 4;
   1155          TRACE_D3("%u", (UInt)cts->u.val);
   1156          break;
   1157       case DW_FORM_data8:
   1158          cts->u.val = get_ULong(c);
   1159          cts->szB   = 8;
   1160          TRACE_D3("%llu", cts->u.val);
   1161          break;
   1162       case DW_FORM_sec_offset:
   1163          cts->u.val = (ULong)get_Dwarfish_UWord( c, cc->is_dw64 );
   1164          cts->szB   = cc->is_dw64 ? 8 : 4;
   1165          TRACE_D3("%llu", cts->u.val);
   1166          break;
   1167       case DW_FORM_sdata:
   1168          cts->u.val = (ULong)(Long)get_SLEB128(c);
   1169          cts->szB   = 8;
   1170          TRACE_D3("%llu", cts->u.val);
   1171          break;
   1172       case DW_FORM_udata:
   1173          cts->u.val = (ULong)(Long)get_ULEB128(c);
   1174          cts->szB   = 8;
   1175          TRACE_D3("%llu", cts->u.val);
   1176          break;
   1177       case DW_FORM_addr:
   1178          /* note, this is a hack.  DW_FORM_addr is defined as getting
   1179             a word the size of the target machine as defined by the
   1180             address_size field in the CU Header.  However,
   1181             parse_CU_Header() rejects all inputs except those for
   1182             which address_size == sizeof(Word), hence we can just
   1183             treat it as a (host) Word.  */
   1184          cts->u.val = (ULong)(UWord)get_UWord(c);
   1185          cts->szB   = sizeof(UWord);
   1186          TRACE_D3("0x%lx", (UWord)cts->u.val);
   1187          break;
   1188 
   1189       case DW_FORM_ref_addr:
   1190          /* We make the same word-size assumption as DW_FORM_addr. */
   1191          /* What does this really mean?  From D3 Sec 7.5.4,
   1192             description of "reference", it would appear to reference
   1193             some other DIE, by specifying the offset from the
   1194             beginning of a .debug_info section.  The D3 spec mentions
   1195             that this might be in some other shared object and
   1196             executable.  But I don't see how the name of the other
   1197             object/exe is specified.
   1198 
   1199             At least for the DW_FORM_ref_addrs created by icc11, the
   1200             references seem to be within the same object/executable.
   1201             So for the moment we merely range-check, to see that they
   1202             actually do specify a plausible offset within this
   1203             object's .debug_info, and return the value unchanged.
   1204 
   1205             In DWARF 2, DW_FORM_ref_addr is address-sized, but in
   1206             DWARF 3 and later, it is offset-sized.
   1207          */
   1208          if (cc->version == 2) {
   1209             cts->u.val = (ULong)(UWord)get_UWord(c);
   1210             cts->szB   = sizeof(UWord);
   1211          } else {
   1212             cts->u.val = get_Dwarfish_UWord(c, cc->is_dw64);
   1213             cts->szB   = cc->is_dw64 ? sizeof(ULong) : sizeof(UInt);
   1214          }
   1215          TRACE_D3("0x%lx", (UWord)cts->u.val);
   1216          if (0) VG_(printf)("DW_FORM_ref_addr 0x%lx\n", (UWord)cts->u.val);
   1217          if (/* the following is surely impossible, but ... */
   1218              !ML_(sli_is_valid)(cc->escn_debug_info)
   1219              || cts->u.val >= (ULong)cc->escn_debug_info.szB) {
   1220             /* Hmm.  Offset is nonsensical for this object's .debug_info
   1221                section.  Be safe and reject it. */
   1222             cc->barf("get_Form_contents: DW_FORM_ref_addr points "
   1223                      "outside .debug_info");
   1224          }
   1225          break;
   1226 
   1227       case DW_FORM_strp: {
   1228          /* this is an offset into .debug_str */
   1229          UWord uw = (UWord)get_Dwarfish_UWord( c, cc->is_dw64 );
   1230          if (!ML_(sli_is_valid)(cc->escn_debug_str)
   1231              || uw >= cc->escn_debug_str.szB)
   1232             cc->barf("get_Form_contents: DW_FORM_strp "
   1233                      "points outside .debug_str");
   1234          /* FIXME: check the entire string lies inside debug_str,
   1235             not just the first byte of it. */
   1236          DiCursor str
   1237             = ML_(cur_plus)( ML_(cur_from_sli)(cc->escn_debug_str), uw );
   1238          if (TD3) {
   1239             HChar* tmp = ML_(cur_read_strdup)(str, "di.getFC.1");
   1240             TRACE_D3("(indirect string, offset: 0x%lx): %s", uw, tmp);
   1241             ML_(dinfo_free)(tmp);
   1242          }
   1243          cts->u.cur = str;
   1244          cts->szB   = - (Long)(1 + (ULong)ML_(cur_strlen)(str));
   1245          break;
   1246       }
   1247       case DW_FORM_string: {
   1248          DiCursor str = get_AsciiZ(c);
   1249          if (TD3) {
   1250             HChar* tmp = ML_(cur_read_strdup)(str, "di.getFC.2");
   1251             TRACE_D3("%s", tmp);
   1252             ML_(dinfo_free)(tmp);
   1253          }
   1254          cts->u.cur = str;
   1255          /* strlen is safe because get_AsciiZ already 'vetted' the
   1256             entire string */
   1257          cts->szB   = - (Long)(1 + (ULong)ML_(cur_strlen)(str));
   1258          break;
   1259       }
   1260       case DW_FORM_ref1: {
   1261          UChar u8   = get_UChar(c);
   1262          UWord res  = cc->cu_start_offset + (UWord)u8;
   1263          cts->u.val = (ULong)res;
   1264          cts->szB   = sizeof(UWord);
   1265          TRACE_D3("<%lx>", res);
   1266          break;
   1267       }
   1268       case DW_FORM_ref2: {
   1269          UShort u16 = get_UShort(c);
   1270          UWord  res = cc->cu_start_offset + (UWord)u16;
   1271          cts->u.val = (ULong)res;
   1272          cts->szB   = sizeof(UWord);
   1273          TRACE_D3("<%lx>", res);
   1274          break;
   1275       }
   1276       case DW_FORM_ref4: {
   1277          UInt  u32  = get_UInt(c);
   1278          UWord res  = cc->cu_start_offset + (UWord)u32;
   1279          cts->u.val = (ULong)res;
   1280          cts->szB   = sizeof(UWord);
   1281          TRACE_D3("<%lx>", res);
   1282          break;
   1283       }
   1284       case DW_FORM_ref8: {
   1285          ULong u64  = get_ULong(c);
   1286          UWord res  = cc->cu_start_offset + (UWord)u64;
   1287          cts->u.val = (ULong)res;
   1288          cts->szB   = sizeof(UWord);
   1289          TRACE_D3("<%lx>", res);
   1290          break;
   1291       }
   1292       case DW_FORM_ref_udata: {
   1293          ULong u64  = get_ULEB128(c);
   1294          UWord res  = cc->cu_start_offset + (UWord)u64;
   1295          cts->u.val = (ULong)res;
   1296          cts->szB   = sizeof(UWord);
   1297          TRACE_D3("<%lx>", res);
   1298          break;
   1299       }
   1300       case DW_FORM_flag: {
   1301          UChar u8 = get_UChar(c);
   1302          TRACE_D3("%u", (UInt)u8);
   1303          cts->u.val = (ULong)u8;
   1304          cts->szB   = 1;
   1305          break;
   1306       }
   1307       case DW_FORM_flag_present:
   1308          TRACE_D3("1");
   1309          cts->u.val = 1;
   1310          cts->szB   = 1;
   1311          break;
   1312       case DW_FORM_block1: {
   1313          ULong    u64b;
   1314          ULong    u64   = (ULong)get_UChar(c);
   1315          DiCursor block = get_DiCursor_from_Cursor(c);
   1316          TRACE_D3("%llu byte block: ", u64);
   1317          for (u64b = u64; u64b > 0; u64b--) {
   1318             UChar u8 = get_UChar(c);
   1319             TRACE_D3("%x ", (UInt)u8);
   1320          }
   1321          cts->u.cur = block;
   1322          cts->szB   = - (Long)u64;
   1323          break;
   1324       }
   1325       case DW_FORM_block2: {
   1326          ULong    u64b;
   1327          ULong    u64   = (ULong)get_UShort(c);
   1328          DiCursor block = get_DiCursor_from_Cursor(c);
   1329          TRACE_D3("%llu byte block: ", u64);
   1330          for (u64b = u64; u64b > 0; u64b--) {
   1331             UChar u8 = get_UChar(c);
   1332             TRACE_D3("%x ", (UInt)u8);
   1333          }
   1334          cts->u.cur = block;
   1335          cts->szB   = - (Long)u64;
   1336          break;
   1337       }
   1338       case DW_FORM_block4: {
   1339          ULong    u64b;
   1340          ULong    u64   = (ULong)get_UInt(c);
   1341          DiCursor block = get_DiCursor_from_Cursor(c);
   1342          TRACE_D3("%llu byte block: ", u64);
   1343          for (u64b = u64; u64b > 0; u64b--) {
   1344             UChar u8 = get_UChar(c);
   1345             TRACE_D3("%x ", (UInt)u8);
   1346          }
   1347          cts->u.cur = block;
   1348          cts->szB   = - (Long)u64;
   1349          break;
   1350       }
   1351       case DW_FORM_exprloc:
   1352       case DW_FORM_block: {
   1353          ULong    u64b;
   1354          ULong    u64   = (ULong)get_ULEB128(c);
   1355          DiCursor block = get_DiCursor_from_Cursor(c);
   1356          TRACE_D3("%llu byte block: ", u64);
   1357          for (u64b = u64; u64b > 0; u64b--) {
   1358             UChar u8 = get_UChar(c);
   1359             TRACE_D3("%x ", (UInt)u8);
   1360          }
   1361          cts->u.cur = block;
   1362          cts->szB   = - (Long)u64;
   1363          break;
   1364       }
   1365       case DW_FORM_ref_sig8: {
   1366          ULong  u64b;
   1367          ULong  signature = get_ULong (c);
   1368          ULong  work = signature;
   1369          TRACE_D3("8 byte signature: ");
   1370          for (u64b = 8; u64b > 0; u64b--) {
   1371             UChar u8 = work & 0xff;
   1372             TRACE_D3("%x ", (UInt)u8);
   1373             work >>= 8;
   1374          }
   1375 
   1376          /* cc->signature_types is only built/initialised when
   1377             VG_(clo_read_var_info) is set. In this case,
   1378             the DW_FORM_ref_sig8 can be looked up.
   1379             But we can also arrive here when only reading inline info
   1380             and VG_(clo_trace_symtab) is set. In such a case,
   1381             we cannot lookup the DW_FORM_ref_sig8, we rather assign
   1382             a dummy value. This is a kludge, but otherwise,
   1383             the 'dwarf inline info reader' tracing would have to
   1384             do type processing/reading. It is better to avoid
   1385             adding significant 'real' processing only due to tracing. */
   1386          if (VG_(clo_read_var_info)) {
   1387             /* Due to the way that the hash table is constructed, the
   1388                resulting DIE offset here is already "cooked".  See
   1389                cook_die_using_form.  */
   1390             cts->u.val = lookup_signatured_type (cc->signature_types, signature,
   1391                                                  c->barf);
   1392          } else {
   1393             vg_assert (td3);
   1394             vg_assert (VG_(clo_read_inline_info));
   1395             TRACE_D3("<not dereferencing signature type>");
   1396             cts->u.val = 0; /* Assign a dummy/rubbish value */
   1397          }
   1398          cts->szB   = sizeof(UWord);
   1399          break;
   1400       }
   1401       case DW_FORM_indirect:
   1402          get_Form_contents (cts, cc, c, td3, (DW_FORM)get_ULEB128(c));
   1403          return;
   1404 
   1405       case DW_FORM_GNU_ref_alt:
   1406          cts->u.val = get_Dwarfish_UWord(c, cc->is_dw64);
   1407          cts->szB   = cc->is_dw64 ? sizeof(ULong) : sizeof(UInt);
   1408          TRACE_D3("0x%lx", (UWord)cts->u.val);
   1409          if (0) VG_(printf)("DW_FORM_GNU_ref_alt 0x%lx\n", (UWord)cts->u.val);
   1410          if (/* the following is surely impossible, but ... */
   1411              !ML_(sli_is_valid)(cc->escn_debug_info_alt))
   1412             cc->barf("get_Form_contents: DW_FORM_GNU_ref_addr used, "
   1413                      "but no alternate .debug_info");
   1414          else if (cts->u.val >= (ULong)cc->escn_debug_info_alt.szB) {
   1415             /* Hmm.  Offset is nonsensical for this object's .debug_info
   1416                section.  Be safe and reject it. */
   1417             cc->barf("get_Form_contents: DW_FORM_GNU_ref_addr points "
   1418                      "outside alternate .debug_info");
   1419          }
   1420          break;
   1421 
   1422       case DW_FORM_GNU_strp_alt: {
   1423          /* this is an offset into alternate .debug_str */
   1424          SizeT uw = (UWord)get_Dwarfish_UWord( c, cc->is_dw64 );
   1425          if (!ML_(sli_is_valid)(cc->escn_debug_str_alt))
   1426             cc->barf("get_Form_contents: DW_FORM_GNU_strp_alt used, "
   1427                      "but no alternate .debug_str");
   1428          else if (uw >= cc->escn_debug_str_alt.szB)
   1429             cc->barf("get_Form_contents: DW_FORM_GNU_strp_alt "
   1430                      "points outside alternate .debug_str");
   1431          /* FIXME: check the entire string lies inside debug_str,
   1432             not just the first byte of it. */
   1433          DiCursor str
   1434             = ML_(cur_plus)( ML_(cur_from_sli)(cc->escn_debug_str_alt), uw);
   1435          if (TD3) {
   1436             HChar* tmp = ML_(cur_read_strdup)(str, "di.getFC.3");
   1437             TRACE_D3("(indirect alt string, offset: 0x%lx): %s", uw, tmp);
   1438             ML_(dinfo_free)(tmp);
   1439          }
   1440          cts->u.cur = str;
   1441          cts->szB   = - (Long)(1 + (ULong)ML_(cur_strlen)(str));
   1442          break;
   1443       }
   1444 
   1445       default:
   1446          VG_(printf)(
   1447             "get_Form_contents: unhandled %u (%s) at <%llx>\n",
   1448             form, ML_(pp_DW_FORM)(form), get_position_of_Cursor(c));
   1449          c->barf("get_Form_contents: unhandled DW_FORM");
   1450    }
   1451 }
   1452 
   1453 static inline UInt sizeof_Dwarfish_UWord (Bool is_dw64)
   1454 {
   1455    if (is_dw64)
   1456       return sizeof(ULong);
   1457    else
   1458       return sizeof(UInt);
   1459 }
   1460 
   1461 #define VARSZ_FORM 0xffffffff
   1462 /* If the form is a fixed length form, return the nr of bytes for this form.
   1463    If the form is a variable length form, return VARSZ_FORM. */
   1464 static
   1465 UInt get_Form_szB (const CUConst* cc, DW_FORM form )
   1466 {
   1467    // !!! keep switch in sync with get_Form_contents : the nr of bytes
   1468    // read from a cursor by get_Form_contents must be returned by
   1469    // the below switch.
   1470    // The consistency is verified in trace_DIE.
   1471    switch (form) {
   1472       case DW_FORM_data1: return 1;
   1473       case DW_FORM_data2: return 2;
   1474       case DW_FORM_data4: return 4;
   1475       case DW_FORM_data8: return 8;
   1476       case DW_FORM_sec_offset:
   1477          if (cc->is_dw64)
   1478             return 8;
   1479          else
   1480             return 4;
   1481       case DW_FORM_sdata:
   1482          return VARSZ_FORM;
   1483       case DW_FORM_udata:
   1484          return VARSZ_FORM;
   1485       case DW_FORM_addr: // See hack in get_Form_contents
   1486          return sizeof(UWord);
   1487       case DW_FORM_ref_addr: // See hack in get_Form_contents
   1488          if (cc->version == 2)
   1489             return sizeof(UWord);
   1490          else
   1491             return sizeof_Dwarfish_UWord (cc->is_dw64);
   1492       case DW_FORM_strp:
   1493          return sizeof_Dwarfish_UWord (cc->is_dw64);
   1494       case DW_FORM_string:
   1495          return VARSZ_FORM;
   1496       case DW_FORM_ref1:
   1497          return 1;
   1498       case DW_FORM_ref2:
   1499          return 2;
   1500       case DW_FORM_ref4:
   1501          return 4;
   1502       case DW_FORM_ref8:
   1503          return 8;
   1504       case DW_FORM_ref_udata:
   1505          return VARSZ_FORM;
   1506       case DW_FORM_flag:
   1507          return 1;
   1508       case DW_FORM_flag_present:
   1509          return 0; // !!! special case, no data.
   1510       case DW_FORM_block1:
   1511          return VARSZ_FORM;
   1512       case DW_FORM_block2:
   1513          return VARSZ_FORM;
   1514       case DW_FORM_block4:
   1515          return VARSZ_FORM;
   1516       case DW_FORM_exprloc:
   1517       case DW_FORM_block:
   1518          return VARSZ_FORM;
   1519       case DW_FORM_ref_sig8:
   1520          return 8;
   1521       case DW_FORM_indirect:
   1522          return VARSZ_FORM;
   1523       case DW_FORM_GNU_ref_alt:
   1524          return sizeof_Dwarfish_UWord(cc->is_dw64);
   1525       case DW_FORM_GNU_strp_alt:
   1526          return sizeof_Dwarfish_UWord(cc->is_dw64);
   1527       default:
   1528          VG_(printf)(
   1529             "get_Form_szB: unhandled %u (%s)\n",
   1530             form, ML_(pp_DW_FORM)(form));
   1531          cc->barf("get_Form_contents: unhandled DW_FORM");
   1532    }
   1533 }
   1534 
   1535 /* Skip a DIE as described by abbv.
   1536    If the DIE has a sibling, *sibling is set to the skipped DIE sibling value. */
   1537 static
   1538 void skip_DIE (UWord  *sibling,
   1539                Cursor* c_die,
   1540                const g_abbv *abbv,
   1541                const CUConst* cc)
   1542 {
   1543    UInt nf_i;
   1544    FormContents cts;
   1545    nf_i = 0;
   1546    while (True) {
   1547       if (abbv->nf[nf_i].at_name == DW_AT_sibling) {
   1548          get_Form_contents( &cts, cc, c_die, False /*td3*/,
   1549                             (DW_FORM)abbv->nf[nf_i].at_form );
   1550          if ( cts.szB > 0 )
   1551             *sibling = cts.u.val;
   1552          nf_i++;
   1553       } else if (abbv->nf[nf_i].skip_szB == VARSZ_FORM) {
   1554          get_Form_contents( &cts, cc, c_die, False /*td3*/,
   1555                             (DW_FORM)abbv->nf[nf_i].at_form );
   1556          nf_i++;
   1557       } else {
   1558          advance_position_of_Cursor (c_die, (ULong)abbv->nf[nf_i].skip_szB);
   1559          nf_i = abbv->nf[nf_i].next_nf;
   1560       }
   1561       if (nf_i == 0)
   1562          break;
   1563    }
   1564 }
   1565 
   1566 
   1567 /*------------------------------------------------------------*/
   1568 /*---                                                      ---*/
   1569 /*--- Parsing of variable-related DIEs                     ---*/
   1570 /*---                                                      ---*/
   1571 /*------------------------------------------------------------*/
   1572 
/* Record describing one variable, as collected while parsing
   variable-related DIEs. */
typedef
   struct _TempVar {
      const HChar*  name; /* in DebugInfo's .strpool */
      /* Represent ranges economically.  nRanges is the number of
         ranges.  Cases:
         0: .rngOneMin .rngOneMax .rngMany are all zero
         1: .rngOneMin .rngOneMax hold the range; .rngMany is NULL
         2: .rngOneMin .rngOneMax are zero; .rngMany holds the ranges.
            NOTE(review): presumably this case also covers more than
            two ranges -- confirm against the code that fills in
            TempVars.
         This is merely an optimisation to avoid having to allocate
         and free the XArray in the common case (98% of the time)
         where there is zero or one address range. */
      UWord   nRanges;
      Addr    rngOneMin;
      Addr    rngOneMax;
      XArray* rngMany; /* of AddrRange.  NON-UNIQUE PTR in AR_DINFO. */
      /* Do not free .rngMany, since many TempVars will have the same
         value.  Instead the associated storage is to be freed by
         deleting 'rangetree', which stores a single copy of each
         range. */
      /* --- */
      Int     level;
      UWord   typeR; /* a cuOff */
      GExpr*  gexpr; /* for this variable */
      GExpr*  fbGX;  /* to find the frame base of the enclosing fn, if
                        any */
      UInt    fndn_ix; /* declaring file/dirname index in fndnpool, or 0 */
      Int     fLine; /* declaring file line number, or zero */
      /* offset in .debug_info, so that abstract instances can be
         found to satisfy references from concrete instances. */
      UWord   dioff;
      UWord   absOri; /* so the absOri fields refer to dioff fields
                         in some other, related TempVar. */
   }
   TempVar;
   1607 
/* State of the variable parser: a stack of scopes plus the file name
   table of the CU being parsed. */
typedef
   struct {
      /* Contains the range stack: a stack of address ranges, one
         stack entry for each nested scope.

         Some scope entries are created by function definitions
         (DW_TAG_subprogram), and for those, we also note the GExpr
         derived from its DW_AT_frame_base attribute, if any.
         Consequently it should be possible to find, for any
         variable's DIE, the GExpr for the containing function's
         DW_AT_frame_base by scanning back through the stack to find
         the nearest entry associated with a function.  This somewhat
         elaborate scheme is provided so as to make it possible to
         obtain the correct DW_AT_frame_base expression even in the
         presence of nested functions (or to be more precise, in the
         presence of nested DW_TAG_subprogram DIEs).

         The stack is stored as four parallel arrays (ranges, level,
         isFunc, fbGX), each with room for stack_size entries;
         entries [0 .. sp] are in use.
      */
      Int     sp; /* [sp] is innermost active entry; sp==-1 for empty
                     stack */
      Int     stack_size; /* allocated capacity of the arrays below */
      XArray **ranges; /* XArray of AddrRange */
      Int     *level;  /* D3 DIE levels */
      Bool    *isFunc; /* from DW_TAG_subprogram? */
      GExpr  **fbGX;   /* if isFunc, contains the FB expr, else NULL */
      /* The fndn_ix file name/dirname table.  Is a mapping from dwarf
         integer index to the index in di->fndnpool.  Note that
         var_parser_release does not free this field. */
      XArray* /* of UInt* */ fndn_ix_Table;
   }
   D3VarParser;
   1637 
   1638 /* Completely initialise a variable parser object */
   1639 static void
   1640 var_parser_init ( D3VarParser *parser )
   1641 {
   1642    parser->sp = -1;
   1643    parser->stack_size = 0;
   1644    parser->ranges = NULL;
   1645    parser->level  = NULL;
   1646    parser->isFunc = NULL;
   1647    parser->fbGX = NULL;
   1648    parser->fndn_ix_Table = NULL;
   1649 }
   1650 
   1651 /* Release any memory hanging off a variable parser object */
   1652 static void
   1653 var_parser_release ( D3VarParser *parser )
   1654 {
   1655    ML_(dinfo_free)( parser->ranges );
   1656    ML_(dinfo_free)( parser->level );
   1657    ML_(dinfo_free)( parser->isFunc );
   1658    ML_(dinfo_free)( parser->fbGX );
   1659 }
   1660 
   1661 static void varstack_show ( const D3VarParser* parser, const HChar* str )
   1662 {
   1663    Word i, j;
   1664    VG_(printf)("  varstack (%s) {\n", str);
   1665    for (i = 0; i <= parser->sp; i++) {
   1666       XArray* xa = parser->ranges[i];
   1667       vg_assert(xa);
   1668       VG_(printf)("    [%ld] (level %d)", i, parser->level[i]);
   1669       if (parser->isFunc[i]) {
   1670          VG_(printf)(" (fbGX=%p)", parser->fbGX[i]);
   1671       } else {
   1672          vg_assert(parser->fbGX[i] == NULL);
   1673       }
   1674       VG_(printf)(": ");
   1675       if (VG_(sizeXA)( xa ) == 0) {
   1676          VG_(printf)("** empty PC range array **");
   1677       } else {
   1678          for (j = 0; j < VG_(sizeXA)( xa ); j++) {
   1679             AddrRange* range = (AddrRange*) VG_(indexXA)( xa, j );
   1680             vg_assert(range);
   1681             VG_(printf)("[%#lx,%#lx] ", range->aMin, range->aMax);
   1682          }
   1683       }
   1684       VG_(printf)("\n");
   1685    }
   1686    VG_(printf)("  }\n");
   1687 }
   1688 
   1689 /* Remove from the stack, all entries with .level > 'level' */
   1690 static
   1691 void varstack_preen ( D3VarParser* parser, Bool td3, Int level )
   1692 {
   1693    Bool changed = False;
   1694    vg_assert(parser->sp < parser->stack_size);
   1695    while (True) {
   1696       vg_assert(parser->sp >= -1);
   1697       if (parser->sp == -1) break;
   1698       if (parser->level[parser->sp] <= level) break;
   1699       if (0)
   1700          TRACE_D3("BBBBAAAA varstack_pop [newsp=%d]\n", parser->sp-1);
   1701       vg_assert(parser->ranges[parser->sp]);
   1702       /* Who allocated this xa?  get_range_list() or
   1703          unitary_range_list(). */
   1704       VG_(deleteXA)( parser->ranges[parser->sp] );
   1705       parser->sp--;
   1706       changed = True;
   1707    }
   1708    if (changed && td3)
   1709       varstack_show( parser, "after preen" );
   1710 }
   1711 
   1712 static void varstack_push ( const CUConst* cc,
   1713                             D3VarParser* parser,
   1714                             Bool td3,
   1715                             XArray* ranges, Int level,
   1716                             Bool    isFunc, GExpr* fbGX ) {
   1717    if (0)
   1718    TRACE_D3("BBBBAAAA varstack_push[newsp=%d]: %d  %p\n",
   1719             parser->sp+1, level, ranges);
   1720 
   1721    /* First we need to zap everything >= 'level', as we are about to
   1722       replace any previous entry at 'level', so .. */
   1723    varstack_preen(parser, /*td3*/False, level-1);
   1724 
   1725    vg_assert(parser->sp >= -1);
   1726    vg_assert(parser->sp < parser->stack_size);
   1727    if (parser->sp == parser->stack_size - 1) {
   1728       parser->stack_size += 48;
   1729       parser->ranges =
   1730          ML_(dinfo_realloc)("di.readdwarf3.varpush.1", parser->ranges,
   1731                             parser->stack_size * sizeof parser->ranges[0]);
   1732       parser->level =
   1733          ML_(dinfo_realloc)("di.readdwarf3.varpush.2", parser->level,
   1734                             parser->stack_size * sizeof parser->level[0]);
   1735       parser->isFunc =
   1736          ML_(dinfo_realloc)("di.readdwarf3.varpush.3", parser->isFunc,
   1737                             parser->stack_size * sizeof parser->isFunc[0]);
   1738       parser->fbGX =
   1739          ML_(dinfo_realloc)("di.readdwarf3.varpush.4", parser->fbGX,
   1740                             parser->stack_size * sizeof parser->fbGX[0]);
   1741    }
   1742    if (parser->sp >= 0)
   1743       vg_assert(parser->level[parser->sp] < level);
   1744    parser->sp++;
   1745    vg_assert(ranges != NULL);
   1746    if (!isFunc) vg_assert(fbGX == NULL);
   1747    parser->ranges[parser->sp] = ranges;
   1748    parser->level[parser->sp]  = level;
   1749    parser->isFunc[parser->sp] = isFunc;
   1750    parser->fbGX[parser->sp]   = fbGX;
   1751    if (TD3)
   1752       varstack_show( parser, "after push" );
   1753 }
   1754 
   1755 
   1756 /* cts is derived from a DW_AT_location and so refers either to a
   1757    location expression or to a location list.  Figure out which, and
   1758    in both cases bundle the expression or location list into a
   1759    so-called GExpr (guarded expression). */
   1760 __attribute__((noinline))
   1761 static GExpr* get_GX ( const CUConst* cc, Bool td3, const FormContents* cts )
   1762 {
   1763    GExpr* gexpr = NULL;
   1764    if (cts->szB < 0) {
   1765       /* represents a non-empty in-line location expression, and
   1766          cts->u.cur points at the image bytes */
   1767       gexpr = make_singleton_GX( cts->u.cur, (ULong)(- cts->szB) );
   1768    }
   1769    else
   1770    if (cts->szB > 0) {
   1771       /* represents a location list.  cts->u.val is the offset of it
   1772          in .debug_loc. */
   1773       if (!cc->cu_svma_known)
   1774          cc->barf("get_GX: location list, but CU svma is unknown");
   1775       gexpr = make_general_GX( cc, td3, cts->u.val, cc->cu_svma );
   1776    }
   1777    else {
   1778       vg_assert(0); /* else caller is bogus */
   1779    }
   1780    return gexpr;
   1781 }
   1782 
   1783 /* Returns an xarray* of directory names (indexed by the dwarf dirname
   1784    integer).
   1785    If 'compdir' is NULL, entry [0] will be set to "."
   1786    otherwise entry [0] is set to compdir.
   1787    Entry [0] basically means "the current directory of the compilation",
   1788    whatever that means, according to the DWARF3 spec.
   1789    FIXME??? readdwarf3.c/readdwarf.c have a lot of duplicated code */
   1790 static
   1791 XArray* read_dirname_xa (DebugInfo* di, const HChar *compdir,
   1792                          Cursor *c,
   1793                          Bool td3 )
   1794 {
   1795    XArray*        dirname_xa;   /* xarray of HChar* dirname */
   1796    const HChar*   dirname;
   1797    UInt           compdir_len;
   1798 
   1799    dirname_xa = VG_(newXA) (ML_(dinfo_zalloc), "di.rdxa.1", ML_(dinfo_free),
   1800                             sizeof(HChar*) );
   1801 
   1802    if (compdir == NULL) {
   1803       dirname = ".";
   1804       compdir_len = 1;
   1805    } else {
   1806       dirname = compdir;
   1807       compdir_len = VG_(strlen)(compdir);
   1808    }
   1809    VG_(addToXA) (dirname_xa, &dirname);
   1810 
   1811    TRACE_D3(" The Directory Table%s\n",
   1812             peek_UChar(c) == 0 ? " is empty." : ":" );
   1813 
   1814    while (peek_UChar(c) != 0) {
   1815 
   1816       DiCursor cur = get_AsciiZ(c);
   1817       HChar* data_str = ML_(cur_read_strdup)( cur, "dirname_xa.1" );
   1818       TRACE_D3("  %s\n", data_str);
   1819 
   1820       /* If data_str[0] is '/', then 'data' is an absolute path and we
   1821          don't mess with it.  Otherwise, construct the
   1822          path 'compdir' ++ "/" ++ 'data'. */
   1823 
   1824       if (data_str[0] != '/'
   1825           /* not an absolute path */
   1826           && compdir
   1827           /* actually got something sensible for compdir */
   1828           && compdir_len)
   1829       {
   1830          SizeT  len = compdir_len + 1 + VG_(strlen)(data_str);
   1831          HChar *buf = ML_(dinfo_zalloc)("dirname_xa.2", len + 1);
   1832 
   1833          VG_(strcpy)(buf, compdir);
   1834          VG_(strcat)(buf, "/");
   1835          VG_(strcat)(buf, data_str);
   1836 
   1837          dirname = ML_(addStr)(di, buf, len);
   1838          VG_(addToXA) (dirname_xa, &dirname);
   1839          if (0) VG_(printf)("rel path  %s\n", buf);
   1840          ML_(dinfo_free)(buf);
   1841       } else {
   1842          /* just use 'data'. */
   1843          dirname = ML_(addStr)(di,data_str,-1);
   1844          VG_(addToXA) (dirname_xa, &dirname);
   1845          if (0) VG_(printf)("abs path  %s\n", data_str);
   1846       }
   1847 
   1848       ML_(dinfo_free)(data_str);
   1849    }
   1850 
   1851    TRACE_D3 ("\n");
   1852 
   1853    if (get_UChar (c) != 0) {
   1854       ML_(symerr)(NULL, True,
   1855                   "could not get NUL at end of DWARF directory table");
   1856       VG_(deleteXA)(dirname_xa);
   1857       return NULL;
   1858    }
   1859 
   1860    return dirname_xa;
   1861 }
   1862 
   1863 static
   1864 void read_filename_table( /*MOD*/XArray* /* of UInt* */ fndn_ix_Table,
   1865                           const HChar* compdir,
   1866                           const CUConst* cc, ULong debug_line_offset,
   1867                           Bool td3 )
   1868 {
   1869    Bool   is_dw64;
   1870    Cursor c;
   1871    Word   i;
   1872    UShort version;
   1873    UChar  opcode_base;
   1874    const HChar* str;
   1875    XArray* dirname_xa;   /* xarray of HChar* dirname */
   1876    ULong  dir_xa_ix;     /* Index in dirname_xa, as read from dwarf info. */
   1877    const HChar* dirname;
   1878    UInt   fndn_ix;
   1879 
   1880    vg_assert(fndn_ix_Table && cc && cc->barf);
   1881    if (!ML_(sli_is_valid)(cc->escn_debug_line)
   1882        || cc->escn_debug_line.szB <= debug_line_offset) {
   1883       cc->barf("read_filename_table: .debug_line is missing?");
   1884    }
   1885 
   1886    init_Cursor( &c, cc->escn_debug_line, debug_line_offset, cc->barf,
   1887                 "Overrun whilst reading .debug_line section(1)" );
   1888 
   1889    /* unit_length = */
   1890    get_Initial_Length( &is_dw64, &c,
   1891                        "read_filename_table: invalid initial-length field" );
   1892    version = get_UShort( &c );
   1893    if (version != 2 && version != 3 && version != 4)
   1894      cc->barf("read_filename_table: Only DWARF version 2, 3 and 4 line info "
   1895               "is currently supported.");
   1896    /*header_length              = (ULong)*/ get_Dwarfish_UWord( &c, is_dw64 );
   1897    /*minimum_instruction_length = */ get_UChar( &c );
   1898    if (version >= 4)
   1899       /*maximum_operations_per_insn = */ get_UChar( &c );
   1900    /*default_is_stmt            = */ get_UChar( &c );
   1901    /*line_base                  = (Char)*/ get_UChar( &c );
   1902    /*line_range                 = */ get_UChar( &c );
   1903    opcode_base                = get_UChar( &c );
   1904    /* skip over "standard_opcode_lengths" */
   1905    for (i = 1; i < (Word)opcode_base; i++)
   1906      (void)get_UChar( &c );
   1907 
   1908    dirname_xa = read_dirname_xa(cc->di, compdir, &c, td3);
   1909 
   1910    /* Read and record the file names table */
   1911    vg_assert( VG_(sizeXA)( fndn_ix_Table ) == 0 );
   1912    /* Add a dummy index-zero entry.  DWARF3 numbers its files
   1913       from 1, for some reason. */
   1914    fndn_ix = ML_(addFnDn) ( cc->di, "<unknown_file>", NULL );
   1915    VG_(addToXA)( fndn_ix_Table, &fndn_ix );
   1916    while (peek_UChar(&c) != 0) {
   1917       DiCursor cur = get_AsciiZ(&c);
   1918       str = ML_(addStrFromCursor)( cc->di, cur );
   1919       dir_xa_ix = get_ULEB128( &c );
   1920       if (dirname_xa != NULL
   1921           && dir_xa_ix >= 0 && dir_xa_ix < VG_(sizeXA) (dirname_xa))
   1922          dirname = *(HChar**)VG_(indexXA) ( dirname_xa, dir_xa_ix );
   1923       else
   1924          dirname = NULL;
   1925       fndn_ix = ML_(addFnDn)( cc->di, str, dirname);
   1926       TRACE_D3("  read_filename_table: %ld fndn_ix %u %s %s\n",
   1927                VG_(sizeXA)(fndn_ix_Table), fndn_ix,
   1928                dirname, str);
   1929       VG_(addToXA)( fndn_ix_Table, &fndn_ix );
   1930       (void)get_ULEB128( &c ); /* skip last mod time */
   1931       (void)get_ULEB128( &c ); /* file size */
   1932    }
   1933    /* We're done!  The rest of it is not interesting. */
   1934    if (dirname_xa != NULL)
   1935       VG_(deleteXA)(dirname_xa);
   1936 }
   1937 
   1938 /* setup_cu_svma to be called when a cu is found at level 0,
   1939    to establish the cu_svma. */
   1940 static void setup_cu_svma(CUConst* cc, Bool have_lo, Addr ip_lo, Bool td3)
   1941 {
   1942    Addr cu_svma;
   1943    /* We have potentially more than one type of parser parsing the
   1944       dwarf information. At least currently, each parser establishes
   1945       the cu_svma. So, in case cu_svma_known, we check that the same
   1946       result is obtained by the 2nd parsing of the cu.
   1947 
   1948       Alternatively, we could reset cu_svma_known after each parsing
   1949       and then check that we only see a single DW_TAG_compile_unit DIE
   1950       at level 0, DWARF3 only allows exactly one top level DIE per
   1951       CU. */
   1952 
   1953    if (have_lo)
   1954       cu_svma = ip_lo;
   1955    else {
   1956       /* Now, it may be that this DIE doesn't tell us the CU's
   1957          SVMA, by way of not having a DW_AT_low_pc.  That's OK --
   1958          the CU doesn't *have* to have its SVMA specified.
   1959 
   1960          But as per last para D3 spec sec 3.1.1 ("Normal and
   1961          Partial Compilation Unit Entries", "If the base address
   1962          (viz, the SVMA) is undefined, then any DWARF entry of
   1963          structure defined interms of the base address of that
   1964          compilation unit is not valid.".  So that means, if whilst
   1965          processing the children of this top level DIE (or their
   1966          children, etc) we see a DW_AT_range, and cu_svma_known is
   1967          False, then the DIE that contains it is (per the spec)
   1968          invalid, and we can legitimately stop and complain. */
   1969       /* .. whereas The Reality is, simply assume the SVMA is zero
   1970          if it isn't specified. */
   1971       cu_svma = 0;
   1972    }
   1973 
   1974    if (cc->cu_svma_known) {
   1975       vg_assert (cu_svma == cc->cu_svma);
   1976    } else {
   1977       cc->cu_svma_known = True;
   1978       cc->cu_svma = cu_svma;
   1979       if (0)
   1980          TRACE_D3("setup_cu_svma: acquire CU_SVMA of %p\n", (void*) cc->cu_svma);
   1981    }
   1982 }
   1983 
   1984 static void trace_DIE(
   1985    DW_TAG dtag,
   1986    UWord posn,
   1987    Int level,
   1988    UWord saved_die_c_offset,
   1989    const g_abbv *abbv,
   1990    const CUConst* cc)
   1991 {
   1992    Cursor c;
   1993    FormContents cts;
   1994    UWord sibling = 0;
   1995    UInt nf_i;
   1996    Bool  debug_types_flag;
   1997    Bool  alt_flag;
   1998    Cursor check_skip;
   1999    UWord check_sibling = 0;
   2000 
   2001    posn = uncook_die( cc, posn, &debug_types_flag, &alt_flag );
   2002    init_Cursor (&c,
   2003                 debug_types_flag ? cc->escn_debug_types :
   2004                 alt_flag ? cc->escn_debug_info_alt : cc->escn_debug_info,
   2005                 saved_die_c_offset, cc->barf,
   2006                 "Overrun trace_DIE");
   2007    check_skip = c;
   2008    VG_(printf)(" <%d><%lx>: Abbrev Number: %llu (%s)%s%s\n",
   2009                level, posn, (ULong) abbv->abbv_code, ML_(pp_DW_TAG)( dtag ),
   2010                debug_types_flag ? " (in .debug_types)" : "",
   2011                alt_flag ? " (in alternate .debug_info)" : "");
   2012    nf_i = 0;
   2013    while (True) {
   2014       DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
   2015       DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
   2016       nf_i++;
   2017       if (attr == 0 && form == 0) break;
   2018       VG_(printf)("     %-18s: ", ML_(pp_DW_AT)(attr));
   2019       /* Get the form contents, so as to print them */
   2020       get_Form_contents( &cts, cc, &c, True, form );
   2021       if (attr == DW_AT_sibling && cts.szB > 0) {
   2022          sibling = cts.u.val;
   2023       }
   2024       VG_(printf)("\t\n");
   2025    }
   2026 
   2027    /* Verify that skipping a DIE gives the same displacement as
   2028       tracing (i.e. reading) a DIE. If there is an inconsistency in
   2029       the nr of bytes read by get_Form_contents and get_Form_szB, this
   2030       should be detected by the below. Using --trace-symtab=yes
   2031       --read-var-info=yes will ensure all DIEs are systematically
   2032       verified. */
   2033    skip_DIE (&check_sibling, &check_skip, abbv, cc);
   2034    vg_assert (check_sibling == sibling);
   2035    vg_assert (get_position_of_Cursor (&check_skip)
   2036               == get_position_of_Cursor (&c));
   2037 }
   2038 
   2039 __attribute__((noreturn))
   2040 static void dump_bad_die_and_barf(
   2041    const HChar *whichparser,
   2042    DW_TAG dtag,
   2043    UWord posn,
   2044    Int level,
   2045    Cursor* c_die,
   2046    UWord saved_die_c_offset,
   2047    const g_abbv *abbv,
   2048    const CUConst* cc)
   2049 {
   2050    trace_DIE (dtag, posn, level, saved_die_c_offset, abbv, cc);
   2051    VG_(printf)("%s:\n", whichparser);
   2052    cc->barf("confused by the above DIE");
   2053 }
   2054 
   2055 __attribute__((noinline))
   2056 static void bad_DIE_confusion(int linenr)
   2057 {
   2058    VG_(printf)("\nparse DIE(readdwarf3.c:%d): confused by:\n", linenr);
   2059 }
   2060 #define goto_bad_DIE do {bad_DIE_confusion(__LINE__); goto bad_DIE;} while (0)
   2061 
   2062 __attribute__((noinline))
   2063 static void parse_var_DIE (
   2064    /*MOD*/WordFM* /* of (XArray* of AddrRange, void) */ rangestree,
   2065    /*MOD*/XArray* /* of TempVar* */ tempvars,
   2066    /*MOD*/XArray* /* of GExpr* */ gexprs,
   2067    /*MOD*/D3VarParser* parser,
   2068    DW_TAG dtag,
   2069    UWord posn,
   2070    Int level,
   2071    Cursor* c_die,
   2072    const g_abbv *abbv,
   2073    CUConst* cc,
   2074    Bool td3
   2075 )
   2076 {
   2077    FormContents cts;
   2078    UInt nf_i;
   2079 
   2080    UWord saved_die_c_offset  = get_position_of_Cursor( c_die );
   2081 
   2082    varstack_preen( parser, td3, level-1 );
   2083 
   2084    if (dtag == DW_TAG_compile_unit
   2085        || dtag == DW_TAG_type_unit
   2086        || dtag == DW_TAG_partial_unit) {
   2087       Bool have_lo    = False;
   2088       Bool have_hi1   = False;
   2089       Bool hiIsRelative = False;
   2090       Bool have_range = False;
   2091       Addr ip_lo    = 0;
   2092       Addr ip_hi1   = 0;
   2093       Addr rangeoff = 0;
   2094       const HChar *compdir = NULL;
   2095       nf_i = 0;
   2096       while (True) {
   2097          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
   2098          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
   2099          nf_i++;
   2100          if (attr == 0 && form == 0) break;
   2101          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
   2102          if (attr == DW_AT_low_pc && cts.szB > 0) {
   2103             ip_lo   = cts.u.val;
   2104             have_lo = True;
   2105          }
   2106          if (attr == DW_AT_high_pc && cts.szB > 0) {
   2107             ip_hi1   = cts.u.val;
   2108             have_hi1 = True;
   2109             if (form != DW_FORM_addr)
   2110                hiIsRelative = True;
   2111          }
   2112          if (attr == DW_AT_ranges && cts.szB > 0) {
   2113             rangeoff   = cts.u.val;
   2114             have_range = True;
   2115          }
   2116          if (attr == DW_AT_comp_dir) {
   2117             if (cts.szB >= 0)
   2118                cc->barf("parse_var_DIE compdir: expecting indirect string");
   2119             HChar *str = ML_(cur_read_strdup)( cts.u.cur,
   2120                                                "parse_var_DIE.compdir" );
   2121             compdir = ML_(addStr)(cc->di, str, -1);
   2122             ML_(dinfo_free) (str);
   2123          }
   2124          if (attr == DW_AT_stmt_list && cts.szB > 0) {
   2125             read_filename_table( parser->fndn_ix_Table, compdir,
   2126                                  cc, cts.u.val, td3 );
   2127          }
   2128       }
   2129       if (have_lo && have_hi1 && hiIsRelative)
   2130          ip_hi1 += ip_lo;
   2131 
   2132       /* Now, does this give us an opportunity to find this
   2133          CU's svma? */
   2134       if (level == 0)
   2135          setup_cu_svma(cc, have_lo, ip_lo, td3);
   2136 
   2137       /* Do we have something that looks sane? */
   2138       if (have_lo && have_hi1 && (!have_range)) {
   2139          if (ip_lo < ip_hi1)
   2140             varstack_push( cc, parser, td3,
   2141                            unitary_range_list(ip_lo, ip_hi1 - 1),
   2142                            level,
   2143                            False/*isFunc*/, NULL/*fbGX*/ );
   2144          else if (ip_lo == 0 && ip_hi1 == 0)
   2145             /* CU has no code, presumably?
   2146                Such situations have been encountered for code
   2147                compiled with -ffunction-sections -fdata-sections
   2148                and linked with --gc-sections. Completely
   2149                eliminated CU gives such 0 lo/hi pc. Similarly
   2150                to a CU which has no lo/hi/range pc, we push
   2151                an empty range list. */
   2152             varstack_push( cc, parser, td3,
   2153                            empty_range_list(),
   2154                            level,
   2155                            False/*isFunc*/, NULL/*fbGX*/ );
   2156       } else
   2157       if ((!have_lo) && (!have_hi1) && have_range) {
   2158          varstack_push( cc, parser, td3,
   2159                         get_range_list( cc, td3,
   2160                                         rangeoff, cc->cu_svma ),
   2161                         level,
   2162                         False/*isFunc*/, NULL/*fbGX*/ );
   2163       } else
   2164       if ((!have_lo) && (!have_hi1) && (!have_range)) {
   2165          /* CU has no code, presumably? */
   2166          varstack_push( cc, parser, td3,
   2167                         empty_range_list(),
   2168                         level,
   2169                         False/*isFunc*/, NULL/*fbGX*/ );
   2170       } else
   2171       if (have_lo && (!have_hi1) && have_range && ip_lo == 0) {
   2172          /* broken DIE created by gcc-4.3.X ?  Ignore the
   2173             apparently-redundant DW_AT_low_pc and use the DW_AT_ranges
   2174             instead. */
   2175          varstack_push( cc, parser, td3,
   2176                         get_range_list( cc, td3,
   2177                                         rangeoff, cc->cu_svma ),
   2178                         level,
   2179                         False/*isFunc*/, NULL/*fbGX*/ );
   2180       } else {
   2181          if (0) VG_(printf)("I got hlo %d hhi1 %d hrange %d\n",
   2182                             (Int)have_lo, (Int)have_hi1, (Int)have_range);
   2183          goto_bad_DIE;
   2184       }
   2185    }
   2186 
   2187    if (dtag == DW_TAG_lexical_block || dtag == DW_TAG_subprogram) {
   2188       Bool   have_lo    = False;
   2189       Bool   have_hi1   = False;
   2190       Bool   have_range = False;
   2191       Bool   hiIsRelative = False;
   2192       Addr   ip_lo      = 0;
   2193       Addr   ip_hi1     = 0;
   2194       Addr   rangeoff   = 0;
   2195       Bool   isFunc     = dtag == DW_TAG_subprogram;
   2196       GExpr* fbGX       = NULL;
   2197       nf_i = 0;
   2198       while (True) {
   2199          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
   2200          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
   2201          nf_i++;
   2202          if (attr == 0 && form == 0) break;
   2203          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
   2204          if (attr == DW_AT_low_pc && cts.szB > 0) {
   2205             ip_lo   = cts.u.val;
   2206             have_lo = True;
   2207          }
   2208          if (attr == DW_AT_high_pc && cts.szB > 0) {
   2209             ip_hi1   = cts.u.val;
   2210             have_hi1 = True;
   2211             if (form != DW_FORM_addr)
   2212                hiIsRelative = True;
   2213          }
   2214          if (attr == DW_AT_ranges && cts.szB > 0) {
   2215             rangeoff   = cts.u.val;
   2216             have_range = True;
   2217          }
   2218          if (isFunc
   2219              && attr == DW_AT_frame_base
   2220              && cts.szB != 0 /* either scalar or nonempty block */) {
   2221             fbGX = get_GX( cc, False/*td3*/, &cts );
   2222             vg_assert(fbGX);
   2223             VG_(addToXA)(gexprs, &fbGX);
   2224          }
   2225       }
   2226       if (have_lo && have_hi1 && hiIsRelative)
   2227          ip_hi1 += ip_lo;
   2228       /* Do we have something that looks sane? */
   2229       if (dtag == DW_TAG_subprogram
   2230           && (!have_lo) && (!have_hi1) && (!have_range)) {
   2231          /* This is legit - ignore it. Sec 3.3.3: "A subroutine entry
   2232             representing a subroutine declaration that is not also a
   2233             definition does not have code address or range
   2234             attributes." */
   2235       } else
   2236       if (dtag == DW_TAG_lexical_block
   2237           && (!have_lo) && (!have_hi1) && (!have_range)) {
   2238          /* I believe this is legit, and means the lexical block
   2239             contains no insns (whatever that might mean).  Ignore. */
   2240       } else
   2241       if (have_lo && have_hi1 && (!have_range)) {
   2242          /* This scope supplies just a single address range. */
   2243          if (ip_lo < ip_hi1)
   2244             varstack_push( cc, parser, td3,
   2245                            unitary_range_list(ip_lo, ip_hi1 - 1),
   2246                            level, isFunc, fbGX );
   2247       } else
   2248       if ((!have_lo) && (!have_hi1) && have_range) {
   2249          /* This scope supplies multiple address ranges via the use of
   2250             a range list. */
   2251          varstack_push( cc, parser, td3,
   2252                         get_range_list( cc, td3,
   2253                                         rangeoff, cc->cu_svma ),
   2254                         level, isFunc, fbGX );
   2255       } else
   2256       if (have_lo && (!have_hi1) && (!have_range)) {
   2257          /* This scope is bogus.  The D3 spec sec 3.4 (Lexical Block
   2258             Entries) says fairly clearly that a scope must have either
   2259             _range or (_low_pc and _high_pc). */
   2260          /* The spec is a bit ambiguous though.  Perhaps a single byte
   2261             range is intended?  See sec 2.17 (Code Addresses And Ranges) */
   2262          /* This case is here because icc9 produced this:
   2263          <2><13bd>: DW_TAG_lexical_block
   2264             DW_AT_decl_line   : 5229
   2265             DW_AT_decl_column : 37
   2266             DW_AT_decl_file   : 1
   2267             DW_AT_low_pc      : 0x401b03
   2268          */
   2269          /* Ignore (seems safe than pushing a single byte range) */
   2270       } else
   2271          goto_bad_DIE;
   2272    }
   2273 
   2274    if (dtag == DW_TAG_variable || dtag == DW_TAG_formal_parameter) {
   2275       const  HChar* name = NULL;
   2276       UWord  typeR       = D3_INVALID_CUOFF;
   2277       Bool   global      = False;
   2278       GExpr* gexpr       = NULL;
   2279       Int    n_attrs     = 0;
   2280       UWord  abs_ori     = (UWord)D3_INVALID_CUOFF;
   2281       Int    lineNo      = 0;
   2282       UInt   fndn_ix     = 0;
   2283       nf_i = 0;
   2284       while (True) {
   2285          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
   2286          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
   2287          nf_i++;
   2288          if (attr == 0 && form == 0) break;
   2289          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
   2290          n_attrs++;
   2291          if (attr == DW_AT_name && cts.szB < 0) {
   2292             name = ML_(addStrFromCursor)( cc->di, cts.u.cur );
   2293          }
   2294          if (attr == DW_AT_location
   2295              && cts.szB != 0 /* either scalar or nonempty block */) {
   2296             gexpr = get_GX( cc, False/*td3*/, &cts );
   2297             vg_assert(gexpr);
   2298             VG_(addToXA)(gexprs, &gexpr);
   2299          }
   2300          if (attr == DW_AT_type && cts.szB > 0) {
   2301             typeR = cook_die_using_form( cc, cts.u.val, form );
   2302          }
   2303          if (attr == DW_AT_external && cts.szB > 0 && cts.u.val > 0) {
   2304             global = True;
   2305          }
   2306          if (attr == DW_AT_abstract_origin && cts.szB > 0) {
   2307             abs_ori = (UWord)cts.u.val;
   2308          }
   2309          if (attr == DW_AT_declaration && cts.szB > 0 && cts.u.val > 0) {
   2310             /*declaration = True;*/
   2311          }
   2312          if (attr == DW_AT_decl_line && cts.szB > 0) {
   2313             lineNo = (Int)cts.u.val;
   2314          }
   2315          if (attr == DW_AT_decl_file && cts.szB > 0) {
   2316             Int ftabIx = (Int)cts.u.val;
   2317             if (ftabIx >= 1
   2318                 && ftabIx < VG_(sizeXA)( parser->fndn_ix_Table )) {
   2319                fndn_ix = *(UInt*)VG_(indexXA)( parser->fndn_ix_Table, ftabIx );
   2320             }
   2321             if (0) VG_(printf)("XXX filename fndn_ix = %u %s\n", fndn_ix,
   2322                                ML_(fndn_ix2filename) (cc->di, fndn_ix));
   2323          }
   2324       }
   2325       if (!global && dtag == DW_TAG_variable && level == 1) {
   2326          /* Case of a static variable. It is better to declare
   2327             it global as the variable is not really related to
   2328             a PC range, as its address can be used by program
   2329             counters outside of the ranges where it is visible . */
   2330          global = True;
   2331       }
   2332 
   2333       /* We'll collect it under if one of the following three
   2334          conditions holds:
   2335          (1) has location and type    -> completed
   2336          (2) has type only            -> is an abstract instance
   2337          (3) has location and abs_ori -> is a concrete instance
   2338          Name, fndn_ix and line number are all optional frills.
   2339       */
   2340       if ( /* 1 */ (gexpr && typeR != D3_INVALID_CUOFF)
   2341            /* 2 */ || (typeR != D3_INVALID_CUOFF)
   2342            /* 3 */ || (gexpr && abs_ori != (UWord)D3_INVALID_CUOFF) ) {
   2343 
   2344          /* Add this variable to the list of interesting looking
   2345             variables.  Crucially, note along with it the address
   2346             range(s) associated with the variable, which for locals
   2347             will be the address ranges at the top of the varparser's
   2348             stack. */
   2349          GExpr*   fbGX = NULL;
   2350          Word     i, nRanges;
   2351          const XArray*  /* of AddrRange */ xa;
   2352          TempVar* tv;
   2353          /* Stack can't be empty; we put a dummy entry on it for the
   2354             entire address range before starting with the DIEs for
   2355             this CU. */
   2356          vg_assert(parser->sp >= 0);
   2357 
   2358          /* If this is a local variable (non-global), try to find
   2359             the GExpr for the DW_AT_frame_base of the containing
   2360             function.  It should have been pushed on the stack at the
   2361             time we encountered its DW_TAG_subprogram DIE, so the way
   2362             to find it is to scan back down the stack looking for it.
   2363             If there isn't an enclosing stack entry marked 'isFunc'
   2364             then we must be seeing variable or formal param DIEs
   2365             outside of a function, so we deem the Dwarf to be
   2366             malformed if that happens.  Note that the fbGX may be NULL
   2367             if the containing DT_TAG_subprogram didn't supply a
   2368             DW_AT_frame_base -- that's OK, but there must actually be
   2369             a containing DW_TAG_subprogram. */
   2370          if (!global) {
   2371             Bool found = False;
   2372             for (i = parser->sp; i >= 0; i--) {
   2373                if (parser->isFunc[i]) {
   2374                   fbGX = parser->fbGX[i];
   2375                   found = True;
   2376                   break;
   2377                }
   2378             }
   2379             if (!found) {
   2380                if (0 && VG_(clo_verbosity) >= 0) {
   2381                   VG_(message)(Vg_DebugMsg,
   2382                      "warning: parse_var_DIE: non-global variable "
   2383                      "outside DW_TAG_subprogram\n");
   2384                }
   2385                /* goto_bad_DIE; */
   2386                /* This seems to happen a lot.  Just ignore it -- if,
   2387                   when we come to evaluation of the location (guarded)
   2388                   expression, it requires a frame base value, and
   2389                   there's no expression for that, then evaluation as a
   2390                   whole will fail.  Harmless - a bit of a waste of
   2391                   cycles but nothing more. */
   2392             }
   2393          }
   2394 
   2395          /* re "global ? 0 : parser->sp" (twice), if the var is
   2396             marked 'global' then we must put it at the global scope,
   2397             as only the global scope (level 0) covers the entire PC
   2398             address space.  It is asserted elsewhere that level 0
   2399             always covers the entire address space. */
   2400          xa = parser->ranges[global ? 0 : parser->sp];
   2401          nRanges = VG_(sizeXA)(xa);
   2402          vg_assert(nRanges >= 0);
   2403 
   2404          tv = ML_(dinfo_zalloc)( "di.readdwarf3.pvD.1", sizeof(TempVar) );
   2405          tv->name   = name;
   2406          tv->level  = global ? 0 : parser->sp;
   2407          tv->typeR  = typeR;
   2408          tv->gexpr  = gexpr;
   2409          tv->fbGX   = fbGX;
   2410          tv->fndn_ix= fndn_ix;
   2411          tv->fLine  = lineNo;
   2412          tv->dioff  = posn;
   2413          tv->absOri = abs_ori;
   2414 
   2415          /* See explanation on definition of type TempVar for the
   2416             reason for this elaboration. */
   2417          tv->nRanges = nRanges;
   2418          tv->rngOneMin = 0;
   2419          tv->rngOneMax = 0;
   2420          tv->rngMany = NULL;
   2421          if (nRanges == 1) {
   2422             AddrRange* range = VG_(indexXA)(xa, 0);
   2423             tv->rngOneMin = range->aMin;
   2424             tv->rngOneMax = range->aMax;
   2425          }
   2426          else if (nRanges > 1) {
   2427             /* See if we already have a range list which is
   2428                structurally identical.  If so, use that; if not, clone
   2429                this one, and add it to our collection. */
   2430             UWord keyW, valW;
   2431             if (VG_(lookupFM)( rangestree, &keyW, &valW, (UWord)xa )) {
   2432                XArray* old = (XArray*)keyW;
   2433                vg_assert(valW == 0);
   2434                vg_assert(old != xa);
   2435                tv->rngMany = old;
   2436             } else {
   2437                XArray* cloned = VG_(cloneXA)( "di.readdwarf3.pvD.2", xa );
   2438                tv->rngMany = cloned;
   2439                VG_(addToFM)( rangestree, (UWord)cloned, 0 );
   2440             }
   2441          }
   2442 
   2443          VG_(addToXA)( tempvars, &tv );
   2444 
   2445          TRACE_D3("  Recording this variable, with %ld PC range(s)\n",
   2446                   VG_(sizeXA)(xa) );
   2447          /* collect stats on how effective the ->ranges special
   2448             casing is */
   2449          if (0) {
   2450             static Int ntot=0, ngt=0;
   2451             ntot++;
   2452             if (tv->rngMany) ngt++;
   2453             if (0 == (ntot % 100000))
   2454                VG_(printf)("XXXX %d tot, %d cloned\n", ntot, ngt);
   2455          }
   2456 
   2457       }
   2458 
   2459       /* Here are some other weird cases seen in the wild:
   2460 
   2461             We have a variable with a name and a type, but no
   2462             location.  I guess that's a sign that it has been
   2463             optimised away.  Ignore it.  Here's an example:
   2464 
   2465             static Int lc_compar(void* n1, void* n2) {
   2466                MC_Chunk* mc1 = *(MC_Chunk**)n1;
   2467                MC_Chunk* mc2 = *(MC_Chunk**)n2;
   2468                return (mc1->data < mc2->data ? -1 : 1);
   2469             }
   2470 
   2471             Both mc1 and mc2 are like this
   2472             <2><5bc>: Abbrev Number: 21 (DW_TAG_variable)
   2473                 DW_AT_name        : mc1
   2474                 DW_AT_decl_file   : 1
   2475                 DW_AT_decl_line   : 216
   2476                 DW_AT_type        : <5d3>
   2477 
   2478             whereas n1 and n2 do have locations specified.
   2479 
   2480             ---------------------------------------------
   2481 
   2482             We see a DW_TAG_formal_parameter with a type, but
   2483             no name and no location.  It's probably part of a function type
   2484             construction, thusly, hence ignore it:
   2485          <1><2b4>: Abbrev Number: 12 (DW_TAG_subroutine_type)
   2486              DW_AT_sibling     : <2c9>
   2487              DW_AT_prototyped  : 1
   2488              DW_AT_type        : <114>
   2489          <2><2be>: Abbrev Number: 13 (DW_TAG_formal_parameter)
   2490              DW_AT_type        : <13e>
   2491          <2><2c3>: Abbrev Number: 13 (DW_TAG_formal_parameter)
   2492              DW_AT_type        : <133>
   2493 
   2494             ---------------------------------------------
   2495 
   2496             Is very minimal, like this:
   2497             <4><81d>: Abbrev Number: 44 (DW_TAG_variable)
   2498                 DW_AT_abstract_origin: <7ba>
   2499             What that signifies I have no idea.  Ignore.
   2500 
   2501             ----------------------------------------------
   2502 
   2503             Is very minimal, like this:
   2504             <200f>: DW_TAG_formal_parameter
   2505                 DW_AT_abstract_ori: <1f4c>
   2506                 DW_AT_location    : 13440
   2507             What that signifies I have no idea.  Ignore.
   2508             It might be significant, though: the variable at least
   2509             has a location and so might exist somewhere.
   2510             Maybe we should handle this.
   2511 
   2512             ---------------------------------------------
   2513 
   2514             <22407>: DW_TAG_variable
   2515               DW_AT_name        : (indirect string, offset: 0x6579):
   2516                                   vgPlain_trampoline_stuff_start
   2517               DW_AT_decl_file   : 29
   2518               DW_AT_decl_line   : 56
   2519               DW_AT_external    : 1
   2520               DW_AT_declaration : 1
   2521 
   2522             Nameless and typeless variable that has a location?  Who
   2523             knows.  Not me.
   2524             <2><3d178>: Abbrev Number: 22 (DW_TAG_variable)
   2525                  DW_AT_location    : 9 byte block: 3 c0 c7 13 38 0 0 0 0
   2526                                      (DW_OP_addr: 3813c7c0)
   2527 
   2528             No, really.  Check it out.  gcc is quite simply borked.
   2529             <3><168cc>: Abbrev Number: 141 (DW_TAG_variable)
   2530             // followed by no attributes, and the next DIE is a sibling,
   2531             // not a child
   2532             */
   2533    }
   2534    return;
   2535 
   2536   bad_DIE:
   2537    dump_bad_die_and_barf("parse_var_DIE", dtag, posn, level,
   2538                          c_die, saved_die_c_offset,
   2539                          abbv,
   2540                          cc);
   2541    /*NOTREACHED*/
   2542 }
   2543 
   2544 typedef
   2545    struct {
   2546       /* The fndn_ix file name/dirname table.  Is a mapping from dwarf
   2547          integer index to the index in di->fndnpool. */
   2548       XArray* /* of UInt* */ fndn_ix_Table;
   2549       UWord sibling; // sibling of the last read DIE (if it has a sibling).
   2550    }
   2551    D3InlParser;
   2552 
   2553 /* Return the function name corresponding to absori.
   2554 
   2555    absori is a 'cooked' reference to a DIE, i.e. absori can be either
   2556    in cc->escn_debug_info or in cc->escn_debug_info_alt.
   2557    get_inlFnName will uncook absori.
   2558 
   2559    The returned value is a (permanent) string in DebugInfo's .strchunks.
   2560 
   2561    LIMITATION: absori must point in the CU of cc. If absori points
   2562    in another CU, returns "UnknownInlinedFun".
   2563 
   2564    Here are the problems to retrieve the fun name if absori is in
   2565    another CU:  the DIE reading code cannot properly extract data from
   2566    another CU, as the abbv code retrieved in the other CU cannot be
   2567    translated in an abbreviation. Reading data from the alternate debug
   2568    info also gives problems as the string reference is also in the alternate
   2569    file, but when reading the alt DIE, the string form is a 'local' string,
   2570    but cannot be read in the current CU, but must be read in the alt CU.
   2571    See bug 338803 comment#3 and attachment for a failed attempt to handle
   2572    these problems (failed because with the patch, only one alt abbrev hash
   2573    table is kept, while we must handle all abbreviations in all CUs
   2574    referenced by an absori (being a reference to an alt CU, or a previous
   2575    or following CU). */
   2576 static const HChar* get_inlFnName (Int absori, const CUConst* cc, Bool td3)
   2577 {
   2578    Cursor c;
   2579    const g_abbv *abbv;
   2580    ULong  atag, abbv_code;
   2581    UInt   has_children;
   2582    UWord  posn;
   2583    Bool type_flag, alt_flag;
   2584    const HChar *ret = NULL;
   2585    FormContents cts;
   2586    UInt nf_i;
   2587 
   2588    posn = uncook_die( cc, absori, &type_flag, &alt_flag);
   2589    if (type_flag)
   2590       cc->barf("get_inlFnName: uncooked absori in type debug info");
   2591 
   2592    /* LIMITATION: check we are in the same CU.
   2593       If not, return unknown inlined function name. */
   2594    /* if crossing between alt debug info<>normal info
   2595           or posn not in the cu range,
   2596       then it is in another CU. */
   2597    if (alt_flag != cc->is_alt_info
   2598        || posn < cc->cu_start_offset
   2599        || posn >= cc->cu_start_offset + cc->unit_length) {
   2600       static Bool reported = False;
   2601       if (!reported && VG_(clo_verbosity) > 1) {
   2602          VG_(message)(Vg_DebugMsg,
   2603                       "Warning: cross-CU LIMITATION: some inlined fn names\n"
   2604                       "might be shown as UnknownInlinedFun\n");
   2605          reported = True;
   2606       }
   2607       TRACE_D3(" <get_inlFnName><%lx>: cross-CU LIMITATION", posn);
   2608       return ML_(addStr)(cc->di, "UnknownInlinedFun", -1);
   2609    }
   2610 
   2611    init_Cursor (&c, cc->escn_debug_info, posn, cc->barf,
   2612                 "Overrun get_inlFnName absori");
   2613 
   2614    abbv_code = get_ULEB128( &c );
   2615    abbv      = get_abbv ( cc, abbv_code);
   2616    atag      = abbv->atag;
   2617    TRACE_D3(" <get_inlFnName><%lx>: Abbrev Number: %llu (%s)\n",
   2618             posn, abbv_code, ML_(pp_DW_TAG)( atag ) );
   2619 
   2620    if (atag == 0)
   2621       cc->barf("get_inlFnName: invalid zero tag on DIE");
   2622 
   2623    has_children = abbv->has_children;
   2624    if (has_children != DW_children_no && has_children != DW_children_yes)
   2625       cc->barf("get_inlFnName: invalid has_children value");
   2626 
   2627    if (atag != DW_TAG_subprogram)
   2628       cc->barf("get_inlFnName: absori not a subprogram");
   2629 
   2630    nf_i = 0;
   2631    while (True) {
   2632       DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
   2633       DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
   2634       nf_i++;
   2635       if (attr == 0 && form == 0) break;
   2636       get_Form_contents( &cts, cc, &c, False/*td3*/, form );
   2637       if (attr == DW_AT_name) {
   2638          HChar *fnname;
   2639          if (cts.szB >= 0)
   2640             cc->barf("get_inlFnName: expecting indirect string");
   2641          fnname = ML_(cur_read_strdup)( cts.u.cur,
   2642                                         "get_inlFnName.1" );
   2643          ret = ML_(addStr)(cc->di, fnname, -1);
   2644          ML_(dinfo_free) (fnname);
   2645          break; /* Name found, get out of the loop, as this has priority over
   2646                  DW_AT_specification. */
   2647       }
   2648       if (attr == DW_AT_specification) {
   2649          UWord cdie;
   2650 
   2651          if (cts.szB == 0)
   2652             cc->barf("get_inlFnName: AT specification missing");
   2653 
   2654          /* The recursive call to get_inlFnName will uncook its arg.
   2655             So, we need to cook it here, so as to reference the
   2656             correct section (e.g. the alt info). */
   2657          cdie = cook_die_using_form(cc, (UWord)cts.u.val, form);
   2658 
   2659          /* hoping that there is no loop */
   2660          ret = get_inlFnName (cdie, cc, td3);
   2661          /* Unclear if having both DW_AT_specification and DW_AT_name is
   2662             possible but in any case, we do not break here.
   2663             If we find later on a DW_AT_name, it will override the name found
   2664             in the DW_AT_specification.*/
   2665       }
   2666    }
   2667 
   2668    if (ret)
   2669       return ret;
   2670    else {
   2671       TRACE_D3("AbsOriFnNameNotFound");
   2672       return ML_(addStr)(cc->di, "AbsOriFnNameNotFound", -1);
   2673    }
   2674 }
   2675 
   2676 /* Returns True if the (possibly) childrens of the current DIE are interesting
   2677    to parse. Returns False otherwise.
   2678    If the current DIE has a sibling, the non interesting children can
   2679    maybe be skipped (if the DIE has a DW_AT_sibling).  */
   2680 __attribute__((noinline))
   2681 static Bool parse_inl_DIE (
   2682    /*MOD*/D3InlParser* parser,
   2683    DW_TAG dtag,
   2684    UWord posn,
   2685    Int level,
   2686    Cursor* c_die,
   2687    const g_abbv *abbv,
   2688    CUConst* cc,
   2689    Bool td3
   2690 )
   2691 {
   2692    FormContents cts;
   2693    UInt nf_i;
   2694 
   2695    UWord saved_die_c_offset  = get_position_of_Cursor( c_die );
   2696 
   2697    /* Get info about DW_TAG_compile_unit and DW_TAG_partial_unit 'which
   2698       in theory could also contain inlined fn calls).  */
   2699    if (dtag == DW_TAG_compile_unit || dtag == DW_TAG_partial_unit) {
   2700       Bool have_lo    = False;
   2701       Addr ip_lo    = 0;
   2702       const HChar *compdir = NULL;
   2703 
   2704       nf_i = 0;
   2705       while (True) {
   2706          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
   2707          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
   2708          nf_i++;
   2709          if (attr == 0 && form == 0) break;
   2710          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
   2711          if (attr == DW_AT_low_pc && cts.szB > 0) {
   2712             ip_lo   = cts.u.val;
   2713             have_lo = True;
   2714          }
   2715          if (attr == DW_AT_comp_dir) {
   2716             if (cts.szB >= 0)
   2717                cc->barf("parse_inl_DIE compdir: expecting indirect string");
   2718             HChar *str = ML_(cur_read_strdup)( cts.u.cur,
   2719                                                "parse_inl_DIE.compdir" );
   2720             compdir = ML_(addStr)(cc->di, str, -1);
   2721             ML_(dinfo_free) (str);
   2722          }
   2723          if (attr == DW_AT_stmt_list && cts.szB > 0) {
   2724             read_filename_table( parser->fndn_ix_Table, compdir,
   2725                                  cc, cts.u.val, td3 );
   2726          }
   2727          if (attr == DW_AT_sibling && cts.szB > 0) {
   2728             parser->sibling = cts.u.val;
   2729          }
   2730       }
   2731       if (level == 0)
   2732          setup_cu_svma (cc, have_lo, ip_lo, td3);
   2733    }
   2734 
   2735    if (dtag == DW_TAG_inlined_subroutine) {
   2736       Bool   have_lo    = False;
   2737       Bool   have_hi1   = False;
   2738       Bool   have_range = False;
   2739       Bool   hiIsRelative = False;
   2740       Addr   ip_lo      = 0;
   2741       Addr   ip_hi1     = 0;
   2742       Addr   rangeoff   = 0;
   2743       UInt   caller_fndn_ix = 0;
   2744       Int caller_lineno = 0;
   2745       Int inlinedfn_abstract_origin = 0;
   2746 
   2747       nf_i = 0;
   2748       while (True) {
   2749          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
   2750          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
   2751          nf_i++;
   2752          if (attr == 0 && form == 0) break;
   2753          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
   2754          if (attr == DW_AT_call_file && cts.szB > 0) {
   2755             Int ftabIx = (Int)cts.u.val;
   2756             if (ftabIx >= 1
   2757                 && ftabIx < VG_(sizeXA)( parser->fndn_ix_Table )) {
   2758                caller_fndn_ix = *(UInt*)
   2759                           VG_(indexXA)( parser->fndn_ix_Table, ftabIx );
   2760             }
   2761             if (0) VG_(printf)("XXX caller_fndn_ix = %u %s\n", caller_fndn_ix,
   2762                                ML_(fndn_ix2filename) (cc->di, caller_fndn_ix));
   2763          }
   2764          if (attr == DW_AT_call_line && cts.szB > 0) {
   2765             caller_lineno = cts.u.val;
   2766          }
   2767 
   2768          if (attr == DW_AT_abstract_origin  && cts.szB > 0) {
   2769             inlinedfn_abstract_origin
   2770                = cook_die_using_form (cc, (UWord)cts.u.val, form);
   2771          }
   2772 
   2773          if (attr == DW_AT_low_pc && cts.szB > 0) {
   2774             ip_lo   = cts.u.val;
   2775             have_lo = True;
   2776          }
   2777          if (attr == DW_AT_high_pc && cts.szB > 0) {
   2778             ip_hi1   = cts.u.val;
   2779             have_hi1 = True;
   2780             if (form != DW_FORM_addr)
   2781                hiIsRelative = True;
   2782          }
   2783          if (attr == DW_AT_ranges && cts.szB > 0) {
   2784             rangeoff   = cts.u.val;
   2785             have_range = True;
   2786          }
   2787          if (attr == DW_AT_sibling && cts.szB > 0) {
   2788             parser->sibling = cts.u.val;
   2789          }
   2790       }
   2791       if (have_lo && have_hi1 && hiIsRelative)
   2792          ip_hi1 += ip_lo;
   2793       /* Do we have something that looks sane? */
   2794       if (dtag == DW_TAG_inlined_subroutine
   2795           && (!have_lo) && (!have_hi1) && (!have_range)) {
   2796          /* Seems strange. How can an inlined subroutine have
   2797             no code ? */
   2798          goto_bad_DIE;
   2799       } else
   2800       if (have_lo && have_hi1 && (!have_range)) {
   2801          /* This inlined call is just a single address range. */
   2802          if (ip_lo < ip_hi1) {
   2803             /* Apply text debug biasing */
   2804             ip_lo += cc->di->text_debug_bias;
   2805             ip_hi1 += cc->di->text_debug_bias;
   2806             ML_(addInlInfo) (cc->di,
   2807                              ip_lo, ip_hi1,
   2808                              get_inlFnName (inlinedfn_abstract_origin, cc, td3),
   2809                              caller_fndn_ix,
   2810                              caller_lineno, level);
   2811          }
   2812       } else if (have_range) {
   2813          /* This inlined call is several address ranges. */
   2814          XArray *ranges;
   2815          Word j;
   2816          const HChar *inlfnname =
   2817             get_inlFnName (inlinedfn_abstract_origin, cc, td3);
   2818 
   2819          /* Ranges are biased for the inline info using the same logic
   2820             as what is used for biasing ranges for the var info, for which
   2821             ranges are read using cc->cu_svma (see parse_var_DIE).
   2822             Then text_debug_bias is added when a (non global) var
   2823             is recorded (see just before the call to ML_(addVar)) */
   2824          ranges = get_range_list( cc, td3,
   2825                                   rangeoff, cc->cu_svma );
   2826          for (j = 0; j < VG_(sizeXA)( ranges ); j++) {
   2827             AddrRange* range = (AddrRange*) VG_(indexXA)( ranges, j );
   2828             ML_(addInlInfo) (cc->di,
   2829                              range->aMin   + cc->di->text_debug_bias,
   2830                              range->aMax+1 + cc->di->text_debug_bias,
   2831                              // aMax+1 as range has its last bound included
   2832                              // while ML_(addInlInfo) expects last bound not
   2833                              // included.
   2834                              inlfnname,
   2835                              caller_fndn_ix,
   2836                              caller_lineno, level);
   2837          }
   2838          VG_(deleteXA)( ranges );
   2839       } else
   2840          goto_bad_DIE;
   2841    }
   2842 
   2843    // Only recursively parse the (possible) children for the DIE which
   2844    // might maybe contain a DW_TAG_inlined_subroutine:
   2845    return dtag == DW_TAG_lexical_block || dtag == DW_TAG_subprogram
   2846       || dtag == DW_TAG_inlined_subroutine
   2847       || dtag == DW_TAG_compile_unit || dtag == DW_TAG_partial_unit;
   2848 
   2849   bad_DIE:
   2850    dump_bad_die_and_barf("parse_inl_DIE", dtag, posn, level,
   2851                          c_die, saved_die_c_offset,
   2852                          abbv,
   2853                          cc);
   2854    /*NOTREACHED*/
   2855 }
   2856 
   2857 
   2858 /*------------------------------------------------------------*/
   2859 /*---                                                      ---*/
   2860 /*--- Parsing of type-related DIEs                         ---*/
   2861 /*---                                                      ---*/
   2862 /*------------------------------------------------------------*/
   2863 
   2864 typedef
   2865    struct {
   2866       /* What source language?  'A'=Ada83/95,
   2867                                 'C'=C/C++,
   2868                                 'F'=Fortran,
   2869                                 '?'=other
   2870          Established once per compilation unit. */
   2871       UChar language;
   2872       /* A stack of types which are currently under construction */
   2873       Int   sp; /* [sp] is innermost active entry; sp==-1 for empty
   2874                    stack */
   2875       Int   stack_size;
   2876       /* Note that the TyEnts in qparentE are temporary copies of the
   2877          ones accumulating in the main tyent array.  So it is not safe
   2878          to free up anything on them when popping them off the stack
   2879          (iow, it isn't safe to use TyEnt__make_EMPTY on them).  Just
   2880          memset them to zero when done. */
   2881       TyEnt *qparentE; /* parent TyEnts */
   2882       Int   *qlevel;
   2883    }
   2884    D3TypeParser;
   2885 
   2886 /* Completely initialise a type parser object */
   2887 static void
   2888 type_parser_init ( D3TypeParser *parser )
   2889 {
   2890    parser->sp = -1;
   2891    parser->language = '?';
   2892    parser->stack_size = 0;
   2893    parser->qparentE = NULL;
   2894    parser->qlevel   = NULL;
   2895 }
   2896 
   2897 /* Release any memory hanging off a type parser object */
   2898 static void
   2899 type_parser_release ( D3TypeParser *parser )
   2900 {
   2901    ML_(dinfo_free)( parser->qparentE );
   2902    ML_(dinfo_free)( parser->qlevel );
   2903 }
   2904 
   2905 static void typestack_show ( const D3TypeParser* parser, const HChar* str )
   2906 {
   2907    Word i;
   2908    VG_(printf)("  typestack (%s) {\n", str);
   2909    for (i = 0; i <= parser->sp; i++) {
   2910       VG_(printf)("    [%ld] (level %d): ", i, parser->qlevel[i]);
   2911       ML_(pp_TyEnt)( &parser->qparentE[i] );
   2912       VG_(printf)("\n");
   2913    }
   2914    VG_(printf)("  }\n");
   2915 }
   2916 
   2917 /* Remove from the stack, all entries with .level > 'level' */
   2918 static
   2919 void typestack_preen ( D3TypeParser* parser, Bool td3, Int level )
   2920 {
   2921    Bool changed = False;
   2922    vg_assert(parser->sp < parser->stack_size);
   2923    while (True) {
   2924       vg_assert(parser->sp >= -1);
   2925       if (parser->sp == -1) break;
   2926       if (parser->qlevel[parser->sp] <= level) break;
   2927       if (0)
   2928          TRACE_D3("BBBBAAAA typestack_pop [newsp=%d]\n", parser->sp-1);
   2929       vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
   2930       parser->sp--;
   2931       changed = True;
   2932    }
   2933    if (changed && td3)
   2934       typestack_show( parser, "after preen" );
   2935 }
   2936 
   2937 static Bool typestack_is_empty ( const D3TypeParser* parser )
   2938 {
   2939    vg_assert(parser->sp >= -1 && parser->sp < parser->stack_size);
   2940    return parser->sp == -1;
   2941 }
   2942 
   2943 static void typestack_push ( const CUConst* cc,
   2944                              D3TypeParser* parser,
   2945                              Bool td3,
   2946                              const TyEnt* parentE, Int level )
   2947 {
   2948    if (0)
   2949    TRACE_D3("BBBBAAAA typestack_push[newsp=%d]: %d  %05lx\n",
   2950             parser->sp+1, level, parentE->cuOff);
   2951 
   2952    /* First we need to zap everything >= 'level', as we are about to
   2953       replace any previous entry at 'level', so .. */
   2954    typestack_preen(parser, /*td3*/False, level-1);
   2955 
   2956    vg_assert(parser->sp >= -1);
   2957    vg_assert(parser->sp < parser->stack_size);
   2958    if (parser->sp == parser->stack_size - 1) {
   2959       parser->stack_size += 16;
   2960       parser->qparentE =
   2961          ML_(dinfo_realloc)("di.readdwarf3.typush.1", parser->qparentE,
   2962                             parser->stack_size * sizeof parser->qparentE[0]);
   2963       parser->qlevel =
   2964          ML_(dinfo_realloc)("di.readdwarf3.typush.2", parser->qlevel,
   2965                             parser->stack_size * sizeof parser->qlevel[0]);
   2966    }
   2967    if (parser->sp >= 0)
   2968       vg_assert(parser->qlevel[parser->sp] < level);
   2969    parser->sp++;
   2970    vg_assert(parentE);
   2971    vg_assert(ML_(TyEnt__is_type)(parentE));
   2972    vg_assert(parentE->cuOff != D3_INVALID_CUOFF);
   2973    parser->qparentE[parser->sp] = *parentE;
   2974    parser->qlevel[parser->sp]  = level;
   2975    if (TD3)
   2976       typestack_show( parser, "after push" );
   2977 }
   2978 
   2979 /* True if the subrange type being parsed gives the bounds of an array. */
   2980 static Bool subrange_type_denotes_array_bounds ( const D3TypeParser* parser,
   2981                                                  DW_TAG dtag ) {
   2982    vg_assert(dtag == DW_TAG_subrange_type);
   2983    /* For most languages, a subrange_type dtag always gives the
   2984       bounds of an array.
   2985       For Ada, there are additional conditions as a subrange_type
   2986       is also used for other purposes. */
   2987    if (parser->language != 'A')
   2988       /* not Ada, so it definitely denotes an array bound. */
   2989       return True;
   2990    else
   2991       /* Extra constraints for Ada: it only denotes an array bound if .. */
   2992       return (! typestack_is_empty(parser)
   2993               && parser->qparentE[parser->sp].tag == Te_TyArray);
   2994 }
   2995 
   2996 /* Parse a type-related DIE.  'parser' holds the current parser state.
   2997    'admin' is where the completed types are dumped.  'dtag' is the tag
   2998    for this DIE.  'c_die' points to the start of the data fields (FORM
   stuff) for the DIE.  'abbv' is the parsed abbreviation which
   describes the DIE.
   3001 
   3002    We may find the DIE uninteresting, in which case we should ignore
   3003    it.
   3004 
   3005    What happens: the DIE is examined.  If uninteresting, it is ignored.
   3006    Otherwise, the DIE gives rise to two things:
   3007 
   3008    (1) the offset of this DIE in the CU -- the cuOffset, a UWord
   3009    (2) a TyAdmin structure, which holds the type, or related stuff
   3010 
   3011    (2) is added at the end of 'tyadmins', at some index, say 'i'.
   3012 
   3013    A pair (cuOffset, i) is added to 'tydict'.
   3014 
   Hence 'tyadmins' holds the actual type entities, and 'tydict' holds
   a mapping from cuOffset to the index of the corresponding entry in
   'tyadmins'.
   3018 
   3019    When resolving a cuOffset to a TyAdmin, first look up the cuOffset
   3020    in the tydict (by binary search).  This gives an index into
   3021    tyadmins, and the required entity lives in tyadmins at that index.
   3022 */
   3023 __attribute__((noinline))
   3024 static void parse_type_DIE ( /*MOD*/XArray* /* of TyEnt */ tyents,
   3025                              /*MOD*/D3TypeParser* parser,
   3026                              DW_TAG dtag,
   3027                              UWord posn,
   3028                              Int level,
   3029                              Cursor* c_die,
   3030                              const g_abbv *abbv,
   3031                              const CUConst* cc,
   3032                              Bool td3 )
   3033 {
   3034    FormContents cts;
   3035    UInt nf_i;
   3036    TyEnt typeE;
   3037    TyEnt atomE;
   3038    TyEnt fieldE;
   3039    TyEnt boundE;
   3040 
   3041    UWord saved_die_c_offset  = get_position_of_Cursor( c_die );
   3042 
   3043    VG_(memset)( &typeE,  0xAA, sizeof(typeE) );
   3044    VG_(memset)( &atomE,  0xAA, sizeof(atomE) );
   3045    VG_(memset)( &fieldE, 0xAA, sizeof(fieldE) );
   3046    VG_(memset)( &boundE, 0xAA, sizeof(boundE) );
   3047 
   3048    /* If we've returned to a level at or above any previously noted
   3049       parent, un-note it, so we don't believe we're still collecting
   3050       its children. */
   3051    typestack_preen( parser, td3, level-1 );
   3052 
   3053    if (dtag == DW_TAG_compile_unit
   3054        || dtag == DW_TAG_type_unit
   3055        || dtag == DW_TAG_partial_unit) {
   3056       /* See if we can find DW_AT_language, since it is important for
   3057          establishing array bounds (see DW_TAG_subrange_type below in
   3058          this fn) */
   3059       nf_i = 0;
   3060       while (True) {
   3061          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
   3062          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
   3063          nf_i++;
   3064          if (attr == 0 && form == 0) break;
   3065          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
   3066          if (attr != DW_AT_language)
   3067             continue;
   3068          if (cts.szB <= 0)
   3069            goto_bad_DIE;
   3070          switch (cts.u.val) {
   3071             case DW_LANG_C89: case DW_LANG_C:
   3072             case DW_LANG_C_plus_plus: case DW_LANG_ObjC:
   3073             case DW_LANG_ObjC_plus_plus: case DW_LANG_UPC:
   3074             case DW_LANG_Upc: case DW_LANG_C99: case DW_LANG_C11:
   3075             case DW_LANG_C_plus_plus_11: case DW_LANG_C_plus_plus_14:
   3076                parser->language = 'C'; break;
   3077             case DW_LANG_Fortran77: case DW_LANG_Fortran90:
   3078             case DW_LANG_Fortran95: case DW_LANG_Fortran03:
   3079             case DW_LANG_Fortran08:
   3080                parser->language = 'F'; break;
   3081             case DW_LANG_Ada83: case DW_LANG_Ada95:
   3082                parser->language = 'A'; break;
   3083             case DW_LANG_Cobol74:
   3084             case DW_LANG_Cobol85: case DW_LANG_Pascal83:
   3085             case DW_LANG_Modula2: case DW_LANG_Java:
   3086             case DW_LANG_PLI:
   3087             case DW_LANG_D: case DW_LANG_Python: case DW_LANG_Go:
   3088             case DW_LANG_Mips_Assembler:
   3089                parser->language = '?'; break;
   3090             default:
   3091                goto_bad_DIE;
   3092          }
   3093       }
   3094    }
   3095 
   3096    if (dtag == DW_TAG_base_type) {
   3097       /* We can pick up a new base type any time. */
   3098       VG_(memset)(&typeE, 0, sizeof(typeE));
   3099       typeE.cuOff = D3_INVALID_CUOFF;
   3100       typeE.tag   = Te_TyBase;
   3101       nf_i = 0;
   3102       while (True) {
   3103          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
   3104          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
   3105          nf_i++;
   3106          if (attr == 0 && form == 0) break;
   3107          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
   3108          if (attr == DW_AT_name && cts.szB < 0) {
   3109             typeE.Te.TyBase.name
   3110                = ML_(cur_read_strdup)( cts.u.cur,
   3111                                        "di.readdwarf3.ptD.base_type.1" );
   3112          }
   3113          if (attr == DW_AT_byte_size && cts.szB > 0) {
   3114             typeE.Te.TyBase.szB = cts.u.val;
   3115          }
   3116          if (attr == DW_AT_encoding && cts.szB > 0) {
   3117             switch (cts.u.val) {
   3118                case DW_ATE_unsigned: case DW_ATE_unsigned_char:
   3119                case DW_ATE_UTF: /* since DWARF4, e.g. char16_t from C++ */
   3120                case DW_ATE_boolean:/* FIXME - is this correct? */
   3121                case DW_ATE_unsigned_fixed:
   3122                   typeE.Te.TyBase.enc = 'U'; break;
   3123                case DW_ATE_signed: case DW_ATE_signed_char:
   3124                case DW_ATE_signed_fixed:
   3125                   typeE.Te.TyBase.enc = 'S'; break;
   3126                case DW_ATE_float:
   3127                   typeE.Te.TyBase.enc = 'F'; break;
   3128                case DW_ATE_complex_float:
   3129                   typeE.Te.TyBase.enc = 'C'; break;
   3130                default:
   3131                   goto_bad_DIE;
   3132             }
   3133          }
   3134       }
   3135 
   3136       /* Invent a name if it doesn't have one.  gcc-4.3
   3137          -ftree-vectorize is observed to emit nameless base types. */
   3138       if (!typeE.Te.TyBase.name)
   3139          typeE.Te.TyBase.name
   3140             = ML_(dinfo_strdup)( "di.readdwarf3.ptD.base_type.2",
   3141                                  "<anon_base_type>" );
   3142 
   3143       /* Do we have something that looks sane? */
   3144       if (/* must have a name */
   3145           typeE.Te.TyBase.name == NULL
   3146           /* and a plausible size.  Yes, really 32: "complex long
   3147              double" apparently has size=32 */
   3148           || typeE.Te.TyBase.szB < 0 || typeE.Te.TyBase.szB > 32
   3149           /* and a plausible encoding */
   3150           || (typeE.Te.TyBase.enc != 'U'
   3151               && typeE.Te.TyBase.enc != 'S'
   3152               && typeE.Te.TyBase.enc != 'F'
   3153               && typeE.Te.TyBase.enc != 'C'))
   3154          goto_bad_DIE;
   3155       /* Last minute hack: if we see this
   3156          <1><515>: DW_TAG_base_type
   3157              DW_AT_byte_size   : 0
   3158              DW_AT_encoding    : 5
   3159              DW_AT_name        : void
   3160          convert it into a real Void type. */
   3161       if (typeE.Te.TyBase.szB == 0
   3162           && 0 == VG_(strcmp)("void", typeE.Te.TyBase.name)) {
   3163          ML_(TyEnt__make_EMPTY)(&typeE);
   3164          typeE.tag = Te_TyVoid;
   3165          typeE.Te.TyVoid.isFake = False; /* it's a real one! */
   3166       }
   3167 
   3168       goto acquire_Type;
   3169    }
   3170 
   3171    /*
   3172     * An example of DW_TAG_rvalue_reference_type:
   3173     *
   3174     * $ readelf --debug-dump /usr/lib/debug/usr/lib/libstdc++.so.6.0.16.debug
   3175     *  <1><1014>: Abbrev Number: 55 (DW_TAG_rvalue_reference_type)
   3176     *     <1015>   DW_AT_byte_size   : 4
   3177     *     <1016>   DW_AT_type        : <0xe52>
   3178     */
   3179    if (dtag == DW_TAG_pointer_type || dtag == DW_TAG_reference_type
   3180        || dtag == DW_TAG_ptr_to_member_type
   3181        || dtag == DW_TAG_rvalue_reference_type) {
   3182       /* This seems legit for _pointer_type and _reference_type.  I
   3183          don't know if rolling _ptr_to_member_type in here really is
   3184          legit, but it's better than not handling it at all. */
   3185       VG_(memset)(&typeE, 0, sizeof(typeE));
   3186       typeE.cuOff = D3_INVALID_CUOFF;
   3187       switch (dtag) {
   3188       case DW_TAG_pointer_type:
   3189          typeE.tag = Te_TyPtr;
   3190          break;
   3191       case DW_TAG_reference_type:
   3192          typeE.tag = Te_TyRef;
   3193          break;
   3194       case DW_TAG_ptr_to_member_type:
   3195          typeE.tag = Te_TyPtrMbr;
   3196          break;
   3197       case DW_TAG_rvalue_reference_type:
   3198          typeE.tag = Te_TyRvalRef;
   3199          break;
   3200       default:
   3201          vg_assert(False);
   3202       }
   3203       /* target type defaults to void */
   3204       typeE.Te.TyPorR.typeR = D3_FAKEVOID_CUOFF;
   3205       /* These four type kinds don't *have* to specify their size, in
   3206          which case we assume it's a machine word.  But if they do
   3207          specify it, it must be a machine word :-)  This probably
   3208          assumes that the word size of the Dwarf3 we're reading is the
   3209          same size as that on the machine.  gcc appears to give a size
   3210          whereas icc9 doesn't. */
   3211       typeE.Te.TyPorR.szB = sizeof(UWord);
   3212       nf_i = 0;
   3213       while (True) {
   3214          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
   3215          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
   3216          nf_i++;
   3217          if (attr == 0 && form == 0) break;
   3218          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
   3219          if (attr == DW_AT_byte_size && cts.szB > 0) {
   3220             typeE.Te.TyPorR.szB = cts.u.val;
   3221          }
   3222          if (attr == DW_AT_type && cts.szB > 0) {
   3223             typeE.Te.TyPorR.typeR
   3224                = cook_die_using_form( cc, (UWord)cts.u.val, form );
   3225          }
   3226       }
   3227       /* Do we have something that looks sane? */
   3228       if (typeE.Te.TyPorR.szB != sizeof(UWord))
   3229          goto_bad_DIE;
   3230       else
   3231          goto acquire_Type;
   3232    }
   3233 
   3234    if (dtag == DW_TAG_enumeration_type) {
   3235       /* Create a new Type to hold the results. */
   3236       VG_(memset)(&typeE, 0, sizeof(typeE));
   3237       typeE.cuOff = posn;
   3238       typeE.tag   = Te_TyEnum;
   3239       Bool is_decl = False;
   3240       typeE.Te.TyEnum.atomRs
   3241          = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ptD.enum_type.1",
   3242                        ML_(dinfo_free),
   3243                        sizeof(UWord) );
   3244       nf_i=0;
   3245       while (True) {
   3246          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
   3247          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
   3248          nf_i++;
   3249          if (attr == 0 && form == 0) break;
   3250          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
   3251          if (attr == DW_AT_name && cts.szB < 0) {
   3252             typeE.Te.TyEnum.name
   3253                = ML_(cur_read_strdup)( cts.u.cur,
   3254                                        "di.readdwarf3.pTD.enum_type.2" );
   3255          }
   3256          if (attr == DW_AT_byte_size && cts.szB > 0) {
   3257             typeE.Te.TyEnum.szB = cts.u.val;
   3258          }
   3259          if (attr == DW_AT_declaration) {
   3260             is_decl = True;
   3261          }
   3262       }
   3263 
   3264       if (!typeE.Te.TyEnum.name)
   3265          typeE.Te.TyEnum.name
   3266             = ML_(dinfo_strdup)( "di.readdwarf3.pTD.enum_type.3",
   3267                                  "<anon_enum_type>" );
   3268 
   3269       /* Do we have something that looks sane? */
   3270       if (typeE.Te.TyEnum.szB == 0
   3271           /* we must know the size */
   3272           /* but not for Ada, which uses such dummy
   3273              enumerations as helper for gdb ada mode.
   3274              Also GCC allows incomplete enums as GNU extension.
   3275              http://gcc.gnu.org/onlinedocs/gcc/Incomplete-Enums.html
   3276              These are marked as DW_AT_declaration and won't have
   3277              a size. They can only be used in declaration or as
   3278              pointer types.  You can't allocate variables or storage
   3279              using such an enum type. (Also GCC seems to have a bug
   3280              that will put such an enumeration_type into a .debug_types
   3281              unit which should only contain complete types.) */
   3282           && (parser->language != 'A' && !is_decl)) {
   3283          goto_bad_DIE;
   3284       }
   3285 
   3286       /* On't stack! */
   3287       typestack_push( cc, parser, td3, &typeE, level );
   3288       goto acquire_Type;
   3289    }
   3290 
   3291    /* gcc (GCC) 4.4.0 20081017 (experimental) occasionally produces
   3292       DW_TAG_enumerator with only a DW_AT_name but no
   3293       DW_AT_const_value.  This is in violation of the Dwarf3 standard,
   3294       and appears to be a new "feature" of gcc - versions 4.3.x and
   3295       earlier do not appear to do this.  So accept DW_TAG_enumerator
   3296       which only have a name but no value.  An example:
   3297 
   3298       <1><180>: Abbrev Number: 6 (DW_TAG_enumeration_type)
   3299          <181>   DW_AT_name        : (indirect string, offset: 0xda70):
   3300                                      QtMsgType
   3301          <185>   DW_AT_byte_size   : 4
   3302          <186>   DW_AT_decl_file   : 14
   3303          <187>   DW_AT_decl_line   : 1480
   3304          <189>   DW_AT_sibling     : <0x1a7>
   3305       <2><18d>: Abbrev Number: 7 (DW_TAG_enumerator)
   3306          <18e>   DW_AT_name        : (indirect string, offset: 0x9e18):
   3307                                      QtDebugMsg
   3308       <2><192>: Abbrev Number: 7 (DW_TAG_enumerator)
   3309          <193>   DW_AT_name        : (indirect string, offset: 0x1505f):
   3310                                      QtWarningMsg
   3311       <2><197>: Abbrev Number: 7 (DW_TAG_enumerator)
   3312          <198>   DW_AT_name        : (indirect string, offset: 0x16f4a):
   3313                                      QtCriticalMsg
   3314       <2><19c>: Abbrev Number: 7 (DW_TAG_enumerator)
   3315          <19d>   DW_AT_name        : (indirect string, offset: 0x156dd):
   3316                                      QtFatalMsg
   3317       <2><1a1>: Abbrev Number: 7 (DW_TAG_enumerator)
   3318          <1a2>   DW_AT_name        : (indirect string, offset: 0x13660):
   3319                                      QtSystemMsg
   3320    */
   3321    if (dtag == DW_TAG_enumerator) {
   3322       VG_(memset)( &atomE, 0, sizeof(atomE) );
   3323       atomE.cuOff = posn;
   3324       atomE.tag   = Te_Atom;
   3325       nf_i = 0;
   3326       while (True) {
   3327          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
   3328          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
   3329          nf_i++;
   3330          if (attr == 0 && form == 0) break;
   3331          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
   3332          if (attr == DW_AT_name && cts.szB < 0) {
   3333             atomE.Te.Atom.name
   3334               = ML_(cur_read_strdup)( cts.u.cur,
   3335                                       "di.readdwarf3.pTD.enumerator.1" );
   3336          }
   3337          if (attr == DW_AT_const_value && cts.szB > 0) {
   3338             atomE.Te.Atom.value      = cts.u.val;
   3339             atomE.Te.Atom.valueKnown = True;
   3340          }
   3341       }
   3342       /* Do we have something that looks sane? */
   3343       if (atomE.Te.Atom.name == NULL)
   3344          goto_bad_DIE;
   3345       /* Do we have a plausible parent? */
   3346       if (typestack_is_empty(parser)) goto_bad_DIE;
   3347       vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
   3348       vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
   3349       if (level != parser->qlevel[parser->sp]+1) goto_bad_DIE;
   3350       if (parser->qparentE[parser->sp].tag != Te_TyEnum) goto_bad_DIE;
   3351       /* Record this child in the parent */
   3352       vg_assert(parser->qparentE[parser->sp].Te.TyEnum.atomRs);
   3353       VG_(addToXA)( parser->qparentE[parser->sp].Te.TyEnum.atomRs,
   3354                     &atomE );
   3355       /* And record the child itself */
   3356       goto acquire_Atom;
   3357    }
   3358 
   3359    /* Treat DW_TAG_class_type as if it was a DW_TAG_structure_type.  I
   3360       don't know if this is correct, but it at least makes this reader
   3361       usable for gcc-4.3 produced Dwarf3. */
   3362    if (dtag == DW_TAG_structure_type || dtag == DW_TAG_class_type
   3363        || dtag == DW_TAG_union_type) {
   3364       Bool have_szB = False;
   3365       Bool is_decl  = False;
   3366       Bool is_spec  = False;
   3367       /* Create a new Type to hold the results. */
   3368       VG_(memset)(&typeE, 0, sizeof(typeE));
   3369       typeE.cuOff = posn;
   3370       typeE.tag   = Te_TyStOrUn;
   3371       typeE.Te.TyStOrUn.name = NULL;
   3372       typeE.Te.TyStOrUn.typeR = D3_INVALID_CUOFF;
   3373       typeE.Te.TyStOrUn.fieldRs
   3374          = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.pTD.struct_type.1",
   3375                        ML_(dinfo_free),
   3376                        sizeof(UWord) );
   3377       typeE.Te.TyStOrUn.complete = True;
   3378       typeE.Te.TyStOrUn.isStruct = dtag == DW_TAG_structure_type
   3379                                    || dtag == DW_TAG_class_type;
   3380       nf_i = 0;
   3381       while (True) {
   3382          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
   3383          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
   3384          nf_i++;
   3385          if (attr == 0 && form == 0) break;
   3386          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
   3387          if (attr == DW_AT_name && cts.szB < 0) {
   3388             typeE.Te.TyStOrUn.name
   3389                = ML_(cur_read_strdup)( cts.u.cur,
   3390                                        "di.readdwarf3.ptD.struct_type.2" );
   3391          }
   3392          if (attr == DW_AT_byte_size && cts.szB >= 0) {
   3393             typeE.Te.TyStOrUn.szB = cts.u.val;
   3394             have_szB = True;
   3395          }
   3396          if (attr == DW_AT_declaration && cts.szB > 0 && cts.u.val > 0) {
   3397             is_decl = True;
   3398          }
   3399          if (attr == DW_AT_specification && cts.szB > 0 && cts.u.val > 0) {
   3400             is_spec = True;
   3401          }
   3402          if (attr == DW_AT_signature && form == DW_FORM_ref_sig8
   3403              && cts.szB > 0) {
   3404             have_szB = True;
   3405             typeE.Te.TyStOrUn.szB = 8;
   3406             typeE.Te.TyStOrUn.typeR
   3407                = cook_die_using_form( cc, (UWord)cts.u.val, form );
   3408          }
   3409       }
   3410       /* Do we have something that looks sane? */
   3411       if (is_decl && (!is_spec)) {
   3412          /* It's a DW_AT_declaration.  We require the name but
   3413             nothing else. */
   3414          /* JRS 2012-06-28: following discussion w/ tromey, if the
   3415             type doesn't have name, just make one up, and accept it.
   3416             It might be referred to by other DIEs, so ignoring it
   3417             doesn't seem like a safe option. */
   3418          if (typeE.Te.TyStOrUn.name == NULL)
   3419             typeE.Te.TyStOrUn.name
   3420                = ML_(dinfo_strdup)( "di.readdwarf3.ptD.struct_type.3",
   3421                                     "<anon_struct_type>" );
   3422          typeE.Te.TyStOrUn.complete = False;
   3423          /* JRS 2009 Aug 10: <possible kludge>? */
   3424          /* Push this tyent on the stack, even though it's incomplete.
   3425             It appears that gcc-4.4 on Fedora 11 will sometimes create
   3426             DW_TAG_member entries for it, and so we need to have a
   3427             plausible parent present in order for that to work.  See
   3428             #200029 comments 8 and 9. */
   3429          typestack_push( cc, parser, td3, &typeE, level );
   3430          /* </possible kludge> */
   3431          goto acquire_Type;
   3432       }
   3433       if ((!is_decl) /* && (!is_spec) */) {
   3434          /* this is the common, ordinary case */
   3435          /* The name can be present, or not */
   3436          if (!have_szB) {
   3437             /* We must know the size.
   3438                But in Ada, record with discriminants might have no size.
   3439                But in C, VLA in the middle of a struct (gcc extension)
   3440                might have no size.
   3441                Instead, some GNAT dwarf extensions and/or dwarf entries
   3442                allow to calculate the struct size at runtime.
   3443                We cannot do that (yet?) so, the temporary kludge is to use
   3444                a small size. */
   3445             typeE.Te.TyStOrUn.szB = 1;
   3446          }
   3447          /* On't stack! */
   3448          typestack_push( cc, parser, td3, &typeE, level );
   3449          goto acquire_Type;
   3450       }
   3451       else {
   3452          /* don't know how to handle any other variants just now */
   3453          goto_bad_DIE;
   3454       }
   3455    }
   3456 
   3457    if (dtag == DW_TAG_member) {
   3458       /* Acquire member entries for both DW_TAG_structure_type and
   3459          DW_TAG_union_type.  They differ minorly, in that struct
   3460          members must have a DW_AT_data_member_location expression
   3461          whereas union members must not. */
   3462       Bool parent_is_struct;
   3463       VG_(memset)( &fieldE, 0, sizeof(fieldE) );
   3464       fieldE.cuOff = posn;
   3465       fieldE.tag   = Te_Field;
   3466       fieldE.Te.Field.typeR = D3_INVALID_CUOFF;
   3467       nf_i = 0;
   3468       while (True) {
   3469          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
   3470          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
   3471          nf_i++;
   3472          if (attr == 0 && form == 0) break;
   3473          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
   3474          if (attr == DW_AT_name && cts.szB < 0) {
   3475             fieldE.Te.Field.name
   3476                = ML_(cur_read_strdup)( cts.u.cur,
   3477                                        "di.readdwarf3.ptD.member.1" );
   3478          }
   3479          if (attr == DW_AT_type && cts.szB > 0) {
   3480             fieldE.Te.Field.typeR
   3481                = cook_die_using_form( cc, (UWord)cts.u.val, form );
   3482          }
   3483          /* There are 2 different cases for DW_AT_data_member_location.
   3484             If it is a constant class attribute, it contains byte offset
   3485             from the beginning of the containing entity.
   3486             Otherwise it is a location expression.  */
   3487          if (attr == DW_AT_data_member_location && cts.szB > 0) {
   3488             fieldE.Te.Field.nLoc = -1;
   3489             fieldE.Te.Field.pos.offset = cts.u.val;
   3490          }
   3491          if (attr == DW_AT_data_member_location && cts.szB <= 0) {
   3492             fieldE.Te.Field.nLoc = (UWord)(-cts.szB);
   3493             fieldE.Te.Field.pos.loc
   3494                = ML_(cur_read_memdup)( cts.u.cur,
   3495                                        (SizeT)fieldE.Te.Field.nLoc,
   3496                                        "di.readdwarf3.ptD.member.2" );
   3497          }
   3498       }
   3499       /* Do we have a plausible parent? */
   3500       if (typestack_is_empty(parser)) goto_bad_DIE;
   3501       vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
   3502       vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
   3503       if (level != parser->qlevel[parser->sp]+1) goto_bad_DIE;
   3504       if (parser->qparentE[parser->sp].tag != Te_TyStOrUn) goto_bad_DIE;
   3505       /* Do we have something that looks sane?  If this a member of a
   3506          struct, we must have a location expression; but if a member
   3507          of a union that is irrelevant (D3 spec sec 5.6.6).  We ought
   3508          to reject in the latter case, but some compilers have been
   3509          observed to emit constant-zero expressions.  So just ignore
   3510          them. */
   3511       parent_is_struct
   3512          = parser->qparentE[parser->sp].Te.TyStOrUn.isStruct;
   3513       if (!fieldE.Te.Field.name)
   3514          fieldE.Te.Field.name
   3515             = ML_(dinfo_strdup)( "di.readdwarf3.ptD.member.3",
   3516                                  "<anon_field>" );
   3517       if (fieldE.Te.Field.typeR == D3_INVALID_CUOFF)
   3518          goto_bad_DIE;
   3519       if (fieldE.Te.Field.nLoc) {
   3520          if (!parent_is_struct) {
   3521             /* If this is a union type, pretend we haven't seen the data
   3522                member location expression, as it is by definition
   3523                redundant (it must be zero). */
   3524             if (fieldE.Te.Field.nLoc > 0)
   3525                ML_(dinfo_free)(fieldE.Te.Field.pos.loc);
   3526             fieldE.Te.Field.pos.loc = NULL;
   3527             fieldE.Te.Field.nLoc = 0;
   3528          }
   3529          /* Record this child in the parent */
   3530          fieldE.Te.Field.isStruct = parent_is_struct;
   3531          vg_assert(parser->qparentE[parser->sp].Te.TyStOrUn.fieldRs);
   3532          VG_(addToXA)( parser->qparentE[parser->sp].Te.TyStOrUn.fieldRs,
   3533                        &posn );
   3534          /* And record the child itself */
   3535          goto acquire_Field;
   3536       } else {
   3537          /* Member with no location - this can happen with static
   3538             const members in C++ code which are compile time constants
   3539             that do not exist in the class. They're not of any interest
   3540             to us so we ignore them. */
   3541          ML_(TyEnt__make_EMPTY)(&fieldE);
   3542       }
   3543    }
   3544 
   3545    if (dtag == DW_TAG_array_type) {
   3546       VG_(memset)(&typeE, 0, sizeof(typeE));
   3547       typeE.cuOff = posn;
   3548       typeE.tag   = Te_TyArray;
   3549       typeE.Te.TyArray.typeR = D3_INVALID_CUOFF;
   3550       typeE.Te.TyArray.boundRs
   3551          = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ptD.array_type.1",
   3552                        ML_(dinfo_free),
   3553                        sizeof(UWord) );
   3554       nf_i = 0;
   3555       while (True) {
   3556          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
   3557          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
   3558          nf_i++;
   3559          if (attr == 0 && form == 0) break;
   3560          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
   3561          if (attr == DW_AT_type && cts.szB > 0) {
   3562             typeE.Te.TyArray.typeR
   3563                = cook_die_using_form( cc, (UWord)cts.u.val, form );
   3564          }
   3565       }
   3566       if (typeE.Te.TyArray.typeR == D3_INVALID_CUOFF)
   3567          goto_bad_DIE;
   3568       /* On't stack! */
   3569       typestack_push( cc, parser, td3, &typeE, level );
   3570       goto acquire_Type;
   3571    }
   3572 
   3573    /* this is a subrange type defining the bounds of an array. */
   3574    if (dtag == DW_TAG_subrange_type
   3575        && subrange_type_denotes_array_bounds(parser, dtag)) {
   3576       Bool have_lower = False;
   3577       Bool have_upper = False;
   3578       Bool have_count = False;
   3579       Long lower = 0;
   3580       Long upper = 0;
   3581 
   3582       switch (parser->language) {
   3583          case 'C': have_lower = True;  lower = 0; break;
   3584          case 'F': have_lower = True;  lower = 1; break;
   3585          case '?': have_lower = False; break;
   3586          case 'A': have_lower = False; break;
   3587          default:  vg_assert(0); /* assured us by handling of
   3588                                     DW_TAG_compile_unit in this fn */
   3589       }
   3590 
   3591       VG_(memset)( &boundE, 0, sizeof(boundE) );
   3592       boundE.cuOff = D3_INVALID_CUOFF;
   3593       boundE.tag   = Te_Bound;
   3594       nf_i = 0;
   3595       while (True) {
   3596          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
   3597          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
   3598          nf_i++;
   3599          if (attr == 0 && form == 0) break;
   3600          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
   3601          if (attr == DW_AT_lower_bound && cts.szB > 0) {
   3602             lower      = (Long)cts.u.val;
   3603             have_lower = True;
   3604          }
   3605          if (attr == DW_AT_upper_bound && cts.szB > 0) {
   3606             upper      = (Long)cts.u.val;
   3607             have_upper = True;
   3608          }
   3609          if (attr == DW_AT_count && cts.szB > 0) {
   3610             /*count    = (Long)cts.u.val;*/
   3611             have_count = True;
   3612          }
   3613       }
   3614       /* FIXME: potentially skip the rest if no parent present, since
   3615          it could be the case that this subrange type is free-standing
   3616          (not being used to describe the bounds of a containing array
   3617          type) */
   3618       /* Do we have a plausible parent? */
   3619       if (typestack_is_empty(parser)) goto_bad_DIE;
   3620       vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
   3621       vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
   3622       if (level != parser->qlevel[parser->sp]+1) goto_bad_DIE;
   3623       if (parser->qparentE[parser->sp].tag != Te_TyArray) goto_bad_DIE;
   3624 
   3625       /* Figure out if we have a definite range or not */
   3626       if (have_lower && have_upper && (!have_count)) {
   3627          boundE.Te.Bound.knownL = True;
   3628          boundE.Te.Bound.knownU = True;
   3629          boundE.Te.Bound.boundL = lower;
   3630          boundE.Te.Bound.boundU = upper;
   3631       }
   3632       else if (have_lower && (!have_upper) && (!have_count)) {
   3633          boundE.Te.Bound.knownL = True;
   3634          boundE.Te.Bound.knownU = False;
   3635          boundE.Te.Bound.boundL = lower;
   3636          boundE.Te.Bound.boundU = 0;
   3637       }
   3638       else if ((!have_lower) && have_upper && (!have_count)) {
   3639          boundE.Te.Bound.knownL = False;
   3640          boundE.Te.Bound.knownU = True;
   3641          boundE.Te.Bound.boundL = 0;
   3642          boundE.Te.Bound.boundU = upper;
   3643       }
   3644       else if ((!have_lower) && (!have_upper) && (!have_count)) {
   3645          boundE.Te.Bound.knownL = False;
   3646          boundE.Te.Bound.knownU = False;
   3647          boundE.Te.Bound.boundL = 0;
   3648          boundE.Te.Bound.boundU = 0;
   3649       } else {
   3650          /* FIXME: handle more cases */
   3651          goto_bad_DIE;
   3652       }
   3653 
   3654       /* Record this bound in the parent */
   3655       boundE.cuOff = posn;
   3656       vg_assert(parser->qparentE[parser->sp].Te.TyArray.boundRs);
   3657       VG_(addToXA)( parser->qparentE[parser->sp].Te.TyArray.boundRs,
   3658                     &boundE.cuOff );
   3659       /* And record the child itself */
   3660       goto acquire_Bound;
   3661    }
   3662 
   3663    /* typedef or subrange_type other than array bounds. */
   3664    if (dtag == DW_TAG_typedef
   3665        || (dtag == DW_TAG_subrange_type
   3666            && !subrange_type_denotes_array_bounds(parser, dtag))) {
   3667       /* subrange_type other than array bound is only for Ada. */
   3668       vg_assert (dtag == DW_TAG_typedef || parser->language == 'A');
   3669       /* We can pick up a new typedef/subrange_type any time. */
   3670       VG_(memset)(&typeE, 0, sizeof(typeE));
   3671       typeE.cuOff = D3_INVALID_CUOFF;
   3672       typeE.tag   = Te_TyTyDef;
   3673       typeE.Te.TyTyDef.name = NULL;
   3674       typeE.Te.TyTyDef.typeR = D3_INVALID_CUOFF;
   3675       nf_i = 0;
   3676       while (True) {
   3677          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
   3678          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
   3679          nf_i++;
   3680          if (attr == 0 && form == 0) break;
   3681          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
   3682          if (attr == DW_AT_name && cts.szB < 0) {
   3683             typeE.Te.TyTyDef.name
   3684                = ML_(cur_read_strdup)( cts.u.cur,
   3685                                        "di.readdwarf3.ptD.typedef.1" );
   3686          }
   3687          if (attr == DW_AT_type && cts.szB > 0) {
   3688             typeE.Te.TyTyDef.typeR
   3689                = cook_die_using_form( cc, (UWord)cts.u.val, form );
   3690          }
   3691       }
   3692       /* Do we have something that looks sane?
   3693          gcc gnat Ada generates minimal typedef
   3694          such as the below
   3695          <6><91cc>: DW_TAG_typedef
   3696             DW_AT_abstract_ori: <9066>
   3697          g++ for OMP can generate artificial functions that have
   3698          parameters that refer to pointers to unnamed typedefs.
   3699          See https://bugs.kde.org/show_bug.cgi?id=273475
   3700          So we cannot require a name for a DW_TAG_typedef.
   3701       */
   3702       goto acquire_Type;
   3703    }
   3704 
   3705    if (dtag == DW_TAG_subroutine_type) {
   3706       /* function type? just record that one fact and ask no
   3707          further questions. */
   3708       VG_(memset)(&typeE, 0, sizeof(typeE));
   3709       typeE.cuOff = D3_INVALID_CUOFF;
   3710       typeE.tag   = Te_TyFn;
   3711       goto acquire_Type;
   3712    }
   3713 
   3714    if (dtag == DW_TAG_volatile_type || dtag == DW_TAG_const_type
   3715        || dtag == DW_TAG_restrict_type) {
   3716       Int have_ty = 0;
   3717       VG_(memset)(&typeE, 0, sizeof(typeE));
   3718       typeE.cuOff = D3_INVALID_CUOFF;
   3719       typeE.tag   = Te_TyQual;
   3720       typeE.Te.TyQual.qual
   3721          = (dtag == DW_TAG_volatile_type ? 'V'
   3722             : (dtag == DW_TAG_const_type ? 'C' : 'R'));
   3723       /* target type defaults to 'void' */
   3724       typeE.Te.TyQual.typeR = D3_FAKEVOID_CUOFF;
   3725       nf_i = 0;
   3726       while (True) {
   3727          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
   3728          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
   3729          nf_i++;
   3730          if (attr == 0 && form == 0) break;
   3731          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
   3732          if (attr == DW_AT_type && cts.szB > 0) {
   3733             typeE.Te.TyQual.typeR
   3734                = cook_die_using_form( cc, (UWord)cts.u.val, form );
   3735             have_ty++;
   3736          }
   3737       }
   3738       /* gcc sometimes generates DW_TAG_const/volatile_type without
   3739          DW_AT_type and GDB appears to interpret the type as 'const
   3740          void' (resp. 'volatile void').  So just allow it .. */
   3741       if (have_ty == 1 || have_ty == 0)
   3742          goto acquire_Type;
   3743       else
   3744          goto_bad_DIE;
   3745    }
   3746 
   3747    /*
   3748     * Treat DW_TAG_unspecified_type as type void. An example of DW_TAG_unspecified_type:
   3749     *
   3750     * $ readelf --debug-dump /usr/lib/debug/usr/lib/libstdc++.so.6.0.16.debug
   3751     *  <1><10d4>: Abbrev Number: 53 (DW_TAG_unspecified_type)
   3752     *     <10d5>   DW_AT_name        : (indirect string, offset: 0xdb7): decltype(nullptr)
   3753     */
   3754    if (dtag == DW_TAG_unspecified_type) {
   3755       VG_(memset)(&typeE, 0, sizeof(typeE));
   3756       typeE.cuOff           = D3_INVALID_CUOFF;
   3757       typeE.tag             = Te_TyQual;
   3758       typeE.Te.TyQual.typeR = D3_FAKEVOID_CUOFF;
   3759       goto acquire_Type;
   3760    }
   3761 
   3762    /* else ignore this DIE */
   3763    return;
   3764    /*NOTREACHED*/
   3765 
   3766   acquire_Type:
   3767    if (0) VG_(printf)("YYYY Acquire Type\n");
   3768    vg_assert(ML_(TyEnt__is_type)( &typeE ));
   3769    vg_assert(typeE.cuOff == D3_INVALID_CUOFF || typeE.cuOff == posn);
   3770    typeE.cuOff = posn;
   3771    VG_(addToXA)( tyents, &typeE );
   3772    return;
   3773    /*NOTREACHED*/
   3774 
   3775   acquire_Atom:
   3776    if (0) VG_(printf)("YYYY Acquire Atom\n");
   3777    vg_assert(atomE.tag == Te_Atom);
   3778    vg_assert(atomE.cuOff == D3_INVALID_CUOFF || atomE.cuOff == posn);
   3779    atomE.cuOff = posn;
   3780    VG_(addToXA)( tyents, &atomE );
   3781    return;
   3782    /*NOTREACHED*/
   3783 
   3784   acquire_Field:
   3785    /* For union members, Expr should be absent */
   3786    if (0) VG_(printf)("YYYY Acquire Field\n");
   3787    vg_assert(fieldE.tag == Te_Field);
   3788    vg_assert(fieldE.Te.Field.nLoc <= 0 || fieldE.Te.Field.pos.loc != NULL);
   3789    vg_assert(fieldE.Te.Field.nLoc != 0 || fieldE.Te.Field.pos.loc == NULL);
   3790    if (fieldE.Te.Field.isStruct) {
   3791       vg_assert(fieldE.Te.Field.nLoc != 0);
   3792    } else {
   3793       vg_assert(fieldE.Te.Field.nLoc == 0);
   3794    }
   3795    vg_assert(fieldE.cuOff == D3_INVALID_CUOFF || fieldE.cuOff == posn);
   3796    fieldE.cuOff = posn;
   3797    VG_(addToXA)( tyents, &fieldE );
   3798    return;
   3799    /*NOTREACHED*/
   3800 
   3801   acquire_Bound:
   3802    if (0) VG_(printf)("YYYY Acquire Bound\n");
   3803    vg_assert(boundE.tag == Te_Bound);
   3804    vg_assert(boundE.cuOff == D3_INVALID_CUOFF || boundE.cuOff == posn);
   3805    boundE.cuOff = posn;
   3806    VG_(addToXA)( tyents, &boundE );
   3807    return;
   3808    /*NOTREACHED*/
   3809 
   3810   bad_DIE:
   3811    dump_bad_die_and_barf("parse_type_DIE", dtag, posn, level,
   3812                          c_die, saved_die_c_offset,
   3813                          abbv,
   3814                          cc);
   3815    /*NOTREACHED*/
   3816 }
   3817 
   3818 
   3819 /*------------------------------------------------------------*/
   3820 /*---                                                      ---*/
   3821 /*--- Compression of type DIE information                  ---*/
   3822 /*---                                                      ---*/
   3823 /*------------------------------------------------------------*/
   3824 
   3825 static UWord chase_cuOff ( Bool* changed,
   3826                            const XArray* /* of TyEnt */ ents,
   3827                            TyEntIndexCache* ents_cache,
   3828                            UWord cuOff )
   3829 {
   3830    TyEnt* ent;
   3831    ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache, cuOff );
   3832 
   3833    if (!ent) {
   3834       VG_(printf)("chase_cuOff: no entry for 0x%05lx\n", cuOff);
   3835       *changed = False;
   3836       return cuOff;
   3837    }
   3838 
   3839    vg_assert(ent->tag != Te_EMPTY);
   3840    if (ent->tag != Te_INDIR) {
   3841       *changed = False;
   3842       return cuOff;
   3843    } else {
   3844       vg_assert(ent->Te.INDIR.indR < cuOff);
   3845       *changed = True;
   3846       return ent->Te.INDIR.indR;
   3847    }
   3848 }
   3849 
   3850 static
   3851 void chase_cuOffs_in_XArray ( Bool* changed,
   3852                               const XArray* /* of TyEnt */ ents,
   3853                               TyEntIndexCache* ents_cache,
   3854                               /*MOD*/XArray* /* of UWord */ cuOffs )
   3855 {
   3856    Bool b2 = False;
   3857    Word i, n = VG_(sizeXA)( cuOffs );
   3858    for (i = 0; i < n; i++) {
   3859       Bool   b = False;
   3860       UWord* p = VG_(indexXA)( cuOffs, i );
   3861       *p = chase_cuOff( &b, ents, ents_cache, *p );
   3862       if (b)
   3863          b2 = True;
   3864    }
   3865    *changed = b2;
   3866 }
   3867 
   3868 static Bool TyEnt__subst_R_fields ( const XArray* /* of TyEnt */ ents,
   3869                                     TyEntIndexCache* ents_cache,
   3870                                     /*MOD*/TyEnt* te )
   3871 {
   3872    Bool b, changed = False;
   3873    switch (te->tag) {
   3874       case Te_EMPTY:
   3875          break;
   3876       case Te_INDIR:
   3877          te->Te.INDIR.indR
   3878             = chase_cuOff( &b, ents, ents_cache, te->Te.INDIR.indR );
   3879          if (b) changed = True;
   3880          break;
   3881       case Te_UNKNOWN:
   3882          break;
   3883       case Te_Atom:
   3884          break;
   3885       case Te_Field:
   3886          te->Te.Field.typeR
   3887             = chase_cuOff( &b, ents, ents_cache, te->Te.Field.typeR );
   3888          if (b) changed = True;
   3889          break;
   3890       case Te_Bound:
   3891          break;
   3892       case Te_TyBase:
   3893          break;
   3894       case Te_TyPtr:
   3895       case Te_TyRef:
   3896       case Te_TyPtrMbr:
   3897       case Te_TyRvalRef:
   3898          te->Te.TyPorR.typeR
   3899             = chase_cuOff( &b, ents, ents_cache, te->Te.TyPorR.typeR );
   3900          if (b) changed = True;
   3901          break;
   3902       case Te_TyTyDef:
   3903          te->Te.TyTyDef.typeR
   3904             = chase_cuOff( &b, ents, ents_cache, te->Te.TyTyDef.typeR );
   3905          if (b) changed = True;
   3906          break;
   3907       case Te_TyStOrUn:
   3908          chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyStOrUn.fieldRs );
   3909          if (b) changed = True;
   3910          break;
   3911       case Te_TyEnum:
   3912          chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyEnum.atomRs );
   3913          if (b) changed = True;
   3914          break;
   3915       case Te_TyArray:
   3916          te->Te.TyArray.typeR
   3917             = chase_cuOff( &b, ents, ents_cache, te->Te.TyArray.typeR );
   3918          if (b) changed = True;
   3919          chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyArray.boundRs );
   3920          if (b) changed = True;
   3921          break;
   3922       case Te_TyFn:
   3923          break;
   3924       case Te_TyQual:
   3925          te->Te.TyQual.typeR
   3926             = chase_cuOff( &b, ents, ents_cache, te->Te.TyQual.typeR );
   3927          if (b) changed = True;
   3928          break;
   3929       case Te_TyVoid:
   3930          break;
   3931       default:
   3932          ML_(pp_TyEnt)(te);
   3933          vg_assert(0);
   3934    }
   3935    return changed;
   3936 }
   3937 
   3938 /* Make a pass over 'ents'.  For each tyent, inspect the target of any
   3939    'R' or 'Rs' fields (those which refer to other tyents), and replace
   3940    any which point to INDIR nodes with the target of the indirection
   3941    (which should not itself be an indirection).  In summary, this
   3942    routine shorts out all references to indirection nodes. */
   3943 static
   3944 Word dedup_types_substitution_pass ( /*MOD*/XArray* /* of TyEnt */ ents,
   3945                                      TyEntIndexCache* ents_cache )
   3946 {
   3947    Word i, n, nChanged = 0;
   3948    Bool b;
   3949    n = VG_(sizeXA)( ents );
   3950    for (i = 0; i < n; i++) {
   3951       TyEnt* ent = VG_(indexXA)( ents, i );
   3952       vg_assert(ent->tag != Te_EMPTY);
   3953       /* We have to substitute everything, even indirections, so as to
   3954          ensure that chains of indirections don't build up. */
   3955       b = TyEnt__subst_R_fields( ents, ents_cache, ent );
   3956       if (b)
   3957          nChanged++;
   3958    }
   3959 
   3960    return nChanged;
   3961 }
   3962 
   3963 
   3964 /* Make a pass over 'ents', building a dictionary of TyEnts as we go.
   3965    Look up each new tyent in the dictionary in turn.  If it is already
   3966    in the dictionary, replace this tyent with an indirection to the
   3967    existing one, and delete any malloc'd stuff hanging off this one.
   3968    In summary, this routine commons up all tyents that are identical
   3969    as defined by TyEnt__cmp_by_all_except_cuOff. */
   3970 static
   3971 Word dedup_types_commoning_pass ( /*MOD*/XArray* /* of TyEnt */ ents )
   3972 {
   3973    Word    n, i, nDeleted;
   3974    WordFM* dict; /* TyEnt* -> void */
   3975    TyEnt*  ent;
   3976    UWord   keyW, valW;
   3977 
   3978    dict = VG_(newFM)(
   3979              ML_(dinfo_zalloc), "di.readdwarf3.dtcp.1",
   3980              ML_(dinfo_free),
   3981              (Word(*)(UWord,UWord)) ML_(TyEnt__cmp_by_all_except_cuOff)
   3982           );
   3983 
   3984    nDeleted = 0;
   3985    n = VG_(sizeXA)( ents );
   3986    for (i = 0; i < n; i++) {
   3987       ent = VG_(indexXA)( ents, i );
   3988       vg_assert(ent->tag != Te_EMPTY);
   3989 
   3990       /* Ignore indirections, although check that they are
   3991          not forming a cycle. */
   3992       if (ent->tag == Te_INDIR) {
   3993          vg_assert(ent->Te.INDIR.indR < ent->cuOff);
   3994          continue;
   3995       }
   3996 
   3997       keyW = valW = 0;
   3998       if (VG_(lookupFM)( dict, &keyW, &valW, (UWord)ent )) {
   3999          /* it's already in the dictionary. */
   4000          TyEnt* old = (TyEnt*)keyW;
   4001          vg_assert(valW == 0);
   4002          vg_assert(old != ent);
   4003          vg_assert(old->tag != Te_INDIR);
   4004          /* since we are traversing the array in increasing order of
   4005             cuOff: */
   4006          vg_assert(old->cuOff < ent->cuOff);
   4007          /* So anyway, dump this entry and replace it with an
   4008             indirection to the one in the dictionary.  Note that the
   4009             assertion above guarantees that we cannot create cycles of
   4010             indirections, since we are always creating an indirection
   4011             to a tyent with a cuOff lower than this one. */
   4012          ML_(TyEnt__make_EMPTY)( ent );
   4013          ent->tag = Te_INDIR;
   4014          ent->Te.INDIR.indR = old->cuOff;
   4015          nDeleted++;
   4016       } else {
   4017          /* not in dictionary; add it and keep going. */
   4018          VG_(addToFM)( dict, (UWord)ent, 0 );
   4019       }
   4020    }
   4021 
   4022    VG_(deleteFM)( dict, NULL, NULL );
   4023 
   4024    return nDeleted;
   4025 }
   4026 
   4027 
   4028 static
   4029 void dedup_types ( Bool td3,
   4030                    /*MOD*/XArray* /* of TyEnt */ ents,
   4031                    TyEntIndexCache* ents_cache )
   4032 {
   4033    Word m, n, i, nDel, nSubst, nThresh;
   4034    if (0) td3 = True;
   4035 
   4036    n = VG_(sizeXA)( ents );
   4037 
   4038    /* If a commoning pass and a substitution pass both make fewer than
   4039       this many changes, just stop.  It's pointless to burn up CPU
   4040       time trying to compress the last 1% or so out of the array. */
   4041    nThresh = n / 200;
   4042 
   4043    /* First we must sort .ents by its .cuOff fields, so we
   4044       can index into it. */
   4045    VG_(setCmpFnXA)( ents, (XACmpFn_t) ML_(TyEnt__cmp_by_cuOff_only) );
   4046    VG_(sortXA)( ents );
   4047 
   4048    /* Now repeatedly do commoning and substitution passes over
   4049       the array, until there are no more changes. */
   4050    do {
   4051       nDel   = dedup_types_commoning_pass ( ents );
   4052       nSubst = dedup_types_substitution_pass ( ents, ents_cache );
   4053       vg_assert(nDel >= 0 && nSubst >= 0);
   4054       TRACE_D3("   %ld deletions, %ld substitutions\n", nDel, nSubst);
   4055    } while (nDel > nThresh || nSubst > nThresh);
   4056 
   4057    /* Sanity check: all INDIR nodes should point at a non-INDIR thing.
   4058       In fact this should be true at the end of every loop iteration
   4059       above (a commoning pass followed by a substitution pass), but
   4060       checking it on every iteration is excessively expensive.  Note,
   4061       this loop also computes 'm' for the stats printing below it. */
   4062    m = 0;
   4063    n = VG_(sizeXA)( ents );
   4064    for (i = 0; i < n; i++) {
   4065       TyEnt *ent, *ind;
   4066       ent = VG_(indexXA)( ents, i );
   4067       if (ent->tag != Te_INDIR) continue;
   4068       m++;
   4069       ind = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
   4070                                          ent->Te.INDIR.indR );
   4071       vg_assert(ind);
   4072       vg_assert(ind->tag != Te_INDIR);
   4073    }
   4074 
   4075    TRACE_D3("Overall: %ld before, %ld after\n", n, n-m);
   4076 }
   4077 
   4078 
   4079 /*------------------------------------------------------------*/
   4080 /*---                                                      ---*/
   4081 /*--- Resolution of references to type DIEs                ---*/
   4082 /*---                                                      ---*/
   4083 /*------------------------------------------------------------*/
   4084 
   4085 /* Make a pass through the (temporary) variables array.  Examine the
   4086    type of each variable, check is it found, and chase any Te_INDIRs.
   4087    Postcondition is: each variable has a typeR field that refers to a
   4088    valid type in tyents, or a Te_UNKNOWN, and is certainly guaranteed
   4089    not to refer to a Te_INDIR.  (This is so that we can throw all the
   4090    Te_INDIRs away later). */
   4091 
   4092 __attribute__((noinline))
   4093 static void resolve_variable_types (
   4094                void (*barf)( const HChar* ) __attribute__((noreturn)),
   4095                /*R-O*/XArray* /* of TyEnt */ ents,
   4096                /*MOD*/TyEntIndexCache* ents_cache,
   4097                /*MOD*/XArray* /* of TempVar* */ vars
   4098             )
   4099 {
   4100    Word i, n;
   4101    n = VG_(sizeXA)( vars );
   4102    for (i = 0; i < n; i++) {
   4103       TempVar* var = *(TempVar**)VG_(indexXA)( vars, i );
   4104       /* This is the stated type of the variable.  But it might be
   4105          an indirection, so be careful. */
   4106       TyEnt* ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
   4107                                                 var->typeR );
   4108       if (ent && ent->tag == Te_INDIR) {
   4109          ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
   4110                                             ent->Te.INDIR.indR );
   4111          vg_assert(ent);
   4112          vg_assert(ent->tag != Te_INDIR);
   4113       }
   4114 
   4115       /* Deal first with "normal" cases */
   4116       if (ent && ML_(TyEnt__is_type)(ent)) {
   4117          var->typeR = ent->cuOff;
   4118          continue;
   4119       }
   4120 
   4121       /* If there's no ent, it probably we did not manage to read a
   4122          type at the cuOffset which is stated as being this variable's
   4123          type.  Maybe a deficiency in parse_type_DIE.  Complain. */
   4124       if (ent == NULL) {
   4125          VG_(printf)("\n: Invalid cuOff = 0x%05lx\n", var->typeR );
   4126          barf("resolve_variable_types: "
   4127               "cuOff does not refer to a known type");
   4128       }
   4129       vg_assert(ent);
   4130       /* If ent has any other tag, something bad happened, along the
   4131          lines of var->typeR not referring to a type at all. */
   4132       vg_assert(ent->tag == Te_UNKNOWN);
   4133       /* Just accept it; the type will be useless, but at least keep
   4134          going. */
   4135       var->typeR = ent->cuOff;
   4136    }
   4137 }
   4138 
   4139 
   4140 /*------------------------------------------------------------*/
   4141 /*---                                                      ---*/
   4142 /*--- Parsing of Compilation Units                         ---*/
   4143 /*---                                                      ---*/
   4144 /*------------------------------------------------------------*/
   4145 
   4146 static Int cmp_TempVar_by_dioff ( const void* v1, const void* v2 ) {
   4147    const TempVar* t1 = *(const TempVar *const *)v1;
   4148    const TempVar* t2 = *(const TempVar *const *)v2;
   4149    if (t1->dioff < t2->dioff) return -1;
   4150    if (t1->dioff > t2->dioff) return 1;
   4151    return 0;
   4152 }
   4153 
   4154 static void read_DIE (
   4155    /*MOD*/WordFM* /* of (XArray* of AddrRange, void) */ rangestree,
   4156    /*MOD*/XArray* /* of TyEnt */ tyents,
   4157    /*MOD*/XArray* /* of TempVar* */ tempvars,
   4158    /*MOD*/XArray* /* of GExpr* */ gexprs,
   4159    /*MOD*/D3TypeParser* typarser,
   4160    /*MOD*/D3VarParser* varparser,
   4161    /*MOD*/D3InlParser* inlparser,
   4162    Cursor* c, Bool td3, CUConst* cc, Int level
   4163 )
   4164 {
   4165    const g_abbv *abbv;
   4166    ULong  atag, abbv_code;
   4167    UWord  posn;
   4168    UInt   has_children;
   4169    UWord  start_die_c_offset;
   4170    UWord  after_die_c_offset;
   4171    // If the DIE we will parse has a sibling and the parser(s) are
   4172    // all indicating that parse_children is not necessary, then
   4173    // we will skip the children by jumping to the sibling of this DIE
   4174    // (if it has a sibling).
   4175    UWord  sibling = 0;
   4176    Bool   parse_children = False;
   4177 
   4178    /* --- Deal with this DIE --- */
   4179    posn      = cook_die( cc, get_position_of_Cursor( c ) );
   4180    abbv_code = get_ULEB128( c );
   4181    abbv = get_abbv(cc, abbv_code);
   4182    atag      = abbv->atag;
   4183 
   4184    if (TD3) {
   4185       TRACE_D3("\n");
   4186       trace_DIE ((DW_TAG)atag, posn, level,
   4187                  get_position_of_Cursor( c ), abbv, cc);
   4188    }
   4189 
   4190    if (atag == 0)
   4191       cc->barf("read_DIE: invalid zero tag on DIE");
   4192 
   4193    has_children = abbv->has_children;
   4194    if (has_children != DW_children_no && has_children != DW_children_yes)
   4195       cc->barf("read_DIE: invalid has_children value");
   4196 
   4197    /* We're set up to look at the fields of this DIE.  Hand it off to
   4198       any parser(s) that want to see it.  Since they will in general
   4199       advance the DIE cursor, remember the current settings so that we
   4200       can then back up. */
   4201    start_die_c_offset  = get_position_of_Cursor( c );
   4202    after_die_c_offset  = 0; // set to c position if a parser has read the DIE.
   4203 
   4204    if (VG_(clo_read_var_info)) {
   4205       parse_type_DIE( tyents,
   4206                       typarser,
   4207                       (DW_TAG)atag,
   4208                       posn,
   4209                       level,
   4210                       c,     /* DIE cursor */
   4211                       abbv,  /* abbrev */
   4212                       cc,
   4213                       td3 );
   4214       if (get_position_of_Cursor( c ) != start_die_c_offset) {
   4215          after_die_c_offset = get_position_of_Cursor( c );
   4216          set_position_of_Cursor( c, start_die_c_offset );
   4217       }
   4218 
   4219       parse_var_DIE( rangestree,
   4220                      tempvars,
   4221                      gexprs,
   4222                      varparser,
   4223                      (DW_TAG)atag,
   4224                      posn,
   4225                      level,
   4226                      c,     /* DIE cursor */
   4227                      abbv,  /* abbrev */
   4228                      cc,
   4229                      td3 );
   4230       if (get_position_of_Cursor( c ) != start_die_c_offset) {
   4231          after_die_c_offset = get_position_of_Cursor( c );
   4232          set_position_of_Cursor( c, start_die_c_offset );
   4233       }
   4234 
   4235       parse_children = True;
   4236       // type and var parsers do not have logic to skip children and establish
   4237       // the value of sibling.
   4238    }
   4239 
   4240    if (VG_(clo_read_inline_info)) {
   4241       inlparser->sibling = 0;
   4242       parse_children =
   4243          parse_inl_DIE( inlparser,
   4244                         (DW_TAG)atag,
   4245                         posn,
   4246                         level,
   4247                         c,     /* DIE cursor */
   4248                         abbv, /* abbrev */
   4249                         cc,
   4250                         td3 )
   4251          || parse_children;
   4252       if (get_position_of_Cursor( c ) != start_die_c_offset) {
   4253          after_die_c_offset = get_position_of_Cursor( c );
   4254          // Last parser, no need to reset the cursor to start_die_c_offset.
   4255       }
   4256       if (sibling == 0)
   4257          sibling = inlparser->sibling;
   4258       vg_assert (inlparser->sibling == 0 || inlparser->sibling == sibling);
   4259    }
   4260 
   4261    if (after_die_c_offset > 0) {
   4262       // DIE was read by a parser above, so we know where the DIE ends.
   4263       set_position_of_Cursor( c, after_die_c_offset );
   4264    } else {
   4265       /* No parser has parsed this DIE. So, we need to skip the DIE,
   4266          in order to read the next DIE.
   4267          At the same time, establish sibling value if the DIE has one. */
   4268       TRACE_D3("    uninteresting DIE -> skipping ...\n");
   4269       skip_DIE (&sibling, c, abbv, cc);
   4270    }
   4271 
   4272    /* --- Now recurse into its children, if any
   4273       and the parsing of the children is requested by a parser --- */
   4274    if (has_children == DW_children_yes) {
   4275       if (parse_children || sibling == 0) {
   4276          if (0) TRACE_D3("BEGIN children of level %d\n", level);
   4277          while (True) {
   4278             atag = peek_ULEB128( c );
   4279             if (atag == 0) break;
   4280             read_DIE( rangestree, tyents, tempvars, gexprs,
   4281                       typarser, varparser, inlparser,
   4282                       c, td3, cc, level+1 );
   4283          }
   4284          /* Now we need to eat the terminating zero */
   4285          atag = get_ULEB128( c );
   4286          vg_assert(atag == 0);
   4287          if (0) TRACE_D3("END children of level %d\n", level);
   4288       } else {
   4289          // We can skip the childrens, by jumping to the sibling
   4290          TRACE_D3("    SKIPPING DIE's children,"
   4291                   "jumping to sibling <%d><%lx>\n",
   4292                   level, sibling);
   4293          set_position_of_Cursor( c, sibling );
   4294       }
   4295    }
   4296 
   4297 }
   4298 
   4299 static void trace_debug_loc (const DebugInfo* di,
   4300                              __attribute__((noreturn)) void (*barf)( const HChar* ),
   4301                              DiSlice escn_debug_loc)
   4302 {
   4303 #if 0
   4304    /* This doesn't work properly because it assumes all entries are
   4305       packed end to end, with no holes.  But that doesn't always
   4306       appear to be the case, so it loses sync.  And the D3 spec
   4307       doesn't appear to require a no-hole situation either. */
   4308    /* Display .debug_loc */
   4309    Addr  dl_base;
   4310    UWord dl_offset;
   4311    Cursor loc; /* for showing .debug_loc */
   4312    Bool td3 = di->trace_symtab;
   4313 
   4314    TRACE_SYMTAB("\n");
   4315    TRACE_SYMTAB("\n------ The contents of .debug_loc ------\n");
   4316    TRACE_SYMTAB("    Offset   Begin    End      Expression\n");
   4317    if (ML_(sli_is_valid)(escn_debug_loc)) {
   4318       init_Cursor( &loc, escn_debug_loc, 0, barf,
   4319                    "Overrun whilst reading .debug_loc section(1)" );
   4320       dl_base = 0;
   4321       dl_offset = 0;
   4322       while (True) {
   4323          UWord  w1, w2;
   4324          UWord  len;
   4325          if (is_at_end_Cursor( &loc ))
   4326             break;
   4327 
   4328          /* Read a (host-)word pair.  This is something of a hack since
   4329             the word size to read is really dictated by the ELF file;
   4330             however, we assume we're reading a file with the same
   4331             word-sizeness as the host.  Reasonably enough. */
   4332          w1 = get_UWord( &loc );
   4333          w2 = get_UWord( &loc );
   4334 
   4335          if (w1 == 0 && w2 == 0) {
   4336             /* end of list.  reset 'base' */
   4337             TRACE_D3("    %08lx <End of list>\n", dl_offset);
   4338             dl_base = 0;
   4339             dl_offset = get_position_of_Cursor( &loc );
   4340             continue;
   4341          }
   4342 
   4343          if (w1 == -1UL) {
   4344             /* new value for 'base' */
   4345             TRACE_D3("    %08lx %16lx %08lx (base address)\n",
   4346                      dl_offset, w1, w2);
   4347             dl_base = w2;
   4348             continue;
   4349          }
   4350 
   4351          /* else a location expression follows */
   4352          TRACE_D3("    %08lx %08lx %08lx ",
   4353                   dl_offset, w1 + dl_base, w2 + dl_base);
   4354          len = (UWord)get_UShort( &loc );
   4355          while (len > 0) {
   4356             UChar byte = get_UChar( &loc );
   4357             TRACE_D3("%02x", (UInt)byte);
   4358             len--;
   4359          }
   4360          TRACE_SYMTAB("\n");
   4361       }
   4362    }
   4363 #endif
   4364 }
   4365 
   4366 static void trace_debug_ranges (const DebugInfo* di,
   4367                                 __attribute__((noreturn)) void (*barf)( const HChar* ),
   4368                                 DiSlice escn_debug_ranges)
   4369 {
   4370    Cursor ranges; /* for showing .debug_ranges */
   4371    Addr  dr_base;
   4372    UWord dr_offset;
   4373    Bool td3 = di->trace_symtab;
   4374 
   4375    /* Display .debug_ranges */
   4376    TRACE_SYMTAB("\n");
   4377    TRACE_SYMTAB("\n------ The contents of .debug_ranges ------\n");
   4378    TRACE_SYMTAB("    Offset   Begin    End\n");
   4379    if (ML_(sli_is_valid)(escn_debug_ranges)) {
   4380       init_Cursor( &ranges, escn_debug_ranges, 0, barf,
   4381                    "Overrun whilst reading .debug_ranges section(1)" );
   4382       dr_base = 0;
   4383       dr_offset = 0;
   4384       while (True) {
   4385          UWord  w1, w2;
   4386 
   4387          if (is_at_end_Cursor( &ranges ))
   4388             break;
   4389 
   4390          /* Read a (host-)word pair.  This is something of a hack since
   4391             the word size to read is really dictated by the ELF file;
   4392             however, we assume we're reading a file with the same
   4393             word-sizeness as the host.  Reasonably enough. */
   4394          w1 = get_UWord( &ranges );
   4395          w2 = get_UWord( &ranges );
   4396 
   4397          if (w1 == 0 && w2 == 0) {
   4398             /* end of list.  reset 'base' */
   4399             TRACE_D3("    %08lx <End of list>\n", dr_offset);
   4400             dr_base = 0;
   4401             dr_offset = get_position_of_Cursor( &ranges );
   4402             continue;
   4403          }
   4404 
   4405          if (w1 == -1UL) {
   4406             /* new value for 'base' */
   4407             TRACE_D3("    %08lx %16lx %08lx (base address)\n",
   4408                      dr_offset, w1, w2);
   4409             dr_base = w2;
   4410             continue;
   4411          }
   4412 
   4413          /* else a range [w1+base, w2+base) is denoted */
   4414          TRACE_D3("    %08lx %08lx %08lx\n",
   4415                   dr_offset, w1 + dr_base, w2 + dr_base);
   4416       }
   4417    }
   4418 }
   4419 
   4420 static void trace_debug_abbrev (const DebugInfo* di,
   4421                                 __attribute__((noreturn)) void (*barf)( const HChar* ),
   4422                                 DiSlice escn_debug_abbv)
   4423 {
   4424    Cursor abbv; /* for showing .debug_abbrev */
   4425    Bool td3 = di->trace_symtab;
   4426 
   4427    /* Display .debug_abbrev */
   4428    TRACE_SYMTAB("\n");
   4429    TRACE_SYMTAB("\n------ The contents of .debug_abbrev ------\n");
   4430    if (ML_(sli_is_valid)(escn_debug_abbv)) {
   4431       init_Cursor( &abbv, escn_debug_abbv, 0, barf,
   4432                    "Overrun whilst reading .debug_abbrev section" );
   4433       while (True) {
   4434          if (is_at_end_Cursor( &abbv ))
   4435             break;
   4436          /* Read one abbreviation table */
   4437          TRACE_D3("  Number TAG\n");
   4438          while (True) {
   4439             ULong atag;
   4440             UInt  has_children;
   4441             ULong acode = get_ULEB128( &abbv );
   4442             if (acode == 0) break; /* end of the table */
   4443             atag = get_ULEB128( &abbv );
   4444             has_children = get_UChar( &abbv );
   4445             TRACE_D3("   %llu      %s    [%s]\n",
   4446                      acode, ML_(pp_DW_TAG)(atag),
   4447                             ML_(pp_DW_children)(has_children));
   4448             while (True) {
   4449                ULong at_name = get_ULEB128( &abbv );
   4450                ULong at_form = get_ULEB128( &abbv );
   4451                if (at_name == 0 && at_form == 0) break;
   4452                TRACE_D3("    %-18s %s\n",
   4453                         ML_(pp_DW_AT)(at_name), ML_(pp_DW_FORM)(at_form));
   4454             }
   4455          }
   4456       }
   4457    }
   4458 }
   4459 
   4460 static
   4461 void new_dwarf3_reader_wrk (
   4462    DebugInfo* di,
   4463    __attribute__((noreturn)) void (*barf)( const HChar* ),
   4464    DiSlice escn_debug_info,      DiSlice escn_debug_types,
   4465    DiSlice escn_debug_abbv,      DiSlice escn_debug_line,
   4466    DiSlice escn_debug_str,       DiSlice escn_debug_ranges,
   4467    DiSlice escn_debug_loc,       DiSlice escn_debug_info_alt,
   4468    DiSlice escn_debug_abbv_alt,  DiSlice escn_debug_line_alt,
   4469    DiSlice escn_debug_str_alt
   4470 )
   4471 {
   4472    XArray* /* of TyEnt */     tyents = NULL;
   4473    XArray* /* of TyEnt */     tyents_to_keep = NULL;
   4474    XArray* /* of GExpr* */    gexprs = NULL;
   4475    XArray* /* of TempVar* */  tempvars = NULL;
   4476    WordFM* /* of (XArray* of AddrRange, void) */ rangestree = NULL;
   4477    TyEntIndexCache* tyents_cache = NULL;
   4478    TyEntIndexCache* tyents_to_keep_cache = NULL;
   4479    TempVar *varp, *varp2;
   4480    GExpr* gexpr;
   4481    Cursor info; /* primary cursor for parsing .debug_info */
   4482    D3TypeParser typarser;
   4483    D3VarParser varparser;
   4484    D3InlParser inlparser;
   4485    Word  i, j, n;
   4486    Bool td3 = di->trace_symtab;
   4487    XArray* /* of TempVar* */ dioff_lookup_tab;
   4488    Int pass;
   4489    VgHashTable *signature_types = NULL;
   4490 
   4491    /* Display/trace various information, if requested. */
   4492    if (TD3) {
   4493       trace_debug_loc    (di, barf, escn_debug_loc);
   4494       trace_debug_ranges (di, barf, escn_debug_ranges);
   4495       trace_debug_abbrev (di, barf, escn_debug_abbv);
   4496       TRACE_SYMTAB("\n");
   4497    }
   4498 
   4499    /* Zero out all parsers. Parsers will really be initialised
   4500       according to VG_(clo_read_*_info). */
   4501    VG_(memset)( &inlparser, 0, sizeof(inlparser) );
   4502 
   4503    if (VG_(clo_read_var_info)) {
   4504       /* We'll park the harvested type information in here.  Also create
   4505          a fake "void" entry with offset D3_FAKEVOID_CUOFF, so we always
   4506          have at least one type entry to refer to.  D3_FAKEVOID_CUOFF is
   4507          huge and presumably will not occur in any valid DWARF3 file --
   4508          it would need to have a .debug_info section 4GB long for that to
   4509          happen.  These type entries end up in the DebugInfo. */
   4510       tyents = VG_(newXA)( ML_(dinfo_zalloc),
   4511                            "di.readdwarf3.ndrw.1 (TyEnt temp array)",
   4512                            ML_(dinfo_free), sizeof(TyEnt) );
   4513       { TyEnt tyent;
   4514         VG_(memset)(&tyent, 0, sizeof(tyent));
   4515         tyent.tag   = Te_TyVoid;
   4516         tyent.cuOff = D3_FAKEVOID_CUOFF;
   4517         tyent.Te.TyVoid.isFake = True;
   4518         VG_(addToXA)( tyents, &tyent );
   4519       }
   4520       { TyEnt tyent;
   4521         VG_(memset)(&tyent, 0, sizeof(tyent));
   4522         tyent.tag   = Te_UNKNOWN;
   4523         tyent.cuOff = D3_INVALID_CUOFF;
   4524         VG_(addToXA)( tyents, &tyent );
   4525       }
   4526 
   4527       /* This is a tree used to unique-ify the range lists that are
   4528          manufactured by parse_var_DIE.  References to the keys in the
   4529          tree wind up in .rngMany fields in TempVars.  We'll need to
   4530          delete this tree, and the XArrays attached to it, at the end of
   4531          this function. */
   4532       rangestree = VG_(newFM)( ML_(dinfo_zalloc),
   4533                                "di.readdwarf3.ndrw.2 (rangestree)",
   4534                                ML_(dinfo_free),
   4535                                (Word(*)(UWord,UWord))cmp__XArrays_of_AddrRange );
   4536 
   4537       /* List of variables we're accumulating.  These don't end up in the
   4538          DebugInfo; instead their contents are handed to ML_(addVar) and
   4539          the list elements are then deleted. */
   4540       tempvars = VG_(newXA)( ML_(dinfo_zalloc),
   4541                              "di.readdwarf3.ndrw.3 (TempVar*s array)",
   4542                              ML_(dinfo_free),
   4543                              sizeof(TempVar*) );
   4544 
   4545       /* List of GExprs we're accumulating.  These wind up in the
   4546          DebugInfo. */
   4547       gexprs = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.4",
   4548                            ML_(dinfo_free), sizeof(GExpr*) );
   4549 
   4550       /* We need a D3TypeParser to keep track of partially constructed
   4551          types.  It'll be discarded as soon as we've completed the CU,
   4552          since the resulting information is tipped in to 'tyents' as it
   4553          is generated. */
   4554       type_parser_init(&typarser);
   4555 
   4556       var_parser_init(&varparser);
   4557 
   4558       signature_types = VG_(HT_construct) ("signature_types");
   4559    }
   4560 
   4561    /* Do an initial pass to scan the .debug_types section, if any, and
   4562       fill in the signatured types hash table.  This lets us handle
   4563       mapping from a type signature to a (cooked) DIE offset directly
   4564       in get_Form_contents.  */
   4565    if (VG_(clo_read_var_info) && ML_(sli_is_valid)(escn_debug_types)) {
   4566       init_Cursor( &info, escn_debug_types, 0, barf,
   4567                    "Overrun whilst reading .debug_types section" );
   4568       TRACE_D3("\n------ Collecting signatures from "
   4569                ".debug_types section ------\n");
   4570 
   4571       while (True) {
   4572          UWord   cu_start_offset, cu_offset_now;
   4573          CUConst cc;
   4574 
   4575          cu_start_offset = get_position_of_Cursor( &info );
   4576          TRACE_D3("\n");
   4577          TRACE_D3("  Compilation Unit @ offset 0x%lx:\n", cu_start_offset);
   4578          /* parse_CU_header initialises the CU's abbv hash table.  */
   4579          parse_CU_Header( &cc, td3, &info, escn_debug_abbv, True, False );
   4580 
   4581          /* Needed by cook_die.  */
   4582          cc.types_cuOff_bias = escn_debug_info.szB;
   4583 
   4584          record_signatured_type( signature_types, cc.type_signature,
   4585                                  cook_die( &cc, cc.type_offset ));
   4586 
   4587          /* Until proven otherwise we assume we don't need the icc9
   4588             workaround in this case; see the DIE-reading loop below
   4589             for details.  */
   4590          cu_offset_now = (cu_start_offset + cc.unit_length
   4591                           + (cc.is_dw64 ? 12 : 4));
   4592 
   4593          clear_CUConst ( &cc);
   4594 
   4595          if (cu_offset_now >= escn_debug_types.szB) {
   4596             break;
   4597          }
   4598 
   4599          set_position_of_Cursor ( &info, cu_offset_now );
   4600       }
   4601    }
   4602 
   4603    /* Perform three DIE-reading passes.  The first pass reads DIEs from
   4604       alternate .debug_info (if any), the second pass reads DIEs from
   4605       .debug_info, and the third pass reads DIEs from .debug_types.
   4606       Moving the body of this loop into a separate function would
   4607       require a large number of arguments to be passed in, so it is
   4608       kept inline instead.  */
   4609    for (pass = 0; pass < 3; ++pass) {
   4610       ULong section_size;
   4611 
   4612       if (pass == 0) {
   4613          if (!ML_(sli_is_valid)(escn_debug_info_alt))
   4614 	    continue;
   4615          /* Now loop over the Compilation Units listed in the alternate
   4616             .debug_info section (see D3SPEC sec 7.5) paras 1 and 2.
   4617             Each compilation unit contains a Compilation Unit Header
   4618             followed by precisely one DW_TAG_compile_unit or
   4619             DW_TAG_partial_unit DIE. */
   4620          init_Cursor( &info, escn_debug_info_alt, 0, barf,
   4621                       "Overrun whilst reading alternate .debug_info section" );
   4622          section_size = escn_debug_info_alt.szB;
   4623 
   4624          TRACE_D3("\n------ Parsing alternate .debug_info section ------\n");
   4625       } else if (pass == 1) {
   4626          /* Now loop over the Compilation Units listed in the .debug_info
   4627             section (see D3SPEC sec 7.5) paras 1 and 2.  Each compilation
   4628             unit contains a Compilation Unit Header followed by precisely
   4629             one DW_TAG_compile_unit or DW_TAG_partial_unit DIE. */
   4630          init_Cursor( &info, escn_debug_info, 0, barf,
   4631                       "Overrun whilst reading .debug_info section" );
   4632          section_size = escn_debug_info.szB;
   4633 
   4634          TRACE_D3("\n------ Parsing .debug_info section ------\n");
   4635       } else {
   4636          if (!ML_(sli_is_valid)(escn_debug_types))
   4637             continue;
   4638          if (!VG_(clo_read_var_info))
   4639             continue; // Types not needed when only reading inline info.
   4640          init_Cursor( &info, escn_debug_types, 0, barf,
   4641                       "Overrun whilst reading .debug_types section" );
   4642          section_size = escn_debug_types.szB;
   4643 
   4644          TRACE_D3("\n------ Parsing .debug_types section ------\n");
   4645       }
   4646 
   4647       while (True) {
   4648          ULong   cu_start_offset, cu_offset_now;
   4649          CUConst cc;
   4650          /* It may be that the stated size of this CU is larger than the
   4651             amount of stuff actually in it.  icc9 seems to generate CUs
   4652             thusly.  We use these variables to figure out if this is
   4653             indeed the case, and if so how many bytes we need to skip to
   4654             get to the start of the next CU.  Not skipping those bytes
   4655             causes us to misidentify the start of the next CU, and it all
   4656             goes badly wrong after that (not surprisingly). */
   4657          UWord cu_size_including_IniLen, cu_amount_used;
   4658 
   4659          /* It seems icc9 finishes the DIE info before debug_info_sz
   4660             bytes have been used up.  So be flexible, and declare the
   4661             sequence complete if there is not enough remaining bytes to
   4662             hold even the smallest conceivable CU header.  (11 bytes I
   4663             reckon). */
   4664          /* JRS 23Jan09: I suspect this is no longer necessary now that
   4665             the code below contains a 'while (cu_amount_used <
   4666             cu_size_including_IniLen ...'  style loop, which skips over
   4667             any leftover bytes at the end of a CU in the case where the
   4668             CU's stated size is larger than its actual size (as
   4669             determined by reading all its DIEs).  However, for prudence,
   4670             I'll leave the following test in place.  I can't see that a
   4671             CU header can be smaller than 11 bytes, so I don't think
   4672             there's any harm possible through the test -- it just adds
   4673             robustness. */
   4674          Word avail = get_remaining_length_Cursor( &info );
   4675          if (avail < 11) {
   4676             if (avail > 0)
   4677                TRACE_D3("new_dwarf3_reader_wrk: warning: "
   4678                         "%ld unused bytes after end of DIEs\n", avail);
   4679             break;
   4680          }
   4681 
   4682          if (VG_(clo_read_var_info)) {
   4683             /* Check the varparser's stack is in a sane state. */
   4684             vg_assert(varparser.sp == -1);
   4685             /* Check the typarser's stack is in a sane state. */
   4686             vg_assert(typarser.sp == -1);
   4687          }
   4688 
   4689          cu_start_offset = get_position_of_Cursor( &info );
   4690          TRACE_D3("\n");
   4691          TRACE_D3("  Compilation Unit @ offset 0x%llx:\n", cu_start_offset);
   4692          /* parse_CU_header initialises the CU's hashtable of abbvs ht_abbvs */
   4693          if (pass == 0) {
   4694             parse_CU_Header( &cc, td3, &info, escn_debug_abbv_alt,
   4695                              False, True );
   4696          } else {
   4697             parse_CU_Header( &cc, td3, &info, escn_debug_abbv,
   4698                              pass == 2, False );
   4699          }
   4700          cc.escn_debug_str      = pass == 0 ? escn_debug_str_alt
   4701                                             : escn_debug_str;
   4702          cc.escn_debug_ranges   = escn_debug_ranges;
   4703          cc.escn_debug_loc      = escn_debug_loc;
   4704          cc.escn_debug_line     = pass == 0 ? escn_debug_line_alt
   4705                                             : escn_debug_line;
   4706          cc.escn_debug_info     = pass == 0 ? escn_debug_info_alt
   4707                                             : escn_debug_info;
   4708          cc.escn_debug_types    = escn_debug_types;
   4709          cc.escn_debug_info_alt = escn_debug_info_alt;
   4710          cc.escn_debug_str_alt  = escn_debug_str_alt;
   4711          cc.types_cuOff_bias    = escn_debug_info.szB;
   4712          cc.alt_cuOff_bias      = escn_debug_info.szB + escn_debug_types.szB;
   4713          cc.cu_start_offset     = cu_start_offset;
   4714          cc.di = di;
   4715          /* The CU's svma can be deduced by looking at the AT_low_pc
   4716             value in the top level TAG_compile_unit, which is the topmost
   4717             DIE.  We'll leave it for the 'varparser' to acquire that info
   4718             and fill it in -- since it is the only party to want to know
   4719             it. */
   4720          cc.cu_svma_known = False;
   4721          cc.cu_svma       = 0;
   4722 
   4723          if (VG_(clo_read_var_info)) {
   4724             cc.signature_types = signature_types;
   4725 
   4726             /* Create a fake outermost-level range covering the entire
   4727                address range.  So we always have *something* to catch all
   4728                variable declarations. */
   4729             varstack_push( &cc, &varparser, td3,
   4730                            unitary_range_list(0UL, ~0UL),
   4731                            -1, False/*isFunc*/, NULL/*fbGX*/ );
   4732 
   4733             /* And set up the fndn_ix_Table.  When we come across the top
   4734                level DIE for this CU (which is what the next call to
   4735                read_DIE should process) we will copy all the file names out
   4736                of the .debug_line img area and use this table to look up the
   4737                copies when we later see filename numbers in DW_TAG_variables
   4738                etc. */
   4739             vg_assert(!varparser.fndn_ix_Table );
   4740             varparser.fndn_ix_Table
   4741                = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.5var",
   4742                              ML_(dinfo_free),
   4743                              sizeof(UInt) );
   4744          }
   4745 
   4746          if (VG_(clo_read_inline_info)) {
   4747             /* fndn_ix_Table for the inlined call parser */
   4748             vg_assert(!inlparser.fndn_ix_Table );
   4749             inlparser.fndn_ix_Table
   4750                = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.5inl",
   4751                              ML_(dinfo_free),
   4752                              sizeof(UInt) );
   4753          }
   4754 
   4755          /* Now read the one-and-only top-level DIE for this CU. */
   4756          vg_assert(!VG_(clo_read_var_info) || varparser.sp == 0);
   4757          read_DIE( rangestree,
   4758                    tyents, tempvars, gexprs,
   4759                    &typarser, &varparser, &inlparser,
   4760                    &info, td3, &cc, 0 );
   4761 
   4762          cu_offset_now = get_position_of_Cursor( &info );
   4763 
   4764          if (0) VG_(printf)("Travelled: %llu  size %llu\n",
   4765                             cu_offset_now - cc.cu_start_offset,
   4766                             cc.unit_length + (cc.is_dw64 ? 12 : 4));
   4767 
   4768          /* How big the CU claims it is .. */
   4769          cu_size_including_IniLen = cc.unit_length + (cc.is_dw64 ? 12 : 4);
   4770          /* .. vs how big we have found it to be */
   4771          cu_amount_used = cu_offset_now - cc.cu_start_offset;
   4772 
   4773          if (1) TRACE_D3("offset now %llu, d-i-size %llu\n",
   4774                          cu_offset_now, section_size);
   4775          if (cu_offset_now > section_size)
   4776             barf("toplevel DIEs beyond end of CU");
   4777 
   4778          /* If the CU is bigger than it claims to be, we've got a serious
   4779             problem. */
   4780          if (cu_amount_used > cu_size_including_IniLen)
   4781             barf("CU's actual size appears to be larger than it claims it is");
   4782 
   4783          /* If the CU is smaller than it claims to be, we need to skip some
   4784             bytes.  Loop updates cu_offset_new and cu_amount_used. */
   4785          while (cu_amount_used < cu_size_including_IniLen
   4786                 && get_remaining_length_Cursor( &info ) > 0) {
   4787             if (0) VG_(printf)("SKIP\n");
   4788             (void)get_UChar( &info );
   4789             cu_offset_now = get_position_of_Cursor( &info );
   4790             cu_amount_used = cu_offset_now - cc.cu_start_offset;
   4791          }
   4792 
   4793          if (VG_(clo_read_var_info)) {
   4794             /* Preen to level -2.  DIEs have level >= 0 so -2 cannot occur
   4795                anywhere else at all.  Our fake the-entire-address-space
   4796                range is at level -1, so preening to -2 should completely
   4797                empty the stack out. */
   4798             TRACE_D3("\n");
   4799             varstack_preen( &varparser, td3, -2 );
   4800             /* Similarly, empty the type stack out. */
   4801             typestack_preen( &typarser, td3, -2 );
   4802          }
   4803 
   4804          if (VG_(clo_read_var_info)) {
   4805             vg_assert(varparser.fndn_ix_Table );
   4806             VG_(deleteXA)( varparser.fndn_ix_Table );
   4807             varparser.fndn_ix_Table = NULL;
   4808          }
   4809          if (VG_(clo_read_inline_info)) {
   4810             vg_assert(inlparser.fndn_ix_Table );
   4811             VG_(deleteXA)( inlparser.fndn_ix_Table );
   4812             inlparser.fndn_ix_Table = NULL;
   4813          }
   4814          clear_CUConst(&cc);
   4815 
   4816          if (cu_offset_now == section_size)
   4817             break;
   4818          /* else keep going */
   4819       }
   4820    }
   4821 
   4822 
   4823    if (VG_(clo_read_var_info)) {
   4824       /* From here on we're post-processing the stuff we got
   4825          out of the .debug_info section. */
   4826       if (TD3) {
   4827          TRACE_D3("\n");
   4828          ML_(pp_TyEnts)(tyents, "Initial type entity (TyEnt) array");
   4829          TRACE_D3("\n");
   4830          TRACE_D3("------ Compressing type entries ------\n");
   4831       }
   4832 
   4833       tyents_cache = ML_(dinfo_zalloc)( "di.readdwarf3.ndrw.6",
   4834                                         sizeof(TyEntIndexCache) );
   4835       ML_(TyEntIndexCache__invalidate)( tyents_cache );
   4836       dedup_types( td3, tyents, tyents_cache );
   4837       if (TD3) {
   4838          TRACE_D3("\n");
   4839          ML_(pp_TyEnts)(tyents, "After type entity (TyEnt) compression");
   4840       }
   4841 
   4842       TRACE_D3("\n");
   4843       TRACE_D3("------ Resolving the types of variables ------\n" );
   4844       resolve_variable_types( barf, tyents, tyents_cache, tempvars );
   4845 
   4846       /* Copy all the non-INDIR tyents into a new table.  For large
   4847          .so's, about 90% of the tyents will by now have been resolved to
   4848          INDIRs, and we no longer need them, and so don't need to store
   4849          them. */
   4850       tyents_to_keep
   4851          = VG_(newXA)( ML_(dinfo_zalloc),
   4852                        "di.readdwarf3.ndrw.7 (TyEnt to-keep array)",
   4853                        ML_(dinfo_free), sizeof(TyEnt) );
   4854       n = VG_(sizeXA)( tyents );
   4855       for (i = 0; i < n; i++) {
   4856          TyEnt* ent = VG_(indexXA)( tyents, i );
   4857          if (ent->tag != Te_INDIR)
   4858             VG_(addToXA)( tyents_to_keep, ent );
   4859       }
   4860 
   4861       VG_(deleteXA)( tyents );
   4862       tyents = NULL;
   4863       ML_(dinfo_free)( tyents_cache );
   4864       tyents_cache = NULL;
   4865 
   4866       /* Sort tyents_to_keep so we can lookup in it.  A complete (if
   4867          minor) waste of time, since tyents itself is sorted, but
   4868          necessary since VG_(lookupXA) refuses to cooperate if we
   4869          don't. */
   4870       VG_(setCmpFnXA)( tyents_to_keep, (XACmpFn_t) ML_(TyEnt__cmp_by_cuOff_only) );
   4871       VG_(sortXA)( tyents_to_keep );
   4872 
   4873       /* Enable cacheing on tyents_to_keep */
   4874       tyents_to_keep_cache
   4875          = ML_(dinfo_zalloc)( "di.readdwarf3.ndrw.8",
   4876                               sizeof(TyEntIndexCache) );
   4877       ML_(TyEntIndexCache__invalidate)( tyents_to_keep_cache );
   4878 
   4879       /* And record the tyents in the DebugInfo.  We do this before
   4880          starting to hand variables to ML_(addVar), since if ML_(addVar)
   4881          wants to do debug printing (of the types of said vars) then it
   4882          will need the tyents.*/
   4883       vg_assert(!di->admin_tyents);
   4884       di->admin_tyents = tyents_to_keep;
   4885 
   4886       /* Bias all the location expressions. */
   4887       TRACE_D3("\n");
   4888       TRACE_D3("------ Biasing the location expressions ------\n" );
   4889 
   4890       n = VG_(sizeXA)( gexprs );
   4891       for (i = 0; i < n; i++) {
   4892          gexpr = *(GExpr**)VG_(indexXA)( gexprs, i );
   4893          bias_GX( gexpr, di );
   4894       }
   4895 
   4896       TRACE_D3("\n");
   4897       TRACE_D3("------ Acquired the following variables: ------\n\n");
   4898 
   4899       /* Park (pointers to) all the vars in an XArray, so we can look up
   4900          abstract origins quickly.  The array is sorted (hence, looked-up
   4901          by) the .dioff fields.  Since the .dioffs should be in strictly
   4902          ascending order, there is no need to sort the array after
   4903          construction.  The ascendingness is however asserted for. */
   4904       dioff_lookup_tab
   4905          = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.9",
   4906                        ML_(dinfo_free),
   4907                        sizeof(TempVar*) );
   4908 
   4909       n = VG_(sizeXA)( tempvars );
   4910       Word first_primary_var = 0;
   4911       for (first_primary_var = 0;
   4912            escn_debug_info_alt.szB/*really?*/ && first_primary_var < n;
   4913            first_primary_var++) {
   4914          varp = *(TempVar**)VG_(indexXA)( tempvars, first_primary_var );
   4915          if (varp->dioff < escn_debug_info.szB + escn_debug_types.szB)
   4916             break;
   4917       }
   4918       for (i = 0; i < n; i++) {
   4919          varp = *(TempVar**)VG_(indexXA)( tempvars, (i + first_primary_var) % n );
   4920          if (i > first_primary_var) {
   4921             varp2 = *(TempVar**)VG_(indexXA)( tempvars,
   4922                                               (i + first_primary_var - 1) % n );
   4923             /* why should this hold?  Only, I think, because we've
   4924                constructed the array by reading .debug_info sequentially,
   4925                and so the array .dioff fields should reflect that, and be
   4926                strictly ascending. */
   4927             vg_assert(varp2->dioff < varp->dioff);
   4928          }
   4929          VG_(addToXA)( dioff_lookup_tab, &varp );
   4930       }
   4931       VG_(setCmpFnXA)( dioff_lookup_tab, cmp_TempVar_by_dioff );
   4932       VG_(sortXA)( dioff_lookup_tab ); /* POINTLESS; FIXME: rm */
   4933 
   4934       /* Now visit each var.  Collect up as much info as possible for
   4935          each var and hand it to ML_(addVar). */
   4936       n = VG_(sizeXA)( tempvars );
   4937       for (j = 0; j < n; j++) {
   4938          TyEnt* ent;
   4939          varp = *(TempVar**)VG_(indexXA)( tempvars, j );
   4940 
   4941          /* Possibly show .. */
   4942          if (TD3) {
   4943             VG_(printf)("<%lx> addVar: level %d: %s :: ",
   4944                         varp->dioff,
   4945                         varp->level,
   4946                         varp->name ? varp->name : "<anon_var>" );
   4947             if (varp->typeR) {
   4948                ML_(pp_TyEnt_C_ishly)( tyents_to_keep, varp->typeR );
   4949             } else {
   4950                VG_(printf)("NULL");
   4951             }
   4952             VG_(printf)("\n  Loc=");
   4953             if (varp->gexpr) {
   4954                ML_(pp_GX)(varp->gexpr);
   4955             } else {
   4956                VG_(printf)("NULL");
   4957             }
   4958             VG_(printf)("\n");
   4959             if (varp->fbGX) {
   4960                VG_(printf)("  FrB=");
   4961                ML_(pp_GX)( varp->fbGX );
   4962                VG_(printf)("\n");
   4963             } else {
   4964                VG_(printf)("  FrB=none\n");
   4965             }
   4966             VG_(printf)("  declared at: %u %s:%d\n",
   4967                         varp->fndn_ix,
   4968                         ML_(fndn_ix2filename) (di, varp->fndn_ix),
   4969                         varp->fLine );
   4970             if (varp->absOri != (UWord)D3_INVALID_CUOFF)
   4971                VG_(printf)("  abstract origin: <%lx>\n", varp->absOri);
   4972          }
   4973 
   4974          /* Skip variables which have no location.  These must be
   4975             abstract instances; they are useless as-is since with no
   4976             location they have no specified memory location.  They will
   4977             presumably be referred to via the absOri fields of other
   4978             variables. */
   4979          if (!varp->gexpr) {
   4980             TRACE_D3("  SKIP (no location)\n\n");
   4981             continue;
   4982          }
   4983 
   4984          /* So it has a location, at least.  If it refers to some other
   4985             entry through its absOri field, pull in further info through
   4986             that. */
   4987          if (varp->absOri != (UWord)D3_INVALID_CUOFF) {
   4988             Bool found;
   4989             Word ixFirst, ixLast;
   4990             TempVar key;
   4991             TempVar* keyp = &key;
   4992             TempVar *varAI;
   4993             VG_(memset)(&key, 0, sizeof(key)); /* not necessary */
   4994             key.dioff = varp->absOri; /* this is what we want to find */
   4995             found = VG_(lookupXA)( dioff_lookup_tab, &keyp,
   4996                                    &ixFirst, &ixLast );
   4997             if (!found) {
   4998                /* barf("DW_AT_abstract_origin can't be resolved"); */
   4999                TRACE_D3("  SKIP (DW_AT_abstract_origin can't be resolved)\n\n");
   5000                continue;
   5001             }
   5002             /* If the following fails, there is more than one entry with
   5003                the same dioff.  Which can't happen. */
   5004             vg_assert(ixFirst == ixLast);
   5005             varAI = *(TempVar**)VG_(indexXA)( dioff_lookup_tab, ixFirst );
   5006             /* stay sane */
   5007             vg_assert(varAI);
   5008             vg_assert(varAI->dioff == varp->absOri);
   5009 
   5010             /* Copy what useful info we can. */
   5011             if (varAI->typeR && !varp->typeR)
   5012                varp->typeR = varAI->typeR;
   5013             if (varAI->name && !varp->name)
   5014                varp->name = varAI->name;
   5015             if (varAI->fndn_ix && !varp->fndn_ix)
   5016                varp->fndn_ix = varAI->fndn_ix;
   5017             if (varAI->fLine > 0 && varp->fLine == 0)
   5018                varp->fLine = varAI->fLine;
   5019          }
   5020 
   5021          /* Give it a name if it doesn't have one. */
   5022          if (!varp->name)
   5023             varp->name = ML_(addStr)( di, "<anon_var>", -1 );
   5024 
   5025          /* So now does it have enough info to be useful? */
   5026          /* NOTE: re typeR: this is a hack.  If typeR is Te_UNKNOWN then
   5027             the type didn't get resolved.  Really, in that case
   5028             something's broken earlier on, and should be fixed, rather
   5029             than just skipping the variable. */
   5030          ent = ML_(TyEnts__index_by_cuOff)( tyents_to_keep,
   5031                                             tyents_to_keep_cache,
   5032                                             varp->typeR );
   5033          /* The next two assertions should be guaranteed by
   5034             our previous call to resolve_variable_types. */
   5035          vg_assert(ent);
   5036          vg_assert(ML_(TyEnt__is_type)(ent) || ent->tag == Te_UNKNOWN);
   5037 
   5038          if (ent->tag == Te_UNKNOWN) continue;
   5039 
   5040          vg_assert(varp->gexpr);
   5041          vg_assert(varp->name);
   5042          vg_assert(varp->typeR);
   5043          vg_assert(varp->level >= 0);
   5044 
   5045          /* Ok.  So we're going to keep it.  Call ML_(addVar) once for
   5046             each address range in which the variable exists. */
   5047          TRACE_D3("  ACQUIRE for range(s) ");
   5048          { AddrRange  oneRange;
   5049            AddrRange* varPcRanges;
   5050            Word       nVarPcRanges;
   5051            /* Set up to iterate over address ranges, however
   5052               represented. */
   5053            if (varp->nRanges == 0 || varp->nRanges == 1) {
   5054               vg_assert(!varp->rngMany);
   5055               if (varp->nRanges == 0) {
   5056                  vg_assert(varp->rngOneMin == 0);
   5057                  vg_assert(varp->rngOneMax == 0);
   5058               }
   5059               nVarPcRanges = varp->nRanges;
   5060               oneRange.aMin = varp->rngOneMin;
   5061               oneRange.aMax = varp->rngOneMax;
   5062               varPcRanges = &oneRange;
   5063            } else {
   5064               vg_assert(varp->rngMany);
   5065               vg_assert(varp->rngOneMin == 0);
   5066               vg_assert(varp->rngOneMax == 0);
   5067               nVarPcRanges = VG_(sizeXA)(varp->rngMany);
   5068               vg_assert(nVarPcRanges >= 2);
   5069               vg_assert(nVarPcRanges == (Word)varp->nRanges);
   5070               varPcRanges = VG_(indexXA)(varp->rngMany, 0);
   5071            }
   5072            if (varp->level == 0)
   5073               vg_assert( nVarPcRanges == 1 );
   5074            /* and iterate */
   5075            for (i = 0; i < nVarPcRanges; i++) {
   5076               Addr pcMin = varPcRanges[i].aMin;
   5077               Addr pcMax = varPcRanges[i].aMax;
   5078               vg_assert(pcMin <= pcMax);
   5079               /* Level 0 is the global address range.  So at level 0 we
   5080                  don't want to bias pcMin/pcMax; but at all other levels
   5081                  we do since those are derived from svmas in the Dwarf
   5082                  we're reading.  Be paranoid ... */
   5083               if (varp->level == 0) {
   5084                  vg_assert(pcMin == (Addr)0);
   5085                  vg_assert(pcMax == ~(Addr)0);
   5086               } else {
   5087                  /* vg_assert(pcMin > (Addr)0);
   5088                     No .. we can legitimately expect to see ranges like
   5089                     0x0-0x11D (pre-biasing, of course). */
   5090                  vg_assert(pcMax < ~(Addr)0);
   5091               }
   5092 
   5093               /* Apply text biasing, for non-global variables. */
   5094               if (varp->level > 0) {
   5095                  pcMin += di->text_debug_bias;
   5096                  pcMax += di->text_debug_bias;
   5097               }
   5098 
   5099               if (i > 0 && (i%2) == 0)
   5100                  TRACE_D3("\n                       ");
   5101               TRACE_D3("[%#lx,%#lx] ", pcMin, pcMax );
   5102 
   5103               ML_(addVar)(
   5104                  di, varp->level,
   5105                      pcMin, pcMax,
   5106                      varp->name,  varp->typeR,
   5107                      varp->gexpr, varp->fbGX,
   5108                      varp->fndn_ix, varp->fLine, td3
   5109               );
   5110            }
   5111          }
   5112 
   5113          TRACE_D3("\n\n");
   5114          /* and move on to the next var */
   5115       }
   5116 
   5117       /* Now free all the TempVars */
   5118       n = VG_(sizeXA)( tempvars );
   5119       for (i = 0; i < n; i++) {
   5120          varp = *(TempVar**)VG_(indexXA)( tempvars, i );
   5121          ML_(dinfo_free)(varp);
   5122       }
   5123       VG_(deleteXA)( tempvars );
   5124       tempvars = NULL;
   5125 
   5126       /* and the temp lookup table */
   5127       VG_(deleteXA)( dioff_lookup_tab );
   5128 
   5129       /* and the ranges tree.  Note that we need to also free the XArrays
   5130          which constitute the keys, hence pass VG_(deleteXA) as a
   5131          key-finalizer. */
   5132       VG_(deleteFM)( rangestree, (void(*)(UWord))VG_(deleteXA), NULL );
   5133 
   5134       /* and the tyents_to_keep cache */
   5135       ML_(dinfo_free)( tyents_to_keep_cache );
   5136       tyents_to_keep_cache = NULL;
   5137 
   5138       vg_assert( varparser.fndn_ix_Table == NULL );
   5139 
   5140       /* And the signatured type hash.  */
   5141       VG_(HT_destruct) ( signature_types, ML_(dinfo_free) );
   5142 
   5143       /* record the GExprs in di so they can be freed later */
   5144       vg_assert(!di->admin_gexprs);
   5145       di->admin_gexprs = gexprs;
   5146    }
   5147 
   5148    // Free up dynamically allocated memory
   5149    if (VG_(clo_read_var_info)) {
   5150       type_parser_release(&typarser);
   5151       var_parser_release(&varparser);
   5152    }
   5153 }
   5154 
   5155 
   5156 /*------------------------------------------------------------*/
   5157 /*---                                                      ---*/
   5158 /*--- The "new" DWARF3 reader -- top level control logic   ---*/
   5159 /*---                                                      ---*/
   5160 /*------------------------------------------------------------*/
   5161 
   5162 static Bool               d3rd_jmpbuf_valid  = False;
   5163 static const HChar*       d3rd_jmpbuf_reason = NULL;
   5164 static VG_MINIMAL_JMP_BUF(d3rd_jmpbuf);
   5165 
   5166 static __attribute__((noreturn)) void barf ( const HChar* reason ) {
   5167    vg_assert(d3rd_jmpbuf_valid);
   5168    d3rd_jmpbuf_reason = reason;
   5169    VG_MINIMAL_LONGJMP(d3rd_jmpbuf);
   5170    /*NOTREACHED*/
   5171    vg_assert(0);
   5172 }
   5173 
   5174 
   5175 void
   5176 ML_(new_dwarf3_reader) (
   5177    DebugInfo* di,
   5178    DiSlice escn_debug_info,      DiSlice escn_debug_types,
   5179    DiSlice escn_debug_abbv,      DiSlice escn_debug_line,
   5180    DiSlice escn_debug_str,       DiSlice escn_debug_ranges,
   5181    DiSlice escn_debug_loc,       DiSlice escn_debug_info_alt,
   5182    DiSlice escn_debug_abbv_alt,  DiSlice escn_debug_line_alt,
   5183    DiSlice escn_debug_str_alt
   5184 )
   5185 {
   5186    volatile Int  jumped;
   5187    volatile Bool td3 = di->trace_symtab;
   5188 
   5189    /* Run the _wrk function to read the dwarf3.  If it succeeds, it
   5190       just returns normally.  If there is any failure, it longjmp's
   5191       back here, having first set d3rd_jmpbuf_reason to something
   5192       useful. */
   5193    vg_assert(d3rd_jmpbuf_valid  == False);
   5194    vg_assert(d3rd_jmpbuf_reason == NULL);
   5195 
   5196    d3rd_jmpbuf_valid = True;
   5197    jumped = VG_MINIMAL_SETJMP(d3rd_jmpbuf);
   5198    if (jumped == 0) {
   5199       /* try this ... */
   5200       new_dwarf3_reader_wrk( di, barf,
   5201                              escn_debug_info,     escn_debug_types,
   5202                              escn_debug_abbv,     escn_debug_line,
   5203                              escn_debug_str,      escn_debug_ranges,
   5204                              escn_debug_loc,      escn_debug_info_alt,
   5205                              escn_debug_abbv_alt, escn_debug_line_alt,
   5206                              escn_debug_str_alt );
   5207       d3rd_jmpbuf_valid = False;
   5208       TRACE_D3("\n------ .debug_info reading was successful ------\n");
   5209    } else {
   5210       /* It longjmp'd. */
   5211       d3rd_jmpbuf_valid = False;
   5212       /* Can't longjump without giving some sort of reason. */
   5213       vg_assert(d3rd_jmpbuf_reason != NULL);
   5214 
   5215       TRACE_D3("\n------ .debug_info reading failed ------\n");
   5216 
   5217       ML_(symerr)(di, True, d3rd_jmpbuf_reason);
   5218    }
   5219 
   5220    d3rd_jmpbuf_valid  = False;
   5221    d3rd_jmpbuf_reason = NULL;
   5222 }
   5223 
   5224 
   5225 
   5226 /* --- Unused code fragments which might be useful one day. --- */
   5227 
   5228 #if 0
   5229    /* Read the arange tables */
   5230    TRACE_SYMTAB("\n");
   5231    TRACE_SYMTAB("\n------ The contents of .debug_arange ------\n");
   5232    init_Cursor( &aranges, debug_aranges_img,
   5233                 debug_aranges_sz, 0, barf,
   5234                 "Overrun whilst reading .debug_aranges section" );
   5235    while (True) {
   5236       ULong  len, d_i_offset;
   5237       Bool   is64;
   5238       UShort version;
   5239       UChar  asize, segsize;
   5240 
   5241       if (is_at_end_Cursor( &aranges ))
   5242          break;
   5243       /* Read one arange thingy */
   5244       /* initial_length field */
   5245       len = get_Initial_Length( &is64, &aranges,
   5246                "in .debug_aranges: invalid initial-length field" );
   5247       version    = get_UShort( &aranges );
   5248       d_i_offset = get_Dwarfish_UWord( &aranges, is64 );
   5249       asize      = get_UChar( &aranges );
   5250       segsize    = get_UChar( &aranges );
   5251       TRACE_D3("  Length:                   %llu\n", len);
   5252       TRACE_D3("  Version:                  %d\n", (Int)version);
   5253       TRACE_D3("  Offset into .debug_info:  %llx\n", d_i_offset);
   5254       TRACE_D3("  Pointer Size:             %d\n", (Int)asize);
   5255       TRACE_D3("  Segment Size:             %d\n", (Int)segsize);
   5256       TRACE_D3("\n");
   5257       TRACE_D3("    Address            Length\n");
   5258 
   5259       while ((get_position_of_Cursor( &aranges ) % (2 * asize)) > 0) {
   5260          (void)get_UChar( & aranges );
   5261       }
   5262       while (True) {
   5263          ULong address = get_Dwarfish_UWord( &aranges, asize==8 );
   5264          ULong length = get_Dwarfish_UWord( &aranges, asize==8 );
   5265          TRACE_D3("    0x%016llx 0x%llx\n", address, length);
   5266          if (address == 0 && length == 0) break;
   5267       }
   5268    }
   5269    TRACE_SYMTAB("\n");
   5270 #endif
   5271 
   5272 #endif // defined(VGO_linux) || defined(VGO_darwin) || defined(VGO_solaris)
   5273 
   5274 /*--------------------------------------------------------------------*/
   5275 /*--- end                                                          ---*/
   5276 /*--------------------------------------------------------------------*/
   5277