Home | History | Annotate | Download | only in m_debuginfo
      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- Format-neutral storage of and querying of info acquired from ---*/
      4 /*--- ELF/XCOFF stabs/dwarf1/dwarf2 debug info.                    ---*/
      5 /*---                                               priv_storage.h ---*/
      6 /*--------------------------------------------------------------------*/
      7 
      8 /*
      9    This file is part of Valgrind, a dynamic binary instrumentation
     10    framework.
     11 
     12    Copyright (C) 2000-2013 Julian Seward
     13       jseward (at) acm.org
     14 
     15    This program is free software; you can redistribute it and/or
     16    modify it under the terms of the GNU General Public License as
     17    published by the Free Software Foundation; either version 2 of the
     18    License, or (at your option) any later version.
     19 
     20    This program is distributed in the hope that it will be useful, but
     21    WITHOUT ANY WARRANTY; without even the implied warranty of
     22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     23    General Public License for more details.
     24 
     25    You should have received a copy of the GNU General Public License
     26    along with this program; if not, write to the Free Software
     27    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     28    02111-1307, USA.
     29 
     30    The GNU General Public License is contained in the file COPYING.
     31 */
     32 /*
     33    Stabs reader greatly improved by Nick Nethercote, Apr 02.
     34    This module was also extensively hacked on by Jeremy Fitzhardinge
     35    and Tom Hughes.
     36 */
     37 /* See comment at top of debuginfo.c for explanation of
     38    the _svma / _avma / _image / _bias naming scheme.
     39 */
     40 /* Note this is not freestanding; needs pub_core_xarray.h and
     41    priv_tytypes.h to be included before it. */
     42 
     43 #ifndef __PRIV_STORAGE_H
     44 #define __PRIV_STORAGE_H
     45 
     46 #include "pub_core_basics.h"   // Addr
     47 #include "pub_core_xarray.h"   // XArray
     48 #include "priv_d3basics.h"     // GExpr et al.
     49 #include "priv_image.h"        // DiCursor
     50 
     51 /* --------------------- SYMBOLS --------------------- */
     52 
     53 /* A structure to hold an ELF/MachO symbol (very crudely).  Usually
     54    the symbol only has one name, which is stored in ::pri_name, and
     55    ::sec_names is NULL.  If there are other names, these are stored in
     56    ::sec_names, which is a NULL terminated vector holding the names.
     57    The vector is allocated in VG_AR_DINFO, the names themselves live
     58    in DebugInfo::strchunks.
     59 
     60    From the point of view of ELF, the primary vs secondary distinction
     61    is artificial: they are all just names associated with the address,
     62    none of which has higher precedence than any other.  However, from
     63    the point of view of mapping an address to a name to display to the
     64    user, we need to choose one "preferred" name, and so that might as
     65    well be installed as the pri_name, whilst all others can live in
     66    sec_names[].  This has the convenient side effect that, in the
     67    common case where there is only one name for the address,
     68    sec_names[] does not need to be allocated.
     69 */
     70 typedef
     71    struct {
     72       Addr    addr;    /* lowest address of entity */
     73       Addr    tocptr;  /* ppc64-linux only: value that R2 should have */
     74       HChar*  pri_name;  /* primary name, never NULL */
     75       HChar** sec_names; /* NULL, or a NULL term'd array of other names */
     76       // XXX: this could be shrunk (on 32-bit platforms) by using 30
     77       // bits for the size and 1 bit each for isText and isIFunc.  If you
     78       // do this, make sure that all assignments to the latter two use
     79       // 0 or 1 (or True or False), and that a positive number larger
     80       // than 1 is never used to represent True.
     81       UInt    size;    /* size in bytes */
     82       Bool    isText;
     83       Bool    isIFunc; /* symbol is an indirect function? */
     84    }
     85    DiSym;
     86 
     87 /* --------------------- SRCLOCS --------------------- */
     88 
     89 /* Line count at which overflow happens, due to line numbers being
     90    stored as shorts in `struct nlist' in a.out.h. */
     91 #define LINENO_OVERFLOW (1 << (sizeof(short) * 8))
     92 
     93 #define LINENO_BITS     20
     94 #define LOC_SIZE_BITS  (32 - LINENO_BITS)
     95 #define MAX_LINENO     ((1 << LINENO_BITS) - 1)
     96 
     97 /* Unlikely to have any lines with instruction ranges > 4096 bytes */
     98 #define MAX_LOC_SIZE   ((1 << LOC_SIZE_BITS) - 1)
     99 
    100 /* Number used to detect line number overflows; if one line is
    101    60000-odd smaller than the previous, it was probably an overflow.
    102  */
    103 #define OVERFLOW_DIFFERENCE     (LINENO_OVERFLOW - 5000)
    104 
    105 /* A structure to hold addr-to-source info for a single line.  There
    106   can be a lot of these, hence the dense packing. */
    107 typedef
    108    struct {
    109       /* Word 1 */
    110       Addr   addr;               /* lowest address for this line */
    111       /* Word 2 */
    112       UShort size:LOC_SIZE_BITS; /* # bytes; we catch overflows of this */
    113       UInt   lineno:LINENO_BITS; /* source line number, or zero */
    114       /* Word 3 */
    115       const HChar* filename;     /* source filename */
    116       /* Word 4 */
    117       const HChar* dirname;      /* source directory name */
    118    }
    119    DiLoc;
    120 
    121 /* --------------------- CF INFO --------------------- */
    122 
    123 /* DiCfSI: a structure to summarise DWARF2/3 CFA info for the code
    124    address range [base .. base+len-1].
    125 
    126    On x86 and amd64 ("IA"), if you know ({e,r}sp, {e,r}bp, {e,r}ip) at
    127    some point and {e,r}ip is in the range [base .. base+len-1], it
    128    tells you how to calculate ({e,r}sp, {e,r}bp) for the caller of the
    129    current frame and also ra, the return address of the current frame.
    130 
    131    First off, calculate CFA, the Canonical Frame Address, thusly:
    132 
    133      cfa = case cfa_how of
    134               CFIC_IA_SPREL -> {e,r}sp + cfa_off
    135               CFIC_IA_BPREL -> {e,r}bp + cfa_off
    136               CFIC_EXPR     -> expr whose index is in cfa_off
    137 
    138    Once that is done, the previous frame's {e,r}sp/{e,r}bp values and
    139    this frame's {e,r}ra value can be calculated like this:
    140 
    141      old_{e,r}sp/{e,r}bp/ra
    142          = case {e,r}sp/{e,r}bp/ra_how of
    143               CFIR_UNKNOWN   -> we don't know, sorry
    144               CFIR_SAME      -> same as it was before (sp/fp only)
    145               CFIR_CFAREL    -> cfa + sp/bp/ra_off
    146               CFIR_MEMCFAREL -> *( cfa + sp/bp/ra_off )
    147               CFIR_EXPR      -> expr whose index is in sp/bp/ra_off
    148 
    149    On ARM it's pretty much the same, except we have more registers to
    150    keep track of:
    151 
    152      cfa = case cfa_how of
    153               CFIC_ARM_R13REL -> r13 + cfa_off
    154               CFIC_ARM_R12REL -> r12 + cfa_off
    155               CFIC_ARM_R11REL -> r11 + cfa_off
    156               CFIC_ARM_R7REL  -> r7  + cfa_off
    157               CFIC_EXPR       -> expr whose index is in cfa_off
    158 
    159      old_r14/r13/r12/r11/r7/ra
    160          = case r14/r13/r12/r11/r7/ra_how of
    161               CFIR_UNKNOWN   -> we don't know, sorry
    162               CFIR_SAME      -> same as it was before (r14/r13/r12/r11/r7 only)
    163               CFIR_CFAREL    -> cfa + r14/r13/r12/r11/r7/ra_off
    164               CFIR_MEMCFAREL -> *( cfa + r14/r13/r12/r11/r7/ra_off )
    165               CFIR_EXPR      -> expr whose index is in r14/r13/r12/r11/r7/ra_off
    166 
    167    On ARM64:
    168 
    169      cfa = case cfa_how of
    170               CFIC_ARM64_SPREL  -> sp + cfa_off
    171               CFIC_ARM64_X29REL -> x29 + cfa_off
    172               CFIC_EXPR         -> expr whose index is in cfa_off
    173 
    174      old_sp/x30/x29/ra
    175          = case sp/x30/x29/ra_how of
    176               CFIR_UNKNOWN   -> we don't know, sorry
    177               CFIR_SAME      -> same as it was before
    178               CFIR_CFAREL    -> cfa + sp/x30/x29/ra_off
    179               CFIR_MEMCFAREL -> *( cfa + sp/x30/x29/ra_off )
    180               CFIR_EXPR      -> expr whose index is in sp/x30/x29/ra_off
    181 
    182    On s390x we have a similar logic as x86 or amd64. We need the stack pointer
    183    (r15), the frame pointer r11 (like BP) and together with the instruction
    184    address in the PSW we can calculate the previous values:
    185      cfa = case cfa_how of
    186               CFIC_IA_SPREL -> r15 + cfa_off
    187               CFIC_IA_BPREL -> r11 + cfa_off
    188               CFIC_EXPR     -> expr whose index is in cfa_off
    189 
    190      old_sp/fp/ra
    191          = case sp/fp/ra_how of
    192               CFIR_UNKNOWN   -> we don't know, sorry
    193               CFIR_SAME      -> same as it was before (sp/fp only)
    194               CFIR_CFAREL    -> cfa + sp/fp/ra_off
    195               CFIR_MEMCFAREL -> *( cfa + sp/fp/ra_off )
    196               CFIR_EXPR      -> expr whose index is in sp/fp/ra_off
    197 */
    198 
    199 #define CFIC_IA_SPREL     ((UChar)1)
    200 #define CFIC_IA_BPREL     ((UChar)2)
    201 #define CFIC_ARM_R13REL   ((UChar)3)
    202 #define CFIC_ARM_R12REL   ((UChar)4)
    203 #define CFIC_ARM_R11REL   ((UChar)5)
    204 #define CFIC_ARM_R7REL    ((UChar)6)
    205 #define CFIC_ARM64_SPREL  ((UChar)7)
    206 #define CFIC_ARM64_X29REL ((UChar)8)
    207 #define CFIC_EXPR         ((UChar)9)  /* all targets */
    208 
    209 #define CFIR_UNKNOWN      ((UChar)64)
    210 #define CFIR_SAME         ((UChar)65)
    211 #define CFIR_CFAREL       ((UChar)66)
    212 #define CFIR_MEMCFAREL    ((UChar)67)
    213 #define CFIR_EXPR         ((UChar)68)
    214 
    215 #if defined(VGA_x86) || defined(VGA_amd64)
    216 typedef
    217    struct {
    218       Addr  base;
    219       UInt  len;
    220       UChar cfa_how; /* a CFIC_IA value */
    221       UChar ra_how;  /* a CFIR_ value */
    222       UChar sp_how;  /* a CFIR_ value */
    223       UChar bp_how;  /* a CFIR_ value */
    224       Int   cfa_off;
    225       Int   ra_off;
    226       Int   sp_off;
    227       Int   bp_off;
    228    }
    229    DiCfSI;
    230 #elif defined(VGA_arm)
    231 typedef
    232    struct {
    233       Addr  base;
    234       UInt  len;
    235       UChar cfa_how; /* a CFIC_ value */
    236       UChar ra_how;  /* a CFIR_ value */
    237       UChar r14_how; /* a CFIR_ value */
    238       UChar r13_how; /* a CFIR_ value */
    239       UChar r12_how; /* a CFIR_ value */
    240       UChar r11_how; /* a CFIR_ value */
    241       UChar r7_how;  /* a CFIR_ value */
    242       Int   cfa_off;
    243       Int   ra_off;
    244       Int   r14_off;
    245       Int   r13_off;
    246       Int   r12_off;
    247       Int   r11_off;
    248       Int   r7_off;
    249    }
    250    DiCfSI;
    251 #elif defined(VGA_arm64)
    252 typedef
    253    struct {
    254       Addr  base;
    255       UInt  len;
    256       UChar cfa_how; /* a CFIC_ value */
    257       UChar ra_how;  /* a CFIR_ value */
    258       UChar sp_how;  /* a CFIR_ value */ /*dw31=SP*/
    259       UChar x30_how; /* a CFIR_ value */ /*dw30=LR*/
    260       UChar x29_how; /* a CFIR_ value */ /*dw29=FP*/
    261       Int   cfa_off;
    262       Int   ra_off;
    263       Int   sp_off;
    264       Int   x30_off;
    265       Int   x29_off;
    266    }
    267    DiCfSI;
    268 #elif defined(VGA_ppc32) || defined(VGA_ppc64)
    269 /* Just have a struct with the common fields in, so that code that
    270    processes the common fields doesn't have to be ifdef'd against
    271    VGP_/VGA_ symbols.  These are not used in any way on ppc32/64-linux
    272    at the moment. */
    273 typedef
    274    struct {
    275       Addr  base;
    276       UInt  len;
    277       UChar cfa_how; /* a CFIC_ value */
    278       UChar ra_how;  /* a CFIR_ value */
    279       Int   cfa_off;
    280       Int   ra_off;
    281    }
    282    DiCfSI;
    283 #elif defined(VGA_s390x)
    284 typedef
    285    struct {
    286       Addr  base;
    287       UInt  len;
    288       UChar cfa_how; /* a CFIC_ value */
    289       UChar sp_how;  /* a CFIR_ value */
    290       UChar ra_how;  /* a CFIR_ value */
    291       UChar fp_how;  /* a CFIR_ value */
    292       Int   cfa_off;
    293       Int   sp_off;
    294       Int   ra_off;
    295       Int   fp_off;
    296    }
    297    DiCfSI;
    298 #elif defined(VGA_mips32) || defined(VGA_mips64)
    299 typedef
    300    struct {
    301       Addr  base;
    302       UInt  len;
    303       UChar cfa_how; /* a CFIC_ value */
    304       UChar ra_how;  /* a CFIR_ value */
    305       UChar sp_how;  /* a CFIR_ value */
    306       UChar fp_how;  /* a CFIR_ value */
    307       Int   cfa_off;
    308       Int   ra_off;
    309       Int   sp_off;
    310       Int   fp_off;
    311    }
    312    DiCfSI;
    313 #else
    314 #  error "Unknown arch"
    315 #endif
    316 
    317 
    318 typedef
    319    enum {
    320       Cunop_Abs=0x231,
    321       Cunop_Neg,
    322       Cunop_Not
    323    }
    324    CfiUnop;
    325 
    326 typedef
    327    enum {
    328       Cbinop_Add=0x321,
    329       Cbinop_Sub,
    330       Cbinop_And,
    331       Cbinop_Mul,
    332       Cbinop_Shl,
    333       Cbinop_Shr,
    334       Cbinop_Eq,
    335       Cbinop_Ge,
    336       Cbinop_Gt,
    337       Cbinop_Le,
    338       Cbinop_Lt,
    339       Cbinop_Ne
    340    }
    341    CfiBinop;
    342 
    343 typedef
    344    enum {
    345       Creg_IA_SP=0x213,
    346       Creg_IA_BP,
    347       Creg_IA_IP,
    348       Creg_ARM_R13,
    349       Creg_ARM_R12,
    350       Creg_ARM_R15,
    351       Creg_ARM_R14,
    352       Creg_ARM64_X30,
    353       Creg_S390_R14,
    354       Creg_MIPS_RA
    355    }
    356    CfiReg;
    357 
    358 typedef
    359    enum {
    360       Cex_Undef=0x123,
    361       Cex_Deref,
    362       Cex_Const,
    363       Cex_Unop,
    364       Cex_Binop,
    365       Cex_CfiReg,
    366       Cex_DwReg
    367    }
    368    CfiExprTag;
    369 
    370 typedef
    371    struct {
    372       CfiExprTag tag;
    373       union {
    374          struct {
    375          } Undef;
    376          struct {
    377             Int ixAddr;
    378          } Deref;
    379          struct {
    380             UWord con;
    381          } Const;
    382          struct {
    383             CfiUnop op;
    384             Int ix;
    385          } Unop;
    386          struct {
    387             CfiBinop op;
    388             Int ixL;
    389             Int ixR;
    390          } Binop;
    391          struct {
    392             CfiReg reg;
    393          } CfiReg;
    394          struct {
    395             Int reg;
    396          } DwReg;
    397       }
    398       Cex;
    399    }
    400    CfiExpr;
    401 
    402 extern Int ML_(CfiExpr_Undef) ( XArray* dst );
    403 extern Int ML_(CfiExpr_Deref) ( XArray* dst, Int ixAddr );
    404 extern Int ML_(CfiExpr_Const) ( XArray* dst, UWord con );
    405 extern Int ML_(CfiExpr_Unop)  ( XArray* dst, CfiUnop op, Int ix );
    406 extern Int ML_(CfiExpr_Binop) ( XArray* dst, CfiBinop op, Int ixL, Int ixR );
    407 extern Int ML_(CfiExpr_CfiReg)( XArray* dst, CfiReg reg );
    408 extern Int ML_(CfiExpr_DwReg) ( XArray* dst, Int reg );
    409 
    410 extern void ML_(ppCfiExpr)( XArray* src, Int ix );
    411 
    412 /* ---------------- FPO INFO (Windows PE) -------------- */
    413 
    414 /* for apps using Wine: MSVC++ PDB FramePointerOmitted: somewhat like
    415    a primitive CFI */
    416 typedef
    417    struct _FPO_DATA {  /* 16 bytes */
    418       UInt   ulOffStart; /* offset of 1st byte of function code */
    419       UInt   cbProcSize; /* # bytes in function */
    420       UInt   cdwLocals;  /* # bytes/4 in locals */
    421       UShort cdwParams;  /* # bytes/4 in params */
    422       UChar  cbProlog;   /* # bytes in prolog */
    423       UChar  cbRegs :3;  /* # regs saved */
    424       UChar  fHasSEH:1;  /* Structured Exception Handling */
    425       UChar  fUseBP :1;  /* EBP has been used */
    426       UChar  reserved:1;
    427       UChar  cbFrame:2;  /* frame type */
    428    }
    429    FPO_DATA;
    430 
    431 #define PDB_FRAME_FPO  0
    432 #define PDB_FRAME_TRAP 1
    433 #define PDB_FRAME_TSS  2
    434 
    435 /* --------------------- VARIABLES --------------------- */
    436 
    437 typedef
    438    struct {
    439       Addr    aMin;
    440       Addr    aMax;
    441       XArray* /* of DiVariable */ vars;
    442    }
    443    DiAddrRange;
    444 
    445 typedef
    446    struct {
    447       HChar* name;  /* in DebugInfo.strchunks */
    448       UWord  typeR; /* a cuOff */
    449       GExpr* gexpr; /* on DebugInfo.gexprs list */
    450       GExpr* fbGX;  /* SHARED. */
    451       HChar* fileName; /* where declared; may be NULL. in
    452                           DebugInfo.strchunks */
    453       Int    lineNo;   /* where declared; may be zero. */
    454    }
    455    DiVariable;
    456 
    457 Word
    458 ML_(cmp_for_DiAddrRange_range) ( const void* keyV, const void* elemV );
    459 
    460 /* --------------------- DEBUGINFO --------------------- */
    461 
    462 /* This is the top-level data type.  It's a structure which contains
    463    information pertaining to one mapped ELF object.  This type is
    464    exported only abstractly - in pub_tool_debuginfo.h. */
    465 
    466 /* First though, here's an auxiliary data structure.  It is only ever
    467    used as part of a struct _DebugInfo.  We use it to record
    468    observations about mappings and permission changes to the
    469    associated file, so as to decide when to read debug info.  It's
    470    essentially an ultra-trivial finite state machine which, when it
    471    reaches an accept state, signals that we should now read debug info
    472    from the object into the associated struct _DebugInfo.  The accept
    473    state is arrived at when have_rx_map and have_rw_map both become
    474    true.  The initial state is one in which we have no observations,
    475    so have_rx_map and have_rw_map are both false.
    476 
    477    This all started as a rather ad-hoc solution, but was further
    478    expanded to handle weird object layouts, e.g. more than one rw
    479    or rx mapping for one binary.
    480 
    481    The normal sequence of events is one of
    482 
    483    start  -->  r-x mapping  -->  rw- mapping  -->  accept
    484    start  -->  rw- mapping  -->  r-x mapping  -->  accept
    485 
    486    that is, take the first r-x and rw- mapping we see, and we're done.
    487 
    488    On MacOSX 10.7, 32-bit, there appears to be a new variant:
    489 
    490    start  -->  r-- mapping  -->  rw- mapping
    491           -->  upgrade r-- mapping to r-x mapping  -->  accept
    492 
    493    where the upgrade is done by a call to vm_protect.  Hence we
    494    need to also track this possibility.
    495 */
    496 
    497 struct _DebugInfoMapping
    498 {
    499    Addr  avma; /* these fields record the file offset, length */
    500    SizeT size; /* and map address of each mapping             */
    501    OffT  foff;
    502    Bool  rx, rw, ro;  /* memory access flags for this mapping */
    503 };
    504 
    505 struct _DebugInfoFSM
    506 {
    507    HChar*  filename;  /* in mallocville (VG_AR_DINFO)               */
    508    XArray* maps;      /* XArray of _DebugInfoMapping structs        */
    509    Bool  have_rx_map; /* did we see a r?x mapping yet for the file? */
    510    Bool  have_rw_map; /* did we see a rw? mapping yet for the file? */
    511    Bool  have_ro_map; /* did we see a r-- mapping yet for the file? */
    512 };
    513 
    514 
    515 /* To do with the string table in struct _DebugInfo (::strchunks) */
    516 #define SEGINFO_STRCHUNKSIZE (64*1024)
    517 
    518 
    519 /* We may encounter more than one .eh_frame section in an object --
    520    unusual but apparently allowed by ELF.  See
    521    http://sourceware.org/bugzilla/show_bug.cgi?id=12675
    522 */
    523 #define N_EHFRAME_SECTS 2
    524 
    525 
    526 /* So, the main structure for holding debug info for one object. */
    527 
    528 struct _DebugInfo {
    529 
    530    /* Admin stuff */
    531 
    532    struct _DebugInfo* next;   /* list of DebugInfos */
    533    Bool               mark;   /* marked for deletion? */
    534 
    535    /* An abstract handle, which can be used by entities outside of
    536       m_debuginfo to (in an abstract datatype sense) refer to this
    537       struct _DebugInfo.  A .handle of zero is invalid; valid handles
    538       are 1 and above.  The same handle is never issued twice (in any
    539       given run of Valgrind), so a handle becomes invalid when the
    540       associated struct _DebugInfo is discarded, and remains invalid
    541       forever thereafter.  The .handle field is set as soon as this
    542       structure is allocated. */
    543    ULong handle;
    544 
    545    /* Used for debugging only - indicate what stuff to dump whilst
    546       reading stuff into the seginfo.  Are computed as early in the
    547       lifetime of the DebugInfo as possible -- at the point when it is
    548       created.  Use these when deciding what to spew out; do not use
    549       the global VG_(clo_blah) flags. */
    550 
    551    Bool trace_symtab; /* symbols, our style */
    552    Bool trace_cfi;    /* dwarf frame unwind, our style */
    553    Bool ddump_syms;   /* mimic /usr/bin/readelf --syms */
    554    Bool ddump_line;   /* mimic /usr/bin/readelf --debug-dump=line */
    555    Bool ddump_frames; /* mimic /usr/bin/readelf --debug-dump=frames */
    556 
    557    /* The "decide when it is time to read debuginfo" state machine.
    558       This structure must get filled in before we can start reading
    559       anything from the ELF/MachO file.  This structure is filled in
    560       by VG_(di_notify_mmap) and its immediate helpers. */
    561    struct _DebugInfoFSM fsm;
    562 
    563    /* Once the ::fsm has reached an accept state -- typically, when
    564       both a rw? and r?x mapping for .filename have been observed --
    565       we can go on to read the symbol tables and debug info.
    566       .have_dinfo changes from False to True when the debug info has
    567       been completely read in and postprocessed (canonicalised) and is
    568       now suitable for querying. */
    569    /* If have_dinfo is False, then all fields below this point are
    570       invalid and should not be consulted. */
    571    Bool  have_dinfo; /* initially False */
    572 
    573    /* All the rest of the fields in this structure are filled in once
    574       we have committed to reading the symbols and debug info (that
    575       is, at the point where .have_dinfo is set to True). */
    576 
    577    /* The file's soname. */
    578    HChar* soname;
    579 
    580    /* Description of some important mapped segments.  The presence or
    581       absence of the mapping is denoted by the _present field, since
    582       in some obscure circumstances (to do with data/sdata/bss) it is
    583       possible for the mapping to be present but have zero size.
    584       Certainly text_ is mandatory on all platforms; not sure about
    585       the rest though.
    586 
    587       --------------------------------------------------------
    588 
    589       Comment_on_IMPORTANT_CFSI_REPRESENTATIONAL_INVARIANTS: we require that
    590 
    591       either (size of all rx maps == 0 && cfsi == NULL) (the degenerate case)
    592 
    593       or the normal case, which is the AND of the following:
    594       (0) size of at least one rx mapping > 0
    595       (1) no two DebugInfos with some rx mapping of size > 0
    596           have overlapping rx mappings
    597       (2) [cfsi_minavma,cfsi_maxavma] does not extend beyond
    598           [avma,+size) of one rx mapping; that is, the former
    599           is a subrange or equal to the latter.
    600       (3) all DiCfSI in the cfsi array all have ranges that fall within
    601           [avma,+size) of that rx mapping.
    602       (4) all DiCfSI in the cfsi array are non-overlapping
    603 
    604       The cumulative effect of these restrictions is to ensure that
    605       all the DiCfSI records in the entire system are non overlapping.
    606       Hence any address falls into either exactly one DiCfSI record,
    607       or none.  Hence it is safe to cache the results of searches for
    608       DiCfSI records.  This is the whole point of these restrictions.
    609       The caching of DiCfSI searches is done in VG_(use_CF_info).  The
    610       cache is flushed after any change to debugInfo_list.  DiCfSI
    611       searches are cached because they are central to stack unwinding
    612       on amd64-linux.
    613 
    614       Where are these invariants imposed and checked?
    615 
    616       They are checked after a successful read of debuginfo into
    617       a DebugInfo*, in check_CFSI_related_invariants.
    618 
    619       (1) is not really imposed anywhere.  We simply assume that the
    620       kernel will not map the text segments from two different objects
    621       into the same space.  Sounds reasonable.
    622 
    623       (2) follows from (4) and (3).  It is ensured by canonicaliseCFI.
    624       (3) is ensured by ML_(addDiCfSI).
    625       (4) is ensured by canonicaliseCFI.
    626 
    627       --------------------------------------------------------
    628 
    629       Comment_on_DEBUG_SVMA_and_DEBUG_BIAS_fields:
    630 
    631       The _debug_{svma,bias} fields were added as part of a fix to
    632       #185816.  The problem encompassed in that bug report was that it
    633       wasn't correct to apply the bias values deduced for a
    634       primary object to its associated debuginfo object, because the
    635       debuginfo object (or the primary) could have been prelinked to a
    636       different SVMA.  Hence debuginfo and primary objects need to
    637       have their own biases.
    638 
    639       ------ JRS: (referring to r9329): ------
    640       Let me see if I understand the workings correctly.  Initially
    641       the _debug_ values are set to the same values as the "normal"
    642       ones, as there's a bunch of bits of code like this (in
    643       readelf.c)
    644 
    645          di->text_svma = svma;
    646          ...
    647          di->text_bias = rx_bias;
    648          di->text_debug_svma = svma;
    649          di->text_debug_bias = rx_bias;
    650 
    651       If a debuginfo object subsequently shows up then the
    652       _debug_svma/bias are set for the debuginfo object.  Result is
    653       that if there's no debuginfo object then the values are the same
    654       as the primary-object values, and if there is a debuginfo object
    655       then they will (or at least may) be different.
    656 
    657       Then when we need to actually bias something, we'll have to
    658       decide whether to use the primary bias or the debuginfo bias.
    659       And the strategy is to use the primary bias for ELF symbols but
    660       the debuginfo bias for anything pulled out of Dwarf.
    661 
    662       ------ THH: ------
    663       Correct - the debug_svma and bias values apply to any address
    664       read from the debug data regardless of where that debug data is
    665       stored and the other values are used for addresses from other
    666       places (primarily the symbol table).
    667 
    668       ------ JRS: ------
    669       Ok; so this was my only area of concern.  Are there any
    670       corner-case scenarios where this wouldn't be right?  It sounds
    671       like we're assuming the ELF symbols come from the primary object
    672       and, if there is a debug object, then all the Dwarf comes from
    673       there.  But what if (eg) both symbols and Dwarf come from the
    674       debug object?  Is that even possible or allowable?
    675 
    676       ------ THH: ------
    677       You may have a point...
    678 
    679       The current logic is to try and take any one set of data from
    680       either the base object or the debug object. There are four sets
    681       of data we consider:
    682 
    683          - Symbol Table
    684          - Stabs
    685          - DWARF1
    686          - DWARF2
    687 
    688       If we see the primary section for a given set in the base object
    689       then we ignore all sections relating to that set in the debug
    690       object.
    691 
    692       Now in principle if we saw a secondary section (like debug_line
    693       say) in the base object, but not the main section (debug_info in
    694       this case) then we would take debug_info from the debug object
    695       but would use the debug_line from the base object unless we saw
    696       a replacement copy in the debug object. That's probably unlikely
    697       however.
    698 
    699       A bigger issue might be, as you say, the symbol table as we will
    700       pick that up from the debug object if it isn't in the base. The
    701       dynamic symbol table will always have to be in the base object
    702       though so we will have to be careful when processing symbols to
    703       know which table we are reading in that case.
    704 
    705       What we probably need to do is tell read_elf_symtab which object
    706       the symbols it is being asked to read came from.
    707 
    708       (A followup patch to deal with this was committed in r9469).
    709    */
    710    /* .text */
    711    Bool     text_present;
    712    Addr     text_avma;
    713    Addr     text_svma;
    714    SizeT    text_size;
    715    PtrdiffT text_bias;
    716    Addr     text_debug_svma;
    717    PtrdiffT text_debug_bias;
    718    /* .data */
    719    Bool     data_present;
    720    Addr     data_svma;
    721    Addr     data_avma;
    722    SizeT    data_size;
    723    PtrdiffT data_bias;
    724    Addr     data_debug_svma;
    725    PtrdiffT data_debug_bias;
    726    /* .sdata */
    727    Bool     sdata_present;
    728    Addr     sdata_svma;
    729    Addr     sdata_avma;
    730    SizeT    sdata_size;
    731    PtrdiffT sdata_bias;
    732    Addr     sdata_debug_svma;
    733    PtrdiffT sdata_debug_bias;
    734    /* .rodata */
    735    Bool     rodata_present;
    736    Addr     rodata_svma;
    737    Addr     rodata_avma;
    738    SizeT    rodata_size;
    739    PtrdiffT rodata_bias;
    740    Addr     rodata_debug_svma;
    741    PtrdiffT rodata_debug_bias;
    742    /* .bss */
    743    Bool     bss_present;
    744    Addr     bss_svma;
    745    Addr     bss_avma;
    746    SizeT    bss_size;
    747    PtrdiffT bss_bias;
    748    Addr     bss_debug_svma;
    749    PtrdiffT bss_debug_bias;
    750    /* .sbss */
    751    Bool     sbss_present;
    752    Addr     sbss_svma;
    753    Addr     sbss_avma;
    754    SizeT    sbss_size;
    755    PtrdiffT sbss_bias;
    756    Addr     sbss_debug_svma;
    757    PtrdiffT sbss_debug_bias;
    758    /* .plt */
    759    Bool   plt_present;
    760    Addr	  plt_avma;
    761    SizeT  plt_size;
    762    /* .got */
    763    Bool   got_present;
    764    Addr   got_avma;
    765    SizeT  got_size;
    766    /* .got.plt */
    767    Bool   gotplt_present;
    768    Addr   gotplt_avma;
    769    SizeT  gotplt_size;
    770    /* .opd -- needed on ppc64-linux for finding symbols */
    771    Bool   opd_present;
    772    Addr   opd_avma;
    773    SizeT  opd_size;
    774    /* .eh_frame -- needed on amd64-linux for stack unwinding.  We might
    775       see more than one, hence the arrays. */
    776    UInt   n_ehframe;  /* 0 .. N_EHFRAME_SECTS */
    777    Addr   ehframe_avma[N_EHFRAME_SECTS];
    778    SizeT  ehframe_size[N_EHFRAME_SECTS];
    779 
    780    /* Sorted tables of stuff we snarfed from the file.  This is the
    781       eventual product of reading the debug info.  All this stuff
    782       lives in VG_AR_DINFO. */
    783 
    784    /* An expandable array of symbols. */
    785    DiSym*  symtab;
    786    UWord   symtab_used;
    787    UWord   symtab_size;
    788    /* An expandable array of locations. */
    789    DiLoc*  loctab;
    790    UWord   loctab_used;
    791    UWord   loctab_size;
    792    /* An expandable array of CFI summary info records.  Also includes
    793       summary address bounds, showing the min and max address covered
    794       by any of the records, as an aid to fast searching.  And, if the
    795       records require any expression nodes, they are stored in
    796       cfsi_exprs. */
    797    DiCfSI* cfsi;
    798    UWord   cfsi_used;
    799    UWord   cfsi_size;
    800    Addr    cfsi_minavma;
    801    Addr    cfsi_maxavma;
    802    XArray* cfsi_exprs; /* XArray of CfiExpr */
    803 
    804    /* Optimized code under Wine x86: MSVC++ PDB FramePointerOmitted
    805       data.  Non-expandable array, hence .size == .used. */
    806    FPO_DATA* fpo;
    807    UWord     fpo_size;
    808    Addr      fpo_minavma;
    809    Addr      fpo_maxavma;
    810    Addr      fpo_base_avma;
    811 
    812    /* Expandable arrays of characters -- the string table.  Pointers
    813       into this are stable (the arrays are not reallocated). */
    814    struct strchunk {
    815       UInt   strtab_used;
    816       struct strchunk* next;
    817       HChar  strtab[SEGINFO_STRCHUNKSIZE];
    818    } *strchunks;
    819 
    820    /* Variable scope information, as harvested from Dwarf3 files.
    821 
    822       In short it's an
    823 
    824          array of (array of PC address ranges and variables)
    825 
    826       The outer array indexes over scopes, with Entry 0 containing
    827       information on variables which exist for any value of the program
    828       counter (PC) -- that is, the outermost scope.  Entries 1, 2, 3,
    829       etc contain information on increasingly deeply nested variables.
    830 
    831       Each inner array is an array of (an address range, and a set
    832       of variables that are in scope over that address range).
    833 
    834       The address ranges may not overlap.
    835 
    836       Since Entry 0 in the outer array holds information on variables
    837       that exist for any value of the PC (that is, global vars), it
    838       follows that Entry 0's inner array can only have one address
    839       range pair, one that covers the entire address space.
    840    */
    841    XArray* /* of OSet of DiAddrRange */varinfo;
    842 
    843    /* These are arrays of the relevant typed objects, held here
    844       partially for the purposes of visiting each object exactly once
    845       when we need to delete them. */
    846 
    847    /* An array of TyEnts.  These are needed to make sense of any types
    848       in the .varinfo.  Also, when deleting this DebugInfo, we must
    849       first traverse this array and throw away malloc'd stuff hanging
    850       off it -- by calling ML_(TyEnt__make_EMPTY) on each entry. */
    851    XArray* /* of TyEnt */ admin_tyents;
    852 
    853    /* An array of guarded DWARF3 expressions. */
    854    XArray* admin_gexprs;
    855 
    856    /* Cached last rx mapping matched and returned by ML_(find_rx_mapping).
    857       This helps performance a lot during ML_(addLineInfo) etc., which can
    858       easily be invoked hundreds of thousands of times. */
    859    struct _DebugInfoMapping* last_rx_map;
    860 };
    861 
    862 /* --------------------- functions --------------------- */
    863 
    864 /* ------ Adding ------ */
    865 
    866 /* Add a symbol to di's symbol table.  The contents of 'sym' are
    867    copied.  It is assumed (and checked) that 'sym' only contains one
    868    name, so there is no auxiliary ::sec_names vector to duplicate.
    869    IOW, the copy is a shallow copy, and there are assertions in place
    870    to ensure that's OK. */
    871 extern void ML_(addSym) ( struct _DebugInfo* di, DiSym* sym );
    872 
    873 /* Add a line-number record to the DebugInfo 'di'.  'dirname' may be
           NULL.  NOTE(review): 'this' and 'next' presumably delimit the
           address range the record covers, with 'lineno' the source line
           for that range; the meaning of 'entry' is not documented here --
           confirm against the definition. */
    874 extern
    875 void ML_(addLineInfo) ( struct _DebugInfo* di,
    876                         const HChar* filename,
    877                         const HChar* dirname,  /* NULL is allowable */
    878                         Addr this, Addr next, Int lineno, Int entry);
    879 
    880 /* Add a CFI summary record to 'di'.  The supplied DiCfSI is copied. */
    881 extern void ML_(addDiCfSI) ( struct _DebugInfo* di, DiCfSI* cfsi );
    882 
    883 /* Add a string to the string table of a DebugInfo.  If len==-1,
    884    ML_(addStr) will itself measure the length of the string.
           NOTE(review): the returned HChar* is presumably the address of
           the copy held in the string table -- confirm against the
           definition. */
    885 extern HChar* ML_(addStr) ( struct _DebugInfo* di, const HChar* str, Int len );
    886 
    887 /* Add a string to the string table of a DebugInfo, by copying the
    888    string from the given DiCursor 'c'.  No length is passed in: the
    889    function measures the length of the string itself. */
    890 extern HChar* ML_(addStrFromCursor)( struct _DebugInfo* di, DiCursor c );
    891 /* Add a variable description to 'di'.  NOTE(review): 'level' is
           presumably the scope-nesting level and [aMin, aMax] the address
           range over which the variable is in scope, matching the layout
           described for the 'varinfo' field of struct _DebugInfo --
           confirm against the definition. */
    892 extern void ML_(addVar)( struct _DebugInfo* di,
    893                          Int    level,
    894                          Addr   aMin,
    895                          Addr   aMax,
    896                          HChar* name,
    897                          UWord  typeR, /* a cuOff */
    898                          GExpr* gexpr,
    899                          GExpr* fbGX, /* SHARED. */
    900                          HChar* fileName, /* where decl'd - may be NULL */
    901                          Int    lineNo, /* where decl'd - may be zero */
    902                          Bool   show );
    903 
    904 /* Canonicalise the tables held by 'di', in preparation for use.  Call
    905    this after finishing adding entries to these tables.  This also
           takes care of the call-frame-info table, by calling
           ML_(canonicaliseCFI). */
    906 extern void ML_(canonicaliseTables) ( struct _DebugInfo* di );
    907 
    908 /* Canonicalise the call-frame-info table held by 'di', in preparation
    909    for use. This is called by ML_(canonicaliseTables) but can also be
    910    called on its own to sort just this table. */
    911 extern void ML_(canonicaliseCFI) ( struct _DebugInfo* di );
    912 
    913 /* ------ Searching ------ */
    914 
    915 /* Find the index of a symbol-table entry containing the specified
    916    pointer, or -1 if not found.  Binary search.
           NOTE(review): 'match_anywhere_in_sym' presumably chooses between
           matching any address inside a symbol and matching only its
           start, and 'findText' presumably selects text (code) symbols --
           confirm against the definition. */
    917 extern Word ML_(search_one_symtab) ( struct _DebugInfo* di, Addr ptr,
    918                                      Bool match_anywhere_in_sym,
    919                                      Bool findText );
    920 
    921 /* Find the index of a location-table entry containing the specified
    922    pointer 'ptr', or -1 if not found.  Binary search.  */
    923 extern Word ML_(search_one_loctab) ( struct _DebugInfo* di, Addr ptr );
    924 
    925 /* Find the index of a CFI-table entry containing the specified
    926    pointer 'ptr', or -1 if not found.  Binary search.  */
    927 extern Word ML_(search_one_cfitab) ( struct _DebugInfo* di, Addr ptr );
    928 
    929 /* Find the index of an FPO-table entry containing the specified
    930    pointer 'ptr', or -1 if not found.  Binary search.  */
    931 extern Word ML_(search_one_fpotab) ( struct _DebugInfo* di, Addr ptr );
    932 
    933 /* Helper function for the most often needed searching for an rx
    934    mapping containing the specified address range.  The range must
    935    fall entirely within the mapping to be considered to be within it.
    936    Asserts if lo > hi; caller must ensure this doesn't happen.
           The last mapping matched and returned is cached in
           di->last_rx_map, which helps when this is called very many times.
           NOTE(review): presumably returns NULL when no mapping contains
           the range -- confirm against the definition. */
    937 extern struct _DebugInfoMapping* ML_(find_rx_mapping) ( struct _DebugInfo* di,
    938                                                         Addr lo, Addr hi );
    939 
    940 /* ------ Misc ------ */
    941 
    942 /* Show a non-fatal debug info reading error.  Use vg_panic instead
    943    if the error is terminal.  Errors flagged 'serious' are always
    944    shown; non-'serious' ones are shown only at verbosity level 2 and
           above. */
    945 extern
    946 void ML_(symerr) ( struct _DebugInfo* di, Bool serious, const HChar* msg );
    947 
    948 /* Print a symbol.  NOTE(review): 'idx' is presumably the symbol's
           index in the symbol table -- confirm against the definition. */
    949 extern void ML_(ppSym) ( Int idx, DiSym* sym );
    950 
    951 /* Print the call-frame-info summary record 'si'.  'exprs' is an
           XArray of CfiExpr.  NOTE(review): presumably the array holding
           any expression nodes that 'si' refers to (cf. the cfsi_exprs
           field of struct _DebugInfo) -- confirm against the definition. */
    952 extern void ML_(ppDiCfSI) ( XArray* /* of CfiExpr */ exprs, DiCfSI* si );
    953 
    954 /* Symbol-table tracing helpers.  Both macros expand to code that
           refers to a variable named 'di', so they can only be used where
           a 'di' with a 'trace_symtab' member is in scope.  TRACE_SYMTAB
           passes its arguments to VG_(printf) when TRACE_SYMTAB_ENABLED
           is true, and does nothing otherwise.
           NOTE(review): TRACE_SYMTAB expands to a bare 'if' statement, not
           a do { ... } while (0) wrapper, so using it as the unbraced body
           of an if/else either fails to compile or binds the 'else' to the
           macro's own 'if'.  Always wrap such uses in braces.  The
           'args...' and '## args' forms are GNU C extensions. */
    955 #define TRACE_SYMTAB_ENABLED (di->trace_symtab)
    956 #define TRACE_SYMTAB(format, args...) \
    957    if (TRACE_SYMTAB_ENABLED) { VG_(printf)(format, ## args); }
    958 
    959 
    960 #endif /* ndef __PRIV_STORAGE_H */
    961 
    962 /*--------------------------------------------------------------------*/
    963 /*--- end                                                          ---*/
    964 /*--------------------------------------------------------------------*/
    965