Home | History | Annotate | Download | only in m_demangle
      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- Demangling of C++ mangled names.                  demangle.c ---*/
      4 /*--------------------------------------------------------------------*/
      5 
      6 /*
      7    This file is part of Valgrind, a dynamic binary instrumentation
      8    framework.
      9 
     10    Copyright (C) 2000-2015 Julian Seward
     11       jseward (at) acm.org
     12 
     13    This program is free software; you can redistribute it and/or
     14    modify it under the terms of the GNU General Public License as
     15    published by the Free Software Foundation; either version 2 of the
     16    License, or (at your option) any later version.
     17 
     18    This program is distributed in the hope that it will be useful, but
     19    WITHOUT ANY WARRANTY; without even the implied warranty of
     20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     21    General Public License for more details.
     22 
     23    You should have received a copy of the GNU General Public License
     24    along with this program; if not, write to the Free Software
     25    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     26    02111-1307, USA.
     27 
     28    The GNU General Public License is contained in the file COPYING.
     29 */
     30 
     31 #include "pub_core_basics.h"
     32 #include "pub_core_demangle.h"
     33 #include "pub_core_libcassert.h"
     34 #include "pub_core_libcbase.h"
     35 #include "pub_core_libcprint.h"
     36 #include "pub_core_mallocfree.h"
     37 #include "pub_core_options.h"
     38 
     39 #include "vg_libciface.h"
     40 #include "demangle.h"
     41 
     42 /* The demangler's job is to take a raw symbol name and turn it into
     43    something a Human Bean can understand.  There are two levels of
     44    mangling.
     45 
     46    1. First, C++ names are mangled by the compiler.  So we'll have to
     47       undo that.
     48 
     49    2. Optionally, in relatively rare cases, the resulting name is then
     50       itself encoded using Z-escaping (see pub_core_redir.h) so as to
     51       become part of a redirect-specification.
     52 
     53    Therefore, VG_(demangle) first tries to undo (2).  If successful,
     54    the soname part is discarded (humans don't want to see that).
     55    Then, it tries to undo (1) (using demangling code from GNU/FSF).
     56 
     57    Finally, change the name of all symbols which are known to be
     58    functions below main() to "(below main)".  This helps reduce
     59    variability of stack traces, something which has been a problem for
     60    the testsuite for a long time.
     61 
     62    --------
   If do_cxx_demangling == True, does all the above stages:
   - undo (2) [Z-encoding]
   - undo (1) [C++ mangling]
   - do the below-main hack

   If do_cxx_demangling == False, the middle stage is skipped:
   - undo (2) [Z-encoding]
   - do the below-main hack
     71 */
     72 
     73 /* Note that the C++ demangler is from GNU libiberty and is almost
     74    completely unmodified.  We use vg_libciface.h as a way to
     75    impedance-match the libiberty code into our own framework.
     76 
     77    The libiberty code included here was taken from the GCC repository
     78    and is released under the LGPL 2.1 license, which AFAICT is compatible
     79    with "GPL 2 or later" and so is OK for inclusion in Valgrind.
     80 
     81    To update to a newer libiberty, use the "update-demangler" script
     82    which is included in the valgrind repository. */
     83 
     84 /* This is the main, standard demangler entry point. */
     85 
     86 /* Upon return, *RESULT will point to the demangled name.
     87    The memory buffer that holds the demangled name is allocated on the
     88    heap and will be deallocated in the next invocation. Conceptually,
     89    that buffer is owned by VG_(demangle). That means two things:
     90    (1) Users of VG_(demangle) must not free that buffer.
     91    (2) If the demangled name needs to be stashed away for later use,
     92        the contents of the buffer needs to be copied. It is not sufficient
     93        to just store the pointer as it will point to deallocated memory
     94        after the next VG_(demangle) invocation. */
     95 void VG_(demangle) ( Bool do_cxx_demangling, Bool do_z_demangling,
     96                      /* IN */  const HChar  *orig,
     97                      /* OUT */ const HChar **result )
     98 {
     99    /* Possibly undo (2) */
    100    /* Z-Demangling was requested.
    101       The fastest way to see if it's a Z-mangled name is just to attempt
    102       to Z-demangle it (with NULL for the soname buffer, since we're not
    103       interested in that). */
    104    if (do_z_demangling) {
    105       const HChar *z_demangled;
    106 
    107       if (VG_(maybe_Z_demangle)( orig, NULL, /*soname*/
    108                                  &z_demangled, NULL, NULL, NULL )) {
    109          orig = z_demangled;
    110       }
    111    }
    112 
    113    /* Possibly undo (1) */
    114    if (do_cxx_demangling && VG_(clo_demangle)) {
    115       static HChar* demangled = NULL;
    116 
    117       /* Free up previously demangled name */
    118       if (demangled) VG_(arena_free) (VG_AR_DEMANGLE, demangled);
    119 
    120       demangled = ML_(cplus_demangle) ( orig, DMGL_ANSI | DMGL_PARAMS );
    121 
    122       *result = (demangled == NULL) ? orig : demangled;
    123    } else {
    124       *result = orig;
    125    }
    126 
    127    // 13 Mar 2005: We used to check here that the demangler wasn't leaking
    128    // by calling the (now-removed) function VG_(is_empty_arena)().  But,
    129    // very rarely (ie. I've heard of it twice in 3 years), the demangler
    130    // does leak.  But, we can't do much about it, and it's not a disaster,
    131    // so we just let it slide without aborting or telling the user.
    132 }
    133 
    134 
    135 /*------------------------------------------------------------*/
    136 /*--- DEMANGLE Z-ENCODED NAMES                             ---*/
    137 /*------------------------------------------------------------*/
    138 
    139 /* Demangle a Z-encoded name as described in pub_tool_redir.h.
    140    Z-encoded names are used by Valgrind for doing function
    141    interception/wrapping.
    142 
    143    Demangle 'sym' into its soname and fnname parts, putting them in
    144    the specified buffers.  Returns a Bool indicating whether the
    145    demangled failed or not.  A failure can occur because the prefix
    146    isn't recognised, the internal Z-escaping is wrong, or because one
    147    or the other (or both) of the output buffers becomes full.  Passing
    148    'so' as NULL is acceptable if the caller is only interested in the
    149    function name part. */
    150 
    151 Bool VG_(maybe_Z_demangle) ( const HChar* sym,
    152                              /*OUT*/const HChar** so,
    153                              /*OUT*/const HChar** fn,
    154                              /*OUT*/Bool* isWrap,
    155                              /*OUT*/Int*  eclassTag,
    156                              /*OUT*/Int*  eclassPrio )
    157 {
    158    static HChar *sobuf;
    159    static HChar *fnbuf;
    160    static SizeT  buf_len = 0;
    161 
    162    /* The length of the name after undoing Z-encoding is always smaller
    163       than the mangled name. Making the soname and fnname buffers as large
    164       as the demangled name is therefore always safe and overflow can never
    165       occur. */
    166    SizeT len = VG_(strlen)(sym) + 1;
    167 
    168    if (buf_len < len) {
    169       sobuf = VG_(arena_realloc)(VG_AR_DEMANGLE, "Z-demangle", sobuf, len);
    170       fnbuf = VG_(arena_realloc)(VG_AR_DEMANGLE, "Z-demangle", fnbuf, len);
    171       buf_len = len;
    172    }
    173    sobuf[0] = fnbuf[0] = '\0';
    174 
    175    if (so)
    176      *so = sobuf;
    177    *fn = fnbuf;
    178 
    179 #  define EMITSO(ch)                           \
    180       do {                                     \
    181          if (so) {                             \
    182             sobuf[soi++] = ch; sobuf[soi] = 0; \
    183          }                                     \
    184       } while (0)
    185 #  define EMITFN(ch)                           \
    186       do {                                     \
    187          fnbuf[fni++] = ch; fnbuf[fni] = 0;    \
    188       } while (0)
    189 
    190    Bool error, valid, fn_is_encoded, is_VG_Z_prefixed;
    191    Int  soi, fni, i;
    192 
    193    error = False;
    194    soi = 0;
    195    fni = 0;
    196 
    197    valid =     sym[0] == '_'
    198            &&  sym[1] == 'v'
    199            &&  sym[2] == 'g'
    200            && (sym[3] == 'r' || sym[3] == 'w')
    201            &&  VG_(isdigit)(sym[4])
    202            &&  VG_(isdigit)(sym[5])
    203            &&  VG_(isdigit)(sym[6])
    204            &&  VG_(isdigit)(sym[7])
    205            &&  VG_(isdigit)(sym[8])
    206            &&  sym[9] == 'Z'
    207            && (sym[10] == 'Z' || sym[10] == 'U')
    208            &&  sym[11] == '_';
    209 
    210    if (valid
    211        && sym[4] == '0' && sym[5] == '0' && sym[6] == '0' && sym[7] == '0'
    212        && sym[8] != '0') {
    213       /* If the eclass tag is 0000 (meaning "no eclass"), the priority
    214          must be 0 too. */
    215       valid = False;
    216    }
    217 
    218    if (!valid)
    219       return False;
    220 
    221    fn_is_encoded = sym[10] == 'Z';
    222 
    223    if (isWrap)
    224       *isWrap = sym[3] == 'w';
    225 
    226    if (eclassTag) {
    227       *eclassTag =    1000 * ((Int)sym[4] - '0')
    228                    +  100 * ((Int)sym[5] - '0')
    229                    +  10 * ((Int)sym[6] - '0')
    230                    +  1 * ((Int)sym[7] - '0');
    231       vg_assert(*eclassTag >= 0 && *eclassTag <= 9999);
    232    }
    233 
    234    if (eclassPrio) {
    235       *eclassPrio = ((Int)sym[8]) - '0';
    236       vg_assert(*eclassPrio >= 0 && *eclassPrio <= 9);
    237    }
    238 
    239    /* Now check the soname prefix isn't "VG_Z_", as described in
    240       pub_tool_redir.h. */
    241    is_VG_Z_prefixed =
    242       sym[12] == 'V' &&
    243       sym[13] == 'G' &&
    244       sym[14] == '_' &&
    245       sym[15] == 'Z' &&
    246       sym[16] == '_';
    247    if (is_VG_Z_prefixed) {
    248       vg_assert2(0, "symbol with a 'VG_Z_' prefix: %s.\n"
    249                     "see pub_tool_redir.h for an explanation.", sym);
    250    }
    251 
    252    /* Now scan the Z-encoded soname. */
    253    i = 12;
    254    while (True) {
    255 
    256       if (sym[i] == '_')
    257       /* Found the delimiter.  Move on to the fnname loop. */
    258          break;
    259 
    260       if (sym[i] == 0) {
    261          error = True;
    262          goto out;
    263       }
    264 
    265       if (sym[i] != 'Z') {
    266          EMITSO(sym[i]);
    267          i++;
    268          continue;
    269       }
    270 
    271       /* We've got a Z-escape. */
    272       i++;
    273       switch (sym[i]) {
    274          case 'a': EMITSO('*'); break;
    275          case 'c': EMITSO(':'); break;
    276          case 'd': EMITSO('.'); break;
    277          case 'h': EMITSO('-'); break;
    278          case 'p': EMITSO('+'); break;
    279          case 's': EMITSO(' '); break;
    280          case 'u': EMITSO('_'); break;
    281          case 'A': EMITSO('@'); break;
    282          case 'D': EMITSO('$'); break;
    283          case 'L': EMITSO('('); break;
    284          case 'R': EMITSO(')'); break;
    285          case 'S': EMITSO('/'); break;
    286          case 'Z': EMITSO('Z'); break;
    287          default: error = True; goto out;
    288       }
    289       i++;
    290    }
    291 
    292    vg_assert(sym[i] == '_');
    293    i++;
    294 
    295    /* Now deal with the function name part. */
    296    if (!fn_is_encoded) {
    297 
    298       /* simple; just copy. */
    299       while (True) {
    300          if (sym[i] == 0)
    301             break;
    302          EMITFN(sym[i]);
    303          i++;
    304       }
    305       goto out;
    306 
    307    }
    308 
    309    /* else use a Z-decoding loop like with soname */
    310    while (True) {
    311 
    312       if (sym[i] == 0)
    313          break;
    314 
    315       if (sym[i] != 'Z') {
    316          EMITFN(sym[i]);
    317          i++;
    318          continue;
    319       }
    320 
    321       /* We've got a Z-escape. */
    322       i++;
    323       switch (sym[i]) {
    324          case 'a': EMITFN('*'); break;
    325          case 'c': EMITFN(':'); break;
    326          case 'd': EMITFN('.'); break;
    327          case 'h': EMITFN('-'); break;
    328          case 'p': EMITFN('+'); break;
    329          case 's': EMITFN(' '); break;
    330          case 'u': EMITFN('_'); break;
    331          case 'A': EMITFN('@'); break;
    332          case 'D': EMITFN('$'); break;
    333          case 'L': EMITFN('('); break;
    334          case 'R': EMITFN(')'); break;
    335          case 'Z': EMITFN('Z'); break;
    336          default: error = True; goto out;
    337       }
    338       i++;
    339    }
    340 
    341   out:
    342    EMITSO(0);
    343    EMITFN(0);
    344 
    345    if (error) {
    346       /* Something's wrong.  Give up. */
    347       VG_(message)(Vg_UserMsg,
    348                    "m_demangle: error Z-demangling: %s\n", sym);
    349       return False;
    350    }
    351 
    352    return True;
    353 }
    354 
    355 
    356 /*--------------------------------------------------------------------*/
    357 /*--- end                                                          ---*/
    358 /*--------------------------------------------------------------------*/
    359