Home | History | Annotate | Download | only in m_demangle
      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- Demangling of C++ mangled names.                  demangle.c ---*/
      4 /*--------------------------------------------------------------------*/
      5 
      6 /*
      7    This file is part of Valgrind, a dynamic binary instrumentation
      8    framework.
      9 
     10    Copyright (C) 2000-2017 Julian Seward
     11       jseward (at) acm.org
     12 
     13    Rust demangler components are
     14    Copyright (C) 2016-2016 David Tolnay
     15       dtolnay (at) gmail.com
     16 
     17    This program is free software; you can redistribute it and/or
     18    modify it under the terms of the GNU General Public License as
     19    published by the Free Software Foundation; either version 2 of the
     20    License, or (at your option) any later version.
     21 
     22    This program is distributed in the hope that it will be useful, but
     23    WITHOUT ANY WARRANTY; without even the implied warranty of
     24    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     25    General Public License for more details.
     26 
     27    You should have received a copy of the GNU General Public License
     28    along with this program; if not, write to the Free Software
     29    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     30    02111-1307, USA.
     31 
     32    The GNU General Public License is contained in the file COPYING.
     33 */
     34 
     35 #include "pub_core_basics.h"
     36 #include "pub_core_demangle.h"
     37 #include "pub_core_libcassert.h"
     38 #include "pub_core_libcbase.h"
     39 #include "pub_core_libcprint.h"
     40 #include "pub_core_mallocfree.h"
     41 #include "pub_core_options.h"
     42 
     43 #include "vg_libciface.h"
     44 #include "demangle.h"
     45 
     46 
     47 /*------------------------------------------------------------*/
     48 /*---                                                      ---*/
     49 /*------------------------------------------------------------*/
     50 
     51 /* The demangler's job is to take a raw symbol name and turn it into
     52    something a Human Bean can understand.  Our mangling model
     53    comprises a three stage pipeline.  Mangling pushes names forward
     54    through the pipeline (0, then 1, then 2) and demangling is
     55    obviously the reverse.  In practice it is highly unlikely that a
     56    name would require all stages, but it is not impossible either.
     57 
     58    0. If we're working with Rust, Rust names are lightly mangled by
     59       the Rust front end.
     60 
     61    1. Then the name is subject to standard C++ mangling.
     62 
     63    2. Optionally, in relatively rare cases, the resulting name is then
     64       itself encoded using Z-escaping (see pub_core_redir.h) so as to
     65       become part of a redirect-specification.
     66 
     67    Therefore, VG_(demangle) first tries to undo (2).  If successful,
     68    the soname part is discarded (humans don't want to see that).
     69    Then, it tries to undo (1) (using demangling code from GNU/FSF) and
     70    finally it tries to undo (0).
     71 
     72    Finally, it changes the name of all symbols which are known to be
     73    functions below main() to "(below main)".  This helps reduce
     74    variability of stack traces, something which has been a problem for
     75    the testsuite for a long time.
     76 
     77    --------
   If do_cxx_demangling == True, it does all the above stages:
     79    - undo (2) [Z-encoding]
     80    - undo (1) [C++ mangling]
     81    - if (1) succeeds, undo (0) [Rust mangling]
     82    - do the below-main hack
     83 
     84    Rust demangling (0) is only done if C++ demangling (1) succeeds
     85    because Rust demangling is performed in-place, and it is difficult
     86    to prove that we "own" the storage -- hence, that the in-place
     87    operation is safe -- unless it is clear that it has come from the
     88    C++ demangler, which returns its output in a heap-allocated buffer
     89    which we can be sure we own.  In practice (Nov 2016) this does not
     90    seem to be a problem, since the Rust compiler appears to apply C++
     91    mangling after Rust mangling, so we never encounter symbols that
     92    require Rust demangling but not C++ demangling.
     93 
   If do_cxx_demangling == False, the C++ and Rust stages are skipped:
     95    - undo (2) [Z-encoding]
     96    - do the below-main hack
     97 */
     98 
     99 /* Note that the C++ demangler is from GNU libiberty and is almost
    100    completely unmodified.  We use vg_libciface.h as a way to
    101    impedance-match the libiberty code into our own framework.
    102 
    103    The libiberty code included here was taken from the GCC repository
    104    and is released under the LGPL 2.1 license, which AFAICT is compatible
    105    with "GPL 2 or later" and so is OK for inclusion in Valgrind.
    106 
    107    To update to a newer libiberty, use the "update-demangler" script
    108    which is included in the valgrind repository. */
    109 
    110 /* This is the main, standard demangler entry point. */
    111 
    112 /* Upon return, *RESULT will point to the demangled name.
    113    The memory buffer that holds the demangled name is allocated on the
    114    heap and will be deallocated in the next invocation. Conceptually,
    115    that buffer is owned by VG_(demangle). That means two things:
    116    (1) Users of VG_(demangle) must not free that buffer.
    117    (2) If the demangled name needs to be stashed away for later use,
    118        the contents of the buffer need to be copied. It is not sufficient
    119        to just store the pointer as it will point to deallocated memory
    120        after the next VG_(demangle) invocation. */
    121 void VG_(demangle) ( Bool do_cxx_demangling, Bool do_z_demangling,
    122                      /* IN */  const HChar  *orig,
    123                      /* OUT */ const HChar **result )
    124 {
    125    /* Possibly undo (2) */
    126    /* Z-Demangling was requested.
    127       The fastest way to see if it's a Z-mangled name is just to attempt
    128       to Z-demangle it (with NULL for the soname buffer, since we're not
    129       interested in that). */
    130    if (do_z_demangling) {
    131       const HChar *z_demangled;
    132 
    133       if (VG_(maybe_Z_demangle)( orig, NULL, /*soname*/
    134                                  &z_demangled, NULL, NULL, NULL )) {
    135          orig = z_demangled;
    136       }
    137    }
    138 
    139    /* Possibly undo (1) */
    140    if (do_cxx_demangling && VG_(clo_demangle)
    141        && orig != NULL && orig[0] == '_' && orig[1] == 'Z') {
    142       /* !!! vvv STATIC vvv !!! */
    143       static HChar* demangled = NULL;
    144       /* !!! ^^^ STATIC ^^^ !!! */
    145 
    146       /* Free up previously demangled name */
    147       if (demangled) {
    148          VG_(arena_free) (VG_AR_DEMANGLE, demangled);
    149          demangled = NULL;
    150       }
    151       demangled = ML_(cplus_demangle) ( orig, DMGL_ANSI | DMGL_PARAMS );
    152 
    153       *result = (demangled == NULL) ? orig : demangled;
    154 
    155       if (demangled) {
    156          /* Possibly undo (0).  This is the only place where it is
    157             safe, from a storage management perspective, to
    158             Rust-demangle the symbol.  That's because Rust demangling
    159             happens in place, so we need to be sure that the storage
    160             it is happening in is actually owned by us, and non-const.
    161             In this case, the value returned by ML_(cplus_demangle)
    162             does have that property. */
    163          if (rust_is_mangled(demangled)) {
    164             rust_demangle_sym(demangled);
    165          }
    166          *result = demangled;
    167       } else {
    168          *result = orig;
    169       }
    170 
    171    } else {
    172       *result = orig;
    173    }
    174 
    175    // 13 Mar 2005: We used to check here that the demangler wasn't leaking
    176    // by calling the (now-removed) function VG_(is_empty_arena)().  But,
    177    // very rarely (ie. I've heard of it twice in 3 years), the demangler
    178    // does leak.  But, we can't do much about it, and it's not a disaster,
    179    // so we just let it slide without aborting or telling the user.
    180 }
    181 
    182 
    183 /*------------------------------------------------------------*/
    184 /*--- DEMANGLE Z-ENCODED NAMES                             ---*/
    185 /*------------------------------------------------------------*/
    186 
    187 /* Demangle a Z-encoded name as described in pub_tool_redir.h.
    188    Z-encoded names are used by Valgrind for doing function
    189    interception/wrapping.
    190 
    191    Demangle 'sym' into its soname and fnname parts, putting them in
    192    the specified buffers.  Returns a Bool indicating whether the
    193    demangled failed or not.  A failure can occur because the prefix
    194    isn't recognised, the internal Z-escaping is wrong, or because one
    195    or the other (or both) of the output buffers becomes full.  Passing
    196    'so' as NULL is acceptable if the caller is only interested in the
    197    function name part. */
    198 
    199 Bool VG_(maybe_Z_demangle) ( const HChar* sym,
    200                              /*OUT*/const HChar** so,
    201                              /*OUT*/const HChar** fn,
    202                              /*OUT*/Bool* isWrap,
    203                              /*OUT*/Int*  eclassTag,
    204                              /*OUT*/Int*  eclassPrio )
    205 {
    206    static HChar *sobuf;
    207    static HChar *fnbuf;
    208    static SizeT  buf_len = 0;
    209 
    210    /* The length of the name after undoing Z-encoding is always smaller
    211       than the mangled name. Making the soname and fnname buffers as large
    212       as the demangled name is therefore always safe and overflow can never
    213       occur. */
    214    SizeT len = VG_(strlen)(sym) + 1;
    215 
    216    if (buf_len < len) {
    217       sobuf = VG_(arena_realloc)(VG_AR_DEMANGLE, "Z-demangle", sobuf, len);
    218       fnbuf = VG_(arena_realloc)(VG_AR_DEMANGLE, "Z-demangle", fnbuf, len);
    219       buf_len = len;
    220    }
    221    sobuf[0] = fnbuf[0] = '\0';
    222 
    223    if (so)
    224      *so = sobuf;
    225    *fn = fnbuf;
    226 
    227 #  define EMITSO(ch)                           \
    228       do {                                     \
    229          if (so) {                             \
    230             sobuf[soi++] = ch; sobuf[soi] = 0; \
    231          }                                     \
    232       } while (0)
    233 #  define EMITFN(ch)                           \
    234       do {                                     \
    235          fnbuf[fni++] = ch; fnbuf[fni] = 0;    \
    236       } while (0)
    237 
    238    Bool error, valid, fn_is_encoded, is_VG_Z_prefixed;
    239    Int  soi, fni, i;
    240 
    241    error = False;
    242    soi = 0;
    243    fni = 0;
    244 
    245    valid =     sym[0] == '_'
    246            &&  sym[1] == 'v'
    247            &&  sym[2] == 'g'
    248            && (sym[3] == 'r' || sym[3] == 'w')
    249            &&  VG_(isdigit)(sym[4])
    250            &&  VG_(isdigit)(sym[5])
    251            &&  VG_(isdigit)(sym[6])
    252            &&  VG_(isdigit)(sym[7])
    253            &&  VG_(isdigit)(sym[8])
    254            &&  sym[9] == 'Z'
    255            && (sym[10] == 'Z' || sym[10] == 'U')
    256            &&  sym[11] == '_';
    257 
    258    if (valid
    259        && sym[4] == '0' && sym[5] == '0' && sym[6] == '0' && sym[7] == '0'
    260        && sym[8] != '0') {
    261       /* If the eclass tag is 0000 (meaning "no eclass"), the priority
    262          must be 0 too. */
    263       valid = False;
    264    }
    265 
    266    if (!valid)
    267       return False;
    268 
    269    fn_is_encoded = sym[10] == 'Z';
    270 
    271    if (isWrap)
    272       *isWrap = sym[3] == 'w';
    273 
    274    if (eclassTag) {
    275       *eclassTag =    1000 * ((Int)sym[4] - '0')
    276                    +  100 * ((Int)sym[5] - '0')
    277                    +  10 * ((Int)sym[6] - '0')
    278                    +  1 * ((Int)sym[7] - '0');
    279       vg_assert(*eclassTag >= 0 && *eclassTag <= 9999);
    280    }
    281 
    282    if (eclassPrio) {
    283       *eclassPrio = ((Int)sym[8]) - '0';
    284       vg_assert(*eclassPrio >= 0 && *eclassPrio <= 9);
    285    }
    286 
    287    /* Now check the soname prefix isn't "VG_Z_", as described in
    288       pub_tool_redir.h. */
    289    is_VG_Z_prefixed =
    290       sym[12] == 'V' &&
    291       sym[13] == 'G' &&
    292       sym[14] == '_' &&
    293       sym[15] == 'Z' &&
    294       sym[16] == '_';
    295    if (is_VG_Z_prefixed) {
    296       vg_assert2(0, "symbol with a 'VG_Z_' prefix: %s.\n"
    297                     "see pub_tool_redir.h for an explanation.", sym);
    298    }
    299 
    300    /* Now scan the Z-encoded soname. */
    301    i = 12;
    302    while (True) {
    303 
    304       if (sym[i] == '_')
    305       /* Found the delimiter.  Move on to the fnname loop. */
    306          break;
    307 
    308       if (sym[i] == 0) {
    309          error = True;
    310          goto out;
    311       }
    312 
    313       if (sym[i] != 'Z') {
    314          EMITSO(sym[i]);
    315          i++;
    316          continue;
    317       }
    318 
    319       /* We've got a Z-escape. */
    320       i++;
    321       switch (sym[i]) {
    322          case 'a': EMITSO('*'); break;
    323          case 'c': EMITSO(':'); break;
    324          case 'd': EMITSO('.'); break;
    325          case 'h': EMITSO('-'); break;
    326          case 'p': EMITSO('+'); break;
    327          case 's': EMITSO(' '); break;
    328          case 'u': EMITSO('_'); break;
    329          case 'A': EMITSO('@'); break;
    330          case 'D': EMITSO('$'); break;
    331          case 'L': EMITSO('('); break;
    332          case 'P': EMITSO('%'); break;
    333          case 'R': EMITSO(')'); break;
    334          case 'S': EMITSO('/'); break;
    335          case 'Z': EMITSO('Z'); break;
    336          default: error = True; goto out;
    337       }
    338       i++;
    339    }
    340 
    341    vg_assert(sym[i] == '_');
    342    i++;
    343 
    344    /* Now deal with the function name part. */
    345    if (!fn_is_encoded) {
    346 
    347       /* simple; just copy. */
    348       while (True) {
    349          if (sym[i] == 0)
    350             break;
    351          EMITFN(sym[i]);
    352          i++;
    353       }
    354       goto out;
    355 
    356    }
    357 
    358    /* else use a Z-decoding loop like with soname */
    359    while (True) {
    360 
    361       if (sym[i] == 0)
    362          break;
    363 
    364       if (sym[i] != 'Z') {
    365          EMITFN(sym[i]);
    366          i++;
    367          continue;
    368       }
    369 
    370       /* We've got a Z-escape. */
    371       i++;
    372       switch (sym[i]) {
    373          case 'a': EMITFN('*'); break;
    374          case 'c': EMITFN(':'); break;
    375          case 'd': EMITFN('.'); break;
    376          case 'h': EMITFN('-'); break;
    377          case 'p': EMITFN('+'); break;
    378          case 's': EMITFN(' '); break;
    379          case 'u': EMITFN('_'); break;
    380          case 'A': EMITFN('@'); break;
    381          case 'D': EMITFN('$'); break;
    382          case 'L': EMITFN('('); break;
    383          case 'P': EMITFN('%'); break;
    384          case 'R': EMITFN(')'); break;
    385          case 'S': EMITFN('/'); break;
    386          case 'Z': EMITFN('Z'); break;
    387          default: error = True; goto out;
    388       }
    389       i++;
    390    }
    391 
    392   out:
    393    EMITSO(0);
    394    EMITFN(0);
    395 
    396    if (error) {
    397       /* Something's wrong.  Give up. */
    398       VG_(message)(Vg_UserMsg,
    399                    "m_demangle: error Z-demangling: %s\n", sym);
    400       return False;
    401    }
    402 
    403    return True;
    404 }
    405 
    406 
    407 /*--------------------------------------------------------------------*/
    408 /*--- end                                                          ---*/
    409 /*--------------------------------------------------------------------*/
    410