Home | History | Annotate | Download | only in m_demangle
      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- Demangling of C++ mangled names.                  demangle.c ---*/
      4 /*--------------------------------------------------------------------*/
      5 
      6 /*
      7    This file is part of Valgrind, a dynamic binary instrumentation
      8    framework.
      9 
     10    Copyright (C) 2000-2013 Julian Seward
     11       jseward (at) acm.org
     12 
     13    This program is free software; you can redistribute it and/or
     14    modify it under the terms of the GNU General Public License as
     15    published by the Free Software Foundation; either version 2 of the
     16    License, or (at your option) any later version.
     17 
     18    This program is distributed in the hope that it will be useful, but
     19    WITHOUT ANY WARRANTY; without even the implied warranty of
     20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     21    General Public License for more details.
     22 
     23    You should have received a copy of the GNU General Public License
     24    along with this program; if not, write to the Free Software
     25    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     26    02111-1307, USA.
     27 
     28    The GNU General Public License is contained in the file COPYING.
     29 */
     30 
     31 #include "pub_core_basics.h"
     32 #include "pub_core_demangle.h"
     33 #include "pub_core_libcassert.h"
     34 #include "pub_core_libcbase.h"
     35 #include "pub_core_libcprint.h"
     36 #include "pub_core_mallocfree.h"
     37 #include "pub_core_options.h"
     38 
     39 #include "vg_libciface.h"
     40 #include "demangle.h"
     41 
     42 /* The demangler's job is to take a raw symbol name and turn it into
     43    something a Human Bean can understand.  There are two levels of
     44    mangling.
     45 
     46    1. First, C++ names are mangled by the compiler.  So we'll have to
     47       undo that.
     48 
     49    2. Optionally, in relatively rare cases, the resulting name is then
     50       itself encoded using Z-escaping (see pub_core_redir.h) so as to
     51       become part of a redirect-specification.
     52 
     53    Therefore, VG_(demangle) first tries to undo (2).  If successful,
     54    the soname part is discarded (humans don't want to see that).
     55    Then, it tries to undo (1) (using demangling code from GNU/FSF).
     56 
     57    Finally, change the name of all symbols which are known to be
     58    functions below main() to "(below main)".  This helps reduce
     59    variability of stack traces, something which has been a problem for
     60    the testsuite for a long time.
     61 
     62    --------
     63    If do_cxx_demangling == True, does all the above stages:
     64    - undo (2) [Z-encoding]
     65    - undo (1) [C++ mangling]
     66    - do the below-main hack
     67 
     68    If do_cxx_demangling == False, the middle stage is skipped:
     69    - undo (2) [Z-encoding]
     70    - do the below-main hack
     71 */
     72 
     73 /* Note that the C++ demangler is from GNU libiberty and is almost
     74    completely unmodified.  We use vg_libciface.h as a way to
     75    impedance-match the libiberty code into our own framework.
     76 
     77    The current code is from libiberty in the gcc tree, gcc svn
     78    r181975, dated 12 Dec 2011 (when the gcc trunk was in Stage 3
     79    leading up to a gcc-4.7 release).  As of r141363, libiberty is LGPL
     80    2.1, which AFAICT is compatible with "GPL 2 or later" and so is OK
     81    for inclusion in Valgrind.
     82 
     83    To update to a newer libiberty, it might be simplest to svn diff
     84    the gcc tree libiberty against r181975 and then apply those diffs
     85    here. */
     86 
     87 /* This is the main, standard demangler entry point. */
     88 
     89 void VG_(demangle) ( Bool do_cxx_demangling, Bool do_z_demangling,
     90                      HChar* orig, HChar* result, Int result_size )
     91 {
     92 #  define N_ZBUF 4096
     93    HChar* demangled = NULL;
     94    HChar z_demangled[N_ZBUF];
     95 
     96    /* Possibly undo (2) */
     97    /* Z-Demangling was requested.
     98       The fastest way to see if it's a Z-mangled name is just to attempt
     99       to Z-demangle it (with NULL for the soname buffer, since we're not
    100       interested in that). */
    101    if (do_z_demangling) {
    102       if (VG_(maybe_Z_demangle)( orig, NULL,0,/*soname*/
    103                                  z_demangled, N_ZBUF, NULL, NULL, NULL )) {
    104          orig = z_demangled;
    105       }
    106    }
    107 
    108    /* Possibly undo (1) */
    109    if (do_cxx_demangling && VG_(clo_demangle)) {
    110       demangled = ML_(cplus_demangle) ( orig, DMGL_ANSI | DMGL_PARAMS );
    111    } else {
    112       demangled = NULL;
    113    }
    114    if (demangled) {
    115       VG_(strncpy_safely)(result, demangled, result_size);
    116       VG_(arena_free) (VG_AR_DEMANGLE, demangled);
    117    } else {
    118       VG_(strncpy_safely)(result, orig, result_size);
    119    }
    120 
    121    // 13 Mar 2005: We used to check here that the demangler wasn't leaking
    122    // by calling the (now-removed) function VG_(is_empty_arena)().  But,
    123    // very rarely (ie. I've heard of it twice in 3 years), the demangler
    124    // does leak.  But, we can't do much about it, and it's not a disaster,
    125    // so we just let it slide without aborting or telling the user.
    126 #  undef N_ZBUF
    127 }
    128 
    129 
    130 /*------------------------------------------------------------*/
    131 /*--- DEMANGLE Z-ENCODED NAMES                             ---*/
    132 /*------------------------------------------------------------*/
    133 
    134 /* Demangle a Z-encoded name as described in pub_tool_redir.h.
    135    Z-encoded names are used by Valgrind for doing function
    136    interception/wrapping.
    137 
    138    Demangle 'sym' into its soname and fnname parts, putting them in
    139    the specified buffers.  Returns a Bool indicating whether the
    140    demangled failed or not.  A failure can occur because the prefix
    141    isn't recognised, the internal Z-escaping is wrong, or because one
    142    or the other (or both) of the output buffers becomes full.  Passing
    143    'so' as NULL is acceptable if the caller is only interested in the
    144    function name part. */
    145 
    146 Bool VG_(maybe_Z_demangle) ( const HChar* sym,
    147                              /*OUT*/HChar* so, Int soLen,
    148                              /*OUT*/HChar* fn, Int fnLen,
    149                              /*OUT*/Bool* isWrap,
    150                              /*OUT*/Int*  eclassTag,
    151                              /*OUT*/Int*  eclassPrio )
    152 {
    153 #  define EMITSO(ch)                           \
    154       do {                                     \
    155          if (so) {                             \
    156             if (soi >= soLen) {                \
    157                so[soLen-1] = 0; oflow = True;  \
    158             } else {                           \
    159                so[soi++] = ch; so[soi] = 0;    \
    160             }                                  \
    161          }                                     \
    162       } while (0)
    163 #  define EMITFN(ch)                           \
    164       do {                                     \
    165          if (fni >= fnLen) {                   \
    166             fn[fnLen-1] = 0; oflow = True;     \
    167          } else {                              \
    168             fn[fni++] = ch; fn[fni] = 0;       \
    169          }                                     \
    170       } while (0)
    171 
    172    Bool error, oflow, valid, fn_is_encoded, is_VG_Z_prefixed;
    173    Int  soi, fni, i;
    174 
    175    vg_assert(soLen > 0 || (soLen == 0 && so == NULL));
    176    vg_assert(fnLen > 0);
    177    error = False;
    178    oflow = False;
    179    soi = 0;
    180    fni = 0;
    181 
    182    valid =     sym[0] == '_'
    183            &&  sym[1] == 'v'
    184            &&  sym[2] == 'g'
    185            && (sym[3] == 'r' || sym[3] == 'w')
    186            &&  VG_(isdigit)(sym[4])
    187            &&  VG_(isdigit)(sym[5])
    188            &&  VG_(isdigit)(sym[6])
    189            &&  VG_(isdigit)(sym[7])
    190            &&  VG_(isdigit)(sym[8])
    191            &&  sym[9] == 'Z'
    192            && (sym[10] == 'Z' || sym[10] == 'U')
    193            &&  sym[11] == '_';
    194 
    195    if (valid
    196        && sym[4] == '0' && sym[5] == '0' && sym[6] == '0' && sym[7] == '0'
    197        && sym[8] != '0') {
    198       /* If the eclass tag is 0000 (meaning "no eclass"), the priority
    199          must be 0 too. */
    200       valid = False;
    201    }
    202 
    203    if (!valid)
    204       return False;
    205 
    206    fn_is_encoded = sym[10] == 'Z';
    207 
    208    if (isWrap)
    209       *isWrap = sym[3] == 'w';
    210 
    211    if (eclassTag) {
    212       *eclassTag =    1000 * ((Int)sym[4] - '0')
    213                    +  100 * ((Int)sym[5] - '0')
    214                    +  10 * ((Int)sym[6] - '0')
    215                    +  1 * ((Int)sym[7] - '0');
    216       vg_assert(*eclassTag >= 0 && *eclassTag <= 9999);
    217    }
    218 
    219    if (eclassPrio) {
    220       *eclassPrio = ((Int)sym[8]) - '0';
    221       vg_assert(*eclassPrio >= 0 && *eclassPrio <= 9);
    222    }
    223 
    224    /* Now check the soname prefix isn't "VG_Z_", as described in
    225       pub_tool_redir.h. */
    226    is_VG_Z_prefixed =
    227       sym[12] == 'V' &&
    228       sym[13] == 'G' &&
    229       sym[14] == '_' &&
    230       sym[15] == 'Z' &&
    231       sym[16] == '_';
    232    if (is_VG_Z_prefixed) {
    233       vg_assert2(0, "symbol with a 'VG_Z_' prefix: %s.\n"
    234                     "see pub_tool_redir.h for an explanation.", sym);
    235    }
    236 
    237    /* Now scan the Z-encoded soname. */
    238    i = 12;
    239    while (True) {
    240 
    241       if (sym[i] == '_')
    242       /* Found the delimiter.  Move on to the fnname loop. */
    243          break;
    244 
    245       if (sym[i] == 0) {
    246          error = True;
    247          goto out;
    248       }
    249 
    250       if (sym[i] != 'Z') {
    251          EMITSO(sym[i]);
    252          i++;
    253          continue;
    254       }
    255 
    256       /* We've got a Z-escape. */
    257       i++;
    258       switch (sym[i]) {
    259          case 'a': EMITSO('*'); break;
    260          case 'c': EMITSO(':'); break;
    261          case 'd': EMITSO('.'); break;
    262          case 'h': EMITSO('-'); break;
    263          case 'p': EMITSO('+'); break;
    264          case 's': EMITSO(' '); break;
    265          case 'u': EMITSO('_'); break;
    266          case 'A': EMITSO('@'); break;
    267          case 'D': EMITSO('$'); break;
    268          case 'L': EMITSO('('); break;
    269          case 'R': EMITSO(')'); break;
    270          case 'Z': EMITSO('Z'); break;
    271          default: error = True; goto out;
    272       }
    273       i++;
    274    }
    275 
    276    vg_assert(sym[i] == '_');
    277    i++;
    278 
    279    /* Now deal with the function name part. */
    280    if (!fn_is_encoded) {
    281 
    282       /* simple; just copy. */
    283       while (True) {
    284          if (sym[i] == 0)
    285             break;
    286          EMITFN(sym[i]);
    287          i++;
    288       }
    289       goto out;
    290 
    291    }
    292 
    293    /* else use a Z-decoding loop like with soname */
    294    while (True) {
    295 
    296       if (sym[i] == 0)
    297          break;
    298 
    299       if (sym[i] != 'Z') {
    300          EMITFN(sym[i]);
    301          i++;
    302          continue;
    303       }
    304 
    305       /* We've got a Z-escape. */
    306       i++;
    307       switch (sym[i]) {
    308          case 'a': EMITFN('*'); break;
    309          case 'c': EMITFN(':'); break;
    310          case 'd': EMITFN('.'); break;
    311          case 'h': EMITFN('-'); break;
    312          case 'p': EMITFN('+'); break;
    313          case 's': EMITFN(' '); break;
    314          case 'u': EMITFN('_'); break;
    315          case 'A': EMITFN('@'); break;
    316          case 'D': EMITFN('$'); break;
    317          case 'L': EMITFN('('); break;
    318          case 'R': EMITFN(')'); break;
    319          case 'Z': EMITFN('Z'); break;
    320          default: error = True; goto out;
    321       }
    322       i++;
    323    }
    324 
    325   out:
    326    EMITSO(0);
    327    EMITFN(0);
    328 
    329    if (error) {
    330       /* Something's wrong.  Give up. */
    331       VG_(message)(Vg_UserMsg,
    332                    "m_demangle: error Z-demangling: %s\n", sym);
    333       return False;
    334    }
    335    if (oflow) {
    336       /* It didn't fit.  Give up. */
    337       VG_(message)(Vg_UserMsg,
    338                    "m_demangle: oflow Z-demangling: %s\n", sym);
    339       return False;
    340    }
    341 
    342    return True;
    343 }
    344 
    345 
    346 /*--------------------------------------------------------------------*/
    347 /*--- end                                                          ---*/
    348 /*--------------------------------------------------------------------*/
    349