1 2 /*--------------------------------------------------------------------*/ 3 /*--- Demangling of C++ mangled names. demangle.c ---*/ 4 /*--------------------------------------------------------------------*/ 5 6 /* 7 This file is part of Valgrind, a dynamic binary instrumentation 8 framework. 9 10 Copyright (C) 2000-2015 Julian Seward 11 jseward (at) acm.org 12 13 This program is free software; you can redistribute it and/or 14 modify it under the terms of the GNU General Public License as 15 published by the Free Software Foundation; either version 2 of the 16 License, or (at your option) any later version. 17 18 This program is distributed in the hope that it will be useful, but 19 WITHOUT ANY WARRANTY; without even the implied warranty of 20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 General Public License for more details. 22 23 You should have received a copy of the GNU General Public License 24 along with this program; if not, write to the Free Software 25 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 26 02111-1307, USA. 27 28 The GNU General Public License is contained in the file COPYING. 29 */ 30 31 #include "pub_core_basics.h" 32 #include "pub_core_demangle.h" 33 #include "pub_core_libcassert.h" 34 #include "pub_core_libcbase.h" 35 #include "pub_core_libcprint.h" 36 #include "pub_core_mallocfree.h" 37 #include "pub_core_options.h" 38 39 #include "vg_libciface.h" 40 #include "demangle.h" 41 42 /* The demangler's job is to take a raw symbol name and turn it into 43 something a Human Bean can understand. There are two levels of 44 mangling. 45 46 1. First, C++ names are mangled by the compiler. So we'll have to 47 undo that. 48 49 2. Optionally, in relatively rare cases, the resulting name is then 50 itself encoded using Z-escaping (see pub_core_redir.h) so as to 51 become part of a redirect-specification. 52 53 Therefore, VG_(demangle) first tries to undo (2). 
If successful,
   the soname part is discarded (humans don't want to see that).
   Then, it tries to undo (1) (using demangling code from GNU/FSF).

   Finally, change the name of all symbols which are known to be
   functions below main() to "(below main)".  This helps reduce
   variability of stack traces, something which has been a problem for
   the testsuite for a long time.

   --------
   Stage (2) is only attempted if do_z_demangling == True.

   If do_cxx_demangling == True (and VG_(clo_demangle) is set), does
   all the above stages:
   - undo (2) [Z-encoding]
   - undo (1) [C++ mangling]
   - do the below-main hack

   If do_cxx_demangling == False, the middle stage is skipped:
   - undo (2) [Z-encoding]
   - do the below-main hack
*/

/* Note that the C++ demangler is from GNU libiberty and is almost
   completely unmodified.  We use vg_libciface.h as a way to
   impedance-match the libiberty code into our own framework.

   The libiberty code included here was taken from the GCC repository
   and is released under the LGPL 2.1 license, which AFAICT is compatible
   with "GPL 2 or later" and so is OK for inclusion in Valgrind.

   To update to a newer libiberty, use the "update-demangler" script
   which is included in the valgrind repository. */

/* This is the main, standard demangler entry point. */

/* Upon return, *RESULT will point to the demangled name.
   The memory buffer that holds the demangled name is allocated on the
   heap and will be deallocated in the next invocation.  Conceptually,
   that buffer is owned by VG_(demangle).  That means two things:
   (1) Users of VG_(demangle) must not free that buffer.
   (2) If the demangled name needs to be stashed away for later use,
       the contents of the buffer need to be copied.  It is not sufficient
       to just store the pointer as it will point to deallocated memory
       after the next VG_(demangle) invocation.
*/ 95 void VG_(demangle) ( Bool do_cxx_demangling, Bool do_z_demangling, 96 /* IN */ const HChar *orig, 97 /* OUT */ const HChar **result ) 98 { 99 /* Possibly undo (2) */ 100 /* Z-Demangling was requested. 101 The fastest way to see if it's a Z-mangled name is just to attempt 102 to Z-demangle it (with NULL for the soname buffer, since we're not 103 interested in that). */ 104 if (do_z_demangling) { 105 const HChar *z_demangled; 106 107 if (VG_(maybe_Z_demangle)( orig, NULL, /*soname*/ 108 &z_demangled, NULL, NULL, NULL )) { 109 orig = z_demangled; 110 } 111 } 112 113 /* Possibly undo (1) */ 114 if (do_cxx_demangling && VG_(clo_demangle)) { 115 static HChar* demangled = NULL; 116 117 /* Free up previously demangled name */ 118 if (demangled) VG_(arena_free) (VG_AR_DEMANGLE, demangled); 119 120 demangled = ML_(cplus_demangle) ( orig, DMGL_ANSI | DMGL_PARAMS ); 121 122 *result = (demangled == NULL) ? orig : demangled; 123 } else { 124 *result = orig; 125 } 126 127 // 13 Mar 2005: We used to check here that the demangler wasn't leaking 128 // by calling the (now-removed) function VG_(is_empty_arena)(). But, 129 // very rarely (ie. I've heard of it twice in 3 years), the demangler 130 // does leak. But, we can't do much about it, and it's not a disaster, 131 // so we just let it slide without aborting or telling the user. 132 } 133 134 135 /*------------------------------------------------------------*/ 136 /*--- DEMANGLE Z-ENCODED NAMES ---*/ 137 /*------------------------------------------------------------*/ 138 139 /* Demangle a Z-encoded name as described in pub_tool_redir.h. 140 Z-encoded names are used by Valgrind for doing function 141 interception/wrapping. 142 143 Demangle 'sym' into its soname and fnname parts, putting them in 144 the specified buffers. Returns a Bool indicating whether the 145 demangled failed or not. 
A failure can occur because the prefix 146 isn't recognised, the internal Z-escaping is wrong, or because one 147 or the other (or both) of the output buffers becomes full. Passing 148 'so' as NULL is acceptable if the caller is only interested in the 149 function name part. */ 150 151 Bool VG_(maybe_Z_demangle) ( const HChar* sym, 152 /*OUT*/const HChar** so, 153 /*OUT*/const HChar** fn, 154 /*OUT*/Bool* isWrap, 155 /*OUT*/Int* eclassTag, 156 /*OUT*/Int* eclassPrio ) 157 { 158 static HChar *sobuf; 159 static HChar *fnbuf; 160 static SizeT buf_len = 0; 161 162 /* The length of the name after undoing Z-encoding is always smaller 163 than the mangled name. Making the soname and fnname buffers as large 164 as the demangled name is therefore always safe and overflow can never 165 occur. */ 166 SizeT len = VG_(strlen)(sym) + 1; 167 168 if (buf_len < len) { 169 sobuf = VG_(arena_realloc)(VG_AR_DEMANGLE, "Z-demangle", sobuf, len); 170 fnbuf = VG_(arena_realloc)(VG_AR_DEMANGLE, "Z-demangle", fnbuf, len); 171 buf_len = len; 172 } 173 sobuf[0] = fnbuf[0] = '\0'; 174 175 if (so) 176 *so = sobuf; 177 *fn = fnbuf; 178 179 # define EMITSO(ch) \ 180 do { \ 181 if (so) { \ 182 sobuf[soi++] = ch; sobuf[soi] = 0; \ 183 } \ 184 } while (0) 185 # define EMITFN(ch) \ 186 do { \ 187 fnbuf[fni++] = ch; fnbuf[fni] = 0; \ 188 } while (0) 189 190 Bool error, valid, fn_is_encoded, is_VG_Z_prefixed; 191 Int soi, fni, i; 192 193 error = False; 194 soi = 0; 195 fni = 0; 196 197 valid = sym[0] == '_' 198 && sym[1] == 'v' 199 && sym[2] == 'g' 200 && (sym[3] == 'r' || sym[3] == 'w') 201 && VG_(isdigit)(sym[4]) 202 && VG_(isdigit)(sym[5]) 203 && VG_(isdigit)(sym[6]) 204 && VG_(isdigit)(sym[7]) 205 && VG_(isdigit)(sym[8]) 206 && sym[9] == 'Z' 207 && (sym[10] == 'Z' || sym[10] == 'U') 208 && sym[11] == '_'; 209 210 if (valid 211 && sym[4] == '0' && sym[5] == '0' && sym[6] == '0' && sym[7] == '0' 212 && sym[8] != '0') { 213 /* If the eclass tag is 0000 (meaning "no eclass"), the priority 214 must be 0 
too. */ 215 valid = False; 216 } 217 218 if (!valid) 219 return False; 220 221 fn_is_encoded = sym[10] == 'Z'; 222 223 if (isWrap) 224 *isWrap = sym[3] == 'w'; 225 226 if (eclassTag) { 227 *eclassTag = 1000 * ((Int)sym[4] - '0') 228 + 100 * ((Int)sym[5] - '0') 229 + 10 * ((Int)sym[6] - '0') 230 + 1 * ((Int)sym[7] - '0'); 231 vg_assert(*eclassTag >= 0 && *eclassTag <= 9999); 232 } 233 234 if (eclassPrio) { 235 *eclassPrio = ((Int)sym[8]) - '0'; 236 vg_assert(*eclassPrio >= 0 && *eclassPrio <= 9); 237 } 238 239 /* Now check the soname prefix isn't "VG_Z_", as described in 240 pub_tool_redir.h. */ 241 is_VG_Z_prefixed = 242 sym[12] == 'V' && 243 sym[13] == 'G' && 244 sym[14] == '_' && 245 sym[15] == 'Z' && 246 sym[16] == '_'; 247 if (is_VG_Z_prefixed) { 248 vg_assert2(0, "symbol with a 'VG_Z_' prefix: %s.\n" 249 "see pub_tool_redir.h for an explanation.", sym); 250 } 251 252 /* Now scan the Z-encoded soname. */ 253 i = 12; 254 while (True) { 255 256 if (sym[i] == '_') 257 /* Found the delimiter. Move on to the fnname loop. */ 258 break; 259 260 if (sym[i] == 0) { 261 error = True; 262 goto out; 263 } 264 265 if (sym[i] != 'Z') { 266 EMITSO(sym[i]); 267 i++; 268 continue; 269 } 270 271 /* We've got a Z-escape. */ 272 i++; 273 switch (sym[i]) { 274 case 'a': EMITSO('*'); break; 275 case 'c': EMITSO(':'); break; 276 case 'd': EMITSO('.'); break; 277 case 'h': EMITSO('-'); break; 278 case 'p': EMITSO('+'); break; 279 case 's': EMITSO(' '); break; 280 case 'u': EMITSO('_'); break; 281 case 'A': EMITSO('@'); break; 282 case 'D': EMITSO('$'); break; 283 case 'L': EMITSO('('); break; 284 case 'R': EMITSO(')'); break; 285 case 'S': EMITSO('/'); break; 286 case 'Z': EMITSO('Z'); break; 287 default: error = True; goto out; 288 } 289 i++; 290 } 291 292 vg_assert(sym[i] == '_'); 293 i++; 294 295 /* Now deal with the function name part. */ 296 if (!fn_is_encoded) { 297 298 /* simple; just copy. 
*/ 299 while (True) { 300 if (sym[i] == 0) 301 break; 302 EMITFN(sym[i]); 303 i++; 304 } 305 goto out; 306 307 } 308 309 /* else use a Z-decoding loop like with soname */ 310 while (True) { 311 312 if (sym[i] == 0) 313 break; 314 315 if (sym[i] != 'Z') { 316 EMITFN(sym[i]); 317 i++; 318 continue; 319 } 320 321 /* We've got a Z-escape. */ 322 i++; 323 switch (sym[i]) { 324 case 'a': EMITFN('*'); break; 325 case 'c': EMITFN(':'); break; 326 case 'd': EMITFN('.'); break; 327 case 'h': EMITFN('-'); break; 328 case 'p': EMITFN('+'); break; 329 case 's': EMITFN(' '); break; 330 case 'u': EMITFN('_'); break; 331 case 'A': EMITFN('@'); break; 332 case 'D': EMITFN('$'); break; 333 case 'L': EMITFN('('); break; 334 case 'R': EMITFN(')'); break; 335 case 'Z': EMITFN('Z'); break; 336 default: error = True; goto out; 337 } 338 i++; 339 } 340 341 out: 342 EMITSO(0); 343 EMITFN(0); 344 345 if (error) { 346 /* Something's wrong. Give up. */ 347 VG_(message)(Vg_UserMsg, 348 "m_demangle: error Z-demangling: %s\n", sym); 349 return False; 350 } 351 352 return True; 353 } 354 355 356 /*--------------------------------------------------------------------*/ 357 /*--- end ---*/ 358 /*--------------------------------------------------------------------*/ 359