1 2 /*--------------------------------------------------------------------*/ 3 /*--- Demangling of C++ mangled names. demangle.c ---*/ 4 /*--------------------------------------------------------------------*/ 5 6 /* 7 This file is part of Valgrind, a dynamic binary instrumentation 8 framework. 9 10 Copyright (C) 2000-2017 Julian Seward 11 jseward (at) acm.org 12 13 Rust demangler components are 14 Copyright (C) 2016-2016 David Tolnay 15 dtolnay (at) gmail.com 16 17 This program is free software; you can redistribute it and/or 18 modify it under the terms of the GNU General Public License as 19 published by the Free Software Foundation; either version 2 of the 20 License, or (at your option) any later version. 21 22 This program is distributed in the hope that it will be useful, but 23 WITHOUT ANY WARRANTY; without even the implied warranty of 24 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 25 General Public License for more details. 26 27 You should have received a copy of the GNU General Public License 28 along with this program; if not, write to the Free Software 29 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 30 02111-1307, USA. 31 32 The GNU General Public License is contained in the file COPYING. 33 */ 34 35 #include "pub_core_basics.h" 36 #include "pub_core_demangle.h" 37 #include "pub_core_libcassert.h" 38 #include "pub_core_libcbase.h" 39 #include "pub_core_libcprint.h" 40 #include "pub_core_mallocfree.h" 41 #include "pub_core_options.h" 42 43 #include "vg_libciface.h" 44 #include "demangle.h" 45 46 47 /*------------------------------------------------------------*/ 48 /*--- ---*/ 49 /*------------------------------------------------------------*/ 50 51 /* The demangler's job is to take a raw symbol name and turn it into 52 something a Human Bean can understand. Our mangling model 53 comprises a three stage pipeline. Mangling pushes names forward 54 through the pipeline (0, then 1, then 2) and demangling is 55 obviously the reverse. In practice it is highly unlikely that a 56 name would require all stages, but it is not impossible either. 57 58 0. If we're working with Rust, Rust names are lightly mangled by 59 the Rust front end. 60 61 1. Then the name is subject to standard C++ mangling. 62 63 2. Optionally, in relatively rare cases, the resulting name is then 64 itself encoded using Z-escaping (see pub_core_redir.h) so as to 65 become part of a redirect-specification. 66 67 Therefore, VG_(demangle) first tries to undo (2). If successful, 68 the soname part is discarded (humans don't want to see that). 69 Then, it tries to undo (1) (using demangling code from GNU/FSF) and 70 finally it tries to undo (0). 71 72 Finally, it changes the name of all symbols which are known to be 73 functions below main() to "(below main)". This helps reduce 74 variability of stack traces, something which has been a problem for 75 the testsuite for a long time. 76 77 -------- 78 If do_cxx_demangle == True, it does all the above stages: 79 - undo (2) [Z-encoding] 80 - undo (1) [C++ mangling] 81 - if (1) succeeds, undo (0) [Rust mangling] 82 - do the below-main hack 83 84 Rust demangling (0) is only done if C++ demangling (1) succeeds 85 because Rust demangling is performed in-place, and it is difficult 86 to prove that we "own" the storage -- hence, that the in-place 87 operation is safe -- unless it is clear that it has come from the 88 C++ demangler, which returns its output in a heap-allocated buffer 89 which we can be sure we own. In practice (Nov 2016) this does not 90 seem to be a problem, since the Rust compiler appears to apply C++ 91 mangling after Rust mangling, so we never encounter symbols that 92 require Rust demangling but not C++ demangling. 93 94 If do_cxx_demangle == False, the C++ and Rust stags are skipped: 95 - undo (2) [Z-encoding] 96 - do the below-main hack 97 */ 98 99 /* Note that the C++ demangler is from GNU libiberty and is almost 100 completely unmodified. We use vg_libciface.h as a way to 101 impedance-match the libiberty code into our own framework. 102 103 The libiberty code included here was taken from the GCC repository 104 and is released under the LGPL 2.1 license, which AFAICT is compatible 105 with "GPL 2 or later" and so is OK for inclusion in Valgrind. 106 107 To update to a newer libiberty, use the "update-demangler" script 108 which is included in the valgrind repository. */ 109 110 /* This is the main, standard demangler entry point. */ 111 112 /* Upon return, *RESULT will point to the demangled name. 113 The memory buffer that holds the demangled name is allocated on the 114 heap and will be deallocated in the next invocation. Conceptually, 115 that buffer is owned by VG_(demangle). That means two things: 116 (1) Users of VG_(demangle) must not free that buffer. 117 (2) If the demangled name needs to be stashed away for later use, 118 the contents of the buffer need to be copied. It is not sufficient 119 to just store the pointer as it will point to deallocated memory 120 after the next VG_(demangle) invocation. */ 121 void VG_(demangle) ( Bool do_cxx_demangling, Bool do_z_demangling, 122 /* IN */ const HChar *orig, 123 /* OUT */ const HChar **result ) 124 { 125 /* Possibly undo (2) */ 126 /* Z-Demangling was requested. 127 The fastest way to see if it's a Z-mangled name is just to attempt 128 to Z-demangle it (with NULL for the soname buffer, since we're not 129 interested in that). */ 130 if (do_z_demangling) { 131 const HChar *z_demangled; 132 133 if (VG_(maybe_Z_demangle)( orig, NULL, /*soname*/ 134 &z_demangled, NULL, NULL, NULL )) { 135 orig = z_demangled; 136 } 137 } 138 139 /* Possibly undo (1) */ 140 if (do_cxx_demangling && VG_(clo_demangle) 141 && orig != NULL && orig[0] == '_' && orig[1] == 'Z') { 142 /* !!! vvv STATIC vvv !!! */ 143 static HChar* demangled = NULL; 144 /* !!! ^^^ STATIC ^^^ !!! */ 145 146 /* Free up previously demangled name */ 147 if (demangled) { 148 VG_(arena_free) (VG_AR_DEMANGLE, demangled); 149 demangled = NULL; 150 } 151 demangled = ML_(cplus_demangle) ( orig, DMGL_ANSI | DMGL_PARAMS ); 152 153 *result = (demangled == NULL) ? orig : demangled; 154 155 if (demangled) { 156 /* Possibly undo (0). This is the only place where it is 157 safe, from a storage management perspective, to 158 Rust-demangle the symbol. That's because Rust demangling 159 happens in place, so we need to be sure that the storage 160 it is happening in is actually owned by us, and non-const. 161 In this case, the value returned by ML_(cplus_demangle) 162 does have that property. */ 163 if (rust_is_mangled(demangled)) { 164 rust_demangle_sym(demangled); 165 } 166 *result = demangled; 167 } else { 168 *result = orig; 169 } 170 171 } else { 172 *result = orig; 173 } 174 175 // 13 Mar 2005: We used to check here that the demangler wasn't leaking 176 // by calling the (now-removed) function VG_(is_empty_arena)(). But, 177 // very rarely (ie. I've heard of it twice in 3 years), the demangler 178 // does leak. But, we can't do much about it, and it's not a disaster, 179 // so we just let it slide without aborting or telling the user. 180 } 181 182 183 /*------------------------------------------------------------*/ 184 /*--- DEMANGLE Z-ENCODED NAMES ---*/ 185 /*------------------------------------------------------------*/ 186 187 /* Demangle a Z-encoded name as described in pub_tool_redir.h. 188 Z-encoded names are used by Valgrind for doing function 189 interception/wrapping. 190 191 Demangle 'sym' into its soname and fnname parts, putting them in 192 the specified buffers. Returns a Bool indicating whether the 193 demangled failed or not. A failure can occur because the prefix 194 isn't recognised, the internal Z-escaping is wrong, or because one 195 or the other (or both) of the output buffers becomes full. Passing 196 'so' as NULL is acceptable if the caller is only interested in the 197 function name part. */ 198 199 Bool VG_(maybe_Z_demangle) ( const HChar* sym, 200 /*OUT*/const HChar** so, 201 /*OUT*/const HChar** fn, 202 /*OUT*/Bool* isWrap, 203 /*OUT*/Int* eclassTag, 204 /*OUT*/Int* eclassPrio ) 205 { 206 static HChar *sobuf; 207 static HChar *fnbuf; 208 static SizeT buf_len = 0; 209 210 /* The length of the name after undoing Z-encoding is always smaller 211 than the mangled name. Making the soname and fnname buffers as large 212 as the demangled name is therefore always safe and overflow can never 213 occur. */ 214 SizeT len = VG_(strlen)(sym) + 1; 215 216 if (buf_len < len) { 217 sobuf = VG_(arena_realloc)(VG_AR_DEMANGLE, "Z-demangle", sobuf, len); 218 fnbuf = VG_(arena_realloc)(VG_AR_DEMANGLE, "Z-demangle", fnbuf, len); 219 buf_len = len; 220 } 221 sobuf[0] = fnbuf[0] = '\0'; 222 223 if (so) 224 *so = sobuf; 225 *fn = fnbuf; 226 227 # define EMITSO(ch) \ 228 do { \ 229 if (so) { \ 230 sobuf[soi++] = ch; sobuf[soi] = 0; \ 231 } \ 232 } while (0) 233 # define EMITFN(ch) \ 234 do { \ 235 fnbuf[fni++] = ch; fnbuf[fni] = 0; \ 236 } while (0) 237 238 Bool error, valid, fn_is_encoded, is_VG_Z_prefixed; 239 Int soi, fni, i; 240 241 error = False; 242 soi = 0; 243 fni = 0; 244 245 valid = sym[0] == '_' 246 && sym[1] == 'v' 247 && sym[2] == 'g' 248 && (sym[3] == 'r' || sym[3] == 'w') 249 && VG_(isdigit)(sym[4]) 250 && VG_(isdigit)(sym[5]) 251 && VG_(isdigit)(sym[6]) 252 && VG_(isdigit)(sym[7]) 253 && VG_(isdigit)(sym[8]) 254 && sym[9] == 'Z' 255 && (sym[10] == 'Z' || sym[10] == 'U') 256 && sym[11] == '_'; 257 258 if (valid 259 && sym[4] == '0' && sym[5] == '0' && sym[6] == '0' && sym[7] == '0' 260 && sym[8] != '0') { 261 /* If the eclass tag is 0000 (meaning "no eclass"), the priority 262 must be 0 too. */ 263 valid = False; 264 } 265 266 if (!valid) 267 return False; 268 269 fn_is_encoded = sym[10] == 'Z'; 270 271 if (isWrap) 272 *isWrap = sym[3] == 'w'; 273 274 if (eclassTag) { 275 *eclassTag = 1000 * ((Int)sym[4] - '0') 276 + 100 * ((Int)sym[5] - '0') 277 + 10 * ((Int)sym[6] - '0') 278 + 1 * ((Int)sym[7] - '0'); 279 vg_assert(*eclassTag >= 0 && *eclassTag <= 9999); 280 } 281 282 if (eclassPrio) { 283 *eclassPrio = ((Int)sym[8]) - '0'; 284 vg_assert(*eclassPrio >= 0 && *eclassPrio <= 9); 285 } 286 287 /* Now check the soname prefix isn't "VG_Z_", as described in 288 pub_tool_redir.h. */ 289 is_VG_Z_prefixed = 290 sym[12] == 'V' && 291 sym[13] == 'G' && 292 sym[14] == '_' && 293 sym[15] == 'Z' && 294 sym[16] == '_'; 295 if (is_VG_Z_prefixed) { 296 vg_assert2(0, "symbol with a 'VG_Z_' prefix: %s.\n" 297 "see pub_tool_redir.h for an explanation.", sym); 298 } 299 300 /* Now scan the Z-encoded soname. */ 301 i = 12; 302 while (True) { 303 304 if (sym[i] == '_') 305 /* Found the delimiter. Move on to the fnname loop. */ 306 break; 307 308 if (sym[i] == 0) { 309 error = True; 310 goto out; 311 } 312 313 if (sym[i] != 'Z') { 314 EMITSO(sym[i]); 315 i++; 316 continue; 317 } 318 319 /* We've got a Z-escape. */ 320 i++; 321 switch (sym[i]) { 322 case 'a': EMITSO('*'); break; 323 case 'c': EMITSO(':'); break; 324 case 'd': EMITSO('.'); break; 325 case 'h': EMITSO('-'); break; 326 case 'p': EMITSO('+'); break; 327 case 's': EMITSO(' '); break; 328 case 'u': EMITSO('_'); break; 329 case 'A': EMITSO('@'); break; 330 case 'D': EMITSO('$'); break; 331 case 'L': EMITSO('('); break; 332 case 'P': EMITSO('%'); break; 333 case 'R': EMITSO(')'); break; 334 case 'S': EMITSO('/'); break; 335 case 'Z': EMITSO('Z'); break; 336 default: error = True; goto out; 337 } 338 i++; 339 } 340 341 vg_assert(sym[i] == '_'); 342 i++; 343 344 /* Now deal with the function name part. */ 345 if (!fn_is_encoded) { 346 347 /* simple; just copy. */ 348 while (True) { 349 if (sym[i] == 0) 350 break; 351 EMITFN(sym[i]); 352 i++; 353 } 354 goto out; 355 356 } 357 358 /* else use a Z-decoding loop like with soname */ 359 while (True) { 360 361 if (sym[i] == 0) 362 break; 363 364 if (sym[i] != 'Z') { 365 EMITFN(sym[i]); 366 i++; 367 continue; 368 } 369 370 /* We've got a Z-escape. */ 371 i++; 372 switch (sym[i]) { 373 case 'a': EMITFN('*'); break; 374 case 'c': EMITFN(':'); break; 375 case 'd': EMITFN('.'); break; 376 case 'h': EMITFN('-'); break; 377 case 'p': EMITFN('+'); break; 378 case 's': EMITFN(' '); break; 379 case 'u': EMITFN('_'); break; 380 case 'A': EMITFN('@'); break; 381 case 'D': EMITFN('$'); break; 382 case 'L': EMITFN('('); break; 383 case 'P': EMITFN('%'); break; 384 case 'R': EMITFN(')'); break; 385 case 'S': EMITFN('/'); break; 386 case 'Z': EMITFN('Z'); break; 387 default: error = True; goto out; 388 } 389 i++; 390 } 391 392 out: 393 EMITSO(0); 394 EMITFN(0); 395 396 if (error) { 397 /* Something's wrong. Give up. */ 398 VG_(message)(Vg_UserMsg, 399 "m_demangle: error Z-demangling: %s\n", sym); 400 return False; 401 } 402 403 return True; 404 } 405 406 407 /*--------------------------------------------------------------------*/ 408 /*--- end ---*/ 409 /*--------------------------------------------------------------------*/ 410