/*--------------------------------------------------------------------*/
/*--- Format-neutral storage of and querying of info acquired from ---*/
/*--- ELF/XCOFF stabs/dwarf1/dwarf2/dwarf3 debug info.             ---*/
/*---                                                    storage.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2000-2010 Julian Seward
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

/* This file manages the data structures built by the debuginfo
   system.  These are: the top level SegInfo list.  For each SegInfo,
   there are tables for address-to-symbol mappings,
   address-to-src-file/line mappings, and address-to-CFI-info
   mappings.
*/

#include "pub_core_basics.h"
#include "pub_core_options.h"      /* VG_(clo_verbosity) */
#include "pub_core_debuginfo.h"
#include "pub_core_libcassert.h"
#include "pub_core_libcbase.h"
#include "pub_core_libcprint.h"
#include "pub_core_xarray.h"
#include "pub_core_oset.h"

#include "priv_misc.h"           /* dinfo_zalloc/free/strdup */
#include "priv_d3basics.h"       /* ML_(pp_GX) */
#include "priv_tytypes.h"
#include "priv_storage.h"        /* self */


/*------------------------------------------------------------*/
/*--- Misc (printing, errors)                              ---*/
/*------------------------------------------------------------*/

/* Show a non-fatal debug info reading error.  Use vg_panic if
   terminal.  'serious' errors are shown regardless of the
   verbosity setting. */
void ML_(symerr) ( struct _DebugInfo* di, Bool serious, HChar* msg )
{
   /* XML mode hides everything :-( */
   if (VG_(clo_xml))
      return;

   if (serious) {

      VG_(message)(Vg_DebugMsg, "WARNING: Serious error when "
                                "reading debug info\n");
      if (True || VG_(clo_verbosity) < 2) {
         /* Need to show what the file name is, at verbosity levels 2
            or below, since that won't already have been shown */
         VG_(message)(Vg_DebugMsg,
                      "When reading debug info from %s:\n",
                      (di && di->filename) ? di->filename : (UChar*)"???");
      }
      VG_(message)(Vg_DebugMsg, "%s\n", msg);

   } else { /* !serious */

      if (VG_(clo_verbosity) >= 2)
         VG_(message)(Vg_DebugMsg, "%s\n", msg);

   }
}


/* Print a symbol. */
void ML_(ppSym) ( Int idx, DiSym* sym )
{
   VG_(printf)( "%5d: %#8lx .. %#8lx (%d) %s\n",
                idx,
                sym->addr,
                sym->addr + sym->size - 1, sym->size,
                sym->name );
}
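/* For reference, an entry printed by ML_(ppSym) comes out in the form
   (values here are made up for illustration):

        42: 0x8049f40 .. 0x8049f8b (76) frobnicate

   i.e. table index, inclusive address range, size in bytes, name. */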
/* Print a call-frame-info summary. */
void ML_(ppDiCfSI) ( XArray* /* of CfiExpr */ exprs, DiCfSI* si )
{
#  define SHOW_HOW(_how, _off)                   \
      do {                                       \
         if (_how == CFIR_UNKNOWN) {             \
            VG_(printf)("Unknown");              \
         } else                                  \
         if (_how == CFIR_SAME) {                \
            VG_(printf)("Same");                 \
         } else                                  \
         if (_how == CFIR_CFAREL) {              \
            VG_(printf)("cfa+%d", _off);         \
         } else                                  \
         if (_how == CFIR_MEMCFAREL) {           \
            VG_(printf)("*(cfa+%d)", _off);      \
         } else                                  \
         if (_how == CFIR_EXPR) {                \
            VG_(printf)("{");                    \
            ML_(ppCfiExpr)(exprs, _off);         \
            VG_(printf)("}");                    \
         } else {                                \
            vg_assert(0);                        \
         }                                       \
      } while (0)

   VG_(printf)("[%#lx .. %#lx]: ", si->base,
                                   si->base + (UWord)si->len - 1);
   switch (si->cfa_how) {
      case CFIC_IA_SPREL:
         VG_(printf)("let cfa=oldSP+%d", si->cfa_off);
         break;
      case CFIC_IA_BPREL:
         VG_(printf)("let cfa=oldBP+%d", si->cfa_off);
         break;
      case CFIC_ARM_R13REL:
         VG_(printf)("let cfa=oldR13+%d", si->cfa_off);
         break;
      case CFIC_ARM_R12REL:
         VG_(printf)("let cfa=oldR12+%d", si->cfa_off);
         break;
      case CFIC_ARM_R11REL:
         VG_(printf)("let cfa=oldR11+%d", si->cfa_off);
         break;
      case CFIC_ARM_R7REL:
         VG_(printf)("let cfa=oldR7+%d", si->cfa_off);
         break;
      case CFIC_EXPR:
         VG_(printf)("let cfa={");
         ML_(ppCfiExpr)(exprs, si->cfa_off);
         VG_(printf)("}");
         break;
      default:
         vg_assert(0);
   }

   VG_(printf)(" in RA=");
   SHOW_HOW(si->ra_how, si->ra_off);
#  if defined(VGA_x86) || defined(VGA_amd64)
   VG_(printf)(" SP=");
   SHOW_HOW(si->sp_how, si->sp_off);
   VG_(printf)(" BP=");
   SHOW_HOW(si->bp_how, si->bp_off);
#  elif defined(VGA_arm)
   VG_(printf)(" R14=");
   SHOW_HOW(si->r14_how, si->r14_off);
   VG_(printf)(" R13=");
   SHOW_HOW(si->r13_how, si->r13_off);
   VG_(printf)(" R12=");
   SHOW_HOW(si->r12_how, si->r12_off);
   VG_(printf)(" R11=");
   SHOW_HOW(si->r11_how, si->r11_off);
   VG_(printf)(" R7=");
   SHOW_HOW(si->r7_how, si->r7_off);
#  elif defined(VGA_ppc32) || defined(VGA_ppc64)
#  else
#  error "Unknown arch"
#  endif
   VG_(printf)("\n");
#  undef SHOW_HOW
}
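/* For reference, a summary printed by ML_(ppDiCfSI) for a typical x86
   frame might look like this (all values are made up for
   illustration):

      [0x8048400 .. 0x804842f]: let cfa=oldSP+4 in RA=*(cfa+-4)
                                SP=cfa+0 BP=Same

   meaning: over that address range the CFA is the old stack pointer
   plus 4, the return address is in memory at cfa-4, the previous SP
   equals the CFA, and the previous BP is unchanged. */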
/*------------------------------------------------------------*/
/*--- Adding stuff                                         ---*/
/*------------------------------------------------------------*/

/* Add a str to the string table, including terminating zero, and
   return pointer to the string in vg_strtab.  Unless it's been seen
   recently, in which case we find the old pointer and return that.
   This avoids the most egregious duplications.

   JSGF: changed from returning an index to a pointer, and changed to
   a chunking memory allocator rather than reallocating, so the
   pointers are stable.
*/
UChar* ML_(addStr) ( struct _DebugInfo* di, UChar* str, Int len )
{
   struct strchunk *chunk;
   Int    space_needed;
   UChar* p;

   if (len == -1) {
      len = VG_(strlen)(str);
   } else {
      vg_assert(len >= 0);
   }

   space_needed = 1 + len;

   // Allocate a new strtab chunk if necessary
   if (di->strchunks == NULL ||
       (di->strchunks->strtab_used
        + space_needed) > SEGINFO_STRCHUNKSIZE) {
      chunk = ML_(dinfo_zalloc)("di.storage.addStr.1", sizeof(*chunk));
      chunk->strtab_used = 0;
      chunk->next = di->strchunks;
      di->strchunks = chunk;
   }
   chunk = di->strchunks;

   p = &chunk->strtab[chunk->strtab_used];
   VG_(memcpy)(p, str, len);
   chunk->strtab[chunk->strtab_used+len] = '\0';
   chunk->strtab_used += space_needed;

   return p;
}


/* Add a symbol to the symbol table.
*/
void ML_(addSym) ( struct _DebugInfo* di, DiSym* sym )
{
   UInt   new_sz, i;
   DiSym* new_tab;

   /* Ignore zero-sized syms. */
   if (sym->size == 0) return;

   if (di->symtab_used == di->symtab_size) {
      new_sz = 2 * di->symtab_size;
      if (new_sz == 0) new_sz = 500;
      new_tab = ML_(dinfo_zalloc)( "di.storage.addSym.1",
                                   new_sz * sizeof(DiSym) );
      if (di->symtab != NULL) {
         for (i = 0; i < di->symtab_used; i++)
            new_tab[i] = di->symtab[i];
         ML_(dinfo_free)(di->symtab);
      }
      di->symtab = new_tab;
      di->symtab_size = new_sz;
   }

   di->symtab[di->symtab_used] = *sym;
   di->symtab_used++;
   vg_assert(di->symtab_used <= di->symtab_size);
}


/* Resize the symbol table to save memory.
*/
void ML_(shrinkSym)( struct _DebugInfo* di )
{
   DiSym* new_tab;
   UInt new_sz = di->symtab_used;
   if (new_sz == di->symtab_size) return;

   new_tab = ML_(dinfo_zalloc)( "di.storage.shrinkSym",
                                new_sz * sizeof(DiSym) );
   VG_(memcpy)(new_tab, di->symtab, new_sz * sizeof(DiSym));

   ML_(dinfo_free)(di->symtab);
   di->symtab = new_tab;
   di->symtab_size = new_sz;
}


/* Add a location to the location table.
*/
static void addLoc ( struct _DebugInfo* di, DiLoc* loc )
{
   UInt   new_sz, i;
   DiLoc* new_tab;

   /* Zero-sized locs should have been ignored earlier */
   vg_assert(loc->size > 0);

   if (di->loctab_used == di->loctab_size) {
      new_sz = 2 * di->loctab_size;
      if (new_sz == 0) new_sz = 500;
      new_tab = ML_(dinfo_zalloc)( "di.storage.addLoc.1",
                                   new_sz * sizeof(DiLoc) );
      if (di->loctab != NULL) {
         for (i = 0; i < di->loctab_used; i++)
            new_tab[i] = di->loctab[i];
         ML_(dinfo_free)(di->loctab);
      }
      di->loctab = new_tab;
      di->loctab_size = new_sz;
   }

   di->loctab[di->loctab_used] = *loc;
   di->loctab_used++;
   vg_assert(di->loctab_used <= di->loctab_size);
}


/* Resize the lineinfo table to save memory.
*/
void ML_(shrinkLineInfo)( struct _DebugInfo* di )
{
   DiLoc* new_tab;
   UInt new_sz = di->loctab_used;
   if (new_sz == di->loctab_size) return;

   new_tab = ML_(dinfo_zalloc)( "di.storage.shrinkLineInfo",
                                new_sz * sizeof(DiLoc) );
   VG_(memcpy)(new_tab, di->loctab, new_sz * sizeof(DiLoc));

   ML_(dinfo_free)(di->loctab);
   di->loctab = new_tab;
   di->loctab_size = new_sz;
}
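/* Note on intended usage (a sketch only; the real call sequence is
   driven from elsewhere in the debuginfo reader): entries are
   accumulated with the add* functions, which grow their tables by
   doubling, then the tables are sorted and de-overlapped, and finally
   the unused slack is given back:

      ML_(addSym)(di, &sym);            // repeatedly, while reading
      ML_(addLineInfo)(di, ...);        // repeatedly
      ML_(addDiCfSI)(di, &cfsi);        // repeatedly
      ML_(canonicaliseTables)(di);      // sort / merge / de-overlap
      ML_(shrinkSym)(di);               // trim over-allocation
      ML_(shrinkLineInfo)(di);
*/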
/* Top-level place to call to add a source-location mapping entry.
*/
void ML_(addLineInfo) ( struct _DebugInfo* di,
                        UChar*   filename,
                        UChar*   dirname, /* NULL == directory is unknown */
                        Addr     this,
                        Addr     next,
                        Int      lineno,
                        Int      entry /* only needed for debug printing */ )
{
   static const Bool debug = False;
   DiLoc loc;
   Int size = next - this;

   /* Ignore zero-sized locs */
   if (this == next) return;

   if (debug)
      VG_(printf)( " src %s %s line %d %#lx-%#lx\n",
                   dirname ? dirname : (UChar*)"(unknown)",
                   filename, lineno, this, next );

   /* Maximum sanity checking.  Some versions of GNU as do a shabby
    * job with stabs entries; if anything looks suspicious, revert to
    * a size of 1.  This should catch the instruction of interest
    * (since if using asm-level debug info, one instruction will
    * correspond to one line, unlike with C-level debug info where
    * multiple instructions can map to the one line), but avoid
    * catching any other instructions bogusly. */
   if (this > next) {
      if (VG_(clo_verbosity) > 2) {
         VG_(message)(Vg_DebugMsg,
                      "warning: line info addresses out of order "
                      "at entry %d: 0x%lx 0x%lx\n", entry, this, next);
      }
      size = 1;
   }

   if (size > MAX_LOC_SIZE) {
      if (0)
         VG_(message)(Vg_DebugMsg,
                      "warning: line info address range too large "
                      "at entry %d: %d\n", entry, size);
      size = 1;
   }

   /* Rule out ones which are completely outside the r-x mapped area.
      See "Comment_Regarding_Text_Range_Checks" elsewhere in this file
      for background and rationale. */
   vg_assert(di->have_rx_map && di->have_rw_map);
   if (next-1 < di->rx_map_avma
       || this >= di->rx_map_avma + di->rx_map_size ) {
      if (0)
         VG_(message)(Vg_DebugMsg,
                      "warning: ignoring line info entry falling "
                      "outside current DebugInfo: %#lx %#lx %#lx %#lx\n",
                      di->text_avma,
                      di->text_avma + di->text_size,
                      this, next-1);
      return;
   }

   vg_assert(lineno >= 0);
   if (lineno > MAX_LINENO) {
      static Bool complained = False;
      if (!complained) {
         complained = True;
         VG_(message)(Vg_UserMsg,
                      "warning: ignoring line info entry with "
                      "huge line number (%d)\n", lineno);
         VG_(message)(Vg_UserMsg,
                      " Can't handle line numbers "
                      "greater than %d, sorry\n", MAX_LINENO);
         VG_(message)(Vg_UserMsg,
                      "(Nb: this message is only shown once)\n");
      }
      return;
   }

   loc.addr     = this;
   loc.size     = (UShort)size;
   loc.lineno   = lineno;
   loc.filename = filename;
   loc.dirname  = dirname;

   if (0) VG_(message)(Vg_DebugMsg,
                       "addLoc: addr %#lx, size %d, line %d, file %s\n",
                       this, size, lineno, filename);

   addLoc ( di, &loc );
}
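/* Worked example of the checks above (hypothetical values): an entry
   with this=0x5008, next=0x5000 is out of order, so it is kept but
   clamped to size 1; an entry spanning 0x5000..0x5007 whose
   DebugInfo's r-x mapping is 0x8000..0x9fff lies wholly outside the
   mapping and is dropped; an entry whose line number exceeds
   MAX_LINENO is likewise dropped (with a one-off warning). */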
/* Top-level place to call to add a CFI summary record.  The supplied
   DiCfSI is copied. */
void ML_(addDiCfSI) ( struct _DebugInfo* di, DiCfSI* cfsi_orig )
{
   static const Bool debug = False;
   UInt    new_sz, i;
   DiCfSI* new_tab;
   SSizeT  delta;

   /* copy the original, so we can mess with it */
   DiCfSI cfsi = *cfsi_orig;

   if (debug) {
      VG_(printf)("adding DiCfSI: ");
      ML_(ppDiCfSI)(di->cfsi_exprs, &cfsi);
   }

   /* sanity */
   vg_assert(cfsi.len > 0);
   /* If this fails, the implication is you have a single procedure
      with more than 5 million bytes of code.  Which is pretty
      unlikely.  Either that, or the debuginfo reader is somehow
      broken.  5 million is of course arbitrary; but it's big enough
      to be bigger than the size of any plausible piece of code that
      would fall within a single procedure. */
   vg_assert(cfsi.len < 5000000);

   vg_assert(di->have_rx_map && di->have_rw_map);
   /* If we have an empty r-x mapping (is that possible?) then the
      DiCfSI can't possibly fall inside it.  In which case skip. */
   if (di->rx_map_size == 0)
      return;

   /* Rule out ones which are completely outside the r-x mapped area.
      See "Comment_Regarding_Text_Range_Checks" elsewhere in this file
      for background and rationale. */
   if (cfsi.base + cfsi.len - 1 < di->rx_map_avma
       || cfsi.base >= di->rx_map_avma + di->rx_map_size) {
      static Int complaints = 10;
      if (VG_(clo_trace_cfi) || complaints > 0) {
         complaints--;
         if (VG_(clo_verbosity) > 1) {
            VG_(message)(
               Vg_DebugMsg,
               "warning: DiCfSI %#lx .. %#lx outside segment %#lx .. %#lx\n",
               cfsi.base,
               cfsi.base + cfsi.len - 1,
               di->text_avma,
               di->text_avma + di->text_size - 1
            );
         }
         if (VG_(clo_trace_cfi))
            ML_(ppDiCfSI)(di->cfsi_exprs, &cfsi);
      }
      return;
   }

   /* Now we know the range is at least partially inside the r-x
      mapped area.  That implies that at least one of the ends of the
      range falls inside the area.  If necessary, clip it so it is
      completely within the area.  If we don't do this,
      check_CFSI_related_invariants() in debuginfo.c (invariant #2)
      will fail.  See
      "Comment_on_IMPORTANT_CFSI_REPRESENTATIONAL_INVARIANTS" in
      priv_storage.h for background. */
   if (cfsi.base < di->rx_map_avma) {
      /* Lower end is outside the mapped area.  Hence upper end must
         be inside it. */
      if (0) VG_(printf)("XXX truncate lower\n");
      vg_assert(cfsi.base + cfsi.len - 1 >= di->rx_map_avma);
      delta = (SSizeT)(di->rx_map_avma - cfsi.base);
      vg_assert(delta > 0);
      vg_assert(delta < (SSizeT)cfsi.len);
      cfsi.base += delta;
      cfsi.len -= delta;
   }
   else
   if (cfsi.base + cfsi.len - 1 > di->rx_map_avma + di->rx_map_size - 1) {
      /* Upper end is outside the mapped area.  Hence lower end must be
         inside it. */
      if (0) VG_(printf)("XXX truncate upper\n");
      vg_assert(cfsi.base <= di->rx_map_avma + di->rx_map_size - 1);
      delta = (SSizeT)( (cfsi.base + cfsi.len - 1)
                        - (di->rx_map_avma + di->rx_map_size - 1) );
      vg_assert(delta > 0); vg_assert(delta < (SSizeT)cfsi.len);
      cfsi.len -= delta;
   }

   /* Final checks */

   /* Because: either cfsi was entirely inside the range, in which
      case we asserted that len > 0 at the start, OR it fell partially
      inside the range, in which case we reduced it by some size
      (delta) which is < its original size. */
   vg_assert(cfsi.len > 0);

   /* Similar logic applies for the next two assertions. */
   vg_assert(cfsi.base >= di->rx_map_avma);
   vg_assert(cfsi.base + cfsi.len - 1
             <= di->rx_map_avma + di->rx_map_size - 1);

   if (di->cfsi_used == di->cfsi_size) {
      new_sz = 2 * di->cfsi_size;
      if (new_sz == 0) new_sz = 20;
      new_tab = ML_(dinfo_zalloc)( "di.storage.addDiCfSI.1",
                                   new_sz * sizeof(DiCfSI) );
      if (di->cfsi != NULL) {
         for (i = 0; i < di->cfsi_used; i++)
            new_tab[i] = di->cfsi[i];
         ML_(dinfo_free)(di->cfsi);
      }
      di->cfsi = new_tab;
      di->cfsi_size = new_sz;
   }

   di->cfsi[di->cfsi_used] = cfsi;
   di->cfsi_used++;
   vg_assert(di->cfsi_used <= di->cfsi_size);
}
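/* Clipping example for the code above (hypothetical values): with an
   r-x mapping covering 0x4000 .. 0x7fff, a DiCfSI with base=0x3ff0,
   len=0x20 overlaps the lower edge and is clipped to base=0x4000,
   len=0x10; one with base=0x7ff8, len=0x10 is clipped to len=0x8; one
   lying wholly outside (say base=0x9000, len=0x40) is dropped with a
   warning rather than clipped. */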
Int ML_(CfiExpr_Undef)( XArray* dst )
{
   CfiExpr e;
   VG_(memset)( &e, 0, sizeof(e) );
   e.tag = Cex_Undef;
   return (Int)VG_(addToXA)( dst, &e );
}
Int ML_(CfiExpr_Deref)( XArray* dst, Int ixAddr )
{
   CfiExpr e;
   VG_(memset)( &e, 0, sizeof(e) );
   e.tag = Cex_Deref;
   e.Cex.Deref.ixAddr = ixAddr;
   return (Int)VG_(addToXA)( dst, &e );
}
Int ML_(CfiExpr_Const)( XArray* dst, UWord con )
{
   CfiExpr e;
   VG_(memset)( &e, 0, sizeof(e) );
   e.tag = Cex_Const;
   e.Cex.Const.con = con;
   return (Int)VG_(addToXA)( dst, &e );
}
Int ML_(CfiExpr_Binop)( XArray* dst, CfiOp op, Int ixL, Int ixR )
{
   CfiExpr e;
   VG_(memset)( &e, 0, sizeof(e) );
   e.tag = Cex_Binop;
   e.Cex.Binop.op  = op;
   e.Cex.Binop.ixL = ixL;
   e.Cex.Binop.ixR = ixR;
   return (Int)VG_(addToXA)( dst, &e );
}
Int ML_(CfiExpr_CfiReg)( XArray* dst, CfiReg reg )
{
   CfiExpr e;
   VG_(memset)( &e, 0, sizeof(e) );
   e.tag = Cex_CfiReg;
   e.Cex.CfiReg.reg = reg;
   return (Int)VG_(addToXA)( dst, &e );
}
Int ML_(CfiExpr_DwReg)( XArray* dst, Int reg )
{
   CfiExpr e;
   VG_(memset)( &e, 0, sizeof(e) );
   e.tag = Cex_DwReg;
   e.Cex.DwReg.reg = reg;
   return (Int)VG_(addToXA)( dst, &e );
}
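/* Sketch of how the constructors above are used (illustrative only;
   the real callers live in the CFI readers): to describe the
   computation "*(R13 + 8)" one builds the expression bottom-up,
   keeping only Int indices into the XArray:

      Int r13  = ML_(CfiExpr_CfiReg)( exprs, Creg_ARM_R13 );
      Int c8   = ML_(CfiExpr_Const) ( exprs, 8 );
      Int sum  = ML_(CfiExpr_Binop) ( exprs, Cop_Add, r13, c8 );
      Int expr = ML_(CfiExpr_Deref) ( exprs, sum );

   ML_(ppCfiExpr)(exprs, expr) then prints this as "*((R13)+(0x8))". */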
static void ppCfiOp ( CfiOp op )
{
   switch (op) {
      case Cop_Add: VG_(printf)("+"); break;
      case Cop_Sub: VG_(printf)("-"); break;
      case Cop_And: VG_(printf)("&"); break;
      case Cop_Mul: VG_(printf)("*"); break;
      default:      vg_assert(0);
   }
}

static void ppCfiReg ( CfiReg reg )
{
   switch (reg) {
      case Creg_IA_SP:   VG_(printf)("xSP"); break;
      case Creg_IA_BP:   VG_(printf)("xBP"); break;
      case Creg_IA_IP:   VG_(printf)("xIP"); break;
      case Creg_ARM_R13: VG_(printf)("R13"); break;
      case Creg_ARM_R12: VG_(printf)("R12"); break;
      case Creg_ARM_R15: VG_(printf)("R15"); break;
      case Creg_ARM_R14: VG_(printf)("R14"); break;
      default: vg_assert(0);
   }
}

void ML_(ppCfiExpr)( XArray* src, Int ix )
{
   /* VG_(indexXA) checks for invalid src/ix values, so we can
      use it indiscriminately. */
   CfiExpr* e = (CfiExpr*) VG_(indexXA)( src, ix );
   switch (e->tag) {
      case Cex_Undef:
         VG_(printf)("Undef");
         break;
      case Cex_Deref:
         VG_(printf)("*(");
         ML_(ppCfiExpr)(src, e->Cex.Deref.ixAddr);
         VG_(printf)(")");
         break;
      case Cex_Const:
         VG_(printf)("0x%lx", e->Cex.Const.con);
         break;
      case Cex_Binop:
         VG_(printf)("(");
         ML_(ppCfiExpr)(src, e->Cex.Binop.ixL);
         VG_(printf)(")");
         ppCfiOp(e->Cex.Binop.op);
         VG_(printf)("(");
         ML_(ppCfiExpr)(src, e->Cex.Binop.ixR);
         VG_(printf)(")");
         break;
      case Cex_CfiReg:
         ppCfiReg(e->Cex.CfiReg.reg);
         break;
      case Cex_DwReg:
         VG_(printf)("dwr%d", e->Cex.DwReg.reg);
         break;
      default:
         VG_(core_panic)("ML_(ppCfiExpr)");
         /*NOTREACHED*/
         break;
   }
}


Word ML_(cmp_for_DiAddrRange_range) ( const void* keyV,
                                      const void* elemV ) {
   const Addr* key = (const Addr*)keyV;
   const DiAddrRange* elem = (const DiAddrRange*)elemV;
   if (0)
      VG_(printf)("cmp_for_DiAddrRange_range: %#lx vs %#lx\n",
                  *key, elem->aMin);
   if ((*key) < elem->aMin) return -1;
   if ((*key) > elem->aMax) return 1;
   return 0;
}

static
void show_scope ( OSet* /* of DiAddrRange */ scope, HChar* who )
{
   DiAddrRange* range;
   VG_(printf)("Scope \"%s\" = {\n", who);
   VG_(OSetGen_ResetIter)( scope );
   while (True) {
      range = VG_(OSetGen_Next)( scope );
      if (!range) break;
      VG_(printf)(" %#lx .. %#lx: %lu vars\n", range->aMin, range->aMax,
                  range->vars ? VG_(sizeXA)(range->vars) : 0);
   }
   VG_(printf)("}\n");
}

/* Add the variable 'var' to 'scope' for the address range [aMin,aMax]
   (inclusive of aMin and aMax).  Split existing ranges as required if
   aMin or aMax or both don't match existing range boundaries, and add
   'var' to all required ranges.  Take great care to preserve the
   invariant that the ranges in 'scope' cover the entire address range
   exactly once, with no overlaps and no holes. */
static void add_var_to_arange (
               /*MOD*/OSet* /* of DiAddrRange */ scope,
               Addr aMin,
               Addr aMax,
               DiVariable* var
            )
{
   DiAddrRange *first, *last, *range;
   /* These xx variables are for assertion checking only; they don't
      contribute anything to the actual work of this function. */
   DiAddrRange *xxRangep, *xxFirst, *xxLast;
   UWord xxIters;

   vg_assert(aMin <= aMax);

   if (0) VG_(printf)("add_var_to_arange: %#lx .. %#lx\n", aMin, aMax);
   if (0) show_scope( scope, "add_var_to_arange(1)" );

   /* See if the lower end of the range (aMin) falls exactly on an
      existing range boundary.  If not, find the range it does fall
      into, and split it (copying the variables in the process), so
      that aMin does exactly fall on a range boundary. */
   first = VG_(OSetGen_Lookup)( scope, &aMin );
   /* It must be present, since the presented OSet must cover
      the entire address range. */
   vg_assert(first);
   vg_assert(first->aMin <= first->aMax);
   vg_assert(first->aMin <= aMin && aMin <= first->aMax);

   /* Fast track common case, which is that the range specified for
      the variable exactly coincides with one already-existing
      range. */
   if (first->aMin == aMin && first->aMax == aMax) {
      vg_assert(first->vars);
      VG_(addToXA)( first->vars, var );
      return;
   }

   /* We have to get into splitting ranges, which is complex
      and slow. */
   if (first->aMin < aMin) {
      DiAddrRange* nyu;
      /* Ok.  We'll have to split 'first'. */
      /* truncate the upper end of 'first' */
      Addr tmp = first->aMax;
      first->aMax = aMin-1;
      vg_assert(first->aMin <= first->aMax);
      /* create a new range */
      nyu = VG_(OSetGen_AllocNode)( scope, sizeof(DiAddrRange) );
      vg_assert(nyu);
      nyu->aMin = aMin;
      nyu->aMax = tmp;
      vg_assert(nyu->aMin <= nyu->aMax);
      /* copy vars into it */
      vg_assert(first->vars);
      nyu->vars = VG_(cloneXA)( "di.storage.avta.1", first->vars );
      vg_assert(nyu->vars);
      VG_(OSetGen_Insert)( scope, nyu );
      first = nyu;
   }

   vg_assert(first->aMin == aMin);

   /* Now do exactly the same for the upper end (aMax): if it doesn't
      fall on a boundary, cause it to do so by splitting the range it
      does currently fall into. */
   last = VG_(OSetGen_Lookup)( scope, &aMax );
   vg_assert(last->aMin <= last->aMax);
   vg_assert(last->aMin <= aMax && aMax <= last->aMax);

   if (aMax < last->aMax) {
      DiAddrRange* nyu;
      /* We have to split 'last'. */
      /* truncate the lower end of 'last' */
      Addr tmp = last->aMin;
      last->aMin = aMax+1;
      vg_assert(last->aMin <= last->aMax);
      /* create a new range */
      nyu = VG_(OSetGen_AllocNode)( scope, sizeof(DiAddrRange) );
      vg_assert(nyu);
      nyu->aMin = tmp;
      nyu->aMax = aMax;
      vg_assert(nyu->aMin <= nyu->aMax);
      /* copy vars into it */
      vg_assert(last->vars);
      nyu->vars = VG_(cloneXA)( "di.storage.avta.2", last->vars );
      vg_assert(nyu->vars);
      VG_(OSetGen_Insert)( scope, nyu );
      last = nyu;
   }

   vg_assert(aMax == last->aMax);

   xxFirst = (DiAddrRange*)VG_(OSetGen_Lookup)(scope, &aMin);
   xxLast  = (DiAddrRange*)VG_(OSetGen_Lookup)(scope, &aMax);
   vg_assert(xxFirst);
   vg_assert(xxLast);
   vg_assert(xxFirst->aMin == aMin);
   vg_assert(xxLast->aMax == aMax);
   if (xxFirst != xxLast)
      vg_assert(xxFirst->aMax < xxLast->aMin);

   /* Great.  Now we merely need to iterate over the segments from
      'first' to 'last' inclusive, and add 'var' to the variable set
      of each of them. */
   if (0) {
      static UWord ctr = 0;
      ctr++;
      VG_(printf)("ctr = %lu\n", ctr);
      if (ctr >= 33263) show_scope( scope, "add_var_to_arange(2)" );
   }

   xxIters = 0;
   range = xxRangep = NULL;
   VG_(OSetGen_ResetIterAt)( scope, &aMin );
   while (True) {
      xxRangep = range;
      range    = VG_(OSetGen_Next)( scope );
      if (!range) break;
      if (range->aMin > aMax) break;
      xxIters++;
      if (0) VG_(printf)("have range %#lx %#lx\n",
                         range->aMin, range->aMax);

      /* Sanity checks */
      if (!xxRangep) {
         /* This is the first in the range */
         vg_assert(range->aMin == aMin);
      } else {
         vg_assert(xxRangep->aMax + 1 == range->aMin);
      }

      vg_assert(range->vars);
      VG_(addToXA)( range->vars, var );
   }
   /* Done.  We should have seen at least one range. */
   vg_assert(xxIters >= 1);
   if (xxIters == 1)      vg_assert(xxFirst == xxLast);
   if (xxFirst == xxLast) vg_assert(xxIters == 1);
   vg_assert(xxRangep);
   vg_assert(xxRangep->aMax == aMax);
   vg_assert(xxRangep == xxLast);
}
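/* Worked example (hypothetical): suppose 'scope' holds the single
   range [0 .. ~0] with no vars, and a variable V is added for
   [0x100 .. 0x1ff].  The lower and upper splits above leave

      [0x0   .. 0xff ]  { }
      [0x100 .. 0x1ff]  { V }
      [0x200 .. ~0   ]  { }

   which preserves the no-holes / no-overlaps invariant.  A second
   variable covering exactly [0x100 .. 0x1ff] then takes the fast path
   and is simply appended to the middle range's vars. */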
/* Top-level place to call to add a variable description (as extracted
   from a DWARF3 .debug_info section). */
void ML_(addVar)( struct _DebugInfo* di,
                  Int    level,
                  Addr   aMin,
                  Addr   aMax,
                  UChar* name,      /* in di's .strchunks */
                  UWord  typeR,     /* a cuOff */
                  GExpr* gexpr,
                  GExpr* fbGX,
                  UChar* fileName,  /* where decl'd - may be NULL.
                                       in di's .strchunks */
                  Int    lineNo,    /* where decl'd - may be zero */
                  Bool   show )
{
   OSet* /* of DiAddrRange */ scope;
   DiVariable var;
   Bool       all;
   TyEnt*     ent;
   MaybeULong mul;
   HChar*     badness;

   tl_assert(di && di->admin_tyents);

   if (0) {
      VG_(printf)(" ML_(addVar): level %d %#lx-%#lx %s :: ",
                  level, aMin, aMax, name );
      ML_(pp_TyEnt_C_ishly)( di->admin_tyents, typeR );
      VG_(printf)("\n Var=");
      ML_(pp_GX)(gexpr);
      VG_(printf)("\n");
      if (fbGX) {
         VG_(printf)(" FrB=");
         ML_(pp_GX)( fbGX );
         VG_(printf)("\n");
      } else {
         VG_(printf)(" FrB=none\n");
      }
      VG_(printf)("\n");
   }

   vg_assert(level >= 0);
   vg_assert(aMin <= aMax);
   vg_assert(name);
   vg_assert(gexpr);

   ent = ML_(TyEnts__index_by_cuOff)( di->admin_tyents, NULL, typeR);
   tl_assert(ent);
   vg_assert(ML_(TyEnt__is_type)(ent));

   /* "Comment_Regarding_Text_Range_Checks" (is referred to elsewhere)
      ----------------------------------------------------------------
      Ignore any variables whose aMin .. aMax (that is, range of text
      addresses for which they actually exist) falls outside the text
      segment.  Is this indicative of a bug in the reader?  Maybe.
      (LATER): instead of restricting strictly to the .text segment,
      be a bit more relaxed, and accept any variable whose text range
      falls inside the r-x mapped area.  This is useful because .text
      is not always the only instruction-carrying segment: others are:
      .init .plt __libc_freeres_fn and .fini.  This implicitly assumes
      that those extra sections have the same bias as .text, but that
      seems a reasonable assumption to me. */
   /* This is assured us by top level steering logic in debuginfo.c,
      and it is re-checked at the start of
      ML_(read_elf_debug_info). */
   vg_assert(di->have_rx_map && di->have_rw_map);
   if (level > 0
       && (aMax < di->rx_map_avma
           || aMin >= di->rx_map_avma + di->rx_map_size)) {
      if (VG_(clo_verbosity) >= 0) {
         VG_(message)(Vg_DebugMsg,
            "warning: addVar: in range %#lx .. %#lx outside "
            "segment %#lx .. %#lx (%s)\n",
            aMin, aMax,
            di->text_avma, di->text_avma + di->text_size -1,
            name
         );
      }
      return;
   }

   /* If the type's size is zero (which can mean unknown size), ignore
      it.  We will never be able to actually relate a data address to
      a data object with zero size, so there's no point in storing
      info on it.  On 32-bit platforms, also reject types whose size
      is 2^32 bytes or larger.  (It's amazing what junk shows up ..) */
   mul = ML_(sizeOfType)(di->admin_tyents, typeR);

   badness = NULL;
   if (mul.b != True)
      badness = "unknown size";
   else if (mul.ul == 0)
      badness = "zero size   ";
   else if (sizeof(void*) == 4 && mul.ul >= (1ULL<<32))
      badness = "implausibly large";

   if (badness) {
      static Int complaints = 10;
      if (VG_(clo_verbosity) >= 2 && complaints > 0) {
         VG_(message)(Vg_DebugMsg, "warning: addVar: %s (%s)\n",
                                   badness, name );
         complaints--;
      }
      return;
   }

   if (!di->varinfo) {
      di->varinfo = VG_(newXA)( ML_(dinfo_zalloc),
                                "di.storage.addVar.1",
                                ML_(dinfo_free),
                                sizeof(OSet*) );
   }

   vg_assert(level < 256); /* arbitrary; stay sane */
   /* Expand the top level array enough to map this level */
   while ( VG_(sizeXA)(di->varinfo) <= level ) {
      DiAddrRange* nyu;
      scope = VG_(OSetGen_Create)( offsetof(DiAddrRange,aMin),
                                   ML_(cmp_for_DiAddrRange_range),
                                   ML_(dinfo_zalloc), "di.storage.addVar.2",
                                   ML_(dinfo_free) );
      vg_assert(scope);
      if (0) VG_(printf)("create: scope = %p, adding at %ld\n",
                         scope, VG_(sizeXA)(di->varinfo));
      VG_(addToXA)( di->varinfo, &scope );
      /* Add a single range covering the entire address space.  At
         level 0 we require this doesn't get split.  At levels above 0
         we require that any additions to it cause it to get split.
         All of these invariants get checked both in add_var_to_arange
         and, after reading is complete, in canonicaliseVarInfo. */
      nyu = VG_(OSetGen_AllocNode)( scope, sizeof(DiAddrRange) );
      vg_assert(nyu);
      nyu->aMin = (Addr)0;
      nyu->aMax = ~(Addr)0;
      nyu->vars = VG_(newXA)( ML_(dinfo_zalloc), "di.storage.addVar.3",
                              ML_(dinfo_free),
                              sizeof(DiVariable) );
      vg_assert(nyu->vars);
      VG_(OSetGen_Insert)( scope, nyu );
   }

   vg_assert( VG_(sizeXA)(di->varinfo) > level );
   scope = *(OSet**)VG_(indexXA)( di->varinfo, level );
   vg_assert(scope);

   var.name     = name;
   var.typeR    = typeR;
   var.gexpr    = gexpr;
   var.fbGX     = fbGX;
   var.fileName = fileName;
   var.lineNo   = lineNo;

   all = aMin == (Addr)0 && aMax == ~(Addr)0;
   vg_assert(level == 0 ? all : !all);

   add_var_to_arange( /*MOD*/scope, aMin, aMax, &var );
}
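/* To summarise the structure just built: di->varinfo is an XArray
   indexed by scope nesting level; each element is an OSet of
   DiAddrRange which partitions the whole address space, and each
   DiAddrRange carries the XArray of DiVariable visible over that
   range.  Level 0 (globals) always stays as the single range
   [0 .. ~0]; deeper levels get split up by add_var_to_arange as
   variables are added. */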
/* This really just checks the constructed data structure, as there is
   no canonicalisation to do. */
static void canonicaliseVarInfo ( struct _DebugInfo* di )
{
   Word i, nInThisScope;

   if (!di->varinfo)
      return;

   for (i = 0; i < VG_(sizeXA)(di->varinfo); i++) {

      DiAddrRange *range, *rangep;
      OSet* scope = *(OSet**)VG_(indexXA)(di->varinfo, i);
      if (!scope) continue;

      /* Deal with the global-scope case. */
      if (i == 0) {
         Addr zero = 0;
         vg_assert(VG_(OSetGen_Size)( scope ) == 1);
         range = VG_(OSetGen_Lookup)( scope, &zero );
         vg_assert(range);
         vg_assert(range->aMin == (Addr)0);
         vg_assert(range->aMax == ~(Addr)0);
         continue;
      }

      /* All the rest of this is for the local-scope case. */
      /* iterate over all entries in 'scope' */
      nInThisScope = 0;
      rangep = NULL;
      VG_(OSetGen_ResetIter)(scope);
      while (True) {
         range = VG_(OSetGen_Next)(scope);
         if (!range) {
            /* We just saw the last one.  There must have been at
               least one entry in the range. */
            vg_assert(rangep);
            vg_assert(rangep->aMax == ~(Addr)0);
            break;
         }

         vg_assert(range->aMin <= range->aMax);
         vg_assert(range->vars);

         if (!rangep) {
            /* This is the first entry in the range. */
            vg_assert(range->aMin == 0);
         } else {
            vg_assert(rangep->aMax + 1 == range->aMin);
         }

         rangep = range;
         nInThisScope++;
      } /* iterating over ranges in a given scope */

      /* If there's only one entry in this (local) scope, it must
         cover the entire address space (obviously), but it must not
         contain any vars. */

      vg_assert(nInThisScope > 0);
      if (nInThisScope == 1) {
         Addr zero = 0;
         vg_assert(VG_(OSetGen_Size)( scope ) == 1);
         range = VG_(OSetGen_Lookup)( scope, &zero );
         vg_assert(range);
         vg_assert(range->aMin == (Addr)0);
         vg_assert(range->aMax == ~(Addr)0);
         vg_assert(range->vars);
         vg_assert(VG_(sizeXA)(range->vars) == 0);
      }

   } /* iterate over scopes */
}


/*------------------------------------------------------------*/
/*--- Canonicalisers                                       ---*/
/*------------------------------------------------------------*/

/* Sort the symtab by starting address, and emit warnings if any
   symbols have overlapping address ranges.  We use that old chestnut,
   shellsort.  Mash the table around so as to establish the property
   that addresses are in order and the ranges do not overlap.  This
   facilitates using binary search to map addresses to symbols when we
   come to query the table.
*/
static Int compare_DiSym ( void* va, void* vb )
{
   DiSym* a = (DiSym*)va;
   DiSym* b = (DiSym*)vb;
   if (a->addr < b->addr) return -1;
   if (a->addr > b->addr) return  1;
   return 0;
}


/* Two symbols have the same address.  Which name do we prefer?  In order:

   - Prefer "PMPI_<foo>" over "MPI_<foo>".

   - Else, prefer a non-NULL name over a NULL one.

   - Else, prefer a non-whitespace name over an all-whitespace name.

   - Else, prefer the shorter symbol name.  If the symbol contains a
     version symbol ('@' on Linux, other platforms may differ), which means it
     is versioned, then the length up to the version symbol is used for length
     comparison purposes (so "foo@GLIBC_2.4.2" is considered shorter than
     "foobar").

   - Else, if two symbols have the same length, prefer a versioned symbol over
     a non-versioned symbol.

   - Else, use alphabetical ordering.

   - Otherwise, they must be the same; use the symbol with the lower address.

   Very occasionally this goes wrong (eg. 'memcmp' and 'bcmp' are
   aliases in glibc, we choose the 'bcmp' symbol because it's shorter,
   so we can misdescribe memcmp() as bcmp()).  This is hard to avoid.
   It's mentioned in the FAQ file.
*/
static DiSym* prefersym ( struct _DebugInfo* di, DiSym* a, DiSym* b )
{
   Word cmp;
   Word vlena, vlenb;       /* length without version */
   const UChar *vpa, *vpb;

   Bool preferA = False;
   Bool preferB = False;

   vg_assert(a->addr == b->addr);

   vlena = VG_(strlen)(a->name);
   vlenb = VG_(strlen)(b->name);

#if defined(VGO_linux) || defined(VGO_aix5)
#  define VERSION_CHAR '@'
#elif defined(VGO_darwin)
#  define VERSION_CHAR '$'
#else
#  error Unknown OS
#endif

   vpa = VG_(strchr)(a->name, VERSION_CHAR);
   vpb = VG_(strchr)(b->name, VERSION_CHAR);

   if (vpa)
      vlena = vpa - a->name;
   if (vpb)
      vlenb = vpb - b->name;

   /* MPI hack: prefer PMPI_Foo over MPI_Foo */
   if (0==VG_(strncmp)(a->name, "MPI_", 4)
       && 0==VG_(strncmp)(b->name, "PMPI_", 5)
       && 0==VG_(strcmp)(a->name, 1+b->name)) {
      preferB = True; goto out;
   }
   if (0==VG_(strncmp)(b->name, "MPI_", 4)
       && 0==VG_(strncmp)(a->name, "PMPI_", 5)
       && 0==VG_(strcmp)(b->name, 1+a->name)) {
      preferA = True; goto out;
   }

   /* Prefer non-empty name. */
   if (vlena && !vlenb) {
      preferA = True; goto out;
   }
   if (vlenb && !vlena) {
      preferB = True; goto out;
   }

   /* Prefer non-whitespace name. */
   {
      Bool blankA = True;
      Bool blankB = True;
      Char *s;
      s = a->name;
      while (*s) {
         if (!VG_(isspace)(*s++)) {
            blankA = False;
            break;
         }
      }
      s = b->name;
      while (*s) {
         if (!VG_(isspace)(*s++)) {
            blankB = False;
            break;
         }
      }

      if (!blankA && blankB) {
         preferA = True; goto out;
      }
      if (!blankB && blankA) {
         preferB = True; goto out;
      }
   }

   /* Select the shortest unversioned name */
   if (vlena < vlenb) {
      preferA = True; goto out;
   }
   if (vlenb < vlena) {
      preferB = True; goto out;
   }

   /* Equal lengths; select the versioned name */
   if (vpa && !vpb) {
      preferA = True; goto out;
   }
   if (vpb && !vpa) {
      preferB = True; goto out;
   }

   /* Either both versioned or neither is versioned; select them
      alphabetically */
   cmp = VG_(strcmp)(a->name, b->name);
   if (cmp < 0) {
      preferA = True; goto out;
   }
   if (cmp > 0) {
      preferB = True; goto out;
   }

   /* If we get here, they are the same name. */

   /* In this case we could choose either (arbitrarily), but might as
      well choose the one with the lowest DiSym* address, so as to try
      and make the comparison mechanism more stable (a la sorting
      parlance).  Also, skip the diagnostic printing in this case. */
   return a <= b ? a : b;

   /*NOTREACHED*/
   vg_assert(0);
  out:
   if (preferA && !preferB) {
      TRACE_SYMTAB("sym at %#lx: prefer '%s' to '%s'\n",
                   a->addr, a->name, b->name );
      return a;
   }
   if (preferB && !preferA) {
      TRACE_SYMTAB("sym at %#lx: prefer '%s' to '%s'\n",
                   b->addr, b->name, a->name );
      return b;
   }
   /*NOTREACHED*/
   vg_assert(0);
}
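/* Example of the rules above in action (hypothetical names at the
   same address): given "memcpy@GLIBC_2.2.5" and "memcpy_wrapper", the
   first has unversioned length 6 and the second length 14, so the
   versioned name wins on length.  Given "bcmp" and "memcmp", "bcmp"
   wins purely on length -- which is exactly the memcmp/bcmp
   misdescription mentioned in the comment above. */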
static void canonicaliseSymtab ( struct _DebugInfo* di )
{
   Word  i, j, n_merged, n_truncated;
   Addr  s1, s2, e1, e2, p1, p2;
   UChar *n1, *n2;
   Bool  t1, t2, f1, f2;

#  define SWAP(ty,aa,bb) \
      do { ty tt = (aa); (aa) = (bb); (bb) = tt; } while (0)

   if (di->symtab_used == 0)
      return;

   VG_(ssort)(di->symtab, di->symtab_used,
              sizeof(*di->symtab), compare_DiSym);

  cleanup_more:

   /* If two symbols have identical address ranges, we pick one
      using prefersym() (see it for details). */
   do {
      n_merged = 0;
      j = di->symtab_used;
      di->symtab_used = 0;
      for (i = 0; i < j; i++) {
         if (i < j-1
             && di->symtab[i].addr == di->symtab[i+1].addr
             && di->symtab[i].size == di->symtab[i+1].size
            ) {
            n_merged++;
            /* merge the two into one */
            di->symtab[di->symtab_used++]
               = *prefersym(di, &di->symtab[i], &di->symtab[i+1]);
            i++;
         } else {
            di->symtab[di->symtab_used++] = di->symtab[i];
         }
      }
      TRACE_SYMTAB( "canonicaliseSymtab: %ld symbols merged\n", n_merged);
   }
   while (n_merged > 0);

   /* Detect and "fix" overlapping address ranges. */
   n_truncated = 0;

   for (i = 0; i < ((Word)di->symtab_used) -1; i++) {

      vg_assert(di->symtab[i].addr <= di->symtab[i+1].addr);

      /* Check for common (no overlap) case. */
      if (di->symtab[i].addr + di->symtab[i].size
          <= di->symtab[i+1].addr)
         continue;

      /* There's an overlap.  Truncate one or the other. */
      if (di->trace_symtab) {
         VG_(printf)("overlapping address ranges in symbol table\n\t");
         ML_(ppSym)( i, &di->symtab[i] );
         VG_(printf)("\t");
         ML_(ppSym)( i+1, &di->symtab[i+1] );
         VG_(printf)("\n");
      }

      /* Truncate one or the other. */
      s1 = di->symtab[i].addr;
      e1 = s1 + di->symtab[i].size - 1;
      p1 = di->symtab[i].tocptr;
      n1 = di->symtab[i].name;
      t1 = di->symtab[i].isText;
      f1 = di->symtab[i].isIFunc;
      s2 = di->symtab[i+1].addr;
      e2 = s2 + di->symtab[i+1].size - 1;
      p2 = di->symtab[i+1].tocptr;
      n2 = di->symtab[i+1].name;
      t2 = di->symtab[i+1].isText;
      f2 = di->symtab[i+1].isIFunc;
      if (s1 < s2) {
         e1 = s2-1;
      } else {
         vg_assert(s1 == s2);
         if (e1 > e2) {
            s1 = e2+1; SWAP(Addr,s1,s2); SWAP(Addr,e1,e2); SWAP(Addr,p1,p2);
            SWAP(UChar *,n1,n2); SWAP(Bool,t1,t2);
         } else
         if (e1 < e2) {
            s2 = e1+1;
         } else {
            /* e1 == e2.  Identical addr ranges.  We'll eventually wind
               up back at cleanup_more, which will take care of it. */
         }
      }
      di->symtab[i].addr      = s1;
      di->symtab[i].size      = e1 - s1 + 1;
      di->symtab[i].tocptr    = p1;
      di->symtab[i].name      = n1;
      di->symtab[i].isText    = t1;
      di->symtab[i].isIFunc   = f1;
      di->symtab[i+1].addr    = s2;
      di->symtab[i+1].size    = e2 - s2 + 1;
      di->symtab[i+1].tocptr  = p2;
      di->symtab[i+1].name    = n2;
      di->symtab[i+1].isText  = t2;
      di->symtab[i+1].isIFunc = f2;
      vg_assert(s1 <= s2);
      vg_assert(di->symtab[i].size > 0);
      vg_assert(di->symtab[i+1].size > 0);
      /* It may be that the i+1 entry now needs to be moved further
         along to maintain the address order requirement. */
      j = i+1;
      while (j < ((Word)di->symtab_used)-1
             && di->symtab[j].addr > di->symtab[j+1].addr) {
         SWAP(DiSym,di->symtab[j],di->symtab[j+1]);
         j++;
      }
      n_truncated++;
   }

   if (n_truncated > 0) goto cleanup_more;

   /* Ensure relevant postconditions hold. */
   for (i = 0; i < ((Word)di->symtab_used)-1; i++) {
      /* No zero-sized symbols. */
      vg_assert(di->symtab[i].size > 0);
      /* In order. */
      vg_assert(di->symtab[i].addr < di->symtab[i+1].addr);
      /* No overlaps. */
      vg_assert(di->symtab[i].addr + di->symtab[i].size - 1
                < di->symtab[i+1].addr);
   }
#  undef SWAP
}
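/* Truncation example (hypothetical): symbols A = [0x1000 .. 0x10ff]
   and B = [0x1080 .. 0x117f] overlap.  Since A starts first, A is cut
   back to [0x1000 .. 0x107f] and B is left alone, after which the
   table is still sorted and the no-overlap postcondition holds.
   Identical ranges are not handled here at all; they get merged by
   the prefersym() pass at 'cleanup_more'. */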
/* Sort the location table by starting address.  Mash the table around
   so as to establish the property that addresses are in order and the
   ranges do not overlap.  This facilitates using binary search to map
   addresses to locations when we come to query the table.
*/
static Int compare_DiLoc ( void* va, void* vb )
{
   DiLoc* a = (DiLoc*)va;
   DiLoc* b = (DiLoc*)vb;
   if (a->addr < b->addr) return -1;
   if (a->addr > b->addr) return  1;
   return 0;
}

static void canonicaliseLoctab ( struct _DebugInfo* di )
{
   Word i, j;

#  define SWAP(ty,aa,bb) \
      do { ty tt = (aa); (aa) = (bb); (bb) = tt; } while (0);

   if (di->loctab_used == 0)
      return;

   /* Sort by start address. */
   VG_(ssort)(di->loctab, di->loctab_used,
              sizeof(*di->loctab), compare_DiLoc);

   /* If two adjacent entries overlap, truncate the first. */
   for (i = 0; i < ((Word)di->loctab_used)-1; i++) {
      vg_assert(di->loctab[i].size < 10000);
      if (di->loctab[i].addr + di->loctab[i].size > di->loctab[i+1].addr) {
         /* Do this in signed int32 because the actual .size fields
            are only 12 bits. */
         Int new_size = di->loctab[i+1].addr - di->loctab[i].addr;
         if (new_size < 0) {
            di->loctab[i].size = 0;
         } else
         if (new_size > MAX_LOC_SIZE) {
            di->loctab[i].size = MAX_LOC_SIZE;
         } else {
            di->loctab[i].size = (UShort)new_size;
         }
      }
   }

   /* Zap any zero-sized entries resulting from the truncation
      process. */
   j = 0;
   for (i = 0; i < (Word)di->loctab_used; i++) {
      if (di->loctab[i].size > 0) {
         if (j != i)
            di->loctab[j] = di->loctab[i];
         j++;
      }
   }
   di->loctab_used = j;

   /* Ensure relevant postconditions hold. */
   for (i = 0; i < ((Word)di->loctab_used)-1; i++) {
      /*
      VG_(printf)("%d (%d) %d 0x%x\n",
                  i, di->loctab[i+1].confident,
                  di->loctab[i+1].size, di->loctab[i+1].addr );
      */
      /* No zero-sized symbols. */
      vg_assert(di->loctab[i].size > 0);
      /* In order. */
      vg_assert(di->loctab[i].addr < di->loctab[i+1].addr);
      /* No overlaps. */
      vg_assert(di->loctab[i].addr + di->loctab[i].size - 1
                < di->loctab[i+1].addr);
   }
#  undef SWAP
}
/* Sort the call-frame-info table by starting address.  Mash the table
   around so as to establish the property that addresses are in order
   and the ranges do not overlap.  This facilitates using binary
   search to map addresses to locations when we come to query the
   table.

   Also, set cfisi_minaddr and cfisi_maxaddr to be the min and max of
   any of the address ranges contained in cfisi[0 .. cfisi_used-1], so
   as to facilitate rapidly skipping this SegInfo when looking for an
   address which falls outside that range.
*/
static Int compare_DiCfSI ( void* va, void* vb )
{
   DiCfSI* a = (DiCfSI*)va;
   DiCfSI* b = (DiCfSI*)vb;
   if (a->base < b->base) return -1;
   if (a->base > b->base) return  1;
   return 0;
}

void ML_(canonicaliseCFI) ( struct _DebugInfo* di )
{
   Word  i, j;
   const Addr minAvma = 0;
   const Addr maxAvma = ~minAvma;

   /* Note: take care in here.  di->cfsi can be NULL, in which
      case _used and _size fields will be zero. */
   if (di->cfsi == NULL) {
      vg_assert(di->cfsi_used == 0);
      vg_assert(di->cfsi_size == 0);
   }

   /* Set cfsi_minavma and cfsi_maxavma to summarise the entire
      address range contained in cfsi[0 .. cfsi_used-1]. */
   di->cfsi_minavma = maxAvma;
   di->cfsi_maxavma = minAvma;
   for (i = 0; i < (Word)di->cfsi_used; i++) {
      Addr here_min = di->cfsi[i].base;
      Addr here_max = di->cfsi[i].base + di->cfsi[i].len - 1;
      if (here_min < di->cfsi_minavma)
         di->cfsi_minavma = here_min;
      if (here_max > di->cfsi_maxavma)
         di->cfsi_maxavma = here_max;
   }

   if (di->trace_cfi)
      VG_(printf)("canonicaliseCfiSI: %ld entries, %#lx .. %#lx\n",
                  di->cfsi_used,
                  di->cfsi_minavma, di->cfsi_maxavma);

   /* Sort the cfsi array by base address. */
   VG_(ssort)(di->cfsi, di->cfsi_used, sizeof(*di->cfsi), compare_DiCfSI);

   /* If two adjacent entries overlap, truncate the first. */
   for (i = 0; i < (Word)di->cfsi_used-1; i++) {
      if (di->cfsi[i].base + di->cfsi[i].len > di->cfsi[i+1].base) {
         Word new_len = di->cfsi[i+1].base - di->cfsi[i].base;
         /* how could it be otherwise?  The entries are sorted by the
            .base field. */
         vg_assert(new_len >= 0);
         vg_assert(new_len <= di->cfsi[i].len);
         di->cfsi[i].len = new_len;
      }
   }

   /* Zap any zero-sized entries resulting from the truncation
      process. */
   j = 0;
   for (i = 0; i < (Word)di->cfsi_used; i++) {
      if (di->cfsi[i].len > 0) {
         if (j != i)
            di->cfsi[j] = di->cfsi[i];
         j++;
      }
   }
   /* VG_(printf)("XXXXXXXXXXXXX %d %d\n", di->cfsi_used, j); */
   di->cfsi_used = j;

   /* Ensure relevant postconditions hold. */
   for (i = 0; i < (Word)di->cfsi_used; i++) {
      /* No zero-length ranges. */
      vg_assert(di->cfsi[i].len > 0);
      /* Makes sense w.r.t. summary address range */
      vg_assert(di->cfsi[i].base >= di->cfsi_minavma);
      vg_assert(di->cfsi[i].base + di->cfsi[i].len - 1
                <= di->cfsi_maxavma);

      if (i < di->cfsi_used - 1) {
         /*
         if (!(di->cfsi[i].base < di->cfsi[i+1].base)) {
            VG_(printf)("\nOOO cfsis:\n");
            ML_(ppCfiSI)(&di->cfsi[i]);
            ML_(ppCfiSI)(&di->cfsi[i+1]);
         }
         */
         /* In order. */
         vg_assert(di->cfsi[i].base < di->cfsi[i+1].base);
         /* No overlaps. */
         vg_assert(di->cfsi[i].base + di->cfsi[i].len - 1
                   < di->cfsi[i+1].base);
      }
   }

}


/* Canonicalise the tables held by 'di', in preparation for use.  Call
   this after finishing adding entries to these tables. */
void ML_(canonicaliseTables) ( struct _DebugInfo* di )
{
   canonicaliseSymtab ( di );
   canonicaliseLoctab ( di );
   ML_(canonicaliseCFI) ( di );
   canonicaliseVarInfo ( di );
}
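/* Note: the ML_(search_one_*) functions below are plain binary
   searches, so they rely on the sorted, non-overlapping form
   established here.  ML_(canonicaliseTables) must therefore have been
   run on a DebugInfo before any of the searches is applied to it. */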
/*------------------------------------------------------------*/
/*--- Searching the tables                                 ---*/
/*------------------------------------------------------------*/

/* Find a symbol-table index containing the specified pointer, or -1
   if not found.  Binary search.  */

Word ML_(search_one_symtab) ( struct _DebugInfo* di, Addr ptr,
                              Bool match_anywhere_in_sym,
                              Bool findText )
{
   Addr a_mid_lo, a_mid_hi;
   Word mid, size,
        lo = 0,
        hi = di->symtab_used-1;
   while (True) {
      /* current unsearched space is from lo to hi, inclusive. */
      if (lo > hi) return -1; /* not found */
      mid      = (lo + hi) / 2;
      a_mid_lo = di->symtab[mid].addr;
      size = ( match_anywhere_in_sym
             ? di->symtab[mid].size
             : 1);
      a_mid_hi = ((Addr)di->symtab[mid].addr) + size - 1;

      if (ptr < a_mid_lo) { hi = mid-1; continue; }
      if (ptr > a_mid_hi) { lo = mid+1; continue; }
      vg_assert(ptr >= a_mid_lo && ptr <= a_mid_hi);
      /* Found a symbol with the correct address range.  But is it
         of the right kind (text vs data) ? */
      if (  findText   &&   di->symtab[mid].isText  ) return mid;
      if ( (!findText) && (!di->symtab[mid].isText) ) return mid;
      return -1;
   }
}


/* Find a location-table index containing the specified pointer, or -1
   if not found.  Binary search.  */

Word ML_(search_one_loctab) ( struct _DebugInfo* di, Addr ptr )
{
   Addr a_mid_lo, a_mid_hi;
   Word mid,
        lo = 0,
        hi = di->loctab_used-1;
   while (True) {
      /* current unsearched space is from lo to hi, inclusive. */
      if (lo > hi) return -1; /* not found */
      mid      = (lo + hi) / 2;
      a_mid_lo = di->loctab[mid].addr;
      a_mid_hi = ((Addr)di->loctab[mid].addr) + di->loctab[mid].size - 1;

      if (ptr < a_mid_lo) { hi = mid-1; continue; }
      if (ptr > a_mid_hi) { lo = mid+1; continue; }
      vg_assert(ptr >= a_mid_lo && ptr <= a_mid_hi);
      return mid;
   }
}


/* Find a CFI-table index containing the specified pointer, or -1
   if not found.  Binary search.  */

Word ML_(search_one_cfitab) ( struct _DebugInfo* di, Addr ptr )
{
   Addr a_mid_lo, a_mid_hi;
   Word mid, size,
        lo = 0,
        hi = di->cfsi_used-1;
   while (True) {
      /* current unsearched space is from lo to hi, inclusive. */
      if (lo > hi) return -1; /* not found */
      mid      = (lo + hi) / 2;
      a_mid_lo = di->cfsi[mid].base;
      size     = di->cfsi[mid].len;
      a_mid_hi = a_mid_lo + size - 1;
      vg_assert(a_mid_hi >= a_mid_lo);
      if (ptr < a_mid_lo) { hi = mid-1; continue; }
      if (ptr > a_mid_hi) { lo = mid+1; continue; }
      vg_assert(ptr >= a_mid_lo && ptr <= a_mid_hi);
      return mid;
   }
}
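/* All three searches above share the same shape: [lo, hi] is the
   inclusive, still-unsearched index range, and every probe either
   returns 'mid' or discards half of that range, so the loop always
   terminates.  In the symbol search, match_anywhere_in_sym == False
   shrinks each symbol to a 1-byte range, so only an exact hit on a
   symbol's start address succeeds. */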
/* Find a FPO-table index containing the specified pointer, or -1
   if not found.  Binary search.  */

Word ML_(search_one_fpotab) ( struct _DebugInfo* di, Addr ptr )
{
   Addr const addr = ptr - di->rx_map_avma;
   Addr a_mid_lo, a_mid_hi;
   Word mid, size,
        lo = 0,
        hi = di->fpo_size-1;
   while (True) {
      /* current unsearched space is from lo to hi, inclusive. */
      if (lo > hi) return -1; /* not found */
      mid      = (lo + hi) / 2;
      a_mid_lo = di->fpo[mid].ulOffStart;
      size     = di->fpo[mid].cbProcSize;
      a_mid_hi = a_mid_lo + size - 1;
      vg_assert(a_mid_hi >= a_mid_lo);
      if (addr < a_mid_lo) { hi = mid-1; continue; }
      if (addr > a_mid_hi) { lo = mid+1; continue; }
      vg_assert(addr >= a_mid_lo && addr <= a_mid_hi);
      return mid;
   }
}

/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/