1 2 /*--------------------------------------------------------------------*/ 3 /*--- Format-neutral storage of and querying of info acquired from ---*/ 4 /*--- ELF/XCOFF stabs/dwarf1/dwarf2 debug info. ---*/ 5 /*--- priv_storage.h ---*/ 6 /*--------------------------------------------------------------------*/ 7 8 /* 9 This file is part of Valgrind, a dynamic binary instrumentation 10 framework. 11 12 Copyright (C) 2000-2013 Julian Seward 13 jseward (at) acm.org 14 15 This program is free software; you can redistribute it and/or 16 modify it under the terms of the GNU General Public License as 17 published by the Free Software Foundation; either version 2 of the 18 License, or (at your option) any later version. 19 20 This program is distributed in the hope that it will be useful, but 21 WITHOUT ANY WARRANTY; without even the implied warranty of 22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 23 General Public License for more details. 24 25 You should have received a copy of the GNU General Public License 26 along with this program; if not, write to the Free Software 27 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 28 02111-1307, USA. 29 30 The GNU General Public License is contained in the file COPYING. 31 */ 32 /* 33 Stabs reader greatly improved by Nick Nethercote, Apr 02. 34 This module was also extensively hacked on by Jeremy Fitzhardinge 35 and Tom Hughes. 36 */ 37 /* See comment at top of debuginfo.c for explanation of 38 the _svma / _avma / _image / _bias naming scheme. 39 */ 40 /* Note this is not freestanding; needs pub_core_xarray.h and 41 priv_tytypes.h to be included before it. */ 42 43 #ifndef __PRIV_STORAGE_H 44 #define __PRIV_STORAGE_H 45 46 #include "pub_core_basics.h" // Addr 47 #include "pub_core_xarray.h" // XArray 48 #include "priv_d3basics.h" // GExpr et al. 
#include "priv_image.h"       // DiCursor

/* --------------------- SYMBOLS --------------------- */

/* A structure to hold an ELF/MachO symbol (very crudely).  Usually
   the symbol only has one name, which is stored in ::pri_name, and
   ::sec_names is NULL.  If there are other names, these are stored in
   ::sec_names, which is a NULL terminated vector holding the names.
   The vector is allocated in VG_AR_DINFO, the names themselves live
   in DebugInfo::strchunks.

   From the point of view of ELF, the primary vs secondary distinction
   is artificial: they are all just names associated with the address,
   none of which has higher precedence than any other.  However, from
   the point of view of mapping an address to a name to display to the
   user, we need to choose one "preferred" name, and so that might as
   well be installed as the pri_name, whilst all others can live in
   sec_names[].  This has the convenient side effect that, in the
   common case where there is only one name for the address,
   sec_names[] does not need to be allocated.
*/
typedef
   struct {
      Addr    addr;    /* lowest address of entity */
      Addr    tocptr;  /* ppc64-linux only: value that R2 should have */
      HChar*  pri_name;  /* primary name, never NULL */
      HChar** sec_names; /* NULL, or a NULL term'd array of other names */
      // XXX: this could be shrunk (on 32-bit platforms) by using 30
      // bits for the size and 1 bit each for isText and isIFunc.  If you
      // do this, make sure that all assignments to the latter two use
      // 0 or 1 (or True or False), and that a positive number larger
      // than 1 is never used to represent True.
      UInt    size;    /* size in bytes */
      Bool    isText;  /* NOTE(review): presumably True for symbols in a
                          text (code) section -- confirm against the
                          symbol readers */
      Bool    isIFunc; /* symbol is an indirect function? */
   }
   DiSym;

/* --------------------- SRCLOCS --------------------- */

/* Line count at which overflow happens, due to line numbers being
   stored as shorts in `struct nlist' in a.out.h.  The value is
   1 << (number of bits in a short), i.e. 65536 where short is 16
   bits wide. */
#define LINENO_OVERFLOW (1 << (sizeof(short) * 8))

/* DiLoc packs a line number and a byte size into 32 bits:
   LINENO_BITS for the line number and the remaining LOC_SIZE_BITS
   (32 - LINENO_BITS) for the size.  MAX_LINENO is the largest value
   representable in the line-number field. */
#define LINENO_BITS     20
#define LOC_SIZE_BITS  (32 - LINENO_BITS)
#define MAX_LINENO     ((1 << LINENO_BITS) - 1)

/* Unlikely to have any lines with instruction ranges > 4096 bytes.
   (With LINENO_BITS == 20 the size field is 12 bits wide, so the
   largest representable size is 4095.) */
#define MAX_LOC_SIZE   ((1 << LOC_SIZE_BITS) - 1)

/* Number used to detect line number overflows; if one line is
   60000-odd smaller than the previous, it was probably an overflow.
 */
#define OVERFLOW_DIFFERENCE     (LINENO_OVERFLOW - 5000)

/* A structure to hold addr-to-source info for a single line.  There
   can be a lot of these, hence the dense packing. */
typedef
   struct {
      /* Word 1 */
      Addr   addr;               /* lowest address for this line */
      /* Word 2 */
      /* The two bit-fields below total LOC_SIZE_BITS + LINENO_BITS
         == 32 bits.  NOTE(review): a bit-field of type UShort is
         implementation-defined in ISO C, and sharing one 32-bit word
         with the UInt field relies on the compiler's packing rules --
         confirm for any new compiler/ABI. */
      UShort size:LOC_SIZE_BITS; /* # bytes; we catch overflows of this */
      UInt   lineno:LINENO_BITS; /* source line number, or zero */
      /* Word 3 */
      const HChar* filename;     /* source filename */
      /* Word 4 */
      const HChar* dirname;      /* source directory name */
   }
   DiLoc;

/* --------------------- CF INFO --------------------- */

/* DiCfSI: a structure to summarise DWARF2/3 CFA info for the code
   address range [base .. base+len-1].

   On x86 and amd64 ("IA"), if you know ({e,r}sp, {e,r}bp, {e,r}ip) at
   some point and {e,r}ip is in the range [base .. base+len-1], it
   tells you how to calculate ({e,r}sp, {e,r}bp) for the caller of the
   current frame and also ra, the return address of the current frame.

   First off, calculate CFA, the Canonical Frame Address, thusly:

     cfa = case cfa_how of
              CFIC_IA_SPREL -> {e,r}sp + cfa_off
              CFIC_IA_BPREL -> {e,r}bp + cfa_off
              CFIC_EXPR     -> expr whose index is in cfa_off

   Once that is done, the previous frame's {e,r}sp/{e,r}bp values and
   this frame's {e,r}ra value can be calculated like this:

     old_{e,r}sp/{e,r}bp/ra
         = case {e,r}sp/{e,r}bp/ra_how of
              CFIR_UNKNOWN   -> we don't know, sorry
              CFIR_SAME      -> same as it was before (sp/fp only)
              CFIR_CFAREL    -> cfa + sp/bp/ra_off
              CFIR_MEMCFAREL -> *( cfa + sp/bp/ra_off )
              CFIR_EXPR      -> expr whose index is in sp/bp/ra_off

   On ARM it's pretty much the same, except we have more registers to
   keep track of:

     cfa = case cfa_how of
              CFIC_ARM_R13REL -> r13 + cfa_off
              CFIC_ARM_R12REL -> r12 + cfa_off
              CFIC_ARM_R11REL -> r11 + cfa_off
              CFIC_ARM_R7REL  -> r7  + cfa_off
              CFIC_EXPR       -> expr whose index is in cfa_off

     old_r14/r13/r12/r11/r7/ra
         = case r14/r13/r12/r11/r7/ra_how of
              CFIR_UNKNOWN   -> we don't know, sorry
              CFIR_SAME      -> same as it was before (r14/r13/r12/r11/r7 only)
              CFIR_CFAREL    -> cfa + r14/r13/r12/r11/r7/ra_off
              CFIR_MEMCFAREL -> *( cfa + r14/r13/r12/r11/r7/ra_off )
              CFIR_EXPR      -> expr whose index is in r14/r13/r12/r11/r7/ra_off

   On ARM64:

     cfa = case cfa_how of
              CFIC_ARM64_SPREL  -> sp + cfa_off
              CFIC_ARM64_X29REL -> x29 + cfa_off
              CFIC_EXPR         -> expr whose index is in cfa_off

     old_sp/x30/x29/ra
         = case sp/x30/x29/ra_how of
              CFIR_UNKNOWN   -> we don't know, sorry
              CFIR_SAME      -> same as it was before
              CFIR_CFAREL    -> cfa + sp/x30/x29/ra_off
              CFIR_MEMCFAREL -> *( cfa + sp/x30/x29/ra_off )
              CFIR_EXPR      -> expr whose index is in sp/x30/x29/ra_off

   On s390x we have a similar logic as x86 or amd64.
We need the stack pointer
   (r15), the frame pointer r11 (like BP) and together with the instruction
   address in the PSW we can calculate the previous values:
     cfa = case cfa_how of
              CFIC_IA_SPREL -> r15 + cfa_off
              CFIC_IA_BPREL -> r11 + cfa_off
              CFIC_EXPR     -> expr whose index is in cfa_off

     old_sp/fp/ra
         = case sp/fp/ra_how of
              CFIR_UNKNOWN   -> we don't know, sorry
              CFIR_SAME      -> same as it was before (sp/fp only)
              CFIR_CFAREL    -> cfa + sp/fp/ra_off
              CFIR_MEMCFAREL -> *( cfa + sp/fp/ra_off )
              CFIR_EXPR      -> expr whose index is in sp/fp/ra_off
*/

/* Values for DiCfSI.cfa_how: how to compute the CFA. */
#define CFIC_IA_SPREL     ((UChar)1)
#define CFIC_IA_BPREL     ((UChar)2)
#define CFIC_ARM_R13REL   ((UChar)3)
#define CFIC_ARM_R12REL   ((UChar)4)
#define CFIC_ARM_R11REL   ((UChar)5)
#define CFIC_ARM_R7REL    ((UChar)6)
#define CFIC_ARM64_SPREL  ((UChar)7)
#define CFIC_ARM64_X29REL ((UChar)8)
#define CFIC_EXPR         ((UChar)9)  /* all targets */

/* Values for the DiCfSI.<reg>_how fields: how to recover a register's
   previous value.  These start at 64, so they do not collide
   numerically with the CFIC_ values above. */
#define CFIR_UNKNOWN      ((UChar)64)
#define CFIR_SAME         ((UChar)65)
#define CFIR_CFAREL       ((UChar)66)
#define CFIR_MEMCFAREL    ((UChar)67)
#define CFIR_EXPR         ((UChar)68)

/* One DiCfSI layout per architecture.  All variants share the fields
   base, len, cfa_how, ra_how, cfa_off and ra_off; the remaining
   <reg>_how / <reg>_off pairs differ per target. */
#if defined(VGA_x86) || defined(VGA_amd64)
typedef
   struct {
      Addr  base;
      UInt  len;
      UChar cfa_how; /* a CFIC_IA value */
      UChar ra_how;  /* a CFIR_ value */
      UChar sp_how;  /* a CFIR_ value */
      UChar bp_how;  /* a CFIR_ value */
      Int   cfa_off;
      Int   ra_off;
      Int   sp_off;
      Int   bp_off;
   }
   DiCfSI;
#elif defined(VGA_arm)
typedef
   struct {
      Addr  base;
      UInt  len;
      UChar cfa_how; /* a CFIC_ value */
      UChar ra_how;  /* a CFIR_ value */
      UChar r14_how; /* a CFIR_ value */
      UChar r13_how; /* a CFIR_ value */
      UChar r12_how; /* a CFIR_ value */
      UChar r11_how; /* a CFIR_ value */
      UChar r7_how;  /* a CFIR_ value */
      Int   cfa_off;
      Int   ra_off;
      Int   r14_off;
      Int   r13_off;
      Int   r12_off;
      Int   r11_off;
      Int   r7_off;
   }
   DiCfSI;
#elif defined(VGA_arm64)
typedef
   struct {
      Addr  base;
      UInt  len;
      UChar cfa_how; /* a CFIC_ value */
      UChar ra_how;  /* a CFIR_ value */
      UChar sp_how;  /* a CFIR_ value */ /*dw31=SP*/
      UChar x30_how; /* a CFIR_ value */ /*dw30=LR*/
      UChar x29_how; /* a CFIR_ value */ /*dw29=FP*/
      Int   cfa_off;
      Int   ra_off;
      Int   sp_off;
      Int   x30_off;
      Int   x29_off;
   }
   DiCfSI;
#elif defined(VGA_ppc32) || defined(VGA_ppc64)
/* Just have a struct with the common fields in, so that code that
   processes the common fields doesn't have to be ifdef'd against
   VGP_/VGA_ symbols.  These are not used in any way on ppc32/64-linux
   at the moment. */
typedef
   struct {
      Addr  base;
      UInt  len;
      UChar cfa_how; /* a CFIC_ value */
      UChar ra_how;  /* a CFIR_ value */
      Int   cfa_off;
      Int   ra_off;
   }
   DiCfSI;
#elif defined(VGA_s390x)
typedef
   struct {
      Addr  base;
      UInt  len;
      UChar cfa_how; /* a CFIC_ value */
      UChar sp_how;  /* a CFIR_ value */
      UChar ra_how;  /* a CFIR_ value */
      UChar fp_how;  /* a CFIR_ value */
      Int   cfa_off;
      Int   sp_off;
      Int   ra_off;
      Int   fp_off;
   }
   DiCfSI;
#elif defined(VGA_mips32) || defined(VGA_mips64)
typedef
   struct {
      Addr  base;
      UInt  len;
      UChar cfa_how; /* a CFIC_ value */
      UChar ra_how;  /* a CFIR_ value */
      UChar sp_how;  /* a CFIR_ value */
      UChar fp_how;  /* a CFIR_ value */
      Int   cfa_off;
      Int   ra_off;
      Int   sp_off;
      Int   fp_off;
   }
   DiCfSI;
#else
#  error "Unknown arch"
#endif


/* Operator code carried by a Cex_Unop node (CfiExpr.Cex.Unop.op). */
typedef
   enum {
      Cunop_Abs=0x231,
      Cunop_Neg,
      Cunop_Not
   }
   CfiUnop;

/* Operator code carried by a Cex_Binop node (CfiExpr.Cex.Binop.op). */
typedef
   enum {
      Cbinop_Add=0x321,
      Cbinop_Sub,
      Cbinop_And,
      Cbinop_Mul,
      Cbinop_Shl,
      Cbinop_Shr,
      Cbinop_Eq,
      Cbinop_Ge,
      Cbinop_Gt,
      Cbinop_Le,
      Cbinop_Lt,
      Cbinop_Ne
   }
   CfiBinop;

/* Register identifier carried by a Cex_CfiReg node
   (CfiExpr.Cex.CfiReg.reg). */
typedef
   enum {
      Creg_IA_SP=0x213,
      Creg_IA_BP,
      Creg_IA_IP,
      Creg_ARM_R13,
      Creg_ARM_R12,
      Creg_ARM_R15,
      Creg_ARM_R14,
      Creg_ARM64_X30,
      Creg_S390_R14,
      Creg_MIPS_RA
   }
   CfiReg;

/* Discriminator for CfiExpr: one tag per member of the CfiExpr.Cex
   union. */
typedef
   enum {
      Cex_Undef=0x123,
      Cex_Deref,
      Cex_Const,
      Cex_Unop,
      Cex_Binop,
      Cex_CfiReg,
      Cex_DwReg
   }
   CfiExprTag;

/* One node of a CFI expression: a tag plus a union with one member
   per tag.  NOTE(review): the Int fields ixAddr/ix/ixL/ixR are
   presumably indices of operand nodes within the same XArray of
   CfiExpr (cf. the constructors below) -- confirm in storage.c. */
typedef
   struct {
      CfiExprTag tag;
      union {
         struct {
            /* No payload.  An empty struct is a GNU C extension, not
               ISO C. */
         } Undef;
         struct {
            Int ixAddr;
         } Deref;
         struct {
            UWord con;
         } Const;
         struct {
            CfiUnop op;
            Int ix;
         } Unop;
         struct {
            CfiBinop op;
            Int ixL;
            Int ixR;
         } Binop;
         struct {
            CfiReg reg;
         } CfiReg;
         struct {
            Int reg;
         } DwReg;
      }
      Cex;
   }
   CfiExpr;

/* Constructors, one per CfiExprTag, each taking the destination
   XArray plus the payload fields for that tag.  NOTE(review): the Int
   result is presumably the index in 'dst' of the newly added node --
   confirm against the definitions. */
extern Int ML_(CfiExpr_Undef) ( XArray* dst );
extern Int ML_(CfiExpr_Deref) ( XArray* dst, Int ixAddr );
extern Int ML_(CfiExpr_Const) ( XArray* dst, UWord con );
extern Int ML_(CfiExpr_Unop)  ( XArray* dst, CfiUnop op, Int ix );
extern Int ML_(CfiExpr_Binop) ( XArray* dst, CfiBinop op, Int ixL, Int ixR );
extern Int ML_(CfiExpr_CfiReg)( XArray* dst, CfiReg reg );
extern Int ML_(CfiExpr_DwReg) ( XArray* dst, Int reg );

/* NOTE(review): judging by the name, prints the expression node at
   index 'ix' of 'src' -- confirm against the definition. */
extern void ML_(ppCfiExpr)( XArray* src, Int ix );

/* ---------------- FPO INFO (Windows PE) -------------- */

/* for apps using Wine: MSVC++ PDB FramePointerOmitted: somewhat like
   a primitive CFI */
typedef
   struct _FPO_DATA {  /* 16 bytes */
      UInt   ulOffStart; /* offset of 1st byte of function code */
      UInt   cbProcSize; /* # bytes in function */
      UInt   cdwLocals;  /* # bytes/4 in locals */
      UShort cdwParams;  /* # bytes/4 in params */
      UChar  cbProlog;   /* # bytes in prolog */
      /* The five bit-fields below total 3+1+1+1+2 == 8 bits. */
      UChar  cbRegs :3;  /* # regs saved */
      UChar  fHasSEH:1;  /* Structured Exception Handling */
      UChar  fUseBP :1;  /* EBP has been used */
      UChar  reserved:1;
      UChar  cbFrame:2;  /* frame type */
   }
   FPO_DATA;

/* NOTE(review): presumably the possible values of FPO_DATA.cbFrame
   ("frame type") -- confirm against the PDB reader. */
#define PDB_FRAME_FPO  0
#define PDB_FRAME_TRAP 1
#define PDB_FRAME_TSS  2

/* --------------------- VARIABLES --------------------- */

/* An address range [aMin, aMax] together with the variables
   associated with it. */
typedef
   struct {
      Addr    aMin;
      Addr    aMax;
      XArray* /* of DiVariable */ vars;
   }
   DiAddrRange;

/* Description of one variable. */
typedef
   struct {
      HChar* name;  /* in DebugInfo.strchunks */
      UWord  typeR; /* a cuOff */
      GExpr* gexpr; /* on DebugInfo.gexprs list */
      GExpr* fbGX;  /* SHARED. */
      HChar* fileName; /* where declared; may be NULL. in
                          DebugInfo.strchunks */
      Int    lineNo;   /* where declared; may be zero. */
   }
   DiVariable;

/* Comparison function taking a key and an element as untyped
   pointers.  NOTE(review): presumably used to look up a DiAddrRange
   by address in an ordered set -- confirm the key type and the
   meaning of the Word result against the definition. */
Word
ML_(cmp_for_DiAddrRange_range) ( const void* keyV, const void* elemV );

/* --------------------- DEBUGINFO --------------------- */

/* This is the top-level data type.  It's a structure which contains
   information pertaining to one mapped ELF object.  This type is
   exported only abstractly - in pub_tool_debuginfo.h. */

/* First though, here's an auxiliary data structure.  It is only ever
   used as part of a struct _DebugInfo.  We use it to record
   observations about mappings and permission changes to the
   associated file, so as to decide when to read debug info.  It's
   essentially an ultra-trivial finite state machine which, when it
   reaches an accept state, signals that we should now read debug info
   from the object into the associated struct _DebugInfo.  The accept
   state is arrived at when have_rx_map and have_rw_map both become
   true.  The initial state is one in which we have no observations,
   so have_rx_map and have_rw_map are both false.

   This all started as a rather ad-hoc solution, but was further
   expanded to handle weird object layouts, e.g. more than one rw
   or rx mapping for one binary.

   The normal sequence of events is one of

   start  -->  r-x mapping  -->  rw- mapping  -->  accept
   start  -->  rw- mapping  -->  r-x mapping  -->  accept

   that is, take the first r-x and rw- mapping we see, and we're done.

   On MacOSX 10.7, 32-bit, there appears to be a new variant:

   start  -->  r-- mapping  -->  rw- mapping
          -->  upgrade r-- mapping to r-x mapping  -->  accept

   where the upgrade is done by a call to vm_protect.  Hence we
   need to also track this possibility.
*/

/* One observed mapping of the file; elements of _DebugInfoFSM.maps. */
struct _DebugInfoMapping
{
   Addr  avma; /* these fields record the file offset, length */
   SizeT size; /* and map address of each mapping             */
   OffT  foff;
   Bool  rx, rw, ro;  /* memory access flags for this mapping */
};

/* State of the "is it time to read debug info yet?" state machine
   described above. */
struct _DebugInfoFSM
{
   HChar*  filename;  /* in mallocville (VG_AR_DINFO) */
   XArray* maps;      /* XArray of _DebugInfoMapping structs */
   Bool  have_rx_map; /* did we see a r?x mapping yet for the file? */
   Bool  have_rw_map; /* did we see a rw? mapping yet for the file? */
   Bool  have_ro_map; /* did we see a r-- mapping yet for the file? */
};


/* To do with the string table in struct _DebugInfo (::strchunks).
   This is the size in bytes of the strtab[] array in each
   struct strchunk. */
#define SEGINFO_STRCHUNKSIZE (64*1024)


/* We may encounter more than one .eh_frame section in an object --
   unusual but apparently allowed by ELF.  See
   http://sourceware.org/bugzilla/show_bug.cgi?id=12675

   This is the dimension of the ehframe_avma[] / ehframe_size[] arrays
   in struct _DebugInfo, and hence the upper bound on n_ehframe.
*/
#define N_EHFRAME_SECTS 2


/* So, the main structure for holding debug info for one object. */

struct _DebugInfo {

   /* Admin stuff */

   struct _DebugInfo* next;   /* list of DebugInfos */
   Bool               mark;   /* marked for deletion? */

   /* An abstract handle, which can be used by entities outside of
      m_debuginfo to (in an abstract datatype sense) refer to this
      struct _DebugInfo.  A .handle of zero is invalid; valid handles
      are 1 and above.  The same handle is never issued twice (in any
      given run of Valgrind), so a handle becomes invalid when the
      associated struct _DebugInfo is discarded, and remains invalid
      forever thereafter.  The .handle field is set as soon as this
      structure is allocated. */
   ULong handle;

   /* Used for debugging only - indicate what stuff to dump whilst
      reading stuff into the seginfo.  Are computed as early in the
      lifetime of the DebugInfo as possible -- at the point when it is
      created.  Use these when deciding what to spew out; do not use
      the global VG_(clo_blah) flags. */

   Bool trace_symtab; /* symbols, our style */
   Bool trace_cfi;    /* dwarf frame unwind, our style */
   Bool ddump_syms;   /* mimic /usr/bin/readelf --syms */
   Bool ddump_line;   /* mimic /usr/bin/readelf --debug-dump=line */
   Bool ddump_frames; /* mimic /usr/bin/readelf --debug-dump=frames */

   /* The "decide when it is time to read debuginfo" state machine.
      This structure must get filled in before we can start reading
      anything from the ELF/MachO file.  This structure is filled in
      by VG_(di_notify_mmap) and its immediate helpers. */
   struct _DebugInfoFSM fsm;

   /* Once the ::fsm has reached an accept state -- typically, when
      both a rw? and r?x mapping for .filename have been observed --
      we can go on to read the symbol tables and debug info.
      .have_dinfo changes from False to True when the debug info has
      been completely read in and postprocessed (canonicalised) and is
      now suitable for querying. */
   /* If have_dinfo is False, then all fields below this point are
      invalid and should not be consulted. */
   Bool  have_dinfo; /* initially False */

   /* All the rest of the fields in this structure are filled in once
      we have committed to reading the symbols and debug info (that
      is, at the point where .have_dinfo is set to True). */

   /* The file's soname. */
   HChar* soname;

   /* Description of some important mapped segments.  The presence or
      absence of the mapping is denoted by the _present field, since
      in some obscure circumstances (to do with data/sdata/bss) it is
      possible for the mapping to be present but have zero size.
      Certainly text_ is mandatory on all platforms; not sure about
      the rest though.

      --------------------------------------------------------

      Comment_on_IMPORTANT_CFSI_REPRESENTATIONAL_INVARIANTS: we require that

      either (size of all rx maps == 0 && cfsi == NULL) (the degenerate case)

      or the normal case, which is the AND of the following:
      (0) size of at least one rx mapping > 0
      (1) no two DebugInfos with some rx mapping of size > 0
          have overlapping rx mappings
      (2) [cfsi_minavma,cfsi_maxavma] does not extend beyond
          [avma,+size) of one rx mapping; that is, the former
          is a subrange or equal to the latter.
      (3) all DiCfSI in the cfsi array have ranges that fall within
          [avma,+size) of that rx mapping.
      (4) all DiCfSI in the cfsi array are non-overlapping

      The cumulative effect of these restrictions is to ensure that
      all the DiCfSI records in the entire system are non overlapping.
      Hence any address falls into either exactly one DiCfSI record,
      or none.  Hence it is safe to cache the results of searches for
      DiCfSI records.  This is the whole point of these restrictions.
      The caching of DiCfSI searches is done in VG_(use_CF_info).  The
      cache is flushed after any change to debugInfo_list.  DiCfSI
      searches are cached because they are central to stack unwinding
      on amd64-linux.

      Where are these invariants imposed and checked?

      They are checked after a successful read of debuginfo into
      a DebugInfo*, in check_CFSI_related_invariants.

      (1) is not really imposed anywhere.  We simply assume that the
      kernel will not map the text segments from two different objects
      into the same space.  Sounds reasonable.

      (2) follows from (4) and (3).  It is ensured by canonicaliseCFI.
      (3) is ensured by ML_(addDiCfSI).
      (4) is ensured by canonicaliseCFI.

      --------------------------------------------------------

      Comment_on_DEBUG_SVMA_and_DEBUG_BIAS_fields:

      The _debug_{svma,bias} fields were added as part of a fix to
      #185816.  The problem encompassed in that bug report was that it
      wasn't correct to apply the bias values deduced for a
      primary object to its associated debuginfo object, because the
      debuginfo object (or the primary) could have been prelinked to a
      different SVMA.  Hence debuginfo and primary objects need to
      have their own biases.

      ------ JRS: (referring to r9329): ------
      Let me see if I understand the workings correctly.  Initially
      the _debug_ values are set to the same values as the "normal"
      ones, as there's a bunch of bits of code like this (in
      readelf.c)

         di->text_svma = svma;
         ...
         di->text_bias = rx_bias;
         di->text_debug_svma = svma;
         di->text_debug_bias = rx_bias;

      If a debuginfo object subsequently shows up then the
      _debug_svma/bias are set for the debuginfo object.  Result is
      that if there's no debuginfo object then the values are the same
      as the primary-object values, and if there is a debuginfo object
      then they will (or at least may) be different.

      Then when we need to actually bias something, we'll have to
      decide whether to use the primary bias or the debuginfo bias.
      And the strategy is to use the primary bias for ELF symbols but
      the debuginfo bias for anything pulled out of Dwarf.

      ------ THH: ------
      Correct - the debug_svma and bias values apply to any address
      read from the debug data regardless of where that debug data is
      stored and the other values are used for addresses from other
      places (primarily the symbol table).

      ------ JRS: ------
      Ok; so this was my only area of concern.  Are there any
      corner-case scenarios where this wouldn't be right?  It sounds
      like we're assuming the ELF symbols come from the primary object
      and, if there is a debug object, then all the Dwarf comes from
      there.  But what if (eg) both symbols and Dwarf come from the
      debug object?  Is that even possible or allowable?

      ------ THH: ------
      You may have a point...

      The current logic is to try and take any one set of data from
      either the base object or the debug object.  There are four sets
      of data we consider:

         - Symbol Table
         - Stabs
         - DWARF1
         - DWARF2

      If we see the primary section for a given set in the base object
      then we ignore all sections relating to that set in the debug
      object.

      Now in principle if we saw a secondary section (like debug_line
      say) in the base object, but not the main section (debug_info in
      this case) then we would take debug_info from the debug object
      but would use the debug_line from the base object unless we saw
      a replacement copy in the debug object.  That's probably unlikely
      however.

      A bigger issue might be, as you say, the symbol table as we will
      pick that up from the debug object if it isn't in the base.  The
      dynamic symbol table will always have to be in the base object
      though so we will have to be careful when processing symbols to
      know which table we are reading in that case.

      What we probably need to do is tell read_elf_symtab which object
      the symbols it is being asked to read came from.

      (A followup patch to deal with this was committed in r9469).
   */
   /* .text */
   Bool     text_present;
   Addr     text_avma;
   Addr     text_svma;
   SizeT    text_size;
   PtrdiffT text_bias;
   Addr     text_debug_svma;
   PtrdiffT text_debug_bias;
   /* .data */
   Bool     data_present;
   Addr     data_svma;
   Addr     data_avma;
   SizeT    data_size;
   PtrdiffT data_bias;
   Addr     data_debug_svma;
   PtrdiffT data_debug_bias;
   /* .sdata */
   Bool     sdata_present;
   Addr     sdata_svma;
   Addr     sdata_avma;
   SizeT    sdata_size;
   PtrdiffT sdata_bias;
   Addr     sdata_debug_svma;
   PtrdiffT sdata_debug_bias;
   /* .rodata */
   Bool     rodata_present;
   Addr     rodata_svma;
   Addr     rodata_avma;
   SizeT    rodata_size;
   PtrdiffT rodata_bias;
   Addr     rodata_debug_svma;
   PtrdiffT rodata_debug_bias;
   /* .bss */
   Bool     bss_present;
   Addr     bss_svma;
   Addr     bss_avma;
   SizeT    bss_size;
   PtrdiffT bss_bias;
   Addr     bss_debug_svma;
   PtrdiffT bss_debug_bias;
   /* .sbss */
   Bool     sbss_present;
   Addr     sbss_svma;
   Addr     sbss_avma;
   SizeT    sbss_size;
   PtrdiffT sbss_bias;
   Addr     sbss_debug_svma;
   PtrdiffT sbss_debug_bias;
   /* .plt */
   Bool   plt_present;
   Addr   plt_avma;
   SizeT  plt_size;
   /* .got */
   Bool   got_present;
   Addr   got_avma;
   SizeT  got_size;
   /* .got.plt */
   Bool   gotplt_present;
   Addr   gotplt_avma;
   SizeT  gotplt_size;
   /* .opd -- needed on ppc64-linux for finding symbols */
   Bool   opd_present;
   Addr   opd_avma;
   SizeT  opd_size;
   /* .ehframe -- needed on amd64-linux for stack unwinding.  We might
      see more than one, hence the arrays. */
   UInt   n_ehframe;  /* 0 .. N_EHFRAME_SECTS */
   Addr   ehframe_avma[N_EHFRAME_SECTS];
   SizeT  ehframe_size[N_EHFRAME_SECTS];

   /* Sorted tables of stuff we snarfed from the file.  This is the
      eventual product of reading the debug info.  All this stuff
      lives in VG_AR_DINFO. */

   /* An expandable array of symbols. */
   DiSym*  symtab;
   UWord   symtab_used;
   UWord   symtab_size;
   /* An expandable array of locations. */
   DiLoc*  loctab;
   UWord   loctab_used;
   UWord   loctab_size;
   /* An expandable array of CFI summary info records.  Also includes
      summary address bounds, showing the min and max address covered
      by any of the records, as an aid to fast searching.  And, if the
      records require any expression nodes, they are stored in
      cfsi_exprs. */
   DiCfSI* cfsi;
   UWord   cfsi_used;
   UWord   cfsi_size;
   Addr    cfsi_minavma;
   Addr    cfsi_maxavma;
   XArray* cfsi_exprs; /* XArray of CfiExpr */

   /* Optimized code under Wine x86: MSVC++ PDB FramePointerOmitted
      data.  Non-expandable array, hence .size == .used. */
   FPO_DATA* fpo;
   UWord     fpo_size;
   Addr      fpo_minavma;
   Addr      fpo_maxavma;
   Addr      fpo_base_avma;

   /* Expandable arrays of characters -- the string table.  Pointers
      into this are stable (the arrays are not reallocated).  The
      chunks form a singly linked list through ::next, each with a
      fixed-size strtab[] of SEGINFO_STRCHUNKSIZE bytes. */
   struct strchunk {
      UInt   strtab_used;
      struct strchunk* next;
      HChar  strtab[SEGINFO_STRCHUNKSIZE];
   } *strchunks;

   /* Variable scope information, as harvested from Dwarf3 files.

      In short it's an

         array of (array of PC address ranges and variables)

      The outer array indexes over scopes, with Entry 0 containing
      information on variables which exist for any value of the program
      counter (PC) -- that is, the outermost scope.  Entries 1, 2, 3,
      etc contain information on increasingly deeply nested variables.

      Each inner array is an array of (an address range, and a set
      of variables that are in scope over that address range).

      The address ranges may not overlap.

      Since Entry 0 in the outer array holds information on variables
      that exist for any value of the PC (that is, global vars), it
      follows that Entry 0's inner array can only have one address
      range pair, one that covers the entire address space.
   */
   XArray* /* of OSet of DiAddrRange */varinfo;

   /* These are arrays of the relevant typed objects, held here
      partially for the purposes of visiting each object exactly once
      when we need to delete them. */

   /* An array of TyEnts.  These are needed to make sense of any types
      in the .varinfo.  Also, when deleting this DebugInfo, we must
      first traverse this array and throw away malloc'd stuff hanging
      off it -- by calling ML_(TyEnt__make_EMPTY) on each entry. */
   XArray* /* of TyEnt */ admin_tyents;

   /* An array of guarded DWARF3 expressions. */
   XArray* admin_gexprs;

   /* Cached last rx mapping matched and returned by ML_(find_rx_mapping).
      This helps performance a lot during ML_(addLineInfo) etc., which can
      easily be invoked hundreds of thousands of times. */
   struct _DebugInfoMapping* last_rx_map;
};

/* --------------------- functions --------------------- */

/* ------ Adding ------ */

/* Add a symbol to di's symbol table.  The contents of 'sym' are
   copied.  It is assumed (and checked) that 'sym' only contains one
   name, so there is no auxiliary ::sec_names vector to duplicate.
   IOW, the copy is a shallow copy, and there are assertions in place
   to ensure that's OK. */
extern void ML_(addSym) ( struct _DebugInfo* di, DiSym* sym );

/* Add a line-number record to a DebugInfo.  NOTE(review): 'this' and
   'next' are presumably the start address of this line's code and of
   the following record's code -- confirm against the definition. */
extern
void ML_(addLineInfo) ( struct _DebugInfo* di,
                        const HChar* filename,
                        const HChar* dirname,  /* NULL is allowable */
                        Addr this, Addr next, Int lineno, Int entry);

/* Add a CFI summary record.  The supplied DiCfSI is copied. */
extern void ML_(addDiCfSI) ( struct _DebugInfo* di, DiCfSI* cfsi );

/* Add a string to the string table of a DebugInfo.  If len==-1,
   ML_(addStr) will itself measure the length of the string.
*/
extern HChar* ML_(addStr) ( struct _DebugInfo* di, const HChar* str, Int len );

/* Add a string to the string table of a DebugInfo, by copying the
   string from the given DiCursor.  Measures the length of the string
   itself. */
extern HChar* ML_(addStrFromCursor)( struct _DebugInfo* di, DiCursor c );

/* Add a variable to a DebugInfo.  The parameters aMin/aMax match the
   fields of DiAddrRange, and name/typeR/gexpr/fbGX/fileName/lineNo
   match the fields of DiVariable.  NOTE(review): 'level' is
   presumably the scope index into DebugInfo.varinfo and 'show' a
   debug-printing flag -- confirm against the definition. */
extern void ML_(addVar)( struct _DebugInfo* di,
                         Int    level,
                         Addr   aMin,
                         Addr   aMax,
                         HChar* name,
                         UWord  typeR, /* a cuOff */
                         GExpr* gexpr,
                         GExpr* fbGX, /* SHARED. */
                         HChar* fileName, /* where decl'd - may be NULL */
                         Int    lineNo, /* where decl'd - may be zero */
                         Bool   show );

/* Canonicalise the tables held by 'di', in preparation for use.  Call
   this after finishing adding entries to these tables. */
extern void ML_(canonicaliseTables) ( struct _DebugInfo* di );

/* Canonicalise the call-frame-info table held by 'di', in preparation
   for use.  This is called by ML_(canonicaliseTables) but can also be
   called on its own to sort just this table. */
extern void ML_(canonicaliseCFI) ( struct _DebugInfo* di );

/* ------ Searching ------ */

/* Find a symbol-table index containing the specified pointer, or -1
   if not found.  Binary search.  */
extern Word ML_(search_one_symtab) ( struct _DebugInfo* di, Addr ptr,
                                     Bool match_anywhere_in_sym,
                                     Bool findText );

/* Find a location-table index containing the specified pointer, or -1
   if not found.  Binary search.  */
extern Word ML_(search_one_loctab) ( struct _DebugInfo* di, Addr ptr );

/* Find a CFI-table index containing the specified pointer, or -1 if
   not found.  Binary search.  */
extern Word ML_(search_one_cfitab) ( struct _DebugInfo* di, Addr ptr );

/* Find a FPO-table index containing the specified pointer, or -1
   if not found.  Binary search.
*/ 931 extern Word ML_(search_one_fpotab) ( struct _DebugInfo* di, Addr ptr ); 932 933 /* Helper function for the most often needed searching for an rx 934 mapping containing the specified address range. The range must 935 fall entirely within the mapping to be considered to be within it. 936 Asserts if lo > hi; caller must ensure this doesn't happen. */ 937 extern struct _DebugInfoMapping* ML_(find_rx_mapping) ( struct _DebugInfo* di, 938 Addr lo, Addr hi ); 939 940 /* ------ Misc ------ */ 941 942 /* Show a non-fatal debug info reading error. Use vg_panic if 943 terminal. 'serious' errors are always shown, not 'serious' ones 944 are shown only at verbosity level 2 and above. */ 945 extern 946 void ML_(symerr) ( struct _DebugInfo* di, Bool serious, const HChar* msg ); 947 948 /* Print a symbol. */ 949 extern void ML_(ppSym) ( Int idx, DiSym* sym ); 950 951 /* Print a call-frame-info summary. */ 952 extern void ML_(ppDiCfSI) ( XArray* /* of CfiExpr */ exprs, DiCfSI* si ); 953 954 955 #define TRACE_SYMTAB_ENABLED (di->trace_symtab) 956 #define TRACE_SYMTAB(format, args...) \ 957 if (TRACE_SYMTAB_ENABLED) { VG_(printf)(format, ## args); } 958 959 960 #endif /* ndef __PRIV_STORAGE_H */ 961 962 /*--------------------------------------------------------------------*/ 963 /*--- end ---*/ 964 /*--------------------------------------------------------------------*/ 965