1 2 /*--------------------------------------------------------------------*/ 3 /*--- Format-neutral storage of and querying of info acquired from ---*/ 4 /*--- ELF/XCOFF stabs/dwarf1/dwarf2 debug info. ---*/ 5 /*--- priv_storage.h ---*/ 6 /*--------------------------------------------------------------------*/ 7 8 /* 9 This file is part of Valgrind, a dynamic binary instrumentation 10 framework. 11 12 Copyright (C) 2000-2011 Julian Seward 13 jseward (at) acm.org 14 15 This program is free software; you can redistribute it and/or 16 modify it under the terms of the GNU General Public License as 17 published by the Free Software Foundation; either version 2 of the 18 License, or (at your option) any later version. 19 20 This program is distributed in the hope that it will be useful, but 21 WITHOUT ANY WARRANTY; without even the implied warranty of 22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 23 General Public License for more details. 24 25 You should have received a copy of the GNU General Public License 26 along with this program; if not, write to the Free Software 27 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 28 02111-1307, USA. 29 30 The GNU General Public License is contained in the file COPYING. 31 */ 32 /* 33 Stabs reader greatly improved by Nick Nethercote, Apr 02. 34 This module was also extensively hacked on by Jeremy Fitzhardinge 35 and Tom Hughes. 36 */ 37 /* See comment at top of debuginfo.c for explanation of 38 the _svma / _avma / _image / _bias naming scheme. 39 */ 40 /* Note this is not freestanding; needs pub_core_xarray.h and 41 priv_tytypes.h to be included before it. */ 42 43 #ifndef __PRIV_STORAGE_H 44 #define __PRIV_STORAGE_H 45 46 /* --------------------- SYMBOLS --------------------- */ 47 48 /* A structure to hold an ELF/MachO symbol (very crudely). Usually 49 the symbol only has one name, which is stored in ::pri_name, and 50 ::sec_names is NULL. 
If there are other names, these are stored in 51 ::sec_names, which is a NULL terminated vector holding the names. 52 The vector is allocated in VG_AR_DINFO, the names themselves live 53 in DebugInfo::strchunks. 54 55 From the point of view of ELF, the primary vs secondary distinction 56 is artificial: they are all just names associated with the address, 57 none of which has higher precedence than any other. However, from 58 the point of view of mapping an address to a name to display to the 59 user, we need to choose one "preferred" name, and so that might as 60 well be installed as the pri_name, whilst all others can live in 61 sec_names[]. This has the convenient side effect that, in the 62 common case where there is only one name for the address, 63 sec_names[] does not need to be allocated. 64 */ 65 typedef 66 struct { 67 Addr addr; /* lowest address of entity */ 68 Addr tocptr; /* ppc64-linux only: value that R2 should have */ 69 UChar* pri_name; /* primary name, never NULL */ 70 UChar** sec_names; /* NULL, or a NULL term'd array of other names */ 71 // XXX: this could be shrunk (on 32-bit platforms) by using 30 72 // bits for the size and 1 bit each for isText and isIFunc. If you 73 // do this, make sure that all assignments to the latter two use 74 // 0 or 1 (or True or False), and that a positive number larger 75 // than 1 is never used to represent True. 76 UInt size; /* size in bytes */ 77 Bool isText; 78 Bool isIFunc; /* symbol is an indirect function? */ 79 } 80 DiSym; 81 82 /* --------------------- SRCLOCS --------------------- */ 83 84 /* Line count at which overflow happens, due to line numbers being 85 stored as shorts in `struct nlist' in a.out.h. 
*/ 86 #define LINENO_OVERFLOW (1 << (sizeof(short) * 8)) 87 88 #define LINENO_BITS 20 89 #define LOC_SIZE_BITS (32 - LINENO_BITS) 90 #define MAX_LINENO ((1 << LINENO_BITS) - 1) 91 92 /* Unlikely to have any lines with instruction ranges > 4096 bytes */ 93 #define MAX_LOC_SIZE ((1 << LOC_SIZE_BITS) - 1) 94 95 /* Number used to detect line number overflows; if one line is 96 60000-odd smaller than the previous, it was probably an overflow. 97 */ 98 #define OVERFLOW_DIFFERENCE (LINENO_OVERFLOW - 5000) 99 100 /* A structure to hold addr-to-source info for a single line. There 101 can be a lot of these, hence the dense packing. */ 102 typedef 103 struct { 104 /* Word 1 */ 105 Addr addr; /* lowest address for this line */ 106 /* Word 2 */ 107 UShort size:LOC_SIZE_BITS; /* # bytes; we catch overflows of this */ 108 UInt lineno:LINENO_BITS; /* source line number, or zero */ 109 /* Word 3 */ 110 UChar* filename; /* source filename */ 111 /* Word 4 */ 112 UChar* dirname; /* source directory name */ 113 } 114 DiLoc; 115 116 /* --------------------- CF INFO --------------------- */ 117 118 /* DiCfSI: a structure to summarise DWARF2/3 CFA info for the code 119 address range [base .. base+len-1]. 120 121 On x86 and amd64 ("IA"), if you know ({e,r}sp, {e,r}bp, {e,r}ip) at 122 some point and {e,r}ip is in the range [base .. base+len-1], it 123 tells you how to calculate ({e,r}sp, {e,r}bp) for the caller of the 124 current frame and also ra, the return address of the current frame. 

   First off, calculate CFA, the Canonical Frame Address, thusly:

     cfa = case cfa_how of
              CFIC_IA_SPREL -> {e,r}sp + cfa_off
              CFIC_IA_BPREL -> {e,r}bp + cfa_off
              CFIC_IA_EXPR  -> expr whose index is in cfa_off

   Once that is done, the previous frame's {e,r}sp/{e,r}bp values and
   this frame's ra value can be calculated like this:

     old_{e,r}sp/{e,r}bp/ra
         = case {e,r}sp/{e,r}bp/ra_how of
              CFIR_UNKNOWN   -> we don't know, sorry
              CFIR_SAME      -> same as it was before (sp/bp only)
              CFIR_CFAREL    -> cfa + sp/bp/ra_off
              CFIR_MEMCFAREL -> *( cfa + sp/bp/ra_off )
              CFIR_EXPR      -> expr whose index is in sp/bp/ra_off

   On ARM it's pretty much the same, except we have more registers to
   keep track of:

     cfa = case cfa_how of
              CFIC_ARM_R13REL -> r13 + cfa_off
              CFIC_ARM_R12REL -> r12 + cfa_off
              CFIC_ARM_R11REL -> r11 + cfa_off
              CFIC_ARM_R7REL  -> r7  + cfa_off
              CFIC_EXPR       -> expr whose index is in cfa_off

     old_r14/r13/r12/r11/r7/ra
         = case r14/r13/r12/r11/r7/ra_how of
              CFIR_UNKNOWN   -> we don't know, sorry
              CFIR_SAME      -> same as it was before (r14/r13/r12/r11/r7 only)
              CFIR_CFAREL    -> cfa + r14/r13/r12/r11/r7/ra_off
              CFIR_MEMCFAREL -> *( cfa + r14/r13/r12/r11/r7/ra_off )
              CFIR_EXPR      -> expr whose index is in r14/r13/r12/r11/r7/ra_off

   On s390x we have a similar logic as x86 or amd64.
We need the stack pointer 163 (r15), the frame pointer r11 (like BP) and together with the instruction 164 address in the PSW we can calculate the previous values: 165 cfa = case cfa_how of 166 CFIC_IA_SPREL -> r15 + cfa_off 167 CFIC_IA_BPREL -> r11 + cfa_off 168 CFIR_IA_EXPR -> expr whose index is in cfa_off 169 170 old_sp/fp/ra 171 = case sp/fp/ra_how of 172 CFIR_UNKNOWN -> we don't know, sorry 173 CFIR_SAME -> same as it was before (sp/fp only) 174 CFIR_CFAREL -> cfa + sp/fp/ra_off 175 CFIR_MEMCFAREL -> *( cfa + sp/fp/ra_off ) 176 CFIR_EXPR -> expr whose index is in sp/fp/ra_off 177 */ 178 179 #define CFIC_IA_SPREL ((UChar)1) 180 #define CFIC_IA_BPREL ((UChar)2) 181 #define CFIC_IA_EXPR ((UChar)3) 182 #define CFIC_ARM_R13REL ((UChar)4) 183 #define CFIC_ARM_R12REL ((UChar)5) 184 #define CFIC_ARM_R11REL ((UChar)6) 185 #define CFIC_ARM_R7REL ((UChar)7) 186 #define CFIC_EXPR ((UChar)8) /* all targets */ 187 188 #define CFIR_UNKNOWN ((UChar)64) 189 #define CFIR_SAME ((UChar)65) 190 #define CFIR_CFAREL ((UChar)66) 191 #define CFIR_MEMCFAREL ((UChar)67) 192 #define CFIR_EXPR ((UChar)68) 193 194 #if defined(VGA_x86) || defined(VGA_amd64) 195 typedef 196 struct { 197 Addr base; 198 UInt len; 199 UChar cfa_how; /* a CFIC_IA value */ 200 UChar ra_how; /* a CFIR_ value */ 201 UChar sp_how; /* a CFIR_ value */ 202 UChar bp_how; /* a CFIR_ value */ 203 Int cfa_off; 204 Int ra_off; 205 Int sp_off; 206 Int bp_off; 207 } 208 DiCfSI; 209 #elif defined(VGA_arm) 210 typedef 211 struct { 212 Addr base; 213 UInt len; 214 UChar cfa_how; /* a CFIC_ value */ 215 UChar ra_how; /* a CFIR_ value */ 216 UChar r14_how; /* a CFIR_ value */ 217 UChar r13_how; /* a CFIR_ value */ 218 UChar r12_how; /* a CFIR_ value */ 219 UChar r11_how; /* a CFIR_ value */ 220 UChar r7_how; /* a CFIR_ value */ 221 Int cfa_off; 222 Int ra_off; 223 Int r14_off; 224 Int r13_off; 225 Int r12_off; 226 Int r11_off; 227 Int r7_off; 228 } 229 DiCfSI; 230 #elif defined(VGA_ppc32) || defined(VGA_ppc64) 231 /* Just have a 
struct with the common fields in, so that code that 232 processes the common fields doesn't have to be ifdef'd against 233 VGP_/VGA_ symbols. These are not used in any way on ppc32/64-linux 234 at the moment. */ 235 typedef 236 struct { 237 Addr base; 238 UInt len; 239 UChar cfa_how; /* a CFIC_ value */ 240 UChar ra_how; /* a CFIR_ value */ 241 Int cfa_off; 242 Int ra_off; 243 } 244 DiCfSI; 245 #elif defined(VGA_s390x) 246 typedef 247 struct { 248 Addr base; 249 UInt len; 250 UChar cfa_how; /* a CFIC_ value */ 251 UChar sp_how; /* a CFIR_ value */ 252 UChar ra_how; /* a CFIR_ value */ 253 UChar fp_how; /* a CFIR_ value */ 254 Int cfa_off; 255 Int sp_off; 256 Int ra_off; 257 Int fp_off; 258 } 259 DiCfSI; 260 #else 261 # error "Unknown arch" 262 #endif 263 264 265 typedef 266 enum { 267 Cop_Add=0x321, 268 Cop_Sub, 269 Cop_And, 270 Cop_Mul, 271 Cop_Shl, 272 Cop_Shr, 273 Cop_Eq, 274 Cop_Ge, 275 Cop_Gt, 276 Cop_Le, 277 Cop_Lt, 278 Cop_Ne 279 } 280 CfiOp; 281 282 typedef 283 enum { 284 Creg_IA_SP=0x213, 285 Creg_IA_BP, 286 Creg_IA_IP, 287 Creg_ARM_R13, 288 Creg_ARM_R12, 289 Creg_ARM_R15, 290 Creg_ARM_R14, 291 Creg_S390_R14 292 } 293 CfiReg; 294 295 typedef 296 enum { 297 Cex_Undef=0x123, 298 Cex_Deref, 299 Cex_Const, 300 Cex_Binop, 301 Cex_CfiReg, 302 Cex_DwReg 303 } 304 CfiExprTag; 305 306 typedef 307 struct { 308 CfiExprTag tag; 309 union { 310 struct { 311 } Undef; 312 struct { 313 Int ixAddr; 314 } Deref; 315 struct { 316 UWord con; 317 } Const; 318 struct { 319 CfiOp op; 320 Int ixL; 321 Int ixR; 322 } Binop; 323 struct { 324 CfiReg reg; 325 } CfiReg; 326 struct { 327 Int reg; 328 } DwReg; 329 } 330 Cex; 331 } 332 CfiExpr; 333 334 extern Int ML_(CfiExpr_Undef) ( XArray* dst ); 335 extern Int ML_(CfiExpr_Deref) ( XArray* dst, Int ixAddr ); 336 extern Int ML_(CfiExpr_Const) ( XArray* dst, UWord con ); 337 extern Int ML_(CfiExpr_Binop) ( XArray* dst, CfiOp op, Int ixL, Int ixR ); 338 extern Int ML_(CfiExpr_CfiReg)( XArray* dst, CfiReg reg ); 339 extern Int 
ML_(CfiExpr_DwReg) ( XArray* dst, Int reg ); 340 341 extern void ML_(ppCfiExpr)( XArray* src, Int ix ); 342 343 /* ---------------- FPO INFO (Windows PE) -------------- */ 344 345 /* for apps using Wine: MSVC++ PDB FramePointerOmitted: somewhat like 346 a primitive CFI */ 347 typedef 348 struct _FPO_DATA { /* 16 bytes */ 349 UInt ulOffStart; /* offset of 1st byte of function code */ 350 UInt cbProcSize; /* # bytes in function */ 351 UInt cdwLocals; /* # bytes/4 in locals */ 352 UShort cdwParams; /* # bytes/4 in params */ 353 UChar cbProlog; /* # bytes in prolog */ 354 UChar cbRegs :3; /* # regs saved */ 355 UChar fHasSEH:1; /* Structured Exception Handling */ 356 UChar fUseBP :1; /* EBP has been used */ 357 UChar reserved:1; 358 UChar cbFrame:2; /* frame type */ 359 } 360 FPO_DATA; 361 362 #define PDB_FRAME_FPO 0 363 #define PDB_FRAME_TRAP 1 364 #define PDB_FRAME_TSS 2 365 366 /* --------------------- VARIABLES --------------------- */ 367 368 typedef 369 struct { 370 Addr aMin; 371 Addr aMax; 372 XArray* /* of DiVariable */ vars; 373 } 374 DiAddrRange; 375 376 typedef 377 struct { 378 UChar* name; /* in DebugInfo.strchunks */ 379 UWord typeR; /* a cuOff */ 380 GExpr* gexpr; /* on DebugInfo.gexprs list */ 381 GExpr* fbGX; /* SHARED. */ 382 UChar* fileName; /* where declared; may be NULL. in 383 DebugInfo.strchunks */ 384 Int lineNo; /* where declared; may be zero. */ 385 } 386 DiVariable; 387 388 Word 389 ML_(cmp_for_DiAddrRange_range) ( const void* keyV, const void* elemV ); 390 391 /* --------------------- DEBUGINFO --------------------- */ 392 393 /* This is the top-level data type. It's a structure which contains 394 information pertaining to one mapped ELF object. This type is 395 exported only abstractly - in pub_tool_debuginfo.h. */ 396 397 /* First though, here's an auxiliary data structure. It is only ever 398 used as part of a struct _DebugInfo. 
We use it to record 399 observations about mappings and permission changes to the 400 associated file, so as to decide when to read debug info. It's 401 essentially an ultra-trivial finite state machine which, when it 402 reaches an accept state, signals that we should now read debug info 403 from the object into the associated struct _DebugInfo. The accept 404 state is arrived at when have_rx_map and have_rw_map both become 405 true. The initial state is one in which we have no observations, 406 so have_rx_map and have_rw_map are both false. 407 408 This is all rather ad-hoc; for example it has no way to record more 409 than one rw or rx mapping for a given object, not because such 410 events have never been observed, but because we've never needed to 411 note more than the first one of any such in order when to decide to 412 read debug info. It may be that in future we need to track more 413 state in order to make the decision, so this struct would then get 414 expanded. 415 416 The normal sequence of events is one of 417 418 start --> r-x mapping --> rw- mapping --> accept 419 start --> rw- mapping --> r-x mapping --> accept 420 421 that is, take the first r-x and rw- mapping we see, and we're done. 422 423 On MacOSX 10.7, 32-bit, there appears to be a new variant: 424 425 start --> r-- mapping --> rw- mapping 426 --> upgrade r-- mapping to r-x mapping --> accept 427 428 where the upgrade is done by a call to vm_protect. Hence we 429 need to also track this possibility. 430 */ 431 struct _DebugInfoFSM 432 { 433 /* --- all targets --- */ 434 UChar* filename; /* in mallocville (VG_AR_DINFO) */ 435 436 Bool have_rx_map; /* did we see a r?x mapping yet for the file? */ 437 Bool have_rw_map; /* did we see a rw? mapping yet for the file? 
*/ 438 439 Addr rx_map_avma; /* these fields record the file offset, length */ 440 SizeT rx_map_size; /* and map address of the r?x mapping we believe */ 441 OffT rx_map_foff; /* is the .text segment mapping */ 442 443 Addr rw_map_avma; /* ditto, for the rw? mapping we believe is the */ 444 SizeT rw_map_size; /* .data segment mapping */ 445 OffT rw_map_foff; 446 447 /* --- OSX 10.7, 32-bit only --- */ 448 Bool have_ro_map; /* did we see a r-- mapping yet for the file? */ 449 450 Addr ro_map_avma; /* file offset, length, avma for said mapping */ 451 SizeT ro_map_size; 452 OffT ro_map_foff; 453 }; 454 455 456 /* To do with the string table in struct _DebugInfo (::strchunks) */ 457 #define SEGINFO_STRCHUNKSIZE (64*1024) 458 459 460 /* We may encounter more than one .eh_frame section in an object -- 461 unusual but apparently allowed by ELF. See 462 http://sourceware.org/bugzilla/show_bug.cgi?id=12675 463 */ 464 #define N_EHFRAME_SECTS 2 465 466 467 /* So, the main structure for holding debug info for one object. */ 468 469 struct _DebugInfo { 470 471 /* Admin stuff */ 472 473 struct _DebugInfo* next; /* list of DebugInfos */ 474 Bool mark; /* marked for deletion? */ 475 476 /* An abstract handle, which can be used by entities outside of 477 m_debuginfo to (in an abstract datatype sense) refer to this 478 struct _DebugInfo. A .handle of zero is invalid; valid handles 479 are 1 and above. The same handle is never issued twice (in any 480 given run of Valgrind), so a handle becomes invalid when the 481 associated struct _DebugInfo is discarded, and remains invalid 482 forever thereafter. The .handle field is set as soon as this 483 structure is allocated. */ 484 ULong handle; 485 486 /* Used for debugging only - indicate what stuff to dump whilst 487 reading stuff into the seginfo. Are computed as early in the 488 lifetime of the DebugInfo as possible -- at the point when it is 489 created. 
Use these when deciding what to spew out; do not use 490 the global VG_(clo_blah) flags. */ 491 492 Bool trace_symtab; /* symbols, our style */ 493 Bool trace_cfi; /* dwarf frame unwind, our style */ 494 Bool ddump_syms; /* mimic /usr/bin/readelf --syms */ 495 Bool ddump_line; /* mimic /usr/bin/readelf --debug-dump=line */ 496 Bool ddump_frames; /* mimic /usr/bin/readelf --debug-dump=frames */ 497 498 /* The "decide when it is time to read debuginfo" state machine. 499 This structure must get filled in before we can start reading 500 anything from the ELF/MachO file. This structure is filled in 501 by VG_(di_notify_mmap) and its immediate helpers. */ 502 struct _DebugInfoFSM fsm; 503 504 /* Once the ::fsm has reached an accept state -- typically, when 505 both a rw? and r?x mapping for .filename have been observed -- 506 we can go on to read the symbol tables and debug info. 507 .have_dinfo changes from False to True when the debug info has 508 been completely read in and postprocessed (canonicalised) and is 509 now suitable for querying. */ 510 /* If have_dinfo is False, then all fields below this point are 511 invalid and should not be consulted. */ 512 Bool have_dinfo; /* initially False */ 513 514 /* All the rest of the fields in this structure are filled in once 515 we have committed to reading the symbols and debug info (that 516 is, at the point where .have_dinfo is set to True). */ 517 518 /* The file's soname. FIXME: ensure this is always allocated in 519 VG_AR_DINFO. */ 520 UChar* soname; 521 522 /* Description of some important mapped segments. The presence or 523 absence of the mapping is denoted by the _present field, since 524 in some obscure circumstances (to do with data/sdata/bss) it is 525 possible for the mapping to be present but have zero size. 526 Certainly text_ is mandatory on all platforms; not sure about 527 the rest though. 
528 529 -------------------------------------------------------- 530 531 Comment_on_IMPORTANT_CFSI_REPRESENTATIONAL_INVARIANTS: we require that 532 533 either (rx_map_size == 0 && cfsi == NULL) (the degenerate case) 534 535 or the normal case, which is the AND of the following: 536 (0) rx_map_size > 0 537 (1) no two DebugInfos with rx_map_size > 0 538 have overlapping [rx_map_avma,+rx_map_size) 539 (2) [cfsi_minavma,cfsi_maxavma] does not extend 540 beyond [rx_map_avma,+rx_map_size); that is, the former is a 541 subrange or equal to the latter. 542 (3) all DiCfSI in the cfsi array all have ranges that fall within 543 [rx_map_avma,+rx_map_size). 544 (4) all DiCfSI in the cfsi array are non-overlapping 545 546 The cumulative effect of these restrictions is to ensure that 547 all the DiCfSI records in the entire system are non overlapping. 548 Hence any address falls into either exactly one DiCfSI record, 549 or none. Hence it is safe to cache the results of searches for 550 DiCfSI records. This is the whole point of these restrictions. 551 The caching of DiCfSI searches is done in VG_(use_CF_info). The 552 cache is flushed after any change to debugInfo_list. DiCfSI 553 searches are cached because they are central to stack unwinding 554 on amd64-linux. 555 556 Where are these invariants imposed and checked? 557 558 They are checked after a successful read of debuginfo into 559 a DebugInfo*, in check_CFSI_related_invariants. 560 561 (1) is not really imposed anywhere. We simply assume that the 562 kernel will not map the text segments from two different objects 563 into the same space. Sounds reasonable. 564 565 (2) follows from (4) and (3). It is ensured by canonicaliseCFI. 566 (3) is ensured by ML_(addDiCfSI). 567 (4) is ensured by canonicaliseCFI. 568 569 -------------------------------------------------------- 570 571 Comment_on_DEBUG_SVMA_and_DEBUG_BIAS_fields: 572 573 The _debug_{svma,bias} fields were added as part of a fix to 574 #185816. 
The problem encompassed in that bug report was that it 575 wasn't correct to use apply the bias values deduced for a 576 primary object to its associated debuginfo object, because the 577 debuginfo object (or the primary) could have been prelinked to a 578 different SVMA. Hence debuginfo and primary objects need to 579 have their own biases. 580 581 ------ JRS: (referring to r9329): ------ 582 Let me see if I understand the workings correctly. Initially 583 the _debug_ values are set to the same values as the "normal" 584 ones, as there's a bunch of bits of code like this (in 585 readelf.c) 586 587 di->text_svma = svma; 588 ... 589 di->text_bias = rx_bias; 590 di->text_debug_svma = svma; 591 di->text_debug_bias = rx_bias; 592 593 If a debuginfo object subsequently shows up then the 594 _debug_svma/bias are set for the debuginfo object. Result is 595 that if there's no debuginfo object then the values are the same 596 as the primary-object values, and if there is a debuginfo object 597 then they will (or at least may) be different. 598 599 Then when we need to actually bias something, we'll have to 600 decide whether to use the primary bias or the debuginfo bias. 601 And the strategy is to use the primary bias for ELF symbols but 602 the debuginfo bias for anything pulled out of Dwarf. 603 604 ------ THH: ------ 605 Correct - the debug_svma and bias values apply to any address 606 read from the debug data regardless of where that debug data is 607 stored and the other values are used for addresses from other 608 places (primarily the symbol table). 609 610 ------ JRS: ------ 611 Ok; so this was my only area of concern. Are there any 612 corner-case scenarios where this wouldn't be right? It sounds 613 like we're assuming the ELF symbols come from the primary object 614 and, if there is a debug object, then all the Dwarf comes from 615 there. But what if (eg) both symbols and Dwarf come from the 616 debug object? Is that even possible or allowable? 
617 618 ------ THH: ------ 619 You may have a point... 620 621 The current logic is to try and take any one set of data from 622 either the base object or the debug object. There are four sets 623 of data we consider: 624 625 - Symbol Table 626 - Stabs 627 - DWARF1 628 - DWARF2 629 630 If we see the primary section for a given set in the base object 631 then we ignore all sections relating to that set in the debug 632 object. 633 634 Now in principle if we saw a secondary section (like debug_line 635 say) in the base object, but not the main section (debug_info in 636 this case) then we would take debug_info from the debug object 637 but would use the debug_line from the base object unless we saw 638 a replacement copy in the debug object. That's probably unlikely 639 however. 640 641 A bigger issue might be, as you say, the symbol table as we will 642 pick that up from the debug object if it isn't in the base. The 643 dynamic symbol table will always have to be in the base object 644 though so we will have to be careful when processing symbols to 645 know which table we are reading in that case. 646 647 What we probably need to do is tell read_elf_symtab which object 648 the symbols it is being asked to read came from. 649 650 (A followup patch to deal with this was committed in r9469). 
651 */ 652 /* .text */ 653 Bool text_present; 654 Addr text_avma; 655 Addr text_svma; 656 SizeT text_size; 657 PtrdiffT text_bias; 658 Addr text_debug_svma; 659 PtrdiffT text_debug_bias; 660 /* .data */ 661 Bool data_present; 662 Addr data_svma; 663 Addr data_avma; 664 SizeT data_size; 665 PtrdiffT data_bias; 666 Addr data_debug_svma; 667 PtrdiffT data_debug_bias; 668 /* .sdata */ 669 Bool sdata_present; 670 Addr sdata_svma; 671 Addr sdata_avma; 672 SizeT sdata_size; 673 PtrdiffT sdata_bias; 674 Addr sdata_debug_svma; 675 PtrdiffT sdata_debug_bias; 676 /* .rodata */ 677 Bool rodata_present; 678 Addr rodata_svma; 679 Addr rodata_avma; 680 SizeT rodata_size; 681 PtrdiffT rodata_bias; 682 Addr rodata_debug_svma; 683 PtrdiffT rodata_debug_bias; 684 /* .bss */ 685 Bool bss_present; 686 Addr bss_svma; 687 Addr bss_avma; 688 SizeT bss_size; 689 PtrdiffT bss_bias; 690 Addr bss_debug_svma; 691 PtrdiffT bss_debug_bias; 692 /* .sbss */ 693 Bool sbss_present; 694 Addr sbss_svma; 695 Addr sbss_avma; 696 SizeT sbss_size; 697 PtrdiffT sbss_bias; 698 Addr sbss_debug_svma; 699 PtrdiffT sbss_debug_bias; 700 /* .plt */ 701 Bool plt_present; 702 Addr plt_avma; 703 SizeT plt_size; 704 /* .got */ 705 Bool got_present; 706 Addr got_avma; 707 SizeT got_size; 708 /* .got.plt */ 709 Bool gotplt_present; 710 Addr gotplt_avma; 711 SizeT gotplt_size; 712 /* .opd -- needed on ppc64-linux for finding symbols */ 713 Bool opd_present; 714 Addr opd_avma; 715 SizeT opd_size; 716 /* .ehframe -- needed on amd64-linux for stack unwinding. We might 717 see more than one, hence the arrays. */ 718 UInt n_ehframe; /* 0 .. N_EHFRAME_SECTS */ 719 Addr ehframe_avma[N_EHFRAME_SECTS]; 720 SizeT ehframe_size[N_EHFRAME_SECTS]; 721 722 /* Sorted tables of stuff we snarfed from the file. This is the 723 eventual product of reading the debug info. All this stuff 724 lives in VG_AR_DINFO. */ 725 726 /* An expandable array of symbols. 
*/ 727 DiSym* symtab; 728 UWord symtab_used; 729 UWord symtab_size; 730 /* An expandable array of locations. */ 731 DiLoc* loctab; 732 UWord loctab_used; 733 UWord loctab_size; 734 /* An expandable array of CFI summary info records. Also includes 735 summary address bounds, showing the min and max address covered 736 by any of the records, as an aid to fast searching. And, if the 737 records require any expression nodes, they are stored in 738 cfsi_exprs. */ 739 DiCfSI* cfsi; 740 UWord cfsi_used; 741 UWord cfsi_size; 742 Addr cfsi_minavma; 743 Addr cfsi_maxavma; 744 XArray* cfsi_exprs; /* XArray of CfiExpr */ 745 746 /* Optimized code under Wine x86: MSVC++ PDB FramePointerOmitted 747 data. Non-expandable array, hence .size == .used. */ 748 FPO_DATA* fpo; 749 UWord fpo_size; 750 Addr fpo_minavma; 751 Addr fpo_maxavma; 752 753 /* Expandable arrays of characters -- the string table. Pointers 754 into this are stable (the arrays are not reallocated). */ 755 struct strchunk { 756 UInt strtab_used; 757 struct strchunk* next; 758 UChar strtab[SEGINFO_STRCHUNKSIZE]; 759 } *strchunks; 760 761 /* Variable scope information, as harvested from Dwarf3 files. 762 763 In short it's an 764 765 array of (array of PC address ranges and variables) 766 767 The outer array indexes over scopes, with Entry 0 containing 768 information on variables which exist for any value of the program 769 counter (PC) -- that is, the outermost scope. Entries 1, 2, 3, 770 etc contain information on increasinly deeply nested variables. 771 772 Each inner array is an array of (an address range, and a set 773 of variables that are in scope over that address range). 774 775 The address ranges may not overlap. 776 777 Since Entry 0 in the outer array holds information on variables 778 that exist for any value of the PC (that is, global vars), it 779 follows that Entry 0's inner array can only have one address 780 range pair, one that covers the entire address space. 
781 */ 782 XArray* /* of OSet of DiAddrRange */varinfo; 783 784 /* These are arrays of the relevant typed objects, held here 785 partially for the purposes of visiting each object exactly once 786 when we need to delete them. */ 787 788 /* An array of TyEnts. These are needed to make sense of any types 789 in the .varinfo. Also, when deleting this DebugInfo, we must 790 first traverse this array and throw away malloc'd stuff hanging 791 off it -- by calling ML_(TyEnt__make_EMPTY) on each entry. */ 792 XArray* /* of TyEnt */ admin_tyents; 793 794 /* An array of guarded DWARF3 expressions. */ 795 XArray* admin_gexprs; 796 }; 797 798 /* --------------------- functions --------------------- */ 799 800 /* ------ Adding ------ */ 801 802 /* Add a symbol to si's symbol table. The contents of 'sym' are 803 copied. It is assumed (and checked) that 'sym' only contains one 804 name, so there is no auxiliary ::sec_names vector to duplicate. 805 IOW, the copy is a shallow copy, and there are assertions in place 806 to ensure that's OK. */ 807 extern void ML_(addSym) ( struct _DebugInfo* di, DiSym* sym ); 808 809 /* Add a line-number record to a DebugInfo. */ 810 extern 811 void ML_(addLineInfo) ( struct _DebugInfo* di, 812 UChar* filename, 813 UChar* dirname, /* NULL is allowable */ 814 Addr this, Addr next, Int lineno, Int entry); 815 816 /* Add a CFI summary record. The supplied DiCfSI is copied. */ 817 extern void ML_(addDiCfSI) ( struct _DebugInfo* di, DiCfSI* cfsi ); 818 819 /* Add a string to the string table of a DebugInfo. If len==-1, 820 ML_(addStr) will itself measure the length of the string. */ 821 extern UChar* ML_(addStr) ( struct _DebugInfo* di, UChar* str, Int len ); 822 823 extern void ML_(addVar)( struct _DebugInfo* di, 824 Int level, 825 Addr aMin, 826 Addr aMax, 827 UChar* name, 828 UWord typeR, /* a cuOff */ 829 GExpr* gexpr, 830 GExpr* fbGX, /* SHARED. 
*/ 831 UChar* fileName, /* where decl'd - may be NULL */ 832 Int lineNo, /* where decl'd - may be zero */ 833 Bool show ); 834 835 /* Canonicalise the tables held by 'di', in preparation for use. Call 836 this after finishing adding entries to these tables. */ 837 extern void ML_(canonicaliseTables) ( struct _DebugInfo* di ); 838 839 /* Canonicalise the call-frame-info table held by 'di', in preparation 840 for use. This is called by ML_(canonicaliseTables) but can also be 841 called on it's own to sort just this table. */ 842 extern void ML_(canonicaliseCFI) ( struct _DebugInfo* di ); 843 844 /* ------ Searching ------ */ 845 846 /* Find a symbol-table index containing the specified pointer, or -1 847 if not found. Binary search. */ 848 extern Word ML_(search_one_symtab) ( struct _DebugInfo* di, Addr ptr, 849 Bool match_anywhere_in_sym, 850 Bool findText ); 851 852 /* Find a location-table index containing the specified pointer, or -1 853 if not found. Binary search. */ 854 extern Word ML_(search_one_loctab) ( struct _DebugInfo* di, Addr ptr ); 855 856 /* Find a CFI-table index containing the specified pointer, or -1 if 857 not found. Binary search. */ 858 extern Word ML_(search_one_cfitab) ( struct _DebugInfo* di, Addr ptr ); 859 860 /* Find a FPO-table index containing the specified pointer, or -1 861 if not found. Binary search. */ 862 extern Word ML_(search_one_fpotab) ( struct _DebugInfo* di, Addr ptr ); 863 864 /* ------ Misc ------ */ 865 866 /* Show a non-fatal debug info reading error. Use vg_panic if 867 terminal. 'serious' errors are always shown, not 'serious' ones 868 are shown only at verbosity level 2 and above. */ 869 extern 870 void ML_(symerr) ( struct _DebugInfo* di, Bool serious, HChar* msg ); 871 872 /* Print a symbol. */ 873 extern void ML_(ppSym) ( Int idx, DiSym* sym ); 874 875 /* Print a call-frame-info summary. 
*/ 876 extern void ML_(ppDiCfSI) ( XArray* /* of CfiExpr */ exprs, DiCfSI* si ); 877 878 879 #define TRACE_SYMTAB(format, args...) \ 880 if (di->trace_symtab) { VG_(printf)(format, ## args); } 881 882 883 #endif /* ndef __PRIV_STORAGE_H */ 884 885 /*--------------------------------------------------------------------*/ 886 /*--- end ---*/ 887 /*--------------------------------------------------------------------*/ 888