/*--------------------------------------------------------------------*/
/*--- Format-neutral storage of and querying of info acquired from ---*/
/*--- ELF/XCOFF stabs/dwarf1/dwarf2 debug info.                    ---*/
/*---                                               priv_storage.h ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2000-2015 Julian Seward
      jseward (at) acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/
/*
   Stabs reader greatly improved by Nick Nethercote, Apr 02.
   This module was also extensively hacked on by Jeremy Fitzhardinge
   and Tom Hughes.
*/
/* See comment at top of debuginfo.c for explanation of
   the _svma / _avma / _image / _bias naming scheme.
*/
/* Note this is not freestanding; needs pub_core_xarray.h and
   priv_tytypes.h to be included before it. */

#ifndef __PRIV_STORAGE_H
#define __PRIV_STORAGE_H

#include "pub_core_basics.h"   // Addr
#include "pub_core_xarray.h"   // XArray
#include "pub_core_deduppoolalloc.h" // DedupPoolAlloc
#include "priv_d3basics.h"     // GExpr et al.
50 #include "priv_image.h" // DiCursor 51 52 /* --------------------- SYMBOLS --------------------- */ 53 54 /* A structure to hold an ELF/MachO symbol (very crudely). Usually 55 the symbol only has one name, which is stored in ::pri_name, and 56 ::sec_names is NULL. If there are other names, these are stored in 57 ::sec_names, which is a NULL terminated vector holding the names. 58 The vector is allocated in VG_AR_DINFO, the names themselves live 59 in DebugInfo::strpool. 60 61 From the point of view of ELF, the primary vs secondary distinction 62 is artificial: they are all just names associated with the address, 63 none of which has higher precedence than any other. However, from 64 the point of view of mapping an address to a name to display to the 65 user, we need to choose one "preferred" name, and so that might as 66 well be installed as the pri_name, whilst all others can live in 67 sec_names[]. This has the convenient side effect that, in the 68 common case where there is only one name for the address, 69 sec_names[] does not need to be allocated. 70 */ 71 typedef 72 struct { 73 SymAVMAs avmas; /* Symbol Actual VMAs: lowest address of entity, 74 + platform specific fields, to access with 75 the macros defined in pub_core_debuginfo.h */ 76 const HChar* pri_name; /* primary name, never NULL */ 77 const HChar** sec_names; /* NULL, or a NULL term'd array of other names */ 78 // XXX: this could be shrunk (on 32-bit platforms) by using 30 79 // bits for the size and 1 bit each for isText and isIFunc. If you 80 // do this, make sure that all assignments to the latter two use 81 // 0 or 1 (or True or False), and that a positive number larger 82 // than 1 is never used to represent True. 83 UInt size; /* size in bytes */ 84 Bool isText; 85 Bool isIFunc; /* symbol is an indirect function? 
*/ 86 } 87 DiSym; 88 89 /* --------------------- SRCLOCS --------------------- */ 90 91 /* Line count at which overflow happens, due to line numbers being 92 stored as shorts in `struct nlist' in a.out.h. */ 93 #define LINENO_OVERFLOW (1 << (sizeof(short) * 8)) 94 95 #define LINENO_BITS 20 96 #define LOC_SIZE_BITS (32 - LINENO_BITS) 97 #define MAX_LINENO ((1 << LINENO_BITS) - 1) 98 99 /* Unlikely to have any lines with instruction ranges > 4096 bytes */ 100 #define MAX_LOC_SIZE ((1 << LOC_SIZE_BITS) - 1) 101 102 /* Number used to detect line number overflows; if one line is 103 60000-odd smaller than the previous, it was probably an overflow. 104 */ 105 #define OVERFLOW_DIFFERENCE (LINENO_OVERFLOW - 5000) 106 107 /* Filename and Dirname pair. FnDn are stored in di->fndnpool 108 and are allocated using VG_(allocFixedEltDedupPA). 109 The filename/dirname strings are themselves stored in di->strpool. */ 110 typedef 111 struct { 112 const HChar* filename; /* source filename */ 113 const HChar* dirname; /* source directory name */ 114 } FnDn; 115 116 /* A structure to hold addr-to-source info for a single line. There 117 can be a lot of these, hence the dense packing. */ 118 typedef 119 struct { 120 /* Word 1 */ 121 Addr addr; /* lowest address for this line */ 122 /* Word 2 */ 123 UShort size:LOC_SIZE_BITS; /* # bytes; we catch overflows of this */ 124 UInt lineno:LINENO_BITS; /* source line number, or zero */ 125 } 126 DiLoc; 127 128 #define LEVEL_BITS (32 - LINENO_BITS) 129 #define MAX_LEVEL ((1 << LEVEL_BITS) - 1) 130 131 /* A structure to hold addr-to-inlined fn info. There 132 can be a lot of these, hence the dense packing. 133 Only caller source filename and lineno are stored. 134 Handling dirname should be done using fndn_ix technique 135 similar to ML_(addLineInfo). 
*/ 136 typedef 137 struct { 138 /* Word 1 */ 139 Addr addr_lo; /* lowest address for inlined fn */ 140 /* Word 2 */ 141 Addr addr_hi; /* highest address following the inlined fn */ 142 /* Word 3 */ 143 const HChar* inlinedfn; /* inlined function name */ 144 /* Word 4 and 5 */ 145 UInt fndn_ix; /* index in di->fndnpool of caller source 146 dirname/filename */ 147 UInt lineno:LINENO_BITS; /* caller line number */ 148 UShort level:LEVEL_BITS; /* level of inlining */ 149 } 150 DiInlLoc; 151 152 /* --------------------- CF INFO --------------------- */ 153 154 /* DiCfSI: a structure to summarise DWARF2/3 CFA info for the code 155 address range [base .. base+len-1]. 156 157 On x86 and amd64 ("IA"), if you know ({e,r}sp, {e,r}bp, {e,r}ip) at 158 some point and {e,r}ip is in the range [base .. base+len-1], it 159 tells you how to calculate ({e,r}sp, {e,r}bp) for the caller of the 160 current frame and also ra, the return address of the current frame. 161 162 First off, calculate CFA, the Canonical Frame Address, thusly: 163 164 cfa = case cfa_how of 165 CFIC_IA_SPREL -> {e,r}sp + cfa_off 166 CFIC_IA_BPREL -> {e,r}bp + cfa_off 167 CFIC_EXPR -> expr whose index is in cfa_off 168 169 Once that is done, the previous frame's {e,r}sp/{e,r}bp values and 170 this frame's {e,r}ra value can be calculated like this: 171 172 old_{e,r}sp/{e,r}bp/ra 173 = case {e,r}sp/{e,r}bp/ra_how of 174 CFIR_UNKNOWN -> we don't know, sorry 175 CFIR_SAME -> same as it was before (sp/fp only) 176 CFIR_CFAREL -> cfa + sp/bp/ra_off 177 CFIR_MEMCFAREL -> *( cfa + sp/bp/ra_off ) 178 CFIR_EXPR -> expr whose index is in sp/bp/ra_off 179 180 On ARM it's pretty much the same, except we have more registers to 181 keep track of: 182 183 cfa = case cfa_how of 184 CFIC_ARM_R13REL -> r13 + cfa_off 185 CFIC_ARM_R12REL -> r12 + cfa_off 186 CFIC_ARM_R11REL -> r11 + cfa_off 187 CFIC_ARM_R7REL -> r7 + cfa_off 188 CFIR_EXPR -> expr whose index is in cfa_off 189 190 old_r14/r13/r12/r11/r7/ra 191 = case 
r14/r13/r12/r11/r7/ra_how of 192 CFIR_UNKNOWN -> we don't know, sorry 193 CFIR_SAME -> same as it was before (r14/r13/r12/r11/r7 only) 194 CFIR_CFAREL -> cfa + r14/r13/r12/r11/r7/ra_off 195 CFIR_MEMCFAREL -> *( cfa + r14/r13/r12/r11/r7/ra_off ) 196 CFIR_EXPR -> expr whose index is in r14/r13/r12/r11/r7/ra_off 197 198 On ARM64: 199 200 cfa = case cfa_how of 201 CFIC_ARM64_SPREL -> sp + cfa_off 202 CFIC_ARM64_X29REL -> x29 + cfa_off 203 CFIC_EXPR -> expr whose index is in cfa_off 204 205 old_sp/x30/x29/ra 206 = case sp/x30/x29/ra_how of 207 CFIR_UNKNOWN -> we don't know, sorry 208 CFIR_SAME -> same as it was before 209 CFIR_CFAREL -> cfa + sp/x30/x29/ra_how 210 CFIR_MEMCFAREL -> *( cfa + sp/x30/x29/ra_how ) 211 CFIR_EXPR -> expr whose index is in sp/x30/x29/ra_off 212 213 On s390x we have a similar logic as x86 or amd64. We need the stack pointer 214 (r15), the frame pointer r11 (like BP) and together with the instruction 215 address in the PSW we can calculate the previous values: 216 cfa = case cfa_how of 217 CFIC_IA_SPREL -> r15 + cfa_off 218 CFIC_IA_BPREL -> r11 + cfa_off 219 CFIC_EXPR -> expr whose index is in cfa_off 220 221 old_sp/fp/ra 222 = case sp/fp/ra_how of 223 CFIR_UNKNOWN -> we don't know, sorry 224 CFIR_SAME -> same as it was before (sp/fp only) 225 CFIR_CFAREL -> cfa + sp/fp/ra_off 226 CFIR_MEMCFAREL -> *( cfa + sp/fp/ra_off ) 227 CFIR_EXPR -> expr whose index is in sp/fp/ra_off 228 */ 229 230 #define CFIC_IA_SPREL ((UChar)1) 231 #define CFIC_IA_BPREL ((UChar)2) 232 #define CFIC_ARM_R13REL ((UChar)3) 233 #define CFIC_ARM_R12REL ((UChar)4) 234 #define CFIC_ARM_R11REL ((UChar)5) 235 #define CFIC_ARM_R7REL ((UChar)6) 236 #define CFIC_ARM64_SPREL ((UChar)7) 237 #define CFIC_ARM64_X29REL ((UChar)8) 238 #define CFIC_EXPR ((UChar)9) /* all targets */ 239 240 #define CFIR_UNKNOWN ((UChar)64) 241 #define CFIR_SAME ((UChar)65) 242 #define CFIR_CFAREL ((UChar)66) 243 #define CFIR_MEMCFAREL ((UChar)67) 244 #define CFIR_EXPR ((UChar)68) 245 246 /* Definition of 
the DiCfSI_m DiCfSI machine dependent part. 247 These are highly duplicated, and are stored in a pool. */ 248 #if defined(VGA_x86) || defined(VGA_amd64) 249 typedef 250 struct { 251 UChar cfa_how; /* a CFIC_IA value */ 252 UChar ra_how; /* a CFIR_ value */ 253 UChar sp_how; /* a CFIR_ value */ 254 UChar bp_how; /* a CFIR_ value */ 255 Int cfa_off; 256 Int ra_off; 257 Int sp_off; 258 Int bp_off; 259 } 260 DiCfSI_m; 261 #elif defined(VGA_arm) 262 typedef 263 struct { 264 UChar cfa_how; /* a CFIC_ value */ 265 UChar ra_how; /* a CFIR_ value */ 266 UChar r14_how; /* a CFIR_ value */ 267 UChar r13_how; /* a CFIR_ value */ 268 UChar r12_how; /* a CFIR_ value */ 269 UChar r11_how; /* a CFIR_ value */ 270 UChar r7_how; /* a CFIR_ value */ 271 Int cfa_off; 272 Int ra_off; 273 Int r14_off; 274 Int r13_off; 275 Int r12_off; 276 Int r11_off; 277 Int r7_off; 278 // If you add additional fields, don't forget to update the 279 // initialisation of this in readexidx.c accordingly. 280 } 281 DiCfSI_m; 282 #elif defined(VGA_arm64) 283 typedef 284 struct { 285 UChar cfa_how; /* a CFIC_ value */ 286 UChar ra_how; /* a CFIR_ value */ 287 UChar sp_how; /* a CFIR_ value */ /*dw31=SP*/ 288 UChar x30_how; /* a CFIR_ value */ /*dw30=LR*/ 289 UChar x29_how; /* a CFIR_ value */ /*dw29=FP*/ 290 Int cfa_off; 291 Int ra_off; 292 Int sp_off; 293 Int x30_off; 294 Int x29_off; 295 } 296 DiCfSI_m; 297 #elif defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) 298 /* Just have a struct with the common fields in, so that code that 299 processes the common fields doesn't have to be ifdef'd against 300 VGP_/VGA_ symbols. These are not used in any way on ppc32/64-linux 301 at the moment. 
*/ 302 typedef 303 struct { 304 UChar cfa_how; /* a CFIC_ value */ 305 UChar ra_how; /* a CFIR_ value */ 306 Int cfa_off; 307 Int ra_off; 308 } 309 DiCfSI_m; 310 #elif defined(VGA_s390x) 311 typedef 312 struct { 313 UChar cfa_how; /* a CFIC_ value */ 314 UChar sp_how; /* a CFIR_ value */ 315 UChar ra_how; /* a CFIR_ value */ 316 UChar fp_how; /* a CFIR_ value */ 317 Int cfa_off; 318 Int sp_off; 319 Int ra_off; 320 Int fp_off; 321 } 322 DiCfSI_m; 323 #elif defined(VGA_mips32) || defined(VGA_mips64) 324 typedef 325 struct { 326 UChar cfa_how; /* a CFIC_ value */ 327 UChar ra_how; /* a CFIR_ value */ 328 UChar sp_how; /* a CFIR_ value */ 329 UChar fp_how; /* a CFIR_ value */ 330 Int cfa_off; 331 Int ra_off; 332 Int sp_off; 333 Int fp_off; 334 } 335 DiCfSI_m; 336 #elif defined(VGA_tilegx) 337 typedef 338 struct { 339 UChar cfa_how; /* a CFIC_IA value */ 340 UChar ra_how; /* a CFIR_ value */ 341 UChar sp_how; /* a CFIR_ value */ 342 UChar fp_how; /* a CFIR_ value */ 343 Int cfa_off; 344 Int ra_off; 345 Int sp_off; 346 Int fp_off; 347 } 348 DiCfSI_m; 349 #else 350 # error "Unknown arch" 351 #endif 352 353 typedef 354 struct { 355 Addr base; 356 UInt len; 357 UInt cfsi_m_ix; 358 } 359 DiCfSI; 360 361 typedef 362 enum { 363 Cunop_Abs=0x231, 364 Cunop_Neg, 365 Cunop_Not 366 } 367 CfiUnop; 368 369 typedef 370 enum { 371 Cbinop_Add=0x321, 372 Cbinop_Sub, 373 Cbinop_And, 374 Cbinop_Mul, 375 Cbinop_Shl, 376 Cbinop_Shr, 377 Cbinop_Eq, 378 Cbinop_Ge, 379 Cbinop_Gt, 380 Cbinop_Le, 381 Cbinop_Lt, 382 Cbinop_Ne 383 } 384 CfiBinop; 385 386 typedef 387 enum { 388 Creg_INVALID=0x213, 389 Creg_IA_SP, 390 Creg_IA_BP, 391 Creg_IA_IP, 392 Creg_ARM_R13, 393 Creg_ARM_R12, 394 Creg_ARM_R15, 395 Creg_ARM_R14, 396 Creg_ARM_R7, 397 Creg_ARM64_X30, 398 Creg_S390_IA, 399 Creg_S390_SP, 400 Creg_S390_FP, 401 Creg_S390_LR, 402 Creg_MIPS_RA, 403 Creg_TILEGX_IP, 404 Creg_TILEGX_SP, 405 Creg_TILEGX_BP, 406 Creg_TILEGX_LR 407 } 408 CfiReg; 409 410 typedef 411 enum { 412 Cex_Undef=0x123, 413 Cex_Deref, 
414 Cex_Const, 415 Cex_Unop, 416 Cex_Binop, 417 Cex_CfiReg, 418 Cex_DwReg 419 } 420 CfiExprTag; 421 422 typedef 423 struct { 424 CfiExprTag tag; 425 union { 426 struct { 427 } Undef; 428 struct { 429 Int ixAddr; 430 } Deref; 431 struct { 432 UWord con; 433 } Const; 434 struct { 435 CfiUnop op; 436 Int ix; 437 } Unop; 438 struct { 439 CfiBinop op; 440 Int ixL; 441 Int ixR; 442 } Binop; 443 struct { 444 CfiReg reg; 445 } CfiReg; 446 struct { 447 Int reg; 448 } DwReg; 449 } 450 Cex; 451 } 452 CfiExpr; 453 454 extern Int ML_(CfiExpr_Undef) ( XArray* dst ); 455 extern Int ML_(CfiExpr_Deref) ( XArray* dst, Int ixAddr ); 456 extern Int ML_(CfiExpr_Const) ( XArray* dst, UWord con ); 457 extern Int ML_(CfiExpr_Unop) ( XArray* dst, CfiUnop op, Int ix ); 458 extern Int ML_(CfiExpr_Binop) ( XArray* dst, CfiBinop op, Int ixL, Int ixR ); 459 extern Int ML_(CfiExpr_CfiReg)( XArray* dst, CfiReg reg ); 460 extern Int ML_(CfiExpr_DwReg) ( XArray* dst, Int reg ); 461 462 extern void ML_(ppCfiExpr)( const XArray* src, Int ix ); 463 464 /* ---------------- FPO INFO (Windows PE) -------------- */ 465 466 /* for apps using Wine: MSVC++ PDB FramePointerOmitted: somewhat like 467 a primitive CFI */ 468 typedef 469 struct _FPO_DATA { /* 16 bytes */ 470 UInt ulOffStart; /* offset of 1st byte of function code */ 471 UInt cbProcSize; /* # bytes in function */ 472 UInt cdwLocals; /* # bytes/4 in locals */ 473 UShort cdwParams; /* # bytes/4 in params */ 474 UChar cbProlog; /* # bytes in prolog */ 475 UChar cbRegs :3; /* # regs saved */ 476 UChar fHasSEH:1; /* Structured Exception Handling */ 477 UChar fUseBP :1; /* EBP has been used */ 478 UChar reserved:1; 479 UChar cbFrame:2; /* frame type */ 480 } 481 FPO_DATA; 482 483 #define PDB_FRAME_FPO 0 484 #define PDB_FRAME_TRAP 1 485 #define PDB_FRAME_TSS 2 486 487 /* --------------------- VARIABLES --------------------- */ 488 489 typedef 490 struct { 491 Addr aMin; 492 Addr aMax; 493 XArray* /* of DiVariable */ vars; 494 } 495 DiAddrRange; 496 497 
typedef 498 struct { 499 const HChar* name; /* in DebugInfo.strpool */ 500 UWord typeR; /* a cuOff */ 501 const GExpr* gexpr; /* on DebugInfo.gexprs list */ 502 const GExpr* fbGX; /* SHARED. */ 503 UInt fndn_ix; /* where declared; may be zero. index 504 in DebugInfo.fndnpool */ 505 Int lineNo; /* where declared; may be zero. */ 506 } 507 DiVariable; 508 509 Word 510 ML_(cmp_for_DiAddrRange_range) ( const void* keyV, const void* elemV ); 511 512 /* --------------------- DEBUGINFO --------------------- */ 513 514 /* This is the top-level data type. It's a structure which contains 515 information pertaining to one mapped ELF object. This type is 516 exported only abstractly - in pub_tool_debuginfo.h. */ 517 518 /* First though, here's an auxiliary data structure. It is only ever 519 used as part of a struct _DebugInfo. We use it to record 520 observations about mappings and permission changes to the 521 associated file, so as to decide when to read debug info. It's 522 essentially an ultra-trivial finite state machine which, when it 523 reaches an accept state, signals that we should now read debug info 524 from the object into the associated struct _DebugInfo. The accept 525 state is arrived at when have_rx_map and have_rw_map both become 526 true. The initial state is one in which we have no observations, 527 so have_rx_map and have_rw_map are both false. 528 529 This all started as a rather ad-hoc solution, but was further 530 expanded to handle weird object layouts, e.g. more than one rw 531 or rx mapping for one binary. 532 533 The normal sequence of events is one of 534 535 start --> r-x mapping --> rw- mapping --> accept 536 start --> rw- mapping --> r-x mapping --> accept 537 538 that is, take the first r-x and rw- mapping we see, and we're done. 
539 540 On MacOSX >= 10.7, 32-bit, there appears to be a new variant: 541 542 start --> r-- mapping --> rw- mapping 543 --> upgrade r-- mapping to r-x mapping --> accept 544 545 where the upgrade is done by a call to mach_vm_protect (OSX 10.7) 546 or kernelrpc_mach_vm_protect_trap (OSX 10.9 and possibly 10.8). 547 Hence we need to also track this possibility. 548 549 From perusal of dyld sources, it appears that this scheme could 550 also be used 64 bit libraries, although that doesn't seem to happen 551 in practice. dyld uses this scheme when the text section requires 552 relocation, which only appears to be the case for 32 bit objects. 553 */ 554 555 typedef struct 556 { 557 Addr avma; /* these fields record the file offset, length */ 558 SizeT size; /* and map address of each mapping */ 559 OffT foff; 560 Bool rx, rw, ro; /* memory access flags for this mapping */ 561 } DebugInfoMapping; 562 563 struct _DebugInfoFSM 564 { 565 HChar* filename; /* in mallocville (VG_AR_DINFO) */ 566 HChar* dbgname; /* in mallocville (VG_AR_DINFO) */ 567 XArray* maps; /* XArray of DebugInfoMapping structs */ 568 Bool have_rx_map; /* did we see a r?x mapping yet for the file? */ 569 Bool have_rw_map; /* did we see a rw? mapping yet for the file? */ 570 Bool have_ro_map; /* did we see a r-- mapping yet for the file? */ 571 }; 572 573 574 /* To do with the string table in struct _DebugInfo (::strpool) */ 575 #define SEGINFO_STRPOOLSIZE (64*1024) 576 577 578 /* We may encounter more than one .eh_frame section in an object -- 579 unusual but apparently allowed by ELF. See 580 http://sourceware.org/bugzilla/show_bug.cgi?id=12675 581 */ 582 #define N_EHFRAME_SECTS 2 583 584 585 /* So, the main structure for holding debug info for one object. */ 586 587 struct _DebugInfo { 588 589 /* Admin stuff */ 590 591 struct _DebugInfo* next; /* list of DebugInfos */ 592 Bool mark; /* marked for deletion? 
*/ 593 594 /* An abstract handle, which can be used by entities outside of 595 m_debuginfo to (in an abstract datatype sense) refer to this 596 struct _DebugInfo. A .handle of zero is invalid; valid handles 597 are 1 and above. The same handle is never issued twice (in any 598 given run of Valgrind), so a handle becomes invalid when the 599 associated struct _DebugInfo is discarded, and remains invalid 600 forever thereafter. The .handle field is set as soon as this 601 structure is allocated. */ 602 ULong handle; 603 604 /* Used for debugging only - indicate what stuff to dump whilst 605 reading stuff into the seginfo. Are computed as early in the 606 lifetime of the DebugInfo as possible -- at the point when it is 607 created. Use these when deciding what to spew out; do not use 608 the global VG_(clo_blah) flags. */ 609 610 Bool trace_symtab; /* symbols, our style */ 611 Bool trace_cfi; /* dwarf frame unwind, our style */ 612 Bool ddump_syms; /* mimic /usr/bin/readelf --syms */ 613 Bool ddump_line; /* mimic /usr/bin/readelf --debug-dump=line */ 614 Bool ddump_frames; /* mimic /usr/bin/readelf --debug-dump=frames */ 615 616 /* The "decide when it is time to read debuginfo" state machine. 617 This structure must get filled in before we can start reading 618 anything from the ELF/MachO file. This structure is filled in 619 by VG_(di_notify_mmap) and its immediate helpers. */ 620 struct _DebugInfoFSM fsm; 621 622 /* Once the ::fsm has reached an accept state -- typically, when 623 both a rw? and r?x mapping for .filename have been observed -- 624 we can go on to read the symbol tables and debug info. 625 .have_dinfo changes from False to True when the debug info has 626 been completely read in and postprocessed (canonicalised) and is 627 now suitable for querying. */ 628 /* If have_dinfo is False, then all fields below this point are 629 invalid and should not be consulted. 
*/ 630 Bool have_dinfo; /* initially False */ 631 632 /* All the rest of the fields in this structure are filled in once 633 we have committed to reading the symbols and debug info (that 634 is, at the point where .have_dinfo is set to True). */ 635 636 /* The file's soname. */ 637 HChar* soname; 638 639 /* Description of some important mapped segments. The presence or 640 absence of the mapping is denoted by the _present field, since 641 in some obscure circumstances (to do with data/sdata/bss) it is 642 possible for the mapping to be present but have zero size. 643 Certainly text_ is mandatory on all platforms; not sure about 644 the rest though. 645 646 -------------------------------------------------------- 647 648 Comment_on_IMPORTANT_CFSI_REPRESENTATIONAL_INVARIANTS: we require that 649 650 either (size of all rx maps == 0 && cfsi == NULL) (the degenerate case) 651 652 or the normal case, which is the AND of the following: 653 (0) size of at least one rx mapping > 0 654 (1) no two DebugInfos with some rx mapping of size > 0 655 have overlapping rx mappings 656 (2) [cfsi_minavma,cfsi_maxavma] does not extend beyond 657 [avma,+size) of one rx mapping; that is, the former 658 is a subrange or equal to the latter. 659 (3) all DiCfSI in the cfsi array all have ranges that fall within 660 [avma,+size) of that rx mapping. 661 (4) all DiCfSI in the cfsi array are non-overlapping 662 663 The cumulative effect of these restrictions is to ensure that 664 all the DiCfSI records in the entire system are non overlapping. 665 Hence any address falls into either exactly one DiCfSI record, 666 or none. Hence it is safe to cache the results of searches for 667 DiCfSI records. This is the whole point of these restrictions. 668 The caching of DiCfSI searches is done in VG_(use_CF_info). The 669 cache is flushed after any change to debugInfo_list. DiCfSI 670 searches are cached because they are central to stack unwinding 671 on amd64-linux. 
672 673 Where are these invariants imposed and checked? 674 675 They are checked after a successful read of debuginfo into 676 a DebugInfo*, in check_CFSI_related_invariants. 677 678 (1) is not really imposed anywhere. We simply assume that the 679 kernel will not map the text segments from two different objects 680 into the same space. Sounds reasonable. 681 682 (2) follows from (4) and (3). It is ensured by canonicaliseCFI. 683 (3) is ensured by ML_(addDiCfSI). 684 (4) is ensured by canonicaliseCFI. 685 686 -------------------------------------------------------- 687 688 Comment_on_DEBUG_SVMA_and_DEBUG_BIAS_fields: 689 690 The _debug_{svma,bias} fields were added as part of a fix to 691 #185816. The problem encompassed in that bug report was that it 692 wasn't correct to use apply the bias values deduced for a 693 primary object to its associated debuginfo object, because the 694 debuginfo object (or the primary) could have been prelinked to a 695 different SVMA. Hence debuginfo and primary objects need to 696 have their own biases. 697 698 ------ JRS: (referring to r9329): ------ 699 Let me see if I understand the workings correctly. Initially 700 the _debug_ values are set to the same values as the "normal" 701 ones, as there's a bunch of bits of code like this (in 702 readelf.c) 703 704 di->text_svma = svma; 705 ... 706 di->text_bias = rx_bias; 707 di->text_debug_svma = svma; 708 di->text_debug_bias = rx_bias; 709 710 If a debuginfo object subsequently shows up then the 711 _debug_svma/bias are set for the debuginfo object. Result is 712 that if there's no debuginfo object then the values are the same 713 as the primary-object values, and if there is a debuginfo object 714 then they will (or at least may) be different. 715 716 Then when we need to actually bias something, we'll have to 717 decide whether to use the primary bias or the debuginfo bias. 
718 And the strategy is to use the primary bias for ELF symbols but 719 the debuginfo bias for anything pulled out of Dwarf. 720 721 ------ THH: ------ 722 Correct - the debug_svma and bias values apply to any address 723 read from the debug data regardless of where that debug data is 724 stored and the other values are used for addresses from other 725 places (primarily the symbol table). 726 727 ------ JRS: ------ 728 Ok; so this was my only area of concern. Are there any 729 corner-case scenarios where this wouldn't be right? It sounds 730 like we're assuming the ELF symbols come from the primary object 731 and, if there is a debug object, then all the Dwarf comes from 732 there. But what if (eg) both symbols and Dwarf come from the 733 debug object? Is that even possible or allowable? 734 735 ------ THH: ------ 736 You may have a point... 737 738 The current logic is to try and take any one set of data from 739 either the base object or the debug object. There are four sets 740 of data we consider: 741 742 - Symbol Table 743 - Stabs 744 - DWARF1 745 - DWARF2 746 747 If we see the primary section for a given set in the base object 748 then we ignore all sections relating to that set in the debug 749 object. 750 751 Now in principle if we saw a secondary section (like debug_line 752 say) in the base object, but not the main section (debug_info in 753 this case) then we would take debug_info from the debug object 754 but would use the debug_line from the base object unless we saw 755 a replacement copy in the debug object. That's probably unlikely 756 however. 757 758 A bigger issue might be, as you say, the symbol table as we will 759 pick that up from the debug object if it isn't in the base. The 760 dynamic symbol table will always have to be in the base object 761 though so we will have to be careful when processing symbols to 762 know which table we are reading in that case. 
763 764 What we probably need to do is tell read_elf_symtab which object 765 the symbols it is being asked to read came from. 766 767 (A followup patch to deal with this was committed in r9469). 768 */ 769 /* .text */ 770 Bool text_present; 771 Addr text_avma; 772 Addr text_svma; 773 SizeT text_size; 774 PtrdiffT text_bias; 775 Addr text_debug_svma; 776 PtrdiffT text_debug_bias; 777 /* .data */ 778 Bool data_present; 779 Addr data_svma; 780 Addr data_avma; 781 SizeT data_size; 782 PtrdiffT data_bias; 783 Addr data_debug_svma; 784 PtrdiffT data_debug_bias; 785 /* .sdata */ 786 Bool sdata_present; 787 Addr sdata_svma; 788 Addr sdata_avma; 789 SizeT sdata_size; 790 PtrdiffT sdata_bias; 791 Addr sdata_debug_svma; 792 PtrdiffT sdata_debug_bias; 793 /* .rodata */ 794 Bool rodata_present; 795 Addr rodata_svma; 796 Addr rodata_avma; 797 SizeT rodata_size; 798 PtrdiffT rodata_bias; 799 Addr rodata_debug_svma; 800 PtrdiffT rodata_debug_bias; 801 /* .bss */ 802 Bool bss_present; 803 Addr bss_svma; 804 Addr bss_avma; 805 SizeT bss_size; 806 PtrdiffT bss_bias; 807 Addr bss_debug_svma; 808 PtrdiffT bss_debug_bias; 809 /* .sbss */ 810 Bool sbss_present; 811 Addr sbss_svma; 812 Addr sbss_avma; 813 SizeT sbss_size; 814 PtrdiffT sbss_bias; 815 Addr sbss_debug_svma; 816 PtrdiffT sbss_debug_bias; 817 /* .ARM.exidx -- sometimes present on arm32, containing unwind info. */ 818 Bool exidx_present; 819 Addr exidx_avma; 820 Addr exidx_svma; 821 SizeT exidx_size; 822 PtrdiffT exidx_bias; 823 /* .ARM.extab -- sometimes present on arm32, containing unwind info. 
*/ 824 Bool extab_present; 825 Addr extab_avma; 826 Addr extab_svma; 827 SizeT extab_size; 828 PtrdiffT extab_bias; 829 /* .plt */ 830 Bool plt_present; 831 Addr plt_avma; 832 SizeT plt_size; 833 /* .got */ 834 Bool got_present; 835 Addr got_avma; 836 SizeT got_size; 837 /* .got.plt */ 838 Bool gotplt_present; 839 Addr gotplt_avma; 840 SizeT gotplt_size; 841 /* .opd -- needed on ppc64be-linux for finding symbols */ 842 Bool opd_present; 843 Addr opd_avma; 844 SizeT opd_size; 845 /* .ehframe -- needed on amd64-linux for stack unwinding. We might 846 see more than one, hence the arrays. */ 847 UInt n_ehframe; /* 0 .. N_EHFRAME_SECTS */ 848 Addr ehframe_avma[N_EHFRAME_SECTS]; 849 SizeT ehframe_size[N_EHFRAME_SECTS]; 850 851 /* Sorted tables of stuff we snarfed from the file. This is the 852 eventual product of reading the debug info. All this stuff 853 lives in VG_AR_DINFO. */ 854 855 /* An expandable array of symbols. */ 856 DiSym* symtab; 857 UWord symtab_used; 858 UWord symtab_size; 859 /* Two expandable arrays, storing locations and their filename/dirname. */ 860 DiLoc* loctab; 861 UInt sizeof_fndn_ix; /* Similar use as sizeof_cfsi_m_ix below. */ 862 void* loctab_fndn_ix; /* loctab[i] filename/dirname is identified by 863 loctab_fnindex_ix[i] (an index in di->fndnpool) 864 0 means filename/dirname unknown. 865 The void* is an UChar* or UShort* or UInt* 866 depending on sizeof_fndn_ix. */ 867 UWord loctab_used; 868 UWord loctab_size; 869 /* An expandable array of inlined fn info. 870 maxinl_codesz is the biggest inlined piece of code 871 in inltab (i.e. the max of 'addr_hi - addr_lo'. */ 872 DiInlLoc* inltab; 873 UWord inltab_used; 874 UWord inltab_size; 875 SizeT maxinl_codesz; 876 877 /* A set of expandable arrays to store CFI summary info records. 878 The machine specific information (i.e. the DiCfSI_m struct) 879 are stored in cfsi_m_pool, as these are highly duplicated. 
880 The DiCfSI_m are allocated in cfsi_m_pool and identified using 881 a (we hope) small integer : often one byte is enough, sometimes 882 2 bytes are needed. 883 884 cfsi_base contains the bases of the code address ranges. 885 cfsi_size is the size of the cfsi_base array. 886 The elements cfsi_base[0] till cfsi_base[cfsi_used-1] are used. 887 Following elements are not used (yet). 888 889 For each base in cfsi_base, an index into cfsi_m_pool is stored 890 in cfsi_m_ix array. The size of cfsi_m_ix is equal to 891 cfsi_size*sizeof_cfsi_m_ix. The used portion of cfsi_m_ix is 892 cfsi_m_ix[0] till cfsi_m_ix[(cfsi_used-1)*sizeof_cfsi_m_ix]. 893 894 cfsi_base[i] gives the base address of a code range covered by 895 some CF Info. The corresponding CF Info is identified by an index 896 in cfsi_m_pool. The DiCfSI_m index in cfsi_m_pool corresponding to 897 cfsi_base[i] is given 898 by ((UChar*) cfsi_m_ix)[i] if sizeof_cfsi_m_ix == 1 899 by ((UShort*)cfsi_m_ix)[i] if sizeof_cfsi_m_ix == 2 900 by ((UInt*) cfsi_m_ix)[i] if sizeof_cfsi_m_ix == 4. 901 902 The end of the code range starting at cfsi_base[i] is given by 903 cfsi_base[i+1]-1 (or cfsi_maxavma for cfsi_base[cfsi_used-1]). 904 Some code ranges between cfsi_minavma and cfsi_maxavma might not 905 be covered by cfi information. Such not covered ranges are stored by 906 a base in cfsi_base and a corresponding 0 index in cfsi_m_ix. 907 908 A variable size representation has been chosen for the elements of 909 cfsi_m_ix as in many case, one byte is good enough. For big 910 objects, 2 bytes are needed. No object has yet been found where 911 4 bytes are needed (but the code is ready to handle this case). 912 Not covered ranges ('cfi holes') are stored explicitely in 913 cfsi_base/cfsi_m_ix as this is more memory efficient than storing 914 a length for each covered range : on x86 or amd64, we typically have 915 a hole every 8 covered ranges. On arm64, we have very few holes 916 (1 every 50 or 100 ranges). 
917 918 The cfsi information is read and prepared in the cfsi_rd array. 919 Once all the information has been read, the cfsi_base and cfsi_m_ix 920 arrays will be filled in from cfsi_rd. cfsi_rd will then be freed. 921 This is all done by ML_(finish_CFSI_arrays). 922 923 Also includes summary address bounds, showing the min and max address 924 covered by any of the records, as an aid to fast searching. And, if the 925 records require any expression nodes, they are stored in 926 cfsi_exprs. */ 927 Addr* cfsi_base; 928 UInt sizeof_cfsi_m_ix; /* size in byte of indexes stored in cfsi_m_ix. */ 929 void* cfsi_m_ix; /* Each index occupies sizeof_cfsi_m_ix bytes. 930 The void* is an UChar* or UShort* or UInt* 931 depending on sizeof_cfsi_m_ix. */ 932 933 DiCfSI* cfsi_rd; /* Only used during reading, NULL once info is read. */ 934 935 UWord cfsi_used; 936 UWord cfsi_size; 937 938 DedupPoolAlloc *cfsi_m_pool; 939 Addr cfsi_minavma; 940 Addr cfsi_maxavma; 941 XArray* cfsi_exprs; /* XArray of CfiExpr */ 942 943 /* Optimized code under Wine x86: MSVC++ PDB FramePointerOmitted 944 data. Non-expandable array, hence .size == .used. */ 945 FPO_DATA* fpo; 946 UWord fpo_size; 947 Addr fpo_minavma; 948 Addr fpo_maxavma; 949 Addr fpo_base_avma; 950 951 /* Pool of strings -- the string table. Pointers 952 into this are stable (the memory is not reallocated). */ 953 DedupPoolAlloc *strpool; 954 955 /* Pool of FnDn -- filename and dirname. 956 Elements in the pool are allocated using VG_(allocFixedEltDedupPA). */ 957 DedupPoolAlloc *fndnpool; 958 959 /* Variable scope information, as harvested from Dwarf3 files. 960 961 In short it's an 962 963 array of (array of PC address ranges and variables) 964 965 The outer array indexes over scopes, with Entry 0 containing 966 information on variables which exist for any value of the program 967 counter (PC) -- that is, the outermost scope. Entries 1, 2, 3, 968 etc contain information on increasinly deeply nested variables. 
969 970 Each inner array is an array of (an address range, and a set 971 of variables that are in scope over that address range). 972 973 The address ranges may not overlap. 974 975 Since Entry 0 in the outer array holds information on variables 976 that exist for any value of the PC (that is, global vars), it 977 follows that Entry 0's inner array can only have one address 978 range pair, one that covers the entire address space. 979 */ 980 XArray* /* of OSet of DiAddrRange */varinfo; 981 982 /* These are arrays of the relevant typed objects, held here 983 partially for the purposes of visiting each object exactly once 984 when we need to delete them. */ 985 986 /* An array of TyEnts. These are needed to make sense of any types 987 in the .varinfo. Also, when deleting this DebugInfo, we must 988 first traverse this array and throw away malloc'd stuff hanging 989 off it -- by calling ML_(TyEnt__make_EMPTY) on each entry. */ 990 XArray* /* of TyEnt */ admin_tyents; 991 992 /* An array of guarded DWARF3 expressions. */ 993 XArray* admin_gexprs; 994 995 /* Cached last rx mapping matched and returned by ML_(find_rx_mapping). 996 This helps performance a lot during ML_(addLineInfo) etc., which can 997 easily be invoked hundreds of thousands of times. */ 998 DebugInfoMapping* last_rx_map; 999 }; 1000 1001 /* --------------------- functions --------------------- */ 1002 1003 /* ------ Adding ------ */ 1004 1005 /* Add a symbol to si's symbol table. The contents of 'sym' are 1006 copied. It is assumed (and checked) that 'sym' only contains one 1007 name, so there is no auxiliary ::sec_names vector to duplicate. 1008 IOW, the copy is a shallow copy, and there are assertions in place 1009 to ensure that's OK. */ 1010 extern void ML_(addSym) ( struct _DebugInfo* di, DiSym* sym ); 1011 1012 /* Add a filename/dirname pair to a DebugInfo and returns the index 1013 in the fndnpool fixed pool. 
*/ 1014 extern UInt ML_(addFnDn) (struct _DebugInfo* di, 1015 const HChar* filename, 1016 const HChar* dirname); /* NULL is allowable */ 1017 1018 /* Returns the filename of the fndn pair identified by fndn_ix. 1019 Returns "???" if fndn_ix is 0. */ 1020 extern const HChar* ML_(fndn_ix2filename) (const DebugInfo* di, 1021 UInt fndn_ix); 1022 1023 /* Returns the dirname of the fndn pair identified by fndn_ix. 1024 Returns "" if fndn_ix is 0 or fndn->dirname is NULL. */ 1025 extern const HChar* ML_(fndn_ix2dirname) (const DebugInfo* di, 1026 UInt fndn_ix); 1027 1028 /* Returns the fndn_ix for the LineInfo locno in di->loctab. 1029 0 if filename/dirname are unknown. */ 1030 extern UInt ML_(fndn_ix) (const DebugInfo* di, Word locno); 1031 1032 /* Add a line-number record to a DebugInfo. 1033 fndn_ix is an index in di->fndnpool, allocated using ML_(addFnDn). 1034 Give a 0 index for a unknown filename/dirname pair. */ 1035 extern 1036 void ML_(addLineInfo) ( struct _DebugInfo* di, 1037 UInt fndn_ix, 1038 Addr this, Addr next, Int lineno, Int entry); 1039 1040 /* Add a call inlined record to a DebugInfo. 1041 A call to the below means that inlinedfn code has been 1042 inlined, resulting in code from [addr_lo, addr_hi[. 1043 Note that addr_hi is excluded, i.e. is not part of the inlined code. 1044 fndn_ix and lineno identifies the location of the call that caused 1045 this inlining. 1046 fndn_ix is an index in di->fndnpool, allocated using ML_(addFnDn). 1047 Give a 0 index for an unknown filename/dirname pair. 1048 In case of nested inlining, a small level indicates the call 1049 is closer to main that a call with a higher level. */ 1050 extern 1051 void ML_(addInlInfo) ( struct _DebugInfo* di, 1052 Addr addr_lo, Addr addr_hi, 1053 const HChar* inlinedfn, 1054 UInt fndn_ix, 1055 Int lineno, UShort level); 1056 1057 /* Add a CFI summary record. The supplied DiCfSI_m is copied. 
*/ 1058 extern void ML_(addDiCfSI) ( struct _DebugInfo* di, 1059 Addr base, UInt len, DiCfSI_m* cfsi_m ); 1060 1061 /* Given a position in the di->cfsi_base/cfsi_m_ix arrays, return 1062 the corresponding cfsi_m*. Return NULL if the position corresponds 1063 to a cfsi hole. */ 1064 DiCfSI_m* ML_(get_cfsi_m) (const DebugInfo* di, UInt pos); 1065 1066 /* Add a string to the string table of a DebugInfo. If len==-1, 1067 ML_(addStr) will itself measure the length of the string. */ 1068 extern const HChar* ML_(addStr) ( DebugInfo* di, const HChar* str, Int len ); 1069 1070 /* Add a string to the string table of a DebugInfo, by copying the 1071 string from the given DiCursor. Measures the length of the string 1072 itself. */ 1073 extern const HChar* ML_(addStrFromCursor)( DebugInfo* di, DiCursor c ); 1074 1075 extern void ML_(addVar)( struct _DebugInfo* di, 1076 Int level, 1077 Addr aMin, 1078 Addr aMax, 1079 const HChar* name, 1080 UWord typeR, /* a cuOff */ 1081 const GExpr* gexpr, 1082 const GExpr* fbGX, /* SHARED. */ 1083 UInt fndn_ix, /* where decl'd - may be zero */ 1084 Int lineNo, /* where decl'd - may be zero */ 1085 Bool show ); 1086 /* Note: fndn_ix identifies a filename/dirname pair similarly to 1087 ML_(addInlInfo) and ML_(addLineInfo). */ 1088 1089 /* Canonicalise the tables held by 'di', in preparation for use. Call 1090 this after finishing adding entries to these tables. */ 1091 extern void ML_(canonicaliseTables) ( struct _DebugInfo* di ); 1092 1093 /* Canonicalise the call-frame-info table held by 'di', in preparation 1094 for use. This is called by ML_(canonicaliseTables) but can also be 1095 called on it's own to sort just this table. */ 1096 extern void ML_(canonicaliseCFI) ( struct _DebugInfo* di ); 1097 1098 /* ML_(finish_CFSI_arrays) fills in the cfsi_base and cfsi_m_ix arrays 1099 from cfsi_rd array. cfsi_rd is then freed. 
*/ 1100 extern void ML_(finish_CFSI_arrays) ( struct _DebugInfo* di ); 1101 1102 /* ------ Searching ------ */ 1103 1104 /* Find a symbol-table index containing the specified pointer, or -1 1105 if not found. Binary search. */ 1106 extern Word ML_(search_one_symtab) ( const DebugInfo* di, Addr ptr, 1107 Bool match_anywhere_in_sym, 1108 Bool findText ); 1109 1110 /* Find a location-table index containing the specified pointer, or -1 1111 if not found. Binary search. */ 1112 extern Word ML_(search_one_loctab) ( const DebugInfo* di, Addr ptr ); 1113 1114 /* Find a CFI-table index containing the specified pointer, or -1 if 1115 not found. Binary search. */ 1116 extern Word ML_(search_one_cfitab) ( const DebugInfo* di, Addr ptr ); 1117 1118 /* Find a FPO-table index containing the specified pointer, or -1 1119 if not found. Binary search. */ 1120 extern Word ML_(search_one_fpotab) ( const DebugInfo* di, Addr ptr ); 1121 1122 /* Helper function for the most often needed searching for an rx 1123 mapping containing the specified address range. The range must 1124 fall entirely within the mapping to be considered to be within it. 1125 Asserts if lo > hi; caller must ensure this doesn't happen. */ 1126 extern DebugInfoMapping* ML_(find_rx_mapping) ( DebugInfo* di, 1127 Addr lo, Addr hi ); 1128 1129 /* ------ Misc ------ */ 1130 1131 /* Show a non-fatal debug info reading error. Use VG_(core_panic) for 1132 fatal errors. 'serious' errors are always shown, not 'serious' ones 1133 are shown only at verbosity level 2 and above. */ 1134 extern 1135 void ML_(symerr) ( const DebugInfo* di, Bool serious, const HChar* msg ); 1136 1137 /* Print a symbol. */ 1138 extern void ML_(ppSym) ( Int idx, const DiSym* sym ); 1139 1140 /* Print a call-frame-info summary. 
*/ 1141 extern void ML_(ppDiCfSI) ( const XArray* /* of CfiExpr */ exprs, 1142 Addr base, UInt len, 1143 const DiCfSI_m* si_m ); 1144 1145 1146 #define TRACE_SYMTAB_ENABLED (di->trace_symtab) 1147 #define TRACE_SYMTAB(format, args...) \ 1148 if (TRACE_SYMTAB_ENABLED) { VG_(printf)(format, ## args); } 1149 1150 1151 #endif /* ndef __PRIV_STORAGE_H */ 1152 1153 /*--------------------------------------------------------------------*/ 1154 /*--- end ---*/ 1155 /*--------------------------------------------------------------------*/ 1156