1 /* -*- mode: C; c-basic-offset: 3; -*- */ 2 3 /*--------------------------------------------------------------------*/ 4 /*--- Read DWARF3/4 ".debug_info" sections (DIE trees). ---*/ 5 /*--- readdwarf3.c ---*/ 6 /*--------------------------------------------------------------------*/ 7 8 /* 9 This file is part of Valgrind, a dynamic binary instrumentation 10 framework. 11 12 Copyright (C) 2008-2013 OpenWorks LLP 13 info (at) open-works.co.uk 14 15 This program is free software; you can redistribute it and/or 16 modify it under the terms of the GNU General Public License as 17 published by the Free Software Foundation; either version 2 of the 18 License, or (at your option) any later version. 19 20 This program is distributed in the hope that it will be useful, but 21 WITHOUT ANY WARRANTY; without even the implied warranty of 22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 23 General Public License for more details. 24 25 You should have received a copy of the GNU General Public License 26 along with this program; if not, write to the Free Software 27 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 28 02111-1307, USA. 29 30 The GNU General Public License is contained in the file COPYING. 31 32 Neither the names of the U.S. Department of Energy nor the 33 University of California nor the names of its contributors may be 34 used to endorse or promote products derived from this software 35 without prior written permission. 36 */ 37 38 #if defined(VGO_linux) || defined(VGO_darwin) 39 40 /* REFERENCE (without which this code will not make much sense): 41 42 DWARF Debugging Information Format, Version 3, 43 dated 20 December 2005 (the "D3 spec"). 44 45 Available at http://www.dwarfstd.org/Dwarf3.pdf. There's also a 46 .doc (MS Word) version, but for some reason the section numbers 47 between the Word and PDF versions differ by 1 in the first digit. 48 All section references in this code are to the PDF version. 

   CURRENT HACKS:

   DW_TAG_{const,volatile}_type: a missing DW_AT_type is allowed; it
      is assumed to mean "const void" or "volatile void" respectively.
      GDB appears to interpret them like this, anyway.

   In many cases it is important to know the svma of a CU (the "base
   address of the CU", as the D3 spec calls it).  There are some
   situations in which the spec implies this value is unknown, but the
   Dwarf3 produced by gcc-4.1 seems to assume it is not unknown but
   merely zero when not explicitly stated.  So we too have to make
   that assumption.

   POTENTIAL BUG?  Spotted 6 Sept 08.  Why doesn't
   unitary_range_list() bias the resulting range list in the same way
   that its more general cousin, get_range_list(), does?  I don't
   know.

   TODO, 2008 Feb 17:

   get rid of cu_svma_known and document the assumed-zero svma hack.

   ML_(sizeOfType): differentiate between zero sized types and types
   for which the size is unknown.  Is this important?  I don't know.

   DW_TAG_array_types: deal with explicit sizes (currently we compute
   the size from the bounds and the element size, although that's
   fragile if the bounds are incompletely specified, or completely
   absent)

   Document reason for difference (by 1) of stack preening depth in
   parse_var_DIE vs parse_type_DIE.

   Don't hand to ML_(addVars), vars whose locations are entirely in
   registers (DW_OP_reg*).  This is merely a space-saving
   optimisation, as ML_(evaluate_Dwarf3_Expr) should handle these
   expressions correctly, by failing to evaluate them and hence
   effectively ignoring the variable with which they are associated.

   Deal with DW_TAG_array_types which have element size != stride

   In some cases, the info for a variable is split between two
   different DIEs (generally a declarer and a definer).  We punt on
   these.  Could do better here.

   The 'data_bias' argument passed to the expression evaluator
   (ML_(evaluate_Dwarf3_Expr)) should really be changed to a
   MaybeUWord, to make it clear when we do vs don't know what it is
   for the evaluation of an expression.  At the moment zero is passed
   for this parameter in the don't know case.  That's a bit fragile
   and obscure; using a MaybeUWord would be clearer.

   POTENTIAL PERFORMANCE IMPROVEMENTS:

   Currently, duplicate removal and all other queries for the type
   entities array are done using cuOffset-based pointing, which
   involves a binary search (VG_(lookupXA)) for each access.  This is
   wildly inefficient, although simple.  It would be better to
   translate all the cuOffset-based references (iow, all the "R" and
   "Rs" fields in the TyEnts in 'tyents') to direct index numbers in
   'tyents' right at the start of dedup_types(), and use direct
   indexing (VG_(indexXA)) wherever possible after that.

   cmp__XArrays_of_AddrRange is also a performance bottleneck.  Move
   VG_(indexXA) into pub_tool_xarray.h so it can be inlined at all use
   points, and possibly also make an _UNCHECKED version which skips
   the range checks in performance-critical situations such as this.

   Handle interaction between read_DIE and parse_{var,type}_DIE
   better.  Currently read_DIE reads the entire DIE just to find where
   the end is (and for debug printing), so that it can later reliably
   move the cursor to the end regardless of what parse_{var,type}_DIE
   do.  This means many DIEs (most, even?) are read twice.  It would
   be smarter to make parse_{var,type}_DIE return a Bool indicating
   whether or not they advanced the DIE cursor, and only if they
   didn't should read_DIE itself read through the DIE.

   ML_(addVar) and add_var_to_arange: quite a lot of DiAddrRanges have
   zero variables in their .vars XArray.
Rather than have an XArray 129 with zero elements (which uses 2 malloc'd blocks), allow the .vars 130 pointer to be NULL in this case. 131 132 More generally, reduce the amount of memory allocated and freed 133 while reading Dwarf3 type/variable information. Even modest (20MB) 134 objects cause this module to allocate and free hundreds of 135 thousands of small blocks, and ML_(arena_malloc) and its various 136 groupies always show up at the top of performance profiles. */ 137 138 #include "pub_core_basics.h" 139 #include "pub_core_debuginfo.h" 140 #include "pub_core_libcbase.h" 141 #include "pub_core_libcassert.h" 142 #include "pub_core_libcprint.h" 143 #include "pub_core_libcsetjmp.h" // setjmp facilities 144 #include "pub_core_hashtable.h" 145 #include "pub_core_options.h" 146 #include "pub_core_tooliface.h" /* VG_(needs) */ 147 #include "pub_core_xarray.h" 148 #include "pub_core_wordfm.h" 149 #include "priv_misc.h" /* dinfo_zalloc/free */ 150 #include "priv_image.h" 151 #include "priv_tytypes.h" 152 #include "priv_d3basics.h" 153 #include "priv_storage.h" 154 #include "priv_readdwarf3.h" /* self */ 155 156 157 /*------------------------------------------------------------*/ 158 /*--- ---*/ 159 /*--- Basic machinery for parsing DIEs. ---*/ 160 /*--- ---*/ 161 /*------------------------------------------------------------*/ 162 163 #define TRACE_D3(format, args...) 
\ 164 if (UNLIKELY(td3)) { VG_(printf)(format, ## args); } 165 #define TD3 (UNLIKELY(td3)) 166 167 #define D3_INVALID_CUOFF ((UWord)(-1UL)) 168 #define D3_FAKEVOID_CUOFF ((UWord)(-2UL)) 169 170 typedef 171 struct { 172 DiSlice sli; // to which this cursor applies 173 DiOffT sli_next; // offset in underlying DiImage; must be >= sli.ioff 174 void (*barf)( const HChar* ) __attribute__((noreturn)); 175 const HChar* barfstr; 176 } 177 Cursor; 178 179 static inline Bool is_sane_Cursor ( const Cursor* c ) { 180 if (!c) return False; 181 if (!c->barf) return False; 182 if (!c->barfstr) return False; 183 if (!ML_(sli_is_valid)(c->sli)) return False; 184 if (c->sli.ioff == DiOffT_INVALID) return False; 185 if (c->sli_next < c->sli.ioff) return False; 186 return True; 187 } 188 189 // Initialise a cursor from a DiSlice (ELF section, really) so as to 190 // start reading at offset |sli_initial_offset| from the start of the 191 // slice. 192 static void init_Cursor ( /*OUT*/Cursor* c, 193 DiSlice sli, 194 ULong sli_initial_offset, 195 __attribute__((noreturn)) void (*barf)(const HChar*), 196 const HChar* barfstr ) 197 { 198 vg_assert(c); 199 VG_(bzero_inline)(c, sizeof(*c)); 200 c->sli = sli; 201 c->sli_next = c->sli.ioff + sli_initial_offset; 202 c->barf = barf; 203 c->barfstr = barfstr; 204 vg_assert(is_sane_Cursor(c)); 205 } 206 207 static Bool is_at_end_Cursor ( const Cursor* c ) { 208 vg_assert(is_sane_Cursor(c)); 209 return c->sli_next >= c->sli.ioff + c->sli.szB; 210 } 211 212 static inline ULong get_position_of_Cursor ( const Cursor* c ) { 213 vg_assert(is_sane_Cursor(c)); 214 return c->sli_next - c->sli.ioff; 215 } 216 static inline void set_position_of_Cursor ( Cursor* c, ULong pos ) { 217 c->sli_next = c->sli.ioff + pos; 218 vg_assert(is_sane_Cursor(c)); 219 } 220 static inline void advance_position_of_Cursor ( Cursor* c, ULong delta ) { 221 c->sli_next += delta; 222 vg_assert(is_sane_Cursor(c)); 223 } 224 225 static /*signed*/Long get_remaining_length_Cursor ( const 
Cursor* c ) { 226 vg_assert(is_sane_Cursor(c)); 227 return c->sli.ioff + c->sli.szB - c->sli_next; 228 } 229 230 //static void* get_address_of_Cursor ( Cursor* c ) { 231 // vg_assert(is_sane_Cursor(c)); 232 // return &c->region_start_img[ c->region_next ]; 233 //} 234 235 static DiCursor get_DiCursor_from_Cursor ( const Cursor* c ) { 236 return mk_DiCursor(c->sli.img, c->sli_next); 237 } 238 239 /* FIXME: document assumptions on endianness for 240 get_UShort/UInt/ULong. */ 241 static inline UChar get_UChar ( Cursor* c ) { 242 UChar r; 243 vg_assert(is_sane_Cursor(c)); 244 if (c->sli_next + sizeof(UChar) > c->sli.ioff + c->sli.szB) { 245 c->barf(c->barfstr); 246 /*NOTREACHED*/ 247 vg_assert(0); 248 } 249 r = ML_(img_get_UChar)(c->sli.img, c->sli_next); 250 c->sli_next += sizeof(UChar); 251 return r; 252 } 253 static UShort get_UShort ( Cursor* c ) { 254 UShort r; 255 vg_assert(is_sane_Cursor(c)); 256 if (c->sli_next + sizeof(UShort) > c->sli.ioff + c->sli.szB) { 257 c->barf(c->barfstr); 258 /*NOTREACHED*/ 259 vg_assert(0); 260 } 261 r = ML_(img_get_UShort)(c->sli.img, c->sli_next); 262 c->sli_next += sizeof(UShort); 263 return r; 264 } 265 static UInt get_UInt ( Cursor* c ) { 266 UInt r; 267 vg_assert(is_sane_Cursor(c)); 268 if (c->sli_next + sizeof(UInt) > c->sli.ioff + c->sli.szB) { 269 c->barf(c->barfstr); 270 /*NOTREACHED*/ 271 vg_assert(0); 272 } 273 r = ML_(img_get_UInt)(c->sli.img, c->sli_next); 274 c->sli_next += sizeof(UInt); 275 return r; 276 } 277 static ULong get_ULong ( Cursor* c ) { 278 ULong r; 279 vg_assert(is_sane_Cursor(c)); 280 if (c->sli_next + sizeof(ULong) > c->sli.ioff + c->sli.szB) { 281 c->barf(c->barfstr); 282 /*NOTREACHED*/ 283 vg_assert(0); 284 } 285 r = ML_(img_get_ULong)(c->sli.img, c->sli_next); 286 c->sli_next += sizeof(ULong); 287 return r; 288 } 289 static ULong get_ULEB128 ( Cursor* c ) { 290 ULong result; 291 Int shift; 292 UChar byte; 293 /* unroll first iteration */ 294 byte = get_UChar( c ); 295 result = (ULong)(byte & 0x7f); 
296 if (LIKELY(!(byte & 0x80))) return result; 297 shift = 7; 298 /* end unroll first iteration */ 299 do { 300 byte = get_UChar( c ); 301 result |= ((ULong)(byte & 0x7f)) << shift; 302 shift += 7; 303 } while (byte & 0x80); 304 return result; 305 } 306 static Long get_SLEB128 ( Cursor* c ) { 307 ULong result = 0; 308 Int shift = 0; 309 UChar byte; 310 do { 311 byte = get_UChar(c); 312 result |= ((ULong)(byte & 0x7f)) << shift; 313 shift += 7; 314 } while (byte & 0x80); 315 if (shift < 64 && (byte & 0x40)) 316 result |= -(1ULL << shift); 317 return result; 318 } 319 320 /* Assume 'c' points to the start of a string. Return a DiCursor of 321 whatever it points at, and advance it past the terminating zero. 322 This makes it safe for the caller to then copy the string with 323 ML_(addStr), since (w.r.t. image overruns) the process of advancing 324 past the terminating zero will already have "vetted" the string. */ 325 static DiCursor get_AsciiZ ( Cursor* c ) { 326 UChar uc; 327 DiCursor res = get_DiCursor_from_Cursor(c); 328 do { uc = get_UChar(c); } while (uc != 0); 329 return res; 330 } 331 332 static ULong peek_ULEB128 ( Cursor* c ) { 333 DiOffT here = c->sli_next; 334 ULong r = get_ULEB128( c ); 335 c->sli_next = here; 336 return r; 337 } 338 static UChar peek_UChar ( Cursor* c ) { 339 DiOffT here = c->sli_next; 340 UChar r = get_UChar( c ); 341 c->sli_next = here; 342 return r; 343 } 344 345 static ULong get_Dwarfish_UWord ( Cursor* c, Bool is_dw64 ) { 346 return is_dw64 ? 
get_ULong(c) : (ULong) get_UInt(c); 347 } 348 349 static UWord get_UWord ( Cursor* c ) { 350 vg_assert(sizeof(UWord) == sizeof(void*)); 351 if (sizeof(UWord) == 4) return get_UInt(c); 352 if (sizeof(UWord) == 8) return get_ULong(c); 353 vg_assert(0); 354 } 355 356 /* Read a DWARF3 'Initial Length' field */ 357 static ULong get_Initial_Length ( /*OUT*/Bool* is64, 358 Cursor* c, 359 const HChar* barfMsg ) 360 { 361 ULong w64; 362 UInt w32; 363 *is64 = False; 364 w32 = get_UInt( c ); 365 if (w32 >= 0xFFFFFFF0 && w32 < 0xFFFFFFFF) { 366 c->barf( barfMsg ); 367 } 368 else if (w32 == 0xFFFFFFFF) { 369 *is64 = True; 370 w64 = get_ULong( c ); 371 } else { 372 *is64 = False; 373 w64 = (ULong)w32; 374 } 375 return w64; 376 } 377 378 379 /*------------------------------------------------------------*/ 380 /*--- ---*/ 381 /*--- "CUConst" structure ---*/ 382 /*--- ---*/ 383 /*------------------------------------------------------------*/ 384 385 typedef 386 struct _name_form { 387 ULong at_name; // Dwarf Attribute name 388 ULong at_form; // Dwarf Attribute form 389 UInt skip_szB; // Nr of bytes skippable from here ... 390 UInt next_nf; // ... to reach this attr/form index in the g_abbv.nf 391 } name_form; 392 /* skip_szB and next_nf are used to optimise the skipping of uninteresting DIEs. 393 Each name_form maintains how many (fixed) nr of bytes can be skipped from 394 the beginning of this form till the next attr/form to look at. 395 The next form to look can be: 396 an 'interesting' attr/form to read while skipping a DIE 397 (currently, this is only DW_AT_sibling) 398 or 399 a variable length form which must be read to be skipped. 400 For a variable length form, the skip_szB will be equal to VARSZ_FORM. 401 402 Note: this technique could also be used to speed up the parsing 403 of DIEs : for each parser kind, we could have the nr of bytes 404 to skip to directly reach the interesting form(s) for the parser. 
*/ 405 406 typedef 407 struct _g_abbv { 408 struct _g_abbv *next; // read/write by hash table. 409 UWord abbv_code; // key, read by hash table 410 ULong atag; 411 ULong has_children; 412 name_form nf[0]; 413 /* Variable-length array of name/form pairs, terminated 414 by a 0/0 pair. 415 The skip_szB/next_nf allows to skip efficiently a DIE 416 described by this g_abbv; */ 417 } g_abbv; 418 419 /* Holds information that is constant through the parsing of a 420 Compilation Unit. This is basically plumbed through to 421 everywhere. */ 422 typedef 423 struct { 424 /* Call here if anything goes wrong */ 425 void (*barf)( const HChar* ) __attribute__((noreturn)); 426 /* Is this 64-bit DWARF ? */ 427 Bool is_dw64; 428 /* Which DWARF version ? (2, 3 or 4) */ 429 UShort version; 430 /* Length of this Compilation Unit, as stated in the 431 .unit_length :: InitialLength field of the CU Header. 432 However, this size (as specified by the D3 spec) does not 433 include the size of the .unit_length field itself, which is 434 either 4 or 12 bytes (32-bit or 64-bit Dwarf3). That value 435 can be obtained through the expression ".is_dw64 ? 12 : 4". */ 436 ULong unit_length; 437 /* Offset of start of this unit in .debug_info */ 438 UWord cu_start_offset; 439 /* SVMA for this CU. In the D3 spec, is known as the "base 440 address of the compilation unit (last para sec 3.1.1). 441 Needed for (amongst things) interpretation of location-list 442 values. */ 443 Addr cu_svma; 444 Bool cu_svma_known; 445 446 /* The debug_abbreviations table to be used for this Unit */ 447 //UChar* debug_abbv; 448 /* Upper bound on size thereof (an overestimate, in general) */ 449 //UWord debug_abbv_maxszB; 450 /* A bounded area of the image, to be used as the 451 debug_abbreviations table tobe used for this Unit. */ 452 DiSlice debug_abbv; 453 454 /* Image information for various sections. 
*/ 455 DiSlice escn_debug_str; 456 DiSlice escn_debug_ranges; 457 DiSlice escn_debug_loc; 458 DiSlice escn_debug_line; 459 DiSlice escn_debug_info; 460 DiSlice escn_debug_types; 461 DiSlice escn_debug_info_alt; 462 DiSlice escn_debug_str_alt; 463 /* How much to add to .debug_types resp. alternate .debug_info offsets 464 in cook_die*. */ 465 UWord types_cuOff_bias; 466 UWord alt_cuOff_bias; 467 /* --- Needed so we can add stuff to the string table. --- */ 468 struct _DebugInfo* di; 469 /* --- a hash table of g_abbv (i.e. parsed abbreviations) --- */ 470 VgHashTable *ht_abbvs; 471 472 /* True if this came from .debug_types; otherwise it came from 473 .debug_info. */ 474 Bool is_type_unit; 475 /* For a unit coming from .debug_types, these hold the TU's type 476 signature and the uncooked DIE offset of the TU's signatured 477 type. For a unit coming from .debug_info, these are unused. */ 478 ULong type_signature; 479 ULong type_offset; 480 481 /* Signatured type hash; computed once and then shared by all 482 CUs. */ 483 VgHashTable *signature_types; 484 485 /* True if this came from alternate .debug_info; otherwise 486 it came from normal .debug_info or .debug_types. */ 487 Bool is_alt_info; 488 } 489 CUConst; 490 491 492 /* Return the cooked value of DIE depending on whether CC represents a 493 .debug_types unit. To cook a DIE, we pretend that the .debug_info, 494 .debug_types and optional alternate .debug_info sections form 495 a contiguous whole, so that DIEs coming from .debug_types are numbered 496 starting at the end of .debug_info and DIEs coming from alternate 497 .debug_info are numbered starting at the end of .debug_types. */ 498 static UWord cook_die( const CUConst* cc, UWord die ) 499 { 500 if (cc->is_type_unit) 501 die += cc->types_cuOff_bias; 502 else if (cc->is_alt_info) 503 die += cc->alt_cuOff_bias; 504 return die; 505 } 506 507 /* Like cook_die, but understand that DIEs coming from a 508 DW_FORM_ref_sig8 reference are already cooked. 
Also, handle 509 DW_FORM_GNU_ref_alt from within primary .debug_info or .debug_types 510 as reference to alternate .debug_info. */ 511 static UWord cook_die_using_form( const CUConst *cc, UWord die, DW_FORM form) 512 { 513 if (form == DW_FORM_ref_sig8) 514 return die; 515 if (form == DW_FORM_GNU_ref_alt) 516 return die + cc->alt_cuOff_bias; 517 return cook_die( cc, die ); 518 } 519 520 /* Return the uncooked offset of DIE and set *TYPE_FLAG to true if the DIE 521 came from the .debug_types section and *ALT_FLAG to true if the DIE 522 came from alternate .debug_info section. */ 523 static UWord uncook_die( const CUConst *cc, UWord die, /*OUT*/Bool *type_flag, 524 Bool *alt_flag ) 525 { 526 *alt_flag = False; 527 *type_flag = False; 528 /* The use of escn_debug_{info,types}.szB seems safe to me even if 529 escn_debug_{info,types} are DiSlice_INVALID (meaning the 530 sections were not found), because DiSlice_INVALID.szB is always 531 zero. That said, it seems unlikely we'd ever get here if 532 .debug_info or .debug_types were missing. */ 533 if (die >= cc->escn_debug_info.szB) { 534 if (die >= cc->escn_debug_info.szB + cc->escn_debug_types.szB) { 535 *alt_flag = True; 536 die -= cc->escn_debug_info.szB + cc->escn_debug_types.szB; 537 } else { 538 *type_flag = True; 539 die -= cc->escn_debug_info.szB; 540 } 541 } 542 return die; 543 } 544 545 /*------------------------------------------------------------*/ 546 /*--- ---*/ 547 /*--- Helper functions for Guarded Expressions ---*/ 548 /*--- ---*/ 549 /*------------------------------------------------------------*/ 550 551 /* Parse the location list starting at img-offset 'debug_loc_offset' 552 in .debug_loc. Results are biased with 'svma_of_referencing_CU' 553 and so I believe are correct SVMAs for the object as a whole. This 554 function allocates the UChar*, and the caller must deallocate it. 555 The resulting block is in so-called Guarded-Expression format. 

   Guarded-Expression format is similar but not identical to the DWARF3
   location-list format.  The format of each returned block is:

      UChar biasMe;
      UChar isEnd;
      followed by zero or more of

      (Addr aMin;  Addr aMax;  UShort nbytes;  ..bytes..;  UChar isEnd)

   '..bytes..' is a standard DWARF3 location expression which is
   valid when aMin <= pc <= aMax (possibly after suitable biasing).

   The number of bytes in '..bytes..' is nbytes.

   The end of the sequence is marked by an isEnd == 1 value.  All
   previous isEnd values must be zero.

   biasMe is 1 if the aMin/aMax fields need this DebugInfo's
   text_bias added before use, and 0 if this is not necessary
   (the GX is ready to go).

   Hence the block can be quickly parsed and is self-describing.  Note
   that aMax is 1 less than the corresponding value in a DWARF3
   location list.  Zero length ranges, with aMax == aMin-1, are not
   allowed.
*/
/* 2008-sept-12: moved ML_(pp_GX) from here to d3basics.c, where
   it more logically belongs. */


/* Apply a text bias to a GX.
*/ 588 static void bias_GX ( /*MOD*/GExpr* gx, const DebugInfo* di ) 589 { 590 UShort nbytes; 591 UChar* p = &gx->payload[0]; 592 UChar* pA; 593 UChar uc; 594 uc = *p++; /*biasMe*/ 595 if (uc == 0) 596 return; 597 vg_assert(uc == 1); 598 p[-1] = 0; /* mark it as done */ 599 while (True) { 600 uc = *p++; 601 if (uc == 1) 602 break; /*isEnd*/ 603 vg_assert(uc == 0); 604 /* t-bias aMin */ 605 pA = (UChar*)p; 606 ML_(write_Addr)(pA, ML_(read_Addr)(pA) + di->text_debug_bias); 607 p += sizeof(Addr); 608 /* t-bias aMax */ 609 pA = (UChar*)p; 610 ML_(write_Addr)(pA, ML_(read_Addr)(pA) + di->text_debug_bias); 611 p += sizeof(Addr); 612 /* nbytes, and actual expression */ 613 nbytes = ML_(read_UShort)(p); p += sizeof(UShort); 614 p += nbytes; 615 } 616 } 617 618 __attribute__((noinline)) 619 static GExpr* make_singleton_GX ( DiCursor block, ULong nbytes ) 620 { 621 SizeT bytesReqd; 622 GExpr* gx; 623 UChar *p, *pstart; 624 625 vg_assert(sizeof(UWord) == sizeof(Addr)); 626 vg_assert(nbytes <= 0xFFFF); /* else we overflow the nbytes field */ 627 bytesReqd 628 = sizeof(UChar) /*biasMe*/ + sizeof(UChar) /*!isEnd*/ 629 + sizeof(UWord) /*aMin*/ + sizeof(UWord) /*aMax*/ 630 + sizeof(UShort) /*nbytes*/ + (SizeT)nbytes 631 + sizeof(UChar); /*isEnd*/ 632 633 gx = ML_(dinfo_zalloc)( "di.readdwarf3.msGX.1", 634 sizeof(GExpr) + bytesReqd ); 635 636 p = pstart = &gx->payload[0]; 637 638 p = ML_(write_UChar)(p, 0); /*biasMe*/ 639 p = ML_(write_UChar)(p, 0); /*!isEnd*/ 640 p = ML_(write_Addr)(p, 0); /*aMin*/ 641 p = ML_(write_Addr)(p, ~0); /*aMax*/ 642 p = ML_(write_UShort)(p, nbytes); /*nbytes*/ 643 ML_(cur_read_get)(p, block, nbytes); p += nbytes; 644 p = ML_(write_UChar)(p, 1); /*isEnd*/ 645 646 vg_assert( (SizeT)(p - pstart) == bytesReqd); 647 vg_assert( &gx->payload[bytesReqd] 648 == ((UChar*)gx) + sizeof(GExpr) + bytesReqd ); 649 650 return gx; 651 } 652 653 __attribute__((noinline)) 654 static GExpr* make_general_GX ( const CUConst* cc, 655 Bool td3, 656 ULong debug_loc_offset, 657 
Addr svma_of_referencing_CU ) 658 { 659 Addr base; 660 Cursor loc; 661 XArray* xa; /* XArray of UChar */ 662 GExpr* gx; 663 Word nbytes; 664 665 vg_assert(sizeof(UWord) == sizeof(Addr)); 666 if (!ML_(sli_is_valid)(cc->escn_debug_loc) || cc->escn_debug_loc.szB == 0) 667 cc->barf("make_general_GX: .debug_loc is empty/missing"); 668 669 init_Cursor( &loc, cc->escn_debug_loc, 0, cc->barf, 670 "Overrun whilst reading .debug_loc section(2)" ); 671 set_position_of_Cursor( &loc, debug_loc_offset ); 672 673 TRACE_D3("make_general_GX (.debug_loc_offset = %llu, ioff = %llu) {\n", 674 debug_loc_offset, (ULong)get_DiCursor_from_Cursor(&loc).ioff ); 675 676 /* Who frees this xa? It is freed before this fn exits. */ 677 xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.mgGX.1", 678 ML_(dinfo_free), 679 sizeof(UChar) ); 680 681 { UChar c = 1; /*biasMe*/ VG_(addBytesToXA)( xa, &c, sizeof(c) ); } 682 683 base = 0; 684 while (True) { 685 Bool acquire; 686 UWord len; 687 /* Read a (host-)word pair. This is something of a hack since 688 the word size to read is really dictated by the ELF file; 689 however, we assume we're reading a file with the same 690 word-sizeness as the host. Reasonably enough. 
*/ 691 UWord w1 = get_UWord( &loc ); 692 UWord w2 = get_UWord( &loc ); 693 694 TRACE_D3(" %08lx %08lx\n", w1, w2); 695 if (w1 == 0 && w2 == 0) 696 break; /* end of list */ 697 698 if (w1 == -1UL) { 699 /* new value for 'base' */ 700 base = w2; 701 continue; 702 } 703 704 /* else a location expression follows */ 705 /* else enumerate [w1+base, w2+base) */ 706 /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc" 707 (sec 2.17.2) */ 708 if (w1 > w2) { 709 TRACE_D3("negative range is for .debug_loc expr at " 710 "file offset %llu\n", 711 debug_loc_offset); 712 cc->barf( "negative range in .debug_loc section" ); 713 } 714 715 /* ignore zero length ranges */ 716 acquire = w1 < w2; 717 len = (UWord)get_UShort( &loc ); 718 719 if (acquire) { 720 UWord w; 721 UShort s; 722 UChar c; 723 c = 0; /* !isEnd*/ 724 VG_(addBytesToXA)( xa, &c, sizeof(c) ); 725 w = w1 + base + svma_of_referencing_CU; 726 VG_(addBytesToXA)( xa, &w, sizeof(w) ); 727 w = w2 -1 + base + svma_of_referencing_CU; 728 VG_(addBytesToXA)( xa, &w, sizeof(w) ); 729 s = (UShort)len; 730 VG_(addBytesToXA)( xa, &s, sizeof(s) ); 731 } 732 733 while (len > 0) { 734 UChar byte = get_UChar( &loc ); 735 TRACE_D3("%02x", (UInt)byte); 736 if (acquire) 737 VG_(addBytesToXA)( xa, &byte, 1 ); 738 len--; 739 } 740 TRACE_D3("\n"); 741 } 742 743 { UChar c = 1; /*isEnd*/ VG_(addBytesToXA)( xa, &c, sizeof(c) ); } 744 745 nbytes = VG_(sizeXA)( xa ); 746 vg_assert(nbytes >= 1); 747 748 gx = ML_(dinfo_zalloc)( "di.readdwarf3.mgGX.2", sizeof(GExpr) + nbytes ); 749 VG_(memcpy)( &gx->payload[0], (UChar*)VG_(indexXA)(xa,0), nbytes ); 750 vg_assert( &gx->payload[nbytes] 751 == ((UChar*)gx) + sizeof(GExpr) + nbytes ); 752 753 VG_(deleteXA)( xa ); 754 755 TRACE_D3("}\n"); 756 757 return gx; 758 } 759 760 761 /*------------------------------------------------------------*/ 762 /*--- ---*/ 763 /*--- Helper functions for range lists and CU headers ---*/ 764 /*--- ---*/ 765 
/*------------------------------------------------------------*/ 766 767 /* Denotes an address range. Both aMin and aMax are included in the 768 range; hence a complete range is (0, ~0) and an empty range is any 769 (X, X-1) for X > 0.*/ 770 typedef 771 struct { Addr aMin; Addr aMax; } 772 AddrRange; 773 774 775 /* Generate an arbitrary structural total ordering on 776 XArray* of AddrRange. */ 777 static Word cmp__XArrays_of_AddrRange ( const XArray* rngs1, 778 const XArray* rngs2 ) 779 { 780 Word n1, n2, i; 781 vg_assert(rngs1 && rngs2); 782 n1 = VG_(sizeXA)( rngs1 ); 783 n2 = VG_(sizeXA)( rngs2 ); 784 if (n1 < n2) return -1; 785 if (n1 > n2) return 1; 786 for (i = 0; i < n1; i++) { 787 AddrRange* rng1 = (AddrRange*)VG_(indexXA)( rngs1, i ); 788 AddrRange* rng2 = (AddrRange*)VG_(indexXA)( rngs2, i ); 789 if (rng1->aMin < rng2->aMin) return -1; 790 if (rng1->aMin > rng2->aMin) return 1; 791 if (rng1->aMax < rng2->aMax) return -1; 792 if (rng1->aMax > rng2->aMax) return 1; 793 } 794 return 0; 795 } 796 797 798 __attribute__((noinline)) 799 static XArray* /* of AddrRange */ empty_range_list ( void ) 800 { 801 XArray* xa; /* XArray of AddrRange */ 802 /* Who frees this xa? varstack_preen() does. */ 803 xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.erl.1", 804 ML_(dinfo_free), 805 sizeof(AddrRange) ); 806 return xa; 807 } 808 809 810 __attribute__((noinline)) 811 static XArray* unitary_range_list ( Addr aMin, Addr aMax ) 812 { 813 XArray* xa; 814 AddrRange pair; 815 vg_assert(aMin <= aMax); 816 /* Who frees this xa? varstack_preen() does. */ 817 xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.url.1", 818 ML_(dinfo_free), 819 sizeof(AddrRange) ); 820 pair.aMin = aMin; 821 pair.aMax = aMax; 822 VG_(addToXA)( xa, &pair ); 823 return xa; 824 } 825 826 827 /* Enumerate the address ranges starting at img-offset 828 'debug_ranges_offset' in .debug_ranges. 
Results are biased with 829 'svma_of_referencing_CU' and so I believe are correct SVMAs for the 830 object as a whole. This function allocates the XArray, and the 831 caller must deallocate it. */ 832 __attribute__((noinline)) 833 static XArray* /* of AddrRange */ 834 get_range_list ( const CUConst* cc, 835 Bool td3, 836 UWord debug_ranges_offset, 837 Addr svma_of_referencing_CU ) 838 { 839 Addr base; 840 Cursor ranges; 841 XArray* xa; /* XArray of AddrRange */ 842 AddrRange pair; 843 844 if (!ML_(sli_is_valid)(cc->escn_debug_ranges) 845 || cc->escn_debug_ranges.szB == 0) 846 cc->barf("get_range_list: .debug_ranges is empty/missing"); 847 848 init_Cursor( &ranges, cc->escn_debug_ranges, 0, cc->barf, 849 "Overrun whilst reading .debug_ranges section(2)" ); 850 set_position_of_Cursor( &ranges, debug_ranges_offset ); 851 852 /* Who frees this xa? varstack_preen() does. */ 853 xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.grl.1", ML_(dinfo_free), 854 sizeof(AddrRange) ); 855 base = 0; 856 while (True) { 857 /* Read a (host-)word pair. This is something of a hack since 858 the word size to read is really dictated by the ELF file; 859 however, we assume we're reading a file with the same 860 word-sizeness as the host. Reasonably enough. */ 861 UWord w1 = get_UWord( &ranges ); 862 UWord w2 = get_UWord( &ranges ); 863 864 if (w1 == 0 && w2 == 0) 865 break; /* end of list. 
*/ 866 867 if (w1 == -1UL) { 868 /* new value for 'base' */ 869 base = w2; 870 continue; 871 } 872 873 /* else enumerate [w1+base, w2+base) */ 874 /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc" 875 (sec 2.17.2) */ 876 if (w1 > w2) 877 cc->barf( "negative range in .debug_ranges section" ); 878 if (w1 < w2) { 879 pair.aMin = w1 + base + svma_of_referencing_CU; 880 pair.aMax = w2 - 1 + base + svma_of_referencing_CU; 881 vg_assert(pair.aMin <= pair.aMax); 882 VG_(addToXA)( xa, &pair ); 883 } 884 } 885 return xa; 886 } 887 888 #define VARSZ_FORM 0xffffffff 889 static UInt get_Form_szB (const CUConst* cc, DW_FORM form ); 890 891 /* Initialises the hash table of abbreviations. 892 We do a single scan of the abbv slice to parse and 893 build all abbreviations, for the following reasons: 894 * all or most abbreviations will be needed in any case 895 (at least for var-info reading). 896 * re-reading each time an abbreviation causes a lot of calls 897 to get_ULEB128. 898 * a CU should not have many abbreviations. */ 899 static void init_ht_abbvs (CUConst* cc, 900 Bool td3) 901 { 902 Cursor c; 903 g_abbv *ta; // temporary abbreviation, reallocated if needed. 904 UInt ta_nf_maxE; // max nr of pairs in ta.nf[], doubled when reallocated. 905 UInt ta_nf_n; // nr of pairs in ta->nf that are initialised. 906 g_abbv *ht_ta; // abbv to insert in hash table. 907 Int i; 908 909 #define SZ_G_ABBV(_nf_szE) (sizeof(g_abbv) + _nf_szE * sizeof(name_form)) 910 911 ta_nf_maxE = 10; // starting with enough for 9 pairs+terminating pair. 
912 ta = ML_(dinfo_zalloc) ("di.readdwarf3.ht_ta_nf", SZ_G_ABBV(ta_nf_maxE)); 913 cc->ht_abbvs = VG_(HT_construct) ("di.readdwarf3.ht_abbvs"); 914 915 init_Cursor( &c, cc->debug_abbv, 0, cc->barf, 916 "Overrun whilst parsing .debug_abbrev section(2)" ); 917 while (True) { 918 ta->abbv_code = get_ULEB128( &c ); 919 if (ta->abbv_code == 0) break; /* end of the table */ 920 921 ta->atag = get_ULEB128( &c ); 922 ta->has_children = get_UChar( &c ); 923 ta_nf_n = 0; 924 while (True) { 925 if (ta_nf_n >= ta_nf_maxE) { 926 g_abbv *old_ta = ta; 927 ta = ML_(dinfo_zalloc) ("di.readdwarf3.ht_ta_nf", 928 SZ_G_ABBV(2 * ta_nf_maxE)); 929 ta_nf_maxE = 2 * ta_nf_maxE; 930 VG_(memcpy) (ta, old_ta, SZ_G_ABBV(ta_nf_n)); 931 ML_(dinfo_free) (old_ta); 932 } 933 ta->nf[ta_nf_n].at_name = get_ULEB128( &c ); 934 ta->nf[ta_nf_n].at_form = get_ULEB128( &c ); 935 if (ta->nf[ta_nf_n].at_name == 0 && ta->nf[ta_nf_n].at_form == 0) { 936 ta_nf_n++; 937 break; 938 } 939 ta_nf_n++; 940 } 941 942 // Initialises the skip_szB/next_nf elements : an element at position 943 // i must contain the sum of its own size + the sizes of all elements 944 // following i till either the next variable size element, the next 945 // sibling element or the end of the DIE. 
946 ta->nf[ta_nf_n - 1].skip_szB = 0; 947 ta->nf[ta_nf_n - 1].next_nf = 0; 948 for (i = ta_nf_n - 2; i >= 0; i--) { 949 const UInt form_szB = get_Form_szB (cc, (DW_FORM)ta->nf[i].at_form); 950 951 if (ta->nf[i+1].at_name == DW_AT_sibling 952 || ta->nf[i+1].skip_szB == VARSZ_FORM) { 953 ta->nf[i].skip_szB = form_szB; 954 ta->nf[i].next_nf = i+1; 955 } else if (form_szB == VARSZ_FORM) { 956 ta->nf[i].skip_szB = form_szB; 957 ta->nf[i].next_nf = i+1; 958 } else { 959 ta->nf[i].skip_szB = ta->nf[i+1].skip_szB + form_szB; 960 ta->nf[i].next_nf = ta->nf[i+1].next_nf; 961 } 962 } 963 964 ht_ta = ML_(dinfo_zalloc) ("di.readdwarf3.ht_ta", SZ_G_ABBV(ta_nf_n)); 965 VG_(memcpy) (ht_ta, ta, SZ_G_ABBV(ta_nf_n)); 966 VG_(HT_add_node) ( cc->ht_abbvs, ht_ta ); 967 if (TD3) { 968 TRACE_D3(" Adding abbv_code %llu TAG %s [%s] nf %d ", 969 (ULong) ht_ta->abbv_code, ML_(pp_DW_TAG)(ht_ta->atag), 970 ML_(pp_DW_children)(ht_ta->has_children), 971 ta_nf_n); 972 TRACE_D3(" "); 973 for (i = 0; i < ta_nf_n; i++) 974 TRACE_D3("[%u,%u] ", ta->nf[i].skip_szB, ta->nf[i].next_nf); 975 TRACE_D3("\n"); 976 } 977 } 978 979 ML_(dinfo_free) (ta); 980 #undef SZ_G_ABBV 981 } 982 983 static g_abbv* get_abbv (const CUConst* cc, ULong abbv_code) 984 { 985 g_abbv *abbv; 986 987 abbv = VG_(HT_lookup) (cc->ht_abbvs, abbv_code); 988 if (!abbv) 989 cc->barf ("abbv_code not found in ht_abbvs table"); 990 return abbv; 991 } 992 993 /* Free the memory allocated in CUConst. */ 994 static void clear_CUConst (CUConst* cc) 995 { 996 VG_(HT_destruct) ( cc->ht_abbvs, ML_(dinfo_free)); 997 cc->ht_abbvs = NULL; 998 } 999 1000 /* Parse the Compilation Unit header indicated at 'c' and 1001 initialise 'cc' accordingly. 
*/ 1002 static __attribute__((noinline)) 1003 void parse_CU_Header ( /*OUT*/CUConst* cc, 1004 Bool td3, 1005 Cursor* c, 1006 DiSlice escn_debug_abbv, 1007 Bool type_unit, 1008 Bool alt_info ) 1009 { 1010 UChar address_size; 1011 ULong debug_abbrev_offset; 1012 1013 VG_(memset)(cc, 0, sizeof(*cc)); 1014 vg_assert(c && c->barf); 1015 cc->barf = c->barf; 1016 1017 /* initial_length field */ 1018 cc->unit_length 1019 = get_Initial_Length( &cc->is_dw64, c, 1020 "parse_CU_Header: invalid initial-length field" ); 1021 1022 TRACE_D3(" Length: %lld\n", cc->unit_length ); 1023 1024 /* version */ 1025 cc->version = get_UShort( c ); 1026 if (cc->version != 2 && cc->version != 3 && cc->version != 4) 1027 cc->barf( "parse_CU_Header: is neither DWARF2 nor DWARF3 nor DWARF4" ); 1028 TRACE_D3(" Version: %d\n", (Int)cc->version ); 1029 1030 /* debug_abbrev_offset */ 1031 debug_abbrev_offset = get_Dwarfish_UWord( c, cc->is_dw64 ); 1032 if (debug_abbrev_offset >= escn_debug_abbv.szB) 1033 cc->barf( "parse_CU_Header: invalid debug_abbrev_offset" ); 1034 TRACE_D3(" Abbrev Offset: %lld\n", debug_abbrev_offset ); 1035 1036 /* address size. If this isn't equal to the host word size, just 1037 give up. This makes it safe to assume elsewhere that 1038 DW_FORM_addr and DW_FORM_ref_addr can be treated as a host 1039 word. */ 1040 address_size = get_UChar( c ); 1041 if (address_size != sizeof(void*)) 1042 cc->barf( "parse_CU_Header: invalid address_size" ); 1043 TRACE_D3(" Pointer Size: %d\n", (Int)address_size ); 1044 1045 cc->is_type_unit = type_unit; 1046 cc->is_alt_info = alt_info; 1047 1048 if (type_unit) { 1049 cc->type_signature = get_ULong( c ); 1050 cc->type_offset = get_Dwarfish_UWord( c, cc->is_dw64 ); 1051 } 1052 1053 /* Set up cc->debug_abbv to point to the relevant table for this 1054 CU. 
Set its .szB so that at least we can't read off the end of 1055 the debug_abbrev section -- potentially (and quite likely) too 1056 big, if this isn't the last table in the section, but at least 1057 it's safe. 1058 1059 This amounts to taking debug_abbv_escn and moving the start 1060 position along by debug_abbrev_offset bytes, hence forming a 1061 smaller DiSlice which has the same end point. Since we checked 1062 just above that debug_abbrev_offset is less than the size of 1063 debug_abbv_escn, this should leave us with a nonempty slice. */ 1064 vg_assert(debug_abbrev_offset < escn_debug_abbv.szB); 1065 cc->debug_abbv = escn_debug_abbv; 1066 cc->debug_abbv.ioff += debug_abbrev_offset; 1067 cc->debug_abbv.szB -= debug_abbrev_offset; 1068 1069 init_ht_abbvs(cc, td3); 1070 } 1071 1072 /* This represents a single signatured type. It maps a type signature 1073 (a ULong) to a cooked DIE offset. Objects of this type are stored 1074 in the type signature hash table. */ 1075 typedef 1076 struct D3SignatureType { 1077 struct D3SignatureType *next; 1078 UWord data; 1079 ULong type_signature; 1080 UWord die; 1081 } 1082 D3SignatureType; 1083 1084 /* Record a signatured type in the hash table. */ 1085 static void record_signatured_type ( VgHashTable *tab, 1086 ULong type_signature, 1087 UWord die ) 1088 { 1089 D3SignatureType *dstype = ML_(dinfo_zalloc) ( "di.readdwarf3.sigtype", 1090 sizeof(D3SignatureType) ); 1091 dstype->data = (UWord) type_signature; 1092 dstype->type_signature = type_signature; 1093 dstype->die = die; 1094 VG_(HT_add_node) ( tab, dstype ); 1095 } 1096 1097 /* Given a type signature hash table and a type signature, return the 1098 cooked DIE offset of the type. If the type cannot be found, call 1099 BARF. 
*/ 1100 static UWord lookup_signatured_type ( const VgHashTable *tab, 1101 ULong type_signature, 1102 void (*barf)( const HChar* ) __attribute__((noreturn)) ) 1103 { 1104 D3SignatureType *dstype = VG_(HT_lookup) ( tab, (UWord) type_signature ); 1105 /* This may be unwarranted chumminess with the hash table 1106 implementation. */ 1107 while ( dstype != NULL && dstype->type_signature != type_signature) 1108 dstype = dstype->next; 1109 if (dstype == NULL) { 1110 barf("lookup_signatured_type: could not find signatured type"); 1111 /*NOTREACHED*/ 1112 vg_assert(0); 1113 } 1114 return dstype->die; 1115 } 1116 1117 1118 /* Represents Form data. If szB is 1/2/4/8 then the result is in the 1119 lowest 1/2/4/8 bytes of u.val. If szB is zero or negative then the 1120 result is an image section beginning at u.cur and with size -szB. 1121 No other szB values are allowed. */ 1122 typedef 1123 struct { 1124 Long szB; // 1, 2, 4, 8 or non-positive values only. 1125 union { ULong val; DiCursor cur; } u; 1126 } 1127 FormContents; 1128 1129 /* From 'c', get the Form data into 'cts'. Either it gets a 1/2/4/8 1130 byte scalar value, or (a reference to) zero or more bytes starting 1131 at a DiCursor.*/ 1132 static 1133 void get_Form_contents ( /*OUT*/FormContents* cts, 1134 const CUConst* cc, Cursor* c, 1135 Bool td3, DW_FORM form ) 1136 { 1137 VG_(bzero_inline)(cts, sizeof(*cts)); 1138 // !!! keep switch in sync with get_Form_szB. The nr of characters read below 1139 // must be computed similarly in get_Form_szB. 1140 // The consistency is verified in trace_DIE. 
1141 switch (form) { 1142 case DW_FORM_data1: 1143 cts->u.val = (ULong)(UChar)get_UChar(c); 1144 cts->szB = 1; 1145 TRACE_D3("%u", (UInt)cts->u.val); 1146 break; 1147 case DW_FORM_data2: 1148 cts->u.val = (ULong)(UShort)get_UShort(c); 1149 cts->szB = 2; 1150 TRACE_D3("%u", (UInt)cts->u.val); 1151 break; 1152 case DW_FORM_data4: 1153 cts->u.val = (ULong)(UInt)get_UInt(c); 1154 cts->szB = 4; 1155 TRACE_D3("%u", (UInt)cts->u.val); 1156 break; 1157 case DW_FORM_data8: 1158 cts->u.val = get_ULong(c); 1159 cts->szB = 8; 1160 TRACE_D3("%llu", cts->u.val); 1161 break; 1162 case DW_FORM_sec_offset: 1163 cts->u.val = (ULong)get_Dwarfish_UWord( c, cc->is_dw64 ); 1164 cts->szB = cc->is_dw64 ? 8 : 4; 1165 TRACE_D3("%llu", cts->u.val); 1166 break; 1167 case DW_FORM_sdata: 1168 cts->u.val = (ULong)(Long)get_SLEB128(c); 1169 cts->szB = 8; 1170 TRACE_D3("%lld", (Long)cts->u.val); 1171 break; 1172 case DW_FORM_udata: 1173 cts->u.val = (ULong)(Long)get_ULEB128(c); 1174 cts->szB = 8; 1175 TRACE_D3("%llu", (Long)cts->u.val); 1176 break; 1177 case DW_FORM_addr: 1178 /* note, this is a hack. DW_FORM_addr is defined as getting 1179 a word the size of the target machine as defined by the 1180 address_size field in the CU Header. However, 1181 parse_CU_Header() rejects all inputs except those for 1182 which address_size == sizeof(Word), hence we can just 1183 treat it as a (host) Word. */ 1184 cts->u.val = (ULong)(UWord)get_UWord(c); 1185 cts->szB = sizeof(UWord); 1186 TRACE_D3("0x%lx", (UWord)cts->u.val); 1187 break; 1188 1189 case DW_FORM_ref_addr: 1190 /* We make the same word-size assumption as DW_FORM_addr. */ 1191 /* What does this really mean? From D3 Sec 7.5.4, 1192 description of "reference", it would appear to reference 1193 some other DIE, by specifying the offset from the 1194 beginning of a .debug_info section. The D3 spec mentions 1195 that this might be in some other shared object and 1196 executable. But I don't see how the name of the other 1197 object/exe is specified. 
1198 1199 At least for the DW_FORM_ref_addrs created by icc11, the 1200 references seem to be within the same object/executable. 1201 So for the moment we merely range-check, to see that they 1202 actually do specify a plausible offset within this 1203 object's .debug_info, and return the value unchanged. 1204 1205 In DWARF 2, DW_FORM_ref_addr is address-sized, but in 1206 DWARF 3 and later, it is offset-sized. 1207 */ 1208 if (cc->version == 2) { 1209 cts->u.val = (ULong)(UWord)get_UWord(c); 1210 cts->szB = sizeof(UWord); 1211 } else { 1212 cts->u.val = get_Dwarfish_UWord(c, cc->is_dw64); 1213 cts->szB = cc->is_dw64 ? sizeof(ULong) : sizeof(UInt); 1214 } 1215 TRACE_D3("0x%lx", (UWord)cts->u.val); 1216 if (0) VG_(printf)("DW_FORM_ref_addr 0x%lx\n", (UWord)cts->u.val); 1217 if (/* the following is surely impossible, but ... */ 1218 !ML_(sli_is_valid)(cc->escn_debug_info) 1219 || cts->u.val >= (ULong)cc->escn_debug_info.szB) { 1220 /* Hmm. Offset is nonsensical for this object's .debug_info 1221 section. Be safe and reject it. */ 1222 cc->barf("get_Form_contents: DW_FORM_ref_addr points " 1223 "outside .debug_info"); 1224 } 1225 break; 1226 1227 case DW_FORM_strp: { 1228 /* this is an offset into .debug_str */ 1229 UWord uw = (UWord)get_Dwarfish_UWord( c, cc->is_dw64 ); 1230 if (!ML_(sli_is_valid)(cc->escn_debug_str) 1231 || uw >= cc->escn_debug_str.szB) 1232 cc->barf("get_Form_contents: DW_FORM_strp " 1233 "points outside .debug_str"); 1234 /* FIXME: check the entire string lies inside debug_str, 1235 not just the first byte of it. 
*/ 1236 DiCursor str 1237 = ML_(cur_plus)( ML_(cur_from_sli)(cc->escn_debug_str), uw ); 1238 if (TD3) { 1239 HChar* tmp = ML_(cur_read_strdup)(str, "di.getFC.1"); 1240 TRACE_D3("(indirect string, offset: 0x%lx): %s", uw, tmp); 1241 ML_(dinfo_free)(tmp); 1242 } 1243 cts->u.cur = str; 1244 cts->szB = - (Long)(1 + (ULong)ML_(cur_strlen)(str)); 1245 break; 1246 } 1247 case DW_FORM_string: { 1248 DiCursor str = get_AsciiZ(c); 1249 if (TD3) { 1250 HChar* tmp = ML_(cur_read_strdup)(str, "di.getFC.2"); 1251 TRACE_D3("%s", tmp); 1252 ML_(dinfo_free)(tmp); 1253 } 1254 cts->u.cur = str; 1255 /* strlen is safe because get_AsciiZ already 'vetted' the 1256 entire string */ 1257 cts->szB = - (Long)(1 + (ULong)ML_(cur_strlen)(str)); 1258 break; 1259 } 1260 case DW_FORM_ref1: { 1261 UChar u8 = get_UChar(c); 1262 UWord res = cc->cu_start_offset + (UWord)u8; 1263 cts->u.val = (ULong)res; 1264 cts->szB = sizeof(UWord); 1265 TRACE_D3("<%lx>", res); 1266 break; 1267 } 1268 case DW_FORM_ref2: { 1269 UShort u16 = get_UShort(c); 1270 UWord res = cc->cu_start_offset + (UWord)u16; 1271 cts->u.val = (ULong)res; 1272 cts->szB = sizeof(UWord); 1273 TRACE_D3("<%lx>", res); 1274 break; 1275 } 1276 case DW_FORM_ref4: { 1277 UInt u32 = get_UInt(c); 1278 UWord res = cc->cu_start_offset + (UWord)u32; 1279 cts->u.val = (ULong)res; 1280 cts->szB = sizeof(UWord); 1281 TRACE_D3("<%lx>", res); 1282 break; 1283 } 1284 case DW_FORM_ref8: { 1285 ULong u64 = get_ULong(c); 1286 UWord res = cc->cu_start_offset + (UWord)u64; 1287 cts->u.val = (ULong)res; 1288 cts->szB = sizeof(UWord); 1289 TRACE_D3("<%lx>", res); 1290 break; 1291 } 1292 case DW_FORM_ref_udata: { 1293 ULong u64 = get_ULEB128(c); 1294 UWord res = cc->cu_start_offset + (UWord)u64; 1295 cts->u.val = (ULong)res; 1296 cts->szB = sizeof(UWord); 1297 TRACE_D3("<%lx>", res); 1298 break; 1299 } 1300 case DW_FORM_flag: { 1301 UChar u8 = get_UChar(c); 1302 TRACE_D3("%u", (UInt)u8); 1303 cts->u.val = (ULong)u8; 1304 cts->szB = 1; 1305 break; 1306 } 1307 case 
DW_FORM_flag_present: 1308 TRACE_D3("1"); 1309 cts->u.val = 1; 1310 cts->szB = 1; 1311 break; 1312 case DW_FORM_block1: { 1313 ULong u64b; 1314 ULong u64 = (ULong)get_UChar(c); 1315 DiCursor block = get_DiCursor_from_Cursor(c); 1316 TRACE_D3("%llu byte block: ", u64); 1317 for (u64b = u64; u64b > 0; u64b--) { 1318 UChar u8 = get_UChar(c); 1319 TRACE_D3("%x ", (UInt)u8); 1320 } 1321 cts->u.cur = block; 1322 cts->szB = - (Long)u64; 1323 break; 1324 } 1325 case DW_FORM_block2: { 1326 ULong u64b; 1327 ULong u64 = (ULong)get_UShort(c); 1328 DiCursor block = get_DiCursor_from_Cursor(c); 1329 TRACE_D3("%llu byte block: ", u64); 1330 for (u64b = u64; u64b > 0; u64b--) { 1331 UChar u8 = get_UChar(c); 1332 TRACE_D3("%x ", (UInt)u8); 1333 } 1334 cts->u.cur = block; 1335 cts->szB = - (Long)u64; 1336 break; 1337 } 1338 case DW_FORM_block4: { 1339 ULong u64b; 1340 ULong u64 = (ULong)get_UInt(c); 1341 DiCursor block = get_DiCursor_from_Cursor(c); 1342 TRACE_D3("%llu byte block: ", u64); 1343 for (u64b = u64; u64b > 0; u64b--) { 1344 UChar u8 = get_UChar(c); 1345 TRACE_D3("%x ", (UInt)u8); 1346 } 1347 cts->u.cur = block; 1348 cts->szB = - (Long)u64; 1349 break; 1350 } 1351 case DW_FORM_exprloc: 1352 case DW_FORM_block: { 1353 ULong u64b; 1354 ULong u64 = (ULong)get_ULEB128(c); 1355 DiCursor block = get_DiCursor_from_Cursor(c); 1356 TRACE_D3("%llu byte block: ", u64); 1357 for (u64b = u64; u64b > 0; u64b--) { 1358 UChar u8 = get_UChar(c); 1359 TRACE_D3("%x ", (UInt)u8); 1360 } 1361 cts->u.cur = block; 1362 cts->szB = - (Long)u64; 1363 break; 1364 } 1365 case DW_FORM_ref_sig8: { 1366 ULong u64b; 1367 ULong signature = get_ULong (c); 1368 ULong work = signature; 1369 TRACE_D3("8 byte signature: "); 1370 for (u64b = 8; u64b > 0; u64b--) { 1371 UChar u8 = work & 0xff; 1372 TRACE_D3("%x ", (UInt)u8); 1373 work >>= 8; 1374 } 1375 1376 /* cc->signature_types is only built/initialised when 1377 VG_(clo_read_var_info) is set. In this case, 1378 the DW_FORM_ref_sig8 can be looked up. 
1379 But we can also arrive here when only reading inline info 1380 and VG_(clo_trace_symtab) is set. In such a case, 1381 we cannot lookup the DW_FORM_ref_sig8, we rather assign 1382 a dummy value. This is a kludge, but otherwise, 1383 the 'dwarf inline info reader' tracing would have to 1384 do type processing/reading. It is better to avoid 1385 adding significant 'real' processing only due to tracing. */ 1386 if (VG_(clo_read_var_info)) { 1387 /* Due to the way that the hash table is constructed, the 1388 resulting DIE offset here is already "cooked". See 1389 cook_die_using_form. */ 1390 cts->u.val = lookup_signatured_type (cc->signature_types, signature, 1391 c->barf); 1392 } else { 1393 vg_assert (td3); 1394 vg_assert (VG_(clo_read_inline_info)); 1395 TRACE_D3("<not dereferencing signature type>"); 1396 cts->u.val = 0; /* Assign a dummy/rubbish value */ 1397 } 1398 cts->szB = sizeof(UWord); 1399 break; 1400 } 1401 case DW_FORM_indirect: 1402 get_Form_contents (cts, cc, c, td3, (DW_FORM)get_ULEB128(c)); 1403 return; 1404 1405 case DW_FORM_GNU_ref_alt: 1406 cts->u.val = get_Dwarfish_UWord(c, cc->is_dw64); 1407 cts->szB = cc->is_dw64 ? sizeof(ULong) : sizeof(UInt); 1408 TRACE_D3("0x%lx", (UWord)cts->u.val); 1409 if (0) VG_(printf)("DW_FORM_GNU_ref_alt 0x%lx\n", (UWord)cts->u.val); 1410 if (/* the following is surely impossible, but ... */ 1411 !ML_(sli_is_valid)(cc->escn_debug_info_alt)) 1412 cc->barf("get_Form_contents: DW_FORM_GNU_ref_addr used, " 1413 "but no alternate .debug_info"); 1414 else if (cts->u.val >= (ULong)cc->escn_debug_info_alt.szB) { 1415 /* Hmm. Offset is nonsensical for this object's .debug_info 1416 section. Be safe and reject it. 
*/ 1417 cc->barf("get_Form_contents: DW_FORM_GNU_ref_addr points " 1418 "outside alternate .debug_info"); 1419 } 1420 break; 1421 1422 case DW_FORM_GNU_strp_alt: { 1423 /* this is an offset into alternate .debug_str */ 1424 SizeT uw = (UWord)get_Dwarfish_UWord( c, cc->is_dw64 ); 1425 if (!ML_(sli_is_valid)(cc->escn_debug_str_alt)) 1426 cc->barf("get_Form_contents: DW_FORM_GNU_strp_alt used, " 1427 "but no alternate .debug_str"); 1428 else if (uw >= cc->escn_debug_str_alt.szB) 1429 cc->barf("get_Form_contents: DW_FORM_GNU_strp_alt " 1430 "points outside alternate .debug_str"); 1431 /* FIXME: check the entire string lies inside debug_str, 1432 not just the first byte of it. */ 1433 DiCursor str 1434 = ML_(cur_plus)( ML_(cur_from_sli)(cc->escn_debug_str_alt), uw); 1435 if (TD3) { 1436 HChar* tmp = ML_(cur_read_strdup)(str, "di.getFC.3"); 1437 TRACE_D3("(indirect alt string, offset: 0x%lx): %s", uw, tmp); 1438 ML_(dinfo_free)(tmp); 1439 } 1440 cts->u.cur = str; 1441 cts->szB = - (Long)(1 + (ULong)ML_(cur_strlen)(str)); 1442 break; 1443 } 1444 1445 default: 1446 VG_(printf)( 1447 "get_Form_contents: unhandled %d (%s) at <%llx>\n", 1448 form, ML_(pp_DW_FORM)(form), get_position_of_Cursor(c)); 1449 c->barf("get_Form_contents: unhandled DW_FORM"); 1450 } 1451 } 1452 1453 static inline UInt sizeof_Dwarfish_UWord (Bool is_dw64) 1454 { 1455 if (is_dw64) 1456 return sizeof(ULong); 1457 else 1458 return sizeof(UInt); 1459 } 1460 1461 #define VARSZ_FORM 0xffffffff 1462 /* If the form is a fixed length form, return the nr of bytes for this form. 1463 If the form is a variable length form, return VARSZ_FORM. */ 1464 static 1465 UInt get_Form_szB (const CUConst* cc, DW_FORM form ) 1466 { 1467 // !!! keep switch in sync with get_Form_contents : the nr of bytes 1468 // read from a cursor by get_Form_contents must be returned by 1469 // the below switch. 1470 // The consistency is verified in trace_DIE. 
1471 switch (form) { 1472 case DW_FORM_data1: return 1; 1473 case DW_FORM_data2: return 2; 1474 case DW_FORM_data4: return 4; 1475 case DW_FORM_data8: return 8; 1476 case DW_FORM_sec_offset: 1477 if (cc->is_dw64) 1478 return 8; 1479 else 1480 return 4; 1481 case DW_FORM_sdata: 1482 return VARSZ_FORM; 1483 case DW_FORM_udata: 1484 return VARSZ_FORM; 1485 case DW_FORM_addr: // See hack in get_Form_contents 1486 return sizeof(UWord); 1487 case DW_FORM_ref_addr: // See hack in get_Form_contents 1488 if (cc->version == 2) 1489 return sizeof(UWord); 1490 else 1491 return sizeof_Dwarfish_UWord (cc->is_dw64); 1492 case DW_FORM_strp: 1493 return sizeof_Dwarfish_UWord (cc->is_dw64); 1494 case DW_FORM_string: 1495 return VARSZ_FORM; 1496 case DW_FORM_ref1: 1497 return 1; 1498 case DW_FORM_ref2: 1499 return 2; 1500 case DW_FORM_ref4: 1501 return 4; 1502 case DW_FORM_ref8: 1503 return 8; 1504 case DW_FORM_ref_udata: 1505 return VARSZ_FORM; 1506 case DW_FORM_flag: 1507 return 1; 1508 case DW_FORM_flag_present: 1509 return 0; // !!! special case, no data. 1510 case DW_FORM_block1: 1511 return VARSZ_FORM; 1512 case DW_FORM_block2: 1513 return VARSZ_FORM; 1514 case DW_FORM_block4: 1515 return VARSZ_FORM; 1516 case DW_FORM_exprloc: 1517 case DW_FORM_block: 1518 return VARSZ_FORM; 1519 case DW_FORM_ref_sig8: 1520 return 8; 1521 case DW_FORM_indirect: 1522 return VARSZ_FORM; 1523 case DW_FORM_GNU_ref_alt: 1524 return sizeof_Dwarfish_UWord(cc->is_dw64); 1525 case DW_FORM_GNU_strp_alt: 1526 return sizeof_Dwarfish_UWord(cc->is_dw64); 1527 default: 1528 VG_(printf)( 1529 "get_Form_szB: unhandled %d (%s)\n", 1530 form, ML_(pp_DW_FORM)(form)); 1531 cc->barf("get_Form_contents: unhandled DW_FORM"); 1532 } 1533 } 1534 1535 /* Skip a DIE as described by abbv. 1536 If the DIE has a sibling, *sibling is set to the skipped DIE sibling value. 
*/ 1537 static 1538 void skip_DIE (UWord *sibling, 1539 Cursor* c_die, 1540 const g_abbv *abbv, 1541 const CUConst* cc) 1542 { 1543 UInt nf_i; 1544 FormContents cts; 1545 nf_i = 0; 1546 while (True) { 1547 if (abbv->nf[nf_i].at_name == DW_AT_sibling) { 1548 get_Form_contents( &cts, cc, c_die, False /*td3*/, 1549 (DW_FORM)abbv->nf[nf_i].at_form ); 1550 if ( cts.szB > 0 ) 1551 *sibling = cts.u.val; 1552 nf_i++; 1553 } else if (abbv->nf[nf_i].skip_szB == VARSZ_FORM) { 1554 get_Form_contents( &cts, cc, c_die, False /*td3*/, 1555 (DW_FORM)abbv->nf[nf_i].at_form ); 1556 nf_i++; 1557 } else { 1558 advance_position_of_Cursor (c_die, (ULong)abbv->nf[nf_i].skip_szB); 1559 nf_i = abbv->nf[nf_i].next_nf; 1560 } 1561 if (nf_i == 0) 1562 break; 1563 } 1564 } 1565 1566 1567 /*------------------------------------------------------------*/ 1568 /*--- ---*/ 1569 /*--- Parsing of variable-related DIEs ---*/ 1570 /*--- ---*/ 1571 /*------------------------------------------------------------*/ 1572 1573 typedef 1574 struct _TempVar { 1575 const HChar* name; /* in DebugInfo's .strpool */ 1576 /* Represent ranges economically. nRanges is the number of 1577 ranges. Cases: 1578 0: .rngOneMin .rngOneMax .manyRanges are all zero 1579 1: .rngOneMin .rngOneMax hold the range; .rngMany is NULL 1580 2: .rngOneMin .rngOneMax are zero; .rngMany holds the ranges. 1581 This is merely an optimisation to avoid having to allocate 1582 and free the XArray in the common (98%) of cases where there 1583 is zero or one address ranges. */ 1584 UWord nRanges; 1585 Addr rngOneMin; 1586 Addr rngOneMax; 1587 XArray* rngMany; /* of AddrRange. NON-UNIQUE PTR in AR_DINFO. */ 1588 /* Do not free .rngMany, since many TempVars will have the same 1589 value. Instead the associated storage is to be freed by 1590 deleting 'rangetree', which stores a single copy of each 1591 range. 
*/ 1592 /* --- */ 1593 Int level; 1594 UWord typeR; /* a cuOff */ 1595 GExpr* gexpr; /* for this variable */ 1596 GExpr* fbGX; /* to find the frame base of the enclosing fn, if 1597 any */ 1598 UInt fndn_ix; /* declaring file/dirname index in fndnpool, or 0 */ 1599 Int fLine; /* declaring file line number, or zero */ 1600 /* offset in .debug_info, so that abstract instances can be 1601 found to satisfy references from concrete instances. */ 1602 UWord dioff; 1603 UWord absOri; /* so the absOri fields refer to dioff fields 1604 in some other, related TempVar. */ 1605 } 1606 TempVar; 1607 1608 typedef 1609 struct { 1610 /* Contains the range stack: a stack of address ranges, one 1611 stack entry for each nested scope. 1612 1613 Some scope entries are created by function definitions 1614 (DW_AT_subprogram), and for those, we also note the GExpr 1615 derived from its DW_AT_frame_base attribute, if any. 1616 Consequently it should be possible to find, for any 1617 variable's DIE, the GExpr for the the containing function's 1618 DW_AT_frame_base by scanning back through the stack to find 1619 the nearest entry associated with a function. This somewhat 1620 elaborate scheme is provided so as to make it possible to 1621 obtain the correct DW_AT_frame_base expression even in the 1622 presence of nested functions (or to be more precise, in the 1623 presence of nested DW_AT_subprogram DIEs). 1624 */ 1625 Int sp; /* [sp] is innermost active entry; sp==-1 for empty 1626 stack */ 1627 Int stack_size; 1628 XArray **ranges; /* XArray of AddrRange */ 1629 Int *level; /* D3 DIE levels */ 1630 Bool *isFunc; /* from DW_AT_subprogram? */ 1631 GExpr **fbGX; /* if isFunc, contains the FB expr, else NULL */ 1632 /* The fndn_ix file name/dirname table. Is a mapping from dwarf 1633 integer index to the index in di->fndnpool. 
*/ 1634 XArray* /* of UInt* */ fndn_ix_Table; 1635 } 1636 D3VarParser; 1637 1638 /* Completely initialise a variable parser object */ 1639 static void 1640 var_parser_init ( D3VarParser *parser ) 1641 { 1642 parser->sp = -1; 1643 parser->stack_size = 0; 1644 parser->ranges = NULL; 1645 parser->level = NULL; 1646 parser->isFunc = NULL; 1647 parser->fbGX = NULL; 1648 parser->fndn_ix_Table = NULL; 1649 } 1650 1651 /* Release any memory hanging off a variable parser object */ 1652 static void 1653 var_parser_release ( D3VarParser *parser ) 1654 { 1655 ML_(dinfo_free)( parser->ranges ); 1656 ML_(dinfo_free)( parser->level ); 1657 ML_(dinfo_free)( parser->isFunc ); 1658 ML_(dinfo_free)( parser->fbGX ); 1659 } 1660 1661 static void varstack_show ( const D3VarParser* parser, const HChar* str ) 1662 { 1663 Word i, j; 1664 VG_(printf)(" varstack (%s) {\n", str); 1665 for (i = 0; i <= parser->sp; i++) { 1666 XArray* xa = parser->ranges[i]; 1667 vg_assert(xa); 1668 VG_(printf)(" [%ld] (level %d)", i, parser->level[i]); 1669 if (parser->isFunc[i]) { 1670 VG_(printf)(" (fbGX=%p)", parser->fbGX[i]); 1671 } else { 1672 vg_assert(parser->fbGX[i] == NULL); 1673 } 1674 VG_(printf)(": "); 1675 if (VG_(sizeXA)( xa ) == 0) { 1676 VG_(printf)("** empty PC range array **"); 1677 } else { 1678 for (j = 0; j < VG_(sizeXA)( xa ); j++) { 1679 AddrRange* range = (AddrRange*) VG_(indexXA)( xa, j ); 1680 vg_assert(range); 1681 VG_(printf)("[%#lx,%#lx] ", range->aMin, range->aMax); 1682 } 1683 } 1684 VG_(printf)("\n"); 1685 } 1686 VG_(printf)(" }\n"); 1687 } 1688 1689 /* Remove from the stack, all entries with .level > 'level' */ 1690 static 1691 void varstack_preen ( D3VarParser* parser, Bool td3, Int level ) 1692 { 1693 Bool changed = False; 1694 vg_assert(parser->sp < parser->stack_size); 1695 while (True) { 1696 vg_assert(parser->sp >= -1); 1697 if (parser->sp == -1) break; 1698 if (parser->level[parser->sp] <= level) break; 1699 if (0) 1700 TRACE_D3("BBBBAAAA varstack_pop [newsp=%d]\n", 
parser->sp-1); 1701 vg_assert(parser->ranges[parser->sp]); 1702 /* Who allocated this xa? get_range_list() or 1703 unitary_range_list(). */ 1704 VG_(deleteXA)( parser->ranges[parser->sp] ); 1705 parser->sp--; 1706 changed = True; 1707 } 1708 if (changed && td3) 1709 varstack_show( parser, "after preen" ); 1710 } 1711 1712 static void varstack_push ( const CUConst* cc, 1713 D3VarParser* parser, 1714 Bool td3, 1715 XArray* ranges, Int level, 1716 Bool isFunc, GExpr* fbGX ) { 1717 if (0) 1718 TRACE_D3("BBBBAAAA varstack_push[newsp=%d]: %d %p\n", 1719 parser->sp+1, level, ranges); 1720 1721 /* First we need to zap everything >= 'level', as we are about to 1722 replace any previous entry at 'level', so .. */ 1723 varstack_preen(parser, /*td3*/False, level-1); 1724 1725 vg_assert(parser->sp >= -1); 1726 vg_assert(parser->sp < parser->stack_size); 1727 if (parser->sp == parser->stack_size - 1) { 1728 parser->stack_size += 48; 1729 parser->ranges = 1730 ML_(dinfo_realloc)("di.readdwarf3.varpush.1", parser->ranges, 1731 parser->stack_size * sizeof parser->ranges[0]); 1732 parser->level = 1733 ML_(dinfo_realloc)("di.readdwarf3.varpush.2", parser->level, 1734 parser->stack_size * sizeof parser->level[0]); 1735 parser->isFunc = 1736 ML_(dinfo_realloc)("di.readdwarf3.varpush.3", parser->isFunc, 1737 parser->stack_size * sizeof parser->isFunc[0]); 1738 parser->fbGX = 1739 ML_(dinfo_realloc)("di.readdwarf3.varpush.4", parser->fbGX, 1740 parser->stack_size * sizeof parser->fbGX[0]); 1741 } 1742 if (parser->sp >= 0) 1743 vg_assert(parser->level[parser->sp] < level); 1744 parser->sp++; 1745 vg_assert(ranges != NULL); 1746 if (!isFunc) vg_assert(fbGX == NULL); 1747 parser->ranges[parser->sp] = ranges; 1748 parser->level[parser->sp] = level; 1749 parser->isFunc[parser->sp] = isFunc; 1750 parser->fbGX[parser->sp] = fbGX; 1751 if (TD3) 1752 varstack_show( parser, "after push" ); 1753 } 1754 1755 1756 /* cts is derived from a DW_AT_location and so refers either to a 1757 location 
expression or to a location list. Figure out which, and 1758 in both cases bundle the expression or location list into a 1759 so-called GExpr (guarded expression). */ 1760 __attribute__((noinline)) 1761 static GExpr* get_GX ( const CUConst* cc, Bool td3, const FormContents* cts ) 1762 { 1763 GExpr* gexpr = NULL; 1764 if (cts->szB < 0) { 1765 /* represents a non-empty in-line location expression, and 1766 cts->u.cur points at the image bytes */ 1767 gexpr = make_singleton_GX( cts->u.cur, (ULong)(- cts->szB) ); 1768 } 1769 else 1770 if (cts->szB > 0) { 1771 /* represents a location list. cts->u.val is the offset of it 1772 in .debug_loc. */ 1773 if (!cc->cu_svma_known) 1774 cc->barf("get_GX: location list, but CU svma is unknown"); 1775 gexpr = make_general_GX( cc, td3, cts->u.val, cc->cu_svma ); 1776 } 1777 else { 1778 vg_assert(0); /* else caller is bogus */ 1779 } 1780 return gexpr; 1781 } 1782 1783 /* Returns an xarray* of directory names (indexed by the dwarf dirname 1784 integer). 1785 If 'compdir' is NULL, entry [0] will be set to "." 1786 otherwise entry [0] is set to compdir. 1787 Entry [0] basically means "the current directory of the compilation", 1788 whatever that means, according to the DWARF3 spec. 1789 FIXME??? readdwarf3.c/readdwarf.c have a lot of duplicated code */ 1790 static 1791 XArray* read_dirname_xa (DebugInfo* di, const HChar *compdir, 1792 Cursor *c, 1793 Bool td3 ) 1794 { 1795 XArray* dirname_xa; /* xarray of HChar* dirname */ 1796 const HChar* dirname; 1797 UInt compdir_len; 1798 1799 dirname_xa = VG_(newXA) (ML_(dinfo_zalloc), "di.rdxa.1", ML_(dinfo_free), 1800 sizeof(HChar*) ); 1801 1802 if (compdir == NULL) { 1803 dirname = "."; 1804 compdir_len = 1; 1805 } else { 1806 dirname = compdir; 1807 compdir_len = VG_(strlen)(compdir); 1808 } 1809 VG_(addToXA) (dirname_xa, &dirname); 1810 1811 TRACE_D3(" The Directory Table%s\n", 1812 peek_UChar(c) == 0 ? " is empty." 
: ":" );

   /* Each directory entry is a NUL-terminated string; a zero byte
      where the next entry would start marks the end of the table. */
   while (peek_UChar(c) != 0) {

      DiCursor cur = get_AsciiZ(c);
      HChar* data_str = ML_(cur_read_strdup)( cur, "dirname_xa.1" );
      TRACE_D3(" %s\n", data_str);

      /* If data_str[0] is '/', then 'data' is an absolute path and we
         don't mess with it.  Otherwise, construct the
         path 'compdir' ++ "/" ++ 'data'. */

      if (data_str[0] != '/'
          /* not an absolute path */
          && compdir
          /* actually got something sensible for compdir */
          && compdir_len)
      {
         SizeT len = compdir_len + 1 + VG_(strlen)(data_str);
         HChar *buf = ML_(dinfo_zalloc)("dirname_xa.2", len + 1);

         VG_(strcpy)(buf, compdir);
         VG_(strcat)(buf, "/");
         VG_(strcat)(buf, data_str);

         dirname = ML_(addStr)(di, buf, len);
         VG_(addToXA) (dirname_xa, &dirname);
         if (0) VG_(printf)("rel path %s\n", buf);
         /* 'buf' was only scratch space for the concatenation. */
         ML_(dinfo_free)(buf);
      } else {
         /* just use 'data'. */
         dirname = ML_(addStr)(di,data_str,-1);
         VG_(addToXA) (dirname_xa, &dirname);
         if (0) VG_(printf)("abs path %s\n", data_str);
      }

      ML_(dinfo_free)(data_str);
   }

   TRACE_D3 ("\n");

   /* Consume the terminating zero byte of the directory table. */
   if (get_UChar (c) != 0) {
      ML_(symerr)(NULL, True,
                  "could not get NUL at end of DWARF directory table");
      VG_(deleteXA)(dirname_xa);
      return NULL;
   }

   return dirname_xa;
}

/* Read the file name table of the line number program that starts at
   'debug_line_offset' in cc->escn_debug_line, and append one fndn
   index per file to 'fndn_ix_Table', which must be empty on entry.

   Entry 0 of the resulting table is a dummy "<unknown_file>" entry,
   so that the table can be indexed directly with the (1-based) DWARF
   file numbers.  'compdir' is handed to read_dirname_xa when reading
   the directory table.  The header fields that precede the tables
   are read only in order to step over them.  Problems with the
   section or its version are reported through cc->barf. */
static
void read_filename_table( /*MOD*/XArray* /* of UInt* */ fndn_ix_Table,
                          const HChar* compdir,
                          const CUConst* cc, ULong debug_line_offset,
                          Bool td3 )
{
   Bool   is_dw64;
   Cursor c;
   Word   i;
   UShort version;
   UChar  opcode_base;
   const HChar* str;
   XArray* dirname_xa;   /* xarray of HChar* dirname */
   ULong  dir_xa_ix;     /* Index in dirname_xa, as read from dwarf info. */
   const HChar* dirname;
   UInt   fndn_ix;

   vg_assert(fndn_ix_Table && cc && cc->barf);
   if (!ML_(sli_is_valid)(cc->escn_debug_line)
       || cc->escn_debug_line.szB <= debug_line_offset) {
      cc->barf("read_filename_table: .debug_line is missing?");
   }

   init_Cursor( &c, cc->escn_debug_line, debug_line_offset, cc->barf,
                "Overrun whilst reading .debug_line section(1)" );

   /* unit_length = */
   get_Initial_Length( &is_dw64, &c,
                       "read_filename_table: invalid initial-length field" );
   version = get_UShort( &c );
   if (version != 2 && version != 3 && version != 4)
      cc->barf("read_filename_table: Only DWARF version 2, 3 and 4 line info "
               "is currently supported.");
   /*header_length              = (ULong)*/ get_Dwarfish_UWord( &c, is_dw64 );
   /*minimum_instruction_length = */ get_UChar( &c );
   /* This field is only read for version >= 4 headers. */
   if (version >= 4)
      /*maximum_operations_per_insn = */ get_UChar( &c );
   /*default_is_stmt            = */ get_UChar( &c );
   /*line_base                  = (Char)*/ get_UChar( &c );
   /*line_range                 = */ get_UChar( &c );
   opcode_base = get_UChar( &c );
   /* skip over "standard_opcode_lengths" */
   for (i = 1; i < (Word)opcode_base; i++)
      (void)get_UChar( &c );

   /* May be NULL if the directory table was not properly terminated;
      in that case every file simply gets a NULL dirname below. */
   dirname_xa = read_dirname_xa(cc->di, compdir, &c, td3);

   /* Read and record the file names table */
   vg_assert( VG_(sizeXA)( fndn_ix_Table ) == 0 );
   /* Add a dummy index-zero entry.  DWARF3 numbers its files
      from 1, for some reason. */
   fndn_ix = ML_(addFnDn) ( cc->di, "<unknown_file>", NULL );
   VG_(addToXA)( fndn_ix_Table, &fndn_ix );
   while (peek_UChar(&c) != 0) {
      DiCursor cur = get_AsciiZ(&c);
      str = ML_(addStrFromCursor)( cc->di, cur );
      dir_xa_ix = get_ULEB128( &c );
      /* Out-of-range directory indexes are tolerated: the file is
         then recorded without a directory name. */
      if (dirname_xa != NULL
          && dir_xa_ix >= 0 && dir_xa_ix < VG_(sizeXA) (dirname_xa))
         dirname = *(HChar**)VG_(indexXA) ( dirname_xa, dir_xa_ix );
      else
         dirname = NULL;
      fndn_ix = ML_(addFnDn)( cc->di, str, dirname);
      TRACE_D3(" read_filename_table: %ld fndn_ix %d %s %s\n",
               VG_(sizeXA)(fndn_ix_Table), fndn_ix,
               dirname, str);
      VG_(addToXA)( fndn_ix_Table, &fndn_ix );
      (void)get_ULEB128( &c ); /* skip last mod time */
      (void)get_ULEB128( &c ); /* file size */
   }
   /* We're done!  The rest of it is not interesting. */
   if (dirname_xa != NULL)
      VG_(deleteXA)(dirname_xa);
}

/* setup_cu_svma to be called when a cu is found at level 0,
   to establish the cu_svma. */
static void setup_cu_svma(CUConst* cc, Bool have_lo, Addr ip_lo, Bool td3)
{
   Addr cu_svma;
   /* We have potentially more than one type of parser parsing the
      dwarf information.  At least currently, each parser establishes
      the cu_svma.  So, in case cu_svma_known, we check that the same
      result is obtained by the 2nd parsing of the cu.

      Alternatively, we could reset cu_svma_known after each parsing
      and then check that we only see a single DW_TAG_compile_unit DIE
      at level 0, DWARF3 only allows exactly one top level DIE per
      CU. */

   if (have_lo)
      cu_svma = ip_lo;
   else {
      /* Now, it may be that this DIE doesn't tell us the CU's
         SVMA, by way of not having a DW_AT_low_pc.  That's OK --
         the CU doesn't *have* to have its SVMA specified.

         But as per last para D3 spec sec 3.1.1 ("Normal and
         Partial Compilation Unit Entries"), "If the base address
         (viz, the SVMA) is undefined, then any DWARF entry of
         structure defined in terms of the base address of that
         compilation unit is not valid.".  So that means, if whilst
         processing the children of this top level DIE (or their
         children, etc) we see a DW_AT_range, and cu_svma_known is
         False, then the DIE that contains it is (per the spec)
         invalid, and we can legitimately stop and complain. */
      /* .. whereas The Reality is, simply assume the SVMA is zero
         if it isn't specified. */
      cu_svma = 0;
   }

   if (cc->cu_svma_known) {
      vg_assert (cu_svma == cc->cu_svma);
   } else {
      cc->cu_svma_known = True;
      cc->cu_svma = cu_svma;
      if (0)
         TRACE_D3("setup_cu_svma: acquire CU_SVMA of %p\n", (void*) cc->cu_svma);
   }
}

/* Print the DIE whose attributes start at 'saved_die_c_offset': its
   level, (uncooked) position, abbreviation number and tag, followed
   by one line per attribute.  The attribute values are re-read with
   get_Form_contents on a private cursor, so the caller's cursor is
   not moved.  Afterwards, checks that skip_DIE on the same DIE
   reports the same sibling and ends at the same position. */
static void trace_DIE(
   DW_TAG dtag,
   UWord posn,
   Int level,
   UWord saved_die_c_offset,
   const g_abbv *abbv,
   const CUConst* cc)
{
   Cursor c;
   FormContents cts;
   UWord sibling = 0;
   UInt nf_i;
   Bool  debug_types_flag;
   Bool  alt_flag;
   Cursor check_skip;
   UWord check_sibling = 0;

   /* 'posn' is a cooked reference; uncooking it tells us which
      section the DIE actually lives in. */
   posn = uncook_die( cc, posn, &debug_types_flag, &alt_flag );
   init_Cursor (&c,
                debug_types_flag ? cc->escn_debug_types :
                alt_flag ? cc->escn_debug_info_alt : cc->escn_debug_info,
                saved_die_c_offset, cc->barf,
                "Overrun trace_DIE");
   /* Keep a copy of the start position for the skip_DIE check below. */
   check_skip = c;
   VG_(printf)(" <%d><%lx>: Abbrev Number: %llu (%s)%s%s\n",
               level, posn, (ULong) abbv->abbv_code, ML_(pp_DW_TAG)( dtag ),
               debug_types_flag ? " (in .debug_types)" : "",
               alt_flag ? " (in alternate .debug_info)" : "");
   nf_i = 0;
   while (True) {
      DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
      DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
      nf_i++;
      /* A (0,0) name/form pair terminates the attribute list. */
      if (attr == 0 && form == 0) break;
      VG_(printf)(" %-18s: ", ML_(pp_DW_AT)(attr));
      /* Get the form contents, so as to print them */
      get_Form_contents( &cts, cc, &c, True, form );
      if (attr == DW_AT_sibling && cts.szB > 0) {
         sibling = cts.u.val;
      }
      VG_(printf)("\t\n");
   }

   /* Verify that skipping a DIE gives the same displacement as
      tracing (i.e. reading) a DIE.  If there is an inconsistency in
      the nr of bytes read by get_Form_contents and get_Form_szB, this
      should be detected by the below.  Using --trace-symtab=yes
      --read-var-info=yes will ensure all DIEs are systematically
      verified. */
   skip_DIE (&check_sibling, &check_skip, abbv, cc);
   vg_assert (check_sibling == sibling);
   vg_assert (get_position_of_Cursor (&check_skip)
              == get_position_of_Cursor (&c));
}

/* Print the offending DIE (via trace_DIE) and the name of the parser
   that gave up on it, then bail out through cc->barf.  Does not
   return.  'c_die' is accepted but not used here. */
__attribute__((noreturn))
static void dump_bad_die_and_barf(
   const HChar *whichparser,
   DW_TAG dtag,
   UWord posn,
   Int level,
   Cursor* c_die,
   UWord saved_die_c_offset,
   const g_abbv *abbv,
   const CUConst* cc)
{
   trace_DIE (dtag, posn, level, saved_die_c_offset, abbv, cc);
   VG_(printf)("%s:\n", whichparser);
   cc->barf("confused by the above DIE");
}

/* Print the source line number (in this file) at which a parser
   decided that a DIE was bad. */
__attribute__((noinline))
static void bad_DIE_confusion(int linenr)
{
   VG_(printf)("\nparse DIE(readdwarf3.c:%d): confused by:\n", linenr);
}
/* Report the current source line and jump to the enclosing function's
   'bad_DIE' label.  Only usable in functions that define that label. */
#define goto_bad_DIE do {bad_DIE_confusion(__LINE__); goto bad_DIE;} while (0)

/* Parse one DIE on behalf of the variable-info reader.

   The variable scope stack in 'parser' is first preened down to
   'level'-1.  Then, depending on 'dtag':

   - compile/type/partial unit: reads the PC range attributes, the
     compilation directory and the statement list (which causes the
     file name table to be read into parser->fndn_ix_Table), sets up
     the CU's svma if at level 0, and pushes a scope for the unit.

   - lexical block / subprogram: pushes a scope carrying the PC
     range(s) and, for subprograms, the frame base expression, which
     is also appended to 'gexprs'.

   - variable / formal parameter: if enough is known about it, a new
     TempVar is appended to 'tempvars'; its location expression, if
     any, is appended to 'gexprs'.  Range lists with more than one
     entry are shared between variables via 'rangestree'.

   Other tags only get their scope stack preened.  A DIE that cannot
   be made sense of is dumped and reported via cc->barf. */
__attribute__((noinline))
static void parse_var_DIE (
   /*MOD*/WordFM* /* of (XArray* of AddrRange, void) */ rangestree,
   /*MOD*/XArray* /* of TempVar* */ tempvars,
   /*MOD*/XArray* /* of GExpr* */ gexprs,
   /*MOD*/D3VarParser* parser,
   DW_TAG dtag,
   UWord posn,
   Int level,
   Cursor* c_die,
   const g_abbv *abbv,
   CUConst* cc,
   Bool td3
)
{
   FormContents cts;
   UInt nf_i;

   /* Remembered so that a bad DIE can be re-read and dumped. */
   UWord saved_die_c_offset = get_position_of_Cursor( c_die );

   varstack_preen( parser, td3, level-1 );

   if (dtag == DW_TAG_compile_unit
       || dtag == DW_TAG_type_unit
       || dtag == DW_TAG_partial_unit) {
      Bool have_lo    = False;
      Bool have_hi1   = False;
      Bool hiIsRelative = False;
      Bool have_range = False;
      Addr ip_lo    = 0;
      Addr ip_hi1   = 0;
      Addr rangeoff = 0;
      const HChar *compdir = NULL;
      nf_i = 0;
      while (True) {
         DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
         DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
         nf_i++;
         if (attr == 0 && form == 0) break;
         get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
         if (attr == DW_AT_low_pc && cts.szB > 0) {
            ip_lo   = cts.u.val;
            have_lo = True;
         }
         if (attr == DW_AT_high_pc && cts.szB > 0) {
            ip_hi1   = cts.u.val;
            have_hi1 = True;
            /* Any form other than an address is taken to be an
               offset from low_pc. */
            if (form != DW_FORM_addr)
               hiIsRelative = True;
         }
         if (attr == DW_AT_ranges && cts.szB > 0) {
            rangeoff   = cts.u.val;
            have_range = True;
         }
         if (attr == DW_AT_comp_dir) {
            if (cts.szB >= 0)
               cc->barf("parse_var_DIE compdir: expecting indirect string");
            HChar *str = ML_(cur_read_strdup)( cts.u.cur,
                                               "parse_var_DIE.compdir" );
            compdir = ML_(addStr)(cc->di, str, -1);
            ML_(dinfo_free) (str);
         }
         /* Note: uses whatever 'compdir' has been seen so far, so this
            relies on DW_AT_comp_dir preceding DW_AT_stmt_list. */
         if (attr == DW_AT_stmt_list && cts.szB > 0) {
            read_filename_table( parser->fndn_ix_Table, compdir,
                                 cc, cts.u.val, td3 );
         }
      }
      if (have_lo && have_hi1 && hiIsRelative)
         ip_hi1 += ip_lo;

      /* Now, does this give us an opportunity to find this
         CU's svma? */
      if (level == 0)
         setup_cu_svma(cc, have_lo, ip_lo, td3);

      /* Do we have something that looks sane? */
      if (have_lo && have_hi1 && (!have_range)) {
         if (ip_lo < ip_hi1)
            varstack_push( cc, parser, td3,
                           unitary_range_list(ip_lo, ip_hi1 - 1),
                           level,
                           False/*isFunc*/, NULL/*fbGX*/ );
         else if (ip_lo == 0 && ip_hi1 == 0)
            /* CU has no code, presumably?
               Such situations have been encountered for code
               compiled with -ffunction-sections -fdata-sections
               and linked with --gc-sections.  Completely
               eliminated CU gives such 0 lo/hi pc.  Similarly
               to a CU which has no lo/hi/range pc, we push
               an empty range list. */
            varstack_push( cc, parser, td3,
                           empty_range_list(),
                           level,
                           False/*isFunc*/, NULL/*fbGX*/ );
      } else
      if ((!have_lo) && (!have_hi1) && have_range) {
         varstack_push( cc, parser, td3,
                        get_range_list( cc, td3,
                                        rangeoff, cc->cu_svma ),
                        level,
                        False/*isFunc*/, NULL/*fbGX*/ );
      } else
      if ((!have_lo) && (!have_hi1) && (!have_range)) {
         /* CU has no code, presumably? */
         varstack_push( cc, parser, td3,
                        empty_range_list(),
                        level,
                        False/*isFunc*/, NULL/*fbGX*/ );
      } else
      if (have_lo && (!have_hi1) && have_range && ip_lo == 0) {
         /* broken DIE created by gcc-4.3.X ?  Ignore the
            apparently-redundant DW_AT_low_pc and use the DW_AT_ranges
            instead. */
         varstack_push( cc, parser, td3,
                        get_range_list( cc, td3,
                                        rangeoff, cc->cu_svma ),
                        level,
                        False/*isFunc*/, NULL/*fbGX*/ );
      } else {
         if (0) VG_(printf)("I got hlo %d hhi1 %d hrange %d\n",
                            (Int)have_lo, (Int)have_hi1, (Int)have_range);
         goto_bad_DIE;
      }
   }

   if (dtag == DW_TAG_lexical_block || dtag == DW_TAG_subprogram) {
      Bool   have_lo    = False;
      Bool   have_hi1   = False;
      Bool   have_range = False;
      Bool   hiIsRelative = False;
      Addr   ip_lo      = 0;
      Addr   ip_hi1     = 0;
      Addr   rangeoff   = 0;
      Bool   isFunc     = dtag == DW_TAG_subprogram;
      GExpr* fbGX       = NULL;
      nf_i = 0;
      while (True) {
         DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
         DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
         nf_i++;
         if (attr == 0 && form == 0) break;
         get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
         if (attr == DW_AT_low_pc && cts.szB > 0) {
            ip_lo   = cts.u.val;
            have_lo = True;
         }
         if (attr == DW_AT_high_pc && cts.szB > 0) {
            ip_hi1   = cts.u.val;
            have_hi1 = True;
            if (form != DW_FORM_addr)
               hiIsRelative = True;
         }
         if (attr == DW_AT_ranges && cts.szB > 0) {
            rangeoff   = cts.u.val;
            have_range = True;
         }
         if (isFunc
             && attr == DW_AT_frame_base
             && cts.szB != 0 /* either scalar or nonempty block */) {
            fbGX = get_GX( cc, False/*td3*/, &cts );
            vg_assert(fbGX);
            VG_(addToXA)(gexprs, &fbGX);
         }
      }
      if (have_lo && have_hi1 && hiIsRelative)
         ip_hi1 += ip_lo;
      /* Do we have something that looks sane? */
      if (dtag == DW_TAG_subprogram
          && (!have_lo) && (!have_hi1) && (!have_range)) {
         /* This is legit - ignore it.  Sec 3.3.3: "A subroutine entry
            representing a subroutine declaration that is not also a
            definition does not have code address or range
            attributes." */
      } else
      if (dtag == DW_TAG_lexical_block
          && (!have_lo) && (!have_hi1) && (!have_range)) {
         /* I believe this is legit, and means the lexical block
            contains no insns (whatever that might mean).  Ignore. */
      } else
      if (have_lo && have_hi1 && (!have_range)) {
         /* This scope supplies just a single address range. */
         if (ip_lo < ip_hi1)
            varstack_push( cc, parser, td3,
                           unitary_range_list(ip_lo, ip_hi1 - 1),
                           level, isFunc, fbGX );
      } else
      if ((!have_lo) && (!have_hi1) && have_range) {
         /* This scope supplies multiple address ranges via the use of
            a range list. */
         varstack_push( cc, parser, td3,
                        get_range_list( cc, td3,
                                        rangeoff, cc->cu_svma ),
                        level, isFunc, fbGX );
      } else
      if (have_lo && (!have_hi1) && (!have_range)) {
         /* This scope is bogus.  The D3 spec sec 3.4 (Lexical Block
            Entries) says fairly clearly that a scope must have either
            _range or (_low_pc and _high_pc). */
         /* The spec is a bit ambiguous though.  Perhaps a single byte
            range is intended?  See sec 2.17 (Code Addresses And Ranges) */
         /* This case is here because icc9 produced this:
         <2><13bd>: DW_TAG_lexical_block
            DW_AT_decl_line   : 5229
            DW_AT_decl_column : 37
            DW_AT_decl_file   : 1
            DW_AT_low_pc      : 0x401b03
         */
         /* Ignore (seems safer than pushing a single byte range) */
      } else
         goto_bad_DIE;
   }

   if (dtag == DW_TAG_variable || dtag == DW_TAG_formal_parameter) {
      const  HChar* name = NULL;
      UWord  typeR       = D3_INVALID_CUOFF;
      Bool   global      = False;
      GExpr* gexpr       = NULL;
      Int    n_attrs     = 0;
      UWord  abs_ori     = (UWord)D3_INVALID_CUOFF;
      Int    lineNo      = 0;
      UInt   fndn_ix     = 0;
      nf_i = 0;
      while (True) {
         DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
         DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
         nf_i++;
         if (attr == 0 && form == 0) break;
         get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
         n_attrs++;
         if (attr == DW_AT_name && cts.szB < 0) {
            name = ML_(addStrFromCursor)( cc->di, cts.u.cur );
         }
         if (attr == DW_AT_location
             && cts.szB != 0 /* either scalar or nonempty block */) {
            gexpr = get_GX( cc, False/*td3*/, &cts );
            vg_assert(gexpr);
            VG_(addToXA)(gexprs, &gexpr);
         }
         if (attr == DW_AT_type && cts.szB > 0) {
            typeR = cook_die_using_form( cc, cts.u.val, form );
         }
         if (attr == DW_AT_external && cts.szB > 0 && cts.u.val > 0) {
            global = True;
         }
         if (attr == DW_AT_abstract_origin && cts.szB > 0) {
            abs_ori = (UWord)cts.u.val;
         }
         if (attr == DW_AT_declaration && cts.szB > 0 && cts.u.val > 0) {
            /*declaration = True;*/
         }
         if (attr == DW_AT_decl_line && cts.szB > 0) {
            lineNo = (Int)cts.u.val;
         }
         if (attr == DW_AT_decl_file && cts.szB > 0) {
            Int ftabIx = (Int)cts.u.val;
            /* Only accept indexes that are actually in the table;
               otherwise fndn_ix stays at 0. */
            if (ftabIx >= 1
                && ftabIx < VG_(sizeXA)( parser->fndn_ix_Table )) {
               fndn_ix = *(UInt*)VG_(indexXA)( parser->fndn_ix_Table, ftabIx );
            }
            if (0) VG_(printf)("XXX filename fndn_ix = %d %s\n", fndn_ix,
                               ML_(fndn_ix2filename) (cc->di, fndn_ix));
         }
      }
      if (!global && dtag == DW_TAG_variable && level == 1) {
         /* Case of a static variable.  It is better to declare
            it global as the variable is not really related to
            a PC range, as its address can be used by program
            counters outside of the ranges where it is visible. */
         global = True;
      }

      /* We'll collect it under if one of the following three
         conditions holds:
         (1) has location and type    -> completed
         (2) has type only            -> is an abstract instance
         (3) has location and abs_ori -> is a concrete instance
         Name, fndn_ix and line number are all optional frills.
      */
      if ( /* 1 */ (gexpr && typeR != D3_INVALID_CUOFF)
           /* 2 */ || (typeR != D3_INVALID_CUOFF)
           /* 3 */ || (gexpr && abs_ori != (UWord)D3_INVALID_CUOFF) ) {

         /* Add this variable to the list of interesting looking
            variables.  Crucially, note along with it the address
            range(s) associated with the variable, which for locals
            will be the address ranges at the top of the varparser's
            stack. */
         GExpr*   fbGX = NULL;
         Word     i, nRanges;
         const XArray*  /* of AddrRange */ xa;
         TempVar* tv;
         /* Stack can't be empty; we put a dummy entry on it for the
            entire address range before starting with the DIEs for
            this CU. */
         vg_assert(parser->sp >= 0);

         /* If this is a local variable (non-global), try to find
            the GExpr for the DW_AT_frame_base of the containing
            function.  It should have been pushed on the stack at the
            time we encountered its DW_TAG_subprogram DIE, so the way
            to find it is to scan back down the stack looking for it.
            If there isn't an enclosing stack entry marked 'isFunc'
            then we must be seeing variable or formal param DIEs
            outside of a function, so we deem the Dwarf to be
            malformed if that happens.  Note that the fbGX may be NULL
            if the containing DW_TAG_subprogram didn't supply a
            DW_AT_frame_base -- that's OK, but there must actually be
            a containing DW_TAG_subprogram. */
         if (!global) {
            Bool found = False;
            for (i = parser->sp; i >= 0; i--) {
               if (parser->isFunc[i]) {
                  fbGX = parser->fbGX[i];
                  found = True;
                  break;
               }
            }
            if (!found) {
               if (0 && VG_(clo_verbosity) >= 0) {
                  VG_(message)(Vg_DebugMsg,
                     "warning: parse_var_DIE: non-global variable "
                     "outside DW_TAG_subprogram\n");
               }
               /* goto_bad_DIE; */
               /* This seems to happen a lot.  Just ignore it -- if,
                  when we come to evaluation of the location (guarded)
                  expression, it requires a frame base value, and
                  there's no expression for that, then evaluation as a
                  whole will fail.  Harmless - a bit of a waste of
                  cycles but nothing more. */
            }
         }

         /* re "global ? 0 : parser->sp" (twice), if the var is
            marked 'global' then we must put it at the global scope,
            as only the global scope (level 0) covers the entire PC
            address space.  It is asserted elsewhere that level 0
            always covers the entire address space. */
         xa = parser->ranges[global ? 0 : parser->sp];
         nRanges = VG_(sizeXA)(xa);
         vg_assert(nRanges >= 0);

         tv = ML_(dinfo_zalloc)( "di.readdwarf3.pvD.1", sizeof(TempVar) );
         tv->name   = name;
         tv->level  = global ? 0 : parser->sp;
         tv->typeR  = typeR;
         tv->gexpr  = gexpr;
         tv->fbGX   = fbGX;
         tv->fndn_ix= fndn_ix;
         tv->fLine  = lineNo;
         tv->dioff  = posn;
         tv->absOri = abs_ori;

         /* See explanation on definition of type TempVar for the
            reason for this elaboration. */
         tv->nRanges = nRanges;
         tv->rngOneMin = 0;
         tv->rngOneMax = 0;
         tv->rngMany = NULL;
         if (nRanges == 1) {
            AddrRange* range = VG_(indexXA)(xa, 0);
            tv->rngOneMin = range->aMin;
            tv->rngOneMax = range->aMax;
         }
         else if (nRanges > 1) {
            /* See if we already have a range list which is
               structurally identical.  If so, use that; if not, clone
               this one, and add it to our collection. */
            UWord keyW, valW;
            if (VG_(lookupFM)( rangestree, &keyW, &valW, (UWord)xa )) {
               XArray* old = (XArray*)keyW;
               vg_assert(valW == 0);
               vg_assert(old != xa);
               tv->rngMany = old;
            } else {
               XArray* cloned = VG_(cloneXA)( "di.readdwarf3.pvD.2", xa );
               tv->rngMany = cloned;
               VG_(addToFM)( rangestree, (UWord)cloned, 0 );
            }
         }

         VG_(addToXA)( tempvars, &tv );

         TRACE_D3(" Recording this variable, with %ld PC range(s)\n",
                  VG_(sizeXA)(xa) );
         /* collect stats on how effective the ->ranges special
            casing is */
         if (0) {
            static Int ntot=0, ngt=0;
            ntot++;
            if (tv->rngMany) ngt++;
            if (0 == (ntot % 100000))
               VG_(printf)("XXXX %d tot, %d cloned\n", ntot, ngt);
         }

      }

      /* Here are some other weird cases seen in the wild:

            We have a variable with a name and a type, but no
            location.  I guess that's a sign that it has been
            optimised away.  Ignore it.  Here's an example:

            static Int lc_compar(void* n1, void* n2) {
               MC_Chunk* mc1 = *(MC_Chunk**)n1;
               MC_Chunk* mc2 = *(MC_Chunk**)n2;
               return (mc1->data < mc2->data ? -1 : 1);
            }

            Both mc1 and mc2 are like this
            <2><5bc>: Abbrev Number: 21 (DW_TAG_variable)
                DW_AT_name        : mc1
                DW_AT_decl_file   : 1
                DW_AT_decl_line   : 216
                DW_AT_type        : <5d3>

            whereas n1 and n2 do have locations specified.

            ---------------------------------------------

            We see a DW_TAG_formal_parameter with a type, but
            no name and no location.  It's probably part of a function type
            construction, thusly, hence ignore it:
         <1><2b4>: Abbrev Number: 12 (DW_TAG_subroutine_type)
             DW_AT_sibling     : <2c9>
             DW_AT_prototyped  : 1
             DW_AT_type        : <114>
         <2><2be>: Abbrev Number: 13 (DW_TAG_formal_parameter)
             DW_AT_type        : <13e>
         <2><2c3>: Abbrev Number: 13 (DW_TAG_formal_parameter)
             DW_AT_type        : <133>

            ---------------------------------------------

            Is very minimal, like this:
            <4><81d>: Abbrev Number: 44 (DW_TAG_variable)
                DW_AT_abstract_origin: <7ba>
            What that signifies I have no idea.  Ignore.

            ----------------------------------------------

            Is very minimal, like this:
            <200f>: DW_TAG_formal_parameter
                DW_AT_abstract_ori: <1f4c>
                DW_AT_location    : 13440
            What that signifies I have no idea.  Ignore.
            It might be significant, though: the variable at least
            has a location and so might exist somewhere.
            Maybe we should handle this.

            ---------------------------------------------

            <22407>: DW_TAG_variable
              DW_AT_name        : (indirect string, offset: 0x6579):
                                  vgPlain_trampoline_stuff_start
              DW_AT_decl_file   : 29
              DW_AT_decl_line   : 56
              DW_AT_external    : 1
              DW_AT_declaration : 1

            Nameless and typeless variable that has a location?  Who
            knows.  Not me.
            <2><3d178>: Abbrev Number: 22 (DW_TAG_variable)
                 DW_AT_location    : 9 byte block: 3 c0 c7 13 38 0 0 0 0
                                     (DW_OP_addr: 3813c7c0)

            No, really.  Check it out.  gcc is quite simply borked.
            <3><168cc>: Abbrev Number: 141 (DW_TAG_variable)
            // followed by no attributes, and the next DIE is a sibling,
            // not a child
      */
   }
   return;

  bad_DIE:
   dump_bad_die_and_barf("parse_var_DIE", dtag, posn, level,
                         c_die, saved_die_c_offset,
                         abbv,
                         cc);
   /*NOTREACHED*/
}

/* State of the parser that collects inlined-call information. */
typedef
   struct {
      /* The fndn_ix file name/dirname table.  Is a mapping from dwarf
         integer index to the index in di->fndnpool. */
      XArray* /* of UInt* */ fndn_ix_Table;
      UWord sibling; // sibling of the last read DIE (if it has a sibling).
   }
   D3InlParser;

/* Return the function name corresponding to absori.

   absori is a 'cooked' reference to a DIE, i.e. absori can be either
   in cc->escn_debug_info or in cc->escn_debug_info_alt.
   get_inlFnName will uncook absori.

   The returned value is a (permanent) string in DebugInfo's .strchunks.

   LIMITATION: absori must point in the CU of cc.  If absori points
   in another CU, returns "UnknownInlinedFun".

   Here are the problems to retrieve the fun name if absori is in
   another CU: the DIE reading code cannot properly extract data from
   another CU, as the abbv code retrieved in the other CU cannot be
   translated in an abbreviation.  Reading data from the alternate debug
   info also gives problems as the string reference is also in the alternate
   file, but when reading the alt DIE, the string form is a 'local' string,
   but cannot be read in the current CU, but must be read in the alt CU.
   See bug 338803 comment#3 and attachment for a failed attempt to handle
   these problems (failed because with the patch, only one alt abbrev hash
   table is kept, while we must handle all abbreviations in all CUs
   referenced by an absori (being a reference to an alt CU, or a previous
   or following CU).
*/
/* Implementation notes for get_inlFnName:

   'absori' is a UWord, like every other cooked DIE reference handled
   in this file (uncook_die is applied to a UWord in trace_DIE, and
   the result of cook_die_using_form is kept in the UWord 'cdie'
   below).  Taking it as an Int would narrow the reference handed to
   the recursive DW_AT_specification lookup on targets where UWord is
   wider than Int.

   Problems with the referenced DIE (zero tag, bad has_children
   value, not a DW_TAG_subprogram, name not a string) are reported
   via cc->barf.  If the DIE supplies neither a DW_AT_name nor a
   DW_AT_specification that leads to one, "AbsOriFnNameNotFound" is
   returned.

   NOTE(review): a cycle of DW_AT_specification references would make
   this recurse without bound; nothing here guards against it. */
static const HChar* get_inlFnName (UWord absori, const CUConst* cc, Bool td3)
{
   Cursor c;
   const g_abbv *abbv;
   ULong  atag, abbv_code;
   UInt   has_children;
   UWord  posn;
   Bool type_flag, alt_flag;
   const HChar *ret = NULL;
   FormContents cts;
   UInt nf_i;

   posn = uncook_die( cc, absori, &type_flag, &alt_flag);
   if (type_flag)
      cc->barf("get_inlFnName: uncooked absori in type debug info");

   /* LIMITATION: check we are in the same CU.
      If not, return unknown inlined function name. */
   /* if crossing between alt debug info<>normal info
          or posn not in the cu range,
      then it is in another CU. */
   if (alt_flag != cc->is_alt_info
       || posn < cc->cu_start_offset
       || posn >= cc->cu_start_offset + cc->unit_length) {
      /* Warn once only, however many cross-CU references are seen. */
      static Bool reported = False;
      if (!reported && VG_(clo_verbosity) > 1) {
         VG_(message)(Vg_DebugMsg,
                      "Warning: cross-CU LIMITATION: some inlined fn names\n"
                      "might be shown as UnknownInlinedFun\n");
         reported = True;
      }
      TRACE_D3(" <get_inlFnName><%lx>: cross-CU LIMITATION", posn);
      return ML_(addStr)(cc->di, "UnknownInlinedFun", -1);
   }

   init_Cursor (&c, cc->escn_debug_info, posn, cc->barf,
                "Overrun get_inlFnName absori");

   /* Read the DIE header: abbreviation code, then look up its tag. */
   abbv_code = get_ULEB128( &c );
   abbv      = get_abbv ( cc, abbv_code);
   atag      = abbv->atag;
   TRACE_D3(" <get_inlFnName><%lx>: Abbrev Number: %llu (%s)\n",
            posn, abbv_code, ML_(pp_DW_TAG)( atag ) );

   if (atag == 0)
      cc->barf("get_inlFnName: invalid zero tag on DIE");

   has_children = abbv->has_children;
   if (has_children != DW_children_no && has_children != DW_children_yes)
      cc->barf("get_inlFnName: invalid has_children value");

   if (atag != DW_TAG_subprogram)
      cc->barf("get_inlFnName: absori not a subprogram");

   nf_i = 0;
   while (True) {
      DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
      DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
      nf_i++;
      /* A (0,0) name/form pair terminates the attribute list. */
      if (attr == 0 && form == 0) break;
      get_Form_contents( &cts, cc, &c, False/*td3*/, form );
      if (attr == DW_AT_name) {
         HChar *fnname;
         if (cts.szB >= 0)
            cc->barf("get_inlFnName: expecting indirect string");
         fnname = ML_(cur_read_strdup)( cts.u.cur,
                                        "get_inlFnName.1" );
         ret = ML_(addStr)(cc->di, fnname, -1);
         ML_(dinfo_free) (fnname);
         break; /* Name found, get out of the loop, as this has priority over
                   DW_AT_specification. */
      }
      if (attr == DW_AT_specification) {
         UWord cdie;

         if (cts.szB == 0)
            cc->barf("get_inlFnName: AT specification missing");

         /* The recursive call to get_inlFnName will uncook its arg.
            So, we need to cook it here, so as to reference the
            correct section (e.g. the alt info). */
         cdie = cook_die_using_form(cc, (UWord)cts.u.val, form);

         /* hoping that there is no loop */
         ret = get_inlFnName (cdie, cc, td3);
         /* Unclear if having both DW_AT_specification and DW_AT_name is
            possible but in any case, we do not break here.
            If we find later on a DW_AT_name, it will override the name found
            in the DW_AT_specification.*/
      }
   }

   if (ret)
      return ret;
   else {
      TRACE_D3("AbsOriFnNameNotFound");
      return ML_(addStr)(cc->di, "AbsOriFnNameNotFound", -1);
   }
}

/* Returns True if the (possible) children of the current DIE are interesting
   to parse.  Returns False otherwise.
   If the current DIE has a sibling, the non interesting children can
   maybe be skipped (if the DIE has a DW_AT_sibling).
*/ 2680 __attribute__((noinline)) 2681 static Bool parse_inl_DIE ( 2682 /*MOD*/D3InlParser* parser, 2683 DW_TAG dtag, 2684 UWord posn, 2685 Int level, 2686 Cursor* c_die, 2687 const g_abbv *abbv, 2688 CUConst* cc, 2689 Bool td3 2690 ) 2691 { 2692 FormContents cts; 2693 UInt nf_i; 2694 2695 UWord saved_die_c_offset = get_position_of_Cursor( c_die ); 2696 2697 /* Get info about DW_TAG_compile_unit and DW_TAG_partial_unit 'which 2698 in theory could also contain inlined fn calls). */ 2699 if (dtag == DW_TAG_compile_unit || dtag == DW_TAG_partial_unit) { 2700 Bool have_lo = False; 2701 Addr ip_lo = 0; 2702 const HChar *compdir = NULL; 2703 2704 nf_i = 0; 2705 while (True) { 2706 DW_AT attr = (DW_AT) abbv->nf[nf_i].at_name; 2707 DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form; 2708 nf_i++; 2709 if (attr == 0 && form == 0) break; 2710 get_Form_contents( &cts, cc, c_die, False/*td3*/, form ); 2711 if (attr == DW_AT_low_pc && cts.szB > 0) { 2712 ip_lo = cts.u.val; 2713 have_lo = True; 2714 } 2715 if (attr == DW_AT_comp_dir) { 2716 if (cts.szB >= 0) 2717 cc->barf("parse_inl_DIE compdir: expecting indirect string"); 2718 HChar *str = ML_(cur_read_strdup)( cts.u.cur, 2719 "parse_inl_DIE.compdir" ); 2720 compdir = ML_(addStr)(cc->di, str, -1); 2721 ML_(dinfo_free) (str); 2722 } 2723 if (attr == DW_AT_stmt_list && cts.szB > 0) { 2724 read_filename_table( parser->fndn_ix_Table, compdir, 2725 cc, cts.u.val, td3 ); 2726 } 2727 if (attr == DW_AT_sibling && cts.szB > 0) { 2728 parser->sibling = cts.u.val; 2729 } 2730 } 2731 if (level == 0) 2732 setup_cu_svma (cc, have_lo, ip_lo, td3); 2733 } 2734 2735 if (dtag == DW_TAG_inlined_subroutine) { 2736 Bool have_lo = False; 2737 Bool have_hi1 = False; 2738 Bool have_range = False; 2739 Bool hiIsRelative = False; 2740 Addr ip_lo = 0; 2741 Addr ip_hi1 = 0; 2742 Addr rangeoff = 0; 2743 UInt caller_fndn_ix = 0; 2744 Int caller_lineno = 0; 2745 Int inlinedfn_abstract_origin = 0; 2746 2747 nf_i = 0; 2748 while (True) { 2749 DW_AT attr = (DW_AT) 
abbv->nf[nf_i].at_name; 2750 DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form; 2751 nf_i++; 2752 if (attr == 0 && form == 0) break; 2753 get_Form_contents( &cts, cc, c_die, False/*td3*/, form ); 2754 if (attr == DW_AT_call_file && cts.szB > 0) { 2755 Int ftabIx = (Int)cts.u.val; 2756 if (ftabIx >= 1 2757 && ftabIx < VG_(sizeXA)( parser->fndn_ix_Table )) { 2758 caller_fndn_ix = *(UInt*) 2759 VG_(indexXA)( parser->fndn_ix_Table, ftabIx ); 2760 } 2761 if (0) VG_(printf)("XXX caller_fndn_ix = %d %s\n", caller_fndn_ix, 2762 ML_(fndn_ix2filename) (cc->di, caller_fndn_ix)); 2763 } 2764 if (attr == DW_AT_call_line && cts.szB > 0) { 2765 caller_lineno = cts.u.val; 2766 } 2767 2768 if (attr == DW_AT_abstract_origin && cts.szB > 0) { 2769 inlinedfn_abstract_origin 2770 = cook_die_using_form (cc, (UWord)cts.u.val, form); 2771 } 2772 2773 if (attr == DW_AT_low_pc && cts.szB > 0) { 2774 ip_lo = cts.u.val; 2775 have_lo = True; 2776 } 2777 if (attr == DW_AT_high_pc && cts.szB > 0) { 2778 ip_hi1 = cts.u.val; 2779 have_hi1 = True; 2780 if (form != DW_FORM_addr) 2781 hiIsRelative = True; 2782 } 2783 if (attr == DW_AT_ranges && cts.szB > 0) { 2784 rangeoff = cts.u.val; 2785 have_range = True; 2786 } 2787 if (attr == DW_AT_sibling && cts.szB > 0) { 2788 parser->sibling = cts.u.val; 2789 } 2790 } 2791 if (have_lo && have_hi1 && hiIsRelative) 2792 ip_hi1 += ip_lo; 2793 /* Do we have something that looks sane? */ 2794 if (dtag == DW_TAG_inlined_subroutine 2795 && (!have_lo) && (!have_hi1) && (!have_range)) { 2796 /* Seems strange. How can an inlined subroutine have 2797 no code ? */ 2798 goto_bad_DIE; 2799 } else 2800 if (have_lo && have_hi1 && (!have_range)) { 2801 /* This inlined call is just a single address range. 
*/ 2802 if (ip_lo < ip_hi1) { 2803 /* Apply text debug biasing */ 2804 ip_lo += cc->di->text_debug_bias; 2805 ip_hi1 += cc->di->text_debug_bias; 2806 ML_(addInlInfo) (cc->di, 2807 ip_lo, ip_hi1, 2808 get_inlFnName (inlinedfn_abstract_origin, cc, td3), 2809 caller_fndn_ix, 2810 caller_lineno, level); 2811 } 2812 } else if (have_range) { 2813 /* This inlined call is several address ranges. */ 2814 XArray *ranges; 2815 Word j; 2816 const HChar *inlfnname = 2817 get_inlFnName (inlinedfn_abstract_origin, cc, td3); 2818 2819 /* Ranges are biased for the inline info using the same logic 2820 as what is used for biasing ranges for the var info, for which 2821 ranges are read using cc->cu_svma (see parse_var_DIE). 2822 Then text_debug_bias is added when a (non global) var 2823 is recorded (see just before the call to ML_(addVar)) */ 2824 ranges = get_range_list( cc, td3, 2825 rangeoff, cc->cu_svma ); 2826 for (j = 0; j < VG_(sizeXA)( ranges ); j++) { 2827 AddrRange* range = (AddrRange*) VG_(indexXA)( ranges, j ); 2828 ML_(addInlInfo) (cc->di, 2829 range->aMin + cc->di->text_debug_bias, 2830 range->aMax+1 + cc->di->text_debug_bias, 2831 // aMax+1 as range has its last bound included 2832 // while ML_(addInlInfo) expects last bound not 2833 // included. 
2834 inlfnname, 2835 caller_fndn_ix, 2836 caller_lineno, level); 2837 } 2838 VG_(deleteXA)( ranges ); 2839 } else 2840 goto_bad_DIE; 2841 } 2842 2843 // Only recursively parse the (possible) children for the DIE which 2844 // might maybe contain a DW_TAG_inlined_subroutine: 2845 return dtag == DW_TAG_lexical_block || dtag == DW_TAG_subprogram 2846 || dtag == DW_TAG_inlined_subroutine 2847 || dtag == DW_TAG_compile_unit || dtag == DW_TAG_partial_unit; 2848 2849 bad_DIE: 2850 dump_bad_die_and_barf("parse_inl_DIE", dtag, posn, level, 2851 c_die, saved_die_c_offset, 2852 abbv, 2853 cc); 2854 /*NOTREACHED*/ 2855 } 2856 2857 2858 /*------------------------------------------------------------*/ 2859 /*--- ---*/ 2860 /*--- Parsing of type-related DIEs ---*/ 2861 /*--- ---*/ 2862 /*------------------------------------------------------------*/ 2863 2864 typedef 2865 struct { 2866 /* What source language? 'A'=Ada83/95, 2867 'C'=C/C++, 2868 'F'=Fortran, 2869 '?'=other 2870 Established once per compilation unit. */ 2871 UChar language; 2872 /* A stack of types which are currently under construction */ 2873 Int sp; /* [sp] is innermost active entry; sp==-1 for empty 2874 stack */ 2875 Int stack_size; 2876 /* Note that the TyEnts in qparentE are temporary copies of the 2877 ones accumulating in the main tyent array. So it is not safe 2878 to free up anything on them when popping them off the stack 2879 (iow, it isn't safe to use TyEnt__make_EMPTY on them). Just 2880 memset them to zero when done. 
*/ 2881 TyEnt *qparentE; /* parent TyEnts */ 2882 Int *qlevel; 2883 } 2884 D3TypeParser; 2885 2886 /* Completely initialise a type parser object */ 2887 static void 2888 type_parser_init ( D3TypeParser *parser ) 2889 { 2890 parser->sp = -1; 2891 parser->language = '?'; 2892 parser->stack_size = 0; 2893 parser->qparentE = NULL; 2894 parser->qlevel = NULL; 2895 } 2896 2897 /* Release any memory hanging off a type parser object */ 2898 static void 2899 type_parser_release ( D3TypeParser *parser ) 2900 { 2901 ML_(dinfo_free)( parser->qparentE ); 2902 ML_(dinfo_free)( parser->qlevel ); 2903 } 2904 2905 static void typestack_show ( const D3TypeParser* parser, const HChar* str ) 2906 { 2907 Word i; 2908 VG_(printf)(" typestack (%s) {\n", str); 2909 for (i = 0; i <= parser->sp; i++) { 2910 VG_(printf)(" [%ld] (level %d): ", i, parser->qlevel[i]); 2911 ML_(pp_TyEnt)( &parser->qparentE[i] ); 2912 VG_(printf)("\n"); 2913 } 2914 VG_(printf)(" }\n"); 2915 } 2916 2917 /* Remove from the stack, all entries with .level > 'level' */ 2918 static 2919 void typestack_preen ( D3TypeParser* parser, Bool td3, Int level ) 2920 { 2921 Bool changed = False; 2922 vg_assert(parser->sp < parser->stack_size); 2923 while (True) { 2924 vg_assert(parser->sp >= -1); 2925 if (parser->sp == -1) break; 2926 if (parser->qlevel[parser->sp] <= level) break; 2927 if (0) 2928 TRACE_D3("BBBBAAAA typestack_pop [newsp=%d]\n", parser->sp-1); 2929 vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp])); 2930 parser->sp--; 2931 changed = True; 2932 } 2933 if (changed && td3) 2934 typestack_show( parser, "after preen" ); 2935 } 2936 2937 static Bool typestack_is_empty ( const D3TypeParser* parser ) 2938 { 2939 vg_assert(parser->sp >= -1 && parser->sp < parser->stack_size); 2940 return parser->sp == -1; 2941 } 2942 2943 static void typestack_push ( const CUConst* cc, 2944 D3TypeParser* parser, 2945 Bool td3, 2946 const TyEnt* parentE, Int level ) 2947 { 2948 if (0) 2949 TRACE_D3("BBBBAAAA 
typestack_push[newsp=%d]: %d %05lx\n", 2950 parser->sp+1, level, parentE->cuOff); 2951 2952 /* First we need to zap everything >= 'level', as we are about to 2953 replace any previous entry at 'level', so .. */ 2954 typestack_preen(parser, /*td3*/False, level-1); 2955 2956 vg_assert(parser->sp >= -1); 2957 vg_assert(parser->sp < parser->stack_size); 2958 if (parser->sp == parser->stack_size - 1) { 2959 parser->stack_size += 16; 2960 parser->qparentE = 2961 ML_(dinfo_realloc)("di.readdwarf3.typush.1", parser->qparentE, 2962 parser->stack_size * sizeof parser->qparentE[0]); 2963 parser->qlevel = 2964 ML_(dinfo_realloc)("di.readdwarf3.typush.2", parser->qlevel, 2965 parser->stack_size * sizeof parser->qlevel[0]); 2966 } 2967 if (parser->sp >= 0) 2968 vg_assert(parser->qlevel[parser->sp] < level); 2969 parser->sp++; 2970 vg_assert(parentE); 2971 vg_assert(ML_(TyEnt__is_type)(parentE)); 2972 vg_assert(parentE->cuOff != D3_INVALID_CUOFF); 2973 parser->qparentE[parser->sp] = *parentE; 2974 parser->qlevel[parser->sp] = level; 2975 if (TD3) 2976 typestack_show( parser, "after push" ); 2977 } 2978 2979 /* True if the subrange type being parsed gives the bounds of an array. */ 2980 static Bool subrange_type_denotes_array_bounds ( const D3TypeParser* parser, 2981 DW_TAG dtag ) { 2982 vg_assert(dtag == DW_TAG_subrange_type); 2983 /* For most languages, a subrange_type dtag always gives the 2984 bounds of an array. 2985 For Ada, there are additional conditions as a subrange_type 2986 is also used for other purposes. */ 2987 if (parser->language != 'A') 2988 /* not Ada, so it definitely denotes an array bound. */ 2989 return True; 2990 else 2991 /* Extra constraints for Ada: it only denotes an array bound if .. */ 2992 return (! typestack_is_empty(parser) 2993 && parser->qparentE[parser->sp].tag == Te_TyArray); 2994 } 2995 2996 /* Parse a type-related DIE. 'parser' holds the current parser state. 2997 'admin' is where the completed types are dumped. 
'dtag' is the tag 2998 for this DIE. 'c_die' points to the start of the data fields (FORM 2999 stuff) for the DIE. abbv is the parsed abbreviation which describe 3000 the DIE. 3001 3002 We may find the DIE uninteresting, in which case we should ignore 3003 it. 3004 3005 What happens: the DIE is examined. If uninteresting, it is ignored. 3006 Otherwise, the DIE gives rise to two things: 3007 3008 (1) the offset of this DIE in the CU -- the cuOffset, a UWord 3009 (2) a TyAdmin structure, which holds the type, or related stuff 3010 3011 (2) is added at the end of 'tyadmins', at some index, say 'i'. 3012 3013 A pair (cuOffset, i) is added to 'tydict'. 3014 3015 Hence 'tyadmins' holds the actual type entities, and 'tydict' holds 3016 a mapping from cuOffset to the index of the corresponding entry in 3017 'tyadmin'. 3018 3019 When resolving a cuOffset to a TyAdmin, first look up the cuOffset 3020 in the tydict (by binary search). This gives an index into 3021 tyadmins, and the required entity lives in tyadmins at that index. 3022 */ 3023 __attribute__((noinline)) 3024 static void parse_type_DIE ( /*MOD*/XArray* /* of TyEnt */ tyents, 3025 /*MOD*/D3TypeParser* parser, 3026 DW_TAG dtag, 3027 UWord posn, 3028 Int level, 3029 Cursor* c_die, 3030 const g_abbv *abbv, 3031 const CUConst* cc, 3032 Bool td3 ) 3033 { 3034 FormContents cts; 3035 UInt nf_i; 3036 TyEnt typeE; 3037 TyEnt atomE; 3038 TyEnt fieldE; 3039 TyEnt boundE; 3040 3041 UWord saved_die_c_offset = get_position_of_Cursor( c_die ); 3042 3043 VG_(memset)( &typeE, 0xAA, sizeof(typeE) ); 3044 VG_(memset)( &atomE, 0xAA, sizeof(atomE) ); 3045 VG_(memset)( &fieldE, 0xAA, sizeof(fieldE) ); 3046 VG_(memset)( &boundE, 0xAA, sizeof(boundE) ); 3047 3048 /* If we've returned to a level at or above any previously noted 3049 parent, un-note it, so we don't believe we're still collecting 3050 its children. 
*/ 3051 typestack_preen( parser, td3, level-1 ); 3052 3053 if (dtag == DW_TAG_compile_unit 3054 || dtag == DW_TAG_type_unit 3055 || dtag == DW_TAG_partial_unit) { 3056 /* See if we can find DW_AT_language, since it is important for 3057 establishing array bounds (see DW_TAG_subrange_type below in 3058 this fn) */ 3059 nf_i = 0; 3060 while (True) { 3061 DW_AT attr = (DW_AT) abbv->nf[nf_i].at_name; 3062 DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form; 3063 nf_i++; 3064 if (attr == 0 && form == 0) break; 3065 get_Form_contents( &cts, cc, c_die, False/*td3*/, form ); 3066 if (attr != DW_AT_language) 3067 continue; 3068 if (cts.szB <= 0) 3069 goto_bad_DIE; 3070 switch (cts.u.val) { 3071 case DW_LANG_C89: case DW_LANG_C: 3072 case DW_LANG_C_plus_plus: case DW_LANG_ObjC: 3073 case DW_LANG_ObjC_plus_plus: case DW_LANG_UPC: 3074 case DW_LANG_Upc: case DW_LANG_C99: case DW_LANG_C11: 3075 case DW_LANG_C_plus_plus_11: case DW_LANG_C_plus_plus_14: 3076 parser->language = 'C'; break; 3077 case DW_LANG_Fortran77: case DW_LANG_Fortran90: 3078 case DW_LANG_Fortran95: case DW_LANG_Fortran03: 3079 case DW_LANG_Fortran08: 3080 parser->language = 'F'; break; 3081 case DW_LANG_Ada83: case DW_LANG_Ada95: 3082 parser->language = 'A'; break; 3083 case DW_LANG_Cobol74: 3084 case DW_LANG_Cobol85: case DW_LANG_Pascal83: 3085 case DW_LANG_Modula2: case DW_LANG_Java: 3086 case DW_LANG_PLI: 3087 case DW_LANG_D: case DW_LANG_Python: case DW_LANG_Go: 3088 case DW_LANG_Mips_Assembler: 3089 parser->language = '?'; break; 3090 default: 3091 goto_bad_DIE; 3092 } 3093 } 3094 } 3095 3096 if (dtag == DW_TAG_base_type) { 3097 /* We can pick up a new base type any time. 
*/ 3098 VG_(memset)(&typeE, 0, sizeof(typeE)); 3099 typeE.cuOff = D3_INVALID_CUOFF; 3100 typeE.tag = Te_TyBase; 3101 nf_i = 0; 3102 while (True) { 3103 DW_AT attr = (DW_AT) abbv->nf[nf_i].at_name; 3104 DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form; 3105 nf_i++; 3106 if (attr == 0 && form == 0) break; 3107 get_Form_contents( &cts, cc, c_die, False/*td3*/, form ); 3108 if (attr == DW_AT_name && cts.szB < 0) { 3109 typeE.Te.TyBase.name 3110 = ML_(cur_read_strdup)( cts.u.cur, 3111 "di.readdwarf3.ptD.base_type.1" ); 3112 } 3113 if (attr == DW_AT_byte_size && cts.szB > 0) { 3114 typeE.Te.TyBase.szB = cts.u.val; 3115 } 3116 if (attr == DW_AT_encoding && cts.szB > 0) { 3117 switch (cts.u.val) { 3118 case DW_ATE_unsigned: case DW_ATE_unsigned_char: 3119 case DW_ATE_UTF: /* since DWARF4, e.g. char16_t from C++ */ 3120 case DW_ATE_boolean:/* FIXME - is this correct? */ 3121 case DW_ATE_unsigned_fixed: 3122 typeE.Te.TyBase.enc = 'U'; break; 3123 case DW_ATE_signed: case DW_ATE_signed_char: 3124 case DW_ATE_signed_fixed: 3125 typeE.Te.TyBase.enc = 'S'; break; 3126 case DW_ATE_float: 3127 typeE.Te.TyBase.enc = 'F'; break; 3128 case DW_ATE_complex_float: 3129 typeE.Te.TyBase.enc = 'C'; break; 3130 default: 3131 goto_bad_DIE; 3132 } 3133 } 3134 } 3135 3136 /* Invent a name if it doesn't have one. gcc-4.3 3137 -ftree-vectorize is observed to emit nameless base types. */ 3138 if (!typeE.Te.TyBase.name) 3139 typeE.Te.TyBase.name 3140 = ML_(dinfo_strdup)( "di.readdwarf3.ptD.base_type.2", 3141 "<anon_base_type>" ); 3142 3143 /* Do we have something that looks sane? */ 3144 if (/* must have a name */ 3145 typeE.Te.TyBase.name == NULL 3146 /* and a plausible size. 
Yes, really 32: "complex long 3147 double" apparently has size=32 */ 3148 || typeE.Te.TyBase.szB < 0 || typeE.Te.TyBase.szB > 32 3149 /* and a plausible encoding */ 3150 || (typeE.Te.TyBase.enc != 'U' 3151 && typeE.Te.TyBase.enc != 'S' 3152 && typeE.Te.TyBase.enc != 'F' 3153 && typeE.Te.TyBase.enc != 'C')) 3154 goto_bad_DIE; 3155 /* Last minute hack: if we see this 3156 <1><515>: DW_TAG_base_type 3157 DW_AT_byte_size : 0 3158 DW_AT_encoding : 5 3159 DW_AT_name : void 3160 convert it into a real Void type. */ 3161 if (typeE.Te.TyBase.szB == 0 3162 && 0 == VG_(strcmp)("void", typeE.Te.TyBase.name)) { 3163 ML_(TyEnt__make_EMPTY)(&typeE); 3164 typeE.tag = Te_TyVoid; 3165 typeE.Te.TyVoid.isFake = False; /* it's a real one! */ 3166 } 3167 3168 goto acquire_Type; 3169 } 3170 3171 /* 3172 * An example of DW_TAG_rvalue_reference_type: 3173 * 3174 * $ readelf --debug-dump /usr/lib/debug/usr/lib/libstdc++.so.6.0.16.debug 3175 * <1><1014>: Abbrev Number: 55 (DW_TAG_rvalue_reference_type) 3176 * <1015> DW_AT_byte_size : 4 3177 * <1016> DW_AT_type : <0xe52> 3178 */ 3179 if (dtag == DW_TAG_pointer_type || dtag == DW_TAG_reference_type 3180 || dtag == DW_TAG_ptr_to_member_type 3181 || dtag == DW_TAG_rvalue_reference_type) { 3182 /* This seems legit for _pointer_type and _reference_type. I 3183 don't know if rolling _ptr_to_member_type in here really is 3184 legit, but it's better than not handling it at all. 
*/ 3185 VG_(memset)(&typeE, 0, sizeof(typeE)); 3186 typeE.cuOff = D3_INVALID_CUOFF; 3187 switch (dtag) { 3188 case DW_TAG_pointer_type: 3189 typeE.tag = Te_TyPtr; 3190 break; 3191 case DW_TAG_reference_type: 3192 typeE.tag = Te_TyRef; 3193 break; 3194 case DW_TAG_ptr_to_member_type: 3195 typeE.tag = Te_TyPtrMbr; 3196 break; 3197 case DW_TAG_rvalue_reference_type: 3198 typeE.tag = Te_TyRvalRef; 3199 break; 3200 default: 3201 vg_assert(False); 3202 } 3203 /* target type defaults to void */ 3204 typeE.Te.TyPorR.typeR = D3_FAKEVOID_CUOFF; 3205 /* These four type kinds don't *have* to specify their size, in 3206 which case we assume it's a machine word. But if they do 3207 specify it, it must be a machine word :-) This probably 3208 assumes that the word size of the Dwarf3 we're reading is the 3209 same size as that on the machine. gcc appears to give a size 3210 whereas icc9 doesn't. */ 3211 typeE.Te.TyPorR.szB = sizeof(UWord); 3212 nf_i = 0; 3213 while (True) { 3214 DW_AT attr = (DW_AT) abbv->nf[nf_i].at_name; 3215 DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form; 3216 nf_i++; 3217 if (attr == 0 && form == 0) break; 3218 get_Form_contents( &cts, cc, c_die, False/*td3*/, form ); 3219 if (attr == DW_AT_byte_size && cts.szB > 0) { 3220 typeE.Te.TyPorR.szB = cts.u.val; 3221 } 3222 if (attr == DW_AT_type && cts.szB > 0) { 3223 typeE.Te.TyPorR.typeR 3224 = cook_die_using_form( cc, (UWord)cts.u.val, form ); 3225 } 3226 } 3227 /* Do we have something that looks sane? */ 3228 if (typeE.Te.TyPorR.szB != sizeof(UWord)) 3229 goto_bad_DIE; 3230 else 3231 goto acquire_Type; 3232 } 3233 3234 if (dtag == DW_TAG_enumeration_type) { 3235 /* Create a new Type to hold the results. 
*/ 3236 VG_(memset)(&typeE, 0, sizeof(typeE)); 3237 typeE.cuOff = posn; 3238 typeE.tag = Te_TyEnum; 3239 Bool is_decl = False; 3240 typeE.Te.TyEnum.atomRs 3241 = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ptD.enum_type.1", 3242 ML_(dinfo_free), 3243 sizeof(UWord) ); 3244 nf_i=0; 3245 while (True) { 3246 DW_AT attr = (DW_AT) abbv->nf[nf_i].at_name; 3247 DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form; 3248 nf_i++; 3249 if (attr == 0 && form == 0) break; 3250 get_Form_contents( &cts, cc, c_die, False/*td3*/, form ); 3251 if (attr == DW_AT_name && cts.szB < 0) { 3252 typeE.Te.TyEnum.name 3253 = ML_(cur_read_strdup)( cts.u.cur, 3254 "di.readdwarf3.pTD.enum_type.2" ); 3255 } 3256 if (attr == DW_AT_byte_size && cts.szB > 0) { 3257 typeE.Te.TyEnum.szB = cts.u.val; 3258 } 3259 if (attr == DW_AT_declaration) { 3260 is_decl = True; 3261 } 3262 } 3263 3264 if (!typeE.Te.TyEnum.name) 3265 typeE.Te.TyEnum.name 3266 = ML_(dinfo_strdup)( "di.readdwarf3.pTD.enum_type.3", 3267 "<anon_enum_type>" ); 3268 3269 /* Do we have something that looks sane? */ 3270 if (typeE.Te.TyEnum.szB == 0 3271 /* we must know the size */ 3272 /* but not for Ada, which uses such dummy 3273 enumerations as helper for gdb ada mode. 3274 Also GCC allows incomplete enums as GNU extension. 3275 http://gcc.gnu.org/onlinedocs/gcc/Incomplete-Enums.html 3276 These are marked as DW_AT_declaration and won't have 3277 a size. They can only be used in declaration or as 3278 pointer types. You can't allocate variables or storage 3279 using such an enum type. (Also GCC seems to have a bug 3280 that will put such an enumeration_type into a .debug_types 3281 unit which should only contain complete types.) */ 3282 && (parser->language != 'A' && !is_decl)) { 3283 goto_bad_DIE; 3284 } 3285 3286 /* On't stack! 
*/ 3287 typestack_push( cc, parser, td3, &typeE, level ); 3288 goto acquire_Type; 3289 } 3290 3291 /* gcc (GCC) 4.4.0 20081017 (experimental) occasionally produces 3292 DW_TAG_enumerator with only a DW_AT_name but no 3293 DW_AT_const_value. This is in violation of the Dwarf3 standard, 3294 and appears to be a new "feature" of gcc - versions 4.3.x and 3295 earlier do not appear to do this. So accept DW_TAG_enumerator 3296 which only have a name but no value. An example: 3297 3298 <1><180>: Abbrev Number: 6 (DW_TAG_enumeration_type) 3299 <181> DW_AT_name : (indirect string, offset: 0xda70): 3300 QtMsgType 3301 <185> DW_AT_byte_size : 4 3302 <186> DW_AT_decl_file : 14 3303 <187> DW_AT_decl_line : 1480 3304 <189> DW_AT_sibling : <0x1a7> 3305 <2><18d>: Abbrev Number: 7 (DW_TAG_enumerator) 3306 <18e> DW_AT_name : (indirect string, offset: 0x9e18): 3307 QtDebugMsg 3308 <2><192>: Abbrev Number: 7 (DW_TAG_enumerator) 3309 <193> DW_AT_name : (indirect string, offset: 0x1505f): 3310 QtWarningMsg 3311 <2><197>: Abbrev Number: 7 (DW_TAG_enumerator) 3312 <198> DW_AT_name : (indirect string, offset: 0x16f4a): 3313 QtCriticalMsg 3314 <2><19c>: Abbrev Number: 7 (DW_TAG_enumerator) 3315 <19d> DW_AT_name : (indirect string, offset: 0x156dd): 3316 QtFatalMsg 3317 <2><1a1>: Abbrev Number: 7 (DW_TAG_enumerator) 3318 <1a2> DW_AT_name : (indirect string, offset: 0x13660): 3319 QtSystemMsg 3320 */ 3321 if (dtag == DW_TAG_enumerator) { 3322 VG_(memset)( &atomE, 0, sizeof(atomE) ); 3323 atomE.cuOff = posn; 3324 atomE.tag = Te_Atom; 3325 nf_i = 0; 3326 while (True) { 3327 DW_AT attr = (DW_AT) abbv->nf[nf_i].at_name; 3328 DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form; 3329 nf_i++; 3330 if (attr == 0 && form == 0) break; 3331 get_Form_contents( &cts, cc, c_die, False/*td3*/, form ); 3332 if (attr == DW_AT_name && cts.szB < 0) { 3333 atomE.Te.Atom.name 3334 = ML_(cur_read_strdup)( cts.u.cur, 3335 "di.readdwarf3.pTD.enumerator.1" ); 3336 } 3337 if (attr == DW_AT_const_value && cts.szB > 0) { 
3338 atomE.Te.Atom.value = cts.u.val; 3339 atomE.Te.Atom.valueKnown = True; 3340 } 3341 } 3342 /* Do we have something that looks sane? */ 3343 if (atomE.Te.Atom.name == NULL) 3344 goto_bad_DIE; 3345 /* Do we have a plausible parent? */ 3346 if (typestack_is_empty(parser)) goto_bad_DIE; 3347 vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp])); 3348 vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF); 3349 if (level != parser->qlevel[parser->sp]+1) goto_bad_DIE; 3350 if (parser->qparentE[parser->sp].tag != Te_TyEnum) goto_bad_DIE; 3351 /* Record this child in the parent */ 3352 vg_assert(parser->qparentE[parser->sp].Te.TyEnum.atomRs); 3353 VG_(addToXA)( parser->qparentE[parser->sp].Te.TyEnum.atomRs, 3354 &atomE ); 3355 /* And record the child itself */ 3356 goto acquire_Atom; 3357 } 3358 3359 /* Treat DW_TAG_class_type as if it was a DW_TAG_structure_type. I 3360 don't know if this is correct, but it at least makes this reader 3361 usable for gcc-4.3 produced Dwarf3. */ 3362 if (dtag == DW_TAG_structure_type || dtag == DW_TAG_class_type 3363 || dtag == DW_TAG_union_type) { 3364 Bool have_szB = False; 3365 Bool is_decl = False; 3366 Bool is_spec = False; 3367 /* Create a new Type to hold the results. 
*/ 3368 VG_(memset)(&typeE, 0, sizeof(typeE)); 3369 typeE.cuOff = posn; 3370 typeE.tag = Te_TyStOrUn; 3371 typeE.Te.TyStOrUn.name = NULL; 3372 typeE.Te.TyStOrUn.typeR = D3_INVALID_CUOFF; 3373 typeE.Te.TyStOrUn.fieldRs 3374 = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.pTD.struct_type.1", 3375 ML_(dinfo_free), 3376 sizeof(UWord) ); 3377 typeE.Te.TyStOrUn.complete = True; 3378 typeE.Te.TyStOrUn.isStruct = dtag == DW_TAG_structure_type 3379 || dtag == DW_TAG_class_type; 3380 nf_i = 0; 3381 while (True) { 3382 DW_AT attr = (DW_AT) abbv->nf[nf_i].at_name; 3383 DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form; 3384 nf_i++; 3385 if (attr == 0 && form == 0) break; 3386 get_Form_contents( &cts, cc, c_die, False/*td3*/, form ); 3387 if (attr == DW_AT_name && cts.szB < 0) { 3388 typeE.Te.TyStOrUn.name 3389 = ML_(cur_read_strdup)( cts.u.cur, 3390 "di.readdwarf3.ptD.struct_type.2" ); 3391 } 3392 if (attr == DW_AT_byte_size && cts.szB >= 0) { 3393 typeE.Te.TyStOrUn.szB = cts.u.val; 3394 have_szB = True; 3395 } 3396 if (attr == DW_AT_declaration && cts.szB > 0 && cts.u.val > 0) { 3397 is_decl = True; 3398 } 3399 if (attr == DW_AT_specification && cts.szB > 0 && cts.u.val > 0) { 3400 is_spec = True; 3401 } 3402 if (attr == DW_AT_signature && form == DW_FORM_ref_sig8 3403 && cts.szB > 0) { 3404 have_szB = True; 3405 typeE.Te.TyStOrUn.szB = 8; 3406 typeE.Te.TyStOrUn.typeR 3407 = cook_die_using_form( cc, (UWord)cts.u.val, form ); 3408 } 3409 } 3410 /* Do we have something that looks sane? */ 3411 if (is_decl && (!is_spec)) { 3412 /* It's a DW_AT_declaration. We require the name but 3413 nothing else. */ 3414 /* JRS 2012-06-28: following discussion w/ tromey, if the the 3415 type doesn't have name, just make one up, and accept it. 3416 It might be referred to by other DIEs, so ignoring it 3417 doesn't seem like a safe option. 
*/ 3418 if (typeE.Te.TyStOrUn.name == NULL) 3419 typeE.Te.TyStOrUn.name 3420 = ML_(dinfo_strdup)( "di.readdwarf3.ptD.struct_type.3", 3421 "<anon_struct_type>" ); 3422 typeE.Te.TyStOrUn.complete = False; 3423 /* JRS 2009 Aug 10: <possible kludge>? */ 3424 /* Push this tyent on the stack, even though it's incomplete. 3425 It appears that gcc-4.4 on Fedora 11 will sometimes create 3426 DW_TAG_member entries for it, and so we need to have a 3427 plausible parent present in order for that to work. See 3428 #200029 comments 8 and 9. */ 3429 typestack_push( cc, parser, td3, &typeE, level ); 3430 /* </possible kludge> */ 3431 goto acquire_Type; 3432 } 3433 if ((!is_decl) /* && (!is_spec) */) { 3434 /* this is the common, ordinary case */ 3435 /* The name can be present, or not */ 3436 if (!have_szB) { 3437 /* We must know the size. 3438 But in Ada, record with discriminants might have no size. 3439 But in C, VLA in the middle of a struct (gcc extension) 3440 might have no size. 3441 Instead, some GNAT dwarf extensions and/or dwarf entries 3442 allow to calculate the struct size at runtime. 3443 We cannot do that (yet?) so, the temporary kludge is to use 3444 a small size. */ 3445 typeE.Te.TyStOrUn.szB = 1; 3446 } 3447 /* On't stack! */ 3448 typestack_push( cc, parser, td3, &typeE, level ); 3449 goto acquire_Type; 3450 } 3451 else { 3452 /* don't know how to handle any other variants just now */ 3453 goto_bad_DIE; 3454 } 3455 } 3456 3457 if (dtag == DW_TAG_member) { 3458 /* Acquire member entries for both DW_TAG_structure_type and 3459 DW_TAG_union_type. They differ minorly, in that struct 3460 members must have a DW_AT_data_member_location expression 3461 whereas union members must not. 
*/ 3462 Bool parent_is_struct; 3463 VG_(memset)( &fieldE, 0, sizeof(fieldE) ); 3464 fieldE.cuOff = posn; 3465 fieldE.tag = Te_Field; 3466 fieldE.Te.Field.typeR = D3_INVALID_CUOFF; 3467 nf_i = 0; 3468 while (True) { 3469 DW_AT attr = (DW_AT) abbv->nf[nf_i].at_name; 3470 DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form; 3471 nf_i++; 3472 if (attr == 0 && form == 0) break; 3473 get_Form_contents( &cts, cc, c_die, False/*td3*/, form ); 3474 if (attr == DW_AT_name && cts.szB < 0) { 3475 fieldE.Te.Field.name 3476 = ML_(cur_read_strdup)( cts.u.cur, 3477 "di.readdwarf3.ptD.member.1" ); 3478 } 3479 if (attr == DW_AT_type && cts.szB > 0) { 3480 fieldE.Te.Field.typeR 3481 = cook_die_using_form( cc, (UWord)cts.u.val, form ); 3482 } 3483 /* There are 2 different cases for DW_AT_data_member_location. 3484 If it is a constant class attribute, it contains byte offset 3485 from the beginning of the containing entity. 3486 Otherwise it is a location expression. */ 3487 if (attr == DW_AT_data_member_location && cts.szB > 0) { 3488 fieldE.Te.Field.nLoc = -1; 3489 fieldE.Te.Field.pos.offset = cts.u.val; 3490 } 3491 if (attr == DW_AT_data_member_location && cts.szB <= 0) { 3492 fieldE.Te.Field.nLoc = (UWord)(-cts.szB); 3493 fieldE.Te.Field.pos.loc 3494 = ML_(cur_read_memdup)( cts.u.cur, 3495 (SizeT)fieldE.Te.Field.nLoc, 3496 "di.readdwarf3.ptD.member.2" ); 3497 } 3498 } 3499 /* Do we have a plausible parent? */ 3500 if (typestack_is_empty(parser)) goto_bad_DIE; 3501 vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp])); 3502 vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF); 3503 if (level != parser->qlevel[parser->sp]+1) goto_bad_DIE; 3504 if (parser->qparentE[parser->sp].tag != Te_TyStOrUn) goto_bad_DIE; 3505 /* Do we have something that looks sane? If this a member of a 3506 struct, we must have a location expression; but if a member 3507 of a union that is irrelevant (D3 spec sec 5.6.6). 
We ought 3508 to reject in the latter case, but some compilers have been 3509 observed to emit constant-zero expressions. So just ignore 3510 them. */ 3511 parent_is_struct 3512 = parser->qparentE[parser->sp].Te.TyStOrUn.isStruct; 3513 if (!fieldE.Te.Field.name) 3514 fieldE.Te.Field.name 3515 = ML_(dinfo_strdup)( "di.readdwarf3.ptD.member.3", 3516 "<anon_field>" ); 3517 if (fieldE.Te.Field.typeR == D3_INVALID_CUOFF) 3518 goto_bad_DIE; 3519 if (fieldE.Te.Field.nLoc) { 3520 if (!parent_is_struct) { 3521 /* If this is a union type, pretend we haven't seen the data 3522 member location expression, as it is by definition 3523 redundant (it must be zero). */ 3524 if (fieldE.Te.Field.nLoc > 0) 3525 ML_(dinfo_free)(fieldE.Te.Field.pos.loc); 3526 fieldE.Te.Field.pos.loc = NULL; 3527 fieldE.Te.Field.nLoc = 0; 3528 } 3529 /* Record this child in the parent */ 3530 fieldE.Te.Field.isStruct = parent_is_struct; 3531 vg_assert(parser->qparentE[parser->sp].Te.TyStOrUn.fieldRs); 3532 VG_(addToXA)( parser->qparentE[parser->sp].Te.TyStOrUn.fieldRs, 3533 &posn ); 3534 /* And record the child itself */ 3535 goto acquire_Field; 3536 } else { 3537 /* Member with no location - this can happen with static 3538 const members in C++ code which are compile time constants 3539 that do no exist in the class. They're not of any interest 3540 to us so we ignore them. 
*/ 3541 ML_(TyEnt__make_EMPTY)(&fieldE); 3542 } 3543 } 3544 3545 if (dtag == DW_TAG_array_type) { 3546 VG_(memset)(&typeE, 0, sizeof(typeE)); 3547 typeE.cuOff = posn; 3548 typeE.tag = Te_TyArray; 3549 typeE.Te.TyArray.typeR = D3_INVALID_CUOFF; 3550 typeE.Te.TyArray.boundRs 3551 = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ptD.array_type.1", 3552 ML_(dinfo_free), 3553 sizeof(UWord) ); 3554 nf_i = 0; 3555 while (True) { 3556 DW_AT attr = (DW_AT) abbv->nf[nf_i].at_name; 3557 DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form; 3558 nf_i++; 3559 if (attr == 0 && form == 0) break; 3560 get_Form_contents( &cts, cc, c_die, False/*td3*/, form ); 3561 if (attr == DW_AT_type && cts.szB > 0) { 3562 typeE.Te.TyArray.typeR 3563 = cook_die_using_form( cc, (UWord)cts.u.val, form ); 3564 } 3565 } 3566 if (typeE.Te.TyArray.typeR == D3_INVALID_CUOFF) 3567 goto_bad_DIE; 3568 /* On't stack! */ 3569 typestack_push( cc, parser, td3, &typeE, level ); 3570 goto acquire_Type; 3571 } 3572 3573 /* this is a subrange type defining the bounds of an array. 
*/ 3574 if (dtag == DW_TAG_subrange_type 3575 && subrange_type_denotes_array_bounds(parser, dtag)) { 3576 Bool have_lower = False; 3577 Bool have_upper = False; 3578 Bool have_count = False; 3579 Long lower = 0; 3580 Long upper = 0; 3581 3582 switch (parser->language) { 3583 case 'C': have_lower = True; lower = 0; break; 3584 case 'F': have_lower = True; lower = 1; break; 3585 case '?': have_lower = False; break; 3586 case 'A': have_lower = False; break; 3587 default: vg_assert(0); /* assured us by handling of 3588 DW_TAG_compile_unit in this fn */ 3589 } 3590 3591 VG_(memset)( &boundE, 0, sizeof(boundE) ); 3592 boundE.cuOff = D3_INVALID_CUOFF; 3593 boundE.tag = Te_Bound; 3594 nf_i = 0; 3595 while (True) { 3596 DW_AT attr = (DW_AT) abbv->nf[nf_i].at_name; 3597 DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form; 3598 nf_i++; 3599 if (attr == 0 && form == 0) break; 3600 get_Form_contents( &cts, cc, c_die, False/*td3*/, form ); 3601 if (attr == DW_AT_lower_bound && cts.szB > 0) { 3602 lower = (Long)cts.u.val; 3603 have_lower = True; 3604 } 3605 if (attr == DW_AT_upper_bound && cts.szB > 0) { 3606 upper = (Long)cts.u.val; 3607 have_upper = True; 3608 } 3609 if (attr == DW_AT_count && cts.szB > 0) { 3610 /*count = (Long)cts.u.val;*/ 3611 have_count = True; 3612 } 3613 } 3614 /* FIXME: potentially skip the rest if no parent present, since 3615 it could be the case that this subrange type is free-standing 3616 (not being used to describe the bounds of a containing array 3617 type) */ 3618 /* Do we have a plausible parent? 
*/ 3619 if (typestack_is_empty(parser)) goto_bad_DIE; 3620 vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp])); 3621 vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF); 3622 if (level != parser->qlevel[parser->sp]+1) goto_bad_DIE; 3623 if (parser->qparentE[parser->sp].tag != Te_TyArray) goto_bad_DIE; 3624 3625 /* Figure out if we have a definite range or not */ 3626 if (have_lower && have_upper && (!have_count)) { 3627 boundE.Te.Bound.knownL = True; 3628 boundE.Te.Bound.knownU = True; 3629 boundE.Te.Bound.boundL = lower; 3630 boundE.Te.Bound.boundU = upper; 3631 } 3632 else if (have_lower && (!have_upper) && (!have_count)) { 3633 boundE.Te.Bound.knownL = True; 3634 boundE.Te.Bound.knownU = False; 3635 boundE.Te.Bound.boundL = lower; 3636 boundE.Te.Bound.boundU = 0; 3637 } 3638 else if ((!have_lower) && have_upper && (!have_count)) { 3639 boundE.Te.Bound.knownL = False; 3640 boundE.Te.Bound.knownU = True; 3641 boundE.Te.Bound.boundL = 0; 3642 boundE.Te.Bound.boundU = upper; 3643 } 3644 else if ((!have_lower) && (!have_upper) && (!have_count)) { 3645 boundE.Te.Bound.knownL = False; 3646 boundE.Te.Bound.knownU = False; 3647 boundE.Te.Bound.boundL = 0; 3648 boundE.Te.Bound.boundU = 0; 3649 } else { 3650 /* FIXME: handle more cases */ 3651 goto_bad_DIE; 3652 } 3653 3654 /* Record this bound in the parent */ 3655 boundE.cuOff = posn; 3656 vg_assert(parser->qparentE[parser->sp].Te.TyArray.boundRs); 3657 VG_(addToXA)( parser->qparentE[parser->sp].Te.TyArray.boundRs, 3658 &boundE.cuOff ); 3659 /* And record the child itself */ 3660 goto acquire_Bound; 3661 } 3662 3663 /* typedef or subrange_type other than array bounds. */ 3664 if (dtag == DW_TAG_typedef 3665 || (dtag == DW_TAG_subrange_type 3666 && !subrange_type_denotes_array_bounds(parser, dtag))) { 3667 /* subrange_type other than array bound is only for Ada. */ 3668 vg_assert (dtag == DW_TAG_typedef || parser->language == 'A'); 3669 /* We can pick up a new typedef/subrange_type any time. 
*/ 3670 VG_(memset)(&typeE, 0, sizeof(typeE)); 3671 typeE.cuOff = D3_INVALID_CUOFF; 3672 typeE.tag = Te_TyTyDef; 3673 typeE.Te.TyTyDef.name = NULL; 3674 typeE.Te.TyTyDef.typeR = D3_INVALID_CUOFF; 3675 nf_i = 0; 3676 while (True) { 3677 DW_AT attr = (DW_AT) abbv->nf[nf_i].at_name; 3678 DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form; 3679 nf_i++; 3680 if (attr == 0 && form == 0) break; 3681 get_Form_contents( &cts, cc, c_die, False/*td3*/, form ); 3682 if (attr == DW_AT_name && cts.szB < 0) { 3683 typeE.Te.TyTyDef.name 3684 = ML_(cur_read_strdup)( cts.u.cur, 3685 "di.readdwarf3.ptD.typedef.1" ); 3686 } 3687 if (attr == DW_AT_type && cts.szB > 0) { 3688 typeE.Te.TyTyDef.typeR 3689 = cook_die_using_form( cc, (UWord)cts.u.val, form ); 3690 } 3691 } 3692 /* Do we have something that looks sane? 3693 gcc gnat Ada generates minimal typedef 3694 such as the below 3695 <6><91cc>: DW_TAG_typedef 3696 DW_AT_abstract_ori: <9066> 3697 g++ for OMP can generate artificial functions that have 3698 parameters that refer to pointers to unnamed typedefs. 3699 See https://bugs.kde.org/show_bug.cgi?id=273475 3700 So we cannot require a name for a DW_TAG_typedef. 3701 */ 3702 goto acquire_Type; 3703 } 3704 3705 if (dtag == DW_TAG_subroutine_type) { 3706 /* function type? just record that one fact and ask no 3707 further questions. */ 3708 VG_(memset)(&typeE, 0, sizeof(typeE)); 3709 typeE.cuOff = D3_INVALID_CUOFF; 3710 typeE.tag = Te_TyFn; 3711 goto acquire_Type; 3712 } 3713 3714 if (dtag == DW_TAG_volatile_type || dtag == DW_TAG_const_type 3715 || dtag == DW_TAG_restrict_type) { 3716 Int have_ty = 0; 3717 VG_(memset)(&typeE, 0, sizeof(typeE)); 3718 typeE.cuOff = D3_INVALID_CUOFF; 3719 typeE.tag = Te_TyQual; 3720 typeE.Te.TyQual.qual 3721 = (dtag == DW_TAG_volatile_type ? 'V' 3722 : (dtag == DW_TAG_const_type ? 
'C' : 'R')); 3723 /* target type defaults to 'void' */ 3724 typeE.Te.TyQual.typeR = D3_FAKEVOID_CUOFF; 3725 nf_i = 0; 3726 while (True) { 3727 DW_AT attr = (DW_AT) abbv->nf[nf_i].at_name; 3728 DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form; 3729 nf_i++; 3730 if (attr == 0 && form == 0) break; 3731 get_Form_contents( &cts, cc, c_die, False/*td3*/, form ); 3732 if (attr == DW_AT_type && cts.szB > 0) { 3733 typeE.Te.TyQual.typeR 3734 = cook_die_using_form( cc, (UWord)cts.u.val, form ); 3735 have_ty++; 3736 } 3737 } 3738 /* gcc sometimes generates DW_TAG_const/volatile_type without 3739 DW_AT_type and GDB appears to interpret the type as 'const 3740 void' (resp. 'volatile void'). So just allow it .. */ 3741 if (have_ty == 1 || have_ty == 0) 3742 goto acquire_Type; 3743 else 3744 goto_bad_DIE; 3745 } 3746 3747 /* 3748 * Treat DW_TAG_unspecified_type as type void. An example of DW_TAG_unspecified_type: 3749 * 3750 * $ readelf --debug-dump /usr/lib/debug/usr/lib/libstdc++.so.6.0.16.debug 3751 * <1><10d4>: Abbrev Number: 53 (DW_TAG_unspecified_type) 3752 * <10d5> DW_AT_name : (indirect string, offset: 0xdb7): decltype(nullptr) 3753 */ 3754 if (dtag == DW_TAG_unspecified_type) { 3755 VG_(memset)(&typeE, 0, sizeof(typeE)); 3756 typeE.cuOff = D3_INVALID_CUOFF; 3757 typeE.tag = Te_TyQual; 3758 typeE.Te.TyQual.typeR = D3_FAKEVOID_CUOFF; 3759 goto acquire_Type; 3760 } 3761 3762 /* else ignore this DIE */ 3763 return; 3764 /*NOTREACHED*/ 3765 3766 acquire_Type: 3767 if (0) VG_(printf)("YYYY Acquire Type\n"); 3768 vg_assert(ML_(TyEnt__is_type)( &typeE )); 3769 vg_assert(typeE.cuOff == D3_INVALID_CUOFF || typeE.cuOff == posn); 3770 typeE.cuOff = posn; 3771 VG_(addToXA)( tyents, &typeE ); 3772 return; 3773 /*NOTREACHED*/ 3774 3775 acquire_Atom: 3776 if (0) VG_(printf)("YYYY Acquire Atom\n"); 3777 vg_assert(atomE.tag == Te_Atom); 3778 vg_assert(atomE.cuOff == D3_INVALID_CUOFF || atomE.cuOff == posn); 3779 atomE.cuOff = posn; 3780 VG_(addToXA)( tyents, &atomE ); 3781 return; 3782 
/*NOTREACHED*/ 3783 3784 acquire_Field: 3785 /* For union members, Expr should be absent */ 3786 if (0) VG_(printf)("YYYY Acquire Field\n"); 3787 vg_assert(fieldE.tag == Te_Field); 3788 vg_assert(fieldE.Te.Field.nLoc <= 0 || fieldE.Te.Field.pos.loc != NULL); 3789 vg_assert(fieldE.Te.Field.nLoc != 0 || fieldE.Te.Field.pos.loc == NULL); 3790 if (fieldE.Te.Field.isStruct) { 3791 vg_assert(fieldE.Te.Field.nLoc != 0); 3792 } else { 3793 vg_assert(fieldE.Te.Field.nLoc == 0); 3794 } 3795 vg_assert(fieldE.cuOff == D3_INVALID_CUOFF || fieldE.cuOff == posn); 3796 fieldE.cuOff = posn; 3797 VG_(addToXA)( tyents, &fieldE ); 3798 return; 3799 /*NOTREACHED*/ 3800 3801 acquire_Bound: 3802 if (0) VG_(printf)("YYYY Acquire Bound\n"); 3803 vg_assert(boundE.tag == Te_Bound); 3804 vg_assert(boundE.cuOff == D3_INVALID_CUOFF || boundE.cuOff == posn); 3805 boundE.cuOff = posn; 3806 VG_(addToXA)( tyents, &boundE ); 3807 return; 3808 /*NOTREACHED*/ 3809 3810 bad_DIE: 3811 dump_bad_die_and_barf("parse_type_DIE", dtag, posn, level, 3812 c_die, saved_die_c_offset, 3813 abbv, 3814 cc); 3815 /*NOTREACHED*/ 3816 } 3817 3818 3819 /*------------------------------------------------------------*/ 3820 /*--- ---*/ 3821 /*--- Compression of type DIE information ---*/ 3822 /*--- ---*/ 3823 /*------------------------------------------------------------*/ 3824 3825 static UWord chase_cuOff ( Bool* changed, 3826 const XArray* /* of TyEnt */ ents, 3827 TyEntIndexCache* ents_cache, 3828 UWord cuOff ) 3829 { 3830 TyEnt* ent; 3831 ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache, cuOff ); 3832 3833 if (!ent) { 3834 VG_(printf)("chase_cuOff: no entry for 0x%05lx\n", cuOff); 3835 *changed = False; 3836 return cuOff; 3837 } 3838 3839 vg_assert(ent->tag != Te_EMPTY); 3840 if (ent->tag != Te_INDIR) { 3841 *changed = False; 3842 return cuOff; 3843 } else { 3844 vg_assert(ent->Te.INDIR.indR < cuOff); 3845 *changed = True; 3846 return ent->Te.INDIR.indR; 3847 } 3848 } 3849 3850 static 3851 void 
chase_cuOffs_in_XArray ( Bool* changed, 3852 const XArray* /* of TyEnt */ ents, 3853 TyEntIndexCache* ents_cache, 3854 /*MOD*/XArray* /* of UWord */ cuOffs ) 3855 { 3856 Bool b2 = False; 3857 Word i, n = VG_(sizeXA)( cuOffs ); 3858 for (i = 0; i < n; i++) { 3859 Bool b = False; 3860 UWord* p = VG_(indexXA)( cuOffs, i ); 3861 *p = chase_cuOff( &b, ents, ents_cache, *p ); 3862 if (b) 3863 b2 = True; 3864 } 3865 *changed = b2; 3866 } 3867 3868 static Bool TyEnt__subst_R_fields ( const XArray* /* of TyEnt */ ents, 3869 TyEntIndexCache* ents_cache, 3870 /*MOD*/TyEnt* te ) 3871 { 3872 Bool b, changed = False; 3873 switch (te->tag) { 3874 case Te_EMPTY: 3875 break; 3876 case Te_INDIR: 3877 te->Te.INDIR.indR 3878 = chase_cuOff( &b, ents, ents_cache, te->Te.INDIR.indR ); 3879 if (b) changed = True; 3880 break; 3881 case Te_UNKNOWN: 3882 break; 3883 case Te_Atom: 3884 break; 3885 case Te_Field: 3886 te->Te.Field.typeR 3887 = chase_cuOff( &b, ents, ents_cache, te->Te.Field.typeR ); 3888 if (b) changed = True; 3889 break; 3890 case Te_Bound: 3891 break; 3892 case Te_TyBase: 3893 break; 3894 case Te_TyPtr: 3895 case Te_TyRef: 3896 case Te_TyPtrMbr: 3897 case Te_TyRvalRef: 3898 te->Te.TyPorR.typeR 3899 = chase_cuOff( &b, ents, ents_cache, te->Te.TyPorR.typeR ); 3900 if (b) changed = True; 3901 break; 3902 case Te_TyTyDef: 3903 te->Te.TyTyDef.typeR 3904 = chase_cuOff( &b, ents, ents_cache, te->Te.TyTyDef.typeR ); 3905 if (b) changed = True; 3906 break; 3907 case Te_TyStOrUn: 3908 chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyStOrUn.fieldRs ); 3909 if (b) changed = True; 3910 break; 3911 case Te_TyEnum: 3912 chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyEnum.atomRs ); 3913 if (b) changed = True; 3914 break; 3915 case Te_TyArray: 3916 te->Te.TyArray.typeR 3917 = chase_cuOff( &b, ents, ents_cache, te->Te.TyArray.typeR ); 3918 if (b) changed = True; 3919 chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyArray.boundRs ); 3920 if (b) changed = True; 3921 break; 
3922 case Te_TyFn: 3923 break; 3924 case Te_TyQual: 3925 te->Te.TyQual.typeR 3926 = chase_cuOff( &b, ents, ents_cache, te->Te.TyQual.typeR ); 3927 if (b) changed = True; 3928 break; 3929 case Te_TyVoid: 3930 break; 3931 default: 3932 ML_(pp_TyEnt)(te); 3933 vg_assert(0); 3934 } 3935 return changed; 3936 } 3937 3938 /* Make a pass over 'ents'. For each tyent, inspect the target of any 3939 'R' or 'Rs' fields (those which refer to other tyents), and replace 3940 any which point to INDIR nodes with the target of the indirection 3941 (which should not itself be an indirection). In summary, this 3942 routine shorts out all references to indirection nodes. */ 3943 static 3944 Word dedup_types_substitution_pass ( /*MOD*/XArray* /* of TyEnt */ ents, 3945 TyEntIndexCache* ents_cache ) 3946 { 3947 Word i, n, nChanged = 0; 3948 Bool b; 3949 n = VG_(sizeXA)( ents ); 3950 for (i = 0; i < n; i++) { 3951 TyEnt* ent = VG_(indexXA)( ents, i ); 3952 vg_assert(ent->tag != Te_EMPTY); 3953 /* We have to substitute everything, even indirections, so as to 3954 ensure that chains of indirections don't build up. */ 3955 b = TyEnt__subst_R_fields( ents, ents_cache, ent ); 3956 if (b) 3957 nChanged++; 3958 } 3959 3960 return nChanged; 3961 } 3962 3963 3964 /* Make a pass over 'ents', building a dictionary of TyEnts as we go. 3965 Look up each new tyent in the dictionary in turn. If it is already 3966 in the dictionary, replace this tyent with an indirection to the 3967 existing one, and delete any malloc'd stuff hanging off this one. 3968 In summary, this routine commons up all tyents that are identical 3969 as defined by TyEnt__cmp_by_all_except_cuOff. 
*/ 3970 static 3971 Word dedup_types_commoning_pass ( /*MOD*/XArray* /* of TyEnt */ ents ) 3972 { 3973 Word n, i, nDeleted; 3974 WordFM* dict; /* TyEnt* -> void */ 3975 TyEnt* ent; 3976 UWord keyW, valW; 3977 3978 dict = VG_(newFM)( 3979 ML_(dinfo_zalloc), "di.readdwarf3.dtcp.1", 3980 ML_(dinfo_free), 3981 (Word(*)(UWord,UWord)) ML_(TyEnt__cmp_by_all_except_cuOff) 3982 ); 3983 3984 nDeleted = 0; 3985 n = VG_(sizeXA)( ents ); 3986 for (i = 0; i < n; i++) { 3987 ent = VG_(indexXA)( ents, i ); 3988 vg_assert(ent->tag != Te_EMPTY); 3989 3990 /* Ignore indirections, although check that they are 3991 not forming a cycle. */ 3992 if (ent->tag == Te_INDIR) { 3993 vg_assert(ent->Te.INDIR.indR < ent->cuOff); 3994 continue; 3995 } 3996 3997 keyW = valW = 0; 3998 if (VG_(lookupFM)( dict, &keyW, &valW, (UWord)ent )) { 3999 /* it's already in the dictionary. */ 4000 TyEnt* old = (TyEnt*)keyW; 4001 vg_assert(valW == 0); 4002 vg_assert(old != ent); 4003 vg_assert(old->tag != Te_INDIR); 4004 /* since we are traversing the array in increasing order of 4005 cuOff: */ 4006 vg_assert(old->cuOff < ent->cuOff); 4007 /* So anyway, dump this entry and replace it with an 4008 indirection to the one in the dictionary. Note that the 4009 assertion above guarantees that we cannot create cycles of 4010 indirections, since we are always creating an indirection 4011 to a tyent with a cuOff lower than this one. */ 4012 ML_(TyEnt__make_EMPTY)( ent ); 4013 ent->tag = Te_INDIR; 4014 ent->Te.INDIR.indR = old->cuOff; 4015 nDeleted++; 4016 } else { 4017 /* not in dictionary; add it and keep going. 
*/ 4018 VG_(addToFM)( dict, (UWord)ent, 0 ); 4019 } 4020 } 4021 4022 VG_(deleteFM)( dict, NULL, NULL ); 4023 4024 return nDeleted; 4025 } 4026 4027 4028 static 4029 void dedup_types ( Bool td3, 4030 /*MOD*/XArray* /* of TyEnt */ ents, 4031 TyEntIndexCache* ents_cache ) 4032 { 4033 Word m, n, i, nDel, nSubst, nThresh; 4034 if (0) td3 = True; 4035 4036 n = VG_(sizeXA)( ents ); 4037 4038 /* If a commoning pass and a substitution pass both make fewer than 4039 this many changes, just stop. It's pointless to burn up CPU 4040 time trying to compress the last 1% or so out of the array. */ 4041 nThresh = n / 200; 4042 4043 /* First we must sort .ents by its .cuOff fields, so we 4044 can index into it. */ 4045 VG_(setCmpFnXA)( ents, (XACmpFn_t) ML_(TyEnt__cmp_by_cuOff_only) ); 4046 VG_(sortXA)( ents ); 4047 4048 /* Now repeatedly do commoning and substitution passes over 4049 the array, until there are no more changes. */ 4050 do { 4051 nDel = dedup_types_commoning_pass ( ents ); 4052 nSubst = dedup_types_substitution_pass ( ents, ents_cache ); 4053 vg_assert(nDel >= 0 && nSubst >= 0); 4054 TRACE_D3(" %ld deletions, %ld substitutions\n", nDel, nSubst); 4055 } while (nDel > nThresh || nSubst > nThresh); 4056 4057 /* Sanity check: all INDIR nodes should point at a non-INDIR thing. 4058 In fact this should be true at the end of every loop iteration 4059 above (a commoning pass followed by a substitution pass), but 4060 checking it on every iteration is excessively expensive. Note, 4061 this loop also computes 'm' for the stats printing below it. 
*/ 4062 m = 0; 4063 n = VG_(sizeXA)( ents ); 4064 for (i = 0; i < n; i++) { 4065 TyEnt *ent, *ind; 4066 ent = VG_(indexXA)( ents, i ); 4067 if (ent->tag != Te_INDIR) continue; 4068 m++; 4069 ind = ML_(TyEnts__index_by_cuOff)( ents, ents_cache, 4070 ent->Te.INDIR.indR ); 4071 vg_assert(ind); 4072 vg_assert(ind->tag != Te_INDIR); 4073 } 4074 4075 TRACE_D3("Overall: %ld before, %ld after\n", n, n-m); 4076 } 4077 4078 4079 /*------------------------------------------------------------*/ 4080 /*--- ---*/ 4081 /*--- Resolution of references to type DIEs ---*/ 4082 /*--- ---*/ 4083 /*------------------------------------------------------------*/ 4084 4085 /* Make a pass through the (temporary) variables array. Examine the 4086 type of each variable, check is it found, and chase any Te_INDIRs. 4087 Postcondition is: each variable has a typeR field that refers to a 4088 valid type in tyents, or a Te_UNKNOWN, and is certainly guaranteed 4089 not to refer to a Te_INDIR. (This is so that we can throw all the 4090 Te_INDIRs away later). */ 4091 4092 __attribute__((noinline)) 4093 static void resolve_variable_types ( 4094 void (*barf)( const HChar* ) __attribute__((noreturn)), 4095 /*R-O*/XArray* /* of TyEnt */ ents, 4096 /*MOD*/TyEntIndexCache* ents_cache, 4097 /*MOD*/XArray* /* of TempVar* */ vars 4098 ) 4099 { 4100 Word i, n; 4101 n = VG_(sizeXA)( vars ); 4102 for (i = 0; i < n; i++) { 4103 TempVar* var = *(TempVar**)VG_(indexXA)( vars, i ); 4104 /* This is the stated type of the variable. But it might be 4105 an indirection, so be careful. 
*/ 4106 TyEnt* ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache, 4107 var->typeR ); 4108 if (ent && ent->tag == Te_INDIR) { 4109 ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache, 4110 ent->Te.INDIR.indR ); 4111 vg_assert(ent); 4112 vg_assert(ent->tag != Te_INDIR); 4113 } 4114 4115 /* Deal first with "normal" cases */ 4116 if (ent && ML_(TyEnt__is_type)(ent)) { 4117 var->typeR = ent->cuOff; 4118 continue; 4119 } 4120 4121 /* If there's no ent, it probably we did not manage to read a 4122 type at the cuOffset which is stated as being this variable's 4123 type. Maybe a deficiency in parse_type_DIE. Complain. */ 4124 if (ent == NULL) { 4125 VG_(printf)("\n: Invalid cuOff = 0x%05lx\n", var->typeR ); 4126 barf("resolve_variable_types: " 4127 "cuOff does not refer to a known type"); 4128 } 4129 vg_assert(ent); 4130 /* If ent has any other tag, something bad happened, along the 4131 lines of var->typeR not referring to a type at all. */ 4132 vg_assert(ent->tag == Te_UNKNOWN); 4133 /* Just accept it; the type will be useless, but at least keep 4134 going. 
*/
      var->typeR = ent->cuOff;
   }
}


/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- Parsing of Compilation Units                         ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

/* Comparison function for arrays whose elements are TempVar*
   (so v1 and v2 each point at a TempVar*).  Orders by ascending
   'dioff'; returns -1, 0 or 1. */
static Int cmp_TempVar_by_dioff ( const void* v1, const void* v2 ) {
   const TempVar* t1 = *(const TempVar *const *)v1;
   const TempVar* t2 = *(const TempVar *const *)v2;
   if (t1->dioff < t2->dioff) return -1;
   if (t1->dioff > t2->dioff) return 1;
   return 0;
}

/* Read the DIE at the current position of cursor 'c', and then,
   recursively, its children.

   The DIE's abbreviation code is read and looked up via 'cc'.  The
   DIE is then offered to each enabled parser in turn: the type and
   variable parsers when VG_(clo_read_var_info) is set, and the
   inline-info parser when VG_(clo_read_inline_info) is set.  Every
   parser is started from the same cursor position.  If none of them
   moved the cursor, the DIE is stepped over with skip_DIE.

   If the abbreviation says the DIE has children, they are either
   read by recursive calls at 'level'+1, or skipped entirely by
   jumping to the DIE's sibling (only when no parser asked for the
   children and a sibling offset is known).

   cc->barf is called for a zero tag or for a has_children value
   that is neither DW_children_no nor DW_children_yes. */
static void read_DIE (
   /*MOD*/WordFM* /* of (XArray* of AddrRange, void) */ rangestree,
   /*MOD*/XArray* /* of TyEnt */ tyents,
   /*MOD*/XArray* /* of TempVar* */ tempvars,
   /*MOD*/XArray* /* of GExpr* */ gexprs,
   /*MOD*/D3TypeParser* typarser,
   /*MOD*/D3VarParser* varparser,
   /*MOD*/D3InlParser* inlparser,
   Cursor* c, Bool td3, CUConst* cc, Int level
)
{
   const g_abbv *abbv;
   ULong atag, abbv_code;
   UWord posn;
   UInt  has_children;
   UWord start_die_c_offset;
   UWord after_die_c_offset;
   // If the DIE we are about to parse has a sibling, and no parser
   // indicates that the children need to be parsed, then the
   // children are skipped by jumping straight to that sibling.
   // 'sibling' stays 0 for as long as no sibling offset is known.
   UWord sibling = 0;
   Bool  parse_children = False;

   /* --- Deal with this DIE --- */
   posn      = cook_die( cc, get_position_of_Cursor( c ) );
   abbv_code = get_ULEB128( c );
   abbv      = get_abbv(cc, abbv_code);
   atag      = abbv->atag;

   if (TD3) {
      TRACE_D3("\n");
      trace_DIE ((DW_TAG)atag, posn, level,
                 get_position_of_Cursor( c ), abbv, cc);
   }

   if (atag == 0)
      cc->barf("read_DIE: invalid zero tag on DIE");

   has_children = abbv->has_children;
   if (has_children != DW_children_no && has_children != DW_children_yes)
      cc->barf("read_DIE: invalid has_children value");

   /* We're set up to look at the fields of this DIE.  Hand it off to
      any parser(s) that want to see it.  Since they will in general
      advance the DIE cursor, remember the current position so that
      we can then back up. */
   start_die_c_offset  = get_position_of_Cursor( c );
   after_die_c_offset  = 0; // set to c position if a parser has read the DIE.
                            // 0 therefore also means "no parser read it".

   if (VG_(clo_read_var_info)) {
      parse_type_DIE( tyents,
                      typarser,
                      (DW_TAG)atag,
                      posn,
                      level,
                      c,     /* DIE cursor */
                      abbv,  /* abbrev */
                      cc,
                      td3 );
      /* If the type parser consumed the DIE, note where it ended and
         rewind, so that the next parser starts at the same place. */
      if (get_position_of_Cursor( c ) != start_die_c_offset) {
         after_die_c_offset = get_position_of_Cursor( c );
         set_position_of_Cursor( c, start_die_c_offset );
      }

      parse_var_DIE( rangestree,
                     tempvars,
                     gexprs,
                     varparser,
                     (DW_TAG)atag,
                     posn,
                     level,
                     c,     /* DIE cursor */
                     abbv,  /* abbrev */
                     cc,
                     td3 );
      /* Same again for the variable parser. */
      if (get_position_of_Cursor( c ) != start_die_c_offset) {
         after_die_c_offset = get_position_of_Cursor( c );
         set_position_of_Cursor( c, start_die_c_offset );
      }

      parse_children = True;
      // The type and var parsers have no logic to skip children or to
      // establish the value of sibling, so children must always be
      // parsed when they are in use.
   }

   if (VG_(clo_read_inline_info)) {
      inlparser->sibling = 0;
      /* The inline parser's return value is OR-ed into
         parse_children; the call is made first so that it always
         happens, whatever parse_children currently is. */
      parse_children =
         parse_inl_DIE( inlparser,
                        (DW_TAG)atag,
                        posn,
                        level,
                        c,     /* DIE cursor */
                        abbv,  /* abbrev */
                        cc,
                        td3 )
         || parse_children;
      if (get_position_of_Cursor( c ) != start_die_c_offset) {
         after_die_c_offset = get_position_of_Cursor( c );
         // Last parser, no need to reset the cursor to start_die_c_offset.
      }
      /* Pick up the sibling offset found by the inline parser, if
         it found one. */
      if (sibling == 0)
         sibling = inlparser->sibling;
      vg_assert (inlparser->sibling == 0 || inlparser->sibling == sibling);
   }

   if (after_die_c_offset > 0) {
      // DIE was read by a parser above, so we know where the DIE ends.
      set_position_of_Cursor( c, after_die_c_offset );
   } else {
      /* No parser has parsed this DIE.  So we need to skip the DIE,
         in order to read the next DIE.
         At the same time, establish sibling value if the DIE has one. */
      TRACE_D3(" uninteresting DIE -> skipping ...\n");
      skip_DIE (&sibling, c, abbv, cc);
   }

   /* --- Now recurse into its children, if any
      and the parsing of the children is requested by a parser --- */
   if (has_children == DW_children_yes) {
      /* With no known sibling offset there is nothing to jump to, so
         the children have to be walked even if no parser wants them. */
      if (parse_children || sibling == 0) {
         if (0) TRACE_D3("BEGIN children of level %d\n", level);
         while (True) {
            /* A zero abbreviation code marks the end of the list of
               children. */
            atag = peek_ULEB128( c );
            if (atag == 0) break;
            read_DIE( rangestree, tyents, tempvars, gexprs,
                      typarser, varparser, inlparser,
                      c, td3, cc, level+1 );
         }
         /* Now we need to eat the terminating zero */
         atag = get_ULEB128( c );
         vg_assert(atag == 0);
         if (0) TRACE_D3("END children of level %d\n", level);
      } else {
         // We can skip the children, by jumping to the sibling
         TRACE_D3(" SKIPPING DIE's children,"
                  "jumping to sibling <%d><%lx>\n",
                  level, sibling);
         set_position_of_Cursor( c, sibling );
      }
   }

}

/* Intended to display the contents of .debug_loc.  The whole body
   is currently compiled out with '#if 0' (see the comment inside for
   the reason), so as it stands this function does nothing. */
static void trace_debug_loc (const DebugInfo* di,
                             __attribute__((noreturn)) void (*barf)( const HChar* ),
                             DiSlice escn_debug_loc)
{
#if 0
   /* This doesn't work properly because it assumes all entries are
      packed end to end, with no holes.  But that doesn't always
      appear to be the case, so it loses sync.  And the D3 spec
      doesn't appear to require a no-hole situation either. */
   /* Display .debug_loc */
   Addr  dl_base;
   UWord dl_offset;
   Cursor loc; /* for showing .debug_loc */
   Bool td3 = di->trace_symtab;

   TRACE_SYMTAB("\n");
   TRACE_SYMTAB("\n------ The contents of .debug_loc ------\n");
   TRACE_SYMTAB(" Offset Begin End Expression\n");
   if (ML_(sli_is_valid)(escn_debug_loc)) {
      init_Cursor( &loc, escn_debug_loc, 0, barf,
                   "Overrun whilst reading .debug_loc section(1)" );
      dl_base = 0;
      dl_offset = 0;
      while (True) {
         UWord w1, w2;
         UWord len;
         if (is_at_end_Cursor( &loc ))
            break;

         /* Read a (host-)word pair.  This is something of a hack since
            the word size to read is really dictated by the ELF file;
            however, we assume we're reading a file with the same
            word-sizeness as the host.  Reasonably enough. */
         w1 = get_UWord( &loc );
         w2 = get_UWord( &loc );

         if (w1 == 0 && w2 == 0) {
            /* end of list.  reset 'base' */
            TRACE_D3(" %08lx <End of list>\n", dl_offset);
            dl_base = 0;
            dl_offset = get_position_of_Cursor( &loc );
            continue;
         }

         if (w1 == -1UL) {
            /* new value for 'base' */
            TRACE_D3(" %08lx %16lx %08lx (base address)\n",
                     dl_offset, w1, w2);
            dl_base = w2;
            continue;
         }

         /* else a location expression follows */
         TRACE_D3(" %08lx %08lx %08lx ",
                  dl_offset, w1 + dl_base, w2 + dl_base);
         len = (UWord)get_UShort( &loc );
         while (len > 0) {
            UChar byte = get_UChar( &loc );
            TRACE_D3("%02x", (UInt)byte);
            len--;
         }
         TRACE_SYMTAB("\n");
      }
   }
#endif
}

/* Display the contents of .debug_ranges as a sequence of word pairs,
   read until the cursor reaches the end of the section.  Nothing
   beyond the heading is printed if 'escn_debug_ranges' is not a
   valid slice.  'barf' is handed to init_Cursor together with an
   overrun message.
   NOTE(review): the TRACE_SYMTAB / TRACE_D3 macros presumably test
   the local 'td3' before printing -- confirm against their
   definitions. */
static void trace_debug_ranges (const DebugInfo* di,
                                __attribute__((noreturn)) void (*barf)( const HChar* ),
                                DiSlice escn_debug_ranges)
{
   Cursor ranges; /* for showing .debug_ranges */
   Addr  dr_base;
   UWord dr_offset;
   Bool td3 = di->trace_symtab;

   /* Display .debug_ranges */
   TRACE_SYMTAB("\n");
   TRACE_SYMTAB("\n------ The contents of .debug_ranges ------\n");
   TRACE_SYMTAB(" Offset Begin End\n");
   if (ML_(sli_is_valid)(escn_debug_ranges)) {
      init_Cursor( &ranges, escn_debug_ranges, 0, barf,
                   "Overrun whilst reading .debug_ranges section(1)" );
      dr_base = 0;
      /* dr_offset is only updated when an end-of-list pair is seen,
         so the offset shown for every entry is that of the start of
         the list the entry belongs to. */
      dr_offset = 0;
      while (True) {
         UWord w1, w2;

         if (is_at_end_Cursor( &ranges ))
            break;

         /* Read a (host-)word pair.  This is something of a hack since
            the word size to read is really dictated by the ELF file;
            however, we assume we're reading a file with the same
            word-sizeness as the host.  Reasonably enough. */
         w1 = get_UWord( &ranges );
         w2 = get_UWord( &ranges );

         if (w1 == 0 && w2 == 0) {
            /* end of list.  reset 'base' */
            TRACE_D3(" %08lx <End of list>\n", dr_offset);
            dr_base = 0;
            dr_offset = get_position_of_Cursor( &ranges );
            continue;
         }

         if (w1 == -1UL) {
            /* new value for 'base' */
            TRACE_D3(" %08lx %16lx %08lx (base address)\n",
                     dr_offset, w1, w2);
            dr_base = w2;
            continue;
         }

         /* else a range [w1+base, w2+base) is denoted */
         TRACE_D3(" %08lx %08lx %08lx\n",
                  dr_offset, w1 + dr_base, w2 + dr_base);
      }
   }
}

/* Display the contents of .debug_abbrev: for every abbreviation
   table, each entry's code, tag and has-children flag, followed by
   the entry's (attribute name, form) pairs.  Nothing beyond the
   heading is printed if 'escn_debug_abbv' is not a valid slice.
   'barf' is handed to init_Cursor together with an overrun
   message. */
static void trace_debug_abbrev (const DebugInfo* di,
                                __attribute__((noreturn)) void (*barf)( const HChar* ),
                                DiSlice escn_debug_abbv)
{
   Cursor abbv; /* for showing .debug_abbrev */
   Bool td3 = di->trace_symtab;

   /* Display .debug_abbrev */
   TRACE_SYMTAB("\n");
   TRACE_SYMTAB("\n------ The contents of .debug_abbrev ------\n");
   if (ML_(sli_is_valid)(escn_debug_abbv)) {
      init_Cursor( &abbv, escn_debug_abbv, 0, barf,
                   "Overrun whilst reading .debug_abbrev section" );
      /* Outer loop: one iteration per abbreviation table, until the
         section is exhausted. */
      while (True) {
         if (is_at_end_Cursor( &abbv ))
            break;
         /* Read one abbreviation table */
         TRACE_D3(" Number TAG\n");
         /* Middle loop: one iteration per entry in the table. */
         while (True) {
            ULong atag;
            UInt  has_children;
            ULong acode = get_ULEB128( &abbv );
            if (acode == 0) break; /* end of the table */
            atag = get_ULEB128( &abbv );
            has_children = get_UChar( &abbv );
            TRACE_D3(" %llu %s [%s]\n",
                     acode, ML_(pp_DW_TAG)(atag),
                            ML_(pp_DW_children)(has_children));
            /* Inner loop: the entry's attribute specifications,
               terminated by a (0, 0) pair. */
            while (True) {
               ULong at_name = get_ULEB128( &abbv );
               ULong at_form = get_ULEB128( &abbv );
               if (at_name == 0 && at_form == 0) break;
               TRACE_D3(" %-18s %s\n",
                        ML_(pp_DW_AT)(at_name), ML_(pp_DW_FORM)(at_form));
            }
         }
      }
   }
}

static
void new_dwarf3_reader_wrk (
   DebugInfo* di,
   __attribute__((noreturn)) void (*barf)( const HChar* ),
   DiSlice escn_debug_info,      DiSlice escn_debug_types,
   DiSlice escn_debug_abbv,
DiSlice escn_debug_line, 4466 DiSlice escn_debug_str, DiSlice escn_debug_ranges, 4467 DiSlice escn_debug_loc, DiSlice escn_debug_info_alt, 4468 DiSlice escn_debug_abbv_alt, DiSlice escn_debug_line_alt, 4469 DiSlice escn_debug_str_alt 4470 ) 4471 { 4472 XArray* /* of TyEnt */ tyents = NULL; 4473 XArray* /* of TyEnt */ tyents_to_keep = NULL; 4474 XArray* /* of GExpr* */ gexprs = NULL; 4475 XArray* /* of TempVar* */ tempvars = NULL; 4476 WordFM* /* of (XArray* of AddrRange, void) */ rangestree = NULL; 4477 TyEntIndexCache* tyents_cache = NULL; 4478 TyEntIndexCache* tyents_to_keep_cache = NULL; 4479 TempVar *varp, *varp2; 4480 GExpr* gexpr; 4481 Cursor info; /* primary cursor for parsing .debug_info */ 4482 D3TypeParser typarser; 4483 D3VarParser varparser; 4484 D3InlParser inlparser; 4485 Word i, j, n; 4486 Bool td3 = di->trace_symtab; 4487 XArray* /* of TempVar* */ dioff_lookup_tab; 4488 Int pass; 4489 VgHashTable *signature_types = NULL; 4490 4491 /* Display/trace various information, if requested. */ 4492 if (TD3) { 4493 trace_debug_loc (di, barf, escn_debug_loc); 4494 trace_debug_ranges (di, barf, escn_debug_ranges); 4495 trace_debug_abbrev (di, barf, escn_debug_abbv); 4496 TRACE_SYMTAB("\n"); 4497 } 4498 4499 /* Zero out all parsers. Parsers will really be initialised 4500 according to VG_(clo_read_*_info). */ 4501 VG_(memset)( &inlparser, 0, sizeof(inlparser) ); 4502 4503 if (VG_(clo_read_var_info)) { 4504 /* We'll park the harvested type information in here. Also create 4505 a fake "void" entry with offset D3_FAKEVOID_CUOFF, so we always 4506 have at least one type entry to refer to. D3_FAKEVOID_CUOFF is 4507 huge and presumably will not occur in any valid DWARF3 file -- 4508 it would need to have a .debug_info section 4GB long for that to 4509 happen. These type entries end up in the DebugInfo. 
*/ 4510 tyents = VG_(newXA)( ML_(dinfo_zalloc), 4511 "di.readdwarf3.ndrw.1 (TyEnt temp array)", 4512 ML_(dinfo_free), sizeof(TyEnt) ); 4513 { TyEnt tyent; 4514 VG_(memset)(&tyent, 0, sizeof(tyent)); 4515 tyent.tag = Te_TyVoid; 4516 tyent.cuOff = D3_FAKEVOID_CUOFF; 4517 tyent.Te.TyVoid.isFake = True; 4518 VG_(addToXA)( tyents, &tyent ); 4519 } 4520 { TyEnt tyent; 4521 VG_(memset)(&tyent, 0, sizeof(tyent)); 4522 tyent.tag = Te_UNKNOWN; 4523 tyent.cuOff = D3_INVALID_CUOFF; 4524 VG_(addToXA)( tyents, &tyent ); 4525 } 4526 4527 /* This is a tree used to unique-ify the range lists that are 4528 manufactured by parse_var_DIE. References to the keys in the 4529 tree wind up in .rngMany fields in TempVars. We'll need to 4530 delete this tree, and the XArrays attached to it, at the end of 4531 this function. */ 4532 rangestree = VG_(newFM)( ML_(dinfo_zalloc), 4533 "di.readdwarf3.ndrw.2 (rangestree)", 4534 ML_(dinfo_free), 4535 (Word(*)(UWord,UWord))cmp__XArrays_of_AddrRange ); 4536 4537 /* List of variables we're accumulating. These don't end up in the 4538 DebugInfo; instead their contents are handed to ML_(addVar) and 4539 the list elements are then deleted. */ 4540 tempvars = VG_(newXA)( ML_(dinfo_zalloc), 4541 "di.readdwarf3.ndrw.3 (TempVar*s array)", 4542 ML_(dinfo_free), 4543 sizeof(TempVar*) ); 4544 4545 /* List of GExprs we're accumulating. These wind up in the 4546 DebugInfo. */ 4547 gexprs = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.4", 4548 ML_(dinfo_free), sizeof(GExpr*) ); 4549 4550 /* We need a D3TypeParser to keep track of partially constructed 4551 types. It'll be discarded as soon as we've completed the CU, 4552 since the resulting information is tipped in to 'tyents' as it 4553 is generated. 
*/ 4554 type_parser_init(&typarser); 4555 4556 var_parser_init(&varparser); 4557 4558 signature_types = VG_(HT_construct) ("signature_types"); 4559 } 4560 4561 /* Do an initial pass to scan the .debug_types section, if any, and 4562 fill in the signatured types hash table. This lets us handle 4563 mapping from a type signature to a (cooked) DIE offset directly 4564 in get_Form_contents. */ 4565 if (VG_(clo_read_var_info) && ML_(sli_is_valid)(escn_debug_types)) { 4566 init_Cursor( &info, escn_debug_types, 0, barf, 4567 "Overrun whilst reading .debug_types section" ); 4568 TRACE_D3("\n------ Collecting signatures from " 4569 ".debug_types section ------\n"); 4570 4571 while (True) { 4572 UWord cu_start_offset, cu_offset_now; 4573 CUConst cc; 4574 4575 cu_start_offset = get_position_of_Cursor( &info ); 4576 TRACE_D3("\n"); 4577 TRACE_D3(" Compilation Unit @ offset 0x%lx:\n", cu_start_offset); 4578 /* parse_CU_header initialises the CU's abbv hash table. */ 4579 parse_CU_Header( &cc, td3, &info, escn_debug_abbv, True, False ); 4580 4581 /* Needed by cook_die. */ 4582 cc.types_cuOff_bias = escn_debug_info.szB; 4583 4584 record_signatured_type( signature_types, cc.type_signature, 4585 cook_die( &cc, cc.type_offset )); 4586 4587 /* Until proven otherwise we assume we don't need the icc9 4588 workaround in this case; see the DIE-reading loop below 4589 for details. */ 4590 cu_offset_now = (cu_start_offset + cc.unit_length 4591 + (cc.is_dw64 ? 12 : 4)); 4592 4593 if (cu_offset_now >= escn_debug_types.szB) { 4594 clear_CUConst ( &cc); 4595 break; 4596 } 4597 4598 set_position_of_Cursor ( &info, cu_offset_now ); 4599 } 4600 } 4601 4602 /* Perform three DIE-reading passes. The first pass reads DIEs from 4603 alternate .debug_info (if any), the second pass reads DIEs from 4604 .debug_info, and the third pass reads DIEs from .debug_types. 
4605 Moving the body of this loop into a separate function would 4606 require a large number of arguments to be passed in, so it is 4607 kept inline instead. */ 4608 for (pass = 0; pass < 3; ++pass) { 4609 ULong section_size; 4610 4611 if (pass == 0) { 4612 if (!ML_(sli_is_valid)(escn_debug_info_alt)) 4613 continue; 4614 /* Now loop over the Compilation Units listed in the alternate 4615 .debug_info section (see D3SPEC sec 7.5) paras 1 and 2. 4616 Each compilation unit contains a Compilation Unit Header 4617 followed by precisely one DW_TAG_compile_unit or 4618 DW_TAG_partial_unit DIE. */ 4619 init_Cursor( &info, escn_debug_info_alt, 0, barf, 4620 "Overrun whilst reading alternate .debug_info section" ); 4621 section_size = escn_debug_info_alt.szB; 4622 4623 TRACE_D3("\n------ Parsing alternate .debug_info section ------\n"); 4624 } else if (pass == 1) { 4625 /* Now loop over the Compilation Units listed in the .debug_info 4626 section (see D3SPEC sec 7.5) paras 1 and 2. Each compilation 4627 unit contains a Compilation Unit Header followed by precisely 4628 one DW_TAG_compile_unit or DW_TAG_partial_unit DIE. */ 4629 init_Cursor( &info, escn_debug_info, 0, barf, 4630 "Overrun whilst reading .debug_info section" ); 4631 section_size = escn_debug_info.szB; 4632 4633 TRACE_D3("\n------ Parsing .debug_info section ------\n"); 4634 } else { 4635 if (!ML_(sli_is_valid)(escn_debug_types)) 4636 continue; 4637 if (!VG_(clo_read_var_info)) 4638 continue; // Types not needed when only reading inline info. 4639 init_Cursor( &info, escn_debug_types, 0, barf, 4640 "Overrun whilst reading .debug_types section" ); 4641 section_size = escn_debug_types.szB; 4642 4643 TRACE_D3("\n------ Parsing .debug_types section ------\n"); 4644 } 4645 4646 while (True) { 4647 ULong cu_start_offset, cu_offset_now; 4648 CUConst cc; 4649 /* It may be that the stated size of this CU is larger than the 4650 amount of stuff actually in it. icc9 seems to generate CUs 4651 thusly. 
We use these variables to figure out if this is 4652 indeed the case, and if so how many bytes we need to skip to 4653 get to the start of the next CU. Not skipping those bytes 4654 causes us to misidentify the start of the next CU, and it all 4655 goes badly wrong after that (not surprisingly). */ 4656 UWord cu_size_including_IniLen, cu_amount_used; 4657 4658 /* It seems icc9 finishes the DIE info before debug_info_sz 4659 bytes have been used up. So be flexible, and declare the 4660 sequence complete if there is not enough remaining bytes to 4661 hold even the smallest conceivable CU header. (11 bytes I 4662 reckon). */ 4663 /* JRS 23Jan09: I suspect this is no longer necessary now that 4664 the code below contains a 'while (cu_amount_used < 4665 cu_size_including_IniLen ...' style loop, which skips over 4666 any leftover bytes at the end of a CU in the case where the 4667 CU's stated size is larger than its actual size (as 4668 determined by reading all its DIEs). However, for prudence, 4669 I'll leave the following test in place. I can't see that a 4670 CU header can be smaller than 11 bytes, so I don't think 4671 there's any harm possible through the test -- it just adds 4672 robustness. */ 4673 Word avail = get_remaining_length_Cursor( &info ); 4674 if (avail < 11) { 4675 if (avail > 0) 4676 TRACE_D3("new_dwarf3_reader_wrk: warning: " 4677 "%ld unused bytes after end of DIEs\n", avail); 4678 break; 4679 } 4680 4681 if (VG_(clo_read_var_info)) { 4682 /* Check the varparser's stack is in a sane state. */ 4683 vg_assert(varparser.sp == -1); 4684 /* Check the typarser's stack is in a sane state. 
*/ 4685 vg_assert(typarser.sp == -1); 4686 } 4687 4688 cu_start_offset = get_position_of_Cursor( &info ); 4689 TRACE_D3("\n"); 4690 TRACE_D3(" Compilation Unit @ offset 0x%llx:\n", cu_start_offset); 4691 /* parse_CU_header initialises the CU's hashtable of abbvs ht_abbvs */ 4692 if (pass == 0) { 4693 parse_CU_Header( &cc, td3, &info, escn_debug_abbv_alt, 4694 False, True ); 4695 } else { 4696 parse_CU_Header( &cc, td3, &info, escn_debug_abbv, 4697 pass == 2, False ); 4698 } 4699 cc.escn_debug_str = pass == 0 ? escn_debug_str_alt 4700 : escn_debug_str; 4701 cc.escn_debug_ranges = escn_debug_ranges; 4702 cc.escn_debug_loc = escn_debug_loc; 4703 cc.escn_debug_line = pass == 0 ? escn_debug_line_alt 4704 : escn_debug_line; 4705 cc.escn_debug_info = pass == 0 ? escn_debug_info_alt 4706 : escn_debug_info; 4707 cc.escn_debug_types = escn_debug_types; 4708 cc.escn_debug_info_alt = escn_debug_info_alt; 4709 cc.escn_debug_str_alt = escn_debug_str_alt; 4710 cc.types_cuOff_bias = escn_debug_info.szB; 4711 cc.alt_cuOff_bias = escn_debug_info.szB + escn_debug_types.szB; 4712 cc.cu_start_offset = cu_start_offset; 4713 cc.di = di; 4714 /* The CU's svma can be deduced by looking at the AT_low_pc 4715 value in the top level TAG_compile_unit, which is the topmost 4716 DIE. We'll leave it for the 'varparser' to acquire that info 4717 and fill it in -- since it is the only party to want to know 4718 it. */ 4719 cc.cu_svma_known = False; 4720 cc.cu_svma = 0; 4721 4722 if (VG_(clo_read_var_info)) { 4723 cc.signature_types = signature_types; 4724 4725 /* Create a fake outermost-level range covering the entire 4726 address range. So we always have *something* to catch all 4727 variable declarations. */ 4728 varstack_push( &cc, &varparser, td3, 4729 unitary_range_list(0UL, ~0UL), 4730 -1, False/*isFunc*/, NULL/*fbGX*/ ); 4731 4732 /* And set up the fndn_ix_Table. 
When we come across the top 4733 level DIE for this CU (which is what the next call to 4734 read_DIE should process) we will copy all the file names out 4735 of the .debug_line img area and use this table to look up the 4736 copies when we later see filename numbers in DW_TAG_variables 4737 etc. */ 4738 vg_assert(!varparser.fndn_ix_Table ); 4739 varparser.fndn_ix_Table 4740 = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.5var", 4741 ML_(dinfo_free), 4742 sizeof(UInt) ); 4743 } 4744 4745 if (VG_(clo_read_inline_info)) { 4746 /* fndn_ix_Table for the inlined call parser */ 4747 vg_assert(!inlparser.fndn_ix_Table ); 4748 inlparser.fndn_ix_Table 4749 = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.5inl", 4750 ML_(dinfo_free), 4751 sizeof(UInt) ); 4752 } 4753 4754 /* Now read the one-and-only top-level DIE for this CU. */ 4755 vg_assert(!VG_(clo_read_var_info) || varparser.sp == 0); 4756 read_DIE( rangestree, 4757 tyents, tempvars, gexprs, 4758 &typarser, &varparser, &inlparser, 4759 &info, td3, &cc, 0 ); 4760 4761 cu_offset_now = get_position_of_Cursor( &info ); 4762 4763 if (0) VG_(printf)("Travelled: %llu size %llu\n", 4764 cu_offset_now - cc.cu_start_offset, 4765 cc.unit_length + (cc.is_dw64 ? 12 : 4)); 4766 4767 /* How big the CU claims it is .. */ 4768 cu_size_including_IniLen = cc.unit_length + (cc.is_dw64 ? 12 : 4); 4769 /* .. vs how big we have found it to be */ 4770 cu_amount_used = cu_offset_now - cc.cu_start_offset; 4771 4772 if (1) TRACE_D3("offset now %lld, d-i-size %lld\n", 4773 cu_offset_now, section_size); 4774 if (cu_offset_now > section_size) 4775 barf("toplevel DIEs beyond end of CU"); 4776 4777 /* If the CU is bigger than it claims to be, we've got a serious 4778 problem. */ 4779 if (cu_amount_used > cu_size_including_IniLen) 4780 barf("CU's actual size appears to be larger than it claims it is"); 4781 4782 /* If the CU is smaller than it claims to be, we need to skip some 4783 bytes. Loop updates cu_offset_new and cu_amount_used. 
*/ 4784 while (cu_amount_used < cu_size_including_IniLen 4785 && get_remaining_length_Cursor( &info ) > 0) { 4786 if (0) VG_(printf)("SKIP\n"); 4787 (void)get_UChar( &info ); 4788 cu_offset_now = get_position_of_Cursor( &info ); 4789 cu_amount_used = cu_offset_now - cc.cu_start_offset; 4790 } 4791 4792 if (VG_(clo_read_var_info)) { 4793 /* Preen to level -2. DIEs have level >= 0 so -2 cannot occur 4794 anywhere else at all. Our fake the-entire-address-space 4795 range is at level -1, so preening to -2 should completely 4796 empty the stack out. */ 4797 TRACE_D3("\n"); 4798 varstack_preen( &varparser, td3, -2 ); 4799 /* Similarly, empty the type stack out. */ 4800 typestack_preen( &typarser, td3, -2 ); 4801 } 4802 4803 if (VG_(clo_read_var_info)) { 4804 vg_assert(varparser.fndn_ix_Table ); 4805 VG_(deleteXA)( varparser.fndn_ix_Table ); 4806 varparser.fndn_ix_Table = NULL; 4807 } 4808 if (VG_(clo_read_inline_info)) { 4809 vg_assert(inlparser.fndn_ix_Table ); 4810 VG_(deleteXA)( inlparser.fndn_ix_Table ); 4811 inlparser.fndn_ix_Table = NULL; 4812 } 4813 clear_CUConst(&cc); 4814 4815 if (cu_offset_now == section_size) 4816 break; 4817 /* else keep going */ 4818 } 4819 } 4820 4821 4822 if (VG_(clo_read_var_info)) { 4823 /* From here on we're post-processing the stuff we got 4824 out of the .debug_info section. 
*/ 4825 if (TD3) { 4826 TRACE_D3("\n"); 4827 ML_(pp_TyEnts)(tyents, "Initial type entity (TyEnt) array"); 4828 TRACE_D3("\n"); 4829 TRACE_D3("------ Compressing type entries ------\n"); 4830 } 4831 4832 tyents_cache = ML_(dinfo_zalloc)( "di.readdwarf3.ndrw.6", 4833 sizeof(TyEntIndexCache) ); 4834 ML_(TyEntIndexCache__invalidate)( tyents_cache ); 4835 dedup_types( td3, tyents, tyents_cache ); 4836 if (TD3) { 4837 TRACE_D3("\n"); 4838 ML_(pp_TyEnts)(tyents, "After type entity (TyEnt) compression"); 4839 } 4840 4841 TRACE_D3("\n"); 4842 TRACE_D3("------ Resolving the types of variables ------\n" ); 4843 resolve_variable_types( barf, tyents, tyents_cache, tempvars ); 4844 4845 /* Copy all the non-INDIR tyents into a new table. For large 4846 .so's, about 90% of the tyents will by now have been resolved to 4847 INDIRs, and we no longer need them, and so don't need to store 4848 them. */ 4849 tyents_to_keep 4850 = VG_(newXA)( ML_(dinfo_zalloc), 4851 "di.readdwarf3.ndrw.7 (TyEnt to-keep array)", 4852 ML_(dinfo_free), sizeof(TyEnt) ); 4853 n = VG_(sizeXA)( tyents ); 4854 for (i = 0; i < n; i++) { 4855 TyEnt* ent = VG_(indexXA)( tyents, i ); 4856 if (ent->tag != Te_INDIR) 4857 VG_(addToXA)( tyents_to_keep, ent ); 4858 } 4859 4860 VG_(deleteXA)( tyents ); 4861 tyents = NULL; 4862 ML_(dinfo_free)( tyents_cache ); 4863 tyents_cache = NULL; 4864 4865 /* Sort tyents_to_keep so we can lookup in it. A complete (if 4866 minor) waste of time, since tyents itself is sorted, but 4867 necessary since VG_(lookupXA) refuses to cooperate if we 4868 don't. */ 4869 VG_(setCmpFnXA)( tyents_to_keep, (XACmpFn_t) ML_(TyEnt__cmp_by_cuOff_only) ); 4870 VG_(sortXA)( tyents_to_keep ); 4871 4872 /* Enable cacheing on tyents_to_keep */ 4873 tyents_to_keep_cache 4874 = ML_(dinfo_zalloc)( "di.readdwarf3.ndrw.8", 4875 sizeof(TyEntIndexCache) ); 4876 ML_(TyEntIndexCache__invalidate)( tyents_to_keep_cache ); 4877 4878 /* And record the tyents in the DebugInfo. 
We do this before 4879 starting to hand variables to ML_(addVar), since if ML_(addVar) 4880 wants to do debug printing (of the types of said vars) then it 4881 will need the tyents.*/ 4882 vg_assert(!di->admin_tyents); 4883 di->admin_tyents = tyents_to_keep; 4884 4885 /* Bias all the location expressions. */ 4886 TRACE_D3("\n"); 4887 TRACE_D3("------ Biasing the location expressions ------\n" ); 4888 4889 n = VG_(sizeXA)( gexprs ); 4890 for (i = 0; i < n; i++) { 4891 gexpr = *(GExpr**)VG_(indexXA)( gexprs, i ); 4892 bias_GX( gexpr, di ); 4893 } 4894 4895 TRACE_D3("\n"); 4896 TRACE_D3("------ Acquired the following variables: ------\n\n"); 4897 4898 /* Park (pointers to) all the vars in an XArray, so we can look up 4899 abstract origins quickly. The array is sorted (hence, looked-up 4900 by) the .dioff fields. Since the .dioffs should be in strictly 4901 ascending order, there is no need to sort the array after 4902 construction. The ascendingness is however asserted for. */ 4903 dioff_lookup_tab 4904 = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.9", 4905 ML_(dinfo_free), 4906 sizeof(TempVar*) ); 4907 4908 n = VG_(sizeXA)( tempvars ); 4909 Word first_primary_var = 0; 4910 for (first_primary_var = 0; 4911 escn_debug_info_alt.szB/*really?*/ && first_primary_var < n; 4912 first_primary_var++) { 4913 varp = *(TempVar**)VG_(indexXA)( tempvars, first_primary_var ); 4914 if (varp->dioff < escn_debug_info.szB + escn_debug_types.szB) 4915 break; 4916 } 4917 for (i = 0; i < n; i++) { 4918 varp = *(TempVar**)VG_(indexXA)( tempvars, (i + first_primary_var) % n ); 4919 if (i > first_primary_var) { 4920 varp2 = *(TempVar**)VG_(indexXA)( tempvars, 4921 (i + first_primary_var - 1) % n ); 4922 /* why should this hold? Only, I think, because we've 4923 constructed the array by reading .debug_info sequentially, 4924 and so the array .dioff fields should reflect that, and be 4925 strictly ascending. 
*/ 4926 vg_assert(varp2->dioff < varp->dioff); 4927 } 4928 VG_(addToXA)( dioff_lookup_tab, &varp ); 4929 } 4930 VG_(setCmpFnXA)( dioff_lookup_tab, cmp_TempVar_by_dioff ); 4931 VG_(sortXA)( dioff_lookup_tab ); /* POINTLESS; FIXME: rm */ 4932 4933 /* Now visit each var. Collect up as much info as possible for 4934 each var and hand it to ML_(addVar). */ 4935 n = VG_(sizeXA)( tempvars ); 4936 for (j = 0; j < n; j++) { 4937 TyEnt* ent; 4938 varp = *(TempVar**)VG_(indexXA)( tempvars, j ); 4939 4940 /* Possibly show .. */ 4941 if (TD3) { 4942 VG_(printf)("<%lx> addVar: level %d: %s :: ", 4943 varp->dioff, 4944 varp->level, 4945 varp->name ? varp->name : "<anon_var>" ); 4946 if (varp->typeR) { 4947 ML_(pp_TyEnt_C_ishly)( tyents_to_keep, varp->typeR ); 4948 } else { 4949 VG_(printf)("NULL"); 4950 } 4951 VG_(printf)("\n Loc="); 4952 if (varp->gexpr) { 4953 ML_(pp_GX)(varp->gexpr); 4954 } else { 4955 VG_(printf)("NULL"); 4956 } 4957 VG_(printf)("\n"); 4958 if (varp->fbGX) { 4959 VG_(printf)(" FrB="); 4960 ML_(pp_GX)( varp->fbGX ); 4961 VG_(printf)("\n"); 4962 } else { 4963 VG_(printf)(" FrB=none\n"); 4964 } 4965 VG_(printf)(" declared at: %d %s:%d\n", 4966 varp->fndn_ix, 4967 ML_(fndn_ix2filename) (di, varp->fndn_ix), 4968 varp->fLine ); 4969 if (varp->absOri != (UWord)D3_INVALID_CUOFF) 4970 VG_(printf)(" abstract origin: <%lx>\n", varp->absOri); 4971 } 4972 4973 /* Skip variables which have no location. These must be 4974 abstract instances; they are useless as-is since with no 4975 location they have no specified memory location. They will 4976 presumably be referred to via the absOri fields of other 4977 variables. */ 4978 if (!varp->gexpr) { 4979 TRACE_D3(" SKIP (no location)\n\n"); 4980 continue; 4981 } 4982 4983 /* So it has a location, at least. If it refers to some other 4984 entry through its absOri field, pull in further info through 4985 that. 
*/ 4986 if (varp->absOri != (UWord)D3_INVALID_CUOFF) { 4987 Bool found; 4988 Word ixFirst, ixLast; 4989 TempVar key; 4990 TempVar* keyp = &key; 4991 TempVar *varAI; 4992 VG_(memset)(&key, 0, sizeof(key)); /* not necessary */ 4993 key.dioff = varp->absOri; /* this is what we want to find */ 4994 found = VG_(lookupXA)( dioff_lookup_tab, &keyp, 4995 &ixFirst, &ixLast ); 4996 if (!found) { 4997 /* barf("DW_AT_abstract_origin can't be resolved"); */ 4998 TRACE_D3(" SKIP (DW_AT_abstract_origin can't be resolved)\n\n"); 4999 continue; 5000 } 5001 /* If the following fails, there is more than one entry with 5002 the same dioff. Which can't happen. */ 5003 vg_assert(ixFirst == ixLast); 5004 varAI = *(TempVar**)VG_(indexXA)( dioff_lookup_tab, ixFirst ); 5005 /* stay sane */ 5006 vg_assert(varAI); 5007 vg_assert(varAI->dioff == varp->absOri); 5008 5009 /* Copy what useful info we can. */ 5010 if (varAI->typeR && !varp->typeR) 5011 varp->typeR = varAI->typeR; 5012 if (varAI->name && !varp->name) 5013 varp->name = varAI->name; 5014 if (varAI->fndn_ix && !varp->fndn_ix) 5015 varp->fndn_ix = varAI->fndn_ix; 5016 if (varAI->fLine > 0 && varp->fLine == 0) 5017 varp->fLine = varAI->fLine; 5018 } 5019 5020 /* Give it a name if it doesn't have one. */ 5021 if (!varp->name) 5022 varp->name = ML_(addStr)( di, "<anon_var>", -1 ); 5023 5024 /* So now does it have enough info to be useful? */ 5025 /* NOTE: re typeR: this is a hack. If typeR is Te_UNKNOWN then 5026 the type didn't get resolved. Really, in that case 5027 something's broken earlier on, and should be fixed, rather 5028 than just skipping the variable. */ 5029 ent = ML_(TyEnts__index_by_cuOff)( tyents_to_keep, 5030 tyents_to_keep_cache, 5031 varp->typeR ); 5032 /* The next two assertions should be guaranteed by 5033 our previous call to resolve_variable_types. 
*/ 5034 vg_assert(ent); 5035 vg_assert(ML_(TyEnt__is_type)(ent) || ent->tag == Te_UNKNOWN); 5036 5037 if (ent->tag == Te_UNKNOWN) continue; 5038 5039 vg_assert(varp->gexpr); 5040 vg_assert(varp->name); 5041 vg_assert(varp->typeR); 5042 vg_assert(varp->level >= 0); 5043 5044 /* Ok. So we're going to keep it. Call ML_(addVar) once for 5045 each address range in which the variable exists. */ 5046 TRACE_D3(" ACQUIRE for range(s) "); 5047 { AddrRange oneRange; 5048 AddrRange* varPcRanges; 5049 Word nVarPcRanges; 5050 /* Set up to iterate over address ranges, however 5051 represented. */ 5052 if (varp->nRanges == 0 || varp->nRanges == 1) { 5053 vg_assert(!varp->rngMany); 5054 if (varp->nRanges == 0) { 5055 vg_assert(varp->rngOneMin == 0); 5056 vg_assert(varp->rngOneMax == 0); 5057 } 5058 nVarPcRanges = varp->nRanges; 5059 oneRange.aMin = varp->rngOneMin; 5060 oneRange.aMax = varp->rngOneMax; 5061 varPcRanges = &oneRange; 5062 } else { 5063 vg_assert(varp->rngMany); 5064 vg_assert(varp->rngOneMin == 0); 5065 vg_assert(varp->rngOneMax == 0); 5066 nVarPcRanges = VG_(sizeXA)(varp->rngMany); 5067 vg_assert(nVarPcRanges >= 2); 5068 vg_assert(nVarPcRanges == (Word)varp->nRanges); 5069 varPcRanges = VG_(indexXA)(varp->rngMany, 0); 5070 } 5071 if (varp->level == 0) 5072 vg_assert( nVarPcRanges == 1 ); 5073 /* and iterate */ 5074 for (i = 0; i < nVarPcRanges; i++) { 5075 Addr pcMin = varPcRanges[i].aMin; 5076 Addr pcMax = varPcRanges[i].aMax; 5077 vg_assert(pcMin <= pcMax); 5078 /* Level 0 is the global address range. So at level 0 we 5079 don't want to bias pcMin/pcMax; but at all other levels 5080 we do since those are derived from svmas in the Dwarf 5081 we're reading. Be paranoid ... */ 5082 if (varp->level == 0) { 5083 vg_assert(pcMin == (Addr)0); 5084 vg_assert(pcMax == ~(Addr)0); 5085 } else { 5086 /* vg_assert(pcMin > (Addr)0); 5087 No .. we can legitimately expect to see ranges like 5088 0x0-0x11D (pre-biasing, of course). 
*/ 5089 vg_assert(pcMax < ~(Addr)0); 5090 } 5091 5092 /* Apply text biasing, for non-global variables. */ 5093 if (varp->level > 0) { 5094 pcMin += di->text_debug_bias; 5095 pcMax += di->text_debug_bias; 5096 } 5097 5098 if (i > 0 && (i%2) == 0) 5099 TRACE_D3("\n "); 5100 TRACE_D3("[%#lx,%#lx] ", pcMin, pcMax ); 5101 5102 ML_(addVar)( 5103 di, varp->level, 5104 pcMin, pcMax, 5105 varp->name, varp->typeR, 5106 varp->gexpr, varp->fbGX, 5107 varp->fndn_ix, varp->fLine, td3 5108 ); 5109 } 5110 } 5111 5112 TRACE_D3("\n\n"); 5113 /* and move on to the next var */ 5114 } 5115 5116 /* Now free all the TempVars */ 5117 n = VG_(sizeXA)( tempvars ); 5118 for (i = 0; i < n; i++) { 5119 varp = *(TempVar**)VG_(indexXA)( tempvars, i ); 5120 ML_(dinfo_free)(varp); 5121 } 5122 VG_(deleteXA)( tempvars ); 5123 tempvars = NULL; 5124 5125 /* and the temp lookup table */ 5126 VG_(deleteXA)( dioff_lookup_tab ); 5127 5128 /* and the ranges tree. Note that we need to also free the XArrays 5129 which constitute the keys, hence pass VG_(deleteXA) as a 5130 key-finalizer. */ 5131 VG_(deleteFM)( rangestree, (void(*)(UWord))VG_(deleteXA), NULL ); 5132 5133 /* and the tyents_to_keep cache */ 5134 ML_(dinfo_free)( tyents_to_keep_cache ); 5135 tyents_to_keep_cache = NULL; 5136 5137 vg_assert( varparser.fndn_ix_Table == NULL ); 5138 5139 /* And the signatured type hash. 
*/
      VG_(HT_destruct) ( signature_types, ML_(dinfo_free) );

      /* record the GExprs in di so they can be freed later */
      vg_assert(!di->admin_gexprs);
      di->admin_gexprs = gexprs;
   }

   // Free up dynamically allocated memory
   if (VG_(clo_read_var_info)) {
      type_parser_release(&typarser);
      var_parser_release(&varparser);
   }
}


/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- The "new" DWARF3 reader -- top level control logic   ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

/* Error-recovery state shared by barf() and ML_(new_dwarf3_reader).

   d3rd_jmpbuf_valid  -- True only while ML_(new_dwarf3_reader) has a
                         setjmp context armed in d3rd_jmpbuf.
   d3rd_jmpbuf_reason -- the message most recently handed to barf();
                         reset to NULL before ML_(new_dwarf3_reader)
                         returns.
   d3rd_jmpbuf        -- the saved jump context itself. */
static Bool d3rd_jmpbuf_valid = False;
static const HChar* d3rd_jmpbuf_reason = NULL;
static VG_MINIMAL_JMP_BUF(d3rd_jmpbuf);

/* Abandon DWARF3 reading: record 'reason' in d3rd_jmpbuf_reason and
   longjmp to the context saved in d3rd_jmpbuf by
   ML_(new_dwarf3_reader).  Does not return.  Asserts that a jump
   context is currently armed, so it must only be called while
   ML_(new_dwarf3_reader) is running the worker. */
static __attribute__((noreturn)) void barf ( const HChar* reason ) {
   vg_assert(d3rd_jmpbuf_valid);
   d3rd_jmpbuf_reason = reason;
   VG_MINIMAL_LONGJMP(d3rd_jmpbuf);
   /*NOTREACHED*/
   /* Backstop, should the longjmp ever fall through. */
   vg_assert(0);
}


/* Top-level entry point for reading the DWARF3 info belonging to 'di'.

   Every section slice is passed straight through to
   new_dwarf3_reader_wrk, together with barf() as its failure
   callback.  If the worker returns normally, reading succeeded.  If
   the worker calls barf(), control longjmps back into this function
   and the recorded reason string is reported via ML_(symerr).  In
   both cases this function returns normally to its caller, leaving
   d3rd_jmpbuf_valid == False and d3rd_jmpbuf_reason == NULL.

   The jump state lives in file-scope statics, so the assertions at
   entry fail if a previous invocation is still in progress.

   NOTE(review): on the failure path nothing in this function
   releases whatever the worker had allocated before it bailed out --
   confirm whether that is dealt with elsewhere. */
void
ML_(new_dwarf3_reader) (
   DebugInfo* di,
   DiSlice escn_debug_info, DiSlice escn_debug_types,
   DiSlice escn_debug_abbv, DiSlice escn_debug_line,
   DiSlice escn_debug_str, DiSlice escn_debug_ranges,
   DiSlice escn_debug_loc, DiSlice escn_debug_info_alt,
   DiSlice escn_debug_abbv_alt, DiSlice escn_debug_line_alt,
   DiSlice escn_debug_str_alt
)
{
   /* NOTE(review): both locals are volatile, presumably so that their
      values stay well-defined across the longjmp -- confirm.  td3 is
      not named directly below; presumably the TRACE_D3 macro reads
      it -- verify against the macro's definition. */
   volatile Int jumped;
   volatile Bool td3 = di->trace_symtab;

   /* Run the _wrk function to read the dwarf3.  If it succeeds, it
      just returns normally.  If there is any failure, it longjmp's
      back here, having first set d3rd_jmpbuf_reason to something
      useful. */
   vg_assert(d3rd_jmpbuf_valid == False);
   vg_assert(d3rd_jmpbuf_reason == NULL);

   /* Arm the jump context before saving it, so that barf()'s
      assertion holds for the whole time the worker is running. */
   d3rd_jmpbuf_valid = True;
   jumped = VG_MINIMAL_SETJMP(d3rd_jmpbuf);
   if (jumped == 0) {
      /* try this ... */
      new_dwarf3_reader_wrk( di, barf,
                             escn_debug_info, escn_debug_types,
                             escn_debug_abbv, escn_debug_line,
                             escn_debug_str, escn_debug_ranges,
                             escn_debug_loc, escn_debug_info_alt,
                             escn_debug_abbv_alt, escn_debug_line_alt,
                             escn_debug_str_alt );
      d3rd_jmpbuf_valid = False;
      TRACE_D3("\n------ .debug_info reading was successful ------\n");
   } else {
      /* It longjmp'd. */
      d3rd_jmpbuf_valid = False;
      /* Can't longjump without giving some sort of reason. */
      vg_assert(d3rd_jmpbuf_reason != NULL);

      TRACE_D3("\n------ .debug_info reading failed ------\n");

      ML_(symerr)(di, True, d3rd_jmpbuf_reason);
   }

   /* d3rd_jmpbuf_valid is already False on both paths; it is cleared
      again here alongside the reason so that the next call's entry
      assertions hold. */
   d3rd_jmpbuf_valid = False;
   d3rd_jmpbuf_reason = NULL;
}



/* --- Unused code fragments which might be useful one day. --- */

#if 0
   /* Read the arange tables */
   TRACE_SYMTAB("\n");
   TRACE_SYMTAB("\n------ The contents of .debug_arange ------\n");
   init_Cursor( &aranges, debug_aranges_img,
                debug_aranges_sz, 0, barf,
                "Overrun whilst reading .debug_aranges section" );
   while (True) {
      ULong len, d_i_offset;
      Bool is64;
      UShort version;
      UChar asize, segsize;

      if (is_at_end_Cursor( &aranges ))
         break;
      /* Read one arange thingy */
      /* initial_length field */
      len = get_Initial_Length( &is64, &aranges,
               "in .debug_aranges: invalid initial-length field" );
      version = get_UShort( &aranges );
      d_i_offset = get_Dwarfish_UWord( &aranges, is64 );
      asize = get_UChar( &aranges );
      segsize = get_UChar( &aranges );
      TRACE_D3(" Length: %llu\n", len);
      TRACE_D3(" Version: %d\n", (Int)version);
      TRACE_D3(" Offset into .debug_info: %llx\n", d_i_offset);
      TRACE_D3(" Pointer Size: %d\n", (Int)asize);
      TRACE_D3(" Segment Size: %d\n", (Int)segsize);
      TRACE_D3("\n");
      TRACE_D3(" Address Length\n");

      /* Skip bytes until the cursor position is a multiple of
         2 * asize. */
      while ((get_position_of_Cursor( &aranges ) % (2 * asize)) > 0) {
         (void)get_UChar( & aranges );
      }
      /* Print (address, length) pairs; an all-zero pair ends the
         list. */
      while (True) {
         ULong address = get_Dwarfish_UWord( &aranges, asize==8 );
         ULong length = get_Dwarfish_UWord( &aranges, asize==8 );
         TRACE_D3(" 0x%016llx 0x%llx\n", address, length);
         if (address == 0 && length == 0) break;
      }
   }
   TRACE_SYMTAB("\n");
#endif

#endif // defined(VGO_linux) || defined(VGO_darwin)

/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/