1 2 /*--------------------------------------------------------------------*/ 3 /*--- Read DWARF3/4 ".debug_info" sections (DIE trees). ---*/ 4 /*--- readdwarf3.c ---*/ 5 /*--------------------------------------------------------------------*/ 6 7 /* 8 This file is part of Valgrind, a dynamic binary instrumentation 9 framework. 10 11 Copyright (C) 2008-2011 OpenWorks LLP 12 info (at) open-works.co.uk 13 14 This program is free software; you can redistribute it and/or 15 modify it under the terms of the GNU General Public License as 16 published by the Free Software Foundation; either version 2 of the 17 License, or (at your option) any later version. 18 19 This program is distributed in the hope that it will be useful, but 20 WITHOUT ANY WARRANTY; without even the implied warranty of 21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 22 General Public License for more details. 23 24 You should have received a copy of the GNU General Public License 25 along with this program; if not, write to the Free Software 26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 27 02111-1307, USA. 28 29 The GNU General Public License is contained in the file COPYING. 30 31 Neither the names of the U.S. Department of Energy nor the 32 University of California nor the names of its contributors may be 33 used to endorse or promote products derived from this software 34 without prior written permission. 35 */ 36 37 #if defined(VGO_linux) || defined(VGO_darwin) 38 39 /* REFERENCE (without which this code will not make much sense): 40 41 DWARF Debugging Information Format, Version 3, 42 dated 20 December 2005 (the "D3 spec"). 43 44 Available at http://www.dwarfstd.org/Dwarf3.pdf. There's also a 45 .doc (MS Word) version, but for some reason the section numbers 46 between the Word and PDF versions differ by 1 in the first digit. 47 All section references in this code are to the PDF version. 
   CURRENT HACKS:

   DW_TAG_{const,volatile}_type with no DW_AT_type is allowed; it is
      assumed to mean "const void" or "volatile void" respectively.
      GDB appears to interpret them like this, anyway.

   In many cases it is important to know the svma of a CU (the "base
   address of the CU", as the D3 spec calls it).  There are some
   situations in which the spec implies this value is unknown, but the
   Dwarf3 produced by gcc-4.1 seems to assume it is not unknown but
   merely zero when not explicitly stated.  So we too have to make
   that assumption.

   POTENTIAL BUG?  Spotted 6 Sept 08.  Why doesn't
   unitary_range_list() bias the resulting range list in the same way
   that its more general cousin, get_range_list(), does?  I don't
   know.

   TODO, 2008 Feb 17:

   get rid of cu_svma_known and document the assumed-zero svma hack.

   ML_(sizeOfType): differentiate between zero sized types and types
   for which the size is unknown.  Is this important?  I don't know.

   DW_AT_array_types: deal with explicit sizes (currently we compute
   the size from the bounds and the element size, although that's
   fragile, if the bounds are incompletely specified, or completely
   absent)

   Document reason for difference (by 1) of stack preening depth in
   parse_var_DIE vs parse_type_DIE.

   Don't hand to ML_(addVars), vars whose locations are entirely in
   registers (DW_OP_reg*).  This is merely a space-saving
   optimisation, as ML_(evaluate_Dwarf3_Expr) should handle these
   expressions correctly, by failing to evaluate them and hence
   effectively ignoring the variable with which they are associated.

   Deal with DW_AT_array_types which have element size != stride

   In some cases, the info for a variable is split between two
   different DIEs (generally a declarer and a definer).  We punt on
   these.  Could do better here.

   The 'data_bias' argument passed to the expression evaluator
   (ML_(evaluate_Dwarf3_Expr)) should really be changed to a
   MaybeUWord, to make it clear when we do vs don't know what it is
   for the evaluation of an expression.  At the moment zero is passed
   for this parameter in the don't-know case.  That's a bit fragile
   and obscure; using a MaybeUWord would be clearer.

   POTENTIAL PERFORMANCE IMPROVEMENTS:

   Currently, duplicate removal and all other queries for the type
   entities array are done using cuOffset-based pointing, which
   involves a binary search (VG_(lookupXA)) for each access.  This is
   wildly inefficient, although simple.  It would be better to
   translate all the cuOffset-based references (iow, all the "R" and
   "Rs" fields in the TyEnts in 'tyents') to direct index numbers in
   'tyents' right at the start of dedup_types(), and use direct
   indexing (VG_(indexXA)) wherever possible after that.

   cmp__XArrays_of_AddrRange is also a performance bottleneck.  Move
   VG_(indexXA) into pub_tool_xarray.h so it can be inlined at all use
   points, and possibly also make an _UNCHECKED version which skips
   the range checks in performance-critical situations such as this.

   Handle interaction between read_DIE and parse_{var,type}_DIE
   better.  Currently read_DIE reads the entire DIE just to find where
   the end is (and for debug printing), so that it can later reliably
   move the cursor to the end regardless of what parse_{var,type}_DIE
   do.  This means many DIEs (most, even?) are read twice.  It would
   be smarter to make parse_{var,type}_DIE return a Bool indicating
   whether or not they advanced the DIE cursor, and only if they
   didn't should read_DIE itself read through the DIE.

   ML_(addVar) and add_var_to_arange: quite a lot of DiAddrRanges have
   zero variables in their .vars XArray.
Rather than have an XArray 128 with zero elements (which uses 2 malloc'd blocks), allow the .vars 129 pointer to be NULL in this case. 130 131 More generally, reduce the amount of memory allocated and freed 132 while reading Dwarf3 type/variable information. Even modest (20MB) 133 objects cause this module to allocate and free hundreds of 134 thousands of small blocks, and ML_(arena_malloc) and its various 135 groupies always show up at the top of performance profiles. */ 136 137 #include "pub_core_basics.h" 138 #include "pub_core_debuginfo.h" 139 #include "pub_core_libcbase.h" 140 #include "pub_core_libcassert.h" 141 #include "pub_core_libcprint.h" 142 #include "pub_core_libcsetjmp.h" // setjmp facilities 143 #include "pub_core_options.h" 144 #include "pub_core_tooliface.h" /* VG_(needs) */ 145 #include "pub_core_xarray.h" 146 #include "pub_core_wordfm.h" 147 #include "priv_misc.h" /* dinfo_zalloc/free */ 148 #include "priv_tytypes.h" 149 #include "priv_d3basics.h" 150 #include "priv_storage.h" 151 #include "priv_readdwarf3.h" /* self */ 152 153 154 /*------------------------------------------------------------*/ 155 /*--- ---*/ 156 /*--- Basic machinery for parsing DIEs. ---*/ 157 /*--- ---*/ 158 /*------------------------------------------------------------*/ 159 160 #define TRACE_D3(format, args...) 
\ 161 if (td3) { VG_(printf)(format, ## args); } 162 163 #define D3_INVALID_CUOFF ((UWord)(-1UL)) 164 #define D3_FAKEVOID_CUOFF ((UWord)(-2UL)) 165 166 typedef 167 struct { 168 UChar* region_start_img; 169 UWord region_szB; 170 UWord region_next; 171 void (*barf)( HChar* ) __attribute__((noreturn)); 172 HChar* barfstr; 173 } 174 Cursor; 175 176 static inline Bool is_sane_Cursor ( Cursor* c ) { 177 if (!c) return False; 178 if (!c->barf) return False; 179 if (!c->barfstr) return False; 180 return True; 181 } 182 183 static void init_Cursor ( Cursor* c, 184 UChar* region_start_img, 185 UWord region_szB, 186 UWord region_next, 187 __attribute__((noreturn)) void (*barf)( HChar* ), 188 HChar* barfstr ) 189 { 190 vg_assert(c); 191 VG_(memset)(c, 0, sizeof(*c)); 192 c->region_start_img = region_start_img; 193 c->region_szB = region_szB; 194 c->region_next = region_next; 195 c->barf = barf; 196 c->barfstr = barfstr; 197 vg_assert(is_sane_Cursor(c)); 198 } 199 200 static Bool is_at_end_Cursor ( Cursor* c ) { 201 vg_assert(is_sane_Cursor(c)); 202 return c->region_next >= c->region_szB; 203 } 204 205 static inline UWord get_position_of_Cursor ( Cursor* c ) { 206 vg_assert(is_sane_Cursor(c)); 207 return c->region_next; 208 } 209 static inline void set_position_of_Cursor ( Cursor* c, UWord pos ) { 210 c->region_next = pos; 211 vg_assert(is_sane_Cursor(c)); 212 } 213 214 static /*signed*/Word get_remaining_length_Cursor ( Cursor* c ) { 215 vg_assert(is_sane_Cursor(c)); 216 return c->region_szB - c->region_next; 217 } 218 219 static UChar* get_address_of_Cursor ( Cursor* c ) { 220 vg_assert(is_sane_Cursor(c)); 221 return &c->region_start_img[ c->region_next ]; 222 } 223 224 /* FIXME: document assumptions on endianness for 225 get_UShort/UInt/ULong. 
*/ 226 static inline UChar get_UChar ( Cursor* c ) { 227 UChar r; 228 /* vg_assert(is_sane_Cursor(c)); */ 229 if (c->region_next + sizeof(UChar) > c->region_szB) { 230 c->barf(c->barfstr); 231 /*NOTREACHED*/ 232 vg_assert(0); 233 } 234 r = * (UChar*) &c->region_start_img[ c->region_next ]; 235 c->region_next += sizeof(UChar); 236 return r; 237 } 238 static UShort get_UShort ( Cursor* c ) { 239 UShort r; 240 vg_assert(is_sane_Cursor(c)); 241 if (c->region_next + sizeof(UShort) > c->region_szB) { 242 c->barf(c->barfstr); 243 /*NOTREACHED*/ 244 vg_assert(0); 245 } 246 r = ML_(read_UShort)(&c->region_start_img[ c->region_next ]); 247 c->region_next += sizeof(UShort); 248 return r; 249 } 250 static UInt get_UInt ( Cursor* c ) { 251 UInt r; 252 vg_assert(is_sane_Cursor(c)); 253 if (c->region_next + sizeof(UInt) > c->region_szB) { 254 c->barf(c->barfstr); 255 /*NOTREACHED*/ 256 vg_assert(0); 257 } 258 r = ML_(read_UInt)(&c->region_start_img[ c->region_next ]); 259 c->region_next += sizeof(UInt); 260 return r; 261 } 262 static ULong get_ULong ( Cursor* c ) { 263 ULong r; 264 vg_assert(is_sane_Cursor(c)); 265 if (c->region_next + sizeof(ULong) > c->region_szB) { 266 c->barf(c->barfstr); 267 /*NOTREACHED*/ 268 vg_assert(0); 269 } 270 r = ML_(read_ULong)(&c->region_start_img[ c->region_next ]); 271 c->region_next += sizeof(ULong); 272 return r; 273 } 274 static inline ULong get_ULEB128 ( Cursor* c ) { 275 ULong result; 276 Int shift; 277 UChar byte; 278 /* unroll first iteration */ 279 byte = get_UChar( c ); 280 result = (ULong)(byte & 0x7f); 281 if (LIKELY(!(byte & 0x80))) return result; 282 shift = 7; 283 /* end unroll first iteration */ 284 do { 285 byte = get_UChar( c ); 286 result |= ((ULong)(byte & 0x7f)) << shift; 287 shift += 7; 288 } while (byte & 0x80); 289 return result; 290 } 291 static Long get_SLEB128 ( Cursor* c ) { 292 ULong result = 0; 293 Int shift = 0; 294 UChar byte; 295 do { 296 byte = get_UChar(c); 297 result |= ((ULong)(byte & 0x7f)) << shift; 298 shift 
+= 7; 299 } while (byte & 0x80); 300 if (shift < 64 && (byte & 0x40)) 301 result |= -(1ULL << shift); 302 return result; 303 } 304 305 /* Assume 'c' points to the start of a string. Return the absolute 306 address of whatever it points at, and advance it past the 307 terminating zero. This makes it safe for the caller to then copy 308 the string with ML_(addStr), since (w.r.t. image overruns) the 309 process of advancing past the terminating zero will already have 310 "vetted" the string. */ 311 static UChar* get_AsciiZ ( Cursor* c ) { 312 UChar uc; 313 UChar* res = get_address_of_Cursor(c); 314 do { uc = get_UChar(c); } while (uc != 0); 315 return res; 316 } 317 318 static ULong peek_ULEB128 ( Cursor* c ) { 319 Word here = c->region_next; 320 ULong r = get_ULEB128( c ); 321 c->region_next = here; 322 return r; 323 } 324 static UChar peek_UChar ( Cursor* c ) { 325 Word here = c->region_next; 326 UChar r = get_UChar( c ); 327 c->region_next = here; 328 return r; 329 } 330 331 static ULong get_Dwarfish_UWord ( Cursor* c, Bool is_dw64 ) { 332 return is_dw64 ? 
get_ULong(c) : (ULong) get_UInt(c); 333 } 334 335 static UWord get_UWord ( Cursor* c ) { 336 vg_assert(sizeof(UWord) == sizeof(void*)); 337 if (sizeof(UWord) == 4) return get_UInt(c); 338 if (sizeof(UWord) == 8) return get_ULong(c); 339 vg_assert(0); 340 } 341 342 /* Read a DWARF3 'Initial Length' field */ 343 static ULong get_Initial_Length ( /*OUT*/Bool* is64, 344 Cursor* c, 345 HChar* barfMsg ) 346 { 347 ULong w64; 348 UInt w32; 349 *is64 = False; 350 w32 = get_UInt( c ); 351 if (w32 >= 0xFFFFFFF0 && w32 < 0xFFFFFFFF) { 352 c->barf( barfMsg ); 353 } 354 else if (w32 == 0xFFFFFFFF) { 355 *is64 = True; 356 w64 = get_ULong( c ); 357 } else { 358 *is64 = False; 359 w64 = (ULong)w32; 360 } 361 return w64; 362 } 363 364 365 /*------------------------------------------------------------*/ 366 /*--- ---*/ 367 /*--- "CUConst" structure ---*/ 368 /*--- ---*/ 369 /*------------------------------------------------------------*/ 370 371 #define N_ABBV_CACHE 32 372 373 /* Holds information that is constant through the parsing of a 374 Compilation Unit. This is basically plumbed through to 375 everywhere. */ 376 typedef 377 struct { 378 /* Call here if anything goes wrong */ 379 void (*barf)( HChar* ) __attribute__((noreturn)); 380 /* Is this 64-bit DWARF ? */ 381 Bool is_dw64; 382 /* Which DWARF version ? (2, 3 or 4) */ 383 UShort version; 384 /* Length of this Compilation Unit, as stated in the 385 .unit_length :: InitialLength field of the CU Header. 386 However, this size (as specified by the D3 spec) does not 387 include the size of the .unit_length field itself, which is 388 either 4 or 12 bytes (32-bit or 64-bit Dwarf3). That value 389 can be obtained through the expression ".is_dw64 ? 12 : 4". */ 390 ULong unit_length; 391 /* Offset of start of this unit in .debug_info */ 392 UWord cu_start_offset; 393 /* SVMA for this CU. In the D3 spec, is known as the "base 394 address of the compilation unit (last para sec 3.1.1). 
395 Needed for (amongst things) interpretation of location-list 396 values. */ 397 Addr cu_svma; 398 Bool cu_svma_known; 399 /* The debug_abbreviations table to be used for this Unit */ 400 UChar* debug_abbv; 401 /* Upper bound on size thereof (an overestimate, in general) */ 402 UWord debug_abbv_maxszB; 403 /* Where is .debug_str ? */ 404 UChar* debug_str_img; 405 UWord debug_str_sz; 406 /* Where is .debug_ranges ? */ 407 UChar* debug_ranges_img; 408 UWord debug_ranges_sz; 409 /* Where is .debug_loc ? */ 410 UChar* debug_loc_img; 411 UWord debug_loc_sz; 412 /* Where is .debug_line? */ 413 UChar* debug_line_img; 414 UWord debug_line_sz; 415 /* Where is .debug_info? */ 416 UChar* debug_info_img; 417 UWord debug_info_sz; 418 /* --- Needed so we can add stuff to the string table. --- */ 419 struct _DebugInfo* di; 420 /* --- a cache for set_abbv_Cursor --- */ 421 /* abbv_code == (ULong)-1 for an unused entry. */ 422 struct { ULong abbv_code; UWord posn; } saC_cache[N_ABBV_CACHE]; 423 UWord saC_cache_queries; 424 UWord saC_cache_misses; 425 } 426 CUConst; 427 428 429 /*------------------------------------------------------------*/ 430 /*--- ---*/ 431 /*--- Helper functions for Guarded Expressions ---*/ 432 /*--- ---*/ 433 /*------------------------------------------------------------*/ 434 435 /* Parse the location list starting at img-offset 'debug_loc_offset' 436 in .debug_loc. Results are biased with 'svma_of_referencing_CU' 437 and so I believe are correct SVMAs for the object as a whole. This 438 function allocates the UChar*, and the caller must deallocate it. 439 The resulting block is in so-called Guarded-Expression format. 440 441 Guarded-Expression format is similar but not identical to the DWARF3 442 location-list format. The format of each returned block is: 443 444 UChar biasMe; 445 UChar isEnd; 446 followed by zero or more of 447 448 (Addr aMin; Addr aMax; UShort nbytes; ..bytes..; UChar isEnd) 449 450 '..bytes..' 
is an standard DWARF3 location expression which is 451 valid when aMin <= pc <= aMax (possibly after suitable biasing). 452 453 The number of bytes in '..bytes..' is nbytes. 454 455 The end of the sequence is marked by an isEnd == 1 value. All 456 previous isEnd values must be zero. 457 458 biasMe is 1 if the aMin/aMax fields need this DebugInfo's 459 text_bias added before use, and 0 if the GX is this is not 460 necessary (is ready to go). 461 462 Hence the block can be quickly parsed and is self-describing. Note 463 that aMax is 1 less than the corresponding value in a DWARF3 464 location list. Zero length ranges, with aMax == aMin-1, are not 465 allowed. 466 */ 467 /* 2008-sept-12: moved ML_(pp_GX) from here to d3basics.c, where 468 it more logically belongs. */ 469 470 471 /* Apply a text bias to a GX. */ 472 static void bias_GX ( /*MOD*/GExpr* gx, struct _DebugInfo* di ) 473 { 474 UShort nbytes; 475 UChar* p = &gx->payload[0]; 476 UChar* pA; 477 UChar uc; 478 uc = *p++; /*biasMe*/ 479 if (uc == 0) 480 return; 481 vg_assert(uc == 1); 482 p[-1] = 0; /* mark it as done */ 483 while (True) { 484 uc = *p++; 485 if (uc == 1) 486 break; /*isEnd*/ 487 vg_assert(uc == 0); 488 /* t-bias aMin */ 489 pA = (UChar*)p; 490 ML_(write_Addr)(pA, ML_(read_Addr)(pA) + di->text_debug_bias); 491 p += sizeof(Addr); 492 /* t-bias aMax */ 493 pA = (UChar*)p; 494 ML_(write_Addr)(pA, ML_(read_Addr)(pA) + di->text_debug_bias); 495 p += sizeof(Addr); 496 /* nbytes, and actual expression */ 497 nbytes = ML_(read_UShort)(p); p += sizeof(UShort); 498 p += nbytes; 499 } 500 } 501 502 __attribute__((noinline)) 503 static GExpr* make_singleton_GX ( UChar* block, UWord nbytes ) 504 { 505 SizeT bytesReqd; 506 GExpr* gx; 507 UChar *p, *pstart; 508 509 vg_assert(sizeof(UWord) == sizeof(Addr)); 510 vg_assert(nbytes <= 0xFFFF); /* else we overflow the nbytes field */ 511 bytesReqd 512 = sizeof(UChar) /*biasMe*/ + sizeof(UChar) /*!isEnd*/ 513 + sizeof(UWord) /*aMin*/ + sizeof(UWord) /*aMax*/ 514 + 
sizeof(UShort) /*nbytes*/ + nbytes 515 + sizeof(UChar); /*isEnd*/ 516 517 gx = ML_(dinfo_zalloc)( "di.readdwarf3.msGX.1", 518 sizeof(GExpr) + bytesReqd ); 519 vg_assert(gx); 520 521 p = pstart = &gx->payload[0]; 522 523 p = ML_(write_UChar)(p, 0); /*biasMe*/ 524 p = ML_(write_UChar)(p, 0); /*!isEnd*/ 525 p = ML_(write_Addr)(p, 0); /*aMin*/ 526 p = ML_(write_Addr)(p, ~0); /*aMax*/ 527 p = ML_(write_UShort)(p, nbytes); /*nbytes*/ 528 VG_(memcpy)(p, block, nbytes); p += nbytes; 529 p = ML_(write_UChar)(p, 1); /*isEnd*/ 530 531 vg_assert( (SizeT)(p - pstart) == bytesReqd); 532 vg_assert( &gx->payload[bytesReqd] 533 == ((UChar*)gx) + sizeof(GExpr) + bytesReqd ); 534 535 return gx; 536 } 537 538 __attribute__((noinline)) 539 static GExpr* make_general_GX ( CUConst* cc, 540 Bool td3, 541 UWord debug_loc_offset, 542 Addr svma_of_referencing_CU ) 543 { 544 Addr base; 545 Cursor loc; 546 XArray* xa; /* XArray of UChar */ 547 GExpr* gx; 548 Word nbytes; 549 550 vg_assert(sizeof(UWord) == sizeof(Addr)); 551 if (cc->debug_loc_sz == 0) 552 cc->barf("make_general_GX: .debug_loc is empty/missing"); 553 554 init_Cursor( &loc, cc->debug_loc_img, 555 cc->debug_loc_sz, 0, cc->barf, 556 "Overrun whilst reading .debug_loc section(2)" ); 557 set_position_of_Cursor( &loc, debug_loc_offset ); 558 559 TRACE_D3("make_general_GX (.debug_loc_offset = %lu, img = %p) {\n", 560 debug_loc_offset, get_address_of_Cursor( &loc ) ); 561 562 /* Who frees this xa? It is freed before this fn exits. */ 563 xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.mgGX.1", 564 ML_(dinfo_free), 565 sizeof(UChar) ); 566 567 { UChar c = 1; /*biasMe*/ VG_(addBytesToXA)( xa, &c, sizeof(c) ); } 568 569 base = 0; 570 while (True) { 571 Bool acquire; 572 UWord len; 573 /* Read a (host-)word pair. This is something of a hack since 574 the word size to read is really dictated by the ELF file; 575 however, we assume we're reading a file with the same 576 word-sizeness as the host. Reasonably enough. 
*/ 577 UWord w1 = get_UWord( &loc ); 578 UWord w2 = get_UWord( &loc ); 579 580 TRACE_D3(" %08lx %08lx\n", w1, w2); 581 if (w1 == 0 && w2 == 0) 582 break; /* end of list */ 583 584 if (w1 == -1UL) { 585 /* new value for 'base' */ 586 base = w2; 587 continue; 588 } 589 590 /* else a location expression follows */ 591 /* else enumerate [w1+base, w2+base) */ 592 /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc" 593 (sec 2.17.2) */ 594 if (w1 > w2) { 595 TRACE_D3("negative range is for .debug_loc expr at " 596 "file offset %lu\n", 597 debug_loc_offset); 598 cc->barf( "negative range in .debug_loc section" ); 599 } 600 601 /* ignore zero length ranges */ 602 acquire = w1 < w2; 603 len = (UWord)get_UShort( &loc ); 604 605 if (acquire) { 606 UWord w; 607 UShort s; 608 UChar c; 609 c = 0; /* !isEnd*/ 610 VG_(addBytesToXA)( xa, &c, sizeof(c) ); 611 w = w1 + base + svma_of_referencing_CU; 612 VG_(addBytesToXA)( xa, &w, sizeof(w) ); 613 w = w2 -1 + base + svma_of_referencing_CU; 614 VG_(addBytesToXA)( xa, &w, sizeof(w) ); 615 s = (UShort)len; 616 VG_(addBytesToXA)( xa, &s, sizeof(s) ); 617 } 618 619 while (len > 0) { 620 UChar byte = get_UChar( &loc ); 621 TRACE_D3("%02x", (UInt)byte); 622 if (acquire) 623 VG_(addBytesToXA)( xa, &byte, 1 ); 624 len--; 625 } 626 TRACE_D3("\n"); 627 } 628 629 { UChar c = 1; /*isEnd*/ VG_(addBytesToXA)( xa, &c, sizeof(c) ); } 630 631 nbytes = VG_(sizeXA)( xa ); 632 vg_assert(nbytes >= 1); 633 634 gx = ML_(dinfo_zalloc)( "di.readdwarf3.mgGX.2", sizeof(GExpr) + nbytes ); 635 vg_assert(gx); 636 VG_(memcpy)( &gx->payload[0], (UChar*)VG_(indexXA)(xa,0), nbytes ); 637 vg_assert( &gx->payload[nbytes] 638 == ((UChar*)gx) + sizeof(GExpr) + nbytes ); 639 640 VG_(deleteXA)( xa ); 641 642 TRACE_D3("}\n"); 643 644 return gx; 645 } 646 647 648 /*------------------------------------------------------------*/ 649 /*--- ---*/ 650 /*--- Helper functions for range lists and CU headers ---*/ 651 /*--- ---*/ 652 
/*------------------------------------------------------------*/ 653 654 /* Denotes an address range. Both aMin and aMax are included in the 655 range; hence a complete range is (0, ~0) and an empty range is any 656 (X, X-1) for X > 0.*/ 657 typedef 658 struct { Addr aMin; Addr aMax; } 659 AddrRange; 660 661 662 /* Generate an arbitrary structural total ordering on 663 XArray* of AddrRange. */ 664 static Word cmp__XArrays_of_AddrRange ( XArray* rngs1, XArray* rngs2 ) 665 { 666 Word n1, n2, i; 667 tl_assert(rngs1 && rngs2); 668 n1 = VG_(sizeXA)( rngs1 ); 669 n2 = VG_(sizeXA)( rngs2 ); 670 if (n1 < n2) return -1; 671 if (n1 > n2) return 1; 672 for (i = 0; i < n1; i++) { 673 AddrRange* rng1 = (AddrRange*)VG_(indexXA)( rngs1, i ); 674 AddrRange* rng2 = (AddrRange*)VG_(indexXA)( rngs2, i ); 675 if (rng1->aMin < rng2->aMin) return -1; 676 if (rng1->aMin > rng2->aMin) return 1; 677 if (rng1->aMax < rng2->aMax) return -1; 678 if (rng1->aMax > rng2->aMax) return 1; 679 } 680 return 0; 681 } 682 683 684 __attribute__((noinline)) 685 static XArray* /* of AddrRange */ empty_range_list ( void ) 686 { 687 XArray* xa; /* XArray of AddrRange */ 688 /* Who frees this xa? varstack_preen() does. */ 689 xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.erl.1", 690 ML_(dinfo_free), 691 sizeof(AddrRange) ); 692 return xa; 693 } 694 695 696 __attribute__((noinline)) 697 static XArray* unitary_range_list ( Addr aMin, Addr aMax ) 698 { 699 XArray* xa; 700 AddrRange pair; 701 vg_assert(aMin <= aMax); 702 /* Who frees this xa? varstack_preen() does. */ 703 xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.url.1", 704 ML_(dinfo_free), 705 sizeof(AddrRange) ); 706 pair.aMin = aMin; 707 pair.aMax = aMax; 708 VG_(addToXA)( xa, &pair ); 709 return xa; 710 } 711 712 713 /* Enumerate the address ranges starting at img-offset 714 'debug_ranges_offset' in .debug_ranges. Results are biased with 715 'svma_of_referencing_CU' and so I believe are correct SVMAs for the 716 object as a whole. 
This function allocates the XArray, and the
   caller must deallocate it.

   Barfs via cc->barf (which does not return) if .debug_ranges is
   empty, if an entry has its start above its end, or if biasing an
   entry makes its addresses wrap around.  In the latter two cases
   the partially built XArray is freed first.  An overrun detected
   inside get_UWord still barfs without freeing it. */
__attribute__((noinline))
static XArray* /* of AddrRange */
       get_range_list ( CUConst* cc,
                        Bool     td3,
                        UWord    debug_ranges_offset,
                        Addr     svma_of_referencing_CU )
{
   Addr      base;
   Cursor    ranges;
   XArray*   xa; /* XArray of AddrRange */
   AddrRange pair;

   if (cc->debug_ranges_sz == 0)
      cc->barf("get_range_list: .debug_ranges is empty/missing");

   init_Cursor( &ranges, cc->debug_ranges_img,
                cc->debug_ranges_sz, 0, cc->barf,
                "Overrun whilst reading .debug_ranges section(2)" );
   set_position_of_Cursor( &ranges, debug_ranges_offset );

   /* Who frees this xa?  varstack_preen() does. */
   xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.grl.1", ML_(dinfo_free),
                    sizeof(AddrRange) );
   base = 0;
   while (True) {
      /* Read a (host-)word pair.  This is something of a hack since
         the word size to read is really dictated by the ELF file;
         however, we assume we're reading a file with the same
         word-sizeness as the host.  Reasonably enough. */
      UWord w1 = get_UWord( &ranges );
      UWord w2 = get_UWord( &ranges );

      if (w1 == 0 && w2 == 0)
         break; /* end of list. */

      if (w1 == -1UL) {
         /* new value for 'base' */
         base = w2;
         continue;
      }

      /* else enumerate [w1+base, w2+base) */
      /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc"
         (sec 2.17.2) */
      if (w1 > w2) {
         /* barf does not return, so release the list here or it leaks */
         VG_(deleteXA)( xa );
         cc->barf( "negative range in .debug_ranges section" );
      }
      if (w1 < w2) {
         pair.aMin = w1     + base + svma_of_referencing_CU;
         pair.aMax = w2 - 1 + base + svma_of_referencing_CU;
         /* The values come straight from the file, so the additions
            can wrap and leave aMin > aMax.  That is bad input, not an
            internal error: give up on it rather than assert. */
         if (pair.aMin > pair.aMax) {
            VG_(deleteXA)( xa );
            cc->barf( "address wraparound in .debug_ranges section" );
         }
         VG_(addToXA)( xa, &pair );
      }
   }
   return xa;
}


/* Parse the Compilation Unit header indicated at 'c' and
   initialise 'cc' accordingly.
*/ 777 static __attribute__((noinline)) 778 void parse_CU_Header ( /*OUT*/CUConst* cc, 779 Bool td3, 780 Cursor* c, 781 UChar* debug_abbv_img, UWord debug_abbv_sz ) 782 { 783 UChar address_size; 784 UWord debug_abbrev_offset; 785 Int i; 786 787 VG_(memset)(cc, 0, sizeof(*cc)); 788 vg_assert(c && c->barf); 789 cc->barf = c->barf; 790 791 /* initial_length field */ 792 cc->unit_length 793 = get_Initial_Length( &cc->is_dw64, c, 794 "parse_CU_Header: invalid initial-length field" ); 795 796 TRACE_D3(" Length: %lld\n", cc->unit_length ); 797 798 /* version */ 799 cc->version = get_UShort( c ); 800 if (cc->version != 2 && cc->version != 3 && cc->version != 4) 801 cc->barf( "parse_CU_Header: is neither DWARF2 nor DWARF3 nor DWARF4" ); 802 TRACE_D3(" Version: %d\n", (Int)cc->version ); 803 804 /* debug_abbrev_offset */ 805 debug_abbrev_offset = get_Dwarfish_UWord( c, cc->is_dw64 ); 806 if (debug_abbrev_offset >= debug_abbv_sz) 807 cc->barf( "parse_CU_Header: invalid debug_abbrev_offset" ); 808 TRACE_D3(" Abbrev Offset: %ld\n", debug_abbrev_offset ); 809 810 /* address size. If this isn't equal to the host word size, just 811 give up. This makes it safe to assume elsewhere that 812 DW_FORM_addr and DW_FORM_ref_addr can be treated as a host 813 word. */ 814 address_size = get_UChar( c ); 815 if (address_size != sizeof(void*)) 816 cc->barf( "parse_CU_Header: invalid address_size" ); 817 TRACE_D3(" Pointer Size: %d\n", (Int)address_size ); 818 819 /* Set up so that cc->debug_abbv points to the relevant table for 820 this CU. Set the szB so that at least we can't read off the end 821 of the debug_abbrev section -- potentially (and quite likely) 822 too big, if this isn't the last table in the section, but at 823 least it's safe. 
*/ 824 cc->debug_abbv = debug_abbv_img + debug_abbrev_offset; 825 cc->debug_abbv_maxszB = debug_abbv_sz - debug_abbrev_offset; 826 /* and empty out the set_abbv_Cursor cache */ 827 if (0) VG_(printf)("XXXXXX initialise set_abbv_Cursor cache\n"); 828 for (i = 0; i < N_ABBV_CACHE; i++) { 829 cc->saC_cache[i].abbv_code = (ULong)-1; /* unused */ 830 cc->saC_cache[i].posn = 0; 831 } 832 cc->saC_cache_queries = 0; 833 cc->saC_cache_misses = 0; 834 } 835 836 837 /* Set up 'c' so it is ready to parse the abbv table entry code 838 'abbv_code' for this compilation unit. */ 839 static __attribute__((noinline)) 840 void set_abbv_Cursor ( /*OUT*/Cursor* c, Bool td3, 841 CUConst* cc, ULong abbv_code ) 842 { 843 Int i; 844 ULong acode; 845 846 if (abbv_code == 0) 847 cc->barf("set_abbv_Cursor: abbv_code == 0" ); 848 849 /* (ULong)-1 is used to represent an empty cache slot. So we can't 850 allow it. In any case no valid DWARF3 should make a reference 851 to a negative abbreviation code. [at least, they always seem to 852 be numbered upwards from zero as far as I have seen] */ 853 vg_assert(abbv_code != (ULong)-1); 854 855 /* First search the cache. */ 856 if (0) VG_(printf)("XXXXXX search set_abbv_Cursor cache\n"); 857 cc->saC_cache_queries++; 858 for (i = 0; i < N_ABBV_CACHE; i++) { 859 /* No need to test the cached abbv_codes for -1 (empty), since 860 we just asserted that abbv_code is not -1. */ 861 if (cc->saC_cache[i].abbv_code == abbv_code) { 862 /* Found it. Cool. Set up the parser using the cached 863 position, and move this cache entry 1 step closer to the 864 front. 
*/ 865 if (0) VG_(printf)("XXXXXX found in set_abbv_Cursor cache\n"); 866 init_Cursor( c, cc->debug_abbv, 867 cc->debug_abbv_maxszB, cc->saC_cache[i].posn, 868 cc->barf, 869 "Overrun whilst parsing .debug_abbrev section(1)" ); 870 if (i > 0) { 871 ULong t_abbv_code = cc->saC_cache[i].abbv_code; 872 UWord t_posn = cc->saC_cache[i].posn; 873 while (i > 0) { 874 cc->saC_cache[i] = cc->saC_cache[i-1]; 875 cc->saC_cache[0].abbv_code = t_abbv_code; 876 cc->saC_cache[0].posn = t_posn; 877 i--; 878 } 879 } 880 return; 881 } 882 } 883 884 /* No. It's not in the cache. We have to search through 885 .debug_abbrev, of course taking care to update the cache 886 when done. */ 887 888 cc->saC_cache_misses++; 889 init_Cursor( c, cc->debug_abbv, cc->debug_abbv_maxszB, 0, cc->barf, 890 "Overrun whilst parsing .debug_abbrev section(2)" ); 891 892 /* Now iterate though the table until we find the requested 893 entry. */ 894 while (True) { 895 //ULong atag; 896 //UInt has_children; 897 acode = get_ULEB128( c ); 898 if (acode == 0) break; /* end of the table */ 899 if (acode == abbv_code) break; /* found it */ 900 /*atag = */ get_ULEB128( c ); 901 /*has_children = */ get_UChar( c ); 902 //TRACE_D3(" %llu %s [%s]\n", 903 // acode, pp_DW_TAG(atag), pp_DW_children(has_children)); 904 while (True) { 905 ULong at_name = get_ULEB128( c ); 906 ULong at_form = get_ULEB128( c ); 907 if (at_name == 0 && at_form == 0) break; 908 //TRACE_D3(" %18s %s\n", 909 // pp_DW_AT(at_name), pp_DW_FORM(at_form)); 910 } 911 } 912 913 if (acode == 0) { 914 /* Not found. This is fatal. */ 915 cc->barf("set_abbv_Cursor: abbv_code not found"); 916 } 917 918 /* Otherwise, 'c' is now set correctly to parse the relevant entry, 919 starting from the abbreviation entry's tag. So just cache 920 the result, and return. 
*/ 921 for (i = N_ABBV_CACHE-1; i > N_ABBV_CACHE/2; i--) { 922 cc->saC_cache[i] = cc->saC_cache[i-1]; 923 } 924 if (0) VG_(printf)("XXXXXX update set_abbv_Cursor cache\n"); 925 cc->saC_cache[N_ABBV_CACHE/2].abbv_code = abbv_code; 926 cc->saC_cache[N_ABBV_CACHE/2].posn = get_position_of_Cursor(c); 927 } 928 929 930 /* From 'c', get the Form data into the lowest 1/2/4/8 bytes of *cts. 931 932 If *cts itself contains the entire result, then *ctsSzB is set to 933 1,2,4 or 8 accordingly and *ctsMemSzB is set to zero. 934 935 Alternatively, the result can be a block of data (in the 936 transiently mapped-in object, so-called "image" space). If so then 937 the lowest sizeof(void*)/8 bytes of *cts hold a pointer to said 938 image, *ctsSzB is zero, and *ctsMemSzB is the size of the block. 939 940 Unfortunately this means it is impossible to represent a zero-size 941 image block since that would have *ctsSzB == 0 and *ctsMemSzB == 0 942 and so is ambiguous (which case it is?) 943 944 Invariant on successful return: 945 (*ctsSzB > 0 && *ctsMemSzB == 0) 946 || (*ctsSzB == 0 && *ctsMemSzB > 0) 947 */ 948 static 949 void get_Form_contents ( /*OUT*/ULong* cts, 950 /*OUT*/Int* ctsSzB, 951 /*OUT*/UWord* ctsMemSzB, 952 CUConst* cc, Cursor* c, 953 Bool td3, DW_FORM form ) 954 { 955 *cts = 0; 956 *ctsSzB = 0; 957 *ctsMemSzB = 0; 958 switch (form) { 959 case DW_FORM_data1: 960 *cts = (ULong)(UChar)get_UChar(c); 961 *ctsSzB = 1; 962 TRACE_D3("%u", (UInt)*cts); 963 break; 964 case DW_FORM_data2: 965 *cts = (ULong)(UShort)get_UShort(c); 966 *ctsSzB = 2; 967 TRACE_D3("%u", (UInt)*cts); 968 break; 969 case DW_FORM_data4: 970 *cts = (ULong)(UInt)get_UInt(c); 971 *ctsSzB = 4; 972 TRACE_D3("%u", (UInt)*cts); 973 break; 974 case DW_FORM_data8: 975 *cts = get_ULong(c); 976 *ctsSzB = 8; 977 TRACE_D3("%llu", *cts); 978 break; 979 case DW_FORM_sec_offset: 980 *cts = (ULong)get_Dwarfish_UWord( c, cc->is_dw64 ); 981 *ctsSzB = cc->is_dw64 ? 
8 : 4; 982 TRACE_D3("%llu", *cts); 983 break; 984 case DW_FORM_sdata: 985 *cts = (ULong)(Long)get_SLEB128(c); 986 *ctsSzB = 8; 987 TRACE_D3("%lld", (Long)*cts); 988 break; 989 case DW_FORM_udata: 990 *cts = (ULong)(Long)get_ULEB128(c); 991 *ctsSzB = 8; 992 TRACE_D3("%llu", (Long)*cts); 993 break; 994 case DW_FORM_addr: 995 /* note, this is a hack. DW_FORM_addr is defined as getting 996 a word the size of the target machine as defined by the 997 address_size field in the CU Header. However, 998 parse_CU_Header() rejects all inputs except those for 999 which address_size == sizeof(Word), hence we can just 1000 treat it as a (host) Word. */ 1001 *cts = (ULong)(UWord)get_UWord(c); 1002 *ctsSzB = sizeof(UWord); 1003 TRACE_D3("0x%lx", (UWord)*cts); 1004 break; 1005 1006 case DW_FORM_ref_addr: 1007 /* We make the same word-size assumption as DW_FORM_addr. */ 1008 /* What does this really mean? From D3 Sec 7.5.4, 1009 description of "reference", it would appear to reference 1010 some other DIE, by specifying the offset from the 1011 beginning of a .debug_info section. The D3 spec mentions 1012 that this might be in some other shared object and 1013 executable. But I don't see how the name of the other 1014 object/exe is specified. 1015 1016 At least for the DW_FORM_ref_addrs created by icc11, the 1017 references seem to be within the same object/executable. 1018 So for the moment we merely range-check, to see that they 1019 actually do specify a plausible offset within this 1020 object's .debug_info, and return the value unchanged. 1021 */ 1022 *cts = (ULong)(UWord)get_UWord(c); 1023 *ctsSzB = sizeof(UWord); 1024 TRACE_D3("0x%lx", (UWord)*cts); 1025 if (0) VG_(printf)("DW_FORM_ref_addr 0x%lx\n", (UWord)*cts); 1026 if (/* the following 2 are surely impossible, but ... */ 1027 cc->debug_info_img == NULL || cc->debug_info_sz == 0 1028 || *cts >= (ULong)cc->debug_info_sz) { 1029 /* Hmm. Offset is nonsensical for this object's .debug_info 1030 section. Be safe and reject it. 
*/ 1031 cc->barf("get_Form_contents: DW_FORM_ref_addr points " 1032 "outside .debug_info"); 1033 } 1034 break; 1035 1036 case DW_FORM_strp: { 1037 /* this is an offset into .debug_str */ 1038 UChar* str; 1039 UWord uw = (UWord)get_Dwarfish_UWord( c, cc->is_dw64 ); 1040 if (cc->debug_str_img == NULL || uw >= cc->debug_str_sz) 1041 cc->barf("get_Form_contents: DW_FORM_strp " 1042 "points outside .debug_str"); 1043 /* FIXME: check the entire string lies inside debug_str, 1044 not just the first byte of it. */ 1045 str = (UChar*)cc->debug_str_img + uw; 1046 TRACE_D3("(indirect string, offset: 0x%lx): %s", uw, str); 1047 *cts = (ULong)(UWord)str; 1048 *ctsMemSzB = 1 + (ULong)VG_(strlen)(str); 1049 break; 1050 } 1051 case DW_FORM_string: { 1052 UChar* str = get_AsciiZ(c); 1053 TRACE_D3("%s", str); 1054 *cts = (ULong)(UWord)str; 1055 /* strlen is safe because get_AsciiZ already 'vetted' the 1056 entire string */ 1057 *ctsMemSzB = 1 + (ULong)VG_(strlen)(str); 1058 break; 1059 } 1060 case DW_FORM_ref1: { 1061 UChar u8 = get_UChar(c); 1062 UWord res = cc->cu_start_offset + (UWord)u8; 1063 *cts = (ULong)res; 1064 *ctsSzB = sizeof(UWord); 1065 TRACE_D3("<%lx>", res); 1066 break; 1067 } 1068 case DW_FORM_ref2: { 1069 UShort u16 = get_UShort(c); 1070 UWord res = cc->cu_start_offset + (UWord)u16; 1071 *cts = (ULong)res; 1072 *ctsSzB = sizeof(UWord); 1073 TRACE_D3("<%lx>", res); 1074 break; 1075 } 1076 case DW_FORM_ref4: { 1077 UInt u32 = get_UInt(c); 1078 UWord res = cc->cu_start_offset + (UWord)u32; 1079 *cts = (ULong)res; 1080 *ctsSzB = sizeof(UWord); 1081 TRACE_D3("<%lx>", res); 1082 break; 1083 } 1084 case DW_FORM_ref8: { 1085 ULong u64 = get_ULong(c); 1086 UWord res = cc->cu_start_offset + (UWord)u64; 1087 *cts = (ULong)res; 1088 *ctsSzB = sizeof(UWord); 1089 TRACE_D3("<%lx>", res); 1090 break; 1091 } 1092 case DW_FORM_ref_udata: { 1093 ULong u64 = get_ULEB128(c); 1094 UWord res = cc->cu_start_offset + (UWord)u64; 1095 *cts = (ULong)res; 1096 *ctsSzB = sizeof(UWord); 1097 
TRACE_D3("<%lx>", res); 1098 break; 1099 } 1100 case DW_FORM_flag: { 1101 UChar u8 = get_UChar(c); 1102 TRACE_D3("%u", (UInt)u8); 1103 *cts = (ULong)u8; 1104 *ctsSzB = 1; 1105 break; 1106 } 1107 case DW_FORM_flag_present: 1108 TRACE_D3("1"); 1109 *cts = 1; 1110 *ctsSzB = 1; 1111 break; 1112 case DW_FORM_block1: { 1113 ULong u64b; 1114 ULong u64 = (ULong)get_UChar(c); 1115 UChar* block = get_address_of_Cursor(c); 1116 TRACE_D3("%llu byte block: ", u64); 1117 for (u64b = u64; u64b > 0; u64b--) { 1118 UChar u8 = get_UChar(c); 1119 TRACE_D3("%x ", (UInt)u8); 1120 } 1121 *cts = (ULong)(UWord)block; 1122 *ctsMemSzB = (UWord)u64; 1123 break; 1124 } 1125 case DW_FORM_block2: { 1126 ULong u64b; 1127 ULong u64 = (ULong)get_UShort(c); 1128 UChar* block = get_address_of_Cursor(c); 1129 TRACE_D3("%llu byte block: ", u64); 1130 for (u64b = u64; u64b > 0; u64b--) { 1131 UChar u8 = get_UChar(c); 1132 TRACE_D3("%x ", (UInt)u8); 1133 } 1134 *cts = (ULong)(UWord)block; 1135 *ctsMemSzB = (UWord)u64; 1136 break; 1137 } 1138 case DW_FORM_block4: { 1139 ULong u64b; 1140 ULong u64 = (ULong)get_UInt(c); 1141 UChar* block = get_address_of_Cursor(c); 1142 TRACE_D3("%llu byte block: ", u64); 1143 for (u64b = u64; u64b > 0; u64b--) { 1144 UChar u8 = get_UChar(c); 1145 TRACE_D3("%x ", (UInt)u8); 1146 } 1147 *cts = (ULong)(UWord)block; 1148 *ctsMemSzB = (UWord)u64; 1149 break; 1150 } 1151 case DW_FORM_exprloc: 1152 case DW_FORM_block: { 1153 ULong u64b; 1154 ULong u64 = (ULong)get_ULEB128(c); 1155 UChar* block = get_address_of_Cursor(c); 1156 TRACE_D3("%llu byte block: ", u64); 1157 for (u64b = u64; u64b > 0; u64b--) { 1158 UChar u8 = get_UChar(c); 1159 TRACE_D3("%x ", (UInt)u8); 1160 } 1161 *cts = (ULong)(UWord)block; 1162 *ctsMemSzB = (UWord)u64; 1163 break; 1164 } 1165 case DW_FORM_ref_sig8: { 1166 ULong u64b; 1167 UChar* block = get_address_of_Cursor(c); 1168 TRACE_D3("8 byte signature: "); 1169 for (u64b = 8; u64b > 0; u64b--) { 1170 UChar u8 = get_UChar(c); 1171 TRACE_D3("%x ", (UInt)u8); 
1172 } 1173 *cts = (ULong)(UWord)block; 1174 *ctsMemSzB = 8; 1175 break; 1176 } 1177 case DW_FORM_indirect: 1178 get_Form_contents (cts, ctsSzB, ctsMemSzB, cc, c, td3, 1179 (DW_FORM)get_ULEB128(c)); 1180 return; 1181 1182 default: 1183 VG_(printf)( 1184 "get_Form_contents: unhandled %d (%s) at <%lx>\n", 1185 form, ML_(pp_DW_FORM)(form), get_position_of_Cursor(c)); 1186 c->barf("get_Form_contents: unhandled DW_FORM"); 1187 } 1188 } 1189 1190 1191 /*------------------------------------------------------------*/ 1192 /*--- ---*/ 1193 /*--- Parsing of variable-related DIEs ---*/ 1194 /*--- ---*/ 1195 /*------------------------------------------------------------*/ 1196 1197 typedef 1198 struct _TempVar { 1199 UChar* name; /* in DebugInfo's .strchunks */ 1200 /* Represent ranges economically. nRanges is the number of 1201 ranges. Cases: 1202 0: .rngOneMin .rngOneMax .manyRanges are all zero 1203 1: .rngOneMin .rngOneMax hold the range; .rngMany is NULL 1204 2: .rngOneMin .rngOneMax are zero; .rngMany holds the ranges. 1205 This is merely an optimisation to avoid having to allocate 1206 and free the XArray in the common (98%) of cases where there 1207 is zero or one address ranges. */ 1208 UWord nRanges; 1209 Addr rngOneMin; 1210 Addr rngOneMax; 1211 XArray* rngMany; /* of AddrRange. NON-UNIQUE PTR in AR_DINFO. */ 1212 /* Do not free .rngMany, since many TempVars will have the same 1213 value. Instead the associated storage is to be freed by 1214 deleting 'rangetree', which stores a single copy of each 1215 range. */ 1216 /* --- */ 1217 Int level; 1218 UWord typeR; /* a cuOff */ 1219 GExpr* gexpr; /* for this variable */ 1220 GExpr* fbGX; /* to find the frame base of the enclosing fn, if 1221 any */ 1222 UChar* fName; /* declaring file name, or NULL */ 1223 Int fLine; /* declaring file line number, or zero */ 1224 /* offset in .debug_info, so that abstract instances can be 1225 found to satisfy references from concrete instances. 
*/ 1226 UWord dioff; 1227 UWord absOri; /* so the absOri fields refer to dioff fields 1228 in some other, related TempVar. */ 1229 } 1230 TempVar; 1231 1232 #define N_D3_VAR_STACK 48 1233 1234 typedef 1235 struct { 1236 /* Contains the range stack: a stack of address ranges, one 1237 stack entry for each nested scope. 1238 1239 Some scope entries are created by function definitions 1240 (DW_AT_subprogram), and for those, we also note the GExpr 1241 derived from its DW_AT_frame_base attribute, if any. 1242 Consequently it should be possible to find, for any 1243 variable's DIE, the GExpr for the the containing function's 1244 DW_AT_frame_base by scanning back through the stack to find 1245 the nearest entry associated with a function. This somewhat 1246 elaborate scheme is provided so as to make it possible to 1247 obtain the correct DW_AT_frame_base expression even in the 1248 presence of nested functions (or to be more precise, in the 1249 presence of nested DW_AT_subprogram DIEs). 1250 */ 1251 Int sp; /* [sp] is innermost active entry; sp==-1 for empty 1252 stack */ 1253 XArray* ranges[N_D3_VAR_STACK]; /* XArray of AddrRange */ 1254 Int level[N_D3_VAR_STACK]; /* D3 DIE levels */ 1255 Bool isFunc[N_D3_VAR_STACK]; /* from DW_AT_subprogram? */ 1256 GExpr* fbGX[N_D3_VAR_STACK]; /* if isFunc, contains the FB 1257 expr, else NULL */ 1258 /* The file name table. Is a mapping from integer index to the 1259 (permanent) copy of the string, iow a non-img area. 
*/ 1260 XArray* /* of UChar* */ filenameTable; 1261 } 1262 D3VarParser; 1263 1264 static void varstack_show ( D3VarParser* parser, HChar* str ) { 1265 Word i, j; 1266 VG_(printf)(" varstack (%s) {\n", str); 1267 for (i = 0; i <= parser->sp; i++) { 1268 XArray* xa = parser->ranges[i]; 1269 vg_assert(xa); 1270 VG_(printf)(" [%ld] (level %d)", i, parser->level[i]); 1271 if (parser->isFunc[i]) { 1272 VG_(printf)(" (fbGX=%p)", parser->fbGX[i]); 1273 } else { 1274 vg_assert(parser->fbGX[i] == NULL); 1275 } 1276 VG_(printf)(": "); 1277 if (VG_(sizeXA)( xa ) == 0) { 1278 VG_(printf)("** empty PC range array **"); 1279 } else { 1280 for (j = 0; j < VG_(sizeXA)( xa ); j++) { 1281 AddrRange* range = (AddrRange*) VG_(indexXA)( xa, j ); 1282 vg_assert(range); 1283 VG_(printf)("[%#lx,%#lx] ", range->aMin, range->aMax); 1284 } 1285 } 1286 VG_(printf)("\n"); 1287 } 1288 VG_(printf)(" }\n"); 1289 } 1290 1291 /* Remove from the stack, all entries with .level > 'level' */ 1292 static 1293 void varstack_preen ( D3VarParser* parser, Bool td3, Int level ) 1294 { 1295 Bool changed = False; 1296 vg_assert(parser->sp < N_D3_VAR_STACK); 1297 while (True) { 1298 vg_assert(parser->sp >= -1); 1299 if (parser->sp == -1) break; 1300 if (parser->level[parser->sp] <= level) break; 1301 if (0) 1302 TRACE_D3("BBBBAAAA varstack_pop [newsp=%d]\n", parser->sp-1); 1303 vg_assert(parser->ranges[parser->sp]); 1304 /* Who allocated this xa? get_range_list() or 1305 unitary_range_list(). 
*/ 1306 VG_(deleteXA)( parser->ranges[parser->sp] ); 1307 parser->ranges[parser->sp] = NULL; 1308 parser->level[parser->sp] = 0; 1309 parser->isFunc[parser->sp] = False; 1310 parser->fbGX[parser->sp] = NULL; 1311 parser->sp--; 1312 changed = True; 1313 } 1314 if (changed && td3) 1315 varstack_show( parser, "after preen" ); 1316 } 1317 1318 static void varstack_push ( CUConst* cc, 1319 D3VarParser* parser, 1320 Bool td3, 1321 XArray* ranges, Int level, 1322 Bool isFunc, GExpr* fbGX ) { 1323 if (0) 1324 TRACE_D3("BBBBAAAA varstack_push[newsp=%d]: %d %p\n", 1325 parser->sp+1, level, ranges); 1326 1327 /* First we need to zap everything >= 'level', as we are about to 1328 replace any previous entry at 'level', so .. */ 1329 varstack_preen(parser, /*td3*/False, level-1); 1330 1331 vg_assert(parser->sp >= -1); 1332 vg_assert(parser->sp < N_D3_VAR_STACK); 1333 if (parser->sp == N_D3_VAR_STACK-1) 1334 cc->barf("varstack_push: N_D3_VAR_STACK is too low; " 1335 "increase and recompile"); 1336 if (parser->sp >= 0) 1337 vg_assert(parser->level[parser->sp] < level); 1338 parser->sp++; 1339 vg_assert(parser->ranges[parser->sp] == NULL); 1340 vg_assert(parser->level[parser->sp] == 0); 1341 vg_assert(parser->isFunc[parser->sp] == False); 1342 vg_assert(parser->fbGX[parser->sp] == NULL); 1343 vg_assert(ranges != NULL); 1344 if (!isFunc) vg_assert(fbGX == NULL); 1345 parser->ranges[parser->sp] = ranges; 1346 parser->level[parser->sp] = level; 1347 parser->isFunc[parser->sp] = isFunc; 1348 parser->fbGX[parser->sp] = fbGX; 1349 if (td3) 1350 varstack_show( parser, "after push" ); 1351 } 1352 1353 1354 /* cts, ctsSzB, ctsMemSzB are derived from a DW_AT_location and so 1355 refer either to a location expression or to a location list. 1356 Figure out which, and in both cases bundle the expression or 1357 location list into a so-called GExpr (guarded expression). 
*/ 1358 __attribute__((noinline)) 1359 static GExpr* get_GX ( CUConst* cc, Bool td3, 1360 ULong cts, Int ctsSzB, UWord ctsMemSzB ) 1361 { 1362 GExpr* gexpr = NULL; 1363 if (ctsMemSzB > 0 && ctsSzB == 0) { 1364 /* represents an in-line location expression, and cts points 1365 right at it */ 1366 gexpr = make_singleton_GX( (UChar*)(UWord)cts, ctsMemSzB ); 1367 } 1368 else 1369 if (ctsMemSzB == 0 && ctsSzB > 0) { 1370 /* represents location list. cts is the offset of it in 1371 .debug_loc. */ 1372 if (!cc->cu_svma_known) 1373 cc->barf("get_GX: location list, but CU svma is unknown"); 1374 gexpr = make_general_GX( cc, td3, (UWord)cts, cc->cu_svma ); 1375 } 1376 else { 1377 vg_assert(0); /* else caller is bogus */ 1378 } 1379 return gexpr; 1380 } 1381 1382 1383 static 1384 void read_filename_table( /*MOD*/D3VarParser* parser, 1385 CUConst* cc, UWord debug_line_offset, 1386 Bool td3 ) 1387 { 1388 Bool is_dw64; 1389 Cursor c; 1390 Word i; 1391 UShort version; 1392 UChar opcode_base; 1393 UChar* str; 1394 1395 vg_assert(parser && cc && cc->barf); 1396 if ((!cc->debug_line_img) 1397 || cc->debug_line_sz <= debug_line_offset) 1398 cc->barf("read_filename_table: .debug_line is missing?"); 1399 1400 init_Cursor( &c, cc->debug_line_img, 1401 cc->debug_line_sz, debug_line_offset, cc->barf, 1402 "Overrun whilst reading .debug_line section(1)" ); 1403 1404 /* unit_length = */ 1405 get_Initial_Length( &is_dw64, &c, 1406 "read_filename_table: invalid initial-length field" ); 1407 version = get_UShort( &c ); 1408 if (version != 2 && version != 3 && version != 4) 1409 cc->barf("read_filename_table: Only DWARF version 2, 3 and 4 line info " 1410 "is currently supported."); 1411 /*header_length = (ULong)*/ get_Dwarfish_UWord( &c, is_dw64 ); 1412 /*minimum_instruction_length = */ get_UChar( &c ); 1413 if (version >= 4) 1414 /*maximum_operations_per_insn = */ get_UChar( &c ); 1415 /*default_is_stmt = */ get_UChar( &c ); 1416 /*line_base = (Char)*/ get_UChar( &c ); 1417 /*line_range = */ 
get_UChar( &c ); 1418 opcode_base = get_UChar( &c ); 1419 /* skip over "standard_opcode_lengths" */ 1420 for (i = 1; i < (Word)opcode_base; i++) 1421 (void)get_UChar( &c ); 1422 1423 /* skip over the directory names table */ 1424 while (peek_UChar(&c) != 0) { 1425 (void)get_AsciiZ(&c); 1426 } 1427 (void)get_UChar(&c); /* skip terminating zero */ 1428 1429 /* Read and record the file names table */ 1430 vg_assert(parser->filenameTable); 1431 vg_assert( VG_(sizeXA)( parser->filenameTable ) == 0 ); 1432 /* Add a dummy index-zero entry. DWARF3 numbers its files 1433 from 1, for some reason. */ 1434 str = ML_(addStr)( cc->di, "<unknown_file>", -1 ); 1435 VG_(addToXA)( parser->filenameTable, &str ); 1436 while (peek_UChar(&c) != 0) { 1437 str = get_AsciiZ(&c); 1438 TRACE_D3(" read_filename_table: %ld %s\n", 1439 VG_(sizeXA)(parser->filenameTable), str); 1440 str = ML_(addStr)( cc->di, str, -1 ); 1441 VG_(addToXA)( parser->filenameTable, &str ); 1442 (void)get_ULEB128( &c ); /* skip directory index # */ 1443 (void)get_ULEB128( &c ); /* skip last mod time */ 1444 (void)get_ULEB128( &c ); /* file size */ 1445 } 1446 /* We're done! The rest of it is not interesting. 
*/ 1447 } 1448 1449 1450 __attribute__((noinline)) 1451 static void parse_var_DIE ( 1452 /*MOD*/WordFM* /* of (XArray* of AddrRange, void) */ rangestree, 1453 /*MOD*/XArray* /* of TempVar* */ tempvars, 1454 /*MOD*/XArray* /* of GExpr* */ gexprs, 1455 /*MOD*/D3VarParser* parser, 1456 DW_TAG dtag, 1457 UWord posn, 1458 Int level, 1459 Cursor* c_die, 1460 Cursor* c_abbv, 1461 CUConst* cc, 1462 Bool td3 1463 ) 1464 { 1465 ULong cts; 1466 Int ctsSzB; 1467 UWord ctsMemSzB; 1468 1469 UWord saved_die_c_offset = get_position_of_Cursor( c_die ); 1470 UWord saved_abbv_c_offset = get_position_of_Cursor( c_abbv ); 1471 1472 varstack_preen( parser, td3, level-1 ); 1473 1474 if (dtag == DW_TAG_compile_unit) { 1475 Bool have_lo = False; 1476 Bool have_hi1 = False; 1477 Bool have_range = False; 1478 Addr ip_lo = 0; 1479 Addr ip_hi1 = 0; 1480 Addr rangeoff = 0; 1481 while (True) { 1482 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 1483 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 1484 if (attr == 0 && form == 0) break; 1485 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 1486 cc, c_die, False/*td3*/, form ); 1487 if (attr == DW_AT_low_pc && ctsSzB > 0) { 1488 ip_lo = cts; 1489 have_lo = True; 1490 } 1491 if (attr == DW_AT_high_pc && ctsSzB > 0) { 1492 ip_hi1 = cts; 1493 have_hi1 = True; 1494 } 1495 if (attr == DW_AT_ranges && ctsSzB > 0) { 1496 rangeoff = cts; 1497 have_range = True; 1498 } 1499 if (attr == DW_AT_stmt_list && ctsSzB > 0) { 1500 read_filename_table( parser, cc, (UWord)cts, td3 ); 1501 } 1502 } 1503 /* Now, does this give us an opportunity to find this 1504 CU's svma? */ 1505 #if 0 1506 if (level == 0 && have_lo) { 1507 vg_assert(!cc->cu_svma_known); /* if this fails, it must be 1508 because we've already seen a DW_TAG_compile_unit DIE at level 1509 0. But that can't happen, because DWARF3 only allows exactly 1510 one top level DIE per CU. 
*/ 1511 cc->cu_svma_known = True; 1512 cc->cu_svma = ip_lo; 1513 if (1) 1514 TRACE_D3("BBBBAAAA acquire CU_SVMA of %p\n", cc->cu_svma); 1515 /* Now, it may be that this DIE doesn't tell us the CU's 1516 SVMA, by way of not having a DW_AT_low_pc. That's OK -- 1517 the CU doesn't *have* to have its SVMA specified. 1518 1519 But as per last para D3 spec sec 3.1.1 ("Normal and 1520 Partial Compilation Unit Entries", "If the base address 1521 (viz, the SVMA) is undefined, then any DWARF entry of 1522 structure defined interms of the base address of that 1523 compilation unit is not valid.". So that means, if whilst 1524 processing the children of this top level DIE (or their 1525 children, etc) we see a DW_AT_range, and cu_svma_known is 1526 False, then the DIE that contains it is (per the spec) 1527 invalid, and we can legitimately stop and complain. */ 1528 } 1529 #else 1530 /* .. whereas The Reality is, simply assume the SVMA is zero 1531 if it isn't specified. */ 1532 if (level == 0) { 1533 vg_assert(!cc->cu_svma_known); 1534 cc->cu_svma_known = True; 1535 if (have_lo) 1536 cc->cu_svma = ip_lo; 1537 else 1538 cc->cu_svma = 0; 1539 } 1540 #endif 1541 /* Do we have something that looks sane? */ 1542 if (have_lo && have_hi1 && (!have_range)) { 1543 if (ip_lo < ip_hi1) 1544 varstack_push( cc, parser, td3, 1545 unitary_range_list(ip_lo, ip_hi1 - 1), 1546 level, 1547 False/*isFunc*/, NULL/*fbGX*/ ); 1548 } else 1549 if ((!have_lo) && (!have_hi1) && have_range) { 1550 varstack_push( cc, parser, td3, 1551 get_range_list( cc, td3, 1552 rangeoff, cc->cu_svma ), 1553 level, 1554 False/*isFunc*/, NULL/*fbGX*/ ); 1555 } else 1556 if ((!have_lo) && (!have_hi1) && (!have_range)) { 1557 /* CU has no code, presumably? */ 1558 varstack_push( cc, parser, td3, 1559 empty_range_list(), 1560 level, 1561 False/*isFunc*/, NULL/*fbGX*/ ); 1562 } else 1563 if (have_lo && (!have_hi1) && have_range && ip_lo == 0) { 1564 /* broken DIE created by gcc-4.3.X ? 
Ignore the 1565 apparently-redundant DW_AT_low_pc and use the DW_AT_ranges 1566 instead. */ 1567 varstack_push( cc, parser, td3, 1568 get_range_list( cc, td3, 1569 rangeoff, cc->cu_svma ), 1570 level, 1571 False/*isFunc*/, NULL/*fbGX*/ ); 1572 } else { 1573 if (0) VG_(printf)("I got hlo %d hhi1 %d hrange %d\n", 1574 (Int)have_lo, (Int)have_hi1, (Int)have_range); 1575 goto bad_DIE; 1576 } 1577 } 1578 1579 if (dtag == DW_TAG_lexical_block || dtag == DW_TAG_subprogram) { 1580 Bool have_lo = False; 1581 Bool have_hi1 = False; 1582 Bool have_range = False; 1583 Addr ip_lo = 0; 1584 Addr ip_hi1 = 0; 1585 Addr rangeoff = 0; 1586 Bool isFunc = dtag == DW_TAG_subprogram; 1587 GExpr* fbGX = NULL; 1588 while (True) { 1589 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 1590 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 1591 if (attr == 0 && form == 0) break; 1592 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 1593 cc, c_die, False/*td3*/, form ); 1594 if (attr == DW_AT_low_pc && ctsSzB > 0) { 1595 ip_lo = cts; 1596 have_lo = True; 1597 } 1598 if (attr == DW_AT_high_pc && ctsSzB > 0) { 1599 ip_hi1 = cts; 1600 have_hi1 = True; 1601 } 1602 if (attr == DW_AT_ranges && ctsSzB > 0) { 1603 rangeoff = cts; 1604 have_range = True; 1605 } 1606 if (isFunc 1607 && attr == DW_AT_frame_base 1608 && ((ctsMemSzB > 0 && ctsSzB == 0) 1609 || (ctsMemSzB == 0 && ctsSzB > 0))) { 1610 fbGX = get_GX( cc, False/*td3*/, cts, ctsSzB, ctsMemSzB ); 1611 vg_assert(fbGX); 1612 VG_(addToXA)(gexprs, &fbGX); 1613 } 1614 } 1615 /* Do we have something that looks sane? */ 1616 if (dtag == DW_TAG_subprogram 1617 && (!have_lo) && (!have_hi1) && (!have_range)) { 1618 /* This is legit - ignore it. Sec 3.3.3: "A subroutine entry 1619 representing a subroutine declaration that is not also a 1620 definition does not have code address or range 1621 attributes." 
*/ 1622 } else 1623 if (dtag == DW_TAG_lexical_block 1624 && (!have_lo) && (!have_hi1) && (!have_range)) { 1625 /* I believe this is legit, and means the lexical block 1626 contains no insns (whatever that might mean). Ignore. */ 1627 } else 1628 if (have_lo && have_hi1 && (!have_range)) { 1629 /* This scope supplies just a single address range. */ 1630 if (ip_lo < ip_hi1) 1631 varstack_push( cc, parser, td3, 1632 unitary_range_list(ip_lo, ip_hi1 - 1), 1633 level, isFunc, fbGX ); 1634 } else 1635 if ((!have_lo) && (!have_hi1) && have_range) { 1636 /* This scope supplies multiple address ranges via the use of 1637 a range list. */ 1638 varstack_push( cc, parser, td3, 1639 get_range_list( cc, td3, 1640 rangeoff, cc->cu_svma ), 1641 level, isFunc, fbGX ); 1642 } else 1643 if (have_lo && (!have_hi1) && (!have_range)) { 1644 /* This scope is bogus. The D3 spec sec 3.4 (Lexical Block 1645 Entries) says fairly clearly that a scope must have either 1646 _range or (_low_pc and _high_pc). */ 1647 /* The spec is a bit ambiguous though. Perhaps a single byte 1648 range is intended? 
See sec 2.17 (Code Addresses And Ranges) */ 1649 /* This case is here because icc9 produced this: 1650 <2><13bd>: DW_TAG_lexical_block 1651 DW_AT_decl_line : 5229 1652 DW_AT_decl_column : 37 1653 DW_AT_decl_file : 1 1654 DW_AT_low_pc : 0x401b03 1655 */ 1656 /* Ignore (seems safe than pushing a single byte range) */ 1657 } else 1658 goto bad_DIE; 1659 } 1660 1661 if (dtag == DW_TAG_variable || dtag == DW_TAG_formal_parameter) { 1662 UChar* name = NULL; 1663 UWord typeR = D3_INVALID_CUOFF; 1664 Bool external = False; 1665 GExpr* gexpr = NULL; 1666 Int n_attrs = 0; 1667 UWord abs_ori = (UWord)D3_INVALID_CUOFF; 1668 Int lineNo = 0; 1669 UChar* fileName = NULL; 1670 while (True) { 1671 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 1672 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 1673 if (attr == 0 && form == 0) break; 1674 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 1675 cc, c_die, False/*td3*/, form ); 1676 n_attrs++; 1677 if (attr == DW_AT_name && ctsMemSzB > 0) { 1678 name = ML_(addStr)( cc->di, (UChar*)(UWord)cts, -1 ); 1679 } 1680 if (attr == DW_AT_location 1681 && ((ctsMemSzB > 0 && ctsSzB == 0) 1682 || (ctsMemSzB == 0 && ctsSzB > 0))) { 1683 gexpr = get_GX( cc, False/*td3*/, cts, ctsSzB, ctsMemSzB ); 1684 vg_assert(gexpr); 1685 VG_(addToXA)(gexprs, &gexpr); 1686 } 1687 if (attr == DW_AT_type && ctsSzB > 0) { 1688 typeR = (UWord)cts; 1689 } 1690 if (attr == DW_AT_external && ctsSzB > 0 && cts > 0) { 1691 external = True; 1692 } 1693 if (attr == DW_AT_abstract_origin && ctsSzB > 0) { 1694 abs_ori = (UWord)cts; 1695 } 1696 if (attr == DW_AT_declaration && ctsSzB > 0 && cts > 0) { 1697 /*declaration = True;*/ 1698 } 1699 if (attr == DW_AT_decl_line && ctsSzB > 0) { 1700 lineNo = (Int)cts; 1701 } 1702 if (attr == DW_AT_decl_file && ctsSzB > 0) { 1703 Int ftabIx = (Int)cts; 1704 if (ftabIx >= 1 1705 && ftabIx < VG_(sizeXA)( parser->filenameTable )) { 1706 fileName = *(UChar**) 1707 VG_(indexXA)( parser->filenameTable, ftabIx ); 1708 vg_assert(fileName); 1709 } 1710 
if (0) VG_(printf)("XXX filename = %s\n", fileName); 1711 } 1712 } 1713 /* We'll collect it under if one of the following three 1714 conditions holds: 1715 (1) has location and type -> completed 1716 (2) has type only -> is an abstract instance 1717 (3) has location and abs_ori -> is a concrete instance 1718 Name, filename and line number are all optional frills. 1719 */ 1720 if ( /* 1 */ (gexpr && typeR != D3_INVALID_CUOFF) 1721 /* 2 */ || (typeR != D3_INVALID_CUOFF) 1722 /* 3 */ || (gexpr && abs_ori != (UWord)D3_INVALID_CUOFF) ) { 1723 1724 /* Add this variable to the list of interesting looking 1725 variables. Crucially, note along with it the address 1726 range(s) associated with the variable, which for locals 1727 will be the address ranges at the top of the varparser's 1728 stack. */ 1729 GExpr* fbGX = NULL; 1730 Word i, nRanges; 1731 XArray* /* of AddrRange */ xa; 1732 TempVar* tv; 1733 /* Stack can't be empty; we put a dummy entry on it for the 1734 entire address range before starting with the DIEs for 1735 this CU. */ 1736 vg_assert(parser->sp >= 0); 1737 1738 /* If this is a local variable (non-external), try to find 1739 the GExpr for the DW_AT_frame_base of the containing 1740 function. It should have been pushed on the stack at the 1741 time we encountered its DW_TAG_subprogram DIE, so the way 1742 to find it is to scan back down the stack looking for it. 1743 If there isn't an enclosing stack entry marked 'isFunc' 1744 then we must be seeing variable or formal param DIEs 1745 outside of a function, so we deem the Dwarf to be 1746 malformed if that happens. Note that the fbGX may be NULL 1747 if the containing DT_TAG_subprogram didn't supply a 1748 DW_AT_frame_base -- that's OK, but there must actually be 1749 a containing DW_TAG_subprogram. 
*/ 1750 if (!external) { 1751 Bool found = False; 1752 for (i = parser->sp; i >= 0; i--) { 1753 if (parser->isFunc[i]) { 1754 fbGX = parser->fbGX[i]; 1755 found = True; 1756 break; 1757 } 1758 } 1759 if (!found) { 1760 if (0 && VG_(clo_verbosity) >= 0) { 1761 VG_(message)(Vg_DebugMsg, 1762 "warning: parse_var_DIE: non-external variable " 1763 "outside DW_TAG_subprogram\n"); 1764 } 1765 /* goto bad_DIE; */ 1766 /* This seems to happen a lot. Just ignore it -- if, 1767 when we come to evaluation of the location (guarded) 1768 expression, it requires a frame base value, and 1769 there's no expression for that, then evaluation as a 1770 whole will fail. Harmless - a bit of a waste of 1771 cycles but nothing more. */ 1772 } 1773 } 1774 1775 /* re "external ? 0 : parser->sp" (twice), if the var is 1776 marked 'external' then we must put it at the global scope, 1777 as only the global scope (level 0) covers the entire PC 1778 address space. It is asserted elsewhere that level 0 1779 always covers the entire address space. */ 1780 xa = parser->ranges[external ? 0 : parser->sp]; 1781 nRanges = VG_(sizeXA)(xa); 1782 vg_assert(nRanges >= 0); 1783 1784 tv = ML_(dinfo_zalloc)( "di.readdwarf3.pvD.1", sizeof(TempVar) ); 1785 tv->name = name; 1786 tv->level = external ? 0 : parser->sp; 1787 tv->typeR = typeR; 1788 tv->gexpr = gexpr; 1789 tv->fbGX = fbGX; 1790 tv->fName = fileName; 1791 tv->fLine = lineNo; 1792 tv->dioff = posn; 1793 tv->absOri = abs_ori; 1794 1795 /* See explanation on definition of type TempVar for the 1796 reason for this elaboration. */ 1797 tv->nRanges = nRanges; 1798 tv->rngOneMin = 0; 1799 tv->rngOneMax = 0; 1800 tv->rngMany = NULL; 1801 if (nRanges == 1) { 1802 AddrRange* range = VG_(indexXA)(xa, 0); 1803 tv->rngOneMin = range->aMin; 1804 tv->rngOneMax = range->aMax; 1805 } 1806 else if (nRanges > 1) { 1807 /* See if we already have a range list which is 1808 structurally identical. If so, use that; if not, clone 1809 this one, and add it to our collection. 
*/ 1810 UWord keyW, valW; 1811 if (VG_(lookupFM)( rangestree, &keyW, &valW, (UWord)xa )) { 1812 XArray* old = (XArray*)keyW; 1813 tl_assert(valW == 0); 1814 tl_assert(old != xa); 1815 tv->rngMany = old; 1816 } else { 1817 XArray* cloned = VG_(cloneXA)( "di.readdwarf3.pvD.2", xa ); 1818 tv->rngMany = cloned; 1819 VG_(addToFM)( rangestree, (UWord)cloned, 0 ); 1820 } 1821 } 1822 1823 VG_(addToXA)( tempvars, &tv ); 1824 1825 TRACE_D3(" Recording this variable, with %ld PC range(s)\n", 1826 VG_(sizeXA)(xa) ); 1827 /* collect stats on how effective the ->ranges special 1828 casing is */ 1829 if (0) { 1830 static Int ntot=0, ngt=0; 1831 ntot++; 1832 if (tv->rngMany) ngt++; 1833 if (0 == (ntot % 100000)) 1834 VG_(printf)("XXXX %d tot, %d cloned\n", ntot, ngt); 1835 } 1836 1837 } 1838 1839 /* Here are some other weird cases seen in the wild: 1840 1841 We have a variable with a name and a type, but no 1842 location. I guess that's a sign that it has been 1843 optimised away. Ignore it. Here's an example: 1844 1845 static Int lc_compar(void* n1, void* n2) { 1846 MC_Chunk* mc1 = *(MC_Chunk**)n1; 1847 MC_Chunk* mc2 = *(MC_Chunk**)n2; 1848 return (mc1->data < mc2->data ? -1 : 1); 1849 } 1850 1851 Both mc1 and mc2 are like this 1852 <2><5bc>: Abbrev Number: 21 (DW_TAG_variable) 1853 DW_AT_name : mc1 1854 DW_AT_decl_file : 1 1855 DW_AT_decl_line : 216 1856 DW_AT_type : <5d3> 1857 1858 whereas n1 and n2 do have locations specified. 1859 1860 --------------------------------------------- 1861 1862 We see a DW_TAG_formal_parameter with a type, but 1863 no name and no location. 
It's probably part of a function type 1864 construction, thusly, hence ignore it: 1865 <1><2b4>: Abbrev Number: 12 (DW_TAG_subroutine_type) 1866 DW_AT_sibling : <2c9> 1867 DW_AT_prototyped : 1 1868 DW_AT_type : <114> 1869 <2><2be>: Abbrev Number: 13 (DW_TAG_formal_parameter) 1870 DW_AT_type : <13e> 1871 <2><2c3>: Abbrev Number: 13 (DW_TAG_formal_parameter) 1872 DW_AT_type : <133> 1873 1874 --------------------------------------------- 1875 1876 Is very minimal, like this: 1877 <4><81d>: Abbrev Number: 44 (DW_TAG_variable) 1878 DW_AT_abstract_origin: <7ba> 1879 What that signifies I have no idea. Ignore. 1880 1881 ---------------------------------------------- 1882 1883 Is very minimal, like this: 1884 <200f>: DW_TAG_formal_parameter 1885 DW_AT_abstract_ori: <1f4c> 1886 DW_AT_location : 13440 1887 What that signifies I have no idea. Ignore. 1888 It might be significant, though: the variable at least 1889 has a location and so might exist somewhere. 1890 Maybe we should handle this. 1891 1892 --------------------------------------------- 1893 1894 <22407>: DW_TAG_variable 1895 DW_AT_name : (indirect string, offset: 0x6579): 1896 vgPlain_trampoline_stuff_start 1897 DW_AT_decl_file : 29 1898 DW_AT_decl_line : 56 1899 DW_AT_external : 1 1900 DW_AT_declaration : 1 1901 1902 Nameless and typeless variable that has a location? Who 1903 knows. Not me. 1904 <2><3d178>: Abbrev Number: 22 (DW_TAG_variable) 1905 DW_AT_location : 9 byte block: 3 c0 c7 13 38 0 0 0 0 1906 (DW_OP_addr: 3813c7c0) 1907 1908 No, really. Check it out. gcc is quite simply borked. 
1909 <3><168cc>: Abbrev Number: 141 (DW_TAG_variable) 1910 // followed by no attributes, and the next DIE is a sibling, 1911 // not a child 1912 */ 1913 } 1914 return; 1915 1916 bad_DIE: 1917 set_position_of_Cursor( c_die, saved_die_c_offset ); 1918 set_position_of_Cursor( c_abbv, saved_abbv_c_offset ); 1919 VG_(printf)("\nparse_var_DIE: confused by:\n"); 1920 VG_(printf)(" <%d><%lx>: %s\n", level, posn, ML_(pp_DW_TAG)( dtag ) ); 1921 while (True) { 1922 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 1923 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 1924 if (attr == 0 && form == 0) break; 1925 VG_(printf)(" %18s: ", ML_(pp_DW_AT)(attr)); 1926 /* Get the form contents, so as to print them */ 1927 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 1928 cc, c_die, True, form ); 1929 VG_(printf)("\t\n"); 1930 } 1931 VG_(printf)("\n"); 1932 cc->barf("parse_var_DIE: confused by the above DIE"); 1933 /*NOTREACHED*/ 1934 } 1935 1936 1937 /*------------------------------------------------------------*/ 1938 /*--- ---*/ 1939 /*--- Parsing of type-related DIEs ---*/ 1940 /*--- ---*/ 1941 /*------------------------------------------------------------*/ 1942 1943 #define N_D3_TYPE_STACK 16 1944 1945 typedef 1946 struct { 1947 /* What source language? 'A'=Ada83/95, 1948 'C'=C/C++, 1949 'F'=Fortran, 1950 '?'=other 1951 Established once per compilation unit. */ 1952 UChar language; 1953 /* A stack of types which are currently under construction */ 1954 Int sp; /* [sp] is innermost active entry; sp==-1 for empty 1955 stack */ 1956 /* Note that the TyEnts in qparentE are temporary copies of the 1957 ones accumulating in the main tyent array. So it is not safe 1958 to free up anything on them when popping them off the stack 1959 (iow, it isn't safe to use TyEnt__make_EMPTY on them). Just 1960 memset them to zero when done. 
*/ 1961 TyEnt qparentE[N_D3_TYPE_STACK]; /* parent TyEnts */ 1962 Int qlevel[N_D3_TYPE_STACK]; 1963 1964 } 1965 D3TypeParser; 1966 1967 static void typestack_show ( D3TypeParser* parser, HChar* str ) { 1968 Word i; 1969 VG_(printf)(" typestack (%s) {\n", str); 1970 for (i = 0; i <= parser->sp; i++) { 1971 VG_(printf)(" [%ld] (level %d): ", i, parser->qlevel[i]); 1972 ML_(pp_TyEnt)( &parser->qparentE[i] ); 1973 VG_(printf)("\n"); 1974 } 1975 VG_(printf)(" }\n"); 1976 } 1977 1978 /* Remove from the stack, all entries with .level > 'level' */ 1979 static 1980 void typestack_preen ( D3TypeParser* parser, Bool td3, Int level ) 1981 { 1982 Bool changed = False; 1983 vg_assert(parser->sp < N_D3_TYPE_STACK); 1984 while (True) { 1985 vg_assert(parser->sp >= -1); 1986 if (parser->sp == -1) break; 1987 if (parser->qlevel[parser->sp] <= level) break; 1988 if (0) 1989 TRACE_D3("BBBBAAAA typestack_pop [newsp=%d]\n", parser->sp-1); 1990 vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp])); 1991 VG_(memset)(&parser->qparentE[parser->sp], 0, sizeof(TyEnt)); 1992 parser->qparentE[parser->sp].cuOff = D3_INVALID_CUOFF; 1993 parser->qparentE[parser->sp].tag = Te_EMPTY; 1994 parser->qlevel[parser->sp] = 0; 1995 parser->sp--; 1996 changed = True; 1997 } 1998 if (changed && td3) 1999 typestack_show( parser, "after preen" ); 2000 } 2001 2002 static Bool typestack_is_empty ( D3TypeParser* parser ) { 2003 vg_assert(parser->sp >= -1 && parser->sp < N_D3_TYPE_STACK); 2004 return parser->sp == -1; 2005 } 2006 2007 static void typestack_push ( CUConst* cc, 2008 D3TypeParser* parser, 2009 Bool td3, 2010 TyEnt* parentE, Int level ) { 2011 if (0) 2012 TRACE_D3("BBBBAAAA typestack_push[newsp=%d]: %d %05lx\n", 2013 parser->sp+1, level, parentE->cuOff); 2014 2015 /* First we need to zap everything >= 'level', as we are about to 2016 replace any previous entry at 'level', so .. 
*/ 2017 typestack_preen(parser, /*td3*/False, level-1); 2018 2019 vg_assert(parser->sp >= -1); 2020 vg_assert(parser->sp < N_D3_TYPE_STACK); 2021 if (parser->sp == N_D3_TYPE_STACK-1) 2022 cc->barf("typestack_push: N_D3_TYPE_STACK is too low; " 2023 "increase and recompile"); 2024 if (parser->sp >= 0) 2025 vg_assert(parser->qlevel[parser->sp] < level); 2026 parser->sp++; 2027 vg_assert(parser->qparentE[parser->sp].tag == Te_EMPTY); 2028 vg_assert(parser->qlevel[parser->sp] == 0); 2029 vg_assert(parentE); 2030 vg_assert(ML_(TyEnt__is_type)(parentE)); 2031 vg_assert(parentE->cuOff != D3_INVALID_CUOFF); 2032 parser->qparentE[parser->sp] = *parentE; 2033 parser->qlevel[parser->sp] = level; 2034 if (td3) 2035 typestack_show( parser, "after push" ); 2036 } 2037 2038 /* True if the subrange type being parsed gives the bounds of an array. */ 2039 static Bool subrange_type_denotes_array_bounds ( D3TypeParser* parser, 2040 DW_TAG dtag ) { 2041 vg_assert(dtag == DW_TAG_subrange_type); 2042 /* For most languages, a subrange_type dtag always gives the 2043 bounds of an array. 2044 For Ada, there are additional conditions as a subrange_type 2045 is also used for other purposes. */ 2046 if (parser->language != 'A') 2047 /* not Ada, so it definitely denotes an array bound. */ 2048 return True; 2049 else 2050 /* Extra constraints for Ada: it only denotes an array bound if .. */ 2051 return (! typestack_is_empty(parser) 2052 && parser->qparentE[parser->sp].tag == Te_TyArray); 2053 } 2054 2055 /* Parse a type-related DIE. 'parser' holds the current parser state. 2056 'admin' is where the completed types are dumped. 'dtag' is the tag 2057 for this DIE. 'c_die' points to the start of the data fields (FORM 2058 stuff) for the DIE. c_abbv points to the start of the (name,form) 2059 pairs which describe the DIE. 2060 2061 We may find the DIE uninteresting, in which case we should ignore 2062 it. 2063 2064 What happens: the DIE is examined. If uninteresting, it is ignored. 
2065 Otherwise, the DIE gives rise to two things: 2066 2067 (1) the offset of this DIE in the CU -- the cuOffset, a UWord 2068 (2) a TyAdmin structure, which holds the type, or related stuff 2069 2070 (2) is added at the end of 'tyadmins', at some index, say 'i'. 2071 2072 A pair (cuOffset, i) is added to 'tydict'. 2073 2074 Hence 'tyadmins' holds the actual type entities, and 'tydict' holds 2075 a mapping from cuOffset to the index of the corresponding entry in 2076 'tyadmin'. 2077 2078 When resolving a cuOffset to a TyAdmin, first look up the cuOffset 2079 in the tydict (by binary search). This gives an index into 2080 tyadmins, and the required entity lives in tyadmins at that index. 2081 */ 2082 __attribute__((noinline)) 2083 static void parse_type_DIE ( /*MOD*/XArray* /* of TyEnt */ tyents, 2084 /*MOD*/D3TypeParser* parser, 2085 DW_TAG dtag, 2086 UWord posn, 2087 Int level, 2088 Cursor* c_die, 2089 Cursor* c_abbv, 2090 CUConst* cc, 2091 Bool td3 ) 2092 { 2093 ULong cts; 2094 Int ctsSzB; 2095 UWord ctsMemSzB; 2096 TyEnt typeE; 2097 TyEnt atomE; 2098 TyEnt fieldE; 2099 TyEnt boundE; 2100 2101 UWord saved_die_c_offset = get_position_of_Cursor( c_die ); 2102 UWord saved_abbv_c_offset = get_position_of_Cursor( c_abbv ); 2103 2104 VG_(memset)( &typeE, 0xAA, sizeof(typeE) ); 2105 VG_(memset)( &atomE, 0xAA, sizeof(atomE) ); 2106 VG_(memset)( &fieldE, 0xAA, sizeof(fieldE) ); 2107 VG_(memset)( &boundE, 0xAA, sizeof(boundE) ); 2108 2109 /* If we've returned to a level at or above any previously noted 2110 parent, un-note it, so we don't believe we're still collecting 2111 its children. 
*/ 2112 typestack_preen( parser, td3, level-1 ); 2113 2114 if (dtag == DW_TAG_compile_unit) { 2115 /* See if we can find DW_AT_language, since it is important for 2116 establishing array bounds (see DW_TAG_subrange_type below in 2117 this fn) */ 2118 while (True) { 2119 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 2120 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 2121 if (attr == 0 && form == 0) break; 2122 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 2123 cc, c_die, False/*td3*/, form ); 2124 if (attr != DW_AT_language) 2125 continue; 2126 if (ctsSzB == 0) 2127 goto bad_DIE; 2128 switch (cts) { 2129 case DW_LANG_C89: case DW_LANG_C: 2130 case DW_LANG_C_plus_plus: case DW_LANG_ObjC: 2131 case DW_LANG_ObjC_plus_plus: case DW_LANG_UPC: 2132 case DW_LANG_Upc: case DW_LANG_C99: 2133 parser->language = 'C'; break; 2134 case DW_LANG_Fortran77: case DW_LANG_Fortran90: 2135 case DW_LANG_Fortran95: 2136 parser->language = 'F'; break; 2137 case DW_LANG_Ada83: case DW_LANG_Ada95: 2138 parser->language = 'A'; break; 2139 case DW_LANG_Cobol74: 2140 case DW_LANG_Cobol85: case DW_LANG_Pascal83: 2141 case DW_LANG_Modula2: case DW_LANG_Java: 2142 case DW_LANG_PLI: 2143 case DW_LANG_D: case DW_LANG_Python: 2144 case DW_LANG_Mips_Assembler: 2145 parser->language = '?'; break; 2146 default: 2147 goto bad_DIE; 2148 } 2149 } 2150 } 2151 2152 if (dtag == DW_TAG_base_type) { 2153 /* We can pick up a new base type any time. 
*/ 2154 VG_(memset)(&typeE, 0, sizeof(typeE)); 2155 typeE.cuOff = D3_INVALID_CUOFF; 2156 typeE.tag = Te_TyBase; 2157 while (True) { 2158 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 2159 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 2160 if (attr == 0 && form == 0) break; 2161 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 2162 cc, c_die, False/*td3*/, form ); 2163 if (attr == DW_AT_name && ctsMemSzB > 0) { 2164 typeE.Te.TyBase.name 2165 = ML_(dinfo_strdup)( "di.readdwarf3.ptD.base_type.1", 2166 (UChar*)(UWord)cts ); 2167 } 2168 if (attr == DW_AT_byte_size && ctsSzB > 0) { 2169 typeE.Te.TyBase.szB = cts; 2170 } 2171 if (attr == DW_AT_encoding && ctsSzB > 0) { 2172 switch (cts) { 2173 case DW_ATE_unsigned: case DW_ATE_unsigned_char: 2174 case DW_ATE_UTF: /* since DWARF4, e.g. char16_t from C++ */ 2175 case DW_ATE_boolean:/* FIXME - is this correct? */ 2176 typeE.Te.TyBase.enc = 'U'; break; 2177 case DW_ATE_signed: case DW_ATE_signed_char: 2178 typeE.Te.TyBase.enc = 'S'; break; 2179 case DW_ATE_float: 2180 typeE.Te.TyBase.enc = 'F'; break; 2181 case DW_ATE_complex_float: 2182 typeE.Te.TyBase.enc = 'C'; break; 2183 default: 2184 goto bad_DIE; 2185 } 2186 } 2187 } 2188 2189 /* Invent a name if it doesn't have one. gcc-4.3 2190 -ftree-vectorize is observed to emit nameless base types. */ 2191 if (!typeE.Te.TyBase.name) 2192 typeE.Te.TyBase.name 2193 = ML_(dinfo_strdup)( "di.readdwarf3.ptD.base_type.2", 2194 "<anon_base_type>" ); 2195 2196 /* Do we have something that looks sane? */ 2197 if (/* must have a name */ 2198 typeE.Te.TyBase.name == NULL 2199 /* and a plausible size. 
Yes, really 32: "complex long 2200 double" apparently has size=32 */ 2201 || typeE.Te.TyBase.szB < 0 || typeE.Te.TyBase.szB > 32 2202 /* and a plausible encoding */ 2203 || (typeE.Te.TyBase.enc != 'U' 2204 && typeE.Te.TyBase.enc != 'S' 2205 && typeE.Te.TyBase.enc != 'F' 2206 && typeE.Te.TyBase.enc != 'C')) 2207 goto bad_DIE; 2208 /* Last minute hack: if we see this 2209 <1><515>: DW_TAG_base_type 2210 DW_AT_byte_size : 0 2211 DW_AT_encoding : 5 2212 DW_AT_name : void 2213 convert it into a real Void type. */ 2214 if (typeE.Te.TyBase.szB == 0 2215 && 0 == VG_(strcmp)("void", typeE.Te.TyBase.name)) { 2216 ML_(TyEnt__make_EMPTY)(&typeE); 2217 typeE.tag = Te_TyVoid; 2218 typeE.Te.TyVoid.isFake = False; /* it's a real one! */ 2219 } 2220 2221 goto acquire_Type; 2222 } 2223 2224 if (dtag == DW_TAG_pointer_type || dtag == DW_TAG_reference_type 2225 || dtag == DW_TAG_ptr_to_member_type) { 2226 /* This seems legit for _pointer_type and _reference_type. I 2227 don't know if rolling _ptr_to_member_type in here really is 2228 legit, but it's better than not handling it at all. */ 2229 VG_(memset)(&typeE, 0, sizeof(typeE)); 2230 typeE.cuOff = D3_INVALID_CUOFF; 2231 typeE.tag = Te_TyPorR; 2232 /* target type defaults to void */ 2233 typeE.Te.TyPorR.typeR = D3_FAKEVOID_CUOFF; 2234 typeE.Te.TyPorR.isPtr = dtag == DW_TAG_pointer_type 2235 || dtag == DW_TAG_ptr_to_member_type; 2236 /* These three type kinds don't *have* to specify their size, in 2237 which case we assume it's a machine word. But if they do 2238 specify it, it must be a machine word :-) This probably 2239 assumes that the word size of the Dwarf3 we're reading is the 2240 same size as that on the machine. gcc appears to give a size 2241 whereas icc9 doesn't. 
*/ 2242 typeE.Te.TyPorR.szB = sizeof(UWord); 2243 while (True) { 2244 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 2245 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 2246 if (attr == 0 && form == 0) break; 2247 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 2248 cc, c_die, False/*td3*/, form ); 2249 if (attr == DW_AT_byte_size && ctsSzB > 0) { 2250 typeE.Te.TyPorR.szB = cts; 2251 } 2252 if (attr == DW_AT_type && ctsSzB > 0) { 2253 typeE.Te.TyPorR.typeR = (UWord)cts; 2254 } 2255 } 2256 /* Do we have something that looks sane? */ 2257 if (typeE.Te.TyPorR.szB != sizeof(UWord)) 2258 goto bad_DIE; 2259 else 2260 goto acquire_Type; 2261 } 2262 2263 if (dtag == DW_TAG_enumeration_type) { 2264 /* Create a new Type to hold the results. */ 2265 VG_(memset)(&typeE, 0, sizeof(typeE)); 2266 typeE.cuOff = posn; 2267 typeE.tag = Te_TyEnum; 2268 typeE.Te.TyEnum.atomRs 2269 = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ptD.enum_type.1", 2270 ML_(dinfo_free), 2271 sizeof(UWord) ); 2272 while (True) { 2273 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 2274 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 2275 if (attr == 0 && form == 0) break; 2276 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 2277 cc, c_die, False/*td3*/, form ); 2278 if (attr == DW_AT_name && ctsMemSzB > 0) { 2279 typeE.Te.TyEnum.name 2280 = ML_(dinfo_strdup)( "di.readdwarf3.pTD.enum_type.2", 2281 (UChar*)(UWord)cts ); 2282 } 2283 if (attr == DW_AT_byte_size && ctsSzB > 0) { 2284 typeE.Te.TyEnum.szB = cts; 2285 } 2286 } 2287 2288 if (!typeE.Te.TyEnum.name) 2289 typeE.Te.TyEnum.name 2290 = ML_(dinfo_strdup)( "di.readdwarf3.pTD.enum_type.3", 2291 "<anon_enum_type>" ); 2292 2293 /* Do we have something that looks sane? */ 2294 if (typeE.Te.TyEnum.szB == 0 2295 /* we must know the size */ 2296 /* but not for Ada, which uses such dummy 2297 enumerations as helper for gdb ada mode. */ 2298 && parser->language != 'A') 2299 goto bad_DIE; 2300 /* On't stack! 
*/ 2301 typestack_push( cc, parser, td3, &typeE, level ); 2302 goto acquire_Type; 2303 } 2304 2305 /* gcc (GCC) 4.4.0 20081017 (experimental) occasionally produces 2306 DW_TAG_enumerator with only a DW_AT_name but no 2307 DW_AT_const_value. This is in violation of the Dwarf3 standard, 2308 and appears to be a new "feature" of gcc - versions 4.3.x and 2309 earlier do not appear to do this. So accept DW_TAG_enumerator 2310 which only have a name but no value. An example: 2311 2312 <1><180>: Abbrev Number: 6 (DW_TAG_enumeration_type) 2313 <181> DW_AT_name : (indirect string, offset: 0xda70): 2314 QtMsgType 2315 <185> DW_AT_byte_size : 4 2316 <186> DW_AT_decl_file : 14 2317 <187> DW_AT_decl_line : 1480 2318 <189> DW_AT_sibling : <0x1a7> 2319 <2><18d>: Abbrev Number: 7 (DW_TAG_enumerator) 2320 <18e> DW_AT_name : (indirect string, offset: 0x9e18): 2321 QtDebugMsg 2322 <2><192>: Abbrev Number: 7 (DW_TAG_enumerator) 2323 <193> DW_AT_name : (indirect string, offset: 0x1505f): 2324 QtWarningMsg 2325 <2><197>: Abbrev Number: 7 (DW_TAG_enumerator) 2326 <198> DW_AT_name : (indirect string, offset: 0x16f4a): 2327 QtCriticalMsg 2328 <2><19c>: Abbrev Number: 7 (DW_TAG_enumerator) 2329 <19d> DW_AT_name : (indirect string, offset: 0x156dd): 2330 QtFatalMsg 2331 <2><1a1>: Abbrev Number: 7 (DW_TAG_enumerator) 2332 <1a2> DW_AT_name : (indirect string, offset: 0x13660): 2333 QtSystemMsg 2334 */ 2335 if (dtag == DW_TAG_enumerator) { 2336 VG_(memset)( &atomE, 0, sizeof(atomE) ); 2337 atomE.cuOff = posn; 2338 atomE.tag = Te_Atom; 2339 while (True) { 2340 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 2341 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 2342 if (attr == 0 && form == 0) break; 2343 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 2344 cc, c_die, False/*td3*/, form ); 2345 if (attr == DW_AT_name && ctsMemSzB > 0) { 2346 atomE.Te.Atom.name 2347 = ML_(dinfo_strdup)( "di.readdwarf3.pTD.enumerator.1", 2348 (UChar*)(UWord)cts ); 2349 } 2350 if (attr == DW_AT_const_value && ctsSzB > 0) { 
2351 atomE.Te.Atom.value = cts; 2352 atomE.Te.Atom.valueKnown = True; 2353 } 2354 } 2355 /* Do we have something that looks sane? */ 2356 if (atomE.Te.Atom.name == NULL) 2357 goto bad_DIE; 2358 /* Do we have a plausible parent? */ 2359 if (typestack_is_empty(parser)) goto bad_DIE; 2360 vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp])); 2361 vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF); 2362 if (level != parser->qlevel[parser->sp]+1) goto bad_DIE; 2363 if (parser->qparentE[parser->sp].tag != Te_TyEnum) goto bad_DIE; 2364 /* Record this child in the parent */ 2365 vg_assert(parser->qparentE[parser->sp].Te.TyEnum.atomRs); 2366 VG_(addToXA)( parser->qparentE[parser->sp].Te.TyEnum.atomRs, 2367 &atomE ); 2368 /* And record the child itself */ 2369 goto acquire_Atom; 2370 } 2371 2372 /* Treat DW_TAG_class_type as if it was a DW_TAG_structure_type. I 2373 don't know if this is correct, but it at least makes this reader 2374 usable for gcc-4.3 produced Dwarf3. */ 2375 if (dtag == DW_TAG_structure_type || dtag == DW_TAG_class_type 2376 || dtag == DW_TAG_union_type) { 2377 Bool have_szB = False; 2378 Bool is_decl = False; 2379 Bool is_spec = False; 2380 /* Create a new Type to hold the results. 
*/ 2381 VG_(memset)(&typeE, 0, sizeof(typeE)); 2382 typeE.cuOff = posn; 2383 typeE.tag = Te_TyStOrUn; 2384 typeE.Te.TyStOrUn.name = NULL; 2385 typeE.Te.TyStOrUn.fieldRs 2386 = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.pTD.struct_type.1", 2387 ML_(dinfo_free), 2388 sizeof(UWord) ); 2389 typeE.Te.TyStOrUn.complete = True; 2390 typeE.Te.TyStOrUn.isStruct = dtag == DW_TAG_structure_type 2391 || dtag == DW_TAG_class_type; 2392 while (True) { 2393 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 2394 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 2395 if (attr == 0 && form == 0) break; 2396 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 2397 cc, c_die, False/*td3*/, form ); 2398 if (attr == DW_AT_name && ctsMemSzB > 0) { 2399 typeE.Te.TyStOrUn.name 2400 = ML_(dinfo_strdup)( "di.readdwarf3.ptD.struct_type.2", 2401 (UChar*)(UWord)cts ); 2402 } 2403 if (attr == DW_AT_byte_size && ctsSzB >= 0) { 2404 typeE.Te.TyStOrUn.szB = cts; 2405 have_szB = True; 2406 } 2407 if (attr == DW_AT_declaration && ctsSzB > 0 && cts > 0) { 2408 is_decl = True; 2409 } 2410 if (attr == DW_AT_specification && ctsSzB > 0 && cts > 0) { 2411 is_spec = True; 2412 } 2413 } 2414 /* Do we have something that looks sane? */ 2415 if (is_decl && (!is_spec)) { 2416 /* It's a DW_AT_declaration. We require the name but 2417 nothing else. */ 2418 if (typeE.Te.TyStOrUn.name == NULL) 2419 goto bad_DIE; 2420 typeE.Te.TyStOrUn.complete = False; 2421 /* JRS 2009 Aug 10: <possible kludge>? */ 2422 /* Push this tyent on the stack, even though it's incomplete. 2423 It appears that gcc-4.4 on Fedora 11 will sometimes create 2424 DW_TAG_member entries for it, and so we need to have a 2425 plausible parent present in order for that to work. See 2426 #200029 comments 8 and 9. 
*/ 2427 typestack_push( cc, parser, td3, &typeE, level ); 2428 /* </possible kludge> */ 2429 goto acquire_Type; 2430 } 2431 if ((!is_decl) /* && (!is_spec) */) { 2432 /* this is the common, ordinary case */ 2433 if ((!have_szB) /* we must know the size */ 2434 /* But the name can be present, or not */) 2435 goto bad_DIE; 2436 /* On't stack! */ 2437 typestack_push( cc, parser, td3, &typeE, level ); 2438 goto acquire_Type; 2439 } 2440 else { 2441 /* don't know how to handle any other variants just now */ 2442 goto bad_DIE; 2443 } 2444 } 2445 2446 if (dtag == DW_TAG_member) { 2447 /* Acquire member entries for both DW_TAG_structure_type and 2448 DW_TAG_union_type. They differ minorly, in that struct 2449 members must have a DW_AT_data_member_location expression 2450 whereas union members must not. */ 2451 Bool parent_is_struct; 2452 VG_(memset)( &fieldE, 0, sizeof(fieldE) ); 2453 fieldE.cuOff = posn; 2454 fieldE.tag = Te_Field; 2455 fieldE.Te.Field.typeR = D3_INVALID_CUOFF; 2456 while (True) { 2457 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 2458 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 2459 if (attr == 0 && form == 0) break; 2460 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 2461 cc, c_die, False/*td3*/, form ); 2462 if (attr == DW_AT_name && ctsMemSzB > 0) { 2463 fieldE.Te.Field.name 2464 = ML_(dinfo_strdup)( "di.readdwarf3.ptD.member.1", 2465 (UChar*)(UWord)cts ); 2466 } 2467 if (attr == DW_AT_type && ctsSzB > 0) { 2468 fieldE.Te.Field.typeR = (UWord)cts; 2469 } 2470 /* There are 2 different cases for DW_AT_data_member_location. 2471 If it is a constant class attribute, it contains byte offset 2472 from the beginning of the containing entity. 2473 Otherwise it is a location expression. 
*/ 2474 if (attr == DW_AT_data_member_location && ctsSzB > 0) { 2475 fieldE.Te.Field.nLoc = -1; 2476 fieldE.Te.Field.pos.offset = cts; 2477 } else if (attr == DW_AT_data_member_location && ctsMemSzB > 0) { 2478 fieldE.Te.Field.nLoc = (UWord)ctsMemSzB; 2479 fieldE.Te.Field.pos.loc 2480 = ML_(dinfo_memdup)( "di.readdwarf3.ptD.member.2", 2481 (UChar*)(UWord)cts, 2482 (SizeT)fieldE.Te.Field.nLoc ); 2483 } 2484 } 2485 /* Do we have a plausible parent? */ 2486 if (typestack_is_empty(parser)) goto bad_DIE; 2487 vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp])); 2488 vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF); 2489 if (level != parser->qlevel[parser->sp]+1) goto bad_DIE; 2490 if (parser->qparentE[parser->sp].tag != Te_TyStOrUn) goto bad_DIE; 2491 /* Do we have something that looks sane? If this a member of a 2492 struct, we must have a location expression; but if a member 2493 of a union that is irrelevant (D3 spec sec 5.6.6). We ought 2494 to reject in the latter case, but some compilers have been 2495 observed to emit constant-zero expressions. So just ignore 2496 them. */ 2497 parent_is_struct 2498 = parser->qparentE[parser->sp].Te.TyStOrUn.isStruct; 2499 if (!fieldE.Te.Field.name) 2500 fieldE.Te.Field.name 2501 = ML_(dinfo_strdup)( "di.readdwarf3.ptD.member.3", 2502 "<anon_field>" ); 2503 vg_assert(fieldE.Te.Field.name); 2504 if (fieldE.Te.Field.typeR == D3_INVALID_CUOFF) 2505 goto bad_DIE; 2506 if (fieldE.Te.Field.nLoc) { 2507 if (!parent_is_struct) { 2508 /* If this is a union type, pretend we haven't seen the data 2509 member location expression, as it is by definition 2510 redundant (it must be zero). 
*/ 2511 if (fieldE.Te.Field.nLoc > 0) 2512 ML_(dinfo_free)(fieldE.Te.Field.pos.loc); 2513 fieldE.Te.Field.pos.loc = NULL; 2514 fieldE.Te.Field.nLoc = 0; 2515 } 2516 /* Record this child in the parent */ 2517 fieldE.Te.Field.isStruct = parent_is_struct; 2518 vg_assert(parser->qparentE[parser->sp].Te.TyStOrUn.fieldRs); 2519 VG_(addToXA)( parser->qparentE[parser->sp].Te.TyStOrUn.fieldRs, 2520 &posn ); 2521 /* And record the child itself */ 2522 goto acquire_Field; 2523 } else { 2524 /* Member with no location - this can happen with static 2525 const members in C++ code which are compile time constants 2526 that do no exist in the class. They're not of any interest 2527 to us so we ignore them. */ 2528 } 2529 } 2530 2531 if (dtag == DW_TAG_array_type) { 2532 VG_(memset)(&typeE, 0, sizeof(typeE)); 2533 typeE.cuOff = posn; 2534 typeE.tag = Te_TyArray; 2535 typeE.Te.TyArray.typeR = D3_INVALID_CUOFF; 2536 typeE.Te.TyArray.boundRs 2537 = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ptD.array_type.1", 2538 ML_(dinfo_free), 2539 sizeof(UWord) ); 2540 while (True) { 2541 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 2542 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 2543 if (attr == 0 && form == 0) break; 2544 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 2545 cc, c_die, False/*td3*/, form ); 2546 if (attr == DW_AT_type && ctsSzB > 0) { 2547 typeE.Te.TyArray.typeR = (UWord)cts; 2548 } 2549 } 2550 if (typeE.Te.TyArray.typeR == D3_INVALID_CUOFF) 2551 goto bad_DIE; 2552 /* On't stack! */ 2553 typestack_push( cc, parser, td3, &typeE, level ); 2554 goto acquire_Type; 2555 } 2556 2557 /* this is a subrange type defining the bounds of an array. 
*/ 2558 if (dtag == DW_TAG_subrange_type 2559 && subrange_type_denotes_array_bounds(parser, dtag)) { 2560 Bool have_lower = False; 2561 Bool have_upper = False; 2562 Bool have_count = False; 2563 Long lower = 0; 2564 Long upper = 0; 2565 2566 switch (parser->language) { 2567 case 'C': have_lower = True; lower = 0; break; 2568 case 'F': have_lower = True; lower = 1; break; 2569 case '?': have_lower = False; break; 2570 case 'A': have_lower = False; break; 2571 default: vg_assert(0); /* assured us by handling of 2572 DW_TAG_compile_unit in this fn */ 2573 } 2574 2575 VG_(memset)( &boundE, 0, sizeof(boundE) ); 2576 boundE.cuOff = D3_INVALID_CUOFF; 2577 boundE.tag = Te_Bound; 2578 while (True) { 2579 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 2580 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 2581 if (attr == 0 && form == 0) break; 2582 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 2583 cc, c_die, False/*td3*/, form ); 2584 if (attr == DW_AT_lower_bound && ctsSzB > 0) { 2585 lower = (Long)cts; 2586 have_lower = True; 2587 } 2588 if (attr == DW_AT_upper_bound && ctsSzB > 0) { 2589 upper = (Long)cts; 2590 have_upper = True; 2591 } 2592 if (attr == DW_AT_count && ctsSzB > 0) { 2593 /*count = (Long)cts;*/ 2594 have_count = True; 2595 } 2596 } 2597 /* FIXME: potentially skip the rest if no parent present, since 2598 it could be the case that this subrange type is free-standing 2599 (not being used to describe the bounds of a containing array 2600 type) */ 2601 /* Do we have a plausible parent? 
*/ 2602 if (typestack_is_empty(parser)) goto bad_DIE; 2603 vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp])); 2604 vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF); 2605 if (level != parser->qlevel[parser->sp]+1) goto bad_DIE; 2606 if (parser->qparentE[parser->sp].tag != Te_TyArray) goto bad_DIE; 2607 2608 /* Figure out if we have a definite range or not */ 2609 if (have_lower && have_upper && (!have_count)) { 2610 boundE.Te.Bound.knownL = True; 2611 boundE.Te.Bound.knownU = True; 2612 boundE.Te.Bound.boundL = lower; 2613 boundE.Te.Bound.boundU = upper; 2614 } 2615 else if (have_lower && (!have_upper) && (!have_count)) { 2616 boundE.Te.Bound.knownL = True; 2617 boundE.Te.Bound.knownU = False; 2618 boundE.Te.Bound.boundL = lower; 2619 boundE.Te.Bound.boundU = 0; 2620 } 2621 else if ((!have_lower) && have_upper && (!have_count)) { 2622 boundE.Te.Bound.knownL = False; 2623 boundE.Te.Bound.knownU = True; 2624 boundE.Te.Bound.boundL = 0; 2625 boundE.Te.Bound.boundU = upper; 2626 } 2627 else if ((!have_lower) && (!have_upper) && (!have_count)) { 2628 boundE.Te.Bound.knownL = False; 2629 boundE.Te.Bound.knownU = False; 2630 boundE.Te.Bound.boundL = 0; 2631 boundE.Te.Bound.boundU = 0; 2632 } else { 2633 /* FIXME: handle more cases */ 2634 goto bad_DIE; 2635 } 2636 2637 /* Record this bound in the parent */ 2638 boundE.cuOff = posn; 2639 vg_assert(parser->qparentE[parser->sp].Te.TyArray.boundRs); 2640 VG_(addToXA)( parser->qparentE[parser->sp].Te.TyArray.boundRs, 2641 &boundE ); 2642 /* And record the child itself */ 2643 goto acquire_Bound; 2644 } 2645 2646 /* typedef or subrange_type other than array bounds. */ 2647 if (dtag == DW_TAG_typedef 2648 || (dtag == DW_TAG_subrange_type 2649 && !subrange_type_denotes_array_bounds(parser, dtag))) { 2650 /* subrange_type other than array bound is only for Ada. */ 2651 vg_assert (dtag == DW_TAG_typedef || parser->language == 'A'); 2652 /* We can pick up a new typedef/subrange_type any time. 
*/ 2653 VG_(memset)(&typeE, 0, sizeof(typeE)); 2654 typeE.cuOff = D3_INVALID_CUOFF; 2655 typeE.tag = Te_TyTyDef; 2656 typeE.Te.TyTyDef.name = NULL; 2657 typeE.Te.TyTyDef.typeR = D3_INVALID_CUOFF; 2658 while (True) { 2659 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 2660 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 2661 if (attr == 0 && form == 0) break; 2662 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 2663 cc, c_die, False/*td3*/, form ); 2664 if (attr == DW_AT_name && ctsMemSzB > 0) { 2665 typeE.Te.TyTyDef.name 2666 = ML_(dinfo_strdup)( "di.readdwarf3.ptD.typedef.1", 2667 (UChar*)(UWord)cts ); 2668 } 2669 if (attr == DW_AT_type && ctsSzB > 0) { 2670 typeE.Te.TyTyDef.typeR = (UWord)cts; 2671 } 2672 } 2673 /* Do we have something that looks sane? */ 2674 if (/* must have a name */ 2675 typeE.Te.TyTyDef.name == NULL 2676 /* However gcc gnat Ada generates minimal typedef 2677 such as the below => accept no name for Ada. 2678 <6><91cc>: DW_TAG_typedef 2679 DW_AT_abstract_ori: <9066> 2680 */ 2681 && parser->language != 'A' 2682 /* but the referred-to type can be absent */) 2683 goto bad_DIE; 2684 else 2685 goto acquire_Type; 2686 } 2687 2688 if (dtag == DW_TAG_subroutine_type) { 2689 /* function type? just record that one fact and ask no 2690 further questions. */ 2691 VG_(memset)(&typeE, 0, sizeof(typeE)); 2692 typeE.cuOff = D3_INVALID_CUOFF; 2693 typeE.tag = Te_TyFn; 2694 goto acquire_Type; 2695 } 2696 2697 if (dtag == DW_TAG_volatile_type || dtag == DW_TAG_const_type) { 2698 Int have_ty = 0; 2699 VG_(memset)(&typeE, 0, sizeof(typeE)); 2700 typeE.cuOff = D3_INVALID_CUOFF; 2701 typeE.tag = Te_TyQual; 2702 typeE.Te.TyQual.qual 2703 = dtag == DW_TAG_volatile_type ? 
'V' : 'C'; 2704 /* target type defaults to 'void' */ 2705 typeE.Te.TyQual.typeR = D3_FAKEVOID_CUOFF; 2706 while (True) { 2707 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 2708 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 2709 if (attr == 0 && form == 0) break; 2710 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 2711 cc, c_die, False/*td3*/, form ); 2712 if (attr == DW_AT_type && ctsSzB > 0) { 2713 typeE.Te.TyQual.typeR = (UWord)cts; 2714 have_ty++; 2715 } 2716 } 2717 /* gcc sometimes generates DW_TAG_const/volatile_type without 2718 DW_AT_type and GDB appears to interpret the type as 'const 2719 void' (resp. 'volatile void'). So just allow it .. */ 2720 if (have_ty == 1 || have_ty == 0) 2721 goto acquire_Type; 2722 else 2723 goto bad_DIE; 2724 } 2725 2726 /* else ignore this DIE */ 2727 return; 2728 /*NOTREACHED*/ 2729 2730 acquire_Type: 2731 if (0) VG_(printf)("YYYY Acquire Type\n"); 2732 vg_assert(ML_(TyEnt__is_type)( &typeE )); 2733 vg_assert(typeE.cuOff == D3_INVALID_CUOFF || typeE.cuOff == posn); 2734 typeE.cuOff = posn; 2735 VG_(addToXA)( tyents, &typeE ); 2736 return; 2737 /*NOTREACHED*/ 2738 2739 acquire_Atom: 2740 if (0) VG_(printf)("YYYY Acquire Atom\n"); 2741 vg_assert(atomE.tag == Te_Atom); 2742 vg_assert(atomE.cuOff == D3_INVALID_CUOFF || atomE.cuOff == posn); 2743 atomE.cuOff = posn; 2744 VG_(addToXA)( tyents, &atomE ); 2745 return; 2746 /*NOTREACHED*/ 2747 2748 acquire_Field: 2749 /* For union members, Expr should be absent */ 2750 if (0) VG_(printf)("YYYY Acquire Field\n"); 2751 vg_assert(fieldE.tag == Te_Field); 2752 vg_assert(fieldE.Te.Field.nLoc <= 0 || fieldE.Te.Field.pos.loc != NULL); 2753 vg_assert(fieldE.Te.Field.nLoc != 0 || fieldE.Te.Field.pos.loc == NULL); 2754 if (fieldE.Te.Field.isStruct) { 2755 vg_assert(fieldE.Te.Field.nLoc != 0); 2756 } else { 2757 vg_assert(fieldE.Te.Field.nLoc == 0); 2758 } 2759 vg_assert(fieldE.cuOff == D3_INVALID_CUOFF || fieldE.cuOff == posn); 2760 fieldE.cuOff = posn; 2761 VG_(addToXA)( tyents, &fieldE ); 2762 
return; 2763 /*NOTREACHED*/ 2764 2765 acquire_Bound: 2766 if (0) VG_(printf)("YYYY Acquire Bound\n"); 2767 vg_assert(boundE.tag == Te_Bound); 2768 vg_assert(boundE.cuOff == D3_INVALID_CUOFF || boundE.cuOff == posn); 2769 boundE.cuOff = posn; 2770 VG_(addToXA)( tyents, &boundE ); 2771 return; 2772 /*NOTREACHED*/ 2773 2774 bad_DIE: 2775 set_position_of_Cursor( c_die, saved_die_c_offset ); 2776 set_position_of_Cursor( c_abbv, saved_abbv_c_offset ); 2777 VG_(printf)("\nparse_type_DIE: confused by:\n"); 2778 VG_(printf)(" <%d><%lx>: %s\n", level, posn, ML_(pp_DW_TAG)( dtag ) ); 2779 while (True) { 2780 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 2781 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 2782 if (attr == 0 && form == 0) break; 2783 VG_(printf)(" %18s: ", ML_(pp_DW_AT)(attr)); 2784 /* Get the form contents, so as to print them */ 2785 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 2786 cc, c_die, True, form ); 2787 VG_(printf)("\t\n"); 2788 } 2789 VG_(printf)("\n"); 2790 cc->barf("parse_type_DIE: confused by the above DIE"); 2791 /*NOTREACHED*/ 2792 } 2793 2794 2795 /*------------------------------------------------------------*/ 2796 /*--- ---*/ 2797 /*--- Compression of type DIE information ---*/ 2798 /*--- ---*/ 2799 /*------------------------------------------------------------*/ 2800 2801 static UWord chase_cuOff ( Bool* changed, 2802 XArray* /* of TyEnt */ ents, 2803 TyEntIndexCache* ents_cache, 2804 UWord cuOff ) 2805 { 2806 TyEnt* ent; 2807 ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache, cuOff ); 2808 2809 if (!ent) { 2810 VG_(printf)("chase_cuOff: no entry for 0x%05lx\n", cuOff); 2811 *changed = False; 2812 return cuOff; 2813 } 2814 2815 vg_assert(ent->tag != Te_EMPTY); 2816 if (ent->tag != Te_INDIR) { 2817 *changed = False; 2818 return cuOff; 2819 } else { 2820 vg_assert(ent->Te.INDIR.indR < cuOff); 2821 *changed = True; 2822 return ent->Te.INDIR.indR; 2823 } 2824 } 2825 2826 static 2827 void chase_cuOffs_in_XArray ( Bool* changed, 2828 XArray* /* 
of TyEnt */ ents, 2829 TyEntIndexCache* ents_cache, 2830 /*MOD*/XArray* /* of UWord */ cuOffs ) 2831 { 2832 Bool b2 = False; 2833 Word i, n = VG_(sizeXA)( cuOffs ); 2834 for (i = 0; i < n; i++) { 2835 Bool b = False; 2836 UWord* p = VG_(indexXA)( cuOffs, i ); 2837 *p = chase_cuOff( &b, ents, ents_cache, *p ); 2838 if (b) 2839 b2 = True; 2840 } 2841 *changed = b2; 2842 } 2843 2844 static Bool TyEnt__subst_R_fields ( XArray* /* of TyEnt */ ents, 2845 TyEntIndexCache* ents_cache, 2846 /*MOD*/TyEnt* te ) 2847 { 2848 Bool b, changed = False; 2849 switch (te->tag) { 2850 case Te_EMPTY: 2851 break; 2852 case Te_INDIR: 2853 te->Te.INDIR.indR 2854 = chase_cuOff( &b, ents, ents_cache, te->Te.INDIR.indR ); 2855 if (b) changed = True; 2856 break; 2857 case Te_UNKNOWN: 2858 break; 2859 case Te_Atom: 2860 break; 2861 case Te_Field: 2862 te->Te.Field.typeR 2863 = chase_cuOff( &b, ents, ents_cache, te->Te.Field.typeR ); 2864 if (b) changed = True; 2865 break; 2866 case Te_Bound: 2867 break; 2868 case Te_TyBase: 2869 break; 2870 case Te_TyPorR: 2871 te->Te.TyPorR.typeR 2872 = chase_cuOff( &b, ents, ents_cache, te->Te.TyPorR.typeR ); 2873 if (b) changed = True; 2874 break; 2875 case Te_TyTyDef: 2876 te->Te.TyTyDef.typeR 2877 = chase_cuOff( &b, ents, ents_cache, te->Te.TyTyDef.typeR ); 2878 if (b) changed = True; 2879 break; 2880 case Te_TyStOrUn: 2881 chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyStOrUn.fieldRs ); 2882 if (b) changed = True; 2883 break; 2884 case Te_TyEnum: 2885 chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyEnum.atomRs ); 2886 if (b) changed = True; 2887 break; 2888 case Te_TyArray: 2889 te->Te.TyArray.typeR 2890 = chase_cuOff( &b, ents, ents_cache, te->Te.TyArray.typeR ); 2891 if (b) changed = True; 2892 chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyArray.boundRs ); 2893 if (b) changed = True; 2894 break; 2895 case Te_TyFn: 2896 break; 2897 case Te_TyQual: 2898 te->Te.TyQual.typeR 2899 = chase_cuOff( &b, ents, ents_cache, 
te->Te.TyQual.typeR ); 2900 if (b) changed = True; 2901 break; 2902 case Te_TyVoid: 2903 break; 2904 default: 2905 ML_(pp_TyEnt)(te); 2906 vg_assert(0); 2907 } 2908 return changed; 2909 } 2910 2911 /* Make a pass over 'ents'. For each tyent, inspect the target of any 2912 'R' or 'Rs' fields (those which refer to other tyents), and replace 2913 any which point to INDIR nodes with the target of the indirection 2914 (which should not itself be an indirection). In summary, this 2915 routine shorts out all references to indirection nodes. */ 2916 static 2917 Word dedup_types_substitution_pass ( /*MOD*/XArray* /* of TyEnt */ ents, 2918 TyEntIndexCache* ents_cache ) 2919 { 2920 Word i, n, nChanged = 0; 2921 Bool b; 2922 n = VG_(sizeXA)( ents ); 2923 for (i = 0; i < n; i++) { 2924 TyEnt* ent = VG_(indexXA)( ents, i ); 2925 vg_assert(ent->tag != Te_EMPTY); 2926 /* We have to substitute everything, even indirections, so as to 2927 ensure that chains of indirections don't build up. */ 2928 b = TyEnt__subst_R_fields( ents, ents_cache, ent ); 2929 if (b) 2930 nChanged++; 2931 } 2932 2933 return nChanged; 2934 } 2935 2936 2937 /* Make a pass over 'ents', building a dictionary of TyEnts as we go. 2938 Look up each new tyent in the dictionary in turn. If it is already 2939 in the dictionary, replace this tyent with an indirection to the 2940 existing one, and delete any malloc'd stuff hanging off this one. 2941 In summary, this routine commons up all tyents that are identical 2942 as defined by TyEnt__cmp_by_all_except_cuOff. 
*/ 2943 static 2944 Word dedup_types_commoning_pass ( /*MOD*/XArray* /* of TyEnt */ ents ) 2945 { 2946 Word n, i, nDeleted; 2947 WordFM* dict; /* TyEnt* -> void */ 2948 TyEnt* ent; 2949 UWord keyW, valW; 2950 2951 dict = VG_(newFM)( 2952 ML_(dinfo_zalloc), "di.readdwarf3.dtcp.1", 2953 ML_(dinfo_free), 2954 (Word(*)(UWord,UWord)) ML_(TyEnt__cmp_by_all_except_cuOff) 2955 ); 2956 2957 nDeleted = 0; 2958 n = VG_(sizeXA)( ents ); 2959 for (i = 0; i < n; i++) { 2960 ent = VG_(indexXA)( ents, i ); 2961 vg_assert(ent->tag != Te_EMPTY); 2962 2963 /* Ignore indirections, although check that they are 2964 not forming a cycle. */ 2965 if (ent->tag == Te_INDIR) { 2966 vg_assert(ent->Te.INDIR.indR < ent->cuOff); 2967 continue; 2968 } 2969 2970 keyW = valW = 0; 2971 if (VG_(lookupFM)( dict, &keyW, &valW, (UWord)ent )) { 2972 /* it's already in the dictionary. */ 2973 TyEnt* old = (TyEnt*)keyW; 2974 vg_assert(valW == 0); 2975 vg_assert(old != ent); 2976 vg_assert(old->tag != Te_INDIR); 2977 /* since we are traversing the array in increasing order of 2978 cuOff: */ 2979 vg_assert(old->cuOff < ent->cuOff); 2980 /* So anyway, dump this entry and replace it with an 2981 indirection to the one in the dictionary. Note that the 2982 assertion above guarantees that we cannot create cycles of 2983 indirections, since we are always creating an indirection 2984 to a tyent with a cuOff lower than this one. */ 2985 ML_(TyEnt__make_EMPTY)( ent ); 2986 ent->tag = Te_INDIR; 2987 ent->Te.INDIR.indR = old->cuOff; 2988 nDeleted++; 2989 } else { 2990 /* not in dictionary; add it and keep going. 
*/ 2991 VG_(addToFM)( dict, (UWord)ent, 0 ); 2992 } 2993 } 2994 2995 VG_(deleteFM)( dict, NULL, NULL ); 2996 2997 return nDeleted; 2998 } 2999 3000 3001 static 3002 void dedup_types ( Bool td3, 3003 /*MOD*/XArray* /* of TyEnt */ ents, 3004 TyEntIndexCache* ents_cache ) 3005 { 3006 Word m, n, i, nDel, nSubst, nThresh; 3007 if (0) td3 = True; 3008 3009 n = VG_(sizeXA)( ents ); 3010 3011 /* If a commoning pass and a substitution pass both make fewer than 3012 this many changes, just stop. It's pointless to burn up CPU 3013 time trying to compress the last 1% or so out of the array. */ 3014 nThresh = n / 200; 3015 3016 /* First we must sort .ents by its .cuOff fields, so we 3017 can index into it. */ 3018 VG_(setCmpFnXA)( 3019 ents, 3020 (Int(*)(void*,void*)) ML_(TyEnt__cmp_by_cuOff_only) 3021 ); 3022 VG_(sortXA)( ents ); 3023 3024 /* Now repeatedly do commoning and substitution passes over 3025 the array, until there are no more changes. */ 3026 do { 3027 nDel = dedup_types_commoning_pass ( ents ); 3028 nSubst = dedup_types_substitution_pass ( ents, ents_cache ); 3029 vg_assert(nDel >= 0 && nSubst >= 0); 3030 TRACE_D3(" %ld deletions, %ld substitutions\n", nDel, nSubst); 3031 } while (nDel > nThresh || nSubst > nThresh); 3032 3033 /* Sanity check: all INDIR nodes should point at a non-INDIR thing. 3034 In fact this should be true at the end of every loop iteration 3035 above (a commoning pass followed by a substitution pass), but 3036 checking it on every iteration is excessively expensive. Note, 3037 this loop also computes 'm' for the stats printing below it. 
*/ 3038 m = 0; 3039 n = VG_(sizeXA)( ents ); 3040 for (i = 0; i < n; i++) { 3041 TyEnt *ent, *ind; 3042 ent = VG_(indexXA)( ents, i ); 3043 if (ent->tag != Te_INDIR) continue; 3044 m++; 3045 ind = ML_(TyEnts__index_by_cuOff)( ents, ents_cache, 3046 ent->Te.INDIR.indR ); 3047 vg_assert(ind); 3048 vg_assert(ind->tag != Te_INDIR); 3049 } 3050 3051 TRACE_D3("Overall: %ld before, %ld after\n", n, n-m); 3052 } 3053 3054 3055 /*------------------------------------------------------------*/ 3056 /*--- ---*/ 3057 /*--- Resolution of references to type DIEs ---*/ 3058 /*--- ---*/ 3059 /*------------------------------------------------------------*/ 3060 3061 /* Make a pass through the (temporary) variables array. Examine the 3062 type of each variable, check is it found, and chase any Te_INDIRs. 3063 Postcondition is: each variable has a typeR field that refers to a 3064 valid type in tyents, or a Te_UNKNOWN, and is certainly guaranteed 3065 not to refer to a Te_INDIR. (This is so that we can throw all the 3066 Te_INDIRs away later). */ 3067 3068 __attribute__((noinline)) 3069 static void resolve_variable_types ( 3070 void (*barf)( HChar* ) __attribute__((noreturn)), 3071 /*R-O*/XArray* /* of TyEnt */ ents, 3072 /*MOD*/TyEntIndexCache* ents_cache, 3073 /*MOD*/XArray* /* of TempVar* */ vars 3074 ) 3075 { 3076 Word i, n; 3077 n = VG_(sizeXA)( vars ); 3078 for (i = 0; i < n; i++) { 3079 TempVar* var = *(TempVar**)VG_(indexXA)( vars, i ); 3080 /* This is the stated type of the variable. But it might be 3081 an indirection, so be careful. 
*/ 3082 TyEnt* ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache, 3083 var->typeR ); 3084 if (ent && ent->tag == Te_INDIR) { 3085 ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache, 3086 ent->Te.INDIR.indR ); 3087 vg_assert(ent); 3088 vg_assert(ent->tag != Te_INDIR); 3089 } 3090 3091 /* Deal first with "normal" cases */ 3092 if (ent && ML_(TyEnt__is_type)(ent)) { 3093 var->typeR = ent->cuOff; 3094 continue; 3095 } 3096 3097 /* If there's no ent, it probably we did not manage to read a 3098 type at the cuOffset which is stated as being this variable's 3099 type. Maybe a deficiency in parse_type_DIE. Complain. */ 3100 if (ent == NULL) { 3101 VG_(printf)("\n: Invalid cuOff = 0x%05lx\n", var->typeR ); 3102 barf("resolve_variable_types: " 3103 "cuOff does not refer to a known type"); 3104 } 3105 vg_assert(ent); 3106 /* If ent has any other tag, something bad happened, along the 3107 lines of var->typeR not referring to a type at all. */ 3108 vg_assert(ent->tag == Te_UNKNOWN); 3109 /* Just accept it; the type will be useless, but at least keep 3110 going. 
*/ 3111 var->typeR = ent->cuOff; 3112 } 3113 } 3114 3115 3116 /*------------------------------------------------------------*/ 3117 /*--- ---*/ 3118 /*--- Parsing of Compilation Units ---*/ 3119 /*--- ---*/ 3120 /*------------------------------------------------------------*/ 3121 3122 static Int cmp_TempVar_by_dioff ( void* v1, void* v2 ) { 3123 TempVar* t1 = *(TempVar**)v1; 3124 TempVar* t2 = *(TempVar**)v2; 3125 if (t1->dioff < t2->dioff) return -1; 3126 if (t1->dioff > t2->dioff) return 1; 3127 return 0; 3128 } 3129 3130 static void read_DIE ( 3131 /*MOD*/WordFM* /* of (XArray* of AddrRange, void) */ rangestree, 3132 /*MOD*/XArray* /* of TyEnt */ tyents, 3133 /*MOD*/XArray* /* of TempVar* */ tempvars, 3134 /*MOD*/XArray* /* of GExpr* */ gexprs, 3135 /*MOD*/D3TypeParser* typarser, 3136 /*MOD*/D3VarParser* varparser, 3137 Cursor* c, Bool td3, CUConst* cc, Int level 3138 ) 3139 { 3140 Cursor abbv; 3141 ULong atag, abbv_code; 3142 UWord posn; 3143 UInt has_children; 3144 UWord start_die_c_offset, start_abbv_c_offset; 3145 UWord after_die_c_offset, after_abbv_c_offset; 3146 3147 /* --- Deal with this DIE --- */ 3148 posn = get_position_of_Cursor( c ); 3149 abbv_code = get_ULEB128( c ); 3150 set_abbv_Cursor( &abbv, td3, cc, abbv_code ); 3151 atag = get_ULEB128( &abbv ); 3152 TRACE_D3("\n"); 3153 TRACE_D3(" <%d><%lx>: Abbrev Number: %llu (%s)\n", 3154 level, posn, abbv_code, ML_(pp_DW_TAG)( atag ) ); 3155 3156 if (atag == 0) 3157 cc->barf("read_DIE: invalid zero tag on DIE"); 3158 3159 has_children = get_UChar( &abbv ); 3160 if (has_children != DW_children_no && has_children != DW_children_yes) 3161 cc->barf("read_DIE: invalid has_children value"); 3162 3163 /* We're set up to look at the fields of this DIE. Hand it off to 3164 any parser(s) that want to see it. Since they will in general 3165 advance both the DIE and abbrev cursors, remember their current 3166 settings so that we can then back up and do one final pass over 3167 the DIE, to print out its contents. 
*/ 3168 3169 start_die_c_offset = get_position_of_Cursor( c ); 3170 start_abbv_c_offset = get_position_of_Cursor( &abbv ); 3171 3172 while (True) { 3173 ULong cts; 3174 Int ctsSzB; 3175 UWord ctsMemSzB; 3176 ULong at_name = get_ULEB128( &abbv ); 3177 ULong at_form = get_ULEB128( &abbv ); 3178 if (at_name == 0 && at_form == 0) break; 3179 TRACE_D3(" %18s: ", ML_(pp_DW_AT)(at_name)); 3180 /* Get the form contents, but ignore them; the only purpose is 3181 to print them, if td3 is True */ 3182 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 3183 cc, c, td3, (DW_FORM)at_form ); 3184 TRACE_D3("\t"); 3185 TRACE_D3("\n"); 3186 } 3187 3188 after_die_c_offset = get_position_of_Cursor( c ); 3189 after_abbv_c_offset = get_position_of_Cursor( &abbv ); 3190 3191 set_position_of_Cursor( c, start_die_c_offset ); 3192 set_position_of_Cursor( &abbv, start_abbv_c_offset ); 3193 3194 parse_type_DIE( tyents, 3195 typarser, 3196 (DW_TAG)atag, 3197 posn, 3198 level, 3199 c, /* DIE cursor */ 3200 &abbv, /* abbrev cursor */ 3201 cc, 3202 td3 ); 3203 3204 set_position_of_Cursor( c, start_die_c_offset ); 3205 set_position_of_Cursor( &abbv, start_abbv_c_offset ); 3206 3207 parse_var_DIE( rangestree, 3208 tempvars, 3209 gexprs, 3210 varparser, 3211 (DW_TAG)atag, 3212 posn, 3213 level, 3214 c, /* DIE cursor */ 3215 &abbv, /* abbrev cursor */ 3216 cc, 3217 td3 ); 3218 3219 set_position_of_Cursor( c, after_die_c_offset ); 3220 set_position_of_Cursor( &abbv, after_abbv_c_offset ); 3221 3222 /* --- Now recurse into its children, if any --- */ 3223 if (has_children == DW_children_yes) { 3224 if (0) TRACE_D3("BEGIN children of level %d\n", level); 3225 while (True) { 3226 atag = peek_ULEB128( c ); 3227 if (atag == 0) break; 3228 read_DIE( rangestree, tyents, tempvars, gexprs, 3229 typarser, varparser, 3230 c, td3, cc, level+1 ); 3231 } 3232 /* Now we need to eat the terminating zero */ 3233 atag = get_ULEB128( c ); 3234 vg_assert(atag == 0); 3235 if (0) TRACE_D3("END children of level %d\n", level); 
3236 } 3237 3238 } 3239 3240 3241 static 3242 void new_dwarf3_reader_wrk ( 3243 struct _DebugInfo* di, 3244 __attribute__((noreturn)) void (*barf)( HChar* ), 3245 UChar* debug_info_img, SizeT debug_info_sz, 3246 UChar* debug_abbv_img, SizeT debug_abbv_sz, 3247 UChar* debug_line_img, SizeT debug_line_sz, 3248 UChar* debug_str_img, SizeT debug_str_sz, 3249 UChar* debug_ranges_img, SizeT debug_ranges_sz, 3250 UChar* debug_loc_img, SizeT debug_loc_sz 3251 ) 3252 { 3253 XArray* /* of TyEnt */ tyents; 3254 XArray* /* of TyEnt */ tyents_to_keep; 3255 XArray* /* of GExpr* */ gexprs; 3256 XArray* /* of TempVar* */ tempvars; 3257 WordFM* /* of (XArray* of AddrRange, void) */ rangestree; 3258 TyEntIndexCache* tyents_cache = NULL; 3259 TyEntIndexCache* tyents_to_keep_cache = NULL; 3260 TempVar *varp, *varp2; 3261 GExpr* gexpr; 3262 Cursor abbv; /* for showing .debug_abbrev */ 3263 Cursor info; /* primary cursor for parsing .debug_info */ 3264 Cursor ranges; /* for showing .debug_ranges */ 3265 D3TypeParser typarser; 3266 D3VarParser varparser; 3267 Addr dr_base; 3268 UWord dr_offset; 3269 Word i, j, n; 3270 Bool td3 = di->trace_symtab; 3271 XArray* /* of TempVar* */ dioff_lookup_tab; 3272 #if 0 3273 /* This doesn't work properly because it assumes all entries are 3274 packed end to end, with no holes. But that doesn't always 3275 appear to be the case, so it loses sync. And the D3 spec 3276 doesn't appear to require a no-hole situation either. 
*/ 3277 /* Display .debug_loc */ 3278 Addr dl_base; 3279 UWord dl_offset; 3280 Cursor loc; /* for showing .debug_loc */ 3281 TRACE_SYMTAB("\n"); 3282 TRACE_SYMTAB("\n------ The contents of .debug_loc ------\n"); 3283 TRACE_SYMTAB(" Offset Begin End Expression\n"); 3284 init_Cursor( &loc, debug_loc_img, 3285 debug_loc_sz, 0, barf, 3286 "Overrun whilst reading .debug_loc section(1)" ); 3287 dl_base = 0; 3288 dl_offset = 0; 3289 while (True) { 3290 UWord w1, w2; 3291 UWord len; 3292 if (is_at_end_Cursor( &loc )) 3293 break; 3294 3295 /* Read a (host-)word pair. This is something of a hack since 3296 the word size to read is really dictated by the ELF file; 3297 however, we assume we're reading a file with the same 3298 word-sizeness as the host. Reasonably enough. */ 3299 w1 = get_UWord( &loc ); 3300 w2 = get_UWord( &loc ); 3301 3302 if (w1 == 0 && w2 == 0) { 3303 /* end of list. reset 'base' */ 3304 TRACE_D3(" %08lx <End of list>\n", dl_offset); 3305 dl_base = 0; 3306 dl_offset = get_position_of_Cursor( &loc ); 3307 continue; 3308 } 3309 3310 if (w1 == -1UL) { 3311 /* new value for 'base' */ 3312 TRACE_D3(" %08lx %16lx %08lx (base address)\n", 3313 dl_offset, w1, w2); 3314 dl_base = w2; 3315 continue; 3316 } 3317 3318 /* else a location expression follows */ 3319 TRACE_D3(" %08lx %08lx %08lx ", 3320 dl_offset, w1 + dl_base, w2 + dl_base); 3321 len = (UWord)get_UShort( &loc ); 3322 while (len > 0) { 3323 UChar byte = get_UChar( &loc ); 3324 TRACE_D3("%02x", (UInt)byte); 3325 len--; 3326 } 3327 TRACE_SYMTAB("\n"); 3328 } 3329 #endif 3330 3331 /* Display .debug_ranges */ 3332 TRACE_SYMTAB("\n"); 3333 TRACE_SYMTAB("\n------ The contents of .debug_ranges ------\n"); 3334 TRACE_SYMTAB(" Offset Begin End\n"); 3335 init_Cursor( &ranges, debug_ranges_img, 3336 debug_ranges_sz, 0, barf, 3337 "Overrun whilst reading .debug_ranges section(1)" ); 3338 dr_base = 0; 3339 dr_offset = 0; 3340 while (True) { 3341 UWord w1, w2; 3342 3343 if (is_at_end_Cursor( &ranges )) 3344 break; 
3345 3346 /* Read a (host-)word pair. This is something of a hack since 3347 the word size to read is really dictated by the ELF file; 3348 however, we assume we're reading a file with the same 3349 word-sizeness as the host. Reasonably enough. */ 3350 w1 = get_UWord( &ranges ); 3351 w2 = get_UWord( &ranges ); 3352 3353 if (w1 == 0 && w2 == 0) { 3354 /* end of list. reset 'base' */ 3355 TRACE_D3(" %08lx <End of list>\n", dr_offset); 3356 dr_base = 0; 3357 dr_offset = get_position_of_Cursor( &ranges ); 3358 continue; 3359 } 3360 3361 if (w1 == -1UL) { 3362 /* new value for 'base' */ 3363 TRACE_D3(" %08lx %16lx %08lx (base address)\n", 3364 dr_offset, w1, w2); 3365 dr_base = w2; 3366 continue; 3367 } 3368 3369 /* else a range [w1+base, w2+base) is denoted */ 3370 TRACE_D3(" %08lx %08lx %08lx\n", 3371 dr_offset, w1 + dr_base, w2 + dr_base); 3372 } 3373 3374 /* Display .debug_abbrev */ 3375 init_Cursor( &abbv, debug_abbv_img, debug_abbv_sz, 0, barf, 3376 "Overrun whilst reading .debug_abbrev section" ); 3377 TRACE_SYMTAB("\n"); 3378 TRACE_SYMTAB("\n------ The contents of .debug_abbrev ------\n"); 3379 while (True) { 3380 if (is_at_end_Cursor( &abbv )) 3381 break; 3382 /* Read one abbreviation table */ 3383 TRACE_D3(" Number TAG\n"); 3384 while (True) { 3385 ULong atag; 3386 UInt has_children; 3387 ULong acode = get_ULEB128( &abbv ); 3388 if (acode == 0) break; /* end of the table */ 3389 atag = get_ULEB128( &abbv ); 3390 has_children = get_UChar( &abbv ); 3391 TRACE_D3(" %llu %s [%s]\n", 3392 acode, ML_(pp_DW_TAG)(atag), 3393 ML_(pp_DW_children)(has_children)); 3394 while (True) { 3395 ULong at_name = get_ULEB128( &abbv ); 3396 ULong at_form = get_ULEB128( &abbv ); 3397 if (at_name == 0 && at_form == 0) break; 3398 TRACE_D3(" %18s %s\n", 3399 ML_(pp_DW_AT)(at_name), ML_(pp_DW_FORM)(at_form)); 3400 } 3401 } 3402 } 3403 TRACE_SYMTAB("\n"); 3404 3405 /* Now loop over the Compilation Units listed in the .debug_info 3406 section (see D3SPEC sec 7.5) paras 1 and 2. 
Each compilation 3407 unit contains a Compilation Unit Header followed by precisely 3408 one DW_TAG_compile_unit or DW_TAG_partial_unit DIE. */ 3409 init_Cursor( &info, debug_info_img, debug_info_sz, 0, barf, 3410 "Overrun whilst reading .debug_info section" ); 3411 3412 /* We'll park the harvested type information in here. Also create 3413 a fake "void" entry with offset D3_FAKEVOID_CUOFF, so we always 3414 have at least one type entry to refer to. D3_FAKEVOID_CUOFF is 3415 huge and presumably will not occur in any valid DWARF3 file -- 3416 it would need to have a .debug_info section 4GB long for that to 3417 happen. These type entries end up in the DebugInfo. */ 3418 tyents = VG_(newXA)( ML_(dinfo_zalloc), 3419 "di.readdwarf3.ndrw.1 (TyEnt temp array)", 3420 ML_(dinfo_free), sizeof(TyEnt) ); 3421 { TyEnt tyent; 3422 VG_(memset)(&tyent, 0, sizeof(tyent)); 3423 tyent.tag = Te_TyVoid; 3424 tyent.cuOff = D3_FAKEVOID_CUOFF; 3425 tyent.Te.TyVoid.isFake = True; 3426 VG_(addToXA)( tyents, &tyent ); 3427 } 3428 { TyEnt tyent; 3429 VG_(memset)(&tyent, 0, sizeof(tyent)); 3430 tyent.tag = Te_UNKNOWN; 3431 tyent.cuOff = D3_INVALID_CUOFF; 3432 VG_(addToXA)( tyents, &tyent ); 3433 } 3434 3435 /* This is a tree used to unique-ify the range lists that are 3436 manufactured by parse_var_DIE. References to the keys in the 3437 tree wind up in .rngMany fields in TempVars. We'll need to 3438 delete this tree, and the XArrays attached to it, at the end of 3439 this function. */ 3440 rangestree = VG_(newFM)( ML_(dinfo_zalloc), 3441 "di.readdwarf3.ndrw.2 (rangestree)", 3442 ML_(dinfo_free), 3443 (Word(*)(UWord,UWord))cmp__XArrays_of_AddrRange ); 3444 3445 /* List of variables we're accumulating. These don't end up in the 3446 DebugInfo; instead their contents are handed to ML_(addVar) and 3447 the list elements are then deleted. 
*/ 3448 tempvars = VG_(newXA)( ML_(dinfo_zalloc), 3449 "di.readdwarf3.ndrw.3 (TempVar*s array)", 3450 ML_(dinfo_free), 3451 sizeof(TempVar*) ); 3452 3453 /* List of GExprs we're accumulating. These wind up in the 3454 DebugInfo. */ 3455 gexprs = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.4", 3456 ML_(dinfo_free), sizeof(GExpr*) ); 3457 3458 /* We need a D3TypeParser to keep track of partially constructed 3459 types. It'll be discarded as soon as we've completed the CU, 3460 since the resulting information is tipped in to 'tyents' as it 3461 is generated. */ 3462 VG_(memset)( &typarser, 0, sizeof(typarser) ); 3463 typarser.sp = -1; 3464 typarser.language = '?'; 3465 for (i = 0; i < N_D3_TYPE_STACK; i++) { 3466 typarser.qparentE[i].tag = Te_EMPTY; 3467 typarser.qparentE[i].cuOff = D3_INVALID_CUOFF; 3468 } 3469 3470 VG_(memset)( &varparser, 0, sizeof(varparser) ); 3471 varparser.sp = -1; 3472 3473 TRACE_D3("\n------ Parsing .debug_info section ------\n"); 3474 while (True) { 3475 UWord cu_start_offset, cu_offset_now; 3476 CUConst cc; 3477 /* It may be that the stated size of this CU is larger than the 3478 amount of stuff actually in it. icc9 seems to generate CUs 3479 thusly. We use these variables to figure out if this is 3480 indeed the case, and if so how many bytes we need to skip to 3481 get to the start of the next CU. Not skipping those bytes 3482 causes us to misidentify the start of the next CU, and it all 3483 goes badly wrong after that (not surprisingly). */ 3484 UWord cu_size_including_IniLen, cu_amount_used; 3485 3486 /* It seems icc9 finishes the DIE info before debug_info_sz 3487 bytes have been used up. So be flexible, and declare the 3488 sequence complete if there is not enough remaining bytes to 3489 hold even the smallest conceivable CU header. (11 bytes I 3490 reckon). */ 3491 /* JRS 23Jan09: I suspect this is no longer necessary now that 3492 the code below contains a 'while (cu_amount_used < 3493 cu_size_including_IniLen ...' 
style loop, which skips over 3494 any leftover bytes at the end of a CU in the case where the 3495 CU's stated size is larger than its actual size (as 3496 determined by reading all its DIEs). However, for prudence, 3497 I'll leave the following test in place. I can't see that a 3498 CU header can be smaller than 11 bytes, so I don't think 3499 there's any harm possible through the test -- it just adds 3500 robustness. */ 3501 Word avail = get_remaining_length_Cursor( &info ); 3502 if (avail < 11) { 3503 if (avail > 0) 3504 TRACE_D3("new_dwarf3_reader_wrk: warning: " 3505 "%ld unused bytes after end of DIEs\n", avail); 3506 break; 3507 } 3508 3509 /* Check the varparser's stack is in a sane state. */ 3510 vg_assert(varparser.sp == -1); 3511 for (i = 0; i < N_D3_VAR_STACK; i++) { 3512 vg_assert(varparser.ranges[i] == NULL); 3513 vg_assert(varparser.level[i] == 0); 3514 } 3515 for (i = 0; i < N_D3_TYPE_STACK; i++) { 3516 vg_assert(typarser.qparentE[i].cuOff == D3_INVALID_CUOFF); 3517 vg_assert(typarser.qparentE[i].tag == Te_EMPTY); 3518 vg_assert(typarser.qlevel[i] == 0); 3519 } 3520 3521 cu_start_offset = get_position_of_Cursor( &info ); 3522 TRACE_D3("\n"); 3523 TRACE_D3(" Compilation Unit @ offset 0x%lx:\n", cu_start_offset); 3524 /* parse_CU_header initialises the CU's set_abbv_Cursor cache 3525 (saC_cache) */ 3526 parse_CU_Header( &cc, td3, &info, 3527 (UChar*)debug_abbv_img, debug_abbv_sz ); 3528 cc.debug_str_img = debug_str_img; 3529 cc.debug_str_sz = debug_str_sz; 3530 cc.debug_ranges_img = debug_ranges_img; 3531 cc.debug_ranges_sz = debug_ranges_sz; 3532 cc.debug_loc_img = debug_loc_img; 3533 cc.debug_loc_sz = debug_loc_sz; 3534 cc.debug_line_img = debug_line_img; 3535 cc.debug_line_sz = debug_line_sz; 3536 cc.debug_info_img = debug_info_img; 3537 cc.debug_info_sz = debug_info_sz; 3538 cc.cu_start_offset = cu_start_offset; 3539 cc.di = di; 3540 /* The CU's svma can be deduced by looking at the AT_low_pc 3541 value in the top level TAG_compile_unit, which is 
the topmost 3542 DIE. We'll leave it for the 'varparser' to acquire that info 3543 and fill it in -- since it is the only party to want to know 3544 it. */ 3545 cc.cu_svma_known = False; 3546 cc.cu_svma = 0; 3547 3548 /* Create a fake outermost-level range covering the entire 3549 address range. So we always have *something* to catch all 3550 variable declarations. */ 3551 varstack_push( &cc, &varparser, td3, 3552 unitary_range_list(0UL, ~0UL), 3553 -1, False/*isFunc*/, NULL/*fbGX*/ ); 3554 3555 /* And set up the file name table. When we come across the top 3556 level DIE for this CU (which is what the next call to 3557 read_DIE should process) we will copy all the file names out 3558 of the .debug_line img area and use this table to look up the 3559 copies when we later see filename numbers in DW_TAG_variables 3560 etc. */ 3561 vg_assert(!varparser.filenameTable ); 3562 varparser.filenameTable 3563 = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.5", 3564 ML_(dinfo_free), 3565 sizeof(UChar*) ); 3566 vg_assert(varparser.filenameTable); 3567 3568 /* Now read the one-and-only top-level DIE for this CU. */ 3569 vg_assert(varparser.sp == 0); 3570 read_DIE( rangestree, 3571 tyents, tempvars, gexprs, 3572 &typarser, &varparser, 3573 &info, td3, &cc, 0 ); 3574 3575 cu_offset_now = get_position_of_Cursor( &info ); 3576 3577 if (0) VG_(printf)("Travelled: %lu size %llu\n", 3578 cu_offset_now - cc.cu_start_offset, 3579 cc.unit_length + (cc.is_dw64 ? 12 : 4)); 3580 3581 /* How big the CU claims it is .. */ 3582 cu_size_including_IniLen = cc.unit_length + (cc.is_dw64 ? 12 : 4); 3583 /* .. vs how big we have found it to be */ 3584 cu_amount_used = cu_offset_now - cc.cu_start_offset; 3585 3586 if (1) TRACE_D3("offset now %ld, d-i-size %ld\n", 3587 cu_offset_now, debug_info_sz); 3588 if (cu_offset_now > debug_info_sz) 3589 barf("toplevel DIEs beyond end of CU"); 3590 3591 /* If the CU is bigger than it claims to be, we've got a serious 3592 problem. 
*/ 3593 if (cu_amount_used > cu_size_including_IniLen) 3594 barf("CU's actual size appears to be larger than it claims it is"); 3595 3596 /* If the CU is smaller than it claims to be, we need to skip some 3597 bytes. Loop updates cu_offset_new and cu_amount_used. */ 3598 while (cu_amount_used < cu_size_including_IniLen 3599 && get_remaining_length_Cursor( &info ) > 0) { 3600 if (0) VG_(printf)("SKIP\n"); 3601 (void)get_UChar( &info ); 3602 cu_offset_now = get_position_of_Cursor( &info ); 3603 cu_amount_used = cu_offset_now - cc.cu_start_offset; 3604 } 3605 3606 if (cu_offset_now == debug_info_sz) 3607 break; 3608 3609 /* Preen to level -2. DIEs have level >= 0 so -2 cannot occur 3610 anywhere else at all. Our fake the-entire-address-space 3611 range is at level -1, so preening to -2 should completely 3612 empty the stack out. */ 3613 TRACE_D3("\n"); 3614 varstack_preen( &varparser, td3, -2 ); 3615 /* Similarly, empty the type stack out. */ 3616 typestack_preen( &typarser, td3, -2 ); 3617 /* else keep going */ 3618 3619 TRACE_D3("set_abbv_Cursor cache: %lu queries, %lu misses\n", 3620 cc.saC_cache_queries, cc.saC_cache_misses); 3621 3622 vg_assert(varparser.filenameTable ); 3623 VG_(deleteXA)( varparser.filenameTable ); 3624 varparser.filenameTable = NULL; 3625 } 3626 3627 /* From here on we're post-processing the stuff we got 3628 out of the .debug_info section. 
*/ 3629 if (td3) { 3630 TRACE_D3("\n"); 3631 ML_(pp_TyEnts)(tyents, "Initial type entity (TyEnt) array"); 3632 TRACE_D3("\n"); 3633 TRACE_D3("------ Compressing type entries ------\n"); 3634 } 3635 3636 tyents_cache = ML_(dinfo_zalloc)( "di.readdwarf3.ndrw.6", 3637 sizeof(TyEntIndexCache) ); 3638 ML_(TyEntIndexCache__invalidate)( tyents_cache ); 3639 dedup_types( td3, tyents, tyents_cache ); 3640 if (td3) { 3641 TRACE_D3("\n"); 3642 ML_(pp_TyEnts)(tyents, "After type entity (TyEnt) compression"); 3643 } 3644 3645 TRACE_D3("\n"); 3646 TRACE_D3("------ Resolving the types of variables ------\n" ); 3647 resolve_variable_types( barf, tyents, tyents_cache, tempvars ); 3648 3649 /* Copy all the non-INDIR tyents into a new table. For large 3650 .so's, about 90% of the tyents will by now have been resolved to 3651 INDIRs, and we no longer need them, and so don't need to store 3652 them. */ 3653 tyents_to_keep 3654 = VG_(newXA)( ML_(dinfo_zalloc), 3655 "di.readdwarf3.ndrw.7 (TyEnt to-keep array)", 3656 ML_(dinfo_free), sizeof(TyEnt) ); 3657 n = VG_(sizeXA)( tyents ); 3658 for (i = 0; i < n; i++) { 3659 TyEnt* ent = VG_(indexXA)( tyents, i ); 3660 if (ent->tag != Te_INDIR) 3661 VG_(addToXA)( tyents_to_keep, ent ); 3662 } 3663 3664 VG_(deleteXA)( tyents ); 3665 tyents = NULL; 3666 ML_(dinfo_free)( tyents_cache ); 3667 tyents_cache = NULL; 3668 3669 /* Sort tyents_to_keep so we can lookup in it. A complete (if 3670 minor) waste of time, since tyents itself is sorted, but 3671 necessary since VG_(lookupXA) refuses to cooperate if we 3672 don't. */ 3673 VG_(setCmpFnXA)( 3674 tyents_to_keep, 3675 (Int(*)(void*,void*)) ML_(TyEnt__cmp_by_cuOff_only) 3676 ); 3677 VG_(sortXA)( tyents_to_keep ); 3678 3679 /* Enable cacheing on tyents_to_keep */ 3680 tyents_to_keep_cache 3681 = ML_(dinfo_zalloc)( "di.readdwarf3.ndrw.8", 3682 sizeof(TyEntIndexCache) ); 3683 ML_(TyEntIndexCache__invalidate)( tyents_to_keep_cache ); 3684 3685 /* And record the tyents in the DebugInfo. 
We do this before 3686 starting to hand variables to ML_(addVar), since if ML_(addVar) 3687 wants to do debug printing (of the types of said vars) then it 3688 will need the tyents.*/ 3689 vg_assert(!di->admin_tyents); 3690 di->admin_tyents = tyents_to_keep; 3691 3692 /* Bias all the location expressions. */ 3693 TRACE_D3("\n"); 3694 TRACE_D3("------ Biasing the location expressions ------\n" ); 3695 3696 n = VG_(sizeXA)( gexprs ); 3697 for (i = 0; i < n; i++) { 3698 gexpr = *(GExpr**)VG_(indexXA)( gexprs, i ); 3699 bias_GX( gexpr, di ); 3700 } 3701 3702 TRACE_D3("\n"); 3703 TRACE_D3("------ Acquired the following variables: ------\n\n"); 3704 3705 /* Park (pointers to) all the vars in an XArray, so we can look up 3706 abstract origins quickly. The array is sorted (hence, looked-up 3707 by) the .dioff fields. Since the .dioffs should be in strictly 3708 ascending order, there is no need to sort the array after 3709 construction. The ascendingness is however asserted for. */ 3710 dioff_lookup_tab 3711 = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.9", 3712 ML_(dinfo_free), 3713 sizeof(TempVar*) ); 3714 vg_assert(dioff_lookup_tab); 3715 3716 n = VG_(sizeXA)( tempvars ); 3717 for (i = 0; i < n; i++) { 3718 varp = *(TempVar**)VG_(indexXA)( tempvars, i ); 3719 if (i > 0) { 3720 varp2 = *(TempVar**)VG_(indexXA)( tempvars, i-1 ); 3721 /* why should this hold? Only, I think, because we've 3722 constructed the array by reading .debug_info sequentially, 3723 and so the array .dioff fields should reflect that, and be 3724 strictly ascending. */ 3725 vg_assert(varp2->dioff < varp->dioff); 3726 } 3727 VG_(addToXA)( dioff_lookup_tab, &varp ); 3728 } 3729 VG_(setCmpFnXA)( dioff_lookup_tab, cmp_TempVar_by_dioff ); 3730 VG_(sortXA)( dioff_lookup_tab ); /* POINTLESS; FIXME: rm */ 3731 3732 /* Now visit each var. Collect up as much info as possible for 3733 each var and hand it to ML_(addVar). 
*/ 3734 n = VG_(sizeXA)( tempvars ); 3735 for (j = 0; j < n; j++) { 3736 TyEnt* ent; 3737 varp = *(TempVar**)VG_(indexXA)( tempvars, j ); 3738 3739 /* Possibly show .. */ 3740 if (td3) { 3741 VG_(printf)("<%lx> addVar: level %d: %s :: ", 3742 varp->dioff, 3743 varp->level, 3744 varp->name ? varp->name : (UChar*)"<anon_var>" ); 3745 if (varp->typeR) { 3746 ML_(pp_TyEnt_C_ishly)( tyents_to_keep, varp->typeR ); 3747 } else { 3748 VG_(printf)("NULL"); 3749 } 3750 VG_(printf)("\n Loc="); 3751 if (varp->gexpr) { 3752 ML_(pp_GX)(varp->gexpr); 3753 } else { 3754 VG_(printf)("NULL"); 3755 } 3756 VG_(printf)("\n"); 3757 if (varp->fbGX) { 3758 VG_(printf)(" FrB="); 3759 ML_(pp_GX)( varp->fbGX ); 3760 VG_(printf)("\n"); 3761 } else { 3762 VG_(printf)(" FrB=none\n"); 3763 } 3764 VG_(printf)(" declared at: %s:%d\n", 3765 varp->fName ? varp->fName : (UChar*)"NULL", 3766 varp->fLine ); 3767 if (varp->absOri != (UWord)D3_INVALID_CUOFF) 3768 VG_(printf)(" abstract origin: <%lx>\n", varp->absOri); 3769 } 3770 3771 /* Skip variables which have no location. These must be 3772 abstract instances; they are useless as-is since with no 3773 location they have no specified memory location. They will 3774 presumably be referred to via the absOri fields of other 3775 variables. */ 3776 if (!varp->gexpr) { 3777 TRACE_D3(" SKIP (no location)\n\n"); 3778 continue; 3779 } 3780 3781 /* So it has a location, at least. If it refers to some other 3782 entry through its absOri field, pull in further info through 3783 that. 
*/ 3784 if (varp->absOri != (UWord)D3_INVALID_CUOFF) { 3785 Bool found; 3786 Word ixFirst, ixLast; 3787 TempVar key; 3788 TempVar* keyp = &key; 3789 TempVar *varAI; 3790 VG_(memset)(&key, 0, sizeof(key)); /* not necessary */ 3791 key.dioff = varp->absOri; /* this is what we want to find */ 3792 found = VG_(lookupXA)( dioff_lookup_tab, &keyp, 3793 &ixFirst, &ixLast ); 3794 if (!found) { 3795 /* barf("DW_AT_abstract_origin can't be resolved"); */ 3796 TRACE_D3(" SKIP (DW_AT_abstract_origin can't be resolved)\n\n"); 3797 continue; 3798 } 3799 /* If the following fails, there is more than one entry with 3800 the same dioff. Which can't happen. */ 3801 vg_assert(ixFirst == ixLast); 3802 varAI = *(TempVar**)VG_(indexXA)( dioff_lookup_tab, ixFirst ); 3803 /* stay sane */ 3804 vg_assert(varAI); 3805 vg_assert(varAI->dioff == varp->absOri); 3806 3807 /* Copy what useful info we can. */ 3808 if (varAI->typeR && !varp->typeR) 3809 varp->typeR = varAI->typeR; 3810 if (varAI->name && !varp->name) 3811 varp->name = varAI->name; 3812 if (varAI->fName && !varp->fName) 3813 varp->fName = varAI->fName; 3814 if (varAI->fLine > 0 && varp->fLine == 0) 3815 varp->fLine = varAI->fLine; 3816 } 3817 3818 /* Give it a name if it doesn't have one. */ 3819 if (!varp->name) 3820 varp->name = ML_(addStr)( di, "<anon_var>", -1 ); 3821 3822 /* So now does it have enough info to be useful? */ 3823 /* NOTE: re typeR: this is a hack. If typeR is Te_UNKNOWN then 3824 the type didn't get resolved. Really, in that case 3825 something's broken earlier on, and should be fixed, rather 3826 than just skipping the variable. */ 3827 ent = ML_(TyEnts__index_by_cuOff)( tyents_to_keep, 3828 tyents_to_keep_cache, 3829 varp->typeR ); 3830 /* The next two assertions should be guaranteed by 3831 our previous call to resolve_variable_types. 
*/ 3832 vg_assert(ent); 3833 vg_assert(ML_(TyEnt__is_type)(ent) || ent->tag == Te_UNKNOWN); 3834 3835 if (ent->tag == Te_UNKNOWN) continue; 3836 3837 vg_assert(varp->gexpr); 3838 vg_assert(varp->name); 3839 vg_assert(varp->typeR); 3840 vg_assert(varp->level >= 0); 3841 3842 /* Ok. So we're going to keep it. Call ML_(addVar) once for 3843 each address range in which the variable exists. */ 3844 TRACE_D3(" ACQUIRE for range(s) "); 3845 { AddrRange oneRange; 3846 AddrRange* varPcRanges; 3847 Word nVarPcRanges; 3848 /* Set up to iterate over address ranges, however 3849 represented. */ 3850 if (varp->nRanges == 0 || varp->nRanges == 1) { 3851 vg_assert(!varp->rngMany); 3852 if (varp->nRanges == 0) { 3853 vg_assert(varp->rngOneMin == 0); 3854 vg_assert(varp->rngOneMax == 0); 3855 } 3856 nVarPcRanges = varp->nRanges; 3857 oneRange.aMin = varp->rngOneMin; 3858 oneRange.aMax = varp->rngOneMax; 3859 varPcRanges = &oneRange; 3860 } else { 3861 vg_assert(varp->rngMany); 3862 vg_assert(varp->rngOneMin == 0); 3863 vg_assert(varp->rngOneMax == 0); 3864 nVarPcRanges = VG_(sizeXA)(varp->rngMany); 3865 vg_assert(nVarPcRanges >= 2); 3866 vg_assert(nVarPcRanges == (Word)varp->nRanges); 3867 varPcRanges = VG_(indexXA)(varp->rngMany, 0); 3868 } 3869 if (varp->level == 0) 3870 vg_assert( nVarPcRanges == 1 ); 3871 /* and iterate */ 3872 for (i = 0; i < nVarPcRanges; i++) { 3873 Addr pcMin = varPcRanges[i].aMin; 3874 Addr pcMax = varPcRanges[i].aMax; 3875 vg_assert(pcMin <= pcMax); 3876 /* Level 0 is the global address range. So at level 0 we 3877 don't want to bias pcMin/pcMax; but at all other levels 3878 we do since those are derived from svmas in the Dwarf 3879 we're reading. Be paranoid ... */ 3880 if (varp->level == 0) { 3881 vg_assert(pcMin == (Addr)0); 3882 vg_assert(pcMax == ~(Addr)0); 3883 } else { 3884 /* vg_assert(pcMin > (Addr)0); 3885 No .. we can legitimately expect to see ranges like 3886 0x0-0x11D (pre-biasing, of course). 
*/ 3887 vg_assert(pcMax < ~(Addr)0); 3888 } 3889 3890 /* Apply text biasing, for non-global variables. */ 3891 if (varp->level > 0) { 3892 pcMin += di->text_debug_bias; 3893 pcMax += di->text_debug_bias; 3894 } 3895 3896 if (i > 0 && (i%2) == 0) 3897 TRACE_D3("\n "); 3898 TRACE_D3("[%#lx,%#lx] ", pcMin, pcMax ); 3899 3900 ML_(addVar)( 3901 di, varp->level, 3902 pcMin, pcMax, 3903 varp->name, varp->typeR, 3904 varp->gexpr, varp->fbGX, 3905 varp->fName, varp->fLine, td3 3906 ); 3907 } 3908 } 3909 3910 TRACE_D3("\n\n"); 3911 /* and move on to the next var */ 3912 } 3913 3914 /* Now free all the TempVars */ 3915 n = VG_(sizeXA)( tempvars ); 3916 for (i = 0; i < n; i++) { 3917 varp = *(TempVar**)VG_(indexXA)( tempvars, i ); 3918 ML_(dinfo_free)(varp); 3919 } 3920 VG_(deleteXA)( tempvars ); 3921 tempvars = NULL; 3922 3923 /* and the temp lookup table */ 3924 VG_(deleteXA)( dioff_lookup_tab ); 3925 3926 /* and the ranges tree. Note that we need to also free the XArrays 3927 which constitute the keys, hence pass VG_(deleteXA) as a 3928 key-finalizer. */ 3929 VG_(deleteFM)( rangestree, (void(*)(UWord))VG_(deleteXA), NULL ); 3930 3931 /* and the tyents_to_keep cache */ 3932 ML_(dinfo_free)( tyents_to_keep_cache ); 3933 tyents_to_keep_cache = NULL; 3934 3935 /* and the file name table (just the array, not the entries 3936 themselves). (Apparently, 2008-Oct-23, varparser.filenameTable 3937 can be NULL here, for icc9 generated Dwarf3. 
Not sure what that 3938 signifies (a deeper problem with the reader?)) */ 3939 if (varparser.filenameTable) { 3940 VG_(deleteXA)( varparser.filenameTable ); 3941 varparser.filenameTable = NULL; 3942 } 3943 3944 /* record the GExprs in di so they can be freed later */ 3945 vg_assert(!di->admin_gexprs); 3946 di->admin_gexprs = gexprs; 3947 } 3948 3949 3950 /*------------------------------------------------------------*/ 3951 /*--- ---*/ 3952 /*--- The "new" DWARF3 reader -- top level control logic ---*/ 3953 /*--- ---*/ 3954 /*------------------------------------------------------------*/ 3955 3956 static Bool d3rd_jmpbuf_valid = False; 3957 static HChar* d3rd_jmpbuf_reason = NULL; 3958 static VG_MINIMAL_JMP_BUF(d3rd_jmpbuf); 3959 3960 static __attribute__((noreturn)) void barf ( HChar* reason ) { 3961 vg_assert(d3rd_jmpbuf_valid); 3962 d3rd_jmpbuf_reason = reason; 3963 VG_MINIMAL_LONGJMP(d3rd_jmpbuf); 3964 /*NOTREACHED*/ 3965 vg_assert(0); 3966 } 3967 3968 3969 void 3970 ML_(new_dwarf3_reader) ( 3971 struct _DebugInfo* di, 3972 UChar* debug_info_img, SizeT debug_info_sz, 3973 UChar* debug_abbv_img, SizeT debug_abbv_sz, 3974 UChar* debug_line_img, SizeT debug_line_sz, 3975 UChar* debug_str_img, SizeT debug_str_sz, 3976 UChar* debug_ranges_img, SizeT debug_ranges_sz, 3977 UChar* debug_loc_img, SizeT debug_loc_sz 3978 ) 3979 { 3980 volatile Int jumped; 3981 volatile Bool td3 = di->trace_symtab; 3982 3983 /* Run the _wrk function to read the dwarf3. If it succeeds, it 3984 just returns normally. If there is any failure, it longjmp's 3985 back here, having first set d3rd_jmpbuf_reason to something 3986 useful. */ 3987 vg_assert(d3rd_jmpbuf_valid == False); 3988 vg_assert(d3rd_jmpbuf_reason == NULL); 3989 3990 d3rd_jmpbuf_valid = True; 3991 jumped = VG_MINIMAL_SETJMP(d3rd_jmpbuf); 3992 if (jumped == 0) { 3993 /* try this ... 
*/ 3994 new_dwarf3_reader_wrk( di, barf, 3995 debug_info_img, debug_info_sz, 3996 debug_abbv_img, debug_abbv_sz, 3997 debug_line_img, debug_line_sz, 3998 debug_str_img, debug_str_sz, 3999 debug_ranges_img, debug_ranges_sz, 4000 debug_loc_img, debug_loc_sz ); 4001 d3rd_jmpbuf_valid = False; 4002 TRACE_D3("\n------ .debug_info reading was successful ------\n"); 4003 } else { 4004 /* It longjmp'd. */ 4005 d3rd_jmpbuf_valid = False; 4006 /* Can't longjump without giving some sort of reason. */ 4007 vg_assert(d3rd_jmpbuf_reason != NULL); 4008 4009 TRACE_D3("\n------ .debug_info reading failed ------\n"); 4010 4011 ML_(symerr)(di, True, d3rd_jmpbuf_reason); 4012 } 4013 4014 d3rd_jmpbuf_valid = False; 4015 d3rd_jmpbuf_reason = NULL; 4016 } 4017 4018 4019 4020 /* --- Unused code fragments which might be useful one day. --- */ 4021 4022 #if 0 4023 /* Read the arange tables */ 4024 TRACE_SYMTAB("\n"); 4025 TRACE_SYMTAB("\n------ The contents of .debug_arange ------\n"); 4026 init_Cursor( &aranges, debug_aranges_img, 4027 debug_aranges_sz, 0, barf, 4028 "Overrun whilst reading .debug_aranges section" ); 4029 while (True) { 4030 ULong len, d_i_offset; 4031 Bool is64; 4032 UShort version; 4033 UChar asize, segsize; 4034 4035 if (is_at_end_Cursor( &aranges )) 4036 break; 4037 /* Read one arange thingy */ 4038 /* initial_length field */ 4039 len = get_Initial_Length( &is64, &aranges, 4040 "in .debug_aranges: invalid initial-length field" ); 4041 version = get_UShort( &aranges ); 4042 d_i_offset = get_Dwarfish_UWord( &aranges, is64 ); 4043 asize = get_UChar( &aranges ); 4044 segsize = get_UChar( &aranges ); 4045 TRACE_D3(" Length: %llu\n", len); 4046 TRACE_D3(" Version: %d\n", (Int)version); 4047 TRACE_D3(" Offset into .debug_info: %llx\n", d_i_offset); 4048 TRACE_D3(" Pointer Size: %d\n", (Int)asize); 4049 TRACE_D3(" Segment Size: %d\n", (Int)segsize); 4050 TRACE_D3("\n"); 4051 TRACE_D3(" Address Length\n"); 4052 4053 while ((get_position_of_Cursor( &aranges ) % (2 * asize)) > 0) 
{ 4054 (void)get_UChar( & aranges ); 4055 } 4056 while (True) { 4057 ULong address = get_Dwarfish_UWord( &aranges, asize==8 ); 4058 ULong length = get_Dwarfish_UWord( &aranges, asize==8 ); 4059 TRACE_D3(" 0x%016llx 0x%llx\n", address, length); 4060 if (address == 0 && length == 0) break; 4061 } 4062 } 4063 TRACE_SYMTAB("\n"); 4064 #endif 4065 4066 #endif // defined(VGO_linux) || defined(VGO_darwin) 4067 4068 /*--------------------------------------------------------------------*/ 4069 /*--- end ---*/ 4070 /*--------------------------------------------------------------------*/ 4071