1 2 /*--------------------------------------------------------------------*/ 3 /*--- Read DWARF3/4 ".debug_info" sections (DIE trees). ---*/ 4 /*--- readdwarf3.c ---*/ 5 /*--------------------------------------------------------------------*/ 6 7 /* 8 This file is part of Valgrind, a dynamic binary instrumentation 9 framework. 10 11 Copyright (C) 2008-2010 OpenWorks LLP 12 info (at) open-works.co.uk 13 14 This program is free software; you can redistribute it and/or 15 modify it under the terms of the GNU General Public License as 16 published by the Free Software Foundation; either version 2 of the 17 License, or (at your option) any later version. 18 19 This program is distributed in the hope that it will be useful, but 20 WITHOUT ANY WARRANTY; without even the implied warranty of 21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 22 General Public License for more details. 23 24 You should have received a copy of the GNU General Public License 25 along with this program; if not, write to the Free Software 26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 27 02111-1307, USA. 28 29 The GNU General Public License is contained in the file COPYING. 30 31 Neither the names of the U.S. Department of Energy nor the 32 University of California nor the names of its contributors may be 33 used to endorse or promote products derived from this software 34 without prior written permission. 35 */ 36 37 #if defined(VGO_linux) || defined(VGO_darwin) 38 39 /* REFERENCE (without which this code will not make much sense): 40 41 DWARF Debugging Information Format, Version 3, 42 dated 20 December 2005 (the "D3 spec"). 43 44 Available at http://www.dwarfstd.org/Dwarf3.pdf. There's also a 45 .doc (MS Word) version, but for some reason the section numbers 46 between the Word and PDF versions differ by 1 in the first digit. 47 All section references in this code are to the PDF version. 
48 49 CURRENT HACKS: 50 51 DW_TAG_{const,volatile}_type no DW_AT_type is allowed; it is 52 assumed to mean "const void" or "volatile void" respectively. 53 GDB appears to interpret them like this, anyway. 54 55 In many cases it is important to know the svma of a CU (the "base 56 address of the CU", as the D3 spec calls it). There are some 57 situations in which the spec implies this value is unknown, but the 58 Dwarf3 produced by gcc-4.1 seems to assume is not unknown but 59 merely zero when not explicitly stated. So we too have to make 60 that assumption. 61 62 POTENTIAL BUG? Spotted 6 Sept 08. Why doesn't 63 unitary_range_list() bias the resulting range list in the same way 64 that its more general cousin, get_range_list(), does? I don't 65 know. 66 67 TODO, 2008 Feb 17: 68 69 get rid of cu_svma_known and document the assumed-zero svma hack. 70 71 ML_(sizeOfType): differentiate between zero sized types and types 72 for which the size is unknown. Is this important? I don't know. 73 74 DW_AT_array_types: deal with explicit sizes (currently we compute 75 the size from the bounds and the element size, although that's 76 fragile, if the bounds incompletely specified, or completely 77 absent) 78 79 Document reason for difference (by 1) of stack preening depth in 80 parse_var_DIE vs parse_type_DIE. 81 82 Don't hand to ML_(addVars), vars whose locations are entirely in 83 registers (DW_OP_reg*). This is merely a space-saving 84 optimisation, as ML_(evaluate_Dwarf3_Expr) should handle these 85 expressions correctly, by failing to evaluate them and hence 86 effectively ignoring the variable with which they are associated. 87 88 Deal with DW_AT_array_types which have element size != stride 89 90 In some cases, the info for a variable is split between two 91 different DIEs (generally a declarer and a definer). We punt on 92 these. Could do better here. 
93 94 The 'data_bias' argument passed to the expression evaluator 95 (ML_(evaluate_Dwarf3_Expr)) should really be changed to a 96 MaybeUWord, to make it clear when we do vs don't know what it is 97 for the evaluation of an expression. At the moment zero is passed 98 for this parameter in the don't know case. That's a bit fragile 99 and obscure; using a MaybeUWord would be clearer. 100 101 POTENTIAL PERFORMANCE IMPROVEMENTS: 102 103 Currently, duplicate removal and all other queries for the type 104 entities array is done using cuOffset-based pointing, which 105 involves a binary search (VG_(lookupXA)) for each access. This is 106 wildly inefficient, although simple. It would be better to 107 translate all the cuOffset-based references (iow, all the "R" and 108 "Rs" fields in the TyEnts in 'tyents') to direct index numbers in 109 'tyents' right at the start of dedup_types(), and use direct 110 indexing (VG_(indexXA)) wherever possible after that. 111 112 cmp__XArrays_of_AddrRange is also a performance bottleneck. Move 113 VG_(indexXA) into pub_tool_xarray.h so it can be inlined at all use 114 points, and possibly also make an _UNCHECKED version which skips 115 the range checks in performance-critical situations such as this. 116 117 Handle interaction between read_DIE and parse_{var,type}_DIE 118 better. Currently read_DIE reads the entire DIE just to find where 119 the end is (and for debug printing), so that it can later reliably 120 move the cursor to the end regardless of what parse_{var,type}_DIE 121 do. This means many DIEs (most, even?) are read twice. It would 122 be smarter to make parse_{var,type}_DIE return a Bool indicating 123 whether or not they advanced the DIE cursor, and only if they 124 didn't should read_DIE itself read through the DIE. 125 126 ML_(addVar) and add_var_to_arange: quite a lot of DiAddrRanges have 127 zero variables in their .vars XArray. 
Rather than have an XArray 128 with zero elements (which uses 2 malloc'd blocks), allow the .vars 129 pointer to be NULL in this case. 130 131 More generally, reduce the amount of memory allocated and freed 132 while reading Dwarf3 type/variable information. Even modest (20MB) 133 objects cause this module to allocate and free hundreds of 134 thousands of small blocks, and ML_(arena_malloc) and its various 135 groupies always show up at the top of performance profiles. */ 136 137 #include "pub_core_basics.h" 138 #include "pub_core_debuginfo.h" 139 #include "pub_core_libcbase.h" 140 #include "pub_core_libcassert.h" 141 #include "pub_core_libcprint.h" 142 #include "pub_core_options.h" 143 #include "pub_core_tooliface.h" /* VG_(needs) */ 144 #include "pub_core_xarray.h" 145 #include "pub_core_wordfm.h" 146 #include "priv_misc.h" /* dinfo_zalloc/free */ 147 #include "priv_tytypes.h" 148 #include "priv_d3basics.h" 149 #include "priv_storage.h" 150 #include "priv_readdwarf3.h" /* self */ 151 152 153 /*------------------------------------------------------------*/ 154 /*--- ---*/ 155 /*--- Basic machinery for parsing DIEs. ---*/ 156 /*--- ---*/ 157 /*------------------------------------------------------------*/ 158 159 #define TRACE_D3(format, args...) 
\ 160 if (td3) { VG_(printf)(format, ## args); } 161 162 #define D3_INVALID_CUOFF ((UWord)(-1UL)) 163 #define D3_FAKEVOID_CUOFF ((UWord)(-2UL)) 164 165 typedef 166 struct { 167 UChar* region_start_img; 168 UWord region_szB; 169 UWord region_next; 170 void (*barf)( HChar* ) __attribute__((noreturn)); 171 HChar* barfstr; 172 } 173 Cursor; 174 175 static inline Bool is_sane_Cursor ( Cursor* c ) { 176 if (!c) return False; 177 if (!c->barf) return False; 178 if (!c->barfstr) return False; 179 return True; 180 } 181 182 static void init_Cursor ( Cursor* c, 183 UChar* region_start_img, 184 UWord region_szB, 185 UWord region_next, 186 __attribute__((noreturn)) void (*barf)( HChar* ), 187 HChar* barfstr ) 188 { 189 vg_assert(c); 190 VG_(memset)(c, 0, sizeof(*c)); 191 c->region_start_img = region_start_img; 192 c->region_szB = region_szB; 193 c->region_next = region_next; 194 c->barf = barf; 195 c->barfstr = barfstr; 196 vg_assert(is_sane_Cursor(c)); 197 } 198 199 static Bool is_at_end_Cursor ( Cursor* c ) { 200 vg_assert(is_sane_Cursor(c)); 201 return c->region_next >= c->region_szB; 202 } 203 204 static inline UWord get_position_of_Cursor ( Cursor* c ) { 205 vg_assert(is_sane_Cursor(c)); 206 return c->region_next; 207 } 208 static inline void set_position_of_Cursor ( Cursor* c, UWord pos ) { 209 c->region_next = pos; 210 vg_assert(is_sane_Cursor(c)); 211 } 212 213 static /*signed*/Word get_remaining_length_Cursor ( Cursor* c ) { 214 vg_assert(is_sane_Cursor(c)); 215 return c->region_szB - c->region_next; 216 } 217 218 static UChar* get_address_of_Cursor ( Cursor* c ) { 219 vg_assert(is_sane_Cursor(c)); 220 return &c->region_start_img[ c->region_next ]; 221 } 222 223 /* FIXME: document assumptions on endianness for 224 get_UShort/UInt/ULong. 
*/ 225 static inline UChar get_UChar ( Cursor* c ) { 226 UChar r; 227 /* vg_assert(is_sane_Cursor(c)); */ 228 if (c->region_next + sizeof(UChar) > c->region_szB) { 229 c->barf(c->barfstr); 230 /*NOTREACHED*/ 231 vg_assert(0); 232 } 233 r = * (UChar*) &c->region_start_img[ c->region_next ]; 234 c->region_next += sizeof(UChar); 235 return r; 236 } 237 static UShort get_UShort ( Cursor* c ) { 238 UShort r; 239 vg_assert(is_sane_Cursor(c)); 240 if (c->region_next + sizeof(UShort) > c->region_szB) { 241 c->barf(c->barfstr); 242 /*NOTREACHED*/ 243 vg_assert(0); 244 } 245 r = * (UShort*) &c->region_start_img[ c->region_next ]; 246 c->region_next += sizeof(UShort); 247 return r; 248 } 249 static UInt get_UInt ( Cursor* c ) { 250 UInt r; 251 vg_assert(is_sane_Cursor(c)); 252 if (c->region_next + sizeof(UInt) > c->region_szB) { 253 c->barf(c->barfstr); 254 /*NOTREACHED*/ 255 vg_assert(0); 256 } 257 r = * (UInt*) &c->region_start_img[ c->region_next ]; 258 c->region_next += sizeof(UInt); 259 return r; 260 } 261 static ULong get_ULong ( Cursor* c ) { 262 ULong r; 263 vg_assert(is_sane_Cursor(c)); 264 if (c->region_next + sizeof(ULong) > c->region_szB) { 265 c->barf(c->barfstr); 266 /*NOTREACHED*/ 267 vg_assert(0); 268 } 269 r = * (ULong*) &c->region_start_img[ c->region_next ]; 270 c->region_next += sizeof(ULong); 271 return r; 272 } 273 static inline ULong get_ULEB128 ( Cursor* c ) { 274 ULong result; 275 Int shift; 276 UChar byte; 277 /* unroll first iteration */ 278 byte = get_UChar( c ); 279 result = (ULong)(byte & 0x7f); 280 if (LIKELY(!(byte & 0x80))) return result; 281 shift = 7; 282 /* end unroll first iteration */ 283 do { 284 byte = get_UChar( c ); 285 result |= ((ULong)(byte & 0x7f)) << shift; 286 shift += 7; 287 } while (byte & 0x80); 288 return result; 289 } 290 static Long get_SLEB128 ( Cursor* c ) { 291 ULong result = 0; 292 Int shift = 0; 293 UChar byte; 294 do { 295 byte = get_UChar(c); 296 result |= ((ULong)(byte & 0x7f)) << shift; 297 shift += 7; 298 } while 
(byte & 0x80); 299 if (shift < 64 && (byte & 0x40)) 300 result |= -(1ULL << shift); 301 return result; 302 } 303 304 /* Assume 'c' points to the start of a string. Return the absolute 305 address of whatever it points at, and advance it past the 306 terminating zero. This makes it safe for the caller to then copy 307 the string with ML_(addStr), since (w.r.t. image overruns) the 308 process of advancing past the terminating zero will already have 309 "vetted" the string. */ 310 static UChar* get_AsciiZ ( Cursor* c ) { 311 UChar uc; 312 UChar* res = get_address_of_Cursor(c); 313 do { uc = get_UChar(c); } while (uc != 0); 314 return res; 315 } 316 317 static ULong peek_ULEB128 ( Cursor* c ) { 318 Word here = c->region_next; 319 ULong r = get_ULEB128( c ); 320 c->region_next = here; 321 return r; 322 } 323 static UChar peek_UChar ( Cursor* c ) { 324 Word here = c->region_next; 325 UChar r = get_UChar( c ); 326 c->region_next = here; 327 return r; 328 } 329 330 static ULong get_Dwarfish_UWord ( Cursor* c, Bool is_dw64 ) { 331 return is_dw64 ? 
get_ULong(c) : (ULong) get_UInt(c); 332 } 333 334 static UWord get_UWord ( Cursor* c ) { 335 vg_assert(sizeof(UWord) == sizeof(void*)); 336 if (sizeof(UWord) == 4) return get_UInt(c); 337 if (sizeof(UWord) == 8) return get_ULong(c); 338 vg_assert(0); 339 } 340 341 /* Read a DWARF3 'Initial Length' field */ 342 static ULong get_Initial_Length ( /*OUT*/Bool* is64, 343 Cursor* c, 344 HChar* barfMsg ) 345 { 346 ULong w64; 347 UInt w32; 348 *is64 = False; 349 w32 = get_UInt( c ); 350 if (w32 >= 0xFFFFFFF0 && w32 < 0xFFFFFFFF) { 351 c->barf( barfMsg ); 352 } 353 else if (w32 == 0xFFFFFFFF) { 354 *is64 = True; 355 w64 = get_ULong( c ); 356 } else { 357 *is64 = False; 358 w64 = (ULong)w32; 359 } 360 return w64; 361 } 362 363 364 /*------------------------------------------------------------*/ 365 /*--- ---*/ 366 /*--- "CUConst" structure ---*/ 367 /*--- ---*/ 368 /*------------------------------------------------------------*/ 369 370 #define N_ABBV_CACHE 32 371 372 /* Holds information that is constant through the parsing of a 373 Compilation Unit. This is basically plumbed through to 374 everywhere. */ 375 typedef 376 struct { 377 /* Call here if anything goes wrong */ 378 void (*barf)( HChar* ) __attribute__((noreturn)); 379 /* Is this 64-bit DWARF ? */ 380 Bool is_dw64; 381 /* Which DWARF version ? (2, 3 or 4) */ 382 UShort version; 383 /* Length of this Compilation Unit, as stated in the 384 .unit_length :: InitialLength field of the CU Header. 385 However, this size (as specified by the D3 spec) does not 386 include the size of the .unit_length field itself, which is 387 either 4 or 12 bytes (32-bit or 64-bit Dwarf3). That value 388 can be obtained through the expression ".is_dw64 ? 12 : 4". */ 389 ULong unit_length; 390 /* Offset of start of this unit in .debug_info */ 391 UWord cu_start_offset; 392 /* SVMA for this CU. In the D3 spec, is known as the "base 393 address of the compilation unit (last para sec 3.1.1). 
394 Needed for (amongst things) interpretation of location-list 395 values. */ 396 Addr cu_svma; 397 Bool cu_svma_known; 398 /* The debug_abbreviations table to be used for this Unit */ 399 UChar* debug_abbv; 400 /* Upper bound on size thereof (an overestimate, in general) */ 401 UWord debug_abbv_maxszB; 402 /* Where is .debug_str ? */ 403 UChar* debug_str_img; 404 UWord debug_str_sz; 405 /* Where is .debug_ranges ? */ 406 UChar* debug_ranges_img; 407 UWord debug_ranges_sz; 408 /* Where is .debug_loc ? */ 409 UChar* debug_loc_img; 410 UWord debug_loc_sz; 411 /* Where is .debug_line? */ 412 UChar* debug_line_img; 413 UWord debug_line_sz; 414 /* Where is .debug_info? */ 415 UChar* debug_info_img; 416 UWord debug_info_sz; 417 /* --- Needed so we can add stuff to the string table. --- */ 418 struct _DebugInfo* di; 419 /* --- a cache for set_abbv_Cursor --- */ 420 /* abbv_code == (ULong)-1 for an unused entry. */ 421 struct { ULong abbv_code; UWord posn; } saC_cache[N_ABBV_CACHE]; 422 UWord saC_cache_queries; 423 UWord saC_cache_misses; 424 } 425 CUConst; 426 427 428 /*------------------------------------------------------------*/ 429 /*--- ---*/ 430 /*--- Helper functions for Guarded Expressions ---*/ 431 /*--- ---*/ 432 /*------------------------------------------------------------*/ 433 434 /* Parse the location list starting at img-offset 'debug_loc_offset' 435 in .debug_loc. Results are biased with 'svma_of_referencing_CU' 436 and so I believe are correct SVMAs for the object as a whole. This 437 function allocates the UChar*, and the caller must deallocate it. 438 The resulting block is in so-called Guarded-Expression format. 439 440 Guarded-Expression format is similar but not identical to the DWARF3 441 location-list format. The format of each returned block is: 442 443 UChar biasMe; 444 UChar isEnd; 445 followed by zero or more of 446 447 (Addr aMin; Addr aMax; UShort nbytes; ..bytes..; UChar isEnd) 448 449 '..bytes..' 
is an standard DWARF3 location expression which is 450 valid when aMin <= pc <= aMax (possibly after suitable biasing). 451 452 The number of bytes in '..bytes..' is nbytes. 453 454 The end of the sequence is marked by an isEnd == 1 value. All 455 previous isEnd values must be zero. 456 457 biasMe is 1 if the aMin/aMax fields need this DebugInfo's 458 text_bias added before use, and 0 if the GX is this is not 459 necessary (is ready to go). 460 461 Hence the block can be quickly parsed and is self-describing. Note 462 that aMax is 1 less than the corresponding value in a DWARF3 463 location list. Zero length ranges, with aMax == aMin-1, are not 464 allowed. 465 */ 466 /* 2008-sept-12: moved ML_(pp_GX) from here to d3basics.c, where 467 it more logically belongs. */ 468 469 470 /* Apply a text bias to a GX. */ 471 static void bias_GX ( /*MOD*/GExpr* gx, struct _DebugInfo* di ) 472 { 473 UShort nbytes; 474 Addr* pA; 475 UChar* p = &gx->payload[0]; 476 UChar uc; 477 uc = *p++; /*biasMe*/ 478 if (uc == 0) 479 return; 480 vg_assert(uc == 1); 481 p[-1] = 0; /* mark it as done */ 482 while (True) { 483 uc = *p++; 484 if (uc == 1) 485 break; /*isEnd*/ 486 vg_assert(uc == 0); 487 /* t-bias aMin */ 488 pA = (Addr*)p; 489 *pA += di->text_debug_bias; 490 p += sizeof(Addr); 491 /* t-bias aMax */ 492 pA = (Addr*)p; 493 *pA += di->text_debug_bias; 494 p += sizeof(Addr); 495 /* nbytes, and actual expression */ 496 nbytes = * (UShort*)p; p += sizeof(UShort); 497 p += nbytes; 498 } 499 } 500 501 __attribute__((noinline)) 502 static GExpr* make_singleton_GX ( UChar* block, UWord nbytes ) 503 { 504 SizeT bytesReqd; 505 GExpr* gx; 506 UChar *p, *pstart; 507 508 vg_assert(sizeof(UWord) == sizeof(Addr)); 509 vg_assert(nbytes <= 0xFFFF); /* else we overflow the nbytes field */ 510 bytesReqd 511 = sizeof(UChar) /*biasMe*/ + sizeof(UChar) /*!isEnd*/ 512 + sizeof(UWord) /*aMin*/ + sizeof(UWord) /*aMax*/ 513 + sizeof(UShort) /*nbytes*/ + nbytes 514 + sizeof(UChar); /*isEnd*/ 515 516 gx = 
ML_(dinfo_zalloc)( "di.readdwarf3.msGX.1", 517 sizeof(GExpr) + bytesReqd ); 518 vg_assert(gx); 519 520 p = pstart = &gx->payload[0]; 521 522 * ((UChar*)p) = 0; /*biasMe*/ p += sizeof(UChar); 523 * ((UChar*)p) = 0; /*!isEnd*/ p += sizeof(UChar); 524 * ((Addr*)p) = 0; /*aMin*/ p += sizeof(Addr); 525 * ((Addr*)p) = ~((Addr)0); /*aMax */ p += sizeof(Addr); 526 * ((UShort*)p) = (UShort)nbytes; /*nbytes*/ p += sizeof(UShort); 527 VG_(memcpy)(p, block, nbytes); p += nbytes; 528 * ((UChar*)p) = 1; /*isEnd*/ p += sizeof(UChar); 529 530 vg_assert( (SizeT)(p - pstart) == bytesReqd); 531 vg_assert( &gx->payload[bytesReqd] 532 == ((UChar*)gx) + sizeof(GExpr) + bytesReqd ); 533 534 return gx; 535 } 536 537 __attribute__((noinline)) 538 static GExpr* make_general_GX ( CUConst* cc, 539 Bool td3, 540 UWord debug_loc_offset, 541 Addr svma_of_referencing_CU ) 542 { 543 Addr base; 544 Cursor loc; 545 XArray* xa; /* XArray of UChar */ 546 GExpr* gx; 547 Word nbytes; 548 549 vg_assert(sizeof(UWord) == sizeof(Addr)); 550 if (cc->debug_loc_sz == 0) 551 cc->barf("make_general_GX: .debug_loc is empty/missing"); 552 553 init_Cursor( &loc, cc->debug_loc_img, 554 cc->debug_loc_sz, 0, cc->barf, 555 "Overrun whilst reading .debug_loc section(2)" ); 556 set_position_of_Cursor( &loc, debug_loc_offset ); 557 558 TRACE_D3("make_general_GX (.debug_loc_offset = %lu, img = %p) {\n", 559 debug_loc_offset, get_address_of_Cursor( &loc ) ); 560 561 /* Who frees this xa? It is freed before this fn exits. */ 562 xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.mgGX.1", 563 ML_(dinfo_free), 564 sizeof(UChar) ); 565 566 { UChar c = 1; /*biasMe*/ VG_(addBytesToXA)( xa, &c, sizeof(c) ); } 567 568 base = 0; 569 while (True) { 570 Bool acquire; 571 UWord len; 572 /* Read a (host-)word pair. This is something of a hack since 573 the word size to read is really dictated by the ELF file; 574 however, we assume we're reading a file with the same 575 word-sizeness as the host. Reasonably enough. 
*/ 576 UWord w1 = get_UWord( &loc ); 577 UWord w2 = get_UWord( &loc ); 578 579 TRACE_D3(" %08lx %08lx\n", w1, w2); 580 if (w1 == 0 && w2 == 0) 581 break; /* end of list */ 582 583 if (w1 == -1UL) { 584 /* new value for 'base' */ 585 base = w2; 586 continue; 587 } 588 589 /* else a location expression follows */ 590 /* else enumerate [w1+base, w2+base) */ 591 /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc" 592 (sec 2.17.2) */ 593 if (w1 > w2) { 594 TRACE_D3("negative range is for .debug_loc expr at " 595 "file offset %lu\n", 596 debug_loc_offset); 597 cc->barf( "negative range in .debug_loc section" ); 598 } 599 600 /* ignore zero length ranges */ 601 acquire = w1 < w2; 602 len = (UWord)get_UShort( &loc ); 603 604 if (acquire) { 605 UWord w; 606 UShort s; 607 UChar c; 608 c = 0; /* !isEnd*/ 609 VG_(addBytesToXA)( xa, &c, sizeof(c) ); 610 w = w1 + base + svma_of_referencing_CU; 611 VG_(addBytesToXA)( xa, &w, sizeof(w) ); 612 w = w2 -1 + base + svma_of_referencing_CU; 613 VG_(addBytesToXA)( xa, &w, sizeof(w) ); 614 s = (UShort)len; 615 VG_(addBytesToXA)( xa, &s, sizeof(s) ); 616 } 617 618 while (len > 0) { 619 UChar byte = get_UChar( &loc ); 620 TRACE_D3("%02x", (UInt)byte); 621 if (acquire) 622 VG_(addBytesToXA)( xa, &byte, 1 ); 623 len--; 624 } 625 TRACE_D3("\n"); 626 } 627 628 { UChar c = 1; /*isEnd*/ VG_(addBytesToXA)( xa, &c, sizeof(c) ); } 629 630 nbytes = VG_(sizeXA)( xa ); 631 vg_assert(nbytes >= 1); 632 633 gx = ML_(dinfo_zalloc)( "di.readdwarf3.mgGX.2", sizeof(GExpr) + nbytes ); 634 vg_assert(gx); 635 VG_(memcpy)( &gx->payload[0], (UChar*)VG_(indexXA)(xa,0), nbytes ); 636 vg_assert( &gx->payload[nbytes] 637 == ((UChar*)gx) + sizeof(GExpr) + nbytes ); 638 639 VG_(deleteXA)( xa ); 640 641 TRACE_D3("}\n"); 642 643 return gx; 644 } 645 646 647 /*------------------------------------------------------------*/ 648 /*--- ---*/ 649 /*--- Helper functions for range lists and CU headers ---*/ 650 /*--- ---*/ 651 
/*------------------------------------------------------------*/ 652 653 /* Denotes an address range. Both aMin and aMax are included in the 654 range; hence a complete range is (0, ~0) and an empty range is any 655 (X, X-1) for X > 0.*/ 656 typedef 657 struct { Addr aMin; Addr aMax; } 658 AddrRange; 659 660 661 /* Generate an arbitrary structural total ordering on 662 XArray* of AddrRange. */ 663 static Word cmp__XArrays_of_AddrRange ( XArray* rngs1, XArray* rngs2 ) 664 { 665 Word n1, n2, i; 666 tl_assert(rngs1 && rngs2); 667 n1 = VG_(sizeXA)( rngs1 ); 668 n2 = VG_(sizeXA)( rngs2 ); 669 if (n1 < n2) return -1; 670 if (n1 > n2) return 1; 671 for (i = 0; i < n1; i++) { 672 AddrRange* rng1 = (AddrRange*)VG_(indexXA)( rngs1, i ); 673 AddrRange* rng2 = (AddrRange*)VG_(indexXA)( rngs2, i ); 674 if (rng1->aMin < rng2->aMin) return -1; 675 if (rng1->aMin > rng2->aMin) return 1; 676 if (rng1->aMax < rng2->aMax) return -1; 677 if (rng1->aMax > rng2->aMax) return 1; 678 } 679 return 0; 680 } 681 682 683 __attribute__((noinline)) 684 static XArray* /* of AddrRange */ empty_range_list ( void ) 685 { 686 XArray* xa; /* XArray of AddrRange */ 687 /* Who frees this xa? varstack_preen() does. */ 688 xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.erl.1", 689 ML_(dinfo_free), 690 sizeof(AddrRange) ); 691 return xa; 692 } 693 694 695 __attribute__((noinline)) 696 static XArray* unitary_range_list ( Addr aMin, Addr aMax ) 697 { 698 XArray* xa; 699 AddrRange pair; 700 vg_assert(aMin <= aMax); 701 /* Who frees this xa? varstack_preen() does. */ 702 xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.url.1", 703 ML_(dinfo_free), 704 sizeof(AddrRange) ); 705 pair.aMin = aMin; 706 pair.aMax = aMax; 707 VG_(addToXA)( xa, &pair ); 708 return xa; 709 } 710 711 712 /* Enumerate the address ranges starting at img-offset 713 'debug_ranges_offset' in .debug_ranges. Results are biased with 714 'svma_of_referencing_CU' and so I believe are correct SVMAs for the 715 object as a whole. 
This function allocates the XArray, and the
   caller must deallocate it.

   Each non-empty range [w1, w2) read from the section is recorded as
   aMin = w1 + base + svma, aMax = w2 - 1 + base + svma, where 'base'
   starts at zero and is replaced whenever a (-1, newbase) pair is
   seen.  Zero-length ranges are skipped, and a (0, 0) pair ends the
   list.  cc->barf (which does not return) is called if .debug_ranges
   is empty, if a range has w1 > w2, if a biased range wraps around
   the address space, or if reading overruns the section. */
__attribute__((noinline))
static XArray* /* of AddrRange */
       get_range_list ( CUConst* cc,
                        Bool     td3,
                        UWord    debug_ranges_offset,
                        Addr     svma_of_referencing_CU )
{
   Addr      base;
   Cursor    ranges;
   XArray*   xa; /* XArray of AddrRange */
   AddrRange pair;

   if (cc->debug_ranges_sz == 0)
      cc->barf("get_range_list: .debug_ranges is empty/missing");

   init_Cursor( &ranges, cc->debug_ranges_img,
                cc->debug_ranges_sz, 0, cc->barf,
                "Overrun whilst reading .debug_ranges section(2)" );
   set_position_of_Cursor( &ranges, debug_ranges_offset );

   /* Who frees this xa?  varstack_preen() does. */
   xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.grl.1", ML_(dinfo_free),
                    sizeof(AddrRange) );
   base = 0;
   while (True) {
      /* Read a (host-)word pair.  This is something of a hack since
         the word size to read is really dictated by the ELF file;
         however, we assume we're reading a file with the same
         word-sizeness as the host.  Reasonably enough. */
      UWord w1 = get_UWord( &ranges );
      UWord w2 = get_UWord( &ranges );

      if (w1 == 0 && w2 == 0)
         break; /* end of list. */

      if (w1 == -1UL) {
         /* new value for 'base' */
         base = w2;
         continue;
      }

      /* else enumerate [w1+base, w2+base) */
      /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc"
         (sec 2.17.2) */
      if (w1 > w2) {
         /* barf does not return, so release the array first */
         VG_(deleteXA)( xa );
         cc->barf( "negative range in .debug_ranges section" );
      }
      if (w1 < w2) {
         pair.aMin = w1     + base + svma_of_referencing_CU;
         pair.aMax = w2 - 1 + base + svma_of_referencing_CU;
         /* These values are derived from the file, and the additions
            can wrap.  Treat that as bad input rather than asserting. */
         if (pair.aMin > pair.aMax) {
            VG_(deleteXA)( xa );
            cc->barf( "get_range_list: address range wraps around" );
         }
         VG_(addToXA)( xa, &pair );
      }
   }
   return xa;
}


/* Parse the Compilation Unit header indicated at 'c' and
   initialise 'cc' accordingly.
*/ 776 static __attribute__((noinline)) 777 void parse_CU_Header ( /*OUT*/CUConst* cc, 778 Bool td3, 779 Cursor* c, 780 UChar* debug_abbv_img, UWord debug_abbv_sz ) 781 { 782 UChar address_size; 783 UWord debug_abbrev_offset; 784 Int i; 785 786 VG_(memset)(cc, 0, sizeof(*cc)); 787 vg_assert(c && c->barf); 788 cc->barf = c->barf; 789 790 /* initial_length field */ 791 cc->unit_length 792 = get_Initial_Length( &cc->is_dw64, c, 793 "parse_CU_Header: invalid initial-length field" ); 794 795 TRACE_D3(" Length: %lld\n", cc->unit_length ); 796 797 /* version */ 798 cc->version = get_UShort( c ); 799 if (cc->version != 2 && cc->version != 3 && cc->version != 4) 800 cc->barf( "parse_CU_Header: is neither DWARF2 nor DWARF3 nor DWARF4" ); 801 TRACE_D3(" Version: %d\n", (Int)cc->version ); 802 803 /* debug_abbrev_offset */ 804 debug_abbrev_offset = get_Dwarfish_UWord( c, cc->is_dw64 ); 805 if (debug_abbrev_offset >= debug_abbv_sz) 806 cc->barf( "parse_CU_Header: invalid debug_abbrev_offset" ); 807 TRACE_D3(" Abbrev Offset: %ld\n", debug_abbrev_offset ); 808 809 /* address size. If this isn't equal to the host word size, just 810 give up. This makes it safe to assume elsewhere that 811 DW_FORM_addr and DW_FORM_ref_addr can be treated as a host 812 word. */ 813 address_size = get_UChar( c ); 814 if (address_size != sizeof(void*)) 815 cc->barf( "parse_CU_Header: invalid address_size" ); 816 TRACE_D3(" Pointer Size: %d\n", (Int)address_size ); 817 818 /* Set up so that cc->debug_abbv points to the relevant table for 819 this CU. Set the szB so that at least we can't read off the end 820 of the debug_abbrev section -- potentially (and quite likely) 821 too big, if this isn't the last table in the section, but at 822 least it's safe. 
*/ 823 cc->debug_abbv = debug_abbv_img + debug_abbrev_offset; 824 cc->debug_abbv_maxszB = debug_abbv_sz - debug_abbrev_offset; 825 /* and empty out the set_abbv_Cursor cache */ 826 if (0) VG_(printf)("XXXXXX initialise set_abbv_Cursor cache\n"); 827 for (i = 0; i < N_ABBV_CACHE; i++) { 828 cc->saC_cache[i].abbv_code = (ULong)-1; /* unused */ 829 cc->saC_cache[i].posn = 0; 830 } 831 cc->saC_cache_queries = 0; 832 cc->saC_cache_misses = 0; 833 } 834 835 836 /* Set up 'c' so it is ready to parse the abbv table entry code 837 'abbv_code' for this compilation unit. */ 838 static __attribute__((noinline)) 839 void set_abbv_Cursor ( /*OUT*/Cursor* c, Bool td3, 840 CUConst* cc, ULong abbv_code ) 841 { 842 Int i; 843 ULong acode; 844 845 if (abbv_code == 0) 846 cc->barf("set_abbv_Cursor: abbv_code == 0" ); 847 848 /* (ULong)-1 is used to represent an empty cache slot. So we can't 849 allow it. In any case no valid DWARF3 should make a reference 850 to a negative abbreviation code. [at least, they always seem to 851 be numbered upwards from zero as far as I have seen] */ 852 vg_assert(abbv_code != (ULong)-1); 853 854 /* First search the cache. */ 855 if (0) VG_(printf)("XXXXXX search set_abbv_Cursor cache\n"); 856 cc->saC_cache_queries++; 857 for (i = 0; i < N_ABBV_CACHE; i++) { 858 /* No need to test the cached abbv_codes for -1 (empty), since 859 we just asserted that abbv_code is not -1. */ 860 if (cc->saC_cache[i].abbv_code == abbv_code) { 861 /* Found it. Cool. Set up the parser using the cached 862 position, and move this cache entry 1 step closer to the 863 front. 
*/ 864 if (0) VG_(printf)("XXXXXX found in set_abbv_Cursor cache\n"); 865 init_Cursor( c, cc->debug_abbv, 866 cc->debug_abbv_maxszB, cc->saC_cache[i].posn, 867 cc->barf, 868 "Overrun whilst parsing .debug_abbrev section(1)" ); 869 if (i > 0) { 870 ULong t_abbv_code = cc->saC_cache[i].abbv_code; 871 UWord t_posn = cc->saC_cache[i].posn; 872 while (i > 0) { 873 cc->saC_cache[i] = cc->saC_cache[i-1]; 874 cc->saC_cache[0].abbv_code = t_abbv_code; 875 cc->saC_cache[0].posn = t_posn; 876 i--; 877 } 878 } 879 return; 880 } 881 } 882 883 /* No. It's not in the cache. We have to search through 884 .debug_abbrev, of course taking care to update the cache 885 when done. */ 886 887 cc->saC_cache_misses++; 888 init_Cursor( c, cc->debug_abbv, cc->debug_abbv_maxszB, 0, cc->barf, 889 "Overrun whilst parsing .debug_abbrev section(2)" ); 890 891 /* Now iterate though the table until we find the requested 892 entry. */ 893 while (True) { 894 //ULong atag; 895 //UInt has_children; 896 acode = get_ULEB128( c ); 897 if (acode == 0) break; /* end of the table */ 898 if (acode == abbv_code) break; /* found it */ 899 /*atag = */ get_ULEB128( c ); 900 /*has_children = */ get_UChar( c ); 901 //TRACE_D3(" %llu %s [%s]\n", 902 // acode, pp_DW_TAG(atag), pp_DW_children(has_children)); 903 while (True) { 904 ULong at_name = get_ULEB128( c ); 905 ULong at_form = get_ULEB128( c ); 906 if (at_name == 0 && at_form == 0) break; 907 //TRACE_D3(" %18s %s\n", 908 // pp_DW_AT(at_name), pp_DW_FORM(at_form)); 909 } 910 } 911 912 if (acode == 0) { 913 /* Not found. This is fatal. */ 914 cc->barf("set_abbv_Cursor: abbv_code not found"); 915 } 916 917 /* Otherwise, 'c' is now set correctly to parse the relevant entry, 918 starting from the abbreviation entry's tag. So just cache 919 the result, and return. 
*/ 920 for (i = N_ABBV_CACHE-1; i > N_ABBV_CACHE/2; i--) { 921 cc->saC_cache[i] = cc->saC_cache[i-1]; 922 } 923 if (0) VG_(printf)("XXXXXX update set_abbv_Cursor cache\n"); 924 cc->saC_cache[N_ABBV_CACHE/2].abbv_code = abbv_code; 925 cc->saC_cache[N_ABBV_CACHE/2].posn = get_position_of_Cursor(c); 926 } 927 928 929 /* From 'c', get the Form data into the lowest 1/2/4/8 bytes of *cts. 930 931 If *cts itself contains the entire result, then *ctsSzB is set to 932 1,2,4 or 8 accordingly and *ctsMemSzB is set to zero. 933 934 Alternatively, the result can be a block of data (in the 935 transiently mapped-in object, so-called "image" space). If so then 936 the lowest sizeof(void*)/8 bytes of *cts hold a pointer to said 937 image, *ctsSzB is zero, and *ctsMemSzB is the size of the block. 938 939 Unfortunately this means it is impossible to represent a zero-size 940 image block since that would have *ctsSzB == 0 and *ctsMemSzB == 0 941 and so is ambiguous (which case it is?) 942 943 Invariant on successful return: 944 (*ctsSzB > 0 && *ctsMemSzB == 0) 945 || (*ctsSzB == 0 && *ctsMemSzB > 0) 946 */ 947 static 948 void get_Form_contents ( /*OUT*/ULong* cts, 949 /*OUT*/Int* ctsSzB, 950 /*OUT*/UWord* ctsMemSzB, 951 CUConst* cc, Cursor* c, 952 Bool td3, DW_FORM form ) 953 { 954 *cts = 0; 955 *ctsSzB = 0; 956 *ctsMemSzB = 0; 957 switch (form) { 958 case DW_FORM_data1: 959 *cts = (ULong)(UChar)get_UChar(c); 960 *ctsSzB = 1; 961 TRACE_D3("%u", (UInt)*cts); 962 break; 963 case DW_FORM_data2: 964 *cts = (ULong)(UShort)get_UShort(c); 965 *ctsSzB = 2; 966 TRACE_D3("%u", (UInt)*cts); 967 break; 968 case DW_FORM_data4: 969 *cts = (ULong)(UInt)get_UInt(c); 970 *ctsSzB = 4; 971 TRACE_D3("%u", (UInt)*cts); 972 break; 973 case DW_FORM_data8: 974 *cts = get_ULong(c); 975 *ctsSzB = 8; 976 TRACE_D3("%llu", *cts); 977 break; 978 case DW_FORM_sec_offset: 979 *cts = (ULong)get_Dwarfish_UWord( c, cc->is_dw64 ); 980 *ctsSzB = cc->is_dw64 ? 
8 : 4; 981 TRACE_D3("%llu", *cts); 982 break; 983 case DW_FORM_sdata: 984 *cts = (ULong)(Long)get_SLEB128(c); 985 *ctsSzB = 8; 986 TRACE_D3("%lld", (Long)*cts); 987 break; 988 case DW_FORM_udata: 989 *cts = (ULong)(Long)get_ULEB128(c); 990 *ctsSzB = 8; 991 TRACE_D3("%llu", (Long)*cts); 992 break; 993 case DW_FORM_addr: 994 /* note, this is a hack. DW_FORM_addr is defined as getting 995 a word the size of the target machine as defined by the 996 address_size field in the CU Header. However, 997 parse_CU_Header() rejects all inputs except those for 998 which address_size == sizeof(Word), hence we can just 999 treat it as a (host) Word. */ 1000 *cts = (ULong)(UWord)get_UWord(c); 1001 *ctsSzB = sizeof(UWord); 1002 TRACE_D3("0x%lx", (UWord)*cts); 1003 break; 1004 1005 case DW_FORM_ref_addr: 1006 /* We make the same word-size assumption as DW_FORM_addr. */ 1007 /* What does this really mean? From D3 Sec 7.5.4, 1008 description of "reference", it would appear to reference 1009 some other DIE, by specifying the offset from the 1010 beginning of a .debug_info section. The D3 spec mentions 1011 that this might be in some other shared object and 1012 executable. But I don't see how the name of the other 1013 object/exe is specified. 1014 1015 At least for the DW_FORM_ref_addrs created by icc11, the 1016 references seem to be within the same object/executable. 1017 So for the moment we merely range-check, to see that they 1018 actually do specify a plausible offset within this 1019 object's .debug_info, and return the value unchanged. 1020 */ 1021 *cts = (ULong)(UWord)get_UWord(c); 1022 *ctsSzB = sizeof(UWord); 1023 TRACE_D3("0x%lx", (UWord)*cts); 1024 if (0) VG_(printf)("DW_FORM_ref_addr 0x%lx\n", (UWord)*cts); 1025 if (/* the following 2 are surely impossible, but ... */ 1026 cc->debug_info_img == NULL || cc->debug_info_sz == 0 1027 || *cts >= (ULong)cc->debug_info_sz) { 1028 /* Hmm. Offset is nonsensical for this object's .debug_info 1029 section. Be safe and reject it. 
*/ 1030 cc->barf("get_Form_contents: DW_FORM_ref_addr points " 1031 "outside .debug_info"); 1032 } 1033 break; 1034 1035 case DW_FORM_strp: { 1036 /* this is an offset into .debug_str */ 1037 UChar* str; 1038 UWord uw = (UWord)get_Dwarfish_UWord( c, cc->is_dw64 ); 1039 if (cc->debug_str_img == NULL || uw >= cc->debug_str_sz) 1040 cc->barf("get_Form_contents: DW_FORM_strp " 1041 "points outside .debug_str"); 1042 /* FIXME: check the entire string lies inside debug_str, 1043 not just the first byte of it. */ 1044 str = (UChar*)cc->debug_str_img + uw; 1045 TRACE_D3("(indirect string, offset: 0x%lx): %s", uw, str); 1046 *cts = (ULong)(UWord)str; 1047 *ctsMemSzB = 1 + (ULong)VG_(strlen)(str); 1048 break; 1049 } 1050 case DW_FORM_string: { 1051 UChar* str = get_AsciiZ(c); 1052 TRACE_D3("%s", str); 1053 *cts = (ULong)(UWord)str; 1054 /* strlen is safe because get_AsciiZ already 'vetted' the 1055 entire string */ 1056 *ctsMemSzB = 1 + (ULong)VG_(strlen)(str); 1057 break; 1058 } 1059 case DW_FORM_ref1: { 1060 UChar u8 = get_UChar(c); 1061 UWord res = cc->cu_start_offset + (UWord)u8; 1062 *cts = (ULong)res; 1063 *ctsSzB = sizeof(UWord); 1064 TRACE_D3("<%lx>", res); 1065 break; 1066 } 1067 case DW_FORM_ref2: { 1068 UShort u16 = get_UShort(c); 1069 UWord res = cc->cu_start_offset + (UWord)u16; 1070 *cts = (ULong)res; 1071 *ctsSzB = sizeof(UWord); 1072 TRACE_D3("<%lx>", res); 1073 break; 1074 } 1075 case DW_FORM_ref4: { 1076 UInt u32 = get_UInt(c); 1077 UWord res = cc->cu_start_offset + (UWord)u32; 1078 *cts = (ULong)res; 1079 *ctsSzB = sizeof(UWord); 1080 TRACE_D3("<%lx>", res); 1081 break; 1082 } 1083 case DW_FORM_ref8: { 1084 ULong u64 = get_ULong(c); 1085 UWord res = cc->cu_start_offset + (UWord)u64; 1086 *cts = (ULong)res; 1087 *ctsSzB = sizeof(UWord); 1088 TRACE_D3("<%lx>", res); 1089 break; 1090 } 1091 case DW_FORM_ref_udata: { 1092 ULong u64 = get_ULEB128(c); 1093 UWord res = cc->cu_start_offset + (UWord)u64; 1094 *cts = (ULong)res; 1095 *ctsSzB = sizeof(UWord); 1096 
TRACE_D3("<%lx>", res); 1097 break; 1098 } 1099 case DW_FORM_flag: { 1100 UChar u8 = get_UChar(c); 1101 TRACE_D3("%u", (UInt)u8); 1102 *cts = (ULong)u8; 1103 *ctsSzB = 1; 1104 break; 1105 } 1106 case DW_FORM_flag_present: 1107 TRACE_D3("1"); 1108 *cts = 1; 1109 *ctsSzB = 1; 1110 break; 1111 case DW_FORM_block1: { 1112 ULong u64b; 1113 ULong u64 = (ULong)get_UChar(c); 1114 UChar* block = get_address_of_Cursor(c); 1115 TRACE_D3("%llu byte block: ", u64); 1116 for (u64b = u64; u64b > 0; u64b--) { 1117 UChar u8 = get_UChar(c); 1118 TRACE_D3("%x ", (UInt)u8); 1119 } 1120 *cts = (ULong)(UWord)block; 1121 *ctsMemSzB = (UWord)u64; 1122 break; 1123 } 1124 case DW_FORM_block2: { 1125 ULong u64b; 1126 ULong u64 = (ULong)get_UShort(c); 1127 UChar* block = get_address_of_Cursor(c); 1128 TRACE_D3("%llu byte block: ", u64); 1129 for (u64b = u64; u64b > 0; u64b--) { 1130 UChar u8 = get_UChar(c); 1131 TRACE_D3("%x ", (UInt)u8); 1132 } 1133 *cts = (ULong)(UWord)block; 1134 *ctsMemSzB = (UWord)u64; 1135 break; 1136 } 1137 case DW_FORM_block4: { 1138 ULong u64b; 1139 ULong u64 = (ULong)get_UInt(c); 1140 UChar* block = get_address_of_Cursor(c); 1141 TRACE_D3("%llu byte block: ", u64); 1142 for (u64b = u64; u64b > 0; u64b--) { 1143 UChar u8 = get_UChar(c); 1144 TRACE_D3("%x ", (UInt)u8); 1145 } 1146 *cts = (ULong)(UWord)block; 1147 *ctsMemSzB = (UWord)u64; 1148 break; 1149 } 1150 case DW_FORM_exprloc: 1151 case DW_FORM_block: { 1152 ULong u64b; 1153 ULong u64 = (ULong)get_ULEB128(c); 1154 UChar* block = get_address_of_Cursor(c); 1155 TRACE_D3("%llu byte block: ", u64); 1156 for (u64b = u64; u64b > 0; u64b--) { 1157 UChar u8 = get_UChar(c); 1158 TRACE_D3("%x ", (UInt)u8); 1159 } 1160 *cts = (ULong)(UWord)block; 1161 *ctsMemSzB = (UWord)u64; 1162 break; 1163 } 1164 case DW_FORM_ref_sig8: { 1165 ULong u64b; 1166 UChar* block = get_address_of_Cursor(c); 1167 TRACE_D3("8 byte signature: "); 1168 for (u64b = 8; u64b > 0; u64b--) { 1169 UChar u8 = get_UChar(c); 1170 TRACE_D3("%x ", (UInt)u8); 
1171 } 1172 *cts = (ULong)(UWord)block; 1173 *ctsMemSzB = 8; 1174 break; 1175 } 1176 case DW_FORM_indirect: 1177 get_Form_contents (cts, ctsSzB, ctsMemSzB, cc, c, td3, 1178 (DW_FORM)get_ULEB128(c)); 1179 return; 1180 1181 default: 1182 VG_(printf)( 1183 "get_Form_contents: unhandled %d (%s) at <%lx>\n", 1184 form, ML_(pp_DW_FORM)(form), get_position_of_Cursor(c)); 1185 c->barf("get_Form_contents: unhandled DW_FORM"); 1186 } 1187 } 1188 1189 1190 /*------------------------------------------------------------*/ 1191 /*--- ---*/ 1192 /*--- Parsing of variable-related DIEs ---*/ 1193 /*--- ---*/ 1194 /*------------------------------------------------------------*/ 1195 1196 typedef 1197 struct _TempVar { 1198 UChar* name; /* in DebugInfo's .strchunks */ 1199 /* Represent ranges economically. nRanges is the number of 1200 ranges. Cases: 1201 0: .rngOneMin .rngOneMax .manyRanges are all zero 1202 1: .rngOneMin .rngOneMax hold the range; .rngMany is NULL 1203 2: .rngOneMin .rngOneMax are zero; .rngMany holds the ranges. 1204 This is merely an optimisation to avoid having to allocate 1205 and free the XArray in the common (98%) of cases where there 1206 is zero or one address ranges. */ 1207 UWord nRanges; 1208 Addr rngOneMin; 1209 Addr rngOneMax; 1210 XArray* rngMany; /* of AddrRange. NON-UNIQUE PTR in AR_DINFO. */ 1211 /* Do not free .rngMany, since many TempVars will have the same 1212 value. Instead the associated storage is to be freed by 1213 deleting 'rangetree', which stores a single copy of each 1214 range. */ 1215 /* --- */ 1216 Int level; 1217 UWord typeR; /* a cuOff */ 1218 GExpr* gexpr; /* for this variable */ 1219 GExpr* fbGX; /* to find the frame base of the enclosing fn, if 1220 any */ 1221 UChar* fName; /* declaring file name, or NULL */ 1222 Int fLine; /* declaring file line number, or zero */ 1223 /* offset in .debug_info, so that abstract instances can be 1224 found to satisfy references from concrete instances. 
/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- Parsing of variable-related DIEs                     ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

/* Temporary record of one variable or formal parameter, as collected
   by parse_var_DIE. */
typedef
   struct _TempVar {
      UChar*  name; /* in DebugInfo's .strchunks */
      /* Represent ranges economically.  nRanges is the number of
         ranges.  Cases:
            0: .rngOneMin .rngOneMax .rngMany are all zero
            1: .rngOneMin .rngOneMax hold the range; .rngMany is NULL
            2 or more: .rngOneMin .rngOneMax are zero; .rngMany
               holds the ranges.
         This is merely an optimisation to avoid having to allocate
         and free the XArray in the common (98%) of cases where there
         is zero or one address ranges. */
      UWord   nRanges;
      Addr    rngOneMin;
      Addr    rngOneMax;
      XArray* rngMany; /* of AddrRange.  NON-UNIQUE PTR in AR_DINFO. */
      /* Do not free .rngMany, since many TempVars will have the same
         value.  Instead the associated storage is to be freed by
         deleting 'rangetree', which stores a single copy of each
         range. */
      /* --- */
      Int     level;
      UWord   typeR; /* a cuOff */
      GExpr*  gexpr; /* for this variable */
      GExpr*  fbGX;  /* to find the frame base of the enclosing fn, if
                        any */
      UChar*  fName; /* declaring file name, or NULL */
      Int     fLine; /* declaring file line number, or zero */
      /* offset in .debug_info, so that abstract instances can be
         found to satisfy references from concrete instances. */
      UWord   dioff;
      UWord   absOri; /* so the absOri fields refer to dioff fields
                         in some other, related TempVar. */
   }
   TempVar;

/* Maximum number of entries on the D3VarParser range stack. */
#define N_D3_VAR_STACK 48

typedef
   struct {
      /* Contains the range stack: a stack of address ranges, one
         stack entry for each nested scope.

         Some scope entries are created by function definitions
         (DW_TAG_subprogram), and for those, we also note the GExpr
         derived from its DW_AT_frame_base attribute, if any.
         Consequently it should be possible to find, for any
         variable's DIE, the GExpr for the containing function's
         DW_AT_frame_base by scanning back through the stack to find
         the nearest entry associated with a function.  This somewhat
         elaborate scheme is provided so as to make it possible to
         obtain the correct DW_AT_frame_base expression even in the
         presence of nested functions (or to be more precise, in the
         presence of nested DW_TAG_subprogram DIEs).
      */
      Int     sp; /* [sp] is innermost active entry; sp==-1 for empty
                     stack */
      XArray* ranges[N_D3_VAR_STACK]; /* XArray of AddrRange */
      Int     level[N_D3_VAR_STACK];  /* D3 DIE levels */
      Bool    isFunc[N_D3_VAR_STACK]; /* from DW_TAG_subprogram? */
      GExpr*  fbGX[N_D3_VAR_STACK];   /* if isFunc, contains the FB
                                         expr, else NULL */
      /* The file name table.  Is a mapping from integer index to the
         (permanent) copy of the string, iow a non-img area. */
      XArray* /* of UChar* */ filenameTable;
   }
   D3VarParser;
*/ 1259 XArray* /* of UChar* */ filenameTable; 1260 } 1261 D3VarParser; 1262 1263 static void varstack_show ( D3VarParser* parser, HChar* str ) { 1264 Word i, j; 1265 VG_(printf)(" varstack (%s) {\n", str); 1266 for (i = 0; i <= parser->sp; i++) { 1267 XArray* xa = parser->ranges[i]; 1268 vg_assert(xa); 1269 VG_(printf)(" [%ld] (level %d)", i, parser->level[i]); 1270 if (parser->isFunc[i]) { 1271 VG_(printf)(" (fbGX=%p)", parser->fbGX[i]); 1272 } else { 1273 vg_assert(parser->fbGX[i] == NULL); 1274 } 1275 VG_(printf)(": "); 1276 if (VG_(sizeXA)( xa ) == 0) { 1277 VG_(printf)("** empty PC range array **"); 1278 } else { 1279 for (j = 0; j < VG_(sizeXA)( xa ); j++) { 1280 AddrRange* range = (AddrRange*) VG_(indexXA)( xa, j ); 1281 vg_assert(range); 1282 VG_(printf)("[%#lx,%#lx] ", range->aMin, range->aMax); 1283 } 1284 } 1285 VG_(printf)("\n"); 1286 } 1287 VG_(printf)(" }\n"); 1288 } 1289 1290 /* Remove from the stack, all entries with .level > 'level' */ 1291 static 1292 void varstack_preen ( D3VarParser* parser, Bool td3, Int level ) 1293 { 1294 Bool changed = False; 1295 vg_assert(parser->sp < N_D3_VAR_STACK); 1296 while (True) { 1297 vg_assert(parser->sp >= -1); 1298 if (parser->sp == -1) break; 1299 if (parser->level[parser->sp] <= level) break; 1300 if (0) 1301 TRACE_D3("BBBBAAAA varstack_pop [newsp=%d]\n", parser->sp-1); 1302 vg_assert(parser->ranges[parser->sp]); 1303 /* Who allocated this xa? get_range_list() or 1304 unitary_range_list(). 
*/ 1305 VG_(deleteXA)( parser->ranges[parser->sp] ); 1306 parser->ranges[parser->sp] = NULL; 1307 parser->level[parser->sp] = 0; 1308 parser->isFunc[parser->sp] = False; 1309 parser->fbGX[parser->sp] = NULL; 1310 parser->sp--; 1311 changed = True; 1312 } 1313 if (changed && td3) 1314 varstack_show( parser, "after preen" ); 1315 } 1316 1317 static void varstack_push ( CUConst* cc, 1318 D3VarParser* parser, 1319 Bool td3, 1320 XArray* ranges, Int level, 1321 Bool isFunc, GExpr* fbGX ) { 1322 if (0) 1323 TRACE_D3("BBBBAAAA varstack_push[newsp=%d]: %d %p\n", 1324 parser->sp+1, level, ranges); 1325 1326 /* First we need to zap everything >= 'level', as we are about to 1327 replace any previous entry at 'level', so .. */ 1328 varstack_preen(parser, /*td3*/False, level-1); 1329 1330 vg_assert(parser->sp >= -1); 1331 vg_assert(parser->sp < N_D3_VAR_STACK); 1332 if (parser->sp == N_D3_VAR_STACK-1) 1333 cc->barf("varstack_push: N_D3_VAR_STACK is too low; " 1334 "increase and recompile"); 1335 if (parser->sp >= 0) 1336 vg_assert(parser->level[parser->sp] < level); 1337 parser->sp++; 1338 vg_assert(parser->ranges[parser->sp] == NULL); 1339 vg_assert(parser->level[parser->sp] == 0); 1340 vg_assert(parser->isFunc[parser->sp] == False); 1341 vg_assert(parser->fbGX[parser->sp] == NULL); 1342 vg_assert(ranges != NULL); 1343 if (!isFunc) vg_assert(fbGX == NULL); 1344 parser->ranges[parser->sp] = ranges; 1345 parser->level[parser->sp] = level; 1346 parser->isFunc[parser->sp] = isFunc; 1347 parser->fbGX[parser->sp] = fbGX; 1348 if (td3) 1349 varstack_show( parser, "after push" ); 1350 } 1351 1352 1353 /* cts, ctsSzB, ctsMemSzB are derived from a DW_AT_location and so 1354 refer either to a location expression or to a location list. 1355 Figure out which, and in both cases bundle the expression or 1356 location list into a so-called GExpr (guarded expression). 
*/ 1357 __attribute__((noinline)) 1358 static GExpr* get_GX ( CUConst* cc, Bool td3, 1359 ULong cts, Int ctsSzB, UWord ctsMemSzB ) 1360 { 1361 GExpr* gexpr = NULL; 1362 if (ctsMemSzB > 0 && ctsSzB == 0) { 1363 /* represents an in-line location expression, and cts points 1364 right at it */ 1365 gexpr = make_singleton_GX( (UChar*)(UWord)cts, ctsMemSzB ); 1366 } 1367 else 1368 if (ctsMemSzB == 0 && ctsSzB > 0) { 1369 /* represents location list. cts is the offset of it in 1370 .debug_loc. */ 1371 if (!cc->cu_svma_known) 1372 cc->barf("get_GX: location list, but CU svma is unknown"); 1373 gexpr = make_general_GX( cc, td3, (UWord)cts, cc->cu_svma ); 1374 } 1375 else { 1376 vg_assert(0); /* else caller is bogus */ 1377 } 1378 return gexpr; 1379 } 1380 1381 1382 static 1383 void read_filename_table( /*MOD*/D3VarParser* parser, 1384 CUConst* cc, UWord debug_line_offset, 1385 Bool td3 ) 1386 { 1387 Bool is_dw64; 1388 Cursor c; 1389 Word i; 1390 UShort version; 1391 UChar opcode_base; 1392 UChar* str; 1393 1394 vg_assert(parser && cc && cc->barf); 1395 if ((!cc->debug_line_img) 1396 || cc->debug_line_sz <= debug_line_offset) 1397 cc->barf("read_filename_table: .debug_line is missing?"); 1398 1399 init_Cursor( &c, cc->debug_line_img, 1400 cc->debug_line_sz, debug_line_offset, cc->barf, 1401 "Overrun whilst reading .debug_line section(1)" ); 1402 1403 /* unit_length = */ 1404 get_Initial_Length( &is_dw64, &c, 1405 "read_filename_table: invalid initial-length field" ); 1406 version = get_UShort( &c ); 1407 if (version != 2 && version != 3 && version != 4) 1408 cc->barf("read_filename_table: Only DWARF version 2, 3 and 4 line info " 1409 "is currently supported."); 1410 /*header_length = (ULong)*/ get_Dwarfish_UWord( &c, is_dw64 ); 1411 /*minimum_instruction_length = */ get_UChar( &c ); 1412 if (version >= 4) 1413 /*maximum_operations_per_insn = */ get_UChar( &c ); 1414 /*default_is_stmt = */ get_UChar( &c ); 1415 /*line_base = (Char)*/ get_UChar( &c ); 1416 /*line_range = */ 
get_UChar( &c ); 1417 opcode_base = get_UChar( &c ); 1418 /* skip over "standard_opcode_lengths" */ 1419 for (i = 1; i < (Word)opcode_base; i++) 1420 (void)get_UChar( &c ); 1421 1422 /* skip over the directory names table */ 1423 while (peek_UChar(&c) != 0) { 1424 (void)get_AsciiZ(&c); 1425 } 1426 (void)get_UChar(&c); /* skip terminating zero */ 1427 1428 /* Read and record the file names table */ 1429 vg_assert(parser->filenameTable); 1430 vg_assert( VG_(sizeXA)( parser->filenameTable ) == 0 ); 1431 /* Add a dummy index-zero entry. DWARF3 numbers its files 1432 from 1, for some reason. */ 1433 str = ML_(addStr)( cc->di, "<unknown_file>", -1 ); 1434 VG_(addToXA)( parser->filenameTable, &str ); 1435 while (peek_UChar(&c) != 0) { 1436 str = get_AsciiZ(&c); 1437 TRACE_D3(" read_filename_table: %ld %s\n", 1438 VG_(sizeXA)(parser->filenameTable), str); 1439 str = ML_(addStr)( cc->di, str, -1 ); 1440 VG_(addToXA)( parser->filenameTable, &str ); 1441 (void)get_ULEB128( &c ); /* skip directory index # */ 1442 (void)get_ULEB128( &c ); /* skip last mod time */ 1443 (void)get_ULEB128( &c ); /* file size */ 1444 } 1445 /* We're done! The rest of it is not interesting. 
*/ 1446 } 1447 1448 1449 __attribute__((noinline)) 1450 static void parse_var_DIE ( 1451 /*MOD*/WordFM* /* of (XArray* of AddrRange, void) */ rangestree, 1452 /*MOD*/XArray* /* of TempVar* */ tempvars, 1453 /*MOD*/XArray* /* of GExpr* */ gexprs, 1454 /*MOD*/D3VarParser* parser, 1455 DW_TAG dtag, 1456 UWord posn, 1457 Int level, 1458 Cursor* c_die, 1459 Cursor* c_abbv, 1460 CUConst* cc, 1461 Bool td3 1462 ) 1463 { 1464 ULong cts; 1465 Int ctsSzB; 1466 UWord ctsMemSzB; 1467 1468 UWord saved_die_c_offset = get_position_of_Cursor( c_die ); 1469 UWord saved_abbv_c_offset = get_position_of_Cursor( c_abbv ); 1470 1471 varstack_preen( parser, td3, level-1 ); 1472 1473 if (dtag == DW_TAG_compile_unit) { 1474 Bool have_lo = False; 1475 Bool have_hi1 = False; 1476 Bool have_range = False; 1477 Addr ip_lo = 0; 1478 Addr ip_hi1 = 0; 1479 Addr rangeoff = 0; 1480 while (True) { 1481 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 1482 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 1483 if (attr == 0 && form == 0) break; 1484 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 1485 cc, c_die, False/*td3*/, form ); 1486 if (attr == DW_AT_low_pc && ctsSzB > 0) { 1487 ip_lo = cts; 1488 have_lo = True; 1489 } 1490 if (attr == DW_AT_high_pc && ctsSzB > 0) { 1491 ip_hi1 = cts; 1492 have_hi1 = True; 1493 } 1494 if (attr == DW_AT_ranges && ctsSzB > 0) { 1495 rangeoff = cts; 1496 have_range = True; 1497 } 1498 if (attr == DW_AT_stmt_list && ctsSzB > 0) { 1499 read_filename_table( parser, cc, (UWord)cts, td3 ); 1500 } 1501 } 1502 /* Now, does this give us an opportunity to find this 1503 CU's svma? */ 1504 #if 0 1505 if (level == 0 && have_lo) { 1506 vg_assert(!cc->cu_svma_known); /* if this fails, it must be 1507 because we've already seen a DW_TAG_compile_unit DIE at level 1508 0. But that can't happen, because DWARF3 only allows exactly 1509 one top level DIE per CU. 
*/ 1510 cc->cu_svma_known = True; 1511 cc->cu_svma = ip_lo; 1512 if (1) 1513 TRACE_D3("BBBBAAAA acquire CU_SVMA of %p\n", cc->cu_svma); 1514 /* Now, it may be that this DIE doesn't tell us the CU's 1515 SVMA, by way of not having a DW_AT_low_pc. That's OK -- 1516 the CU doesn't *have* to have its SVMA specified. 1517 1518 But as per last para D3 spec sec 3.1.1 ("Normal and 1519 Partial Compilation Unit Entries", "If the base address 1520 (viz, the SVMA) is undefined, then any DWARF entry of 1521 structure defined interms of the base address of that 1522 compilation unit is not valid.". So that means, if whilst 1523 processing the children of this top level DIE (or their 1524 children, etc) we see a DW_AT_range, and cu_svma_known is 1525 False, then the DIE that contains it is (per the spec) 1526 invalid, and we can legitimately stop and complain. */ 1527 } 1528 #else 1529 /* .. whereas The Reality is, simply assume the SVMA is zero 1530 if it isn't specified. */ 1531 if (level == 0) { 1532 vg_assert(!cc->cu_svma_known); 1533 cc->cu_svma_known = True; 1534 if (have_lo) 1535 cc->cu_svma = ip_lo; 1536 else 1537 cc->cu_svma = 0; 1538 } 1539 #endif 1540 /* Do we have something that looks sane? */ 1541 if (have_lo && have_hi1 && (!have_range)) { 1542 if (ip_lo < ip_hi1) 1543 varstack_push( cc, parser, td3, 1544 unitary_range_list(ip_lo, ip_hi1 - 1), 1545 level, 1546 False/*isFunc*/, NULL/*fbGX*/ ); 1547 } else 1548 if ((!have_lo) && (!have_hi1) && have_range) { 1549 varstack_push( cc, parser, td3, 1550 get_range_list( cc, td3, 1551 rangeoff, cc->cu_svma ), 1552 level, 1553 False/*isFunc*/, NULL/*fbGX*/ ); 1554 } else 1555 if ((!have_lo) && (!have_hi1) && (!have_range)) { 1556 /* CU has no code, presumably? */ 1557 varstack_push( cc, parser, td3, 1558 empty_range_list(), 1559 level, 1560 False/*isFunc*/, NULL/*fbGX*/ ); 1561 } else 1562 if (have_lo && (!have_hi1) && have_range && ip_lo == 0) { 1563 /* broken DIE created by gcc-4.3.X ? 
Ignore the 1564 apparently-redundant DW_AT_low_pc and use the DW_AT_ranges 1565 instead. */ 1566 varstack_push( cc, parser, td3, 1567 get_range_list( cc, td3, 1568 rangeoff, cc->cu_svma ), 1569 level, 1570 False/*isFunc*/, NULL/*fbGX*/ ); 1571 } else { 1572 if (0) VG_(printf)("I got hlo %d hhi1 %d hrange %d\n", 1573 (Int)have_lo, (Int)have_hi1, (Int)have_range); 1574 goto bad_DIE; 1575 } 1576 } 1577 1578 if (dtag == DW_TAG_lexical_block || dtag == DW_TAG_subprogram) { 1579 Bool have_lo = False; 1580 Bool have_hi1 = False; 1581 Bool have_range = False; 1582 Addr ip_lo = 0; 1583 Addr ip_hi1 = 0; 1584 Addr rangeoff = 0; 1585 Bool isFunc = dtag == DW_TAG_subprogram; 1586 GExpr* fbGX = NULL; 1587 while (True) { 1588 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 1589 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 1590 if (attr == 0 && form == 0) break; 1591 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 1592 cc, c_die, False/*td3*/, form ); 1593 if (attr == DW_AT_low_pc && ctsSzB > 0) { 1594 ip_lo = cts; 1595 have_lo = True; 1596 } 1597 if (attr == DW_AT_high_pc && ctsSzB > 0) { 1598 ip_hi1 = cts; 1599 have_hi1 = True; 1600 } 1601 if (attr == DW_AT_ranges && ctsSzB > 0) { 1602 rangeoff = cts; 1603 have_range = True; 1604 } 1605 if (isFunc 1606 && attr == DW_AT_frame_base 1607 && ((ctsMemSzB > 0 && ctsSzB == 0) 1608 || (ctsMemSzB == 0 && ctsSzB > 0))) { 1609 fbGX = get_GX( cc, False/*td3*/, cts, ctsSzB, ctsMemSzB ); 1610 vg_assert(fbGX); 1611 VG_(addToXA)(gexprs, &fbGX); 1612 } 1613 } 1614 /* Do we have something that looks sane? */ 1615 if (dtag == DW_TAG_subprogram 1616 && (!have_lo) && (!have_hi1) && (!have_range)) { 1617 /* This is legit - ignore it. Sec 3.3.3: "A subroutine entry 1618 representing a subroutine declaration that is not also a 1619 definition does not have code address or range 1620 attributes." 
*/ 1621 } else 1622 if (dtag == DW_TAG_lexical_block 1623 && (!have_lo) && (!have_hi1) && (!have_range)) { 1624 /* I believe this is legit, and means the lexical block 1625 contains no insns (whatever that might mean). Ignore. */ 1626 } else 1627 if (have_lo && have_hi1 && (!have_range)) { 1628 /* This scope supplies just a single address range. */ 1629 if (ip_lo < ip_hi1) 1630 varstack_push( cc, parser, td3, 1631 unitary_range_list(ip_lo, ip_hi1 - 1), 1632 level, isFunc, fbGX ); 1633 } else 1634 if ((!have_lo) && (!have_hi1) && have_range) { 1635 /* This scope supplies multiple address ranges via the use of 1636 a range list. */ 1637 varstack_push( cc, parser, td3, 1638 get_range_list( cc, td3, 1639 rangeoff, cc->cu_svma ), 1640 level, isFunc, fbGX ); 1641 } else 1642 if (have_lo && (!have_hi1) && (!have_range)) { 1643 /* This scope is bogus. The D3 spec sec 3.4 (Lexical Block 1644 Entries) says fairly clearly that a scope must have either 1645 _range or (_low_pc and _high_pc). */ 1646 /* The spec is a bit ambiguous though. Perhaps a single byte 1647 range is intended? 
See sec 2.17 (Code Addresses And Ranges) */ 1648 /* This case is here because icc9 produced this: 1649 <2><13bd>: DW_TAG_lexical_block 1650 DW_AT_decl_line : 5229 1651 DW_AT_decl_column : 37 1652 DW_AT_decl_file : 1 1653 DW_AT_low_pc : 0x401b03 1654 */ 1655 /* Ignore (seems safe than pushing a single byte range) */ 1656 } else 1657 goto bad_DIE; 1658 } 1659 1660 if (dtag == DW_TAG_variable || dtag == DW_TAG_formal_parameter) { 1661 UChar* name = NULL; 1662 UWord typeR = D3_INVALID_CUOFF; 1663 Bool external = False; 1664 GExpr* gexpr = NULL; 1665 Int n_attrs = 0; 1666 UWord abs_ori = (UWord)D3_INVALID_CUOFF; 1667 Int lineNo = 0; 1668 UChar* fileName = NULL; 1669 while (True) { 1670 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 1671 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 1672 if (attr == 0 && form == 0) break; 1673 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 1674 cc, c_die, False/*td3*/, form ); 1675 n_attrs++; 1676 if (attr == DW_AT_name && ctsMemSzB > 0) { 1677 name = ML_(addStr)( cc->di, (UChar*)(UWord)cts, -1 ); 1678 } 1679 if (attr == DW_AT_location 1680 && ((ctsMemSzB > 0 && ctsSzB == 0) 1681 || (ctsMemSzB == 0 && ctsSzB > 0))) { 1682 gexpr = get_GX( cc, False/*td3*/, cts, ctsSzB, ctsMemSzB ); 1683 vg_assert(gexpr); 1684 VG_(addToXA)(gexprs, &gexpr); 1685 } 1686 if (attr == DW_AT_type && ctsSzB > 0) { 1687 typeR = (UWord)cts; 1688 } 1689 if (attr == DW_AT_external && ctsSzB > 0 && cts > 0) { 1690 external = True; 1691 } 1692 if (attr == DW_AT_abstract_origin && ctsSzB > 0) { 1693 abs_ori = (UWord)cts; 1694 } 1695 if (attr == DW_AT_declaration && ctsSzB > 0 && cts > 0) { 1696 /*declaration = True;*/ 1697 } 1698 if (attr == DW_AT_decl_line && ctsSzB > 0) { 1699 lineNo = (Int)cts; 1700 } 1701 if (attr == DW_AT_decl_file && ctsSzB > 0) { 1702 Int ftabIx = (Int)cts; 1703 if (ftabIx >= 1 1704 && ftabIx < VG_(sizeXA)( parser->filenameTable )) { 1705 fileName = *(UChar**) 1706 VG_(indexXA)( parser->filenameTable, ftabIx ); 1707 vg_assert(fileName); 1708 } 1709 
if (0) VG_(printf)("XXX filename = %s\n", fileName); 1710 } 1711 } 1712 /* We'll collect it under if one of the following three 1713 conditions holds: 1714 (1) has location and type -> completed 1715 (2) has type only -> is an abstract instance 1716 (3) has location and abs_ori -> is a concrete instance 1717 Name, filename and line number are all optional frills. 1718 */ 1719 if ( /* 1 */ (gexpr && typeR != D3_INVALID_CUOFF) 1720 /* 2 */ || (typeR != D3_INVALID_CUOFF) 1721 /* 3 */ || (gexpr && abs_ori != (UWord)D3_INVALID_CUOFF) ) { 1722 1723 /* Add this variable to the list of interesting looking 1724 variables. Crucially, note along with it the address 1725 range(s) associated with the variable, which for locals 1726 will be the address ranges at the top of the varparser's 1727 stack. */ 1728 GExpr* fbGX = NULL; 1729 Word i, nRanges; 1730 XArray* /* of AddrRange */ xa; 1731 TempVar* tv; 1732 /* Stack can't be empty; we put a dummy entry on it for the 1733 entire address range before starting with the DIEs for 1734 this CU. */ 1735 vg_assert(parser->sp >= 0); 1736 1737 /* If this is a local variable (non-external), try to find 1738 the GExpr for the DW_AT_frame_base of the containing 1739 function. It should have been pushed on the stack at the 1740 time we encountered its DW_TAG_subprogram DIE, so the way 1741 to find it is to scan back down the stack looking for it. 1742 If there isn't an enclosing stack entry marked 'isFunc' 1743 then we must be seeing variable or formal param DIEs 1744 outside of a function, so we deem the Dwarf to be 1745 malformed if that happens. Note that the fbGX may be NULL 1746 if the containing DT_TAG_subprogram didn't supply a 1747 DW_AT_frame_base -- that's OK, but there must actually be 1748 a containing DW_TAG_subprogram. 
*/ 1749 if (!external) { 1750 Bool found = False; 1751 for (i = parser->sp; i >= 0; i--) { 1752 if (parser->isFunc[i]) { 1753 fbGX = parser->fbGX[i]; 1754 found = True; 1755 break; 1756 } 1757 } 1758 if (!found) { 1759 if (0 && VG_(clo_verbosity) >= 0) { 1760 VG_(message)(Vg_DebugMsg, 1761 "warning: parse_var_DIE: non-external variable " 1762 "outside DW_TAG_subprogram\n"); 1763 } 1764 /* goto bad_DIE; */ 1765 /* This seems to happen a lot. Just ignore it -- if, 1766 when we come to evaluation of the location (guarded) 1767 expression, it requires a frame base value, and 1768 there's no expression for that, then evaluation as a 1769 whole will fail. Harmless - a bit of a waste of 1770 cycles but nothing more. */ 1771 } 1772 } 1773 1774 /* re "external ? 0 : parser->sp" (twice), if the var is 1775 marked 'external' then we must put it at the global scope, 1776 as only the global scope (level 0) covers the entire PC 1777 address space. It is asserted elsewhere that level 0 1778 always covers the entire address space. */ 1779 xa = parser->ranges[external ? 0 : parser->sp]; 1780 nRanges = VG_(sizeXA)(xa); 1781 vg_assert(nRanges >= 0); 1782 1783 tv = ML_(dinfo_zalloc)( "di.readdwarf3.pvD.1", sizeof(TempVar) ); 1784 tv->name = name; 1785 tv->level = external ? 0 : parser->sp; 1786 tv->typeR = typeR; 1787 tv->gexpr = gexpr; 1788 tv->fbGX = fbGX; 1789 tv->fName = fileName; 1790 tv->fLine = lineNo; 1791 tv->dioff = posn; 1792 tv->absOri = abs_ori; 1793 1794 /* See explanation on definition of type TempVar for the 1795 reason for this elaboration. */ 1796 tv->nRanges = nRanges; 1797 tv->rngOneMin = 0; 1798 tv->rngOneMax = 0; 1799 tv->rngMany = NULL; 1800 if (nRanges == 1) { 1801 AddrRange* range = VG_(indexXA)(xa, 0); 1802 tv->rngOneMin = range->aMin; 1803 tv->rngOneMax = range->aMax; 1804 } 1805 else if (nRanges > 1) { 1806 /* See if we already have a range list which is 1807 structurally identical. If so, use that; if not, clone 1808 this one, and add it to our collection. 
*/ 1809 UWord keyW, valW; 1810 if (VG_(lookupFM)( rangestree, &keyW, &valW, (UWord)xa )) { 1811 XArray* old = (XArray*)keyW; 1812 tl_assert(valW == 0); 1813 tl_assert(old != xa); 1814 tv->rngMany = old; 1815 } else { 1816 XArray* cloned = VG_(cloneXA)( "di.readdwarf3.pvD.2", xa ); 1817 tv->rngMany = cloned; 1818 VG_(addToFM)( rangestree, (UWord)cloned, 0 ); 1819 } 1820 } 1821 1822 VG_(addToXA)( tempvars, &tv ); 1823 1824 TRACE_D3(" Recording this variable, with %ld PC range(s)\n", 1825 VG_(sizeXA)(xa) ); 1826 /* collect stats on how effective the ->ranges special 1827 casing is */ 1828 if (0) { 1829 static Int ntot=0, ngt=0; 1830 ntot++; 1831 if (tv->rngMany) ngt++; 1832 if (0 == (ntot % 100000)) 1833 VG_(printf)("XXXX %d tot, %d cloned\n", ntot, ngt); 1834 } 1835 1836 } 1837 1838 /* Here are some other weird cases seen in the wild: 1839 1840 We have a variable with a name and a type, but no 1841 location. I guess that's a sign that it has been 1842 optimised away. Ignore it. Here's an example: 1843 1844 static Int lc_compar(void* n1, void* n2) { 1845 MC_Chunk* mc1 = *(MC_Chunk**)n1; 1846 MC_Chunk* mc2 = *(MC_Chunk**)n2; 1847 return (mc1->data < mc2->data ? -1 : 1); 1848 } 1849 1850 Both mc1 and mc2 are like this 1851 <2><5bc>: Abbrev Number: 21 (DW_TAG_variable) 1852 DW_AT_name : mc1 1853 DW_AT_decl_file : 1 1854 DW_AT_decl_line : 216 1855 DW_AT_type : <5d3> 1856 1857 whereas n1 and n2 do have locations specified. 1858 1859 --------------------------------------------- 1860 1861 We see a DW_TAG_formal_parameter with a type, but 1862 no name and no location. 
It's probably part of a function type 1863 construction, thusly, hence ignore it: 1864 <1><2b4>: Abbrev Number: 12 (DW_TAG_subroutine_type) 1865 DW_AT_sibling : <2c9> 1866 DW_AT_prototyped : 1 1867 DW_AT_type : <114> 1868 <2><2be>: Abbrev Number: 13 (DW_TAG_formal_parameter) 1869 DW_AT_type : <13e> 1870 <2><2c3>: Abbrev Number: 13 (DW_TAG_formal_parameter) 1871 DW_AT_type : <133> 1872 1873 --------------------------------------------- 1874 1875 Is very minimal, like this: 1876 <4><81d>: Abbrev Number: 44 (DW_TAG_variable) 1877 DW_AT_abstract_origin: <7ba> 1878 What that signifies I have no idea. Ignore. 1879 1880 ---------------------------------------------- 1881 1882 Is very minimal, like this: 1883 <200f>: DW_TAG_formal_parameter 1884 DW_AT_abstract_ori: <1f4c> 1885 DW_AT_location : 13440 1886 What that signifies I have no idea. Ignore. 1887 It might be significant, though: the variable at least 1888 has a location and so might exist somewhere. 1889 Maybe we should handle this. 1890 1891 --------------------------------------------- 1892 1893 <22407>: DW_TAG_variable 1894 DW_AT_name : (indirect string, offset: 0x6579): 1895 vgPlain_trampoline_stuff_start 1896 DW_AT_decl_file : 29 1897 DW_AT_decl_line : 56 1898 DW_AT_external : 1 1899 DW_AT_declaration : 1 1900 1901 Nameless and typeless variable that has a location? Who 1902 knows. Not me. 1903 <2><3d178>: Abbrev Number: 22 (DW_TAG_variable) 1904 DW_AT_location : 9 byte block: 3 c0 c7 13 38 0 0 0 0 1905 (DW_OP_addr: 3813c7c0) 1906 1907 No, really. Check it out. gcc is quite simply borked. 
1908 <3><168cc>: Abbrev Number: 141 (DW_TAG_variable) 1909 // followed by no attributes, and the next DIE is a sibling, 1910 // not a child 1911 */ 1912 } 1913 return; 1914 1915 bad_DIE: 1916 set_position_of_Cursor( c_die, saved_die_c_offset ); 1917 set_position_of_Cursor( c_abbv, saved_abbv_c_offset ); 1918 VG_(printf)("\nparse_var_DIE: confused by:\n"); 1919 VG_(printf)(" <%d><%lx>: %s\n", level, posn, ML_(pp_DW_TAG)( dtag ) ); 1920 while (True) { 1921 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 1922 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 1923 if (attr == 0 && form == 0) break; 1924 VG_(printf)(" %18s: ", ML_(pp_DW_AT)(attr)); 1925 /* Get the form contents, so as to print them */ 1926 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 1927 cc, c_die, True, form ); 1928 VG_(printf)("\t\n"); 1929 } 1930 VG_(printf)("\n"); 1931 cc->barf("parse_var_DIE: confused by the above DIE"); 1932 /*NOTREACHED*/ 1933 } 1934 1935 1936 /*------------------------------------------------------------*/ 1937 /*--- ---*/ 1938 /*--- Parsing of type-related DIEs ---*/ 1939 /*--- ---*/ 1940 /*------------------------------------------------------------*/ 1941 1942 #define N_D3_TYPE_STACK 16 1943 1944 typedef 1945 struct { 1946 /* What source language? 'A'=Ada83/95, 1947 'C'=C/C++, 1948 'F'=Fortran, 1949 '?'=other 1950 Established once per compilation unit. */ 1951 UChar language; 1952 /* A stack of types which are currently under construction */ 1953 Int sp; /* [sp] is innermost active entry; sp==-1 for empty 1954 stack */ 1955 /* Note that the TyEnts in qparentE are temporary copies of the 1956 ones accumulating in the main tyent array. So it is not safe 1957 to free up anything on them when popping them off the stack 1958 (iow, it isn't safe to use TyEnt__make_EMPTY on them). Just 1959 memset them to zero when done. 
*/ 1960 TyEnt qparentE[N_D3_TYPE_STACK]; /* parent TyEnts */ 1961 Int qlevel[N_D3_TYPE_STACK]; 1962 1963 } 1964 D3TypeParser; 1965 1966 static void typestack_show ( D3TypeParser* parser, HChar* str ) { 1967 Word i; 1968 VG_(printf)(" typestack (%s) {\n", str); 1969 for (i = 0; i <= parser->sp; i++) { 1970 VG_(printf)(" [%ld] (level %d): ", i, parser->qlevel[i]); 1971 ML_(pp_TyEnt)( &parser->qparentE[i] ); 1972 VG_(printf)("\n"); 1973 } 1974 VG_(printf)(" }\n"); 1975 } 1976 1977 /* Remove from the stack, all entries with .level > 'level' */ 1978 static 1979 void typestack_preen ( D3TypeParser* parser, Bool td3, Int level ) 1980 { 1981 Bool changed = False; 1982 vg_assert(parser->sp < N_D3_TYPE_STACK); 1983 while (True) { 1984 vg_assert(parser->sp >= -1); 1985 if (parser->sp == -1) break; 1986 if (parser->qlevel[parser->sp] <= level) break; 1987 if (0) 1988 TRACE_D3("BBBBAAAA typestack_pop [newsp=%d]\n", parser->sp-1); 1989 vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp])); 1990 VG_(memset)(&parser->qparentE[parser->sp], 0, sizeof(TyEnt)); 1991 parser->qparentE[parser->sp].cuOff = D3_INVALID_CUOFF; 1992 parser->qparentE[parser->sp].tag = Te_EMPTY; 1993 parser->qlevel[parser->sp] = 0; 1994 parser->sp--; 1995 changed = True; 1996 } 1997 if (changed && td3) 1998 typestack_show( parser, "after preen" ); 1999 } 2000 2001 static Bool typestack_is_empty ( D3TypeParser* parser ) { 2002 vg_assert(parser->sp >= -1 && parser->sp < N_D3_TYPE_STACK); 2003 return parser->sp == -1; 2004 } 2005 2006 static void typestack_push ( CUConst* cc, 2007 D3TypeParser* parser, 2008 Bool td3, 2009 TyEnt* parentE, Int level ) { 2010 if (0) 2011 TRACE_D3("BBBBAAAA typestack_push[newsp=%d]: %d %05lx\n", 2012 parser->sp+1, level, parentE->cuOff); 2013 2014 /* First we need to zap everything >= 'level', as we are about to 2015 replace any previous entry at 'level', so .. 
*/ 2016 typestack_preen(parser, /*td3*/False, level-1); 2017 2018 vg_assert(parser->sp >= -1); 2019 vg_assert(parser->sp < N_D3_TYPE_STACK); 2020 if (parser->sp == N_D3_TYPE_STACK-1) 2021 cc->barf("typestack_push: N_D3_TYPE_STACK is too low; " 2022 "increase and recompile"); 2023 if (parser->sp >= 0) 2024 vg_assert(parser->qlevel[parser->sp] < level); 2025 parser->sp++; 2026 vg_assert(parser->qparentE[parser->sp].tag == Te_EMPTY); 2027 vg_assert(parser->qlevel[parser->sp] == 0); 2028 vg_assert(parentE); 2029 vg_assert(ML_(TyEnt__is_type)(parentE)); 2030 vg_assert(parentE->cuOff != D3_INVALID_CUOFF); 2031 parser->qparentE[parser->sp] = *parentE; 2032 parser->qlevel[parser->sp] = level; 2033 if (td3) 2034 typestack_show( parser, "after push" ); 2035 } 2036 2037 /* True if the subrange type being parsed gives the bounds of an array. */ 2038 static Bool subrange_type_denotes_array_bounds ( D3TypeParser* parser, 2039 DW_TAG dtag ) { 2040 vg_assert(dtag == DW_TAG_subrange_type); 2041 /* For most languages, a subrange_type dtag always gives the 2042 bounds of an array. 2043 For Ada, there are additional conditions as a subrange_type 2044 is also used for other purposes. */ 2045 if (parser->language != 'A') 2046 /* not Ada, so it definitely denotes an array bound. */ 2047 return True; 2048 else 2049 /* Extra constraints for Ada: it only denotes an array bound if .. */ 2050 return (! typestack_is_empty(parser) 2051 && parser->qparentE[parser->sp].tag == Te_TyArray); 2052 } 2053 2054 /* Parse a type-related DIE. 'parser' holds the current parser state. 2055 'admin' is where the completed types are dumped. 'dtag' is the tag 2056 for this DIE. 'c_die' points to the start of the data fields (FORM 2057 stuff) for the DIE. c_abbv points to the start of the (name,form) 2058 pairs which describe the DIE. 2059 2060 We may find the DIE uninteresting, in which case we should ignore 2061 it. 2062 2063 What happens: the DIE is examined. If uninteresting, it is ignored. 
2064 Otherwise, the DIE gives rise to two things: 2065 2066 (1) the offset of this DIE in the CU -- the cuOffset, a UWord 2067 (2) a TyAdmin structure, which holds the type, or related stuff 2068 2069 (2) is added at the end of 'tyadmins', at some index, say 'i'. 2070 2071 A pair (cuOffset, i) is added to 'tydict'. 2072 2073 Hence 'tyadmins' holds the actual type entities, and 'tydict' holds 2074 a mapping from cuOffset to the index of the corresponding entry in 2075 'tyadmin'. 2076 2077 When resolving a cuOffset to a TyAdmin, first look up the cuOffset 2078 in the tydict (by binary search). This gives an index into 2079 tyadmins, and the required entity lives in tyadmins at that index. 2080 */ 2081 __attribute__((noinline)) 2082 static void parse_type_DIE ( /*MOD*/XArray* /* of TyEnt */ tyents, 2083 /*MOD*/D3TypeParser* parser, 2084 DW_TAG dtag, 2085 UWord posn, 2086 Int level, 2087 Cursor* c_die, 2088 Cursor* c_abbv, 2089 CUConst* cc, 2090 Bool td3 ) 2091 { 2092 ULong cts; 2093 Int ctsSzB; 2094 UWord ctsMemSzB; 2095 TyEnt typeE; 2096 TyEnt atomE; 2097 TyEnt fieldE; 2098 TyEnt boundE; 2099 2100 UWord saved_die_c_offset = get_position_of_Cursor( c_die ); 2101 UWord saved_abbv_c_offset = get_position_of_Cursor( c_abbv ); 2102 2103 VG_(memset)( &typeE, 0xAA, sizeof(typeE) ); 2104 VG_(memset)( &atomE, 0xAA, sizeof(atomE) ); 2105 VG_(memset)( &fieldE, 0xAA, sizeof(fieldE) ); 2106 VG_(memset)( &boundE, 0xAA, sizeof(boundE) ); 2107 2108 /* If we've returned to a level at or above any previously noted 2109 parent, un-note it, so we don't believe we're still collecting 2110 its children. 
*/ 2111 typestack_preen( parser, td3, level-1 ); 2112 2113 if (dtag == DW_TAG_compile_unit) { 2114 /* See if we can find DW_AT_language, since it is important for 2115 establishing array bounds (see DW_TAG_subrange_type below in 2116 this fn) */ 2117 while (True) { 2118 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 2119 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 2120 if (attr == 0 && form == 0) break; 2121 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 2122 cc, c_die, False/*td3*/, form ); 2123 if (attr != DW_AT_language) 2124 continue; 2125 if (ctsSzB == 0) 2126 goto bad_DIE; 2127 switch (cts) { 2128 case DW_LANG_C89: case DW_LANG_C: 2129 case DW_LANG_C_plus_plus: case DW_LANG_ObjC: 2130 case DW_LANG_ObjC_plus_plus: case DW_LANG_UPC: 2131 case DW_LANG_Upc: case DW_LANG_C99: 2132 parser->language = 'C'; break; 2133 case DW_LANG_Fortran77: case DW_LANG_Fortran90: 2134 case DW_LANG_Fortran95: 2135 parser->language = 'F'; break; 2136 case DW_LANG_Ada83: case DW_LANG_Ada95: 2137 parser->language = 'A'; break; 2138 case DW_LANG_Cobol74: 2139 case DW_LANG_Cobol85: case DW_LANG_Pascal83: 2140 case DW_LANG_Modula2: case DW_LANG_Java: 2141 case DW_LANG_PLI: 2142 case DW_LANG_D: case DW_LANG_Python: 2143 case DW_LANG_Mips_Assembler: 2144 parser->language = '?'; break; 2145 default: 2146 goto bad_DIE; 2147 } 2148 } 2149 } 2150 2151 if (dtag == DW_TAG_base_type) { 2152 /* We can pick up a new base type any time. 
*/ 2153 VG_(memset)(&typeE, 0, sizeof(typeE)); 2154 typeE.cuOff = D3_INVALID_CUOFF; 2155 typeE.tag = Te_TyBase; 2156 while (True) { 2157 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 2158 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 2159 if (attr == 0 && form == 0) break; 2160 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 2161 cc, c_die, False/*td3*/, form ); 2162 if (attr == DW_AT_name && ctsMemSzB > 0) { 2163 typeE.Te.TyBase.name 2164 = ML_(dinfo_strdup)( "di.readdwarf3.ptD.base_type.1", 2165 (UChar*)(UWord)cts ); 2166 } 2167 if (attr == DW_AT_byte_size && ctsSzB > 0) { 2168 typeE.Te.TyBase.szB = cts; 2169 } 2170 if (attr == DW_AT_encoding && ctsSzB > 0) { 2171 switch (cts) { 2172 case DW_ATE_unsigned: case DW_ATE_unsigned_char: 2173 case DW_ATE_UTF: /* since DWARF4, e.g. char16_t from C++ */ 2174 case DW_ATE_boolean:/* FIXME - is this correct? */ 2175 typeE.Te.TyBase.enc = 'U'; break; 2176 case DW_ATE_signed: case DW_ATE_signed_char: 2177 typeE.Te.TyBase.enc = 'S'; break; 2178 case DW_ATE_float: 2179 typeE.Te.TyBase.enc = 'F'; break; 2180 case DW_ATE_complex_float: 2181 typeE.Te.TyBase.enc = 'C'; break; 2182 default: 2183 goto bad_DIE; 2184 } 2185 } 2186 } 2187 2188 /* Invent a name if it doesn't have one. gcc-4.3 2189 -ftree-vectorize is observed to emit nameless base types. */ 2190 if (!typeE.Te.TyBase.name) 2191 typeE.Te.TyBase.name 2192 = ML_(dinfo_strdup)( "di.readdwarf3.ptD.base_type.2", 2193 "<anon_base_type>" ); 2194 2195 /* Do we have something that looks sane? */ 2196 if (/* must have a name */ 2197 typeE.Te.TyBase.name == NULL 2198 /* and a plausible size. 
Yes, really 32: "complex long 2199 double" apparently has size=32 */ 2200 || typeE.Te.TyBase.szB < 0 || typeE.Te.TyBase.szB > 32 2201 /* and a plausible encoding */ 2202 || (typeE.Te.TyBase.enc != 'U' 2203 && typeE.Te.TyBase.enc != 'S' 2204 && typeE.Te.TyBase.enc != 'F' 2205 && typeE.Te.TyBase.enc != 'C')) 2206 goto bad_DIE; 2207 /* Last minute hack: if we see this 2208 <1><515>: DW_TAG_base_type 2209 DW_AT_byte_size : 0 2210 DW_AT_encoding : 5 2211 DW_AT_name : void 2212 convert it into a real Void type. */ 2213 if (typeE.Te.TyBase.szB == 0 2214 && 0 == VG_(strcmp)("void", typeE.Te.TyBase.name)) { 2215 ML_(TyEnt__make_EMPTY)(&typeE); 2216 typeE.tag = Te_TyVoid; 2217 typeE.Te.TyVoid.isFake = False; /* it's a real one! */ 2218 } 2219 2220 goto acquire_Type; 2221 } 2222 2223 if (dtag == DW_TAG_pointer_type || dtag == DW_TAG_reference_type 2224 || dtag == DW_TAG_ptr_to_member_type) { 2225 /* This seems legit for _pointer_type and _reference_type. I 2226 don't know if rolling _ptr_to_member_type in here really is 2227 legit, but it's better than not handling it at all. */ 2228 VG_(memset)(&typeE, 0, sizeof(typeE)); 2229 typeE.cuOff = D3_INVALID_CUOFF; 2230 typeE.tag = Te_TyPorR; 2231 /* target type defaults to void */ 2232 typeE.Te.TyPorR.typeR = D3_FAKEVOID_CUOFF; 2233 typeE.Te.TyPorR.isPtr = dtag == DW_TAG_pointer_type 2234 || dtag == DW_TAG_ptr_to_member_type; 2235 /* These three type kinds don't *have* to specify their size, in 2236 which case we assume it's a machine word. But if they do 2237 specify it, it must be a machine word :-) This probably 2238 assumes that the word size of the Dwarf3 we're reading is the 2239 same size as that on the machine. gcc appears to give a size 2240 whereas icc9 doesn't. 
*/ 2241 typeE.Te.TyPorR.szB = sizeof(UWord); 2242 while (True) { 2243 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 2244 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 2245 if (attr == 0 && form == 0) break; 2246 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 2247 cc, c_die, False/*td3*/, form ); 2248 if (attr == DW_AT_byte_size && ctsSzB > 0) { 2249 typeE.Te.TyPorR.szB = cts; 2250 } 2251 if (attr == DW_AT_type && ctsSzB > 0) { 2252 typeE.Te.TyPorR.typeR = (UWord)cts; 2253 } 2254 } 2255 /* Do we have something that looks sane? */ 2256 if (typeE.Te.TyPorR.szB != sizeof(UWord)) 2257 goto bad_DIE; 2258 else 2259 goto acquire_Type; 2260 } 2261 2262 if (dtag == DW_TAG_enumeration_type) { 2263 /* Create a new Type to hold the results. */ 2264 VG_(memset)(&typeE, 0, sizeof(typeE)); 2265 typeE.cuOff = posn; 2266 typeE.tag = Te_TyEnum; 2267 typeE.Te.TyEnum.atomRs 2268 = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ptD.enum_type.1", 2269 ML_(dinfo_free), 2270 sizeof(UWord) ); 2271 while (True) { 2272 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 2273 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 2274 if (attr == 0 && form == 0) break; 2275 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 2276 cc, c_die, False/*td3*/, form ); 2277 if (attr == DW_AT_name && ctsMemSzB > 0) { 2278 typeE.Te.TyEnum.name 2279 = ML_(dinfo_strdup)( "di.readdwarf3.pTD.enum_type.2", 2280 (UChar*)(UWord)cts ); 2281 } 2282 if (attr == DW_AT_byte_size && ctsSzB > 0) { 2283 typeE.Te.TyEnum.szB = cts; 2284 } 2285 } 2286 2287 if (!typeE.Te.TyEnum.name) 2288 typeE.Te.TyEnum.name 2289 = ML_(dinfo_strdup)( "di.readdwarf3.pTD.enum_type.3", 2290 "<anon_enum_type>" ); 2291 2292 /* Do we have something that looks sane? */ 2293 if (typeE.Te.TyEnum.szB == 0 2294 /* we must know the size */ 2295 /* but not for Ada, which uses such dummy 2296 enumerations as helper for gdb ada mode. */ 2297 && parser->language != 'A') 2298 goto bad_DIE; 2299 /* On't stack! 
*/ 2300 typestack_push( cc, parser, td3, &typeE, level ); 2301 goto acquire_Type; 2302 } 2303 2304 /* gcc (GCC) 4.4.0 20081017 (experimental) occasionally produces 2305 DW_TAG_enumerator with only a DW_AT_name but no 2306 DW_AT_const_value. This is in violation of the Dwarf3 standard, 2307 and appears to be a new "feature" of gcc - versions 4.3.x and 2308 earlier do not appear to do this. So accept DW_TAG_enumerator 2309 which only have a name but no value. An example: 2310 2311 <1><180>: Abbrev Number: 6 (DW_TAG_enumeration_type) 2312 <181> DW_AT_name : (indirect string, offset: 0xda70): 2313 QtMsgType 2314 <185> DW_AT_byte_size : 4 2315 <186> DW_AT_decl_file : 14 2316 <187> DW_AT_decl_line : 1480 2317 <189> DW_AT_sibling : <0x1a7> 2318 <2><18d>: Abbrev Number: 7 (DW_TAG_enumerator) 2319 <18e> DW_AT_name : (indirect string, offset: 0x9e18): 2320 QtDebugMsg 2321 <2><192>: Abbrev Number: 7 (DW_TAG_enumerator) 2322 <193> DW_AT_name : (indirect string, offset: 0x1505f): 2323 QtWarningMsg 2324 <2><197>: Abbrev Number: 7 (DW_TAG_enumerator) 2325 <198> DW_AT_name : (indirect string, offset: 0x16f4a): 2326 QtCriticalMsg 2327 <2><19c>: Abbrev Number: 7 (DW_TAG_enumerator) 2328 <19d> DW_AT_name : (indirect string, offset: 0x156dd): 2329 QtFatalMsg 2330 <2><1a1>: Abbrev Number: 7 (DW_TAG_enumerator) 2331 <1a2> DW_AT_name : (indirect string, offset: 0x13660): 2332 QtSystemMsg 2333 */ 2334 if (dtag == DW_TAG_enumerator) { 2335 VG_(memset)( &atomE, 0, sizeof(atomE) ); 2336 atomE.cuOff = posn; 2337 atomE.tag = Te_Atom; 2338 while (True) { 2339 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 2340 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 2341 if (attr == 0 && form == 0) break; 2342 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 2343 cc, c_die, False/*td3*/, form ); 2344 if (attr == DW_AT_name && ctsMemSzB > 0) { 2345 atomE.Te.Atom.name 2346 = ML_(dinfo_strdup)( "di.readdwarf3.pTD.enumerator.1", 2347 (UChar*)(UWord)cts ); 2348 } 2349 if (attr == DW_AT_const_value && ctsSzB > 0) { 
2350 atomE.Te.Atom.value = cts; 2351 atomE.Te.Atom.valueKnown = True; 2352 } 2353 } 2354 /* Do we have something that looks sane? */ 2355 if (atomE.Te.Atom.name == NULL) 2356 goto bad_DIE; 2357 /* Do we have a plausible parent? */ 2358 if (typestack_is_empty(parser)) goto bad_DIE; 2359 vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp])); 2360 vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF); 2361 if (level != parser->qlevel[parser->sp]+1) goto bad_DIE; 2362 if (parser->qparentE[parser->sp].tag != Te_TyEnum) goto bad_DIE; 2363 /* Record this child in the parent */ 2364 vg_assert(parser->qparentE[parser->sp].Te.TyEnum.atomRs); 2365 VG_(addToXA)( parser->qparentE[parser->sp].Te.TyEnum.atomRs, 2366 &atomE ); 2367 /* And record the child itself */ 2368 goto acquire_Atom; 2369 } 2370 2371 /* Treat DW_TAG_class_type as if it was a DW_TAG_structure_type. I 2372 don't know if this is correct, but it at least makes this reader 2373 usable for gcc-4.3 produced Dwarf3. */ 2374 if (dtag == DW_TAG_structure_type || dtag == DW_TAG_class_type 2375 || dtag == DW_TAG_union_type) { 2376 Bool have_szB = False; 2377 Bool is_decl = False; 2378 Bool is_spec = False; 2379 /* Create a new Type to hold the results. 
*/ 2380 VG_(memset)(&typeE, 0, sizeof(typeE)); 2381 typeE.cuOff = posn; 2382 typeE.tag = Te_TyStOrUn; 2383 typeE.Te.TyStOrUn.name = NULL; 2384 typeE.Te.TyStOrUn.fieldRs 2385 = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.pTD.struct_type.1", 2386 ML_(dinfo_free), 2387 sizeof(UWord) ); 2388 typeE.Te.TyStOrUn.complete = True; 2389 typeE.Te.TyStOrUn.isStruct = dtag == DW_TAG_structure_type 2390 || dtag == DW_TAG_class_type; 2391 while (True) { 2392 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 2393 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 2394 if (attr == 0 && form == 0) break; 2395 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 2396 cc, c_die, False/*td3*/, form ); 2397 if (attr == DW_AT_name && ctsMemSzB > 0) { 2398 typeE.Te.TyStOrUn.name 2399 = ML_(dinfo_strdup)( "di.readdwarf3.ptD.struct_type.2", 2400 (UChar*)(UWord)cts ); 2401 } 2402 if (attr == DW_AT_byte_size && ctsSzB >= 0) { 2403 typeE.Te.TyStOrUn.szB = cts; 2404 have_szB = True; 2405 } 2406 if (attr == DW_AT_declaration && ctsSzB > 0 && cts > 0) { 2407 is_decl = True; 2408 } 2409 if (attr == DW_AT_specification && ctsSzB > 0 && cts > 0) { 2410 is_spec = True; 2411 } 2412 } 2413 /* Do we have something that looks sane? */ 2414 if (is_decl && (!is_spec)) { 2415 /* It's a DW_AT_declaration. We require the name but 2416 nothing else. */ 2417 if (typeE.Te.TyStOrUn.name == NULL) 2418 goto bad_DIE; 2419 typeE.Te.TyStOrUn.complete = False; 2420 /* JRS 2009 Aug 10: <possible kludge>? */ 2421 /* Push this tyent on the stack, even though it's incomplete. 2422 It appears that gcc-4.4 on Fedora 11 will sometimes create 2423 DW_TAG_member entries for it, and so we need to have a 2424 plausible parent present in order for that to work. See 2425 #200029 comments 8 and 9. 
*/ 2426 typestack_push( cc, parser, td3, &typeE, level ); 2427 /* </possible kludge> */ 2428 goto acquire_Type; 2429 } 2430 if ((!is_decl) /* && (!is_spec) */) { 2431 /* this is the common, ordinary case */ 2432 if ((!have_szB) /* we must know the size */ 2433 /* But the name can be present, or not */) 2434 goto bad_DIE; 2435 /* On't stack! */ 2436 typestack_push( cc, parser, td3, &typeE, level ); 2437 goto acquire_Type; 2438 } 2439 else { 2440 /* don't know how to handle any other variants just now */ 2441 goto bad_DIE; 2442 } 2443 } 2444 2445 if (dtag == DW_TAG_member) { 2446 /* Acquire member entries for both DW_TAG_structure_type and 2447 DW_TAG_union_type. They differ minorly, in that struct 2448 members must have a DW_AT_data_member_location expression 2449 whereas union members must not. */ 2450 Bool parent_is_struct; 2451 VG_(memset)( &fieldE, 0, sizeof(fieldE) ); 2452 fieldE.cuOff = posn; 2453 fieldE.tag = Te_Field; 2454 fieldE.Te.Field.typeR = D3_INVALID_CUOFF; 2455 while (True) { 2456 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 2457 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 2458 if (attr == 0 && form == 0) break; 2459 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 2460 cc, c_die, False/*td3*/, form ); 2461 if (attr == DW_AT_name && ctsMemSzB > 0) { 2462 fieldE.Te.Field.name 2463 = ML_(dinfo_strdup)( "di.readdwarf3.ptD.member.1", 2464 (UChar*)(UWord)cts ); 2465 } 2466 if (attr == DW_AT_type && ctsSzB > 0) { 2467 fieldE.Te.Field.typeR = (UWord)cts; 2468 } 2469 /* There are 2 different cases for DW_AT_data_member_location. 2470 If it is a constant class attribute, it contains byte offset 2471 from the beginning of the containing entity. 2472 Otherwise it is a location expression. 
*/ 2473 if (attr == DW_AT_data_member_location && ctsSzB > 0) { 2474 fieldE.Te.Field.nLoc = -1; 2475 fieldE.Te.Field.pos.offset = cts; 2476 } else if (attr == DW_AT_data_member_location && ctsMemSzB > 0) { 2477 fieldE.Te.Field.nLoc = (UWord)ctsMemSzB; 2478 fieldE.Te.Field.pos.loc 2479 = ML_(dinfo_memdup)( "di.readdwarf3.ptD.member.2", 2480 (UChar*)(UWord)cts, 2481 (SizeT)fieldE.Te.Field.nLoc ); 2482 } 2483 } 2484 /* Do we have a plausible parent? */ 2485 if (typestack_is_empty(parser)) goto bad_DIE; 2486 vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp])); 2487 vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF); 2488 if (level != parser->qlevel[parser->sp]+1) goto bad_DIE; 2489 if (parser->qparentE[parser->sp].tag != Te_TyStOrUn) goto bad_DIE; 2490 /* Do we have something that looks sane? If this a member of a 2491 struct, we must have a location expression; but if a member 2492 of a union that is irrelevant (D3 spec sec 5.6.6). We ought 2493 to reject in the latter case, but some compilers have been 2494 observed to emit constant-zero expressions. So just ignore 2495 them. */ 2496 parent_is_struct 2497 = parser->qparentE[parser->sp].Te.TyStOrUn.isStruct; 2498 if (!fieldE.Te.Field.name) 2499 fieldE.Te.Field.name 2500 = ML_(dinfo_strdup)( "di.readdwarf3.ptD.member.3", 2501 "<anon_field>" ); 2502 vg_assert(fieldE.Te.Field.name); 2503 if (fieldE.Te.Field.typeR == D3_INVALID_CUOFF) 2504 goto bad_DIE; 2505 if (fieldE.Te.Field.nLoc) { 2506 if (!parent_is_struct) { 2507 /* If this is a union type, pretend we haven't seen the data 2508 member location expression, as it is by definition 2509 redundant (it must be zero). 
*/ 2510 if (fieldE.Te.Field.nLoc > 0) 2511 ML_(dinfo_free)(fieldE.Te.Field.pos.loc); 2512 fieldE.Te.Field.pos.loc = NULL; 2513 fieldE.Te.Field.nLoc = 0; 2514 } 2515 /* Record this child in the parent */ 2516 fieldE.Te.Field.isStruct = parent_is_struct; 2517 vg_assert(parser->qparentE[parser->sp].Te.TyStOrUn.fieldRs); 2518 VG_(addToXA)( parser->qparentE[parser->sp].Te.TyStOrUn.fieldRs, 2519 &posn ); 2520 /* And record the child itself */ 2521 goto acquire_Field; 2522 } else { 2523 /* Member with no location - this can happen with static 2524 const members in C++ code which are compile time constants 2525 that do no exist in the class. They're not of any interest 2526 to us so we ignore them. */ 2527 } 2528 } 2529 2530 if (dtag == DW_TAG_array_type) { 2531 VG_(memset)(&typeE, 0, sizeof(typeE)); 2532 typeE.cuOff = posn; 2533 typeE.tag = Te_TyArray; 2534 typeE.Te.TyArray.typeR = D3_INVALID_CUOFF; 2535 typeE.Te.TyArray.boundRs 2536 = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ptD.array_type.1", 2537 ML_(dinfo_free), 2538 sizeof(UWord) ); 2539 while (True) { 2540 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 2541 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 2542 if (attr == 0 && form == 0) break; 2543 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 2544 cc, c_die, False/*td3*/, form ); 2545 if (attr == DW_AT_type && ctsSzB > 0) { 2546 typeE.Te.TyArray.typeR = (UWord)cts; 2547 } 2548 } 2549 if (typeE.Te.TyArray.typeR == D3_INVALID_CUOFF) 2550 goto bad_DIE; 2551 /* On't stack! */ 2552 typestack_push( cc, parser, td3, &typeE, level ); 2553 goto acquire_Type; 2554 } 2555 2556 /* this is a subrange type defining the bounds of an array. 
*/ 2557 if (dtag == DW_TAG_subrange_type 2558 && subrange_type_denotes_array_bounds(parser, dtag)) { 2559 Bool have_lower = False; 2560 Bool have_upper = False; 2561 Bool have_count = False; 2562 Long lower = 0; 2563 Long upper = 0; 2564 2565 switch (parser->language) { 2566 case 'C': have_lower = True; lower = 0; break; 2567 case 'F': have_lower = True; lower = 1; break; 2568 case '?': have_lower = False; break; 2569 case 'A': have_lower = False; break; 2570 default: vg_assert(0); /* assured us by handling of 2571 DW_TAG_compile_unit in this fn */ 2572 } 2573 2574 VG_(memset)( &boundE, 0, sizeof(boundE) ); 2575 boundE.cuOff = D3_INVALID_CUOFF; 2576 boundE.tag = Te_Bound; 2577 while (True) { 2578 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 2579 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 2580 if (attr == 0 && form == 0) break; 2581 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 2582 cc, c_die, False/*td3*/, form ); 2583 if (attr == DW_AT_lower_bound && ctsSzB > 0) { 2584 lower = (Long)cts; 2585 have_lower = True; 2586 } 2587 if (attr == DW_AT_upper_bound && ctsSzB > 0) { 2588 upper = (Long)cts; 2589 have_upper = True; 2590 } 2591 if (attr == DW_AT_count && ctsSzB > 0) { 2592 /*count = (Long)cts;*/ 2593 have_count = True; 2594 } 2595 } 2596 /* FIXME: potentially skip the rest if no parent present, since 2597 it could be the case that this subrange type is free-standing 2598 (not being used to describe the bounds of a containing array 2599 type) */ 2600 /* Do we have a plausible parent? 
*/ 2601 if (typestack_is_empty(parser)) goto bad_DIE; 2602 vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp])); 2603 vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF); 2604 if (level != parser->qlevel[parser->sp]+1) goto bad_DIE; 2605 if (parser->qparentE[parser->sp].tag != Te_TyArray) goto bad_DIE; 2606 2607 /* Figure out if we have a definite range or not */ 2608 if (have_lower && have_upper && (!have_count)) { 2609 boundE.Te.Bound.knownL = True; 2610 boundE.Te.Bound.knownU = True; 2611 boundE.Te.Bound.boundL = lower; 2612 boundE.Te.Bound.boundU = upper; 2613 } 2614 else if (have_lower && (!have_upper) && (!have_count)) { 2615 boundE.Te.Bound.knownL = True; 2616 boundE.Te.Bound.knownU = False; 2617 boundE.Te.Bound.boundL = lower; 2618 boundE.Te.Bound.boundU = 0; 2619 } 2620 else if ((!have_lower) && have_upper && (!have_count)) { 2621 boundE.Te.Bound.knownL = False; 2622 boundE.Te.Bound.knownU = True; 2623 boundE.Te.Bound.boundL = 0; 2624 boundE.Te.Bound.boundU = upper; 2625 } 2626 else if ((!have_lower) && (!have_upper) && (!have_count)) { 2627 boundE.Te.Bound.knownL = False; 2628 boundE.Te.Bound.knownU = False; 2629 boundE.Te.Bound.boundL = 0; 2630 boundE.Te.Bound.boundU = 0; 2631 } else { 2632 /* FIXME: handle more cases */ 2633 goto bad_DIE; 2634 } 2635 2636 /* Record this bound in the parent */ 2637 boundE.cuOff = posn; 2638 vg_assert(parser->qparentE[parser->sp].Te.TyArray.boundRs); 2639 VG_(addToXA)( parser->qparentE[parser->sp].Te.TyArray.boundRs, 2640 &boundE ); 2641 /* And record the child itself */ 2642 goto acquire_Bound; 2643 } 2644 2645 /* typedef or subrange_type other than array bounds. */ 2646 if (dtag == DW_TAG_typedef 2647 || (dtag == DW_TAG_subrange_type 2648 && !subrange_type_denotes_array_bounds(parser, dtag))) { 2649 /* subrange_type other than array bound is only for Ada. */ 2650 vg_assert (dtag == DW_TAG_typedef || parser->language == 'A'); 2651 /* We can pick up a new typedef/subrange_type any time. 
*/ 2652 VG_(memset)(&typeE, 0, sizeof(typeE)); 2653 typeE.cuOff = D3_INVALID_CUOFF; 2654 typeE.tag = Te_TyTyDef; 2655 typeE.Te.TyTyDef.name = NULL; 2656 typeE.Te.TyTyDef.typeR = D3_INVALID_CUOFF; 2657 while (True) { 2658 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 2659 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 2660 if (attr == 0 && form == 0) break; 2661 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 2662 cc, c_die, False/*td3*/, form ); 2663 if (attr == DW_AT_name && ctsMemSzB > 0) { 2664 typeE.Te.TyTyDef.name 2665 = ML_(dinfo_strdup)( "di.readdwarf3.ptD.typedef.1", 2666 (UChar*)(UWord)cts ); 2667 } 2668 if (attr == DW_AT_type && ctsSzB > 0) { 2669 typeE.Te.TyTyDef.typeR = (UWord)cts; 2670 } 2671 } 2672 /* Do we have something that looks sane? */ 2673 if (/* must have a name */ 2674 typeE.Te.TyTyDef.name == NULL 2675 /* However gcc gnat Ada generates minimal typedef 2676 such as the below => accept no name for Ada. 2677 <6><91cc>: DW_TAG_typedef 2678 DW_AT_abstract_ori: <9066> 2679 */ 2680 && parser->language != 'A' 2681 /* but the referred-to type can be absent */) 2682 goto bad_DIE; 2683 else 2684 goto acquire_Type; 2685 } 2686 2687 if (dtag == DW_TAG_subroutine_type) { 2688 /* function type? just record that one fact and ask no 2689 further questions. */ 2690 VG_(memset)(&typeE, 0, sizeof(typeE)); 2691 typeE.cuOff = D3_INVALID_CUOFF; 2692 typeE.tag = Te_TyFn; 2693 goto acquire_Type; 2694 } 2695 2696 if (dtag == DW_TAG_volatile_type || dtag == DW_TAG_const_type) { 2697 Int have_ty = 0; 2698 VG_(memset)(&typeE, 0, sizeof(typeE)); 2699 typeE.cuOff = D3_INVALID_CUOFF; 2700 typeE.tag = Te_TyQual; 2701 typeE.Te.TyQual.qual 2702 = dtag == DW_TAG_volatile_type ? 
'V' : 'C'; 2703 /* target type defaults to 'void' */ 2704 typeE.Te.TyQual.typeR = D3_FAKEVOID_CUOFF; 2705 while (True) { 2706 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 2707 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 2708 if (attr == 0 && form == 0) break; 2709 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 2710 cc, c_die, False/*td3*/, form ); 2711 if (attr == DW_AT_type && ctsSzB > 0) { 2712 typeE.Te.TyQual.typeR = (UWord)cts; 2713 have_ty++; 2714 } 2715 } 2716 /* gcc sometimes generates DW_TAG_const/volatile_type without 2717 DW_AT_type and GDB appears to interpret the type as 'const 2718 void' (resp. 'volatile void'). So just allow it .. */ 2719 if (have_ty == 1 || have_ty == 0) 2720 goto acquire_Type; 2721 else 2722 goto bad_DIE; 2723 } 2724 2725 /* else ignore this DIE */ 2726 return; 2727 /*NOTREACHED*/ 2728 2729 acquire_Type: 2730 if (0) VG_(printf)("YYYY Acquire Type\n"); 2731 vg_assert(ML_(TyEnt__is_type)( &typeE )); 2732 vg_assert(typeE.cuOff == D3_INVALID_CUOFF || typeE.cuOff == posn); 2733 typeE.cuOff = posn; 2734 VG_(addToXA)( tyents, &typeE ); 2735 return; 2736 /*NOTREACHED*/ 2737 2738 acquire_Atom: 2739 if (0) VG_(printf)("YYYY Acquire Atom\n"); 2740 vg_assert(atomE.tag == Te_Atom); 2741 vg_assert(atomE.cuOff == D3_INVALID_CUOFF || atomE.cuOff == posn); 2742 atomE.cuOff = posn; 2743 VG_(addToXA)( tyents, &atomE ); 2744 return; 2745 /*NOTREACHED*/ 2746 2747 acquire_Field: 2748 /* For union members, Expr should be absent */ 2749 if (0) VG_(printf)("YYYY Acquire Field\n"); 2750 vg_assert(fieldE.tag == Te_Field); 2751 vg_assert(fieldE.Te.Field.nLoc <= 0 || fieldE.Te.Field.pos.loc != NULL); 2752 vg_assert(fieldE.Te.Field.nLoc != 0 || fieldE.Te.Field.pos.loc == NULL); 2753 if (fieldE.Te.Field.isStruct) { 2754 vg_assert(fieldE.Te.Field.nLoc != 0); 2755 } else { 2756 vg_assert(fieldE.Te.Field.nLoc == 0); 2757 } 2758 vg_assert(fieldE.cuOff == D3_INVALID_CUOFF || fieldE.cuOff == posn); 2759 fieldE.cuOff = posn; 2760 VG_(addToXA)( tyents, &fieldE ); 2761 
return; 2762 /*NOTREACHED*/ 2763 2764 acquire_Bound: 2765 if (0) VG_(printf)("YYYY Acquire Bound\n"); 2766 vg_assert(boundE.tag == Te_Bound); 2767 vg_assert(boundE.cuOff == D3_INVALID_CUOFF || boundE.cuOff == posn); 2768 boundE.cuOff = posn; 2769 VG_(addToXA)( tyents, &boundE ); 2770 return; 2771 /*NOTREACHED*/ 2772 2773 bad_DIE: 2774 set_position_of_Cursor( c_die, saved_die_c_offset ); 2775 set_position_of_Cursor( c_abbv, saved_abbv_c_offset ); 2776 VG_(printf)("\nparse_type_DIE: confused by:\n"); 2777 VG_(printf)(" <%d><%lx>: %s\n", level, posn, ML_(pp_DW_TAG)( dtag ) ); 2778 while (True) { 2779 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 2780 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 2781 if (attr == 0 && form == 0) break; 2782 VG_(printf)(" %18s: ", ML_(pp_DW_AT)(attr)); 2783 /* Get the form contents, so as to print them */ 2784 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 2785 cc, c_die, True, form ); 2786 VG_(printf)("\t\n"); 2787 } 2788 VG_(printf)("\n"); 2789 cc->barf("parse_type_DIE: confused by the above DIE"); 2790 /*NOTREACHED*/ 2791 } 2792 2793 2794 /*------------------------------------------------------------*/ 2795 /*--- ---*/ 2796 /*--- Compression of type DIE information ---*/ 2797 /*--- ---*/ 2798 /*------------------------------------------------------------*/ 2799 2800 static UWord chase_cuOff ( Bool* changed, 2801 XArray* /* of TyEnt */ ents, 2802 TyEntIndexCache* ents_cache, 2803 UWord cuOff ) 2804 { 2805 TyEnt* ent; 2806 ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache, cuOff ); 2807 2808 if (!ent) { 2809 VG_(printf)("chase_cuOff: no entry for 0x%05lx\n", cuOff); 2810 *changed = False; 2811 return cuOff; 2812 } 2813 2814 vg_assert(ent->tag != Te_EMPTY); 2815 if (ent->tag != Te_INDIR) { 2816 *changed = False; 2817 return cuOff; 2818 } else { 2819 vg_assert(ent->Te.INDIR.indR < cuOff); 2820 *changed = True; 2821 return ent->Te.INDIR.indR; 2822 } 2823 } 2824 2825 static 2826 void chase_cuOffs_in_XArray ( Bool* changed, 2827 XArray* /* 
of TyEnt */ ents, 2828 TyEntIndexCache* ents_cache, 2829 /*MOD*/XArray* /* of UWord */ cuOffs ) 2830 { 2831 Bool b2 = False; 2832 Word i, n = VG_(sizeXA)( cuOffs ); 2833 for (i = 0; i < n; i++) { 2834 Bool b = False; 2835 UWord* p = VG_(indexXA)( cuOffs, i ); 2836 *p = chase_cuOff( &b, ents, ents_cache, *p ); 2837 if (b) 2838 b2 = True; 2839 } 2840 *changed = b2; 2841 } 2842 2843 static Bool TyEnt__subst_R_fields ( XArray* /* of TyEnt */ ents, 2844 TyEntIndexCache* ents_cache, 2845 /*MOD*/TyEnt* te ) 2846 { 2847 Bool b, changed = False; 2848 switch (te->tag) { 2849 case Te_EMPTY: 2850 break; 2851 case Te_INDIR: 2852 te->Te.INDIR.indR 2853 = chase_cuOff( &b, ents, ents_cache, te->Te.INDIR.indR ); 2854 if (b) changed = True; 2855 break; 2856 case Te_UNKNOWN: 2857 break; 2858 case Te_Atom: 2859 break; 2860 case Te_Field: 2861 te->Te.Field.typeR 2862 = chase_cuOff( &b, ents, ents_cache, te->Te.Field.typeR ); 2863 if (b) changed = True; 2864 break; 2865 case Te_Bound: 2866 break; 2867 case Te_TyBase: 2868 break; 2869 case Te_TyPorR: 2870 te->Te.TyPorR.typeR 2871 = chase_cuOff( &b, ents, ents_cache, te->Te.TyPorR.typeR ); 2872 if (b) changed = True; 2873 break; 2874 case Te_TyTyDef: 2875 te->Te.TyTyDef.typeR 2876 = chase_cuOff( &b, ents, ents_cache, te->Te.TyTyDef.typeR ); 2877 if (b) changed = True; 2878 break; 2879 case Te_TyStOrUn: 2880 chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyStOrUn.fieldRs ); 2881 if (b) changed = True; 2882 break; 2883 case Te_TyEnum: 2884 chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyEnum.atomRs ); 2885 if (b) changed = True; 2886 break; 2887 case Te_TyArray: 2888 te->Te.TyArray.typeR 2889 = chase_cuOff( &b, ents, ents_cache, te->Te.TyArray.typeR ); 2890 if (b) changed = True; 2891 chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyArray.boundRs ); 2892 if (b) changed = True; 2893 break; 2894 case Te_TyFn: 2895 break; 2896 case Te_TyQual: 2897 te->Te.TyQual.typeR 2898 = chase_cuOff( &b, ents, ents_cache, 
te->Te.TyQual.typeR ); 2899 if (b) changed = True; 2900 break; 2901 case Te_TyVoid: 2902 break; 2903 default: 2904 ML_(pp_TyEnt)(te); 2905 vg_assert(0); 2906 } 2907 return changed; 2908 } 2909 2910 /* Make a pass over 'ents'. For each tyent, inspect the target of any 2911 'R' or 'Rs' fields (those which refer to other tyents), and replace 2912 any which point to INDIR nodes with the target of the indirection 2913 (which should not itself be an indirection). In summary, this 2914 routine shorts out all references to indirection nodes. */ 2915 static 2916 Word dedup_types_substitution_pass ( /*MOD*/XArray* /* of TyEnt */ ents, 2917 TyEntIndexCache* ents_cache ) 2918 { 2919 Word i, n, nChanged = 0; 2920 Bool b; 2921 n = VG_(sizeXA)( ents ); 2922 for (i = 0; i < n; i++) { 2923 TyEnt* ent = VG_(indexXA)( ents, i ); 2924 vg_assert(ent->tag != Te_EMPTY); 2925 /* We have to substitute everything, even indirections, so as to 2926 ensure that chains of indirections don't build up. */ 2927 b = TyEnt__subst_R_fields( ents, ents_cache, ent ); 2928 if (b) 2929 nChanged++; 2930 } 2931 2932 return nChanged; 2933 } 2934 2935 2936 /* Make a pass over 'ents', building a dictionary of TyEnts as we go. 2937 Look up each new tyent in the dictionary in turn. If it is already 2938 in the dictionary, replace this tyent with an indirection to the 2939 existing one, and delete any malloc'd stuff hanging off this one. 2940 In summary, this routine commons up all tyents that are identical 2941 as defined by TyEnt__cmp_by_all_except_cuOff. 
*/ 2942 static 2943 Word dedup_types_commoning_pass ( /*MOD*/XArray* /* of TyEnt */ ents ) 2944 { 2945 Word n, i, nDeleted; 2946 WordFM* dict; /* TyEnt* -> void */ 2947 TyEnt* ent; 2948 UWord keyW, valW; 2949 2950 dict = VG_(newFM)( 2951 ML_(dinfo_zalloc), "di.readdwarf3.dtcp.1", 2952 ML_(dinfo_free), 2953 (Word(*)(UWord,UWord)) ML_(TyEnt__cmp_by_all_except_cuOff) 2954 ); 2955 2956 nDeleted = 0; 2957 n = VG_(sizeXA)( ents ); 2958 for (i = 0; i < n; i++) { 2959 ent = VG_(indexXA)( ents, i ); 2960 vg_assert(ent->tag != Te_EMPTY); 2961 2962 /* Ignore indirections, although check that they are 2963 not forming a cycle. */ 2964 if (ent->tag == Te_INDIR) { 2965 vg_assert(ent->Te.INDIR.indR < ent->cuOff); 2966 continue; 2967 } 2968 2969 keyW = valW = 0; 2970 if (VG_(lookupFM)( dict, &keyW, &valW, (UWord)ent )) { 2971 /* it's already in the dictionary. */ 2972 TyEnt* old = (TyEnt*)keyW; 2973 vg_assert(valW == 0); 2974 vg_assert(old != ent); 2975 vg_assert(old->tag != Te_INDIR); 2976 /* since we are traversing the array in increasing order of 2977 cuOff: */ 2978 vg_assert(old->cuOff < ent->cuOff); 2979 /* So anyway, dump this entry and replace it with an 2980 indirection to the one in the dictionary. Note that the 2981 assertion above guarantees that we cannot create cycles of 2982 indirections, since we are always creating an indirection 2983 to a tyent with a cuOff lower than this one. */ 2984 ML_(TyEnt__make_EMPTY)( ent ); 2985 ent->tag = Te_INDIR; 2986 ent->Te.INDIR.indR = old->cuOff; 2987 nDeleted++; 2988 } else { 2989 /* not in dictionary; add it and keep going. 
*/ 2990 VG_(addToFM)( dict, (UWord)ent, 0 ); 2991 } 2992 } 2993 2994 VG_(deleteFM)( dict, NULL, NULL ); 2995 2996 return nDeleted; 2997 } 2998 2999 3000 static 3001 void dedup_types ( Bool td3, 3002 /*MOD*/XArray* /* of TyEnt */ ents, 3003 TyEntIndexCache* ents_cache ) 3004 { 3005 Word m, n, i, nDel, nSubst, nThresh; 3006 if (0) td3 = True; 3007 3008 n = VG_(sizeXA)( ents ); 3009 3010 /* If a commoning pass and a substitution pass both make fewer than 3011 this many changes, just stop. It's pointless to burn up CPU 3012 time trying to compress the last 1% or so out of the array. */ 3013 nThresh = n / 200; 3014 3015 /* First we must sort .ents by its .cuOff fields, so we 3016 can index into it. */ 3017 VG_(setCmpFnXA)( 3018 ents, 3019 (Int(*)(void*,void*)) ML_(TyEnt__cmp_by_cuOff_only) 3020 ); 3021 VG_(sortXA)( ents ); 3022 3023 /* Now repeatedly do commoning and substitution passes over 3024 the array, until there are no more changes. */ 3025 do { 3026 nDel = dedup_types_commoning_pass ( ents ); 3027 nSubst = dedup_types_substitution_pass ( ents, ents_cache ); 3028 vg_assert(nDel >= 0 && nSubst >= 0); 3029 TRACE_D3(" %ld deletions, %ld substitutions\n", nDel, nSubst); 3030 } while (nDel > nThresh || nSubst > nThresh); 3031 3032 /* Sanity check: all INDIR nodes should point at a non-INDIR thing. 3033 In fact this should be true at the end of every loop iteration 3034 above (a commoning pass followed by a substitution pass), but 3035 checking it on every iteration is excessively expensive. Note, 3036 this loop also computes 'm' for the stats printing below it. 
*/ 3037 m = 0; 3038 n = VG_(sizeXA)( ents ); 3039 for (i = 0; i < n; i++) { 3040 TyEnt *ent, *ind; 3041 ent = VG_(indexXA)( ents, i ); 3042 if (ent->tag != Te_INDIR) continue; 3043 m++; 3044 ind = ML_(TyEnts__index_by_cuOff)( ents, ents_cache, 3045 ent->Te.INDIR.indR ); 3046 vg_assert(ind); 3047 vg_assert(ind->tag != Te_INDIR); 3048 } 3049 3050 TRACE_D3("Overall: %ld before, %ld after\n", n, n-m); 3051 } 3052 3053 3054 /*------------------------------------------------------------*/ 3055 /*--- ---*/ 3056 /*--- Resolution of references to type DIEs ---*/ 3057 /*--- ---*/ 3058 /*------------------------------------------------------------*/ 3059 3060 /* Make a pass through the (temporary) variables array. Examine the 3061 type of each variable, check is it found, and chase any Te_INDIRs. 3062 Postcondition is: each variable has a typeR field that refers to a 3063 valid type in tyents, or a Te_UNKNOWN, and is certainly guaranteed 3064 not to refer to a Te_INDIR. (This is so that we can throw all the 3065 Te_INDIRs away later). */ 3066 3067 __attribute__((noinline)) 3068 static void resolve_variable_types ( 3069 void (*barf)( HChar* ) __attribute__((noreturn)), 3070 /*R-O*/XArray* /* of TyEnt */ ents, 3071 /*MOD*/TyEntIndexCache* ents_cache, 3072 /*MOD*/XArray* /* of TempVar* */ vars 3073 ) 3074 { 3075 Word i, n; 3076 n = VG_(sizeXA)( vars ); 3077 for (i = 0; i < n; i++) { 3078 TempVar* var = *(TempVar**)VG_(indexXA)( vars, i ); 3079 /* This is the stated type of the variable. But it might be 3080 an indirection, so be careful. 
*/ 3081 TyEnt* ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache, 3082 var->typeR ); 3083 if (ent && ent->tag == Te_INDIR) { 3084 ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache, 3085 ent->Te.INDIR.indR ); 3086 vg_assert(ent); 3087 vg_assert(ent->tag != Te_INDIR); 3088 } 3089 3090 /* Deal first with "normal" cases */ 3091 if (ent && ML_(TyEnt__is_type)(ent)) { 3092 var->typeR = ent->cuOff; 3093 continue; 3094 } 3095 3096 /* If there's no ent, it probably we did not manage to read a 3097 type at the cuOffset which is stated as being this variable's 3098 type. Maybe a deficiency in parse_type_DIE. Complain. */ 3099 if (ent == NULL) { 3100 VG_(printf)("\n: Invalid cuOff = 0x%05lx\n", var->typeR ); 3101 barf("resolve_variable_types: " 3102 "cuOff does not refer to a known type"); 3103 } 3104 vg_assert(ent); 3105 /* If ent has any other tag, something bad happened, along the 3106 lines of var->typeR not referring to a type at all. */ 3107 vg_assert(ent->tag == Te_UNKNOWN); 3108 /* Just accept it; the type will be useless, but at least keep 3109 going. 
*/ 3110 var->typeR = ent->cuOff; 3111 } 3112 } 3113 3114 3115 /*------------------------------------------------------------*/ 3116 /*--- ---*/ 3117 /*--- Parsing of Compilation Units ---*/ 3118 /*--- ---*/ 3119 /*------------------------------------------------------------*/ 3120 3121 static Int cmp_TempVar_by_dioff ( void* v1, void* v2 ) { 3122 TempVar* t1 = *(TempVar**)v1; 3123 TempVar* t2 = *(TempVar**)v2; 3124 if (t1->dioff < t2->dioff) return -1; 3125 if (t1->dioff > t2->dioff) return 1; 3126 return 0; 3127 } 3128 3129 static void read_DIE ( 3130 /*MOD*/WordFM* /* of (XArray* of AddrRange, void) */ rangestree, 3131 /*MOD*/XArray* /* of TyEnt */ tyents, 3132 /*MOD*/XArray* /* of TempVar* */ tempvars, 3133 /*MOD*/XArray* /* of GExpr* */ gexprs, 3134 /*MOD*/D3TypeParser* typarser, 3135 /*MOD*/D3VarParser* varparser, 3136 Cursor* c, Bool td3, CUConst* cc, Int level 3137 ) 3138 { 3139 Cursor abbv; 3140 ULong atag, abbv_code; 3141 UWord posn; 3142 UInt has_children; 3143 UWord start_die_c_offset, start_abbv_c_offset; 3144 UWord after_die_c_offset, after_abbv_c_offset; 3145 3146 /* --- Deal with this DIE --- */ 3147 posn = get_position_of_Cursor( c ); 3148 abbv_code = get_ULEB128( c ); 3149 set_abbv_Cursor( &abbv, td3, cc, abbv_code ); 3150 atag = get_ULEB128( &abbv ); 3151 TRACE_D3("\n"); 3152 TRACE_D3(" <%d><%lx>: Abbrev Number: %llu (%s)\n", 3153 level, posn, abbv_code, ML_(pp_DW_TAG)( atag ) ); 3154 3155 if (atag == 0) 3156 cc->barf("read_DIE: invalid zero tag on DIE"); 3157 3158 has_children = get_UChar( &abbv ); 3159 if (has_children != DW_children_no && has_children != DW_children_yes) 3160 cc->barf("read_DIE: invalid has_children value"); 3161 3162 /* We're set up to look at the fields of this DIE. Hand it off to 3163 any parser(s) that want to see it. Since they will in general 3164 advance both the DIE and abbrev cursors, remember their current 3165 settings so that we can then back up and do one final pass over 3166 the DIE, to print out its contents. 
*/ 3167 3168 start_die_c_offset = get_position_of_Cursor( c ); 3169 start_abbv_c_offset = get_position_of_Cursor( &abbv ); 3170 3171 while (True) { 3172 ULong cts; 3173 Int ctsSzB; 3174 UWord ctsMemSzB; 3175 ULong at_name = get_ULEB128( &abbv ); 3176 ULong at_form = get_ULEB128( &abbv ); 3177 if (at_name == 0 && at_form == 0) break; 3178 TRACE_D3(" %18s: ", ML_(pp_DW_AT)(at_name)); 3179 /* Get the form contents, but ignore them; the only purpose is 3180 to print them, if td3 is True */ 3181 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 3182 cc, c, td3, (DW_FORM)at_form ); 3183 TRACE_D3("\t"); 3184 TRACE_D3("\n"); 3185 } 3186 3187 after_die_c_offset = get_position_of_Cursor( c ); 3188 after_abbv_c_offset = get_position_of_Cursor( &abbv ); 3189 3190 set_position_of_Cursor( c, start_die_c_offset ); 3191 set_position_of_Cursor( &abbv, start_abbv_c_offset ); 3192 3193 parse_type_DIE( tyents, 3194 typarser, 3195 (DW_TAG)atag, 3196 posn, 3197 level, 3198 c, /* DIE cursor */ 3199 &abbv, /* abbrev cursor */ 3200 cc, 3201 td3 ); 3202 3203 set_position_of_Cursor( c, start_die_c_offset ); 3204 set_position_of_Cursor( &abbv, start_abbv_c_offset ); 3205 3206 parse_var_DIE( rangestree, 3207 tempvars, 3208 gexprs, 3209 varparser, 3210 (DW_TAG)atag, 3211 posn, 3212 level, 3213 c, /* DIE cursor */ 3214 &abbv, /* abbrev cursor */ 3215 cc, 3216 td3 ); 3217 3218 set_position_of_Cursor( c, after_die_c_offset ); 3219 set_position_of_Cursor( &abbv, after_abbv_c_offset ); 3220 3221 /* --- Now recurse into its children, if any --- */ 3222 if (has_children == DW_children_yes) { 3223 if (0) TRACE_D3("BEGIN children of level %d\n", level); 3224 while (True) { 3225 atag = peek_ULEB128( c ); 3226 if (atag == 0) break; 3227 read_DIE( rangestree, tyents, tempvars, gexprs, 3228 typarser, varparser, 3229 c, td3, cc, level+1 ); 3230 } 3231 /* Now we need to eat the terminating zero */ 3232 atag = get_ULEB128( c ); 3233 vg_assert(atag == 0); 3234 if (0) TRACE_D3("END children of level %d\n", level); 
3235 } 3236 3237 } 3238 3239 3240 static 3241 void new_dwarf3_reader_wrk ( 3242 struct _DebugInfo* di, 3243 __attribute__((noreturn)) void (*barf)( HChar* ), 3244 UChar* debug_info_img, SizeT debug_info_sz, 3245 UChar* debug_abbv_img, SizeT debug_abbv_sz, 3246 UChar* debug_line_img, SizeT debug_line_sz, 3247 UChar* debug_str_img, SizeT debug_str_sz, 3248 UChar* debug_ranges_img, SizeT debug_ranges_sz, 3249 UChar* debug_loc_img, SizeT debug_loc_sz 3250 ) 3251 { 3252 XArray* /* of TyEnt */ tyents; 3253 XArray* /* of TyEnt */ tyents_to_keep; 3254 XArray* /* of GExpr* */ gexprs; 3255 XArray* /* of TempVar* */ tempvars; 3256 WordFM* /* of (XArray* of AddrRange, void) */ rangestree; 3257 TyEntIndexCache* tyents_cache = NULL; 3258 TyEntIndexCache* tyents_to_keep_cache = NULL; 3259 TempVar *varp, *varp2; 3260 GExpr* gexpr; 3261 Cursor abbv; /* for showing .debug_abbrev */ 3262 Cursor info; /* primary cursor for parsing .debug_info */ 3263 Cursor ranges; /* for showing .debug_ranges */ 3264 D3TypeParser typarser; 3265 D3VarParser varparser; 3266 Addr dr_base; 3267 UWord dr_offset; 3268 Word i, j, n; 3269 Bool td3 = di->trace_symtab; 3270 XArray* /* of TempVar* */ dioff_lookup_tab; 3271 #if 0 3272 /* This doesn't work properly because it assumes all entries are 3273 packed end to end, with no holes. But that doesn't always 3274 appear to be the case, so it loses sync. And the D3 spec 3275 doesn't appear to require a no-hole situation either. 
*/ 3276 /* Display .debug_loc */ 3277 Addr dl_base; 3278 UWord dl_offset; 3279 Cursor loc; /* for showing .debug_loc */ 3280 TRACE_SYMTAB("\n"); 3281 TRACE_SYMTAB("\n------ The contents of .debug_loc ------\n"); 3282 TRACE_SYMTAB(" Offset Begin End Expression\n"); 3283 init_Cursor( &loc, debug_loc_img, 3284 debug_loc_sz, 0, barf, 3285 "Overrun whilst reading .debug_loc section(1)" ); 3286 dl_base = 0; 3287 dl_offset = 0; 3288 while (True) { 3289 UWord w1, w2; 3290 UWord len; 3291 if (is_at_end_Cursor( &loc )) 3292 break; 3293 3294 /* Read a (host-)word pair. This is something of a hack since 3295 the word size to read is really dictated by the ELF file; 3296 however, we assume we're reading a file with the same 3297 word-sizeness as the host. Reasonably enough. */ 3298 w1 = get_UWord( &loc ); 3299 w2 = get_UWord( &loc ); 3300 3301 if (w1 == 0 && w2 == 0) { 3302 /* end of list. reset 'base' */ 3303 TRACE_D3(" %08lx <End of list>\n", dl_offset); 3304 dl_base = 0; 3305 dl_offset = get_position_of_Cursor( &loc ); 3306 continue; 3307 } 3308 3309 if (w1 == -1UL) { 3310 /* new value for 'base' */ 3311 TRACE_D3(" %08lx %16lx %08lx (base address)\n", 3312 dl_offset, w1, w2); 3313 dl_base = w2; 3314 continue; 3315 } 3316 3317 /* else a location expression follows */ 3318 TRACE_D3(" %08lx %08lx %08lx ", 3319 dl_offset, w1 + dl_base, w2 + dl_base); 3320 len = (UWord)get_UShort( &loc ); 3321 while (len > 0) { 3322 UChar byte = get_UChar( &loc ); 3323 TRACE_D3("%02x", (UInt)byte); 3324 len--; 3325 } 3326 TRACE_SYMTAB("\n"); 3327 } 3328 #endif 3329 3330 /* Display .debug_ranges */ 3331 TRACE_SYMTAB("\n"); 3332 TRACE_SYMTAB("\n------ The contents of .debug_ranges ------\n"); 3333 TRACE_SYMTAB(" Offset Begin End\n"); 3334 init_Cursor( &ranges, debug_ranges_img, 3335 debug_ranges_sz, 0, barf, 3336 "Overrun whilst reading .debug_ranges section(1)" ); 3337 dr_base = 0; 3338 dr_offset = 0; 3339 while (True) { 3340 UWord w1, w2; 3341 3342 if (is_at_end_Cursor( &ranges )) 3343 break; 
3344 3345 /* Read a (host-)word pair. This is something of a hack since 3346 the word size to read is really dictated by the ELF file; 3347 however, we assume we're reading a file with the same 3348 word-sizeness as the host. Reasonably enough. */ 3349 w1 = get_UWord( &ranges ); 3350 w2 = get_UWord( &ranges ); 3351 3352 if (w1 == 0 && w2 == 0) { 3353 /* end of list. reset 'base' */ 3354 TRACE_D3(" %08lx <End of list>\n", dr_offset); 3355 dr_base = 0; 3356 dr_offset = get_position_of_Cursor( &ranges ); 3357 continue; 3358 } 3359 3360 if (w1 == -1UL) { 3361 /* new value for 'base' */ 3362 TRACE_D3(" %08lx %16lx %08lx (base address)\n", 3363 dr_offset, w1, w2); 3364 dr_base = w2; 3365 continue; 3366 } 3367 3368 /* else a range [w1+base, w2+base) is denoted */ 3369 TRACE_D3(" %08lx %08lx %08lx\n", 3370 dr_offset, w1 + dr_base, w2 + dr_base); 3371 } 3372 3373 /* Display .debug_abbrev */ 3374 init_Cursor( &abbv, debug_abbv_img, debug_abbv_sz, 0, barf, 3375 "Overrun whilst reading .debug_abbrev section" ); 3376 TRACE_SYMTAB("\n"); 3377 TRACE_SYMTAB("\n------ The contents of .debug_abbrev ------\n"); 3378 while (True) { 3379 if (is_at_end_Cursor( &abbv )) 3380 break; 3381 /* Read one abbreviation table */ 3382 TRACE_D3(" Number TAG\n"); 3383 while (True) { 3384 ULong atag; 3385 UInt has_children; 3386 ULong acode = get_ULEB128( &abbv ); 3387 if (acode == 0) break; /* end of the table */ 3388 atag = get_ULEB128( &abbv ); 3389 has_children = get_UChar( &abbv ); 3390 TRACE_D3(" %llu %s [%s]\n", 3391 acode, ML_(pp_DW_TAG)(atag), 3392 ML_(pp_DW_children)(has_children)); 3393 while (True) { 3394 ULong at_name = get_ULEB128( &abbv ); 3395 ULong at_form = get_ULEB128( &abbv ); 3396 if (at_name == 0 && at_form == 0) break; 3397 TRACE_D3(" %18s %s\n", 3398 ML_(pp_DW_AT)(at_name), ML_(pp_DW_FORM)(at_form)); 3399 } 3400 } 3401 } 3402 TRACE_SYMTAB("\n"); 3403 3404 /* Now loop over the Compilation Units listed in the .debug_info 3405 section (see D3SPEC sec 7.5) paras 1 and 2. 
Each compilation 3406 unit contains a Compilation Unit Header followed by precisely 3407 one DW_TAG_compile_unit or DW_TAG_partial_unit DIE. */ 3408 init_Cursor( &info, debug_info_img, debug_info_sz, 0, barf, 3409 "Overrun whilst reading .debug_info section" ); 3410 3411 /* We'll park the harvested type information in here. Also create 3412 a fake "void" entry with offset D3_FAKEVOID_CUOFF, so we always 3413 have at least one type entry to refer to. D3_FAKEVOID_CUOFF is 3414 huge and presumably will not occur in any valid DWARF3 file -- 3415 it would need to have a .debug_info section 4GB long for that to 3416 happen. These type entries end up in the DebugInfo. */ 3417 tyents = VG_(newXA)( ML_(dinfo_zalloc), 3418 "di.readdwarf3.ndrw.1 (TyEnt temp array)", 3419 ML_(dinfo_free), sizeof(TyEnt) ); 3420 { TyEnt tyent; 3421 VG_(memset)(&tyent, 0, sizeof(tyent)); 3422 tyent.tag = Te_TyVoid; 3423 tyent.cuOff = D3_FAKEVOID_CUOFF; 3424 tyent.Te.TyVoid.isFake = True; 3425 VG_(addToXA)( tyents, &tyent ); 3426 } 3427 { TyEnt tyent; 3428 VG_(memset)(&tyent, 0, sizeof(tyent)); 3429 tyent.tag = Te_UNKNOWN; 3430 tyent.cuOff = D3_INVALID_CUOFF; 3431 VG_(addToXA)( tyents, &tyent ); 3432 } 3433 3434 /* This is a tree used to unique-ify the range lists that are 3435 manufactured by parse_var_DIE. References to the keys in the 3436 tree wind up in .rngMany fields in TempVars. We'll need to 3437 delete this tree, and the XArrays attached to it, at the end of 3438 this function. */ 3439 rangestree = VG_(newFM)( ML_(dinfo_zalloc), 3440 "di.readdwarf3.ndrw.2 (rangestree)", 3441 ML_(dinfo_free), 3442 (Word(*)(UWord,UWord))cmp__XArrays_of_AddrRange ); 3443 3444 /* List of variables we're accumulating. These don't end up in the 3445 DebugInfo; instead their contents are handed to ML_(addVar) and 3446 the list elements are then deleted. 
*/ 3447 tempvars = VG_(newXA)( ML_(dinfo_zalloc), 3448 "di.readdwarf3.ndrw.3 (TempVar*s array)", 3449 ML_(dinfo_free), 3450 sizeof(TempVar*) ); 3451 3452 /* List of GExprs we're accumulating. These wind up in the 3453 DebugInfo. */ 3454 gexprs = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.4", 3455 ML_(dinfo_free), sizeof(GExpr*) ); 3456 3457 /* We need a D3TypeParser to keep track of partially constructed 3458 types. It'll be discarded as soon as we've completed the CU, 3459 since the resulting information is tipped in to 'tyents' as it 3460 is generated. */ 3461 VG_(memset)( &typarser, 0, sizeof(typarser) ); 3462 typarser.sp = -1; 3463 typarser.language = '?'; 3464 for (i = 0; i < N_D3_TYPE_STACK; i++) { 3465 typarser.qparentE[i].tag = Te_EMPTY; 3466 typarser.qparentE[i].cuOff = D3_INVALID_CUOFF; 3467 } 3468 3469 VG_(memset)( &varparser, 0, sizeof(varparser) ); 3470 varparser.sp = -1; 3471 3472 TRACE_D3("\n------ Parsing .debug_info section ------\n"); 3473 while (True) { 3474 UWord cu_start_offset, cu_offset_now; 3475 CUConst cc; 3476 /* It may be that the stated size of this CU is larger than the 3477 amount of stuff actually in it. icc9 seems to generate CUs 3478 thusly. We use these variables to figure out if this is 3479 indeed the case, and if so how many bytes we need to skip to 3480 get to the start of the next CU. Not skipping those bytes 3481 causes us to misidentify the start of the next CU, and it all 3482 goes badly wrong after that (not surprisingly). */ 3483 UWord cu_size_including_IniLen, cu_amount_used; 3484 3485 /* It seems icc9 finishes the DIE info before debug_info_sz 3486 bytes have been used up. So be flexible, and declare the 3487 sequence complete if there is not enough remaining bytes to 3488 hold even the smallest conceivable CU header. (11 bytes I 3489 reckon). */ 3490 /* JRS 23Jan09: I suspect this is no longer necessary now that 3491 the code below contains a 'while (cu_amount_used < 3492 cu_size_including_IniLen ...' 
style loop, which skips over 3493 any leftover bytes at the end of a CU in the case where the 3494 CU's stated size is larger than its actual size (as 3495 determined by reading all its DIEs). However, for prudence, 3496 I'll leave the following test in place. I can't see that a 3497 CU header can be smaller than 11 bytes, so I don't think 3498 there's any harm possible through the test -- it just adds 3499 robustness. */ 3500 Word avail = get_remaining_length_Cursor( &info ); 3501 if (avail < 11) { 3502 if (avail > 0) 3503 TRACE_D3("new_dwarf3_reader_wrk: warning: " 3504 "%ld unused bytes after end of DIEs\n", avail); 3505 break; 3506 } 3507 3508 /* Check the varparser's stack is in a sane state. */ 3509 vg_assert(varparser.sp == -1); 3510 for (i = 0; i < N_D3_VAR_STACK; i++) { 3511 vg_assert(varparser.ranges[i] == NULL); 3512 vg_assert(varparser.level[i] == 0); 3513 } 3514 for (i = 0; i < N_D3_TYPE_STACK; i++) { 3515 vg_assert(typarser.qparentE[i].cuOff == D3_INVALID_CUOFF); 3516 vg_assert(typarser.qparentE[i].tag == Te_EMPTY); 3517 vg_assert(typarser.qlevel[i] == 0); 3518 } 3519 3520 cu_start_offset = get_position_of_Cursor( &info ); 3521 TRACE_D3("\n"); 3522 TRACE_D3(" Compilation Unit @ offset 0x%lx:\n", cu_start_offset); 3523 /* parse_CU_header initialises the CU's set_abbv_Cursor cache 3524 (saC_cache) */ 3525 parse_CU_Header( &cc, td3, &info, 3526 (UChar*)debug_abbv_img, debug_abbv_sz ); 3527 cc.debug_str_img = debug_str_img; 3528 cc.debug_str_sz = debug_str_sz; 3529 cc.debug_ranges_img = debug_ranges_img; 3530 cc.debug_ranges_sz = debug_ranges_sz; 3531 cc.debug_loc_img = debug_loc_img; 3532 cc.debug_loc_sz = debug_loc_sz; 3533 cc.debug_line_img = debug_line_img; 3534 cc.debug_line_sz = debug_line_sz; 3535 cc.debug_info_img = debug_info_img; 3536 cc.debug_info_sz = debug_info_sz; 3537 cc.cu_start_offset = cu_start_offset; 3538 cc.di = di; 3539 /* The CU's svma can be deduced by looking at the AT_low_pc 3540 value in the top level TAG_compile_unit, which is 
the topmost 3541 DIE. We'll leave it for the 'varparser' to acquire that info 3542 and fill it in -- since it is the only party to want to know 3543 it. */ 3544 cc.cu_svma_known = False; 3545 cc.cu_svma = 0; 3546 3547 /* Create a fake outermost-level range covering the entire 3548 address range. So we always have *something* to catch all 3549 variable declarations. */ 3550 varstack_push( &cc, &varparser, td3, 3551 unitary_range_list(0UL, ~0UL), 3552 -1, False/*isFunc*/, NULL/*fbGX*/ ); 3553 3554 /* And set up the file name table. When we come across the top 3555 level DIE for this CU (which is what the next call to 3556 read_DIE should process) we will copy all the file names out 3557 of the .debug_line img area and use this table to look up the 3558 copies when we later see filename numbers in DW_TAG_variables 3559 etc. */ 3560 vg_assert(!varparser.filenameTable ); 3561 varparser.filenameTable 3562 = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.5", 3563 ML_(dinfo_free), 3564 sizeof(UChar*) ); 3565 vg_assert(varparser.filenameTable); 3566 3567 /* Now read the one-and-only top-level DIE for this CU. */ 3568 vg_assert(varparser.sp == 0); 3569 read_DIE( rangestree, 3570 tyents, tempvars, gexprs, 3571 &typarser, &varparser, 3572 &info, td3, &cc, 0 ); 3573 3574 cu_offset_now = get_position_of_Cursor( &info ); 3575 3576 if (0) VG_(printf)("Travelled: %lu size %llu\n", 3577 cu_offset_now - cc.cu_start_offset, 3578 cc.unit_length + (cc.is_dw64 ? 12 : 4)); 3579 3580 /* How big the CU claims it is .. */ 3581 cu_size_including_IniLen = cc.unit_length + (cc.is_dw64 ? 12 : 4); 3582 /* .. vs how big we have found it to be */ 3583 cu_amount_used = cu_offset_now - cc.cu_start_offset; 3584 3585 if (1) TRACE_D3("offset now %ld, d-i-size %ld\n", 3586 cu_offset_now, debug_info_sz); 3587 if (cu_offset_now > debug_info_sz) 3588 barf("toplevel DIEs beyond end of CU"); 3589 3590 /* If the CU is bigger than it claims to be, we've got a serious 3591 problem. 
*/ 3592 if (cu_amount_used > cu_size_including_IniLen) 3593 barf("CU's actual size appears to be larger than it claims it is"); 3594 3595 /* If the CU is smaller than it claims to be, we need to skip some 3596 bytes. Loop updates cu_offset_new and cu_amount_used. */ 3597 while (cu_amount_used < cu_size_including_IniLen 3598 && get_remaining_length_Cursor( &info ) > 0) { 3599 if (0) VG_(printf)("SKIP\n"); 3600 (void)get_UChar( &info ); 3601 cu_offset_now = get_position_of_Cursor( &info ); 3602 cu_amount_used = cu_offset_now - cc.cu_start_offset; 3603 } 3604 3605 if (cu_offset_now == debug_info_sz) 3606 break; 3607 3608 /* Preen to level -2. DIEs have level >= 0 so -2 cannot occur 3609 anywhere else at all. Our fake the-entire-address-space 3610 range is at level -1, so preening to -2 should completely 3611 empty the stack out. */ 3612 TRACE_D3("\n"); 3613 varstack_preen( &varparser, td3, -2 ); 3614 /* Similarly, empty the type stack out. */ 3615 typestack_preen( &typarser, td3, -2 ); 3616 /* else keep going */ 3617 3618 TRACE_D3("set_abbv_Cursor cache: %lu queries, %lu misses\n", 3619 cc.saC_cache_queries, cc.saC_cache_misses); 3620 3621 vg_assert(varparser.filenameTable ); 3622 VG_(deleteXA)( varparser.filenameTable ); 3623 varparser.filenameTable = NULL; 3624 } 3625 3626 /* From here on we're post-processing the stuff we got 3627 out of the .debug_info section. 
*/ 3628 if (td3) { 3629 TRACE_D3("\n"); 3630 ML_(pp_TyEnts)(tyents, "Initial type entity (TyEnt) array"); 3631 TRACE_D3("\n"); 3632 TRACE_D3("------ Compressing type entries ------\n"); 3633 } 3634 3635 tyents_cache = ML_(dinfo_zalloc)( "di.readdwarf3.ndrw.6", 3636 sizeof(TyEntIndexCache) ); 3637 ML_(TyEntIndexCache__invalidate)( tyents_cache ); 3638 dedup_types( td3, tyents, tyents_cache ); 3639 if (td3) { 3640 TRACE_D3("\n"); 3641 ML_(pp_TyEnts)(tyents, "After type entity (TyEnt) compression"); 3642 } 3643 3644 TRACE_D3("\n"); 3645 TRACE_D3("------ Resolving the types of variables ------\n" ); 3646 resolve_variable_types( barf, tyents, tyents_cache, tempvars ); 3647 3648 /* Copy all the non-INDIR tyents into a new table. For large 3649 .so's, about 90% of the tyents will by now have been resolved to 3650 INDIRs, and we no longer need them, and so don't need to store 3651 them. */ 3652 tyents_to_keep 3653 = VG_(newXA)( ML_(dinfo_zalloc), 3654 "di.readdwarf3.ndrw.7 (TyEnt to-keep array)", 3655 ML_(dinfo_free), sizeof(TyEnt) ); 3656 n = VG_(sizeXA)( tyents ); 3657 for (i = 0; i < n; i++) { 3658 TyEnt* ent = VG_(indexXA)( tyents, i ); 3659 if (ent->tag != Te_INDIR) 3660 VG_(addToXA)( tyents_to_keep, ent ); 3661 } 3662 3663 VG_(deleteXA)( tyents ); 3664 tyents = NULL; 3665 ML_(dinfo_free)( tyents_cache ); 3666 tyents_cache = NULL; 3667 3668 /* Sort tyents_to_keep so we can lookup in it. A complete (if 3669 minor) waste of time, since tyents itself is sorted, but 3670 necessary since VG_(lookupXA) refuses to cooperate if we 3671 don't. */ 3672 VG_(setCmpFnXA)( 3673 tyents_to_keep, 3674 (Int(*)(void*,void*)) ML_(TyEnt__cmp_by_cuOff_only) 3675 ); 3676 VG_(sortXA)( tyents_to_keep ); 3677 3678 /* Enable cacheing on tyents_to_keep */ 3679 tyents_to_keep_cache 3680 = ML_(dinfo_zalloc)( "di.readdwarf3.ndrw.8", 3681 sizeof(TyEntIndexCache) ); 3682 ML_(TyEntIndexCache__invalidate)( tyents_to_keep_cache ); 3683 3684 /* And record the tyents in the DebugInfo. 
We do this before 3685 starting to hand variables to ML_(addVar), since if ML_(addVar) 3686 wants to do debug printing (of the types of said vars) then it 3687 will need the tyents.*/ 3688 vg_assert(!di->admin_tyents); 3689 di->admin_tyents = tyents_to_keep; 3690 3691 /* Bias all the location expressions. */ 3692 TRACE_D3("\n"); 3693 TRACE_D3("------ Biasing the location expressions ------\n" ); 3694 3695 n = VG_(sizeXA)( gexprs ); 3696 for (i = 0; i < n; i++) { 3697 gexpr = *(GExpr**)VG_(indexXA)( gexprs, i ); 3698 bias_GX( gexpr, di ); 3699 } 3700 3701 TRACE_D3("\n"); 3702 TRACE_D3("------ Acquired the following variables: ------\n\n"); 3703 3704 /* Park (pointers to) all the vars in an XArray, so we can look up 3705 abstract origins quickly. The array is sorted (hence, looked-up 3706 by) the .dioff fields. Since the .dioffs should be in strictly 3707 ascending order, there is no need to sort the array after 3708 construction. The ascendingness is however asserted for. */ 3709 dioff_lookup_tab 3710 = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.9", 3711 ML_(dinfo_free), 3712 sizeof(TempVar*) ); 3713 vg_assert(dioff_lookup_tab); 3714 3715 n = VG_(sizeXA)( tempvars ); 3716 for (i = 0; i < n; i++) { 3717 varp = *(TempVar**)VG_(indexXA)( tempvars, i ); 3718 if (i > 0) { 3719 varp2 = *(TempVar**)VG_(indexXA)( tempvars, i-1 ); 3720 /* why should this hold? Only, I think, because we've 3721 constructed the array by reading .debug_info sequentially, 3722 and so the array .dioff fields should reflect that, and be 3723 strictly ascending. */ 3724 vg_assert(varp2->dioff < varp->dioff); 3725 } 3726 VG_(addToXA)( dioff_lookup_tab, &varp ); 3727 } 3728 VG_(setCmpFnXA)( dioff_lookup_tab, cmp_TempVar_by_dioff ); 3729 VG_(sortXA)( dioff_lookup_tab ); /* POINTLESS; FIXME: rm */ 3730 3731 /* Now visit each var. Collect up as much info as possible for 3732 each var and hand it to ML_(addVar). 
*/ 3733 n = VG_(sizeXA)( tempvars ); 3734 for (j = 0; j < n; j++) { 3735 TyEnt* ent; 3736 varp = *(TempVar**)VG_(indexXA)( tempvars, j ); 3737 3738 /* Possibly show .. */ 3739 if (td3) { 3740 VG_(printf)("<%lx> addVar: level %d: %s :: ", 3741 varp->dioff, 3742 varp->level, 3743 varp->name ? varp->name : (UChar*)"<anon_var>" ); 3744 if (varp->typeR) { 3745 ML_(pp_TyEnt_C_ishly)( tyents_to_keep, varp->typeR ); 3746 } else { 3747 VG_(printf)("NULL"); 3748 } 3749 VG_(printf)("\n Loc="); 3750 if (varp->gexpr) { 3751 ML_(pp_GX)(varp->gexpr); 3752 } else { 3753 VG_(printf)("NULL"); 3754 } 3755 VG_(printf)("\n"); 3756 if (varp->fbGX) { 3757 VG_(printf)(" FrB="); 3758 ML_(pp_GX)( varp->fbGX ); 3759 VG_(printf)("\n"); 3760 } else { 3761 VG_(printf)(" FrB=none\n"); 3762 } 3763 VG_(printf)(" declared at: %s:%d\n", 3764 varp->fName ? varp->fName : (UChar*)"NULL", 3765 varp->fLine ); 3766 if (varp->absOri != (UWord)D3_INVALID_CUOFF) 3767 VG_(printf)(" abstract origin: <%lx>\n", varp->absOri); 3768 } 3769 3770 /* Skip variables which have no location. These must be 3771 abstract instances; they are useless as-is since with no 3772 location they have no specified memory location. They will 3773 presumably be referred to via the absOri fields of other 3774 variables. */ 3775 if (!varp->gexpr) { 3776 TRACE_D3(" SKIP (no location)\n\n"); 3777 continue; 3778 } 3779 3780 /* So it has a location, at least. If it refers to some other 3781 entry through its absOri field, pull in further info through 3782 that. 
*/ 3783 if (varp->absOri != (UWord)D3_INVALID_CUOFF) { 3784 Bool found; 3785 Word ixFirst, ixLast; 3786 TempVar key; 3787 TempVar* keyp = &key; 3788 TempVar *varAI; 3789 VG_(memset)(&key, 0, sizeof(key)); /* not necessary */ 3790 key.dioff = varp->absOri; /* this is what we want to find */ 3791 found = VG_(lookupXA)( dioff_lookup_tab, &keyp, 3792 &ixFirst, &ixLast ); 3793 if (!found) { 3794 /* barf("DW_AT_abstract_origin can't be resolved"); */ 3795 TRACE_D3(" SKIP (DW_AT_abstract_origin can't be resolved)\n\n"); 3796 continue; 3797 } 3798 /* If the following fails, there is more than one entry with 3799 the same dioff. Which can't happen. */ 3800 vg_assert(ixFirst == ixLast); 3801 varAI = *(TempVar**)VG_(indexXA)( dioff_lookup_tab, ixFirst ); 3802 /* stay sane */ 3803 vg_assert(varAI); 3804 vg_assert(varAI->dioff == varp->absOri); 3805 3806 /* Copy what useful info we can. */ 3807 if (varAI->typeR && !varp->typeR) 3808 varp->typeR = varAI->typeR; 3809 if (varAI->name && !varp->name) 3810 varp->name = varAI->name; 3811 if (varAI->fName && !varp->fName) 3812 varp->fName = varAI->fName; 3813 if (varAI->fLine > 0 && varp->fLine == 0) 3814 varp->fLine = varAI->fLine; 3815 } 3816 3817 /* Give it a name if it doesn't have one. */ 3818 if (!varp->name) 3819 varp->name = ML_(addStr)( di, "<anon_var>", -1 ); 3820 3821 /* So now does it have enough info to be useful? */ 3822 /* NOTE: re typeR: this is a hack. If typeR is Te_UNKNOWN then 3823 the type didn't get resolved. Really, in that case 3824 something's broken earlier on, and should be fixed, rather 3825 than just skipping the variable. */ 3826 ent = ML_(TyEnts__index_by_cuOff)( tyents_to_keep, 3827 tyents_to_keep_cache, 3828 varp->typeR ); 3829 /* The next two assertions should be guaranteed by 3830 our previous call to resolve_variable_types. 
*/ 3831 vg_assert(ent); 3832 vg_assert(ML_(TyEnt__is_type)(ent) || ent->tag == Te_UNKNOWN); 3833 3834 if (ent->tag == Te_UNKNOWN) continue; 3835 3836 vg_assert(varp->gexpr); 3837 vg_assert(varp->name); 3838 vg_assert(varp->typeR); 3839 vg_assert(varp->level >= 0); 3840 3841 /* Ok. So we're going to keep it. Call ML_(addVar) once for 3842 each address range in which the variable exists. */ 3843 TRACE_D3(" ACQUIRE for range(s) "); 3844 { AddrRange oneRange; 3845 AddrRange* varPcRanges; 3846 Word nVarPcRanges; 3847 /* Set up to iterate over address ranges, however 3848 represented. */ 3849 if (varp->nRanges == 0 || varp->nRanges == 1) { 3850 vg_assert(!varp->rngMany); 3851 if (varp->nRanges == 0) { 3852 vg_assert(varp->rngOneMin == 0); 3853 vg_assert(varp->rngOneMax == 0); 3854 } 3855 nVarPcRanges = varp->nRanges; 3856 oneRange.aMin = varp->rngOneMin; 3857 oneRange.aMax = varp->rngOneMax; 3858 varPcRanges = &oneRange; 3859 } else { 3860 vg_assert(varp->rngMany); 3861 vg_assert(varp->rngOneMin == 0); 3862 vg_assert(varp->rngOneMax == 0); 3863 nVarPcRanges = VG_(sizeXA)(varp->rngMany); 3864 vg_assert(nVarPcRanges >= 2); 3865 vg_assert(nVarPcRanges == (Word)varp->nRanges); 3866 varPcRanges = VG_(indexXA)(varp->rngMany, 0); 3867 } 3868 if (varp->level == 0) 3869 vg_assert( nVarPcRanges == 1 ); 3870 /* and iterate */ 3871 for (i = 0; i < nVarPcRanges; i++) { 3872 Addr pcMin = varPcRanges[i].aMin; 3873 Addr pcMax = varPcRanges[i].aMax; 3874 vg_assert(pcMin <= pcMax); 3875 /* Level 0 is the global address range. So at level 0 we 3876 don't want to bias pcMin/pcMax; but at all other levels 3877 we do since those are derived from svmas in the Dwarf 3878 we're reading. Be paranoid ... */ 3879 if (varp->level == 0) { 3880 vg_assert(pcMin == (Addr)0); 3881 vg_assert(pcMax == ~(Addr)0); 3882 } else { 3883 /* vg_assert(pcMin > (Addr)0); 3884 No .. we can legitimately expect to see ranges like 3885 0x0-0x11D (pre-biasing, of course). 
*/ 3886 vg_assert(pcMax < ~(Addr)0); 3887 } 3888 3889 /* Apply text biasing, for non-global variables. */ 3890 if (varp->level > 0) { 3891 pcMin += di->text_debug_bias; 3892 pcMax += di->text_debug_bias; 3893 } 3894 3895 if (i > 0 && (i%2) == 0) 3896 TRACE_D3("\n "); 3897 TRACE_D3("[%#lx,%#lx] ", pcMin, pcMax ); 3898 3899 ML_(addVar)( 3900 di, varp->level, 3901 pcMin, pcMax, 3902 varp->name, varp->typeR, 3903 varp->gexpr, varp->fbGX, 3904 varp->fName, varp->fLine, td3 3905 ); 3906 } 3907 } 3908 3909 TRACE_D3("\n\n"); 3910 /* and move on to the next var */ 3911 } 3912 3913 /* Now free all the TempVars */ 3914 n = VG_(sizeXA)( tempvars ); 3915 for (i = 0; i < n; i++) { 3916 varp = *(TempVar**)VG_(indexXA)( tempvars, i ); 3917 ML_(dinfo_free)(varp); 3918 } 3919 VG_(deleteXA)( tempvars ); 3920 tempvars = NULL; 3921 3922 /* and the temp lookup table */ 3923 VG_(deleteXA)( dioff_lookup_tab ); 3924 3925 /* and the ranges tree. Note that we need to also free the XArrays 3926 which constitute the keys, hence pass VG_(deleteXA) as a 3927 key-finalizer. */ 3928 VG_(deleteFM)( rangestree, (void(*)(UWord))VG_(deleteXA), NULL ); 3929 3930 /* and the tyents_to_keep cache */ 3931 ML_(dinfo_free)( tyents_to_keep_cache ); 3932 tyents_to_keep_cache = NULL; 3933 3934 /* and the file name table (just the array, not the entries 3935 themselves). (Apparently, 2008-Oct-23, varparser.filenameTable 3936 can be NULL here, for icc9 generated Dwarf3. 
Not sure what that 3937 signifies (a deeper problem with the reader?)) */ 3938 if (varparser.filenameTable) { 3939 VG_(deleteXA)( varparser.filenameTable ); 3940 varparser.filenameTable = NULL; 3941 } 3942 3943 /* record the GExprs in di so they can be freed later */ 3944 vg_assert(!di->admin_gexprs); 3945 di->admin_gexprs = gexprs; 3946 } 3947 3948 3949 /*------------------------------------------------------------*/ 3950 /*--- ---*/ 3951 /*--- The "new" DWARF3 reader -- top level control logic ---*/ 3952 /*--- ---*/ 3953 /*------------------------------------------------------------*/ 3954 3955 /* --- !!! --- EXTERNAL HEADERS start --- !!! --- */ 3956 #include <setjmp.h> /* For jmp_buf */ 3957 /* --- !!! --- EXTERNAL HEADERS end --- !!! --- */ 3958 3959 static Bool d3rd_jmpbuf_valid = False; 3960 static HChar* d3rd_jmpbuf_reason = NULL; 3961 static jmp_buf d3rd_jmpbuf; 3962 3963 static __attribute__((noreturn)) void barf ( HChar* reason ) { 3964 vg_assert(d3rd_jmpbuf_valid); 3965 d3rd_jmpbuf_reason = reason; 3966 __builtin_longjmp(&d3rd_jmpbuf, 1); 3967 /*NOTREACHED*/ 3968 vg_assert(0); 3969 } 3970 3971 3972 void 3973 ML_(new_dwarf3_reader) ( 3974 struct _DebugInfo* di, 3975 UChar* debug_info_img, SizeT debug_info_sz, 3976 UChar* debug_abbv_img, SizeT debug_abbv_sz, 3977 UChar* debug_line_img, SizeT debug_line_sz, 3978 UChar* debug_str_img, SizeT debug_str_sz, 3979 UChar* debug_ranges_img, SizeT debug_ranges_sz, 3980 UChar* debug_loc_img, SizeT debug_loc_sz 3981 ) 3982 { 3983 volatile Int jumped; 3984 volatile Bool td3 = di->trace_symtab; 3985 3986 /* Run the _wrk function to read the dwarf3. If it succeeds, it 3987 just returns normally. If there is any failure, it longjmp's 3988 back here, having first set d3rd_jmpbuf_reason to something 3989 useful. 
*/ 3990 vg_assert(d3rd_jmpbuf_valid == False); 3991 vg_assert(d3rd_jmpbuf_reason == NULL); 3992 3993 d3rd_jmpbuf_valid = True; 3994 jumped = __builtin_setjmp(&d3rd_jmpbuf); 3995 if (jumped == 0) { 3996 /* try this ... */ 3997 new_dwarf3_reader_wrk( di, barf, 3998 debug_info_img, debug_info_sz, 3999 debug_abbv_img, debug_abbv_sz, 4000 debug_line_img, debug_line_sz, 4001 debug_str_img, debug_str_sz, 4002 debug_ranges_img, debug_ranges_sz, 4003 debug_loc_img, debug_loc_sz ); 4004 d3rd_jmpbuf_valid = False; 4005 TRACE_D3("\n------ .debug_info reading was successful ------\n"); 4006 } else { 4007 /* It longjmp'd. */ 4008 d3rd_jmpbuf_valid = False; 4009 /* Can't longjump without giving some sort of reason. */ 4010 vg_assert(d3rd_jmpbuf_reason != NULL); 4011 4012 TRACE_D3("\n------ .debug_info reading failed ------\n"); 4013 4014 ML_(symerr)(di, True, d3rd_jmpbuf_reason); 4015 } 4016 4017 d3rd_jmpbuf_valid = False; 4018 d3rd_jmpbuf_reason = NULL; 4019 } 4020 4021 4022 4023 /* --- Unused code fragments which might be useful one day. 
--- */ 4024 4025 #if 0 4026 /* Read the arange tables */ 4027 TRACE_SYMTAB("\n"); 4028 TRACE_SYMTAB("\n------ The contents of .debug_arange ------\n"); 4029 init_Cursor( &aranges, debug_aranges_img, 4030 debug_aranges_sz, 0, barf, 4031 "Overrun whilst reading .debug_aranges section" ); 4032 while (True) { 4033 ULong len, d_i_offset; 4034 Bool is64; 4035 UShort version; 4036 UChar asize, segsize; 4037 4038 if (is_at_end_Cursor( &aranges )) 4039 break; 4040 /* Read one arange thingy */ 4041 /* initial_length field */ 4042 len = get_Initial_Length( &is64, &aranges, 4043 "in .debug_aranges: invalid initial-length field" ); 4044 version = get_UShort( &aranges ); 4045 d_i_offset = get_Dwarfish_UWord( &aranges, is64 ); 4046 asize = get_UChar( &aranges ); 4047 segsize = get_UChar( &aranges ); 4048 TRACE_D3(" Length: %llu\n", len); 4049 TRACE_D3(" Version: %d\n", (Int)version); 4050 TRACE_D3(" Offset into .debug_info: %llx\n", d_i_offset); 4051 TRACE_D3(" Pointer Size: %d\n", (Int)asize); 4052 TRACE_D3(" Segment Size: %d\n", (Int)segsize); 4053 TRACE_D3("\n"); 4054 TRACE_D3(" Address Length\n"); 4055 4056 while ((get_position_of_Cursor( &aranges ) % (2 * asize)) > 0) { 4057 (void)get_UChar( & aranges ); 4058 } 4059 while (True) { 4060 ULong address = get_Dwarfish_UWord( &aranges, asize==8 ); 4061 ULong length = get_Dwarfish_UWord( &aranges, asize==8 ); 4062 TRACE_D3(" 0x%016llx 0x%llx\n", address, length); 4063 if (address == 0 && length == 0) break; 4064 } 4065 } 4066 TRACE_SYMTAB("\n"); 4067 #endif 4068 4069 #endif // defined(VGO_linux) || defined(VGO_darwin) 4070 4071 /*--------------------------------------------------------------------*/ 4072 /*--- end ---*/ 4073 /*--------------------------------------------------------------------*/ 4074