// Copyright (c) 2010 Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// Original author: Jim Blandy <jimb (at) mozilla.com> <jimb (at) red-bean.com>

// Implement the DwarfCUToModule class; see dwarf_cu_to_module.h.

// For <inttypes.h> PRI* macros, before anything else might #include it.
35 #ifndef __STDC_FORMAT_MACROS 36 #define __STDC_FORMAT_MACROS 37 #endif /* __STDC_FORMAT_MACROS */ 38 39 #include "common/dwarf_cu_to_module.h" 40 41 #include <assert.h> 42 #if !defined(__ANDROID__) 43 #include <cxxabi.h> 44 #endif 45 #include <inttypes.h> 46 #include <stdio.h> 47 48 #include <algorithm> 49 #include <utility> 50 51 #include "common/dwarf_line_to_module.h" 52 #include "common/unordered.h" 53 54 namespace google_breakpad { 55 56 using std::map; 57 using std::pair; 58 using std::sort; 59 using std::vector; 60 61 // Data provided by a DWARF specification DIE. 62 // 63 // In DWARF, the DIE for a definition may contain a DW_AT_specification 64 // attribute giving the offset of the corresponding declaration DIE, and 65 // the definition DIE may omit information given in the declaration. For 66 // example, it's common for a function's address range to appear only in 67 // its definition DIE, but its name to appear only in its declaration 68 // DIE. 69 // 70 // The dumper needs to be able to follow DW_AT_specification links to 71 // bring all this information together in a FUNC record. Conveniently, 72 // DIEs that are the target of such links have a DW_AT_declaration flag 73 // set, so we can identify them when we first see them, and record their 74 // contents for later reference. 75 // 76 // A Specification holds information gathered from a declaration DIE that 77 // we may need if we find a DW_AT_specification link pointing to it. 78 struct DwarfCUToModule::Specification { 79 // The qualified name that can be found by demangling DW_AT_MIPS_linkage_name. 80 string qualified_name; 81 82 // The name of the enclosing scope, or the empty string if there is none. 83 string enclosing_name; 84 85 // The name for the specification DIE itself, without any enclosing 86 // name components. 87 string unqualified_name; 88 }; 89 90 // An abstract origin -- base definition of an inline function. 
91 struct AbstractOrigin { 92 AbstractOrigin() : name() {} 93 explicit AbstractOrigin(const string& name) : name(name) {} 94 95 string name; 96 }; 97 98 typedef map<uint64, AbstractOrigin> AbstractOriginByOffset; 99 100 // Data global to the DWARF-bearing file that is private to the 101 // DWARF-to-Module process. 102 struct DwarfCUToModule::FilePrivate { 103 // A set of strings used in this CU. Before storing a string in one of 104 // our data structures, insert it into this set, and then use the string 105 // from the set. 106 // 107 // In some STL implementations, strings are reference-counted internally, 108 // meaning that simply using strings from this set, even if passed by 109 // value, assigned, or held directly in structures and containers 110 // (map<string, ...>, for example), causes those strings to share a 111 // single instance of each distinct piece of text. GNU's libstdc++ uses 112 // reference counts, and I believe MSVC did as well, at some point. 113 // However, C++ '11 implementations are moving away from reference 114 // counting. 115 // 116 // In other implementations, string assignments copy the string's text, 117 // so this set will actually hold yet another copy of the string (although 118 // everything will still work). To improve memory consumption portably, 119 // we will probably need to use pointers to strings held in this set. 120 unordered_set<string> common_strings; 121 122 // A map from offsets of DIEs within the .debug_info section to 123 // Specifications describing those DIEs. Specification references can 124 // cross compilation unit boundaries. 
125 SpecificationByOffset specifications; 126 127 AbstractOriginByOffset origins; 128 }; 129 130 DwarfCUToModule::FileContext::FileContext(const string &filename, 131 Module *module, 132 bool handle_inter_cu_refs) 133 : filename_(filename), 134 module_(module), 135 handle_inter_cu_refs_(handle_inter_cu_refs), 136 file_private_(new FilePrivate()) { 137 } 138 139 DwarfCUToModule::FileContext::~FileContext() { 140 } 141 142 void DwarfCUToModule::FileContext::AddSectionToSectionMap( 143 const string& name, const char* contents, uint64 length) { 144 section_map_[name] = std::make_pair(contents, length); 145 } 146 147 void DwarfCUToModule::FileContext::ClearSectionMapForTest() { 148 section_map_.clear(); 149 } 150 151 const dwarf2reader::SectionMap& 152 DwarfCUToModule::FileContext::section_map() const { 153 return section_map_; 154 } 155 156 void DwarfCUToModule::FileContext::ClearSpecifications() { 157 if (!handle_inter_cu_refs_) 158 file_private_->specifications.clear(); 159 } 160 161 bool DwarfCUToModule::FileContext::IsUnhandledInterCUReference( 162 uint64 offset, uint64 compilation_unit_start) const { 163 if (handle_inter_cu_refs_) 164 return false; 165 return offset < compilation_unit_start; 166 } 167 168 // Information global to the particular compilation unit we're 169 // parsing. This is for data shared across the CU's entire DIE tree, 170 // and parameters from the code invoking the CU parser. 171 struct DwarfCUToModule::CUContext { 172 CUContext(FileContext *file_context_arg, WarningReporter *reporter_arg) 173 : file_context(file_context_arg), 174 reporter(reporter_arg), 175 language(Language::CPlusPlus) {} 176 177 ~CUContext() { 178 for (vector<Module::Function *>::iterator it = functions.begin(); 179 it != functions.end(); ++it) { 180 delete *it; 181 } 182 }; 183 184 // The DWARF-bearing file into which this CU was incorporated. 185 FileContext *file_context; 186 187 // For printing error messages. 
188 WarningReporter *reporter; 189 190 // The source language of this compilation unit. 191 const Language *language; 192 193 // The functions defined in this compilation unit. We accumulate 194 // them here during parsing. Then, in DwarfCUToModule::Finish, we 195 // assign them lines and add them to file_context->module. 196 // 197 // Destroying this destroys all the functions this vector points to. 198 vector<Module::Function *> functions; 199 }; 200 201 // Information about the context of a particular DIE. This is for 202 // information that changes as we descend the tree towards the leaves: 203 // the containing classes/namespaces, etc. 204 struct DwarfCUToModule::DIEContext { 205 // The fully-qualified name of the context. For example, for a 206 // tree like: 207 // 208 // DW_TAG_namespace Foo 209 // DW_TAG_class Bar 210 // DW_TAG_subprogram Baz 211 // 212 // in a C++ compilation unit, the DIEContext's name for the 213 // DW_TAG_subprogram DIE would be "Foo::Bar". The DIEContext's 214 // name for the DW_TAG_namespace DIE would be "". 215 string name; 216 }; 217 218 // An abstract base class for all the dumper's DIE handlers. 219 class DwarfCUToModule::GenericDIEHandler: public dwarf2reader::DIEHandler { 220 public: 221 // Create a handler for the DIE at OFFSET whose compilation unit is 222 // described by CU_CONTEXT, and whose immediate context is described 223 // by PARENT_CONTEXT. 224 GenericDIEHandler(CUContext *cu_context, DIEContext *parent_context, 225 uint64 offset) 226 : cu_context_(cu_context), 227 parent_context_(parent_context), 228 offset_(offset), 229 declaration_(false), 230 specification_(NULL) { } 231 232 // Derived classes' ProcessAttributeUnsigned can defer to this to 233 // handle DW_AT_declaration, or simply not override it. 
234 void ProcessAttributeUnsigned(enum DwarfAttribute attr, 235 enum DwarfForm form, 236 uint64 data); 237 238 // Derived classes' ProcessAttributeReference can defer to this to 239 // handle DW_AT_specification, or simply not override it. 240 void ProcessAttributeReference(enum DwarfAttribute attr, 241 enum DwarfForm form, 242 uint64 data); 243 244 // Derived classes' ProcessAttributeReference can defer to this to 245 // handle DW_AT_specification, or simply not override it. 246 void ProcessAttributeString(enum DwarfAttribute attr, 247 enum DwarfForm form, 248 const string &data); 249 250 protected: 251 // Compute and return the fully-qualified name of the DIE. If this 252 // DIE is a declaration DIE, to be cited by other DIEs' 253 // DW_AT_specification attributes, record its enclosing name and 254 // unqualified name in the specification table. 255 // 256 // Use this from EndAttributes member functions, not ProcessAttribute* 257 // functions; only the former can be sure that all the DIE's attributes 258 // have been seen. 259 string ComputeQualifiedName(); 260 261 CUContext *cu_context_; 262 DIEContext *parent_context_; 263 uint64 offset_; 264 265 // Place the name in the global set of strings. Even though this looks 266 // like a copy, all the major std::string implementations use reference 267 // counting internally, so the effect is to have all the data structures 268 // share copies of strings whenever possible. 269 // FIXME: Should this return something like a string_ref to avoid the 270 // assumption about how strings are implemented? 271 string AddStringToPool(const string &str); 272 273 // If this DIE has a DW_AT_declaration attribute, this is its value. 274 // It is false on DIEs with no DW_AT_declaration attribute. 275 bool declaration_; 276 277 // If this DIE has a DW_AT_specification attribute, this is the 278 // Specification structure for the DIE the attribute refers to. 279 // Otherwise, this is NULL. 
280 Specification *specification_; 281 282 // The value of the DW_AT_name attribute, or the empty string if the 283 // DIE has no such attribute. 284 string name_attribute_; 285 286 // The demangled value of the DW_AT_MIPS_linkage_name attribute, or the empty 287 // string if the DIE has no such attribute or its content could not be 288 // demangled. 289 string demangled_name_; 290 }; 291 292 void DwarfCUToModule::GenericDIEHandler::ProcessAttributeUnsigned( 293 enum DwarfAttribute attr, 294 enum DwarfForm form, 295 uint64 data) { 296 switch (attr) { 297 case dwarf2reader::DW_AT_declaration: declaration_ = (data != 0); break; 298 default: break; 299 } 300 } 301 302 void DwarfCUToModule::GenericDIEHandler::ProcessAttributeReference( 303 enum DwarfAttribute attr, 304 enum DwarfForm form, 305 uint64 data) { 306 switch (attr) { 307 case dwarf2reader::DW_AT_specification: { 308 FileContext *file_context = cu_context_->file_context; 309 if (file_context->IsUnhandledInterCUReference( 310 data, cu_context_->reporter->cu_offset())) { 311 cu_context_->reporter->UnhandledInterCUReference(offset_, data); 312 break; 313 } 314 // Find the Specification to which this attribute refers, and 315 // set specification_ appropriately. We could do more processing 316 // here, but it's better to leave the real work to our 317 // EndAttribute member function, at which point we know we have 318 // seen all the DIE's attributes. 319 SpecificationByOffset *specifications = 320 &file_context->file_private_->specifications; 321 SpecificationByOffset::iterator spec = specifications->find(data); 322 if (spec != specifications->end()) { 323 specification_ = &spec->second; 324 } else { 325 // Technically, there's no reason a DW_AT_specification 326 // couldn't be a forward reference, but supporting that would 327 // be a lot of work (changing to a two-pass structure), and I 328 // don't think any producers we care about ever emit such 329 // things. 
330 cu_context_->reporter->UnknownSpecification(offset_, data); 331 } 332 break; 333 } 334 default: break; 335 } 336 } 337 338 string DwarfCUToModule::GenericDIEHandler::AddStringToPool(const string &str) { 339 pair<unordered_set<string>::iterator, bool> result = 340 cu_context_->file_context->file_private_->common_strings.insert(str); 341 return *result.first; 342 } 343 344 void DwarfCUToModule::GenericDIEHandler::ProcessAttributeString( 345 enum DwarfAttribute attr, 346 enum DwarfForm form, 347 const string &data) { 348 switch (attr) { 349 case dwarf2reader::DW_AT_name: 350 name_attribute_ = AddStringToPool(data); 351 break; 352 case dwarf2reader::DW_AT_MIPS_linkage_name: { 353 char* demangled = NULL; 354 int status = -1; 355 #if !defined(__ANDROID__) // Android NDK doesn't provide abi::__cxa_demangle. 356 demangled = abi::__cxa_demangle(data.c_str(), NULL, NULL, &status); 357 #endif 358 if (status != 0) { 359 cu_context_->reporter->DemangleError(data, status); 360 demangled_name_ = ""; 361 break; 362 } 363 if (demangled) { 364 demangled_name_ = AddStringToPool(demangled); 365 free(reinterpret_cast<void*>(demangled)); 366 } 367 break; 368 } 369 default: break; 370 } 371 } 372 373 string DwarfCUToModule::GenericDIEHandler::ComputeQualifiedName() { 374 // Use the demangled name, if one is available. Demangled names are 375 // preferable to those inferred from the DWARF structure because they 376 // include argument types. 377 const string *qualified_name = NULL; 378 if (!demangled_name_.empty()) { 379 // Found it is this DIE. 380 qualified_name = &demangled_name_; 381 } else if (specification_ && !specification_->qualified_name.empty()) { 382 // Found it on the specification. 383 qualified_name = &specification_->qualified_name; 384 } 385 386 const string *unqualified_name; 387 const string *enclosing_name; 388 if (!qualified_name) { 389 // Find our unqualified name. 
If the DIE has its own DW_AT_name 390 // attribute, then use that; otherwise, check our specification. 391 if (name_attribute_.empty() && specification_) 392 unqualified_name = &specification_->unqualified_name; 393 else 394 unqualified_name = &name_attribute_; 395 396 // Find the name of our enclosing context. If we have a 397 // specification, it's the specification's enclosing context that 398 // counts; otherwise, use this DIE's context. 399 if (specification_) 400 enclosing_name = &specification_->enclosing_name; 401 else 402 enclosing_name = &parent_context_->name; 403 } 404 405 // Prepare the return value before upcoming mutations possibly invalidate the 406 // existing pointers. 407 string return_value; 408 if (qualified_name) { 409 return_value = *qualified_name; 410 } else { 411 // Combine the enclosing name and unqualified name to produce our 412 // own fully-qualified name. 413 return_value = cu_context_->language->MakeQualifiedName(*enclosing_name, 414 *unqualified_name); 415 } 416 417 // If this DIE was marked as a declaration, record its names in the 418 // specification table. 419 if (declaration_) { 420 Specification spec; 421 if (qualified_name) { 422 spec.qualified_name = *qualified_name; 423 } else { 424 spec.enclosing_name = *enclosing_name; 425 spec.unqualified_name = *unqualified_name; 426 } 427 cu_context_->file_context->file_private_->specifications[offset_] = spec; 428 } 429 430 return return_value; 431 } 432 433 // A handler class for DW_TAG_subprogram DIEs. 
434 class DwarfCUToModule::FuncHandler: public GenericDIEHandler { 435 public: 436 FuncHandler(CUContext *cu_context, DIEContext *parent_context, 437 uint64 offset) 438 : GenericDIEHandler(cu_context, parent_context, offset), 439 low_pc_(0), high_pc_(0), high_pc_form_(dwarf2reader::DW_FORM_addr), 440 abstract_origin_(NULL), inline_(false) { } 441 void ProcessAttributeUnsigned(enum DwarfAttribute attr, 442 enum DwarfForm form, 443 uint64 data); 444 void ProcessAttributeSigned(enum DwarfAttribute attr, 445 enum DwarfForm form, 446 int64 data); 447 void ProcessAttributeReference(enum DwarfAttribute attr, 448 enum DwarfForm form, 449 uint64 data); 450 451 bool EndAttributes(); 452 void Finish(); 453 454 private: 455 // The fully-qualified name, as derived from name_attribute_, 456 // specification_, parent_context_. Computed in EndAttributes. 457 string name_; 458 uint64 low_pc_, high_pc_; // DW_AT_low_pc, DW_AT_high_pc 459 DwarfForm high_pc_form_; // DW_AT_high_pc can be length or address. 460 const AbstractOrigin* abstract_origin_; 461 bool inline_; 462 }; 463 464 void DwarfCUToModule::FuncHandler::ProcessAttributeUnsigned( 465 enum DwarfAttribute attr, 466 enum DwarfForm form, 467 uint64 data) { 468 switch (attr) { 469 // If this attribute is present at all --- even if its value is 470 // DW_INL_not_inlined --- then GCC may cite it as someone else's 471 // DW_AT_abstract_origin attribute. 
472 case dwarf2reader::DW_AT_inline: inline_ = true; break; 473 474 case dwarf2reader::DW_AT_low_pc: low_pc_ = data; break; 475 case dwarf2reader::DW_AT_high_pc: 476 high_pc_form_ = form; 477 high_pc_ = data; 478 break; 479 480 default: 481 GenericDIEHandler::ProcessAttributeUnsigned(attr, form, data); 482 break; 483 } 484 } 485 486 void DwarfCUToModule::FuncHandler::ProcessAttributeSigned( 487 enum DwarfAttribute attr, 488 enum DwarfForm form, 489 int64 data) { 490 switch (attr) { 491 // If this attribute is present at all --- even if its value is 492 // DW_INL_not_inlined --- then GCC may cite it as someone else's 493 // DW_AT_abstract_origin attribute. 494 case dwarf2reader::DW_AT_inline: inline_ = true; break; 495 496 default: 497 break; 498 } 499 } 500 501 void DwarfCUToModule::FuncHandler::ProcessAttributeReference( 502 enum DwarfAttribute attr, 503 enum DwarfForm form, 504 uint64 data) { 505 switch (attr) { 506 case dwarf2reader::DW_AT_abstract_origin: { 507 const AbstractOriginByOffset& origins = 508 cu_context_->file_context->file_private_->origins; 509 AbstractOriginByOffset::const_iterator origin = origins.find(data); 510 if (origin != origins.end()) { 511 abstract_origin_ = &(origin->second); 512 } else { 513 cu_context_->reporter->UnknownAbstractOrigin(offset_, data); 514 } 515 break; 516 } 517 default: 518 GenericDIEHandler::ProcessAttributeReference(attr, form, data); 519 break; 520 } 521 } 522 523 bool DwarfCUToModule::FuncHandler::EndAttributes() { 524 // Compute our name, and record a specification, if appropriate. 525 name_ = ComputeQualifiedName(); 526 if (name_.empty() && abstract_origin_) { 527 name_ = abstract_origin_->name; 528 } 529 return true; 530 } 531 532 void DwarfCUToModule::FuncHandler::Finish() { 533 // Make high_pc_ an address, if it isn't already. 534 if (high_pc_form_ != dwarf2reader::DW_FORM_addr) { 535 high_pc_ += low_pc_; 536 } 537 538 // Did we collect the information we need? 
Not all DWARF function 539 // entries have low and high addresses (for example, inlined 540 // functions that were never used), but all the ones we're 541 // interested in cover a non-empty range of bytes. 542 if (low_pc_ < high_pc_) { 543 // Malformed DWARF may omit the name, but all Module::Functions must 544 // have names. 545 string name; 546 if (!name_.empty()) { 547 name = name_; 548 } else { 549 cu_context_->reporter->UnnamedFunction(offset_); 550 name = "<name omitted>"; 551 } 552 553 // Create a Module::Function based on the data we've gathered, and 554 // add it to the functions_ list. 555 scoped_ptr<Module::Function> func(new Module::Function(name, low_pc_)); 556 func->size = high_pc_ - low_pc_; 557 func->parameter_size = 0; 558 if (func->address) { 559 // If the function address is zero this is a sign that this function 560 // description is just empty debug data and should just be discarded. 561 cu_context_->functions.push_back(func.release()); 562 } 563 } else if (inline_) { 564 AbstractOrigin origin(name_); 565 cu_context_->file_context->file_private_->origins[offset_] = origin; 566 } 567 } 568 569 // A handler for DIEs that contain functions and contribute a 570 // component to their names: namespaces, classes, etc. 571 class DwarfCUToModule::NamedScopeHandler: public GenericDIEHandler { 572 public: 573 NamedScopeHandler(CUContext *cu_context, DIEContext *parent_context, 574 uint64 offset) 575 : GenericDIEHandler(cu_context, parent_context, offset) { } 576 bool EndAttributes(); 577 DIEHandler *FindChildHandler(uint64 offset, enum DwarfTag tag); 578 579 private: 580 DIEContext child_context_; // A context for our children. 
581 }; 582 583 bool DwarfCUToModule::NamedScopeHandler::EndAttributes() { 584 child_context_.name = ComputeQualifiedName(); 585 return true; 586 } 587 588 dwarf2reader::DIEHandler *DwarfCUToModule::NamedScopeHandler::FindChildHandler( 589 uint64 offset, 590 enum DwarfTag tag) { 591 switch (tag) { 592 case dwarf2reader::DW_TAG_subprogram: 593 return new FuncHandler(cu_context_, &child_context_, offset); 594 case dwarf2reader::DW_TAG_namespace: 595 case dwarf2reader::DW_TAG_class_type: 596 case dwarf2reader::DW_TAG_structure_type: 597 case dwarf2reader::DW_TAG_union_type: 598 return new NamedScopeHandler(cu_context_, &child_context_, offset); 599 default: 600 return NULL; 601 } 602 } 603 604 void DwarfCUToModule::WarningReporter::CUHeading() { 605 if (printed_cu_header_) 606 return; 607 fprintf(stderr, "%s: in compilation unit '%s' (offset 0x%llx):\n", 608 filename_.c_str(), cu_name_.c_str(), cu_offset_); 609 printed_cu_header_ = true; 610 } 611 612 void DwarfCUToModule::WarningReporter::UnknownSpecification(uint64 offset, 613 uint64 target) { 614 CUHeading(); 615 fprintf(stderr, "%s: the DIE at offset 0x%llx has a DW_AT_specification" 616 " attribute referring to the die at offset 0x%llx, which either" 617 " was not marked as a declaration, or comes later in the file\n", 618 filename_.c_str(), offset, target); 619 } 620 621 void DwarfCUToModule::WarningReporter::UnknownAbstractOrigin(uint64 offset, 622 uint64 target) { 623 CUHeading(); 624 fprintf(stderr, "%s: the DIE at offset 0x%llx has a DW_AT_abstract_origin" 625 " attribute referring to the die at offset 0x%llx, which either" 626 " was not marked as an inline, or comes later in the file\n", 627 filename_.c_str(), offset, target); 628 } 629 630 void DwarfCUToModule::WarningReporter::MissingSection(const string &name) { 631 CUHeading(); 632 fprintf(stderr, "%s: warning: couldn't find DWARF '%s' section\n", 633 filename_.c_str(), name.c_str()); 634 } 635 636 void 
DwarfCUToModule::WarningReporter::BadLineInfoOffset(uint64 offset) { 637 CUHeading(); 638 fprintf(stderr, "%s: warning: line number data offset beyond end" 639 " of '.debug_line' section\n", 640 filename_.c_str()); 641 } 642 643 void DwarfCUToModule::WarningReporter::UncoveredHeading() { 644 if (printed_unpaired_header_) 645 return; 646 CUHeading(); 647 fprintf(stderr, "%s: warning: skipping unpaired lines/functions:\n", 648 filename_.c_str()); 649 printed_unpaired_header_ = true; 650 } 651 652 void DwarfCUToModule::WarningReporter::UncoveredFunction( 653 const Module::Function &function) { 654 if (!uncovered_warnings_enabled_) 655 return; 656 UncoveredHeading(); 657 fprintf(stderr, " function%s: %s\n", 658 function.size == 0 ? " (zero-length)" : "", 659 function.name.c_str()); 660 } 661 662 void DwarfCUToModule::WarningReporter::UncoveredLine(const Module::Line &line) { 663 if (!uncovered_warnings_enabled_) 664 return; 665 UncoveredHeading(); 666 fprintf(stderr, " line%s: %s:%d at 0x%" PRIx64 "\n", 667 (line.size == 0 ? 
" (zero-length)" : ""), 668 line.file->name.c_str(), line.number, line.address); 669 } 670 671 void DwarfCUToModule::WarningReporter::UnnamedFunction(uint64 offset) { 672 CUHeading(); 673 fprintf(stderr, "%s: warning: function at offset 0x%llx has no name\n", 674 filename_.c_str(), offset); 675 } 676 677 void DwarfCUToModule::WarningReporter::DemangleError( 678 const string &input, int error) { 679 CUHeading(); 680 fprintf(stderr, "%s: warning: failed to demangle %s with error %d\n", 681 filename_.c_str(), input.c_str(), error); 682 } 683 684 void DwarfCUToModule::WarningReporter::UnhandledInterCUReference( 685 uint64 offset, uint64 target) { 686 CUHeading(); 687 fprintf(stderr, "%s: warning: the DIE at offset 0x%llx has a " 688 "DW_FORM_ref_addr attribute with an inter-CU reference to " 689 "0x%llx, but inter-CU reference handling is turned off.\n", 690 filename_.c_str(), offset, target); 691 } 692 693 DwarfCUToModule::DwarfCUToModule(FileContext *file_context, 694 LineToModuleHandler *line_reader, 695 WarningReporter *reporter) 696 : line_reader_(line_reader), 697 cu_context_(new CUContext(file_context, reporter)), 698 child_context_(new DIEContext()), 699 has_source_line_info_(false) { 700 } 701 702 DwarfCUToModule::~DwarfCUToModule() { 703 } 704 705 void DwarfCUToModule::ProcessAttributeSigned(enum DwarfAttribute attr, 706 enum DwarfForm form, 707 int64 data) { 708 switch (attr) { 709 case dwarf2reader::DW_AT_language: // source language of this CU 710 SetLanguage(static_cast<DwarfLanguage>(data)); 711 break; 712 default: 713 break; 714 } 715 } 716 717 void DwarfCUToModule::ProcessAttributeUnsigned(enum DwarfAttribute attr, 718 enum DwarfForm form, 719 uint64 data) { 720 switch (attr) { 721 case dwarf2reader::DW_AT_stmt_list: // Line number information. 
722 has_source_line_info_ = true; 723 source_line_offset_ = data; 724 break; 725 case dwarf2reader::DW_AT_language: // source language of this CU 726 SetLanguage(static_cast<DwarfLanguage>(data)); 727 break; 728 default: 729 break; 730 } 731 } 732 733 void DwarfCUToModule::ProcessAttributeString(enum DwarfAttribute attr, 734 enum DwarfForm form, 735 const string &data) { 736 switch (attr) { 737 case dwarf2reader::DW_AT_name: 738 cu_context_->reporter->SetCUName(data); 739 break; 740 case dwarf2reader::DW_AT_comp_dir: 741 line_reader_->StartCompilationUnit(data); 742 break; 743 default: 744 break; 745 } 746 } 747 748 bool DwarfCUToModule::EndAttributes() { 749 return true; 750 } 751 752 dwarf2reader::DIEHandler *DwarfCUToModule::FindChildHandler( 753 uint64 offset, 754 enum DwarfTag tag) { 755 switch (tag) { 756 case dwarf2reader::DW_TAG_subprogram: 757 return new FuncHandler(cu_context_.get(), child_context_.get(), offset); 758 case dwarf2reader::DW_TAG_namespace: 759 case dwarf2reader::DW_TAG_class_type: 760 case dwarf2reader::DW_TAG_structure_type: 761 case dwarf2reader::DW_TAG_union_type: 762 return new NamedScopeHandler(cu_context_.get(), child_context_.get(), 763 offset); 764 default: 765 return NULL; 766 } 767 } 768 769 void DwarfCUToModule::SetLanguage(DwarfLanguage language) { 770 switch (language) { 771 case dwarf2reader::DW_LANG_Java: 772 cu_context_->language = Language::Java; 773 break; 774 775 // DWARF has no generic language code for assembly language; this is 776 // what the GNU toolchain uses. 777 case dwarf2reader::DW_LANG_Mips_Assembler: 778 cu_context_->language = Language::Assembler; 779 break; 780 781 // C++ covers so many cases that it probably has some way to cope 782 // with whatever the other languages throw at us. So make it the 783 // default. 784 // 785 // Objective C and Objective C++ seem to create entries for 786 // methods whose DW_AT_name values are already fully-qualified: 787 // "-[Classname method:]". 
These appear at the top level. 788 // 789 // DWARF data for C should never include namespaces or functions 790 // nested in struct types, but if it ever does, then C++'s 791 // notation is probably not a bad choice for that. 792 default: 793 case dwarf2reader::DW_LANG_ObjC: 794 case dwarf2reader::DW_LANG_ObjC_plus_plus: 795 case dwarf2reader::DW_LANG_C: 796 case dwarf2reader::DW_LANG_C89: 797 case dwarf2reader::DW_LANG_C99: 798 case dwarf2reader::DW_LANG_C_plus_plus: 799 cu_context_->language = Language::CPlusPlus; 800 break; 801 } 802 } 803 804 void DwarfCUToModule::ReadSourceLines(uint64 offset) { 805 const dwarf2reader::SectionMap §ion_map 806 = cu_context_->file_context->section_map(); 807 dwarf2reader::SectionMap::const_iterator map_entry 808 = section_map.find(".debug_line"); 809 // Mac OS X puts DWARF data in sections whose names begin with "__" 810 // instead of ".". 811 if (map_entry == section_map.end()) 812 map_entry = section_map.find("__debug_line"); 813 if (map_entry == section_map.end()) { 814 cu_context_->reporter->MissingSection(".debug_line"); 815 return; 816 } 817 const char *section_start = map_entry->second.first; 818 uint64 section_length = map_entry->second.second; 819 if (offset >= section_length) { 820 cu_context_->reporter->BadLineInfoOffset(offset); 821 return; 822 } 823 line_reader_->ReadProgram(section_start + offset, section_length - offset, 824 cu_context_->file_context->module_, &lines_); 825 } 826 827 namespace { 828 // Return true if ADDRESS falls within the range of ITEM. 829 template <class T> 830 inline bool within(const T &item, Module::Address address) { 831 // Because Module::Address is unsigned, and unsigned arithmetic 832 // wraps around, this will be false if ADDRESS falls before the 833 // start of ITEM, or if it falls after ITEM's end. 
return address - item.address < item.size;
}
}

// Distribute the line records in lines_ among the functions in
// cu_context_->functions.
//
// Both sequences are sorted by address and then swept once, in parallel,
// from lower to higher addresses. Each stretch of addresses covered by
// both a function and a line is appended to that function's `lines`
// vector as a copy of the line, trimmed to the stretch. Stretches covered
// by only one of the two are reported through cu_context_->reporter
// (UncoveredFunction / UncoveredLine), at most once in a row per function
// or line.
void DwarfCUToModule::AssignLinesToFunctions() {
  vector<Module::Function *> *functions = &cu_context_->functions;
  WarningReporter *reporter = cu_context_->reporter;

  // This would be simpler if we assumed that source line entries
  // don't cross function boundaries.  However, there's no real reason
  // to assume that (say) a series of function definitions on the same
  // line wouldn't get coalesced into one line number entry.  The
  // DWARF spec certainly makes no such promises.
  //
  // So treat the functions and lines as peers, and take the trouble
  // to compute their ranges' intersections precisely.  In any case,
  // the hair here is a constant factor for performance; the
  // complexity from here on out is linear.

  // Put both our functions and lines in order by address.
  std::sort(functions->begin(), functions->end(),
            Module::Function::CompareByAddress);
  std::sort(lines_.begin(), lines_.end(), Module::Line::CompareByAddress);

  // The last line that we used any piece of.  We use this only for
  // generating warnings.
  const Module::Line *last_line_used = NULL;

  // The last function and line we warned about --- so we can avoid
  // doing so more than once.
  const Module::Function *last_function_cited = NULL;
  const Module::Line *last_line_cited = NULL;

  // Make a single pass through both vectors from lower to higher
  // addresses, populating each Function's lines vector with lines
  // from our lines_ vector that fall within the function's address
  // range.
  vector<Module::Function *>::iterator func_it = functions->begin();
  vector<Module::Line>::const_iterator line_it = lines_.begin();

  // The address the sweep has reached so far.  It is assigned in every
  // branch below that does not return.
  Module::Address current;

  // Pointers to the referents of func_it and line_it, or NULL if the
  // iterator is at the end of the sequence.
  Module::Function *func;
  const Module::Line *line;

  // Start current at the beginning of the first line or function,
  // whichever is earlier.
  if (func_it != functions->end() && line_it != lines_.end()) {
    func = *func_it;
    line = &*line_it;
    current = std::min(func->address, line->address);
  } else if (line_it != lines_.end()) {
    // Lines only: there are no functions to receive them.
    func = NULL;
    line = &*line_it;
    current = line->address;
  } else if (func_it != functions->end()) {
    // Functions only: there are no lines to hand out.
    func = *func_it;
    line = NULL;
    current = (*func_it)->address;
  } else {
    // Neither functions nor lines: nothing to do.
    return;
  }

  while (func || line) {
    // This loop has two invariants that hold at the top.
    //
    // First, at least one of the iterators is not at the end of its
    // sequence, and those that are not refer to the earliest
    // function or line that contains or starts after CURRENT.
    //
    // Note that every byte is in one of four states: it is covered
    // or not covered by a function, and, independently, it is
    // covered or not covered by a line.
    //
    // The second invariant is that CURRENT refers to a byte whose
    // state is different from its predecessor, or it refers to the
    // first byte in the address space.  In other words, CURRENT is
    // always the address of a transition.
    //
    // Note that, although each iteration advances CURRENT from one
    // transition address to the next, it might not advance the
    // iterators.  Suppose we have a function that starts with a
    // line, has a gap, and then a second line, and suppose that we
    // enter an iteration with CURRENT at the end of the first line.
    // The next transition address is the start of the second line,
    // after the gap, so the iteration should advance CURRENT to that
    // point.  At the head of that iteration, the invariants require
    // that the line iterator be pointing at the second line.  But
    // this is also true at the head of the next.  And clearly, the
    // iteration must not change the function iterator.  So neither
    // iterator moves.

    // Assert the first invariant (see above).
    assert(!func || current < func->address || within(*func, current));
    assert(!line || current < line->address || within(*line, current));

    // The next transition after CURRENT.  Every branch of the if/else
    // below assigns it.
    Module::Address next_transition;

    // Figure out which state we're in, add lines or warn, and compute
    // the next transition address.
    if (func && current >= func->address) {
      if (line && current >= line->address) {
        // Covered by both a line and a function.
        //
        // func_left and line_left are the number of bytes remaining in
        // the function and in the line, counted from CURRENT.
        Module::Address func_left = func->size - (current - func->address);
        Module::Address line_left = line->size - (current - line->address);
        // This may overflow, but things work out.
        next_transition = current + std::min(func_left, line_left);
        // Record a copy of the line trimmed to [current, next_transition).
        Module::Line l = *line;
        l.address = current;
        l.size = next_transition - current;
        func->lines.push_back(l);
        last_line_used = line;
      } else {
        // Covered by a function, but no line.
        if (func != last_function_cited) {
          reporter->UncoveredFunction(*func);
          last_function_cited = func;
        }
        // The uncovered stretch ends where the next line starts, if that
        // is inside this function; otherwise at the function's end.
        if (line && within(*func, line->address))
          next_transition = line->address;
        else
          // If this overflows, we'll catch it below.
          next_transition = func->address + func->size;
      }
    } else {
      if (line && current >= line->address) {
        // Covered by a line, but no function.
        //
        // If GCC emits padding after one function to align the start
        // of the next, then it will attribute the padding
        // instructions to the last source line of the function (to
        // reduce the size of the line number info), but omit it from
        // the DW_AT_{low,high}_pc range given in .debug_info (since it
        // costs nothing to be precise there).  If we did use at least
        // some of the line we're about to skip, and it ends at the
        // start of the next function, then assume this is what
        // happened, and don't warn.
        if (line != last_line_cited
            && !(func
                 && line == last_line_used
                 && func->address - line->address == line->size)) {
          reporter->UncoveredLine(*line);
          last_line_cited = line;
        }
        // The uncovered stretch ends where the next function starts, if
        // that is inside this line; otherwise at the line's end.
        if (func && within(*line, func->address))
          next_transition = func->address;
        else
          // If this overflows, we'll catch it below.
          next_transition = line->address + line->size;
      } else {
        // Covered by neither a function nor a line.  By the invariant,
        // both func and line begin after CURRENT.  The next transition
        // is the start of the next function or next line, whichever
        // is earliest.
        assert(func || line);
        if (func && line)
          next_transition = std::min(func->address, line->address);
        else if (func)
          next_transition = func->address;
        else
          next_transition = line->address;
      }
    }

    // If a function or line abuts the end of the address space, then
    // next_transition may end up being zero, in which case we've completed
    // our pass.  Handle that here, instead of trying to deal with it in
    // each place we compute next_transition.
    if (!next_transition)
      break;

    // Advance iterators as needed.  If lines overlap or functions overlap,
    // then we could go around more than once.  We don't worry too much
    // about what result we produce in that case, just as long as we don't
    // hang or crash.
    //
    // Each loop skips entries that start at or before next_transition
    // but do not contain it.
    while (func_it != functions->end()
           && next_transition >= (*func_it)->address
           && !within(**func_it, next_transition))
      func_it++;
    func = (func_it != functions->end()) ? *func_it : NULL;
    while (line_it != lines_.end()
           && next_transition >= line_it->address
           && !within(*line_it, next_transition))
      line_it++;
    line = (line_it != lines_.end()) ? &*line_it : NULL;

    // We must make progress.
    assert(next_transition > current);
    current = next_transition;
  }
}

// Finish processing this compilation unit: read its source line info (if
// any), assign the lines to the functions gathered in cu_context_, add
// those functions to the module, and clear the file context's
// specifications.  Does nothing at all for CUs whose language reports
// that it has no functions.
void DwarfCUToModule::Finish() {
  // Assembly language files have no function data, and that gives us
  // no place to store our line numbers (even though the GNU toolchain
  // will happily produce source line info for assembly language
  // files).  To avoid spurious warnings about lines we can't assign
  // to functions, skip CUs in languages that lack functions.
  if (!cu_context_->language->HasFunctions())
    return;

  // Read source line info, if we have any.
  if (has_source_line_info_)
    ReadSourceLines(source_line_offset_);

  vector<Module::Function *> *functions = &cu_context_->functions;

  // Dole out lines to the appropriate functions.
  AssignLinesToFunctions();

  // Add our functions, which now have source lines assigned to them,
  // to module_.
  cu_context_->file_context->module_->AddFunctions(functions->begin(),
                                                   functions->end());

  // Ownership of the function objects has shifted from cu_context to
  // the Module.
  functions->clear();

  cu_context_->file_context->ClearSpecifications();
}

// Returns true if DWARF_VERSION is 2 or later.  The other parameters are
// not used by this implementation.
// NOTE(review): presumably a false return makes the reader skip this
// compilation unit --- confirm against the handler interface this
// overrides.
bool DwarfCUToModule::StartCompilationUnit(uint64 offset,
                                           uint8 address_size,
                                           uint8 offset_size,
                                           uint64 cu_length,
                                           uint8 dwarf_version) {
  return dwarf_version >= 2;
}

// Returns true only if the root DIE's TAG is DW_TAG_compile_unit.  OFFSET
// is not used by this implementation.
bool DwarfCUToModule::StartRootDIE(uint64 offset, enum DwarfTag tag) {
  // We don't deal with partial compilation units (the only other tag
  // likely to be used for root DIE).
  return tag == dwarf2reader::DW_TAG_compile_unit;
}

}  // namespace google_breakpad