Home | History | Annotate | Download | only in common
      1 // Copyright (c) 2010 Google Inc.
      2 // All rights reserved.
      3 //
      4 // Redistribution and use in source and binary forms, with or without
      5 // modification, are permitted provided that the following conditions are
      6 // met:
      7 //
      8 //     * Redistributions of source code must retain the above copyright
      9 // notice, this list of conditions and the following disclaimer.
     10 //     * Redistributions in binary form must reproduce the above
     11 // copyright notice, this list of conditions and the following disclaimer
     12 // in the documentation and/or other materials provided with the
     13 // distribution.
     14 //     * Neither the name of Google Inc. nor the names of its
     15 // contributors may be used to endorse or promote products derived from
     16 // this software without specific prior written permission.
     17 //
     18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29 
     30 // Original author: Jim Blandy <jimb (at) mozilla.com> <jimb (at) red-bean.com>
     31 
     32 // Implement the DwarfCUToModule class; see dwarf_cu_to_module.h.
     33 
     34 // For <inttypes.h> PRI* macros, before anything else might #include it.
     35 #ifndef __STDC_FORMAT_MACROS
     36 #define __STDC_FORMAT_MACROS
     37 #endif  /* __STDC_FORMAT_MACROS */
     38 
     39 #include "common/dwarf_cu_to_module.h"
     40 
     41 #include <assert.h>
     42 #if !defined(__ANDROID__)
     43 #include <cxxabi.h>
     44 #endif
     45 #include <inttypes.h>
     46 #include <stdio.h>
     47 
     48 #include <algorithm>
     49 #include <utility>
     50 
     51 #include "common/dwarf_line_to_module.h"
     52 #include "common/unordered.h"
     53 
     54 namespace google_breakpad {
     55 
     56 using std::map;
     57 using std::pair;
     58 using std::sort;
     59 using std::vector;
     60 
     61 // Data provided by a DWARF specification DIE.
     62 //
     63 // In DWARF, the DIE for a definition may contain a DW_AT_specification
     64 // attribute giving the offset of the corresponding declaration DIE, and
     65 // the definition DIE may omit information given in the declaration. For
     66 // example, it's common for a function's address range to appear only in
     67 // its definition DIE, but its name to appear only in its declaration
     68 // DIE.
     69 //
     70 // The dumper needs to be able to follow DW_AT_specification links to
     71 // bring all this information together in a FUNC record. Conveniently,
     72 // DIEs that are the target of such links have a DW_AT_declaration flag
     73 // set, so we can identify them when we first see them, and record their
     74 // contents for later reference.
     75 //
     76 // A Specification holds information gathered from a declaration DIE that
     77 // we may need if we find a DW_AT_specification link pointing to it.
     78 struct DwarfCUToModule::Specification {
     79   // The qualified name that can be found by demangling DW_AT_MIPS_linkage_name.
     80   string qualified_name;
     81 
     82   // The name of the enclosing scope, or the empty string if there is none.
     83   string enclosing_name;
     84 
     85   // The name for the specification DIE itself, without any enclosing
     86   // name components.
     87   string unqualified_name;
     88 };
     89 
     90 // An abstract origin -- base definition of an inline function.
     91 struct AbstractOrigin {
     92   AbstractOrigin() : name() {}
     93   explicit AbstractOrigin(const string& name) : name(name) {}
     94 
     95   string name;
     96 };
     97 
// Abstract origins keyed by the offset of the DIE they were recorded for.
typedef map<uint64, AbstractOrigin> AbstractOriginByOffset;
     99 
    100 // Data global to the DWARF-bearing file that is private to the
    101 // DWARF-to-Module process.
    102 struct DwarfCUToModule::FilePrivate {
    103   // A set of strings used in this CU. Before storing a string in one of
    104   // our data structures, insert it into this set, and then use the string
    105   // from the set.
    106   //
    107   // In some STL implementations, strings are reference-counted internally,
    108   // meaning that simply using strings from this set, even if passed by
    109   // value, assigned, or held directly in structures and containers
    110   // (map<string, ...>, for example), causes those strings to share a
    111   // single instance of each distinct piece of text. GNU's libstdc++ uses
    112   // reference counts, and I believe MSVC did as well, at some point.
    113   // However, C++ '11 implementations are moving away from reference
    114   // counting.
    115   //
    116   // In other implementations, string assignments copy the string's text,
    117   // so this set will actually hold yet another copy of the string (although
    118   // everything will still work). To improve memory consumption portably,
    119   // we will probably need to use pointers to strings held in this set.
    120   unordered_set<string> common_strings;
    121 
    122   // A map from offsets of DIEs within the .debug_info section to
    123   // Specifications describing those DIEs. Specification references can
    124   // cross compilation unit boundaries.
    125   SpecificationByOffset specifications;
    126 
    127   AbstractOriginByOffset origins;
    128 };
    129 
    130 DwarfCUToModule::FileContext::FileContext(const string &filename,
    131                                           Module *module,
    132                                           bool handle_inter_cu_refs)
    133     : filename_(filename),
    134       module_(module),
    135       handle_inter_cu_refs_(handle_inter_cu_refs),
    136       file_private_(new FilePrivate()) {
    137 }
    138 
    139 DwarfCUToModule::FileContext::~FileContext() {
    140 }
    141 
    142 void DwarfCUToModule::FileContext::AddSectionToSectionMap(
    143     const string& name, const char* contents, uint64 length) {
    144   section_map_[name] = std::make_pair(contents, length);
    145 }
    146 
    147 void DwarfCUToModule::FileContext::ClearSectionMapForTest() {
    148   section_map_.clear();
    149 }
    150 
    151 const dwarf2reader::SectionMap&
    152 DwarfCUToModule::FileContext::section_map() const {
    153   return section_map_;
    154 }
    155 
    156 void DwarfCUToModule::FileContext::ClearSpecifications() {
    157   if (!handle_inter_cu_refs_)
    158     file_private_->specifications.clear();
    159 }
    160 
    161 bool DwarfCUToModule::FileContext::IsUnhandledInterCUReference(
    162     uint64 offset, uint64 compilation_unit_start) const {
    163   if (handle_inter_cu_refs_)
    164     return false;
    165   return offset < compilation_unit_start;
    166 }
    167 
    168 // Information global to the particular compilation unit we're
    169 // parsing. This is for data shared across the CU's entire DIE tree,
    170 // and parameters from the code invoking the CU parser.
    171 struct DwarfCUToModule::CUContext {
    172   CUContext(FileContext *file_context_arg, WarningReporter *reporter_arg)
    173       : file_context(file_context_arg),
    174         reporter(reporter_arg),
    175         language(Language::CPlusPlus) {}
    176 
    177   ~CUContext() {
    178     for (vector<Module::Function *>::iterator it = functions.begin();
    179          it != functions.end(); ++it) {
    180       delete *it;
    181     }
    182   };
    183 
    184   // The DWARF-bearing file into which this CU was incorporated.
    185   FileContext *file_context;
    186 
    187   // For printing error messages.
    188   WarningReporter *reporter;
    189 
    190   // The source language of this compilation unit.
    191   const Language *language;
    192 
    193   // The functions defined in this compilation unit. We accumulate
    194   // them here during parsing. Then, in DwarfCUToModule::Finish, we
    195   // assign them lines and add them to file_context->module.
    196   //
    197   // Destroying this destroys all the functions this vector points to.
    198   vector<Module::Function *> functions;
    199 };
    200 
    201 // Information about the context of a particular DIE. This is for
    202 // information that changes as we descend the tree towards the leaves:
    203 // the containing classes/namespaces, etc.
    204 struct DwarfCUToModule::DIEContext {
    205   // The fully-qualified name of the context. For example, for a
    206   // tree like:
    207   //
    208   // DW_TAG_namespace Foo
    209   //   DW_TAG_class Bar
    210   //     DW_TAG_subprogram Baz
    211   //
    212   // in a C++ compilation unit, the DIEContext's name for the
    213   // DW_TAG_subprogram DIE would be "Foo::Bar". The DIEContext's
    214   // name for the DW_TAG_namespace DIE would be "".
    215   string name;
    216 };
    217 
    218 // An abstract base class for all the dumper's DIE handlers.
    219 class DwarfCUToModule::GenericDIEHandler: public dwarf2reader::DIEHandler {
    220  public:
    221   // Create a handler for the DIE at OFFSET whose compilation unit is
    222   // described by CU_CONTEXT, and whose immediate context is described
    223   // by PARENT_CONTEXT.
    224   GenericDIEHandler(CUContext *cu_context, DIEContext *parent_context,
    225                     uint64 offset)
    226       : cu_context_(cu_context),
    227         parent_context_(parent_context),
    228         offset_(offset),
    229         declaration_(false),
    230         specification_(NULL) { }
    231 
    232   // Derived classes' ProcessAttributeUnsigned can defer to this to
    233   // handle DW_AT_declaration, or simply not override it.
    234   void ProcessAttributeUnsigned(enum DwarfAttribute attr,
    235                                 enum DwarfForm form,
    236                                 uint64 data);
    237 
    238   // Derived classes' ProcessAttributeReference can defer to this to
    239   // handle DW_AT_specification, or simply not override it.
    240   void ProcessAttributeReference(enum DwarfAttribute attr,
    241                                  enum DwarfForm form,
    242                                  uint64 data);
    243 
    244   // Derived classes' ProcessAttributeReference can defer to this to
    245   // handle DW_AT_specification, or simply not override it.
    246   void ProcessAttributeString(enum DwarfAttribute attr,
    247                               enum DwarfForm form,
    248                               const string &data);
    249 
    250  protected:
    251   // Compute and return the fully-qualified name of the DIE. If this
    252   // DIE is a declaration DIE, to be cited by other DIEs'
    253   // DW_AT_specification attributes, record its enclosing name and
    254   // unqualified name in the specification table.
    255   //
    256   // Use this from EndAttributes member functions, not ProcessAttribute*
    257   // functions; only the former can be sure that all the DIE's attributes
    258   // have been seen.
    259   string ComputeQualifiedName();
    260 
    261   CUContext *cu_context_;
    262   DIEContext *parent_context_;
    263   uint64 offset_;
    264 
    265   // Place the name in the global set of strings. Even though this looks
    266   // like a copy, all the major std::string implementations use reference
    267   // counting internally, so the effect is to have all the data structures
    268   // share copies of strings whenever possible.
    269   // FIXME: Should this return something like a string_ref to avoid the
    270   // assumption about how strings are implemented?
    271   string AddStringToPool(const string &str);
    272 
    273   // If this DIE has a DW_AT_declaration attribute, this is its value.
    274   // It is false on DIEs with no DW_AT_declaration attribute.
    275   bool declaration_;
    276 
    277   // If this DIE has a DW_AT_specification attribute, this is the
    278   // Specification structure for the DIE the attribute refers to.
    279   // Otherwise, this is NULL.
    280   Specification *specification_;
    281 
    282   // The value of the DW_AT_name attribute, or the empty string if the
    283   // DIE has no such attribute.
    284   string name_attribute_;
    285 
    286   // The demangled value of the DW_AT_MIPS_linkage_name attribute, or the empty
    287   // string if the DIE has no such attribute or its content could not be
    288   // demangled.
    289   string demangled_name_;
    290 };
    291 
    292 void DwarfCUToModule::GenericDIEHandler::ProcessAttributeUnsigned(
    293     enum DwarfAttribute attr,
    294     enum DwarfForm form,
    295     uint64 data) {
    296   switch (attr) {
    297     case dwarf2reader::DW_AT_declaration: declaration_ = (data != 0); break;
    298     default: break;
    299   }
    300 }
    301 
    302 void DwarfCUToModule::GenericDIEHandler::ProcessAttributeReference(
    303     enum DwarfAttribute attr,
    304     enum DwarfForm form,
    305     uint64 data) {
    306   switch (attr) {
    307     case dwarf2reader::DW_AT_specification: {
    308       FileContext *file_context = cu_context_->file_context;
    309       if (file_context->IsUnhandledInterCUReference(
    310               data, cu_context_->reporter->cu_offset())) {
    311         cu_context_->reporter->UnhandledInterCUReference(offset_, data);
    312         break;
    313       }
    314       // Find the Specification to which this attribute refers, and
    315       // set specification_ appropriately. We could do more processing
    316       // here, but it's better to leave the real work to our
    317       // EndAttribute member function, at which point we know we have
    318       // seen all the DIE's attributes.
    319       SpecificationByOffset *specifications =
    320           &file_context->file_private_->specifications;
    321       SpecificationByOffset::iterator spec = specifications->find(data);
    322       if (spec != specifications->end()) {
    323         specification_ = &spec->second;
    324       } else {
    325         // Technically, there's no reason a DW_AT_specification
    326         // couldn't be a forward reference, but supporting that would
    327         // be a lot of work (changing to a two-pass structure), and I
    328         // don't think any producers we care about ever emit such
    329         // things.
    330         cu_context_->reporter->UnknownSpecification(offset_, data);
    331       }
    332       break;
    333     }
    334     default: break;
    335   }
    336 }
    337 
    338 string DwarfCUToModule::GenericDIEHandler::AddStringToPool(const string &str) {
    339   pair<unordered_set<string>::iterator, bool> result =
    340     cu_context_->file_context->file_private_->common_strings.insert(str);
    341   return *result.first;
    342 }
    343 
    344 void DwarfCUToModule::GenericDIEHandler::ProcessAttributeString(
    345     enum DwarfAttribute attr,
    346     enum DwarfForm form,
    347     const string &data) {
    348   switch (attr) {
    349     case dwarf2reader::DW_AT_name:
    350       name_attribute_ = AddStringToPool(data);
    351       break;
    352     case dwarf2reader::DW_AT_MIPS_linkage_name: {
    353       char* demangled = NULL;
    354       int status = -1;
    355 #if !defined(__ANDROID__)  // Android NDK doesn't provide abi::__cxa_demangle.
    356       demangled = abi::__cxa_demangle(data.c_str(), NULL, NULL, &status);
    357 #endif
    358       if (status != 0) {
    359         cu_context_->reporter->DemangleError(data, status);
    360         demangled_name_ = "";
    361         break;
    362       }
    363       if (demangled) {
    364         demangled_name_ = AddStringToPool(demangled);
    365         free(reinterpret_cast<void*>(demangled));
    366       }
    367       break;
    368     }
    369     default: break;
    370   }
    371 }
    372 
    373 string DwarfCUToModule::GenericDIEHandler::ComputeQualifiedName() {
    374   // Use the demangled name, if one is available. Demangled names are
    375   // preferable to those inferred from the DWARF structure because they
    376   // include argument types.
    377   const string *qualified_name = NULL;
    378   if (!demangled_name_.empty()) {
    379     // Found it is this DIE.
    380     qualified_name = &demangled_name_;
    381   } else if (specification_ && !specification_->qualified_name.empty()) {
    382     // Found it on the specification.
    383     qualified_name = &specification_->qualified_name;
    384   }
    385 
    386   const string *unqualified_name;
    387   const string *enclosing_name;
    388   if (!qualified_name) {
    389     // Find our unqualified name. If the DIE has its own DW_AT_name
    390     // attribute, then use that; otherwise, check our specification.
    391     if (name_attribute_.empty() && specification_)
    392       unqualified_name = &specification_->unqualified_name;
    393     else
    394       unqualified_name = &name_attribute_;
    395 
    396     // Find the name of our enclosing context. If we have a
    397     // specification, it's the specification's enclosing context that
    398     // counts; otherwise, use this DIE's context.
    399     if (specification_)
    400       enclosing_name = &specification_->enclosing_name;
    401     else
    402       enclosing_name = &parent_context_->name;
    403   }
    404 
    405   // Prepare the return value before upcoming mutations possibly invalidate the
    406   // existing pointers.
    407   string return_value;
    408   if (qualified_name) {
    409     return_value = *qualified_name;
    410   } else {
    411     // Combine the enclosing name and unqualified name to produce our
    412     // own fully-qualified name.
    413     return_value = cu_context_->language->MakeQualifiedName(*enclosing_name,
    414                                                             *unqualified_name);
    415   }
    416 
    417   // If this DIE was marked as a declaration, record its names in the
    418   // specification table.
    419   if (declaration_) {
    420     Specification spec;
    421     if (qualified_name) {
    422       spec.qualified_name = *qualified_name;
    423     } else {
    424       spec.enclosing_name = *enclosing_name;
    425       spec.unqualified_name = *unqualified_name;
    426     }
    427     cu_context_->file_context->file_private_->specifications[offset_] = spec;
    428   }
    429 
    430   return return_value;
    431 }
    432 
    433 // A handler class for DW_TAG_subprogram DIEs.
    434 class DwarfCUToModule::FuncHandler: public GenericDIEHandler {
    435  public:
    436   FuncHandler(CUContext *cu_context, DIEContext *parent_context,
    437               uint64 offset)
    438       : GenericDIEHandler(cu_context, parent_context, offset),
    439         low_pc_(0), high_pc_(0), high_pc_form_(dwarf2reader::DW_FORM_addr),
    440         abstract_origin_(NULL), inline_(false) { }
    441   void ProcessAttributeUnsigned(enum DwarfAttribute attr,
    442                                 enum DwarfForm form,
    443                                 uint64 data);
    444   void ProcessAttributeSigned(enum DwarfAttribute attr,
    445                               enum DwarfForm form,
    446                               int64 data);
    447   void ProcessAttributeReference(enum DwarfAttribute attr,
    448                                  enum DwarfForm form,
    449                                  uint64 data);
    450 
    451   bool EndAttributes();
    452   void Finish();
    453 
    454  private:
    455   // The fully-qualified name, as derived from name_attribute_,
    456   // specification_, parent_context_.  Computed in EndAttributes.
    457   string name_;
    458   uint64 low_pc_, high_pc_; // DW_AT_low_pc, DW_AT_high_pc
    459   DwarfForm high_pc_form_; // DW_AT_high_pc can be length or address.
    460   const AbstractOrigin* abstract_origin_;
    461   bool inline_;
    462 };
    463 
    464 void DwarfCUToModule::FuncHandler::ProcessAttributeUnsigned(
    465     enum DwarfAttribute attr,
    466     enum DwarfForm form,
    467     uint64 data) {
    468   switch (attr) {
    469     // If this attribute is present at all --- even if its value is
    470     // DW_INL_not_inlined --- then GCC may cite it as someone else's
    471     // DW_AT_abstract_origin attribute.
    472     case dwarf2reader::DW_AT_inline:      inline_  = true; break;
    473 
    474     case dwarf2reader::DW_AT_low_pc:      low_pc_  = data; break;
    475     case dwarf2reader::DW_AT_high_pc:
    476       high_pc_form_ = form;
    477       high_pc_ = data;
    478       break;
    479 
    480     default:
    481       GenericDIEHandler::ProcessAttributeUnsigned(attr, form, data);
    482       break;
    483   }
    484 }
    485 
    486 void DwarfCUToModule::FuncHandler::ProcessAttributeSigned(
    487     enum DwarfAttribute attr,
    488     enum DwarfForm form,
    489     int64 data) {
    490   switch (attr) {
    491     // If this attribute is present at all --- even if its value is
    492     // DW_INL_not_inlined --- then GCC may cite it as someone else's
    493     // DW_AT_abstract_origin attribute.
    494     case dwarf2reader::DW_AT_inline:      inline_  = true; break;
    495 
    496     default:
    497       break;
    498   }
    499 }
    500 
    501 void DwarfCUToModule::FuncHandler::ProcessAttributeReference(
    502     enum DwarfAttribute attr,
    503     enum DwarfForm form,
    504     uint64 data) {
    505   switch (attr) {
    506     case dwarf2reader::DW_AT_abstract_origin: {
    507       const AbstractOriginByOffset& origins =
    508           cu_context_->file_context->file_private_->origins;
    509       AbstractOriginByOffset::const_iterator origin = origins.find(data);
    510       if (origin != origins.end()) {
    511         abstract_origin_ = &(origin->second);
    512       } else {
    513         cu_context_->reporter->UnknownAbstractOrigin(offset_, data);
    514       }
    515       break;
    516     }
    517     default:
    518       GenericDIEHandler::ProcessAttributeReference(attr, form, data);
    519       break;
    520   }
    521 }
    522 
    523 bool DwarfCUToModule::FuncHandler::EndAttributes() {
    524   // Compute our name, and record a specification, if appropriate.
    525   name_ = ComputeQualifiedName();
    526   if (name_.empty() && abstract_origin_) {
    527     name_ = abstract_origin_->name;
    528   }
    529   return true;
    530 }
    531 
    532 void DwarfCUToModule::FuncHandler::Finish() {
    533   // Make high_pc_ an address, if it isn't already.
    534   if (high_pc_form_ != dwarf2reader::DW_FORM_addr) {
    535     high_pc_ += low_pc_;
    536   }
    537 
    538   // Did we collect the information we need?  Not all DWARF function
    539   // entries have low and high addresses (for example, inlined
    540   // functions that were never used), but all the ones we're
    541   // interested in cover a non-empty range of bytes.
    542   if (low_pc_ < high_pc_) {
    543     // Malformed DWARF may omit the name, but all Module::Functions must
    544     // have names.
    545     string name;
    546     if (!name_.empty()) {
    547       name = name_;
    548     } else {
    549       cu_context_->reporter->UnnamedFunction(offset_);
    550       name = "<name omitted>";
    551     }
    552 
    553     // Create a Module::Function based on the data we've gathered, and
    554     // add it to the functions_ list.
    555     scoped_ptr<Module::Function> func(new Module::Function(name, low_pc_));
    556     func->size = high_pc_ - low_pc_;
    557     func->parameter_size = 0;
    558     if (func->address) {
    559        // If the function address is zero this is a sign that this function
    560        // description is just empty debug data and should just be discarded.
    561        cu_context_->functions.push_back(func.release());
    562      }
    563   } else if (inline_) {
    564     AbstractOrigin origin(name_);
    565     cu_context_->file_context->file_private_->origins[offset_] = origin;
    566   }
    567 }
    568 
    569 // A handler for DIEs that contain functions and contribute a
    570 // component to their names: namespaces, classes, etc.
    571 class DwarfCUToModule::NamedScopeHandler: public GenericDIEHandler {
    572  public:
    573   NamedScopeHandler(CUContext *cu_context, DIEContext *parent_context,
    574                     uint64 offset)
    575       : GenericDIEHandler(cu_context, parent_context, offset) { }
    576   bool EndAttributes();
    577   DIEHandler *FindChildHandler(uint64 offset, enum DwarfTag tag);
    578 
    579  private:
    580   DIEContext child_context_; // A context for our children.
    581 };
    582 
    583 bool DwarfCUToModule::NamedScopeHandler::EndAttributes() {
    584   child_context_.name = ComputeQualifiedName();
    585   return true;
    586 }
    587 
    588 dwarf2reader::DIEHandler *DwarfCUToModule::NamedScopeHandler::FindChildHandler(
    589     uint64 offset,
    590     enum DwarfTag tag) {
    591   switch (tag) {
    592     case dwarf2reader::DW_TAG_subprogram:
    593       return new FuncHandler(cu_context_, &child_context_, offset);
    594     case dwarf2reader::DW_TAG_namespace:
    595     case dwarf2reader::DW_TAG_class_type:
    596     case dwarf2reader::DW_TAG_structure_type:
    597     case dwarf2reader::DW_TAG_union_type:
    598       return new NamedScopeHandler(cu_context_, &child_context_, offset);
    599     default:
    600       return NULL;
    601   }
    602 }
    603 
    604 void DwarfCUToModule::WarningReporter::CUHeading() {
    605   if (printed_cu_header_)
    606     return;
    607   fprintf(stderr, "%s: in compilation unit '%s' (offset 0x%llx):\n",
    608           filename_.c_str(), cu_name_.c_str(), cu_offset_);
    609   printed_cu_header_ = true;
    610 }
    611 
    612 void DwarfCUToModule::WarningReporter::UnknownSpecification(uint64 offset,
    613                                                             uint64 target) {
    614   CUHeading();
    615   fprintf(stderr, "%s: the DIE at offset 0x%llx has a DW_AT_specification"
    616           " attribute referring to the die at offset 0x%llx, which either"
    617           " was not marked as a declaration, or comes later in the file\n",
    618           filename_.c_str(), offset, target);
    619 }
    620 
    621 void DwarfCUToModule::WarningReporter::UnknownAbstractOrigin(uint64 offset,
    622                                                              uint64 target) {
    623   CUHeading();
    624   fprintf(stderr, "%s: the DIE at offset 0x%llx has a DW_AT_abstract_origin"
    625           " attribute referring to the die at offset 0x%llx, which either"
    626           " was not marked as an inline, or comes later in the file\n",
    627           filename_.c_str(), offset, target);
    628 }
    629 
    630 void DwarfCUToModule::WarningReporter::MissingSection(const string &name) {
    631   CUHeading();
    632   fprintf(stderr, "%s: warning: couldn't find DWARF '%s' section\n",
    633           filename_.c_str(), name.c_str());
    634 }
    635 
    636 void DwarfCUToModule::WarningReporter::BadLineInfoOffset(uint64 offset) {
    637   CUHeading();
    638   fprintf(stderr, "%s: warning: line number data offset beyond end"
    639           " of '.debug_line' section\n",
    640           filename_.c_str());
    641 }
    642 
    643 void DwarfCUToModule::WarningReporter::UncoveredHeading() {
    644   if (printed_unpaired_header_)
    645     return;
    646   CUHeading();
    647   fprintf(stderr, "%s: warning: skipping unpaired lines/functions:\n",
    648           filename_.c_str());
    649   printed_unpaired_header_ = true;
    650 }
    651 
    652 void DwarfCUToModule::WarningReporter::UncoveredFunction(
    653     const Module::Function &function) {
    654   if (!uncovered_warnings_enabled_)
    655     return;
    656   UncoveredHeading();
    657   fprintf(stderr, "    function%s: %s\n",
    658           function.size == 0 ? " (zero-length)" : "",
    659           function.name.c_str());
    660 }
    661 
    662 void DwarfCUToModule::WarningReporter::UncoveredLine(const Module::Line &line) {
    663   if (!uncovered_warnings_enabled_)
    664     return;
    665   UncoveredHeading();
    666   fprintf(stderr, "    line%s: %s:%d at 0x%" PRIx64 "\n",
    667           (line.size == 0 ? " (zero-length)" : ""),
    668           line.file->name.c_str(), line.number, line.address);
    669 }
    670 
    671 void DwarfCUToModule::WarningReporter::UnnamedFunction(uint64 offset) {
    672   CUHeading();
    673   fprintf(stderr, "%s: warning: function at offset 0x%llx has no name\n",
    674           filename_.c_str(), offset);
    675 }
    676 
    677 void DwarfCUToModule::WarningReporter::DemangleError(
    678     const string &input, int error) {
    679   CUHeading();
    680   fprintf(stderr, "%s: warning: failed to demangle %s with error %d\n",
    681           filename_.c_str(), input.c_str(), error);
    682 }
    683 
    684 void DwarfCUToModule::WarningReporter::UnhandledInterCUReference(
    685     uint64 offset, uint64 target) {
    686   CUHeading();
    687   fprintf(stderr, "%s: warning: the DIE at offset 0x%llx has a "
    688                   "DW_FORM_ref_addr attribute with an inter-CU reference to "
    689                   "0x%llx, but inter-CU reference handling is turned off.\n",
    690                   filename_.c_str(), offset, target);
    691 }
    692 
    693 DwarfCUToModule::DwarfCUToModule(FileContext *file_context,
    694                                  LineToModuleHandler *line_reader,
    695                                  WarningReporter *reporter)
    696     : line_reader_(line_reader),
    697       cu_context_(new CUContext(file_context, reporter)),
    698       child_context_(new DIEContext()),
    699       has_source_line_info_(false) {
    700 }
    701 
    702 DwarfCUToModule::~DwarfCUToModule() {
    703 }
    704 
    705 void DwarfCUToModule::ProcessAttributeSigned(enum DwarfAttribute attr,
    706                                              enum DwarfForm form,
    707                                              int64 data) {
    708   switch (attr) {
    709     case dwarf2reader::DW_AT_language: // source language of this CU
    710       SetLanguage(static_cast<DwarfLanguage>(data));
    711       break;
    712     default:
    713       break;
    714   }
    715 }
    716 
    717 void DwarfCUToModule::ProcessAttributeUnsigned(enum DwarfAttribute attr,
    718                                                enum DwarfForm form,
    719                                                uint64 data) {
    720   switch (attr) {
    721     case dwarf2reader::DW_AT_stmt_list: // Line number information.
    722       has_source_line_info_ = true;
    723       source_line_offset_ = data;
    724       break;
    725     case dwarf2reader::DW_AT_language: // source language of this CU
    726       SetLanguage(static_cast<DwarfLanguage>(data));
    727       break;
    728     default:
    729       break;
    730   }
    731 }
    732 
    733 void DwarfCUToModule::ProcessAttributeString(enum DwarfAttribute attr,
    734                                              enum DwarfForm form,
    735                                              const string &data) {
    736   switch (attr) {
    737     case dwarf2reader::DW_AT_name:
    738       cu_context_->reporter->SetCUName(data);
    739       break;
    740     case dwarf2reader::DW_AT_comp_dir:
    741       line_reader_->StartCompilationUnit(data);
    742       break;
    743     default:
    744       break;
    745   }
    746 }
    747 
    748 bool DwarfCUToModule::EndAttributes() {
    749   return true;
    750 }
    751 
    752 dwarf2reader::DIEHandler *DwarfCUToModule::FindChildHandler(
    753     uint64 offset,
    754     enum DwarfTag tag) {
    755   switch (tag) {
    756     case dwarf2reader::DW_TAG_subprogram:
    757       return new FuncHandler(cu_context_.get(), child_context_.get(), offset);
    758     case dwarf2reader::DW_TAG_namespace:
    759     case dwarf2reader::DW_TAG_class_type:
    760     case dwarf2reader::DW_TAG_structure_type:
    761     case dwarf2reader::DW_TAG_union_type:
    762       return new NamedScopeHandler(cu_context_.get(), child_context_.get(),
    763                                    offset);
    764     default:
    765       return NULL;
    766   }
    767 }
    768 
    769 void DwarfCUToModule::SetLanguage(DwarfLanguage language) {
    770   switch (language) {
    771     case dwarf2reader::DW_LANG_Java:
    772       cu_context_->language = Language::Java;
    773       break;
    774 
    775     // DWARF has no generic language code for assembly language; this is
    776     // what the GNU toolchain uses.
    777     case dwarf2reader::DW_LANG_Mips_Assembler:
    778       cu_context_->language = Language::Assembler;
    779       break;
    780 
    781     // C++ covers so many cases that it probably has some way to cope
    782     // with whatever the other languages throw at us. So make it the
    783     // default.
    784     //
    785     // Objective C and Objective C++ seem to create entries for
    786     // methods whose DW_AT_name values are already fully-qualified:
    787     // "-[Classname method:]".  These appear at the top level.
    788     //
    789     // DWARF data for C should never include namespaces or functions
    790     // nested in struct types, but if it ever does, then C++'s
    791     // notation is probably not a bad choice for that.
    792     default:
    793     case dwarf2reader::DW_LANG_ObjC:
    794     case dwarf2reader::DW_LANG_ObjC_plus_plus:
    795     case dwarf2reader::DW_LANG_C:
    796     case dwarf2reader::DW_LANG_C89:
    797     case dwarf2reader::DW_LANG_C99:
    798     case dwarf2reader::DW_LANG_C_plus_plus:
    799       cu_context_->language = Language::CPlusPlus;
    800       break;
    801   }
    802 }
    803 
    804 void DwarfCUToModule::ReadSourceLines(uint64 offset) {
    805   const dwarf2reader::SectionMap &section_map
    806       = cu_context_->file_context->section_map();
    807   dwarf2reader::SectionMap::const_iterator map_entry
    808       = section_map.find(".debug_line");
    809   // Mac OS X puts DWARF data in sections whose names begin with "__"
    810   // instead of ".".
    811   if (map_entry == section_map.end())
    812     map_entry = section_map.find("__debug_line");
    813   if (map_entry == section_map.end()) {
    814     cu_context_->reporter->MissingSection(".debug_line");
    815     return;
    816   }
    817   const char *section_start = map_entry->second.first;
    818   uint64 section_length = map_entry->second.second;
    819   if (offset >= section_length) {
    820     cu_context_->reporter->BadLineInfoOffset(offset);
    821     return;
    822   }
    823   line_reader_->ReadProgram(section_start + offset, section_length - offset,
    824                             cu_context_->file_context->module_, &lines_);
    825 }
    826 
    827 namespace {
    828 // Return true if ADDRESS falls within the range of ITEM.
    829 template <class T>
    830 inline bool within(const T &item, Module::Address address) {
    831   // Because Module::Address is unsigned, and unsigned arithmetic
    832   // wraps around, this will be false if ADDRESS falls before the
    833   // start of ITEM, or if it falls after ITEM's end.
    834   return address - item.address < item.size;
    835 }
    836 }
    837 
    838 void DwarfCUToModule::AssignLinesToFunctions() {
    839   vector<Module::Function *> *functions = &cu_context_->functions;
    840   WarningReporter *reporter = cu_context_->reporter;
    841 
    842   // This would be simpler if we assumed that source line entries
    843   // don't cross function boundaries.  However, there's no real reason
    844   // to assume that (say) a series of function definitions on the same
    845   // line wouldn't get coalesced into one line number entry.  The
    846   // DWARF spec certainly makes no such promises.
    847   //
    848   // So treat the functions and lines as peers, and take the trouble
    849   // to compute their ranges' intersections precisely.  In any case,
    850   // the hair here is a constant factor for performance; the
    851   // complexity from here on out is linear.
    852 
    853   // Put both our functions and lines in order by address.
    854   std::sort(functions->begin(), functions->end(),
    855             Module::Function::CompareByAddress);
    856   std::sort(lines_.begin(), lines_.end(), Module::Line::CompareByAddress);
    857 
    858   // The last line that we used any piece of.  We use this only for
    859   // generating warnings.
    860   const Module::Line *last_line_used = NULL;
    861 
    862   // The last function and line we warned about --- so we can avoid
    863   // doing so more than once.
    864   const Module::Function *last_function_cited = NULL;
    865   const Module::Line *last_line_cited = NULL;
    866 
    867   // Make a single pass through both vectors from lower to higher
    868   // addresses, populating each Function's lines vector with lines
    869   // from our lines_ vector that fall within the function's address
    870   // range.
    871   vector<Module::Function *>::iterator func_it = functions->begin();
    872   vector<Module::Line>::const_iterator line_it = lines_.begin();
    873 
    874   Module::Address current;
    875 
    876   // Pointers to the referents of func_it and line_it, or NULL if the
    877   // iterator is at the end of the sequence.
    878   Module::Function *func;
    879   const Module::Line *line;
    880 
    881   // Start current at the beginning of the first line or function,
    882   // whichever is earlier.
    883   if (func_it != functions->end() && line_it != lines_.end()) {
    884     func = *func_it;
    885     line = &*line_it;
    886     current = std::min(func->address, line->address);
    887   } else if (line_it != lines_.end()) {
    888     func = NULL;
    889     line = &*line_it;
    890     current = line->address;
    891   } else if (func_it != functions->end()) {
    892     func = *func_it;
    893     line = NULL;
    894     current = (*func_it)->address;
    895   } else {
    896     return;
    897   }
    898 
    899   while (func || line) {
    900     // This loop has two invariants that hold at the top.
    901     //
    902     // First, at least one of the iterators is not at the end of its
    903     // sequence, and those that are not refer to the earliest
    904     // function or line that contains or starts after CURRENT.
    905     //
    906     // Note that every byte is in one of four states: it is covered
    907     // or not covered by a function, and, independently, it is
    908     // covered or not covered by a line.
    909     //
    910     // The second invariant is that CURRENT refers to a byte whose
    911     // state is different from its predecessor, or it refers to the
    912     // first byte in the address space. In other words, CURRENT is
    913     // always the address of a transition.
    914     //
    915     // Note that, although each iteration advances CURRENT from one
    916     // transition address to the next in each iteration, it might
    917     // not advance the iterators. Suppose we have a function that
    918     // starts with a line, has a gap, and then a second line, and
    919     // suppose that we enter an iteration with CURRENT at the end of
    920     // the first line. The next transition address is the start of
    921     // the second line, after the gap, so the iteration should
    922     // advance CURRENT to that point. At the head of that iteration,
    923     // the invariants require that the line iterator be pointing at
    924     // the second line. But this is also true at the head of the
    925     // next. And clearly, the iteration must not change the function
    926     // iterator. So neither iterator moves.
    927 
    928     // Assert the first invariant (see above).
    929     assert(!func || current < func->address || within(*func, current));
    930     assert(!line || current < line->address || within(*line, current));
    931 
    932     // The next transition after CURRENT.
    933     Module::Address next_transition;
    934 
    935     // Figure out which state we're in, add lines or warn, and compute
    936     // the next transition address.
    937     if (func && current >= func->address) {
    938       if (line && current >= line->address) {
    939         // Covered by both a line and a function.
    940         Module::Address func_left = func->size - (current - func->address);
    941         Module::Address line_left = line->size - (current - line->address);
    942         // This may overflow, but things work out.
    943         next_transition = current + std::min(func_left, line_left);
    944         Module::Line l = *line;
    945         l.address = current;
    946         l.size = next_transition - current;
    947         func->lines.push_back(l);
    948         last_line_used = line;
    949       } else {
    950         // Covered by a function, but no line.
    951         if (func != last_function_cited) {
    952           reporter->UncoveredFunction(*func);
    953           last_function_cited = func;
    954         }
    955         if (line && within(*func, line->address))
    956           next_transition = line->address;
    957         else
    958           // If this overflows, we'll catch it below.
    959           next_transition = func->address + func->size;
    960       }
    961     } else {
    962       if (line && current >= line->address) {
    963         // Covered by a line, but no function.
    964         //
    965         // If GCC emits padding after one function to align the start
    966         // of the next, then it will attribute the padding
    967         // instructions to the last source line of function (to reduce
    968         // the size of the line number info), but omit it from the
    969         // DW_AT_{low,high}_pc range given in .debug_info (since it
    970         // costs nothing to be precise there). If we did use at least
    971         // some of the line we're about to skip, and it ends at the
    972         // start of the next function, then assume this is what
    973         // happened, and don't warn.
    974         if (line != last_line_cited
    975             && !(func
    976                  && line == last_line_used
    977                  && func->address - line->address == line->size)) {
    978           reporter->UncoveredLine(*line);
    979           last_line_cited = line;
    980         }
    981         if (func && within(*line, func->address))
    982           next_transition = func->address;
    983         else
    984           // If this overflows, we'll catch it below.
    985           next_transition = line->address + line->size;
    986       } else {
    987         // Covered by neither a function nor a line. By the invariant,
    988         // both func and line begin after CURRENT. The next transition
    989         // is the start of the next function or next line, whichever
    990         // is earliest.
    991         assert(func || line);
    992         if (func && line)
    993           next_transition = std::min(func->address, line->address);
    994         else if (func)
    995           next_transition = func->address;
    996         else
    997           next_transition = line->address;
    998       }
    999     }
   1000 
   1001     // If a function or line abuts the end of the address space, then
   1002     // next_transition may end up being zero, in which case we've completed
   1003     // our pass. Handle that here, instead of trying to deal with it in
   1004     // each place we compute next_transition.
   1005     if (!next_transition)
   1006       break;
   1007 
   1008     // Advance iterators as needed. If lines overlap or functions overlap,
   1009     // then we could go around more than once. We don't worry too much
   1010     // about what result we produce in that case, just as long as we don't
   1011     // hang or crash.
   1012     while (func_it != functions->end()
   1013            && next_transition >= (*func_it)->address
   1014            && !within(**func_it, next_transition))
   1015       func_it++;
   1016     func = (func_it != functions->end()) ? *func_it : NULL;
   1017     while (line_it != lines_.end()
   1018            && next_transition >= line_it->address
   1019            && !within(*line_it, next_transition))
   1020       line_it++;
   1021     line = (line_it != lines_.end()) ? &*line_it : NULL;
   1022 
   1023     // We must make progress.
   1024     assert(next_transition > current);
   1025     current = next_transition;
   1026   }
   1027 }
   1028 
   1029 void DwarfCUToModule::Finish() {
   1030   // Assembly language files have no function data, and that gives us
   1031   // no place to store our line numbers (even though the GNU toolchain
   1032   // will happily produce source line info for assembly language
   1033   // files).  To avoid spurious warnings about lines we can't assign
   1034   // to functions, skip CUs in languages that lack functions.
   1035   if (!cu_context_->language->HasFunctions())
   1036     return;
   1037 
   1038   // Read source line info, if we have any.
   1039   if (has_source_line_info_)
   1040     ReadSourceLines(source_line_offset_);
   1041 
   1042   vector<Module::Function *> *functions = &cu_context_->functions;
   1043 
   1044   // Dole out lines to the appropriate functions.
   1045   AssignLinesToFunctions();
   1046 
   1047   // Add our functions, which now have source lines assigned to them,
   1048   // to module_.
   1049   cu_context_->file_context->module_->AddFunctions(functions->begin(),
   1050                                                    functions->end());
   1051 
   1052   // Ownership of the function objects has shifted from cu_context to
   1053   // the Module.
   1054   functions->clear();
   1055 
   1056   cu_context_->file_context->ClearSpecifications();
   1057 }
   1058 
   1059 bool DwarfCUToModule::StartCompilationUnit(uint64 offset,
   1060                                            uint8 address_size,
   1061                                            uint8 offset_size,
   1062                                            uint64 cu_length,
   1063                                            uint8 dwarf_version) {
   1064   return dwarf_version >= 2;
   1065 }
   1066 
   1067 bool DwarfCUToModule::StartRootDIE(uint64 offset, enum DwarfTag tag) {
   1068   // We don't deal with partial compilation units (the only other tag
   1069   // likely to be used for root DIE).
   1070   return tag == dwarf2reader::DW_TAG_compile_unit;
   1071 }
   1072 
   1073 } // namespace google_breakpad
   1074