Home | History | Annotate | Download | only in gold
      1 // dwarf_reader.h -- parse dwarf2/3 debug information for gold  -*- C++ -*-
      2 
      3 // Copyright (C) 2007-2014 Free Software Foundation, Inc.
      4 // Written by Ian Lance Taylor <iant@google.com>.
      5 
      6 // This file is part of gold.
      7 
      8 // This program is free software; you can redistribute it and/or modify
      9 // it under the terms of the GNU General Public License as published by
     10 // the Free Software Foundation; either version 3 of the License, or
     11 // (at your option) any later version.
     12 
     13 // This program is distributed in the hope that it will be useful,
     14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
     15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     16 // GNU General Public License for more details.
     17 
     18 // You should have received a copy of the GNU General Public License
     19 // along with this program; if not, write to the Free Software
     20 // Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston,
     21 // MA 02110-1301, USA.
     22 
     23 #ifndef GOLD_DWARF_READER_H
     24 #define GOLD_DWARF_READER_H
     25 
     26 #include <vector>
     27 #include <map>
     28 #include <limits.h>
     29 #include <sys/types.h>
     30 
     31 #include "elfcpp.h"
     32 #include "elfcpp_swap.h"
     33 #include "dwarf.h"
     34 #include "reloc.h"
     35 
     36 namespace gold
     37 {
     38 
     39 class Dwarf_info_reader;
     40 struct LineStateMachine;
     41 
     42 // This class is used to extract the section index and offset of
     43 // the target of a relocation for a given offset within the section.
     44 
     45 class Elf_reloc_mapper
     46 {
     47  public:
     48   Elf_reloc_mapper()
     49   { }
     50 
     51   virtual
     52   ~Elf_reloc_mapper()
     53   { }
     54 
     55   // Initialize the relocation tracker for section RELOC_SHNDX.
     56   bool
     57   initialize(unsigned int reloc_shndx, unsigned int reloc_type)
     58   { return this->do_initialize(reloc_shndx, reloc_type); }
     59 
     60   // Return the next reloc_offset.
     61   off_t
     62   next_offset()
     63   { return this->do_next_offset(); }
     64 
     65   // Advance to the next relocation past OFFSET.
     66   void
     67   advance(off_t offset)
     68   { this->do_advance(offset); }
     69 
     70   // Return the section index and offset within the section of the target
     71   // of the relocation for RELOC_OFFSET in the referring section.
     72   unsigned int
     73   get_reloc_target(off_t reloc_offset, off_t* target_offset)
     74   { return this->do_get_reloc_target(reloc_offset, target_offset); }
     75 
     76   // Checkpoint the current position in the reloc section.
     77   uint64_t
     78   checkpoint() const
     79   { return this->do_checkpoint(); }
     80 
     81   // Reset the current position to the CHECKPOINT.
     82   void
     83   reset(uint64_t checkpoint)
     84   { this->do_reset(checkpoint); }
     85 
     86  protected:
     87   virtual bool
     88   do_initialize(unsigned int, unsigned int) = 0;
     89 
     90   // Return the next reloc_offset.
     91   virtual off_t
     92   do_next_offset() = 0;
     93 
     94   // Advance to the next relocation past OFFSET.
     95   virtual void
     96   do_advance(off_t offset) = 0;
     97 
     98   virtual unsigned int
     99   do_get_reloc_target(off_t reloc_offset, off_t* target_offset) = 0;
    100 
    101   // Checkpoint the current position in the reloc section.
    102   virtual uint64_t
    103   do_checkpoint() const = 0;
    104 
    105   // Reset the current position to the CHECKPOINT.
    106   virtual void
    107   do_reset(uint64_t checkpoint) = 0;
    108 };
    109 
    110 template<int size, bool big_endian>
    111 class Sized_elf_reloc_mapper : public Elf_reloc_mapper
    112 {
    113  public:
    114   Sized_elf_reloc_mapper(Object* object, const unsigned char* symtab,
    115 			 off_t symtab_size)
    116     : object_(object), symtab_(symtab), symtab_size_(symtab_size),
    117       reloc_type_(0), track_relocs_()
    118   { }
    119 
    120  protected:
    121   bool
    122   do_initialize(unsigned int reloc_shndx, unsigned int reloc_type);
    123 
    124   // Return the next reloc_offset.
    125   virtual off_t
    126   do_next_offset()
    127   { return this->track_relocs_.next_offset(); }
    128 
    129   // Advance to the next relocation past OFFSET.
    130   virtual void
    131   do_advance(off_t offset)
    132   { this->track_relocs_.advance(offset); }
    133 
    134   unsigned int
    135   do_get_reloc_target(off_t reloc_offset, off_t* target_offset);
    136 
    137   // Checkpoint the current position in the reloc section.
    138   uint64_t
    139   do_checkpoint() const
    140   { return this->track_relocs_.checkpoint(); }
    141 
    142   // Reset the current position to the CHECKPOINT.
    143   void
    144   do_reset(uint64_t checkpoint)
    145   { this->track_relocs_.reset(checkpoint); }
    146 
    147  private:
    148   typedef typename elfcpp::Elf_types<size>::Elf_Addr Address;
    149 
    150   // Return the section index of symbol SYMNDX, and copy its value to *VALUE.
    151   // Set *IS_ORDINARY true if the section index is an ordinary section index.
    152   unsigned int
    153   symbol_section(unsigned int symndx, Address* value, bool* is_ordinary);
    154 
    155   // The object file.
    156   Object* object_;
    157   // The ELF symbol table.
    158   const unsigned char* symtab_;
    159   // The size of the ELF symbol table.
    160   off_t symtab_size_;
    161   // Type of the relocation section (SHT_REL or SHT_RELA).
    162   unsigned int reloc_type_;
    163   // Relocations for the referring section.
    164   Track_relocs<size, big_endian> track_relocs_;
    165 };
    166 
    167 // This class is used to read the abbreviations table from the
    168 // .debug_abbrev section of the object file.
    169 
    170 class Dwarf_abbrev_table
    171 {
    172  public:
    173   // An attribute list entry.
    174   struct Attribute
    175   {
    176     Attribute(unsigned int a, unsigned int f)
    177       : attr(a), form(f)
    178     { }
    179     unsigned int attr;
    180     unsigned int form;
    181   };
    182 
    183   // An abbrev code entry.
    184   struct Abbrev_code
    185   {
    186     Abbrev_code(unsigned int t, bool hc)
    187       : tag(t), has_children(hc), has_sibling_attribute(false), attributes()
    188     {
    189       this->attributes.reserve(10);
    190     }
    191 
    192     void
    193     add_attribute(unsigned int attr, unsigned int form)
    194     {
    195       this->attributes.push_back(Attribute(attr, form));
    196     }
    197 
    198     // The DWARF tag.
    199     unsigned int tag;
    200     // True if the DIE has children.
    201     bool has_children : 1;
    202     // True if the DIE has a sibling attribute.
    203     bool has_sibling_attribute : 1;
    204     // The list of attributes and forms.
    205     std::vector<Attribute> attributes;
    206   };
    207 
    208   Dwarf_abbrev_table()
    209     : abbrev_shndx_(0), abbrev_offset_(0), buffer_(NULL), buffer_end_(NULL),
    210       owns_buffer_(false), buffer_pos_(NULL), high_abbrev_codes_()
    211   {
    212     memset(this->low_abbrev_codes_, 0, sizeof(this->low_abbrev_codes_));
    213   }
    214 
    215   ~Dwarf_abbrev_table()
    216   {
    217     if (this->owns_buffer_ && this->buffer_ != NULL)
    218       delete[] this->buffer_;
    219     this->clear_abbrev_codes();
    220   }
    221 
    222   // Read the abbrev table from an object file.
    223   bool
    224   read_abbrevs(Relobj* object,
    225 	       unsigned int abbrev_shndx,
    226 	       off_t abbrev_offset)
    227   {
    228     // If we've already read this abbrev table, return immediately.
    229     if (this->abbrev_shndx_ > 0
    230 	&& this->abbrev_shndx_ == abbrev_shndx
    231 	&& this->abbrev_offset_ == abbrev_offset)
    232       return true;
    233     return this->do_read_abbrevs(object, abbrev_shndx, abbrev_offset);
    234   }
    235 
    236   // Return the abbrev code entry for CODE.  This is a fast path for
    237   // abbrev codes that are in the direct lookup table.  If not found
    238   // there, we call do_get_abbrev() to do the hard work.
    239   const Abbrev_code*
    240   get_abbrev(unsigned int code)
    241   {
    242     if (code < this->low_abbrev_code_max_
    243 	&& this->low_abbrev_codes_[code] != NULL)
    244       return this->low_abbrev_codes_[code];
    245     return this->do_get_abbrev(code);
    246   }
    247 
    248  private:
    249   // Read the abbrev table from an object file.
    250   bool
    251   do_read_abbrevs(Relobj* object,
    252 		  unsigned int abbrev_shndx,
    253 		  off_t abbrev_offset);
    254 
    255   // Lookup the abbrev code entry for CODE.
    256   const Abbrev_code*
    257   do_get_abbrev(unsigned int code);
    258 
    259   // Store an abbrev code entry for CODE.
    260   void
    261   store_abbrev(unsigned int code, const Abbrev_code* entry)
    262   {
    263     if (code < this->low_abbrev_code_max_)
    264       this->low_abbrev_codes_[code] = entry;
    265     else
    266       this->high_abbrev_codes_[code] = entry;
    267   }
    268 
    269   // Clear the abbrev code table and release the memory it uses.
    270   void
    271   clear_abbrev_codes();
    272 
    273   typedef Unordered_map<unsigned int, const Abbrev_code*> Abbrev_code_table;
    274 
    275   // The section index of the current abbrev table.
    276   unsigned int abbrev_shndx_;
    277   // The offset within the section of the current abbrev table.
    278   off_t abbrev_offset_;
    279   // The buffer containing the .debug_abbrev section.
    280   const unsigned char* buffer_;
    281   const unsigned char* buffer_end_;
    282   // True if this object owns the buffer and needs to delete it.
    283   bool owns_buffer_;
    284   // Pointer to the current position in the buffer.
    285   const unsigned char* buffer_pos_;
    286   // The table of abbrev codes.
    287   // We use a direct-lookup array for low abbrev codes,
    288   // and store the rest in a hash table.
    289   static const unsigned int low_abbrev_code_max_ = 256;
    290   const Abbrev_code* low_abbrev_codes_[low_abbrev_code_max_];
    291   Abbrev_code_table high_abbrev_codes_;
    292 };
    293 
    294 // A DWARF range list.  The start and end offsets are relative
    295 // to the input section SHNDX.  Each range must lie entirely
    296 // within a single section.
    297 
    298 class Dwarf_range_list
    299 {
    300  public:
    301   struct Range
    302   {
    303     Range(unsigned int a_shndx, off_t a_start, off_t a_end)
    304       : shndx(a_shndx), start(a_start), end(a_end)
    305     { }
    306 
    307     unsigned int shndx;
    308     off_t start;
    309     off_t end;
    310   };
    311 
    312   Dwarf_range_list()
    313     : range_list_()
    314   { }
    315 
    316   void
    317   add(unsigned int shndx, off_t start, off_t end)
    318   { this->range_list_.push_back(Range(shndx, start, end)); }
    319 
    320   size_t
    321   size() const
    322   { return this->range_list_.size(); }
    323 
    324   const Range&
    325   operator[](off_t i) const
    326   { return this->range_list_[i]; }
    327 
    328  private:
    329   std::vector<Range> range_list_;
    330 };
    331 
    332 // This class is used to read the ranges table from the
    333 // .debug_ranges section of the object file.
    334 
    335 class Dwarf_ranges_table
    336 {
    337  public:
    338   Dwarf_ranges_table(Dwarf_info_reader* dwinfo)
    339     : dwinfo_(dwinfo), ranges_shndx_(0), ranges_buffer_(NULL),
    340       ranges_buffer_end_(NULL), owns_ranges_buffer_(false),
    341       ranges_reloc_mapper_(NULL), reloc_type_(0), output_section_offset_(0)
    342   { }
    343 
    344   ~Dwarf_ranges_table()
    345   {
    346     if (this->owns_ranges_buffer_ && this->ranges_buffer_ != NULL)
    347       delete[] this->ranges_buffer_;
    348     if (this->ranges_reloc_mapper_ != NULL)
    349       delete this->ranges_reloc_mapper_;
    350   }
    351 
    352   // Read the ranges table from an object file.
    353   bool
    354   read_ranges_table(Relobj* object,
    355 		    const unsigned char* symtab,
    356 		    off_t symtab_size,
    357 		    unsigned int ranges_shndx);
    358 
    359   // Read the range table from an object file.
    360   Dwarf_range_list*
    361   read_range_list(Relobj* object,
    362 		  const unsigned char* symtab,
    363 		  off_t symtab_size,
    364 		  unsigned int address_size,
    365 		  unsigned int ranges_shndx,
    366 		  off_t ranges_offset);
    367 
    368   // Look for a relocation at offset OFF in the range table,
    369   // and return the section index and offset of the target.
    370   unsigned int
    371   lookup_reloc(off_t off, off_t* target_off);
    372 
    373  private:
    374   // The Dwarf_info_reader, for reading data.
    375   Dwarf_info_reader* dwinfo_;
    376   // The section index of the ranges table.
    377   unsigned int ranges_shndx_;
    378   // The buffer containing the .debug_ranges section.
    379   const unsigned char* ranges_buffer_;
    380   const unsigned char* ranges_buffer_end_;
    381   // True if this object owns the buffer and needs to delete it.
    382   bool owns_ranges_buffer_;
    383   // Relocation mapper for the .debug_ranges section.
    384   Elf_reloc_mapper* ranges_reloc_mapper_;
    385   // Type of the relocation section (SHT_REL or SHT_RELA).
    386   unsigned int reloc_type_;
    387   // For incremental update links, this will hold the offset of the
    388   // input section within the output section.  Offsets read from
    389   // relocated data will be relative to the output section, and need
    390   // to be corrected before reading data from the input section.
    391   uint64_t output_section_offset_;
    392 };
    393 
    394 // This class is used to read the pubnames and pubtypes tables from the
    395 // .debug_pubnames and .debug_pubtypes sections of the object file.
    396 
    397 class Dwarf_pubnames_table
    398 {
    399  public:
    400   Dwarf_pubnames_table(Dwarf_info_reader* dwinfo, bool is_pubtypes)
    401     : dwinfo_(dwinfo), buffer_(NULL), buffer_end_(NULL), owns_buffer_(false),
    402       offset_size_(0), pinfo_(NULL), end_of_table_(NULL),
    403       is_pubtypes_(is_pubtypes), is_gnu_style_(false),
    404       unit_length_(0), cu_offset_(0)
    405   { }
    406 
    407   ~Dwarf_pubnames_table()
    408   {
    409     if (this->owns_buffer_ && this->buffer_ != NULL)
    410       delete[] this->buffer_;
    411   }
    412 
    413   // Read the pubnames section from the object file, using the symbol
    414   // table for relocating it.
    415   bool
    416   read_section(Relobj* object, const unsigned char* symbol_table,
    417                off_t symtab_size);
    418 
    419   // Read the header for the set at OFFSET.
    420   bool
    421   read_header(off_t offset);
    422 
    423   // Return the offset to the cu within the info or types section.
    424   off_t
    425   cu_offset()
    426   { return this->cu_offset_; }
    427 
    428   // Return the size of this subsection of the table.  The unit length
    429   // doesn't include the size of its own field.
    430   off_t
    431   subsection_size()
    432   { return this->unit_length_; }
    433 
    434   // Read the next name from the set.  If the pubname table is gnu-style,
    435   // FLAG_BYTE is set to the high-byte of a gdb_index version 7 cu_index.
    436   const char*
    437   next_name(uint8_t* flag_byte);
    438 
    439  private:
    440   // The Dwarf_info_reader, for reading data.
    441   Dwarf_info_reader* dwinfo_;
    442   // The buffer containing the .debug_ranges section.
    443   const unsigned char* buffer_;
    444   const unsigned char* buffer_end_;
    445   // True if this object owns the buffer and needs to delete it.
    446   bool owns_buffer_;
    447   // The size of a DWARF offset for the current set.
    448   unsigned int offset_size_;
    449   // The current position within the buffer.
    450   const unsigned char* pinfo_;
    451   // The end of the current pubnames table.
    452   const unsigned char* end_of_table_;
    453   // TRUE if this is a .debug_pubtypes section.
    454   bool is_pubtypes_;
    455   // Gnu-style pubnames table. This style has an extra flag byte between the
    456   // offset and the name, and is used for generating version 7 of gdb-index.
    457   bool is_gnu_style_;
    458   // Fields read from the header.
    459   uint64_t unit_length_;
    460   off_t cu_offset_;
    461 
    462   // Track relocations for this table so we can find the CUs that
    463   // correspond to the subsections.
    464   Elf_reloc_mapper* reloc_mapper_;
    465   // Type of the relocation section (SHT_REL or SHT_RELA).
    466   unsigned int reloc_type_;
    467 };
    468 
    469 // This class represents a DWARF Debug Info Entry (DIE).
    470 
    471 class Dwarf_die
    472 {
    473  public:
    474   // An attribute value.
    475   struct Attribute_value
    476   {
    477     unsigned int attr;
    478     unsigned int form;
    479     union
    480     {
    481       int64_t intval;
    482       uint64_t uintval;
    483       const char* stringval;
    484       const unsigned char* blockval;
    485       off_t refval;
    486     } val;
    487     union
    488     {
    489       // Section index for reference forms.
    490       unsigned int shndx;
    491       // Block length for block forms.
    492       unsigned int blocklen;
    493       // Attribute offset for DW_FORM_strp.
    494       unsigned int attr_off;
    495     } aux;
    496   };
    497 
    498   // A list of attribute values.
    499   typedef std::vector<Attribute_value> Attributes;
    500 
    501   Dwarf_die(Dwarf_info_reader* dwinfo,
    502 	    off_t die_offset,
    503 	    Dwarf_die* parent);
    504 
    505   // Return the DWARF tag for this DIE.
    506   unsigned int
    507   tag() const
    508   {
    509     if (this->abbrev_code_ == NULL)
    510       return 0;
    511     return this->abbrev_code_->tag;
    512   }
    513 
    514   // Return true if this DIE has children.
    515   bool
    516   has_children() const
    517   {
    518     gold_assert(this->abbrev_code_ != NULL);
    519     return this->abbrev_code_->has_children;
    520   }
    521 
    522   // Return true if this DIE has a sibling attribute.
    523   bool
    524   has_sibling_attribute() const
    525   {
    526     gold_assert(this->abbrev_code_ != NULL);
    527     return this->abbrev_code_->has_sibling_attribute;
    528   }
    529 
    530   // Return the value of attribute ATTR.
    531   const Attribute_value*
    532   attribute(unsigned int attr);
    533 
    534   // Return the value of the DW_AT_name attribute.
    535   const char*
    536   name()
    537   {
    538     if (this->name_ == NULL)
    539       this->set_name();
    540     return this->name_;
    541   }
    542 
    543   // Return the value of the DW_AT_linkage_name
    544   // or DW_AT_MIPS_linkage_name attribute.
    545   const char*
    546   linkage_name()
    547   {
    548     if (this->linkage_name_ == NULL)
    549       this->set_linkage_name();
    550     return this->linkage_name_;
    551   }
    552 
    553   // Return the value of the DW_AT_specification attribute.
    554   off_t
    555   specification()
    556   {
    557     if (!this->attributes_read_)
    558       this->read_attributes();
    559     return this->specification_;
    560   }
    561 
    562   // Return the value of the DW_AT_abstract_origin attribute.
    563   off_t
    564   abstract_origin()
    565   {
    566     if (!this->attributes_read_)
    567       this->read_attributes();
    568     return this->abstract_origin_;
    569   }
    570 
    571   // Return the value of attribute ATTR as a string.
    572   const char*
    573   string_attribute(unsigned int attr);
    574 
    575   // Return the value of attribute ATTR as an integer.
    576   int64_t
    577   int_attribute(unsigned int attr);
    578 
    579   // Return the value of attribute ATTR as an unsigned integer.
    580   uint64_t
    581   uint_attribute(unsigned int attr);
    582 
    583   // Return the value of attribute ATTR as a reference.
    584   off_t
    585   ref_attribute(unsigned int attr, unsigned int* shndx);
    586 
    587   // Return the value of attribute ATTR as a address.
    588   off_t
    589   address_attribute(unsigned int attr, unsigned int* shndx);
    590 
    591   // Return the value of attribute ATTR as a flag.
    592   bool
    593   flag_attribute(unsigned int attr)
    594   { return this->int_attribute(attr) != 0; }
    595 
    596   // Return true if this DIE is a declaration.
    597   bool
    598   is_declaration()
    599   { return this->flag_attribute(elfcpp::DW_AT_declaration); }
    600 
    601   // Return the parent of this DIE.
    602   Dwarf_die*
    603   parent() const
    604   { return this->parent_; }
    605 
    606   // Return the offset of this DIE.
    607   off_t
    608   offset() const
    609   { return this->die_offset_; }
    610 
    611   // Return the offset of this DIE's first child.
    612   off_t
    613   child_offset();
    614 
    615   // Set the offset of this DIE's next sibling.
    616   void
    617   set_sibling_offset(off_t sibling_offset)
    618   { this->sibling_offset_ = sibling_offset; }
    619 
    620   // Return the offset of this DIE's next sibling.
    621   off_t
    622   sibling_offset();
    623 
    624  private:
    625   typedef Dwarf_abbrev_table::Abbrev_code Abbrev_code;
    626 
    627   // Read all the attributes of the DIE.
    628   bool
    629   read_attributes();
    630 
    631   // Set the name of the DIE if present.
    632   void
    633   set_name();
    634 
    635   // Set the linkage name if present.
    636   void
    637   set_linkage_name();
    638 
    639   // Skip all the attributes of the DIE and return the offset
    640   // of the next DIE.
    641   off_t
    642   skip_attributes();
    643 
    644   // The Dwarf_info_reader, for reading attributes.
    645   Dwarf_info_reader* dwinfo_;
    646   // The parent of this DIE.
    647   Dwarf_die* parent_;
    648   // Offset of this DIE within its compilation unit.
    649   off_t die_offset_;
    650   // Offset of the first attribute, relative to the beginning of the DIE.
    651   off_t attr_offset_;
    652   // Offset of the first child, relative to the compilation unit.
    653   off_t child_offset_;
    654   // Offset of the next sibling, relative to the compilation unit.
    655   off_t sibling_offset_;
    656   // The abbreviation table entry.
    657   const Abbrev_code* abbrev_code_;
    658   // The list of attributes.
    659   Attributes attributes_;
    660   // True if the attributes have been read.
    661   bool attributes_read_;
    662   // The following fields hold common attributes to avoid a linear
    663   // search through the attribute list.
    664   // The DIE name (DW_AT_name).
    665   const char* name_;
    666   // Offset of the name in the string table (for DW_FORM_strp).
    667   off_t name_off_;
    668   // The linkage name (DW_AT_linkage_name or DW_AT_MIPS_linkage_name).
    669   const char* linkage_name_;
    670   // Offset of the linkage name in the string table (for DW_FORM_strp).
    671   off_t linkage_name_off_;
    672   // Section index of the string table (for DW_FORM_strp).
    673   unsigned int string_shndx_;
    674   // The value of a DW_AT_specification attribute.
    675   off_t specification_;
    676   // The value of a DW_AT_abstract_origin attribute.
    677   off_t abstract_origin_;
    678 };
    679 
    680 // This class is used to read the debug info from the .debug_info
    681 // or .debug_types sections.  This is a base class that implements
    682 // the generic parsing of the compilation unit header and DIE
    683 // structure.  The parse() method parses the entire section, and
    684 // calls the various visit_xxx() methods for each header.  Clients
    685 // should derive a new class from this one and implement the
    686 // visit_compilation_unit() and visit_type_unit() functions.
    687 
    688 class Dwarf_info_reader
    689 {
    690  public:
    691   Dwarf_info_reader(bool is_type_unit,
    692 		    Relobj* object,
    693 		    const unsigned char* symtab,
    694 		    off_t symtab_size,
    695 		    unsigned int shndx,
    696 		    unsigned int reloc_shndx,
    697 		    unsigned int reloc_type)
    698     : is_type_unit_(is_type_unit), object_(object), symtab_(symtab),
    699       symtab_size_(symtab_size), shndx_(shndx), reloc_shndx_(reloc_shndx),
    700       reloc_type_(reloc_type), abbrev_shndx_(0), string_shndx_(0),
    701       buffer_(NULL), buffer_end_(NULL), cu_offset_(0), cu_length_(0),
    702       offset_size_(0), address_size_(0), cu_version_(0),
    703       abbrev_table_(), ranges_table_(this),
    704       reloc_mapper_(NULL), string_buffer_(NULL), string_buffer_end_(NULL),
    705       owns_string_buffer_(false), string_output_section_offset_(0)
    706   { }
    707 
    708   virtual
    709   ~Dwarf_info_reader()
    710   {
    711     if (this->reloc_mapper_ != NULL)
    712       delete this->reloc_mapper_;
    713     if (this->owns_string_buffer_ && this->string_buffer_ != NULL)
    714       delete[] this->string_buffer_;
    715   }
    716 
    717   // Begin parsing the debug info.  This calls visit_compilation_unit()
    718   // or visit_type_unit() for each compilation or type unit found in the
    719   // section, and visit_die() for each top-level DIE.
    720   void
    721   parse();
    722 
    723   // Return the abbrev code entry for a CODE.
    724   const Dwarf_abbrev_table::Abbrev_code*
    725   get_abbrev(unsigned int code)
    726   { return this->abbrev_table_.get_abbrev(code); }
    727 
    728   // Return a pointer to the DWARF info buffer at OFFSET.
    729   const unsigned char*
    730   buffer_at_offset(off_t offset) const
    731   {
    732     const unsigned char* p = this->buffer_ + this->cu_offset_ + offset;
    733     if (this->check_buffer(p + 1))
    734       return p;
    735     return NULL;
    736   }
    737 
    738   // Read a possibly unaligned integer of SIZE.
    739   template <int valsize>
    740   inline typename elfcpp::Valtype_base<valsize>::Valtype
    741   read_from_pointer(const unsigned char* source);
    742 
    743   // Read a possibly unaligned integer of SIZE.  Update SOURCE after read.
    744   template <int valsize>
    745   inline typename elfcpp::Valtype_base<valsize>::Valtype
    746   read_from_pointer(const unsigned char** source);
    747 
    748   // Look for a relocation at offset ATTR_OFF in the dwarf info,
    749   // and return the section index and offset of the target.
    750   unsigned int
    751   lookup_reloc(off_t attr_off, off_t* target_off);
    752 
    753   // Return a string from the DWARF string table.
    754   const char*
    755   get_string(off_t str_off, unsigned int string_shndx);
    756 
    757   // Return the size of a DWARF offset.
    758   unsigned int
    759   offset_size() const
    760   { return this->offset_size_; }
    761 
    762   // Return the size of an address.
    763   unsigned int
    764   address_size() const
    765   { return this->address_size_; }
    766 
    767   // Set the section index of the .debug_abbrev section.
    768   // We use this if there are no relocations for the .debug_info section.
    769   // If not set, the code parse() routine will search for the section by name.
    770   void
    771   set_abbrev_shndx(unsigned int abbrev_shndx)
    772   { this->abbrev_shndx_ = abbrev_shndx; }
    773 
    774   // Return a pointer to the object file's ELF symbol table.
    775   const unsigned char*
    776   symtab() const
    777   { return this->symtab_; }
    778 
    779   // Return the size of the object file's ELF symbol table.
    780   off_t
    781   symtab_size() const
    782   { return this->symtab_size_; }
    783 
    784   // Return the offset of the current compilation unit.
    785   off_t
    786   cu_offset() const
    787   { return this->cu_offset_; }
    788 
    789  protected:
    790   // Begin parsing the debug info.  This calls visit_compilation_unit()
    791   // or visit_type_unit() for each compilation or type unit found in the
    792   // section, and visit_die() for each top-level DIE.
    793   template<bool big_endian>
    794   void
    795   do_parse();
    796 
    797   // The following methods are hooks that are meant to be implemented
    798   // by a derived class.  A default, do-nothing, implementation of
    799   // each is provided for this base class.
    800 
    801   // Visit a compilation unit.
    802   virtual void
    803   visit_compilation_unit(off_t cu_offset, off_t cu_length, Dwarf_die* root_die);
    804 
    805   // Visit a type unit.
    806   virtual void
    807   visit_type_unit(off_t tu_offset, off_t tu_length, off_t type_offset,
    808 		  uint64_t signature, Dwarf_die* root_die);
    809 
    810   // Read the range table.
    811   Dwarf_range_list*
    812   read_range_list(unsigned int ranges_shndx, off_t ranges_offset)
    813   {
    814     return this->ranges_table_.read_range_list(this->object_,
    815 					       this->symtab_,
    816 					       this->symtab_size_,
    817 					       this->address_size_,
    818 					       ranges_shndx,
    819 					       ranges_offset);
    820   }
    821 
    822   // Return the object.
    823   Relobj*
    824   object() const
    825   { return this->object_; }
    826 
    827   // Checkpoint the relocation tracker.
    828   uint64_t
    829   get_reloc_checkpoint() const
    830   { return this->reloc_mapper_->checkpoint(); }
    831 
    832   // Reset the relocation tracker to the CHECKPOINT.
    833   void
    834   reset_relocs(uint64_t checkpoint)
    835   { this->reloc_mapper_->reset(checkpoint); }
    836 
    837  private:
    838   // Print a warning about a corrupt debug section.
    839   void
    840   warn_corrupt_debug_section() const;
    841 
    842   // Check that P is within the bounds of the current section.
    843   bool
    844   check_buffer(const unsigned char* p) const
    845   {
    846     if (p > this->buffer_ + this->cu_offset_ + this->cu_length_)
    847       {
    848 	this->warn_corrupt_debug_section();
    849 	return false;
    850       }
    851     return true;
    852   }
    853 
    854   // Read the DWARF string table.
    855   bool
    856   read_string_table(unsigned int string_shndx)
    857   {
    858     // If we've already read this string table, return immediately.
    859     if (this->string_shndx_ > 0 && this->string_shndx_ == string_shndx)
    860       return true;
    861     if (string_shndx == 0 && this->string_shndx_ > 0)
    862       return true;
    863     return this->do_read_string_table(string_shndx);
    864   }
    865 
    866   bool
    867   do_read_string_table(unsigned int string_shndx);
    868 
    869   // True if this is a type unit; false for a compilation unit.
    870   bool is_type_unit_;
    871   // The object containing the .debug_info or .debug_types input section.
    872   Relobj* object_;
    873   // The ELF symbol table.
    874   const unsigned char* symtab_;
    875   // The size of the ELF symbol table.
    876   off_t symtab_size_;
    877   // Index of the .debug_info or .debug_types section.
    878   unsigned int shndx_;
    879   // Index of the relocation section.
    880   unsigned int reloc_shndx_;
    881   // Type of the relocation section (SHT_REL or SHT_RELA).
    882   unsigned int reloc_type_;
    883   // Index of the .debug_abbrev section (0 if not known).
    884   unsigned int abbrev_shndx_;
    885   // Index of the .debug_str section.  read_string_table() treats a value
          // greater than 0 as meaning that a string table has already been read.
    886   unsigned int string_shndx_;
    887   // The buffer for the debug info.  check_buffer() uses buffer_ as the
          // base to which cu_offset_ and cu_length_ are added.
          // NOTE(review): buffer_end_ presumably marks the end of this buffer --
          // confirm against the code that sets it.
    888   const unsigned char* buffer_;
    889   const unsigned char* buffer_end_;
    890   // Offset of the current compilation unit.
    891   off_t cu_offset_;
    892   // Length of the current compilation unit.
    893   off_t cu_length_;
    894   // Size of a DWARF offset for the current compilation unit.
    895   unsigned int offset_size_;
    896   // Size of an address for the target architecture.
    897   unsigned int address_size_;
    898   // Compilation unit version number.
    899   unsigned int cu_version_;
    900   // Abbreviations table for current compilation unit.
    901   Dwarf_abbrev_table abbrev_table_;
    902   // Ranges table for the current compilation unit.
    903   Dwarf_ranges_table ranges_table_;
    904   // Relocation mapper for the section.
    905   Elf_reloc_mapper* reloc_mapper_;
    906   // The buffer for the debug string table.
          // NOTE(review): string_buffer_end_ presumably marks the end of this
          // buffer -- confirm against the code that sets it.
    907   const char* string_buffer_;
    908   const char* string_buffer_end_;
    909   // True if this object owns the string buffer and needs to delete it.
    910   bool owns_string_buffer_;
    911   // For incremental update links, this will hold the offset of the
    912   // input .debug_str section within the output section.  Offsets read
    913   // from relocated data will be relative to the output section, and need
    914   // to be corrected before reading data from the input section.
    915   uint64_t string_output_section_offset_;
    916 };
    917 
    918 // We can't do better than to keep the offsets in a sorted vector.
    919 // Here, offset is the key, and file_num/line_num is the value.
    920 struct Offset_to_lineno_entry
    921 {
          // The sort key; operator< compares this field first.
    922   off_t offset;
    923   int header_num;  // which file-list to use (i.e. which .o file are we in)
    924   // An index (not an actual pointer) into files_.  This and the flag
          // below are bit-fields whose widths add up to the number of bits in
          // an int: all but one bit for file_num, one bit for the flag.
    925   unsigned int file_num : sizeof(int) * CHAR_BIT - 1;
    926   // True if this was the last entry for the current offset, meaning
    927   // it's the line that actually applies.
    928   unsigned int last_line_for_offset : 1;
    929   // The line number in the source file.  -1 to indicate end-of-function.
    930   int line_num;
    931 
    932   // This sorts by offsets first, and then puts the correct line to
    933   // report for a given offset at the beginning of the run of equal
    934   // offsets (so that asking for 1 line gives the best answer).  This
    935   // is not a total ordering: header_num, file_num and line_num take no
          // part in the comparison, so two entries that agree on offset and on
          // last_line_for_offset are not ordered relative to each other.
    936   bool operator<(const Offset_to_lineno_entry& that) const
    937   {
    938     if (this->offset != that.offset)
    939       return this->offset < that.offset;
    940     // Note the '>' which makes this sort 'true' first.
    941     return this->last_line_for_offset > that.last_line_for_offset;
    942   }
    943 };
    944 
    945 // This class is used to read the line information from the debugging
    946 // section of an object file.
        // It is an abstract base class: the public addr2line() forwards to the
        // private pure virtual do_addr2line(), which a derived class supplies.
    947 
    948 class Dwarf_line_info
    949 {
    950  public:
    951   Dwarf_line_info()
    952   { }
    953 
    954   virtual
    955   ~Dwarf_line_info()
    956   { }
    957 
    958   // Given a section number and an offset, returns the associated
    959   // file and line-number, as a string: "file:lineno".  If unable
    960   // to do the mapping, returns the empty string.  You must call
    961   // read_line_mappings() before calling this function.  If
    962   // 'other_lines' is non-NULL, fills that in with other line
    963   // numbers assigned to the same offset.
          // The three arguments are passed unchanged to do_addr2line().
    964   std::string
    965   addr2line(unsigned int shndx, off_t offset,
    966             std::vector<std::string>* other_lines)
    967   { return this->do_addr2line(shndx, offset, other_lines); }
    968 
    969   // A helper function for a single addr2line lookup.  It also keeps a
    970   // cache of the last CACHE_SIZE Dwarf_line_info objects it created;
    971   // set to 0 not to cache at all.  The larger CACHE_SIZE is, the more
    972   // chance this routine won't have to re-create a Dwarf_line_info
    973   // object for its addr2line computation; such creations are slow.
    974   // NOTE: Not thread-safe, so only call from one thread at a time.
    975   static std::string
    976   one_addr2line(Object* object, unsigned int shndx, off_t offset,
    977                 size_t cache_size, std::vector<std::string>* other_lines);
    978 
    979   // This reclaims all the memory that one_addr2line may have cached.
    980   // Use this when you know you will not be calling one_addr2line again.
    981   static void
    982   clear_addr2line_cache();
    983 
    984  private:
          // The implementation hook for addr2line(); it receives the same
          // SHNDX, OFFSET and OTHER_LINES and its result is returned to the
          // caller of addr2line().
    985   virtual std::string
    986   do_addr2line(unsigned int shndx, off_t offset,
    987                std::vector<std::string>* other_lines) = 0;
    988 };
    989 
        // A Dwarf_line_info parameterized on SIZE and BIG_ENDIAN.  It declares
        // the do_addr2line() implementation and holds the state used while
        // reading the line number information.
        // NOTE(review): SIZE and BIG_ENDIAN are presumably the target's word
        // size in bits and its byte order -- confirm.
    990 template<int size, bool big_endian>
    991 class Sized_dwarf_line_info : public Dwarf_line_info
    992 {
    993  public:
    994   // Initializes a .debug_line reader for a given object file.
    995   // If READ_SHNDX is specified (it is unsigned and defaults to -1U), only
    996   // read the debug information that pertains to the specified section.
          // NOTE(review): -1U presumably means "do not restrict to one
          // section" -- confirm against the constructor's definition.
    997   Sized_dwarf_line_info(Object* object, unsigned int read_shndx = -1U);
    998 
          // Releases buffer_start_ and str_buffer_start_ with delete[] when
          // they are non-NULL.
          // NOTE(review): reloc_mapper_ is not released here -- confirm who
          // owns it.
    999   virtual
   1000   ~Sized_dwarf_line_info()
   1001   {
   1002     if (this->buffer_start_ != NULL)
   1003       delete[] this->buffer_start_;
   1004     if (this->str_buffer_start_ != NULL)
   1005       delete[] this->str_buffer_start_;
   1006   }
   1007 
   1008  private:
          // NOTE(review): presumably the line table version value that marks
          // the experimental DWARF 5 line table format -- confirm where it is
          // compared.
   1009   const static int DWARF5_EXPERIMENTAL_LINE_TABLE = 0xf006;
   1010 
          // Implements the lookup behind Dwarf_line_info::addr2line().
   1011   std::string
   1012   do_addr2line(unsigned int shndx, off_t offset,
   1013                std::vector<std::string>* other_lines);
   1014 
   1015   // Formats a file and line number to a string like "dirname/filename:lineno".
   1016   std::string
   1017   format_file_lineno(const Offset_to_lineno_entry& lineno) const;
   1018 
   1019   // Start processing line info, and populates the offset_map_.
   1020   // If SHNDX is not -1U, only store debug information that
   1021   // pertains to the specified section.
          // NOTE(review): no member named offset_map_ is declared in this
          // class; line_number_map_ is presumably what is meant -- confirm.
   1022   void
   1023   read_line_mappings(unsigned int shndx);
   1024 
   1025   // Reads the relocation section associated with .debug_line and
   1026   // stores relocation information in reloc_map_.
   1027   void
   1028   read_relocs();
   1029 
   1030   // Reads the DWARF2/3 header for this line info.  Each takes as input
   1031   // a starting buffer position, and returns the ending position.
   1032   const unsigned char*
   1033   read_header_prolog(const unsigned char* lineptr);
   1034 
   1035   const unsigned char*
   1036   read_header_tables(const unsigned char* lineptr);
   1037 
          // NOTE(review): presumably the counterpart of read_header_tables()
          // for the version 5 header layout -- confirm.
   1038   const unsigned char*
   1039   read_header_tables_v5(const unsigned char* lineptr);
   1040 
   1041   // Reads the DWARF2/3 line information.  If shndx is not -1U,
   1042   // discard all line information that doesn't pertain to the given
   1043   // section.
          // NOTE(review): LOGICALS and the two table flags are not described
          // here; presumably they relate to the line number programs that
          // logicals_start_ and actuals_start_ point at -- see the definition.
   1044   const unsigned char*
   1045   read_lines(const unsigned char* lineptr, const unsigned char* endptr,
   1046 	     std::vector<LineStateMachine>* logicals,
   1047 	     bool is_logicals_table, bool is_actuals_table,
   1048 	     unsigned int shndx);
   1049 
   1050   // Process a single line info opcode at START using the state
   1051   // machine at LSM.  Return true if we should define a line using the
   1052   // current state of the line state machine.  Place the length of the
   1053   // opcode in LEN.
   1054   bool
   1055   process_one_opcode(const unsigned char* start,
   1056                      struct LineStateMachine* lsm, size_t* len,
   1057                      std::vector<LineStateMachine>* logicals,
   1058 		     bool is_logicals_table, bool is_actuals_table);
   1059 
   1060   // Some parts of processing differ depending on whether the input
   1061   // was a .o file or not.
   1062   bool input_is_relobj();
   1063 
   1064   // If we saw anything amiss while parsing, we set this to false.
   1065   // Then addr2line will always fail (rather than return possibly-
   1066   // corrupt data).
   1067   bool data_valid_;
   1068 
   1069   // A DWARF2/3 line info header.  This is not the same size as in the
   1070   // actual file, as the one in the file may have a 32 bit or 64 bit
   1071   // lengths.
   1072 
   1073   struct Dwarf_line_infoHeader
   1074   {
   1075     off_t total_length;
   1076     int version;
   1077     off_t prologue_length;
   1078     int min_insn_length; // insn stands for instruction
   1079     int max_ops_per_insn;
   1080     bool default_is_stmt; // stmt stands for statement
   1081     signed char line_base;
   1082     int line_range;
   1083     unsigned char opcode_base;
   1084     std::vector<unsigned char> std_opcode_lengths;
   1085     int offset_size;
   1086   } header_;
   1087 
   1088   // buffer is the buffer for our line info, starting at exactly where
   1089   // the line info to read is.
   1090   const unsigned char* buffer_;
   1091   const unsigned char* buffer_end_;
   1092   // If the buffer was allocated temporarily, and therefore must be
   1093   // deallocated in the dtor, this contains a pointer to the start
   1094   // of the buffer.
   1095   const unsigned char* buffer_start_;
   1096 
   1097   // str_buffer is a second buffer, kept alongside the line info buffer.
   1098   // NOTE(review): presumably it holds string data used by the line
          // info -- confirm which section it is read from.
   1099   const unsigned char* str_buffer_;
   1100   const unsigned char* str_buffer_end_;
   1101   // If the string buffer was allocated temporarily, and therefore must
   1102   // be deallocated in the dtor, this contains a pointer to the start
   1103   // of the buffer.
   1104   const unsigned char* str_buffer_start_;
   1105 
   1106   // Pointer to the end of the header_length field (aka prologue_length).
   1107   // The offsets to the line number programs are relative to this point.
   1108   const unsigned char* end_of_header_length_;
   1109 
   1110   // Pointers to the start of the line number programs.
   1111   const unsigned char* logicals_start_;
   1112   const unsigned char* actuals_start_;
   1113 
   1114   // Pointer to the end of the current compilation unit.
   1115   const unsigned char* end_of_unit_;
   1116 
   1117   // This has relocations that point into buffer.
   1118   Sized_elf_reloc_mapper<size, big_endian>* reloc_mapper_;
   1119   // The type of the reloc section--SHT_REL or SHT_RELA.
   1120   unsigned int track_relocs_type_;
   1121 
   1122   // This is used to figure out what section to apply a relocation to.
   1123   const unsigned char* symtab_buffer_;
   1124   section_size_type symtab_buffer_size_;
   1125 
   1126   // Holds the directories and files as we see them.  We have an array
   1127   // of directory-lists, one for each .o file we're reading (usually
   1128   // there will just be one, but there may be more if input is a .so).
   1129   std::vector<std::vector<std::string> > directories_;
   1130   // The first part is an index into directories_, the second the filename.
   1131   std::vector<std::vector< std::pair<int, std::string> > > files_;
   1132 
   1133   // An index into the current directories_ and files_ vectors.
   1134   int current_header_index_;
   1135 
   1136   // A sorted map from offset of the relocation target to the shndx
   1137   // and addend for the relocation.
   1138   typedef std::map<off_t, std::pair<unsigned int, off_t> >
   1139   Reloc_map;
   1140   Reloc_map reloc_map_;
   1141 
   1142   // We have a vector of offset->lineno entries for every input section.
          // The map key is an unsigned int.
          // NOTE(review): presumably the input section index -- confirm.
   1143   typedef Unordered_map<unsigned int, std::vector<Offset_to_lineno_entry> >
   1144   Lineno_map;
   1145 
   1146   Lineno_map line_number_map_;
   1147 };
   1148 
   1149 } // End namespace gold.
   1150 
   1151 #endif // !defined(GOLD_DWARF_READER_H)
   1152