Home | History | Annotate | Download | only in gold
      1 // merge.h -- handle section merging for gold  -*- C++ -*-
      2 
      3 // Copyright (C) 2006-2016 Free Software Foundation, Inc.
      4 // Written by Ian Lance Taylor <iant (at) google.com>.
      5 
      6 // This file is part of gold.
      7 
      8 // This program is free software; you can redistribute it and/or modify
      9 // it under the terms of the GNU General Public License as published by
     10 // the Free Software Foundation; either version 3 of the License, or
     11 // (at your option) any later version.
     12 
     13 // This program is distributed in the hope that it will be useful,
     14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
     15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     16 // GNU General Public License for more details.
     17 
     18 // You should have received a copy of the GNU General Public License
     19 // along with this program; if not, write to the Free Software
     20 // Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston,
     21 // MA 02110-1301, USA.
     22 
     23 #ifndef GOLD_MERGE_H
     24 #define GOLD_MERGE_H
     25 
     26 #include <climits>
     27 #include <map>
     28 #include <vector>
     29 
     30 #include "stringpool.h"
     31 #include "output.h"
     32 
     33 namespace gold
     34 {
     35 
     36 // For each object with merge sections, we store an Object_merge_map.
     37 // This is used to map locations in input sections to a merged output
     38 // section.  The output section itself is not recorded here--it can be
     39 // found in the output_sections_ field of the Object.
     40 
     41 class Object_merge_map
     42 {
     43  public:
     44   Object_merge_map()
     45     : section_merge_maps_()
     46   { }
     47 
     48   ~Object_merge_map();
     49 
     50   // Add a mapping for MERGE_MAP, for the bytes from OFFSET to OFFSET
     51   // + LENGTH in the input section SHNDX to OUTPUT_OFFSET in the
     52   // output section.  An OUTPUT_OFFSET of -1 means that the bytes are
     53   // discarded.  OUTPUT_OFFSET is relative to the start of the merged
     54   // data in the output section.
     55   void
     56   add_mapping(const Output_section_data*, unsigned int shndx,
     57               section_offset_type offset, section_size_type length,
     58               section_offset_type output_offset);
     59 
     60   // Get the output offset for an input address.  MERGE_MAP is the map
     61   // we are looking for, or NULL if we don't care.  The input address
     62   // is at offset OFFSET in section SHNDX.  This sets *OUTPUT_OFFSET
     63   // to the offset in the output section; this will be -1 if the bytes
     64   // are not being copied to the output.  This returns true if the
     65   // mapping is known, false otherwise.  *OUTPUT_OFFSET is relative to
     66   // the start of the merged data in the output section.
     67   bool
     68   get_output_offset(unsigned int shndx,
     69 		    section_offset_type offset,
     70 		    section_offset_type* output_offset);
     71 
     72   const Output_section_data*
     73   find_merge_section(unsigned int shndx) const;
     74 
     75   // Initialize an mapping from input offsets to output addresses for
     76   // section SHNDX.  STARTING_ADDRESS is the output address of the
     77   // merged section.
     78   template<int size>
     79   void
     80   initialize_input_to_output_map(
     81       unsigned int shndx,
     82       typename elfcpp::Elf_types<size>::Elf_Addr starting_address,
     83       Unordered_map<section_offset_type,
     84 		    typename elfcpp::Elf_types<size>::Elf_Addr>*);
     85 
     86   // Map input section offsets to a length and an output section
     87   // offset.  An output section offset of -1 means that this part of
     88   // the input section is being discarded.
     89   struct Input_merge_entry
     90   {
     91     // The offset in the input section.
     92     section_offset_type input_offset;
     93     // The length.
     94     section_size_type length;
     95     // The offset in the output section.
     96     section_offset_type output_offset;
     97   };
     98 
     99   // A list of entries for a particular input section.
    100   struct Input_merge_map
    101   {
    102     void add_mapping(section_offset_type input_offset, section_size_type length,
    103                      section_offset_type output_offset);
    104 
    105     typedef std::vector<Input_merge_entry> Entries;
    106 
    107     // We store these with the Relobj, and we look them up by input
    108     // section.  It is possible to have two different merge maps
    109     // associated with a single output section.  For example, this
    110     // happens routinely with .rodata, when merged string constants
    111     // and merged fixed size constants are both put into .rodata.  The
    112     // output offset that we store is not the offset from the start of
    113     // the output section; it is the offset from the start of the
    114     // merged data in the output section.  That means that the caller
    115     // is going to add the offset of the merged data within the output
    116     // section, which means that the caller needs to know which set of
    117     // merged data it found the entry in.  So it's not enough to find
    118     // this data based on the input section and the output section; we
    119     // also have to find it based on a set of merged data in the
    120     // output section.  In order to verify that we are looking at the
    121     // right data, we store a pointer to the Merge_map here, and we
    122     // pass in a pointer when looking at the data.  If we are asked to
    123     // look up information for a different Merge_map, we report that
    124     // we don't have it, rather than trying a lookup and returning an
    125     // answer which will receive the wrong offset.
    126     const Output_section_data* output_data;
    127     // The list of mappings.
    128     Entries entries;
    129     // Whether the ENTRIES field is sorted by input_offset.
    130     bool sorted;
    131 
    132     Input_merge_map()
    133       : output_data(NULL), entries(), sorted(true)
    134     { }
    135   };
    136 
    137   // Get or make the Input_merge_map to use for the section SHNDX
    138   // with MERGE_MAP.
    139   Input_merge_map*
    140   get_or_make_input_merge_map(const Output_section_data* merge_map,
    141                               unsigned int shndx);
    142 
    143   private:
    144   // A less-than comparison routine for Input_merge_entry.
    145   struct Input_merge_compare
    146   {
    147     bool
    148     operator()(const Input_merge_entry& i1, const Input_merge_entry& i2) const
    149     { return i1.input_offset < i2.input_offset; }
    150   };
    151 
    152   // Map input section indices to merge maps.
    153   typedef std::vector<std::pair<unsigned int, Input_merge_map*> >
    154       Section_merge_maps;
    155 
    156   // Return a pointer to the Input_merge_map to use for the input
    157   // section SHNDX, or NULL.
    158   const Input_merge_map*
    159   get_input_merge_map(unsigned int shndx) const;
    160 
    161   Input_merge_map *
    162   get_input_merge_map(unsigned int shndx) {
    163     return const_cast<Input_merge_map *>(static_cast<const Object_merge_map *>(
    164                                              this)->get_input_merge_map(shndx));
    165   }
    166 
    167   Section_merge_maps section_merge_maps_;
    168 };
    169 
    170 // A general class for SHF_MERGE data, to hold functions shared by
    171 // fixed-size constant data and string data.
    172 
    173 class Output_merge_base : public Output_section_data
    174 {
    175  public:
    176   Output_merge_base(uint64_t entsize, uint64_t addralign)
    177     : Output_section_data(addralign), entsize_(entsize),
    178       keeps_input_sections_(false), first_relobj_(NULL), first_shndx_(-1),
    179       input_sections_()
    180   { }
    181 
    182   // Return the entry size.
    183   uint64_t
    184   entsize() const
    185   { return this->entsize_; }
    186 
    187   // Whether this is a merge string section.  This is only true of
    188   // Output_merge_string.
    189   bool
    190   is_string()
    191   { return this->do_is_string(); }
    192 
    193   // Whether this keeps input sections.
    194   bool
    195   keeps_input_sections() const
    196   { return this->keeps_input_sections_; }
    197 
    198   // Set the keeps-input-sections flag.  This is virtual so that sub-classes
    199   // can perform additional checks.
    200   void
    201   set_keeps_input_sections()
    202   { this->do_set_keeps_input_sections(); }
    203 
    204   // Return the object of the first merged input section.  This used
    205   // for script processing.  This is NULL if merge section is empty.
    206   Relobj*
    207   first_relobj() const
    208   { return this->first_relobj_; }
    209 
    210   // Return the section index of the first merged input section.  This
    211   // is used for script processing.  This is valid only if merge section
    212   // is not valid.
    213   unsigned int
    214   first_shndx() const
    215   {
    216     gold_assert(this->first_relobj_ != NULL);
    217     return this->first_shndx_;
    218   }
    219 
    220   // Set of merged input sections.
    221   typedef Unordered_set<Section_id, Section_id_hash> Input_sections;
    222 
    223   // Beginning of merged input sections.
    224   Input_sections::const_iterator
    225   input_sections_begin() const
    226   {
    227     gold_assert(this->keeps_input_sections_);
    228     return this->input_sections_.begin();
    229   }
    230 
    231   // Beginning of merged input sections.
    232   Input_sections::const_iterator
    233   input_sections_end() const
    234   {
    235     gold_assert(this->keeps_input_sections_);
    236     return this->input_sections_.end();
    237   }
    238 
    239  protected:
    240   // Return the output offset for an input offset.
    241   bool
    242   do_output_offset(const Relobj* object, unsigned int shndx,
    243 		   section_offset_type offset,
    244 		   section_offset_type* poutput) const;
    245 
    246   // This may be overridden by the child class.
    247   virtual bool
    248   do_is_string()
    249   { return false; }
    250 
    251   // This may be overridden by the child class.
    252   virtual void
    253   do_set_keeps_input_sections()
    254   { this->keeps_input_sections_ = true; }
    255 
    256   // Record the merged input section for script processing.
    257   void
    258   record_input_section(Relobj* relobj, unsigned int shndx);
    259 
    260  private:
    261   // The entry size.  For fixed-size constants, this is the size of
    262   // the constants.  For strings, this is the size of a character.
    263   uint64_t entsize_;
    264   // Whether we keep input sections.
    265   bool keeps_input_sections_;
    266   // Object of the first merged input section.  We use this for script
    267   // processing.
    268   Relobj* first_relobj_;
    269   // Section index of the first merged input section.
    270   unsigned int first_shndx_;
    271   // Input sections.  We only keep them is keeps_input_sections_ is true.
    272   Input_sections input_sections_;
    273 };
    274 
    275 // Handle SHF_MERGE sections with fixed-size constant data.
    276 
    277 class Output_merge_data : public Output_merge_base
    278 {
    279  public:
    280   Output_merge_data(uint64_t entsize, uint64_t addralign)
    281     : Output_merge_base(entsize, addralign), p_(NULL), len_(0), alc_(0),
    282       input_count_(0),
    283       hashtable_(128, Merge_data_hash(this), Merge_data_eq(this))
    284   { }
    285 
    286  protected:
    287   // Add an input section.
    288   bool
    289   do_add_input_section(Relobj* object, unsigned int shndx);
    290 
    291   // Set the final data size.
    292   void
    293   set_final_data_size();
    294 
    295   // Write the data to the file.
    296   void
    297   do_write(Output_file*);
    298 
    299   // Write the data to a buffer.
    300   void
    301   do_write_to_buffer(unsigned char*);
    302 
    303   // Write to a map file.
    304   void
    305   do_print_to_mapfile(Mapfile* mapfile) const
    306   { mapfile->print_output_data(this, _("** merge constants")); }
    307 
    308   // Print merge stats to stderr.
    309   void
    310   do_print_merge_stats(const char* section_name);
    311 
    312   // Set keeps-input-sections flag.
    313   void
    314   do_set_keeps_input_sections()
    315   {
    316     gold_assert(this->input_count_ == 0);
    317     Output_merge_base::do_set_keeps_input_sections();
    318   }
    319 
    320  private:
    321   // We build a hash table of the fixed-size constants.  Each constant
    322   // is stored as a pointer into the section data we are accumulating.
    323 
    324   // A key in the hash table.  This is an offset in the section
    325   // contents we are building.
    326   typedef section_offset_type Merge_data_key;
    327 
    328   // Compute the hash code.  To do this we need a pointer back to the
    329   // object holding the data.
    330   class Merge_data_hash
    331   {
    332    public:
    333     Merge_data_hash(const Output_merge_data* pomd)
    334       : pomd_(pomd)
    335     { }
    336 
    337     size_t
    338     operator()(Merge_data_key) const;
    339 
    340    private:
    341     const Output_merge_data* pomd_;
    342   };
    343 
    344   friend class Merge_data_hash;
    345 
    346   // Compare two entries in the hash table for equality.  To do this
    347   // we need a pointer back to the object holding the data.  Note that
    348   // we now have a pointer to the object stored in two places in the
    349   // hash table.  Fixing this would require specializing the hash
    350   // table, which would be hard to do portably.
    351   class Merge_data_eq
    352   {
    353    public:
    354     Merge_data_eq(const Output_merge_data* pomd)
    355       : pomd_(pomd)
    356     { }
    357 
    358     bool
    359     operator()(Merge_data_key k1, Merge_data_key k2) const;
    360 
    361    private:
    362     const Output_merge_data* pomd_;
    363   };
    364 
    365   friend class Merge_data_eq;
    366 
    367   // The type of the hash table.
    368   typedef Unordered_set<Merge_data_key, Merge_data_hash, Merge_data_eq>
    369     Merge_data_hashtable;
    370 
    371   // Given a hash table key, which is just an offset into the section
    372   // data, return a pointer to the corresponding constant.
    373   const unsigned char*
    374   constant(Merge_data_key k) const
    375   {
    376     gold_assert(k >= 0 && k < static_cast<section_offset_type>(this->len_));
    377     return this->p_ + k;
    378   }
    379 
    380   // Add a constant to the output.
    381   void
    382   add_constant(const unsigned char*);
    383 
    384   // The accumulated data.
    385   unsigned char* p_;
    386   // The length of the accumulated data.
    387   section_size_type len_;
    388   // The size of the allocated buffer.
    389   section_size_type alc_;
    390   // The number of entries seen in input files.
    391   size_t input_count_;
    392   // The hash table.
    393   Merge_data_hashtable hashtable_;
    394 };
    395 
    396 // Handle SHF_MERGE sections with string data.  This is a template
    397 // based on the type of the characters in the string.
    398 
    399 template<typename Char_type>
    400 class Output_merge_string : public Output_merge_base
    401 {
    402  public:
    403   Output_merge_string(uint64_t addralign)
    404     : Output_merge_base(sizeof(Char_type), addralign), stringpool_(addralign),
    405       merged_strings_lists_(), input_count_(0), input_size_(0)
    406   {
    407     this->stringpool_.set_no_zero_null();
    408   }
    409 
    410  protected:
    411   // Add an input section.
    412   bool
    413   do_add_input_section(Relobj* object, unsigned int shndx);
    414 
    415   // Do all the final processing after the input sections are read in.
    416   // Returns the final data size.
    417   section_size_type
    418   finalize_merged_data();
    419 
    420   // Set the final data size.
    421   void
    422   set_final_data_size();
    423 
    424   // Write the data to the file.
    425   void
    426   do_write(Output_file*);
    427 
    428   // Write the data to a buffer.
    429   void
    430   do_write_to_buffer(unsigned char*);
    431 
    432   // Write to a map file.
    433   void
    434   do_print_to_mapfile(Mapfile* mapfile) const
    435   { mapfile->print_output_data(this, _("** merge strings")); }
    436 
    437   // Print merge stats to stderr.
    438   void
    439   do_print_merge_stats(const char* section_name);
    440 
    441   // Writes the stringpool to a buffer.
    442   void
    443   stringpool_to_buffer(unsigned char* buffer, section_size_type buffer_size)
    444   { this->stringpool_.write_to_buffer(buffer, buffer_size); }
    445 
    446   // Clears all the data in the stringpool, to save on memory.
    447   void
    448   clear_stringpool()
    449   { this->stringpool_.clear(); }
    450 
    451   // Whether this is a merge string section.
    452   virtual bool
    453   do_is_string()
    454   { return true; }
    455 
    456   // Set keeps-input-sections flag.
    457   void
    458   do_set_keeps_input_sections()
    459   {
    460     gold_assert(this->input_count_ == 0);
    461     Output_merge_base::do_set_keeps_input_sections();
    462   }
    463 
    464  private:
    465   // The name of the string type, for stats.
    466   const char*
    467   string_name();
    468 
    469   // As we see input sections, we build a mapping from object, section
    470   // index and offset to strings.
    471   struct Merged_string
    472   {
    473     // The offset in the input section.
    474     section_offset_type offset;
    475     // The key in the Stringpool.
    476     Stringpool::Key stringpool_key;
    477 
    478     Merged_string(section_offset_type offseta, Stringpool::Key stringpool_keya)
    479       : offset(offseta), stringpool_key(stringpool_keya)
    480     { }
    481   };
    482 
    483   typedef std::vector<Merged_string> Merged_strings;
    484 
    485   struct Merged_strings_list
    486   {
    487     // The input object where the strings were found.
    488     Relobj* object;
    489     // The input section in the input object.
    490     unsigned int shndx;
    491     // The list of merged strings.
    492     Merged_strings merged_strings;
    493 
    494     Merged_strings_list(Relobj* objecta, unsigned int shndxa)
    495       : object(objecta), shndx(shndxa), merged_strings()
    496     { }
    497   };
    498 
    499   typedef std::vector<Merged_strings_list*> Merged_strings_lists;
    500 
    501   // As we see the strings, we add them to a Stringpool.
    502   Stringpool_template<Char_type> stringpool_;
    503   // Map from a location in an input object to an entry in the
    504   // Stringpool.
    505   Merged_strings_lists merged_strings_lists_;
    506   // The number of entries seen in input files.
    507   size_t input_count_;
    508   // The total size of input sections.
    509   size_t input_size_;
    510 };
    511 
    512 } // End namespace gold.
    513 
    514 #endif // !defined(GOLD_MERGE_H)
    515