Home | History | Annotate | Download | only in gold
      1 // merge.h -- handle section merging for gold  -*- C++ -*-
      2 
      3 // Copyright (C) 2006-2014 Free Software Foundation, Inc.
      4 // Written by Ian Lance Taylor <iant (at) google.com>.
      5 
      6 // This file is part of gold.
      7 
      8 // This program is free software; you can redistribute it and/or modify
      9 // it under the terms of the GNU General Public License as published by
     10 // the Free Software Foundation; either version 3 of the License, or
     11 // (at your option) any later version.
     12 
     13 // This program is distributed in the hope that it will be useful,
     14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
     15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     16 // GNU General Public License for more details.
     17 
     18 // You should have received a copy of the GNU General Public License
     19 // along with this program; if not, write to the Free Software
     20 // Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston,
     21 // MA 02110-1301, USA.
     22 
     23 #ifndef GOLD_MERGE_H
     24 #define GOLD_MERGE_H
     25 
     26 #include <climits>
     27 #include <map>
     28 #include <vector>
     29 
     30 #include "stringpool.h"
     31 #include "output.h"
     32 
     33 namespace gold
     34 {
     35 
     36 class Merge_map;
     37 
     38 // For each object with merge sections, we store an Object_merge_map.
     39 // This is used to map locations in input sections to a merged output
     40 // section.  The output section itself is not recorded here--it can be
     41 // found in the output_sections_ field of the Object.
     42 
     43 class Object_merge_map
     44 {
     45  public:
          // Start with both inline slots marked unused (-1U) and with no
          // entries in the overflow map.
     46   Object_merge_map()
     47     : first_shnum_(-1U), first_map_(),
     48       second_shnum_(-1U), second_map_(),
     49       section_merge_maps_()
     50   { }
     51 
          // NOTE(review): defined out of line; presumably releases the
          // Input_merge_map objects pointed to by section_merge_maps_ --
          // confirm against the definition.
     52   ~Object_merge_map();
     53 
     54   // Add a mapping for MERGE_MAP, for the bytes from OFFSET to OFFSET
     55   // + LENGTH in the input section SHNDX to OUTPUT_OFFSET in the
     56   // output section.  An OUTPUT_OFFSET of -1 means that the bytes are
     57   // discarded.  OUTPUT_OFFSET is relative to the start of the merged
     58   // data in the output section.
     59   void
     60   add_mapping(const Merge_map*, unsigned int shndx, section_offset_type offset,
     61 	      section_size_type length, section_offset_type output_offset);
     62 
     63   // Get the output offset for an input address.  MERGE_MAP is the map
     64   // we are looking for, or NULL if we don't care.  The input address
     65   // is at offset OFFSET in section SHNDX.  This sets *OUTPUT_OFFSET
     66   // to the offset in the output section; this will be -1 if the bytes
     67   // are not being copied to the output.  This returns true if the
     68   // mapping is known, false otherwise.  *OUTPUT_OFFSET is relative to
     69   // the start of the merged data in the output section.
     70   bool
     71   get_output_offset(const Merge_map*, unsigned int shndx,
     72 		    section_offset_type offset,
     73 		    section_offset_type* output_offset);
     74 
     75   // Return whether this is the merge map for section SHNDX.
     76   bool
     77   is_merge_section_for(const Merge_map*, unsigned int shndx);
     78 
     79   // Initialize a mapping from input offsets to output addresses for
     80   // section SHNDX.  STARTING_ADDRESS is the output address of the
     81   // merged section.
          // NOTE(review): the unnamed final argument is presumably the map
          // that receives the result -- confirm against the definition.
     82   template<int size>
     83   void
     84   initialize_input_to_output_map(
     85       unsigned int shndx,
     86       typename elfcpp::Elf_types<size>::Elf_Addr starting_address,
     87       Unordered_map<section_offset_type,
     88 		    typename elfcpp::Elf_types<size>::Elf_Addr>*);
     89 
     90  private:
     91   // Map input section offsets to a length and an output section
     92   // offset.  An output section offset of -1 means that this part of
     93   // the input section is being discarded.
     94   struct Input_merge_entry
     95   {
     96     // The offset in the input section.
     97     section_offset_type input_offset;
     98     // The length.
     99     section_size_type length;
    100     // The offset in the output section.
    101     section_offset_type output_offset;
    102   };
    103 
    104   // A less-than comparison routine for Input_merge_entry.  Entries
          // are ordered by input_offset only.
    105   struct Input_merge_compare
    106   {
    107     bool
    108     operator()(const Input_merge_entry& i1, const Input_merge_entry& i2) const
    109     { return i1.input_offset < i2.input_offset; }
    110   };
    111 
    112   // A list of entries for a particular input section.
    113   struct Input_merge_map
    114   {
    115     typedef std::vector<Input_merge_entry> Entries;
    116 
    117     // We store these with the Relobj, and we look them up by input
    118     // section.  It is possible to have two different merge maps
    119     // associated with a single output section.  For example, this
    120     // happens routinely with .rodata, when merged string constants
    121     // and merged fixed size constants are both put into .rodata.  The
    122     // output offset that we store is not the offset from the start of
    123     // the output section; it is the offset from the start of the
    124     // merged data in the output section.  That means that the caller
    125     // is going to add the offset of the merged data within the output
    126     // section, which means that the caller needs to know which set of
    127     // merged data it found the entry in.  So it's not enough to find
    128     // this data based on the input section and the output section; we
    129     // also have to find it based on a set of merged data in the
    130     // output section.  In order to verify that we are looking at the
    131     // right data, we store a pointer to the Merge_map here, and we
    132     // pass in a pointer when looking at the data.  If we are asked to
    133     // look up information for a different Merge_map, we report that
    134     // we don't have it, rather than trying a lookup and returning an
    135     // answer which will receive the wrong offset.
    136     const Merge_map* merge_map;
    137     // The list of mappings.
    138     Entries entries;
    139     // Whether the ENTRIES field is sorted by input_offset.
    140     bool sorted;
    141 
            // A new map has no Merge_map and no entries; an empty list
            // counts as sorted.
    142     Input_merge_map()
    143       : merge_map(NULL), entries(), sorted(true)
    144     { }
    145   };
    146 
    147   // Map input section indices to merge maps.
    148   typedef std::map<unsigned int, Input_merge_map*> Section_merge_maps;
    149 
    150   // Return a pointer to the Input_merge_map to use for the input
    151   // section SHNDX, or NULL.
    152   Input_merge_map*
    153   get_input_merge_map(unsigned int shndx);
    154 
    155   // Get or make the Input_merge_map to use for the section SHNDX
    156   // with MERGE_MAP.
    157   Input_merge_map*
    158   get_or_make_input_merge_map(const Merge_map* merge_map, unsigned int shndx);
    159 
    160   // Any given object file will normally only have a couple of input
    161   // sections with mergeable contents.  So we keep the first two input
    162   // section numbers inline, and push any further ones into a map.  A
    163   // value of -1U in first_shnum_ or second_shnum_ means that we don't
    164   // have a corresponding entry.
    165   unsigned int first_shnum_;
    166   Input_merge_map first_map_;
    167   unsigned int second_shnum_;
    168   Input_merge_map second_map_;
    169   Section_merge_maps section_merge_maps_;
    170 };
    171 
    172 // This class manages mappings from input sections to offsets in an
    173 // output section.  This is used where input sections are merged.  The
    174 // actual data is stored in fields in Object.
    175 
    176 class Merge_map
    177 {
    178  public:
          // This class declares no data members, so there is nothing to
          // initialize here.
    179   Merge_map()
    180   { }
    181 
    182   // Add a mapping for the bytes from OFFSET to OFFSET + LENGTH in the
    183   // input section SHNDX in object OBJECT to OUTPUT_OFFSET in the
    184   // output section.  An OUTPUT_OFFSET of -1 means that the bytes are
    185   // discarded.  OUTPUT_OFFSET is not the offset from the start of the
    186   // output section, it is the offset from the start of the merged
    187   // data within the output section.
    188   void
    189   add_mapping(Relobj* object, unsigned int shndx,
    190 	      section_offset_type offset, section_size_type length,
    191 	      section_offset_type output_offset);
    192 
    193   // Return the output offset for an input address.  The input address
    194   // is at offset OFFSET in section SHNDX in OBJECT.  This sets
    195   // *OUTPUT_OFFSET to the offset in the output section; this will be
    196   // -1 if the bytes are not being copied to the output.  This returns
    197   // true if the mapping is known, false otherwise.  This returns the
    198   // value stored by add_mapping, namely the offset from the start of
    199   // the merged data within the output section.
    200   bool
    201   get_output_offset(const Relobj* object, unsigned int shndx,
    202 		    section_offset_type offset,
    203 		    section_offset_type* output_offset) const;
    204 
    205   // Return whether this is the merge mapping for section SHNDX in
    206   // OBJECT.  This should return true when get_output_offset would
    207   // return true for some input offset.
    208   bool
    209   is_merge_section_for(const Relobj* object, unsigned int shndx) const;
    210 };
    211 
    212 // A general class for SHF_MERGE data, to hold functions shared by
    213 // fixed-size constant data and string data.
    214 
    215 class Output_merge_base : public Output_section_data
    216 {
    217  public:
          // Construct with entry size ENTSIZE and alignment ADDRALIGN.
          // Input sections are not kept by default and no first input
          // section is recorded yet (first_relobj_ is NULL).  first_shndx_
          // is unsigned, so its -1 initializer wraps to the largest
          // unsigned int value.
    218   Output_merge_base(uint64_t entsize, uint64_t addralign)
    219     : Output_section_data(addralign), merge_map_(), entsize_(entsize),
    220       keeps_input_sections_(false), first_relobj_(NULL), first_shndx_(-1),
    221       input_sections_()
    222   { }
    223 
    224   // Return the entry size.
    225   uint64_t
    226   entsize() const
    227   { return this->entsize_; }
    228 
    229   // Whether this is a merge string section.  This is only true of
    230   // Output_merge_string.
    231   bool
    232   is_string()
    233   { return this->do_is_string(); }
    234 
    235   // Whether this keeps input sections.
    236   bool
    237   keeps_input_sections() const
    238   { return this->keeps_input_sections_; }
    239 
    240   // Set the keeps-input-sections flag.  This forwards to the virtual
    241   // do_set_keeps_input_sections so that sub-classes can perform
          // additional checks.
    242   void
    243   set_keeps_input_sections()
    244   { this->do_set_keeps_input_sections(); }
    245 
    246   // Return the object of the first merged input section.  This is used
    247   // for script processing.  This is NULL if the merge section is empty.
    248   Relobj*
    249   first_relobj() const
    250   { return this->first_relobj_; }
    251 
    252   // Return the section index of the first merged input section.  This
    253   // is used for script processing.  This is valid only if the merge
    254   // section is not empty; we assert that first_relobj_ is not NULL.
    255   unsigned int
    256   first_shndx() const
    257   {
    258     gold_assert(this->first_relobj_ != NULL);
    259     return this->first_shndx_;
    260   }
    261 
    262   // Set of merged input sections.
    263   typedef Unordered_set<Section_id, Section_id_hash> Input_sections;
    264 
    265   // Beginning of merged input sections.  We assert that
          // keeps_input_sections_ is set.
    266   Input_sections::const_iterator
    267   input_sections_begin() const
    268   {
    269     gold_assert(this->keeps_input_sections_);
    270     return this->input_sections_.begin();
    271   }
    272 
    273   // End of merged input sections.  We assert that
          // keeps_input_sections_ is set.
    274   Input_sections::const_iterator
    275   input_sections_end() const
    276   {
    277     gold_assert(this->keeps_input_sections_);
    278     return this->input_sections_.end();
    279   }
    280 
    281  protected:
    282   // Return the output offset for an input offset.
    283   bool
    284   do_output_offset(const Relobj* object, unsigned int shndx,
    285 		   section_offset_type offset,
    286 		   section_offset_type* poutput) const;
    287 
    288   // Return whether this is the merge section for an input section.
    289   bool
    290   do_is_merge_section_for(const Relobj*, unsigned int shndx) const;
    291 
    292   // Add a mapping from an OFFSET in input section SHNDX in object
    293   // OBJECT to an OUTPUT_OFFSET in the output section.  OUTPUT_OFFSET
    294   // is the offset from the start of the merged data in the output
    295   // section.  This simply forwards to merge_map_.
    296   void
    297   add_mapping(Relobj* object, unsigned int shndx, section_offset_type offset,
    298 	      section_size_type length, section_offset_type output_offset)
    299   {
    300     this->merge_map_.add_mapping(object, shndx, offset, length, output_offset);
    301   }
    302 
    303   // This may be overridden by the child class.  The default is that
          // this is not a string section.
    304   virtual bool
    305   do_is_string()
    306   { return false; }
    307 
    308   // This may be overridden by the child class.  The default just sets
          // the flag.
    309   virtual void
    310   do_set_keeps_input_sections()
    311   { this->keeps_input_sections_ = true; }
    312 
    313   // Record the merged input section for script processing.
    314   void
    315   record_input_section(Relobj* relobj, unsigned int shndx);
    316 
    317  private:
    318   // A mapping from input object/section/offset to offset in output
    319   // section.
    320   Merge_map merge_map_;
    321   // The entry size.  For fixed-size constants, this is the size of
    322   // the constants.  For strings, this is the size of a character.
    323   uint64_t entsize_;
    324   // Whether we keep input sections.
    325   bool keeps_input_sections_;
    326   // Object of the first merged input section.  We use this for script
    327   // processing.
    328   Relobj* first_relobj_;
    329   // Section index of the first merged input section.
    330   unsigned int first_shndx_;
    331   // Input sections.  We only keep them if keeps_input_sections_ is true.
    332   Input_sections input_sections_;
    333 };
    334 
    335 // Handle SHF_MERGE sections with fixed-size constant data.
    336 
    337 class Output_merge_data : public Output_merge_base
    338 {
    339  public:
          // Start with no accumulated data (p_ is NULL, len_ and alc_ are
          // zero) and no input entries counted.  The hash and equality
          // functors are each given a pointer back to this object.
          // NOTE(review): 128 is presumably the initial bucket count of the
          // hash table -- confirm against Unordered_set.
    340   Output_merge_data(uint64_t entsize, uint64_t addralign)
    341     : Output_merge_base(entsize, addralign), p_(NULL), len_(0), alc_(0),
    342       input_count_(0),
    343       hashtable_(128, Merge_data_hash(this), Merge_data_eq(this))
    344   { }
    345 
    346  protected:
    347   // Add an input section.
    348   bool
    349   do_add_input_section(Relobj* object, unsigned int shndx);
    350 
    351   // Set the final data size.
    352   void
    353   set_final_data_size();
    354 
    355   // Write the data to the file.
    356   void
    357   do_write(Output_file*);
    358 
    359   // Write the data to a buffer.
    360   void
    361   do_write_to_buffer(unsigned char*);
    362 
    363   // Write to a map file.
    364   void
    365   do_print_to_mapfile(Mapfile* mapfile) const
    366   { mapfile->print_output_data(this, _("** merge constants")); }
    367 
    368   // Print merge stats to stderr.
    369   void
    370   do_print_merge_stats(const char* section_name);
    371 
    372   // Set keeps-input-sections flag.  We assert that no input entries
          // have been counted yet (input_count_ is zero) before setting it.
    373   void
    374   do_set_keeps_input_sections()
    375   {
    376     gold_assert(this->input_count_ == 0);
    377     Output_merge_base::do_set_keeps_input_sections();
    378   }
    379 
    380  private:
    381   // We build a hash table of the fixed-size constants.  Each constant
    382   // is stored as an offset into the section data we are accumulating.
    383 
    384   // A key in the hash table.  This is an offset in the section
    385   // contents we are building.
    386   typedef section_offset_type Merge_data_key;
    387 
    388   // Compute the hash code.  To do this we need a pointer back to the
    389   // object holding the data.
    390   class Merge_data_hash
    391   {
    392    public:
    393     Merge_data_hash(const Output_merge_data* pomd)
    394       : pomd_(pomd)
    395     { }
    396 
    397     size_t
    398     operator()(Merge_data_key) const;
    399 
    400    private:
    401     const Output_merge_data* pomd_;
    402   };
    403 
    404   friend class Merge_data_hash;
    405 
    406   // Compare two entries in the hash table for equality.  To do this
    407   // we need a pointer back to the object holding the data.  Note that
    408   // we now have a pointer to the object stored in two places in the
    409   // hash table.  Fixing this would require specializing the hash
    410   // table, which would be hard to do portably.
    411   class Merge_data_eq
    412   {
    413    public:
    414     Merge_data_eq(const Output_merge_data* pomd)
    415       : pomd_(pomd)
    416     { }
    417 
    418     bool
    419     operator()(Merge_data_key k1, Merge_data_key k2) const;
    420 
    421    private:
    422     const Output_merge_data* pomd_;
    423   };
    424 
    425   friend class Merge_data_eq;
    426 
    427   // The type of the hash table.
    428   typedef Unordered_set<Merge_data_key, Merge_data_hash, Merge_data_eq>
    429     Merge_data_hashtable;
    430 
    431   // Given a hash table key, which is just an offset into the section
    432   // data, return a pointer to the corresponding constant.  We assert
          // that K lies within the accumulated data, i.e. 0 <= K < len_.
    433   const unsigned char*
    434   constant(Merge_data_key k) const
    435   {
    436     gold_assert(k >= 0 && k < static_cast<section_offset_type>(this->len_));
    437     return this->p_ + k;
    438   }
    439 
    440   // Add a constant to the output.
    441   void
    442   add_constant(const unsigned char*);
    443 
    444   // The accumulated data.
    445   unsigned char* p_;
    446   // The length of the accumulated data.
    447   section_size_type len_;
    448   // The size of the allocated buffer.
    449   section_size_type alc_;
    450   // The number of entries seen in input files.
    451   size_t input_count_;
    452   // The hash table.
    453   Merge_data_hashtable hashtable_;
    454 };
    455 
    456 // Handle SHF_MERGE sections with string data.  This is a template
    457 // based on the type of the characters in the string.
    458 
    459 template<typename Char_type>
    460 class Output_merge_string : public Output_merge_base
    461 {
    462  public:
          // The entry size passed to the base class is the size of one
          // character, sizeof(Char_type).  ADDRALIGN is given to both the
          // base class and the Stringpool.
          // NOTE(review): set_no_zero_null presumably stops the pool from
          // reserving offset zero for the empty string -- confirm against
          // stringpool.h.
    463   Output_merge_string(uint64_t addralign)
    464     : Output_merge_base(sizeof(Char_type), addralign), stringpool_(addralign),
    465       merged_strings_lists_(), input_count_(0), input_size_(0)
    466   {
    467     this->stringpool_.set_no_zero_null();
    468   }
    469 
    470  protected:
    471   // Add an input section.
    472   bool
    473   do_add_input_section(Relobj* object, unsigned int shndx);
    474 
    475   // Do all the final processing after the input sections are read in.
    476   // Returns the final data size.
    477   section_size_type
    478   finalize_merged_data();
    479 
    480   // Set the final data size.
    481   void
    482   set_final_data_size();
    483 
    484   // Write the data to the file.
    485   void
    486   do_write(Output_file*);
    487 
    488   // Write the data to a buffer.
    489   void
    490   do_write_to_buffer(unsigned char*);
    491 
    492   // Write to a map file.
    493   void
    494   do_print_to_mapfile(Mapfile* mapfile) const
    495   { mapfile->print_output_data(this, _("** merge strings")); }
    496 
    497   // Print merge stats to stderr.
    498   void
    499   do_print_merge_stats(const char* section_name);
    500 
    501   // Writes the stringpool to a buffer.  BUFFER_SIZE is passed through
          // to the Stringpool unchanged.
    502   void
    503   stringpool_to_buffer(unsigned char* buffer, section_size_type buffer_size)
    504   { this->stringpool_.write_to_buffer(buffer, buffer_size); }
    505 
    506   // Clears all the data in the stringpool, to save on memory.
    507   void
    508   clear_stringpool()
    509   { this->stringpool_.clear(); }
    510 
    511   // Whether this is a merge string section.  Always true for this
          // class.
    512   virtual bool
    513   do_is_string()
    514   { return true; }
    515 
    516   // Set keeps-input-sections flag.  We assert that no input entries
          // have been counted yet (input_count_ is zero) before setting it.
    517   void
    518   do_set_keeps_input_sections()
    519   {
    520     gold_assert(this->input_count_ == 0);
    521     Output_merge_base::do_set_keeps_input_sections();
    522   }
    523 
    524  private:
    525   // The name of the string type, for stats.
    526   const char*
    527   string_name();
    528 
    529   // As we see input sections, we build a mapping from object, section
    530   // index and offset to strings.
    531   struct Merged_string
    532   {
    533     // The offset in the input section.
    534     section_offset_type offset;
    535     // The key in the Stringpool.
    536     Stringpool::Key stringpool_key;
    537 
    538     Merged_string(section_offset_type offseta, Stringpool::Key stringpool_keya)
    539       : offset(offseta), stringpool_key(stringpool_keya)
    540     { }
    541   };
    542 
    543   typedef std::vector<Merged_string> Merged_strings;
    544 
          // The strings found in one input section of one input object.
    545   struct Merged_strings_list
    546   {
    547     // The input object where the strings were found.
    548     Relobj* object;
    549     // The input section in the input object.
    550     unsigned int shndx;
    551     // The list of merged strings.
    552     Merged_strings merged_strings;
    553 
    554     Merged_strings_list(Relobj* objecta, unsigned int shndxa)
    555       : object(objecta), shndx(shndxa), merged_strings()
    556     { }
    557   };
    558 
    559   typedef std::vector<Merged_strings_list*> Merged_strings_lists;
    560 
    561   // As we see the strings, we add them to a Stringpool.
    562   Stringpool_template<Char_type> stringpool_;
    563   // Map from a location in an input object to an entry in the
    564   // Stringpool.
    565   Merged_strings_lists merged_strings_lists_;
    566   // The number of entries seen in input files.
    567   size_t input_count_;
    568   // The total size of input sections.
    569   size_t input_size_;
    570 };
    571 
    572 } // End namespace gold.
    573 
    574 #endif // !defined(GOLD_MERGE_H)
    575