// merge.h -- handle section merging for gold  -*- C++ -*-

// Copyright (C) 2006-2014 Free Software Foundation, Inc.
// Written by Ian Lance Taylor <iant (at) google.com>.

// This file is part of gold.

// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 3 of the License, or
// (at your option) any later version.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston,
// MA 02110-1301, USA.

#ifndef GOLD_MERGE_H
#define GOLD_MERGE_H

#include <climits>
#include <map>
#include <vector>

#include "stringpool.h"
#include "output.h"

namespace gold
{

class Merge_map;

// For each object with merge sections, we store an Object_merge_map.
// This is used to map locations in input sections to a merged output
// section.  The output section itself is not recorded here--it can be
// found in the output_sections_ field of the Object.

class Object_merge_map
{
 public:
  // Start with no inline entries (both section numbers are -1U) and
  // an empty overflow map.
  Object_merge_map()
    : first_shnum_(-1U), first_map_(),
      second_shnum_(-1U), second_map_(),
      section_merge_maps_()
  { }

  // Defined out of line.
  // NOTE(review): section_merge_maps_ holds raw Input_merge_map
  // pointers; presumably the destructor releases them -- confirm in
  // the implementation file.
  ~Object_merge_map();

  // Add a mapping for MERGE_MAP, for the bytes from OFFSET to OFFSET
  // + LENGTH in the input section SHNDX to OUTPUT_OFFSET in the
  // output section.  An OUTPUT_OFFSET of -1 means that the bytes are
  // discarded.  OUTPUT_OFFSET is relative to the start of the merged
  // data in the output section.
  void
  add_mapping(const Merge_map*, unsigned int shndx, section_offset_type offset,
              section_size_type length, section_offset_type output_offset);

  // Get the output offset for an input address.  MERGE_MAP is the map
  // we are looking for, or NULL if we don't care.  The input address
  // is at offset OFFSET in section SHNDX.  This sets *OUTPUT_OFFSET
  // to the offset in the output section; this will be -1 if the bytes
  // are not being copied to the output.  This returns true if the
  // mapping is known, false otherwise.  *OUTPUT_OFFSET is relative to
  // the start of the merged data in the output section.
  bool
  get_output_offset(const Merge_map*, unsigned int shndx,
                    section_offset_type offset,
                    section_offset_type* output_offset);

  // Return whether this is the merge map for section SHNDX.
  bool
  is_merge_section_for(const Merge_map*, unsigned int shndx);

  // Initialize a mapping from input offsets to output addresses for
  // section SHNDX.  STARTING_ADDRESS is the output address of the
  // merged section.  The last parameter is the map to be filled in.
  template<int size>
  void
  initialize_input_to_output_map(
      unsigned int shndx,
      typename elfcpp::Elf_types<size>::Elf_Addr starting_address,
      Unordered_map<section_offset_type,
                    typename elfcpp::Elf_types<size>::Elf_Addr>*);

 private:
  // Map input section offsets to a length and an output section
  // offset.  An output section offset of -1 means that this part of
  // the input section is being discarded.
  struct Input_merge_entry
  {
    // The offset in the input section.
    section_offset_type input_offset;
    // The length.
    section_size_type length;
    // The offset in the output section.
    section_offset_type output_offset;
  };

  // A less-than comparison routine for Input_merge_entry.  Entries
  // are ordered by input_offset only.
  struct Input_merge_compare
  {
    bool
    operator()(const Input_merge_entry& i1, const Input_merge_entry& i2) const
    { return i1.input_offset < i2.input_offset; }
  };

  // A list of entries for a particular input section.
  struct Input_merge_map
  {
    typedef std::vector<Input_merge_entry> Entries;

    // We store these with the Relobj, and we look them up by input
    // section.  It is possible to have two different merge maps
    // associated with a single output section.  For example, this
    // happens routinely with .rodata, when merged string constants
    // and merged fixed size constants are both put into .rodata.  The
    // output offset that we store is not the offset from the start of
    // the output section; it is the offset from the start of the
    // merged data in the output section.  That means that the caller
    // is going to add the offset of the merged data within the output
    // section, which means that the caller needs to know which set of
    // merged data it found the entry in.  So it's not enough to find
    // this data based on the input section and the output section; we
    // also have to find it based on a set of merged data in the
    // output section.  In order to verify that we are looking at the
    // right data, we store a pointer to the Merge_map here, and we
    // pass in a pointer when looking at the data.  If we are asked to
    // look up information for a different Merge_map, we report that
    // we don't have it, rather than trying a lookup and returning an
    // answer which will receive the wrong offset.
    const Merge_map* merge_map;
    // The list of mappings.
    Entries entries;
    // Whether the ENTRIES field is sorted by input_offset.
    bool sorted;

    // A new map has no Merge_map and no entries; an empty list counts
    // as sorted.
    Input_merge_map()
      : merge_map(NULL), entries(), sorted(true)
    { }
  };

  // Map input section indices to merge maps.
  typedef std::map<unsigned int, Input_merge_map*> Section_merge_maps;

  // Return a pointer to the Input_merge_map to use for the input
  // section SHNDX, or NULL.
  Input_merge_map*
  get_input_merge_map(unsigned int shndx);

  // Get or make the Input_merge_map to use for the section SHNDX
  // with MERGE_MAP.
  Input_merge_map*
  get_or_make_input_merge_map(const Merge_map* merge_map, unsigned int shndx);

  // Any given object file will normally only have a couple of input
  // sections with mergeable contents.  So we keep the first two input
  // section numbers inline, and push any further ones into a map.  A
  // value of -1U in first_shnum_ or second_shnum_ means that we don't
  // have a corresponding entry.
  unsigned int first_shnum_;
  Input_merge_map first_map_;
  unsigned int second_shnum_;
  Input_merge_map second_map_;
  Section_merge_maps section_merge_maps_;
};

// This class manages mappings from input sections to offsets in an
// output section.  This is used where input sections are merged.  The
// actual data is stored in fields in Object.

class Merge_map
{
 public:
  // This class has no data members of its own, so there is nothing to
  // initialize.
  Merge_map()
  { }

  // Add a mapping for the bytes from OFFSET to OFFSET + LENGTH in the
  // input section SHNDX in object OBJECT to OUTPUT_OFFSET in the
  // output section.  An OUTPUT_OFFSET of -1 means that the bytes are
  // discarded.  OUTPUT_OFFSET is not the offset from the start of the
  // output section, it is the offset from the start of the merged
  // data within the output section.
  void
  add_mapping(Relobj* object, unsigned int shndx,
              section_offset_type offset, section_size_type length,
              section_offset_type output_offset);

  // Return the output offset for an input address.  The input address
  // is at offset OFFSET in section SHNDX in OBJECT.  This sets
  // *OUTPUT_OFFSET to the offset in the output section; this will be
  // -1 if the bytes are not being copied to the output.  This returns
  // true if the mapping is known, false otherwise.  This returns the
  // value stored by add_mapping, namely the offset from the start of
  // the merged data within the output section.
  bool
  get_output_offset(const Relobj* object, unsigned int shndx,
                    section_offset_type offset,
                    section_offset_type* output_offset) const;

  // Return whether this is the merge mapping for section SHNDX in
  // OBJECT.  This should return true when get_output_offset would
  // return true for some input offset.
  bool
  is_merge_section_for(const Relobj* object, unsigned int shndx) const;
};

// A general class for SHF_MERGE data, to hold functions shared by
// fixed-size constant data and string data.

class Output_merge_base : public Output_section_data
{
 public:
  // Create merge data with entry size ENTSIZE and alignment
  // ADDRALIGN.  Initially no input sections are kept and no first
  // input section has been recorded (first_relobj_ is NULL).
  Output_merge_base(uint64_t entsize, uint64_t addralign)
    : Output_section_data(addralign), merge_map_(), entsize_(entsize),
      keeps_input_sections_(false), first_relobj_(NULL), first_shndx_(-1),
      input_sections_()
  { }

  // Return the entry size.
  uint64_t
  entsize() const
  { return this->entsize_; }

  // Whether this is a merge string section.  This is only true of
  // Output_merge_string.
  bool
  is_string()
  { return this->do_is_string(); }

  // Whether this keeps input sections.
  bool
  keeps_input_sections() const
  { return this->keeps_input_sections_; }

  // Set the keeps-input-sections flag.  This forwards to the virtual
  // do_set_keeps_input_sections so that sub-classes can perform
  // additional checks.
  void
  set_keeps_input_sections()
  { this->do_set_keeps_input_sections(); }

  // Return the object of the first merged input section.  This is
  // used for script processing.  This is NULL if merge section is
  // empty.
  Relobj*
  first_relobj() const
  { return this->first_relobj_; }

  // Return the section index of the first merged input section.  This
  // is used for script processing.  This is valid only if merge section
  // is not empty; it asserts that first_relobj_ is not NULL.
  unsigned int
  first_shndx() const
  {
    gold_assert(this->first_relobj_ != NULL);
    return this->first_shndx_;
  }

  // Set of merged input sections.
  typedef Unordered_set<Section_id, Section_id_hash> Input_sections;

  // Beginning of merged input sections.  Asserts that the
  // keeps-input-sections flag is set.
  Input_sections::const_iterator
  input_sections_begin() const
  {
    gold_assert(this->keeps_input_sections_);
    return this->input_sections_.begin();
  }

  // End of merged input sections.  Asserts that the
  // keeps-input-sections flag is set.
  Input_sections::const_iterator
  input_sections_end() const
  {
    gold_assert(this->keeps_input_sections_);
    return this->input_sections_.end();
  }

 protected:
  // Return the output offset for an input offset.
  bool
  do_output_offset(const Relobj* object, unsigned int shndx,
                   section_offset_type offset,
                   section_offset_type* poutput) const;

  // Return whether this is the merge section for an input section.
  bool
  do_is_merge_section_for(const Relobj*, unsigned int shndx) const;

  // Add a mapping from an OFFSET in input section SHNDX in object
  // OBJECT to an OUTPUT_OFFSET in the output section.  OUTPUT_OFFSET
  // is the offset from the start of the merged data in the output
  // section.  This simply forwards to the Merge_map member.
  void
  add_mapping(Relobj* object, unsigned int shndx, section_offset_type offset,
              section_size_type length, section_offset_type output_offset)
  {
    this->merge_map_.add_mapping(object, shndx, offset, length, output_offset);
  }

  // This may be overridden by the child class.  The default is that
  // this is not a string section.
  virtual bool
  do_is_string()
  { return false; }

  // This may be overridden by the child class.  The default just sets
  // the flag.
  virtual void
  do_set_keeps_input_sections()
  { this->keeps_input_sections_ = true; }

  // Record the merged input section for script processing.
  void
  record_input_section(Relobj* relobj, unsigned int shndx);

 private:
  // A mapping from input object/section/offset to offset in output
  // section.
  Merge_map merge_map_;
  // The entry size.  For fixed-size constants, this is the size of
  // the constants.  For strings, this is the size of a character.
  uint64_t entsize_;
  // Whether we keep input sections.
  bool keeps_input_sections_;
  // Object of the first merged input section.  We use this for script
  // processing.
  Relobj* first_relobj_;
  // Section index of the first merged input section.
  unsigned int first_shndx_;
  // Input sections.  We only keep them if keeps_input_sections_ is true.
  Input_sections input_sections_;
};

// Handle SHF_MERGE sections with fixed-size constant data.

class Output_merge_data : public Output_merge_base
{
 public:
  // Create an empty set of merged constants with entry size ENTSIZE
  // and alignment ADDRALIGN.  The hash and equality functors given to
  // the hash table both hold a pointer back to this object.
  Output_merge_data(uint64_t entsize, uint64_t addralign)
    : Output_merge_base(entsize, addralign), p_(NULL), len_(0), alc_(0),
      input_count_(0),
      hashtable_(128, Merge_data_hash(this), Merge_data_eq(this))
  { }

 protected:
  // Add an input section.
  bool
  do_add_input_section(Relobj* object, unsigned int shndx);

  // Set the final data size.
  void
  set_final_data_size();

  // Write the data to the file.
  void
  do_write(Output_file*);

  // Write the data to a buffer.
  void
  do_write_to_buffer(unsigned char*);

  // Write to a map file.
  void
  do_print_to_mapfile(Mapfile* mapfile) const
  { mapfile->print_output_data(this, _("** merge constants")); }

  // Print merge stats to stderr.
  void
  do_print_merge_stats(const char* section_name);

  // Set keeps-input-sections flag.  Asserts that input_count_ is
  // still zero before setting the flag.
  void
  do_set_keeps_input_sections()
  {
    gold_assert(this->input_count_ == 0);
    Output_merge_base::do_set_keeps_input_sections();
  }

 private:
  // We build a hash table of the fixed-size constants.  Each constant
  // is stored as a pointer into the section data we are accumulating.

  // A key in the hash table.  This is an offset in the section
  // contents we are building.
  typedef section_offset_type Merge_data_key;

  // Compute the hash code.  To do this we need a pointer back to the
  // object holding the data.
  class Merge_data_hash
  {
   public:
    Merge_data_hash(const Output_merge_data* pomd)
      : pomd_(pomd)
    { }

    size_t
    operator()(Merge_data_key) const;

   private:
    const Output_merge_data* pomd_;
  };

  friend class Merge_data_hash;

  // Compare two entries in the hash table for equality.  To do this
  // we need a pointer back to the object holding the data.  Note that
  // we now have a pointer to the object stored in two places in the
  // hash table.  Fixing this would require specializing the hash
  // table, which would be hard to do portably.
  class Merge_data_eq
  {
   public:
    Merge_data_eq(const Output_merge_data* pomd)
      : pomd_(pomd)
    { }

    bool
    operator()(Merge_data_key k1, Merge_data_key k2) const;

   private:
    const Output_merge_data* pomd_;
  };

  friend class Merge_data_eq;

  // The type of the hash table.
  typedef Unordered_set<Merge_data_key, Merge_data_hash, Merge_data_eq>
    Merge_data_hashtable;

  // Given a hash table key, which is just an offset into the section
  // data, return a pointer to the corresponding constant.  Asserts
  // that K is non-negative and less than len_.
  const unsigned char*
  constant(Merge_data_key k) const
  {
    gold_assert(k >= 0 && k < static_cast<section_offset_type>(this->len_));
    return this->p_ + k;
  }

  // Add a constant to the output.
  void
  add_constant(const unsigned char*);

  // The accumulated data.
  unsigned char* p_;
  // The length of the accumulated data.
  section_size_type len_;
  // The size of the allocated buffer.
  section_size_type alc_;
  // The number of entries seen in input files.
  size_t input_count_;
  // The hash table.
  Merge_data_hashtable hashtable_;
};

// Handle SHF_MERGE sections with string data.  This is a template
// based on the type of the characters in the string.

template<typename Char_type>
class Output_merge_string : public Output_merge_base
{
 public:
  // Create an empty set of merged strings with alignment ADDRALIGN.
  // The entry size is sizeof(Char_type), and the string pool is given
  // the same alignment.
  Output_merge_string(uint64_t addralign)
    : Output_merge_base(sizeof(Char_type), addralign), stringpool_(addralign),
      merged_strings_lists_(), input_count_(0), input_size_(0)
  {
    // NOTE(review): presumably this stops the pool from reserving
    // offset zero for the empty string -- confirm against Stringpool.
    this->stringpool_.set_no_zero_null();
  }

 protected:
  // Add an input section.
  bool
  do_add_input_section(Relobj* object, unsigned int shndx);

  // Do all the final processing after the input sections are read in.
  // Returns the final data size.
  section_size_type
  finalize_merged_data();

  // Set the final data size.
  void
  set_final_data_size();

  // Write the data to the file.
  void
  do_write(Output_file*);

  // Write the data to a buffer.
  void
  do_write_to_buffer(unsigned char*);

  // Write to a map file.
  void
  do_print_to_mapfile(Mapfile* mapfile) const
  { mapfile->print_output_data(this, _("** merge strings")); }

  // Print merge stats to stderr.
  void
  do_print_merge_stats(const char* section_name);

  // Writes the stringpool to a buffer.  BUFFER_SIZE is passed through
  // to the string pool along with BUFFER.
  void
  stringpool_to_buffer(unsigned char* buffer, section_size_type buffer_size)
  { this->stringpool_.write_to_buffer(buffer, buffer_size); }

  // Clears all the data in the stringpool, to save on memory.
  void
  clear_stringpool()
  { this->stringpool_.clear(); }

  // Whether this is a merge string section.  Always true for this
  // class.
  virtual bool
  do_is_string()
  { return true; }

  // Set keeps-input-sections flag.  Asserts that input_count_ is
  // still zero before setting the flag.
  void
  do_set_keeps_input_sections()
  {
    gold_assert(this->input_count_ == 0);
    Output_merge_base::do_set_keeps_input_sections();
  }

 private:
  // The name of the string type, for stats.
  const char*
  string_name();

  // As we see input sections, we build a mapping from object, section
  // index and offset to strings.
  struct Merged_string
  {
    // The offset in the input section.
    section_offset_type offset;
    // The key in the Stringpool.
    Stringpool::Key stringpool_key;

    Merged_string(section_offset_type offseta, Stringpool::Key stringpool_keya)
      : offset(offseta), stringpool_key(stringpool_keya)
    { }
  };

  typedef std::vector<Merged_string> Merged_strings;

  // The strings found in one input section of one input object.
  struct Merged_strings_list
  {
    // The input object where the strings were found.
    Relobj* object;
    // The input section in the input object.
    unsigned int shndx;
    // The list of merged strings.
    Merged_strings merged_strings;

    Merged_strings_list(Relobj* objecta, unsigned int shndxa)
      : object(objecta), shndx(shndxa), merged_strings()
    { }
  };

  // NOTE(review): this holds raw pointers and no destructor is
  // declared in this class -- confirm where the lists are freed.
  typedef std::vector<Merged_strings_list*> Merged_strings_lists;

  // As we see the strings, we add them to a Stringpool.
  Stringpool_template<Char_type> stringpool_;
  // Map from a location in an input object to an entry in the
  // Stringpool.
  Merged_strings_lists merged_strings_lists_;
  // The number of entries seen in input files.
  size_t input_count_;
  // The total size of input sections.
  size_t input_size_;
};

} // End namespace gold.

#endif // !defined(GOLD_MERGE_H)