Home | History | Annotate | Download | only in LD
      1 //===- IndenticalCodeFolding.cpp ------------------------------------------===//
      2 //
      3 //                     The MCLinker Project
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 #include "mcld/LD/IdenticalCodeFolding.h"
     10 
     11 #include "mcld/GeneralOptions.h"
     12 #include "mcld/Module.h"
     13 #include "mcld/Fragment/RegionFragment.h"
     14 #include "mcld/LD/LDContext.h"
     15 #include "mcld/LD/LDSection.h"
     16 #include "mcld/LD/RelocData.h"
     17 #include "mcld/LD/Relocator.h"
     18 #include "mcld/LD/ResolveInfo.h"
     19 #include "mcld/LD/SectionData.h"
     20 #include "mcld/LinkerConfig.h"
     21 #include "mcld/MC/Input.h"
     22 #include "mcld/Support/Demangle.h"
     23 #include "mcld/Support/MsgHandling.h"
     24 #include "mcld/Target/GNULDBackend.h"
     25 
     26 #include <llvm/ADT/StringRef.h>
     27 #include <llvm/Support/Casting.h>
     28 #include <llvm/Support/Format.h>
     29 
     30 #include <cassert>
     31 #include <map>
     32 #include <set>
     33 
     34 #include <zlib.h>
     35 
     36 namespace mcld {
     37 
     38 static bool isSymCtorOrDtor(const ResolveInfo& pSym) {
     39   // We can always fold ctors and dtors since accessing function pointer in C++
     40   // is forbidden.
     41   llvm::StringRef name(pSym.name(), pSym.nameSize());
     42   if (!name.startswith("_ZZ") && !name.startswith("_ZN")) {
     43     return false;
     44   }
     45   return isCtorOrDtor(pSym.name(), pSym.nameSize());
     46 }
     47 
     48 IdenticalCodeFolding::IdenticalCodeFolding(const LinkerConfig& pConfig,
     49                                            const TargetLDBackend& pBackend,
     50                                            Module& pModule)
     51     : m_Config(pConfig), m_Backend(pBackend), m_Module(pModule) {
     52 }
     53 
     54 void IdenticalCodeFolding::foldIdenticalCode() {
     55   // 1. Find folding candidates.
     56   FoldingCandidates candidate_list;
     57   findCandidates(candidate_list);
     58 
     59   // 2. Initialize constant section content
     60   for (size_t i = 0; i < candidate_list.size(); ++i) {
     61     candidate_list[i].initConstantContent(m_Backend, m_KeptSections);
     62   }
     63 
     64   // 3. Find identical code until convergence
     65   bool converged = false;
     66   size_t iterations = 0;
     67   while (!converged && (iterations < m_Config.options().getICFIterations())) {
     68     converged = matchCandidates(candidate_list);
     69     ++iterations;
     70   }
     71   if (m_Config.options().printICFSections()) {
     72     debug(diag::debug_icf_iterations) << iterations;
     73   }
     74 
     75   // 4. Fold the identical code
     76   typedef std::set<Input*> FoldedObjects;
     77   FoldedObjects folded_objs;
     78   KeptSections::iterator kept, keptEnd = m_KeptSections.end();
     79   size_t index = 0;
     80   for (kept = m_KeptSections.begin(); kept != keptEnd; ++kept, ++index) {
     81     LDSection* sect = (*kept).first;
     82     Input* obj = (*kept).second.first;
     83     size_t kept_index = (*kept).second.second;
     84     if (index != kept_index) {
     85       sect->setKind(LDFileFormat::Folded);
     86       folded_objs.insert(obj);
     87 
     88       if (m_Config.options().printICFSections()) {
     89         KeptSections::iterator it = m_KeptSections.begin() + kept_index;
     90         LDSection* kept_sect = (*it).first;
     91         Input* kept_obj = (*it).second.first;
     92         debug(diag::debug_icf_folded_section) << sect->name() << obj->name()
     93                                               << kept_sect->name()
     94                                               << kept_obj->name();
     95       }
     96     }
     97   }
     98 
     99   // Adjust the fragment reference of the folded symbols.
    100   FoldedObjects::iterator fobj, fobjEnd = folded_objs.end();
    101   for (fobj = folded_objs.begin(); fobj != fobjEnd; ++fobj) {
    102     LDContext::sym_iterator sym, symEnd = (*fobj)->context()->symTabEnd();
    103     for (sym = (*fobj)->context()->symTabBegin(); sym != symEnd; ++sym) {
    104       if ((*sym)->hasFragRef() && ((*sym)->type() == ResolveInfo::Function)) {
    105         LDSymbol* out_sym = (*sym)->resolveInfo()->outSymbol();
    106         FragmentRef* frag_ref = out_sym->fragRef();
    107         LDSection* sect = &(frag_ref->frag()->getParent()->getSection());
    108         if (sect->kind() == LDFileFormat::Folded) {
    109           size_t kept_index = m_KeptSections[sect].second;
    110           LDSection* kept_sect = (*(m_KeptSections.begin() + kept_index)).first;
    111           frag_ref->assign(kept_sect->getSectionData()->front(),
    112                            frag_ref->offset());
    113         }
    114       }
    115     }  // for each symbol
    116   }    // for each folded object
    117 }
    118 
    119 void IdenticalCodeFolding::findCandidates(FoldingCandidates& pCandidateList) {
    120   Module::obj_iterator obj, objEnd = m_Module.obj_end();
    121   for (obj = m_Module.obj_begin(); obj != objEnd; ++obj) {
    122     std::set<const LDSection*> funcptr_access_set;
    123     typedef std::map<LDSection*, LDSection*> CandidateMap;
    124     CandidateMap candidate_map;
    125     LDContext::sect_iterator sect, sectEnd = (*obj)->context()->sectEnd();
    126     for (sect = (*obj)->context()->sectBegin(); sect != sectEnd; ++sect) {
    127       switch ((*sect)->kind()) {
    128         case LDFileFormat::TEXT: {
    129           candidate_map.insert(
    130               std::make_pair(*sect, reinterpret_cast<LDSection*>(NULL)));
    131           break;
    132         }
    133         case LDFileFormat::Relocation: {
    134           LDSection* target = (*sect)->getLink();
    135           if (target->kind() == LDFileFormat::TEXT) {
    136             candidate_map[target] = *sect;
    137           }
    138 
    139           // Safe icf
    140           if (m_Config.options().getICFMode() == GeneralOptions::ICF::Safe) {
    141             RelocData::iterator rel, relEnd = (*sect)->getRelocData()->end();
    142             for (rel = (*sect)->getRelocData()->begin(); rel != relEnd; ++rel) {
    143               LDSymbol* sym = rel->symInfo()->outSymbol();
    144               if (sym->hasFragRef() && (sym->type() == ResolveInfo::Function)) {
    145                 const LDSection* def =
    146                     &sym->fragRef()->frag()->getParent()->getSection();
    147                 if (!isSymCtorOrDtor(*rel->symInfo()) &&
    148                     m_Backend.mayHaveUnsafeFunctionPointerAccess(*target) &&
    149                     m_Backend.getRelocator()
    150                         ->mayHaveFunctionPointerAccess(*rel)) {
    151                   funcptr_access_set.insert(def);
    152                 }
    153               }
    154             }  // for each reloc
    155           }
    156 
    157           break;
    158         }
    159         default: {
    160           // skip
    161           break;
    162         }
    163       }  // end of switch
    164     }    // for each section
    165 
    166     CandidateMap::iterator candidate, candidateEnd = candidate_map.end();
    167     for (candidate = candidate_map.begin(); candidate != candidateEnd;
    168          ++candidate) {
    169       if ((m_Config.options().getICFMode() == GeneralOptions::ICF::All) ||
    170           (funcptr_access_set.count(candidate->first) == 0)) {
    171         size_t index = m_KeptSections.size();
    172         m_KeptSections[candidate->first] = ObjectAndId(*obj, index);
    173         pCandidateList.push_back(
    174             FoldingCandidate(candidate->first, candidate->second, *obj));
    175       }
    176     }  // for each possible candidate
    177   }  // for each obj
    178 }
    179 
    180 bool IdenticalCodeFolding::matchCandidates(FoldingCandidates& pCandidateList) {
    181   typedef std::multimap<uint32_t, size_t> ChecksumMap;
    182   ChecksumMap checksum_map;
    183   std::vector<std::string> contents(pCandidateList.size());
    184   bool converged = true;
    185 
    186   for (size_t index = 0; index < pCandidateList.size(); ++index) {
    187     contents[index] = pCandidateList[index].getContentWithVariables(
    188         m_Backend, m_KeptSections);
    189     uint32_t checksum = ::crc32(0xFFFFFFFF,
    190                                 (const uint8_t*)contents[index].c_str(),
    191                                 contents[index].length());
    192 
    193     size_t count = checksum_map.count(checksum);
    194     if (count == 0) {
    195       checksum_map.insert(std::make_pair(checksum, index));
    196     } else {
    197       std::pair<ChecksumMap::iterator, ChecksumMap::iterator> ret =
    198           checksum_map.equal_range(checksum);
    199       for (ChecksumMap::iterator it = ret.first; it != ret.second; ++it) {
    200         size_t kept_index = (*it).second;
    201         if (contents[index].compare(contents[kept_index]) == 0) {
    202           m_KeptSections[pCandidateList[index].sect].second = kept_index;
    203           converged = false;
    204           break;
    205         }
    206       }
    207     }
    208   }
    209 
    210   return converged;
    211 }
    212 
    213 void IdenticalCodeFolding::FoldingCandidate::initConstantContent(
    214     const TargetLDBackend& pBackend,
    215     const IdenticalCodeFolding::KeptSections& pKeptSections) {
    216   // Get the static content from text.
    217   assert(sect != NULL && sect->hasSectionData());
    218   SectionData::const_iterator frag, fragEnd = sect->getSectionData()->end();
    219   for (frag = sect->getSectionData()->begin(); frag != fragEnd; ++frag) {
    220     switch (frag->getKind()) {
    221       case Fragment::Region: {
    222         const RegionFragment& region = llvm::cast<RegionFragment>(*frag);
    223         content.append(region.getRegion().begin(), region.size());
    224         break;
    225       }
    226       default: {
    227         // FIXME: Currently we only take care of RegionFragment.
    228         break;
    229       }
    230     }
    231   }
    232 
    233   // Get the static content from relocs.
    234   if (reloc_sect != NULL && reloc_sect->hasRelocData()) {
    235     for (Relocation& rel : *reloc_sect->getRelocData()) {
    236       llvm::format_object<Relocation::Type,
    237                           Relocation::Address,
    238                           Relocation::Address,
    239                           Relocation::Address> rel_info("%x%llx%llx%llx",
    240                                                         rel.type(),
    241                                                         rel.symValue(),
    242                                                         rel.addend(),
    243                                                         rel.place());
    244       char rel_str[48];
    245       rel_info.print(rel_str, sizeof(rel_str));
    246       content.append(rel_str);
    247 
    248       // Handle the recursive call.
    249       LDSymbol* sym = rel.symInfo()->outSymbol();
    250       if ((sym->type() == ResolveInfo::Function) && sym->hasFragRef()) {
    251         LDSection* def = &sym->fragRef()->frag()->getParent()->getSection();
    252         if (def == sect) {
    253           continue;
    254         }
    255       }
    256 
    257       if (!pBackend.isSymbolPreemptible(*rel.symInfo()) && sym->hasFragRef() &&
    258           (pKeptSections.find(
    259                &sym->fragRef()->frag()->getParent()->getSection()) !=
    260            pKeptSections.end())) {
    261         // Mark this reloc as a variable.
    262         variable_relocs.push_back(&rel);
    263       } else {
    264         // TODO: Support inlining merge sections if possible (target-dependent).
    265         if ((sym->binding() == ResolveInfo::Local) ||
    266             (sym->binding() == ResolveInfo::Absolute)) {
    267           // ABS or Local symbols.
    268           content.append(sym->name()).append(obj->name()).append(
    269               obj->path().native());
    270         } else {
    271           content.append(sym->name());
    272         }
    273       }
    274     }
    275   }
    276 }
    277 
    278 std::string IdenticalCodeFolding::FoldingCandidate::getContentWithVariables(
    279     const TargetLDBackend& pBackend,
    280     const IdenticalCodeFolding::KeptSections& pKeptSections) {
    281   std::string result(content);
    282   // Compute the variable content from relocs.
    283   std::vector<Relocation*>::const_iterator rel, relEnd = variable_relocs.end();
    284   for (rel = variable_relocs.begin(); rel != relEnd; ++rel) {
    285     LDSymbol* sym = (*rel)->symInfo()->outSymbol();
    286     LDSection* def = &sym->fragRef()->frag()->getParent()->getSection();
    287     // Use the kept section index.
    288     KeptSections::const_iterator it = pKeptSections.find(def);
    289     llvm::format_object<size_t> kept_info("%x", (*it).second.second);
    290     char kept_str[8];
    291     kept_info.print(kept_str, sizeof(kept_str));
    292     result.append(kept_str);
    293   }
    294 
    295   return result;
    296 }
    297 
    298 }  // namespace mcld
    299