1 //===- IndenticalCodeFolding.cpp ------------------------------------------===// 2 // 3 // The MCLinker Project 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 #include "mcld/LD/IdenticalCodeFolding.h" 10 11 #include "mcld/GeneralOptions.h" 12 #include "mcld/Module.h" 13 #include "mcld/Fragment/RegionFragment.h" 14 #include "mcld/LD/LDContext.h" 15 #include "mcld/LD/LDSection.h" 16 #include "mcld/LD/RelocData.h" 17 #include "mcld/LD/Relocator.h" 18 #include "mcld/LD/ResolveInfo.h" 19 #include "mcld/LD/SectionData.h" 20 #include "mcld/LinkerConfig.h" 21 #include "mcld/MC/Input.h" 22 #include "mcld/Support/Demangle.h" 23 #include "mcld/Support/MsgHandling.h" 24 #include "mcld/Target/GNULDBackend.h" 25 26 #include <llvm/ADT/StringRef.h> 27 #include <llvm/Support/Casting.h> 28 #include <llvm/Support/Format.h> 29 30 #include <cassert> 31 #include <map> 32 #include <set> 33 34 #include <zlib.h> 35 36 namespace mcld { 37 38 static bool isSymCtorOrDtor(const ResolveInfo& pSym) { 39 // We can always fold ctors and dtors since accessing function pointer in C++ 40 // is forbidden. 41 llvm::StringRef name(pSym.name(), pSym.nameSize()); 42 if (!name.startswith("_ZZ") && !name.startswith("_ZN")) { 43 return false; 44 } 45 return isCtorOrDtor(pSym.name(), pSym.nameSize()); 46 } 47 48 IdenticalCodeFolding::IdenticalCodeFolding(const LinkerConfig& pConfig, 49 const TargetLDBackend& pBackend, 50 Module& pModule) 51 : m_Config(pConfig), m_Backend(pBackend), m_Module(pModule) { 52 } 53 54 void IdenticalCodeFolding::foldIdenticalCode() { 55 // 1. Find folding candidates. 56 FoldingCandidates candidate_list; 57 findCandidates(candidate_list); 58 59 // 2. Initialize constant section content 60 for (size_t i = 0; i < candidate_list.size(); ++i) { 61 candidate_list[i].initConstantContent(m_Backend, m_KeptSections); 62 } 63 64 // 3. Find identical code until convergence 65 bool converged = false; 66 size_t iterations = 0; 67 while (!converged && (iterations < m_Config.options().getICFIterations())) { 68 converged = matchCandidates(candidate_list); 69 ++iterations; 70 } 71 if (m_Config.options().printICFSections()) { 72 debug(diag::debug_icf_iterations) << iterations; 73 } 74 75 // 4. Fold the identical code 76 typedef std::set<Input*> FoldedObjects; 77 FoldedObjects folded_objs; 78 KeptSections::iterator kept, keptEnd = m_KeptSections.end(); 79 size_t index = 0; 80 for (kept = m_KeptSections.begin(); kept != keptEnd; ++kept, ++index) { 81 LDSection* sect = (*kept).first; 82 Input* obj = (*kept).second.first; 83 size_t kept_index = (*kept).second.second; 84 if (index != kept_index) { 85 sect->setKind(LDFileFormat::Folded); 86 folded_objs.insert(obj); 87 88 if (m_Config.options().printICFSections()) { 89 KeptSections::iterator it = m_KeptSections.begin() + kept_index; 90 LDSection* kept_sect = (*it).first; 91 Input* kept_obj = (*it).second.first; 92 debug(diag::debug_icf_folded_section) << sect->name() << obj->name() 93 << kept_sect->name() 94 << kept_obj->name(); 95 } 96 } 97 } 98 99 // Adjust the fragment reference of the folded symbols. 100 FoldedObjects::iterator fobj, fobjEnd = folded_objs.end(); 101 for (fobj = folded_objs.begin(); fobj != fobjEnd; ++fobj) { 102 LDContext::sym_iterator sym, symEnd = (*fobj)->context()->symTabEnd(); 103 for (sym = (*fobj)->context()->symTabBegin(); sym != symEnd; ++sym) { 104 if ((*sym)->hasFragRef() && ((*sym)->type() == ResolveInfo::Function)) { 105 LDSymbol* out_sym = (*sym)->resolveInfo()->outSymbol(); 106 FragmentRef* frag_ref = out_sym->fragRef(); 107 LDSection* sect = &(frag_ref->frag()->getParent()->getSection()); 108 if (sect->kind() == LDFileFormat::Folded) { 109 size_t kept_index = m_KeptSections[sect].second; 110 LDSection* kept_sect = (*(m_KeptSections.begin() + kept_index)).first; 111 frag_ref->assign(kept_sect->getSectionData()->front(), 112 frag_ref->offset()); 113 } 114 } 115 } // for each symbol 116 } // for each folded object 117 } 118 119 void IdenticalCodeFolding::findCandidates(FoldingCandidates& pCandidateList) { 120 Module::obj_iterator obj, objEnd = m_Module.obj_end(); 121 for (obj = m_Module.obj_begin(); obj != objEnd; ++obj) { 122 std::set<const LDSection*> funcptr_access_set; 123 typedef std::map<LDSection*, LDSection*> CandidateMap; 124 CandidateMap candidate_map; 125 LDContext::sect_iterator sect, sectEnd = (*obj)->context()->sectEnd(); 126 for (sect = (*obj)->context()->sectBegin(); sect != sectEnd; ++sect) { 127 switch ((*sect)->kind()) { 128 case LDFileFormat::TEXT: { 129 candidate_map.insert( 130 std::make_pair(*sect, reinterpret_cast<LDSection*>(NULL))); 131 break; 132 } 133 case LDFileFormat::Relocation: { 134 LDSection* target = (*sect)->getLink(); 135 if (target->kind() == LDFileFormat::TEXT) { 136 candidate_map[target] = *sect; 137 } 138 139 // Safe icf 140 if (m_Config.options().getICFMode() == GeneralOptions::ICF::Safe) { 141 RelocData::iterator rel, relEnd = (*sect)->getRelocData()->end(); 142 for (rel = (*sect)->getRelocData()->begin(); rel != relEnd; ++rel) { 143 LDSymbol* sym = rel->symInfo()->outSymbol(); 144 if (sym->hasFragRef() && (sym->type() == ResolveInfo::Function)) { 145 const LDSection* def = 146 &sym->fragRef()->frag()->getParent()->getSection(); 147 if (!isSymCtorOrDtor(*rel->symInfo()) && 148 m_Backend.mayHaveUnsafeFunctionPointerAccess(*target) && 149 m_Backend.getRelocator() 150 ->mayHaveFunctionPointerAccess(*rel)) { 151 funcptr_access_set.insert(def); 152 } 153 } 154 } // for each reloc 155 } 156 157 break; 158 } 159 default: { 160 // skip 161 break; 162 } 163 } // end of switch 164 } // for each section 165 166 CandidateMap::iterator candidate, candidateEnd = candidate_map.end(); 167 for (candidate = candidate_map.begin(); candidate != candidateEnd; 168 ++candidate) { 169 if ((m_Config.options().getICFMode() == GeneralOptions::ICF::All) || 170 (funcptr_access_set.count(candidate->first) == 0)) { 171 size_t index = m_KeptSections.size(); 172 m_KeptSections[candidate->first] = ObjectAndId(*obj, index); 173 pCandidateList.push_back( 174 FoldingCandidate(candidate->first, candidate->second, *obj)); 175 } 176 } // for each possible candidate 177 } // for each obj 178 } 179 180 bool IdenticalCodeFolding::matchCandidates(FoldingCandidates& pCandidateList) { 181 typedef std::multimap<uint32_t, size_t> ChecksumMap; 182 ChecksumMap checksum_map; 183 std::vector<std::string> contents(pCandidateList.size()); 184 bool converged = true; 185 186 for (size_t index = 0; index < pCandidateList.size(); ++index) { 187 contents[index] = pCandidateList[index].getContentWithVariables( 188 m_Backend, m_KeptSections); 189 uint32_t checksum = ::crc32(0xFFFFFFFF, 190 (const uint8_t*)contents[index].c_str(), 191 contents[index].length()); 192 193 size_t count = checksum_map.count(checksum); 194 if (count == 0) { 195 checksum_map.insert(std::make_pair(checksum, index)); 196 } else { 197 std::pair<ChecksumMap::iterator, ChecksumMap::iterator> ret = 198 checksum_map.equal_range(checksum); 199 for (ChecksumMap::iterator it = ret.first; it != ret.second; ++it) { 200 size_t kept_index = (*it).second; 201 if (contents[index].compare(contents[kept_index]) == 0) { 202 m_KeptSections[pCandidateList[index].sect].second = kept_index; 203 converged = false; 204 break; 205 } 206 } 207 } 208 } 209 210 return converged; 211 } 212 213 void IdenticalCodeFolding::FoldingCandidate::initConstantContent( 214 const TargetLDBackend& pBackend, 215 const IdenticalCodeFolding::KeptSections& pKeptSections) { 216 // Get the static content from text. 217 assert(sect != NULL && sect->hasSectionData()); 218 SectionData::const_iterator frag, fragEnd = sect->getSectionData()->end(); 219 for (frag = sect->getSectionData()->begin(); frag != fragEnd; ++frag) { 220 switch (frag->getKind()) { 221 case Fragment::Region: { 222 const RegionFragment& region = llvm::cast<RegionFragment>(*frag); 223 content.append(region.getRegion().begin(), region.size()); 224 break; 225 } 226 default: { 227 // FIXME: Currently we only take care of RegionFragment. 228 break; 229 } 230 } 231 } 232 233 // Get the static content from relocs. 234 if (reloc_sect != NULL && reloc_sect->hasRelocData()) { 235 for (Relocation& rel : *reloc_sect->getRelocData()) { 236 llvm::format_object<Relocation::Type, 237 Relocation::Address, 238 Relocation::Address, 239 Relocation::Address> rel_info("%x%llx%llx%llx", 240 rel.type(), 241 rel.symValue(), 242 rel.addend(), 243 rel.place()); 244 char rel_str[48]; 245 rel_info.print(rel_str, sizeof(rel_str)); 246 content.append(rel_str); 247 248 // Handle the recursive call. 249 LDSymbol* sym = rel.symInfo()->outSymbol(); 250 if ((sym->type() == ResolveInfo::Function) && sym->hasFragRef()) { 251 LDSection* def = &sym->fragRef()->frag()->getParent()->getSection(); 252 if (def == sect) { 253 continue; 254 } 255 } 256 257 if (!pBackend.isSymbolPreemptible(*rel.symInfo()) && sym->hasFragRef() && 258 (pKeptSections.find( 259 &sym->fragRef()->frag()->getParent()->getSection()) != 260 pKeptSections.end())) { 261 // Mark this reloc as a variable. 262 variable_relocs.push_back(&rel); 263 } else { 264 // TODO: Support inlining merge sections if possible (target-dependent). 265 if ((sym->binding() == ResolveInfo::Local) || 266 (sym->binding() == ResolveInfo::Absolute)) { 267 // ABS or Local symbols. 268 content.append(sym->name()).append(obj->name()).append( 269 obj->path().native()); 270 } else { 271 content.append(sym->name()); 272 } 273 } 274 } 275 } 276 } 277 278 std::string IdenticalCodeFolding::FoldingCandidate::getContentWithVariables( 279 const TargetLDBackend& pBackend, 280 const IdenticalCodeFolding::KeptSections& pKeptSections) { 281 std::string result(content); 282 // Compute the variable content from relocs. 283 std::vector<Relocation*>::const_iterator rel, relEnd = variable_relocs.end(); 284 for (rel = variable_relocs.begin(); rel != relEnd; ++rel) { 285 LDSymbol* sym = (*rel)->symInfo()->outSymbol(); 286 LDSection* def = &sym->fragRef()->frag()->getParent()->getSection(); 287 // Use the kept section index. 288 KeptSections::const_iterator it = pKeptSections.find(def); 289 llvm::format_object<size_t> kept_info("%x", (*it).second.second); 290 char kept_str[8]; 291 kept_info.print(kept_str, sizeof(kept_str)); 292 result.append(kept_str); 293 } 294 295 return result; 296 } 297 298 } // namespace mcld 299