1 //===- GNUArchiveReader.cpp -----------------------------------------------===// 2 // 3 // The MCLinker Project 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 #include "mcld/LD/GNUArchiveReader.h" 10 11 #include "mcld/InputTree.h" 12 #include "mcld/LinkerConfig.h" 13 #include "mcld/Module.h" 14 #include "mcld/ADT/SizeTraits.h" 15 #include "mcld/MC/Attribute.h" 16 #include "mcld/MC/Input.h" 17 #include "mcld/LD/ELFObjectReader.h" 18 #include "mcld/LD/ResolveInfo.h" 19 #include "mcld/Support/FileHandle.h" 20 #include "mcld/Support/FileSystem.h" 21 #include "mcld/Support/MemoryArea.h" 22 #include "mcld/Support/MsgHandling.h" 23 #include "mcld/Support/Path.h" 24 25 #include <llvm/ADT/StringRef.h> 26 #include <llvm/Support/Host.h> 27 28 #include <cstdlib> 29 #include <cstring> 30 31 namespace mcld { 32 33 GNUArchiveReader::GNUArchiveReader(Module& pModule, 34 ELFObjectReader& pELFObjectReader) 35 : m_Module(pModule), m_ELFObjectReader(pELFObjectReader) { 36 } 37 38 GNUArchiveReader::~GNUArchiveReader() { 39 } 40 41 /// isMyFormat 42 bool GNUArchiveReader::isMyFormat(Input& pInput, bool& pContinue) const { 43 assert(pInput.hasMemArea()); 44 if (pInput.memArea()->size() < Archive::MAGIC_LEN) 45 return false; 46 47 llvm::StringRef region = 48 pInput.memArea()->request(pInput.fileOffset(), Archive::MAGIC_LEN); 49 const char* str = region.begin(); 50 51 bool result = false; 52 assert(str != NULL); 53 pContinue = true; 54 if (isArchive(str) || isThinArchive(str)) 55 result = true; 56 57 return result; 58 } 59 60 /// isArchive 61 bool GNUArchiveReader::isArchive(const char* pStr) const { 62 return (memcmp(pStr, Archive::MAGIC, Archive::MAGIC_LEN) == 0); 63 } 64 65 /// isThinArchive 66 bool GNUArchiveReader::isThinArchive(const char* pStr) const { 67 return (memcmp(pStr, Archive::THIN_MAGIC, Archive::MAGIC_LEN) == 0); 68 } 69 70 /// isThinArchive 71 bool GNUArchiveReader::isThinArchive(Input& pInput) const { 72 assert(pInput.hasMemArea()); 73 llvm::StringRef region = 74 pInput.memArea()->request(pInput.fileOffset(), Archive::MAGIC_LEN); 75 const char* str = region.begin(); 76 77 bool result = false; 78 assert(str != NULL); 79 if (isThinArchive(str)) 80 result = true; 81 82 return result; 83 } 84 85 bool GNUArchiveReader::readArchive(const LinkerConfig& pConfig, 86 Archive& pArchive) { 87 // bypass the empty archive 88 if (Archive::MAGIC_LEN == pArchive.getARFile().memArea()->size()) 89 return true; 90 91 if (pArchive.getARFile().attribute()->isWholeArchive()) 92 return includeAllMembers(pConfig, pArchive); 93 94 // if this is the first time read this archive, setup symtab and strtab 95 if (pArchive.getSymbolTable().empty()) { 96 // read the symtab of the archive 97 readSymbolTable(pArchive); 98 99 // read the strtab of the archive 100 readStringTable(pArchive); 101 102 // add root archive to ArchiveMemberMap 103 pArchive.addArchiveMember(pArchive.getARFile().name(), 104 pArchive.inputs().root(), 105 &InputTree::Downward); 106 } 107 108 // include the needed members in the archive and build up the input tree 109 bool willSymResolved; 110 do { 111 willSymResolved = false; 112 for (size_t idx = 0; idx < pArchive.numOfSymbols(); ++idx) { 113 // bypass if we already decided to include this symbol or not 114 if (Archive::Symbol::Unknown != pArchive.getSymbolStatus(idx)) 115 continue; 116 117 // bypass if another symbol with the same object file offset is included 118 if (pArchive.hasObjectMember(pArchive.getObjFileOffset(idx))) { 119 pArchive.setSymbolStatus(idx, Archive::Symbol::Include); 120 continue; 121 } 122 123 // check if we should include this defined symbol 124 Archive::Symbol::Status status = 125 shouldIncludeSymbol(pArchive.getSymbolName(idx)); 126 if (Archive::Symbol::Unknown != status) 127 pArchive.setSymbolStatus(idx, status); 128 129 if (Archive::Symbol::Include == status) { 130 // include the object member from the given offset 131 includeMember(pConfig, pArchive, pArchive.getObjFileOffset(idx)); 132 willSymResolved = true; 133 } // end of if 134 } // end of for 135 } while (willSymResolved); 136 137 return true; 138 } 139 140 /// readMemberHeader - read the header of a member in a archive file and then 141 /// return the corresponding archive member (it may be an input object or 142 /// another archive) 143 /// @param pArchiveRoot - the archive root that holds the strtab (extended 144 /// name table) 145 /// @param pArchiveFile - the archive that contains the needed object 146 /// @param pFileOffset - file offset of the member header in the archive 147 /// @param pNestedOffset - used when we find a nested archive 148 /// @param pMemberSize - the file size of this member 149 Input* GNUArchiveReader::readMemberHeader(Archive& pArchiveRoot, 150 Input& pArchiveFile, 151 uint32_t pFileOffset, 152 uint32_t& pNestedOffset, 153 size_t& pMemberSize) { 154 assert(pArchiveFile.hasMemArea()); 155 156 llvm::StringRef header_region = pArchiveFile.memArea()->request( 157 (pArchiveFile.fileOffset() + pFileOffset), sizeof(Archive::MemberHeader)); 158 const Archive::MemberHeader* header = 159 reinterpret_cast<const Archive::MemberHeader*>(header_region.begin()); 160 161 assert(memcmp(header->fmag, Archive::MEMBER_MAGIC, sizeof(header->fmag)) == 162 0); 163 164 pMemberSize = atoi(header->size); 165 166 // parse the member name and nested offset if any 167 std::string member_name; 168 llvm::StringRef name_field(header->name, sizeof(header->name)); 169 if (header->name[0] != '/') { 170 // this is an object file in an archive 171 size_t pos = name_field.find_first_of('/'); 172 member_name.assign(name_field.substr(0, pos).str()); 173 } else { 174 // this is an object/archive file in a thin archive 175 size_t begin = 1; 176 size_t end = name_field.find_first_of(" :"); 177 uint32_t name_offset = 0; 178 // parse the name offset 179 name_field.substr(begin, end - begin).getAsInteger(10, name_offset); 180 181 if (name_field[end] == ':') { 182 // there is a nested offset 183 begin = end + 1; 184 end = name_field.find_first_of(' ', begin); 185 name_field.substr(begin, end - begin).getAsInteger(10, pNestedOffset); 186 } 187 188 // get the member name from the extended name table 189 assert(pArchiveRoot.hasStrTable()); 190 begin = name_offset; 191 end = pArchiveRoot.getStrTable().find_first_of('\n', begin); 192 member_name.assign( 193 pArchiveRoot.getStrTable().substr(begin, end - begin - 1)); 194 } 195 196 Input* member = NULL; 197 bool isThinAR = isThinArchive(pArchiveFile); 198 if (!isThinAR) { 199 // this is an object file in an archive 200 member = pArchiveRoot.getMemberFile( 201 pArchiveFile, 202 isThinAR, 203 member_name, 204 pArchiveFile.path(), 205 (pFileOffset + sizeof(Archive::MemberHeader))); 206 } else { 207 // this is a member in a thin archive 208 // try to find if this is a archive already in the map first 209 Archive::ArchiveMember* ar_member = 210 pArchiveRoot.getArchiveMember(member_name); 211 if (ar_member != NULL) { 212 return ar_member->file; 213 } 214 215 // get nested file path, the nested file's member name is the relative 216 // path to the archive containing it. 217 sys::fs::Path input_path(pArchiveFile.path().parent_path()); 218 if (!input_path.empty()) 219 input_path.append(sys::fs::Path(member_name)); 220 else 221 input_path.assign(member_name); 222 223 member = pArchiveRoot.getMemberFile( 224 pArchiveFile, isThinAR, member_name, input_path); 225 } 226 227 return member; 228 } 229 230 template <size_t SIZE> 231 static void readSymbolTableEntries(Archive& pArchive, 232 llvm::StringRef pMemRegion) { 233 typedef typename SizeTraits<SIZE>::Offset Offset; 234 235 const Offset* data = reinterpret_cast<const Offset*>(pMemRegion.begin()); 236 237 // read the number of symbols 238 Offset number = 0; 239 if (llvm::sys::IsLittleEndianHost) 240 number = mcld::bswap<SIZE>(*data); 241 else 242 number = *data; 243 244 // set up the pointers for file offset and name offset 245 ++data; 246 const char* name = reinterpret_cast<const char*>(data + number); 247 248 // add the archive symbols 249 for (Offset i = 0; i < number; ++i) { 250 if (llvm::sys::IsLittleEndianHost) 251 pArchive.addSymbol(name, mcld::bswap<SIZE>(*data)); 252 else 253 pArchive.addSymbol(name, *data); 254 name += strlen(name) + 1; 255 ++data; 256 } 257 } 258 259 /// readSymbolTable - read the archive symbol map (armap) 260 bool GNUArchiveReader::readSymbolTable(Archive& pArchive) { 261 assert(pArchive.getARFile().hasMemArea()); 262 MemoryArea* memory_area = pArchive.getARFile().memArea(); 263 264 llvm::StringRef header_region = memory_area->request( 265 (pArchive.getARFile().fileOffset() + Archive::MAGIC_LEN), 266 sizeof(Archive::MemberHeader)); 267 const Archive::MemberHeader* header = 268 reinterpret_cast<const Archive::MemberHeader*>(header_region.begin()); 269 assert(memcmp(header->fmag, Archive::MEMBER_MAGIC, sizeof(header->fmag)) == 270 0); 271 272 int symtab_size = atoi(header->size); 273 pArchive.setSymTabSize(symtab_size); 274 275 if (!pArchive.getARFile().attribute()->isWholeArchive()) { 276 llvm::StringRef symtab_region = memory_area->request( 277 (pArchive.getARFile().fileOffset() + Archive::MAGIC_LEN + 278 sizeof(Archive::MemberHeader)), 279 symtab_size); 280 281 if (strncmp(header->name, 282 Archive::SVR4_SYMTAB_NAME, 283 strlen(Archive::SVR4_SYMTAB_NAME)) == 0) 284 readSymbolTableEntries<32>(pArchive, symtab_region); 285 else if (strncmp(header->name, 286 Archive::IRIX6_SYMTAB_NAME, 287 strlen(Archive::IRIX6_SYMTAB_NAME)) == 0) 288 readSymbolTableEntries<64>(pArchive, symtab_region); 289 else 290 unreachable(diag::err_unsupported_archive); 291 } 292 return true; 293 } 294 295 /// readStringTable - read the strtab for long file name of the archive 296 bool GNUArchiveReader::readStringTable(Archive& pArchive) { 297 size_t offset = Archive::MAGIC_LEN + sizeof(Archive::MemberHeader) + 298 pArchive.getSymTabSize(); 299 300 if ((offset & 1) != 0x0) 301 ++offset; 302 303 assert(pArchive.getARFile().hasMemArea()); 304 MemoryArea* memory_area = pArchive.getARFile().memArea(); 305 306 llvm::StringRef header_region = 307 memory_area->request((pArchive.getARFile().fileOffset() + offset), 308 sizeof(Archive::MemberHeader)); 309 const Archive::MemberHeader* header = 310 reinterpret_cast<const Archive::MemberHeader*>(header_region.begin()); 311 312 assert(memcmp(header->fmag, Archive::MEMBER_MAGIC, sizeof(header->fmag)) == 313 0); 314 315 if (memcmp(header->name, Archive::STRTAB_NAME, sizeof(header->name)) == 0) { 316 // read the extended name table 317 int strtab_size = atoi(header->size); 318 llvm::StringRef strtab_region = 319 memory_area->request((pArchive.getARFile().fileOffset() + offset + 320 sizeof(Archive::MemberHeader)), 321 strtab_size); 322 const char* strtab = strtab_region.begin(); 323 pArchive.getStrTable().assign(strtab, strtab_size); 324 } 325 return true; 326 } 327 328 /// shouldIncludeStatus - given a sym name from armap and check if including 329 /// the corresponding archive member, and then return the decision 330 enum Archive::Symbol::Status GNUArchiveReader::shouldIncludeSymbol( 331 const llvm::StringRef& pSymName) const { 332 // TODO: handle symbol version issue and user defined symbols 333 const ResolveInfo* info = m_Module.getNamePool().findInfo(pSymName); 334 if (info != NULL) { 335 if (!info->isUndef()) 336 return Archive::Symbol::Exclude; 337 if (info->isWeak()) 338 return Archive::Symbol::Unknown; 339 return Archive::Symbol::Include; 340 } 341 return Archive::Symbol::Unknown; 342 } 343 344 /// includeMember - include the object member in the given file offset, and 345 /// return the size of the object 346 /// @param pConfig - LinkerConfig 347 /// @param pArchiveRoot - the archive root 348 /// @param pFileOffset - file offset of the member header in the archive 349 size_t GNUArchiveReader::includeMember(const LinkerConfig& pConfig, 350 Archive& pArchive, 351 uint32_t pFileOffset) { 352 Input* cur_archive = &(pArchive.getARFile()); 353 Input* member = NULL; 354 uint32_t file_offset = pFileOffset; 355 size_t size = 0; 356 do { 357 uint32_t nested_offset = 0; 358 // use the file offset in current archive to find out the member we 359 // want to include 360 member = readMemberHeader( 361 pArchive, *cur_archive, file_offset, nested_offset, size); 362 assert(member != NULL); 363 // bypass if we get an archive that is already in the map 364 if (Input::Archive == member->type()) { 365 cur_archive = member; 366 file_offset = nested_offset; 367 continue; 368 } 369 370 // insert a node into the subtree of current archive. 371 Archive::ArchiveMember* parent = 372 pArchive.getArchiveMember(cur_archive->name()); 373 374 assert(parent != NULL); 375 pArchive.inputs().insert(parent->lastPos, *(parent->move), *member); 376 377 // move the iterator to new created node, and also adjust the 378 // direction to Afterward for next insertion in this subtree 379 parent->move->move(parent->lastPos); 380 parent->move = &InputTree::Afterward; 381 bool doContinue = false; 382 383 if (m_ELFObjectReader.isMyFormat(*member, doContinue)) { 384 member->setType(Input::Object); 385 // Set this object as no export if the archive is in the exclude libs. 386 if (pArchive.getARFile().noExport()) { 387 member->setNoExport(); 388 } 389 pArchive.addObjectMember(pFileOffset, parent->lastPos); 390 m_ELFObjectReader.readHeader(*member); 391 m_ELFObjectReader.readSections(*member); 392 m_ELFObjectReader.readSymbols(*member); 393 m_Module.getObjectList().push_back(member); 394 } else if (doContinue && isMyFormat(*member, doContinue)) { 395 member->setType(Input::Archive); 396 // when adding a new archive node, set the iterator to archive 397 // itself, and set the direction to Downward 398 pArchive.addArchiveMember( 399 member->name(), parent->lastPos, &InputTree::Downward); 400 cur_archive = member; 401 file_offset = nested_offset; 402 } else { 403 warning(diag::warn_unrecognized_input_file) 404 << member->path() << pConfig.targets().triple().str(); 405 } 406 } while (Input::Object != member->type()); 407 return size; 408 } 409 410 /// includeAllMembers - include all object members. This is called if 411 /// --whole-archive is the attribute for this archive file. 412 bool GNUArchiveReader::includeAllMembers(const LinkerConfig& pConfig, 413 Archive& pArchive) { 414 // read the symtab of the archive 415 readSymbolTable(pArchive); 416 417 // read the strtab of the archive 418 readStringTable(pArchive); 419 420 // add root archive to ArchiveMemberMap 421 pArchive.addArchiveMember(pArchive.getARFile().name(), 422 pArchive.inputs().root(), 423 &InputTree::Downward); 424 425 bool isThinAR = isThinArchive(pArchive.getARFile()); 426 uint32_t begin_offset = pArchive.getARFile().fileOffset() + 427 Archive::MAGIC_LEN + sizeof(Archive::MemberHeader) + 428 pArchive.getSymTabSize(); 429 if (pArchive.hasStrTable()) { 430 if ((begin_offset & 1) != 0x0) 431 ++begin_offset; 432 begin_offset += 433 sizeof(Archive::MemberHeader) + pArchive.getStrTable().size(); 434 } 435 uint32_t end_offset = pArchive.getARFile().memArea()->size(); 436 for (uint32_t offset = begin_offset; offset < end_offset; 437 offset += sizeof(Archive::MemberHeader)) { 438 size_t size = includeMember(pConfig, pArchive, offset); 439 440 if (!isThinAR) { 441 offset += size; 442 } 443 444 if ((offset & 1) != 0x0) 445 ++offset; 446 } 447 return true; 448 } 449 450 } // namespace mcld 451