1 //===- GNUArchiveReader.cpp -----------------------------------------------===// 2 // 3 // The MCLinker Project 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 #include <mcld/LD/GNUArchiveReader.h> 10 11 #include <mcld/Module.h> 12 #include <mcld/InputTree.h> 13 #include <mcld/LinkerConfig.h> 14 #include <mcld/MC/Attribute.h> 15 #include <mcld/MC/Input.h> 16 #include <mcld/LD/ResolveInfo.h> 17 #include <mcld/LD/ELFObjectReader.h> 18 #include <mcld/Support/FileSystem.h> 19 #include <mcld/Support/FileHandle.h> 20 #include <mcld/Support/MemoryArea.h> 21 #include <mcld/Support/MsgHandling.h> 22 #include <mcld/Support/Path.h> 23 #include <mcld/ADT/SizeTraits.h> 24 25 #include <llvm/ADT/StringRef.h> 26 #include <llvm/Support/Host.h> 27 28 #include <cstring> 29 #include <cstdlib> 30 31 using namespace mcld; 32 33 GNUArchiveReader::GNUArchiveReader(Module& pModule, 34 ELFObjectReader& pELFObjectReader) 35 : m_Module(pModule), 36 m_ELFObjectReader(pELFObjectReader) 37 { 38 } 39 40 GNUArchiveReader::~GNUArchiveReader() 41 { 42 } 43 44 /// isMyFormat 45 bool GNUArchiveReader::isMyFormat(Input& pInput, bool &pContinue) const 46 { 47 assert(pInput.hasMemArea()); 48 if (pInput.memArea()->size() < Archive::MAGIC_LEN) 49 return false; 50 51 llvm::StringRef region = 52 pInput.memArea()->request(pInput.fileOffset(), Archive::MAGIC_LEN); 53 const char* str = region.begin(); 54 55 bool result = false; 56 assert(NULL != str); 57 pContinue = true; 58 if (isArchive(str) || isThinArchive(str)) 59 result = true; 60 61 return result; 62 } 63 64 /// isArchive 65 bool GNUArchiveReader::isArchive(const char* pStr) const 66 { 67 return (0 == memcmp(pStr, Archive::MAGIC, Archive::MAGIC_LEN)); 68 } 69 70 /// isThinArchive 71 bool GNUArchiveReader::isThinArchive(const char* pStr) const 72 { 73 return (0 == memcmp(pStr, Archive::THIN_MAGIC, Archive::MAGIC_LEN)); 74 } 75 76 /// isThinArchive 77 bool GNUArchiveReader::isThinArchive(Input& pInput) const 78 { 79 assert(pInput.hasMemArea()); 80 llvm::StringRef region = 81 pInput.memArea()->request(pInput.fileOffset(), Archive::MAGIC_LEN); 82 const char* str = region.begin(); 83 84 bool result = false; 85 assert(NULL != str); 86 if (isThinArchive(str)) 87 result = true; 88 89 return result; 90 } 91 92 bool GNUArchiveReader::readArchive(const LinkerConfig& pConfig, 93 Archive& pArchive) 94 { 95 // bypass the empty archive 96 if (Archive::MAGIC_LEN == pArchive.getARFile().memArea()->size()) 97 return true; 98 99 if (pArchive.getARFile().attribute()->isWholeArchive()) 100 return includeAllMembers(pConfig, pArchive); 101 102 // if this is the first time read this archive, setup symtab and strtab 103 if (pArchive.getSymbolTable().empty()) { 104 // read the symtab of the archive 105 readSymbolTable(pArchive); 106 107 // read the strtab of the archive 108 readStringTable(pArchive); 109 110 // add root archive to ArchiveMemberMap 111 pArchive.addArchiveMember(pArchive.getARFile().name(), 112 pArchive.inputs().root(), 113 &InputTree::Downward); 114 } 115 116 // include the needed members in the archive and build up the input tree 117 bool willSymResolved; 118 do { 119 willSymResolved = false; 120 for (size_t idx = 0; idx < pArchive.numOfSymbols(); ++idx) { 121 // bypass if we already decided to include this symbol or not 122 if (Archive::Symbol::Unknown != pArchive.getSymbolStatus(idx)) 123 continue; 124 125 // bypass if another symbol with the same object file offset is included 126 if (pArchive.hasObjectMember(pArchive.getObjFileOffset(idx))) { 127 pArchive.setSymbolStatus(idx, Archive::Symbol::Include); 128 continue; 129 } 130 131 // check if we should include this defined symbol 132 Archive::Symbol::Status status = 133 shouldIncludeSymbol(pArchive.getSymbolName(idx)); 134 if (Archive::Symbol::Unknown != status) 135 pArchive.setSymbolStatus(idx, status); 136 137 if (Archive::Symbol::Include == status) { 138 // include the object member from the given offset 139 includeMember(pConfig, pArchive, pArchive.getObjFileOffset(idx)); 140 willSymResolved = true; 141 } // end of if 142 } // end of for 143 } while (willSymResolved); 144 145 return true; 146 } 147 148 /// readMemberHeader - read the header of a member in a archive file and then 149 /// return the corresponding archive member (it may be an input object or 150 /// another archive) 151 /// @param pArchiveRoot - the archive root that holds the strtab (extended 152 /// name table) 153 /// @param pArchiveFile - the archive that contains the needed object 154 /// @param pFileOffset - file offset of the member header in the archive 155 /// @param pNestedOffset - used when we find a nested archive 156 /// @param pMemberSize - the file size of this member 157 Input* GNUArchiveReader::readMemberHeader(Archive& pArchiveRoot, 158 Input& pArchiveFile, 159 uint32_t pFileOffset, 160 uint32_t& pNestedOffset, 161 size_t& pMemberSize) 162 { 163 assert(pArchiveFile.hasMemArea()); 164 165 llvm::StringRef header_region = 166 pArchiveFile.memArea()->request((pArchiveFile.fileOffset() + pFileOffset), 167 sizeof(Archive::MemberHeader)); 168 const Archive::MemberHeader* header = 169 reinterpret_cast<const Archive::MemberHeader*>(header_region.begin()); 170 171 assert(0 == memcmp(header->fmag, Archive::MEMBER_MAGIC, sizeof(header->fmag))); 172 173 pMemberSize = atoi(header->size); 174 175 // parse the member name and nested offset if any 176 std::string member_name; 177 llvm::StringRef name_field(header->name, sizeof(header->name)); 178 if ('/' != header->name[0]) { 179 // this is an object file in an archive 180 size_t pos = name_field.find_first_of('/'); 181 member_name.assign(name_field.substr(0, pos).str()); 182 } 183 else { 184 // this is an object/archive file in a thin archive 185 size_t begin = 1; 186 size_t end = name_field.find_first_of(" :"); 187 uint32_t name_offset = 0; 188 // parse the name offset 189 name_field.substr(begin, end - begin).getAsInteger(10, name_offset); 190 191 if (':' == name_field[end]) { 192 // there is a nested offset 193 begin = end + 1; 194 end = name_field.find_first_of(' ', begin); 195 name_field.substr(begin, end - begin).getAsInteger(10, pNestedOffset); 196 } 197 198 // get the member name from the extended name table 199 assert(pArchiveRoot.hasStrTable()); 200 begin = name_offset; 201 end = pArchiveRoot.getStrTable().find_first_of('\n', begin); 202 member_name.assign(pArchiveRoot.getStrTable().substr(begin, end - begin -1)); 203 } 204 205 Input* member = NULL; 206 bool isThinAR = isThinArchive(pArchiveFile); 207 if (!isThinAR) { 208 // this is an object file in an archive 209 member = pArchiveRoot.getMemberFile(pArchiveFile, 210 isThinAR, 211 member_name, 212 pArchiveFile.path(), 213 (pFileOffset + 214 sizeof(Archive::MemberHeader))); 215 } 216 else { 217 // this is a member in a thin archive 218 // try to find if this is a archive already in the map first 219 Archive::ArchiveMember* ar_member = 220 pArchiveRoot.getArchiveMember(member_name); 221 if (NULL != ar_member) { 222 return ar_member->file; 223 } 224 225 // get nested file path, the nested file's member name is the relative 226 // path to the archive containing it. 227 sys::fs::Path input_path(pArchiveFile.path().parent_path()); 228 if (!input_path.empty()) 229 input_path.append(member_name); 230 else 231 input_path.assign(member_name); 232 233 member = pArchiveRoot.getMemberFile(pArchiveFile, 234 isThinAR, 235 member_name, 236 input_path); 237 } 238 239 return member; 240 } 241 242 template <size_t SIZE> 243 static void readSymbolTableEntries(Archive& pArchive, llvm::StringRef pMemRegion) 244 { 245 typedef typename SizeTraits<SIZE>::Offset Offset; 246 247 const Offset* data = reinterpret_cast<const Offset*>(pMemRegion.begin()); 248 249 // read the number of symbols 250 Offset number = 0; 251 if (llvm::sys::IsLittleEndianHost) 252 number = mcld::bswap<SIZE>(*data); 253 else 254 number = *data; 255 256 // set up the pointers for file offset and name offset 257 ++data; 258 const char* name = reinterpret_cast<const char*>(data + number); 259 260 // add the archive symbols 261 for (Offset i = 0; i < number; ++i) { 262 if (llvm::sys::IsLittleEndianHost) 263 pArchive.addSymbol(name, mcld::bswap<SIZE>(*data)); 264 else 265 pArchive.addSymbol(name, *data); 266 name += strlen(name) + 1; 267 ++data; 268 } 269 } 270 271 /// readSymbolTable - read the archive symbol map (armap) 272 bool GNUArchiveReader::readSymbolTable(Archive& pArchive) 273 { 274 assert(pArchive.getARFile().hasMemArea()); 275 276 llvm::StringRef header_region = 277 pArchive.getARFile().memArea()->request((pArchive.getARFile().fileOffset() + 278 Archive::MAGIC_LEN), 279 sizeof(Archive::MemberHeader)); 280 const Archive::MemberHeader* header = 281 reinterpret_cast<const Archive::MemberHeader*>(header_region.begin()); 282 assert(0 == memcmp(header->fmag, Archive::MEMBER_MAGIC, sizeof(header->fmag))); 283 284 int symtab_size = atoi(header->size); 285 pArchive.setSymTabSize(symtab_size); 286 287 if (!pArchive.getARFile().attribute()->isWholeArchive()) { 288 llvm::StringRef symtab_region = pArchive.getARFile().memArea()->request( 289 (pArchive.getARFile().fileOffset() + 290 Archive::MAGIC_LEN + 291 sizeof(Archive::MemberHeader)), 292 symtab_size); 293 294 if (0 == strncmp(header->name, Archive::SVR4_SYMTAB_NAME, 295 strlen(Archive::SVR4_SYMTAB_NAME))) 296 readSymbolTableEntries<32>(pArchive, symtab_region); 297 else if (0 == strncmp(header->name, Archive::IRIX6_SYMTAB_NAME, 298 strlen(Archive::IRIX6_SYMTAB_NAME))) 299 readSymbolTableEntries<64>(pArchive, symtab_region); 300 else 301 unreachable(diag::err_unsupported_archive); 302 303 } 304 return true; 305 } 306 307 /// readStringTable - read the strtab for long file name of the archive 308 bool GNUArchiveReader::readStringTable(Archive& pArchive) 309 { 310 size_t offset = Archive::MAGIC_LEN + 311 sizeof(Archive::MemberHeader) + 312 pArchive.getSymTabSize(); 313 314 if (0x0 != (offset & 1)) 315 ++offset; 316 317 assert(pArchive.getARFile().hasMemArea()); 318 319 llvm::StringRef header_region = 320 pArchive.getARFile().memArea()->request((pArchive.getARFile().fileOffset() + 321 offset), 322 sizeof(Archive::MemberHeader)); 323 const Archive::MemberHeader* header = 324 reinterpret_cast<const Archive::MemberHeader*>(header_region.begin()); 325 326 assert(0 == memcmp(header->fmag, Archive::MEMBER_MAGIC, sizeof(header->fmag))); 327 328 if (0 == memcmp(header->name, Archive::STRTAB_NAME, sizeof(header->name))) { 329 // read the extended name table 330 int strtab_size = atoi(header->size); 331 llvm::StringRef strtab_region = 332 pArchive.getARFile().memArea()->request( 333 (pArchive.getARFile().fileOffset() + 334 offset + sizeof(Archive::MemberHeader)), 335 strtab_size); 336 const char* strtab = strtab_region.begin(); 337 pArchive.getStrTable().assign(strtab, strtab_size); 338 } 339 return true; 340 } 341 342 /// shouldIncludeStatus - given a sym name from armap and check if including 343 /// the corresponding archive member, and then return the decision 344 enum Archive::Symbol::Status 345 GNUArchiveReader::shouldIncludeSymbol(const llvm::StringRef& pSymName) const 346 { 347 // TODO: handle symbol version issue and user defined symbols 348 const ResolveInfo* info = m_Module.getNamePool().findInfo(pSymName); 349 if (NULL != info) { 350 if (!info->isUndef()) 351 return Archive::Symbol::Exclude; 352 if (info->isWeak()) 353 return Archive::Symbol::Unknown; 354 return Archive::Symbol::Include; 355 } 356 return Archive::Symbol::Unknown; 357 } 358 359 /// includeMember - include the object member in the given file offset, and 360 /// return the size of the object 361 /// @param pConfig - LinkerConfig 362 /// @param pArchiveRoot - the archive root 363 /// @param pFileOffset - file offset of the member header in the archive 364 size_t GNUArchiveReader::includeMember(const LinkerConfig& pConfig, 365 Archive& pArchive, 366 uint32_t pFileOffset) 367 { 368 Input* cur_archive = &(pArchive.getARFile()); 369 Input* member = NULL; 370 uint32_t file_offset = pFileOffset; 371 size_t size = 0; 372 do { 373 uint32_t nested_offset = 0; 374 // use the file offset in current archive to find out the member we 375 // want to include 376 member = readMemberHeader(pArchive, 377 *cur_archive, 378 file_offset, 379 nested_offset, 380 size); 381 assert(member != NULL); 382 // bypass if we get an archive that is already in the map 383 if (Input::Archive == member->type()) { 384 cur_archive = member; 385 file_offset = nested_offset; 386 continue; 387 } 388 389 // insert a node into the subtree of current archive. 390 Archive::ArchiveMember* parent = 391 pArchive.getArchiveMember(cur_archive->name()); 392 393 assert(NULL != parent); 394 pArchive.inputs().insert(parent->lastPos, *(parent->move), *member); 395 396 // move the iterator to new created node, and also adjust the 397 // direction to Afterward for next insertion in this subtree 398 parent->move->move(parent->lastPos); 399 parent->move = &InputTree::Afterward; 400 bool doContinue = false; 401 402 if (m_ELFObjectReader.isMyFormat(*member, doContinue)) { 403 member->setType(Input::Object); 404 // Set this object as no export if the archive is in the exclude libs. 405 if (pArchive.getARFile().noExport()) { 406 member->setNoExport(); 407 } 408 pArchive.addObjectMember(pFileOffset, parent->lastPos); 409 m_ELFObjectReader.readHeader(*member); 410 m_ELFObjectReader.readSections(*member); 411 m_ELFObjectReader.readSymbols(*member); 412 m_Module.getObjectList().push_back(member); 413 } 414 else if (doContinue && isMyFormat(*member, doContinue)) { 415 member->setType(Input::Archive); 416 // when adding a new archive node, set the iterator to archive 417 // itself, and set the direction to Downward 418 pArchive.addArchiveMember(member->name(), 419 parent->lastPos, 420 &InputTree::Downward); 421 cur_archive = member; 422 file_offset = nested_offset; 423 } 424 else { 425 warning(diag::warn_unrecognized_input_file) << member->path() 426 << pConfig.targets().triple().str(); 427 } 428 } while (Input::Object != member->type()); 429 return size; 430 } 431 432 /// includeAllMembers - include all object members. This is called if 433 /// --whole-archive is the attribute for this archive file. 434 bool GNUArchiveReader::includeAllMembers(const LinkerConfig& pConfig, 435 Archive& pArchive) 436 { 437 // read the symtab of the archive 438 readSymbolTable(pArchive); 439 440 // read the strtab of the archive 441 readStringTable(pArchive); 442 443 // add root archive to ArchiveMemberMap 444 pArchive.addArchiveMember(pArchive.getARFile().name(), 445 pArchive.inputs().root(), 446 &InputTree::Downward); 447 448 bool isThinAR = isThinArchive(pArchive.getARFile()); 449 uint32_t begin_offset = pArchive.getARFile().fileOffset() + 450 Archive::MAGIC_LEN + 451 sizeof(Archive::MemberHeader) + 452 pArchive.getSymTabSize(); 453 if (pArchive.hasStrTable()) { 454 if (0x0 != (begin_offset & 1)) 455 ++begin_offset; 456 begin_offset += sizeof(Archive::MemberHeader) + 457 pArchive.getStrTable().size(); 458 } 459 uint32_t end_offset = pArchive.getARFile().memArea()->size(); 460 for (uint32_t offset = begin_offset; 461 offset < end_offset; 462 offset += sizeof(Archive::MemberHeader)) { 463 464 size_t size = includeMember(pConfig, pArchive, offset); 465 466 if (!isThinAR) { 467 offset += size; 468 } 469 470 if (0x0 != (offset & 1)) 471 ++offset; 472 } 473 return true; 474 } 475