Home | History | Annotate | Download | only in LD
      1 //===- GNUArchiveReader.cpp -----------------------------------------------===//
      2 //
      3 //                     The MCLinker Project
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 #include "mcld/LD/GNUArchiveReader.h"
     10 
     11 #include "mcld/InputTree.h"
     12 #include "mcld/LinkerConfig.h"
     13 #include "mcld/Module.h"
     14 #include "mcld/ADT/SizeTraits.h"
     15 #include "mcld/MC/Attribute.h"
     16 #include "mcld/MC/Input.h"
     17 #include "mcld/LD/ELFObjectReader.h"
     18 #include "mcld/LD/ResolveInfo.h"
     19 #include "mcld/Support/FileHandle.h"
     20 #include "mcld/Support/FileSystem.h"
     21 #include "mcld/Support/MemoryArea.h"
     22 #include "mcld/Support/MsgHandling.h"
     23 #include "mcld/Support/Path.h"
     24 
     25 #include <llvm/ADT/StringRef.h>
     26 #include <llvm/Support/Host.h>
     27 
     28 #include <cstdlib>
     29 #include <cstring>
     30 
     31 namespace mcld {
     32 
     33 GNUArchiveReader::GNUArchiveReader(Module& pModule,
     34                                    ELFObjectReader& pELFObjectReader)
     35     : m_Module(pModule), m_ELFObjectReader(pELFObjectReader) {
     36 }
     37 
     38 GNUArchiveReader::~GNUArchiveReader() {
     39 }
     40 
     41 /// isMyFormat
     42 bool GNUArchiveReader::isMyFormat(Input& pInput, bool& pContinue) const {
     43   assert(pInput.hasMemArea());
     44   if (pInput.memArea()->size() < Archive::MAGIC_LEN)
     45     return false;
     46 
     47   llvm::StringRef region =
     48       pInput.memArea()->request(pInput.fileOffset(), Archive::MAGIC_LEN);
     49   const char* str = region.begin();
     50 
     51   bool result = false;
     52   assert(str != NULL);
     53   pContinue = true;
     54   if (isArchive(str) || isThinArchive(str))
     55     result = true;
     56 
     57   return result;
     58 }
     59 
     60 /// isArchive
     61 bool GNUArchiveReader::isArchive(const char* pStr) const {
     62   return (memcmp(pStr, Archive::MAGIC, Archive::MAGIC_LEN) == 0);
     63 }
     64 
     65 /// isThinArchive
     66 bool GNUArchiveReader::isThinArchive(const char* pStr) const {
     67   return (memcmp(pStr, Archive::THIN_MAGIC, Archive::MAGIC_LEN) == 0);
     68 }
     69 
     70 /// isThinArchive
     71 bool GNUArchiveReader::isThinArchive(Input& pInput) const {
     72   assert(pInput.hasMemArea());
     73   llvm::StringRef region =
     74       pInput.memArea()->request(pInput.fileOffset(), Archive::MAGIC_LEN);
     75   const char* str = region.begin();
     76 
     77   bool result = false;
     78   assert(str != NULL);
     79   if (isThinArchive(str))
     80     result = true;
     81 
     82   return result;
     83 }
     84 
     85 bool GNUArchiveReader::readArchive(const LinkerConfig& pConfig,
     86                                    Archive& pArchive) {
     87   // bypass the empty archive
     88   if (Archive::MAGIC_LEN == pArchive.getARFile().memArea()->size())
     89     return true;
     90 
     91   if (pArchive.getARFile().attribute()->isWholeArchive())
     92     return includeAllMembers(pConfig, pArchive);
     93 
     94   // if this is the first time read this archive, setup symtab and strtab
     95   if (pArchive.getSymbolTable().empty()) {
     96     // read the symtab of the archive
     97     readSymbolTable(pArchive);
     98 
     99     // read the strtab of the archive
    100     readStringTable(pArchive);
    101 
    102     // add root archive to ArchiveMemberMap
    103     pArchive.addArchiveMember(pArchive.getARFile().name(),
    104                               pArchive.inputs().root(),
    105                               &InputTree::Downward);
    106   }
    107 
    108   // include the needed members in the archive and build up the input tree
    109   bool willSymResolved;
    110   do {
    111     willSymResolved = false;
    112     for (size_t idx = 0; idx < pArchive.numOfSymbols(); ++idx) {
    113       // bypass if we already decided to include this symbol or not
    114       if (Archive::Symbol::Unknown != pArchive.getSymbolStatus(idx))
    115         continue;
    116 
    117       // bypass if another symbol with the same object file offset is included
    118       if (pArchive.hasObjectMember(pArchive.getObjFileOffset(idx))) {
    119         pArchive.setSymbolStatus(idx, Archive::Symbol::Include);
    120         continue;
    121       }
    122 
    123       // check if we should include this defined symbol
    124       Archive::Symbol::Status status =
    125           shouldIncludeSymbol(pArchive.getSymbolName(idx));
    126       if (Archive::Symbol::Unknown != status)
    127         pArchive.setSymbolStatus(idx, status);
    128 
    129       if (Archive::Symbol::Include == status) {
    130         // include the object member from the given offset
    131         includeMember(pConfig, pArchive, pArchive.getObjFileOffset(idx));
    132         willSymResolved = true;
    133       }  // end of if
    134     }    // end of for
    135   } while (willSymResolved);
    136 
    137   return true;
    138 }
    139 
    140 /// readMemberHeader - read the header of a member in a archive file and then
    141 /// return the corresponding archive member (it may be an input object or
    142 /// another archive)
    143 /// @param pArchiveRoot  - the archive root that holds the strtab (extended
    144 ///                        name table)
    145 /// @param pArchiveFile  - the archive that contains the needed object
    146 /// @param pFileOffset   - file offset of the member header in the archive
    147 /// @param pNestedOffset - used when we find a nested archive
    148 /// @param pMemberSize   - the file size of this member
    149 Input* GNUArchiveReader::readMemberHeader(Archive& pArchiveRoot,
    150                                           Input& pArchiveFile,
    151                                           uint32_t pFileOffset,
    152                                           uint32_t& pNestedOffset,
    153                                           size_t& pMemberSize) {
    154   assert(pArchiveFile.hasMemArea());
    155 
    156   llvm::StringRef header_region = pArchiveFile.memArea()->request(
    157       (pArchiveFile.fileOffset() + pFileOffset), sizeof(Archive::MemberHeader));
    158   const Archive::MemberHeader* header =
    159       reinterpret_cast<const Archive::MemberHeader*>(header_region.begin());
    160 
    161   assert(memcmp(header->fmag, Archive::MEMBER_MAGIC, sizeof(header->fmag)) ==
    162          0);
    163 
    164   pMemberSize = atoi(header->size);
    165 
    166   // parse the member name and nested offset if any
    167   std::string member_name;
    168   llvm::StringRef name_field(header->name, sizeof(header->name));
    169   if (header->name[0] != '/') {
    170     // this is an object file in an archive
    171     size_t pos = name_field.find_first_of('/');
    172     member_name.assign(name_field.substr(0, pos).str());
    173   } else {
    174     // this is an object/archive file in a thin archive
    175     size_t begin = 1;
    176     size_t end = name_field.find_first_of(" :");
    177     uint32_t name_offset = 0;
    178     // parse the name offset
    179     name_field.substr(begin, end - begin).getAsInteger(10, name_offset);
    180 
    181     if (name_field[end] == ':') {
    182       // there is a nested offset
    183       begin = end + 1;
    184       end = name_field.find_first_of(' ', begin);
    185       name_field.substr(begin, end - begin).getAsInteger(10, pNestedOffset);
    186     }
    187 
    188     // get the member name from the extended name table
    189     assert(pArchiveRoot.hasStrTable());
    190     begin = name_offset;
    191     end = pArchiveRoot.getStrTable().find_first_of('\n', begin);
    192     member_name.assign(
    193         pArchiveRoot.getStrTable().substr(begin, end - begin - 1));
    194   }
    195 
    196   Input* member = NULL;
    197   bool isThinAR = isThinArchive(pArchiveFile);
    198   if (!isThinAR) {
    199     // this is an object file in an archive
    200     member = pArchiveRoot.getMemberFile(
    201         pArchiveFile,
    202         isThinAR,
    203         member_name,
    204         pArchiveFile.path(),
    205         (pFileOffset + sizeof(Archive::MemberHeader)));
    206   } else {
    207     // this is a member in a thin archive
    208     // try to find if this is a archive already in the map first
    209     Archive::ArchiveMember* ar_member =
    210         pArchiveRoot.getArchiveMember(member_name);
    211     if (ar_member != NULL) {
    212       return ar_member->file;
    213     }
    214 
    215     // get nested file path, the nested file's member name is the relative
    216     // path to the archive containing it.
    217     sys::fs::Path input_path(pArchiveFile.path().parent_path());
    218     if (!input_path.empty())
    219       input_path.append(sys::fs::Path(member_name));
    220     else
    221       input_path.assign(member_name);
    222 
    223     member = pArchiveRoot.getMemberFile(
    224         pArchiveFile, isThinAR, member_name, input_path);
    225   }
    226 
    227   return member;
    228 }
    229 
    230 template <size_t SIZE>
    231 static void readSymbolTableEntries(Archive& pArchive,
    232                                    llvm::StringRef pMemRegion) {
    233   typedef typename SizeTraits<SIZE>::Offset Offset;
    234 
    235   const Offset* data = reinterpret_cast<const Offset*>(pMemRegion.begin());
    236 
    237   // read the number of symbols
    238   Offset number = 0;
    239   if (llvm::sys::IsLittleEndianHost)
    240     number = mcld::bswap<SIZE>(*data);
    241   else
    242     number = *data;
    243 
    244   // set up the pointers for file offset and name offset
    245   ++data;
    246   const char* name = reinterpret_cast<const char*>(data + number);
    247 
    248   // add the archive symbols
    249   for (Offset i = 0; i < number; ++i) {
    250     if (llvm::sys::IsLittleEndianHost)
    251       pArchive.addSymbol(name, mcld::bswap<SIZE>(*data));
    252     else
    253       pArchive.addSymbol(name, *data);
    254     name += strlen(name) + 1;
    255     ++data;
    256   }
    257 }
    258 
    259 /// readSymbolTable - read the archive symbol map (armap)
    260 bool GNUArchiveReader::readSymbolTable(Archive& pArchive) {
    261   assert(pArchive.getARFile().hasMemArea());
    262   MemoryArea* memory_area = pArchive.getARFile().memArea();
    263 
    264   llvm::StringRef header_region = memory_area->request(
    265       (pArchive.getARFile().fileOffset() + Archive::MAGIC_LEN),
    266       sizeof(Archive::MemberHeader));
    267   const Archive::MemberHeader* header =
    268       reinterpret_cast<const Archive::MemberHeader*>(header_region.begin());
    269   assert(memcmp(header->fmag, Archive::MEMBER_MAGIC, sizeof(header->fmag)) ==
    270          0);
    271 
    272   int symtab_size = atoi(header->size);
    273   pArchive.setSymTabSize(symtab_size);
    274 
    275   if (!pArchive.getARFile().attribute()->isWholeArchive()) {
    276     llvm::StringRef symtab_region = memory_area->request(
    277         (pArchive.getARFile().fileOffset() + Archive::MAGIC_LEN +
    278          sizeof(Archive::MemberHeader)),
    279         symtab_size);
    280 
    281     if (strncmp(header->name,
    282                 Archive::SVR4_SYMTAB_NAME,
    283                 strlen(Archive::SVR4_SYMTAB_NAME)) == 0)
    284       readSymbolTableEntries<32>(pArchive, symtab_region);
    285     else if (strncmp(header->name,
    286                      Archive::IRIX6_SYMTAB_NAME,
    287                      strlen(Archive::IRIX6_SYMTAB_NAME)) == 0)
    288       readSymbolTableEntries<64>(pArchive, symtab_region);
    289     else
    290       unreachable(diag::err_unsupported_archive);
    291   }
    292   return true;
    293 }
    294 
    295 /// readStringTable - read the strtab for long file name of the archive
    296 bool GNUArchiveReader::readStringTable(Archive& pArchive) {
    297   size_t offset = Archive::MAGIC_LEN + sizeof(Archive::MemberHeader) +
    298                   pArchive.getSymTabSize();
    299 
    300   if ((offset & 1) != 0x0)
    301     ++offset;
    302 
    303   assert(pArchive.getARFile().hasMemArea());
    304   MemoryArea* memory_area = pArchive.getARFile().memArea();
    305 
    306   llvm::StringRef header_region =
    307       memory_area->request((pArchive.getARFile().fileOffset() + offset),
    308                            sizeof(Archive::MemberHeader));
    309   const Archive::MemberHeader* header =
    310       reinterpret_cast<const Archive::MemberHeader*>(header_region.begin());
    311 
    312   assert(memcmp(header->fmag, Archive::MEMBER_MAGIC, sizeof(header->fmag)) ==
    313          0);
    314 
    315   if (memcmp(header->name, Archive::STRTAB_NAME, sizeof(header->name)) == 0) {
    316     // read the extended name table
    317     int strtab_size = atoi(header->size);
    318     llvm::StringRef strtab_region =
    319         memory_area->request((pArchive.getARFile().fileOffset() + offset +
    320                               sizeof(Archive::MemberHeader)),
    321                              strtab_size);
    322     const char* strtab = strtab_region.begin();
    323     pArchive.getStrTable().assign(strtab, strtab_size);
    324   }
    325   return true;
    326 }
    327 
    328 /// shouldIncludeStatus - given a sym name from armap and check if including
    329 /// the corresponding archive member, and then return the decision
    330 enum Archive::Symbol::Status GNUArchiveReader::shouldIncludeSymbol(
    331     const llvm::StringRef& pSymName) const {
    332   // TODO: handle symbol version issue and user defined symbols
    333   const ResolveInfo* info = m_Module.getNamePool().findInfo(pSymName);
    334   if (info != NULL) {
    335     if (!info->isUndef())
    336       return Archive::Symbol::Exclude;
    337     if (info->isWeak())
    338       return Archive::Symbol::Unknown;
    339     return Archive::Symbol::Include;
    340   }
    341   return Archive::Symbol::Unknown;
    342 }
    343 
    344 /// includeMember - include the object member in the given file offset, and
    345 /// return the size of the object
    346 /// @param pConfig - LinkerConfig
    347 /// @param pArchiveRoot - the archive root
    348 /// @param pFileOffset  - file offset of the member header in the archive
    349 size_t GNUArchiveReader::includeMember(const LinkerConfig& pConfig,
    350                                        Archive& pArchive,
    351                                        uint32_t pFileOffset) {
    352   Input* cur_archive = &(pArchive.getARFile());
    353   Input* member = NULL;
    354   uint32_t file_offset = pFileOffset;
    355   size_t size = 0;
    356   do {
    357     uint32_t nested_offset = 0;
    358     // use the file offset in current archive to find out the member we
    359     // want to include
    360     member = readMemberHeader(
    361         pArchive, *cur_archive, file_offset, nested_offset, size);
    362     assert(member != NULL);
    363     // bypass if we get an archive that is already in the map
    364     if (Input::Archive == member->type()) {
    365       cur_archive = member;
    366       file_offset = nested_offset;
    367       continue;
    368     }
    369 
    370     // insert a node into the subtree of current archive.
    371     Archive::ArchiveMember* parent =
    372         pArchive.getArchiveMember(cur_archive->name());
    373 
    374     assert(parent != NULL);
    375     pArchive.inputs().insert(parent->lastPos, *(parent->move), *member);
    376 
    377     // move the iterator to new created node, and also adjust the
    378     // direction to Afterward for next insertion in this subtree
    379     parent->move->move(parent->lastPos);
    380     parent->move = &InputTree::Afterward;
    381     bool doContinue = false;
    382 
    383     if (m_ELFObjectReader.isMyFormat(*member, doContinue)) {
    384       member->setType(Input::Object);
    385       // Set this object as no export if the archive is in the exclude libs.
    386       if (pArchive.getARFile().noExport()) {
    387         member->setNoExport();
    388       }
    389       pArchive.addObjectMember(pFileOffset, parent->lastPos);
    390       m_ELFObjectReader.readHeader(*member);
    391       m_ELFObjectReader.readSections(*member);
    392       m_ELFObjectReader.readSymbols(*member);
    393       m_Module.getObjectList().push_back(member);
    394     } else if (doContinue && isMyFormat(*member, doContinue)) {
    395       member->setType(Input::Archive);
    396       // when adding a new archive node, set the iterator to archive
    397       // itself, and set the direction to Downward
    398       pArchive.addArchiveMember(
    399           member->name(), parent->lastPos, &InputTree::Downward);
    400       cur_archive = member;
    401       file_offset = nested_offset;
    402     } else {
    403       warning(diag::warn_unrecognized_input_file)
    404           << member->path() << pConfig.targets().triple().str();
    405     }
    406   } while (Input::Object != member->type());
    407   return size;
    408 }
    409 
    410 /// includeAllMembers - include all object members. This is called if
    411 /// --whole-archive is the attribute for this archive file.
    412 bool GNUArchiveReader::includeAllMembers(const LinkerConfig& pConfig,
    413                                          Archive& pArchive) {
    414   // read the symtab of the archive
    415   readSymbolTable(pArchive);
    416 
    417   // read the strtab of the archive
    418   readStringTable(pArchive);
    419 
    420   // add root archive to ArchiveMemberMap
    421   pArchive.addArchiveMember(pArchive.getARFile().name(),
    422                             pArchive.inputs().root(),
    423                             &InputTree::Downward);
    424 
    425   bool isThinAR = isThinArchive(pArchive.getARFile());
    426   uint32_t begin_offset = pArchive.getARFile().fileOffset() +
    427                           Archive::MAGIC_LEN + sizeof(Archive::MemberHeader) +
    428                           pArchive.getSymTabSize();
    429   if (pArchive.hasStrTable()) {
    430     if ((begin_offset & 1) != 0x0)
    431       ++begin_offset;
    432     begin_offset +=
    433         sizeof(Archive::MemberHeader) + pArchive.getStrTable().size();
    434   }
    435   uint32_t end_offset = pArchive.getARFile().memArea()->size();
    436   for (uint32_t offset = begin_offset; offset < end_offset;
    437        offset += sizeof(Archive::MemberHeader)) {
    438     size_t size = includeMember(pConfig, pArchive, offset);
    439 
    440     if (!isThinAR) {
    441       offset += size;
    442     }
    443 
    444     if ((offset & 1) != 0x0)
    445       ++offset;
    446   }
    447   return true;
    448 }
    449 
    450 }  // namespace mcld
    451