Home | History | Annotate | Download | only in LD
      1 //===- GNUArchiveReader.cpp -----------------------------------------------===//
      2 //
      3 //                     The MCLinker Project
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 #include <mcld/LD/GNUArchiveReader.h>
     10 
     11 #include <mcld/Module.h>
     12 #include <mcld/InputTree.h>
     13 #include <mcld/LinkerConfig.h>
     14 #include <mcld/MC/Attribute.h>
     15 #include <mcld/MC/Input.h>
     16 #include <mcld/LD/ResolveInfo.h>
     17 #include <mcld/LD/ELFObjectReader.h>
     18 #include <mcld/Support/FileSystem.h>
     19 #include <mcld/Support/FileHandle.h>
     20 #include <mcld/Support/MemoryArea.h>
     21 #include <mcld/Support/MsgHandling.h>
     22 #include <mcld/Support/Path.h>
     23 #include <mcld/ADT/SizeTraits.h>
     24 
     25 #include <llvm/ADT/StringRef.h>
     26 #include <llvm/Support/Host.h>
     27 
     28 #include <cstring>
     29 #include <cstdlib>
     30 
     31 using namespace mcld;
     32 
     33 GNUArchiveReader::GNUArchiveReader(Module& pModule,
     34                                    ELFObjectReader& pELFObjectReader)
     35  : m_Module(pModule),
     36    m_ELFObjectReader(pELFObjectReader)
     37 {
     38 }
     39 
     40 GNUArchiveReader::~GNUArchiveReader()
     41 {
     42 }
     43 
     44 /// isMyFormat
     45 bool GNUArchiveReader::isMyFormat(Input& pInput, bool &pContinue) const
     46 {
     47   assert(pInput.hasMemArea());
     48   if (pInput.memArea()->size() < Archive::MAGIC_LEN)
     49     return false;
     50 
     51   llvm::StringRef region =
     52       pInput.memArea()->request(pInput.fileOffset(), Archive::MAGIC_LEN);
     53   const char* str = region.begin();
     54 
     55   bool result = false;
     56   assert(NULL != str);
     57   pContinue = true;
     58   if (isArchive(str) || isThinArchive(str))
     59     result = true;
     60 
     61   return result;
     62 }
     63 
     64 /// isArchive
     65 bool GNUArchiveReader::isArchive(const char* pStr) const
     66 {
     67   return (0 == memcmp(pStr, Archive::MAGIC, Archive::MAGIC_LEN));
     68 }
     69 
     70 /// isThinArchive
     71 bool GNUArchiveReader::isThinArchive(const char* pStr) const
     72 {
     73   return (0 == memcmp(pStr, Archive::THIN_MAGIC, Archive::MAGIC_LEN));
     74 }
     75 
     76 /// isThinArchive
     77 bool GNUArchiveReader::isThinArchive(Input& pInput) const
     78 {
     79   assert(pInput.hasMemArea());
     80   llvm::StringRef region =
     81       pInput.memArea()->request(pInput.fileOffset(), Archive::MAGIC_LEN);
     82   const char* str = region.begin();
     83 
     84   bool result = false;
     85   assert(NULL != str);
     86   if (isThinArchive(str))
     87     result = true;
     88 
     89   return result;
     90 }
     91 
     92 bool GNUArchiveReader::readArchive(const LinkerConfig& pConfig,
     93                                    Archive& pArchive)
     94 {
     95   // bypass the empty archive
     96   if (Archive::MAGIC_LEN == pArchive.getARFile().memArea()->size())
     97     return true;
     98 
     99   if (pArchive.getARFile().attribute()->isWholeArchive())
    100     return includeAllMembers(pConfig, pArchive);
    101 
    102   // if this is the first time read this archive, setup symtab and strtab
    103   if (pArchive.getSymbolTable().empty()) {
    104   // read the symtab of the archive
    105   readSymbolTable(pArchive);
    106 
    107   // read the strtab of the archive
    108   readStringTable(pArchive);
    109 
    110   // add root archive to ArchiveMemberMap
    111   pArchive.addArchiveMember(pArchive.getARFile().name(),
    112                             pArchive.inputs().root(),
    113                             &InputTree::Downward);
    114   }
    115 
    116   // include the needed members in the archive and build up the input tree
    117   bool willSymResolved;
    118   do {
    119     willSymResolved = false;
    120     for (size_t idx = 0; idx < pArchive.numOfSymbols(); ++idx) {
    121       // bypass if we already decided to include this symbol or not
    122       if (Archive::Symbol::Unknown != pArchive.getSymbolStatus(idx))
    123         continue;
    124 
    125       // bypass if another symbol with the same object file offset is included
    126       if (pArchive.hasObjectMember(pArchive.getObjFileOffset(idx))) {
    127         pArchive.setSymbolStatus(idx, Archive::Symbol::Include);
    128         continue;
    129       }
    130 
    131       // check if we should include this defined symbol
    132       Archive::Symbol::Status status =
    133         shouldIncludeSymbol(pArchive.getSymbolName(idx));
    134       if (Archive::Symbol::Unknown != status)
    135         pArchive.setSymbolStatus(idx, status);
    136 
    137       if (Archive::Symbol::Include == status) {
    138         // include the object member from the given offset
    139         includeMember(pConfig, pArchive, pArchive.getObjFileOffset(idx));
    140         willSymResolved = true;
    141       } // end of if
    142     } // end of for
    143   } while (willSymResolved);
    144 
    145   return true;
    146 }
    147 
    148 /// readMemberHeader - read the header of a member in a archive file and then
    149 /// return the corresponding archive member (it may be an input object or
    150 /// another archive)
    151 /// @param pArchiveRoot  - the archive root that holds the strtab (extended
    152 ///                        name table)
    153 /// @param pArchiveFile  - the archive that contains the needed object
    154 /// @param pFileOffset   - file offset of the member header in the archive
    155 /// @param pNestedOffset - used when we find a nested archive
    156 /// @param pMemberSize   - the file size of this member
    157 Input* GNUArchiveReader::readMemberHeader(Archive& pArchiveRoot,
    158                                           Input& pArchiveFile,
    159                                           uint32_t pFileOffset,
    160                                           uint32_t& pNestedOffset,
    161                                           size_t& pMemberSize)
    162 {
    163   assert(pArchiveFile.hasMemArea());
    164 
    165   llvm::StringRef header_region =
    166     pArchiveFile.memArea()->request((pArchiveFile.fileOffset() + pFileOffset),
    167                                     sizeof(Archive::MemberHeader));
    168   const Archive::MemberHeader* header =
    169     reinterpret_cast<const Archive::MemberHeader*>(header_region.begin());
    170 
    171   assert(0 == memcmp(header->fmag, Archive::MEMBER_MAGIC, sizeof(header->fmag)));
    172 
    173   pMemberSize = atoi(header->size);
    174 
    175   // parse the member name and nested offset if any
    176   std::string member_name;
    177   llvm::StringRef name_field(header->name, sizeof(header->name));
    178   if ('/' != header->name[0]) {
    179     // this is an object file in an archive
    180     size_t pos = name_field.find_first_of('/');
    181     member_name.assign(name_field.substr(0, pos).str());
    182   }
    183   else {
    184     // this is an object/archive file in a thin archive
    185     size_t begin = 1;
    186     size_t end = name_field.find_first_of(" :");
    187     uint32_t name_offset = 0;
    188     // parse the name offset
    189     name_field.substr(begin, end - begin).getAsInteger(10, name_offset);
    190 
    191     if (':' == name_field[end]) {
    192       // there is a nested offset
    193       begin = end + 1;
    194       end = name_field.find_first_of(' ', begin);
    195       name_field.substr(begin, end - begin).getAsInteger(10, pNestedOffset);
    196     }
    197 
    198     // get the member name from the extended name table
    199     assert(pArchiveRoot.hasStrTable());
    200     begin = name_offset;
    201     end = pArchiveRoot.getStrTable().find_first_of('\n', begin);
    202     member_name.assign(pArchiveRoot.getStrTable().substr(begin, end - begin -1));
    203   }
    204 
    205   Input* member = NULL;
    206   bool isThinAR = isThinArchive(pArchiveFile);
    207   if (!isThinAR) {
    208     // this is an object file in an archive
    209     member = pArchiveRoot.getMemberFile(pArchiveFile,
    210                                         isThinAR,
    211                                         member_name,
    212                                         pArchiveFile.path(),
    213                                         (pFileOffset +
    214                                          sizeof(Archive::MemberHeader)));
    215   }
    216   else {
    217     // this is a member in a thin archive
    218     // try to find if this is a archive already in the map first
    219     Archive::ArchiveMember* ar_member =
    220       pArchiveRoot.getArchiveMember(member_name);
    221     if (NULL != ar_member) {
    222       return ar_member->file;
    223     }
    224 
    225     // get nested file path, the nested file's member name is the relative
    226     // path to the archive containing it.
    227     sys::fs::Path input_path(pArchiveFile.path().parent_path());
    228     if (!input_path.empty())
    229       input_path.append(member_name);
    230     else
    231       input_path.assign(member_name);
    232 
    233     member = pArchiveRoot.getMemberFile(pArchiveFile,
    234                                         isThinAR,
    235                                         member_name,
    236                                         input_path);
    237   }
    238 
    239   return member;
    240 }
    241 
    242 template <size_t SIZE>
    243 static void readSymbolTableEntries(Archive& pArchive, llvm::StringRef pMemRegion)
    244 {
    245   typedef typename SizeTraits<SIZE>::Offset Offset;
    246 
    247   const Offset* data = reinterpret_cast<const Offset*>(pMemRegion.begin());
    248 
    249   // read the number of symbols
    250   Offset number = 0;
    251   if (llvm::sys::IsLittleEndianHost)
    252     number = mcld::bswap<SIZE>(*data);
    253   else
    254     number = *data;
    255 
    256   // set up the pointers for file offset and name offset
    257   ++data;
    258   const char* name = reinterpret_cast<const char*>(data + number);
    259 
    260   // add the archive symbols
    261   for (Offset i = 0; i < number; ++i) {
    262     if (llvm::sys::IsLittleEndianHost)
    263       pArchive.addSymbol(name, mcld::bswap<SIZE>(*data));
    264     else
    265       pArchive.addSymbol(name, *data);
    266     name += strlen(name) + 1;
    267     ++data;
    268   }
    269 }
    270 
    271 /// readSymbolTable - read the archive symbol map (armap)
    272 bool GNUArchiveReader::readSymbolTable(Archive& pArchive)
    273 {
    274   assert(pArchive.getARFile().hasMemArea());
    275 
    276   llvm::StringRef header_region =
    277     pArchive.getARFile().memArea()->request((pArchive.getARFile().fileOffset() +
    278                                              Archive::MAGIC_LEN),
    279                                             sizeof(Archive::MemberHeader));
    280   const Archive::MemberHeader* header =
    281     reinterpret_cast<const Archive::MemberHeader*>(header_region.begin());
    282   assert(0 == memcmp(header->fmag, Archive::MEMBER_MAGIC, sizeof(header->fmag)));
    283 
    284   int symtab_size = atoi(header->size);
    285   pArchive.setSymTabSize(symtab_size);
    286 
    287   if (!pArchive.getARFile().attribute()->isWholeArchive()) {
    288     llvm::StringRef symtab_region = pArchive.getARFile().memArea()->request(
    289         (pArchive.getARFile().fileOffset() +
    290          Archive::MAGIC_LEN +
    291          sizeof(Archive::MemberHeader)),
    292         symtab_size);
    293 
    294     if (0 == strncmp(header->name, Archive::SVR4_SYMTAB_NAME,
    295                                    strlen(Archive::SVR4_SYMTAB_NAME)))
    296       readSymbolTableEntries<32>(pArchive, symtab_region);
    297     else if (0 == strncmp(header->name, Archive::IRIX6_SYMTAB_NAME,
    298                                         strlen(Archive::IRIX6_SYMTAB_NAME)))
    299       readSymbolTableEntries<64>(pArchive, symtab_region);
    300     else
    301       unreachable(diag::err_unsupported_archive);
    302 
    303   }
    304   return true;
    305 }
    306 
    307 /// readStringTable - read the strtab for long file name of the archive
    308 bool GNUArchiveReader::readStringTable(Archive& pArchive)
    309 {
    310   size_t offset = Archive::MAGIC_LEN +
    311                   sizeof(Archive::MemberHeader) +
    312                   pArchive.getSymTabSize();
    313 
    314   if (0x0 != (offset & 1))
    315     ++offset;
    316 
    317   assert(pArchive.getARFile().hasMemArea());
    318 
    319   llvm::StringRef header_region =
    320     pArchive.getARFile().memArea()->request((pArchive.getARFile().fileOffset() +
    321                                              offset),
    322                                             sizeof(Archive::MemberHeader));
    323   const Archive::MemberHeader* header =
    324     reinterpret_cast<const Archive::MemberHeader*>(header_region.begin());
    325 
    326   assert(0 == memcmp(header->fmag, Archive::MEMBER_MAGIC, sizeof(header->fmag)));
    327 
    328   if (0 == memcmp(header->name, Archive::STRTAB_NAME, sizeof(header->name))) {
    329     // read the extended name table
    330     int strtab_size = atoi(header->size);
    331     llvm::StringRef strtab_region =
    332       pArchive.getARFile().memArea()->request(
    333                                    (pArchive.getARFile().fileOffset() +
    334                                     offset + sizeof(Archive::MemberHeader)),
    335                                    strtab_size);
    336     const char* strtab = strtab_region.begin();
    337     pArchive.getStrTable().assign(strtab, strtab_size);
    338   }
    339   return true;
    340 }
    341 
    342 /// shouldIncludeStatus - given a sym name from armap and check if including
    343 /// the corresponding archive member, and then return the decision
    344 enum Archive::Symbol::Status
    345 GNUArchiveReader::shouldIncludeSymbol(const llvm::StringRef& pSymName) const
    346 {
    347   // TODO: handle symbol version issue and user defined symbols
    348   const ResolveInfo* info = m_Module.getNamePool().findInfo(pSymName);
    349   if (NULL != info) {
    350     if (!info->isUndef())
    351       return Archive::Symbol::Exclude;
    352     if (info->isWeak())
    353       return Archive::Symbol::Unknown;
    354     return Archive::Symbol::Include;
    355   }
    356   return Archive::Symbol::Unknown;
    357 }
    358 
    359 /// includeMember - include the object member in the given file offset, and
    360 /// return the size of the object
    361 /// @param pConfig - LinkerConfig
    362 /// @param pArchiveRoot - the archive root
    363 /// @param pFileOffset  - file offset of the member header in the archive
    364 size_t GNUArchiveReader::includeMember(const LinkerConfig& pConfig,
    365                                        Archive& pArchive,
    366                                        uint32_t pFileOffset)
    367 {
    368   Input* cur_archive = &(pArchive.getARFile());
    369   Input* member = NULL;
    370   uint32_t file_offset = pFileOffset;
    371   size_t size = 0;
    372   do {
    373     uint32_t nested_offset = 0;
    374     // use the file offset in current archive to find out the member we
    375     // want to include
    376     member = readMemberHeader(pArchive,
    377                               *cur_archive,
    378                               file_offset,
    379                               nested_offset,
    380                               size);
    381     assert(member != NULL);
    382     // bypass if we get an archive that is already in the map
    383     if (Input::Archive == member->type()) {
    384         cur_archive = member;
    385         file_offset = nested_offset;
    386         continue;
    387     }
    388 
    389     // insert a node into the subtree of current archive.
    390     Archive::ArchiveMember* parent =
    391       pArchive.getArchiveMember(cur_archive->name());
    392 
    393     assert(NULL != parent);
    394     pArchive.inputs().insert(parent->lastPos, *(parent->move), *member);
    395 
    396     // move the iterator to new created node, and also adjust the
    397     // direction to Afterward for next insertion in this subtree
    398     parent->move->move(parent->lastPos);
    399     parent->move = &InputTree::Afterward;
    400     bool doContinue = false;
    401 
    402     if (m_ELFObjectReader.isMyFormat(*member, doContinue)) {
    403       member->setType(Input::Object);
    404       // Set this object as no export if the archive is in the exclude libs.
    405       if (pArchive.getARFile().noExport()) {
    406         member->setNoExport();
    407       }
    408       pArchive.addObjectMember(pFileOffset, parent->lastPos);
    409       m_ELFObjectReader.readHeader(*member);
    410       m_ELFObjectReader.readSections(*member);
    411       m_ELFObjectReader.readSymbols(*member);
    412       m_Module.getObjectList().push_back(member);
    413     }
    414     else if (doContinue && isMyFormat(*member, doContinue)) {
    415       member->setType(Input::Archive);
    416       // when adding a new archive node, set the iterator to archive
    417       // itself, and set the direction to Downward
    418       pArchive.addArchiveMember(member->name(),
    419                                 parent->lastPos,
    420                                 &InputTree::Downward);
    421       cur_archive = member;
    422       file_offset = nested_offset;
    423     }
    424     else {
    425       warning(diag::warn_unrecognized_input_file) << member->path()
    426         << pConfig.targets().triple().str();
    427     }
    428   } while (Input::Object != member->type());
    429   return size;
    430 }
    431 
    432 /// includeAllMembers - include all object members. This is called if
    433 /// --whole-archive is the attribute for this archive file.
    434 bool GNUArchiveReader::includeAllMembers(const LinkerConfig& pConfig,
    435                                          Archive& pArchive)
    436 {
    437   // read the symtab of the archive
    438   readSymbolTable(pArchive);
    439 
    440   // read the strtab of the archive
    441   readStringTable(pArchive);
    442 
    443   // add root archive to ArchiveMemberMap
    444   pArchive.addArchiveMember(pArchive.getARFile().name(),
    445                             pArchive.inputs().root(),
    446                             &InputTree::Downward);
    447 
    448   bool isThinAR = isThinArchive(pArchive.getARFile());
    449   uint32_t begin_offset = pArchive.getARFile().fileOffset() +
    450                           Archive::MAGIC_LEN +
    451                           sizeof(Archive::MemberHeader) +
    452                           pArchive.getSymTabSize();
    453   if (pArchive.hasStrTable()) {
    454     if (0x0 != (begin_offset & 1))
    455       ++begin_offset;
    456     begin_offset += sizeof(Archive::MemberHeader) +
    457                     pArchive.getStrTable().size();
    458   }
    459   uint32_t end_offset = pArchive.getARFile().memArea()->size();
    460   for (uint32_t offset = begin_offset;
    461        offset < end_offset;
    462        offset += sizeof(Archive::MemberHeader)) {
    463 
    464     size_t size = includeMember(pConfig, pArchive, offset);
    465 
    466     if (!isThinAR) {
    467       offset += size;
    468     }
    469 
    470     if (0x0 != (offset & 1))
    471       ++offset;
    472   }
    473   return true;
    474 }
    475