Home | History | Annotate | Download | only in LD
      1 //===- GNUArchiveReader.cpp -----------------------------------------------===//
      2 //
      3 //                     The MCLinker Project
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 #include <mcld/MC/MCLDInfo.h>
     10 #include <mcld/MC/MCLDInput.h>
     11 #include <mcld/MC/InputTree.h>
     12 #include <mcld/LD/GNUArchiveReader.h>
     13 #include <mcld/LD/ResolveInfo.h>
     14 #include <mcld/LD/ELFObjectReader.h>
     15 #include <mcld/Support/FileSystem.h>
     16 #include <mcld/Support/FileHandle.h>
     17 #include <mcld/Support/MemoryArea.h>
     18 #include <mcld/Support/MemoryRegion.h>
     19 #include <mcld/Support/MemoryAreaFactory.h>
     20 #include <mcld/Support/MsgHandling.h>
     21 #include <mcld/Support/Path.h>
     22 #include <mcld/ADT/SizeTraits.h>
     23 
     24 #include <llvm/ADT/StringRef.h>
     25 #include <llvm/Support/Host.h>
     26 
     27 #include <cstring>
     28 #include <cstdlib>
     29 
     30 using namespace mcld;
     31 
     32 GNUArchiveReader::GNUArchiveReader(MCLDInfo& pLDInfo,
     33                                    MemoryAreaFactory& pMemAreaFactory,
     34                                    ELFObjectReader& pELFObjectReader)
     35  : m_LDInfo(pLDInfo),
     36    m_MemAreaFactory(pMemAreaFactory),
     37    m_ELFObjectReader(pELFObjectReader)
     38 {
     39 }
     40 
     41 GNUArchiveReader::~GNUArchiveReader()
     42 {
     43 }
     44 
     45 /// isMyFormat
     46 bool GNUArchiveReader::isMyFormat(Input& pInput) const
     47 {
     48   assert(pInput.hasMemArea());
     49   MemoryRegion* region = pInput.memArea()->request(pInput.fileOffset(),
     50                                                    Archive::MAGIC_LEN);
     51   const char* str = reinterpret_cast<const char*>(region->getBuffer());
     52 
     53   bool result = false;
     54   assert(NULL != str);
     55   if (isArchive(str) || isThinArchive(str))
     56     result = true;
     57 
     58   pInput.memArea()->release(region);
     59   return result;
     60 }
     61 
     62 /// isArchive
     63 bool GNUArchiveReader::isArchive(const char* pStr) const
     64 {
     65   return (0 == memcmp(pStr, Archive::MAGIC, Archive::MAGIC_LEN));
     66 }
     67 
     68 /// isThinArchive
     69 bool GNUArchiveReader::isThinArchive(const char* pStr) const
     70 {
     71   return (0 == memcmp(pStr, Archive::THIN_MAGIC, Archive::MAGIC_LEN));
     72 }
     73 
     74 /// isThinArchive
     75 bool GNUArchiveReader::isThinArchive(Input& pInput) const
     76 {
     77   assert(pInput.hasMemArea());
     78   MemoryRegion* region = pInput.memArea()->request(pInput.fileOffset(),
     79                                                    Archive::MAGIC_LEN);
     80   const char* str = reinterpret_cast<const char*>(region->getBuffer());
     81 
     82   bool result = false;
     83   assert(NULL != str);
     84   if (isThinArchive(str))
     85     result = true;
     86 
     87   pInput.memArea()->release(region);
     88   return result;
     89 }
     90 
     91 bool GNUArchiveReader::readArchive(Archive& pArchive)
     92 {
     93   // read the symtab of the archive
     94   readSymbolTable(pArchive);
     95 
     96   // read the strtab of the archive
     97   readStringTable(pArchive);
     98 
     99   // add root archive to ArchiveMemberMap
    100   pArchive.addArchiveMember(pArchive.getARFile().name(),
    101                             pArchive.inputs().root(),
    102                             &InputTree::Downward);
    103 
    104   // include the needed members in the archive and build up the input tree
    105   bool willSymResolved;
    106   do {
    107     willSymResolved = false;
    108     for (size_t idx = 0; idx < pArchive.numOfSymbols(); ++idx) {
    109       // bypass if we already decided to include this symbol or not
    110       if (Archive::Symbol::Unknown != pArchive.getSymbolStatus(idx))
    111         continue;
    112 
    113       // bypass if another symbol with the same object file offset is included
    114       if (pArchive.hasObjectMember(pArchive.getObjFileOffset(idx))) {
    115         pArchive.setSymbolStatus(idx, Archive::Symbol::Include);
    116         continue;
    117       }
    118 
    119       // check if we should include this defined symbol
    120       Archive::Symbol::Status status =
    121         shouldIncludeSymbol(pArchive.getSymbolName(idx));
    122       if (Archive::Symbol::Unknown != status)
    123         pArchive.setSymbolStatus(idx, status);
    124 
    125       if (Archive::Symbol::Include == status) {
    126         Input* cur_archive = &(pArchive.getARFile());
    127         Input* member = cur_archive;
    128         uint32_t file_offset = pArchive.getObjFileOffset(idx);
    129         while ((member != NULL) && (Input::Object != member->type())) {
    130           uint32_t nested_offset = 0;
    131           // use the file offset in current archive to find out the member we
    132           // want to include
    133           member = readMemberHeader(pArchive,
    134                                     *cur_archive,
    135                                     file_offset,
    136                                     nested_offset);
    137           assert(member != NULL);
    138           // bypass if we get an archive that is already in the map
    139           if (Input::Archive == member->type()) {
    140               cur_archive = member;
    141               file_offset = nested_offset;
    142               continue;
    143           }
    144 
    145           // insert a node into the subtree of current archive.
    146           Archive::ArchiveMember* parent =
    147             pArchive.getArchiveMember(cur_archive->name());
    148 
    149           assert(NULL != parent);
    150           pArchive.inputs().insert(parent->lastPos, *(parent->move), *member);
    151 
    152           // move the iterator to new created node, and also adjust the
    153           // direction to Afterward for next insertion in this subtree
    154           parent->move->move(parent->lastPos);
    155           parent->move = &InputTree::Afterward;
    156 
    157           if (m_ELFObjectReader.isMyFormat(*member)) {
    158             member->setType(Input::Object);
    159             pArchive.addObjectMember(pArchive.getObjFileOffset(idx),
    160                                      parent->lastPos);
    161             m_ELFObjectReader.readObject(*member);
    162             m_ELFObjectReader.readSections(*member);
    163             m_ELFObjectReader.readSymbols(*member);
    164           }
    165           else if (isMyFormat(*member)) {
    166             member->setType(Input::Archive);
    167             // when adding a new archive node, set the iterator to archive
    168             // itself, and set the direction to Downward
    169             pArchive.addArchiveMember(member->name(),
    170                                       parent->lastPos,
    171                                       &InputTree::Downward);
    172             cur_archive = member;
    173             file_offset = nested_offset;
    174           }
    175         } // end of while
    176         willSymResolved = true;
    177       } // end of if
    178     } // end of for
    179   } while (willSymResolved);
    180 
    181   return true;
    182 }
    183 
    184 /// readMemberHeader - read the header of a member in a archive file and then
    185 /// return the corresponding archive member (it may be an input object or
    186 /// another archive)
    187 /// @param pArchiveRoot  - the archive root that holds the strtab (extended
    188 ///                        name table)
    189 /// @param pArchiveFile  - the archive that contains the needed object
    190 /// @param pFileOffset   - file offset of the member header in the archive
    191 /// @param pNestedOffset - used when we find a nested archive
    192 Input* GNUArchiveReader::readMemberHeader(Archive& pArchiveRoot,
    193                                           Input& pArchiveFile,
    194                                           uint32_t pFileOffset,
    195                                           uint32_t& pNestedOffset)
    196 {
    197   assert(pArchiveFile.hasMemArea());
    198 
    199   MemoryRegion* header_region =
    200     pArchiveFile.memArea()->request((pArchiveFile.fileOffset() + pFileOffset),
    201                                     sizeof(Archive::MemberHeader));
    202   const Archive::MemberHeader* header =
    203     reinterpret_cast<const Archive::MemberHeader*>(header_region->getBuffer());
    204 
    205   assert(0 == memcmp(header->fmag, Archive::MEMBER_MAGIC, 2));
    206 
    207   // int size = atoi(header->size);
    208 
    209   // parse the member name and nested offset if any
    210   std::string member_name;
    211   llvm::StringRef name_field(header->name, 16);
    212   if ('/' != header->name[0]) {
    213     // this is an object file in an archive
    214     size_t pos = name_field.find_first_of('/');
    215     member_name.assign(name_field.substr(0, pos).str());
    216   }
    217   else {
    218     // this is an object/archive file in a thin archive
    219     size_t begin = 1;
    220     size_t end = name_field.find_first_of(" :");
    221     uint32_t name_offset = 0;
    222     // parse the name offset
    223     name_field.substr(begin, end - begin).getAsInteger(10, name_offset);
    224 
    225     if (':' == name_field[end]) {
    226       // there is a nested offset
    227       begin = end + 1;
    228       end = name_field.find_first_of(' ', begin);
    229       name_field.substr(begin, end - begin).getAsInteger(10, pNestedOffset);
    230     }
    231 
    232     // get the member name from the extended name table
    233     begin = name_offset;
    234     end = pArchiveRoot.getStrTable().find_first_of('\n', begin);
    235     member_name.assign(pArchiveRoot.getStrTable().substr(begin, end - begin -1));
    236   }
    237 
    238   Input* member = NULL;
    239   if (!isThinArchive(pArchiveFile)) {
    240     // this is an object file in an archive
    241     member =
    242       m_LDInfo.inputFactory().produce(member_name,
    243                                       pArchiveFile.path(),
    244                                       Input::Unknown,
    245                                       (pFileOffset +
    246                                        sizeof(Archive::MemberHeader)));
    247     assert(member != NULL);
    248     member->setMemArea(pArchiveFile.memArea());
    249     LDContext *input_context = m_LDInfo.contextFactory().produce();
    250     member->setContext(input_context);
    251   }
    252   else {
    253     // this is a member in a thin archive
    254     // try to find if this is a archive already in the map first
    255     Archive::ArchiveMember* ar_member =
    256       pArchiveRoot.getArchiveMember(member_name);
    257     if (NULL != ar_member) {
    258       return ar_member->file;
    259     }
    260 
    261     // get nested file path, the nested file's member name is the relative
    262     // path to the archive containing it.
    263     sys::fs::Path input_path(pArchiveFile.path().parent_path());
    264     if (!input_path.empty())
    265       input_path.append(member_name);
    266     else
    267       input_path.assign(member_name);
    268     member =
    269       m_LDInfo.inputFactory().produce(member_name, input_path, Input::Unknown);
    270 
    271     assert(member != NULL);
    272     MemoryArea* input_memory =
    273       m_MemAreaFactory.produce(member->path(), FileHandle::ReadOnly);
    274     if (input_memory->handler()->isGood()) {
    275       member->setMemArea(input_memory);
    276     }
    277     else {
    278       error(diag::err_cannot_open_input) << member->name() << member->path();
    279       return NULL;
    280     }
    281     LDContext *input_context = m_LDInfo.contextFactory().produce(input_path);
    282     member->setContext(input_context);
    283   }
    284 
    285   pArchiveFile.memArea()->release(header_region);
    286   return member;
    287 }
    288 
    289 /// readSymbolTable - read the archive symbol map (armap)
    290 bool GNUArchiveReader::readSymbolTable(Archive& pArchive)
    291 {
    292   assert(pArchive.getARFile().hasMemArea());
    293 
    294   MemoryRegion* header_region =
    295     pArchive.getARFile().memArea()->request((pArchive.getARFile().fileOffset() +
    296                                              Archive::MAGIC_LEN),
    297                                             sizeof(Archive::MemberHeader));
    298   const Archive::MemberHeader* header =
    299     reinterpret_cast<const Archive::MemberHeader*>(header_region->getBuffer());
    300   assert(0 == memcmp(header->fmag, Archive::MEMBER_MAGIC, 2));
    301 
    302   int symtab_size = atoi(header->size);
    303   pArchive.setSymTabSize(symtab_size);
    304 
    305   MemoryRegion* symtab_region =
    306     pArchive.getARFile().memArea()->request((pArchive.getARFile().fileOffset() +
    307                                              Archive::MAGIC_LEN +
    308                                              sizeof(Archive::MemberHeader)),
    309                                             symtab_size);
    310   const uint32_t* data =
    311     reinterpret_cast<const uint32_t*>(symtab_region->getBuffer());
    312 
    313   // read the number of symbols
    314   uint32_t number = 0;
    315   if (llvm::sys::isLittleEndianHost())
    316     number = bswap32(*data);
    317   else
    318     number = *data;
    319 
    320   // set up the pointers for file offset and name offset
    321   ++data;
    322   const char* name = reinterpret_cast<const char*>(data + number);
    323 
    324   // add the archive symbols
    325   for (uint32_t i = 0; i < number; ++i) {
    326     if (llvm::sys::isLittleEndianHost())
    327       pArchive.addSymbol(name, bswap32(*data));
    328     else
    329       pArchive.addSymbol(name, *data);
    330     name += strlen(name) + 1;
    331     ++data;
    332   }
    333 
    334   pArchive.getARFile().memArea()->release(header_region);
    335   pArchive.getARFile().memArea()->release(symtab_region);
    336   return true;
    337 }
    338 
    339 /// readStringTable - read the strtab for long file name of the archive
    340 bool GNUArchiveReader::readStringTable(Archive& pArchive)
    341 {
    342   size_t offset = Archive::MAGIC_LEN +
    343                   sizeof(Archive::MemberHeader) +
    344                   pArchive.getSymTabSize();
    345 
    346   if (0x0 != (offset & 1))
    347     ++offset;
    348 
    349   assert(pArchive.getARFile().hasMemArea());
    350 
    351   MemoryRegion* header_region =
    352     pArchive.getARFile().memArea()->request((pArchive.getARFile().fileOffset() +
    353                                              offset),
    354                                             sizeof(Archive::MemberHeader));
    355   const Archive::MemberHeader* header =
    356     reinterpret_cast<const Archive::MemberHeader*>(header_region->getBuffer());
    357 
    358   assert(0 == memcmp(header->fmag, Archive::MEMBER_MAGIC, 2));
    359 
    360   int strtab_size = atoi(header->size);
    361 
    362   MemoryRegion* strtab_region =
    363     pArchive.getARFile().memArea()->request((pArchive.getARFile().fileOffset() +
    364                                              offset +
    365                                              sizeof(Archive::MemberHeader)),
    366                                             strtab_size);
    367   const char* strtab =
    368     reinterpret_cast<const char*>(strtab_region->getBuffer());
    369 
    370   pArchive.getStrTable().assign(strtab, strtab_size);
    371 
    372   pArchive.getARFile().memArea()->release(header_region);
    373   pArchive.getARFile().memArea()->release(strtab_region);
    374   return true;
    375 }
    376 
    377 /// shouldIncludeStatus - given a sym name from armap and check if including
    378 /// the corresponding archive member, and then return the decision
    379 enum Archive::Symbol::Status
    380 GNUArchiveReader::shouldIncludeSymbol(const llvm::StringRef& pSymName) const
    381 {
    382   // TODO: handle symbol version issue and user defined symbols
    383   ResolveInfo* info = m_LDInfo.getNamePool().findInfo(pSymName);
    384   if (NULL != info) {
    385     if (!info->isUndef())
    386       return Archive::Symbol::Exclude;
    387     if (info->isWeak())
    388       return Archive::Symbol::Unknown;
    389     return Archive::Symbol::Include;
    390   }
    391   return Archive::Symbol::Unknown;
    392 }
    393 
    394