Home | History | Annotate | Download | only in LD
      1 //===- GNUArchiveReader.cpp -----------------------------------------------===//
      2 //
      3 //                     The MCLinker Project
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 #include <mcld/LD/GNUArchiveReader.h>
     10 
     11 #include <mcld/Module.h>
     12 #include <mcld/InputTree.h>
     13 #include <mcld/MC/Attribute.h>
     14 #include <mcld/MC/MCLDInput.h>
     15 #include <mcld/LD/ResolveInfo.h>
     16 #include <mcld/LD/ELFObjectReader.h>
     17 #include <mcld/Support/FileSystem.h>
     18 #include <mcld/Support/FileHandle.h>
     19 #include <mcld/Support/MemoryArea.h>
     20 #include <mcld/Support/MemoryRegion.h>
     21 #include <mcld/Support/MsgHandling.h>
     22 #include <mcld/Support/Path.h>
     23 #include <mcld/ADT/SizeTraits.h>
     24 
     25 #include <llvm/ADT/StringRef.h>
     26 #include <llvm/Support/Host.h>
     27 
     28 #include <cstring>
     29 #include <cstdlib>
     30 
     31 using namespace mcld;
     32 
     33 GNUArchiveReader::GNUArchiveReader(Module& pModule,
     34                                    ELFObjectReader& pELFObjectReader)
     35  : m_Module(pModule),
     36    m_ELFObjectReader(pELFObjectReader)
     37 {
     38 }
     39 
     40 GNUArchiveReader::~GNUArchiveReader()
     41 {
     42 }
     43 
     44 /// isMyFormat
     45 bool GNUArchiveReader::isMyFormat(Input& pInput) const
     46 {
     47   assert(pInput.hasMemArea());
     48   MemoryRegion* region = pInput.memArea()->request(pInput.fileOffset(),
     49                                                    Archive::MAGIC_LEN);
     50   const char* str = reinterpret_cast<const char*>(region->getBuffer());
     51 
     52   bool result = false;
     53   assert(NULL != str);
     54   if (isArchive(str) || isThinArchive(str))
     55     result = true;
     56 
     57   pInput.memArea()->release(region);
     58   return result;
     59 }
     60 
     61 /// isArchive
     62 bool GNUArchiveReader::isArchive(const char* pStr) const
     63 {
     64   return (0 == memcmp(pStr, Archive::MAGIC, Archive::MAGIC_LEN));
     65 }
     66 
     67 /// isThinArchive
     68 bool GNUArchiveReader::isThinArchive(const char* pStr) const
     69 {
     70   return (0 == memcmp(pStr, Archive::THIN_MAGIC, Archive::MAGIC_LEN));
     71 }
     72 
     73 /// isThinArchive
     74 bool GNUArchiveReader::isThinArchive(Input& pInput) const
     75 {
     76   assert(pInput.hasMemArea());
     77   MemoryRegion* region = pInput.memArea()->request(pInput.fileOffset(),
     78                                                    Archive::MAGIC_LEN);
     79   const char* str = reinterpret_cast<const char*>(region->getBuffer());
     80 
     81   bool result = false;
     82   assert(NULL != str);
     83   if (isThinArchive(str))
     84     result = true;
     85 
     86   pInput.memArea()->release(region);
     87   return result;
     88 }
     89 
     90 bool GNUArchiveReader::readArchive(Archive& pArchive)
     91 {
     92   // bypass the empty archive
     93   if (Archive::MAGIC_LEN == pArchive.getARFile().memArea()->handler()->size())
     94     return true;
     95 
     96   if (pArchive.getARFile().attribute()->isWholeArchive())
     97     return includeAllMembers(pArchive);
     98 
     99   // if this is the first time read this archive, setup symtab and strtab
    100   if (pArchive.getSymbolTable().empty()) {
    101   // read the symtab of the archive
    102   readSymbolTable(pArchive);
    103 
    104   // read the strtab of the archive
    105   readStringTable(pArchive);
    106 
    107   // add root archive to ArchiveMemberMap
    108   pArchive.addArchiveMember(pArchive.getARFile().name(),
    109                             pArchive.inputs().root(),
    110                             &InputTree::Downward);
    111   }
    112 
    113   // include the needed members in the archive and build up the input tree
    114   bool willSymResolved;
    115   do {
    116     willSymResolved = false;
    117     for (size_t idx = 0; idx < pArchive.numOfSymbols(); ++idx) {
    118       // bypass if we already decided to include this symbol or not
    119       if (Archive::Symbol::Unknown != pArchive.getSymbolStatus(idx))
    120         continue;
    121 
    122       // bypass if another symbol with the same object file offset is included
    123       if (pArchive.hasObjectMember(pArchive.getObjFileOffset(idx))) {
    124         pArchive.setSymbolStatus(idx, Archive::Symbol::Include);
    125         continue;
    126       }
    127 
    128       // check if we should include this defined symbol
    129       Archive::Symbol::Status status =
    130         shouldIncludeSymbol(pArchive.getSymbolName(idx));
    131       if (Archive::Symbol::Unknown != status)
    132         pArchive.setSymbolStatus(idx, status);
    133 
    134       if (Archive::Symbol::Include == status) {
    135         // include the object member from the given offset
    136         includeMember(pArchive, pArchive.getObjFileOffset(idx));
    137         willSymResolved = true;
    138       } // end of if
    139     } // end of for
    140   } while (willSymResolved);
    141 
    142   return true;
    143 }
    144 
    145 /// readMemberHeader - read the header of a member in a archive file and then
    146 /// return the corresponding archive member (it may be an input object or
    147 /// another archive)
    148 /// @param pArchiveRoot  - the archive root that holds the strtab (extended
    149 ///                        name table)
    150 /// @param pArchiveFile  - the archive that contains the needed object
    151 /// @param pFileOffset   - file offset of the member header in the archive
    152 /// @param pNestedOffset - used when we find a nested archive
    153 /// @param pMemberSize   - the file size of this member
    154 Input* GNUArchiveReader::readMemberHeader(Archive& pArchiveRoot,
    155                                           Input& pArchiveFile,
    156                                           uint32_t pFileOffset,
    157                                           uint32_t& pNestedOffset,
    158                                           size_t& pMemberSize)
    159 {
    160   assert(pArchiveFile.hasMemArea());
    161 
    162   MemoryRegion* header_region =
    163     pArchiveFile.memArea()->request((pArchiveFile.fileOffset() + pFileOffset),
    164                                     sizeof(Archive::MemberHeader));
    165   const Archive::MemberHeader* header =
    166     reinterpret_cast<const Archive::MemberHeader*>(header_region->getBuffer());
    167 
    168   assert(0 == memcmp(header->fmag, Archive::MEMBER_MAGIC, sizeof(header->fmag)));
    169 
    170   pMemberSize = atoi(header->size);
    171 
    172   // parse the member name and nested offset if any
    173   std::string member_name;
    174   llvm::StringRef name_field(header->name, sizeof(header->name));
    175   if ('/' != header->name[0]) {
    176     // this is an object file in an archive
    177     size_t pos = name_field.find_first_of('/');
    178     member_name.assign(name_field.substr(0, pos).str());
    179   }
    180   else {
    181     // this is an object/archive file in a thin archive
    182     size_t begin = 1;
    183     size_t end = name_field.find_first_of(" :");
    184     uint32_t name_offset = 0;
    185     // parse the name offset
    186     name_field.substr(begin, end - begin).getAsInteger(10, name_offset);
    187 
    188     if (':' == name_field[end]) {
    189       // there is a nested offset
    190       begin = end + 1;
    191       end = name_field.find_first_of(' ', begin);
    192       name_field.substr(begin, end - begin).getAsInteger(10, pNestedOffset);
    193     }
    194 
    195     // get the member name from the extended name table
    196     assert(pArchiveRoot.hasStrTable());
    197     begin = name_offset;
    198     end = pArchiveRoot.getStrTable().find_first_of('\n', begin);
    199     member_name.assign(pArchiveRoot.getStrTable().substr(begin, end - begin -1));
    200   }
    201 
    202   Input* member = NULL;
    203   bool isThinAR = isThinArchive(pArchiveFile);
    204   if (!isThinAR) {
    205     // this is an object file in an archive
    206     member = pArchiveRoot.getMemberFile(pArchiveFile,
    207                                         isThinAR,
    208                                         member_name,
    209                                         pArchiveFile.path(),
    210                                         (pFileOffset +
    211                                          sizeof(Archive::MemberHeader)));
    212   }
    213   else {
    214     // this is a member in a thin archive
    215     // try to find if this is a archive already in the map first
    216     Archive::ArchiveMember* ar_member =
    217       pArchiveRoot.getArchiveMember(member_name);
    218     if (NULL != ar_member) {
    219       return ar_member->file;
    220     }
    221 
    222     // get nested file path, the nested file's member name is the relative
    223     // path to the archive containing it.
    224     sys::fs::Path input_path(pArchiveFile.path().parent_path());
    225     if (!input_path.empty())
    226       input_path.append(member_name);
    227     else
    228       input_path.assign(member_name);
    229 
    230     member = pArchiveRoot.getMemberFile(pArchiveFile,
    231                                         isThinAR,
    232                                         member_name,
    233                                         input_path);
    234   }
    235 
    236   pArchiveFile.memArea()->release(header_region);
    237   return member;
    238 }
    239 
    240 /// readSymbolTable - read the archive symbol map (armap)
    241 bool GNUArchiveReader::readSymbolTable(Archive& pArchive)
    242 {
    243   assert(pArchive.getARFile().hasMemArea());
    244 
    245   MemoryRegion* header_region =
    246     pArchive.getARFile().memArea()->request((pArchive.getARFile().fileOffset() +
    247                                              Archive::MAGIC_LEN),
    248                                             sizeof(Archive::MemberHeader));
    249   const Archive::MemberHeader* header =
    250     reinterpret_cast<const Archive::MemberHeader*>(header_region->getBuffer());
    251   assert(0 == memcmp(header->fmag, Archive::MEMBER_MAGIC, sizeof(header->fmag)));
    252 
    253   int symtab_size = atoi(header->size);
    254   pArchive.setSymTabSize(symtab_size);
    255 
    256   if (!pArchive.getARFile().attribute()->isWholeArchive()) {
    257     MemoryRegion* symtab_region =
    258       pArchive.getARFile().memArea()->request(
    259                                             (pArchive.getARFile().fileOffset() +
    260                                              Archive::MAGIC_LEN +
    261                                              sizeof(Archive::MemberHeader)),
    262                                             symtab_size);
    263     const uint32_t* data =
    264       reinterpret_cast<const uint32_t*>(symtab_region->getBuffer());
    265 
    266     // read the number of symbols
    267     uint32_t number = 0;
    268     if (llvm::sys::isLittleEndianHost())
    269       number = mcld::bswap32(*data);
    270     else
    271       number = *data;
    272 
    273     // set up the pointers for file offset and name offset
    274     ++data;
    275     const char* name = reinterpret_cast<const char*>(data + number);
    276 
    277     // add the archive symbols
    278     for (uint32_t i = 0; i < number; ++i) {
    279       if (llvm::sys::isLittleEndianHost())
    280         pArchive.addSymbol(name, mcld::bswap32(*data));
    281       else
    282         pArchive.addSymbol(name, *data);
    283       name += strlen(name) + 1;
    284       ++data;
    285     }
    286     pArchive.getARFile().memArea()->release(symtab_region);
    287   }
    288   pArchive.getARFile().memArea()->release(header_region);
    289   return true;
    290 }
    291 
    292 /// readStringTable - read the strtab for long file name of the archive
    293 bool GNUArchiveReader::readStringTable(Archive& pArchive)
    294 {
    295   size_t offset = Archive::MAGIC_LEN +
    296                   sizeof(Archive::MemberHeader) +
    297                   pArchive.getSymTabSize();
    298 
    299   if (0x0 != (offset & 1))
    300     ++offset;
    301 
    302   assert(pArchive.getARFile().hasMemArea());
    303 
    304   MemoryRegion* header_region =
    305     pArchive.getARFile().memArea()->request((pArchive.getARFile().fileOffset() +
    306                                              offset),
    307                                             sizeof(Archive::MemberHeader));
    308   const Archive::MemberHeader* header =
    309     reinterpret_cast<const Archive::MemberHeader*>(header_region->getBuffer());
    310 
    311   assert(0 == memcmp(header->fmag, Archive::MEMBER_MAGIC, sizeof(header->fmag)));
    312 
    313   if (0 == memcmp(header->name, Archive::STRTAB_NAME, sizeof(header->name))) {
    314     // read the extended name table
    315     int strtab_size = atoi(header->size);
    316     MemoryRegion* strtab_region =
    317       pArchive.getARFile().memArea()->request(
    318                                    (pArchive.getARFile().fileOffset() +
    319                                     offset + sizeof(Archive::MemberHeader)),
    320                                    strtab_size);
    321     const char* strtab =
    322       reinterpret_cast<const char*>(strtab_region->getBuffer());
    323     pArchive.getStrTable().assign(strtab, strtab_size);
    324     pArchive.getARFile().memArea()->release(strtab_region);
    325   }
    326   pArchive.getARFile().memArea()->release(header_region);
    327   return true;
    328 }
    329 
    330 /// shouldIncludeStatus - given a sym name from armap and check if including
    331 /// the corresponding archive member, and then return the decision
    332 enum Archive::Symbol::Status
    333 GNUArchiveReader::shouldIncludeSymbol(const llvm::StringRef& pSymName) const
    334 {
    335   // TODO: handle symbol version issue and user defined symbols
    336   const ResolveInfo* info = m_Module.getNamePool().findInfo(pSymName);
    337   if (NULL != info) {
    338     if (!info->isUndef())
    339       return Archive::Symbol::Exclude;
    340     if (info->isWeak())
    341       return Archive::Symbol::Unknown;
    342     return Archive::Symbol::Include;
    343   }
    344   return Archive::Symbol::Unknown;
    345 }
    346 
    347 /// includeMember - include the object member in the given file offset, and
    348 /// return the size of the object
    349 /// @param pArchiveRoot - the archive root
    350 /// @param pFileOffset  - file offset of the member header in the archive
    351 size_t GNUArchiveReader::includeMember(Archive& pArchive, uint32_t pFileOffset)
    352 {
    353   Input* cur_archive = &(pArchive.getARFile());
    354   Input* member = NULL;
    355   uint32_t file_offset = pFileOffset;
    356   size_t size = 0;
    357   do {
    358     uint32_t nested_offset = 0;
    359     // use the file offset in current archive to find out the member we
    360     // want to include
    361     member = readMemberHeader(pArchive,
    362                               *cur_archive,
    363                               file_offset,
    364                               nested_offset,
    365                               size);
    366     assert(member != NULL);
    367     // bypass if we get an archive that is already in the map
    368     if (Input::Archive == member->type()) {
    369         cur_archive = member;
    370         file_offset = nested_offset;
    371         continue;
    372     }
    373 
    374     // insert a node into the subtree of current archive.
    375     Archive::ArchiveMember* parent =
    376       pArchive.getArchiveMember(cur_archive->name());
    377 
    378     assert(NULL != parent);
    379     pArchive.inputs().insert(parent->lastPos, *(parent->move), *member);
    380 
    381     // move the iterator to new created node, and also adjust the
    382     // direction to Afterward for next insertion in this subtree
    383     parent->move->move(parent->lastPos);
    384     parent->move = &InputTree::Afterward;
    385 
    386     if (m_ELFObjectReader.isMyFormat(*member)) {
    387       member->setType(Input::Object);
    388       pArchive.addObjectMember(pFileOffset, parent->lastPos);
    389       m_ELFObjectReader.readHeader(*member);
    390       m_ELFObjectReader.readSections(*member);
    391       m_ELFObjectReader.readSymbols(*member);
    392       m_Module.getObjectList().push_back(member);
    393     }
    394     else if (isMyFormat(*member)) {
    395       member->setType(Input::Archive);
    396       // when adding a new archive node, set the iterator to archive
    397       // itself, and set the direction to Downward
    398       pArchive.addArchiveMember(member->name(),
    399                                 parent->lastPos,
    400                                 &InputTree::Downward);
    401       cur_archive = member;
    402       file_offset = nested_offset;
    403     }
    404   } while (Input::Object != member->type());
    405   return size;
    406 }
    407 
    408 /// includeAllMembers - include all object members. This is called if
    409 /// --whole-archive is the attribute for this archive file.
    410 bool GNUArchiveReader::includeAllMembers(Archive& pArchive)
    411 {
    412   // read the symtab of the archive
    413   readSymbolTable(pArchive);
    414 
    415   // read the strtab of the archive
    416   readStringTable(pArchive);
    417 
    418   // add root archive to ArchiveMemberMap
    419   pArchive.addArchiveMember(pArchive.getARFile().name(),
    420                             pArchive.inputs().root(),
    421                             &InputTree::Downward);
    422 
    423   bool isThinAR = isThinArchive(pArchive.getARFile());
    424   uint32_t begin_offset = pArchive.getARFile().fileOffset() +
    425                           Archive::MAGIC_LEN +
    426                           sizeof(Archive::MemberHeader) +
    427                           pArchive.getSymTabSize();
    428   if (pArchive.hasStrTable()) {
    429     if (0x0 != (begin_offset & 1))
    430       ++begin_offset;
    431     begin_offset += sizeof(Archive::MemberHeader) +
    432                     pArchive.getStrTable().size();
    433   }
    434   uint32_t end_offset = pArchive.getARFile().memArea()->handler()->size();
    435   for (uint32_t offset = begin_offset;
    436        offset < end_offset;
    437        offset += sizeof(Archive::MemberHeader)) {
    438 
    439     size_t size = includeMember(pArchive, offset);
    440 
    441     if (!isThinAR) {
    442       offset += size;
    443     }
    444 
    445     if (0x0 != (offset & 1))
    446       ++offset;
    447   }
    448   return true;
    449 }
    450 
    451