Home | History | Annotate | Download | only in Archive
      1 //===-- ArchiveReader.cpp - Read LLVM archive files -------------*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
// Reads standard unix archive files (.a) containing LLVM bitcode.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #include "ArchiveInternals.h"
     15 #include "llvm/ADT/SmallPtrSet.h"
     16 #include "llvm/Bitcode/ReaderWriter.h"
     17 #include "llvm/Support/MemoryBuffer.h"
     18 #include "llvm/Module.h"
     19 #include <cstdio>
     20 #include <cstdlib>
     21 #include <memory>
     22 using namespace llvm;
     23 
     24 /// Read a variable-bit-rate encoded unsigned integer
     25 static inline unsigned readInteger(const char*&At, const char*End) {
     26   unsigned Shift = 0;
     27   unsigned Result = 0;
     28 
     29   do {
     30     if (At == End)
     31       return Result;
     32     Result |= (unsigned)((*At++) & 0x7F) << Shift;
     33     Shift += 7;
     34   } while (At[-1] & 0x80);
     35   return Result;
     36 }
     37 
     38 // Completely parse the Archive's symbol table and populate symTab member var.
     39 bool
     40 Archive::parseSymbolTable(const void* data, unsigned size, std::string* error) {
     41   const char* At = (const char*) data;
     42   const char* End = At + size;
     43   while (At < End) {
     44     unsigned offset = readInteger(At, End);
     45     if (At == End) {
     46       if (error)
     47         *error = "Ran out of data reading vbr_uint for symtab offset!";
     48       return false;
     49     }
     50     unsigned length = readInteger(At, End);
     51     if (At == End) {
     52       if (error)
     53         *error = "Ran out of data reading vbr_uint for symtab length!";
     54       return false;
     55     }
     56     if (At + length > End) {
     57       if (error)
     58         *error = "Malformed symbol table: length not consistent with size";
     59       return false;
     60     }
     61     // we don't care if it can't be inserted (duplicate entry)
     62     symTab.insert(std::make_pair(std::string(At, length), offset));
     63     At += length;
     64   }
     65   symTabSize = size;
     66   return true;
     67 }
     68 
     69 // This member parses an ArchiveMemberHeader that is presumed to be pointed to
     70 // by At. The At pointer is updated to the byte just after the header, which
     71 // can be variable in size.
     72 ArchiveMember*
     73 Archive::parseMemberHeader(const char*& At, const char* End, std::string* error)
     74 {
     75   if (At + sizeof(ArchiveMemberHeader) >= End) {
     76     if (error)
     77       *error = "Unexpected end of file";
     78     return 0;
     79   }
     80 
     81   // Cast archive member header
     82   const ArchiveMemberHeader* Hdr = (const ArchiveMemberHeader*)At;
     83   At += sizeof(ArchiveMemberHeader);
     84 
     85   int flags = 0;
     86   int MemberSize = atoi(Hdr->size);
     87   assert(MemberSize >= 0);
     88 
     89   // Check the size of the member for sanity
     90   if (At + MemberSize > End) {
     91     if (error)
     92       *error = "invalid member length in archive file";
     93     return 0;
     94   }
     95 
     96   // Check the member signature
     97   if (!Hdr->checkSignature()) {
     98     if (error)
     99       *error = "invalid file member signature";
    100     return 0;
    101   }
    102 
    103   // Convert and check the member name
    104   // The empty name ( '/' and 15 blanks) is for a foreign (non-LLVM) symbol
    105   // table. The special name "//" and 14 blanks is for a string table, used
    106   // for long file names. This library doesn't generate either of those but
    107   // it will accept them. If the name starts with #1/ and the remainder is
    108   // digits, then those digits specify the length of the name that is
    109   // stored immediately following the header. The special name
    110   // __LLVM_SYM_TAB__ identifies the symbol table for LLVM bitcode.
    111   // Anything else is a regular, short filename that is terminated with
    112   // a '/' and blanks.
    113 
    114   std::string pathname;
    115   switch (Hdr->name[0]) {
    116     case '#':
    117       if (Hdr->name[1] == '1' && Hdr->name[2] == '/') {
    118         if (isdigit(Hdr->name[3])) {
    119           unsigned len = atoi(&Hdr->name[3]);
    120           const char *nulp = (const char *)memchr(At, '\0', len);
    121           pathname.assign(At, nulp != 0 ? (uintptr_t)(nulp - At) : len);
    122           At += len;
    123           MemberSize -= len;
    124           flags |= ArchiveMember::HasLongFilenameFlag;
    125         } else {
    126           if (error)
    127             *error = "invalid long filename";
    128           return 0;
    129         }
    130       } else if (Hdr->name[1] == '_' &&
    131                  (0 == memcmp(Hdr->name, ARFILE_LLVM_SYMTAB_NAME, 16))) {
    132         // The member is using a long file name (>15 chars) format.
    133         // This format is standard for 4.4BSD and Mac OSX operating
    134         // systems. LLVM uses it similarly. In this format, the
    135         // remainder of the name field (after #1/) specifies the
    136         // length of the file name which occupy the first bytes of
    137         // the member's data. The pathname already has the #1/ stripped.
    138         pathname.assign(ARFILE_LLVM_SYMTAB_NAME);
    139         flags |= ArchiveMember::LLVMSymbolTableFlag;
    140       }
    141       break;
    142     case '/':
    143       if (Hdr->name[1]== '/') {
    144         if (0 == memcmp(Hdr->name, ARFILE_STRTAB_NAME, 16)) {
    145           pathname.assign(ARFILE_STRTAB_NAME);
    146           flags |= ArchiveMember::StringTableFlag;
    147         } else {
    148           if (error)
    149             *error = "invalid string table name";
    150           return 0;
    151         }
    152       } else if (Hdr->name[1] == ' ') {
    153         if (0 == memcmp(Hdr->name, ARFILE_SVR4_SYMTAB_NAME, 16)) {
    154           pathname.assign(ARFILE_SVR4_SYMTAB_NAME);
    155           flags |= ArchiveMember::SVR4SymbolTableFlag;
    156         } else {
    157           if (error)
    158             *error = "invalid SVR4 symbol table name";
    159           return 0;
    160         }
    161       } else if (isdigit(Hdr->name[1])) {
    162         unsigned index = atoi(&Hdr->name[1]);
    163         if (index < strtab.length()) {
    164           const char* namep = strtab.c_str() + index;
    165           const char* endp = strtab.c_str() + strtab.length();
    166           const char* p = namep;
    167           const char* last_p = p;
    168           while (p < endp) {
    169             if (*p == '\n' && *last_p == '/') {
    170               pathname.assign(namep, last_p - namep);
    171               flags |= ArchiveMember::HasLongFilenameFlag;
    172               break;
    173             }
    174             last_p = p;
    175             p++;
    176           }
    177           if (p >= endp) {
    178             if (error)
    179               *error = "missing name termiantor in string table";
    180             return 0;
    181           }
    182         } else {
    183           if (error)
    184             *error = "name index beyond string table";
    185           return 0;
    186         }
    187       }
    188       break;
    189     case '_':
    190       if (Hdr->name[1] == '_' &&
    191           (0 == memcmp(Hdr->name, ARFILE_BSD4_SYMTAB_NAME, 16))) {
    192         pathname.assign(ARFILE_BSD4_SYMTAB_NAME);
    193         flags |= ArchiveMember::BSD4SymbolTableFlag;
    194         break;
    195       }
    196       /* FALL THROUGH */
    197 
    198     default:
    199       const char* slash = (const char*) memchr(Hdr->name, '/', 16);
    200       if (slash == 0)
    201         slash = Hdr->name + 16;
    202       pathname.assign(Hdr->name, slash - Hdr->name);
    203       break;
    204   }
    205 
    206   // Determine if this is a bitcode file
    207   switch (sys::IdentifyFileType(At, 4)) {
    208     case sys::Bitcode_FileType:
    209       flags |= ArchiveMember::BitcodeFlag;
    210       break;
    211     default:
    212       flags &= ~ArchiveMember::BitcodeFlag;
    213       break;
    214   }
    215 
    216   // Instantiate the ArchiveMember to be filled
    217   ArchiveMember* member = new ArchiveMember(this);
    218 
    219   // Fill in fields of the ArchiveMember
    220   member->parent = this;
    221   member->path.set(pathname);
    222   member->info.fileSize = MemberSize;
    223   member->info.modTime.fromEpochTime(atoi(Hdr->date));
    224   unsigned int mode;
    225   sscanf(Hdr->mode, "%o", &mode);
    226   member->info.mode = mode;
    227   member->info.user = atoi(Hdr->uid);
    228   member->info.group = atoi(Hdr->gid);
    229   member->flags = flags;
    230   member->data = At;
    231 
    232   return member;
    233 }
    234 
    235 bool
    236 Archive::checkSignature(std::string* error) {
    237   // Check the magic string at file's header
    238   if (mapfile->getBufferSize() < 8 || memcmp(base, ARFILE_MAGIC, 8)) {
    239     if (error)
    240       *error = "invalid signature for an archive file";
    241     return false;
    242   }
    243   return true;
    244 }
    245 
    246 // This function loads the entire archive and fully populates its ilist with
    247 // the members of the archive file. This is typically used in preparation for
    248 // editing the contents of the archive.
    249 bool
    250 Archive::loadArchive(std::string* error) {
    251 
    252   // Set up parsing
    253   members.clear();
    254   symTab.clear();
    255   const char *At = base;
    256   const char *End = mapfile->getBufferEnd();
    257 
    258   if (!checkSignature(error))
    259     return false;
    260 
    261   At += 8;  // Skip the magic string.
    262 
    263   bool seenSymbolTable = false;
    264   bool foundFirstFile = false;
    265   while (At < End) {
    266     // parse the member header
    267     const char* Save = At;
    268     ArchiveMember* mbr = parseMemberHeader(At, End, error);
    269     if (!mbr)
    270       return false;
    271 
    272     // check if this is the foreign symbol table
    273     if (mbr->isSVR4SymbolTable() || mbr->isBSD4SymbolTable()) {
    274       // We just save this but don't do anything special
    275       // with it. It doesn't count as the "first file".
    276       if (foreignST) {
    277         // What? Multiple foreign symbol tables? Just chuck it
    278         // and retain the last one found.
    279         delete foreignST;
    280       }
    281       foreignST = mbr;
    282       At += mbr->getSize();
    283       if ((intptr_t(At) & 1) == 1)
    284         At++;
    285     } else if (mbr->isStringTable()) {
    286       // Simply suck the entire string table into a string
    287       // variable. This will be used to get the names of the
    288       // members that use the "/ddd" format for their names
    289       // (SVR4 style long names).
    290       strtab.assign(At, mbr->getSize());
    291       At += mbr->getSize();
    292       if ((intptr_t(At) & 1) == 1)
    293         At++;
    294       delete mbr;
    295     } else if (mbr->isLLVMSymbolTable()) {
    296       // This is the LLVM symbol table for the archive. If we've seen it
    297       // already, its an error. Otherwise, parse the symbol table and move on.
    298       if (seenSymbolTable) {
    299         if (error)
    300           *error = "invalid archive: multiple symbol tables";
    301         return false;
    302       }
    303       if (!parseSymbolTable(mbr->getData(), mbr->getSize(), error))
    304         return false;
    305       seenSymbolTable = true;
    306       At += mbr->getSize();
    307       if ((intptr_t(At) & 1) == 1)
    308         At++;
    309       delete mbr; // We don't need this member in the list of members.
    310     } else {
    311       // This is just a regular file. If its the first one, save its offset.
    312       // Otherwise just push it on the list and move on to the next file.
    313       if (!foundFirstFile) {
    314         firstFileOffset = Save - base;
    315         foundFirstFile = true;
    316       }
    317       members.push_back(mbr);
    318       At += mbr->getSize();
    319       if ((intptr_t(At) & 1) == 1)
    320         At++;
    321     }
    322   }
    323   return true;
    324 }
    325 
    326 // Open and completely load the archive file.
    327 Archive*
    328 Archive::OpenAndLoad(const sys::Path& file, LLVMContext& C,
    329                      std::string* ErrorMessage) {
    330   std::auto_ptr<Archive> result ( new Archive(file, C));
    331   if (result->mapToMemory(ErrorMessage))
    332     return 0;
    333   if (!result->loadArchive(ErrorMessage))
    334     return 0;
    335   return result.release();
    336 }
    337 
    338 // Get all the bitcode modules from the archive
    339 bool
    340 Archive::getAllModules(std::vector<Module*>& Modules,
    341                        std::string* ErrMessage) {
    342 
    343   for (iterator I=begin(), E=end(); I != E; ++I) {
    344     if (I->isBitcode()) {
    345       std::string FullMemberName = archPath.str() +
    346         "(" + I->getPath().str() + ")";
    347       MemoryBuffer *Buffer =
    348         MemoryBuffer::getMemBufferCopy(StringRef(I->getData(), I->getSize()),
    349                                        FullMemberName.c_str());
    350 
    351       Module *M = ParseBitcodeFile(Buffer, Context, ErrMessage);
    352       delete Buffer;
    353       if (!M)
    354         return true;
    355 
    356       Modules.push_back(M);
    357     }
    358   }
    359   return false;
    360 }
    361 
    362 // Load just the symbol table from the archive file
    363 bool
    364 Archive::loadSymbolTable(std::string* ErrorMsg) {
    365 
    366   // Set up parsing
    367   members.clear();
    368   symTab.clear();
    369   const char *At = base;
    370   const char *End = mapfile->getBufferEnd();
    371 
    372   // Make sure we're dealing with an archive
    373   if (!checkSignature(ErrorMsg))
    374     return false;
    375 
    376   At += 8; // Skip signature
    377 
    378   // Parse the first file member header
    379   const char* FirstFile = At;
    380   ArchiveMember* mbr = parseMemberHeader(At, End, ErrorMsg);
    381   if (!mbr)
    382     return false;
    383 
    384   if (mbr->isSVR4SymbolTable() || mbr->isBSD4SymbolTable()) {
    385     // Skip the foreign symbol table, we don't do anything with it
    386     At += mbr->getSize();
    387     if ((intptr_t(At) & 1) == 1)
    388       At++;
    389     delete mbr;
    390 
    391     // Read the next one
    392     FirstFile = At;
    393     mbr = parseMemberHeader(At, End, ErrorMsg);
    394     if (!mbr) {
    395       delete mbr;
    396       return false;
    397     }
    398   }
    399 
    400   if (mbr->isStringTable()) {
    401     // Process the string table entry
    402     strtab.assign((const char*)mbr->getData(), mbr->getSize());
    403     At += mbr->getSize();
    404     if ((intptr_t(At) & 1) == 1)
    405       At++;
    406     delete mbr;
    407     // Get the next one
    408     FirstFile = At;
    409     mbr = parseMemberHeader(At, End, ErrorMsg);
    410     if (!mbr) {
    411       delete mbr;
    412       return false;
    413     }
    414   }
    415 
    416   // See if its the symbol table
    417   if (mbr->isLLVMSymbolTable()) {
    418     if (!parseSymbolTable(mbr->getData(), mbr->getSize(), ErrorMsg)) {
    419       delete mbr;
    420       return false;
    421     }
    422 
    423     At += mbr->getSize();
    424     if ((intptr_t(At) & 1) == 1)
    425       At++;
    426     delete mbr;
    427     // Can't be any more symtab headers so just advance
    428     FirstFile = At;
    429   } else {
    430     // There's no symbol table in the file. We have to rebuild it from scratch
    431     // because the intent of this method is to get the symbol table loaded so
    432     // it can be searched efficiently.
    433     // Add the member to the members list
    434     members.push_back(mbr);
    435   }
    436 
    437   firstFileOffset = FirstFile - base;
    438   return true;
    439 }
    440 
    441 // Open the archive and load just the symbol tables
    442 Archive* Archive::OpenAndLoadSymbols(const sys::Path& file,
    443                                      LLVMContext& C,
    444                                      std::string* ErrorMessage) {
    445   std::auto_ptr<Archive> result ( new Archive(file, C) );
    446   if (result->mapToMemory(ErrorMessage))
    447     return 0;
    448   if (!result->loadSymbolTable(ErrorMessage))
    449     return 0;
    450   return result.release();
    451 }
    452 
    453 // Look up one symbol in the symbol table and return the module that defines
    454 // that symbol.
    455 Module*
    456 Archive::findModuleDefiningSymbol(const std::string& symbol,
    457                                   std::string* ErrMsg) {
    458   SymTabType::iterator SI = symTab.find(symbol);
    459   if (SI == symTab.end())
    460     return 0;
    461 
    462   // The symbol table was previously constructed assuming that the members were
    463   // written without the symbol table header. Because VBR encoding is used, the
    464   // values could not be adjusted to account for the offset of the symbol table
    465   // because that could affect the size of the symbol table due to VBR encoding.
    466   // We now have to account for this by adjusting the offset by the size of the
    467   // symbol table and its header.
    468   unsigned fileOffset =
    469     SI->second +                // offset in symbol-table-less file
    470     firstFileOffset;            // add offset to first "real" file in archive
    471 
    472   // See if the module is already loaded
    473   ModuleMap::iterator MI = modules.find(fileOffset);
    474   if (MI != modules.end())
    475     return MI->second.first;
    476 
    477   // Module hasn't been loaded yet, we need to load it
    478   const char* modptr = base + fileOffset;
    479   ArchiveMember* mbr = parseMemberHeader(modptr, mapfile->getBufferEnd(),
    480                                          ErrMsg);
    481   if (!mbr)
    482     return 0;
    483 
    484   // Now, load the bitcode module to get the Module.
    485   std::string FullMemberName = archPath.str() + "(" +
    486     mbr->getPath().str() + ")";
    487   MemoryBuffer *Buffer =
    488     MemoryBuffer::getMemBufferCopy(StringRef(mbr->getData(), mbr->getSize()),
    489                                    FullMemberName.c_str());
    490 
    491   Module *m = getLazyBitcodeModule(Buffer, Context, ErrMsg);
    492   if (!m)
    493     return 0;
    494 
    495   modules.insert(std::make_pair(fileOffset, std::make_pair(m, mbr)));
    496 
    497   return m;
    498 }
    499 
    500 // Look up multiple symbols in the symbol table and return a set of
    501 // Modules that define those symbols.
    502 bool
    503 Archive::findModulesDefiningSymbols(std::set<std::string>& symbols,
    504                                     SmallVectorImpl<Module*>& result,
    505                                     std::string* error) {
    506   if (!mapfile || !base) {
    507     if (error)
    508       *error = "Empty archive invalid for finding modules defining symbols";
    509     return false;
    510   }
    511 
    512   if (symTab.empty()) {
    513     // We don't have a symbol table, so we must build it now but lets also
    514     // make sure that we populate the modules table as we do this to ensure
    515     // that we don't load them twice when findModuleDefiningSymbol is called
    516     // below.
    517 
    518     // Get a pointer to the first file
    519     const char* At  = base + firstFileOffset;
    520     const char* End = mapfile->getBufferEnd();
    521 
    522     while ( At < End) {
    523       // Compute the offset to be put in the symbol table
    524       unsigned offset = At - base - firstFileOffset;
    525 
    526       // Parse the file's header
    527       ArchiveMember* mbr = parseMemberHeader(At, End, error);
    528       if (!mbr)
    529         return false;
    530 
    531       // If it contains symbols
    532       if (mbr->isBitcode()) {
    533         // Get the symbols
    534         std::vector<std::string> symbols;
    535         std::string FullMemberName = archPath.str() + "(" +
    536           mbr->getPath().str() + ")";
    537         Module* M =
    538           GetBitcodeSymbols(At, mbr->getSize(), FullMemberName, Context,
    539                             symbols, error);
    540 
    541         if (M) {
    542           // Insert the module's symbols into the symbol table
    543           for (std::vector<std::string>::iterator I = symbols.begin(),
    544                E=symbols.end(); I != E; ++I ) {
    545             symTab.insert(std::make_pair(*I, offset));
    546           }
    547           // Insert the Module and the ArchiveMember into the table of
    548           // modules.
    549           modules.insert(std::make_pair(offset, std::make_pair(M, mbr)));
    550         } else {
    551           if (error)
    552             *error = "Can't parse bitcode member: " +
    553               mbr->getPath().str() + ": " + *error;
    554           delete mbr;
    555           return false;
    556         }
    557       }
    558 
    559       // Go to the next file location
    560       At += mbr->getSize();
    561       if ((intptr_t(At) & 1) == 1)
    562         At++;
    563     }
    564   }
    565 
    566   // At this point we have a valid symbol table (one way or another) so we
    567   // just use it to quickly find the symbols requested.
    568 
    569   SmallPtrSet<Module*, 16> Added;
    570   for (std::set<std::string>::iterator I=symbols.begin(),
    571          Next = I,
    572          E=symbols.end(); I != E; I = Next) {
    573     // Increment Next before we invalidate it.
    574     ++Next;
    575 
    576     // See if this symbol exists
    577     Module* m = findModuleDefiningSymbol(*I,error);
    578     if (!m)
    579       continue;
    580     bool NewMember = Added.insert(m);
    581     if (!NewMember)
    582       continue;
    583 
    584     // The symbol exists, insert the Module into our result.
    585     result.push_back(m);
    586 
    587     // Remove the symbol now that its been resolved.
    588     symbols.erase(I);
    589   }
    590   return true;
    591 }
    592 
    593 bool Archive::isBitcodeArchive() {
    594   // Make sure the symTab has been loaded. In most cases this should have been
    595   // done when the archive was constructed, but still,  this is just in case.
    596   if (symTab.empty())
    597     if (!loadSymbolTable(0))
    598       return false;
    599 
    600   // Now that we know it's been loaded, return true
    601   // if it has a size
    602   if (symTab.size()) return true;
    603 
    604   // We still can't be sure it isn't a bitcode archive
    605   if (!loadArchive(0))
    606     return false;
    607 
    608   std::vector<Module *> Modules;
    609   std::string ErrorMessage;
    610 
    611   // Scan the archive, trying to load a bitcode member.  We only load one to
    612   // see if this works.
    613   for (iterator I = begin(), E = end(); I != E; ++I) {
    614     if (!I->isBitcode())
    615       continue;
    616 
    617     std::string FullMemberName =
    618       archPath.str() + "(" + I->getPath().str() + ")";
    619 
    620     MemoryBuffer *Buffer =
    621       MemoryBuffer::getMemBufferCopy(StringRef(I->getData(), I->getSize()),
    622                                      FullMemberName.c_str());
    623     Module *M = ParseBitcodeFile(Buffer, Context);
    624     delete Buffer;
    625     if (!M)
    626       return false;  // Couldn't parse bitcode, not a bitcode archive.
    627     delete M;
    628     return true;
    629   }
    630 
    631   return false;
    632 }
    633