Home | History | Annotate | Download | only in Object
      1 //===- Archive.cpp - ar File Format implementation --------------*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
// This file defines the Archive class, which reads ar archive files.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #include "llvm/Object/Archive.h"
     15 #include "llvm/ADT/APInt.h"
     16 #include "llvm/ADT/SmallString.h"
     17 #include "llvm/ADT/Twine.h"
     18 #include "llvm/Support/Endian.h"
     19 #include "llvm/Support/MemoryBuffer.h"
     20 
     21 using namespace llvm;
     22 using namespace object;
     23 using namespace llvm::support::endian;
     24 
     25 static const char *const Magic = "!<arch>\n";
     26 static const char *const ThinMagic = "!<thin>\n";
     27 
     28 void Archive::anchor() { }
     29 
     30 StringRef ArchiveMemberHeader::getName() const {
     31   char EndCond;
     32   if (Name[0] == '/' || Name[0] == '#')
     33     EndCond = ' ';
     34   else
     35     EndCond = '/';
     36   llvm::StringRef::size_type end =
     37       llvm::StringRef(Name, sizeof(Name)).find(EndCond);
     38   if (end == llvm::StringRef::npos)
     39     end = sizeof(Name);
     40   assert(end <= sizeof(Name) && end > 0);
     41   // Don't include the EndCond if there is one.
     42   return llvm::StringRef(Name, end);
     43 }
     44 
     45 uint32_t ArchiveMemberHeader::getSize() const {
     46   uint32_t Ret;
     47   if (llvm::StringRef(Size, sizeof(Size)).rtrim(" ").getAsInteger(10, Ret))
     48     llvm_unreachable("Size is not a decimal number.");
     49   return Ret;
     50 }
     51 
     52 sys::fs::perms ArchiveMemberHeader::getAccessMode() const {
     53   unsigned Ret;
     54   if (StringRef(AccessMode, sizeof(AccessMode)).rtrim(" ").getAsInteger(8, Ret))
     55     llvm_unreachable("Access mode is not an octal number.");
     56   return static_cast<sys::fs::perms>(Ret);
     57 }
     58 
     59 sys::TimeValue ArchiveMemberHeader::getLastModified() const {
     60   unsigned Seconds;
     61   if (StringRef(LastModified, sizeof(LastModified)).rtrim(" ")
     62           .getAsInteger(10, Seconds))
     63     llvm_unreachable("Last modified time not a decimal number.");
     64 
     65   sys::TimeValue Ret;
     66   Ret.fromEpochTime(Seconds);
     67   return Ret;
     68 }
     69 
     70 unsigned ArchiveMemberHeader::getUID() const {
     71   unsigned Ret;
     72   if (StringRef(UID, sizeof(UID)).rtrim(" ").getAsInteger(10, Ret))
     73     llvm_unreachable("UID time not a decimal number.");
     74   return Ret;
     75 }
     76 
     77 unsigned ArchiveMemberHeader::getGID() const {
     78   unsigned Ret;
     79   if (StringRef(GID, sizeof(GID)).rtrim(" ").getAsInteger(10, Ret))
     80     llvm_unreachable("GID time not a decimal number.");
     81   return Ret;
     82 }
     83 
     84 Archive::Child::Child(const Archive *Parent, const char *Start)
     85     : Parent(Parent) {
     86   if (!Start)
     87     return;
     88 
     89   const ArchiveMemberHeader *Header =
     90       reinterpret_cast<const ArchiveMemberHeader *>(Start);
     91   uint64_t Size = sizeof(ArchiveMemberHeader);
     92   if (!Parent->IsThin || Header->getName() == "/" || Header->getName() == "//")
     93     Size += Header->getSize();
     94   Data = StringRef(Start, Size);
     95 
     96   // Setup StartOfFile and PaddingBytes.
     97   StartOfFile = sizeof(ArchiveMemberHeader);
     98   // Don't include attached name.
     99   StringRef Name = Header->getName();
    100   if (Name.startswith("#1/")) {
    101     uint64_t NameSize;
    102     if (Name.substr(3).rtrim(" ").getAsInteger(10, NameSize))
    103       llvm_unreachable("Long name length is not an integer");
    104     StartOfFile += NameSize;
    105   }
    106 }
    107 
    108 uint64_t Archive::Child::getSize() const {
    109   if (Parent->IsThin)
    110     return getHeader()->getSize();
    111   return Data.size() - StartOfFile;
    112 }
    113 
    114 uint64_t Archive::Child::getRawSize() const {
    115   return getHeader()->getSize();
    116 }
    117 
    118 Archive::Child Archive::Child::getNext() const {
    119   size_t SpaceToSkip = Data.size();
    120   // If it's odd, add 1 to make it even.
    121   if (SpaceToSkip & 1)
    122     ++SpaceToSkip;
    123 
    124   const char *NextLoc = Data.data() + SpaceToSkip;
    125 
    126   // Check to see if this is past the end of the archive.
    127   if (NextLoc >= Parent->Data.getBufferEnd())
    128     return Child(Parent, nullptr);
    129 
    130   return Child(Parent, NextLoc);
    131 }
    132 
    133 uint64_t Archive::Child::getChildOffset() const {
    134   const char *a = Parent->Data.getBuffer().data();
    135   const char *c = Data.data();
    136   uint64_t offset = c - a;
    137   return offset;
    138 }
    139 
    140 ErrorOr<StringRef> Archive::Child::getName() const {
    141   StringRef name = getRawName();
    142   // Check if it's a special name.
    143   if (name[0] == '/') {
    144     if (name.size() == 1) // Linker member.
    145       return name;
    146     if (name.size() == 2 && name[1] == '/') // String table.
    147       return name;
    148     // It's a long name.
    149     // Get the offset.
    150     std::size_t offset;
    151     if (name.substr(1).rtrim(" ").getAsInteger(10, offset))
    152       llvm_unreachable("Long name offset is not an integer");
    153     const char *addr = Parent->StringTable->Data.begin()
    154                        + sizeof(ArchiveMemberHeader)
    155                        + offset;
    156     // Verify it.
    157     if (Parent->StringTable == Parent->child_end()
    158         || addr < (Parent->StringTable->Data.begin()
    159                    + sizeof(ArchiveMemberHeader))
    160         || addr > (Parent->StringTable->Data.begin()
    161                    + sizeof(ArchiveMemberHeader)
    162                    + Parent->StringTable->getSize()))
    163       return object_error::parse_failed;
    164 
    165     // GNU long file names end with a /.
    166     if (Parent->kind() == K_GNU || Parent->kind() == K_MIPS64) {
    167       StringRef::size_type End = StringRef(addr).find('/');
    168       return StringRef(addr, End);
    169     }
    170     return StringRef(addr);
    171   } else if (name.startswith("#1/")) {
    172     uint64_t name_size;
    173     if (name.substr(3).rtrim(" ").getAsInteger(10, name_size))
    174       llvm_unreachable("Long name length is not an ingeter");
    175     return Data.substr(sizeof(ArchiveMemberHeader), name_size)
    176         .rtrim(StringRef("\0", 1));
    177   }
    178   // It's a simple name.
    179   if (name[name.size() - 1] == '/')
    180     return name.substr(0, name.size() - 1);
    181   return name;
    182 }
    183 
    184 ErrorOr<MemoryBufferRef> Archive::Child::getMemoryBufferRef() const {
    185   ErrorOr<StringRef> NameOrErr = getName();
    186   if (std::error_code EC = NameOrErr.getError())
    187     return EC;
    188   StringRef Name = NameOrErr.get();
    189   return MemoryBufferRef(getBuffer(), Name);
    190 }
    191 
    192 ErrorOr<std::unique_ptr<Binary>>
    193 Archive::Child::getAsBinary(LLVMContext *Context) const {
    194   ErrorOr<MemoryBufferRef> BuffOrErr = getMemoryBufferRef();
    195   if (std::error_code EC = BuffOrErr.getError())
    196     return EC;
    197 
    198   return createBinary(BuffOrErr.get(), Context);
    199 }
    200 
    201 ErrorOr<std::unique_ptr<Archive>> Archive::create(MemoryBufferRef Source) {
    202   std::error_code EC;
    203   std::unique_ptr<Archive> Ret(new Archive(Source, EC));
    204   if (EC)
    205     return EC;
    206   return std::move(Ret);
    207 }
    208 
    209 Archive::Archive(MemoryBufferRef Source, std::error_code &ec)
    210     : Binary(Binary::ID_Archive, Source), SymbolTable(child_end()) {
    211   StringRef Buffer = Data.getBuffer();
    212   // Check for sufficient magic.
    213   if (Buffer.startswith(ThinMagic)) {
    214     IsThin = true;
    215   } else if (Buffer.startswith(Magic)) {
    216     IsThin = false;
    217   } else {
    218     ec = object_error::invalid_file_type;
    219     return;
    220   }
    221 
    222   // Get the special members.
    223   child_iterator i = child_begin(false);
    224   child_iterator e = child_end();
    225 
    226   if (i == e) {
    227     ec = object_error::success;
    228     return;
    229   }
    230 
    231   StringRef Name = i->getRawName();
    232 
    233   // Below is the pattern that is used to figure out the archive format
    234   // GNU archive format
    235   //  First member : / (may exist, if it exists, points to the symbol table )
    236   //  Second member : // (may exist, if it exists, points to the string table)
    237   //  Note : The string table is used if the filename exceeds 15 characters
    238   // BSD archive format
    239   //  First member : __.SYMDEF or "__.SYMDEF SORTED" (the symbol table)
    240   //  There is no string table, if the filename exceeds 15 characters or has a
    241   //  embedded space, the filename has #1/<size>, The size represents the size
    242   //  of the filename that needs to be read after the archive header
    243   // COFF archive format
    244   //  First member : /
    245   //  Second member : / (provides a directory of symbols)
    246   //  Third member : // (may exist, if it exists, contains the string table)
    247   //  Note: Microsoft PE/COFF Spec 8.3 says that the third member is present
    248   //  even if the string table is empty. However, lib.exe does not in fact
    249   //  seem to create the third member if there's no member whose filename
    250   //  exceeds 15 characters. So the third member is optional.
    251 
    252   if (Name == "__.SYMDEF") {
    253     Format = K_BSD;
    254     SymbolTable = i;
    255     ++i;
    256     FirstRegular = i;
    257     ec = object_error::success;
    258     return;
    259   }
    260 
    261   if (Name.startswith("#1/")) {
    262     Format = K_BSD;
    263     // We know this is BSD, so getName will work since there is no string table.
    264     ErrorOr<StringRef> NameOrErr = i->getName();
    265     ec = NameOrErr.getError();
    266     if (ec)
    267       return;
    268     Name = NameOrErr.get();
    269     if (Name == "__.SYMDEF SORTED" || Name == "__.SYMDEF") {
    270       SymbolTable = i;
    271       ++i;
    272     }
    273     FirstRegular = i;
    274     return;
    275   }
    276 
    277   // MIPS 64-bit ELF archives use a special format of a symbol table.
    278   // This format is marked by `ar_name` field equals to "/SYM64/".
    279   // For detailed description see page 96 in the following document:
    280   // http://techpubs.sgi.com/library/manuals/4000/007-4658-001/pdf/007-4658-001.pdf
    281 
    282   bool has64SymTable = false;
    283   if (Name == "/" || Name == "/SYM64/") {
    284     SymbolTable = i;
    285     if (Name == "/SYM64/")
    286       has64SymTable = true;
    287 
    288     ++i;
    289     if (i == e) {
    290       ec = object_error::parse_failed;
    291       return;
    292     }
    293     Name = i->getRawName();
    294   }
    295 
    296   if (Name == "//") {
    297     Format = has64SymTable ? K_MIPS64 : K_GNU;
    298     StringTable = i;
    299     ++i;
    300     FirstRegular = i;
    301     ec = object_error::success;
    302     return;
    303   }
    304 
    305   if (Name[0] != '/') {
    306     Format = has64SymTable ? K_MIPS64 : K_GNU;
    307     FirstRegular = i;
    308     ec = object_error::success;
    309     return;
    310   }
    311 
    312   if (Name != "/") {
    313     ec = object_error::parse_failed;
    314     return;
    315   }
    316 
    317   Format = K_COFF;
    318   SymbolTable = i;
    319 
    320   ++i;
    321   if (i == e) {
    322     FirstRegular = i;
    323     ec = object_error::success;
    324     return;
    325   }
    326 
    327   Name = i->getRawName();
    328 
    329   if (Name == "//") {
    330     StringTable = i;
    331     ++i;
    332   }
    333 
    334   FirstRegular = i;
    335   ec = object_error::success;
    336 }
    337 
    338 Archive::child_iterator Archive::child_begin(bool SkipInternal) const {
    339   if (Data.getBufferSize() == 8) // empty archive.
    340     return child_end();
    341 
    342   if (SkipInternal)
    343     return FirstRegular;
    344 
    345   const char *Loc = Data.getBufferStart() + strlen(Magic);
    346   Child c(this, Loc);
    347   return c;
    348 }
    349 
    350 Archive::child_iterator Archive::child_end() const {
    351   return Child(this, nullptr);
    352 }
    353 
    354 StringRef Archive::Symbol::getName() const {
    355   return Parent->SymbolTable->getBuffer().begin() + StringIndex;
    356 }
    357 
    358 ErrorOr<Archive::child_iterator> Archive::Symbol::getMember() const {
    359   const char *Buf = Parent->SymbolTable->getBuffer().begin();
    360   const char *Offsets = Buf;
    361   if (Parent->kind() == K_MIPS64)
    362     Offsets += sizeof(uint64_t);
    363   else
    364     Offsets += sizeof(uint32_t);
    365   uint32_t Offset = 0;
    366   if (Parent->kind() == K_GNU) {
    367     Offset = read32be(Offsets + SymbolIndex * 4);
    368   } else if (Parent->kind() == K_MIPS64) {
    369     Offset = read64be(Offsets + SymbolIndex * 8);
    370   } else if (Parent->kind() == K_BSD) {
    371     // The SymbolIndex is an index into the ranlib structs that start at
    372     // Offsets (the first uint32_t is the number of bytes of the ranlib
    373     // structs).  The ranlib structs are a pair of uint32_t's the first
    374     // being a string table offset and the second being the offset into
    375     // the archive of the member that defines the symbol.  Which is what
    376     // is needed here.
    377     Offset = read32le(Offsets + SymbolIndex * 8 + 4);
    378   } else {
    379     // Skip offsets.
    380     uint32_t MemberCount = read32le(Buf);
    381     Buf += MemberCount * 4 + 4;
    382 
    383     uint32_t SymbolCount = read32le(Buf);
    384     if (SymbolIndex >= SymbolCount)
    385       return object_error::parse_failed;
    386 
    387     // Skip SymbolCount to get to the indices table.
    388     const char *Indices = Buf + 4;
    389 
    390     // Get the index of the offset in the file member offset table for this
    391     // symbol.
    392     uint16_t OffsetIndex = read16le(Indices + SymbolIndex * 2);
    393     // Subtract 1 since OffsetIndex is 1 based.
    394     --OffsetIndex;
    395 
    396     if (OffsetIndex >= MemberCount)
    397       return object_error::parse_failed;
    398 
    399     Offset = read32le(Offsets + OffsetIndex * 4);
    400   }
    401 
    402   const char *Loc = Parent->getData().begin() + Offset;
    403   child_iterator Iter(Child(Parent, Loc));
    404   return Iter;
    405 }
    406 
    407 Archive::Symbol Archive::Symbol::getNext() const {
    408   Symbol t(*this);
    409   if (Parent->kind() == K_BSD) {
    410     // t.StringIndex is an offset from the start of the __.SYMDEF or
    411     // "__.SYMDEF SORTED" member into the string table for the ranlib
    412     // struct indexed by t.SymbolIndex .  To change t.StringIndex to the
    413     // offset in the string table for t.SymbolIndex+1 we subtract the
    414     // its offset from the start of the string table for t.SymbolIndex
    415     // and add the offset of the string table for t.SymbolIndex+1.
    416 
    417     // The __.SYMDEF or "__.SYMDEF SORTED" member starts with a uint32_t
    418     // which is the number of bytes of ranlib structs that follow.  The ranlib
    419     // structs are a pair of uint32_t's the first being a string table offset
    420     // and the second being the offset into the archive of the member that
    421     // define the symbol. After that the next uint32_t is the byte count of
    422     // the string table followed by the string table.
    423     const char *Buf = Parent->SymbolTable->getBuffer().begin();
    424     uint32_t RanlibCount = 0;
    425     RanlibCount = read32le(Buf) / 8;
    426     // If t.SymbolIndex + 1 will be past the count of symbols (the RanlibCount)
    427     // don't change the t.StringIndex as we don't want to reference a ranlib
    428     // past RanlibCount.
    429     if (t.SymbolIndex + 1 < RanlibCount) {
    430       const char *Ranlibs = Buf + 4;
    431       uint32_t CurRanStrx = 0;
    432       uint32_t NextRanStrx = 0;
    433       CurRanStrx = read32le(Ranlibs + t.SymbolIndex * 8);
    434       NextRanStrx = read32le(Ranlibs + (t.SymbolIndex + 1) * 8);
    435       t.StringIndex -= CurRanStrx;
    436       t.StringIndex += NextRanStrx;
    437     }
    438   } else {
    439     // Go to one past next null.
    440     t.StringIndex =
    441         Parent->SymbolTable->getBuffer().find('\0', t.StringIndex) + 1;
    442   }
    443   ++t.SymbolIndex;
    444   return t;
    445 }
    446 
    447 Archive::symbol_iterator Archive::symbol_begin() const {
    448   if (!hasSymbolTable())
    449     return symbol_iterator(Symbol(this, 0, 0));
    450 
    451   const char *buf = SymbolTable->getBuffer().begin();
    452   if (kind() == K_GNU) {
    453     uint32_t symbol_count = 0;
    454     symbol_count = read32be(buf);
    455     buf += sizeof(uint32_t) + (symbol_count * (sizeof(uint32_t)));
    456   } else if (kind() == K_MIPS64) {
    457     uint64_t symbol_count = read64be(buf);
    458     buf += sizeof(uint64_t) + (symbol_count * (sizeof(uint64_t)));
    459   } else if (kind() == K_BSD) {
    460     // The __.SYMDEF or "__.SYMDEF SORTED" member starts with a uint32_t
    461     // which is the number of bytes of ranlib structs that follow.  The ranlib
    462     // structs are a pair of uint32_t's the first being a string table offset
    463     // and the second being the offset into the archive of the member that
    464     // define the symbol. After that the next uint32_t is the byte count of
    465     // the string table followed by the string table.
    466     uint32_t ranlib_count = 0;
    467     ranlib_count = read32le(buf) / 8;
    468     const char *ranlibs = buf + 4;
    469     uint32_t ran_strx = 0;
    470     ran_strx = read32le(ranlibs);
    471     buf += sizeof(uint32_t) + (ranlib_count * (2 * (sizeof(uint32_t))));
    472     // Skip the byte count of the string table.
    473     buf += sizeof(uint32_t);
    474     buf += ran_strx;
    475   } else {
    476     uint32_t member_count = 0;
    477     uint32_t symbol_count = 0;
    478     member_count = read32le(buf);
    479     buf += 4 + (member_count * 4); // Skip offsets.
    480     symbol_count = read32le(buf);
    481     buf += 4 + (symbol_count * 2); // Skip indices.
    482   }
    483   uint32_t string_start_offset = buf - SymbolTable->getBuffer().begin();
    484   return symbol_iterator(Symbol(this, 0, string_start_offset));
    485 }
    486 
    487 Archive::symbol_iterator Archive::symbol_end() const {
    488   if (!hasSymbolTable())
    489     return symbol_iterator(Symbol(this, 0, 0));
    490 
    491   const char *buf = SymbolTable->getBuffer().begin();
    492   uint32_t symbol_count = 0;
    493   if (kind() == K_GNU) {
    494     symbol_count = read32be(buf);
    495   } else if (kind() == K_MIPS64) {
    496     symbol_count = read64be(buf);
    497   } else if (kind() == K_BSD) {
    498     symbol_count = read32le(buf) / 8;
    499   } else {
    500     uint32_t member_count = 0;
    501     member_count = read32le(buf);
    502     buf += 4 + (member_count * 4); // Skip offsets.
    503     symbol_count = read32le(buf);
    504   }
    505   return symbol_iterator(Symbol(this, symbol_count, 0));
    506 }
    507 
    508 Archive::child_iterator Archive::findSym(StringRef name) const {
    509   Archive::symbol_iterator bs = symbol_begin();
    510   Archive::symbol_iterator es = symbol_end();
    511 
    512   for (; bs != es; ++bs) {
    513     StringRef SymName = bs->getName();
    514     if (SymName == name) {
    515       ErrorOr<Archive::child_iterator> ResultOrErr = bs->getMember();
    516       // FIXME: Should we really eat the error?
    517       if (ResultOrErr.getError())
    518         return child_end();
    519       return ResultOrErr.get();
    520     }
    521   }
    522   return child_end();
    523 }
    524 
    525 bool Archive::hasSymbolTable() const {
    526   return SymbolTable != child_end();
    527 }
    528