1 //===- Archive.cpp - ar File Format implementation --------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines the ArchiveObjectFile class. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/Object/Archive.h" 15 #include "llvm/ADT/APInt.h" 16 #include "llvm/ADT/SmallString.h" 17 #include "llvm/ADT/Twine.h" 18 #include "llvm/Support/Endian.h" 19 #include "llvm/Support/MemoryBuffer.h" 20 21 using namespace llvm; 22 using namespace object; 23 using namespace llvm::support::endian; 24 25 static const char *const Magic = "!<arch>\n"; 26 static const char *const ThinMagic = "!<thin>\n"; 27 28 void Archive::anchor() { } 29 30 StringRef ArchiveMemberHeader::getName() const { 31 char EndCond; 32 if (Name[0] == '/' || Name[0] == '#') 33 EndCond = ' '; 34 else 35 EndCond = '/'; 36 llvm::StringRef::size_type end = 37 llvm::StringRef(Name, sizeof(Name)).find(EndCond); 38 if (end == llvm::StringRef::npos) 39 end = sizeof(Name); 40 assert(end <= sizeof(Name) && end > 0); 41 // Don't include the EndCond if there is one. 42 return llvm::StringRef(Name, end); 43 } 44 45 uint32_t ArchiveMemberHeader::getSize() const { 46 uint32_t Ret; 47 if (llvm::StringRef(Size, sizeof(Size)).rtrim(" ").getAsInteger(10, Ret)) 48 llvm_unreachable("Size is not a decimal number."); 49 return Ret; 50 } 51 52 sys::fs::perms ArchiveMemberHeader::getAccessMode() const { 53 unsigned Ret; 54 if (StringRef(AccessMode, sizeof(AccessMode)).rtrim(" ").getAsInteger(8, Ret)) 55 llvm_unreachable("Access mode is not an octal number."); 56 return static_cast<sys::fs::perms>(Ret); 57 } 58 59 sys::TimeValue ArchiveMemberHeader::getLastModified() const { 60 unsigned Seconds; 61 if (StringRef(LastModified, sizeof(LastModified)).rtrim(" ") 62 .getAsInteger(10, Seconds)) 63 llvm_unreachable("Last modified time not a decimal number."); 64 65 sys::TimeValue Ret; 66 Ret.fromEpochTime(Seconds); 67 return Ret; 68 } 69 70 unsigned ArchiveMemberHeader::getUID() const { 71 unsigned Ret; 72 if (StringRef(UID, sizeof(UID)).rtrim(" ").getAsInteger(10, Ret)) 73 llvm_unreachable("UID time not a decimal number."); 74 return Ret; 75 } 76 77 unsigned ArchiveMemberHeader::getGID() const { 78 unsigned Ret; 79 if (StringRef(GID, sizeof(GID)).rtrim(" ").getAsInteger(10, Ret)) 80 llvm_unreachable("GID time not a decimal number."); 81 return Ret; 82 } 83 84 Archive::Child::Child(const Archive *Parent, const char *Start) 85 : Parent(Parent) { 86 if (!Start) 87 return; 88 89 const ArchiveMemberHeader *Header = 90 reinterpret_cast<const ArchiveMemberHeader *>(Start); 91 uint64_t Size = sizeof(ArchiveMemberHeader); 92 if (!Parent->IsThin || Header->getName() == "/" || Header->getName() == "//") 93 Size += Header->getSize(); 94 Data = StringRef(Start, Size); 95 96 // Setup StartOfFile and PaddingBytes. 97 StartOfFile = sizeof(ArchiveMemberHeader); 98 // Don't include attached name. 99 StringRef Name = Header->getName(); 100 if (Name.startswith("#1/")) { 101 uint64_t NameSize; 102 if (Name.substr(3).rtrim(" ").getAsInteger(10, NameSize)) 103 llvm_unreachable("Long name length is not an integer"); 104 StartOfFile += NameSize; 105 } 106 } 107 108 uint64_t Archive::Child::getSize() const { 109 if (Parent->IsThin) 110 return getHeader()->getSize(); 111 return Data.size() - StartOfFile; 112 } 113 114 uint64_t Archive::Child::getRawSize() const { 115 return getHeader()->getSize(); 116 } 117 118 Archive::Child Archive::Child::getNext() const { 119 size_t SpaceToSkip = Data.size(); 120 // If it's odd, add 1 to make it even. 121 if (SpaceToSkip & 1) 122 ++SpaceToSkip; 123 124 const char *NextLoc = Data.data() + SpaceToSkip; 125 126 // Check to see if this is past the end of the archive. 127 if (NextLoc >= Parent->Data.getBufferEnd()) 128 return Child(Parent, nullptr); 129 130 return Child(Parent, NextLoc); 131 } 132 133 uint64_t Archive::Child::getChildOffset() const { 134 const char *a = Parent->Data.getBuffer().data(); 135 const char *c = Data.data(); 136 uint64_t offset = c - a; 137 return offset; 138 } 139 140 ErrorOr<StringRef> Archive::Child::getName() const { 141 StringRef name = getRawName(); 142 // Check if it's a special name. 143 if (name[0] == '/') { 144 if (name.size() == 1) // Linker member. 145 return name; 146 if (name.size() == 2 && name[1] == '/') // String table. 147 return name; 148 // It's a long name. 149 // Get the offset. 150 std::size_t offset; 151 if (name.substr(1).rtrim(" ").getAsInteger(10, offset)) 152 llvm_unreachable("Long name offset is not an integer"); 153 const char *addr = Parent->StringTable->Data.begin() 154 + sizeof(ArchiveMemberHeader) 155 + offset; 156 // Verify it. 157 if (Parent->StringTable == Parent->child_end() 158 || addr < (Parent->StringTable->Data.begin() 159 + sizeof(ArchiveMemberHeader)) 160 || addr > (Parent->StringTable->Data.begin() 161 + sizeof(ArchiveMemberHeader) 162 + Parent->StringTable->getSize())) 163 return object_error::parse_failed; 164 165 // GNU long file names end with a /. 166 if (Parent->kind() == K_GNU || Parent->kind() == K_MIPS64) { 167 StringRef::size_type End = StringRef(addr).find('/'); 168 return StringRef(addr, End); 169 } 170 return StringRef(addr); 171 } else if (name.startswith("#1/")) { 172 uint64_t name_size; 173 if (name.substr(3).rtrim(" ").getAsInteger(10, name_size)) 174 llvm_unreachable("Long name length is not an ingeter"); 175 return Data.substr(sizeof(ArchiveMemberHeader), name_size) 176 .rtrim(StringRef("\0", 1)); 177 } 178 // It's a simple name. 179 if (name[name.size() - 1] == '/') 180 return name.substr(0, name.size() - 1); 181 return name; 182 } 183 184 ErrorOr<MemoryBufferRef> Archive::Child::getMemoryBufferRef() const { 185 ErrorOr<StringRef> NameOrErr = getName(); 186 if (std::error_code EC = NameOrErr.getError()) 187 return EC; 188 StringRef Name = NameOrErr.get(); 189 return MemoryBufferRef(getBuffer(), Name); 190 } 191 192 ErrorOr<std::unique_ptr<Binary>> 193 Archive::Child::getAsBinary(LLVMContext *Context) const { 194 ErrorOr<MemoryBufferRef> BuffOrErr = getMemoryBufferRef(); 195 if (std::error_code EC = BuffOrErr.getError()) 196 return EC; 197 198 return createBinary(BuffOrErr.get(), Context); 199 } 200 201 ErrorOr<std::unique_ptr<Archive>> Archive::create(MemoryBufferRef Source) { 202 std::error_code EC; 203 std::unique_ptr<Archive> Ret(new Archive(Source, EC)); 204 if (EC) 205 return EC; 206 return std::move(Ret); 207 } 208 209 Archive::Archive(MemoryBufferRef Source, std::error_code &ec) 210 : Binary(Binary::ID_Archive, Source), SymbolTable(child_end()) { 211 StringRef Buffer = Data.getBuffer(); 212 // Check for sufficient magic. 213 if (Buffer.startswith(ThinMagic)) { 214 IsThin = true; 215 } else if (Buffer.startswith(Magic)) { 216 IsThin = false; 217 } else { 218 ec = object_error::invalid_file_type; 219 return; 220 } 221 222 // Get the special members. 223 child_iterator i = child_begin(false); 224 child_iterator e = child_end(); 225 226 if (i == e) { 227 ec = object_error::success; 228 return; 229 } 230 231 StringRef Name = i->getRawName(); 232 233 // Below is the pattern that is used to figure out the archive format 234 // GNU archive format 235 // First member : / (may exist, if it exists, points to the symbol table ) 236 // Second member : // (may exist, if it exists, points to the string table) 237 // Note : The string table is used if the filename exceeds 15 characters 238 // BSD archive format 239 // First member : __.SYMDEF or "__.SYMDEF SORTED" (the symbol table) 240 // There is no string table, if the filename exceeds 15 characters or has a 241 // embedded space, the filename has #1/<size>, The size represents the size 242 // of the filename that needs to be read after the archive header 243 // COFF archive format 244 // First member : / 245 // Second member : / (provides a directory of symbols) 246 // Third member : // (may exist, if it exists, contains the string table) 247 // Note: Microsoft PE/COFF Spec 8.3 says that the third member is present 248 // even if the string table is empty. However, lib.exe does not in fact 249 // seem to create the third member if there's no member whose filename 250 // exceeds 15 characters. So the third member is optional. 251 252 if (Name == "__.SYMDEF") { 253 Format = K_BSD; 254 SymbolTable = i; 255 ++i; 256 FirstRegular = i; 257 ec = object_error::success; 258 return; 259 } 260 261 if (Name.startswith("#1/")) { 262 Format = K_BSD; 263 // We know this is BSD, so getName will work since there is no string table. 264 ErrorOr<StringRef> NameOrErr = i->getName(); 265 ec = NameOrErr.getError(); 266 if (ec) 267 return; 268 Name = NameOrErr.get(); 269 if (Name == "__.SYMDEF SORTED" || Name == "__.SYMDEF") { 270 SymbolTable = i; 271 ++i; 272 } 273 FirstRegular = i; 274 return; 275 } 276 277 // MIPS 64-bit ELF archives use a special format of a symbol table. 278 // This format is marked by `ar_name` field equals to "/SYM64/". 279 // For detailed description see page 96 in the following document: 280 // http://techpubs.sgi.com/library/manuals/4000/007-4658-001/pdf/007-4658-001.pdf 281 282 bool has64SymTable = false; 283 if (Name == "/" || Name == "/SYM64/") { 284 SymbolTable = i; 285 if (Name == "/SYM64/") 286 has64SymTable = true; 287 288 ++i; 289 if (i == e) { 290 ec = object_error::parse_failed; 291 return; 292 } 293 Name = i->getRawName(); 294 } 295 296 if (Name == "//") { 297 Format = has64SymTable ? K_MIPS64 : K_GNU; 298 StringTable = i; 299 ++i; 300 FirstRegular = i; 301 ec = object_error::success; 302 return; 303 } 304 305 if (Name[0] != '/') { 306 Format = has64SymTable ? K_MIPS64 : K_GNU; 307 FirstRegular = i; 308 ec = object_error::success; 309 return; 310 } 311 312 if (Name != "/") { 313 ec = object_error::parse_failed; 314 return; 315 } 316 317 Format = K_COFF; 318 SymbolTable = i; 319 320 ++i; 321 if (i == e) { 322 FirstRegular = i; 323 ec = object_error::success; 324 return; 325 } 326 327 Name = i->getRawName(); 328 329 if (Name == "//") { 330 StringTable = i; 331 ++i; 332 } 333 334 FirstRegular = i; 335 ec = object_error::success; 336 } 337 338 Archive::child_iterator Archive::child_begin(bool SkipInternal) const { 339 if (Data.getBufferSize() == 8) // empty archive. 340 return child_end(); 341 342 if (SkipInternal) 343 return FirstRegular; 344 345 const char *Loc = Data.getBufferStart() + strlen(Magic); 346 Child c(this, Loc); 347 return c; 348 } 349 350 Archive::child_iterator Archive::child_end() const { 351 return Child(this, nullptr); 352 } 353 354 StringRef Archive::Symbol::getName() const { 355 return Parent->SymbolTable->getBuffer().begin() + StringIndex; 356 } 357 358 ErrorOr<Archive::child_iterator> Archive::Symbol::getMember() const { 359 const char *Buf = Parent->SymbolTable->getBuffer().begin(); 360 const char *Offsets = Buf; 361 if (Parent->kind() == K_MIPS64) 362 Offsets += sizeof(uint64_t); 363 else 364 Offsets += sizeof(uint32_t); 365 uint32_t Offset = 0; 366 if (Parent->kind() == K_GNU) { 367 Offset = read32be(Offsets + SymbolIndex * 4); 368 } else if (Parent->kind() == K_MIPS64) { 369 Offset = read64be(Offsets + SymbolIndex * 8); 370 } else if (Parent->kind() == K_BSD) { 371 // The SymbolIndex is an index into the ranlib structs that start at 372 // Offsets (the first uint32_t is the number of bytes of the ranlib 373 // structs). The ranlib structs are a pair of uint32_t's the first 374 // being a string table offset and the second being the offset into 375 // the archive of the member that defines the symbol. Which is what 376 // is needed here. 377 Offset = read32le(Offsets + SymbolIndex * 8 + 4); 378 } else { 379 // Skip offsets. 380 uint32_t MemberCount = read32le(Buf); 381 Buf += MemberCount * 4 + 4; 382 383 uint32_t SymbolCount = read32le(Buf); 384 if (SymbolIndex >= SymbolCount) 385 return object_error::parse_failed; 386 387 // Skip SymbolCount to get to the indices table. 388 const char *Indices = Buf + 4; 389 390 // Get the index of the offset in the file member offset table for this 391 // symbol. 392 uint16_t OffsetIndex = read16le(Indices + SymbolIndex * 2); 393 // Subtract 1 since OffsetIndex is 1 based. 394 --OffsetIndex; 395 396 if (OffsetIndex >= MemberCount) 397 return object_error::parse_failed; 398 399 Offset = read32le(Offsets + OffsetIndex * 4); 400 } 401 402 const char *Loc = Parent->getData().begin() + Offset; 403 child_iterator Iter(Child(Parent, Loc)); 404 return Iter; 405 } 406 407 Archive::Symbol Archive::Symbol::getNext() const { 408 Symbol t(*this); 409 if (Parent->kind() == K_BSD) { 410 // t.StringIndex is an offset from the start of the __.SYMDEF or 411 // "__.SYMDEF SORTED" member into the string table for the ranlib 412 // struct indexed by t.SymbolIndex . To change t.StringIndex to the 413 // offset in the string table for t.SymbolIndex+1 we subtract the 414 // its offset from the start of the string table for t.SymbolIndex 415 // and add the offset of the string table for t.SymbolIndex+1. 416 417 // The __.SYMDEF or "__.SYMDEF SORTED" member starts with a uint32_t 418 // which is the number of bytes of ranlib structs that follow. The ranlib 419 // structs are a pair of uint32_t's the first being a string table offset 420 // and the second being the offset into the archive of the member that 421 // define the symbol. After that the next uint32_t is the byte count of 422 // the string table followed by the string table. 423 const char *Buf = Parent->SymbolTable->getBuffer().begin(); 424 uint32_t RanlibCount = 0; 425 RanlibCount = read32le(Buf) / 8; 426 // If t.SymbolIndex + 1 will be past the count of symbols (the RanlibCount) 427 // don't change the t.StringIndex as we don't want to reference a ranlib 428 // past RanlibCount. 429 if (t.SymbolIndex + 1 < RanlibCount) { 430 const char *Ranlibs = Buf + 4; 431 uint32_t CurRanStrx = 0; 432 uint32_t NextRanStrx = 0; 433 CurRanStrx = read32le(Ranlibs + t.SymbolIndex * 8); 434 NextRanStrx = read32le(Ranlibs + (t.SymbolIndex + 1) * 8); 435 t.StringIndex -= CurRanStrx; 436 t.StringIndex += NextRanStrx; 437 } 438 } else { 439 // Go to one past next null. 440 t.StringIndex = 441 Parent->SymbolTable->getBuffer().find('\0', t.StringIndex) + 1; 442 } 443 ++t.SymbolIndex; 444 return t; 445 } 446 447 Archive::symbol_iterator Archive::symbol_begin() const { 448 if (!hasSymbolTable()) 449 return symbol_iterator(Symbol(this, 0, 0)); 450 451 const char *buf = SymbolTable->getBuffer().begin(); 452 if (kind() == K_GNU) { 453 uint32_t symbol_count = 0; 454 symbol_count = read32be(buf); 455 buf += sizeof(uint32_t) + (symbol_count * (sizeof(uint32_t))); 456 } else if (kind() == K_MIPS64) { 457 uint64_t symbol_count = read64be(buf); 458 buf += sizeof(uint64_t) + (symbol_count * (sizeof(uint64_t))); 459 } else if (kind() == K_BSD) { 460 // The __.SYMDEF or "__.SYMDEF SORTED" member starts with a uint32_t 461 // which is the number of bytes of ranlib structs that follow. The ranlib 462 // structs are a pair of uint32_t's the first being a string table offset 463 // and the second being the offset into the archive of the member that 464 // define the symbol. After that the next uint32_t is the byte count of 465 // the string table followed by the string table. 466 uint32_t ranlib_count = 0; 467 ranlib_count = read32le(buf) / 8; 468 const char *ranlibs = buf + 4; 469 uint32_t ran_strx = 0; 470 ran_strx = read32le(ranlibs); 471 buf += sizeof(uint32_t) + (ranlib_count * (2 * (sizeof(uint32_t)))); 472 // Skip the byte count of the string table. 473 buf += sizeof(uint32_t); 474 buf += ran_strx; 475 } else { 476 uint32_t member_count = 0; 477 uint32_t symbol_count = 0; 478 member_count = read32le(buf); 479 buf += 4 + (member_count * 4); // Skip offsets. 480 symbol_count = read32le(buf); 481 buf += 4 + (symbol_count * 2); // Skip indices. 482 } 483 uint32_t string_start_offset = buf - SymbolTable->getBuffer().begin(); 484 return symbol_iterator(Symbol(this, 0, string_start_offset)); 485 } 486 487 Archive::symbol_iterator Archive::symbol_end() const { 488 if (!hasSymbolTable()) 489 return symbol_iterator(Symbol(this, 0, 0)); 490 491 const char *buf = SymbolTable->getBuffer().begin(); 492 uint32_t symbol_count = 0; 493 if (kind() == K_GNU) { 494 symbol_count = read32be(buf); 495 } else if (kind() == K_MIPS64) { 496 symbol_count = read64be(buf); 497 } else if (kind() == K_BSD) { 498 symbol_count = read32le(buf) / 8; 499 } else { 500 uint32_t member_count = 0; 501 member_count = read32le(buf); 502 buf += 4 + (member_count * 4); // Skip offsets. 503 symbol_count = read32le(buf); 504 } 505 return symbol_iterator(Symbol(this, symbol_count, 0)); 506 } 507 508 Archive::child_iterator Archive::findSym(StringRef name) const { 509 Archive::symbol_iterator bs = symbol_begin(); 510 Archive::symbol_iterator es = symbol_end(); 511 512 for (; bs != es; ++bs) { 513 StringRef SymName = bs->getName(); 514 if (SymName == name) { 515 ErrorOr<Archive::child_iterator> ResultOrErr = bs->getMember(); 516 // FIXME: Should we really eat the error? 517 if (ResultOrErr.getError()) 518 return child_end(); 519 return ResultOrErr.get(); 520 } 521 } 522 return child_end(); 523 } 524 525 bool Archive::hasSymbolTable() const { 526 return SymbolTable != child_end(); 527 } 528