1 //===-- ArchiveReader.cpp - Read LLVM archive files -------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Builds up standard unix archive files (.a) containing LLVM bitcode. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/Bitcode/Archive.h" 15 #include "ArchiveInternals.h" 16 #include "llvm/ADT/OwningPtr.h" 17 #include "llvm/ADT/SmallPtrSet.h" 18 #include "llvm/Bitcode/ReaderWriter.h" 19 #include "llvm/IR/Module.h" 20 #include "llvm/Support/MemoryBuffer.h" 21 #include <cctype> 22 #include <cstdio> 23 #include <cstdlib> 24 using namespace llvm; 25 26 /// Read a variable-bit-rate encoded unsigned integer 27 static inline unsigned readInteger(const char*&At, const char*End) { 28 unsigned Shift = 0; 29 unsigned Result = 0; 30 31 do { 32 if (At == End) 33 return Result; 34 Result |= (unsigned)((*At++) & 0x7F) << Shift; 35 Shift += 7; 36 } while (At[-1] & 0x80); 37 return Result; 38 } 39 40 // Completely parse the Archive's symbol table and populate symTab member var. 41 bool 42 Archive::parseSymbolTable(const void* data, unsigned size, std::string* error) { 43 const char* At = (const char*) data; 44 const char* End = At + size; 45 while (At < End) { 46 unsigned offset = readInteger(At, End); 47 if (At == End) { 48 if (error) 49 *error = "Ran out of data reading vbr_uint for symtab offset!"; 50 return false; 51 } 52 unsigned length = readInteger(At, End); 53 if (At == End) { 54 if (error) 55 *error = "Ran out of data reading vbr_uint for symtab length!"; 56 return false; 57 } 58 if (At + length > End) { 59 if (error) 60 *error = "Malformed symbol table: length not consistent with size"; 61 return false; 62 } 63 // we don't care if it can't be inserted (duplicate entry) 64 symTab.insert(std::make_pair(std::string(At, length), offset)); 65 At += length; 66 } 67 symTabSize = size; 68 return true; 69 } 70 71 // This member parses an ArchiveMemberHeader that is presumed to be pointed to 72 // by At. The At pointer is updated to the byte just after the header, which 73 // can be variable in size. 74 ArchiveMember* 75 Archive::parseMemberHeader(const char*& At, const char* End, std::string* error) 76 { 77 if (At + sizeof(ArchiveMemberHeader) >= End) { 78 if (error) 79 *error = "Unexpected end of file"; 80 return 0; 81 } 82 83 // Cast archive member header 84 const ArchiveMemberHeader* Hdr = (const ArchiveMemberHeader*)At; 85 At += sizeof(ArchiveMemberHeader); 86 87 int flags = 0; 88 int MemberSize = atoi(Hdr->size); 89 assert(MemberSize >= 0); 90 91 // Check the size of the member for sanity 92 if (At + MemberSize > End) { 93 if (error) 94 *error = "invalid member length in archive file"; 95 return 0; 96 } 97 98 // Check the member signature 99 if (!Hdr->checkSignature()) { 100 if (error) 101 *error = "invalid file member signature"; 102 return 0; 103 } 104 105 // Convert and check the member name 106 // The empty name ( '/' and 15 blanks) is for a foreign (non-LLVM) symbol 107 // table. The special name "//" and 14 blanks is for a string table, used 108 // for long file names. This library doesn't generate either of those but 109 // it will accept them. If the name starts with #1/ and the remainder is 110 // digits, then those digits specify the length of the name that is 111 // stored immediately following the header. The special name 112 // __LLVM_SYM_TAB__ identifies the symbol table for LLVM bitcode. 113 // Anything else is a regular, short filename that is terminated with 114 // a '/' and blanks. 115 116 std::string pathname; 117 switch (Hdr->name[0]) { 118 case '#': 119 if (Hdr->name[1] == '1' && Hdr->name[2] == '/') { 120 if (isdigit(Hdr->name[3])) { 121 unsigned len = atoi(&Hdr->name[3]); 122 const char *nulp = (const char *)memchr(At, '\0', len); 123 pathname.assign(At, nulp != 0 ? (uintptr_t)(nulp - At) : len); 124 At += len; 125 MemberSize -= len; 126 flags |= ArchiveMember::HasLongFilenameFlag; 127 } else { 128 if (error) 129 *error = "invalid long filename"; 130 return 0; 131 } 132 } else if (Hdr->name[1] == '_' && 133 (0 == memcmp(Hdr->name, ARFILE_LLVM_SYMTAB_NAME, 16))) { 134 // The member is using a long file name (>15 chars) format. 135 // This format is standard for 4.4BSD and Mac OSX operating 136 // systems. LLVM uses it similarly. In this format, the 137 // remainder of the name field (after #1/) specifies the 138 // length of the file name which occupy the first bytes of 139 // the member's data. The pathname already has the #1/ stripped. 140 pathname.assign(ARFILE_LLVM_SYMTAB_NAME); 141 flags |= ArchiveMember::LLVMSymbolTableFlag; 142 } 143 break; 144 case '/': 145 if (Hdr->name[1]== '/') { 146 if (0 == memcmp(Hdr->name, ARFILE_STRTAB_NAME, 16)) { 147 pathname.assign(ARFILE_STRTAB_NAME); 148 flags |= ArchiveMember::StringTableFlag; 149 } else { 150 if (error) 151 *error = "invalid string table name"; 152 return 0; 153 } 154 } else if (Hdr->name[1] == ' ') { 155 if (0 == memcmp(Hdr->name, ARFILE_SVR4_SYMTAB_NAME, 16)) { 156 pathname.assign(ARFILE_SVR4_SYMTAB_NAME); 157 flags |= ArchiveMember::SVR4SymbolTableFlag; 158 } else { 159 if (error) 160 *error = "invalid SVR4 symbol table name"; 161 return 0; 162 } 163 } else if (isdigit(Hdr->name[1])) { 164 unsigned index = atoi(&Hdr->name[1]); 165 if (index < strtab.length()) { 166 const char* namep = strtab.c_str() + index; 167 const char* endp = strtab.c_str() + strtab.length(); 168 const char* p = namep; 169 const char* last_p = p; 170 while (p < endp) { 171 if (*p == '\n' && *last_p == '/') { 172 pathname.assign(namep, last_p - namep); 173 flags |= ArchiveMember::HasLongFilenameFlag; 174 break; 175 } 176 last_p = p; 177 p++; 178 } 179 if (p >= endp) { 180 if (error) 181 *error = "missing name terminator in string table"; 182 return 0; 183 } 184 } else { 185 if (error) 186 *error = "name index beyond string table"; 187 return 0; 188 } 189 } 190 break; 191 case '_': 192 if (Hdr->name[1] == '_' && 193 (0 == memcmp(Hdr->name, ARFILE_BSD4_SYMTAB_NAME, 16))) { 194 pathname.assign(ARFILE_BSD4_SYMTAB_NAME); 195 flags |= ArchiveMember::BSD4SymbolTableFlag; 196 break; 197 } 198 /* FALL THROUGH */ 199 200 default: 201 const char* slash = (const char*) memchr(Hdr->name, '/', 16); 202 if (slash == 0) 203 slash = Hdr->name + 16; 204 pathname.assign(Hdr->name, slash - Hdr->name); 205 break; 206 } 207 208 // Determine if this is a bitcode file 209 switch (sys::IdentifyFileType(At, 4)) { 210 case sys::Bitcode_FileType: 211 flags |= ArchiveMember::BitcodeFlag; 212 break; 213 default: 214 flags &= ~ArchiveMember::BitcodeFlag; 215 break; 216 } 217 218 // Instantiate the ArchiveMember to be filled 219 ArchiveMember* member = new ArchiveMember(this); 220 221 // Fill in fields of the ArchiveMember 222 member->parent = this; 223 member->path.set(pathname); 224 member->info.fileSize = MemberSize; 225 member->info.modTime.fromEpochTime(atoi(Hdr->date)); 226 unsigned int mode; 227 sscanf(Hdr->mode, "%o", &mode); 228 member->info.mode = mode; 229 member->info.user = atoi(Hdr->uid); 230 member->info.group = atoi(Hdr->gid); 231 member->flags = flags; 232 member->data = At; 233 234 return member; 235 } 236 237 bool 238 Archive::checkSignature(std::string* error) { 239 // Check the magic string at file's header 240 if (mapfile->getBufferSize() < 8 || memcmp(base, ARFILE_MAGIC, 8)) { 241 if (error) 242 *error = "invalid signature for an archive file"; 243 return false; 244 } 245 return true; 246 } 247 248 // This function loads the entire archive and fully populates its ilist with 249 // the members of the archive file. This is typically used in preparation for 250 // editing the contents of the archive. 251 bool 252 Archive::loadArchive(std::string* error) { 253 254 // Set up parsing 255 members.clear(); 256 symTab.clear(); 257 const char *At = base; 258 const char *End = mapfile->getBufferEnd(); 259 260 if (!checkSignature(error)) 261 return false; 262 263 At += 8; // Skip the magic string. 264 265 bool seenSymbolTable = false; 266 bool foundFirstFile = false; 267 while (At < End) { 268 // parse the member header 269 const char* Save = At; 270 ArchiveMember* mbr = parseMemberHeader(At, End, error); 271 if (!mbr) 272 return false; 273 274 // check if this is the foreign symbol table 275 if (mbr->isSVR4SymbolTable() || mbr->isBSD4SymbolTable()) { 276 // We just save this but don't do anything special 277 // with it. It doesn't count as the "first file". 278 if (foreignST) { 279 // What? Multiple foreign symbol tables? Just chuck it 280 // and retain the last one found. 281 delete foreignST; 282 } 283 foreignST = mbr; 284 At += mbr->getSize(); 285 if ((intptr_t(At) & 1) == 1) 286 At++; 287 } else if (mbr->isStringTable()) { 288 // Simply suck the entire string table into a string 289 // variable. This will be used to get the names of the 290 // members that use the "/ddd" format for their names 291 // (SVR4 style long names). 292 strtab.assign(At, mbr->getSize()); 293 At += mbr->getSize(); 294 if ((intptr_t(At) & 1) == 1) 295 At++; 296 delete mbr; 297 } else if (mbr->isLLVMSymbolTable()) { 298 // This is the LLVM symbol table for the archive. If we've seen it 299 // already, its an error. Otherwise, parse the symbol table and move on. 300 if (seenSymbolTable) { 301 if (error) 302 *error = "invalid archive: multiple symbol tables"; 303 return false; 304 } 305 if (!parseSymbolTable(mbr->getData(), mbr->getSize(), error)) 306 return false; 307 seenSymbolTable = true; 308 At += mbr->getSize(); 309 if ((intptr_t(At) & 1) == 1) 310 At++; 311 delete mbr; // We don't need this member in the list of members. 312 } else { 313 // This is just a regular file. If its the first one, save its offset. 314 // Otherwise just push it on the list and move on to the next file. 315 if (!foundFirstFile) { 316 firstFileOffset = Save - base; 317 foundFirstFile = true; 318 } 319 members.push_back(mbr); 320 At += mbr->getSize(); 321 if ((intptr_t(At) & 1) == 1) 322 At++; 323 } 324 } 325 return true; 326 } 327 328 // Open and completely load the archive file. 329 Archive* 330 Archive::OpenAndLoad(const sys::Path& File, LLVMContext& C, 331 std::string* ErrorMessage) { 332 OwningPtr<Archive> result ( new Archive(File, C)); 333 if (result->mapToMemory(ErrorMessage)) 334 return NULL; 335 if (!result->loadArchive(ErrorMessage)) 336 return NULL; 337 return result.take(); 338 } 339 340 // Get all the bitcode modules from the archive 341 bool 342 Archive::getAllModules(std::vector<Module*>& Modules, 343 std::string* ErrMessage) { 344 345 for (iterator I=begin(), E=end(); I != E; ++I) { 346 if (I->isBitcode()) { 347 std::string FullMemberName = archPath.str() + 348 "(" + I->getPath().str() + ")"; 349 MemoryBuffer *Buffer = 350 MemoryBuffer::getMemBufferCopy(StringRef(I->getData(), I->getSize()), 351 FullMemberName.c_str()); 352 353 Module *M = ParseBitcodeFile(Buffer, Context, ErrMessage); 354 delete Buffer; 355 if (!M) 356 return true; 357 358 Modules.push_back(M); 359 } 360 } 361 return false; 362 } 363 364 // Load just the symbol table from the archive file 365 bool 366 Archive::loadSymbolTable(std::string* ErrorMsg) { 367 368 // Set up parsing 369 members.clear(); 370 symTab.clear(); 371 const char *At = base; 372 const char *End = mapfile->getBufferEnd(); 373 374 // Make sure we're dealing with an archive 375 if (!checkSignature(ErrorMsg)) 376 return false; 377 378 At += 8; // Skip signature 379 380 // Parse the first file member header 381 const char* FirstFile = At; 382 ArchiveMember* mbr = parseMemberHeader(At, End, ErrorMsg); 383 if (!mbr) 384 return false; 385 386 if (mbr->isSVR4SymbolTable() || mbr->isBSD4SymbolTable()) { 387 // Skip the foreign symbol table, we don't do anything with it 388 At += mbr->getSize(); 389 if ((intptr_t(At) & 1) == 1) 390 At++; 391 delete mbr; 392 393 // Read the next one 394 FirstFile = At; 395 mbr = parseMemberHeader(At, End, ErrorMsg); 396 if (!mbr) { 397 delete mbr; 398 return false; 399 } 400 } 401 402 if (mbr->isStringTable()) { 403 // Process the string table entry 404 strtab.assign((const char*)mbr->getData(), mbr->getSize()); 405 At += mbr->getSize(); 406 if ((intptr_t(At) & 1) == 1) 407 At++; 408 delete mbr; 409 // Get the next one 410 FirstFile = At; 411 mbr = parseMemberHeader(At, End, ErrorMsg); 412 if (!mbr) { 413 delete mbr; 414 return false; 415 } 416 } 417 418 // See if its the symbol table 419 if (mbr->isLLVMSymbolTable()) { 420 if (!parseSymbolTable(mbr->getData(), mbr->getSize(), ErrorMsg)) { 421 delete mbr; 422 return false; 423 } 424 425 At += mbr->getSize(); 426 if ((intptr_t(At) & 1) == 1) 427 At++; 428 delete mbr; 429 // Can't be any more symtab headers so just advance 430 FirstFile = At; 431 } else { 432 // There's no symbol table in the file. We have to rebuild it from scratch 433 // because the intent of this method is to get the symbol table loaded so 434 // it can be searched efficiently. 435 // Add the member to the members list 436 members.push_back(mbr); 437 } 438 439 firstFileOffset = FirstFile - base; 440 return true; 441 } 442 443 // Open the archive and load just the symbol tables 444 Archive* Archive::OpenAndLoadSymbols(const sys::Path& File, 445 LLVMContext& C, 446 std::string* ErrorMessage) { 447 OwningPtr<Archive> result ( new Archive(File, C) ); 448 if (result->mapToMemory(ErrorMessage)) 449 return NULL; 450 if (!result->loadSymbolTable(ErrorMessage)) 451 return NULL; 452 return result.take(); 453 } 454 455 // Look up one symbol in the symbol table and return the module that defines 456 // that symbol. 457 Module* 458 Archive::findModuleDefiningSymbol(const std::string& symbol, 459 std::string* ErrMsg) { 460 SymTabType::iterator SI = symTab.find(symbol); 461 if (SI == symTab.end()) 462 return 0; 463 464 // The symbol table was previously constructed assuming that the members were 465 // written without the symbol table header. Because VBR encoding is used, the 466 // values could not be adjusted to account for the offset of the symbol table 467 // because that could affect the size of the symbol table due to VBR encoding. 468 // We now have to account for this by adjusting the offset by the size of the 469 // symbol table and its header. 470 unsigned fileOffset = 471 SI->second + // offset in symbol-table-less file 472 firstFileOffset; // add offset to first "real" file in archive 473 474 // See if the module is already loaded 475 ModuleMap::iterator MI = modules.find(fileOffset); 476 if (MI != modules.end()) 477 return MI->second.first; 478 479 // Module hasn't been loaded yet, we need to load it 480 const char* modptr = base + fileOffset; 481 ArchiveMember* mbr = parseMemberHeader(modptr, mapfile->getBufferEnd(), 482 ErrMsg); 483 if (!mbr) 484 return 0; 485 486 // Now, load the bitcode module to get the Module. 487 std::string FullMemberName = archPath.str() + "(" + 488 mbr->getPath().str() + ")"; 489 MemoryBuffer *Buffer = 490 MemoryBuffer::getMemBufferCopy(StringRef(mbr->getData(), mbr->getSize()), 491 FullMemberName.c_str()); 492 493 Module *m = getLazyBitcodeModule(Buffer, Context, ErrMsg); 494 if (!m) 495 return 0; 496 497 modules.insert(std::make_pair(fileOffset, std::make_pair(m, mbr))); 498 499 return m; 500 } 501 502 // Look up multiple symbols in the symbol table and return a set of 503 // Modules that define those symbols. 504 bool 505 Archive::findModulesDefiningSymbols(std::set<std::string>& symbols, 506 SmallVectorImpl<Module*>& result, 507 std::string* error) { 508 if (!mapfile || !base) { 509 if (error) 510 *error = "Empty archive invalid for finding modules defining symbols"; 511 return false; 512 } 513 514 if (symTab.empty()) { 515 // We don't have a symbol table, so we must build it now but lets also 516 // make sure that we populate the modules table as we do this to ensure 517 // that we don't load them twice when findModuleDefiningSymbol is called 518 // below. 519 520 // Get a pointer to the first file 521 const char* At = base + firstFileOffset; 522 const char* End = mapfile->getBufferEnd(); 523 524 while ( At < End) { 525 // Compute the offset to be put in the symbol table 526 unsigned offset = At - base - firstFileOffset; 527 528 // Parse the file's header 529 ArchiveMember* mbr = parseMemberHeader(At, End, error); 530 if (!mbr) 531 return false; 532 533 // If it contains symbols 534 if (mbr->isBitcode()) { 535 // Get the symbols 536 std::vector<std::string> symbols; 537 std::string FullMemberName = archPath.str() + "(" + 538 mbr->getPath().str() + ")"; 539 Module* M = 540 GetBitcodeSymbols(At, mbr->getSize(), FullMemberName, Context, 541 symbols, error); 542 543 if (M) { 544 // Insert the module's symbols into the symbol table 545 for (std::vector<std::string>::iterator I = symbols.begin(), 546 E=symbols.end(); I != E; ++I ) { 547 symTab.insert(std::make_pair(*I, offset)); 548 } 549 // Insert the Module and the ArchiveMember into the table of 550 // modules. 551 modules.insert(std::make_pair(offset, std::make_pair(M, mbr))); 552 } else { 553 if (error) 554 *error = "Can't parse bitcode member: " + 555 mbr->getPath().str() + ": " + *error; 556 delete mbr; 557 return false; 558 } 559 } 560 561 // Go to the next file location 562 At += mbr->getSize(); 563 if ((intptr_t(At) & 1) == 1) 564 At++; 565 } 566 } 567 568 // At this point we have a valid symbol table (one way or another) so we 569 // just use it to quickly find the symbols requested. 570 571 SmallPtrSet<Module*, 16> Added; 572 for (std::set<std::string>::iterator I=symbols.begin(), 573 Next = I, 574 E=symbols.end(); I != E; I = Next) { 575 // Increment Next before we invalidate it. 576 ++Next; 577 578 // See if this symbol exists 579 Module* m = findModuleDefiningSymbol(*I,error); 580 if (!m) 581 continue; 582 bool NewMember = Added.insert(m); 583 if (!NewMember) 584 continue; 585 586 // The symbol exists, insert the Module into our result. 587 result.push_back(m); 588 589 // Remove the symbol now that its been resolved. 590 symbols.erase(I); 591 } 592 return true; 593 } 594 595 bool Archive::isBitcodeArchive() { 596 // Make sure the symTab has been loaded. In most cases this should have been 597 // done when the archive was constructed, but still, this is just in case. 598 if (symTab.empty()) 599 if (!loadSymbolTable(0)) 600 return false; 601 602 // Now that we know it's been loaded, return true 603 // if it has a size 604 if (symTab.size()) return true; 605 606 // We still can't be sure it isn't a bitcode archive 607 if (!loadArchive(0)) 608 return false; 609 610 std::vector<Module *> Modules; 611 std::string ErrorMessage; 612 613 // Scan the archive, trying to load a bitcode member. We only load one to 614 // see if this works. 615 for (iterator I = begin(), E = end(); I != E; ++I) { 616 if (!I->isBitcode()) 617 continue; 618 619 std::string FullMemberName = 620 archPath.str() + "(" + I->getPath().str() + ")"; 621 622 MemoryBuffer *Buffer = 623 MemoryBuffer::getMemBufferCopy(StringRef(I->getData(), I->getSize()), 624 FullMemberName.c_str()); 625 Module *M = ParseBitcodeFile(Buffer, Context); 626 delete Buffer; 627 if (!M) 628 return false; // Couldn't parse bitcode, not a bitcode archive. 629 delete M; 630 return true; 631 } 632 633 return false; 634 } 635