1 //===- GNUArchiveReader.cpp -----------------------------------------------===// 2 // 3 // The MCLinker Project 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 #include <mcld/LD/GNUArchiveReader.h> 10 11 #include <mcld/Module.h> 12 #include <mcld/InputTree.h> 13 #include <mcld/MC/Attribute.h> 14 #include <mcld/MC/MCLDInput.h> 15 #include <mcld/LD/ResolveInfo.h> 16 #include <mcld/LD/ELFObjectReader.h> 17 #include <mcld/Support/FileSystem.h> 18 #include <mcld/Support/FileHandle.h> 19 #include <mcld/Support/MemoryArea.h> 20 #include <mcld/Support/MemoryRegion.h> 21 #include <mcld/Support/MsgHandling.h> 22 #include <mcld/Support/Path.h> 23 #include <mcld/ADT/SizeTraits.h> 24 25 #include <llvm/ADT/StringRef.h> 26 #include <llvm/Support/Host.h> 27 28 #include <cstring> 29 #include <cstdlib> 30 31 using namespace mcld; 32 33 GNUArchiveReader::GNUArchiveReader(Module& pModule, 34 ELFObjectReader& pELFObjectReader) 35 : m_Module(pModule), 36 m_ELFObjectReader(pELFObjectReader) 37 { 38 } 39 40 GNUArchiveReader::~GNUArchiveReader() 41 { 42 } 43 44 /// isMyFormat 45 bool GNUArchiveReader::isMyFormat(Input& pInput) const 46 { 47 assert(pInput.hasMemArea()); 48 MemoryRegion* region = pInput.memArea()->request(pInput.fileOffset(), 49 Archive::MAGIC_LEN); 50 const char* str = reinterpret_cast<const char*>(region->getBuffer()); 51 52 bool result = false; 53 assert(NULL != str); 54 if (isArchive(str) || isThinArchive(str)) 55 result = true; 56 57 pInput.memArea()->release(region); 58 return result; 59 } 60 61 /// isArchive 62 bool GNUArchiveReader::isArchive(const char* pStr) const 63 { 64 return (0 == memcmp(pStr, Archive::MAGIC, Archive::MAGIC_LEN)); 65 } 66 67 /// isThinArchive 68 bool GNUArchiveReader::isThinArchive(const char* pStr) const 69 { 70 return (0 == memcmp(pStr, Archive::THIN_MAGIC, Archive::MAGIC_LEN)); 71 } 72 73 /// isThinArchive 74 bool GNUArchiveReader::isThinArchive(Input& pInput) const 75 { 76 assert(pInput.hasMemArea()); 77 MemoryRegion* region = pInput.memArea()->request(pInput.fileOffset(), 78 Archive::MAGIC_LEN); 79 const char* str = reinterpret_cast<const char*>(region->getBuffer()); 80 81 bool result = false; 82 assert(NULL != str); 83 if (isThinArchive(str)) 84 result = true; 85 86 pInput.memArea()->release(region); 87 return result; 88 } 89 90 bool GNUArchiveReader::readArchive(Archive& pArchive) 91 { 92 // bypass the empty archive 93 if (Archive::MAGIC_LEN == pArchive.getARFile().memArea()->handler()->size()) 94 return true; 95 96 if (pArchive.getARFile().attribute()->isWholeArchive()) 97 return includeAllMembers(pArchive); 98 99 // if this is the first time read this archive, setup symtab and strtab 100 if (pArchive.getSymbolTable().empty()) { 101 // read the symtab of the archive 102 readSymbolTable(pArchive); 103 104 // read the strtab of the archive 105 readStringTable(pArchive); 106 107 // add root archive to ArchiveMemberMap 108 pArchive.addArchiveMember(pArchive.getARFile().name(), 109 pArchive.inputs().root(), 110 &InputTree::Downward); 111 } 112 113 // include the needed members in the archive and build up the input tree 114 bool willSymResolved; 115 do { 116 willSymResolved = false; 117 for (size_t idx = 0; idx < pArchive.numOfSymbols(); ++idx) { 118 // bypass if we already decided to include this symbol or not 119 if (Archive::Symbol::Unknown != pArchive.getSymbolStatus(idx)) 120 continue; 121 122 // bypass if another symbol with the same object file offset is included 123 if (pArchive.hasObjectMember(pArchive.getObjFileOffset(idx))) { 124 pArchive.setSymbolStatus(idx, Archive::Symbol::Include); 125 continue; 126 } 127 128 // check if we should include this defined symbol 129 Archive::Symbol::Status status = 130 shouldIncludeSymbol(pArchive.getSymbolName(idx)); 131 if (Archive::Symbol::Unknown != status) 132 pArchive.setSymbolStatus(idx, status); 133 134 if (Archive::Symbol::Include == status) { 135 // include the object member from the given offset 136 includeMember(pArchive, pArchive.getObjFileOffset(idx)); 137 willSymResolved = true; 138 } // end of if 139 } // end of for 140 } while (willSymResolved); 141 142 return true; 143 } 144 145 /// readMemberHeader - read the header of a member in a archive file and then 146 /// return the corresponding archive member (it may be an input object or 147 /// another archive) 148 /// @param pArchiveRoot - the archive root that holds the strtab (extended 149 /// name table) 150 /// @param pArchiveFile - the archive that contains the needed object 151 /// @param pFileOffset - file offset of the member header in the archive 152 /// @param pNestedOffset - used when we find a nested archive 153 /// @param pMemberSize - the file size of this member 154 Input* GNUArchiveReader::readMemberHeader(Archive& pArchiveRoot, 155 Input& pArchiveFile, 156 uint32_t pFileOffset, 157 uint32_t& pNestedOffset, 158 size_t& pMemberSize) 159 { 160 assert(pArchiveFile.hasMemArea()); 161 162 MemoryRegion* header_region = 163 pArchiveFile.memArea()->request((pArchiveFile.fileOffset() + pFileOffset), 164 sizeof(Archive::MemberHeader)); 165 const Archive::MemberHeader* header = 166 reinterpret_cast<const Archive::MemberHeader*>(header_region->getBuffer()); 167 168 assert(0 == memcmp(header->fmag, Archive::MEMBER_MAGIC, sizeof(header->fmag))); 169 170 pMemberSize = atoi(header->size); 171 172 // parse the member name and nested offset if any 173 std::string member_name; 174 llvm::StringRef name_field(header->name, sizeof(header->name)); 175 if ('/' != header->name[0]) { 176 // this is an object file in an archive 177 size_t pos = name_field.find_first_of('/'); 178 member_name.assign(name_field.substr(0, pos).str()); 179 } 180 else { 181 // this is an object/archive file in a thin archive 182 size_t begin = 1; 183 size_t end = name_field.find_first_of(" :"); 184 uint32_t name_offset = 0; 185 // parse the name offset 186 name_field.substr(begin, end - begin).getAsInteger(10, name_offset); 187 188 if (':' == name_field[end]) { 189 // there is a nested offset 190 begin = end + 1; 191 end = name_field.find_first_of(' ', begin); 192 name_field.substr(begin, end - begin).getAsInteger(10, pNestedOffset); 193 } 194 195 // get the member name from the extended name table 196 assert(pArchiveRoot.hasStrTable()); 197 begin = name_offset; 198 end = pArchiveRoot.getStrTable().find_first_of('\n', begin); 199 member_name.assign(pArchiveRoot.getStrTable().substr(begin, end - begin -1)); 200 } 201 202 Input* member = NULL; 203 bool isThinAR = isThinArchive(pArchiveFile); 204 if (!isThinAR) { 205 // this is an object file in an archive 206 member = pArchiveRoot.getMemberFile(pArchiveFile, 207 isThinAR, 208 member_name, 209 pArchiveFile.path(), 210 (pFileOffset + 211 sizeof(Archive::MemberHeader))); 212 } 213 else { 214 // this is a member in a thin archive 215 // try to find if this is a archive already in the map first 216 Archive::ArchiveMember* ar_member = 217 pArchiveRoot.getArchiveMember(member_name); 218 if (NULL != ar_member) { 219 return ar_member->file; 220 } 221 222 // get nested file path, the nested file's member name is the relative 223 // path to the archive containing it. 224 sys::fs::Path input_path(pArchiveFile.path().parent_path()); 225 if (!input_path.empty()) 226 input_path.append(member_name); 227 else 228 input_path.assign(member_name); 229 230 member = pArchiveRoot.getMemberFile(pArchiveFile, 231 isThinAR, 232 member_name, 233 input_path); 234 } 235 236 pArchiveFile.memArea()->release(header_region); 237 return member; 238 } 239 240 /// readSymbolTable - read the archive symbol map (armap) 241 bool GNUArchiveReader::readSymbolTable(Archive& pArchive) 242 { 243 assert(pArchive.getARFile().hasMemArea()); 244 245 MemoryRegion* header_region = 246 pArchive.getARFile().memArea()->request((pArchive.getARFile().fileOffset() + 247 Archive::MAGIC_LEN), 248 sizeof(Archive::MemberHeader)); 249 const Archive::MemberHeader* header = 250 reinterpret_cast<const Archive::MemberHeader*>(header_region->getBuffer()); 251 assert(0 == memcmp(header->fmag, Archive::MEMBER_MAGIC, sizeof(header->fmag))); 252 253 int symtab_size = atoi(header->size); 254 pArchive.setSymTabSize(symtab_size); 255 256 if (!pArchive.getARFile().attribute()->isWholeArchive()) { 257 MemoryRegion* symtab_region = 258 pArchive.getARFile().memArea()->request( 259 (pArchive.getARFile().fileOffset() + 260 Archive::MAGIC_LEN + 261 sizeof(Archive::MemberHeader)), 262 symtab_size); 263 const uint32_t* data = 264 reinterpret_cast<const uint32_t*>(symtab_region->getBuffer()); 265 266 // read the number of symbols 267 uint32_t number = 0; 268 if (llvm::sys::IsLittleEndianHost) 269 number = mcld::bswap32(*data); 270 else 271 number = *data; 272 273 // set up the pointers for file offset and name offset 274 ++data; 275 const char* name = reinterpret_cast<const char*>(data + number); 276 277 // add the archive symbols 278 for (uint32_t i = 0; i < number; ++i) { 279 if (llvm::sys::IsLittleEndianHost) 280 pArchive.addSymbol(name, mcld::bswap32(*data)); 281 else 282 pArchive.addSymbol(name, *data); 283 name += strlen(name) + 1; 284 ++data; 285 } 286 pArchive.getARFile().memArea()->release(symtab_region); 287 } 288 pArchive.getARFile().memArea()->release(header_region); 289 return true; 290 } 291 292 /// readStringTable - read the strtab for long file name of the archive 293 bool GNUArchiveReader::readStringTable(Archive& pArchive) 294 { 295 size_t offset = Archive::MAGIC_LEN + 296 sizeof(Archive::MemberHeader) + 297 pArchive.getSymTabSize(); 298 299 if (0x0 != (offset & 1)) 300 ++offset; 301 302 assert(pArchive.getARFile().hasMemArea()); 303 304 MemoryRegion* header_region = 305 pArchive.getARFile().memArea()->request((pArchive.getARFile().fileOffset() + 306 offset), 307 sizeof(Archive::MemberHeader)); 308 const Archive::MemberHeader* header = 309 reinterpret_cast<const Archive::MemberHeader*>(header_region->getBuffer()); 310 311 assert(0 == memcmp(header->fmag, Archive::MEMBER_MAGIC, sizeof(header->fmag))); 312 313 if (0 == memcmp(header->name, Archive::STRTAB_NAME, sizeof(header->name))) { 314 // read the extended name table 315 int strtab_size = atoi(header->size); 316 MemoryRegion* strtab_region = 317 pArchive.getARFile().memArea()->request( 318 (pArchive.getARFile().fileOffset() + 319 offset + sizeof(Archive::MemberHeader)), 320 strtab_size); 321 const char* strtab = 322 reinterpret_cast<const char*>(strtab_region->getBuffer()); 323 pArchive.getStrTable().assign(strtab, strtab_size); 324 pArchive.getARFile().memArea()->release(strtab_region); 325 } 326 pArchive.getARFile().memArea()->release(header_region); 327 return true; 328 } 329 330 /// shouldIncludeStatus - given a sym name from armap and check if including 331 /// the corresponding archive member, and then return the decision 332 enum Archive::Symbol::Status 333 GNUArchiveReader::shouldIncludeSymbol(const llvm::StringRef& pSymName) const 334 { 335 // TODO: handle symbol version issue and user defined symbols 336 const ResolveInfo* info = m_Module.getNamePool().findInfo(pSymName); 337 if (NULL != info) { 338 if (!info->isUndef()) 339 return Archive::Symbol::Exclude; 340 if (info->isWeak()) 341 return Archive::Symbol::Unknown; 342 return Archive::Symbol::Include; 343 } 344 return Archive::Symbol::Unknown; 345 } 346 347 /// includeMember - include the object member in the given file offset, and 348 /// return the size of the object 349 /// @param pArchiveRoot - the archive root 350 /// @param pFileOffset - file offset of the member header in the archive 351 size_t GNUArchiveReader::includeMember(Archive& pArchive, uint32_t pFileOffset) 352 { 353 Input* cur_archive = &(pArchive.getARFile()); 354 Input* member = NULL; 355 uint32_t file_offset = pFileOffset; 356 size_t size = 0; 357 do { 358 uint32_t nested_offset = 0; 359 // use the file offset in current archive to find out the member we 360 // want to include 361 member = readMemberHeader(pArchive, 362 *cur_archive, 363 file_offset, 364 nested_offset, 365 size); 366 assert(member != NULL); 367 // bypass if we get an archive that is already in the map 368 if (Input::Archive == member->type()) { 369 cur_archive = member; 370 file_offset = nested_offset; 371 continue; 372 } 373 374 // insert a node into the subtree of current archive. 375 Archive::ArchiveMember* parent = 376 pArchive.getArchiveMember(cur_archive->name()); 377 378 assert(NULL != parent); 379 pArchive.inputs().insert(parent->lastPos, *(parent->move), *member); 380 381 // move the iterator to new created node, and also adjust the 382 // direction to Afterward for next insertion in this subtree 383 parent->move->move(parent->lastPos); 384 parent->move = &InputTree::Afterward; 385 386 if (m_ELFObjectReader.isMyFormat(*member)) { 387 member->setType(Input::Object); 388 pArchive.addObjectMember(pFileOffset, parent->lastPos); 389 m_ELFObjectReader.readHeader(*member); 390 m_ELFObjectReader.readSections(*member); 391 m_ELFObjectReader.readSymbols(*member); 392 m_Module.getObjectList().push_back(member); 393 } 394 else if (isMyFormat(*member)) { 395 member->setType(Input::Archive); 396 // when adding a new archive node, set the iterator to archive 397 // itself, and set the direction to Downward 398 pArchive.addArchiveMember(member->name(), 399 parent->lastPos, 400 &InputTree::Downward); 401 cur_archive = member; 402 file_offset = nested_offset; 403 } 404 } while (Input::Object != member->type()); 405 return size; 406 } 407 408 /// includeAllMembers - include all object members. This is called if 409 /// --whole-archive is the attribute for this archive file. 410 bool GNUArchiveReader::includeAllMembers(Archive& pArchive) 411 { 412 // read the symtab of the archive 413 readSymbolTable(pArchive); 414 415 // read the strtab of the archive 416 readStringTable(pArchive); 417 418 // add root archive to ArchiveMemberMap 419 pArchive.addArchiveMember(pArchive.getARFile().name(), 420 pArchive.inputs().root(), 421 &InputTree::Downward); 422 423 bool isThinAR = isThinArchive(pArchive.getARFile()); 424 uint32_t begin_offset = pArchive.getARFile().fileOffset() + 425 Archive::MAGIC_LEN + 426 sizeof(Archive::MemberHeader) + 427 pArchive.getSymTabSize(); 428 if (pArchive.hasStrTable()) { 429 if (0x0 != (begin_offset & 1)) 430 ++begin_offset; 431 begin_offset += sizeof(Archive::MemberHeader) + 432 pArchive.getStrTable().size(); 433 } 434 uint32_t end_offset = pArchive.getARFile().memArea()->handler()->size(); 435 for (uint32_t offset = begin_offset; 436 offset < end_offset; 437 offset += sizeof(Archive::MemberHeader)) { 438 439 size_t size = includeMember(pArchive, offset); 440 441 if (!isThinAR) { 442 offset += size; 443 } 444 445 if (0x0 != (offset & 1)) 446 ++offset; 447 } 448 return true; 449 } 450 451