1 /* 2 * Copyright (C) 2015 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "read_elf.h" 18 #include "read_apk.h" 19 20 #include <stdio.h> 21 #include <string.h> 22 #include <sys/stat.h> 23 #include <sys/types.h> 24 25 #include <algorithm> 26 #include <limits> 27 28 #include <android-base/file.h> 29 #include <android-base/logging.h> 30 31 #pragma clang diagnostic push 32 #pragma clang diagnostic ignored "-Wunused-parameter" 33 34 #include <llvm/ADT/StringRef.h> 35 #include <llvm/Object/Binary.h> 36 #include <llvm/Object/ELFObjectFile.h> 37 #include <llvm/Object/ObjectFile.h> 38 39 #pragma clang diagnostic pop 40 41 #include "utils.h" 42 43 #define ELF_NOTE_GNU "GNU" 44 #define NT_GNU_BUILD_ID 3 45 46 std::ostream& operator<<(std::ostream& os, const ElfStatus& status) { 47 switch (status) { 48 case ElfStatus::NO_ERROR: 49 os << "No error"; 50 break; 51 case ElfStatus::FILE_NOT_FOUND: 52 os << "File not found"; 53 break; 54 case ElfStatus::READ_FAILED: 55 os << "Read failed"; 56 break; 57 case ElfStatus::FILE_MALFORMED: 58 os << "Malformed file"; 59 break; 60 case ElfStatus::NO_SYMBOL_TABLE: 61 os << "No symbol table"; 62 break; 63 case ElfStatus::NO_BUILD_ID: 64 os << "No build id"; 65 break; 66 case ElfStatus::BUILD_ID_MISMATCH: 67 os << "Build id mismatch"; 68 break; 69 case ElfStatus::SECTION_NOT_FOUND: 70 os << "Section not found"; 71 break; 72 } 73 return os; 74 } 75 76 ElfStatus IsValidElfFile(int fd) { 77 static const char elf_magic[] = {0x7f, 'E', 'L', 'F'}; 78 char buf[4]; 79 if (!android::base::ReadFully(fd, buf, 4)) { 80 return ElfStatus::READ_FAILED; 81 } 82 if (memcmp(buf, elf_magic, 4) != 0) { 83 return ElfStatus::FILE_MALFORMED; 84 } 85 return ElfStatus::NO_ERROR; 86 } 87 88 ElfStatus IsValidElfPath(const std::string& filename) { 89 if (!IsRegularFile(filename)) { 90 return ElfStatus::FILE_NOT_FOUND; 91 } 92 std::string mode = std::string("rb") + CLOSE_ON_EXEC_MODE; 93 FILE* fp = fopen(filename.c_str(), mode.c_str()); 94 if (fp == nullptr) { 95 return ElfStatus::READ_FAILED; 96 } 97 ElfStatus result = IsValidElfFile(fileno(fp)); 98 fclose(fp); 99 return result; 100 } 101 102 bool GetBuildIdFromNoteSection(const char* section, size_t section_size, BuildId* build_id) { 103 const char* p = section; 104 const char* end = p + section_size; 105 while (p < end) { 106 if (p + 12 >= end) { 107 return false; 108 } 109 uint32_t namesz; 110 uint32_t descsz; 111 uint32_t type; 112 MoveFromBinaryFormat(namesz, p); 113 MoveFromBinaryFormat(descsz, p); 114 MoveFromBinaryFormat(type, p); 115 namesz = Align(namesz, 4); 116 descsz = Align(descsz, 4); 117 if ((type == NT_GNU_BUILD_ID) && (p < end) && (strcmp(p, ELF_NOTE_GNU) == 0)) { 118 const char* desc_start = p + namesz; 119 const char* desc_end = desc_start + descsz; 120 if (desc_start > p && desc_start < desc_end && desc_end <= end) { 121 *build_id = BuildId(p + namesz, descsz); 122 return true; 123 } else { 124 return false; 125 } 126 } 127 p += namesz + descsz; 128 } 129 return false; 130 } 131 132 ElfStatus GetBuildIdFromNoteFile(const std::string& filename, BuildId* build_id) { 133 std::string content; 134 if (!android::base::ReadFileToString(filename, &content)) { 135 return ElfStatus::READ_FAILED; 136 } 137 if (!GetBuildIdFromNoteSection(content.c_str(), content.size(), build_id)) { 138 return ElfStatus::NO_BUILD_ID; 139 } 140 return ElfStatus::NO_ERROR; 141 } 142 143 template <class ELFT> 144 ElfStatus GetBuildIdFromELFFile(const llvm::object::ELFObjectFile<ELFT>* elf, BuildId* build_id) { 145 for (auto it = elf->section_begin(); it != elf->section_end(); ++it) { 146 const llvm::object::ELFSectionRef& section_ref = *it; 147 if (section_ref.getType() == llvm::ELF::SHT_NOTE) { 148 llvm::StringRef data; 149 if (it->getContents(data)) { 150 return ElfStatus::READ_FAILED; 151 } 152 if (GetBuildIdFromNoteSection(data.data(), data.size(), build_id)) { 153 return ElfStatus::NO_ERROR; 154 } 155 } 156 } 157 return ElfStatus::NO_BUILD_ID; 158 } 159 160 static ElfStatus GetBuildIdFromObjectFile(llvm::object::ObjectFile* obj, BuildId* build_id) { 161 if (auto elf = llvm::dyn_cast<llvm::object::ELF32LEObjectFile>(obj)) { 162 return GetBuildIdFromELFFile(elf, build_id); 163 } else if (auto elf = llvm::dyn_cast<llvm::object::ELF64LEObjectFile>(obj)) { 164 return GetBuildIdFromELFFile(elf, build_id); 165 } 166 return ElfStatus::FILE_MALFORMED; 167 } 168 169 struct BinaryWrapper { 170 llvm::object::OwningBinary<llvm::object::Binary> binary; 171 llvm::object::ObjectFile* obj; 172 173 BinaryWrapper() : obj(nullptr) { 174 } 175 }; 176 177 static ElfStatus OpenObjectFile(const std::string& filename, uint64_t file_offset, 178 uint64_t file_size, BinaryWrapper* wrapper) { 179 FileHelper fhelper = FileHelper::OpenReadOnly(filename); 180 if (!fhelper) { 181 return ElfStatus::READ_FAILED; 182 } 183 if (file_size == 0) { 184 file_size = GetFileSize(filename); 185 if (file_size == 0) { 186 return ElfStatus::READ_FAILED; 187 } 188 } 189 auto buffer_or_err = llvm::MemoryBuffer::getOpenFileSlice(fhelper.fd(), filename, file_size, file_offset); 190 if (!buffer_or_err) { 191 return ElfStatus::READ_FAILED; 192 } 193 auto binary_or_err = llvm::object::createBinary(buffer_or_err.get()->getMemBufferRef()); 194 if (!binary_or_err) { 195 return ElfStatus::READ_FAILED; 196 } 197 wrapper->binary = llvm::object::OwningBinary<llvm::object::Binary>(std::move(binary_or_err.get()), 198 std::move(buffer_or_err.get())); 199 wrapper->obj = llvm::dyn_cast<llvm::object::ObjectFile>(wrapper->binary.getBinary()); 200 if (wrapper->obj == nullptr) { 201 return ElfStatus::FILE_MALFORMED; 202 } 203 return ElfStatus::NO_ERROR; 204 } 205 206 static ElfStatus OpenObjectFileFromString(const std::string& s, BinaryWrapper* wrapper) { 207 auto buffer = llvm::MemoryBuffer::getMemBuffer(s); 208 auto binary_or_err = llvm::object::createBinary(buffer->getMemBufferRef()); 209 if (!binary_or_err) { 210 return ElfStatus::FILE_MALFORMED; 211 } 212 wrapper->binary = llvm::object::OwningBinary<llvm::object::Binary>(std::move(binary_or_err.get()), 213 std::move(buffer)); 214 wrapper->obj = llvm::dyn_cast<llvm::object::ObjectFile>(wrapper->binary.getBinary()); 215 if (wrapper->obj == nullptr) { 216 return ElfStatus::FILE_MALFORMED; 217 } 218 return ElfStatus::NO_ERROR; 219 } 220 221 ElfStatus GetBuildIdFromElfFile(const std::string& filename, BuildId* build_id) { 222 ElfStatus result = IsValidElfPath(filename); 223 if (result != ElfStatus::NO_ERROR) { 224 return result; 225 } 226 return GetBuildIdFromEmbeddedElfFile(filename, 0, 0, build_id); 227 } 228 229 ElfStatus GetBuildIdFromEmbeddedElfFile(const std::string& filename, uint64_t file_offset, 230 uint32_t file_size, BuildId* build_id) { 231 BinaryWrapper wrapper; 232 ElfStatus result = OpenObjectFile(filename, file_offset, file_size, &wrapper); 233 if (result != ElfStatus::NO_ERROR) { 234 return result; 235 } 236 return GetBuildIdFromObjectFile(wrapper.obj, build_id); 237 } 238 239 template <class ELFT> 240 ElfStatus ReadSectionFromELFFile(const llvm::object::ELFObjectFile<ELFT>* elf, const std::string& section_name, 241 std::string* content) { 242 for (llvm::object::section_iterator it = elf->section_begin(); it != elf->section_end(); ++it) { 243 llvm::StringRef name; 244 if (it->getName(name) || name != section_name) { 245 continue; 246 } 247 llvm::StringRef data; 248 std::error_code err = it->getContents(data); 249 if (err) { 250 return ElfStatus::READ_FAILED; 251 } 252 *content = data; 253 return ElfStatus::NO_ERROR; 254 } 255 return ElfStatus::SECTION_NOT_FOUND; 256 } 257 258 bool IsArmMappingSymbol(const char* name) { 259 // Mapping symbols in arm, which are described in "ELF for ARM Architecture" and 260 // "ELF for ARM 64-bit Architecture". The regular expression to match mapping symbol 261 // is ^\$(a|d|t|x)(\..*)?$ 262 return name[0] == '$' && strchr("adtx", name[1]) != nullptr && (name[2] == '\0' || name[2] == '.'); 263 } 264 265 void ReadSymbolTable(llvm::object::symbol_iterator sym_begin, 266 llvm::object::symbol_iterator sym_end, 267 const std::function<void(const ElfFileSymbol&)>& callback, 268 bool is_arm) { 269 for (; sym_begin != sym_end; ++sym_begin) { 270 ElfFileSymbol symbol; 271 auto symbol_ref = static_cast<const llvm::object::ELFSymbolRef*>(&*sym_begin); 272 llvm::Expected<llvm::object::section_iterator> section_it_or_err = symbol_ref->getSection(); 273 if (!section_it_or_err) { 274 continue; 275 } 276 277 llvm::StringRef section_name; 278 if (section_it_or_err.get()->getName(section_name) || section_name.empty()) { 279 continue; 280 } 281 if (section_name == ".text") { 282 symbol.is_in_text_section = true; 283 } 284 llvm::Expected<llvm::StringRef> symbol_name_or_err = symbol_ref->getName(); 285 if (!symbol_name_or_err || symbol_name_or_err.get().empty()) { 286 continue; 287 } 288 289 symbol.name = symbol_name_or_err.get(); 290 symbol.vaddr = symbol_ref->getValue(); 291 if ((symbol.vaddr & 1) != 0 && is_arm) { 292 // Arm sets bit 0 to mark it as thumb code, remove the flag. 293 symbol.vaddr &= ~1; 294 } 295 symbol.len = symbol_ref->getSize(); 296 llvm::object::SymbolRef::Type symbol_type = *symbol_ref->getType(); 297 if (symbol_type == llvm::object::SymbolRef::ST_Function) { 298 symbol.is_func = true; 299 } else if (symbol_type == llvm::object::SymbolRef::ST_Unknown) { 300 if (symbol.is_in_text_section) { 301 symbol.is_label = true; 302 if (is_arm) { 303 // Remove mapping symbols in arm. 304 const char* p = (symbol.name.compare(0, linker_prefix.size(), linker_prefix) == 0) 305 ? symbol.name.c_str() + linker_prefix.size() 306 : symbol.name.c_str(); 307 if (IsArmMappingSymbol(p)) { 308 symbol.is_label = false; 309 } 310 } 311 } 312 } 313 314 callback(symbol); 315 } 316 } 317 318 template <class ELFT> 319 void AddSymbolForPltSection(const llvm::object::ELFObjectFile<ELFT>* elf, 320 const std::function<void(const ElfFileSymbol&)>& callback) { 321 // We may sample instructions in .plt section if the program 322 // calls functions from shared libraries. Different architectures use 323 // different formats to store .plt section, so it needs a lot of work to match 324 // instructions in .plt section to symbols. As samples in .plt section rarely 325 // happen, and .plt section can hardly be a performance bottleneck, we can 326 // just use a symbol @plt to represent instructions in .plt section. 327 for (auto it = elf->section_begin(); it != elf->section_end(); ++it) { 328 const llvm::object::ELFSectionRef& section_ref = *it; 329 llvm::StringRef section_name; 330 std::error_code err = section_ref.getName(section_name); 331 if (err || section_name != ".plt") { 332 continue; 333 } 334 const auto* shdr = elf->getSection(section_ref.getRawDataRefImpl()); 335 if (shdr == nullptr) { 336 return; 337 } 338 ElfFileSymbol symbol; 339 symbol.vaddr = shdr->sh_addr; 340 symbol.len = shdr->sh_size; 341 symbol.is_func = true; 342 symbol.is_label = true; 343 symbol.is_in_text_section = true; 344 symbol.name = "@plt"; 345 callback(symbol); 346 return; 347 } 348 } 349 350 template <class ELFT> 351 void CheckSymbolSections(const llvm::object::ELFObjectFile<ELFT>* elf, 352 bool* has_symtab, bool* has_dynsym) { 353 *has_symtab = false; 354 *has_dynsym = false; 355 for (auto it = elf->section_begin(); it != elf->section_end(); ++it) { 356 const llvm::object::ELFSectionRef& section_ref = *it; 357 llvm::StringRef section_name; 358 std::error_code err = section_ref.getName(section_name); 359 if (err) { 360 continue; 361 } 362 if (section_name == ".dynsym") { 363 *has_dynsym = true; 364 } else if (section_name == ".symtab") { 365 *has_symtab = true; 366 } 367 } 368 } 369 370 template <class ELFT> 371 ElfStatus ParseSymbolsFromELFFile(const llvm::object::ELFObjectFile<ELFT>* elf, 372 const std::function<void(const ElfFileSymbol&)>& callback) { 373 auto machine = elf->getELFFile()->getHeader()->e_machine; 374 bool is_arm = (machine == llvm::ELF::EM_ARM || machine == llvm::ELF::EM_AARCH64); 375 AddSymbolForPltSection(elf, callback); 376 // Some applications deliberately ship elf files with broken section tables. 377 // So check the existence of .symtab section and .dynsym section before reading symbols. 378 bool has_symtab; 379 bool has_dynsym; 380 CheckSymbolSections(elf, &has_symtab, &has_dynsym); 381 if (has_symtab && elf->symbol_begin() != elf->symbol_end()) { 382 ReadSymbolTable(elf->symbol_begin(), elf->symbol_end(), callback, is_arm); 383 return ElfStatus::NO_ERROR; 384 } else if (has_dynsym && 385 elf->dynamic_symbol_begin()->getRawDataRefImpl() != llvm::object::DataRefImpl()) { 386 ReadSymbolTable(elf->dynamic_symbol_begin(), elf->dynamic_symbol_end(), callback, is_arm); 387 } 388 std::string debugdata; 389 ElfStatus result = ReadSectionFromELFFile(elf, ".gnu_debugdata", &debugdata); 390 if (result == ElfStatus::SECTION_NOT_FOUND) { 391 return ElfStatus::NO_SYMBOL_TABLE; 392 } else if (result == ElfStatus::NO_ERROR) { 393 std::string decompressed_data; 394 if (XzDecompress(debugdata, &decompressed_data)) { 395 BinaryWrapper wrapper; 396 result = OpenObjectFileFromString(decompressed_data, &wrapper); 397 if (result == ElfStatus::NO_ERROR) { 398 if (auto elf = llvm::dyn_cast<llvm::object::ELF32LEObjectFile>(wrapper.obj)) { 399 return ParseSymbolsFromELFFile(elf, callback); 400 } else if (auto elf = llvm::dyn_cast<llvm::object::ELF64LEObjectFile>(wrapper.obj)) { 401 return ParseSymbolsFromELFFile(elf, callback); 402 } else { 403 return ElfStatus::FILE_MALFORMED; 404 } 405 } 406 } 407 } 408 return result; 409 } 410 411 ElfStatus MatchBuildId(llvm::object::ObjectFile* obj, const BuildId& expected_build_id) { 412 if (expected_build_id.IsEmpty()) { 413 return ElfStatus::NO_ERROR; 414 } 415 BuildId real_build_id; 416 ElfStatus result = GetBuildIdFromObjectFile(obj, &real_build_id); 417 if (result != ElfStatus::NO_ERROR) { 418 return result; 419 } 420 if (expected_build_id != real_build_id) { 421 return ElfStatus::BUILD_ID_MISMATCH; 422 } 423 return ElfStatus::NO_ERROR; 424 } 425 426 ElfStatus ParseSymbolsFromElfFile(const std::string& filename, 427 const BuildId& expected_build_id, 428 const std::function<void(const ElfFileSymbol&)>& callback) { 429 ElfStatus result = IsValidElfPath(filename); 430 if (result != ElfStatus::NO_ERROR) { 431 return result; 432 } 433 return ParseSymbolsFromEmbeddedElfFile(filename, 0, 0, expected_build_id, callback); 434 } 435 436 ElfStatus ParseSymbolsFromEmbeddedElfFile(const std::string& filename, uint64_t file_offset, 437 uint32_t file_size, const BuildId& expected_build_id, 438 const std::function<void(const ElfFileSymbol&)>& callback) { 439 BinaryWrapper wrapper; 440 ElfStatus result = OpenObjectFile(filename, file_offset, file_size, &wrapper); 441 if (result != ElfStatus::NO_ERROR) { 442 return result; 443 } 444 result = MatchBuildId(wrapper.obj, expected_build_id); 445 if (result != ElfStatus::NO_ERROR) { 446 return result; 447 } 448 if (auto elf = llvm::dyn_cast<llvm::object::ELF32LEObjectFile>(wrapper.obj)) { 449 return ParseSymbolsFromELFFile(elf, callback); 450 } else if (auto elf = llvm::dyn_cast<llvm::object::ELF64LEObjectFile>(wrapper.obj)) { 451 return ParseSymbolsFromELFFile(elf, callback); 452 } 453 return ElfStatus::FILE_MALFORMED; 454 } 455 456 template <class ELFT> 457 ElfStatus ReadMinExecutableVirtualAddress(const llvm::object::ELFFile<ELFT>* elf, uint64_t* p_vaddr) { 458 bool has_vaddr = false; 459 uint64_t min_addr = std::numeric_limits<uint64_t>::max(); 460 for (auto it = elf->program_header_begin(); it != elf->program_header_end(); ++it) { 461 if ((it->p_type == llvm::ELF::PT_LOAD) && (it->p_flags & llvm::ELF::PF_X)) { 462 if (it->p_vaddr < min_addr) { 463 min_addr = it->p_vaddr; 464 has_vaddr = true; 465 } 466 } 467 } 468 if (!has_vaddr) { 469 return ElfStatus::FILE_MALFORMED; 470 } 471 *p_vaddr = min_addr; 472 return ElfStatus::NO_ERROR; 473 } 474 475 ElfStatus ReadMinExecutableVirtualAddressFromElfFile(const std::string& filename, 476 const BuildId& expected_build_id, 477 uint64_t* min_vaddr) { 478 ElfStatus result = IsValidElfPath(filename); 479 if (result != ElfStatus::NO_ERROR) { 480 return result; 481 } 482 BinaryWrapper wrapper; 483 result = OpenObjectFile(filename, 0, 0, &wrapper); 484 if (result != ElfStatus::NO_ERROR) { 485 return result; 486 } 487 result = MatchBuildId(wrapper.obj, expected_build_id); 488 if (result != ElfStatus::NO_ERROR) { 489 return result; 490 } 491 492 if (auto elf = llvm::dyn_cast<llvm::object::ELF32LEObjectFile>(wrapper.obj)) { 493 return ReadMinExecutableVirtualAddress(elf->getELFFile(), min_vaddr); 494 } else if (auto elf = llvm::dyn_cast<llvm::object::ELF64LEObjectFile>(wrapper.obj)) { 495 return ReadMinExecutableVirtualAddress(elf->getELFFile(), min_vaddr); 496 } else { 497 return ElfStatus::FILE_MALFORMED; 498 } 499 } 500 501 ElfStatus ReadSectionFromElfFile(const std::string& filename, const std::string& section_name, 502 std::string* content) { 503 ElfStatus result = IsValidElfPath(filename); 504 if (result != ElfStatus::NO_ERROR) { 505 return result; 506 } 507 BinaryWrapper wrapper; 508 result = OpenObjectFile(filename, 0, 0, &wrapper); 509 if (result != ElfStatus::NO_ERROR) { 510 return result; 511 } 512 if (auto elf = llvm::dyn_cast<llvm::object::ELF32LEObjectFile>(wrapper.obj)) { 513 return ReadSectionFromELFFile(elf, section_name, content); 514 } else if (auto elf = llvm::dyn_cast<llvm::object::ELF64LEObjectFile>(wrapper.obj)) { 515 return ReadSectionFromELFFile(elf, section_name, content); 516 } else { 517 return ElfStatus::FILE_MALFORMED; 518 } 519 } 520