1 /* 2 * Copyright (C) 2015 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "read_elf.h" 18 #include "read_apk.h" 19 20 #include <stdio.h> 21 #include <string.h> 22 #include <sys/stat.h> 23 #include <sys/types.h> 24 25 #include <algorithm> 26 #include <limits> 27 28 #include <android-base/file.h> 29 #include <android-base/logging.h> 30 31 #pragma clang diagnostic push 32 #pragma clang diagnostic ignored "-Wunused-parameter" 33 34 #include <llvm/ADT/StringRef.h> 35 #include <llvm/Object/Binary.h> 36 #include <llvm/Object/ELFObjectFile.h> 37 #include <llvm/Object/ObjectFile.h> 38 39 #pragma clang diagnostic pop 40 41 #include "utils.h" 42 43 #define ELF_NOTE_GNU "GNU" 44 #define NT_GNU_BUILD_ID 3 45 46 std::ostream& operator<<(std::ostream& os, const ElfStatus& status) { 47 switch (status) { 48 case ElfStatus::NO_ERROR: 49 os << "No error"; 50 break; 51 case ElfStatus::FILE_NOT_FOUND: 52 os << "File not found"; 53 break; 54 case ElfStatus::READ_FAILED: 55 os << "Read failed"; 56 break; 57 case ElfStatus::FILE_MALFORMED: 58 os << "Malformed file"; 59 break; 60 case ElfStatus::NO_SYMBOL_TABLE: 61 os << "No symbol table"; 62 break; 63 case ElfStatus::NO_BUILD_ID: 64 os << "No build id"; 65 break; 66 case ElfStatus::BUILD_ID_MISMATCH: 67 os << "Build id mismatch"; 68 break; 69 case ElfStatus::SECTION_NOT_FOUND: 70 os << "Section not found"; 71 break; 72 } 73 return os; 74 } 75 76 ElfStatus IsValidElfFile(int fd) { 77 static const char elf_magic[] = {0x7f, 'E', 'L', 'F'}; 78 char buf[4]; 79 if (!android::base::ReadFully(fd, buf, 4)) { 80 return ElfStatus::READ_FAILED; 81 } 82 if (memcmp(buf, elf_magic, 4) != 0) { 83 return ElfStatus::FILE_MALFORMED; 84 } 85 return ElfStatus::NO_ERROR; 86 } 87 88 ElfStatus IsValidElfPath(const std::string& filename) { 89 if (!IsRegularFile(filename)) { 90 return ElfStatus::FILE_NOT_FOUND; 91 } 92 std::string mode = std::string("rb") + CLOSE_ON_EXEC_MODE; 93 FILE* fp = fopen(filename.c_str(), mode.c_str()); 94 if (fp == nullptr) { 95 return ElfStatus::READ_FAILED; 96 } 97 ElfStatus result = IsValidElfFile(fileno(fp)); 98 fclose(fp); 99 return result; 100 } 101 102 bool GetBuildIdFromNoteSection(const char* section, size_t section_size, BuildId* build_id) { 103 const char* p = section; 104 const char* end = p + section_size; 105 while (p < end) { 106 if (p + 12 >= end) { 107 return false; 108 } 109 uint32_t namesz; 110 uint32_t descsz; 111 uint32_t type; 112 MoveFromBinaryFormat(namesz, p); 113 MoveFromBinaryFormat(descsz, p); 114 MoveFromBinaryFormat(type, p); 115 namesz = Align(namesz, 4); 116 descsz = Align(descsz, 4); 117 if ((type == NT_GNU_BUILD_ID) && (p < end) && (strcmp(p, ELF_NOTE_GNU) == 0)) { 118 const char* desc_start = p + namesz; 119 const char* desc_end = desc_start + descsz; 120 if (desc_start > p && desc_start < desc_end && desc_end <= end) { 121 *build_id = BuildId(p + namesz, descsz); 122 return true; 123 } else { 124 return false; 125 } 126 } 127 p += namesz + descsz; 128 } 129 return false; 130 } 131 132 ElfStatus GetBuildIdFromNoteFile(const std::string& filename, BuildId* build_id) { 133 std::string content; 134 if (!android::base::ReadFileToString(filename, &content)) { 135 return ElfStatus::READ_FAILED; 136 } 137 if (!GetBuildIdFromNoteSection(content.c_str(), content.size(), build_id)) { 138 return ElfStatus::NO_BUILD_ID; 139 } 140 return ElfStatus::NO_ERROR; 141 } 142 143 template <class ELFT> 144 ElfStatus GetBuildIdFromELFFile(const llvm::object::ELFObjectFile<ELFT>* elf, BuildId* build_id) { 145 for (auto it = elf->section_begin(); it != elf->section_end(); ++it) { 146 const llvm::object::ELFSectionRef& section_ref = *it; 147 if (section_ref.getType() == llvm::ELF::SHT_NOTE) { 148 llvm::StringRef data; 149 if (it->getContents(data)) { 150 return ElfStatus::READ_FAILED; 151 } 152 if (GetBuildIdFromNoteSection(data.data(), data.size(), build_id)) { 153 return ElfStatus::NO_ERROR; 154 } 155 } 156 } 157 return ElfStatus::NO_BUILD_ID; 158 } 159 160 static ElfStatus GetBuildIdFromObjectFile(llvm::object::ObjectFile* obj, BuildId* build_id) { 161 if (auto elf = llvm::dyn_cast<llvm::object::ELF32LEObjectFile>(obj)) { 162 return GetBuildIdFromELFFile(elf, build_id); 163 } else if (auto elf = llvm::dyn_cast<llvm::object::ELF64LEObjectFile>(obj)) { 164 return GetBuildIdFromELFFile(elf, build_id); 165 } 166 return ElfStatus::FILE_MALFORMED; 167 } 168 169 struct BinaryWrapper { 170 llvm::object::OwningBinary<llvm::object::Binary> binary; 171 llvm::object::ObjectFile* obj; 172 173 BinaryWrapper() : obj(nullptr) { 174 } 175 }; 176 177 static ElfStatus OpenObjectFile(const std::string& filename, uint64_t file_offset, 178 uint64_t file_size, BinaryWrapper* wrapper) { 179 FileHelper fhelper = FileHelper::OpenReadOnly(filename); 180 if (!fhelper) { 181 return ElfStatus::READ_FAILED; 182 } 183 if (file_size == 0) { 184 file_size = GetFileSize(filename); 185 if (file_size == 0) { 186 return ElfStatus::READ_FAILED; 187 } 188 } 189 auto buffer_or_err = llvm::MemoryBuffer::getOpenFileSlice(fhelper.fd(), filename, file_size, file_offset); 190 if (!buffer_or_err) { 191 return ElfStatus::READ_FAILED; 192 } 193 auto binary_or_err = llvm::object::createBinary(buffer_or_err.get()->getMemBufferRef()); 194 if (!binary_or_err) { 195 return ElfStatus::READ_FAILED; 196 } 197 wrapper->binary = llvm::object::OwningBinary<llvm::object::Binary>(std::move(binary_or_err.get()), 198 std::move(buffer_or_err.get())); 199 wrapper->obj = llvm::dyn_cast<llvm::object::ObjectFile>(wrapper->binary.getBinary()); 200 if (wrapper->obj == nullptr) { 201 return ElfStatus::FILE_MALFORMED; 202 } 203 return ElfStatus::NO_ERROR; 204 } 205 206 static ElfStatus OpenObjectFileFromString(const std::string& s, BinaryWrapper* wrapper) { 207 auto buffer = llvm::MemoryBuffer::getMemBuffer(s); 208 auto binary_or_err = llvm::object::createBinary(buffer->getMemBufferRef()); 209 if (!binary_or_err) { 210 return ElfStatus::FILE_MALFORMED; 211 } 212 wrapper->binary = llvm::object::OwningBinary<llvm::object::Binary>(std::move(binary_or_err.get()), 213 std::move(buffer)); 214 wrapper->obj = llvm::dyn_cast<llvm::object::ObjectFile>(wrapper->binary.getBinary()); 215 if (wrapper->obj == nullptr) { 216 return ElfStatus::FILE_MALFORMED; 217 } 218 return ElfStatus::NO_ERROR; 219 } 220 221 ElfStatus GetBuildIdFromElfFile(const std::string& filename, BuildId* build_id) { 222 ElfStatus result = IsValidElfPath(filename); 223 if (result != ElfStatus::NO_ERROR) { 224 return result; 225 } 226 return GetBuildIdFromEmbeddedElfFile(filename, 0, 0, build_id); 227 } 228 229 ElfStatus GetBuildIdFromEmbeddedElfFile(const std::string& filename, uint64_t file_offset, 230 uint32_t file_size, BuildId* build_id) { 231 BinaryWrapper wrapper; 232 ElfStatus result = OpenObjectFile(filename, file_offset, file_size, &wrapper); 233 if (result != ElfStatus::NO_ERROR) { 234 return result; 235 } 236 return GetBuildIdFromObjectFile(wrapper.obj, build_id); 237 } 238 239 template <class ELFT> 240 ElfStatus ReadSectionFromELFFile(const llvm::object::ELFObjectFile<ELFT>* elf, const std::string& section_name, 241 std::string* content) { 242 for (llvm::object::section_iterator it = elf->section_begin(); it != elf->section_end(); ++it) { 243 llvm::StringRef name; 244 if (it->getName(name) || name != section_name) { 245 continue; 246 } 247 llvm::StringRef data; 248 std::error_code err = it->getContents(data); 249 if (err) { 250 return ElfStatus::READ_FAILED; 251 } 252 *content = data; 253 return ElfStatus::NO_ERROR; 254 } 255 return ElfStatus::SECTION_NOT_FOUND; 256 } 257 258 bool IsArmMappingSymbol(const char* name) { 259 // Mapping symbols in arm, which are described in "ELF for ARM Architecture" and 260 // "ELF for ARM 64-bit Architecture". The regular expression to match mapping symbol 261 // is ^\$(a|d|t|x)(\..*)?$ 262 return name[0] == '$' && strchr("adtx", name[1]) != nullptr && (name[2] == '\0' || name[2] == '.'); 263 } 264 265 void ReadSymbolTable(llvm::object::symbol_iterator sym_begin, 266 llvm::object::symbol_iterator sym_end, 267 const std::function<void(const ElfFileSymbol&)>& callback, 268 bool is_arm) { 269 for (; sym_begin != sym_end; ++sym_begin) { 270 ElfFileSymbol symbol; 271 auto symbol_ref = static_cast<const llvm::object::ELFSymbolRef*>(&*sym_begin); 272 llvm::Expected<llvm::object::section_iterator> section_it_or_err = symbol_ref->getSection(); 273 if (!section_it_or_err) { 274 continue; 275 } 276 277 llvm::StringRef section_name; 278 if (section_it_or_err.get()->getName(section_name) || section_name.empty()) { 279 continue; 280 } 281 if (section_name == ".text") { 282 symbol.is_in_text_section = true; 283 } 284 llvm::Expected<llvm::StringRef> symbol_name_or_err = symbol_ref->getName(); 285 if (!symbol_name_or_err || symbol_name_or_err.get().empty()) { 286 continue; 287 } 288 289 symbol.name = symbol_name_or_err.get(); 290 symbol.vaddr = symbol_ref->getValue(); 291 if ((symbol.vaddr & 1) != 0 && is_arm) { 292 // Arm sets bit 0 to mark it as thumb code, remove the flag. 293 symbol.vaddr &= ~1; 294 } 295 symbol.len = symbol_ref->getSize(); 296 llvm::object::SymbolRef::Type symbol_type = *symbol_ref->getType(); 297 if (symbol_type == llvm::object::SymbolRef::ST_Function) { 298 symbol.is_func = true; 299 } else if (symbol_type == llvm::object::SymbolRef::ST_Unknown) { 300 if (symbol.is_in_text_section) { 301 symbol.is_label = true; 302 if (is_arm) { 303 // Remove mapping symbols in arm. 304 const char* p = (symbol.name.compare(0, linker_prefix.size(), linker_prefix) == 0) 305 ? symbol.name.c_str() + linker_prefix.size() 306 : symbol.name.c_str(); 307 if (IsArmMappingSymbol(p)) { 308 symbol.is_label = false; 309 } 310 } 311 } 312 } 313 314 callback(symbol); 315 } 316 } 317 318 template <class ELFT> 319 void AddSymbolForPltSection(const llvm::object::ELFObjectFile<ELFT>* elf, 320 const std::function<void(const ElfFileSymbol&)>& callback) { 321 // We may sample instructions in .plt section if the program 322 // calls functions from shared libraries. Different architectures use 323 // different formats to store .plt section, so it needs a lot of work to match 324 // instructions in .plt section to symbols. As samples in .plt section rarely 325 // happen, and .plt section can hardly be a performance bottleneck, we can 326 // just use a symbol @plt to represent instructions in .plt section. 327 for (auto it = elf->section_begin(); it != elf->section_end(); ++it) { 328 const llvm::object::ELFSectionRef& section_ref = *it; 329 llvm::StringRef section_name; 330 std::error_code err = section_ref.getName(section_name); 331 if (err || section_name != ".plt") { 332 continue; 333 } 334 const auto* shdr = elf->getSection(section_ref.getRawDataRefImpl()); 335 if (shdr == nullptr) { 336 return; 337 } 338 ElfFileSymbol symbol; 339 symbol.vaddr = shdr->sh_addr; 340 symbol.len = shdr->sh_size; 341 symbol.is_func = true; 342 symbol.is_label = true; 343 symbol.is_in_text_section = true; 344 symbol.name = "@plt"; 345 callback(symbol); 346 return; 347 } 348 } 349 350 template <class ELFT> 351 ElfStatus ParseSymbolsFromELFFile(const llvm::object::ELFObjectFile<ELFT>* elf, 352 const std::function<void(const ElfFileSymbol&)>& callback) { 353 auto machine = elf->getELFFile()->getHeader()->e_machine; 354 bool is_arm = (machine == llvm::ELF::EM_ARM || machine == llvm::ELF::EM_AARCH64); 355 AddSymbolForPltSection(elf, callback); 356 if (elf->symbol_begin() != elf->symbol_end()) { 357 ReadSymbolTable(elf->symbol_begin(), elf->symbol_end(), callback, is_arm); 358 return ElfStatus::NO_ERROR; 359 } else if (elf->dynamic_symbol_begin()->getRawDataRefImpl() != llvm::object::DataRefImpl()) { 360 ReadSymbolTable(elf->dynamic_symbol_begin(), elf->dynamic_symbol_end(), callback, is_arm); 361 } 362 std::string debugdata; 363 ElfStatus result = ReadSectionFromELFFile(elf, ".gnu_debugdata", &debugdata); 364 if (result == ElfStatus::SECTION_NOT_FOUND) { 365 return ElfStatus::NO_SYMBOL_TABLE; 366 } else if (result == ElfStatus::NO_ERROR) { 367 std::string decompressed_data; 368 if (XzDecompress(debugdata, &decompressed_data)) { 369 BinaryWrapper wrapper; 370 result = OpenObjectFileFromString(decompressed_data, &wrapper); 371 if (result == ElfStatus::NO_ERROR) { 372 if (auto elf = llvm::dyn_cast<llvm::object::ELF32LEObjectFile>(wrapper.obj)) { 373 return ParseSymbolsFromELFFile(elf, callback); 374 } else if (auto elf = llvm::dyn_cast<llvm::object::ELF64LEObjectFile>(wrapper.obj)) { 375 return ParseSymbolsFromELFFile(elf, callback); 376 } else { 377 return ElfStatus::FILE_MALFORMED; 378 } 379 } 380 } 381 } 382 return result; 383 } 384 385 ElfStatus MatchBuildId(llvm::object::ObjectFile* obj, const BuildId& expected_build_id) { 386 if (expected_build_id.IsEmpty()) { 387 return ElfStatus::NO_ERROR; 388 } 389 BuildId real_build_id; 390 ElfStatus result = GetBuildIdFromObjectFile(obj, &real_build_id); 391 if (result != ElfStatus::NO_ERROR) { 392 return result; 393 } 394 if (expected_build_id != real_build_id) { 395 return ElfStatus::BUILD_ID_MISMATCH; 396 } 397 return ElfStatus::NO_ERROR; 398 } 399 400 ElfStatus ParseSymbolsFromElfFile(const std::string& filename, 401 const BuildId& expected_build_id, 402 const std::function<void(const ElfFileSymbol&)>& callback) { 403 ElfStatus result = IsValidElfPath(filename); 404 if (result != ElfStatus::NO_ERROR) { 405 return result; 406 } 407 return ParseSymbolsFromEmbeddedElfFile(filename, 0, 0, expected_build_id, callback); 408 } 409 410 ElfStatus ParseSymbolsFromEmbeddedElfFile(const std::string& filename, uint64_t file_offset, 411 uint32_t file_size, const BuildId& expected_build_id, 412 const std::function<void(const ElfFileSymbol&)>& callback) { 413 BinaryWrapper wrapper; 414 ElfStatus result = OpenObjectFile(filename, file_offset, file_size, &wrapper); 415 if (result != ElfStatus::NO_ERROR) { 416 return result; 417 } 418 result = MatchBuildId(wrapper.obj, expected_build_id); 419 if (result != ElfStatus::NO_ERROR) { 420 return result; 421 } 422 if (auto elf = llvm::dyn_cast<llvm::object::ELF32LEObjectFile>(wrapper.obj)) { 423 return ParseSymbolsFromELFFile(elf, callback); 424 } else if (auto elf = llvm::dyn_cast<llvm::object::ELF64LEObjectFile>(wrapper.obj)) { 425 return ParseSymbolsFromELFFile(elf, callback); 426 } 427 return ElfStatus::FILE_MALFORMED; 428 } 429 430 template <class ELFT> 431 ElfStatus ReadMinExecutableVirtualAddress(const llvm::object::ELFFile<ELFT>* elf, uint64_t* p_vaddr) { 432 bool has_vaddr = false; 433 uint64_t min_addr = std::numeric_limits<uint64_t>::max(); 434 for (auto it = elf->program_header_begin(); it != elf->program_header_end(); ++it) { 435 if ((it->p_type == llvm::ELF::PT_LOAD) && (it->p_flags & llvm::ELF::PF_X)) { 436 if (it->p_vaddr < min_addr) { 437 min_addr = it->p_vaddr; 438 has_vaddr = true; 439 } 440 } 441 } 442 if (!has_vaddr) { 443 return ElfStatus::FILE_MALFORMED; 444 } 445 *p_vaddr = min_addr; 446 return ElfStatus::NO_ERROR; 447 } 448 449 ElfStatus ReadMinExecutableVirtualAddressFromElfFile(const std::string& filename, 450 const BuildId& expected_build_id, 451 uint64_t* min_vaddr) { 452 ElfStatus result = IsValidElfPath(filename); 453 if (result != ElfStatus::NO_ERROR) { 454 return result; 455 } 456 BinaryWrapper wrapper; 457 result = OpenObjectFile(filename, 0, 0, &wrapper); 458 if (result != ElfStatus::NO_ERROR) { 459 return result; 460 } 461 result = MatchBuildId(wrapper.obj, expected_build_id); 462 if (result != ElfStatus::NO_ERROR) { 463 return result; 464 } 465 466 if (auto elf = llvm::dyn_cast<llvm::object::ELF32LEObjectFile>(wrapper.obj)) { 467 return ReadMinExecutableVirtualAddress(elf->getELFFile(), min_vaddr); 468 } else if (auto elf = llvm::dyn_cast<llvm::object::ELF64LEObjectFile>(wrapper.obj)) { 469 return ReadMinExecutableVirtualAddress(elf->getELFFile(), min_vaddr); 470 } else { 471 return ElfStatus::FILE_MALFORMED; 472 } 473 } 474 475 ElfStatus ReadSectionFromElfFile(const std::string& filename, const std::string& section_name, 476 std::string* content) { 477 ElfStatus result = IsValidElfPath(filename); 478 if (result != ElfStatus::NO_ERROR) { 479 return result; 480 } 481 BinaryWrapper wrapper; 482 result = OpenObjectFile(filename, 0, 0, &wrapper); 483 if (result != ElfStatus::NO_ERROR) { 484 return result; 485 } 486 if (auto elf = llvm::dyn_cast<llvm::object::ELF32LEObjectFile>(wrapper.obj)) { 487 return ReadSectionFromELFFile(elf, section_name, content); 488 } else if (auto elf = llvm::dyn_cast<llvm::object::ELF64LEObjectFile>(wrapper.obj)) { 489 return ReadSectionFromELFFile(elf, section_name, content); 490 } else { 491 return ElfStatus::FILE_MALFORMED; 492 } 493 } 494