Home | History | Annotate | Download | only in simpleperf
      1 /*
      2  * Copyright (C) 2015 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "read_elf.h"
     18 #include "read_apk.h"
     19 
     20 #include <stdio.h>
     21 #include <string.h>
     22 #include <sys/stat.h>
     23 #include <sys/types.h>
     24 
     25 #include <algorithm>
     26 #include <limits>
     27 
     28 #include <android-base/file.h>
     29 #include <android-base/logging.h>
     30 
     31 #pragma clang diagnostic push
     32 #pragma clang diagnostic ignored "-Wunused-parameter"
     33 
     34 #include <llvm/ADT/StringRef.h>
     35 #include <llvm/Object/Binary.h>
     36 #include <llvm/Object/ELFObjectFile.h>
     37 #include <llvm/Object/ObjectFile.h>
     38 
     39 #pragma clang diagnostic pop
     40 
     41 #include "utils.h"
     42 
     43 #define ELF_NOTE_GNU "GNU"
     44 #define NT_GNU_BUILD_ID 3
     45 
     46 std::ostream& operator<<(std::ostream& os, const ElfStatus& status) {
     47   switch (status) {
     48     case ElfStatus::NO_ERROR:
     49       os << "No error";
     50       break;
     51     case ElfStatus::FILE_NOT_FOUND:
     52       os << "File not found";
     53       break;
     54     case ElfStatus::READ_FAILED:
     55       os << "Read failed";
     56       break;
     57     case ElfStatus::FILE_MALFORMED:
     58       os << "Malformed file";
     59       break;
     60     case ElfStatus::NO_SYMBOL_TABLE:
     61       os << "No symbol table";
     62       break;
     63     case ElfStatus::NO_BUILD_ID:
     64       os << "No build id";
     65       break;
     66     case ElfStatus::BUILD_ID_MISMATCH:
     67       os << "Build id mismatch";
     68       break;
     69     case ElfStatus::SECTION_NOT_FOUND:
     70       os << "Section not found";
     71       break;
     72   }
     73   return os;
     74 }
     75 
     76 ElfStatus IsValidElfFile(int fd) {
     77   static const char elf_magic[] = {0x7f, 'E', 'L', 'F'};
     78   char buf[4];
     79   if (!android::base::ReadFully(fd, buf, 4)) {
     80     return ElfStatus::READ_FAILED;
     81   }
     82   if (memcmp(buf, elf_magic, 4) != 0) {
     83     return ElfStatus::FILE_MALFORMED;
     84   }
     85   return ElfStatus::NO_ERROR;
     86 }
     87 
     88 ElfStatus IsValidElfPath(const std::string& filename) {
     89   if (!IsRegularFile(filename)) {
     90     return ElfStatus::FILE_NOT_FOUND;
     91   }
     92   std::string mode = std::string("rb") + CLOSE_ON_EXEC_MODE;
     93   FILE* fp = fopen(filename.c_str(), mode.c_str());
     94   if (fp == nullptr) {
     95     return ElfStatus::READ_FAILED;
     96   }
     97   ElfStatus result = IsValidElfFile(fileno(fp));
     98   fclose(fp);
     99   return result;
    100 }
    101 
    102 bool GetBuildIdFromNoteSection(const char* section, size_t section_size, BuildId* build_id) {
    103   const char* p = section;
    104   const char* end = p + section_size;
    105   while (p < end) {
    106     if (p + 12 >= end) {
    107       return false;
    108     }
    109     uint32_t namesz;
    110     uint32_t descsz;
    111     uint32_t type;
    112     MoveFromBinaryFormat(namesz, p);
    113     MoveFromBinaryFormat(descsz, p);
    114     MoveFromBinaryFormat(type, p);
    115     namesz = Align(namesz, 4);
    116     descsz = Align(descsz, 4);
    117     if ((type == NT_GNU_BUILD_ID) && (p < end) && (strcmp(p, ELF_NOTE_GNU) == 0)) {
    118       const char* desc_start = p + namesz;
    119       const char* desc_end = desc_start + descsz;
    120       if (desc_start > p && desc_start < desc_end && desc_end <= end) {
    121         *build_id = BuildId(p + namesz, descsz);
    122         return true;
    123       } else {
    124         return false;
    125       }
    126     }
    127     p += namesz + descsz;
    128   }
    129   return false;
    130 }
    131 
    132 ElfStatus GetBuildIdFromNoteFile(const std::string& filename, BuildId* build_id) {
    133   std::string content;
    134   if (!android::base::ReadFileToString(filename, &content)) {
    135     return ElfStatus::READ_FAILED;
    136   }
    137   if (!GetBuildIdFromNoteSection(content.c_str(), content.size(), build_id)) {
    138     return ElfStatus::NO_BUILD_ID;
    139   }
    140   return ElfStatus::NO_ERROR;
    141 }
    142 
    143 template <class ELFT>
    144 ElfStatus GetBuildIdFromELFFile(const llvm::object::ELFObjectFile<ELFT>* elf, BuildId* build_id) {
    145   for (auto it = elf->section_begin(); it != elf->section_end(); ++it) {
    146     const llvm::object::ELFSectionRef& section_ref = *it;
    147     if (section_ref.getType() == llvm::ELF::SHT_NOTE) {
    148       llvm::StringRef data;
    149       if (it->getContents(data)) {
    150         return ElfStatus::READ_FAILED;
    151       }
    152       if (GetBuildIdFromNoteSection(data.data(), data.size(), build_id)) {
    153         return ElfStatus::NO_ERROR;
    154       }
    155     }
    156   }
    157   return ElfStatus::NO_BUILD_ID;
    158 }
    159 
    160 static ElfStatus GetBuildIdFromObjectFile(llvm::object::ObjectFile* obj, BuildId* build_id) {
    161   if (auto elf = llvm::dyn_cast<llvm::object::ELF32LEObjectFile>(obj)) {
    162     return GetBuildIdFromELFFile(elf, build_id);
    163   } else if (auto elf = llvm::dyn_cast<llvm::object::ELF64LEObjectFile>(obj)) {
    164     return GetBuildIdFromELFFile(elf, build_id);
    165   }
    166   return ElfStatus::FILE_MALFORMED;
    167 }
    168 
    169 struct BinaryWrapper {
    170   llvm::object::OwningBinary<llvm::object::Binary> binary;
    171   llvm::object::ObjectFile* obj;
    172 
    173   BinaryWrapper() : obj(nullptr) {
    174   }
    175 };
    176 
    177 static ElfStatus OpenObjectFile(const std::string& filename, uint64_t file_offset,
    178                                 uint64_t file_size, BinaryWrapper* wrapper) {
    179   FileHelper fhelper = FileHelper::OpenReadOnly(filename);
    180   if (!fhelper) {
    181     return ElfStatus::READ_FAILED;
    182   }
    183   if (file_size == 0) {
    184     file_size = GetFileSize(filename);
    185     if (file_size == 0) {
    186       return ElfStatus::READ_FAILED;
    187     }
    188   }
    189   auto buffer_or_err = llvm::MemoryBuffer::getOpenFileSlice(fhelper.fd(), filename, file_size, file_offset);
    190   if (!buffer_or_err) {
    191     return ElfStatus::READ_FAILED;
    192   }
    193   auto binary_or_err = llvm::object::createBinary(buffer_or_err.get()->getMemBufferRef());
    194   if (!binary_or_err) {
    195     return ElfStatus::READ_FAILED;
    196   }
    197   wrapper->binary = llvm::object::OwningBinary<llvm::object::Binary>(std::move(binary_or_err.get()),
    198                                                                         std::move(buffer_or_err.get()));
    199   wrapper->obj = llvm::dyn_cast<llvm::object::ObjectFile>(wrapper->binary.getBinary());
    200   if (wrapper->obj == nullptr) {
    201     return ElfStatus::FILE_MALFORMED;
    202   }
    203   return ElfStatus::NO_ERROR;
    204 }
    205 
    206 static ElfStatus OpenObjectFileFromString(const std::string& s, BinaryWrapper* wrapper) {
    207   auto buffer = llvm::MemoryBuffer::getMemBuffer(s);
    208   auto binary_or_err = llvm::object::createBinary(buffer->getMemBufferRef());
    209   if (!binary_or_err) {
    210     return ElfStatus::FILE_MALFORMED;
    211   }
    212   wrapper->binary = llvm::object::OwningBinary<llvm::object::Binary>(std::move(binary_or_err.get()),
    213                                                                 std::move(buffer));
    214   wrapper->obj = llvm::dyn_cast<llvm::object::ObjectFile>(wrapper->binary.getBinary());
    215   if (wrapper->obj == nullptr) {
    216     return ElfStatus::FILE_MALFORMED;
    217   }
    218   return ElfStatus::NO_ERROR;
    219 }
    220 
    221 ElfStatus GetBuildIdFromElfFile(const std::string& filename, BuildId* build_id) {
    222   ElfStatus result = IsValidElfPath(filename);
    223   if (result != ElfStatus::NO_ERROR) {
    224     return result;
    225   }
    226   return GetBuildIdFromEmbeddedElfFile(filename, 0, 0, build_id);
    227 }
    228 
    229 ElfStatus GetBuildIdFromEmbeddedElfFile(const std::string& filename, uint64_t file_offset,
    230                                         uint32_t file_size, BuildId* build_id) {
    231   BinaryWrapper wrapper;
    232   ElfStatus result = OpenObjectFile(filename, file_offset, file_size, &wrapper);
    233   if (result != ElfStatus::NO_ERROR) {
    234     return result;
    235   }
    236   return GetBuildIdFromObjectFile(wrapper.obj, build_id);
    237 }
    238 
    239 template <class ELFT>
    240 ElfStatus ReadSectionFromELFFile(const llvm::object::ELFObjectFile<ELFT>* elf, const std::string& section_name,
    241                                  std::string* content) {
    242   for (llvm::object::section_iterator it = elf->section_begin(); it != elf->section_end(); ++it) {
    243     llvm::StringRef name;
    244     if (it->getName(name) || name != section_name) {
    245       continue;
    246     }
    247     llvm::StringRef data;
    248     std::error_code err = it->getContents(data);
    249     if (err) {
    250       return ElfStatus::READ_FAILED;
    251     }
    252     *content = data;
    253     return ElfStatus::NO_ERROR;
    254   }
    255   return ElfStatus::SECTION_NOT_FOUND;
    256 }
    257 
    258 bool IsArmMappingSymbol(const char* name) {
    259   // Mapping symbols in arm, which are described in "ELF for ARM Architecture" and
    260   // "ELF for ARM 64-bit Architecture". The regular expression to match mapping symbol
    261   // is ^\$(a|d|t|x)(\..*)?$
    262   return name[0] == '$' && strchr("adtx", name[1]) != nullptr && (name[2] == '\0' || name[2] == '.');
    263 }
    264 
    265 void ReadSymbolTable(llvm::object::symbol_iterator sym_begin,
    266                      llvm::object::symbol_iterator sym_end,
    267                      const std::function<void(const ElfFileSymbol&)>& callback,
    268                      bool is_arm) {
    269   for (; sym_begin != sym_end; ++sym_begin) {
    270     ElfFileSymbol symbol;
    271     auto symbol_ref = static_cast<const llvm::object::ELFSymbolRef*>(&*sym_begin);
    272     llvm::Expected<llvm::object::section_iterator> section_it_or_err = symbol_ref->getSection();
    273     if (!section_it_or_err) {
    274       continue;
    275     }
    276 
    277     llvm::StringRef section_name;
    278     if (section_it_or_err.get()->getName(section_name) || section_name.empty()) {
    279       continue;
    280     }
    281     if (section_name == ".text") {
    282       symbol.is_in_text_section = true;
    283     }
    284     llvm::Expected<llvm::StringRef> symbol_name_or_err = symbol_ref->getName();
    285     if (!symbol_name_or_err || symbol_name_or_err.get().empty()) {
    286       continue;
    287     }
    288 
    289     symbol.name = symbol_name_or_err.get();
    290     symbol.vaddr = symbol_ref->getValue();
    291     if ((symbol.vaddr & 1) != 0 && is_arm) {
    292       // Arm sets bit 0 to mark it as thumb code, remove the flag.
    293       symbol.vaddr &= ~1;
    294     }
    295     symbol.len = symbol_ref->getSize();
    296     llvm::object::SymbolRef::Type symbol_type = *symbol_ref->getType();
    297     if (symbol_type == llvm::object::SymbolRef::ST_Function) {
    298       symbol.is_func = true;
    299     } else if (symbol_type == llvm::object::SymbolRef::ST_Unknown) {
    300       if (symbol.is_in_text_section) {
    301         symbol.is_label = true;
    302         if (is_arm) {
    303           // Remove mapping symbols in arm.
    304           const char* p = (symbol.name.compare(0, linker_prefix.size(), linker_prefix) == 0)
    305                               ? symbol.name.c_str() + linker_prefix.size()
    306                               : symbol.name.c_str();
    307           if (IsArmMappingSymbol(p)) {
    308             symbol.is_label = false;
    309           }
    310         }
    311       }
    312     }
    313 
    314     callback(symbol);
    315   }
    316 }
    317 
    318 template <class ELFT>
    319 void AddSymbolForPltSection(const llvm::object::ELFObjectFile<ELFT>* elf,
    320                             const std::function<void(const ElfFileSymbol&)>& callback) {
    321   // We may sample instructions in .plt section if the program
    322   // calls functions from shared libraries. Different architectures use
    323   // different formats to store .plt section, so it needs a lot of work to match
    324   // instructions in .plt section to symbols. As samples in .plt section rarely
    325   // happen, and .plt section can hardly be a performance bottleneck, we can
    326   // just use a symbol @plt to represent instructions in .plt section.
    327   for (auto it = elf->section_begin(); it != elf->section_end(); ++it) {
    328     const llvm::object::ELFSectionRef& section_ref = *it;
    329     llvm::StringRef section_name;
    330     std::error_code err = section_ref.getName(section_name);
    331     if (err || section_name != ".plt") {
    332       continue;
    333     }
    334     const auto* shdr = elf->getSection(section_ref.getRawDataRefImpl());
    335     if (shdr == nullptr) {
    336       return;
    337     }
    338     ElfFileSymbol symbol;
    339     symbol.vaddr = shdr->sh_addr;
    340     symbol.len = shdr->sh_size;
    341     symbol.is_func = true;
    342     symbol.is_label = true;
    343     symbol.is_in_text_section = true;
    344     symbol.name = "@plt";
    345     callback(symbol);
    346     return;
    347   }
    348 }
    349 
    350 template <class ELFT>
    351 void CheckSymbolSections(const llvm::object::ELFObjectFile<ELFT>* elf,
    352                          bool* has_symtab, bool* has_dynsym) {
    353   *has_symtab = false;
    354   *has_dynsym = false;
    355   for (auto it = elf->section_begin(); it != elf->section_end(); ++it) {
    356     const llvm::object::ELFSectionRef& section_ref = *it;
    357     llvm::StringRef section_name;
    358     std::error_code err = section_ref.getName(section_name);
    359     if (err) {
    360       continue;
    361     }
    362     if (section_name == ".dynsym") {
    363       *has_dynsym = true;
    364     } else if (section_name == ".symtab") {
    365       *has_symtab = true;
    366     }
    367   }
    368 }
    369 
    370 template <class ELFT>
    371 ElfStatus ParseSymbolsFromELFFile(const llvm::object::ELFObjectFile<ELFT>* elf,
    372                                   const std::function<void(const ElfFileSymbol&)>& callback) {
    373   auto machine = elf->getELFFile()->getHeader()->e_machine;
    374   bool is_arm = (machine == llvm::ELF::EM_ARM || machine == llvm::ELF::EM_AARCH64);
    375   AddSymbolForPltSection(elf, callback);
    376   // Some applications deliberately ship elf files with broken section tables.
    377   // So check the existence of .symtab section and .dynsym section before reading symbols.
    378   bool has_symtab;
    379   bool has_dynsym;
    380   CheckSymbolSections(elf, &has_symtab, &has_dynsym);
    381   if (has_symtab && elf->symbol_begin() != elf->symbol_end()) {
    382     ReadSymbolTable(elf->symbol_begin(), elf->symbol_end(), callback, is_arm);
    383     return ElfStatus::NO_ERROR;
    384   } else if (has_dynsym &&
    385       elf->dynamic_symbol_begin()->getRawDataRefImpl() != llvm::object::DataRefImpl()) {
    386     ReadSymbolTable(elf->dynamic_symbol_begin(), elf->dynamic_symbol_end(), callback, is_arm);
    387   }
    388   std::string debugdata;
    389   ElfStatus result = ReadSectionFromELFFile(elf, ".gnu_debugdata", &debugdata);
    390   if (result == ElfStatus::SECTION_NOT_FOUND) {
    391     return ElfStatus::NO_SYMBOL_TABLE;
    392   } else if (result == ElfStatus::NO_ERROR) {
    393     std::string decompressed_data;
    394     if (XzDecompress(debugdata, &decompressed_data)) {
    395       BinaryWrapper wrapper;
    396       result = OpenObjectFileFromString(decompressed_data, &wrapper);
    397       if (result == ElfStatus::NO_ERROR) {
    398         if (auto elf = llvm::dyn_cast<llvm::object::ELF32LEObjectFile>(wrapper.obj)) {
    399           return ParseSymbolsFromELFFile(elf, callback);
    400         } else if (auto elf = llvm::dyn_cast<llvm::object::ELF64LEObjectFile>(wrapper.obj)) {
    401           return ParseSymbolsFromELFFile(elf, callback);
    402         } else {
    403           return ElfStatus::FILE_MALFORMED;
    404         }
    405       }
    406     }
    407   }
    408   return result;
    409 }
    410 
    411 ElfStatus MatchBuildId(llvm::object::ObjectFile* obj, const BuildId& expected_build_id) {
    412   if (expected_build_id.IsEmpty()) {
    413     return ElfStatus::NO_ERROR;
    414   }
    415   BuildId real_build_id;
    416   ElfStatus result = GetBuildIdFromObjectFile(obj, &real_build_id);
    417   if (result != ElfStatus::NO_ERROR) {
    418     return result;
    419   }
    420   if (expected_build_id != real_build_id) {
    421     return ElfStatus::BUILD_ID_MISMATCH;
    422   }
    423   return ElfStatus::NO_ERROR;
    424 }
    425 
    426 ElfStatus ParseSymbolsFromElfFile(const std::string& filename,
    427                                   const BuildId& expected_build_id,
    428                                   const std::function<void(const ElfFileSymbol&)>& callback) {
    429   ElfStatus result = IsValidElfPath(filename);
    430   if (result != ElfStatus::NO_ERROR) {
    431     return result;
    432   }
    433   return ParseSymbolsFromEmbeddedElfFile(filename, 0, 0, expected_build_id, callback);
    434 }
    435 
    436 ElfStatus ParseSymbolsFromEmbeddedElfFile(const std::string& filename, uint64_t file_offset,
    437                                      uint32_t file_size, const BuildId& expected_build_id,
    438                                      const std::function<void(const ElfFileSymbol&)>& callback) {
    439   BinaryWrapper wrapper;
    440   ElfStatus result = OpenObjectFile(filename, file_offset, file_size, &wrapper);
    441   if (result != ElfStatus::NO_ERROR) {
    442     return result;
    443   }
    444   result = MatchBuildId(wrapper.obj, expected_build_id);
    445   if (result != ElfStatus::NO_ERROR) {
    446     return result;
    447   }
    448   if (auto elf = llvm::dyn_cast<llvm::object::ELF32LEObjectFile>(wrapper.obj)) {
    449     return ParseSymbolsFromELFFile(elf, callback);
    450   } else if (auto elf = llvm::dyn_cast<llvm::object::ELF64LEObjectFile>(wrapper.obj)) {
    451     return ParseSymbolsFromELFFile(elf, callback);
    452   }
    453   return ElfStatus::FILE_MALFORMED;
    454 }
    455 
    456 template <class ELFT>
    457 ElfStatus ReadMinExecutableVirtualAddress(const llvm::object::ELFFile<ELFT>* elf, uint64_t* p_vaddr) {
    458   bool has_vaddr = false;
    459   uint64_t min_addr = std::numeric_limits<uint64_t>::max();
    460   for (auto it = elf->program_header_begin(); it != elf->program_header_end(); ++it) {
    461     if ((it->p_type == llvm::ELF::PT_LOAD) && (it->p_flags & llvm::ELF::PF_X)) {
    462       if (it->p_vaddr < min_addr) {
    463         min_addr = it->p_vaddr;
    464         has_vaddr = true;
    465       }
    466     }
    467   }
    468   if (!has_vaddr) {
    469     return ElfStatus::FILE_MALFORMED;
    470   }
    471   *p_vaddr = min_addr;
    472   return ElfStatus::NO_ERROR;
    473 }
    474 
    475 ElfStatus ReadMinExecutableVirtualAddressFromElfFile(const std::string& filename,
    476                                                      const BuildId& expected_build_id,
    477                                                      uint64_t* min_vaddr) {
    478   ElfStatus result = IsValidElfPath(filename);
    479   if (result != ElfStatus::NO_ERROR) {
    480     return result;
    481   }
    482   BinaryWrapper wrapper;
    483   result = OpenObjectFile(filename, 0, 0, &wrapper);
    484   if (result != ElfStatus::NO_ERROR) {
    485     return result;
    486   }
    487   result = MatchBuildId(wrapper.obj, expected_build_id);
    488   if (result != ElfStatus::NO_ERROR) {
    489     return result;
    490   }
    491 
    492   if (auto elf = llvm::dyn_cast<llvm::object::ELF32LEObjectFile>(wrapper.obj)) {
    493     return ReadMinExecutableVirtualAddress(elf->getELFFile(), min_vaddr);
    494   } else if (auto elf = llvm::dyn_cast<llvm::object::ELF64LEObjectFile>(wrapper.obj)) {
    495     return ReadMinExecutableVirtualAddress(elf->getELFFile(), min_vaddr);
    496   } else {
    497     return ElfStatus::FILE_MALFORMED;
    498   }
    499 }
    500 
    501 ElfStatus ReadSectionFromElfFile(const std::string& filename, const std::string& section_name,
    502                                  std::string* content) {
    503   ElfStatus result = IsValidElfPath(filename);
    504   if (result != ElfStatus::NO_ERROR) {
    505     return result;
    506   }
    507   BinaryWrapper wrapper;
    508   result = OpenObjectFile(filename, 0, 0, &wrapper);
    509   if (result != ElfStatus::NO_ERROR) {
    510     return result;
    511   }
    512   if (auto elf = llvm::dyn_cast<llvm::object::ELF32LEObjectFile>(wrapper.obj)) {
    513     return ReadSectionFromELFFile(elf, section_name, content);
    514   } else if (auto elf = llvm::dyn_cast<llvm::object::ELF64LEObjectFile>(wrapper.obj)) {
    515     return ReadSectionFromELFFile(elf, section_name, content);
    516   } else {
    517     return ElfStatus::FILE_MALFORMED;
    518   }
    519 }
    520