Home | History | Annotate | Download | only in cc
      1 /*
      2  * Copyright (c) 2016 GitHub, Inc.
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  * http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include <cxxabi.h>
     18 #include <cstring>
     19 #include <fcntl.h>
     20 #include <linux/elf.h>
     21 #include <string.h>
     22 #include <sys/stat.h>
     23 #include <sys/types.h>
     24 #include <unistd.h>
     25 #include <cstdio>
     26 
     27 #include "bcc_elf.h"
     28 #include "bcc_perf_map.h"
     29 #include "bcc_proc.h"
     30 #include "bcc_syms.h"
     31 #include "common.h"
     32 #include "vendor/tinyformat.hpp"
     33 
     34 #include "syms.h"
     35 
     36 ino_t ProcStat::getinode_() {
     37   struct stat s;
     38   return (!stat(procfs_.c_str(), &s)) ? s.st_ino : -1;
     39 }
     40 
     41 bool ProcStat::is_stale() {
     42   ino_t cur_inode = getinode_();
     43   return (cur_inode > 0) && (cur_inode != inode_);
     44 }
     45 
     46 ProcStat::ProcStat(int pid)
     47     : procfs_(tfm::format("/proc/%d/exe", pid)), inode_(getinode_()) {}
     48 
     49 void KSyms::_add_symbol(const char *symname, uint64_t addr, void *p) {
     50   KSyms *ks = static_cast<KSyms *>(p);
     51   ks->syms_.emplace_back(symname, addr);
     52 }
     53 
     54 void KSyms::refresh() {
     55   if (syms_.empty()) {
     56     bcc_procutils_each_ksym(_add_symbol, this);
     57     std::sort(syms_.begin(), syms_.end());
     58   }
     59 }
     60 
     61 bool KSyms::resolve_addr(uint64_t addr, struct bcc_symbol *sym, bool demangle) {
     62   refresh();
     63 
     64   std::vector<Symbol>::iterator it;
     65 
     66   if (syms_.empty())
     67     goto unknown_symbol;
     68 
     69   it = std::upper_bound(syms_.begin(), syms_.end(), Symbol("", addr));
     70   if (it != syms_.begin()) {
     71     it--;
     72     sym->name = (*it).name.c_str();
     73     if (demangle)
     74       sym->demangle_name = sym->name;
     75     sym->module = "kernel";
     76     sym->offset = addr - (*it).addr;
     77     return true;
     78   }
     79 
     80 unknown_symbol:
     81   memset(sym, 0, sizeof(struct bcc_symbol));
     82   return false;
     83 }
     84 
     85 bool KSyms::resolve_name(const char *_unused, const char *name,
     86                          uint64_t *addr) {
     87   refresh();
     88 
     89   if (syms_.size() != symnames_.size()) {
     90     symnames_.clear();
     91     for (Symbol &sym : syms_) {
     92       symnames_[sym.name] = sym.addr;
     93     }
     94   }
     95 
     96   auto it = symnames_.find(name);
     97   if (it == symnames_.end())
     98     return false;
     99 
    100   *addr = it->second;
    101   return true;
    102 }
    103 
    104 ProcSyms::ProcSyms(int pid, struct bcc_symbol_option *option)
    105     : pid_(pid), procstat_(pid), mount_ns_instance_(new ProcMountNS(pid_)) {
    106   if (option)
    107     std::memcpy(&symbol_option_, option, sizeof(bcc_symbol_option));
    108   else
    109     symbol_option_ = {
    110       .use_debug_file = 1,
    111       .check_debug_file_crc = 1,
    112       .use_symbol_type = (1 << STT_FUNC) | (1 << STT_GNU_IFUNC)
    113     };
    114   load_modules();
    115 }
    116 
    117 int ProcSyms::_add_load_sections(uint64_t v_addr, uint64_t mem_sz,
    118                                  uint64_t file_offset, void *payload) {
    119   auto module = static_cast<Module *>(payload);
    120   module->ranges_.emplace_back(v_addr, v_addr + mem_sz, file_offset);
    121   return 0;
    122 }
    123 
    124 void ProcSyms::load_exe() {
    125   std::string exe = ebpf::get_pid_exe(pid_);
    126   Module module(exe.c_str(), mount_ns_instance_.get(), &symbol_option_);
    127 
    128   if (module.type_ != ModuleType::EXEC)
    129     return;
    130 
    131   ProcMountNSGuard g(mount_ns_instance_.get());
    132 
    133   bcc_elf_foreach_load_section(exe.c_str(), &_add_load_sections, &module);
    134 
    135   if (!module.ranges_.empty())
    136     modules_.emplace_back(std::move(module));
    137 }
    138 
    139 void ProcSyms::load_modules() {
    140   load_exe();
    141   bcc_procutils_each_module(pid_, _add_module, this);
    142 }
    143 
    144 void ProcSyms::refresh() {
    145   modules_.clear();
    146   mount_ns_instance_.reset(new ProcMountNS(pid_));
    147   load_modules();
    148   procstat_.reset();
    149 }
    150 
    151 int ProcSyms::_add_module(const char *modname, uint64_t start, uint64_t end,
    152                           uint64_t offset, bool check_mount_ns, void *payload) {
    153   ProcSyms *ps = static_cast<ProcSyms *>(payload);
    154   auto it = std::find_if(
    155       ps->modules_.begin(), ps->modules_.end(),
    156       [=](const ProcSyms::Module &m) { return m.name_ == modname; });
    157   if (it == ps->modules_.end()) {
    158     auto module = Module(
    159         modname, check_mount_ns ? ps->mount_ns_instance_.get() : nullptr,
    160         &ps->symbol_option_);
    161 
    162     // pid/maps doesn't account for file_offset of text within the ELF.
    163     // It only gives the mmap offset. We need the real offset for symbol
    164     // lookup.
    165     if (module.type_ == ModuleType::SO) {
    166       if (bcc_elf_get_text_scn_info(modname, &module.elf_so_addr_,
    167                                     &module.elf_so_offset_) < 0) {
    168         fprintf(stderr, "WARNING: Couldn't find .text section in %s\n", modname);
    169         fprintf(stderr, "WARNING: BCC can't handle sym look ups for %s", modname);
    170       }
    171     }
    172 
    173     if (!bcc_is_perf_map(modname) || module.type_ != ModuleType::UNKNOWN)
    174       // Always add the module even if we can't read it, so that we could
    175       // report correct module name. Unless it's a perf map that we only
    176       // add readable ones.
    177       it = ps->modules_.insert(ps->modules_.end(), std::move(module));
    178     else
    179       return 0;
    180   }
    181   it->ranges_.emplace_back(start, end, offset);
    182   // perf-PID map is added last. We try both inside the Process's mount
    183   // namespace + chroot, and in global /tmp. Make sure we only add one.
    184   if (it->type_ == ModuleType::PERF_MAP)
    185     return -1;
    186 
    187   return 0;
    188 }
    189 
    190 bool ProcSyms::resolve_addr(uint64_t addr, struct bcc_symbol *sym,
    191                             bool demangle) {
    192   if (procstat_.is_stale())
    193     refresh();
    194 
    195   memset(sym, 0, sizeof(struct bcc_symbol));
    196 
    197   const char *original_module = nullptr;
    198   uint64_t offset;
    199   bool only_perf_map = false;
    200   for (Module &mod : modules_) {
    201     if (only_perf_map && (mod.type_ != ModuleType::PERF_MAP))
    202       continue;
    203     if (mod.contains(addr, offset)) {
    204       if (mod.find_addr(offset, sym)) {
    205         if (demangle) {
    206           if (sym->name && (!strncmp(sym->name, "_Z", 2) || !strncmp(sym->name, "___Z", 4)))
    207             sym->demangle_name =
    208                 abi::__cxa_demangle(sym->name, nullptr, nullptr, nullptr);
    209           if (!sym->demangle_name)
    210             sym->demangle_name = sym->name;
    211         }
    212         return true;
    213       } else if (mod.type_ != ModuleType::PERF_MAP) {
    214         // In this case, we found the address in the range of a module, but
    215         // not able to find a symbol of that address in the module.
    216         // Thus, we would try to find the address in perf map, and
    217         // save the module's name in case we will need it later.
    218         original_module = mod.name_.c_str();
    219         only_perf_map = true;
    220       }
    221     }
    222   }
    223   // If we didn't find the symbol anywhere, the module name is probably
    224   // set to be the perf map's name as it would be the last we tried.
    225   // In this case, if we have found the address previously in a module,
    226   // report the saved original module name instead.
    227   if (original_module)
    228     sym->module = original_module;
    229   return false;
    230 }
    231 
    232 bool ProcSyms::resolve_name(const char *module, const char *name,
    233                             uint64_t *addr) {
    234   if (procstat_.is_stale())
    235     refresh();
    236 
    237   for (Module &mod : modules_) {
    238     if (mod.name_ == module)
    239       return mod.find_name(name, addr);
    240   }
    241   return false;
    242 }
    243 
    244 ProcSyms::Module::Module(const char *name, ProcMountNS *mount_ns,
    245                          struct bcc_symbol_option *option)
    246     : name_(name),
    247       loaded_(false),
    248       mount_ns_(mount_ns),
    249       symbol_option_(option),
    250       type_(ModuleType::UNKNOWN) {
    251   ProcMountNSGuard g(mount_ns_);
    252   int elf_type = bcc_elf_get_type(name_.c_str());
    253   // The Module is an ELF file
    254   if (elf_type >= 0) {
    255     if (elf_type == ET_EXEC)
    256       type_ = ModuleType::EXEC;
    257     else if (elf_type == ET_DYN)
    258       type_ = ModuleType::SO;
    259     return;
    260   }
    261   // Other symbol files
    262   if (bcc_is_valid_perf_map(name_.c_str()) == 1)
    263     type_ = ModuleType::PERF_MAP;
    264   else if (bcc_elf_is_vdso(name_.c_str()) == 1)
    265     type_ = ModuleType::VDSO;
    266 
    267   // Will be stored later
    268   elf_so_offset_ = 0;
    269   elf_so_addr_ = 0;
    270 }
    271 
    272 int ProcSyms::Module::_add_symbol(const char *symname, uint64_t start,
    273                                   uint64_t size, void *p) {
    274   Module *m = static_cast<Module *>(p);
    275   auto res = m->symnames_.emplace(symname);
    276   m->syms_.emplace_back(&*(res.first), start, size);
    277   return 0;
    278 }
    279 
    280 void ProcSyms::Module::load_sym_table() {
    281   if (loaded_)
    282     return;
    283   loaded_ = true;
    284 
    285   if (type_ == ModuleType::UNKNOWN)
    286     return;
    287 
    288   ProcMountNSGuard g(mount_ns_);
    289 
    290   if (type_ == ModuleType::PERF_MAP)
    291     bcc_perf_map_foreach_sym(name_.c_str(), _add_symbol, this);
    292   if (type_ == ModuleType::EXEC || type_ == ModuleType::SO)
    293     bcc_elf_foreach_sym(name_.c_str(), _add_symbol, symbol_option_, this);
    294   if (type_ == ModuleType::VDSO)
    295     bcc_elf_foreach_vdso_sym(_add_symbol, this);
    296 
    297   std::sort(syms_.begin(), syms_.end());
    298 }
    299 
    300 bool ProcSyms::Module::contains(uint64_t addr, uint64_t &offset) const {
    301   for (const auto &range : ranges_) {
    302     if (addr >= range.start && addr < range.end) {
    303       if (type_ == ModuleType::SO || type_ == ModuleType::VDSO) {
    304         // Offset within the mmap
    305         offset = addr - range.start + range.file_offset;
    306 
    307         // Offset within the ELF for SO symbol lookup
    308         offset += (elf_so_addr_ - elf_so_offset_);
    309       } else {
    310         offset = addr;
    311       }
    312 
    313       return true;
    314     }
    315   }
    316 
    317   return false;
    318 }
    319 
    320 bool ProcSyms::Module::find_name(const char *symname, uint64_t *addr) {
    321   load_sym_table();
    322 
    323   for (Symbol &s : syms_) {
    324     if (*(s.name) == symname) {
    325       *addr = type_ == ModuleType::SO ? start() + s.start : s.start;
    326       return true;
    327     }
    328   }
    329   return false;
    330 }
    331 
    332 bool ProcSyms::Module::find_addr(uint64_t offset, struct bcc_symbol *sym) {
    333   load_sym_table();
    334 
    335   sym->module = name_.c_str();
    336   sym->offset = offset;
    337 
    338   auto it = std::upper_bound(syms_.begin(), syms_.end(), Symbol(nullptr, offset, 0));
    339   if (it == syms_.begin())
    340     return false;
    341 
    342   // 'it' points to the symbol whose start address is strictly greater than
    343   // the address we're looking for. Start stepping backwards as long as the
    344   // current symbol is still below the desired address, and see if the end
    345   // of the current symbol (start + size) is above the desired address. Once
    346   // we have a matching symbol, return it. Note that simply looking at '--it'
    347   // is not enough, because symbols can be nested. For example, we could be
    348   // looking for offset 0x12 with the following symbols available:
    349   // SYMBOL   START   SIZE    END
    350   // goo      0x0     0x6     0x0 + 0x6 = 0x6
    351   // foo      0x6     0x10    0x6 + 0x10 = 0x16
    352   // bar      0x8     0x4     0x8 + 0x4 = 0xc
    353   // baz      0x16    0x10    0x16 + 0x10 = 0x26
    354   // The upper_bound lookup will return baz, and then going one symbol back
    355   // brings us to bar, which does not contain offset 0x12 and is nested inside
    356   // foo. Going back one more symbol brings us to foo, which contains 0x12
    357   // and is a match.
    358   // However, we also don't want to walk through the entire symbol list for
    359   // unknown / missing symbols. So we will break if we reach a function that
    360   // doesn't cover the function immediately before 'it', which means it is
    361   // not possibly a nested function containing the address we're looking for.
    362   --it;
    363   uint64_t limit = it->start;
    364   for (; offset >= it->start; --it) {
    365     if (offset < it->start + it->size) {
    366       sym->name = it->name->c_str();
    367       sym->offset = (offset - it->start);
    368       return true;
    369     }
    370     if (limit > it->start + it->size)
    371       break;
    372     // But don't step beyond begin()!
    373     if (it == syms_.begin())
    374       break;
    375   }
    376 
    377   return false;
    378 }
    379 
    380 extern "C" {
    381 
    382 void *bcc_symcache_new(int pid, struct bcc_symbol_option *option) {
    383   if (pid < 0)
    384     return static_cast<void *>(new KSyms());
    385   return static_cast<void *>(new ProcSyms(pid, option));
    386 }
    387 
    388 void bcc_free_symcache(void *symcache, int pid) {
    389   if (pid < 0)
    390     delete static_cast<KSyms*>(symcache);
    391   else
    392     delete static_cast<ProcSyms*>(symcache);
    393 }
    394 
    395 void bcc_symbol_free_demangle_name(struct bcc_symbol *sym) {
    396   if (sym->demangle_name && (sym->demangle_name != sym->name))
    397     free(const_cast<char*>(sym->demangle_name));
    398 }
    399 
    400 int bcc_symcache_resolve(void *resolver, uint64_t addr,
    401                          struct bcc_symbol *sym) {
    402   SymbolCache *cache = static_cast<SymbolCache *>(resolver);
    403   return cache->resolve_addr(addr, sym) ? 0 : -1;
    404 }
    405 
    406 int bcc_symcache_resolve_no_demangle(void *resolver, uint64_t addr,
    407                                      struct bcc_symbol *sym) {
    408   SymbolCache *cache = static_cast<SymbolCache *>(resolver);
    409   return cache->resolve_addr(addr, sym, false) ? 0 : -1;
    410 }
    411 
    412 int bcc_symcache_resolve_name(void *resolver, const char *module,
    413                               const char *name, uint64_t *addr) {
    414   SymbolCache *cache = static_cast<SymbolCache *>(resolver);
    415   return cache->resolve_name(module, name, addr) ? 0 : -1;
    416 }
    417 
    418 void bcc_symcache_refresh(void *resolver) {
    419   SymbolCache *cache = static_cast<SymbolCache *>(resolver);
    420   cache->refresh();
    421 }
    422 
    423 struct mod_st {
    424   const char *name;
    425   uint64_t start;
    426   uint64_t file_offset;
    427 };
    428 
    429 static int _find_module(const char *modname, uint64_t start, uint64_t end,
    430                         uint64_t offset, bool, void *p) {
    431   struct mod_st *mod = (struct mod_st *)p;
    432   if (!strcmp(modname, mod->name)) {
    433     mod->start = start;
    434     mod->file_offset = offset;
    435     return -1;
    436   }
    437   return 0;
    438 }
    439 
    440 int bcc_resolve_global_addr(int pid, const char *module, const uint64_t address,
    441                             uint64_t *global) {
    442   struct mod_st mod = {module, 0x0};
    443   if (bcc_procutils_each_module(pid, _find_module, &mod) < 0 ||
    444       mod.start == 0x0)
    445     return -1;
    446 
    447   *global = mod.start - mod.file_offset + address;
    448   return 0;
    449 }
    450 
    451 static int _sym_cb_wrapper(const char *symname, uint64_t addr, uint64_t,
    452                            void *payload) {
    453   SYM_CB cb = (SYM_CB) payload;
    454   return cb(symname, addr);
    455 }
    456 
    457 int bcc_foreach_function_symbol(const char *module, SYM_CB cb) {
    458   if (module == 0 || cb == 0)
    459     return -1;
    460 
    461   static struct bcc_symbol_option default_option = {
    462     .use_debug_file = 1,
    463     .check_debug_file_crc = 1,
    464     .use_symbol_type = (1 << STT_FUNC) | (1 << STT_GNU_IFUNC)
    465   };
    466 
    467   return bcc_elf_foreach_sym(
    468       module, _sym_cb_wrapper, &default_option, (void *)cb);
    469 }
    470 
    471 static int _find_sym(const char *symname, uint64_t addr, uint64_t,
    472                      void *payload) {
    473   struct bcc_symbol *sym = (struct bcc_symbol *)payload;
    474   if (!strcmp(sym->name, symname)) {
    475     sym->offset = addr;
    476     return -1;
    477   }
    478   return 0;
    479 }
    480 
    481 struct load_addr_t {
    482   uint64_t target_addr;
    483   uint64_t binary_addr;
    484 };
    485 int _find_load(uint64_t v_addr, uint64_t mem_sz, uint64_t file_offset,
    486                        void *payload) {
    487   struct load_addr_t *addr = static_cast<load_addr_t *>(payload);
    488   if (addr->target_addr >= v_addr && addr->target_addr < (v_addr + mem_sz)) {
    489     addr->binary_addr = addr->target_addr - v_addr + file_offset;
    490     return -1;
    491   }
    492   return 0;
    493 }
    494 
    495 int bcc_resolve_symname(const char *module, const char *symname,
    496                         const uint64_t addr, int pid,
    497                         struct bcc_symbol_option *option,
    498                         struct bcc_symbol *sym) {
    499   static struct bcc_symbol_option default_option = {
    500     .use_debug_file = 1,
    501     .check_debug_file_crc = 1,
    502     .use_symbol_type = BCC_SYM_ALL_TYPES,
    503   };
    504 
    505   if (module == NULL)
    506     return -1;
    507 
    508   memset(sym, 0, sizeof(bcc_symbol));
    509 
    510   if (strchr(module, '/')) {
    511     sym->module = strdup(module);
    512   } else {
    513     sym->module = bcc_procutils_which_so(module, pid);
    514   }
    515   if (sym->module == NULL)
    516     return -1;
    517 
    518   ProcMountNSGuard g(pid);
    519 
    520   sym->name = symname;
    521   sym->offset = addr;
    522   if (option == NULL)
    523     option = &default_option;
    524 
    525   if (sym->name && sym->offset == 0x0)
    526     if (bcc_elf_foreach_sym(sym->module, _find_sym, option, sym) < 0)
    527       goto invalid_module;
    528   if (sym->offset == 0x0)
    529     goto invalid_module;
    530 
    531   // For executable (ET_EXEC) binaries, translate the virtual address
    532   // to physical address in the binary file.
    533   // For shared object binaries (ET_DYN), the address from symbol table should
    534   // already be physical address in the binary file.
    535   if (bcc_elf_get_type(sym->module) == ET_EXEC) {
    536     struct load_addr_t addr = {
    537       .target_addr = sym->offset,
    538       .binary_addr = 0x0,
    539     };
    540     if (bcc_elf_foreach_load_section(sym->module, &_find_load, &addr) < 0)
    541       goto invalid_module;
    542     if (!addr.binary_addr)
    543       goto invalid_module;
    544     sym->offset = addr.binary_addr;
    545   }
    546   return 0;
    547 
    548 invalid_module:
    549   if (sym->module) {
    550     ::free(const_cast<char*>(sym->module));
    551     sym->module = NULL;
    552   }
    553   return -1;
    554 }
    555 }
    556