1 /* 2 * Copyright (c) 2016 GitHub, Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include <cxxabi.h> 18 #include <cstring> 19 #include <fcntl.h> 20 #include <linux/elf.h> 21 #include <string.h> 22 #include <sys/stat.h> 23 #include <sys/types.h> 24 #include <unistd.h> 25 #include <cstdio> 26 27 #include "bcc_elf.h" 28 #include "bcc_perf_map.h" 29 #include "bcc_proc.h" 30 #include "bcc_syms.h" 31 #include "common.h" 32 #include "vendor/tinyformat.hpp" 33 34 #include "syms.h" 35 36 ino_t ProcStat::getinode_() { 37 struct stat s; 38 return (!stat(procfs_.c_str(), &s)) ? s.st_ino : -1; 39 } 40 41 bool ProcStat::is_stale() { 42 ino_t cur_inode = getinode_(); 43 return (cur_inode > 0) && (cur_inode != inode_); 44 } 45 46 ProcStat::ProcStat(int pid) 47 : procfs_(tfm::format("/proc/%d/exe", pid)), inode_(getinode_()) {} 48 49 void KSyms::_add_symbol(const char *symname, uint64_t addr, void *p) { 50 KSyms *ks = static_cast<KSyms *>(p); 51 ks->syms_.emplace_back(symname, addr); 52 } 53 54 void KSyms::refresh() { 55 if (syms_.empty()) { 56 bcc_procutils_each_ksym(_add_symbol, this); 57 std::sort(syms_.begin(), syms_.end()); 58 } 59 } 60 61 bool KSyms::resolve_addr(uint64_t addr, struct bcc_symbol *sym, bool demangle) { 62 refresh(); 63 64 std::vector<Symbol>::iterator it; 65 66 if (syms_.empty()) 67 goto unknown_symbol; 68 69 it = std::upper_bound(syms_.begin(), syms_.end(), Symbol("", addr)); 70 if (it != syms_.begin()) { 71 it--; 72 sym->name = (*it).name.c_str(); 73 if (demangle) 74 sym->demangle_name = sym->name; 75 sym->module = "kernel"; 76 sym->offset = addr - (*it).addr; 77 return true; 78 } 79 80 unknown_symbol: 81 memset(sym, 0, sizeof(struct bcc_symbol)); 82 return false; 83 } 84 85 bool KSyms::resolve_name(const char *_unused, const char *name, 86 uint64_t *addr) { 87 refresh(); 88 89 if (syms_.size() != symnames_.size()) { 90 symnames_.clear(); 91 for (Symbol &sym : syms_) { 92 symnames_[sym.name] = sym.addr; 93 } 94 } 95 96 auto it = symnames_.find(name); 97 if (it == symnames_.end()) 98 return false; 99 100 *addr = it->second; 101 return true; 102 } 103 104 ProcSyms::ProcSyms(int pid, struct bcc_symbol_option *option) 105 : pid_(pid), procstat_(pid), mount_ns_instance_(new ProcMountNS(pid_)) { 106 if (option) 107 std::memcpy(&symbol_option_, option, sizeof(bcc_symbol_option)); 108 else 109 symbol_option_ = { 110 .use_debug_file = 1, 111 .check_debug_file_crc = 1, 112 .use_symbol_type = (1 << STT_FUNC) | (1 << STT_GNU_IFUNC) 113 }; 114 load_modules(); 115 } 116 117 int ProcSyms::_add_load_sections(uint64_t v_addr, uint64_t mem_sz, 118 uint64_t file_offset, void *payload) { 119 auto module = static_cast<Module *>(payload); 120 module->ranges_.emplace_back(v_addr, v_addr + mem_sz, file_offset); 121 return 0; 122 } 123 124 void ProcSyms::load_exe() { 125 std::string exe = ebpf::get_pid_exe(pid_); 126 Module module(exe.c_str(), mount_ns_instance_.get(), &symbol_option_); 127 128 if (module.type_ != ModuleType::EXEC) 129 return; 130 131 ProcMountNSGuard g(mount_ns_instance_.get()); 132 133 bcc_elf_foreach_load_section(exe.c_str(), &_add_load_sections, &module); 134 135 if (!module.ranges_.empty()) 136 modules_.emplace_back(std::move(module)); 137 } 138 139 void ProcSyms::load_modules() { 140 load_exe(); 141 bcc_procutils_each_module(pid_, _add_module, this); 142 } 143 144 void ProcSyms::refresh() { 145 modules_.clear(); 146 mount_ns_instance_.reset(new ProcMountNS(pid_)); 147 load_modules(); 148 procstat_.reset(); 149 } 150 151 int ProcSyms::_add_module(const char *modname, uint64_t start, uint64_t end, 152 uint64_t offset, bool check_mount_ns, void *payload) { 153 ProcSyms *ps = static_cast<ProcSyms *>(payload); 154 auto it = std::find_if( 155 ps->modules_.begin(), ps->modules_.end(), 156 [=](const ProcSyms::Module &m) { return m.name_ == modname; }); 157 if (it == ps->modules_.end()) { 158 auto module = Module( 159 modname, check_mount_ns ? ps->mount_ns_instance_.get() : nullptr, 160 &ps->symbol_option_); 161 162 // pid/maps doesn't account for file_offset of text within the ELF. 163 // It only gives the mmap offset. We need the real offset for symbol 164 // lookup. 165 if (module.type_ == ModuleType::SO) { 166 if (bcc_elf_get_text_scn_info(modname, &module.elf_so_addr_, 167 &module.elf_so_offset_) < 0) { 168 fprintf(stderr, "WARNING: Couldn't find .text section in %s\n", modname); 169 fprintf(stderr, "WARNING: BCC can't handle sym look ups for %s", modname); 170 } 171 } 172 173 if (!bcc_is_perf_map(modname) || module.type_ != ModuleType::UNKNOWN) 174 // Always add the module even if we can't read it, so that we could 175 // report correct module name. Unless it's a perf map that we only 176 // add readable ones. 177 it = ps->modules_.insert(ps->modules_.end(), std::move(module)); 178 else 179 return 0; 180 } 181 it->ranges_.emplace_back(start, end, offset); 182 // perf-PID map is added last. We try both inside the Process's mount 183 // namespace + chroot, and in global /tmp. Make sure we only add one. 184 if (it->type_ == ModuleType::PERF_MAP) 185 return -1; 186 187 return 0; 188 } 189 190 bool ProcSyms::resolve_addr(uint64_t addr, struct bcc_symbol *sym, 191 bool demangle) { 192 if (procstat_.is_stale()) 193 refresh(); 194 195 memset(sym, 0, sizeof(struct bcc_symbol)); 196 197 const char *original_module = nullptr; 198 uint64_t offset; 199 bool only_perf_map = false; 200 for (Module &mod : modules_) { 201 if (only_perf_map && (mod.type_ != ModuleType::PERF_MAP)) 202 continue; 203 if (mod.contains(addr, offset)) { 204 if (mod.find_addr(offset, sym)) { 205 if (demangle) { 206 if (sym->name && (!strncmp(sym->name, "_Z", 2) || !strncmp(sym->name, "___Z", 4))) 207 sym->demangle_name = 208 abi::__cxa_demangle(sym->name, nullptr, nullptr, nullptr); 209 if (!sym->demangle_name) 210 sym->demangle_name = sym->name; 211 } 212 return true; 213 } else if (mod.type_ != ModuleType::PERF_MAP) { 214 // In this case, we found the address in the range of a module, but 215 // not able to find a symbol of that address in the module. 216 // Thus, we would try to find the address in perf map, and 217 // save the module's name in case we will need it later. 218 original_module = mod.name_.c_str(); 219 only_perf_map = true; 220 } 221 } 222 } 223 // If we didn't find the symbol anywhere, the module name is probably 224 // set to be the perf map's name as it would be the last we tried. 225 // In this case, if we have found the address previously in a module, 226 // report the saved original module name instead. 227 if (original_module) 228 sym->module = original_module; 229 return false; 230 } 231 232 bool ProcSyms::resolve_name(const char *module, const char *name, 233 uint64_t *addr) { 234 if (procstat_.is_stale()) 235 refresh(); 236 237 for (Module &mod : modules_) { 238 if (mod.name_ == module) 239 return mod.find_name(name, addr); 240 } 241 return false; 242 } 243 244 ProcSyms::Module::Module(const char *name, ProcMountNS *mount_ns, 245 struct bcc_symbol_option *option) 246 : name_(name), 247 loaded_(false), 248 mount_ns_(mount_ns), 249 symbol_option_(option), 250 type_(ModuleType::UNKNOWN) { 251 ProcMountNSGuard g(mount_ns_); 252 int elf_type = bcc_elf_get_type(name_.c_str()); 253 // The Module is an ELF file 254 if (elf_type >= 0) { 255 if (elf_type == ET_EXEC) 256 type_ = ModuleType::EXEC; 257 else if (elf_type == ET_DYN) 258 type_ = ModuleType::SO; 259 return; 260 } 261 // Other symbol files 262 if (bcc_is_valid_perf_map(name_.c_str()) == 1) 263 type_ = ModuleType::PERF_MAP; 264 else if (bcc_elf_is_vdso(name_.c_str()) == 1) 265 type_ = ModuleType::VDSO; 266 267 // Will be stored later 268 elf_so_offset_ = 0; 269 elf_so_addr_ = 0; 270 } 271 272 int ProcSyms::Module::_add_symbol(const char *symname, uint64_t start, 273 uint64_t size, void *p) { 274 Module *m = static_cast<Module *>(p); 275 auto res = m->symnames_.emplace(symname); 276 m->syms_.emplace_back(&*(res.first), start, size); 277 return 0; 278 } 279 280 void ProcSyms::Module::load_sym_table() { 281 if (loaded_) 282 return; 283 loaded_ = true; 284 285 if (type_ == ModuleType::UNKNOWN) 286 return; 287 288 ProcMountNSGuard g(mount_ns_); 289 290 if (type_ == ModuleType::PERF_MAP) 291 bcc_perf_map_foreach_sym(name_.c_str(), _add_symbol, this); 292 if (type_ == ModuleType::EXEC || type_ == ModuleType::SO) 293 bcc_elf_foreach_sym(name_.c_str(), _add_symbol, symbol_option_, this); 294 if (type_ == ModuleType::VDSO) 295 bcc_elf_foreach_vdso_sym(_add_symbol, this); 296 297 std::sort(syms_.begin(), syms_.end()); 298 } 299 300 bool ProcSyms::Module::contains(uint64_t addr, uint64_t &offset) const { 301 for (const auto &range : ranges_) { 302 if (addr >= range.start && addr < range.end) { 303 if (type_ == ModuleType::SO || type_ == ModuleType::VDSO) { 304 // Offset within the mmap 305 offset = addr - range.start + range.file_offset; 306 307 // Offset within the ELF for SO symbol lookup 308 offset += (elf_so_addr_ - elf_so_offset_); 309 } else { 310 offset = addr; 311 } 312 313 return true; 314 } 315 } 316 317 return false; 318 } 319 320 bool ProcSyms::Module::find_name(const char *symname, uint64_t *addr) { 321 load_sym_table(); 322 323 for (Symbol &s : syms_) { 324 if (*(s.name) == symname) { 325 *addr = type_ == ModuleType::SO ? start() + s.start : s.start; 326 return true; 327 } 328 } 329 return false; 330 } 331 332 bool ProcSyms::Module::find_addr(uint64_t offset, struct bcc_symbol *sym) { 333 load_sym_table(); 334 335 sym->module = name_.c_str(); 336 sym->offset = offset; 337 338 auto it = std::upper_bound(syms_.begin(), syms_.end(), Symbol(nullptr, offset, 0)); 339 if (it == syms_.begin()) 340 return false; 341 342 // 'it' points to the symbol whose start address is strictly greater than 343 // the address we're looking for. Start stepping backwards as long as the 344 // current symbol is still below the desired address, and see if the end 345 // of the current symbol (start + size) is above the desired address. Once 346 // we have a matching symbol, return it. Note that simply looking at '--it' 347 // is not enough, because symbols can be nested. For example, we could be 348 // looking for offset 0x12 with the following symbols available: 349 // SYMBOL START SIZE END 350 // goo 0x0 0x6 0x0 + 0x6 = 0x6 351 // foo 0x6 0x10 0x6 + 0x10 = 0x16 352 // bar 0x8 0x4 0x8 + 0x4 = 0xc 353 // baz 0x16 0x10 0x16 + 0x10 = 0x26 354 // The upper_bound lookup will return baz, and then going one symbol back 355 // brings us to bar, which does not contain offset 0x12 and is nested inside 356 // foo. Going back one more symbol brings us to foo, which contains 0x12 357 // and is a match. 358 // However, we also don't want to walk through the entire symbol list for 359 // unknown / missing symbols. So we will break if we reach a function that 360 // doesn't cover the function immediately before 'it', which means it is 361 // not possibly a nested function containing the address we're looking for. 362 --it; 363 uint64_t limit = it->start; 364 for (; offset >= it->start; --it) { 365 if (offset < it->start + it->size) { 366 sym->name = it->name->c_str(); 367 sym->offset = (offset - it->start); 368 return true; 369 } 370 if (limit > it->start + it->size) 371 break; 372 // But don't step beyond begin()! 373 if (it == syms_.begin()) 374 break; 375 } 376 377 return false; 378 } 379 380 extern "C" { 381 382 void *bcc_symcache_new(int pid, struct bcc_symbol_option *option) { 383 if (pid < 0) 384 return static_cast<void *>(new KSyms()); 385 return static_cast<void *>(new ProcSyms(pid, option)); 386 } 387 388 void bcc_free_symcache(void *symcache, int pid) { 389 if (pid < 0) 390 delete static_cast<KSyms*>(symcache); 391 else 392 delete static_cast<ProcSyms*>(symcache); 393 } 394 395 void bcc_symbol_free_demangle_name(struct bcc_symbol *sym) { 396 if (sym->demangle_name && (sym->demangle_name != sym->name)) 397 free(const_cast<char*>(sym->demangle_name)); 398 } 399 400 int bcc_symcache_resolve(void *resolver, uint64_t addr, 401 struct bcc_symbol *sym) { 402 SymbolCache *cache = static_cast<SymbolCache *>(resolver); 403 return cache->resolve_addr(addr, sym) ? 0 : -1; 404 } 405 406 int bcc_symcache_resolve_no_demangle(void *resolver, uint64_t addr, 407 struct bcc_symbol *sym) { 408 SymbolCache *cache = static_cast<SymbolCache *>(resolver); 409 return cache->resolve_addr(addr, sym, false) ? 0 : -1; 410 } 411 412 int bcc_symcache_resolve_name(void *resolver, const char *module, 413 const char *name, uint64_t *addr) { 414 SymbolCache *cache = static_cast<SymbolCache *>(resolver); 415 return cache->resolve_name(module, name, addr) ? 0 : -1; 416 } 417 418 void bcc_symcache_refresh(void *resolver) { 419 SymbolCache *cache = static_cast<SymbolCache *>(resolver); 420 cache->refresh(); 421 } 422 423 struct mod_st { 424 const char *name; 425 uint64_t start; 426 uint64_t file_offset; 427 }; 428 429 static int _find_module(const char *modname, uint64_t start, uint64_t end, 430 uint64_t offset, bool, void *p) { 431 struct mod_st *mod = (struct mod_st *)p; 432 if (!strcmp(modname, mod->name)) { 433 mod->start = start; 434 mod->file_offset = offset; 435 return -1; 436 } 437 return 0; 438 } 439 440 int bcc_resolve_global_addr(int pid, const char *module, const uint64_t address, 441 uint64_t *global) { 442 struct mod_st mod = {module, 0x0}; 443 if (bcc_procutils_each_module(pid, _find_module, &mod) < 0 || 444 mod.start == 0x0) 445 return -1; 446 447 *global = mod.start - mod.file_offset + address; 448 return 0; 449 } 450 451 static int _sym_cb_wrapper(const char *symname, uint64_t addr, uint64_t, 452 void *payload) { 453 SYM_CB cb = (SYM_CB) payload; 454 return cb(symname, addr); 455 } 456 457 int bcc_foreach_function_symbol(const char *module, SYM_CB cb) { 458 if (module == 0 || cb == 0) 459 return -1; 460 461 static struct bcc_symbol_option default_option = { 462 .use_debug_file = 1, 463 .check_debug_file_crc = 1, 464 .use_symbol_type = (1 << STT_FUNC) | (1 << STT_GNU_IFUNC) 465 }; 466 467 return bcc_elf_foreach_sym( 468 module, _sym_cb_wrapper, &default_option, (void *)cb); 469 } 470 471 static int _find_sym(const char *symname, uint64_t addr, uint64_t, 472 void *payload) { 473 struct bcc_symbol *sym = (struct bcc_symbol *)payload; 474 if (!strcmp(sym->name, symname)) { 475 sym->offset = addr; 476 return -1; 477 } 478 return 0; 479 } 480 481 struct load_addr_t { 482 uint64_t target_addr; 483 uint64_t binary_addr; 484 }; 485 int _find_load(uint64_t v_addr, uint64_t mem_sz, uint64_t file_offset, 486 void *payload) { 487 struct load_addr_t *addr = static_cast<load_addr_t *>(payload); 488 if (addr->target_addr >= v_addr && addr->target_addr < (v_addr + mem_sz)) { 489 addr->binary_addr = addr->target_addr - v_addr + file_offset; 490 return -1; 491 } 492 return 0; 493 } 494 495 int bcc_resolve_symname(const char *module, const char *symname, 496 const uint64_t addr, int pid, 497 struct bcc_symbol_option *option, 498 struct bcc_symbol *sym) { 499 static struct bcc_symbol_option default_option = { 500 .use_debug_file = 1, 501 .check_debug_file_crc = 1, 502 .use_symbol_type = BCC_SYM_ALL_TYPES, 503 }; 504 505 if (module == NULL) 506 return -1; 507 508 memset(sym, 0, sizeof(bcc_symbol)); 509 510 if (strchr(module, '/')) { 511 sym->module = strdup(module); 512 } else { 513 sym->module = bcc_procutils_which_so(module, pid); 514 } 515 if (sym->module == NULL) 516 return -1; 517 518 ProcMountNSGuard g(pid); 519 520 sym->name = symname; 521 sym->offset = addr; 522 if (option == NULL) 523 option = &default_option; 524 525 if (sym->name && sym->offset == 0x0) 526 if (bcc_elf_foreach_sym(sym->module, _find_sym, option, sym) < 0) 527 goto invalid_module; 528 if (sym->offset == 0x0) 529 goto invalid_module; 530 531 // For executable (ET_EXEC) binaries, translate the virtual address 532 // to physical address in the binary file. 533 // For shared object binaries (ET_DYN), the address from symbol table should 534 // already be physical address in the binary file. 535 if (bcc_elf_get_type(sym->module) == ET_EXEC) { 536 struct load_addr_t addr = { 537 .target_addr = sym->offset, 538 .binary_addr = 0x0, 539 }; 540 if (bcc_elf_foreach_load_section(sym->module, &_find_load, &addr) < 0) 541 goto invalid_module; 542 if (!addr.binary_addr) 543 goto invalid_module; 544 sym->offset = addr.binary_addr; 545 } 546 return 0; 547 548 invalid_module: 549 if (sym->module) { 550 ::free(const_cast<char*>(sym->module)); 551 sym->module = NULL; 552 } 553 return -1; 554 } 555 } 556