1 /* 2 * Copyright (c) 2016 Facebook, Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include <fcntl.h> 18 #include <linux/elf.h> 19 #include <linux/perf_event.h> 20 #include <sys/epoll.h> 21 #include <unistd.h> 22 #include <cerrno> 23 #include <cinttypes> 24 #include <cstdint> 25 #include <cstring> 26 #include <iostream> 27 #include <memory> 28 29 #include "BPFTable.h" 30 31 #include "bcc_exception.h" 32 #include "bcc_syms.h" 33 #include "common.h" 34 #include "file_desc.h" 35 #include "libbpf.h" 36 #include "perf_reader.h" 37 38 namespace ebpf { 39 40 BPFTable::BPFTable(const TableDesc& desc) : BPFTableBase<void, void>(desc) {} 41 42 StatusTuple BPFTable::get_value(const std::string& key_str, 43 std::string& value_str) { 44 char key[desc.key_size]; 45 char value[desc.leaf_size]; 46 47 StatusTuple r(0); 48 49 r = string_to_key(key_str, key); 50 if (r.code() != 0) 51 return r; 52 53 if (!lookup(key, value)) 54 return StatusTuple(-1, "error getting value"); 55 56 return leaf_to_string(value, value_str); 57 } 58 59 StatusTuple BPFTable::get_value(const std::string& key_str, 60 std::vector<std::string>& value_str) { 61 size_t ncpus = get_possible_cpus().size(); 62 char key[desc.key_size]; 63 char value[desc.leaf_size * ncpus]; 64 65 StatusTuple r(0); 66 67 r = string_to_key(key_str, key); 68 if (r.code() != 0) 69 return r; 70 71 if (!lookup(key, value)) 72 return StatusTuple(-1, "error getting value"); 73 74 value_str.resize(ncpus); 75 76 for (size_t i = 0; i < ncpus; i++) { 77 r = leaf_to_string(value + i * desc.leaf_size, value_str.at(i)); 78 if (r.code() != 0) 79 return r; 80 } 81 return StatusTuple(0); 82 } 83 84 StatusTuple BPFTable::update_value(const std::string& key_str, 85 const std::string& value_str) { 86 char key[desc.key_size]; 87 char value[desc.leaf_size]; 88 89 StatusTuple r(0); 90 91 r = string_to_key(key_str, key); 92 if (r.code() != 0) 93 return r; 94 95 r = string_to_leaf(value_str, value); 96 if (r.code() != 0) 97 return r; 98 99 if (!update(key, value)) 100 return StatusTuple(-1, "error updating element"); 101 102 return StatusTuple(0); 103 } 104 105 StatusTuple BPFTable::update_value(const std::string& key_str, 106 const std::vector<std::string>& value_str) { 107 size_t ncpus = get_possible_cpus().size(); 108 char key[desc.key_size]; 109 char value[desc.leaf_size * ncpus]; 110 111 StatusTuple r(0); 112 113 r = string_to_key(key_str, key); 114 if (r.code() != 0) 115 return r; 116 117 if (value_str.size() != ncpus) 118 return StatusTuple(-1, "bad value size"); 119 120 for (size_t i = 0; i < ncpus; i++) { 121 r = string_to_leaf(value_str.at(i), value + i * desc.leaf_size); 122 if (r.code() != 0) 123 return r; 124 } 125 126 if (!update(key, value)) 127 return StatusTuple(-1, "error updating element"); 128 129 return StatusTuple(0); 130 } 131 132 StatusTuple BPFTable::remove_value(const std::string& key_str) { 133 char key[desc.key_size]; 134 135 StatusTuple r(0); 136 137 r = string_to_key(key_str, key); 138 if (r.code() != 0) 139 return r; 140 141 if (!remove(key)) 142 return StatusTuple(-1, "error removing element"); 143 144 return StatusTuple(0); 145 } 146 147 StatusTuple BPFTable::clear_table_non_atomic() { 148 if (desc.type == BPF_MAP_TYPE_HASH || desc.type == BPF_MAP_TYPE_PERCPU_HASH || 149 desc.type == BPF_MAP_TYPE_LRU_HASH || 150 desc.type == BPF_MAP_TYPE_PERCPU_HASH || 151 desc.type == BPF_MAP_TYPE_HASH_OF_MAPS) { 152 // For hash maps, use the first() interface (which uses get_next_key) to 153 // iterate through the map and clear elements 154 auto key = std::unique_ptr<void, decltype(::free)*>(::malloc(desc.key_size), 155 ::free); 156 157 while (this->first(key.get())) 158 if (!this->remove(key.get())) { 159 return StatusTuple(-1, 160 "Failed to delete element when clearing table %s", 161 desc.name.c_str()); 162 } 163 } else if (desc.type == BPF_MAP_TYPE_ARRAY || 164 desc.type == BPF_MAP_TYPE_PERCPU_ARRAY) { 165 return StatusTuple(-1, "Array map %s do not support clearing elements", 166 desc.name.c_str()); 167 } else if (desc.type == BPF_MAP_TYPE_PROG_ARRAY || 168 desc.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || 169 desc.type == BPF_MAP_TYPE_STACK_TRACE || 170 desc.type == BPF_MAP_TYPE_ARRAY_OF_MAPS) { 171 // For Stack-trace and FD arrays, just iterate over all indices 172 for (size_t i = 0; i < desc.max_entries; i++) { 173 this->remove(&i); 174 } 175 } else { 176 return StatusTuple(-1, "Clearing for map type of %s not supported yet", 177 desc.name.c_str()); 178 } 179 180 return StatusTuple(0); 181 } 182 183 StatusTuple BPFTable::get_table_offline( 184 std::vector<std::pair<std::string, std::string>> &res) { 185 StatusTuple r(0); 186 int err; 187 188 auto key = std::unique_ptr<void, decltype(::free)*>(::malloc(desc.key_size), 189 ::free); 190 auto value = std::unique_ptr<void, decltype(::free)*>(::malloc(desc.leaf_size), 191 ::free); 192 std::string key_str; 193 std::string value_str; 194 195 if (desc.type == BPF_MAP_TYPE_ARRAY || 196 desc.type == BPF_MAP_TYPE_PROG_ARRAY || 197 desc.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || 198 desc.type == BPF_MAP_TYPE_PERCPU_ARRAY || 199 desc.type == BPF_MAP_TYPE_CGROUP_ARRAY || 200 desc.type == BPF_MAP_TYPE_ARRAY_OF_MAPS || 201 desc.type == BPF_MAP_TYPE_DEVMAP || 202 desc.type == BPF_MAP_TYPE_CPUMAP || 203 desc.type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) { 204 // For arrays, just iterate over all indices 205 for (size_t i = 0; i < desc.max_entries; i++) { 206 err = bpf_lookup_elem(desc.fd, &i, value.get()); 207 if (err < 0 && errno == ENOENT) { 208 // Element is not present, skip it 209 continue; 210 } else if (err < 0) { 211 // Other error, abort 212 return StatusTuple(-1, "Error looking up value: %s", std::strerror(errno)); 213 } 214 215 r = key_to_string(&i, key_str); 216 if (r.code() != 0) 217 return r; 218 219 r = leaf_to_string(value.get(), value_str); 220 if (r.code() != 0) 221 return r; 222 res.emplace_back(key_str, value_str); 223 } 224 } else { 225 res.clear(); 226 // For other maps, try to use the first() and next() interfaces 227 if (!this->first(key.get())) 228 return StatusTuple(0); 229 230 while (true) { 231 if (!this->lookup(key.get(), value.get())) 232 break; 233 r = key_to_string(key.get(), key_str); 234 if (r.code() != 0) 235 return r; 236 237 r = leaf_to_string(value.get(), value_str); 238 if (r.code() != 0) 239 return r; 240 res.emplace_back(key_str, value_str); 241 if (!this->next(key.get(), key.get())) 242 break; 243 } 244 } 245 246 return StatusTuple(0); 247 } 248 249 size_t BPFTable::get_possible_cpu_count() { return get_possible_cpus().size(); } 250 251 BPFStackTable::BPFStackTable(const TableDesc& desc, bool use_debug_file, 252 bool check_debug_file_crc) 253 : BPFTableBase<int, stacktrace_t>(desc) { 254 if (desc.type != BPF_MAP_TYPE_STACK_TRACE) 255 throw std::invalid_argument("Table '" + desc.name + 256 "' is not a stack table"); 257 258 symbol_option_ = {.use_debug_file = use_debug_file, 259 .check_debug_file_crc = check_debug_file_crc, 260 .use_symbol_type = (1 << STT_FUNC) | (1 << STT_GNU_IFUNC)}; 261 } 262 263 BPFStackTable::BPFStackTable(BPFStackTable&& that) 264 : BPFTableBase<int, stacktrace_t>(that.desc), 265 symbol_option_(std::move(that.symbol_option_)), 266 pid_sym_(std::move(that.pid_sym_)) { 267 that.pid_sym_.clear(); 268 } 269 270 BPFStackTable::~BPFStackTable() { 271 for (auto it : pid_sym_) 272 bcc_free_symcache(it.second, it.first); 273 } 274 275 void BPFStackTable::clear_table_non_atomic() { 276 for (int i = 0; size_t(i) < capacity(); i++) { 277 remove(&i); 278 } 279 } 280 281 std::vector<uintptr_t> BPFStackTable::get_stack_addr(int stack_id) { 282 std::vector<uintptr_t> res; 283 stacktrace_t stack; 284 if (stack_id < 0) 285 return res; 286 if (!lookup(&stack_id, &stack)) 287 return res; 288 for (int i = 0; (i < BPF_MAX_STACK_DEPTH) && (stack.ip[i] != 0); i++) 289 res.push_back(stack.ip[i]); 290 return res; 291 } 292 293 std::vector<std::string> BPFStackTable::get_stack_symbol(int stack_id, 294 int pid) { 295 auto addresses = get_stack_addr(stack_id); 296 std::vector<std::string> res; 297 if (addresses.empty()) 298 return res; 299 res.reserve(addresses.size()); 300 301 if (pid < 0) 302 pid = -1; 303 if (pid_sym_.find(pid) == pid_sym_.end()) 304 pid_sym_[pid] = bcc_symcache_new(pid, &symbol_option_); 305 void* cache = pid_sym_[pid]; 306 307 bcc_symbol symbol; 308 for (auto addr : addresses) 309 if (bcc_symcache_resolve(cache, addr, &symbol) != 0) 310 res.emplace_back("[UNKNOWN]"); 311 else { 312 res.push_back(symbol.demangle_name); 313 bcc_symbol_free_demangle_name(&symbol); 314 } 315 316 return res; 317 } 318 319 BPFPerfBuffer::BPFPerfBuffer(const TableDesc& desc) 320 : BPFTableBase<int, int>(desc), epfd_(-1) { 321 if (desc.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) 322 throw std::invalid_argument("Table '" + desc.name + 323 "' is not a perf buffer"); 324 } 325 326 StatusTuple BPFPerfBuffer::open_on_cpu(perf_reader_raw_cb cb, 327 perf_reader_lost_cb lost_cb, int cpu, 328 void* cb_cookie, int page_cnt) { 329 if (cpu_readers_.find(cpu) != cpu_readers_.end()) 330 return StatusTuple(-1, "Perf buffer already open on CPU %d", cpu); 331 332 auto reader = static_cast<perf_reader*>( 333 bpf_open_perf_buffer(cb, lost_cb, cb_cookie, -1, cpu, page_cnt)); 334 if (reader == nullptr) 335 return StatusTuple(-1, "Unable to construct perf reader"); 336 337 int reader_fd = perf_reader_fd(reader); 338 if (!update(&cpu, &reader_fd)) { 339 perf_reader_free(static_cast<void*>(reader)); 340 return StatusTuple(-1, "Unable to open perf buffer on CPU %d: %s", cpu, 341 std::strerror(errno)); 342 } 343 344 struct epoll_event event = {}; 345 event.events = EPOLLIN; 346 event.data.ptr = static_cast<void*>(reader); 347 if (epoll_ctl(epfd_, EPOLL_CTL_ADD, reader_fd, &event) != 0) { 348 perf_reader_free(static_cast<void*>(reader)); 349 return StatusTuple(-1, "Unable to add perf_reader FD to epoll: %s", 350 std::strerror(errno)); 351 } 352 353 cpu_readers_[cpu] = reader; 354 return StatusTuple(0); 355 } 356 357 StatusTuple BPFPerfBuffer::open_all_cpu(perf_reader_raw_cb cb, 358 perf_reader_lost_cb lost_cb, 359 void* cb_cookie, int page_cnt) { 360 if (cpu_readers_.size() != 0 || epfd_ != -1) 361 return StatusTuple(-1, "Previously opened perf buffer not cleaned"); 362 363 std::vector<int> cpus = get_online_cpus(); 364 ep_events_.reset(new epoll_event[cpus.size()]); 365 epfd_ = epoll_create1(EPOLL_CLOEXEC); 366 367 for (int i : cpus) { 368 auto res = open_on_cpu(cb, lost_cb, i, cb_cookie, page_cnt); 369 if (res.code() != 0) { 370 TRY2(close_all_cpu()); 371 return res; 372 } 373 } 374 return StatusTuple(0); 375 } 376 377 StatusTuple BPFPerfBuffer::close_on_cpu(int cpu) { 378 auto it = cpu_readers_.find(cpu); 379 if (it == cpu_readers_.end()) 380 return StatusTuple(0); 381 perf_reader_free(static_cast<void*>(it->second)); 382 if (!remove(const_cast<int*>(&(it->first)))) 383 return StatusTuple(-1, "Unable to close perf buffer on CPU %d", it->first); 384 cpu_readers_.erase(it); 385 return StatusTuple(0); 386 } 387 388 StatusTuple BPFPerfBuffer::close_all_cpu() { 389 std::string errors; 390 bool has_error = false; 391 392 if (epfd_ >= 0) { 393 int close_res = close(epfd_); 394 epfd_ = -1; 395 ep_events_.reset(); 396 if (close_res != 0) { 397 has_error = true; 398 errors += std::string(std::strerror(errno)) + "\n"; 399 } 400 } 401 402 std::vector<int> opened_cpus; 403 for (auto it : cpu_readers_) 404 opened_cpus.push_back(it.first); 405 for (int i : opened_cpus) { 406 auto res = close_on_cpu(i); 407 if (res.code() != 0) { 408 errors += "Failed to close CPU" + std::to_string(i) + " perf buffer: "; 409 errors += res.msg() + "\n"; 410 has_error = true; 411 } 412 } 413 414 if (has_error) 415 return StatusTuple(-1, errors); 416 return StatusTuple(0); 417 } 418 419 int BPFPerfBuffer::poll(int timeout_ms) { 420 if (epfd_ < 0) 421 return -1; 422 int cnt = 423 epoll_wait(epfd_, ep_events_.get(), cpu_readers_.size(), timeout_ms); 424 for (int i = 0; i < cnt; i++) 425 perf_reader_event_read(static_cast<perf_reader*>(ep_events_[i].data.ptr)); 426 return cnt; 427 } 428 429 BPFPerfBuffer::~BPFPerfBuffer() { 430 auto res = close_all_cpu(); 431 if (res.code() != 0) 432 std::cerr << "Failed to close all perf buffer on destruction: " << res.msg() 433 << std::endl; 434 } 435 436 BPFPerfEventArray::BPFPerfEventArray(const TableDesc& desc) 437 : BPFTableBase<int, int>(desc) { 438 if (desc.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) 439 throw std::invalid_argument("Table '" + desc.name + 440 "' is not a perf event array"); 441 } 442 443 StatusTuple BPFPerfEventArray::open_all_cpu(uint32_t type, uint64_t config) { 444 if (cpu_fds_.size() != 0) 445 return StatusTuple(-1, "Previously opened perf event not cleaned"); 446 447 std::vector<int> cpus = get_online_cpus(); 448 449 for (int i : cpus) { 450 auto res = open_on_cpu(i, type, config); 451 if (res.code() != 0) { 452 TRY2(close_all_cpu()); 453 return res; 454 } 455 } 456 return StatusTuple(0); 457 } 458 459 StatusTuple BPFPerfEventArray::close_all_cpu() { 460 std::string errors; 461 bool has_error = false; 462 463 std::vector<int> opened_cpus; 464 for (auto it : cpu_fds_) 465 opened_cpus.push_back(it.first); 466 for (int i : opened_cpus) { 467 auto res = close_on_cpu(i); 468 if (res.code() != 0) { 469 errors += "Failed to close CPU" + std::to_string(i) + " perf event: "; 470 errors += res.msg() + "\n"; 471 has_error = true; 472 } 473 } 474 475 if (has_error) 476 return StatusTuple(-1, errors); 477 return StatusTuple(0); 478 } 479 480 StatusTuple BPFPerfEventArray::open_on_cpu(int cpu, uint32_t type, 481 uint64_t config) { 482 if (cpu_fds_.find(cpu) != cpu_fds_.end()) 483 return StatusTuple(-1, "Perf event already open on CPU %d", cpu); 484 int fd = bpf_open_perf_event(type, config, -1, cpu); 485 if (fd < 0) { 486 return StatusTuple(-1, "Error constructing perf event %" PRIu32 ":%" PRIu64, 487 type, config); 488 } 489 if (!update(&cpu, &fd)) { 490 bpf_close_perf_event_fd(fd); 491 return StatusTuple(-1, "Unable to open perf event on CPU %d: %s", cpu, 492 std::strerror(errno)); 493 } 494 cpu_fds_[cpu] = fd; 495 return StatusTuple(0); 496 } 497 498 StatusTuple BPFPerfEventArray::close_on_cpu(int cpu) { 499 auto it = cpu_fds_.find(cpu); 500 if (it == cpu_fds_.end()) { 501 return StatusTuple(0); 502 } 503 bpf_close_perf_event_fd(it->second); 504 cpu_fds_.erase(it); 505 return StatusTuple(0); 506 } 507 508 BPFPerfEventArray::~BPFPerfEventArray() { 509 auto res = close_all_cpu(); 510 if (res.code() != 0) { 511 std::cerr << "Failed to close all perf buffer on destruction: " << res.msg() 512 << std::endl; 513 } 514 } 515 516 BPFProgTable::BPFProgTable(const TableDesc& desc) 517 : BPFTableBase<int, int>(desc) { 518 if (desc.type != BPF_MAP_TYPE_PROG_ARRAY) 519 throw std::invalid_argument("Table '" + desc.name + 520 "' is not a prog table"); 521 } 522 523 StatusTuple BPFProgTable::update_value(const int& index, const int& prog_fd) { 524 if (!this->update(const_cast<int*>(&index), const_cast<int*>(&prog_fd))) 525 return StatusTuple(-1, "Error updating value: %s", std::strerror(errno)); 526 return StatusTuple(0); 527 } 528 529 StatusTuple BPFProgTable::remove_value(const int& index) { 530 if (!this->remove(const_cast<int*>(&index))) 531 return StatusTuple(-1, "Error removing value: %s", std::strerror(errno)); 532 return StatusTuple(0); 533 } 534 535 BPFCgroupArray::BPFCgroupArray(const TableDesc& desc) 536 : BPFTableBase<int, int>(desc) { 537 if (desc.type != BPF_MAP_TYPE_CGROUP_ARRAY) 538 throw std::invalid_argument("Table '" + desc.name + 539 "' is not a cgroup array"); 540 } 541 542 StatusTuple BPFCgroupArray::update_value(const int& index, 543 const int& cgroup2_fd) { 544 if (!this->update(const_cast<int*>(&index), const_cast<int*>(&cgroup2_fd))) 545 return StatusTuple(-1, "Error updating value: %s", std::strerror(errno)); 546 return StatusTuple(0); 547 } 548 549 StatusTuple BPFCgroupArray::update_value(const int& index, 550 const std::string& cgroup2_path) { 551 FileDesc f(::open(cgroup2_path.c_str(), O_RDONLY | O_CLOEXEC)); 552 if ((int)f < 0) 553 return StatusTuple(-1, "Unable to open %s", cgroup2_path.c_str()); 554 TRY2(update_value(index, (int)f)); 555 return StatusTuple(0); 556 } 557 558 StatusTuple BPFCgroupArray::remove_value(const int& index) { 559 if (!this->remove(const_cast<int*>(&index))) 560 return StatusTuple(-1, "Error removing value: %s", std::strerror(errno)); 561 return StatusTuple(0); 562 } 563 564 BPFDevmapTable::BPFDevmapTable(const TableDesc& desc) 565 : BPFTableBase<int, int>(desc) { 566 if(desc.type != BPF_MAP_TYPE_DEVMAP) 567 throw std::invalid_argument("Table '" + desc.name + 568 "' is not a devmap table"); 569 } 570 571 StatusTuple BPFDevmapTable::update_value(const int& index, 572 const int& value) { 573 if (!this->update(const_cast<int*>(&index), const_cast<int*>(&value))) 574 return StatusTuple(-1, "Error updating value: %s", std::strerror(errno)); 575 return StatusTuple(0); 576 } 577 578 StatusTuple BPFDevmapTable::get_value(const int& index, 579 int& value) { 580 if (!this->lookup(const_cast<int*>(&index), &value)) 581 return StatusTuple(-1, "Error getting value: %s", std::strerror(errno)); 582 return StatusTuple(0); 583 } 584 585 StatusTuple BPFDevmapTable::remove_value(const int& index) { 586 if (!this->remove(const_cast<int*>(&index))) 587 return StatusTuple(-1, "Error removing value: %s", std::strerror(errno)); 588 return StatusTuple(0); 589 } 590 591 } // namespace ebpf 592