Home | History | Annotate | Download | only in api
      1 /*
      2  * Copyright (c) 2016 Facebook, Inc.
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  * http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
#include <fcntl.h>
#include <linux/elf.h>
#include <linux/perf_event.h>
#include <sys/epoll.h>
#include <unistd.h>
#include <cerrno>
#include <cinttypes>
#include <cstdint>
#include <cstring>
#include <iostream>
#include <memory>
#include <string>
#include <vector>

#include "BPFTable.h"

#include "bcc_exception.h"
#include "bcc_syms.h"
#include "common.h"
#include "file_desc.h"
#include "libbpf.h"
#include "perf_reader.h"
     37 
     38 namespace ebpf {
     39 
     40 BPFTable::BPFTable(const TableDesc& desc) : BPFTableBase<void, void>(desc) {}
     41 
     42 StatusTuple BPFTable::get_value(const std::string& key_str,
     43                                 std::string& value_str) {
     44   char key[desc.key_size];
     45   char value[desc.leaf_size];
     46 
     47   StatusTuple r(0);
     48 
     49   r = string_to_key(key_str, key);
     50   if (r.code() != 0)
     51     return r;
     52 
     53   if (!lookup(key, value))
     54     return StatusTuple(-1, "error getting value");
     55 
     56   return leaf_to_string(value, value_str);
     57 }
     58 
     59 StatusTuple BPFTable::get_value(const std::string& key_str,
     60                                 std::vector<std::string>& value_str) {
     61   size_t ncpus = get_possible_cpus().size();
     62   char key[desc.key_size];
     63   char value[desc.leaf_size * ncpus];
     64 
     65   StatusTuple r(0);
     66 
     67   r = string_to_key(key_str, key);
     68   if (r.code() != 0)
     69     return r;
     70 
     71   if (!lookup(key, value))
     72     return StatusTuple(-1, "error getting value");
     73 
     74   value_str.resize(ncpus);
     75 
     76   for (size_t i = 0; i < ncpus; i++) {
     77     r = leaf_to_string(value + i * desc.leaf_size, value_str.at(i));
     78     if (r.code() != 0)
     79       return r;
     80   }
     81   return StatusTuple(0);
     82 }
     83 
     84 StatusTuple BPFTable::update_value(const std::string& key_str,
     85                                    const std::string& value_str) {
     86   char key[desc.key_size];
     87   char value[desc.leaf_size];
     88 
     89   StatusTuple r(0);
     90 
     91   r = string_to_key(key_str, key);
     92   if (r.code() != 0)
     93     return r;
     94 
     95   r = string_to_leaf(value_str, value);
     96   if (r.code() != 0)
     97     return r;
     98 
     99   if (!update(key, value))
    100     return StatusTuple(-1, "error updating element");
    101 
    102   return StatusTuple(0);
    103 }
    104 
    105 StatusTuple BPFTable::update_value(const std::string& key_str,
    106                                    const std::vector<std::string>& value_str) {
    107   size_t ncpus = get_possible_cpus().size();
    108   char key[desc.key_size];
    109   char value[desc.leaf_size * ncpus];
    110 
    111   StatusTuple r(0);
    112 
    113   r = string_to_key(key_str, key);
    114   if (r.code() != 0)
    115     return r;
    116 
    117   if (value_str.size() != ncpus)
    118     return StatusTuple(-1, "bad value size");
    119 
    120   for (size_t i = 0; i < ncpus; i++) {
    121     r = string_to_leaf(value_str.at(i), value + i * desc.leaf_size);
    122     if (r.code() != 0)
    123       return r;
    124   }
    125 
    126   if (!update(key, value))
    127     return StatusTuple(-1, "error updating element");
    128 
    129   return StatusTuple(0);
    130 }
    131 
    132 StatusTuple BPFTable::remove_value(const std::string& key_str) {
    133   char key[desc.key_size];
    134 
    135   StatusTuple r(0);
    136 
    137   r = string_to_key(key_str, key);
    138   if (r.code() != 0)
    139     return r;
    140 
    141   if (!remove(key))
    142     return StatusTuple(-1, "error removing element");
    143 
    144   return StatusTuple(0);
    145 }
    146 
    147 StatusTuple BPFTable::clear_table_non_atomic() {
    148   if (desc.type == BPF_MAP_TYPE_HASH || desc.type == BPF_MAP_TYPE_PERCPU_HASH ||
    149       desc.type == BPF_MAP_TYPE_LRU_HASH ||
    150       desc.type == BPF_MAP_TYPE_PERCPU_HASH ||
    151       desc.type == BPF_MAP_TYPE_HASH_OF_MAPS) {
    152     // For hash maps, use the first() interface (which uses get_next_key) to
    153     // iterate through the map and clear elements
    154     auto key = std::unique_ptr<void, decltype(::free)*>(::malloc(desc.key_size),
    155                                                         ::free);
    156 
    157     while (this->first(key.get()))
    158       if (!this->remove(key.get())) {
    159         return StatusTuple(-1,
    160                            "Failed to delete element when clearing table %s",
    161                            desc.name.c_str());
    162       }
    163   } else if (desc.type == BPF_MAP_TYPE_ARRAY ||
    164              desc.type == BPF_MAP_TYPE_PERCPU_ARRAY) {
    165     return StatusTuple(-1, "Array map %s do not support clearing elements",
    166                        desc.name.c_str());
    167   } else if (desc.type == BPF_MAP_TYPE_PROG_ARRAY ||
    168              desc.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY ||
    169              desc.type == BPF_MAP_TYPE_STACK_TRACE ||
    170              desc.type == BPF_MAP_TYPE_ARRAY_OF_MAPS) {
    171     // For Stack-trace and FD arrays, just iterate over all indices
    172     for (size_t i = 0; i < desc.max_entries; i++) {
    173       this->remove(&i);
    174     }
    175   } else {
    176     return StatusTuple(-1, "Clearing for map type of %s not supported yet",
    177                        desc.name.c_str());
    178   }
    179 
    180   return StatusTuple(0);
    181 }
    182 
    183 StatusTuple BPFTable::get_table_offline(
    184   std::vector<std::pair<std::string, std::string>> &res) {
    185   StatusTuple r(0);
    186   int err;
    187 
    188   auto key = std::unique_ptr<void, decltype(::free)*>(::malloc(desc.key_size),
    189                                                       ::free);
    190   auto value = std::unique_ptr<void, decltype(::free)*>(::malloc(desc.leaf_size),
    191                                                       ::free);
    192   std::string key_str;
    193   std::string value_str;
    194 
    195   if (desc.type == BPF_MAP_TYPE_ARRAY ||
    196       desc.type == BPF_MAP_TYPE_PROG_ARRAY ||
    197       desc.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY ||
    198       desc.type == BPF_MAP_TYPE_PERCPU_ARRAY ||
    199       desc.type == BPF_MAP_TYPE_CGROUP_ARRAY ||
    200       desc.type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
    201       desc.type == BPF_MAP_TYPE_DEVMAP ||
    202       desc.type == BPF_MAP_TYPE_CPUMAP ||
    203       desc.type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
    204     // For arrays, just iterate over all indices
    205     for (size_t i = 0; i < desc.max_entries; i++) {
    206       err = bpf_lookup_elem(desc.fd, &i, value.get());
    207       if (err < 0 && errno == ENOENT) {
    208         // Element is not present, skip it
    209         continue;
    210       } else if (err < 0) {
    211         // Other error, abort
    212         return StatusTuple(-1, "Error looking up value: %s", std::strerror(errno));
    213       }
    214 
    215       r = key_to_string(&i, key_str);
    216       if (r.code() != 0)
    217         return r;
    218 
    219       r = leaf_to_string(value.get(), value_str);
    220       if (r.code() != 0)
    221         return r;
    222       res.emplace_back(key_str, value_str);
    223     }
    224   } else {
    225     res.clear();
    226     // For other maps, try to use the first() and next() interfaces
    227     if (!this->first(key.get()))
    228       return StatusTuple(0);
    229 
    230     while (true) {
    231       if (!this->lookup(key.get(), value.get()))
    232         break;
    233       r = key_to_string(key.get(), key_str);
    234       if (r.code() != 0)
    235         return r;
    236 
    237       r = leaf_to_string(value.get(), value_str);
    238       if (r.code() != 0)
    239         return r;
    240       res.emplace_back(key_str, value_str);
    241       if (!this->next(key.get(), key.get()))
    242         break;
    243     }
    244   }
    245 
    246   return StatusTuple(0);
    247 }
    248 
    249 size_t BPFTable::get_possible_cpu_count() { return get_possible_cpus().size(); }
    250 
    251 BPFStackTable::BPFStackTable(const TableDesc& desc, bool use_debug_file,
    252                              bool check_debug_file_crc)
    253     : BPFTableBase<int, stacktrace_t>(desc) {
    254   if (desc.type != BPF_MAP_TYPE_STACK_TRACE)
    255     throw std::invalid_argument("Table '" + desc.name +
    256                                 "' is not a stack table");
    257 
    258   symbol_option_ = {.use_debug_file = use_debug_file,
    259                     .check_debug_file_crc = check_debug_file_crc,
    260                     .use_symbol_type = (1 << STT_FUNC) | (1 << STT_GNU_IFUNC)};
    261 }
    262 
    263 BPFStackTable::BPFStackTable(BPFStackTable&& that)
    264     : BPFTableBase<int, stacktrace_t>(that.desc),
    265       symbol_option_(std::move(that.symbol_option_)),
    266       pid_sym_(std::move(that.pid_sym_)) {
    267   that.pid_sym_.clear();
    268 }
    269 
    270 BPFStackTable::~BPFStackTable() {
    271   for (auto it : pid_sym_)
    272     bcc_free_symcache(it.second, it.first);
    273 }
    274 
    275 void BPFStackTable::clear_table_non_atomic() {
    276   for (int i = 0; size_t(i) < capacity(); i++) {
    277     remove(&i);
    278   }
    279 }
    280 
    281 std::vector<uintptr_t> BPFStackTable::get_stack_addr(int stack_id) {
    282   std::vector<uintptr_t> res;
    283   stacktrace_t stack;
    284   if (stack_id < 0)
    285     return res;
    286   if (!lookup(&stack_id, &stack))
    287     return res;
    288   for (int i = 0; (i < BPF_MAX_STACK_DEPTH) && (stack.ip[i] != 0); i++)
    289     res.push_back(stack.ip[i]);
    290   return res;
    291 }
    292 
    293 std::vector<std::string> BPFStackTable::get_stack_symbol(int stack_id,
    294                                                          int pid) {
    295   auto addresses = get_stack_addr(stack_id);
    296   std::vector<std::string> res;
    297   if (addresses.empty())
    298     return res;
    299   res.reserve(addresses.size());
    300 
    301   if (pid < 0)
    302     pid = -1;
    303   if (pid_sym_.find(pid) == pid_sym_.end())
    304     pid_sym_[pid] = bcc_symcache_new(pid, &symbol_option_);
    305   void* cache = pid_sym_[pid];
    306 
    307   bcc_symbol symbol;
    308   for (auto addr : addresses)
    309     if (bcc_symcache_resolve(cache, addr, &symbol) != 0)
    310       res.emplace_back("[UNKNOWN]");
    311     else {
    312       res.push_back(symbol.demangle_name);
    313       bcc_symbol_free_demangle_name(&symbol);
    314     }
    315 
    316   return res;
    317 }
    318 
    319 BPFPerfBuffer::BPFPerfBuffer(const TableDesc& desc)
    320     : BPFTableBase<int, int>(desc), epfd_(-1) {
    321   if (desc.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
    322     throw std::invalid_argument("Table '" + desc.name +
    323                                 "' is not a perf buffer");
    324 }
    325 
    326 StatusTuple BPFPerfBuffer::open_on_cpu(perf_reader_raw_cb cb,
    327                                        perf_reader_lost_cb lost_cb, int cpu,
    328                                        void* cb_cookie, int page_cnt) {
    329   if (cpu_readers_.find(cpu) != cpu_readers_.end())
    330     return StatusTuple(-1, "Perf buffer already open on CPU %d", cpu);
    331 
    332   auto reader = static_cast<perf_reader*>(
    333       bpf_open_perf_buffer(cb, lost_cb, cb_cookie, -1, cpu, page_cnt));
    334   if (reader == nullptr)
    335     return StatusTuple(-1, "Unable to construct perf reader");
    336 
    337   int reader_fd = perf_reader_fd(reader);
    338   if (!update(&cpu, &reader_fd)) {
    339     perf_reader_free(static_cast<void*>(reader));
    340     return StatusTuple(-1, "Unable to open perf buffer on CPU %d: %s", cpu,
    341                        std::strerror(errno));
    342   }
    343 
    344   struct epoll_event event = {};
    345   event.events = EPOLLIN;
    346   event.data.ptr = static_cast<void*>(reader);
    347   if (epoll_ctl(epfd_, EPOLL_CTL_ADD, reader_fd, &event) != 0) {
    348     perf_reader_free(static_cast<void*>(reader));
    349     return StatusTuple(-1, "Unable to add perf_reader FD to epoll: %s",
    350                        std::strerror(errno));
    351   }
    352 
    353   cpu_readers_[cpu] = reader;
    354   return StatusTuple(0);
    355 }
    356 
    357 StatusTuple BPFPerfBuffer::open_all_cpu(perf_reader_raw_cb cb,
    358                                         perf_reader_lost_cb lost_cb,
    359                                         void* cb_cookie, int page_cnt) {
    360   if (cpu_readers_.size() != 0 || epfd_ != -1)
    361     return StatusTuple(-1, "Previously opened perf buffer not cleaned");
    362 
    363   std::vector<int> cpus = get_online_cpus();
    364   ep_events_.reset(new epoll_event[cpus.size()]);
    365   epfd_ = epoll_create1(EPOLL_CLOEXEC);
    366 
    367   for (int i : cpus) {
    368     auto res = open_on_cpu(cb, lost_cb, i, cb_cookie, page_cnt);
    369     if (res.code() != 0) {
    370       TRY2(close_all_cpu());
    371       return res;
    372     }
    373   }
    374   return StatusTuple(0);
    375 }
    376 
    377 StatusTuple BPFPerfBuffer::close_on_cpu(int cpu) {
    378   auto it = cpu_readers_.find(cpu);
    379   if (it == cpu_readers_.end())
    380     return StatusTuple(0);
    381   perf_reader_free(static_cast<void*>(it->second));
    382   if (!remove(const_cast<int*>(&(it->first))))
    383     return StatusTuple(-1, "Unable to close perf buffer on CPU %d", it->first);
    384   cpu_readers_.erase(it);
    385   return StatusTuple(0);
    386 }
    387 
    388 StatusTuple BPFPerfBuffer::close_all_cpu() {
    389   std::string errors;
    390   bool has_error = false;
    391 
    392   if (epfd_ >= 0) {
    393     int close_res = close(epfd_);
    394     epfd_ = -1;
    395     ep_events_.reset();
    396     if (close_res != 0) {
    397       has_error = true;
    398       errors += std::string(std::strerror(errno)) + "\n";
    399     }
    400   }
    401 
    402   std::vector<int> opened_cpus;
    403   for (auto it : cpu_readers_)
    404     opened_cpus.push_back(it.first);
    405   for (int i : opened_cpus) {
    406     auto res = close_on_cpu(i);
    407     if (res.code() != 0) {
    408       errors += "Failed to close CPU" + std::to_string(i) + " perf buffer: ";
    409       errors += res.msg() + "\n";
    410       has_error = true;
    411     }
    412   }
    413 
    414   if (has_error)
    415     return StatusTuple(-1, errors);
    416   return StatusTuple(0);
    417 }
    418 
    419 int BPFPerfBuffer::poll(int timeout_ms) {
    420   if (epfd_ < 0)
    421     return -1;
    422   int cnt =
    423       epoll_wait(epfd_, ep_events_.get(), cpu_readers_.size(), timeout_ms);
    424   for (int i = 0; i < cnt; i++)
    425     perf_reader_event_read(static_cast<perf_reader*>(ep_events_[i].data.ptr));
    426   return cnt;
    427 }
    428 
    429 BPFPerfBuffer::~BPFPerfBuffer() {
    430   auto res = close_all_cpu();
    431   if (res.code() != 0)
    432     std::cerr << "Failed to close all perf buffer on destruction: " << res.msg()
    433               << std::endl;
    434 }
    435 
    436 BPFPerfEventArray::BPFPerfEventArray(const TableDesc& desc)
    437     : BPFTableBase<int, int>(desc) {
    438   if (desc.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
    439     throw std::invalid_argument("Table '" + desc.name +
    440                                 "' is not a perf event array");
    441 }
    442 
    443 StatusTuple BPFPerfEventArray::open_all_cpu(uint32_t type, uint64_t config) {
    444   if (cpu_fds_.size() != 0)
    445     return StatusTuple(-1, "Previously opened perf event not cleaned");
    446 
    447   std::vector<int> cpus = get_online_cpus();
    448 
    449   for (int i : cpus) {
    450     auto res = open_on_cpu(i, type, config);
    451     if (res.code() != 0) {
    452       TRY2(close_all_cpu());
    453       return res;
    454     }
    455   }
    456   return StatusTuple(0);
    457 }
    458 
    459 StatusTuple BPFPerfEventArray::close_all_cpu() {
    460   std::string errors;
    461   bool has_error = false;
    462 
    463   std::vector<int> opened_cpus;
    464   for (auto it : cpu_fds_)
    465     opened_cpus.push_back(it.first);
    466   for (int i : opened_cpus) {
    467     auto res = close_on_cpu(i);
    468     if (res.code() != 0) {
    469       errors += "Failed to close CPU" + std::to_string(i) + " perf event: ";
    470       errors += res.msg() + "\n";
    471       has_error = true;
    472     }
    473   }
    474 
    475   if (has_error)
    476     return StatusTuple(-1, errors);
    477   return StatusTuple(0);
    478 }
    479 
    480 StatusTuple BPFPerfEventArray::open_on_cpu(int cpu, uint32_t type,
    481                                            uint64_t config) {
    482   if (cpu_fds_.find(cpu) != cpu_fds_.end())
    483     return StatusTuple(-1, "Perf event already open on CPU %d", cpu);
    484   int fd = bpf_open_perf_event(type, config, -1, cpu);
    485   if (fd < 0) {
    486     return StatusTuple(-1, "Error constructing perf event %" PRIu32 ":%" PRIu64,
    487                        type, config);
    488   }
    489   if (!update(&cpu, &fd)) {
    490     bpf_close_perf_event_fd(fd);
    491     return StatusTuple(-1, "Unable to open perf event on CPU %d: %s", cpu,
    492                        std::strerror(errno));
    493   }
    494   cpu_fds_[cpu] = fd;
    495   return StatusTuple(0);
    496 }
    497 
    498 StatusTuple BPFPerfEventArray::close_on_cpu(int cpu) {
    499   auto it = cpu_fds_.find(cpu);
    500   if (it == cpu_fds_.end()) {
    501     return StatusTuple(0);
    502   }
    503   bpf_close_perf_event_fd(it->second);
    504   cpu_fds_.erase(it);
    505   return StatusTuple(0);
    506 }
    507 
    508 BPFPerfEventArray::~BPFPerfEventArray() {
    509   auto res = close_all_cpu();
    510   if (res.code() != 0) {
    511     std::cerr << "Failed to close all perf buffer on destruction: " << res.msg()
    512               << std::endl;
    513   }
    514 }
    515 
    516 BPFProgTable::BPFProgTable(const TableDesc& desc)
    517     : BPFTableBase<int, int>(desc) {
    518   if (desc.type != BPF_MAP_TYPE_PROG_ARRAY)
    519     throw std::invalid_argument("Table '" + desc.name +
    520                                 "' is not a prog table");
    521 }
    522 
    523 StatusTuple BPFProgTable::update_value(const int& index, const int& prog_fd) {
    524   if (!this->update(const_cast<int*>(&index), const_cast<int*>(&prog_fd)))
    525     return StatusTuple(-1, "Error updating value: %s", std::strerror(errno));
    526   return StatusTuple(0);
    527 }
    528 
    529 StatusTuple BPFProgTable::remove_value(const int& index) {
    530   if (!this->remove(const_cast<int*>(&index)))
    531     return StatusTuple(-1, "Error removing value: %s", std::strerror(errno));
    532   return StatusTuple(0);
    533 }
    534 
    535 BPFCgroupArray::BPFCgroupArray(const TableDesc& desc)
    536     : BPFTableBase<int, int>(desc) {
    537   if (desc.type != BPF_MAP_TYPE_CGROUP_ARRAY)
    538     throw std::invalid_argument("Table '" + desc.name +
    539                                 "' is not a cgroup array");
    540 }
    541 
    542 StatusTuple BPFCgroupArray::update_value(const int& index,
    543                                          const int& cgroup2_fd) {
    544   if (!this->update(const_cast<int*>(&index), const_cast<int*>(&cgroup2_fd)))
    545     return StatusTuple(-1, "Error updating value: %s", std::strerror(errno));
    546   return StatusTuple(0);
    547 }
    548 
    549 StatusTuple BPFCgroupArray::update_value(const int& index,
    550                                          const std::string& cgroup2_path) {
    551   FileDesc f(::open(cgroup2_path.c_str(), O_RDONLY | O_CLOEXEC));
    552   if ((int)f < 0)
    553     return StatusTuple(-1, "Unable to open %s", cgroup2_path.c_str());
    554   TRY2(update_value(index, (int)f));
    555   return StatusTuple(0);
    556 }
    557 
    558 StatusTuple BPFCgroupArray::remove_value(const int& index) {
    559   if (!this->remove(const_cast<int*>(&index)))
    560     return StatusTuple(-1, "Error removing value: %s", std::strerror(errno));
    561   return StatusTuple(0);
    562 }
    563 
    564 BPFDevmapTable::BPFDevmapTable(const TableDesc& desc)
    565     : BPFTableBase<int, int>(desc) {
    566     if(desc.type != BPF_MAP_TYPE_DEVMAP)
    567       throw std::invalid_argument("Table '" + desc.name +
    568                                   "' is not a devmap table");
    569 }
    570 
    571 StatusTuple BPFDevmapTable::update_value(const int& index,
    572                                          const int& value) {
    573     if (!this->update(const_cast<int*>(&index), const_cast<int*>(&value)))
    574       return StatusTuple(-1, "Error updating value: %s", std::strerror(errno));
    575     return StatusTuple(0);
    576 }
    577 
    578 StatusTuple BPFDevmapTable::get_value(const int& index,
    579                                       int& value) {
    580     if (!this->lookup(const_cast<int*>(&index), &value))
    581       return StatusTuple(-1, "Error getting value: %s", std::strerror(errno));
    582     return StatusTuple(0);
    583 }
    584 
    585 StatusTuple BPFDevmapTable::remove_value(const int& index) {
    586     if (!this->remove(const_cast<int*>(&index)))
    587       return StatusTuple(-1, "Error removing value: %s", std::strerror(errno));
    588     return StatusTuple(0);
    589 }
    590 
    591 }  // namespace ebpf
    592