Home | History | Annotate | Download | only in usdt
      1 /*
      2  * Copyright (c) 2016 GitHub, Inc.
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  * http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 #include <unordered_map>
     17 #include <regex>
     18 
     19 #include "syms.h"
     20 #include "usdt.h"
     21 #include "vendor/tinyformat.hpp"
     22 
     23 #include "bcc_elf.h"
     24 #include "bcc_syms.h"
     25 
     26 namespace USDT {
     27 
     28 Argument::Argument() {}
     29 Argument::~Argument() {}
     30 
     31 std::string Argument::ctype() const {
     32   const int s = arg_size() * 8;
     33   return (s < 0) ? tfm::format("int%d_t", -s) : tfm::format("uint%d_t", s);
     34 }
     35 
     36 bool Argument::get_global_address(uint64_t *address, const std::string &binpath,
     37                                   const optional<int> &pid) const {
     38   if (pid) {
     39     static struct bcc_symbol_option default_option = {
     40       .use_debug_file = 1,
     41       .check_debug_file_crc = 1,
     42       .use_symbol_type = BCC_SYM_ALL_TYPES
     43     };
     44     return ProcSyms(*pid, &default_option)
     45         .resolve_name(binpath.c_str(), deref_ident_->c_str(), address);
     46   }
     47 
     48   if (!bcc_elf_is_shared_obj(binpath.c_str())) {
     49     struct bcc_symbol sym;
     50     if (bcc_resolve_symname(binpath.c_str(), deref_ident_->c_str(), 0x0, -1, nullptr, &sym) == 0) {
     51       *address = sym.offset;
     52       if (sym.module)
     53         ::free(const_cast<char*>(sym.module));
     54       return true;
     55     }
     56   }
     57 
     58   return false;
     59 }
     60 
     61 bool Argument::assign_to_local(std::ostream &stream,
     62                                const std::string &local_name,
     63                                const std::string &binpath,
     64                                const optional<int> &pid) const {
     65   if (constant_) {
     66     tfm::format(stream, "%s = %d;", local_name, *constant_);
     67     return true;
     68   }
     69 
     70   if (!deref_offset_) {
     71     tfm::format(stream, "%s = ctx->%s;", local_name, *base_register_name_);
     72     // Put a compiler barrier to prevent optimization
     73     // like llvm SimplifyCFG SinkThenElseCodeToEnd
     74     // Volatile marking is not sufficient to prevent such optimization.
     75     tfm::format(stream, " %s", COMPILER_BARRIER);
     76     return true;
     77   }
     78 
     79   if (deref_offset_ && !deref_ident_) {
     80     tfm::format(stream, "{ u64 __addr = ctx->%s + %d",
     81                 *base_register_name_, *deref_offset_);
     82     if (index_register_name_) {
     83       int scale = scale_.value_or(1);
     84       tfm::format(stream, " + (ctx->%s * %d);", *index_register_name_, scale);
     85     } else {
     86       tfm::format(stream, ";");
     87     }
     88     // Theoretically, llvm SimplifyCFG SinkThenElseCodeToEnd may still
     89     // sink bpf_probe_read call, so put a barrier here to prevent sinking
     90     // of ctx->#fields.
     91     tfm::format(stream, " %s ", COMPILER_BARRIER);
     92     tfm::format(stream,
     93                 "%s __res = 0x0; "
     94                 "bpf_probe_read(&__res, sizeof(__res), (void *)__addr); "
     95                 "%s = __res; }",
     96                 ctype(), local_name);
     97     return true;
     98   }
     99 
    100   if (deref_offset_ && deref_ident_ && *base_register_name_ == "ip") {
    101     uint64_t global_address;
    102     if (!get_global_address(&global_address, binpath, pid))
    103       return false;
    104 
    105     tfm::format(stream,
    106                 "{ u64 __addr = 0x%xull + %d; %s __res = 0x0; "
    107                 "bpf_probe_read(&__res, sizeof(__res), (void *)__addr); "
    108                 "%s = __res; }",
    109                 global_address, *deref_offset_, ctype(), local_name);
    110     return true;
    111   }
    112 
    113   return false;
    114 }
    115 
    116 void ArgumentParser::print_error(ssize_t pos) {
    117   fprintf(stderr, "Parse error:\n    %s\n", arg_);
    118   for (ssize_t i = 0; i < pos + 4; ++i) fputc('-', stderr);
    119   fputc('^', stderr);
    120   fputc('\n', stderr);
    121 }
    122 
    123 void ArgumentParser::skip_whitespace_from(size_t pos) {
    124     while (isspace(arg_[pos])) pos++;
    125     cur_pos_ = pos;
    126 }
    127 
    128 void ArgumentParser::skip_until_whitespace_from(size_t pos) {
    129     while (arg_[pos] != '\0' && !isspace(arg_[pos]))
    130         pos++;
    131     cur_pos_ = pos;
    132 }
    133 
    134 bool ArgumentParser_aarch64::parse_register(ssize_t pos, ssize_t &new_pos,
    135                                             optional<int> *reg_num) {
    136   new_pos = parse_number(pos, reg_num);
    137   if (new_pos == pos || *reg_num < 0 || *reg_num > 31)
    138     return error_return(pos, pos);
    139   return true;
    140 }
    141 
    142 bool ArgumentParser_aarch64::parse_size(ssize_t pos, ssize_t &new_pos,
    143                                         optional<int> *arg_size) {
    144   int abs_arg_size;
    145 
    146   new_pos = parse_number(pos, arg_size);
    147   if (new_pos == pos)
    148     return error_return(pos, pos);
    149 
    150   abs_arg_size = abs(arg_size->value());
    151   if (abs_arg_size != 1 && abs_arg_size != 2 && abs_arg_size != 4 &&
    152       abs_arg_size != 8)
    153     return error_return(pos, pos);
    154   return true;
    155 }
    156 
    157 bool ArgumentParser_aarch64::parse_mem(ssize_t pos, ssize_t &new_pos,
    158                                        optional<int> *reg_num,
    159                                        optional<int> *offset) {
    160   if (arg_[pos] != 'x')
    161     return error_return(pos, pos);
    162   if (parse_register(pos + 1, new_pos, reg_num) == false)
    163     return false;
    164 
    165   if (arg_[new_pos] == ',') {
    166     pos = new_pos + 1;
    167     new_pos = parse_number(pos, offset);
    168     if (new_pos == pos)
    169       return error_return(pos, pos);
    170   }
    171   if (arg_[new_pos] != ']')
    172     return error_return(new_pos, new_pos);
    173   new_pos++;
    174   return true;
    175 }
    176 
    177 bool ArgumentParser_aarch64::parse(Argument *dest) {
    178   if (done())
    179     return false;
    180 
    181   // Support the following argument patterns:
    182   //   [-]<size>@<value>, [-]<size>@<reg>, [-]<size>@[<reg>], or
    183   //   [-]<size>@[<reg>,<offset>]
    184   ssize_t cur_pos = cur_pos_, new_pos;
    185   optional<int> arg_size;
    186 
    187   // Parse [-]<size>
    188   if (parse_size(cur_pos, new_pos, &arg_size) == false)
    189     return false;
    190   dest->arg_size_ = arg_size;
    191 
    192   // Make sure '@' present
    193   if (arg_[new_pos] != '@')
    194     return error_return(new_pos, new_pos);
    195   cur_pos = new_pos + 1;
    196 
    197   if (arg_[cur_pos] == 'x') {
    198     // Parse ...@<reg>
    199     optional<int> reg_num;
    200     if (parse_register(cur_pos + 1, new_pos, &reg_num) == false)
    201       return false;
    202     cur_pos_ = new_pos;
    203     dest->base_register_name_ = "regs[" + std::to_string(reg_num.value()) + "]";
    204   } else if (arg_[cur_pos] == '[') {
    205     // Parse ...@[<reg>] and ...@[<reg,<offset>]
    206     optional<int> reg_num, offset = 0;
    207     if (parse_mem(cur_pos + 1, new_pos, &reg_num, &offset) == false)
    208       return false;
    209     cur_pos_ = new_pos;
    210     dest->base_register_name_ = "regs[" + std::to_string(reg_num.value()) + "]";
    211     dest->deref_offset_ = offset;
    212   } else {
    213     // Parse ...@<value>
    214     optional<int> val;
    215     new_pos = parse_number(cur_pos, &val);
    216     if (cur_pos == new_pos)
    217       return error_return(cur_pos, cur_pos);
    218     cur_pos_ = new_pos;
    219     dest->constant_ = val;
    220   }
    221 
    222   skip_whitespace_from(cur_pos_);
    223   return true;
    224 }
    225 
    226 bool ArgumentParser_powerpc64::parse(Argument *dest) {
    227   if (done())
    228     return false;
    229 
    230   bool matched;
    231   std::smatch matches;
    232   std::string arg_str(&arg_[cur_pos_]);
    233   std::regex arg_n_regex("^(\\-?[1248])\\@");
    234   // Operands with constants of form iNUM or i-NUM
    235   std::regex arg_op_regex_const("^i(\\-?[0-9]+)( +|$)");
    236   // Operands with register only of form REG or %rREG
    237   std::regex arg_op_regex_reg("^(?:%r)?([1-2]?[0-9]|3[0-1])( +|$)");
    238   // Operands with a base register and an offset of form
    239   // NUM(REG) or -NUM(REG) or NUM(%rREG) or -NUM(%rREG)
    240   std::regex arg_op_regex_breg_off(
    241         "^(\\-?[0-9]+)\\((?:%r)?([1-2]?[0-9]|3[0-1])\\)( +|$)");
    242   // Operands with a base register and an index register
    243   // of form REG,REG or %rREG,%rREG
    244   std::regex arg_op_regex_breg_ireg(
    245         "^(?:%r)?([1-2]?[0-9]|3[0-1])\\,(?:%r)?([1-2]?[0-9]|3[0-1])( +|$)");
    246 
    247   matched = std::regex_search(arg_str, matches, arg_n_regex);
    248   if (matched) {
    249     dest->arg_size_ = stoi(matches.str(1));
    250     cur_pos_ += matches.length(0);
    251     arg_str = &arg_[cur_pos_];
    252 
    253     if (std::regex_search(arg_str, matches, arg_op_regex_const)) {
    254       dest->constant_ = stoi(matches.str(1));
    255     } else if (std::regex_search(arg_str, matches, arg_op_regex_reg)) {
    256       dest->base_register_name_ = "gpr[" + matches.str(1) + "]";
    257     } else if (std::regex_search(arg_str, matches, arg_op_regex_breg_off)) {
    258       dest->deref_offset_ = stoi(matches.str(1));
    259       dest->base_register_name_ = "gpr[" + matches.str(2) + "]";
    260     } else if (std::regex_search(arg_str, matches, arg_op_regex_breg_ireg)) {
    261       dest->deref_offset_ = 0; // In powerpc64, such operands contain a base
    262                                // register and an index register which are
    263                                // part of an indexed load/store operation.
    264                                // Even if no offset value is present, this
    265                                // is required by Argument::assign_to_local()
    266                                // in order to generate code for reading the
    267                                // argument. So, this is set to zero.
    268       dest->base_register_name_ = "gpr[" + matches.str(1) + "]";
    269       dest->index_register_name_ = "gpr[" + matches.str(2) + "]";
    270       dest->scale_ = abs(*dest->arg_size_);
    271     } else {
    272       matched = false;
    273     }
    274   }
    275 
    276   if (!matched) {
    277     print_error(cur_pos_);
    278     skip_until_whitespace_from(cur_pos_);
    279     skip_whitespace_from(cur_pos_);
    280     return false;
    281   }
    282 
    283   cur_pos_ += matches.length(0);
    284   skip_whitespace_from(cur_pos_);
    285   return true;
    286 }
    287 
    288 ssize_t ArgumentParser_x64::parse_identifier(ssize_t pos,
    289                                              optional<std::string> *result) {
    290   if (isalpha(arg_[pos]) || arg_[pos] == '_') {
    291     ssize_t start = pos++;
    292     while (isalnum(arg_[pos]) || arg_[pos] == '_') pos++;
    293     if (pos - start)
    294       result->emplace(arg_ + start, pos - start);
    295   }
    296   return pos;
    297 }
    298 
    299 ssize_t ArgumentParser_x64::parse_register(ssize_t pos, std::string &name,
    300                                            int &size) {
    301   ssize_t start = ++pos;
    302   if (arg_[start - 1] != '%')
    303     return -start;
    304 
    305   while (isalnum(arg_[pos])) pos++;
    306 
    307   std::string regname(arg_ + start, pos - start);
    308   if (!normalize_register(&regname, &size))
    309     return -start;
    310 
    311   name = regname;
    312   return pos;
    313 }
    314 
    315 ssize_t ArgumentParser_x64::parse_base_register(ssize_t pos, Argument *dest) {
    316   int size;
    317   std::string name;
    318   ssize_t res = parse_register(pos, name, size);
    319   if (res < 0)
    320       return res;
    321 
    322   dest->base_register_name_ = name;
    323   if (!dest->arg_size_)
    324     dest->arg_size_ = size;
    325 
    326   return res;
    327 }
    328 
    329 ssize_t ArgumentParser_x64::parse_index_register(ssize_t pos, Argument *dest) {
    330   int size;
    331   std::string name;
    332   ssize_t res = parse_register(pos, name, size);
    333   if (res < 0)
    334       return res;
    335 
    336   dest->index_register_name_ = name;
    337 
    338   return res;
    339 }
    340 
    341 ssize_t ArgumentParser_x64::parse_scale(ssize_t pos, Argument *dest) {
    342   return parse_number(pos, &dest->scale_);
    343 }
    344 
    345 ssize_t ArgumentParser_x64::parse_expr(ssize_t pos, Argument *dest) {
    346   if (arg_[pos] == '$')
    347     return parse_number(pos + 1, &dest->constant_);
    348 
    349   if (arg_[pos] == '%')
    350     return parse_base_register(pos, dest);
    351 
    352   if (isdigit(arg_[pos]) || arg_[pos] == '-') {
    353     pos = parse_number(pos, &dest->deref_offset_);
    354     if (arg_[pos] == '+') {
    355       pos = parse_identifier(pos + 1, &dest->deref_ident_);
    356       if (!dest->deref_ident_)
    357         return -pos;
    358     }
    359   } else {
    360     dest->deref_offset_ = 0;
    361     pos = parse_identifier(pos, &dest->deref_ident_);
    362     if (arg_[pos] == '+' || arg_[pos] == '-') {
    363       pos = parse_number(pos, &dest->deref_offset_);
    364     }
    365   }
    366 
    367   if (arg_[pos] != '(')
    368     return -pos;
    369 
    370   pos = parse_base_register(pos + 1, dest);
    371   if (pos < 0)
    372     return pos;
    373 
    374   if (arg_[pos] == ',') {
    375     pos = parse_index_register(pos + 1, dest);
    376     if (pos < 0)
    377       return pos;
    378 
    379     if (arg_[pos] == ',') {
    380       pos = parse_scale(pos + 1, dest);
    381       if (pos < 0)
    382         return pos;
    383     }
    384   }
    385 
    386   return (arg_[pos] == ')') ? pos + 1 : -pos;
    387 }
    388 
    389 ssize_t ArgumentParser_x64::parse_1(ssize_t pos, Argument *dest) {
    390   if (isdigit(arg_[pos]) || arg_[pos] == '-') {
    391     optional<int> asize;
    392     ssize_t m = parse_number(pos, &asize);
    393     if (arg_[m] == '@' && asize) {
    394       dest->arg_size_ = asize;
    395       return parse_expr(m + 1, dest);
    396     }
    397   }
    398   return parse_expr(pos, dest);
    399 }
    400 
    401 bool ArgumentParser_x64::parse(Argument *dest) {
    402   if (done())
    403     return false;
    404 
    405   ssize_t res = parse_1(cur_pos_, dest);
    406   if (res < 0)
    407     return error_return(-res, -res + 1);
    408   if (!isspace(arg_[res]) && arg_[res] != '\0')
    409     return error_return(res, res);
    410   skip_whitespace_from(res);
    411   return true;
    412 }
    413 
// Lookup table from an x86-64 register name, as written after '%' in an
// argument, to the Register it belongs to and the size recorded for that
// spelling (8, 4, 2 or 1). Each group below lists the spellings that share
// one Register value; "rip" has a single 8-sized entry.
const std::unordered_map<std::string, ArgumentParser_x64::RegInfo>
    ArgumentParser_x64::registers_ = {
        {"rax", {REG_A, 8}},   {"eax", {REG_A, 4}},
        {"ax", {REG_A, 2}},    {"al", {REG_A, 1}},

        {"rbx", {REG_B, 8}},   {"ebx", {REG_B, 4}},
        {"bx", {REG_B, 2}},    {"bl", {REG_B, 1}},

        {"rcx", {REG_C, 8}},   {"ecx", {REG_C, 4}},
        {"cx", {REG_C, 2}},    {"cl", {REG_C, 1}},

        {"rdx", {REG_D, 8}},   {"edx", {REG_D, 4}},
        {"dx", {REG_D, 2}},    {"dl", {REG_D, 1}},

        {"rsi", {REG_SI, 8}},  {"esi", {REG_SI, 4}},
        {"si", {REG_SI, 2}},   {"sil", {REG_SI, 1}},

        {"rdi", {REG_DI, 8}},  {"edi", {REG_DI, 4}},
        {"di", {REG_DI, 2}},   {"dil", {REG_DI, 1}},

        {"rbp", {REG_BP, 8}},  {"ebp", {REG_BP, 4}},
        {"bp", {REG_BP, 2}},   {"bpl", {REG_BP, 1}},

        {"rsp", {REG_SP, 8}},  {"esp", {REG_SP, 4}},
        {"sp", {REG_SP, 2}},   {"spl", {REG_SP, 1}},

        {"r8", {REG_8, 8}},    {"r8d", {REG_8, 4}},
        {"r8w", {REG_8, 2}},   {"r8b", {REG_8, 1}},

        {"r9", {REG_9, 8}},    {"r9d", {REG_9, 4}},
        {"r9w", {REG_9, 2}},   {"r9b", {REG_9, 1}},

        {"r10", {REG_10, 8}},  {"r10d", {REG_10, 4}},
        {"r10w", {REG_10, 2}}, {"r10b", {REG_10, 1}},

        {"r11", {REG_11, 8}},  {"r11d", {REG_11, 4}},
        {"r11w", {REG_11, 2}}, {"r11b", {REG_11, 1}},

        {"r12", {REG_12, 8}},  {"r12d", {REG_12, 4}},
        {"r12w", {REG_12, 2}}, {"r12b", {REG_12, 1}},

        {"r13", {REG_13, 8}},  {"r13d", {REG_13, 4}},
        {"r13w", {REG_13, 2}}, {"r13b", {REG_13, 1}},

        {"r14", {REG_14, 8}},  {"r14d", {REG_14, 4}},
        {"r14w", {REG_14, 2}}, {"r14b", {REG_14, 1}},

        {"r15", {REG_15, 8}},  {"r15d", {REG_15, 4}},
        {"r15w", {REG_15, 2}}, {"r15b", {REG_15, 1}},

        {"rip", {REG_RIP, 8}},
};
    466 
    467 void ArgumentParser_x64::reg_to_name(std::string *norm, Register reg) {
    468   switch (reg) {
    469   case REG_A:
    470     *norm = "ax";
    471     break;
    472   case REG_B:
    473     *norm = "bx";
    474     break;
    475   case REG_C:
    476     *norm = "cx";
    477     break;
    478   case REG_D:
    479     *norm = "dx";
    480     break;
    481 
    482   case REG_SI:
    483     *norm = "si";
    484     break;
    485   case REG_DI:
    486     *norm = "di";
    487     break;
    488   case REG_BP:
    489     *norm = "bp";
    490     break;
    491   case REG_SP:
    492     *norm = "sp";
    493     break;
    494 
    495   case REG_8:
    496     *norm = "r8";
    497     break;
    498   case REG_9:
    499     *norm = "r9";
    500     break;
    501   case REG_10:
    502     *norm = "r10";
    503     break;
    504   case REG_11:
    505     *norm = "r11";
    506     break;
    507   case REG_12:
    508     *norm = "r12";
    509     break;
    510   case REG_13:
    511     *norm = "r13";
    512     break;
    513   case REG_14:
    514     *norm = "r14";
    515     break;
    516   case REG_15:
    517     *norm = "r15";
    518     break;
    519 
    520   case REG_RIP:
    521     *norm = "ip";
    522     break;
    523   }
    524 }
    525 
    526 bool ArgumentParser_x64::normalize_register(std::string *reg, int *reg_size) {
    527   auto it = registers_.find(*reg);
    528   if (it == registers_.end())
    529     return false;
    530 
    531   *reg_size = it->second.size;
    532   reg_to_name(reg, it->second.reg);
    533   return true;
    534 }
    535 }
    536