1 /* 2 * Copyright (c) 2016 GitHub, Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #include <unordered_map> 17 #include <regex> 18 19 #include "syms.h" 20 #include "usdt.h" 21 #include "vendor/tinyformat.hpp" 22 23 #include "bcc_elf.h" 24 #include "bcc_syms.h" 25 26 namespace USDT { 27 28 Argument::Argument() {} 29 Argument::~Argument() {} 30 31 std::string Argument::ctype() const { 32 const int s = arg_size() * 8; 33 return (s < 0) ? tfm::format("int%d_t", -s) : tfm::format("uint%d_t", s); 34 } 35 36 bool Argument::get_global_address(uint64_t *address, const std::string &binpath, 37 const optional<int> &pid) const { 38 if (pid) { 39 static struct bcc_symbol_option default_option = { 40 .use_debug_file = 1, 41 .check_debug_file_crc = 1, 42 .use_symbol_type = BCC_SYM_ALL_TYPES 43 }; 44 return ProcSyms(*pid, &default_option) 45 .resolve_name(binpath.c_str(), deref_ident_->c_str(), address); 46 } 47 48 if (!bcc_elf_is_shared_obj(binpath.c_str())) { 49 struct bcc_symbol sym; 50 if (bcc_resolve_symname(binpath.c_str(), deref_ident_->c_str(), 0x0, -1, nullptr, &sym) == 0) { 51 *address = sym.offset; 52 if (sym.module) 53 ::free(const_cast<char*>(sym.module)); 54 return true; 55 } 56 } 57 58 return false; 59 } 60 61 bool Argument::assign_to_local(std::ostream &stream, 62 const std::string &local_name, 63 const std::string &binpath, 64 const optional<int> &pid) const { 65 if (constant_) { 66 tfm::format(stream, "%s = %d;", local_name, *constant_); 67 return true; 68 } 69 70 if (!deref_offset_) { 71 tfm::format(stream, "%s = ctx->%s;", local_name, *base_register_name_); 72 // Put a compiler barrier to prevent optimization 73 // like llvm SimplifyCFG SinkThenElseCodeToEnd 74 // Volatile marking is not sufficient to prevent such optimization. 75 tfm::format(stream, " %s", COMPILER_BARRIER); 76 return true; 77 } 78 79 if (deref_offset_ && !deref_ident_) { 80 tfm::format(stream, "{ u64 __addr = ctx->%s + %d", 81 *base_register_name_, *deref_offset_); 82 if (index_register_name_) { 83 int scale = scale_.value_or(1); 84 tfm::format(stream, " + (ctx->%s * %d);", *index_register_name_, scale); 85 } else { 86 tfm::format(stream, ";"); 87 } 88 // Theoretically, llvm SimplifyCFG SinkThenElseCodeToEnd may still 89 // sink bpf_probe_read call, so put a barrier here to prevent sinking 90 // of ctx->#fields. 91 tfm::format(stream, " %s ", COMPILER_BARRIER); 92 tfm::format(stream, 93 "%s __res = 0x0; " 94 "bpf_probe_read(&__res, sizeof(__res), (void *)__addr); " 95 "%s = __res; }", 96 ctype(), local_name); 97 return true; 98 } 99 100 if (deref_offset_ && deref_ident_ && *base_register_name_ == "ip") { 101 uint64_t global_address; 102 if (!get_global_address(&global_address, binpath, pid)) 103 return false; 104 105 tfm::format(stream, 106 "{ u64 __addr = 0x%xull + %d; %s __res = 0x0; " 107 "bpf_probe_read(&__res, sizeof(__res), (void *)__addr); " 108 "%s = __res; }", 109 global_address, *deref_offset_, ctype(), local_name); 110 return true; 111 } 112 113 return false; 114 } 115 116 void ArgumentParser::print_error(ssize_t pos) { 117 fprintf(stderr, "Parse error:\n %s\n", arg_); 118 for (ssize_t i = 0; i < pos + 4; ++i) fputc('-', stderr); 119 fputc('^', stderr); 120 fputc('\n', stderr); 121 } 122 123 void ArgumentParser::skip_whitespace_from(size_t pos) { 124 while (isspace(arg_[pos])) pos++; 125 cur_pos_ = pos; 126 } 127 128 void ArgumentParser::skip_until_whitespace_from(size_t pos) { 129 while (arg_[pos] != '\0' && !isspace(arg_[pos])) 130 pos++; 131 cur_pos_ = pos; 132 } 133 134 bool ArgumentParser_aarch64::parse_register(ssize_t pos, ssize_t &new_pos, 135 optional<int> *reg_num) { 136 new_pos = parse_number(pos, reg_num); 137 if (new_pos == pos || *reg_num < 0 || *reg_num > 31) 138 return error_return(pos, pos); 139 return true; 140 } 141 142 bool ArgumentParser_aarch64::parse_size(ssize_t pos, ssize_t &new_pos, 143 optional<int> *arg_size) { 144 int abs_arg_size; 145 146 new_pos = parse_number(pos, arg_size); 147 if (new_pos == pos) 148 return error_return(pos, pos); 149 150 abs_arg_size = abs(arg_size->value()); 151 if (abs_arg_size != 1 && abs_arg_size != 2 && abs_arg_size != 4 && 152 abs_arg_size != 8) 153 return error_return(pos, pos); 154 return true; 155 } 156 157 bool ArgumentParser_aarch64::parse_mem(ssize_t pos, ssize_t &new_pos, 158 optional<int> *reg_num, 159 optional<int> *offset) { 160 if (arg_[pos] != 'x') 161 return error_return(pos, pos); 162 if (parse_register(pos + 1, new_pos, reg_num) == false) 163 return false; 164 165 if (arg_[new_pos] == ',') { 166 pos = new_pos + 1; 167 new_pos = parse_number(pos, offset); 168 if (new_pos == pos) 169 return error_return(pos, pos); 170 } 171 if (arg_[new_pos] != ']') 172 return error_return(new_pos, new_pos); 173 new_pos++; 174 return true; 175 } 176 177 bool ArgumentParser_aarch64::parse(Argument *dest) { 178 if (done()) 179 return false; 180 181 // Support the following argument patterns: 182 // [-]<size>@<value>, [-]<size>@<reg>, [-]<size>@[<reg>], or 183 // [-]<size>@[<reg>,<offset>] 184 ssize_t cur_pos = cur_pos_, new_pos; 185 optional<int> arg_size; 186 187 // Parse [-]<size> 188 if (parse_size(cur_pos, new_pos, &arg_size) == false) 189 return false; 190 dest->arg_size_ = arg_size; 191 192 // Make sure '@' present 193 if (arg_[new_pos] != '@') 194 return error_return(new_pos, new_pos); 195 cur_pos = new_pos + 1; 196 197 if (arg_[cur_pos] == 'x') { 198 // Parse ...@<reg> 199 optional<int> reg_num; 200 if (parse_register(cur_pos + 1, new_pos, ®_num) == false) 201 return false; 202 cur_pos_ = new_pos; 203 dest->base_register_name_ = "regs[" + std::to_string(reg_num.value()) + "]"; 204 } else if (arg_[cur_pos] == '[') { 205 // Parse ...@[<reg>] and ...@[<reg,<offset>] 206 optional<int> reg_num, offset = 0; 207 if (parse_mem(cur_pos + 1, new_pos, ®_num, &offset) == false) 208 return false; 209 cur_pos_ = new_pos; 210 dest->base_register_name_ = "regs[" + std::to_string(reg_num.value()) + "]"; 211 dest->deref_offset_ = offset; 212 } else { 213 // Parse ...@<value> 214 optional<int> val; 215 new_pos = parse_number(cur_pos, &val); 216 if (cur_pos == new_pos) 217 return error_return(cur_pos, cur_pos); 218 cur_pos_ = new_pos; 219 dest->constant_ = val; 220 } 221 222 skip_whitespace_from(cur_pos_); 223 return true; 224 } 225 226 bool ArgumentParser_powerpc64::parse(Argument *dest) { 227 if (done()) 228 return false; 229 230 bool matched; 231 std::smatch matches; 232 std::string arg_str(&arg_[cur_pos_]); 233 std::regex arg_n_regex("^(\\-?[1248])\\@"); 234 // Operands with constants of form iNUM or i-NUM 235 std::regex arg_op_regex_const("^i(\\-?[0-9]+)( +|$)"); 236 // Operands with register only of form REG or %rREG 237 std::regex arg_op_regex_reg("^(?:%r)?([1-2]?[0-9]|3[0-1])( +|$)"); 238 // Operands with a base register and an offset of form 239 // NUM(REG) or -NUM(REG) or NUM(%rREG) or -NUM(%rREG) 240 std::regex arg_op_regex_breg_off( 241 "^(\\-?[0-9]+)\\((?:%r)?([1-2]?[0-9]|3[0-1])\\)( +|$)"); 242 // Operands with a base register and an index register 243 // of form REG,REG or %rREG,%rREG 244 std::regex arg_op_regex_breg_ireg( 245 "^(?:%r)?([1-2]?[0-9]|3[0-1])\\,(?:%r)?([1-2]?[0-9]|3[0-1])( +|$)"); 246 247 matched = std::regex_search(arg_str, matches, arg_n_regex); 248 if (matched) { 249 dest->arg_size_ = stoi(matches.str(1)); 250 cur_pos_ += matches.length(0); 251 arg_str = &arg_[cur_pos_]; 252 253 if (std::regex_search(arg_str, matches, arg_op_regex_const)) { 254 dest->constant_ = stoi(matches.str(1)); 255 } else if (std::regex_search(arg_str, matches, arg_op_regex_reg)) { 256 dest->base_register_name_ = "gpr[" + matches.str(1) + "]"; 257 } else if (std::regex_search(arg_str, matches, arg_op_regex_breg_off)) { 258 dest->deref_offset_ = stoi(matches.str(1)); 259 dest->base_register_name_ = "gpr[" + matches.str(2) + "]"; 260 } else if (std::regex_search(arg_str, matches, arg_op_regex_breg_ireg)) { 261 dest->deref_offset_ = 0; // In powerpc64, such operands contain a base 262 // register and an index register which are 263 // part of an indexed load/store operation. 264 // Even if no offset value is present, this 265 // is required by Argument::assign_to_local() 266 // in order to generate code for reading the 267 // argument. So, this is set to zero. 268 dest->base_register_name_ = "gpr[" + matches.str(1) + "]"; 269 dest->index_register_name_ = "gpr[" + matches.str(2) + "]"; 270 dest->scale_ = abs(*dest->arg_size_); 271 } else { 272 matched = false; 273 } 274 } 275 276 if (!matched) { 277 print_error(cur_pos_); 278 skip_until_whitespace_from(cur_pos_); 279 skip_whitespace_from(cur_pos_); 280 return false; 281 } 282 283 cur_pos_ += matches.length(0); 284 skip_whitespace_from(cur_pos_); 285 return true; 286 } 287 288 ssize_t ArgumentParser_x64::parse_identifier(ssize_t pos, 289 optional<std::string> *result) { 290 if (isalpha(arg_[pos]) || arg_[pos] == '_') { 291 ssize_t start = pos++; 292 while (isalnum(arg_[pos]) || arg_[pos] == '_') pos++; 293 if (pos - start) 294 result->emplace(arg_ + start, pos - start); 295 } 296 return pos; 297 } 298 299 ssize_t ArgumentParser_x64::parse_register(ssize_t pos, std::string &name, 300 int &size) { 301 ssize_t start = ++pos; 302 if (arg_[start - 1] != '%') 303 return -start; 304 305 while (isalnum(arg_[pos])) pos++; 306 307 std::string regname(arg_ + start, pos - start); 308 if (!normalize_register(®name, &size)) 309 return -start; 310 311 name = regname; 312 return pos; 313 } 314 315 ssize_t ArgumentParser_x64::parse_base_register(ssize_t pos, Argument *dest) { 316 int size; 317 std::string name; 318 ssize_t res = parse_register(pos, name, size); 319 if (res < 0) 320 return res; 321 322 dest->base_register_name_ = name; 323 if (!dest->arg_size_) 324 dest->arg_size_ = size; 325 326 return res; 327 } 328 329 ssize_t ArgumentParser_x64::parse_index_register(ssize_t pos, Argument *dest) { 330 int size; 331 std::string name; 332 ssize_t res = parse_register(pos, name, size); 333 if (res < 0) 334 return res; 335 336 dest->index_register_name_ = name; 337 338 return res; 339 } 340 341 ssize_t ArgumentParser_x64::parse_scale(ssize_t pos, Argument *dest) { 342 return parse_number(pos, &dest->scale_); 343 } 344 345 ssize_t ArgumentParser_x64::parse_expr(ssize_t pos, Argument *dest) { 346 if (arg_[pos] == '$') 347 return parse_number(pos + 1, &dest->constant_); 348 349 if (arg_[pos] == '%') 350 return parse_base_register(pos, dest); 351 352 if (isdigit(arg_[pos]) || arg_[pos] == '-') { 353 pos = parse_number(pos, &dest->deref_offset_); 354 if (arg_[pos] == '+') { 355 pos = parse_identifier(pos + 1, &dest->deref_ident_); 356 if (!dest->deref_ident_) 357 return -pos; 358 } 359 } else { 360 dest->deref_offset_ = 0; 361 pos = parse_identifier(pos, &dest->deref_ident_); 362 if (arg_[pos] == '+' || arg_[pos] == '-') { 363 pos = parse_number(pos, &dest->deref_offset_); 364 } 365 } 366 367 if (arg_[pos] != '(') 368 return -pos; 369 370 pos = parse_base_register(pos + 1, dest); 371 if (pos < 0) 372 return pos; 373 374 if (arg_[pos] == ',') { 375 pos = parse_index_register(pos + 1, dest); 376 if (pos < 0) 377 return pos; 378 379 if (arg_[pos] == ',') { 380 pos = parse_scale(pos + 1, dest); 381 if (pos < 0) 382 return pos; 383 } 384 } 385 386 return (arg_[pos] == ')') ? pos + 1 : -pos; 387 } 388 389 ssize_t ArgumentParser_x64::parse_1(ssize_t pos, Argument *dest) { 390 if (isdigit(arg_[pos]) || arg_[pos] == '-') { 391 optional<int> asize; 392 ssize_t m = parse_number(pos, &asize); 393 if (arg_[m] == '@' && asize) { 394 dest->arg_size_ = asize; 395 return parse_expr(m + 1, dest); 396 } 397 } 398 return parse_expr(pos, dest); 399 } 400 401 bool ArgumentParser_x64::parse(Argument *dest) { 402 if (done()) 403 return false; 404 405 ssize_t res = parse_1(cur_pos_, dest); 406 if (res < 0) 407 return error_return(-res, -res + 1); 408 if (!isspace(arg_[res]) && arg_[res] != '\0') 409 return error_return(res, res); 410 skip_whitespace_from(res); 411 return true; 412 } 413 414 const std::unordered_map<std::string, ArgumentParser_x64::RegInfo> 415 ArgumentParser_x64::registers_ = { 416 {"rax", {REG_A, 8}}, {"eax", {REG_A, 4}}, 417 {"ax", {REG_A, 2}}, {"al", {REG_A, 1}}, 418 419 {"rbx", {REG_B, 8}}, {"ebx", {REG_B, 4}}, 420 {"bx", {REG_B, 2}}, {"bl", {REG_B, 1}}, 421 422 {"rcx", {REG_C, 8}}, {"ecx", {REG_C, 4}}, 423 {"cx", {REG_C, 2}}, {"cl", {REG_C, 1}}, 424 425 {"rdx", {REG_D, 8}}, {"edx", {REG_D, 4}}, 426 {"dx", {REG_D, 2}}, {"dl", {REG_D, 1}}, 427 428 {"rsi", {REG_SI, 8}}, {"esi", {REG_SI, 4}}, 429 {"si", {REG_SI, 2}}, {"sil", {REG_SI, 1}}, 430 431 {"rdi", {REG_DI, 8}}, {"edi", {REG_DI, 4}}, 432 {"di", {REG_DI, 2}}, {"dil", {REG_DI, 1}}, 433 434 {"rbp", {REG_BP, 8}}, {"ebp", {REG_BP, 4}}, 435 {"bp", {REG_BP, 2}}, {"bpl", {REG_BP, 1}}, 436 437 {"rsp", {REG_SP, 8}}, {"esp", {REG_SP, 4}}, 438 {"sp", {REG_SP, 2}}, {"spl", {REG_SP, 1}}, 439 440 {"r8", {REG_8, 8}}, {"r8d", {REG_8, 4}}, 441 {"r8w", {REG_8, 2}}, {"r8b", {REG_8, 1}}, 442 443 {"r9", {REG_9, 8}}, {"r9d", {REG_9, 4}}, 444 {"r9w", {REG_9, 2}}, {"r9b", {REG_9, 1}}, 445 446 {"r10", {REG_10, 8}}, {"r10d", {REG_10, 4}}, 447 {"r10w", {REG_10, 2}}, {"r10b", {REG_10, 1}}, 448 449 {"r11", {REG_11, 8}}, {"r11d", {REG_11, 4}}, 450 {"r11w", {REG_11, 2}}, {"r11b", {REG_11, 1}}, 451 452 {"r12", {REG_12, 8}}, {"r12d", {REG_12, 4}}, 453 {"r12w", {REG_12, 2}}, {"r12b", {REG_12, 1}}, 454 455 {"r13", {REG_13, 8}}, {"r13d", {REG_13, 4}}, 456 {"r13w", {REG_13, 2}}, {"r13b", {REG_13, 1}}, 457 458 {"r14", {REG_14, 8}}, {"r14d", {REG_14, 4}}, 459 {"r14w", {REG_14, 2}}, {"r14b", {REG_14, 1}}, 460 461 {"r15", {REG_15, 8}}, {"r15d", {REG_15, 4}}, 462 {"r15w", {REG_15, 2}}, {"r15b", {REG_15, 1}}, 463 464 {"rip", {REG_RIP, 8}}, 465 }; 466 467 void ArgumentParser_x64::reg_to_name(std::string *norm, Register reg) { 468 switch (reg) { 469 case REG_A: 470 *norm = "ax"; 471 break; 472 case REG_B: 473 *norm = "bx"; 474 break; 475 case REG_C: 476 *norm = "cx"; 477 break; 478 case REG_D: 479 *norm = "dx"; 480 break; 481 482 case REG_SI: 483 *norm = "si"; 484 break; 485 case REG_DI: 486 *norm = "di"; 487 break; 488 case REG_BP: 489 *norm = "bp"; 490 break; 491 case REG_SP: 492 *norm = "sp"; 493 break; 494 495 case REG_8: 496 *norm = "r8"; 497 break; 498 case REG_9: 499 *norm = "r9"; 500 break; 501 case REG_10: 502 *norm = "r10"; 503 break; 504 case REG_11: 505 *norm = "r11"; 506 break; 507 case REG_12: 508 *norm = "r12"; 509 break; 510 case REG_13: 511 *norm = "r13"; 512 break; 513 case REG_14: 514 *norm = "r14"; 515 break; 516 case REG_15: 517 *norm = "r15"; 518 break; 519 520 case REG_RIP: 521 *norm = "ip"; 522 break; 523 } 524 } 525 526 bool ArgumentParser_x64::normalize_register(std::string *reg, int *reg_size) { 527 auto it = registers_.find(*reg); 528 if (it == registers_.end()) 529 return false; 530 531 *reg_size = it->second.size; 532 reg_to_name(reg, it->second.reg); 533 return true; 534 } 535 } 536