1 /** 2 * @file bfd_support.cpp 3 * BFD muck we have to deal with. 4 * 5 * @remark Copyright 2005 OProfile authors 6 * @remark Read the file COPYING 7 * 8 * @author John Levon 9 */ 10 11 #include "bfd_support.h" 12 13 #include "op_bfd.h" 14 #include "op_fileio.h" 15 #include "op_config.h" 16 #include "string_manip.h" 17 #include "file_manip.h" 18 #include "cverb.h" 19 #include "locate_images.h" 20 21 #include <cstdlib> 22 #include <cstring> 23 #include <cassert> 24 #include <iostream> 25 #include <fstream> 26 #include <sstream> 27 #include <string> 28 #include <cstring> 29 #include <cstdlib> 30 31 using namespace std; 32 33 extern verbose vbfd; 34 35 namespace { 36 37 38 void check_format(string const & file, bfd ** ibfd) 39 { 40 if (!bfd_check_format_matches(*ibfd, bfd_object, NULL)) { 41 cverb << vbfd << "BFD format failure for " << file << endl; 42 bfd_close(*ibfd); 43 *ibfd = NULL; 44 } 45 } 46 47 48 bool separate_debug_file_exists(string & name, unsigned long const crc, 49 extra_images const & extra) 50 { 51 unsigned long file_crc = 0; 52 // The size of 2 * 1024 elements for the buffer is arbitrary. 53 char buffer[2 * 1024]; 54 55 image_error img_ok; 56 string const image_path = extra.find_image_path(name, img_ok, true); 57 58 if (img_ok != image_ok) 59 return false; 60 61 name = image_path; 62 63 ifstream file(image_path.c_str()); 64 if (!file) 65 return false; 66 67 cverb << vbfd << "found " << name; 68 while (file) { 69 file.read(buffer, sizeof(buffer)); 70 file_crc = calc_crc32(file_crc, 71 reinterpret_cast<unsigned char *>(&buffer[0]), 72 file.gcount()); 73 } 74 cverb << vbfd << " with crc32 = " << hex << file_crc << endl; 75 return crc == file_crc; 76 } 77 78 79 bool get_debug_link_info(bfd * ibfd, string & filename, unsigned long & crc32) 80 { 81 asection * sect; 82 83 cverb << vbfd << "fetching .gnu_debuglink section" << endl; 84 sect = bfd_get_section_by_name(ibfd, ".gnu_debuglink"); 85 86 if (sect == NULL) 87 return false; 88 89 bfd_size_type debuglink_size = bfd_section_size(ibfd, sect); 90 char contents[debuglink_size]; 91 cverb << vbfd 92 << ".gnu_debuglink section has size " << debuglink_size << endl; 93 94 if (!bfd_get_section_contents(ibfd, sect, 95 reinterpret_cast<unsigned char *>(contents), 96 static_cast<file_ptr>(0), debuglink_size)) { 97 bfd_perror("bfd_get_section_contents:get_debug:"); 98 exit(2); 99 } 100 101 /* CRC value is stored after the filename, aligned up to 4 bytes. */ 102 size_t filename_len = strlen(contents); 103 size_t crc_offset = filename_len + 1; 104 crc_offset = (crc_offset + 3) & ~3; 105 106 crc32 = bfd_get_32(ibfd, 107 reinterpret_cast<bfd_byte *>(contents + crc_offset)); 108 filename = string(contents, filename_len); 109 cverb << vbfd << ".gnu_debuglink filename is " << filename << endl; 110 return true; 111 } 112 113 114 /** 115 * With Objective C, we'll get strings like: 116 * 117 * _i_GSUnicodeString__rangeOfCharacterSetFromSet_options_range 118 * 119 * for the symbol name, and: 120 * -[GSUnicodeString rangeOfCharacterFromSet:options:range:] 121 * 122 * for the function name, so we have to do some looser matching 123 * than for other languages (unfortunately, it's not possible 124 * to demangle Objective C symbols). 125 */ 126 bool objc_match(string const & sym, string const & method) 127 { 128 if (method.length() < 3) 129 return false; 130 131 string mangled; 132 133 if (is_prefix(method, "-[")) { 134 mangled += "_i_"; 135 } else if (is_prefix(method, "+[")) { 136 mangled += "_c_"; 137 } else { 138 return false; 139 } 140 141 string::const_iterator it = method.begin() + 2; 142 string::const_iterator const end = method.end(); 143 144 bool found_paren = false; 145 146 for (; it != end; ++it) { 147 switch (*it) { 148 case ' ': 149 mangled += '_'; 150 if (!found_paren) 151 mangled += '_'; 152 break; 153 case ':': 154 mangled += '_'; 155 break; 156 case ')': 157 case ']': 158 break; 159 case '(': 160 found_paren = true; 161 mangled += '_'; 162 break; 163 default: 164 mangled += *it; 165 } 166 } 167 168 return sym == mangled; 169 } 170 171 172 /* 173 * With a binary image where some objects are missing debug 174 * info, we can end up attributing to a completely different 175 * function (#484660): bfd_nearest_line() will happily move from one 176 * symbol to the nearest one it can find with debug information. 177 * To mitigate this problem, we check that the symbol name 178 * matches the returned function name. 179 * 180 * However, this check fails in some cases it shouldn't: 181 * Objective C, and C++ static inline functions (as discussed in 182 * GCC bugzilla #11774). So, we have a looser check that 183 * accepts merely a substring, plus some magic for Objective C. 184 * 185 * If even the loose check fails, then we give up. 186 */ 187 bool is_correct_function(string const & function, string const & name) 188 { 189 if (name == function) 190 return true; 191 192 if (objc_match(name, function)) 193 return true; 194 195 // warn the user if we had to use the loose check 196 if (name.find(function) != string::npos) { 197 static bool warned = false; 198 if (!warned) { 199 cerr << "warning: some functions compiled without " 200 << "debug information may have incorrect source " 201 << "line attributions" << endl; 202 warned = true; 203 } 204 cverb << vbfd << "is_correct_function(" << function << ", " 205 << name << ") fuzzy match." << endl; 206 return true; 207 } 208 209 return false; 210 } 211 212 213 /* 214 * binutils 2.12 and below have a small bug where functions without a 215 * debug entry at the prologue start do not give a useful line number 216 * from bfd_find_nearest_line(). This can happen with certain gcc 217 * versions such as 2.95. 218 * 219 * We work around this problem by scanning forward for a vma with valid 220 * linenr info, if we can't get a valid line number. Problem uncovered 221 * by Norbert Kaufmann. The work-around decreases, on the tincas 222 * application, the number of failure to retrieve linenr info from 835 223 * to 173. Most of the remaining are c++ inline functions mainly from 224 * the STL library. Fix #529622 225 */ 226 void fixup_linenr(bfd * abfd, asection * section, asymbol ** syms, 227 string const & name, bfd_vma pc, 228 char const ** filename, unsigned int * line) 229 { 230 char const * cfilename; 231 char const * function; 232 unsigned int linenr; 233 234 // FIXME: looking at debug info for all gcc version shows than 235 // the same problems can -perhaps- occur for epilog code: find a 236 // samples files with samples in epilog and try opreport -l -g 237 // on it, check it also with opannotate. 238 239 // first restrict the search on a sensible range of vma, 16 is 240 // an intuitive value based on epilog code look 241 size_t max_search = 16; 242 size_t section_size = bfd_section_size(abfd, section); 243 if (pc + max_search > section_size) 244 max_search = section_size - pc; 245 246 for (size_t i = 1; i < max_search; ++i) { 247 bool ret = bfd_find_nearest_line(abfd, section, syms, pc + i, 248 &cfilename, &function, 249 &linenr); 250 251 if (ret && cfilename && function && linenr != 0 252 && is_correct_function(function, name)) { 253 *filename = cfilename; 254 *line = linenr; 255 return; 256 } 257 } 258 } 259 260 261 } // namespace anon 262 263 264 bfd * open_bfd(string const & file) 265 { 266 /* bfd keeps its own reference to the filename char *, 267 * so it must have a lifetime longer than the ibfd */ 268 bfd * ibfd = bfd_openr(file.c_str(), NULL); 269 if (!ibfd) { 270 cverb << vbfd << "bfd_openr failed for " << file << endl; 271 return NULL; 272 } 273 274 check_format(file, &ibfd); 275 276 return ibfd; 277 } 278 279 280 bfd * fdopen_bfd(string const & file, int fd) 281 { 282 /* bfd keeps its own reference to the filename char *, 283 * so it must have a lifetime longer than the ibfd */ 284 bfd * ibfd = bfd_fdopenr(file.c_str(), NULL, fd); 285 if (!ibfd) { 286 cverb << vbfd << "bfd_openr failed for " << file << endl; 287 return NULL; 288 } 289 290 check_format(file, &ibfd); 291 292 return ibfd; 293 } 294 295 296 bool find_separate_debug_file(bfd * ibfd, string const & filepath_in, 297 string & debug_filename, extra_images const & extra) 298 { 299 string filepath(filepath_in); 300 string basename; 301 unsigned long crc32; 302 303 if (!get_debug_link_info(ibfd, basename, crc32)) 304 return false; 305 306 // Work out the image file's directory prefix 307 string filedir = op_dirname(filepath); 308 // Make sure it starts with / 309 if (filedir.size() > 0 && filedir.at(filedir.size() - 1) != '/') 310 filedir += '/'; 311 312 string first_try(filedir + ".debug/" + basename); 313 string second_try(DEBUGDIR + filedir + basename); 314 string third_try(filedir + basename); 315 316 cverb << vbfd << "looking for debugging file " << basename 317 << " with crc32 = " << hex << crc32 << endl; 318 319 if (separate_debug_file_exists(first_try, crc32, extra)) 320 debug_filename = first_try; 321 else if (separate_debug_file_exists(second_try, crc32, extra)) 322 debug_filename = second_try; 323 else if (separate_debug_file_exists(third_try, crc32, extra)) 324 debug_filename = third_try; 325 else 326 return false; 327 328 return true; 329 } 330 331 332 bool interesting_symbol(asymbol * sym) 333 { 334 // #717720 some binutils are miscompiled by gcc 2.95, one of the 335 // typical symptom can be catched here. 336 if (!sym->section) { 337 ostringstream os; 338 os << "Your version of binutils seems to have a bug.\n" 339 << "Read http://oprofile.sf.net/faq/#binutilsbug\n"; 340 throw op_runtime_error(os.str()); 341 } 342 343 if (!(sym->section->flags & SEC_CODE)) 344 return false; 345 346 // returning true for fix up in op_bfd_symbol() 347 if (!sym->name || sym->name[0] == '\0') 348 return true; 349 /* ARM assembler internal mapping symbols aren't interesting */ 350 if ((strcmp("$a", sym->name) == 0) || 351 (strcmp("$t", sym->name) == 0) || 352 (strcmp("$d", sym->name) == 0)) 353 return false; 354 355 // C++ exception stuff 356 if (sym->name[0] == '.' && sym->name[1] == 'L') 357 return false; 358 359 /* This case cannot be moved to boring_symbol(), 360 * because that's only used for duplicate VMAs, 361 * and sometimes this symbol appears at an address 362 * different from all other symbols. 363 */ 364 if (!strcmp("gcc2_compiled.", sym->name)) 365 return false; 366 367 if (sym->flags & BSF_SECTION_SYM) 368 return false; 369 370 if (!(sym->section->flags & SEC_LOAD)) 371 return false; 372 373 return true; 374 } 375 376 377 bool boring_symbol(op_bfd_symbol const & first, op_bfd_symbol const & second) 378 { 379 if (first.name() == "Letext") 380 return true; 381 else if (second.name() == "Letext") 382 return false; 383 384 if (first.name().substr(0, 2) == "??") 385 return true; 386 else if (second.name().substr(0, 2) == "??") 387 return false; 388 389 if (first.hidden() && !second.hidden()) 390 return true; 391 else if (!first.hidden() && second.hidden()) 392 return false; 393 394 if (first.name()[0] == '_' && second.name()[0] != '_') 395 return true; 396 else if (first.name()[0] != '_' && second.name()[0] == '_') 397 return false; 398 399 if (first.weak() && !second.weak()) 400 return true; 401 else if (!first.weak() && second.weak()) 402 return false; 403 404 return false; 405 } 406 407 408 bool bfd_info::has_debug_info() const 409 { 410 if (!valid()) 411 return false; 412 413 for (asection const * sect = abfd->sections; sect; sect = sect->next) { 414 if (sect->flags & SEC_DEBUGGING) 415 return true; 416 } 417 418 return false; 419 } 420 421 422 bfd_info::~bfd_info() 423 { 424 free(synth_syms); 425 close(); 426 } 427 428 429 void bfd_info::close() 430 { 431 if (abfd) 432 bfd_close(abfd); 433 } 434 435 /** 436 * This function is only called when processing symbols retrieved from a 437 * debuginfo file that is separate from the actual runtime binary image. 438 * Separate debuginfo files may be needed in two different cases: 439 * 1) the real image is completely stripped, where there is no symbol 440 information at all 441 * 2) the real image has debuginfo stripped, and the user is requesting "-g" 442 * (src file/line num info) 443 * After all symbols are gathered up, there will be some filtering/removal of 444 * unnecessary symbols. In particular, the bfd_info::interesting_symbol() 445 * function filters out symbols whose section's flag value does not include 446 * SEC_LOAD. This filtering is required, so it must be retained. However, 447 * we run into a problem with symbols from debuginfo files, since the 448 * section flag does NOT include SEC_LOAD. To solve this problem, the 449 * translate_debuginfo_syms function maps the debuginfo symbol's sections to 450 * that of their corresponding real image. 451 */ 452 void bfd_info::translate_debuginfo_syms(asymbol ** dbg_syms, long nr_dbg_syms) 453 { 454 unsigned int img_sect_cnt = 0; 455 bfd * image_bfd = image_bfd_info->abfd; 456 multimap<string, bfd_section *> image_sections; 457 458 for (bfd_section * sect = image_bfd->sections; 459 sect && img_sect_cnt < image_bfd->section_count; 460 sect = sect->next) { 461 // A comment section marks the end of the needed sections 462 if (strstr(sect->name, ".comment") == sect->name) 463 break; 464 image_sections.insert(pair<string, bfd_section *>(sect->name, sect)); 465 img_sect_cnt++; 466 } 467 468 asymbol * sym = dbg_syms[0]; 469 string prev_sect_name = ""; 470 bfd_section * matched_section = NULL; 471 for (int i = 0; i < nr_dbg_syms; sym = dbg_syms[++i]) { 472 bool section_switch; 473 474 if (strcmp(prev_sect_name.c_str(), sym->section->name)) { 475 section_switch = true; 476 prev_sect_name = sym->section->name; 477 } else { 478 section_switch = false; 479 } 480 if (sym->section->owner && sym->section->owner == abfd) { 481 if (section_switch ) { 482 matched_section = NULL; 483 multimap<string, bfd_section *>::iterator it; 484 pair<multimap<string, bfd_section *>::iterator, 485 multimap<string, bfd_section *>::iterator> range; 486 487 range = image_sections.equal_range(sym->section->name); 488 for (it = range.first; it != range.second; it++) { 489 if ((*it).second->vma == sym->section->vma) { 490 matched_section = (*it).second; 491 break; 492 } 493 } 494 } 495 if (matched_section) { 496 sym->section = matched_section; 497 sym->the_bfd = image_bfd; 498 } 499 } 500 } 501 } 502 503 #if SYNTHESIZE_SYMBOLS 504 bool bfd_info::get_synth_symbols() 505 { 506 extern const bfd_target bfd_elf64_powerpc_vec; 507 extern const bfd_target bfd_elf64_powerpcle_vec; 508 bool is_elf64_powerpc_target = (abfd->xvec == &bfd_elf64_powerpc_vec) 509 || (abfd->xvec == &bfd_elf64_powerpcle_vec); 510 511 if (!is_elf64_powerpc_target) 512 return false; 513 514 void * buf; 515 uint tmp; 516 long nr_mini_syms = bfd_read_minisymbols(abfd, 0, &buf, &tmp); 517 if (nr_mini_syms < 1) 518 return false; 519 520 asymbol ** mini_syms = (asymbol **)buf; 521 buf = NULL; 522 bfd * synth_bfd; 523 524 /* For ppc64, a debuginfo file by itself does not hold enough symbol 525 * information for us to properly attribute samples to symbols. If 526 * the image file's bfd has no symbols (as in a super-stripped library), 527 * then we need to do the extra processing in translate_debuginfo_syms. 528 */ 529 if (image_bfd_info && image_bfd_info->nr_syms == 0) { 530 translate_debuginfo_syms(mini_syms, nr_mini_syms); 531 synth_bfd = image_bfd_info->abfd; 532 } else 533 synth_bfd = abfd; 534 535 long nr_synth_syms = bfd_get_synthetic_symtab(synth_bfd, 536 nr_mini_syms, 537 mini_syms, 0, 538 NULL, &synth_syms); 539 540 if (nr_synth_syms < 0) { 541 free(mini_syms); 542 return false; 543 } 544 545 cverb << vbfd << "mini_syms: " << dec << nr_mini_syms << hex << endl; 546 cverb << vbfd << "synth_syms: " << dec << nr_synth_syms << hex << endl; 547 548 nr_syms = nr_mini_syms + nr_synth_syms; 549 syms.reset(new asymbol *[nr_syms + 1]); 550 551 for (size_t i = 0; i < (size_t)nr_mini_syms; ++i) 552 syms[i] = mini_syms[i]; 553 554 555 for (size_t i = 0; i < (size_t)nr_synth_syms; ++i) 556 syms[nr_mini_syms + i] = synth_syms + i; 557 558 559 free(mini_syms); 560 561 // bfd_canonicalize_symtab does this, so shall we 562 syms[nr_syms] = NULL; 563 564 return true; 565 } 566 #else 567 bool bfd_info::get_synth_symbols() 568 { 569 return false; 570 } 571 #endif /* SYNTHESIZE_SYMBOLS */ 572 573 574 void bfd_info::get_symbols() 575 { 576 if (!abfd) 577 return; 578 579 cverb << vbfd << "bfd_info::get_symbols() for " 580 << bfd_get_filename(abfd) << endl; 581 582 if (get_synth_symbols()) 583 return; 584 585 if (bfd_get_file_flags(abfd) & HAS_SYMS) 586 nr_syms = bfd_get_symtab_upper_bound(abfd); 587 588 cverb << vbfd << "bfd_get_symtab_upper_bound: " << dec 589 << nr_syms << hex << endl; 590 591 nr_syms /= sizeof(asymbol *); 592 593 if (nr_syms < 1) 594 return; 595 596 syms.reset(new asymbol *[nr_syms]); 597 598 nr_syms = bfd_canonicalize_symtab(abfd, syms.get()); 599 600 if (image_bfd_info) 601 translate_debuginfo_syms(syms.get(), nr_syms); 602 603 cverb << vbfd << "bfd_canonicalize_symtab: " << dec 604 << nr_syms << hex << endl; 605 } 606 607 608 linenr_info const 609 find_nearest_line(bfd_info const & b, op_bfd_symbol const & sym, 610 bfd_vma offset, bool anon_obj) 611 { 612 char const * function = ""; 613 char const * cfilename = ""; 614 unsigned int linenr = 0; 615 linenr_info info; 616 bfd * abfd; 617 asymbol ** syms; 618 asection * section; 619 bfd_vma pc; 620 bool ret; 621 622 if (!b.valid()) 623 goto fail; 624 625 // take care about artificial symbol 626 if (!sym.symbol()) 627 goto fail; 628 629 abfd = b.abfd; 630 syms = b.syms.get(); 631 if (!syms) 632 goto fail; 633 section = sym.symbol()->section; 634 if (anon_obj) 635 pc = offset - sym.symbol()->section->vma; 636 else 637 pc = (sym.value() + offset) - sym.filepos(); 638 639 if ((bfd_get_section_flags(abfd, section) & SEC_ALLOC) == 0) 640 goto fail; 641 642 if (pc >= bfd_section_size(abfd, section)) 643 goto fail; 644 645 ret = bfd_find_nearest_line(abfd, section, syms, pc, &cfilename, 646 &function, &linenr); 647 648 if (!ret || !cfilename || !function) 649 goto fail; 650 651 /* 652 * is_correct_function does not handle the case of static inlines, 653 * but if the linenr is non-zero in the inline case, it is the correct 654 * line number. 655 */ 656 if (linenr == 0 && !is_correct_function(function, sym.name())) 657 goto fail; 658 659 if (linenr == 0) { 660 fixup_linenr(abfd, section, syms, sym.name(), pc, &cfilename, 661 &linenr); 662 } 663 664 info.found = true; 665 info.filename = cfilename; 666 info.line = linenr; 667 return info; 668 669 fail: 670 info.found = false; 671 // some stl lacks string::clear() 672 info.filename.erase(info.filename.begin(), info.filename.end()); 673 info.line = 0; 674 return info; 675 } 676