Home | History | Annotate | Download | only in libutil++
      1 /**
      2  * @file bfd_support.cpp
      3  * BFD muck we have to deal with.
      4  *
      5  * @remark Copyright 2005 OProfile authors
      6  * @remark Read the file COPYING
      7  *
      8  * @author John Levon
      9  */
     10 
     11 #include "bfd_support.h"
     12 
     13 #include "op_bfd.h"
     14 #include "op_fileio.h"
     15 #include "op_config.h"
     16 #include "string_manip.h"
     17 #include "file_manip.h"
     18 #include "cverb.h"
     19 #include "locate_images.h"
     20 
     21 #include <cstdlib>
     22 #include <cstring>
     23 #include <cassert>
     24 #include <iostream>
     25 #include <fstream>
     26 #include <sstream>
     27 #include <string>
     28 #include <cstring>
     29 #include <cstdlib>
     30 
     31 using namespace std;
     32 
     33 extern verbose vbfd;
     34 
     35 namespace {
     36 
     37 
     38 void check_format(string const & file, bfd ** ibfd)
     39 {
     40 	if (!bfd_check_format_matches(*ibfd, bfd_object, NULL)) {
     41 		cverb << vbfd << "BFD format failure for " << file << endl;
     42 		bfd_close(*ibfd);
     43 		*ibfd = NULL;
     44 	}
     45 }
     46 
     47 
     48 bool separate_debug_file_exists(string & name, unsigned long const crc,
     49                                 extra_images const & extra)
     50 {
     51 	unsigned long file_crc = 0;
     52 	// The size of 2 * 1024 elements for the buffer is arbitrary.
     53 	char buffer[2 * 1024];
     54 
     55 	image_error img_ok;
     56 	string const image_path = extra.find_image_path(name, img_ok, true);
     57 
     58 	if (img_ok != image_ok)
     59 		return false;
     60 
     61 	name = image_path;
     62 
     63 	ifstream file(image_path.c_str());
     64 	if (!file)
     65 		return false;
     66 
     67 	cverb << vbfd << "found " << name;
     68 	while (file) {
     69 		file.read(buffer, sizeof(buffer));
     70 		file_crc = calc_crc32(file_crc,
     71 				      reinterpret_cast<unsigned char *>(&buffer[0]),
     72 				      file.gcount());
     73 	}
     74 	cverb << vbfd << " with crc32 = " << hex << file_crc << endl;
     75 	return crc == file_crc;
     76 }
     77 
     78 
     79 bool get_debug_link_info(bfd * ibfd, string & filename, unsigned long & crc32)
     80 {
     81 	asection * sect;
     82 
     83 	cverb << vbfd << "fetching .gnu_debuglink section" << endl;
     84 	sect = bfd_get_section_by_name(ibfd, ".gnu_debuglink");
     85 
     86 	if (sect == NULL)
     87 		return false;
     88 
     89 	bfd_size_type debuglink_size = bfd_section_size(ibfd, sect);
     90 	char contents[debuglink_size];
     91 	cverb << vbfd
     92 	      << ".gnu_debuglink section has size " << debuglink_size << endl;
     93 
     94 	if (!bfd_get_section_contents(ibfd, sect,
     95 				 reinterpret_cast<unsigned char *>(contents),
     96 				 static_cast<file_ptr>(0), debuglink_size)) {
     97 		bfd_perror("bfd_get_section_contents:get_debug:");
     98 		exit(2);
     99 	}
    100 
    101 	/* CRC value is stored after the filename, aligned up to 4 bytes. */
    102 	size_t filename_len = strlen(contents);
    103 	size_t crc_offset = filename_len + 1;
    104 	crc_offset = (crc_offset + 3) & ~3;
    105 
    106 	crc32 = bfd_get_32(ibfd,
    107 			       reinterpret_cast<bfd_byte *>(contents + crc_offset));
    108 	filename = string(contents, filename_len);
    109 	cverb << vbfd << ".gnu_debuglink filename is " << filename << endl;
    110 	return true;
    111 }
    112 
    113 
    114 /**
    115  * With Objective C, we'll get strings like:
    116  *
    117  * _i_GSUnicodeString__rangeOfCharacterSetFromSet_options_range
    118  *
    119  * for the symbol name, and:
    120  * -[GSUnicodeString rangeOfCharacterFromSet:options:range:]
    121  *
    122  * for the function name, so we have to do some looser matching
    123  * than for other languages (unfortunately, it's not possible
    124  * to demangle Objective C symbols).
    125  */
    126 bool objc_match(string const & sym, string const & method)
    127 {
    128 	if (method.length() < 3)
    129 		return false;
    130 
    131 	string mangled;
    132 
    133 	if (is_prefix(method, "-[")) {
    134 		mangled += "_i_";
    135 	} else if (is_prefix(method, "+[")) {
    136 		mangled += "_c_";
    137 	} else {
    138 		return false;
    139 	}
    140 
    141 	string::const_iterator it = method.begin() + 2;
    142 	string::const_iterator const end = method.end();
    143 
    144 	bool found_paren = false;
    145 
    146 	for (; it != end; ++it) {
    147 		switch (*it) {
    148 		case ' ':
    149 			mangled += '_';
    150 			if (!found_paren)
    151 				mangled += '_';
    152 			break;
    153 		case ':':
    154 			mangled += '_';
    155 			break;
    156 		case ')':
    157 		case ']':
    158 			break;
    159 		case '(':
    160 			found_paren = true;
    161 			mangled += '_';
    162 			break;
    163 		default:
    164 			mangled += *it;
    165 		}
    166 	}
    167 
    168 	return sym == mangled;
    169 }
    170 
    171 
    172 /*
    173  * With a binary image where some objects are missing debug
    174  * info, we can end up attributing to a completely different
    175  * function (#484660): bfd_nearest_line() will happily move from one
    176  * symbol to the nearest one it can find with debug information.
    177  * To mitigate this problem, we check that the symbol name
    178  * matches the returned function name.
    179  *
    180  * However, this check fails in some cases it shouldn't:
    181  * Objective C, and C++ static inline functions (as discussed in
    182  * GCC bugzilla #11774). So, we have a looser check that
    183  * accepts merely a substring, plus some magic for Objective C.
    184  *
    185  * If even the loose check fails, then we give up.
    186  */
    187 bool is_correct_function(string const & function, string const & name)
    188 {
    189 	if (name == function)
    190 		return true;
    191 
    192 	if (objc_match(name, function))
    193 		return true;
    194 
    195 	// warn the user if we had to use the loose check
    196 	if (name.find(function) != string::npos) {
    197 		static bool warned = false;
    198 		if (!warned) {
    199 			cerr << "warning: some functions compiled without "
    200 			     << "debug information may have incorrect source "
    201 			     << "line attributions" << endl;
    202 				warned = true;
    203 		}
    204 		cverb << vbfd << "is_correct_function(" << function << ", "
    205 		      << name << ") fuzzy match." << endl;
    206 		return true;
    207 	}
    208 
    209 	return false;
    210 }
    211 
    212 
    213 /*
    214  * binutils 2.12 and below have a small bug where functions without a
    215  * debug entry at the prologue start do not give a useful line number
    216  * from bfd_find_nearest_line(). This can happen with certain gcc
    217  * versions such as 2.95.
    218  *
    219  * We work around this problem by scanning forward for a vma with valid
    220  * linenr info, if we can't get a valid line number.  Problem uncovered
    221  * by Norbert Kaufmann. The work-around decreases, on the tincas
    222  * application, the number of failure to retrieve linenr info from 835
    223  * to 173. Most of the remaining are c++ inline functions mainly from
    224  * the STL library. Fix #529622
    225  */
    226 void fixup_linenr(bfd * abfd, asection * section, asymbol ** syms,
    227 		  string const & name, bfd_vma pc,
    228                   char const ** filename, unsigned int * line)
    229 {
    230 	char const * cfilename;
    231 	char const * function;
    232 	unsigned int linenr;
    233 
    234 	// FIXME: looking at debug info for all gcc version shows than
    235 	// the same problems can -perhaps- occur for epilog code: find a
    236 	// samples files with samples in epilog and try opreport -l -g
    237 	// on it, check it also with opannotate.
    238 
    239 	// first restrict the search on a sensible range of vma, 16 is
    240 	// an intuitive value based on epilog code look
    241 	size_t max_search = 16;
    242 	size_t section_size = bfd_section_size(abfd, section);
    243 	if (pc + max_search > section_size)
    244 		max_search = section_size - pc;
    245 
    246 	for (size_t i = 1; i < max_search; ++i) {
    247 		bool ret = bfd_find_nearest_line(abfd, section, syms, pc + i,
    248 						 &cfilename, &function,
    249 						 &linenr);
    250 
    251 		if (ret && cfilename && function && linenr != 0
    252 		    && is_correct_function(function, name)) {
    253 			*filename = cfilename;
    254 			*line = linenr;
    255 			return;
    256 		}
    257 	}
    258 }
    259 
    260 
    261 } // namespace anon
    262 
    263 
    264 bfd * open_bfd(string const & file)
    265 {
    266 	/* bfd keeps its own reference to the filename char *,
    267 	 * so it must have a lifetime longer than the ibfd */
    268 	bfd * ibfd = bfd_openr(file.c_str(), NULL);
    269 	if (!ibfd) {
    270 		cverb << vbfd << "bfd_openr failed for " << file << endl;
    271 		return NULL;
    272 	}
    273 
    274 	check_format(file, &ibfd);
    275 
    276 	return ibfd;
    277 }
    278 
    279 
    280 bfd * fdopen_bfd(string const & file, int fd)
    281 {
    282 	/* bfd keeps its own reference to the filename char *,
    283 	 * so it must have a lifetime longer than the ibfd */
    284 	bfd * ibfd = bfd_fdopenr(file.c_str(), NULL, fd);
    285 	if (!ibfd) {
    286 		cverb << vbfd << "bfd_openr failed for " << file << endl;
    287 		return NULL;
    288 	}
    289 
    290 	check_format(file, &ibfd);
    291 
    292 	return ibfd;
    293 }
    294 
    295 
    296 bool find_separate_debug_file(bfd * ibfd, string const & filepath_in,
    297                               string & debug_filename, extra_images const & extra)
    298 {
    299 	string filepath(filepath_in);
    300 	string basename;
    301 	unsigned long crc32;
    302 
    303 	if (!get_debug_link_info(ibfd, basename, crc32))
    304 		return false;
    305 
    306 	// Work out the image file's directory prefix
    307 	string filedir = op_dirname(filepath);
    308 	// Make sure it starts with /
    309 	if (filedir.size() > 0 && filedir.at(filedir.size() - 1) != '/')
    310 		filedir += '/';
    311 
    312 	string first_try(filedir + ".debug/" + basename);
    313 	string second_try(DEBUGDIR + filedir + basename);
    314 	string third_try(filedir + basename);
    315 
    316 	cverb << vbfd << "looking for debugging file " << basename
    317 	      << " with crc32 = " << hex << crc32 << endl;
    318 
    319 	if (separate_debug_file_exists(first_try, crc32, extra))
    320 		debug_filename = first_try;
    321 	else if (separate_debug_file_exists(second_try, crc32, extra))
    322 		debug_filename = second_try;
    323 	else if (separate_debug_file_exists(third_try, crc32, extra))
    324 		debug_filename = third_try;
    325 	else
    326 		return false;
    327 
    328 	return true;
    329 }
    330 
    331 
    332 bool interesting_symbol(asymbol * sym)
    333 {
    334 	// #717720 some binutils are miscompiled by gcc 2.95, one of the
    335 	// typical symptom can be catched here.
    336 	if (!sym->section) {
    337 		ostringstream os;
    338 		os << "Your version of binutils seems to have a bug.\n"
    339 		   << "Read http://oprofile.sf.net/faq/#binutilsbug\n";
    340 		throw op_runtime_error(os.str());
    341 	}
    342 
    343 	if (!(sym->section->flags & SEC_CODE))
    344 		return false;
    345 
    346 	// returning true for fix up in op_bfd_symbol()
    347 	if (!sym->name || sym->name[0] == '\0')
    348 		return true;
    349 	/* ARM assembler internal mapping symbols aren't interesting */
    350 	if ((strcmp("$a", sym->name) == 0) ||
    351 	    (strcmp("$t", sym->name) == 0) ||
    352 	    (strcmp("$d", sym->name) == 0))
    353 		return false;
    354 
    355 	// C++ exception stuff
    356 	if (sym->name[0] == '.' && sym->name[1] == 'L')
    357 		return false;
    358 
    359 	/* This case cannot be moved to boring_symbol(),
    360 	 * because that's only used for duplicate VMAs,
    361 	 * and sometimes this symbol appears at an address
    362 	 * different from all other symbols.
    363 	 */
    364 	if (!strcmp("gcc2_compiled.", sym->name))
    365 		return false;
    366 
    367         if (sym->flags & BSF_SECTION_SYM)
    368                 return false;
    369 
    370 	if (!(sym->section->flags & SEC_LOAD))
    371 		return false;
    372 
    373 	return true;
    374 }
    375 
    376 
    377 bool boring_symbol(op_bfd_symbol const & first, op_bfd_symbol const & second)
    378 {
    379 	if (first.name() == "Letext")
    380 		return true;
    381 	else if (second.name() == "Letext")
    382 		return false;
    383 
    384 	if (first.name().substr(0, 2) == "??")
    385 		return true;
    386 	else if (second.name().substr(0, 2) == "??")
    387 		return false;
    388 
    389 	if (first.hidden() && !second.hidden())
    390 		return true;
    391 	else if (!first.hidden() && second.hidden())
    392 		return false;
    393 
    394 	if (first.name()[0] == '_' && second.name()[0] != '_')
    395 		return true;
    396 	else if (first.name()[0] != '_' && second.name()[0] == '_')
    397 		return false;
    398 
    399 	if (first.weak() && !second.weak())
    400 		return true;
    401 	else if (!first.weak() && second.weak())
    402 		return false;
    403 
    404 	return false;
    405 }
    406 
    407 
    408 bool bfd_info::has_debug_info() const
    409 {
    410 	if (!valid())
    411 		return false;
    412 
    413 	for (asection const * sect = abfd->sections; sect; sect = sect->next) {
    414 		if (sect->flags & SEC_DEBUGGING)
    415 			return true;
    416 	}
    417 
    418 	return false;
    419 }
    420 
    421 
/**
 * Release what this object holds: the synth_syms buffer (filled in by
 * bfd_get_synthetic_symtab() in get_synth_symbols()) is handed to
 * free(), then the BFD is closed through close().
 */
bfd_info::~bfd_info()
{
	free(synth_syms);
	close();
}
    427 
    428 
    429 void bfd_info::close()
    430 {
    431 	if (abfd)
    432 		bfd_close(abfd);
    433 }
    434 
    435 /**
    436  * This function is only called when processing symbols retrieved from a
    437  * debuginfo file that is separate from the actual runtime binary image.
    438  * Separate debuginfo files may be needed in two different cases:
    439  *   1) the real image is completely stripped, where there is no symbol
    440 	information at all
    441  *   2) the real image has debuginfo stripped, and the user is requesting "-g"
    442  *   (src file/line num info)
    443  * After all symbols are gathered up, there will be some filtering/removal of
    444  * unnecessary symbols.  In particular, the bfd_info::interesting_symbol()
    445  * function filters out symbols whose section's flag value does not include
    446  * SEC_LOAD.  This filtering is required, so it must be retained.  However,
    447  * we run into a problem with symbols from debuginfo files, since the
    448  * section flag does NOT include SEC_LOAD.  To solve this problem, the
    449  * translate_debuginfo_syms function maps the debuginfo symbol's sections to
    450  * that of their corresponding real image.
    451 */
    452 void bfd_info::translate_debuginfo_syms(asymbol ** dbg_syms, long nr_dbg_syms)
    453 {
    454 	unsigned int img_sect_cnt = 0;
    455 	bfd * image_bfd = image_bfd_info->abfd;
    456 	multimap<string, bfd_section *> image_sections;
    457 
    458 	for (bfd_section * sect = image_bfd->sections;
    459 	     sect && img_sect_cnt < image_bfd->section_count;
    460 	     sect = sect->next) {
    461 		// A comment section marks the end of the needed sections
    462 		if (strstr(sect->name, ".comment") == sect->name)
    463 			break;
    464 		image_sections.insert(pair<string, bfd_section *>(sect->name, sect));
    465 		img_sect_cnt++;
    466 	}
    467 
    468 	asymbol * sym = dbg_syms[0];
    469 	string prev_sect_name = "";
    470 	bfd_section * matched_section = NULL;
    471 	for (int i = 0; i < nr_dbg_syms; sym = dbg_syms[++i]) {
    472 		bool section_switch;
    473 
    474 		if (strcmp(prev_sect_name.c_str(), sym->section->name)) {
    475 			section_switch = true;
    476 			prev_sect_name = sym->section->name;
    477 		} else {
    478 			section_switch = false;
    479 		}
    480 		if (sym->section->owner && sym->section->owner == abfd) {
    481 			if (section_switch ) {
    482 				matched_section = NULL;
    483 				multimap<string, bfd_section *>::iterator it;
    484 				pair<multimap<string, bfd_section *>::iterator,
    485 				     multimap<string, bfd_section *>::iterator> range;
    486 
    487 				range = image_sections.equal_range(sym->section->name);
    488 				for (it = range.first; it != range.second; it++) {
    489 					if ((*it).second->vma == sym->section->vma) {
    490 						matched_section = (*it).second;
    491 						break;
    492 					}
    493 				}
    494 			}
    495 			if (matched_section) {
    496 				sym->section = matched_section;
    497 				sym->the_bfd = image_bfd;
    498 			}
    499 		}
    500 	}
    501 }
    502 
    503 #if SYNTHESIZE_SYMBOLS
    504 bool bfd_info::get_synth_symbols()
    505 {
    506 	extern const bfd_target bfd_elf64_powerpc_vec;
    507 	extern const bfd_target bfd_elf64_powerpcle_vec;
    508 	bool is_elf64_powerpc_target = (abfd->xvec == &bfd_elf64_powerpc_vec)
    509 		|| (abfd->xvec == &bfd_elf64_powerpcle_vec);
    510 
    511 	if (!is_elf64_powerpc_target)
    512 		return false;
    513 
    514 	void * buf;
    515 	uint tmp;
    516 	long nr_mini_syms = bfd_read_minisymbols(abfd, 0, &buf, &tmp);
    517 	if (nr_mini_syms < 1)
    518 		return false;
    519 
    520 	asymbol ** mini_syms = (asymbol **)buf;
    521 	buf = NULL;
    522 	bfd * synth_bfd;
    523 
    524 	/* For ppc64, a debuginfo file by itself does not hold enough symbol
    525 	 * information for us to properly attribute samples to symbols.  If
    526 	 * the image file's bfd has no symbols (as in a super-stripped library),
    527 	 * then we need to do the extra processing in translate_debuginfo_syms.
    528 	 */
    529 	if (image_bfd_info && image_bfd_info->nr_syms == 0) {
    530 		translate_debuginfo_syms(mini_syms, nr_mini_syms);
    531 		synth_bfd = image_bfd_info->abfd;
    532 	} else
    533 		synth_bfd = abfd;
    534 
    535 	long nr_synth_syms = bfd_get_synthetic_symtab(synth_bfd,
    536 	                                              nr_mini_syms,
    537 	                                              mini_syms, 0,
    538 	                                              NULL, &synth_syms);
    539 
    540 	if (nr_synth_syms < 0) {
    541 		free(mini_syms);
    542 		return false;
    543 	}
    544 
    545 	cverb << vbfd << "mini_syms: " << dec << nr_mini_syms << hex << endl;
    546 	cverb << vbfd << "synth_syms: " << dec << nr_synth_syms << hex << endl;
    547 
    548 	nr_syms = nr_mini_syms + nr_synth_syms;
    549 	syms.reset(new asymbol *[nr_syms + 1]);
    550 
    551 	for (size_t i = 0; i < (size_t)nr_mini_syms; ++i)
    552 		syms[i] = mini_syms[i];
    553 
    554 
    555 	for (size_t i = 0; i < (size_t)nr_synth_syms; ++i)
    556 		syms[nr_mini_syms + i] = synth_syms + i;
    557 
    558 
    559 	free(mini_syms);
    560 
    561 	// bfd_canonicalize_symtab does this, so shall we
    562 	syms[nr_syms] = NULL;
    563 
    564 	return true;
    565 }
    566 #else
    567 bool bfd_info::get_synth_symbols()
    568 {
    569 	return false;
    570 }
    571 #endif /* SYNTHESIZE_SYMBOLS */
    572 
    573 
    574 void bfd_info::get_symbols()
    575 {
    576 	if (!abfd)
    577 		return;
    578 
    579 	cverb << vbfd << "bfd_info::get_symbols() for "
    580 	      << bfd_get_filename(abfd) << endl;
    581 
    582 	if (get_synth_symbols())
    583 		return;
    584 
    585 	if (bfd_get_file_flags(abfd) & HAS_SYMS)
    586 		nr_syms = bfd_get_symtab_upper_bound(abfd);
    587 
    588 	cverb << vbfd << "bfd_get_symtab_upper_bound: " << dec
    589 	      << nr_syms << hex << endl;
    590 
    591 	nr_syms /= sizeof(asymbol *);
    592 
    593 	if (nr_syms < 1)
    594 		return;
    595 
    596 	syms.reset(new asymbol *[nr_syms]);
    597 
    598 	nr_syms = bfd_canonicalize_symtab(abfd, syms.get());
    599 
    600 	if (image_bfd_info)
    601 		translate_debuginfo_syms(syms.get(), nr_syms);
    602 
    603 	cverb << vbfd << "bfd_canonicalize_symtab: " << dec
    604 	      << nr_syms << hex << endl;
    605 }
    606 
    607 
    608 linenr_info const
    609 find_nearest_line(bfd_info const & b, op_bfd_symbol const & sym,
    610                   bfd_vma offset, bool anon_obj)
    611 {
    612 	char const * function = "";
    613 	char const * cfilename = "";
    614 	unsigned int linenr = 0;
    615 	linenr_info info;
    616 	bfd * abfd;
    617 	asymbol ** syms;
    618 	asection * section;
    619 	bfd_vma pc;
    620 	bool ret;
    621 
    622 	if (!b.valid())
    623 		goto fail;
    624 
    625 	// take care about artificial symbol
    626 	if (!sym.symbol())
    627 		goto fail;
    628 
    629 	abfd = b.abfd;
    630 	syms = b.syms.get();
    631 	if (!syms)
    632 		goto fail;
    633 	section = sym.symbol()->section;
    634 	if (anon_obj)
    635 		pc = offset - sym.symbol()->section->vma;
    636 	else
    637 		pc = (sym.value() + offset) - sym.filepos();
    638 
    639 	if ((bfd_get_section_flags(abfd, section) & SEC_ALLOC) == 0)
    640 		goto fail;
    641 
    642 	if (pc >= bfd_section_size(abfd, section))
    643 		goto fail;
    644 
    645 	ret = bfd_find_nearest_line(abfd, section, syms, pc, &cfilename,
    646 	                                 &function, &linenr);
    647 
    648 	if (!ret || !cfilename || !function)
    649 		goto fail;
    650 
    651 	/*
    652 	 * is_correct_function does not handle the case of static inlines,
    653 	 * but if the linenr is non-zero in the inline case, it is the correct
    654 	 * line number.
    655 	 */
    656 	if (linenr == 0 && !is_correct_function(function, sym.name()))
    657 		goto fail;
    658 
    659 	if (linenr == 0) {
    660 		fixup_linenr(abfd, section, syms, sym.name(), pc, &cfilename,
    661 		             &linenr);
    662 	}
    663 
    664 	info.found = true;
    665 	info.filename = cfilename;
    666 	info.line = linenr;
    667 	return info;
    668 
    669 fail:
    670 	info.found = false;
    671 	// some stl lacks string::clear()
    672 	info.filename.erase(info.filename.begin(), info.filename.end());
    673 	info.line = 0;
    674 	return info;
    675 }
    676