Home | History | Annotate | Download | only in libpp
      1 /**
      2  * @file profile_spec.cpp
      3  * Contains a PP profile specification
      4  *
      5  * @remark Copyright 2003 OProfile authors
      6  * @remark Read the file COPYING
      7  *
      8  * @author Philippe Elie
      9  */
     10 
     11 #include <algorithm>
     12 #include <set>
     13 #include <sstream>
     14 #include <iterator>
     15 #include <iostream>
     16 #include <dirent.h>
     17 
     18 #include "file_manip.h"
     19 #include "op_config.h"
     20 #include "profile_spec.h"
     21 #include "string_manip.h"
     22 #include "glob_filter.h"
     23 #include "locate_images.h"
     24 #include "op_exception.h"
     25 #include "op_header.h"
     26 #include "op_fileio.h"
     27 
     28 using namespace std;
     29 
     30 namespace {
     31 
     32 // PP:3.7, full path, or relative path. If we can't find it,
     33 // we should maintain the original to maintain the wordexp etc.
     34 string const fixup_image_spec(string const & str, extra_images const & extra)
     35 {
     36 	// On error find_image_path() return str, so if an occur we will
     37 	// use the provided image_name not the fixed one.
     38 	image_error error;
     39 	return extra.find_image_path(str, error, true);
     40 }
     41 
     42 void fixup_image_spec(vector<string> & images, extra_images const & extra)
     43 {
     44 	vector<string>::iterator it = images.begin();
     45 	vector<string>::iterator const end = images.end();
     46 
     47 	for (; it != end; ++it)
     48 		*it = fixup_image_spec(*it, extra);
     49 }
     50 
     51 }  // anon namespace
     52 
     53 
     54 profile_spec::profile_spec()
     55 	:
     56 	extra_found_images()
     57 {
     58 	parse_table["archive"] = &profile_spec::parse_archive_path;
     59 	parse_table["session"] = &profile_spec::parse_session;
     60 	parse_table["session-exclude"] =
     61 		&profile_spec::parse_session_exclude;
     62 	parse_table["image"] = &profile_spec::parse_image;
     63 	parse_table["image-exclude"] = &profile_spec::parse_image_exclude;
     64 	parse_table["lib-image"] = &profile_spec::parse_lib_image;
     65 	parse_table["event"] = &profile_spec::parse_event;
     66 	parse_table["count"] = &profile_spec::parse_count;
     67 	parse_table["unit-mask"] = &profile_spec::parse_unitmask;
     68 	parse_table["tid"] = &profile_spec::parse_tid;
     69 	parse_table["tgid"] = &profile_spec::parse_tgid;
     70 	parse_table["cpu"] = &profile_spec::parse_cpu;
     71 }
     72 
     73 
     74 void profile_spec::parse(string const & tag_value)
     75 {
     76 	string value;
     77 	action_t action = get_handler(tag_value, value);
     78 	if (!action) {
     79 		throw invalid_argument("profile_spec::parse(): not "
     80 				       "a valid tag \"" + tag_value + "\"");
     81 	}
     82 
     83 	(this->*action)(value);
     84 }
     85 
     86 
     87 bool profile_spec::is_valid_tag(string const & tag_value)
     88 {
     89 	string value;
     90 	return get_handler(tag_value, value);
     91 }
     92 
     93 
     94 void profile_spec::set_image_or_lib_name(string const & str)
     95 {
     96 	/* FIXME: what does spec say about this being allowed to be
     97 	 * a comma list or not ? */
     98 	image_or_lib_image.push_back(fixup_image_spec(str, extra_found_images));
     99 }
    100 
    101 
    102 void profile_spec::parse_archive_path(string const & str)
    103 {
    104 	archive_path = op_realpath(str);
    105 }
    106 
    107 
    108 string profile_spec::get_archive_path() const
    109 {
    110 	return archive_path;
    111 }
    112 
    113 
    114 void profile_spec::parse_session(string const & str)
    115 {
    116 	session = separate_token(str, ',');
    117 }
    118 
    119 
    120 void profile_spec::parse_session_exclude(string const & str)
    121 {
    122 	session_exclude = separate_token(str, ',');
    123 }
    124 
    125 
    126 void profile_spec::parse_image(string const & str)
    127 {
    128 	image = separate_token(str, ',');
    129 	fixup_image_spec(image, extra_found_images);
    130 }
    131 
    132 
    133 void profile_spec::parse_image_exclude(string const & str)
    134 {
    135 	image_exclude = separate_token(str, ',');
    136 	fixup_image_spec(image_exclude, extra_found_images);
    137 }
    138 
    139 
    140 void profile_spec::parse_lib_image(string const & str)
    141 {
    142 	lib_image = separate_token(str, ',');
    143 	fixup_image_spec(lib_image, extra_found_images);
    144 }
    145 
    146 
    147 void profile_spec::parse_event(string const & str)
    148 {
    149 	event.set(str);
    150 }
    151 
    152 
    153 void profile_spec::parse_count(string const & str)
    154 {
    155 	count.set(str);
    156 }
    157 
    158 
    159 void profile_spec::parse_unitmask(string const & str)
    160 {
    161 	unitmask.set(str);
    162 }
    163 
    164 
    165 void profile_spec::parse_tid(string const & str)
    166 {
    167 	tid.set(str);
    168 }
    169 
    170 
    171 void profile_spec::parse_tgid(string const & str)
    172 {
    173 	tgid.set(str);
    174 }
    175 
    176 
    177 void profile_spec::parse_cpu(string const & str)
    178 {
    179 	cpu.set(str);
    180 }
    181 
    182 
    183 profile_spec::action_t
    184 profile_spec::get_handler(string const & tag_value, string & value)
    185 {
    186 	string::size_type pos = tag_value.find_first_of(':');
    187 	if (pos == string::npos)
    188 		return 0;
    189 
    190 	string tag(tag_value.substr(0, pos));
    191 	value = tag_value.substr(pos + 1);
    192 
    193 	parse_table_t::const_iterator it = parse_table.find(tag);
    194 	if (it == parse_table.end())
    195 		return 0;
    196 
    197 	return it->second;
    198 }
    199 
    200 
    201 namespace {
    202 
    203 /// return true if the value from the profile spec may match the comma
    204 /// list
    205 template<typename T>
    206 bool comma_match(comma_list<T> const & cl, generic_spec<T> const & value)
    207 {
    208 	// if the profile spec is "all" we match the sample file
    209 	if (!cl.is_set())
    210 		return true;
    211 
    212 	// an "all" sample file should never match specified profile
    213 	// spec values
    214 	if (!value.is_set())
    215 		return false;
    216 
    217 	// now match each profile spec value against the sample file
    218 	return cl.match(value.value());
    219 }
    220 
    221 }
    222 
    223 
    224 bool profile_spec::match(filename_spec const & spec) const
    225 {
    226 	bool matched_by_image_or_lib_image = false;
    227 
    228 	// We need the true image name not the one based on the sample
    229 	// filename for the benefit of module which have /oprofile in their
    230 	// sample filename. This allow to specify profile spec based on the
    231 	// real name of the image, e.g. 'binary:*oprofile.ko'
    232 	string simage = fixup_image_spec(spec.image, extra_found_images);
    233 	string slib_image = fixup_image_spec(spec.lib_image,
    234 					     extra_found_images);
    235 
    236 	// PP:3.19
    237 	if (!image_or_lib_image.empty()) {
    238 		glob_filter filter(image_or_lib_image, image_exclude);
    239 		if (filter.match(simage) || filter.match(slib_image))
    240 			matched_by_image_or_lib_image = true;
    241 	}
    242 
    243 	if (!matched_by_image_or_lib_image) {
    244 		// PP:3.7 3.8
    245 		if (!image.empty()) {
    246 			glob_filter filter(image, image_exclude);
    247 			if (!filter.match(simage))
    248 				return false;
    249 		} else if (!image_or_lib_image.empty()) {
    250 			// image.empty() means match all except if user
    251 			// specified image_or_lib_image
    252 			return false;
    253 		}
    254 
    255 		// PP:3.9 3.10
    256 		if (!lib_image.empty()) {
    257 			glob_filter filter(lib_image, image_exclude);
    258 			if (!filter.match(slib_image))
    259 				return false;
    260 		} else if (image.empty() && !image_or_lib_image.empty()) {
    261 			// lib_image empty means match all except if user
    262 			// specified image_or_lib_image *or* we already
    263 			// matched this spec through image
    264 			return false;
    265 		}
    266 	}
    267 
    268 	if (!matched_by_image_or_lib_image) {
    269 		// if we don't match by image_or_lib_image we must try to
    270 		// exclude from spec, exclusion from image_or_lib_image has
    271 		// been handled above
    272 		vector<string> empty;
    273 		glob_filter filter(empty, image_exclude);
    274 		if (!filter.match(simage))
    275 			return false;
    276 		if (!spec.lib_image.empty() && !filter.match(slib_image))
    277 			return false;
    278 	}
    279 
    280 	if (!event.match(spec.event))
    281 		return false;
    282 
    283 	if (!count.match(spec.count))
    284 		return false;
    285 
    286 	if (!unitmask.match(spec.unitmask))
    287 		return false;
    288 
    289 	if (!comma_match(cpu, spec.cpu))
    290 		return false;
    291 
    292 	if (!comma_match(tid, spec.tid))
    293 		return false;
    294 
    295 	if (!comma_match(tgid, spec.tgid))
    296 		return false;
    297 
    298 	return true;
    299 }
    300 
    301 
    302 profile_spec profile_spec::create(list<string> const & args,
    303                                   vector<string> const & image_path,
    304 				  string const & root_path)
    305 {
    306 	profile_spec spec;
    307 	set<string> tag_seen;
    308 	vector<string> temp_image_or_lib;
    309 
    310 	list<string>::const_iterator it = args.begin();
    311 	list<string>::const_iterator end = args.end();
    312 
    313 	for (; it != end; ++it) {
    314 		if (spec.is_valid_tag(*it)) {
    315 			if (tag_seen.find(*it) != tag_seen.end()) {
    316 				throw op_runtime_error("tag specified "
    317 				       "more than once: " + *it);
    318 			}
    319 			tag_seen.insert(*it);
    320 			spec.parse(*it);
    321 		} else {
    322 			string const file = op_realpath(*it);
    323 			temp_image_or_lib.push_back(file);
    324 		}
    325 	}
    326 
    327 	// PP:3.5 no session given means use the current session.
    328 	if (spec.session.empty())
    329 		spec.session.push_back("current");
    330 
    331 	bool ok = true;
    332 	vector<string>::const_iterator ip_it = image_path.begin();
    333 	for ( ; ip_it != image_path.end(); ++ip_it) {
    334 		if (!is_directory(spec.get_archive_path() + "/" + *ip_it)) {
    335 			cerr << spec.get_archive_path() + "/" + *ip_it << " isn't a valid directory\n";
    336 			ok = false;
    337 		}
    338 	}
    339 	if (!ok)
    340 		throw op_runtime_error("invalid --image-path= options");
    341 
    342 	spec.extra_found_images.populate(image_path, spec.get_archive_path(),
    343 					 root_path);
    344 	vector<string>::const_iterator im = temp_image_or_lib.begin();
    345 	vector<string>::const_iterator last = temp_image_or_lib.end();
    346 	for (; im != last; ++im)
    347 		spec.set_image_or_lib_name(*im);
    348 
    349 	return spec;
    350 }
    351 
    352 namespace {
    353 
    354 vector<string> filter_session(vector<string> const & session,
    355 			      vector<string> const & session_exclude)
    356 {
    357 	vector<string> result(session);
    358 
    359 	if (result.empty())
    360 		result.push_back("current");
    361 
    362 	for (size_t i = 0 ; i < session_exclude.size() ; ++i) {
    363 		// FIXME: would we use fnmatch on each item, are we allowed
    364 		// to --session=current* ?
    365 		vector<string>::iterator it =
    366 			find(result.begin(), result.end(), session_exclude[i]);
    367 
    368 		if (it != result.end())
    369 			result.erase(it);
    370 	}
    371 
    372 	return result;
    373 }
    374 
    375 static bool invalid_sample_file;
    376 bool valid_candidate(string const & base_dir, string const & filename,
    377                      profile_spec const & spec, bool exclude_dependent,
    378                      bool exclude_cg)
    379 {
    380 	if (exclude_cg && filename.find("{cg}") != string::npos)
    381 		return false;
    382 
    383 	// strip out non sample files
    384 	string const & sub = filename.substr(base_dir.size(), string::npos);
    385 	if (!is_prefix(sub, "/{root}/") && !is_prefix(sub, "/{kern}/"))
    386 		return false;
    387 
    388 	/* When overflows occur in the oprofile kernel driver's sample
    389 	 * buffers (caused by too high of a sampling rate), it's possible
    390 	 * for samples to be mis-attributed.  A common scenario is that,
    391 	 * while profiling process 'abc' running binary 'xzy', the task
    392 	 * switch for 'abc' gets dropped somehow.  Then, samples are taken
    393 	 * for the 'xyz' binary.  In the attempt to attribute the samples to
    394 	 * the associated binary, the oprofile kernel code examines the
    395 	 * the memory mappings for the last process for which it recorded
    396 	 * a task switch.  When profiling at a very high rate, the oprofile
    397 	 * daemon is often the process that is mistakenly examined.  Then the
    398 	 * sample from binary 'xyz' is matched to some file that's open in
    399 	 * oprofiled's memory space.  Because oprofiled has many sample files
    400 	 * open at any given time, there's a good chance the sample's VMA is
    401 	 * contained within one of those sample files.  So, once finding this
    402 	 * bogus match, the oprofile kernel records a cookie switch for the
    403 	 * sample file.  This scenario is made even more likely if a high
    404 	 * sampling rate (e.g., profiling on several events) is paired with
    405 	 * callgraph data collection.
    406 	 *
    407 	 * When the daemon processes this sample data from the kernel, it
    408 	 * creates a sample file for the sample file, resulting in something
    409 	 * of the form:
    410 	 *    <session-dir>/[blah]<session-dir>/[blah]
    411 	 *
    412 	 * When the sample data is post-processed, the sample file is parsed to
    413 	 * try to determine the name of the binary, but it gets horribly confused.
    414 	 * At best, the post-processing tool will spit out some warning messages,
    415 	 * such as:
    416 	 * warning:
    417 	 * /lib64/libdl-2.9.so/CYCLES.10000.0.all.all.all/{dep}/{root}/var/lib/oprofile/samples/current/{root}/lib64/libdl-2.9.so/{dep}/{root}/lib64/libdl-2.9.so/PM_RUN_CYC_GRP12.10000.0.all.all.all
    418 	 * could not be found.
    419 	 *
    420 	 * At worst, the parsing may result in an "invalid argument" runtime error
    421 	 * because of the inability to parse a sample file whose name contains that
    422 	 * of another sample file.  This typically seems to happen when callgraph
    423 	 * data is being collected.
    424 	 *
    425 	 * The next several lines of code checks if the passed filename
    426 	 * contains <session-dir>/samples; if so, we discard it as an
    427 	 * invalid sample file.
    428 	 */
    429 
    430 	unsigned int j = base_dir.rfind('/');
    431 	string session_samples_dir = base_dir.substr(0, j);
    432 	if (sub.find(session_samples_dir) != string::npos) {
    433 		invalid_sample_file = true;
    434 		return false;
    435 	}
    436 
    437 	// strip out generated JIT object files for samples of anonymous regions
    438 	if (is_jit_sample(sub))
    439 		return false;
    440 
    441 	filename_spec file_spec(filename, spec.extra_found_images);
    442 	if (spec.match(file_spec)) {
    443 		if (exclude_dependent && file_spec.is_dependent())
    444 			return false;
    445 		return true;
    446 	}
    447 
    448 	return false;
    449 }
    450 
    451 
    452 /**
    453  * Print a warning message if we detect any sample buffer overflows
    454  * occurred in the kernel driver.
    455  */
    456 void warn_if_kern_buffs_overflow(string const & session_samples_dir)
    457 {
    458 	DIR * dir;
    459 	struct dirent * dirent;
    460 	string stats_path;
    461 	int ret = 0;
    462 
    463 	stats_path = session_samples_dir + "stats/";
    464 	ret = op_read_int_from_file((stats_path + "event_lost_overflow").
    465 				    c_str(), 0);
    466 
    467 	if (!(dir = opendir(stats_path.c_str()))) {
    468 		ret = -1;
    469 		goto done;
    470 	}
    471 
    472 	while ((dirent = readdir(dir)) && !ret) {
    473 		int cpu_nr;
    474 		string path;
    475 		if (sscanf(dirent->d_name, "cpu%d", &cpu_nr) != 1)
    476 			continue;
    477 		path = stats_path + dirent->d_name + "/";
    478 		ret = op_read_int_from_file((path + "sample_lost_overflow").
    479 					    c_str(), 0);
    480 	}
    481 	closedir(dir);
    482 
    483  done:
    484 	if (ret > 0) {
    485 		cerr << "WARNING! The OProfile kernel driver reports sample "
    486 		     << "buffer overflows." << endl;
    487 		cerr << "Such overflows can result in incorrect sample attribution"
    488 		     << ", invalid sample" << endl
    489 		     <<	"files and other symptoms.  "
    490 		     << "See the oprofiled.log for details." << endl;
    491 		cerr << "You should adjust your sampling frequency to eliminate"
    492 		     << " (or at least minimize)" << endl
    493 		     <<	"these overflows." << endl;
    494 	}
    495 }
    496 
    497 
    498 }  // anonymous namespace
    499 
    500 
    501 list<string> profile_spec::generate_file_list(bool exclude_dependent,
    502   bool exclude_cg) const
    503 {
    504 	// FIXME: isn't remove_duplicates faster than doing this, then copy() ?
    505 	set<string> unique_files;
    506 
    507 	vector<string> sessions = filter_session(session, session_exclude);
    508 
    509 	if (sessions.empty()) {
    510 		ostringstream os;
    511 		os << "No session given\n"
    512 		   << "included session was:\n";
    513 		copy(session.begin(), session.end(),
    514 		     ostream_iterator<string>(os, "\n"));
    515 		os << "excluded session was:\n";
    516 		copy(session_exclude.begin(), session_exclude.end(),
    517 		     ostream_iterator<string>(os, "\n"));
    518 		throw invalid_argument(os.str());
    519 	}
    520 
    521 	bool found_file = false;
    522 
    523 	vector<string>::const_iterator cit = sessions.begin();
    524 	vector<string>::const_iterator end = sessions.end();
    525 
    526 	for (; cit != end; ++cit) {
    527 		if (cit->empty())
    528 			continue;
    529 
    530 		string base_dir;
    531 		invalid_sample_file = false;
    532 		if ((*cit)[0] != '.' && (*cit)[0] != '/')
    533 			base_dir = archive_path + op_samples_dir;
    534 		base_dir += *cit;
    535 
    536 		base_dir = op_realpath(base_dir);
    537 
    538 		list<string> files;
    539 		create_file_list(files, base_dir, "*", true);
    540 
    541 		if (!files.empty()) {
    542 			found_file = true;
    543 			warn_if_kern_buffs_overflow(base_dir + "/");
    544 		}
    545 
    546 		list<string>::const_iterator it = files.begin();
    547 		list<string>::const_iterator fend = files.end();
    548 		for (; it != fend; ++it) {
    549 			if (valid_candidate(base_dir, *it, *this,
    550 			    exclude_dependent, exclude_cg)) {
    551 				unique_files.insert(*it);
    552 			}
    553 		}
    554 		if (invalid_sample_file) {
    555 			cerr << "Warning: Invalid sample files found in "
    556 			     << base_dir << endl;
    557 			cerr << "This problem can be caused by too high of a sampling rate."
    558 			     << endl;
    559 		}
    560 	}
    561 
    562 	if (!found_file) {
    563 		ostringstream os;
    564 		os  << "No sample file found: try running opcontrol --dump\n"
    565 		    << "or specify a session containing sample files\n";
    566 		throw op_fatal_error(os.str());
    567 	}
    568 
    569 	list<string> result;
    570 	copy(unique_files.begin(), unique_files.end(), back_inserter(result));
    571 
    572 	return result;
    573 }
    574