1 /** 2 * @file profile_spec.cpp 3 * Contains a PP profile specification 4 * 5 * @remark Copyright 2003 OProfile authors 6 * @remark Read the file COPYING 7 * 8 * @author Philippe Elie 9 */ 10 11 #include <algorithm> 12 #include <set> 13 #include <sstream> 14 #include <iterator> 15 #include <iostream> 16 #include <dirent.h> 17 18 #include "file_manip.h" 19 #include "op_config.h" 20 #include "profile_spec.h" 21 #include "string_manip.h" 22 #include "glob_filter.h" 23 #include "locate_images.h" 24 #include "op_exception.h" 25 #include "op_header.h" 26 #include "op_fileio.h" 27 28 using namespace std; 29 30 namespace { 31 32 // PP:3.7, full path, or relative path. If we can't find it, 33 // we should maintain the original to maintain the wordexp etc. 34 string const fixup_image_spec(string const & str, extra_images const & extra) 35 { 36 // On error find_image_path() return str, so if an occur we will 37 // use the provided image_name not the fixed one. 38 image_error error; 39 return extra.find_image_path(str, error, true); 40 } 41 42 void fixup_image_spec(vector<string> & images, extra_images const & extra) 43 { 44 vector<string>::iterator it = images.begin(); 45 vector<string>::iterator const end = images.end(); 46 47 for (; it != end; ++it) 48 *it = fixup_image_spec(*it, extra); 49 } 50 51 } // anon namespace 52 53 54 profile_spec::profile_spec() 55 : 56 extra_found_images() 57 { 58 parse_table["archive"] = &profile_spec::parse_archive_path; 59 parse_table["session"] = &profile_spec::parse_session; 60 parse_table["session-exclude"] = 61 &profile_spec::parse_session_exclude; 62 parse_table["image"] = &profile_spec::parse_image; 63 parse_table["image-exclude"] = &profile_spec::parse_image_exclude; 64 parse_table["lib-image"] = &profile_spec::parse_lib_image; 65 parse_table["event"] = &profile_spec::parse_event; 66 parse_table["count"] = &profile_spec::parse_count; 67 parse_table["unit-mask"] = &profile_spec::parse_unitmask; 68 parse_table["tid"] = &profile_spec::parse_tid; 69 parse_table["tgid"] = &profile_spec::parse_tgid; 70 parse_table["cpu"] = &profile_spec::parse_cpu; 71 } 72 73 74 void profile_spec::parse(string const & tag_value) 75 { 76 string value; 77 action_t action = get_handler(tag_value, value); 78 if (!action) { 79 throw invalid_argument("profile_spec::parse(): not " 80 "a valid tag \"" + tag_value + "\""); 81 } 82 83 (this->*action)(value); 84 } 85 86 87 bool profile_spec::is_valid_tag(string const & tag_value) 88 { 89 string value; 90 return get_handler(tag_value, value); 91 } 92 93 94 void profile_spec::set_image_or_lib_name(string const & str) 95 { 96 /* FIXME: what does spec say about this being allowed to be 97 * a comma list or not ? */ 98 image_or_lib_image.push_back(fixup_image_spec(str, extra_found_images)); 99 } 100 101 102 void profile_spec::parse_archive_path(string const & str) 103 { 104 archive_path = op_realpath(str); 105 } 106 107 108 string profile_spec::get_archive_path() const 109 { 110 return archive_path; 111 } 112 113 114 void profile_spec::parse_session(string const & str) 115 { 116 session = separate_token(str, ','); 117 } 118 119 120 void profile_spec::parse_session_exclude(string const & str) 121 { 122 session_exclude = separate_token(str, ','); 123 } 124 125 126 void profile_spec::parse_image(string const & str) 127 { 128 image = separate_token(str, ','); 129 fixup_image_spec(image, extra_found_images); 130 } 131 132 133 void profile_spec::parse_image_exclude(string const & str) 134 { 135 image_exclude = separate_token(str, ','); 136 fixup_image_spec(image_exclude, extra_found_images); 137 } 138 139 140 void profile_spec::parse_lib_image(string const & str) 141 { 142 lib_image = separate_token(str, ','); 143 fixup_image_spec(lib_image, extra_found_images); 144 } 145 146 147 void profile_spec::parse_event(string const & str) 148 { 149 event.set(str); 150 } 151 152 153 void profile_spec::parse_count(string const & str) 154 { 155 count.set(str); 156 } 157 158 159 void profile_spec::parse_unitmask(string const & str) 160 { 161 unitmask.set(str); 162 } 163 164 165 void profile_spec::parse_tid(string const & str) 166 { 167 tid.set(str); 168 } 169 170 171 void profile_spec::parse_tgid(string const & str) 172 { 173 tgid.set(str); 174 } 175 176 177 void profile_spec::parse_cpu(string const & str) 178 { 179 cpu.set(str); 180 } 181 182 183 profile_spec::action_t 184 profile_spec::get_handler(string const & tag_value, string & value) 185 { 186 string::size_type pos = tag_value.find_first_of(':'); 187 if (pos == string::npos) 188 return 0; 189 190 string tag(tag_value.substr(0, pos)); 191 value = tag_value.substr(pos + 1); 192 193 parse_table_t::const_iterator it = parse_table.find(tag); 194 if (it == parse_table.end()) 195 return 0; 196 197 return it->second; 198 } 199 200 201 namespace { 202 203 /// return true if the value from the profile spec may match the comma 204 /// list 205 template<typename T> 206 bool comma_match(comma_list<T> const & cl, generic_spec<T> const & value) 207 { 208 // if the profile spec is "all" we match the sample file 209 if (!cl.is_set()) 210 return true; 211 212 // an "all" sample file should never match specified profile 213 // spec values 214 if (!value.is_set()) 215 return false; 216 217 // now match each profile spec value against the sample file 218 return cl.match(value.value()); 219 } 220 221 } 222 223 224 bool profile_spec::match(filename_spec const & spec) const 225 { 226 bool matched_by_image_or_lib_image = false; 227 228 // We need the true image name not the one based on the sample 229 // filename for the benefit of module which have /oprofile in their 230 // sample filename. This allow to specify profile spec based on the 231 // real name of the image, e.g. 'binary:*oprofile.ko' 232 string simage = fixup_image_spec(spec.image, extra_found_images); 233 string slib_image = fixup_image_spec(spec.lib_image, 234 extra_found_images); 235 236 // PP:3.19 237 if (!image_or_lib_image.empty()) { 238 glob_filter filter(image_or_lib_image, image_exclude); 239 if (filter.match(simage) || filter.match(slib_image)) 240 matched_by_image_or_lib_image = true; 241 } 242 243 if (!matched_by_image_or_lib_image) { 244 // PP:3.7 3.8 245 if (!image.empty()) { 246 glob_filter filter(image, image_exclude); 247 if (!filter.match(simage)) 248 return false; 249 } else if (!image_or_lib_image.empty()) { 250 // image.empty() means match all except if user 251 // specified image_or_lib_image 252 return false; 253 } 254 255 // PP:3.9 3.10 256 if (!lib_image.empty()) { 257 glob_filter filter(lib_image, image_exclude); 258 if (!filter.match(slib_image)) 259 return false; 260 } else if (image.empty() && !image_or_lib_image.empty()) { 261 // lib_image empty means match all except if user 262 // specified image_or_lib_image *or* we already 263 // matched this spec through image 264 return false; 265 } 266 } 267 268 if (!matched_by_image_or_lib_image) { 269 // if we don't match by image_or_lib_image we must try to 270 // exclude from spec, exclusion from image_or_lib_image has 271 // been handled above 272 vector<string> empty; 273 glob_filter filter(empty, image_exclude); 274 if (!filter.match(simage)) 275 return false; 276 if (!spec.lib_image.empty() && !filter.match(slib_image)) 277 return false; 278 } 279 280 if (!event.match(spec.event)) 281 return false; 282 283 if (!count.match(spec.count)) 284 return false; 285 286 if (!unitmask.match(spec.unitmask)) 287 return false; 288 289 if (!comma_match(cpu, spec.cpu)) 290 return false; 291 292 if (!comma_match(tid, spec.tid)) 293 return false; 294 295 if (!comma_match(tgid, spec.tgid)) 296 return false; 297 298 return true; 299 } 300 301 302 profile_spec profile_spec::create(list<string> const & args, 303 vector<string> const & image_path, 304 string const & root_path) 305 { 306 profile_spec spec; 307 set<string> tag_seen; 308 vector<string> temp_image_or_lib; 309 310 list<string>::const_iterator it = args.begin(); 311 list<string>::const_iterator end = args.end(); 312 313 for (; it != end; ++it) { 314 if (spec.is_valid_tag(*it)) { 315 if (tag_seen.find(*it) != tag_seen.end()) { 316 throw op_runtime_error("tag specified " 317 "more than once: " + *it); 318 } 319 tag_seen.insert(*it); 320 spec.parse(*it); 321 } else { 322 string const file = op_realpath(*it); 323 temp_image_or_lib.push_back(file); 324 } 325 } 326 327 // PP:3.5 no session given means use the current session. 328 if (spec.session.empty()) 329 spec.session.push_back("current"); 330 331 bool ok = true; 332 vector<string>::const_iterator ip_it = image_path.begin(); 333 for ( ; ip_it != image_path.end(); ++ip_it) { 334 if (!is_directory(spec.get_archive_path() + "/" + *ip_it)) { 335 cerr << spec.get_archive_path() + "/" + *ip_it << " isn't a valid directory\n"; 336 ok = false; 337 } 338 } 339 if (!ok) 340 throw op_runtime_error("invalid --image-path= options"); 341 342 spec.extra_found_images.populate(image_path, spec.get_archive_path(), 343 root_path); 344 vector<string>::const_iterator im = temp_image_or_lib.begin(); 345 vector<string>::const_iterator last = temp_image_or_lib.end(); 346 for (; im != last; ++im) 347 spec.set_image_or_lib_name(*im); 348 349 return spec; 350 } 351 352 namespace { 353 354 vector<string> filter_session(vector<string> const & session, 355 vector<string> const & session_exclude) 356 { 357 vector<string> result(session); 358 359 if (result.empty()) 360 result.push_back("current"); 361 362 for (size_t i = 0 ; i < session_exclude.size() ; ++i) { 363 // FIXME: would we use fnmatch on each item, are we allowed 364 // to --session=current* ? 365 vector<string>::iterator it = 366 find(result.begin(), result.end(), session_exclude[i]); 367 368 if (it != result.end()) 369 result.erase(it); 370 } 371 372 return result; 373 } 374 375 static bool invalid_sample_file; 376 bool valid_candidate(string const & base_dir, string const & filename, 377 profile_spec const & spec, bool exclude_dependent, 378 bool exclude_cg) 379 { 380 if (exclude_cg && filename.find("{cg}") != string::npos) 381 return false; 382 383 // strip out non sample files 384 string const & sub = filename.substr(base_dir.size(), string::npos); 385 if (!is_prefix(sub, "/{root}/") && !is_prefix(sub, "/{kern}/")) 386 return false; 387 388 /* When overflows occur in the oprofile kernel driver's sample 389 * buffers (caused by too high of a sampling rate), it's possible 390 * for samples to be mis-attributed. A common scenario is that, 391 * while profiling process 'abc' running binary 'xzy', the task 392 * switch for 'abc' gets dropped somehow. Then, samples are taken 393 * for the 'xyz' binary. In the attempt to attribute the samples to 394 * the associated binary, the oprofile kernel code examines the 395 * the memory mappings for the last process for which it recorded 396 * a task switch. When profiling at a very high rate, the oprofile 397 * daemon is often the process that is mistakenly examined. Then the 398 * sample from binary 'xyz' is matched to some file that's open in 399 * oprofiled's memory space. Because oprofiled has many sample files 400 * open at any given time, there's a good chance the sample's VMA is 401 * contained within one of those sample files. So, once finding this 402 * bogus match, the oprofile kernel records a cookie switch for the 403 * sample file. This scenario is made even more likely if a high 404 * sampling rate (e.g., profiling on several events) is paired with 405 * callgraph data collection. 406 * 407 * When the daemon processes this sample data from the kernel, it 408 * creates a sample file for the sample file, resulting in something 409 * of the form: 410 * <session-dir>/[blah]<session-dir>/[blah] 411 * 412 * When the sample data is post-processed, the sample file is parsed to 413 * try to determine the name of the binary, but it gets horribly confused. 414 * At best, the post-processing tool will spit out some warning messages, 415 * such as: 416 * warning: 417 * /lib64/libdl-2.9.so/CYCLES.10000.0.all.all.all/{dep}/{root}/var/lib/oprofile/samples/current/{root}/lib64/libdl-2.9.so/{dep}/{root}/lib64/libdl-2.9.so/PM_RUN_CYC_GRP12.10000.0.all.all.all 418 * could not be found. 419 * 420 * At worst, the parsing may result in an "invalid argument" runtime error 421 * because of the inability to parse a sample file whose name contains that 422 * of another sample file. This typically seems to happen when callgraph 423 * data is being collected. 424 * 425 * The next several lines of code checks if the passed filename 426 * contains <session-dir>/samples; if so, we discard it as an 427 * invalid sample file. 428 */ 429 430 unsigned int j = base_dir.rfind('/'); 431 string session_samples_dir = base_dir.substr(0, j); 432 if (sub.find(session_samples_dir) != string::npos) { 433 invalid_sample_file = true; 434 return false; 435 } 436 437 // strip out generated JIT object files for samples of anonymous regions 438 if (is_jit_sample(sub)) 439 return false; 440 441 filename_spec file_spec(filename, spec.extra_found_images); 442 if (spec.match(file_spec)) { 443 if (exclude_dependent && file_spec.is_dependent()) 444 return false; 445 return true; 446 } 447 448 return false; 449 } 450 451 452 /** 453 * Print a warning message if we detect any sample buffer overflows 454 * occurred in the kernel driver. 455 */ 456 void warn_if_kern_buffs_overflow(string const & session_samples_dir) 457 { 458 DIR * dir; 459 struct dirent * dirent; 460 string stats_path; 461 int ret = 0; 462 463 stats_path = session_samples_dir + "stats/"; 464 ret = op_read_int_from_file((stats_path + "event_lost_overflow"). 465 c_str(), 0); 466 467 if (!(dir = opendir(stats_path.c_str()))) { 468 ret = -1; 469 goto done; 470 } 471 472 while ((dirent = readdir(dir)) && !ret) { 473 int cpu_nr; 474 string path; 475 if (sscanf(dirent->d_name, "cpu%d", &cpu_nr) != 1) 476 continue; 477 path = stats_path + dirent->d_name + "/"; 478 ret = op_read_int_from_file((path + "sample_lost_overflow"). 479 c_str(), 0); 480 } 481 closedir(dir); 482 483 done: 484 if (ret > 0) { 485 cerr << "WARNING! The OProfile kernel driver reports sample " 486 << "buffer overflows." << endl; 487 cerr << "Such overflows can result in incorrect sample attribution" 488 << ", invalid sample" << endl 489 << "files and other symptoms. " 490 << "See the oprofiled.log for details." << endl; 491 cerr << "You should adjust your sampling frequency to eliminate" 492 << " (or at least minimize)" << endl 493 << "these overflows." << endl; 494 } 495 } 496 497 498 } // anonymous namespace 499 500 501 list<string> profile_spec::generate_file_list(bool exclude_dependent, 502 bool exclude_cg) const 503 { 504 // FIXME: isn't remove_duplicates faster than doing this, then copy() ? 505 set<string> unique_files; 506 507 vector<string> sessions = filter_session(session, session_exclude); 508 509 if (sessions.empty()) { 510 ostringstream os; 511 os << "No session given\n" 512 << "included session was:\n"; 513 copy(session.begin(), session.end(), 514 ostream_iterator<string>(os, "\n")); 515 os << "excluded session was:\n"; 516 copy(session_exclude.begin(), session_exclude.end(), 517 ostream_iterator<string>(os, "\n")); 518 throw invalid_argument(os.str()); 519 } 520 521 bool found_file = false; 522 523 vector<string>::const_iterator cit = sessions.begin(); 524 vector<string>::const_iterator end = sessions.end(); 525 526 for (; cit != end; ++cit) { 527 if (cit->empty()) 528 continue; 529 530 string base_dir; 531 invalid_sample_file = false; 532 if ((*cit)[0] != '.' && (*cit)[0] != '/') 533 base_dir = archive_path + op_samples_dir; 534 base_dir += *cit; 535 536 base_dir = op_realpath(base_dir); 537 538 list<string> files; 539 create_file_list(files, base_dir, "*", true); 540 541 if (!files.empty()) { 542 found_file = true; 543 warn_if_kern_buffs_overflow(base_dir + "/"); 544 } 545 546 list<string>::const_iterator it = files.begin(); 547 list<string>::const_iterator fend = files.end(); 548 for (; it != fend; ++it) { 549 if (valid_candidate(base_dir, *it, *this, 550 exclude_dependent, exclude_cg)) { 551 unique_files.insert(*it); 552 } 553 } 554 if (invalid_sample_file) { 555 cerr << "Warning: Invalid sample files found in " 556 << base_dir << endl; 557 cerr << "This problem can be caused by too high of a sampling rate." 558 << endl; 559 } 560 } 561 562 if (!found_file) { 563 ostringstream os; 564 os << "No sample file found: try running opcontrol --dump\n" 565 << "or specify a session containing sample files\n"; 566 throw op_fatal_error(os.str()); 567 } 568 569 list<string> result; 570 copy(unique_files.begin(), unique_files.end(), back_inserter(result)); 571 572 return result; 573 } 574