Home | History | Annotate | Download | only in libpp
      1 /**
      2  * @file parse_filename.cpp
      3  * Split a sample filename into its constituent parts
      4  *
      5  * @remark Copyright 2003 OProfile authors
      6  * @remark Read the file COPYING
      7  *
      8  * @author Philippe Elie
      9  */
     10 
     11 #include <stdexcept>
     12 #include <vector>
     13 #include <string>
     14 #include <iostream>
     15 #include <sys/stat.h>
     16 
     17 #include "parse_filename.h"
     18 #include "file_manip.h"
     19 #include "string_manip.h"
     20 #include "locate_images.h"
     21 
     22 using namespace std;
     23 
     24 namespace {
     25 
     26 // PP:3.19 event_name.count.unitmask.tgid.tid.cpu
     27 parsed_filename parse_event_spec(string const & event_spec)
     28 {
     29 	typedef vector<string> parts_type;
     30 	typedef parts_type::size_type size_type;
     31 
     32 	size_type const nr_parts = 6;
     33 
     34 	parts_type parts = separate_token(event_spec, '.');
     35 
     36 	if (parts.size() != nr_parts) {
     37 		throw invalid_argument("parse_event_spec(): bad event specification: " + event_spec);
     38 	}
     39 
     40 	for (size_type i = 0; i < nr_parts ; ++i) {
     41 		if (parts[i].empty()) {
     42 			throw invalid_argument("parse_event_spec(): bad event specification: " + event_spec);
     43 		}
     44 	}
     45 
     46 	parsed_filename result;
     47 
     48 	size_type i = 0;
     49 	result.event = parts[i++];
     50 	result.count = parts[i++];
     51 	result.unitmask = parts[i++];
     52 	result.tgid = parts[i++];
     53 	result.tid = parts[i++];
     54 	result.cpu = parts[i++];
     55 
     56 	return result;
     57 }
     58 
     59 
     60 /**
     61  * @param component  path component
     62  *
     63  * remove from path_component all directory left to {root}, {kern} or {anon}
     64  */
     65 void remove_base_dir(vector<string> & path)
     66 {
     67 	vector<string>::iterator it;
     68 	for (it = path.begin(); it != path.end(); ++it) {
     69 		if (*it == "{root}" || *it == "{kern}"  || *it == "{anon}")
     70 			break;
     71 	}
     72 
     73 	path.erase(path.begin(), it);
     74 }
     75 
     76 
     77 /// Handle an anon region. Pretty print the details.
     78 /// The second argument is the anon portion of the path which will
     79 /// contain extra details such as the anon region name (unknown, vdso, heap etc.)
     80 string const parse_anon(string const & str, string const & str2)
     81 {
     82 	string name = str2;
     83 	// Get rid of "{anon:
     84 	name.erase(0, 6);
     85 	// Catch the case where we end up with an empty string.  This should
     86 	// never happen, except where things have gone awfully bad with profile
     87 	// data collection, resulting in one or more bogus sample files.
     88 	if(0 == name.size())
     89 		throw invalid_argument("parse_anon() invalid name: " + str2 + "\n"
     90 			+ "This error indicates your sample data is suspect. It is "
     91 			+ "recommended you do a --reset and collect new profile data.");
     92 	// Get rid of the trailing '}'
     93 	name.erase(name.size() - 1, 1);
     94 	vector<string> parts = separate_token(str, '.');
     95 	if (parts.size() != 3)
     96 		throw invalid_argument("parse_anon() invalid name: " + str);
     97 
     98 	string ret = name +" (tgid:";
     99 	ret += parts[0] + " range:" + parts[1] + "-" + parts[2] + ")";
    100 	return ret;
    101 }
    102 
    103 
    104 }  // anonymous namespace
    105 
    106 
    107 /*
    108  *  valid filename are variations on:
    109  *
    110  * {kern}/name/event_spec
    111  * {root}/path/to/bin/{dep}/{root}/path/to/bin/event_spec
    112  * {root}/path/to/bin/{dep}/{anon:anon}/pid.start.end/event_spec
    113  * {root}/path/to/bin/{dep}/{anon:[vdso]}/pid.start.end/event_spec
    114  * {root}/path/to/bin/{dep}/{kern}/name/event_spec
    115  * {root}/path/to/bin/{dep}/{root}/path/to/bin/{cg}/{root}/path/to/bin/event_spec
    116 
    117  *
    118  * where /name/ denote a unique path component
    119  */
    120 parsed_filename parse_filename(string const & filename,
    121 			       extra_images const & extra_found_images)
    122 {
    123 	struct stat st;
    124 
    125 	string::size_type pos = filename.find_last_of('/');
    126 	if (pos == string::npos) {
    127 		throw invalid_argument("parse_filename() invalid filename: " +
    128 				       filename);
    129 	}
    130 	string event_spec = filename.substr(pos + 1);
    131 	string filename_spec = filename.substr(0, pos);
    132 
    133 	parsed_filename result = parse_event_spec(event_spec);
    134 
    135 	result.filename = filename;
    136 
    137 	vector<string> path = separate_token(filename_spec, '/');
    138 
    139 	remove_base_dir(path);
    140 
    141 	// pp_interface PP:3.19 to PP:3.23 path must start either with {root}
    142 	// or {kern} and we must found at least 2 component, remove_base_dir()
    143 	// return an empty path if {root} or {kern} are not found
    144 	if (path.size() < 2) {
    145 		throw invalid_argument("parse_filename() invalid filename: " +
    146 				       filename);
    147 	}
    148 
    149 	size_t i;
    150 	for (i = 1 ; i < path.size() ; ++i) {
    151 		if (path[i] == "{dep}")
    152 			break;
    153 
    154 		result.image += "/" + path[i];
    155 	}
    156 
    157 	if (i == path.size()) {
    158 		throw invalid_argument("parse_filename() invalid filename: " +
    159 				       filename);
    160 	}
    161 
    162 	// skip "{dep}"
    163 	++i;
    164 
    165 	// PP:3.19 {dep}/ must be followed by {kern}/, {root}/ or {anon}/
    166 	if (path[i] != "{kern}" && path[i] != "{root}" &&
    167 	    path[i].find("{anon", 0) != 0) {
    168 		throw invalid_argument("parse_filename() invalid filename: " +
    169 				       filename);
    170 	}
    171 
    172 	bool anon = path[i].find("{anon:", 0) == 0;
    173 
    174 	// skip "{root}", "{kern}" or "{anon:.*}"
    175 	++i;
    176 
    177 	for (; i < path.size(); ++i) {
    178 		if (path[i] == "{cg}")
    179 			break;
    180 
    181 		if (anon) {
    182 			pos = filename_spec.rfind('.');
    183 			pos = filename_spec.rfind('.', pos-1);
    184 			if (pos == string::npos) {
    185 				throw invalid_argument("parse_filename() pid.addr.addr name expected: " +
    186 						       filename_spec);
    187 			}
    188 			string jitdump = filename_spec.substr(0, pos) + ".jo";
    189 			// if a jitdump file exists, we point to this file
    190 			if (!stat(jitdump.c_str(), &st)) {
    191 				// later code assumes an optional prefix path
    192 				// is stripped from the lib_image.
    193 				result.lib_image =
    194 					extra_found_images.strip_path_prefix(jitdump);
    195 				result.jit_dumpfile_exists = true;
    196 			} else {
    197 				result.lib_image =  parse_anon(path[i], path[i - 1]);
    198 			}
    199 			i++;
    200 			break;
    201 		} else {
    202 			result.lib_image += "/" + path[i];
    203 		}
    204 	}
    205 
    206 	if (i == path.size())
    207 		return result;
    208 
    209 	// skip "{cg}"
    210 	++i;
    211 	if (i == path.size() ||
    212 	    (path[i] != "{kern}" && path[i] != "{root}" &&
    213 	     path[i].find("{anon", 0) != 0)) {
    214 		throw invalid_argument("parse_filename() invalid filename: "
    215 		                       + filename);
    216 	}
    217 
    218 	// skip "{root}", "{kern}" or "{anon}"
    219 	anon = (path[i].find("{anon", 0) == 0);
    220 	++i;
    221 
    222 	if (anon) {
    223 		result.cg_image = parse_anon(path[i], path[i - 1]);
    224 		i++;
    225 	} else {
    226 		for (; i < path.size(); ++i)
    227 			result.cg_image += "/" + path[i];
    228 	}
    229 
    230 	return result;
    231 }
    232 
    233 bool parsed_filename::profile_spec_equal(parsed_filename const & parsed)
    234 {
    235 	return 	event == parsed.event &&
    236 		count == parsed.count &&
    237 		unitmask == parsed.unitmask &&
    238 		tgid == parsed.tgid &&
    239 		tid == parsed.tid &&
    240 		cpu == parsed.tid;
    241 }
    242 
    243 ostream & operator<<(ostream & out, parsed_filename const & data)
    244 {
    245 	out << data.filename << endl;
    246 	out << data.image << " " << data.lib_image << " "
    247 	    << data.event << " " << data.count << " "
    248 	    << data.unitmask << " " << data.tgid << " "
    249 	    << data.tid << " " << data.cpu << endl;
    250 
    251 	return out;
    252 }
    253