#define _FILE_OFFSET_BITS 64

/* ANDROID_CHANGE_BEGIN */
#ifdef __APPLE__
#include "include/linux/kernel.h"
#else
#include <linux/kernel.h>
#endif
/* ANDROID_CHANGE_END */

#include <byteswap.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/mman.h>

#include "evlist.h"
#include "evsel.h"
#include "session.h"
#include "sort.h"
#include "util.h"

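/*
 * Open the session input: "-" means a live pipe on stdin, anything else
 * is a perf.data-style file that must pass ownership, size and header
 * sanity checks before it is accepted.
 */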
static int perf_session__open(struct perf_session *self, bool force)
{
	struct stat input_stat;

	if (!strcmp(self->filename, "-")) {
		self->fd_pipe = true;
		self->fd = STDIN_FILENO;

		if (perf_session__read_header(self, self->fd) < 0)
			pr_err("incompatible file format");

		return 0;
	}

	self->fd = open(self->filename, O_RDONLY);
	if (self->fd < 0) {
		int err = errno;

		pr_err("failed to open %s: %s", self->filename, strerror(err));
		if (err == ENOENT && !strcmp(self->filename, "perf.data"))
			pr_err("  (try 'perf record' first)");
		pr_err("\n");
		/*
		 * Return the saved errno: the pr_err calls above may have
		 * clobbered the global errno by now.
		 */
		return -err;
	}

	if (fstat(self->fd, &input_stat) < 0)
		goto out_close;

	if (!force && input_stat.st_uid && (input_stat.st_uid != geteuid())) {
		pr_err("file %s not owned by current user or root\n",
		       self->filename);
		goto out_close;
	}

	if (!input_stat.st_size) {
		pr_info("zero-sized file (%s), nothing to do!\n",
			self->filename);
		goto out_close;
	}

	if (perf_session__read_header(self, self->fd) < 0) {
		pr_err("incompatible file format");
		goto out_close;
	}

	if (!perf_evlist__valid_sample_type(self->evlist)) {
		pr_err("non matching sample_type");
		goto out_close;
	}

	if (!perf_evlist__valid_sample_id_all(self->evlist)) {
		pr_err("non matching sample_id_all");
		goto out_close;
	}

	self->size = input_stat.st_size;
	return 0;

out_close:
	close(self->fd);
	self->fd = -1;
	return -1;
}

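/*
 * Compute the size of the sample_id_all trailer the kernel appends to
 * non-sample events: one field per relevant bit set in sample_type,
 * mirroring the PERF_SAMPLE_{TID,TIME,ID,STREAM_ID,CPU} layout.
 */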
static void perf_session__id_header_size(struct perf_session *session)
{
	struct perf_sample *data;
	u64 sample_type = session->sample_type;
	u16 size = 0;

	if (!session->sample_id_all)
		goto out;

	if (sample_type & PERF_SAMPLE_TID)
		size += sizeof(data->tid) * 2;

	if (sample_type & PERF_SAMPLE_TIME)
		size += sizeof(data->time);

	if (sample_type & PERF_SAMPLE_ID)
		size += sizeof(data->id);

	if (sample_type & PERF_SAMPLE_STREAM_ID)
		size += sizeof(data->stream_id);

	if (sample_type & PERF_SAMPLE_CPU)
		size += sizeof(data->cpu) * 2;
out:
	session->id_hdr_size = size;
}

void perf_session__update_sample_type(struct perf_session *self)
{
	self->sample_type = perf_evlist__sample_type(self->evlist);
	self->sample_size = __perf_evsel__sample_size(self->sample_type);
	self->sample_id_all = perf_evlist__sample_id_all(self->evlist);
	perf_session__id_header_size(self);
}

int perf_session__create_kernel_maps(struct perf_session *self)
{
	int ret = machine__create_kernel_maps(&self->host_machine);

	if (ret >= 0)
		ret = machines__create_guest_kernel_maps(&self->machines);
	return ret;
}

static void perf_session__destroy_kernel_maps(struct perf_session *self)
{
	machine__destroy_kernel_maps(&self->host_machine);
	machines__destroy_guest_kernel_maps(&self->machines);
}

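/*
 * Allocate and initialize a session. The filename is stored inline at
 * the end of the struct (hence the "+ len" in the allocation). In
 * O_RDONLY mode the file is opened and validated right away; in
 * O_WRONLY mode only the kernel maps are created, since there is no
 * header to read yet.
 */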
struct perf_session *perf_session__new(const char *filename, int mode,
				       bool force, bool repipe,
				       struct perf_event_ops *ops)
{
	size_t len = filename ? strlen(filename) + 1 : 0;
	struct perf_session *self = zalloc(sizeof(*self) + len);

	if (self == NULL)
		goto out;

	memcpy(self->filename, filename, len);
	self->threads = RB_ROOT;
	INIT_LIST_HEAD(&self->dead_threads);
	self->last_match = NULL;
	/*
	 * On 64bit we can mmap the data file in one go. No need for tiny mmap
	 * slices. On 32bit we use 32MB.
	 */
#if BITS_PER_LONG == 64
	self->mmap_window = ULLONG_MAX;
#else
	self->mmap_window = 32 * 1024 * 1024ULL;
#endif
	self->machines = RB_ROOT;
	self->repipe = repipe;
	INIT_LIST_HEAD(&self->ordered_samples.samples);
	INIT_LIST_HEAD(&self->ordered_samples.sample_cache);
	INIT_LIST_HEAD(&self->ordered_samples.to_free);
	machine__init(&self->host_machine, "", HOST_KERNEL_ID);

	if (mode == O_RDONLY) {
		if (perf_session__open(self, force) < 0)
			goto out_delete;
		perf_session__update_sample_type(self);
	} else if (mode == O_WRONLY) {
		/*
		 * In O_RDONLY mode this will be performed when reading the
		 * kernel MMAP event, in perf_event__process_mmap().
		 */
		if (perf_session__create_kernel_maps(self) < 0)
			goto out_delete;
	}

	if (ops && ops->ordering_requires_timestamps &&
	    ops->ordered_samples && !self->sample_id_all) {
		dump_printf("WARNING: No sample_id_all support, falling back to unordered processing\n");
		ops->ordered_samples = false;
	}

out:
	return self;
out_delete:
	perf_session__delete(self);
	return NULL;
}

static void perf_session__delete_dead_threads(struct perf_session *self)
{
	struct thread *n, *t;

	list_for_each_entry_safe(t, n, &self->dead_threads, node) {
		list_del(&t->node);
		thread__delete(t);
	}
}

static void perf_session__delete_threads(struct perf_session *self)
{
	struct rb_node *nd = rb_first(&self->threads);

	while (nd) {
		struct thread *t = rb_entry(nd, struct thread, rb_node);

		rb_erase(&t->rb_node, &self->threads);
		nd = rb_next(nd);
		thread__delete(t);
	}
}

void perf_session__delete(struct perf_session *self)
{
	perf_session__destroy_kernel_maps(self);
	perf_session__delete_dead_threads(self);
	perf_session__delete_threads(self);
	machine__exit(&self->host_machine);
	close(self->fd);
	free(self);
}

void perf_session__remove_thread(struct perf_session *self, struct thread *th)
{
	self->last_match = NULL;
	rb_erase(&th->rb_node, &self->threads);
	/*
	 * We may have references to this thread, for instance in some hist_entry
	 * instances, so just move them to a separate list.
	 */
	list_add_tail(&th->node, &self->dead_threads);
}

static bool symbol__match_parent_regex(struct symbol *sym)
{
	if (sym->name && !regexec(&parent_regex, sym->name, 0, NULL, 0))
		return true;

	return false;
}

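/*
 * Walk a raw ip_callchain and resolve each address to a map/symbol,
 * appending the results to the session's callchain cursor. Addresses at
 * or above PERF_CONTEXT_MAX are not real IPs but context markers that
 * switch the cpumode used to resolve the entries that follow them.
 */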
int perf_session__resolve_callchain(struct perf_session *self,
				    struct thread *thread,
				    struct ip_callchain *chain,
				    struct symbol **parent)
{
	u8 cpumode = PERF_RECORD_MISC_USER;
	unsigned int i;
	int err;

	callchain_cursor_reset(&self->callchain_cursor);

	for (i = 0; i < chain->nr; i++) {
		u64 ip = chain->ips[i];
		struct addr_location al;

		if (ip >= PERF_CONTEXT_MAX) {
			switch (ip) {
			case PERF_CONTEXT_HV:
				cpumode = PERF_RECORD_MISC_HYPERVISOR;	break;
			case PERF_CONTEXT_KERNEL:
				cpumode = PERF_RECORD_MISC_KERNEL;	break;
			case PERF_CONTEXT_USER:
				cpumode = PERF_RECORD_MISC_USER;	break;
			default:
				break;
			}
			continue;
		}

		al.filtered = false;
		thread__find_addr_location(thread, self, cpumode,
				MAP__FUNCTION, thread->pid, ip, &al, NULL);
		if (al.sym != NULL) {
			if (sort__has_parent && !*parent &&
			    symbol__match_parent_regex(al.sym))
				*parent = al.sym;
			if (!symbol_conf.use_callchain)
				break;
		}

		err = callchain_cursor_append(&self->callchain_cursor,
					      ip, al.map, al.sym);
		if (err)
			return err;
	}

	return 0;
}

static int process_event_synth_stub(union perf_event *event __used,
				    struct perf_session *session __used)
{
	dump_printf(": unhandled!\n");
	return 0;
}

static int process_event_sample_stub(union perf_event *event __used,
				     struct perf_sample *sample __used,
				     struct perf_evsel *evsel __used,
				     struct perf_session *session __used)
{
	dump_printf(": unhandled!\n");
	return 0;
}

static int process_event_stub(union perf_event *event __used,
			      struct perf_sample *sample __used,
			      struct perf_session *session __used)
{
	dump_printf(": unhandled!\n");
	return 0;
}

static int process_finished_round_stub(union perf_event *event __used,
				       struct perf_session *session __used,
				       struct perf_event_ops *ops __used)
{
	dump_printf(": unhandled!\n");
	return 0;
}

static int process_finished_round(union perf_event *event,
				  struct perf_session *session,
				  struct perf_event_ops *ops);

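/*
 * Fill every unset callback with a safe default so callers never need
 * to NULL-check: most fall back to no-op stubs, lost events get real
 * accounting, and finished_round depends on whether the handler asked
 * for time-ordered samples.
 */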
static void perf_event_ops__fill_defaults(struct perf_event_ops *handler)
{
	if (handler->sample == NULL)
		handler->sample = process_event_sample_stub;
	if (handler->mmap == NULL)
		handler->mmap = process_event_stub;
	if (handler->comm == NULL)
		handler->comm = process_event_stub;
	if (handler->fork == NULL)
		handler->fork = process_event_stub;
	if (handler->exit == NULL)
		handler->exit = process_event_stub;
	if (handler->lost == NULL)
		handler->lost = perf_event__process_lost;
	if (handler->read == NULL)
		handler->read = process_event_stub;
	if (handler->throttle == NULL)
		handler->throttle = process_event_stub;
	if (handler->unthrottle == NULL)
		handler->unthrottle = process_event_stub;
	if (handler->attr == NULL)
		handler->attr = process_event_synth_stub;
	if (handler->event_type == NULL)
		handler->event_type = process_event_synth_stub;
	if (handler->tracing_data == NULL)
		handler->tracing_data = process_event_synth_stub;
	if (handler->build_id == NULL)
		handler->build_id = process_event_synth_stub;
	if (handler->finished_round == NULL) {
		if (handler->ordered_samples)
			handler->finished_round = process_finished_round;
		else
			handler->finished_round = process_finished_round_stub;
	}
}

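/* Byte-swap a buffer as a sequence of u64s; byte_size must be a multiple of 8. */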
void mem_bswap_64(void *src, int byte_size)
{
	u64 *m = src;

	while (byte_size > 0) {
		*m = bswap_64(*m);
		byte_size -= sizeof(u64);
		++m;
	}
}

static void perf_event__all64_swap(union perf_event *event)
{
	struct perf_event_header *hdr = &event->header;
	mem_bswap_64(hdr + 1, event->header.size - sizeof(*hdr));
}

static void perf_event__comm_swap(union perf_event *event)
{
	event->comm.pid = bswap_32(event->comm.pid);
	event->comm.tid = bswap_32(event->comm.tid);
}

static void perf_event__mmap_swap(union perf_event *event)
{
	event->mmap.pid	  = bswap_32(event->mmap.pid);
	event->mmap.tid	  = bswap_32(event->mmap.tid);
	event->mmap.start = bswap_64(event->mmap.start);
	event->mmap.len	  = bswap_64(event->mmap.len);
	event->mmap.pgoff = bswap_64(event->mmap.pgoff);
}

static void perf_event__task_swap(union perf_event *event)
{
	event->fork.pid	 = bswap_32(event->fork.pid);
	event->fork.tid	 = bswap_32(event->fork.tid);
	event->fork.ppid = bswap_32(event->fork.ppid);
	event->fork.ptid = bswap_32(event->fork.ptid);
	event->fork.time = bswap_64(event->fork.time);
}

static void perf_event__read_swap(union perf_event *event)
{
	event->read.pid		 = bswap_32(event->read.pid);
	event->read.tid		 = bswap_32(event->read.tid);
	event->read.value	 = bswap_64(event->read.value);
	event->read.time_enabled = bswap_64(event->read.time_enabled);
	event->read.time_running = bswap_64(event->read.time_running);
	event->read.id		 = bswap_64(event->read.id);
}

/* exported for swapping attributes in file header */
void perf_event__attr_swap(struct perf_event_attr *attr)
{
	attr->type		= bswap_32(attr->type);
	attr->size		= bswap_32(attr->size);
	attr->config		= bswap_64(attr->config);
	attr->sample_period	= bswap_64(attr->sample_period);
	attr->sample_type	= bswap_64(attr->sample_type);
	attr->read_format	= bswap_64(attr->read_format);
	attr->wakeup_events	= bswap_32(attr->wakeup_events);
	attr->bp_type		= bswap_32(attr->bp_type);
	attr->bp_addr		= bswap_64(attr->bp_addr);
	attr->bp_len		= bswap_64(attr->bp_len);
}

static void perf_event__hdr_attr_swap(union perf_event *event)
{
	size_t size;

	perf_event__attr_swap(&event->attr.attr);

	size = event->header.size;
	size -= (void *)&event->attr.id - (void *)event;
	mem_bswap_64(event->attr.id, size);
}

static void perf_event__event_type_swap(union perf_event *event)
{
	event->event_type.event_type.event_id =
		bswap_64(event->event_type.event_type.event_id);
}

static void perf_event__tracing_data_swap(union perf_event *event)
{
	event->tracing_data.size = bswap_32(event->tracing_data.size);
}

typedef void (*perf_event__swap_op)(union perf_event *event);

static perf_event__swap_op perf_event__swap_ops[] = {
	[PERF_RECORD_MMAP]		  = perf_event__mmap_swap,
	[PERF_RECORD_COMM]		  = perf_event__comm_swap,
	[PERF_RECORD_FORK]		  = perf_event__task_swap,
	[PERF_RECORD_EXIT]		  = perf_event__task_swap,
	[PERF_RECORD_LOST]		  = perf_event__all64_swap,
	[PERF_RECORD_READ]		  = perf_event__read_swap,
	[PERF_RECORD_SAMPLE]		  = perf_event__all64_swap,
	[PERF_RECORD_HEADER_ATTR]	  = perf_event__hdr_attr_swap,
	[PERF_RECORD_HEADER_EVENT_TYPE]	  = perf_event__event_type_swap,
	[PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap,
	[PERF_RECORD_HEADER_BUILD_ID]	  = NULL,
	[PERF_RECORD_HEADER_MAX]	  = NULL,
};

struct sample_queue {
	u64			timestamp;
	u64			file_offset;
	union perf_event	*event;
	struct list_head	list;
};

static void perf_session_free_sample_buffers(struct perf_session *session)
{
	struct ordered_samples *os = &session->ordered_samples;

	while (!list_empty(&os->to_free)) {
		struct sample_queue *sq;

		sq = list_entry(os->to_free.next, struct sample_queue, list);
		list_del(&sq->list);
		free(sq);
	}
}

static int perf_session_deliver_event(struct perf_session *session,
				      union perf_event *event,
				      struct perf_sample *sample,
				      struct perf_event_ops *ops,
				      u64 file_offset);

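/*
 * Deliver every queued event with a timestamp at or below the current
 * flush limit, in timestamp order. Flushed entries are recycled through
 * the sample_cache list rather than freed.
 */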
static void flush_sample_queue(struct perf_session *s,
			       struct perf_event_ops *ops)
{
	struct ordered_samples *os = &s->ordered_samples;
	struct list_head *head = &os->samples;
	struct sample_queue *tmp, *iter;
	struct perf_sample sample;
	u64 limit = os->next_flush;
	u64 last_ts = os->last_sample ? os->last_sample->timestamp : 0ULL;
	int ret;

	if (!ops->ordered_samples || !limit)
		return;

	list_for_each_entry_safe(iter, tmp, head, list) {
		if (iter->timestamp > limit)
			break;

		ret = perf_session__parse_sample(s, iter->event, &sample);
		if (ret)
			pr_err("Can't parse sample, err = %d\n", ret);
		else
			perf_session_deliver_event(s, iter->event, &sample, ops,
						   iter->file_offset);

		os->last_flush = iter->timestamp;
		list_del(&iter->list);
		list_add(&iter->list, &os->sample_cache);
	}

	if (list_empty(head)) {
		os->last_sample = NULL;
	} else if (last_ts <= limit) {
		os->last_sample =
			list_entry(head->prev, struct sample_queue, list);
	}
}

/*
 * When perf record finishes a pass on all buffers, it records this pseudo
 * event.
 * We record the max timestamp t found in pass n.
 * Assuming these timestamps are monotonic across cpus, we know that if
 * a buffer still has events with timestamps below t, they will all be
 * available and then read in pass n + 1.
 * Hence when we start to read pass n + 2, we can safely flush all
 * events with timestamps below t.
 *
 *    ============ PASS n =================
 *       CPU 0         |   CPU 1
 *                     |
 *    cnt1 timestamps  |   cnt2 timestamps
 *          1          |         2
 *          2          |         3
 *          -          |         4  <--- max recorded
 *
 *    ============ PASS n + 1 ==============
 *       CPU 0         |   CPU 1
 *                     |
 *    cnt1 timestamps  |   cnt2 timestamps
 *          3          |         5
 *          4          |         6
 *          5          |         7 <---- max recorded
 *
 *      Flush all events below timestamp 4
 *
 *    ============ PASS n + 2 ==============
 *       CPU 0         |   CPU 1
 *                     |
 *    cnt1 timestamps  |   cnt2 timestamps
 *          6          |         8
 *          7          |         9
 *          -          |         10
 *
 *      Flush all events below timestamp 7
 *      etc...
 */
static int process_finished_round(union perf_event *event __used,
				  struct perf_session *session,
				  struct perf_event_ops *ops)
{
	flush_sample_queue(session, ops);
	session->ordered_samples.next_flush = session->ordered_samples.max_timestamp;

	return 0;
}

/* The queue is ordered by time */
static void __queue_event(struct sample_queue *new, struct perf_session *s)
{
	struct ordered_samples *os = &s->ordered_samples;
	struct sample_queue *sample = os->last_sample;
	u64 timestamp = new->timestamp;
	struct list_head *p;

	os->last_sample = new;

	if (!sample) {
		list_add(&new->list, &os->samples);
		os->max_timestamp = timestamp;
		return;
	}

	/*
	 * last_sample might point to some random place in the list as it's
	 * the last queued event. We expect that the new event is close to
	 * this.
	 */
	if (sample->timestamp <= timestamp) {
		while (sample->timestamp <= timestamp) {
			p = sample->list.next;
			if (p == &os->samples) {
				list_add_tail(&new->list, &os->samples);
				os->max_timestamp = timestamp;
				return;
			}
			sample = list_entry(p, struct sample_queue, list);
		}
		list_add_tail(&new->list, &sample->list);
	} else {
		while (sample->timestamp > timestamp) {
			p = sample->list.prev;
			if (p == &os->samples) {
				list_add(&new->list, &os->samples);
				return;
			}
			sample = list_entry(p, struct sample_queue, list);
		}
		list_add(&new->list, &sample->list);
	}
}

#define MAX_SAMPLE_BUFFER	(64 * 1024 / sizeof(struct sample_queue))

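/*
 * Queue an event for ordered delivery. sample_queue nodes come from, in
 * order of preference: the recycle cache refilled by the flusher, the
 * current 64KB slab, or a freshly malloc'ed slab whose first slot is
 * sacrificed to link the slab itself onto the to_free list (hence
 * sample_buffer_idx starting at 2 and "new" pointing at slot 1).
 */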
static int perf_session_queue_event(struct perf_session *s, union perf_event *event,
				    struct perf_sample *sample, u64 file_offset)
{
	struct ordered_samples *os = &s->ordered_samples;
	struct list_head *sc = &os->sample_cache;
	u64 timestamp = sample->time;
	struct sample_queue *new;

	if (!timestamp || timestamp == ~0ULL)
		return -ETIME;

	if (timestamp < s->ordered_samples.last_flush) {
		printf("Warning: Timestamp below last timeslice flush\n");
		return -EINVAL;
	}

	if (!list_empty(sc)) {
		new = list_entry(sc->next, struct sample_queue, list);
		list_del(&new->list);
	} else if (os->sample_buffer) {
		new = os->sample_buffer + os->sample_buffer_idx;
		if (++os->sample_buffer_idx == MAX_SAMPLE_BUFFER)
			os->sample_buffer = NULL;
	} else {
		os->sample_buffer = malloc(MAX_SAMPLE_BUFFER * sizeof(*new));
		if (!os->sample_buffer)
			return -ENOMEM;
		list_add(&os->sample_buffer->list, &os->to_free);
		os->sample_buffer_idx = 2;
		new = os->sample_buffer + 1;
	}

	new->timestamp = timestamp;
	new->file_offset = file_offset;
	new->event = event;

	__queue_event(new, s);

	return 0;
}

static void callchain__printf(struct perf_sample *sample)
{
	unsigned int i;

	printf("... chain: nr:%" PRIu64 "\n", sample->callchain->nr);

	for (i = 0; i < sample->callchain->nr; i++)
		printf("..... %2d: %016" PRIx64 "\n",
		       i, sample->callchain->ips[i]);
}

static void perf_session__print_tstamp(struct perf_session *session,
				       union perf_event *event,
				       struct perf_sample *sample)
{
	if (event->header.type != PERF_RECORD_SAMPLE &&
	    !session->sample_id_all) {
		fputs("-1 -1 ", stdout);
		return;
	}

	if ((session->sample_type & PERF_SAMPLE_CPU))
		printf("%u ", sample->cpu);

	if (session->sample_type & PERF_SAMPLE_TIME)
		printf("%" PRIu64 " ", sample->time);
}

static void dump_event(struct perf_session *session, union perf_event *event,
		       u64 file_offset, struct perf_sample *sample)
{
	if (!dump_trace)
		return;

	printf("\n%#" PRIx64 " [%#x]: event: %d\n",
	       file_offset, event->header.size, event->header.type);

	trace_event(event);

	if (sample)
		perf_session__print_tstamp(session, event, sample);

	printf("%#" PRIx64 " [%#x]: PERF_RECORD_%s", file_offset,
	       event->header.size, perf_event__name(event->header.type));
}

static void dump_sample(struct perf_session *session, union perf_event *event,
			struct perf_sample *sample)
{
	if (!dump_trace)
		return;

	printf("(IP, %d): %d/%d: %#" PRIx64 " period: %" PRIu64 "\n",
	       event->header.misc, sample->pid, sample->tid, sample->ip,
	       sample->period);

	if (session->sample_type & PERF_SAMPLE_CALLCHAIN)
		callchain__printf(sample);
}

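/*
 * Dispatch one kernel-generated event to the matching ops callback.
 * Samples are additionally resolved to their evsel by sample id; events
 * with an unknown id or type are counted and rejected.
 */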
static int perf_session_deliver_event(struct perf_session *session,
				      union perf_event *event,
				      struct perf_sample *sample,
				      struct perf_event_ops *ops,
				      u64 file_offset)
{
	struct perf_evsel *evsel;

	dump_event(session, event, file_offset, sample);

	switch (event->header.type) {
	case PERF_RECORD_SAMPLE:
		dump_sample(session, event, sample);
		evsel = perf_evlist__id2evsel(session->evlist, sample->id);
		if (evsel == NULL) {
			++session->hists.stats.nr_unknown_id;
			return -1;
		}
		return ops->sample(event, sample, evsel, session);
	case PERF_RECORD_MMAP:
		return ops->mmap(event, sample, session);
	case PERF_RECORD_COMM:
		return ops->comm(event, sample, session);
	case PERF_RECORD_FORK:
		return ops->fork(event, sample, session);
	case PERF_RECORD_EXIT:
		return ops->exit(event, sample, session);
	case PERF_RECORD_LOST:
		return ops->lost(event, sample, session);
	case PERF_RECORD_READ:
		return ops->read(event, sample, session);
	case PERF_RECORD_THROTTLE:
		return ops->throttle(event, sample, session);
	case PERF_RECORD_UNTHROTTLE:
		return ops->unthrottle(event, sample, session);
	default:
		++session->hists.stats.nr_unknown_events;
		return -1;
	}
}

static int perf_session__preprocess_sample(struct perf_session *session,
					   union perf_event *event, struct perf_sample *sample)
{
	if (event->header.type != PERF_RECORD_SAMPLE ||
	    !(session->sample_type & PERF_SAMPLE_CALLCHAIN))
		return 0;

	if (!ip_callchain__valid(sample->callchain, event)) {
		pr_debug("call-chain problem with event, skipping it.\n");
		++session->hists.stats.nr_invalid_chains;
		session->hists.stats.total_invalid_chains += sample->period;
		return -EINVAL;
	}
	return 0;
}

static int perf_session__process_user_event(struct perf_session *session, union perf_event *event,
					    struct perf_event_ops *ops, u64 file_offset)
{
	dump_event(session, event, file_offset, NULL);

	/* These events are processed right away */
	switch (event->header.type) {
	case PERF_RECORD_HEADER_ATTR:
		return ops->attr(event, session);
	case PERF_RECORD_HEADER_EVENT_TYPE:
		return ops->event_type(event, session);
	case PERF_RECORD_HEADER_TRACING_DATA:
		/* setup for reading amidst mmap */
		lseek(session->fd, file_offset, SEEK_SET);
		return ops->tracing_data(event, session);
	case PERF_RECORD_HEADER_BUILD_ID:
		return ops->build_id(event, session);
	case PERF_RECORD_FINISHED_ROUND:
		return ops->finished_round(event, session, ops);
	default:
		return -EINVAL;
	}
}

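/*
 * Entry point for every record in the stream: byte-swap the body if the
 * file was written with the opposite endianness, account the event
 * type, hand synthetic (user) events off for immediate processing, then
 * parse the sample data and either queue the event for ordered delivery
 * or deliver it straight away.
 */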
static int perf_session__process_event(struct perf_session *session,
				       union perf_event *event,
				       struct perf_event_ops *ops,
				       u64 file_offset)
{
	struct perf_sample sample;
	int ret;

	/*
	 * Validate the type before it is used to index the swap table:
	 * a corrupt or unknown type must not read past the array.
	 */
	if (event->header.type >= PERF_RECORD_HEADER_MAX)
		return -EINVAL;

	if (session->header.needs_swap &&
	    perf_event__swap_ops[event->header.type])
		perf_event__swap_ops[event->header.type](event);

	hists__inc_nr_events(&session->hists, event->header.type);

	if (event->header.type >= PERF_RECORD_USER_TYPE_START)
		return perf_session__process_user_event(session, event, ops, file_offset);

	/*
	 * For all kernel events we get the sample data
	 */
	ret = perf_session__parse_sample(session, event, &sample);
	if (ret)
		return ret;

	/* Preprocess sample records - precheck callchains */
	if (perf_session__preprocess_sample(session, event, &sample))
		return 0;

	if (ops->ordered_samples) {
		ret = perf_session_queue_event(session, event, &sample,
					       file_offset);
		if (ret != -ETIME)
			return ret;
	}

	return perf_session_deliver_event(session, event, &sample, ops,
					  file_offset);
}

void perf_event_header__bswap(struct perf_event_header *self)
{
	self->type = bswap_32(self->type);
	self->misc = bswap_16(self->misc);
	self->size = bswap_16(self->size);
}

static struct thread *perf_session__register_idle_thread(struct perf_session *self)
{
	struct thread *thread = perf_session__findnew(self, 0);

	if (thread == NULL || thread__set_comm(thread, "swapper")) {
		pr_err("problem inserting idle task.\n");
		thread = NULL;
	}

	return thread;
}

static void perf_session__warn_about_errors(const struct perf_session *session,
					    const struct perf_event_ops *ops)
{
	if (ops->lost == perf_event__process_lost &&
	    session->hists.stats.total_lost != 0) {
		ui__warning("Processed %" PRIu64 " events and LOST %" PRIu64
			    "!\n\nCheck IO/CPU overload!\n\n",
			    session->hists.stats.total_period,
			    session->hists.stats.total_lost);
	}

	if (session->hists.stats.nr_unknown_events != 0) {
		ui__warning("Found %u unknown events!\n\n"
			    "Is this an older tool processing a perf.data "
			    "file generated by a more recent tool?\n\n"
			    "If that is not the case, consider "
			    "reporting to linux-kernel@vger.kernel.org.\n\n",
			    session->hists.stats.nr_unknown_events);
	}

	if (session->hists.stats.nr_unknown_id != 0) {
		ui__warning("%u samples with id not present in the header\n",
			    session->hists.stats.nr_unknown_id);
	}

	if (session->hists.stats.nr_invalid_chains != 0) {
		ui__warning("Found invalid callchains!\n\n"
			    "%u out of %u events were discarded for this reason.\n\n"
			    "Consider reporting to linux-kernel@vger.kernel.org.\n\n",
			    session->hists.stats.nr_invalid_chains,
			    session->hists.stats.nr_events[PERF_RECORD_SAMPLE]);
	}
}

#define session_done()	(*(volatile int *)(&session_done))
volatile int session_done;

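/*
 * Read events from a live pipe: each record's header is read first to
 * learn its size, then the payload. On an unparseable record we fall
 * back to u64 alignment and skip forward 8 bytes at a time, hoping to
 * resynchronize with the stream.
 */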
static int __perf_session__process_pipe_events(struct perf_session *self,
					       struct perf_event_ops *ops)
{
	union perf_event event;
	uint32_t size;
	int skip = 0;
	u64 head;
	int err;
	void *p;

	perf_event_ops__fill_defaults(ops);

	head = 0;
more:
	err = readn(self->fd, &event, sizeof(struct perf_event_header));
	if (err <= 0) {
		if (err == 0)
			goto done;

		pr_err("failed to read event header\n");
		goto out_err;
	}

	if (self->header.needs_swap)
		perf_event_header__bswap(&event.header);

	size = event.header.size;
	if (size == 0)
		size = 8;

	p = &event;
	p += sizeof(struct perf_event_header);

	if (size - sizeof(struct perf_event_header)) {
		err = readn(self->fd, p, size - sizeof(struct perf_event_header));
		if (err <= 0) {
			if (err == 0) {
				pr_err("unexpected end of event stream\n");
				goto done;
			}

			pr_err("failed to read event data\n");
			goto out_err;
		}
	}

	if (size == 0 ||
	    (skip = perf_session__process_event(self, &event, ops, head)) < 0) {
		dump_printf("%#" PRIx64 " [%#x]: skipping unknown header type: %d\n",
			    head, event.header.size, event.header.type);
		/*
		 * assume we lost track of the stream, check alignment, and
		 * increment a single u64 in the hope to catch on again 'soon'.
		 */
		if (unlikely(head & 7))
			head &= ~7ULL;

		size = 8;
	}

	head += size;

	if (skip > 0)
		head += skip;

	if (!session_done())
		goto more;
done:
	err = 0;
out_err:
	perf_session__warn_about_errors(self, ops);
	perf_session_free_sample_buffers(self);
	return err;
}

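/*
 * Return a pointer to the event at offset "head" within the current
 * mmap window, or NULL if the record (or even its header) extends past
 * the end of the window and a remap is needed.
 */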
static union perf_event *
fetch_mmaped_event(struct perf_session *session,
		   u64 head, size_t mmap_size, char *buf)
{
	union perf_event *event;

	/*
	 * Ensure we have enough space remaining to read
	 * the size of the event in the headers.
	 */
	if (head + sizeof(event->header) > mmap_size)
		return NULL;

	event = (union perf_event *)(buf + head);

	if (session->header.needs_swap)
		perf_event_header__bswap(&event->header);

	if (head + event->header.size > mmap_size)
		return NULL;

	return event;
}

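/*
 * Process an on-disk data section by mmaping it in mmap_window-sized,
 * page-aligned slices. When an event straddles the end of the current
 * slice, the window is advanced and remapped; up to 8 old mappings are
 * kept alive (recycled round-robin) so events already handed out stay
 * valid. needs_swap forces a private, writable mapping because the
 * swap routines modify the records in place.
 */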
int __perf_session__process_events(struct perf_session *session,
				   u64 data_offset, u64 data_size,
				   u64 file_size, struct perf_event_ops *ops)
{
	u64 head, page_offset, file_offset, file_pos, progress_next;
	int err, mmap_prot, mmap_flags, map_idx = 0;
	struct ui_progress *progress;
	size_t	page_size, mmap_size;
	char *buf, *mmaps[8];
	union perf_event *event;
	uint32_t size;

	perf_event_ops__fill_defaults(ops);

	page_size = sysconf(_SC_PAGESIZE);

	page_offset = page_size * (data_offset / page_size);
	file_offset = page_offset;
	head = data_offset - page_offset;

	if (data_offset + data_size < file_size)
		file_size = data_offset + data_size;

	progress_next = file_size / 16;
	progress = ui_progress__new("Processing events...", file_size);
	if (progress == NULL)
		return -1;

	mmap_size = session->mmap_window;
	if (mmap_size > file_size)
		mmap_size = file_size;

	memset(mmaps, 0, sizeof(mmaps));

	mmap_prot  = PROT_READ;
	mmap_flags = MAP_SHARED;

	if (session->header.needs_swap) {
		mmap_prot  |= PROT_WRITE;
		mmap_flags = MAP_PRIVATE;
	}
remap:
	buf = mmap(NULL, mmap_size, mmap_prot, mmap_flags, session->fd,
		   file_offset);
	if (buf == MAP_FAILED) {
		pr_err("failed to mmap file\n");
		err = -errno;
		goto out_err;
	}
	mmaps[map_idx] = buf;
	map_idx = (map_idx + 1) & (ARRAY_SIZE(mmaps) - 1);
	file_pos = file_offset + head;

more:
	event = fetch_mmaped_event(session, head, mmap_size, buf);
	if (!event) {
		if (mmaps[map_idx]) {
			munmap(mmaps[map_idx], mmap_size);
			mmaps[map_idx] = NULL;
		}

		page_offset = page_size * (head / page_size);
		file_offset += page_offset;
		head -= page_offset;
		goto remap;
	}

	size = event->header.size;

	if (size == 0 ||
	    perf_session__process_event(session, event, ops, file_pos) < 0) {
		dump_printf("%#" PRIx64 " [%#x]: skipping unknown header type: %d\n",
			    file_offset + head, event->header.size,
			    event->header.type);
		/*
		 * assume we lost track of the stream, check alignment, and
		 * increment a single u64 in the hope to catch on again 'soon'.
		 */
		if (unlikely(head & 7))
			head &= ~7ULL;

		size = 8;
	}

	head += size;
	file_pos += size;

	if (file_pos >= progress_next) {
		progress_next += file_size / 16;
		ui_progress__update(progress, file_pos);
	}

	if (file_pos < file_size)
		goto more;

	err = 0;
	/* do the final flush for ordered samples */
	session->ordered_samples.next_flush = ULLONG_MAX;
	flush_sample_queue(session, ops);
out_err:
	ui_progress__delete(progress);
	perf_session__warn_about_errors(session, ops);
	perf_session_free_sample_buffers(session);
	return err;
}


int perf_session__process_events(struct perf_session *self,
				 struct perf_event_ops *ops)
{
	int err;

	if (perf_session__register_idle_thread(self) == NULL)
		return -ENOMEM;

	if (!self->fd_pipe)
		err = __perf_session__process_events(self,
						     self->header.data_offset,
						     self->header.data_size,
						     self->size, ops);
	else
		err = __perf_session__process_pipe_events(self, ops);

	return err;
}

bool perf_session__has_traces(struct perf_session *self, const char *msg)
{
	if (!(self->sample_type & PERF_SAMPLE_RAW)) {
		pr_err("No trace sample to read. Did you call 'perf %s'?\n", msg);
		return false;
	}

	return true;
}

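/*
 * Register the kallsyms reference symbol used for relocating kernel
 * symbols. Anything from the first ']' onward is trimmed from the
 * stored name, and the resulting ref_reloc_sym is shared by the kernel
 * map of every map type.
 */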
int perf_session__set_kallsyms_ref_reloc_sym(struct map **maps,
					     const char *symbol_name,
					     u64 addr)
{
	char *bracket;
	enum map_type i;
	struct ref_reloc_sym *ref;

	ref = zalloc(sizeof(struct ref_reloc_sym));
	if (ref == NULL)
		return -ENOMEM;

	ref->name = strdup(symbol_name);
	if (ref->name == NULL) {
		free(ref);
		return -ENOMEM;
	}

	bracket = strchr(ref->name, ']');
	if (bracket)
		*bracket = '\0';

	ref->addr = addr;

	for (i = 0; i < MAP__NR_TYPES; ++i) {
		struct kmap *kmap = map__kmap(maps[i]);
		kmap->ref_reloc_sym = ref;
	}

	return 0;
}

size_t perf_session__fprintf_dsos(struct perf_session *self, FILE *fp)
{
	return __dsos__fprintf(&self->host_machine.kernel_dsos, fp) +
	       __dsos__fprintf(&self->host_machine.user_dsos, fp) +
	       machines__fprintf_dsos(&self->machines, fp);
}

size_t perf_session__fprintf_dsos_buildid(struct perf_session *self, FILE *fp,
					  bool with_hits)
{
	size_t ret = machine__fprintf_dsos_buildid(&self->host_machine, fp, with_hits);
	return ret + machines__fprintf_dsos_buildid(&self->machines, fp, with_hits);
}

size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp)
{
	struct perf_evsel *pos;
	size_t ret = fprintf(fp, "Aggregated stats:\n");

	ret += hists__fprintf_nr_events(&session->hists, fp);

	list_for_each_entry(pos, &session->evlist->entries, node) {
		ret += fprintf(fp, "%s stats:\n", event_name(pos));
		ret += hists__fprintf_nr_events(&pos->hists, fp);
	}

	return ret;
}

struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session,
						   unsigned int type)
{
	struct perf_evsel *pos;

	list_for_each_entry(pos, &session->evlist->entries, node) {
		if (pos->attr.type == type)
			return pos;
	}
	return NULL;
}

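/*
 * Print the resolved symbol for one sample, or, when callchains are
 * enabled and present, one "address symbol (dso)" line per resolved
 * frame, with empty strings standing in for anything that could not be
 * resolved.
 */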
void perf_session__print_symbols(union perf_event *event,
				 struct perf_sample *sample,
				 struct perf_session *session)
{
	struct addr_location al;
	const char *symname, *dsoname;
	struct callchain_cursor *cursor = &session->callchain_cursor;
	struct callchain_cursor_node *node;

	if (perf_event__preprocess_sample(event, session, &al, sample,
					  NULL) < 0) {
		error("problem processing %d event, skipping it.\n",
		      event->header.type);
		return;
	}

	if (symbol_conf.use_callchain && sample->callchain) {

		if (perf_session__resolve_callchain(session, al.thread,
						    sample->callchain, NULL) != 0) {
			if (verbose)
				error("Failed to resolve callchain. Skipping\n");
			return;
		}
		callchain_cursor_commit(cursor);

		while (1) {
			node = callchain_cursor_current(cursor);
			if (!node)
				break;

			if (node->sym && node->sym->name)
				symname = node->sym->name;
			else
				symname = "";

			if (node->map && node->map->dso && node->map->dso->name)
				dsoname = node->map->dso->name;
			else
				dsoname = "";

			printf("\t%16" PRIx64 " %s (%s)\n", node->ip, symname, dsoname);

			callchain_cursor_advance(cursor);
		}

	} else {
		if (al.sym && al.sym->name)
			symname = al.sym->name;
		else
			symname = "";

		if (al.map && al.map->dso && al.map->dso->name)
			dsoname = al.map->dso->name;
		else
			dsoname = "";

		printf("%16" PRIx64 " %s (%s)", al.addr, symname, dsoname);
	}
}
   1277