#include "builtin.h"
#include "perf.h"

#include "util/util.h"
#include "util/cache.h"
#include "util/symbol.h"
#include "util/thread.h"
#include "util/header.h"
#include "util/session.h"

#include "util/parse-options.h"
#include "util/trace-event.h"

#include "util/debug.h"

/* ANDROID_CHANGE_BEGIN */
#ifndef __APPLE__
#include <sys/prctl.h>
#endif
/* ANDROID_CHANGE_END */

#include <semaphore.h>
/* ANDROID_CHANGE_BEGIN */
#ifdef __BIONIC__
/* PTHREAD_STACK_MIN is defined as (2 * PAGE_SIZE) */
#include <asm/page.h>
#endif
/* ANDROID_CHANGE_END */
#include <pthread.h>
#include <math.h>

static char			const *input_name = "perf.data";

static char			default_sort_order[] = "avg, max, switch, runtime";
static const char		*sort_order = default_sort_order;

static int			profile_cpu = -1;

#define PR_SET_NAME		15               /* Set process name */
#define MAX_CPUS		4096

static u64			run_measurement_overhead;
static u64			sleep_measurement_overhead;

#define COMM_LEN		20
#define SYM_LEN			129

#define MAX_PID			65536

static unsigned long		nr_tasks;

struct sched_atom;

struct task_desc {
	unsigned long		nr;
	unsigned long		pid;
	char			comm[COMM_LEN];

	unsigned long		nr_events;
	unsigned long		curr_event;
	struct sched_atom	**atoms;

	pthread_t		thread;
	sem_t			sleep_sem;

	sem_t			ready_for_work;
	sem_t			work_done_sem;

	u64			cpu_usage;
};

enum sched_event_type {
	SCHED_EVENT_RUN,
	SCHED_EVENT_SLEEP,
	SCHED_EVENT_WAKEUP,
	SCHED_EVENT_MIGRATION,
};

struct sched_atom {
	enum sched_event_type	type;
	int			specific_wait;
	u64			timestamp;
	u64			duration;
	unsigned long		nr;
	sem_t			*wait_sem;
	struct task_desc	*wakee;
};

static struct task_desc		*pid_to_task[MAX_PID];

static struct task_desc		**tasks;

static pthread_mutex_t		start_work_mutex = PTHREAD_MUTEX_INITIALIZER;
static u64			start_time;

static pthread_mutex_t		work_done_wait_mutex = PTHREAD_MUTEX_INITIALIZER;

static unsigned long		nr_run_events;
static unsigned long		nr_sleep_events;
static unsigned long		nr_wakeup_events;

static unsigned long		nr_sleep_corrections;
static unsigned long		nr_run_events_optimized;

static unsigned long		targetless_wakeups;
static unsigned long		multitarget_wakeups;

static u64			cpu_usage;
static u64			runavg_cpu_usage;
static u64			parent_cpu_usage;
static u64			runavg_parent_cpu_usage;

static unsigned long		nr_runs;
static u64			sum_runtime;
static u64			sum_fluct;
static u64			run_avg;

static unsigned int		replay_repeat = 10;
static unsigned long		nr_timestamps;
static unsigned long		nr_unordered_timestamps;
static unsigned long		nr_state_machine_bugs;
static unsigned long		nr_context_switch_bugs;
static unsigned long		nr_events;
static unsigned long		nr_lost_chunks;
static unsigned long		nr_lost_events;

#define TASK_STATE_TO_CHAR_STR "RSDTtZX"

enum thread_state {
	THREAD_SLEEPING = 0,
	THREAD_WAIT_CPU,
	THREAD_SCHED_IN,
	THREAD_IGNORE
};

struct work_atom {
	struct list_head	list;
	enum thread_state	state;
	u64			sched_out_time;
	u64			wake_up_time;
	u64			sched_in_time;
	u64			runtime;
};

struct work_atoms {
	struct list_head	work_list;
	struct thread		*thread;
	struct rb_node		node;
	u64			max_lat;
	u64			max_lat_at;
	u64			total_lat;
	u64			nb_atoms;
	u64			total_runtime;
};

typedef int (*sort_fn_t)(struct work_atoms *, struct work_atoms *);

static struct rb_root		atom_root, sorted_atom_root;

static u64			all_runtime;
static u64			all_count;

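/*
 * Timing helpers for the replay machinery: get_nsecs() reads
 * CLOCK_MONOTONIC (stubbed out on __APPLE__), burn_nsecs() spins until
 * the requested interval minus the calibrated measurement overhead has
 * elapsed, and sleep_nsecs() converts a nanosecond count into a
 * timespec for nanosleep().  The calibrate_*() functions below time ten
 * no-op iterations of each primitive and keep the minimum delta as the
 * fixed overhead, so replayed run/sleep atoms are not inflated by the
 * cost of the measurement itself.
 */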
static u64 get_nsecs(void)
{
	/* ANDROID_CHANGE_BEGIN */
#ifndef __APPLE__
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);

	return ts.tv_sec * 1000000000ULL + ts.tv_nsec;
#else
	return 0;
#endif
	/* ANDROID_CHANGE_END */
}

static void burn_nsecs(u64 nsecs)
{
	u64 T0 = get_nsecs(), T1;

	do {
		T1 = get_nsecs();
	} while (T1 + run_measurement_overhead < T0 + nsecs);
}

static void sleep_nsecs(u64 nsecs)
{
	struct timespec ts;

	ts.tv_nsec = nsecs % 1000000000ULL;
	ts.tv_sec = nsecs / 1000000000ULL;
	nanosleep(&ts, NULL);
}

static void calibrate_run_measurement_overhead(void)
{
	u64 T0, T1, delta, min_delta = 1000000000ULL;
	int i;

	for (i = 0; i < 10; i++) {
		T0 = get_nsecs();
		burn_nsecs(0);
		T1 = get_nsecs();
		delta = T1-T0;
		min_delta = min(min_delta, delta);
	}
	run_measurement_overhead = min_delta;

	printf("run measurement overhead: %" PRIu64 " nsecs\n", min_delta);
}

static void calibrate_sleep_measurement_overhead(void)
{
	u64 T0, T1, delta, min_delta = 1000000000ULL;
	int i;

	for (i = 0; i < 10; i++) {
		T0 = get_nsecs();
		sleep_nsecs(10000);
		T1 = get_nsecs();
		delta = T1-T0;
		min_delta = min(min_delta, delta);
	}
	min_delta -= 10000;
	sleep_measurement_overhead = min_delta;

	printf("sleep measurement overhead: %" PRIu64 " nsecs\n", min_delta);
}

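/*
 * Event atoms are appended to a per-task array that is grown with
 * realloc() one element at a time; task->nr_events indexes the slot for
 * the next atom.  Replay later walks task->atoms[] in order, so the
 * array preserves the per-task ordering of the original trace.
 */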
static struct sched_atom *
get_new_event(struct task_desc *task, u64 timestamp)
{
	struct sched_atom *event = zalloc(sizeof(*event));
	unsigned long idx = task->nr_events;
	size_t size;

	event->timestamp = timestamp;
	event->nr = idx;

	task->nr_events++;
	size = sizeof(struct sched_atom *) * task->nr_events;
	task->atoms = realloc(task->atoms, size);
	BUG_ON(!task->atoms);

	task->atoms[idx] = event;

	return event;
}

static struct sched_atom *last_event(struct task_desc *task)
{
	if (!task->nr_events)
		return NULL;

	return task->atoms[task->nr_events - 1];
}

static void
add_sched_event_run(struct task_desc *task, u64 timestamp, u64 duration)
{
	struct sched_atom *event, *curr_event = last_event(task);

	/*
	 * optimize an existing RUN event by merging this one
	 * to it:
	 */
	if (curr_event && curr_event->type == SCHED_EVENT_RUN) {
		nr_run_events_optimized++;
		curr_event->duration += duration;
		return;
	}

	event = get_new_event(task, timestamp);

	event->type = SCHED_EVENT_RUN;
	event->duration = duration;

	nr_run_events++;
}

static void
add_sched_event_wakeup(struct task_desc *task, u64 timestamp,
		       struct task_desc *wakee)
{
	struct sched_atom *event, *wakee_event;

	event = get_new_event(task, timestamp);
	event->type = SCHED_EVENT_WAKEUP;
	event->wakee = wakee;

	wakee_event = last_event(wakee);
	if (!wakee_event || wakee_event->type != SCHED_EVENT_SLEEP) {
		targetless_wakeups++;
		return;
	}
	if (wakee_event->wait_sem) {
		multitarget_wakeups++;
		return;
	}

	wakee_event->wait_sem = zalloc(sizeof(*wakee_event->wait_sem));
	sem_init(wakee_event->wait_sem, 0, 0);
	wakee_event->specific_wait = 1;
	event->wait_sem = wakee_event->wait_sem;

	nr_wakeup_events++;
}

static void
add_sched_event_sleep(struct task_desc *task, u64 timestamp,
		      u64 task_state __used)
{
	struct sched_atom *event = get_new_event(task, timestamp);

	event->type = SCHED_EVENT_SLEEP;

	nr_sleep_events++;
}

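/*
 * pid_to_task[] is a direct-indexed table with MAX_PID slots, so
 * registration is idempotent: calling register_pid() again for a known
 * PID just returns the existing descriptor.
 */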
static struct task_desc *register_pid(unsigned long pid, const char *comm)
{
	struct task_desc *task;

	BUG_ON(pid >= MAX_PID);

	task = pid_to_task[pid];

	if (task)
		return task;

	task = zalloc(sizeof(*task));
	task->pid = pid;
	task->nr = nr_tasks;
	strcpy(task->comm, comm);
	/*
	 * every task starts in sleeping state - this gets ignored
	 * if there's no wakeup pointing to this sleep state:
	 */
	add_sched_event_sleep(task, 0, 0);

	pid_to_task[pid] = task;
	nr_tasks++;
	tasks = realloc(tasks, nr_tasks*sizeof(struct task_desc *));
	BUG_ON(!tasks);
	tasks[task->nr] = task;

	if (verbose)
		printf("registered task #%ld, PID %ld (%s)\n", nr_tasks, pid, comm);

	return task;
}


static void print_task_traces(void)
{
	struct task_desc *task;
	unsigned long i;

	for (i = 0; i < nr_tasks; i++) {
		task = tasks[i];
		printf("task %6ld (%20s:%10ld), nr_events: %ld\n",
			task->nr, task->comm, task->pid, task->nr_events);
	}
}

static void add_cross_task_wakeups(void)
{
	struct task_desc *task1, *task2;
	unsigned long i, j;

	for (i = 0; i < nr_tasks; i++) {
		task1 = tasks[i];
		j = i + 1;
		if (j == nr_tasks)
			j = 0;
		task2 = tasks[j];
		add_sched_event_wakeup(task1, 0, task2);
	}
}

static void
process_sched_event(struct task_desc *this_task __used, struct sched_atom *atom)
{
	int ret = 0;

	switch (atom->type) {
		case SCHED_EVENT_RUN:
			burn_nsecs(atom->duration);
			break;
		case SCHED_EVENT_SLEEP:
			if (atom->wait_sem)
				ret = sem_wait(atom->wait_sem);
			BUG_ON(ret);
			break;
		case SCHED_EVENT_WAKEUP:
			if (atom->wait_sem)
				ret = sem_post(atom->wait_sem);
			BUG_ON(ret);
			break;
		case SCHED_EVENT_MIGRATION:
			break;
		default:
			BUG_ON(1);
	}
}

static u64 get_cpu_usage_nsec_parent(void)
{
	struct rusage ru;
	u64 sum;
	int err;

	err = getrusage(RUSAGE_SELF, &ru);
	BUG_ON(err);

	sum =  ru.ru_utime.tv_sec*1e9 + ru.ru_utime.tv_usec*1e3;
	sum += ru.ru_stime.tv_sec*1e9 + ru.ru_stime.tv_usec*1e3;

	return sum;
}

static int self_open_counters(void)
{
/* ANDROID_CHANGE_BEGIN */
#ifndef __APPLE__
	struct perf_event_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));

	attr.type = PERF_TYPE_SOFTWARE;
	attr.config = PERF_COUNT_SW_TASK_CLOCK;

	fd = sys_perf_event_open(&attr, 0, -1, -1, 0);

	if (fd < 0)
		die("Error: sys_perf_event_open() syscall returned "
		    "with %d (%s)\n", fd, strerror(errno));
	return fd;
#else
	die("Error: sys_perf_event_open() syscall not supported on host\n");
#endif
/* ANDROID_CHANGE_END */
}

static u64 get_cpu_usage_nsec_self(int fd)
{
	u64 runtime;
	int ret;

	ret = read(fd, &runtime, sizeof(runtime));
	BUG_ON(ret != sizeof(runtime));

	return runtime;
}

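/*
 * Worker thread body.  The parent uses start_work_mutex and
 * work_done_wait_mutex as barriers: it holds a mutex while each worker
 * posts its semaphore and then blocks in pthread_mutex_lock(); when the
 * parent unlocks, all workers pass through (each immediately unlocking
 * again) and the next phase begins.  Per-thread CPU time is read from a
 * task-clock perf counter around the event loop.
 */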
static void *thread_func(void *ctx)
{
/* ANDROID_CHANGE_BEGIN */
#ifndef __APPLE__
/* ANDROID_CHANGE_END */
	struct task_desc *this_task = ctx;
	u64 cpu_usage_0, cpu_usage_1;
	unsigned long i, ret;
	char comm2[22];
	int fd;

	sprintf(comm2, ":%s", this_task->comm);
	prctl(PR_SET_NAME, comm2);
	fd = self_open_counters();

again:
	ret = sem_post(&this_task->ready_for_work);
	BUG_ON(ret);
	ret = pthread_mutex_lock(&start_work_mutex);
	BUG_ON(ret);
	ret = pthread_mutex_unlock(&start_work_mutex);
	BUG_ON(ret);

	cpu_usage_0 = get_cpu_usage_nsec_self(fd);

	for (i = 0; i < this_task->nr_events; i++) {
		this_task->curr_event = i;
		process_sched_event(this_task, this_task->atoms[i]);
	}

	cpu_usage_1 = get_cpu_usage_nsec_self(fd);
	this_task->cpu_usage = cpu_usage_1 - cpu_usage_0;
	ret = sem_post(&this_task->work_done_sem);
	BUG_ON(ret);

	ret = pthread_mutex_lock(&work_done_wait_mutex);
	BUG_ON(ret);
	ret = pthread_mutex_unlock(&work_done_wait_mutex);
	BUG_ON(ret);

	goto again;
/* ANDROID_CHANGE_BEGIN */
#endif // __APPLE__
/* ANDROID_CHANGE_END */
        /* ANDROID_CHANGE_BEGIN */
#if defined(__BIONIC__) || defined(__APPLE__)
        return NULL;
#endif
        /* ANDROID_CHANGE_END */
}

static void create_tasks(void)
{
	struct task_desc *task;
	pthread_attr_t attr;
	unsigned long i;
	int err;

	err = pthread_attr_init(&attr);
	BUG_ON(err);
        /* ANDROID_CHANGE_BEGIN */
#if 0
	err = pthread_attr_setstacksize(&attr,
			(size_t) max(16 * 1024, PTHREAD_STACK_MIN));
#else
	err = pthread_attr_setstacksize(&attr,
			(size_t) max((unsigned) 16 * 1024, (unsigned) PTHREAD_STACK_MIN));
#endif
        /* ANDROID_CHANGE_END */
	BUG_ON(err);
	err = pthread_mutex_lock(&start_work_mutex);
	BUG_ON(err);
	err = pthread_mutex_lock(&work_done_wait_mutex);
	BUG_ON(err);
	for (i = 0; i < nr_tasks; i++) {
		task = tasks[i];
		sem_init(&task->sleep_sem, 0, 0);
		sem_init(&task->ready_for_work, 0, 0);
		sem_init(&task->work_done_sem, 0, 0);
		task->curr_event = 0;
		err = pthread_create(&task->thread, &attr, thread_func, task);
		BUG_ON(err);
	}
}

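/*
 * One measurement pass: release the workers, collect their per-task CPU
 * usage, and fold it into a running average with a 9/10 decay, i.e.
 * roughly:
 *
 *	runavg = (runavg * 9 + sample) / 10;
 *
 * The first sample seeds the average directly.
 */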
static void wait_for_tasks(void)
{
	u64 cpu_usage_0, cpu_usage_1;
	struct task_desc *task;
	unsigned long i, ret;

	start_time = get_nsecs();
	cpu_usage = 0;
	pthread_mutex_unlock(&work_done_wait_mutex);

	for (i = 0; i < nr_tasks; i++) {
		task = tasks[i];
		ret = sem_wait(&task->ready_for_work);
		BUG_ON(ret);
		sem_init(&task->ready_for_work, 0, 0);
	}
	ret = pthread_mutex_lock(&work_done_wait_mutex);
	BUG_ON(ret);

	cpu_usage_0 = get_cpu_usage_nsec_parent();

	pthread_mutex_unlock(&start_work_mutex);

	for (i = 0; i < nr_tasks; i++) {
		task = tasks[i];
		ret = sem_wait(&task->work_done_sem);
		BUG_ON(ret);
		sem_init(&task->work_done_sem, 0, 0);
		cpu_usage += task->cpu_usage;
		task->cpu_usage = 0;
	}

	cpu_usage_1 = get_cpu_usage_nsec_parent();
	if (!runavg_cpu_usage)
		runavg_cpu_usage = cpu_usage;
	runavg_cpu_usage = (runavg_cpu_usage*9 + cpu_usage)/10;

	parent_cpu_usage = cpu_usage_1 - cpu_usage_0;
	if (!runavg_parent_cpu_usage)
		runavg_parent_cpu_usage = parent_cpu_usage;
	runavg_parent_cpu_usage = (runavg_parent_cpu_usage*9 +
				   parent_cpu_usage)/10;

	ret = pthread_mutex_lock(&start_work_mutex);
	BUG_ON(ret);

	for (i = 0; i < nr_tasks; i++) {
		task = tasks[i];
		sem_init(&task->sleep_sem, 0, 0);
		task->curr_event = 0;
	}
}

static void run_one_test(void)
{
	u64 T0, T1, delta, avg_delta, fluct;

	T0 = get_nsecs();
	wait_for_tasks();
	T1 = get_nsecs();

	delta = T1 - T0;
	sum_runtime += delta;
	nr_runs++;

	avg_delta = sum_runtime / nr_runs;
	if (delta < avg_delta)
		fluct = avg_delta - delta;
	else
		fluct = delta - avg_delta;
	sum_fluct += fluct;
	if (!run_avg)
		run_avg = delta;
	run_avg = (run_avg*9 + delta)/10;

	printf("#%-3ld: %0.3f, ",
		nr_runs, (double)delta/1000000.0);

	printf("ravg: %0.2f, ",
		(double)run_avg/1e6);

	printf("cpu: %0.2f / %0.2f",
		(double)cpu_usage/1e6, (double)runavg_cpu_usage/1e6);

#if 0
	/*
	 * rusage statistics done by the parent, these are less
	 * accurate than the sum_exec_runtime based statistics:
	 */
	printf(" [%0.2f / %0.2f]",
		(double)parent_cpu_usage/1e6,
		(double)runavg_parent_cpu_usage/1e6);
#endif

	printf("\n");

	if (nr_sleep_corrections)
		printf(" (%ld sleep corrections)\n", nr_sleep_corrections);
	nr_sleep_corrections = 0;
}

static void test_calibrations(void)
{
	u64 T0, T1;

	T0 = get_nsecs();
	burn_nsecs(1e6);
	T1 = get_nsecs();

	printf("the run test took %" PRIu64 " nsecs\n", T1 - T0);

	T0 = get_nsecs();
	sleep_nsecs(1e6);
	T1 = get_nsecs();

	printf("the sleep test took %" PRIu64 " nsecs\n", T1 - T0);
}

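/*
 * Helpers for pulling typed fields out of a raw tracepoint record.
 * The macros stringify the field name, so e.g.
 *
 *	FILL_FIELD(wakeup_event, pid, event, data);
 *
 * expands to roughly:
 *
 *	wakeup_event.pid = (typeof(wakeup_event.pid))
 *				raw_field_value(event, "pid", data);
 */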
#define FILL_FIELD(ptr, field, event, data)	\
	ptr.field = (typeof(ptr.field)) raw_field_value(event, #field, data)

#define FILL_ARRAY(ptr, array, event, data)			\
do {								\
	void *__array = raw_field_ptr(event, #array, data);	\
	memcpy(ptr.array, __array, sizeof(ptr.array));	\
} while(0)

#define FILL_COMMON_FIELDS(ptr, event, data)			\
do {								\
	FILL_FIELD(ptr, common_type, event, data);		\
	FILL_FIELD(ptr, common_flags, event, data);		\
	FILL_FIELD(ptr, common_preempt_count, event, data);	\
	FILL_FIELD(ptr, common_pid, event, data);		\
	FILL_FIELD(ptr, common_tgid, event, data);		\
} while (0)



struct trace_switch_event {
	u32 size;

	u16 common_type;
	u8 common_flags;
	u8 common_preempt_count;
	u32 common_pid;
	u32 common_tgid;

	char prev_comm[16];
	u32 prev_pid;
	u32 prev_prio;
	u64 prev_state;
	char next_comm[16];
	u32 next_pid;
	u32 next_prio;
};

struct trace_runtime_event {
	u32 size;

	u16 common_type;
	u8 common_flags;
	u8 common_preempt_count;
	u32 common_pid;
	u32 common_tgid;

	char comm[16];
	u32 pid;
	u64 runtime;
	u64 vruntime;
};

struct trace_wakeup_event {
	u32 size;

	u16 common_type;
	u8 common_flags;
	u8 common_preempt_count;
	u32 common_pid;
	u32 common_tgid;

	char comm[16];
	u32 pid;

	u32 prio;
	u32 success;
	u32 cpu;
};

struct trace_fork_event {
	u32 size;

	u16 common_type;
	u8 common_flags;
	u8 common_preempt_count;
	u32 common_pid;
	u32 common_tgid;

	char parent_comm[16];
	u32 parent_pid;
	char child_comm[16];
	u32 child_pid;
};

struct trace_migrate_task_event {
	u32 size;

	u16 common_type;
	u8 common_flags;
	u8 common_preempt_count;
	u32 common_pid;
	u32 common_tgid;

	char comm[16];
	u32 pid;

	u32 prio;
	u32 cpu;
};

struct trace_sched_handler {
	void (*switch_event)(struct trace_switch_event *,
			     struct perf_session *,
			     struct event *,
			     int cpu,
			     u64 timestamp,
			     struct thread *thread);

	void (*runtime_event)(struct trace_runtime_event *,
			      struct perf_session *,
			      struct event *,
			      int cpu,
			      u64 timestamp,
			      struct thread *thread);

	void (*wakeup_event)(struct trace_wakeup_event *,
			     struct perf_session *,
			     struct event *,
			     int cpu,
			     u64 timestamp,
			     struct thread *thread);

	void (*fork_event)(struct trace_fork_event *,
			   struct event *,
			   int cpu,
			   u64 timestamp,
			   struct thread *thread);

	void (*migrate_task_event)(struct trace_migrate_task_event *,
			   struct perf_session *session,
			   struct event *,
			   int cpu,
			   u64 timestamp,
			   struct thread *thread);
};


static void
replay_wakeup_event(struct trace_wakeup_event *wakeup_event,
		    struct perf_session *session __used,
		    struct event *event,
		    int cpu __used,
		    u64 timestamp __used,
		    struct thread *thread __used)
{
	struct task_desc *waker, *wakee;

	if (verbose) {
		printf("sched_wakeup event %p\n", event);

		printf(" ... pid %d woke up %s/%d\n",
			wakeup_event->common_pid,
			wakeup_event->comm,
			wakeup_event->pid);
	}

	waker = register_pid(wakeup_event->common_pid, "<unknown>");
	wakee = register_pid(wakeup_event->pid, wakeup_event->comm);

	add_sched_event_wakeup(waker, timestamp, wakee);
}

static u64 cpu_last_switched[MAX_CPUS];

static void
replay_switch_event(struct trace_switch_event *switch_event,
		    struct perf_session *session __used,
		    struct event *event,
		    int cpu,
		    u64 timestamp,
		    struct thread *thread __used)
{
	struct task_desc *prev, __used *next;
	u64 timestamp0;
	s64 delta;

	if (verbose)
		printf("sched_switch event %p\n", event);

	if (cpu >= MAX_CPUS || cpu < 0)
		return;

	timestamp0 = cpu_last_switched[cpu];
	if (timestamp0)
		delta = timestamp - timestamp0;
	else
		delta = 0;

	if (delta < 0)
		die("hm, delta: %" PRId64 " < 0 ?\n", delta);

	if (verbose) {
		printf(" ... switch from %s/%d to %s/%d [ran %" PRIu64 " nsecs]\n",
			switch_event->prev_comm, switch_event->prev_pid,
			switch_event->next_comm, switch_event->next_pid,
			delta);
	}

	prev = register_pid(switch_event->prev_pid, switch_event->prev_comm);
	next = register_pid(switch_event->next_pid, switch_event->next_comm);

	cpu_last_switched[cpu] = timestamp;

	add_sched_event_run(prev, timestamp, delta);
	add_sched_event_sleep(prev, timestamp, switch_event->prev_state);
}


static void
replay_fork_event(struct trace_fork_event *fork_event,
		  struct event *event,
		  int cpu __used,
		  u64 timestamp __used,
		  struct thread *thread __used)
{
	if (verbose) {
		printf("sched_fork event %p\n", event);
		printf("... parent: %s/%d\n", fork_event->parent_comm, fork_event->parent_pid);
		printf("...  child: %s/%d\n", fork_event->child_comm, fork_event->child_pid);
	}
	register_pid(fork_event->parent_pid, fork_event->parent_comm);
	register_pid(fork_event->child_pid, fork_event->child_comm);
}

static struct trace_sched_handler replay_ops  = {
	.wakeup_event		= replay_wakeup_event,
	.switch_event		= replay_switch_event,
	.fork_event		= replay_fork_event,
};

struct sort_dimension {
	const char		*name;
	sort_fn_t		cmp;
	struct list_head	list;
};

static LIST_HEAD(cmp_pid);

static int
thread_lat_cmp(struct list_head *list, struct work_atoms *l, struct work_atoms *r)
{
	struct sort_dimension *sort;
	int ret = 0;

	BUG_ON(list_empty(list));

	list_for_each_entry(sort, list, list) {
		ret = sort->cmp(l, r);
		if (ret)
			return ret;
	}

	return ret;
}

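/*
 * The per-thread work_atoms containers live in an rbtree keyed by the
 * comparator chain in cmp_pid.  Search and insert must use the same
 * sort list, otherwise lookups would walk the wrong way down the tree.
 */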
static struct work_atoms *
thread_atoms_search(struct rb_root *root, struct thread *thread,
			 struct list_head *sort_list)
{
	struct rb_node *node = root->rb_node;
	struct work_atoms key = { .thread = thread };

	while (node) {
		struct work_atoms *atoms;
		int cmp;

		atoms = container_of(node, struct work_atoms, node);

		cmp = thread_lat_cmp(sort_list, &key, atoms);
		if (cmp > 0)
			node = node->rb_left;
		else if (cmp < 0)
			node = node->rb_right;
		else {
			BUG_ON(thread != atoms->thread);
			return atoms;
		}
	}
	return NULL;
}

static void
__thread_latency_insert(struct rb_root *root, struct work_atoms *data,
			 struct list_head *sort_list)
{
	struct rb_node **new = &(root->rb_node), *parent = NULL;

	while (*new) {
		struct work_atoms *this;
		int cmp;

		this = container_of(*new, struct work_atoms, node);
		parent = *new;

		cmp = thread_lat_cmp(sort_list, data, this);

		if (cmp > 0)
			new = &((*new)->rb_left);
		else
			new = &((*new)->rb_right);
	}

	rb_link_node(&data->node, parent, new);
	rb_insert_color(&data->node, root);
}

static void thread_atoms_insert(struct thread *thread)
{
	struct work_atoms *atoms = zalloc(sizeof(*atoms));
	if (!atoms)
		die("No memory");

	atoms->thread = thread;
	INIT_LIST_HEAD(&atoms->work_list);
	__thread_latency_insert(&atom_root, atoms, &cmp_pid);
}

static void
latency_fork_event(struct trace_fork_event *fork_event __used,
		   struct event *event __used,
		   int cpu __used,
		   u64 timestamp __used,
		   struct thread *thread __used)
{
	/* should insert the newcomer */
}

__used
static char sched_out_state(struct trace_switch_event *switch_event)
{
	const char *str = TASK_STATE_TO_CHAR_STR;

	return str[switch_event->prev_state];
}

static void
add_sched_out_event(struct work_atoms *atoms,
		    char run_state,
		    u64 timestamp)
{
	struct work_atom *atom = zalloc(sizeof(*atom));
	if (!atom)
		die("No memory");

	atom->sched_out_time = timestamp;

	if (run_state == 'R') {
		atom->state = THREAD_WAIT_CPU;
		atom->wake_up_time = atom->sched_out_time;
	}

	list_add_tail(&atom->list, &atoms->work_list);
}

static void
add_runtime_event(struct work_atoms *atoms, u64 delta, u64 timestamp __used)
{
	struct work_atom *atom;

	BUG_ON(list_empty(&atoms->work_list));

	atom = list_entry(atoms->work_list.prev, struct work_atom, list);

	atom->runtime += delta;
	atoms->total_runtime += delta;
}

static void
add_sched_in_event(struct work_atoms *atoms, u64 timestamp)
{
	struct work_atom *atom;
	u64 delta;

	if (list_empty(&atoms->work_list))
		return;

	atom = list_entry(atoms->work_list.prev, struct work_atom, list);

	if (atom->state != THREAD_WAIT_CPU)
		return;

	if (timestamp < atom->wake_up_time) {
		atom->state = THREAD_IGNORE;
		return;
	}

	atom->state = THREAD_SCHED_IN;
	atom->sched_in_time = timestamp;

	delta = atom->sched_in_time - atom->wake_up_time;
	atoms->total_lat += delta;
	if (delta > atoms->max_lat) {
		atoms->max_lat = delta;
		atoms->max_lat_at = timestamp;
	}
	atoms->nb_atoms++;
}

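/*
 * Latency accounting state machine: a sched-out appends a work atom
 * ('R' means the task stays runnable, so its wait starts immediately),
 * a wakeup stamps wake_up_time on the last atom, and a sched-in
 * completes it, adding sched_in_time - wake_up_time to the thread's
 * total and maximum latency.
 */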
static void
latency_switch_event(struct trace_switch_event *switch_event,
		     struct perf_session *session,
		     struct event *event __used,
		     int cpu,
		     u64 timestamp,
		     struct thread *thread __used)
{
	struct work_atoms *out_events, *in_events;
	struct thread *sched_out, *sched_in;
	u64 timestamp0;
	s64 delta;

	BUG_ON(cpu >= MAX_CPUS || cpu < 0);

	timestamp0 = cpu_last_switched[cpu];
	cpu_last_switched[cpu] = timestamp;
	if (timestamp0)
		delta = timestamp - timestamp0;
	else
		delta = 0;

	if (delta < 0)
		die("hm, delta: %" PRId64 " < 0 ?\n", delta);


	sched_out = perf_session__findnew(session, switch_event->prev_pid);
	sched_in = perf_session__findnew(session, switch_event->next_pid);

	out_events = thread_atoms_search(&atom_root, sched_out, &cmp_pid);
	if (!out_events) {
		thread_atoms_insert(sched_out);
		out_events = thread_atoms_search(&atom_root, sched_out, &cmp_pid);
		if (!out_events)
			die("out-event: Internal tree error");
	}
	add_sched_out_event(out_events, sched_out_state(switch_event), timestamp);

	in_events = thread_atoms_search(&atom_root, sched_in, &cmp_pid);
	if (!in_events) {
		thread_atoms_insert(sched_in);
		in_events = thread_atoms_search(&atom_root, sched_in, &cmp_pid);
		if (!in_events)
			die("in-event: Internal tree error");
		/*
		 * Task came in that we have not heard about yet,
		 * add in an initial atom in runnable state:
		 */
		add_sched_out_event(in_events, 'R', timestamp);
	}
	add_sched_in_event(in_events, timestamp);
}

static void
latency_runtime_event(struct trace_runtime_event *runtime_event,
		     struct perf_session *session,
		     struct event *event __used,
		     int cpu,
		     u64 timestamp,
		     struct thread *this_thread __used)
{
	struct thread *thread = perf_session__findnew(session, runtime_event->pid);
	struct work_atoms *atoms = thread_atoms_search(&atom_root, thread, &cmp_pid);

	BUG_ON(cpu >= MAX_CPUS || cpu < 0);
	if (!atoms) {
		thread_atoms_insert(thread);
		atoms = thread_atoms_search(&atom_root, thread, &cmp_pid);
		if (!atoms)
			die("in-event: Internal tree error");
		add_sched_out_event(atoms, 'R', timestamp);
	}

	add_runtime_event(atoms, runtime_event->runtime, timestamp);
}

static void
latency_wakeup_event(struct trace_wakeup_event *wakeup_event,
		     struct perf_session *session,
		     struct event *__event __used,
		     int cpu __used,
		     u64 timestamp,
		     struct thread *thread __used)
{
	struct work_atoms *atoms;
	struct work_atom *atom;
	struct thread *wakee;

	/* Note for later, it may be interesting to observe the failing cases */
	if (!wakeup_event->success)
		return;

	wakee = perf_session__findnew(session, wakeup_event->pid);
	atoms = thread_atoms_search(&atom_root, wakee, &cmp_pid);
	if (!atoms) {
		thread_atoms_insert(wakee);
		atoms = thread_atoms_search(&atom_root, wakee, &cmp_pid);
		if (!atoms)
			die("wakeup-event: Internal tree error");
		add_sched_out_event(atoms, 'S', timestamp);
	}

	BUG_ON(list_empty(&atoms->work_list));

	atom = list_entry(atoms->work_list.prev, struct work_atom, list);

	/*
	 * You WILL be missing events if you've recorded only
	 * one CPU, or are looking at only one, so don't
	 * make useless noise.
	 */
	if (profile_cpu == -1 && atom->state != THREAD_SLEEPING)
		nr_state_machine_bugs++;

	nr_timestamps++;
	if (atom->sched_out_time > timestamp) {
		nr_unordered_timestamps++;
		return;
	}

	atom->state = THREAD_WAIT_CPU;
	atom->wake_up_time = timestamp;
}

static void
latency_migrate_task_event(struct trace_migrate_task_event *migrate_task_event,
		     struct perf_session *session,
		     struct event *__event __used,
		     int cpu __used,
		     u64 timestamp,
		     struct thread *thread __used)
{
	struct work_atoms *atoms;
	struct work_atom *atom;
	struct thread *migrant;

	/*
	 * Only need to worry about migration when profiling one CPU.
	 */
	if (profile_cpu == -1)
		return;

	migrant = perf_session__findnew(session, migrate_task_event->pid);
	atoms = thread_atoms_search(&atom_root, migrant, &cmp_pid);
	if (!atoms) {
		thread_atoms_insert(migrant);
		register_pid(migrant->pid, migrant->comm);
		atoms = thread_atoms_search(&atom_root, migrant, &cmp_pid);
		if (!atoms)
			die("migration-event: Internal tree error");
		add_sched_out_event(atoms, 'R', timestamp);
	}

	BUG_ON(list_empty(&atoms->work_list));

	atom = list_entry(atoms->work_list.prev, struct work_atom, list);
	atom->sched_in_time = atom->sched_out_time = atom->wake_up_time = timestamp;

	nr_timestamps++;

	if (atom->sched_out_time > timestamp)
		nr_unordered_timestamps++;
}

static struct trace_sched_handler lat_ops  = {
	.wakeup_event		= latency_wakeup_event,
	.switch_event		= latency_switch_event,
	.runtime_event		= latency_runtime_event,
	.fork_event		= latency_fork_event,
	.migrate_task_event	= latency_migrate_task_event,
};

static void output_lat_thread(struct work_atoms *work_list)
{
	int i;
	int ret;
	u64 avg;

	if (!work_list->nb_atoms)
		return;
	/*
	 * Ignore idle threads:
	 */
	if (!strcmp(work_list->thread->comm, "swapper"))
		return;

	all_runtime += work_list->total_runtime;
	all_count += work_list->nb_atoms;

	ret = printf("  %s:%d ", work_list->thread->comm, work_list->thread->pid);

	for (i = 0; i < 24 - ret; i++)
		printf(" ");

	avg = work_list->total_lat / work_list->nb_atoms;

	printf("|%11.3f ms |%9" PRIu64 " | avg:%9.3f ms | max:%9.3f ms | max at: %9.6f s\n",
	      (double)work_list->total_runtime / 1e6,
		 work_list->nb_atoms, (double)avg / 1e6,
		 (double)work_list->max_lat / 1e6,
		 (double)work_list->max_lat_at / 1e9);
}

static int pid_cmp(struct work_atoms *l, struct work_atoms *r)
{
	if (l->thread->pid < r->thread->pid)
		return -1;
	if (l->thread->pid > r->thread->pid)
		return 1;

	return 0;
}

static struct sort_dimension pid_sort_dimension = {
	.name			= "pid",
	.cmp			= pid_cmp,
};

static int avg_cmp(struct work_atoms *l, struct work_atoms *r)
{
	u64 avgl, avgr;

	if (!l->nb_atoms)
		return -1;

	if (!r->nb_atoms)
		return 1;

	avgl = l->total_lat / l->nb_atoms;
	avgr = r->total_lat / r->nb_atoms;

	if (avgl < avgr)
		return -1;
	if (avgl > avgr)
		return 1;

	return 0;
}

static struct sort_dimension avg_sort_dimension = {
	.name			= "avg",
	.cmp			= avg_cmp,
};

static int max_cmp(struct work_atoms *l, struct work_atoms *r)
{
	if (l->max_lat < r->max_lat)
		return -1;
	if (l->max_lat > r->max_lat)
		return 1;

	return 0;
}

static struct sort_dimension max_sort_dimension = {
	.name			= "max",
	.cmp			= max_cmp,
};

static int switch_cmp(struct work_atoms *l, struct work_atoms *r)
{
	if (l->nb_atoms < r->nb_atoms)
		return -1;
	if (l->nb_atoms > r->nb_atoms)
		return 1;

	return 0;
}

static struct sort_dimension switch_sort_dimension = {
	.name			= "switch",
	.cmp			= switch_cmp,
};

static int runtime_cmp(struct work_atoms *l, struct work_atoms *r)
{
	if (l->total_runtime < r->total_runtime)
		return -1;
	if (l->total_runtime > r->total_runtime)
		return 1;

	return 0;
}

static struct sort_dimension runtime_sort_dimension = {
	.name			= "runtime",
	.cmp			= runtime_cmp,
};

static struct sort_dimension *available_sorts[] = {
	&pid_sort_dimension,
	&avg_sort_dimension,
	&max_sort_dimension,
	&switch_sort_dimension,
	&runtime_sort_dimension,
};

#define NB_AVAILABLE_SORTS	(int)(sizeof(available_sorts) / sizeof(struct sort_dimension *))

static LIST_HEAD(sort_list);

static int sort_dimension__add(const char *tok, struct list_head *list)
{
	int i;

	for (i = 0; i < NB_AVAILABLE_SORTS; i++) {
		if (!strcmp(available_sorts[i]->name, tok)) {
			list_add_tail(&available_sorts[i]->list, list);

			return 0;
		}
	}

	return -1;
}

static void setup_sorting(void);

static void sort_lat(void)
{
	struct rb_node *node;

	for (;;) {
		struct work_atoms *data;
		node = rb_first(&atom_root);
		if (!node)
			break;

		rb_erase(node, &atom_root);
		data = rb_entry(node, struct work_atoms, node);
		__thread_latency_insert(&sorted_atom_root, data, &sort_list);
	}
}

static struct trace_sched_handler *trace_handler;

static void
process_sched_wakeup_event(void *data, struct perf_session *session,
			   struct event *event,
			   int cpu __used,
			   u64 timestamp __used,
			   struct thread *thread __used)
{
	struct trace_wakeup_event wakeup_event;

	FILL_COMMON_FIELDS(wakeup_event, event, data);

	FILL_ARRAY(wakeup_event, comm, event, data);
	FILL_FIELD(wakeup_event, pid, event, data);
	FILL_FIELD(wakeup_event, prio, event, data);
	FILL_FIELD(wakeup_event, success, event, data);
	FILL_FIELD(wakeup_event, cpu, event, data);

	if (trace_handler->wakeup_event)
		trace_handler->wakeup_event(&wakeup_event, session, event,
					    cpu, timestamp, thread);
}

/*
 * Track the current task - that way we can know whether there's any
 * weird events, such as a task being switched away that is not current.
 */
static int max_cpu;

static u32 curr_pid[MAX_CPUS] = { [0 ... MAX_CPUS-1] = -1 };

static struct thread *curr_thread[MAX_CPUS];

static char next_shortname1 = 'A';
static char next_shortname2 = '0';

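/*
 * 'perf sched map' output: each running task gets a two-character
 * shortname (A0, B0, ... Z0, A1, ...) and every switch prints one row
 * with a column per CPU, a '*' marking the CPU that switched.  A row
 * might look roughly like this (with a hypothetical task foo/1234):
 *
 *	  *A0  B0  .     123.456789 secs A0 => foo:1234
 */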
static void
map_switch_event(struct trace_switch_event *switch_event,
		 struct perf_session *session,
		 struct event *event __used,
		 int this_cpu,
		 u64 timestamp,
		 struct thread *thread __used)
{
	struct thread *sched_out __used, *sched_in;
	int new_shortname;
	u64 timestamp0;
	s64 delta;
	int cpu;

	BUG_ON(this_cpu >= MAX_CPUS || this_cpu < 0);

	if (this_cpu > max_cpu)
		max_cpu = this_cpu;

	timestamp0 = cpu_last_switched[this_cpu];
	cpu_last_switched[this_cpu] = timestamp;
	if (timestamp0)
		delta = timestamp - timestamp0;
	else
		delta = 0;

	if (delta < 0)
		die("hm, delta: %" PRId64 " < 0 ?\n", delta);


	sched_out = perf_session__findnew(session, switch_event->prev_pid);
	sched_in = perf_session__findnew(session, switch_event->next_pid);

	curr_thread[this_cpu] = sched_in;

	printf("  ");

	new_shortname = 0;
	if (!sched_in->shortname[0]) {
		sched_in->shortname[0] = next_shortname1;
		sched_in->shortname[1] = next_shortname2;

		if (next_shortname1 < 'Z') {
			next_shortname1++;
		} else {
			next_shortname1='A';
			if (next_shortname2 < '9') {
				next_shortname2++;
			} else {
				next_shortname2='0';
			}
		}
		new_shortname = 1;
	}

	for (cpu = 0; cpu <= max_cpu; cpu++) {
		if (cpu != this_cpu)
			printf(" ");
		else
			printf("*");

		if (curr_thread[cpu]) {
			if (curr_thread[cpu]->pid)
				printf("%2s ", curr_thread[cpu]->shortname);
			else
				printf(".  ");
		} else
			printf("   ");
	}

	printf("  %12.6f secs ", (double)timestamp/1e9);
	if (new_shortname) {
		printf("%s => %s:%d\n",
			sched_in->shortname, sched_in->comm, sched_in->pid);
	} else {
		printf("\n");
	}
}


static void
process_sched_switch_event(void *data, struct perf_session *session,
			   struct event *event,
			   int this_cpu,
			   u64 timestamp __used,
			   struct thread *thread __used)
{
	struct trace_switch_event switch_event;

	FILL_COMMON_FIELDS(switch_event, event, data);

	FILL_ARRAY(switch_event, prev_comm, event, data);
	FILL_FIELD(switch_event, prev_pid, event, data);
	FILL_FIELD(switch_event, prev_prio, event, data);
	FILL_FIELD(switch_event, prev_state, event, data);
	FILL_ARRAY(switch_event, next_comm, event, data);
	FILL_FIELD(switch_event, next_pid, event, data);
	FILL_FIELD(switch_event, next_prio, event, data);

	if (curr_pid[this_cpu] != (u32)-1) {
		/*
		 * Are we trying to switch away a PID that is
		 * not current?
		 */
		if (curr_pid[this_cpu] != switch_event.prev_pid)
			nr_context_switch_bugs++;
	}
	if (trace_handler->switch_event)
		trace_handler->switch_event(&switch_event, session, event,
					    this_cpu, timestamp, thread);

	curr_pid[this_cpu] = switch_event.next_pid;
}

static void
process_sched_runtime_event(void *data, struct perf_session *session,
			   struct event *event,
			   int cpu __used,
			   u64 timestamp __used,
			   struct thread *thread __used)
{
	struct trace_runtime_event runtime_event;

	FILL_ARRAY(runtime_event, comm, event, data);
	FILL_FIELD(runtime_event, pid, event, data);
	FILL_FIELD(runtime_event, runtime, event, data);
	FILL_FIELD(runtime_event, vruntime, event, data);

	if (trace_handler->runtime_event)
		trace_handler->runtime_event(&runtime_event, session, event, cpu, timestamp, thread);
}

static void
process_sched_fork_event(void *data,
			 struct event *event,
			 int cpu __used,
			 u64 timestamp __used,
			 struct thread *thread __used)
{
	struct trace_fork_event fork_event;

	FILL_COMMON_FIELDS(fork_event, event, data);

	FILL_ARRAY(fork_event, parent_comm, event, data);
	FILL_FIELD(fork_event, parent_pid, event, data);
	FILL_ARRAY(fork_event, child_comm, event, data);
	FILL_FIELD(fork_event, child_pid, event, data);

	if (trace_handler->fork_event)
		trace_handler->fork_event(&fork_event, event,
					  cpu, timestamp, thread);
}

static void
process_sched_exit_event(struct event *event,
			 int cpu __used,
			 u64 timestamp __used,
			 struct thread *thread __used)
{
	if (verbose)
		printf("sched_exit event %p\n", event);
}

static void
process_sched_migrate_task_event(void *data, struct perf_session *session,
			   struct event *event,
			   int cpu __used,
			   u64 timestamp __used,
			   struct thread *thread __used)
{
	struct trace_migrate_task_event migrate_task_event;

	FILL_COMMON_FIELDS(migrate_task_event, event, data);

	FILL_ARRAY(migrate_task_event, comm, event, data);
	FILL_FIELD(migrate_task_event, pid, event, data);
	FILL_FIELD(migrate_task_event, prio, event, data);
	FILL_FIELD(migrate_task_event, cpu, event, data);

	if (trace_handler->migrate_task_event)
		trace_handler->migrate_task_event(&migrate_task_event, session,
						 event, cpu, timestamp, thread);
}

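/*
 * Dispatch a raw sample to the per-tracepoint handler, selected by the
 * event name that trace_parse_common_type() and trace_find_event()
 * resolve from the record.
 */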
static void process_raw_event(union perf_event *raw_event __used,
			      struct perf_session *session, void *data, int cpu,
			      u64 timestamp, struct thread *thread)
{
	struct event *event;
	int type;


	type = trace_parse_common_type(data);
	event = trace_find_event(type);

	if (!strcmp(event->name, "sched_switch"))
		process_sched_switch_event(data, session, event, cpu, timestamp, thread);
	if (!strcmp(event->name, "sched_stat_runtime"))
		process_sched_runtime_event(data, session, event, cpu, timestamp, thread);
	if (!strcmp(event->name, "sched_wakeup"))
		process_sched_wakeup_event(data, session, event, cpu, timestamp, thread);
	if (!strcmp(event->name, "sched_wakeup_new"))
		process_sched_wakeup_event(data, session, event, cpu, timestamp, thread);
	if (!strcmp(event->name, "sched_process_fork"))
		process_sched_fork_event(data, event, cpu, timestamp, thread);
	if (!strcmp(event->name, "sched_process_exit"))
		process_sched_exit_event(event, cpu, timestamp, thread);
	if (!strcmp(event->name, "sched_migrate_task"))
		process_sched_migrate_task_event(data, session, event, cpu, timestamp, thread);
}

static int process_sample_event(union perf_event *event,
				struct perf_sample *sample,
				struct perf_evsel *evsel __used,
				struct perf_session *session)
{
	struct thread *thread;

	if (!(session->sample_type & PERF_SAMPLE_RAW))
		return 0;

	thread = perf_session__findnew(session, sample->pid);
	if (thread == NULL) {
		pr_debug("problem processing %d event, skipping it.\n",
			 event->header.type);
		return -1;
	}

	dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);

	if (profile_cpu != -1 && profile_cpu != (int)sample->cpu)
		return 0;

	process_raw_event(event, session, sample->raw_data, sample->cpu,
			  sample->time, thread);

	return 0;
}

static struct perf_event_ops event_ops = {
	.sample			= process_sample_event,
	.comm			= perf_event__process_comm,
	.lost			= perf_event__process_lost,
	.fork			= perf_event__process_task,
	.ordered_samples	= true,
};

static int read_events(void)
{
	int err = -EINVAL;
	struct perf_session *session = perf_session__new(input_name, O_RDONLY,
							 0, false, &event_ops);
	if (session == NULL)
		return -ENOMEM;

	if (perf_session__has_traces(session, "record -R")) {
		err = perf_session__process_events(session, &event_ops);
		nr_events      = session->hists.stats.nr_events[0];
		nr_lost_events = session->hists.stats.total_lost;
		nr_lost_chunks = session->hists.stats.nr_events[PERF_RECORD_LOST];
	}

	perf_session__delete(session);
	return err;
}

static void print_bad_events(void)
{
	if (nr_unordered_timestamps && nr_timestamps) {
		printf("  INFO: %.3f%% unordered timestamps (%ld out of %ld)\n",
			(double)nr_unordered_timestamps/(double)nr_timestamps*100.0,
			nr_unordered_timestamps, nr_timestamps);
	}
	if (nr_lost_events && nr_events) {
		printf("  INFO: %.3f%% lost events (%ld out of %ld, in %ld chunks)\n",
			(double)nr_lost_events/(double)nr_events*100.0,
			nr_lost_events, nr_events, nr_lost_chunks);
	}
	if (nr_state_machine_bugs && nr_timestamps) {
		printf("  INFO: %.3f%% state machine bugs (%ld out of %ld)",
			(double)nr_state_machine_bugs/(double)nr_timestamps*100.0,
			nr_state_machine_bugs, nr_timestamps);
		if (nr_lost_events)
			printf(" (due to lost events?)");
		printf("\n");
	}
	if (nr_context_switch_bugs && nr_timestamps) {
		printf("  INFO: %.3f%% context switch bugs (%ld out of %ld)",
			(double)nr_context_switch_bugs/(double)nr_timestamps*100.0,
			nr_context_switch_bugs, nr_timestamps);
		if (nr_lost_events)
			printf(" (due to lost events?)");
		printf("\n");
	}
}

static void __cmd_lat(void)
{
	struct rb_node *next;

	setup_pager();
	read_events();
	sort_lat();

	printf("\n ---------------------------------------------------------------------------------------------------------------\n");
	printf("  Task                  |   Runtime ms  | Switches | Average delay ms | Maximum delay ms | Maximum delay at     |\n");
	printf(" ---------------------------------------------------------------------------------------------------------------\n");

	next = rb_first(&sorted_atom_root);

	while (next) {
		struct work_atoms *work_list;

		work_list = rb_entry(next, struct work_atoms, node);
		output_lat_thread(work_list);
		next = rb_next(next);
	}

	printf(" -----------------------------------------------------------------------------------------\n");
	printf("  TOTAL:                |%11.3f ms |%9" PRIu64 " |\n",
		(double)all_runtime/1e6, all_count);

	printf(" ---------------------------------------------------\n");

	print_bad_events();
	printf("\n");

}

static struct trace_sched_handler map_ops  = {
	.wakeup_event		= NULL,
	.switch_event		= map_switch_event,
	.runtime_event		= NULL,
	.fork_event		= NULL,
};

static void __cmd_map(void)
{
	max_cpu = sysconf(_SC_NPROCESSORS_CONF);

	setup_pager();
	read_events();
	print_bad_events();
}

static void __cmd_replay(void)
{
	unsigned long i;

	calibrate_run_measurement_overhead();
	calibrate_sleep_measurement_overhead();

	test_calibrations();

	read_events();

	printf("nr_run_events:        %ld\n", nr_run_events);
	printf("nr_sleep_events:      %ld\n", nr_sleep_events);
	printf("nr_wakeup_events:     %ld\n", nr_wakeup_events);

	if (targetless_wakeups)
		printf("target-less wakeups:  %ld\n", targetless_wakeups);
	if (multitarget_wakeups)
		printf("multi-target wakeups: %ld\n", multitarget_wakeups);
	if (nr_run_events_optimized)
		printf("run atoms optimized: %ld\n",
			nr_run_events_optimized);

	print_task_traces();
	add_cross_task_wakeups();

	create_tasks();
	printf("------------------------------------------------------------\n");
	for (i = 0; i < replay_repeat; i++)
		run_one_test();
}


static const char * const sched_usage[] = {
	"perf sched [<options>] {record|latency|map|replay|trace}",
	NULL
};

static const struct option sched_options[] = {
	OPT_STRING('i', "input", &input_name, "file",
		    "input file name"),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show symbol address, etc)"),
	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
		    "dump raw trace in ASCII"),
	OPT_END()
};

static const char * const latency_usage[] = {
	"perf sched latency [<options>]",
	NULL
};

static const struct option latency_options[] = {
	OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
		   "sort by key(s): runtime, switch, avg, max"),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show symbol address, etc)"),
	OPT_INTEGER('C', "CPU", &profile_cpu,
		    "CPU to profile on"),
	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
		    "dump raw trace in ASCII"),
	OPT_END()
};

static const char * const replay_usage[] = {
	"perf sched replay [<options>]",
	NULL
};

static const struct option replay_options[] = {
	OPT_UINTEGER('r', "repeat", &replay_repeat,
		     "repeat the workload replay N times (-1: infinite)"),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show symbol address, etc)"),
	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
		    "dump raw trace in ASCII"),
	OPT_END()
};

static void setup_sorting(void)
{
	char *tmp, *tok, *str = strdup(sort_order);

	for (tok = strtok_r(str, ", ", &tmp);
			tok; tok = strtok_r(NULL, ", ", &tmp)) {
		if (sort_dimension__add(tok, &sort_list) < 0) {
			error("Unknown --sort key: `%s'", tok);
			usage_with_options(latency_usage, latency_options);
		}
	}

	free(str);

	sort_dimension__add("pid", &cmp_pid);
}

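/*
 * 'perf sched record' is a thin wrapper around 'perf record' with a
 * canned argument list; it is roughly equivalent to running:
 *
 *	perf record -a -R -f -m 1024 -c 1 \
 *		-e sched:sched_switch -e sched:sched_stat_wait ... \
 *		-e sched:sched_migrate_task [user args]
 */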
static const char *record_args[] = {
	"record",
	"-a",
	"-R",
	"-f",
	"-m", "1024",
	"-c", "1",
	"-e", "sched:sched_switch",
	"-e", "sched:sched_stat_wait",
	"-e", "sched:sched_stat_sleep",
	"-e", "sched:sched_stat_iowait",
	"-e", "sched:sched_stat_runtime",
	"-e", "sched:sched_process_exit",
	"-e", "sched:sched_process_fork",
	"-e", "sched:sched_wakeup",
	"-e", "sched:sched_migrate_task",
};

static int __cmd_record(int argc, const char **argv)
{
	unsigned int rec_argc, i, j;
	const char **rec_argv;

	rec_argc = ARRAY_SIZE(record_args) + argc - 1;
	rec_argv = calloc(rec_argc + 1, sizeof(char *));

	if (rec_argv == NULL)
		return -ENOMEM;

	for (i = 0; i < ARRAY_SIZE(record_args); i++)
		rec_argv[i] = strdup(record_args[i]);

	for (j = 1; j < (unsigned int)argc; j++, i++)
		rec_argv[i] = argv[j];

	BUG_ON(i != rec_argc);

	return cmd_record(i, rec_argv, NULL);
}

int cmd_sched(int argc, const char **argv, const char *prefix __used)
{
	argc = parse_options(argc, argv, sched_options, sched_usage,
			     PARSE_OPT_STOP_AT_NON_OPTION);
	if (!argc)
		usage_with_options(sched_usage, sched_options);

	/*
	 * Aliased to 'perf script' for now:
	 */
	if (!strcmp(argv[0], "script"))
		return cmd_script(argc, argv, prefix);

	symbol__init();
	if (!strncmp(argv[0], "rec", 3)) {
		return __cmd_record(argc, argv);
	} else if (!strncmp(argv[0], "lat", 3)) {
		trace_handler = &lat_ops;
		if (argc > 1) {
			argc = parse_options(argc, argv, latency_options, latency_usage, 0);
			if (argc)
				usage_with_options(latency_usage, latency_options);
		}
		setup_sorting();
		__cmd_lat();
	} else if (!strcmp(argv[0], "map")) {
		trace_handler = &map_ops;
		setup_sorting();
		__cmd_map();
	} else if (!strncmp(argv[0], "rep", 3)) {
		trace_handler = &replay_ops;
		if (argc) {
			argc = parse_options(argc, argv, replay_options, replay_usage, 0);
			if (argc)
				usage_with_options(replay_usage, replay_options);
		}
		__cmd_replay();
	} else {
		usage_with_options(sched_usage, sched_options);
	}

	return 0;
}