/* Home | History | Annotate | Download | only in perf */
      1 /*
      2  * builtin-stat.c
      3  *
      4  * Builtin stat command: Give a precise performance counters summary
      5  * overview about any workload, CPU or specific PID.
      6  *
      7  * Sample output:
      8 
      9    $ perf stat ./hackbench 10
     10 
     11   Time: 0.118
     12 
     13   Performance counter stats for './hackbench 10':
     14 
     15        1708.761321 task-clock                #   11.037 CPUs utilized
     16             41,190 context-switches          #    0.024 M/sec
     17              6,735 CPU-migrations            #    0.004 M/sec
     18             17,318 page-faults               #    0.010 M/sec
     19      5,205,202,243 cycles                    #    3.046 GHz
     20      3,856,436,920 stalled-cycles-frontend   #   74.09% frontend cycles idle
     21      1,600,790,871 stalled-cycles-backend    #   30.75% backend  cycles idle
     22      2,603,501,247 instructions              #    0.50  insns per cycle
     23                                              #    1.48  stalled cycles per insn
     24        484,357,498 branches                  #  283.455 M/sec
     25          6,388,934 branch-misses             #    1.32% of all branches
     26 
     27         0.154822978  seconds time elapsed
     28 
     29  *
     30  * Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar <mingo (at) redhat.com>
     31  *
     32  * Improvements and fixes by:
     33  *
     34  *   Arjan van de Ven <arjan (at) linux.intel.com>
     35  *   Yanmin Zhang <yanmin.zhang (at) intel.com>
     36  *   Wu Fengguang <fengguang.wu (at) intel.com>
     37  *   Mike Galbraith <efault (at) gmx.de>
     38  *   Paul Mackerras <paulus (at) samba.org>
     39  *   Jaswinder Singh Rajput <jaswinder (at) kernel.org>
     40  *
     41  * Released under the GPL v2. (and only v2, not any later version)
     42  */
     43 
     44 #include "perf.h"
     45 #include "builtin.h"
     46 #include "util/util.h"
     47 #include "util/parse-options.h"
     48 #include "util/parse-events.h"
     49 #include "util/event.h"
     50 #include "util/evlist.h"
     51 #include "util/evsel.h"
     52 #include "util/debug.h"
     53 #include "util/color.h"
     54 #include "util/stat.h"
     55 #include "util/header.h"
     56 #include "util/cpumap.h"
     57 #include "util/thread.h"
     58 #include "util/thread_map.h"
     59 
     60 #include <stdlib.h>
     61 #include <sys/prctl.h>
     62 #include <locale.h>
     63 
     64 #define DEFAULT_SEPARATOR	" "
     65 #define CNTR_NOT_SUPPORTED	"<not supported>"
     66 #define CNTR_NOT_COUNTED	"<not counted>"
     67 
     68 static void print_stat(int argc, const char **argv);
     69 static void print_counter_aggr(struct perf_evsel *counter, char *prefix);
     70 static void print_counter(struct perf_evsel *counter, char *prefix);
     71 static void print_aggr(char *prefix);
     72 
     73 static struct perf_evlist	*evsel_list;
     74 
     75 static struct perf_target	target = {
     76 	.uid	= UINT_MAX,
     77 };
     78 
     79 enum aggr_mode {
     80 	AGGR_NONE,
     81 	AGGR_GLOBAL,
     82 	AGGR_SOCKET,
     83 	AGGR_CORE,
     84 };
     85 
     86 static int			run_count			=  1;
     87 static bool			no_inherit			= false;
     88 static bool			scale				=  true;
     89 static enum aggr_mode		aggr_mode			= AGGR_GLOBAL;
     90 static volatile pid_t		child_pid			= -1;
     91 static bool			null_run			=  false;
     92 static int			detailed_run			=  0;
     93 static bool			big_num				=  true;
     94 static int			big_num_opt			=  -1;
     95 static const char		*csv_sep			= NULL;
     96 static bool			csv_output			= false;
     97 static bool			group				= false;
     98 static FILE			*output				= NULL;
     99 static const char		*pre_cmd			= NULL;
    100 static const char		*post_cmd			= NULL;
    101 static bool			sync_run			= false;
    102 static unsigned int		interval			= 0;
    103 static unsigned int		initial_delay			= 0;
    104 static bool			forever				= false;
    105 static struct timespec		ref_time;
    106 static struct cpu_map		*aggr_map;
    107 static int			(*aggr_get_id)(struct cpu_map *m, int cpu);
    108 
    109 static volatile int done = 0;
    110 
/*
 * Per-event private accumulator (hangs off evsel->priv).
 * res_stats[0..2] track run-to-run statistics of the three values read
 * from the kernel for this counter; with scaling enabled these appear
 * to be value, time_enabled and time_running (see read_counter_aggr()
 * and create_perf_stat_counter()'s read_format).
 */
struct perf_stat {
	struct stats	  res_stats[3];
};
    114 
    115 static inline void diff_timespec(struct timespec *r, struct timespec *a,
    116 				 struct timespec *b)
    117 {
    118 	r->tv_sec = a->tv_sec - b->tv_sec;
    119 	if (a->tv_nsec < b->tv_nsec) {
    120 		r->tv_nsec = a->tv_nsec + 1000000000L - b->tv_nsec;
    121 		r->tv_sec--;
    122 	} else {
    123 		r->tv_nsec = a->tv_nsec - b->tv_nsec ;
    124 	}
    125 }
    126 
    127 static inline struct cpu_map *perf_evsel__cpus(struct perf_evsel *evsel)
    128 {
    129 	return (evsel->cpus && !target.cpu_list) ? evsel->cpus : evsel_list->cpus;
    130 }
    131 
    132 static inline int perf_evsel__nr_cpus(struct perf_evsel *evsel)
    133 {
    134 	return perf_evsel__cpus(evsel)->nr;
    135 }
    136 
    137 static void perf_evsel__reset_stat_priv(struct perf_evsel *evsel)
    138 {
    139 	memset(evsel->priv, 0, sizeof(struct perf_stat));
    140 }
    141 
    142 static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
    143 {
    144 	evsel->priv = zalloc(sizeof(struct perf_stat));
    145 	return evsel->priv == NULL ? -ENOMEM : 0;
    146 }
    147 
    148 static void perf_evsel__free_stat_priv(struct perf_evsel *evsel)
    149 {
    150 	free(evsel->priv);
    151 	evsel->priv = NULL;
    152 }
    153 
    154 static int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel)
    155 {
    156 	void *addr;
    157 	size_t sz;
    158 
    159 	sz = sizeof(*evsel->counts) +
    160 	     (perf_evsel__nr_cpus(evsel) * sizeof(struct perf_counts_values));
    161 
    162 	addr = zalloc(sz);
    163 	if (!addr)
    164 		return -ENOMEM;
    165 
    166 	evsel->prev_raw_counts =  addr;
    167 
    168 	return 0;
    169 }
    170 
    171 static void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel)
    172 {
    173 	free(evsel->prev_raw_counts);
    174 	evsel->prev_raw_counts = NULL;
    175 }
    176 
    177 static void perf_evlist__free_stats(struct perf_evlist *evlist)
    178 {
    179 	struct perf_evsel *evsel;
    180 
    181 	list_for_each_entry(evsel, &evlist->entries, node) {
    182 		perf_evsel__free_stat_priv(evsel);
    183 		perf_evsel__free_counts(evsel);
    184 		perf_evsel__free_prev_raw_counts(evsel);
    185 	}
    186 }
    187 
    188 static int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw)
    189 {
    190 	struct perf_evsel *evsel;
    191 
    192 	list_for_each_entry(evsel, &evlist->entries, node) {
    193 		if (perf_evsel__alloc_stat_priv(evsel) < 0 ||
    194 		    perf_evsel__alloc_counts(evsel, perf_evsel__nr_cpus(evsel)) < 0 ||
    195 		    (alloc_raw && perf_evsel__alloc_prev_raw_counts(evsel) < 0))
    196 			goto out_free;
    197 	}
    198 
    199 	return 0;
    200 
    201 out_free:
    202 	perf_evlist__free_stats(evlist);
    203 	return -1;
    204 }
    205 
    206 static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
    207 static struct stats runtime_cycles_stats[MAX_NR_CPUS];
    208 static struct stats runtime_stalled_cycles_front_stats[MAX_NR_CPUS];
    209 static struct stats runtime_stalled_cycles_back_stats[MAX_NR_CPUS];
    210 static struct stats runtime_branches_stats[MAX_NR_CPUS];
    211 static struct stats runtime_cacherefs_stats[MAX_NR_CPUS];
    212 static struct stats runtime_l1_dcache_stats[MAX_NR_CPUS];
    213 static struct stats runtime_l1_icache_stats[MAX_NR_CPUS];
    214 static struct stats runtime_ll_cache_stats[MAX_NR_CPUS];
    215 static struct stats runtime_itlb_cache_stats[MAX_NR_CPUS];
    216 static struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS];
    217 static struct stats walltime_nsecs_stats;
    218 
    219 static void perf_stat__reset_stats(struct perf_evlist *evlist)
    220 {
    221 	struct perf_evsel *evsel;
    222 
    223 	list_for_each_entry(evsel, &evlist->entries, node) {
    224 		perf_evsel__reset_stat_priv(evsel);
    225 		perf_evsel__reset_counts(evsel, perf_evsel__nr_cpus(evsel));
    226 	}
    227 
    228 	memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats));
    229 	memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats));
    230 	memset(runtime_stalled_cycles_front_stats, 0, sizeof(runtime_stalled_cycles_front_stats));
    231 	memset(runtime_stalled_cycles_back_stats, 0, sizeof(runtime_stalled_cycles_back_stats));
    232 	memset(runtime_branches_stats, 0, sizeof(runtime_branches_stats));
    233 	memset(runtime_cacherefs_stats, 0, sizeof(runtime_cacherefs_stats));
    234 	memset(runtime_l1_dcache_stats, 0, sizeof(runtime_l1_dcache_stats));
    235 	memset(runtime_l1_icache_stats, 0, sizeof(runtime_l1_icache_stats));
    236 	memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats));
    237 	memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats));
    238 	memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats));
    239 	memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
    240 }
    241 
    242 static int create_perf_stat_counter(struct perf_evsel *evsel)
    243 {
    244 	struct perf_event_attr *attr = &evsel->attr;
    245 
    246 	if (scale)
    247 		attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
    248 				    PERF_FORMAT_TOTAL_TIME_RUNNING;
    249 
    250 	attr->inherit = !no_inherit;
    251 
    252 	if (perf_target__has_cpu(&target))
    253 		return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel));
    254 
    255 	if (!perf_target__has_task(&target) &&
    256 	    perf_evsel__is_group_leader(evsel)) {
    257 		attr->disabled = 1;
    258 		if (!initial_delay)
    259 			attr->enable_on_exec = 1;
    260 	}
    261 
    262 	return perf_evsel__open_per_thread(evsel, evsel_list->threads);
    263 }
    264 
    265 /*
    266  * Does the counter have nsecs as a unit?
    267  */
    268 static inline int nsec_counter(struct perf_evsel *evsel)
    269 {
    270 	if (perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) ||
    271 	    perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
    272 		return 1;
    273 
    274 	return 0;
    275 }
    276 
/*
 * Update various tracking values we maintain to print
 * more semantic information such as miss/hit ratios,
 * instruction rates, etc:
 *
 * Each "shadow" table remembers readings of a base event (cycles,
 * branches, cache accesses, ...) so that a later counter (e.g.
 * branch-misses) can be printed as a ratio against it by
 * abs_printout(). Values are always accumulated in slot [0] of the
 * per-cpu tables here; count[0] is the raw counter value.
 */
static void update_shadow_stats(struct perf_evsel *counter, u64 *count)
{
	if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
		update_stats(&runtime_nsecs_stats[0], count[0]);
	else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
		update_stats(&runtime_cycles_stats[0], count[0]);
	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
		update_stats(&runtime_stalled_cycles_front_stats[0], count[0]);
	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
		update_stats(&runtime_stalled_cycles_back_stats[0], count[0]);
	else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
		update_stats(&runtime_branches_stats[0], count[0]);
	else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
		update_stats(&runtime_cacherefs_stats[0], count[0]);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
		update_stats(&runtime_l1_dcache_stats[0], count[0]);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
		update_stats(&runtime_l1_icache_stats[0], count[0]);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
		update_stats(&runtime_ll_cache_stats[0], count[0]);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
		update_stats(&runtime_dtlb_cache_stats[0], count[0]);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
		update_stats(&runtime_itlb_cache_stats[0], count[0]);
}
    307 
    308 /*
    309  * Read out the results of a single counter:
    310  * aggregate counts across CPUs in system-wide mode
    311  */
    312 static int read_counter_aggr(struct perf_evsel *counter)
    313 {
    314 	struct perf_stat *ps = counter->priv;
    315 	u64 *count = counter->counts->aggr.values;
    316 	int i;
    317 
    318 	if (__perf_evsel__read(counter, perf_evsel__nr_cpus(counter),
    319 			       thread_map__nr(evsel_list->threads), scale) < 0)
    320 		return -1;
    321 
    322 	for (i = 0; i < 3; i++)
    323 		update_stats(&ps->res_stats[i], count[i]);
    324 
    325 	if (verbose) {
    326 		fprintf(output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
    327 			perf_evsel__name(counter), count[0], count[1], count[2]);
    328 	}
    329 
    330 	/*
    331 	 * Save the full runtime - to allow normalization during printout:
    332 	 */
    333 	update_shadow_stats(counter, count);
    334 
    335 	return 0;
    336 }
    337 
    338 /*
    339  * Read out the results of a single counter:
    340  * do not aggregate counts across CPUs in system-wide mode
    341  */
    342 static int read_counter(struct perf_evsel *counter)
    343 {
    344 	u64 *count;
    345 	int cpu;
    346 
    347 	for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
    348 		if (__perf_evsel__read_on_cpu(counter, cpu, 0, scale) < 0)
    349 			return -1;
    350 
    351 		count = counter->counts->cpu[cpu].values;
    352 
    353 		update_shadow_stats(counter, count);
    354 	}
    355 
    356 	return 0;
    357 }
    358 
/*
 * Emit one interval (-I) report: re-read every counter, print an
 * elapsed-time prefix and one line per counter/aggregation unit.
 * Re-prints the column header every 25 intervals (unless CSV).
 * Not supported when built for MacOS.
 */
static void print_interval(void)
{
#ifndef __APPLE__
	static int num_print_interval;
	struct perf_evsel *counter;
	struct perf_stat *ps;
	struct timespec ts, rs;
	char prefix[64];

	/*
	 * Each interval stands alone: zero the per-event run stats
	 * before reading so they don't accumulate across intervals.
	 */
	if (aggr_mode == AGGR_GLOBAL) {
		list_for_each_entry(counter, &evsel_list->entries, node) {
			ps = counter->priv;
			memset(ps->res_stats, 0, sizeof(ps->res_stats));
			read_counter_aggr(counter);
		}
	} else	{
		list_for_each_entry(counter, &evsel_list->entries, node) {
			ps = counter->priv;
			memset(ps->res_stats, 0, sizeof(ps->res_stats));
			read_counter(counter);
		}
	}

	/* Elapsed time since the counters were enabled (see ref_time). */
	clock_gettime(CLOCK_MONOTONIC, &ts);
	diff_timespec(&rs, &ts, &ref_time);
	sprintf(prefix, "%6lu.%09lu%s", rs.tv_sec, rs.tv_nsec, csv_sep);

	/* Header line, repeated every 25th interval for readability. */
	if (num_print_interval == 0 && !csv_output) {
		switch (aggr_mode) {
		case AGGR_SOCKET:
			fprintf(output, "#           time socket cpus             counts events\n");
			break;
		case AGGR_CORE:
			fprintf(output, "#           time core         cpus             counts events\n");
			break;
		case AGGR_NONE:
			fprintf(output, "#           time CPU                 counts events\n");
			break;
		case AGGR_GLOBAL:
		default:
			fprintf(output, "#           time             counts events\n");
		}
	}

	if (++num_print_interval == 25)
		num_print_interval = 0;

	/* One output line per aggregation unit, each prefixed with the time. */
	switch (aggr_mode) {
	case AGGR_CORE:
	case AGGR_SOCKET:
		print_aggr(prefix);
		break;
	case AGGR_NONE:
		list_for_each_entry(counter, &evsel_list->entries, node)
			print_counter(counter, prefix);
		break;
	case AGGR_GLOBAL:
	default:
		list_for_each_entry(counter, &evsel_list->entries, node)
			print_counter_aggr(counter, prefix);
	}

	fflush(output);
#else
	/* NOTE(review): errno is not set by anything here, so perror's
	 * suffix may be stale/misleading — fprintf(stderr, ...) would fit
	 * better; confirm before changing. */
	perror("print_interval not supported on MacOS");
#endif
}
    426 
    427 static void handle_initial_delay(void)
    428 {
    429 	struct perf_evsel *counter;
    430 
    431 	if (initial_delay) {
    432 		const int ncpus = cpu_map__nr(evsel_list->cpus),
    433 			nthreads = thread_map__nr(evsel_list->threads);
    434 
    435 		usleep(initial_delay * 1000);
    436 		list_for_each_entry(counter, &evsel_list->entries, node)
    437 			perf_evsel__enable(counter, ncpus, nthreads);
    438 	}
    439 }
    440 
/*
 * Core measurement: open all counters, run/attach to the workload,
 * optionally print interval reports, then read the final counts.
 * Returns the workload's exit status, or -1 on setup failure.
 * Not supported when built for MacOS.
 */
static int __run_perf_stat(int argc, const char **argv)
{
#ifndef __APPLE__
	char msg[512];
	unsigned long long t0, t1;
	struct perf_evsel *counter;
	struct timespec ts;
	int status = 0;
	const bool forks = (argc > 0);

	/* Sleep granularity: the -I interval, or 1s when just waiting. */
	if (interval) {
		ts.tv_sec  = interval / 1000;
		ts.tv_nsec = (interval % 1000) * 1000000;
	} else {
		ts.tv_sec  = 1;
		ts.tv_nsec = 0;
	}

	/* Fork (but don't yet exec) the workload so we can open
	 * counters on its pid before it starts running. */
	if (forks) {
		if (perf_evlist__prepare_workload(evsel_list, &target, argv,
						  false, false) < 0) {
			perror("failed to prepare workload");
			return -1;
		}
		child_pid = evsel_list->workload.pid;
	}

	if (group)
		perf_evlist__set_leader(evsel_list);

	list_for_each_entry(counter, &evsel_list->entries, node) {
		if (create_perf_stat_counter(counter) < 0) {
			/*
			 * PPC returns ENXIO for HW counters until 2.6.37
			 * (behavior changed with commit b0a873e).
			 */
			if (errno == EINVAL || errno == ENOSYS ||
			    errno == ENOENT || errno == EOPNOTSUPP ||
			    errno == ENXIO) {
				/* Unsupported events are skipped, not fatal. */
				if (verbose)
					ui__warning("%s event is not supported by the kernel.\n",
						    perf_evsel__name(counter));
				counter->supported = false;
				continue;
			}

			/* Any other open error is fatal: report, reap, bail. */
			perf_evsel__open_strerror(counter, &target,
						  errno, msg, sizeof(msg));
			ui__error("%s\n", msg);

			if (child_pid != -1)
				kill(child_pid, SIGTERM);

			return -1;
		}
		counter->supported = true;
	}

	if (perf_evlist__apply_filters(evsel_list)) {
		error("failed to set filter with %d (%s)\n", errno,
			strerror(errno));
		return -1;
	}

	/*
	 * Enable counters and exec the command:
	 */
	t0 = rdclock();
	clock_gettime(CLOCK_MONOTONIC, &ref_time);

	if (forks) {
		perf_evlist__start_workload(evsel_list);
		handle_initial_delay();

		/* Poll the child non-blockingly so we can print between
		 * sleeps; the final wait() below reaps it. */
		if (interval) {
			while (!waitpid(child_pid, &status, WNOHANG)) {
				nanosleep(&ts, NULL);
				print_interval();
			}
		}
		wait(&status);
		if (WIFSIGNALED(status))
			psignal(WTERMSIG(status), argv[0]);
	} else {
		/* Attached mode: loop until a signal handler sets 'done'. */
		handle_initial_delay();
		while (!done) {
			nanosleep(&ts, NULL);
			if (interval)
				print_interval();
		}
	}

	t1 = rdclock();

	update_stats(&walltime_nsecs_stats, t1 - t0);

	/* Final read of every counter, then close the fds. */
	if (aggr_mode == AGGR_GLOBAL) {
		list_for_each_entry(counter, &evsel_list->entries, node) {
			read_counter_aggr(counter);
			perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter),
					     thread_map__nr(evsel_list->threads));
		}
	} else {
		list_for_each_entry(counter, &evsel_list->entries, node) {
			read_counter(counter);
			perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), 1);
		}
	}

	/* Propagate the workload's exit code (0 when not forking). */
	return WEXITSTATUS(status);
#else
	return -1;
#endif
}
    555 
    556 static int run_perf_stat(int argc __maybe_unused, const char **argv)
    557 {
    558 	int ret;
    559 
    560 	if (pre_cmd) {
    561 		ret = system(pre_cmd);
    562 		if (ret)
    563 			return ret;
    564 	}
    565 
    566 	if (sync_run)
    567 		sync();
    568 
    569 	ret = __run_perf_stat(argc, argv);
    570 	if (ret)
    571 		return ret;
    572 
    573 	if (post_cmd) {
    574 		ret = system(post_cmd);
    575 		if (ret)
    576 			return ret;
    577 	}
    578 
    579 	return ret;
    580 }
    581 
    582 static void print_noise_pct(double total, double avg)
    583 {
    584 	double pct = rel_stddev_stats(total, avg);
    585 
    586 	if (csv_output)
    587 		fprintf(output, "%s%.2f%%", csv_sep, pct);
    588 	else if (pct)
    589 		fprintf(output, "  ( +-%6.2f%% )", pct);
    590 }
    591 
    592 static void print_noise(struct perf_evsel *evsel, double avg)
    593 {
    594 	struct perf_stat *ps;
    595 
    596 	if (run_count == 1)
    597 		return;
    598 
    599 	ps = evsel->priv;
    600 	print_noise_pct(stddev_stats(&ps->res_stats[0]), avg);
    601 }
    602 
    603 static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
    604 {
    605 	switch (aggr_mode) {
    606 	case AGGR_CORE:
    607 		fprintf(output, "S%d-C%*d%s%*d%s",
    608 			cpu_map__id_to_socket(id),
    609 			csv_output ? 0 : -8,
    610 			cpu_map__id_to_cpu(id),
    611 			csv_sep,
    612 			csv_output ? 0 : 4,
    613 			nr,
    614 			csv_sep);
    615 		break;
    616 	case AGGR_SOCKET:
    617 		fprintf(output, "S%*d%s%*d%s",
    618 			csv_output ? 0 : -5,
    619 			id,
    620 			csv_sep,
    621 			csv_output ? 0 : 4,
    622 			nr,
    623 			csv_sep);
    624 			break;
    625 	case AGGR_NONE:
    626 		fprintf(output, "CPU%*d%s",
    627 			csv_output ? 0 : -4,
    628 			perf_evsel__cpus(evsel)->map[id], csv_sep);
    629 		break;
    630 	case AGGR_GLOBAL:
    631 	default:
    632 		break;
    633 	}
    634 }
    635 
    636 static void nsec_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
    637 {
    638 	double msecs = avg / 1e6;
    639 	const char *fmt = csv_output ? "%.6f%s%s" : "%18.6f%s%-25s";
    640 
    641 	aggr_printout(evsel, cpu, nr);
    642 
    643 	fprintf(output, fmt, msecs, csv_sep, perf_evsel__name(evsel));
    644 
    645 	if (evsel->cgrp)
    646 		fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
    647 
    648 	if (csv_output || interval)
    649 		return;
    650 
    651 	if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
    652 		fprintf(output, " # %8.3f CPUs utilized          ",
    653 			avg / avg_stats(&walltime_nsecs_stats));
    654 	else
    655 		fprintf(output, "                                   ");
    656 }
    657 
    658 /* used for get_ratio_color() */
    659 enum grc_type {
    660 	GRC_STALLED_CYCLES_FE,
    661 	GRC_STALLED_CYCLES_BE,
    662 	GRC_CACHE_MISSES,
    663 	GRC_MAX_NR
    664 };
    665 
    666 static const char *get_ratio_color(enum grc_type type, double ratio)
    667 {
    668 	static const double grc_table[GRC_MAX_NR][3] = {
    669 		[GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
    670 		[GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
    671 		[GRC_CACHE_MISSES] 	= { 20.0, 10.0, 5.0 },
    672 	};
    673 	const char *color = PERF_COLOR_NORMAL;
    674 
    675 	if (ratio > grc_table[type][0])
    676 		color = PERF_COLOR_RED;
    677 	else if (ratio > grc_table[type][1])
    678 		color = PERF_COLOR_MAGENTA;
    679 	else if (ratio > grc_table[type][2])
    680 		color = PERF_COLOR_YELLOW;
    681 
    682 	return color;
    683 }
    684 
    685 static void print_stalled_cycles_frontend(int cpu,
    686 					  struct perf_evsel *evsel
    687 					  __maybe_unused, double avg)
    688 {
    689 	double total, ratio = 0.0;
    690 	const char *color;
    691 
    692 	total = avg_stats(&runtime_cycles_stats[cpu]);
    693 
    694 	if (total)
    695 		ratio = avg / total * 100.0;
    696 
    697 	color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);
    698 
    699 	fprintf(output, " #  ");
    700 	color_fprintf(output, color, "%6.2f%%", ratio);
    701 	fprintf(output, " frontend cycles idle   ");
    702 }
    703 
    704 static void print_stalled_cycles_backend(int cpu,
    705 					 struct perf_evsel *evsel
    706 					 __maybe_unused, double avg)
    707 {
    708 	double total, ratio = 0.0;
    709 	const char *color;
    710 
    711 	total = avg_stats(&runtime_cycles_stats[cpu]);
    712 
    713 	if (total)
    714 		ratio = avg / total * 100.0;
    715 
    716 	color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);
    717 
    718 	fprintf(output, " #  ");
    719 	color_fprintf(output, color, "%6.2f%%", ratio);
    720 	fprintf(output, " backend  cycles idle   ");
    721 }
    722 
    723 static void print_branch_misses(int cpu,
    724 				struct perf_evsel *evsel __maybe_unused,
    725 				double avg)
    726 {
    727 	double total, ratio = 0.0;
    728 	const char *color;
    729 
    730 	total = avg_stats(&runtime_branches_stats[cpu]);
    731 
    732 	if (total)
    733 		ratio = avg / total * 100.0;
    734 
    735 	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
    736 
    737 	fprintf(output, " #  ");
    738 	color_fprintf(output, color, "%6.2f%%", ratio);
    739 	fprintf(output, " of all branches        ");
    740 }
    741 
    742 static void print_l1_dcache_misses(int cpu,
    743 				   struct perf_evsel *evsel __maybe_unused,
    744 				   double avg)
    745 {
    746 	double total, ratio = 0.0;
    747 	const char *color;
    748 
    749 	total = avg_stats(&runtime_l1_dcache_stats[cpu]);
    750 
    751 	if (total)
    752 		ratio = avg / total * 100.0;
    753 
    754 	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
    755 
    756 	fprintf(output, " #  ");
    757 	color_fprintf(output, color, "%6.2f%%", ratio);
    758 	fprintf(output, " of all L1-dcache hits  ");
    759 }
    760 
    761 static void print_l1_icache_misses(int cpu,
    762 				   struct perf_evsel *evsel __maybe_unused,
    763 				   double avg)
    764 {
    765 	double total, ratio = 0.0;
    766 	const char *color;
    767 
    768 	total = avg_stats(&runtime_l1_icache_stats[cpu]);
    769 
    770 	if (total)
    771 		ratio = avg / total * 100.0;
    772 
    773 	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
    774 
    775 	fprintf(output, " #  ");
    776 	color_fprintf(output, color, "%6.2f%%", ratio);
    777 	fprintf(output, " of all L1-icache hits  ");
    778 }
    779 
    780 static void print_dtlb_cache_misses(int cpu,
    781 				    struct perf_evsel *evsel __maybe_unused,
    782 				    double avg)
    783 {
    784 	double total, ratio = 0.0;
    785 	const char *color;
    786 
    787 	total = avg_stats(&runtime_dtlb_cache_stats[cpu]);
    788 
    789 	if (total)
    790 		ratio = avg / total * 100.0;
    791 
    792 	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
    793 
    794 	fprintf(output, " #  ");
    795 	color_fprintf(output, color, "%6.2f%%", ratio);
    796 	fprintf(output, " of all dTLB cache hits ");
    797 }
    798 
    799 static void print_itlb_cache_misses(int cpu,
    800 				    struct perf_evsel *evsel __maybe_unused,
    801 				    double avg)
    802 {
    803 	double total, ratio = 0.0;
    804 	const char *color;
    805 
    806 	total = avg_stats(&runtime_itlb_cache_stats[cpu]);
    807 
    808 	if (total)
    809 		ratio = avg / total * 100.0;
    810 
    811 	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
    812 
    813 	fprintf(output, " #  ");
    814 	color_fprintf(output, color, "%6.2f%%", ratio);
    815 	fprintf(output, " of all iTLB cache hits ");
    816 }
    817 
    818 static void print_ll_cache_misses(int cpu,
    819 				  struct perf_evsel *evsel __maybe_unused,
    820 				  double avg)
    821 {
    822 	double total, ratio = 0.0;
    823 	const char *color;
    824 
    825 	total = avg_stats(&runtime_ll_cache_stats[cpu]);
    826 
    827 	if (total)
    828 		ratio = avg / total * 100.0;
    829 
    830 	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
    831 
    832 	fprintf(output, " #  ");
    833 	color_fprintf(output, color, "%6.2f%%", ratio);
    834 	fprintf(output, " of all LL-cache hits   ");
    835 }
    836 
/*
 * Print one absolute counter value plus, where a matching base event
 * was also counted (see update_shadow_stats()), a derived comment:
 * insns per cycle, miss ratios, GHz, or a generic rate. Comments are
 * suppressed in CSV and interval modes.
 */
static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
{
	double total, ratio = 0.0;
	const char *fmt;

	/* Value format: bare for CSV, thousands-grouped when big_num. */
	if (csv_output)
		fmt = "%.0f%s%s";
	else if (big_num)
		fmt = "%'18.0f%s%-25s";
	else
		fmt = "%18.0f%s%-25s";

	aggr_printout(evsel, cpu, nr);

	/* In global aggregation mode all shadow stats live in slot 0. */
	if (aggr_mode == AGGR_GLOBAL)
		cpu = 0;

	fprintf(output, fmt, avg, csv_sep, perf_evsel__name(evsel));

	if (evsel->cgrp)
		fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);

	if (csv_output || interval)
		return;

	if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
		/* Instructions: print IPC against the cycles shadow stat. */
		total = avg_stats(&runtime_cycles_stats[cpu]);
		if (total)
			ratio = avg / total;

		fprintf(output, " #   %5.2f  insns per cycle        ", ratio);

		/* Second line: stalls per insn, using the worse of FE/BE. */
		total = avg_stats(&runtime_stalled_cycles_front_stats[cpu]);
		total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[cpu]));

		if (total && avg) {
			ratio = total / avg;
			fprintf(output, "\n                                             #   %5.2f  stalled cycles per insn", ratio);
		}

	} else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) &&
			runtime_branches_stats[cpu].n != 0) {
		print_branch_misses(cpu, evsel, avg);
	} else if (
		/* L1-dcache read misses vs. the L1-dcache shadow stat. */
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1D |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
			runtime_l1_dcache_stats[cpu].n != 0) {
		print_l1_dcache_misses(cpu, evsel, avg);
	} else if (
		/* L1-icache read misses. */
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1I |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
			runtime_l1_icache_stats[cpu].n != 0) {
		print_l1_icache_misses(cpu, evsel, avg);
	} else if (
		/* dTLB read misses. */
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_DTLB |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
			runtime_dtlb_cache_stats[cpu].n != 0) {
		print_dtlb_cache_misses(cpu, evsel, avg);
	} else if (
		/* iTLB read misses. */
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_ITLB |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
			runtime_itlb_cache_stats[cpu].n != 0) {
		print_itlb_cache_misses(cpu, evsel, avg);
	} else if (
		/* Last-level-cache read misses. */
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_LL |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
			runtime_ll_cache_stats[cpu].n != 0) {
		print_ll_cache_misses(cpu, evsel, avg);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) &&
			runtime_cacherefs_stats[cpu].n != 0) {
		/* Cache misses as a percentage of cache references. */
		total = avg_stats(&runtime_cacherefs_stats[cpu]);

		if (total)
			ratio = avg * 100 / total;

		fprintf(output, " # %8.3f %% of all cache refs    ", ratio);

	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
		print_stalled_cycles_frontend(cpu, evsel, avg);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
		print_stalled_cycles_backend(cpu, evsel, avg);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
		/* Cycles per nanosecond == GHz. */
		total = avg_stats(&runtime_nsecs_stats[cpu]);

		if (total)
			ratio = 1.0 * avg / total;

		fprintf(output, " # %8.3f GHz                    ", ratio);
	} else if (runtime_nsecs_stats[cpu].n != 0) {
		/* Generic rate: counts per unit time (M/sec or K/sec). */
		char unit = 'M';

		total = avg_stats(&runtime_nsecs_stats[cpu]);

		if (total)
			ratio = 1000.0 * avg / total;
		if (ratio < 0.001) {
			ratio *= 1000;
			unit = 'K';
		}

		fprintf(output, " # %8.3f %c/sec                  ", ratio, unit);
	} else {
		/* No usable base stat: pad so columns still line up. */
		fprintf(output, "                                   ");
	}
}
    952 
/*
 * Print one summary line per (aggregation id, event) pair for the
 * AGGR_SOCKET / AGGR_CORE modes: the per-cpu counts of every CPU that
 * maps to the same socket/core id are summed before printing.
 *
 * @prefix: optional string (interval timestamp) printed at the start
 *          of each line; may be NULL.
 */
static void print_aggr(char *prefix)
{
	struct perf_evsel *counter;
	int cpu, cpu2, s, s2, id, nr;
	u64 ena, run, val;

	/* Aggregation map and id-lookup callback are both required. */
	if (!(aggr_map || aggr_get_id))
		return;

	for (s = 0; s < aggr_map->nr; s++) {
		id = aggr_map->map[s];
		list_for_each_entry(counter, &evsel_list->entries, node) {
			/* Sum value/enabled/running over all CPUs in this aggregate. */
			val = ena = run = 0;
			nr = 0;	/* how many CPUs contributed to this line */
			for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
				/* translate the evsel-local index to a logical CPU number */
				cpu2 = perf_evsel__cpus(counter)->map[cpu];
				s2 = aggr_get_id(evsel_list->cpus, cpu2);
				if (s2 != id)
					continue;
				val += counter->counts->cpu[cpu].val;
				ena += counter->counts->cpu[cpu].ena;
				run += counter->counts->cpu[cpu].run;
				nr++;
			}
			if (prefix)
				fprintf(output, "%s", prefix);

			if (run == 0 || ena == 0) {
				/* Counter never ran: emit a <not counted>/<not supported> row. */
				aggr_printout(counter, id, nr);

				fprintf(output, "%*s%s%*s",
					csv_output ? 0 : 18,
					counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
					csv_sep,
					csv_output ? 0 : -24,
					perf_evsel__name(counter));

				if (counter->cgrp)
					fprintf(output, "%s%s",
						csv_sep, counter->cgrp->name);

				fputc('\n', output);
				continue;
			}

			if (nsec_counter(counter))
				nsec_printout(id, nr, counter, val);
			else
				abs_printout(id, nr, counter, val);

			if (!csv_output) {
				print_noise(counter, 1.0);

				/* show the multiplexing/scaling percentage */
				if (run != ena)
					fprintf(output, "  (%.2f%%)",
						100.0 * run / ena);
			}
			fputc('\n', output);
		}
	}
}
   1014 
   1015 /*
   1016  * Print out the results of a single counter:
   1017  * aggregated counts in system-wide mode
   1018  */
   1019 static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
   1020 {
   1021 	struct perf_stat *ps = counter->priv;
   1022 	double avg = avg_stats(&ps->res_stats[0]);
   1023 	int scaled = counter->counts->scaled;
   1024 
   1025 	if (prefix)
   1026 		fprintf(output, "%s", prefix);
   1027 
   1028 	if (scaled == -1) {
   1029 		fprintf(output, "%*s%s%*s",
   1030 			csv_output ? 0 : 18,
   1031 			counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
   1032 			csv_sep,
   1033 			csv_output ? 0 : -24,
   1034 			perf_evsel__name(counter));
   1035 
   1036 		if (counter->cgrp)
   1037 			fprintf(output, "%s%s", csv_sep, counter->cgrp->name);
   1038 
   1039 		fputc('\n', output);
   1040 		return;
   1041 	}
   1042 
   1043 	if (nsec_counter(counter))
   1044 		nsec_printout(-1, 0, counter, avg);
   1045 	else
   1046 		abs_printout(-1, 0, counter, avg);
   1047 
   1048 	print_noise(counter, avg);
   1049 
   1050 	if (csv_output) {
   1051 		fputc('\n', output);
   1052 		return;
   1053 	}
   1054 
   1055 	if (scaled) {
   1056 		double avg_enabled, avg_running;
   1057 
   1058 		avg_enabled = avg_stats(&ps->res_stats[1]);
   1059 		avg_running = avg_stats(&ps->res_stats[2]);
   1060 
   1061 		fprintf(output, " [%5.2f%%]", 100 * avg_running / avg_enabled);
   1062 	}
   1063 	fprintf(output, "\n");
   1064 }
   1065 
   1066 /*
   1067  * Print out the results of a single counter:
   1068  * does not use aggregated count in system-wide
   1069  */
   1070 static void print_counter(struct perf_evsel *counter, char *prefix)
   1071 {
   1072 	u64 ena, run, val;
   1073 	int cpu;
   1074 
   1075 	for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
   1076 		val = counter->counts->cpu[cpu].val;
   1077 		ena = counter->counts->cpu[cpu].ena;
   1078 		run = counter->counts->cpu[cpu].run;
   1079 
   1080 		if (prefix)
   1081 			fprintf(output, "%s", prefix);
   1082 
   1083 		if (run == 0 || ena == 0) {
   1084 			fprintf(output, "CPU%*d%s%*s%s%*s",
   1085 				csv_output ? 0 : -4,
   1086 				perf_evsel__cpus(counter)->map[cpu], csv_sep,
   1087 				csv_output ? 0 : 18,
   1088 				counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
   1089 				csv_sep,
   1090 				csv_output ? 0 : -24,
   1091 				perf_evsel__name(counter));
   1092 
   1093 			if (counter->cgrp)
   1094 				fprintf(output, "%s%s",
   1095 					csv_sep, counter->cgrp->name);
   1096 
   1097 			fputc('\n', output);
   1098 			continue;
   1099 		}
   1100 
   1101 		if (nsec_counter(counter))
   1102 			nsec_printout(cpu, 0, counter, val);
   1103 		else
   1104 			abs_printout(cpu, 0, counter, val);
   1105 
   1106 		if (!csv_output) {
   1107 			print_noise(counter, 1.0);
   1108 
   1109 			if (run != ena)
   1110 				fprintf(output, "  (%.2f%%)",
   1111 					100.0 * run / ena);
   1112 		}
   1113 		fputc('\n', output);
   1114 	}
   1115 }
   1116 
   1117 static void print_stat(int argc, const char **argv)
   1118 {
   1119 	struct perf_evsel *counter;
   1120 	int i;
   1121 
   1122 	fflush(stdout);
   1123 
   1124 	if (!csv_output) {
   1125 		fprintf(output, "\n");
   1126 		fprintf(output, " Performance counter stats for ");
   1127 		if (!perf_target__has_task(&target)) {
   1128 			fprintf(output, "\'%s", argv[0]);
   1129 			for (i = 1; i < argc; i++)
   1130 				fprintf(output, " %s", argv[i]);
   1131 		} else if (target.pid)
   1132 			fprintf(output, "process id \'%s", target.pid);
   1133 		else
   1134 			fprintf(output, "thread id \'%s", target.tid);
   1135 
   1136 		fprintf(output, "\'");
   1137 		if (run_count > 1)
   1138 			fprintf(output, " (%d runs)", run_count);
   1139 		fprintf(output, ":\n\n");
   1140 	}
   1141 
   1142 	switch (aggr_mode) {
   1143 	case AGGR_CORE:
   1144 	case AGGR_SOCKET:
   1145 		print_aggr(NULL);
   1146 		break;
   1147 	case AGGR_GLOBAL:
   1148 		list_for_each_entry(counter, &evsel_list->entries, node)
   1149 			print_counter_aggr(counter, NULL);
   1150 		break;
   1151 	case AGGR_NONE:
   1152 		list_for_each_entry(counter, &evsel_list->entries, node)
   1153 			print_counter(counter, NULL);
   1154 		break;
   1155 	default:
   1156 		break;
   1157 	}
   1158 
   1159 	if (!csv_output) {
   1160 		if (!null_run)
   1161 			fprintf(output, "\n");
   1162 		fprintf(output, " %17.9f seconds time elapsed",
   1163 				avg_stats(&walltime_nsecs_stats)/1e9);
   1164 		if (run_count > 1) {
   1165 			fprintf(output, "                                        ");
   1166 			print_noise_pct(stddev_stats(&walltime_nsecs_stats),
   1167 					avg_stats(&walltime_nsecs_stats));
   1168 		}
   1169 		fprintf(output, "\n\n");
   1170 	}
   1171 }
   1172 
   1173 static volatile int signr = -1;
   1174 
   1175 static void skip_signal(int signo)
   1176 {
   1177 	if ((child_pid == -1) || interval)
   1178 		done = 1;
   1179 
   1180 	signr = signo;
   1181 	/*
   1182 	 * render child_pid harmless
   1183 	 * won't send SIGTERM to a random
   1184 	 * process in case of race condition
   1185 	 * and fast PID recycling
   1186 	 */
   1187 	child_pid = -1;
   1188 }
   1189 
   1190 static void sig_atexit(void)
   1191 {
   1192 	sigset_t set, oset;
   1193 
   1194 	/*
   1195 	 * avoid race condition with SIGCHLD handler
   1196 	 * in skip_signal() which is modifying child_pid
   1197 	 * goal is to avoid send SIGTERM to a random
   1198 	 * process
   1199 	 */
   1200 	sigemptyset(&set);
   1201 	sigaddset(&set, SIGCHLD);
   1202 	sigprocmask(SIG_BLOCK, &set, &oset);
   1203 
   1204 	if (child_pid != -1)
   1205 		kill(child_pid, SIGTERM);
   1206 
   1207 	sigprocmask(SIG_SETMASK, &oset, NULL);
   1208 
   1209 	if (signr == -1)
   1210 		return;
   1211 
   1212 	signal(signr, SIG_DFL);
   1213 	kill(getpid(), signr);
   1214 }
   1215 
   1216 static int stat__set_big_num(const struct option *opt __maybe_unused,
   1217 			     const char *s __maybe_unused, int unset)
   1218 {
   1219 	big_num_opt = unset ? 0 : 1;
   1220 	return 0;
   1221 }
   1222 
   1223 static int perf_stat_init_aggr_mode(void)
   1224 {
   1225 	switch (aggr_mode) {
   1226 	case AGGR_SOCKET:
   1227 		if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) {
   1228 			perror("cannot build socket map");
   1229 			return -1;
   1230 		}
   1231 		aggr_get_id = cpu_map__get_socket;
   1232 		break;
   1233 	case AGGR_CORE:
   1234 		if (cpu_map__build_core_map(evsel_list->cpus, &aggr_map)) {
   1235 			perror("cannot build core map");
   1236 			return -1;
   1237 		}
   1238 		aggr_get_id = cpu_map__get_core;
   1239 		break;
   1240 	case AGGR_NONE:
   1241 	case AGGR_GLOBAL:
   1242 	default:
   1243 		break;
   1244 	}
   1245 	return 0;
   1246 }
   1247 
   1248 
   1249 /*
   1250  * Add default attributes, if there were no attributes specified or
   1251  * if -d/--detailed, -d -d or -d -d -d is used:
   1252  */
/*
 * Populate evsel_list with the standard event set when the user gave
 * no -e events, then append one extra attribute table per -d level.
 * Returns 0 on success, negative on allocation failure.
 */
static int add_default_attributes(void)
{
	/* Baseline events: the classic 'perf stat' summary. */
	struct perf_event_attr default_attrs[] = {

  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK		},
  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES	},
  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS		},
  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS		},

  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES		},
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND	},
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND	},
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS		},
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS	},
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES		},

};

/*
 * Detailed stats (-d), covering the L1 and last level data caches:
 */
	struct perf_event_attr detailed_attrs[] = {

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_LL			<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_LL			<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
};

/*
 * Very detailed stats (-d -d), covering the instruction cache and the TLB caches:
 */
	struct perf_event_attr very_detailed_attrs[] = {

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1I		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1I		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_DTLB		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_DTLB		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_ITLB		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_ITLB		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},

};

/*
 * Very, very detailed stats (-d -d -d), adding prefetch events:
 */
	struct perf_event_attr very_very_detailed_attrs[] = {

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_PREFETCH	<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_PREFETCH	<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
};

	/* Set attrs if no event is selected and !null_run: */
	if (null_run)
		return 0;

	/* Only install the defaults when the user selected no events. */
	if (!evsel_list->nr_entries) {
		if (perf_evlist__add_default_attrs(evsel_list, default_attrs) < 0)
			return -1;
	}

	/* Detailed events get appended to the event list: */

	if (detailed_run <  1)
		return 0;

	/* Append detailed run extra attributes: */
	if (perf_evlist__add_default_attrs(evsel_list, detailed_attrs) < 0)
		return -1;

	if (detailed_run < 2)
		return 0;

	/* Append very detailed run extra attributes: */
	if (perf_evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0)
		return -1;

	if (detailed_run < 3)
		return 0;

	/* Append very, very detailed run extra attributes: */
	return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs);
}
   1393 
   1394 int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
   1395 {
   1396 #ifndef __APPLE__
   1397 	bool append_file = false;
   1398 	int output_fd = 0;
   1399 	const char *output_name	= NULL;
   1400 	const struct option options[] = {
   1401 	OPT_CALLBACK('e', "event", &evsel_list, "event",
   1402 		     "event selector. use 'perf list' to list available events",
   1403 		     parse_events_option),
   1404 	OPT_CALLBACK(0, "filter", &evsel_list, "filter",
   1405 		     "event filter", parse_filter),
   1406 	OPT_BOOLEAN('i', "no-inherit", &no_inherit,
   1407 		    "child tasks do not inherit counters"),
   1408 	OPT_STRING('p', "pid", &target.pid, "pid",
   1409 		   "stat events on existing process id"),
   1410 	OPT_STRING('t', "tid", &target.tid, "tid",
   1411 		   "stat events on existing thread id"),
   1412 	OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
   1413 		    "system-wide collection from all CPUs"),
   1414 	OPT_BOOLEAN('g', "group", &group,
   1415 		    "put the counters into a counter group"),
   1416 	OPT_BOOLEAN('c', "scale", &scale, "scale/normalize counters"),
   1417 	OPT_INCR('v', "verbose", &verbose,
   1418 		    "be more verbose (show counter open errors, etc)"),
   1419 	OPT_INTEGER('r', "repeat", &run_count,
   1420 		    "repeat command and print average + stddev (max: 100, forever: 0)"),
   1421 	OPT_BOOLEAN('n', "null", &null_run,
   1422 		    "null run - dont start any counters"),
   1423 	OPT_INCR('d', "detailed", &detailed_run,
   1424 		    "detailed run - start a lot of events"),
   1425 	OPT_BOOLEAN('S', "sync", &sync_run,
   1426 		    "call sync() before starting a run"),
   1427 	OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL,
   1428 			   "print large numbers with thousands\' separators",
   1429 			   stat__set_big_num),
   1430 	OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
   1431 		    "list of cpus to monitor in system-wide"),
   1432 	OPT_SET_UINT('A', "no-aggr", &aggr_mode,
   1433 		    "disable CPU count aggregation", AGGR_NONE),
   1434 	OPT_STRING('x', "field-separator", &csv_sep, "separator",
   1435 		   "print counts with custom separator"),
   1436 	OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
   1437 		     "monitor event in cgroup name only", parse_cgroups),
   1438 	OPT_STRING('o', "output", &output_name, "file", "output file name"),
   1439 	OPT_BOOLEAN(0, "append", &append_file, "append to the output file"),
   1440 	OPT_INTEGER(0, "log-fd", &output_fd,
   1441 		    "log output to fd, instead of stderr"),
   1442 	OPT_STRING(0, "pre", &pre_cmd, "command",
   1443 			"command to run prior to the measured command"),
   1444 	OPT_STRING(0, "post", &post_cmd, "command",
   1445 			"command to run after to the measured command"),
   1446 	OPT_UINTEGER('I', "interval-print", &interval,
   1447 		    "print counts at regular interval in ms (>= 100)"),
   1448 	OPT_SET_UINT(0, "per-socket", &aggr_mode,
   1449 		     "aggregate counts per processor socket", AGGR_SOCKET),
   1450 	OPT_SET_UINT(0, "per-core", &aggr_mode,
   1451 		     "aggregate counts per physical processor core", AGGR_CORE),
   1452 	OPT_UINTEGER('D', "delay", &initial_delay,
   1453 		     "ms to wait before starting measurement after program start"),
   1454 	OPT_END()
   1455 	};
   1456 	const char * const stat_usage[] = {
   1457 		"perf stat [<options>] [<command>]",
   1458 		NULL
   1459 	};
   1460 	int status = -ENOMEM, run_idx;
   1461 	const char *mode;
   1462 
   1463 	setlocale(LC_ALL, "");
   1464 
   1465 	evsel_list = perf_evlist__new();
   1466 	if (evsel_list == NULL)
   1467 		return -ENOMEM;
   1468 
   1469 	argc = parse_options(argc, argv, options, stat_usage,
   1470 		PARSE_OPT_STOP_AT_NON_OPTION);
   1471 
   1472 	output = stderr;
   1473 	if (output_name && strcmp(output_name, "-"))
   1474 		output = NULL;
   1475 
   1476 	if (output_name && output_fd) {
   1477 		fprintf(stderr, "cannot use both --output and --log-fd\n");
   1478 		usage_with_options(stat_usage, options);
   1479 	}
   1480 
   1481 	if (output_fd < 0) {
   1482 		fprintf(stderr, "argument to --log-fd must be a > 0\n");
   1483 		usage_with_options(stat_usage, options);
   1484 	}
   1485 
   1486 	if (!output) {
   1487 		struct timespec tm;
   1488 		mode = append_file ? "a" : "w";
   1489 
   1490 		output = fopen(output_name, mode);
   1491 		if (!output) {
   1492 			perror("failed to create output file");
   1493 			return -1;
   1494 		}
   1495 		clock_gettime(CLOCK_REALTIME, &tm);
   1496 		fprintf(output, "# started on %s\n", ctime(&tm.tv_sec));
   1497 	} else if (output_fd > 0) {
   1498 		mode = append_file ? "a" : "w";
   1499 		output = fdopen(output_fd, mode);
   1500 		if (!output) {
   1501 			perror("Failed opening logfd");
   1502 			return -errno;
   1503 		}
   1504 	}
   1505 
   1506 	if (csv_sep) {
   1507 		csv_output = true;
   1508 		if (!strcmp(csv_sep, "\\t"))
   1509 			csv_sep = "\t";
   1510 	} else
   1511 		csv_sep = DEFAULT_SEPARATOR;
   1512 
   1513 	/*
   1514 	 * let the spreadsheet do the pretty-printing
   1515 	 */
   1516 	if (csv_output) {
   1517 		/* User explicitly passed -B? */
   1518 		if (big_num_opt == 1) {
   1519 			fprintf(stderr, "-B option not supported with -x\n");
   1520 			usage_with_options(stat_usage, options);
   1521 		} else /* Nope, so disable big number formatting */
   1522 			big_num = false;
   1523 	} else if (big_num_opt == 0) /* User passed --no-big-num */
   1524 		big_num = false;
   1525 
   1526 	if (!argc && !perf_target__has_task(&target))
   1527 		usage_with_options(stat_usage, options);
   1528 	if (run_count < 0) {
   1529 		usage_with_options(stat_usage, options);
   1530 	} else if (run_count == 0) {
   1531 		forever = true;
   1532 		run_count = 1;
   1533 	}
   1534 
   1535 	/* no_aggr, cgroup are for system-wide only */
   1536 	if ((aggr_mode != AGGR_GLOBAL || nr_cgroups)
   1537 	     && !perf_target__has_cpu(&target)) {
   1538 		fprintf(stderr, "both cgroup and no-aggregation "
   1539 			"modes only available in system-wide mode\n");
   1540 
   1541 		usage_with_options(stat_usage, options);
   1542 		return -1;
   1543 	}
   1544 
   1545 	if (add_default_attributes())
   1546 		goto out;
   1547 
   1548 	perf_target__validate(&target);
   1549 
   1550 	if (perf_evlist__create_maps(evsel_list, &target) < 0) {
   1551 		if (perf_target__has_task(&target))
   1552 			pr_err("Problems finding threads of monitor\n");
   1553 		if (perf_target__has_cpu(&target))
   1554 			perror("failed to parse CPUs map");
   1555 
   1556 		usage_with_options(stat_usage, options);
   1557 		return -1;
   1558 	}
   1559 	if (interval && interval < 100) {
   1560 		pr_err("print interval must be >= 100ms\n");
   1561 		usage_with_options(stat_usage, options);
   1562 		return -1;
   1563 	}
   1564 
   1565 	if (perf_evlist__alloc_stats(evsel_list, interval))
   1566 		goto out_free_maps;
   1567 
   1568 	if (perf_stat_init_aggr_mode())
   1569 		goto out;
   1570 
   1571 	/*
   1572 	 * We dont want to block the signals - that would cause
   1573 	 * child tasks to inherit that and Ctrl-C would not work.
   1574 	 * What we want is for Ctrl-C to work in the exec()-ed
   1575 	 * task, but being ignored by perf stat itself:
   1576 	 */
   1577 	atexit(sig_atexit);
   1578 	if (!forever)
   1579 		signal(SIGINT,  skip_signal);
   1580 	signal(SIGCHLD, skip_signal);
   1581 	signal(SIGALRM, skip_signal);
   1582 	signal(SIGABRT, skip_signal);
   1583 
   1584 	status = 0;
   1585 	for (run_idx = 0; forever || run_idx < run_count; run_idx++) {
   1586 		if (run_count != 1 && verbose)
   1587 			fprintf(output, "[ perf stat: executing run #%d ... ]\n",
   1588 				run_idx + 1);
   1589 
   1590 		status = run_perf_stat(argc, argv);
   1591 		if (forever && status != -1) {
   1592 			print_stat(argc, argv);
   1593 			perf_stat__reset_stats(evsel_list);
   1594 		}
   1595 	}
   1596 
   1597 	if (!forever && status != -1 && !interval)
   1598 		print_stat(argc, argv);
   1599 
   1600 	perf_evlist__free_stats(evsel_list);
   1601 out_free_maps:
   1602 	perf_evlist__delete_maps(evsel_list);
   1603 out:
   1604 	perf_evlist__delete(evsel_list);
   1605 	return status;
   1606 #else
   1607 	perror("cmd_stat not supported on MacOS");
   1608 	return -1;
   1609 #endif
   1610 }
   1611