/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#define _FILE_OFFSET_BITS 64

#include "builtin.h"

#include "perf.h"

#include "util/build-id.h"
#include "util/util.h"
#include "util/parse-options.h"
#include "util/parse-events.h"

#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/session.h"
#include "util/symbol.h"
#include "util/cpumap.h"
#include "util/thread_map.h"

#include <unistd.h>
#include <sched.h>
#include <sys/mman.h>

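/*
 * Convenience accessor for the per-cpu, per-thread file descriptor array
 * attached to an evsel: FD(evsel, cpu_idx, thread_idx) yields the fd
 * returned by sys_perf_event_open() for that (cpu, thread) pair.
 */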
#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))

enum write_mode_t {
	WRITE_FORCE,
	WRITE_APPEND
};

static u64			user_interval			= ULLONG_MAX;
static u64			default_interval		=      0;

static unsigned int		page_size;
static unsigned int		mmap_pages			= UINT_MAX;
static unsigned int		user_freq			= UINT_MAX;
static int			freq				=   1000;
static int			output;
static int			pipe_output			=      0;
static const char		*output_name			= NULL;
static int			group				=      0;
static int			realtime_prio			=      0;
static bool			nodelay				=  false;
static bool			raw_samples			=  false;
static bool			sample_id_all_avail		=   true;
static bool			system_wide			=  false;
static pid_t			target_pid			=     -1;
static pid_t			target_tid			=     -1;
static pid_t			child_pid			=     -1;
static bool			no_inherit			=  false;
static enum write_mode_t	write_mode			= WRITE_FORCE;
static bool			call_graph			=  false;
static bool			inherit_stat			=  false;
static bool			no_samples			=  false;
static bool			sample_address			=  false;
static bool			sample_time			=  false;
static bool			no_buildid			=  false;
static bool			no_buildid_cache		=  false;
static struct perf_evlist	*evsel_list;

static long			samples				=      0;
static u64			bytes_written			=      0;

static int			file_new			=      1;
static off_t			post_processing_offset;

static struct perf_session	*session;
static const char		*cpu_list;

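/*
 * Account for data that reached the output file without going through
 * write_output() (e.g. tracing data written directly to the fd), so that
 * the header's data_size stays accurate.
 */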
static void advance_output(size_t size)
{
	bytes_written += size;
}

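/*
 * Write the whole buffer to the output file, retrying on short writes:
 * write(2) may transfer fewer bytes than requested, so loop until done.
 */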
static void write_output(void *buf, size_t size)
{
	while (size) {
		int ret = write(output, buf, size);

		if (ret < 0)
			die("failed to write");

		size -= ret;
		buf += ret;

		bytes_written += ret;
	}
}

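/*
 * Callback used by the perf_event__synthesize_*() helpers: synthesized
 * events are simply appended verbatim to the output file.
 */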
static int process_synthesized_event(union perf_event *event,
				     struct perf_sample *sample __used,
				     struct perf_session *self __used)
{
	write_output(event, event->header.size);
	return 0;
}

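/*
 * Drain one ring buffer. The kernel advances the head as it produces
 * samples; everything between our cached tail (md->prev) and the head is
 * copied out. A read that wraps past the end of the buffer is split in
 * two: e.g. with mask = 0xffff (64 KiB), old = 0xfff0 and head = 0x10010,
 * the first write covers the 16 bytes up to the end of the buffer and the
 * second covers the 16 bytes from its start.
 */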
static void mmap_read(struct perf_mmap *md)
{
	unsigned int head = perf_mmap__read_head(md);
	unsigned int old = md->prev;
	unsigned char *data = md->base + page_size;
	unsigned long size;
	void *buf;

	if (old == head)
		return;

	samples++;

	size = head - old;

	if ((old & md->mask) + size != (head & md->mask)) {
		buf = &data[old & md->mask];
		size = md->mask + 1 - (old & md->mask);
		old += size;

		write_output(buf, size);
	}

	buf = &data[old & md->mask];
	size = head - old;
	old += size;

	write_output(buf, size);

	md->prev = old;
	perf_mmap__write_tail(md, old);
}

static volatile int done = 0;
static volatile int signr = -1;

static void sig_handler(int sig)
{
	done = 1;
	signr = sig;
}

static void sig_atexit(void)
{
	if (child_pid > 0)
		kill(child_pid, SIGTERM);

	if (signr == -1 || signr == SIGUSR1)
		return;

	signal(signr, SIG_DFL);
	kill(getpid(), signr);
}

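/*
 * Fill in the perf_event_attr for one counter from the global option
 * flags: sampling period/frequency, which fields each PERF_SAMPLE_*
 * record carries, inheritance, and whether the counter starts disabled
 * until the forked workload exec()s.
 */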
static void config_attr(struct perf_evsel *evsel, struct perf_evlist *evlist)
{
	struct perf_event_attr *attr = &evsel->attr;
	int track = !evsel->idx; /* only the first counter needs these */

	attr->inherit		= !no_inherit;
	attr->read_format	= PERF_FORMAT_TOTAL_TIME_ENABLED |
				  PERF_FORMAT_TOTAL_TIME_RUNNING |
				  PERF_FORMAT_ID;

	attr->sample_type	|= PERF_SAMPLE_IP | PERF_SAMPLE_TID;

	if (evlist->nr_entries > 1)
		attr->sample_type |= PERF_SAMPLE_ID;

	/*
	 * We default some events to a 1 sample interval, but keep that a
	 * weak assumption that the user can override.
	 */
	if (!attr->sample_period || (user_freq != UINT_MAX &&
				     user_interval != ULLONG_MAX)) {
		if (freq) {
			attr->sample_type	|= PERF_SAMPLE_PERIOD;
			attr->freq		= 1;
			attr->sample_freq	= freq;
		} else {
			attr->sample_period = default_interval;
		}
	}

	if (no_samples)
		attr->sample_freq = 0;

	if (inherit_stat)
		attr->inherit_stat = 1;

	if (sample_address) {
		attr->sample_type	|= PERF_SAMPLE_ADDR;
		attr->mmap_data = track;
	}

	if (call_graph)
		attr->sample_type	|= PERF_SAMPLE_CALLCHAIN;

	if (system_wide)
		attr->sample_type	|= PERF_SAMPLE_CPU;

	if (sample_id_all_avail &&
	    (sample_time || system_wide || !no_inherit || cpu_list))
		attr->sample_type	|= PERF_SAMPLE_TIME;

	if (raw_samples) {
		attr->sample_type	|= PERF_SAMPLE_TIME;
		attr->sample_type	|= PERF_SAMPLE_RAW;
		attr->sample_type	|= PERF_SAMPLE_CPU;
	}

	if (nodelay) {
		attr->watermark = 0;
		attr->wakeup_events = 1;
	}

	attr->mmap		= track;
	attr->comm		= track;

	if (target_pid == -1 && target_tid == -1 && !system_wide) {
		attr->disabled = 1;
		attr->enable_on_exec = 1;
	}
}

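/*
 * Compare two event lists attribute by attribute; used to reject an
 * append (-A) whose event configuration does not match the events already
 * recorded in the existing perf.data header.
 */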
static bool perf_evlist__equal(struct perf_evlist *evlist,
			       struct perf_evlist *other)
{
	struct perf_evsel *pos, *pair;

	if (evlist->nr_entries != other->nr_entries)
		return false;

	pair = list_entry(other->entries.next, struct perf_evsel, node);

	list_for_each_entry(pos, &evlist->entries, node) {
		if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr)) != 0)
			return false;
		pair = list_entry(pair->node.next, struct perf_evsel, node);
	}

	return true;
}

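/*
 * Create the kernel counters (sys_perf_event_open) for every event in the
 * list, falling back where the running kernel is too old, then mmap the
 * ring buffers.
 */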
static void open_counters(struct perf_evlist *evlist)
{
	struct perf_evsel *pos;

	if (evlist->cpus->map[0] < 0)
		no_inherit = true;

	list_for_each_entry(pos, &evlist->entries, node) {
		struct perf_event_attr *attr = &pos->attr;
		/*
		 * Check if parse_single_tracepoint_event has already asked for
		 * PERF_SAMPLE_TIME.
		 *
		 * XXX this is kludgy, but it is a short-term fix for the
		 * problems introduced by eac23d1c, which broke 'perf script'
		 * by producing different sample_types when multiple tracepoint
		 * events are used with a perf binary that tries to use
		 * sample_id_all on an older kernel.
		 *
		 * We need to move counter creation to perf_session, support
		 * different sample_types, etc.
		 */
		bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;

		config_attr(pos, evlist);
retry_sample_id:
		attr->sample_id_all = sample_id_all_avail ? 1 : 0;
try_again:
		if (perf_evsel__open(pos, evlist->cpus, evlist->threads, group) < 0) {
			int err = errno;

			if (err == EPERM || err == EACCES) {
				ui__warning_paranoid();
				exit(EXIT_FAILURE);
			} else if (err == ENODEV && cpu_list) {
				die("No such device - did you specify"
					" an out-of-range profile CPU?\n");
			} else if (err == EINVAL && sample_id_all_avail) {
				/*
				 * Old kernel, no attr->sample_id_all field
				 */
				sample_id_all_avail = false;
				if (!sample_time && !raw_samples && !time_needed)
					attr->sample_type &= ~PERF_SAMPLE_TIME;

				goto retry_sample_id;
			}

			/*
			 * If it's cycles then fall back to hrtimer
			 * based cpu-clock-tick sw counter, which
			 * is always available even if no PMU support:
			 */
			if (attr->type == PERF_TYPE_HARDWARE
					&& attr->config == PERF_COUNT_HW_CPU_CYCLES) {

				if (verbose)
					ui__warning("The cycles event is not supported, "
						    "trying to fall back to cpu-clock-ticks\n");
				attr->type = PERF_TYPE_SOFTWARE;
				attr->config = PERF_COUNT_SW_CPU_CLOCK;
				goto try_again;
			}

			if (err == ENOENT) {
				ui__warning("The %s event is not supported.\n",
					    event_name(pos));
				exit(EXIT_FAILURE);
			}

			printf("\n");
			error("sys_perf_event_open() syscall returned with %d (%s). /bin/dmesg may provide additional information.\n",
			      err, strerror(err));

#if defined(__i386__) || defined(__x86_64__)
			if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP)
				die("No hardware sampling interrupt available."
				    " No APIC? If so then you can boot the kernel"
				    " with the \"lapic\" boot parameter to"
				    " force-enable it.\n");
#endif

			die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
		}
	}

	if (perf_evlist__set_filters(evlist)) {
		error("failed to set filter with %d (%s)\n", errno,
			strerror(errno));
		exit(-1);
	}

	if (perf_evlist__mmap(evlist, mmap_pages, false) < 0)
		die("failed to mmap with %d (%s)\n", errno, strerror(errno));

	if (file_new)
		session->evlist = evlist;
	else {
		if (!perf_evlist__equal(session->evlist, evlist)) {
			fprintf(stderr, "incompatible append\n");
			exit(-1);
		}
	}

	perf_session__update_sample_type(session);
}

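/*
 * Re-read the events written between post_processing_offset and the
 * current end of file, marking the DSOs that were actually hit so that
 * only their build-ids end up in the header.
 */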
static int process_buildids(void)
{
	u64 size = lseek(output, 0, SEEK_CUR);

	if (size == 0)
		return 0;

	session->fd = output;
	return __perf_session__process_events(session, post_processing_offset,
					      size - post_processing_offset,
					      size, &build_id__mark_dso_hit_ops);
}

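/*
 * atexit() hook: finalize the header with the final data size and the
 * build-ids of the DSOs we touched. A pipe has no seekable header, so
 * this only applies to regular output files.
 */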
static void atexit_header(void)
{
	if (!pipe_output) {
		session->header.data_size += bytes_written;

		if (!no_buildid)
			process_buildids();
		perf_session__write_header(session, evsel_list, output, true);
		perf_session__delete(session);
		perf_evlist__delete(evsel_list);
		symbol__exit();
	}
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_session *psession = data;

	if (machine__is_host(machine))
		return;

	/*
	 * For guest kernels, when processing the record & report subcommands
	 * we arrange the module mmaps before the guest kernel mmap and
	 * trigger a DSO preload, because guest module symbols are loaded from
	 * guest kallsyms by default instead of from /lib/modules/XXX/XXX.
	 * This avoids missing symbols when the first sampled address falls in
	 * a module rather than in the guest kernel.
	 */
	err = perf_event__synthesize_modules(process_synthesized_event,
					     psession, machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for guest kernels because a guest kernel's
	 * /proc/kallsyms sometimes has no _text symbol.
	 */
	err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
						 psession, machine, "_text");
	if (err < 0)
		err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
							 psession, machine,
							 "_stext");
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}

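/*
 * PERF_RECORD_FINISHED_ROUND marks the point where every ring buffer has
 * been drained once; report-side reordering only has to sort the events
 * seen since the previous round marker.
 */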
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};

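/*
 * Drain every mmap'ed ring buffer once; when the session carries
 * tracepoint data, follow up with a PERF_RECORD_FINISHED_ROUND marker.
 */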
static void mmap_read_all(void)
{
	int i;

	for (i = 0; i < evsel_list->nr_mmaps; i++) {
		if (evsel_list->mmap[i].base)
			mmap_read(&evsel_list->mmap[i]);
	}

	if (perf_header__has_feat(&session->header, HEADER_TRACE_INFO))
		write_output(&finished_round_event, sizeof(finished_round_event));
}

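/*
 * The record session proper: set up the output file, fork the workload
 * (if any), open and mmap the counters, synthesize the initial kernel,
 * module and thread events, then copy samples out of the ring buffers
 * until the workload exits or we are interrupted.
 */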
static int __cmd_record(int argc, const char **argv)
{
	/* ANDROID_CHANGE_BEGIN */
#ifndef __APPLE__
	int i;
	struct stat st;
	int flags;
	int err;
	unsigned long waking = 0;
	int child_ready_pipe[2], go_pipe[2];
	const bool forks = argc > 0;
	char buf;
	struct machine *machine;

	page_size = sysconf(_SC_PAGE_SIZE);

	atexit(sig_atexit);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGUSR1, sig_handler);

	if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) {
		perror("failed to create pipes");
		exit(-1);
	}

	if (!output_name) {
		if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
			pipe_output = 1;
		else
			/* ANDROID_CHANGE_BEGIN */
#ifdef __BIONIC__
			output_name = "/data/perf.data";
#else
			output_name = "perf.data";
#endif
			/* ANDROID_CHANGE_END */
	}
	if (output_name) {
		if (!strcmp(output_name, "-"))
			pipe_output = 1;
		else if (!stat(output_name, &st) && st.st_size) {
			if (write_mode == WRITE_FORCE) {
				char oldname[PATH_MAX];
				snprintf(oldname, sizeof(oldname), "%s.old",
					 output_name);
				unlink(oldname);
				rename(output_name, oldname);
			}
		} else if (write_mode == WRITE_APPEND) {
			write_mode = WRITE_FORCE;
		}
	}

	flags = O_CREAT|O_RDWR;
	if (write_mode == WRITE_APPEND)
		file_new = 0;
	else
		flags |= O_TRUNC;

	if (pipe_output)
		output = STDOUT_FILENO;
	else
		output = open(output_name, flags, S_IRUSR | S_IWUSR);
	if (output < 0) {
		perror("failed to create output file");
		exit(-1);
	}

	session = perf_session__new(output_name, O_WRONLY,
				    write_mode == WRITE_FORCE, false, NULL);
	if (session == NULL) {
		pr_err("Not enough memory for reading perf file header\n");
		return -1;
	}

	if (!no_buildid)
		perf_header__set_feat(&session->header, HEADER_BUILD_ID);

	if (!file_new) {
		err = perf_session__read_header(session, output);
		if (err < 0)
			goto out_delete_session;
	}

	if (have_tracepoints(&evsel_list->entries))
		perf_header__set_feat(&session->header, HEADER_TRACE_INFO);

	/* 512 kiB: default amount of unprivileged mlocked memory */
	if (mmap_pages == UINT_MAX)
		mmap_pages = (512 * 1024) / page_size;

	if (forks) {
		child_pid = fork();
		if (child_pid < 0) {
			perror("failed to fork");
			exit(-1);
		}

		if (!child_pid) {
			if (pipe_output)
				dup2(2, 1);
			close(child_ready_pipe[0]);
			close(go_pipe[1]);
			fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);

			/*
			 * Do a dummy execvp to get the PLT entry resolved,
			 * so we avoid the resolver overhead on the real
			 * execvp call.
			 */
			execvp("", (char **)argv);

			/*
			 * Tell the parent we're ready to go
			 */
			close(child_ready_pipe[1]);

			/*
			 * Wait until the parent tells us to go.
			 */
			if (read(go_pipe[0], &buf, 1) == -1)
				perror("unable to read pipe");

			execvp(argv[0], (char **)argv);

			perror(argv[0]);
			kill(getppid(), SIGUSR1);
			exit(-1);
		}

		if (!system_wide && target_tid == -1 && target_pid == -1)
			evsel_list->threads->map[0] = child_pid;

		close(child_ready_pipe[1]);
		close(go_pipe[0]);
		/*
		 * wait for child to settle
		 */
		if (read(child_ready_pipe[0], &buf, 1) == -1) {
			perror("unable to read pipe");
			exit(-1);
		}
		close(child_ready_pipe[0]);
	}

	open_counters(evsel_list);

	/*
	 * perf_session__delete(session) will be called at atexit_header()
	 */
	atexit(atexit_header);

	if (pipe_output) {
		err = perf_header__write_pipe(output);
		if (err < 0)
			return err;
	} else if (file_new) {
		err = perf_session__write_header(session, evsel_list,
						 output, false);
		if (err < 0)
			return err;
	}

	post_processing_offset = lseek(output, 0, SEEK_CUR);

	if (pipe_output) {
		err = perf_session__synthesize_attrs(session,
						     process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			return err;
		}

		err = perf_event__synthesize_event_types(process_synthesized_event,
							 session);
		if (err < 0) {
			pr_err("Couldn't synthesize event_types.\n");
			return err;
		}

		if (have_tracepoints(&evsel_list->entries)) {
			/*
			 * FIXME: err <= 0 here actually means that
			 * there were no tracepoints, so it's not really
			 * an error, just that we don't need to
			 * synthesize anything. We really have to
			 * return this more properly and also
			 * propagate the errors that currently call die().
			 */
			err = perf_event__synthesize_tracing_data(output, evsel_list,
								  process_synthesized_event,
								  session);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				return err;
			}
			advance_output(err);
		}
	}

	machine = perf_session__find_host_machine(session);
	if (!machine) {
		pr_err("Couldn't find native kernel information.\n");
		return -1;
	}

	err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
						 session, machine, "_text");
	if (err < 0)
		err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
							 session, machine, "_stext");
	if (err < 0)
		pr_err("Couldn't record kernel reference relocation symbol\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/kallsyms permission or run as root.\n");

	err = perf_event__synthesize_modules(process_synthesized_event,
					     session, machine);
	if (err < 0)
		pr_err("Couldn't record kernel module information.\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/modules permission or run as root.\n");

	if (perf_guest)
		perf_session__process_machines(session,
					       perf_event__synthesize_guest_os);

	if (!system_wide)
		perf_event__synthesize_thread_map(evsel_list->threads,
						  process_synthesized_event,
						  session);
	else
		perf_event__synthesize_threads(process_synthesized_event,
					       session);

	if (realtime_prio) {
		struct sched_param param;

		param.sched_priority = realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			exit(-1);
		}
	}

	/*
	 * Let the child rip
	 */
	if (forks)
		close(go_pipe[1]);

	for (;;) {
		int hits = samples;
		int thread;

		mmap_read_all();

		if (hits == samples) {
			if (done)
				break;
			err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
			waking++;
		}

		if (done) {
			for (i = 0; i < evsel_list->cpus->nr; i++) {
				struct perf_evsel *pos;

				list_for_each_entry(pos, &evsel_list->entries, node) {
					for (thread = 0;
						thread < evsel_list->threads->nr;
						thread++)
						ioctl(FD(pos, i, thread),
							PERF_EVENT_IOC_DISABLE);
				}
			}
		}
	}

	if (quiet || signr == SIGUSR1)
		return 0;

	fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

	/*
	 * Approximate RIP event size: 24 bytes.
	 */
	fprintf(stderr,
		"[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
		(double)bytes_written / 1024.0 / 1024.0,
		output_name,
		bytes_written / 24);

	return 0;

out_delete_session:
	perf_session__delete(session);
	return err;
#else
	return -1;
#endif
	/* ANDROID_CHANGE_END */
}

static const char * const record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};
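
/*
 * Typical invocations (illustrative):
 *
 *   perf record ./workload          # default: cycles, sampled at 1000 Hz
 *   perf record -g -p 1234          # attach to an existing pid, with call graphs
 *   perf record -a -e cache-misses  # system-wide, non-default event
 */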

static bool force, append_file;

const struct option record_options[] = {
	OPT_CALLBACK('e', "event", &evsel_list, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events),
	OPT_CALLBACK(0, "filter", &evsel_list, "filter",
		     "event filter", parse_filter),
	OPT_INTEGER('p', "pid", &target_pid,
		    "record events on existing process id"),
	OPT_INTEGER('t', "tid", &target_tid,
		    "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN('D', "no-delay", &nodelay,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &system_wide,
		    "system-wide collection from all CPUs"),
	OPT_BOOLEAN('A', "append", &append_file,
		    "append to the output file to do incremental profiling"),
	OPT_STRING('C', "cpu", &cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_BOOLEAN('f', "force", &force,
		    "overwrite existing data file (deprecated)"),
	OPT_U64('c', "count", &user_interval, "event period to sample"),
	OPT_STRING('o', "output", &output_name, "file",
		    "output file name"),
	OPT_BOOLEAN('i', "no-inherit", &no_inherit,
		    "child tasks do not inherit counters"),
	OPT_UINTEGER('F', "freq", &user_freq, "profile at this frequency"),
	OPT_UINTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"),
	OPT_BOOLEAN('g', "call-graph", &call_graph,
		    "do call-graph (stack chain/backtrace) recording"),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &sample_address,
		    "Sample addresses"),
	OPT_BOOLEAN('T', "timestamp", &sample_time, "Sample timestamps"),
	OPT_BOOLEAN('n', "no-samples", &no_samples,
		    "don't sample"),
	OPT_BOOLEAN('N', "no-buildid-cache", &no_buildid_cache,
		    "do not update the buildid cache"),
	OPT_BOOLEAN('B', "no-buildid", &no_buildid,
		    "do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_END()
};

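/*
 * Entry point for 'perf record': parse options, build the event list and
 * the cpu/thread maps, resolve the sampling period vs. frequency choice,
 * then hand off to __cmd_record().
 */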
int cmd_record(int argc, const char **argv, const char *prefix __used)
{
	int err = -ENOMEM;
	struct perf_evsel *pos;

	evsel_list = perf_evlist__new(NULL, NULL);
	if (evsel_list == NULL)
		return -ENOMEM;

	argc = parse_options(argc, argv, record_options, record_usage,
			    PARSE_OPT_STOP_AT_NON_OPTION);
	if (!argc && target_pid == -1 && target_tid == -1 &&
	    !system_wide && !cpu_list)
		usage_with_options(record_usage, record_options);

	if (force && append_file) {
		fprintf(stderr, "Can't overwrite and append at the same time."
				" You need to choose between -f and -A.\n");
		usage_with_options(record_usage, record_options);
	} else if (append_file) {
		write_mode = WRITE_APPEND;
	} else {
		write_mode = WRITE_FORCE;
	}

	if (nr_cgroups && !system_wide) {
		fprintf(stderr, "cgroup monitoring only available in"
			" system-wide mode\n");
		usage_with_options(record_usage, record_options);
	}

	symbol__init();

	if (symbol_conf.kptr_restrict)
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

	if (no_buildid_cache || no_buildid)
		disable_buildid_cache();

	if (evsel_list->nr_entries == 0 &&
	    perf_evlist__add_default(evsel_list) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out_symbol_exit;
	}

	if (target_pid != -1)
		target_tid = target_pid;

	if (perf_evlist__create_maps(evsel_list, target_pid,
				     target_tid, cpu_list) < 0)
		usage_with_options(record_usage, record_options);

	list_for_each_entry(pos, &evsel_list->entries, node) {
		if (perf_evsel__alloc_fd(pos, evsel_list->cpus->nr,
					 evsel_list->threads->nr) < 0)
			goto out_free_fd;
		if (perf_header__push_event(pos->attr.config, event_name(pos)))
			goto out_free_fd;
	}

	if (perf_evlist__alloc_pollfd(evsel_list) < 0)
		goto out_free_fd;

	if (user_interval != ULLONG_MAX)
		default_interval = user_interval;
	if (user_freq != UINT_MAX)
		freq = user_freq;

	/*
	 * User specified count overrides default frequency.
	 */
	if (default_interval)
		freq = 0;
	else if (freq) {
		default_interval = freq;
	} else {
		fprintf(stderr, "frequency and count are zero, aborting\n");
		err = -EINVAL;
		goto out_free_fd;
	}

	err = __cmd_record(argc, argv);
out_free_fd:
	perf_evlist__delete_maps(evsel_list);
out_symbol_exit:
	symbol__exit();
	return err;
}