/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#include "builtin.h"

#include "perf.h"

#include "util/build-id.h"
#include "util/util.h"
#include "util/parse-options.h"
#include "util/parse-events.h"

#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/cpumap.h"
#include "util/thread_map.h"

#include <unistd.h>
#include <sched.h>
#include <sys/mman.h>

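/*
 * Fallback for C libraries that lack the GNU on_exit() extension: emulate
 * it on top of atexit() with a small static table of handler/argument
 * pairs, wrapping exit() so the recorded __exitcode can be passed to each
 * handler.
 */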
#ifndef HAVE_ON_EXIT
#ifndef ATEXIT_MAX
#define ATEXIT_MAX 32
#endif
static int __on_exit_count = 0;
typedef void (*on_exit_func_t)(int, void *);
static on_exit_func_t __on_exit_funcs[ATEXIT_MAX];
static void *__on_exit_args[ATEXIT_MAX];
static int __exitcode = 0;
static void __handle_on_exit_funcs(void);
static int on_exit(on_exit_func_t function, void *arg);
#define exit(x) (exit)(__exitcode = (x))

static int on_exit(on_exit_func_t function, void *arg)
{
	if (__on_exit_count == ATEXIT_MAX)
		return -ENOMEM;
	else if (__on_exit_count == 0)
		atexit(__handle_on_exit_funcs);
	__on_exit_funcs[__on_exit_count] = function;
	__on_exit_args[__on_exit_count++] = arg;
	return 0;
}

static void __handle_on_exit_funcs(void)
{
	int i;
	for (i = 0; i < __on_exit_count; i++)
		__on_exit_funcs[i](__exitcode, __on_exit_args[i]);
}
#endif

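/*
 * Per-invocation state for 'perf record': tool callbacks, user-supplied
 * options, the event list and session being recorded, plus output
 * bookkeeping (fd, bytes written, offset where post-processing starts).
 */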
struct perf_record {
	struct perf_tool	tool;
	struct perf_record_opts	opts;
	u64			bytes_written;
	const char		*output_name;
	struct perf_evlist	*evlist;
	struct perf_session	*session;
	const char		*progname;
	int			output;
	unsigned int		page_size;
	int			realtime_prio;
	bool			no_buildid;
	bool			no_buildid_cache;
	long			samples;
	off_t			post_processing_offset;
};

static void advance_output(struct perf_record *rec, size_t size)
{
	rec->bytes_written += size;
}

static int write_output(struct perf_record *rec, void *buf, size_t size)
{
	while (size) {
		int ret = write(rec->output, buf, size);

		if (ret < 0) {
			pr_err("failed to write\n");
			return -1;
		}

		size -= ret;
		buf += ret;

		rec->bytes_written += ret;
	}

	return 0;
}

static int process_synthesized_event(struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	struct perf_record *rec = container_of(tool, struct perf_record, tool);
	if (write_output(rec, event, event->header.size) < 0)
		return -1;

	return 0;
}

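/*
 * Drain one mmap'ed ring buffer into the output file.  The kernel moves
 * 'head' forward as it produces data; everything between our cached tail
 * (md->prev) and 'head' is new.  A span that wraps past the end of the
 * buffer is written out in two chunks, after which the new tail is
 * published so the kernel can reuse the space.
 */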
static int perf_record__mmap_read(struct perf_record *rec,
				   struct perf_mmap *md)
{
	unsigned int head = perf_mmap__read_head(md);
	unsigned int old = md->prev;
	unsigned char *data = md->base + rec->page_size;
	unsigned long size;
	void *buf;
	int rc = 0;

	if (old == head)
		return 0;

	rec->samples++;

	size = head - old;

	if ((old & md->mask) + size != (head & md->mask)) {
		buf = &data[old & md->mask];
		size = md->mask + 1 - (old & md->mask);
		old += size;

		if (write_output(rec, buf, size) < 0) {
			rc = -1;
			goto out;
		}
	}

	buf = &data[old & md->mask];
	size = head - old;
	old += size;

	if (write_output(rec, buf, size) < 0) {
		rc = -1;
		goto out;
	}

	md->prev = old;
	perf_mmap__write_tail(md, old);

out:
	return rc;
}

static volatile int done = 0;
static volatile int signr = -1;
static volatile int child_finished = 0;

static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;

	done = 1;
	signr = sig;
}

static void perf_record__sig_exit(int exit_status __maybe_unused, void *arg)
{
	struct perf_record *rec = arg;
	int status;

	if (rec->evlist->workload.pid > 0) {
		if (!child_finished)
			kill(rec->evlist->workload.pid, SIGTERM);

		wait(&status);
		if (WIFSIGNALED(status))
			psignal(WTERMSIG(status), rec->progname);
	}

	if (signr == -1 || signr == SIGUSR1)
		return;

	signal(signr, SIG_DFL);
}

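/*
 * Configure and open all events on the target CPUs/threads, retrying with
 * weaker attribute settings (perf_evsel__fallback) when the running kernel
 * rejects a configuration, then apply event filters and mmap the ring
 * buffers.
 */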
static int perf_record__open(struct perf_record *rec)
{
	char msg[512];
	struct perf_evsel *pos;
	struct perf_evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct perf_record_opts *opts = &rec->opts;
	int rc = 0;

	perf_evlist__config(evlist, opts);

	list_for_each_entry(pos, &evlist->entries, node) {
try_again:
		if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) {
			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
				if (verbose)
					ui__warning("%s\n", msg);
				goto try_again;
			}

			rc = -errno;
			perf_evsel__open_strerror(pos, &opts->target,
						  errno, msg, sizeof(msg));
			ui__error("%s\n", msg);
			goto out;
		}
	}

	if (perf_evlist__apply_filters(evlist)) {
		error("failed to set filter with %d (%s)\n", errno,
			strerror(errno));
		rc = -1;
		goto out;
	}

	if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
		if (errno == EPERM) {
			pr_err("Permission error mapping pages.\n"
			       "Consider increasing "
			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
			       "or try again with a smaller value of -m/--mmap_pages.\n"
			       "(current value: %d)\n", opts->mmap_pages);
			rc = -errno;
		} else if (!is_power_of_2(opts->mmap_pages) &&
			   (opts->mmap_pages != UINT_MAX)) {
			pr_err("--mmap_pages/-m value must be a power of two.\n");
			rc = -EINVAL;
		} else {
			pr_err("failed to mmap with %d (%s)\n", errno, strerror(errno));
			rc = -errno;
		}
		goto out;
	}

	session->evlist = evlist;
	perf_session__set_id_hdr_size(session);
out:
	return rc;
}

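/*
 * Re-scan everything written after the header with the
 * build_id__mark_dso_hit_ops tool, so that only DSOs actually hit by
 * samples have their build-ids emitted into the feature section.
 */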
static int process_buildids(struct perf_record *rec)
{
	u64 size = lseek(rec->output, 0, SEEK_CUR);

	if (size == 0)
		return 0;

	rec->session->fd = rec->output;
	return __perf_session__process_events(rec->session, rec->post_processing_offset,
					      size - rec->post_processing_offset,
					      size, &build_id__mark_dso_hit_ops);
}

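/*
 * on_exit() handler for the success path: account the final data size in
 * the header, optionally collect build-ids, rewrite the header at the
 * front of the file and tear the session down.  Skipped when piping, as a
 * pipe cannot be rewound to update the header.
 */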
static void perf_record__exit(int status, void *arg)
{
	struct perf_record *rec = arg;

	if (status != 0)
		return;

	if (!rec->opts.pipe_output) {
		rec->session->header.data_size += rec->bytes_written;

		if (!rec->no_buildid)
			process_buildids(rec);
		perf_session__write_header(rec->session, rec->evlist,
					   rec->output, true);
		perf_session__delete(rec->session);
		perf_evlist__delete(rec->evlist);
		symbol__exit();
	}
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;
	/*
	 * For a guest kernel, when processing the record and report
	 * subcommands, we arrange the module mmaps prior to the guest
	 * kernel mmap and trigger a preload of the dso, because by
	 * default guest module symbols are loaded from guest kallsyms
	 * instead of /lib/modules/XXX/XXX.  This avoids missing symbols
	 * when the first address falls in a module rather than in the
	 * guest kernel proper.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for the guest kernel because the guest kernel's
	 * /proc/kallsyms sometimes has no _text entry.
	 */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine, "_text");
	if (err < 0)
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine, "_stext");
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}

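/*
 * Synthetic event emitted after each full pass over the ring buffers; the
 * report side can use these PERF_RECORD_FINISHED_ROUND markers as flush
 * points when re-sorting events into timestamp order.
 */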
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};

static int perf_record__mmap_read_all(struct perf_record *rec)
{
	int i;
	int rc = 0;

	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
		if (rec->evlist->mmap[i].base) {
			if (perf_record__mmap_read(rec, &rec->evlist->mmap[i]) != 0) {
				rc = -1;
				goto out;
			}
		}
	}

	if (perf_header__has_feat(&rec->session->header, HEADER_TRACING_DATA))
		rc = write_output(rec, &finished_round_event,
				  sizeof(finished_round_event));

out:
	return rc;
}

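/*
 * The main recording loop driver: set up signals and the output file,
 * create the session, optionally fork the workload, open the events,
 * synthesize records describing pre-existing state (kernel, modules,
 * threads), then drain the ring buffers until the workload exits or a
 * signal asks us to stop.
 */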
static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
{
	struct stat st;
	int flags;
	int err, output, feat;
	unsigned long waking = 0;
	const bool forks = argc > 0;
	struct machine *machine;
	struct perf_tool *tool = &rec->tool;
	struct perf_record_opts *opts = &rec->opts;
	struct perf_evlist *evsel_list = rec->evlist;
	const char *output_name = rec->output_name;
	struct perf_session *session;
	bool disabled = false;

	rec->progname = argv[0];

	rec->page_size = sysconf(_SC_PAGE_SIZE);

	on_exit(perf_record__sig_exit, rec);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGUSR1, sig_handler);
	signal(SIGTERM, sig_handler);

	if (!output_name) {
		if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
			opts->pipe_output = true;
		else
			rec->output_name = output_name = "perf.data";
	}
	if (output_name) {
		if (!strcmp(output_name, "-"))
			opts->pipe_output = true;
		else if (!stat(output_name, &st) && st.st_size) {
			char oldname[PATH_MAX];
			snprintf(oldname, sizeof(oldname), "%s.old",
				 output_name);
			unlink(oldname);
			rename(output_name, oldname);
		}
	}

	flags = O_CREAT|O_RDWR|O_TRUNC;

	if (opts->pipe_output)
		output = STDOUT_FILENO;
	else
		output = open(output_name, flags, S_IRUSR | S_IWUSR);
	if (output < 0) {
		perror("failed to create output file");
		return -1;
	}

	rec->output = output;

	session = perf_session__new(output_name, O_WRONLY,
				    true, false, NULL);
	if (session == NULL) {
		pr_err("Not enough memory for reading perf file header\n");
		return -1;
	}

	rec->session = session;

	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	if (rec->no_buildid)
		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

	if (!have_tracepoints(&evsel_list->entries))
		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

	if (!rec->opts.branch_stack)
		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

	if (forks) {
		err = perf_evlist__prepare_workload(evsel_list, &opts->target,
						    argv, opts->pipe_output,
						    true);
		if (err < 0) {
			pr_err("Couldn't run the workload!\n");
			goto out_delete_session;
		}
	}

	if (perf_record__open(rec) != 0) {
		err = -1;
		goto out_delete_session;
	}

	if (!evsel_list->nr_groups)
		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

	/*
	 * perf_session__delete(session) will be called at perf_record__exit()
	 */
	on_exit(perf_record__exit, rec);

	if (opts->pipe_output) {
		err = perf_header__write_pipe(output);
		if (err < 0)
			goto out_delete_session;
	} else {
		err = perf_session__write_header(session, evsel_list,
						 output, false);
		if (err < 0)
			goto out_delete_session;
	}

	if (!rec->no_buildid
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
		pr_err("Couldn't generate buildids. "
		       "Use --no-buildid to profile anyway.\n");
		err = -1;
		goto out_delete_session;
	}

	rec->post_processing_offset = lseek(output, 0, SEEK_CUR);

	machine = &session->machines.host;

	if (opts->pipe_output) {
		err = perf_event__synthesize_attrs(tool, session,
						   process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			goto out_delete_session;
		}

		if (have_tracepoints(&evsel_list->entries)) {
			/*
			 * FIXME err <= 0 here actually means that
			 * there were no tracepoints so it's not really
			 * an error, just that we don't need to
			 * synthesize anything.  We really have to
			 * return this more properly and also
			 * propagate errors that now are calling die()
			 */
			err = perf_event__synthesize_tracing_data(tool, output, evsel_list,
								  process_synthesized_event);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				goto out_delete_session;
			}
			advance_output(rec, err);
		}
	}

	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine, "_text");
	if (err < 0)
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine, "_stext");
	if (err < 0)
		pr_err("Couldn't record kernel reference relocation symbol\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/kallsyms permission or run as root.\n");

	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record kernel module information.\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/modules permission or run as root.\n");

	if (perf_guest) {
		machines__process_guests(&session->machines,
					 perf_event__synthesize_guest_os, tool);
	}

	if (perf_target__has_task(&opts->target))
		err = perf_event__synthesize_thread_map(tool, evsel_list->threads,
						  process_synthesized_event,
						  machine);
	else if (perf_target__has_cpu(&opts->target))
		err = perf_event__synthesize_threads(tool, process_synthesized_event,
					       machine);
	else /* command specified */
		err = 0;

	if (err != 0)
		goto out_delete_session;

	if (rec->realtime_prio) {
		struct sched_param param;

		param.sched_priority = rec->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			err = -1;
			goto out_delete_session;
		}
	}

	/*
	 * When perf is starting the traced process, all the events
	 * (apart from group members) have enable_on_exec=1 set,
	 * so don't spoil it by prematurely enabling them.
	 */
	if (!perf_target__none(&opts->target))
		perf_evlist__enable(evsel_list);

	/*
	 * Let the child rip
	 */
	if (forks)
		perf_evlist__start_workload(evsel_list);

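	/*
	 * Capture loop: drain every ring buffer; when a pass yields no new
	 * samples, either stop (a signal or workload exit set 'done') or
	 * poll() until the kernel reports more data.
	 */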
	for (;;) {
		int hits = rec->samples;

		if (perf_record__mmap_read_all(rec) < 0) {
			err = -1;
			goto out_delete_session;
		}

		if (hits == rec->samples) {
			if (done)
				break;
			err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
			waking++;
		}

		/*
		 * When perf is starting the traced process, at the end events
		 * die with the process and we wait for that. Thus no need to
		 * disable events in this case.
		 */
		if (done && !disabled && !perf_target__none(&opts->target)) {
			perf_evlist__disable(evsel_list);
			disabled = true;
		}
	}

	if (quiet || signr == SIGUSR1)
		return 0;

	fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

	/*
	 * Approximate RIP event size: 24 bytes.
	 */
	fprintf(stderr,
		"[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
		(double)rec->bytes_written / 1024.0 / 1024.0,
		output_name,
		rec->bytes_written / 24);

	return 0;

out_delete_session:
	perf_session__delete(session);
	return err;
}

#define BRANCH_OPT(n, m) \
	{ .name = n, .mode = (m) }

#define BRANCH_END { .name = NULL }

struct branch_mode {
	const char *name;
	int mode;
};

static const struct branch_mode branch_modes[] = {
	BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
	BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
	BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
	BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
	BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
	BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
	BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
	BRANCH_END
};

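/*
 * Parse a comma-separated branch filter list, e.g. "perf record -j
 * any_call,u", into a PERF_SAMPLE_BRANCH_* bitmask.  If only
 * privilege-level bits (u/k/hv) were requested, the branch type defaults
 * to "any".
 */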
static int
parse_branch_stack(const struct option *opt, const char *str, int unset)
{
#define ONLY_PLM \
	(PERF_SAMPLE_BRANCH_USER	|\
	 PERF_SAMPLE_BRANCH_KERNEL	|\
	 PERF_SAMPLE_BRANCH_HV)

	uint64_t *mode = (uint64_t *)opt->value;
	const struct branch_mode *br;
	char *s, *os = NULL, *p;
	int ret = -1;

	if (unset)
		return 0;

	/*
	 * cannot set it twice, -b + --branch-filter for instance
	 */
	if (*mode)
		return -1;

	/* str may be NULL in case no arg is passed to -b */
	if (str) {
		/* because str is read-only */
		s = os = strdup(str);
		if (!s)
			return -1;

		for (;;) {
			p = strchr(s, ',');
			if (p)
				*p = '\0';

			for (br = branch_modes; br->name; br++) {
				if (!strcasecmp(s, br->name))
					break;
			}
			if (!br->name) {
				ui__warning("unknown branch filter %s,"
					    " check man page\n", s);
				goto error;
			}

			*mode |= br->mode;

			if (!p)
				break;

			s = p + 1;
		}
	}
	ret = 0;

	/* default to any branch */
	if ((*mode & ~ONLY_PLM) == 0) {
		*mode = PERF_SAMPLE_BRANCH_ANY;
	}
error:
	free(os);
	return ret;
}

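/*
 * Validate a user-supplied stack dump size for DWARF unwinding: reject
 * values that are zero or above USHRT_MAX rounded down to a u64 boundary,
 * and round accepted sizes up to a multiple of u64.
 */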
#ifdef LIBUNWIND_SUPPORT
static int get_stack_size(char *str, unsigned long *_size)
{
	char *endptr;
	unsigned long size;
	unsigned long max_size = round_down(USHRT_MAX, sizeof(u64));

	size = strtoul(str, &endptr, 0);

	do {
		if (*endptr)
			break;

		size = round_up(size, sizeof(u64));
		if (!size || size > max_size)
			break;

		*_size = size;
		return 0;

	} while (0);

	pr_err("callchain: Incorrect stack dump size (max %ld): %s\n",
	       max_size, str);
	return -1;
}
#endif /* LIBUNWIND_SUPPORT */

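/*
 * Parse the --call-graph mode string: "fp" selects frame-pointer based
 * callchains; "dwarf[,<size>]" (only with libunwind support) selects DWARF
 * unwinding with an optional stack dump size, e.g. "dwarf,8192".
 */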
int record_parse_callchain(const char *arg, struct perf_record_opts *opts)
{
	char *tok, *name, *saveptr = NULL;
	char *buf;
	int ret = -1;

	/* We need a buffer we know we can write to. */
	buf = malloc(strlen(arg) + 1);
	if (!buf)
		return -ENOMEM;

	strcpy(buf, arg);

	tok = strtok_r((char *)buf, ",", &saveptr);
	name = tok ? : (char *)buf;

	do {
		/* Framepointer style */
		if (!strncmp(name, "fp", sizeof("fp"))) {
			if (!strtok_r(NULL, ",", &saveptr)) {
				opts->call_graph = CALLCHAIN_FP;
				ret = 0;
			} else
				pr_err("callchain: No more arguments "
				       "needed for -g fp\n");
			break;

#ifdef LIBUNWIND_SUPPORT
		/* Dwarf style */
		} else if (!strncmp(name, "dwarf", sizeof("dwarf"))) {
			const unsigned long default_stack_dump_size = 8192;

			ret = 0;
			opts->call_graph = CALLCHAIN_DWARF;
			opts->stack_dump_size = default_stack_dump_size;

			tok = strtok_r(NULL, ",", &saveptr);
			if (tok) {
				unsigned long size = 0;

				ret = get_stack_size(tok, &size);
				opts->stack_dump_size = size;
			}
#endif /* LIBUNWIND_SUPPORT */
		} else {
			pr_err("callchain: Unknown --call-graph option "
			       "value: %s\n", arg);
			break;
		}

	} while (0);

	free(buf);
	return ret;
}

static void callchain_debug(struct perf_record_opts *opts)
{
	pr_debug("callchain: type %d\n", opts->call_graph);

	if (opts->call_graph == CALLCHAIN_DWARF)
		pr_debug("callchain: stack dump size %d\n",
			 opts->stack_dump_size);
}

int record_parse_callchain_opt(const struct option *opt,
			       const char *arg,
			       int unset)
{
	struct perf_record_opts *opts = opt->value;
	int ret;

	/* --no-call-graph */
	if (unset) {
		opts->call_graph = CALLCHAIN_NONE;
		pr_debug("callchain: disabled\n");
		return 0;
	}

	ret = record_parse_callchain(arg, opts);
	if (!ret)
		callchain_debug(opts);

	return ret;
}

int record_callchain_opt(const struct option *opt,
			 const char *arg __maybe_unused,
			 int unset __maybe_unused)
{
	struct perf_record_opts *opts = opt->value;

	if (opts->call_graph == CALLCHAIN_NONE)
		opts->call_graph = CALLCHAIN_FP;

	callchain_debug(opts);
	return 0;
}

static const char * const record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};

/*
 * XXX Ideally would be local to cmd_record() and passed to a perf_record__new
 * because we need to have access to it in perf_record__exit, that is called
 * after cmd_record() exits, but since record_options need to be accessible to
 * builtin-script, leave it here.
 *
 * At least we don't ouch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
static struct perf_record record = {
	.opts = {
		.mmap_pages	     = UINT_MAX,
		.user_freq	     = UINT_MAX,
		.user_interval	     = ULLONG_MAX,
		.freq		     = 4000,
		.target		     = {
			.uses_mmap   = true,
		},
	},
};

#define CALLCHAIN_HELP "set up and enable call-graph (stack chain/backtrace) recording: "

#ifdef LIBUNWIND_SUPPORT
const char record_callchain_help[] = CALLCHAIN_HELP "fp dwarf";
#else
const char record_callchain_help[] = CALLCHAIN_HELP "fp";
#endif

/*
 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
 * with it and switch to use the library functions in perf_evlist that came
 * from builtin-record.c, i.e. use perf_record_opts,
 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
 * using pipes, etc.
 */
const struct option record_options[] = {
	OPT_CALLBACK('e', "event", &record.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
		     "event filter", parse_filter),
	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
		    "record events on existing process id"),
	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
		    "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &record.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN('D', "no-delay", &record.opts.no_delay,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
			    "system-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
	OPT_STRING('o', "output", &record.output_name, "file",
		    "output file name"),
	OPT_BOOLEAN('i', "no-inherit", &record.opts.no_inherit,
		    "child tasks do not inherit counters"),
	OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
	OPT_UINTEGER('m', "mmap-pages", &record.opts.mmap_pages,
		     "number of mmap data pages"),
	OPT_BOOLEAN(0, "group", &record.opts.group,
		    "put the counters into a counter group"),
	OPT_CALLBACK_NOOPT('g', NULL, &record.opts,
			   NULL, "enables call-graph recording",
			   &record_callchain_opt),
	OPT_CALLBACK(0, "call-graph", &record.opts,
		     "mode[,dump_size]", record_callchain_help,
		     &record_parse_callchain_opt),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &record.opts.sample_address,
		    "Sample addresses"),
	OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
	OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
		    "don't sample"),
	OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
		    "do not update the buildid cache"),
	OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
		    "do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
		   "user to profile"),

	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
		     "branch any", "sample any taken branches",
		     parse_branch_stack),

	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
		     "branch filter mask", "branch stack filter modes",
		     parse_branch_stack),
	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
		    "sample by weight (on special events only)"),
	OPT_END()
};

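/*
 * Entry point: parse options, validate the target (pid/tid/cpu/uid
 * combination), add the default event if none was specified, resolve the
 * sampling period/frequency, create the cpu/thread maps and hand off to
 * __cmd_record().
 */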
int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
{
	int err = -ENOMEM;
	struct perf_evlist *evsel_list;
	struct perf_record *rec = &record;
	char errbuf[BUFSIZ];

	evsel_list = perf_evlist__new();
	if (evsel_list == NULL)
		return -ENOMEM;

	rec->evlist = evsel_list;

	argc = parse_options(argc, argv, record_options, record_usage,
			    PARSE_OPT_STOP_AT_NON_OPTION);
	if (!argc && perf_target__none(&rec->opts.target))
		usage_with_options(record_usage, record_options);

	if (nr_cgroups && !rec->opts.target.system_wide) {
		ui__error("cgroup monitoring only available in"
			  " system-wide mode\n");
		usage_with_options(record_usage, record_options);
	}

	symbol__init();

	if (symbol_conf.kptr_restrict)
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

	if (rec->no_buildid_cache || rec->no_buildid)
		disable_buildid_cache();

	if (evsel_list->nr_entries == 0 &&
	    perf_evlist__add_default(evsel_list) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out_symbol_exit;
	}

	err = perf_target__validate(&rec->opts.target);
	if (err) {
		perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__warning("%s", errbuf);
	}

	err = perf_target__parse_uid(&rec->opts.target);
	if (err) {
		int saved_errno = errno;

		perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__error("%s", errbuf);

		err = -saved_errno;
		goto out_symbol_exit;
	}

	err = -ENOMEM;
	if (perf_evlist__create_maps(evsel_list, &rec->opts.target) < 0)
		usage_with_options(record_usage, record_options);

	if (rec->opts.user_interval != ULLONG_MAX)
		rec->opts.default_interval = rec->opts.user_interval;
	if (rec->opts.user_freq != UINT_MAX)
		rec->opts.freq = rec->opts.user_freq;

	/*
	 * User specified count overrides default frequency.
	 */
	if (rec->opts.default_interval)
		rec->opts.freq = 0;
	else if (rec->opts.freq) {
		rec->opts.default_interval = rec->opts.freq;
	} else {
		ui__error("frequency and count are zero, aborting\n");
		err = -EINVAL;
		goto out_free_fd;
	}

	err = __cmd_record(&record, argc, argv);

	perf_evlist__munmap(evsel_list);
	perf_evlist__close(evsel_list);
out_free_fd:
	perf_evlist__delete_maps(evsel_list);
out_symbol_exit:
	symbol__exit();
	return err;
}
   1020