/*
 * Copyright (c) 2013 Ben Noordhuis <info (at) bnoordhuis.nl>
 * Copyright (c) 2013-2015 Dmitry V. Levin <ldv (at) altlinux.org>
 * Copyright (c) 2016 Eugene Syromyatnikov <evgsyr (at) gmail.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "defs.h"

#include "perf_event_struct.h"

#include "xlat/clocknames.h"
#include "xlat/hw_breakpoint_len.h"
#include "xlat/hw_breakpoint_type.h"
#include "xlat/perf_attr_size.h"
#include "xlat/perf_branch_sample_type.h"
#include "xlat/perf_event_open_flags.h"
#include "xlat/perf_event_read_format.h"
#include "xlat/perf_event_sample_format.h"
#include "xlat/perf_hw_cache_id.h"
#include "xlat/perf_hw_cache_op_id.h"
#include "xlat/perf_hw_cache_op_result_id.h"
#include "xlat/perf_hw_id.h"
#include "xlat/perf_sw_ids.h"
#include "xlat/perf_type_id.h"

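/*
 * A fetched perf_event_attr structure together with the size that was
 * actually copied from tracee memory; stashed in the tcb private data on
 * entering so that it can be printed on exiting.
 */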
struct pea_desc {
	struct perf_event_attr *attr;
	uint32_t size;
};

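/* Destructor for the tcb private data set up by fetch_perf_event_attr. */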
static void
free_pea_desc(void *pea_desc_ptr)
{
	struct pea_desc *desc = pea_desc_ptr;

	free(desc->attr);
	free(desc);
}

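/*
 * Fetch struct perf_event_attr from tracee memory at addr.  On failure the
 * raw address is printed and 1 is returned; on success the fetched data is
 * saved for the exiting stage and 0 is returned.
 */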
static int
fetch_perf_event_attr(struct tcb *const tcp, const kernel_ulong_t addr)
{
	struct pea_desc *desc;
	struct perf_event_attr *attr;
	uint32_t size;

	if (umove(tcp, addr + offsetof(struct perf_event_attr, size), &size)) {
		printaddr(addr);
		return 1;
	}

	if (size > sizeof(*attr))
		size = sizeof(*attr);

	if (!size)
		size = PERF_ATTR_SIZE_VER0;

	/*
	 * The kernel (rightfully) deems attribute structures with a size
	 * smaller than the first published format size invalid, and we do
	 * the same.
	 */
	if (size < PERF_ATTR_SIZE_VER0) {
		printaddr(addr);
		return 1;
	}

	if (abbrev(tcp))
		size = offsetofend(struct perf_event_attr, config);

	/* Size should be a multiple of 8, but the kernel doesn't check for it */
	/* size &= ~7; */

	attr = xcalloc(1, sizeof(*attr));

	if (umoven_or_printaddr(tcp, addr, size, attr)) {
		free(attr);

		return 1;
	}

	desc = xmalloc(sizeof(*desc));

	desc->attr = attr;
	desc->size = size;

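	/*
	 * Keep the fetched structure in the tcb private data so that
	 * print_perf_event_attr can use it on the exiting stage.
	 */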
	set_tcb_priv_data(tcp, desc, free_pea_desc);

	return 0;
}

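/*
 * Print a literal prefix followed by the symbolic name of x looked up in the
 * given xlat table, using dflt as the fallback for unknown values.
 */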
#define PRINT_XLAT(prefix, xlat, x, dflt) \
	do { \
		tprints(prefix); \
		printxval_search(xlat, x, dflt); \
	} while (0)

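/*
 * Print the perf_event_attr structure fetched on entering; called on the
 * exiting stage so that a size value updated by the kernel (on E2BIG) can be
 * reported as well.
 */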
static void
print_perf_event_attr(struct tcb *const tcp, const kernel_ulong_t addr)
{
	static const char *precise_ip_desc[] = {
		"arbitrary skid",
		"constant skid",
		"requested to have 0 skid",
		"must have 0 skid",
	};

	struct pea_desc *desc;
	struct perf_event_attr *attr;
	uint32_t size;
	uint32_t new_size;
	int use_new_size = 0;

	/*
	 * Amusingly, the kernel accepts structures with only part of a field
	 * present, so we make the check like this (instead of checking
	 * offsetofend against size) in order to print fields as the kernel
	 * sees them. This should also work fine on big-endian architectures.
	 */
	#define _PERF_CHECK_FIELD(_field) \
		do { \
			if (offsetof(struct perf_event_attr, _field) >= size) \
				goto print_perf_event_attr_out; \
		} while (0)

	desc = get_tcb_priv_data(tcp);

	attr = desc->attr;
	size = desc->size;

	/* Currently the only error that is expected to change the size field */
	if (tcp->u_error == E2BIG) {
		if (umove(tcp, addr + offsetof(struct perf_event_attr, size),
		    &new_size))
			use_new_size = -1;
		else
			use_new_size = 1;
	}

	PRINT_XLAT("{type=", perf_type_id, attr->type, "PERF_TYPE_???");
	tprints(", size=");
	printxval(perf_attr_size, attr->size, "PERF_ATTR_SIZE_???");

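	/*
	 * On E2BIG the kernel updates the size field in tracee memory (see the
	 * comment above), so show the transition as "old => new".
	 */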
	if (use_new_size) {
		tprints(" => ");

		if (use_new_size > 0)
			printxval(perf_attr_size, new_size,
			          "PERF_ATTR_SIZE_???");
		else
			tprints("???");
	}

	switch (attr->type) {
	case PERF_TYPE_HARDWARE:
		PRINT_XLAT(", config=", perf_hw_id, attr->config,
		           "PERF_COUNT_HW_???");
		break;
	case PERF_TYPE_SOFTWARE:
		PRINT_XLAT(", config=", perf_sw_ids, attr->config,
		           "PERF_COUNT_SW_???");
		break;
	case PERF_TYPE_TRACEPOINT:
		/*
		 * "The value to use in config can be obtained from under
		 * debugfs tracing/events/../../id if ftrace is enabled in the
		 * kernel."
		 */
		tprintf(", config=%" PRIu64, attr->config);
		break;
	case PERF_TYPE_HW_CACHE:
		/*
		 * (perf_hw_cache_id) | (perf_hw_cache_op_id << 8) |
		 * (perf_hw_cache_op_result_id << 16)
		 */
		PRINT_XLAT(", config=", perf_hw_cache_id, attr->config & 0xFF,
		           "PERF_COUNT_HW_CACHE_???");
		PRINT_XLAT("|", perf_hw_cache_op_id, (attr->config >> 8) & 0xFF,
		           "PERF_COUNT_HW_CACHE_OP_???");
		/*
		 * Current code (see set_ext_hw_attr in arch/x86/events/core.c,
		 * tile_map_cache_event in arch/tile/kernel/perf_event.c,
		 * arc_pmu_cache_event in arch/arc/kernel/perf_event.c,
		 * hw_perf_cache_event in arch/blackfin/kernel/perf_event.c,
		 * _hw_perf_cache_event in arch/metag/kernel/perf/perf_event.c,
		 * mipspmu_map_cache_event in arch/mips/kernel/perf_event_mipsxx.c,
		 * hw_perf_cache_event in arch/powerpc/perf/core-book3s.c,
		 * hw_perf_cache_event in arch/powerpc/perf/core-fsl-emb.c,
		 * hw_perf_cache_event in arch/sh/kernel/perf_event.c,
		 * sparc_map_cache_event in arch/sparc/kernel/perf_event.c,
		 * xtensa_pmu_cache_event in arch/xtensa/kernel/perf_event.c,
		 * armpmu_map_cache_event in drivers/perf/arm_pmu.c) assumes
		 * that cache result is 8 bits in size.
		 */
		PRINT_XLAT("<<8|", perf_hw_cache_op_result_id,
		           (attr->config >> 16) & 0xFF,
		           "PERF_COUNT_HW_CACHE_RESULT_???");
		tprints("<<16");
		if (attr->config >> 24)
			tprintf("|%#" PRIx64 "<<24 "
			        "/* PERF_COUNT_HW_CACHE_??? */",
			        attr->config >> 24);
		break;
	case PERF_TYPE_RAW:
		/*
		 * "If type is PERF_TYPE_RAW, then a custom "raw" config
		 * value is needed. Most CPUs support events that are not
		 * covered by the "generalized" events. These are
		 * implementation defined; see your CPU manual (for example the
		 * Intel Volume 3B documentation or the AMD BIOS and Kernel
		 * Developer Guide). The libpfm4 library can be used to
		 * translate from the name in the architectural manuals
		 * to the raw hex value perf_event_open() expects in this
		 * field."
		 */
	case PERF_TYPE_BREAKPOINT:
		/*
		 * "If type is PERF_TYPE_BREAKPOINT, then leave config set
		 * to zero. Its parameters are set in other places."
		 */
	default:
		tprintf(", config=%#" PRIx64, attr->config);
		break;
	}

	if (abbrev(tcp))
		goto print_perf_event_attr_out;

	if (attr->freq)
		tprintf(", sample_freq=%" PRIu64, attr->sample_freq);
	else
		tprintf(", sample_period=%" PRIu64, attr->sample_period);

	tprints(", sample_type=");
	printflags64(perf_event_sample_format, attr->sample_type,
		"PERF_SAMPLE_???");

	tprints(", read_format=");
	printflags64(perf_event_read_format, attr->read_format,
		"PERF_FORMAT_???");

	tprintf(", disabled=%u"
	        ", inherit=%u"
	        ", pinned=%u"
	        ", exclusive=%u"
	        ", exclude_user=%u"
	        ", exclude_kernel=%u"
	        ", exclude_hv=%u"
	        ", exclude_idle=%u"
	        ", mmap=%u"
	        ", comm=%u"
	        ", freq=%u"
	        ", inherit_stat=%u"
	        ", enable_on_exec=%u"
	        ", task=%u"
	        ", watermark=%u"
	        ", precise_ip=%u /* %s */"
	        ", mmap_data=%u"
	        ", sample_id_all=%u"
	        ", exclude_host=%u"
	        ", exclude_guest=%u"
	        ", exclude_callchain_kernel=%u"
	        ", exclude_callchain_user=%u"
	        ", mmap2=%u"
	        ", comm_exec=%u"
	        ", use_clockid=%u"
	        ", context_switch=%u"
	        ", write_backward=%u",
	        attr->disabled,
	        attr->inherit,
	        attr->pinned,
	        attr->exclusive,
	        attr->exclude_user,
	        attr->exclude_kernel,
	        attr->exclude_hv,
	        attr->exclude_idle,
	        attr->mmap,
	        attr->comm,
	        attr->freq,
	        attr->inherit_stat,
	        attr->enable_on_exec,
	        attr->task,
	        attr->watermark,
	        attr->precise_ip, precise_ip_desc[attr->precise_ip],
	        attr->mmap_data,
	        attr->sample_id_all,
	        attr->exclude_host,
	        attr->exclude_guest,
	        attr->exclude_callchain_kernel,
	        attr->exclude_callchain_user,
	        attr->mmap2,
	        attr->comm_exec,
	        attr->use_clockid,
	        attr->context_switch,
	        attr->write_backward);

	/*
	 * Print it only if it is non-zero, since it may contain flags we are
	 * not aware of.
	 */
	if (attr->__reserved_1)
		tprintf(", __reserved_1=%#" PRIx64 " /* Bits 63..28 */",
		        (uint64_t) attr->__reserved_1);

	if (attr->watermark)
		tprintf(", wakeup_watermark=%u", attr->wakeup_watermark);
	else
		tprintf(", wakeup_events=%u", attr->wakeup_events);

	if (attr->type == PERF_TYPE_BREAKPOINT)
		/* Any combination of R/W with X is deemed invalid */
		PRINT_XLAT(", bp_type=", hw_breakpoint_type, attr->bp_type,
		           (attr->bp_type <=
		                   (HW_BREAKPOINT_X | HW_BREAKPOINT_RW)) ?
		                           "HW_BREAKPOINT_INVALID" :
		                           "HW_BREAKPOINT_???");

	if (attr->type == PERF_TYPE_BREAKPOINT)
		tprintf(", bp_addr=%#" PRIx64, attr->bp_addr);
	else
		tprintf(", config1=%#" PRIx64, attr->config1);

	/*
	 * Fields after bp_addr/config1 are optional and may not be present;
	 * a check against size is needed.
	 */

	_PERF_CHECK_FIELD(bp_len);
	if (attr->type == PERF_TYPE_BREAKPOINT)
		tprintf(", bp_len=%" PRIu64, attr->bp_len);
	else
		tprintf(", config2=%#" PRIx64, attr->config2);

	_PERF_CHECK_FIELD(branch_sample_type);
	if (attr->sample_type & PERF_SAMPLE_BRANCH_STACK) {
		tprints(", branch_sample_type=");
		printflags64(perf_branch_sample_type, attr->branch_sample_type,
		             "PERF_SAMPLE_BRANCH_???");
	}

	_PERF_CHECK_FIELD(sample_regs_user);
	/*
	 * "This bit mask defines the set of user CPU registers to dump on
	 * samples. The layout of the register mask is architecture-specific and
	 * described in the kernel header
	 * arch/ARCH/include/uapi/asm/perf_regs.h."
	 */
	tprintf(", sample_regs_user=%#" PRIx64, attr->sample_regs_user);

	_PERF_CHECK_FIELD(sample_stack_user);
	/*
	 * "size of the user stack to dump if PERF_SAMPLE_STACK_USER is
	 * specified."
	 */
	if (attr->sample_type & PERF_SAMPLE_STACK_USER)
		tprintf(", sample_stack_user=%#" PRIx32,
		        attr->sample_stack_user);

	if (attr->use_clockid) {
		_PERF_CHECK_FIELD(clockid);
		tprints(", clockid=");
		printxval(clocknames, attr->clockid, "CLOCK_???");
	}

	_PERF_CHECK_FIELD(sample_regs_intr);
	tprintf(", sample_regs_intr=%#" PRIx64, attr->sample_regs_intr);

	_PERF_CHECK_FIELD(aux_watermark);
	tprintf(", aux_watermark=%" PRIu32, attr->aux_watermark);

	_PERF_CHECK_FIELD(sample_max_stack);
	tprintf(", sample_max_stack=%" PRIu16, attr->sample_max_stack);

	/* _PERF_CHECK_FIELD(__reserved_2);
	tprintf(", __reserved2=%" PRIu16, attr->__reserved_2); */

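	/*
	 * Print an ellipsis when fields remain beyond what has been printed:
	 * either the size declared by the tracee exceeds the amount fetched,
	 * or a zero declared size implies at least PERF_ATTR_SIZE_VER0 bytes.
	 */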
print_perf_event_attr_out:
	if ((attr->size && (attr->size > size)) ||
	    (!attr->size && (size < PERF_ATTR_SIZE_VER0)))
		tprints(", ...");

	tprints("}");
}

SYS_FUNC(perf_event_open)
{
	/*
	 * We try to copy the whole structure on entering so that its size
	 * field can be checked on exiting.  The remaining fields are not
	 * checked because they should not be changed by the kernel, but we
	 * copy the whole structure rather than just the size field in case
	 * they are.
	 */
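	/*
	 * On entering, a successful fetch defers all printing to the exiting
	 * stage; if the fetch fails, the address has already been printed, so
	 * the remaining arguments are decoded right away.
	 */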
	if (entering(tcp)) {
		if (!fetch_perf_event_attr(tcp, tcp->u_arg[0]))
			return 0;
	} else {
		print_perf_event_attr(tcp, tcp->u_arg[0]);
	}

	tprintf(", %d, %d, %d, ",
		(int) tcp->u_arg[1],
		(int) tcp->u_arg[2],
		(int) tcp->u_arg[3]);
	printflags64(perf_event_open_flags, tcp->u_arg[4], "PERF_FLAG_???");

	return RVAL_DECODED | RVAL_FD;
}