/*
 * Copyright (c) 2013 Ben Noordhuis <info@bnoordhuis.nl>
 * Copyright (c) 2013-2015 Dmitry V. Levin <ldv@altlinux.org>
 * Copyright (c) 2016 Eugene Syromyatnikov <evgsyr@gmail.com>
 * Copyright (c) 2015-2018 The strace developers.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "defs.h"

#include "perf_event_struct.h"
#include "print_fields.h"

#include "xlat/hw_breakpoint_len.h"
#include "xlat/hw_breakpoint_type.h"
#include "xlat/perf_attr_size.h"
#include "xlat/perf_branch_sample_type.h"
#include "xlat/perf_event_open_flags.h"
#include "xlat/perf_event_read_format.h"
#include "xlat/perf_event_sample_format.h"
#include "xlat/perf_hw_cache_id.h"
#include "xlat/perf_hw_cache_op_id.h"
#include "xlat/perf_hw_cache_op_result_id.h"
#include "xlat/perf_hw_id.h"
#include "xlat/perf_sw_ids.h"
#include "xlat/perf_type_id.h"

struct pea_desc {
	struct perf_event_attr *attr;
	uint32_t size;
};

static void
free_pea_desc(void *pea_desc_ptr)
{
	struct pea_desc *desc = pea_desc_ptr;

	free(desc->attr);
	free(desc);
}

int
fetch_perf_event_attr(struct tcb *const tcp, const kernel_ulong_t addr)
{
	struct pea_desc *desc;
	struct perf_event_attr *attr;
	uint32_t size;

	if (umove(tcp, addr + offsetof(struct perf_event_attr, size), &size)) {
		printaddr(addr);
		return 1;
	}

	if (size > sizeof(*attr))
		size = sizeof(*attr);

	if (!size)
		size = PERF_ATTR_SIZE_VER0;

	/*
	 * The kernel (rightfully) deems attribute structures with a size
	 * smaller than the first published format size invalid, and we do
	 * the same.
	 */
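	/*
	 * For reference (values as defined in the Linux UAPI header
	 * linux/perf_event.h, listed here purely as an illustration and not
	 * used by the code below): PERF_ATTR_SIZE_VER0 is 64 bytes, VER1 is
	 * 72, VER2 is 80, VER3 is 96, VER4 is 104, and VER5 is 112.  So, for
	 * example, a size of 32 is rejected here, while a size of 0 was
	 * normalized to PERF_ATTR_SIZE_VER0 above.
	 */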
	if (size < PERF_ATTR_SIZE_VER0) {
		printaddr(addr);
		return 1;
	}

	if (abbrev(tcp))
		size = offsetofend(struct perf_event_attr, config);

	/* Size should be a multiple of 8, but the kernel doesn't check for it */
	/* size &= ~7; */

	attr = xcalloc(1, sizeof(*attr));

	if (umoven_or_printaddr(tcp, addr, size, attr)) {
		free(attr);

		return 1;
	}

	desc = xmalloc(sizeof(*desc));

	desc->attr = attr;
	desc->size = size;

	set_tcb_priv_data(tcp, desc, free_pea_desc);

	return 0;
}

void
print_perf_event_attr(struct tcb *const tcp, const kernel_ulong_t addr)
{
	static const char *precise_ip_desc[] = {
		"arbitrary skid",
		"constant skid",
		"requested to have 0 skid",
		"must have 0 skid",
	};

	struct pea_desc *desc;
	struct perf_event_attr *attr;
	uint32_t size;
	uint32_t new_size;
	int use_new_size = 0;

	/*
	 * Amusingly, the kernel accepts structures with only part of a field
	 * present, so we make checks like this (instead of checking
	 * offsetofend against size) in order to print fields as the kernel
	 * sees them. This should also work fine on big endian architectures.
	 */
	#define _PERF_CHECK_FIELD(_field) \
		do { \
			if (offsetof(struct perf_event_attr, _field) >= size) \
				goto print_perf_event_attr_out; \
		} while (0)
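	/*
	 * Illustration (assuming the perf_event_attr layout from
	 * linux/perf_event.h): if the tracee supplied size ==
	 * PERF_ATTR_SIZE_VER0 (64 bytes), the fields up to and including the
	 * bp_addr/config1 union are printed, while bp_len/config2 (which
	 * start at offset 64) and everything after them are skipped by these
	 * checks.
	 */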

	desc = get_tcb_priv_data(tcp);

	attr = desc->attr;
	size = desc->size;

	/* Currently, this is the only error that is expected to change the size field */
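	/*
	 * Background note (based on the perf_event_open(2) documentation,
	 * given here for illustration): on E2BIG the kernel is expected to
	 * write the attribute size it supports back into the size field of
	 * the supplied structure, which is why the field is re-fetched here
	 * and printed as "size=OLD => NEW" below.
	 */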
	if (tcp->u_error == E2BIG) {
		if (umove(tcp, addr + offsetof(struct perf_event_attr, size),
		    &new_size))
			use_new_size = -1;
		else
			use_new_size = 1;
	}

	PRINT_FIELD_XVAL("{", *attr, type, perf_type_id, "PERF_TYPE_???");
	PRINT_FIELD_XVAL(", ", *attr, size, perf_attr_size,
			 "PERF_ATTR_SIZE_???");

	if (use_new_size) {
		tprints(" => ");

		if (use_new_size > 0)
			printxval(perf_attr_size, new_size,
				  "PERF_ATTR_SIZE_???");
		else
			tprints("???");
	}

	switch (attr->type) {
	case PERF_TYPE_HARDWARE:
		PRINT_FIELD_XVAL(", ", *attr, config, perf_hw_id,
				 "PERF_COUNT_HW_???");
		break;
	case PERF_TYPE_SOFTWARE:
		PRINT_FIELD_XVAL(", ", *attr, config, perf_sw_ids,
				 "PERF_COUNT_SW_???");
		break;
	case PERF_TYPE_TRACEPOINT:
		/*
		 * "The value to use in config can be obtained from under
		 * debugfs tracing/events/../../id if ftrace is enabled
		 * in the kernel."
		 */
		PRINT_FIELD_U(", ", *attr, config);
		break;
	case PERF_TYPE_HW_CACHE:
		/*
		 * (perf_hw_cache_id) | (perf_hw_cache_op_id << 8) |
		 * (perf_hw_cache_op_result_id << 16)
		 */
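		/*
		 * For example (an illustration using the enum values from
		 * linux/perf_event.h, not something computed by this code):
		 * an L1 data cache read miss counter would be encoded as
		 *   PERF_COUNT_HW_CACHE_L1D |
		 *   (PERF_COUNT_HW_CACHE_OP_READ << 8) |
		 *   (PERF_COUNT_HW_CACHE_RESULT_MISS << 16)
		 * and would be printed below in the same "A|B<<8|C<<16" form.
		 */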
		tprints(", config=");
		printxval(perf_hw_cache_id, attr->config & 0xFF,
			  "PERF_COUNT_HW_CACHE_???");
		tprints("|");
		printxval(perf_hw_cache_op_id, (attr->config >> 8) & 0xFF,
			  "PERF_COUNT_HW_CACHE_OP_???");
		tprints("<<8|");
		/*
		 * Current code (see set_ext_hw_attr in arch/x86/events/core.c,
		 * tile_map_cache_event in arch/tile/kernel/perf_event.c,
		 * arc_pmu_cache_event in arch/arc/kernel/perf_event.c,
		 * hw_perf_cache_event in arch/blackfin/kernel/perf_event.c,
		 * _hw_perf_cache_event in arch/metag/kernel/perf/perf_event.c,
		 * mipspmu_map_cache_event in arch/mips/kernel/perf_event_mipsxx.c,
		 * hw_perf_cache_event in arch/powerpc/perf/core-book3s.c,
		 * hw_perf_cache_event in arch/powerpc/perf/core-fsl-emb.c,
		 * hw_perf_cache_event in arch/sh/kernel/perf_event.c,
		 * sparc_map_cache_event in arch/sparc/kernel/perf_event.c,
		 * xtensa_pmu_cache_event in arch/xtensa/kernel/perf_event.c,
		 * armpmu_map_cache_event in drivers/perf/arm_pmu.c) assumes
		 * that the cache result is 8 bits in size.
		 */
		printxval(perf_hw_cache_op_result_id,
			  (attr->config >> 16) & 0xFF,
			  "PERF_COUNT_HW_CACHE_RESULT_???");
		tprints("<<16");
		if (attr->config >> 24) {
			tprintf("|%#" PRIx64 "<<24", attr->config >> 24);
			tprints_comment("PERF_COUNT_HW_CACHE_???");
		}
		break;
	case PERF_TYPE_RAW:
		/*
		 * "If type is PERF_TYPE_RAW, then a custom "raw" config
		 * value is needed. Most CPUs support events that are not
		 * covered by the "generalized" events. These are
		 * implementation defined; see your CPU manual (for example the
		 * Intel Volume 3B documentation or the AMD BIOS and Kernel
		 * Developer Guide). The libpfm4 library can be used to
		 * translate from the name in the architectural manuals
		 * to the raw hex value perf_event_open() expects in this
		 * field."
		 */
	case PERF_TYPE_BREAKPOINT:
		/*
		 * "If type is PERF_TYPE_BREAKPOINT, then leave config set
		 * to zero. Its parameters are set in other places."
		 */
	default:
		PRINT_FIELD_X(", ", *attr, config);
		break;
	}

	if (abbrev(tcp))
		goto print_perf_event_attr_out;

	if (attr->freq)
		PRINT_FIELD_U(", ", *attr, sample_freq);
	else
		PRINT_FIELD_U(", ", *attr, sample_period);

	PRINT_FIELD_FLAGS(", ", *attr, sample_type, perf_event_sample_format,
			  "PERF_SAMPLE_???");
	PRINT_FIELD_FLAGS(", ", *attr, read_format, perf_event_read_format,
			  "PERF_FORMAT_???");

	tprintf(", disabled=%u"
		", inherit=%u"
		", pinned=%u"
		", exclusive=%u"
		", exclude_user=%u"
		", exclude_kernel=%u"
		", exclude_hv=%u"
		", exclude_idle=%u"
		", mmap=%u"
		", comm=%u"
		", freq=%u"
		", inherit_stat=%u"
		", enable_on_exec=%u"
		", task=%u"
		", watermark=%u"
		", precise_ip=%u",
		attr->disabled,
		attr->inherit,
		attr->pinned,
		attr->exclusive,
		attr->exclude_user,
		attr->exclude_kernel,
		attr->exclude_hv,
		attr->exclude_idle,
		attr->mmap,
		attr->comm,
		attr->freq,
		attr->inherit_stat,
		attr->enable_on_exec,
		attr->task,
		attr->watermark,
		attr->precise_ip);
	tprints_comment(precise_ip_desc[attr->precise_ip]);
	tprintf(", mmap_data=%u"
		", sample_id_all=%u"
		", exclude_host=%u"
		", exclude_guest=%u"
		", exclude_callchain_kernel=%u"
		", exclude_callchain_user=%u"
		", mmap2=%u"
		", comm_exec=%u"
		", use_clockid=%u"
		", context_switch=%u"
		", write_backward=%u"
		", namespaces=%u",
		attr->mmap_data,
		attr->sample_id_all,
		attr->exclude_host,
		attr->exclude_guest,
		attr->exclude_callchain_kernel,
		attr->exclude_callchain_user,
		attr->mmap2,
		attr->comm_exec,
		attr->use_clockid,
		attr->context_switch,
		attr->write_backward,
		attr->namespaces);

	/*
	 * Print it only if it is non-zero, since it may contain flags we
	 * are not aware of.
	 */
	if (attr->__reserved_1) {
		tprintf(", __reserved_1=%#" PRIx64,
			(uint64_t) attr->__reserved_1);
		tprints_comment("Bits 63..29");
	}

	if (attr->watermark)
		PRINT_FIELD_U(", ", *attr, wakeup_watermark);
	else
		PRINT_FIELD_U(", ", *attr, wakeup_events);

	if (attr->type == PERF_TYPE_BREAKPOINT)
		/* Any combination of R/W with X is deemed invalid */
		PRINT_FIELD_XVAL(", ", *attr, bp_type, hw_breakpoint_type,
				 (attr->bp_type <=
					(HW_BREAKPOINT_X | HW_BREAKPOINT_RW))
						? "HW_BREAKPOINT_INVALID"
						: "HW_BREAKPOINT_???");
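	/*
	 * Illustration (assuming the HW_BREAKPOINT_* values from
	 * linux/hw_breakpoint.h, where R is 1, W is 2, and X is 4): a bp_type
	 * of HW_BREAKPOINT_W | HW_BREAKPOINT_X (6) is not a valid xlat entry,
	 * so the call above would print it as 0x6 with a HW_BREAKPOINT_INVALID
	 * comment, while a value above (HW_BREAKPOINT_X | HW_BREAKPOINT_RW)
	 * would get the HW_BREAKPOINT_??? comment instead.
	 */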

	if (attr->type == PERF_TYPE_BREAKPOINT)
		PRINT_FIELD_X(", ", *attr, bp_addr);
	else
		PRINT_FIELD_X(", ", *attr, config1);

	/*
	 * The fields after bp_addr/config1 are optional and may not be
	 * present; a check against size is needed.
	 */

	_PERF_CHECK_FIELD(bp_len);
	if (attr->type == PERF_TYPE_BREAKPOINT)
		PRINT_FIELD_U(", ", *attr, bp_len);
	else
		PRINT_FIELD_X(", ", *attr, config2);

	_PERF_CHECK_FIELD(branch_sample_type);
	if (attr->sample_type & PERF_SAMPLE_BRANCH_STACK) {
		PRINT_FIELD_FLAGS(", ", *attr, branch_sample_type,
				  perf_branch_sample_type,
				  "PERF_SAMPLE_BRANCH_???");
	}

	_PERF_CHECK_FIELD(sample_regs_user);
	/*
	 * "This bit mask defines the set of user CPU registers to dump on
	 * samples. The layout of the register mask is architecture-specific and
	 * described in the kernel header
	 * arch/ARCH/include/uapi/asm/perf_regs.h."
	 */
	PRINT_FIELD_X(", ", *attr, sample_regs_user);
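	/*
	 * Hypothetical example of such a mask (using a register name from
	 * arch/x86/include/uapi/asm/perf_regs.h, shown purely as an
	 * illustration): on x86_64, sample_regs_user = (1ULL << PERF_REG_X86_IP)
	 * would request only the instruction pointer; the mask is simply
	 * printed in hex here.
	 */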

	_PERF_CHECK_FIELD(sample_stack_user);
	/*
	 * "size of the user stack to dump if PERF_SAMPLE_STACK_USER is
	 * specified."
	 */
	if (attr->sample_type & PERF_SAMPLE_STACK_USER)
		PRINT_FIELD_X(", ", *attr, sample_stack_user);

	if (attr->use_clockid) {
		_PERF_CHECK_FIELD(clockid);
		PRINT_FIELD_XVAL(", ", *attr, clockid, clocknames, "CLOCK_???");
	}

	_PERF_CHECK_FIELD(sample_regs_intr);
	PRINT_FIELD_X(", ", *attr, sample_regs_intr);

	_PERF_CHECK_FIELD(aux_watermark);
	PRINT_FIELD_U(", ", *attr, aux_watermark);

	_PERF_CHECK_FIELD(sample_max_stack);
	PRINT_FIELD_U(", ", *attr, sample_max_stack);

	/* _PERF_CHECK_FIELD(__reserved_2);
	PRINT_FIELD_U(", ", *attr, __reserved2); */

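	/*
	 * Note (describing the check below): the trailing ", ..." indicates
	 * that the structure contains more data than was decoded above,
	 * either because the output was abbreviated or because the declared
	 * size exceeds the amount that was fetched.
	 */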
print_perf_event_attr_out:
	if ((attr->size && (attr->size > size)) ||
	    (!attr->size && (size < PERF_ATTR_SIZE_VER0)))
		tprints(", ...");

	tprints("}");
}

SYS_FUNC(perf_event_open)
{
	/*
	 * We fetch the whole structure on entering so that we can check the
	 * size value on exiting. We do not check the rest of the fields
	 * because they shouldn't be changed, but we fetch the whole structure
	 * instead of just the size field because they could be.
	 */
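	/*
	 * Roughly, the resulting decoding looks like the following
	 * (an illustrative sketch, not verbatim strace output):
	 *   perf_event_open({type=PERF_TYPE_HARDWARE, size=PERF_ATTR_SIZE_VER5,
	 *   config=PERF_COUNT_HW_CPU_CYCLES, ...}, 0, -1, -1,
	 *   PERF_FLAG_FD_CLOEXEC) = 3
	 */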
	if (entering(tcp)) {
		if (!fetch_perf_event_attr(tcp, tcp->u_arg[0]))
			return 0;
	} else {
		print_perf_event_attr(tcp, tcp->u_arg[0]);
	}

	tprintf(", %d, %d, ",
		(int) tcp->u_arg[1],
		(int) tcp->u_arg[2]);
	printfd(tcp, tcp->u_arg[3]);
	tprints(", ");
	printflags64(perf_event_open_flags, tcp->u_arg[4], "PERF_FLAG_???");

	return RVAL_DECODED | RVAL_FD;
}
    429