1 /* 2 * Copyright (c) 2013 Ben Noordhuis <info (at) bnoordhuis.nl> 3 * Copyright (c) 2013-2015 Dmitry V. Levin <ldv (at) altlinux.org> 4 * Copyright (c) 2016 Eugene Syromyatnikov <evgsyr (at) gmail.com> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. The name of the author may not be used to endorse or promote products 16 * derived from this software without specific prior written permission. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 */ 29 30 #include "defs.h" 31 32 #include "perf_event_struct.h" 33 34 #include "xlat/clocknames.h" 35 #include "xlat/hw_breakpoint_len.h" 36 #include "xlat/hw_breakpoint_type.h" 37 #include "xlat/perf_attr_size.h" 38 #include "xlat/perf_branch_sample_type.h" 39 #include "xlat/perf_event_open_flags.h" 40 #include "xlat/perf_event_read_format.h" 41 #include "xlat/perf_event_sample_format.h" 42 #include "xlat/perf_hw_cache_id.h" 43 #include "xlat/perf_hw_cache_op_id.h" 44 #include "xlat/perf_hw_cache_op_result_id.h" 45 #include "xlat/perf_hw_id.h" 46 #include "xlat/perf_sw_ids.h" 47 #include "xlat/perf_type_id.h" 48 49 struct pea_desc { 50 struct perf_event_attr *attr; 51 uint32_t size; 52 }; 53 54 static void 55 free_pea_desc(void *pea_desc_ptr) 56 { 57 struct pea_desc *desc = pea_desc_ptr; 58 59 free(desc->attr); 60 free(desc); 61 } 62 63 static int 64 fetch_perf_event_attr(struct tcb *const tcp, const kernel_ulong_t addr) 65 { 66 struct pea_desc *desc; 67 struct perf_event_attr *attr; 68 uint32_t size; 69 70 if (umove(tcp, addr + offsetof(struct perf_event_attr, size), &size)) { 71 printaddr(addr); 72 return 1; 73 } 74 75 if (size > sizeof(*attr)) 76 size = sizeof(*attr); 77 78 if (!size) 79 size = PERF_ATTR_SIZE_VER0; 80 81 /* 82 * Kernel (rightfully) deems invalid attribute structures with size less 83 * than first published format size, and we do the same. 84 */ 85 if (size < PERF_ATTR_SIZE_VER0) { 86 printaddr(addr); 87 return 1; 88 } 89 90 if (abbrev(tcp)) 91 size = offsetofend(struct perf_event_attr, config); 92 93 /* Size should be multiple of 8, but kernel doesn't check for it */ 94 /* size &= ~7; */ 95 96 attr = xcalloc(1, sizeof(*attr)); 97 98 if (umoven_or_printaddr(tcp, addr, size, attr)) { 99 free(attr); 100 101 return 1; 102 } 103 104 desc = xmalloc(sizeof(*desc)); 105 106 desc->attr = attr; 107 desc->size = size; 108 109 set_tcb_priv_data(tcp, desc, free_pea_desc); 110 111 return 0; 112 } 113 114 #define PRINT_XLAT(prefix, xlat, x, dflt) \ 115 do { \ 116 tprints(prefix); \ 117 printxval_search(xlat, x, dflt); \ 118 } while (0) 119 120 static void 121 print_perf_event_attr(struct tcb *const tcp, const kernel_ulong_t addr) 122 { 123 static const char *precise_ip_desc[] = { 124 "arbitrary skid", 125 "constant skid", 126 "requested to have 0 skid", 127 "must have 0 skid", 128 }; 129 130 struct pea_desc *desc; 131 struct perf_event_attr *attr; 132 uint32_t size; 133 uint32_t new_size; 134 int use_new_size = 0; 135 136 /* 137 * Amusingly, kernel accepts structures with only part of the field 138 * present, so we making check like this (instead of checking 139 * offsetofend against size) in order to print fields as kernel sees 140 * them. This also should work great on big endian architectures. 141 */ 142 #define _PERF_CHECK_FIELD(_field) \ 143 do { \ 144 if (offsetof(struct perf_event_attr, _field) >= size) \ 145 goto print_perf_event_attr_out; \ 146 } while (0) 147 148 desc = get_tcb_priv_data(tcp); 149 150 attr = desc->attr; 151 size = desc->size; 152 153 /* The only error which expected to change size field currently */ 154 if (tcp->u_error == E2BIG) { 155 if (umove(tcp, addr + offsetof(struct perf_event_attr, size), 156 &new_size)) 157 use_new_size = -1; 158 else 159 use_new_size = 1; 160 } 161 162 PRINT_XLAT("{type=", perf_type_id, attr->type, "PERF_TYPE_???"); 163 tprintf(", size="); 164 printxval(perf_attr_size, attr->size, "PERF_ATTR_SIZE_???"); 165 166 if (use_new_size) { 167 tprints(" => "); 168 169 if (use_new_size > 0) 170 printxval(perf_attr_size, new_size, 171 "PERF_ATTR_SIZE_???"); 172 else 173 tprints("???"); 174 } 175 176 switch (attr->type) { 177 case PERF_TYPE_HARDWARE: 178 PRINT_XLAT(", config=", perf_hw_id, attr->config, 179 "PERF_COUNT_HW_???"); 180 break; 181 case PERF_TYPE_SOFTWARE: 182 PRINT_XLAT(", config=", perf_sw_ids, attr->config, 183 "PERF_COUNT_SW_???"); 184 break; 185 case PERF_TYPE_TRACEPOINT: 186 /* 187 * "The value to use in config can be obtained from under 188 * debugfs tracing/events/../../id if ftrace is enabled in the 189 * kernel." 190 */ 191 tprintf(", config=%" PRIu64, attr->config); 192 break; 193 case PERF_TYPE_HW_CACHE: 194 /* 195 * (perf_hw_cache_id) | (perf_hw_cache_op_id << 8) | 196 * (perf_hw_cache_op_result_id << 16) 197 */ 198 PRINT_XLAT(", config=", perf_hw_cache_id, attr->config & 0xFF, 199 "PERF_COUNT_HW_CACHE_???"); 200 PRINT_XLAT("|", perf_hw_cache_op_id, (attr->config >> 8) & 0xFF, 201 "PERF_COUNT_HW_CACHE_OP_???"); 202 /* 203 * Current code (see set_ext_hw_attr in arch/x86/events/core.c, 204 * tile_map_cache_event in arch/tile/kernel/perf_event.c, 205 * arc_pmu_cache_event in arch/arc/kernel/perf_event.c, 206 * hw_perf_cache_event in arch/blackfin/kernel/perf_event.c, 207 * _hw_perf_cache_event in arch/metag/kernel/perf/perf_event.c, 208 * mipspmu_map_cache_event in arch/mips/kernel/perf_event_mipsxx.c, 209 * hw_perf_cache_event in arch/powerpc/perf/core-book3s.c, 210 * hw_perf_cache_event in arch/powerpc/perf/core-fsl-emb.c, 211 * hw_perf_cache_event in arch/sh/kernel/perf_event.c, 212 * sparc_map_cache_event in arch/sparc/kernel/perf_event.c, 213 * xtensa_pmu_cache_event in arch/xtensa/kernel/perf_event.c, 214 * armpmu_map_cache_event in drivers/perf/arm_pmu.c) assumes 215 * that cache result is 8 bits in size. 216 */ 217 PRINT_XLAT("<<8|", perf_hw_cache_op_result_id, 218 (attr->config >> 16) & 0xFF, 219 "PERF_COUNT_HW_CACHE_RESULT_???"); 220 tprintf("<<16"); 221 if (attr->config >> 24) 222 tprintf("|%#" PRIx64 "<<24 " 223 "/* PERF_COUNT_HW_CACHE_??? */", 224 attr->config >> 24); 225 break; 226 case PERF_TYPE_RAW: 227 /* 228 * "If type is PERF_TYPE_RAW, then a custom "raw" config 229 * value is needed. Most CPUs support events that are not 230 * covered by the "generalized" events. These are 231 * implementation defined; see your CPU manual (for example the 232 * Intel Volume 3B documentation or the AMD BIOS and Kernel 233 * Developer Guide). The libpfm4 library can be used to 234 * translate from the name in the architectural manuals 235 * to the raw hex value perf_event_open() expects in this 236 * field." 237 */ 238 case PERF_TYPE_BREAKPOINT: 239 /* 240 * "If type is PERF_TYPE_BREAKPOINT, then leave config set 241 * to zero. Its parameters are set in other places." 242 */ 243 default: 244 tprintf(", config=%#" PRIx64, attr->config); 245 break; 246 } 247 248 if (abbrev(tcp)) 249 goto print_perf_event_attr_out; 250 251 if (attr->freq) 252 tprintf(", sample_freq=%" PRIu64, attr->sample_freq); 253 else 254 tprintf(", sample_period=%" PRIu64, attr->sample_period); 255 256 tprintf(", sample_type="); 257 printflags64(perf_event_sample_format, attr->sample_type, 258 "PERF_SAMPLE_???"); 259 260 tprintf(", read_format="); 261 printflags64(perf_event_read_format, attr->read_format, 262 "PERF_FORMAT_???"); 263 264 tprintf(", disabled=%u" 265 ", inherit=%u" 266 ", pinned=%u" 267 ", exclusive=%u" 268 ", exclusive_user=%u" 269 ", exclude_kernel=%u" 270 ", exclude_hv=%u" 271 ", exclude_idle=%u" 272 ", mmap=%u" 273 ", comm=%u" 274 ", freq=%u" 275 ", inherit_stat=%u" 276 ", enable_on_exec=%u" 277 ", task=%u" 278 ", watermark=%u" 279 ", precise_ip=%u /* %s */" 280 ", mmap_data=%u" 281 ", sample_id_all=%u" 282 ", exclude_host=%u" 283 ", exclude_guest=%u" 284 ", exclude_callchain_kernel=%u" 285 ", exclude_callchain_user=%u" 286 ", mmap2=%u" 287 ", comm_exec=%u" 288 ", use_clockid=%u" 289 ", context_switch=%u" 290 ", write_backward=%u", 291 attr->disabled, 292 attr->inherit, 293 attr->pinned, 294 attr->exclusive, 295 attr->exclude_user, 296 attr->exclude_kernel, 297 attr->exclude_hv, 298 attr->exclude_idle, 299 attr->mmap, 300 attr->comm, 301 attr->freq, 302 attr->inherit_stat, 303 attr->enable_on_exec, 304 attr->task, 305 attr->watermark, 306 attr->precise_ip, precise_ip_desc[attr->precise_ip], 307 attr->mmap_data, 308 attr->sample_id_all, 309 attr->exclude_host, 310 attr->exclude_guest, 311 attr->exclude_callchain_kernel, 312 attr->exclude_callchain_user, 313 attr->mmap2, 314 attr->comm_exec, 315 attr->use_clockid, 316 attr->context_switch, 317 attr->write_backward); 318 319 /* 320 * Print it only in case it is non-zero, since it may contain flags we 321 * are not aware about. 322 */ 323 if (attr->__reserved_1) 324 tprintf(", __reserved_1=%#" PRIx64 " /* Bits 63..28 */", 325 (uint64_t) attr->__reserved_1); 326 327 if (attr->watermark) 328 tprintf(", wakeup_watermark=%u", attr->wakeup_watermark); 329 else 330 tprintf(", wakeup_events=%u", attr->wakeup_events); 331 332 if (attr->type == PERF_TYPE_BREAKPOINT) 333 /* Any combination of R/W with X is deemed invalid */ 334 PRINT_XLAT(", bp_type=", hw_breakpoint_type, attr->bp_type, 335 (attr->bp_type <= 336 (HW_BREAKPOINT_X | HW_BREAKPOINT_RW)) ? 337 "HW_BREAKPOINT_INVALID" : 338 "HW_BREAKPOINT_???"); 339 340 if (attr->type == PERF_TYPE_BREAKPOINT) 341 tprintf(", bp_addr=%#" PRIx64, attr->bp_addr); 342 else 343 tprintf(", config1=%#" PRIx64, attr->config1); 344 345 /* 346 * Fields after bp_addr/config1 are optional and may not present; check 347 * against size is needed. 348 */ 349 350 _PERF_CHECK_FIELD(bp_len); 351 if (attr->type == PERF_TYPE_BREAKPOINT) 352 tprintf(", bp_len=%" PRIu64, attr->bp_len); 353 else 354 tprintf(", config2=%#" PRIx64, attr->config2); 355 356 _PERF_CHECK_FIELD(branch_sample_type); 357 if (attr->sample_type & PERF_SAMPLE_BRANCH_STACK) { 358 tprintf(", branch_sample_type="); 359 printflags64(perf_branch_sample_type, attr->branch_sample_type, 360 "PERF_SAMPLE_BRANCH_???"); 361 } 362 363 _PERF_CHECK_FIELD(sample_regs_user); 364 /* 365 * "This bit mask defines the set of user CPU registers to dump on 366 * samples. The layout of the register mask is architecture-specific and 367 * described in the kernel header 368 * arch/ARCH/include/uapi/asm/perf_regs.h." 369 */ 370 tprintf(", sample_regs_user=%#" PRIx64, attr->sample_regs_user); 371 372 _PERF_CHECK_FIELD(sample_stack_user); 373 /* 374 * "size of the user stack to dump if PERF_SAMPLE_STACK_USER is 375 * specified." 376 */ 377 if (attr->sample_type & PERF_SAMPLE_STACK_USER) 378 tprintf(", sample_stack_user=%#" PRIx32, 379 attr->sample_stack_user); 380 381 if (attr->use_clockid) { 382 _PERF_CHECK_FIELD(clockid); 383 tprintf(", clockid="); 384 printxval(clocknames, attr->clockid, "CLOCK_???"); 385 } 386 387 _PERF_CHECK_FIELD(sample_regs_intr); 388 tprintf(", sample_regs_intr=%#" PRIx64, attr->sample_regs_intr); 389 390 _PERF_CHECK_FIELD(aux_watermark); 391 tprintf(", aux_watermark=%" PRIu32, attr->aux_watermark); 392 393 _PERF_CHECK_FIELD(sample_max_stack); 394 tprintf(", sample_max_stack=%" PRIu16, attr->sample_max_stack); 395 396 /* _PERF_CHECK_FIELD(__reserved_2); 397 tprintf(", __reserved2=%" PRIu16, attr->__reserved_2); */ 398 399 print_perf_event_attr_out: 400 if ((attr->size && (attr->size > size)) || 401 (!attr->size && (size < PERF_ATTR_SIZE_VER0))) 402 tprintf(", ..."); 403 404 tprintf("}"); 405 } 406 407 SYS_FUNC(perf_event_open) 408 { 409 /* 410 * We try to copy out the whole structure on entering in order to check 411 * size value on exiting. We do not check the rest of the fields because 412 * they shouldn't be changed, but copy the whole structure instead 413 * of just size field because they could. 414 */ 415 if (entering(tcp)) { 416 if (!fetch_perf_event_attr(tcp, tcp->u_arg[0])) 417 return 0; 418 } else { 419 print_perf_event_attr(tcp, tcp->u_arg[0]); 420 } 421 422 tprintf(", %d, %d, %d, ", 423 (int) tcp->u_arg[1], 424 (int) tcp->u_arg[2], 425 (int) tcp->u_arg[3]); 426 printflags64(perf_event_open_flags, tcp->u_arg[4], "PERF_FLAG_???"); 427 428 return RVAL_DECODED | RVAL_FD; 429 } 430