1 /* 2 * Copyright (c) 2013 Ben Noordhuis <info (at) bnoordhuis.nl> 3 * Copyright (c) 2013-2015 Dmitry V. Levin <ldv (at) altlinux.org> 4 * Copyright (c) 2016 Eugene Syromyatnikov <evgsyr (at) gmail.com> 5 * Copyright (c) 2015-2018 The strace developers. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. The name of the author may not be used to endorse or promote products 17 * derived from this software without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31 #include "defs.h" 32 33 #include "perf_event_struct.h" 34 #include "print_fields.h" 35 36 #include "xlat/hw_breakpoint_len.h" 37 #include "xlat/hw_breakpoint_type.h" 38 #include "xlat/perf_attr_size.h" 39 #include "xlat/perf_branch_sample_type.h" 40 #include "xlat/perf_event_open_flags.h" 41 #include "xlat/perf_event_read_format.h" 42 #include "xlat/perf_event_sample_format.h" 43 #include "xlat/perf_hw_cache_id.h" 44 #include "xlat/perf_hw_cache_op_id.h" 45 #include "xlat/perf_hw_cache_op_result_id.h" 46 #include "xlat/perf_hw_id.h" 47 #include "xlat/perf_sw_ids.h" 48 #include "xlat/perf_type_id.h" 49 50 struct pea_desc { 51 struct perf_event_attr *attr; 52 uint32_t size; 53 }; 54 55 static void 56 free_pea_desc(void *pea_desc_ptr) 57 { 58 struct pea_desc *desc = pea_desc_ptr; 59 60 free(desc->attr); 61 free(desc); 62 } 63 64 int 65 fetch_perf_event_attr(struct tcb *const tcp, const kernel_ulong_t addr) 66 { 67 struct pea_desc *desc; 68 struct perf_event_attr *attr; 69 uint32_t size; 70 71 if (umove(tcp, addr + offsetof(struct perf_event_attr, size), &size)) { 72 printaddr(addr); 73 return 1; 74 } 75 76 if (size > sizeof(*attr)) 77 size = sizeof(*attr); 78 79 if (!size) 80 size = PERF_ATTR_SIZE_VER0; 81 82 /* 83 * Kernel (rightfully) deems invalid attribute structures with size less 84 * than first published format size, and we do the same. 85 */ 86 if (size < PERF_ATTR_SIZE_VER0) { 87 printaddr(addr); 88 return 1; 89 } 90 91 if (abbrev(tcp)) 92 size = offsetofend(struct perf_event_attr, config); 93 94 /* Size should be multiple of 8, but kernel doesn't check for it */ 95 /* size &= ~7; */ 96 97 attr = xcalloc(1, sizeof(*attr)); 98 99 if (umoven_or_printaddr(tcp, addr, size, attr)) { 100 free(attr); 101 102 return 1; 103 } 104 105 desc = xmalloc(sizeof(*desc)); 106 107 desc->attr = attr; 108 desc->size = size; 109 110 set_tcb_priv_data(tcp, desc, free_pea_desc); 111 112 return 0; 113 } 114 115 void 116 print_perf_event_attr(struct tcb *const tcp, const kernel_ulong_t addr) 117 { 118 static const char *precise_ip_desc[] = { 119 "arbitrary skid", 120 "constant skid", 121 "requested to have 0 skid", 122 "must have 0 skid", 123 }; 124 125 struct pea_desc *desc; 126 struct perf_event_attr *attr; 127 uint32_t size; 128 uint32_t new_size; 129 int use_new_size = 0; 130 131 /* 132 * Amusingly, kernel accepts structures with only part of the field 133 * present, so we making check like this (instead of checking 134 * offsetofend against size) in order to print fields as kernel sees 135 * them. This also should work great on big endian architectures. 136 */ 137 #define _PERF_CHECK_FIELD(_field) \ 138 do { \ 139 if (offsetof(struct perf_event_attr, _field) >= size) \ 140 goto print_perf_event_attr_out; \ 141 } while (0) 142 143 desc = get_tcb_priv_data(tcp); 144 145 attr = desc->attr; 146 size = desc->size; 147 148 /* The only error which expected to change size field currently */ 149 if (tcp->u_error == E2BIG) { 150 if (umove(tcp, addr + offsetof(struct perf_event_attr, size), 151 &new_size)) 152 use_new_size = -1; 153 else 154 use_new_size = 1; 155 } 156 157 PRINT_FIELD_XVAL("{", *attr, type, perf_type_id, "PERF_TYPE_???"); 158 PRINT_FIELD_XVAL(", ", *attr, size, perf_attr_size, 159 "PERF_ATTR_SIZE_???"); 160 161 if (use_new_size) { 162 tprints(" => "); 163 164 if (use_new_size > 0) 165 printxval(perf_attr_size, new_size, 166 "PERF_ATTR_SIZE_???"); 167 else 168 tprints("???"); 169 } 170 171 switch (attr->type) { 172 case PERF_TYPE_HARDWARE: 173 PRINT_FIELD_XVAL(", ", *attr, config, perf_hw_id, 174 "PERF_COUNT_HW_???"); 175 break; 176 case PERF_TYPE_SOFTWARE: 177 PRINT_FIELD_XVAL(", ", *attr, config, perf_sw_ids, 178 "PERF_COUNT_SW_???"); 179 break; 180 case PERF_TYPE_TRACEPOINT: 181 /* 182 * "The value to use in config can be obtained from under 183 * debugfs tracing/events/../../id if ftrace is enabled 184 * in the kernel." 185 */ 186 PRINT_FIELD_U(", ", *attr, config); 187 break; 188 case PERF_TYPE_HW_CACHE: 189 /* 190 * (perf_hw_cache_id) | (perf_hw_cache_op_id << 8) | 191 * (perf_hw_cache_op_result_id << 16) 192 */ 193 tprints(", config="); 194 printxval(perf_hw_cache_id, attr->config & 0xFF, 195 "PERF_COUNT_HW_CACHE_???"); 196 tprints("|"); 197 printxval(perf_hw_cache_op_id, (attr->config >> 8) & 0xFF, 198 "PERF_COUNT_HW_CACHE_OP_???"); 199 tprints("<<8|"); 200 /* 201 * Current code (see set_ext_hw_attr in arch/x86/events/core.c, 202 * tile_map_cache_event in arch/tile/kernel/perf_event.c, 203 * arc_pmu_cache_event in arch/arc/kernel/perf_event.c, 204 * hw_perf_cache_event in arch/blackfin/kernel/perf_event.c, 205 * _hw_perf_cache_event in arch/metag/kernel/perf/perf_event.c, 206 * mipspmu_map_cache_event in arch/mips/kernel/perf_event_mipsxx.c, 207 * hw_perf_cache_event in arch/powerpc/perf/core-book3s.c, 208 * hw_perf_cache_event in arch/powerpc/perf/core-fsl-emb.c, 209 * hw_perf_cache_event in arch/sh/kernel/perf_event.c, 210 * sparc_map_cache_event in arch/sparc/kernel/perf_event.c, 211 * xtensa_pmu_cache_event in arch/xtensa/kernel/perf_event.c, 212 * armpmu_map_cache_event in drivers/perf/arm_pmu.c) assumes 213 * that cache result is 8 bits in size. 214 */ 215 printxval(perf_hw_cache_op_result_id, 216 (attr->config >> 16) & 0xFF, 217 "PERF_COUNT_HW_CACHE_RESULT_???"); 218 tprints("<<16"); 219 if (attr->config >> 24) { 220 tprintf("|%#" PRIx64 "<<24", attr->config >> 24); 221 tprints_comment("PERF_COUNT_HW_CACHE_???"); 222 } 223 break; 224 case PERF_TYPE_RAW: 225 /* 226 * "If type is PERF_TYPE_RAW, then a custom "raw" config 227 * value is needed. Most CPUs support events that are not 228 * covered by the "generalized" events. These are 229 * implementation defined; see your CPU manual (for example the 230 * Intel Volume 3B documentation or the AMD BIOS and Kernel 231 * Developer Guide). The libpfm4 library can be used to 232 * translate from the name in the architectural manuals 233 * to the raw hex value perf_event_open() expects in this 234 * field." 235 */ 236 case PERF_TYPE_BREAKPOINT: 237 /* 238 * "If type is PERF_TYPE_BREAKPOINT, then leave config set 239 * to zero. Its parameters are set in other places." 240 */ 241 default: 242 PRINT_FIELD_X(", ", *attr, config); 243 break; 244 } 245 246 if (abbrev(tcp)) 247 goto print_perf_event_attr_out; 248 249 if (attr->freq) 250 PRINT_FIELD_U(", ", *attr, sample_freq); 251 else 252 PRINT_FIELD_U(", ", *attr, sample_period); 253 254 PRINT_FIELD_FLAGS(", ", *attr, sample_type, perf_event_sample_format, 255 "PERF_SAMPLE_???"); 256 PRINT_FIELD_FLAGS(", ", *attr, read_format, perf_event_read_format, 257 "PERF_FORMAT_???"); 258 259 tprintf(", disabled=%u" 260 ", inherit=%u" 261 ", pinned=%u" 262 ", exclusive=%u" 263 ", exclusive_user=%u" 264 ", exclude_kernel=%u" 265 ", exclude_hv=%u" 266 ", exclude_idle=%u" 267 ", mmap=%u" 268 ", comm=%u" 269 ", freq=%u" 270 ", inherit_stat=%u" 271 ", enable_on_exec=%u" 272 ", task=%u" 273 ", watermark=%u" 274 ", precise_ip=%u", 275 attr->disabled, 276 attr->inherit, 277 attr->pinned, 278 attr->exclusive, 279 attr->exclude_user, 280 attr->exclude_kernel, 281 attr->exclude_hv, 282 attr->exclude_idle, 283 attr->mmap, 284 attr->comm, 285 attr->freq, 286 attr->inherit_stat, 287 attr->enable_on_exec, 288 attr->task, 289 attr->watermark, 290 attr->precise_ip); 291 tprints_comment(precise_ip_desc[attr->precise_ip]); 292 tprintf(", mmap_data=%u" 293 ", sample_id_all=%u" 294 ", exclude_host=%u" 295 ", exclude_guest=%u" 296 ", exclude_callchain_kernel=%u" 297 ", exclude_callchain_user=%u" 298 ", mmap2=%u" 299 ", comm_exec=%u" 300 ", use_clockid=%u" 301 ", context_switch=%u" 302 ", write_backward=%u" 303 ", namespaces=%u", 304 attr->mmap_data, 305 attr->sample_id_all, 306 attr->exclude_host, 307 attr->exclude_guest, 308 attr->exclude_callchain_kernel, 309 attr->exclude_callchain_user, 310 attr->mmap2, 311 attr->comm_exec, 312 attr->use_clockid, 313 attr->context_switch, 314 attr->write_backward, 315 attr->namespaces); 316 317 /* 318 * Print it only in case it is non-zero, since it may contain flags we 319 * are not aware about. 320 */ 321 if (attr->__reserved_1) { 322 tprintf(", __reserved_1=%#" PRIx64, 323 (uint64_t) attr->__reserved_1); 324 tprints_comment("Bits 63..29"); 325 } 326 327 if (attr->watermark) 328 PRINT_FIELD_U(", ", *attr, wakeup_watermark); 329 else 330 PRINT_FIELD_U(", ", *attr, wakeup_events); 331 332 if (attr->type == PERF_TYPE_BREAKPOINT) 333 /* Any combination of R/W with X is deemed invalid */ 334 PRINT_FIELD_XVAL(", ", *attr, bp_type, hw_breakpoint_type, 335 (attr->bp_type <= 336 (HW_BREAKPOINT_X | HW_BREAKPOINT_RW)) 337 ? "HW_BREAKPOINT_INVALID" 338 : "HW_BREAKPOINT_???"); 339 340 if (attr->type == PERF_TYPE_BREAKPOINT) 341 PRINT_FIELD_X(", ", *attr, bp_addr); 342 else 343 PRINT_FIELD_X(", ", *attr, config1); 344 345 /* 346 * Fields after bp_addr/config1 are optional and may not present; check 347 * against size is needed. 348 */ 349 350 _PERF_CHECK_FIELD(bp_len); 351 if (attr->type == PERF_TYPE_BREAKPOINT) 352 PRINT_FIELD_U(", ", *attr, bp_len); 353 else 354 PRINT_FIELD_X(", ", *attr, config2); 355 356 _PERF_CHECK_FIELD(branch_sample_type); 357 if (attr->sample_type & PERF_SAMPLE_BRANCH_STACK) { 358 PRINT_FIELD_FLAGS(", ", *attr, branch_sample_type, 359 perf_branch_sample_type, 360 "PERF_SAMPLE_BRANCH_???"); 361 } 362 363 _PERF_CHECK_FIELD(sample_regs_user); 364 /* 365 * "This bit mask defines the set of user CPU registers to dump on 366 * samples. The layout of the register mask is architecture-specific and 367 * described in the kernel header 368 * arch/ARCH/include/uapi/asm/perf_regs.h." 369 */ 370 PRINT_FIELD_X(", ", *attr, sample_regs_user); 371 372 _PERF_CHECK_FIELD(sample_stack_user); 373 /* 374 * "size of the user stack to dump if PERF_SAMPLE_STACK_USER is 375 * specified." 376 */ 377 if (attr->sample_type & PERF_SAMPLE_STACK_USER) 378 PRINT_FIELD_X(", ", *attr, sample_stack_user); 379 380 if (attr->use_clockid) { 381 _PERF_CHECK_FIELD(clockid); 382 PRINT_FIELD_XVAL(", ", *attr, clockid, clocknames, "CLOCK_???"); 383 } 384 385 _PERF_CHECK_FIELD(sample_regs_intr); 386 PRINT_FIELD_X(", ", *attr, sample_regs_intr); 387 388 _PERF_CHECK_FIELD(aux_watermark); 389 PRINT_FIELD_U(", ", *attr, aux_watermark); 390 391 _PERF_CHECK_FIELD(sample_max_stack); 392 PRINT_FIELD_U(", ", *attr, sample_max_stack); 393 394 /* _PERF_CHECK_FIELD(__reserved_2); 395 PRINT_FIELD_U(", ", *attr, __reserved2); */ 396 397 print_perf_event_attr_out: 398 if ((attr->size && (attr->size > size)) || 399 (!attr->size && (size < PERF_ATTR_SIZE_VER0))) 400 tprints(", ..."); 401 402 tprints("}"); 403 } 404 405 SYS_FUNC(perf_event_open) 406 { 407 /* 408 * We try to copy out the whole structure on entering in order to check 409 * size value on exiting. We do not check the rest of the fields because 410 * they shouldn't be changed, but copy the whole structure instead 411 * of just size field because they could. 412 */ 413 if (entering(tcp)) { 414 if (!fetch_perf_event_attr(tcp, tcp->u_arg[0])) 415 return 0; 416 } else { 417 print_perf_event_attr(tcp, tcp->u_arg[0]); 418 } 419 420 tprintf(", %d, %d, ", 421 (int) tcp->u_arg[1], 422 (int) tcp->u_arg[2]); 423 printfd(tcp, tcp->u_arg[3]); 424 tprints(", "); 425 printflags64(perf_event_open_flags, tcp->u_arg[4], "PERF_FLAG_???"); 426 427 return RVAL_DECODED | RVAL_FD; 428 } 429