/**
 * @file opd_perfmon.c
 * perfmonctl() handling
 *
 * @remark Copyright 2003 OProfile authors
 * @remark Read the file COPYING
 *
 * @author John Levon
 */

#ifdef __ia64__

/* need this for sched_setaffinity() in <sched.h> */
#define _GNU_SOURCE

#include "oprofiled.h"
#include "opd_perfmon.h"
#include "opd_events.h"

#include "op_cpu_type.h"
#include "op_libiberty.h"
#include "op_hw_config.h"

#include <sys/syscall.h>
#include <sys/wait.h>
#include <unistd.h>
#include <limits.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#ifdef HAVE_SCHED_SETAFFINITY
#include <sched.h>
#endif

extern op_cpu cpu_type;

#ifndef HAVE_SCHED_SETAFFINITY

/* many glibc's are not yet up to date */
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 1231
#endif

/* Copied from glibc's <sched.h> and <bits/sched.h> and munged */
#define CPU_SETSIZE	1024
#define __NCPUBITS	(8 * sizeof (unsigned long))
typedef struct
{
	unsigned long __bits[CPU_SETSIZE / __NCPUBITS];
} cpu_set_t;

#define CPU_SET(cpu, cpusetp) \
	((cpusetp)->__bits[(cpu)/__NCPUBITS] |= (1UL << ((cpu) % __NCPUBITS)))
#define CPU_ZERO(cpusetp) \
	memset((cpusetp), 0, sizeof(cpu_set_t))

/* fallback wrapper when glibc lacks sched_setaffinity(): invoke the
 * raw syscall directly
 */
static int
sched_setaffinity(pid_t pid, size_t len, cpu_set_t const * cpusetp)
{
	return syscall(__NR_sched_setaffinity, pid, len, cpusetp);
}
#endif


#ifndef HAVE_PERFMONCTL
#ifndef __NR_perfmonctl
#define __NR_perfmonctl 1175
#endif

/* fallback wrapper when libc does not expose perfmonctl() */
static int perfmonctl(int fd, int cmd, void * arg, int narg)
{
	return syscall(__NR_perfmonctl, fd, cmd, arg, narg);
}
#endif


/* sampling-buffer format UUID passed to PFM_CREATE_CONTEXT */
static unsigned char uuid[16] = {
	0x77, 0x7a, 0x6e, 0x61, 0x20, 0x65, 0x73, 0x69,
	0x74, 0x6e, 0x72, 0x20, 0x61, 0x65, 0x0a, 0x6c
};


/* number of online CPUs; one perfmon child is forked per CPU */
static size_t nr_cpus;

/* per-CPU child process state, shared via the pre-fork allocation of
 * the children array (each child only touches its own slot)
 */
struct child {
	pid_t pid;
	int up_pipe[2];		/* child -> parent "I'm ready" handshake */
	int ctx_fd;		/* perfmon context fd for this CPU */
	sig_atomic_t sigusr1;	/* flag: parent requested PFM_START */
	sig_atomic_t sigusr2;	/* flag: parent requested PFM_STOP */
	sig_atomic_t sigterm;
};

static struct child * children;

/* start counting on the given perfmon context; exits on failure */
static void perfmon_start_child(int ctx_fd)
{
	if (perfmonctl(ctx_fd, PFM_START, 0, 0) == -1) {
		/* note: perror() itself appends ": <errno text>" */
		perror("Couldn't start perfmon");
		exit(EXIT_FAILURE);
	}
}


/* stop counting on the given perfmon context; exits on failure */
static void perfmon_stop_child(int ctx_fd)
{
	if (perfmonctl(ctx_fd, PFM_STOP, 0, 0) == -1) {
		perror("Couldn't stop perfmon");
		exit(EXIT_FAILURE);
	}
}


/* SIGUSR1 handler: mark this child's slot so the main loop issues
 * PFM_START.  Runs with SIGUSR1/SIGUSR2 otherwise blocked (only
 * delivered inside sigsuspend()), so the flag is race-free.
 */
static void child_sigusr1(int val __attribute__((unused)))
{
	size_t i;

	for (i = 0; i < nr_cpus; ++i) {
		if (children[i].pid == getpid()) {
			children[i].sigusr1 = 1;
			return;
		}
	}
}


/* SIGUSR2 handler: mark this child's slot so the main loop issues
 * PFM_STOP
 */
static void child_sigusr2(int val __attribute__((unused)))
{
	size_t i;

	for (i = 0; i < nr_cpus; ++i) {
		if (children[i].pid == getpid()) {
			children[i].sigusr2 = 1;
			return;
		}
	}
}


/* SIGTERM handler: propagate the termination to the parent daemon.
 * NOTE(review): printf() is not async-signal-safe; kept as-is since
 * its output is part of existing observable behaviour — worth
 * revisiting upstream.
 */
static void child_sigterm(int val __attribute__((unused)))
{
	printf("Child received SIGTERM, killing parent.\n");
	kill(getppid(), SIGTERM);
}


/* pin the calling process onto the given CPU; system-wide perfmon
 * contexts are per-CPU, so each child must stay on its CPU
 */
static void set_affinity(size_t cpu)
{
	cpu_set_t set;

	CPU_ZERO(&set);
	CPU_SET(cpu, &set);

	int err = sched_setaffinity(getpid(), sizeof(set), &set);

	if (err == -1) {
		fprintf(stderr, "Failed to set affinity: %s\n",
			strerror(errno));
		exit(EXIT_FAILURE);
	}
}


/* install the child's signal handlers.  SIGUSR1/SIGUSR2 are blocked
 * here and only unblocked atomically inside sigsuspend() in
 * run_child(), so start/stop requests cannot be lost.
 */
static void setup_signals(void)
{
	struct sigaction act;
	sigset_t mask;

	sigemptyset(&mask);
	sigaddset(&mask, SIGUSR1);
	sigaddset(&mask, SIGUSR2);
	sigprocmask(SIG_BLOCK, &mask, NULL);

	act.sa_handler = child_sigusr1;
	act.sa_flags = 0;
	sigemptyset(&act.sa_mask);

	if (sigaction(SIGUSR1, &act, NULL)) {
		perror("oprofiled: install of SIGUSR1 handler failed");
		exit(EXIT_FAILURE);
	}

	act.sa_handler = child_sigusr2;
	act.sa_flags = 0;
	sigemptyset(&act.sa_mask);

	if (sigaction(SIGUSR2, &act, NULL)) {
		perror("oprofiled: install of SIGUSR2 handler failed");
		exit(EXIT_FAILURE);
	}

	act.sa_handler = child_sigterm;
	act.sa_flags = 0;
	sigemptyset(&act.sa_mask);

	if (sigaction(SIGTERM, &act, NULL)) {
		perror("oprofiled: install of SIGTERM handler failed");
		exit(EXIT_FAILURE);
	}
}


/** create the per-cpu context */
static void create_context(struct child * self)
{
	pfarg_context_t ctx;
	int err;

	memset(&ctx, 0, sizeof(pfarg_context_t));
	memcpy(&ctx.ctx_smpl_buf_id, &uuid, 16);
	ctx.ctx_flags = PFM_FL_SYSTEM_WIDE;

	err = perfmonctl(0, PFM_CREATE_CONTEXT, &ctx, 1);
	if (err == -1) {
		fprintf(stderr, "CREATE_CONTEXT failed: %s\n",
			strerror(errno));
		exit(EXIT_FAILURE);
	}

	/* kernel hands back the context fd in the argument struct */
	self->ctx_fd = ctx.ctx_fd;
}


/** program the perfmon counters */
static void write_pmu(struct child * self)
{
	pfarg_reg_t pc[OP_MAX_COUNTERS];
	pfarg_reg_t pd[OP_MAX_COUNTERS];
	int err;
	size_t i;

	memset(pc, 0, sizeof(pc));
	memset(pd, 0, sizeof(pd));

#define PMC_GEN_INTERRUPT (1UL << 5)
#define PMC_PRIV_MONITOR (1UL << 6)
/* McKinley requires pmc4 to have bit 23 set (enable PMU).
 * It is supposedly ignored in other pmc registers.
 */
#define PMC_MANDATORY (1UL << 23)
#define PMC_USER (1UL << 3)
#define PMC_KERNEL (1UL << 0)
	/* build one PMC (control) register per configured event;
	 * counter PMCs/PMDs start at register 4 on ia64
	 */
	for (i = 0; i < op_nr_counters && opd_events[i].name; ++i) {
		struct opd_event * event = &opd_events[i];
		pc[i].reg_num = event->counter + 4;
		pc[i].reg_value = PMC_GEN_INTERRUPT;
		pc[i].reg_value |= PMC_PRIV_MONITOR;
		pc[i].reg_value |= PMC_MANDATORY;
		(event->user) ? (pc[i].reg_value |= PMC_USER)
			: (pc[i].reg_value &= ~PMC_USER);
		(event->kernel) ? (pc[i].reg_value |= PMC_KERNEL)
			: (pc[i].reg_value &= ~PMC_KERNEL);
		/* event select in bits 8-15, unit mask in bits 16-19 */
		pc[i].reg_value &= ~(0xff << 8);
		pc[i].reg_value |= ((event->value & 0xff) << 8);
		pc[i].reg_value &= ~(0xf << 16);
		pc[i].reg_value |= ((event->um & 0xf) << 16);
		pc[i].reg_smpl_eventid = event->counter;
	}

	/* counters overflow upwards: preload each PMD so it overflows
	 * after event->count occurrences
	 */
	for (i = 0; i < op_nr_counters && opd_events[i].name; ++i) {
		struct opd_event * event = &opd_events[i];
		pd[i].reg_value = ~0UL - event->count + 1;
		pd[i].reg_short_reset = ~0UL - event->count + 1;
		pd[i].reg_num = event->counter + 4;
	}

	err = perfmonctl(self->ctx_fd, PFM_WRITE_PMCS, pc, i);
	if (err == -1) {
		perror("Couldn't write PMCs");
		exit(EXIT_FAILURE);
	}

	err = perfmonctl(self->ctx_fd, PFM_WRITE_PMDS, pd, i);
	if (err == -1) {
		perror("Couldn't write PMDs");
		exit(EXIT_FAILURE);
	}
}


/* attach the perfmon context to this (CPU-pinned) process */
static void load_context(struct child * self)
{
	pfarg_load_t load_args;
	int err;

	memset(&load_args, 0, sizeof(load_args));
	load_args.load_pid = self->pid;

	err = perfmonctl(self->ctx_fd, PFM_LOAD_CONTEXT, &load_args, 1);
	if (err == -1) {
		perror("Couldn't load context");
		exit(EXIT_FAILURE);
	}
}


/* tell the parent (via the up pipe) that this CPU's child is ready,
 * retrying on EINTR
 */
static void notify_parent(struct child * self, size_t cpu)
{
	for (;;) {
		ssize_t ret;
		ret = write(self->up_pipe[1], &cpu, sizeof(size_t));
		if (ret == sizeof(size_t))
			break;
		if (ret < 0 && errno != EINTR) {
			fprintf(stderr, "Failed to write child pipe with %s\n",
				strerror(errno));
			exit(EXIT_FAILURE);
		}
	}
}


/* per-CPU child main: pin to the CPU, program the PMU, then loop
 * forever servicing start/stop requests signalled by the parent.
 * Never returns; the parent kills us in perfmon_exit().
 */
static void run_child(size_t cpu)
{
	struct child * self = &children[cpu];

	self->pid = getpid();
	self->sigusr1 = 0;
	self->sigusr2 = 0;
	self->sigterm = 0;

	setup_signals();

	set_affinity(cpu);

	create_context(self);

	write_pmu(self);

	load_context(self);

	notify_parent(self, cpu);

	for (;;) {
		sigset_t sigmask;
		sigfillset(&sigmask);
		sigdelset(&sigmask, SIGUSR1);
		sigdelset(&sigmask, SIGUSR2);
		sigdelset(&sigmask, SIGTERM);

		/* flags are only set while sigsuspend() has the
		 * signals unblocked, so checking before suspending
		 * cannot lose a request
		 */
		if (self->sigusr1) {
			printf("PFM_START on CPU%d\n", (int)cpu);
			fflush(stdout);
			perfmon_start_child(self->ctx_fd);
			self->sigusr1 = 0;
		}

		if (self->sigusr2) {
			printf("PFM_STOP on CPU%d\n", (int)cpu);
			fflush(stdout);
			perfmon_stop_child(self->ctx_fd);
			self->sigusr2 = 0;
		}

		sigsuspend(&sigmask);
	}
}


/* parent side of the handshake: block until the child reports its CPU
 * number on the pipe, retrying on EINTR, then close both pipe ends
 */
static void wait_for_child(struct child * child)
{
	size_t tmp;
	for (;;) {
		ssize_t ret;
		ret = read(child->up_pipe[0], &tmp, sizeof(size_t));
		if (ret == sizeof(size_t))
			break;
		if (ret < 0 && errno != EINTR) {
			fprintf(stderr, "Failed to read child pipe with %s\n",
				strerror(errno));
			exit(EXIT_FAILURE);
		}
	}
	printf("Perfmon child up on CPU%d\n", (int)tmp);
	fflush(stdout);

	close(child->up_pipe[0]);
	close(child->up_pipe[1]);
}

/* single context used when profiling under Xen (no per-CPU children) */
static struct child* xen_ctx;

/* set up perfmon profiling: under Xen a single context in this
 * process; otherwise fork one CPU-pinned child per online CPU and
 * wait for each to come up
 */
void perfmon_init(void)
{
	size_t i;
	long nr;

	if (cpu_type == CPU_TIMER_INT)
		return;

	if (!no_xen) {
		xen_ctx = xmalloc(sizeof(struct child));
		xen_ctx->pid = getpid();
		xen_ctx->up_pipe[0] = -1;
		xen_ctx->up_pipe[1] = -1;
		xen_ctx->sigusr1 = 0;
		xen_ctx->sigusr2 = 0;
		xen_ctx->sigterm = 0;

		create_context(xen_ctx);

		write_pmu(xen_ctx);

		load_context(xen_ctx);
		return;
	}


	nr = sysconf(_SC_NPROCESSORS_ONLN);
	if (nr == -1) {
		fprintf(stderr, "Couldn't determine number of CPUs.\n");
		exit(EXIT_FAILURE);
	}

	nr_cpus = nr;

	/* allocated before forking so each child sees its own slot */
	children = xmalloc(sizeof(struct child) * nr_cpus);

	for (i = 0; i < nr_cpus; ++i) {
		int ret;

		if (pipe(children[i].up_pipe)) {
			perror("Couldn't create child pipe");
			exit(EXIT_FAILURE);
		}

		ret = fork();
		if (ret == -1) {
			fprintf(stderr, "Couldn't fork perfmon child.\n");
			exit(EXIT_FAILURE);
		} else if (ret == 0) {
			printf("Running perfmon child on CPU%d.\n", (int)i);
			fflush(stdout);
			run_child(i);	/* never returns */
		} else {
			children[i].pid = ret;
			printf("Waiting on CPU%d\n", (int)i);
			wait_for_child(&children[i]);
		}
	}
}


/* tear down profiling: kill and reap every per-CPU child (nothing to
 * do under Xen or timer-interrupt mode)
 */
void perfmon_exit(void)
{
	size_t i;

	if (cpu_type == CPU_TIMER_INT)
		return;

	if (!no_xen)
		return;

	for (i = 0; i < nr_cpus; ++i) {
		kill(children[i].pid, SIGKILL);
		waitpid(children[i].pid, NULL, 0);
	}
}


/* start counting: directly under Xen, via SIGUSR1 to each child
 * otherwise
 */
void perfmon_start(void)
{
	size_t i;

	if (cpu_type == CPU_TIMER_INT)
		return;

	if (!no_xen) {
		perfmon_start_child(xen_ctx->ctx_fd);
		return;
	}

	for (i = 0; i < nr_cpus; ++i)
		kill(children[i].pid, SIGUSR1);
}


/* stop counting: directly under Xen, via SIGUSR2 to each child
 * otherwise
 */
void perfmon_stop(void)
{
	size_t i;

	if (cpu_type == CPU_TIMER_INT)
		return;

	if (!no_xen) {
		perfmon_stop_child(xen_ctx->ctx_fd);
		return;
	}

	for (i = 0; i < nr_cpus; ++i)
		kill(children[i].pid, SIGUSR2);
}

#endif /* __ia64__ */