/**
 * @file opd_perfmon.c
 * perfmonctl() handling
 *
 * @remark Copyright 2003 OProfile authors
 * @remark Read the file COPYING
 *
 * @author John Levon
 */

/* This whole file is IA64-only: perfmonctl() is the ia64 perfmon
 * system call interface. */
#ifdef __ia64__

/* need this for sched_setaffinity() in <sched.h> */
#define _GNU_SOURCE

#include "oprofiled.h"
#include "opd_perfmon.h"
#include "opd_events.h"

#include "op_cpu_type.h"
#include "op_libiberty.h"
#include "op_hw_config.h"

#include <sys/syscall.h>
#include <sys/wait.h>
#include <unistd.h>
#include <limits.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/stat.h>
#ifdef HAVE_SCHED_SETAFFINITY
#include <sched.h>
#endif

/* CPU type detected at startup; defined elsewhere in oprofiled. */
extern op_cpu cpu_type;

#ifndef HAVE_SCHED_SETAFFINITY

/* many glibc's are not yet up to date */
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 1231
#endif

/* Copied from glibc's <sched.h> and <bits/sched.h> and munged */
#define CPU_SETSIZE 1024
#define __NCPUBITS (8 * sizeof (unsigned long))
typedef struct
{
	unsigned long __bits[CPU_SETSIZE / __NCPUBITS];
} cpu_set_t;

#define CPU_SET(cpu, cpusetp) \
	((cpusetp)->__bits[(cpu)/__NCPUBITS] |= (1UL << ((cpu) % __NCPUBITS)))
#define CPU_ZERO(cpusetp) \
	memset((cpusetp), 0, sizeof(cpu_set_t))

/* Fallback wrapper: invoke the affinity syscall directly when libc
 * does not provide sched_setaffinity(). */
static int
sched_setaffinity(pid_t pid, size_t len, cpu_set_t const * cpusetp)
{
	return syscall(__NR_sched_setaffinity, pid, len, cpusetp);
}
#endif


#ifndef HAVE_PERFMONCTL
#ifndef __NR_perfmonctl
#define __NR_perfmonctl 1175
#endif

/* Fallback wrapper for the ia64 perfmonctl() syscall when libc does
 * not expose it. */
static int perfmonctl(int fd, int cmd, void * arg, int narg)
{
	return syscall(__NR_perfmonctl, fd, cmd, arg, narg);
}
#endif


/* 16-byte identifier passed as ctx_smpl_buf_id when creating the
 * perfmon context (see create_context() below).
 * NOTE(review): the particular byte values appear arbitrary — presumably
 * just a unique tag for this client; confirm against perfmon docs. */
static unsigned char uuid[16] = {
	0x77, 0x7a, 0x6e, 0x61, 0x20, 0x65, 0x73, 0x69,
	0x74, 0x6e, 0x72, 0x20, 0x61, 0x65, 0x0a, 0x6c
};


/* number of online CPUs, one perfmon child is forked per CPU */
static size_t nr_cpus;
88 89 struct child { 90 pid_t pid; 91 int up_pipe[2]; 92 int ctx_fd; 93 sig_atomic_t sigusr1; 94 sig_atomic_t sigusr2; 95 sig_atomic_t sigterm; 96 }; 97 98 static struct child * children; 99 100 static void perfmon_start_child(int ctx_fd) 101 { 102 if (perfmonctl(ctx_fd, PFM_START, 0, 0) == -1) { 103 exit(EXIT_FAILURE); 104 } 105 } 106 107 108 static void perfmon_stop_child(int ctx_fd) 109 { 110 if (perfmonctl(ctx_fd, PFM_STOP, 0, 0) == -1) { 111 exit(EXIT_FAILURE); 112 } 113 } 114 115 116 static void child_sigusr1(int val __attribute__((unused))) 117 { 118 size_t i; 119 120 for (i = 0; i < nr_cpus; ++i) { 121 if (children[i].pid == getpid()) { 122 children[i].sigusr1 = 1; 123 return; 124 } 125 } 126 } 127 128 129 static void child_sigusr2(int val __attribute__((unused))) 130 { 131 size_t i; 132 133 for (i = 0; i < nr_cpus; ++i) { 134 if (children[i].pid == getpid()) { 135 children[i].sigusr2 = 1; 136 return; 137 } 138 } 139 } 140 141 142 static void child_sigterm(int val __attribute__((unused))) 143 { 144 kill(getppid(), SIGTERM); 145 } 146 147 148 static void set_affinity(size_t cpu) 149 { 150 cpu_set_t set; 151 int err; 152 153 CPU_ZERO(&set); 154 CPU_SET(cpu, &set); 155 156 err = sched_setaffinity(getpid(), sizeof(set), &set); 157 158 if (err == -1) { 159 perror("Failed to set affinity"); 160 exit(EXIT_FAILURE); 161 } 162 } 163 164 165 static void setup_signals(void) 166 { 167 struct sigaction act; 168 sigset_t mask; 169 170 sigemptyset(&mask); 171 sigaddset(&mask, SIGUSR1); 172 sigaddset(&mask, SIGUSR2); 173 sigprocmask(SIG_BLOCK, &mask, NULL); 174 175 act.sa_handler = child_sigusr1; 176 act.sa_flags = 0; 177 sigemptyset(&act.sa_mask); 178 179 if (sigaction(SIGUSR1, &act, NULL)) { 180 perror("oprofiled: install of SIGUSR1 handler failed"); 181 exit(EXIT_FAILURE); 182 } 183 184 act.sa_handler = child_sigusr2; 185 act.sa_flags = 0; 186 sigemptyset(&act.sa_mask); 187 188 if (sigaction(SIGUSR2, &act, NULL)) { 189 perror("oprofiled: install of SIGUSR2 handler 
failed"); 190 exit(EXIT_FAILURE); 191 } 192 193 act.sa_handler = child_sigterm; 194 act.sa_flags = 0; 195 sigemptyset(&act.sa_mask); 196 197 if (sigaction(SIGTERM, &act, NULL)) { 198 perror("oprofiled: install of SIGTERM handler failed"); 199 exit(EXIT_FAILURE); 200 } 201 } 202 203 204 /** create the per-cpu context */ 205 static void create_context(struct child * self) 206 { 207 pfarg_context_t ctx; 208 int err; 209 210 memset(&ctx, 0, sizeof(pfarg_context_t)); 211 memcpy(&ctx.ctx_smpl_buf_id, &uuid, 16); 212 ctx.ctx_flags = PFM_FL_SYSTEM_WIDE; 213 214 err = perfmonctl(0, PFM_CREATE_CONTEXT, &ctx, 1); 215 if (err == -1) { 216 perror("CREATE_CONTEXT failed"); 217 exit(EXIT_FAILURE); 218 } 219 220 self->ctx_fd = ctx.ctx_fd; 221 } 222 223 224 /** program the perfmon counters */ 225 static void write_pmu(struct child * self) 226 { 227 pfarg_reg_t pc[OP_MAX_COUNTERS]; 228 pfarg_reg_t pd[OP_MAX_COUNTERS]; 229 int err; 230 size_t i; 231 232 memset(pc, 0, sizeof(pc)); 233 memset(pd, 0, sizeof(pd)); 234 235 #define PMC_GEN_INTERRUPT (1UL << 5) 236 #define PMC_PRIV_MONITOR (1UL << 6) 237 /* McKinley requires pmc4 to have bit 23 set (enable PMU). 238 * It is supposedly ignored in other pmc registers. 239 */ 240 #define PMC_MANDATORY (1UL << 23) 241 #define PMC_USER (1UL << 3) 242 #define PMC_KERNEL (1UL << 0) 243 for (i = 0; i < op_nr_counters && opd_events[i].name; ++i) { 244 struct opd_event * event = &opd_events[i]; 245 pc[i].reg_num = event->counter + 4; 246 pc[i].reg_value = PMC_GEN_INTERRUPT; 247 pc[i].reg_value |= PMC_PRIV_MONITOR; 248 pc[i].reg_value |= PMC_MANDATORY; 249 (event->user) ? (pc[i].reg_value |= PMC_USER) 250 : (pc[i].reg_value &= ~PMC_USER); 251 (event->kernel) ? 
(pc[i].reg_value |= PMC_KERNEL) 252 : (pc[i].reg_value &= ~PMC_KERNEL); 253 pc[i].reg_value &= ~(0xff << 8); 254 pc[i].reg_value |= ((event->value & 0xff) << 8); 255 pc[i].reg_value &= ~(0xf << 16); 256 pc[i].reg_value |= ((event->um & 0xf) << 16); 257 pc[i].reg_smpl_eventid = event->counter; 258 } 259 260 for (i = 0; i < op_nr_counters && opd_events[i].name; ++i) { 261 struct opd_event * event = &opd_events[i]; 262 pd[i].reg_value = ~0UL - event->count + 1; 263 pd[i].reg_short_reset = ~0UL - event->count + 1; 264 pd[i].reg_num = event->counter + 4; 265 } 266 267 err = perfmonctl(self->ctx_fd, PFM_WRITE_PMCS, pc, i); 268 if (err == -1) { 269 perror("Couldn't write PMCs"); 270 exit(EXIT_FAILURE); 271 } 272 273 err = perfmonctl(self->ctx_fd, PFM_WRITE_PMDS, pd, i); 274 if (err == -1) { 275 perror("Couldn't write PMDs"); 276 exit(EXIT_FAILURE); 277 } 278 } 279 280 281 static void load_context(struct child * self) 282 { 283 pfarg_load_t load_args; 284 int err; 285 286 memset(&load_args, 0, sizeof(load_args)); 287 load_args.load_pid = self->pid; 288 289 err = perfmonctl(self->ctx_fd, PFM_LOAD_CONTEXT, &load_args, 1); 290 if (err == -1) { 291 perror("Couldn't load context"); 292 exit(EXIT_FAILURE); 293 } 294 } 295 296 297 static void notify_parent(struct child * self, size_t cpu) 298 { 299 for (;;) { 300 ssize_t ret; 301 ret = write(self->up_pipe[1], &cpu, sizeof(size_t)); 302 if (ret == sizeof(size_t)) 303 break; 304 if (ret < 0 && errno != EINTR) { 305 perror("Failed to write child pipe:"); 306 exit(EXIT_FAILURE); 307 } 308 } 309 } 310 311 static struct child * inner_child; 312 void close_pipe(void) 313 { 314 close(inner_child->up_pipe[1]); 315 } 316 317 static void run_child(size_t cpu) 318 { 319 struct child * self = &children[cpu]; 320 321 self->pid = getpid(); 322 self->sigusr1 = 0; 323 self->sigusr2 = 0; 324 self->sigterm = 0; 325 326 inner_child = self; 327 if (atexit(close_pipe)){ 328 close_pipe(); 329 exit(EXIT_FAILURE); 330 } 331 332 umask(0); 333 /* Change 
directory to allow directory to be removed */ 334 if (chdir("/") < 0) { 335 perror("Unable to chdir to \"/\""); 336 exit(EXIT_FAILURE); 337 } 338 339 setup_signals(); 340 341 set_affinity(cpu); 342 343 create_context(self); 344 345 write_pmu(self); 346 347 load_context(self); 348 349 notify_parent(self, cpu); 350 351 /* Redirect standard files to /dev/null */ 352 freopen( "/dev/null", "r", stdin); 353 freopen( "/dev/null", "w", stdout); 354 freopen( "/dev/null", "w", stderr); 355 356 for (;;) { 357 sigset_t sigmask; 358 sigfillset(&sigmask); 359 sigdelset(&sigmask, SIGUSR1); 360 sigdelset(&sigmask, SIGUSR2); 361 sigdelset(&sigmask, SIGTERM); 362 363 if (self->sigusr1) { 364 perfmon_start_child(self->ctx_fd); 365 self->sigusr1 = 0; 366 } 367 368 if (self->sigusr2) { 369 perfmon_stop_child(self->ctx_fd); 370 self->sigusr2 = 0; 371 } 372 373 sigsuspend(&sigmask); 374 } 375 } 376 377 378 static void wait_for_child(struct child * child) 379 { 380 size_t tmp; 381 for (;;) { 382 ssize_t ret; 383 ret = read(child->up_pipe[0], &tmp, sizeof(size_t)); 384 if (ret == sizeof(size_t)) 385 break; 386 if ((ret < 0 && errno != EINTR) || ret == 0 ) { 387 perror("Failed to read child pipe"); 388 exit(EXIT_FAILURE); 389 } 390 } 391 printf("Perfmon child up on CPU%d\n", (int)tmp); 392 fflush(stdout); 393 394 close(child->up_pipe[0]); 395 } 396 397 static struct child* xen_ctx; 398 399 void perfmon_init(void) 400 { 401 size_t i; 402 long nr; 403 404 if (cpu_type == CPU_TIMER_INT) 405 return; 406 407 if (!no_xen) { 408 xen_ctx = xmalloc(sizeof(struct child)); 409 xen_ctx->pid = getpid(); 410 xen_ctx->up_pipe[0] = -1; 411 xen_ctx->up_pipe[1] = -1; 412 xen_ctx->sigusr1 = 0; 413 xen_ctx->sigusr2 = 0; 414 xen_ctx->sigterm = 0; 415 416 create_context(xen_ctx); 417 418 write_pmu(xen_ctx); 419 420 load_context(xen_ctx); 421 return; 422 } 423 424 425 nr = sysconf(_SC_NPROCESSORS_ONLN); 426 if (nr == -1) { 427 fprintf(stderr, "Couldn't determine number of CPUs.\n"); 428 exit(EXIT_FAILURE); 429 } 
430 431 nr_cpus = nr; 432 433 children = xmalloc(sizeof(struct child) * nr_cpus); 434 bzero(children, sizeof(struct child) * nr_cpus); 435 436 for (i = 0; i < nr_cpus; ++i) { 437 int ret; 438 439 if (pipe(children[i].up_pipe)) { 440 perror("Couldn't create child pipe"); 441 exit(EXIT_FAILURE); 442 } 443 444 ret = fork(); 445 if (ret == -1) { 446 perror("Couldn't fork perfmon child"); 447 exit(EXIT_FAILURE); 448 } else if (ret == 0) { 449 close(children[i].up_pipe[0]); 450 run_child(i); 451 } else { 452 children[i].pid = ret; 453 close(children[i].up_pipe[1]); 454 printf("Waiting on CPU%d\n", (int)i); 455 wait_for_child(&children[i]); 456 } 457 } 458 } 459 460 461 void perfmon_exit(void) 462 { 463 size_t i; 464 465 if (cpu_type == CPU_TIMER_INT) 466 return; 467 468 if (!no_xen) 469 return; 470 471 for (i = 0; i < nr_cpus; ++i) { 472 if (children[i].pid) { 473 int c_pid = children[i].pid; 474 children[i].pid = 0; 475 if (kill(c_pid, SIGKILL)==0) 476 waitpid(c_pid, NULL, 0); 477 } 478 } 479 } 480 481 482 void perfmon_start(void) 483 { 484 size_t i; 485 486 if (cpu_type == CPU_TIMER_INT) 487 return; 488 489 if (!no_xen) { 490 perfmon_start_child(xen_ctx->ctx_fd); 491 return; 492 } 493 494 for (i = 0; i < nr_cpus; ++i) { 495 if (kill(children[i].pid, SIGUSR1)) { 496 perror("Unable to start perfmon"); 497 exit(EXIT_FAILURE); 498 } 499 } 500 } 501 502 503 void perfmon_stop(void) 504 { 505 size_t i; 506 507 if (cpu_type == CPU_TIMER_INT) 508 return; 509 510 if (!no_xen) { 511 perfmon_stop_child(xen_ctx->ctx_fd); 512 return; 513 } 514 515 for (i = 0; i < nr_cpus; ++i) 516 if (kill(children[i].pid, SIGUSR2)) { 517 perror("Unable to stop perfmon"); 518 exit(EXIT_FAILURE); 519 } 520 } 521 522 #endif /* __ia64__ */ 523