/**
 * @file opd_perfmon.c
 * perfmonctl() handling
 *
 * @remark Copyright 2003 OProfile authors
 * @remark Read the file COPYING
 *
 * @author John Levon
 */

#ifdef __ia64__

/* need this for sched_setaffinity() in <sched.h> */
#define _GNU_SOURCE

#include "oprofiled.h"
#include "opd_perfmon.h"
#include "opd_events.h"

#include "op_cpu_type.h"
#include "op_libiberty.h"
#include "op_hw_config.h"

#include <sys/syscall.h>
#include <sys/wait.h>
#include <unistd.h>
#include <limits.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/stat.h>
#ifdef HAVE_SCHED_SETAFFINITY
#include <sched.h>
#endif

extern op_cpu cpu_type;

#ifndef HAVE_SCHED_SETAFFINITY

/* many glibc's are not yet up to date */
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 1231
#endif

/* Copied from glibc's <sched.h> and <bits/sched.h> and munged */
#define CPU_SETSIZE	1024
#define __NCPUBITS	(8 * sizeof (unsigned long))
typedef struct
{
	unsigned long __bits[CPU_SETSIZE / __NCPUBITS];
} cpu_set_t;

#define CPU_SET(cpu, cpusetp) \
	((cpusetp)->__bits[(cpu)/__NCPUBITS] |= (1UL << ((cpu) % __NCPUBITS)))
#define CPU_ZERO(cpusetp) \
	memset((cpusetp), 0, sizeof(cpu_set_t))

static int
sched_setaffinity(pid_t pid, size_t len, cpu_set_t const * cpusetp)
{
	return syscall(__NR_sched_setaffinity, pid, len, cpusetp);
}
#endif


#ifndef HAVE_PERFMONCTL
#ifndef __NR_perfmonctl
#define __NR_perfmonctl 1175
#endif

static int perfmonctl(int fd, int cmd, void * arg, int narg)
{
	return syscall(__NR_perfmonctl, fd, cmd, arg, narg);
}
#endif

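/* Identifier copied into ctx_smpl_buf_id when the perfmon context is
 * created (see create_context() below); it names the kernel sampling
 * buffer format this daemon expects to pair with. The byte values look
 * like a fixed project-specific pattern rather than a registered UUID. */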
static unsigned char uuid[16] = {
	0x77, 0x7a, 0x6e, 0x61, 0x20, 0x65, 0x73, 0x69,
	0x74, 0x6e, 0x72, 0x20, 0x61, 0x65, 0x0a, 0x6c
};


static size_t nr_cpus;

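/* Overview of the code below: perfmon_init() forks one child process per
 * online CPU. Each child pins itself to its CPU, creates a system-wide
 * perfmon context, programs the counters and then sleeps in sigsuspend().
 * The parent starts and stops counting by sending the children
 * SIGUSR1/SIGUSR2; a SIGTERM caught by a child is forwarded back to the
 * parent. When running under Xen (!no_xen) no children are forked and a
 * single context is managed directly by the daemon. */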
struct child {
	pid_t pid;
	int up_pipe[2];
	int ctx_fd;
	sig_atomic_t sigusr1;
	sig_atomic_t sigusr2;
	sig_atomic_t sigterm;
};

static struct child * children;

static void perfmon_start_child(int ctx_fd)
{
	if (perfmonctl(ctx_fd, PFM_START, 0, 0) == -1) {
		exit(EXIT_FAILURE);
	}
}


static void perfmon_stop_child(int ctx_fd)
{
	if (perfmonctl(ctx_fd, PFM_STOP, 0, 0) == -1) {
		exit(EXIT_FAILURE);
	}
}

static void child_sigusr1(int val __attribute__((unused)))
{
	size_t i;

	for (i = 0; i < nr_cpus; ++i) {
		if (children[i].pid == getpid()) {
			children[i].sigusr1 = 1;
			return;
		}
	}
}


static void child_sigusr2(int val __attribute__((unused)))
{
	size_t i;

	for (i = 0; i < nr_cpus; ++i) {
		if (children[i].pid == getpid()) {
			children[i].sigusr2 = 1;
			return;
		}
	}
}

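/* A SIGTERM delivered to a child is bounced back to the parent daemon,
 * which then tears everything down (perfmon_exit() SIGKILLs and reaps
 * the children). */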
static void child_sigterm(int val __attribute__((unused)))
{
	kill(getppid(), SIGTERM);
}


static void set_affinity(size_t cpu)
{
	cpu_set_t set;
	int err;

	CPU_ZERO(&set);
	CPU_SET(cpu, &set);

	err = sched_setaffinity(getpid(), sizeof(set), &set);

	if (err == -1) {
		perror("Failed to set affinity");
		exit(EXIT_FAILURE);
	}
}

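/* SIGUSR1/SIGUSR2 are blocked here and only delivered atomically through
 * sigsuspend() in run_child()'s main loop, so a start/stop request from
 * the parent cannot slip in between testing the sigusr* flags and going
 * back to sleep. */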
static void setup_signals(void)
{
	struct sigaction act;
	sigset_t mask;

	sigemptyset(&mask);
	sigaddset(&mask, SIGUSR1);
	sigaddset(&mask, SIGUSR2);
	sigprocmask(SIG_BLOCK, &mask, NULL);

	act.sa_handler = child_sigusr1;
	act.sa_flags = 0;
	sigemptyset(&act.sa_mask);

	if (sigaction(SIGUSR1, &act, NULL)) {
		perror("oprofiled: install of SIGUSR1 handler failed");
		exit(EXIT_FAILURE);
	}

	act.sa_handler = child_sigusr2;
	act.sa_flags = 0;
	sigemptyset(&act.sa_mask);

	if (sigaction(SIGUSR2, &act, NULL)) {
		perror("oprofiled: install of SIGUSR2 handler failed");
		exit(EXIT_FAILURE);
	}

	act.sa_handler = child_sigterm;
	act.sa_flags = 0;
	sigemptyset(&act.sa_mask);

	if (sigaction(SIGTERM, &act, NULL)) {
		perror("oprofiled: install of SIGTERM handler failed");
		exit(EXIT_FAILURE);
	}
}


/** create the per-cpu context */
static void create_context(struct child * self)
{
	pfarg_context_t ctx;
	int err;

	memset(&ctx, 0, sizeof(pfarg_context_t));
	memcpy(&ctx.ctx_smpl_buf_id, &uuid, 16);
	ctx.ctx_flags = PFM_FL_SYSTEM_WIDE;

	err = perfmonctl(0, PFM_CREATE_CONTEXT, &ctx, 1);
	if (err == -1) {
		perror("CREATE_CONTEXT failed");
		exit(EXIT_FAILURE);
	}

	self->ctx_fd = ctx.ctx_fd;
}


/** program the perfmon counters */
static void write_pmu(struct child * self)
{
	pfarg_reg_t pc[OP_MAX_COUNTERS];
	pfarg_reg_t pd[OP_MAX_COUNTERS];
	int err;
	size_t i;

	memset(pc, 0, sizeof(pc));
	memset(pd, 0, sizeof(pd));

#define PMC_GEN_INTERRUPT (1UL << 5)
#define PMC_PRIV_MONITOR (1UL << 6)
/* McKinley requires pmc4 to have bit 23 set (enable PMU).
 * It is supposedly ignored in other pmc registers.
 */
#define PMC_MANDATORY (1UL << 23)
#define PMC_USER (1UL << 3)
#define PMC_KERNEL (1UL << 0)
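	/* Layout of a generic ia64 PMC as assembled below: bit 0 enables
	 * counting at privilege level 0 (kernel), bit 3 at level 3 (user),
	 * bit 5 requests the overflow interrupt, bit 6 marks the monitor
	 * as privileged, bits 8-15 hold the event selector and bits 16-19
	 * the unit mask. Logical counter n is backed by pmc/pmd n+4,
	 * hence the "+ 4" on reg_num. */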
	for (i = 0; i < op_nr_counters && opd_events[i].name; ++i) {
		struct opd_event * event = &opd_events[i];
		pc[i].reg_num = event->counter + 4;
		pc[i].reg_value = PMC_GEN_INTERRUPT;
		pc[i].reg_value |= PMC_PRIV_MONITOR;
		pc[i].reg_value |= PMC_MANDATORY;
		(event->user) ? (pc[i].reg_value |= PMC_USER)
		              : (pc[i].reg_value &= ~PMC_USER);
		(event->kernel) ? (pc[i].reg_value |= PMC_KERNEL)
		                : (pc[i].reg_value &= ~PMC_KERNEL);
		pc[i].reg_value &= ~(0xff << 8);
		pc[i].reg_value |= ((event->value & 0xff) << 8);
		pc[i].reg_value &= ~(0xf << 16);
		pc[i].reg_value |= ((event->um & 0xf) << 16);
		pc[i].reg_smpl_eventid = event->counter;
	}

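	/* Prime each data register so that it overflows, and so raises the
	 * sampling interrupt, after event->count occurrences: the PMDs count
	 * upwards, so the starting value is 2^64 - count, written here as
	 * ~0UL - count + 1. */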
	for (i = 0; i < op_nr_counters && opd_events[i].name; ++i) {
		struct opd_event * event = &opd_events[i];
		pd[i].reg_value = ~0UL - event->count + 1;
		pd[i].reg_short_reset = ~0UL - event->count + 1;
		pd[i].reg_num = event->counter + 4;
	}

	err = perfmonctl(self->ctx_fd, PFM_WRITE_PMCS, pc, i);
	if (err == -1) {
		perror("Couldn't write PMCs");
		exit(EXIT_FAILURE);
	}

	err = perfmonctl(self->ctx_fd, PFM_WRITE_PMDS, pd, i);
	if (err == -1) {
		perror("Couldn't write PMDs");
		exit(EXIT_FAILURE);
	}
}

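/* Attach the context to the CPU this child is pinned to. For a
 * system-wide context, perfmon is expected to bind the context to the
 * CPU of the loading thread (load_pid is the child's own pid), which is
 * why set_affinity() must have run first. */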
static void load_context(struct child * self)
{
	pfarg_load_t load_args;
	int err;

	memset(&load_args, 0, sizeof(load_args));
	load_args.load_pid = self->pid;

	err = perfmonctl(self->ctx_fd, PFM_LOAD_CONTEXT, &load_args, 1);
	if (err == -1) {
		perror("Couldn't load context");
		exit(EXIT_FAILURE);
	}
}

static void notify_parent(struct child * self, size_t cpu)
{
	for (;;) {
		ssize_t ret;
		ret = write(self->up_pipe[1], &cpu, sizeof(size_t));
		if (ret == sizeof(size_t))
			break;
		if (ret < 0 && errno != EINTR) {
			perror("Failed to write child pipe");
			exit(EXIT_FAILURE);
		}
	}
}

static struct child * inner_child;
void close_pipe(void)
{
	close(inner_child->up_pipe[1]);
}

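/* Body of each per-CPU child. After detaching from the daemon's
 * filesystem context and setting up its perfmon session, the child
 * notifies the parent over the pipe and then loops forever: all signals
 * except SIGUSR1/SIGUSR2/SIGTERM stay blocked, and sigsuspend() wakes it
 * only to apply a pending start (SIGUSR1) or stop (SIGUSR2) request. */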
static void run_child(size_t cpu)
{
	struct child * self = &children[cpu];

	self->pid = getpid();
	self->sigusr1 = 0;
	self->sigusr2 = 0;
	self->sigterm = 0;

	inner_child = self;
	if (atexit(close_pipe)) {
		close_pipe();
		exit(EXIT_FAILURE);
	}

	umask(0);
	/* Change directory to allow directory to be removed */
	if (chdir("/") < 0) {
		perror("Unable to chdir to \"/\"");
		exit(EXIT_FAILURE);
	}

	setup_signals();

	set_affinity(cpu);

	create_context(self);

	write_pmu(self);

	load_context(self);

	notify_parent(self, cpu);

	/* Redirect standard files to /dev/null */
	freopen("/dev/null", "r", stdin);
	freopen("/dev/null", "w", stdout);
	freopen("/dev/null", "w", stderr);

	for (;;) {
		sigset_t sigmask;
		sigfillset(&sigmask);
		sigdelset(&sigmask, SIGUSR1);
		sigdelset(&sigmask, SIGUSR2);
		sigdelset(&sigmask, SIGTERM);

		if (self->sigusr1) {
			perfmon_start_child(self->ctx_fd);
			self->sigusr1 = 0;
		}

		if (self->sigusr2) {
			perfmon_stop_child(self->ctx_fd);
			self->sigusr2 = 0;
		}

		sigsuspend(&sigmask);
	}
}


static void wait_for_child(struct child * child)
{
	size_t tmp;
	for (;;) {
		ssize_t ret;
		ret = read(child->up_pipe[0], &tmp, sizeof(size_t));
		if (ret == sizeof(size_t))
			break;
		if ((ret < 0 && errno != EINTR) || ret == 0) {
			perror("Failed to read child pipe");
			exit(EXIT_FAILURE);
		}
	}
	printf("Perfmon child up on CPU%d\n", (int)tmp);
	fflush(stdout);

	close(child->up_pipe[0]);
}

static struct child* xen_ctx;

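/* Entry point called by the daemon at startup. Under Xen a single
 * context is created in-process; otherwise one child is forked per
 * online CPU and the parent blocks until each child reports over its
 * pipe that its PMU is programmed and loaded. */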
void perfmon_init(void)
{
	size_t i;
	long nr;

	if (cpu_type == CPU_TIMER_INT)
		return;

	if (!no_xen) {
		xen_ctx = xmalloc(sizeof(struct child));
		xen_ctx->pid = getpid();
		xen_ctx->up_pipe[0] = -1;
		xen_ctx->up_pipe[1] = -1;
		xen_ctx->sigusr1 = 0;
		xen_ctx->sigusr2 = 0;
		xen_ctx->sigterm = 0;

		create_context(xen_ctx);

		write_pmu(xen_ctx);

		load_context(xen_ctx);
		return;
	}

	nr = sysconf(_SC_NPROCESSORS_ONLN);
	if (nr == -1) {
		fprintf(stderr, "Couldn't determine number of CPUs.\n");
		exit(EXIT_FAILURE);
	}

	nr_cpus = nr;

	children = xmalloc(sizeof(struct child) * nr_cpus);
	bzero(children, sizeof(struct child) * nr_cpus);

	for (i = 0; i < nr_cpus; ++i) {
		int ret;

		if (pipe(children[i].up_pipe)) {
			perror("Couldn't create child pipe");
			exit(EXIT_FAILURE);
		}

		ret = fork();
		if (ret == -1) {
			perror("Couldn't fork perfmon child");
			exit(EXIT_FAILURE);
		} else if (ret == 0) {
			close(children[i].up_pipe[0]);
			run_child(i);
		} else {
			children[i].pid = ret;
			close(children[i].up_pipe[1]);
			printf("Waiting on CPU%d\n", (int)i);
			wait_for_child(&children[i]);
		}
	}
}


void perfmon_exit(void)
{
	size_t i;

	if (cpu_type == CPU_TIMER_INT)
		return;

	if (!no_xen)
		return;

	for (i = 0; i < nr_cpus; ++i) {
		if (children[i].pid) {
			int c_pid = children[i].pid;
			children[i].pid = 0;
			if (kill(c_pid, SIGKILL) == 0)
				waitpid(c_pid, NULL, 0);
		}
	}
}


void perfmon_start(void)
{
	size_t i;

	if (cpu_type == CPU_TIMER_INT)
		return;

	if (!no_xen) {
		perfmon_start_child(xen_ctx->ctx_fd);
		return;
	}

	for (i = 0; i < nr_cpus; ++i) {
		if (kill(children[i].pid, SIGUSR1)) {
			perror("Unable to start perfmon");
			exit(EXIT_FAILURE);
		}
	}
}


void perfmon_stop(void)
{
	size_t i;

	if (cpu_type == CPU_TIMER_INT)
		return;

	if (!no_xen) {
		perfmon_stop_child(xen_ctx->ctx_fd);
		return;
	}

	for (i = 0; i < nr_cpus; ++i)
		if (kill(children[i].pid, SIGUSR2)) {
			perror("Unable to stop perfmon");
			exit(EXIT_FAILURE);
		}
}

#endif /* __ia64__ */