Home | History | Annotate | Download | only in daemon
      1 /**
      2  * @file opd_perfmon.c
      3  * perfmonctl() handling
      4  *
      5  * @remark Copyright 2003 OProfile authors
      6  * @remark Read the file COPYING
      7  *
      8  * @author John Levon
      9  */
     10 
     11 #ifdef __ia64__
     12 
     13 /* need this for sched_setaffinity() in <sched.h> */
     14 #define _GNU_SOURCE
     15 
     16 #include "oprofiled.h"
     17 #include "opd_perfmon.h"
     18 #include "opd_events.h"
     19 
     20 #include "op_cpu_type.h"
     21 #include "op_libiberty.h"
     22 #include "op_hw_config.h"
     23 
     24 #include <sys/syscall.h>
     25 #include <sys/wait.h>
     26 #include <unistd.h>
     27 #include <limits.h>
     28 #include <signal.h>
     29 #include <stdio.h>
     30 #include <stdlib.h>
     31 #include <string.h>
     32 #include <errno.h>
     33 #ifdef HAVE_SCHED_SETAFFINITY
     34 #include <sched.h>
     35 #endif
     36 
     37 extern op_cpu cpu_type;
     38 
     39 #ifndef HAVE_SCHED_SETAFFINITY
     40 
     41 /* many glibc's are not yet up to date */
     42 #ifndef __NR_sched_setaffinity
     43 #define __NR_sched_setaffinity 1231
     44 #endif
     45 
     46 /* Copied from glibc's <sched.h> and <bits/sched.h> and munged */
     47 #define CPU_SETSIZE	1024
     48 #define __NCPUBITS	(8 * sizeof (unsigned long))
     49 typedef struct
     50 {
     51 	unsigned long __bits[CPU_SETSIZE / __NCPUBITS];
     52 } cpu_set_t;
     53 
     54 #define CPU_SET(cpu, cpusetp) \
     55 	((cpusetp)->__bits[(cpu)/__NCPUBITS] |= (1UL << ((cpu) % __NCPUBITS)))
     56 #define CPU_ZERO(cpusetp) \
     57 	memset((cpusetp), 0, sizeof(cpu_set_t))
     58 
     59 static int
     60 sched_setaffinity(pid_t pid, size_t len, cpu_set_t const * cpusetp)
     61 {
     62 	return syscall(__NR_sched_setaffinity, pid, len, cpusetp);
     63 }
     64 #endif
     65 
     66 
     67 #ifndef HAVE_PERFMONCTL
     68 #ifndef __NR_perfmonctl
     69 #define __NR_perfmonctl 1175
     70 #endif
     71 
     72 static int perfmonctl(int fd, int cmd, void * arg, int narg)
     73 {
     74 	return syscall(__NR_perfmonctl, fd, cmd, arg, narg);
     75 }
     76 #endif
     77 
     78 
     79 static unsigned char uuid[16] = {
     80 	0x77, 0x7a, 0x6e, 0x61, 0x20, 0x65, 0x73, 0x69,
     81 	0x74, 0x6e, 0x72, 0x20, 0x61, 0x65, 0x0a, 0x6c
     82 };
     83 
     84 
     85 static size_t nr_cpus;
     86 
     87 struct child {
     88 	pid_t pid;
     89 	int up_pipe[2];
     90 	int ctx_fd;
     91 	sig_atomic_t sigusr1;
     92 	sig_atomic_t sigusr2;
     93 	sig_atomic_t sigterm;
     94 };
     95 
     96 static struct child * children;
     97 
     98 static void perfmon_start_child(int ctx_fd)
     99 {
    100 	if (perfmonctl(ctx_fd, PFM_START, 0, 0) == -1) {
    101 		perror("Couldn't start perfmon: ");
    102 		exit(EXIT_FAILURE);
    103 	}
    104 }
    105 
    106 
    107 static void perfmon_stop_child(int ctx_fd)
    108 {
    109 	if (perfmonctl(ctx_fd, PFM_STOP, 0, 0) == -1) {
    110 		perror("Couldn't stop perfmon: ");
    111 		exit(EXIT_FAILURE);
    112 	}
    113 }
    114 
    115 
    116 static void child_sigusr1(int val __attribute__((unused)))
    117 {
    118 	size_t i;
    119 
    120 	for (i = 0; i < nr_cpus; ++i) {
    121 		if (children[i].pid == getpid()) {
    122 			children[i].sigusr1 = 1;
    123 			return;
    124 		}
    125 	}
    126 }
    127 
    128 
    129 static void child_sigusr2(int val __attribute__((unused)))
    130 {
    131 	size_t i;
    132 
    133 	for (i = 0; i < nr_cpus; ++i) {
    134 		if (children[i].pid == getpid()) {
    135 			children[i].sigusr2 = 1;
    136 			return;
    137 		}
    138 	}
    139 }
    140 
    141 
    142 static void child_sigterm(int val __attribute__((unused)))
    143 {
    144 	printf("Child received SIGTERM, killing parent.\n");
    145 	kill(getppid(), SIGTERM);
    146 }
    147 
    148 
    149 static void set_affinity(size_t cpu)
    150 {
    151 	cpu_set_t set;
    152 
    153 	CPU_ZERO(&set);
    154 	CPU_SET(cpu, &set);
    155 
    156 	int err = sched_setaffinity(getpid(), sizeof(set), &set);
    157 
    158 	if (err == -1) {
    159 		fprintf(stderr, "Failed to set affinity: %s\n",
    160 			    strerror(errno));
    161 		exit(EXIT_FAILURE);
    162 	}
    163 }
    164 
    165 
    166 static void setup_signals(void)
    167 {
    168 	struct sigaction act;
    169 	sigset_t mask;
    170 
    171 	sigemptyset(&mask);
    172 	sigaddset(&mask, SIGUSR1);
    173 	sigaddset(&mask, SIGUSR2);
    174 	sigprocmask(SIG_BLOCK, &mask, NULL);
    175 
    176 	act.sa_handler = child_sigusr1;
    177 	act.sa_flags = 0;
    178 	sigemptyset(&act.sa_mask);
    179 
    180 	if (sigaction(SIGUSR1, &act, NULL)) {
    181 		perror("oprofiled: install of SIGUSR1 handler failed: ");
    182 		exit(EXIT_FAILURE);
    183 	}
    184 
    185 	act.sa_handler = child_sigusr2;
    186 	act.sa_flags = 0;
    187 	sigemptyset(&act.sa_mask);
    188 
    189 	if (sigaction(SIGUSR2, &act, NULL)) {
    190 		perror("oprofiled: install of SIGUSR2 handler failed: ");
    191 		exit(EXIT_FAILURE);
    192 	}
    193 
    194 	act.sa_handler = child_sigterm;
    195 	act.sa_flags = 0;
    196 	sigemptyset(&act.sa_mask);
    197 
    198 	if (sigaction(SIGTERM, &act, NULL)) {
    199 		perror("oprofiled: install of SIGTERM handler failed: ");
    200 		exit(EXIT_FAILURE);
    201 	}
    202 }
    203 
    204 
    205 /** create the per-cpu context */
    206 static void create_context(struct child * self)
    207 {
    208 	pfarg_context_t ctx;
    209 	int err;
    210 
    211 	memset(&ctx, 0, sizeof(pfarg_context_t));
    212 	memcpy(&ctx.ctx_smpl_buf_id, &uuid, 16);
    213 	ctx.ctx_flags = PFM_FL_SYSTEM_WIDE;
    214 
    215 	err = perfmonctl(0, PFM_CREATE_CONTEXT, &ctx, 1);
    216 	if (err == -1) {
    217 		fprintf(stderr, "CREATE_CONTEXT failed: %s\n",
    218 		        strerror(errno));
    219 		exit(EXIT_FAILURE);
    220 	}
    221 
    222 	self->ctx_fd = ctx.ctx_fd;
    223 }
    224 
    225 
    226 /** program the perfmon counters */
    227 static void write_pmu(struct child * self)
    228 {
    229 	pfarg_reg_t pc[OP_MAX_COUNTERS];
    230 	pfarg_reg_t pd[OP_MAX_COUNTERS];
    231 	int err;
    232 	size_t i;
    233 
    234 	memset(pc, 0, sizeof(pc));
    235 	memset(pd, 0, sizeof(pd));
    236 
    237 #define PMC_GEN_INTERRUPT (1UL << 5)
    238 #define PMC_PRIV_MONITOR (1UL << 6)
    239 /* McKinley requires pmc4 to have bit 23 set (enable PMU).
    240  * It is supposedly ignored in other pmc registers.
    241  */
    242 #define PMC_MANDATORY (1UL << 23)
    243 #define PMC_USER (1UL << 3)
    244 #define PMC_KERNEL (1UL << 0)
    245 	for (i = 0; i < op_nr_counters && opd_events[i].name; ++i) {
    246 		struct opd_event * event = &opd_events[i];
    247 		pc[i].reg_num = event->counter + 4;
    248 		pc[i].reg_value = PMC_GEN_INTERRUPT;
    249 		pc[i].reg_value |= PMC_PRIV_MONITOR;
    250 		pc[i].reg_value |= PMC_MANDATORY;
    251 		(event->user) ? (pc[i].reg_value |= PMC_USER)
    252 		              : (pc[i].reg_value &= ~PMC_USER);
    253 		(event->kernel) ? (pc[i].reg_value |= PMC_KERNEL)
    254 		                : (pc[i].reg_value &= ~PMC_KERNEL);
    255 		pc[i].reg_value &= ~(0xff << 8);
    256 		pc[i].reg_value |= ((event->value & 0xff) << 8);
    257 		pc[i].reg_value &= ~(0xf << 16);
    258 		pc[i].reg_value |= ((event->um & 0xf) << 16);
    259 		pc[i].reg_smpl_eventid = event->counter;
    260 	}
    261 
    262 	for (i = 0; i < op_nr_counters && opd_events[i].name; ++i) {
    263 		struct opd_event * event = &opd_events[i];
    264 		pd[i].reg_value = ~0UL - event->count + 1;
    265 		pd[i].reg_short_reset = ~0UL - event->count + 1;
    266 		pd[i].reg_num = event->counter + 4;
    267 	}
    268 
    269 	err = perfmonctl(self->ctx_fd, PFM_WRITE_PMCS, pc, i);
    270 	if (err == -1) {
    271 		perror("Couldn't write PMCs: ");
    272 		exit(EXIT_FAILURE);
    273 	}
    274 
    275 	err = perfmonctl(self->ctx_fd, PFM_WRITE_PMDS, pd, i);
    276 	if (err == -1) {
    277 		perror("Couldn't write PMDs: ");
    278 		exit(EXIT_FAILURE);
    279 	}
    280 }
    281 
    282 
    283 static void load_context(struct child * self)
    284 {
    285 	pfarg_load_t load_args;
    286 	int err;
    287 
    288 	memset(&load_args, 0, sizeof(load_args));
    289 	load_args.load_pid = self->pid;
    290 
    291 	err = perfmonctl(self->ctx_fd, PFM_LOAD_CONTEXT, &load_args, 1);
    292 	if (err == -1) {
    293 		perror("Couldn't load context: ");
    294 		exit(EXIT_FAILURE);
    295 	}
    296 }
    297 
    298 
    299 static void notify_parent(struct child * self, size_t cpu)
    300 {
    301 	for (;;) {
    302 		ssize_t ret;
    303 		ret = write(self->up_pipe[1], &cpu, sizeof(size_t));
    304 		if (ret == sizeof(size_t))
    305 			break;
    306 		if (ret < 0 && errno != EINTR) {
    307 			fprintf(stderr, "Failed to write child pipe with %s\n",
    308 			        strerror(errno));
    309 			exit(EXIT_FAILURE);
    310 		}
    311 	}
    312 }
    313 
    314 
    315 static void run_child(size_t cpu)
    316 {
    317 	struct child * self = &children[cpu];
    318 
    319 	self->pid = getpid();
    320 	self->sigusr1 = 0;
    321 	self->sigusr2 = 0;
    322 	self->sigterm = 0;
    323 
    324 	setup_signals();
    325 
    326 	set_affinity(cpu);
    327 
    328 	create_context(self);
    329 
    330 	write_pmu(self);
    331 
    332 	load_context(self);
    333 
    334 	notify_parent(self, cpu);
    335 
    336 	for (;;) {
    337 		sigset_t sigmask;
    338 		sigfillset(&sigmask);
    339 		sigdelset(&sigmask, SIGUSR1);
    340 		sigdelset(&sigmask, SIGUSR2);
    341 		sigdelset(&sigmask, SIGTERM);
    342 
    343 		if (self->sigusr1) {
    344 			printf("PFM_START on CPU%d\n", (int)cpu);
    345 			fflush(stdout);
    346 			perfmon_start_child(self->ctx_fd);
    347 			self->sigusr1 = 0;
    348 		}
    349 
    350 		if (self->sigusr2) {
    351 			printf("PFM_STOP on CPU%d\n", (int)cpu);
    352 			fflush(stdout);
    353 			perfmon_stop_child(self->ctx_fd);
    354 			self->sigusr2 = 0;
    355 		}
    356 
    357 		sigsuspend(&sigmask);
    358 	}
    359 }
    360 
    361 
    362 static void wait_for_child(struct child * child)
    363 {
    364 	size_t tmp;
    365 	for (;;) {
    366 		ssize_t ret;
    367 		ret = read(child->up_pipe[0], &tmp, sizeof(size_t));
    368 		if (ret == sizeof(size_t))
    369 			break;
    370 		if (ret < 0 && errno != EINTR) {
    371 			fprintf(stderr, "Failed to read child pipe with %s\n",
    372 			        strerror(errno));
    373 			exit(EXIT_FAILURE);
    374 		}
    375 	}
    376 	printf("Perfmon child up on CPU%d\n", (int)tmp);
    377 	fflush(stdout);
    378 
    379 	close(child->up_pipe[0]);
    380 	close(child->up_pipe[1]);
    381 }
    382 
    383 static struct child* xen_ctx;
    384 
    385 void perfmon_init(void)
    386 {
    387 	size_t i;
    388 	long nr;
    389 
    390 	if (cpu_type == CPU_TIMER_INT)
    391 		return;
    392 
    393 	if (!no_xen) {
    394 		xen_ctx = xmalloc(sizeof(struct child));
    395 		xen_ctx->pid = getpid();
    396 		xen_ctx->up_pipe[0] = -1;
    397 		xen_ctx->up_pipe[1] = -1;
    398 		xen_ctx->sigusr1 = 0;
    399 		xen_ctx->sigusr2 = 0;
    400 		xen_ctx->sigterm = 0;
    401 
    402 		create_context(xen_ctx);
    403 
    404 		write_pmu(xen_ctx);
    405 
    406 		load_context(xen_ctx);
    407 		return;
    408 	}
    409 
    410 
    411 	nr = sysconf(_SC_NPROCESSORS_ONLN);
    412 	if (nr == -1) {
    413 		fprintf(stderr, "Couldn't determine number of CPUs.\n");
    414 		exit(EXIT_FAILURE);
    415 	}
    416 
    417 	nr_cpus = nr;
    418 
    419 	children = xmalloc(sizeof(struct child) * nr_cpus);
    420 
    421 	for (i = 0; i < nr_cpus; ++i) {
    422 		int ret;
    423 
    424 		if (pipe(children[i].up_pipe)) {
    425 			perror("Couldn't create child pipe.\n");
    426 			exit(EXIT_FAILURE);
    427 		}
    428 
    429 		ret = fork();
    430 		if (ret == -1) {
    431 			fprintf(stderr, "Couldn't fork perfmon child.\n");
    432 			exit(EXIT_FAILURE);
    433 		} else if (ret == 0) {
    434 			printf("Running perfmon child on CPU%d.\n", (int)i);
    435 			fflush(stdout);
    436 			run_child(i);
    437 		} else {
    438 			children[i].pid = ret;
    439 			printf("Waiting on CPU%d\n", (int)i);
    440 			wait_for_child(&children[i]);
    441 		}
    442 	}
    443 }
    444 
    445 
    446 void perfmon_exit(void)
    447 {
    448 	size_t i;
    449 
    450 	if (cpu_type == CPU_TIMER_INT)
    451 		return;
    452 
    453 	if (!no_xen)
    454 		return;
    455 
    456 	for (i = 0; i < nr_cpus; ++i) {
    457 		kill(children[i].pid, SIGKILL);
    458 		waitpid(children[i].pid, NULL, 0);
    459 	}
    460 }
    461 
    462 
    463 void perfmon_start(void)
    464 {
    465 	size_t i;
    466 
    467 	if (cpu_type == CPU_TIMER_INT)
    468 		return;
    469 
    470 	if (!no_xen) {
    471 		perfmon_start_child(xen_ctx->ctx_fd);
    472 		return;
    473 	}
    474 
    475 	for (i = 0; i < nr_cpus; ++i)
    476 		kill(children[i].pid, SIGUSR1);
    477 }
    478 
    479 
    480 void perfmon_stop(void)
    481 {
    482 	size_t i;
    483 
    484 	if (cpu_type == CPU_TIMER_INT)
    485 		return;
    486 
    487 	if (!no_xen) {
    488 		perfmon_stop_child(xen_ctx->ctx_fd);
    489 		return;
    490 	}
    491 
    492 	for (i = 0; i < nr_cpus; ++i)
    493 		kill(children[i].pid, SIGUSR2);
    494 }
    495 
    496 #endif /* __ia64__ */
    497