/**
 * @file op_pmu.c
 * Setup and handling of IA64 Performance Monitoring Unit (PMU)
 *
 * @remark Copyright 2002 OProfile authors
 * @remark Read the file COPYING
 *
 * @author Bob Montgomery
 * @author Will Cohen
 * @author John Levon
 * @author Philippe Elie
 */


#include "oprofile.h"
#include "op_util.h"
#include <asm/perfmon.h>
#include "op_ia64_model.h"

/* number of counters physically present */
static uint op_nr_counters = 4;

/* performance counters are in pairs: pmcN and pmdN.  The pmc register acts
 * as the event selection; the pmd register is the counter. */
#define perf_reg(c)	((c)+4)
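/* i.e. counter 0 is pmc4/pmd4 up through counter 3 at pmc7/pmd7, the four
 * generic counter pairs of the Itanium PMU */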

#define IA64_1_PMD_MASK_VAL	((1UL << 32) - 1)
#define IA64_2_PMD_MASK_VAL	((1UL << 47) - 1)
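/* per these masks, Itanium pmds implement 32 counter bits and Itanium 2
 * pmds 47; writes must mask off everything above that */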

/* The appropriate value is selected in pmu_init() */
unsigned long pmd_mask = IA64_2_PMD_MASK_VAL;

#define pmd_overflowed(r, c) ((r) & (1 << perf_reg(c)))
#define set_pmd_neg(v, c) do { \
	ia64_set_pmd(perf_reg(c), -(ulong)(v) & pmd_mask); \
	ia64_srlz_d(); } while (0)
#define set_pmd(v, c) do { \
	ia64_set_pmd(perf_reg(c), (v) & pmd_mask); \
	ia64_srlz_d(); } while (0)
#define set_pmc(v, c) do { ia64_set_pmc(perf_reg(c), (v)); ia64_srlz_d(); } while (0)
#define get_pmd(c) ia64_get_pmd(perf_reg(c))
#define get_pmc(c) ia64_get_pmc(perf_reg(c))
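
/* set_pmd_neg() seeds a counter with the two's complement of the reset
 * count, so the pmd overflows after exactly that many events: e.g. a count
 * of 100000 on Itanium 2 writes (-100000 & pmd_mask) = 0x7ffffffe7960,
 * which wraps to zero after 100000 increments, raising the overflow
 * interrupt serviced below. */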

/* ---------------- IRQ handler ------------------ */

/* The args match the args for pfm_overflow_handler in perfmon.c.
 * The task_struct is currently filled in with the perfmon "owner" of
 * the PMU.  This might change.  I'm not sure it makes sense in perfmon
 * either with system-wide profiling.
 * pmc0 is a bit mask of the overflowed counters (bits 4-7); counting
 * resumes when the caller clears it.
 */
static inline void
op_do_pmu_interrupt(u64 pmc0, struct pt_regs * regs)
{
	uint cpu = op_cpu_id();
	int ctr;

	for (ctr = 0 ; ctr < op_nr_counters ; ++ctr) {
		if (pmd_overflowed(pmc0, ctr)) {
			op_do_profile(cpu, regs->cr_iip, 1, ctr);
			set_pmd_neg(oprof_data[cpu].ctr_count[ctr], ctr);
		}
	}
}


static void
op_raw_pmu_interrupt(int irq, void * arg, struct pt_regs * regs)
{
	u64 pmc0;

	pmc0 = ia64_get_pmc(0);

	if ((pmc0 & ~0x1UL) != 0UL) {
		op_do_pmu_interrupt(pmc0, regs);
		ia64_set_pmc(0, 0);
		ia64_srlz_d();
	}
}


#define MY_OPROFILE_VECTOR (IA64_PERFMON_VECTOR - 2)
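/* presumably picked to sit near, but not collide with, the perfmon
 * handler installed on IA64_PERFMON_VECTOR itself */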

static void
op_set_pmv(void * dummy)
{
	ia64_set_pmv(MY_OPROFILE_VECTOR);
	ia64_srlz_d();
}


static void
op_restore_pmv(void * dummy)
{
	ia64_set_pmv(IA64_PERFMON_VECTOR);
	ia64_srlz_d();
}


static int
install_handler(void)
{
	int err = 0;

	/* Request the vector through the normal irq interface; on IA64 the
	 * vector number is used as the irq number here. */
	err = request_irq(MY_OPROFILE_VECTOR, op_raw_pmu_interrupt,
			SA_INTERRUPT | SA_PERCPU_IRQ, "oprofile", NULL);

	if (err) {
		printk(KERN_ALERT "oprofile_IA64: request_irq fails, "
				"returns %d\n", err);
		return err;
	}

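	/* in 2.4, smp_call_function() runs op_set_pmv on every CPU but this
	 * one, so set the local pmv by hand afterwards */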
	if ((smp_call_function(op_set_pmv, NULL, 0, 1))) {
		printk(KERN_ALERT "oprofile_IA64: unexpected failure "
				"of smp_call_function(op_set_pmv)\n");
	}

	op_set_pmv(NULL);

	return err;
}


static int
restore_handler(void)
{
	int err = 0;

	if ((smp_call_function(op_restore_pmv, NULL, 0, 1))) {
		printk(KERN_ALERT "oprofile_IA64: unexpected failure "
				"of smp_call_function(op_restore_pmv)\n");
	}

	op_restore_pmv(NULL);

	free_irq(MY_OPROFILE_VECTOR, NULL);
	return err;
}


/* ---------------- PMU setup ------------------ */

/* This is kind of artificial.  The proc interface might really want to
 * accept register values directly.  There are other features not exposed
 * by this limited interface; exposing them would of course require all
 * sorts of validity checking. */
static void
pmc_fill_in(ulong * val, u8 kernel, u8 user, u8 event, u8 um)
{
	/* enable interrupt generation */
	*val |= (1 << 5);

	/* setup as a privileged monitor */
	*val |= (1 << 6);

	/* McKinley requires pmc4 to have bit 23 set (enable PMU).
	 * It is supposedly ignored in other pmc registers.
	 * Try assuming it's ignored in Itanium, too, and just
	 * set it for everyone.
	 */
	*val |= (1 << 23);

	/* enable/disable chosen OS and USR counting */
	if (user)
		*val |= (1 << 3);
	else
		*val &= ~(1 << 3);

	if (kernel)
		*val |= (1 << 0);
	else
		*val &= ~(1 << 0);

	/* what are we counting ? */
	*val &= ~(0xff << 8);
	*val |= ((event & 0xff) << 8);
	*val &= ~(0xf << 16);
	*val |= ((um & 0xf) << 16);
}
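
/* Worked example: kernel=1, user=1, event=0x12, um=0 turns a zeroed pmc
 * value into (1 << 0) | (1 << 3) | (1 << 5) | (1 << 6) | (1 << 23) |
 * (0x12 << 8) = 0x801269. */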


static void
pmu_setup(void * dummy)
{
	ulong pmc_val;
	int ii;

	/* setup each counter */
	for (ii = 0 ; ii < op_nr_counters ; ++ii) {
		if (sysctl.ctr[ii].enabled) {
			pmc_val = 0;

			set_pmd_neg(sysctl.ctr[ii].count, ii);
			pmc_fill_in(&pmc_val, sysctl.ctr[ii].kernel,
				sysctl.ctr[ii].user, sysctl.ctr[ii].event,
				sysctl.ctr[ii].unit_mask);

			set_pmc(pmc_val, ii);
		}
	}
}
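
/* Note: pmu_setup() only runs from pmu_setup_all(), after disable_psr()
 * has cleared psr.pp on every CPU, so the registers are written while the
 * counters are quiet. */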


void
disable_psr(void * dummy)
{
	struct pt_regs * regs;

	/* disable profiling for my saved state */
	regs = (struct pt_regs *)((unsigned long) current + IA64_STK_OFFSET);
	regs--;
	ia64_psr(regs)->pp = 0;
	/* shouldn't need to, but clear psr.up as well */
	ia64_psr(regs)->up = 0;

	/* disable profiling for my current state */
	__asm__ __volatile__ ("rsm psr.pp;;"::: "memory");

#if defined(CONFIG_PERFMON) && defined(CONFIG_SMP)
#if V_AT_LEAST(2, 4, 21)
	local_cpu_data->pfm_syst_info |=  PFM_CPUINFO_SYST_WIDE;
	local_cpu_data->pfm_syst_info &= ~PFM_CPUINFO_DCR_PP;
	/* FIXME: what to do with the third flag, PFM_CPUINFO_EXCL_IDLE (0x4)? */
#else
	/* disable profiling for everyone else */
	local_cpu_data->pfm_syst_wide = 1;
	local_cpu_data->pfm_dcr_pp = 0;
#endif
#endif
	ia64_set_pmc(0, 0);
	ia64_srlz_d();
}


static int
pmu_setup_all(void)
{
	/* This would be a great place to reserve all cpus with
	 * some sort of call to perfmonctl (something like the
	 * CREATE_CONTEXT command).  The current interface to
	 * perfmonctl wants to be called from a different task id
	 * for each CPU to be set up (and doesn't allow calls from
	 * modules).
	 */

	/* disable profiling with the psr.pp bit */
	if ((smp_call_function(disable_psr, NULL, 0, 1)))
		return -EFAULT;

	disable_psr(NULL);

	/* now I've reserved the PMUs and they should be quiet */

	if ((smp_call_function(pmu_setup, NULL, 0, 1)))
		return -EFAULT;

	pmu_setup(NULL);
	return 0;
}


#ifndef CONFIG_SMP
/* from linux/arch/ia64/kernel/perfmon.c */
/*
 * Originally written by Ganesh Venkitachalam, IBM Corp.
 * Copyright (C) 1999 Ganesh Venkitachalam <venkitac (at) us.ibm.com>
 *
 * Modifications by Stephane Eranian, Hewlett-Packard Co.
 * Modifications by David Mosberger-Tang, Hewlett-Packard Co.
 *
 * Copyright (C) 1999-2002  Hewlett Packard Co
 *               Stephane Eranian <eranian (at) hpl.hp.com>
 *               David Mosberger-Tang <davidm (at) hpl.hp.com>
 */

/*
 * On UP kernels, we do not need to constantly set the psr.pp bit
 * when a task is scheduled. The psr.pp bit can only be changed in
 * the kernel because of a user request. Given we are on a UP non-preemptive
 * kernel we know that no other task is running, so we can simply update their
 * psr.pp from their saved state. There is thus no impact on the context switch
 * code compared to the SMP case.
 */
static void
op_tasklist_toggle_pp(unsigned int val)
{
	struct task_struct * p;
	struct pt_regs * regs;

	read_lock(&tasklist_lock);

	for_each_task(p) {
		regs = (struct pt_regs *)((unsigned long) p + IA64_STK_OFFSET);

		/*
		 * position on pt_regs saved on stack on 1st entry into the kernel
		 */
		regs--;

		/*
		 * update psr.pp
		 */
		ia64_psr(regs)->pp = val;
	}
	read_unlock(&tasklist_lock);
}
#endif


static void
pmu_start(void * info)
{
	struct pt_regs * regs;

	if (info && (*((uint *)info) != op_cpu_id()))
		return;

	/* printk(KERN_ALERT "oprofile_IA64: pmu_start on cpu %d\n",
	 *	op_cpu_id()); */

	/* The default control register pp value is copied into psr.pp
	 * on an interrupt.  This allows interrupt service routines to
	 * be monitored.
	 */
	ia64_set_dcr(ia64_get_dcr() | IA64_DCR_PP);

#ifdef CONFIG_PERFMON
#ifdef CONFIG_SMP
#if V_AT_LEAST(2, 4, 21)
	local_cpu_data->pfm_syst_info |= PFM_CPUINFO_SYST_WIDE;
	local_cpu_data->pfm_syst_info |= PFM_CPUINFO_DCR_PP;
	/* FIXME: what to do with the third flag, PFM_CPUINFO_EXCL_IDLE (0x4)? */
#else
	local_cpu_data->pfm_syst_wide = 1;
	local_cpu_data->pfm_dcr_pp = 1;
#endif
#else
	op_tasklist_toggle_pp(1);
#endif
#endif
	/* set it in my saved state */
	regs = (struct pt_regs *)((unsigned long) current + IA64_STK_OFFSET);
	regs--;
	ia64_psr(regs)->pp = 1;

	/* set it in my current state */
	__asm__ __volatile__ ("ssm psr.pp;;"::: "memory");
	ia64_srlz_d();
}


static void
pmu_stop(void * info)
{
	struct pt_regs * regs;

	if (info && (*((uint *)info) != op_cpu_id()))
		return;

	/* stop in my current state */
	__asm__ __volatile__ ("rsm psr.pp;;"::: "memory");

	/* disable the dcr pp */
	ia64_set_dcr(ia64_get_dcr() & ~IA64_DCR_PP);

#ifdef CONFIG_PERFMON
#ifdef CONFIG_SMP
#if V_AT_LEAST(2, 4, 21)
	local_cpu_data->pfm_syst_info &= ~PFM_CPUINFO_SYST_WIDE;
	local_cpu_data->pfm_syst_info &= ~PFM_CPUINFO_DCR_PP;
	/* FIXME: what to do with the third flag, PFM_CPUINFO_EXCL_IDLE (0x4)? */
#else
	local_cpu_data->pfm_syst_wide = 0;
	local_cpu_data->pfm_dcr_pp = 0;
#endif
#else
	op_tasklist_toggle_pp(0);
#endif
#endif

	/* disable in my saved state */
	regs = (struct pt_regs *)((unsigned long) current + IA64_STK_OFFSET);
	regs--;
	ia64_psr(regs)->pp = 0;
}


static void
pmu_select_start(uint cpu)
{
	if (cpu == op_cpu_id())
		pmu_start(NULL);
	else
		smp_call_function(pmu_start, &cpu, 0, 1);
}


static void
pmu_select_stop(uint cpu)
{
	if (cpu == op_cpu_id())
		pmu_stop(NULL);
	else
		smp_call_function(pmu_stop, &cpu, 0, 1);
}


static void
pmu_start_all(void)
{
	int cpu, i;

	for (cpu = 0; cpu < smp_num_cpus; cpu++) {
		struct _oprof_data * data = &oprof_data[cpu];

		for (i = 0 ; i < op_nr_counters ; ++i) {
			if (sysctl.ctr[i].enabled) {
				data->ctr_count[i] = sysctl.ctr[i].count;
			} else {
				data->ctr_count[i] = 0;
			}
		}
	}

	/* FIXME: need some way to fail here */
	if (!install_handler()) {
		smp_call_function(pmu_start, NULL, 0, 1);
		pmu_start(NULL);
	}
}


static void
pmu_stop_all(void)
{
	smp_call_function(pmu_stop, NULL, 0, 1);
	pmu_stop(NULL);
	restore_handler();
}


static int
pmu_check_params(void)
{
	int i;
	int enabled = 0;

	for (i = 0; i < op_nr_counters ; i++) {
		if (!sysctl.ctr[i].enabled)
			continue;

		enabled = 1;

		if (!sysctl.ctr[i].user && !sysctl.ctr[i].kernel) {
			printk(KERN_ERR "oprofile: neither kernel nor user "
			       "set for counter %d\n", i);
			return -EINVAL;
		}

		if (check_range(sysctl.ctr[i].count, 1, OP_MAX_PERF_COUNT,
			"ctr count value %d not in range (%d %ld)\n"))
			return -EINVAL;
	}

	if (!enabled) {
		printk(KERN_ERR "oprofile: no counters have been enabled.\n");
		return -EINVAL;
	}

	return 0;
}


static struct op_msrs cpu_msrs[NR_CPUS];


static void free_msr_group(struct op_msr_group * group)
{
	if (group->addrs)
		kfree(group->addrs);
	if (group->saved)
		kfree(group->saved);
	group->addrs = NULL;
	group->saved = NULL;
}

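/* Save the current pmc/pmd values so pmu_deinit() can restore them.
 * These op_msr_group/op_saved_msr structures are shared with the x86
 * driver; only the .low halves are used here, so if that field is 32 bits
 * wide the upper bits of the 64-bit IA64 registers are not preserved. */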
static void pmu_save_registers(void * dummy)
{
	uint i;
	uint const cpu = op_cpu_id();
	struct op_msr_group * counters = &cpu_msrs[cpu].counters;
	struct op_msr_group * controls = &cpu_msrs[cpu].controls;

	counters->addrs = NULL;
	counters->saved = NULL;
	controls->addrs = NULL;
	controls->saved = NULL;

	counters->saved = kmalloc(
		op_nr_counters * sizeof(struct op_saved_msr), GFP_KERNEL);
	if (!counters->saved)
		goto fault;

	controls->saved = kmalloc(
		op_nr_counters * sizeof(struct op_saved_msr), GFP_KERNEL);
	if (!controls->saved)
		goto fault;

	for (i = 0; i < op_nr_counters; ++i) {
		controls->saved[i].low = get_pmc(i);
		counters->saved[i].low = get_pmd(i);
	}
	return;

fault:
	free_msr_group(counters);
	free_msr_group(controls);
}


static void pmu_restore_registers(void * dummy)
{
	uint i;
	uint const cpu = op_cpu_id();
	struct op_msr_group * counters = &cpu_msrs[cpu].counters;
	struct op_msr_group * controls = &cpu_msrs[cpu].controls;

	for (i = 0; i < op_nr_counters; ++i) {
		set_pmc(controls->saved[i].low, i);
		set_pmd(counters->saved[i].low, i);
	}

	free_msr_group(counters);
	free_msr_group(controls);
}


static int
pmu_init(void)
{
	int err = 0;

	/* figure out the processor type and configure the number of
	   usable pmd bits and the number of counters */
	switch (get_cpu_type()) {
	case CPU_IA64_1:
		pmd_mask = IA64_1_PMD_MASK_VAL; break;
	case CPU_IA64_2:
	case CPU_IA64:
		pmd_mask = IA64_2_PMD_MASK_VAL; break;
	default:
		err = -EIO; break;
	}

	if (err)
		goto out;

	op_nr_counters = 4;

	if ((err = smp_call_function(pmu_save_registers, NULL, 0, 1)))
		goto out;

	pmu_save_registers(NULL);

out:
	return err;
}


static void
pmu_deinit(void)
{
	smp_call_function(pmu_restore_registers, NULL, 0, 1);
	pmu_restore_registers(NULL);
}


static char * names[] = { "0", "1", "2", "3", };
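/* names[] above supplies one /proc entry name per counter; it must cover
 * op_nr_counters entries */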


static int
pmu_add_sysctls(ctl_table * next)
{
	ctl_table * start = next;
	ctl_table * tab;
	int i, j;

	for (i = 0; i < op_nr_counters; i++) {
		next->ctl_name = 1;
		next->procname = names[i];
		next->mode = 0700;

		if (!(tab = kmalloc(sizeof(ctl_table)*7, GFP_KERNEL)))
			goto cleanup;

		next->child = tab;

		memset(tab, 0, sizeof(ctl_table)*7);
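		/* positional 2.4 ctl_table initializers: ctl_name, procname,
		 * data, maxlen, mode, child, proc_handler, strategy; the
		 * seventh, zeroed entry terminates the table */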
		tab[0] = ((ctl_table) { 1, "enabled", &sysctl_parms.ctr[i].enabled, sizeof(int), 0600, NULL, lproc_dointvec, NULL, });
		tab[1] = ((ctl_table) { 1, "event", &sysctl_parms.ctr[i].event, sizeof(int), 0600, NULL, lproc_dointvec, NULL, });
		tab[2] = ((ctl_table) { 1, "count", &sysctl_parms.ctr[i].count, sizeof(int), 0600, NULL, lproc_dointvec, NULL, });
		tab[3] = ((ctl_table) { 1, "unit_mask", &sysctl_parms.ctr[i].unit_mask, sizeof(int), 0600, NULL, lproc_dointvec, NULL, });
		tab[4] = ((ctl_table) { 1, "kernel", &sysctl_parms.ctr[i].kernel, sizeof(int), 0600, NULL, lproc_dointvec, NULL, });
		tab[5] = ((ctl_table) { 1, "user", &sysctl_parms.ctr[i].user, sizeof(int), 0600, NULL, lproc_dointvec, NULL, });
		next++;
	}

	return 0;

cleanup:
	next = start;
	for (j = 0; j < i; j++) {
		kfree(next->child);
		next++;
	}
	return -EFAULT;
}


static void pmu_remove_sysctls(ctl_table * next)
{
	int ii;

	for (ii = 0; ii < op_nr_counters; ii++) {
		kfree(next->child);
		next++;
	}
}


struct op_int_operations op_nmi_ops = {
	init: pmu_init,
	deinit: pmu_deinit,
	add_sysctls: pmu_add_sysctls,
	remove_sysctls: pmu_remove_sysctls,
	check_params: pmu_check_params,
	setup: pmu_setup_all,
	start: pmu_start_all,
	stop: pmu_stop_all,
	start_cpu: pmu_select_start,
	stop_cpu: pmu_select_stop,
};


struct op_int_operations const * op_int_interface(void)
{
	return &op_nmi_ops;
}

/* Need this dummy so module/oprofile.c links */
struct op_int_operations op_rtc_ops = {
	init: NULL,
	deinit: NULL,
	add_sysctls: NULL,
	remove_sysctls: NULL,
	check_params: NULL,
	setup: NULL,
	start: NULL,
	stop: NULL,
	start_cpu: NULL,
	stop_cpu: NULL,
};