/**
 * @file op_pmu.c
 * Setup and handling of IA64 Performance Monitoring Unit (PMU)
 *
 * @remark Copyright 2002 OProfile authors
 * @remark Read the file COPYING
 *
 * @author Bob Montgomery
 * @author Will Cohen
 * @author John Levon
 * @author Philippe Elie
 */


#include "oprofile.h"
#include "op_util.h"
#include <asm/perfmon.h>
#include "op_ia64_model.h"

/* number of counters physically present */
static uint op_nr_counters = 4;

/* performance counters are in pairs: pmcN and pmdN.  The pmc register acts
 * as the event selection; the pmd register is the counter. */
#define perf_reg(c)	((c) + 4)

#define IA64_1_PMD_MASK_VAL	((1UL << 32) - 1)
#define IA64_2_PMD_MASK_VAL	((1UL << 47) - 1)

/* The appropriate value is selected in pmu_init() */
unsigned long pmd_mask = IA64_2_PMD_MASK_VAL;

#define pmd_overflowed(r, c)	((r) & (1 << perf_reg(c)))
#define set_pmd_neg(v, c) do { \
	ia64_set_pmd(perf_reg(c), -(ulong)(v) & pmd_mask); \
	ia64_srlz_d(); } while (0)
#define set_pmd(v, c) do { \
	ia64_set_pmd(perf_reg(c), (v) & pmd_mask); \
	ia64_srlz_d(); } while (0)
#define set_pmc(v, c) do { ia64_set_pmc(perf_reg(c), (v)); ia64_srlz_d(); } while (0)
#define get_pmd(c)	ia64_get_pmd(perf_reg(c))
#define get_pmc(c)	ia64_get_pmc(perf_reg(c))
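
/*
 * Illustrative sketch, compiled out: the "negative arming" technique
 * behind set_pmd_neg().  Writing -count (masked to the pmd width)
 * leaves the counter `count' increments short of overflow, so the PMU
 * interrupt fires after exactly `count' events.  The helper name and
 * the sample period are ours, not part of the driver.
 */
#if 0
static void pmd_arming_example(void)
{
	ulong count = 100000;	/* hypothetical sampling period */

	/* -(ulong)count & pmd_mask == pmd_mask + 1 - count, i.e. the
	 * counter starts `count' events below its overflow point */
	ulong armed = -(ulong)count & pmd_mask;

	set_pmd_neg(count, 0);	/* arm counter 0 (pmd4) */
	printk(KERN_DEBUG "pmd4 armed with 0x%lx\n", armed);
}
#endif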
/* ---------------- IRQ handler ------------------ */

/* The args match the args for pfm_overflow_handler in perfmon.c.
 * The task_struct is currently filled in with the perfmon "owner" of
 * the PMU.  This might change.  I'm not sure it makes sense in perfmon
 * either with system-wide profiling.
 * pmc0 is a bit mask for overflowed counters (bits 4-7).
 * perfmon's handler returns 0 to resume interrupts; here the caller
 * (op_raw_pmu_interrupt) clears pmc0 itself, so this returns nothing.
 */
inline static void
op_do_pmu_interrupt(u64 pmc0, struct pt_regs * regs)
{
	uint cpu = op_cpu_id();
	int ctr;

	for (ctr = 0 ; ctr < op_nr_counters ; ++ctr) {
		if (pmd_overflowed(pmc0, ctr)) {
			op_do_profile(cpu, regs->cr_iip, 1, ctr);
			set_pmd_neg(oprof_data[cpu].ctr_count[ctr], ctr);
		}
	}
}


static void
op_raw_pmu_interrupt(int irq, void * arg, struct pt_regs * regs)
{
	u64 pmc0;

	pmc0 = ia64_get_pmc(0);

	if ((pmc0 & ~0x1UL) != 0UL) {
		op_do_pmu_interrupt(pmc0, regs);
		ia64_set_pmc(0, 0);
		ia64_srlz_d();
	}
}


#define MY_OPROFILE_VECTOR (IA64_PERFMON_VECTOR - 2)

static void
op_set_pmv(void * dummy)
{
	ia64_set_pmv(MY_OPROFILE_VECTOR);
	ia64_srlz_d();
}


static void
op_restore_pmv(void * dummy)
{
	ia64_set_pmv(IA64_PERFMON_VECTOR);
	ia64_srlz_d();
}


static int
install_handler(void)
{
	int err = 0;

	/* Try it legally - confusion about vec vs irq */
	err = request_irq(MY_OPROFILE_VECTOR, op_raw_pmu_interrupt,
			SA_INTERRUPT | SA_PERCPU_IRQ, "oprofile", NULL);

	if (err) {
		printk(KERN_ALERT "oprofile_IA64: request_irq fails, "
				"returns %d\n", err);
		return err;
	}

	if ((smp_call_function(op_set_pmv, NULL, 0, 1))) {
		printk(KERN_ALERT "oprofile_IA64: unexpected failure "
				"of smp_call_function(op_set_pmv)\n");
	}

	op_set_pmv(NULL);

	return err;
}


static int
restore_handler(void)
{
	int err = 0;

	if ((smp_call_function(op_restore_pmv, NULL, 0, 1))) {
		printk(KERN_ALERT "oprofile_IA64: unexpected failure "
				"of smp_call_function(op_restore_pmv)\n");
	}

	op_restore_pmv(NULL);

	free_irq(MY_OPROFILE_VECTOR, NULL);
	return err;
}


/* ---------------- PMU setup ------------------ */

/* This is kind of artificial.  The proc interface might really want to
 * accept register values directly.  There are other features not exposed
 * by this limited interface.  Of course that might require all sorts of
 * validity checking??? */
static void
pmc_fill_in(ulong * val, u8 kernel, u8 user, u8 event, u8 um)
{
	/* enable interrupt generation */
	*val |= (1 << 5);

	/* setup as a privileged monitor */
	*val |= (1 << 6);

	/* McKinley requires pmc4 to have bit 23 set (enable PMU).
	 * It is supposedly ignored in other pmc registers.
	 * Try assuming it's ignored in Itanium, too, and just
	 * set it for everyone.
	 */
	*val |= (1 << 23);

	/* enable/disable chosen OS and USR counting */
	if (user)
		*val |= (1 << 3);
	else
		*val &= ~(1 << 3);

	if (kernel)
		*val |= (1 << 0);
	else
		*val &= ~(1 << 0);

	/* what are we counting? */
	*val &= ~(0xff << 8);
	*val |= ((event & 0xff) << 8);
	*val &= ~(0xf << 16);
	*val |= ((um & 0xf) << 16);
}


static void
pmu_setup(void * dummy)
{
	ulong pmc_val;
	int ii;

	/* setup each counter */
	for (ii = 0 ; ii < op_nr_counters ; ++ii) {
		if (sysctl.ctr[ii].enabled) {
			pmc_val = 0;

			set_pmd_neg(sysctl.ctr[ii].count, ii);
			pmc_fill_in(&pmc_val, sysctl.ctr[ii].kernel,
				sysctl.ctr[ii].user, sysctl.ctr[ii].event,
				sysctl.ctr[ii].unit_mask);

			set_pmc(pmc_val, ii);
		}
	}
}


void
disable_psr(void * dummy)
{
	struct pt_regs * regs;

	/* disable profiling for my saved state */
	regs = (struct pt_regs *)((unsigned long) current + IA64_STK_OFFSET);
	regs--;
	ia64_psr(regs)->pp = 0;
	/* shouldn't need to */
	ia64_psr(regs)->up = 0;

	/* disable profiling for my current state */
	__asm__ __volatile__ ("rsm psr.pp;;"::: "memory");

#if defined(CONFIG_PERFMON) && defined(CONFIG_SMP)
#if V_AT_LEAST(2, 4, 21)
	local_cpu_data->pfm_syst_info |= PFM_CPUINFO_SYST_WIDE;
	local_cpu_data->pfm_syst_info &= ~PFM_CPUINFO_DCR_PP;
	/* FIXME: what to do with the 3rd flag, PFM_CPUINFO_EXCL_IDLE (0x4)? */
#else
	/* disable profiling for everyone else */
	local_cpu_data->pfm_syst_wide = 1;
	local_cpu_data->pfm_dcr_pp = 0;
#endif
#endif
	ia64_set_pmc(0, 0);
	ia64_srlz_d();
}


static int
pmu_setup_all(void)
{
	/* This would be a great place to reserve all cpus with
	 * some sort of call to perfmonctl (something like the
	 * CREATE_CONTEXT command).  The current interface to
	 * perfmonctl wants to be called from a different task id
	 * for each CPU to be set up (and doesn't allow calls from
	 * modules).
	 */

	/* disable profiling with the psr.pp bit */
	if ((smp_call_function(disable_psr, NULL, 0, 1)))
		return -EFAULT;

	disable_psr(NULL);

	/* now I've reserved the PMUs and they should be quiet */

	if ((smp_call_function(pmu_setup, NULL, 0, 1)))
		return -EFAULT;

	pmu_setup(NULL);
	return 0;
}
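
/*
 * Hedged debugging aid, compiled out: read back what pmu_setup()
 * programmed, through the get_pmc()/get_pmd() accessors above.  The
 * field decode mirrors the bits written by pmc_fill_in(); the helper
 * name and printk format are ours, not part of the driver.
 */
#if 0
static void pmu_dump_state(void)
{
	int i;

	for (i = 0; i < op_nr_counters; ++i) {
		ulong pmc = get_pmc(i);
		ulong pmd = get_pmd(i);

		printk(KERN_DEBUG "ctr%d: pmc%d=0x%lx (event 0x%lx, "
			"um 0x%lx, kernel %ld, user %ld) pmd%d=0x%lx\n",
			i, perf_reg(i), pmc,
			(pmc >> 8) & 0xff,	/* event select */
			(pmc >> 16) & 0xf,	/* unit mask */
			pmc & 0x1,		/* OS (kernel) counting */
			(pmc >> 3) & 0x1,	/* USR counting */
			perf_reg(i), pmd);
	}
}
#endif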
#ifndef CONFIG_SMP
/* from linux/arch/ia64/kernel/perfmon.c */
/*
 * Originally written by Ganesh Venkitachalam, IBM Corp.
 * Copyright (C) 1999 Ganesh Venkitachalam <venkitac (at) us.ibm.com>
 *
 * Modifications by Stephane Eranian, Hewlett-Packard Co.
 * Modifications by David Mosberger-Tang, Hewlett-Packard Co.
 *
 * Copyright (C) 1999-2002 Hewlett Packard Co
 *               Stephane Eranian <eranian (at) hpl.hp.com>
 *               David Mosberger-Tang <davidm (at) hpl.hp.com>
 */

/*
 * On UP kernels, we do not need to constantly set the psr.pp bit
 * when a task is scheduled.  The psr.pp bit can only be changed in
 * the kernel because of a user request.  Given we are on a UP
 * non-preemptive kernel, we know that no other task is running, so we
 * can simply update their psr.pp from their saved state.  There is
 * thus no impact on the context switch code compared to the SMP case.
 */
static void
op_tasklist_toggle_pp(unsigned int val)
{
	struct task_struct * p;
	struct pt_regs * regs;

	read_lock(&tasklist_lock);

	for_each_task(p) {
		regs = (struct pt_regs *)((unsigned long) p + IA64_STK_OFFSET);

		/*
		 * position on pt_regs saved on stack on 1st entry into the kernel
		 */
		regs--;

		/*
		 * update psr.pp
		 */
		ia64_psr(regs)->pp = val;
	}
	read_unlock(&tasklist_lock);
}
#endif


static void
pmu_start(void * info)
{
	struct pt_regs * regs;

	if (info && (*((uint *)info) != op_cpu_id()))
		return;

	/* printk(KERN_ALERT "oprofile_IA64: pmu_start on cpu %d\n",
	   op_cpu_id()); */

	/* The default control register pp value is copied into psr.pp
	 * on an interrupt.  This allows interrupt service routines to
	 * be monitored.
	 */
	ia64_set_dcr(ia64_get_dcr() | IA64_DCR_PP);

#ifdef CONFIG_PERFMON
#ifdef CONFIG_SMP
#if V_AT_LEAST(2, 4, 21)
	local_cpu_data->pfm_syst_info |= PFM_CPUINFO_SYST_WIDE;
	local_cpu_data->pfm_syst_info |= PFM_CPUINFO_DCR_PP;
	/* FIXME: what to do with the 3rd flag, PFM_CPUINFO_EXCL_IDLE (0x4)? */
#else
	local_cpu_data->pfm_syst_wide = 1;
	local_cpu_data->pfm_dcr_pp = 1;
#endif
#else
	op_tasklist_toggle_pp(1);
#endif
#endif
	/* set it in my saved state */
	regs = (struct pt_regs *)((unsigned long) current + IA64_STK_OFFSET);
	regs--;
	ia64_psr(regs)->pp = 1;

	/* set it in my current state */
	__asm__ __volatile__ ("ssm psr.pp;;"::: "memory");
	ia64_srlz_d();
}


static void
pmu_stop(void * info)
{
	struct pt_regs * regs;

	if (info && (*((uint *)info) != op_cpu_id()))
		return;

	/* stop in my current state */
	__asm__ __volatile__ ("rsm psr.pp;;"::: "memory");

	/* disable the dcr pp */
	ia64_set_dcr(ia64_get_dcr() & ~IA64_DCR_PP);

#ifdef CONFIG_PERFMON
#ifdef CONFIG_SMP
#if V_AT_LEAST(2, 4, 21)
	local_cpu_data->pfm_syst_info &= ~PFM_CPUINFO_SYST_WIDE;
	local_cpu_data->pfm_syst_info &= ~PFM_CPUINFO_DCR_PP;
	/* FIXME: what to do with the 3rd flag, PFM_CPUINFO_EXCL_IDLE (0x4)? */
#else
	local_cpu_data->pfm_syst_wide = 0;
	local_cpu_data->pfm_dcr_pp = 0;
#endif
#else
	/* was pfm_tasklist_toggle_pp(), which does not exist here; the
	 * UP helper defined above is op_tasklist_toggle_pp() */
	op_tasklist_toggle_pp(0);
#endif
#endif

	/* disable in my saved state */
	regs = (struct pt_regs *)((unsigned long) current + IA64_STK_OFFSET);
	regs--;
	ia64_psr(regs)->pp = 0;
}


static void
pmu_select_start(uint cpu)
{
	if (cpu == op_cpu_id())
		pmu_start(NULL);
	else
		smp_call_function(pmu_start, &cpu, 0, 1);
}


static void
pmu_select_stop(uint cpu)
{
	if (cpu == op_cpu_id())
		pmu_stop(NULL);
	else
		smp_call_function(pmu_stop, &cpu, 0, 1);
}
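
/*
 * A minimal sketch, compiled out, of the pt_regs idiom that
 * disable_psr(), pmu_start() and pmu_stop() each open-code: the
 * pt_regs saved on first entry into the kernel sit at the top of the
 * task's kernel stack, IA64_STK_OFFSET bytes above the task_struct,
 * one struct below the stack end.  The helper name is ours.
 */
#if 0
static struct pt_regs * op_saved_regs(struct task_struct * task)
{
	struct pt_regs * regs;

	regs = (struct pt_regs *)((unsigned long) task + IA64_STK_OFFSET);
	return regs - 1;	/* step back onto the saved frame */
}
/* usage, e.g.: ia64_psr(op_saved_regs(current))->pp = 1; */
#endif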
static void
pmu_start_all(void)
{
	int cpu, i;

	for (cpu = 0; cpu < smp_num_cpus; cpu++) {
		struct _oprof_data * data = &oprof_data[cpu];

		for (i = 0 ; i < op_nr_counters ; ++i) {
			if (sysctl.ctr[i].enabled) {
				data->ctr_count[i] = sysctl.ctr[i].count;
			} else {
				data->ctr_count[i] = 0;
			}
		}
	}

	if (!install_handler()) {
		smp_call_function(pmu_start, NULL, 0, 1);
		pmu_start(NULL);
	}
	/* FIXME: need some way to fail here */
}


static void
pmu_stop_all(void)
{
	smp_call_function(pmu_stop, NULL, 0, 1);
	pmu_stop(NULL);
	restore_handler();
}


static int
pmu_check_params(void)
{
	int i;
	int enabled = 0;

	for (i = 0; i < op_nr_counters ; i++) {
		if (!sysctl.ctr[i].enabled)
			continue;

		enabled = 1;

		if (!sysctl.ctr[i].user && !sysctl.ctr[i].kernel) {
			printk(KERN_ERR "oprofile: neither kernel nor user "
				"set for counter %d\n", i);
			return -EINVAL;
		}

		if (check_range(sysctl.ctr[i].count, 1, OP_MAX_PERF_COUNT,
			"ctr count value %d not in range (%d %ld)\n"))
			return -EINVAL;
	}

	if (!enabled) {
		printk(KERN_ERR "oprofile: no counters have been enabled.\n");
		return -EINVAL;
	}

	return 0;
}


static struct op_msrs cpu_msrs[NR_CPUS];


static void free_msr_group(struct op_msr_group * group)
{
	if (group->addrs)
		kfree(group->addrs);
	if (group->saved)
		kfree(group->saved);
	group->addrs = NULL;
	group->saved = NULL;
}


static void pmu_save_registers(void * dummy)
{
	uint i;
	uint const cpu = op_cpu_id();
	struct op_msr_group * counters = &cpu_msrs[cpu].counters;
	struct op_msr_group * controls = &cpu_msrs[cpu].controls;

	counters->addrs = NULL;
	counters->saved = NULL;
	controls->addrs = NULL;
	controls->saved = NULL;

	/* may run from smp_call_function(), i.e. in interrupt context,
	 * so the allocations must not sleep */
	counters->saved = kmalloc(
		op_nr_counters * sizeof(struct op_saved_msr), GFP_ATOMIC);
	if (!counters->saved)
		goto fault;

	controls->saved = kmalloc(
		op_nr_counters * sizeof(struct op_saved_msr), GFP_ATOMIC);
	if (!controls->saved)
		goto fault;

	for (i = 0; i < op_nr_counters; ++i) {
		controls->saved[i].low = get_pmc(i);
		counters->saved[i].low = get_pmd(i);
	}
	return;

fault:
	free_msr_group(counters);
	free_msr_group(controls);
}


static void pmu_restore_registers(void * dummy)
{
	uint i;
	uint const cpu = op_cpu_id();
	struct op_msr_group * counters = &cpu_msrs[cpu].counters;
	struct op_msr_group * controls = &cpu_msrs[cpu].controls;

	/* if pmu_save_registers() failed, there is nothing to restore */
	if (!counters->saved || !controls->saved)
		return;

	for (i = 0; i < op_nr_counters; ++i) {
		set_pmc(controls->saved[i].low, i);
		set_pmd(counters->saved[i].low, i);
	}

	free_msr_group(counters);
	free_msr_group(controls);
}


static int
pmu_init(void)
{
	int err = 0;

	/* figure out the processor type; configure the number of
	 * significant pmd bits and the number of counters */
	switch (get_cpu_type()) {
	case CPU_IA64_1:
		pmd_mask = IA64_1_PMD_MASK_VAL; break;
	case CPU_IA64_2:
	case CPU_IA64:
		pmd_mask = IA64_2_PMD_MASK_VAL; break;
	default:
		err = -EIO; break;
	}

	/* don't press on with an unrecognised CPU */
	if (err)
		goto out;

	op_nr_counters = 4;

	if ((err = smp_call_function(pmu_save_registers, NULL, 0, 1)))
		goto out;

	pmu_save_registers(NULL);

out:
	return err;
}


static void
pmu_deinit(void)
{
	smp_call_function(pmu_restore_registers, NULL, 0, 1);
	pmu_restore_registers(NULL);
}
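
/*
 * Worked example, compiled out: what pmu_init()'s pmd_mask choice
 * means for set_pmd_neg().  Itanium implements 32 significant pmd
 * bits and Itanium 2 implements 47, so a count wider than the mask
 * silently wraps instead of arming the requested period.  Pure
 * arithmetic on the constants defined at the top of this file.
 */
#if 0
static void pmd_width_example(void)
{
	ulong count = 1UL << 40;	/* wider than Itanium's 32 bits */

	/* Itanium 2: -(count) & mask = 2^47 - 2^40; period honoured */
	ulong armed2 = -(ulong)count & IA64_2_PMD_MASK_VAL;
	/* Itanium: -(count) & mask = 0; the counter overflows after a
	 * full 2^32 events rather than the 2^40 asked for */
	ulong armed1 = -(ulong)count & IA64_1_PMD_MASK_VAL;

	printk(KERN_DEBUG "armed: Itanium 0x%lx, Itanium 2 0x%lx\n",
		armed1, armed2);
}
#endif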
static char * names[] = { "0", "1", "2", "3", };


static int
pmu_add_sysctls(ctl_table * next)
{
	ctl_table * start = next;
	ctl_table * tab;
	int i, j;

	for (i = 0; i < op_nr_counters; i++) {
		next->ctl_name = 1;
		next->procname = names[i];
		next->mode = 0700;

		if (!(tab = kmalloc(sizeof(ctl_table) * 7, GFP_KERNEL)))
			goto cleanup;

		next->child = tab;

		memset(tab, 0, sizeof(ctl_table) * 7);
		tab[0] = ((ctl_table) { 1, "enabled", &sysctl_parms.ctr[i].enabled, sizeof(int), 0600, NULL, lproc_dointvec, NULL, });
		tab[1] = ((ctl_table) { 1, "event", &sysctl_parms.ctr[i].event, sizeof(int), 0600, NULL, lproc_dointvec, NULL, });
		tab[2] = ((ctl_table) { 1, "count", &sysctl_parms.ctr[i].count, sizeof(int), 0600, NULL, lproc_dointvec, NULL, });
		tab[3] = ((ctl_table) { 1, "unit_mask", &sysctl_parms.ctr[i].unit_mask, sizeof(int), 0600, NULL, lproc_dointvec, NULL, });
		tab[4] = ((ctl_table) { 1, "kernel", &sysctl_parms.ctr[i].kernel, sizeof(int), 0600, NULL, lproc_dointvec, NULL, });
		tab[5] = ((ctl_table) { 1, "user", &sysctl_parms.ctr[i].user, sizeof(int), 0600, NULL, lproc_dointvec, NULL, });
		next++;
	}

	return 0;

cleanup:
	next = start;
	for (j = 0; j < i; j++) {
		kfree(next->child);
		next++;
	}
	return -EFAULT;
}


static void pmu_remove_sysctls(ctl_table * next)
{
	int ii;

	for (ii = 0; ii < op_nr_counters; ii++) {
		kfree(next->child);
		next++;
	}
}


struct op_int_operations op_nmi_ops = {
	init: pmu_init,
	deinit: pmu_deinit,
	add_sysctls: pmu_add_sysctls,
	remove_sysctls: pmu_remove_sysctls,
	check_params: pmu_check_params,
	setup: pmu_setup_all,
	start: pmu_start_all,
	stop: pmu_stop_all,
	start_cpu: pmu_select_start,
	stop_cpu: pmu_select_stop,
};


struct op_int_operations const * op_int_interface(void)
{
	return &op_nmi_ops;
}

/* Need this dummy so module/oprofile.c links */
struct op_int_operations op_rtc_ops = {
	init: NULL,
	deinit: NULL,
	add_sysctls: NULL,
	remove_sysctls: NULL,
	check_params: NULL,
	setup: NULL,
	start: NULL,
	stop: NULL,
	start_cpu: NULL,
	stop_cpu: NULL,
};
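
/*
 * Hedged usage sketch, compiled out: the shape of the calls the
 * generic module is expected to make through the table returned by
 * op_int_interface().  The real call sites live in module/oprofile.c;
 * this only illustrates the intended init -> check_params -> setup ->
 * start ... stop -> deinit ordering.  The function name is ours.
 */
#if 0
static int example_driver_lifecycle(void)
{
	struct op_int_operations const * ops = op_int_interface();
	int err;

	if ((err = ops->init()))
		return err;
	if ((err = ops->check_params()) || (err = ops->setup())) {
		ops->deinit();
		return err;
	}
	ops->start();
	/* ... profiling runs; interrupts feed op_do_profile() ... */
	ops->stop();
	ops->deinit();
	return 0;
}
#endif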