1 #include "builtin.h" 2 #include "perf.h" 3 4 #include "util/util.h" 5 #include "util/cache.h" 6 #include "util/symbol.h" 7 #include "util/thread.h" 8 #include "util/header.h" 9 #include "util/session.h" 10 11 #include "util/parse-options.h" 12 #include "util/trace-event.h" 13 14 #include "util/debug.h" 15 16 /* ANDROID_CHANGE_BEGIN */ 17 #ifndef __APPLE__ 18 #include <sys/prctl.h> 19 #endif 20 /* ANDROID_CHANGE_END */ 21 22 #include <semaphore.h> 23 /* ANDROID_CHANGE_BEGIN */ 24 #ifdef __BIONIC__ 25 /* PTHREAD_STACK_MIN is defined as (2 * PAGE_SIZE) */ 26 #include <asm/page.h> 27 #endif 28 /* ANDROID_CHANGE_END */ 29 #include <pthread.h> 30 #include <math.h> 31 32 static char const *input_name = "perf.data"; 33 34 static char default_sort_order[] = "avg, max, switch, runtime"; 35 static const char *sort_order = default_sort_order; 36 37 static int profile_cpu = -1; 38 39 #define PR_SET_NAME 15 /* Set process name */ 40 #define MAX_CPUS 4096 41 42 static u64 run_measurement_overhead; 43 static u64 sleep_measurement_overhead; 44 45 #define COMM_LEN 20 46 #define SYM_LEN 129 47 48 #define MAX_PID 65536 49 50 static unsigned long nr_tasks; 51 52 struct sched_atom; 53 54 struct task_desc { 55 unsigned long nr; 56 unsigned long pid; 57 char comm[COMM_LEN]; 58 59 unsigned long nr_events; 60 unsigned long curr_event; 61 struct sched_atom **atoms; 62 63 pthread_t thread; 64 sem_t sleep_sem; 65 66 sem_t ready_for_work; 67 sem_t work_done_sem; 68 69 u64 cpu_usage; 70 }; 71 72 enum sched_event_type { 73 SCHED_EVENT_RUN, 74 SCHED_EVENT_SLEEP, 75 SCHED_EVENT_WAKEUP, 76 SCHED_EVENT_MIGRATION, 77 }; 78 79 struct sched_atom { 80 enum sched_event_type type; 81 int specific_wait; 82 u64 timestamp; 83 u64 duration; 84 unsigned long nr; 85 sem_t *wait_sem; 86 struct task_desc *wakee; 87 }; 88 89 static struct task_desc *pid_to_task[MAX_PID]; 90 91 static struct task_desc **tasks; 92 93 static pthread_mutex_t start_work_mutex = PTHREAD_MUTEX_INITIALIZER; 94 static u64 start_time; 95 96 static pthread_mutex_t work_done_wait_mutex = PTHREAD_MUTEX_INITIALIZER; 97 98 static unsigned long nr_run_events; 99 static unsigned long nr_sleep_events; 100 static unsigned long nr_wakeup_events; 101 102 static unsigned long nr_sleep_corrections; 103 static unsigned long nr_run_events_optimized; 104 105 static unsigned long targetless_wakeups; 106 static unsigned long multitarget_wakeups; 107 108 static u64 cpu_usage; 109 static u64 runavg_cpu_usage; 110 static u64 parent_cpu_usage; 111 static u64 runavg_parent_cpu_usage; 112 113 static unsigned long nr_runs; 114 static u64 sum_runtime; 115 static u64 sum_fluct; 116 static u64 run_avg; 117 118 static unsigned int replay_repeat = 10; 119 static unsigned long nr_timestamps; 120 static unsigned long nr_unordered_timestamps; 121 static unsigned long nr_state_machine_bugs; 122 static unsigned long nr_context_switch_bugs; 123 static unsigned long nr_events; 124 static unsigned long nr_lost_chunks; 125 static unsigned long nr_lost_events; 126 127 #define TASK_STATE_TO_CHAR_STR "RSDTtZX" 128 129 enum thread_state { 130 THREAD_SLEEPING = 0, 131 THREAD_WAIT_CPU, 132 THREAD_SCHED_IN, 133 THREAD_IGNORE 134 }; 135 136 struct work_atom { 137 struct list_head list; 138 enum thread_state state; 139 u64 sched_out_time; 140 u64 wake_up_time; 141 u64 sched_in_time; 142 u64 runtime; 143 }; 144 145 struct work_atoms { 146 struct list_head work_list; 147 struct thread *thread; 148 struct rb_node node; 149 u64 max_lat; 150 u64 max_lat_at; 151 u64 total_lat; 152 u64 nb_atoms; 153 u64 total_runtime; 154 }; 155 156 typedef int (*sort_fn_t)(struct work_atoms *, struct work_atoms *); 157 158 static struct rb_root atom_root, sorted_atom_root; 159 160 static u64 all_runtime; 161 static u64 all_count; 162 163 164 static u64 get_nsecs(void) 165 { 166 /* ANDROID_CHANGE_BEGIN */ 167 #ifndef __APPLE__ 168 struct timespec ts; 169 170 clock_gettime(CLOCK_MONOTONIC, &ts); 171 172 return ts.tv_sec * 1000000000ULL + ts.tv_nsec; 173 #else 174 return 0; 175 #endif 176 /* ANDROID_CHANGE_END */ 177 } 178 179 static void burn_nsecs(u64 nsecs) 180 { 181 u64 T0 = get_nsecs(), T1; 182 183 do { 184 T1 = get_nsecs(); 185 } while (T1 + run_measurement_overhead < T0 + nsecs); 186 } 187 188 static void sleep_nsecs(u64 nsecs) 189 { 190 struct timespec ts; 191 192 ts.tv_nsec = nsecs % 999999999; 193 ts.tv_sec = nsecs / 999999999; 194 195 nanosleep(&ts, NULL); 196 } 197 198 static void calibrate_run_measurement_overhead(void) 199 { 200 u64 T0, T1, delta, min_delta = 1000000000ULL; 201 int i; 202 203 for (i = 0; i < 10; i++) { 204 T0 = get_nsecs(); 205 burn_nsecs(0); 206 T1 = get_nsecs(); 207 delta = T1-T0; 208 min_delta = min(min_delta, delta); 209 } 210 run_measurement_overhead = min_delta; 211 212 printf("run measurement overhead: %" PRIu64 " nsecs\n", min_delta); 213 } 214 215 static void calibrate_sleep_measurement_overhead(void) 216 { 217 u64 T0, T1, delta, min_delta = 1000000000ULL; 218 int i; 219 220 for (i = 0; i < 10; i++) { 221 T0 = get_nsecs(); 222 sleep_nsecs(10000); 223 T1 = get_nsecs(); 224 delta = T1-T0; 225 min_delta = min(min_delta, delta); 226 } 227 min_delta -= 10000; 228 sleep_measurement_overhead = min_delta; 229 230 printf("sleep measurement overhead: %" PRIu64 " nsecs\n", min_delta); 231 } 232 233 static struct sched_atom * 234 get_new_event(struct task_desc *task, u64 timestamp) 235 { 236 struct sched_atom *event = zalloc(sizeof(*event)); 237 unsigned long idx = task->nr_events; 238 size_t size; 239 240 event->timestamp = timestamp; 241 event->nr = idx; 242 243 task->nr_events++; 244 size = sizeof(struct sched_atom *) * task->nr_events; 245 task->atoms = realloc(task->atoms, size); 246 BUG_ON(!task->atoms); 247 248 task->atoms[idx] = event; 249 250 return event; 251 } 252 253 static struct sched_atom *last_event(struct task_desc *task) 254 { 255 if (!task->nr_events) 256 return NULL; 257 258 return task->atoms[task->nr_events - 1]; 259 } 260 261 static void 262 add_sched_event_run(struct task_desc *task, u64 timestamp, u64 duration) 263 { 264 struct sched_atom *event, *curr_event = last_event(task); 265 266 /* 267 * optimize an existing RUN event by merging this one 268 * to it: 269 */ 270 if (curr_event && curr_event->type == SCHED_EVENT_RUN) { 271 nr_run_events_optimized++; 272 curr_event->duration += duration; 273 return; 274 } 275 276 event = get_new_event(task, timestamp); 277 278 event->type = SCHED_EVENT_RUN; 279 event->duration = duration; 280 281 nr_run_events++; 282 } 283 284 static void 285 add_sched_event_wakeup(struct task_desc *task, u64 timestamp, 286 struct task_desc *wakee) 287 { 288 struct sched_atom *event, *wakee_event; 289 290 event = get_new_event(task, timestamp); 291 event->type = SCHED_EVENT_WAKEUP; 292 event->wakee = wakee; 293 294 wakee_event = last_event(wakee); 295 if (!wakee_event || wakee_event->type != SCHED_EVENT_SLEEP) { 296 targetless_wakeups++; 297 return; 298 } 299 if (wakee_event->wait_sem) { 300 multitarget_wakeups++; 301 return; 302 } 303 304 wakee_event->wait_sem = zalloc(sizeof(*wakee_event->wait_sem)); 305 sem_init(wakee_event->wait_sem, 0, 0); 306 wakee_event->specific_wait = 1; 307 event->wait_sem = wakee_event->wait_sem; 308 309 nr_wakeup_events++; 310 } 311 312 static void 313 add_sched_event_sleep(struct task_desc *task, u64 timestamp, 314 u64 task_state __used) 315 { 316 struct sched_atom *event = get_new_event(task, timestamp); 317 318 event->type = SCHED_EVENT_SLEEP; 319 320 nr_sleep_events++; 321 } 322 323 static struct task_desc *register_pid(unsigned long pid, const char *comm) 324 { 325 struct task_desc *task; 326 327 BUG_ON(pid >= MAX_PID); 328 329 task = pid_to_task[pid]; 330 331 if (task) 332 return task; 333 334 task = zalloc(sizeof(*task)); 335 task->pid = pid; 336 task->nr = nr_tasks; 337 strcpy(task->comm, comm); 338 /* 339 * every task starts in sleeping state - this gets ignored 340 * if there's no wakeup pointing to this sleep state: 341 */ 342 add_sched_event_sleep(task, 0, 0); 343 344 pid_to_task[pid] = task; 345 nr_tasks++; 346 tasks = realloc(tasks, nr_tasks*sizeof(struct task_task *)); 347 BUG_ON(!tasks); 348 tasks[task->nr] = task; 349 350 if (verbose) 351 printf("registered task #%ld, PID %ld (%s)\n", nr_tasks, pid, comm); 352 353 return task; 354 } 355 356 357 static void print_task_traces(void) 358 { 359 struct task_desc *task; 360 unsigned long i; 361 362 for (i = 0; i < nr_tasks; i++) { 363 task = tasks[i]; 364 printf("task %6ld (%20s:%10ld), nr_events: %ld\n", 365 task->nr, task->comm, task->pid, task->nr_events); 366 } 367 } 368 369 static void add_cross_task_wakeups(void) 370 { 371 struct task_desc *task1, *task2; 372 unsigned long i, j; 373 374 for (i = 0; i < nr_tasks; i++) { 375 task1 = tasks[i]; 376 j = i + 1; 377 if (j == nr_tasks) 378 j = 0; 379 task2 = tasks[j]; 380 add_sched_event_wakeup(task1, 0, task2); 381 } 382 } 383 384 static void 385 process_sched_event(struct task_desc *this_task __used, struct sched_atom *atom) 386 { 387 int ret = 0; 388 389 switch (atom->type) { 390 case SCHED_EVENT_RUN: 391 burn_nsecs(atom->duration); 392 break; 393 case SCHED_EVENT_SLEEP: 394 if (atom->wait_sem) 395 ret = sem_wait(atom->wait_sem); 396 BUG_ON(ret); 397 break; 398 case SCHED_EVENT_WAKEUP: 399 if (atom->wait_sem) 400 ret = sem_post(atom->wait_sem); 401 BUG_ON(ret); 402 break; 403 case SCHED_EVENT_MIGRATION: 404 break; 405 default: 406 BUG_ON(1); 407 } 408 } 409 410 static u64 get_cpu_usage_nsec_parent(void) 411 { 412 struct rusage ru; 413 u64 sum; 414 int err; 415 416 err = getrusage(RUSAGE_SELF, &ru); 417 BUG_ON(err); 418 419 sum = ru.ru_utime.tv_sec*1e9 + ru.ru_utime.tv_usec*1e3; 420 sum += ru.ru_stime.tv_sec*1e9 + ru.ru_stime.tv_usec*1e3; 421 422 return sum; 423 } 424 425 static int self_open_counters(void) 426 { 427 /* ANDROID_CHANGE_BEGIN */ 428 #ifndef __APPLE__ 429 struct perf_event_attr attr; 430 int fd; 431 432 memset(&attr, 0, sizeof(attr)); 433 434 attr.type = PERF_TYPE_SOFTWARE; 435 attr.config = PERF_COUNT_SW_TASK_CLOCK; 436 437 fd = sys_perf_event_open(&attr, 0, -1, -1, 0); 438 439 if (fd < 0) 440 die("Error: sys_perf_event_open() syscall returned" 441 "with %d (%s)\n", fd, strerror(errno)); 442 return fd; 443 #else 444 die("Error: sys_perf_event_open() syscall not supported on host\n"); 445 #endif 446 /* ANDROID_CHANGE_END */ 447 } 448 449 static u64 get_cpu_usage_nsec_self(int fd) 450 { 451 u64 runtime; 452 int ret; 453 454 ret = read(fd, &runtime, sizeof(runtime)); 455 BUG_ON(ret != sizeof(runtime)); 456 457 return runtime; 458 } 459 460 static void *thread_func(void *ctx) 461 { 462 /* ANDROID_CHANGE_BEGIN */ 463 #ifndef __APPLE__ 464 /* ANDROID_CHANGE_END */ 465 struct task_desc *this_task = ctx; 466 u64 cpu_usage_0, cpu_usage_1; 467 unsigned long i, ret; 468 char comm2[22]; 469 int fd; 470 471 sprintf(comm2, ":%s", this_task->comm); 472 prctl(PR_SET_NAME, comm2); 473 fd = self_open_counters(); 474 475 again: 476 ret = sem_post(&this_task->ready_for_work); 477 BUG_ON(ret); 478 ret = pthread_mutex_lock(&start_work_mutex); 479 BUG_ON(ret); 480 ret = pthread_mutex_unlock(&start_work_mutex); 481 BUG_ON(ret); 482 483 cpu_usage_0 = get_cpu_usage_nsec_self(fd); 484 485 for (i = 0; i < this_task->nr_events; i++) { 486 this_task->curr_event = i; 487 process_sched_event(this_task, this_task->atoms[i]); 488 } 489 490 cpu_usage_1 = get_cpu_usage_nsec_self(fd); 491 this_task->cpu_usage = cpu_usage_1 - cpu_usage_0; 492 ret = sem_post(&this_task->work_done_sem); 493 BUG_ON(ret); 494 495 ret = pthread_mutex_lock(&work_done_wait_mutex); 496 BUG_ON(ret); 497 ret = pthread_mutex_unlock(&work_done_wait_mutex); 498 BUG_ON(ret); 499 500 goto again; 501 /* ANDROID_CHANGE_BEGIN */ 502 #endif // __APPLE__ 503 /* ANDROID_CHANGE_END */ 504 /* ANDROID_CHANGE_BEGIN */ 505 #if defined(__BIONIC__) || defined(__APPLE__) 506 return NULL; 507 #endif 508 /* ANDROID_CHANGE_END */ 509 } 510 511 static void create_tasks(void) 512 { 513 struct task_desc *task; 514 pthread_attr_t attr; 515 unsigned long i; 516 int err; 517 518 err = pthread_attr_init(&attr); 519 BUG_ON(err); 520 /* ANDROID_CHANGE_BEGIN */ 521 #if 0 522 err = pthread_attr_setstacksize(&attr, 523 (size_t) max(16 * 1024, PTHREAD_STACK_MIN)); 524 #else 525 err = pthread_attr_setstacksize(&attr, 526 (size_t) max((unsigned) 16 * 1024, (unsigned) PTHREAD_STACK_MIN)); 527 #endif 528 /* ANDROID_CHANGE_END */ 529 BUG_ON(err); 530 err = pthread_mutex_lock(&start_work_mutex); 531 BUG_ON(err); 532 err = pthread_mutex_lock(&work_done_wait_mutex); 533 BUG_ON(err); 534 for (i = 0; i < nr_tasks; i++) { 535 task = tasks[i]; 536 sem_init(&task->sleep_sem, 0, 0); 537 sem_init(&task->ready_for_work, 0, 0); 538 sem_init(&task->work_done_sem, 0, 0); 539 task->curr_event = 0; 540 err = pthread_create(&task->thread, &attr, thread_func, task); 541 BUG_ON(err); 542 } 543 } 544 545 static void wait_for_tasks(void) 546 { 547 u64 cpu_usage_0, cpu_usage_1; 548 struct task_desc *task; 549 unsigned long i, ret; 550 551 start_time = get_nsecs(); 552 cpu_usage = 0; 553 pthread_mutex_unlock(&work_done_wait_mutex); 554 555 for (i = 0; i < nr_tasks; i++) { 556 task = tasks[i]; 557 ret = sem_wait(&task->ready_for_work); 558 BUG_ON(ret); 559 sem_init(&task->ready_for_work, 0, 0); 560 } 561 ret = pthread_mutex_lock(&work_done_wait_mutex); 562 BUG_ON(ret); 563 564 cpu_usage_0 = get_cpu_usage_nsec_parent(); 565 566 pthread_mutex_unlock(&start_work_mutex); 567 568 for (i = 0; i < nr_tasks; i++) { 569 task = tasks[i]; 570 ret = sem_wait(&task->work_done_sem); 571 BUG_ON(ret); 572 sem_init(&task->work_done_sem, 0, 0); 573 cpu_usage += task->cpu_usage; 574 task->cpu_usage = 0; 575 } 576 577 cpu_usage_1 = get_cpu_usage_nsec_parent(); 578 if (!runavg_cpu_usage) 579 runavg_cpu_usage = cpu_usage; 580 runavg_cpu_usage = (runavg_cpu_usage*9 + cpu_usage)/10; 581 582 parent_cpu_usage = cpu_usage_1 - cpu_usage_0; 583 if (!runavg_parent_cpu_usage) 584 runavg_parent_cpu_usage = parent_cpu_usage; 585 runavg_parent_cpu_usage = (runavg_parent_cpu_usage*9 + 586 parent_cpu_usage)/10; 587 588 ret = pthread_mutex_lock(&start_work_mutex); 589 BUG_ON(ret); 590 591 for (i = 0; i < nr_tasks; i++) { 592 task = tasks[i]; 593 sem_init(&task->sleep_sem, 0, 0); 594 task->curr_event = 0; 595 } 596 } 597 598 static void run_one_test(void) 599 { 600 u64 T0, T1, delta, avg_delta, fluct; 601 602 T0 = get_nsecs(); 603 wait_for_tasks(); 604 T1 = get_nsecs(); 605 606 delta = T1 - T0; 607 sum_runtime += delta; 608 nr_runs++; 609 610 avg_delta = sum_runtime / nr_runs; 611 if (delta < avg_delta) 612 fluct = avg_delta - delta; 613 else 614 fluct = delta - avg_delta; 615 sum_fluct += fluct; 616 if (!run_avg) 617 run_avg = delta; 618 run_avg = (run_avg*9 + delta)/10; 619 620 printf("#%-3ld: %0.3f, ", 621 nr_runs, (double)delta/1000000.0); 622 623 printf("ravg: %0.2f, ", 624 (double)run_avg/1e6); 625 626 printf("cpu: %0.2f / %0.2f", 627 (double)cpu_usage/1e6, (double)runavg_cpu_usage/1e6); 628 629 #if 0 630 /* 631 * rusage statistics done by the parent, these are less 632 * accurate than the sum_exec_runtime based statistics: 633 */ 634 printf(" [%0.2f / %0.2f]", 635 (double)parent_cpu_usage/1e6, 636 (double)runavg_parent_cpu_usage/1e6); 637 #endif 638 639 printf("\n"); 640 641 if (nr_sleep_corrections) 642 printf(" (%ld sleep corrections)\n", nr_sleep_corrections); 643 nr_sleep_corrections = 0; 644 } 645 646 static void test_calibrations(void) 647 { 648 u64 T0, T1; 649 650 T0 = get_nsecs(); 651 burn_nsecs(1e6); 652 T1 = get_nsecs(); 653 654 printf("the run test took %" PRIu64 " nsecs\n", T1 - T0); 655 656 T0 = get_nsecs(); 657 sleep_nsecs(1e6); 658 T1 = get_nsecs(); 659 660 printf("the sleep test took %" PRIu64 " nsecs\n", T1 - T0); 661 } 662 663 #define FILL_FIELD(ptr, field, event, data) \ 664 ptr.field = (typeof(ptr.field)) raw_field_value(event, #field, data) 665 666 #define FILL_ARRAY(ptr, array, event, data) \ 667 do { \ 668 void *__array = raw_field_ptr(event, #array, data); \ 669 memcpy(ptr.array, __array, sizeof(ptr.array)); \ 670 } while(0) 671 672 #define FILL_COMMON_FIELDS(ptr, event, data) \ 673 do { \ 674 FILL_FIELD(ptr, common_type, event, data); \ 675 FILL_FIELD(ptr, common_flags, event, data); \ 676 FILL_FIELD(ptr, common_preempt_count, event, data); \ 677 FILL_FIELD(ptr, common_pid, event, data); \ 678 FILL_FIELD(ptr, common_tgid, event, data); \ 679 } while (0) 680 681 682 683 struct trace_switch_event { 684 u32 size; 685 686 u16 common_type; 687 u8 common_flags; 688 u8 common_preempt_count; 689 u32 common_pid; 690 u32 common_tgid; 691 692 char prev_comm[16]; 693 u32 prev_pid; 694 u32 prev_prio; 695 u64 prev_state; 696 char next_comm[16]; 697 u32 next_pid; 698 u32 next_prio; 699 }; 700 701 struct trace_runtime_event { 702 u32 size; 703 704 u16 common_type; 705 u8 common_flags; 706 u8 common_preempt_count; 707 u32 common_pid; 708 u32 common_tgid; 709 710 char comm[16]; 711 u32 pid; 712 u64 runtime; 713 u64 vruntime; 714 }; 715 716 struct trace_wakeup_event { 717 u32 size; 718 719 u16 common_type; 720 u8 common_flags; 721 u8 common_preempt_count; 722 u32 common_pid; 723 u32 common_tgid; 724 725 char comm[16]; 726 u32 pid; 727 728 u32 prio; 729 u32 success; 730 u32 cpu; 731 }; 732 733 struct trace_fork_event { 734 u32 size; 735 736 u16 common_type; 737 u8 common_flags; 738 u8 common_preempt_count; 739 u32 common_pid; 740 u32 common_tgid; 741 742 char parent_comm[16]; 743 u32 parent_pid; 744 char child_comm[16]; 745 u32 child_pid; 746 }; 747 748 struct trace_migrate_task_event { 749 u32 size; 750 751 u16 common_type; 752 u8 common_flags; 753 u8 common_preempt_count; 754 u32 common_pid; 755 u32 common_tgid; 756 757 char comm[16]; 758 u32 pid; 759 760 u32 prio; 761 u32 cpu; 762 }; 763 764 struct trace_sched_handler { 765 void (*switch_event)(struct trace_switch_event *, 766 struct perf_session *, 767 struct event *, 768 int cpu, 769 u64 timestamp, 770 struct thread *thread); 771 772 void (*runtime_event)(struct trace_runtime_event *, 773 struct perf_session *, 774 struct event *, 775 int cpu, 776 u64 timestamp, 777 struct thread *thread); 778 779 void (*wakeup_event)(struct trace_wakeup_event *, 780 struct perf_session *, 781 struct event *, 782 int cpu, 783 u64 timestamp, 784 struct thread *thread); 785 786 void (*fork_event)(struct trace_fork_event *, 787 struct event *, 788 int cpu, 789 u64 timestamp, 790 struct thread *thread); 791 792 void (*migrate_task_event)(struct trace_migrate_task_event *, 793 struct perf_session *session, 794 struct event *, 795 int cpu, 796 u64 timestamp, 797 struct thread *thread); 798 }; 799 800 801 static void 802 replay_wakeup_event(struct trace_wakeup_event *wakeup_event, 803 struct perf_session *session __used, 804 struct event *event, 805 int cpu __used, 806 u64 timestamp __used, 807 struct thread *thread __used) 808 { 809 struct task_desc *waker, *wakee; 810 811 if (verbose) { 812 printf("sched_wakeup event %p\n", event); 813 814 printf(" ... pid %d woke up %s/%d\n", 815 wakeup_event->common_pid, 816 wakeup_event->comm, 817 wakeup_event->pid); 818 } 819 820 waker = register_pid(wakeup_event->common_pid, "<unknown>"); 821 wakee = register_pid(wakeup_event->pid, wakeup_event->comm); 822 823 add_sched_event_wakeup(waker, timestamp, wakee); 824 } 825 826 static u64 cpu_last_switched[MAX_CPUS]; 827 828 static void 829 replay_switch_event(struct trace_switch_event *switch_event, 830 struct perf_session *session __used, 831 struct event *event, 832 int cpu, 833 u64 timestamp, 834 struct thread *thread __used) 835 { 836 struct task_desc *prev, __used *next; 837 u64 timestamp0; 838 s64 delta; 839 840 if (verbose) 841 printf("sched_switch event %p\n", event); 842 843 if (cpu >= MAX_CPUS || cpu < 0) 844 return; 845 846 timestamp0 = cpu_last_switched[cpu]; 847 if (timestamp0) 848 delta = timestamp - timestamp0; 849 else 850 delta = 0; 851 852 if (delta < 0) 853 die("hm, delta: %" PRIu64 " < 0 ?\n", delta); 854 855 if (verbose) { 856 printf(" ... switch from %s/%d to %s/%d [ran %" PRIu64 " nsecs]\n", 857 switch_event->prev_comm, switch_event->prev_pid, 858 switch_event->next_comm, switch_event->next_pid, 859 delta); 860 } 861 862 prev = register_pid(switch_event->prev_pid, switch_event->prev_comm); 863 next = register_pid(switch_event->next_pid, switch_event->next_comm); 864 865 cpu_last_switched[cpu] = timestamp; 866 867 add_sched_event_run(prev, timestamp, delta); 868 add_sched_event_sleep(prev, timestamp, switch_event->prev_state); 869 } 870 871 872 static void 873 replay_fork_event(struct trace_fork_event *fork_event, 874 struct event *event, 875 int cpu __used, 876 u64 timestamp __used, 877 struct thread *thread __used) 878 { 879 if (verbose) { 880 printf("sched_fork event %p\n", event); 881 printf("... parent: %s/%d\n", fork_event->parent_comm, fork_event->parent_pid); 882 printf("... child: %s/%d\n", fork_event->child_comm, fork_event->child_pid); 883 } 884 register_pid(fork_event->parent_pid, fork_event->parent_comm); 885 register_pid(fork_event->child_pid, fork_event->child_comm); 886 } 887 888 static struct trace_sched_handler replay_ops = { 889 .wakeup_event = replay_wakeup_event, 890 .switch_event = replay_switch_event, 891 .fork_event = replay_fork_event, 892 }; 893 894 struct sort_dimension { 895 const char *name; 896 sort_fn_t cmp; 897 struct list_head list; 898 }; 899 900 static LIST_HEAD(cmp_pid); 901 902 static int 903 thread_lat_cmp(struct list_head *list, struct work_atoms *l, struct work_atoms *r) 904 { 905 struct sort_dimension *sort; 906 int ret = 0; 907 908 BUG_ON(list_empty(list)); 909 910 list_for_each_entry(sort, list, list) { 911 ret = sort->cmp(l, r); 912 if (ret) 913 return ret; 914 } 915 916 return ret; 917 } 918 919 static struct work_atoms * 920 thread_atoms_search(struct rb_root *root, struct thread *thread, 921 struct list_head *sort_list) 922 { 923 struct rb_node *node = root->rb_node; 924 struct work_atoms key = { .thread = thread }; 925 926 while (node) { 927 struct work_atoms *atoms; 928 int cmp; 929 930 atoms = container_of(node, struct work_atoms, node); 931 932 cmp = thread_lat_cmp(sort_list, &key, atoms); 933 if (cmp > 0) 934 node = node->rb_left; 935 else if (cmp < 0) 936 node = node->rb_right; 937 else { 938 BUG_ON(thread != atoms->thread); 939 return atoms; 940 } 941 } 942 return NULL; 943 } 944 945 static void 946 __thread_latency_insert(struct rb_root *root, struct work_atoms *data, 947 struct list_head *sort_list) 948 { 949 struct rb_node **new = &(root->rb_node), *parent = NULL; 950 951 while (*new) { 952 struct work_atoms *this; 953 int cmp; 954 955 this = container_of(*new, struct work_atoms, node); 956 parent = *new; 957 958 cmp = thread_lat_cmp(sort_list, data, this); 959 960 if (cmp > 0) 961 new = &((*new)->rb_left); 962 else 963 new = &((*new)->rb_right); 964 } 965 966 rb_link_node(&data->node, parent, new); 967 rb_insert_color(&data->node, root); 968 } 969 970 static void thread_atoms_insert(struct thread *thread) 971 { 972 struct work_atoms *atoms = zalloc(sizeof(*atoms)); 973 if (!atoms) 974 die("No memory"); 975 976 atoms->thread = thread; 977 INIT_LIST_HEAD(&atoms->work_list); 978 __thread_latency_insert(&atom_root, atoms, &cmp_pid); 979 } 980 981 static void 982 latency_fork_event(struct trace_fork_event *fork_event __used, 983 struct event *event __used, 984 int cpu __used, 985 u64 timestamp __used, 986 struct thread *thread __used) 987 { 988 /* should insert the newcomer */ 989 } 990 991 __used 992 static char sched_out_state(struct trace_switch_event *switch_event) 993 { 994 const char *str = TASK_STATE_TO_CHAR_STR; 995 996 return str[switch_event->prev_state]; 997 } 998 999 static void 1000 add_sched_out_event(struct work_atoms *atoms, 1001 char run_state, 1002 u64 timestamp) 1003 { 1004 struct work_atom *atom = zalloc(sizeof(*atom)); 1005 if (!atom) 1006 die("Non memory"); 1007 1008 atom->sched_out_time = timestamp; 1009 1010 if (run_state == 'R') { 1011 atom->state = THREAD_WAIT_CPU; 1012 atom->wake_up_time = atom->sched_out_time; 1013 } 1014 1015 list_add_tail(&atom->list, &atoms->work_list); 1016 } 1017 1018 static void 1019 add_runtime_event(struct work_atoms *atoms, u64 delta, u64 timestamp __used) 1020 { 1021 struct work_atom *atom; 1022 1023 BUG_ON(list_empty(&atoms->work_list)); 1024 1025 atom = list_entry(atoms->work_list.prev, struct work_atom, list); 1026 1027 atom->runtime += delta; 1028 atoms->total_runtime += delta; 1029 } 1030 1031 static void 1032 add_sched_in_event(struct work_atoms *atoms, u64 timestamp) 1033 { 1034 struct work_atom *atom; 1035 u64 delta; 1036 1037 if (list_empty(&atoms->work_list)) 1038 return; 1039 1040 atom = list_entry(atoms->work_list.prev, struct work_atom, list); 1041 1042 if (atom->state != THREAD_WAIT_CPU) 1043 return; 1044 1045 if (timestamp < atom->wake_up_time) { 1046 atom->state = THREAD_IGNORE; 1047 return; 1048 } 1049 1050 atom->state = THREAD_SCHED_IN; 1051 atom->sched_in_time = timestamp; 1052 1053 delta = atom->sched_in_time - atom->wake_up_time; 1054 atoms->total_lat += delta; 1055 if (delta > atoms->max_lat) { 1056 atoms->max_lat = delta; 1057 atoms->max_lat_at = timestamp; 1058 } 1059 atoms->nb_atoms++; 1060 } 1061 1062 static void 1063 latency_switch_event(struct trace_switch_event *switch_event, 1064 struct perf_session *session, 1065 struct event *event __used, 1066 int cpu, 1067 u64 timestamp, 1068 struct thread *thread __used) 1069 { 1070 struct work_atoms *out_events, *in_events; 1071 struct thread *sched_out, *sched_in; 1072 u64 timestamp0; 1073 s64 delta; 1074 1075 BUG_ON(cpu >= MAX_CPUS || cpu < 0); 1076 1077 timestamp0 = cpu_last_switched[cpu]; 1078 cpu_last_switched[cpu] = timestamp; 1079 if (timestamp0) 1080 delta = timestamp - timestamp0; 1081 else 1082 delta = 0; 1083 1084 if (delta < 0) 1085 die("hm, delta: %" PRIu64 " < 0 ?\n", delta); 1086 1087 1088 sched_out = perf_session__findnew(session, switch_event->prev_pid); 1089 sched_in = perf_session__findnew(session, switch_event->next_pid); 1090 1091 out_events = thread_atoms_search(&atom_root, sched_out, &cmp_pid); 1092 if (!out_events) { 1093 thread_atoms_insert(sched_out); 1094 out_events = thread_atoms_search(&atom_root, sched_out, &cmp_pid); 1095 if (!out_events) 1096 die("out-event: Internal tree error"); 1097 } 1098 add_sched_out_event(out_events, sched_out_state(switch_event), timestamp); 1099 1100 in_events = thread_atoms_search(&atom_root, sched_in, &cmp_pid); 1101 if (!in_events) { 1102 thread_atoms_insert(sched_in); 1103 in_events = thread_atoms_search(&atom_root, sched_in, &cmp_pid); 1104 if (!in_events) 1105 die("in-event: Internal tree error"); 1106 /* 1107 * Take came in we have not heard about yet, 1108 * add in an initial atom in runnable state: 1109 */ 1110 add_sched_out_event(in_events, 'R', timestamp); 1111 } 1112 add_sched_in_event(in_events, timestamp); 1113 } 1114 1115 static void 1116 latency_runtime_event(struct trace_runtime_event *runtime_event, 1117 struct perf_session *session, 1118 struct event *event __used, 1119 int cpu, 1120 u64 timestamp, 1121 struct thread *this_thread __used) 1122 { 1123 struct thread *thread = perf_session__findnew(session, runtime_event->pid); 1124 struct work_atoms *atoms = thread_atoms_search(&atom_root, thread, &cmp_pid); 1125 1126 BUG_ON(cpu >= MAX_CPUS || cpu < 0); 1127 if (!atoms) { 1128 thread_atoms_insert(thread); 1129 atoms = thread_atoms_search(&atom_root, thread, &cmp_pid); 1130 if (!atoms) 1131 die("in-event: Internal tree error"); 1132 add_sched_out_event(atoms, 'R', timestamp); 1133 } 1134 1135 add_runtime_event(atoms, runtime_event->runtime, timestamp); 1136 } 1137 1138 static void 1139 latency_wakeup_event(struct trace_wakeup_event *wakeup_event, 1140 struct perf_session *session, 1141 struct event *__event __used, 1142 int cpu __used, 1143 u64 timestamp, 1144 struct thread *thread __used) 1145 { 1146 struct work_atoms *atoms; 1147 struct work_atom *atom; 1148 struct thread *wakee; 1149 1150 /* Note for later, it may be interesting to observe the failing cases */ 1151 if (!wakeup_event->success) 1152 return; 1153 1154 wakee = perf_session__findnew(session, wakeup_event->pid); 1155 atoms = thread_atoms_search(&atom_root, wakee, &cmp_pid); 1156 if (!atoms) { 1157 thread_atoms_insert(wakee); 1158 atoms = thread_atoms_search(&atom_root, wakee, &cmp_pid); 1159 if (!atoms) 1160 die("wakeup-event: Internal tree error"); 1161 add_sched_out_event(atoms, 'S', timestamp); 1162 } 1163 1164 BUG_ON(list_empty(&atoms->work_list)); 1165 1166 atom = list_entry(atoms->work_list.prev, struct work_atom, list); 1167 1168 /* 1169 * You WILL be missing events if you've recorded only 1170 * one CPU, or are only looking at only one, so don't 1171 * make useless noise. 1172 */ 1173 if (profile_cpu == -1 && atom->state != THREAD_SLEEPING) 1174 nr_state_machine_bugs++; 1175 1176 nr_timestamps++; 1177 if (atom->sched_out_time > timestamp) { 1178 nr_unordered_timestamps++; 1179 return; 1180 } 1181 1182 atom->state = THREAD_WAIT_CPU; 1183 atom->wake_up_time = timestamp; 1184 } 1185 1186 static void 1187 latency_migrate_task_event(struct trace_migrate_task_event *migrate_task_event, 1188 struct perf_session *session, 1189 struct event *__event __used, 1190 int cpu __used, 1191 u64 timestamp, 1192 struct thread *thread __used) 1193 { 1194 struct work_atoms *atoms; 1195 struct work_atom *atom; 1196 struct thread *migrant; 1197 1198 /* 1199 * Only need to worry about migration when profiling one CPU. 1200 */ 1201 if (profile_cpu == -1) 1202 return; 1203 1204 migrant = perf_session__findnew(session, migrate_task_event->pid); 1205 atoms = thread_atoms_search(&atom_root, migrant, &cmp_pid); 1206 if (!atoms) { 1207 thread_atoms_insert(migrant); 1208 register_pid(migrant->pid, migrant->comm); 1209 atoms = thread_atoms_search(&atom_root, migrant, &cmp_pid); 1210 if (!atoms) 1211 die("migration-event: Internal tree error"); 1212 add_sched_out_event(atoms, 'R', timestamp); 1213 } 1214 1215 BUG_ON(list_empty(&atoms->work_list)); 1216 1217 atom = list_entry(atoms->work_list.prev, struct work_atom, list); 1218 atom->sched_in_time = atom->sched_out_time = atom->wake_up_time = timestamp; 1219 1220 nr_timestamps++; 1221 1222 if (atom->sched_out_time > timestamp) 1223 nr_unordered_timestamps++; 1224 } 1225 1226 static struct trace_sched_handler lat_ops = { 1227 .wakeup_event = latency_wakeup_event, 1228 .switch_event = latency_switch_event, 1229 .runtime_event = latency_runtime_event, 1230 .fork_event = latency_fork_event, 1231 .migrate_task_event = latency_migrate_task_event, 1232 }; 1233 1234 static void output_lat_thread(struct work_atoms *work_list) 1235 { 1236 int i; 1237 int ret; 1238 u64 avg; 1239 1240 if (!work_list->nb_atoms) 1241 return; 1242 /* 1243 * Ignore idle threads: 1244 */ 1245 if (!strcmp(work_list->thread->comm, "swapper")) 1246 return; 1247 1248 all_runtime += work_list->total_runtime; 1249 all_count += work_list->nb_atoms; 1250 1251 ret = printf(" %s:%d ", work_list->thread->comm, work_list->thread->pid); 1252 1253 for (i = 0; i < 24 - ret; i++) 1254 printf(" "); 1255 1256 avg = work_list->total_lat / work_list->nb_atoms; 1257 1258 printf("|%11.3f ms |%9" PRIu64 " | avg:%9.3f ms | max:%9.3f ms | max at: %9.6f s\n", 1259 (double)work_list->total_runtime / 1e6, 1260 work_list->nb_atoms, (double)avg / 1e6, 1261 (double)work_list->max_lat / 1e6, 1262 (double)work_list->max_lat_at / 1e9); 1263 } 1264 1265 static int pid_cmp(struct work_atoms *l, struct work_atoms *r) 1266 { 1267 if (l->thread->pid < r->thread->pid) 1268 return -1; 1269 if (l->thread->pid > r->thread->pid) 1270 return 1; 1271 1272 return 0; 1273 } 1274 1275 static struct sort_dimension pid_sort_dimension = { 1276 .name = "pid", 1277 .cmp = pid_cmp, 1278 }; 1279 1280 static int avg_cmp(struct work_atoms *l, struct work_atoms *r) 1281 { 1282 u64 avgl, avgr; 1283 1284 if (!l->nb_atoms) 1285 return -1; 1286 1287 if (!r->nb_atoms) 1288 return 1; 1289 1290 avgl = l->total_lat / l->nb_atoms; 1291 avgr = r->total_lat / r->nb_atoms; 1292 1293 if (avgl < avgr) 1294 return -1; 1295 if (avgl > avgr) 1296 return 1; 1297 1298 return 0; 1299 } 1300 1301 static struct sort_dimension avg_sort_dimension = { 1302 .name = "avg", 1303 .cmp = avg_cmp, 1304 }; 1305 1306 static int max_cmp(struct work_atoms *l, struct work_atoms *r) 1307 { 1308 if (l->max_lat < r->max_lat) 1309 return -1; 1310 if (l->max_lat > r->max_lat) 1311 return 1; 1312 1313 return 0; 1314 } 1315 1316 static struct sort_dimension max_sort_dimension = { 1317 .name = "max", 1318 .cmp = max_cmp, 1319 }; 1320 1321 static int switch_cmp(struct work_atoms *l, struct work_atoms *r) 1322 { 1323 if (l->nb_atoms < r->nb_atoms) 1324 return -1; 1325 if (l->nb_atoms > r->nb_atoms) 1326 return 1; 1327 1328 return 0; 1329 } 1330 1331 static struct sort_dimension switch_sort_dimension = { 1332 .name = "switch", 1333 .cmp = switch_cmp, 1334 }; 1335 1336 static int runtime_cmp(struct work_atoms *l, struct work_atoms *r) 1337 { 1338 if (l->total_runtime < r->total_runtime) 1339 return -1; 1340 if (l->total_runtime > r->total_runtime) 1341 return 1; 1342 1343 return 0; 1344 } 1345 1346 static struct sort_dimension runtime_sort_dimension = { 1347 .name = "runtime", 1348 .cmp = runtime_cmp, 1349 }; 1350 1351 static struct sort_dimension *available_sorts[] = { 1352 &pid_sort_dimension, 1353 &avg_sort_dimension, 1354 &max_sort_dimension, 1355 &switch_sort_dimension, 1356 &runtime_sort_dimension, 1357 }; 1358 1359 #define NB_AVAILABLE_SORTS (int)(sizeof(available_sorts) / sizeof(struct sort_dimension *)) 1360 1361 static LIST_HEAD(sort_list); 1362 1363 static int sort_dimension__add(const char *tok, struct list_head *list) 1364 { 1365 int i; 1366 1367 for (i = 0; i < NB_AVAILABLE_SORTS; i++) { 1368 if (!strcmp(available_sorts[i]->name, tok)) { 1369 list_add_tail(&available_sorts[i]->list, list); 1370 1371 return 0; 1372 } 1373 } 1374 1375 return -1; 1376 } 1377 1378 static void setup_sorting(void); 1379 1380 static void sort_lat(void) 1381 { 1382 struct rb_node *node; 1383 1384 for (;;) { 1385 struct work_atoms *data; 1386 node = rb_first(&atom_root); 1387 if (!node) 1388 break; 1389 1390 rb_erase(node, &atom_root); 1391 data = rb_entry(node, struct work_atoms, node); 1392 __thread_latency_insert(&sorted_atom_root, data, &sort_list); 1393 } 1394 } 1395 1396 static struct trace_sched_handler *trace_handler; 1397 1398 static void 1399 process_sched_wakeup_event(void *data, struct perf_session *session, 1400 struct event *event, 1401 int cpu __used, 1402 u64 timestamp __used, 1403 struct thread *thread __used) 1404 { 1405 struct trace_wakeup_event wakeup_event; 1406 1407 FILL_COMMON_FIELDS(wakeup_event, event, data); 1408 1409 FILL_ARRAY(wakeup_event, comm, event, data); 1410 FILL_FIELD(wakeup_event, pid, event, data); 1411 FILL_FIELD(wakeup_event, prio, event, data); 1412 FILL_FIELD(wakeup_event, success, event, data); 1413 FILL_FIELD(wakeup_event, cpu, event, data); 1414 1415 if (trace_handler->wakeup_event) 1416 trace_handler->wakeup_event(&wakeup_event, session, event, 1417 cpu, timestamp, thread); 1418 } 1419 1420 /* 1421 * Track the current task - that way we can know whether there's any 1422 * weird events, such as a task being switched away that is not current. 1423 */ 1424 static int max_cpu; 1425 1426 static u32 curr_pid[MAX_CPUS] = { [0 ... MAX_CPUS-1] = -1 }; 1427 1428 static struct thread *curr_thread[MAX_CPUS]; 1429 1430 static char next_shortname1 = 'A'; 1431 static char next_shortname2 = '0'; 1432 1433 static void 1434 map_switch_event(struct trace_switch_event *switch_event, 1435 struct perf_session *session, 1436 struct event *event __used, 1437 int this_cpu, 1438 u64 timestamp, 1439 struct thread *thread __used) 1440 { 1441 struct thread *sched_out __used, *sched_in; 1442 int new_shortname; 1443 u64 timestamp0; 1444 s64 delta; 1445 int cpu; 1446 1447 BUG_ON(this_cpu >= MAX_CPUS || this_cpu < 0); 1448 1449 if (this_cpu > max_cpu) 1450 max_cpu = this_cpu; 1451 1452 timestamp0 = cpu_last_switched[this_cpu]; 1453 cpu_last_switched[this_cpu] = timestamp; 1454 if (timestamp0) 1455 delta = timestamp - timestamp0; 1456 else 1457 delta = 0; 1458 1459 if (delta < 0) 1460 die("hm, delta: %" PRIu64 " < 0 ?\n", delta); 1461 1462 1463 sched_out = perf_session__findnew(session, switch_event->prev_pid); 1464 sched_in = perf_session__findnew(session, switch_event->next_pid); 1465 1466 curr_thread[this_cpu] = sched_in; 1467 1468 printf(" "); 1469 1470 new_shortname = 0; 1471 if (!sched_in->shortname[0]) { 1472 sched_in->shortname[0] = next_shortname1; 1473 sched_in->shortname[1] = next_shortname2; 1474 1475 if (next_shortname1 < 'Z') { 1476 next_shortname1++; 1477 } else { 1478 next_shortname1='A'; 1479 if (next_shortname2 < '9') { 1480 next_shortname2++; 1481 } else { 1482 next_shortname2='0'; 1483 } 1484 } 1485 new_shortname = 1; 1486 } 1487 1488 for (cpu = 0; cpu <= max_cpu; cpu++) { 1489 if (cpu != this_cpu) 1490 printf(" "); 1491 else 1492 printf("*"); 1493 1494 if (curr_thread[cpu]) { 1495 if (curr_thread[cpu]->pid) 1496 printf("%2s ", curr_thread[cpu]->shortname); 1497 else 1498 printf(". "); 1499 } else 1500 printf(" "); 1501 } 1502 1503 printf(" %12.6f secs ", (double)timestamp/1e9); 1504 if (new_shortname) { 1505 printf("%s => %s:%d\n", 1506 sched_in->shortname, sched_in->comm, sched_in->pid); 1507 } else { 1508 printf("\n"); 1509 } 1510 } 1511 1512 1513 static void 1514 process_sched_switch_event(void *data, struct perf_session *session, 1515 struct event *event, 1516 int this_cpu, 1517 u64 timestamp __used, 1518 struct thread *thread __used) 1519 { 1520 struct trace_switch_event switch_event; 1521 1522 FILL_COMMON_FIELDS(switch_event, event, data); 1523 1524 FILL_ARRAY(switch_event, prev_comm, event, data); 1525 FILL_FIELD(switch_event, prev_pid, event, data); 1526 FILL_FIELD(switch_event, prev_prio, event, data); 1527 FILL_FIELD(switch_event, prev_state, event, data); 1528 FILL_ARRAY(switch_event, next_comm, event, data); 1529 FILL_FIELD(switch_event, next_pid, event, data); 1530 FILL_FIELD(switch_event, next_prio, event, data); 1531 1532 if (curr_pid[this_cpu] != (u32)-1) { 1533 /* 1534 * Are we trying to switch away a PID that is 1535 * not current? 1536 */ 1537 if (curr_pid[this_cpu] != switch_event.prev_pid) 1538 nr_context_switch_bugs++; 1539 } 1540 if (trace_handler->switch_event) 1541 trace_handler->switch_event(&switch_event, session, event, 1542 this_cpu, timestamp, thread); 1543 1544 curr_pid[this_cpu] = switch_event.next_pid; 1545 } 1546 1547 static void 1548 process_sched_runtime_event(void *data, struct perf_session *session, 1549 struct event *event, 1550 int cpu __used, 1551 u64 timestamp __used, 1552 struct thread *thread __used) 1553 { 1554 struct trace_runtime_event runtime_event; 1555 1556 FILL_ARRAY(runtime_event, comm, event, data); 1557 FILL_FIELD(runtime_event, pid, event, data); 1558 FILL_FIELD(runtime_event, runtime, event, data); 1559 FILL_FIELD(runtime_event, vruntime, event, data); 1560 1561 if (trace_handler->runtime_event) 1562 trace_handler->runtime_event(&runtime_event, session, event, cpu, timestamp, thread); 1563 } 1564 1565 static void 1566 process_sched_fork_event(void *data, 1567 struct event *event, 1568 int cpu __used, 1569 u64 timestamp __used, 1570 struct thread *thread __used) 1571 { 1572 struct trace_fork_event fork_event; 1573 1574 FILL_COMMON_FIELDS(fork_event, event, data); 1575 1576 FILL_ARRAY(fork_event, parent_comm, event, data); 1577 FILL_FIELD(fork_event, parent_pid, event, data); 1578 FILL_ARRAY(fork_event, child_comm, event, data); 1579 FILL_FIELD(fork_event, child_pid, event, data); 1580 1581 if (trace_handler->fork_event) 1582 trace_handler->fork_event(&fork_event, event, 1583 cpu, timestamp, thread); 1584 } 1585 1586 static void 1587 process_sched_exit_event(struct event *event, 1588 int cpu __used, 1589 u64 timestamp __used, 1590 struct thread *thread __used) 1591 { 1592 if (verbose) 1593 printf("sched_exit event %p\n", event); 1594 } 1595 1596 static void 1597 process_sched_migrate_task_event(void *data, struct perf_session *session, 1598 struct event *event, 1599 int cpu __used, 1600 u64 timestamp __used, 1601 struct thread *thread __used) 1602 { 1603 struct trace_migrate_task_event migrate_task_event; 1604 1605 FILL_COMMON_FIELDS(migrate_task_event, event, data); 1606 1607 FILL_ARRAY(migrate_task_event, comm, event, data); 1608 FILL_FIELD(migrate_task_event, pid, event, data); 1609 FILL_FIELD(migrate_task_event, prio, event, data); 1610 FILL_FIELD(migrate_task_event, cpu, event, data); 1611 1612 if (trace_handler->migrate_task_event) 1613 trace_handler->migrate_task_event(&migrate_task_event, session, 1614 event, cpu, timestamp, thread); 1615 } 1616 1617 static void process_raw_event(union perf_event *raw_event __used, 1618 struct perf_session *session, void *data, int cpu, 1619 u64 timestamp, struct thread *thread) 1620 { 1621 struct event *event; 1622 int type; 1623 1624 1625 type = trace_parse_common_type(data); 1626 event = trace_find_event(type); 1627 1628 if (!strcmp(event->name, "sched_switch")) 1629 process_sched_switch_event(data, session, event, cpu, timestamp, thread); 1630 if (!strcmp(event->name, "sched_stat_runtime")) 1631 process_sched_runtime_event(data, session, event, cpu, timestamp, thread); 1632 if (!strcmp(event->name, "sched_wakeup")) 1633 process_sched_wakeup_event(data, session, event, cpu, timestamp, thread); 1634 if (!strcmp(event->name, "sched_wakeup_new")) 1635 process_sched_wakeup_event(data, session, event, cpu, timestamp, thread); 1636 if (!strcmp(event->name, "sched_process_fork")) 1637 process_sched_fork_event(data, event, cpu, timestamp, thread); 1638 if (!strcmp(event->name, "sched_process_exit")) 1639 process_sched_exit_event(event, cpu, timestamp, thread); 1640 if (!strcmp(event->name, "sched_migrate_task")) 1641 process_sched_migrate_task_event(data, session, event, cpu, timestamp, thread); 1642 } 1643 1644 static int process_sample_event(union perf_event *event, 1645 struct perf_sample *sample, 1646 struct perf_evsel *evsel __used, 1647 struct perf_session *session) 1648 { 1649 struct thread *thread; 1650 1651 if (!(session->sample_type & PERF_SAMPLE_RAW)) 1652 return 0; 1653 1654 thread = perf_session__findnew(session, sample->pid); 1655 if (thread == NULL) { 1656 pr_debug("problem processing %d event, skipping it.\n", 1657 event->header.type); 1658 return -1; 1659 } 1660 1661 dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid); 1662 1663 if (profile_cpu != -1 && profile_cpu != (int)sample->cpu) 1664 return 0; 1665 1666 process_raw_event(event, session, sample->raw_data, sample->cpu, 1667 sample->time, thread); 1668 1669 return 0; 1670 } 1671 1672 static struct perf_event_ops event_ops = { 1673 .sample = process_sample_event, 1674 .comm = perf_event__process_comm, 1675 .lost = perf_event__process_lost, 1676 .fork = perf_event__process_task, 1677 .ordered_samples = true, 1678 }; 1679 1680 static int read_events(void) 1681 { 1682 int err = -EINVAL; 1683 struct perf_session *session = perf_session__new(input_name, O_RDONLY, 1684 0, false, &event_ops); 1685 if (session == NULL) 1686 return -ENOMEM; 1687 1688 if (perf_session__has_traces(session, "record -R")) { 1689 err = perf_session__process_events(session, &event_ops); 1690 nr_events = session->hists.stats.nr_events[0]; 1691 nr_lost_events = session->hists.stats.total_lost; 1692 nr_lost_chunks = session->hists.stats.nr_events[PERF_RECORD_LOST]; 1693 } 1694 1695 perf_session__delete(session); 1696 return err; 1697 } 1698 1699 static void print_bad_events(void) 1700 { 1701 if (nr_unordered_timestamps && nr_timestamps) { 1702 printf(" INFO: %.3f%% unordered timestamps (%ld out of %ld)\n", 1703 (double)nr_unordered_timestamps/(double)nr_timestamps*100.0, 1704 nr_unordered_timestamps, nr_timestamps); 1705 } 1706 if (nr_lost_events && nr_events) { 1707 printf(" INFO: %.3f%% lost events (%ld out of %ld, in %ld chunks)\n", 1708 (double)nr_lost_events/(double)nr_events*100.0, 1709 nr_lost_events, nr_events, nr_lost_chunks); 1710 } 1711 if (nr_state_machine_bugs && nr_timestamps) { 1712 printf(" INFO: %.3f%% state machine bugs (%ld out of %ld)", 1713 (double)nr_state_machine_bugs/(double)nr_timestamps*100.0, 1714 nr_state_machine_bugs, nr_timestamps); 1715 if (nr_lost_events) 1716 printf(" (due to lost events?)"); 1717 printf("\n"); 1718 } 1719 if (nr_context_switch_bugs && nr_timestamps) { 1720 printf(" INFO: %.3f%% context switch bugs (%ld out of %ld)", 1721 (double)nr_context_switch_bugs/(double)nr_timestamps*100.0, 1722 nr_context_switch_bugs, nr_timestamps); 1723 if (nr_lost_events) 1724 printf(" (due to lost events?)"); 1725 printf("\n"); 1726 } 1727 } 1728 1729 static void __cmd_lat(void) 1730 { 1731 struct rb_node *next; 1732 1733 setup_pager(); 1734 read_events(); 1735 sort_lat(); 1736 1737 printf("\n ---------------------------------------------------------------------------------------------------------------\n"); 1738 printf(" Task | Runtime ms | Switches | Average delay ms | Maximum delay ms | Maximum delay at |\n"); 1739 printf(" ---------------------------------------------------------------------------------------------------------------\n"); 1740 1741 next = rb_first(&sorted_atom_root); 1742 1743 while (next) { 1744 struct work_atoms *work_list; 1745 1746 work_list = rb_entry(next, struct work_atoms, node); 1747 output_lat_thread(work_list); 1748 next = rb_next(next); 1749 } 1750 1751 printf(" -----------------------------------------------------------------------------------------\n"); 1752 printf(" TOTAL: |%11.3f ms |%9" PRIu64 " |\n", 1753 (double)all_runtime/1e6, all_count); 1754 1755 printf(" ---------------------------------------------------\n"); 1756 1757 print_bad_events(); 1758 printf("\n"); 1759 1760 } 1761 1762 static struct trace_sched_handler map_ops = { 1763 .wakeup_event = NULL, 1764 .switch_event = map_switch_event, 1765 .runtime_event = NULL, 1766 .fork_event = NULL, 1767 }; 1768 1769 static void __cmd_map(void) 1770 { 1771 max_cpu = sysconf(_SC_NPROCESSORS_CONF); 1772 1773 setup_pager(); 1774 read_events(); 1775 print_bad_events(); 1776 } 1777 1778 static void __cmd_replay(void) 1779 { 1780 unsigned long i; 1781 1782 calibrate_run_measurement_overhead(); 1783 calibrate_sleep_measurement_overhead(); 1784 1785 test_calibrations(); 1786 1787 read_events(); 1788 1789 printf("nr_run_events: %ld\n", nr_run_events); 1790 printf("nr_sleep_events: %ld\n", nr_sleep_events); 1791 printf("nr_wakeup_events: %ld\n", nr_wakeup_events); 1792 1793 if (targetless_wakeups) 1794 printf("target-less wakeups: %ld\n", targetless_wakeups); 1795 if (multitarget_wakeups) 1796 printf("multi-target wakeups: %ld\n", multitarget_wakeups); 1797 if (nr_run_events_optimized) 1798 printf("run atoms optimized: %ld\n", 1799 nr_run_events_optimized); 1800 1801 print_task_traces(); 1802 add_cross_task_wakeups(); 1803 1804 create_tasks(); 1805 printf("------------------------------------------------------------\n"); 1806 for (i = 0; i < replay_repeat; i++) 1807 run_one_test(); 1808 } 1809 1810 1811 static const char * const sched_usage[] = { 1812 "perf sched [<options>] {record|latency|map|replay|trace}", 1813 NULL 1814 }; 1815 1816 static const struct option sched_options[] = { 1817 OPT_STRING('i', "input", &input_name, "file", 1818 "input file name"), 1819 OPT_INCR('v', "verbose", &verbose, 1820 "be more verbose (show symbol address, etc)"), 1821 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, 1822 "dump raw trace in ASCII"), 1823 OPT_END() 1824 }; 1825 1826 static const char * const latency_usage[] = { 1827 "perf sched latency [<options>]", 1828 NULL 1829 }; 1830 1831 static const struct option latency_options[] = { 1832 OPT_STRING('s', "sort", &sort_order, "key[,key2...]", 1833 "sort by key(s): runtime, switch, avg, max"), 1834 OPT_INCR('v', "verbose", &verbose, 1835 "be more verbose (show symbol address, etc)"), 1836 OPT_INTEGER('C', "CPU", &profile_cpu, 1837 "CPU to profile on"), 1838 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, 1839 "dump raw trace in ASCII"), 1840 OPT_END() 1841 }; 1842 1843 static const char * const replay_usage[] = { 1844 "perf sched replay [<options>]", 1845 NULL 1846 }; 1847 1848 static const struct option replay_options[] = { 1849 OPT_UINTEGER('r', "repeat", &replay_repeat, 1850 "repeat the workload replay N times (-1: infinite)"), 1851 OPT_INCR('v', "verbose", &verbose, 1852 "be more verbose (show symbol address, etc)"), 1853 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, 1854 "dump raw trace in ASCII"), 1855 OPT_END() 1856 }; 1857 1858 static void setup_sorting(void) 1859 { 1860 char *tmp, *tok, *str = strdup(sort_order); 1861 1862 for (tok = strtok_r(str, ", ", &tmp); 1863 tok; tok = strtok_r(NULL, ", ", &tmp)) { 1864 if (sort_dimension__add(tok, &sort_list) < 0) { 1865 error("Unknown --sort key: `%s'", tok); 1866 usage_with_options(latency_usage, latency_options); 1867 } 1868 } 1869 1870 free(str); 1871 1872 sort_dimension__add("pid", &cmp_pid); 1873 } 1874 1875 static const char *record_args[] = { 1876 "record", 1877 "-a", 1878 "-R", 1879 "-f", 1880 "-m", "1024", 1881 "-c", "1", 1882 "-e", "sched:sched_switch", 1883 "-e", "sched:sched_stat_wait", 1884 "-e", "sched:sched_stat_sleep", 1885 "-e", "sched:sched_stat_iowait", 1886 "-e", "sched:sched_stat_runtime", 1887 "-e", "sched:sched_process_exit", 1888 "-e", "sched:sched_process_fork", 1889 "-e", "sched:sched_wakeup", 1890 "-e", "sched:sched_migrate_task", 1891 }; 1892 1893 static int __cmd_record(int argc, const char **argv) 1894 { 1895 unsigned int rec_argc, i, j; 1896 const char **rec_argv; 1897 1898 rec_argc = ARRAY_SIZE(record_args) + argc - 1; 1899 rec_argv = calloc(rec_argc + 1, sizeof(char *)); 1900 1901 if (rec_argv == NULL) 1902 return -ENOMEM; 1903 1904 for (i = 0; i < ARRAY_SIZE(record_args); i++) 1905 rec_argv[i] = strdup(record_args[i]); 1906 1907 for (j = 1; j < (unsigned int)argc; j++, i++) 1908 rec_argv[i] = argv[j]; 1909 1910 BUG_ON(i != rec_argc); 1911 1912 return cmd_record(i, rec_argv, NULL); 1913 } 1914 1915 int cmd_sched(int argc, const char **argv, const char *prefix __used) 1916 { 1917 argc = parse_options(argc, argv, sched_options, sched_usage, 1918 PARSE_OPT_STOP_AT_NON_OPTION); 1919 if (!argc) 1920 usage_with_options(sched_usage, sched_options); 1921 1922 /* 1923 * Aliased to 'perf script' for now: 1924 */ 1925 if (!strcmp(argv[0], "script")) 1926 return cmd_script(argc, argv, prefix); 1927 1928 symbol__init(); 1929 if (!strncmp(argv[0], "rec", 3)) { 1930 return __cmd_record(argc, argv); 1931 } else if (!strncmp(argv[0], "lat", 3)) { 1932 trace_handler = &lat_ops; 1933 if (argc > 1) { 1934 argc = parse_options(argc, argv, latency_options, latency_usage, 0); 1935 if (argc) 1936 usage_with_options(latency_usage, latency_options); 1937 } 1938 setup_sorting(); 1939 __cmd_lat(); 1940 } else if (!strcmp(argv[0], "map")) { 1941 trace_handler = &map_ops; 1942 setup_sorting(); 1943 __cmd_map(); 1944 } else if (!strncmp(argv[0], "rep", 3)) { 1945 trace_handler = &replay_ops; 1946 if (argc) { 1947 argc = parse_options(argc, argv, replay_options, replay_usage, 0); 1948 if (argc) 1949 usage_with_options(replay_usage, replay_options); 1950 } 1951 __cmd_replay(); 1952 } else { 1953 usage_with_options(sched_usage, sched_options); 1954 } 1955 1956 return 0; 1957 } 1958