1 #include "builtin.h" 2 #include "perf.h" 3 4 #include "util/util.h" 5 #include "util/cache.h" 6 #include "util/symbol.h" 7 #include "util/thread.h" 8 #include "util/header.h" 9 #include "util/session.h" 10 11 #include "util/parse-options.h" 12 #include "util/trace-event.h" 13 14 #include "util/debug.h" 15 16 /* ANDROID_CHANGE_BEGIN */ 17 #if 0 18 #include <linux/rbtree.h> 19 #else 20 #include "util/include/linux/rbtree.h" 21 #endif 22 /* ANDROID_CHANGE_END */ 23 24 struct alloc_stat; 25 typedef int (*sort_fn_t)(struct alloc_stat *, struct alloc_stat *); 26 27 static char const *input_name = "perf.data"; 28 29 static int alloc_flag; 30 static int caller_flag; 31 32 static int alloc_lines = -1; 33 static int caller_lines = -1; 34 35 static bool raw_ip; 36 37 static char default_sort_order[] = "frag,hit,bytes"; 38 39 static int *cpunode_map; 40 static int max_cpu_num; 41 42 struct alloc_stat { 43 u64 call_site; 44 u64 ptr; 45 u64 bytes_req; 46 u64 bytes_alloc; 47 u32 hit; 48 u32 pingpong; 49 50 short alloc_cpu; 51 52 struct rb_node node; 53 }; 54 55 static struct rb_root root_alloc_stat; 56 static struct rb_root root_alloc_sorted; 57 static struct rb_root root_caller_stat; 58 static struct rb_root root_caller_sorted; 59 60 static unsigned long total_requested, total_allocated; 61 static unsigned long nr_allocs, nr_cross_allocs; 62 63 #define PATH_SYS_NODE "/sys/devices/system/node" 64 65 static void init_cpunode_map(void) 66 { 67 FILE *fp; 68 int i; 69 70 fp = fopen("/sys/devices/system/cpu/kernel_max", "r"); 71 if (!fp) { 72 max_cpu_num = 4096; 73 return; 74 } 75 76 if (fscanf(fp, "%d", &max_cpu_num) < 1) 77 die("Failed to read 'kernel_max' from sysfs"); 78 max_cpu_num++; 79 80 cpunode_map = calloc(max_cpu_num, sizeof(int)); 81 if (!cpunode_map) 82 die("calloc"); 83 for (i = 0; i < max_cpu_num; i++) 84 cpunode_map[i] = -1; 85 fclose(fp); 86 } 87 88 static void setup_cpunode_map(void) 89 { 90 struct dirent *dent1, *dent2; 91 DIR *dir1, *dir2; 92 unsigned int cpu, mem; 93 char buf[PATH_MAX]; 94 95 init_cpunode_map(); 96 97 dir1 = opendir(PATH_SYS_NODE); 98 if (!dir1) 99 return; 100 101 while ((dent1 = readdir(dir1)) != NULL) { 102 if (dent1->d_type != DT_DIR || 103 sscanf(dent1->d_name, "node%u", &mem) < 1) 104 continue; 105 106 snprintf(buf, PATH_MAX, "%s/%s", PATH_SYS_NODE, dent1->d_name); 107 dir2 = opendir(buf); 108 if (!dir2) 109 continue; 110 while ((dent2 = readdir(dir2)) != NULL) { 111 if (dent2->d_type != DT_LNK || 112 sscanf(dent2->d_name, "cpu%u", &cpu) < 1) 113 continue; 114 cpunode_map[cpu] = mem; 115 } 116 } 117 } 118 119 static void insert_alloc_stat(unsigned long call_site, unsigned long ptr, 120 int bytes_req, int bytes_alloc, int cpu) 121 { 122 struct rb_node **node = &root_alloc_stat.rb_node; 123 struct rb_node *parent = NULL; 124 struct alloc_stat *data = NULL; 125 126 while (*node) { 127 parent = *node; 128 data = rb_entry(*node, struct alloc_stat, node); 129 130 if (ptr > data->ptr) 131 node = &(*node)->rb_right; 132 else if (ptr < data->ptr) 133 node = &(*node)->rb_left; 134 else 135 break; 136 } 137 138 if (data && data->ptr == ptr) { 139 data->hit++; 140 data->bytes_req += bytes_req; 141 data->bytes_alloc += bytes_alloc; 142 } else { 143 data = malloc(sizeof(*data)); 144 if (!data) 145 die("malloc"); 146 data->ptr = ptr; 147 data->pingpong = 0; 148 data->hit = 1; 149 data->bytes_req = bytes_req; 150 data->bytes_alloc = bytes_alloc; 151 152 rb_link_node(&data->node, parent, node); 153 rb_insert_color(&data->node, &root_alloc_stat); 154 } 155 data->call_site = call_site; 156 data->alloc_cpu = cpu; 157 } 158 159 static void insert_caller_stat(unsigned long call_site, 160 int bytes_req, int bytes_alloc) 161 { 162 struct rb_node **node = &root_caller_stat.rb_node; 163 struct rb_node *parent = NULL; 164 struct alloc_stat *data = NULL; 165 166 while (*node) { 167 parent = *node; 168 data = rb_entry(*node, struct alloc_stat, node); 169 170 if (call_site > data->call_site) 171 node = &(*node)->rb_right; 172 else if (call_site < data->call_site) 173 node = &(*node)->rb_left; 174 else 175 break; 176 } 177 178 if (data && data->call_site == call_site) { 179 data->hit++; 180 data->bytes_req += bytes_req; 181 data->bytes_alloc += bytes_alloc; 182 } else { 183 data = malloc(sizeof(*data)); 184 if (!data) 185 die("malloc"); 186 data->call_site = call_site; 187 data->pingpong = 0; 188 data->hit = 1; 189 data->bytes_req = bytes_req; 190 data->bytes_alloc = bytes_alloc; 191 192 rb_link_node(&data->node, parent, node); 193 rb_insert_color(&data->node, &root_caller_stat); 194 } 195 } 196 197 static void process_alloc_event(void *data, 198 struct event *event, 199 int cpu, 200 u64 timestamp __used, 201 struct thread *thread __used, 202 int node) 203 { 204 unsigned long call_site; 205 unsigned long ptr; 206 int bytes_req; 207 int bytes_alloc; 208 int node1, node2; 209 210 ptr = raw_field_value(event, "ptr", data); 211 call_site = raw_field_value(event, "call_site", data); 212 bytes_req = raw_field_value(event, "bytes_req", data); 213 bytes_alloc = raw_field_value(event, "bytes_alloc", data); 214 215 insert_alloc_stat(call_site, ptr, bytes_req, bytes_alloc, cpu); 216 insert_caller_stat(call_site, bytes_req, bytes_alloc); 217 218 total_requested += bytes_req; 219 total_allocated += bytes_alloc; 220 221 if (node) { 222 node1 = cpunode_map[cpu]; 223 node2 = raw_field_value(event, "node", data); 224 if (node1 != node2) 225 nr_cross_allocs++; 226 } 227 nr_allocs++; 228 } 229 230 static int ptr_cmp(struct alloc_stat *, struct alloc_stat *); 231 static int callsite_cmp(struct alloc_stat *, struct alloc_stat *); 232 233 static struct alloc_stat *search_alloc_stat(unsigned long ptr, 234 unsigned long call_site, 235 struct rb_root *root, 236 sort_fn_t sort_fn) 237 { 238 struct rb_node *node = root->rb_node; 239 struct alloc_stat key = { .ptr = ptr, .call_site = call_site }; 240 241 while (node) { 242 struct alloc_stat *data; 243 int cmp; 244 245 data = rb_entry(node, struct alloc_stat, node); 246 247 cmp = sort_fn(&key, data); 248 if (cmp < 0) 249 node = node->rb_left; 250 else if (cmp > 0) 251 node = node->rb_right; 252 else 253 return data; 254 } 255 return NULL; 256 } 257 258 static void process_free_event(void *data, 259 struct event *event, 260 int cpu, 261 u64 timestamp __used, 262 struct thread *thread __used) 263 { 264 unsigned long ptr; 265 struct alloc_stat *s_alloc, *s_caller; 266 267 ptr = raw_field_value(event, "ptr", data); 268 269 s_alloc = search_alloc_stat(ptr, 0, &root_alloc_stat, ptr_cmp); 270 if (!s_alloc) 271 return; 272 273 if (cpu != s_alloc->alloc_cpu) { 274 s_alloc->pingpong++; 275 276 s_caller = search_alloc_stat(0, s_alloc->call_site, 277 &root_caller_stat, callsite_cmp); 278 assert(s_caller); 279 s_caller->pingpong++; 280 } 281 s_alloc->alloc_cpu = -1; 282 } 283 284 static void process_raw_event(union perf_event *raw_event __used, void *data, 285 int cpu, u64 timestamp, struct thread *thread) 286 { 287 struct event *event; 288 int type; 289 290 type = trace_parse_common_type(data); 291 event = trace_find_event(type); 292 293 if (!strcmp(event->name, "kmalloc") || 294 !strcmp(event->name, "kmem_cache_alloc")) { 295 process_alloc_event(data, event, cpu, timestamp, thread, 0); 296 return; 297 } 298 299 if (!strcmp(event->name, "kmalloc_node") || 300 !strcmp(event->name, "kmem_cache_alloc_node")) { 301 process_alloc_event(data, event, cpu, timestamp, thread, 1); 302 return; 303 } 304 305 if (!strcmp(event->name, "kfree") || 306 !strcmp(event->name, "kmem_cache_free")) { 307 process_free_event(data, event, cpu, timestamp, thread); 308 return; 309 } 310 } 311 312 static int process_sample_event(union perf_event *event, 313 struct perf_sample *sample, 314 struct perf_evsel *evsel __used, 315 struct perf_session *session) 316 { 317 struct thread *thread = perf_session__findnew(session, event->ip.pid); 318 319 if (thread == NULL) { 320 pr_debug("problem processing %d event, skipping it.\n", 321 event->header.type); 322 return -1; 323 } 324 325 dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid); 326 327 process_raw_event(event, sample->raw_data, sample->cpu, 328 sample->time, thread); 329 330 return 0; 331 } 332 333 static struct perf_event_ops event_ops = { 334 .sample = process_sample_event, 335 .comm = perf_event__process_comm, 336 .ordered_samples = true, 337 }; 338 339 static double fragmentation(unsigned long n_req, unsigned long n_alloc) 340 { 341 if (n_alloc == 0) 342 return 0.0; 343 else 344 return 100.0 - (100.0 * n_req / n_alloc); 345 } 346 347 static void __print_result(struct rb_root *root, struct perf_session *session, 348 int n_lines, int is_caller) 349 { 350 struct rb_node *next; 351 struct machine *machine; 352 353 printf("%.102s\n", graph_dotted_line); 354 printf(" %-34s |", is_caller ? "Callsite": "Alloc Ptr"); 355 printf(" Total_alloc/Per | Total_req/Per | Hit | Ping-pong | Frag\n"); 356 printf("%.102s\n", graph_dotted_line); 357 358 next = rb_first(root); 359 360 machine = perf_session__find_host_machine(session); 361 if (!machine) { 362 pr_err("__print_result: couldn't find kernel information\n"); 363 return; 364 } 365 while (next && n_lines--) { 366 struct alloc_stat *data = rb_entry(next, struct alloc_stat, 367 node); 368 struct symbol *sym = NULL; 369 struct map *map; 370 char buf[BUFSIZ]; 371 u64 addr; 372 373 if (is_caller) { 374 addr = data->call_site; 375 if (!raw_ip) 376 sym = machine__find_kernel_function(machine, addr, &map, NULL); 377 } else 378 addr = data->ptr; 379 380 if (sym != NULL) 381 snprintf(buf, sizeof(buf), "%s+%" PRIx64 "", sym->name, 382 addr - map->unmap_ip(map, sym->start)); 383 else 384 snprintf(buf, sizeof(buf), "%#" PRIx64 "", addr); 385 printf(" %-34s |", buf); 386 387 printf(" %9llu/%-5lu | %9llu/%-5lu | %8lu | %8lu | %6.3f%%\n", 388 (unsigned long long)data->bytes_alloc, 389 (unsigned long)data->bytes_alloc / data->hit, 390 (unsigned long long)data->bytes_req, 391 (unsigned long)data->bytes_req / data->hit, 392 (unsigned long)data->hit, 393 (unsigned long)data->pingpong, 394 fragmentation(data->bytes_req, data->bytes_alloc)); 395 396 next = rb_next(next); 397 } 398 399 if (n_lines == -1) 400 printf(" ... | ... | ... | ... | ... | ... \n"); 401 402 printf("%.102s\n", graph_dotted_line); 403 } 404 405 static void print_summary(void) 406 { 407 printf("\nSUMMARY\n=======\n"); 408 printf("Total bytes requested: %lu\n", total_requested); 409 printf("Total bytes allocated: %lu\n", total_allocated); 410 printf("Total bytes wasted on internal fragmentation: %lu\n", 411 total_allocated - total_requested); 412 printf("Internal fragmentation: %f%%\n", 413 fragmentation(total_requested, total_allocated)); 414 printf("Cross CPU allocations: %lu/%lu\n", nr_cross_allocs, nr_allocs); 415 } 416 417 static void print_result(struct perf_session *session) 418 { 419 if (caller_flag) 420 __print_result(&root_caller_sorted, session, caller_lines, 1); 421 if (alloc_flag) 422 __print_result(&root_alloc_sorted, session, alloc_lines, 0); 423 print_summary(); 424 } 425 426 struct sort_dimension { 427 const char name[20]; 428 sort_fn_t cmp; 429 struct list_head list; 430 }; 431 432 static LIST_HEAD(caller_sort); 433 static LIST_HEAD(alloc_sort); 434 435 static void sort_insert(struct rb_root *root, struct alloc_stat *data, 436 struct list_head *sort_list) 437 { 438 struct rb_node **new = &(root->rb_node); 439 struct rb_node *parent = NULL; 440 struct sort_dimension *sort; 441 442 while (*new) { 443 struct alloc_stat *this; 444 int cmp = 0; 445 446 this = rb_entry(*new, struct alloc_stat, node); 447 parent = *new; 448 449 list_for_each_entry(sort, sort_list, list) { 450 cmp = sort->cmp(data, this); 451 if (cmp) 452 break; 453 } 454 455 if (cmp > 0) 456 new = &((*new)->rb_left); 457 else 458 new = &((*new)->rb_right); 459 } 460 461 rb_link_node(&data->node, parent, new); 462 rb_insert_color(&data->node, root); 463 } 464 465 static void __sort_result(struct rb_root *root, struct rb_root *root_sorted, 466 struct list_head *sort_list) 467 { 468 struct rb_node *node; 469 struct alloc_stat *data; 470 471 for (;;) { 472 node = rb_first(root); 473 if (!node) 474 break; 475 476 rb_erase(node, root); 477 data = rb_entry(node, struct alloc_stat, node); 478 sort_insert(root_sorted, data, sort_list); 479 } 480 } 481 482 static void sort_result(void) 483 { 484 __sort_result(&root_alloc_stat, &root_alloc_sorted, &alloc_sort); 485 __sort_result(&root_caller_stat, &root_caller_sorted, &caller_sort); 486 } 487 488 static int __cmd_kmem(void) 489 { 490 int err = -EINVAL; 491 struct perf_session *session = perf_session__new(input_name, O_RDONLY, 492 0, false, &event_ops); 493 if (session == NULL) 494 return -ENOMEM; 495 496 if (perf_session__create_kernel_maps(session) < 0) 497 goto out_delete; 498 499 if (!perf_session__has_traces(session, "kmem record")) 500 goto out_delete; 501 502 setup_pager(); 503 err = perf_session__process_events(session, &event_ops); 504 if (err != 0) 505 goto out_delete; 506 sort_result(); 507 print_result(session); 508 out_delete: 509 perf_session__delete(session); 510 return err; 511 } 512 513 static const char * const kmem_usage[] = { 514 "perf kmem [<options>] {record|stat}", 515 NULL 516 }; 517 518 static int ptr_cmp(struct alloc_stat *l, struct alloc_stat *r) 519 { 520 if (l->ptr < r->ptr) 521 return -1; 522 else if (l->ptr > r->ptr) 523 return 1; 524 return 0; 525 } 526 527 static struct sort_dimension ptr_sort_dimension = { 528 .name = "ptr", 529 .cmp = ptr_cmp, 530 }; 531 532 static int callsite_cmp(struct alloc_stat *l, struct alloc_stat *r) 533 { 534 if (l->call_site < r->call_site) 535 return -1; 536 else if (l->call_site > r->call_site) 537 return 1; 538 return 0; 539 } 540 541 static struct sort_dimension callsite_sort_dimension = { 542 .name = "callsite", 543 .cmp = callsite_cmp, 544 }; 545 546 static int hit_cmp(struct alloc_stat *l, struct alloc_stat *r) 547 { 548 if (l->hit < r->hit) 549 return -1; 550 else if (l->hit > r->hit) 551 return 1; 552 return 0; 553 } 554 555 static struct sort_dimension hit_sort_dimension = { 556 .name = "hit", 557 .cmp = hit_cmp, 558 }; 559 560 static int bytes_cmp(struct alloc_stat *l, struct alloc_stat *r) 561 { 562 if (l->bytes_alloc < r->bytes_alloc) 563 return -1; 564 else if (l->bytes_alloc > r->bytes_alloc) 565 return 1; 566 return 0; 567 } 568 569 static struct sort_dimension bytes_sort_dimension = { 570 .name = "bytes", 571 .cmp = bytes_cmp, 572 }; 573 574 static int frag_cmp(struct alloc_stat *l, struct alloc_stat *r) 575 { 576 double x, y; 577 578 x = fragmentation(l->bytes_req, l->bytes_alloc); 579 y = fragmentation(r->bytes_req, r->bytes_alloc); 580 581 if (x < y) 582 return -1; 583 else if (x > y) 584 return 1; 585 return 0; 586 } 587 588 static struct sort_dimension frag_sort_dimension = { 589 .name = "frag", 590 .cmp = frag_cmp, 591 }; 592 593 static int pingpong_cmp(struct alloc_stat *l, struct alloc_stat *r) 594 { 595 if (l->pingpong < r->pingpong) 596 return -1; 597 else if (l->pingpong > r->pingpong) 598 return 1; 599 return 0; 600 } 601 602 static struct sort_dimension pingpong_sort_dimension = { 603 .name = "pingpong", 604 .cmp = pingpong_cmp, 605 }; 606 607 static struct sort_dimension *avail_sorts[] = { 608 &ptr_sort_dimension, 609 &callsite_sort_dimension, 610 &hit_sort_dimension, 611 &bytes_sort_dimension, 612 &frag_sort_dimension, 613 &pingpong_sort_dimension, 614 }; 615 616 #define NUM_AVAIL_SORTS \ 617 (int)(sizeof(avail_sorts) / sizeof(struct sort_dimension *)) 618 619 static int sort_dimension__add(const char *tok, struct list_head *list) 620 { 621 struct sort_dimension *sort; 622 int i; 623 624 for (i = 0; i < NUM_AVAIL_SORTS; i++) { 625 if (!strcmp(avail_sorts[i]->name, tok)) { 626 sort = malloc(sizeof(*sort)); 627 if (!sort) 628 die("malloc"); 629 memcpy(sort, avail_sorts[i], sizeof(*sort)); 630 list_add_tail(&sort->list, list); 631 return 0; 632 } 633 } 634 635 return -1; 636 } 637 638 static int setup_sorting(struct list_head *sort_list, const char *arg) 639 { 640 char *tok; 641 char *str = strdup(arg); 642 643 if (!str) 644 die("strdup"); 645 646 while (true) { 647 tok = strsep(&str, ","); 648 if (!tok) 649 break; 650 if (sort_dimension__add(tok, sort_list) < 0) { 651 error("Unknown --sort key: '%s'", tok); 652 return -1; 653 } 654 } 655 656 free(str); 657 return 0; 658 } 659 660 static int parse_sort_opt(const struct option *opt __used, 661 const char *arg, int unset __used) 662 { 663 if (!arg) 664 return -1; 665 666 if (caller_flag > alloc_flag) 667 return setup_sorting(&caller_sort, arg); 668 else 669 return setup_sorting(&alloc_sort, arg); 670 671 return 0; 672 } 673 674 static int parse_caller_opt(const struct option *opt __used, 675 const char *arg __used, int unset __used) 676 { 677 caller_flag = (alloc_flag + 1); 678 return 0; 679 } 680 681 static int parse_alloc_opt(const struct option *opt __used, 682 const char *arg __used, int unset __used) 683 { 684 alloc_flag = (caller_flag + 1); 685 return 0; 686 } 687 688 static int parse_line_opt(const struct option *opt __used, 689 const char *arg, int unset __used) 690 { 691 int lines; 692 693 if (!arg) 694 return -1; 695 696 lines = strtoul(arg, NULL, 10); 697 698 if (caller_flag > alloc_flag) 699 caller_lines = lines; 700 else 701 alloc_lines = lines; 702 703 return 0; 704 } 705 706 static const struct option kmem_options[] = { 707 OPT_STRING('i', "input", &input_name, "file", 708 "input file name"), 709 OPT_CALLBACK_NOOPT(0, "caller", NULL, NULL, 710 "show per-callsite statistics", 711 parse_caller_opt), 712 OPT_CALLBACK_NOOPT(0, "alloc", NULL, NULL, 713 "show per-allocation statistics", 714 parse_alloc_opt), 715 OPT_CALLBACK('s', "sort", NULL, "key[,key2...]", 716 "sort by keys: ptr, call_site, bytes, hit, pingpong, frag", 717 parse_sort_opt), 718 OPT_CALLBACK('l', "line", NULL, "num", 719 "show n lines", 720 parse_line_opt), 721 OPT_BOOLEAN(0, "raw-ip", &raw_ip, "show raw ip instead of symbol"), 722 OPT_END() 723 }; 724 725 static const char *record_args[] = { 726 "record", 727 "-a", 728 "-R", 729 "-f", 730 "-c", "1", 731 "-e", "kmem:kmalloc", 732 "-e", "kmem:kmalloc_node", 733 "-e", "kmem:kfree", 734 "-e", "kmem:kmem_cache_alloc", 735 "-e", "kmem:kmem_cache_alloc_node", 736 "-e", "kmem:kmem_cache_free", 737 }; 738 739 static int __cmd_record(int argc, const char **argv) 740 { 741 unsigned int rec_argc, i, j; 742 const char **rec_argv; 743 744 rec_argc = ARRAY_SIZE(record_args) + argc - 1; 745 rec_argv = calloc(rec_argc + 1, sizeof(char *)); 746 747 if (rec_argv == NULL) 748 return -ENOMEM; 749 750 for (i = 0; i < ARRAY_SIZE(record_args); i++) 751 rec_argv[i] = strdup(record_args[i]); 752 753 for (j = 1; j < (unsigned int)argc; j++, i++) 754 rec_argv[i] = argv[j]; 755 756 return cmd_record(i, rec_argv, NULL); 757 } 758 759 int cmd_kmem(int argc, const char **argv, const char *prefix __used) 760 { 761 argc = parse_options(argc, argv, kmem_options, kmem_usage, 0); 762 763 if (!argc) 764 usage_with_options(kmem_usage, kmem_options); 765 766 symbol__init(); 767 768 if (!strncmp(argv[0], "rec", 3)) { 769 return __cmd_record(argc, argv); 770 } else if (!strcmp(argv[0], "stat")) { 771 setup_cpunode_map(); 772 773 if (list_empty(&caller_sort)) 774 setup_sorting(&caller_sort, default_sort_order); 775 if (list_empty(&alloc_sort)) 776 setup_sorting(&alloc_sort, default_sort_order); 777 778 return __cmd_kmem(); 779 } else 780 usage_with_options(kmem_usage, kmem_options); 781 782 return 0; 783 } 784