1 /* cg_print.c - Print routines for displaying call graphs. 2 3 Copyright (C) 2000-2014 Free Software Foundation, Inc. 4 5 This file is part of GNU Binutils. 6 7 This program is free software; you can redistribute it and/or modify 8 it under the terms of the GNU General Public License as published by 9 the Free Software Foundation; either version 3 of the License, or 10 (at your option) any later version. 11 12 This program is distributed in the hope that it will be useful, 13 but WITHOUT ANY WARRANTY; without even the implied warranty of 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 GNU General Public License for more details. 16 17 You should have received a copy of the GNU General Public License 18 along with this program; if not, write to the Free Software 19 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 20 02110-1301, USA. */ 21 22 #include "gprof.h" 24 #include "libiberty.h" 25 #include "filenames.h" 26 #include "search_list.h" 27 #include "source.h" 28 #include "symtab.h" 29 #include "cg_arcs.h" 30 #include "cg_print.h" 31 #include "hist.h" 32 #include "utils.h" 33 #include "corefile.h" 34 35 /* Return value of comparison functions used to sort tables. */ 36 #define LESSTHAN -1 37 #define EQUALTO 0 38 #define GREATERTHAN 1 39 40 static void print_header (void); 41 static void print_cycle (Sym *); 42 static int cmp_member (Sym *, Sym *); 43 static void sort_members (Sym *); 44 static void print_members (Sym *); 45 static int cmp_arc (Arc *, Arc *); 46 static void sort_parents (Sym *); 47 static void print_parents (Sym *); 48 static void sort_children (Sym *); 49 static void print_children (Sym *); 50 static void print_line (Sym *); 51 static int cmp_name (const PTR, const PTR); 52 static int cmp_arc_count (const PTR, const PTR); 53 static int cmp_fun_nuses (const PTR, const PTR); 54 static void order_and_dump_functions_by_arcs 55 (Arc **, unsigned long, int, Arc **, unsigned long *); 56 57 /* Declarations of automatically generated functions to output blurbs. */ 58 extern void bsd_callg_blurb (FILE * fp); 59 extern void fsf_callg_blurb (FILE * fp); 60 61 double print_time = 0.0; 62 63 64 static void 65 print_header () 66 { 67 if (first_output) 68 first_output = FALSE; 69 else 70 printf ("\f\n"); 71 72 if (!bsd_style_output) 73 { 74 if (print_descriptions) 75 printf (_("\t\t Call graph (explanation follows)\n\n")); 76 else 77 printf (_("\t\t\tCall graph\n\n")); 78 } 79 80 printf (_("\ngranularity: each sample hit covers %ld byte(s)"), 81 (long) hist_scale * (long) sizeof (UNIT)); 82 83 if (print_time > 0.0) 84 printf (_(" for %.2f%% of %.2f seconds\n\n"), 85 100.0 / print_time, print_time / hz); 86 else 87 { 88 printf (_(" no time propagated\n\n")); 89 90 /* This doesn't hurt, since all the numerators will be 0.0. */ 91 print_time = 1.0; 92 } 93 94 if (bsd_style_output) 95 { 96 printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s %-8.8s\n", 97 "", "", "", "", _("called"), _("total"), _("parents")); 98 printf ("%-6.6s %5.5s %7.7s %11.11s %7.7s+%-7.7s %-8.8s\t%5.5s\n", 99 _("index"), 100 /* xgettext:no-c-format */ 101 _("%time"), 102 _("self"), _("descendants"), _("called"), _("self"), 103 _("name"), _("index")); 104 printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s %-8.8s\n", 105 "", "", "", "", _("called"), _("total"), _("children")); 106 printf ("\n"); 107 } 108 else 109 { 110 printf (_("index %% time self children called name\n")); 111 } 112 } 113 114 /* Print a cycle header. */ 115 116 static void 117 print_cycle (Sym *cyc) 118 { 119 char buf[BUFSIZ]; 120 121 sprintf (buf, "[%d]", cyc->cg.index); 122 printf (bsd_style_output 123 ? "%-6.6s %5.1f %7.2f %11.2f %7lu" 124 : "%-6.6s %5.1f %7.2f %7.2f %7lu", buf, 125 100 * (cyc->cg.prop.self + cyc->cg.prop.child) / print_time, 126 cyc->cg.prop.self / hz, cyc->cg.prop.child / hz, cyc->ncalls); 127 128 if (cyc->cg.self_calls != 0) 129 printf ("+%-7lu", cyc->cg.self_calls); 130 else 131 printf (" %7.7s", ""); 132 133 printf (_(" <cycle %d as a whole> [%d]\n"), cyc->cg.cyc.num, cyc->cg.index); 134 } 135 136 /* Compare LEFT and RIGHT membmer. Major comparison key is 137 CG.PROP.SELF+CG.PROP.CHILD, secondary key is NCALLS+CG.SELF_CALLS. */ 138 139 static int 140 cmp_member (Sym *left, Sym *right) 141 { 142 double left_time = left->cg.prop.self + left->cg.prop.child; 143 double right_time = right->cg.prop.self + right->cg.prop.child; 144 unsigned long left_calls = left->ncalls + left->cg.self_calls; 145 unsigned long right_calls = right->ncalls + right->cg.self_calls; 146 147 if (left_time > right_time) 148 return GREATERTHAN; 149 150 if (left_time < right_time) 151 return LESSTHAN; 152 153 if (left_calls > right_calls) 154 return GREATERTHAN; 155 156 if (left_calls < right_calls) 157 return LESSTHAN; 158 159 return EQUALTO; 160 } 161 162 /* Sort members of a cycle. */ 163 164 static void 165 sort_members (Sym *cyc) 166 { 167 Sym *todo, *doing, *prev; 168 169 /* Detach cycle members from cyclehead, 170 and insertion sort them back on. */ 171 todo = cyc->cg.cyc.next; 172 cyc->cg.cyc.next = 0; 173 174 for (doing = todo; doing != NULL; doing = todo) 175 { 176 todo = doing->cg.cyc.next; 177 178 for (prev = cyc; prev->cg.cyc.next; prev = prev->cg.cyc.next) 179 { 180 if (cmp_member (doing, prev->cg.cyc.next) == GREATERTHAN) 181 break; 182 } 183 184 doing->cg.cyc.next = prev->cg.cyc.next; 185 prev->cg.cyc.next = doing; 186 } 187 } 188 189 /* Print the members of a cycle. */ 190 191 static void 192 print_members (Sym *cyc) 193 { 194 Sym *member; 195 196 sort_members (cyc); 197 198 for (member = cyc->cg.cyc.next; member; member = member->cg.cyc.next) 199 { 200 printf (bsd_style_output 201 ? "%6.6s %5.5s %7.2f %11.2f %7lu" 202 : "%6.6s %5.5s %7.2f %7.2f %7lu", 203 "", "", member->cg.prop.self / hz, member->cg.prop.child / hz, 204 member->ncalls); 205 206 if (member->cg.self_calls != 0) 207 printf ("+%-7lu", member->cg.self_calls); 208 else 209 printf (" %7.7s", ""); 210 211 printf (" "); 212 print_name (member); 213 printf ("\n"); 214 } 215 } 216 217 /* Compare two arcs to/from the same child/parent. 218 - if one arc is a self arc, it's least. 219 - if one arc is within a cycle, it's less than. 220 - if both arcs are within a cycle, compare arc counts. 221 - if neither arc is within a cycle, compare with 222 time + child_time as major key 223 arc count as minor key. */ 224 225 static int 226 cmp_arc (Arc *left, Arc *right) 227 { 228 Sym *left_parent = left->parent; 229 Sym *left_child = left->child; 230 Sym *right_parent = right->parent; 231 Sym *right_child = right->child; 232 double left_time, right_time; 233 234 DBG (TIMEDEBUG, 235 printf ("[cmp_arc] "); 236 print_name (left_parent); 237 printf (" calls "); 238 print_name (left_child); 239 printf (" %f + %f %lu/%lu\n", left->time, left->child_time, 240 left->count, left_child->ncalls); 241 printf ("[cmp_arc] "); 242 print_name (right_parent); 243 printf (" calls "); 244 print_name (right_child); 245 printf (" %f + %f %lu/%lu\n", right->time, right->child_time, 246 right->count, right_child->ncalls); 247 printf ("\n"); 248 ); 249 250 if (left_parent == left_child) 251 return LESSTHAN; /* Left is a self call. */ 252 253 if (right_parent == right_child) 254 return GREATERTHAN; /* Right is a self call. */ 255 256 if (left_parent->cg.cyc.num != 0 && left_child->cg.cyc.num != 0 257 && left_parent->cg.cyc.num == left_child->cg.cyc.num) 258 { 259 /* Left is a call within a cycle. */ 260 if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0 261 && right_parent->cg.cyc.num == right_child->cg.cyc.num) 262 { 263 /* Right is a call within the cycle, too. */ 264 if (left->count < right->count) 265 return LESSTHAN; 266 267 if (left->count > right->count) 268 return GREATERTHAN; 269 270 return EQUALTO; 271 } 272 else 273 { 274 /* Right isn't a call within the cycle. */ 275 return LESSTHAN; 276 } 277 } 278 else 279 { 280 /* Left isn't a call within a cycle. */ 281 if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0 282 && right_parent->cg.cyc.num == right_child->cg.cyc.num) 283 { 284 /* Right is a call within a cycle. */ 285 return GREATERTHAN; 286 } 287 else 288 { 289 /* Neither is a call within a cycle. */ 290 left_time = left->time + left->child_time; 291 right_time = right->time + right->child_time; 292 293 if (left_time < right_time) 294 return LESSTHAN; 295 296 if (left_time > right_time) 297 return GREATERTHAN; 298 299 if (left->count < right->count) 300 return LESSTHAN; 301 302 if (left->count > right->count) 303 return GREATERTHAN; 304 305 return EQUALTO; 306 } 307 } 308 } 309 310 311 static void 312 sort_parents (Sym * child) 313 { 314 Arc *arc, *detached, sorted, *prev; 315 316 /* Unlink parents from child, then insertion sort back on to 317 sorted's parents. 318 *arc the arc you have detached and are inserting. 319 *detached the rest of the arcs to be sorted. 320 sorted arc list onto which you insertion sort. 321 *prev arc before the arc you are comparing. */ 322 sorted.next_parent = 0; 323 324 for (arc = child->cg.parents; arc; arc = detached) 325 { 326 detached = arc->next_parent; 327 328 /* Consider *arc as disconnected; insert it into sorted. */ 329 for (prev = &sorted; prev->next_parent; prev = prev->next_parent) 330 { 331 if (cmp_arc (arc, prev->next_parent) != GREATERTHAN) 332 break; 333 } 334 335 arc->next_parent = prev->next_parent; 336 prev->next_parent = arc; 337 } 338 339 /* Reattach sorted arcs to child. */ 340 child->cg.parents = sorted.next_parent; 341 } 342 343 344 static void 345 print_parents (Sym *child) 346 { 347 Sym *parent; 348 Arc *arc; 349 Sym *cycle_head; 350 351 if (child->cg.cyc.head != 0) 352 cycle_head = child->cg.cyc.head; 353 else 354 cycle_head = child; 355 356 if (!child->cg.parents) 357 { 358 printf (bsd_style_output 359 ? _("%6.6s %5.5s %7.7s %11.11s %7.7s %7.7s <spontaneous>\n") 360 : _("%6.6s %5.5s %7.7s %7.7s %7.7s %7.7s <spontaneous>\n"), 361 "", "", "", "", "", ""); 362 return; 363 } 364 365 sort_parents (child); 366 367 for (arc = child->cg.parents; arc; arc = arc->next_parent) 368 { 369 parent = arc->parent; 370 if (child == parent || (child->cg.cyc.num != 0 371 && parent->cg.cyc.num == child->cg.cyc.num)) 372 { 373 /* Selfcall or call among siblings. */ 374 printf (bsd_style_output 375 ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s " 376 : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s ", 377 "", "", "", "", 378 arc->count, ""); 379 print_name (parent); 380 printf ("\n"); 381 } 382 else 383 { 384 /* Regular parent of child. */ 385 printf (bsd_style_output 386 ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu " 387 : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu ", 388 "", "", 389 arc->time / hz, arc->child_time / hz, 390 arc->count, cycle_head->ncalls); 391 print_name (parent); 392 printf ("\n"); 393 } 394 } 395 } 396 397 398 static void 399 sort_children (Sym *parent) 400 { 401 Arc *arc, *detached, sorted, *prev; 402 403 /* Unlink children from parent, then insertion sort back on to 404 sorted's children. 405 *arc the arc you have detached and are inserting. 406 *detached the rest of the arcs to be sorted. 407 sorted arc list onto which you insertion sort. 408 *prev arc before the arc you are comparing. */ 409 sorted.next_child = 0; 410 411 for (arc = parent->cg.children; arc; arc = detached) 412 { 413 detached = arc->next_child; 414 415 /* Consider *arc as disconnected; insert it into sorted. */ 416 for (prev = &sorted; prev->next_child; prev = prev->next_child) 417 { 418 if (cmp_arc (arc, prev->next_child) != LESSTHAN) 419 break; 420 } 421 422 arc->next_child = prev->next_child; 423 prev->next_child = arc; 424 } 425 426 /* Reattach sorted children to parent. */ 427 parent->cg.children = sorted.next_child; 428 } 429 430 431 static void 432 print_children (Sym *parent) 433 { 434 Sym *child; 435 Arc *arc; 436 437 sort_children (parent); 438 arc = parent->cg.children; 439 440 for (arc = parent->cg.children; arc; arc = arc->next_child) 441 { 442 child = arc->child; 443 if (child == parent || (child->cg.cyc.num != 0 444 && child->cg.cyc.num == parent->cg.cyc.num)) 445 { 446 /* Self call or call to sibling. */ 447 printf (bsd_style_output 448 ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s " 449 : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s ", 450 "", "", "", "", arc->count, ""); 451 print_name (child); 452 printf ("\n"); 453 } 454 else 455 { 456 /* Regular child of parent. */ 457 printf (bsd_style_output 458 ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu " 459 : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu ", 460 "", "", 461 arc->time / hz, arc->child_time / hz, 462 arc->count, child->cg.cyc.head->ncalls); 463 print_name (child); 464 printf ("\n"); 465 } 466 } 467 } 468 469 470 static void 471 print_line (Sym *np) 472 { 473 char buf[BUFSIZ]; 474 475 sprintf (buf, "[%d]", np->cg.index); 476 printf (bsd_style_output 477 ? "%-6.6s %5.1f %7.2f %11.2f" 478 : "%-6.6s %5.1f %7.2f %7.2f", buf, 479 100 * (np->cg.prop.self + np->cg.prop.child) / print_time, 480 np->cg.prop.self / hz, np->cg.prop.child / hz); 481 482 if ((np->ncalls + np->cg.self_calls) != 0) 483 { 484 printf (" %7lu", np->ncalls); 485 486 if (np->cg.self_calls != 0) 487 printf ("+%-7lu ", np->cg.self_calls); 488 else 489 printf (" %7.7s ", ""); 490 } 491 else 492 { 493 printf (" %7.7s %7.7s ", "", ""); 494 } 495 496 print_name (np); 497 printf ("\n"); 498 } 499 500 501 /* Print dynamic call graph. */ 502 503 void 504 cg_print (Sym ** timesortsym) 505 { 506 unsigned int sym_index; 507 Sym *parent; 508 509 if (print_descriptions && bsd_style_output) 510 bsd_callg_blurb (stdout); 511 512 print_header (); 513 514 for (sym_index = 0; sym_index < symtab.len + num_cycles; ++sym_index) 515 { 516 parent = timesortsym[sym_index]; 517 518 if ((ignore_zeros && parent->ncalls == 0 519 && parent->cg.self_calls == 0 && parent->cg.prop.self == 0 520 && parent->cg.prop.child == 0) 521 || !parent->cg.print_flag 522 || (line_granularity && ! parent->is_func)) 523 continue; 524 525 if (!parent->name && parent->cg.cyc.num != 0) 526 { 527 /* Cycle header. */ 528 print_cycle (parent); 529 print_members (parent); 530 } 531 else 532 { 533 print_parents (parent); 534 print_line (parent); 535 print_children (parent); 536 } 537 538 if (bsd_style_output) 539 printf ("\n"); 540 541 printf ("-----------------------------------------------\n"); 542 543 if (bsd_style_output) 544 printf ("\n"); 545 } 546 547 free (timesortsym); 548 549 if (print_descriptions && !bsd_style_output) 550 fsf_callg_blurb (stdout); 551 } 552 553 554 static int 555 cmp_name (const PTR left, const PTR right) 556 { 557 const Sym **npp1 = (const Sym **) left; 558 const Sym **npp2 = (const Sym **) right; 559 560 return strcmp ((*npp1)->name, (*npp2)->name); 561 } 562 563 564 void 565 cg_print_index () 566 { 567 unsigned int sym_index; 568 unsigned int nnames, todo, i, j; 569 int col, starting_col; 570 Sym **name_sorted_syms, *sym; 571 const char *filename; 572 char buf[20]; 573 int column_width = (output_width - 1) / 3; /* Don't write in last col! */ 574 575 /* Now, sort regular function name 576 alphabetically to create an index. */ 577 name_sorted_syms = (Sym **) xmalloc ((symtab.len + num_cycles) * sizeof (Sym *)); 578 579 for (sym_index = 0, nnames = 0; sym_index < symtab.len; sym_index++) 580 { 581 if (ignore_zeros && symtab.base[sym_index].ncalls == 0 582 && symtab.base[sym_index].hist.time == 0) 583 continue; 584 585 name_sorted_syms[nnames++] = &symtab.base[sym_index]; 586 } 587 588 qsort (name_sorted_syms, nnames, sizeof (Sym *), cmp_name); 589 590 for (sym_index = 1, todo = nnames; sym_index <= num_cycles; sym_index++) 591 name_sorted_syms[todo++] = &cycle_header[sym_index]; 592 593 printf ("\f\n"); 594 printf (_("Index by function name\n\n")); 595 sym_index = (todo + 2) / 3; 596 597 for (i = 0; i < sym_index; i++) 598 { 599 col = 0; 600 starting_col = 0; 601 602 for (j = i; j < todo; j += sym_index) 603 { 604 sym = name_sorted_syms[j]; 605 606 if (sym->cg.print_flag) 607 sprintf (buf, "[%d]", sym->cg.index); 608 else 609 sprintf (buf, "(%d)", sym->cg.index); 610 611 if (j < nnames) 612 { 613 if (bsd_style_output) 614 { 615 printf ("%6.6s %-19.19s", buf, sym->name); 616 } 617 else 618 { 619 col += strlen (buf); 620 621 for (; col < starting_col + 5; ++col) 622 putchar (' '); 623 624 printf (" %s ", buf); 625 col += print_name_only (sym); 626 627 if (!line_granularity && sym->is_static && sym->file) 628 { 629 filename = sym->file->name; 630 631 if (!print_path) 632 { 633 filename = strrchr (filename, '/'); 634 635 if (filename) 636 ++filename; 637 else 638 filename = sym->file->name; 639 } 640 641 printf (" (%s)", filename); 642 col += strlen (filename) + 3; 643 } 644 } 645 } 646 else 647 { 648 if (bsd_style_output) 649 { 650 printf ("%6.6s ", buf); 651 sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num); 652 printf ("%-19.19s", buf); 653 } 654 else 655 { 656 col += strlen (buf); 657 for (; col < starting_col + 5; ++col) 658 putchar (' '); 659 printf (" %s ", buf); 660 sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num); 661 printf ("%s", buf); 662 col += strlen (buf); 663 } 664 } 665 666 starting_col += column_width; 667 } 668 669 printf ("\n"); 670 } 671 672 free (name_sorted_syms); 673 } 674 675 /* Compare two arcs based on their usage counts. 676 We want to sort in descending order. */ 677 678 static int 679 cmp_arc_count (const PTR left, const PTR right) 680 { 681 const Arc **npp1 = (const Arc **) left; 682 const Arc **npp2 = (const Arc **) right; 683 684 if ((*npp1)->count > (*npp2)->count) 685 return -1; 686 else if ((*npp1)->count < (*npp2)->count) 687 return 1; 688 else 689 return 0; 690 } 691 692 /* Compare two funtions based on their usage counts. 693 We want to sort in descending order. */ 694 695 static int 696 cmp_fun_nuses (const PTR left, const PTR right) 697 { 698 const Sym **npp1 = (const Sym **) left; 699 const Sym **npp2 = (const Sym **) right; 700 701 if ((*npp1)->nuses > (*npp2)->nuses) 702 return -1; 703 else if ((*npp1)->nuses < (*npp2)->nuses) 704 return 1; 705 else 706 return 0; 707 } 708 709 /* Print a suggested function ordering based on the profiling data. 710 711 We perform 4 major steps when ordering functions: 712 713 * Group unused functions together and place them at the 714 end of the function order. 715 716 * Search the highest use arcs (those which account for 90% of 717 the total arc count) for functions which have several parents. 718 719 Group those with the most call sites together (currently the 720 top 1.25% which have at least five different call sites). 721 722 These are emitted at the start of the function order. 723 724 * Use a greedy placement algorithm to place functions which 725 occur in the top 99% of the arcs in the profile. Some provisions 726 are made to handle high usage arcs where the parent and/or 727 child has already been placed. 728 729 * Run the same greedy placement algorithm on the remaining 730 arcs to place the leftover functions. 731 732 733 The various "magic numbers" should (one day) be tuneable by command 734 line options. They were arrived at by benchmarking a few applications 735 with various values to see which values produced better overall function 736 orderings. 737 738 Of course, profiling errors, machine limitations (PA long calls), and 739 poor cutoff values for the placement algorithm may limit the usefullness 740 of the resulting function order. Improvements would be greatly appreciated. 741 742 Suggestions: 743 744 * Place the functions with many callers near the middle of the 745 list to reduce long calls. 746 747 * Propagate arc usage changes as functions are placed. Ie if 748 func1 and func2 are placed together, arcs to/from those arcs 749 to the same parent/child should be combined, then resort the 750 arcs to choose the next one. 751 752 * Implement some global positioning algorithm to place the 753 chains made by the greedy local positioning algorithm. Probably 754 by examining arcs which haven't been placed yet to tie two 755 chains together. 756 757 * Take a function's size and time into account in the algorithm; 758 size in particular is important on the PA (long calls). Placing 759 many small functions onto their own page may be wise. 760 761 * Use better profiling information; many published algorithms 762 are based on call sequences through time, rather than just 763 arc counts. 764 765 * Prodecure cloning could improve performance when a small number 766 of arcs account for most of the calls to a particular function. 767 768 * Use relocation information to avoid moving unused functions 769 completely out of the code stream; this would avoid severe lossage 770 when the profile data bears little resemblance to actual runs. 771 772 * Propagation of arc usages should also improve .o link line 773 ordering which shares the same arc placement algorithm with 774 the function ordering code (in fact it is a degenerate case 775 of function ordering). */ 776 777 void 778 cg_print_function_ordering (void) 779 { 780 unsigned long sym_index; 781 unsigned long arc_index; 782 unsigned long used, unused, scratch_index; 783 unsigned long unplaced_arc_count, high_arc_count, scratch_arc_count; 784 #ifdef __GNUC__ 785 unsigned long long total_arcs, tmp_arcs_count; 786 #else 787 unsigned long total_arcs, tmp_arcs_count; 788 #endif 789 Sym **unused_syms, **used_syms, **scratch_syms; 790 Arc **unplaced_arcs, **high_arcs, **scratch_arcs; 791 792 sym_index = 0; 793 used = 0; 794 unused = 0; 795 scratch_index = 0; 796 unplaced_arc_count = 0; 797 high_arc_count = 0; 798 scratch_arc_count = 0; 799 800 /* First group all the unused functions together. */ 801 unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *)); 802 used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *)); 803 scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *)); 804 high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *)); 805 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *)); 806 unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *)); 807 808 /* Walk through all the functions; mark those which are never 809 called as placed (we'll emit them as a group later). */ 810 for (sym_index = 0, used = 0, unused = 0; sym_index < symtab.len; sym_index++) 811 { 812 if (symtab.base[sym_index].ncalls == 0) 813 { 814 unused_syms[unused++] = &symtab.base[sym_index]; 815 symtab.base[sym_index].has_been_placed = 1; 816 } 817 else 818 { 819 used_syms[used++] = &symtab.base[sym_index]; 820 symtab.base[sym_index].has_been_placed = 0; 821 symtab.base[sym_index].next = 0; 822 symtab.base[sym_index].prev = 0; 823 symtab.base[sym_index].nuses = 0; 824 } 825 } 826 827 /* Sort the arcs from most used to least used. */ 828 qsort (arcs, numarcs, sizeof (Arc *), cmp_arc_count); 829 830 /* Compute the total arc count. Also mark arcs as unplaced. 831 832 Note we don't compensate for overflow if that happens! 833 Overflow is much less likely when this file is compiled 834 with GCC as it can double-wide integers via long long. */ 835 total_arcs = 0; 836 for (arc_index = 0; arc_index < numarcs; arc_index++) 837 { 838 total_arcs += arcs[arc_index]->count; 839 arcs[arc_index]->has_been_placed = 0; 840 } 841 842 /* We want to pull out those functions which are referenced 843 by many highly used arcs and emit them as a group. This 844 could probably use some tuning. */ 845 tmp_arcs_count = 0; 846 for (arc_index = 0; arc_index < numarcs; arc_index++) 847 { 848 tmp_arcs_count += arcs[arc_index]->count; 849 850 /* Count how many times each parent and child are used up 851 to our threshhold of arcs (90%). */ 852 if ((double)tmp_arcs_count / (double)total_arcs > 0.90) 853 break; 854 855 arcs[arc_index]->child->nuses++; 856 } 857 858 /* Now sort a temporary symbol table based on the number of 859 times each function was used in the highest used arcs. */ 860 memcpy (scratch_syms, used_syms, used * sizeof (Sym *)); 861 qsort (scratch_syms, used, sizeof (Sym *), cmp_fun_nuses); 862 863 /* Now pick out those symbols we're going to emit as 864 a group. We take up to 1.25% of the used symbols. */ 865 for (sym_index = 0; sym_index < used / 80; sym_index++) 866 { 867 Sym *sym = scratch_syms[sym_index]; 868 Arc *arc; 869 870 /* If we hit symbols that aren't used from many call sites, 871 then we can quit. We choose five as the low limit for 872 no particular reason. */ 873 if (sym->nuses == 5) 874 break; 875 876 /* We're going to need the arcs between these functions. 877 Unfortunately, we don't know all these functions 878 until we're done. So we keep track of all the arcs 879 to the functions we care about, then prune out those 880 which are uninteresting. 881 882 An interesting variation would be to quit when we found 883 multi-call site functions which account for some percentage 884 of the arcs. */ 885 arc = sym->cg.children; 886 887 while (arc) 888 { 889 if (arc->parent != arc->child) 890 scratch_arcs[scratch_arc_count++] = arc; 891 arc->has_been_placed = 1; 892 arc = arc->next_child; 893 } 894 895 arc = sym->cg.parents; 896 897 while (arc) 898 { 899 if (arc->parent != arc->child) 900 scratch_arcs[scratch_arc_count++] = arc; 901 arc->has_been_placed = 1; 902 arc = arc->next_parent; 903 } 904 905 /* Keep track of how many symbols we're going to place. */ 906 scratch_index = sym_index; 907 908 /* A lie, but it makes identifying 909 these functions easier later. */ 910 sym->has_been_placed = 1; 911 } 912 913 /* Now walk through the temporary arcs and copy 914 those we care about into the high arcs array. */ 915 for (arc_index = 0; arc_index < scratch_arc_count; arc_index++) 916 { 917 Arc *arc = scratch_arcs[arc_index]; 918 919 /* If this arc refers to highly used functions, then 920 then we want to keep it. */ 921 if (arc->child->has_been_placed 922 && arc->parent->has_been_placed) 923 { 924 high_arcs[high_arc_count++] = scratch_arcs[arc_index]; 925 926 /* We need to turn of has_been_placed since we're going to 927 use the main arc placement algorithm on these arcs. */ 928 arc->child->has_been_placed = 0; 929 arc->parent->has_been_placed = 0; 930 } 931 } 932 933 /* Dump the multi-site high usage functions which are not 934 going to be ordered by the main ordering algorithm. */ 935 for (sym_index = 0; sym_index < scratch_index; sym_index++) 936 { 937 if (scratch_syms[sym_index]->has_been_placed) 938 printf ("%s\n", scratch_syms[sym_index]->name); 939 } 940 941 /* Now we can order the multi-site high use 942 functions based on the arcs between them. */ 943 qsort (high_arcs, high_arc_count, sizeof (Arc *), cmp_arc_count); 944 order_and_dump_functions_by_arcs (high_arcs, high_arc_count, 1, 945 unplaced_arcs, &unplaced_arc_count); 946 947 /* Order and dump the high use functions left, 948 these typically have only a few call sites. */ 949 order_and_dump_functions_by_arcs (arcs, numarcs, 0, 950 unplaced_arcs, &unplaced_arc_count); 951 952 /* Now place the rarely used functions. */ 953 order_and_dump_functions_by_arcs (unplaced_arcs, unplaced_arc_count, 1, 954 scratch_arcs, &scratch_arc_count); 955 956 /* Output any functions not emitted by the order_and_dump calls. */ 957 for (sym_index = 0; sym_index < used; sym_index++) 958 if (used_syms[sym_index]->has_been_placed == 0) 959 printf("%s\n", used_syms[sym_index]->name); 960 961 /* Output the unused functions. */ 962 for (sym_index = 0; sym_index < unused; sym_index++) 963 printf("%s\n", unused_syms[sym_index]->name); 964 965 unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *)); 966 used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *)); 967 scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *)); 968 high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *)); 969 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *)); 970 unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *)); 971 972 free (unused_syms); 973 free (used_syms); 974 free (scratch_syms); 975 free (high_arcs); 976 free (scratch_arcs); 977 free (unplaced_arcs); 978 } 979 980 /* Place functions based on the arcs in THE_ARCS with ARC_COUNT entries; 981 place unused arcs into UNPLACED_ARCS/UNPLACED_ARC_COUNT. 982 983 If ALL is nonzero, then place all functions referenced by THE_ARCS, 984 else only place those referenced in the top 99% of the arcs in THE_ARCS. */ 985 986 #define MOST 0.99 987 static void 988 order_and_dump_functions_by_arcs (the_arcs, arc_count, all, 989 unplaced_arcs, unplaced_arc_count) 990 Arc **the_arcs; 991 unsigned long arc_count; 992 int all; 993 Arc **unplaced_arcs; 994 unsigned long *unplaced_arc_count; 995 { 996 #ifdef __GNUC__ 997 unsigned long long tmp_arcs, total_arcs; 998 #else 999 unsigned long tmp_arcs, total_arcs; 1000 #endif 1001 unsigned int arc_index; 1002 1003 /* If needed, compute the total arc count. 1004 1005 Note we don't compensate for overflow if that happens! */ 1006 if (! all) 1007 { 1008 total_arcs = 0; 1009 for (arc_index = 0; arc_index < arc_count; arc_index++) 1010 total_arcs += the_arcs[arc_index]->count; 1011 } 1012 else 1013 total_arcs = 0; 1014 1015 tmp_arcs = 0; 1016 1017 for (arc_index = 0; arc_index < arc_count; arc_index++) 1018 { 1019 Sym *sym1, *sym2; 1020 Sym *child, *parent; 1021 1022 tmp_arcs += the_arcs[arc_index]->count; 1023 1024 /* Ignore this arc if it's already been placed. */ 1025 if (the_arcs[arc_index]->has_been_placed) 1026 continue; 1027 1028 child = the_arcs[arc_index]->child; 1029 parent = the_arcs[arc_index]->parent; 1030 1031 /* If we're not using all arcs, and this is a rarely used 1032 arc, then put it on the unplaced_arc list. Similarly 1033 if both the parent and child of this arc have been placed. */ 1034 if ((! all && (double)tmp_arcs / (double)total_arcs > MOST) 1035 || child->has_been_placed || parent->has_been_placed) 1036 { 1037 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index]; 1038 continue; 1039 } 1040 1041 /* If all slots in the parent and child are full, then there isn't 1042 anything we can do right now. We'll place this arc on the 1043 unplaced arc list in the hope that a global positioning 1044 algorithm can use it to place function chains. */ 1045 if (parent->next && parent->prev && child->next && child->prev) 1046 { 1047 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index]; 1048 continue; 1049 } 1050 1051 /* If the parent is unattached, then find the closest 1052 place to attach it onto child's chain. Similarly 1053 for the opposite case. */ 1054 if (!parent->next && !parent->prev) 1055 { 1056 int next_count = 0; 1057 int prev_count = 0; 1058 Sym *prev = child; 1059 Sym *next = child; 1060 1061 /* Walk to the beginning and end of the child's chain. */ 1062 while (next->next) 1063 { 1064 next = next->next; 1065 next_count++; 1066 } 1067 1068 while (prev->prev) 1069 { 1070 prev = prev->prev; 1071 prev_count++; 1072 } 1073 1074 /* Choose the closest. */ 1075 child = next_count < prev_count ? next : prev; 1076 } 1077 else if (! child->next && !child->prev) 1078 { 1079 int next_count = 0; 1080 int prev_count = 0; 1081 Sym *prev = parent; 1082 Sym *next = parent; 1083 1084 while (next->next) 1085 { 1086 next = next->next; 1087 next_count++; 1088 } 1089 1090 while (prev->prev) 1091 { 1092 prev = prev->prev; 1093 prev_count++; 1094 } 1095 1096 parent = prev_count < next_count ? prev : next; 1097 } 1098 else 1099 { 1100 /* Couldn't find anywhere to attach the functions, 1101 put the arc on the unplaced arc list. */ 1102 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index]; 1103 continue; 1104 } 1105 1106 /* Make sure we don't tie two ends together. */ 1107 sym1 = parent; 1108 if (sym1->next) 1109 while (sym1->next) 1110 sym1 = sym1->next; 1111 else 1112 while (sym1->prev) 1113 sym1 = sym1->prev; 1114 1115 sym2 = child; 1116 if (sym2->next) 1117 while (sym2->next) 1118 sym2 = sym2->next; 1119 else 1120 while (sym2->prev) 1121 sym2 = sym2->prev; 1122 1123 if (sym1 == child 1124 && sym2 == parent) 1125 { 1126 /* This would tie two ends together. */ 1127 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index]; 1128 continue; 1129 } 1130 1131 if (parent->next) 1132 { 1133 /* Must attach to the parent's prev field. */ 1134 if (! child->next) 1135 { 1136 /* parent-prev and child-next */ 1137 parent->prev = child; 1138 child->next = parent; 1139 the_arcs[arc_index]->has_been_placed = 1; 1140 } 1141 } 1142 else if (parent->prev) 1143 { 1144 /* Must attach to the parent's next field. */ 1145 if (! child->prev) 1146 { 1147 /* parent-next and child-prev */ 1148 parent->next = child; 1149 child->prev = parent; 1150 the_arcs[arc_index]->has_been_placed = 1; 1151 } 1152 } 1153 else 1154 { 1155 /* Can attach to either field in the parent, depends 1156 on where we've got space in the child. */ 1157 if (child->prev) 1158 { 1159 /* parent-prev and child-next. */ 1160 parent->prev = child; 1161 child->next = parent; 1162 the_arcs[arc_index]->has_been_placed = 1; 1163 } 1164 else 1165 { 1166 /* parent-next and child-prev. */ 1167 parent->next = child; 1168 child->prev = parent; 1169 the_arcs[arc_index]->has_been_placed = 1; 1170 } 1171 } 1172 } 1173 1174 /* Dump the chains of functions we've made. */ 1175 for (arc_index = 0; arc_index < arc_count; arc_index++) 1176 { 1177 Sym *sym; 1178 if (the_arcs[arc_index]->parent->has_been_placed 1179 || the_arcs[arc_index]->child->has_been_placed) 1180 continue; 1181 1182 sym = the_arcs[arc_index]->parent; 1183 1184 /* If this symbol isn't attached to any other 1185 symbols, then we've got a rarely used arc. 1186 1187 Skip it for now, we'll deal with them later. */ 1188 if (sym->next == NULL 1189 && sym->prev == NULL) 1190 continue; 1191 1192 /* Get to the start of this chain. */ 1193 while (sym->prev) 1194 sym = sym->prev; 1195 1196 while (sym) 1197 { 1198 /* Mark it as placed. */ 1199 sym->has_been_placed = 1; 1200 printf ("%s\n", sym->name); 1201 sym = sym->next; 1202 } 1203 } 1204 1205 /* If we want to place all the arcs, then output 1206 those which weren't placed by the main algorithm. */ 1207 if (all) 1208 for (arc_index = 0; arc_index < arc_count; arc_index++) 1209 { 1210 Sym *sym; 1211 if (the_arcs[arc_index]->parent->has_been_placed 1212 || the_arcs[arc_index]->child->has_been_placed) 1213 continue; 1214 1215 sym = the_arcs[arc_index]->parent; 1216 1217 sym->has_been_placed = 1; 1218 printf ("%s\n", sym->name); 1219 } 1220 } 1221 1222 /* Compare two function_map structs based on file name. 1223 We want to sort in ascending order. */ 1224 1225 static int 1226 cmp_symbol_map (const void * l, const void * r) 1227 { 1228 return filename_cmp (((struct function_map *) l)->file_name, 1229 ((struct function_map *) r)->file_name); 1230 } 1231 1232 /* Print a suggested .o ordering for files on a link line based 1233 on profiling information. This uses the function placement 1234 code for the bulk of its work. */ 1235 1236 void 1237 cg_print_file_ordering (void) 1238 { 1239 unsigned long scratch_arc_count; 1240 unsigned long arc_index; 1241 unsigned long sym_index; 1242 Arc **scratch_arcs; 1243 char *last; 1244 1245 scratch_arc_count = 0; 1246 1247 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *)); 1248 for (arc_index = 0; arc_index < numarcs; arc_index++) 1249 { 1250 if (! arcs[arc_index]->parent->mapped 1251 || ! arcs[arc_index]->child->mapped) 1252 arcs[arc_index]->has_been_placed = 1; 1253 } 1254 1255 order_and_dump_functions_by_arcs (arcs, numarcs, 0, 1256 scratch_arcs, &scratch_arc_count); 1257 1258 /* Output .o's not handled by the main placement algorithm. */ 1259 for (sym_index = 0; sym_index < symtab.len; sym_index++) 1260 { 1261 if (symtab.base[sym_index].mapped 1262 && ! symtab.base[sym_index].has_been_placed) 1263 printf ("%s\n", symtab.base[sym_index].name); 1264 } 1265 1266 qsort (symbol_map, symbol_map_count, sizeof (struct function_map), cmp_symbol_map); 1267 1268 /* Now output any .o's that didn't have any text symbols. */ 1269 last = NULL; 1270 for (sym_index = 0; sym_index < symbol_map_count; sym_index++) 1271 { 1272 unsigned int index2; 1273 1274 /* Don't bother searching if this symbol 1275 is the same as the previous one. */ 1276 if (last && !filename_cmp (last, symbol_map[sym_index].file_name)) 1277 continue; 1278 1279 for (index2 = 0; index2 < symtab.len; index2++) 1280 { 1281 if (! symtab.base[index2].mapped) 1282 continue; 1283 1284 if (!filename_cmp (symtab.base[index2].name, 1285 symbol_map[sym_index].file_name)) 1286 break; 1287 } 1288 1289 /* If we didn't find it in the symbol table, then it must 1290 be a .o with no text symbols. Output it last. */ 1291 if (index2 == symtab.len) 1292 printf ("%s\n", symbol_map[sym_index].file_name); 1293 last = symbol_map[sym_index].file_name; 1294 } 1295 } 1296