Home | History | Annotate | Download | only in gprof
      1 /* cg_print.c -  Print routines for displaying call graphs.
      2 
      3    Copyright (C) 2000-2016 Free Software Foundation, Inc.
      4 
      5    This file is part of GNU Binutils.
      6 
      7    This program is free software; you can redistribute it and/or modify
      8    it under the terms of the GNU General Public License as published by
      9    the Free Software Foundation; either version 3 of the License, or
     10    (at your option) any later version.
     11 
     12    This program is distributed in the hope that it will be useful,
     13    but WITHOUT ANY WARRANTY; without even the implied warranty of
     14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     15    GNU General Public License for more details.
     16 
     17    You should have received a copy of the GNU General Public License
     18    along with this program; if not, write to the Free Software
     19    Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
     20    02110-1301, USA.  */
     21 
     22 #include "gprof.h"
     24 #include "libiberty.h"
     25 #include "filenames.h"
     26 #include "search_list.h"
     27 #include "source.h"
     28 #include "symtab.h"
     29 #include "cg_arcs.h"
     30 #include "cg_print.h"
     31 #include "hist.h"
     32 #include "utils.h"
     33 #include "corefile.h"
     34 
     35 /* Return value of comparison functions used to sort tables.  */
     36 #define	LESSTHAN	-1
     37 #define	EQUALTO		0
     38 #define	GREATERTHAN	1
     39 
     40 static void print_header (void);
     41 static void print_cycle (Sym *);
     42 static int cmp_member (Sym *, Sym *);
     43 static void sort_members (Sym *);
     44 static void print_members (Sym *);
     45 static int cmp_arc (Arc *, Arc *);
     46 static void sort_parents (Sym *);
     47 static void print_parents (Sym *);
     48 static void sort_children (Sym *);
     49 static void print_children (Sym *);
     50 static void print_line (Sym *);
     51 static int cmp_name (const PTR, const PTR);
     52 static int cmp_arc_count (const PTR, const PTR);
     53 static int cmp_fun_nuses (const PTR, const PTR);
     54 static void order_and_dump_functions_by_arcs
     55   (Arc **, unsigned long, int, Arc **, unsigned long *);
     56 
     57 /* Declarations of automatically generated functions to output blurbs.  */
     58 extern void bsd_callg_blurb (FILE * fp);
     59 extern void fsf_callg_blurb (FILE * fp);
     60 
     61 double print_time = 0.0;
     62 
     63 
     64 static void
     65 print_header (void)
     66 {
     67   if (first_output)
     68     first_output = FALSE;
     69   else
     70     printf ("\f\n");
     71 
     72   if (!bsd_style_output)
     73     {
     74       if (print_descriptions)
     75 	printf (_("\t\t     Call graph (explanation follows)\n\n"));
     76       else
     77 	printf (_("\t\t\tCall graph\n\n"));
     78     }
     79 
     80   printf (_("\ngranularity: each sample hit covers %ld byte(s)"),
     81 	  (long) hist_scale * (long) sizeof (UNIT));
     82 
     83   if (print_time > 0.0)
     84     printf (_(" for %.2f%% of %.2f seconds\n\n"),
     85 	    100.0 / print_time, print_time / hz);
     86   else
     87     {
     88       printf (_(" no time propagated\n\n"));
     89 
     90       /* This doesn't hurt, since all the numerators will be 0.0.  */
     91       print_time = 1.0;
     92     }
     93 
     94   if (bsd_style_output)
     95     {
     96       printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s     %-8.8s\n",
     97 	      "", "", "", "", _("called"), _("total"), _("parents"));
     98       printf ("%-6.6s %5.5s %7.7s %11.11s %7.7s+%-7.7s %-8.8s\t%5.5s\n",
     99 	      _("index"),
    100 	      /* xgettext:no-c-format */
    101 	      _("%time"),
    102 	      _("self"), _("descendants"), _("called"), _("self"),
    103 	      _("name"), _("index"));
    104       printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s     %-8.8s\n",
    105 	      "", "", "", "", _("called"), _("total"), _("children"));
    106       printf ("\n");
    107     }
    108   else
    109     {
    110       printf (_("index %% time    self  children    called     name\n"));
    111     }
    112 }
    113 
    114 /* Print a cycle header.  */
    115 
    116 static void
    117 print_cycle (Sym *cyc)
    118 {
    119   char buf[BUFSIZ];
    120 
    121   sprintf (buf, "[%d]", cyc->cg.index);
    122   printf (bsd_style_output
    123 	  ? "%-6.6s %5.1f %7.2f %11.2f %7lu"
    124 	  : "%-6.6s %5.1f %7.2f %7.2f %7lu", buf,
    125 	  100 * (cyc->cg.prop.self + cyc->cg.prop.child) / print_time,
    126 	  cyc->cg.prop.self / hz, cyc->cg.prop.child / hz, cyc->ncalls);
    127 
    128   if (cyc->cg.self_calls != 0)
    129     printf ("+%-7lu", cyc->cg.self_calls);
    130   else
    131     printf (" %7.7s", "");
    132 
    133   printf (_(" <cycle %d as a whole> [%d]\n"), cyc->cg.cyc.num, cyc->cg.index);
    134 }
    135 
    136 /* Compare LEFT and RIGHT membmer.  Major comparison key is
    137    CG.PROP.SELF+CG.PROP.CHILD, secondary key is NCALLS+CG.SELF_CALLS.  */
    138 
    139 static int
    140 cmp_member (Sym *left, Sym *right)
    141 {
    142   double left_time = left->cg.prop.self + left->cg.prop.child;
    143   double right_time = right->cg.prop.self + right->cg.prop.child;
    144   unsigned long left_calls = left->ncalls + left->cg.self_calls;
    145   unsigned long right_calls = right->ncalls + right->cg.self_calls;
    146 
    147   if (left_time > right_time)
    148     return GREATERTHAN;
    149 
    150   if (left_time < right_time)
    151     return LESSTHAN;
    152 
    153   if (left_calls > right_calls)
    154     return GREATERTHAN;
    155 
    156   if (left_calls < right_calls)
    157     return LESSTHAN;
    158 
    159   return EQUALTO;
    160 }
    161 
    162 /* Sort members of a cycle.  */
    163 
    164 static void
    165 sort_members (Sym *cyc)
    166 {
    167   Sym *todo, *doing, *prev;
    168 
    169   /* Detach cycle members from cyclehead,
    170      and insertion sort them back on.  */
    171   todo = cyc->cg.cyc.next;
    172   cyc->cg.cyc.next = 0;
    173 
    174   for (doing = todo; doing != NULL; doing = todo)
    175     {
    176       todo = doing->cg.cyc.next;
    177 
    178       for (prev = cyc; prev->cg.cyc.next; prev = prev->cg.cyc.next)
    179 	{
    180 	  if (cmp_member (doing, prev->cg.cyc.next) == GREATERTHAN)
    181 	    break;
    182 	}
    183 
    184       doing->cg.cyc.next = prev->cg.cyc.next;
    185       prev->cg.cyc.next = doing;
    186     }
    187 }
    188 
    189 /* Print the members of a cycle.  */
    190 
    191 static void
    192 print_members (Sym *cyc)
    193 {
    194   Sym *member;
    195 
    196   sort_members (cyc);
    197 
    198   for (member = cyc->cg.cyc.next; member; member = member->cg.cyc.next)
    199     {
    200       printf (bsd_style_output
    201 	      ? "%6.6s %5.5s %7.2f %11.2f %7lu"
    202 	      : "%6.6s %5.5s %7.2f %7.2f %7lu",
    203 	      "", "", member->cg.prop.self / hz, member->cg.prop.child / hz,
    204 	      member->ncalls);
    205 
    206       if (member->cg.self_calls != 0)
    207 	printf ("+%-7lu", member->cg.self_calls);
    208       else
    209 	printf (" %7.7s", "");
    210 
    211       printf ("     ");
    212       print_name (member);
    213       printf ("\n");
    214     }
    215 }
    216 
    217 /* Compare two arcs to/from the same child/parent.
    218 	- if one arc is a self arc, it's least.
    219 	- if one arc is within a cycle, it's less than.
    220 	- if both arcs are within a cycle, compare arc counts.
    221 	- if neither arc is within a cycle, compare with
    222 		time + child_time as major key
    223 		arc count as minor key.  */
    224 
    225 static int
    226 cmp_arc (Arc *left, Arc *right)
    227 {
    228   Sym *left_parent = left->parent;
    229   Sym *left_child = left->child;
    230   Sym *right_parent = right->parent;
    231   Sym *right_child = right->child;
    232   double left_time, right_time;
    233 
    234   DBG (TIMEDEBUG,
    235        printf ("[cmp_arc] ");
    236        print_name (left_parent);
    237        printf (" calls ");
    238        print_name (left_child);
    239        printf (" %f + %f %lu/%lu\n", left->time, left->child_time,
    240 	       left->count, left_child->ncalls);
    241        printf ("[cmp_arc] ");
    242        print_name (right_parent);
    243        printf (" calls ");
    244        print_name (right_child);
    245        printf (" %f + %f %lu/%lu\n", right->time, right->child_time,
    246 	       right->count, right_child->ncalls);
    247        printf ("\n");
    248     );
    249 
    250   if (left_parent == left_child)
    251     return LESSTHAN;		/* Left is a self call.  */
    252 
    253   if (right_parent == right_child)
    254     return GREATERTHAN;		/* Right is a self call.  */
    255 
    256   if (left_parent->cg.cyc.num != 0 && left_child->cg.cyc.num != 0
    257       && left_parent->cg.cyc.num == left_child->cg.cyc.num)
    258     {
    259       /* Left is a call within a cycle.  */
    260       if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0
    261 	  && right_parent->cg.cyc.num == right_child->cg.cyc.num)
    262 	{
    263 	  /* Right is a call within the cycle, too.  */
    264 	  if (left->count < right->count)
    265 	    return LESSTHAN;
    266 
    267 	  if (left->count > right->count)
    268 	    return GREATERTHAN;
    269 
    270 	  return EQUALTO;
    271 	}
    272       else
    273 	{
    274 	  /* Right isn't a call within the cycle.  */
    275 	  return LESSTHAN;
    276 	}
    277     }
    278   else
    279     {
    280       /* Left isn't a call within a cycle.  */
    281       if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0
    282 	  && right_parent->cg.cyc.num == right_child->cg.cyc.num)
    283 	{
    284 	  /* Right is a call within a cycle.  */
    285 	  return GREATERTHAN;
    286 	}
    287       else
    288 	{
    289 	  /* Neither is a call within a cycle.  */
    290 	  left_time = left->time + left->child_time;
    291 	  right_time = right->time + right->child_time;
    292 
    293 	  if (left_time < right_time)
    294 	    return LESSTHAN;
    295 
    296 	  if (left_time > right_time)
    297 	    return GREATERTHAN;
    298 
    299 	  if (left->count < right->count)
    300 	    return LESSTHAN;
    301 
    302 	  if (left->count > right->count)
    303 	    return GREATERTHAN;
    304 
    305 	  return EQUALTO;
    306 	}
    307     }
    308 }
    309 
    310 
    311 static void
    312 sort_parents (Sym * child)
    313 {
    314   Arc *arc, *detached, sorted, *prev;
    315 
    316   /* Unlink parents from child, then insertion sort back on to
    317      sorted's parents.
    318 	  *arc        the arc you have detached and are inserting.
    319 	  *detached   the rest of the arcs to be sorted.
    320 	  sorted      arc list onto which you insertion sort.
    321 	  *prev       arc before the arc you are comparing.  */
    322   sorted.next_parent = 0;
    323 
    324   for (arc = child->cg.parents; arc; arc = detached)
    325     {
    326       detached = arc->next_parent;
    327 
    328       /* Consider *arc as disconnected; insert it into sorted.  */
    329       for (prev = &sorted; prev->next_parent; prev = prev->next_parent)
    330 	{
    331 	  if (cmp_arc (arc, prev->next_parent) != GREATERTHAN)
    332 	    break;
    333 	}
    334 
    335       arc->next_parent = prev->next_parent;
    336       prev->next_parent = arc;
    337     }
    338 
    339   /* Reattach sorted arcs to child.  */
    340   child->cg.parents = sorted.next_parent;
    341 }
    342 
    343 
    344 static void
    345 print_parents (Sym *child)
    346 {
    347   Sym *parent;
    348   Arc *arc;
    349   Sym *cycle_head;
    350 
    351   if (child->cg.cyc.head != 0)
    352     cycle_head = child->cg.cyc.head;
    353   else
    354     cycle_head = child;
    355 
    356   if (!child->cg.parents)
    357     {
    358       printf (bsd_style_output
    359 	      ? _("%6.6s %5.5s %7.7s %11.11s %7.7s %7.7s     <spontaneous>\n")
    360 	      : _("%6.6s %5.5s %7.7s %7.7s %7.7s %7.7s     <spontaneous>\n"),
    361 	      "", "", "", "", "", "");
    362       return;
    363     }
    364 
    365   sort_parents (child);
    366 
    367   for (arc = child->cg.parents; arc; arc = arc->next_parent)
    368     {
    369       parent = arc->parent;
    370       if (child == parent || (child->cg.cyc.num != 0
    371 			      && parent->cg.cyc.num == child->cg.cyc.num))
    372 	{
    373 	  /* Selfcall or call among siblings.  */
    374 	  printf (bsd_style_output
    375 		  ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s     "
    376 		  : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s     ",
    377 		  "", "", "", "",
    378 		  arc->count, "");
    379 	  print_name (parent);
    380 	  printf ("\n");
    381 	}
    382       else
    383 	{
    384 	  /* Regular parent of child.  */
    385 	  printf (bsd_style_output
    386 		  ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu     "
    387 		  : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu     ",
    388 		  "", "",
    389 		  arc->time / hz, arc->child_time / hz,
    390 		  arc->count, cycle_head->ncalls);
    391 	  print_name (parent);
    392 	  printf ("\n");
    393 	}
    394     }
    395 }
    396 
    397 
    398 static void
    399 sort_children (Sym *parent)
    400 {
    401   Arc *arc, *detached, sorted, *prev;
    402 
    403   /* Unlink children from parent, then insertion sort back on to
    404      sorted's children.
    405 	  *arc        the arc you have detached and are inserting.
    406 	  *detached   the rest of the arcs to be sorted.
    407 	  sorted      arc list onto which you insertion sort.
    408 	  *prev       arc before the arc you are comparing.  */
    409   sorted.next_child = 0;
    410 
    411   for (arc = parent->cg.children; arc; arc = detached)
    412     {
    413       detached = arc->next_child;
    414 
    415       /* Consider *arc as disconnected; insert it into sorted.  */
    416       for (prev = &sorted; prev->next_child; prev = prev->next_child)
    417 	{
    418 	  if (cmp_arc (arc, prev->next_child) != LESSTHAN)
    419 	    break;
    420 	}
    421 
    422       arc->next_child = prev->next_child;
    423       prev->next_child = arc;
    424     }
    425 
    426   /* Reattach sorted children to parent.  */
    427   parent->cg.children = sorted.next_child;
    428 }
    429 
    430 
    431 static void
    432 print_children (Sym *parent)
    433 {
    434   Sym *child;
    435   Arc *arc;
    436 
    437   sort_children (parent);
    438   arc = parent->cg.children;
    439 
    440   for (arc = parent->cg.children; arc; arc = arc->next_child)
    441     {
    442       child = arc->child;
    443       if (child == parent || (child->cg.cyc.num != 0
    444 			      && child->cg.cyc.num == parent->cg.cyc.num))
    445 	{
    446 	  /* Self call or call to sibling.  */
    447 	  printf (bsd_style_output
    448 		  ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s     "
    449 		  : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s     ",
    450 		  "", "", "", "", arc->count, "");
    451 	  print_name (child);
    452 	  printf ("\n");
    453 	}
    454       else
    455 	{
    456 	  /* Regular child of parent.  */
    457 	  printf (bsd_style_output
    458 		  ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu     "
    459 		  : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu     ",
    460 		  "", "",
    461 		  arc->time / hz, arc->child_time / hz,
    462 		  arc->count, child->cg.cyc.head->ncalls);
    463 	  print_name (child);
    464 	  printf ("\n");
    465 	}
    466     }
    467 }
    468 
    469 
    470 static void
    471 print_line (Sym *np)
    472 {
    473   char buf[BUFSIZ];
    474 
    475   sprintf (buf, "[%d]", np->cg.index);
    476   printf (bsd_style_output
    477 	  ? "%-6.6s %5.1f %7.2f %11.2f"
    478 	  : "%-6.6s %5.1f %7.2f %7.2f", buf,
    479 	  100 * (np->cg.prop.self + np->cg.prop.child) / print_time,
    480 	  np->cg.prop.self / hz, np->cg.prop.child / hz);
    481 
    482   if ((np->ncalls + np->cg.self_calls) != 0)
    483     {
    484       printf (" %7lu", np->ncalls);
    485 
    486       if (np->cg.self_calls != 0)
    487 	  printf ("+%-7lu ", np->cg.self_calls);
    488       else
    489 	  printf (" %7.7s ", "");
    490     }
    491   else
    492     {
    493       printf (" %7.7s %7.7s ", "", "");
    494     }
    495 
    496   print_name (np);
    497   printf ("\n");
    498 }
    499 
    500 
    501 /* Print dynamic call graph.  */
    502 
    503 void
    504 cg_print (Sym ** timesortsym)
    505 {
    506   unsigned int sym_index;
    507   Sym *parent;
    508 
    509   if (print_descriptions && bsd_style_output)
    510     bsd_callg_blurb (stdout);
    511 
    512   print_header ();
    513 
    514   for (sym_index = 0; sym_index < symtab.len + num_cycles; ++sym_index)
    515     {
    516       parent = timesortsym[sym_index];
    517 
    518       if ((ignore_zeros && parent->ncalls == 0
    519 	   && parent->cg.self_calls == 0 && parent->cg.prop.self == 0
    520 	   && parent->cg.prop.child == 0)
    521 	  || !parent->cg.print_flag
    522 	  || (line_granularity && ! parent->is_func))
    523 	continue;
    524 
    525       if (!parent->name && parent->cg.cyc.num != 0)
    526 	{
    527 	  /* Cycle header.  */
    528 	  print_cycle (parent);
    529 	  print_members (parent);
    530 	}
    531       else
    532 	{
    533 	  print_parents (parent);
    534 	  print_line (parent);
    535 	  print_children (parent);
    536 	}
    537 
    538       if (bsd_style_output)
    539 	printf ("\n");
    540 
    541       printf ("-----------------------------------------------\n");
    542 
    543       if (bsd_style_output)
    544 	printf ("\n");
    545     }
    546 
    547   free (timesortsym);
    548 
    549   if (print_descriptions && !bsd_style_output)
    550     fsf_callg_blurb (stdout);
    551 }
    552 
    553 
    554 static int
    555 cmp_name (const PTR left, const PTR right)
    556 {
    557   const Sym **npp1 = (const Sym **) left;
    558   const Sym **npp2 = (const Sym **) right;
    559 
    560   return strcmp ((*npp1)->name, (*npp2)->name);
    561 }
    562 
    563 
    564 void
    565 cg_print_index (void)
    566 {
    567   unsigned int sym_index;
    568   unsigned int nnames, todo, i, j;
    569   int col, starting_col;
    570   Sym **name_sorted_syms, *sym;
    571   const char *filename;
    572   char buf[20];
    573   int column_width = (output_width - 1) / 3;	/* Don't write in last col!  */
    574 
    575   /* Now, sort regular function name
    576      alphabetically to create an index.  */
    577   name_sorted_syms = (Sym **) xmalloc ((symtab.len + num_cycles) * sizeof (Sym *));
    578 
    579   for (sym_index = 0, nnames = 0; sym_index < symtab.len; sym_index++)
    580     {
    581       if (ignore_zeros && symtab.base[sym_index].ncalls == 0
    582 	  && symtab.base[sym_index].hist.time == 0)
    583 	continue;
    584 
    585       name_sorted_syms[nnames++] = &symtab.base[sym_index];
    586     }
    587 
    588   qsort (name_sorted_syms, nnames, sizeof (Sym *), cmp_name);
    589 
    590   for (sym_index = 1, todo = nnames; sym_index <= num_cycles; sym_index++)
    591     name_sorted_syms[todo++] = &cycle_header[sym_index];
    592 
    593   printf ("\f\n");
    594   printf (_("Index by function name\n\n"));
    595   sym_index = (todo + 2) / 3;
    596 
    597   for (i = 0; i < sym_index; i++)
    598     {
    599       col = 0;
    600       starting_col = 0;
    601 
    602       for (j = i; j < todo; j += sym_index)
    603 	{
    604 	  sym = name_sorted_syms[j];
    605 
    606 	  if (sym->cg.print_flag)
    607 	    sprintf (buf, "[%d]", sym->cg.index);
    608 	  else
    609 	    sprintf (buf, "(%d)", sym->cg.index);
    610 
    611 	  if (j < nnames)
    612 	    {
    613 	      if (bsd_style_output)
    614 		{
    615 		  printf ("%6.6s %-19.19s", buf, sym->name);
    616 		}
    617 	      else
    618 		{
    619 		  col += strlen (buf);
    620 
    621 		  for (; col < starting_col + 5; ++col)
    622 		    putchar (' ');
    623 
    624 		  printf (" %s ", buf);
    625 		  col += print_name_only (sym);
    626 
    627 		  if (!line_granularity && sym->is_static && sym->file)
    628 		    {
    629 		      filename = sym->file->name;
    630 
    631 		      if (!print_path)
    632 			{
    633 			  filename = strrchr (filename, '/');
    634 
    635 			  if (filename)
    636 			    ++filename;
    637 			  else
    638 			    filename = sym->file->name;
    639 			}
    640 
    641 		      printf (" (%s)", filename);
    642 		      col += strlen (filename) + 3;
    643 		    }
    644 		}
    645 	    }
    646 	  else
    647 	    {
    648 	      if (bsd_style_output)
    649 		{
    650 		  printf ("%6.6s ", buf);
    651 		  sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num);
    652 		  printf ("%-19.19s", buf);
    653 		}
    654 	      else
    655 		{
    656 		  col += strlen (buf);
    657 		  for (; col < starting_col + 5; ++col)
    658 		    putchar (' ');
    659 		  printf (" %s ", buf);
    660 		  sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num);
    661 		  printf ("%s", buf);
    662 		  col += strlen (buf);
    663 		}
    664 	    }
    665 
    666 	  starting_col += column_width;
    667 	}
    668 
    669       printf ("\n");
    670     }
    671 
    672   free (name_sorted_syms);
    673 }
    674 
    675 /* Compare two arcs based on their usage counts.
    676    We want to sort in descending order.  */
    677 
    678 static int
    679 cmp_arc_count (const PTR left, const PTR right)
    680 {
    681   const Arc **npp1 = (const Arc **) left;
    682   const Arc **npp2 = (const Arc **) right;
    683 
    684   if ((*npp1)->count > (*npp2)->count)
    685     return -1;
    686   else if ((*npp1)->count < (*npp2)->count)
    687     return 1;
    688   else
    689     return 0;
    690 }
    691 
    692 /* Compare two funtions based on their usage counts.
    693    We want to sort in descending order.  */
    694 
    695 static int
    696 cmp_fun_nuses (const PTR left, const PTR right)
    697 {
    698   const Sym **npp1 = (const Sym **) left;
    699   const Sym **npp2 = (const Sym **) right;
    700 
    701   if ((*npp1)->nuses > (*npp2)->nuses)
    702     return -1;
    703   else if ((*npp1)->nuses < (*npp2)->nuses)
    704     return 1;
    705   else
    706     return 0;
    707 }
    708 
    709 /* Print a suggested function ordering based on the profiling data.
    710 
    711    We perform 4 major steps when ordering functions:
    712 
    713 	* Group unused functions together and place them at the
    714 	end of the function order.
    715 
    716 	* Search the highest use arcs (those which account for 90% of
    717 	the total arc count) for functions which have several parents.
    718 
    719 	Group those with the most call sites together (currently the
    720 	top 1.25% which have at least five different call sites).
    721 
    722 	These are emitted at the start of the function order.
    723 
    724 	* Use a greedy placement algorithm to place functions which
    725 	occur in the top 99% of the arcs in the profile.  Some provisions
    726 	are made to handle high usage arcs where the parent and/or
    727 	child has already been placed.
    728 
    729 	* Run the same greedy placement algorithm on the remaining
    730 	arcs to place the leftover functions.
    731 
    732 
    733    The various "magic numbers" should (one day) be tuneable by command
    734    line options.  They were arrived at by benchmarking a few applications
    735    with various values to see which values produced better overall function
    736    orderings.
    737 
    738    Of course, profiling errors, machine limitations (PA long calls), and
    739    poor cutoff values for the placement algorithm may limit the usefullness
    740    of the resulting function order.  Improvements would be greatly appreciated.
    741 
    742    Suggestions:
    743 
    744 	* Place the functions with many callers near the middle of the
    745 	list to reduce long calls.
    746 
    747 	* Propagate arc usage changes as functions are placed.  Ie if
    748 	func1 and func2 are placed together, arcs to/from those arcs
    749 	to the same parent/child should be combined, then resort the
    750 	arcs to choose the next one.
    751 
    752 	* Implement some global positioning algorithm to place the
    753 	chains made by the greedy local positioning algorithm.  Probably
    754 	by examining arcs which haven't been placed yet to tie two
    755 	chains together.
    756 
    757 	* Take a function's size and time into account in the algorithm;
    758 	size in particular is important on the PA (long calls).  Placing
    759 	many small functions onto their own page may be wise.
    760 
    761 	* Use better profiling information; many published algorithms
    762 	are based on call sequences through time, rather than just
    763 	arc counts.
    764 
    765 	* Prodecure cloning could improve performance when a small number
    766 	of arcs account for most of the calls to a particular function.
    767 
    768 	* Use relocation information to avoid moving unused functions
    769 	completely out of the code stream; this would avoid severe lossage
    770 	when the profile data bears little resemblance to actual runs.
    771 
    772 	* Propagation of arc usages should also improve .o link line
    773 	ordering which shares the same arc placement algorithm with
    774 	the function ordering code (in fact it is a degenerate case
    775 	of function ordering).  */
    776 
    777 void
    778 cg_print_function_ordering (void)
    779 {
    780   unsigned long sym_index;
    781   unsigned long arc_index;
    782   unsigned long used, unused, scratch_index;
    783   unsigned long  unplaced_arc_count, high_arc_count, scratch_arc_count;
    784 #ifdef __GNUC__
    785   unsigned long long total_arcs, tmp_arcs_count;
    786 #else
    787   unsigned long total_arcs, tmp_arcs_count;
    788 #endif
    789   Sym **unused_syms, **used_syms, **scratch_syms;
    790   Arc **unplaced_arcs, **high_arcs, **scratch_arcs;
    791 
    792   sym_index = 0;
    793   used = 0;
    794   unused = 0;
    795   scratch_index = 0;
    796   unplaced_arc_count = 0;
    797   high_arc_count = 0;
    798   scratch_arc_count = 0;
    799 
    800   /* First group all the unused functions together.  */
    801   unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
    802   used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
    803   scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
    804   high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
    805   scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
    806   unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
    807 
    808   /* Walk through all the functions; mark those which are never
    809      called as placed (we'll emit them as a group later).  */
    810   for (sym_index = 0, used = 0, unused = 0; sym_index < symtab.len; sym_index++)
    811     {
    812       if (symtab.base[sym_index].ncalls == 0)
    813 	{
    814 	  unused_syms[unused++] = &symtab.base[sym_index];
    815 	  symtab.base[sym_index].has_been_placed = 1;
    816 	}
    817       else
    818 	{
    819 	  used_syms[used++] = &symtab.base[sym_index];
    820 	  symtab.base[sym_index].has_been_placed = 0;
    821 	  symtab.base[sym_index].next = 0;
    822 	  symtab.base[sym_index].prev = 0;
    823 	  symtab.base[sym_index].nuses = 0;
    824 	}
    825     }
    826 
    827   /* Sort the arcs from most used to least used.  */
    828   qsort (arcs, numarcs, sizeof (Arc *), cmp_arc_count);
    829 
    830   /* Compute the total arc count.  Also mark arcs as unplaced.
    831 
    832      Note we don't compensate for overflow if that happens!
    833      Overflow is much less likely when this file is compiled
    834      with GCC as it can double-wide integers via long long.  */
    835   total_arcs = 0;
    836   for (arc_index = 0; arc_index < numarcs; arc_index++)
    837     {
    838       total_arcs += arcs[arc_index]->count;
    839       arcs[arc_index]->has_been_placed = 0;
    840     }
    841 
    842   /* We want to pull out those functions which are referenced
    843      by many highly used arcs and emit them as a group.  This
    844      could probably use some tuning.  */
    845   tmp_arcs_count = 0;
    846   for (arc_index = 0; arc_index < numarcs; arc_index++)
    847     {
    848       tmp_arcs_count += arcs[arc_index]->count;
    849 
    850       /* Count how many times each parent and child are used up
    851 	 to our threshhold of arcs (90%).  */
    852       if ((double)tmp_arcs_count / (double)total_arcs > 0.90)
    853 	break;
    854 
    855       arcs[arc_index]->child->nuses++;
    856     }
    857 
    858   /* Now sort a temporary symbol table based on the number of
    859      times each function was used in the highest used arcs.  */
    860   memcpy (scratch_syms, used_syms, used * sizeof (Sym *));
    861   qsort (scratch_syms, used, sizeof (Sym *), cmp_fun_nuses);
    862 
    863   /* Now pick out those symbols we're going to emit as
    864      a group.  We take up to 1.25% of the used symbols.  */
    865   for (sym_index = 0; sym_index < used / 80; sym_index++)
    866     {
    867       Sym *sym = scratch_syms[sym_index];
    868       Arc *arc;
    869 
    870       /* If we hit symbols that aren't used from many call sites,
    871 	 then we can quit.  We choose five as the low limit for
    872 	 no particular reason.  */
    873       if (sym->nuses == 5)
    874 	break;
    875 
    876       /* We're going to need the arcs between these functions.
    877 	 Unfortunately, we don't know all these functions
    878 	 until we're done.  So we keep track of all the arcs
    879 	 to the functions we care about, then prune out those
    880 	 which are uninteresting.
    881 
    882 	 An interesting variation would be to quit when we found
    883 	 multi-call site functions which account for some percentage
    884 	 of the arcs.  */
    885       arc = sym->cg.children;
    886 
    887       while (arc)
    888 	{
    889 	  if (arc->parent != arc->child)
    890 	    scratch_arcs[scratch_arc_count++] = arc;
    891 	  arc->has_been_placed = 1;
    892 	  arc = arc->next_child;
    893 	}
    894 
    895       arc = sym->cg.parents;
    896 
    897       while (arc)
    898 	{
    899 	  if (arc->parent != arc->child)
    900 	    scratch_arcs[scratch_arc_count++] = arc;
    901 	  arc->has_been_placed = 1;
    902 	  arc = arc->next_parent;
    903 	}
    904 
    905       /* Keep track of how many symbols we're going to place.  */
    906       scratch_index = sym_index;
    907 
    908       /* A lie, but it makes identifying
    909 	 these functions easier later.  */
    910       sym->has_been_placed = 1;
    911     }
    912 
    913   /* Now walk through the temporary arcs and copy
    914      those we care about into the high arcs array.  */
    915   for (arc_index = 0; arc_index < scratch_arc_count; arc_index++)
    916     {
    917       Arc *arc = scratch_arcs[arc_index];
    918 
    919       /* If this arc refers to highly used functions, then
    920 	 then we want to keep it.  */
    921       if (arc->child->has_been_placed
    922 	  && arc->parent->has_been_placed)
    923 	{
    924 	  high_arcs[high_arc_count++] = scratch_arcs[arc_index];
    925 
    926 	  /* We need to turn of has_been_placed since we're going to
    927 	     use the main arc placement algorithm on these arcs.  */
    928 	  arc->child->has_been_placed = 0;
    929 	  arc->parent->has_been_placed = 0;
    930 	}
    931     }
    932 
    933   /* Dump the multi-site high usage functions which are not
    934      going to be ordered by the main ordering algorithm.  */
    935   for (sym_index = 0; sym_index < scratch_index; sym_index++)
    936     {
    937       if (scratch_syms[sym_index]->has_been_placed)
    938 	printf ("%s\n", scratch_syms[sym_index]->name);
    939     }
    940 
    941   /* Now we can order the multi-site high use
    942      functions based on the arcs between them.  */
    943   qsort (high_arcs, high_arc_count, sizeof (Arc *), cmp_arc_count);
    944   order_and_dump_functions_by_arcs (high_arcs, high_arc_count, 1,
    945 				    unplaced_arcs, &unplaced_arc_count);
    946 
    947   /* Order and dump the high use functions left,
    948      these typically have only a few call sites.  */
    949   order_and_dump_functions_by_arcs (arcs, numarcs, 0,
    950 				    unplaced_arcs, &unplaced_arc_count);
    951 
    952   /* Now place the rarely used functions.  */
    953   order_and_dump_functions_by_arcs (unplaced_arcs, unplaced_arc_count, 1,
    954 				    scratch_arcs, &scratch_arc_count);
    955 
    956   /* Output any functions not emitted by the order_and_dump calls.  */
    957   for (sym_index = 0; sym_index < used; sym_index++)
    958     if (used_syms[sym_index]->has_been_placed == 0)
    959       printf("%s\n", used_syms[sym_index]->name);
    960 
    961   /* Output the unused functions.  */
    962   for (sym_index = 0; sym_index < unused; sym_index++)
    963     printf("%s\n", unused_syms[sym_index]->name);
    964 
    965   unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
    966   used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
    967   scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
    968   high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
    969   scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
    970   unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
    971 
    972   free (unused_syms);
    973   free (used_syms);
    974   free (scratch_syms);
    975   free (high_arcs);
    976   free (scratch_arcs);
    977   free (unplaced_arcs);
    978 }
    979 
    980 /* Place functions based on the arcs in THE_ARCS with ARC_COUNT entries;
    981    place unused arcs into UNPLACED_ARCS/UNPLACED_ARC_COUNT.
    982 
    983    If ALL is nonzero, then place all functions referenced by THE_ARCS,
    984    else only place those referenced in the top 99% of the arcs in THE_ARCS.  */
    985 
    986 #define MOST 0.99
    987 static void
    988 order_and_dump_functions_by_arcs (Arc **the_arcs, unsigned long arc_count,
    989 				  int all, Arc **unplaced_arcs,
    990 				  unsigned long *unplaced_arc_count)
    991 {
    992 #ifdef __GNUC__
    993   unsigned long long tmp_arcs, total_arcs;
    994 #else
    995   unsigned long tmp_arcs, total_arcs;
    996 #endif
    997   unsigned int arc_index;
    998 
    999   /* If needed, compute the total arc count.
   1000 
   1001      Note we don't compensate for overflow if that happens!  */
   1002   if (! all)
   1003     {
   1004       total_arcs = 0;
   1005       for (arc_index = 0; arc_index < arc_count; arc_index++)
   1006 	total_arcs += the_arcs[arc_index]->count;
   1007     }
   1008   else
   1009     total_arcs = 0;
   1010 
   1011   tmp_arcs = 0;
   1012 
   1013   for (arc_index = 0; arc_index < arc_count; arc_index++)
   1014     {
   1015       Sym *sym1, *sym2;
   1016       Sym *child, *parent;
   1017 
   1018       tmp_arcs += the_arcs[arc_index]->count;
   1019 
   1020       /* Ignore this arc if it's already been placed.  */
   1021       if (the_arcs[arc_index]->has_been_placed)
   1022 	continue;
   1023 
   1024       child = the_arcs[arc_index]->child;
   1025       parent = the_arcs[arc_index]->parent;
   1026 
   1027       /* If we're not using all arcs, and this is a rarely used
   1028 	 arc, then put it on the unplaced_arc list.  Similarly
   1029 	 if both the parent and child of this arc have been placed.  */
   1030       if ((! all && (double)tmp_arcs / (double)total_arcs > MOST)
   1031 	  || child->has_been_placed || parent->has_been_placed)
   1032 	{
   1033 	  unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
   1034 	  continue;
   1035 	}
   1036 
   1037       /* If all slots in the parent and child are full, then there isn't
   1038 	 anything we can do right now.  We'll place this arc on the
   1039 	 unplaced arc list in the hope that a global positioning
   1040 	 algorithm can use it to place function chains.  */
   1041       if (parent->next && parent->prev && child->next && child->prev)
   1042 	{
   1043 	  unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
   1044 	  continue;
   1045 	}
   1046 
   1047       /* If the parent is unattached, then find the closest
   1048 	 place to attach it onto child's chain.   Similarly
   1049 	 for the opposite case.  */
   1050       if (!parent->next && !parent->prev)
   1051 	{
   1052 	  int next_count = 0;
   1053 	  int prev_count = 0;
   1054 	  Sym *prev = child;
   1055 	  Sym *next = child;
   1056 
   1057 	  /* Walk to the beginning and end of the child's chain.  */
   1058 	  while (next->next)
   1059 	    {
   1060 	      next = next->next;
   1061 	      next_count++;
   1062 	    }
   1063 
   1064 	  while (prev->prev)
   1065 	    {
   1066 	      prev = prev->prev;
   1067 	      prev_count++;
   1068 	    }
   1069 
   1070 	  /* Choose the closest.  */
   1071 	  child = next_count < prev_count ? next : prev;
   1072 	}
   1073       else if (! child->next && !child->prev)
   1074 	{
   1075 	  int next_count = 0;
   1076 	  int prev_count = 0;
   1077 	  Sym *prev = parent;
   1078 	  Sym *next = parent;
   1079 
   1080 	  while (next->next)
   1081 	    {
   1082 	      next = next->next;
   1083 	      next_count++;
   1084 	    }
   1085 
   1086 	  while (prev->prev)
   1087 	    {
   1088 	      prev = prev->prev;
   1089 	      prev_count++;
   1090 	    }
   1091 
   1092 	  parent = prev_count < next_count ? prev : next;
   1093 	}
   1094       else
   1095 	{
   1096 	  /* Couldn't find anywhere to attach the functions,
   1097 	     put the arc on the unplaced arc list.  */
   1098 	  unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
   1099 	  continue;
   1100 	}
   1101 
   1102       /* Make sure we don't tie two ends together.  */
   1103       sym1 = parent;
   1104       if (sym1->next)
   1105 	while (sym1->next)
   1106 	  sym1 = sym1->next;
   1107       else
   1108 	while (sym1->prev)
   1109 	  sym1 = sym1->prev;
   1110 
   1111       sym2 = child;
   1112       if (sym2->next)
   1113 	while (sym2->next)
   1114 	  sym2 = sym2->next;
   1115       else
   1116 	while (sym2->prev)
   1117 	  sym2 = sym2->prev;
   1118 
   1119       if (sym1 == child
   1120 	  && sym2 == parent)
   1121 	{
   1122 	  /* This would tie two ends together.  */
   1123 	  unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
   1124 	  continue;
   1125 	}
   1126 
   1127       if (parent->next)
   1128 	{
   1129 	  /* Must attach to the parent's prev field.  */
   1130 	  if (! child->next)
   1131 	    {
   1132 	      /* parent-prev and child-next */
   1133 	      parent->prev = child;
   1134 	      child->next = parent;
   1135 	      the_arcs[arc_index]->has_been_placed = 1;
   1136 	    }
   1137 	}
   1138       else if (parent->prev)
   1139 	{
   1140 	  /* Must attach to the parent's next field.  */
   1141 	  if (! child->prev)
   1142 	    {
   1143 	      /* parent-next and child-prev */
   1144 	      parent->next = child;
   1145 	      child->prev = parent;
   1146 	      the_arcs[arc_index]->has_been_placed = 1;
   1147 	    }
   1148 	}
   1149       else
   1150 	{
   1151 	  /* Can attach to either field in the parent, depends
   1152 	     on where we've got space in the child.  */
   1153 	  if (child->prev)
   1154 	    {
   1155 	      /* parent-prev and child-next.  */
   1156 	      parent->prev = child;
   1157 	      child->next = parent;
   1158 	      the_arcs[arc_index]->has_been_placed = 1;
   1159 	    }
   1160 	  else
   1161 	    {
   1162 	      /* parent-next and child-prev.  */
   1163 	      parent->next = child;
   1164 	      child->prev = parent;
   1165 	      the_arcs[arc_index]->has_been_placed = 1;
   1166 	    }
   1167 	}
   1168     }
   1169 
   1170   /* Dump the chains of functions we've made.  */
   1171   for (arc_index = 0; arc_index < arc_count; arc_index++)
   1172     {
   1173       Sym *sym;
   1174       if (the_arcs[arc_index]->parent->has_been_placed
   1175 	  || the_arcs[arc_index]->child->has_been_placed)
   1176 	continue;
   1177 
   1178       sym = the_arcs[arc_index]->parent;
   1179 
   1180       /* If this symbol isn't attached to any other
   1181 	 symbols, then we've got a rarely used arc.
   1182 
   1183 	 Skip it for now, we'll deal with them later.  */
   1184       if (sym->next == NULL
   1185 	  && sym->prev == NULL)
   1186 	continue;
   1187 
   1188       /* Get to the start of this chain.  */
   1189       while (sym->prev)
   1190 	sym = sym->prev;
   1191 
   1192       while (sym)
   1193 	{
   1194 	  /* Mark it as placed.  */
   1195 	  sym->has_been_placed = 1;
   1196 	  printf ("%s\n", sym->name);
   1197 	  sym = sym->next;
   1198 	}
   1199     }
   1200 
   1201   /* If we want to place all the arcs, then output
   1202      those which weren't placed by the main algorithm.  */
   1203   if (all)
   1204     for (arc_index = 0; arc_index < arc_count; arc_index++)
   1205       {
   1206 	Sym *sym;
   1207 	if (the_arcs[arc_index]->parent->has_been_placed
   1208 	    || the_arcs[arc_index]->child->has_been_placed)
   1209 	  continue;
   1210 
   1211 	sym = the_arcs[arc_index]->parent;
   1212 
   1213 	sym->has_been_placed = 1;
   1214 	printf ("%s\n", sym->name);
   1215       }
   1216 }
   1217 
   1218 /* Compare two function_map structs based on file name.
   1219    We want to sort in ascending order.  */
   1220 
   1221 static int
   1222 cmp_symbol_map (const void * l, const void * r)
   1223 {
   1224   return filename_cmp (((struct function_map *) l)->file_name,
   1225 		       ((struct function_map *) r)->file_name);
   1226 }
   1227 
   1228 /* Print a suggested .o ordering for files on a link line based
   1229    on profiling information.  This uses the function placement
   1230    code for the bulk of its work.  */
   1231 
   1232 void
   1233 cg_print_file_ordering (void)
   1234 {
   1235   unsigned long scratch_arc_count;
   1236   unsigned long arc_index;
   1237   unsigned long sym_index;
   1238   Arc **scratch_arcs;
   1239   char *last;
   1240 
   1241   scratch_arc_count = 0;
   1242 
   1243   scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
   1244   for (arc_index = 0; arc_index < numarcs; arc_index++)
   1245     {
   1246       if (! arcs[arc_index]->parent->mapped
   1247 	  || ! arcs[arc_index]->child->mapped)
   1248 	arcs[arc_index]->has_been_placed = 1;
   1249     }
   1250 
   1251   order_and_dump_functions_by_arcs (arcs, numarcs, 0,
   1252 				    scratch_arcs, &scratch_arc_count);
   1253 
   1254   /* Output .o's not handled by the main placement algorithm.  */
   1255   for (sym_index = 0; sym_index < symtab.len; sym_index++)
   1256     {
   1257       if (symtab.base[sym_index].mapped
   1258 	  && ! symtab.base[sym_index].has_been_placed)
   1259 	printf ("%s\n", symtab.base[sym_index].name);
   1260     }
   1261 
   1262   qsort (symbol_map, symbol_map_count, sizeof (struct function_map), cmp_symbol_map);
   1263 
   1264   /* Now output any .o's that didn't have any text symbols.  */
   1265   last = NULL;
   1266   for (sym_index = 0; sym_index < symbol_map_count; sym_index++)
   1267     {
   1268       unsigned int index2;
   1269 
   1270       /* Don't bother searching if this symbol
   1271 	 is the same as the previous one.  */
   1272       if (last && !filename_cmp (last, symbol_map[sym_index].file_name))
   1273 	continue;
   1274 
   1275       for (index2 = 0; index2 < symtab.len; index2++)
   1276 	{
   1277 	  if (! symtab.base[index2].mapped)
   1278 	    continue;
   1279 
   1280 	  if (!filename_cmp (symtab.base[index2].name,
   1281 			     symbol_map[sym_index].file_name))
   1282 	    break;
   1283 	}
   1284 
   1285       /* If we didn't find it in the symbol table, then it must
   1286 	 be a .o with no text symbols.  Output it last.  */
   1287       if (index2 == symtab.len)
   1288 	printf ("%s\n", symbol_map[sym_index].file_name);
   1289       last = symbol_map[sym_index].file_name;
   1290     }
   1291 }
   1292