Home | History | Annotate | Download | only in gprof
      1 /* cg_print.c -  Print routines for displaying call graphs.
      2 
      3    Copyright (C) 2000-2014 Free Software Foundation, Inc.
      4 
      5    This file is part of GNU Binutils.
      6 
      7    This program is free software; you can redistribute it and/or modify
      8    it under the terms of the GNU General Public License as published by
      9    the Free Software Foundation; either version 3 of the License, or
     10    (at your option) any later version.
     11 
     12    This program is distributed in the hope that it will be useful,
     13    but WITHOUT ANY WARRANTY; without even the implied warranty of
     14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     15    GNU General Public License for more details.
     16 
     17    You should have received a copy of the GNU General Public License
     18    along with this program; if not, write to the Free Software
     19    Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
     20    02110-1301, USA.  */
     21 
     22 #include "gprof.h"
     24 #include "libiberty.h"
     25 #include "filenames.h"
     26 #include "search_list.h"
     27 #include "source.h"
     28 #include "symtab.h"
     29 #include "cg_arcs.h"
     30 #include "cg_print.h"
     31 #include "hist.h"
     32 #include "utils.h"
     33 #include "corefile.h"
     34 
     35 /* Return value of comparison functions used to sort tables.  */
     36 #define	LESSTHAN	-1
     37 #define	EQUALTO		0
     38 #define	GREATERTHAN	1
     39 
     40 static void print_header (void);
     41 static void print_cycle (Sym *);
     42 static int cmp_member (Sym *, Sym *);
     43 static void sort_members (Sym *);
     44 static void print_members (Sym *);
     45 static int cmp_arc (Arc *, Arc *);
     46 static void sort_parents (Sym *);
     47 static void print_parents (Sym *);
     48 static void sort_children (Sym *);
     49 static void print_children (Sym *);
     50 static void print_line (Sym *);
     51 static int cmp_name (const PTR, const PTR);
     52 static int cmp_arc_count (const PTR, const PTR);
     53 static int cmp_fun_nuses (const PTR, const PTR);
     54 static void order_and_dump_functions_by_arcs
     55   (Arc **, unsigned long, int, Arc **, unsigned long *);
     56 
     57 /* Declarations of automatically generated functions to output blurbs.  */
     58 extern void bsd_callg_blurb (FILE * fp);
     59 extern void fsf_callg_blurb (FILE * fp);
     60 
     61 double print_time = 0.0;
     62 
     63 
     64 static void
     65 print_header ()
     66 {
     67   if (first_output)
     68     first_output = FALSE;
     69   else
     70     printf ("\f\n");
     71 
     72   if (!bsd_style_output)
     73     {
     74       if (print_descriptions)
     75 	printf (_("\t\t     Call graph (explanation follows)\n\n"));
     76       else
     77 	printf (_("\t\t\tCall graph\n\n"));
     78     }
     79 
     80   printf (_("\ngranularity: each sample hit covers %ld byte(s)"),
     81 	  (long) hist_scale * (long) sizeof (UNIT));
     82 
     83   if (print_time > 0.0)
     84     printf (_(" for %.2f%% of %.2f seconds\n\n"),
     85 	    100.0 / print_time, print_time / hz);
     86   else
     87     {
     88       printf (_(" no time propagated\n\n"));
     89 
     90       /* This doesn't hurt, since all the numerators will be 0.0.  */
     91       print_time = 1.0;
     92     }
     93 
     94   if (bsd_style_output)
     95     {
     96       printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s     %-8.8s\n",
     97 	      "", "", "", "", _("called"), _("total"), _("parents"));
     98       printf ("%-6.6s %5.5s %7.7s %11.11s %7.7s+%-7.7s %-8.8s\t%5.5s\n",
     99 	      _("index"),
    100 	      /* xgettext:no-c-format */
    101 	      _("%time"),
    102 	      _("self"), _("descendants"), _("called"), _("self"),
    103 	      _("name"), _("index"));
    104       printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s     %-8.8s\n",
    105 	      "", "", "", "", _("called"), _("total"), _("children"));
    106       printf ("\n");
    107     }
    108   else
    109     {
    110       printf (_("index %% time    self  children    called     name\n"));
    111     }
    112 }
    113 
    114 /* Print a cycle header.  */
    115 
    116 static void
    117 print_cycle (Sym *cyc)
    118 {
    119   char buf[BUFSIZ];
    120 
    121   sprintf (buf, "[%d]", cyc->cg.index);
    122   printf (bsd_style_output
    123 	  ? "%-6.6s %5.1f %7.2f %11.2f %7lu"
    124 	  : "%-6.6s %5.1f %7.2f %7.2f %7lu", buf,
    125 	  100 * (cyc->cg.prop.self + cyc->cg.prop.child) / print_time,
    126 	  cyc->cg.prop.self / hz, cyc->cg.prop.child / hz, cyc->ncalls);
    127 
    128   if (cyc->cg.self_calls != 0)
    129     printf ("+%-7lu", cyc->cg.self_calls);
    130   else
    131     printf (" %7.7s", "");
    132 
    133   printf (_(" <cycle %d as a whole> [%d]\n"), cyc->cg.cyc.num, cyc->cg.index);
    134 }
    135 
    136 /* Compare LEFT and RIGHT membmer.  Major comparison key is
    137    CG.PROP.SELF+CG.PROP.CHILD, secondary key is NCALLS+CG.SELF_CALLS.  */
    138 
    139 static int
    140 cmp_member (Sym *left, Sym *right)
    141 {
    142   double left_time = left->cg.prop.self + left->cg.prop.child;
    143   double right_time = right->cg.prop.self + right->cg.prop.child;
    144   unsigned long left_calls = left->ncalls + left->cg.self_calls;
    145   unsigned long right_calls = right->ncalls + right->cg.self_calls;
    146 
    147   if (left_time > right_time)
    148     return GREATERTHAN;
    149 
    150   if (left_time < right_time)
    151     return LESSTHAN;
    152 
    153   if (left_calls > right_calls)
    154     return GREATERTHAN;
    155 
    156   if (left_calls < right_calls)
    157     return LESSTHAN;
    158 
    159   return EQUALTO;
    160 }
    161 
    162 /* Sort members of a cycle.  */
    163 
    164 static void
    165 sort_members (Sym *cyc)
    166 {
    167   Sym *todo, *doing, *prev;
    168 
    169   /* Detach cycle members from cyclehead,
    170      and insertion sort them back on.  */
    171   todo = cyc->cg.cyc.next;
    172   cyc->cg.cyc.next = 0;
    173 
    174   for (doing = todo; doing != NULL; doing = todo)
    175     {
    176       todo = doing->cg.cyc.next;
    177 
    178       for (prev = cyc; prev->cg.cyc.next; prev = prev->cg.cyc.next)
    179 	{
    180 	  if (cmp_member (doing, prev->cg.cyc.next) == GREATERTHAN)
    181 	    break;
    182 	}
    183 
    184       doing->cg.cyc.next = prev->cg.cyc.next;
    185       prev->cg.cyc.next = doing;
    186     }
    187 }
    188 
    189 /* Print the members of a cycle.  */
    190 
    191 static void
    192 print_members (Sym *cyc)
    193 {
    194   Sym *member;
    195 
    196   sort_members (cyc);
    197 
    198   for (member = cyc->cg.cyc.next; member; member = member->cg.cyc.next)
    199     {
    200       printf (bsd_style_output
    201 	      ? "%6.6s %5.5s %7.2f %11.2f %7lu"
    202 	      : "%6.6s %5.5s %7.2f %7.2f %7lu",
    203 	      "", "", member->cg.prop.self / hz, member->cg.prop.child / hz,
    204 	      member->ncalls);
    205 
    206       if (member->cg.self_calls != 0)
    207 	printf ("+%-7lu", member->cg.self_calls);
    208       else
    209 	printf (" %7.7s", "");
    210 
    211       printf ("     ");
    212       print_name (member);
    213       printf ("\n");
    214     }
    215 }
    216 
    217 /* Compare two arcs to/from the same child/parent.
    218 	- if one arc is a self arc, it's least.
    219 	- if one arc is within a cycle, it's less than.
    220 	- if both arcs are within a cycle, compare arc counts.
    221 	- if neither arc is within a cycle, compare with
    222 		time + child_time as major key
    223 		arc count as minor key.  */
    224 
    225 static int
    226 cmp_arc (Arc *left, Arc *right)
    227 {
    228   Sym *left_parent = left->parent;
    229   Sym *left_child = left->child;
    230   Sym *right_parent = right->parent;
    231   Sym *right_child = right->child;
    232   double left_time, right_time;
    233 
    234   DBG (TIMEDEBUG,
    235        printf ("[cmp_arc] ");
    236        print_name (left_parent);
    237        printf (" calls ");
    238        print_name (left_child);
    239        printf (" %f + %f %lu/%lu\n", left->time, left->child_time,
    240 	       left->count, left_child->ncalls);
    241        printf ("[cmp_arc] ");
    242        print_name (right_parent);
    243        printf (" calls ");
    244        print_name (right_child);
    245        printf (" %f + %f %lu/%lu\n", right->time, right->child_time,
    246 	       right->count, right_child->ncalls);
    247        printf ("\n");
    248     );
    249 
    250   if (left_parent == left_child)
    251     return LESSTHAN;		/* Left is a self call.  */
    252 
    253   if (right_parent == right_child)
    254     return GREATERTHAN;		/* Right is a self call.  */
    255 
    256   if (left_parent->cg.cyc.num != 0 && left_child->cg.cyc.num != 0
    257       && left_parent->cg.cyc.num == left_child->cg.cyc.num)
    258     {
    259       /* Left is a call within a cycle.  */
    260       if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0
    261 	  && right_parent->cg.cyc.num == right_child->cg.cyc.num)
    262 	{
    263 	  /* Right is a call within the cycle, too.  */
    264 	  if (left->count < right->count)
    265 	    return LESSTHAN;
    266 
    267 	  if (left->count > right->count)
    268 	    return GREATERTHAN;
    269 
    270 	  return EQUALTO;
    271 	}
    272       else
    273 	{
    274 	  /* Right isn't a call within the cycle.  */
    275 	  return LESSTHAN;
    276 	}
    277     }
    278   else
    279     {
    280       /* Left isn't a call within a cycle.  */
    281       if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0
    282 	  && right_parent->cg.cyc.num == right_child->cg.cyc.num)
    283 	{
    284 	  /* Right is a call within a cycle.  */
    285 	  return GREATERTHAN;
    286 	}
    287       else
    288 	{
    289 	  /* Neither is a call within a cycle.  */
    290 	  left_time = left->time + left->child_time;
    291 	  right_time = right->time + right->child_time;
    292 
    293 	  if (left_time < right_time)
    294 	    return LESSTHAN;
    295 
    296 	  if (left_time > right_time)
    297 	    return GREATERTHAN;
    298 
    299 	  if (left->count < right->count)
    300 	    return LESSTHAN;
    301 
    302 	  if (left->count > right->count)
    303 	    return GREATERTHAN;
    304 
    305 	  return EQUALTO;
    306 	}
    307     }
    308 }
    309 
    310 
    311 static void
    312 sort_parents (Sym * child)
    313 {
    314   Arc *arc, *detached, sorted, *prev;
    315 
    316   /* Unlink parents from child, then insertion sort back on to
    317      sorted's parents.
    318 	  *arc        the arc you have detached and are inserting.
    319 	  *detached   the rest of the arcs to be sorted.
    320 	  sorted      arc list onto which you insertion sort.
    321 	  *prev       arc before the arc you are comparing.  */
    322   sorted.next_parent = 0;
    323 
    324   for (arc = child->cg.parents; arc; arc = detached)
    325     {
    326       detached = arc->next_parent;
    327 
    328       /* Consider *arc as disconnected; insert it into sorted.  */
    329       for (prev = &sorted; prev->next_parent; prev = prev->next_parent)
    330 	{
    331 	  if (cmp_arc (arc, prev->next_parent) != GREATERTHAN)
    332 	    break;
    333 	}
    334 
    335       arc->next_parent = prev->next_parent;
    336       prev->next_parent = arc;
    337     }
    338 
    339   /* Reattach sorted arcs to child.  */
    340   child->cg.parents = sorted.next_parent;
    341 }
    342 
    343 
    344 static void
    345 print_parents (Sym *child)
    346 {
    347   Sym *parent;
    348   Arc *arc;
    349   Sym *cycle_head;
    350 
    351   if (child->cg.cyc.head != 0)
    352     cycle_head = child->cg.cyc.head;
    353   else
    354     cycle_head = child;
    355 
    356   if (!child->cg.parents)
    357     {
    358       printf (bsd_style_output
    359 	      ? _("%6.6s %5.5s %7.7s %11.11s %7.7s %7.7s     <spontaneous>\n")
    360 	      : _("%6.6s %5.5s %7.7s %7.7s %7.7s %7.7s     <spontaneous>\n"),
    361 	      "", "", "", "", "", "");
    362       return;
    363     }
    364 
    365   sort_parents (child);
    366 
    367   for (arc = child->cg.parents; arc; arc = arc->next_parent)
    368     {
    369       parent = arc->parent;
    370       if (child == parent || (child->cg.cyc.num != 0
    371 			      && parent->cg.cyc.num == child->cg.cyc.num))
    372 	{
    373 	  /* Selfcall or call among siblings.  */
    374 	  printf (bsd_style_output
    375 		  ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s     "
    376 		  : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s     ",
    377 		  "", "", "", "",
    378 		  arc->count, "");
    379 	  print_name (parent);
    380 	  printf ("\n");
    381 	}
    382       else
    383 	{
    384 	  /* Regular parent of child.  */
    385 	  printf (bsd_style_output
    386 		  ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu     "
    387 		  : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu     ",
    388 		  "", "",
    389 		  arc->time / hz, arc->child_time / hz,
    390 		  arc->count, cycle_head->ncalls);
    391 	  print_name (parent);
    392 	  printf ("\n");
    393 	}
    394     }
    395 }
    396 
    397 
    398 static void
    399 sort_children (Sym *parent)
    400 {
    401   Arc *arc, *detached, sorted, *prev;
    402 
    403   /* Unlink children from parent, then insertion sort back on to
    404      sorted's children.
    405 	  *arc        the arc you have detached and are inserting.
    406 	  *detached   the rest of the arcs to be sorted.
    407 	  sorted      arc list onto which you insertion sort.
    408 	  *prev       arc before the arc you are comparing.  */
    409   sorted.next_child = 0;
    410 
    411   for (arc = parent->cg.children; arc; arc = detached)
    412     {
    413       detached = arc->next_child;
    414 
    415       /* Consider *arc as disconnected; insert it into sorted.  */
    416       for (prev = &sorted; prev->next_child; prev = prev->next_child)
    417 	{
    418 	  if (cmp_arc (arc, prev->next_child) != LESSTHAN)
    419 	    break;
    420 	}
    421 
    422       arc->next_child = prev->next_child;
    423       prev->next_child = arc;
    424     }
    425 
    426   /* Reattach sorted children to parent.  */
    427   parent->cg.children = sorted.next_child;
    428 }
    429 
    430 
    431 static void
    432 print_children (Sym *parent)
    433 {
    434   Sym *child;
    435   Arc *arc;
    436 
    437   sort_children (parent);
    438   arc = parent->cg.children;
    439 
    440   for (arc = parent->cg.children; arc; arc = arc->next_child)
    441     {
    442       child = arc->child;
    443       if (child == parent || (child->cg.cyc.num != 0
    444 			      && child->cg.cyc.num == parent->cg.cyc.num))
    445 	{
    446 	  /* Self call or call to sibling.  */
    447 	  printf (bsd_style_output
    448 		  ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s     "
    449 		  : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s     ",
    450 		  "", "", "", "", arc->count, "");
    451 	  print_name (child);
    452 	  printf ("\n");
    453 	}
    454       else
    455 	{
    456 	  /* Regular child of parent.  */
    457 	  printf (bsd_style_output
    458 		  ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu     "
    459 		  : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu     ",
    460 		  "", "",
    461 		  arc->time / hz, arc->child_time / hz,
    462 		  arc->count, child->cg.cyc.head->ncalls);
    463 	  print_name (child);
    464 	  printf ("\n");
    465 	}
    466     }
    467 }
    468 
    469 
    470 static void
    471 print_line (Sym *np)
    472 {
    473   char buf[BUFSIZ];
    474 
    475   sprintf (buf, "[%d]", np->cg.index);
    476   printf (bsd_style_output
    477 	  ? "%-6.6s %5.1f %7.2f %11.2f"
    478 	  : "%-6.6s %5.1f %7.2f %7.2f", buf,
    479 	  100 * (np->cg.prop.self + np->cg.prop.child) / print_time,
    480 	  np->cg.prop.self / hz, np->cg.prop.child / hz);
    481 
    482   if ((np->ncalls + np->cg.self_calls) != 0)
    483     {
    484       printf (" %7lu", np->ncalls);
    485 
    486       if (np->cg.self_calls != 0)
    487 	  printf ("+%-7lu ", np->cg.self_calls);
    488       else
    489 	  printf (" %7.7s ", "");
    490     }
    491   else
    492     {
    493       printf (" %7.7s %7.7s ", "", "");
    494     }
    495 
    496   print_name (np);
    497   printf ("\n");
    498 }
    499 
    500 
    501 /* Print dynamic call graph.  */
    502 
    503 void
    504 cg_print (Sym ** timesortsym)
    505 {
    506   unsigned int sym_index;
    507   Sym *parent;
    508 
    509   if (print_descriptions && bsd_style_output)
    510     bsd_callg_blurb (stdout);
    511 
    512   print_header ();
    513 
    514   for (sym_index = 0; sym_index < symtab.len + num_cycles; ++sym_index)
    515     {
    516       parent = timesortsym[sym_index];
    517 
    518       if ((ignore_zeros && parent->ncalls == 0
    519 	   && parent->cg.self_calls == 0 && parent->cg.prop.self == 0
    520 	   && parent->cg.prop.child == 0)
    521 	  || !parent->cg.print_flag
    522 	  || (line_granularity && ! parent->is_func))
    523 	continue;
    524 
    525       if (!parent->name && parent->cg.cyc.num != 0)
    526 	{
    527 	  /* Cycle header.  */
    528 	  print_cycle (parent);
    529 	  print_members (parent);
    530 	}
    531       else
    532 	{
    533 	  print_parents (parent);
    534 	  print_line (parent);
    535 	  print_children (parent);
    536 	}
    537 
    538       if (bsd_style_output)
    539 	printf ("\n");
    540 
    541       printf ("-----------------------------------------------\n");
    542 
    543       if (bsd_style_output)
    544 	printf ("\n");
    545     }
    546 
    547   free (timesortsym);
    548 
    549   if (print_descriptions && !bsd_style_output)
    550     fsf_callg_blurb (stdout);
    551 }
    552 
    553 
    554 static int
    555 cmp_name (const PTR left, const PTR right)
    556 {
    557   const Sym **npp1 = (const Sym **) left;
    558   const Sym **npp2 = (const Sym **) right;
    559 
    560   return strcmp ((*npp1)->name, (*npp2)->name);
    561 }
    562 
    563 
    564 void
    565 cg_print_index ()
    566 {
    567   unsigned int sym_index;
    568   unsigned int nnames, todo, i, j;
    569   int col, starting_col;
    570   Sym **name_sorted_syms, *sym;
    571   const char *filename;
    572   char buf[20];
    573   int column_width = (output_width - 1) / 3;	/* Don't write in last col!  */
    574 
    575   /* Now, sort regular function name
    576      alphabetically to create an index.  */
    577   name_sorted_syms = (Sym **) xmalloc ((symtab.len + num_cycles) * sizeof (Sym *));
    578 
    579   for (sym_index = 0, nnames = 0; sym_index < symtab.len; sym_index++)
    580     {
    581       if (ignore_zeros && symtab.base[sym_index].ncalls == 0
    582 	  && symtab.base[sym_index].hist.time == 0)
    583 	continue;
    584 
    585       name_sorted_syms[nnames++] = &symtab.base[sym_index];
    586     }
    587 
    588   qsort (name_sorted_syms, nnames, sizeof (Sym *), cmp_name);
    589 
    590   for (sym_index = 1, todo = nnames; sym_index <= num_cycles; sym_index++)
    591     name_sorted_syms[todo++] = &cycle_header[sym_index];
    592 
    593   printf ("\f\n");
    594   printf (_("Index by function name\n\n"));
    595   sym_index = (todo + 2) / 3;
    596 
    597   for (i = 0; i < sym_index; i++)
    598     {
    599       col = 0;
    600       starting_col = 0;
    601 
    602       for (j = i; j < todo; j += sym_index)
    603 	{
    604 	  sym = name_sorted_syms[j];
    605 
    606 	  if (sym->cg.print_flag)
    607 	    sprintf (buf, "[%d]", sym->cg.index);
    608 	  else
    609 	    sprintf (buf, "(%d)", sym->cg.index);
    610 
    611 	  if (j < nnames)
    612 	    {
    613 	      if (bsd_style_output)
    614 		{
    615 		  printf ("%6.6s %-19.19s", buf, sym->name);
    616 		}
    617 	      else
    618 		{
    619 		  col += strlen (buf);
    620 
    621 		  for (; col < starting_col + 5; ++col)
    622 		    putchar (' ');
    623 
    624 		  printf (" %s ", buf);
    625 		  col += print_name_only (sym);
    626 
    627 		  if (!line_granularity && sym->is_static && sym->file)
    628 		    {
    629 		      filename = sym->file->name;
    630 
    631 		      if (!print_path)
    632 			{
    633 			  filename = strrchr (filename, '/');
    634 
    635 			  if (filename)
    636 			    ++filename;
    637 			  else
    638 			    filename = sym->file->name;
    639 			}
    640 
    641 		      printf (" (%s)", filename);
    642 		      col += strlen (filename) + 3;
    643 		    }
    644 		}
    645 	    }
    646 	  else
    647 	    {
    648 	      if (bsd_style_output)
    649 		{
    650 		  printf ("%6.6s ", buf);
    651 		  sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num);
    652 		  printf ("%-19.19s", buf);
    653 		}
    654 	      else
    655 		{
    656 		  col += strlen (buf);
    657 		  for (; col < starting_col + 5; ++col)
    658 		    putchar (' ');
    659 		  printf (" %s ", buf);
    660 		  sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num);
    661 		  printf ("%s", buf);
    662 		  col += strlen (buf);
    663 		}
    664 	    }
    665 
    666 	  starting_col += column_width;
    667 	}
    668 
    669       printf ("\n");
    670     }
    671 
    672   free (name_sorted_syms);
    673 }
    674 
    675 /* Compare two arcs based on their usage counts.
    676    We want to sort in descending order.  */
    677 
    678 static int
    679 cmp_arc_count (const PTR left, const PTR right)
    680 {
    681   const Arc **npp1 = (const Arc **) left;
    682   const Arc **npp2 = (const Arc **) right;
    683 
    684   if ((*npp1)->count > (*npp2)->count)
    685     return -1;
    686   else if ((*npp1)->count < (*npp2)->count)
    687     return 1;
    688   else
    689     return 0;
    690 }
    691 
    692 /* Compare two funtions based on their usage counts.
    693    We want to sort in descending order.  */
    694 
    695 static int
    696 cmp_fun_nuses (const PTR left, const PTR right)
    697 {
    698   const Sym **npp1 = (const Sym **) left;
    699   const Sym **npp2 = (const Sym **) right;
    700 
    701   if ((*npp1)->nuses > (*npp2)->nuses)
    702     return -1;
    703   else if ((*npp1)->nuses < (*npp2)->nuses)
    704     return 1;
    705   else
    706     return 0;
    707 }
    708 
    709 /* Print a suggested function ordering based on the profiling data.
    710 
    711    We perform 4 major steps when ordering functions:
    712 
    713 	* Group unused functions together and place them at the
    714 	end of the function order.
    715 
    716 	* Search the highest use arcs (those which account for 90% of
    717 	the total arc count) for functions which have several parents.
    718 
    719 	Group those with the most call sites together (currently the
    720 	top 1.25% which have at least five different call sites).
    721 
    722 	These are emitted at the start of the function order.
    723 
    724 	* Use a greedy placement algorithm to place functions which
    725 	occur in the top 99% of the arcs in the profile.  Some provisions
    726 	are made to handle high usage arcs where the parent and/or
    727 	child has already been placed.
    728 
    729 	* Run the same greedy placement algorithm on the remaining
    730 	arcs to place the leftover functions.
    731 
    732 
    733    The various "magic numbers" should (one day) be tuneable by command
    734    line options.  They were arrived at by benchmarking a few applications
    735    with various values to see which values produced better overall function
    736    orderings.
    737 
    738    Of course, profiling errors, machine limitations (PA long calls), and
    739    poor cutoff values for the placement algorithm may limit the usefullness
    740    of the resulting function order.  Improvements would be greatly appreciated.
    741 
    742    Suggestions:
    743 
    744 	* Place the functions with many callers near the middle of the
    745 	list to reduce long calls.
    746 
    747 	* Propagate arc usage changes as functions are placed.  Ie if
    748 	func1 and func2 are placed together, arcs to/from those arcs
    749 	to the same parent/child should be combined, then resort the
    750 	arcs to choose the next one.
    751 
    752 	* Implement some global positioning algorithm to place the
    753 	chains made by the greedy local positioning algorithm.  Probably
    754 	by examining arcs which haven't been placed yet to tie two
    755 	chains together.
    756 
    757 	* Take a function's size and time into account in the algorithm;
    758 	size in particular is important on the PA (long calls).  Placing
    759 	many small functions onto their own page may be wise.
    760 
    761 	* Use better profiling information; many published algorithms
    762 	are based on call sequences through time, rather than just
    763 	arc counts.
    764 
    765 	* Prodecure cloning could improve performance when a small number
    766 	of arcs account for most of the calls to a particular function.
    767 
    768 	* Use relocation information to avoid moving unused functions
    769 	completely out of the code stream; this would avoid severe lossage
    770 	when the profile data bears little resemblance to actual runs.
    771 
    772 	* Propagation of arc usages should also improve .o link line
    773 	ordering which shares the same arc placement algorithm with
    774 	the function ordering code (in fact it is a degenerate case
    775 	of function ordering).  */
    776 
    777 void
    778 cg_print_function_ordering (void)
    779 {
    780   unsigned long sym_index;
    781   unsigned long arc_index;
    782   unsigned long used, unused, scratch_index;
    783   unsigned long  unplaced_arc_count, high_arc_count, scratch_arc_count;
    784 #ifdef __GNUC__
    785   unsigned long long total_arcs, tmp_arcs_count;
    786 #else
    787   unsigned long total_arcs, tmp_arcs_count;
    788 #endif
    789   Sym **unused_syms, **used_syms, **scratch_syms;
    790   Arc **unplaced_arcs, **high_arcs, **scratch_arcs;
    791 
    792   sym_index = 0;
    793   used = 0;
    794   unused = 0;
    795   scratch_index = 0;
    796   unplaced_arc_count = 0;
    797   high_arc_count = 0;
    798   scratch_arc_count = 0;
    799 
    800   /* First group all the unused functions together.  */
    801   unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
    802   used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
    803   scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
    804   high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
    805   scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
    806   unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
    807 
    808   /* Walk through all the functions; mark those which are never
    809      called as placed (we'll emit them as a group later).  */
    810   for (sym_index = 0, used = 0, unused = 0; sym_index < symtab.len; sym_index++)
    811     {
    812       if (symtab.base[sym_index].ncalls == 0)
    813 	{
    814 	  unused_syms[unused++] = &symtab.base[sym_index];
    815 	  symtab.base[sym_index].has_been_placed = 1;
    816 	}
    817       else
    818 	{
    819 	  used_syms[used++] = &symtab.base[sym_index];
    820 	  symtab.base[sym_index].has_been_placed = 0;
    821 	  symtab.base[sym_index].next = 0;
    822 	  symtab.base[sym_index].prev = 0;
    823 	  symtab.base[sym_index].nuses = 0;
    824 	}
    825     }
    826 
    827   /* Sort the arcs from most used to least used.  */
    828   qsort (arcs, numarcs, sizeof (Arc *), cmp_arc_count);
    829 
    830   /* Compute the total arc count.  Also mark arcs as unplaced.
    831 
    832      Note we don't compensate for overflow if that happens!
    833      Overflow is much less likely when this file is compiled
    834      with GCC as it can double-wide integers via long long.  */
    835   total_arcs = 0;
    836   for (arc_index = 0; arc_index < numarcs; arc_index++)
    837     {
    838       total_arcs += arcs[arc_index]->count;
    839       arcs[arc_index]->has_been_placed = 0;
    840     }
    841 
    842   /* We want to pull out those functions which are referenced
    843      by many highly used arcs and emit them as a group.  This
    844      could probably use some tuning.  */
    845   tmp_arcs_count = 0;
    846   for (arc_index = 0; arc_index < numarcs; arc_index++)
    847     {
    848       tmp_arcs_count += arcs[arc_index]->count;
    849 
    850       /* Count how many times each parent and child are used up
    851 	 to our threshhold of arcs (90%).  */
    852       if ((double)tmp_arcs_count / (double)total_arcs > 0.90)
    853 	break;
    854 
    855       arcs[arc_index]->child->nuses++;
    856     }
    857 
    858   /* Now sort a temporary symbol table based on the number of
    859      times each function was used in the highest used arcs.  */
    860   memcpy (scratch_syms, used_syms, used * sizeof (Sym *));
    861   qsort (scratch_syms, used, sizeof (Sym *), cmp_fun_nuses);
    862 
    863   /* Now pick out those symbols we're going to emit as
    864      a group.  We take up to 1.25% of the used symbols.  */
    865   for (sym_index = 0; sym_index < used / 80; sym_index++)
    866     {
    867       Sym *sym = scratch_syms[sym_index];
    868       Arc *arc;
    869 
    870       /* If we hit symbols that aren't used from many call sites,
    871 	 then we can quit.  We choose five as the low limit for
    872 	 no particular reason.  */
    873       if (sym->nuses == 5)
    874 	break;
    875 
    876       /* We're going to need the arcs between these functions.
    877 	 Unfortunately, we don't know all these functions
    878 	 until we're done.  So we keep track of all the arcs
    879 	 to the functions we care about, then prune out those
    880 	 which are uninteresting.
    881 
    882 	 An interesting variation would be to quit when we found
    883 	 multi-call site functions which account for some percentage
    884 	 of the arcs.  */
    885       arc = sym->cg.children;
    886 
    887       while (arc)
    888 	{
    889 	  if (arc->parent != arc->child)
    890 	    scratch_arcs[scratch_arc_count++] = arc;
    891 	  arc->has_been_placed = 1;
    892 	  arc = arc->next_child;
    893 	}
    894 
    895       arc = sym->cg.parents;
    896 
    897       while (arc)
    898 	{
    899 	  if (arc->parent != arc->child)
    900 	    scratch_arcs[scratch_arc_count++] = arc;
    901 	  arc->has_been_placed = 1;
    902 	  arc = arc->next_parent;
    903 	}
    904 
    905       /* Keep track of how many symbols we're going to place.  */
    906       scratch_index = sym_index;
    907 
    908       /* A lie, but it makes identifying
    909 	 these functions easier later.  */
    910       sym->has_been_placed = 1;
    911     }
    912 
    913   /* Now walk through the temporary arcs and copy
    914      those we care about into the high arcs array.  */
    915   for (arc_index = 0; arc_index < scratch_arc_count; arc_index++)
    916     {
    917       Arc *arc = scratch_arcs[arc_index];
    918 
    919       /* If this arc refers to highly used functions, then
    920 	 then we want to keep it.  */
    921       if (arc->child->has_been_placed
    922 	  && arc->parent->has_been_placed)
    923 	{
    924 	  high_arcs[high_arc_count++] = scratch_arcs[arc_index];
    925 
    926 	  /* We need to turn of has_been_placed since we're going to
    927 	     use the main arc placement algorithm on these arcs.  */
    928 	  arc->child->has_been_placed = 0;
    929 	  arc->parent->has_been_placed = 0;
    930 	}
    931     }
    932 
    933   /* Dump the multi-site high usage functions which are not
    934      going to be ordered by the main ordering algorithm.  */
    935   for (sym_index = 0; sym_index < scratch_index; sym_index++)
    936     {
    937       if (scratch_syms[sym_index]->has_been_placed)
    938 	printf ("%s\n", scratch_syms[sym_index]->name);
    939     }
    940 
    941   /* Now we can order the multi-site high use
    942      functions based on the arcs between them.  */
    943   qsort (high_arcs, high_arc_count, sizeof (Arc *), cmp_arc_count);
    944   order_and_dump_functions_by_arcs (high_arcs, high_arc_count, 1,
    945 				    unplaced_arcs, &unplaced_arc_count);
    946 
    947   /* Order and dump the high use functions left,
    948      these typically have only a few call sites.  */
    949   order_and_dump_functions_by_arcs (arcs, numarcs, 0,
    950 				    unplaced_arcs, &unplaced_arc_count);
    951 
    952   /* Now place the rarely used functions.  */
    953   order_and_dump_functions_by_arcs (unplaced_arcs, unplaced_arc_count, 1,
    954 				    scratch_arcs, &scratch_arc_count);
    955 
    956   /* Output any functions not emitted by the order_and_dump calls.  */
    957   for (sym_index = 0; sym_index < used; sym_index++)
    958     if (used_syms[sym_index]->has_been_placed == 0)
    959       printf("%s\n", used_syms[sym_index]->name);
    960 
    961   /* Output the unused functions.  */
    962   for (sym_index = 0; sym_index < unused; sym_index++)
    963     printf("%s\n", unused_syms[sym_index]->name);
    964 
    965   unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
    966   used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
    967   scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
    968   high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
    969   scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
    970   unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
    971 
    972   free (unused_syms);
    973   free (used_syms);
    974   free (scratch_syms);
    975   free (high_arcs);
    976   free (scratch_arcs);
    977   free (unplaced_arcs);
    978 }
    979 
    980 /* Place functions based on the arcs in THE_ARCS with ARC_COUNT entries;
    981    place unused arcs into UNPLACED_ARCS/UNPLACED_ARC_COUNT.
    982 
    983    If ALL is nonzero, then place all functions referenced by THE_ARCS,
    984    else only place those referenced in the top 99% of the arcs in THE_ARCS.  */
    985 
    986 #define MOST 0.99
    987 static void
    988 order_and_dump_functions_by_arcs (the_arcs, arc_count, all,
    989 				  unplaced_arcs, unplaced_arc_count)
    990      Arc **the_arcs;
    991      unsigned long arc_count;
    992      int all;
    993      Arc **unplaced_arcs;
    994      unsigned long *unplaced_arc_count;
    995 {
    996 #ifdef __GNUC__
    997   unsigned long long tmp_arcs, total_arcs;
    998 #else
    999   unsigned long tmp_arcs, total_arcs;
   1000 #endif
   1001   unsigned int arc_index;
   1002 
   1003   /* If needed, compute the total arc count.
   1004 
   1005      Note we don't compensate for overflow if that happens!  */
   1006   if (! all)
   1007     {
   1008       total_arcs = 0;
   1009       for (arc_index = 0; arc_index < arc_count; arc_index++)
   1010 	total_arcs += the_arcs[arc_index]->count;
   1011     }
   1012   else
   1013     total_arcs = 0;
   1014 
   1015   tmp_arcs = 0;
   1016 
   1017   for (arc_index = 0; arc_index < arc_count; arc_index++)
   1018     {
   1019       Sym *sym1, *sym2;
   1020       Sym *child, *parent;
   1021 
   1022       tmp_arcs += the_arcs[arc_index]->count;
   1023 
   1024       /* Ignore this arc if it's already been placed.  */
   1025       if (the_arcs[arc_index]->has_been_placed)
   1026 	continue;
   1027 
   1028       child = the_arcs[arc_index]->child;
   1029       parent = the_arcs[arc_index]->parent;
   1030 
   1031       /* If we're not using all arcs, and this is a rarely used
   1032 	 arc, then put it on the unplaced_arc list.  Similarly
   1033 	 if both the parent and child of this arc have been placed.  */
   1034       if ((! all && (double)tmp_arcs / (double)total_arcs > MOST)
   1035 	  || child->has_been_placed || parent->has_been_placed)
   1036 	{
   1037 	  unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
   1038 	  continue;
   1039 	}
   1040 
   1041       /* If all slots in the parent and child are full, then there isn't
   1042 	 anything we can do right now.  We'll place this arc on the
   1043 	 unplaced arc list in the hope that a global positioning
   1044 	 algorithm can use it to place function chains.  */
   1045       if (parent->next && parent->prev && child->next && child->prev)
   1046 	{
   1047 	  unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
   1048 	  continue;
   1049 	}
   1050 
   1051       /* If the parent is unattached, then find the closest
   1052 	 place to attach it onto child's chain.   Similarly
   1053 	 for the opposite case.  */
   1054       if (!parent->next && !parent->prev)
   1055 	{
   1056 	  int next_count = 0;
   1057 	  int prev_count = 0;
   1058 	  Sym *prev = child;
   1059 	  Sym *next = child;
   1060 
   1061 	  /* Walk to the beginning and end of the child's chain.  */
   1062 	  while (next->next)
   1063 	    {
   1064 	      next = next->next;
   1065 	      next_count++;
   1066 	    }
   1067 
   1068 	  while (prev->prev)
   1069 	    {
   1070 	      prev = prev->prev;
   1071 	      prev_count++;
   1072 	    }
   1073 
   1074 	  /* Choose the closest.  */
   1075 	  child = next_count < prev_count ? next : prev;
   1076 	}
   1077       else if (! child->next && !child->prev)
   1078 	{
   1079 	  int next_count = 0;
   1080 	  int prev_count = 0;
   1081 	  Sym *prev = parent;
   1082 	  Sym *next = parent;
   1083 
   1084 	  while (next->next)
   1085 	    {
   1086 	      next = next->next;
   1087 	      next_count++;
   1088 	    }
   1089 
   1090 	  while (prev->prev)
   1091 	    {
   1092 	      prev = prev->prev;
   1093 	      prev_count++;
   1094 	    }
   1095 
   1096 	  parent = prev_count < next_count ? prev : next;
   1097 	}
   1098       else
   1099 	{
   1100 	  /* Couldn't find anywhere to attach the functions,
   1101 	     put the arc on the unplaced arc list.  */
   1102 	  unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
   1103 	  continue;
   1104 	}
   1105 
   1106       /* Make sure we don't tie two ends together.  */
   1107       sym1 = parent;
   1108       if (sym1->next)
   1109 	while (sym1->next)
   1110 	  sym1 = sym1->next;
   1111       else
   1112 	while (sym1->prev)
   1113 	  sym1 = sym1->prev;
   1114 
   1115       sym2 = child;
   1116       if (sym2->next)
   1117 	while (sym2->next)
   1118 	  sym2 = sym2->next;
   1119       else
   1120 	while (sym2->prev)
   1121 	  sym2 = sym2->prev;
   1122 
   1123       if (sym1 == child
   1124 	  && sym2 == parent)
   1125 	{
   1126 	  /* This would tie two ends together.  */
   1127 	  unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
   1128 	  continue;
   1129 	}
   1130 
   1131       if (parent->next)
   1132 	{
   1133 	  /* Must attach to the parent's prev field.  */
   1134 	  if (! child->next)
   1135 	    {
   1136 	      /* parent-prev and child-next */
   1137 	      parent->prev = child;
   1138 	      child->next = parent;
   1139 	      the_arcs[arc_index]->has_been_placed = 1;
   1140 	    }
   1141 	}
   1142       else if (parent->prev)
   1143 	{
   1144 	  /* Must attach to the parent's next field.  */
   1145 	  if (! child->prev)
   1146 	    {
   1147 	      /* parent-next and child-prev */
   1148 	      parent->next = child;
   1149 	      child->prev = parent;
   1150 	      the_arcs[arc_index]->has_been_placed = 1;
   1151 	    }
   1152 	}
   1153       else
   1154 	{
   1155 	  /* Can attach to either field in the parent, depends
   1156 	     on where we've got space in the child.  */
   1157 	  if (child->prev)
   1158 	    {
   1159 	      /* parent-prev and child-next.  */
   1160 	      parent->prev = child;
   1161 	      child->next = parent;
   1162 	      the_arcs[arc_index]->has_been_placed = 1;
   1163 	    }
   1164 	  else
   1165 	    {
   1166 	      /* parent-next and child-prev.  */
   1167 	      parent->next = child;
   1168 	      child->prev = parent;
   1169 	      the_arcs[arc_index]->has_been_placed = 1;
   1170 	    }
   1171 	}
   1172     }
   1173 
   1174   /* Dump the chains of functions we've made.  */
   1175   for (arc_index = 0; arc_index < arc_count; arc_index++)
   1176     {
   1177       Sym *sym;
   1178       if (the_arcs[arc_index]->parent->has_been_placed
   1179 	  || the_arcs[arc_index]->child->has_been_placed)
   1180 	continue;
   1181 
   1182       sym = the_arcs[arc_index]->parent;
   1183 
   1184       /* If this symbol isn't attached to any other
   1185 	 symbols, then we've got a rarely used arc.
   1186 
   1187 	 Skip it for now, we'll deal with them later.  */
   1188       if (sym->next == NULL
   1189 	  && sym->prev == NULL)
   1190 	continue;
   1191 
   1192       /* Get to the start of this chain.  */
   1193       while (sym->prev)
   1194 	sym = sym->prev;
   1195 
   1196       while (sym)
   1197 	{
   1198 	  /* Mark it as placed.  */
   1199 	  sym->has_been_placed = 1;
   1200 	  printf ("%s\n", sym->name);
   1201 	  sym = sym->next;
   1202 	}
   1203     }
   1204 
   1205   /* If we want to place all the arcs, then output
   1206      those which weren't placed by the main algorithm.  */
   1207   if (all)
   1208     for (arc_index = 0; arc_index < arc_count; arc_index++)
   1209       {
   1210 	Sym *sym;
   1211 	if (the_arcs[arc_index]->parent->has_been_placed
   1212 	    || the_arcs[arc_index]->child->has_been_placed)
   1213 	  continue;
   1214 
   1215 	sym = the_arcs[arc_index]->parent;
   1216 
   1217 	sym->has_been_placed = 1;
   1218 	printf ("%s\n", sym->name);
   1219       }
   1220 }
   1221 
   1222 /* Compare two function_map structs based on file name.
   1223    We want to sort in ascending order.  */
   1224 
   1225 static int
   1226 cmp_symbol_map (const void * l, const void * r)
   1227 {
   1228   return filename_cmp (((struct function_map *) l)->file_name,
   1229 		       ((struct function_map *) r)->file_name);
   1230 }
   1231 
   1232 /* Print a suggested .o ordering for files on a link line based
   1233    on profiling information.  This uses the function placement
   1234    code for the bulk of its work.  */
   1235 
   1236 void
   1237 cg_print_file_ordering (void)
   1238 {
   1239   unsigned long scratch_arc_count;
   1240   unsigned long arc_index;
   1241   unsigned long sym_index;
   1242   Arc **scratch_arcs;
   1243   char *last;
   1244 
   1245   scratch_arc_count = 0;
   1246 
   1247   scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
   1248   for (arc_index = 0; arc_index < numarcs; arc_index++)
   1249     {
   1250       if (! arcs[arc_index]->parent->mapped
   1251 	  || ! arcs[arc_index]->child->mapped)
   1252 	arcs[arc_index]->has_been_placed = 1;
   1253     }
   1254 
   1255   order_and_dump_functions_by_arcs (arcs, numarcs, 0,
   1256 				    scratch_arcs, &scratch_arc_count);
   1257 
   1258   /* Output .o's not handled by the main placement algorithm.  */
   1259   for (sym_index = 0; sym_index < symtab.len; sym_index++)
   1260     {
   1261       if (symtab.base[sym_index].mapped
   1262 	  && ! symtab.base[sym_index].has_been_placed)
   1263 	printf ("%s\n", symtab.base[sym_index].name);
   1264     }
   1265 
   1266   qsort (symbol_map, symbol_map_count, sizeof (struct function_map), cmp_symbol_map);
   1267 
   1268   /* Now output any .o's that didn't have any text symbols.  */
   1269   last = NULL;
   1270   for (sym_index = 0; sym_index < symbol_map_count; sym_index++)
   1271     {
   1272       unsigned int index2;
   1273 
   1274       /* Don't bother searching if this symbol
   1275 	 is the same as the previous one.  */
   1276       if (last && !filename_cmp (last, symbol_map[sym_index].file_name))
   1277 	continue;
   1278 
   1279       for (index2 = 0; index2 < symtab.len; index2++)
   1280 	{
   1281 	  if (! symtab.base[index2].mapped)
   1282 	    continue;
   1283 
   1284 	  if (!filename_cmp (symtab.base[index2].name,
   1285 			     symbol_map[sym_index].file_name))
   1286 	    break;
   1287 	}
   1288 
   1289       /* If we didn't find it in the symbol table, then it must
   1290 	 be a .o with no text symbols.  Output it last.  */
   1291       if (index2 == symtab.len)
   1292 	printf ("%s\n", symbol_map[sym_index].file_name);
   1293       last = symbol_map[sym_index].file_name;
   1294     }
   1295 }
   1296