Home | History | Annotate | Download | only in auxprogs
      1 #! /usr/bin/perl
      2 #
      3 # Generate Valgrind's module dependence graph in 'dot' format.
      4 #
      5 # You can run it from anywhere(?) in a Valgrind source tree, but the two
      6 # most interesting places are:
      7 # - the root, to get the entire module dependence graph.
      8 # - coregrind/, to get the core's module dependence graph.
      9 #
     10 # It sends a dot-format graph to stdout.  You can use dot (part of the
# "GraphViz" package) to generate a PostScript graph like this:
     12 #
     13 #   dot -Tps foo.dot -o foo.ps
     14 #
     15 # Caveats:
     16 # - It's not a proper parser.  If you have a #include that is commented out,
     17 #   it will think it's there.  We see that particularly in m_demangle.
     18 # - It only looks in C files, not in header files, so it will miss any
     19 #   extra dependencies this causes.  Fortunately we don't have many like
     20 #   that.
     21 
     22 use warnings;
     23 use strict;
     24 
     25 #----------------------------------------------------------------------------
     26 # Global variables
     27 #----------------------------------------------------------------------------
     28 
     29 # The dependence graph is a set of src-->dst pairs, stored in a double-hash:
     30 #
     31 #   hash(src, hash(dst, dst_realname))
     32 #
# Both 'src' and 'dst' are node names.  'src' is always a module name,
     34 # eg. 'm_main' or 'memcheck'.  The destination is sometimes a module name,
     35 # and sometimes a header file.  Because Dot can't handle certain characters
     36 # in its node names, when 'dst' represents a header file it's a transformed
     37 # version of the header file name, eg. 'INT_memcheck_h' or 'EXT_sys_mman_h'.
     38 # The 'dst_realname' holds the original name, eg.  '"memcheck.h"' or
     39 # "<sys/mman.h>".  We use 'dst' for the node name in the graph, but label it
     40 # with 'dst_realname' and that's what gets seen.  (Although we use "" for
     41 # 'dst_realname' when it would be the same as 'dst'.)
     42 my $deps = {};
     43 
     44 # Directories to skip.  These are the defaults, they can be augmented
     45 # using command-line options.
     46 my %dirs_to_skip = ( auxprogs => 1, hp2ps => 1, tests => 1 );
     47 
     48 # Command-line variables -- things we should show.  Default is yes.
     49 my $show_tools   = 1;
     50 my $show_headers = 1;
     51 my $show_libc    = 1;
     52 
     53 # Modules to hide.
     54 my %hide;
     55 
     56 # List of all tools.
     57 my @tools = ( "cachegrind", "helgrind",
     58               "lackey", "massif", "memcheck", "none" );
     59 
     60 my $usage = <<END
     61 usage: gen-mdg [options]
     62 
     63   options:
     64     --headers=no|yes    show headers, ie. show module-to-module deps only
     65     --hide=<a>,<b>,...  hide module(s) named <a>, <b>, ...
     66 END
     67 ;
     68 
     69 #----------------------------------------------------------------------------
     70 # Subroutines
     71 #----------------------------------------------------------------------------
     72 
     73 sub process_cmd_line()
     74 {
     75    for my $arg (@ARGV) { 
     76 
     77         # --headers=yes|no
     78         if ($arg =~ /^--headers=(yes|no)$/) {
     79             $show_headers = 1 if ($1 eq "yes");
     80             $show_headers = 0 if ($1 eq "no");
     81 
     82         # --hide=<a>,<b>,...
     83         } elsif ($arg =~ /^--hide=(.*)$/) {
     84             my @hiders = split(/,/, $1);
     85             foreach my $h (@hiders) {
     86                 $hide{$h} = 1;
     87             }
     88 
     89         } else {
     90             die $usage;
     91         }
     92     }
     93 
     94     if (!$show_tools) {
     95         foreach my $tool (@tools) {
     96             $dirs_to_skip{$tool} = 1;
     97         }
     98     }
     99 }
    100 
    101 # Convert a header filename into a node name acceptable by dot.
    102 sub clean_nodename($)
    103 {
    104     my ($s) = @_;
    105     $s =~ s/"([^"]+)"/INT_$1/;  # "foo.h" --> foo.h
    106     $s =~ s/<([^>]+)>/EXT_$1/;  # <foo.h> --> foo.h
    107     $s =~ s/\./_/g;  # foo.h     --> foo_h
    108     $s =~ s/-/_/g;   # foo-bar.h --> foo_bar_h
    109     $s =~ s/\//_/g;  # bar/foo_h --> bar_foo_h
    110     return $s;
    111 }
    112 
    113 # Convert a header filename into a node label acceptable by dot.
    114 sub clean_nodelabel($)
    115 {
    116     my ($s) = @_;
    117     $s =~ s/"/\\"/g;    # "foo.h" --> \"foo.h\"
    118     return $s;
    119 }
    120 
    121 # $module is the module to which the C/asm file $f belongs.
    122 sub scan_C_or_asm_file($$)
    123 {
    124     my ($module, $f) = @_;
    125 
    126     # Skip if this is a module we want to hide
    127     if ($hide{$module}) {
    128         return;
    129     }
    130     
    131     # Get any existing dependencies for this module, initialise if none
    132     my $module_deps = $deps->{$module};
    133     if (not defined $module_deps) {
    134         $module_deps = {};
    135     }
    136     
    137     # Scan the C/asm file
    138     open(CFILE, "< $f") || die "File $f not openable\n";
    139     while (my $line = <CFILE>) {
    140         if ($line =~ /#include\s+(("|<)[^">]+("|>))/) {
    141             # Right!  We've found a #include line.
    142             my $include_string = $1;
    143             my $target;
    144             my $realname;
    145             if ($include_string =~ /"pub_(core|tool)_([A-Za-z]+).h"/) {
    146                 # If #include string is "pub_core_foo.h" or "pub_tool_foo.h", 
    147                 # the target module is "m_foo".
    148                 #
    149                 # Nb: assuming the "foo" part does not contains underscores!
    150                 $target   = "m_$2";
    151                 $realname = "";
    152 
    153                 # But don't show hidden modules
    154                 if ($hide{$target}) {
    155                     $target = "";
    156                 }
    157 
    158             } elsif ($show_headers) {
    159                 # Otherwise use the #include string as-is for the target.
    160                 # Note that "#include pub_core_foo_asm.h" falls into this
    161                 # category.  We don't consider that part of the m_foo module
    162                 # because the *_asm.h only define some constants.
    163                 $target   = clean_nodename($include_string);
    164                 $realname = clean_nodelabel($include_string);
    165 
    166             } else {
    167                 # Don't record anything
    168                 $target   = "";
    169                 $realname = "";
    170             }
    171 
    172             # Maybe record dependency (unless it's circular)
    173             if ($target ne "" and $target ne $module) {
    174                 $module_deps->{$target} = $realname;
    175             }
    176         }
    177     }
    178     close(CFILE);
    179 
    180     # Store the updated dependencies.
    181     $deps->{$module} = $module_deps;
    182 }
    183 
    184 sub process_dir($);      # forward declarations required because of recursion
    185 sub process_dir($)
    186 {
    187     my ($parentd) = @_;
    188 
    189     # Go through each file/dir in the directory.
    190     my @fs = <*>;
    191     foreach my $f (@fs) {
    192         if (-d $f) {
    193             # Directory -- recursively process unless we want to skip it.
    194             if (not exists $dirs_to_skip{$f}) {
    195                 chdir $f or die;
    196                 process_dir($f);
    197                 chdir ".." or die;
    198             }
    199 
    200         } elsif (-f $f) {
    201             if ($f =~ /\w+\.[cS]$/) {
    202                 # If this is a .c/.S file in coregrind/, it's a module in its
    203                 # own right, eg. coregrind/m_redir.c --> module name of
    204                 # "m_redir".
    205                 #
    206                 # Otherwise, it belongs to the module whose name is that of
    207                 # the parent directory, eg. coregrind/m_debuginfo/symtab.c
    208                 # --> module name of "m_debuginfo".
    209                 my $module;
    210                 if ($parentd eq "coregrind") {
    211                     $module = $f;
    212                     $module =~ s/(\w+).[cS]$/$1/;     # foo.c --> foo
    213                 } else {
    214                     $module = $parentd;
    215                 }
    216                 # Now the module/f pair is either:
    217                 #   -    like this:  (m_redir, m_redir.c)
    218                 #   - or like this:  (m_debuginfo, symtab.c)
    219                 scan_C_or_asm_file($module, $f);
    220             }
    221 
    222         } else {
    223             die "$f is not a dir nor a file\n";
    224         }
    225     }
    226 }
    227 
    228 sub print_graph()
    229 {
    230     my %printed_realnames;
    231 
    232     print("digraph G {\n");
    233     while (my ($src, $dst_hash) = each %$deps) {
    234         while (my ($dst, $dst_realname) = each %$dst_hash) {
    235 
    236             # If the dstnode has a realname, print just the dstnode with that
    237             # realname, and record it in %printed_realnames so we don't print
    238             # it again.
    239             if ($dst_realname ne "") {
    240                 if (not defined $printed_realnames{$dst}) {
    241                     print("  $dst [label=\"$dst_realname\"]\n");
    242                     $printed_realnames{$dst} = 1;
    243                 }
    244             }
    245             
    246             # Print the src-->dst edge.
    247             print("  $src -> $dst\n");
    248         }
    249     }
    250     print("}\n");
    251 }
    252 
    253 #----------------------------------------------------------------------------
    254 # main
    255 #----------------------------------------------------------------------------
    256 process_cmd_line();
    257 
    258 my $start_dir = `basename \`pwd\``;
    259 chop($start_dir);           # trim newline
    260 process_dir($start_dir);
    261 
    262 print_graph();
    263 
    264 
    265