Home | History | Annotate | Download | only in auxprogs
      1 #!/usr/bin/env perl 
      2 
      3 #-----------------------------------------------------------------
      4 # Quick and dirty script to summarize build information for a
      5 # set of nightly runs.
      6 #
      7 # The results of the nighly regression runs are extracted from 
      8 # the GMANE mail archive. The URL for a given mail sent to the
      9 # valgrind-developers mailing list is
     10 #
     11 #   http://article.gmane.org/gmane.comp.debugging.valgrind.devel/<integer>
     12 #
     13 # The script extracts information about the regression run from a
     14 # block of information at the beginning of the mail. That information 
     15 # was added beginning October 4, 2011. Therefore, only regression runs
     16 # from that date or later can be analyzed.
     17 #
     18 # There is unfortunately no good way of figuring out the interval
     19 # of integers in the above URL that include all nightly regression
     20 # runs.
     21 #
     22 # The function get_regtest_data does all the work. It returns a hash
     23 # whose keys are the dates at which nightly runs took place. The value
     24 # is in turn a hash.
     25 #
     26 # Each such hash has the following keys:
     27 #   "builds"                 array of hashes
     28 #   "num_builds"             int
     29 #   "num_failing_builds"     int
     30 #   "num_passing_builds"     int
     31 #   "num_testcase_failures"  int
     32 #   "num_failing_testcases"  int
     33 #   "failure_frequency"      hash indexed by testcase name; value = int
     34 # 
     35 # "builds" is an array of hashes with the following keys
     36 #   "arch"                   string (architecture)
     37 #   "distro"                 string (distribution, e.g. Fedora-15)
     38 #   "failures"               array of strings (failing testcases)
     39 #   "valgrind revision"      integer
     40 #   "VEX revision"           integer
     41 #   "GCC version"            string
     42 #   "C library"              string
     43 #   "uname -mrs"             string
     44 #   "Vendor version"         string
     45 # 
     46 #-----------------------------------------------------------------
     47 use strict;
     48 use warnings; 
     49 
     50 use LWP::Simple;
     51 use Getopt::Long;
     52 
     53 my $prog_name = "nightly-build-summary";
     54 
     55 my $debug = 0;
     56 my $keep  = 0;
     57 
     58 my $usage=<<EOF;
     59 USAGE
     60 
     61   $prog_name
     62 
     63      --from=INTEGER    beginning of mail interval; > 14800
     64 
     65     [--to=INTEGER]     end of mail interval; default = from + 100
     66 
     67     [--debug]          verbose mode (debugging)
     68 
     69     [--keep]           write individual emails to files (debugging)
     70 
     71     [--dump]           write results suitable for post-processing
     72 
     73     [--readable]       write results in human readable form (default)
     74 
     75 EOF
     76 
     77 
     78 #-----------------------------------------------------------------
     79 # Search for a line indicating that this is an email containing
     80 # the results of a valgrind regression run.
     81 # Return 1, if found and 0 oherwise.
     82 #-----------------------------------------------------------------
     83 sub is_regtest_result {
     84     my (@lines) = @_;
     85 
     86     foreach my $line (@lines) {
     87         return 1 if ($line =~ "^valgrind revision:");
     88     }
     89 
     90     return 0;
     91 }
     92 
     93 
     94 #-----------------------------------------------------------------
     95 # Extract information from the run. Don't prep the data here. This
     96 # is done later on.
     97 #-----------------------------------------------------------------
     98 sub get_raw_data {
     99     my (@lines, $msgno) = @_;
    100     my ($i, $n, $line, $date);
    101 
    102     $n = scalar @lines;
    103 
    104     my %hash = ();
    105 
    106 # 1) Locate the section with the info about the environment of this nightly run
    107     for ($i = 0; $i < $n; ++$i) {
    108         last if ($lines[$i] =~ /^valgrind revision:/);
    109     }
    110     die "no info block in message $msgno" if ($i == $n);
    111 
    112 # 2) Read the info about the build: compiler, valgrind revision etc.
    113 #    and put it into a hash.
    114     for ( ; $i < $n; ++$i) {
    115         $line = $lines[$i];
    116         last if ($line =~ /^$/);    # empty line indicates end of section
    117         my ($key, $value) = split(/:/, $line);
    118         $value =~ s/^[ ]*//;        # removing leading blanks
    119         $hash{$key} = $value;
    120     }
    121 
    122     if ($debug) {
    123         foreach my $key (keys %hash) {
    124             my ($val) = $hash{$key};
    125             print "regtest env: KEY = |$key|  VAL = |$val|\n";
    126         }
    127     }
    128 
    129 # 3) Get the date from when the build was kicked off.
    130     for ( ; $i < $n; ++$i) {
    131         $line = $lines[$i];
    132 
    133         if ($line =~ /^Started at[ ]+([^ ]+)/) {
    134             $date = $1;
    135             print "DATE = $date\n";
    136             last;
    137         }
    138     }
    139     die "no date found in message $msgno" if ($i == $n);
    140 
    141 
    142 # 4) Find out if the regression run failed or passed
    143     $hash{"failures"} = [];
    144     for ($i = $i + 1; $i < $n; ++$i) {
    145         $line = $lines[$i];
    146         if ($line =~ /Running regression tests/) {
    147             return %hash if ($line =~ /done$/);   # regtest succeeded; no failures
    148             die "cannot determine regtest outcome for message $msgno"
    149                 if (! ($line =~ /failed$/));
    150             last;
    151         }
    152     }
    153 
    154 # 5) Regtest failed; locate the section with the list of failing testcases
    155     for ($i = $i + 1; $i < $n; ++$i) {
    156         $line = $lines[$i];
    157 # Match for end-of-line == because line might be split.
    158         last if ($line =~ /==$/);
    159     }
    160     die "cannot locate failing testcases in message $msgno" if ($i == $n);
    161 
    162 # 6) Get list of failing testcases
    163     for ($i = $i + 1; $i < $n; ++$i) {
    164         $line = $lines[$i];
    165 
    166         last if ($line =~ /^$/);
    167 
    168         my ($testcase) = (split(/\s+/, $line))[0];
    169         print "ADD failing testcase $testcase\n" if ($debug);
    170         push @{$hash{"failures"}}, $testcase;
    171     }
    172 
    173     return ($date, %hash);
    174 }
    175 
    176 
    177 #-----------------------------------------------------------------
    178 # Extract architecture; get a pretty name for the distro
    179 #-----------------------------------------------------------------
    180 sub prep_regtest_data {
    181     my (%hash) = @_;
    182     my ($val, $arch, $distro);
    183 
    184     $val = $hash{"uname -mrs"};
    185     die "uname -mrs info is missing" if (! defined $val);
    186     $arch = (split(/ /, $val))[2];
    187 
    188     $val = $hash{"Vendor version"};
    189     die "Vendor version info is missing" if (! defined $val);
    190 
    191     if ($val =~ /Fedora release ([0-9]+)/) {
    192         $distro = "Fedora-$1";
    193     } elsif ($val =~ /openSUSE ([0-9]+)\.([0-9]+)/) {
    194         $distro = "openSUSE-$1.$2";
    195     } elsif ($val =~ /SUSE Linux Enterprise Server 11 SP1/) {
    196         $distro = "SLES-11-SP1";
    197     } elsif ($val =~ /Red Hat Enterprise Linux AS release 4/) {
    198         $distro = "RHEL-4";
    199     } else {
    200         $distro = "UNKNOWN";
    201     }
    202 
    203 # Add architecture and distribution to hash
    204     $hash{"arch"}   = $arch;
    205     $hash{"distro"} = $distro;
    206 
    207     return %hash;
    208 }
    209 
    210 
    211 #-----------------------------------------------------------------
    212 # Precompute some summary information and record it
    213 #-----------------------------------------------------------------
    214 sub precompute_summary_info
    215 {
    216     my (%dates) = @_;
    217 
    218     foreach my $date (sort keys %dates) {
    219         my %failure_frequency = ();
    220 
    221         my %nightly = %{ $dates{$date} };
    222         my @builds  = @{ $nightly{"builds"} };
    223 
    224         $nightly{"num_builds"} = scalar (@builds);
    225         $nightly{"num_failing_builds"} = 0;
    226         $nightly{"num_testcase_failures"} = 0;
    227 
    228         foreach my $build (@builds) {
    229             my %regtest_data   = %{ $build };
    230 
    231             my @failures = @{ $regtest_data{"failures"} };
    232             my $num_fail = scalar (@failures);
    233 
    234             ++$nightly{"num_failing_builds"} if ($num_fail != 0);
    235             $nightly{"num_testcase_failures"} += $num_fail;
    236 
    237 # Compute how often a testcase failed
    238             foreach my $test ( @failures ) {
    239                 if (defined $failure_frequency{$test}) {
    240                     ++$failure_frequency{$test};
    241                 } else {
    242                     $failure_frequency{$test} = 1;
    243                 }
    244             }
    245         }
    246 
    247         $nightly{"num_passing_builds"} = 
    248             $nightly{"num_builds"} - $nightly{"num_failing_builds"};
    249 
    250         $nightly{"num_failing_testcases"} = scalar (keys %failure_frequency);
    251 
    252         $nightly{"failure_frequency"} = { %failure_frequency };
    253 
    254         $dates{$date} = { %nightly };
    255     }
    256 
    257     return %dates;
    258 }
    259 
    260 
    261 #-----------------------------------------------------------------
    262 # Get messages from GMANE, and build up a database of results.
    263 #-----------------------------------------------------------------
    264 sub get_regtest_data {
    265     my ($from, $to) = @_;
    266 
    267     my $url_base = "http://article.gmane.org/gmane.comp.debugging.valgrind.devel/";
    268 
    269     my %dates = ();
    270 
    271     my $old_date = "-1";
    272     my @builds = ();
    273 
    274     for (my $i = $from; $i <= $to; ++$i) {
    275         my $url = "$url_base" . "$i";
    276 
    277 	my $page = get("$url");
    278 
    279         if ($keep) {
    280             open (EMAIL, ">$i");
    281             print EMAIL  $page;
    282             close(EMAIL);
    283         }
    284 
    285 # Detect if the article does not exist. Happens for too large --to= values 
    286         last if ($page eq "No such file.\n");
    287 
    288 # Split the page into lines
    289         my @lines = split(/\n/, $page);
    290 
    291 # Check whether it contains a regression test result
    292         next if (! is_regtest_result(@lines));
    293         print "message $i is a regression test result\n" if ($debug);
    294 
    295 # Get the raw data
    296         my ($date, %regtest_data) = get_raw_data(@lines);
    297 
    298         %regtest_data = prep_regtest_data(%regtest_data);
    299 
    300         if ($date ne $old_date) {
    301             my %nightly = ();
    302             $nightly{"builds"} = [ @builds ];
    303             $dates{$old_date} = { %nightly } if ($old_date ne "-1");
    304 
    305             $old_date = $date;
    306             @builds = ();
    307         }
    308 
    309         push @builds, { %regtest_data };
    310     }
    311     my %nightly = ();
    312     $nightly{"builds"} = [ @builds ];
    313     $dates{$old_date} = { %nightly } if ($old_date ne "-1");
    314 
    315 # Convenience: precompute some info we'll be interested in
    316     %dates = precompute_summary_info( %dates );
    317 
    318     return %dates;
    319 }
    320 
    321 
    322 #-----------------------------------------------------------------
    323 # Write out the results in a form suitable for automatic post-processing
    324 #-----------------------------------------------------------------
    325 sub dump_results {
    326     my (%dates) = @_;
    327 
    328     foreach my $date (sort keys %dates) {
    329 
    330         my %nightly = %{ $dates{$date} };
    331         my @builds  = @{ $nightly{"builds"} };
    332 
    333         foreach my $build (@builds) {
    334             my %regtest_data   = %{ $build };
    335 
    336             my $arch     = $regtest_data{"arch"};
    337             my $distro   = $regtest_data{"distro"};
    338             my @failures = @{ $regtest_data{"failures"} };
    339             my $num_fail = scalar (@failures);
    340             my $fails    = join(":", sort @failures);
    341 
    342             printf("Regrun: %s  %3d  %-10s %-20s %s\n",
    343                    $date, $num_fail, $arch, $distro, $fails);
    344         }
    345 
    346         my %failure_frequency = %{ $nightly{"failure_frequency"} };
    347 
    348         foreach my $test (keys %failure_frequency) {
    349             printf("Test:   %s  %3d  %s\n",
    350                    $date, $failure_frequency{$test}, $test);
    351         }
    352 
    353         printf("Total:  %s  builds: %d  %d fail  %d pass  tests: %d fail  %d unique\n",
    354                $date, $nightly{"num_builds"}, $nightly{"num_failing_builds"},
    355                $nightly{"num_passing_builds"}, $nightly{"num_testcase_failures"},
    356                $nightly{"num_failing_testcases"});
    357     }
    358 }
    359 
    360 
    361 sub write_readable_results {
    362     my (%dates) = @_;
    363 
    364     foreach my $date (sort keys %dates) {
    365         my %nightly = %{ $dates{$date} };
    366 
    367         print "$date\n----------\n";
    368 
    369         printf("%3d builds\n", $nightly{"num_builds"});
    370         printf("%3d builds fail\n", $nightly{"num_failing_builds"});
    371         printf("%3d builds pass\n", $nightly{"num_passing_builds"});
    372         print "\n";
    373         printf("%3d testcase failures (across all runs)\n",
    374                $nightly{"num_testcase_failures"});
    375         printf("%3d failing testcases (unique)\n",
    376                $nightly{"num_failing_testcases"});
    377         print "\n";
    378 
    379         my @builds  = @{ $nightly{"builds"} };
    380 
    381         if ($nightly{"num_passing_builds"} != 0) {
    382             print "Passing builds\n";
    383             print "--------------\n";
    384             foreach my $build (@builds) {
    385                 my %regtest_data = %{ $build };
    386                 my @failures     = @{ $regtest_data{"failures"} };
    387                 my $num_fail     = scalar (@failures);
    388 
    389                 if ($num_fail == 0) {
    390                     my $arch   = $regtest_data{"arch"};
    391                     my $distro = $regtest_data{"distro"};
    392 
    393                     printf("%-8s %-15s\n", $arch, $distro);
    394                 }
    395                 print "\n";
    396             }
    397             print "\n";
    398         }
    399 
    400         if ($nightly{"num_failing_builds"} != 0) {
    401             print "Failing builds\n";
    402             print "--------------\n";
    403             foreach my $build (@builds) {
    404                 my %regtest_data = %{ $build };
    405                 my @failures     = @{ $regtest_data{"failures"} };
    406                 my $num_fail     = scalar (@failures);
    407 
    408                 if ($num_fail != 0) {
    409                     my $arch     = $regtest_data{"arch"};
    410                     my $distro   = $regtest_data{"distro"};
    411 
    412                     printf("%-8s %-15s %d failures\n", $arch, $distro, $num_fail);
    413                     foreach my $test (@failures) {
    414                         print "         $test\n";
    415                     }
    416                     print "\n";
    417                 }
    418             }
    419             print "\n";
    420         }
    421 
    422         print "Failing testcases and their frequency\n";
    423         print "-------------------------------------\n";
    424         my %failure_frequency = %{ $nightly{"failure_frequency"} };
    425 
    426 # Sorted in decreasing frequency
    427         foreach my $test (sort {$failure_frequency{$b} cmp $failure_frequency{$a} }
    428                           keys %failure_frequency) {
    429             printf("%3d  %s\n", $failure_frequency{$test}, $test);
    430         }
    431         print "\n";
    432     }
    433 }
    434 
    435 
    436 sub main
    437 {
    438     my ($from, $to, $dump, $readable);
    439 
    440     $from = $to = 0;
    441     $dump = $readable = 0;
    442 
    443     GetOptions( "from=i"   => \$from,
    444                 "to=i"     => \$to,
    445                 "debug"    => \$debug,
    446                 "dump"     => \$dump,
    447                 "keep"     => \$keep,
    448                 "readable" => \$readable
    449         ) || die $usage;
    450 
    451 # 14800 is about Oct 4, 2011 which is when we began including information
    452 # about the environment
    453 
    454     die $usage if ($from < 14800);
    455 
    456     $to = $from + 100 if ($to == 0);
    457 
    458     if ($from > $to) {
    459         print STDERR "*** invalid [from,to] interval. Try again\n";
    460         die $usage;
    461     }
    462 
    463     $readable = 1 if ($dump == 0 && $readable == 0);
    464 
    465     print "check message interval [$from...$to]\n" if ($debug);
    466 
    467 # Get mails from GMANE mail archive
    468 
    469     my %dates = get_regtest_data($from, $to);
    470 
    471     dump_results(%dates) if ($dump);
    472 
    473     write_readable_results(%dates) if ($readable);
    474 }
    475 
    476 main();
    477 
    478 exit 0;
    479