1 #!/usr/bin/env perl 2 3 #----------------------------------------------------------------- 4 # Quick and dirty script to summarize build information for a 5 # set of nightly runs. 6 # 7 # The results of the nighly regression runs are extracted from 8 # the GMANE mail archive. The URL for a given mail sent to the 9 # valgrind-developers mailing list is 10 # 11 # http://article.gmane.org/gmane.comp.debugging.valgrind.devel/<integer> 12 # 13 # The script extracts information about the regression run from a 14 # block of information at the beginning of the mail. That information 15 # was added beginning October 4, 2011. Therefore, only regression runs 16 # from that date or later can be analyzed. 17 # 18 # There is unfortunately no good way of figuring out the interval 19 # of integers in the above URL that include all nightly regression 20 # runs. 21 # 22 # The function get_regtest_data does all the work. It returns a hash 23 # whose keys are the dates at which nightly runs took place. The value 24 # is in turn a hash. 25 # 26 # Each such hash has the following keys: 27 # "builds" array of hashes 28 # "num_builds" int 29 # "num_failing_builds" int 30 # "num_passing_builds" int 31 # "num_testcase_failures" int 32 # "num_failing_testcases" int 33 # "failure_frequency" hash indexed by testcase name; value = int 34 # 35 # "builds" is an array of hashes with the following keys 36 # "arch" string (architecture) 37 # "distro" string (distribution, e.g. Fedora-15) 38 # "failures" array of strings (failing testcases) 39 # "valgrind revision" integer 40 # "VEX revision" integer 41 # "GCC version" string 42 # "C library" string 43 # "uname -mrs" string 44 # "Vendor version" string 45 # 46 #----------------------------------------------------------------- 47 use strict; 48 use warnings; 49 50 use LWP::Simple; 51 use Getopt::Long; 52 53 my $prog_name = "nightly-build-summary"; 54 55 my $debug = 0; 56 my $keep = 0; 57 58 my $usage=<<EOF; 59 USAGE 60 61 $prog_name 62 63 --from=INTEGER beginning of mail interval; > 14800 64 65 [--to=INTEGER] end of mail interval; default = from + 100 66 67 [--debug] verbose mode (debugging) 68 69 [--keep] write individual emails to files (debugging) 70 71 [--dump] write results suitable for post-processing 72 73 [--readable] write results in human readable form (default) 74 75 EOF 76 77 78 #----------------------------------------------------------------- 79 # Search for a line indicating that this is an email containing 80 # the results of a valgrind regression run. 81 # Return 1, if found and 0 oherwise. 82 #----------------------------------------------------------------- 83 sub is_regtest_result { 84 my (@lines) = @_; 85 86 foreach my $line (@lines) { 87 return 1 if ($line =~ "^valgrind revision:"); 88 } 89 90 return 0; 91 } 92 93 94 #----------------------------------------------------------------- 95 # Extract information from the run. Don't prep the data here. This 96 # is done later on. 97 #----------------------------------------------------------------- 98 sub get_raw_data { 99 my (@lines, $msgno) = @_; 100 my ($i, $n, $line, $date); 101 102 $n = scalar @lines; 103 104 my %hash = (); 105 106 # 1) Locate the section with the info about the environment of this nightly run 107 for ($i = 0; $i < $n; ++$i) { 108 last if ($lines[$i] =~ /^valgrind revision:/); 109 } 110 die "no info block in message $msgno" if ($i == $n); 111 112 # 2) Read the info about the build: compiler, valgrind revision etc. 113 # and put it into a hash. 114 for ( ; $i < $n; ++$i) { 115 $line = $lines[$i]; 116 last if ($line =~ /^$/); # empty line indicates end of section 117 my ($key, $value) = split(/:/, $line); 118 $value =~ s/^[ ]*//; # removing leading blanks 119 $hash{$key} = $value; 120 } 121 122 if ($debug) { 123 foreach my $key (keys %hash) { 124 my ($val) = $hash{$key}; 125 print "regtest env: KEY = |$key| VAL = |$val|\n"; 126 } 127 } 128 129 # 3) Get the date from when the build was kicked off. 130 for ( ; $i < $n; ++$i) { 131 $line = $lines[$i]; 132 133 if ($line =~ /^Started at[ ]+([^ ]+)/) { 134 $date = $1; 135 print "DATE = $date\n"; 136 last; 137 } 138 } 139 die "no date found in message $msgno" if ($i == $n); 140 141 142 # 4) Find out if the regression run failed or passed 143 $hash{"failures"} = []; 144 for ($i = $i + 1; $i < $n; ++$i) { 145 $line = $lines[$i]; 146 if ($line =~ /Running regression tests/) { 147 return %hash if ($line =~ /done$/); # regtest succeeded; no failures 148 die "cannot determine regtest outcome for message $msgno" 149 if (! ($line =~ /failed$/)); 150 last; 151 } 152 } 153 154 # 5) Regtest failed; locate the section with the list of failing testcases 155 for ($i = $i + 1; $i < $n; ++$i) { 156 $line = $lines[$i]; 157 # Match for end-of-line == because line might be split. 158 last if ($line =~ /==$/); 159 } 160 die "cannot locate failing testcases in message $msgno" if ($i == $n); 161 162 # 6) Get list of failing testcases 163 for ($i = $i + 1; $i < $n; ++$i) { 164 $line = $lines[$i]; 165 166 last if ($line =~ /^$/); 167 168 my ($testcase) = (split(/\s+/, $line))[0]; 169 print "ADD failing testcase $testcase\n" if ($debug); 170 push @{$hash{"failures"}}, $testcase; 171 } 172 173 return ($date, %hash); 174 } 175 176 177 #----------------------------------------------------------------- 178 # Extract architecture; get a pretty name for the distro 179 #----------------------------------------------------------------- 180 sub prep_regtest_data { 181 my (%hash) = @_; 182 my ($val, $arch, $distro); 183 184 $val = $hash{"uname -mrs"}; 185 die "uname -mrs info is missing" if (! defined $val); 186 $arch = (split(/ /, $val))[2]; 187 188 $val = $hash{"Vendor version"}; 189 die "Vendor version info is missing" if (! defined $val); 190 191 if ($val =~ /Fedora release ([0-9]+)/) { 192 $distro = "Fedora-$1"; 193 } elsif ($val =~ /openSUSE ([0-9]+)\.([0-9]+)/) { 194 $distro = "openSUSE-$1.$2"; 195 } elsif ($val =~ /SUSE Linux Enterprise Server 11 SP1/) { 196 $distro = "SLES-11-SP1"; 197 } elsif ($val =~ /Red Hat Enterprise Linux AS release 4/) { 198 $distro = "RHEL-4"; 199 } else { 200 $distro = "UNKNOWN"; 201 } 202 203 # Add architecture and distribution to hash 204 $hash{"arch"} = $arch; 205 $hash{"distro"} = $distro; 206 207 return %hash; 208 } 209 210 211 #----------------------------------------------------------------- 212 # Precompute some summary information and record it 213 #----------------------------------------------------------------- 214 sub precompute_summary_info 215 { 216 my (%dates) = @_; 217 218 foreach my $date (sort keys %dates) { 219 my %failure_frequency = (); 220 221 my %nightly = %{ $dates{$date} }; 222 my @builds = @{ $nightly{"builds"} }; 223 224 $nightly{"num_builds"} = scalar (@builds); 225 $nightly{"num_failing_builds"} = 0; 226 $nightly{"num_testcase_failures"} = 0; 227 228 foreach my $build (@builds) { 229 my %regtest_data = %{ $build }; 230 231 my @failures = @{ $regtest_data{"failures"} }; 232 my $num_fail = scalar (@failures); 233 234 ++$nightly{"num_failing_builds"} if ($num_fail != 0); 235 $nightly{"num_testcase_failures"} += $num_fail; 236 237 # Compute how often a testcase failed 238 foreach my $test ( @failures ) { 239 if (defined $failure_frequency{$test}) { 240 ++$failure_frequency{$test}; 241 } else { 242 $failure_frequency{$test} = 1; 243 } 244 } 245 } 246 247 $nightly{"num_passing_builds"} = 248 $nightly{"num_builds"} - $nightly{"num_failing_builds"}; 249 250 $nightly{"num_failing_testcases"} = scalar (keys %failure_frequency); 251 252 $nightly{"failure_frequency"} = { %failure_frequency }; 253 254 $dates{$date} = { %nightly }; 255 } 256 257 return %dates; 258 } 259 260 261 #----------------------------------------------------------------- 262 # Get messages from GMANE, and build up a database of results. 263 #----------------------------------------------------------------- 264 sub get_regtest_data { 265 my ($from, $to) = @_; 266 267 my $url_base = "http://article.gmane.org/gmane.comp.debugging.valgrind.devel/"; 268 269 my %dates = (); 270 271 my $old_date = "-1"; 272 my @builds = (); 273 274 for (my $i = $from; $i <= $to; ++$i) { 275 my $url = "$url_base" . "$i"; 276 277 my $page = get("$url"); 278 279 if ($keep) { 280 open (EMAIL, ">$i"); 281 print EMAIL $page; 282 close(EMAIL); 283 } 284 285 # Detect if the article does not exist. Happens for too large --to= values 286 last if ($page eq "No such file.\n"); 287 288 # Split the page into lines 289 my @lines = split(/\n/, $page); 290 291 # Check whether it contains a regression test result 292 next if (! is_regtest_result(@lines)); 293 print "message $i is a regression test result\n" if ($debug); 294 295 # Get the raw data 296 my ($date, %regtest_data) = get_raw_data(@lines); 297 298 %regtest_data = prep_regtest_data(%regtest_data); 299 300 if ($date ne $old_date) { 301 my %nightly = (); 302 $nightly{"builds"} = [ @builds ]; 303 $dates{$old_date} = { %nightly } if ($old_date ne "-1"); 304 305 $old_date = $date; 306 @builds = (); 307 } 308 309 push @builds, { %regtest_data }; 310 } 311 my %nightly = (); 312 $nightly{"builds"} = [ @builds ]; 313 $dates{$old_date} = { %nightly } if ($old_date ne "-1"); 314 315 # Convenience: precompute some info we'll be interested in 316 %dates = precompute_summary_info( %dates ); 317 318 return %dates; 319 } 320 321 322 #----------------------------------------------------------------- 323 # Write out the results in a form suitable for automatic post-processing 324 #----------------------------------------------------------------- 325 sub dump_results { 326 my (%dates) = @_; 327 328 foreach my $date (sort keys %dates) { 329 330 my %nightly = %{ $dates{$date} }; 331 my @builds = @{ $nightly{"builds"} }; 332 333 foreach my $build (@builds) { 334 my %regtest_data = %{ $build }; 335 336 my $arch = $regtest_data{"arch"}; 337 my $distro = $regtest_data{"distro"}; 338 my @failures = @{ $regtest_data{"failures"} }; 339 my $num_fail = scalar (@failures); 340 my $fails = join(":", sort @failures); 341 342 printf("Regrun: %s %3d %-10s %-20s %s\n", 343 $date, $num_fail, $arch, $distro, $fails); 344 } 345 346 my %failure_frequency = %{ $nightly{"failure_frequency"} }; 347 348 foreach my $test (keys %failure_frequency) { 349 printf("Test: %s %3d %s\n", 350 $date, $failure_frequency{$test}, $test); 351 } 352 353 printf("Total: %s builds: %d %d fail %d pass tests: %d fail %d unique\n", 354 $date, $nightly{"num_builds"}, $nightly{"num_failing_builds"}, 355 $nightly{"num_passing_builds"}, $nightly{"num_testcase_failures"}, 356 $nightly{"num_failing_testcases"}); 357 } 358 } 359 360 361 sub write_readable_results { 362 my (%dates) = @_; 363 364 foreach my $date (sort keys %dates) { 365 my %nightly = %{ $dates{$date} }; 366 367 print "$date\n----------\n"; 368 369 printf("%3d builds\n", $nightly{"num_builds"}); 370 printf("%3d builds fail\n", $nightly{"num_failing_builds"}); 371 printf("%3d builds pass\n", $nightly{"num_passing_builds"}); 372 print "\n"; 373 printf("%3d testcase failures (across all runs)\n", 374 $nightly{"num_testcase_failures"}); 375 printf("%3d failing testcases (unique)\n", 376 $nightly{"num_failing_testcases"}); 377 print "\n"; 378 379 my @builds = @{ $nightly{"builds"} }; 380 381 if ($nightly{"num_passing_builds"} != 0) { 382 print "Passing builds\n"; 383 print "--------------\n"; 384 foreach my $build (@builds) { 385 my %regtest_data = %{ $build }; 386 my @failures = @{ $regtest_data{"failures"} }; 387 my $num_fail = scalar (@failures); 388 389 if ($num_fail == 0) { 390 my $arch = $regtest_data{"arch"}; 391 my $distro = $regtest_data{"distro"}; 392 393 printf("%-8s %-15s\n", $arch, $distro); 394 } 395 print "\n"; 396 } 397 print "\n"; 398 } 399 400 if ($nightly{"num_failing_builds"} != 0) { 401 print "Failing builds\n"; 402 print "--------------\n"; 403 foreach my $build (@builds) { 404 my %regtest_data = %{ $build }; 405 my @failures = @{ $regtest_data{"failures"} }; 406 my $num_fail = scalar (@failures); 407 408 if ($num_fail != 0) { 409 my $arch = $regtest_data{"arch"}; 410 my $distro = $regtest_data{"distro"}; 411 412 printf("%-8s %-15s %d failures\n", $arch, $distro, $num_fail); 413 foreach my $test (@failures) { 414 print " $test\n"; 415 } 416 print "\n"; 417 } 418 } 419 print "\n"; 420 } 421 422 print "Failing testcases and their frequency\n"; 423 print "-------------------------------------\n"; 424 my %failure_frequency = %{ $nightly{"failure_frequency"} }; 425 426 # Sorted in decreasing frequency 427 foreach my $test (sort {$failure_frequency{$b} cmp $failure_frequency{$a} } 428 keys %failure_frequency) { 429 printf("%3d %s\n", $failure_frequency{$test}, $test); 430 } 431 print "\n"; 432 } 433 } 434 435 436 sub main 437 { 438 my ($from, $to, $dump, $readable); 439 440 $from = $to = 0; 441 $dump = $readable = 0; 442 443 GetOptions( "from=i" => \$from, 444 "to=i" => \$to, 445 "debug" => \$debug, 446 "dump" => \$dump, 447 "keep" => \$keep, 448 "readable" => \$readable 449 ) || die $usage; 450 451 # 14800 is about Oct 4, 2011 which is when we began including information 452 # about the environment 453 454 die $usage if ($from < 14800); 455 456 $to = $from + 100 if ($to == 0); 457 458 if ($from > $to) { 459 print STDERR "*** invalid [from,to] interval. Try again\n"; 460 die $usage; 461 } 462 463 $readable = 1 if ($dump == 0 && $readable == 0); 464 465 print "check message interval [$from...$to]\n" if ($debug); 466 467 # Get mails from GMANE mail archive 468 469 my %dates = get_regtest_data($from, $to); 470 471 dump_results(%dates) if ($dump); 472 473 write_readable_results(%dates) if ($readable); 474 } 475 476 main(); 477 478 exit 0; 479