Home | History | Annotate | Download | only in web
      1 #!/usr/bin/perl -w
      2 #--*-Perl-*--
      3 
      4 # NOTES:
      5 #
      6 # 'tagscan' refers to the procedure of examining the CVS data (rlog output
      7 # for each file) and determining what bug IDs exist between two tags.
      8 #
      9 # 'dcuthelp' refers to the procedures of examining the CVS rlog cache
     10 # given a tag and a list of bugs, and helping to incorporate those bug
     11 # fixes into the tag.  For this to occur, in each file, any changes after
     12 # tag within the bug list must be contiguous and must begin in the tag's
     13 # revision.
     14 #
     15 # Params:
     16 #  debug - if set, output debugging info
     17 #  user - user name
     18 #  path_info - override actual path info, for debugging, e.g., "/form"
     19 #  mod - module(s) list
     20 #  include_attic - if set, include Attic during search (ignored by default)
     21 
     22 use strict;
     23 use CGI;
     24 #use CGI::Carp qw(fatalsToBrowser); # Do NOT use this -- doesn't work
     25 use File::Path;
     26 use IO::Handle;
     27 use Time::Local 'timelocal_nocheck';
     28 use Carp;
     29 #use Data::Dumper;
     30 
     31 use vars qw($QUERY $DEBUG $USER $TITLE $CLDR
     32 	    $DIFF_URL $DIFF_URL_SUFFIX $CVSWEB_REP_ID $CVSWEB_REP_SUFF $LOG_URL_SUFFIX $SHOW_URL $SHOW_URL_SUFFIX $LOG_URL
     33 	    $CVSROOT $BASE_REV %MOD_ABBREV $DEFAULT_MOD $NO_JITTERBUG
     34 	    $CACHE $INSTA $INSTA_ATTIC
     35 	    $UPDATE_COUNT $UPDATE_ATTIC_COUNT $UPDATE_NONATTIC_COUNT
     36 	    $TAGSCAN_TAG_LO $TAGSCAN_TAG_HI %TAGSCAN_IDS $TAGSCAN_COUNT
     37 	    $TAGSCAN_TAG_HI_DATE
     38 	    %TAGSCAN_ALLTAGS %TAGSCAN_WHY
     39 	    $DCUTHELP_TAG %DCUTHELP_IDS
     40 	    @DCUTHELP_BADFILES $DCUTHELP_COUNT @DCUTHELP_RETAGS
     41 	    @TAGLESS_FILES @BRANCHED_FILES @NO_JITTERBUG_FILES
     42 	    %MODE_MAP $NOW $YEAR $CVS_MSG_KW
     43 	    );
     44 
     45 &initGlobals;
     46 &main;
     47 exit(0);
     48 
     49 #---------------------------------------------------------------------
     50 sub initGlobals() {
     51     $QUERY = new CGI;
     52 
     53     $DEBUG = $QUERY->param('debug');
     54     $CLDR=1;
     55 
     56     # User name, if any.  We try to propagate the user name so a logged-in
     57     # jitterbug user can stay that way.
     58     $USER = $QUERY->param('user');
     59 
     60     $CVSWEB_REP_ID = "ICU";	
     61 
     62     if ($CLDR == 0) {
     63     	$TITLE="ICU Jitterbug Diffs";
     64     } else {
     65 	$TITLE="CLDR Jitterbug Diffs";
     66     }
     67     #$CVSWEB_REP_SUFF = "&cvsroot=" . $CVSWEB_REP_ID;
     68     $CVSWEB_REP_SUFF = "";
     69 
     70     # The following URLs should be suffixed with a module name
     71     # such as "icu/icu".
     72 
     73     # Display the diffs between two revisions of a file
     74     # E.g., suffix with "/icu/icu/license.html.diff?r1=1.2&r2=1.3"
     75     $DIFF_URL = "http://www.unicode.org/cgi-bin/viewcvs.cgi"; # No trailing "/"
     76     $DIFF_URL_SUFFIX = $CVSWEB_REP_SUFF;
     77 
     78     # Display a specific file revision
     79     # E.g., suffix with "/icu/icu/license.html?rev=1.1$SHOW_URL_SUFFIX"
     80     $SHOW_URL = $DIFF_URL; # No trailing "/"
     81     $SHOW_URL_SUFFIX = "&content-type=text/x-cvsweb-markup" . $CVSWEB_REP_SUFF;
     82 
     83     # Display the CVS log for a file
     84     # E.g., suffix with "/icu/icu/license.html"
     85     $LOG_URL = $DIFF_URL; # No trailing "/"
     86     $LOG_URL_SUFFIX = $CVSWEB_REP_SUFF;
     87 
     88     # CVS root
     89     if ( $CLDR == 0 ) {
     90 	$CVSROOT = "/data/mirrors/icu"; # Must NOT end with "/"
     91     } else {
     92 	$CVSROOT = "/home/cvsroot";
     93     }
     94 
     95     # A fake revision number indicating the slot before the oldest revision in
     96     # the rlog history.  Not user visible.
     97     $BASE_REV = "0";    
     98 
     99     if ($CLDR == 0) {
    100     # Recognized abbreviated module names.
    101     %MOD_ABBREV = (
    102         icu          => 'icu',
    103         icuapps      => 'icuapps',
    104         icu4j        => 'icu4j',
    105         icu4jni      => 'icu4jni',
    106         unicodetools => 'unicodetools',
    107         charset      => 'charset',
    108     );
    109 
    110     # Default modules to search
    111     $DEFAULT_MOD = 'icu icu4j';
    112     } else {
    113     # Recognized abbreviated module names.
    114     %MOD_ABBREV = (
    115         cldr      => 'cldr',
    116         common      => 'cldr/common',
    117     );
    118 
    119     # Default modules to search
    120     $DEFAULT_MOD = 'common';
    121     }
    122 	
    123 
    124     # Magic Jitterbug ID used when a CVS checkin does not include a
    125     # Jitterbug ID.  Should be unlikely (or impossible) to be a real
    126     # Jitterbug ID.
    127     $NO_JITTERBUG = 9999987;
    128 
    129     # Root of our cache of CVS meta-information.  Right now this cache
    130     # takes the form of a mirror of /usr/cvs.  We only mirror
    131     # /usr/cvs/icu/icu and /usr/cvs/icu4j/icu4j at this point.  All CVS
    132     # files (*,v) have an identically named file in the same location in
    133     # the cache.  Currently the cache file is the output of rlog.  In the
    134     # future a more compressed form could be used (although there isn't
    135     # much to be gained, maybe 10%).  Instead of grepping over the CVS
    136     # repository, we grep over the cache.  This cuts the grep time by
    137     # about 90%.  Before using the cache, we update it by walking through
    138     # the CVS repository and checking file mod dates.  Any file that's
    139     # been changed gets updated in the cache.
    140     # Use real path; link causes problems.
    141     #$CACHE = "/www/software10/cgi-bin/icu/grepj.cache";
    142     if($CLDR==0) {
    143     	$CACHE = "/tmp/icu-grepj.cache"; # No trailing "/"
    144     } else {
    145     	$CACHE = "/tmp/icu-grepj-cldr.cache"; # No trailing "/"
    146     }
    147 
    148     # Another cache that holds the results of the last searches.
    149     # Invalidate this cache whenever the main cache needs updating.
    150     # This cache consists of files named "1234".  Each file
    151     # contains the final HTML for that bug ID.  Searches that include
    152     # the attic are kept in a subdirectory 'Attic'.
    153     $INSTA = "$CACHE/insta";
    154     $INSTA_ATTIC = "$INSTA/Attic";
    155 
    156     # Count of updated cache files
    157     $UPDATE_COUNT = 0;
    158     $UPDATE_ATTIC_COUNT = 0;
    159     $UPDATE_NONATTIC_COUNT = 0;
    160 
    161     # Dispatch table mapping path_info to sub
    162     %MODE_MAP = (
    163 	'/top'         =>  \&emit_top,
    164 	'/form'        =>  \&emit_form,
    165 	'/difflist'    =>  \&emit_difflist,
    166 	'/nav'         =>  \&emit_nav,
    167 	'/result'      =>  \&emit_result,
    168 	'/help'        =>  \&emit_help,
    169 	'/admintop'    =>  \&emit_admintop,
    170 	'/adminform'   =>  \&emit_adminform,
    171 	'/adminresult' =>  \&emit_adminresult,
    172         '/localdiff'   =>  \&emit_localdiff,
    173     );
    174 
    175     $NOW = time();
    176     $YEAR = 1900+@{[localtime]}[5]; # Get the current year
    177 
    178     # Regex for grepping for jitterbug checkin comments
    179     # Will be surrounded by parens
    180     if($CLDR == 0) {
    181 	$CVS_MSG_KW = "jitterbug|fixed";
    182     } else {
    183     	$CVS_MSG_KW = "cldrbug";
    184     }
    185 }
    186 
    187 #---------------------------------------------------------------------
    188 # This script generates various frames within framesets.  The 'mode'
    189 # parameter determines which frame is generated.
    190 sub main() {
    191 
    192     STDOUT->autoflush(1); # Make progress output appear progressively...
    193 
    194     my $needed = 'h'; # next up: 'h'eader or 'e'nd_html
    195 
    196     eval {
    197 	local $SIG{'__DIE__'}; # disable installed DIE hooks
    198 	local $SIG{'__WARN__'} = sub {  die $_[0]; }; # transmute warnings
    199 
    200 	# The path info specifies what we are being called to emit.
    201 	# This script emits the frameset and the frames within it
    202 	# depending on this param.  For the URL
    203 	# "http://oss.software.ibm.com/cvs/icu-jinfo/foo", the path
    204 	# info is "/foo".  The path info can be overridden (for debugging)
    205 	# with a CGI param of "path_info=/bar".
    206 	my $path_info = $QUERY->path_info;
    207 	if ($QUERY->param('path_info')) {
    208 	    $path_info = $QUERY->param('path_info');
    209 	}
    210 	
    211 	# Simplify it:  "/foo/..." or "/foo&..." => "/foo"
    212 	$path_info =~ s|(\w)\W.*|$1|;
    213 	$path_info ||= '/top'; # default
    214 	
    215 	my $fn = $MODE_MAP{$path_info};
    216 	die "unknown path_info \"$path_info\"" unless ($fn);
    217 
    218 	if ($path_info ne '/localdiff') {
    219 	    print $QUERY->header;
    220 	    $needed = 'e';
    221 	}
    222 
    223 	$fn->();
    224     };
    225 
    226     if ($@) {
    227 	if ($needed eq 'h') {
    228 	    print $QUERY->header;
    229 	    $needed = 'e';
    230 	}
    231 	print "<hr><b>Internal error: ", $@,
    232               "<br>Please contact <a href=\"mailto:alanliu\@us.ibm.com\">Alan</a></b>";
    233     }
    234 
    235     if ($needed eq 'e') {
    236 	print $QUERY->end_html;
    237     }
    238 }
    239 
    240 #---------------------------------------------------------------------
    241 # Create URL for the reviewer index
    242 # @param user (or empty string if none)
    243 sub reviewersURL {
    244     my $user = shift || '';
    245     $user = "?user=$user" if ($user);
    246     return "http://bugs.icu-project.org/cgibin/private/byname/review$user";
    247 }
    248 
    249 #---------------------------------------------------------------------
    250 # Create URL for jitterbug
    251 # @param user (or empty string if none)
    252 # @param ID (or empty if none);
    253 sub jitterbugURL {
    254     my $user = shift || '';
    255     my $id = shift || '';
    256 
    257    if($CLDR == 0) {
    258     if ($id ne '') {
    259 	if ($user) {
    260 	    return "http://bugs.icu-project.org/cgibin/private/icu-bugs-private?;user=$user;findid=$id";
    261 	} else {
    262 	    return "http://bugs.icu-project.org/cgibin/icu-bugs?findid=$id";
    263 	}
    264     } else {
    265 	if ($user) {
    266 	    return "http://bugs.icu-project.org/cgibin/private/icu-bugs-private?;user=$user;";
    267 	} else {
    268 	    return "http://bugs.icu-project.org/cgibin/icu-bugs";
    269 	}
    270     }
    271   } else {
    272     if ($id ne '') {
    273 	if ($user) {
    274 	    return "http://bugs.icu-project.org/cgibin/cldr/locale-bugs-private?;user=$user;findid=$id";
    275 	} else {
    276 	    return "http://bugs.icu-project.org/cgibin/locale-bugs?findid=$id";
    277 	}
    278     } else {
    279 	if ($user) {
    280 	    return "http://bugs.icu-project.org/cgibin/cldr/locale-bugs-private?;user=$user;";
    281 	} else {
    282 	    return "http://bugs.icu-project.org/cgibin/locale-bugs";
    283 	}
    284     }
    285    }
    286 }
    287 
    288 ######################################################################
    289 # HTML GUI
    290 ######################################################################
    291 
    292 # Emit the HTML for the top frameset in normal (bug diffs) mode
    293 sub emit_top {
    294     # Propagate url parameters down to the frames within the frameset
    295 
    296     my $self = $QUERY->url(-full=>1, -query=>1);
    297     my $f  = urlPathInfo($self, '/form');
    298     my $dl = urlPathInfo($self, '/difflist');
    299     my $n  = urlPathInfo($self, '/nav');
    300     my $r  = urlPathInfo($self, '/result');
    301 
    302     print <<END;
    303 <html><head><title>$TITLE</title></head>
    304 <!--$self-->
    305 <frameset cols="300,*">
    306  <frameset rows="135,*">
    307   <frame src="$f" name="form" scrolling=no>
    308   <frame src="$dl" name="difflist">
    309  </frameset>
    310  <frame src="$r" name="result">
    311 </frameset>
    312 END
    313 
    314 # <frameset rows="30,*">
    315 #  <frame src="$n" name="nav" scrolling=no>
    316 #  <frame src="$r" name="result">
    317 # </frameset>
    318 }
    319 
    320 sub emit_form {
    321     print $QUERY->start_html(-title=>$TITLE,
    322                              -target=>'difflist');
    323 
    324     my $script_name = $QUERY->script_name;
    325 
    326     print $QUERY->startform(-action=>urlPathInfo($script_name, '/difflist'),
    327                             -target=>'difflist',
    328 			    -method=>'GET');
    329 
    330     my $user = $QUERY->param('user') || '';
    331 
    332     print "<H2>$TITLE"; # h1 too big
    333     print " <FONT SIZE=-1>($user)</FONT>" if ($user);
    334     print "</H2>";
    335 
    336     print "ID? ",$QUERY->textfield(-name=>'id',-size=>5)
    337 	, $QUERY->submit(-name=>'Search')
    338 	, " <FONT SIZE=-1><A href=\""
    339 	, urlPathInfo($script_name, '/help')
    340 	, "\">Help</A></FONT>";
    341 
    342     print "\&nbsp;<FONT SIZE=-1>"
    343 	, "<A href=\"", urlPathInfo($script_name, '/admintop')
    344 	, "?user=$user\" target=\"_top\">Admin</A></FONT>";
    345  
    346     print "<BR>\nModules:&nbsp;";
    347     print $QUERY->textfield(-name=>'mod',
    348 			    -default=>$DEFAULT_MOD,
    349 			    -size=>30);
    350 
    351     print "<BR>\n";
    352 
    353     print "<FONT SIZE=-1>";
    354     print $QUERY->checkbox(-name=>"include_attic",
    355                            -label=>"Incl. Attic");
    356     print $QUERY->checkbox(-name=>"localdiff",
    357 			   -label=>"Local Diff");
    358     print "</FONT>";
    359 
    360     print "\&nbsp;<A href=\"", reviewersURL($user), "\" target=\"_top\" title=\"List bugs by reviewer\">Reviewers</A>";
    361 
    362     print "\&nbsp;<A href=\"", jitterbugURL($user), "\" target=\"_top\" title=\"Go to main Jitterbug page\">Jitterbug</A>";
    363 
    364     # Propagate params that don't have corresponding form elements
    365     print $QUERY->hidden('user');
    366     print $QUERY->hidden('debug');
    367     if($CLDR==1) {
    368       print $QUERY->hidden('cldr');
    369     }
    370 
    371     print $QUERY->end_form;
    372 }
    373 
    374 sub emit_nav {
    375     print $QUERY->start_html(-title=>$TITLE,
    376                              -target=>'result');
    377     print "Under construction: Navigation bar goes here";
    378 }
    379 
    380 sub emit_difflist {
    381     print $QUERY->start_html(-title=>$TITLE,
    382                              -target=>'result');
    383 
    384     ############################################################
    385     # ID
    386 
    387     my $ID = $QUERY->param('id') || '';
    388     $ID =~ s/\s//g;
    389 
    390     #print "<br/><b>query:</b>";
    391     #print $QUERY->Dump;
    392     #print "<br/>";
    393 
    394     if ($ID eq '') {
    395 	print "(Warning: search, but No ID given.)<br/> \n";
    396         &emit_help;
    397         return;
    398     }
    399 
    400     if ($ID =~ /^0*(\d+)$/) {
    401 	$ID = $1;
    402     } else {
    403         print "\"$ID\" is not a valid Jitterbug ID.  Please ";
    404         print "enter one or more decimal digits.";
    405         return;
    406     }
    407 
    408     ############################################################
    409     # User
    410 
    411     my $user = $QUERY->param('user');
    412 
    413     ############################################################
    414     # Modules
    415 
    416     my @m;
    417     return if (!parseMod(\@m)); # what modules are we searching?
    418 
    419     my $localDiff = $QUERY->param('localdiff');
    420 
    421     # Only use the INSTA cache for standard module searches.
    422     my $isStd = (join(' ', sort @m) eq 'icu/icu icu4j/icu4j')
    423 	&& !$localDiff;
    424 
    425     ############################################################
    426     # Output
    427 
    428     print "What is Jitterbug ", jitterbugLink($user, $ID), "?";
    429 
    430     foreach (@m) {
    431 	updateCacheDir($_);
    432     }
    433 
    434     # If the cache has been updated then the instaCache entries
    435     # are all invalid and must be deleted.  Otherwise try to
    436     # look up the diffs from the instaCache.
    437     mkpath($INSTA_ATTIC, 0, 0777);
    438     if ($UPDATE_COUNT) {
    439 	print "done ($UPDATE_NONATTIC_COUNT,$UPDATE_ATTIC_COUNT).";
    440 	resetInstaCache(0);
    441     } elsif ($isStd) {
    442 	my $diffs = instaGet($ID);
    443 	if ($diffs) {
    444 	    print $diffs;
    445 	    print "<BR><EM><FONT SIZE=-1>(Results from cache)</FONT></EM>";
    446 	    return;
    447 	}
    448     }
    449 
    450     # If we don't find the ID in the instaCache, then generate
    451     # the diffs the hard way and store the result in the
    452     # instaCache.
    453     my $diffs;
    454     foreach my $module (@m) {
    455 	debugOut("module $module") if ($DEBUG);
    456 	my $m = $module;
    457 	$m =~ s|^.+/||;
    458         $diffs .= out("<HR><CENTER><B><FONT SIZE=+1>", uc($m),
    459 		      "</FONT></B></CENTER><HR>");
    460         debugOut("+generateDiffsList($ID, $module)") if ($DEBUG);
    461         $diffs .= generateDiffsList($ID, $module);
    462         debugOut("-generateDiffsList($ID, $module)") if ($DEBUG);
    463     }
    464     instaPut($ID, $diffs) if ($isStd);
    465 }
    466 
    467 sub emit_localdiff {
    468     print $QUERY->header(-type=>'application/octet-stream',
    469 			 -attachment=>'localdiff.bat');
    470     my $file = $QUERY->param('file');
    471     my $r1 = $QUERY->param('r1');
    472     my $r2 = $QUERY->param('r2');
    473     my $mod = $QUERY->param('m');
    474     my $leaf = $file;
    475     $leaf =~ s|.*[/\\]([^/\\]+)+$|$1|;
    476     $file = "$mod/$file";
    477     my $eol = "\015\012"; # DOS eol
    478     print "cd %TEMP%$eol";
    479     print "mkdir grepj$eol";
    480     print "cd grepj$eol";
    481     print "set CVSROOT=:pserver:$USER\@oss.software.ibm.com:/usr/cvs/$mod$eol";
    482     print "cvs checkout -p -r $r1 $file > $leaf-$r1$eol";
    483     print "cvs checkout -p -r $r2 $file > $leaf-$r2$eol";
    484     print "start wincmp $leaf-$r1 $leaf-$r2$eol";
    485     print "del \%0$eol";
    486 }
    487 
    488 sub emit_result {
    489     print $QUERY->start_html(-title=>$TITLE);
    490 }
    491 
    492 sub emit_help {
    493     my $x = join(" ", sort keys(%MOD_ABBREV));
    494     print <<END;
    495 Search the ICU and ICU4J CVS repositories for changes committed against
    496 a specific Jitterbug.
    497 
    498 <P>For a change to be recognized,
    499 its commit comment must start with "<CODE>Jitterbug <B>n</B></CODE>",
    500 where <CODE><B>n</B></CODE> is the bug ID.
    501 
    502 <P>The search generates a list of all files changes for this bug,
    503 together with the specific revisions in each
    504 file that are relevant (there may be more than one).
    505 
    506 <P>In the diff list,
    507 select a <B>file name link</B> to see the CVS log
    508 for that file.
    509 
    510 <P>Select a <B>revision link</B> to see changes
    511 checked in against that revision.  "Diff" revision links
    512 show diffs against the previous revision.  "View" links
    513 show initial check in revisions.
    514 
    515 <P>If a file contains more than one revision relevant to this
    516 Jitterbug ID, then an <B>overall revision link</B> will be available.
    517 Use this to see the effect of all changes at once.  <I>If the revisions
    518 are not contiguous, then this diff will contain changes
    519 not related to this Jitterbug.</I>  In that case you may
    520 prefer to view the individual diffs instead.
    521 
    522 <P><B>Incl. Attic</B> causes files under any directory named
    523 "Attic" to be included.
    524 
    525 <P><B>Local Diff</B> enables special links that look like this [*]
    526 which cause your browser to download a Windows batch file.  The
    527 batch file, when executed, will bring up the relevant diffs in
    528 Compare It!.  For this to work, you need the following:
    529 
    530 <UL><LI><B>cvs</B> must be on your PATH.  For example, you may
    531 add <CODE>C:\\Program Files\\GNU\\WinCVS 1.2</CODE> to your PATH.
    532 <LI><B>wincmp</B> must be on your PATH.  This is the Compare It!
    533 executable.  For example, you may add <CODE>C:\\Program Files\\Compare
    534 It!</CODE> to your PATH.
    535 <LI>You must be "logged in" for the cvs checkouts to work.  If your
    536 name is present in parentheses next to "ICU Jitterbug Diffs" in the
    537 upper left frame, you are logged in.
    538 </UL>
    539 
    540 <P><B>Modules</B> lists the modules to be searched.  By default
    541 this is "icu icu4j" but any modules (under /usr/cvs) may be listed.
    542 Full module names (e.g., "icu/icuapps") may be used.  The following
    543 abbreviations are recognized:  <CODE>$x</CODE>.
    544 END
    545 }
    546 
    547 ######################################################################
    548 # Admin GUI
    549 ######################################################################
    550 
    551 # Emit the HTML for the top frameset in admin mode
    552 sub emit_admintop {
    553     # Propagate url parameters down to the frames within the frameset
    554 
    555     my $self = $QUERY->url(-full=>1, -query=>1);
    556     my $f = urlPathInfo($self, '/adminform');
    557     my $r = urlPathInfo($self, '/adminresult');
    558     my $TITLETXT = $TITLE;
    559 
    560     #if ($id ne '') {
    561 #`h	TITLETXT = "$id - $TITLETXT";
    562   #  }
    563 
    564     print <<END;
    565 <html><head><title>$TITLE</title></head>
    566 <frameset cols="300,*">
    567   <frame src="$f" name="adminform" scrolling=yes>
    568   <frame src="$r" name="adminresult">
    569 </frameset>
    570 END
    571 }
    572 
    573 # Print the admin input form.
    574 sub emit_adminform {
    575 
    576     print $QUERY->start_html(-title=>$TITLE,
    577                              -target=>'adminresult');
    578 
    579     my $script_name = $QUERY->script_name;
    580 
    581     print $QUERY->startform(-action=>urlPathInfo($script_name, '/adminresult'),
    582                             -TARGET=>'adminresult');
    583 
    584     print "<FONT SIZE=+2><B>Administrative Tools</B></FONT>";
    585 
    586     my $user = $QUERY->param('user');
    587     my $u = $user ? "?user=$user" : '';
    588     print "\&nbsp;<FONT SIZE=-1>"
    589 	, "<A href=\"$script_name$u\" target=\"_top\">Back</A></FONT><BR>";
    590 
    591     print '<FONT SIZE=-1>Tags may be specified in full, e.g. '
    592 	, '"release-2-4", or as release numbers, such as "2.4".  ',
    593 	'Specify module(s) here for commands below.',
    594 	'</FONT><BR>';
    595 
    596     print "Modules:&nbsp;";
    597     print $QUERY->textfield(-name=>'mod',
    598 			    -default=>$DEFAULT_MOD,
    599 			    -size=>30);
    600     print "<HR>";
    601 
    602     print "<B>List Bugs Between CVS Tags</B><BR>";
    603     print "<TABLE><TR><TD nowrap>Start Tag:</TD><TD>";
    604     print $QUERY->textfield(-name=>'tag_lo',-size=>30);
    605     print "</TD></TR><TR><TD nowrap>End Tag:</TD><TD>";
    606     print $QUERY->textfield(-name=>'tag_hi',-size=>30);
    607     print "</TD></TR><TR><TD></TD><TD>";
    608     print $QUERY->submit(-name=>'Find Bugs');
    609     print "</TD></TR></TABLE>";
    610     print '<FONT SIZE=-1>Bugs are listed that occur after the start tag, up to and including the end tag.  Specify module(s) above.</FONT>';
    611 
    612     print "<HR>\n";
    613 
    614     print "<B>DCUT Helper</B><BR>";
    615     print "<TABLE><TR><TD>Tag:</TD><TD>";
    616     print $QUERY->textfield(-name=>'dcut_tag',-size=>33);
    617     print "</TD></TR><TR VALIGN=TOP><TD>Bug IDs:</TD><TD>";
    618     print $QUERY->textarea(-name=>'dcut_ids',-rows=>8,-columns=>26);
    619     print "</TD></TR><TR><TD></TD><TD>";
    620     print $QUERY->submit(-name=>'Check');
    621     print "</TD></TR></TABLE>";
    622     print '<FONT SIZE=-1>Enter a CVS tag and list of bugs to incorporate '
    623 	, 'those bugs into the tag.  '
    624 	, 'Specify module(s) above.</FONT>';
    625 
    626     print "<HR>\n";
    627 
    628     print $QUERY->submit(-name=>'Reset Insta Cache'), "<BR>";
    629     print '<FONT SIZE=-1>The insta cache contains the HTML output for previous'
    630         , ' bug diff search results.  In some cases (typically during script'
    631         , ' development), it can get out of sync.</FONT>';
    632 
    633     print "<HR>\n";
    634 
    635     print $QUERY->submit(-name=>'Delete Cache File:'), "&nbsp;";
    636     print $QUERY->textfield(-name=>'del_cache',-size=>17), "<BR>";
    637     print '<FONT SIZE=-1 >Delete a file from the cache.  Path is relative'
    638 	, ' to cache root and must begin with the module path'
    639 	, ' (e.g. "icu/icu").</FONT>';
    640 
    641     # Propagate params that don't have corresponding form elements
    642     print $QUERY->hidden('user');
    643     print $QUERY->hidden('debug');
    644 
    645     print $QUERY->end_form;
    646 }
    647 
    648 # Implement the admin functions.
    649 sub emit_adminresult {
    650     print $QUERY->start_html(-title=>$TITLE);
    651 
    652     if ($QUERY->param('Find Bugs')) {
    653 	&do_tagscan;
    654 	return;
    655     }
    656 
    657     if ($QUERY->param('Check')) {
    658 	&do_dcuthelp;
    659 	return;
    660     }
    661 
    662     if ($QUERY->param('Reset Insta Cache')) {
    663 	resetInstaCache(1);
    664 	print "Cache at $INSTA has been erased.";
    665 	return;
    666     }
    667 
    668     if ($QUERY->param('Delete Cache File:')) {
    669 	my $f = $QUERY->param('del_cache');
    670 	# Careful here -- don't let the user delete anything but a
    671 	# legitimate cache file.  Watch out for "..", "~", "$", etc.
    672 	if ($f !~ m|^[a-z0-9_]+(/[a-z0-9_]+)+\.[a-z0-9]+$|i) {
    673 	    print "\"$f\" does not look like a valid path.";
    674 	    return;
    675 	}
    676 	$f = $CACHE . '/' . $f . ',v';
    677  	if (! -e $f) {
    678 	    print "\"$f\" does not exist.";
    679 	    return;
    680 	}
    681  	if (! -f $f) {
    682 	    print "\"$f\" is not a file.";
    683 	    return;
    684 	}
    685 	unlink($f);
    686 	# This check doesn't seem to work.
    687  	#if (! -e $f) {
    688 	#    print "Error: Could not delete \"$f\".";
    689 	#    return;
    690 	#} else {	
    691 	    print "Cache file \"$f\" deleted.";
    692 	#}
    693 	return;
    694     }
    695 }
    696 
    697 ######################################################################
    698 # Jitterbug diffs
    699 ######################################################################
    700 
    701 #---------------------------------------------------------------------
    702 # Find the diffs for a jitterbug and display them.
    703 # Also display other useful links for this bug.
    704 # Param: ID number
    705 # Param: module name ("icu/icu" or "icu4j/icu4j" or other)
    706 # Return: The generated HTML.  Also print it to STDOUT
    707 # on the fly.
    708 sub generateDiffsList {
    709     my $ID = shift;
    710     my $module = shift;
    711     my $result;
    712 
    713     my $greproot = "$CACHE/$module";
    714     my $log_url  = "$LOG_URL/$module/";
    715     my $show_url = "$SHOW_URL/$module/";
    716     my $diff_url = "$DIFF_URL/$module/";
    717 
    718     # ID matching pattern
    719     my $pat = "0*$ID";
    720 
    721     # During merging, the bug IDs 1-98 for icu4j were migrated to
    722     # 1301-1398.  Therefore, when the user requests a bug in the range
    723     # 1301-1398, we search under both n and n-1300 in icu4j
    724     # repository.
    725     if ($module =~ /^icu4j/ && $ID >= 1301 && $ID <= 1398) {
    726         my $ID2 = $ID - 1300;
    727         $pat = "($pat|0*$ID2)";
    728     }
    729 
    730     # -E use extended regexp
    731     # -i ignore case
    732     # -I ignore binary files
    733     # -l stop at first match and list file name
    734     # -r recurse
    735     # N/A now that we cache the rlog output
    736     #my $flags = $ignoreBinaries ? "-EiIlr" : "-Eilr";
    737 
    738     # (1 of 3 REGEXPS) SEE ALSO other regexps; keep them in sync
    739     # TODO improve error handling in following line
    740     my @files = `grep -Eilr "($CVS_MSG_KW)[ \\t]*$pat\\b" $greproot`;
    741 
    742     if (!$QUERY->param('include_attic')) {
    743         @files = grep(!m|/attic/|i, @files);
    744     }
    745     
    746     if (@files < 1) {
    747         $result .= out("No changes found for Jitterbug $ID.\n");
    748         return $result;
    749     }
    750     
    751     $result .= out("<FONT SIZE=-1>");
    752 
    753     my $first = 1;
    754 
    755     foreach my $f (sort cmpfiles @files) {
    756         my @r = findRevisions($f, $pat);
    757 
    758         if ($first) {
    759             $first = 0;
    760         } else {
    761             $result .= out("<HR>\n");
    762         }
    763 
    764 	my $localDiff = $QUERY->param('localdiff');
    765 
    766         my $relFile = $f;
    767         $relFile =~ s/^$greproot\///;
    768         $relFile =~ s/,v//;
    769         my $a = '';
    770         my $b = $relFile;
    771         if ($b =~ m|(.*/)(.+)|) {
    772             ($a ,$b) = ($1, $2);
    773         }
    774         $result .= out("$a<A href=\"$log_url$relFile?$LOG_URL_SUFFIX\" title=\"View CVS log for $b\"><B>$b</B></A><BR>");
    775         if (@r > 1) {
    776             # Show diff of earliest to latest.
    777             my $discontiguous = 0;
    778             for (my $i=0; $i<$#r; $i++) { # [sic] from first to last-1
    779                 if ($r[$i]->{old} ne $r[$i+1]->{new}) {
    780                     $discontiguous = 1;
    781                     last;
    782                 }
    783             }
    784             my $new = $r[0]->{new};
    785             my $old = $r[$#r]->{old};
    786             $result .= out("<CENTER>");
    787 	    if ($discontiguous) {
    788 		$result .= out("<B>Contains other changes: </B>");
    789 	    }
    790             if ($old eq $BASE_REV) {
    791                 $result .= out("<A href=\"$show_url$relFile?rev=$new$SHOW_URL_SUFFIX\">");
    792                 $result .= out("<B>View $new</B></A>");
    793             } else {
    794                 $result .= out("<A href=\"$diff_url$relFile?r1=$old&r2=$new$DIFF_URL_SUFFIX\">");
    795                 $result .= out("<B>Diff $new vs $old</B></A>");
    796 		if ($localDiff) {
    797 		    my $self = $QUERY->url(-full=>1, -query=>1);
    798 		    my $url = urlPathInfo($self, '/localdiff');
    799 		    my $mod = $module;
    800 		    $mod =~ s|/.+||;
    801 		    out(" [<A href=\"$url;m=$mod;file=$relFile;r1=$old;r2=$new$DIFF_URL_SUFFIX\">*</A>]");
    802 		}
    803             }
    804 
    805             # Construct contiguous ranges if the overall diff is
    806             # discontiguous.
    807             if ($discontiguous) {
    808                 my @ranges;
    809                 my $start = 0;
    810                 for (my $i=0; $i<$#r; $i++) { # [sic] from first to last-1
    811                     if ($r[$i]->{old} ne $r[$i+1]->{new}) {
    812                         push @ranges, [$start, $i];
    813                         $start = $i+1;
    814                     }
    815                 }
    816                 push @ranges, [$start, $#r];
    817                 my $first = 1;
    818                 foreach my $range (@ranges) {
    819                     my $new = $r[$range->[0]]->{new};
    820                     my $old = $r[$range->[1]]->{old};
    821                     if ($first) {
    822                         $result .= out("<BR>\n(");
    823                         $first = 0;
    824                     } else {
    825                         $result .= out("<BR>\n");
    826                     }
    827                     if ($old eq $BASE_REV) {
    828                         $result .= out("<A href=\"$show_url$relFile?rev=$new$SHOW_URL_SUFFIX\">");
    829                         $result .= out("View $new</A>");
    830                     } else {
    831                         $result .= out("<A href=\"$diff_url$relFile?r1=$old&r2=$new$DIFF_URL_SUFFIX\">");
    832                         $result .= out("Diff $new vs $old</A>");
    833 			if ($localDiff) {
    834 			    my $self = $QUERY->url(-full=>1, -query=>1);
    835 			    my $url = urlPathInfo($self, '/localdiff');
    836 			    my $mod = $module;
    837 			    $mod =~ s|/.+||;
    838 			    out(" [<A href=\"$url;m=$mod;file=$relFile;r1=$old;r2=$new$DIFF_URL_SUFFIX\">*</A>]");
    839 			}
    840                     }
    841                 }
    842                 $result .= out(")");
    843             }
    844 
    845             $result .= out("</CENTER>");
    846         }
    847 
    848         for (my $i=0; $i<@r; $i++) {
    849             my $h = $r[$i];
    850             my $new = $h->{new};
    851             my $old = $h->{old};
    852             if ($old eq $BASE_REV) {
    853                 $result .= out("<A href=\"$show_url$relFile?rev=$new$SHOW_URL_SUFFIX\">");
    854                 $result .= out("<B>View $new</B></A>");
    855             } else {
    856                 $result .= out("<A href=\"$diff_url$relFile?r1=$old&r2=$new$DIFF_URL_SUFFIX\">");
    857                 $result .= out("<B>Diff $new</B></A>");
    858 		if ($localDiff) {
    859 		    my $self = $QUERY->url(-full=>1, -query=>1);
    860 		    my $url = urlPathInfo($self, '/localdiff');
    861 		    my $mod = $module;
    862 		    $mod =~ s|/.+||;
    863 		    out(" [<A href=\"$url;m=$mod;file=$relFile;r1=$old;r2=$new$DIFF_URL_SUFFIX\">*</A>]");
    864 		}
    865             }
    866             $result .= out(" <EM>", $h->{date}, "</EM> by <EM>", $h->{author}, "</EM><BR>");
    867             $result .= out($h->{comment});
    868             $result .= out("<BR>\n");
    869         }
    870     }
    871 
    872     $result .= out("</FONT>");
    873     $result;
    874 }
    875 
    876 # Sort criterion for file diffs
    877 sub cmpfiles {
    878     my $aa = $a;
    879     my $bb = $b;
    880     $aa =~ s|/unicode(/[^/]+)$|$1|;
    881     $bb =~ s|/unicode(/[^/]+)$|$1|;
    882     $aa =~ s|\.h,|.1h,|;
    883     $bb =~ s|\.h,|.1h,|;
    884     return $aa cmp $bb;
    885 }
    886 
    887 # Sort criterion for revision numbers, e.g. "1.9" vs "1.10"
    888 sub cmprevs {
    889     my @a = split('\.', $a);
    890     my @b = split('\.', $b);
    891     for (my $i=0; $i<=$#a && $i<=$#b; ++$i) {
    892         my $c = $b[$i] - $a[$i]; 
    893         return $c if ($c);
    894     }
    895     return $#b - $#a;
    896 }
    897 
    898 ######################################################################
    899 # tagscan
    900 ######################################################################
    901 
    902 # Perform a "tagscan" and emit the results.  A tagscan is a scan of
    903 # the CVS rlog cache in which bug IDs between two tags are compiled.
    904 # If a file is marked 'dead' it is ignored.  If it was created after
    905 # the latest date of the HI tag (as determined by checking _every_
    906 # file's date for that tag) then it is ignored.
    907 sub do_tagscan {
    908     $TAGSCAN_TAG_LO = expandTag($QUERY->param('tag_lo'));
    909     $TAGSCAN_TAG_HI = expandTag($QUERY->param('tag_hi'));
    910 
    911     $TAGSCAN_TAG_HI_DATE = '';
    912 
    913     if (!$TAGSCAN_TAG_LO || !$TAGSCAN_TAG_HI) {
    914 	print "Please enter two CVS tags and try again.";
    915 	return;
    916     }
    917 
    918     my $user = $QUERY->param('user');
    919 
    920     my @m;
    921     return if (!parseMod(\@m)); # what modules are we searching?
    922 
    923     # Slight limitation -- our tagLink will only refer to the first module
    924     print "Searching module(s) <B>", join(", ", @m)
    925 	, "</B> for bugs after tag <B>",
    926 	tagLink($TAGSCAN_TAG_LO,$m[0],'grepj_2'),
    927 	"</B> up to and including tag <B>",
    928 	tagLink($TAGSCAN_TAG_HI,$m[0],'grepj_2'),
    929 	"</B>.  <EM>Note: Dead files and Attic files will be ignored.</EM><BR>\n";
    930 
    931     foreach (@m) {
    932 	updateCacheDir($_);
    933     }
    934 
    935     if ($UPDATE_COUNT) {
    936 	print "done ($UPDATE_NONATTIC_COUNT,$UPDATE_ATTIC_COUNT).";
    937     }
    938 
    939     %TAGSCAN_IDS = ();
    940 #at	%TAGSCAN_ALLTAGS = ();
    941     %TAGSCAN_WHY = ();
    942     $TAGSCAN_COUNT = 0;
    943     print "<HR>Scanning CVS tree for bug IDs...";
    944     foreach (@m) {
    945 	tagscanDir($_);
    946     }
    947     print "done.<HR>";
    948 
    949     # Filter out tagless files that were created after the HI tag
    950     # date.
    951     my @a;
    952     foreach my $f (@TAGLESS_FILES) {
    953 	my $d = getRev11Date("$CACHE/$f");
    954 	if ($d && $d le $TAGSCAN_TAG_HI_DATE) {
    955 	    push @a, $f;
    956 	}
    957     }
    958     @TAGLESS_FILES = @a;
    959 
    960     if (@NO_JITTERBUG_FILES) {
    961 	print "The following revisions have no associated Jitterbug, or the bug number could not be parsed from the checkin comment.\n";
    962 	print "Checkins older than a year are not listed.\n";
    963 	print "<BLOCKQUOTE>";
    964 	print join("<BR>\n",
    965 		   map {logLink($_->[0],'grepj_2') .
    966 			", " . $_->[1] . "<BR><CODE>" .
    967 			$_->[2] . "</CODE>"}
    968 		   @NO_JITTERBUG_FILES);
    969 	print "</BLOCKQUOTE><HR>\n";
    970     }
    971 
    972     if (@TAGLESS_FILES) {
    973 	print "<EM>The following ", scalar @TAGLESS_FILES
    974 	    , " files were ignored because they are missing one or both tags."
    975 	    , " </EM>Files created after <B>$TAGSCAN_TAG_HI</B> should not be listed"
    976 	    , " here.\n<BLOCKQUOTE>";
    977 	print join("<BR>\n",
    978 		   map {logLink($_,'grepj_2')}
    979 		   @TAGLESS_FILES)
    980 	    , "</BLOCKQUOTE><HR>\n";
    981     }
    982 
    983     if (@BRANCHED_FILES) {
    984 	print "<EM>The following ", scalar @BRANCHED_FILES
    985 	    , " files were ignored because the tags occur on different"
    986 	    , " branches.\n</EM><BLOCKQUOTE>";
    987 	print join("<BR>\n",
    988 		   map {logLink($_->[0],'grepj_2') .
    989 			": " . $_->[1] . " => " . $_->[2]}
    990 		   @BRANCHED_FILES)
    991 	    , "</BLOCKQUOTE><HR>\n";
    992     }
    993 
    994 #at	print "Other tags seen: ",
    995 #at	      join(" ", 
    996 #at		   map {my $a=tagToRelease($_); $a?"$_($a)":"$_*"}
    997 #at                sort keys %TAGSCAN_ALLTAGS), "\n<HR>";
    998 
    999     print "Details: "
   1000 	, join("; ",
   1001 	       map {"(" . jitterbugLink($user, $_, 'grepj_2') .
   1002                     ": " . join(", ",
   1003                  map {s|^.+?/||; s|,v$||; $_} sort keys %{$TAGSCAN_WHY{$_}}) . ")"}
   1004 	       sort {$a<=>$b} keys %TAGSCAN_WHY)
   1005 	, "<HR>\n";
   1006 
   1007     print "Jitterbug IDs found (",scalar keys %TAGSCAN_IDS,"): "
   1008 	, join(", ",
   1009 	       map {jitterbugLink($user, $_, 'grepj_2')}
   1010 	       sort {$a<=>$b} keys %TAGSCAN_IDS);
   1011 
   1012     my $bugs = join(',', sort {$a<=>$b} keys %TAGSCAN_IDS);
   1013     print <<END;
   1014   <form method=post action=http://bugs.icu-project.org/cgibin/private/tasklist/buglist.html>
   1015     <input type=hidden name=tag1 value=$TAGSCAN_TAG_LO>
   1016     <input type=hidden name=tag2 value=$TAGSCAN_TAG_HI>
   1017     <input type=hidden name=bugs value="$bugs">
   1018    <input type=submit value="Bug List Report">
   1019   </form>
   1020 END
   1021     my $bugs2 = join(' ', sort {$a<=>$b} keys %TAGSCAN_IDS);
   1022     print <<END;
   1023   <form method=GET action=http://bugs.icu-project.org/cgibin/private/byname/review>
   1024     <input type=hidden name=user value=$user>
   1025     <input type=hidden name=bugs value="$bugs2">
   1026     <input type=hidden name=showclosed value=>
   1027    <input type=submit value="Reviewer Report">
   1028   </form>
   1029 END
   1030     print <<END;
   1031   <form method=GET action=http://bugs.icu-project.org/cgibin/private/byname/assign>
   1032     <input type=hidden name=user value=$user>
   1033     <input type=hidden name=bugs value="$bugs2">
   1034     <input type=hidden name=showclosed value=>
   1035    <input type=submit value="Assignee Report">
   1036   </form>
   1037 END
   1038 }
   1039 
   1040 # Given a relative path to $CVSROOT, tagscan the
   1041 # corresponding item under $CACHE.  Path may point to a
   1042 # file or a directory.
   1043 # @param relative directory, not ending in "/", e.g. "icu/icu"
   1044 # @param item name in that directory
   1045 sub tagscanEntry {
   1046     my $relDir = shift;
   1047     my $item = shift; # A file or dir in $CVSROOT/$relDir
   1048 
   1049     if (-d "$CACHE/$relDir/$item") {
   1050         tagscanDir("$relDir/$item");
   1051     } elsif ($item =~ /,v$/) {
   1052         tagscanFile("$relDir/$item");
   1053     }
   1054 }
   1055 
   1056 # Given a relative directory path to $CACHE, tagscan the
   1057 # underlying files.
   1058 # @param relative directory, not ending in "/", e.g. "icu/icu"
   1059 sub tagscanDir {
   1060     my $relDir = shift;
   1061 
   1062     # Ignore stuff in the Attic
   1063     return if ($relDir eq 'Attic');
   1064 
   1065     debugOut("+tagscanDir($relDir)") if ($DEBUG);
   1066 
   1067     my $cacheDir = "$CACHE/$relDir";
   1068 
   1069     # First tagscan files in this directory
   1070     opendir(DIR, $cacheDir);
   1071     my @cacheList = grep !/^\.\.?$/, readdir(DIR);
   1072     closedir(DIR);
   1073 
   1074     # Tagscan each individual entry
   1075     foreach (@cacheList) {
   1076         tagscanEntry($relDir, $_);
   1077     }
   1078 
   1079     debugOut("-tagscanDir($relDir)") if ($DEBUG);
   1080 }
   1081 
   1082 # Given a relative file path to $CVSROOT, tagscan the
   1083 # corresponding file under $CACHE, if necessary.
   1084 # @param relative file path
   1085 sub tagscanFile {
   1086     my $relFile = shift;
   1087 
   1088     # Display progress; it takes awhile
   1089     if (++$TAGSCAN_COUNT % 100 == 0) {
   1090 	print " $TAGSCAN_COUNT...";
   1091     }
   1092 
   1093     # This file contains the output of rlog.
   1094     my $file = "$CACHE/$relFile";
   1095 
   1096     # Parse the rlog file.  Start by extracting the tag names.  Look
   1097     # for the TAGSCAN_TAG_LO and TAGSCAN_TAG_HI's associated revision
   1098     # numbers.
   1099     open(IN, $file);
   1100     while (<IN>) {
   1101 	last if (/^symbolic names:\s*$/);
   1102     }
   1103     my $rev_lo;
   1104     my $rev_hi;
   1105     my $rel_min; # lowest release number seen
   1106     my @odd_tags;
   1107     if ($TAGSCAN_TAG_HI eq 'HEAD') {
   1108 	$rev_hi = 'HEAD';
   1109     }
   1110     while (<IN>) {
   1111 	last if (/^\S/);
   1112 	if (!$rev_lo && /^\s+$TAGSCAN_TAG_LO:\s*(\S+)/) {
   1113 	    $rev_lo = $1;
   1114 	}
   1115 	elsif (!$rev_hi && /^\s+$TAGSCAN_TAG_HI:\s*(\S+)/) {
   1116 	    $rev_hi = $1;
   1117 	}
   1118 	elsif (/^\s+(\S+?):/) {
   1119 	    my $tag = $1;
   1120 #at	    $TAGSCAN_ALLTAGS{$tag} = 1;
   1121 	    my $r = tagToRelease($tag);
   1122 	    if ($r) {
   1123 		if (!$rel_min) {
   1124 		    $rel_min = $r;
   1125 		} elsif ($r < $rel_min) {
   1126 		    $rel_min = $r;
   1127 		}
   1128 	    } else {
   1129 		push @odd_tags, $tag;
   1130 	    }
   1131 	}
   1132     }
   1133 
   1134     # Check for dead files.  Look ahead and find the state of the head
   1135     # revision.
   1136     my $pos = tell(IN);
   1137     my $state = '';
   1138     while (<IN>) {
   1139 	if (/^date:.+state: ([A-Za-z]+)/) {
   1140 	    $state = $1;
   1141 	    last;
   1142 	}
   1143     }    
   1144     seek(IN,$pos,0);
   1145 
   1146     # If this file is 'dead', we're done.
   1147     return if ($state eq 'dead');
   1148 
   1149     # Usually we find both tags.  However, in several special cases one
   1150     # or both tags will be missing.
   1151     if (!$rev_lo || !$rev_hi) {
   1152 	my $ok = 0;
   1153 
   1154 	# If we see the high tag, but not the low, then this may be a
   1155 	# new file (created after the low tag).  To check for this, examine
   1156 	# the other tags.  If this is a new file; we can just scan
   1157 	# from rev_hi all the end of the log (with rev_lo set to '1.1').
   1158 	if ($rev_hi) {
   1159 	    if (!$rel_min) {
   1160 		# The only tag seen was the HI tag.
   1161 		$ok = 1;
   1162 	    } else {
   1163 		my $lo = tagToRelease($TAGSCAN_TAG_LO);
   1164 		if ($lo && $rel_min > $lo && (scalar @odd_tags)==0) {
   1165 		    # Other tags were seen, but all were above the LO tag.
   1166 		    $ok = 1;
   1167 		}
   1168 	    }
   1169 	    $rev_lo = '1.1';
   1170 	}
   1171 
   1172 	if (!$ok) {
   1173 	    push @TAGLESS_FILES, $relFile;
   1174 	    return;
   1175 	}
   1176     }
   1177 
   1178     # If the low and high revisions are the same then there are no bugs
   1179     # to record from this file.
   1180     if ($rev_lo eq $rev_hi) {
   1181 	# Scan down to get the date of the rev_hi
   1182 	while (<IN>) {
   1183 	    if (/^revision $rev_hi\s*$/) {
   1184 		$_ = <IN>; # Read date line
   1185 		if (/^date: (.+?);/) {
   1186 		    $TAGSCAN_TAG_HI_DATE = $1
   1187 			if ($TAGSCAN_TAG_HI_DATE lt $1);
   1188 		} else {
   1189 		    cantParse('date', $relFile, $_, $rev_hi);
   1190 		}
   1191 	    }
   1192 	}
   1193 	return;
   1194     }
   1195 
   1196     my $inRange;
   1197 
   1198     my @result;
   1199 
   1200     # The rlog output (the CACHE file) contains a series
   1201     # of groups of lines, like so:
   1202     #|----------------------------
   1203     #|revision 1.40
   1204     #|date: 2001/08/02 18:24:58;  author: grhoten;  state: Exp;  lines: +82 -73
   1205     #|jitterbug 1080: general readme.html updates
   1206     # That is, the first line has the revision #.
   1207     # The third line has the bug ID.
   1208 
   1209     # Are revisions on the same branch?
   1210     my $branch_lo = revToBranch($rev_lo);
   1211     my $branch_hi = revToBranch($rev_hi);
   1212     if ($branch_lo eq $branch_hi) {
   1213 	
   1214 	while (<IN>) {
   1215 	    if (/^-{20,}$/) {
   1216 		$_ = <IN>; # Read revision line
   1217 		if (/revision (\S+)/) {
   1218 		    my $rev = $1;
   1219 		    last if ($rev eq $rev_lo);
   1220 		    if (!$inRange) {
   1221 			if ($rev eq $rev_hi || $rev_hi eq 'HEAD') {
   1222 			    $inRange = 1;
   1223 			}
   1224 		    }
   1225 		    if ($inRange) {
   1226 			my $date = <IN>; # Read date line
   1227 			$_ = <IN>; # Read comment or branches: line
   1228 			$_ = <IN> if (/^branches:/); # Read line after branches:
   1229 			my $id;
   1230 			if (/^\s*jitterbug\s+0*(\d+)/i) {
   1231 			    $id = $1;
   1232 			} else {
   1233 			    push @NO_JITTERBUG_FILES, [$relFile, $rev, $_]
   1234 				if (noJitterbugFilter($rev, $date));
   1235 			    $id = $NO_JITTERBUG;
   1236 			}
   1237 			push @result, [$rev, $id, $date];
   1238 		    }
   1239 		} else {
   1240 		    cantParse('revision', $relFile, $_);
   1241 		    last; # This is very bad - bail out
   1242 		}
   1243 	    }
   1244 	}
   1245     }
   1246 
   1247     elsif ($branch_hi =~ /^\Q$branch_lo\E\./) {
   1248 	# Special case:  E.g., going from 1.25 => 1.25.2.1 means
   1249 	# going from branch 1 to 1.25.2.  We can handle this.
   1250 	
   1251 	my @revs = traverseRevisions($rev_lo, $rev_hi);
   1252 
   1253 	#print "[$relFile: ", join(",",@revs), "]";
   1254 
   1255 	shift(@revs); # discard rev_lo
   1256 	my %revs;
   1257 	foreach (@revs) { $revs{$_} = 1; } # convert to hash
   1258 
   1259 	while (<IN>) {
   1260 	    if (/^-{20,}$/) {
   1261 		$_ = <IN>; # Read revision line
   1262 		if (/revision (\S+)/) {
   1263 		    my $rev = $1;
   1264 		    if (exists $revs{$rev}) {
   1265 			delete $revs{$rev};
   1266 			my $date = <IN>; # Read date line
   1267 			if ($rev eq $rev_hi) {
   1268 			    # Record latest date corresponding to HI tag
   1269 			    if ($date =~ /^date: (.+?);/) {
   1270 				$TAGSCAN_TAG_HI_DATE = $1
   1271 				    if ($TAGSCAN_TAG_HI_DATE lt $1);
   1272 			    } else {
   1273 				cantParse('date', $relFile, $date, $rev);
   1274 			    }
   1275 			}
   1276 			$_ = <IN>; # Read comment or branches: line
   1277 			$_ = <IN> if (/^branches:/); # Read line after branches:
   1278 			my $id;
   1279 			if (/^\s*jitterbug\s+0*(\d+)/i) {
   1280 			    $id = $1;
   1281 			    $TAGSCAN_WHY{$id}->{$relFile} = 1;
   1282 			} else {
   1283 			    push @NO_JITTERBUG_FILES, [$relFile, $rev, $_]
   1284 				if (noJitterbugFilter($rev, $date));
   1285 			    $id = $NO_JITTERBUG;
   1286 			}
   1287 			$TAGSCAN_IDS{$id} = 1;
   1288 			last unless (%revs);
   1289 		    }
   1290 		} else {
   1291 		    cantParse('revision', $relFile, $_);
   1292 		    last; # This is very bad - bail out
   1293 		}
   1294 	    }
   1295 	}
   1296     }
   1297 
   1298     else {
   1299 	# Tags on different branches
   1300 	push @BRANCHED_FILES, [$relFile, $rev_lo, $rev_hi];
   1301     }
   1302 
   1303     close(IN);
   1304     my $a = \@result;
   1305 
   1306     foreach my $revision (@$a) {
   1307 	# $revision->[ revision, jitterbug ID, date: line ]
   1308 	$TAGSCAN_IDS{$revision->[1]} = 1;
   1309 	$TAGSCAN_WHY{$revision->[1]}->{$relFile} = 1;
   1310     }
   1311 
   1312     if (@$a) {
   1313 	# Record latest date corresponding to HI tag
   1314 	if ($a->[0]->[2] =~ /^date: (.+?);/) {
   1315 	    $TAGSCAN_TAG_HI_DATE = $1
   1316 		if ($TAGSCAN_TAG_HI_DATE lt $1);
   1317 	} else {
   1318 	    cantParse('date', $relFile, $a->[0]->[2], $a->[0]->[0]);
   1319 	}
   1320     }
   1321 }
   1322 
   1323 ######################################################################
   1324 # dcuthelp
   1325 ######################################################################
   1326 
   1327 # Perform a "dcuthelp" and emit the results.
   1328 sub do_dcuthelp {
   1329     $DCUTHELP_TAG = expandTag($QUERY->param('dcut_tag'));
   1330     my $ids = $QUERY->param('dcut_ids');
   1331     my $user = $QUERY->param('user');
   1332 
   1333     # Process the ID list; create a hash of IDs in %DCUTHELP_IDS
   1334     $ids =~ s/,/ /g;
   1335     my @ids = grep { /\S/ } split(/\s+/, $ids);
   1336     my @bogus = grep { !/^\d+$/ } @ids;
   1337     if (@bogus) {
   1338 	print "These are not valid Jitterbug IDs: ", join(", ", @bogus);
   1339 	return;
   1340     }
   1341     foreach my $id (@ids) {
   1342 	local $_ = $id;
   1343 	s/^0+//;
   1344 	if (!$_) { print "0 is not a valid Jitterbug ID."; return; }
   1345 	if (exists $DCUTHELP_IDS{$_}) { print "$id is duplicated in the Jitterbug ID list."; return; }
   1346 	$DCUTHELP_IDS{$_} = 1;
   1347     }
   1348 
   1349     if ($DCUTHELP_TAG!~/\S/ || 0==scalar keys %DCUTHELP_IDS) {
   1350 	print "Please enter a CVS tag and list of Jitterbug IDs and try again.";
   1351 	return;
   1352     }
   1353 
   1354     my @m;
   1355     return if (!parseMod(\@m)); # what modules are we searching?
   1356 
   1357     # Announce our intentions
   1358     print "Performing a DCUT check in module(s) <B>", join(", ", @m)
   1359         , "</B> against tag <B>", tagLink($DCUTHELP_TAG,$m[0],'grepj_2'),
   1360 	"</B>";
   1361     print " with Jitterbug IDs <B>";
   1362     print join(", ",
   1363 	       map {jitterbugLink($user, $_, 'grepj_2')}
   1364 	       sort {$a<=>$b} keys %DCUTHELP_IDS)
   1365 	, "</B>";
   1366     print ".\n";
   1367 
   1368     foreach (@m) {
   1369 	updateCacheDir($_);
   1370     }
   1371 
   1372     if ($UPDATE_COUNT) {
   1373 	print "done ($UPDATE_NONATTIC_COUNT,$UPDATE_ATTIC_COUNT).";
   1374     }
   1375 
   1376     $DCUTHELP_COUNT = 0;
   1377     print "<HR>Scanning CVS tree...";
   1378     foreach (@m) {
   1379 	dcuthelpDir($_);
   1380     }
   1381     print "done.";
   1382 
   1383     if (@NO_JITTERBUG_FILES) {
   1384 	print "<HR>The following revisions have no associated Jitterbug, or the bug number could not be parsed from the checkin comment.\n";
   1385 	print "Checkins older than a year are not listed.\n";
   1386 	print "<BLOCKQUOTE>";
   1387 	print join("<BR>\n",
   1388 		   map {logLink($_->[0],'grepj_2') .
   1389 			", " . $_->[1] . "<BR><CODE>" .
   1390 			$_->[2] . "</CODE>"}
   1391 		   @NO_JITTERBUG_FILES);
   1392 	print "</BLOCKQUOTE>\n";
   1393     }
   1394 
   1395     my %tagless;
   1396     if (@TAGLESS_FILES) {
   1397 	print "<HR><EM>The following ", scalar @TAGLESS_FILES
   1398 	    , " files are missing the tag <B>"
   1399 	    , $DCUTHELP_TAG, "</B>.  They were treated as if the tag existed "
   1400 	    , "on the initial revision.</EM>\n<BLOCKQUOTE>";
   1401 	print join("<BR>\n",
   1402 		   map {logLink($_, 'grepj_2')}
   1403 		   @TAGLESS_FILES);
   1404 	print "</BLOCKQUOTE>\n";
   1405 	for my $f (@TAGLESS_FILES) { $tagless{$f} = 1; }
   1406     }
   1407 
   1408     if (@BRANCHED_FILES) {
   1409 	print "<HR><EM><B>Error: The following ", scalar @BRANCHED_FILES
   1410 	    , " files contain the listed bug changes on different "
   1411 	    , " branches.\n</B></EM><BLOCKQUOTE>";
   1412 	print join("<BR>\n",
   1413 		   map {logLink($_->[0],'grepj_2') .
   1414 			": " . $_->[1] . ", " . $_->[2]}
   1415 		   @BRANCHED_FILES)
   1416 	    , "</BLOCKQUOTE>\n";
   1417     }
   1418 
   1419     if (@DCUTHELP_BADFILES) {
   1420 	print "<HR><EM><B>Error: The following "
   1421 	    , scalar @DCUTHELP_BADFILES,
   1422 	    " files contain intermingled bug fixes not specified in the list.",
   1423 	    "</B></EM>\n<BLOCKQUOTE>";
   1424 	my %badids;
   1425 	foreach (@DCUTHELP_BADFILES) {
   1426 	    my $relFile = $_->[0];
   1427 	    my $ids = $_->[1];
   1428 	    print logLink($relFile, 'grepj_2'), ": "
   1429 		, join(", ",
   1430 		       map {jitterbugLink($user, $_, 'grepj_2')}
   1431 		       @$ids)
   1432 		, "<BR>\n";
   1433 	    foreach my $i (@$ids) { $badids{$i} = 1; }
   1434 	}
   1435 	print "</BLOCKQUOTE>\n";
   1436 	print "Jitterbug changes not in the list: "
   1437             , join(", ",
   1438                    map {jitterbugLink($user, $_, 'grepj_2')}
   1439                    sort {$a<=>$b} keys %badids)
   1440             , "\n";
   1441     }
   1442 
   1443     if (@DCUTHELP_RETAGS) {
   1444 	print "<HR>CVS commands to update the tags in files containing "
   1445 	    ,"only the listed bugs (copy & paste into a shell window).";
   1446 	if (@DCUTHELP_BADFILES || @BRANCHED_FILES) {
   1447 	    print "<B>WARNING!  Some files (see above) contain other bug changes!  Files below are all \"legal\" but you may wish to address above problems before retagging.</B>";
   1448 	}
   1449 	print "<BR><BR><CODE><FONT SIZE=-1>";
   1450 	print "cd $CVSROOT<BR>\n";
   1451 	# Two passes, one for normal files, another for tagless
   1452 	my $tagless_count = 0;
   1453 	for (my $pass=0; $pass<2; ++$pass) {
   1454 	    print "<FONT COLOR=\"#0000FF\"># The following files do not contain the tag $DCUTHELP_TAG<BR>\n" if ($pass);
   1455 	    foreach (@DCUTHELP_RETAGS) {
   1456 		my $relFile = $_->[0];
   1457 		if ($pass == 0) {
   1458 		    if ($tagless{$relFile}) {
   1459 			++$tagless_count;
   1460 			next;
   1461 		    }
   1462 		} else {
   1463 		    next unless ($tagless{$relFile});
   1464 		}
   1465 		my $rev_hi = $_->[1];
   1466 		$relFile =~ s/,v$//;
   1467 		my $onBranch = ($rev_hi =~ /\d+\.\d+\.\d+/);
   1468 		print "<FONT COLOR=\"#FF0000\">" if ($onBranch);
   1469 		print "cvs tag -F -r $rev_hi $DCUTHELP_TAG $relFile";
   1470 		print "</FONT>" if ($onBranch);
   1471 		print "<BR>\n";
   1472 	    }
   1473 	    last unless ($tagless_count);
   1474 	    print "</FONT>\n" if ($pass);
   1475 	}
   1476 	print "</FONT></CODE>";
   1477     } else {
   1478 	print "<HR>Nothing to do; no clean checkins for bugs "
   1479 	    , join(", ",
   1480 		   map {jitterbugLink($user, $_, 'grepj_2')}
   1481 		   sort {$a<=>$b} keys %DCUTHELP_IDS)
   1482 	    , " after "
   1483 	    , tagLink($DCUTHELP_TAG,$m[0],'grepj_2')
   1484 	    , " in module(s) <B>"
   1485 	    , join(", ", @m), "</B>.\n"
   1486 	    ;
   1487     }
   1488 }
   1489 
   1490 # Given a relative path to $CVSROOT, dcuthelp the
   1491 # corresponding item under $CACHE.  Path may point to a
   1492 # file or a directory.
   1493 # @param relative directory, not ending in "/", e.g. "icu/icu"
   1494 # @param item name in that directory
   1495 sub dcuthelpEntry {
   1496     my $relDir = shift;
   1497     my $item = shift; # A file or dir in $CVSROOT/$relDir
   1498 
   1499     # Ignore stuff in the Attic
   1500     return if ($item eq 'Attic');
   1501 
   1502     if (-d "$CACHE/$relDir/$item") {
   1503         dcuthelpDir("$relDir/$item");
   1504     } elsif ($item =~ /,v$/) {
   1505         dcuthelpFile("$relDir/$item");
   1506     }
   1507 }
   1508 
   1509 # Given a relative directory path to $CACHE, dcuthelp the
   1510 # underlying files.
   1511 # @param relative directory, not ending in "/", e.g. "icu/icu"
   1512 sub dcuthelpDir {
   1513     my $relDir = shift;
   1514 
   1515     debugOut("dcuthelpDir($relDir)") if ($DEBUG);
   1516 
   1517     my $cacheDir = "$CACHE/$relDir";
   1518 
   1519     # First dcuthelp files in this directory
   1520     opendir(DIR, $cacheDir);
   1521     my @cacheList = grep !/^\.\.?$/, readdir(DIR);
   1522     closedir(DIR);
   1523 
   1524     # Dcuthelp each individual entry
   1525     foreach (@cacheList) {
   1526         dcuthelpEntry($relDir, $_);
   1527     }
   1528 }
   1529 
   1530 # Given a relative file path to $CVSROOT, dcuthelp the
   1531 # corresponding file under $CACHE.
   1532 # @param relative file path
   1533 sub dcuthelpFile {
   1534     my $relFile = shift;
   1535 
   1536     # Display progress; it takes awhile
   1537     if (++$DCUTHELP_COUNT % 100 == 0) {
   1538 	print " $DCUTHELP_COUNT...";
   1539     }
   1540 
   1541     # This file contains the output of rlog.
   1542     my $file = "$CACHE/$relFile";
   1543 
   1544     # Parse the rlog file.  Start by extracting the tag names.  Look
   1545     # for the DCUTHELP_TAG and its associated revision
   1546     # number.
   1547     open(IN, $file);
   1548     while (<IN>) {
   1549 	last if (/^symbolic names:\s*$/);
   1550     }
   1551     my $rev_tag = '';
   1552     while (<IN>) {
   1553 	last if (/^\S/);
   1554 	if (/^\s+$DCUTHELP_TAG:\s*(\S+)/) {
   1555 	    $rev_tag = $1;
   1556 	    last;
   1557 	}
   1558     }
   1559 
   1560     # Check for dead files.  Look ahead and find the state of the head
   1561     # revision.
   1562     my $pos = tell(IN);
   1563     my $state = '';
   1564     while (<IN>) {
   1565 	if (/^date:.+state: ([A-Za-z]+)/) {
   1566 	    $state = $1;
   1567 	    last;
   1568 	}
   1569     }
   1570     seek(IN,$pos,0);
   1571 
   1572     # If this file is 'dead', we're done.
   1573     return if ($state eq 'dead');
   1574 
   1575     # If the tag is missing, record the fact.  Continue to process
   1576     # the file as if the tag existed on the earliest revision.
   1577     # This allows the tagging of newly added files.
   1578     if (!$rev_tag) {
   1579 	push @TAGLESS_FILES, $relFile;
   1580     }
   1581 
   1582     # I'm going to assume the rlog output (the CACHE file) contains a series
   1583     # of groups of lines, like so:
   1584     #|----------------------------
   1585     #|revision 1.40
   1586     #|date: 2001/08/02 18:24:58;  author: grhoten;  state: Exp;  lines: +82 -73
   1587     #|jitterbug 1080: general readme.html updates
   1588     # That is, the first line has the revision #.
   1589     # The third line has the bug ID.  Sometimes the third line has a
   1590     # branch field.
   1591 
   1592     # Find bug IDs later than the given tag, and record any that aren't
   1593     # on the allowed list.  Locate $rev_hi - the high
   1594     # revision of any bug found in the list.
   1595     my @problem_ids; # Bug IDs between $rev_tag and $rev_hi not in the list
   1596     my $rev_hi;
   1597     my $bottom_rev = ''; # Last revision in the file
   1598     while (<IN>) {
   1599         if (/^-{20,}$/) {
   1600 	    $_ = <IN>; # Read revision line
   1601 	    if (/revision (\S+)/) {
   1602 		my $rev = $1;
   1603 		$bottom_rev = $rev;
   1604 		if ($rev eq $rev_tag) {
   1605 		    # Scan remainder of file to record last rev
   1606 		    while (<IN>) {
   1607 			if (/^-{20,}$/) {
   1608 			    $_ = <IN>; # Read revision line
   1609 			    $bottom_rev = $1 if (/revision (\S+)/);
   1610 			}
   1611 		    }
   1612 		    last;
   1613 		}
   1614 		my $date = <IN>; # Read date line
   1615 		$_ = <IN>; # Read comment or branches: line
   1616 		$_ = <IN> if (/^branches:/); # Read line after branches:
   1617 		my $id;
   1618 		if (/^\s*jitterbug\s+0*(\d+)/i) {
   1619 		    $id = $1;
   1620 		} else {
   1621 		    push @NO_JITTERBUG_FILES, [$relFile, $rev, $_]
   1622 			if (noJitterbugFilter($rev, $date));
   1623 		    $id = $NO_JITTERBUG;
   1624 		}
   1625 		my $in_list = (exists $DCUTHELP_IDS{$id});
   1626 #		# Handle tagless files a little differently
   1627 #		if (!$rev_tag) {
   1628 #		    if (!$rev_hi) {
   1629 #			if ($in_list) {
   1630 #			    $rev_hi = $rev;
   1631 #			} else {
   1632 #			}
   1633 #		    }
   1634 #
   1635 #		}
   1636 		if (!$rev_hi) {
   1637 		    if ($in_list) {
   1638 			$rev_hi = $rev;
   1639 		    }
   1640 		} else {
   1641 		    if (!$in_list) {
   1642 			push @problem_ids, $id;
   1643 		    }
   1644 		}
   1645 	    } else {
   1646 		cantParse('revision', $relFile, $_);
   1647 	    }
   1648         }
   1649     }
   1650 
   1651     # If the bottom revision looks like a branch, then we need
   1652     # to do extra processing.  Branch revisions are listed at the
   1653     # end of the rlog output.
   1654     if ($bottom_rev =~ /\d+\.\d+\.\d+\.\d+/ &&
   1655 	$bottom_rev ne '1.1.1.1') {
   1656 
   1657 	# This file contains branches; do special handling
   1658 
   1659 	# Parse all the revisions and form a branch tree.
   1660 	# Construct a hash (%tree) of revision numbers to jitterbugs.
   1661 	# In addition, "$rev-" maps to a ref to an array of branches,
   1662 	# if any.
   1663 	my %tree;
   1664 	seek(IN,0,0); # rewind to start
   1665 	while (<IN>) {
   1666 	    if (/^-{20,}$/) {
   1667 		$_ = <IN>; # Read revision line
   1668 		if (/revision (\S+)/) {
   1669 		    my $rev = $1;
   1670 		    my $date = <IN>; # Read date line
   1671 		    $_ = <IN>; # Read comment or branches: line
   1672 		    if (/^branches:\s*(.*)/) {
   1673 			my @branches = split(/;\s*/, $1);
   1674 			$tree{$rev . '-'} = \@branches;
   1675 			$_ = <IN>; # Read comment line
   1676 		    }
   1677 		    my $id;
   1678 		    if (/^\s*jitterbug\s+0*(\d+)/i) {
   1679 			$id = $1;
   1680 		    } else {
   1681 			push @NO_JITTERBUG_FILES, [$relFile, $rev, $_]
   1682 			    if (noJitterbugFilter($rev, $date));
   1683 			$id = $NO_JITTERBUG;
   1684 		    }
   1685 		    $tree{$rev} = $id;
   1686 		} else {
   1687 		    cantParse('revision', $relFile, $_);
   1688 		}
   1689 	    }
   1690 	}
   1691 
   1692 #	print "[$relFile: ";
   1693 #	print join("; ",
   1694 #		   map {$_ . " => " .
   1695 #		       (ref($tree{$_})
   1696 #			?("(".join(",",@{$tree{$_}}).")")
   1697 #			:$tree{$_})}
   1698 #		   sort keys %tree);
   1699 
   1700 	$rev_hi = dcuthelpScan(\%tree, $rev_tag, 1);
   1701 
   1702 #	print ": scan=>$rev_hi]";
   1703 
   1704 	@problem_ids = ();
   1705 	if ($rev_hi =~ /;/) {
   1706 	    # Tags on different branches
   1707 	    my @a = split(/;/, $rev_hi);
   1708 	    unshift @a, $relFile;
   1709 	    push @BRANCHED_FILES, \@a;
   1710 	    return;
   1711 	} elsif ($rev_hi) {
   1712 	    my @revs = traverseRevisions($rev_tag, $rev_hi);
   1713 
   1714 	    shift(@revs); # discard rev_lo
   1715 	    my %revs;
   1716 	    foreach (@revs) { $revs{$_} = 1; } # convert to hash
   1717 
   1718 	    seek(IN,0,0); # rewind to start
   1719 	    while (<IN>) {
   1720 		if (/^-{20,}$/) {
   1721 		    $_ = <IN>; # Read revision line
   1722 		    if (/revision (\S+)/) {
   1723 			my $rev = $1;
   1724 			if (exists $revs{$rev}) {
   1725 			    delete $revs{$rev};
   1726 			    my $date = <IN>; # Read date line
   1727 			    $_ = <IN>; # Read comment or branches: line
   1728 			    $_ = <IN> if (/^branches:/); # Read line after branches:
   1729 			    my $id;
   1730 			    if (/^\s*jitterbug\s+0*(\d+)/i) {
   1731 				$id = $1;
   1732 			    } else {
   1733 				push @NO_JITTERBUG_FILES, [$relFile, $rev, $_]
   1734 				    if (noJitterbugFilter($rev, $date));
   1735 				$id = $NO_JITTERBUG;
   1736 			    }
   1737 			    if (!exists $DCUTHELP_IDS{$id}) {
   1738 				push @problem_ids, $id;
   1739 			    }
   1740 			    last unless (%revs);
   1741 			}
   1742 		    } else {
   1743 			cantParse('revision', $relFile, $_);
   1744 			last; # This is very bad - bail out
   1745 		    }
   1746 		}
   1747 	    }    
   1748 	}
   1749     }
   1750 
   1751     if (@problem_ids) {
   1752 	my @a = sortedUniqueInts(@problem_ids);
   1753 	push @DCUTHELP_BADFILES, [$relFile, \@a];
   1754     } elsif ($rev_hi) {
   1755 	# This file is okay; record the data needed for moving the tag
   1756 	push @DCUTHELP_RETAGS, [$relFile, $rev_hi];
   1757     }
   1758 
   1759     close(IN);
   1760 }
   1761 
   1762 # Given a revision tree (see dcuthelpFile), look for %DCUTHELP_IDS
   1763 # bugs along various branches, starting at a given revision.  Proceed
   1764 # along the branch of the given revision by incrementing it using
   1765 # incRev().  If any revision along the way is a branch point, follow
   1766 # that branch by recursing.  If found on two split branches,
   1767 # return 'rev;rev'.  If not found at all, return ''.  If found on
   1768 # exactly one branch, return the furthest revision at which it was
   1769 # found.
   1770 #
   1771 # @param tree, as created by dcuthelpFile
   1772 # @param first revision to examine
   1773 # @param if true, exclude given revision from bug search
   1774 #        but not from branch analysis.
   1775 #
   1776 # @return either a revision, or 'rev;rev' if the bugs occur
   1777 #         on two split branches, or '' if the bugs aren't seen.
   1778 sub dcuthelpScan {
   1779     my $tree = shift; # parsed revision tree; see dcuthelpFile
   1780     my $rev = shift; # rev to start at
   1781     my $exclusive = shift || ''; # is $rev exclusive?
   1782 
   1783 #   print "[scan $tree $rev $exclusive]";
   1784 
   1785     # If there are no branches between $rev and the end of its branch,
   1786     # then return the top revision at which one of %DCUTHELP_IDS is seen.
   1787     my $branchrev = ''; # First rev at which branch was seen, if any
   1788     my $lastbugrev = ''; # Last rev at which bug was seen
   1789     my $r;
   1790     for ($r=$rev ;exists $tree->{$r}; $r=incRev($r)) {
   1791 #	print "{$r}";
   1792 	if (exists $DCUTHELP_IDS{$tree->{$r}}) {
   1793 	    $lastbugrev = $r;
   1794 	}
   1795 	if (exists $tree->{"$r-"}) {
   1796 	    $branchrev = $r;
   1797 	    last;
   1798 	}
   1799     }
   1800 
   1801     # If $exclusive it true, can't return this rev.
   1802     if ($exclusive && ($lastbugrev eq $rev)) {
   1803 	$lastbugrev = '';
   1804     }
   1805 
   1806     # If there are no branches we are done.
   1807     if (!$branchrev) {
   1808 	return $lastbugrev;
   1809     }
   1810 
   1811     # Otherwise, examine the n branches and the continuation of
   1812     # this branch separately.  Convert branch revisions to the first
   1813     # rev on each branch, e.g., "1.14.2" => "1.14.2.1"
   1814     my @branches = map {"$_.1"} @{$tree->{"$branchrev-"}};
   1815     $r = incRev($branchrev);
   1816     push @branches, $r if (exists $tree->{$r});
   1817 
   1818     $r = '';
   1819     foreach (@branches) {
   1820 	my $a = dcuthelpScan($tree, $_);
   1821 	return $a if ($a =~ /;/);
   1822 	if ($a) {
   1823 	    if ($r) {
   1824 		# Our bugs were seen on more than one branch
   1825 		return "$r;$a";
   1826 	    }
   1827 	    $r = $a;
   1828 	}
   1829     }
   1830 
   1831     # If we haven't seen it on any branches, use result up to the
   1832     # branch point, found above.
   1833     $r ||= $lastbugrev;
   1834 
   1835     return $r;
   1836 }
   1837 
   1838 ######################################################################
   1839 # CVS rlog cache
   1840 ######################################################################
   1841 
   1842 #---------------------------------------------------------------------
   1843 # Given a relative path to $CVSROOT, update the
   1844 # corresponding item under $CACHE.  Path may point to a
   1845 # file or a directory.
   1846 # @param relative directory, not ending in "/", e.g. "icu/icu"
   1847 # @param item name in that directory
   1848 sub updateCacheEntry {
   1849     my $relDir = shift;
   1850     my $item = shift; # A file or dir in $CVSROOT/$relDir
   1851 
   1852     if (-d "$CVSROOT/$relDir/$item") {
   1853         updateCacheDir("$relDir/$item");
   1854     } elsif ($item =~ /,v$/) {
   1855         updateCacheFile("$relDir/$item");
   1856     }
   1857 }
   1858 
   1859 #---------------------------------------------------------------------
   1860 # Given a relative directory path to $CVSROOT, update the
   1861 # corresponding directory under $CACHE.
   1862 # @param relative directory, not ending in "/", e.g. "icu/icu"
   1863 sub updateCacheDir {
   1864     my $relDir = shift;
   1865 
   1866     debugOut("+updateCacheDir($relDir)") if ($DEBUG);
   1867 
   1868     my $cvsDir = "$CVSROOT/$relDir";
   1869     my $cacheDir = "$CACHE/$relDir";
   1870 
   1871     # First update files in this directory
   1872     opendir(DIR, $cvsDir);
   1873     my @cvsList = grep !/^\.\.?$/ && $_ ne 'CVS', readdir(DIR);
   1874     closedir(DIR);
   1875     my %cvsPruneHash;
   1876     foreach (@cvsList) { $cvsPruneHash{$_} = 1; }
   1877     if (!$QUERY->param('include_attic')) {
   1878         @cvsList = grep !/^attic$/i, @cvsList;
   1879     }
   1880     my %cvsHash;
   1881     foreach (@cvsList) { $cvsHash{$_} = 1; }
   1882 
   1883     # Update/create the cache directory.  If it doesn't exist,
   1884     # create it.  If it does, prune out any obsolete entries.
   1885     if (-d $cacheDir) {
   1886         if (!opendir(DIR, $cacheDir)) {
   1887             print "Can't open dir $cacheDir: $!";
   1888 	    debugOut("-!updateCacheDir($relDir)") if ($DEBUG);
   1889             return;
   1890         }
   1891         my @cacheList = grep !/^\.\.?$/, readdir(DIR);
   1892         closedir(DIR);
   1893 
   1894         # Delete things that don't exist in CVS
   1895         foreach (@cacheList) {
   1896             if (!exists $cvsPruneHash{$_}) {
   1897 		debugOut ( " Removing $cacheDir/$_ .." ) if ($DEBUG);
   1898                 rmtree("$cacheDir/$_", 0, 1);
   1899             }
   1900         }
   1901     } else {
   1902         mkpath($cacheDir, 0, 0777);
   1903     }
   1904 
   1905     # Update each individual entry
   1906     foreach (@cvsList) {
   1907         updateCacheEntry($relDir, $_);
   1908     }
   1909 
   1910     debugOut("-updateCacheDir($relDir)") if ($DEBUG);
   1911 }
   1912 
   1913 #---------------------------------------------------------------------
   1914 # Given a relative file path to $CVSROOT, update the
   1915 # corresponding file under $CACHE, if necessary.
   1916 # @param relative file path
   1917 sub updateCacheFile {
   1918     my $relFile = shift;
   1919 
   1920     if (! -e "$CACHE/$relFile" ||
   1921         (-M "$CACHE/$relFile" > -M "$CVSROOT/$relFile")) {
   1922         if (!$UPDATE_COUNT) {
   1923             print "<HR>Updating cache...";
   1924 	    if(! -e "$CACHE/$relFile") { 
   1925 		debugOut ( " because $CACHE/$relFile was not cached.." ) if ($DEBUG);
   1926 	    } else {
   1927 		debugOut ( " because $relFile was updated.." ) if ($DEBUG);
   1928 	    }
   1929         } elsif ($UPDATE_COUNT % 25 == 0) {
   1930 	    print " $UPDATE_COUNT...";
   1931 	}
   1932         ++$UPDATE_COUNT;
   1933 	if ($relFile =~ m|/attic/|i) {
   1934 	    ++$UPDATE_ATTIC_COUNT;
   1935 	} else {
   1936 	    ++$UPDATE_NONATTIC_COUNT;
   1937 	}
   1938 	my $f = "$CACHE/$relFile";
   1939 	command("rlog $CVSROOT/$relFile > $f", $f);
   1940 	my $size = -s $f;
   1941 	if ($size <= 0) {
   1942 	    print " <B>{Fatal Error: rlog of $relFile failed}</B> ";
   1943 	    unlink($f);
   1944 	}
   1945 	command("touch -r $CVSROOT/$relFile $f");
   1946     }
   1947 }
   1948 
   1949 ######################################################################
   1950 # instaCache
   1951 ######################################################################
   1952 
   1953 #---------------------------------------------------------------------
   1954 # Lookup an ID in the instaCache, and return the diffs stored
   1955 # there.  If there is no entry for the ID, then return the
   1956 # empty string.  The ID will be suffixed with 'a' if the
   1957 # Attic is included.
   1958 sub instaGet {
   1959     my $id = shift;
   1960     my $diffs;
   1961     my $dir = $QUERY->param('include_attic') ? $INSTA_ATTIC : $INSTA;
   1962     my $file = "$dir/$id";
   1963     if (-e $file) {
   1964 	if (open(IN, $file)) {
   1965 	    while (<IN>) { $diffs .= $_; }
   1966 	    close(IN);
   1967 	}
   1968     }
   1969     return $diffs;
   1970 }
   1971 
   1972 #---------------------------------------------------------------------
   1973 # Store diffs for the given ID in the instaCache.  The ID will be
   1974 # suffixed with 'a' if the Attic is included.
   1975 sub instaPut {
   1976     my $id = shift;
   1977     my $diffs = shift;
   1978     my $dir = $QUERY->param('include_attic') ? $INSTA_ATTIC : $INSTA;
   1979     my $file = "$dir/$id";
   1980     open(IN, ">$file") or return;
   1981     print IN $diffs;
   1982     close(IN);
   1983 }
   1984 
   1985 #---------------------------------------------------------------------
   1986 # Reset the instaCache by deleting all entries.  We need
   1987 # to do this whenever the main cache is invalidated.
   1988 # Param: if true, then force reset of all instaCaches.
   1989 # Otherwise do a smart reset based on the update counts.
   1990 sub resetInstaCache {
   1991     if (shift) {
   1992 	command("rm -rf $INSTA"); # Recursive
   1993 	return;
   1994     }
   1995 
   1996     # If there have been changes to non-Attic files, we
   1997     # have to reset everything.
   1998     if ($UPDATE_NONATTIC_COUNT) {
   1999 	# The following will fail with:
   2000 	# rm: cannot remove `/tmp/icu-grepj.cache/insta/Attic': Is a directory
   2001 	#command("rm -f $INSTA/*") if (-d $INSTA);
   2002 	command("find $INSTA -type f -maxdepth 1 -exec rm {} \\;")
   2003 	    if (-d $INSTA);
   2004     } else {
   2005 	# Otherwise just clear the attic instaCache
   2006 	command("rm -f $INSTA_ATTIC/*") if (-d $INSTA_ATTIC);
   2007     }
   2008 }
   2009 
   2010 ######################################################################
   2011 # CVS Utilities
   2012 ######################################################################
   2013 
   2014 #---------------------------------------------------------------------
   2015 # Get the date corresponding to the revision 1.1 in the
   2016 # given rlog output.  We use this as the "creation date" for the
   2017 # corresponding CVS file.
   2018 # @param absolute rlog output file path (in the cache)
   2019 # @return date string of the form "2002/08/23 23:21:38"
   2020 sub getRev11Date {
   2021     my $file = shift;
   2022 
   2023     # Parse the rlog file.  Return the date line for 1.1
   2024     open(IN, $file);
   2025     while (<IN>) {
   2026 	if (/^-{20,}$/) {
   2027 	    $_ = <IN>;
   2028 	    if (/revision 1.1$/) {
   2029 		$_ = <IN>;
   2030 		if (/^date: (.+?);/) {
   2031 		    return $1;
   2032 		}
   2033 	    }
   2034 	}
   2035     }
   2036     close(IN);
   2037 
   2038     ''; # Parse failure - should never happen
   2039 }
   2040 
   2041 #---------------------------------------------------------------------
   2042 # Given a ,v file, find the revisions containing the
   2043 # jitterbug ID change.  Return an array of hash refs.
   2044 # Newest revision is first, that is, it is $result[0].
   2045 # Each hash has:
   2046 #   new (revision#)
   2047 #   old (revision#)
   2048 #   date
   2049 #   author
   2050 #   comment
   2051 # If the very first revision is labeled with the jitterbug
   2052 # $ID, then {old} will be $BASE_REV.
   2053 #
   2054 sub findRevisions {
   2055     my $file = shift;
   2056     my $pat = shift;
   2057     my @result;
   2058 
   2059     # rlog output:
   2060     #|revision 1.3
   2061     #|date: 1999/10/14 22:14:04;  author: schererm;  state: Exp;  lines: +4 -2
   2062     #|jitterbug 14: echo off now and use the Release versions of the tools
   2063     #|----------------------------
   2064     #|revision 1.2
   2065     #|date: 1999/10/13 01:10:24;  author: schererm;  state: Exp;  lines: +9 -6
   2066     #|jitterbug 15: windows: genrb puts .res files into the current directory
   2067     #|more text
   2068     #|----------------------------
   2069     #|revision 1.1
   2070     #|date: 1999/10/12 21:50:30;  author: schererm;  state: Exp;
   2071     #|jitterbug 14: Windows: create a batch file to make the /icu/data files
   2072     #|=============================================================================
   2073 
   2074     # We read our rlog info from the cache now
   2075     my %log; # $log{<revision>} = <block of text>
   2076     my $l=''; my $r='';
   2077     open(IN, $file);
   2078     while (<IN>) {
   2079         if (/^-{20,}$/) {
   2080 	    $log{$r} = $l if ($r);
   2081             $l = $r = '';
   2082         } elsif ($r) {
   2083             $l .= $_;
   2084         } else {
   2085 	    if (/revision\s+(\S+)/) {
   2086 		$r = $1;
   2087 		die "Duplicate revision $r in $file" if (exists $log{$r});
   2088 	    }
   2089 	}
   2090     }
   2091     close(IN);
   2092     $log{$r} = $l if ($r);
   2093 
   2094     for $r (sort cmprevs keys %log) {
   2095         local $_ = $log{$r};
   2096 
   2097         # (2 of 3 REGEXPS) SEE ALSO other regexps; keep them in sync
   2098         if (/^\s*(?:$CVS_MSG_KW)\s*$pat\b/im) {
   2099             my %h;
   2100             $h{new} = $r;
   2101 	    my $rold = decRev($r);
   2102             if (exists $log{$rold}) {
   2103                 $h{old} = $rold;
   2104             } else {
   2105                 $h{old} = $BASE_REV;
   2106             }
   2107             if (/date:\s*(.+?);/) {
   2108                 $h{date} = $1;
   2109             }
   2110             if (/author:\s*(.+?);/) {
   2111                 $h{author} = $1;
   2112             }
   2113 
   2114             # (3 of 3 REGEXPS) SEE ALSO other regexps; keep them in sync
   2115             if (/^\s*(?:$CVS_MSG_KW)\s*$pat\b(.*)/ism) {
   2116                 local $_ = $1;
   2117                 s/^\s*:?\s*//;
   2118                 s/\s*----+\s*$//;
   2119                 s/\s*====+\s*$//;
   2120                 s/\s*\n+\s*/ /g;
   2121                 $h{comment} = $_;
   2122             }
   2123             push @result, \%h;
   2124         }
   2125     }
   2126 
   2127     @result;
   2128 }
   2129 
   2130 ######################################################################
   2131 # CVS tag parsing
   2132 ######################################################################
   2133 
   2134 #---------------------------------------------------------------------
   2135 # Given a tag name like this: "2.1", expand it to "release-2-1".
   2136 # Convert 'head' (case insens.) to 'HEAD'.
   2137 # Otherwise leave it alone.
   2138 sub expandTag {
   2139     local $_ = shift;
   2140     s/^\s+//;
   2141     s/\s+$//;
   2142     if (/^\d+(\.\d+)/) {
   2143 	s|\.|-|g;
   2144 	$_ = "release-" . $_;
   2145     } elsif (/^head$/i) {
   2146 	$_ = 'HEAD';
   2147     }
   2148     $_;
   2149 }
   2150 
   2151 #---------------------------------------------------------------------
   2152 # Given a tag name like this: "release-1-5-0-d03", return a normalized
   2153 # release number.  The release number in this case would be 1500003.
   2154 # The final release (no 'd') "release-1-5-0" is 1500099; that is, it
   2155 # behaves like "d99".  Up to 5 digits are allowed prior to the 'd'
   2156 # number (if any).  This should suffice; in practice we use only 4
   2157 # (e.g., "release-1-4-1-2").  Assume all numbers are single digits
   2158 # except for the 'd' number.  The tag must start with /release-?/.
   2159 # All digits must be separated by '-', except the '-' before the 'd03'
   2160 # may be omitted.  One or two digits are allowed after the 'd'.
   2161 # Trailing text after an otherwise valid tag, with no 'd', is treated
   2162 # as a 'd' of 00, e.g., "release-2-0-2s-branch".
   2163 #
   2164 # @param a tag string, like "release-1-5-0-d03"
   2165 # @param a release integer, that can be compared numerically,
   2166 #        like 1500003, or if the tag can't be parsed.
   2167 sub tagToRelease {
   2168     local $_ = shift;
   2169     if (s/^release-?//i) {
   2170 	my @a;
   2171 	my $d = -1;
   2172 	for (;;) {
   2173 	    if (s/^(\d)-// ||
   2174 		s/^(\d)$// ||
   2175 		s/(\d)(\D)/$2/) { # e.g., "release-1-4-2d01"
   2176 		push @a, $1;
   2177 	    } elsif ($d<0 && s/^d(\d{1,2})$//) {
   2178 		$d = $1;
   2179 	    } else {
   2180 		last;
   2181 	    }
   2182 	}
   2183 	# If we have some trailing non-standard text, and no 'd',
   2184 	# then treat it as a 'd' of 00.
   2185 	if ($_ && $d<0 && (scalar @a)>0) {
   2186 	    $_ = '';
   2187 	    $d = 0;
   2188 	}
   2189 	if (!$_) {
   2190 	    push @a, (0, 0, 0, 0); # Pad with 0's
   2191 	    @a = @a[0..4];
   2192 	    return join('',@a) . sprintf("%02d", $d<0?99:$d);
   2193 	}
   2194     }
   2195     0; # parse failure
   2196 }
   2197 
   2198 ######################################################################
   2199 # Utilities
   2200 ######################################################################
   2201 
   2202 # Output a string in debug mode
   2203 # Usage:  debugOut("string") if ($DEBUG);
   2204 sub debugOut {
   2205     print "<P><FONT SIZE=-1><B>", join(" ", @_), "</B></FONT></P>";
   2206 }
   2207 
   2208 #|# Set or change a GET param of a URL.  If the param exists,
   2209 #|# change it.  If it doesn't, add it.
   2210 #|# @param a URL, with or without trailing parameters
   2211 #|# @param a parameter string of the form a=b, a=, or a
   2212 #|# @param modified URL
   2213 #|sub urlParam {
   2214 #|    my $url = shift;
   2215 #|    my $param = shift;
   2216 #|    my $key = $param;
   2217 #|    $key =~ s/=.*//;
   2218 #|    if ($url =~ s/([\?&;])$key=[^&;]*/$1$param/ ||
   2219 #|	$url =~ s/([\?&;])$key$/$1$param/) {
   2220 #|	return $url;
   2221 #|    }
   2222 #|    $url . ($url =~ /\?/ ? '&' : '?') . $param;
   2223 #|}
   2224 
   2225 # Append the given path-info to the given URL
   2226 # Param: URL, possibly including '?xxx=yyy' params, NOT ending in '/'
   2227 # Param: Path info, MUST start with '/'
   2228 sub urlPathInfo {
   2229     my $url = shift;
   2230     my $pi = shift;
   2231     if ($url =~ s|\?|$pi?|) {
   2232     } else {
   2233 	$url .= $pi;
   2234     }
   2235     $url;
   2236 }
   2237 
   2238 # Parse the module params given by the user
   2239 # @param ref to array to receive list of modules.  Prior contents will
   2240 #        be lost.
   2241 # @return 1 on success, or 0 if bad or no modules were seen.
   2242 sub parseMod {
   2243     my $m = shift; # ref to array
   2244     my @badMod;
   2245 
   2246     my $mod = $QUERY->param('mod') || $DEFAULT_MOD;
   2247     $mod =~ s|^\s+||;
   2248     $mod =~ s|\s+$||;
   2249     $mod =~ s|\s+| |g;
   2250     @$m = split(' ', $mod);
   2251     foreach (@$m) {
   2252 	# !Modify element of @m in place!
   2253 	$_ = $MOD_ABBREV{$_} if (exists $MOD_ABBREV{$_});
   2254 	push @badMod, $_ if (! -d "$CVSROOT/$_");
   2255     }
   2256     if (@badMod) {
   2257 	print "Invalid modules: <CODE>",
   2258 	      join(" ", @badMod), "</CODE>";
   2259 	print "<BR>Did you try the full module name (e.g. \"icu/charset\")?  Only some modules can be abbreviated: <CODE>", join(" ", sort keys %MOD_ABBREV), "</CODE>.";
   2260 	return 0;
   2261     }
   2262     1;
   2263 }
   2264 
   2265 # Return the HTML for a link to the given jitterbug.
   2266 # @param user
   2267 # @param bug ID
   2268 # @param OPTIONAL target
   2269 # @return HTML for A tag
   2270 sub jitterbugLink {
   2271     my $user = shift;
   2272     my $id = shift;
   2273     my $targ = shift || '';
   2274     if ($id eq $NO_JITTERBUG) {
   2275 	return "<EM>no jitterbug</EM>";
   2276     }
   2277     $targ = " target=\"$targ\"" if ($targ);
   2278     "<A href=\"" . jitterbugURL($user, $id) . "\"$targ>$id</A>";
   2279 }
   2280 
   2281 # Return the HTML for a link to the WebCVS log of a file.
   2282 # @param relative path (from $CVSROOT) to file, optionally with
   2283 #        trailing ",v"
   2284 # @param OPTIONAL target
   2285 # @return HTML for A tag
   2286 sub logLink {
   2287     my $relFile = shift;
   2288     my $targ = shift;
   2289     $targ = " target=\"$targ\"" if ($targ);
   2290     $relFile =~ s/,v$//;
   2291     "<A href=\"$LOG_URL/$relFile\"$targ>$relFile</A>";
   2292 }
   2293 
   2294 # Return the HTML for a link to the WebCVS "tag" page.  This will
   2295 # just be the page for the root of the given module, with the given
   2296 # tag selected.
   2297 # @param tag
   2298 # @param module, e.g., "icu/icu"
   2299 # @param OPTIONAL target
   2300 # @return HTML for A tag
   2301 sub tagLink {
   2302     my $tag = shift;
   2303     my $mod = shift;
   2304     my $targ = shift;
   2305     $targ = " target=\"$targ\"" if ($targ);
   2306     "<A href=\"$LOG_URL/$mod/?only_with_tag=$tag\"$targ>$tag</A>";
   2307 }
   2308 
   2309 # Emit an error (in HTML) about failing to parse a line.
   2310 # @param what can't be parsed, e.g., 'revision'
   2311 # @param relative file path, e.g., 'icu/icu/readme.html'
   2312 # @param the line that can't be parsed
   2313 # @param revision
   2314 sub cantParse {
   2315     my $what = shift;
   2316     my $relFile = shift;
   2317     my $line = shift;
   2318     my $rev = shift;
   2319     $rev = ', '.$rev if ($rev);
   2320     print "<BR>Error: Can't parse $what in "
   2321 	, logLink($relFile, 'grepj_2'), "$rev:<BR>\n";
   2322     print "<CODE>$line</CODE><BR>";
   2323 }
   2324 
   2325 # Print the given string(s) to STDOUT and also return the
   2326 # output as a single string.
   2327 sub out {
   2328     local $_ = join('', @_);
   2329     print;
   2330     $_;
   2331 }
   2332 
   2333 # Given an array of numbers, return a sorted unique list.
   2334 sub sortedUniqueInts {
   2335     my @a = @_;
   2336     my %a;
   2337     foreach (@a) {
   2338 	s/^0+(\d)/$1/;
   2339 	$a{$_} = 1;
   2340     }
   2341     sort {$a<=>$b} keys %a;
   2342 }
   2343 
   2344 # Convert a revision number to a branch number.
   2345 # Generally this means dropping the last dotted integer, but if
   2346 # the last two dotted integers are 0.n, then the 0. must be dropped:
   2347 # 1.14.0.2 => 1.14.2.  (This is a magic CVS revision representing
   2348 # the branch.)  Also 'HEAD' is branch '1'.
   2349 sub revToBranch {
   2350     local $_ = shift;
   2351     s/\.0(\.\d+)$/$1/ || s/\.\d+$// || s/HEAD/1/;
   2352     $_;
   2353 }
   2354 
   2355 # Given two CVS revisions, return a sequence of revisions traversing
   2356 # the logical path between them.
   2357 #
   2358 # WARNING!: The revisions must actually have a path between them.  If
   2359 # you pass in 1.4 => 1.2 or 1.5 => 1.2.2.4 the sub will run
   2360 # infinitely.
   2361 #
   2362 # @param low revision, e.g. 1.2 or 1.2.0.4
   2363 # @param high revision, e.g., 1.5.2.3
   2364 # @return an array of revisions from low to high inclusive
   2365 sub traverseRevisions {
   2366     my $rev_lo = shift;
   2367     my $rev_hi = shift;
   2368     my @a = split(/\./, $rev_lo);
   2369     my @limit = split(/\./, $rev_hi);
   2370     my @list;
   2371     for (;;) {
   2372 	push @list, join('.', @a);
   2373 	if (@a == @limit) {
   2374 	    last if ($a[-1] == $limit[-1]);
   2375 	    # Fall through
   2376 	} else {
   2377 	    my $a = join('.', @a);
   2378 	    if ($rev_hi =~ /^\Q$a\E\./) {
   2379 		push @a, $limit[@a];
   2380 		push @a, 1;
   2381 		next;
   2382 	    }
   2383 	    # Else fall through
   2384 	}
   2385 
   2386 	if ($a[-2] == 0) {
   2387 	    # Handle magic CVS revisions like 1.14.0.2
   2388 	    $a[-2] = $a[-1];
   2389 	    $a[-1] = 1;
   2390 	} else {
   2391 	    $a[-1]++;
   2392 	}
   2393     }
   2394     @list;
   2395 }
   2396 
   2397 # Given a CVS numeric revision, increment it (increment last integer)
   2398 sub incRev {
   2399     local $_ = shift;
   2400     if (/(\d+)$/) {
   2401 	my $i = $1 + 1;
   2402 	s/\d+$/$i/;
   2403 	return $_;
   2404     }
   2405     die "Can't increment $_";
   2406 }
   2407 
   2408 # Given a CVS numeric revisions, decrement it.  This handles
   2409 # branches.  If the resulting revision number goes to zero,
   2410 # return BASE_REV.  Does not handle magic revisions like 1.14.0.2.
   2411 # 1.3 => 1.2
   2412 # 1.3.2.1 => 1.3
   2413 # 1.3.2.2 => 1.3.2.1
   2414 sub decRev {
   2415     local $_ = shift;
   2416     if (/(\d+)$/) {
   2417 	my $i = $1 - 1;
   2418 	if ($i >= 1) {
   2419 	    s/\d+$/$i/;
   2420 	} elsif (s/(^1\.\d+)\.2\.1$/$1/) {
   2421 	    # 1.3.2.1 => 1.3
   2422 	} else {
   2423 	    return $BASE_REV;
   2424 	}
   2425 	return $_;
   2426     }
   2427     die "Can't decrement $_";
   2428 }
   2429 
   2430 # Given a date string, in CVS format, like "2003/05/29 22:10:17",
   2431 # return the duration $NOW - x, in days.
   2432 sub ageInDays {
   2433     local $_ = shift;
   2434     if (m|(\d+)/(\d+)/(\d+)\s+(\d+):(\d+):(\d+)|) {
   2435 	my ($y,$m,$d,$H,$M,$S) = ($1,$2-1,$3,$4,$5,$6);
   2436 	if ($y =~ /^\d\d$/) {
   2437 	    $y = 100*int($YEAR / 100) + $y;
   2438 	    $y -= 100 if ($y > $YEAR);
   2439 	}
   2440 	return ($NOW - timelocal_nocheck($S,$M,$H,$d,$m,$y)) / 86400.0;
   2441     } else {
   2442 	die "Can't parse date $_\n";
   2443     }
   2444 }
   2445 
   2446 # Filter for which files we care about that don't have jitterbugs.
   2447 # Our rule is that if the checkin is over a year old, we don't care
   2448 # about it.  We used to also require the revision to be 1.1 or 1.1.1.1
   2449 # to be ignored, but we dropped this.
   2450 sub noJitterbugFilter {
   2451     my $rev = shift;
   2452     my $date = shift;
   2453     #if ($rev eq '1.1' || $rev eq '1.1.1.1') {
   2454 	return ageInDays($date) <= 365.25;
   2455     #}
   2456     #1;
   2457 }
   2458 
   2459 # Execute a command, trapping errors.
   2460 # Options second arg: Path to a file to delete upon failure
   2461 sub command {
   2462     my $cmd = shift;
   2463     my $fileToDeleteOnFailure = shift;
   2464 
   2465     my $err = "$CACHE/grepj.stderr";
   2466     my $status = system($cmd . " 2> $err");
   2467     if ($status != 0) {
   2468 	unlink($fileToDeleteOnFailure) if defined($fileToDeleteOnFailure);
   2469 	print "<HR><B>Fatal Error: "
   2470 	    . "\"$cmd\" exited with value "
   2471 	    . ($status >> 8)
   2472 	    . " (signal " . ($status & 127) . ")"
   2473 	    . (($status & 128) ? " (core dumped)" : "")
   2474 	    . "<BR></B>";
   2475 	print "stderr:<BR>";
   2476 	if (open(IN, $err)) {
   2477 	    while (<IN>) {
   2478 		print $_, "<BR>";
   2479 	    }
   2480 	    close(IN);
   2481 	}
   2482 	croak "Couldn't execute \"$cmd\"";
   2483     }
   2484 }
   2485 
   2486 #eof
   2487