Home | History | Annotate | Download | only in Scripts
      1 # Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013 Apple Inc.  All rights reserved.
      2 # Copyright (C) 2009, 2010 Chris Jerdonek (chris.jerdonek (at] gmail.com)
      3 # Copyright (C) 2010, 2011 Research In Motion Limited. All rights reserved.
      4 # Copyright (C) 2012 Daniel Bates (dbates (at] intudata.com)
      5 #
      6 # Redistribution and use in source and binary forms, with or without
      7 # modification, are permitted provided that the following conditions
      8 # are met:
      9 #
     10 # 1.  Redistributions of source code must retain the above copyright
     11 #     notice, this list of conditions and the following disclaimer.
     12 # 2.  Redistributions in binary form must reproduce the above copyright
     13 #     notice, this list of conditions and the following disclaimer in the
     14 #     documentation and/or other materials provided with the distribution.
     15 # 3.  Neither the name of Apple Computer, Inc. ("Apple") nor the names of
     16 #     its contributors may be used to endorse or promote products derived
     17 #     from this software without specific prior written permission.
     18 #
     19 # THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
     20 # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     21 # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     22 # DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
     23 # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
     24 # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     25 # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
     26 # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     28 # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29 
     30 # Module to share code to work with various version control systems.
     31 package VCSUtils;
     32 
     33 use strict;
     34 use warnings;
     35 
     36 use Cwd qw();  # "qw()" prevents warnings about redefining getcwd() with "use POSIX;"
     37 use English; # for $POSTMATCH, etc.
     38 use File::Basename;
     39 use File::Spec;
     40 use POSIX;
     41 use Term::ANSIColor qw(colored);
     42 
     43 BEGIN {
     44     use Exporter   ();
     45     our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);
     46     $VERSION     = 1.00;
     47     @ISA         = qw(Exporter);
     48     @EXPORT      = qw(
     49         &applyGitBinaryPatchDelta
     50         &callSilently
     51         &canonicalizePath
     52         &changeLogEmailAddress
     53         &changeLogFileName
     54         &changeLogName
     55         &chdirReturningRelativePath
     56         &decodeGitBinaryChunk
     57         &decodeGitBinaryPatch
     58         &determineSVNRoot
     59         &determineVCSRoot
     60         &escapeSubversionPath
     61         &exitStatus
     62         &fixChangeLogPatch
     63         &gitBranch
     64         &gitdiff2svndiff
     65         &isGit
     66         &isGitSVN
     67         &isGitBranchBuild
     68         &isGitDirectory
     69         &isSVN
     70         &isSVNDirectory
     71         &isSVNVersion16OrNewer
     72         &makeFilePathRelative
     73         &mergeChangeLogs
     74         &normalizePath
     75         &parseChunkRange
     76         &parseFirstEOL
     77         &parsePatch
     78         &pathRelativeToSVNRepositoryRootForPath
     79         &possiblyColored
     80         &prepareParsedPatch
     81         &removeEOL
     82         &runCommand
     83         &runPatchCommand
     84         &scmMoveOrRenameFile
     85         &scmToggleExecutableBit
     86         &setChangeLogDateAndReviewer
     87         &svnRevisionForDirectory
     88         &svnStatus
     89         &toWindowsLineEndings
     90         &gitCommitForSVNRevision
     91         &listOfChangedFilesBetweenRevisions
     92     );
     93     %EXPORT_TAGS = ( );
     94     @EXPORT_OK   = ();
     95 }
     96 
     97 our @EXPORT_OK;
     98 
     99 my $gitBranch;
    100 my $gitRoot;
    101 my $isGit;
    102 my $isGitSVN;
    103 my $isGitBranchBuild;
    104 my $isSVN;
    105 my $svnVersion;
    106 
    107 # Project time zone for Cupertino, CA, US
    108 my $changeLogTimeZone = "PST8PDT";
    109 
    110 my $gitDiffStartRegEx = qr#^diff --git (\w/)?(.+) (\w/)?([^\r\n]+)#;
    111 my $svnDiffStartRegEx = qr#^Index: ([^\r\n]+)#;
    112 my $svnPropertiesStartRegEx = qr#^Property changes on: ([^\r\n]+)#; # $1 is normally the same as the index path.
    113 my $svnPropertyStartRegEx = qr#^(Modified|Name|Added|Deleted): ([^\r\n]+)#; # $2 is the name of the property.
    114 my $svnPropertyValueStartRegEx = qr#^\s*(\+|-|Merged|Reverse-merged)\s*([^\r\n]+)#; # $2 is the start of the property's value (which may span multiple lines).
    115 my $svnPropertyValueNoNewlineRegEx = qr#\ No newline at end of property#;
    116 
    117 # This method is for portability. Return the system-appropriate exit
    118 # status of a child process.
    119 #
    120 # Args: pass the child error status returned by the last pipe close,
    121 #       for example "$?".
    122 sub exitStatus($)
    123 {
    124     my ($returnvalue) = @_;
    125     if ($^O eq "MSWin32") {
    126         return $returnvalue >> 8;
    127     }
    128     if (!WIFEXITED($returnvalue)) {
    129         return 254;
    130     }
    131     return WEXITSTATUS($returnvalue);
    132 }
    133 
    134 # Call a function while suppressing STDERR, and return the return values
    135 # as an array.
    136 sub callSilently($@) {
    137     my ($func, @args) = @_;
    138 
    139     # The following pattern was taken from here:
    140     #   http://www.sdsc.edu/~moreland/courses/IntroPerl/docs/manual/pod/perlfunc/open.html
    141     #
    142     # Also see this Perl documentation (search for "open OLDERR"):
    143     #   http://perldoc.perl.org/functions/open.html
    144     open(OLDERR, ">&STDERR");
    145     close(STDERR);
    146     my @returnValue = &$func(@args);
    147     open(STDERR, ">&OLDERR");
    148     close(OLDERR);
    149 
    150     return @returnValue;
    151 }
    152 
    153 sub toWindowsLineEndings
    154 {
    155     my ($text) = @_;
    156     $text =~ s/\n/\r\n/g;
    157     return $text;
    158 }
    159 
    160 # Note, this method will not error if the file corresponding to the $source path does not exist.
    161 sub scmMoveOrRenameFile
    162 {
    163     my ($source, $destination) = @_;
    164     return if ! -e $source;
    165     if (isSVN()) {
    166         my $escapedDestination = escapeSubversionPath($destination);
    167         my $escapedSource = escapeSubversionPath($source);
    168         system("svn", "move", $escapedSource, $escapedDestination);
    169     } elsif (isGit()) {
    170         system("git", "mv", $source, $destination);
    171     }
    172 }
    173 
    174 # Note, this method will not error if the file corresponding to the path does not exist.
    175 sub scmToggleExecutableBit
    176 {
    177     my ($path, $executableBitDelta) = @_;
    178     return if ! -e $path;
    179     if ($executableBitDelta == 1) {
    180         scmAddExecutableBit($path);
    181     } elsif ($executableBitDelta == -1) {
    182         scmRemoveExecutableBit($path);
    183     }
    184 }
    185 
    186 sub scmAddExecutableBit($)
    187 {
    188     my ($path) = @_;
    189 
    190     if (isSVN()) {
    191         my $escapedPath = escapeSubversionPath($path);
    192         system("svn", "propset", "svn:executable", "on", $escapedPath) == 0 or die "Failed to run 'svn propset svn:executable on $escapedPath'.";
    193     } elsif (isGit()) {
    194         chmod(0755, $path);
    195     }
    196 }
    197 
    198 sub scmRemoveExecutableBit($)
    199 {
    200     my ($path) = @_;
    201 
    202     if (isSVN()) {
    203         my $escapedPath = escapeSubversionPath($path);
    204         system("svn", "propdel", "svn:executable", $escapedPath) == 0 or die "Failed to run 'svn propdel svn:executable $escapedPath'.";
    205     } elsif (isGit()) {
    206         chmod(0664, $path);
    207     }
    208 }
    209 
    210 sub isGitDirectory($)
    211 {
    212     my ($dir) = @_;
    213     return system("cd $dir && git rev-parse > " . File::Spec->devnull() . " 2>&1") == 0;
    214 }
    215 
    216 sub isGit()
    217 {
    218     return $isGit if defined $isGit;
    219 
    220     $isGit = isGitDirectory(".");
    221     return $isGit;
    222 }
    223 
    224 sub isGitSVN()
    225 {
    226     return $isGitSVN if defined $isGitSVN;
    227 
    228     # There doesn't seem to be an officially documented way to determine
    229     # if you're in a git-svn checkout. The best suggestions seen so far
    230     # all use something like the following:
    231     my $output = `git config --get svn-remote.svn.fetch 2>& 1`;
    232     $isGitSVN = $output ne '';
    233     return $isGitSVN;
    234 }
    235 
    236 sub gitBranch()
    237 {
    238     unless (defined $gitBranch) {
    239         chomp($gitBranch = `git symbolic-ref -q HEAD`);
    240         $gitBranch = "" if exitStatus($?);
    241         $gitBranch =~ s#^refs/heads/##;
    242         $gitBranch = "" if $gitBranch eq "master";
    243     }
    244 
    245     return $gitBranch;
    246 }
    247 
    248 sub isGitBranchBuild()
    249 {
    250     my $branch = gitBranch();
    251     chomp(my $override = `git config --bool branch.$branch.webKitBranchBuild`);
    252     return 1 if $override eq "true";
    253     return 0 if $override eq "false";
    254 
    255     unless (defined $isGitBranchBuild) {
    256         chomp(my $gitBranchBuild = `git config --bool core.webKitBranchBuild`);
    257         $isGitBranchBuild = $gitBranchBuild eq "true";
    258     }
    259 
    260     return $isGitBranchBuild;
    261 }
    262 
    263 sub isSVNDirectory($)
    264 {
    265     my ($dir) = @_;
    266     return system("cd $dir && svn info > " . File::Spec->devnull() . " 2>&1") == 0;
    267 }
    268 
    269 sub isSVN()
    270 {
    271     return $isSVN if defined $isSVN;
    272 
    273     $isSVN = isSVNDirectory(".");
    274     return $isSVN;
    275 }
    276 
    277 sub svnVersion()
    278 {
    279     return $svnVersion if defined $svnVersion;
    280 
    281     if (!isSVN()) {
    282         $svnVersion = 0;
    283     } else {
    284         chomp($svnVersion = `svn --version --quiet`);
    285     }
    286     return $svnVersion;
    287 }
    288 
    289 sub isSVNVersion16OrNewer()
    290 {
    291     my $version = svnVersion();
    292     return eval "v$version" ge v1.6;
    293 }
    294 
    295 sub chdirReturningRelativePath($)
    296 {
    297     my ($directory) = @_;
    298     my $previousDirectory = Cwd::getcwd();
    299     chdir $directory;
    300     my $newDirectory = Cwd::getcwd();
    301     return "." if $newDirectory eq $previousDirectory;
    302     return File::Spec->abs2rel($previousDirectory, $newDirectory);
    303 }
    304 
    305 sub determineGitRoot()
    306 {
    307     chomp(my $gitDir = `git rev-parse --git-dir`);
    308     return dirname($gitDir);
    309 }
    310 
    311 sub determineSVNRoot()
    312 {
    313     my $last = '';
    314     my $path = '.';
    315     my $parent = '..';
    316     my $repositoryRoot;
    317     my $repositoryUUID;
    318     while (1) {
    319         my $thisRoot;
    320         my $thisUUID;
    321         my $escapedPath = escapeSubversionPath($path);
    322         # Ignore error messages in case we've run past the root of the checkout.
    323         open INFO, "svn info '$escapedPath' 2> " . File::Spec->devnull() . " |" or die;
    324         while (<INFO>) {
    325             if (/^Repository Root: (.+)/) {
    326                 $thisRoot = $1;
    327             }
    328             if (/^Repository UUID: (.+)/) {
    329                 $thisUUID = $1;
    330             }
    331             if ($thisRoot && $thisUUID) {
    332                 local $/ = undef;
    333                 <INFO>; # Consume the rest of the input.
    334             }
    335         }
    336         close INFO;
    337 
    338         # It's possible (e.g. for developers of some ports) to have a WebKit
    339         # checkout in a subdirectory of another checkout.  So abort if the
    340         # repository root or the repository UUID suddenly changes.
    341         last if !$thisUUID;
    342         $repositoryUUID = $thisUUID if !$repositoryUUID;
    343         last if $thisUUID ne $repositoryUUID;
    344 
    345         last if !$thisRoot;
    346         $repositoryRoot = $thisRoot if !$repositoryRoot;
    347         last if $thisRoot ne $repositoryRoot;
    348 
    349         $last = $path;
    350         $path = File::Spec->catdir($parent, $path);
    351     }
    352 
    353     return File::Spec->rel2abs($last);
    354 }
    355 
    356 sub determineVCSRoot()
    357 {
    358     if (isGit()) {
    359         return determineGitRoot();
    360     }
    361 
    362     if (!isSVN()) {
    363         # Some users have a workflow where svn-create-patch, svn-apply and
    364         # svn-unapply are used outside of multiple svn working directores,
    365         # so warn the user and assume Subversion is being used in this case.
    366         warn "Unable to determine VCS root for '" . Cwd::getcwd() . "'; assuming Subversion";
    367         $isSVN = 1;
    368     }
    369 
    370     return determineSVNRoot();
    371 }
    372 
    373 sub isWindows()
    374 {
    375     return ($^O eq "MSWin32") || 0;
    376 }
    377 
    378 sub svnRevisionForDirectory($)
    379 {
    380     my ($dir) = @_;
    381     my $revision;
    382 
    383     if (isSVNDirectory($dir)) {
    384         my $escapedDir = escapeSubversionPath($dir);
    385         my $command = "svn info $escapedDir | grep Revision:";
    386         $command = "LC_ALL=C $command" if !isWindows();
    387         my $svnInfo = `$command`;
    388         ($revision) = ($svnInfo =~ m/Revision: (\d+).*/g);
    389     } elsif (isGitDirectory($dir)) {
    390         my $command = "git log --grep=\"git-svn-id: \" -n 1 | grep git-svn-id:";
    391         $command = "LC_ALL=C $command" if !isWindows();
    392         $command = "cd $dir && $command";
    393         my $gitLog = `$command`;
    394         ($revision) = ($gitLog =~ m/ +git-svn-id: .+@(\d+) /g);
    395     }
    396     if (!defined($revision)) {
    397         $revision = "unknown";
    398         warn "Unable to determine current SVN revision in $dir";
    399     }
    400     return $revision;
    401 }
    402 
    403 sub pathRelativeToSVNRepositoryRootForPath($)
    404 {
    405     my ($file) = @_;
    406     my $relativePath = File::Spec->abs2rel($file);
    407 
    408     my $svnInfo;
    409     if (isSVN()) {
    410         my $escapedRelativePath = escapeSubversionPath($relativePath);
    411         my $command = "svn info $escapedRelativePath";
    412         $command = "LC_ALL=C $command" if !isWindows();
    413         $svnInfo = `$command`;
    414     } elsif (isGit()) {
    415         my $command = "git svn info $relativePath";
    416         $command = "LC_ALL=C $command" if !isWindows();
    417         $svnInfo = `$command`;
    418     }
    419 
    420     $svnInfo =~ /.*^URL: (.*?)$/m;
    421     my $svnURL = $1;
    422 
    423     $svnInfo =~ /.*^Repository Root: (.*?)$/m;
    424     my $repositoryRoot = $1;
    425 
    426     $svnURL =~ s/$repositoryRoot\///;
    427     return $svnURL;
    428 }
    429 
    430 sub makeFilePathRelative($)
    431 {
    432     my ($path) = @_;
    433     return $path unless isGit();
    434 
    435     unless (defined $gitRoot) {
    436         chomp($gitRoot = `git rev-parse --show-cdup`);
    437     }
    438     return $gitRoot . $path;
    439 }
    440 
    441 sub normalizePath($)
    442 {
    443     my ($path) = @_;
    444     $path =~ s/\\/\//g;
    445     return $path;
    446 }
    447 
    448 sub possiblyColored($$)
    449 {
    450     my ($colors, $string) = @_;
    451 
    452     if (-t STDOUT) {
    453         return colored([$colors], $string);
    454     } else {
    455         return $string;
    456     }
    457 }
    458 
    459 sub adjustPathForRecentRenamings($)
    460 {
    461     my ($fullPath) = @_;
    462 
    463     $fullPath =~ s|WebCore/webaudio|WebCore/Modules/webaudio|g;
    464     $fullPath =~ s|JavaScriptCore/wtf|WTF/wtf|g;
    465     $fullPath =~ s|test_expectations.txt|TestExpectations|g;
    466 
    467     return $fullPath;
    468 }
    469 
    470 sub canonicalizePath($)
    471 {
    472     my ($file) = @_;
    473 
    474     # Remove extra slashes and '.' directories in path
    475     $file = File::Spec->canonpath($file);
    476 
    477     # Remove '..' directories in path
    478     my @dirs = ();
    479     foreach my $dir (File::Spec->splitdir($file)) {
    480         if ($dir eq '..' && $#dirs >= 0 && $dirs[$#dirs] ne '..') {
    481             pop(@dirs);
    482         } else {
    483             push(@dirs, $dir);
    484         }
    485     }
    486     return ($#dirs >= 0) ? File::Spec->catdir(@dirs) : ".";
    487 }
    488 
    489 sub removeEOL($)
    490 {
    491     my ($line) = @_;
    492     return "" unless $line;
    493 
    494     $line =~ s/[\r\n]+$//g;
    495     return $line;
    496 }
    497 
    498 sub parseFirstEOL($)
    499 {
    500     my ($fileHandle) = @_;
    501 
    502     # Make input record separator the new-line character to simplify regex matching below.
    503     my $savedInputRecordSeparator = $INPUT_RECORD_SEPARATOR;
    504     $INPUT_RECORD_SEPARATOR = "\n";
    505     my $firstLine  = <$fileHandle>;
    506     $INPUT_RECORD_SEPARATOR = $savedInputRecordSeparator;
    507 
    508     return unless defined($firstLine);
    509 
    510     my $eol;
    511     if ($firstLine =~ /\r\n/) {
    512         $eol = "\r\n";
    513     } elsif ($firstLine =~ /\r/) {
    514         $eol = "\r";
    515     } elsif ($firstLine =~ /\n/) {
    516         $eol = "\n";
    517     }
    518     return $eol;
    519 }
    520 
    521 sub firstEOLInFile($)
    522 {
    523     my ($file) = @_;
    524     my $eol;
    525     if (open(FILE, $file)) {
    526         $eol = parseFirstEOL(*FILE);
    527         close(FILE);
    528     }
    529     return $eol;
    530 }
    531 
    532 # Parses a chunk range line into its components.
    533 #
    534 # A chunk range line has the form: @@ -L_1,N_1 +L_2,N_2 @@, where the pairs (L_1, N_1),
    535 # (L_2, N_2) are ranges that represent the starting line number and line count in the
    536 # original file and new file, respectively.
    537 #
    538 # Note, some versions of GNU diff may omit the comma and trailing line count (e.g. N_1),
    539 # in which case the omitted line count defaults to 1. For example, GNU diff may output
    540 # @@ -1 +1 @@, which is equivalent to @@ -1,1 +1,1 @@.
    541 #
    542 # This subroutine returns undef if given an invalid or malformed chunk range.
    543 #
    544 # Args:
    545 #   $line: the line to parse.
    546 #   $chunkSentinel: the sentinel that surrounds the chunk range information (defaults to "@@").
    547 #
    548 # Returns $chunkRangeHashRef
    549 #   $chunkRangeHashRef: a hash reference representing the parts of a chunk range, as follows--
    550 #     startingLine: the starting line in the original file.
    551 #     lineCount: the line count in the original file.
    552 #     newStartingLine: the new starting line in the new file.
    553 #     newLineCount: the new line count in the new file.
    554 sub parseChunkRange($;$)
    555 {
    556     my ($line, $chunkSentinel) = @_;
    557     $chunkSentinel = "@@" if !$chunkSentinel;
    558     my $chunkRangeRegEx = qr#^\Q$chunkSentinel\E -(\d+)(,(\d+))? \+(\d+)(,(\d+))? \Q$chunkSentinel\E#;
    559     if ($line !~ /$chunkRangeRegEx/) {
    560         return;
    561     }
    562     my %chunkRange;
    563     $chunkRange{startingLine} = $1;
    564     $chunkRange{lineCount} = defined($2) ? $3 : 1;
    565     $chunkRange{newStartingLine} = $4;
    566     $chunkRange{newLineCount} = defined($5) ? $6 : 1;
    567     return \%chunkRange;
    568 }
    569 
    570 sub svnStatus($)
    571 {
    572     my ($fullPath) = @_;
    573     my $escapedFullPath = escapeSubversionPath($fullPath);
    574     my $svnStatus;
    575     open SVN, "svn status --non-interactive --non-recursive '$escapedFullPath' |" or die;
    576     if (-d $fullPath) {
    577         # When running "svn stat" on a directory, we can't assume that only one
    578         # status will be returned (since any files with a status below the
    579         # directory will be returned), and we can't assume that the directory will
    580         # be first (since any files with unknown status will be listed first).
    581         my $normalizedFullPath = File::Spec->catdir(File::Spec->splitdir($fullPath));
    582         while (<SVN>) {
    583             # Input may use a different EOL sequence than $/, so avoid chomp.
    584             $_ = removeEOL($_);
    585             my $normalizedStatPath = File::Spec->catdir(File::Spec->splitdir(substr($_, 7)));
    586             if ($normalizedFullPath eq $normalizedStatPath) {
    587                 $svnStatus = "$_\n";
    588                 last;
    589             }
    590         }
    591         # Read the rest of the svn command output to avoid a broken pipe warning.
    592         local $/ = undef;
    593         <SVN>;
    594     }
    595     else {
    596         # Files will have only one status returned.
    597         $svnStatus = removeEOL(<SVN>) . "\n";
    598     }
    599     close SVN;
    600     return $svnStatus;
    601 }
    602 
    603 # Return whether the given file mode is executable in the source control
    604 # sense.  We make this determination based on whether the executable bit
    605 # is set for "others" rather than the stronger condition that it be set
    606 # for the user, group, and others.  This is sufficient for distinguishing
    607 # the default behavior in Git and SVN.
    608 #
    609 # Args:
    610 #   $fileMode: A number or string representing a file mode in octal notation.
    611 sub isExecutable($)
    612 {
    613     my $fileMode = shift;
    614 
    615     return $fileMode % 2;
    616 }
    617 
    618 # Parse the next Git diff header from the given file handle, and advance
    619 # the handle so the last line read is the first line after the header.
    620 #
    621 # This subroutine dies if given leading junk.
    622 #
    623 # Args:
    624 #   $fileHandle: advanced so the last line read from the handle is the first
    625 #                line of the header to parse.  This should be a line
    626 #                beginning with "diff --git".
    627 #   $line: the line last read from $fileHandle
    628 #
    629 # Returns ($headerHashRef, $lastReadLine):
    630 #   $headerHashRef: a hash reference representing a diff header, as follows--
    631 #     copiedFromPath: the path from which the file was copied or moved if
    632 #                     the diff is a copy or move.
    633 #     executableBitDelta: the value 1 or -1 if the executable bit was added or
    634 #                         removed, respectively.  New and deleted files have
    635 #                         this value only if the file is executable, in which
    636 #                         case the value is 1 and -1, respectively.
    637 #     indexPath: the path of the target file.
    638 #     isBinary: the value 1 if the diff is for a binary file.
    639 #     isDeletion: the value 1 if the diff is a file deletion.
    640 #     isCopyWithChanges: the value 1 if the file was copied or moved and
    641 #                        the target file was changed in some way after being
    642 #                        copied or moved (e.g. if its contents or executable
    643 #                        bit were changed).
    644 #     isNew: the value 1 if the diff is for a new file.
    645 #     shouldDeleteSource: the value 1 if the file was copied or moved and
    646 #                         the source file was deleted -- i.e. if the copy
    647 #                         was actually a move.
    648 #     svnConvertedText: the header text with some lines converted to SVN
    649 #                       format.  Git-specific lines are preserved.
    650 #   $lastReadLine: the line last read from $fileHandle.
    651 sub parseGitDiffHeader($$)
    652 {
    653     my ($fileHandle, $line) = @_;
    654 
    655     $_ = $line;
    656 
    657     my $indexPath;
    658     if (/$gitDiffStartRegEx/) {
    659         # The first and second paths can differ in the case of copies
    660         # and renames.  We use the second file path because it is the
    661         # destination path.
    662         $indexPath = adjustPathForRecentRenamings($4);
    663         # Use $POSTMATCH to preserve the end-of-line character.
    664         $_ = "Index: $indexPath$POSTMATCH"; # Convert to SVN format.
    665     } else {
    666         die("Could not parse leading \"diff --git\" line: \"$line\".");
    667     }
    668 
    669     my $copiedFromPath;
    670     my $foundHeaderEnding;
    671     my $isBinary;
    672     my $isDeletion;
    673     my $isNew;
    674     my $newExecutableBit = 0;
    675     my $oldExecutableBit = 0;
    676     my $shouldDeleteSource = 0;
    677     my $similarityIndex = 0;
    678     my $svnConvertedText;
    679     while (1) {
    680         # Temporarily strip off any end-of-line characters to simplify
    681         # regex matching below.
    682         s/([\n\r]+)$//;
    683         my $eol = $1;
    684 
    685         if (/^(deleted file|old) mode (\d+)/) {
    686             $oldExecutableBit = (isExecutable($2) ? 1 : 0);
    687             $isDeletion = 1 if $1 eq "deleted file";
    688         } elsif (/^new( file)? mode (\d+)/) {
    689             $newExecutableBit = (isExecutable($2) ? 1 : 0);
    690             $isNew = 1 if $1;
    691         } elsif (/^similarity index (\d+)%/) {
    692             $similarityIndex = $1;
    693         } elsif (/^copy from (\S+)/) {
    694             $copiedFromPath = $1;
    695         } elsif (/^rename from (\S+)/) {
    696             # FIXME: Record this as a move rather than as a copy-and-delete.
    697             #        This will simplify adding rename support to svn-unapply.
    698             #        Otherwise, the hash for a deletion would have to know
    699             #        everything about the file being deleted in order to
    700             #        support undoing itself.  Recording as a move will also
    701             #        permit us to use "svn move" and "git move".
    702             $copiedFromPath = $1;
    703             $shouldDeleteSource = 1;
    704         } elsif (/^--- \S+/) {
    705             $_ = "--- $indexPath"; # Convert to SVN format.
    706         } elsif (/^\+\+\+ \S+/) {
    707             $_ = "+++ $indexPath"; # Convert to SVN format.
    708             $foundHeaderEnding = 1;
    709         } elsif (/^GIT binary patch$/ ) {
    710             $isBinary = 1;
    711             $foundHeaderEnding = 1;
    712         # The "git diff" command includes a line of the form "Binary files
    713         # <path1> and <path2> differ" if the --binary flag is not used.
    714         } elsif (/^Binary files / ) {
    715             die("Error: the Git diff contains a binary file without the binary data in ".
    716                 "line: \"$_\".  Be sure to use the --binary flag when invoking \"git diff\" ".
    717                 "with diffs containing binary files.");
    718         }
    719 
    720         $svnConvertedText .= "$_$eol"; # Also restore end-of-line characters.
    721 
    722         $_ = <$fileHandle>; # Not defined if end-of-file reached.
    723 
    724         last if (!defined($_) || /$gitDiffStartRegEx/ || $foundHeaderEnding);
    725     }
    726 
    727     my $executableBitDelta = $newExecutableBit - $oldExecutableBit;
    728 
    729     my %header;
    730 
    731     $header{copiedFromPath} = $copiedFromPath if $copiedFromPath;
    732     $header{executableBitDelta} = $executableBitDelta if $executableBitDelta;
    733     $header{indexPath} = $indexPath;
    734     $header{isBinary} = $isBinary if $isBinary;
    735     $header{isCopyWithChanges} = 1 if ($copiedFromPath && ($similarityIndex != 100 || $executableBitDelta));
    736     $header{isDeletion} = $isDeletion if $isDeletion;
    737     $header{isNew} = $isNew if $isNew;
    738     $header{shouldDeleteSource} = $shouldDeleteSource if $shouldDeleteSource;
    739     $header{svnConvertedText} = $svnConvertedText;
    740 
    741     return (\%header, $_);
    742 }
    743 
    744 # Parse the next SVN diff header from the given file handle, and advance
    745 # the handle so the last line read is the first line after the header.
    746 #
    747 # This subroutine dies if given leading junk or if it could not detect
    748 # the end of the header block.
    749 #
    750 # Args:
    751 #   $fileHandle: advanced so the last line read from the handle is the first
    752 #                line of the header to parse.  This should be a line
    753 #                beginning with "Index:".
    754 #   $line: the line last read from $fileHandle
    755 #
    756 # Returns ($headerHashRef, $lastReadLine):
    757 #   $headerHashRef: a hash reference representing a diff header, as follows--
    758 #     copiedFromPath: the path from which the file was copied if the diff
    759 #                     is a copy.
    760 #     indexPath: the path of the target file, which is the path found in
    761 #                the "Index:" line.
    762 #     isBinary: the value 1 if the diff is for a binary file.
    763 #     isNew: the value 1 if the diff is for a new file.
    764 #     sourceRevision: the revision number of the source, if it exists.  This
    765 #                     is the same as the revision number the file was copied
    766 #                     from, in the case of a file copy.
    767 #     svnConvertedText: the header text converted to a header with the paths
    768 #                       in some lines corrected.
    769 #   $lastReadLine: the line last read from $fileHandle.
    770 sub parseSvnDiffHeader($$)
    771 {
    772     my ($fileHandle, $line) = @_;
    773 
    774     $_ = $line;
    775 
    776     my $indexPath;
    777     if (/$svnDiffStartRegEx/) {
    778         $indexPath = adjustPathForRecentRenamings($1);
    779     } else {
    780         die("First line of SVN diff does not begin with \"Index \": \"$_\"");
    781     }
    782 
    783     my $copiedFromPath;
    784     my $foundHeaderEnding;
    785     my $isBinary;
    786     my $isNew;
    787     my $sourceRevision;
    788     my $svnConvertedText;
    789     while (1) {
    790         # Temporarily strip off any end-of-line characters to simplify
    791         # regex matching below.
    792         s/([\n\r]+)$//;
    793         my $eol = $1;
    794 
    795         # Fix paths on "---" and "+++" lines to match the leading
    796         # index line.
    797         if (s/^--- [^\t\n\r]+/--- $indexPath/) {
    798             # ---
    799             if (/^--- .+\(revision (\d+)\)/) {
    800                 $sourceRevision = $1;
    801                 $isNew = 1 if !$sourceRevision; # if revision 0.
    802                 if (/\(from (\S+):(\d+)\)$/) {
    803                     # The "from" clause is created by svn-create-patch, in
    804                     # which case there is always also a "revision" clause.
    805                     $copiedFromPath = $1;
    806                     die("Revision number \"$2\" in \"from\" clause does not match " .
    807                         "source revision number \"$sourceRevision\".") if ($2 != $sourceRevision);
    808                 }
    809             }
    810         } elsif (s/^\+\+\+ [^\t\n\r]+/+++ $indexPath/ || $isBinary && /^$/) {
    811             $foundHeaderEnding = 1;
    812         } elsif (/^Cannot display: file marked as a binary type.$/) {
    813             $isBinary = 1;
    814             # SVN 1.7 has an unusual display format for a binary diff. It repeats the first
    815             # two lines of the diff header. For example:
    816             #     Index: test_file.swf
    817             #     ===================================================================
    818             #     Cannot display: file marked as a binary type.
    819             #     svn:mime-type = application/octet-stream
    820             #     Index: test_file.swf
    821             #     ===================================================================
    822             #     --- test_file.swf
    823             #     +++ test_file.swf
    824             #
    825             #     ...
    826             #     Q1dTBx0AAAB42itg4GlgYJjGwMDDyODMxMDw34GBgQEAJPQDJA==
    827             # Therefore, we continue reading the diff header until we either encounter a line
    828             # that begins with "+++" (SVN 1.7 or greater) or an empty line (SVN version less
    829             # than 1.7).
    830         }
    831 
    832         $svnConvertedText .= "$_$eol"; # Also restore end-of-line characters.
    833 
    834         $_ = <$fileHandle>; # Not defined if end-of-file reached.
    835 
    836         last if (!defined($_) || !$isBinary && /$svnDiffStartRegEx/ || $foundHeaderEnding);
    837     }
    838 
    839     if (!$foundHeaderEnding) {
    840         die("Did not find end of header block corresponding to index path \"$indexPath\".");
    841     }
    842 
    843     my %header;
    844 
    845     $header{copiedFromPath} = $copiedFromPath if $copiedFromPath;
    846     $header{indexPath} = $indexPath;
    847     $header{isBinary} = $isBinary if $isBinary;
    848     $header{isNew} = $isNew if $isNew;
    849     $header{sourceRevision} = $sourceRevision if $sourceRevision;
    850     $header{svnConvertedText} = $svnConvertedText;
    851 
    852     return (\%header, $_);
    853 }
    854 
    855 # Parse the next diff header from the given file handle, and advance
    856 # the handle so the last line read is the first line after the header.
    857 #
    858 # This subroutine dies if given leading junk or if it could not detect
    859 # the end of the header block.
    860 #
    861 # Args:
    862 #   $fileHandle: advanced so the last line read from the handle is the first
    863 #                line of the header to parse.  For SVN-formatted diffs, this
    864 #                is a line beginning with "Index:".  For Git, this is a line
    865 #                beginning with "diff --git".
    866 #   $line: the line last read from $fileHandle
    867 #
    868 # Returns ($headerHashRef, $lastReadLine):
    869 #   $headerHashRef: a hash reference representing a diff header
    870 #     copiedFromPath: the path from which the file was copied if the diff
    871 #                     is a copy.
    872 #     executableBitDelta: the value 1 or -1 if the executable bit was added or
    873 #                         removed, respectively.  New and deleted files have
    874 #                         this value only if the file is executable, in which
    875 #                         case the value is 1 and -1, respectively.
    876 #     indexPath: the path of the target file.
    877 #     isBinary: the value 1 if the diff is for a binary file.
    878 #     isGit: the value 1 if the diff is Git-formatted.
    879 #     isSvn: the value 1 if the diff is SVN-formatted.
    880 #     sourceRevision: the revision number of the source, if it exists.  This
    881 #                     is the same as the revision number the file was copied
    882 #                     from, in the case of a file copy.
    883 #     svnConvertedText: the header text with some lines converted to SVN
    884 #                       format.  Git-specific lines are preserved.
    885 #   $lastReadLine: the line last read from $fileHandle.
    886 sub parseDiffHeader($$)
    887 {
    888     my ($fileHandle, $line) = @_;
    889 
    890     my $header;  # This is a hash ref.
    891     my $isGit;
    892     my $isSvn;
    893     my $lastReadLine;
    894 
    895     if ($line =~ $svnDiffStartRegEx) {
    896         $isSvn = 1;
    897         ($header, $lastReadLine) = parseSvnDiffHeader($fileHandle, $line);
    898     } elsif ($line =~ $gitDiffStartRegEx) {
    899         $isGit = 1;
    900         ($header, $lastReadLine) = parseGitDiffHeader($fileHandle, $line);
    901     } else {
    902         die("First line of diff does not begin with \"Index:\" or \"diff --git\": \"$line\"");
    903     }
    904 
    905     $header->{isGit} = $isGit if $isGit;
    906     $header->{isSvn} = $isSvn if $isSvn;
    907 
    908     return ($header, $lastReadLine);
    909 }
    910 
    911 # FIXME: The %diffHash "object" should not have an svnConvertedText property.
    912 #        Instead, the hash object should store its information in a
    913 #        structured way as properties.  This should be done in a way so
    914 #        that, if necessary, the text of an SVN or Git patch can be
    915 #        reconstructed from the information in those hash properties.
    916 #
    917 # A %diffHash is a hash representing a source control diff of a single
    918 # file operation (e.g. a file modification, copy, or delete).
    919 #
    920 # These hashes appear, for example, in the parseDiff(), parsePatch(),
    921 # and prepareParsedPatch() subroutines of this package.
    922 #
    923 # The corresponding values are--
    924 #
    925 #   copiedFromPath: the path from which the file was copied if the diff
    926 #                   is a copy.
    927 #   executableBitDelta: the value 1 or -1 if the executable bit was added or
    928 #                       removed from the target file, respectively.
    929 #   indexPath: the path of the target file.  For SVN-formatted diffs,
    930 #              this is the same as the path in the "Index:" line.
    931 #   isBinary: the value 1 if the diff is for a binary file.
    932 #   isDeletion: the value 1 if the diff is known from the header to be a deletion.
    933 #   isGit: the value 1 if the diff is Git-formatted.
    934 #   isNew: the value 1 if the dif is known from the header to be a new file.
    935 #   isSvn: the value 1 if the diff is SVN-formatted.
    936 #   sourceRevision: the revision number of the source, if it exists.  This
    937 #                   is the same as the revision number the file was copied
    938 #                   from, in the case of a file copy.
    939 #   svnConvertedText: the diff with some lines converted to SVN format.
    940 #                     Git-specific lines are preserved.
    941 
    942 # Parse one diff from a patch file created by svn-create-patch, and
    943 # advance the file handle so the last line read is the first line
    944 # of the next header block.
    945 #
    946 # This subroutine preserves any leading junk encountered before the header.
    947 #
    948 # Composition of an SVN diff
    949 #
    950 # There are three parts to an SVN diff: the header, the property change, and
    951 # the binary contents, in that order. Either the header or the property change
    952 # may be ommitted, but not both. If there are binary changes, then you always
    953 # have all three.
    954 #
    955 # Args:
    956 #   $fileHandle: a file handle advanced to the first line of the next
    957 #                header block. Leading junk is okay.
    958 #   $line: the line last read from $fileHandle.
    959 #   $optionsHashRef: a hash reference representing optional options to use
    960 #                    when processing a diff.
    961 #     shouldNotUseIndexPathEOL: whether to use the line endings in the diff instead
    962 #                               instead of the line endings in the target file; the
    963 #                               value of 1 if svnConvertedText should use the line
    964 #                               endings in the diff.
    965 #
    966 # Returns ($diffHashRefs, $lastReadLine):
    967 #   $diffHashRefs: A reference to an array of references to %diffHash hashes.
    968 #                  See the %diffHash documentation above.
    969 #   $lastReadLine: the line last read from $fileHandle
    970 sub parseDiff($$;$)
    971 {
    972     # FIXME: Adjust this method so that it dies if the first line does not
    973     #        match the start of a diff.  This will require a change to
    974     #        parsePatch() so that parsePatch() skips over leading junk.
    975     my ($fileHandle, $line, $optionsHashRef) = @_;
    976 
    977     my $headerStartRegEx = $svnDiffStartRegEx; # SVN-style header for the default
    978 
    979     my $headerHashRef; # Last header found, as returned by parseDiffHeader().
    980     my $svnPropertiesHashRef; # Last SVN properties diff found, as returned by parseSvnDiffProperties().
    981     my $svnText;
    982     my $indexPathEOL;
    983     my $numTextChunks = 0;
    984     while (defined($line)) {
    985         if (!$headerHashRef && ($line =~ $gitDiffStartRegEx)) {
    986             # Then assume all diffs in the patch are Git-formatted. This
    987             # block was made to be enterable at most once since we assume
    988             # all diffs in the patch are formatted the same (SVN or Git).
    989             $headerStartRegEx = $gitDiffStartRegEx;
    990         }
    991 
    992         if ($line =~ $svnPropertiesStartRegEx) {
    993             my $propertyPath = $1;
    994             if ($svnPropertiesHashRef || $headerHashRef && ($propertyPath ne $headerHashRef->{indexPath})) {
    995                 # This is the start of the second diff in the while loop, which happens to
    996                 # be a property diff.  If $svnPropertiesHasRef is defined, then this is the
    997                 # second consecutive property diff, otherwise it's the start of a property
    998                 # diff for a file that only has property changes.
    999                 last;
   1000             }
   1001             ($svnPropertiesHashRef, $line) = parseSvnDiffProperties($fileHandle, $line);
   1002             next;
   1003         }
   1004         if ($line !~ $headerStartRegEx) {
   1005             # Then we are in the body of the diff.
   1006             my $isChunkRange = defined(parseChunkRange($line));
   1007             $numTextChunks += 1 if $isChunkRange;
   1008             my $nextLine = <$fileHandle>;
   1009             my $willAddNewLineAtEndOfFile = defined($nextLine) && $nextLine =~ /^\\ No newline at end of file$/;
   1010             if ($willAddNewLineAtEndOfFile) {
   1011                 # Diff(1) always emits a LF character preceeding the line "\ No newline at end of file".
   1012                 # We must preserve both the added LF character and the line ending of this sentinel line
   1013                 # or patch(1) will complain.
   1014                 $svnText .= $line . $nextLine;
   1015                 $line = <$fileHandle>;
   1016                 next;
   1017             }
   1018             if ($indexPathEOL && !$isChunkRange) {
   1019                 # The chunk range is part of the body of the diff, but its line endings should't be
   1020                 # modified or patch(1) will complain. So, we only modify non-chunk range lines.
   1021                 $line =~ s/\r\n|\r|\n/$indexPathEOL/g;
   1022             }
   1023             $svnText .= $line;
   1024             $line = $nextLine;
   1025             next;
   1026         } # Otherwise, we found a diff header.
   1027 
   1028         if ($svnPropertiesHashRef || $headerHashRef) {
   1029             # Then either we just processed an SVN property change or this
   1030             # is the start of the second diff header of this while loop.
   1031             last;
   1032         }
   1033 
   1034         ($headerHashRef, $line) = parseDiffHeader($fileHandle, $line);
   1035         if (!$optionsHashRef || !$optionsHashRef->{shouldNotUseIndexPathEOL}) {
   1036             # FIXME: We shouldn't query the file system (via firstEOLInFile()) to determine the
   1037             #        line endings of the file indexPath. Instead, either the caller to parseDiff()
   1038             #        should provide this information or parseDiff() should take a delegate that it
   1039             #        can use to query for this information.
   1040             $indexPathEOL = firstEOLInFile($headerHashRef->{indexPath}) if !$headerHashRef->{isNew} && !$headerHashRef->{isBinary};
   1041         }
   1042 
   1043         $svnText .= $headerHashRef->{svnConvertedText};
   1044     }
   1045 
   1046     my @diffHashRefs;
   1047 
   1048     if ($headerHashRef->{shouldDeleteSource}) {
   1049         my %deletionHash;
   1050         $deletionHash{indexPath} = $headerHashRef->{copiedFromPath};
   1051         $deletionHash{isDeletion} = 1;
   1052         push @diffHashRefs, \%deletionHash;
   1053     }
   1054     if ($headerHashRef->{copiedFromPath}) {
   1055         my %copyHash;
   1056         $copyHash{copiedFromPath} = $headerHashRef->{copiedFromPath};
   1057         $copyHash{indexPath} = $headerHashRef->{indexPath};
   1058         $copyHash{sourceRevision} = $headerHashRef->{sourceRevision} if $headerHashRef->{sourceRevision};
   1059         if ($headerHashRef->{isSvn}) {
   1060             $copyHash{executableBitDelta} = $svnPropertiesHashRef->{executableBitDelta} if $svnPropertiesHashRef->{executableBitDelta};
   1061         }
   1062         push @diffHashRefs, \%copyHash;
   1063     }
   1064 
   1065     # Note, the order of evaluation for the following if conditional has been explicitly chosen so that
   1066     # it evaluates to false when there is no headerHashRef (e.g. a property change diff for a file that
   1067     # only has property changes).
   1068     if ($headerHashRef->{isCopyWithChanges} || (%$headerHashRef && !$headerHashRef->{copiedFromPath})) {
   1069         # Then add the usual file modification.
   1070         my %diffHash;
   1071         # FIXME: We should expand this code to support other properties.  In the future,
   1072         #        parseSvnDiffProperties may return a hash whose keys are the properties.
   1073         if ($headerHashRef->{isSvn}) {
   1074             # SVN records the change to the executable bit in a separate property change diff
   1075             # that follows the contents of the diff, except for binary diffs.  For binary
   1076             # diffs, the property change diff follows the diff header.
   1077             $diffHash{executableBitDelta} = $svnPropertiesHashRef->{executableBitDelta} if $svnPropertiesHashRef->{executableBitDelta};
   1078         } elsif ($headerHashRef->{isGit}) {
   1079             # Git records the change to the executable bit in the header of a diff.
   1080             $diffHash{executableBitDelta} = $headerHashRef->{executableBitDelta} if $headerHashRef->{executableBitDelta};
   1081         }
   1082         $diffHash{indexPath} = $headerHashRef->{indexPath};
   1083         $diffHash{isBinary} = $headerHashRef->{isBinary} if $headerHashRef->{isBinary};
   1084         $diffHash{isDeletion} = $headerHashRef->{isDeletion} if $headerHashRef->{isDeletion};
   1085         $diffHash{isGit} = $headerHashRef->{isGit} if $headerHashRef->{isGit};
   1086         $diffHash{isNew} = $headerHashRef->{isNew} if $headerHashRef->{isNew};
   1087         $diffHash{isSvn} = $headerHashRef->{isSvn} if $headerHashRef->{isSvn};
   1088         if (!$headerHashRef->{copiedFromPath}) {
   1089             # If the file was copied, then we have already incorporated the
   1090             # sourceRevision information into the change.
   1091             $diffHash{sourceRevision} = $headerHashRef->{sourceRevision} if $headerHashRef->{sourceRevision};
   1092         }
   1093         # FIXME: Remove the need for svnConvertedText.  See the %diffHash
   1094         #        code comments above for more information.
   1095         #
   1096         # Note, we may not always have SVN converted text since we intend
   1097         # to deprecate it in the future.  For example, a property change
   1098         # diff for a file that only has property changes will not return
   1099         # any SVN converted text.
   1100         $diffHash{svnConvertedText} = $svnText if $svnText;
   1101         $diffHash{numTextChunks} = $numTextChunks if $svnText && !$headerHashRef->{isBinary};
   1102         push @diffHashRefs, \%diffHash;
   1103     }
   1104 
   1105     if (!%$headerHashRef && $svnPropertiesHashRef) {
   1106         # A property change diff for a file that only has property changes.
   1107         my %propertyChangeHash;
   1108         $propertyChangeHash{executableBitDelta} = $svnPropertiesHashRef->{executableBitDelta} if $svnPropertiesHashRef->{executableBitDelta};
   1109         $propertyChangeHash{indexPath} = $svnPropertiesHashRef->{propertyPath};
   1110         $propertyChangeHash{isSvn} = 1;
   1111         push @diffHashRefs, \%propertyChangeHash;
   1112     }
   1113 
   1114     return (\@diffHashRefs, $line);
   1115 }
   1116 
   1117 # Parse an SVN property change diff from the given file handle, and advance
   1118 # the handle so the last line read is the first line after this diff.
   1119 #
   1120 # For the case of an SVN binary diff, the binary contents will follow the
   1121 # the property changes.
   1122 #
   1123 # This subroutine dies if the first line does not begin with "Property changes on"
   1124 # or if the separator line that follows this line is missing.
   1125 #
   1126 # Args:
   1127 #   $fileHandle: advanced so the last line read from the handle is the first
   1128 #                line of the footer to parse.  This line begins with
   1129 #                "Property changes on".
   1130 #   $line: the line last read from $fileHandle.
   1131 #
   1132 # Returns ($propertyHashRef, $lastReadLine):
   1133 #   $propertyHashRef: a hash reference representing an SVN diff footer.
   1134 #     propertyPath: the path of the target file.
   1135 #     executableBitDelta: the value 1 or -1 if the executable bit was added or
   1136 #                         removed from the target file, respectively.
   1137 #   $lastReadLine: the line last read from $fileHandle.
   1138 sub parseSvnDiffProperties($$)
   1139 {
   1140     my ($fileHandle, $line) = @_;
   1141 
   1142     $_ = $line;
   1143 
   1144     my %footer;
   1145     if (/$svnPropertiesStartRegEx/) {
   1146         $footer{propertyPath} = $1;
   1147     } else {
   1148         die("Failed to find start of SVN property change, \"Property changes on \": \"$_\"");
   1149     }
   1150 
   1151     # We advance $fileHandle two lines so that the next line that
   1152     # we process is $svnPropertyStartRegEx in a well-formed footer.
   1153     # A well-formed footer has the form:
   1154     # Property changes on: FileA
   1155     # ___________________________________________________________________
   1156     # Added: svn:executable
   1157     #    + *
   1158     $_ = <$fileHandle>; # Not defined if end-of-file reached.
   1159     my $separator = "_" x 67;
   1160     if (defined($_) && /^$separator[\r\n]+$/) {
   1161         $_ = <$fileHandle>;
   1162     } else {
   1163         die("Failed to find separator line: \"$_\".");
   1164     }
   1165 
   1166     # FIXME: We should expand this to support other SVN properties
   1167     #        (e.g. return a hash of property key-values that represents
   1168     #        all properties).
   1169     #
   1170     # Notice, we keep processing until we hit end-of-file or some
   1171     # line that does not resemble $svnPropertyStartRegEx, such as
   1172     # the empty line that precedes the start of the binary contents
   1173     # of a patch, or the start of the next diff (e.g. "Index:").
   1174     my $propertyHashRef;
   1175     while (defined($_) && /$svnPropertyStartRegEx/) {
   1176         ($propertyHashRef, $_) = parseSvnProperty($fileHandle, $_);
   1177         if ($propertyHashRef->{name} eq "svn:executable") {
   1178             # Notice, for SVN properties, propertyChangeDelta is always non-zero
   1179             # because a property can only be added or removed.
   1180             $footer{executableBitDelta} = $propertyHashRef->{propertyChangeDelta};
   1181         }
   1182     }
   1183 
   1184     return(\%footer, $_);
   1185 }
   1186 
   1187 # Parse the next SVN property from the given file handle, and advance the handle so the last
   1188 # line read is the first line after the property.
   1189 #
   1190 # This subroutine dies if the first line is not a valid start of an SVN property,
   1191 # or the property is missing a value, or the property change type (e.g. "Added")
   1192 # does not correspond to the property value type (e.g. "+").
   1193 #
   1194 # Args:
   1195 #   $fileHandle: advanced so the last line read from the handle is the first
   1196 #                line of the property to parse.  This should be a line
   1197 #                that matches $svnPropertyStartRegEx.
   1198 #   $line: the line last read from $fileHandle.
   1199 #
   1200 # Returns ($propertyHashRef, $lastReadLine):
   1201 #   $propertyHashRef: a hash reference representing a SVN property.
   1202 #     name: the name of the property.
   1203 #     value: the last property value.  For instance, suppose the property is "Modified".
   1204 #            Then it has both a '-' and '+' property value in that order.  Therefore,
   1205 #            the value of this key is the value of the '+' property by ordering (since
   1206 #            it is the last value).
   1207 #     propertyChangeDelta: the value 1 or -1 if the property was added or
   1208 #                          removed, respectively.
   1209 #   $lastReadLine: the line last read from $fileHandle.
   1210 sub parseSvnProperty($$)
   1211 {
   1212     my ($fileHandle, $line) = @_;
   1213 
   1214     $_ = $line;
   1215 
   1216     my $propertyName;
   1217     my $propertyChangeType;
   1218     if (/$svnPropertyStartRegEx/) {
   1219         $propertyChangeType = $1;
   1220         $propertyName = $2;
   1221     } else {
   1222         die("Failed to find SVN property: \"$_\".");
   1223     }
   1224 
   1225     $_ = <$fileHandle>; # Not defined if end-of-file reached.
   1226 
   1227     if (defined($_) && defined(parseChunkRange($_, "##"))) {
   1228         # FIXME: We should validate the chunk range line that is part of an SVN 1.7
   1229         #        property diff. For now, we ignore this line.
   1230         $_ = <$fileHandle>;
   1231     }
   1232 
   1233     # The "svn diff" command neither inserts newline characters between property values
   1234     # nor between successive properties.
   1235     #
   1236     # As of SVN 1.7, "svn diff" may insert "\ No newline at end of property" after a
   1237     # property value that doesn't end in a newline.
   1238     #
   1239     # FIXME: We do not support property values that contain tailing newline characters
   1240     #        as it is difficult to disambiguate these trailing newlines from the empty
   1241     #        line that precedes the contents of a binary patch.
   1242     my $propertyValue;
   1243     my $propertyValueType;
   1244     while (defined($_) && /$svnPropertyValueStartRegEx/) {
   1245         # Note, a '-' property may be followed by a '+' property in the case of a "Modified"
   1246         # or "Name" property.  We only care about the ending value (i.e. the '+' property)
   1247         # in such circumstances.  So, we take the property value for the property to be its
   1248         # last parsed property value.
   1249         #
   1250         # FIXME: We may want to consider strictly enforcing a '-', '+' property ordering or
   1251         #        add error checking to prevent '+', '+', ..., '+' and other invalid combinations.
   1252         $propertyValueType = $1;
   1253         ($propertyValue, $_) = parseSvnPropertyValue($fileHandle, $_);
   1254         $_ = <$fileHandle> if defined($_) && /$svnPropertyValueNoNewlineRegEx/;
   1255     }
   1256 
   1257     if (!$propertyValue) {
   1258         die("Failed to find the property value for the SVN property \"$propertyName\": \"$_\".");
   1259     }
   1260 
   1261     my $propertyChangeDelta;
   1262     if ($propertyValueType eq "+" || $propertyValueType eq "Merged") {
   1263         $propertyChangeDelta = 1;
   1264     } elsif ($propertyValueType eq "-" || $propertyValueType eq "Reverse-merged") {
   1265         $propertyChangeDelta = -1;
   1266     } else {
   1267         die("Not reached.");
   1268     }
   1269 
   1270     # We perform a simple validation that an "Added" or "Deleted" property
   1271     # change type corresponds with a "+" and "-" value type, respectively.
   1272     my $expectedChangeDelta;
   1273     if ($propertyChangeType eq "Added") {
   1274         $expectedChangeDelta = 1;
   1275     } elsif ($propertyChangeType eq "Deleted") {
   1276         $expectedChangeDelta = -1;
   1277     }
   1278 
   1279     if ($expectedChangeDelta && $propertyChangeDelta != $expectedChangeDelta) {
   1280         die("The final property value type found \"$propertyValueType\" does not " .
   1281             "correspond to the property change type found \"$propertyChangeType\".");
   1282     }
   1283 
   1284     my %propertyHash;
   1285     $propertyHash{name} = $propertyName;
   1286     $propertyHash{propertyChangeDelta} = $propertyChangeDelta;
   1287     $propertyHash{value} = $propertyValue;
   1288     return (\%propertyHash, $_);
   1289 }
   1290 
   1291 # Parse the value of an SVN property from the given file handle, and advance
   1292 # the handle so the last line read is the first line after the property value.
   1293 #
   1294 # This subroutine dies if the first line is an invalid SVN property value line
   1295 # (i.e. a line that does not begin with "   +" or "   -").
   1296 #
   1297 # Args:
   1298 #   $fileHandle: advanced so the last line read from the handle is the first
   1299 #                line of the property value to parse.  This should be a line
   1300 #                beginning with "   +" or "   -".
   1301 #   $line: the line last read from $fileHandle.
   1302 #
   1303 # Returns ($propertyValue, $lastReadLine):
   1304 #   $propertyValue: the value of the property.
   1305 #   $lastReadLine: the line last read from $fileHandle.
   1306 sub parseSvnPropertyValue($$)
   1307 {
   1308     my ($fileHandle, $line) = @_;
   1309 
   1310     $_ = $line;
   1311 
   1312     my $propertyValue;
   1313     my $eol;
   1314     if (/$svnPropertyValueStartRegEx/) {
   1315         $propertyValue = $2; # Does not include the end-of-line character(s).
   1316         $eol = $POSTMATCH;
   1317     } else {
   1318         die("Failed to find property value beginning with '+', '-', 'Merged', or 'Reverse-merged': \"$_\".");
   1319     }
   1320 
   1321     while (<$fileHandle>) {
   1322         if (/^[\r\n]+$/ || /$svnPropertyValueStartRegEx/ || /$svnPropertyStartRegEx/ || /$svnPropertyValueNoNewlineRegEx/) {
   1323             # Note, we may encounter an empty line before the contents of a binary patch.
   1324             # Also, we check for $svnPropertyValueStartRegEx because a '-' property may be
   1325             # followed by a '+' property in the case of a "Modified" or "Name" property.
   1326             # We check for $svnPropertyStartRegEx because it indicates the start of the
   1327             # next property to parse.
   1328             last;
   1329         }
   1330 
   1331         # Temporarily strip off any end-of-line characters. We add the end-of-line characters
   1332         # from the previously processed line to the start of this line so that the last line
   1333         # of the property value does not end in end-of-line characters.
   1334         s/([\n\r]+)$//;
   1335         $propertyValue .= "$eol$_";
   1336         $eol = $1;
   1337     }
   1338 
   1339     return ($propertyValue, $_);
   1340 }
   1341 
   1342 # Parse a patch file created by svn-create-patch.
   1343 #
   1344 # Args:
   1345 #   $fileHandle: A file handle to the patch file that has not yet been
   1346 #                read from.
   1347 #   $optionsHashRef: a hash reference representing optional options to use
   1348 #                    when processing a diff.
   1349 #     shouldNotUseIndexPathEOL: whether to use the line endings in the diff instead
   1350 #                               instead of the line endings in the target file; the
   1351 #                               value of 1 if svnConvertedText should use the line
   1352 #                               endings in the diff.
   1353 #
   1354 # Returns:
   1355 #   @diffHashRefs: an array of diff hash references.
   1356 #                  See the %diffHash documentation above.
   1357 sub parsePatch($;$)
   1358 {
   1359     my ($fileHandle, $optionsHashRef) = @_;
   1360 
   1361     my $newDiffHashRefs;
   1362     my @diffHashRefs; # return value
   1363 
   1364     my $line = <$fileHandle>;
   1365 
   1366     while (defined($line)) { # Otherwise, at EOF.
   1367 
   1368         ($newDiffHashRefs, $line) = parseDiff($fileHandle, $line, $optionsHashRef);
   1369 
   1370         push @diffHashRefs, @$newDiffHashRefs;
   1371     }
   1372 
   1373     return @diffHashRefs;
   1374 }
   1375 
   1376 # Prepare the results of parsePatch() for use in svn-apply and svn-unapply.
   1377 #
   1378 # Args:
   1379 #   $shouldForce: Whether to continue processing if an unexpected
   1380 #                 state occurs.
   1381 #   @diffHashRefs: An array of references to %diffHashes.
   1382 #                  See the %diffHash documentation above.
   1383 #
   1384 # Returns $preparedPatchHashRef:
   1385 #   copyDiffHashRefs: A reference to an array of the $diffHashRefs in
   1386 #                     @diffHashRefs that represent file copies. The original
   1387 #                     ordering is preserved.
   1388 #   nonCopyDiffHashRefs: A reference to an array of the $diffHashRefs in
   1389 #                        @diffHashRefs that do not represent file copies.
   1390 #                        The original ordering is preserved.
   1391 #   sourceRevisionHash: A reference to a hash of source path to source
   1392 #                       revision number.
   1393 sub prepareParsedPatch($@)
   1394 {
   1395     my ($shouldForce, @diffHashRefs) = @_;
   1396 
   1397     my %copiedFiles;
   1398 
   1399     # Return values
   1400     my @copyDiffHashRefs = ();
   1401     my @nonCopyDiffHashRefs = ();
   1402     my %sourceRevisionHash = ();
   1403     for my $diffHashRef (@diffHashRefs) {
   1404         my $copiedFromPath = $diffHashRef->{copiedFromPath};
   1405         my $indexPath = $diffHashRef->{indexPath};
   1406         my $sourceRevision = $diffHashRef->{sourceRevision};
   1407         my $sourcePath;
   1408 
   1409         if (defined($copiedFromPath)) {
   1410             # Then the diff is a copy operation.
   1411             $sourcePath = $copiedFromPath;
   1412 
   1413             # FIXME: Consider printing a warning or exiting if
   1414             #        exists($copiedFiles{$indexPath}) is true -- i.e. if
   1415             #        $indexPath appears twice as a copy target.
   1416             $copiedFiles{$indexPath} = $sourcePath;
   1417 
   1418             push @copyDiffHashRefs, $diffHashRef;
   1419         } else {
   1420             # Then the diff is not a copy operation.
   1421             $sourcePath = $indexPath;
   1422 
   1423             push @nonCopyDiffHashRefs, $diffHashRef;
   1424         }
   1425 
   1426         if (defined($sourceRevision)) {
   1427             if (exists($sourceRevisionHash{$sourcePath}) &&
   1428                 ($sourceRevisionHash{$sourcePath} != $sourceRevision)) {
   1429                 if (!$shouldForce) {
   1430                     die "Two revisions of the same file required as a source:\n".
   1431                         "    $sourcePath:$sourceRevisionHash{$sourcePath}\n".
   1432                         "    $sourcePath:$sourceRevision";
   1433                 }
   1434             }
   1435             $sourceRevisionHash{$sourcePath} = $sourceRevision;
   1436         }
   1437     }
   1438 
   1439     my %preparedPatchHash;
   1440 
   1441     $preparedPatchHash{copyDiffHashRefs} = \@copyDiffHashRefs;
   1442     $preparedPatchHash{nonCopyDiffHashRefs} = \@nonCopyDiffHashRefs;
   1443     $preparedPatchHash{sourceRevisionHash} = \%sourceRevisionHash;
   1444 
   1445     return \%preparedPatchHash;
   1446 }
   1447 
   1448 # Return localtime() for the project's time zone, given an integer time as
   1449 # returned by Perl's time() function.
   1450 sub localTimeInProjectTimeZone($)
   1451 {
   1452     my $epochTime = shift;
   1453 
   1454     # Change the time zone temporarily for the localtime() call.
   1455     my $savedTimeZone = $ENV{'TZ'};
   1456     $ENV{'TZ'} = $changeLogTimeZone;
   1457     my @localTime = localtime($epochTime);
   1458     if (defined $savedTimeZone) {
   1459          $ENV{'TZ'} = $savedTimeZone;
   1460     } else {
   1461          delete $ENV{'TZ'};
   1462     }
   1463 
   1464     return @localTime;
   1465 }
   1466 
   1467 # Set the reviewer and date in a ChangeLog patch, and return the new patch.
   1468 #
   1469 # Args:
   1470 #   $patch: a ChangeLog patch as a string.
   1471 #   $reviewer: the name of the reviewer, or undef if the reviewer should not be set.
   1472 #   $epochTime: an integer time as returned by Perl's time() function.
   1473 sub setChangeLogDateAndReviewer($$$)
   1474 {
   1475     my ($patch, $reviewer, $epochTime) = @_;
   1476 
   1477     my @localTime = localTimeInProjectTimeZone($epochTime);
   1478     my $newDate = strftime("%Y-%m-%d", @localTime);
   1479 
   1480     my $firstChangeLogLineRegEx = qr#(\n\+)\d{4}-[^-]{2}-[^-]{2}(  )#;
   1481     $patch =~ s/$firstChangeLogLineRegEx/$1$newDate$2/;
   1482 
   1483     if (defined($reviewer)) {
   1484         # We include a leading plus ("+") in the regular expression to make
   1485         # the regular expression less likely to match text in the leading junk
   1486         # for the patch, if the patch has leading junk.
   1487         $patch =~ s/(\n\+.*)NOBODY \(OOPS!\)/$1$reviewer/;
   1488     }
   1489 
   1490     return $patch;
   1491 }
   1492 
   1493 # If possible, returns a ChangeLog patch equivalent to the given one,
   1494 # but with the newest ChangeLog entry inserted at the top of the
   1495 # file -- i.e. no leading context and all lines starting with "+".
   1496 #
   1497 # If given a patch string not representable as a patch with the above
   1498 # properties, it returns the input back unchanged.
   1499 #
   1500 # WARNING: This subroutine can return an inequivalent patch string if
   1501 # both the beginning of the new ChangeLog file matches the beginning
   1502 # of the source ChangeLog, and the source beginning was modified.
   1503 # Otherwise, it is guaranteed to return an equivalent patch string,
   1504 # if it returns.
   1505 #
   1506 # Applying this subroutine to ChangeLog patches allows svn-apply to
   1507 # insert new ChangeLog entries at the top of the ChangeLog file.
   1508 # svn-apply uses patch with --fuzz=3 to do this. We need to apply
   1509 # this subroutine because the diff(1) command is greedy when matching
   1510 # lines. A new ChangeLog entry with the same date and author as the
   1511 # previous will match and cause the diff to have lines of starting
   1512 # context.
   1513 #
   1514 # This subroutine has unit tests in VCSUtils_unittest.pl.
   1515 #
   1516 # Returns $changeLogHashRef:
   1517 #   $changeLogHashRef: a hash reference representing a change log patch.
   1518 #     patch: a ChangeLog patch equivalent to the given one, but with the
   1519 #            newest ChangeLog entry inserted at the top of the file, if possible.
   1520 sub fixChangeLogPatch($)
   1521 {
   1522     my $patch = shift; # $patch will only contain patch fragments for ChangeLog.
   1523 
   1524     $patch =~ s|test_expectations.txt:|TestExpectations:|g;
   1525 
   1526     $patch =~ /(\r?\n)/;
   1527     my $lineEnding = $1;
   1528     my @lines = split(/$lineEnding/, $patch);
   1529 
   1530     my $i = 0; # We reuse the same index throughout.
   1531 
   1532     # Skip to beginning of first chunk.
   1533     for (; $i < @lines; ++$i) {
   1534         if (substr($lines[$i], 0, 1) eq "@") {
   1535             last;
   1536         }
   1537     }
   1538     my $chunkStartIndex = ++$i;
   1539     my %changeLogHashRef;
   1540 
   1541     # Optimization: do not process if new lines already begin the chunk.
   1542     if (substr($lines[$i], 0, 1) eq "+") {
   1543         $changeLogHashRef{patch} = $patch;
   1544         return \%changeLogHashRef;
   1545     }
   1546 
   1547     # Skip to first line of newly added ChangeLog entry.
   1548     # For example, +2009-06-03  Eric Seidel  <eric (at] webkit.org>
   1549     my $dateStartRegEx = '^\+(\d{4}-\d{2}-\d{2})' # leading "+" and date
   1550                          . '\s+(.+)\s+' # name
   1551                          . '<([^<>]+)>$'; # e-mail address
   1552 
   1553     for (; $i < @lines; ++$i) {
   1554         my $line = $lines[$i];
   1555         my $firstChar = substr($line, 0, 1);
   1556         if ($line =~ /$dateStartRegEx/) {
   1557             last;
   1558         } elsif ($firstChar eq " " or $firstChar eq "+") {
   1559             next;
   1560         }
   1561         $changeLogHashRef{patch} = $patch; # Do not change if, for example, "-" or "@" found.
   1562         return \%changeLogHashRef;
   1563     }
   1564     if ($i >= @lines) {
   1565         $changeLogHashRef{patch} = $patch; # Do not change if date not found.
   1566         return \%changeLogHashRef;
   1567     }
   1568     my $dateStartIndex = $i;
   1569 
   1570     # Rewrite overlapping lines to lead with " ".
   1571     my @overlappingLines = (); # These will include a leading "+".
   1572     for (; $i < @lines; ++$i) {
   1573         my $line = $lines[$i];
   1574         if (substr($line, 0, 1) ne "+") {
   1575           last;
   1576         }
   1577         push(@overlappingLines, $line);
   1578         $lines[$i] = " " . substr($line, 1);
   1579     }
   1580 
   1581     # Remove excess ending context, if necessary.
   1582     my $shouldTrimContext = 1;
   1583     for (; $i < @lines; ++$i) {
   1584         my $firstChar = substr($lines[$i], 0, 1);
   1585         if ($firstChar eq " ") {
   1586             next;
   1587         } elsif ($firstChar eq "@") {
   1588             last;
   1589         }
   1590         $shouldTrimContext = 0; # For example, if "+" or "-" encountered.
   1591         last;
   1592     }
   1593     my $deletedLineCount = 0;
   1594     if ($shouldTrimContext) { # Also occurs if end of file reached.
   1595         splice(@lines, $i - @overlappingLines, @overlappingLines);
   1596         $deletedLineCount = @overlappingLines;
   1597     }
   1598 
   1599     # Work backwards, shifting overlapping lines towards front
   1600     # while checking that patch stays equivalent.
   1601     for ($i = $dateStartIndex - 1; @overlappingLines && $i >= $chunkStartIndex; --$i) {
   1602         my $line = $lines[$i];
   1603         if (substr($line, 0, 1) ne " ") {
   1604             next;
   1605         }
   1606         my $text = substr($line, 1);
   1607         my $newLine = pop(@overlappingLines);
   1608         if ($text ne substr($newLine, 1)) {
   1609             $changeLogHashRef{patch} = $patch; # Unexpected difference.
   1610             return \%changeLogHashRef;
   1611         }
   1612         $lines[$i] = "+$text";
   1613     }
   1614 
   1615     # If @overlappingLines > 0, this is where we make use of the
   1616     # assumption that the beginning of the source file was not modified.
   1617     splice(@lines, $chunkStartIndex, 0, @overlappingLines);
   1618 
   1619     # Update the date start index as it may have changed after shifting
   1620     # the overlapping lines towards the front.
   1621     for ($i = $chunkStartIndex; $i < $dateStartIndex; ++$i) {
   1622         $dateStartIndex = $i if $lines[$i] =~ /$dateStartRegEx/;
   1623     }
   1624     splice(@lines, $chunkStartIndex, $dateStartIndex - $chunkStartIndex); # Remove context of later entry.
   1625     $deletedLineCount += $dateStartIndex - $chunkStartIndex;
   1626 
   1627     # Update the initial chunk range.
   1628     my $chunkRangeHashRef = parseChunkRange($lines[$chunkStartIndex - 1]);
   1629     if (!$chunkRangeHashRef) {
   1630         # FIXME: Handle errors differently from ChangeLog files that
   1631         # are okay but should not be altered. That way we can find out
   1632         # if improvements to the script ever become necessary.
   1633         $changeLogHashRef{patch} = $patch; # Error: unexpected patch string format.
   1634         return \%changeLogHashRef;
   1635     }
   1636     my $oldSourceLineCount = $chunkRangeHashRef->{lineCount};
   1637     my $oldTargetLineCount = $chunkRangeHashRef->{newLineCount};
   1638 
   1639     my $sourceLineCount = $oldSourceLineCount + @overlappingLines - $deletedLineCount;
   1640     my $targetLineCount = $oldTargetLineCount + @overlappingLines - $deletedLineCount;
   1641     $lines[$chunkStartIndex - 1] = "@@ -1,$sourceLineCount +1,$targetLineCount @@";
   1642 
   1643     $changeLogHashRef{patch} = join($lineEnding, @lines) . "\n"; # patch(1) expects an extra trailing newline.
   1644     return \%changeLogHashRef;
   1645 }
   1646 
   1647 # This is a supporting method for runPatchCommand.
   1648 #
   1649 # Arg: the optional $args parameter passed to runPatchCommand (can be undefined).
   1650 #
   1651 # Returns ($patchCommand, $isForcing).
   1652 #
   1653 # This subroutine has unit tests in VCSUtils_unittest.pl.
   1654 sub generatePatchCommand($)
   1655 {
   1656     my ($passedArgsHashRef) = @_;
   1657 
   1658     my $argsHashRef = { # Defaults
   1659         ensureForce => 0,
   1660         shouldReverse => 0,
   1661         options => []
   1662     };
   1663 
   1664     # Merges hash references. It's okay here if passed hash reference is undefined.
   1665     @{$argsHashRef}{keys %{$passedArgsHashRef}} = values %{$passedArgsHashRef};
   1666 
   1667     my $ensureForce = $argsHashRef->{ensureForce};
   1668     my $shouldReverse = $argsHashRef->{shouldReverse};
   1669     my $options = $argsHashRef->{options};
   1670 
   1671     if (! $options) {
   1672         $options = [];
   1673     } else {
   1674         $options = [@{$options}]; # Copy to avoid side effects.
   1675     }
   1676 
   1677     my $isForcing = 0;
   1678     if (grep /^--force$/, @{$options}) {
   1679         $isForcing = 1;
   1680     } elsif ($ensureForce) {
   1681         push @{$options}, "--force";
   1682         $isForcing = 1;
   1683     }
   1684 
   1685     if ($shouldReverse) { # No check: --reverse should never be passed explicitly.
   1686         push @{$options}, "--reverse";
   1687     }
   1688 
   1689     @{$options} = sort(@{$options}); # For easier testing.
   1690 
   1691     my $patchCommand = join(" ", "patch -p0", @{$options});
   1692 
   1693     return ($patchCommand, $isForcing);
   1694 }
   1695 
   1696 # Apply the given patch using the patch(1) command.
   1697 #
   1698 # On success, return the resulting exit status. Otherwise, exit with the
   1699 # exit status. If "--force" is passed as an option, however, then never
   1700 # exit and always return the exit status.
   1701 #
   1702 # Args:
   1703 #   $patch: a patch string.
   1704 #   $repositoryRootPath: an absolute path to the repository root.
   1705 #   $pathRelativeToRoot: the path of the file to be patched, relative to the
   1706 #                        repository root. This should normally be the path
   1707 #                        found in the patch's "Index:" line. It is passed
   1708 #                        explicitly rather than reparsed from the patch
   1709 #                        string for optimization purposes.
   1710 #                            This is used only for error reporting. The
   1711 #                        patch command gleans the actual file to patch
   1712 #                        from the patch string.
   1713 #   $args: a reference to a hash of optional arguments. The possible
   1714 #          keys are --
   1715 #            ensureForce: whether to ensure --force is passed (defaults to 0).
   1716 #            shouldReverse: whether to pass --reverse (defaults to 0).
   1717 #            options: a reference to an array of options to pass to the
   1718 #                     patch command. The subroutine passes the -p0 option
   1719 #                     no matter what. This should not include --reverse.
   1720 #
   1721 # This subroutine has unit tests in VCSUtils_unittest.pl.
   1722 sub runPatchCommand($$$;$)
   1723 {
   1724     my ($patch, $repositoryRootPath, $pathRelativeToRoot, $args) = @_;
   1725 
   1726     my ($patchCommand, $isForcing) = generatePatchCommand($args);
   1727 
   1728     # Temporarily change the working directory since the path found
   1729     # in the patch's "Index:" line is relative to the repository root
   1730     # (i.e. the same as $pathRelativeToRoot).
   1731     my $cwd = Cwd::getcwd();
   1732     chdir $repositoryRootPath;
   1733 
   1734     open PATCH, "| $patchCommand" or die "Could not call \"$patchCommand\" for file \"$pathRelativeToRoot\": $!";
   1735     print PATCH $patch;
   1736     close PATCH;
   1737     my $exitStatus = exitStatus($?);
   1738 
   1739     chdir $cwd;
   1740 
   1741     if ($exitStatus && !$isForcing) {
   1742         print "Calling \"$patchCommand\" for file \"$pathRelativeToRoot\" returned " .
   1743               "status $exitStatus.  Pass --force to ignore patch failures.\n";
   1744         exit $exitStatus;
   1745     }
   1746 
   1747     return $exitStatus;
   1748 }
   1749 
   1750 # Merge ChangeLog patches using a three-file approach.
   1751 #
   1752 # This is used by resolve-ChangeLogs when it's operated as a merge driver
   1753 # and when it's used to merge conflicts after a patch is applied or after
   1754 # an svn update.
   1755 #
   1756 # It's also used for traditional rejected patches.
   1757 #
   1758 # Args:
   1759 #   $fileMine:  The merged version of the file.  Also known in git as the
   1760 #               other branch's version (%B) or "ours".
   1761 #               For traditional patch rejects, this is the *.rej file.
   1762 #   $fileOlder: The base version of the file.  Also known in git as the
   1763 #               ancestor version (%O) or "base".
   1764 #               For traditional patch rejects, this is the *.orig file.
   1765 #   $fileNewer: The current version of the file.  Also known in git as the
   1766 #               current version (%A) or "theirs".
   1767 #               For traditional patch rejects, this is the original-named
   1768 #               file.
   1769 #
   1770 # Returns 1 if merge was successful, else 0.
   1771 sub mergeChangeLogs($$$)
   1772 {
   1773     my ($fileMine, $fileOlder, $fileNewer) = @_;
   1774 
   1775     my $traditionalReject = $fileMine =~ /\.rej$/ ? 1 : 0;
   1776 
   1777     local $/ = undef;
   1778 
   1779     my $patch;
   1780     if ($traditionalReject) {
   1781         open(DIFF, "<", $fileMine) or die $!;
   1782         $patch = <DIFF>;
   1783         close(DIFF);
   1784         rename($fileMine, "$fileMine.save");
   1785         rename($fileOlder, "$fileOlder.save");
   1786     } else {
   1787         open(DIFF, "diff -u -a --binary \"$fileOlder\" \"$fileMine\" |") or die $!;
   1788         $patch = <DIFF>;
   1789         close(DIFF);
   1790     }
   1791 
   1792     unlink("${fileNewer}.orig");
   1793     unlink("${fileNewer}.rej");
   1794 
   1795     open(PATCH, "| patch --force --fuzz=3 --binary \"$fileNewer\" > " . File::Spec->devnull()) or die $!;
   1796     if ($traditionalReject) {
   1797         print PATCH $patch;
   1798     } else {
   1799         my $changeLogHash = fixChangeLogPatch($patch);
   1800         print PATCH $changeLogHash->{patch};
   1801     }
   1802     close(PATCH);
   1803 
   1804     my $result = !exitStatus($?);
   1805 
   1806     # Refuse to merge the patch if it did not apply cleanly
   1807     if (-e "${fileNewer}.rej") {
   1808         unlink("${fileNewer}.rej");
   1809         if (-f "${fileNewer}.orig") {
   1810             unlink($fileNewer);
   1811             rename("${fileNewer}.orig", $fileNewer);
   1812         }
   1813     } else {
   1814         unlink("${fileNewer}.orig");
   1815     }
   1816 
   1817     if ($traditionalReject) {
   1818         rename("$fileMine.save", $fileMine);
   1819         rename("$fileOlder.save", $fileOlder);
   1820     }
   1821 
   1822     return $result;
   1823 }
   1824 
   1825 sub gitConfig($)
   1826 {
   1827     return unless $isGit;
   1828 
   1829     my ($config) = @_;
   1830 
   1831     my $result = `git config $config`;
   1832     chomp $result;
   1833     return $result;
   1834 }
   1835 
   1836 sub changeLogSuffix()
   1837 {
   1838     my $rootPath = determineVCSRoot();
   1839     my $changeLogSuffixFile = File::Spec->catfile($rootPath, ".changeLogSuffix");
   1840     return "" if ! -e $changeLogSuffixFile;
   1841     open FILE, $changeLogSuffixFile or die "Could not open $changeLogSuffixFile: $!";
   1842     my $changeLogSuffix = <FILE>;
   1843     chomp $changeLogSuffix;
   1844     close FILE;
   1845     return $changeLogSuffix;
   1846 }
   1847 
   1848 sub changeLogFileName()
   1849 {
   1850     return "ChangeLog" . changeLogSuffix()
   1851 }
   1852 
   1853 sub changeLogNameError($)
   1854 {
   1855     my ($message) = @_;
   1856     print STDERR "$message\nEither:\n";
   1857     print STDERR "  set CHANGE_LOG_NAME in your environment\n";
   1858     print STDERR "  OR pass --name= on the command line\n";
   1859     print STDERR "  OR set REAL_NAME in your environment";
   1860     print STDERR "  OR git users can set 'git config user.name'\n";
   1861     exit(1);
   1862 }
   1863 
   1864 sub changeLogName()
   1865 {
   1866     my $name = $ENV{CHANGE_LOG_NAME} || $ENV{REAL_NAME} || gitConfig("user.name") || (split /\s*,\s*/, (getpwuid $<)[6])[0];
   1867 
   1868     changeLogNameError("Failed to determine ChangeLog name.") unless $name;
   1869     # getpwuid seems to always succeed on windows, returning the username instead of the full name.  This check will catch that case.
   1870     changeLogNameError("'$name' does not contain a space!  ChangeLogs should contain your full name.") unless ($name =~ /\S\s\S/);
   1871 
   1872     return $name;
   1873 }
   1874 
   1875 sub changeLogEmailAddressError($)
   1876 {
   1877     my ($message) = @_;
   1878     print STDERR "$message\nEither:\n";
   1879     print STDERR "  set CHANGE_LOG_EMAIL_ADDRESS in your environment\n";
   1880     print STDERR "  OR pass --email= on the command line\n";
   1881     print STDERR "  OR set EMAIL_ADDRESS in your environment\n";
   1882     print STDERR "  OR git users can set 'git config user.email'\n";
   1883     exit(1);
   1884 }
   1885 
   1886 sub changeLogEmailAddress()
   1887 {
   1888     my $emailAddress = $ENV{CHANGE_LOG_EMAIL_ADDRESS} || $ENV{EMAIL_ADDRESS} || gitConfig("user.email");
   1889 
   1890     changeLogEmailAddressError("Failed to determine email address for ChangeLog.") unless $emailAddress;
   1891     changeLogEmailAddressError("Email address '$emailAddress' does not contain '\@' and is likely invalid.") unless ($emailAddress =~ /\@/);
   1892 
   1893     return $emailAddress;
   1894 }
   1895 
   1896 # http://tools.ietf.org/html/rfc1924
   1897 sub decodeBase85($)
   1898 {
   1899     my ($encoded) = @_;
   1900     my %table;
   1901     my @characters = ('0'..'9', 'A'..'Z', 'a'..'z', '!', '#', '$', '%', '&', '(', ')', '*', '+', '-', ';', '<', '=', '>', '?', '@', '^', '_', '`', '{', '|', '}', '~');
   1902     for (my $i = 0; $i < 85; $i++) {
   1903         $table{$characters[$i]} = $i;
   1904     }
   1905 
   1906     my $decoded = '';
   1907     my @encodedChars = $encoded =~ /./g;
   1908 
   1909     for (my $encodedIter = 0; defined($encodedChars[$encodedIter]);) {
   1910         my $digit = 0;
   1911         for (my $i = 0; $i < 5; $i++) {
   1912             $digit *= 85;
   1913             my $char = $encodedChars[$encodedIter];
   1914             $digit += $table{$char};
   1915             $encodedIter++;
   1916         }
   1917 
   1918         for (my $i = 0; $i < 4; $i++) {
   1919             $decoded .= chr(($digit >> (3 - $i) * 8) & 255);
   1920         }
   1921     }
   1922 
   1923     return $decoded;
   1924 }
   1925 
   1926 sub decodeGitBinaryChunk($$)
   1927 {
   1928     my ($contents, $fullPath) = @_;
   1929 
   1930     # Load this module lazily in case the user don't have this module
   1931     # and won't handle git binary patches.
   1932     require Compress::Zlib;
   1933 
   1934     my $encoded = "";
   1935     my $compressedSize = 0;
   1936     while ($contents =~ /^([A-Za-z])(.*)$/gm) {
   1937         my $line = $2;
   1938         next if $line eq "";
   1939         die "$fullPath: unexpected size of a line: $&" if length($2) % 5 != 0;
   1940         my $actualSize = length($2) / 5 * 4;
   1941         my $encodedExpectedSize = ord($1);
   1942         my $expectedSize = $encodedExpectedSize <= ord("Z") ? $encodedExpectedSize - ord("A") + 1 : $encodedExpectedSize - ord("a") + 27;
   1943 
   1944         die "$fullPath: unexpected size of a line: $&" if int(($expectedSize + 3) / 4) * 4 != $actualSize;
   1945         $compressedSize += $expectedSize;
   1946         $encoded .= $line;
   1947     }
   1948 
   1949     my $compressed = decodeBase85($encoded);
   1950     $compressed = substr($compressed, 0, $compressedSize);
   1951     return Compress::Zlib::uncompress($compressed);
   1952 }
   1953 
   1954 sub decodeGitBinaryPatch($$)
   1955 {
   1956     my ($contents, $fullPath) = @_;
   1957 
   1958     # Git binary patch has two chunks. One is for the normal patching
   1959     # and another is for the reverse patching.
   1960     #
   1961     # Each chunk a line which starts from either "literal" or "delta",
   1962     # followed by a number which specifies decoded size of the chunk.
   1963     #
   1964     # Then, content of the chunk comes. To decode the content, we
   1965     # need decode it with base85 first, and then zlib.
   1966     my $gitPatchRegExp = '(literal|delta) ([0-9]+)\n([A-Za-z0-9!#$%&()*+-;<=>?@^_`{|}~\\n]*?)\n\n';
   1967     if ($contents !~ m"\nGIT binary patch\n$gitPatchRegExp$gitPatchRegExp\Z") {
   1968         die "$fullPath: unknown git binary patch format"
   1969     }
   1970 
   1971     my $binaryChunkType = $1;
   1972     my $binaryChunkExpectedSize = $2;
   1973     my $encodedChunk = $3;
   1974     my $reverseBinaryChunkType = $4;
   1975     my $reverseBinaryChunkExpectedSize = $5;
   1976     my $encodedReverseChunk = $6;
   1977 
   1978     my $binaryChunk = decodeGitBinaryChunk($encodedChunk, $fullPath);
   1979     my $binaryChunkActualSize = length($binaryChunk);
   1980     my $reverseBinaryChunk = decodeGitBinaryChunk($encodedReverseChunk, $fullPath);
   1981     my $reverseBinaryChunkActualSize = length($reverseBinaryChunk);
   1982 
   1983     die "$fullPath: unexpected size of the first chunk (expected $binaryChunkExpectedSize but was $binaryChunkActualSize" if ($binaryChunkType eq "literal" and $binaryChunkExpectedSize != $binaryChunkActualSize);
   1984     die "$fullPath: unexpected size of the second chunk (expected $reverseBinaryChunkExpectedSize but was $reverseBinaryChunkActualSize" if ($reverseBinaryChunkType eq "literal" and $reverseBinaryChunkExpectedSize != $reverseBinaryChunkActualSize);
   1985 
   1986     return ($binaryChunkType, $binaryChunk, $reverseBinaryChunkType, $reverseBinaryChunk);
   1987 }
   1988 
   1989 sub readByte($$)
   1990 {
   1991     my ($data, $location) = @_;
   1992 
   1993     # Return the byte at $location in $data as a numeric value.
   1994     return ord(substr($data, $location, 1));
   1995 }
   1996 
   1997 # The git binary delta format is undocumented, except in code:
   1998 # - https://github.com/git/git/blob/master/delta.h:get_delta_hdr_size is the source
   1999 #   of the algorithm in decodeGitBinaryPatchDeltaSize.
   2000 # - https://github.com/git/git/blob/master/patch-delta.c:patch_delta is the source
   2001 #   of the algorithm in applyGitBinaryPatchDelta.
   2002 sub decodeGitBinaryPatchDeltaSize($)
   2003 {
   2004     my ($binaryChunk) = @_;
   2005 
   2006     # Source and destination buffer sizes are stored in 7-bit chunks at the
   2007     # start of the binary delta patch data.  The highest bit in each byte
   2008     # except the last is set; the remaining 7 bits provide the next
   2009     # chunk of the size.  The chunks are stored in ascending significance
   2010     # order.
   2011     my $cmd;
   2012     my $size = 0;
   2013     my $shift = 0;
   2014     for (my $i = 0; $i < length($binaryChunk);) {
   2015         $cmd = readByte($binaryChunk, $i++);
   2016         $size |= ($cmd & 0x7f) << $shift;
   2017         $shift += 7;
   2018         if (!($cmd & 0x80)) {
   2019             return ($size, $i);
   2020         }
   2021     }
   2022 }
   2023 
   2024 sub applyGitBinaryPatchDelta($$)
   2025 {
   2026     my ($binaryChunk, $originalContents) = @_;
   2027 
   2028     # Git delta format consists of two headers indicating source buffer size
   2029     # and result size, then a series of commands.  Each command is either
   2030     # a copy-from-old-version (the 0x80 bit is set) or a copy-from-delta
   2031     # command.  Commands are applied sequentially to generate the result.
   2032     #
   2033     # A copy-from-old-version command encodes an offset and size to copy
   2034     # from in subsequent bits, while a copy-from-delta command consists only
   2035     # of the number of bytes to copy from the delta.
   2036 
   2037     # We don't use these values, but we need to know how big they are so that
   2038     # we can skip to the diff data.
   2039     my ($size, $bytesUsed) = decodeGitBinaryPatchDeltaSize($binaryChunk);
   2040     $binaryChunk = substr($binaryChunk, $bytesUsed);
   2041     ($size, $bytesUsed) = decodeGitBinaryPatchDeltaSize($binaryChunk);
   2042     $binaryChunk = substr($binaryChunk, $bytesUsed);
   2043 
   2044     my $out = "";
   2045     for (my $i = 0; $i < length($binaryChunk); ) {
   2046         my $cmd = ord(substr($binaryChunk, $i++, 1));
   2047         if ($cmd & 0x80) {
   2048             # Extract an offset and size from the delta data, then copy
   2049             # $size bytes from $offset in the original data into the output.
   2050             my $offset = 0;
   2051             my $size = 0;
   2052             if ($cmd & 0x01) { $offset = readByte($binaryChunk, $i++); }
   2053             if ($cmd & 0x02) { $offset |= readByte($binaryChunk, $i++) << 8; }
   2054             if ($cmd & 0x04) { $offset |= readByte($binaryChunk, $i++) << 16; }
   2055             if ($cmd & 0x08) { $offset |= readByte($binaryChunk, $i++) << 24; }
   2056             if ($cmd & 0x10) { $size = readByte($binaryChunk, $i++); }
   2057             if ($cmd & 0x20) { $size |= readByte($binaryChunk, $i++) << 8; }
   2058             if ($cmd & 0x40) { $size |= readByte($binaryChunk, $i++) << 16; }
   2059             if ($size == 0) { $size = 0x10000; }
   2060             $out .= substr($originalContents, $offset, $size);
   2061         } elsif ($cmd) {
   2062             # Copy $cmd bytes from the delta data into the output.
   2063             $out .= substr($binaryChunk, $i, $cmd);
   2064             $i += $cmd;
   2065         } else {
   2066             die "unexpected delta opcode 0";
   2067         }
   2068     }
   2069 
   2070     return $out;
   2071 }
   2072 
   2073 sub escapeSubversionPath($)
   2074 {
   2075     my ($path) = @_;
   2076     $path .= "@" if $path =~ /@/;
   2077     return $path;
   2078 }
   2079 
   2080 sub runCommand(@)
   2081 {
   2082     my @args = @_;
   2083     my $pid = open(CHILD, "-|");
   2084     if (!defined($pid)) {
   2085         die "Failed to fork(): $!";
   2086     }
   2087     if ($pid) {
   2088         # Parent process
   2089         my $childStdout;
   2090         while (<CHILD>) {
   2091             $childStdout .= $_;
   2092         }
   2093         close(CHILD);
   2094         my %childOutput;
   2095         $childOutput{exitStatus} = exitStatus($?);
   2096         $childOutput{stdout} = $childStdout if $childStdout;
   2097         return \%childOutput;
   2098     }
   2099     # Child process
   2100     # FIXME: Consider further hardening of this function, including sanitizing the environment.
   2101     exec { $args[0] } @args or die "Failed to exec(): $!";
   2102 }
   2103 
   2104 sub gitCommitForSVNRevision
   2105 {
   2106     my ($svnRevision) = @_;
   2107     my $command = "git svn find-rev r" . $svnRevision;
   2108     $command = "LC_ALL=C $command" if !isWindows();
   2109     my $gitHash = `$command`;
   2110     if (!defined($gitHash)) {
   2111         $gitHash = "unknown";
   2112         warn "Unable to determine GIT commit from SVN revision";
   2113     } else {
   2114         chop($gitHash);
   2115     }
   2116     return $gitHash;
   2117 }
   2118 
   2119 sub listOfChangedFilesBetweenRevisions
   2120 {
   2121     my ($sourceDir, $firstRevision, $lastRevision) = @_;
   2122     my $command;
   2123 
   2124     if ($firstRevision eq "unknown" or $lastRevision eq "unknown") {
   2125         return ();
   2126     }
   2127 
   2128     # Some VCS functions don't work from within the build dir, so always
   2129     # go to the source dir first.
   2130     my $cwd = Cwd::getcwd();
   2131     chdir $sourceDir;
   2132 
   2133     if (isGit()) {
   2134         my $firstCommit = gitCommitForSVNRevision($firstRevision);
   2135         my $lastCommit = gitCommitForSVNRevision($lastRevision);
   2136         $command = "git diff --name-status $firstCommit..$lastCommit";
   2137     } elsif (isSVN()) {
   2138         $command = "svn diff --summarize -r $firstRevision:$lastRevision";
   2139     }
   2140 
   2141     my @result = ();
   2142 
   2143     if ($command) {
   2144         my $diffOutput = `$command`;
   2145         $diffOutput =~ s/^[A-Z]\s+//gm;
   2146         @result = split(/[\r\n]+/, $diffOutput);
   2147     }
   2148 
   2149     chdir $cwd;
   2150 
   2151     return @result;
   2152 }
   2153 
   2154 
   2155 1;
   2156