1 # Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013 Apple Inc. All rights reserved. 2 # Copyright (C) 2009, 2010 Chris Jerdonek (chris.jerdonek (at] gmail.com) 3 # Copyright (C) 2010, 2011 Research In Motion Limited. All rights reserved. 4 # Copyright (C) 2012 Daniel Bates (dbates (at] intudata.com) 5 # 6 # Redistribution and use in source and binary forms, with or without 7 # modification, are permitted provided that the following conditions 8 # are met: 9 # 10 # 1. Redistributions of source code must retain the above copyright 11 # notice, this list of conditions and the following disclaimer. 12 # 2. Redistributions in binary form must reproduce the above copyright 13 # notice, this list of conditions and the following disclaimer in the 14 # documentation and/or other materials provided with the distribution. 15 # 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of 16 # its contributors may be used to endorse or promote products derived 17 # from this software without specific prior written permission. 18 # 19 # THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY 20 # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 21 # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 # DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY 23 # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 24 # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 25 # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 26 # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 28 # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 30 # Module to share code to work with various version control systems. 31 package VCSUtils; 32 33 use strict; 34 use warnings; 35 36 use Cwd qw(); # "qw()" prevents warnings about redefining getcwd() with "use POSIX;" 37 use English; # for $POSTMATCH, etc. 38 use File::Basename; 39 use File::Spec; 40 use POSIX; 41 use Term::ANSIColor qw(colored); 42 43 BEGIN { 44 use Exporter (); 45 our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS); 46 $VERSION = 1.00; 47 @ISA = qw(Exporter); 48 @EXPORT = qw( 49 &applyGitBinaryPatchDelta 50 &callSilently 51 &canonicalizePath 52 &changeLogEmailAddress 53 &changeLogFileName 54 &changeLogName 55 &chdirReturningRelativePath 56 &decodeGitBinaryChunk 57 &decodeGitBinaryPatch 58 &determineSVNRoot 59 &determineVCSRoot 60 &escapeSubversionPath 61 &exitStatus 62 &fixChangeLogPatch 63 &gitBranch 64 &gitdiff2svndiff 65 &isGit 66 &isGitSVN 67 &isGitBranchBuild 68 &isGitDirectory 69 &isSVN 70 &isSVNDirectory 71 &isSVNVersion16OrNewer 72 &makeFilePathRelative 73 &mergeChangeLogs 74 &normalizePath 75 &parseChunkRange 76 &parseFirstEOL 77 &parsePatch 78 &pathRelativeToSVNRepositoryRootForPath 79 &possiblyColored 80 &prepareParsedPatch 81 &removeEOL 82 &runCommand 83 &runPatchCommand 84 &scmMoveOrRenameFile 85 &scmToggleExecutableBit 86 &setChangeLogDateAndReviewer 87 &svnRevisionForDirectory 88 &svnStatus 89 &toWindowsLineEndings 90 &gitCommitForSVNRevision 91 &listOfChangedFilesBetweenRevisions 92 ); 93 %EXPORT_TAGS = ( ); 94 @EXPORT_OK = (); 95 } 96 97 our @EXPORT_OK; 98 99 my $gitBranch; 100 my $gitRoot; 101 my $isGit; 102 my $isGitSVN; 103 my $isGitBranchBuild; 104 my $isSVN; 105 my $svnVersion; 106 107 # Project time zone for Cupertino, CA, US 108 my $changeLogTimeZone = "PST8PDT"; 109 110 my $gitDiffStartRegEx = qr#^diff --git (\w/)?(.+) (\w/)?([^\r\n]+)#; 111 my $svnDiffStartRegEx = qr#^Index: ([^\r\n]+)#; 112 my $svnPropertiesStartRegEx = qr#^Property changes on: ([^\r\n]+)#; # $1 is normally the same as the index path. 113 my $svnPropertyStartRegEx = qr#^(Modified|Name|Added|Deleted): ([^\r\n]+)#; # $2 is the name of the property. 114 my $svnPropertyValueStartRegEx = qr#^\s*(\+|-|Merged|Reverse-merged)\s*([^\r\n]+)#; # $2 is the start of the property's value (which may span multiple lines). 115 my $svnPropertyValueNoNewlineRegEx = qr#\ No newline at end of property#; 116 117 # This method is for portability. Return the system-appropriate exit 118 # status of a child process. 119 # 120 # Args: pass the child error status returned by the last pipe close, 121 # for example "$?". 122 sub exitStatus($) 123 { 124 my ($returnvalue) = @_; 125 if ($^O eq "MSWin32") { 126 return $returnvalue >> 8; 127 } 128 if (!WIFEXITED($returnvalue)) { 129 return 254; 130 } 131 return WEXITSTATUS($returnvalue); 132 } 133 134 # Call a function while suppressing STDERR, and return the return values 135 # as an array. 136 sub callSilently($@) { 137 my ($func, @args) = @_; 138 139 # The following pattern was taken from here: 140 # http://www.sdsc.edu/~moreland/courses/IntroPerl/docs/manual/pod/perlfunc/open.html 141 # 142 # Also see this Perl documentation (search for "open OLDERR"): 143 # http://perldoc.perl.org/functions/open.html 144 open(OLDERR, ">&STDERR"); 145 close(STDERR); 146 my @returnValue = &$func(@args); 147 open(STDERR, ">&OLDERR"); 148 close(OLDERR); 149 150 return @returnValue; 151 } 152 153 sub toWindowsLineEndings 154 { 155 my ($text) = @_; 156 $text =~ s/\n/\r\n/g; 157 return $text; 158 } 159 160 # Note, this method will not error if the file corresponding to the $source path does not exist. 161 sub scmMoveOrRenameFile 162 { 163 my ($source, $destination) = @_; 164 return if ! -e $source; 165 if (isSVN()) { 166 my $escapedDestination = escapeSubversionPath($destination); 167 my $escapedSource = escapeSubversionPath($source); 168 system("svn", "move", $escapedSource, $escapedDestination); 169 } elsif (isGit()) { 170 system("git", "mv", $source, $destination); 171 } 172 } 173 174 # Note, this method will not error if the file corresponding to the path does not exist. 175 sub scmToggleExecutableBit 176 { 177 my ($path, $executableBitDelta) = @_; 178 return if ! -e $path; 179 if ($executableBitDelta == 1) { 180 scmAddExecutableBit($path); 181 } elsif ($executableBitDelta == -1) { 182 scmRemoveExecutableBit($path); 183 } 184 } 185 186 sub scmAddExecutableBit($) 187 { 188 my ($path) = @_; 189 190 if (isSVN()) { 191 my $escapedPath = escapeSubversionPath($path); 192 system("svn", "propset", "svn:executable", "on", $escapedPath) == 0 or die "Failed to run 'svn propset svn:executable on $escapedPath'."; 193 } elsif (isGit()) { 194 chmod(0755, $path); 195 } 196 } 197 198 sub scmRemoveExecutableBit($) 199 { 200 my ($path) = @_; 201 202 if (isSVN()) { 203 my $escapedPath = escapeSubversionPath($path); 204 system("svn", "propdel", "svn:executable", $escapedPath) == 0 or die "Failed to run 'svn propdel svn:executable $escapedPath'."; 205 } elsif (isGit()) { 206 chmod(0664, $path); 207 } 208 } 209 210 sub isGitDirectory($) 211 { 212 my ($dir) = @_; 213 return system("cd $dir && git rev-parse > " . File::Spec->devnull() . " 2>&1") == 0; 214 } 215 216 sub isGit() 217 { 218 return $isGit if defined $isGit; 219 220 $isGit = isGitDirectory("."); 221 return $isGit; 222 } 223 224 sub isGitSVN() 225 { 226 return $isGitSVN if defined $isGitSVN; 227 228 # There doesn't seem to be an officially documented way to determine 229 # if you're in a git-svn checkout. The best suggestions seen so far 230 # all use something like the following: 231 my $output = `git config --get svn-remote.svn.fetch 2>& 1`; 232 $isGitSVN = $output ne ''; 233 return $isGitSVN; 234 } 235 236 sub gitBranch() 237 { 238 unless (defined $gitBranch) { 239 chomp($gitBranch = `git symbolic-ref -q HEAD`); 240 $gitBranch = "" if exitStatus($?); 241 $gitBranch =~ s#^refs/heads/##; 242 $gitBranch = "" if $gitBranch eq "master"; 243 } 244 245 return $gitBranch; 246 } 247 248 sub isGitBranchBuild() 249 { 250 my $branch = gitBranch(); 251 chomp(my $override = `git config --bool branch.$branch.webKitBranchBuild`); 252 return 1 if $override eq "true"; 253 return 0 if $override eq "false"; 254 255 unless (defined $isGitBranchBuild) { 256 chomp(my $gitBranchBuild = `git config --bool core.webKitBranchBuild`); 257 $isGitBranchBuild = $gitBranchBuild eq "true"; 258 } 259 260 return $isGitBranchBuild; 261 } 262 263 sub isSVNDirectory($) 264 { 265 my ($dir) = @_; 266 return system("cd $dir && svn info > " . File::Spec->devnull() . " 2>&1") == 0; 267 } 268 269 sub isSVN() 270 { 271 return $isSVN if defined $isSVN; 272 273 $isSVN = isSVNDirectory("."); 274 return $isSVN; 275 } 276 277 sub svnVersion() 278 { 279 return $svnVersion if defined $svnVersion; 280 281 if (!isSVN()) { 282 $svnVersion = 0; 283 } else { 284 chomp($svnVersion = `svn --version --quiet`); 285 } 286 return $svnVersion; 287 } 288 289 sub isSVNVersion16OrNewer() 290 { 291 my $version = svnVersion(); 292 return eval "v$version" ge v1.6; 293 } 294 295 sub chdirReturningRelativePath($) 296 { 297 my ($directory) = @_; 298 my $previousDirectory = Cwd::getcwd(); 299 chdir $directory; 300 my $newDirectory = Cwd::getcwd(); 301 return "." if $newDirectory eq $previousDirectory; 302 return File::Spec->abs2rel($previousDirectory, $newDirectory); 303 } 304 305 sub determineGitRoot() 306 { 307 chomp(my $gitDir = `git rev-parse --git-dir`); 308 return dirname($gitDir); 309 } 310 311 sub determineSVNRoot() 312 { 313 my $last = ''; 314 my $path = '.'; 315 my $parent = '..'; 316 my $repositoryRoot; 317 my $repositoryUUID; 318 while (1) { 319 my $thisRoot; 320 my $thisUUID; 321 my $escapedPath = escapeSubversionPath($path); 322 # Ignore error messages in case we've run past the root of the checkout. 323 open INFO, "svn info '$escapedPath' 2> " . File::Spec->devnull() . " |" or die; 324 while (<INFO>) { 325 if (/^Repository Root: (.+)/) { 326 $thisRoot = $1; 327 } 328 if (/^Repository UUID: (.+)/) { 329 $thisUUID = $1; 330 } 331 if ($thisRoot && $thisUUID) { 332 local $/ = undef; 333 <INFO>; # Consume the rest of the input. 334 } 335 } 336 close INFO; 337 338 # It's possible (e.g. for developers of some ports) to have a WebKit 339 # checkout in a subdirectory of another checkout. So abort if the 340 # repository root or the repository UUID suddenly changes. 341 last if !$thisUUID; 342 $repositoryUUID = $thisUUID if !$repositoryUUID; 343 last if $thisUUID ne $repositoryUUID; 344 345 last if !$thisRoot; 346 $repositoryRoot = $thisRoot if !$repositoryRoot; 347 last if $thisRoot ne $repositoryRoot; 348 349 $last = $path; 350 $path = File::Spec->catdir($parent, $path); 351 } 352 353 return File::Spec->rel2abs($last); 354 } 355 356 sub determineVCSRoot() 357 { 358 if (isGit()) { 359 return determineGitRoot(); 360 } 361 362 if (!isSVN()) { 363 # Some users have a workflow where svn-create-patch, svn-apply and 364 # svn-unapply are used outside of multiple svn working directores, 365 # so warn the user and assume Subversion is being used in this case. 366 warn "Unable to determine VCS root for '" . Cwd::getcwd() . "'; assuming Subversion"; 367 $isSVN = 1; 368 } 369 370 return determineSVNRoot(); 371 } 372 373 sub isWindows() 374 { 375 return ($^O eq "MSWin32") || 0; 376 } 377 378 sub svnRevisionForDirectory($) 379 { 380 my ($dir) = @_; 381 my $revision; 382 383 if (isSVNDirectory($dir)) { 384 my $escapedDir = escapeSubversionPath($dir); 385 my $command = "svn info $escapedDir | grep Revision:"; 386 $command = "LC_ALL=C $command" if !isWindows(); 387 my $svnInfo = `$command`; 388 ($revision) = ($svnInfo =~ m/Revision: (\d+).*/g); 389 } elsif (isGitDirectory($dir)) { 390 my $command = "git log --grep=\"git-svn-id: \" -n 1 | grep git-svn-id:"; 391 $command = "LC_ALL=C $command" if !isWindows(); 392 $command = "cd $dir && $command"; 393 my $gitLog = `$command`; 394 ($revision) = ($gitLog =~ m/ +git-svn-id: .+@(\d+) /g); 395 } 396 if (!defined($revision)) { 397 $revision = "unknown"; 398 warn "Unable to determine current SVN revision in $dir"; 399 } 400 return $revision; 401 } 402 403 sub pathRelativeToSVNRepositoryRootForPath($) 404 { 405 my ($file) = @_; 406 my $relativePath = File::Spec->abs2rel($file); 407 408 my $svnInfo; 409 if (isSVN()) { 410 my $escapedRelativePath = escapeSubversionPath($relativePath); 411 my $command = "svn info $escapedRelativePath"; 412 $command = "LC_ALL=C $command" if !isWindows(); 413 $svnInfo = `$command`; 414 } elsif (isGit()) { 415 my $command = "git svn info $relativePath"; 416 $command = "LC_ALL=C $command" if !isWindows(); 417 $svnInfo = `$command`; 418 } 419 420 $svnInfo =~ /.*^URL: (.*?)$/m; 421 my $svnURL = $1; 422 423 $svnInfo =~ /.*^Repository Root: (.*?)$/m; 424 my $repositoryRoot = $1; 425 426 $svnURL =~ s/$repositoryRoot\///; 427 return $svnURL; 428 } 429 430 sub makeFilePathRelative($) 431 { 432 my ($path) = @_; 433 return $path unless isGit(); 434 435 unless (defined $gitRoot) { 436 chomp($gitRoot = `git rev-parse --show-cdup`); 437 } 438 return $gitRoot . $path; 439 } 440 441 sub normalizePath($) 442 { 443 my ($path) = @_; 444 $path =~ s/\\/\//g; 445 return $path; 446 } 447 448 sub possiblyColored($$) 449 { 450 my ($colors, $string) = @_; 451 452 if (-t STDOUT) { 453 return colored([$colors], $string); 454 } else { 455 return $string; 456 } 457 } 458 459 sub adjustPathForRecentRenamings($) 460 { 461 my ($fullPath) = @_; 462 463 $fullPath =~ s|WebCore/webaudio|WebCore/Modules/webaudio|g; 464 $fullPath =~ s|JavaScriptCore/wtf|WTF/wtf|g; 465 $fullPath =~ s|test_expectations.txt|TestExpectations|g; 466 467 return $fullPath; 468 } 469 470 sub canonicalizePath($) 471 { 472 my ($file) = @_; 473 474 # Remove extra slashes and '.' directories in path 475 $file = File::Spec->canonpath($file); 476 477 # Remove '..' directories in path 478 my @dirs = (); 479 foreach my $dir (File::Spec->splitdir($file)) { 480 if ($dir eq '..' && $#dirs >= 0 && $dirs[$#dirs] ne '..') { 481 pop(@dirs); 482 } else { 483 push(@dirs, $dir); 484 } 485 } 486 return ($#dirs >= 0) ? File::Spec->catdir(@dirs) : "."; 487 } 488 489 sub removeEOL($) 490 { 491 my ($line) = @_; 492 return "" unless $line; 493 494 $line =~ s/[\r\n]+$//g; 495 return $line; 496 } 497 498 sub parseFirstEOL($) 499 { 500 my ($fileHandle) = @_; 501 502 # Make input record separator the new-line character to simplify regex matching below. 503 my $savedInputRecordSeparator = $INPUT_RECORD_SEPARATOR; 504 $INPUT_RECORD_SEPARATOR = "\n"; 505 my $firstLine = <$fileHandle>; 506 $INPUT_RECORD_SEPARATOR = $savedInputRecordSeparator; 507 508 return unless defined($firstLine); 509 510 my $eol; 511 if ($firstLine =~ /\r\n/) { 512 $eol = "\r\n"; 513 } elsif ($firstLine =~ /\r/) { 514 $eol = "\r"; 515 } elsif ($firstLine =~ /\n/) { 516 $eol = "\n"; 517 } 518 return $eol; 519 } 520 521 sub firstEOLInFile($) 522 { 523 my ($file) = @_; 524 my $eol; 525 if (open(FILE, $file)) { 526 $eol = parseFirstEOL(*FILE); 527 close(FILE); 528 } 529 return $eol; 530 } 531 532 # Parses a chunk range line into its components. 533 # 534 # A chunk range line has the form: @@ -L_1,N_1 +L_2,N_2 @@, where the pairs (L_1, N_1), 535 # (L_2, N_2) are ranges that represent the starting line number and line count in the 536 # original file and new file, respectively. 537 # 538 # Note, some versions of GNU diff may omit the comma and trailing line count (e.g. N_1), 539 # in which case the omitted line count defaults to 1. For example, GNU diff may output 540 # @@ -1 +1 @@, which is equivalent to @@ -1,1 +1,1 @@. 541 # 542 # This subroutine returns undef if given an invalid or malformed chunk range. 543 # 544 # Args: 545 # $line: the line to parse. 546 # $chunkSentinel: the sentinel that surrounds the chunk range information (defaults to "@@"). 547 # 548 # Returns $chunkRangeHashRef 549 # $chunkRangeHashRef: a hash reference representing the parts of a chunk range, as follows-- 550 # startingLine: the starting line in the original file. 551 # lineCount: the line count in the original file. 552 # newStartingLine: the new starting line in the new file. 553 # newLineCount: the new line count in the new file. 554 sub parseChunkRange($;$) 555 { 556 my ($line, $chunkSentinel) = @_; 557 $chunkSentinel = "@@" if !$chunkSentinel; 558 my $chunkRangeRegEx = qr#^\Q$chunkSentinel\E -(\d+)(,(\d+))? \+(\d+)(,(\d+))? \Q$chunkSentinel\E#; 559 if ($line !~ /$chunkRangeRegEx/) { 560 return; 561 } 562 my %chunkRange; 563 $chunkRange{startingLine} = $1; 564 $chunkRange{lineCount} = defined($2) ? $3 : 1; 565 $chunkRange{newStartingLine} = $4; 566 $chunkRange{newLineCount} = defined($5) ? $6 : 1; 567 return \%chunkRange; 568 } 569 570 sub svnStatus($) 571 { 572 my ($fullPath) = @_; 573 my $escapedFullPath = escapeSubversionPath($fullPath); 574 my $svnStatus; 575 open SVN, "svn status --non-interactive --non-recursive '$escapedFullPath' |" or die; 576 if (-d $fullPath) { 577 # When running "svn stat" on a directory, we can't assume that only one 578 # status will be returned (since any files with a status below the 579 # directory will be returned), and we can't assume that the directory will 580 # be first (since any files with unknown status will be listed first). 581 my $normalizedFullPath = File::Spec->catdir(File::Spec->splitdir($fullPath)); 582 while (<SVN>) { 583 # Input may use a different EOL sequence than $/, so avoid chomp. 584 $_ = removeEOL($_); 585 my $normalizedStatPath = File::Spec->catdir(File::Spec->splitdir(substr($_, 7))); 586 if ($normalizedFullPath eq $normalizedStatPath) { 587 $svnStatus = "$_\n"; 588 last; 589 } 590 } 591 # Read the rest of the svn command output to avoid a broken pipe warning. 592 local $/ = undef; 593 <SVN>; 594 } 595 else { 596 # Files will have only one status returned. 597 $svnStatus = removeEOL(<SVN>) . "\n"; 598 } 599 close SVN; 600 return $svnStatus; 601 } 602 603 # Return whether the given file mode is executable in the source control 604 # sense. We make this determination based on whether the executable bit 605 # is set for "others" rather than the stronger condition that it be set 606 # for the user, group, and others. This is sufficient for distinguishing 607 # the default behavior in Git and SVN. 608 # 609 # Args: 610 # $fileMode: A number or string representing a file mode in octal notation. 611 sub isExecutable($) 612 { 613 my $fileMode = shift; 614 615 return $fileMode % 2; 616 } 617 618 # Parse the next Git diff header from the given file handle, and advance 619 # the handle so the last line read is the first line after the header. 620 # 621 # This subroutine dies if given leading junk. 622 # 623 # Args: 624 # $fileHandle: advanced so the last line read from the handle is the first 625 # line of the header to parse. This should be a line 626 # beginning with "diff --git". 627 # $line: the line last read from $fileHandle 628 # 629 # Returns ($headerHashRef, $lastReadLine): 630 # $headerHashRef: a hash reference representing a diff header, as follows-- 631 # copiedFromPath: the path from which the file was copied or moved if 632 # the diff is a copy or move. 633 # executableBitDelta: the value 1 or -1 if the executable bit was added or 634 # removed, respectively. New and deleted files have 635 # this value only if the file is executable, in which 636 # case the value is 1 and -1, respectively. 637 # indexPath: the path of the target file. 638 # isBinary: the value 1 if the diff is for a binary file. 639 # isDeletion: the value 1 if the diff is a file deletion. 640 # isCopyWithChanges: the value 1 if the file was copied or moved and 641 # the target file was changed in some way after being 642 # copied or moved (e.g. if its contents or executable 643 # bit were changed). 644 # isNew: the value 1 if the diff is for a new file. 645 # shouldDeleteSource: the value 1 if the file was copied or moved and 646 # the source file was deleted -- i.e. if the copy 647 # was actually a move. 648 # svnConvertedText: the header text with some lines converted to SVN 649 # format. Git-specific lines are preserved. 650 # $lastReadLine: the line last read from $fileHandle. 651 sub parseGitDiffHeader($$) 652 { 653 my ($fileHandle, $line) = @_; 654 655 $_ = $line; 656 657 my $indexPath; 658 if (/$gitDiffStartRegEx/) { 659 # The first and second paths can differ in the case of copies 660 # and renames. We use the second file path because it is the 661 # destination path. 662 $indexPath = adjustPathForRecentRenamings($4); 663 # Use $POSTMATCH to preserve the end-of-line character. 664 $_ = "Index: $indexPath$POSTMATCH"; # Convert to SVN format. 665 } else { 666 die("Could not parse leading \"diff --git\" line: \"$line\"."); 667 } 668 669 my $copiedFromPath; 670 my $foundHeaderEnding; 671 my $isBinary; 672 my $isDeletion; 673 my $isNew; 674 my $newExecutableBit = 0; 675 my $oldExecutableBit = 0; 676 my $shouldDeleteSource = 0; 677 my $similarityIndex = 0; 678 my $svnConvertedText; 679 while (1) { 680 # Temporarily strip off any end-of-line characters to simplify 681 # regex matching below. 682 s/([\n\r]+)$//; 683 my $eol = $1; 684 685 if (/^(deleted file|old) mode (\d+)/) { 686 $oldExecutableBit = (isExecutable($2) ? 1 : 0); 687 $isDeletion = 1 if $1 eq "deleted file"; 688 } elsif (/^new( file)? mode (\d+)/) { 689 $newExecutableBit = (isExecutable($2) ? 1 : 0); 690 $isNew = 1 if $1; 691 } elsif (/^similarity index (\d+)%/) { 692 $similarityIndex = $1; 693 } elsif (/^copy from (\S+)/) { 694 $copiedFromPath = $1; 695 } elsif (/^rename from (\S+)/) { 696 # FIXME: Record this as a move rather than as a copy-and-delete. 697 # This will simplify adding rename support to svn-unapply. 698 # Otherwise, the hash for a deletion would have to know 699 # everything about the file being deleted in order to 700 # support undoing itself. Recording as a move will also 701 # permit us to use "svn move" and "git move". 702 $copiedFromPath = $1; 703 $shouldDeleteSource = 1; 704 } elsif (/^--- \S+/) { 705 $_ = "--- $indexPath"; # Convert to SVN format. 706 } elsif (/^\+\+\+ \S+/) { 707 $_ = "+++ $indexPath"; # Convert to SVN format. 708 $foundHeaderEnding = 1; 709 } elsif (/^GIT binary patch$/ ) { 710 $isBinary = 1; 711 $foundHeaderEnding = 1; 712 # The "git diff" command includes a line of the form "Binary files 713 # <path1> and <path2> differ" if the --binary flag is not used. 714 } elsif (/^Binary files / ) { 715 die("Error: the Git diff contains a binary file without the binary data in ". 716 "line: \"$_\". Be sure to use the --binary flag when invoking \"git diff\" ". 717 "with diffs containing binary files."); 718 } 719 720 $svnConvertedText .= "$_$eol"; # Also restore end-of-line characters. 721 722 $_ = <$fileHandle>; # Not defined if end-of-file reached. 723 724 last if (!defined($_) || /$gitDiffStartRegEx/ || $foundHeaderEnding); 725 } 726 727 my $executableBitDelta = $newExecutableBit - $oldExecutableBit; 728 729 my %header; 730 731 $header{copiedFromPath} = $copiedFromPath if $copiedFromPath; 732 $header{executableBitDelta} = $executableBitDelta if $executableBitDelta; 733 $header{indexPath} = $indexPath; 734 $header{isBinary} = $isBinary if $isBinary; 735 $header{isCopyWithChanges} = 1 if ($copiedFromPath && ($similarityIndex != 100 || $executableBitDelta)); 736 $header{isDeletion} = $isDeletion if $isDeletion; 737 $header{isNew} = $isNew if $isNew; 738 $header{shouldDeleteSource} = $shouldDeleteSource if $shouldDeleteSource; 739 $header{svnConvertedText} = $svnConvertedText; 740 741 return (\%header, $_); 742 } 743 744 # Parse the next SVN diff header from the given file handle, and advance 745 # the handle so the last line read is the first line after the header. 746 # 747 # This subroutine dies if given leading junk or if it could not detect 748 # the end of the header block. 749 # 750 # Args: 751 # $fileHandle: advanced so the last line read from the handle is the first 752 # line of the header to parse. This should be a line 753 # beginning with "Index:". 754 # $line: the line last read from $fileHandle 755 # 756 # Returns ($headerHashRef, $lastReadLine): 757 # $headerHashRef: a hash reference representing a diff header, as follows-- 758 # copiedFromPath: the path from which the file was copied if the diff 759 # is a copy. 760 # indexPath: the path of the target file, which is the path found in 761 # the "Index:" line. 762 # isBinary: the value 1 if the diff is for a binary file. 763 # isNew: the value 1 if the diff is for a new file. 764 # sourceRevision: the revision number of the source, if it exists. This 765 # is the same as the revision number the file was copied 766 # from, in the case of a file copy. 767 # svnConvertedText: the header text converted to a header with the paths 768 # in some lines corrected. 769 # $lastReadLine: the line last read from $fileHandle. 770 sub parseSvnDiffHeader($$) 771 { 772 my ($fileHandle, $line) = @_; 773 774 $_ = $line; 775 776 my $indexPath; 777 if (/$svnDiffStartRegEx/) { 778 $indexPath = adjustPathForRecentRenamings($1); 779 } else { 780 die("First line of SVN diff does not begin with \"Index \": \"$_\""); 781 } 782 783 my $copiedFromPath; 784 my $foundHeaderEnding; 785 my $isBinary; 786 my $isNew; 787 my $sourceRevision; 788 my $svnConvertedText; 789 while (1) { 790 # Temporarily strip off any end-of-line characters to simplify 791 # regex matching below. 792 s/([\n\r]+)$//; 793 my $eol = $1; 794 795 # Fix paths on "---" and "+++" lines to match the leading 796 # index line. 797 if (s/^--- [^\t\n\r]+/--- $indexPath/) { 798 # --- 799 if (/^--- .+\(revision (\d+)\)/) { 800 $sourceRevision = $1; 801 $isNew = 1 if !$sourceRevision; # if revision 0. 802 if (/\(from (\S+):(\d+)\)$/) { 803 # The "from" clause is created by svn-create-patch, in 804 # which case there is always also a "revision" clause. 805 $copiedFromPath = $1; 806 die("Revision number \"$2\" in \"from\" clause does not match " . 807 "source revision number \"$sourceRevision\".") if ($2 != $sourceRevision); 808 } 809 } 810 } elsif (s/^\+\+\+ [^\t\n\r]+/+++ $indexPath/ || $isBinary && /^$/) { 811 $foundHeaderEnding = 1; 812 } elsif (/^Cannot display: file marked as a binary type.$/) { 813 $isBinary = 1; 814 # SVN 1.7 has an unusual display format for a binary diff. It repeats the first 815 # two lines of the diff header. For example: 816 # Index: test_file.swf 817 # =================================================================== 818 # Cannot display: file marked as a binary type. 819 # svn:mime-type = application/octet-stream 820 # Index: test_file.swf 821 # =================================================================== 822 # --- test_file.swf 823 # +++ test_file.swf 824 # 825 # ... 826 # Q1dTBx0AAAB42itg4GlgYJjGwMDDyODMxMDw34GBgQEAJPQDJA== 827 # Therefore, we continue reading the diff header until we either encounter a line 828 # that begins with "+++" (SVN 1.7 or greater) or an empty line (SVN version less 829 # than 1.7). 830 } 831 832 $svnConvertedText .= "$_$eol"; # Also restore end-of-line characters. 833 834 $_ = <$fileHandle>; # Not defined if end-of-file reached. 835 836 last if (!defined($_) || !$isBinary && /$svnDiffStartRegEx/ || $foundHeaderEnding); 837 } 838 839 if (!$foundHeaderEnding) { 840 die("Did not find end of header block corresponding to index path \"$indexPath\"."); 841 } 842 843 my %header; 844 845 $header{copiedFromPath} = $copiedFromPath if $copiedFromPath; 846 $header{indexPath} = $indexPath; 847 $header{isBinary} = $isBinary if $isBinary; 848 $header{isNew} = $isNew if $isNew; 849 $header{sourceRevision} = $sourceRevision if $sourceRevision; 850 $header{svnConvertedText} = $svnConvertedText; 851 852 return (\%header, $_); 853 } 854 855 # Parse the next diff header from the given file handle, and advance 856 # the handle so the last line read is the first line after the header. 857 # 858 # This subroutine dies if given leading junk or if it could not detect 859 # the end of the header block. 860 # 861 # Args: 862 # $fileHandle: advanced so the last line read from the handle is the first 863 # line of the header to parse. For SVN-formatted diffs, this 864 # is a line beginning with "Index:". For Git, this is a line 865 # beginning with "diff --git". 866 # $line: the line last read from $fileHandle 867 # 868 # Returns ($headerHashRef, $lastReadLine): 869 # $headerHashRef: a hash reference representing a diff header 870 # copiedFromPath: the path from which the file was copied if the diff 871 # is a copy. 872 # executableBitDelta: the value 1 or -1 if the executable bit was added or 873 # removed, respectively. New and deleted files have 874 # this value only if the file is executable, in which 875 # case the value is 1 and -1, respectively. 876 # indexPath: the path of the target file. 877 # isBinary: the value 1 if the diff is for a binary file. 878 # isGit: the value 1 if the diff is Git-formatted. 879 # isSvn: the value 1 if the diff is SVN-formatted. 880 # sourceRevision: the revision number of the source, if it exists. This 881 # is the same as the revision number the file was copied 882 # from, in the case of a file copy. 883 # svnConvertedText: the header text with some lines converted to SVN 884 # format. Git-specific lines are preserved. 885 # $lastReadLine: the line last read from $fileHandle. 886 sub parseDiffHeader($$) 887 { 888 my ($fileHandle, $line) = @_; 889 890 my $header; # This is a hash ref. 891 my $isGit; 892 my $isSvn; 893 my $lastReadLine; 894 895 if ($line =~ $svnDiffStartRegEx) { 896 $isSvn = 1; 897 ($header, $lastReadLine) = parseSvnDiffHeader($fileHandle, $line); 898 } elsif ($line =~ $gitDiffStartRegEx) { 899 $isGit = 1; 900 ($header, $lastReadLine) = parseGitDiffHeader($fileHandle, $line); 901 } else { 902 die("First line of diff does not begin with \"Index:\" or \"diff --git\": \"$line\""); 903 } 904 905 $header->{isGit} = $isGit if $isGit; 906 $header->{isSvn} = $isSvn if $isSvn; 907 908 return ($header, $lastReadLine); 909 } 910 911 # FIXME: The %diffHash "object" should not have an svnConvertedText property. 912 # Instead, the hash object should store its information in a 913 # structured way as properties. This should be done in a way so 914 # that, if necessary, the text of an SVN or Git patch can be 915 # reconstructed from the information in those hash properties. 916 # 917 # A %diffHash is a hash representing a source control diff of a single 918 # file operation (e.g. a file modification, copy, or delete). 919 # 920 # These hashes appear, for example, in the parseDiff(), parsePatch(), 921 # and prepareParsedPatch() subroutines of this package. 922 # 923 # The corresponding values are-- 924 # 925 # copiedFromPath: the path from which the file was copied if the diff 926 # is a copy. 927 # executableBitDelta: the value 1 or -1 if the executable bit was added or 928 # removed from the target file, respectively. 929 # indexPath: the path of the target file. For SVN-formatted diffs, 930 # this is the same as the path in the "Index:" line. 931 # isBinary: the value 1 if the diff is for a binary file. 932 # isDeletion: the value 1 if the diff is known from the header to be a deletion. 933 # isGit: the value 1 if the diff is Git-formatted. 934 # isNew: the value 1 if the dif is known from the header to be a new file. 935 # isSvn: the value 1 if the diff is SVN-formatted. 936 # sourceRevision: the revision number of the source, if it exists. This 937 # is the same as the revision number the file was copied 938 # from, in the case of a file copy. 939 # svnConvertedText: the diff with some lines converted to SVN format. 940 # Git-specific lines are preserved. 941 942 # Parse one diff from a patch file created by svn-create-patch, and 943 # advance the file handle so the last line read is the first line 944 # of the next header block. 945 # 946 # This subroutine preserves any leading junk encountered before the header. 947 # 948 # Composition of an SVN diff 949 # 950 # There are three parts to an SVN diff: the header, the property change, and 951 # the binary contents, in that order. Either the header or the property change 952 # may be ommitted, but not both. If there are binary changes, then you always 953 # have all three. 954 # 955 # Args: 956 # $fileHandle: a file handle advanced to the first line of the next 957 # header block. Leading junk is okay. 958 # $line: the line last read from $fileHandle. 959 # $optionsHashRef: a hash reference representing optional options to use 960 # when processing a diff. 961 # shouldNotUseIndexPathEOL: whether to use the line endings in the diff instead 962 # instead of the line endings in the target file; the 963 # value of 1 if svnConvertedText should use the line 964 # endings in the diff. 965 # 966 # Returns ($diffHashRefs, $lastReadLine): 967 # $diffHashRefs: A reference to an array of references to %diffHash hashes. 968 # See the %diffHash documentation above. 969 # $lastReadLine: the line last read from $fileHandle 970 sub parseDiff($$;$) 971 { 972 # FIXME: Adjust this method so that it dies if the first line does not 973 # match the start of a diff. This will require a change to 974 # parsePatch() so that parsePatch() skips over leading junk. 975 my ($fileHandle, $line, $optionsHashRef) = @_; 976 977 my $headerStartRegEx = $svnDiffStartRegEx; # SVN-style header for the default 978 979 my $headerHashRef; # Last header found, as returned by parseDiffHeader(). 980 my $svnPropertiesHashRef; # Last SVN properties diff found, as returned by parseSvnDiffProperties(). 981 my $svnText; 982 my $indexPathEOL; 983 my $numTextChunks = 0; 984 while (defined($line)) { 985 if (!$headerHashRef && ($line =~ $gitDiffStartRegEx)) { 986 # Then assume all diffs in the patch are Git-formatted. This 987 # block was made to be enterable at most once since we assume 988 # all diffs in the patch are formatted the same (SVN or Git). 989 $headerStartRegEx = $gitDiffStartRegEx; 990 } 991 992 if ($line =~ $svnPropertiesStartRegEx) { 993 my $propertyPath = $1; 994 if ($svnPropertiesHashRef || $headerHashRef && ($propertyPath ne $headerHashRef->{indexPath})) { 995 # This is the start of the second diff in the while loop, which happens to 996 # be a property diff. If $svnPropertiesHasRef is defined, then this is the 997 # second consecutive property diff, otherwise it's the start of a property 998 # diff for a file that only has property changes. 999 last; 1000 } 1001 ($svnPropertiesHashRef, $line) = parseSvnDiffProperties($fileHandle, $line); 1002 next; 1003 } 1004 if ($line !~ $headerStartRegEx) { 1005 # Then we are in the body of the diff. 1006 my $isChunkRange = defined(parseChunkRange($line)); 1007 $numTextChunks += 1 if $isChunkRange; 1008 my $nextLine = <$fileHandle>; 1009 my $willAddNewLineAtEndOfFile = defined($nextLine) && $nextLine =~ /^\\ No newline at end of file$/; 1010 if ($willAddNewLineAtEndOfFile) { 1011 # Diff(1) always emits a LF character preceeding the line "\ No newline at end of file". 1012 # We must preserve both the added LF character and the line ending of this sentinel line 1013 # or patch(1) will complain. 1014 $svnText .= $line . $nextLine; 1015 $line = <$fileHandle>; 1016 next; 1017 } 1018 if ($indexPathEOL && !$isChunkRange) { 1019 # The chunk range is part of the body of the diff, but its line endings should't be 1020 # modified or patch(1) will complain. So, we only modify non-chunk range lines. 1021 $line =~ s/\r\n|\r|\n/$indexPathEOL/g; 1022 } 1023 $svnText .= $line; 1024 $line = $nextLine; 1025 next; 1026 } # Otherwise, we found a diff header. 1027 1028 if ($svnPropertiesHashRef || $headerHashRef) { 1029 # Then either we just processed an SVN property change or this 1030 # is the start of the second diff header of this while loop. 1031 last; 1032 } 1033 1034 ($headerHashRef, $line) = parseDiffHeader($fileHandle, $line); 1035 if (!$optionsHashRef || !$optionsHashRef->{shouldNotUseIndexPathEOL}) { 1036 # FIXME: We shouldn't query the file system (via firstEOLInFile()) to determine the 1037 # line endings of the file indexPath. Instead, either the caller to parseDiff() 1038 # should provide this information or parseDiff() should take a delegate that it 1039 # can use to query for this information. 1040 $indexPathEOL = firstEOLInFile($headerHashRef->{indexPath}) if !$headerHashRef->{isNew} && !$headerHashRef->{isBinary}; 1041 } 1042 1043 $svnText .= $headerHashRef->{svnConvertedText}; 1044 } 1045 1046 my @diffHashRefs; 1047 1048 if ($headerHashRef->{shouldDeleteSource}) { 1049 my %deletionHash; 1050 $deletionHash{indexPath} = $headerHashRef->{copiedFromPath}; 1051 $deletionHash{isDeletion} = 1; 1052 push @diffHashRefs, \%deletionHash; 1053 } 1054 if ($headerHashRef->{copiedFromPath}) { 1055 my %copyHash; 1056 $copyHash{copiedFromPath} = $headerHashRef->{copiedFromPath}; 1057 $copyHash{indexPath} = $headerHashRef->{indexPath}; 1058 $copyHash{sourceRevision} = $headerHashRef->{sourceRevision} if $headerHashRef->{sourceRevision}; 1059 if ($headerHashRef->{isSvn}) { 1060 $copyHash{executableBitDelta} = $svnPropertiesHashRef->{executableBitDelta} if $svnPropertiesHashRef->{executableBitDelta}; 1061 } 1062 push @diffHashRefs, \%copyHash; 1063 } 1064 1065 # Note, the order of evaluation for the following if conditional has been explicitly chosen so that 1066 # it evaluates to false when there is no headerHashRef (e.g. a property change diff for a file that 1067 # only has property changes). 1068 if ($headerHashRef->{isCopyWithChanges} || (%$headerHashRef && !$headerHashRef->{copiedFromPath})) { 1069 # Then add the usual file modification. 1070 my %diffHash; 1071 # FIXME: We should expand this code to support other properties. In the future, 1072 # parseSvnDiffProperties may return a hash whose keys are the properties. 1073 if ($headerHashRef->{isSvn}) { 1074 # SVN records the change to the executable bit in a separate property change diff 1075 # that follows the contents of the diff, except for binary diffs. For binary 1076 # diffs, the property change diff follows the diff header. 1077 $diffHash{executableBitDelta} = $svnPropertiesHashRef->{executableBitDelta} if $svnPropertiesHashRef->{executableBitDelta}; 1078 } elsif ($headerHashRef->{isGit}) { 1079 # Git records the change to the executable bit in the header of a diff. 1080 $diffHash{executableBitDelta} = $headerHashRef->{executableBitDelta} if $headerHashRef->{executableBitDelta}; 1081 } 1082 $diffHash{indexPath} = $headerHashRef->{indexPath}; 1083 $diffHash{isBinary} = $headerHashRef->{isBinary} if $headerHashRef->{isBinary}; 1084 $diffHash{isDeletion} = $headerHashRef->{isDeletion} if $headerHashRef->{isDeletion}; 1085 $diffHash{isGit} = $headerHashRef->{isGit} if $headerHashRef->{isGit}; 1086 $diffHash{isNew} = $headerHashRef->{isNew} if $headerHashRef->{isNew}; 1087 $diffHash{isSvn} = $headerHashRef->{isSvn} if $headerHashRef->{isSvn}; 1088 if (!$headerHashRef->{copiedFromPath}) { 1089 # If the file was copied, then we have already incorporated the 1090 # sourceRevision information into the change. 1091 $diffHash{sourceRevision} = $headerHashRef->{sourceRevision} if $headerHashRef->{sourceRevision}; 1092 } 1093 # FIXME: Remove the need for svnConvertedText. See the %diffHash 1094 # code comments above for more information. 1095 # 1096 # Note, we may not always have SVN converted text since we intend 1097 # to deprecate it in the future. For example, a property change 1098 # diff for a file that only has property changes will not return 1099 # any SVN converted text. 1100 $diffHash{svnConvertedText} = $svnText if $svnText; 1101 $diffHash{numTextChunks} = $numTextChunks if $svnText && !$headerHashRef->{isBinary}; 1102 push @diffHashRefs, \%diffHash; 1103 } 1104 1105 if (!%$headerHashRef && $svnPropertiesHashRef) { 1106 # A property change diff for a file that only has property changes. 1107 my %propertyChangeHash; 1108 $propertyChangeHash{executableBitDelta} = $svnPropertiesHashRef->{executableBitDelta} if $svnPropertiesHashRef->{executableBitDelta}; 1109 $propertyChangeHash{indexPath} = $svnPropertiesHashRef->{propertyPath}; 1110 $propertyChangeHash{isSvn} = 1; 1111 push @diffHashRefs, \%propertyChangeHash; 1112 } 1113 1114 return (\@diffHashRefs, $line); 1115 } 1116 1117 # Parse an SVN property change diff from the given file handle, and advance 1118 # the handle so the last line read is the first line after this diff. 1119 # 1120 # For the case of an SVN binary diff, the binary contents will follow the 1121 # the property changes. 1122 # 1123 # This subroutine dies if the first line does not begin with "Property changes on" 1124 # or if the separator line that follows this line is missing. 1125 # 1126 # Args: 1127 # $fileHandle: advanced so the last line read from the handle is the first 1128 # line of the footer to parse. This line begins with 1129 # "Property changes on". 1130 # $line: the line last read from $fileHandle. 1131 # 1132 # Returns ($propertyHashRef, $lastReadLine): 1133 # $propertyHashRef: a hash reference representing an SVN diff footer. 1134 # propertyPath: the path of the target file. 1135 # executableBitDelta: the value 1 or -1 if the executable bit was added or 1136 # removed from the target file, respectively. 1137 # $lastReadLine: the line last read from $fileHandle. 1138 sub parseSvnDiffProperties($$) 1139 { 1140 my ($fileHandle, $line) = @_; 1141 1142 $_ = $line; 1143 1144 my %footer; 1145 if (/$svnPropertiesStartRegEx/) { 1146 $footer{propertyPath} = $1; 1147 } else { 1148 die("Failed to find start of SVN property change, \"Property changes on \": \"$_\""); 1149 } 1150 1151 # We advance $fileHandle two lines so that the next line that 1152 # we process is $svnPropertyStartRegEx in a well-formed footer. 1153 # A well-formed footer has the form: 1154 # Property changes on: FileA 1155 # ___________________________________________________________________ 1156 # Added: svn:executable 1157 # + * 1158 $_ = <$fileHandle>; # Not defined if end-of-file reached. 1159 my $separator = "_" x 67; 1160 if (defined($_) && /^$separator[\r\n]+$/) { 1161 $_ = <$fileHandle>; 1162 } else { 1163 die("Failed to find separator line: \"$_\"."); 1164 } 1165 1166 # FIXME: We should expand this to support other SVN properties 1167 # (e.g. return a hash of property key-values that represents 1168 # all properties). 1169 # 1170 # Notice, we keep processing until we hit end-of-file or some 1171 # line that does not resemble $svnPropertyStartRegEx, such as 1172 # the empty line that precedes the start of the binary contents 1173 # of a patch, or the start of the next diff (e.g. "Index:"). 1174 my $propertyHashRef; 1175 while (defined($_) && /$svnPropertyStartRegEx/) { 1176 ($propertyHashRef, $_) = parseSvnProperty($fileHandle, $_); 1177 if ($propertyHashRef->{name} eq "svn:executable") { 1178 # Notice, for SVN properties, propertyChangeDelta is always non-zero 1179 # because a property can only be added or removed. 1180 $footer{executableBitDelta} = $propertyHashRef->{propertyChangeDelta}; 1181 } 1182 } 1183 1184 return(\%footer, $_); 1185 } 1186 1187 # Parse the next SVN property from the given file handle, and advance the handle so the last 1188 # line read is the first line after the property. 1189 # 1190 # This subroutine dies if the first line is not a valid start of an SVN property, 1191 # or the property is missing a value, or the property change type (e.g. "Added") 1192 # does not correspond to the property value type (e.g. "+"). 1193 # 1194 # Args: 1195 # $fileHandle: advanced so the last line read from the handle is the first 1196 # line of the property to parse. This should be a line 1197 # that matches $svnPropertyStartRegEx. 1198 # $line: the line last read from $fileHandle. 1199 # 1200 # Returns ($propertyHashRef, $lastReadLine): 1201 # $propertyHashRef: a hash reference representing a SVN property. 1202 # name: the name of the property. 1203 # value: the last property value. For instance, suppose the property is "Modified". 1204 # Then it has both a '-' and '+' property value in that order. Therefore, 1205 # the value of this key is the value of the '+' property by ordering (since 1206 # it is the last value). 1207 # propertyChangeDelta: the value 1 or -1 if the property was added or 1208 # removed, respectively. 1209 # $lastReadLine: the line last read from $fileHandle. 1210 sub parseSvnProperty($$) 1211 { 1212 my ($fileHandle, $line) = @_; 1213 1214 $_ = $line; 1215 1216 my $propertyName; 1217 my $propertyChangeType; 1218 if (/$svnPropertyStartRegEx/) { 1219 $propertyChangeType = $1; 1220 $propertyName = $2; 1221 } else { 1222 die("Failed to find SVN property: \"$_\"."); 1223 } 1224 1225 $_ = <$fileHandle>; # Not defined if end-of-file reached. 1226 1227 if (defined($_) && defined(parseChunkRange($_, "##"))) { 1228 # FIXME: We should validate the chunk range line that is part of an SVN 1.7 1229 # property diff. For now, we ignore this line. 1230 $_ = <$fileHandle>; 1231 } 1232 1233 # The "svn diff" command neither inserts newline characters between property values 1234 # nor between successive properties. 1235 # 1236 # As of SVN 1.7, "svn diff" may insert "\ No newline at end of property" after a 1237 # property value that doesn't end in a newline. 1238 # 1239 # FIXME: We do not support property values that contain tailing newline characters 1240 # as it is difficult to disambiguate these trailing newlines from the empty 1241 # line that precedes the contents of a binary patch. 1242 my $propertyValue; 1243 my $propertyValueType; 1244 while (defined($_) && /$svnPropertyValueStartRegEx/) { 1245 # Note, a '-' property may be followed by a '+' property in the case of a "Modified" 1246 # or "Name" property. We only care about the ending value (i.e. the '+' property) 1247 # in such circumstances. So, we take the property value for the property to be its 1248 # last parsed property value. 1249 # 1250 # FIXME: We may want to consider strictly enforcing a '-', '+' property ordering or 1251 # add error checking to prevent '+', '+', ..., '+' and other invalid combinations. 1252 $propertyValueType = $1; 1253 ($propertyValue, $_) = parseSvnPropertyValue($fileHandle, $_); 1254 $_ = <$fileHandle> if defined($_) && /$svnPropertyValueNoNewlineRegEx/; 1255 } 1256 1257 if (!$propertyValue) { 1258 die("Failed to find the property value for the SVN property \"$propertyName\": \"$_\"."); 1259 } 1260 1261 my $propertyChangeDelta; 1262 if ($propertyValueType eq "+" || $propertyValueType eq "Merged") { 1263 $propertyChangeDelta = 1; 1264 } elsif ($propertyValueType eq "-" || $propertyValueType eq "Reverse-merged") { 1265 $propertyChangeDelta = -1; 1266 } else { 1267 die("Not reached."); 1268 } 1269 1270 # We perform a simple validation that an "Added" or "Deleted" property 1271 # change type corresponds with a "+" and "-" value type, respectively. 1272 my $expectedChangeDelta; 1273 if ($propertyChangeType eq "Added") { 1274 $expectedChangeDelta = 1; 1275 } elsif ($propertyChangeType eq "Deleted") { 1276 $expectedChangeDelta = -1; 1277 } 1278 1279 if ($expectedChangeDelta && $propertyChangeDelta != $expectedChangeDelta) { 1280 die("The final property value type found \"$propertyValueType\" does not " . 1281 "correspond to the property change type found \"$propertyChangeType\"."); 1282 } 1283 1284 my %propertyHash; 1285 $propertyHash{name} = $propertyName; 1286 $propertyHash{propertyChangeDelta} = $propertyChangeDelta; 1287 $propertyHash{value} = $propertyValue; 1288 return (\%propertyHash, $_); 1289 } 1290 1291 # Parse the value of an SVN property from the given file handle, and advance 1292 # the handle so the last line read is the first line after the property value. 1293 # 1294 # This subroutine dies if the first line is an invalid SVN property value line 1295 # (i.e. a line that does not begin with " +" or " -"). 1296 # 1297 # Args: 1298 # $fileHandle: advanced so the last line read from the handle is the first 1299 # line of the property value to parse. This should be a line 1300 # beginning with " +" or " -". 1301 # $line: the line last read from $fileHandle. 1302 # 1303 # Returns ($propertyValue, $lastReadLine): 1304 # $propertyValue: the value of the property. 1305 # $lastReadLine: the line last read from $fileHandle. 1306 sub parseSvnPropertyValue($$) 1307 { 1308 my ($fileHandle, $line) = @_; 1309 1310 $_ = $line; 1311 1312 my $propertyValue; 1313 my $eol; 1314 if (/$svnPropertyValueStartRegEx/) { 1315 $propertyValue = $2; # Does not include the end-of-line character(s). 1316 $eol = $POSTMATCH; 1317 } else { 1318 die("Failed to find property value beginning with '+', '-', 'Merged', or 'Reverse-merged': \"$_\"."); 1319 } 1320 1321 while (<$fileHandle>) { 1322 if (/^[\r\n]+$/ || /$svnPropertyValueStartRegEx/ || /$svnPropertyStartRegEx/ || /$svnPropertyValueNoNewlineRegEx/) { 1323 # Note, we may encounter an empty line before the contents of a binary patch. 1324 # Also, we check for $svnPropertyValueStartRegEx because a '-' property may be 1325 # followed by a '+' property in the case of a "Modified" or "Name" property. 1326 # We check for $svnPropertyStartRegEx because it indicates the start of the 1327 # next property to parse. 1328 last; 1329 } 1330 1331 # Temporarily strip off any end-of-line characters. We add the end-of-line characters 1332 # from the previously processed line to the start of this line so that the last line 1333 # of the property value does not end in end-of-line characters. 1334 s/([\n\r]+)$//; 1335 $propertyValue .= "$eol$_"; 1336 $eol = $1; 1337 } 1338 1339 return ($propertyValue, $_); 1340 } 1341 1342 # Parse a patch file created by svn-create-patch. 1343 # 1344 # Args: 1345 # $fileHandle: A file handle to the patch file that has not yet been 1346 # read from. 1347 # $optionsHashRef: a hash reference representing optional options to use 1348 # when processing a diff. 1349 # shouldNotUseIndexPathEOL: whether to use the line endings in the diff instead 1350 # instead of the line endings in the target file; the 1351 # value of 1 if svnConvertedText should use the line 1352 # endings in the diff. 1353 # 1354 # Returns: 1355 # @diffHashRefs: an array of diff hash references. 1356 # See the %diffHash documentation above. 1357 sub parsePatch($;$) 1358 { 1359 my ($fileHandle, $optionsHashRef) = @_; 1360 1361 my $newDiffHashRefs; 1362 my @diffHashRefs; # return value 1363 1364 my $line = <$fileHandle>; 1365 1366 while (defined($line)) { # Otherwise, at EOF. 1367 1368 ($newDiffHashRefs, $line) = parseDiff($fileHandle, $line, $optionsHashRef); 1369 1370 push @diffHashRefs, @$newDiffHashRefs; 1371 } 1372 1373 return @diffHashRefs; 1374 } 1375 1376 # Prepare the results of parsePatch() for use in svn-apply and svn-unapply. 1377 # 1378 # Args: 1379 # $shouldForce: Whether to continue processing if an unexpected 1380 # state occurs. 1381 # @diffHashRefs: An array of references to %diffHashes. 1382 # See the %diffHash documentation above. 1383 # 1384 # Returns $preparedPatchHashRef: 1385 # copyDiffHashRefs: A reference to an array of the $diffHashRefs in 1386 # @diffHashRefs that represent file copies. The original 1387 # ordering is preserved. 1388 # nonCopyDiffHashRefs: A reference to an array of the $diffHashRefs in 1389 # @diffHashRefs that do not represent file copies. 1390 # The original ordering is preserved. 1391 # sourceRevisionHash: A reference to a hash of source path to source 1392 # revision number. 1393 sub prepareParsedPatch($@) 1394 { 1395 my ($shouldForce, @diffHashRefs) = @_; 1396 1397 my %copiedFiles; 1398 1399 # Return values 1400 my @copyDiffHashRefs = (); 1401 my @nonCopyDiffHashRefs = (); 1402 my %sourceRevisionHash = (); 1403 for my $diffHashRef (@diffHashRefs) { 1404 my $copiedFromPath = $diffHashRef->{copiedFromPath}; 1405 my $indexPath = $diffHashRef->{indexPath}; 1406 my $sourceRevision = $diffHashRef->{sourceRevision}; 1407 my $sourcePath; 1408 1409 if (defined($copiedFromPath)) { 1410 # Then the diff is a copy operation. 1411 $sourcePath = $copiedFromPath; 1412 1413 # FIXME: Consider printing a warning or exiting if 1414 # exists($copiedFiles{$indexPath}) is true -- i.e. if 1415 # $indexPath appears twice as a copy target. 1416 $copiedFiles{$indexPath} = $sourcePath; 1417 1418 push @copyDiffHashRefs, $diffHashRef; 1419 } else { 1420 # Then the diff is not a copy operation. 1421 $sourcePath = $indexPath; 1422 1423 push @nonCopyDiffHashRefs, $diffHashRef; 1424 } 1425 1426 if (defined($sourceRevision)) { 1427 if (exists($sourceRevisionHash{$sourcePath}) && 1428 ($sourceRevisionHash{$sourcePath} != $sourceRevision)) { 1429 if (!$shouldForce) { 1430 die "Two revisions of the same file required as a source:\n". 1431 " $sourcePath:$sourceRevisionHash{$sourcePath}\n". 1432 " $sourcePath:$sourceRevision"; 1433 } 1434 } 1435 $sourceRevisionHash{$sourcePath} = $sourceRevision; 1436 } 1437 } 1438 1439 my %preparedPatchHash; 1440 1441 $preparedPatchHash{copyDiffHashRefs} = \@copyDiffHashRefs; 1442 $preparedPatchHash{nonCopyDiffHashRefs} = \@nonCopyDiffHashRefs; 1443 $preparedPatchHash{sourceRevisionHash} = \%sourceRevisionHash; 1444 1445 return \%preparedPatchHash; 1446 } 1447 1448 # Return localtime() for the project's time zone, given an integer time as 1449 # returned by Perl's time() function. 1450 sub localTimeInProjectTimeZone($) 1451 { 1452 my $epochTime = shift; 1453 1454 # Change the time zone temporarily for the localtime() call. 1455 my $savedTimeZone = $ENV{'TZ'}; 1456 $ENV{'TZ'} = $changeLogTimeZone; 1457 my @localTime = localtime($epochTime); 1458 if (defined $savedTimeZone) { 1459 $ENV{'TZ'} = $savedTimeZone; 1460 } else { 1461 delete $ENV{'TZ'}; 1462 } 1463 1464 return @localTime; 1465 } 1466 1467 # Set the reviewer and date in a ChangeLog patch, and return the new patch. 1468 # 1469 # Args: 1470 # $patch: a ChangeLog patch as a string. 1471 # $reviewer: the name of the reviewer, or undef if the reviewer should not be set. 1472 # $epochTime: an integer time as returned by Perl's time() function. 1473 sub setChangeLogDateAndReviewer($$$) 1474 { 1475 my ($patch, $reviewer, $epochTime) = @_; 1476 1477 my @localTime = localTimeInProjectTimeZone($epochTime); 1478 my $newDate = strftime("%Y-%m-%d", @localTime); 1479 1480 my $firstChangeLogLineRegEx = qr#(\n\+)\d{4}-[^-]{2}-[^-]{2}( )#; 1481 $patch =~ s/$firstChangeLogLineRegEx/$1$newDate$2/; 1482 1483 if (defined($reviewer)) { 1484 # We include a leading plus ("+") in the regular expression to make 1485 # the regular expression less likely to match text in the leading junk 1486 # for the patch, if the patch has leading junk. 1487 $patch =~ s/(\n\+.*)NOBODY \(OOPS!\)/$1$reviewer/; 1488 } 1489 1490 return $patch; 1491 } 1492 1493 # If possible, returns a ChangeLog patch equivalent to the given one, 1494 # but with the newest ChangeLog entry inserted at the top of the 1495 # file -- i.e. no leading context and all lines starting with "+". 1496 # 1497 # If given a patch string not representable as a patch with the above 1498 # properties, it returns the input back unchanged. 1499 # 1500 # WARNING: This subroutine can return an inequivalent patch string if 1501 # both the beginning of the new ChangeLog file matches the beginning 1502 # of the source ChangeLog, and the source beginning was modified. 1503 # Otherwise, it is guaranteed to return an equivalent patch string, 1504 # if it returns. 1505 # 1506 # Applying this subroutine to ChangeLog patches allows svn-apply to 1507 # insert new ChangeLog entries at the top of the ChangeLog file. 1508 # svn-apply uses patch with --fuzz=3 to do this. We need to apply 1509 # this subroutine because the diff(1) command is greedy when matching 1510 # lines. A new ChangeLog entry with the same date and author as the 1511 # previous will match and cause the diff to have lines of starting 1512 # context. 1513 # 1514 # This subroutine has unit tests in VCSUtils_unittest.pl. 1515 # 1516 # Returns $changeLogHashRef: 1517 # $changeLogHashRef: a hash reference representing a change log patch. 1518 # patch: a ChangeLog patch equivalent to the given one, but with the 1519 # newest ChangeLog entry inserted at the top of the file, if possible. 1520 sub fixChangeLogPatch($) 1521 { 1522 my $patch = shift; # $patch will only contain patch fragments for ChangeLog. 1523 1524 $patch =~ s|test_expectations.txt:|TestExpectations:|g; 1525 1526 $patch =~ /(\r?\n)/; 1527 my $lineEnding = $1; 1528 my @lines = split(/$lineEnding/, $patch); 1529 1530 my $i = 0; # We reuse the same index throughout. 1531 1532 # Skip to beginning of first chunk. 1533 for (; $i < @lines; ++$i) { 1534 if (substr($lines[$i], 0, 1) eq "@") { 1535 last; 1536 } 1537 } 1538 my $chunkStartIndex = ++$i; 1539 my %changeLogHashRef; 1540 1541 # Optimization: do not process if new lines already begin the chunk. 1542 if (substr($lines[$i], 0, 1) eq "+") { 1543 $changeLogHashRef{patch} = $patch; 1544 return \%changeLogHashRef; 1545 } 1546 1547 # Skip to first line of newly added ChangeLog entry. 1548 # For example, +2009-06-03 Eric Seidel <eric (at] webkit.org> 1549 my $dateStartRegEx = '^\+(\d{4}-\d{2}-\d{2})' # leading "+" and date 1550 . '\s+(.+)\s+' # name 1551 . '<([^<>]+)>$'; # e-mail address 1552 1553 for (; $i < @lines; ++$i) { 1554 my $line = $lines[$i]; 1555 my $firstChar = substr($line, 0, 1); 1556 if ($line =~ /$dateStartRegEx/) { 1557 last; 1558 } elsif ($firstChar eq " " or $firstChar eq "+") { 1559 next; 1560 } 1561 $changeLogHashRef{patch} = $patch; # Do not change if, for example, "-" or "@" found. 1562 return \%changeLogHashRef; 1563 } 1564 if ($i >= @lines) { 1565 $changeLogHashRef{patch} = $patch; # Do not change if date not found. 1566 return \%changeLogHashRef; 1567 } 1568 my $dateStartIndex = $i; 1569 1570 # Rewrite overlapping lines to lead with " ". 1571 my @overlappingLines = (); # These will include a leading "+". 1572 for (; $i < @lines; ++$i) { 1573 my $line = $lines[$i]; 1574 if (substr($line, 0, 1) ne "+") { 1575 last; 1576 } 1577 push(@overlappingLines, $line); 1578 $lines[$i] = " " . substr($line, 1); 1579 } 1580 1581 # Remove excess ending context, if necessary. 1582 my $shouldTrimContext = 1; 1583 for (; $i < @lines; ++$i) { 1584 my $firstChar = substr($lines[$i], 0, 1); 1585 if ($firstChar eq " ") { 1586 next; 1587 } elsif ($firstChar eq "@") { 1588 last; 1589 } 1590 $shouldTrimContext = 0; # For example, if "+" or "-" encountered. 1591 last; 1592 } 1593 my $deletedLineCount = 0; 1594 if ($shouldTrimContext) { # Also occurs if end of file reached. 1595 splice(@lines, $i - @overlappingLines, @overlappingLines); 1596 $deletedLineCount = @overlappingLines; 1597 } 1598 1599 # Work backwards, shifting overlapping lines towards front 1600 # while checking that patch stays equivalent. 1601 for ($i = $dateStartIndex - 1; @overlappingLines && $i >= $chunkStartIndex; --$i) { 1602 my $line = $lines[$i]; 1603 if (substr($line, 0, 1) ne " ") { 1604 next; 1605 } 1606 my $text = substr($line, 1); 1607 my $newLine = pop(@overlappingLines); 1608 if ($text ne substr($newLine, 1)) { 1609 $changeLogHashRef{patch} = $patch; # Unexpected difference. 1610 return \%changeLogHashRef; 1611 } 1612 $lines[$i] = "+$text"; 1613 } 1614 1615 # If @overlappingLines > 0, this is where we make use of the 1616 # assumption that the beginning of the source file was not modified. 1617 splice(@lines, $chunkStartIndex, 0, @overlappingLines); 1618 1619 # Update the date start index as it may have changed after shifting 1620 # the overlapping lines towards the front. 1621 for ($i = $chunkStartIndex; $i < $dateStartIndex; ++$i) { 1622 $dateStartIndex = $i if $lines[$i] =~ /$dateStartRegEx/; 1623 } 1624 splice(@lines, $chunkStartIndex, $dateStartIndex - $chunkStartIndex); # Remove context of later entry. 1625 $deletedLineCount += $dateStartIndex - $chunkStartIndex; 1626 1627 # Update the initial chunk range. 1628 my $chunkRangeHashRef = parseChunkRange($lines[$chunkStartIndex - 1]); 1629 if (!$chunkRangeHashRef) { 1630 # FIXME: Handle errors differently from ChangeLog files that 1631 # are okay but should not be altered. That way we can find out 1632 # if improvements to the script ever become necessary. 1633 $changeLogHashRef{patch} = $patch; # Error: unexpected patch string format. 1634 return \%changeLogHashRef; 1635 } 1636 my $oldSourceLineCount = $chunkRangeHashRef->{lineCount}; 1637 my $oldTargetLineCount = $chunkRangeHashRef->{newLineCount}; 1638 1639 my $sourceLineCount = $oldSourceLineCount + @overlappingLines - $deletedLineCount; 1640 my $targetLineCount = $oldTargetLineCount + @overlappingLines - $deletedLineCount; 1641 $lines[$chunkStartIndex - 1] = "@@ -1,$sourceLineCount +1,$targetLineCount @@"; 1642 1643 $changeLogHashRef{patch} = join($lineEnding, @lines) . "\n"; # patch(1) expects an extra trailing newline. 1644 return \%changeLogHashRef; 1645 } 1646 1647 # This is a supporting method for runPatchCommand. 1648 # 1649 # Arg: the optional $args parameter passed to runPatchCommand (can be undefined). 1650 # 1651 # Returns ($patchCommand, $isForcing). 1652 # 1653 # This subroutine has unit tests in VCSUtils_unittest.pl. 1654 sub generatePatchCommand($) 1655 { 1656 my ($passedArgsHashRef) = @_; 1657 1658 my $argsHashRef = { # Defaults 1659 ensureForce => 0, 1660 shouldReverse => 0, 1661 options => [] 1662 }; 1663 1664 # Merges hash references. It's okay here if passed hash reference is undefined. 1665 @{$argsHashRef}{keys %{$passedArgsHashRef}} = values %{$passedArgsHashRef}; 1666 1667 my $ensureForce = $argsHashRef->{ensureForce}; 1668 my $shouldReverse = $argsHashRef->{shouldReverse}; 1669 my $options = $argsHashRef->{options}; 1670 1671 if (! $options) { 1672 $options = []; 1673 } else { 1674 $options = [@{$options}]; # Copy to avoid side effects. 1675 } 1676 1677 my $isForcing = 0; 1678 if (grep /^--force$/, @{$options}) { 1679 $isForcing = 1; 1680 } elsif ($ensureForce) { 1681 push @{$options}, "--force"; 1682 $isForcing = 1; 1683 } 1684 1685 if ($shouldReverse) { # No check: --reverse should never be passed explicitly. 1686 push @{$options}, "--reverse"; 1687 } 1688 1689 @{$options} = sort(@{$options}); # For easier testing. 1690 1691 my $patchCommand = join(" ", "patch -p0", @{$options}); 1692 1693 return ($patchCommand, $isForcing); 1694 } 1695 1696 # Apply the given patch using the patch(1) command. 1697 # 1698 # On success, return the resulting exit status. Otherwise, exit with the 1699 # exit status. If "--force" is passed as an option, however, then never 1700 # exit and always return the exit status. 1701 # 1702 # Args: 1703 # $patch: a patch string. 1704 # $repositoryRootPath: an absolute path to the repository root. 1705 # $pathRelativeToRoot: the path of the file to be patched, relative to the 1706 # repository root. This should normally be the path 1707 # found in the patch's "Index:" line. It is passed 1708 # explicitly rather than reparsed from the patch 1709 # string for optimization purposes. 1710 # This is used only for error reporting. The 1711 # patch command gleans the actual file to patch 1712 # from the patch string. 1713 # $args: a reference to a hash of optional arguments. The possible 1714 # keys are -- 1715 # ensureForce: whether to ensure --force is passed (defaults to 0). 1716 # shouldReverse: whether to pass --reverse (defaults to 0). 1717 # options: a reference to an array of options to pass to the 1718 # patch command. The subroutine passes the -p0 option 1719 # no matter what. This should not include --reverse. 1720 # 1721 # This subroutine has unit tests in VCSUtils_unittest.pl. 1722 sub runPatchCommand($$$;$) 1723 { 1724 my ($patch, $repositoryRootPath, $pathRelativeToRoot, $args) = @_; 1725 1726 my ($patchCommand, $isForcing) = generatePatchCommand($args); 1727 1728 # Temporarily change the working directory since the path found 1729 # in the patch's "Index:" line is relative to the repository root 1730 # (i.e. the same as $pathRelativeToRoot). 1731 my $cwd = Cwd::getcwd(); 1732 chdir $repositoryRootPath; 1733 1734 open PATCH, "| $patchCommand" or die "Could not call \"$patchCommand\" for file \"$pathRelativeToRoot\": $!"; 1735 print PATCH $patch; 1736 close PATCH; 1737 my $exitStatus = exitStatus($?); 1738 1739 chdir $cwd; 1740 1741 if ($exitStatus && !$isForcing) { 1742 print "Calling \"$patchCommand\" for file \"$pathRelativeToRoot\" returned " . 1743 "status $exitStatus. Pass --force to ignore patch failures.\n"; 1744 exit $exitStatus; 1745 } 1746 1747 return $exitStatus; 1748 } 1749 1750 # Merge ChangeLog patches using a three-file approach. 1751 # 1752 # This is used by resolve-ChangeLogs when it's operated as a merge driver 1753 # and when it's used to merge conflicts after a patch is applied or after 1754 # an svn update. 1755 # 1756 # It's also used for traditional rejected patches. 1757 # 1758 # Args: 1759 # $fileMine: The merged version of the file. Also known in git as the 1760 # other branch's version (%B) or "ours". 1761 # For traditional patch rejects, this is the *.rej file. 1762 # $fileOlder: The base version of the file. Also known in git as the 1763 # ancestor version (%O) or "base". 1764 # For traditional patch rejects, this is the *.orig file. 1765 # $fileNewer: The current version of the file. Also known in git as the 1766 # current version (%A) or "theirs". 1767 # For traditional patch rejects, this is the original-named 1768 # file. 1769 # 1770 # Returns 1 if merge was successful, else 0. 1771 sub mergeChangeLogs($$$) 1772 { 1773 my ($fileMine, $fileOlder, $fileNewer) = @_; 1774 1775 my $traditionalReject = $fileMine =~ /\.rej$/ ? 1 : 0; 1776 1777 local $/ = undef; 1778 1779 my $patch; 1780 if ($traditionalReject) { 1781 open(DIFF, "<", $fileMine) or die $!; 1782 $patch = <DIFF>; 1783 close(DIFF); 1784 rename($fileMine, "$fileMine.save"); 1785 rename($fileOlder, "$fileOlder.save"); 1786 } else { 1787 open(DIFF, "diff -u -a --binary \"$fileOlder\" \"$fileMine\" |") or die $!; 1788 $patch = <DIFF>; 1789 close(DIFF); 1790 } 1791 1792 unlink("${fileNewer}.orig"); 1793 unlink("${fileNewer}.rej"); 1794 1795 open(PATCH, "| patch --force --fuzz=3 --binary \"$fileNewer\" > " . File::Spec->devnull()) or die $!; 1796 if ($traditionalReject) { 1797 print PATCH $patch; 1798 } else { 1799 my $changeLogHash = fixChangeLogPatch($patch); 1800 print PATCH $changeLogHash->{patch}; 1801 } 1802 close(PATCH); 1803 1804 my $result = !exitStatus($?); 1805 1806 # Refuse to merge the patch if it did not apply cleanly 1807 if (-e "${fileNewer}.rej") { 1808 unlink("${fileNewer}.rej"); 1809 if (-f "${fileNewer}.orig") { 1810 unlink($fileNewer); 1811 rename("${fileNewer}.orig", $fileNewer); 1812 } 1813 } else { 1814 unlink("${fileNewer}.orig"); 1815 } 1816 1817 if ($traditionalReject) { 1818 rename("$fileMine.save", $fileMine); 1819 rename("$fileOlder.save", $fileOlder); 1820 } 1821 1822 return $result; 1823 } 1824 1825 sub gitConfig($) 1826 { 1827 return unless $isGit; 1828 1829 my ($config) = @_; 1830 1831 my $result = `git config $config`; 1832 chomp $result; 1833 return $result; 1834 } 1835 1836 sub changeLogSuffix() 1837 { 1838 my $rootPath = determineVCSRoot(); 1839 my $changeLogSuffixFile = File::Spec->catfile($rootPath, ".changeLogSuffix"); 1840 return "" if ! -e $changeLogSuffixFile; 1841 open FILE, $changeLogSuffixFile or die "Could not open $changeLogSuffixFile: $!"; 1842 my $changeLogSuffix = <FILE>; 1843 chomp $changeLogSuffix; 1844 close FILE; 1845 return $changeLogSuffix; 1846 } 1847 1848 sub changeLogFileName() 1849 { 1850 return "ChangeLog" . changeLogSuffix() 1851 } 1852 1853 sub changeLogNameError($) 1854 { 1855 my ($message) = @_; 1856 print STDERR "$message\nEither:\n"; 1857 print STDERR " set CHANGE_LOG_NAME in your environment\n"; 1858 print STDERR " OR pass --name= on the command line\n"; 1859 print STDERR " OR set REAL_NAME in your environment"; 1860 print STDERR " OR git users can set 'git config user.name'\n"; 1861 exit(1); 1862 } 1863 1864 sub changeLogName() 1865 { 1866 my $name = $ENV{CHANGE_LOG_NAME} || $ENV{REAL_NAME} || gitConfig("user.name") || (split /\s*,\s*/, (getpwuid $<)[6])[0]; 1867 1868 changeLogNameError("Failed to determine ChangeLog name.") unless $name; 1869 # getpwuid seems to always succeed on windows, returning the username instead of the full name. This check will catch that case. 1870 changeLogNameError("'$name' does not contain a space! ChangeLogs should contain your full name.") unless ($name =~ /\S\s\S/); 1871 1872 return $name; 1873 } 1874 1875 sub changeLogEmailAddressError($) 1876 { 1877 my ($message) = @_; 1878 print STDERR "$message\nEither:\n"; 1879 print STDERR " set CHANGE_LOG_EMAIL_ADDRESS in your environment\n"; 1880 print STDERR " OR pass --email= on the command line\n"; 1881 print STDERR " OR set EMAIL_ADDRESS in your environment\n"; 1882 print STDERR " OR git users can set 'git config user.email'\n"; 1883 exit(1); 1884 } 1885 1886 sub changeLogEmailAddress() 1887 { 1888 my $emailAddress = $ENV{CHANGE_LOG_EMAIL_ADDRESS} || $ENV{EMAIL_ADDRESS} || gitConfig("user.email"); 1889 1890 changeLogEmailAddressError("Failed to determine email address for ChangeLog.") unless $emailAddress; 1891 changeLogEmailAddressError("Email address '$emailAddress' does not contain '\@' and is likely invalid.") unless ($emailAddress =~ /\@/); 1892 1893 return $emailAddress; 1894 } 1895 1896 # http://tools.ietf.org/html/rfc1924 1897 sub decodeBase85($) 1898 { 1899 my ($encoded) = @_; 1900 my %table; 1901 my @characters = ('0'..'9', 'A'..'Z', 'a'..'z', '!', '#', '$', '%', '&', '(', ')', '*', '+', '-', ';', '<', '=', '>', '?', '@', '^', '_', '`', '{', '|', '}', '~'); 1902 for (my $i = 0; $i < 85; $i++) { 1903 $table{$characters[$i]} = $i; 1904 } 1905 1906 my $decoded = ''; 1907 my @encodedChars = $encoded =~ /./g; 1908 1909 for (my $encodedIter = 0; defined($encodedChars[$encodedIter]);) { 1910 my $digit = 0; 1911 for (my $i = 0; $i < 5; $i++) { 1912 $digit *= 85; 1913 my $char = $encodedChars[$encodedIter]; 1914 $digit += $table{$char}; 1915 $encodedIter++; 1916 } 1917 1918 for (my $i = 0; $i < 4; $i++) { 1919 $decoded .= chr(($digit >> (3 - $i) * 8) & 255); 1920 } 1921 } 1922 1923 return $decoded; 1924 } 1925 1926 sub decodeGitBinaryChunk($$) 1927 { 1928 my ($contents, $fullPath) = @_; 1929 1930 # Load this module lazily in case the user don't have this module 1931 # and won't handle git binary patches. 1932 require Compress::Zlib; 1933 1934 my $encoded = ""; 1935 my $compressedSize = 0; 1936 while ($contents =~ /^([A-Za-z])(.*)$/gm) { 1937 my $line = $2; 1938 next if $line eq ""; 1939 die "$fullPath: unexpected size of a line: $&" if length($2) % 5 != 0; 1940 my $actualSize = length($2) / 5 * 4; 1941 my $encodedExpectedSize = ord($1); 1942 my $expectedSize = $encodedExpectedSize <= ord("Z") ? $encodedExpectedSize - ord("A") + 1 : $encodedExpectedSize - ord("a") + 27; 1943 1944 die "$fullPath: unexpected size of a line: $&" if int(($expectedSize + 3) / 4) * 4 != $actualSize; 1945 $compressedSize += $expectedSize; 1946 $encoded .= $line; 1947 } 1948 1949 my $compressed = decodeBase85($encoded); 1950 $compressed = substr($compressed, 0, $compressedSize); 1951 return Compress::Zlib::uncompress($compressed); 1952 } 1953 1954 sub decodeGitBinaryPatch($$) 1955 { 1956 my ($contents, $fullPath) = @_; 1957 1958 # Git binary patch has two chunks. One is for the normal patching 1959 # and another is for the reverse patching. 1960 # 1961 # Each chunk a line which starts from either "literal" or "delta", 1962 # followed by a number which specifies decoded size of the chunk. 1963 # 1964 # Then, content of the chunk comes. To decode the content, we 1965 # need decode it with base85 first, and then zlib. 1966 my $gitPatchRegExp = '(literal|delta) ([0-9]+)\n([A-Za-z0-9!#$%&()*+-;<=>?@^_`{|}~\\n]*?)\n\n'; 1967 if ($contents !~ m"\nGIT binary patch\n$gitPatchRegExp$gitPatchRegExp\Z") { 1968 die "$fullPath: unknown git binary patch format" 1969 } 1970 1971 my $binaryChunkType = $1; 1972 my $binaryChunkExpectedSize = $2; 1973 my $encodedChunk = $3; 1974 my $reverseBinaryChunkType = $4; 1975 my $reverseBinaryChunkExpectedSize = $5; 1976 my $encodedReverseChunk = $6; 1977 1978 my $binaryChunk = decodeGitBinaryChunk($encodedChunk, $fullPath); 1979 my $binaryChunkActualSize = length($binaryChunk); 1980 my $reverseBinaryChunk = decodeGitBinaryChunk($encodedReverseChunk, $fullPath); 1981 my $reverseBinaryChunkActualSize = length($reverseBinaryChunk); 1982 1983 die "$fullPath: unexpected size of the first chunk (expected $binaryChunkExpectedSize but was $binaryChunkActualSize" if ($binaryChunkType eq "literal" and $binaryChunkExpectedSize != $binaryChunkActualSize); 1984 die "$fullPath: unexpected size of the second chunk (expected $reverseBinaryChunkExpectedSize but was $reverseBinaryChunkActualSize" if ($reverseBinaryChunkType eq "literal" and $reverseBinaryChunkExpectedSize != $reverseBinaryChunkActualSize); 1985 1986 return ($binaryChunkType, $binaryChunk, $reverseBinaryChunkType, $reverseBinaryChunk); 1987 } 1988 1989 sub readByte($$) 1990 { 1991 my ($data, $location) = @_; 1992 1993 # Return the byte at $location in $data as a numeric value. 1994 return ord(substr($data, $location, 1)); 1995 } 1996 1997 # The git binary delta format is undocumented, except in code: 1998 # - https://github.com/git/git/blob/master/delta.h:get_delta_hdr_size is the source 1999 # of the algorithm in decodeGitBinaryPatchDeltaSize. 2000 # - https://github.com/git/git/blob/master/patch-delta.c:patch_delta is the source 2001 # of the algorithm in applyGitBinaryPatchDelta. 2002 sub decodeGitBinaryPatchDeltaSize($) 2003 { 2004 my ($binaryChunk) = @_; 2005 2006 # Source and destination buffer sizes are stored in 7-bit chunks at the 2007 # start of the binary delta patch data. The highest bit in each byte 2008 # except the last is set; the remaining 7 bits provide the next 2009 # chunk of the size. The chunks are stored in ascending significance 2010 # order. 2011 my $cmd; 2012 my $size = 0; 2013 my $shift = 0; 2014 for (my $i = 0; $i < length($binaryChunk);) { 2015 $cmd = readByte($binaryChunk, $i++); 2016 $size |= ($cmd & 0x7f) << $shift; 2017 $shift += 7; 2018 if (!($cmd & 0x80)) { 2019 return ($size, $i); 2020 } 2021 } 2022 } 2023 2024 sub applyGitBinaryPatchDelta($$) 2025 { 2026 my ($binaryChunk, $originalContents) = @_; 2027 2028 # Git delta format consists of two headers indicating source buffer size 2029 # and result size, then a series of commands. Each command is either 2030 # a copy-from-old-version (the 0x80 bit is set) or a copy-from-delta 2031 # command. Commands are applied sequentially to generate the result. 2032 # 2033 # A copy-from-old-version command encodes an offset and size to copy 2034 # from in subsequent bits, while a copy-from-delta command consists only 2035 # of the number of bytes to copy from the delta. 2036 2037 # We don't use these values, but we need to know how big they are so that 2038 # we can skip to the diff data. 2039 my ($size, $bytesUsed) = decodeGitBinaryPatchDeltaSize($binaryChunk); 2040 $binaryChunk = substr($binaryChunk, $bytesUsed); 2041 ($size, $bytesUsed) = decodeGitBinaryPatchDeltaSize($binaryChunk); 2042 $binaryChunk = substr($binaryChunk, $bytesUsed); 2043 2044 my $out = ""; 2045 for (my $i = 0; $i < length($binaryChunk); ) { 2046 my $cmd = ord(substr($binaryChunk, $i++, 1)); 2047 if ($cmd & 0x80) { 2048 # Extract an offset and size from the delta data, then copy 2049 # $size bytes from $offset in the original data into the output. 2050 my $offset = 0; 2051 my $size = 0; 2052 if ($cmd & 0x01) { $offset = readByte($binaryChunk, $i++); } 2053 if ($cmd & 0x02) { $offset |= readByte($binaryChunk, $i++) << 8; } 2054 if ($cmd & 0x04) { $offset |= readByte($binaryChunk, $i++) << 16; } 2055 if ($cmd & 0x08) { $offset |= readByte($binaryChunk, $i++) << 24; } 2056 if ($cmd & 0x10) { $size = readByte($binaryChunk, $i++); } 2057 if ($cmd & 0x20) { $size |= readByte($binaryChunk, $i++) << 8; } 2058 if ($cmd & 0x40) { $size |= readByte($binaryChunk, $i++) << 16; } 2059 if ($size == 0) { $size = 0x10000; } 2060 $out .= substr($originalContents, $offset, $size); 2061 } elsif ($cmd) { 2062 # Copy $cmd bytes from the delta data into the output. 2063 $out .= substr($binaryChunk, $i, $cmd); 2064 $i += $cmd; 2065 } else { 2066 die "unexpected delta opcode 0"; 2067 } 2068 } 2069 2070 return $out; 2071 } 2072 2073 sub escapeSubversionPath($) 2074 { 2075 my ($path) = @_; 2076 $path .= "@" if $path =~ /@/; 2077 return $path; 2078 } 2079 2080 sub runCommand(@) 2081 { 2082 my @args = @_; 2083 my $pid = open(CHILD, "-|"); 2084 if (!defined($pid)) { 2085 die "Failed to fork(): $!"; 2086 } 2087 if ($pid) { 2088 # Parent process 2089 my $childStdout; 2090 while (<CHILD>) { 2091 $childStdout .= $_; 2092 } 2093 close(CHILD); 2094 my %childOutput; 2095 $childOutput{exitStatus} = exitStatus($?); 2096 $childOutput{stdout} = $childStdout if $childStdout; 2097 return \%childOutput; 2098 } 2099 # Child process 2100 # FIXME: Consider further hardening of this function, including sanitizing the environment. 2101 exec { $args[0] } @args or die "Failed to exec(): $!"; 2102 } 2103 2104 sub gitCommitForSVNRevision 2105 { 2106 my ($svnRevision) = @_; 2107 my $command = "git svn find-rev r" . $svnRevision; 2108 $command = "LC_ALL=C $command" if !isWindows(); 2109 my $gitHash = `$command`; 2110 if (!defined($gitHash)) { 2111 $gitHash = "unknown"; 2112 warn "Unable to determine GIT commit from SVN revision"; 2113 } else { 2114 chop($gitHash); 2115 } 2116 return $gitHash; 2117 } 2118 2119 sub listOfChangedFilesBetweenRevisions 2120 { 2121 my ($sourceDir, $firstRevision, $lastRevision) = @_; 2122 my $command; 2123 2124 if ($firstRevision eq "unknown" or $lastRevision eq "unknown") { 2125 return (); 2126 } 2127 2128 # Some VCS functions don't work from within the build dir, so always 2129 # go to the source dir first. 2130 my $cwd = Cwd::getcwd(); 2131 chdir $sourceDir; 2132 2133 if (isGit()) { 2134 my $firstCommit = gitCommitForSVNRevision($firstRevision); 2135 my $lastCommit = gitCommitForSVNRevision($lastRevision); 2136 $command = "git diff --name-status $firstCommit..$lastCommit"; 2137 } elsif (isSVN()) { 2138 $command = "svn diff --summarize -r $firstRevision:$lastRevision"; 2139 } 2140 2141 my @result = (); 2142 2143 if ($command) { 2144 my $diffOutput = `$command`; 2145 $diffOutput =~ s/^[A-Z]\s+//gm; 2146 @result = split(/[\r\n]+/, $diffOutput); 2147 } 2148 2149 chdir $cwd; 2150 2151 return @result; 2152 } 2153 2154 2155 1; 2156