1 # Copyright (C) 2007, 2008, 2009 Apple Inc. All rights reserved. 2 # Copyright (C) 2009, 2010 Chris Jerdonek (chris.jerdonek (at] gmail.com) 3 # Copyright (C) 2010, 2011 Research In Motion Limited. All rights reserved. 4 # 5 # Redistribution and use in source and binary forms, with or without 6 # modification, are permitted provided that the following conditions 7 # are met: 8 # 9 # 1. Redistributions of source code must retain the above copyright 10 # notice, this list of conditions and the following disclaimer. 11 # 2. Redistributions in binary form must reproduce the above copyright 12 # notice, this list of conditions and the following disclaimer in the 13 # documentation and/or other materials provided with the distribution. 14 # 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of 15 # its contributors may be used to endorse or promote products derived 16 # from this software without specific prior written permission. 17 # 18 # THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY 19 # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 20 # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 # DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY 22 # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 23 # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 24 # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 25 # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 27 # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 29 # Module to share code to work with various version control systems. 30 package VCSUtils; 31 32 use strict; 33 use warnings; 34 35 use Cwd qw(); # "qw()" prevents warnings about redefining getcwd() with "use POSIX;" 36 use English; # for $POSTMATCH, etc. 37 use File::Basename; 38 use File::Spec; 39 use POSIX; 40 41 BEGIN { 42 use Exporter (); 43 our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS); 44 $VERSION = 1.00; 45 @ISA = qw(Exporter); 46 @EXPORT = qw( 47 &applyGitBinaryPatchDelta 48 &callSilently 49 &canonicalizePath 50 &changeLogEmailAddress 51 &changeLogName 52 &chdirReturningRelativePath 53 &decodeGitBinaryChunk 54 &decodeGitBinaryPatch 55 &determineSVNRoot 56 &determineVCSRoot 57 &exitStatus 58 &fixChangeLogPatch 59 &gitBranch 60 &gitdiff2svndiff 61 &isGit 62 &isGitSVN 63 &isGitBranchBuild 64 &isGitDirectory 65 &isSVN 66 &isSVNDirectory 67 &isSVNVersion16OrNewer 68 &makeFilePathRelative 69 &mergeChangeLogs 70 &normalizePath 71 &parseFirstEOL 72 &parsePatch 73 &pathRelativeToSVNRepositoryRootForPath 74 &prepareParsedPatch 75 &removeEOL 76 &runPatchCommand 77 &scmMoveOrRenameFile 78 &scmToggleExecutableBit 79 &setChangeLogDateAndReviewer 80 &svnRevisionForDirectory 81 &svnStatus 82 &toWindowsLineEndings 83 ); 84 %EXPORT_TAGS = ( ); 85 @EXPORT_OK = (); 86 } 87 88 our @EXPORT_OK; 89 90 my $gitBranch; 91 my $gitRoot; 92 my $isGit; 93 my $isGitSVN; 94 my $isGitBranchBuild; 95 my $isSVN; 96 my $svnVersion; 97 98 # Project time zone for Cupertino, CA, US 99 my $changeLogTimeZone = "PST8PDT"; 100 101 my $chunkRangeRegEx = qr#^\@\@ -(\d+),(\d+) \+\d+,(\d+) \@\@$#; # e.g. @@ -2,6 +2,18 @@ 102 my $gitDiffStartRegEx = qr#^diff --git (\w/)?(.+) (\w/)?([^\r\n]+)#; 103 my $svnDiffStartRegEx = qr#^Index: ([^\r\n]+)#; 104 my $svnPropertiesStartRegEx = qr#^Property changes on: ([^\r\n]+)#; # $1 is normally the same as the index path. 105 my $svnPropertyStartRegEx = qr#^(Modified|Name|Added|Deleted): ([^\r\n]+)#; # $2 is the name of the property. 106 my $svnPropertyValueStartRegEx = qr#^ (\+|-|Merged|Reverse-merged) ([^\r\n]+)#; # $2 is the start of the property's value (which may span multiple lines). 107 108 # This method is for portability. Return the system-appropriate exit 109 # status of a child process. 110 # 111 # Args: pass the child error status returned by the last pipe close, 112 # for example "$?". 113 sub exitStatus($) 114 { 115 my ($returnvalue) = @_; 116 if ($^O eq "MSWin32") { 117 return $returnvalue >> 8; 118 } 119 return WEXITSTATUS($returnvalue); 120 } 121 122 # Call a function while suppressing STDERR, and return the return values 123 # as an array. 124 sub callSilently($@) { 125 my ($func, @args) = @_; 126 127 # The following pattern was taken from here: 128 # http://www.sdsc.edu/~moreland/courses/IntroPerl/docs/manual/pod/perlfunc/open.html 129 # 130 # Also see this Perl documentation (search for "open OLDERR"): 131 # http://perldoc.perl.org/functions/open.html 132 open(OLDERR, ">&STDERR"); 133 close(STDERR); 134 my @returnValue = &$func(@args); 135 open(STDERR, ">&OLDERR"); 136 close(OLDERR); 137 138 return @returnValue; 139 } 140 141 sub toWindowsLineEndings 142 { 143 my ($text) = @_; 144 $text =~ s/\n/\r\n/g; 145 return $text; 146 } 147 148 # Note, this method will not error if the file corresponding to the $source path does not exist. 149 sub scmMoveOrRenameFile 150 { 151 my ($source, $destination) = @_; 152 return if ! -e $source; 153 if (isSVN()) { 154 system("svn", "move", $source, $destination); 155 } elsif (isGit()) { 156 system("git", "mv", $source, $destination); 157 } 158 } 159 160 # Note, this method will not error if the file corresponding to the path does not exist. 161 sub scmToggleExecutableBit 162 { 163 my ($path, $executableBitDelta) = @_; 164 return if ! -e $path; 165 if ($executableBitDelta == 1) { 166 scmAddExecutableBit($path); 167 } elsif ($executableBitDelta == -1) { 168 scmRemoveExecutableBit($path); 169 } 170 } 171 172 sub scmAddExecutableBit($) 173 { 174 my ($path) = @_; 175 176 if (isSVN()) { 177 system("svn", "propset", "svn:executable", "on", $path) == 0 or die "Failed to run 'svn propset svn:executable on $path'."; 178 } elsif (isGit()) { 179 chmod(0755, $path); 180 } 181 } 182 183 sub scmRemoveExecutableBit($) 184 { 185 my ($path) = @_; 186 187 if (isSVN()) { 188 system("svn", "propdel", "svn:executable", $path) == 0 or die "Failed to run 'svn propdel svn:executable $path'."; 189 } elsif (isGit()) { 190 chmod(0664, $path); 191 } 192 } 193 194 sub isGitDirectory($) 195 { 196 my ($dir) = @_; 197 return system("cd $dir && git rev-parse > " . File::Spec->devnull() . " 2>&1") == 0; 198 } 199 200 sub isGit() 201 { 202 return $isGit if defined $isGit; 203 204 $isGit = isGitDirectory("."); 205 return $isGit; 206 } 207 208 sub isGitSVN() 209 { 210 return $isGitSVN if defined $isGitSVN; 211 212 # There doesn't seem to be an officially documented way to determine 213 # if you're in a git-svn checkout. The best suggestions seen so far 214 # all use something like the following: 215 my $output = `git config --get svn-remote.svn.fetch 2>& 1`; 216 $isGitSVN = $output ne ''; 217 return $isGitSVN; 218 } 219 220 sub gitBranch() 221 { 222 unless (defined $gitBranch) { 223 chomp($gitBranch = `git symbolic-ref -q HEAD`); 224 $gitBranch = "" if exitStatus($?); 225 $gitBranch =~ s#^refs/heads/##; 226 $gitBranch = "" if $gitBranch eq "master"; 227 } 228 229 return $gitBranch; 230 } 231 232 sub isGitBranchBuild() 233 { 234 my $branch = gitBranch(); 235 chomp(my $override = `git config --bool branch.$branch.webKitBranchBuild`); 236 return 1 if $override eq "true"; 237 return 0 if $override eq "false"; 238 239 unless (defined $isGitBranchBuild) { 240 chomp(my $gitBranchBuild = `git config --bool core.webKitBranchBuild`); 241 $isGitBranchBuild = $gitBranchBuild eq "true"; 242 } 243 244 return $isGitBranchBuild; 245 } 246 247 sub isSVNDirectory($) 248 { 249 my ($dir) = @_; 250 251 return -d File::Spec->catdir($dir, ".svn"); 252 } 253 254 sub isSVN() 255 { 256 return $isSVN if defined $isSVN; 257 258 $isSVN = isSVNDirectory("."); 259 return $isSVN; 260 } 261 262 sub svnVersion() 263 { 264 return $svnVersion if defined $svnVersion; 265 266 if (!isSVN()) { 267 $svnVersion = 0; 268 } else { 269 chomp($svnVersion = `svn --version --quiet`); 270 } 271 return $svnVersion; 272 } 273 274 sub isSVNVersion16OrNewer() 275 { 276 my $version = svnVersion(); 277 return eval "v$version" ge v1.6; 278 } 279 280 sub chdirReturningRelativePath($) 281 { 282 my ($directory) = @_; 283 my $previousDirectory = Cwd::getcwd(); 284 chdir $directory; 285 my $newDirectory = Cwd::getcwd(); 286 return "." if $newDirectory eq $previousDirectory; 287 return File::Spec->abs2rel($previousDirectory, $newDirectory); 288 } 289 290 sub determineGitRoot() 291 { 292 chomp(my $gitDir = `git rev-parse --git-dir`); 293 return dirname($gitDir); 294 } 295 296 sub determineSVNRoot() 297 { 298 my $last = ''; 299 my $path = '.'; 300 my $parent = '..'; 301 my $repositoryRoot; 302 my $repositoryUUID; 303 while (1) { 304 my $thisRoot; 305 my $thisUUID; 306 # Ignore error messages in case we've run past the root of the checkout. 307 open INFO, "svn info '$path' 2> " . File::Spec->devnull() . " |" or die; 308 while (<INFO>) { 309 if (/^Repository Root: (.+)/) { 310 $thisRoot = $1; 311 } 312 if (/^Repository UUID: (.+)/) { 313 $thisUUID = $1; 314 } 315 if ($thisRoot && $thisUUID) { 316 local $/ = undef; 317 <INFO>; # Consume the rest of the input. 318 } 319 } 320 close INFO; 321 322 # It's possible (e.g. for developers of some ports) to have a WebKit 323 # checkout in a subdirectory of another checkout. So abort if the 324 # repository root or the repository UUID suddenly changes. 325 last if !$thisUUID; 326 $repositoryUUID = $thisUUID if !$repositoryUUID; 327 last if $thisUUID ne $repositoryUUID; 328 329 last if !$thisRoot; 330 $repositoryRoot = $thisRoot if !$repositoryRoot; 331 last if $thisRoot ne $repositoryRoot; 332 333 $last = $path; 334 $path = File::Spec->catdir($parent, $path); 335 } 336 337 return File::Spec->rel2abs($last); 338 } 339 340 sub determineVCSRoot() 341 { 342 if (isGit()) { 343 return determineGitRoot(); 344 } 345 346 if (!isSVN()) { 347 # Some users have a workflow where svn-create-patch, svn-apply and 348 # svn-unapply are used outside of multiple svn working directores, 349 # so warn the user and assume Subversion is being used in this case. 350 warn "Unable to determine VCS root; assuming Subversion"; 351 $isSVN = 1; 352 } 353 354 return determineSVNRoot(); 355 } 356 357 sub svnRevisionForDirectory($) 358 { 359 my ($dir) = @_; 360 my $revision; 361 362 if (isSVNDirectory($dir)) { 363 my $svnInfo = `LC_ALL=C svn info $dir | grep Revision:`; 364 ($revision) = ($svnInfo =~ m/Revision: (\d+).*/g); 365 } elsif (isGitDirectory($dir)) { 366 my $gitLog = `cd $dir && LC_ALL=C git log --grep='git-svn-id: ' -n 1 | grep git-svn-id:`; 367 ($revision) = ($gitLog =~ m/ +git-svn-id: .+@(\d+) /g); 368 } 369 die "Unable to determine current SVN revision in $dir" unless (defined $revision); 370 return $revision; 371 } 372 373 sub pathRelativeToSVNRepositoryRootForPath($) 374 { 375 my ($file) = @_; 376 my $relativePath = File::Spec->abs2rel($file); 377 378 my $svnInfo; 379 if (isSVN()) { 380 $svnInfo = `LC_ALL=C svn info $relativePath`; 381 } elsif (isGit()) { 382 $svnInfo = `LC_ALL=C git svn info $relativePath`; 383 } 384 385 $svnInfo =~ /.*^URL: (.*?)$/m; 386 my $svnURL = $1; 387 388 $svnInfo =~ /.*^Repository Root: (.*?)$/m; 389 my $repositoryRoot = $1; 390 391 $svnURL =~ s/$repositoryRoot\///; 392 return $svnURL; 393 } 394 395 sub makeFilePathRelative($) 396 { 397 my ($path) = @_; 398 return $path unless isGit(); 399 400 unless (defined $gitRoot) { 401 chomp($gitRoot = `git rev-parse --show-cdup`); 402 } 403 return $gitRoot . $path; 404 } 405 406 sub normalizePath($) 407 { 408 my ($path) = @_; 409 $path =~ s/\\/\//g; 410 return $path; 411 } 412 413 sub adjustPathForRecentRenamings($) 414 { 415 my ($fullPath) = @_; 416 417 if ($fullPath =~ m|^WebCore/| 418 || $fullPath =~ m|^JavaScriptCore/| 419 || $fullPath =~ m|^WebKit/| 420 || $fullPath =~ m|^WebKit2/|) { 421 return "Source/$fullPath"; 422 } 423 return $fullPath; 424 } 425 426 sub canonicalizePath($) 427 { 428 my ($file) = @_; 429 430 # Remove extra slashes and '.' directories in path 431 $file = File::Spec->canonpath($file); 432 433 # Remove '..' directories in path 434 my @dirs = (); 435 foreach my $dir (File::Spec->splitdir($file)) { 436 if ($dir eq '..' && $#dirs >= 0 && $dirs[$#dirs] ne '..') { 437 pop(@dirs); 438 } else { 439 push(@dirs, $dir); 440 } 441 } 442 return ($#dirs >= 0) ? File::Spec->catdir(@dirs) : "."; 443 } 444 445 sub removeEOL($) 446 { 447 my ($line) = @_; 448 return "" unless $line; 449 450 $line =~ s/[\r\n]+$//g; 451 return $line; 452 } 453 454 sub parseFirstEOL($) 455 { 456 my ($fileHandle) = @_; 457 458 # Make input record separator the new-line character to simplify regex matching below. 459 my $savedInputRecordSeparator = $INPUT_RECORD_SEPARATOR; 460 $INPUT_RECORD_SEPARATOR = "\n"; 461 my $firstLine = <$fileHandle>; 462 $INPUT_RECORD_SEPARATOR = $savedInputRecordSeparator; 463 464 return unless defined($firstLine); 465 466 my $eol; 467 if ($firstLine =~ /\r\n/) { 468 $eol = "\r\n"; 469 } elsif ($firstLine =~ /\r/) { 470 $eol = "\r"; 471 } elsif ($firstLine =~ /\n/) { 472 $eol = "\n"; 473 } 474 return $eol; 475 } 476 477 sub firstEOLInFile($) 478 { 479 my ($file) = @_; 480 my $eol; 481 if (open(FILE, $file)) { 482 $eol = parseFirstEOL(*FILE); 483 close(FILE); 484 } 485 return $eol; 486 } 487 488 sub svnStatus($) 489 { 490 my ($fullPath) = @_; 491 my $svnStatus; 492 open SVN, "svn status --non-interactive --non-recursive '$fullPath' |" or die; 493 if (-d $fullPath) { 494 # When running "svn stat" on a directory, we can't assume that only one 495 # status will be returned (since any files with a status below the 496 # directory will be returned), and we can't assume that the directory will 497 # be first (since any files with unknown status will be listed first). 498 my $normalizedFullPath = File::Spec->catdir(File::Spec->splitdir($fullPath)); 499 while (<SVN>) { 500 # Input may use a different EOL sequence than $/, so avoid chomp. 501 $_ = removeEOL($_); 502 my $normalizedStatPath = File::Spec->catdir(File::Spec->splitdir(substr($_, 7))); 503 if ($normalizedFullPath eq $normalizedStatPath) { 504 $svnStatus = "$_\n"; 505 last; 506 } 507 } 508 # Read the rest of the svn command output to avoid a broken pipe warning. 509 local $/ = undef; 510 <SVN>; 511 } 512 else { 513 # Files will have only one status returned. 514 $svnStatus = removeEOL(<SVN>) . "\n"; 515 } 516 close SVN; 517 return $svnStatus; 518 } 519 520 # Return whether the given file mode is executable in the source control 521 # sense. We make this determination based on whether the executable bit 522 # is set for "others" rather than the stronger condition that it be set 523 # for the user, group, and others. This is sufficient for distinguishing 524 # the default behavior in Git and SVN. 525 # 526 # Args: 527 # $fileMode: A number or string representing a file mode in octal notation. 528 sub isExecutable($) 529 { 530 my $fileMode = shift; 531 532 return $fileMode % 2; 533 } 534 535 # Parse the next Git diff header from the given file handle, and advance 536 # the handle so the last line read is the first line after the header. 537 # 538 # This subroutine dies if given leading junk. 539 # 540 # Args: 541 # $fileHandle: advanced so the last line read from the handle is the first 542 # line of the header to parse. This should be a line 543 # beginning with "diff --git". 544 # $line: the line last read from $fileHandle 545 # 546 # Returns ($headerHashRef, $lastReadLine): 547 # $headerHashRef: a hash reference representing a diff header, as follows-- 548 # copiedFromPath: the path from which the file was copied or moved if 549 # the diff is a copy or move. 550 # executableBitDelta: the value 1 or -1 if the executable bit was added or 551 # removed, respectively. New and deleted files have 552 # this value only if the file is executable, in which 553 # case the value is 1 and -1, respectively. 554 # indexPath: the path of the target file. 555 # isBinary: the value 1 if the diff is for a binary file. 556 # isDeletion: the value 1 if the diff is a file deletion. 557 # isCopyWithChanges: the value 1 if the file was copied or moved and 558 # the target file was changed in some way after being 559 # copied or moved (e.g. if its contents or executable 560 # bit were changed). 561 # isNew: the value 1 if the diff is for a new file. 562 # shouldDeleteSource: the value 1 if the file was copied or moved and 563 # the source file was deleted -- i.e. if the copy 564 # was actually a move. 565 # svnConvertedText: the header text with some lines converted to SVN 566 # format. Git-specific lines are preserved. 567 # $lastReadLine: the line last read from $fileHandle. 568 sub parseGitDiffHeader($$) 569 { 570 my ($fileHandle, $line) = @_; 571 572 $_ = $line; 573 574 my $indexPath; 575 if (/$gitDiffStartRegEx/) { 576 # The first and second paths can differ in the case of copies 577 # and renames. We use the second file path because it is the 578 # destination path. 579 $indexPath = adjustPathForRecentRenamings($4); 580 # Use $POSTMATCH to preserve the end-of-line character. 581 $_ = "Index: $indexPath$POSTMATCH"; # Convert to SVN format. 582 } else { 583 die("Could not parse leading \"diff --git\" line: \"$line\"."); 584 } 585 586 my $copiedFromPath; 587 my $foundHeaderEnding; 588 my $isBinary; 589 my $isDeletion; 590 my $isNew; 591 my $newExecutableBit = 0; 592 my $oldExecutableBit = 0; 593 my $shouldDeleteSource = 0; 594 my $similarityIndex = 0; 595 my $svnConvertedText; 596 while (1) { 597 # Temporarily strip off any end-of-line characters to simplify 598 # regex matching below. 599 s/([\n\r]+)$//; 600 my $eol = $1; 601 602 if (/^(deleted file|old) mode (\d+)/) { 603 $oldExecutableBit = (isExecutable($2) ? 1 : 0); 604 $isDeletion = 1 if $1 eq "deleted file"; 605 } elsif (/^new( file)? mode (\d+)/) { 606 $newExecutableBit = (isExecutable($2) ? 1 : 0); 607 $isNew = 1 if $1; 608 } elsif (/^similarity index (\d+)%/) { 609 $similarityIndex = $1; 610 } elsif (/^copy from (\S+)/) { 611 $copiedFromPath = $1; 612 } elsif (/^rename from (\S+)/) { 613 # FIXME: Record this as a move rather than as a copy-and-delete. 614 # This will simplify adding rename support to svn-unapply. 615 # Otherwise, the hash for a deletion would have to know 616 # everything about the file being deleted in order to 617 # support undoing itself. Recording as a move will also 618 # permit us to use "svn move" and "git move". 619 $copiedFromPath = $1; 620 $shouldDeleteSource = 1; 621 } elsif (/^--- \S+/) { 622 $_ = "--- $indexPath"; # Convert to SVN format. 623 } elsif (/^\+\+\+ \S+/) { 624 $_ = "+++ $indexPath"; # Convert to SVN format. 625 $foundHeaderEnding = 1; 626 } elsif (/^GIT binary patch$/ ) { 627 $isBinary = 1; 628 $foundHeaderEnding = 1; 629 # The "git diff" command includes a line of the form "Binary files 630 # <path1> and <path2> differ" if the --binary flag is not used. 631 } elsif (/^Binary files / ) { 632 die("Error: the Git diff contains a binary file without the binary data in ". 633 "line: \"$_\". Be sure to use the --binary flag when invoking \"git diff\" ". 634 "with diffs containing binary files."); 635 } 636 637 $svnConvertedText .= "$_$eol"; # Also restore end-of-line characters. 638 639 $_ = <$fileHandle>; # Not defined if end-of-file reached. 640 641 last if (!defined($_) || /$gitDiffStartRegEx/ || $foundHeaderEnding); 642 } 643 644 my $executableBitDelta = $newExecutableBit - $oldExecutableBit; 645 646 my %header; 647 648 $header{copiedFromPath} = $copiedFromPath if $copiedFromPath; 649 $header{executableBitDelta} = $executableBitDelta if $executableBitDelta; 650 $header{indexPath} = $indexPath; 651 $header{isBinary} = $isBinary if $isBinary; 652 $header{isCopyWithChanges} = 1 if ($copiedFromPath && ($similarityIndex != 100 || $executableBitDelta)); 653 $header{isDeletion} = $isDeletion if $isDeletion; 654 $header{isNew} = $isNew if $isNew; 655 $header{shouldDeleteSource} = $shouldDeleteSource if $shouldDeleteSource; 656 $header{svnConvertedText} = $svnConvertedText; 657 658 return (\%header, $_); 659 } 660 661 # Parse the next SVN diff header from the given file handle, and advance 662 # the handle so the last line read is the first line after the header. 663 # 664 # This subroutine dies if given leading junk or if it could not detect 665 # the end of the header block. 666 # 667 # Args: 668 # $fileHandle: advanced so the last line read from the handle is the first 669 # line of the header to parse. This should be a line 670 # beginning with "Index:". 671 # $line: the line last read from $fileHandle 672 # 673 # Returns ($headerHashRef, $lastReadLine): 674 # $headerHashRef: a hash reference representing a diff header, as follows-- 675 # copiedFromPath: the path from which the file was copied if the diff 676 # is a copy. 677 # indexPath: the path of the target file, which is the path found in 678 # the "Index:" line. 679 # isBinary: the value 1 if the diff is for a binary file. 680 # isNew: the value 1 if the diff is for a new file. 681 # sourceRevision: the revision number of the source, if it exists. This 682 # is the same as the revision number the file was copied 683 # from, in the case of a file copy. 684 # svnConvertedText: the header text converted to a header with the paths 685 # in some lines corrected. 686 # $lastReadLine: the line last read from $fileHandle. 687 sub parseSvnDiffHeader($$) 688 { 689 my ($fileHandle, $line) = @_; 690 691 $_ = $line; 692 693 my $indexPath; 694 if (/$svnDiffStartRegEx/) { 695 $indexPath = adjustPathForRecentRenamings($1); 696 } else { 697 die("First line of SVN diff does not begin with \"Index \": \"$_\""); 698 } 699 700 my $copiedFromPath; 701 my $foundHeaderEnding; 702 my $isBinary; 703 my $isNew; 704 my $sourceRevision; 705 my $svnConvertedText; 706 while (1) { 707 # Temporarily strip off any end-of-line characters to simplify 708 # regex matching below. 709 s/([\n\r]+)$//; 710 my $eol = $1; 711 712 # Fix paths on ""---" and "+++" lines to match the leading 713 # index line. 714 if (s/^--- \S+/--- $indexPath/) { 715 # --- 716 if (/^--- .+\(revision (\d+)\)/) { 717 $sourceRevision = $1; 718 $isNew = 1 if !$sourceRevision; # if revision 0. 719 if (/\(from (\S+):(\d+)\)$/) { 720 # The "from" clause is created by svn-create-patch, in 721 # which case there is always also a "revision" clause. 722 $copiedFromPath = $1; 723 die("Revision number \"$2\" in \"from\" clause does not match " . 724 "source revision number \"$sourceRevision\".") if ($2 != $sourceRevision); 725 } 726 } 727 } elsif (s/^\+\+\+ \S+/+++ $indexPath/) { 728 $foundHeaderEnding = 1; 729 } elsif (/^Cannot display: file marked as a binary type.$/) { 730 $isBinary = 1; 731 $foundHeaderEnding = 1; 732 } 733 734 $svnConvertedText .= "$_$eol"; # Also restore end-of-line characters. 735 736 $_ = <$fileHandle>; # Not defined if end-of-file reached. 737 738 last if (!defined($_) || /$svnDiffStartRegEx/ || $foundHeaderEnding); 739 } 740 741 if (!$foundHeaderEnding) { 742 die("Did not find end of header block corresponding to index path \"$indexPath\"."); 743 } 744 745 my %header; 746 747 $header{copiedFromPath} = $copiedFromPath if $copiedFromPath; 748 $header{indexPath} = $indexPath; 749 $header{isBinary} = $isBinary if $isBinary; 750 $header{isNew} = $isNew if $isNew; 751 $header{sourceRevision} = $sourceRevision if $sourceRevision; 752 $header{svnConvertedText} = $svnConvertedText; 753 754 return (\%header, $_); 755 } 756 757 # Parse the next diff header from the given file handle, and advance 758 # the handle so the last line read is the first line after the header. 759 # 760 # This subroutine dies if given leading junk or if it could not detect 761 # the end of the header block. 762 # 763 # Args: 764 # $fileHandle: advanced so the last line read from the handle is the first 765 # line of the header to parse. For SVN-formatted diffs, this 766 # is a line beginning with "Index:". For Git, this is a line 767 # beginning with "diff --git". 768 # $line: the line last read from $fileHandle 769 # 770 # Returns ($headerHashRef, $lastReadLine): 771 # $headerHashRef: a hash reference representing a diff header 772 # copiedFromPath: the path from which the file was copied if the diff 773 # is a copy. 774 # executableBitDelta: the value 1 or -1 if the executable bit was added or 775 # removed, respectively. New and deleted files have 776 # this value only if the file is executable, in which 777 # case the value is 1 and -1, respectively. 778 # indexPath: the path of the target file. 779 # isBinary: the value 1 if the diff is for a binary file. 780 # isGit: the value 1 if the diff is Git-formatted. 781 # isSvn: the value 1 if the diff is SVN-formatted. 782 # sourceRevision: the revision number of the source, if it exists. This 783 # is the same as the revision number the file was copied 784 # from, in the case of a file copy. 785 # svnConvertedText: the header text with some lines converted to SVN 786 # format. Git-specific lines are preserved. 787 # $lastReadLine: the line last read from $fileHandle. 788 sub parseDiffHeader($$) 789 { 790 my ($fileHandle, $line) = @_; 791 792 my $header; # This is a hash ref. 793 my $isGit; 794 my $isSvn; 795 my $lastReadLine; 796 797 if ($line =~ $svnDiffStartRegEx) { 798 $isSvn = 1; 799 ($header, $lastReadLine) = parseSvnDiffHeader($fileHandle, $line); 800 } elsif ($line =~ $gitDiffStartRegEx) { 801 $isGit = 1; 802 ($header, $lastReadLine) = parseGitDiffHeader($fileHandle, $line); 803 } else { 804 die("First line of diff does not begin with \"Index:\" or \"diff --git\": \"$line\""); 805 } 806 807 $header->{isGit} = $isGit if $isGit; 808 $header->{isSvn} = $isSvn if $isSvn; 809 810 return ($header, $lastReadLine); 811 } 812 813 # FIXME: The %diffHash "object" should not have an svnConvertedText property. 814 # Instead, the hash object should store its information in a 815 # structured way as properties. This should be done in a way so 816 # that, if necessary, the text of an SVN or Git patch can be 817 # reconstructed from the information in those hash properties. 818 # 819 # A %diffHash is a hash representing a source control diff of a single 820 # file operation (e.g. a file modification, copy, or delete). 821 # 822 # These hashes appear, for example, in the parseDiff(), parsePatch(), 823 # and prepareParsedPatch() subroutines of this package. 824 # 825 # The corresponding values are-- 826 # 827 # copiedFromPath: the path from which the file was copied if the diff 828 # is a copy. 829 # executableBitDelta: the value 1 or -1 if the executable bit was added or 830 # removed from the target file, respectively. 831 # indexPath: the path of the target file. For SVN-formatted diffs, 832 # this is the same as the path in the "Index:" line. 833 # isBinary: the value 1 if the diff is for a binary file. 834 # isDeletion: the value 1 if the diff is known from the header to be a deletion. 835 # isGit: the value 1 if the diff is Git-formatted. 836 # isNew: the value 1 if the dif is known from the header to be a new file. 837 # isSvn: the value 1 if the diff is SVN-formatted. 838 # sourceRevision: the revision number of the source, if it exists. This 839 # is the same as the revision number the file was copied 840 # from, in the case of a file copy. 841 # svnConvertedText: the diff with some lines converted to SVN format. 842 # Git-specific lines are preserved. 843 844 # Parse one diff from a patch file created by svn-create-patch, and 845 # advance the file handle so the last line read is the first line 846 # of the next header block. 847 # 848 # This subroutine preserves any leading junk encountered before the header. 849 # 850 # Composition of an SVN diff 851 # 852 # There are three parts to an SVN diff: the header, the property change, and 853 # the binary contents, in that order. Either the header or the property change 854 # may be ommitted, but not both. If there are binary changes, then you always 855 # have all three. 856 # 857 # Args: 858 # $fileHandle: a file handle advanced to the first line of the next 859 # header block. Leading junk is okay. 860 # $line: the line last read from $fileHandle. 861 # $optionsHashRef: a hash reference representing optional options to use 862 # when processing a diff. 863 # shouldNotUseIndexPathEOL: whether to use the line endings in the diff instead 864 # instead of the line endings in the target file; the 865 # value of 1 if svnConvertedText should use the line 866 # endings in the diff. 867 # 868 # Returns ($diffHashRefs, $lastReadLine): 869 # $diffHashRefs: A reference to an array of references to %diffHash hashes. 870 # See the %diffHash documentation above. 871 # $lastReadLine: the line last read from $fileHandle 872 sub parseDiff($$;$) 873 { 874 # FIXME: Adjust this method so that it dies if the first line does not 875 # match the start of a diff. This will require a change to 876 # parsePatch() so that parsePatch() skips over leading junk. 877 my ($fileHandle, $line, $optionsHashRef) = @_; 878 879 my $headerStartRegEx = $svnDiffStartRegEx; # SVN-style header for the default 880 881 my $headerHashRef; # Last header found, as returned by parseDiffHeader(). 882 my $svnPropertiesHashRef; # Last SVN properties diff found, as returned by parseSvnDiffProperties(). 883 my $svnText; 884 my $indexPathEOL; 885 while (defined($line)) { 886 if (!$headerHashRef && ($line =~ $gitDiffStartRegEx)) { 887 # Then assume all diffs in the patch are Git-formatted. This 888 # block was made to be enterable at most once since we assume 889 # all diffs in the patch are formatted the same (SVN or Git). 890 $headerStartRegEx = $gitDiffStartRegEx; 891 } 892 893 if ($line =~ $svnPropertiesStartRegEx) { 894 my $propertyPath = $1; 895 if ($svnPropertiesHashRef || $headerHashRef && ($propertyPath ne $headerHashRef->{indexPath})) { 896 # This is the start of the second diff in the while loop, which happens to 897 # be a property diff. If $svnPropertiesHasRef is defined, then this is the 898 # second consecutive property diff, otherwise it's the start of a property 899 # diff for a file that only has property changes. 900 last; 901 } 902 ($svnPropertiesHashRef, $line) = parseSvnDiffProperties($fileHandle, $line); 903 next; 904 } 905 if ($line !~ $headerStartRegEx) { 906 # Then we are in the body of the diff. 907 if ($indexPathEOL && $line !~ /$chunkRangeRegEx/) { 908 # The chunk range is part of the body of the diff, but its line endings should't be 909 # modified or patch(1) will complain. So, we only modify non-chunk range lines. 910 $line =~ s/\r\n|\r|\n/$indexPathEOL/g; 911 } 912 $svnText .= $line; 913 $line = <$fileHandle>; 914 next; 915 } # Otherwise, we found a diff header. 916 917 if ($svnPropertiesHashRef || $headerHashRef) { 918 # Then either we just processed an SVN property change or this 919 # is the start of the second diff header of this while loop. 920 last; 921 } 922 923 ($headerHashRef, $line) = parseDiffHeader($fileHandle, $line); 924 if (!$optionsHashRef || !$optionsHashRef->{shouldNotUseIndexPathEOL}) { 925 $indexPathEOL = firstEOLInFile($headerHashRef->{indexPath}) if !$headerHashRef->{isNew} && !$headerHashRef->{isBinary}; 926 } 927 928 $svnText .= $headerHashRef->{svnConvertedText}; 929 } 930 931 my @diffHashRefs; 932 933 if ($headerHashRef->{shouldDeleteSource}) { 934 my %deletionHash; 935 $deletionHash{indexPath} = $headerHashRef->{copiedFromPath}; 936 $deletionHash{isDeletion} = 1; 937 push @diffHashRefs, \%deletionHash; 938 } 939 if ($headerHashRef->{copiedFromPath}) { 940 my %copyHash; 941 $copyHash{copiedFromPath} = $headerHashRef->{copiedFromPath}; 942 $copyHash{indexPath} = $headerHashRef->{indexPath}; 943 $copyHash{sourceRevision} = $headerHashRef->{sourceRevision} if $headerHashRef->{sourceRevision}; 944 if ($headerHashRef->{isSvn}) { 945 $copyHash{executableBitDelta} = $svnPropertiesHashRef->{executableBitDelta} if $svnPropertiesHashRef->{executableBitDelta}; 946 } 947 push @diffHashRefs, \%copyHash; 948 } 949 950 # Note, the order of evaluation for the following if conditional has been explicitly chosen so that 951 # it evaluates to false when there is no headerHashRef (e.g. a property change diff for a file that 952 # only has property changes). 953 if ($headerHashRef->{isCopyWithChanges} || (%$headerHashRef && !$headerHashRef->{copiedFromPath})) { 954 # Then add the usual file modification. 955 my %diffHash; 956 # FIXME: We should expand this code to support other properties. In the future, 957 # parseSvnDiffProperties may return a hash whose keys are the properties. 958 if ($headerHashRef->{isSvn}) { 959 # SVN records the change to the executable bit in a separate property change diff 960 # that follows the contents of the diff, except for binary diffs. For binary 961 # diffs, the property change diff follows the diff header. 962 $diffHash{executableBitDelta} = $svnPropertiesHashRef->{executableBitDelta} if $svnPropertiesHashRef->{executableBitDelta}; 963 } elsif ($headerHashRef->{isGit}) { 964 # Git records the change to the executable bit in the header of a diff. 965 $diffHash{executableBitDelta} = $headerHashRef->{executableBitDelta} if $headerHashRef->{executableBitDelta}; 966 } 967 $diffHash{indexPath} = $headerHashRef->{indexPath}; 968 $diffHash{isBinary} = $headerHashRef->{isBinary} if $headerHashRef->{isBinary}; 969 $diffHash{isDeletion} = $headerHashRef->{isDeletion} if $headerHashRef->{isDeletion}; 970 $diffHash{isGit} = $headerHashRef->{isGit} if $headerHashRef->{isGit}; 971 $diffHash{isNew} = $headerHashRef->{isNew} if $headerHashRef->{isNew}; 972 $diffHash{isSvn} = $headerHashRef->{isSvn} if $headerHashRef->{isSvn}; 973 if (!$headerHashRef->{copiedFromPath}) { 974 # If the file was copied, then we have already incorporated the 975 # sourceRevision information into the change. 976 $diffHash{sourceRevision} = $headerHashRef->{sourceRevision} if $headerHashRef->{sourceRevision}; 977 } 978 # FIXME: Remove the need for svnConvertedText. See the %diffHash 979 # code comments above for more information. 980 # 981 # Note, we may not always have SVN converted text since we intend 982 # to deprecate it in the future. For example, a property change 983 # diff for a file that only has property changes will not return 984 # any SVN converted text. 985 $diffHash{svnConvertedText} = $svnText if $svnText; 986 push @diffHashRefs, \%diffHash; 987 } 988 989 if (!%$headerHashRef && $svnPropertiesHashRef) { 990 # A property change diff for a file that only has property changes. 991 my %propertyChangeHash; 992 $propertyChangeHash{executableBitDelta} = $svnPropertiesHashRef->{executableBitDelta} if $svnPropertiesHashRef->{executableBitDelta}; 993 $propertyChangeHash{indexPath} = $svnPropertiesHashRef->{propertyPath}; 994 $propertyChangeHash{isSvn} = 1; 995 push @diffHashRefs, \%propertyChangeHash; 996 } 997 998 return (\@diffHashRefs, $line); 999 } 1000 1001 # Parse an SVN property change diff from the given file handle, and advance 1002 # the handle so the last line read is the first line after this diff. 1003 # 1004 # For the case of an SVN binary diff, the binary contents will follow the 1005 # the property changes. 1006 # 1007 # This subroutine dies if the first line does not begin with "Property changes on" 1008 # or if the separator line that follows this line is missing. 1009 # 1010 # Args: 1011 # $fileHandle: advanced so the last line read from the handle is the first 1012 # line of the footer to parse. This line begins with 1013 # "Property changes on". 1014 # $line: the line last read from $fileHandle. 1015 # 1016 # Returns ($propertyHashRef, $lastReadLine): 1017 # $propertyHashRef: a hash reference representing an SVN diff footer. 1018 # propertyPath: the path of the target file. 1019 # executableBitDelta: the value 1 or -1 if the executable bit was added or 1020 # removed from the target file, respectively. 1021 # $lastReadLine: the line last read from $fileHandle. 1022 sub parseSvnDiffProperties($$) 1023 { 1024 my ($fileHandle, $line) = @_; 1025 1026 $_ = $line; 1027 1028 my %footer; 1029 if (/$svnPropertiesStartRegEx/) { 1030 $footer{propertyPath} = $1; 1031 } else { 1032 die("Failed to find start of SVN property change, \"Property changes on \": \"$_\""); 1033 } 1034 1035 # We advance $fileHandle two lines so that the next line that 1036 # we process is $svnPropertyStartRegEx in a well-formed footer. 1037 # A well-formed footer has the form: 1038 # Property changes on: FileA 1039 # ___________________________________________________________________ 1040 # Added: svn:executable 1041 # + * 1042 $_ = <$fileHandle>; # Not defined if end-of-file reached. 1043 my $separator = "_" x 67; 1044 if (defined($_) && /^$separator[\r\n]+$/) { 1045 $_ = <$fileHandle>; 1046 } else { 1047 die("Failed to find separator line: \"$_\"."); 1048 } 1049 1050 # FIXME: We should expand this to support other SVN properties 1051 # (e.g. return a hash of property key-values that represents 1052 # all properties). 1053 # 1054 # Notice, we keep processing until we hit end-of-file or some 1055 # line that does not resemble $svnPropertyStartRegEx, such as 1056 # the empty line that precedes the start of the binary contents 1057 # of a patch, or the start of the next diff (e.g. "Index:"). 1058 my $propertyHashRef; 1059 while (defined($_) && /$svnPropertyStartRegEx/) { 1060 ($propertyHashRef, $_) = parseSvnProperty($fileHandle, $_); 1061 if ($propertyHashRef->{name} eq "svn:executable") { 1062 # Notice, for SVN properties, propertyChangeDelta is always non-zero 1063 # because a property can only be added or removed. 1064 $footer{executableBitDelta} = $propertyHashRef->{propertyChangeDelta}; 1065 } 1066 } 1067 1068 return(\%footer, $_); 1069 } 1070 1071 # Parse the next SVN property from the given file handle, and advance the handle so the last 1072 # line read is the first line after the property. 1073 # 1074 # This subroutine dies if the first line is not a valid start of an SVN property, 1075 # or the property is missing a value, or the property change type (e.g. "Added") 1076 # does not correspond to the property value type (e.g. "+"). 1077 # 1078 # Args: 1079 # $fileHandle: advanced so the last line read from the handle is the first 1080 # line of the property to parse. This should be a line 1081 # that matches $svnPropertyStartRegEx. 1082 # $line: the line last read from $fileHandle. 1083 # 1084 # Returns ($propertyHashRef, $lastReadLine): 1085 # $propertyHashRef: a hash reference representing a SVN property. 1086 # name: the name of the property. 1087 # value: the last property value. For instance, suppose the property is "Modified". 1088 # Then it has both a '-' and '+' property value in that order. Therefore, 1089 # the value of this key is the value of the '+' property by ordering (since 1090 # it is the last value). 1091 # propertyChangeDelta: the value 1 or -1 if the property was added or 1092 # removed, respectively. 1093 # $lastReadLine: the line last read from $fileHandle. 1094 sub parseSvnProperty($$) 1095 { 1096 my ($fileHandle, $line) = @_; 1097 1098 $_ = $line; 1099 1100 my $propertyName; 1101 my $propertyChangeType; 1102 if (/$svnPropertyStartRegEx/) { 1103 $propertyChangeType = $1; 1104 $propertyName = $2; 1105 } else { 1106 die("Failed to find SVN property: \"$_\"."); 1107 } 1108 1109 $_ = <$fileHandle>; # Not defined if end-of-file reached. 1110 1111 # The "svn diff" command neither inserts newline characters between property values 1112 # nor between successive properties. 1113 # 1114 # FIXME: We do not support property values that contain tailing newline characters 1115 # as it is difficult to disambiguate these trailing newlines from the empty 1116 # line that precedes the contents of a binary patch. 1117 my $propertyValue; 1118 my $propertyValueType; 1119 while (defined($_) && /$svnPropertyValueStartRegEx/) { 1120 # Note, a '-' property may be followed by a '+' property in the case of a "Modified" 1121 # or "Name" property. We only care about the ending value (i.e. the '+' property) 1122 # in such circumstances. So, we take the property value for the property to be its 1123 # last parsed property value. 1124 # 1125 # FIXME: We may want to consider strictly enforcing a '-', '+' property ordering or 1126 # add error checking to prevent '+', '+', ..., '+' and other invalid combinations. 1127 $propertyValueType = $1; 1128 ($propertyValue, $_) = parseSvnPropertyValue($fileHandle, $_); 1129 } 1130 1131 if (!$propertyValue) { 1132 die("Failed to find the property value for the SVN property \"$propertyName\": \"$_\"."); 1133 } 1134 1135 my $propertyChangeDelta; 1136 if ($propertyValueType eq "+" || $propertyValueType eq "Merged") { 1137 $propertyChangeDelta = 1; 1138 } elsif ($propertyValueType eq "-" || $propertyValueType eq "Reverse-merged") { 1139 $propertyChangeDelta = -1; 1140 } else { 1141 die("Not reached."); 1142 } 1143 1144 # We perform a simple validation that an "Added" or "Deleted" property 1145 # change type corresponds with a "+" and "-" value type, respectively. 1146 my $expectedChangeDelta; 1147 if ($propertyChangeType eq "Added") { 1148 $expectedChangeDelta = 1; 1149 } elsif ($propertyChangeType eq "Deleted") { 1150 $expectedChangeDelta = -1; 1151 } 1152 1153 if ($expectedChangeDelta && $propertyChangeDelta != $expectedChangeDelta) { 1154 die("The final property value type found \"$propertyValueType\" does not " . 1155 "correspond to the property change type found \"$propertyChangeType\"."); 1156 } 1157 1158 my %propertyHash; 1159 $propertyHash{name} = $propertyName; 1160 $propertyHash{propertyChangeDelta} = $propertyChangeDelta; 1161 $propertyHash{value} = $propertyValue; 1162 return (\%propertyHash, $_); 1163 } 1164 1165 # Parse the value of an SVN property from the given file handle, and advance 1166 # the handle so the last line read is the first line after the property value. 1167 # 1168 # This subroutine dies if the first line is an invalid SVN property value line 1169 # (i.e. a line that does not begin with " +" or " -"). 1170 # 1171 # Args: 1172 # $fileHandle: advanced so the last line read from the handle is the first 1173 # line of the property value to parse. This should be a line 1174 # beginning with " +" or " -". 1175 # $line: the line last read from $fileHandle. 1176 # 1177 # Returns ($propertyValue, $lastReadLine): 1178 # $propertyValue: the value of the property. 1179 # $lastReadLine: the line last read from $fileHandle. 1180 sub parseSvnPropertyValue($$) 1181 { 1182 my ($fileHandle, $line) = @_; 1183 1184 $_ = $line; 1185 1186 my $propertyValue; 1187 my $eol; 1188 if (/$svnPropertyValueStartRegEx/) { 1189 $propertyValue = $2; # Does not include the end-of-line character(s). 1190 $eol = $POSTMATCH; 1191 } else { 1192 die("Failed to find property value beginning with '+', '-', 'Merged', or 'Reverse-merged': \"$_\"."); 1193 } 1194 1195 while (<$fileHandle>) { 1196 if (/^[\r\n]+$/ || /$svnPropertyValueStartRegEx/ || /$svnPropertyStartRegEx/) { 1197 # Note, we may encounter an empty line before the contents of a binary patch. 1198 # Also, we check for $svnPropertyValueStartRegEx because a '-' property may be 1199 # followed by a '+' property in the case of a "Modified" or "Name" property. 1200 # We check for $svnPropertyStartRegEx because it indicates the start of the 1201 # next property to parse. 1202 last; 1203 } 1204 1205 # Temporarily strip off any end-of-line characters. We add the end-of-line characters 1206 # from the previously processed line to the start of this line so that the last line 1207 # of the property value does not end in end-of-line characters. 1208 s/([\n\r]+)$//; 1209 $propertyValue .= "$eol$_"; 1210 $eol = $1; 1211 } 1212 1213 return ($propertyValue, $_); 1214 } 1215 1216 # Parse a patch file created by svn-create-patch. 1217 # 1218 # Args: 1219 # $fileHandle: A file handle to the patch file that has not yet been 1220 # read from. 1221 # $optionsHashRef: a hash reference representing optional options to use 1222 # when processing a diff. 1223 # shouldNotUseIndexPathEOL: whether to use the line endings in the diff instead 1224 # instead of the line endings in the target file; the 1225 # value of 1 if svnConvertedText should use the line 1226 # endings in the diff. 1227 # 1228 # Returns: 1229 # @diffHashRefs: an array of diff hash references. 1230 # See the %diffHash documentation above. 1231 sub parsePatch($;$) 1232 { 1233 my ($fileHandle, $optionsHashRef) = @_; 1234 1235 my $newDiffHashRefs; 1236 my @diffHashRefs; # return value 1237 1238 my $line = <$fileHandle>; 1239 1240 while (defined($line)) { # Otherwise, at EOF. 1241 1242 ($newDiffHashRefs, $line) = parseDiff($fileHandle, $line, $optionsHashRef); 1243 1244 push @diffHashRefs, @$newDiffHashRefs; 1245 } 1246 1247 return @diffHashRefs; 1248 } 1249 1250 # Prepare the results of parsePatch() for use in svn-apply and svn-unapply. 1251 # 1252 # Args: 1253 # $shouldForce: Whether to continue processing if an unexpected 1254 # state occurs. 1255 # @diffHashRefs: An array of references to %diffHashes. 1256 # See the %diffHash documentation above. 1257 # 1258 # Returns $preparedPatchHashRef: 1259 # copyDiffHashRefs: A reference to an array of the $diffHashRefs in 1260 # @diffHashRefs that represent file copies. The original 1261 # ordering is preserved. 1262 # nonCopyDiffHashRefs: A reference to an array of the $diffHashRefs in 1263 # @diffHashRefs that do not represent file copies. 1264 # The original ordering is preserved. 1265 # sourceRevisionHash: A reference to a hash of source path to source 1266 # revision number. 1267 sub prepareParsedPatch($@) 1268 { 1269 my ($shouldForce, @diffHashRefs) = @_; 1270 1271 my %copiedFiles; 1272 1273 # Return values 1274 my @copyDiffHashRefs = (); 1275 my @nonCopyDiffHashRefs = (); 1276 my %sourceRevisionHash = (); 1277 for my $diffHashRef (@diffHashRefs) { 1278 my $copiedFromPath = $diffHashRef->{copiedFromPath}; 1279 my $indexPath = $diffHashRef->{indexPath}; 1280 my $sourceRevision = $diffHashRef->{sourceRevision}; 1281 my $sourcePath; 1282 1283 if (defined($copiedFromPath)) { 1284 # Then the diff is a copy operation. 1285 $sourcePath = $copiedFromPath; 1286 1287 # FIXME: Consider printing a warning or exiting if 1288 # exists($copiedFiles{$indexPath}) is true -- i.e. if 1289 # $indexPath appears twice as a copy target. 1290 $copiedFiles{$indexPath} = $sourcePath; 1291 1292 push @copyDiffHashRefs, $diffHashRef; 1293 } else { 1294 # Then the diff is not a copy operation. 1295 $sourcePath = $indexPath; 1296 1297 push @nonCopyDiffHashRefs, $diffHashRef; 1298 } 1299 1300 if (defined($sourceRevision)) { 1301 if (exists($sourceRevisionHash{$sourcePath}) && 1302 ($sourceRevisionHash{$sourcePath} != $sourceRevision)) { 1303 if (!$shouldForce) { 1304 die "Two revisions of the same file required as a source:\n". 1305 " $sourcePath:$sourceRevisionHash{$sourcePath}\n". 1306 " $sourcePath:$sourceRevision"; 1307 } 1308 } 1309 $sourceRevisionHash{$sourcePath} = $sourceRevision; 1310 } 1311 } 1312 1313 my %preparedPatchHash; 1314 1315 $preparedPatchHash{copyDiffHashRefs} = \@copyDiffHashRefs; 1316 $preparedPatchHash{nonCopyDiffHashRefs} = \@nonCopyDiffHashRefs; 1317 $preparedPatchHash{sourceRevisionHash} = \%sourceRevisionHash; 1318 1319 return \%preparedPatchHash; 1320 } 1321 1322 # Return localtime() for the project's time zone, given an integer time as 1323 # returned by Perl's time() function. 1324 sub localTimeInProjectTimeZone($) 1325 { 1326 my $epochTime = shift; 1327 1328 # Change the time zone temporarily for the localtime() call. 1329 my $savedTimeZone = $ENV{'TZ'}; 1330 $ENV{'TZ'} = $changeLogTimeZone; 1331 my @localTime = localtime($epochTime); 1332 if (defined $savedTimeZone) { 1333 $ENV{'TZ'} = $savedTimeZone; 1334 } else { 1335 delete $ENV{'TZ'}; 1336 } 1337 1338 return @localTime; 1339 } 1340 1341 # Set the reviewer and date in a ChangeLog patch, and return the new patch. 1342 # 1343 # Args: 1344 # $patch: a ChangeLog patch as a string. 1345 # $reviewer: the name of the reviewer, or undef if the reviewer should not be set. 1346 # $epochTime: an integer time as returned by Perl's time() function. 1347 sub setChangeLogDateAndReviewer($$$) 1348 { 1349 my ($patch, $reviewer, $epochTime) = @_; 1350 1351 my @localTime = localTimeInProjectTimeZone($epochTime); 1352 my $newDate = strftime("%Y-%m-%d", @localTime); 1353 1354 my $firstChangeLogLineRegEx = qr#(\n\+)\d{4}-[^-]{2}-[^-]{2}( )#; 1355 $patch =~ s/$firstChangeLogLineRegEx/$1$newDate$2/; 1356 1357 if (defined($reviewer)) { 1358 # We include a leading plus ("+") in the regular expression to make 1359 # the regular expression less likely to match text in the leading junk 1360 # for the patch, if the patch has leading junk. 1361 $patch =~ s/(\n\+.*)NOBODY \(OOPS!\)/$1$reviewer/; 1362 } 1363 1364 return $patch; 1365 } 1366 1367 # If possible, returns a ChangeLog patch equivalent to the given one, 1368 # but with the newest ChangeLog entry inserted at the top of the 1369 # file -- i.e. no leading context and all lines starting with "+". 1370 # 1371 # If given a patch string not representable as a patch with the above 1372 # properties, it returns the input back unchanged. 1373 # 1374 # WARNING: This subroutine can return an inequivalent patch string if 1375 # both the beginning of the new ChangeLog file matches the beginning 1376 # of the source ChangeLog, and the source beginning was modified. 1377 # Otherwise, it is guaranteed to return an equivalent patch string, 1378 # if it returns. 1379 # 1380 # Applying this subroutine to ChangeLog patches allows svn-apply to 1381 # insert new ChangeLog entries at the top of the ChangeLog file. 1382 # svn-apply uses patch with --fuzz=3 to do this. We need to apply 1383 # this subroutine because the diff(1) command is greedy when matching 1384 # lines. A new ChangeLog entry with the same date and author as the 1385 # previous will match and cause the diff to have lines of starting 1386 # context. 1387 # 1388 # This subroutine has unit tests in VCSUtils_unittest.pl. 1389 # 1390 # Returns $changeLogHashRef: 1391 # $changeLogHashRef: a hash reference representing a change log patch. 1392 # patch: a ChangeLog patch equivalent to the given one, but with the 1393 # newest ChangeLog entry inserted at the top of the file, if possible. 1394 sub fixChangeLogPatch($) 1395 { 1396 my $patch = shift; # $patch will only contain patch fragments for ChangeLog. 1397 1398 $patch =~ /(\r?\n)/; 1399 my $lineEnding = $1; 1400 my @lines = split(/$lineEnding/, $patch); 1401 1402 my $i = 0; # We reuse the same index throughout. 1403 1404 # Skip to beginning of first chunk. 1405 for (; $i < @lines; ++$i) { 1406 if (substr($lines[$i], 0, 1) eq "@") { 1407 last; 1408 } 1409 } 1410 my $chunkStartIndex = ++$i; 1411 my %changeLogHashRef; 1412 1413 # Optimization: do not process if new lines already begin the chunk. 1414 if (substr($lines[$i], 0, 1) eq "+") { 1415 $changeLogHashRef{patch} = $patch; 1416 return \%changeLogHashRef; 1417 } 1418 1419 # Skip to first line of newly added ChangeLog entry. 1420 # For example, +2009-06-03 Eric Seidel <eric (at] webkit.org> 1421 my $dateStartRegEx = '^\+(\d{4}-\d{2}-\d{2})' # leading "+" and date 1422 . '\s+(.+)\s+' # name 1423 . '<([^<>]+)>$'; # e-mail address 1424 1425 for (; $i < @lines; ++$i) { 1426 my $line = $lines[$i]; 1427 my $firstChar = substr($line, 0, 1); 1428 if ($line =~ /$dateStartRegEx/) { 1429 last; 1430 } elsif ($firstChar eq " " or $firstChar eq "+") { 1431 next; 1432 } 1433 $changeLogHashRef{patch} = $patch; # Do not change if, for example, "-" or "@" found. 1434 return \%changeLogHashRef; 1435 } 1436 if ($i >= @lines) { 1437 $changeLogHashRef{patch} = $patch; # Do not change if date not found. 1438 return \%changeLogHashRef; 1439 } 1440 my $dateStartIndex = $i; 1441 1442 # Rewrite overlapping lines to lead with " ". 1443 my @overlappingLines = (); # These will include a leading "+". 1444 for (; $i < @lines; ++$i) { 1445 my $line = $lines[$i]; 1446 if (substr($line, 0, 1) ne "+") { 1447 last; 1448 } 1449 push(@overlappingLines, $line); 1450 $lines[$i] = " " . substr($line, 1); 1451 } 1452 1453 # Remove excess ending context, if necessary. 1454 my $shouldTrimContext = 1; 1455 for (; $i < @lines; ++$i) { 1456 my $firstChar = substr($lines[$i], 0, 1); 1457 if ($firstChar eq " ") { 1458 next; 1459 } elsif ($firstChar eq "@") { 1460 last; 1461 } 1462 $shouldTrimContext = 0; # For example, if "+" or "-" encountered. 1463 last; 1464 } 1465 my $deletedLineCount = 0; 1466 if ($shouldTrimContext) { # Also occurs if end of file reached. 1467 splice(@lines, $i - @overlappingLines, @overlappingLines); 1468 $deletedLineCount = @overlappingLines; 1469 } 1470 1471 # Work backwards, shifting overlapping lines towards front 1472 # while checking that patch stays equivalent. 1473 for ($i = $dateStartIndex - 1; @overlappingLines && $i >= $chunkStartIndex; --$i) { 1474 my $line = $lines[$i]; 1475 if (substr($line, 0, 1) ne " ") { 1476 next; 1477 } 1478 my $text = substr($line, 1); 1479 my $newLine = pop(@overlappingLines); 1480 if ($text ne substr($newLine, 1)) { 1481 $changeLogHashRef{patch} = $patch; # Unexpected difference. 1482 return \%changeLogHashRef; 1483 } 1484 $lines[$i] = "+$text"; 1485 } 1486 1487 # If @overlappingLines > 0, this is where we make use of the 1488 # assumption that the beginning of the source file was not modified. 1489 splice(@lines, $chunkStartIndex, 0, @overlappingLines); 1490 1491 # Update the date start index as it may have changed after shifting 1492 # the overlapping lines towards the front. 1493 for ($i = $chunkStartIndex; $i < $dateStartIndex; ++$i) { 1494 $dateStartIndex = $i if $lines[$i] =~ /$dateStartRegEx/; 1495 } 1496 splice(@lines, $chunkStartIndex, $dateStartIndex - $chunkStartIndex); # Remove context of later entry. 1497 $deletedLineCount += $dateStartIndex - $chunkStartIndex; 1498 1499 # Update the initial chunk range. 1500 if ($lines[$chunkStartIndex - 1] !~ /$chunkRangeRegEx/) { 1501 # FIXME: Handle errors differently from ChangeLog files that 1502 # are okay but should not be altered. That way we can find out 1503 # if improvements to the script ever become necessary. 1504 $changeLogHashRef{patch} = $patch; # Error: unexpected patch string format. 1505 return \%changeLogHashRef; 1506 } 1507 my $oldSourceLineCount = $2; 1508 my $oldTargetLineCount = $3; 1509 1510 my $sourceLineCount = $oldSourceLineCount + @overlappingLines - $deletedLineCount; 1511 my $targetLineCount = $oldTargetLineCount + @overlappingLines - $deletedLineCount; 1512 $lines[$chunkStartIndex - 1] = "@@ -1,$sourceLineCount +1,$targetLineCount @@"; 1513 1514 $changeLogHashRef{patch} = join($lineEnding, @lines) . "\n"; # patch(1) expects an extra trailing newline. 1515 return \%changeLogHashRef; 1516 } 1517 1518 # This is a supporting method for runPatchCommand. 1519 # 1520 # Arg: the optional $args parameter passed to runPatchCommand (can be undefined). 1521 # 1522 # Returns ($patchCommand, $isForcing). 1523 # 1524 # This subroutine has unit tests in VCSUtils_unittest.pl. 1525 sub generatePatchCommand($) 1526 { 1527 my ($passedArgsHashRef) = @_; 1528 1529 my $argsHashRef = { # Defaults 1530 ensureForce => 0, 1531 shouldReverse => 0, 1532 options => [] 1533 }; 1534 1535 # Merges hash references. It's okay here if passed hash reference is undefined. 1536 @{$argsHashRef}{keys %{$passedArgsHashRef}} = values %{$passedArgsHashRef}; 1537 1538 my $ensureForce = $argsHashRef->{ensureForce}; 1539 my $shouldReverse = $argsHashRef->{shouldReverse}; 1540 my $options = $argsHashRef->{options}; 1541 1542 if (! $options) { 1543 $options = []; 1544 } else { 1545 $options = [@{$options}]; # Copy to avoid side effects. 1546 } 1547 1548 my $isForcing = 0; 1549 if (grep /^--force$/, @{$options}) { 1550 $isForcing = 1; 1551 } elsif ($ensureForce) { 1552 push @{$options}, "--force"; 1553 $isForcing = 1; 1554 } 1555 1556 if ($shouldReverse) { # No check: --reverse should never be passed explicitly. 1557 push @{$options}, "--reverse"; 1558 } 1559 1560 @{$options} = sort(@{$options}); # For easier testing. 1561 1562 my $patchCommand = join(" ", "patch -p0", @{$options}); 1563 1564 return ($patchCommand, $isForcing); 1565 } 1566 1567 # Apply the given patch using the patch(1) command. 1568 # 1569 # On success, return the resulting exit status. Otherwise, exit with the 1570 # exit status. If "--force" is passed as an option, however, then never 1571 # exit and always return the exit status. 1572 # 1573 # Args: 1574 # $patch: a patch string. 1575 # $repositoryRootPath: an absolute path to the repository root. 1576 # $pathRelativeToRoot: the path of the file to be patched, relative to the 1577 # repository root. This should normally be the path 1578 # found in the patch's "Index:" line. It is passed 1579 # explicitly rather than reparsed from the patch 1580 # string for optimization purposes. 1581 # This is used only for error reporting. The 1582 # patch command gleans the actual file to patch 1583 # from the patch string. 1584 # $args: a reference to a hash of optional arguments. The possible 1585 # keys are -- 1586 # ensureForce: whether to ensure --force is passed (defaults to 0). 1587 # shouldReverse: whether to pass --reverse (defaults to 0). 1588 # options: a reference to an array of options to pass to the 1589 # patch command. The subroutine passes the -p0 option 1590 # no matter what. This should not include --reverse. 1591 # 1592 # This subroutine has unit tests in VCSUtils_unittest.pl. 1593 sub runPatchCommand($$$;$) 1594 { 1595 my ($patch, $repositoryRootPath, $pathRelativeToRoot, $args) = @_; 1596 1597 my ($patchCommand, $isForcing) = generatePatchCommand($args); 1598 1599 # Temporarily change the working directory since the path found 1600 # in the patch's "Index:" line is relative to the repository root 1601 # (i.e. the same as $pathRelativeToRoot). 1602 my $cwd = Cwd::getcwd(); 1603 chdir $repositoryRootPath; 1604 1605 open PATCH, "| $patchCommand" or die "Could not call \"$patchCommand\" for file \"$pathRelativeToRoot\": $!"; 1606 print PATCH $patch; 1607 close PATCH; 1608 my $exitStatus = exitStatus($?); 1609 1610 chdir $cwd; 1611 1612 if ($exitStatus && !$isForcing) { 1613 print "Calling \"$patchCommand\" for file \"$pathRelativeToRoot\" returned " . 1614 "status $exitStatus. Pass --force to ignore patch failures.\n"; 1615 exit $exitStatus; 1616 } 1617 1618 return $exitStatus; 1619 } 1620 1621 # Merge ChangeLog patches using a three-file approach. 1622 # 1623 # This is used by resolve-ChangeLogs when it's operated as a merge driver 1624 # and when it's used to merge conflicts after a patch is applied or after 1625 # an svn update. 1626 # 1627 # It's also used for traditional rejected patches. 1628 # 1629 # Args: 1630 # $fileMine: The merged version of the file. Also known in git as the 1631 # other branch's version (%B) or "ours". 1632 # For traditional patch rejects, this is the *.rej file. 1633 # $fileOlder: The base version of the file. Also known in git as the 1634 # ancestor version (%O) or "base". 1635 # For traditional patch rejects, this is the *.orig file. 1636 # $fileNewer: The current version of the file. Also known in git as the 1637 # current version (%A) or "theirs". 1638 # For traditional patch rejects, this is the original-named 1639 # file. 1640 # 1641 # Returns 1 if merge was successful, else 0. 1642 sub mergeChangeLogs($$$) 1643 { 1644 my ($fileMine, $fileOlder, $fileNewer) = @_; 1645 1646 my $traditionalReject = $fileMine =~ /\.rej$/ ? 1 : 0; 1647 1648 local $/ = undef; 1649 1650 my $patch; 1651 if ($traditionalReject) { 1652 open(DIFF, "<", $fileMine) or die $!; 1653 $patch = <DIFF>; 1654 close(DIFF); 1655 rename($fileMine, "$fileMine.save"); 1656 rename($fileOlder, "$fileOlder.save"); 1657 } else { 1658 open(DIFF, "-|", qw(diff -u -a --binary), $fileOlder, $fileMine) or die $!; 1659 $patch = <DIFF>; 1660 close(DIFF); 1661 } 1662 1663 unlink("${fileNewer}.orig"); 1664 unlink("${fileNewer}.rej"); 1665 1666 open(PATCH, "| patch --force --fuzz=3 --binary $fileNewer > " . File::Spec->devnull()) or die $!; 1667 if ($traditionalReject) { 1668 print PATCH $patch; 1669 } else { 1670 my $changeLogHash = fixChangeLogPatch($patch); 1671 print PATCH $changeLogHash->{patch}; 1672 } 1673 close(PATCH); 1674 1675 my $result = !exitStatus($?); 1676 1677 # Refuse to merge the patch if it did not apply cleanly 1678 if (-e "${fileNewer}.rej") { 1679 unlink("${fileNewer}.rej"); 1680 if (-f "${fileNewer}.orig") { 1681 unlink($fileNewer); 1682 rename("${fileNewer}.orig", $fileNewer); 1683 } 1684 } else { 1685 unlink("${fileNewer}.orig"); 1686 } 1687 1688 if ($traditionalReject) { 1689 rename("$fileMine.save", $fileMine); 1690 rename("$fileOlder.save", $fileOlder); 1691 } 1692 1693 return $result; 1694 } 1695 1696 sub gitConfig($) 1697 { 1698 return unless $isGit; 1699 1700 my ($config) = @_; 1701 1702 my $result = `git config $config`; 1703 if (($? >> 8)) { 1704 $result = `git repo-config $config`; 1705 } 1706 chomp $result; 1707 return $result; 1708 } 1709 1710 sub changeLogNameError($) 1711 { 1712 my ($message) = @_; 1713 print STDERR "$message\nEither:\n"; 1714 print STDERR " set CHANGE_LOG_NAME in your environment\n"; 1715 print STDERR " OR pass --name= on the command line\n"; 1716 print STDERR " OR set REAL_NAME in your environment"; 1717 print STDERR " OR git users can set 'git config user.name'\n"; 1718 exit(1); 1719 } 1720 1721 sub changeLogName() 1722 { 1723 my $name = $ENV{CHANGE_LOG_NAME} || $ENV{REAL_NAME} || gitConfig("user.name") || (split /\s*,\s*/, (getpwuid $<)[6])[0]; 1724 1725 changeLogNameError("Failed to determine ChangeLog name.") unless $name; 1726 # getpwuid seems to always succeed on windows, returning the username instead of the full name. This check will catch that case. 1727 changeLogNameError("'$name' does not contain a space! ChangeLogs should contain your full name.") unless ($name =~ /\w \w/); 1728 1729 return $name; 1730 } 1731 1732 sub changeLogEmailAddressError($) 1733 { 1734 my ($message) = @_; 1735 print STDERR "$message\nEither:\n"; 1736 print STDERR " set CHANGE_LOG_EMAIL_ADDRESS in your environment\n"; 1737 print STDERR " OR pass --email= on the command line\n"; 1738 print STDERR " OR set EMAIL_ADDRESS in your environment\n"; 1739 print STDERR " OR git users can set 'git config user.email'\n"; 1740 exit(1); 1741 } 1742 1743 sub changeLogEmailAddress() 1744 { 1745 my $emailAddress = $ENV{CHANGE_LOG_EMAIL_ADDRESS} || $ENV{EMAIL_ADDRESS} || gitConfig("user.email"); 1746 1747 changeLogEmailAddressError("Failed to determine email address for ChangeLog.") unless $emailAddress; 1748 changeLogEmailAddressError("Email address '$emailAddress' does not contain '\@' and is likely invalid.") unless ($emailAddress =~ /\@/); 1749 1750 return $emailAddress; 1751 } 1752 1753 # http://tools.ietf.org/html/rfc1924 1754 sub decodeBase85($) 1755 { 1756 my ($encoded) = @_; 1757 my %table; 1758 my @characters = ('0'..'9', 'A'..'Z', 'a'..'z', '!', '#', '$', '%', '&', '(', ')', '*', '+', '-', ';', '<', '=', '>', '?', '@', '^', '_', '`', '{', '|', '}', '~'); 1759 for (my $i = 0; $i < 85; $i++) { 1760 $table{$characters[$i]} = $i; 1761 } 1762 1763 my $decoded = ''; 1764 my @encodedChars = $encoded =~ /./g; 1765 1766 for (my $encodedIter = 0; defined($encodedChars[$encodedIter]);) { 1767 my $digit = 0; 1768 for (my $i = 0; $i < 5; $i++) { 1769 $digit *= 85; 1770 my $char = $encodedChars[$encodedIter]; 1771 $digit += $table{$char}; 1772 $encodedIter++; 1773 } 1774 1775 for (my $i = 0; $i < 4; $i++) { 1776 $decoded .= chr(($digit >> (3 - $i) * 8) & 255); 1777 } 1778 } 1779 1780 return $decoded; 1781 } 1782 1783 sub decodeGitBinaryChunk($$) 1784 { 1785 my ($contents, $fullPath) = @_; 1786 1787 # Load this module lazily in case the user don't have this module 1788 # and won't handle git binary patches. 1789 require Compress::Zlib; 1790 1791 my $encoded = ""; 1792 my $compressedSize = 0; 1793 while ($contents =~ /^([A-Za-z])(.*)$/gm) { 1794 my $line = $2; 1795 next if $line eq ""; 1796 die "$fullPath: unexpected size of a line: $&" if length($2) % 5 != 0; 1797 my $actualSize = length($2) / 5 * 4; 1798 my $encodedExpectedSize = ord($1); 1799 my $expectedSize = $encodedExpectedSize <= ord("Z") ? $encodedExpectedSize - ord("A") + 1 : $encodedExpectedSize - ord("a") + 27; 1800 1801 die "$fullPath: unexpected size of a line: $&" if int(($expectedSize + 3) / 4) * 4 != $actualSize; 1802 $compressedSize += $expectedSize; 1803 $encoded .= $line; 1804 } 1805 1806 my $compressed = decodeBase85($encoded); 1807 $compressed = substr($compressed, 0, $compressedSize); 1808 return Compress::Zlib::uncompress($compressed); 1809 } 1810 1811 sub decodeGitBinaryPatch($$) 1812 { 1813 my ($contents, $fullPath) = @_; 1814 1815 # Git binary patch has two chunks. One is for the normal patching 1816 # and another is for the reverse patching. 1817 # 1818 # Each chunk a line which starts from either "literal" or "delta", 1819 # followed by a number which specifies decoded size of the chunk. 1820 # 1821 # Then, content of the chunk comes. To decode the content, we 1822 # need decode it with base85 first, and then zlib. 1823 my $gitPatchRegExp = '(literal|delta) ([0-9]+)\n([A-Za-z0-9!#$%&()*+-;<=>?@^_`{|}~\\n]*?)\n\n'; 1824 if ($contents !~ m"\nGIT binary patch\n$gitPatchRegExp$gitPatchRegExp\Z") { 1825 die "$fullPath: unknown git binary patch format" 1826 } 1827 1828 my $binaryChunkType = $1; 1829 my $binaryChunkExpectedSize = $2; 1830 my $encodedChunk = $3; 1831 my $reverseBinaryChunkType = $4; 1832 my $reverseBinaryChunkExpectedSize = $5; 1833 my $encodedReverseChunk = $6; 1834 1835 my $binaryChunk = decodeGitBinaryChunk($encodedChunk, $fullPath); 1836 my $binaryChunkActualSize = length($binaryChunk); 1837 my $reverseBinaryChunk = decodeGitBinaryChunk($encodedReverseChunk, $fullPath); 1838 my $reverseBinaryChunkActualSize = length($reverseBinaryChunk); 1839 1840 die "$fullPath: unexpected size of the first chunk (expected $binaryChunkExpectedSize but was $binaryChunkActualSize" if ($binaryChunkType eq "literal" and $binaryChunkExpectedSize != $binaryChunkActualSize); 1841 die "$fullPath: unexpected size of the second chunk (expected $reverseBinaryChunkExpectedSize but was $reverseBinaryChunkActualSize" if ($reverseBinaryChunkType eq "literal" and $reverseBinaryChunkExpectedSize != $reverseBinaryChunkActualSize); 1842 1843 return ($binaryChunkType, $binaryChunk, $reverseBinaryChunkType, $reverseBinaryChunk); 1844 } 1845 1846 sub readByte($$) 1847 { 1848 my ($data, $location) = @_; 1849 1850 # Return the byte at $location in $data as a numeric value. 1851 return ord(substr($data, $location, 1)); 1852 } 1853 1854 # The git binary delta format is undocumented, except in code: 1855 # - https://github.com/git/git/blob/master/delta.h:get_delta_hdr_size is the source 1856 # of the algorithm in decodeGitBinaryPatchDeltaSize. 1857 # - https://github.com/git/git/blob/master/patch-delta.c:patch_delta is the source 1858 # of the algorithm in applyGitBinaryPatchDelta. 1859 sub decodeGitBinaryPatchDeltaSize($) 1860 { 1861 my ($binaryChunk) = @_; 1862 1863 # Source and destination buffer sizes are stored in 7-bit chunks at the 1864 # start of the binary delta patch data. The highest bit in each byte 1865 # except the last is set; the remaining 7 bits provide the next 1866 # chunk of the size. The chunks are stored in ascending significance 1867 # order. 1868 my $cmd; 1869 my $size = 0; 1870 my $shift = 0; 1871 for (my $i = 0; $i < length($binaryChunk);) { 1872 $cmd = readByte($binaryChunk, $i++); 1873 $size |= ($cmd & 0x7f) << $shift; 1874 $shift += 7; 1875 if (!($cmd & 0x80)) { 1876 return ($size, $i); 1877 } 1878 } 1879 } 1880 1881 sub applyGitBinaryPatchDelta($$) 1882 { 1883 my ($binaryChunk, $originalContents) = @_; 1884 1885 # Git delta format consists of two headers indicating source buffer size 1886 # and result size, then a series of commands. Each command is either 1887 # a copy-from-old-version (the 0x80 bit is set) or a copy-from-delta 1888 # command. Commands are applied sequentially to generate the result. 1889 # 1890 # A copy-from-old-version command encodes an offset and size to copy 1891 # from in subsequent bits, while a copy-from-delta command consists only 1892 # of the number of bytes to copy from the delta. 1893 1894 # We don't use these values, but we need to know how big they are so that 1895 # we can skip to the diff data. 1896 my ($size, $bytesUsed) = decodeGitBinaryPatchDeltaSize($binaryChunk); 1897 $binaryChunk = substr($binaryChunk, $bytesUsed); 1898 ($size, $bytesUsed) = decodeGitBinaryPatchDeltaSize($binaryChunk); 1899 $binaryChunk = substr($binaryChunk, $bytesUsed); 1900 1901 my $out = ""; 1902 for (my $i = 0; $i < length($binaryChunk); ) { 1903 my $cmd = ord(substr($binaryChunk, $i++, 1)); 1904 if ($cmd & 0x80) { 1905 # Extract an offset and size from the delta data, then copy 1906 # $size bytes from $offset in the original data into the output. 1907 my $offset = 0; 1908 my $size = 0; 1909 if ($cmd & 0x01) { $offset = readByte($binaryChunk, $i++); } 1910 if ($cmd & 0x02) { $offset |= readByte($binaryChunk, $i++) << 8; } 1911 if ($cmd & 0x04) { $offset |= readByte($binaryChunk, $i++) << 16; } 1912 if ($cmd & 0x08) { $offset |= readByte($binaryChunk, $i++) << 24; } 1913 if ($cmd & 0x10) { $size = readByte($binaryChunk, $i++); } 1914 if ($cmd & 0x20) { $size |= readByte($binaryChunk, $i++) << 8; } 1915 if ($cmd & 0x40) { $size |= readByte($binaryChunk, $i++) << 16; } 1916 if ($size == 0) { $size = 0x10000; } 1917 $out .= substr($originalContents, $offset, $size); 1918 } elsif ($cmd) { 1919 # Copy $cmd bytes from the delta data into the output. 1920 $out .= substr($binaryChunk, $i, $cmd); 1921 $i += $cmd; 1922 } else { 1923 die "unexpected delta opcode 0"; 1924 } 1925 } 1926 1927 return $out; 1928 } 1929 1930 1; 1931