Home | History | Annotate | Download | only in checkout
      1 # Copyright (c) 2009, Google Inc. All rights reserved.
      2 # Copyright (c) 2009 Apple Inc. All rights reserved.
      3 # 
      4 # Redistribution and use in source and binary forms, with or without
      5 # modification, are permitted provided that the following conditions are
      6 # met:
      7 # 
      8 #     * Redistributions of source code must retain the above copyright
      9 # notice, this list of conditions and the following disclaimer.
     10 #     * Redistributions in binary form must reproduce the above
     11 # copyright notice, this list of conditions and the following disclaimer
     12 # in the documentation and/or other materials provided with the
     13 # distribution.
     14 #     * Neither the name of Google Inc. nor the names of its
     15 # contributors may be used to endorse or promote products derived from
     16 # this software without specific prior written permission.
     17 # 
     18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     19 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     20 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     21 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     22 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     23 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     24 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     25 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     26 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     28 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29 #
     30 # Python module for interacting with an SCM system (like SVN or Git)
     31 
     32 import logging
     33 import os
     34 import re
     35 import sys
     36 import shutil
     37 
     38 from webkitpy.common.memoized import memoized
     39 from webkitpy.common.system.deprecated_logging import error, log
     40 from webkitpy.common.system.executive import Executive, run_command, ScriptError
     41 from webkitpy.common.system import ospath
     42 
     43 
     44 def find_checkout_root():
     45     """Returns the current checkout root (as determined by default_scm().
     46 
     47     Returns the absolute path to the top of the WebKit checkout, or None
     48     if it cannot be determined.
     49 
     50     """
     51     scm_system = default_scm()
     52     if scm_system:
     53         return scm_system.checkout_root
     54     return None
     55 
     56 
     57 def default_scm(patch_directories=None):
     58     """Return the default SCM object as determined by the CWD and running code.
     59 
     60     Returns the default SCM object for the current working directory; if the
     61     CWD is not in a checkout, then we attempt to figure out if the SCM module
     62     itself is part of a checkout, and return that one. If neither is part of
     63     a checkout, None is returned.
     64 
     65     """
     66     cwd = os.getcwd()
     67     scm_system = detect_scm_system(cwd, patch_directories)
     68     if not scm_system:
     69         script_directory = os.path.dirname(os.path.abspath(__file__))
     70         scm_system = detect_scm_system(script_directory, patch_directories)
     71         if scm_system:
     72             log("The current directory (%s) is not a WebKit checkout, using %s" % (cwd, scm_system.checkout_root))
     73         else:
     74             error("FATAL: Failed to determine the SCM system for either %s or %s" % (cwd, script_directory))
     75     return scm_system
     76 
     77 
     78 def detect_scm_system(path, patch_directories=None):
     79     absolute_path = os.path.abspath(path)
     80 
     81     if patch_directories == []:
     82         patch_directories = None
     83 
     84     if SVN.in_working_directory(absolute_path):
     85         return SVN(cwd=absolute_path, patch_directories=patch_directories)
     86     
     87     if Git.in_working_directory(absolute_path):
     88         return Git(cwd=absolute_path)
     89     
     90     return None
     91 
     92 
     93 def first_non_empty_line_after_index(lines, index=0):
     94     first_non_empty_line = index
     95     for line in lines[index:]:
     96         if re.match("^\s*$", line):
     97             first_non_empty_line += 1
     98         else:
     99             break
    100     return first_non_empty_line
    101 
    102 
    103 class CommitMessage:
    104     def __init__(self, message):
    105         self.message_lines = message[first_non_empty_line_after_index(message, 0):]
    106 
    107     def body(self, lstrip=False):
    108         lines = self.message_lines[first_non_empty_line_after_index(self.message_lines, 1):]
    109         if lstrip:
    110             lines = [line.lstrip() for line in lines]
    111         return "\n".join(lines) + "\n"
    112 
    113     def description(self, lstrip=False, strip_url=False):
    114         line = self.message_lines[0]
    115         if lstrip:
    116             line = line.lstrip()
    117         if strip_url:
    118             line = re.sub("^(\s*)<.+> ", "\1", line)
    119         return line
    120 
    121     def message(self):
    122         return "\n".join(self.message_lines) + "\n"
    123 
    124 
    125 class CheckoutNeedsUpdate(ScriptError):
    126     def __init__(self, script_args, exit_code, output, cwd):
    127         ScriptError.__init__(self, script_args=script_args, exit_code=exit_code, output=output, cwd=cwd)
    128 
    129 
    130 def commit_error_handler(error):
    131     if re.search("resource out of date", error.output):
    132         raise CheckoutNeedsUpdate(script_args=error.script_args, exit_code=error.exit_code, output=error.output, cwd=error.cwd)
    133     Executive.default_error_handler(error)
    134 
    135 
    136 class AuthenticationError(Exception):
    137     def __init__(self, server_host, prompt_for_password=False):
    138         self.server_host = server_host
    139         self.prompt_for_password = prompt_for_password
    140 
    141 
    142 class AmbiguousCommitError(Exception):
    143     def __init__(self, num_local_commits, working_directory_is_clean):
    144         self.num_local_commits = num_local_commits
    145         self.working_directory_is_clean = working_directory_is_clean
    146 
    147 
    148 # SCM methods are expected to return paths relative to self.checkout_root.
    149 class SCM:
    150     def __init__(self, cwd, executive=None):
    151         self.cwd = cwd
    152         self.checkout_root = self.find_checkout_root(self.cwd)
    153         self.dryrun = False
    154         self._executive = executive or Executive()
    155 
    156     # A wrapper used by subclasses to create processes.
    157     def run(self, args, cwd=None, input=None, error_handler=None, return_exit_code=False, return_stderr=True, decode_output=True):
    158         # FIXME: We should set cwd appropriately.
    159         return self._executive.run_command(args,
    160                            cwd=cwd,
    161                            input=input,
    162                            error_handler=error_handler,
    163                            return_exit_code=return_exit_code,
    164                            return_stderr=return_stderr,
    165                            decode_output=decode_output)
    166 
    167     # SCM always returns repository relative path, but sometimes we need
    168     # absolute paths to pass to rm, etc.
    169     def absolute_path(self, repository_relative_path):
    170         return os.path.join(self.checkout_root, repository_relative_path)
    171 
    172     # FIXME: This belongs in Checkout, not SCM.
    173     def scripts_directory(self):
    174         return os.path.join(self.checkout_root, "Tools", "Scripts")
    175 
    176     # FIXME: This belongs in Checkout, not SCM.
    177     def script_path(self, script_name):
    178         return os.path.join(self.scripts_directory(), script_name)
    179 
    180     def ensure_clean_working_directory(self, force_clean):
    181         if self.working_directory_is_clean():
    182             return
    183         if not force_clean:
    184             # FIXME: Shouldn't this use cwd=self.checkout_root?
    185             print self.run(self.status_command(), error_handler=Executive.ignore_error)
    186             raise ScriptError(message="Working directory has modifications, pass --force-clean or --no-clean to continue.")
    187         log("Cleaning working directory")
    188         self.clean_working_directory()
    189 
    190     def ensure_no_local_commits(self, force):
    191         if not self.supports_local_commits():
    192             return
    193         commits = self.local_commits()
    194         if not len(commits):
    195             return
    196         if not force:
    197             error("Working directory has local commits, pass --force-clean to continue.")
    198         self.discard_local_commits()
    199 
    200     def run_status_and_extract_filenames(self, status_command, status_regexp):
    201         filenames = []
    202         # We run with cwd=self.checkout_root so that returned-paths are root-relative.
    203         for line in self.run(status_command, cwd=self.checkout_root).splitlines():
    204             match = re.search(status_regexp, line)
    205             if not match:
    206                 continue
    207             # status = match.group('status')
    208             filename = match.group('filename')
    209             filenames.append(filename)
    210         return filenames
    211 
    212     def strip_r_from_svn_revision(self, svn_revision):
    213         match = re.match("^r(?P<svn_revision>\d+)", unicode(svn_revision))
    214         if (match):
    215             return match.group('svn_revision')
    216         return svn_revision
    217 
    218     def svn_revision_from_commit_text(self, commit_text):
    219         match = re.search(self.commit_success_regexp(), commit_text, re.MULTILINE)
    220         return match.group('svn_revision')
    221 
    222     @staticmethod
    223     def _subclass_must_implement():
    224         raise NotImplementedError("subclasses must implement")
    225 
    226     @staticmethod
    227     def in_working_directory(path):
    228         SCM._subclass_must_implement()
    229 
    230     @staticmethod
    231     def find_checkout_root(path):
    232         SCM._subclass_must_implement()
    233 
    234     @staticmethod
    235     def commit_success_regexp():
    236         SCM._subclass_must_implement()
    237 
    238     def working_directory_is_clean(self):
    239         self._subclass_must_implement()
    240 
    241     def clean_working_directory(self):
    242         self._subclass_must_implement()
    243 
    244     def status_command(self):
    245         self._subclass_must_implement()
    246 
    247     def add(self, path, return_exit_code=False):
    248         self._subclass_must_implement()
    249 
    250     def delete(self, path):
    251         self._subclass_must_implement()
    252 
    253     def changed_files(self, git_commit=None):
    254         self._subclass_must_implement()
    255 
    256     def changed_files_for_revision(self, revision):
    257         self._subclass_must_implement()
    258 
    259     def revisions_changing_file(self, path, limit=5):
    260         self._subclass_must_implement()
    261 
    262     def added_files(self):
    263         self._subclass_must_implement()
    264 
    265     def conflicted_files(self):
    266         self._subclass_must_implement()
    267 
    268     def display_name(self):
    269         self._subclass_must_implement()
    270 
    271     def create_patch(self, git_commit=None, changed_files=None):
    272         self._subclass_must_implement()
    273 
    274     def committer_email_for_revision(self, revision):
    275         self._subclass_must_implement()
    276 
    277     def contents_at_revision(self, path, revision):
    278         self._subclass_must_implement()
    279 
    280     def diff_for_revision(self, revision):
    281         self._subclass_must_implement()
    282 
    283     def diff_for_file(self, path, log=None):
    284         self._subclass_must_implement()
    285 
    286     def show_head(self, path):
    287         self._subclass_must_implement()
    288 
    289     def apply_reverse_diff(self, revision):
    290         self._subclass_must_implement()
    291 
    292     def revert_files(self, file_paths):
    293         self._subclass_must_implement()
    294 
    295     def commit_with_message(self, message, username=None, password=None, git_commit=None, force_squash=False, changed_files=None):
    296         self._subclass_must_implement()
    297 
    298     def svn_commit_log(self, svn_revision):
    299         self._subclass_must_implement()
    300 
    301     def last_svn_commit_log(self):
    302         self._subclass_must_implement()
    303 
    304     # Subclasses must indicate if they support local commits,
    305     # but the SCM baseclass will only call local_commits methods when this is true.
    306     @staticmethod
    307     def supports_local_commits():
    308         SCM._subclass_must_implement()
    309 
    310     def remote_merge_base():
    311         SCM._subclass_must_implement()
    312 
    313     def commit_locally_with_message(self, message):
    314         error("Your source control manager does not support local commits.")
    315 
    316     def discard_local_commits(self):
    317         pass
    318 
    319     def local_commits(self):
    320         return []
    321 
    322 
    323 # A mixin class that represents common functionality for SVN and Git-SVN.
    324 class SVNRepository:
    325     def has_authorization_for_realm(self, realm, home_directory=os.getenv("HOME")):
    326         # Assumes find and grep are installed.
    327         if not os.path.isdir(os.path.join(home_directory, ".subversion")):
    328             return False
    329         find_args = ["find", ".subversion", "-type", "f", "-exec", "grep", "-q", realm, "{}", ";", "-print"]
    330         find_output = self.run(find_args, cwd=home_directory, error_handler=Executive.ignore_error).rstrip()
    331         return find_output and os.path.isfile(os.path.join(home_directory, find_output))
    332 
    333 
    334 class SVN(SCM, SVNRepository):
    335     # FIXME: These belong in common.config.urls
    336     svn_server_host = "svn.webkit.org"
    337     svn_server_realm = "<http://svn.webkit.org:80> Mac OS Forge"
    338 
    339     def __init__(self, cwd, patch_directories, executive=None):
    340         SCM.__init__(self, cwd, executive)
    341         self._bogus_dir = None
    342         if patch_directories == []:
    343             # FIXME: ScriptError is for Executive, this should probably be a normal Exception.
    344             raise ScriptError(script_args=svn_info_args, message='Empty list of patch directories passed to SCM.__init__')
    345         elif patch_directories == None:
    346             self._patch_directories = [ospath.relpath(cwd, self.checkout_root)]
    347         else:
    348             self._patch_directories = patch_directories
    349 
    350     @staticmethod
    351     def in_working_directory(path):
    352         return os.path.isdir(os.path.join(path, '.svn'))
    353     
    354     @classmethod
    355     def find_uuid(cls, path):
    356         if not cls.in_working_directory(path):
    357             return None
    358         return cls.value_from_svn_info(path, 'Repository UUID')
    359 
    360     @classmethod
    361     def value_from_svn_info(cls, path, field_name):
    362         svn_info_args = ['svn', 'info', path]
    363         info_output = run_command(svn_info_args).rstrip()
    364         match = re.search("^%s: (?P<value>.+)$" % field_name, info_output, re.MULTILINE)
    365         if not match:
    366             raise ScriptError(script_args=svn_info_args, message='svn info did not contain a %s.' % field_name)
    367         return match.group('value')
    368 
    369     @staticmethod
    370     def find_checkout_root(path):
    371         uuid = SVN.find_uuid(path)
    372         # If |path| is not in a working directory, we're supposed to return |path|.
    373         if not uuid:
    374             return path
    375         # Search up the directory hierarchy until we find a different UUID.
    376         last_path = None
    377         while True:
    378             if uuid != SVN.find_uuid(path):
    379                 return last_path
    380             last_path = path
    381             (path, last_component) = os.path.split(path)
    382             if last_path == path:
    383                 return None
    384 
    385     @staticmethod
    386     def commit_success_regexp():
    387         return "^Committed revision (?P<svn_revision>\d+)\.$"
    388 
    389     @memoized
    390     def svn_version(self):
    391         return self.run(['svn', '--version', '--quiet'])
    392 
    393     def working_directory_is_clean(self):
    394         return self.run(["svn", "diff"], cwd=self.checkout_root, decode_output=False) == ""
    395 
    396     def clean_working_directory(self):
    397         # Make sure there are no locks lying around from a previously aborted svn invocation.
    398         # This is slightly dangerous, as it's possible the user is running another svn process
    399         # on this checkout at the same time.  However, it's much more likely that we're running
    400         # under windows and svn just sucks (or the user interrupted svn and it failed to clean up).
    401         self.run(["svn", "cleanup"], cwd=self.checkout_root)
    402 
    403         # svn revert -R is not as awesome as git reset --hard.
    404         # It will leave added files around, causing later svn update
    405         # calls to fail on the bots.  We make this mirror git reset --hard
    406         # by deleting any added files as well.
    407         added_files = reversed(sorted(self.added_files()))
    408         # added_files() returns directories for SVN, we walk the files in reverse path
    409         # length order so that we remove files before we try to remove the directories.
    410         self.run(["svn", "revert", "-R", "."], cwd=self.checkout_root)
    411         for path in added_files:
    412             # This is robust against cwd != self.checkout_root
    413             absolute_path = self.absolute_path(path)
    414             # Completely lame that there is no easy way to remove both types with one call.
    415             if os.path.isdir(path):
    416                 os.rmdir(absolute_path)
    417             else:
    418                 os.remove(absolute_path)
    419 
    420     def status_command(self):
    421         return ['svn', 'status']
    422 
    423     def _status_regexp(self, expected_types):
    424         field_count = 6 if self.svn_version() > "1.6" else 5
    425         return "^(?P<status>[%s]).{%s} (?P<filename>.+)$" % (expected_types, field_count)
    426 
    427     def _add_parent_directories(self, path):
    428         """Does 'svn add' to the path and its parents."""
    429         if self.in_working_directory(path):
    430             return
    431         dirname = os.path.dirname(path)
    432         # We have dirname directry - ensure it added.
    433         if dirname != path:
    434             self._add_parent_directories(dirname)
    435         self.add(path)
    436 
    437     def add(self, path, return_exit_code=False):
    438         self._add_parent_directories(os.path.dirname(os.path.abspath(path)))
    439         return self.run(["svn", "add", path], return_exit_code=return_exit_code)
    440 
    441     def delete(self, path):
    442         parent, base = os.path.split(os.path.abspath(path))
    443         return self.run(["svn", "delete", "--force", base], cwd=parent)
    444 
    445     def changed_files(self, git_commit=None):
    446         status_command = ["svn", "status"]
    447         status_command.extend(self._patch_directories)
    448         # ACDMR: Addded, Conflicted, Deleted, Modified or Replaced
    449         return self.run_status_and_extract_filenames(status_command, self._status_regexp("ACDMR"))
    450 
    451     def changed_files_for_revision(self, revision):
    452         # As far as I can tell svn diff --summarize output looks just like svn status output.
    453         # No file contents printed, thus utf-8 auto-decoding in self.run is fine.
    454         status_command = ["svn", "diff", "--summarize", "-c", revision]
    455         return self.run_status_and_extract_filenames(status_command, self._status_regexp("ACDMR"))
    456 
    457     def revisions_changing_file(self, path, limit=5):
    458         revisions = []
    459         # svn log will exit(1) (and thus self.run will raise) if the path does not exist.
    460         log_command = ['svn', 'log', '--quiet', '--limit=%s' % limit, path]
    461         for line in self.run(log_command, cwd=self.checkout_root).splitlines():
    462             match = re.search('^r(?P<revision>\d+) ', line)
    463             if not match:
    464                 continue
    465             revisions.append(int(match.group('revision')))
    466         return revisions
    467 
    468     def conflicted_files(self):
    469         return self.run_status_and_extract_filenames(self.status_command(), self._status_regexp("C"))
    470 
    471     def added_files(self):
    472         return self.run_status_and_extract_filenames(self.status_command(), self._status_regexp("A"))
    473 
    474     def deleted_files(self):
    475         return self.run_status_and_extract_filenames(self.status_command(), self._status_regexp("D"))
    476 
    477     @staticmethod
    478     def supports_local_commits():
    479         return False
    480 
    481     def display_name(self):
    482         return "svn"
    483 
    484     # FIXME: This method should be on Checkout.
    485     def create_patch(self, git_commit=None, changed_files=None):
    486         """Returns a byte array (str()) representing the patch file.
    487         Patch files are effectively binary since they may contain
    488         files of multiple different encodings."""
    489         if changed_files == []:
    490             return ""
    491         elif changed_files == None:
    492             changed_files = []
    493         return self.run([self.script_path("svn-create-patch")] + changed_files,
    494             cwd=self.checkout_root, return_stderr=False,
    495             decode_output=False)
    496 
    497     def committer_email_for_revision(self, revision):
    498         return self.run(["svn", "propget", "svn:author", "--revprop", "-r", revision]).rstrip()
    499 
    500     def contents_at_revision(self, path, revision):
    501         """Returns a byte array (str()) containing the contents
    502         of path @ revision in the repository."""
    503         remote_path = "%s/%s" % (self._repository_url(), path)
    504         return self.run(["svn", "cat", "-r", revision, remote_path], decode_output=False)
    505 
    506     def diff_for_revision(self, revision):
    507         # FIXME: This should probably use cwd=self.checkout_root
    508         return self.run(['svn', 'diff', '-c', revision])
    509 
    510     def _bogus_dir_name(self):
    511         if sys.platform.startswith("win"):
    512             parent_dir = tempfile.gettempdir()
    513         else:
    514             parent_dir = sys.path[0]  # tempdir is not secure.
    515         return os.path.join(parent_dir, "temp_svn_config")
    516 
    517     def _setup_bogus_dir(self, log):
    518         self._bogus_dir = self._bogus_dir_name()
    519         if not os.path.exists(self._bogus_dir):
    520             os.mkdir(self._bogus_dir)
    521             self._delete_bogus_dir = True
    522         else:
    523             self._delete_bogus_dir = False
    524         if log:
    525             log.debug('  Html: temp config dir: "%s".', self._bogus_dir)
    526 
    527     def _teardown_bogus_dir(self, log):
    528         if self._delete_bogus_dir:
    529             shutil.rmtree(self._bogus_dir, True)
    530             if log:
    531                 log.debug('  Html: removed temp config dir: "%s".', self._bogus_dir)
    532         self._bogus_dir = None
    533 
    534     def diff_for_file(self, path, log=None):
    535         self._setup_bogus_dir(log)
    536         try:
    537             args = ['svn', 'diff']
    538             if self._bogus_dir:
    539                 args += ['--config-dir', self._bogus_dir]
    540             args.append(path)
    541             return self.run(args)
    542         finally:
    543             self._teardown_bogus_dir(log)
    544 
    545     def show_head(self, path):
    546         return self.run(['svn', 'cat', '-r', 'BASE', path], decode_output=False)
    547 
    548     def _repository_url(self):
    549         return self.value_from_svn_info(self.checkout_root, 'URL')
    550 
    551     def apply_reverse_diff(self, revision):
    552         # '-c -revision' applies the inverse diff of 'revision'
    553         svn_merge_args = ['svn', 'merge', '--non-interactive', '-c', '-%s' % revision, self._repository_url()]
    554         log("WARNING: svn merge has been known to take more than 10 minutes to complete.  It is recommended you use git for rollouts.")
    555         log("Running '%s'" % " ".join(svn_merge_args))
    556         # FIXME: Should this use cwd=self.checkout_root?
    557         self.run(svn_merge_args)
    558 
    559     def revert_files(self, file_paths):
    560         # FIXME: This should probably use cwd=self.checkout_root.
    561         self.run(['svn', 'revert'] + file_paths)
    562 
    563     def commit_with_message(self, message, username=None, password=None, git_commit=None, force_squash=False, changed_files=None):
    564         # git-commit and force are not used by SVN.
    565         svn_commit_args = ["svn", "commit"]
    566 
    567         if not username and not self.has_authorization_for_realm(self.svn_server_realm):
    568             raise AuthenticationError(self.svn_server_host)
    569         if username:
    570             svn_commit_args.extend(["--username", username])
    571 
    572         svn_commit_args.extend(["-m", message])
    573 
    574         if changed_files:
    575             svn_commit_args.extend(changed_files)
    576 
    577         if self.dryrun:
    578             _log = logging.getLogger("webkitpy.common.system")
    579             _log.debug('Would run SVN command: "' + " ".join(svn_commit_args) + '"')
    580 
    581             # Return a string which looks like a commit so that things which parse this output will succeed.
    582             return "Dry run, no commit.\nCommitted revision 0."
    583 
    584         return self.run(svn_commit_args, cwd=self.checkout_root, error_handler=commit_error_handler)
    585 
    586     def svn_commit_log(self, svn_revision):
    587         svn_revision = self.strip_r_from_svn_revision(svn_revision)
    588         return self.run(['svn', 'log', '--non-interactive', '--revision', svn_revision])
    589 
    590     def last_svn_commit_log(self):
    591         # BASE is the checkout revision, HEAD is the remote repository revision
    592         # http://svnbook.red-bean.com/en/1.0/ch03s03.html
    593         return self.svn_commit_log('BASE')
    594 
    595     def propset(self, pname, pvalue, path):
    596         dir, base = os.path.split(path)
    597         return self.run(['svn', 'pset', pname, pvalue, base], cwd=dir)
    598 
    599     def propget(self, pname, path):
    600         dir, base = os.path.split(path)
    601         return self.run(['svn', 'pget', pname, base], cwd=dir).encode('utf-8').rstrip("\n")
    602 
    603 
    604 # All git-specific logic should go here.
    605 class Git(SCM, SVNRepository):
    def __init__(self, cwd, executive=None):
        """Initialise the SCM base state, then check the git binary's architecture."""
        SCM.__init__(self, cwd, executive=executive)
        self._check_git_architecture()
    609 
    def _machine_is_64bit(self):
        """Return True when sysctl reports a 64-bit capable CPU on a Mac.

        Always False when platform.mac_ver() reports no release.
        """
        import platform
        # This only is tested on Mac.
        mac_release = platform.mac_ver()[0]
        if mac_release:
            # platform.architecture()[0] can be '64bit' even if the machine is 32bit:
            # http://mail.python.org/pipermail/pythonmac-sig/2009-September/021648.html
            # Use the sysctl command to find out what the processor actually supports.
            sysctl_output = self.run(['sysctl', '-n', 'hw.cpu64bit_capable'])
            return sysctl_output.rstrip() == '1'
        return False
    620 
    def _executable_is_64bit(self, path):
        """Return a truthy match when `file path` mentions x86_64, else None."""
        # Again, platform.architecture() fails us.  On my machine
        # git_bits = platform.architecture(executable=git_path, bits='default')[0]
        # git_bits is just 'default', meaning the call failed.
        return re.search('x86_64', self.run(['file', path]))
    627 
    def _check_git_architecture(self):
        """Log a warning when a 64-bit machine runs a git binary without 64-bit support."""
        if self._machine_is_64bit():
            # We could path-search entirely in python or with
            # which.py (http://code.google.com/p/which), but this is easier:
            git_path = self.run(['which', 'git']).rstrip()
            if not self._executable_is_64bit(git_path):
                thread_url = "https://lists.webkit.org/pipermail/webkit-dev/2010-December/015249.html"
                log("Warning: This machine is 64-bit, but the git binary (%s) does not support 64-bit.\nInstall a 64-bit git for better performance, see:\n%s\n" % (git_path, thread_url))
    640 
    @classmethod
    def in_working_directory(cls, path):
        """Return True when git reports that path is inside a work tree.

        Command failures are ignored and therefore count as "not inside".
        """
        answer = run_command(['git', 'rev-parse', '--is-inside-work-tree'], cwd=path, error_handler=Executive.ignore_error)
        return answer.rstrip() == "true"
    644 
    @classmethod
    def find_checkout_root(cls, path):
        """Return the directory containing the git dir that git reports for path."""
        # "git rev-parse --show-cdup" would be another way to get to the root
        git_dir = run_command(['git', 'rev-parse', '--git-dir'], cwd=(path or "./"))
        root = os.path.split(git_dir)[0]
        # If we were using 2.6 # checkout_root = os.path.relpath(checkout_root, path)
        if os.path.isabs(root):
            return root
        # Sometimes git returns relative paths
        return os.path.join(path, root)
    653 
    @classmethod
    def to_object_name(cls, filepath):
        """Return filepath with the checkout root (plus trailing separator) removed."""
        containing_directory = os.path.dirname(filepath)
        checkout_root = cls.find_checkout_root(containing_directory)
        # Joining with '' appends the path separator to the root.
        prefix = os.path.join(checkout_root, '')
        return filepath.replace(prefix, '')
    658 
    @classmethod
    def read_git_config(cls, key):
        """Return every value git has configured for key, minus trailing newlines.

        Command failures are ignored.
        """
        # FIXME: This should probably use cwd=self.checkout_root.
        # Pass --get-all for cases where the config has multiple values
        config_output = run_command(["git", "config", "--get-all", key], error_handler=Executive.ignore_error)
        return config_output.rstrip('\n')
    665 
    666     @staticmethod
    667     def commit_success_regexp():
    668         return "^Committed r(?P<svn_revision>\d+)$"
    669 
    def discard_local_commits(self):
        """Run "git reset --hard" to the ref returned by remote_branch_ref()."""
        # FIXME: This should probably use cwd=self.checkout_root
        upstream_ref = self.remote_branch_ref()
        self.run(['git', 'reset', '--hard', upstream_ref])
    673     
    def local_commits(self):
        """Return the "git log --pretty=oneline" lines for the range
        HEAD...<remote branch ref>."""
        # FIXME: This should probably use cwd=self.checkout_root
        revision_range = 'HEAD...' + self.remote_branch_ref()
        log_output = self.run(['git', 'log', '--pretty=oneline', revision_range])
        return log_output.splitlines()
    677 
    678     def rebase_in_progress(self):
    679         return os.path.exists(os.path.join(self.checkout_root, '.git/rebase-apply'))
    680 
    def working_directory_is_clean(self):
        """Return True when "git diff HEAD --name-only" prints nothing."""
        # FIXME: This should probably use cwd=self.checkout_root
        changed_names = self.run(['git', 'diff', 'HEAD', '--name-only'])
        return changed_names == ""
    684 
    def clean_working_directory(self):
        """Hard-reset to HEAD, then abort a rebase if one is in progress."""
        # FIXME: These should probably use cwd=self.checkout_root.
        # git clean could be run here too, but that would not match
        # working_directory_is_clean.
        self.run(['git', 'reset', '--hard', 'HEAD'])
        if not self.rebase_in_progress():
            return
        # The rebase is aborted even though this does not match
        # working_directory_is_clean.
        self.run(['git', 'rebase', '--abort'])
    692 
    def status_command(self):
        """Return the argument list used to list changed files with status."""
        # "git status" returns non-zero when there are changes, so
        # "git diff --name-status HEAD" is used instead. No file contents
        # are printed, thus utf-8 autodecoding in self.run is fine.
        git_diff = ["git", "diff"]
        return git_diff + ["--name-status", "HEAD"]
    697 
    698     def _status_regexp(self, expected_types):
    699         return '^(?P<status>[%s])\t(?P<filename>.+)$' % expected_types
    700 
    def add(self, path, return_exit_code=False):
        """Run "git add path", forwarding return_exit_code to self.run."""
        add_command = ["git", "add", path]
        return self.run(add_command, return_exit_code=return_exit_code)
    703 
    def delete(self, path):
        """Run "git rm -f path" and return what self.run returns."""
        removal_command = ["git", "rm", "-f", path]
        return self.run(removal_command)
    706 
    def merge_base(self, git_commit):
        """Return the revision argument to diff against for git_commit.

        - No git_commit: the result of remote_merge_base().
        - "HEAD.." (any letter case): 'HEAD'.
        - A value containing "..": returned unchanged.
        - Anything else: "<commit>^..<commit>".
        """
        if not git_commit:
            return self.remote_merge_base()

        # Special-case HEAD.. to mean working-copy changes only.
        if git_commit.upper() == 'HEAD..':
            return 'HEAD'

        if '..' in git_commit:
            return git_commit
        # A single commit becomes the range from its parent to itself.
        return git_commit + "^.." + git_commit
    718 
    def changed_files(self, git_commit=None):
        """Return what run_status_and_extract_filenames yields for a
        "git diff --name-status" against merge_base(git_commit), using the
        A/D/M status regexp."""
        # FIXME: --diff-filter could be used to avoid the "extract_filenames" step.
        diff_options = ['-r', '--name-status', '-C', '-M', "--no-ext-diff", "--full-index"]
        status_command = ['git', 'diff'] + diff_options + [self.merge_base(git_commit)]
        # FIXME: I'm not sure we're returning the same set of files that SVN.changed_files is.
        # Added (A), Copied (C), Deleted (D), Modified (M), Renamed (R)
        wanted_statuses = self._status_regexp("ADM")
        return self.run_status_and_extract_filenames(status_command, wanted_statuses)
    725 
    def _changes_files_for_commit(self, git_commit):
        """Return the lines "git show --name-only" prints for git_commit,
        minus the first line."""
        # --pretty="format:" makes git show not print the commit log header;
        # it prints a blank line at the top instead.
        show_command = ["git", "show", "--pretty=format:", "--name-only", git_commit]
        output_lines = self.run(show_command).splitlines()
        # Skip that leading blank line.
        return output_lines[1:]
    731 
    def changed_files_for_revision(self, revision):
        """Return the changed files of the git commit found for svn revision."""
        return self._changes_files_for_commit(self.git_commit_from_svn_revision(revision))
    735 
    def revisions_changing_file(self, path, limit=5):
        """Return the svn revisions for the commits "git log" lists for path.

        The log is capped at limit entries. Each commit id is converted with
        svn_revision_from_git_commit and falsy results (such as None) are
        dropped.
        """
        # git rev-list head --remove-empty --limit=5 -- path would be equivalent.
        log_command = ["git", "log", "--remove-empty", "--pretty=format:%H", "-%s" % limit, "--", path]
        commit_ids = self.run(log_command).splitlines()
        svn_revisions = [self.svn_revision_from_git_commit(commit_id) for commit_id in commit_ids]
        return [revision for revision in svn_revisions if revision]
    740 
    def conflicted_files(self):
        """Return what run_status_and_extract_filenames yields for a
        "git diff --name-status --diff-filter=U", using the U status regexp."""
        # decode_output does not need to be passed for this diff command:
        # --name-status does not output any file data.
        unmerged_command = ['git', 'diff', '--name-status', '-C', '-M', '--diff-filter=U']
        unmerged_regexp = self._status_regexp("U")
        return self.run_status_and_extract_filenames(unmerged_command, unmerged_regexp)
    746 
    def added_files(self):
        """Return the entries of status_command() output matching status A."""
        command = self.status_command()
        added_regexp = self._status_regexp("A")
        return self.run_status_and_extract_filenames(command, added_regexp)
    749 
    def deleted_files(self):
        """Return the entries of status_command() output matching status D."""
        command = self.status_command()
        deleted_regexp = self._status_regexp("D")
        return self.run_status_and_extract_filenames(command, deleted_regexp)
    752 
    753     @staticmethod
    754     def supports_local_commits():
    755         return True
    756 
    def display_name(self):
        """Return the display name of this SCM: "git"."""
        name = "git"
        return name
    759 
    def prepend_svn_revision(self, diff):
        """Return diff prefixed with a "Subversion Revision: <N>" line.

        <N> is taken from the first git-svn-id line found in the output of
        "git log -25". When no such line is found, diff is returned
        unchanged.
        """
        git_log = self.run(['git', 'log', '-25'])
        # Raw string so the regexp escapes reach the re module untouched
        # instead of relying on invalid string-literal escapes.
        match = re.search(r"^\s*git-svn-id:.*@(?P<svn_revision>\d+)\ ", git_log, re.MULTILINE)
        if not match:
            return diff

        return "Subversion Revision: " + str(match.group('svn_revision')) + '\n' + diff
    767 
    def create_patch(self, git_commit=None, changed_files=None):
        """Return the patch as a byte array (str()), not decoded text.

        Patch files are effectively binary because the files they cover may
        use several different encodings. The diff is taken against
        merge_base(git_commit), limited to changed_files when given, and
        passed through prepend_svn_revision.
        """
        diff_command = ['git', 'diff', '--binary', "--no-ext-diff", "--full-index", "-M", self.merge_base(git_commit), "--"]
        if changed_files:
            diff_command.extend(changed_files)
        raw_patch = self.run(diff_command, decode_output=False, cwd=self.checkout_root)
        return self.prepend_svn_revision(raw_patch)
    776 
    def _run_git_svn_find_rev(self, arg):
        """Run "git svn find-rev arg" from the checkout root and return its
        output with trailing whitespace removed."""
        # git svn find-rev always exits 0, even when the revision or commit is not found.
        find_rev_output = self.run(['git', 'svn', 'find-rev', arg], cwd=self.checkout_root)
        return find_rev_output.rstrip()
    780 
    781     def _string_to_int_or_none(self, string):
    782         try:
    783             return int(string)
    784         except ValueError, e:
    785             return None
    786 
    @memoized
    def git_commit_from_svn_revision(self, svn_revision):
        """Return the "git svn find-rev" output for r<svn_revision>.

        Raises ScriptError when that output is empty.
        """
        git_commit = self._run_git_svn_find_rev('r%s' % svn_revision)
        if git_commit:
            return git_commit
        # FIXME: Alternatively we could offer to update the checkout? Or return None?
        failure_message = 'Failed to find git commit for revision %s, your checkout likely needs an update.' % svn_revision
        raise ScriptError(message=failure_message)
    794 
    @memoized
    def svn_revision_from_git_commit(self, git_commit):
        """Return the "git svn find-rev" output for git_commit as an int,
        or None when it is not an integer."""
        return self._string_to_int_or_none(self._run_git_svn_find_rev(git_commit))
    799 
    def contents_at_revision(self, path, revision):
        """Return a byte array (str()) holding the contents of path at the
        given svn revision, read with "git show" and left undecoded."""
        object_name = "%s:%s" % (self.git_commit_from_svn_revision(revision), path)
        return self.run(["git", "show", object_name], decode_output=False)
    804 
    def diff_for_revision(self, revision):
        """Return create_patch() for the git commit found for svn revision."""
        return self.create_patch(self.git_commit_from_svn_revision(revision))
    808 
    def diff_for_file(self, path, log=None):
        """Return the "git diff HEAD -- path" output.

        The log argument is accepted but not used by this implementation.
        """
        diff_command = ['git', 'diff', 'HEAD', '--', path]
        return self.run(diff_command)
    811 
    def show_head(self, path):
        """Return the undecoded "git show" output for path at HEAD."""
        head_object = 'HEAD:' + self.to_object_name(path)
        return self.run(['git', 'show', head_object], decode_output=False)
    814 
    def committer_email_for_revision(self, revision):
        """Return the committer email of the git commit for svn revision,
        with everything from the last "@" onwards removed."""
        git_commit = self.git_commit_from_svn_revision(revision)
        log_command = ["git", "log", "-1", "--pretty=format:%ce", git_commit]
        email_with_hash = self.run(log_command)
        # Git adds an extra @repository_hash to the end of every committer
        # email; cut it off at the last "@".
        return email_with_hash.rsplit("@", 1)[0]
    820 
    def apply_reverse_diff(self, revision):
        """Run "git revert --no-commit" on the git commit for svn revision.

        Command errors are passed to Executive.ignore_error.
        """
        # Assume the revision is an svn revision.
        commit_to_revert = self.git_commit_from_svn_revision(revision)
        # I think this will always fail due to ChangeLogs.
        revert_command = ['git', 'revert', '--no-commit', commit_to_revert]
        self.run(revert_command, error_handler=Executive.ignore_error)
    826 
    def revert_files(self, file_paths):
        """Run "git checkout HEAD" for the paths in the file_paths list."""
        checkout_command = ['git', 'checkout', 'HEAD']
        self.run(checkout_command + file_paths)
    829 
    def _assert_can_squash(self, working_directory_is_clean):
        """Raise AmbiguousCommitError when squashing would combine changes.

        Nothing is checked when the git config key
        webkit-patch.commit-should-always-squash is "true" (any letter case).
        Otherwise the error is raised for more than one local commit, or for
        at least one local commit together with a modified working directory.
        """
        configured = Git.read_git_config('webkit-patch.commit-should-always-squash')
        if configured and configured.lower() == "true":
            return

        # Only warn if there are actually multiple commits to squash.
        local_commit_count = len(self.local_commits())
        has_several_commits = local_commit_count > 1
        has_commit_and_dirty_tree = local_commit_count > 0 and not working_directory_is_clean
        if has_several_commits or has_commit_and_dirty_tree:
            raise AmbiguousCommitError(local_commit_count, working_directory_is_clean)
    839 
    def commit_with_message(self, message, username=None, password=None, git_commit=None, force_squash=False, changed_files=None):
        """Commit local work to the server with the given message.

        Three paths, chosen by git_commit:
        - "HEAD.." (any letter case): commit the working-copy changes
          locally, then land 'HEAD' through _commit_on_branch. Raises
          ScriptError when the working copy is not modified.
        - any other git_commit: land it through _commit_on_branch. Raises
          ScriptError when the working copy is modified.
        - no git_commit: soft-reset to remote_merge_base(), commit everything
          locally as one commit and push it. Unless force_squash is set,
          _assert_can_squash runs first.

        Returns the value of _commit_on_branch or
        push_local_commits_to_server. changed_files is not used in this
        method body.
        """
        # Username is ignored during Git commits.
        # NOTE(review): username is still forwarded to the helpers below,
        # so the comment above may be stale -- confirm.
        working_directory_is_clean = self.working_directory_is_clean()

        if git_commit:
            # Special-case HEAD.. to mean working-copy changes only.
            if git_commit.upper() == 'HEAD..':
                if working_directory_is_clean:
                    raise ScriptError(message="The working copy is not modified. --git-commit=HEAD.. only commits working copy changes.")
                self.commit_locally_with_message(message)
                return self._commit_on_branch(message, 'HEAD', username=username, password=password)

            # Need working directory changes to be committed so we can checkout the merge branch.
            if not working_directory_is_clean:
                # FIXME: webkit-patch land will modify the ChangeLogs to correct the reviewer.
                # That will modify the working-copy and cause us to hit this error.
                # The ChangeLog modification could be made to modify the existing local commit.
                raise ScriptError(message="Working copy is modified. Cannot commit individual git_commits.")
            return self._commit_on_branch(message, git_commit, username=username, password=password)

        # No explicit commit: squash everything since the remote merge base
        # into a single local commit and push that.
        if not force_squash:
            self._assert_can_squash(working_directory_is_clean)
        self.run(['git', 'reset', '--soft', self.remote_merge_base()])
        self.commit_locally_with_message(message)
        return self.push_local_commits_to_server(username=username, password=password)
    865 
    def _commit_on_branch(self, message, git_commit, username=None, password=None):
        """Land git_commit as one commit made on a temporary branch.

        The commits named by git_commit are cherry-picked without committing
        onto a fresh 'webkit-patch-land' branch created from
        remote_branch_ref(), committed once with message, and pushed with
        push_local_commits_to_server.

        Any exception raised along the way is logged and swallowed; the
        method then returns "Commit failed." instead of the push output.
        In every case the working directory is cleaned, the original branch
        is checked out again and the temporary branch is deleted.

        Side effect: changes the process working directory to the checkout
        root.
        """
        # Remember the current branch so it can be restored afterwards.
        branch_ref = self.run(['git', 'symbolic-ref', 'HEAD']).strip()
        branch_name = branch_ref.replace('refs/heads/', '')
        commit_ids = self.commit_ids_from_commitish_arguments([git_commit])

        # We want to squash all this branch's commits into one commit with the proper description.
        # We do this by cherry-picking them with --no-commit onto a new commit branch,
        # committing once, then dcommitting that.
        MERGE_BRANCH_NAME = 'webkit-patch-land'
        self.delete_branch(MERGE_BRANCH_NAME)

        # We might be in a directory that's present in this branch but not in the
        # trunk.  Move up to the top of the tree so that git commands that expect a
        # valid CWD won't fail after we check out the merge branch.
        os.chdir(self.checkout_root)

        # Stuff our change into the merge branch.
        # We wrap in a try...finally block so if anything goes wrong, we clean up the branches.
        # NOTE(review): commit_succeeded is assigned but never read in this method.
        commit_succeeded = True
        try:
            self.run(['git', 'checkout', '-q', '-b', MERGE_BRANCH_NAME, self.remote_branch_ref()])

            for commit in commit_ids:
                # We're on a different branch now, so convert "head" to the branch name.
                commit = re.sub(r'(?i)head', branch_name, commit)
                # FIXME: Once changed_files and create_patch are modified to separately handle each
                # commit in a commit range, commit each cherry pick so they'll get dcommitted separately.
                self.run(['git', 'cherry-pick', '--no-commit', commit])

            self.run(['git', 'commit', '-m', message])
            output = self.push_local_commits_to_server(username=username, password=password)
        except Exception, e:
            # Failures are reported through the log and the return value
            # rather than being re-raised.
            log("COMMIT FAILED: " + str(e))
            output = "Commit failed."
            commit_succeeded = False
        finally:
            # And then swap back to the original branch and clean up.
            self.clean_working_directory()
            self.run(['git', 'checkout', '-q', branch_name])
            self.delete_branch(MERGE_BRANCH_NAME)

        return output
    907 
    def svn_commit_log(self, svn_revision):
        """Return the "git svn log -r" output for svn_revision, after passing
        it through strip_r_from_svn_revision."""
        bare_revision = self.strip_r_from_svn_revision(svn_revision)
        log_command = ['git', 'svn', 'log', '-r', bare_revision]
        return self.run(log_command)
    911 
    def last_svn_commit_log(self):
        """Return the "git svn log --limit=1" output."""
        log_command = ['git', 'svn', 'log', '--limit=1']
        return self.run(log_command)
    914 
    915     # Git-specific methods:
    def _branch_ref_exists(self, branch_ref):
        """Return True when "git show-ref --quiet --verify branch_ref"
        exits with code 0."""
        show_ref_command = ['git', 'show-ref', '--quiet', '--verify', branch_ref]
        exit_code = self.run(show_ref_command, return_exit_code=True)
        return exit_code == 0
    918 
    def delete_branch(self, branch_name):
        """Run "git branch -D branch_name" if refs/heads/<branch_name> exists."""
        if not self._branch_ref_exists('refs/heads/' + branch_name):
            return
        self.run(['git', 'branch', '-D', branch_name])
    922 
    def remote_merge_base(self):
        """Return the stripped "git merge-base" output for the remote branch
        ref and HEAD."""
        merge_base_command = ['git', 'merge-base', self.remote_branch_ref(), 'HEAD']
        merge_base_output = self.run(merge_base_command)
        return merge_base_output.strip()
    925 
    def remote_branch_ref(self):
        """Return the ref of the remote branch to compare against.

        When the git config key svn-remote.svn.fetch is set, the part after
        the first ":" of its first line is returned. Otherwise
        refs/remotes/origin/master is returned if that ref exists.

        Raises ScriptError when neither is available.
        """
        # Use references so that we can avoid collisions, e.g. we don't want to operate on refs/heads/trunk if it exists.
        fetch_config = Git.read_git_config('svn-remote.svn.fetch')
        if fetch_config:
            # FIXME: What's the right behavior when there are multiple svn-remotes listed?
            # For now, just use the first one.
            first_fetch_line = fetch_config.split('\n')[0]
            return first_fetch_line.split(':')[1]

        remote_master_ref = 'refs/remotes/origin/master'
        if self._branch_ref_exists(remote_master_ref):
            return remote_master_ref
        raise ScriptError(message="Can't find a branch to diff against. svn-remote.svn.fetch is not in the git config and %s does not exist" % remote_master_ref)
    939 
    def commit_locally_with_message(self, message):
        """Run "git commit --all -F -", passing message as the command input."""
        commit_command = ['git', 'commit', '--all', '-F', '-']
        self.run(commit_command, input=message)
    942 
    def push_local_commits_to_server(self, username=None, password=None):
        """Run "git svn dcommit" and return its output.

        "--dry-run" is added when self.dryrun is set, and "--username" when
        a username is given; password is passed as the command input.
        In dry-run mode a fake "Committed r0" line is appended to the output.

        Raises AuthenticationError when has_authorization_for_realm reports
        no authorization for SVN.svn_server_realm.
        """
        dcommit_command = ['git', 'svn', 'dcommit']
        if self.dryrun:
            dcommit_command += ['--dry-run']
        if not self.has_authorization_for_realm(SVN.svn_server_realm):
            raise AuthenticationError(SVN.svn_server_host, prompt_for_password=True)
        if username:
            dcommit_command += ["--username", username]
        output = self.run(dcommit_command, error_handler=commit_error_handler, input=password)
        if not self.dryrun:
            return output
        # Return a string which looks like a commit so that things which parse this output will succeed.
        return output + "\nCommitted r0"
    956 
    957     # This function supports the following argument formats:
    958     # no args : rev-list trunk..HEAD
    959     # A..B    : rev-list A..B
    960     # A...B   : error!
    961     # A B     : [A, B]  (different from git diff, which would use "rev-list A..B")
    def commit_ids_from_commitish_arguments(self, args):
        """Expand commit-ish arguments into a list of commit ids.

        With no arguments, the range "<remote branch ref>..HEAD" is used.
        An "A..B" argument is expanded with "git rev-list" and its output
        lines are appended in reverse order. Any other argument is resolved
        with "git rev-parse --revs-only".

        Raises ScriptError when an argument contains "...".
        The args sequence passed in is left unmodified.
        """
        # Work on a copy so that an empty list owned by the caller is not
        # mutated when the default range is filled in.
        commitishes = list(args)
        if not commitishes:
            commitishes.append('%s..HEAD' % self.remote_branch_ref())

        commit_ids = []
        for commitish in commitishes:
            if '...' in commitish:
                raise ScriptError(message="'...' is not supported (found in '%s'). Did you mean '..'?" % commitish)
            elif '..' in commitish:
                commit_ids += reversed(self.run(['git', 'rev-list', commitish]).splitlines())
            else:
                # Turn single commits or branch or tag names into commit ids.
                commit_ids += self.run(['git', 'rev-parse', '--revs-only', commitish]).splitlines()
        return commit_ids
    976 
    def commit_message_for_local_commit(self, commit_id):
        """Return a CommitMessage built from the lines of
        "git cat-file commit commit_id" that follow the first blank line."""
        commit_lines = self.run(['git', 'cat-file', 'commit', commit_id]).splitlines()

        # The git headers end at the first blank line. With no blank line at
        # all, every line counts as a header and the message is empty.
        try:
            message_start = commit_lines.index("") + 1
        except ValueError:
            message_start = len(commit_lines)
        return CommitMessage(commit_lines[message_start:])
    987 
    def files_changed_summary_for_commit(self, commit_id):
        """Return the "git diff-tree --shortstat --no-commit-id" output for
        commit_id."""
        shortstat_command = ['git', 'diff-tree', '--shortstat', '--no-commit-id', commit_id]
        return self.run(shortstat_command)
    990